src/lexer.c

   1 /* ------------------------------------------------------------------------- */
   2 /*   "lexer" : Lexical analyser                                              */
   3 /*                                                                           */
   4 /*   Part of Inform 6.35                                                     */
   5 /*   copyright (c) Graham Nelson 1993 - 2020                                 */
   6 /*                                                                           */
   7 /* Inform is free software: you can redistribute it and/or modify            */
   8 /* it under the terms of the GNU General Public License as published by      */
   9 /* the Free Software Foundation, either version 3 of the License, or         */
  10 /* (at your option) any later version.                                       */
  11 /*                                                                           */
  12 /* Inform is distributed in the hope that it will be useful,                 */
  13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
  14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              */
  15 /* GNU General Public License for more details.                              */
  16 /*                                                                           */
  17 /* You should have received a copy of the GNU General Public License         */
  18 /* along with Inform. If not, see https://gnu.org/licenses/                  *
  19 /*                                                                           */
  20 /* ------------------------------------------------------------------------- */
  21
  22 #include "header.h"
  23
  24 int total_source_line_count,            /* Number of source lines so far     */
  25
  26     no_hash_printed_yet,                /* Have not yet printed the first #  */
  27     hash_printed_since_newline,         /* A hash has been printed since the
  28                                            most recent new-line was printed
  29                                            (generally as a result of an error
  30                                            message or the start of pass)     */
  31     dont_enter_into_symbol_table,       /* Return names as text (with
  32                                            token type DQ_TT, i.e., as if
  33                                            they had double-quotes around)
  34                                            and not as entries in the symbol
  35                                            table, when TRUE. If -2, only the
  36                                            keyword table is searched.        */
  37     return_sp_as_variable;              /* When TRUE, the word "sp" denotes
  38                                            the stack pointer variable
  39                                            (used in assembly language only)  */
  40 int next_token_begins_syntax_line;      /* When TRUE, start a new syntax
  41                                            line (for error reporting, etc.)
  42                                            on the source code line where
  43                                            the next token appears            */
  44
  45 int32 last_mapped_line;  /* Last syntax line reported to debugging file      */
  46
  47 /* ------------------------------------------------------------------------- */
  48 /*   The lexer's output is a sequence of triples, each called a "token",     */
  49 /*   representing one lexical unit (or "lexeme") each.  Instead of providing */
  50 /*   "lookahead" (that is, always having available the next token after the  */
  51 /*   current one, so that syntax analysers higher up in Inform can have      */
  52 /*   advance knowledge of what is coming), the lexer instead has a system    */
  53 /*   where tokens can be read in and then "put back again".                  */
  54 /*   The meaning of the number (and to some extent the text) supplied with   */
  55 /*   a token depends on its type: see "header.h" for the list of types.      */
  56 /*   For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
  57 /*   number, and translated to the token:                                    */
  58 /*     type NUMBER_TT, value 483, text "$1e3"                                */
  59 /* ------------------------------------------------------------------------- */
  60 /*   These three variables are set to the current token on a call to         */
  61 /*   get_next_token() (but are not changed by a call to put_token_back()).   */
  62 /* ------------------------------------------------------------------------- */
  63
  64 int token_type;
  65 int32 token_value;
  66 char *token_text;
  67
  68 /* ------------------------------------------------------------------------- */
  69 /*   The next two variables are the head and tail of a singly linked list.   */
  70 /*   The tail stores the portion most recently read from the current         */
  71 /*   lexical block; its end values therefore describe the location of the    */
  72 /*   current token, and are updated whenever the three variables above are   */
  73 /*   via set_token_location(...).  Earlier vertices, if any, represent the   */
  74 /*   regions of lexical blocks read beforehand, where new vertices are       */
  75 /*   only introduced by interruptions like a file inclusion or an EOF.       */
  76 /*   Vertices are deleted off of the front of the list once they are no      */
  77 /*   longer referenced by pending debug information records.                 */
  78 /* ------------------------------------------------------------------------- */
  79
  80 static debug_locations *first_token_locations;
  81 static debug_locations *last_token_location;
  82
  83 extern debug_location get_token_location(void)
  84 {   debug_location result;
  85     debug_location *location = &(last_token_location->location);
  86     result.file_index = location->file_index;
  87     result.beginning_byte_index = location->end_byte_index;
  88     result.end_byte_index = location->end_byte_index;
  89     result.beginning_line_number = location->end_line_number;
  90     result.end_line_number = location->end_line_number;
  91     result.beginning_character_number = location->end_character_number;
  92     result.end_character_number = location->end_character_number;
  93     result.orig_file_index = location->orig_file_index;
  94     result.orig_beg_line_number = location->orig_beg_line_number;
  95     result.orig_beg_char_number = location->orig_beg_char_number;
  96     return result;
  97 }
  98
  99 extern debug_locations get_token_locations(void)
 100 {   debug_locations result;
 101     result.location = get_token_location();
 102     result.next = NULL;
 103     result.reference_count = 0;
 104     return result;
 105 }
 106
 107 static void set_token_location(debug_location location)
 108 {   if (location.file_index == last_token_location->location.file_index)
 109     {   last_token_location->location.end_byte_index =
 110             location.end_byte_index;
 111         last_token_location->location.end_line_number =
 112             location.end_line_number;
 113         last_token_location->location.end_character_number =
 114             location.end_character_number;
 115         last_token_location->location.orig_file_index =
 116             location.orig_file_index;
 117         last_token_location->location.orig_beg_line_number =
 118             location.orig_beg_line_number;
 119         last_token_location->location.orig_beg_char_number =
 120             location.orig_beg_char_number;
 121     } else
 122     {   debug_locations*successor =
 123             my_malloc
 124                 (sizeof(debug_locations),
 125                  "debug locations of recent tokens");
 126         successor->location = location;
 127         successor->next = NULL;
 128         successor->reference_count = 0;
 129         last_token_location->next = successor;
 130         last_token_location = successor;
 131     }
 132 }
 133
 134 extern debug_location_beginning get_token_location_beginning(void)
 135 {   debug_location_beginning result;
 136     ++(last_token_location->reference_count);
 137     result.head = last_token_location;
 138     result.beginning_byte_index =
 139         last_token_location->location.end_byte_index;
 140     result.beginning_line_number =
 141         last_token_location->location.end_line_number;
 142     result.beginning_character_number =
 143         last_token_location->location.end_character_number;
 144     result.orig_file_index = last_token_location->location.orig_file_index;
 145     result.orig_beg_line_number = last_token_location->location.orig_beg_line_number;
 146     result.orig_beg_char_number = last_token_location->location.orig_beg_char_number;
 147
 148     return result;
 149 }
 150
 151 static void cleanup_token_locations(debug_location_beginning*beginning)
 152 {   if (first_token_locations)
 153     {   while (first_token_locations &&
 154                !first_token_locations->reference_count)
 155         {   debug_locations*moribund = first_token_locations;
 156             first_token_locations = moribund->next;
 157             my_free(&moribund, "debug locations of recent tokens");
 158             if (beginning &&
 159                 (beginning->head == moribund || !first_token_locations))
 160             {   compiler_error
 161                     ("Records needed by a debug_location_beginning are no "
 162                      "longer allocated, perhaps because of an invalid reuse "
 163                      "of this or an earlier beginning");
 164             }
 165         }
 166     } else
 167     {   if (beginning)
 168         {   compiler_error
 169                 ("Attempt to use a debug_location_beginning when no token "
 170                  "locations are defined");
 171         } else
 172         {   compiler_error
 173                 ("Attempt to clean up token locations when no token locations "
 174                  "are defined");
 175         }
 176     }
 177 }
 178
 179 extern void discard_token_location(debug_location_beginning beginning)
 180 {   --(beginning.head->reference_count);
 181 }
 182
 183 extern debug_locations get_token_location_end
 184     (debug_location_beginning beginning)
 185 {   debug_locations result;
 186     cleanup_token_locations(&beginning);
 187     --(beginning.head->reference_count);
 188     /* Sometimes we know what we'll read before we switch to the lexical block
 189        where we'll read it.  In such cases the beginning will be placed in the
 190        prior block and last exactly zero bytes there.  It's misleading to
 191        include such ranges, so we gobble them. */
 192     if (beginning.head->location.end_byte_index ==
 193           beginning.beginning_byte_index &&
 194         beginning.head->next)
 195     {   beginning.head = beginning.head->next;
 196         result.location = beginning.head->location;
 197         result.location.beginning_byte_index = 0;
 198         result.location.beginning_line_number = 1;
 199         result.location.beginning_character_number = 1;
 200     } else
 201     {   result.location = beginning.head->location;
 202         result.location.beginning_byte_index =
 203             beginning.beginning_byte_index;
 204         result.location.beginning_line_number =
 205             beginning.beginning_line_number;
 206         result.location.beginning_character_number =
 207             beginning.beginning_character_number;
 208     }
 209
 210     result.location.orig_file_index =
 211         beginning.orig_file_index;
 212     result.location.orig_beg_line_number =
 213         beginning.orig_beg_line_number;
 214     result.location.orig_beg_char_number =
 215         beginning.orig_beg_char_number;
 216
 217     result.next = beginning.head->next;
 218     result.reference_count = 0;
 219     return result;
 220 }
 221
 222 /* ------------------------------------------------------------------------- */
 223 /*   In order to be able to put tokens back efficiently, the lexer stores    */
 224 /*   tokens in a "circle": the variable circle_position ranges between       */
 225 /*   0 and CIRCLE_SIZE-1.  We only need a circle size as large as the        */
 226 /*   maximum number of tokens ever put back at once, plus 1 (in effect, the  */
 227 /*   maximum token lookahead ever needed in syntax analysis, plus 1).        */
 228 /*                                                                           */
 229 /*   Unlike some compilers, Inform does not have a context-free lexer: in    */
 230 /*   fact it has 12288 different possible states.  However, the context only */
 231 /*   affects the interpretation of "identifiers": lexemes beginning with a   */
 232 /*   letter and containing up to 32 chars of alphanumeric and underscore     */
 233 /*   chars.  (For example, "default" may refer to the directive or statement */
 234 /*   of that name, and which token values are returned depends on the        */
 235 /*   current lexical context.)                                               */
 236 /*                                                                           */
 237 /*   Along with each token, we also store the lexical context it was         */
 238 /*   translated under; because if it is called for again, there may need     */
 239 /*   to be a fresh interpretation of it if the context has changed.          */
 240 /* ------------------------------------------------------------------------- */
 241
 242 #define CIRCLE_SIZE 6
 243
 244 /*   (The worst case for token lookahead is distinguishing between an
 245      old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".)   */
 246
 247 static int circle_position;
 248 static token_data circle[CIRCLE_SIZE];
 249
 250 static int token_contexts[CIRCLE_SIZE];
 251
 252 /* ------------------------------------------------------------------------- */
 253 /*   A complication, however, is that the text of some lexemes needs to be   */
 254 /*   held in Inform's memory for much longer periods: for example, a         */
 255 /*   dictionary word lexeme (like "'south'") must have its text preserved    */
 256 /*   until the code generation time for the expression it occurs in, when    */
 257 /*   the dictionary reference is actually made.  Code generation in general  */
 258 /*   occurs as early as possible in Inform: pending some better method of    */
 259 /*   garbage collection, we simply use a buffer so large that unless         */
 260 /*   expressions spread across 10K of source code are found, there can be    */
 261 /*   no problem.                                                             */
 262 /* ------------------------------------------------------------------------- */
 263
 264 static char *lexeme_memory;
 265 static char *lex_p;                     /* Current write position            */
 266
 267 /* ------------------------------------------------------------------------- */
 268 /*   The lexer itself needs up to 3 characters of lookahead (it uses an      */
 269 /*   LR(3) grammar to translate characters into tokens).                     */
 270 /* ------------------------------------------------------------------------- */
 271
 272 #define LOOKAHEAD_SIZE 3
 273
 274 static int current, lookahead,          /* The latest character read, and    */
 275     lookahead2, lookahead3;             /* the three characters following it */
 276
 277 static int pipeline_made;               /* Whether or not the pipeline of
 278                                            characters has been constructed
 279                                            yet (this pass)                   */
 280
 281 static int (* get_next_char)(void);     /* Routine for reading the stream of
 282                                            characters: the lexer does not
 283                                            need any "ungetc" routine for
 284                                            putting them back again.  End of
 285                                            stream is signalled by returning
 286                                            zero.                             */
 287
 288 static char *source_to_analyse;         /* The current lexical source:
 289                                            NULL for "load from source files",
 290                                            otherwise this points to a string
 291                                            containing Inform code            */
 292
 293 static int tokens_put_back;             /* Count of the number of backward
 294                                            moves made from the last-read
 295                                            token                             */
 296
 297 extern void describe_token(token_data t)
 298 {
 299     /*  Many of the token types are not set in this file, but later on in
 300         Inform's higher stages (for example, in the expression evaluator);
 301         but this routine describes them all.                                 */
 302
 303     printf("{ ");
 304
 305     switch(t.type)
 306     {
 307         /*  The following token types occur in lexer output:                 */
 308
 309         case SYMBOL_TT:          printf("symbol ");
 310                                  describe_symbol(t.value);
 311                                  break;
 312         case NUMBER_TT:          printf("literal number %d", t.value);
 313                                  break;
 314         case DQ_TT:              printf("string \"%s\"", t.text);
 315                                  break;
 316         case SQ_TT:              printf("string '%s'", t.text);
 317                                  break;
 318         case SEP_TT:             printf("separator '%s'", t.text);
 319                                  break;
 320         case EOF_TT:             printf("end of file");
 321                                  break;
 322
 323         case STATEMENT_TT:       printf("statement name '%s'", t.text);
 324                                  break;
 325         case SEGMENT_MARKER_TT:  printf("object segment marker '%s'", t.text);
 326                                  break;
 327         case DIRECTIVE_TT:       printf("directive name '%s'", t.text);
 328                                  break;
 329         case CND_TT:             printf("textual conditional '%s'", t.text);
 330                                  break;
 331         case OPCODE_NAME_TT:     printf("opcode name '%s'", t.text);
 332                                  break;
 333         case SYSFUN_TT:          printf("built-in function name '%s'", t.text);
 334                                  break;
 335         case LOCAL_VARIABLE_TT:  printf("local variable name '%s'", t.text);
 336                                  break;
 337         case MISC_KEYWORD_TT:    printf("statement keyword '%s'", t.text);
 338                                  break;
 339         case DIR_KEYWORD_TT:     printf("directive keyword '%s'", t.text);
 340                                  break;
 341         case TRACE_KEYWORD_TT:   printf("'trace' keyword '%s'", t.text);
 342                                  break;
 343         case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
 344                                  break;
 345
 346         /*  The remaining are etoken types, not set by the lexer             */
 347
 348         case OP_TT:              printf("operator '%s'",
 349                                      operators[t.value].description);
 350                                  break;
 351         case ENDEXP_TT:          printf("end of expression");
 352                                  break;
 353         case SUBOPEN_TT:         printf("open bracket");
 354                                  break;
 355         case SUBCLOSE_TT:        printf("close bracket");
 356                                  break;
 357         case LARGE_NUMBER_TT:    printf("large number: '%s'=%d",t.text,t.value);
 358                                  break;
 359         case SMALL_NUMBER_TT:    printf("small number: '%s'=%d",t.text,t.value);
 360                                  break;
 361         case VARIABLE_TT:        printf("variable '%s'=%d", t.text, t.value);
 362                                  break;
 363         case DICTWORD_TT:        printf("dictionary word '%s'", t.text);
 364                                  break;
 365         case ACTION_TT:          printf("action name '%s'", t.text);
 366                                  break;
 367
 368         default:
 369             printf("** unknown token type %d, text='%s', value=%d **",
 370             t.type, t.text, t.value);
 371     }
 372     printf(" }");
 373 }
 374
 375 /* ------------------------------------------------------------------------- */
 376 /*   All but one of the 280 Inform keywords (118 of them opcode names used   */
 377 /*   only by the assembler).  (The one left over is "sp", a keyword used in  */
 378 /*   assembly language only.)                                                */
 379 /*                                                                           */
 380 /*   A "keyword group" is a set of keywords to be searched for.  If a match  */
 381 /*   is made on an identifier, the token type becomes that given in the KG   */
 382 /*   and the token value is its index in the KG.                             */
 383 /*                                                                           */
 384 /*   The keyword ordering must correspond with the appropriate #define's in  */
 385 /*   "header.h" but is otherwise not significant.                            */
 386 /* ------------------------------------------------------------------------- */
 387
 388 #define MAX_KEYWORDS 350
 389
 390 /* The values will be filled in at compile time, when we know
 391    which opcode set to use. */
 392 keyword_group opcode_names =
 393 { { "" },
 394     OPCODE_NAME_TT, FALSE, TRUE
 395 };
 396
 397 static char *opcode_list_z[] = {
 398     "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
 399     "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
 400     "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
 401     "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
 402     "print_char", "print_num", "random", "push", "pull", "split_window",
 403     "set_window", "output_stream", "input_stream", "sound_effect", "jz",
 404     "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
 405     "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
 406     "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
 407     "restore", "restart", "ret_popped", "pop", "quit", "new_line",
 408     "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
 409     "erase_window", "erase_line", "set_cursor", "get_cursor",
 410     "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
 411     "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
 412     "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
 413     "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
 414     "restore_undo", "draw_picture", "picture_data", "erase_picture",
 415     "set_margins", "move_window", "window_size", "window_style",
 416     "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
 417     "mouse_window", "push_stack", "put_wind_prop", "print_form",
 418     "make_menu", "picture_table", "print_unicode", "check_unicode",
 419     ""
 420 };
 421
 422 static char *opcode_list_g[] = {
 423     "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
 424     "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
 425     "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
 426     "jltu", "jgeu", "jgtu", "jleu",
 427     "call", "return",
 428     "catch", "throw", "tailcall",
 429     "copy", "copys", "copyb", "sexs", "sexb", "aload",
 430     "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
 431     "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
 432     "streamchar", "streamnum", "streamstr",
 433     "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
 434     "random", "setrandom", "quit", "verify",
 435     "restart", "save", "restore", "saveundo", "restoreundo", "protect",
 436     "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
 437     "linearsearch", "binarysearch", "linkedsearch",
 438     "callf", "callfi", "callfii", "callfiii",
 439     "streamunichar",
 440     "mzero", "mcopy", "malloc", "mfree",
 441     "accelfunc", "accelparam",
 442     "numtof", "ftonumz", "ftonumn", "ceil", "floor",
 443     "fadd", "fsub", "fmul", "fdiv", "fmod",
 444     "sqrt", "exp", "log", "pow",
 445     "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
 446     "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
 447     ""
 448 };
 449
 450 keyword_group opcode_macros =
 451 { { "" },
 452   OPCODE_MACRO_TT, FALSE, TRUE
 453 };
 454
 455 static char *opmacro_list_z[] = { "" };
 456
 457 static char *opmacro_list_g[] = {
 458     "pull", "push",
 459     ""
 460 };
 461
 462 keyword_group directives =
 463 { { "abbreviate", "array", "attribute", "class", "constant",
 464     "default", "dictionary", "end", "endif", "extend", "fake_action",
 465     "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
 466     "iffalse", "import", "include", "link", "lowstring", "message",
 467     "nearby", "object", "origsource", "property", "release", "replace",
 468     "serial", "switches", "statusline", "stub", "system_file", "trace",
 469     "undef", "verb", "version", "zcharacter",
 470     "" },
 471     DIRECTIVE_TT, FALSE, FALSE
 472 };
 473
 474 keyword_group trace_keywords =
 475 { { "dictionary", "symbols", "objects", "verbs",
 476     "assembly", "expressions", "lines", "tokens", "linker",
 477     "on", "off", "" },
 478     TRACE_KEYWORD_TT, FALSE, TRUE
 479 };
 480
 481 keyword_group segment_markers =
 482 { { "class", "has", "private", "with", "" },
 483     SEGMENT_MARKER_TT, FALSE, TRUE
 484 };
 485
 486 keyword_group directive_keywords =
 487 { { "alias", "long", "additive",
 488     "score", "time",
 489     "noun", "held", "multi", "multiheld", "multiexcept",
 490     "multiinside", "creature", "special", "number", "scope", "topic",
 491     "reverse", "meta", "only", "replace", "first", "last",
 492     "string", "table", "buffer", "data", "initial", "initstr",
 493     "with", "private", "has", "class",
 494     "error", "fatalerror", "warning",
 495     "terminating", "static",
 496     "" },
 497     DIR_KEYWORD_TT, FALSE, TRUE
 498 };
 499
 500 keyword_group misc_keywords =
 501 { { "char", "name", "the", "a", "an", "The", "number",
 502     "roman", "reverse", "bold", "underline", "fixed", "on", "off",
 503     "to", "address", "string", "object", "near", "from", "property", "A", "" },
 504     MISC_KEYWORD_TT, FALSE, TRUE
 505 };
 506
 507 keyword_group statements =
 508 { { "box", "break", "continue", "default", "do", "else", "font", "for",
 509     "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
 510     "print", "print_ret", "quit", "read", "remove", "restore", "return",
 511     "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
 512     "until", "while", "" },
 513     STATEMENT_TT, FALSE, TRUE
 514 };
 515
 516 keyword_group conditions =
 517 { { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
 518     CND_TT, FALSE, TRUE
 519 };
 520
 521 keyword_group system_functions =
 522 { { "child", "children", "elder", "eldest", "indirect", "parent", "random",
 523     "sibling", "younger", "youngest", "metaclass", "glk", "" },
 524     SYSFUN_TT, FALSE, TRUE
 525 };
 526
 527 keyword_group system_constants =
 528 { { "adjectives_table", "actions_table", "classes_table",
 529     "identifiers_table", "preactions_table", "version_number",
 530     "largest_object", "strings_offset", "code_offset",
 531     "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
 532     "static_memory_offset", "array_names_offset", "readable_memory_offset",
 533     "cpv__start", "cpv__end", "ipv__start", "ipv__end",
 534     "array__start", "array__end",
 535     "lowest_attribute_number", "highest_attribute_number",
 536     "attribute_names_array",
 537     "lowest_property_number", "highest_property_number",
 538     "property_names_array",
 539     "lowest_action_number", "highest_action_number",
 540     "action_names_array",
 541     "lowest_fake_action_number", "highest_fake_action_number",
 542     "fake_action_names_array",
 543     "lowest_routine_number", "highest_routine_number", "routines_array",
 544     "routine_names_array", "routine_flags_array",
 545     "lowest_global_number", "highest_global_number", "globals_array",
 546     "global_names_array", "global_flags_array",
 547     "lowest_array_number", "highest_array_number", "arrays_array",
 548     "array_names_array", "array_flags_array",
 549     "lowest_constant_number", "highest_constant_number", "constants_array",
 550     "constant_names_array",
 551     "lowest_class_number", "highest_class_number", "class_objects_array",
 552     "lowest_object_number", "highest_object_number",
 553     "oddeven_packing",
 554     "grammar_table", "dictionary_table", "dynam_string_table",
 555     "" },
 556     SYSTEM_CONSTANT_TT, FALSE, TRUE
 557 };
 558
 559 keyword_group *keyword_groups[12]
 560 = { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
 561     &directive_keywords, &misc_keywords, &statements, &conditions,
 562     &system_functions, &system_constants, &opcode_macros};
 563
 564 keyword_group local_variables =
 565 { { "" },                                 /* Filled in when routine declared */
 566     LOCAL_VARIABLE_TT, FALSE, FALSE
 567 };
 568
 569 static int lexical_context(void)
 570 {
 571     /*  The lexical context is a number representing all of the context
 572         information in the lexical analyser: the same input text will
 573         always translate to the same output tokens whenever the context
 574         is the same.
 575
 576         In fact, for efficiency reasons this number omits the bit of
 577         information held in the variable "dont_enter_into_symbol_table".
 578         Inform never needs to backtrack through tokens parsed in that
 579         way (thankfully, as it would be expensive indeed to check
 580         the tokens).                                                         */
 581
 582     int c = 0;
 583     if (opcode_names.enabled)         c |= 1;
 584     if (directives.enabled)           c |= 2;
 585     if (trace_keywords.enabled)       c |= 4;
 586     if (segment_markers.enabled)      c |= 8;
 587     if (directive_keywords.enabled)   c |= 16;
 588     if (misc_keywords.enabled)        c |= 32;
 589     if (statements.enabled)           c |= 64;
 590     if (conditions.enabled)           c |= 128;
 591     if (system_functions.enabled)     c |= 256;
 592     if (system_constants.enabled)     c |= 512;
 593     if (local_variables.enabled)      c |= 1024;
 594
 595     if (return_sp_as_variable)        c |= 2048;
 596     return(c);
 597 }
 598
 599 static void print_context(int c)
 600 {
 601     if ((c & 1) != 0) printf("OPC ");
 602     if ((c & 2) != 0) printf("DIR ");
 603     if ((c & 4) != 0) printf("TK ");
 604     if ((c & 8) != 0) printf("SEG ");
 605     if ((c & 16) != 0) printf("DK ");
 606     if ((c & 32) != 0) printf("MK ");
 607     if ((c & 64) != 0) printf("STA ");
 608     if ((c & 128) != 0) printf("CND ");
 609     if ((c & 256) != 0) printf("SFUN ");
 610     if ((c & 512) != 0) printf("SCON ");
 611     if ((c & 1024) != 0) printf("LV ");
 612     if ((c & 2048) != 0) printf("sp ");
 613 }
 614
 615 static int *keywords_hash_table;
 616 static int *keywords_hash_ends_table;
 617 static int *keywords_data_table;
 618
 619 static int *local_variable_hash_table;
 620 static int *local_variable_hash_codes;
 621 char **local_variable_texts;
 622 static char *local_variable_text_table;
 623
 624 static char one_letter_locals[128];
 625
 626 static void make_keywords_tables(void)
 627 {   int i, j, h, tp=0;
 628     char **oplist, **maclist;
 629
 630     if (!glulx_mode) {
 631         oplist = opcode_list_z;
 632         maclist = opmacro_list_z;
 633     }
 634     else {
 635         oplist = opcode_list_g;
 636         maclist = opmacro_list_g;
 637     }
 638
 639     for (j=0; *(oplist[j]); j++) {
 640         opcode_names.keywords[j] = oplist[j];
 641     }
 642     opcode_names.keywords[j] = "";
 643
 644     for (j=0; *(maclist[j]); j++) {
 645         opcode_macros.keywords[j] = maclist[j];
 646     }
 647     opcode_macros.keywords[j] = "";
 648
 649     for (i=0; i<HASH_TAB_SIZE; i++)
 650     {   keywords_hash_table[i] = -1;
 651         keywords_hash_ends_table[i] = -1;
 652     }
 653
 654     for (i=1; i<=11; i++)
 655     {   keyword_group *kg = keyword_groups[i];
 656         for (j=0; *(kg->keywords[j]) != 0; j++)
 657         {   h = hash_code_from_string(kg->keywords[j]);
 658             if (keywords_hash_table[h] == -1)
 659                 keywords_hash_table[h] = tp;
 660             else
 661               *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
 662             keywords_hash_ends_table[h] = tp;
 663             *(keywords_data_table + 3*tp) = i;
 664             *(keywords_data_table + 3*tp+1) = j;
 665             *(keywords_data_table + 3*tp+2) = -1;
 666             tp++;
 667         }
 668     }
 669 }
 670
 671 extern void construct_local_variable_tables(void)
 672 {   int i, h; char *p = local_variable_text_table;
 673     for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
 674     for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
 675
 676     for (i=0; i<no_locals; i++)
 677     {   char *q = local_variables.keywords[i];
 678         if (q[1] == 0)
 679         {   one_letter_locals[(uchar)q[0]] = i;
 680             if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
 681             if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
 682         }
 683         h = hash_code_from_string(q);
 684         if (local_variable_hash_table[h] == -1)
 685             local_variable_hash_table[h] = i;
 686         local_variable_hash_codes[i] = h;
 687         local_variable_texts[i] = p;
 688         strcpy(p, q);
 689         p += strlen(p)+1;
 690     }
 691     for (;i<MAX_LOCAL_VARIABLES-1;i++)
 692       local_variable_texts[i] = "<no such local variable>";
 693 }
 694
 695 static void interpret_identifier(int pos, int dirs_only_flag)
 696 {   int index, hashcode; char *p = circle[pos].text;
 697
 698     /*  An identifier is either a keyword or a "symbol", a name which the
 699         lexical analyser leaves to higher levels of Inform to understand.    */
 700
 701     hashcode = hash_code_from_string(p);
 702
 703     if (dirs_only_flag) goto KeywordSearch;
 704
 705     /*  If this is assembly language, perhaps it is "sp"?                    */
 706
 707     if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
 708     {   circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
 709         return;
 710     }
 711
 712     /*  Test for local variables first, quite quickly.                       */
 713
 714     if (local_variables.enabled)
 715     {   if (p[1]==0)
 716         {   index = one_letter_locals[(uchar)p[0]];
 717             if (index<MAX_LOCAL_VARIABLES)
 718             {   circle[pos].type = LOCAL_VARIABLE_TT;
 719                 circle[pos].value = index+1;
 720                 return;
 721             }
 722         }
 723         index = local_variable_hash_table[hashcode];
 724         if (index >= 0)
 725         {   for (;index<no_locals;index++)
 726             {   if (hashcode == local_variable_hash_codes[index])
 727                 {   if (strcmpcis(p, local_variable_texts[index])==0)
 728                     {   circle[pos].type = LOCAL_VARIABLE_TT;
 729                         circle[pos].value = index+1;
 730                         return;
 731                     }
 732                 }
 733             }
 734         }
 735     }
 736
 737     /*  Now the bulk of the keywords.  Note that the lexer doesn't recognise
 738         the name of a system function which has been Replaced.               */
 739
 740     KeywordSearch:
 741     index = keywords_hash_table[hashcode];
 742     while (index >= 0)
 743     {   int *i = keywords_data_table + 3*index;
 744         keyword_group *kg = keyword_groups[*i];
 745         if (((!dirs_only_flag) && (kg->enabled))
 746             || (dirs_only_flag && (kg == &directives)))
 747         {   char *q = kg->keywords[*(i+1)];
 748             if (((kg->case_sensitive) && (strcmp(p, q)==0))
 749                 || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
 750             {   if ((kg != &system_functions)
 751                     || (system_function_usage[*(i+1)]!=2))
 752                 {   circle[pos].type = kg->change_token_type;
 753                     circle[pos].value = *(i+1);
 754                     return;
 755                 }
 756             }
 757         }
 758         index = *(i+2);
 759     }
 760
 761     if (dirs_only_flag) return;
 762
 763     /*  Search for the name; create it if necessary.                         */
 764
 765     circle[pos].value = symbol_index(p, hashcode);
 766     circle[pos].type = SYMBOL_TT;
 767 }
 768
 769
 770 /* ------------------------------------------------------------------------- */
 771 /*   The tokeniser grid aids a rapid decision about the consequences of a    */
 772 /*   character reached in the buffer.  In effect it is an efficiently stored */
 773 /*   transition table using an algorithm similar to that of S. C. Johnson's  */
 774 /*   "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9).       */
 775 /*   My thanks to Dilip Sequeira for suggesting this.                        */
 776 /*                                                                           */
 777 /*       tokeniser_grid[c]   is (16*n + m) if c is the first character of    */
 778 /*                               separator numbers n, n+1, ..., n+m-1        */
 779 /*                           or certain special values (QUOTE_CODE, etc)     */
 780 /*                           or 0 otherwise                                  */
 781 /*                                                                           */
 782 /*   Since 1000/16 = 62, the code numbers below will need increasing if the  */
 783 /*   number of separators supported exceeds 61.                              */
 784 /* ------------------------------------------------------------------------- */
 785
 786 static int tokeniser_grid[256];
 787
 788 #define QUOTE_CODE      1000
 789 #define DQUOTE_CODE     1001
 790 #define NULL_CODE       1002
 791 #define SPACE_CODE      1003
 792 #define NEGATIVE_CODE   1004
 793 #define DIGIT_CODE      1005
 794 #define RADIX_CODE      1006
 795 #define KEYWORD_CODE    1007
 796 #define EOF_CODE        1008
 797 #define WHITESPACE_CODE 1009
 798 #define COMMENT_CODE    1010
 799 #define IDENTIFIER_CODE 1011
 800
 801 /*  This list cannot safely be changed without also changing the header
 802     separator #defines.  The ordering is significant in that (i) all entries
 803     beginning with the same character must be adjacent and (ii) that if
 804     X is a an initial substring of Y then X must come before Y.
 805
 806     E.g. --> must occur before -- to prevent "-->0" being tokenised
 807     wrongly as "--", ">", "0" rather than "-->", "0".                        */
 808
 809 static const char separators[NUMBER_SEPARATORS][4] =
 810 {   "->", "-->", "--", "-", "++", "+", "*", "/", "%",
 811     "||", "|", "&&", "&", "~~",
 812     "~=", "~", "==", "=", ">=", ">",
 813     "<=", "<", "(", ")", ",",
 814     ".&", ".#", "..&", "..#", "..", ".",
 815     "::", ":", "@", ";", "[", "]", "{", "}",
 816     "$", "?~", "?",
 817     "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
 818 };
 819
 820 static void make_tokeniser_grid(void)
 821 {
 822     /*  Construct the grid to the specification above.                       */
 823
 824     int i, j;
 825
 826     for (i=0; i<256; i++) tokeniser_grid[i]=0;
 827
 828     for (i=0; i<NUMBER_SEPARATORS; i++)
 829     {   j=separators[i][0];
 830         if (tokeniser_grid[j]==0)
 831             tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
 832     }
 833     tokeniser_grid['\''] = QUOTE_CODE;
 834     tokeniser_grid['\"'] = DQUOTE_CODE;
 835     tokeniser_grid[0]    = EOF_CODE;
 836     tokeniser_grid[' ']  = WHITESPACE_CODE;
 837     tokeniser_grid['\n'] = WHITESPACE_CODE;
 838     tokeniser_grid['$']  = RADIX_CODE;
 839     tokeniser_grid['!']  = COMMENT_CODE;
 840
 841     tokeniser_grid['0']  = DIGIT_CODE;
 842     tokeniser_grid['1']  = DIGIT_CODE;
 843     tokeniser_grid['2']  = DIGIT_CODE;
 844     tokeniser_grid['3']  = DIGIT_CODE;
 845     tokeniser_grid['4']  = DIGIT_CODE;
 846     tokeniser_grid['5']  = DIGIT_CODE;
 847     tokeniser_grid['6']  = DIGIT_CODE;
 848     tokeniser_grid['7']  = DIGIT_CODE;
 849     tokeniser_grid['8']  = DIGIT_CODE;
 850     tokeniser_grid['9']  = DIGIT_CODE;
 851
 852     tokeniser_grid['a']  = IDENTIFIER_CODE;
 853     tokeniser_grid['b']  = IDENTIFIER_CODE;
 854     tokeniser_grid['c']  = IDENTIFIER_CODE;
 855     tokeniser_grid['d']  = IDENTIFIER_CODE;
 856     tokeniser_grid['e']  = IDENTIFIER_CODE;
 857     tokeniser_grid['f']  = IDENTIFIER_CODE;
 858     tokeniser_grid['g']  = IDENTIFIER_CODE;
 859     tokeniser_grid['h']  = IDENTIFIER_CODE;
 860     tokeniser_grid['i']  = IDENTIFIER_CODE;
 861     tokeniser_grid['j']  = IDENTIFIER_CODE;
 862     tokeniser_grid['k']  = IDENTIFIER_CODE;
 863     tokeniser_grid['l']  = IDENTIFIER_CODE;
 864     tokeniser_grid['m']  = IDENTIFIER_CODE;
 865     tokeniser_grid['n']  = IDENTIFIER_CODE;
 866     tokeniser_grid['o']  = IDENTIFIER_CODE;
 867     tokeniser_grid['p']  = IDENTIFIER_CODE;
 868     tokeniser_grid['q']  = IDENTIFIER_CODE;
 869     tokeniser_grid['r']  = IDENTIFIER_CODE;
 870     tokeniser_grid['s']  = IDENTIFIER_CODE;
 871     tokeniser_grid['t']  = IDENTIFIER_CODE;
 872     tokeniser_grid['u']  = IDENTIFIER_CODE;
 873     tokeniser_grid['v']  = IDENTIFIER_CODE;
 874     tokeniser_grid['w']  = IDENTIFIER_CODE;
 875     tokeniser_grid['x']  = IDENTIFIER_CODE;
 876     tokeniser_grid['y']  = IDENTIFIER_CODE;
 877     tokeniser_grid['z']  = IDENTIFIER_CODE;
 878
 879     tokeniser_grid['A']  = IDENTIFIER_CODE;
 880     tokeniser_grid['B']  = IDENTIFIER_CODE;
 881     tokeniser_grid['C']  = IDENTIFIER_CODE;
 882     tokeniser_grid['D']  = IDENTIFIER_CODE;
 883     tokeniser_grid['E']  = IDENTIFIER_CODE;
 884     tokeniser_grid['F']  = IDENTIFIER_CODE;
 885     tokeniser_grid['G']  = IDENTIFIER_CODE;
 886     tokeniser_grid['H']  = IDENTIFIER_CODE;
 887     tokeniser_grid['I']  = IDENTIFIER_CODE;
 888     tokeniser_grid['J']  = IDENTIFIER_CODE;
 889     tokeniser_grid['K']  = IDENTIFIER_CODE;
 890     tokeniser_grid['L']  = IDENTIFIER_CODE;
 891     tokeniser_grid['M']  = IDENTIFIER_CODE;
 892     tokeniser_grid['N']  = IDENTIFIER_CODE;
 893     tokeniser_grid['O']  = IDENTIFIER_CODE;
 894     tokeniser_grid['P']  = IDENTIFIER_CODE;
 895     tokeniser_grid['Q']  = IDENTIFIER_CODE;
 896     tokeniser_grid['R']  = IDENTIFIER_CODE;
 897     tokeniser_grid['S']  = IDENTIFIER_CODE;
 898     tokeniser_grid['T']  = IDENTIFIER_CODE;
 899     tokeniser_grid['U']  = IDENTIFIER_CODE;
 900     tokeniser_grid['V']  = IDENTIFIER_CODE;
 901     tokeniser_grid['W']  = IDENTIFIER_CODE;
 902     tokeniser_grid['X']  = IDENTIFIER_CODE;
 903     tokeniser_grid['Y']  = IDENTIFIER_CODE;
 904     tokeniser_grid['Z']  = IDENTIFIER_CODE;
 905
 906     tokeniser_grid['_']  = IDENTIFIER_CODE;
 907 }
 908
 909 /* ------------------------------------------------------------------------- */
 910 /*   Definition of a lexical block: a source file or a string containing     */
 911 /*   text for lexical analysis; an independent source from the point of      */
 912 /*   view of issuing error reports.                                          */
 913 /* ------------------------------------------------------------------------- */
 914
 915 typedef struct LexicalBlock_s
 916 {   char *filename;                              /*  Full translated name    */
 917     int   main_flag;                             /*  TRUE if the main file
 918                                                      (the first one opened)  */
 919     int   sys_flag;                              /*  TRUE if a System_File   */
 920     int   source_line;                           /*  Line number count       */
 921     int   line_start;                            /*  Char number within file
 922                                                      where the current line
 923                                                      starts                  */
 924     int   chars_read;                            /*  Char number of read pos */
 925     int   file_no;                               /*  Or 255 if not from a
 926                                                      file; used for debug
 927                                                      information             */
 928     char *orig_source;                           /* From #origsource direct  */
 929     int orig_file;
 930     int32 orig_line;
 931     int32 orig_char;
 932 } LexicalBlock;
 933
 934 static LexicalBlock NoFileOpen =
 935     {   "<before compilation>",  FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
 936
 937 static LexicalBlock MakingOutput =
 938     {   "<constructing output>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
 939
 940 static LexicalBlock StringLB =
 941     {   "<veneer routine>",      FALSE, TRUE,  0, 0, 0, 255, NULL, 0, 0, 0 };
 942
 943 static LexicalBlock *CurrentLB;                  /*  The current lexical
 944                                                      block of input text     */
 945
 946 extern void declare_systemfile(void)
 947 {   CurrentLB->sys_flag = TRUE;
 948 }
 949
 950 extern int is_systemfile(void)
 951 {   return ((CurrentLB->sys_flag)?1:0);
 952 }
 953
 954 extern void set_origsource_location(char *source, int32 line, int32 charnum)
 955 {
 956     if (!source) {
 957         /* Clear the Origsource declaration. */
 958         CurrentLB->orig_file = 0;
 959         CurrentLB->orig_source = NULL;
 960         CurrentLB->orig_line = 0;
 961         CurrentLB->orig_char = 0;
 962         return;
 963     }
 964
 965     /* Get the file number for a new or existing InputFiles entry. */
 966     int file_no = register_orig_sourcefile(source);
 967
 968     CurrentLB->orig_file = file_no;
 969     CurrentLB->orig_source = InputFiles[file_no-1].filename;
 970     CurrentLB->orig_line = line;
 971     CurrentLB->orig_char = charnum;
 972 }
 973
 974 /* Error locations. */
 975
 976 extern debug_location get_current_debug_location(void)
 977 {   debug_location result;
 978     /* Assume that all input characters are one byte. */
 979     result.file_index = CurrentLB->file_no;
 980     result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
 981     result.end_byte_index = result.beginning_byte_index;
 982     result.beginning_line_number = CurrentLB->source_line;
 983     result.end_line_number = result.beginning_line_number;
 984     result.beginning_character_number =
 985         CurrentLB->chars_read - CurrentLB->line_start;
 986     result.end_character_number = result.beginning_character_number;
 987     result.orig_file_index = CurrentLB->orig_file;
 988     result.orig_beg_line_number = CurrentLB->orig_line;
 989     result.orig_beg_char_number = CurrentLB->orig_char;
 990     return result;
 991 }
 992
 993 static debug_location ErrorReport_debug_location;
 994
 995 extern void report_errors_at_current_line(void)
 996 {   ErrorReport.line_number = CurrentLB->source_line;
 997     ErrorReport.file_number = CurrentLB->file_no;
 998     if (ErrorReport.file_number == 255)
 999         ErrorReport.file_number = -1;
1000     ErrorReport.source      = CurrentLB->filename;
1001     ErrorReport.main_flag   = CurrentLB->main_flag;
1002     if (debugfile_switch)
1003         ErrorReport_debug_location = get_current_debug_location();
1004     ErrorReport.orig_file = CurrentLB->orig_file;
1005     ErrorReport.orig_source = CurrentLB->orig_source;
1006     ErrorReport.orig_line = CurrentLB->orig_line;
1007     ErrorReport.orig_char = CurrentLB->orig_char;
1008 }
1009
1010 extern debug_location get_error_report_debug_location(void)
1011 {   return ErrorReport_debug_location;
1012 }
1013
1014 extern int32 get_current_line_start(void)
1015 {   return CurrentLB->line_start;
1016 }
1017
1018 brief_location blank_brief_location;
1019
1020 extern brief_location get_brief_location(ErrorPosition *errpos)
1021 {
1022     brief_location loc;
1023     loc.file_index = errpos->file_number;
1024     loc.line_number = errpos->line_number;
1025     loc.orig_file_index = errpos->orig_file;
1026     loc.orig_line_number = errpos->orig_line;
1027     return loc;
1028 }
1029
1030 extern void export_brief_location(brief_location loc, ErrorPosition *errpos)
1031 {
1032     if (loc.file_index != -1)
1033     {   errpos->file_number = loc.file_index;
1034         errpos->line_number = loc.line_number;
1035         errpos->main_flag = (errpos->file_number == 1);
1036         errpos->orig_source = NULL;
1037         errpos->orig_file = loc.orig_file_index;
1038         errpos->orig_line = loc.orig_line_number;
1039         errpos->orig_char = 0;
1040     }
1041 }
1042
1043 /* ------------------------------------------------------------------------- */
1044 /*   Hash printing and line counting                                         */
1045 /* ------------------------------------------------------------------------- */
1046
1047 static void print_hash(void)
1048 {
1049     /*  Hash-printing is the practice of printing a # character every 100
1050         lines of source code (the -x switch), reassuring the user that
1051         progress is being made                                               */
1052
1053     if (no_hash_printed_yet)
1054     {   printf("::"); no_hash_printed_yet = FALSE;
1055     }
1056     printf("#"); hash_printed_since_newline = TRUE;
1057
1058 #ifndef MAC_FACE
1059     /*  On some systems, text output is buffered to a line at a time, and
1060         this would frustrate the point of hash-printing, so:                 */
1061
1062     fflush(stdout);
1063 #endif
1064 }
1065
1066 static void reached_new_line(void)
1067 {
1068     /*  Called to signal that a new line has been reached in the source code */
1069
1070     forerrors_pointer = 0;
1071
1072     CurrentLB->source_line++;
1073     CurrentLB->line_start = CurrentLB->chars_read;
1074
1075     total_source_line_count++;
1076
1077     if (total_source_line_count%100==0)
1078     {   if (hash_switch) print_hash();
1079 #ifdef MAC_MPW
1080         SpinCursor(32);                    /* I.e., allow other tasks to run */
1081 #endif
1082     }
1083
1084 #ifdef MAC_FACE
1085     if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
1086     {   ProcessEvents (&g_proc);
1087         if (g_proc != true)
1088         {   free_arrays();
1089             close_all_source();
1090             if (temporary_files_switch)
1091                 remove_temp_files();
1092             if (store_the_text)
1093                 my_free(&all_text,"transcription text");
1094             abort_transcript_file();
1095             longjmp (g_fallback, 1);
1096         }
1097     }
1098 #endif
1099 }
1100
1101 static void new_syntax_line(void)
1102 {   if (source_to_analyse != NULL) forerrors_pointer = 0;
1103     report_errors_at_current_line();
1104 }
1105
1106 /* Return 10 raised to the expo power.
1107  *
1108  * I'm avoiding the standard pow() function for a rather lame reason:
1109  * it's in the libmath (-lm) library, and I don't want to change the
1110  * build model for the compiler. So, this is implemented with a stupid
1111  * lookup table. It's faster than pow() for small values of expo.
1112  * Probably not as fast if expo is 200, but "$+1e200" is an overflow
1113  * anyway, so I don't expect that to be a problem.
1114  *
1115  * (For some reason, frexp() and ldexp(), which are used later on, do
1116  * not require libmath to be linked in.)
1117  */
1118 static double pow10_cheap(int expo)
1119 {
1120     #define POW10_RANGE (8)
1121     static double powers[POW10_RANGE*2+1] = {
1122         0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
1123         1.0,
1124         10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
1125     };
1126
1127     double res = 1.0;
1128
1129     if (expo < 0) {
1130         for (; expo < -POW10_RANGE; expo += POW10_RANGE) {
1131             res *= powers[0];
1132         }
1133         return res * powers[POW10_RANGE+expo];
1134     }
1135     else {
1136         for (; expo > POW10_RANGE; expo -= POW10_RANGE) {
1137             res *= powers[POW10_RANGE*2];
1138         }
1139         return res * powers[POW10_RANGE+expo];
1140     }
1141 }
1142
1143 /* Return the IEEE-754 single-precision encoding of a floating-point
1144  * number. See http://www.psc.edu/general/software/packages/ieee/ieee.php
1145  * for an explanation.
1146  *
1147  * The number is provided in the pieces it was parsed in:
1148  *    [+|-] intv "." fracv "e" [+|-]expo
1149  *
1150  * If the magnitude is too large (beyond about 3.4e+38), this returns
1151  * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
1152  * small (below about 1e-45), this returns a zero value (0x00000000 or
1153  * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
1154  * lexer should never do that).
1155  *
1156  * Note that using a float constant does *not* set the uses_float_features
1157  * flag (which would cause the game file to be labelled 3.1.2). There's
1158  * no VM feature here, just an integer. Of course, any use of the float
1159  * *opcodes* will set the flag.
1160  *
1161  * The math functions in this routine require #including <math.h>, but
1162  * they should not require linking the math library (-lm). At least,
1163  * they do not on OSX and Linux.
1164  */
1165 static int32 construct_float(int signbit, double intv, double fracv, int expo)
1166 {
1167     double absval = (intv + fracv) * pow10_cheap(expo);
1168     int32 sign = (signbit ? 0x80000000 : 0x0);
1169     double mant;
1170     int32 fbits;
1171
1172     if (isinf(absval)) {
1173         return sign | 0x7f800000; /* infinity */
1174     }
1175     if (isnan(absval)) {
1176         return sign | 0x7fc00000;
1177     }
1178
1179     mant = frexp(absval, &expo);
1180
1181     /* Normalize mantissa to be in the range [1.0, 2.0) */
1182     if (0.5 <= mant && mant < 1.0) {
1183         mant *= 2.0;
1184         expo--;
1185     }
1186     else if (mant == 0.0) {
1187         expo = 0;
1188     }
1189     else {
1190         return sign | 0x7f800000; /* infinity */
1191     }
1192
1193     if (expo >= 128) {
1194         return sign | 0x7f800000; /* infinity */
1195     }
1196     else if (expo < -126) {
1197         /* Denormalized (very small) number */
1198         mant = ldexp(mant, 126 + expo);
1199         expo = 0;
1200     }
1201     else if (!(expo == 0 && mant == 0.0)) {
1202         expo += 127;
1203         mant -= 1.0; /* Get rid of leading 1 */
1204     }
1205
1206     mant *= 8388608.0; /* 2^23 */
1207     fbits = (int32)(mant + 0.5); /* round mant to nearest int */
1208     if (fbits >> 23) {
1209         /* The carry propagated out of a string of 23 1 bits. */
1210         fbits = 0;
1211         expo++;
1212         if (expo >= 255) {
1213             return sign | 0x7f800000; /* infinity */
1214         }
1215     }
1216
1217     return (sign) | ((int32)(expo << 23)) | (fbits);
1218 }
1219
1220 /* ------------------------------------------------------------------------- */
1221 /*   Characters are read via a "pipeline" of variables, allowing us to look  */
1222 /*       up to three characters ahead of the current position.               */
1223 /*                                                                           */
1224 /*   There are two possible sources: from the source files being loaded in,  */
1225 /*   and from a string inside Inform (which is where the code for veneer     */
1226 /*   routines comes from).  Each source has its own get-next-character       */
1227 /*   routine.                                                                */
1228 /* ------------------------------------------------------------------------- */
1229 /*   Source 1: from files                                                    */
1230 /*                                                                           */
1231 /*   Note that file_load_chars(p, size) loads "size" bytes into buffer "p"   */
1232 /*   from the current input file.  If the file runs out, then if it was      */
1233 /*   the last source file 4 EOF characters are placed in the buffer: if it   */
1234 /*   was only an Include file ending, then a '\n' character is placed there  */
1235 /*   (essentially to force termination of any comment line) followed by      */
1236 /*   three harmless spaces.                                                  */
1237 /*                                                                           */
1238 /*   The routine returns the number of characters it has written, and note   */
1239 /*   that this conveniently ensures that all characters in the buffer come   */
1240 /*   from the same file.                                                     */
1241 /* ------------------------------------------------------------------------- */
1242
1243 #define SOURCE_BUFFER_SIZE 4096                  /*  Typical disc block size */
1244
1245 typedef struct Sourcefile_s
1246 {   char *buffer;                                /*  Input buffer            */
1247     int   read_pos;                              /*  Read position in buffer */
1248     int   size;                                  /*  Number of meaningful
1249                                                      characters in buffer    */
1250     int   la, la2, la3;                          /*  Three characters of
1251                                                      lookahead pipeline      */
1252     int   file_no;                               /*  Internal file number
1253                                                      (1, 2, 3, ...)          */
1254     LexicalBlock LB;
1255 } Sourcefile;
1256
1257 static Sourcefile *FileStack;
1258 static int File_sp;                              /*  Stack pointer           */
1259
1260 static Sourcefile *CF;                           /*  Top entry on stack      */
1261
1262 static int last_input_file;
1263
1264 static void begin_buffering_file(int i, int file_no)
1265 {   int j, cnt; uchar *p;
1266
1267     if (i >= MAX_INCLUSION_DEPTH)
1268        memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);
1269
1270     p = (uchar *) FileStack[i].buffer;
1271
1272     if (i>0)
1273     {   FileStack[i-1].la  = lookahead;
1274         FileStack[i-1].la2 = lookahead2;
1275         FileStack[i-1].la3 = lookahead3;
1276     }
1277
1278     FileStack[i].file_no = file_no;
1279     FileStack[i].size = file_load_chars(file_no,
1280         (char *) p, SOURCE_BUFFER_SIZE);
1281     lookahead  = source_to_iso_grid[p[0]];
1282     lookahead2 = source_to_iso_grid[p[1]];
1283     lookahead3 = source_to_iso_grid[p[2]];
1284     if (LOOKAHEAD_SIZE != 3)
1285         compiler_error
1286             ("Lexer lookahead size does not match hard-coded lookahead code");
1287     FileStack[i].read_pos = LOOKAHEAD_SIZE;
1288
1289     if (file_no==1) FileStack[i].LB.main_flag = TRUE;
1290                else FileStack[i].LB.main_flag = FALSE;
1291     FileStack[i].LB.sys_flag = FALSE;
1292     FileStack[i].LB.source_line = 1;
1293     FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
1294     FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
1295     FileStack[i].LB.filename = InputFiles[file_no-1].filename;
1296     FileStack[i].LB.file_no = file_no;
1297     FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0;
1298     FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0;
1299
1300     CurrentLB = &(FileStack[i].LB);
1301     CF = &(FileStack[i]);
1302
1303     /* Check for recursive inclusion */
1304     cnt = 0;
1305     for (j=0; j<i; j++)
1306     {   if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
1307             cnt++;
1308     }
1309     if (cnt==1)
1310         warning_named("File included more than once",
1311             FileStack[j].LB.filename);
1312 }
1313
1314 static void create_char_pipeline(void)
1315 {
1316     File_sp = 0;
1317     begin_buffering_file(File_sp++, 1);
1318     pipeline_made = TRUE;
1319     last_input_file = current_input_file;
1320 }
1321
1322 static int get_next_char_from_pipeline(void)
1323 {   uchar *p;
1324
1325     while (last_input_file < current_input_file)
1326     {
1327         /*  An "Include" file must have opened since the last character
1328             was read. Perhaps more than one. We run forward through the
1329             list and add them to the include stack. But we ignore
1330             "Origsource" files (which were never actually opened for
1331             reading). */
1332
1333         last_input_file++;
1334         if (!InputFiles[last_input_file-1].is_input)
1335             continue;
1336
1337         begin_buffering_file(File_sp++, last_input_file);
1338     }
1339     if (last_input_file != current_input_file)
1340         compiler_error("last_input_file did not match after Include");
1341
1342     if (File_sp == 0)
1343     {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
1344     }
1345
1346     if (CF->read_pos == CF->size)
1347     {   CF->size =
1348             file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
1349         CF->read_pos = 0;
1350     }
1351     else
1352     if (CF->read_pos == -(CF->size))
1353     {   set_token_location(get_current_debug_location());
1354         File_sp--;
1355         if (File_sp == 0)
1356         {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
1357         }
1358         CF = &(FileStack[File_sp-1]);
1359         CurrentLB = &(FileStack[File_sp-1].LB);
1360         lookahead  = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
1361         if (CF->read_pos == CF->size)
1362         {   CF->size =
1363                 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
1364             CF->read_pos = 0;
1365         }
1366         set_token_location(get_current_debug_location());
1367     }
1368
1369     p = (uchar *) (CF->buffer);
1370
1371     current = lookahead;
1372     lookahead = lookahead2;
1373     lookahead2 = lookahead3;
1374     lookahead3 = source_to_iso_grid[p[CF->read_pos++]];
1375
1376     CurrentLB->chars_read++;
1377     if (forerrors_pointer < 511)
1378         forerrors_buff[forerrors_pointer++] = current;
1379     if (current == '\n') reached_new_line();
1380     return(current);
1381 }
1382
1383 /* ------------------------------------------------------------------------- */
1384 /*   Source 2: from a string                                                 */
1385 /* ------------------------------------------------------------------------- */
1386
1387 static int source_to_analyse_pointer;            /*  Current read position   */
1388
1389 static int get_next_char_from_string(void)
1390 {   uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
1391     current = source_to_iso_grid[p[0]];
1392
1393     if (current == 0)    lookahead  = 0;
1394                     else lookahead  = source_to_iso_grid[p[1]];
1395     if (lookahead == 0)  lookahead2 = 0;
1396                     else lookahead2 = source_to_iso_grid[p[2]];
1397     if (lookahead2 == 0) lookahead3 = 0;
1398                     else lookahead3 = source_to_iso_grid[p[3]];
1399
1400     CurrentLB->chars_read++;
1401     if (forerrors_pointer < 511)
1402         forerrors_buff[forerrors_pointer++] = current;
1403     if (current == '\n') reached_new_line();
1404     return(current);
1405 }
1406
1407 /* ========================================================================= */
1408 /*   The interface between the lexer and Inform's higher levels:             */
1409 /*                                                                           */
1410 /*       put_token_back()            (effectively) move the read position    */
1411 /*                                       back by one token                   */
1412 /*                                                                           */
1413 /*       get_next_token()            copy the token at the current read      */
1414 /*                                       position into the triple            */
1415 /*                                   (token_type, token_value, token_text)   */
1416 /*                                       and move the read position forward  */
1417 /*                                       by one                              */
1418 /*                                                                           */
1419 /*       restart_lexer(source, name) if source is NULL, initialise the lexer */
1420 /*                                       to read from source files;          */
1421 /*                                   otherwise, to read from this string.    */
1422 /* ------------------------------------------------------------------------- */
1423
1424 extern void put_token_back(void)
1425 {   tokens_put_back++;
1426
1427     if (tokens_trace_level > 0)
1428     {   if (tokens_trace_level == 1) printf("<- ");
1429         else printf("<-\n");
1430     }
1431
1432     /*  The following error, of course, should never happen!                 */
1433
1434     if (tokens_put_back == CIRCLE_SIZE)
1435     {   compiler_error("The lexical analyser has collapsed because of a wrong \
1436 assumption inside Inform");
1437         tokens_put_back--;
1438         return;
1439     }
1440 }
1441
1442 extern void get_next_token(void)
1443 {   int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
1444     int returning_a_put_back_token = TRUE;
1445
1446     context = lexical_context();
1447
1448     if (tokens_put_back > 0)
1449     {   i = circle_position - tokens_put_back + 1;
1450         if (i<0) i += CIRCLE_SIZE;
1451         tokens_put_back--;
1452         if (context != token_contexts[i])
1453         {   j = circle[i].type;
1454             if ((j==0) || ((j>=100) && (j<200)))
1455                 interpret_identifier(i, FALSE);
1456         }
1457         goto ReturnBack;
1458     }
1459     returning_a_put_back_token = FALSE;
1460
1461     if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1462     else circle_position++;
1463
1464     if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
1465         lex_p = lexeme_memory;
1466
1467     circle[circle_position].text = lex_p;
1468     circle[circle_position].value = 0;
1469     *lex_p = 0;
1470
1471     StartTokenAgain:
1472     d = (*get_next_char)();
1473     e = tokeniser_grid[d];
1474
1475     if (next_token_begins_syntax_line)
1476     {   if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1477         {   new_syntax_line();
1478             next_token_begins_syntax_line = FALSE;
1479         }
1480     }
1481
1482     circle[circle_position].location = get_current_debug_location();
1483
1484     switch(e)
1485     {   case 0: char_error("Illegal character found in source:", d);
1486             goto StartTokenAgain;
1487
1488         case WHITESPACE_CODE:
1489             while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1490                 (*get_next_char)();
1491             goto StartTokenAgain;
1492
1493         case COMMENT_CODE:
1494             while ((lookahead != '\n') && (lookahead != 0))
1495                 (*get_next_char)();
1496             goto StartTokenAgain;
1497
1498         case EOF_CODE:
1499             circle[circle_position].type = EOF_TT;
1500             strcpy(lex_p, "<end of file>");
1501             lex_p += strlen(lex_p) + 1;
1502             break;
1503
1504         case DIGIT_CODE:
1505             radix = 10;
1506             ReturnNumber:
1507             n=0;
1508             do
1509             {   n = n*radix + character_digit_value[d];
1510                 *lex_p++ = d;
1511             } while ((character_digit_value[lookahead] < radix)
1512                      && (d = (*get_next_char)(), TRUE));
1513
1514             *lex_p++ = 0;
1515             circle[circle_position].type = NUMBER_TT;
1516             circle[circle_position].value = n;
1517             break;
1518
1519             FloatNumber:
1520             {   int expo=0; double intv=0, fracv=0;
1521                 int expocount=0, intcount=0, fraccount=0;
1522                 int signbit = (d == '-');
1523                 *lex_p++ = d;
1524                 while (character_digit_value[lookahead] < 10) {
1525                     intv = 10.0*intv + character_digit_value[lookahead];
1526                     intcount++;
1527                     *lex_p++ = lookahead;
1528                     (*get_next_char)();
1529                 }
1530                 if (lookahead == '.') {
1531                     double fracpow = 1.0;
1532                     *lex_p++ = lookahead;
1533                     (*get_next_char)();
1534                     while (character_digit_value[lookahead] < 10) {
1535                         fracpow *= 0.1;
1536                         fracv = fracv + fracpow*character_digit_value[lookahead];
1537                         fraccount++;
1538                         *lex_p++ = lookahead;
1539                         (*get_next_char)();
1540                     }
1541                 }
1542                 if (lookahead == 'e' || lookahead == 'E') {
1543                     int exposign = 0;
1544                     *lex_p++ = lookahead;
1545                     (*get_next_char)();
1546                     if (lookahead == '+' || lookahead == '-') {
1547                         exposign = (lookahead == '-');
1548                         *lex_p++ = lookahead;
1549                         (*get_next_char)();
1550                     }
1551                     while (character_digit_value[lookahead] < 10) {
1552                         expo = 10*expo + character_digit_value[lookahead];
1553                         expocount++;
1554                         *lex_p++ = lookahead;
1555                         (*get_next_char)();
1556                     }
1557                     if (expocount == 0)
1558                         error("Floating-point literal must have digits after the 'e'");
1559                     if (exposign) { expo = -expo; }
1560                 }
1561                 if (intcount + fraccount == 0)
1562                     error("Floating-point literal must have digits");
1563                 n = construct_float(signbit, intv, fracv, expo);
1564             }
1565             *lex_p++ = 0;
1566             circle[circle_position].type = NUMBER_TT;
1567             circle[circle_position].value = n;
1568             if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
1569             break;
1570
1571         case RADIX_CODE:
1572             radix = 16; d = (*get_next_char)();
1573             if (d == '-' || d == '+') { goto FloatNumber; }
1574             if (d == '$') { d = (*get_next_char)(); radix = 2; }
1575             if (character_digit_value[d] >= radix)
1576             {   if (radix == 2)
1577                     error("Binary number expected after '$$'");
1578                 else
1579                     error("Hexadecimal number expected after '$'");
1580             }
1581             goto ReturnNumber;
1582
1583         case QUOTE_CODE:     /* Single-quotes: scan a literal string */
1584             quoted_size=0;
1585             do
1586             {   e = d; d = (*get_next_char)(); *lex_p++ = d;
1587                 if (quoted_size++==64)
1588                 {   error(
1589                     "Too much text for one pair of quotations '...' to hold");
1590                     *lex_p='\''; break;
1591                 }
1592                 if ((d == '\'') && (e != '@'))
1593                 {   if (quoted_size == 1)
1594                     {   d = (*get_next_char)(); *lex_p++ = d;
1595                         if (d != '\'')
1596                             error("No text between quotation marks ''");
1597                     }
1598                     break;
1599                 }
1600             } while (d != EOF);
1601             if (d==EOF) ebf_error("'\''", "end of file");
1602             *(lex_p-1) = 0;
1603             circle[circle_position].type = SQ_TT;
1604             break;
1605
1606         case DQUOTE_CODE:    /* Double-quotes: scan a literal string */
1607             quoted_size=0;
1608             do
1609             {   d = (*get_next_char)(); *lex_p++ = d;
1610                 if (quoted_size++==MAX_QTEXT_SIZE)
1611                 {   memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE);
1612                     break;
1613                 }
1614                 if (d == '\n')
1615                 {   lex_p--;
1616                     while (*(lex_p-1) == ' ') lex_p--;
1617                     if (*(lex_p-1) != '^') *lex_p++ = ' ';
1618                     while ((lookahead != EOF) &&
1619                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1620                     (*get_next_char)();
1621                 }
1622                 else if (d == '\\')
1623                 {   int newline_passed = FALSE;
1624                     lex_p--;
1625                     while ((lookahead != EOF) &&
1626                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1627                         if ((d = (*get_next_char)()) == '\n')
1628                             newline_passed = TRUE;
1629                     if (!newline_passed)
1630                     {   char chb[4];
1631                         chb[0] = '\"'; chb[1] = lookahead;
1632                         chb[2] = '\"'; chb[3] = 0;
1633                         ebf_error("empty rest of line after '\\' in string",
1634                             chb);
1635                     }
1636                 }
1637             }   while ((d != EOF) && (d!='\"'));
1638             if (d==EOF) ebf_error("'\"'", "end of file");
1639             *(lex_p-1) = 0;
1640             circle[circle_position].type = DQ_TT;
1641             break;
1642
1643         case IDENTIFIER_CODE:    /* Letter or underscore: an identifier */
1644
1645             *lex_p++ = d; n=1;
1646             while ((n<=MAX_IDENTIFIER_LENGTH)
1647                    && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1648                    || (tokeniser_grid[lookahead] == DIGIT_CODE)))
1649                 n++, *lex_p++ = (*get_next_char)();
1650
1651             *lex_p++ = 0;
1652
1653             if (n > MAX_IDENTIFIER_LENGTH)
1654             {   char bad_length[100];
1655                 sprintf(bad_length,
1656                     "Name exceeds the maximum length of %d characters:",
1657                          MAX_IDENTIFIER_LENGTH);
1658                 error_named(bad_length, circle[circle_position].text);
1659                 /* Trim token so that it doesn't violate
1660                    MAX_IDENTIFIER_LENGTH during error recovery */
1661                 circle[circle_position].text[MAX_IDENTIFIER_LENGTH] = 0;
1662             }
1663
1664             if (dont_enter_into_symbol_table)
1665             {   circle[circle_position].type = DQ_TT;
1666                 circle[circle_position].value = 0;
1667                 if (dont_enter_into_symbol_table == -2)
1668                     interpret_identifier(circle_position, TRUE);
1669                 break;
1670             }
1671
1672             interpret_identifier(circle_position, FALSE);
1673             break;
1674
1675         default:
1676
1677             /*  The character is initial to at least one of the separators  */
1678
1679             for (j=e>>4, k=j+(e&0x0f); j<k; j++)
1680             {   r = (char *) separators[j];
1681                 if (r[1]==0)
1682                 {   *lex_p++=d; *lex_p++=0;
1683                     goto SeparatorMatched;
1684                 }
1685                 else
1686                 if (r[2]==0)
1687                 {   if (*(r+1) == lookahead)
1688                     {   *lex_p++=d;
1689                         *lex_p++=(*get_next_char)();
1690                         *lex_p++=0;
1691                         goto SeparatorMatched;
1692                     }
1693                 }
1694                 else
1695                 {   if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
1696                     {   *lex_p++=d;
1697                         *lex_p++=(*get_next_char)();
1698                         *lex_p++=(*get_next_char)();
1699                         *lex_p++=0;
1700                         goto SeparatorMatched;
1701                     }
1702                 }
1703             }
1704
1705             /*  The following contingency never in fact arises with the
1706                 current set of separators, but might in future  */
1707
1708             *lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
1709             *lex_p++ = 0;
1710             error_named("Unrecognised combination in source:", lex_p);
1711             goto StartTokenAgain;
1712
1713             SeparatorMatched:
1714
1715             circle[circle_position].type = SEP_TT;
1716             circle[circle_position].value = j;
1717             switch(j)
1718             {   case SEMICOLON_SEP: break;
1719                 case HASHNDOLLAR_SEP:
1720                 case HASHWDOLLAR_SEP:
1721                     if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1722                     {   error_named("Character expected after",
1723                             circle[circle_position].text);
1724                         break;
1725                     }
1726                     lex_p--;
1727                     *lex_p++ = (*get_next_char)();
1728                     while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1729                            || (tokeniser_grid[lookahead] == DIGIT_CODE))
1730                         *lex_p++ = (*get_next_char)();
1731                     *lex_p++ = 0;
1732                     break;
1733                 case HASHADOLLAR_SEP:
1734                 case HASHGDOLLAR_SEP:
1735                 case HASHRDOLLAR_SEP:
1736                 case HASHHASH_SEP:
1737                     if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
1738                     {   error_named("Alphabetic character expected after",
1739                             circle[circle_position].text);
1740                         break;
1741                     }
1742                     lex_p--;
1743                     while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1744                            || (tokeniser_grid[lookahead] == DIGIT_CODE))
1745                         *lex_p++ = (*get_next_char)();
1746                     *lex_p++ = 0;
1747                     break;
1748             }
1749             break;
1750     }
1751
1752     i = circle_position;
1753
1754     ReturnBack:
1755     token_value = circle[i].value;
1756     token_type = circle[i].type;
1757     token_text = circle[i].text;
1758     if (!returning_a_put_back_token)
1759     {   set_token_location(circle[i].location);
1760     }
1761     token_contexts[i] = context;
1762
1763     if (tokens_trace_level > 0)
1764     {   if (tokens_trace_level == 1)
1765             printf("'%s' ", circle[i].text);
1766         else
1767         {   printf("-> "); describe_token(circle[i]);
1768             printf(" ");
1769             if (tokens_trace_level > 2) print_context(token_contexts[i]);
1770             printf("\n");
1771         }
1772     }
1773 }
1774
1775 static char veneer_error_title[64];
1776
1777 extern void restart_lexer(char *lexical_source, char *name)
1778 {   int i;
1779     circle_position = 0;
1780     for (i=0; i<CIRCLE_SIZE; i++)
1781     {   circle[i].type = 0;
1782         circle[i].value = 0;
1783         circle[i].text = "(if this is ever visible, there is a bug)";
1784         token_contexts[i] = 0;
1785     }
1786
1787     lex_p = lexeme_memory;
1788     tokens_put_back = 0;
1789     forerrors_pointer = 0;
1790     dont_enter_into_symbol_table = FALSE;
1791     return_sp_as_variable = FALSE;
1792     next_token_begins_syntax_line = TRUE;
1793
1794     source_to_analyse = lexical_source;
1795
1796     if (source_to_analyse == NULL)
1797     {   get_next_char = get_next_char_from_pipeline;
1798         if (!pipeline_made) create_char_pipeline();
1799         forerrors_buff[0] = 0; forerrors_pointer = 0;
1800     }
1801     else
1802     {   get_next_char = get_next_char_from_string;
1803         source_to_analyse_pointer = 0;
1804         CurrentLB = &StringLB;
1805         sprintf(veneer_error_title, "<veneer routine '%s'>", name);
1806         StringLB.filename = veneer_error_title;
1807
1808         CurrentLB->source_line = 1;
1809         CurrentLB->line_start  = 0;
1810         CurrentLB->chars_read  = 0;
1811     }
1812 }
1813
1814 /* ========================================================================= */
1815 /*   Data structure management routines                                      */
1816 /* ------------------------------------------------------------------------- */
1817
1818 extern void init_lexer_vars(void)
1819 {
1820     blank_brief_location.file_index = -1;
1821     blank_brief_location.line_number = 0;
1822     blank_brief_location.orig_file_index = 0;
1823     blank_brief_location.orig_line_number = 0;
1824 }
1825
1826 extern void lexer_begin_prepass(void)
1827 {   total_source_line_count = 0;
1828     CurrentLB = &NoFileOpen;
1829     report_errors_at_current_line();
1830 }
1831
1832 extern void lexer_begin_pass(void)
1833 {   no_hash_printed_yet = TRUE;
1834     hash_printed_since_newline = FALSE;
1835
1836     pipeline_made = FALSE;
1837
1838     restart_lexer(NULL, NULL);
1839 }
1840
1841 extern void lexer_endpass(void)
1842 {   CurrentLB = &MakingOutput;
1843     report_errors_at_current_line();
1844 }
1845
1846 extern void lexer_allocate_arrays(void)
1847 {   int i;
1848
1849     FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
1850         "filestack buffer");
1851
1852     for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1853     FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
1854
1855     lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");
1856
1857     keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1858         "keyword hash table");
1859     keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1860         "keyword hash end table");
1861     keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
1862         "keyword hashing linked list");
1863     local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1864         "local variable hash table");
1865     local_variable_text_table = my_malloc(
1866         (MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
1867         "text of local variable names");
1868
1869     local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
1870         "local variable hash codes");
1871     local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
1872         "local variable text pointers");
1873
1874     make_tokeniser_grid();
1875     make_keywords_tables();
1876
1877     first_token_locations =
1878         my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
1879     first_token_locations->location.file_index = 0;
1880     first_token_locations->location.beginning_byte_index = 0;
1881     first_token_locations->location.end_byte_index = 0;
1882     first_token_locations->location.beginning_line_number = 0;
1883     first_token_locations->location.end_line_number = 0;
1884     first_token_locations->location.beginning_character_number = 0;
1885     first_token_locations->location.end_character_number = 0;
1886     first_token_locations->location.orig_file_index = 0;
1887     first_token_locations->location.orig_beg_line_number = 0;
1888     first_token_locations->location.orig_beg_char_number = 0;
1889     first_token_locations->next = NULL;
1890     first_token_locations->reference_count = 0;
1891     last_token_location = first_token_locations;
1892 }
1893
1894 extern void lexer_free_arrays(void)
1895 {   int i; char *p;
1896
1897     for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1898     {   p = FileStack[i].buffer;
1899         my_free(&p, "source file buffer");
1900     }
1901     my_free(&FileStack, "filestack buffer");
1902     my_free(&lexeme_memory, "lexeme memory");
1903
1904     my_free(&keywords_hash_table, "keyword hash table");
1905     my_free(&keywords_hash_ends_table, "keyword hash end table");
1906     my_free(&keywords_data_table, "keyword hashing linked list");
1907     my_free(&local_variable_hash_table, "local variable hash table");
1908     my_free(&local_variable_text_table, "text of local variable names");
1909
1910     my_free(&local_variable_hash_codes, "local variable hash codes");
1911     my_free(&local_variable_texts, "local variable text pointers");
1912
1913     cleanup_token_locations(NULL);
1914 }
1915
1916 /* ========================================================================= */