inform.c: Update the copyright year that gets printed out in light of the change...
[inform.git] / lexer.c
1 /* ------------------------------------------------------------------------- */
2 /*   "lexer" : Lexical analyser                                              */
3 /*                                                                           */
4 /* Copyright (c) Graham Nelson 1993 - 2016                                   */
5 /*                                                                           */
6 /* This file is part of Inform.                                              */
7 /*                                                                           */
8 /* Inform is free software: you can redistribute it and/or modify            */
9 /* it under the terms of the GNU General Public License as published by      */
10 /* the Free Software Foundation, either version 3 of the License, or         */
11 /* (at your option) any later version.                                       */
12 /*                                                                           */
13 /* Inform is distributed in the hope that it will be useful,                 */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              */
16 /* GNU General Public License for more details.                              */
17 /*                                                                           */
18 /* You should have received a copy of the GNU General Public License         */
19 /* along with Inform. If not, see https://gnu.org/licenses/                  */
20 /*                                                                           */
21 /* ------------------------------------------------------------------------- */
22
23 #include "header.h"
24
25 int total_source_line_count,            /* Number of source lines so far     */
26
27     no_hash_printed_yet,                /* Have not yet printed the first #  */
28     hash_printed_since_newline,         /* A hash has been printed since the
29                                            most recent new-line was printed
30                                            (generally as a result of an error
31                                            message or the start of pass)     */
32     dont_enter_into_symbol_table,       /* Return names as text (with
33                                            token type DQ_TT, i.e., as if
34                                            they had double-quotes around)
35                                            and not as entries in the symbol
36                                            table, when TRUE. If -2, only the
37                                            keyword table is searched.        */
38     return_sp_as_variable;              /* When TRUE, the word "sp" denotes
39                                            the stack pointer variable
40                                            (used in assembly language only)  */
41 int next_token_begins_syntax_line;      /* When TRUE, start a new syntax
42                                            line (for error reporting, etc.)
43                                            on the source code line where
44                                            the next token appears            */
45
46 int32 last_mapped_line;  /* Last syntax line reported to debugging file      */
47
48 /* ------------------------------------------------------------------------- */
49 /*   The lexer's output is a sequence of triples, each called a "token",     */
50 /*   representing one lexical unit (or "lexeme") each.  Instead of providing */
51 /*   "lookahead" (that is, always having available the next token after the  */
52 /*   current one, so that syntax analysers higher up in Inform can have      */
53 /*   advance knowledge of what is coming), the lexer instead has a system    */
54 /*   where tokens can be read in and then "put back again".                  */
55 /*   The meaning of the number (and to some extent the text) supplied with   */
56 /*   a token depends on its type: see "header.h" for the list of types.      */
57 /*   For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
58 /*   number, and translated to the token:                                    */
59 /*     type NUMBER_TT, value 483, text "$1e3"                                */
60 /* ------------------------------------------------------------------------- */
61 /*   These three variables are set to the current token on a call to         */
62 /*   get_next_token() (but are not changed by a call to put_token_back()).   */
63 /* ------------------------------------------------------------------------- */
64
65 int token_type;
66 int32 token_value;
67 char *token_text;
68
69 /* ------------------------------------------------------------------------- */
70 /*   The next two variables are the head and tail of a singly linked list.   */
71 /*   The tail stores the portion most recently read from the current         */
72 /*   lexical block; its end values therefore describe the location of the    */
73 /*   current token, and are updated whenever the three variables above are   */
74 /*   via set_token_location(...).  Earlier vertices, if any, represent the   */
75 /*   regions of lexical blocks read beforehand, where new vertices are       */
76 /*   only introduced by interruptions like a file inclusion or an EOF.       */
77 /*   Vertices are deleted off of the front of the list once they are no      */
78 /*   longer referenced by pending debug information records.                 */
79 /* ------------------------------------------------------------------------- */
80
81 static debug_locations *first_token_locations;
82 static debug_locations *last_token_location;
83
84 extern debug_location get_token_location(void)
85 {   debug_location result;
86     debug_location *location = &(last_token_location->location);
87     result.file_index = location->file_index;
88     result.beginning_byte_index = location->end_byte_index;
89     result.end_byte_index = location->end_byte_index;
90     result.beginning_line_number = location->end_line_number;
91     result.end_line_number = location->end_line_number;
92     result.beginning_character_number = location->end_character_number;
93     result.end_character_number = location->end_character_number;
94     result.orig_file_index = location->orig_file_index;
95     result.orig_beg_line_number = location->orig_beg_line_number;
96     result.orig_beg_char_number = location->orig_beg_char_number;
97     return result;
98 }
99
100 extern debug_locations get_token_locations(void)
101 {   debug_locations result;
102     result.location = get_token_location();
103     result.next = NULL;
104     result.reference_count = 0;
105     return result;
106 }
107
108 static void set_token_location(debug_location location)
109 {   if (location.file_index == last_token_location->location.file_index)
110     {   last_token_location->location.end_byte_index =
111             location.end_byte_index;
112         last_token_location->location.end_line_number =
113             location.end_line_number;
114         last_token_location->location.end_character_number =
115             location.end_character_number;
116         last_token_location->location.orig_file_index =
117             location.orig_file_index;
118         last_token_location->location.orig_beg_line_number =
119             location.orig_beg_line_number;
120         last_token_location->location.orig_beg_char_number =
121             location.orig_beg_char_number;
122     } else
123     {   debug_locations*successor =
124             my_malloc
125                 (sizeof(debug_locations),
126                  "debug locations of recent tokens");
127         successor->location = location;
128         successor->next = NULL;
129         successor->reference_count = 0;
130         last_token_location->next = successor;
131         last_token_location = successor;
132     }
133 }
134
135 extern debug_location_beginning get_token_location_beginning(void)
136 {   debug_location_beginning result;
137     ++(last_token_location->reference_count);
138     result.head = last_token_location;
139     result.beginning_byte_index =
140         last_token_location->location.end_byte_index;
141     result.beginning_line_number =
142         last_token_location->location.end_line_number;
143     result.beginning_character_number =
144         last_token_location->location.end_character_number;
145     result.orig_file_index = last_token_location->location.orig_file_index;
146     result.orig_beg_line_number = last_token_location->location.orig_beg_line_number;
147     result.orig_beg_char_number = last_token_location->location.orig_beg_char_number;
148
149     return result;
150 }
151
152 static void cleanup_token_locations(debug_location_beginning*beginning)
153 {   if (first_token_locations)
154     {   while (first_token_locations &&
155                !first_token_locations->reference_count)
156         {   debug_locations*moribund = first_token_locations;
157             first_token_locations = moribund->next;
158             my_free(&moribund, "debug locations of recent tokens");
159             if (beginning &&
160                 (beginning->head == moribund || !first_token_locations))
161             {   compiler_error
162                     ("Records needed by a debug_location_beginning are no "
163                      "longer allocated, perhaps because of an invalid reuse "
164                      "of this or an earlier beginning");
165             }
166         }
167     } else
168     {   if (beginning)
169         {   compiler_error
170                 ("Attempt to use a debug_location_beginning when no token "
171                  "locations are defined");
172         } else
173         {   compiler_error
174                 ("Attempt to clean up token locations when no token locations "
175                  "are defined");
176         }
177     }
178 }
179
180 extern void discard_token_location(debug_location_beginning beginning)
181 {   --(beginning.head->reference_count);
182 }
183
/* Close a range opened by get_token_location_beginning(): return a
   debug_locations record spanning from the remembered beginning to the
   end of the most recently read token, dropping the beginning's
   reference on its head record.  The result's "next" pointer links any
   later lexical blocks the range ran through.                            */
extern debug_locations get_token_location_end
    (debug_location_beginning beginning)
{   debug_locations result;
    /* May report a compiler error if the records backing this beginning
       were already released (invalid reuse of the beginning).            */
    cleanup_token_locations(&beginning);
    --(beginning.head->reference_count);
    /* Sometimes we know what we'll read before we switch to the lexical block
       where we'll read it.  In such cases the beginning will be placed in the
       prior block and last exactly zero bytes there.  It's misleading to
       include such ranges, so we gobble them. */
    if (beginning.head->location.end_byte_index ==
          beginning.beginning_byte_index &&
        beginning.head->next)
    {   /* Zero-byte prefix: skip to the next block and start the range
           at that block's very beginning.                                */
        beginning.head = beginning.head->next;
        result.location = beginning.head->location;
        result.location.beginning_byte_index = 0;
        result.location.beginning_line_number = 1;
        result.location.beginning_character_number = 1;
    } else
    {   /* Normal case: the range starts where the beginning was taken. */
        result.location = beginning.head->location;
        result.location.beginning_byte_index =
            beginning.beginning_byte_index;
        result.location.beginning_line_number =
            beginning.beginning_line_number;
        result.location.beginning_character_number =
            beginning.beginning_character_number;
    }

    /* The original-source coordinates always come from the beginning. */
    result.location.orig_file_index =
        beginning.orig_file_index;
    result.location.orig_beg_line_number =
        beginning.orig_beg_line_number;
    result.location.orig_beg_char_number =
        beginning.orig_beg_char_number;
    
    result.next = beginning.head->next;
    result.reference_count = 0;
    return result;
}
222
223 /* ------------------------------------------------------------------------- */
224 /*   In order to be able to put tokens back efficiently, the lexer stores    */
225 /*   tokens in a "circle": the variable circle_position ranges between       */
226 /*   0 and CIRCLE_SIZE-1.  We only need a circle size as large as the        */
227 /*   maximum number of tokens ever put back at once, plus 1 (in effect, the  */
228 /*   maximum token lookahead ever needed in syntax analysis, plus 1).        */
229 /*                                                                           */
230 /*   Unlike some compilers, Inform does not have a context-free lexer: in    */
231 /*   fact it has 12288 different possible states.  However, the context only */
232 /*   affects the interpretation of "identifiers": lexemes beginning with a   */
233 /*   letter and containing up to 32 chars of alphanumeric and underscore     */
234 /*   chars.  (For example, "default" may refer to the directive or statement */
235 /*   of that name, and which token values are returned depends on the        */
236 /*   current lexical context.)                                               */
237 /*                                                                           */
238 /*   Along with each token, we also store the lexical context it was         */
239 /*   translated under; because if it is called for again, there may need     */
240 /*   to be a fresh interpretation of it if the context has changed.          */
241 /* ------------------------------------------------------------------------- */
242
243 #define CIRCLE_SIZE 6
244
245 /*   (The worst case for token lookahead is distinguishing between an
246      old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".)   */
247
248 static int circle_position;
249 static token_data circle[CIRCLE_SIZE];
250
251 static int token_contexts[CIRCLE_SIZE];
252
253 /* ------------------------------------------------------------------------- */
254 /*   A complication, however, is that the text of some lexemes needs to be   */
255 /*   held in Inform's memory for much longer periods: for example, a         */
256 /*   dictionary word lexeme (like "'south'") must have its text preserved    */
257 /*   until the code generation time for the expression it occurs in, when    */
258 /*   the dictionary reference is actually made.  Code generation in general  */
259 /*   occurs as early as possible in Inform: pending some better method of    */
260 /*   garbage collection, we simply use a buffer so large that unless         */
261 /*   expressions spread across 10K of source code are found, there can be    */
262 /*   no problem.                                                             */
263 /* ------------------------------------------------------------------------- */
264
265 static char *lexeme_memory;
266 static char *lex_p;                     /* Current write position            */
267
268 /* ------------------------------------------------------------------------- */
269 /*   The lexer itself needs up to 3 characters of lookahead (it uses an      */
270 /*   LR(3) grammar to translate characters into tokens).                     */
271 /* ------------------------------------------------------------------------- */
272
273 #define LOOKAHEAD_SIZE 3
274
275 static int current, lookahead,          /* The latest character read, and    */
276     lookahead2, lookahead3;             /* the three characters following it */
277
278 static int pipeline_made;               /* Whether or not the pipeline of
279                                            characters has been constructed
280                                            yet (this pass)                   */
281
282 static int (* get_next_char)(void);     /* Routine for reading the stream of
283                                            characters: the lexer does not
284                                            need any "ungetc" routine for
285                                            putting them back again.  End of
286                                            stream is signalled by returning
287                                            zero.                             */
288
289 static char *source_to_analyse;         /* The current lexical source:
290                                            NULL for "load from source files",
291                                            otherwise this points to a string
292                                            containing Inform code            */
293
294 static int tokens_put_back;             /* Count of the number of backward
295                                            moves made from the last-read
296                                            token                             */
297
/* Print a human-readable description of token t to stdout, in the form
   "{ ... }" (no trailing newline).  Used when tracing tokens.            */
extern void describe_token(token_data t)
{
    /*  Many of the token types are not set in this file, but later on in
        Inform's higher stages (for example, in the expression evaluator);
        but this routine describes them all.                                 */

    printf("{ ");

    switch(t.type)
    {
        /*  The following token types occur in lexer output:                 */

        case SYMBOL_TT:          printf("symbol ");
                                 describe_symbol(t.value);
                                 break;
        case NUMBER_TT:          printf("literal number %d", t.value);
                                 break;
        case DQ_TT:              printf("string \"%s\"", t.text);
                                 break;
        case SQ_TT:              printf("string '%s'", t.text);
                                 break;
        case SEP_TT:             printf("separator '%s'", t.text);
                                 break;
        case EOF_TT:             printf("end of file");
                                 break;

        case STATEMENT_TT:       printf("statement name '%s'", t.text);
                                 break;
        case SEGMENT_MARKER_TT:  printf("object segment marker '%s'", t.text);
                                 break;
        case DIRECTIVE_TT:       printf("directive name '%s'", t.text);
                                 break;
        case CND_TT:             printf("textual conditional '%s'", t.text);
                                 break;
        case OPCODE_NAME_TT:     printf("opcode name '%s'", t.text);
                                 break;
        case SYSFUN_TT:          printf("built-in function name '%s'", t.text);
                                 break;
        case LOCAL_VARIABLE_TT:  printf("local variable name '%s'", t.text);
                                 break;
        case MISC_KEYWORD_TT:    printf("statement keyword '%s'", t.text);
                                 break;
        case DIR_KEYWORD_TT:     printf("directive keyword '%s'", t.text);
                                 break;
        case TRACE_KEYWORD_TT:   printf("'trace' keyword '%s'", t.text);
                                 break;
        case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
                                 break;

        /*  The remaining are etoken types, not set by the lexer             */

        case OP_TT:              printf("operator '%s'",
                                     operators[t.value].description);
                                 break;
        case ENDEXP_TT:          printf("end of expression");
                                 break;
        case SUBOPEN_TT:         printf("open bracket");
                                 break;
        case SUBCLOSE_TT:        printf("close bracket");
                                 break;
        case LARGE_NUMBER_TT:    printf("large number: '%s'=%d",t.text,t.value);
                                 break;
        case SMALL_NUMBER_TT:    printf("small number: '%s'=%d",t.text,t.value);
                                 break;
        case VARIABLE_TT:        printf("variable '%s'=%d", t.text, t.value);
                                 break;
        case DICTWORD_TT:        printf("dictionary word '%s'", t.text);
                                 break;
        case ACTION_TT:          printf("action name '%s'", t.text);
                                 break;

        default:
            printf("** unknown token type %d, text='%s', value=%d **",
            t.type, t.text, t.value);
    }
    printf(" }");
}
375
376 /* ------------------------------------------------------------------------- */
377 /*   All but one of the 280 Inform keywords (118 of them opcode names used   */
378 /*   only by the assembler).  (The one left over is "sp", a keyword used in  */
379 /*   assembly language only.)                                                */
380 /*                                                                           */
381 /*   A "keyword group" is a set of keywords to be searched for.  If a match  */
382 /*   is made on an identifier, the token type becomes that given in the KG   */
383 /*   and the token value is its index in the KG.                             */
384 /*                                                                           */
385 /*   The keyword ordering must correspond with the appropriate #define's in  */
386 /*   "header.h" but is otherwise not significant.                            */
387 /* ------------------------------------------------------------------------- */
388
389 #define MAX_KEYWORDS 350
390
391 /* The values will be filled in at compile time, when we know
392    which opcode set to use. */
393 keyword_group opcode_names =
394 { { "" },
395     OPCODE_NAME_TT, FALSE, TRUE
396 };
397
398 static char *opcode_list_z[] = {
399     "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
400     "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
401     "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
402     "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
403     "print_char", "print_num", "random", "push", "pull", "split_window",
404     "set_window", "output_stream", "input_stream", "sound_effect", "jz",
405     "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
406     "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
407     "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
408     "restore", "restart", "ret_popped", "pop", "quit", "new_line",
409     "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
410     "erase_window", "erase_line", "set_cursor", "get_cursor",
411     "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
412     "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
413     "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
414     "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
415     "restore_undo", "draw_picture", "picture_data", "erase_picture",
416     "set_margins", "move_window", "window_size", "window_style",
417     "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
418     "mouse_window", "push_stack", "put_wind_prop", "print_form",
419     "make_menu", "picture_table", "print_unicode", "check_unicode",
420     ""
421 };
422
423 static char *opcode_list_g[] = {
424     "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
425     "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
426     "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle", 
427     "jltu", "jgeu", "jgtu", "jleu", 
428     "call", "return",
429     "catch", "throw", "tailcall", 
430     "copy", "copys", "copyb", "sexs", "sexb", "aload",
431     "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
432     "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
433     "streamchar", "streamnum", "streamstr", 
434     "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
435     "random", "setrandom", "quit", "verify",
436     "restart", "save", "restore", "saveundo", "restoreundo", "protect",
437     "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
438     "linearsearch", "binarysearch", "linkedsearch",
439     "callf", "callfi", "callfii", "callfiii", 
440     "streamunichar",
441     "mzero", "mcopy", "malloc", "mfree",
442     "accelfunc", "accelparam",
443     "numtof", "ftonumz", "ftonumn", "ceil", "floor",
444     "fadd", "fsub", "fmul", "fdiv", "fmod",
445     "sqrt", "exp", "log", "pow",
446     "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
447     "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
448     ""
449 };
450
451 keyword_group opcode_macros =
452 { { "" },
453   OPCODE_MACRO_TT, FALSE, TRUE
454 };
455
456 static char *opmacro_list_z[] = { "" };
457
458 static char *opmacro_list_g[] = {
459     "pull", "push",
460     ""
461 };
462
463 keyword_group directives =
464 { { "abbreviate", "array", "attribute", "class", "constant",
465     "default", "dictionary", "end", "endif", "extend", "fake_action",
466     "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
467     "iffalse", "import", "include", "link", "lowstring", "message",
468     "nearby", "object", "origsource", "property", "release", "replace",
469     "serial", "switches", "statusline", "stub", "system_file", "trace",
470     "undef", "verb", "version", "zcharacter",
471     "" },
472     DIRECTIVE_TT, FALSE, FALSE
473 };
474
475 keyword_group trace_keywords =
476 { { "dictionary", "symbols", "objects", "verbs",
477     "assembly", "expressions", "lines", "tokens", "linker",
478     "on", "off", "" },
479     TRACE_KEYWORD_TT, FALSE, TRUE
480 };
481
482 keyword_group segment_markers =
483 { { "class", "has", "private", "with", "" },
484     SEGMENT_MARKER_TT, FALSE, TRUE
485 };
486
487 keyword_group directive_keywords =
488 { { "alias", "long", "additive",
489     "score", "time",
490     "noun", "held", "multi", "multiheld", "multiexcept",
491     "multiinside", "creature", "special", "number", "scope", "topic",
492     "reverse", "meta", "only", "replace", "first", "last",
493     "string", "table", "buffer", "data", "initial", "initstr",
494     "with", "private", "has", "class",
495     "error", "fatalerror", "warning",
496     "terminating",
497     "" },
498     DIR_KEYWORD_TT, FALSE, TRUE
499 };
500
501 keyword_group misc_keywords =
502 { { "char", "name", "the", "a", "an", "The", "number",
503     "roman", "reverse", "bold", "underline", "fixed", "on", "off",
504     "to", "address", "string", "object", "near", "from", "property", "A", "" },
505     MISC_KEYWORD_TT, FALSE, TRUE
506 };
507
508 keyword_group statements =
509 { { "box", "break", "continue", "default", "do", "else", "font", "for",
510     "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
511     "print", "print_ret", "quit", "read", "remove", "restore", "return",
512     "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
513     "until", "while", "" },
514     STATEMENT_TT, FALSE, TRUE
515 };
516
517 keyword_group conditions =
518 { { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
519     CND_TT, FALSE, TRUE
520 };
521
522 keyword_group system_functions =
523 { { "child", "children", "elder", "eldest", "indirect", "parent", "random",
524     "sibling", "younger", "youngest", "metaclass", "glk", "" },
525     SYSFUN_TT, FALSE, TRUE
526 };
527
528 keyword_group system_constants =
529 { { "adjectives_table", "actions_table", "classes_table",
530     "identifiers_table", "preactions_table", "version_number",
531     "largest_object", "strings_offset", "code_offset",
532     "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
533     "static_memory_offset", "array_names_offset", "readable_memory_offset",
534     "cpv__start", "cpv__end", "ipv__start", "ipv__end",
535     "array__start", "array__end",
536     "lowest_attribute_number", "highest_attribute_number",
537     "attribute_names_array",
538     "lowest_property_number", "highest_property_number",
539     "property_names_array",
540     "lowest_action_number", "highest_action_number",
541     "action_names_array",
542     "lowest_fake_action_number", "highest_fake_action_number",
543     "fake_action_names_array",
544     "lowest_routine_number", "highest_routine_number", "routines_array",
545     "routine_names_array", "routine_flags_array",
546     "lowest_global_number", "highest_global_number", "globals_array",
547     "global_names_array", "global_flags_array",
548     "lowest_array_number", "highest_array_number", "arrays_array",
549     "array_names_array", "array_flags_array",
550     "lowest_constant_number", "highest_constant_number", "constants_array",
551     "constant_names_array",
552     "lowest_class_number", "highest_class_number", "class_objects_array",
553     "lowest_object_number", "highest_object_number",
554     "oddeven_packing",
555     "grammar_table", "dictionary_table", "dynam_string_table",
556     "" },
557     SYSTEM_CONSTANT_TT, FALSE, TRUE
558 };
559
560 keyword_group *keyword_groups[12]
561 = { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
562     &directive_keywords, &misc_keywords, &statements, &conditions,
563     &system_functions, &system_constants, &opcode_macros};
564
565 keyword_group local_variables =
566 { { "" },                                 /* Filled in when routine declared */
567     LOCAL_VARIABLE_TT, FALSE, FALSE
568 };
569
570 static int lexical_context(void)
571 {
572     /*  The lexical context is a number representing all of the context
573         information in the lexical analyser: the same input text will
574         always translate to the same output tokens whenever the context
575         is the same.
576
577         In fact, for efficiency reasons this number omits the bit of
578         information held in the variable "dont_enter_into_symbol_table".
579         Inform never needs to backtrack through tokens parsed in that
580         way (thankfully, as it would be expensive indeed to check
581         the tokens).                                                         */
582
583     int c = 0;
584     if (opcode_names.enabled)         c |= 1;
585     if (directives.enabled)           c |= 2;
586     if (trace_keywords.enabled)       c |= 4;
587     if (segment_markers.enabled)      c |= 8;
588     if (directive_keywords.enabled)   c |= 16;
589     if (misc_keywords.enabled)        c |= 32;
590     if (statements.enabled)           c |= 64;
591     if (conditions.enabled)           c |= 128;
592     if (system_functions.enabled)     c |= 256;
593     if (system_constants.enabled)     c |= 512;
594     if (local_variables.enabled)      c |= 1024;
595
596     if (return_sp_as_variable)        c |= 2048;
597     return(c);
598 }
599
/* Print the mnemonic tags for each context bit set in c (see
   lexical_context() for the bit assignments).                            */
static void print_context(int c)
{   static const char *tags[12] =
    {   "OPC ", "DIR ", "TK ", "SEG ", "DK ", "MK ",
        "STA ", "CND ", "SFUN ", "SCON ", "LV ", "sp "
    };
    int bit;
    for (bit = 0; bit < 12; bit++)
        if ((c & (1 << bit)) != 0) printf("%s", tags[bit]);
}
615
/*  Keyword lookup tables.  Keywords with equal hash codes are chained
    together through keywords_data_table, which holds one triple per
    keyword: (group number, index within group, next entry or -1).       */

static int *keywords_hash_table;          /*  Chain head per hash code   */
static int *keywords_hash_ends_table;     /*  Chain tail per hash code   */
static int *keywords_data_table;          /*  (group, index, next) triples */

static int *local_variable_hash_table;    /*  First local variable with
                                              each hash code, or -1      */
static int *local_variable_hash_codes;    /*  Hash code of each local    */
char **local_variable_texts;              /*  Pointers into text table   */
static char *local_variable_text_table;   /*  Local names, packed        */

static char one_letter_locals[128];       /*  Index of the local with this
                                              single-character name, or
                                              MAX_LOCAL_VARIABLES if none */
626
/*  Build the keyword hash tables described above, after first filling in
    the opcode and opcode-macro keyword groups for the target VM.        */

static void make_keywords_tables(void)
{   int i, j, h, tp=0;
    char **oplist, **maclist;

    /*  Choose the Z-machine or Glulx opcode name lists.                 */
    if (!glulx_mode) {
        oplist = opcode_list_z;
        maclist = opmacro_list_z;
    }
    else {
        oplist = opcode_list_g;
        maclist = opmacro_list_g;
    }

    /*  Both lists are terminated by an empty string.                    */
    for (j=0; *(oplist[j]); j++) {
        opcode_names.keywords[j] = oplist[j];
    }
    opcode_names.keywords[j] = "";

    for (j=0; *(maclist[j]); j++) {
        opcode_macros.keywords[j] = maclist[j];
    }
    opcode_macros.keywords[j] = "";

    /*  -1 marks an empty hash chain.                                    */
    for (i=0; i<HASH_TAB_SIZE; i++)
    {   keywords_hash_table[i] = -1;
        keywords_hash_ends_table[i] = -1;
    }

    /*  Enter every keyword of groups 1 to 11 as a (group, index, next)
        triple, appending each to the chain for its hash code via the
        chain-tail table.                                                */
    for (i=1; i<=11; i++)
    {   keyword_group *kg = keyword_groups[i];
        for (j=0; *(kg->keywords[j]) != 0; j++)
        {   h = hash_code_from_string(kg->keywords[j]);
            if (keywords_hash_table[h] == -1)
                keywords_hash_table[h] = tp;
            else
              *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
            keywords_hash_ends_table[h] = tp;
            *(keywords_data_table + 3*tp) = i;
            *(keywords_data_table + 3*tp+1) = j;
            *(keywords_data_table + 3*tp+2) = -1;
            tp++;
        }
    }
}
671
672 extern void construct_local_variable_tables(void)
673 {   int i, h; char *p = local_variable_text_table;
674     for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
675     for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
676
677     for (i=0; i<no_locals; i++)
678     {   char *q = local_variables.keywords[i];
679         if (q[1] == 0)
680         {   one_letter_locals[(uchar)q[0]] = i;
681             if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
682             if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
683         }
684         h = hash_code_from_string(q);
685         if (local_variable_hash_table[h] == -1)
686             local_variable_hash_table[h] = i;
687         local_variable_hash_codes[i] = h;
688         local_variable_texts[i] = p;
689         strcpy(p, q);
690         p += strlen(p)+1;
691     }
692     for (;i<MAX_LOCAL_VARIABLES-1;i++) 
693       local_variable_texts[i] = "<no such local variable>";
694 }
695
/*  Decide what the identifier at circle[pos] is -- a local variable, a
    keyword, or a symbol -- and set its token type and value accordingly.
    If dirs_only_flag is set, only directive keywords are recognised.    */

static void interpret_identifier(int pos, int dirs_only_flag)
{   int index, hashcode; char *p = circle[pos].text;

    /*  An identifier is either a keyword or a "symbol", a name which the
        lexical analyser leaves to higher levels of Inform to understand.    */

    hashcode = hash_code_from_string(p);

    if (dirs_only_flag) goto KeywordSearch;

    /*  If this is assembly language, perhaps it is "sp"?                    */

    if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
    {   circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
        return;
    }

    /*  Test for local variables first, quite quickly.                       */

    if (local_variables.enabled)
    {   /*  Single-character names have their own shortcut table.            */
        if (p[1]==0)
        {   index = one_letter_locals[(uchar)p[0]];
            if (index<MAX_LOCAL_VARIABLES)
            {   circle[pos].type = LOCAL_VARIABLE_TT;
                circle[pos].value = index+1;
                return;
            }
        }
        /*  Otherwise scan onwards from the first local sharing this hash.   */
        index = local_variable_hash_table[hashcode];
        if (index >= 0)
        {   for (;index<no_locals;index++)
            {   if (hashcode == local_variable_hash_codes[index])
                {   if (strcmpcis(p, local_variable_texts[index])==0)
                    {   circle[pos].type = LOCAL_VARIABLE_TT;
                        circle[pos].value = index+1;
                        return;
                    }
                }
            }
        }
    }

    /*  Now the bulk of the keywords.  Note that the lexer doesn't recognise
        the name of a system function which has been Replaced.               */

    KeywordSearch:
    /*  Walk the chain of keywords sharing this hash code; the data table
        layout is the (group, index, next) triple built by
        make_keywords_tables above.                                          */
    index = keywords_hash_table[hashcode];
    while (index >= 0)
    {   int *i = keywords_data_table + 3*index;
        keyword_group *kg = keyword_groups[*i];
        if (((!dirs_only_flag) && (kg->enabled))
            || (dirs_only_flag && (kg == &directives)))
        {   char *q = kg->keywords[*(i+1)];
            if (((kg->case_sensitive) && (strcmp(p, q)==0))
                || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
            {   if ((kg != &system_functions)
                    || (system_function_usage[*(i+1)]!=2))
                {   circle[pos].type = kg->change_token_type;
                    circle[pos].value = *(i+1);
                    return;
                }
            }
        }
        index = *(i+2);
    }

    if (dirs_only_flag) return;

    /*  Search for the name; create it if necessary.                         */

    circle[pos].value = symbol_index(p, hashcode);
    circle[pos].type = SYMBOL_TT;
}
769
770
771 /* ------------------------------------------------------------------------- */
772 /*   The tokeniser grid aids a rapid decision about the consequences of a    */
773 /*   character reached in the buffer.  In effect it is an efficiently stored */
774 /*   transition table using an algorithm similar to that of S. C. Johnson's  */
775 /*   "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9).       */
776 /*   My thanks to Dilip Sequeira for suggesting this.                        */
777 /*                                                                           */
778 /*       tokeniser_grid[c]   is (16*n + m) if c is the first character of    */
779 /*                               separator numbers n, n+1, ..., n+m-1        */
780 /*                           or certain special values (QUOTE_CODE, etc)     */
781 /*                           or 0 otherwise                                  */
782 /*                                                                           */
783 /*   Since 1000/16 = 62, the code numbers below will need increasing if the  */
784 /*   number of separators supported exceeds 61.                              */
785 /* ------------------------------------------------------------------------- */
786
static int tokeniser_grid[256];     /*  Built by make_tokeniser_grid below  */

/*  Special grid values.  These must stay clear of the separator entries,
    which have the form 16*n + m (see above) and so lie below 1000.      */

#define QUOTE_CODE      1000        /*  '                                */
#define DQUOTE_CODE     1001        /*  "                                */
#define NULL_CODE       1002
#define SPACE_CODE      1003
#define NEGATIVE_CODE   1004
#define DIGIT_CODE      1005        /*  0 to 9                           */
#define RADIX_CODE      1006        /*  $                                */
#define KEYWORD_CODE    1007
#define EOF_CODE        1008        /*  Character value 0                */
#define WHITESPACE_CODE 1009        /*  Space and newline                */
#define COMMENT_CODE    1010        /*  !                                */
#define IDENTIFIER_CODE 1011        /*  Letters and underscore           */
801
/*  This list cannot safely be changed without also changing the header
    separator #defines.  The ordering is significant in that (i) all entries
    beginning with the same character must be adjacent and (ii) that if
    X is an initial substring of Y then Y must come before X.

    E.g. --> must occur before -- to prevent "-->0" being tokenised
    wrongly as "--", ">", "0" rather than "-->", "0".                        */

static const char separators[NUMBER_SEPARATORS][4] =
{   "->", "-->", "--", "-", "++", "+", "*", "/", "%",
    "||", "|", "&&", "&", "~~",
    "~=", "~", "==", "=", ">=", ">",
    "<=", "<", "(", ")", ",",
    ".&", ".#", "..&", "..#", "..", ".",
    "::", ":", "@", ";", "[", "]", "{", "}",
    "$", "?~", "?",
    "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
};
820
821 static void make_tokeniser_grid(void)
822 {
823     /*  Construct the grid to the specification above.                       */
824
825     int i, j;
826
827     for (i=0; i<256; i++) tokeniser_grid[i]=0;
828
829     for (i=0; i<NUMBER_SEPARATORS; i++)
830     {   j=separators[i][0];
831         if (tokeniser_grid[j]==0)
832             tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
833     }
834     tokeniser_grid['\''] = QUOTE_CODE;
835     tokeniser_grid['\"'] = DQUOTE_CODE;
836     tokeniser_grid[0]    = EOF_CODE;
837     tokeniser_grid[' ']  = WHITESPACE_CODE;
838     tokeniser_grid['\n'] = WHITESPACE_CODE;
839     tokeniser_grid['$']  = RADIX_CODE;
840     tokeniser_grid['!']  = COMMENT_CODE;
841
842     tokeniser_grid['0']  = DIGIT_CODE;
843     tokeniser_grid['1']  = DIGIT_CODE;
844     tokeniser_grid['2']  = DIGIT_CODE;
845     tokeniser_grid['3']  = DIGIT_CODE;
846     tokeniser_grid['4']  = DIGIT_CODE;
847     tokeniser_grid['5']  = DIGIT_CODE;
848     tokeniser_grid['6']  = DIGIT_CODE;
849     tokeniser_grid['7']  = DIGIT_CODE;
850     tokeniser_grid['8']  = DIGIT_CODE;
851     tokeniser_grid['9']  = DIGIT_CODE;
852
853     tokeniser_grid['a']  = IDENTIFIER_CODE;
854     tokeniser_grid['b']  = IDENTIFIER_CODE;
855     tokeniser_grid['c']  = IDENTIFIER_CODE;
856     tokeniser_grid['d']  = IDENTIFIER_CODE;
857     tokeniser_grid['e']  = IDENTIFIER_CODE;
858     tokeniser_grid['f']  = IDENTIFIER_CODE;
859     tokeniser_grid['g']  = IDENTIFIER_CODE;
860     tokeniser_grid['h']  = IDENTIFIER_CODE;
861     tokeniser_grid['i']  = IDENTIFIER_CODE;
862     tokeniser_grid['j']  = IDENTIFIER_CODE;
863     tokeniser_grid['k']  = IDENTIFIER_CODE;
864     tokeniser_grid['l']  = IDENTIFIER_CODE;
865     tokeniser_grid['m']  = IDENTIFIER_CODE;
866     tokeniser_grid['n']  = IDENTIFIER_CODE;
867     tokeniser_grid['o']  = IDENTIFIER_CODE;
868     tokeniser_grid['p']  = IDENTIFIER_CODE;
869     tokeniser_grid['q']  = IDENTIFIER_CODE;
870     tokeniser_grid['r']  = IDENTIFIER_CODE;
871     tokeniser_grid['s']  = IDENTIFIER_CODE;
872     tokeniser_grid['t']  = IDENTIFIER_CODE;
873     tokeniser_grid['u']  = IDENTIFIER_CODE;
874     tokeniser_grid['v']  = IDENTIFIER_CODE;
875     tokeniser_grid['w']  = IDENTIFIER_CODE;
876     tokeniser_grid['x']  = IDENTIFIER_CODE;
877     tokeniser_grid['y']  = IDENTIFIER_CODE;
878     tokeniser_grid['z']  = IDENTIFIER_CODE;
879
880     tokeniser_grid['A']  = IDENTIFIER_CODE;
881     tokeniser_grid['B']  = IDENTIFIER_CODE;
882     tokeniser_grid['C']  = IDENTIFIER_CODE;
883     tokeniser_grid['D']  = IDENTIFIER_CODE;
884     tokeniser_grid['E']  = IDENTIFIER_CODE;
885     tokeniser_grid['F']  = IDENTIFIER_CODE;
886     tokeniser_grid['G']  = IDENTIFIER_CODE;
887     tokeniser_grid['H']  = IDENTIFIER_CODE;
888     tokeniser_grid['I']  = IDENTIFIER_CODE;
889     tokeniser_grid['J']  = IDENTIFIER_CODE;
890     tokeniser_grid['K']  = IDENTIFIER_CODE;
891     tokeniser_grid['L']  = IDENTIFIER_CODE;
892     tokeniser_grid['M']  = IDENTIFIER_CODE;
893     tokeniser_grid['N']  = IDENTIFIER_CODE;
894     tokeniser_grid['O']  = IDENTIFIER_CODE;
895     tokeniser_grid['P']  = IDENTIFIER_CODE;
896     tokeniser_grid['Q']  = IDENTIFIER_CODE;
897     tokeniser_grid['R']  = IDENTIFIER_CODE;
898     tokeniser_grid['S']  = IDENTIFIER_CODE;
899     tokeniser_grid['T']  = IDENTIFIER_CODE;
900     tokeniser_grid['U']  = IDENTIFIER_CODE;
901     tokeniser_grid['V']  = IDENTIFIER_CODE;
902     tokeniser_grid['W']  = IDENTIFIER_CODE;
903     tokeniser_grid['X']  = IDENTIFIER_CODE;
904     tokeniser_grid['Y']  = IDENTIFIER_CODE;
905     tokeniser_grid['Z']  = IDENTIFIER_CODE;
906
907     tokeniser_grid['_']  = IDENTIFIER_CODE;
908 }
909
910 /* ------------------------------------------------------------------------- */
911 /*   Definition of a lexical block: a source file or a string containing     */
912 /*   text for lexical analysis; an independent source from the point of      */
913 /*   view of issuing error reports.                                          */
914 /* ------------------------------------------------------------------------- */
915
typedef struct LexicalBlock_s
{   char *filename;                              /*  Full translated name    */
    int   main_flag;                             /*  TRUE if the main file
                                                     (the first one opened)  */
    int   sys_flag;                              /*  TRUE if a System_File   */
    int   source_line;                           /*  Line number count       */
    int   line_start;                            /*  Char number within file
                                                     where the current line
                                                     starts                  */
    int   chars_read;                            /*  Char number of read pos */
    int   file_no;                               /*  Or 255 if not from a
                                                     file; used for debug
                                                     information             */
    char *orig_source;                           /* From #origsource direct  */
    int orig_file;                               /* InputFiles index, or 0
                                                    if no Origsource is set  */
    int32 orig_line;                             /* Origsource line number   */
    int32 orig_char;                             /* Origsource char number   */
} LexicalBlock;
934
/*  Placeholder blocks used when no source file position applies:        */

static LexicalBlock NoFileOpen =
    {   "<before compilation>",  FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };

static LexicalBlock MakingOutput =
    {   "<constructing output>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };

/*  Used when lexing a string; note that sys_flag is TRUE here.          */

static LexicalBlock StringLB =
    {   "<veneer routine>",      FALSE, TRUE,  0, 0, 0, 255, NULL, 0, 0, 0 };

static LexicalBlock *CurrentLB;                  /*  The current lexical
                                                     block of input text     */
946
/*  Mark the current lexical block as coming from a System_File.         */

extern void declare_systemfile(void)
{   CurrentLB->sys_flag = TRUE;
}

/*  Return 1 if the current lexical block is a System_File, 0 if not.    */

extern int is_systemfile(void)
{   return ((CurrentLB->sys_flag)?1:0);
}
954
955 extern void set_origsource_location(char *source, int32 line, int32 charnum)
956 {
957     if (!source) {
958         /* Clear the Origsource declaration. */
959         CurrentLB->orig_file = 0;
960         CurrentLB->orig_source = NULL;
961         CurrentLB->orig_line = 0;
962         CurrentLB->orig_char = 0;
963         return;
964     }
965
966     /* Get the file number for a new or existing InputFiles entry. */
967     int file_no = register_orig_sourcefile(source);
968
969     CurrentLB->orig_file = file_no;
970     CurrentLB->orig_source = InputFiles[file_no-1].filename;
971     CurrentLB->orig_line = line;
972     CurrentLB->orig_char = charnum;
973 }
974
975 /* Error locations. */
976
/*  Return a debug_location describing the position now being read in
    the current lexical block, as a zero-length span (begin == end).     */

extern debug_location get_current_debug_location(void)
{   debug_location result;
    /* Assume that all input characters are one byte. */
    result.file_index = CurrentLB->file_no;
    /*  chars_read includes the lookahead pipeline, which has not yet
        been consumed, so subtract it off.                               */
    result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
    result.end_byte_index = result.beginning_byte_index;
    result.beginning_line_number = CurrentLB->source_line;
    result.end_line_number = result.beginning_line_number;
    result.beginning_character_number =
        CurrentLB->chars_read - CurrentLB->line_start;
    result.end_character_number = result.beginning_character_number;
    result.orig_file_index = CurrentLB->orig_file;
    result.orig_beg_line_number = CurrentLB->orig_line;
    result.orig_beg_char_number = CurrentLB->orig_char;
    return result;
}
993
static debug_location ErrorReport_debug_location;  /*  Saved for the debug
                                                       file, when enabled   */

/*  Copy the current lexical block's position into ErrorReport, so that
    errors issued from now on are attributed to the current line.        */

extern void report_errors_at_current_line(void)
{   ErrorReport.line_number = CurrentLB->source_line;
    ErrorReport.file_number = CurrentLB->file_no;
    if (ErrorReport.file_number == 255)
        ErrorReport.file_number = -1;   /*  255 means "not from a file"  */
    ErrorReport.source      = CurrentLB->filename;
    ErrorReport.main_flag   = CurrentLB->main_flag;
    if (debugfile_switch)
        ErrorReport_debug_location = get_current_debug_location();
    ErrorReport.orig_file = CurrentLB->orig_file;
    ErrorReport.orig_source = CurrentLB->orig_source;
    ErrorReport.orig_line = CurrentLB->orig_line;
    ErrorReport.orig_char = CurrentLB->orig_char;
}
1010
/*  Return the debug location saved by the most recent call to
    report_errors_at_current_line (only set when debugfile_switch is on). */

extern debug_location get_error_report_debug_location(void)
{   return ErrorReport_debug_location;
}

/*  Return the character position at which the current line began.       */

extern int32 get_current_line_start(void)
{   return CurrentLB->line_start;
}
1018
1019 brief_location blank_brief_location;
1020
1021 extern brief_location get_brief_location(ErrorPosition *errpos)
1022 {
1023     brief_location loc;
1024     loc.file_index = errpos->file_number;
1025     loc.line_number = errpos->line_number;
1026     loc.orig_file_index = errpos->orig_file;
1027     loc.orig_line_number = errpos->orig_line;
1028     return loc;
1029 }
1030
1031 extern void export_brief_location(brief_location loc, ErrorPosition *errpos)
1032 {
1033     if (loc.file_index != -1)
1034     {   errpos->file_number = loc.file_index;
1035         errpos->line_number = loc.line_number;
1036         errpos->main_flag = (errpos->file_number == 1);
1037         errpos->orig_source = NULL;
1038         errpos->orig_file = loc.orig_file_index;
1039         errpos->orig_line = loc.orig_line_number;
1040         errpos->orig_char = 0;
1041     }
1042 }
1043
1044 /* ------------------------------------------------------------------------- */
1045 /*   Hash printing and line counting                                         */
1046 /* ------------------------------------------------------------------------- */
1047
static void print_hash(void)
{
    /*  Hash-printing is the practice of printing a # character every 100
        lines of source code (the -x switch), reassuring the user that
        progress is being made                                               */

    /*  "::" marks the very first hash of the run.                           */
    if (no_hash_printed_yet)
    {   printf("::"); no_hash_printed_yet = FALSE;
    }
    printf("#"); hash_printed_since_newline = TRUE;

#ifndef MAC_FACE
    /*  On some systems, text output is buffered to a line at a time, and
        this would frustrate the point of hash-printing, so:                 */

    fflush(stdout);
#endif
}
1066
static void reached_new_line(void)
{
    /*  Called to signal that a new line has been reached in the source code */

    /*  Start the error-context text buffer afresh for the new line.         */
    forerrors_pointer = 0;

    CurrentLB->source_line++;
    CurrentLB->line_start = CurrentLB->chars_read;

    total_source_line_count++;

    /*  Every 100 lines: print a progress hash (with -x switched on), and
        on the Mac MPW build let other tasks run.                            */
    if (total_source_line_count%100==0)
    {   if (hash_switch) print_hash();
#ifdef MAC_MPW
        SpinCursor(32);                    /* I.e., allow other tasks to run */
#endif
    }

#ifdef MAC_FACE
    /*  Periodically process GUI events; if the user has cancelled, tidy
        up and abandon compilation via the fallback longjmp.                 */
    if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
    {   ProcessEvents (&g_proc);
        if (g_proc != true)
        {   free_arrays();
            close_all_source();
            if (temporary_files_switch)
                remove_temp_files();
            if (store_the_text)
                my_free(&all_text,"transcription text");
            abort_transcript_file();
            longjmp (g_fallback, 1);
        }
    }
#endif
}
1101
/*  Called at the start of each new syntax line: reset the error-context
    buffer (when reading from a string) and record the current position
    for error reports.                                                   */

static void new_syntax_line(void)
{   if (source_to_analyse != NULL) forerrors_pointer = 0;
    report_errors_at_current_line();
}
1106
1107 /* Return 10 raised to the expo power.
1108  *
1109  * I'm avoiding the standard pow() function for a rather lame reason:
1110  * it's in the libmath (-lm) library, and I don't want to change the
1111  * build model for the compiler. So, this is implemented with a stupid
1112  * lookup table. It's faster than pow() for small values of expo.
1113  * Probably not as fast if expo is 200, but "$+1e200" is an overflow
1114  * anyway, so I don't expect that to be a problem.
1115  *
1116  * (For some reason, frexp() and ldexp(), which are used later on, do
1117  * not require libmath to be linked in.)
1118  */
static double pow10_cheap(int expo)
{
    /*  Return 10 raised to the expo power, via the lookup table rather
        than pow() -- see the comment above for why.                     */
    #define POW10_RANGE (8)
    static double powers[POW10_RANGE*2+1] = {
        0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
        1.0,
        10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
    };

    double result = 1.0;

    /*  Fold the exponent into the table's range, eight decades at a
        time, accumulating the excess into result.                       */
    while (expo < -POW10_RANGE) {
        result *= powers[0];
        expo += POW10_RANGE;
    }
    while (expo > POW10_RANGE) {
        result *= powers[POW10_RANGE*2];
        expo -= POW10_RANGE;
    }
    return result * powers[POW10_RANGE+expo];
}
1143
1144 /* Return the IEEE-754 single-precision encoding of a floating-point
1145  * number. See http://www.psc.edu/general/software/packages/ieee/ieee.php
1146  * for an explanation.
1147  *
1148  * The number is provided in the pieces it was parsed in:
1149  *    [+|-] intv "." fracv "e" [+|-]expo
1150  *
1151  * If the magnitude is too large (beyond about 3.4e+38), this returns
1152  * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
1153  * small (below about 1e-45), this returns a zero value (0x00000000 or 
1154  * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
1155  * lexer should never do that).
1156  *
1157  * Note that using a float constant does *not* set the uses_float_features
1158  * flag (which would cause the game file to be labelled 3.1.2). There's
1159  * no VM feature here, just an integer. Of course, any use of the float
1160  * *opcodes* will set the flag.
1161  *
1162  * The math functions in this routine require #including <math.h>, but
1163  * they should not require linking the math library (-lm). At least,
1164  * they do not on OSX and Linux.
1165  */
static int32 construct_float(int signbit, double intv, double fracv, int expo)
{
    double absval = (intv + fracv) * pow10_cheap(expo);
    int32 sign = (signbit ? 0x80000000 : 0x0);
    double mant;
    int32 fbits;
 
    if (isinf(absval)) {
        return sign | 0x7f800000; /* infinity */
    }
    if (isnan(absval)) {
        return sign | 0x7fc00000; /* quiet NaN */
    }

    /*  Split absval into mantissa and binary exponent; expo is reused
        from here on as the binary exponent.                             */
    mant = frexp(absval, &expo);

    /* Normalize mantissa to be in the range [1.0, 2.0) */
    if (0.5 <= mant && mant < 1.0) {
        mant *= 2.0;
        expo--;
    }
    else if (mant == 0.0) {
        expo = 0;
    }
    else {
        return sign | 0x7f800000; /* infinity */
    }

    if (expo >= 128) {
        return sign | 0x7f800000; /* infinity */
    }
    else if (expo < -126) {
        /* Denormalized (very small) number */
        mant = ldexp(mant, 126 + expo);
        expo = 0;
    }
    else if (!(expo == 0 && mant == 0.0)) {
        expo += 127; /* Bias the exponent */
        mant -= 1.0; /* Get rid of leading 1 */
    }

    mant *= 8388608.0; /* 2^23 */
    fbits = (int32)(mant + 0.5); /* round mant to nearest int */
    if (fbits >> 23) {
        /* The carry propagated out of a string of 23 1 bits. */
        fbits = 0;
        expo++;
        if (expo >= 255) {
            return sign | 0x7f800000; /* infinity */
        }
    }

    /*  Assemble the sign, biased-exponent and fraction fields.          */
    return (sign) | ((int32)(expo << 23)) | (fbits);
}
1220
1221 /* ------------------------------------------------------------------------- */
1222 /*   Characters are read via a "pipeline" of variables, allowing us to look  */
1223 /*       up to three characters ahead of the current position.               */
1224 /*                                                                           */
1225 /*   There are two possible sources: from the source files being loaded in,  */
1226 /*   and from a string inside Inform (which is where the code for veneer     */
1227 /*   routines comes from).  Each source has its own get-next-character       */
1228 /*   routine.                                                                */
1229 /* ------------------------------------------------------------------------- */
1230 /*   Source 1: from files                                                    */
1231 /*                                                                           */
1232 /*   Note that file_load_chars(p, size) loads "size" bytes into buffer "p"   */
1233 /*   from the current input file.  If the file runs out, then if it was      */
1234 /*   the last source file 4 EOF characters are placed in the buffer: if it   */
1235 /*   was only an Include file ending, then a '\n' character is placed there  */
1236 /*   (essentially to force termination of any comment line) followed by      */
1237 /*   three harmless spaces.                                                  */
1238 /*                                                                           */
1239 /*   The routine returns the number of characters it has written, and note   */
1240 /*   that this conveniently ensures that all characters in the buffer come   */
1241 /*   from the same file.                                                     */
1242 /* ------------------------------------------------------------------------- */
1243
#define SOURCE_BUFFER_SIZE 4096                  /*  Typical disc block size */

typedef struct Sourcefile_s
{   char *buffer;                                /*  Input buffer            */
    int   read_pos;                              /*  Read position in buffer */
    int   size;                                  /*  Number of meaningful
                                                     characters in buffer    */
    int   la, la2, la3;                          /*  Three characters of
                                                     lookahead pipeline      */
    int   file_no;                               /*  Internal file number
                                                     (1, 2, 3, ...)          */
    LexicalBlock LB;                             /*  Position and error
                                                     context for this file   */
} Sourcefile;

static Sourcefile *FileStack;                    /*  Stack of open files
                                                     (nested Includes)       */
static int File_sp;                              /*  Stack pointer           */

static Sourcefile *CF;                           /*  Top entry on stack      */

static int last_input_file;                      /*  Most recent input file
                                                     pushed onto the stack;
                                                     compared against
                                                     current_input_file to
                                                     spot new Includes       */
1264
/*  Push input file file_no onto the file stack at position i: load its
    first buffer-full, prime the lookahead pipeline, and initialise its
    lexical block.  Makes it the current source of input.                */

static void begin_buffering_file(int i, int file_no)
{   int j, cnt; uchar *p;

    if (i >= MAX_INCLUSION_DEPTH) 
       memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);

    p = (uchar *) FileStack[i].buffer;

    /*  Save the suspended file's lookahead pipeline, to be restored
        when this Include finishes.                                      */
    if (i>0)
    {   FileStack[i-1].la  = lookahead;
        FileStack[i-1].la2 = lookahead2;
        FileStack[i-1].la3 = lookahead3;
    }

    FileStack[i].file_no = file_no;
    FileStack[i].size = file_load_chars(file_no,
        (char *) p, SOURCE_BUFFER_SIZE);
    lookahead  = source_to_iso_grid[p[0]];
    lookahead2 = source_to_iso_grid[p[1]];
    lookahead3 = source_to_iso_grid[p[2]];
    if (LOOKAHEAD_SIZE != 3)
        compiler_error
            ("Lexer lookahead size does not match hard-coded lookahead code");
    FileStack[i].read_pos = LOOKAHEAD_SIZE;

    /*  File number 1 is the main source file.                           */
    if (file_no==1) FileStack[i].LB.main_flag = TRUE;
               else FileStack[i].LB.main_flag = FALSE;
    FileStack[i].LB.sys_flag = FALSE;
    FileStack[i].LB.source_line = 1;
    FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
    FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
    FileStack[i].LB.filename = InputFiles[file_no-1].filename;
    FileStack[i].LB.file_no = file_no;
    FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0; 
    FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0;

    CurrentLB = &(FileStack[i].LB);
    CF = &(FileStack[i]);

    /* Check for recursive inclusion */
    cnt = 0;
    for (j=0; j<i; j++)
    {   if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
            cnt++;
    }
    /*  cnt==1 means exactly one earlier occurrence, so the warning is
        issued only on the second inclusion of a given file.             */
    if (cnt==1)
        warning_named("File included more than once",
            FileStack[j].LB.filename);
}
1314
/*  Begin reading source: empty the file stack and push the main source
    file (input file 1) onto it.                                         */

static void create_char_pipeline(void)
{
    File_sp = 0;
    begin_buffering_file(File_sp++, 1);
    pipeline_made = TRUE;
    last_input_file = current_input_file;
}
1322
/*  Return the next source character, advancing the three-character
    lookahead pipeline and switching between files as Includes open and
    close.  Returns 0 when all input is exhausted.                       */

static int get_next_char_from_pipeline(void)
{   uchar *p;

    while (last_input_file < current_input_file)
    {
        /*  An "Include" file must have opened since the last character
            was read. Perhaps more than one. We run forward through the
            list and add them to the include stack. But we ignore
            "Origsource" files (which were never actually opened for
            reading). */

        last_input_file++;
        if (!InputFiles[last_input_file-1].is_input)
            continue;

        begin_buffering_file(File_sp++, last_input_file);
    }
    if (last_input_file != current_input_file)
        compiler_error("last_input_file did not match after Include");

    if (File_sp == 0)
    {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
    }

    /*  Buffer used up: load the next block of the current file.         */
    if (CF->read_pos == CF->size)
    {   CF->size =
            file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
        CF->read_pos = 0;
    }
    else
    /*  Whereas this condition (presumably file_load_chars returns the
        count negated once the file has run out -- confirm against its
        definition) means the current file is finished: pop the stack
        and resume the file which Included it.                           */
    if (CF->read_pos == -(CF->size))
    {   set_token_location(get_current_debug_location());
        File_sp--;
        if (File_sp == 0)
        {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
        }
        CF = &(FileStack[File_sp-1]);
        CurrentLB = &(FileStack[File_sp-1].LB);
        lookahead  = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
        if (CF->read_pos == CF->size)
        {   CF->size =
                file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
            CF->read_pos = 0;
        }
        set_token_location(get_current_debug_location());
    }

    p = (uchar *) (CF->buffer);

    /*  Shift the pipeline along by one character.                       */
    current = lookahead;
    lookahead = lookahead2;
    lookahead2 = lookahead3;
    lookahead3 = source_to_iso_grid[p[CF->read_pos++]];

    CurrentLB->chars_read++;
    if (forerrors_pointer < 511)
        forerrors_buff[forerrors_pointer++] = current;
    if (current == '\n') reached_new_line();
    return(current);
}
1383
1384 /* ------------------------------------------------------------------------- */
1385 /*   Source 2: from a string                                                 */
1386 /* ------------------------------------------------------------------------- */
1387
static int source_to_analyse_pointer;            /*  Current read position   */

/*  Return the next character of the string being analysed, maintaining
    the same lookahead pipeline as the file-based reader.  The pipeline
    is clamped at the string's terminating zero, so we never read past
    the end of the string.                                               */

static int get_next_char_from_string(void)
{   uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
    current = source_to_iso_grid[p[0]];

    if (current == 0)    lookahead  = 0;
                    else lookahead  = source_to_iso_grid[p[1]];
    if (lookahead == 0)  lookahead2 = 0;
                    else lookahead2 = source_to_iso_grid[p[2]];
    if (lookahead2 == 0) lookahead3 = 0;
                    else lookahead3 = source_to_iso_grid[p[3]];

    CurrentLB->chars_read++;
    if (forerrors_pointer < 511)
        forerrors_buff[forerrors_pointer++] = current;
    if (current == '\n') reached_new_line();
    return(current);
}
1407
1408 /* ========================================================================= */
1409 /*   The interface between the lexer and Inform's higher levels:             */
1410 /*                                                                           */
1411 /*       put_token_back()            (effectively) move the read position    */
1412 /*                                       back by one token                   */
1413 /*                                                                           */
1414 /*       get_next_token()            copy the token at the current read      */
1415 /*                                       position into the triple            */
1416 /*                                   (token_type, token_value, token_text)   */
1417 /*                                       and move the read position forward  */
1418 /*                                       by one                              */
1419 /*                                                                           */
1420 /*       restart_lexer(source, name) if source is NULL, initialise the lexer */
1421 /*                                       to read from source files;          */
1422 /*                                   otherwise, to read from this string.    */
1423 /* ------------------------------------------------------------------------- */
1424
1425 extern void put_token_back(void)
1426 {   tokens_put_back++;
1427
1428     if (tokens_trace_level > 0)
1429     {   if (tokens_trace_level == 1) printf("<- ");
1430         else printf("<-\n");
1431     }
1432
1433     /*  The following error, of course, should never happen!                 */
1434
1435     if (tokens_put_back == CIRCLE_SIZE)
1436     {   compiler_error("The lexical analyser has collapsed because of a wrong \
1437 assumption inside Inform");
1438         tokens_put_back--;
1439         return;
1440     }
1441 }
1442
1443 extern void get_next_token(void)
1444 {   int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
1445     int returning_a_put_back_token = TRUE;
1446
1447     context = lexical_context();
1448
1449     if (tokens_put_back > 0)
1450     {   i = circle_position - tokens_put_back + 1;
1451         if (i<0) i += CIRCLE_SIZE;
1452         tokens_put_back--;
1453         if (context != token_contexts[i])
1454         {   j = circle[i].type;
1455             if ((j==0) || ((j>=100) && (j<200)))
1456                 interpret_identifier(i, FALSE);
1457         }
1458         goto ReturnBack;
1459     }
1460     returning_a_put_back_token = FALSE;
1461
1462     if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1463     else circle_position++;
1464
1465     if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
1466         lex_p = lexeme_memory;
1467
1468     circle[circle_position].text = lex_p;
1469     circle[circle_position].value = 0;
1470     *lex_p = 0;
1471
1472     StartTokenAgain:
1473     d = (*get_next_char)();
1474     e = tokeniser_grid[d];
1475
1476     if (next_token_begins_syntax_line)
1477     {   if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1478         {   new_syntax_line();
1479             next_token_begins_syntax_line = FALSE;
1480         }
1481     }
1482
1483     circle[circle_position].location = get_current_debug_location();
1484
1485     switch(e)
1486     {   case 0: char_error("Illegal character found in source:", d);
1487             goto StartTokenAgain;
1488
1489         case WHITESPACE_CODE:
1490             while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1491                 (*get_next_char)();
1492             goto StartTokenAgain;
1493
1494         case COMMENT_CODE:
1495             while ((lookahead != '\n') && (lookahead != 0))
1496                 (*get_next_char)();
1497             goto StartTokenAgain;
1498
1499         case EOF_CODE:
1500             circle[circle_position].type = EOF_TT;
1501             strcpy(lex_p, "<end of file>");
1502             lex_p += strlen(lex_p) + 1;
1503             break;
1504
1505         case DIGIT_CODE:
1506             radix = 10;
1507             ReturnNumber:
1508             n=0;
1509             do
1510             {   n = n*radix + character_digit_value[d];
1511                 *lex_p++ = d;
1512             } while ((character_digit_value[lookahead] < radix)
1513                      && (d = (*get_next_char)(), TRUE));
1514
1515             *lex_p++ = 0;
1516             circle[circle_position].type = NUMBER_TT;
1517             circle[circle_position].value = n;
1518             break;
1519
1520             FloatNumber:
1521             {   int expo=0; double intv=0, fracv=0;
1522                 int expocount=0, intcount=0, fraccount=0;
1523                 int signbit = (d == '-');
1524                 *lex_p++ = d;
1525                 while (character_digit_value[lookahead] < 10) {
1526                     intv = 10.0*intv + character_digit_value[lookahead];
1527                     intcount++;
1528                     *lex_p++ = lookahead;
1529                     (*get_next_char)();
1530                 }
1531                 if (lookahead == '.') {
1532                     double fracpow = 1.0;
1533                     *lex_p++ = lookahead;
1534                     (*get_next_char)();
1535                     while (character_digit_value[lookahead] < 10) {
1536                         fracpow *= 0.1;
1537                         fracv = fracv + fracpow*character_digit_value[lookahead];
1538                         fraccount++;
1539                         *lex_p++ = lookahead;
1540                         (*get_next_char)();
1541                     }
1542                 }
1543                 if (lookahead == 'e' || lookahead == 'E') {
1544                     int exposign = 0;
1545                     *lex_p++ = lookahead;
1546                     (*get_next_char)();
1547                     if (lookahead == '+' || lookahead == '-') {
1548                         exposign = (lookahead == '-');
1549                         *lex_p++ = lookahead;
1550                         (*get_next_char)();
1551                     }
1552                     while (character_digit_value[lookahead] < 10) {
1553                         expo = 10*expo + character_digit_value[lookahead];
1554                         expocount++;
1555                         *lex_p++ = lookahead;
1556                         (*get_next_char)();
1557                     }
1558                     if (expocount == 0)
1559                         error("Floating-point literal must have digits after the 'e'");
1560                     if (exposign) { expo = -expo; }
1561                 }
1562                 if (intcount + fraccount == 0)
1563                     error("Floating-point literal must have digits");
1564                 n = construct_float(signbit, intv, fracv, expo);
1565             }
1566             *lex_p++ = 0;
1567             circle[circle_position].type = NUMBER_TT;
1568             circle[circle_position].value = n;
1569             if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
1570             break;
1571
1572         case RADIX_CODE:
1573             radix = 16; d = (*get_next_char)();
1574             if (d == '-' || d == '+') { goto FloatNumber; }
1575             if (d == '$') { d = (*get_next_char)(); radix = 2; }
1576             if (character_digit_value[d] >= radix)
1577             {   if (radix == 2)
1578                     error("Binary number expected after '$$'");
1579                 else
1580                     error("Hexadecimal number expected after '$'");
1581             }
1582             goto ReturnNumber;
1583
1584         case QUOTE_CODE:     /* Single-quotes: scan a literal string */
1585             quoted_size=0;
1586             do
1587             {   e = d; d = (*get_next_char)(); *lex_p++ = d;
1588                 if (quoted_size++==64)
1589                 {   error(
1590                     "Too much text for one pair of quotations '...' to hold");
1591                     *lex_p='\''; break;
1592                 }
1593                 if ((d == '\'') && (e != '@'))
1594                 {   if (quoted_size == 1)
1595                     {   d = (*get_next_char)(); *lex_p++ = d;
1596                         if (d != '\'')
1597                             error("No text between quotation marks ''");
1598                     }
1599                     break;
1600                 }
1601             } while (d != EOF);
1602             if (d==EOF) ebf_error("'\''", "end of file");
1603             *(lex_p-1) = 0;
1604             circle[circle_position].type = SQ_TT;
1605             break;
1606
1607         case DQUOTE_CODE:    /* Double-quotes: scan a literal string */
1608             quoted_size=0;
1609             do
1610             {   d = (*get_next_char)(); *lex_p++ = d;
1611                 if (quoted_size++==MAX_QTEXT_SIZE)
1612                 {   memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE);
1613                     break;
1614                 }
1615                 if (d == '\n')
1616                 {   lex_p--;
1617                     while (*(lex_p-1) == ' ') lex_p--;
1618                     if (*(lex_p-1) != '^') *lex_p++ = ' ';
1619                     while ((lookahead != EOF) &&
1620                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1621                     (*get_next_char)();
1622                 }
1623                 else if (d == '\\')
1624                 {   int newline_passed = FALSE;
1625                     lex_p--;
1626                     while ((lookahead != EOF) &&
1627                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1628                         if ((d = (*get_next_char)()) == '\n')
1629                             newline_passed = TRUE;
1630                     if (!newline_passed)
1631                     {   char chb[4];
1632                         chb[0] = '\"'; chb[1] = lookahead;
1633                         chb[2] = '\"'; chb[3] = 0;
1634                         ebf_error("empty rest of line after '\\' in string",
1635                             chb);
1636                     }
1637                 }
1638             }   while ((d != EOF) && (d!='\"'));
1639             if (d==EOF) ebf_error("'\"'", "end of file");
1640             *(lex_p-1) = 0;
1641             circle[circle_position].type = DQ_TT;
1642             break;
1643
1644         case IDENTIFIER_CODE:    /* Letter or underscore: an identifier */
1645
1646             *lex_p++ = d; n=1;
1647             while ((n<=MAX_IDENTIFIER_LENGTH)
1648                    && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1649                    || (tokeniser_grid[lookahead] == DIGIT_CODE)))
1650                 n++, *lex_p++ = (*get_next_char)();
1651
1652             *lex_p++ = 0;
1653
1654             if (n > MAX_IDENTIFIER_LENGTH)
1655             {   char bad_length[100];
1656                 sprintf(bad_length,
1657                     "Name exceeds the maximum length of %d characters:",
1658                          MAX_IDENTIFIER_LENGTH);
1659                 error_named(bad_length, circle[circle_position].text);
1660                 /* Trim token so that it doesn't violate
1661                    MAX_IDENTIFIER_LENGTH during error recovery */
1662                 circle[circle_position].text[MAX_IDENTIFIER_LENGTH] = 0;
1663             }
1664
1665             if (dont_enter_into_symbol_table)
1666             {   circle[circle_position].type = DQ_TT;
1667                 circle[circle_position].value = 0;
1668                 if (dont_enter_into_symbol_table == -2)
1669                     interpret_identifier(circle_position, TRUE);
1670                 break;
1671             }
1672
1673             interpret_identifier(circle_position, FALSE);
1674             break;
1675
1676         default:
1677
1678             /*  The character is initial to at least one of the separators  */
1679
1680             for (j=e>>4, k=j+(e&0x0f); j<k; j++)
1681             {   r = (char *) separators[j];
1682                 if (r[1]==0)
1683                 {   *lex_p++=d; *lex_p++=0;
1684                     goto SeparatorMatched;
1685                 }
1686                 else
1687                 if (r[2]==0)
1688                 {   if (*(r+1) == lookahead)
1689                     {   *lex_p++=d;
1690                         *lex_p++=(*get_next_char)();
1691                         *lex_p++=0;
1692                         goto SeparatorMatched;
1693                     }
1694                 }
1695                 else
1696                 {   if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
1697                     {   *lex_p++=d;
1698                         *lex_p++=(*get_next_char)();
1699                         *lex_p++=(*get_next_char)();
1700                         *lex_p++=0;
1701                         goto SeparatorMatched;
1702                     }
1703                 }
1704             }
1705
1706             /*  The following contingency never in fact arises with the
1707                 current set of separators, but might in future  */
1708
1709             *lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
1710             *lex_p++ = 0;
1711             error_named("Unrecognised combination in source:", lex_p);
1712             goto StartTokenAgain;
1713
1714             SeparatorMatched:
1715
1716             circle[circle_position].type = SEP_TT;
1717             circle[circle_position].value = j;
1718             switch(j)
1719             {   case SEMICOLON_SEP: break;
1720                 case HASHNDOLLAR_SEP:
1721                 case HASHWDOLLAR_SEP:
1722                     if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1723                     {   error_named("Character expected after",
1724                             circle[circle_position].text);
1725                         break;
1726                     }
1727                     lex_p--;
1728                     *lex_p++ = (*get_next_char)();
1729                     while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1730                            || (tokeniser_grid[lookahead] == DIGIT_CODE))
1731                         *lex_p++ = (*get_next_char)();
1732                     *lex_p++ = 0;
1733                     break;
1734                 case HASHADOLLAR_SEP:
1735                 case HASHGDOLLAR_SEP:
1736                 case HASHRDOLLAR_SEP:
1737                 case HASHHASH_SEP:
1738                     if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
1739                     {   error_named("Alphabetic character expected after",
1740                             circle[circle_position].text);
1741                         break;
1742                     }
1743                     lex_p--;
1744                     while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1745                            || (tokeniser_grid[lookahead] == DIGIT_CODE))
1746                         *lex_p++ = (*get_next_char)();
1747                     *lex_p++ = 0;
1748                     break;
1749             }
1750             break;
1751     }
1752
1753     i = circle_position;
1754
1755     ReturnBack:
1756     token_value = circle[i].value;
1757     token_type = circle[i].type;
1758     token_text = circle[i].text;
1759     if (!returning_a_put_back_token)
1760     {   set_token_location(circle[i].location);
1761     }
1762     token_contexts[i] = context;
1763
1764     if (tokens_trace_level > 0)
1765     {   if (tokens_trace_level == 1)
1766             printf("'%s' ", circle[i].text);
1767         else
1768         {   printf("-> "); describe_token(circle[i]);
1769             printf(" ");
1770             if (tokens_trace_level > 2) print_context(token_contexts[i]);
1771             printf("\n");
1772         }
1773     }
1774 }
1775
1776 static char veneer_error_title[64];
1777
1778 extern void restart_lexer(char *lexical_source, char *name)
1779 {   int i;
1780     circle_position = 0;
1781     for (i=0; i<CIRCLE_SIZE; i++)
1782     {   circle[i].type = 0;
1783         circle[i].value = 0;
1784         circle[i].text = "(if this is ever visible, there is a bug)";
1785         token_contexts[i] = 0;
1786     }
1787
1788     lex_p = lexeme_memory;
1789     tokens_put_back = 0;
1790     forerrors_pointer = 0;
1791     dont_enter_into_symbol_table = FALSE;
1792     return_sp_as_variable = FALSE;
1793     next_token_begins_syntax_line = TRUE;
1794
1795     source_to_analyse = lexical_source;
1796
1797     if (source_to_analyse == NULL)
1798     {   get_next_char = get_next_char_from_pipeline;
1799         if (!pipeline_made) create_char_pipeline();
1800         forerrors_buff[0] = 0; forerrors_pointer = 0;
1801     }
1802     else
1803     {   get_next_char = get_next_char_from_string;
1804         source_to_analyse_pointer = 0;
1805         CurrentLB = &StringLB;
1806         sprintf(veneer_error_title, "<veneer routine '%s'>", name);
1807         StringLB.filename = veneer_error_title;
1808
1809         CurrentLB->source_line = 1;
1810         CurrentLB->line_start  = 0;
1811         CurrentLB->chars_read  = 0;
1812     }
1813 }
1814
1815 /* ========================================================================= */
1816 /*   Data structure management routines                                      */
1817 /* ------------------------------------------------------------------------- */
1818
1819 extern void init_lexer_vars(void)
1820 {
1821     blank_brief_location.file_index = -1;
1822     blank_brief_location.line_number = 0;
1823     blank_brief_location.orig_file_index = 0;
1824     blank_brief_location.orig_line_number = 0;
1825 }
1826
1827 extern void lexer_begin_prepass(void)
1828 {   total_source_line_count = 0;
1829     CurrentLB = &NoFileOpen;
1830     report_errors_at_current_line();
1831 }
1832
1833 extern void lexer_begin_pass(void)
1834 {   no_hash_printed_yet = TRUE;
1835     hash_printed_since_newline = FALSE;
1836
1837     pipeline_made = FALSE;
1838
1839     restart_lexer(NULL, NULL);
1840 }
1841
1842 extern void lexer_endpass(void)
1843 {   CurrentLB = &MakingOutput;
1844     report_errors_at_current_line();
1845 }
1846
1847 extern void lexer_allocate_arrays(void)
1848 {   int i;
1849
1850     FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
1851         "filestack buffer");
1852
1853     for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1854     FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
1855
1856     lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");
1857
1858     keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1859         "keyword hash table");
1860     keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1861         "keyword hash end table");
1862     keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
1863         "keyword hashing linked list");
1864     local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1865         "local variable hash table");
1866     local_variable_text_table = my_malloc(
1867         (MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
1868         "text of local variable names");
1869
1870     local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
1871         "local variable hash codes");
1872     local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
1873         "local variable text pointers");
1874
1875     make_tokeniser_grid();
1876     make_keywords_tables();
1877
1878     first_token_locations =
1879         my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
1880     first_token_locations->location.file_index = 0;
1881     first_token_locations->location.beginning_byte_index = 0;
1882     first_token_locations->location.end_byte_index = 0;
1883     first_token_locations->location.beginning_line_number = 0;
1884     first_token_locations->location.end_line_number = 0;
1885     first_token_locations->location.beginning_character_number = 0;
1886     first_token_locations->location.end_character_number = 0;
1887     first_token_locations->location.orig_file_index = 0;
1888     first_token_locations->location.orig_beg_line_number = 0;
1889     first_token_locations->location.orig_beg_char_number = 0;
1890     first_token_locations->next = NULL;
1891     first_token_locations->reference_count = 0;
1892     last_token_location = first_token_locations;
1893 }
1894
1895 extern void lexer_free_arrays(void)
1896 {   int i; char *p;
1897
1898     for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1899     {   p = FileStack[i].buffer;
1900         my_free(&p, "source file buffer");
1901     }
1902     my_free(&FileStack, "filestack buffer");
1903     my_free(&lexeme_memory, "lexeme memory");
1904
1905     my_free(&keywords_hash_table, "keyword hash table");
1906     my_free(&keywords_hash_ends_table, "keyword hash end table");
1907     my_free(&keywords_data_table, "keyword hashing linked list");
1908     my_free(&local_variable_hash_table, "local variable hash table");
1909     my_free(&local_variable_text_table, "text of local variable names");
1910
1911     my_free(&local_variable_hash_codes, "local variable hash codes");
1912     my_free(&local_variable_texts, "local variable text pointers");
1913
1914     cleanup_token_locations(NULL);
1915 }
1916
1917 /* ========================================================================= */