/* ------------------------------------------------------------------------- */
/*   "lexer" : Lexical analyser                                              */
/*                                                                           */
/*   Part of Inform 6.35                                                     */
/*   copyright (c) Graham Nelson 1993 - 2021                                 */
/*                                                                           */
/*   Inform is free software: you can redistribute it and/or modify          */
/*   it under the terms of the GNU General Public License as published by    */
/*   the Free Software Foundation, either version 3 of the License, or       */
/*   (at your option) any later version.                                     */
/*                                                                           */
/*   Inform is distributed in the hope that it will be useful,               */
/*   but WITHOUT ANY WARRANTY; without even the implied warranty of          */
/*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           */
/*   GNU General Public License for more details.                            */
/*                                                                           */
/*   You should have received a copy of the GNU General Public License       */
/*   along with Inform. If not, see https://gnu.org/licenses/                */
/* ------------------------------------------------------------------------- */
24 int total_source_line_count, /* Number of source lines so far */
26 no_hash_printed_yet, /* Have not yet printed the first # */
27 hash_printed_since_newline, /* A hash has been printed since the
28 most recent new-line was printed
29 (generally as a result of an error
30 message or the start of pass) */
31 dont_enter_into_symbol_table, /* Return names as text (with
32 token type DQ_TT, i.e., as if
33 they had double-quotes around)
34 and not as entries in the symbol
35 table, when TRUE. If -2, only the
36 keyword table is searched. */
37 return_sp_as_variable; /* When TRUE, the word "sp" denotes
38 the stack pointer variable
39 (used in assembly language only) */
40 int next_token_begins_syntax_line; /* When TRUE, start a new syntax
41 line (for error reporting, etc.)
42 on the source code line where
43 the next token appears */
45 int32 last_mapped_line; /* Last syntax line reported to debugging file */
/* ------------------------------------------------------------------------- */
/*   The lexer's output is a sequence of triples, each called a "token",     */
/*   representing one lexical unit (or "lexeme") each.  Instead of providing */
/*   "lookahead" (that is, always having available the next token after the  */
/*   current one, so that syntax analysers higher up in Inform can have      */
/*   advance knowledge of what is coming), the lexer instead has a system    */
/*   where tokens can be read in and then "put back again".                  */
/*   The meaning of the number (and to some extent the text) supplied with   */
/*   a token depends on its type: see "header.h" for the list of types.      */
/*   For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
/*   number, and translated to the token:                                    */
/*     type NUMBER_TT, value 483, text "$1e3"                                */
/* ------------------------------------------------------------------------- */
/*   These three variables are set to the current token on a call to         */
/*   get_next_token() (but are not changed by a call to put_token_back()).   */
/* ------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------- */
/*   The next two variables are the head and tail of a singly linked list.   */
/*   The tail stores the portion most recently read from the current         */
/*   lexical block; its end values therefore describe the location of the    */
/*   current token, and are updated whenever the three variables above are   */
/*   via set_token_location(...).  Earlier vertices, if any, represent the   */
/*   regions of lexical blocks read beforehand, where new vertices are       */
/*   only introduced by interruptions like a file inclusion or an EOF.       */
/*   Vertices are deleted off of the front of the list once they are no      */
/*   longer referenced by pending debug information records.                 */
/* ------------------------------------------------------------------------- */
80 static debug_locations *first_token_locations;
81 static debug_locations *last_token_location;
83 extern debug_location get_token_location(void)
84 { debug_location result;
85 debug_location *location = &(last_token_location->location);
86 result.file_index = location->file_index;
87 result.beginning_byte_index = location->end_byte_index;
88 result.end_byte_index = location->end_byte_index;
89 result.beginning_line_number = location->end_line_number;
90 result.end_line_number = location->end_line_number;
91 result.beginning_character_number = location->end_character_number;
92 result.end_character_number = location->end_character_number;
93 result.orig_file_index = location->orig_file_index;
94 result.orig_beg_line_number = location->orig_beg_line_number;
95 result.orig_beg_char_number = location->orig_beg_char_number;
99 extern debug_locations get_token_locations(void)
100 { debug_locations result;
101 result.location = get_token_location();
103 result.reference_count = 0;
107 static void set_token_location(debug_location location)
108 { if (location.file_index == last_token_location->location.file_index)
109 { last_token_location->location.end_byte_index =
110 location.end_byte_index;
111 last_token_location->location.end_line_number =
112 location.end_line_number;
113 last_token_location->location.end_character_number =
114 location.end_character_number;
115 last_token_location->location.orig_file_index =
116 location.orig_file_index;
117 last_token_location->location.orig_beg_line_number =
118 location.orig_beg_line_number;
119 last_token_location->location.orig_beg_char_number =
120 location.orig_beg_char_number;
122 { debug_locations*successor =
124 (sizeof(debug_locations),
125 "debug locations of recent tokens");
126 successor->location = location;
127 successor->next = NULL;
128 successor->reference_count = 0;
129 last_token_location->next = successor;
130 last_token_location = successor;
134 extern debug_location_beginning get_token_location_beginning(void)
135 { debug_location_beginning result;
136 ++(last_token_location->reference_count);
137 result.head = last_token_location;
138 result.beginning_byte_index =
139 last_token_location->location.end_byte_index;
140 result.beginning_line_number =
141 last_token_location->location.end_line_number;
142 result.beginning_character_number =
143 last_token_location->location.end_character_number;
144 result.orig_file_index = last_token_location->location.orig_file_index;
145 result.orig_beg_line_number = last_token_location->location.orig_beg_line_number;
146 result.orig_beg_char_number = last_token_location->location.orig_beg_char_number;
151 static void cleanup_token_locations(debug_location_beginning*beginning)
152 { if (first_token_locations)
153 { while (first_token_locations &&
154 !first_token_locations->reference_count)
155 { debug_locations*moribund = first_token_locations;
156 first_token_locations = moribund->next;
157 my_free(&moribund, "debug locations of recent tokens");
159 (beginning->head == moribund || !first_token_locations))
161 ("Records needed by a debug_location_beginning are no "
162 "longer allocated, perhaps because of an invalid reuse "
163 "of this or an earlier beginning");
169 ("Attempt to use a debug_location_beginning when no token "
170 "locations are defined");
173 ("Attempt to clean up token locations when no token locations "
179 extern void discard_token_location(debug_location_beginning beginning)
180 { --(beginning.head->reference_count);
183 extern debug_locations get_token_location_end
184 (debug_location_beginning beginning)
185 { debug_locations result;
186 cleanup_token_locations(&beginning);
187 --(beginning.head->reference_count);
188 /* Sometimes we know what we'll read before we switch to the lexical block
189 where we'll read it. In such cases the beginning will be placed in the
190 prior block and last exactly zero bytes there. It's misleading to
191 include such ranges, so we gobble them. */
192 if (beginning.head->location.end_byte_index ==
193 beginning.beginning_byte_index &&
194 beginning.head->next)
195 { beginning.head = beginning.head->next;
196 result.location = beginning.head->location;
197 result.location.beginning_byte_index = 0;
198 result.location.beginning_line_number = 1;
199 result.location.beginning_character_number = 1;
201 { result.location = beginning.head->location;
202 result.location.beginning_byte_index =
203 beginning.beginning_byte_index;
204 result.location.beginning_line_number =
205 beginning.beginning_line_number;
206 result.location.beginning_character_number =
207 beginning.beginning_character_number;
210 result.location.orig_file_index =
211 beginning.orig_file_index;
212 result.location.orig_beg_line_number =
213 beginning.orig_beg_line_number;
214 result.location.orig_beg_char_number =
215 beginning.orig_beg_char_number;
217 result.next = beginning.head->next;
218 result.reference_count = 0;
/* ------------------------------------------------------------------------- */
/*   In order to be able to put tokens back efficiently, the lexer stores    */
/*   tokens in a "circle": the variable circle_position ranges between       */
/*   0 and CIRCLE_SIZE-1.  We only need a circle size as large as the        */
/*   maximum number of tokens ever put back at once, plus 1 (in effect, the  */
/*   maximum token lookahead ever needed in syntax analysis, plus 1).        */
/*                                                                           */
/*   Unlike some compilers, Inform does not have a context-free lexer: in    */
/*   fact it has 12288 different possible states.  However, the context only */
/*   affects the interpretation of "identifiers": lexemes beginning with a   */
/*   letter and containing up to 32 chars of alphanumeric and underscore     */
/*   chars.  (For example, "default" may refer to the directive or statement */
/*   of that name, and which token values are returned depends on the        */
/*   current lexical context.)                                               */
/*                                                                           */
/*   Along with each token, we also store the lexical context it was         */
/*   translated under; because if it is called for again, there may need     */
/*   to be a fresh interpretation of it if the context has changed.          */
/* ------------------------------------------------------------------------- */
242 #define CIRCLE_SIZE 6
244 /* (The worst case for token lookahead is distinguishing between an
245 old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".) */
247 static int circle_position;
248 static token_data circle[CIRCLE_SIZE];
250 static int token_contexts[CIRCLE_SIZE];
/* ------------------------------------------------------------------------- */
/*   A complication, however, is that the text of some lexemes needs to be   */
/*   held in Inform's memory for much longer periods: for example, a         */
/*   dictionary word lexeme (like "'south'") must have its text preserved    */
/*   until the code generation time for the expression it occurs in, when    */
/*   the dictionary reference is actually made.  Code generation in general  */
/*   occurs as early as possible in Inform: pending some better method of    */
/*   garbage collection, we simply use a buffer so large that unless         */
/*   expressions spread across 10K of source code are found, there can be    */
/*   no problem.                                                             */
/* ------------------------------------------------------------------------- */
264 static char *lexeme_memory;
265 static char *lex_p; /* Current write position */
267 /* ------------------------------------------------------------------------- */
268 /* The lexer itself needs up to 3 characters of lookahead (it uses an */
269 /* LR(3) grammar to translate characters into tokens). */
270 /* ------------------------------------------------------------------------- */
272 #define LOOKAHEAD_SIZE 3
274 static int current, lookahead, /* The latest character read, and */
275 lookahead2, lookahead3; /* the three characters following it */
277 static int pipeline_made; /* Whether or not the pipeline of
278 characters has been constructed
281 static int (* get_next_char)(void); /* Routine for reading the stream of
282 characters: the lexer does not
283 need any "ungetc" routine for
284 putting them back again. End of
285 stream is signalled by returning
288 static char *source_to_analyse; /* The current lexical source:
289 NULL for "load from source files",
290 otherwise this points to a string
291 containing Inform code */
293 static int tokens_put_back; /* Count of the number of backward
294 moves made from the last-read
297 extern void describe_token(token_data t)
299 /* Many of the token types are not set in this file, but later on in
300 Inform's higher stages (for example, in the expression evaluator);
301 but this routine describes them all. */
307 /* The following token types occur in lexer output: */
309 case SYMBOL_TT: printf("symbol ");
310 describe_symbol(t.value);
312 case NUMBER_TT: printf("literal number %d", t.value);
314 case DQ_TT: printf("string \"%s\"", t.text);
316 case SQ_TT: printf("string '%s'", t.text);
318 case SEP_TT: printf("separator '%s'", t.text);
320 case EOF_TT: printf("end of file");
323 case STATEMENT_TT: printf("statement name '%s'", t.text);
325 case SEGMENT_MARKER_TT: printf("object segment marker '%s'", t.text);
327 case DIRECTIVE_TT: printf("directive name '%s'", t.text);
329 case CND_TT: printf("textual conditional '%s'", t.text);
331 case OPCODE_NAME_TT: printf("opcode name '%s'", t.text);
333 case SYSFUN_TT: printf("built-in function name '%s'", t.text);
335 case LOCAL_VARIABLE_TT: printf("local variable name '%s'", t.text);
337 case MISC_KEYWORD_TT: printf("statement keyword '%s'", t.text);
339 case DIR_KEYWORD_TT: printf("directive keyword '%s'", t.text);
341 case TRACE_KEYWORD_TT: printf("'trace' keyword '%s'", t.text);
343 case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
346 /* The remaining are etoken types, not set by the lexer */
348 case OP_TT: printf("operator '%s'",
349 operators[t.value].description);
351 case ENDEXP_TT: printf("end of expression");
353 case SUBOPEN_TT: printf("open bracket");
355 case SUBCLOSE_TT: printf("close bracket");
357 case LARGE_NUMBER_TT: printf("large number: '%s'=%d",t.text,t.value);
359 case SMALL_NUMBER_TT: printf("small number: '%s'=%d",t.text,t.value);
361 case VARIABLE_TT: printf("variable '%s'=%d", t.text, t.value);
363 case DICTWORD_TT: printf("dictionary word '%s'", t.text);
365 case ACTION_TT: printf("action name '%s'", t.text);
369 printf("** unknown token type %d, text='%s', value=%d **",
370 t.type, t.text, t.value);
/* ------------------------------------------------------------------------- */
/*   All but one of the 280 Inform keywords (118 of them opcode names used   */
/*   only by the assembler).  (The one left over is "sp", a keyword used in  */
/*   assembly language only.)                                                */
/*                                                                           */
/*   A "keyword group" is a set of keywords to be searched for.  If a match  */
/*   is made on an identifier, the token type becomes that given in the KG   */
/*   and the token value is its index in the KG.                             */
/*                                                                           */
/*   The keyword ordering must correspond with the appropriate #define's in  */
/*   "header.h" but is otherwise not significant.                            */
/* ------------------------------------------------------------------------- */
388 #define MAX_KEYWORDS 350
390 /* The values will be filled in at compile time, when we know
391 which opcode set to use. */
392 keyword_group opcode_names =
394 OPCODE_NAME_TT, FALSE, TRUE
397 static char *opcode_list_z[] = {
398 "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
399 "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
400 "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
401 "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
402 "print_char", "print_num", "random", "push", "pull", "split_window",
403 "set_window", "output_stream", "input_stream", "sound_effect", "jz",
404 "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
405 "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
406 "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
407 "restore", "restart", "ret_popped", "pop", "quit", "new_line",
408 "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
409 "erase_window", "erase_line", "set_cursor", "get_cursor",
410 "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
411 "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
412 "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
413 "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
414 "restore_undo", "draw_picture", "picture_data", "erase_picture",
415 "set_margins", "move_window", "window_size", "window_style",
416 "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
417 "mouse_window", "push_stack", "put_wind_prop", "print_form",
418 "make_menu", "picture_table", "print_unicode", "check_unicode",
422 static char *opcode_list_g[] = {
423 "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
424 "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
425 "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
426 "jltu", "jgeu", "jgtu", "jleu",
428 "catch", "throw", "tailcall",
429 "copy", "copys", "copyb", "sexs", "sexb", "aload",
430 "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
431 "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
432 "streamchar", "streamnum", "streamstr",
433 "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
434 "random", "setrandom", "quit", "verify",
435 "restart", "save", "restore", "saveundo", "restoreundo", "protect",
436 "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
437 "linearsearch", "binarysearch", "linkedsearch",
438 "callf", "callfi", "callfii", "callfiii",
440 "mzero", "mcopy", "malloc", "mfree",
441 "accelfunc", "accelparam",
442 "numtof", "ftonumz", "ftonumn", "ceil", "floor",
443 "fadd", "fsub", "fmul", "fdiv", "fmod",
444 "sqrt", "exp", "log", "pow",
445 "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
446 "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
450 keyword_group opcode_macros =
452 OPCODE_MACRO_TT, FALSE, TRUE
455 static char *opmacro_list_z[] = { "" };
457 static char *opmacro_list_g[] = {
462 keyword_group directives =
463 { { "abbreviate", "array", "attribute", "class", "constant",
464 "default", "dictionary", "end", "endif", "extend", "fake_action",
465 "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
466 "iffalse", "import", "include", "link", "lowstring", "message",
467 "nearby", "object", "origsource", "property", "release", "replace",
468 "serial", "switches", "statusline", "stub", "system_file", "trace",
469 "undef", "verb", "version", "zcharacter",
471 DIRECTIVE_TT, FALSE, FALSE
474 keyword_group trace_keywords =
475 { { "dictionary", "symbols", "objects", "verbs",
476 "assembly", "expressions", "lines", "tokens", "linker",
478 TRACE_KEYWORD_TT, FALSE, TRUE
481 keyword_group segment_markers =
482 { { "class", "has", "private", "with", "" },
483 SEGMENT_MARKER_TT, FALSE, TRUE
486 keyword_group directive_keywords =
487 { { "alias", "long", "additive",
489 "noun", "held", "multi", "multiheld", "multiexcept",
490 "multiinside", "creature", "special", "number", "scope", "topic",
491 "reverse", "meta", "only", "replace", "first", "last",
492 "string", "table", "buffer", "data", "initial", "initstr",
493 "with", "private", "has", "class",
494 "error", "fatalerror", "warning",
495 "terminating", "static",
497 DIR_KEYWORD_TT, FALSE, TRUE
500 keyword_group misc_keywords =
501 { { "char", "name", "the", "a", "an", "The", "number",
502 "roman", "reverse", "bold", "underline", "fixed", "on", "off",
503 "to", "address", "string", "object", "near", "from", "property", "A", "" },
504 MISC_KEYWORD_TT, FALSE, TRUE
507 keyword_group statements =
508 { { "box", "break", "continue", "default", "do", "else", "font", "for",
509 "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
510 "print", "print_ret", "quit", "read", "remove", "restore", "return",
511 "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
512 "until", "while", "" },
513 STATEMENT_TT, FALSE, TRUE
516 keyword_group conditions =
517 { { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
521 keyword_group system_functions =
522 { { "child", "children", "elder", "eldest", "indirect", "parent", "random",
523 "sibling", "younger", "youngest", "metaclass", "glk", "" },
524 SYSFUN_TT, FALSE, TRUE
527 keyword_group system_constants =
528 { { "adjectives_table", "actions_table", "classes_table",
529 "identifiers_table", "preactions_table", "version_number",
530 "largest_object", "strings_offset", "code_offset",
531 "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
532 "static_memory_offset", "array_names_offset", "readable_memory_offset",
533 "cpv__start", "cpv__end", "ipv__start", "ipv__end",
534 "array__start", "array__end",
535 "lowest_attribute_number", "highest_attribute_number",
536 "attribute_names_array",
537 "lowest_property_number", "highest_property_number",
538 "property_names_array",
539 "lowest_action_number", "highest_action_number",
540 "action_names_array",
541 "lowest_fake_action_number", "highest_fake_action_number",
542 "fake_action_names_array",
543 "lowest_routine_number", "highest_routine_number", "routines_array",
544 "routine_names_array", "routine_flags_array",
545 "lowest_global_number", "highest_global_number", "globals_array",
546 "global_names_array", "global_flags_array",
547 "lowest_array_number", "highest_array_number", "arrays_array",
548 "array_names_array", "array_flags_array",
549 "lowest_constant_number", "highest_constant_number", "constants_array",
550 "constant_names_array",
551 "lowest_class_number", "highest_class_number", "class_objects_array",
552 "lowest_object_number", "highest_object_number",
554 "grammar_table", "dictionary_table", "dynam_string_table",
556 SYSTEM_CONSTANT_TT, FALSE, TRUE
559 keyword_group *keyword_groups[12]
560 = { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
561 &directive_keywords, &misc_keywords, &statements, &conditions,
562 &system_functions, &system_constants, &opcode_macros};
564 keyword_group local_variables =
565 { { "" }, /* Filled in when routine declared */
566 LOCAL_VARIABLE_TT, FALSE, FALSE
569 static int lexical_context(void)
571 /* The lexical context is a number representing all of the context
572 information in the lexical analyser: the same input text will
573 always translate to the same output tokens whenever the context
576 In fact, for efficiency reasons this number omits the bit of
577 information held in the variable "dont_enter_into_symbol_table".
578 Inform never needs to backtrack through tokens parsed in that
579 way (thankfully, as it would be expensive indeed to check
583 if (opcode_names.enabled) c |= 1;
584 if (directives.enabled) c |= 2;
585 if (trace_keywords.enabled) c |= 4;
586 if (segment_markers.enabled) c |= 8;
587 if (directive_keywords.enabled) c |= 16;
588 if (misc_keywords.enabled) c |= 32;
589 if (statements.enabled) c |= 64;
590 if (conditions.enabled) c |= 128;
591 if (system_functions.enabled) c |= 256;
592 if (system_constants.enabled) c |= 512;
593 if (local_variables.enabled) c |= 1024;
595 if (return_sp_as_variable) c |= 2048;
599 static void print_context(int c)
601 if ((c & 1) != 0) printf("OPC ");
602 if ((c & 2) != 0) printf("DIR ");
603 if ((c & 4) != 0) printf("TK ");
604 if ((c & 8) != 0) printf("SEG ");
605 if ((c & 16) != 0) printf("DK ");
606 if ((c & 32) != 0) printf("MK ");
607 if ((c & 64) != 0) printf("STA ");
608 if ((c & 128) != 0) printf("CND ");
609 if ((c & 256) != 0) printf("SFUN ");
610 if ((c & 512) != 0) printf("SCON ");
611 if ((c & 1024) != 0) printf("LV ");
612 if ((c & 2048) != 0) printf("sp ");
615 static int *keywords_hash_table;
616 static int *keywords_hash_ends_table;
617 static int *keywords_data_table;
619 static int *local_variable_hash_table;
620 static int *local_variable_hash_codes;
621 char **local_variable_texts;
622 static char *local_variable_text_table;
624 static char one_letter_locals[128];
626 static void make_keywords_tables(void)
628 char **oplist, **maclist;
631 oplist = opcode_list_z;
632 maclist = opmacro_list_z;
635 oplist = opcode_list_g;
636 maclist = opmacro_list_g;
639 for (j=0; *(oplist[j]); j++) {
640 opcode_names.keywords[j] = oplist[j];
642 opcode_names.keywords[j] = "";
644 for (j=0; *(maclist[j]); j++) {
645 opcode_macros.keywords[j] = maclist[j];
647 opcode_macros.keywords[j] = "";
649 for (i=0; i<HASH_TAB_SIZE; i++)
650 { keywords_hash_table[i] = -1;
651 keywords_hash_ends_table[i] = -1;
654 for (i=1; i<=11; i++)
655 { keyword_group *kg = keyword_groups[i];
656 for (j=0; *(kg->keywords[j]) != 0; j++)
657 { h = hash_code_from_string(kg->keywords[j]);
658 if (keywords_hash_table[h] == -1)
659 keywords_hash_table[h] = tp;
661 *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
662 keywords_hash_ends_table[h] = tp;
663 *(keywords_data_table + 3*tp) = i;
664 *(keywords_data_table + 3*tp+1) = j;
665 *(keywords_data_table + 3*tp+2) = -1;
671 extern void construct_local_variable_tables(void)
672 { int i, h; char *p = local_variable_text_table;
673 for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
674 for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
676 for (i=0; i<no_locals; i++)
677 { char *q = local_variables.keywords[i];
679 { one_letter_locals[(uchar)q[0]] = i;
680 if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
681 if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
683 h = hash_code_from_string(q);
684 if (local_variable_hash_table[h] == -1)
685 local_variable_hash_table[h] = i;
686 local_variable_hash_codes[i] = h;
687 local_variable_texts[i] = p;
691 for (;i<MAX_LOCAL_VARIABLES-1;i++)
692 local_variable_texts[i] = "<no such local variable>";
695 static void interpret_identifier(int pos, int dirs_only_flag)
696 { int index, hashcode; char *p = circle[pos].text;
698 /* An identifier is either a keyword or a "symbol", a name which the
699 lexical analyser leaves to higher levels of Inform to understand. */
701 hashcode = hash_code_from_string(p);
703 if (dirs_only_flag) goto KeywordSearch;
705 /* If this is assembly language, perhaps it is "sp"? */
707 if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
708 { circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
712 /* Test for local variables first, quite quickly. */
714 if (local_variables.enabled)
716 { index = one_letter_locals[(uchar)p[0]];
717 if (index<MAX_LOCAL_VARIABLES)
718 { circle[pos].type = LOCAL_VARIABLE_TT;
719 circle[pos].value = index+1;
723 index = local_variable_hash_table[hashcode];
725 { for (;index<no_locals;index++)
726 { if (hashcode == local_variable_hash_codes[index])
727 { if (strcmpcis(p, local_variable_texts[index])==0)
728 { circle[pos].type = LOCAL_VARIABLE_TT;
729 circle[pos].value = index+1;
737 /* Now the bulk of the keywords. Note that the lexer doesn't recognise
738 the name of a system function which has been Replaced. */
741 index = keywords_hash_table[hashcode];
743 { int *i = keywords_data_table + 3*index;
744 keyword_group *kg = keyword_groups[*i];
745 if (((!dirs_only_flag) && (kg->enabled))
746 || (dirs_only_flag && (kg == &directives)))
747 { char *q = kg->keywords[*(i+1)];
748 if (((kg->case_sensitive) && (strcmp(p, q)==0))
749 || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
750 { if ((kg != &system_functions)
751 || (system_function_usage[*(i+1)]!=2))
752 { circle[pos].type = kg->change_token_type;
753 circle[pos].value = *(i+1);
761 if (dirs_only_flag) return;
763 /* Search for the name; create it if necessary. */
765 circle[pos].value = symbol_index(p, hashcode);
766 circle[pos].type = SYMBOL_TT;
/* ------------------------------------------------------------------------- */
/*   The tokeniser grid aids a rapid decision about the consequences of a    */
/*   character reached in the buffer.  In effect it is an efficiently stored */
/*   transition table using an algorithm similar to that of S. C. Johnson's  */
/*   "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9).       */
/*   My thanks to Dilip Sequeira for suggesting this.                        */
/*                                                                           */
/*     tokeniser_grid[c]   is   (16*n + m)  if c is the first character of   */
/*                                          separator numbers n, n+1, ...,   */
/*                                          n+m-1                            */
/*                         or   certain special values (QUOTE_CODE, etc)     */
/*                         or   0 otherwise                                  */
/*                                                                           */
/*   Since 1000/16 = 62, the code numbers below will need increasing if the  */
/*   number of separators supported exceeds 61.                              */
/* ------------------------------------------------------------------------- */
786 static int tokeniser_grid[256];
788 #define QUOTE_CODE 1000
789 #define DQUOTE_CODE 1001
790 #define NULL_CODE 1002
791 #define SPACE_CODE 1003
792 #define NEGATIVE_CODE 1004
793 #define DIGIT_CODE 1005
794 #define RADIX_CODE 1006
795 #define KEYWORD_CODE 1007
796 #define EOF_CODE 1008
797 #define WHITESPACE_CODE 1009
798 #define COMMENT_CODE 1010
799 #define IDENTIFIER_CODE 1011
/* This list cannot safely be changed without also changing the header
   separator #defines.  The ordering is significant in that (i) all entries
   beginning with the same character must be adjacent and (ii) that if
   X is an initial substring of Y then X must come before Y.

   E.g. --> must occur before -- to prevent "-->0" being tokenised
   wrongly as "--", ">", "0" rather than "-->", "0". */
809 static const char separators[NUMBER_SEPARATORS][4] =
810 { "->", "-->", "--", "-", "++", "+", "*", "/", "%",
811 "||", "|", "&&", "&", "~~",
812 "~=", "~", "==", "=", ">=", ">",
813 "<=", "<", "(", ")", ",",
814 ".&", ".#", "..&", "..#", "..", ".",
815 "::", ":", "@", ";", "[", "]", "{", "}",
817 "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
820 static void make_tokeniser_grid(void)
822 /* Construct the grid to the specification above. */
826 for (i=0; i<256; i++) tokeniser_grid[i]=0;
828 for (i=0; i<NUMBER_SEPARATORS; i++)
829 { j=separators[i][0];
830 if (tokeniser_grid[j]==0)
831 tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
833 tokeniser_grid['\''] = QUOTE_CODE;
834 tokeniser_grid['\"'] = DQUOTE_CODE;
835 tokeniser_grid[0] = EOF_CODE;
836 tokeniser_grid[' '] = WHITESPACE_CODE;
837 tokeniser_grid['\n'] = WHITESPACE_CODE;
838 tokeniser_grid['$'] = RADIX_CODE;
839 tokeniser_grid['!'] = COMMENT_CODE;
841 tokeniser_grid['0'] = DIGIT_CODE;
842 tokeniser_grid['1'] = DIGIT_CODE;
843 tokeniser_grid['2'] = DIGIT_CODE;
844 tokeniser_grid['3'] = DIGIT_CODE;
845 tokeniser_grid['4'] = DIGIT_CODE;
846 tokeniser_grid['5'] = DIGIT_CODE;
847 tokeniser_grid['6'] = DIGIT_CODE;
848 tokeniser_grid['7'] = DIGIT_CODE;
849 tokeniser_grid['8'] = DIGIT_CODE;
850 tokeniser_grid['9'] = DIGIT_CODE;
852 tokeniser_grid['a'] = IDENTIFIER_CODE;
853 tokeniser_grid['b'] = IDENTIFIER_CODE;
854 tokeniser_grid['c'] = IDENTIFIER_CODE;
855 tokeniser_grid['d'] = IDENTIFIER_CODE;
856 tokeniser_grid['e'] = IDENTIFIER_CODE;
857 tokeniser_grid['f'] = IDENTIFIER_CODE;
858 tokeniser_grid['g'] = IDENTIFIER_CODE;
859 tokeniser_grid['h'] = IDENTIFIER_CODE;
860 tokeniser_grid['i'] = IDENTIFIER_CODE;
861 tokeniser_grid['j'] = IDENTIFIER_CODE;
862 tokeniser_grid['k'] = IDENTIFIER_CODE;
863 tokeniser_grid['l'] = IDENTIFIER_CODE;
864 tokeniser_grid['m'] = IDENTIFIER_CODE;
865 tokeniser_grid['n'] = IDENTIFIER_CODE;
866 tokeniser_grid['o'] = IDENTIFIER_CODE;
867 tokeniser_grid['p'] = IDENTIFIER_CODE;
868 tokeniser_grid['q'] = IDENTIFIER_CODE;
869 tokeniser_grid['r'] = IDENTIFIER_CODE;
870 tokeniser_grid['s'] = IDENTIFIER_CODE;
871 tokeniser_grid['t'] = IDENTIFIER_CODE;
872 tokeniser_grid['u'] = IDENTIFIER_CODE;
873 tokeniser_grid['v'] = IDENTIFIER_CODE;
874 tokeniser_grid['w'] = IDENTIFIER_CODE;
875 tokeniser_grid['x'] = IDENTIFIER_CODE;
876 tokeniser_grid['y'] = IDENTIFIER_CODE;
877 tokeniser_grid['z'] = IDENTIFIER_CODE;
879 tokeniser_grid['A'] = IDENTIFIER_CODE;
880 tokeniser_grid['B'] = IDENTIFIER_CODE;
881 tokeniser_grid['C'] = IDENTIFIER_CODE;
882 tokeniser_grid['D'] = IDENTIFIER_CODE;
883 tokeniser_grid['E'] = IDENTIFIER_CODE;
884 tokeniser_grid['F'] = IDENTIFIER_CODE;
885 tokeniser_grid['G'] = IDENTIFIER_CODE;
886 tokeniser_grid['H'] = IDENTIFIER_CODE;
887 tokeniser_grid['I'] = IDENTIFIER_CODE;
888 tokeniser_grid['J'] = IDENTIFIER_CODE;
889 tokeniser_grid['K'] = IDENTIFIER_CODE;
890 tokeniser_grid['L'] = IDENTIFIER_CODE;
891 tokeniser_grid['M'] = IDENTIFIER_CODE;
892 tokeniser_grid['N'] = IDENTIFIER_CODE;
893 tokeniser_grid['O'] = IDENTIFIER_CODE;
894 tokeniser_grid['P'] = IDENTIFIER_CODE;
895 tokeniser_grid['Q'] = IDENTIFIER_CODE;
896 tokeniser_grid['R'] = IDENTIFIER_CODE;
897 tokeniser_grid['S'] = IDENTIFIER_CODE;
898 tokeniser_grid['T'] = IDENTIFIER_CODE;
899 tokeniser_grid['U'] = IDENTIFIER_CODE;
900 tokeniser_grid['V'] = IDENTIFIER_CODE;
901 tokeniser_grid['W'] = IDENTIFIER_CODE;
902 tokeniser_grid['X'] = IDENTIFIER_CODE;
903 tokeniser_grid['Y'] = IDENTIFIER_CODE;
904 tokeniser_grid['Z'] = IDENTIFIER_CODE;
906 tokeniser_grid['_'] = IDENTIFIER_CODE;
909 /* ------------------------------------------------------------------------- */
910 /* Definition of a lexical block: a source file or a string containing */
911 /* text for lexical analysis; an independent source from the point of */
912 /* view of issuing error reports. */
913 /* ------------------------------------------------------------------------- */
/* Per-source bookkeeping: one LexicalBlock per file (or internal string)
   being lexed, holding the name, line/char counters and any #Origsource
   override used when reporting errors.
   NOTE(review): this extract elides several lines (original numbering
   jumps), including some trailing fields and the closing "} LexicalBlock;". */
915 typedef struct LexicalBlock_s
916 { char *filename; /* Full translated name */
917 int main_flag; /* TRUE if the main file
918 (the first one opened) */
919 int sys_flag; /* TRUE if a System_File */
920 int source_line; /* Line number count */
921 int line_start; /* Char number within file
922 where the current line
924 int chars_read; /* Char number of read pos */
925 int file_no; /* Or 255 if not from a
928 char *orig_source; /* From #origsource direct */
/* Sentinel lexical blocks used when no real source file is current:
   before compilation starts, while output is being constructed, and
   while a veneer routine (an internal string) is being lexed.
   file_no 255 marks "not from a file" in all three. */
934 static LexicalBlock NoFileOpen =
935 { "<before compilation>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
937 static LexicalBlock MakingOutput =
938 { "<constructing output>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
940 static LexicalBlock StringLB =
941 { "<veneer routine>", FALSE, TRUE, 0, 0, 0, 255, NULL, 0, 0, 0 };
943 static LexicalBlock *CurrentLB; /* The current lexical
944 block of input text */
/* Mark the current lexical block as a System_File. */
946 extern void declare_systemfile(void)
947 { CurrentLB->sys_flag = TRUE;
/* Return 1 if the current lexical block is a System_File, else 0. */
950 extern int is_systemfile(void)
951 { return ((CurrentLB->sys_flag)?1:0);
/* Handle an Origsource directive: either clear the current block's
   #Origsource override, or register "source" in InputFiles and record
   the overriding file/line/char position.
   NOTE(review): the branch condition separating the clear path from the
   set path (original lines 955-966) is elided in this extract. */
954 extern void set_origsource_location(char *source, int32 line, int32 charnum)
959 /* Clear the Origsource declaration. */
960 CurrentLB->orig_file = 0;
961 CurrentLB->orig_source = NULL;
962 CurrentLB->orig_line = 0;
963 CurrentLB->orig_char = 0;
967 /* Get the file number for a new or existing InputFiles entry. */
968 file_no = register_orig_sourcefile(source);
970 CurrentLB->orig_file = file_no;
971 CurrentLB->orig_source = InputFiles[file_no-1].filename;
972 CurrentLB->orig_line = line;
973 CurrentLB->orig_char = charnum;
976 /* Error locations. */
/* Build a debug_location describing the current read position: a
   zero-length span (beginning == end for byte, line and character
   coordinates), plus any #Origsource override recorded in CurrentLB.
   The byte index is backed up by LOOKAHEAD_SIZE because the pipeline
   reads ahead of the token being reported. */
978 extern debug_location get_current_debug_location(void)
979 { debug_location result;
980 /* Assume that all input characters are one byte. */
981 result.file_index = CurrentLB->file_no;
982 result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
983 result.end_byte_index = result.beginning_byte_index;
984 result.beginning_line_number = CurrentLB->source_line;
985 result.end_line_number = result.beginning_line_number;
986 result.beginning_character_number =
987 CurrentLB->chars_read - CurrentLB->line_start;
988 result.end_character_number = result.beginning_character_number;
989 result.orig_file_index = CurrentLB->orig_file;
990 result.orig_beg_line_number = CurrentLB->orig_line;
991 result.orig_beg_char_number = CurrentLB->orig_char;
995 static debug_location ErrorReport_debug_location;
/* Copy the current lexical position into the global ErrorReport record
   so that subsequently-issued errors point at this line.  file_no 255
   ("not from a file") is translated to -1 for the report. */
997 extern void report_errors_at_current_line(void)
998 { ErrorReport.line_number = CurrentLB->source_line;
999 ErrorReport.file_number = CurrentLB->file_no;
1000 if (ErrorReport.file_number == 255)
1001 ErrorReport.file_number = -1;
1002 ErrorReport.source = CurrentLB->filename;
1003 ErrorReport.main_flag = CurrentLB->main_flag;
1004 if (debugfile_switch)
1005 ErrorReport_debug_location = get_current_debug_location();
1006 ErrorReport.orig_file = CurrentLB->orig_file;
1007 ErrorReport.orig_source = CurrentLB->orig_source;
1008 ErrorReport.orig_line = CurrentLB->orig_line;
1009 ErrorReport.orig_char = CurrentLB->orig_char;
/* Return the debug location captured at the last report_errors_at_current_line
   call (only updated when debugfile_switch is set). */
1012 extern debug_location get_error_report_debug_location(void)
1013 { return ErrorReport_debug_location;
/* Return the char offset within the current file at which the current
   source line began. */
1016 extern int32 get_current_line_start(void)
1017 { return CurrentLB->line_start;
1020 brief_location blank_brief_location;
/* Compress an ErrorPosition into a brief_location (file and line only,
   plus the #Origsource equivalents).
   NOTE(review): the local declaration of "loc" and the return statement
   (original lines 1023-1024, 1029-1030) are elided in this extract. */
1022 extern brief_location get_brief_location(ErrorPosition *errpos)
1025 loc.file_index = errpos->file_number;
1026 loc.line_number = errpos->line_number;
1027 loc.orig_file_index = errpos->orig_file;
1028 loc.orig_line_number = errpos->orig_line;
/* Expand a brief_location back into an ErrorPosition.  A file_index of
   -1 (the blank_brief_location sentinel) leaves errpos untouched; file
   number 1 is by convention the main source file.
   NOTE(review): orig_source cannot be recovered from a brief_location,
   so it is reset to NULL and orig_char to 0 here. */
1032 extern void export_brief_location(brief_location loc, ErrorPosition *errpos)
1034 if (loc.file_index != -1)
1035 { errpos->file_number = loc.file_index;
1036 errpos->line_number = loc.line_number;
1037 errpos->main_flag = (errpos->file_number == 1);
1038 errpos->orig_source = NULL;
1039 errpos->orig_file = loc.orig_file_index;
1040 errpos->orig_line = loc.orig_line_number;
1041 errpos->orig_char = 0;
1045 /* ------------------------------------------------------------------------- */
1046 /* Hash printing and line counting */
1047 /* ------------------------------------------------------------------------- */
/* Emit one progress character: "::" the first time (start of a pass),
   "#" thereafter; sets the flags used by error printing to know a hash
   is pending on the current output line. */
1049 static void print_hash(void)
1051 /* Hash-printing is the practice of printing a # character every 100
1052 lines of source code (the -x switch), reassuring the user that
1053 progress is being made */
1055 if (no_hash_printed_yet)
1056 { printf("::"); no_hash_printed_yet = FALSE;
1058 printf("#"); hash_printed_since_newline = TRUE;
1061 /* On some systems, text output is buffered to a line at a time, and
1062 this would frustrate the point of hash-printing, so: */
/* Advance all per-line counters when a '\n' is consumed: resets the
   "text of current line" buffer, bumps the per-block and global line
   counts, and every 100 lines prints a hash (-x) and/or yields to the
   host environment.
   NOTE(review): the elided lines around 1083-1094 appear to belong to a
   conditionally-compiled (classic-Mac) event-handling/abort path —
   confirm against the full source before editing. */
1068 static void reached_new_line(void)
1070 /* Called to signal that a new line has been reached in the source code */
1072 forerrors_pointer = 0;
1074 CurrentLB->source_line++;
1075 CurrentLB->line_start = CurrentLB->chars_read;
1077 total_source_line_count++;
1079 if (total_source_line_count%100==0)
1080 { if (hash_switch) print_hash();
1082 SpinCursor(32); /* I.e., allow other tasks to run */
1087 if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
1088 { ProcessEvents (&g_proc);
1092 if (temporary_files_switch)
1093 remove_temp_files();
1095 my_free(&all_text,"transcription text");
1096 abort_transcript_file();
1097 longjmp (g_fallback, 1);
/* Begin a new syntax line for error reporting: clear the for-errors
   buffer (when lexing from a string) and re-anchor error positions at
   the current line. */
1103 static void new_syntax_line(void)
1104 { if (source_to_analyse != NULL) forerrors_pointer = 0;
1105 report_errors_at_current_line();
1108 /* Return 10 raised to the expo power.
1110 * I'm avoiding the standard pow() function for a rather lame reason:
1111 * it's in the libmath (-lm) library, and I don't want to change the
1112 * build model for the compiler. So, this is implemented with a stupid
1113 * lookup table. It's faster than pow() for small values of expo.
1114 * Probably not as fast if expo is 200, but "$+1e200" is an overflow
1115 * anyway, so I don't expect that to be a problem.
1117 * (For some reason, frexp() and ldexp(), which are used later on, do
1118 * not require libmath to be linked in.)
/* Return 10 raised to the power expo, via a lookup table covering
   10^-8 .. 10^8 and repeated scaling for exponents outside that range
   (see the long comment above for why pow() is avoided).
   NOTE(review): the loop bodies and the in-range early return (original
   lines 1127-1131, 1133-1137, 1140) are elided in this extract. */
1120 static double pow10_cheap(int expo)
1122 #define POW10_RANGE (8)
1123 static double powers[POW10_RANGE*2+1] = {
1124 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
1126 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
1132 for (; expo < -POW10_RANGE; expo += POW10_RANGE) {
1135 return res * powers[POW10_RANGE+expo];
1138 for (; expo > POW10_RANGE; expo -= POW10_RANGE) {
1139 res *= powers[POW10_RANGE*2];
1141 return res * powers[POW10_RANGE+expo];
1145 /* Return the IEEE-754 single-precision encoding of a floating-point
1146 * number. See http://www.psc.edu/general/software/packages/ieee/ieee.php
1147 * for an explanation.
1149 * The number is provided in the pieces it was parsed in:
1150 * [+|-] intv "." fracv "e" [+|-]expo
1152 * If the magnitude is too large (beyond about 3.4e+38), this returns
1153 * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
1154 * small (below about 1e-45), this returns a zero value (0x00000000 or
1155 * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
1156 * lexer should never do that).
1158 * Note that using a float constant does *not* set the uses_float_features
1159 * flag (which would cause the game file to be labelled 3.1.2). There's
1160 * no VM feature here, just an integer. Of course, any use of the float
1161 * *opcodes* will set the flag.
1163 * The math functions in this routine require #including <math.h>, but
1164 * they should not require linking the math library (-lm). At least,
1165 * they do not on OSX and Linux.
/* Encode sign/integer-part/fraction/exponent as an IEEE-754 single-
   precision bit pattern (see the long comment above for overflow,
   underflow and NaN behaviour).  frexp() splits the value into
   mantissa*2^expo; the mantissa is then normalised, the exponent biased,
   and 23 bits of fraction rounded to nearest.
   NOTE(review): many interior lines (declarations of mant/fbits, the
   exponent-range checks, the rounding-carry handling, the bias
   arithmetic) are elided in this extract — do not infer the exact
   rounding path from what is visible. */
1167 static int32 construct_float(int signbit, double intv, double fracv, int expo)
1169 double absval = (intv + fracv) * pow10_cheap(expo);
1170 int32 sign = (signbit ? 0x80000000 : 0x0);
1174 if (isinf(absval)) {
1175 return sign | 0x7f800000; /* infinity */
1177 if (isnan(absval)) {
1178 return sign | 0x7fc00000;
1181 mant = frexp(absval, &expo);
1183 /* Normalize mantissa to be in the range [1.0, 2.0) */
1184 if (0.5 <= mant && mant < 1.0) {
1188 else if (mant == 0.0) {
1192 return sign | 0x7f800000; /* infinity */
1196 return sign | 0x7f800000; /* infinity */
1198 else if (expo < -126) {
1199 /* Denormalized (very small) number */
1200 mant = ldexp(mant, 126 + expo);
1203 else if (!(expo == 0 && mant == 0.0)) {
1205 mant -= 1.0; /* Get rid of leading 1 */
1208 mant *= 8388608.0; /* 2^23 */
1209 fbits = (int32)(mant + 0.5); /* round mant to nearest int */
1211 /* The carry propagated out of a string of 23 1 bits. */
1215 return sign | 0x7f800000; /* infinity */
1219 return (sign) | ((int32)(expo << 23)) | (fbits);
1222 /* ------------------------------------------------------------------------- */
1223 /* Characters are read via a "pipeline" of variables, allowing us to look */
1224 /* up to three characters ahead of the current position. */
1226 /* There are two possible sources: from the source files being loaded in, */
1227 /* and from a string inside Inform (which is where the code for veneer */
1228 /* routines comes from). Each source has its own get-next-character */
1230 /* ------------------------------------------------------------------------- */
1231 /* Source 1: from files */
1233 /* Note that file_load_chars(p, size) loads "size" bytes into buffer "p" */
1234 /* from the current input file. If the file runs out, then if it was */
1235 /* the last source file 4 EOF characters are placed in the buffer: if it */
1236 /* was only an Include file ending, then a '\n' character is placed there */
1237 /* (essentially to force termination of any comment line) followed by */
1238 /* three harmless spaces. */
1240 /* The routine returns the number of characters it has written, and note */
1241 /* that this conveniently ensures that all characters in the buffer come */
1242 /* from the same file. */
1243 /* ------------------------------------------------------------------------- */
1245 #define SOURCE_BUFFER_SIZE 4096 /* Typical disc block size */
/* One Sourcefile per open file on the inclusion stack: its disc buffer,
   read position, and the three-character lookahead saved while an
   included file is being read.
   NOTE(review): the struct's LB member and closing "} Sourcefile;"
   (original lines 1255-1257) are elided in this extract. */
1247 typedef struct Sourcefile_s
1248 { char *buffer; /* Input buffer */
1249 int read_pos; /* Read position in buffer */
1250 int size; /* Number of meaningful
1251 characters in buffer */
1252 int la, la2, la3; /* Three characters of
1253 lookahead pipeline */
1254 int file_no; /* Internal file number
/* The inclusion stack itself, its stack pointer, and the top entry. */
1259 static Sourcefile *FileStack;
1260 static int File_sp; /* Stack pointer */
1262 static Sourcefile *CF; /* Top entry on stack */
1264 static int last_input_file;
/* Push file "file_no" onto the inclusion stack at depth i: save the
   outer file's lookahead pipeline, load the first buffer-full, prime
   the three lookahead characters, initialise the file's LexicalBlock,
   make it current, and warn about duplicate inclusion. */
1266 static void begin_buffering_file(int i, int file_no)
1267 { int j, cnt; uchar *p;
1269 if (i >= MAX_INCLUSION_DEPTH)
1270 memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);
1272 p = (uchar *) FileStack[i].buffer;
/* Save the enclosing file's lookahead pipeline so it can resume. */
1275 { FileStack[i-1].la = lookahead;
1276 FileStack[i-1].la2 = lookahead2;
1277 FileStack[i-1].la3 = lookahead3;
1280 FileStack[i].file_no = file_no;
1281 FileStack[i].size = file_load_chars(file_no,
1282 (char *) p, SOURCE_BUFFER_SIZE);
1283 lookahead = source_to_iso_grid[p[0]];
1284 lookahead2 = source_to_iso_grid[p[1]];
1285 lookahead3 = source_to_iso_grid[p[2]];
1286 if (LOOKAHEAD_SIZE != 3)
1288 ("Lexer lookahead size does not match hard-coded lookahead code");
1289 FileStack[i].read_pos = LOOKAHEAD_SIZE;
/* file_no 1 is by convention the main source file. */
1291 if (file_no==1) FileStack[i].LB.main_flag = TRUE;
1292 else FileStack[i].LB.main_flag = FALSE;
1293 FileStack[i].LB.sys_flag = FALSE;
1294 FileStack[i].LB.source_line = 1;
1295 FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
1296 FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
1297 FileStack[i].LB.filename = InputFiles[file_no-1].filename;
1298 FileStack[i].LB.file_no = file_no;
1299 FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0;
1300 FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0;
1302 CurrentLB = &(FileStack[i].LB);
1303 CF = &(FileStack[i]);
1305 /* Check for recursive inclusion */
1308 { if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
1312 warning_named("File included more than once",
1313 FileStack[j].LB.filename);
/* Start the character pipeline by pushing the main source file (file 1)
   onto the (empty) inclusion stack. */
1316 static void create_char_pipeline(void)
1319 begin_buffering_file(File_sp++, 1);
1320 pipeline_made = TRUE;
1321 last_input_file = current_input_file;
/* Advance the file-based pipeline by one character: open any Include
   files registered since the last read, refill or pop buffers at
   end-of-buffer/end-of-file, shift the three-character lookahead, log
   the character into forerrors_buff, and count lines.
   NOTE(review): several control-flow lines (loop/if framing, the "size
   negated" end-of-file convention handling around 1344-1359) are elided
   in this extract; the pop-to-enclosing-file path restores CF, CurrentLB
   and the saved lookahead from the stack. */
1324 static int get_next_char_from_pipeline(void)
1327 while (last_input_file < current_input_file)
1329 /* An "Include" file must have opened since the last character
1330 was read. Perhaps more than one. We run forward through the
1331 list and add them to the include stack. But we ignore
1332 "Origsource" files (which were never actually opened for
1336 if (!InputFiles[last_input_file-1].is_input)
1339 begin_buffering_file(File_sp++, last_input_file);
1341 if (last_input_file != current_input_file)
1342 compiler_error("last_input_file did not match after Include");
1345 { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
1348 if (CF->read_pos == CF->size)
1350 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
1354 if (CF->read_pos == -(CF->size))
1355 { set_token_location(get_current_debug_location());
1358 { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
/* Pop back to the enclosing file and restore its pipeline state. */
1360 CF = &(FileStack[File_sp-1]);
1361 CurrentLB = &(FileStack[File_sp-1].LB);
1362 lookahead = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
1363 if (CF->read_pos == CF->size)
1365 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
1368 set_token_location(get_current_debug_location());
1371 p = (uchar *) (CF->buffer);
/* Shift the lookahead pipeline forward by one character. */
1373 current = lookahead;
1374 lookahead = lookahead2;
1375 lookahead2 = lookahead3;
1376 lookahead3 = source_to_iso_grid[p[CF->read_pos++]];
1378 CurrentLB->chars_read++;
1379 if (forerrors_pointer < 511)
1380 forerrors_buff[forerrors_pointer++] = current;
1381 if (current == '\n') reached_new_line();
1385 /* ------------------------------------------------------------------------- */
1386 /* Source 2: from a string */
1387 /* ------------------------------------------------------------------------- */
1389 static int source_to_analyse_pointer; /* Current read position */
/* Advance the string-based pipeline by one character.  The lookahead
   variables are clamped to 0 at the terminating NUL so the lexer never
   reads past the end of source_to_analyse; the character is logged and
   line-counted exactly as in the file pipeline. */
1391 static int get_next_char_from_string(void)
1392 { uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
1393 current = source_to_iso_grid[p[0]];
1395 if (current == 0) lookahead = 0;
1396 else lookahead = source_to_iso_grid[p[1]];
1397 if (lookahead == 0) lookahead2 = 0;
1398 else lookahead2 = source_to_iso_grid[p[2]];
1399 if (lookahead2 == 0) lookahead3 = 0;
1400 else lookahead3 = source_to_iso_grid[p[3]];
1402 CurrentLB->chars_read++;
1403 if (forerrors_pointer < 511)
1404 forerrors_buff[forerrors_pointer++] = current;
1405 if (current == '\n') reached_new_line();
1409 /* ========================================================================= */
1410 /* The interface between the lexer and Inform's higher levels: */
1412 /* put_token_back() (effectively) move the read position */
1413 /* back by one token */
1415 /* get_next_token() copy the token at the current read */
1416 /* position into the triple */
1417 /* (token_type, token_value, token_text) */
1418 /* and move the read position forward */
1421 /* restart_lexer(source, name) if source is NULL, initialise the lexer */
1422 /* to read from source files; */
1423 /* otherwise, to read from this string. */
1424 /* ------------------------------------------------------------------------- */
/* Move the read position back by one token (the token circle retains
   recent tokens, so this only bumps a counter).  Putting back an entire
   circle's worth of tokens is a compiler bug, not a user error. */
1426 extern void put_token_back(void)
1427 { tokens_put_back++;
1429 if (tokens_trace_level > 0)
1430 { if (tokens_trace_level == 1) printf("<- ");
1431 else printf("<-\n");
1434 /* The following error, of course, should never happen! */
1436 if (tokens_put_back == CIRCLE_SIZE)
1437 { compiler_error("The lexical analyser has collapsed because of a wrong \
1438 assumption inside Inform");
/* The lexer's main entry point: place the next token's (type, value,
   text) into the token circle and the token_* globals.  First replays
   any put-back tokens (re-interpreting identifiers if the lexical
   context changed); otherwise dispatches on tokeniser_grid[d] to scan
   whitespace/comments (skipped), numbers (decimal, $hex, $$binary,
   $+/-float), 'single'- and "double"-quoted strings, identifiers, and
   separators.
   NOTE(review): this extract elides many interior lines — the switch
   header, several loop/else framings, and chunks of the separator and
   trace logic.  Treat the visible lines as landmarks only. */
1444 extern void get_next_token(void)
1445 { int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
1446 int returning_a_put_back_token = TRUE;
1448 context = lexical_context();
/* Replay a put-back token if any are pending. */
1450 if (tokens_put_back > 0)
1451 { i = circle_position - tokens_put_back + 1;
1452 if (i<0) i += CIRCLE_SIZE;
1454 if (context != token_contexts[i])
1455 { j = circle[i].type;
1456 if ((j==0) || ((j>=100) && (j<200)))
1457 interpret_identifier(i, FALSE);
1461 returning_a_put_back_token = FALSE;
1463 if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1464 else circle_position++;
/* Wrap the lexeme text pool before it can overflow. */
1466 if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
1467 lex_p = lexeme_memory;
1469 circle[circle_position].text = lex_p;
1470 circle[circle_position].value = 0;
1474 d = (*get_next_char)();
1475 e = tokeniser_grid[d];
1477 if (next_token_begins_syntax_line)
1478 { if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1479 { new_syntax_line();
1480 next_token_begins_syntax_line = FALSE;
1484 circle[circle_position].location = get_current_debug_location();
1487 { case 0: char_error("Illegal character found in source:", d);
1488 goto StartTokenAgain;
1490 case WHITESPACE_CODE:
1491 while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1493 goto StartTokenAgain;
1496 while ((lookahead != '\n') && (lookahead != 0))
1498 goto StartTokenAgain;
1501 circle[circle_position].type = EOF_TT;
1502 strcpy(lex_p, "<end of file>");
1503 lex_p += strlen(lex_p) + 1;
/* Integer literal: accumulate digits in the current radix. */
1511 { n = n*radix + character_digit_value[d];
1513 } while ((character_digit_value[lookahead] < radix)
1514 && (d = (*get_next_char)(), TRUE));
1517 circle[circle_position].type = NUMBER_TT;
1518 circle[circle_position].value = n;
/* Float literal ($+1.5e3 etc.): parse sign, integer part, fraction
   and exponent, then encode with construct_float(). */
1522 { int expo=0; double intv=0, fracv=0;
1523 int expocount=0, intcount=0, fraccount=0;
1524 int signbit = (d == '-');
1526 while (character_digit_value[lookahead] < 10) {
1527 intv = 10.0*intv + character_digit_value[lookahead];
1529 *lex_p++ = lookahead;
1532 if (lookahead == '.') {
1533 double fracpow = 1.0;
1534 *lex_p++ = lookahead;
1536 while (character_digit_value[lookahead] < 10) {
1538 fracv = fracv + fracpow*character_digit_value[lookahead];
1540 *lex_p++ = lookahead;
1544 if (lookahead == 'e' || lookahead == 'E') {
1546 *lex_p++ = lookahead;
1548 if (lookahead == '+' || lookahead == '-') {
1549 exposign = (lookahead == '-');
1550 *lex_p++ = lookahead;
1553 while (character_digit_value[lookahead] < 10) {
1554 expo = 10*expo + character_digit_value[lookahead];
1556 *lex_p++ = lookahead;
1560 error("Floating-point literal must have digits after the 'e'");
1561 if (exposign) { expo = -expo; }
1563 if (intcount + fraccount == 0)
1564 error("Floating-point literal must have digits");
1565 n = construct_float(signbit, intv, fracv, expo);
1568 circle[circle_position].type = NUMBER_TT;
1569 circle[circle_position].value = n;
1570 if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
/* RADIX_CODE: '$' hex, '$$' binary, '$'+sign float. */
1574 radix = 16; d = (*get_next_char)();
1575 if (d == '-' || d == '+') { goto FloatNumber; }
1576 if (d == '$') { d = (*get_next_char)(); radix = 2; }
1577 if (character_digit_value[d] >= radix)
1579 error("Binary number expected after '$$'");
1581 error("Hexadecimal number expected after '$'");
1585 case QUOTE_CODE: /* Single-quotes: scan a literal string */
1588 { e = d; d = (*get_next_char)(); *lex_p++ = d;
1589 if (quoted_size++==64)
1591 "Too much text for one pair of quotations '...' to hold");
1594 if ((d == '\'') && (e != '@'))
1595 { if (quoted_size == 1)
1596 { d = (*get_next_char)(); *lex_p++ = d;
1598 error("No text between quotation marks ''");
1603 if (d==EOF) ebf_error("'\''", "end of file");
1605 circle[circle_position].type = SQ_TT;
1608 case DQUOTE_CODE: /* Double-quotes: scan a literal string */
1611 { d = (*get_next_char)(); *lex_p++ = d;
1612 if (quoted_size++==MAX_QTEXT_SIZE)
1613 { memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE);
/* Line-wrap inside a string: collapse the newline and following
   whitespace to a single space (unless after '^'). */
1618 while (*(lex_p-1) == ' ') lex_p--;
1619 if (*(lex_p-1) != '^') *lex_p++ = ' ';
1620 while ((lookahead != EOF) &&
1621 (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1625 { int newline_passed = FALSE;
1627 while ((lookahead != EOF) &&
1628 (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1629 if ((d = (*get_next_char)()) == '\n')
1630 newline_passed = TRUE;
1631 if (!newline_passed)
1633 chb[0] = '\"'; chb[1] = lookahead;
1634 chb[2] = '\"'; chb[3] = 0;
1635 ebf_error("empty rest of line after '\\' in string",
1639 } while ((d != EOF) && (d!='\"'));
1640 if (d==EOF) ebf_error("'\"'", "end of file");
1642 circle[circle_position].type = DQ_TT;
1645 case IDENTIFIER_CODE: /* Letter or underscore: an identifier */
1648 while ((n<=MAX_IDENTIFIER_LENGTH)
1649 && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1650 || (tokeniser_grid[lookahead] == DIGIT_CODE)))
1651 n++, *lex_p++ = (*get_next_char)();
1655 if (n > MAX_IDENTIFIER_LENGTH)
1656 { char bad_length[100];
1658 "Name exceeds the maximum length of %d characters:",
1659 MAX_IDENTIFIER_LENGTH);
1660 error_named(bad_length, circle[circle_position].text);
1661 /* Trim token so that it doesn't violate
1662 MAX_IDENTIFIER_LENGTH during error recovery */
1663 circle[circle_position].text[MAX_IDENTIFIER_LENGTH] = 0;
1666 if (dont_enter_into_symbol_table)
1667 { circle[circle_position].type = DQ_TT;
1668 circle[circle_position].value = 0;
1669 if (dont_enter_into_symbol_table == -2)
1670 interpret_identifier(circle_position, TRUE);
1674 interpret_identifier(circle_position, FALSE);
1679 /* The character is initial to at least one of the separators */
/* tokeniser_grid packs (first-index << 4) | count for separators
   starting with this character; try 1-, 2- and 3-char matches. */
1681 for (j=e>>4, k=j+(e&0x0f); j<k; j++)
1682 { r = (char *) separators[j];
1684 { *lex_p++=d; *lex_p++=0;
1685 goto SeparatorMatched;
1689 { if (*(r+1) == lookahead)
1691 *lex_p++=(*get_next_char)();
1693 goto SeparatorMatched;
1697 { if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
1699 *lex_p++=(*get_next_char)();
1700 *lex_p++=(*get_next_char)();
1702 goto SeparatorMatched;
1707 /* The following contingency never in fact arises with the
1708 current set of separators, but might in future */
1710 *lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
1712 error_named("Unrecognised combination in source:", lex_p);
1713 goto StartTokenAgain;
1717 circle[circle_position].type = SEP_TT;
1718 circle[circle_position].value = j;
/* Some separators (#n$, #w$, #a$, #g$, #r$) absorb a following name. */
1720 { case SEMICOLON_SEP: break;
1721 case HASHNDOLLAR_SEP:
1722 case HASHWDOLLAR_SEP:
1723 if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1724 { error_named("Character expected after",
1725 circle[circle_position].text);
1729 *lex_p++ = (*get_next_char)();
1730 while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1731 || (tokeniser_grid[lookahead] == DIGIT_CODE))
1732 *lex_p++ = (*get_next_char)();
1735 case HASHADOLLAR_SEP:
1736 case HASHGDOLLAR_SEP:
1737 case HASHRDOLLAR_SEP:
1739 if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
1740 { error_named("Alphabetic character expected after",
1741 circle[circle_position].text);
1745 while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1746 || (tokeniser_grid[lookahead] == DIGIT_CODE))
1747 *lex_p++ = (*get_next_char)();
/* Publish the finished token to the token_* globals and trace. */
1754 i = circle_position;
1757 token_value = circle[i].value;
1758 token_type = circle[i].type;
1759 token_text = circle[i].text;
1760 if (!returning_a_put_back_token)
1761 { set_token_location(circle[i].location);
1763 token_contexts[i] = context;
1765 if (tokens_trace_level > 0)
1766 { if (tokens_trace_level == 1)
1767 printf("'%s' ", circle[i].text);
1769 { printf("-> "); describe_token(circle[i]);
1771 if (tokens_trace_level > 2) print_context(token_contexts[i]);
1777 static char veneer_error_title[64];
/* Reset the lexer for a fresh pass.  With lexical_source == NULL it
   reads from the source-file pipeline (creating it if necessary);
   otherwise it reads from the given string, labelled in error messages
   as the veneer routine "name".  Clears the token circle, lexeme pool,
   put-back count and mode flags. */
1779 extern void restart_lexer(char *lexical_source, char *name)
1781 circle_position = 0;
1782 for (i=0; i<CIRCLE_SIZE; i++)
1783 { circle[i].type = 0;
1784 circle[i].value = 0;
1785 circle[i].text = "(if this is ever visible, there is a bug)";
1786 token_contexts[i] = 0;
1789 lex_p = lexeme_memory;
1790 tokens_put_back = 0;
1791 forerrors_pointer = 0;
1792 dont_enter_into_symbol_table = FALSE;
1793 return_sp_as_variable = FALSE;
1794 next_token_begins_syntax_line = TRUE;
1796 source_to_analyse = lexical_source;
1798 if (source_to_analyse == NULL)
1799 { get_next_char = get_next_char_from_pipeline;
1800 if (!pipeline_made) create_char_pipeline();
1801 forerrors_buff[0] = 0; forerrors_pointer = 0;
1804 { get_next_char = get_next_char_from_string;
1805 source_to_analyse_pointer = 0;
1806 CurrentLB = &StringLB;
1807 sprintf(veneer_error_title, "<veneer routine '%s'>", name);
1808 StringLB.filename = veneer_error_title;
1810 CurrentLB->source_line = 1;
1811 CurrentLB->line_start = 0;
1812 CurrentLB->chars_read = 0;
1816 /* ========================================================================= */
1817 /* Data structure management routines */
1818 /* ------------------------------------------------------------------------- */
/* Initialise module globals: set blank_brief_location to its "no file"
   sentinel (file_index -1). */
1820 extern void init_lexer_vars(void)
1822 blank_brief_location.file_index = -1;
1823 blank_brief_location.line_number = 0;
1824 blank_brief_location.orig_file_index = 0;
1825 blank_brief_location.orig_line_number = 0;
/* Called before compilation proper: zero the line count and point error
   reporting at the "<before compilation>" sentinel block. */
1828 extern void lexer_begin_prepass(void)
1829 { total_source_line_count = 0;
1830 CurrentLB = &NoFileOpen;
1831 report_errors_at_current_line();
/* Called at the start of each pass: reset hash-printing state, discard
   any previous character pipeline, and restart the lexer on the source
   files. */
1834 extern void lexer_begin_pass(void)
1835 { no_hash_printed_yet = TRUE;
1836 hash_printed_since_newline = FALSE;
1838 pipeline_made = FALSE;
1840 restart_lexer(NULL, NULL);
/* Called at the end of a pass: point error reporting at the
   "<constructing output>" sentinel block. */
1843 extern void lexer_endpass(void)
1844 { CurrentLB = &MakingOutput;
1845 report_errors_at_current_line();
/* Allocate all lexer working storage (freed in lexer_free_arrays):
   the inclusion stack and its per-file buffers (+4 bytes for the EOF
   padding written by file_load_chars), the lexeme text pool, the
   keyword and local-variable hash tables, the tokeniser grid and
   keyword tables, and the head node of the debug-location list. */
1848 extern void lexer_allocate_arrays(void)
1851 FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
1852 "filestack buffer");
1854 for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1855 FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
1857 lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");
1859 keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1860 "keyword hash table");
1861 keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1862 "keyword hash end table");
1863 keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
1864 "keyword hashing linked list");
1865 local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
1866 "local variable hash table");
1867 local_variable_text_table = my_malloc(
1868 (MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
1869 "text of local variable names");
1871 local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
1872 "local variable hash codes");
1873 local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
1874 "local variable text pointers");
1876 make_tokeniser_grid();
1877 make_keywords_tables();
/* Head node of the reference-counted debug-location list. */
1879 first_token_locations =
1880 my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
1881 first_token_locations->location.file_index = 0;
1882 first_token_locations->location.beginning_byte_index = 0;
1883 first_token_locations->location.end_byte_index = 0;
1884 first_token_locations->location.beginning_line_number = 0;
1885 first_token_locations->location.end_line_number = 0;
1886 first_token_locations->location.beginning_character_number = 0;
1887 first_token_locations->location.end_character_number = 0;
1888 first_token_locations->location.orig_file_index = 0;
1889 first_token_locations->location.orig_beg_line_number = 0;
1890 first_token_locations->location.orig_beg_char_number = 0;
1891 first_token_locations->next = NULL;
1892 first_token_locations->reference_count = 0;
1893 last_token_location = first_token_locations;
/* Release everything allocated by lexer_allocate_arrays, in the same
   order: per-file buffers, the inclusion stack, the lexeme pool, the
   keyword and local-variable tables, and the debug-location list. */
1896 extern void lexer_free_arrays(void)
1899 for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1900 { p = FileStack[i].buffer;
1901 my_free(&p, "source file buffer");
1903 my_free(&FileStack, "filestack buffer");
1904 my_free(&lexeme_memory, "lexeme memory");
1906 my_free(&keywords_hash_table, "keyword hash table");
1907 my_free(&keywords_hash_ends_table, "keyword hash end table");
1908 my_free(&keywords_data_table, "keyword hashing linked list");
1909 my_free(&local_variable_hash_table, "local variable hash table");
1910 my_free(&local_variable_text_table, "text of local variable names");
1912 my_free(&local_variable_hash_codes, "local variable hash codes");
1913 my_free(&local_variable_texts, "local variable text pointers");
1915 cleanup_token_locations(NULL);
1918 /* ========================================================================= */