fa45ab49a4252864fc726c0cb9fe8ce077276bee
[inform.git] / lexer.c
1 /* ------------------------------------------------------------------------- */
2 /*   "lexer" : Lexical analyser                                              */
3 /*                                                                           */
4 /* Copyright (c) Graham Nelson 1993 - 2016                                   */
5 /*                                                                           */
6 /* This file is part of Inform.                                              */
7 /*                                                                           */
8 /* Inform is free software: you can redistribute it and/or modify            */
9 /* it under the terms of the GNU General Public License as published by      */
10 /* the Free Software Foundation, either version 3 of the License, or         */
11 /* (at your option) any later version.                                       */
12 /*                                                                           */
13 /* Inform is distributed in the hope that it will be useful,                 */
14 /* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
15 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              */
16 /* GNU General Public License for more details.                              */
17 /*                                                                           */
18 /* You should have received a copy of the GNU General Public License         */
19 /* along with Inform. If not, see https://gnu.org/licenses/                  */
20 /*                                                                           */
21 /* ------------------------------------------------------------------------- */
22
23 #include "header.h"
24
/* Lexer state shared with the rest of Inform: */
int total_source_line_count,            /* Number of source lines so far     */

    no_hash_printed_yet,                /* Have not yet printed the first #  */
    hash_printed_since_newline,         /* A hash has been printed since the
                                           most recent new-line was printed
                                           (generally as a result of an error
                                           message or the start of pass)     */
    dont_enter_into_symbol_table,       /* Return names as text (with
                                           token type DQ_TT, i.e., as if
                                           they had double-quotes around)
                                           and not as entries in the symbol
                                           table, when TRUE. If -2, only the
                                           keyword table is searched.        */
    return_sp_as_variable;              /* When TRUE, the word "sp" denotes
                                           the stack pointer variable
                                           (used in assembly language only)  */
int next_token_begins_syntax_line;      /* When TRUE, start a new syntax
                                           line (for error reporting, etc.)
                                           on the source code line where
                                           the next token appears            */

int32 last_mapped_line;  /* Last syntax line reported to debugging file      */
47
48 /* ------------------------------------------------------------------------- */
49 /*   The lexer's output is a sequence of triples, each called a "token",     */
50 /*   representing one lexical unit (or "lexeme") each.  Instead of providing */
51 /*   "lookahead" (that is, always having available the next token after the  */
52 /*   current one, so that syntax analysers higher up in Inform can have      */
53 /*   advance knowledge of what is coming), the lexer instead has a system    */
54 /*   where tokens can be read in and then "put back again".                  */
55 /*   The meaning of the number (and to some extent the text) supplied with   */
56 /*   a token depends on its type: see "header.h" for the list of types.      */
57 /*   For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
58 /*   number, and translated to the token:                                    */
59 /*     type NUMBER_TT, value 483, text "$1e3"                                */
60 /* ------------------------------------------------------------------------- */
61 /*   These three variables are set to the current token on a call to         */
62 /*   get_next_token() (but are not changed by a call to put_token_back()).   */
63 /* ------------------------------------------------------------------------- */
64
int token_type;     /* One of the *_TT token type codes from "header.h"      */
int32 token_value;  /* Type-dependent value (e.g. the number for NUMBER_TT)  */
char *token_text;   /* The lexeme's text                                     */
68
69 /* ------------------------------------------------------------------------- */
70 /*   The next two variables are the head and tail of a singly linked list.   */
71 /*   The tail stores the portion most recently read from the current         */
72 /*   lexical block; its end values therefore describe the location of the    */
73 /*   current token, and are updated whenever the three variables above are   */
74 /*   via set_token_location(...).  Earlier vertices, if any, represent the   */
75 /*   regions of lexical blocks read beforehand, where new vertices are       */
76 /*   only introduced by interruptions like a file inclusion or an EOF.       */
77 /*   Vertices are deleted off of the front of the list once they are no      */
78 /*   longer referenced by pending debug information records.                 */
79 /* ------------------------------------------------------------------------- */
80
static debug_locations *first_token_locations;  /* Head of the linked list   */
static debug_locations *last_token_location;    /* Tail: region holding the
                                                   current token             */
83
84 extern debug_location get_token_location(void)
85 {   debug_location result;
86     debug_location *location = &(last_token_location->location);
87     result.file_index = location->file_index;
88     result.beginning_byte_index = location->end_byte_index;
89     result.end_byte_index = location->end_byte_index;
90     result.beginning_line_number = location->end_line_number;
91     result.end_line_number = location->end_line_number;
92     result.beginning_character_number = location->end_character_number;
93     result.end_character_number = location->end_character_number;
94     return result;
95 }
96
97 extern debug_locations get_token_locations(void)
98 {   debug_locations result;
99     result.location = get_token_location();
100     result.next = NULL;
101     result.reference_count = 0;
102     return result;
103 }
104
105 static void set_token_location(debug_location location)
106 {   if (location.file_index == last_token_location->location.file_index)
107     {   last_token_location->location.end_byte_index =
108             location.end_byte_index;
109         last_token_location->location.end_line_number =
110             location.end_line_number;
111         last_token_location->location.end_character_number =
112             location.end_character_number;
113     } else
114     {   debug_locations*successor =
115             my_malloc
116                 (sizeof(debug_locations),
117                  "debug locations of recent tokens");
118         successor->location = location;
119         successor->next = NULL;
120         successor->reference_count = 0;
121         last_token_location->next = successor;
122         last_token_location = successor;
123     }
124 }
125
126 extern debug_location_beginning get_token_location_beginning(void)
127 {   debug_location_beginning result;
128     ++(last_token_location->reference_count);
129     result.head = last_token_location;
130     result.beginning_byte_index =
131         last_token_location->location.end_byte_index;
132     result.beginning_line_number =
133         last_token_location->location.end_line_number;
134     result.beginning_character_number =
135         last_token_location->location.end_character_number;
136     return result;
137 }
138
/*  Delete unreferenced vertices from the front of the location list.  If
    "beginning" is non-NULL, also raise a compiler error should the records
    it still needs turn out to have been deallocated.                        */
static void cleanup_token_locations(debug_location_beginning*beginning)
{   if (first_token_locations)
    {   while (first_token_locations &&
               !first_token_locations->reference_count)
        {   debug_locations*moribund = first_token_locations;
            first_token_locations = moribund->next;
            my_free(&moribund, "debug locations of recent tokens");
            /* NOTE(review): my_free is handed the *address* of moribund; if
               it nulls the pointer out, the comparison below actually tests
               beginning->head against NULL rather than against the vertex
               just freed -- confirm my_free's semantics in memory.c.        */
            if (beginning &&
                (beginning->head == moribund || !first_token_locations))
            {   compiler_error
                    ("Records needed by a debug_location_beginning are no "
                     "longer allocated, perhaps because of an invalid reuse "
                     "of this or an earlier beginning");
            }
        }
    } else
    {   /* No vertices exist at all: any call here is an internal error,
           distinguished by whether a beginning was supplied.                */
        if (beginning)
        {   compiler_error
                ("Attempt to use a debug_location_beginning when no token "
                 "locations are defined");
        } else
        {   compiler_error
                ("Attempt to clean up token locations when no token locations "
                 "are defined");
        }
    }
}
166
/*  Abandon a token range: release the reference taken by
    get_token_location_beginning() without producing a location.             */
extern void discard_token_location(debug_location_beginning beginning)
{   --(beginning.head->reference_count);
}
170
/*  Close the range opened by get_token_location_beginning() and return the
    locations spanning from that beginning to the end of the current token.  */
extern debug_locations get_token_location_end
    (debug_location_beginning beginning)
{   debug_locations result;
    cleanup_token_locations(&beginning);
    --(beginning.head->reference_count);
    /* Sometimes we know what we'll read before we switch to the lexical block
       where we'll read it.  In such cases the beginning will be placed in the
       prior block and last exactly zero bytes there.  It's misleading to
       include such ranges, so we gobble them. */
    if (beginning.head->location.end_byte_index ==
          beginning.beginning_byte_index &&
        beginning.head->next)
    {   /* Zero-width in the prior block: start instead at the very top of
           the following block.                                              */
        beginning.head = beginning.head->next;
        result.location = beginning.head->location;
        result.location.beginning_byte_index = 0;
        result.location.beginning_line_number = 1;
        result.location.beginning_character_number = 1;
    } else
    {   /* Normal case: the range begins where the beginning was recorded.   */
        result.location = beginning.head->location;
        result.location.beginning_byte_index =
            beginning.beginning_byte_index;
        result.location.beginning_line_number =
            beginning.beginning_line_number;
        result.location.beginning_character_number =
            beginning.beginning_character_number;
    }
    result.next = beginning.head->next;
    result.reference_count = 0;
    return result;
}
201
202 /* ------------------------------------------------------------------------- */
203 /*   In order to be able to put tokens back efficiently, the lexer stores    */
204 /*   tokens in a "circle": the variable circle_position ranges between       */
205 /*   0 and CIRCLE_SIZE-1.  We only need a circle size as large as the        */
206 /*   maximum number of tokens ever put back at once, plus 1 (in effect, the  */
207 /*   maximum token lookahead ever needed in syntax analysis, plus 1).        */
208 /*                                                                           */
209 /*   Unlike some compilers, Inform does not have a context-free lexer: in    */
210 /*   fact it has 12288 different possible states.  However, the context only */
211 /*   affects the interpretation of "identifiers": lexemes beginning with a   */
212 /*   letter and containing up to 32 chars of alphanumeric and underscore     */
213 /*   chars.  (For example, "default" may refer to the directive or statement */
214 /*   of that name, and which token values are returned depends on the        */
215 /*   current lexical context.)                                               */
216 /*                                                                           */
217 /*   Along with each token, we also store the lexical context it was         */
218 /*   translated under; because if it is called for again, there may need     */
219 /*   to be a fresh interpretation of it if the context has changed.          */
220 /* ------------------------------------------------------------------------- */
221
#define CIRCLE_SIZE 6   /* Maximum token lookahead ever needed, plus 1       */

/*   (The worst case for token lookahead is distinguishing between an
     old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".)   */

static int circle_position;             /* Current index into the circle     */
static token_data circle[CIRCLE_SIZE];  /* The circle of recent tokens       */

static int token_contexts[CIRCLE_SIZE]; /* Lexical context under which each
                                           circle entry was translated       */
231
232 /* ------------------------------------------------------------------------- */
233 /*   A complication, however, is that the text of some lexemes needs to be   */
234 /*   held in Inform's memory for much longer periods: for example, a         */
235 /*   dictionary word lexeme (like "'south'") must have its text preserved    */
236 /*   until the code generation time for the expression it occurs in, when    */
237 /*   the dictionary reference is actually made.  Code generation in general  */
238 /*   occurs as early as possible in Inform: pending some better method of    */
239 /*   garbage collection, we simply use a buffer so large that unless         */
240 /*   expressions spread across 10K of source code are found, there can be    */
241 /*   no problem.                                                             */
242 /* ------------------------------------------------------------------------- */
243
static char *lexeme_memory;             /* Long-lived buffer for token text  */
static char *lex_p;                     /* Current write position            */

/* ------------------------------------------------------------------------- */
/*   The lexer itself needs up to 3 characters of lookahead (it uses an      */
/*   LR(3) grammar to translate characters into tokens).                     */
/* ------------------------------------------------------------------------- */

#define LOOKAHEAD_SIZE 3

static int current, lookahead,          /* The latest character read, and    */
    lookahead2, lookahead3;             /* the three characters following it */

static int pipeline_made;               /* Whether or not the pipeline of
                                           characters has been constructed
                                           yet (this pass)                   */

static int (* get_next_char)(void);     /* Routine for reading the stream of
                                           characters: the lexer does not
                                           need any "ungetc" routine for
                                           putting them back again.  End of
                                           stream is signalled by returning
                                           zero.                             */

static char *source_to_analyse;         /* The current lexical source:
                                           NULL for "load from source files",
                                           otherwise this points to a string
                                           containing Inform code            */

static int tokens_put_back;             /* Count of the number of backward
                                           moves made from the last-read
                                           token                             */
276
/*  Print a human-readable description of token t to stdout, in the form
    "{ ... }", for tracing and diagnostic output.                            */
extern void describe_token(token_data t)
{
    /*  Many of the token types are not set in this file, but later on in
        Inform's higher stages (for example, in the expression evaluator);
        but this routine describes them all.                                 */

    printf("{ ");

    switch(t.type)
    {
        /*  The following token types occur in lexer output:                 */

        case SYMBOL_TT:          printf("symbol ");
                                 describe_symbol(t.value);
                                 break;
        case NUMBER_TT:          printf("literal number %d", t.value);
                                 break;
        case DQ_TT:              printf("string \"%s\"", t.text);
                                 break;
        case SQ_TT:              printf("string '%s'", t.text);
                                 break;
        case SEP_TT:             printf("separator '%s'", t.text);
                                 break;
        case EOF_TT:             printf("end of file");
                                 break;

        case STATEMENT_TT:       printf("statement name '%s'", t.text);
                                 break;
        case SEGMENT_MARKER_TT:  printf("object segment marker '%s'", t.text);
                                 break;
        case DIRECTIVE_TT:       printf("directive name '%s'", t.text);
                                 break;
        case CND_TT:             printf("textual conditional '%s'", t.text);
                                 break;
        case OPCODE_NAME_TT:     printf("opcode name '%s'", t.text);
                                 break;
        case SYSFUN_TT:          printf("built-in function name '%s'", t.text);
                                 break;
        case LOCAL_VARIABLE_TT:  printf("local variable name '%s'", t.text);
                                 break;
        case MISC_KEYWORD_TT:    printf("statement keyword '%s'", t.text);
                                 break;
        case DIR_KEYWORD_TT:     printf("directive keyword '%s'", t.text);
                                 break;
        case TRACE_KEYWORD_TT:   printf("'trace' keyword '%s'", t.text);
                                 break;
        case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
                                 break;

        /*  The remaining are etoken types, not set by the lexer             */

        case OP_TT:              printf("operator '%s'",
                                     operators[t.value].description);
                                 break;
        case ENDEXP_TT:          printf("end of expression");
                                 break;
        case SUBOPEN_TT:         printf("open bracket");
                                 break;
        case SUBCLOSE_TT:        printf("close bracket");
                                 break;
        case LARGE_NUMBER_TT:    printf("large number: '%s'=%d",t.text,t.value);
                                 break;
        case SMALL_NUMBER_TT:    printf("small number: '%s'=%d",t.text,t.value);
                                 break;
        case VARIABLE_TT:        printf("variable '%s'=%d", t.text, t.value);
                                 break;
        case DICTWORD_TT:        printf("dictionary word '%s'", t.text);
                                 break;
        case ACTION_TT:          printf("action name '%s'", t.text);
                                 break;

        default:
            printf("** unknown token type %d, text='%s', value=%d **",
            t.type, t.text, t.value);
    }
    printf(" }");
}
354
355 /* ------------------------------------------------------------------------- */
356 /*   All but one of the 280 Inform keywords (118 of them opcode names used   */
357 /*   only by the assembler).  (The one left over is "sp", a keyword used in  */
358 /*   assembly language only.)                                                */
359 /*                                                                           */
360 /*   A "keyword group" is a set of keywords to be searched for.  If a match  */
361 /*   is made on an identifier, the token type becomes that given in the KG   */
362 /*   and the token value is its index in the KG.                             */
363 /*                                                                           */
364 /*   The keyword ordering must correspond with the appropriate #define's in  */
365 /*   "header.h" but is otherwise not significant.                            */
366 /* ------------------------------------------------------------------------- */
367
#define MAX_KEYWORDS 350   /* Upper bound on keywords across all groups      */

/* The values will be filled in at compile time, when we know
   which opcode set to use (see make_keywords_tables below). */
keyword_group opcode_names =
{ { "" },
    OPCODE_NAME_TT, FALSE, TRUE
};
376
/*  Z-machine opcode names, copied into opcode_names when compiling to
    Z-code; the list is terminated by an empty string.                       */
static char *opcode_list_z[] = {
    "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
    "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
    "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
    "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
    "print_char", "print_num", "random", "push", "pull", "split_window",
    "set_window", "output_stream", "input_stream", "sound_effect", "jz",
    "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
    "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
    "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
    "restore", "restart", "ret_popped", "pop", "quit", "new_line",
    "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
    "erase_window", "erase_line", "set_cursor", "get_cursor",
    "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
    "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
    "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
    "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
    "restore_undo", "draw_picture", "picture_data", "erase_picture",
    "set_margins", "move_window", "window_size", "window_style",
    "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
    "mouse_window", "push_stack", "put_wind_prop", "print_form",
    "make_menu", "picture_table", "print_unicode", "check_unicode",
    ""
};
401
/*  Glulx opcode names, copied into opcode_names when compiling to Glulx;
    the list is terminated by an empty string.                               */
static char *opcode_list_g[] = {
    "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
    "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
    "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
    "jltu", "jgeu", "jgtu", "jleu",
    "call", "return",
    "catch", "throw", "tailcall",
    "copy", "copys", "copyb", "sexs", "sexb", "aload",
    "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
    "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
    "streamchar", "streamnum", "streamstr",
    "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
    "random", "setrandom", "quit", "verify",
    "restart", "save", "restore", "saveundo", "restoreundo", "protect",
    "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
    "linearsearch", "binarysearch", "linkedsearch",
    "callf", "callfi", "callfii", "callfiii",
    "streamunichar",
    "mzero", "mcopy", "malloc", "mfree",
    "accelfunc", "accelparam",
    "numtof", "ftonumz", "ftonumn", "ceil", "floor",
    "fadd", "fsub", "fmul", "fdiv", "fmod",
    "sqrt", "exp", "log", "pow",
    "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
    "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
    ""
};
429
/*  Assembly macros, filled in from one of the lists below at compile time
    (see make_keywords_tables).                                              */
keyword_group opcode_macros =
{ { "" },
  OPCODE_MACRO_TT, FALSE, TRUE
};

/*  Z-code has no opcode macros.                                             */
static char *opmacro_list_z[] = { "" };

/*  In Glulx, "pull" and "push" are assembled as macros.                     */
static char *opmacro_list_g[] = {
    "pull", "push",
    ""
};
441
/*  Directive names.  Keyword ordering in each group must correspond with
    the #define's in "header.h".                                             */
keyword_group directives =
{ { "abbreviate", "array", "attribute", "class", "constant",
    "default", "dictionary", "end", "endif", "extend", "fake_action",
    "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
    "iffalse", "import", "include", "link", "lowstring", "message",
    "nearby", "object", "property", "release", "replace",
    "serial", "switches", "statusline", "stub", "system_file", "trace",
    "undef", "verb", "version", "zcharacter",
    "" },
    DIRECTIVE_TT, FALSE, FALSE
};

/*  Keywords recognised after the "Trace" directive.                         */
keyword_group trace_keywords =
{ { "dictionary", "symbols", "objects", "verbs",
    "assembly", "expressions", "lines", "tokens", "linker",
    "on", "off", "" },
    TRACE_KEYWORD_TT, FALSE, TRUE
};

/*  Words which introduce segments of an object or class definition.         */
keyword_group segment_markers =
{ { "class", "has", "private", "with", "" },
    SEGMENT_MARKER_TT, FALSE, TRUE
};
465
/*  Keywords recognised inside various directives.                           */
keyword_group directive_keywords =
{ { "alias", "long", "additive",
    "score", "time",
    "noun", "held", "multi", "multiheld", "multiexcept",
    "multiinside", "creature", "special", "number", "scope", "topic",
    "reverse", "meta", "only", "replace", "first", "last",
    "string", "table", "buffer", "data", "initial", "initstr",
    "with", "private", "has", "class",
    "error", "fatalerror", "warning",
    "terminating",
    "" },
    DIR_KEYWORD_TT, FALSE, TRUE
};

/*  Keywords recognised inside statements.                                   */
keyword_group misc_keywords =
{ { "char", "name", "the", "a", "an", "The", "number",
    "roman", "reverse", "bold", "underline", "fixed", "on", "off",
    "to", "address", "string", "object", "near", "from", "property", "A", "" },
    MISC_KEYWORD_TT, FALSE, TRUE
};

/*  Statement names.                                                         */
keyword_group statements =
{ { "box", "break", "continue", "default", "do", "else", "font", "for",
    "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
    "print", "print_ret", "quit", "read", "remove", "restore", "return",
    "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
    "until", "while", "" },
    STATEMENT_TT, FALSE, TRUE
};

/*  Textual condition operators.                                             */
keyword_group conditions =
{ { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
    CND_TT, FALSE, TRUE
};

/*  Built-in function names.                                                 */
keyword_group system_functions =
{ { "child", "children", "elder", "eldest", "indirect", "parent", "random",
    "sibling", "younger", "youngest", "metaclass", "glk", "" },
    SYSFUN_TT, FALSE, TRUE
};
506
/*  Names of system constants.                                               */
keyword_group system_constants =
{ { "adjectives_table", "actions_table", "classes_table",
    "identifiers_table", "preactions_table", "version_number",
    "largest_object", "strings_offset", "code_offset",
    "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
    "static_memory_offset", "array_names_offset", "readable_memory_offset",
    "cpv__start", "cpv__end", "ipv__start", "ipv__end",
    "array__start", "array__end",
    "lowest_attribute_number", "highest_attribute_number",
    "attribute_names_array",
    "lowest_property_number", "highest_property_number",
    "property_names_array",
    "lowest_action_number", "highest_action_number",
    "action_names_array",
    "lowest_fake_action_number", "highest_fake_action_number",
    "fake_action_names_array",
    "lowest_routine_number", "highest_routine_number", "routines_array",
    "routine_names_array", "routine_flags_array",
    "lowest_global_number", "highest_global_number", "globals_array",
    "global_names_array", "global_flags_array",
    "lowest_array_number", "highest_array_number", "arrays_array",
    "array_names_array", "array_flags_array",
    "lowest_constant_number", "highest_constant_number", "constants_array",
    "constant_names_array",
    "lowest_class_number", "highest_class_number", "class_objects_array",
    "lowest_object_number", "highest_object_number",
    "oddeven_packing",
    "grammar_table", "dictionary_table", "dynam_string_table",
    "" },
    SYSTEM_CONSTANT_TT, FALSE, TRUE
};

/*  All the keyword groups searched by the identifier interpreter; index 0
    is unused so that group numbers run from 1 to 11.                        */
keyword_group *keyword_groups[12]
= { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
    &directive_keywords, &misc_keywords, &statements, &conditions,
    &system_functions, &system_constants, &opcode_macros};

/*  Names of the local variables of the routine currently being compiled.    */
keyword_group local_variables =
{ { "" },                                 /* Filled in when routine declared */
    LOCAL_VARIABLE_TT, FALSE, FALSE
};
548
549 static int lexical_context(void)
550 {
551     /*  The lexical context is a number representing all of the context
552         information in the lexical analyser: the same input text will
553         always translate to the same output tokens whenever the context
554         is the same.
555
556         In fact, for efficiency reasons this number omits the bit of
557         information held in the variable "dont_enter_into_symbol_table".
558         Inform never needs to backtrack through tokens parsed in that
559         way (thankfully, as it would be expensive indeed to check
560         the tokens).                                                         */
561
562     int c = 0;
563     if (opcode_names.enabled)         c |= 1;
564     if (directives.enabled)           c |= 2;
565     if (trace_keywords.enabled)       c |= 4;
566     if (segment_markers.enabled)      c |= 8;
567     if (directive_keywords.enabled)   c |= 16;
568     if (misc_keywords.enabled)        c |= 32;
569     if (statements.enabled)           c |= 64;
570     if (conditions.enabled)           c |= 128;
571     if (system_functions.enabled)     c |= 256;
572     if (system_constants.enabled)     c |= 512;
573     if (local_variables.enabled)      c |= 1024;
574
575     if (return_sp_as_variable)        c |= 2048;
576     return(c);
577 }
578
/*  Print a short mnemonic for each bit set in lexical context c, in the
    same order as the bits are assigned by lexical_context().                */
static void print_context(int c)
{   static const char *const flag_names[12] =
    {   "OPC ", "DIR ", "TK ", "SEG ", "DK ", "MK ",
        "STA ", "CND ", "SFUN ", "SCON ", "LV ", "sp "
    };
    int i;
    for (i = 0; i < 12; i++)
        if ((c & (1 << i)) != 0) printf("%s", flag_names[i]);
}
594
/*  Hash chains for keyword lookup, built by make_keywords_tables().         */
static int *keywords_hash_table;        /* Chain heads, indexed by hash code */
static int *keywords_hash_ends_table;   /* Chain tails, indexed by hash code */
static int *keywords_data_table;        /* (group, index, next) triples      */

/*  Hash chains for local variable lookup.                                   */
static int *local_variable_hash_table;  /* First local with each hash code   */
static int *local_variable_hash_codes;  /* Hash code of each local           */
char **local_variable_texts;            /* Text of each local's name         */
static char *local_variable_text_table; /* Backing storage for those texts   */

static char one_letter_locals[128];     /* Fast lookup for one-char locals   */
605
606 static void make_keywords_tables(void)
607 {   int i, j, h, tp=0;
608     char **oplist, **maclist;
609
610     if (!glulx_mode) {
611         oplist = opcode_list_z;
612         maclist = opmacro_list_z;
613     }
614     else {
615         oplist = opcode_list_g;
616         maclist = opmacro_list_g;
617     }
618
619     for (j=0; *(oplist[j]); j++) {
620         opcode_names.keywords[j] = oplist[j];
621     }
622     opcode_names.keywords[j] = "";
623     
624     for (j=0; *(maclist[j]); j++) {
625         opcode_macros.keywords[j] = maclist[j];
626     }
627     opcode_macros.keywords[j] = "";
628
629     for (i=0; i<HASH_TAB_SIZE; i++)
630     {   keywords_hash_table[i] = -1;
631         keywords_hash_ends_table[i] = -1;
632     }
633
634     for (i=1; i<=11; i++)
635     {   keyword_group *kg = keyword_groups[i];
636         for (j=0; *(kg->keywords[j]) != 0; j++)
637         {   h = hash_code_from_string(kg->keywords[j]);
638             if (keywords_hash_table[h] == -1)
639                 keywords_hash_table[h] = tp;
640             else
641               *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
642             keywords_hash_ends_table[h] = tp;
643             *(keywords_data_table + 3*tp) = i;
644             *(keywords_data_table + 3*tp+1) = j;
645             *(keywords_data_table + 3*tp+2) = -1;
646             tp++;
647         }
648     }
649 }
650
651 extern void construct_local_variable_tables(void)
652 {   int i, h; char *p = local_variable_text_table;
653     for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
654     for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
655
656     for (i=0; i<no_locals; i++)
657     {   char *q = local_variables.keywords[i];
658         if (q[1] == 0)
659         {   one_letter_locals[(uchar)q[0]] = i;
660             if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
661             if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
662         }
663         h = hash_code_from_string(q);
664         if (local_variable_hash_table[h] == -1)
665             local_variable_hash_table[h] = i;
666         local_variable_hash_codes[i] = h;
667         local_variable_texts[i] = p;
668         strcpy(p, q);
669         p += strlen(p)+1;
670     }
671     for (;i<MAX_LOCAL_VARIABLES-1;i++) 
672       local_variable_texts[i] = "<no such local variable>";
673 }
674
static void interpret_identifier(int pos, int dirs_only_flag)
{   int index, hashcode; char *p = circle[pos].text;

    /*  An identifier is either a keyword or a "symbol", a name which the
        lexical analyser leaves to higher levels of Inform to understand.
        This sets the type and value of token pos in the circle accordingly.
        If dirs_only_flag is set, only directive keywords are recognised.    */

    hashcode = hash_code_from_string(p);

    if (dirs_only_flag) goto KeywordSearch;

    /*  If this is assembly language, perhaps it is "sp"?                    */

    if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
    {   circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
        return;
    }

    /*  Test for local variables first, quite quickly.                       */

    if (local_variables.enabled)
    {   /*  Single-character names go through the one_letter_locals array... */
        if (p[1]==0)
        {   index = one_letter_locals[(uchar)p[0]];
            if (index<MAX_LOCAL_VARIABLES)
            {   circle[pos].type = LOCAL_VARIABLE_TT;
                circle[pos].value = index+1;
                return;
            }
        }
        /*  ...longer names via the hash table, compared (with strcmpcis)
            against each local whose hash code matches.                      */
        index = local_variable_hash_table[hashcode];
        if (index >= 0)
        {   for (;index<no_locals;index++)
            {   if (hashcode == local_variable_hash_codes[index])
                {   if (strcmpcis(p, local_variable_texts[index])==0)
                    {   circle[pos].type = LOCAL_VARIABLE_TT;
                        circle[pos].value = index+1;
                        return;
                    }
                }
            }
        }
    }

    /*  Now the bulk of the keywords.  Note that the lexer doesn't recognise
        the name of a system function which has been Replaced.               */

    KeywordSearch:
    /*  Walk the chain of keywords sharing this hash code: each entry of
        keywords_data_table is (group number, index in group, next entry).   */
    index = keywords_hash_table[hashcode];
    while (index >= 0)
    {   int *i = keywords_data_table + 3*index;
        keyword_group *kg = keyword_groups[*i];
        if (((!dirs_only_flag) && (kg->enabled))
            || (dirs_only_flag && (kg == &directives)))
        {   char *q = kg->keywords[*(i+1)];
            if (((kg->case_sensitive) && (strcmp(p, q)==0))
                || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
            {   if ((kg != &system_functions)
                    || (system_function_usage[*(i+1)]!=2))
                {   circle[pos].type = kg->change_token_type;
                    circle[pos].value = *(i+1);
                    return;
                }
            }
        }
        index = *(i+2);
    }

    if (dirs_only_flag) return;

    /*  Search for the name; create it if necessary.                         */

    circle[pos].value = symbol_index(p, hashcode);
    circle[pos].type = SYMBOL_TT;
}
748
749
750 /* ------------------------------------------------------------------------- */
751 /*   The tokeniser grid aids a rapid decision about the consequences of a    */
752 /*   character reached in the buffer.  In effect it is an efficiently stored */
753 /*   transition table using an algorithm similar to that of S. C. Johnson's  */
754 /*   "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9).       */
755 /*   My thanks to Dilip Sequeira for suggesting this.                        */
756 /*                                                                           */
757 /*       tokeniser_grid[c]   is (16*n + m) if c is the first character of    */
758 /*                               separator numbers n, n+1, ..., n+m-1        */
759 /*                           or certain special values (QUOTE_CODE, etc)     */
760 /*                           or 0 otherwise                                  */
761 /*                                                                           */
762 /*   Since 1000/16 = 62, the code numbers below will need increasing if the  */
763 /*   number of separators supported exceeds 61.                              */
764 /* ------------------------------------------------------------------------- */
765
/*  The transition table itself, indexed by character value.                 */
static int tokeniser_grid[256];

/*  Special grid entries (distinct from the 16*n+m separator encoding).      */
#define QUOTE_CODE      1000
#define DQUOTE_CODE     1001
#define NULL_CODE       1002
#define SPACE_CODE      1003
#define NEGATIVE_CODE   1004
#define DIGIT_CODE      1005
#define RADIX_CODE      1006
#define KEYWORD_CODE    1007
#define EOF_CODE        1008
#define WHITESPACE_CODE 1009
#define COMMENT_CODE    1010
#define IDENTIFIER_CODE 1011
780
781 /*  This list cannot safely be changed without also changing the header
782     separator #defines.  The ordering is significant in that (i) all entries
783     beginning with the same character must be adjacent and (ii) that if
784     X is a an initial substring of Y then X must come before Y.
785
786     E.g. --> must occur before -- to prevent "-->0" being tokenised
787     wrongly as "--", ">", "0" rather than "-->", "0".                        */
788
/*  (The ordering constraints on this table are described just above.)       */
static const char separators[NUMBER_SEPARATORS][4] =
{   "->", "-->", "--", "-", "++", "+", "*", "/", "%",
    "||", "|", "&&", "&", "~~",
    "~=", "~", "==", "=", ">=", ">",
    "<=", "<", "(", ")", ",",
    ".&", ".#", "..&", "..#", "..", ".",
    "::", ":", "@", ";", "[", "]", "{", "}",
    "$", "?~", "?",
    "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
};
799
800 static void make_tokeniser_grid(void)
801 {
802     /*  Construct the grid to the specification above.                       */
803
804     int i, j;
805
806     for (i=0; i<256; i++) tokeniser_grid[i]=0;
807
808     for (i=0; i<NUMBER_SEPARATORS; i++)
809     {   j=separators[i][0];
810         if (tokeniser_grid[j]==0)
811             tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
812     }
813     tokeniser_grid['\''] = QUOTE_CODE;
814     tokeniser_grid['\"'] = DQUOTE_CODE;
815     tokeniser_grid[0]    = EOF_CODE;
816     tokeniser_grid[' ']  = WHITESPACE_CODE;
817     tokeniser_grid['\n'] = WHITESPACE_CODE;
818     tokeniser_grid['$']  = RADIX_CODE;
819     tokeniser_grid['!']  = COMMENT_CODE;
820
821     tokeniser_grid['0']  = DIGIT_CODE;
822     tokeniser_grid['1']  = DIGIT_CODE;
823     tokeniser_grid['2']  = DIGIT_CODE;
824     tokeniser_grid['3']  = DIGIT_CODE;
825     tokeniser_grid['4']  = DIGIT_CODE;
826     tokeniser_grid['5']  = DIGIT_CODE;
827     tokeniser_grid['6']  = DIGIT_CODE;
828     tokeniser_grid['7']  = DIGIT_CODE;
829     tokeniser_grid['8']  = DIGIT_CODE;
830     tokeniser_grid['9']  = DIGIT_CODE;
831
832     tokeniser_grid['a']  = IDENTIFIER_CODE;
833     tokeniser_grid['b']  = IDENTIFIER_CODE;
834     tokeniser_grid['c']  = IDENTIFIER_CODE;
835     tokeniser_grid['d']  = IDENTIFIER_CODE;
836     tokeniser_grid['e']  = IDENTIFIER_CODE;
837     tokeniser_grid['f']  = IDENTIFIER_CODE;
838     tokeniser_grid['g']  = IDENTIFIER_CODE;
839     tokeniser_grid['h']  = IDENTIFIER_CODE;
840     tokeniser_grid['i']  = IDENTIFIER_CODE;
841     tokeniser_grid['j']  = IDENTIFIER_CODE;
842     tokeniser_grid['k']  = IDENTIFIER_CODE;
843     tokeniser_grid['l']  = IDENTIFIER_CODE;
844     tokeniser_grid['m']  = IDENTIFIER_CODE;
845     tokeniser_grid['n']  = IDENTIFIER_CODE;
846     tokeniser_grid['o']  = IDENTIFIER_CODE;
847     tokeniser_grid['p']  = IDENTIFIER_CODE;
848     tokeniser_grid['q']  = IDENTIFIER_CODE;
849     tokeniser_grid['r']  = IDENTIFIER_CODE;
850     tokeniser_grid['s']  = IDENTIFIER_CODE;
851     tokeniser_grid['t']  = IDENTIFIER_CODE;
852     tokeniser_grid['u']  = IDENTIFIER_CODE;
853     tokeniser_grid['v']  = IDENTIFIER_CODE;
854     tokeniser_grid['w']  = IDENTIFIER_CODE;
855     tokeniser_grid['x']  = IDENTIFIER_CODE;
856     tokeniser_grid['y']  = IDENTIFIER_CODE;
857     tokeniser_grid['z']  = IDENTIFIER_CODE;
858
859     tokeniser_grid['A']  = IDENTIFIER_CODE;
860     tokeniser_grid['B']  = IDENTIFIER_CODE;
861     tokeniser_grid['C']  = IDENTIFIER_CODE;
862     tokeniser_grid['D']  = IDENTIFIER_CODE;
863     tokeniser_grid['E']  = IDENTIFIER_CODE;
864     tokeniser_grid['F']  = IDENTIFIER_CODE;
865     tokeniser_grid['G']  = IDENTIFIER_CODE;
866     tokeniser_grid['H']  = IDENTIFIER_CODE;
867     tokeniser_grid['I']  = IDENTIFIER_CODE;
868     tokeniser_grid['J']  = IDENTIFIER_CODE;
869     tokeniser_grid['K']  = IDENTIFIER_CODE;
870     tokeniser_grid['L']  = IDENTIFIER_CODE;
871     tokeniser_grid['M']  = IDENTIFIER_CODE;
872     tokeniser_grid['N']  = IDENTIFIER_CODE;
873     tokeniser_grid['O']  = IDENTIFIER_CODE;
874     tokeniser_grid['P']  = IDENTIFIER_CODE;
875     tokeniser_grid['Q']  = IDENTIFIER_CODE;
876     tokeniser_grid['R']  = IDENTIFIER_CODE;
877     tokeniser_grid['S']  = IDENTIFIER_CODE;
878     tokeniser_grid['T']  = IDENTIFIER_CODE;
879     tokeniser_grid['U']  = IDENTIFIER_CODE;
880     tokeniser_grid['V']  = IDENTIFIER_CODE;
881     tokeniser_grid['W']  = IDENTIFIER_CODE;
882     tokeniser_grid['X']  = IDENTIFIER_CODE;
883     tokeniser_grid['Y']  = IDENTIFIER_CODE;
884     tokeniser_grid['Z']  = IDENTIFIER_CODE;
885
886     tokeniser_grid['_']  = IDENTIFIER_CODE;
887 }
888
889 /* ------------------------------------------------------------------------- */
890 /*   Definition of a lexical block: a source file or a string containing     */
891 /*   text for lexical analysis; an independent source from the point of      */
892 /*   view of issuing error reports.                                          */
893 /* ------------------------------------------------------------------------- */
894
typedef struct LexicalBlock_s
{   char *filename;                              /*  Full translated name    */
    int   main_flag;                             /*  TRUE if the main file
                                                     (the first one opened)  */
    int   sys_flag;                              /*  TRUE if a System_File   */
    int   source_line;                           /*  Line number count       */
    int   line_start;                            /*  Char number within file
                                                     where the current line
                                                     starts                  */
    int   chars_read;                            /*  Char number of read pos */
    int   file_no;                               /*  Or 255 if not from a
                                                     file; used for debug
                                                     information             */
} LexicalBlock;

/*  Fixed pseudo-blocks, used when text is not coming from a source file:    */

static LexicalBlock NoFileOpen =
{   "<before compilation>", FALSE, FALSE, 0, 0, 0, 255 };

static LexicalBlock MakingOutput =
{   "<constructing output>", FALSE, FALSE, 0, 0, 0, 255 };

static LexicalBlock StringLB =
{   "<veneer routine>", FALSE, TRUE, 0, 0, 0, 255 };

static LexicalBlock *CurrentLB;                  /*  The current lexical
                                                     block of input text     */
921
extern void declare_systemfile(void)
{   /*  Mark the current lexical block as coming from a System_File.        */
    CurrentLB->sys_flag = TRUE;
}
925
extern int is_systemfile(void)
{   /*  Is the current lexical block from a System_File?  Returns 1 or 0.   */
    return ((CurrentLB->sys_flag)?1:0);
}
929
930 extern debug_location get_current_debug_location(void)
931 {   debug_location result;
932     /* Assume that all input characters are one byte. */
933     result.file_index = CurrentLB->file_no;
934     result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
935     result.end_byte_index = result.beginning_byte_index;
936     result.beginning_line_number = CurrentLB->source_line;
937     result.end_line_number = result.beginning_line_number;
938     result.beginning_character_number =
939         CurrentLB->chars_read - CurrentLB->line_start;
940     result.end_character_number = result.beginning_character_number;
941     return result;
942 }
943
/*  Debug location of the line most recently flagged for error reporting.    */
static debug_location ErrorReport_debug_location;
945
946 extern void report_errors_at_current_line(void)
947 {   ErrorReport.line_number = CurrentLB->source_line;
948     ErrorReport.file_number = CurrentLB->file_no;
949     if (ErrorReport.file_number == 255)
950         ErrorReport.file_number = -1;
951     ErrorReport.source      = CurrentLB->filename;
952     ErrorReport.main_flag   = CurrentLB->main_flag;
953     if (debugfile_switch)
954         ErrorReport_debug_location = get_current_debug_location();
955 }
956
extern debug_location get_error_report_debug_location(void)
{   /*  The debug location recorded by report_errors_at_current_line().     */
    return ErrorReport_debug_location;
}
960
extern int32 get_current_line_start(void)
{   /*  Character position within the file where the current line starts.   */
    return CurrentLB->line_start;
}
964
965 /* ------------------------------------------------------------------------- */
966 /*   Hash printing and line counting                                         */
967 /* ------------------------------------------------------------------------- */
968
static void print_hash(void)
{
    /*  Hash-printing is the practice of printing a # character every 100
        lines of source code (the -x switch), reassuring the user that
        progress is being made                                               */

    /*  A "::" marker introduces the first run of hashes.                    */
    if (no_hash_printed_yet)
    {   printf("::"); no_hash_printed_yet = FALSE;
    }
    printf("#"); hash_printed_since_newline = TRUE;

#ifndef MAC_FACE
    /*  On some systems, text output is buffered to a line at a time, and
        this would frustrate the point of hash-printing, so:                 */

    fflush(stdout);
#endif
}
987
static void reached_new_line(void)
{
    /*  Called to signal that a new line has been reached in the source code */

    /*  Reset the buffer holding the current line's text for error reports.  */
    forerrors_pointer = 0;

    CurrentLB->source_line++;
    CurrentLB->line_start = CurrentLB->chars_read;

    total_source_line_count++;

    /*  Every 100 lines, print a progress hash (under the -x switch).        */
    if (total_source_line_count%100==0)
    {   if (hash_switch) print_hash();
#ifdef MAC_MPW
        SpinCursor(32);                    /* I.e., allow other tasks to run */
#endif
    }

#ifdef MAC_FACE
    /*  Under the Mac interface: periodically yield to the event loop, and
        abandon compilation cleanly if the user has cancelled.               */
    if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
    {   ProcessEvents (&g_proc);
        if (g_proc != true)
        {   free_arrays();
            close_all_source();
            if (temporary_files_switch)
                remove_temp_files();
            if (store_the_text)
                my_free(&all_text,"transcription text");
            abort_transcript_file();
            longjmp (g_fallback, 1);
        }
    }
#endif
}
1022
static void new_syntax_line(void)
{   /*  Note that a new syntax line has begun: clear the error-text buffer
        (when lexing from a string) and update the error-report position.    */
    if (source_to_analyse != NULL) forerrors_pointer = 0;
    report_errors_at_current_line();
}
1027
1028 /* Return 10 raised to the expo power.
1029  *
1030  * I'm avoiding the standard pow() function for a rather lame reason:
1031  * it's in the libmath (-lm) library, and I don't want to change the
1032  * build model for the compiler. So, this is implemented with a stupid
1033  * lookup table. It's faster than pow() for small values of expo.
1034  * Probably not as fast if expo is 200, but "$+1e200" is an overflow
1035  * anyway, so I don't expect that to be a problem.
1036  *
1037  * (For some reason, frexp() and ldexp(), which are used later on, do
1038  * not require libmath to be linked in.)
1039  */
static double pow10_cheap(int expo)
{
    /*  10 raised to the expo power, via a small lookup table (see the
        comment above for why the standard pow() is avoided).                */

    #define POW10_RANGE (8)
    static double powers[POW10_RANGE*2+1] = {
        0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
        1.0,
        10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
    };

    double result = 1.0;

    if (expo >= 0) {
        /*  Peel off whole table-widths of 10^8 until expo is in range.      */
        while (expo > POW10_RANGE) {
            result *= powers[POW10_RANGE*2];
            expo -= POW10_RANGE;
        }
    }
    else {
        /*  Likewise with factors of 10^-8 for large negative exponents.     */
        while (expo < -POW10_RANGE) {
            result *= powers[0];
            expo += POW10_RANGE;
        }
    }
    return result * powers[POW10_RANGE+expo];
}
1064
1065 /* Return the IEEE-754 single-precision encoding of a floating-point
1066  * number. See http://www.psc.edu/general/software/packages/ieee/ieee.php
1067  * for an explanation.
1068  *
1069  * The number is provided in the pieces it was parsed in:
1070  *    [+|-] intv "." fracv "e" [+|-]expo
1071  *
1072  * If the magnitude is too large (beyond about 3.4e+38), this returns
1073  * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
1074  * small (below about 1e-45), this returns a zero value (0x00000000 or 
1075  * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
1076  * lexer should never do that).
1077  *
1078  * Note that using a float constant does *not* set the uses_float_features
1079  * flag (which would cause the game file to be labelled 3.1.2). There's
1080  * no VM feature here, just an integer. Of course, any use of the float
1081  * *opcodes* will set the flag.
1082  *
1083  * The math functions in this routine require #including <math.h>, but
1084  * they should not require linking the math library (-lm). At least,
1085  * they do not on OSX and Linux.
1086  */
static int32 construct_float(int signbit, double intv, double fracv, int expo)
{
    /*  Encode the parsed number [+|-] intv "." fracv "e" expo as an
        IEEE-754 single-precision bit pattern (see the comment above for
        the overflow/underflow conventions).                                 */

    double absval = (intv + fracv) * pow10_cheap(expo);
    int32 sign = (signbit ? 0x80000000 : 0x0);
    double mant;
    int32 fbits;
 
    if (isinf(absval)) {
        return sign | 0x7f800000; /* infinity */
    }
    if (isnan(absval)) {
        return sign | 0x7fc00000; /* quiet NaN */
    }

    /*  Split into mantissa and binary exponent: absval = mant * 2^expo.     */
    mant = frexp(absval, &expo);

    /* Normalize mantissa to be in the range [1.0, 2.0) */
    if (0.5 <= mant && mant < 1.0) {
        mant *= 2.0;
        expo--;
    }
    else if (mant == 0.0) {
        expo = 0;
    }
    else {
        return sign | 0x7f800000; /* infinity */
    }

    if (expo >= 128) {
        return sign | 0x7f800000; /* infinity */
    }
    else if (expo < -126) {
        /* Denormalized (very small) number */
        mant = ldexp(mant, 126 + expo);
        expo = 0;
    }
    else if (!(expo == 0 && mant == 0.0)) {
        expo += 127; /* bias the exponent */
        mant -= 1.0; /* Get rid of leading 1 */
    }

    mant *= 8388608.0; /* 2^23 */
    fbits = (int32)(mant + 0.5); /* round mant to nearest int */
    if (fbits >> 23) {
        /* The carry propagated out of a string of 23 1 bits. */
        fbits = 0;
        expo++;
        if (expo >= 255) {
            return sign | 0x7f800000; /* infinity */
        }
    }

    /*  Assemble: sign (1 bit), biased exponent (8 bits), mantissa (23).     */
    return (sign) | ((int32)(expo << 23)) | (fbits);
}
1141
1142 /* ------------------------------------------------------------------------- */
1143 /*   Characters are read via a "pipeline" of variables, allowing us to look  */
1144 /*       up to three characters ahead of the current position.               */
1145 /*                                                                           */
1146 /*   There are two possible sources: from the source files being loaded in,  */
1147 /*   and from a string inside Inform (which is where the code for veneer     */
1148 /*   routines comes from).  Each source has its own get-next-character       */
1149 /*   routine.                                                                */
1150 /* ------------------------------------------------------------------------- */
1151 /*   Source 1: from files                                                    */
1152 /*                                                                           */
1153 /*   Note that file_load_chars(p, size) loads "size" bytes into buffer "p"   */
1154 /*   from the current input file.  If the file runs out, then if it was      */
1155 /*   the last source file 4 EOF characters are placed in the buffer: if it   */
1156 /*   was only an Include file ending, then a '\n' character is placed there  */
1157 /*   (essentially to force termination of any comment line) followed by      */
1158 /*   three harmless spaces.                                                  */
1159 /*                                                                           */
1160 /*   The routine returns the number of characters it has written, and note   */
1161 /*   that this conveniently ensures that all characters in the buffer come   */
1162 /*   from the same file.                                                     */
1163 /* ------------------------------------------------------------------------- */
1164
#define SOURCE_BUFFER_SIZE 4096                  /*  Typical disc block size */

typedef struct Sourcefile_s
{   char *buffer;                                /*  Input buffer            */
    int   read_pos;                              /*  Read position in buffer */
    int   size;                                  /*  Number of meaningful
                                                     characters in buffer    */
    int   la, la2, la3;                          /*  Three characters of
                                                     lookahead pipeline      */
    int   file_no;                               /*  Internal file number
                                                     (1, 2, 3, ...)          */
    LexicalBlock LB;
} Sourcefile;

static Sourcefile *FileStack;                    /*  Stack of open source
                                                     files (Includes nest)   */
static int File_sp;                              /*  Stack pointer           */

static Sourcefile *CF;                           /*  Top entry on stack      */

static int last_no_files;                        /*  Value of input_file
                                                     when the last character
                                                     was read                */
1185
static void begin_buffering_file(int i, int file_no)
{   int j, cnt; uchar *p;

    /*  Push source file number file_no onto the file stack at depth i:
        load its first buffer-full, prime the lookahead pipeline, and set
        up its LexicalBlock as the current one.                              */

    if (i >= MAX_INCLUSION_DEPTH) 
       memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);

    p = (uchar *) FileStack[i].buffer;

    /*  Save the lookahead pipeline of the file being suspended.             */
    if (i>0)
    {   FileStack[i-1].la  = lookahead;
        FileStack[i-1].la2 = lookahead2;
        FileStack[i-1].la3 = lookahead3;
    }

    FileStack[i].file_no = file_no;
    FileStack[i].size = file_load_chars(file_no,
        (char *) p, SOURCE_BUFFER_SIZE);
    /*  Prime the three-character lookahead from the start of the buffer.    */
    lookahead  = source_to_iso_grid[p[0]];
    lookahead2 = source_to_iso_grid[p[1]];
    lookahead3 = source_to_iso_grid[p[2]];
    if (LOOKAHEAD_SIZE != 3)
        compiler_error
            ("Lexer lookahead size does not match hard-coded lookahead code");
    FileStack[i].read_pos = LOOKAHEAD_SIZE;

    if (file_no==1) FileStack[i].LB.main_flag = TRUE;
               else FileStack[i].LB.main_flag = FALSE;
    FileStack[i].LB.sys_flag = FALSE;
    FileStack[i].LB.source_line = 1;
    FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
    FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
    FileStack[i].LB.filename = InputFiles[file_no-1].filename;
    FileStack[i].LB.file_no = file_no;

    CurrentLB = &(FileStack[i].LB);
    CF = &(FileStack[i]);

    /* Check for recursive inclusion */
    /* (Warn only on the second inclusion, i.e. when exactly one earlier
       stack entry has the same filename.  After the loop j==i, so the name
       passed to warning_named is the current file's -- the same string.)    */
    cnt = 0;
    for (j=0; j<i; j++)
    {   if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
            cnt++;
    }
    if (cnt==1)
        warning_named("File included more than once",
            FileStack[j].LB.filename);
}
1233
static void create_char_pipeline(void)
{
    /*  Begin reading from the main source file (internal file number 1).    */
    File_sp = 0;
    begin_buffering_file(File_sp++, 1);
    pipeline_made = TRUE; last_no_files = input_file;
}
1240
static int get_next_char_from_pipeline(void)
{   uchar *p;

    /*  Dispense the next character from the current source file, opening
        any newly Included files, popping finished files off the stack, and
        keeping the three-character lookahead pipeline topped up.            */

    while (last_no_files < input_file)
    {
        /*  An "Include" file must have opened since the last character
            was read...                                                      */

        begin_buffering_file(File_sp++, ++last_no_files);
    }
    last_no_files = input_file;

    if (File_sp == 0)
    {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
    }

    if (CF->read_pos == CF->size)
    {   /*  Buffer exhausted: reload from the same file.                     */
        CF->size =
            file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
        CF->read_pos = 0;
    }
    else
    if (CF->read_pos == -(CF->size))
    {   /*  A negative size appears to mark the end of this file (NOTE:
            confirm against file_load_chars): pop back to the including
            file, restoring its saved lookahead pipeline.                    */
        set_token_location(get_current_debug_location());
        File_sp--;
        if (File_sp == 0)
        {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
        }
        CF = &(FileStack[File_sp-1]);
        CurrentLB = &(FileStack[File_sp-1].LB);
        lookahead  = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
        if (CF->read_pos == CF->size)
        {   CF->size =
                file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
            CF->read_pos = 0;
        }
        set_token_location(get_current_debug_location());
    }

    p = (uchar *) (CF->buffer);

    /*  Advance the pipeline by one character.                               */
    current = lookahead;
    lookahead = lookahead2;
    lookahead2 = lookahead3;
    lookahead3 = source_to_iso_grid[p[CF->read_pos++]];

    CurrentLB->chars_read++;
    if (forerrors_pointer < 511)
        forerrors_buff[forerrors_pointer++] = current;
    if (current == '\n') reached_new_line();
    return(current);
}
1293
1294 /* ------------------------------------------------------------------------- */
1295 /*   Source 2: from a string                                                 */
1296 /* ------------------------------------------------------------------------- */
1297
static int source_to_analyse_pointer;            /*  Current read position
                                                     within the string
                                                     source_to_analyse       */
1299
1300 static int get_next_char_from_string(void)
1301 {   uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
1302     current = source_to_iso_grid[p[0]];
1303
1304     if (current == 0)    lookahead  = 0;
1305                     else lookahead  = source_to_iso_grid[p[1]];
1306     if (lookahead == 0)  lookahead2 = 0;
1307                     else lookahead2 = source_to_iso_grid[p[2]];
1308     if (lookahead2 == 0) lookahead3 = 0;
1309                     else lookahead3 = source_to_iso_grid[p[3]];
1310
1311     CurrentLB->chars_read++;
1312     if (forerrors_pointer < 511)
1313         forerrors_buff[forerrors_pointer++] = current;
1314     if (current == '\n') reached_new_line();
1315     return(current);
1316 }
1317
1318 /* ========================================================================= */
1319 /*   The interface between the lexer and Inform's higher levels:             */
1320 /*                                                                           */
1321 /*       put_token_back()            (effectively) move the read position    */
1322 /*                                       back by one token                   */
1323 /*                                                                           */
1324 /*       get_next_token()            copy the token at the current read      */
1325 /*                                       position into the triple            */
1326 /*                                   (token_type, token_value, token_text)   */
1327 /*                                       and move the read position forward  */
1328 /*                                       by one                              */
1329 /*                                                                           */
1330 /*       restart_lexer(source, name) if source is NULL, initialise the lexer */
1331 /*                                       to read from source files;          */
1332 /*                                   otherwise, to read from this string.    */
1333 /* ------------------------------------------------------------------------- */
1334
1335 extern void put_token_back(void)
1336 {   tokens_put_back++;
1337
1338     if (tokens_trace_level > 0)
1339     {   if (tokens_trace_level == 1) printf("<- ");
1340         else printf("<-\n");
1341     }
1342
1343     /*  The following error, of course, should never happen!                 */
1344
1345     if (tokens_put_back == CIRCLE_SIZE)
1346     {   compiler_error("The lexical analyser has collapsed because of a wrong \
1347 assumption inside Inform");
1348         tokens_put_back--;
1349         return;
1350     }
1351 }
1352
1353 extern void get_next_token(void)
1354 {   int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
1355     int returning_a_put_back_token = TRUE;
1356
1357     context = lexical_context();
1358
1359     if (tokens_put_back > 0)
1360     {   i = circle_position - tokens_put_back + 1;
1361         if (i<0) i += CIRCLE_SIZE;
1362         tokens_put_back--;
1363         if (context != token_contexts[i])
1364         {   j = circle[i].type;
1365             if ((j==0) || ((j>=100) && (j<200)))
1366                 interpret_identifier(i, FALSE);
1367         }
1368         goto ReturnBack;
1369     }
1370     returning_a_put_back_token = FALSE;
1371
1372     if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1373     else circle_position++;
1374
1375     if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
1376         lex_p = lexeme_memory;
1377
1378     circle[circle_position].text = lex_p;
1379     circle[circle_position].value = 0;
1380     *lex_p = 0;
1381
1382     StartTokenAgain:
1383     d = (*get_next_char)();
1384     e = tokeniser_grid[d];
1385
1386     if (next_token_begins_syntax_line)
1387     {   if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1388         {   new_syntax_line();
1389             next_token_begins_syntax_line = FALSE;
1390         }
1391     }
1392
1393     circle[circle_position].location = get_current_debug_location();
1394
1395     switch(e)
1396     {   case 0: char_error("Illegal character found in source:", d);
1397             goto StartTokenAgain;
1398
1399         case WHITESPACE_CODE:
1400             while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1401                 (*get_next_char)();
1402             goto StartTokenAgain;
1403
1404         case COMMENT_CODE:
1405             while ((lookahead != '\n') && (lookahead != 0))
1406                 (*get_next_char)();
1407             goto StartTokenAgain;
1408
1409         case EOF_CODE:
1410             circle[circle_position].type = EOF_TT;
1411             strcpy(lex_p, "<end of file>");
1412             lex_p += strlen(lex_p) + 1;
1413             break;
1414
1415         case DIGIT_CODE:
1416             radix = 10;
1417             ReturnNumber:
1418             n=0;
1419             do
1420             {   n = n*radix + character_digit_value[d];
1421                 *lex_p++ = d;
1422             } while ((character_digit_value[lookahead] < radix)
1423                      && (d = (*get_next_char)(), TRUE));
1424
1425             *lex_p++ = 0;
1426             circle[circle_position].type = NUMBER_TT;
1427             circle[circle_position].value = n;
1428             break;
1429
1430             FloatNumber:
1431             {   int expo=0; double intv=0, fracv=0;
1432                 int expocount=0, intcount=0, fraccount=0;
1433                 int signbit = (d == '-');
1434                 *lex_p++ = d;
1435                 while (character_digit_value[lookahead] < 10) {
1436                     intv = 10.0*intv + character_digit_value[lookahead];
1437                     intcount++;
1438                     *lex_p++ = lookahead;
1439                     (*get_next_char)();
1440                 }
1441                 if (lookahead == '.') {
1442                     double fracpow = 1.0;
1443                     *lex_p++ = lookahead;
1444                     (*get_next_char)();
1445                     while (character_digit_value[lookahead] < 10) {
1446                         fracpow *= 0.1;
1447                         fracv = fracv + fracpow*character_digit_value[lookahead];
1448                         fraccount++;
1449                         *lex_p++ = lookahead;
1450                         (*get_next_char)();
1451                     }
1452                 }
1453                 if (lookahead == 'e' || lookahead == 'E') {
1454                     int exposign = 0;
1455                     *lex_p++ = lookahead;
1456                     (*get_next_char)();
1457                     if (lookahead == '+' || lookahead == '-') {
1458                         exposign = (lookahead == '-');
1459                         *lex_p++ = lookahead;
1460                         (*get_next_char)();
1461                     }
1462                     while (character_digit_value[lookahead] < 10) {
1463                         expo = 10*expo + character_digit_value[lookahead];
1464                         expocount++;
1465                         *lex_p++ = lookahead;
1466                         (*get_next_char)();
1467                     }
1468                     if (expocount == 0)
1469                         error("Floating-point literal must have digits after the 'e'");
1470                     if (exposign) { expo = -expo; }
1471                 }
1472                 if (intcount + fraccount == 0)
1473                     error("Floating-point literal must have digits");
1474                 n = construct_float(signbit, intv, fracv, expo);
1475             }
1476             *lex_p++ = 0;
1477             circle[circle_position].type = NUMBER_TT;
1478             circle[circle_position].value = n;
1479             if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
1480             break;
1481
1482         case RADIX_CODE:
1483             radix = 16; d = (*get_next_char)();
1484             if (d == '-' || d == '+') { goto FloatNumber; }
1485             if (d == '$') { d = (*get_next_char)(); radix = 2; }
1486             if (character_digit_value[d] >= radix)
1487             {   if (radix == 2)
1488                     error("Binary number expected after '$$'");
1489                 else
1490                     error("Hexadecimal number expected after '$'");
1491             }
1492             goto ReturnNumber;
1493
1494         case QUOTE_CODE:     /* Single-quotes: scan a literal string */
1495             quoted_size=0;
1496             do
1497             {   e = d; d = (*get_next_char)(); *lex_p++ = d;
1498                 if (quoted_size++==64)
1499                 {   error(
1500                     "Too much text for one pair of quotations '...' to hold");
1501                     *lex_p='\''; break;
1502                 }
1503                 if ((d == '\'') && (e != '@'))
1504                 {   if (quoted_size == 1)
1505                     {   d = (*get_next_char)(); *lex_p++ = d;
1506                         if (d != '\'')
1507                             error("No text between quotation marks ''");
1508                     }
1509                     break;
1510                 }
1511             } while (d != EOF);
1512             if (d==EOF) ebf_error("'\''", "end of file");
1513             *(lex_p-1) = 0;
1514             circle[circle_position].type = SQ_TT;
1515             break;
1516
1517         case DQUOTE_CODE:    /* Double-quotes: scan a literal string */
1518             quoted_size=0;
1519             do
1520             {   d = (*get_next_char)(); *lex_p++ = d;
1521                 if (quoted_size++==MAX_QTEXT_SIZE)
1522                 {   memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE);
1523                     break;
1524                 }
1525                 if (d == '\n')
1526                 {   lex_p--;
1527                     while (*(lex_p-1) == ' ') lex_p--;
1528                     if (*(lex_p-1) != '^') *lex_p++ = ' ';
1529                     while ((lookahead != EOF) &&
1530                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1531                     (*get_next_char)();
1532                 }
1533                 else if (d == '\\')
1534                 {   int newline_passed = FALSE;
1535                     lex_p--;
1536                     while ((lookahead != EOF) &&
1537                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1538                         if ((d = (*get_next_char)()) == '\n')
1539                             newline_passed = TRUE;
1540                     if (!newline_passed)
1541                     {   char chb[4];
1542                         chb[0] = '\"'; chb[1] = lookahead;
1543                         chb[2] = '\"'; chb[3] = 0;
1544                         ebf_error("empty rest of line after '\\' in string",
1545                             chb);
1546                     }
1547                 }
1548             }   while ((d != EOF) && (d!='\"'));
1549             if (d==EOF) ebf_error("'\"'", "end of file");
1550             *(lex_p-1) = 0;
1551             circle[circle_position].type = DQ_TT;
1552             break;
1553
1554         case IDENTIFIER_CODE:    /* Letter or underscore: an identifier */
1555
1556             *lex_p++ = d; n=1;
1557             while ((n<=MAX_IDENTIFIER_LENGTH)
1558                    && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1559                    || (tokeniser_grid[lookahead] == DIGIT_CODE)))
1560                 n++, *lex_p++ = (*get_next_char)();
1561
1562             *lex_p++ = 0;
1563
1564             if (n > MAX_IDENTIFIER_LENGTH)
1565             {   char bad_length[100];
1566                 sprintf(bad_length,
1567                     "Name exceeds the maximum length of %d characters:",
1568                          MAX_IDENTIFIER_LENGTH);
1569                 error_named(bad_length, circle[circle_position].text);
1570                 /* Trim token so that it doesn't violate
1571                    MAX_IDENTIFIER_LENGTH during error recovery */
1572                 circle[circle_position].text[MAX_IDENTIFIER_LENGTH] = 0;
1573             }
1574
1575             if (dont_enter_into_symbol_table)
1576             {   circle[circle_position].type = DQ_TT;
1577                 circle[circle_position].value = 0;
1578                 if (dont_enter_into_symbol_table == -2)
1579                     interpret_identifier(circle_position, TRUE);
1580                 break;
1581             }
1582
1583             interpret_identifier(circle_position, FALSE);
1584             break;
1585
1586         default:
1587
1588             /*  The character is initial to at least one of the separators  */
1589
1590             for (j=e>>4, k=j+(e&0x0f); j<k; j++)
1591             {   r = (char *) separators[j];
1592                 if (r[1]==0)
1593                 {   *lex_p++=d; *lex_p++=0;
1594                     goto SeparatorMatched;
1595                 }
1596                 else
1597                 if (r[2]==0)
1598                 {   if (*(r+1) == lookahead)
1599                     {   *lex_p++=d;
1600                         *lex_p++=(*get_next_char)();
1601                         *lex_p++=0;
1602                         goto SeparatorMatched;
1603                     }
1604                 }
1605                 else
1606                 {   if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
1607                     {   *lex_p++=d;
1608                         *lex_p++=(*get_next_char)();
1609                         *lex_p++=(*get_next_char)();
1610                         *lex_p++=0;
1611                         goto SeparatorMatched;
1612                     }
1613                 }
1614             }
1615
1616             /*  The following contingency never in fact arises with the
1617                 current set of separators, but might in future  */
1618
1619             *lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
1620             *lex_p++ = 0;
1621             error_named("Unrecognised combination in source:", lex_p);
1622             goto StartTokenAgain;
1623
1624             SeparatorMatched:
1625
1626             circle[circle_position].type = SEP_TT;
1627             circle[circle_position].value = j;
1628             switch(j)
1629             {   case SEMICOLON_SEP: break;
1630                 case HASHNDOLLAR_SEP:
1631                 case HASHWDOLLAR_SEP:
1632                     if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1633                     {   error_named("Character expected after",
1634                             circle[circle_position].text);
1635                         break;
1636                     }
1637                     lex_p--;
1638                     *lex_p++ = (*get_next_char)();
1639                     while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1640                            || (tokeniser_grid[lookahead] == DIGIT_CODE))
1641                         *lex_p++ = (*get_next_char)();
1642                     *lex_p++ = 0;
1643                     break;
1644                 case HASHADOLLAR_SEP:
1645                 case HASHGDOLLAR_SEP:
1646                 case HASHRDOLLAR_SEP:
1647                 case HASHHASH_SEP:
1648                     if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
1649                     {   error_named("Alphabetic character expected after",
1650                             circle[circle_position].text);
1651                         break;
1652                     }
1653                     lex_p--;
1654                     while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1655                            || (tokeniser_grid[lookahead] == DIGIT_CODE))
1656                         *lex_p++ = (*get_next_char)();
1657                     *lex_p++ = 0;
1658                     break;
1659             }
1660             break;
1661     }
1662
1663     i = circle_position;
1664
1665     ReturnBack:
1666     token_value = circle[i].value;
1667     token_type = circle[i].type;
1668     token_text = circle[i].text;
1669     if (!returning_a_put_back_token)
1670     {   set_token_location(circle[i].location);
1671     }
1672     token_contexts[i] = context;
1673
1674     if (tokens_trace_level > 0)
1675     {   if (tokens_trace_level == 1)
1676             printf("'%s' ", circle[i].text);
1677         else
1678         {   printf("-> "); describe_token(circle[i]);
1679             printf(" ");
1680             if (tokens_trace_level > 2) print_context(token_contexts[i]);
1681             printf("\n");
1682         }
1683     }
1684 }
1685
1686 static char veneer_error_title[64];
1687
1688 extern void restart_lexer(char *lexical_source, char *name)
1689 {   int i;
1690     circle_position = 0;
1691     for (i=0; i<CIRCLE_SIZE; i++)
1692     {   circle[i].type = 0;
1693         circle[i].value = 0;
1694         circle[i].text = "(if this is ever visible, there is a bug)";
1695         token_contexts[i] = 0;
1696     }
1697
1698     lex_p = lexeme_memory;
1699     tokens_put_back = 0;
1700     forerrors_pointer = 0;
1701     dont_enter_into_symbol_table = FALSE;
1702     return_sp_as_variable = FALSE;
1703     next_token_begins_syntax_line = TRUE;
1704
1705     source_to_analyse = lexical_source;
1706
1707     if (source_to_analyse == NULL)
1708     {   get_next_char = get_next_char_from_pipeline;
1709         if (!pipeline_made) create_char_pipeline();
1710         forerrors_buff[0] = 0; forerrors_pointer = 0;
1711     }
1712     else
1713     {   get_next_char = get_next_char_from_string;
1714         source_to_analyse_pointer = 0;
1715         CurrentLB = &StringLB;
1716         sprintf(veneer_error_title, "<veneer routine '%s'>", name);
1717         StringLB.filename = veneer_error_title;
1718
1719         CurrentLB->source_line = 1;
1720         CurrentLB->line_start  = 0;
1721         CurrentLB->chars_read  = 0;
1722     }
1723 }
1724
1725 /* ========================================================================= */
1726 /*   Data structure management routines                                      */
1727 /* ------------------------------------------------------------------------- */
1728
/*  No lexer state needs initialising before the arrays are allocated;
    this is kept for symmetry with the other modules' init routines.     */
extern void init_lexer_vars(void)
{
}
1732
/*  Reset the line counter for a fresh prepass and flush any errors
    queued against the previously-current line.                          */
extern void lexer_begin_prepass(void)
{   total_source_line_count = 0;
    CurrentLB = &NoFileOpen;  /* no source file is open yet */
    report_errors_at_current_line();
}
1738
1739 extern void lexer_begin_pass(void)
1740 {   no_hash_printed_yet = TRUE;
1741     hash_printed_since_newline = FALSE;
1742
1743     pipeline_made = FALSE;
1744
1745     restart_lexer(NULL, NULL);
1746 }
1747
/*  Called at the end of a pass: switch error reporting to the
    output-generation pseudo-file and flush any queued errors.           */
extern void lexer_endpass(void)
{   CurrentLB = &MakingOutput;
    report_errors_at_current_line();
}
1752
/*  Allocate all of the lexer's working storage and build its static
    tables.  Called once at startup, after memory settings are known.    */
extern void lexer_allocate_arrays(void)
{   int i;

    /*  One Sourcefile record, each with its own read buffer, per
        possible level of Include nesting.                               */
    FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
        "filestack buffer");

    for (i=0; i<MAX_INCLUSION_DEPTH; i++)
    FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");

    /*  Cyclic store for lexeme text; get_next_token wraps the cursor
        once it passes 4*MAX_QTEXT_SIZE, leaving the rest as headroom.   */
    lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");

    /*  Hash tables for recognising keywords and local variable names.   */
    keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
        "keyword hash table");
    keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
        "keyword hash end table");
    keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
        "keyword hashing linked list");
    local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
        "local variable hash table");
    local_variable_text_table = my_malloc(
        (MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
        "text of local variable names");

    local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
        "local variable hash codes");
    local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
        "local variable text pointers");

    make_tokeniser_grid();
    make_keywords_tables();

    /*  Start the linked list of token debug locations with a single
        zeroed entry.                                                    */
    first_token_locations =
        my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
    first_token_locations->location.file_index = 0;
    first_token_locations->location.beginning_byte_index = 0;
    first_token_locations->location.end_byte_index = 0;
    first_token_locations->location.beginning_line_number = 0;
    first_token_locations->location.end_line_number = 0;
    first_token_locations->location.beginning_character_number = 0;
    first_token_locations->location.end_character_number = 0;
    first_token_locations->next = NULL;
    first_token_locations->reference_count = 0;
    last_token_location = first_token_locations;
}
1797
1798 extern void lexer_free_arrays(void)
1799 {   int i; char *p;
1800
1801     for (i=0; i<MAX_INCLUSION_DEPTH; i++)
1802     {   p = FileStack[i].buffer;
1803         my_free(&p, "source file buffer");
1804     }
1805     my_free(&FileStack, "filestack buffer");
1806     my_free(&lexeme_memory, "lexeme memory");
1807
1808     my_free(&keywords_hash_table, "keyword hash table");
1809     my_free(&keywords_hash_ends_table, "keyword hash end table");
1810     my_free(&keywords_data_table, "keyword hashing linked list");
1811     my_free(&local_variable_hash_table, "local variable hash table");
1812     my_free(&local_variable_text_table, "text of local variable names");
1813
1814     my_free(&local_variable_hash_codes, "local variable hash codes");
1815     my_free(&local_variable_texts, "local variable text pointers");
1816
1817     cleanup_token_locations(NULL);
1818 }
1819
1820 /* ========================================================================= */