1 /* ------------------------------------------------------------------------- */
2 /* "lexer" : Lexical analyser */
4 /* Part of Inform 6.41 */
5 /* copyright (c) Graham Nelson 1993 - 2022 */
7 /* Inform is free software: you can redistribute it and/or modify */
8 /* it under the terms of the GNU General Public License as published by */
9 /* the Free Software Foundation, either version 3 of the License, or */
10 /* (at your option) any later version. */
12 /* Inform is distributed in the hope that it will be useful, */
13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
15 /* GNU General Public License for more details. */
17 /* You should have received a copy of the GNU General Public License */
18 /* along with Inform. If not, see https://gnu.org/licenses/ */
20 /* ------------------------------------------------------------------------- */
24 int total_source_line_count, /* Number of source lines so far */
26 no_hash_printed_yet, /* Have not yet printed the first # */
27 hash_printed_since_newline, /* A hash has been printed since the
28 most recent new-line was printed
29 (generally as a result of an error
30 message or the start of pass) */
31 dont_enter_into_symbol_table, /* Return names as text (with
32 token type DQ_TT, i.e., as if
33 they had double-quotes around)
34 and not as entries in the symbol
35 table, when TRUE. If -2, only the
36 keyword table is searched. */
37 return_sp_as_variable; /* When TRUE, the word "sp" denotes
38 the stack pointer variable
39 (used in assembly language only) */
40 int next_token_begins_syntax_line; /* When TRUE, start a new syntax
41 line (for error reporting, etc.)
42 on the source code line where
43 the next token appears */
45 int32 last_mapped_line; /* Last syntax line reported to debugging file */
47 /* ------------------------------------------------------------------------- */
48 /* The lexer's output is a sequence of structs, each called a "token", */
49 /* representing one lexical unit (or "lexeme") each. Instead of providing */
50 /* "lookahead" (that is, always having available the next token after the */
51 /* current one, so that syntax analysers higher up in Inform can have */
52 /* advance knowledge of what is coming), the lexer instead has a system */
53 /* where tokens can be read in and then "put back again". */
54 /* The meaning of the number (and to some extent the text) supplied with */
55 /* a token depends on its type: see "header.h" for the list of types. */
56 /* For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
57 /* number, and translated to the token: */
58 /* type NUMBER_TT, value 483, text "$1e3" */
59 /* ------------------------------------------------------------------------- */
60 /* These three variables are set to the current token on a call to */
61 /* get_next_token() (but are not changed by a call to put_token_back()). */
62 /* (It would be tidier to use a token_data structure, rather than having */
63 /* get_next_token() unpack three values. But this is the way it is.) */
64 /* ------------------------------------------------------------------------- */
70 /* ------------------------------------------------------------------------- */
71 /* The next two variables are the head and tail of a singly linked list. */
72 /* The tail stores the portion most recently read from the current */
73 /* lexical block; its end values therefore describe the location of the */
74 /* current token, and are updated whenever the three variables above are */
75 /* via set_token_location(...). Earlier vertices, if any, represent the */
76 /* regions of lexical blocks read beforehand, where new vertices are */
77 /* only introduced by interruptions like a file inclusion or an EOF. */
78 /* Vertices are deleted off of the front of the list once they are no */
79 /* longer referenced by pending debug information records. */
80 /* ------------------------------------------------------------------------- */
82 static debug_locations *first_token_locations;
83 static debug_locations *last_token_location;
85 extern debug_location get_token_location(void)
86 { debug_location result;
87 debug_location *location = &(last_token_location->location);
88 result.file_index = location->file_index;
89 result.beginning_byte_index = location->end_byte_index;
90 result.end_byte_index = location->end_byte_index;
91 result.beginning_line_number = location->end_line_number;
92 result.end_line_number = location->end_line_number;
93 result.beginning_character_number = location->end_character_number;
94 result.end_character_number = location->end_character_number;
95 result.orig_file_index = location->orig_file_index;
96 result.orig_beg_line_number = location->orig_beg_line_number;
97 result.orig_beg_char_number = location->orig_beg_char_number;
101 extern debug_locations get_token_locations(void)
102 { debug_locations result;
103 result.location = get_token_location();
105 result.reference_count = 0;
109 static void set_token_location(debug_location location)
110 { if (location.file_index == last_token_location->location.file_index)
111 { last_token_location->location.end_byte_index =
112 location.end_byte_index;
113 last_token_location->location.end_line_number =
114 location.end_line_number;
115 last_token_location->location.end_character_number =
116 location.end_character_number;
117 last_token_location->location.orig_file_index =
118 location.orig_file_index;
119 last_token_location->location.orig_beg_line_number =
120 location.orig_beg_line_number;
121 last_token_location->location.orig_beg_char_number =
122 location.orig_beg_char_number;
124 { debug_locations*successor =
126 (sizeof(debug_locations),
127 "debug locations of recent tokens");
128 successor->location = location;
129 successor->next = NULL;
130 successor->reference_count = 0;
131 last_token_location->next = successor;
132 last_token_location = successor;
136 extern debug_location_beginning get_token_location_beginning(void)
137 { debug_location_beginning result;
138 ++(last_token_location->reference_count);
139 result.head = last_token_location;
140 result.beginning_byte_index =
141 last_token_location->location.end_byte_index;
142 result.beginning_line_number =
143 last_token_location->location.end_line_number;
144 result.beginning_character_number =
145 last_token_location->location.end_character_number;
146 result.orig_file_index = last_token_location->location.orig_file_index;
147 result.orig_beg_line_number = last_token_location->location.orig_beg_line_number;
148 result.orig_beg_char_number = last_token_location->location.orig_beg_char_number;
153 static void cleanup_token_locations(debug_location_beginning*beginning)
154 { if (first_token_locations)
155 { while (first_token_locations &&
156 !first_token_locations->reference_count)
157 { debug_locations*moribund = first_token_locations;
158 first_token_locations = moribund->next;
159 my_free(&moribund, "debug locations of recent tokens");
161 (beginning->head == moribund || !first_token_locations))
163 ("Records needed by a debug_location_beginning are no "
164 "longer allocated, perhaps because of an invalid reuse "
165 "of this or an earlier beginning");
171 ("Attempt to use a debug_location_beginning when no token "
172 "locations are defined");
175 ("Attempt to clean up token locations when no token locations "
181 extern void discard_token_location(debug_location_beginning beginning)
182 { --(beginning.head->reference_count);
185 extern debug_locations get_token_location_end
186 (debug_location_beginning beginning)
187 { debug_locations result;
188 cleanup_token_locations(&beginning);
189 --(beginning.head->reference_count);
190 /* Sometimes we know what we'll read before we switch to the lexical block
191 where we'll read it. In such cases the beginning will be placed in the
192 prior block and last exactly zero bytes there. It's misleading to
193 include such ranges, so we gobble them. */
194 if (beginning.head->location.end_byte_index ==
195 beginning.beginning_byte_index &&
196 beginning.head->next)
197 { beginning.head = beginning.head->next;
198 result.location = beginning.head->location;
199 result.location.beginning_byte_index = 0;
200 result.location.beginning_line_number = 1;
201 result.location.beginning_character_number = 1;
203 { result.location = beginning.head->location;
204 result.location.beginning_byte_index =
205 beginning.beginning_byte_index;
206 result.location.beginning_line_number =
207 beginning.beginning_line_number;
208 result.location.beginning_character_number =
209 beginning.beginning_character_number;
212 result.location.orig_file_index =
213 beginning.orig_file_index;
214 result.location.orig_beg_line_number =
215 beginning.orig_beg_line_number;
216 result.location.orig_beg_char_number =
217 beginning.orig_beg_char_number;
219 result.next = beginning.head->next;
220 result.reference_count = 0;
224 /* ------------------------------------------------------------------------- */
225 /* In order to be able to put tokens back efficiently, the lexer stores */
226 /* tokens in a "circle": the variable circle_position ranges between */
227 /* 0 and CIRCLE_SIZE-1. We only need a circle size as large as the */
228 /* maximum number of tokens ever put back at once, plus 1 (in effect, the */
229 /* maximum token lookahead ever needed in syntax analysis, plus 1). */
231 /* Note that the circle struct type is lexeme_data, whereas the expression */
232 /* code all works in token_data. They have slightly different needs. The */
233 /* data is exported through the token_text, token_value, token_type */
234 /* globals, so there's no need to use the same struct at both levels. */
236 /* Unlike some compilers, Inform does not have a context-free lexer: in */
237 /* fact it has 12288 different possible states. However, the context only */
238 /* affects the interpretation of "identifiers": lexemes beginning with a */
239 /* letter and containing up to 32 chars of alphanumeric and underscore */
240 /* chars. (For example, "default" may refer to the directive or statement */
241 /* of that name, and which token values are returned depends on the */
242 /* current lexical context.) */
244 /* Along with each token, we also store the lexical context it was */
245 /* translated under; because if it is called for again, there may need */
246 /* to be a fresh interpretation of it if the context has changed. */
247 /* ------------------------------------------------------------------------- */
249 #define CIRCLE_SIZE 6
251 /* (The worst case for token lookahead is distinguishing between an
252 old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".) */
254 static int circle_position;
255 static lexeme_data circle[CIRCLE_SIZE];
257 /* ------------------------------------------------------------------------- */
258 /* A complication, however, is that the text of some lexemes needs to be */
259 /* held in Inform's memory for much longer periods: for example, a */
260 /* dictionary word lexeme (like "'south'") must have its text preserved */
261 /* until the code generation time for the expression it occurs in, when */
262 /* the dictionary reference is actually made. We handle this by keeping */
263 /* all lexeme text until the end of the statement (or, for top-level */
264 /* directives, until the end of the directive). Then we call */
265 /* release_token_texts() to start over. The lextexts array will therefore */
266 /* grow to the largest number of lexemes in a single statement or */
268 /* ------------------------------------------------------------------------- */
270 typedef struct lextext_s {
272 size_t size; /* Allocated size (including terminal null)
273 This is always at least MAX_IDENTIFIER_LENGTH+1 */
276 static lextext *lextexts; /* Allocated to no_lextexts */
277 static memory_list lextexts_memlist;
278 static int no_lextexts;
280 static int cur_lextexts; /* Number of lextexts in current use
281 (cur_lextexts <= no_lextexts) */
283 static int lex_index; /* Index of lextext being written to */
284 static int lex_pos; /* Current write position in that lextext */
286 /* ------------------------------------------------------------------------- */
287 /* The lexer itself needs up to 3 characters of lookahead (it uses an */
288 /* LR(3) grammar to translate characters into tokens). */
289 /* ------------------------------------------------------------------------- */
#define LOOKAHEAD_SIZE 3

static int current, lookahead,          /* The latest character read, and */
           lookahead2, lookahead3;      /* the three characters following it */

static int pipeline_made;               /* Whether or not the pipeline of
                                           characters has been constructed
                                           yet (presumably reset per pass —
                                           TODO confirm) */

static int (* get_next_char)(void);     /* Routine for reading the stream of
                                           characters: the lexer does not
                                           need any "ungetc" routine for
                                           putting them back again. End of
                                           stream is signalled by returning
                                           zero. */

static char *source_to_analyse;         /* The current lexical source:
                                           NULL for "load from source files",
                                           otherwise this points to a string
                                           containing Inform code */

static int tokens_put_back;             /* Count of the number of backward
                                           moves made from the last-read
                                           token */
316 /* This gets called for both token_data and lexeme_data structs. It prints
317 a description of the common part (the text, value, type fields).
319 extern void describe_token_triple(const char *text, int32 value, int type)
321 /* Many of the token types are not set in this file, but later on in
322 Inform's higher stages (for example, in the expression evaluator);
323 but this routine describes them all. */
329 /* The following token types occur in lexer output: */
331 case SYMBOL_TT: printf("symbol ");
332 describe_symbol(value);
334 case NUMBER_TT: printf("literal number %d", value);
336 case DQ_TT: printf("string \"%s\"", text);
338 case SQ_TT: printf("string '%s'", text);
340 case SEP_TT: printf("separator '%s'", text);
342 case EOF_TT: printf("end of file");
345 case STATEMENT_TT: printf("statement name '%s'", text);
347 case SEGMENT_MARKER_TT: printf("object segment marker '%s'", text);
349 case DIRECTIVE_TT: printf("directive name '%s'", text);
351 case CND_TT: printf("textual conditional '%s'", text);
353 case OPCODE_NAME_TT: printf("opcode name '%s'", text);
355 case SYSFUN_TT: printf("built-in function name '%s'", text);
357 case LOCAL_VARIABLE_TT: printf("local variable name '%s'", text);
359 case MISC_KEYWORD_TT: printf("statement keyword '%s'", text);
361 case DIR_KEYWORD_TT: printf("directive keyword '%s'", text);
363 case TRACE_KEYWORD_TT: printf("'trace' keyword '%s'", text);
365 case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", text);
368 /* The remaining are etoken types, not set by the lexer */
370 case OP_TT: printf("operator '%s'",
371 operators[value].description);
373 case ENDEXP_TT: printf("end of expression");
375 case SUBOPEN_TT: printf("open bracket");
377 case SUBCLOSE_TT: printf("close bracket");
379 case LARGE_NUMBER_TT: printf("large number: '%s'=%d",text,value);
381 case SMALL_NUMBER_TT: printf("small number: '%s'=%d",text,value);
383 case VARIABLE_TT: printf("variable '%s'=%d", text, value);
385 case DICTWORD_TT: printf("dictionary word '%s'", text);
387 case ACTION_TT: printf("action name '%s'", text);
391 printf("** unknown token type %d, text='%s', value=%d **",
397 /* ------------------------------------------------------------------------- */
398 /* All but one of the Inform keywords (most of them opcode names used */
399 /* only by the assembler). (The one left over is "sp", a keyword used in */
400 /* assembly language only.) */
402 /* A "keyword group" is a set of keywords to be searched for. If a match */
403 /* is made on an identifier, the token type becomes that given in the KG */
404 /* and the token value is its index in the KG. */
406 /* The keyword ordering must correspond with the appropriate #define's in */
407 /* "header.h" but is otherwise not significant. */
408 /* ------------------------------------------------------------------------- */
/* This must exceed the total number of keywords across all groups,
   including opcodes. */
#define MAX_KEYWORDS (500)
414 /* The values will be filled in at compile time, when we know
415 which opcode set to use. */
416 keyword_group opcode_names =
418 OPCODE_NAME_TT, FALSE, TRUE
421 static char *opcode_list_z[] = {
422 "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
423 "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
424 "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
425 "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
426 "print_char", "print_num", "random", "push", "pull", "split_window",
427 "set_window", "output_stream", "input_stream", "sound_effect", "jz",
428 "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
429 "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
430 "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
431 "restore", "restart", "ret_popped", "pop", "quit", "new_line",
432 "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
433 "erase_window", "erase_line", "set_cursor", "get_cursor",
434 "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
435 "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
436 "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
437 "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
438 "restore_undo", "draw_picture", "picture_data", "erase_picture",
439 "set_margins", "move_window", "window_size", "window_style",
440 "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
441 "mouse_window", "push_stack", "put_wind_prop", "print_form",
442 "make_menu", "picture_table", "print_unicode", "check_unicode",
/* Glulx opcode names, in the standard Glulx ordering (see header.h). */
static char *opcode_list_g[] = {
    "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
    "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
    "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
    "jltu", "jgeu", "jgtu", "jleu",
    "call", "return",                    /* reconstructed line — verify */
    "catch", "throw", "tailcall",
    "copy", "copys", "copyb", "sexs", "sexb", "aload",
    "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
    "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
    "streamchar", "streamnum", "streamstr",
    "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
    "random", "setrandom", "quit", "verify",
    "restart", "save", "restore", "saveundo", "restoreundo", "protect",
    "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
    "linearsearch", "binarysearch", "linkedsearch",
    "callf", "callfi", "callfii", "callfiii",
    "streamunichar",                     /* reconstructed line — verify */
    "mzero", "mcopy", "malloc", "mfree",
    "accelfunc", "accelparam",
    "hasundo", "discardundo",
    "numtof", "ftonumz", "ftonumn", "ceil", "floor",
    "fadd", "fsub", "fmul", "fdiv", "fmod",
    "sqrt", "exp", "log", "pow",
    "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
    "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
    "numtod", "dtonumz", "dtonumn", "ftod", "dtof", "dceil", "dfloor",
    "dadd", "dsub", "dmul", "ddiv", "dmodr", "dmodq",
    "dsqrt", "dexp", "dlog", "dpow",
    "dsin", "dcos", "dtan", "dasin", "dacos", "datan", "datan2",
    "jdeq", "jdne", "jdlt", "jdle", "jdgt", "jdge", "jdisnan", "jdisinf",
    ""
};
480 keyword_group opcode_macros =
482 OPCODE_MACRO_TT, FALSE, TRUE
485 static char *opmacro_list_z[] = { "" };
487 static char *opmacro_list_g[] = {
488 "pull", "push", "dload", "dstore",
492 keyword_group directives =
493 { { "abbreviate", "array", "attribute", "class", "constant",
494 "default", "dictionary", "end", "endif", "extend", "fake_action",
495 "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
496 "iffalse", "import", "include", "link", "lowstring", "message",
497 "nearby", "object", "origsource", "property", "release", "replace",
498 "serial", "switches", "statusline", "stub", "system_file", "trace",
499 "undef", "verb", "version", "zcharacter",
501 DIRECTIVE_TT, FALSE, FALSE
504 keyword_group trace_keywords =
505 { { "dictionary", "symbols", "objects", "verbs",
506 "assembly", "expressions", "lines", "tokens", "linker",
508 TRACE_KEYWORD_TT, FALSE, TRUE
511 keyword_group segment_markers =
512 { { "class", "has", "private", "with", "" },
513 SEGMENT_MARKER_TT, FALSE, TRUE
516 keyword_group directive_keywords =
517 { { "alias", "long", "additive",
519 "noun", "held", "multi", "multiheld", "multiexcept",
520 "multiinside", "creature", "special", "number", "scope", "topic",
521 "reverse", "meta", "only", "replace", "first", "last",
522 "string", "table", "buffer", "data", "initial", "initstr",
523 "with", "private", "has", "class",
524 "error", "fatalerror", "warning",
525 "terminating", "static", "individual",
527 DIR_KEYWORD_TT, FALSE, TRUE
530 keyword_group misc_keywords =
531 { { "char", "name", "the", "a", "an", "The", "number",
532 "roman", "reverse", "bold", "underline", "fixed", "on", "off",
533 "to", "address", "string", "object", "near", "from", "property", "A", "" },
534 MISC_KEYWORD_TT, FALSE, TRUE
537 keyword_group statements =
538 { { "box", "break", "continue", "default", "do", "else", "font", "for",
539 "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
540 "print", "print_ret", "quit", "read", "remove", "restore", "return",
541 "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
542 "until", "while", "" },
543 STATEMENT_TT, FALSE, TRUE
546 keyword_group conditions =
547 { { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
551 keyword_group system_functions =
552 { { "child", "children", "elder", "eldest", "indirect", "parent", "random",
553 "sibling", "younger", "youngest", "metaclass", "glk", "" },
554 SYSFUN_TT, FALSE, TRUE
557 keyword_group system_constants =
558 { { "adjectives_table", "actions_table", "classes_table",
559 "identifiers_table", "preactions_table", "version_number",
560 "largest_object", "strings_offset", "code_offset",
561 "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
562 "static_memory_offset", "array_names_offset", "readable_memory_offset",
563 "cpv__start", "cpv__end", "ipv__start", "ipv__end",
564 "array__start", "array__end",
565 "lowest_attribute_number", "highest_attribute_number",
566 "attribute_names_array",
567 "lowest_property_number", "highest_property_number",
568 "property_names_array",
569 "lowest_action_number", "highest_action_number",
570 "action_names_array",
571 "lowest_fake_action_number", "highest_fake_action_number",
572 "fake_action_names_array",
573 "lowest_routine_number", "highest_routine_number", "routines_array",
574 "routine_names_array", "routine_flags_array",
575 "lowest_global_number", "highest_global_number", "globals_array",
576 "global_names_array", "global_flags_array",
577 "lowest_array_number", "highest_array_number", "arrays_array",
578 "array_names_array", "array_flags_array",
579 "lowest_constant_number", "highest_constant_number", "constants_array",
580 "constant_names_array",
581 "lowest_class_number", "highest_class_number", "class_objects_array",
582 "lowest_object_number", "highest_object_number",
584 "grammar_table", "dictionary_table", "dynam_string_table",
586 SYSTEM_CONSTANT_TT, FALSE, TRUE
589 keyword_group *keyword_groups[12]
590 = { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
591 &directive_keywords, &misc_keywords, &statements, &conditions,
592 &system_functions, &system_constants, &opcode_macros};
594 /* These keywords are set to point to local_variable_names entries when
595 a routine header is parsed. See construct_local_variable_tables(). */
596 keyword_group local_variables =
598 LOCAL_VARIABLE_TT, FALSE, FALSE
601 static int lexical_context(void)
603 /* The lexical context is a number representing all of the context
604 information in the lexical analyser: the same input text will
605 always translate to the same output tokens whenever the context
608 In fact, for efficiency reasons this number omits the bit of
609 information held in the variable "dont_enter_into_symbol_table".
610 Inform never needs to backtrack through tokens parsed in that
611 way (thankfully, as it would be expensive indeed to check
615 if (opcode_names.enabled) c |= 1;
616 if (directives.enabled) c |= 2;
617 if (trace_keywords.enabled) c |= 4;
618 if (segment_markers.enabled) c |= 8;
619 if (directive_keywords.enabled) c |= 16;
620 if (misc_keywords.enabled) c |= 32;
621 if (statements.enabled) c |= 64;
622 if (conditions.enabled) c |= 128;
623 if (system_functions.enabled) c |= 256;
624 if (system_constants.enabled) c |= 512;
625 if (local_variables.enabled) c |= 1024;
627 if (return_sp_as_variable) c |= 2048;
/* Print a mnemonic decode of the context bitmask c (for lexer tracing).
   Bit meanings mirror lexical_context() above. */
static void print_context(int c)
{
    if ((c & 1) != 0) printf("OPC ");
    if ((c & 2) != 0) printf("DIR ");
    if ((c & 4) != 0) printf("TK ");
    if ((c & 8) != 0) printf("SEG ");
    if ((c & 16) != 0) printf("DK ");
    if ((c & 32) != 0) printf("MK ");
    if ((c & 64) != 0) printf("STA ");
    if ((c & 128) != 0) printf("CND ");
    if ((c & 256) != 0) printf("SFUN ");
    if ((c & 512) != 0) printf("SCON ");
    if ((c & 1024) != 0) printf("LV ");
    if ((c & 2048) != 0) printf("sp ");
}
647 static int *keywords_hash_table;
648 static int *keywords_hash_ends_table;
649 static int *keywords_data_table;
651 static int *local_variable_hash_table;
652 static int *local_variable_hash_codes;
654 /* Note that MAX_LOCAL_VARIABLES is the maximum number of local variables
655 for this VM, *including* "sp" (the stack pointer "local").
656 This used to be a memory setting. Now it is a constant: 16 for Z-code,
660 /* Names of local variables in the current routine.
661 This is allocated to MAX_LOCAL_VARIABLES-1. (The stack pointer "local"
662 is not included in this array.)
664 (This could be a memlist, growing as needed up to MAX_LOCAL_VARIABLES-1.
665 But right now we just allocate the max.)
667 identstruct *local_variable_names;
669 static char one_letter_locals[128];
671 static void make_keywords_tables(void)
673 char **oplist, **maclist;
676 oplist = opcode_list_z;
677 maclist = opmacro_list_z;
680 oplist = opcode_list_g;
681 maclist = opmacro_list_g;
684 for (j=0; *(oplist[j]); j++) {
685 if (j >= MAX_KEYWORD_GROUP_SIZE) {
686 /* Gotta increase MAX_KEYWORD_GROUP_SIZE */
687 compiler_error("opcode_list has overflowed opcode_names.keywords");
690 opcode_names.keywords[j] = oplist[j];
692 opcode_names.keywords[j] = "";
694 for (j=0; *(maclist[j]); j++) {
695 if (j >= MAX_KEYWORD_GROUP_SIZE) {
696 /* Gotta increase MAX_KEYWORD_GROUP_SIZE */
697 compiler_error("opmacro_list has overflowed opcode_macros.keywords");
700 opcode_macros.keywords[j] = maclist[j];
702 opcode_macros.keywords[j] = "";
704 for (i=0; i<HASH_TAB_SIZE; i++)
705 { keywords_hash_table[i] = -1;
706 keywords_hash_ends_table[i] = -1;
709 for (i=1; i<=11; i++)
710 { keyword_group *kg = keyword_groups[i];
711 for (j=0; *(kg->keywords[j]) != 0; j++)
713 if (tp >= MAX_KEYWORDS) {
714 /* Gotta increase MAX_KEYWORDS */
715 compiler_error("keywords_data_table has overflowed MAX_KEYWORDS");
718 h = hash_code_from_string(kg->keywords[j]);
719 if (keywords_hash_table[h] == -1)
720 keywords_hash_table[h] = tp;
722 *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
723 keywords_hash_ends_table[h] = tp;
724 *(keywords_data_table + 3*tp) = i;
725 *(keywords_data_table + 3*tp+1) = j;
726 *(keywords_data_table + 3*tp+2) = -1;
732 /* Look at the strings stored in local_variable_names (from 0 to no_locals).
733 Set local_variables.keywords to point to these, and also prepare the
735 extern void construct_local_variable_tables(void)
737 for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
738 for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
740 for (i=0; i<no_locals; i++)
742 char *p = local_variable_names[i].text;
743 local_variables.keywords[i] = p;
745 { one_letter_locals[(uchar)p[0]] = i;
746 if (isupper(p[0])) one_letter_locals[tolower(p[0])] = i;
747 if (islower(p[0])) one_letter_locals[toupper(p[0])] = i;
749 h = hash_code_from_string(p);
750 if (local_variable_hash_table[h] == -1)
751 local_variable_hash_table[h] = i;
752 local_variable_hash_codes[i] = h;
754 /* Clear the rest. */
755 for (;i<MAX_LOCAL_VARIABLES-1;i++) {
756 local_variables.keywords[i] = "";
757 local_variable_hash_codes[i] = 0;
761 static void interpret_identifier(char *p, int pos, int dirs_only_flag)
762 { int index, hashcode;
764 /* An identifier is either a keyword or a "symbol", a name which the
765 lexical analyser leaves to higher levels of Inform to understand. */
767 hashcode = hash_code_from_string(p);
769 if (dirs_only_flag) goto KeywordSearch;
771 /* If this is assembly language, perhaps it is "sp"? */
773 if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
774 { circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
778 /* Test for local variables first, quite quickly. */
780 if (local_variables.enabled)
782 { index = one_letter_locals[(uchar)p[0]];
783 if (index<MAX_LOCAL_VARIABLES)
784 { circle[pos].type = LOCAL_VARIABLE_TT;
785 circle[pos].value = index+1;
789 index = local_variable_hash_table[hashcode];
791 { for (;index<no_locals;index++)
792 { if (hashcode == local_variable_hash_codes[index])
793 { if (strcmpcis(p, local_variable_names[index].text)==0)
794 { circle[pos].type = LOCAL_VARIABLE_TT;
795 circle[pos].value = index+1;
803 /* Now the bulk of the keywords. Note that the lexer doesn't recognise
804 the name of a system function which has been Replaced. */
807 index = keywords_hash_table[hashcode];
809 { int *i = keywords_data_table + 3*index;
810 keyword_group *kg = keyword_groups[*i];
811 if (((!dirs_only_flag) && (kg->enabled))
812 || (dirs_only_flag && (kg == &directives)))
813 { char *q = kg->keywords[*(i+1)];
814 if (((kg->case_sensitive) && (strcmp(p, q)==0))
815 || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
816 { if ((kg != &system_functions)
817 || (system_function_usage[*(i+1)]!=2))
818 { circle[pos].type = kg->change_token_type;
819 circle[pos].value = *(i+1);
827 if (dirs_only_flag) return;
829 /* Search for the name; create it if necessary. */
831 circle[pos].value = symbol_index(p, hashcode);
832 circle[pos].type = SYMBOL_TT;
836 /* ------------------------------------------------------------------------- */
837 /* The tokeniser grid aids a rapid decision about the consequences of a */
838 /* character reached in the buffer. In effect it is an efficiently stored */
839 /* transition table using an algorithm similar to that of S. C. Johnson's */
840 /* "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9). */
841 /* My thanks to Dilip Sequeira for suggesting this. */
843 /* tokeniser_grid[c] is (16*n + m) if c is the first character of */
844 /* separator numbers n, n+1, ..., n+m-1 */
845 /* or certain special values (QUOTE_CODE, etc) */
848 /* Since 1000/16 = 62, the code numbers below will need increasing if the */
849 /* number of separators supported exceeds 61. */
850 /* ------------------------------------------------------------------------- */
852 static int tokeniser_grid[256];
854 #define QUOTE_CODE 1000
855 #define DQUOTE_CODE 1001
856 #define NULL_CODE 1002
857 #define SPACE_CODE 1003
858 #define NEGATIVE_CODE 1004
859 #define DIGIT_CODE 1005
860 #define RADIX_CODE 1006
861 #define KEYWORD_CODE 1007
862 #define EOF_CODE 1008
863 #define WHITESPACE_CODE 1009
864 #define COMMENT_CODE 1010
865 #define IDENTIFIER_CODE 1011
867 /* This list cannot safely be changed without also changing the header
868 separator #defines. The ordering is significant in that (i) all entries
869 beginning with the same character must be adjacent and (ii) that if
870 X is an initial substring of Y then X must come after Y.
872 E.g. --> must occur before -- to prevent "-->0" being tokenised
873 wrongly as "--", ">", "0" rather than "-->", "0". */
875 static const char separators[NUMBER_SEPARATORS][4] =
876 { "->", "-->", "--", "-", "++", "+", "*", "/", "%",
877 "||", "|", "&&", "&", "~~",
878 "~=", "~", "==", "=", ">=", ">",
879 "<=", "<", "(", ")", ",",
880 ".&", ".#", "..&", "..#", "..", ".",
881 "::", ":", "@", ";", "[", "]", "{", "}",
883 "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
/* Build tokeniser_grid[]: zero everything, pack the separator groups
   (first pass packs 16*group_start + 1 for each new initial character,
   and increments the count nibble for each further separator sharing
   that initial), then overlay the special character classes. */
886 static void make_tokeniser_grid(void)
888 /* Construct the grid to the specification above. */
892 for (i=0; i<256; i++) tokeniser_grid[i]=0;
/* Pack separator groups: grid[c] = 16*first_index + count for all
   separators whose first character is c (they are adjacent by rule (i)
   of the separators[] comment). */
894 for (i=0; i<NUMBER_SEPARATORS; i++)
895 { j=separators[i][0];
896 if (tokeniser_grid[j]==0)
897 tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
/* Overlay special classes: quotes, end-of-file, whitespace, the '$'
   radix prefix and the '!' comment marker. */
899 tokeniser_grid['\''] = QUOTE_CODE;
900 tokeniser_grid['\"'] = DQUOTE_CODE;
901 tokeniser_grid[0] = EOF_CODE;
902 tokeniser_grid[' '] = WHITESPACE_CODE;
903 tokeniser_grid['\n'] = WHITESPACE_CODE;
904 tokeniser_grid['$'] = RADIX_CODE;
905 tokeniser_grid['!'] = COMMENT_CODE;
/* Decimal digits begin number tokens. */
907 tokeniser_grid['0'] = DIGIT_CODE;
908 tokeniser_grid['1'] = DIGIT_CODE;
909 tokeniser_grid['2'] = DIGIT_CODE;
910 tokeniser_grid['3'] = DIGIT_CODE;
911 tokeniser_grid['4'] = DIGIT_CODE;
912 tokeniser_grid['5'] = DIGIT_CODE;
913 tokeniser_grid['6'] = DIGIT_CODE;
914 tokeniser_grid['7'] = DIGIT_CODE;
915 tokeniser_grid['8'] = DIGIT_CODE;
916 tokeniser_grid['9'] = DIGIT_CODE;
/* Lower-case letters may begin an identifier. */
918 tokeniser_grid['a'] = IDENTIFIER_CODE;
919 tokeniser_grid['b'] = IDENTIFIER_CODE;
920 tokeniser_grid['c'] = IDENTIFIER_CODE;
921 tokeniser_grid['d'] = IDENTIFIER_CODE;
922 tokeniser_grid['e'] = IDENTIFIER_CODE;
923 tokeniser_grid['f'] = IDENTIFIER_CODE;
924 tokeniser_grid['g'] = IDENTIFIER_CODE;
925 tokeniser_grid['h'] = IDENTIFIER_CODE;
926 tokeniser_grid['i'] = IDENTIFIER_CODE;
927 tokeniser_grid['j'] = IDENTIFIER_CODE;
928 tokeniser_grid['k'] = IDENTIFIER_CODE;
929 tokeniser_grid['l'] = IDENTIFIER_CODE;
930 tokeniser_grid['m'] = IDENTIFIER_CODE;
931 tokeniser_grid['n'] = IDENTIFIER_CODE;
932 tokeniser_grid['o'] = IDENTIFIER_CODE;
933 tokeniser_grid['p'] = IDENTIFIER_CODE;
934 tokeniser_grid['q'] = IDENTIFIER_CODE;
935 tokeniser_grid['r'] = IDENTIFIER_CODE;
936 tokeniser_grid['s'] = IDENTIFIER_CODE;
937 tokeniser_grid['t'] = IDENTIFIER_CODE;
938 tokeniser_grid['u'] = IDENTIFIER_CODE;
939 tokeniser_grid['v'] = IDENTIFIER_CODE;
940 tokeniser_grid['w'] = IDENTIFIER_CODE;
941 tokeniser_grid['x'] = IDENTIFIER_CODE;
942 tokeniser_grid['y'] = IDENTIFIER_CODE;
943 tokeniser_grid['z'] = IDENTIFIER_CODE;
/* Upper-case letters likewise. */
945 tokeniser_grid['A'] = IDENTIFIER_CODE;
946 tokeniser_grid['B'] = IDENTIFIER_CODE;
947 tokeniser_grid['C'] = IDENTIFIER_CODE;
948 tokeniser_grid['D'] = IDENTIFIER_CODE;
949 tokeniser_grid['E'] = IDENTIFIER_CODE;
950 tokeniser_grid['F'] = IDENTIFIER_CODE;
951 tokeniser_grid['G'] = IDENTIFIER_CODE;
952 tokeniser_grid['H'] = IDENTIFIER_CODE;
953 tokeniser_grid['I'] = IDENTIFIER_CODE;
954 tokeniser_grid['J'] = IDENTIFIER_CODE;
955 tokeniser_grid['K'] = IDENTIFIER_CODE;
956 tokeniser_grid['L'] = IDENTIFIER_CODE;
957 tokeniser_grid['M'] = IDENTIFIER_CODE;
958 tokeniser_grid['N'] = IDENTIFIER_CODE;
959 tokeniser_grid['O'] = IDENTIFIER_CODE;
960 tokeniser_grid['P'] = IDENTIFIER_CODE;
961 tokeniser_grid['Q'] = IDENTIFIER_CODE;
962 tokeniser_grid['R'] = IDENTIFIER_CODE;
963 tokeniser_grid['S'] = IDENTIFIER_CODE;
964 tokeniser_grid['T'] = IDENTIFIER_CODE;
965 tokeniser_grid['U'] = IDENTIFIER_CODE;
966 tokeniser_grid['V'] = IDENTIFIER_CODE;
967 tokeniser_grid['W'] = IDENTIFIER_CODE;
968 tokeniser_grid['X'] = IDENTIFIER_CODE;
969 tokeniser_grid['Y'] = IDENTIFIER_CODE;
970 tokeniser_grid['Z'] = IDENTIFIER_CODE;
/* ...and underscore. */
972 tokeniser_grid['_'] = IDENTIFIER_CODE;
975 /* ------------------------------------------------------------------------- */
976 /* Definition of a lexical block: a source file or a string containing */
977 /* text for lexical analysis; an independent source from the point of */
978 /* view of issuing error reports. */
979 /* ------------------------------------------------------------------------- */
981 typedef struct LexicalBlock_s
982 { char *filename; /* Full translated name */
983 int main_flag; /* TRUE if the main file
984 (the first one opened) */
985 int sys_flag; /* TRUE if a System_File */
986 int source_line; /* Line number count */
987 int line_start; /* Char number within file
988 where the current line
begins */
990 int chars_read; /* Char number of read pos */
991 int file_no; /* Or 255 if not from a
file */
994 char *orig_source; /* From #origsource direct */
/* NOTE(review): this listing elides the orig_file/orig_line/orig_char
   fields; the 11-value initializers below supply them — confirm against
   the full source. */
/* Sentinel blocks for the lexer's non-file states; the 255 file number
   marks "not from a source file" (see report_errors_at_current_line). */
1000 static LexicalBlock NoFileOpen =
1001 { "<before compilation>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
1003 static LexicalBlock MakingOutput =
1004 { "<constructing output>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
1006 static LexicalBlock StringLB =
1007 { "<veneer routine>", FALSE, TRUE, 0, 0, 0, 255, NULL, 0, 0, 0 };
1009 static LexicalBlock *CurrentLB; /* The current lexical
1010 block of input text */
/* Mark the lexical block currently being read as a System_File. */
1012 extern void declare_systemfile(void)
1013 { CurrentLB->sys_flag = TRUE;
/* Return 1 if the current lexical block is a System_File, else 0. */
1016 extern int is_systemfile(void)
1017 { return ((CurrentLB->sys_flag)?1:0);
/* Record an "Origsource" position (original-source file/line/char) on
   the current lexical block, registering the named file in InputFiles
   if necessary; an empty/NULL source clears the declaration instead
   (the early-return branch is on lines elided from this listing). */
1020 extern void set_origsource_location(char *source, int32 line, int32 charnum)
1025 /* Clear the Origsource declaration. */
1026 CurrentLB->orig_file = 0;
1027 CurrentLB->orig_source = NULL;
1028 CurrentLB->orig_line = 0;
1029 CurrentLB->orig_char = 0;
1033 /* Get the file number for a new or existing InputFiles entry. */
1034 file_no = register_orig_sourcefile(source);
1036 CurrentLB->orig_file = file_no;
1037 CurrentLB->orig_source = InputFiles[file_no-1].filename;
1038 CurrentLB->orig_line = line;
1039 CurrentLB->orig_char = charnum;
1042 /* Error locations. */
/* Build a debug_location for the current read position. The position
   is backed up by LOOKAHEAD_SIZE because chars_read counts characters
   already pulled into the lookahead pipeline. Begin and end positions
   are set equal (a single point, not a range). */
1044 extern debug_location get_current_debug_location(void)
1045 { debug_location result;
1046 /* Assume that all input characters are one byte. */
1047 result.file_index = CurrentLB->file_no;
1048 result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
1049 result.end_byte_index = result.beginning_byte_index;
1050 result.beginning_line_number = CurrentLB->source_line;
1051 result.end_line_number = result.beginning_line_number;
1052 result.beginning_character_number =
1053 CurrentLB->chars_read - CurrentLB->line_start;
1054 result.end_character_number = result.beginning_character_number;
1055 result.orig_file_index = CurrentLB->orig_file;
1056 result.orig_beg_line_number = CurrentLB->orig_line;
1057 result.orig_beg_char_number = CurrentLB->orig_char;
1061 static debug_location ErrorReport_debug_location;
/* Snapshot the current lexical position into the global ErrorReport,
   so that any diagnostics issued now are attributed to this line.
   File number 255 (the "no file" sentinel) is translated to -1. */
1063 extern void report_errors_at_current_line(void)
1064 { ErrorReport.line_number = CurrentLB->source_line;
1065 ErrorReport.file_number = CurrentLB->file_no;
1066 if (ErrorReport.file_number == 255)
1067 ErrorReport.file_number = -1;
1068 ErrorReport.source = CurrentLB->filename;
1069 ErrorReport.main_flag = CurrentLB->main_flag;
1070 if (debugfile_switch)
1071 ErrorReport_debug_location = get_current_debug_location();
1072 ErrorReport.orig_file = CurrentLB->orig_file;
1073 ErrorReport.orig_source = CurrentLB->orig_source;
1074 ErrorReport.orig_line = CurrentLB->orig_line;
1075 ErrorReport.orig_char = CurrentLB->orig_char;
/* Return the debug_location snapshotted by report_errors_at_current_line(). */
1078 extern debug_location get_error_report_debug_location(void)
1079 { return ErrorReport_debug_location;
/* Return the character offset at which the current source line begins. */
1082 extern int32 get_current_line_start(void)
1083 { return CurrentLB->line_start;
1086 brief_location blank_brief_location;
/* Pack an ErrorPosition into the compact brief_location form
   (file/line plus original-source file/line only). */
1088 extern brief_location get_brief_location(ErrorPosition *errpos)
1091 loc.file_index = errpos->file_number;
1092 loc.line_number = errpos->line_number;
1093 loc.orig_file_index = errpos->orig_file;
1094 loc.orig_line_number = errpos->orig_line;
/* Unpack a brief_location back into an ErrorPosition. A file_index of
   -1 means "no location" and leaves errpos untouched; main_flag is
   reconstructed from the convention that file number 1 is the main
   source file (see begin_buffering_file). */
1098 extern void export_brief_location(brief_location loc, ErrorPosition *errpos)
1100 if (loc.file_index != -1)
1101 { errpos->file_number = loc.file_index;
1102 errpos->line_number = loc.line_number;
1103 errpos->main_flag = (errpos->file_number == 1);
1104 errpos->orig_source = NULL;
1105 errpos->orig_file = loc.orig_file_index;
1106 errpos->orig_line = loc.orig_line_number;
1107 errpos->orig_char = 0;
1111 /* ------------------------------------------------------------------------- */
1112 /* Hash printing and line counting */
1113 /* ------------------------------------------------------------------------- */
/* Emit one "#" progress marker (prefixed by "::" the first time);
   called every 100 source lines when the -x switch is on. */
1115 static void print_hash(void)
1117 /* Hash-printing is the practice of printing a # character every 100
1118 lines of source code (the -x switch), reassuring the user that
1119 progress is being made */
1121 if (no_hash_printed_yet)
1122 { printf("::"); no_hash_printed_yet = FALSE;
1124 printf("#"); hash_printed_since_newline = TRUE;
1127 /* On some systems, text output is buffered to a line at a time, and
1128 this would frustrate the point of hash-printing, so: */
/* Bookkeeping for a newline in the input: reset the for-errors line
   buffer, advance the line counters on the current lexical block, and
   every 100 lines optionally print a progress hash. */
1134 static void reached_new_line(void)
1136 /* Called to signal that a new line has been reached in the source code */
1138 forerrors_pointer = 0;
1140 CurrentLB->source_line++;
1141 CurrentLB->line_start = CurrentLB->chars_read;
1143 total_source_line_count++;
1145 if (total_source_line_count%100==0)
1146 { if (hash_switch) print_hash();
/* NOTE(review): the SpinCursor/ProcessEvents/longjmp calls below are
   classic-Mac cooperative-multitasking support; the surrounding #ifdef
   lines are elided from this listing — confirm against full source. */
1148 SpinCursor(32); /* I.e., allow other tasks to run */
1153 if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
1154 { ProcessEvents (&g_proc);
1158 abort_transcript_file();
1159 longjmp (g_fallback, 1);
/* Begin a new syntax line for error reporting: reset the for-errors
   buffer (when lexing from a string) and re-anchor ErrorReport. */
1165 static void new_syntax_line(void)
1166 { if (source_to_analyse != NULL) forerrors_pointer = 0;
1167 report_errors_at_current_line();
1170 /* Return 10 raised to the expo power.
1172 * I'm avoiding the standard pow() function for a rather lame reason:
1173 * it's in the libmath (-lm) library, and I don't want to change the
1174 * build model for the compiler. So, this is implemented with a stupid
1175 * lookup table. It's faster than pow() for small values of expo.
1176 * Probably not as fast if expo is 200, but "$+1e200" is an overflow
1177 * anyway, so I don't expect that to be a problem.
1179 * (For some reason, frexp() and ldexp(), which are used later on, do
1180 * not require libmath to be linked in.) */
/* Return 10^expo via a lookup table for |expo| <= POW10_RANGE, and by
   repeated multiplication/division in POW10_RANGE-sized steps outside
   that range (see the libmath-avoidance comment above).
   NOTE(review): the table's middle 1.0 entry (10^0) and the loop
   bodies fall on lines elided from this listing. */
1182 static double pow10_cheap(int expo)
1184 #define POW10_RANGE (8)
1185 static double powers[POW10_RANGE*2+1] = {
1186 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
1188 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
1194 for (; expo < -POW10_RANGE; expo += POW10_RANGE) {
1197 return res * powers[POW10_RANGE+expo];
1200 for (; expo > POW10_RANGE; expo -= POW10_RANGE) {
1201 res *= powers[POW10_RANGE*2];
1203 return res * powers[POW10_RANGE+expo];
1207 /* Return the IEEE-754 single-precision encoding of a floating-point
1210 * The number is provided in the pieces it was parsed in:
1211 * [+|-] intv "." fracv "e" [+|-]expo
1213 * If the magnitude is too large (beyond about 3.4e+38), this returns
1214 * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
1215 * small (below about 1e-45), this returns a zero value (0x00000000 or
1216 * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
1217 * lexer should never do that).
1219 * Note that using a float constant does *not* set the uses_float_features
1220 * flag (which would cause the game file to be labelled 3.1.2). Same with
1221 * double constants and the uses_double_features flag. There's no VM
1222 * feature here, just an integer. Of course, any use of the float *opcodes*
1223 * will set the flag.
1225 * The math functions in this routine require #including <math.h>, but
1226 * they should not require linking the math library (-lm). At least,
1227 they do not on OSX and Linux. */
/* Encode [+|-] intv.fracv e expo as an IEEE-754 single (see the long
   comment above): handle inf/NaN, split with frexp, then build
   sign | biased-exponent | 23-bit mantissa, with denormal and
   rounding-carry overflow cases mapped to zero/infinity. */
1229 static int32 construct_float(int signbit, double intv, double fracv, int expo)
1231 double absval = (intv + fracv) * pow10_cheap(expo);
1232 int32 sign = (signbit ? 0x80000000 : 0x0);
1236 if (isinf(absval)) {
1237 return sign | 0x7f800000; /* infinity */
1239 if (isnan(absval)) {
1240 return sign | 0x7fc00000; /* quiet NaN */
1243 mant = frexp(absval, &expo);
1245 /* Normalize mantissa to be in the range [1.0, 2.0) */
1246 if (0.5 <= mant && mant < 1.0) {
1250 else if (mant == 0.0) {
1254 return sign | 0x7f800000; /* infinity */
1258 return sign | 0x7f800000; /* infinity */
1260 else if (expo < -126) {
1261 /* Denormalized (very small) number */
1262 mant = ldexp(mant, 126 + expo);
1265 else if (!(expo == 0 && mant == 0.0)) {
1267 mant -= 1.0; /* Get rid of leading 1 */
1270 mant *= 8388608.0; /* 2^23 */
1271 fbits = (int32)(mant + 0.5); /* round mant to nearest int */
1273 /* The carry propagated out of a string of 23 1 bits. */
1277 return sign | 0x7f800000; /* infinity */
1281 /* At this point, expo is less than 2^8; fbits is less than 2^23; neither is negative. */
1282 return (sign) | ((int32)(expo << 23)) | (fbits);
1285 /* Same as the above, but we return *half* of a 64-bit double, depending on whether wanthigh is true (high half) or false (low half). */
/* IEEE-754 double layout: sign | 11-bit exponent | 52-bit mantissa,
   split here as fhi (upper 28 mantissa bits) and flo (lower 24). */
1287 static int32 construct_double(int wanthigh, int signbit, double intv, double fracv, int expo)
1289 double absval = (intv + fracv) * pow10_cheap(expo);
1290 int32 sign = (signbit ? 0x80000000 : 0x0);
1294 if (isinf(absval)) {
1297 if (isnan(absval)) {
1301 mant = frexp(absval, &expo);
1303 /* Normalize mantissa to be in the range [1.0, 2.0) */
1304 if (0.5 <= mant && mant < 1.0) {
1308 else if (mant == 0.0) {
1318 else if (expo < -1022) {
1319 /* Denormalized (very small) number */
1320 mant = ldexp(mant, 1022 + expo);
1323 else if (!(expo == 0 && mant == 0.0)) {
1325 mant -= 1.0; /* Get rid of leading 1 */
1328 /* fhi receives the high 28 bits; flo the low 24 bits (total 52 bits) */
1329 mant *= 268435456.0; /* 2^28 */
1330 fhi = (uint32)mant; /* Truncate */
1331 mant -= (double)fhi;
1332 mant *= 16777216.0; /* 2^24 */
1333 flo = (uint32)(mant+0.5); /* Round */
1336 /* The carry propagated out of a string of 24 1 bits. */
1340 /* And it also propagated out of the next 28 bits. */
1349 /* At this point, expo is less than 2^11; fhi is less than 2^28; flo is less than 2^24; none are negative. */
1351 return (sign) | ((int32)(expo << 20)) | ((int32)(fhi >> 8));
1353 return (int32)((fhi & 0xFF) << 24) | (int32)(flo);
1357 return sign | 0x7FF00000; /* infinity (high word) */
1363 return sign | 0x7FF80000; /* quiet NaN (high word) */
1368 /* ------------------------------------------------------------------------- */
1369 /* Characters are read via a "pipeline" of variables, allowing us to look */
1370 /* up to three characters ahead of the current position. */
1372 /* There are two possible sources: from the source files being loaded in, */
1373 /* and from a string inside Inform (which is where the code for veneer */
1374 /* routines comes from). Each source has its own get-next-character */
1376 /* ------------------------------------------------------------------------- */
1377 /* Source 1: from files */
1379 /* Note that file_load_chars(p, size) loads "size" bytes into buffer "p" */
1380 /* from the current input file. If the file runs out, then if it was */
1381 /* the last source file 4 EOF characters are placed in the buffer: if it */
1382 /* was only an Include file ending, then a '\n' character is placed there */
1383 /* (essentially to force termination of any comment line) followed by */
1384 /* three harmless spaces. */
1386 /* The routine returns the number of characters it has written, and note */
1387 /* that this conveniently ensures that all characters in the buffer come */
1388 /* from the same file. */
1389 /* ------------------------------------------------------------------------- */
1391 #define SOURCE_BUFFER_SIZE 4096 /* Typical disc block size */
1393 typedef struct Sourcefile_s
1394 { char *buffer; /* Input buffer */
1395 int read_pos; /* Read position in buffer */
1396 int size; /* Number of meaningful
1397 characters in buffer */
1398 int la, la2, la3; /* Three characters of
1399 lookahead pipeline */
1400 int file_no; /* Internal file number
(indexes InputFiles; see begin_buffering_file) */
1405 static Sourcefile *FileStack; /* Allocated to FileStack_max */
1406 static memory_list FileStack_memlist;
1407 static int FileStack_max; /* The highest value that File_sp has
1409 (Filestack entries to this depth have
1410 a buffer allocated) */
1412 static int File_sp; /* Current stack pointer */
1413 static Sourcefile *CF; /* Top entry on stack (always equal to
1414 FileStack[File_sp-1]) */
/* Most recent value of current_input_file that the pipeline has caught
   up with; compared in get_next_char_from_pipeline to detect Includes. */
1416 static int last_input_file;
1418 /* Set CF and CurrentLB.
1419 This does not increment File_sp; the caller must do that. */
/* Push source file file_no onto the file stack at depth i: allocate a
   buffer if this depth has never been used, save the outer file's
   lookahead pipeline, load the first block, prime the three lookahead
   characters, initialise the file's LexicalBlock, and warn about
   repeated inclusion. Does not increment File_sp (caller's job). */
1420 static void begin_buffering_file(int i, int file_no)
1421 { int j, cnt; uchar *p;
1426 ensure_memory_list_available(&FileStack_memlist, i+1);
1427 while (i >= FileStack_max) {
1428 FileStack[FileStack_max++].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
1431 p = (uchar *) FileStack[i].buffer;
/* Save the enclosing file's lookahead pipeline so it can be restored
   when this Include finishes. */
1434 { FileStack[i-1].la = lookahead;
1435 FileStack[i-1].la2 = lookahead2;
1436 FileStack[i-1].la3 = lookahead3;
1439 FileStack[i].file_no = file_no;
1440 FileStack[i].size = file_load_chars(file_no,
1441 (char *) p, SOURCE_BUFFER_SIZE);
1442 /* If the file is shorter than SOURCE_BUFFER_SIZE, it's now closed already. We still need to set up the file entry though. */
1444 lookahead = source_to_iso_grid[p[0]];
1445 lookahead2 = source_to_iso_grid[p[1]];
1446 lookahead3 = source_to_iso_grid[p[2]];
1447 if (LOOKAHEAD_SIZE != 3)
1449 ("Lexer lookahead size does not match hard-coded lookahead code");
1450 FileStack[i].read_pos = LOOKAHEAD_SIZE;
/* File number 1 is, by convention, the main source file. */
1452 if (file_no==1) FileStack[i].LB.main_flag = TRUE;
1453 else FileStack[i].LB.main_flag = FALSE;
1454 FileStack[i].LB.sys_flag = FALSE;
1455 FileStack[i].LB.source_line = 1;
1456 FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
1457 FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
1458 FileStack[i].LB.filename = InputFiles[file_no-1].filename;
1459 FileStack[i].LB.file_no = file_no;
1460 FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0;
1461 FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0;
1463 InputFiles[file_no-1].initial_buffering = FALSE;
1465 CurrentLB = &(FileStack[i].LB);
1466 CF = &(FileStack[i]);
1468 /* Check for recursive inclusion */
1471 { if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
1475 warning_named("File included more than once",
1476 FileStack[j].LB.filename);
/* Start the character pipeline on the main source file (file 1). */
1479 static void create_char_pipeline(void)
1482 begin_buffering_file(File_sp++, 1);
1483 pipeline_made = TRUE;
1484 last_input_file = current_input_file;
/* Pull one character from the file pipeline: first push any newly
   opened Include files onto the stack, pop finished files (restoring
   the saved lookahead), refill the buffer when exhausted, then advance
   the three-character lookahead and return the ISO-translated current
   character (0 at end of all input). */
1487 static int get_next_char_from_pipeline(void)
1490 while (last_input_file < current_input_file)
1492 /* An "Include" file must have opened since the last character
1493 was read. Perhaps more than one. We run forward through the
1494 list and add them to the include stack. But we ignore
1495 "Origsource" files (which were never actually opened for
1499 if (!InputFiles[last_input_file-1].is_input)
1502 begin_buffering_file(File_sp++, last_input_file);
1504 if (last_input_file != current_input_file)
1505 compiler_error("last_input_file did not match after Include");
1508 { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
1511 if (CF->read_pos == CF->size)
1513 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
/* A negative size marks end-of-file: record the location and pop
   back to the including file (or stop if the stack is empty). */
1517 if (CF->read_pos == -(CF->size))
1518 { set_token_location(get_current_debug_location());
1521 { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
1523 CF = &(FileStack[File_sp-1]);
1524 CurrentLB = &(FileStack[File_sp-1].LB);
1525 lookahead = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
1526 if (CF->read_pos == CF->size)
1528 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
1531 set_token_location(get_current_debug_location());
1534 p = (uchar *) (CF->buffer);
/* Shift the lookahead pipeline along by one character. */
1536 current = lookahead;
1537 lookahead = lookahead2;
1538 lookahead2 = lookahead3;
1539 lookahead3 = source_to_iso_grid[p[CF->read_pos++]];
1541 CurrentLB->chars_read++;
1542 if (forerrors_pointer < FORERRORS_SIZE-1)
1543 forerrors_buff[forerrors_pointer++] = current;
1544 if (current == '\n') reached_new_line();
1548 /* ------------------------------------------------------------------------- */
1549 /* Source 2: from a string */
1550 /* ------------------------------------------------------------------------- */
1552 static int source_to_analyse_pointer; /* Current read position */
/* Pull one character from the in-memory string source (veneer code),
   maintaining the same three-character lookahead pipeline as the file
   source; the terminating null propagates through the lookaheads. */
1554 static int get_next_char_from_string(void)
1555 { uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
1556 current = source_to_iso_grid[p[0]];
1558 if (current == 0) lookahead = 0;
1559 else lookahead = source_to_iso_grid[p[1]];
1560 if (lookahead == 0) lookahead2 = 0;
1561 else lookahead2 = source_to_iso_grid[p[2]];
1562 if (lookahead2 == 0) lookahead3 = 0;
1563 else lookahead3 = source_to_iso_grid[p[3]];
1565 CurrentLB->chars_read++;
1566 if (forerrors_pointer < FORERRORS_SIZE-1)
1567 forerrors_buff[forerrors_pointer++] = current;
1568 if (current == '\n') reached_new_line();
1572 /* ========================================================================= */
1573 /* The interface between the lexer and Inform's higher levels: */
1575 /* put_token_back() (effectively) move the read position */
1576 /* back by one token */
1578 /* get_next_token() copy the token at the current read */
1579 /* position into the triple */
1580 /* (token_type, token_value, token_text) */
1581 /* and move the read position forward */
1584 /* release_token_texts() discard all the tokens that have been */
1585 /* read in, except for put-back ones */
1587 /* restart_lexer(source, name) if source is NULL, initialise the lexer */
1588 /* to read from source files; */
1589 /* otherwise, to read from this string. */
1590 /* ------------------------------------------------------------------------- */
/* Recycle the lextexts array, preserving only the text of tokens that
   have been put back (which are compacted to the head of the array). */
1592 extern void release_token_texts(void)
1594 /* This is called at the beginning of every (top-level) directive and
1595 every statement. It drops all token usage so that the lextexts
1596 array can be reused.
1598 Call this immediately before a get_next_token() call.
1600 This should *not* be called within parse_expression(). Expression
1601 code generation relies on token data being retained across the whole
expression. */
1608 if (tokens_put_back == 0) {
1613 /* If any tokens have been put back, we have to preserve their text.
1614 Move their lextext usage to the head of the lextexts array. */
1616 for (ix=0; ix<tokens_put_back; ix++) {
1619 int pos = circle_position - tokens_put_back + 1 + ix;
1620 if (pos < 0) pos += CIRCLE_SIZE;
1622 oldpos = circle[pos].lextext;
1623 circle[pos].lextext = ix;
1624 /* Swap the entire lextext structure (including size) */
1625 temp = lextexts[ix];
1626 lextexts[ix] = lextexts[oldpos];
1627 lextexts[oldpos] = temp;
1629 cur_lextexts = tokens_put_back;
/* Push the most recently read token back into the circle so the next
   get_next_token() call returns it again; more than one token may be
   put back, up to CIRCLE_SIZE-1. */
1632 extern void put_token_back(void)
1633 { tokens_put_back++;
1635 if (tokens_trace_level > 0)
1636 { if (tokens_trace_level == 1) printf("<- ");
1637 else printf("<-\n");
1640 /* The following error, of course, should never happen! */
1642 if (tokens_put_back == CIRCLE_SIZE)
1643 { compiler_error("The lexical analyser has collapsed because of a wrong \
1644 assumption inside Inform");
1650 /* The get_next_token() code reads characters into the current lextext,
1651 which is lextexts[lex_index]. It uses these routines to add and remove
1652 characters, reallocing when necessary.
1654 lex_pos is the current number of characters in the lextext. It is
1655 not necessarily null-terminated until get_next_token() completes. */
1658 /* Add one character to the current lextext, doubling its buffer when full */
1659 static void lexaddc(char ch)
1661 if ((size_t)lex_pos >= lextexts[lex_index].size) {
1662 size_t newsize = lextexts[lex_index].size * 2;
1663 my_realloc(&lextexts[lex_index].text, lextexts[lex_index].size, newsize, "one lexeme text");
1664 lextexts[lex_index].size = newsize;
1666 lextexts[lex_index].text[lex_pos++] = ch;
1669 /* Remove the last character and ensure it's null-terminated */
1670 static void lexdelc(void)
1675 lextexts[lex_index].text[lex_pos] = 0;
1678 /* Return the last character of the current lextext */
1679 static char lexlastc(void)
1684 return lextexts[lex_index].text[lex_pos-1];
1687 /* Add a string of characters (including the null) */
/* NOTE(review): the body of lexadds is on lines elided from this
   listing — presumably a lexaddc loop over str; confirm. */
1688 static void lexadds(char *str)
/* The main tokeniser: either re-issue a put-back token (re-lexing its
   identifier if the lexical context has changed), or read characters
   via (*get_next_char)() and classify them through tokeniser_grid[]
   into one token — whitespace/comments are skipped, numbers (decimal,
   $hex, $$binary, floats after $+/$-), 'single'- and "double"-quoted
   strings, identifiers, and multi-character separators are recognised.
   The result is exported in token_type / token_value / token_text.
   (Many lines of this function are elided from this listing.) */
1697 extern void get_next_token(void)
1698 { int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
1700 int returning_a_put_back_token = TRUE;
1702 context = lexical_context();
/* Case 1: a token was put back — hand it out again, re-interpreting
   identifiers if the context has changed since it was lexed. */
1704 if (tokens_put_back > 0)
1705 { i = circle_position - tokens_put_back + 1;
1706 if (i<0) i += CIRCLE_SIZE;
1708 if (context != circle[i].context)
1709 { j = circle[i].type;
1710 if ((j==0) || ((j>=100) && (j<200)))
1711 interpret_identifier(circle[i].text, i, FALSE);
1712 circle[i].context = context;
/* Case 2: lex a fresh token into the next circle slot. */
1716 returning_a_put_back_token = FALSE;
1718 if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1719 else circle_position++;
1721 lex_index = cur_lextexts++;
1722 if (lex_index >= no_lextexts) {
1723 /* fresh lextext block; must init it */
1724 no_lextexts = lex_index+1;
1725 ensure_memory_list_available(&lextexts_memlist, no_lextexts);
1726 lextexts[lex_index].size = MAX_IDENTIFIER_LENGTH + 1;
1727 lextexts[lex_index].text = my_malloc(lextexts[lex_index].size, "one lexeme text");
1730 lextexts[lex_index].text[0] = 0; /* start with an empty string */
1732 circle[circle_position].lextext = lex_index;
1733 circle[circle_position].text = NULL; /* will fill in later */
1734 circle[circle_position].value = 0;
1735 circle[circle_position].type = 0;
1736 circle[circle_position].context = context;
/* (StartTokenAgain label elided.) Read one character and dispatch on
   its tokeniser_grid[] class. */
1739 d = (*get_next_char)();
1740 e = tokeniser_grid[d];
1742 if (next_token_begins_syntax_line)
1743 { if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1744 { new_syntax_line();
1745 next_token_begins_syntax_line = FALSE;
1749 circle[circle_position].location = get_current_debug_location();
1752 { case 0: char_error("Illegal character found in source:", d);
1753 goto StartTokenAgain;
1755 case WHITESPACE_CODE:
1756 while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1758 goto StartTokenAgain;
1761 while ((lookahead != '\n') && (lookahead != 0))
1763 goto StartTokenAgain;
1766 circle[circle_position].type = EOF_TT;
1767 lexadds("<end of file>");
1775 { n = n*radix + character_digit_value[d];
1777 } while ((character_digit_value[lookahead] < radix)
1778 && (d = (*get_next_char)(), TRUE));
1781 circle[circle_position].type = NUMBER_TT;
1782 circle[circle_position].value = n;
1786 /* When we reach here, d is the sign bit ('+' or '-').
1787 If we're constructing a 32-bit float, floatend is 0;
1788 for a 64-bit double, floatend is '>' for high, '<' for low. */
1789 { int expo=0; double intv=0, fracv=0;
1790 int expocount=0, intcount=0, fraccount=0;
1791 int signbit = (d == '-');
1793 while (character_digit_value[lookahead] < 10) {
1794 intv = 10.0*intv + character_digit_value[lookahead];
1799 if (lookahead == '.') {
1800 double fracpow = 1.0;
1803 while (character_digit_value[lookahead] < 10) {
1805 fracv = fracv + fracpow*character_digit_value[lookahead];
1811 if (lookahead == 'e' || lookahead == 'E') {
1815 if (lookahead == '+' || lookahead == '-') {
1816 exposign = (lookahead == '-');
1820 while (character_digit_value[lookahead] < 10) {
1821 expo = 10*expo + character_digit_value[lookahead];
1827 error("Floating-point literal must have digits after the 'e'");
1828 if (exposign) { expo = -expo; }
1830 if (intcount + fraccount == 0)
1831 error("Floating-point literal must have digits");
1832 if (floatend == '>')
1833 n = construct_double(TRUE, signbit, intv, fracv, expo);
1834 else if (floatend == '<')
1835 n = construct_double(FALSE, signbit, intv, fracv, expo);
1837 n = construct_float(signbit, intv, fracv, expo);
1840 circle[circle_position].type = NUMBER_TT;
1841 circle[circle_position].value = n;
1842 if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
1846 radix = 16; d = (*get_next_char)();
1847 if (d == '-' || d == '+') {
1851 if (d == '<' || d == '>') {
1853 d = (*get_next_char)();
1854 if (d == '-' || d == '+') {
1857 error("Signed number expected after '$<' or '$>'");
1859 if (d == '$') { d = (*get_next_char)(); radix = 2; }
1860 if (character_digit_value[d] >= radix)
1862 error("Binary number expected after '$$'");
1864 error("Hexadecimal number expected after '$'");
1868 case QUOTE_CODE: /* Single-quotes: scan a literal string */
1871 { e = d; d = (*get_next_char)(); lexaddc(d);
1872 if (quoted_size++==64)
1874 "Too much text for one pair of quotations '...' to hold");
1875 lexaddc('\''); break;
1877 if ((d == '\'') && (e != '@'))
1878 { if (quoted_size == 1)
1879 { d = (*get_next_char)(); lexaddc(d);
1881 error("No text between quotation marks ''");
1886 if (d==EOF) ebf_error("'\''", "end of file");
1888 circle[circle_position].type = SQ_TT;
1891 case DQUOTE_CODE: /* Double-quotes: scan a literal string */
1894 { d = (*get_next_char)(); lexaddc(d);
1897 while (lexlastc() == ' ') lex_pos--;
1898 if (lexlastc() != '^') lexaddc(' ');
1899 while ((lookahead != EOF) &&
1900 (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1904 { int newline_passed = FALSE;
1906 while ((lookahead != EOF) &&
1907 (tokeniser_grid[lookahead] == WHITESPACE_CODE))
1908 if ((d = (*get_next_char)()) == '\n')
1909 newline_passed = TRUE;
1910 if (!newline_passed)
1912 chb[0] = '\"'; chb[1] = lookahead;
1913 chb[2] = '\"'; chb[3] = 0;
1914 ebf_error("empty rest of line after '\\' in string",
1918 } while ((d != EOF) && (d!='\"'));
1919 if (d==EOF) ebf_error("'\"'", "end of file");
1921 circle[circle_position].type = DQ_TT;
1924 case IDENTIFIER_CODE: /* Letter or underscore: an identifier */
1927 while ((n<=MAX_IDENTIFIER_LENGTH)
1928 && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1929 || (tokeniser_grid[lookahead] == DIGIT_CODE)))
1930 n++, lexaddc((*get_next_char)());
1934 if (n > MAX_IDENTIFIER_LENGTH)
1935 { char bad_length[100];
1937 "Name exceeds the maximum length of %d characters:",
1938 MAX_IDENTIFIER_LENGTH);
1939 error_named(bad_length, lextexts[lex_index].text);
1940 /* Eat any further extra characters in the identifier */
1941 while (((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
1942 || (tokeniser_grid[lookahead] == DIGIT_CODE)))
1944 /* Trim token so that it doesn't violate
1945 MAX_IDENTIFIER_LENGTH during error recovery */
1946 lextexts[lex_index].text[MAX_IDENTIFIER_LENGTH] = 0;
1949 if (dont_enter_into_symbol_table)
1950 { circle[circle_position].type = DQ_TT;
1951 circle[circle_position].value = 0;
1952 if (dont_enter_into_symbol_table == -2)
1953 interpret_identifier(lextexts[lex_index].text, circle_position, TRUE);
1957 interpret_identifier(lextexts[lex_index].text, circle_position, FALSE);
1962 /* The character is initial to at least one of the separators */
1964 for (j=e>>4, k=j+(e&0x0f); j<k; j++)
1965 { r = (char *) separators[j];
1969 goto SeparatorMatched;
1973 { if (*(r+1) == lookahead)
1975 lexaddc((*get_next_char)());
1977 goto SeparatorMatched;
1981 { if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
1983 lexaddc((*get_next_char)());
1984 lexaddc((*get_next_char)());
1986 goto SeparatorMatched;
1991 /* The following contingency never in fact arises with the
1992 current set of separators, but might in future */
1994 lexaddc(d); lexaddc(lookahead); lexaddc(lookahead2);
1996 error_named("Unrecognised combination in source:",
1997 lextexts[lex_index].text);
1998 goto StartTokenAgain;
2002 circle[circle_position].type = SEP_TT;
2003 circle[circle_position].value = j;
2005 { case SEMICOLON_SEP: break;
2006 case HASHNDOLLAR_SEP:
2007 case HASHWDOLLAR_SEP:
2008 if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
2009 { error_named("Character expected after",
2010 lextexts[lex_index].text);
2014 lexaddc((*get_next_char)());
2015 while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
2016 || (tokeniser_grid[lookahead] == DIGIT_CODE))
2017 lexaddc((*get_next_char)());
2020 case HASHADOLLAR_SEP:
2021 case HASHGDOLLAR_SEP:
2022 case HASHRDOLLAR_SEP:
2024 if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
2025 { error_named("Alphabetic character expected after",
2026 lextexts[lex_index].text);
2030 while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
2031 || (tokeniser_grid[lookahead] == DIGIT_CODE))
2032 lexaddc((*get_next_char)());
2039 /* We can now assign the text pointer, since the lextext has finished reallocing. */
2040 circle[circle_position].text = lextexts[lex_index].text;
2041 i = circle_position;
2044 /* We've either parsed a new token or selected a put-back one.
2045 i is the circle-position of the token in question. Time to
2046 export the token data where higher-level code can find it. */
2047 token_value = circle[i].value;
2048 token_type = circle[i].type;
2049 token_text = circle[i].text;
2050 if (!returning_a_put_back_token)
2051 { set_token_location(circle[i].location);
2054 if (tokens_trace_level > 0)
2055 { if (tokens_trace_level == 1) {
2056 printf("'%s' ", circle[i].text);
2057 if (circle[i].type == EOF_TT) printf("\n");
2060 { printf("-> "); describe_token(&circle[i]);
2062 if (tokens_trace_level > 2) print_context(circle[i].context);
/* Scratch buffer for the synthetic filename "<veneer routine '...'>"
   which restart_lexer() installs as StringLB.filename when lexing an
   in-memory veneer routine. */
2068 static char veneer_error_title[64];
/* Reset the lexer to a clean state and select its character source.
   lexical_source == NULL: read characters through the source-file
   pipeline. Otherwise: lex the given in-memory string (a veneer
   routine), with 'name' used to build a pseudo-filename for error
   reporting. */
2070 extern void restart_lexer(char *lexical_source, char *name)
2072 circle_position = 0;
/* Invalidate every slot of the token circle so no stale token can be
   returned after the restart. */
2073 for (i=0; i<CIRCLE_SIZE; i++)
2074 { circle[i].type = 0;
2075 circle[i].value = 0;
2076 circle[i].text = "(if this is ever visible, there is a bug)";
2077 circle[i].lextext = -1;
2078 circle[i].context = 0;
2082 /* But we don't touch no_lextexts; those allocated blocks can be reused */
2086 tokens_put_back = 0;
2087 forerrors_pointer = 0;
/* Restore the default lexing modes (see the flag declarations at the
   top of this file). */
2088 dont_enter_into_symbol_table = FALSE;
2089 return_sp_as_variable = FALSE;
2090 next_token_begins_syntax_line = TRUE;
2092 source_to_analyse = lexical_source;
2094 if (source_to_analyse == NULL)
/* Pipeline mode: characters come from the stack of source files. */
2095 { get_next_char = get_next_char_from_pipeline;
2096 if (!pipeline_made) create_char_pipeline();
2097 forerrors_buff[0] = 0; forerrors_pointer = 0;
/* String mode (the else branch): characters come from the supplied
   string, reported under the "<veneer routine '...'>" pseudo-filename. */
2100 { get_next_char = get_next_char_from_string;
2101 source_to_analyse_pointer = 0;
2102 CurrentLB = &StringLB;
2103 sprintf(veneer_error_title, "<veneer routine '%s'>", name);
2104 StringLB.filename = veneer_error_title;
/* Either way, begin at line 1, character 0 of the chosen source. */
2106 CurrentLB->source_line = 1;
2107 CurrentLB->line_start = 0;
2108 CurrentLB->chars_read = 0;
2112 /* ========================================================================= */
2113 /* Data structure management routines */
2114 /* ------------------------------------------------------------------------- */
/* One-time initialisation of the lexer's variables. The portion
   visible here sets blank_brief_location to the "no source location"
   sentinel: file_index -1, all other fields zero. */
2116 extern void init_lexer_vars(void)
2129 blank_brief_location.file_index = -1;
2130 blank_brief_location.line_number = 0;
2131 blank_brief_location.orig_file_index = 0;
2132 blank_brief_location.orig_line_number = 0;
/* Start the prepass: zero the running source-line count, mark that no
   file is currently open, and emit any errors queued against the
   current line. */
2135 extern void lexer_begin_prepass(void)
2136 { total_source_line_count = 0;
2137 CurrentLB = &NoFileOpen;
2138 report_errors_at_current_line();
/* Start a compilation pass: reset the progress-hash printing flags
   (see their declarations at the top of this file), force the
   character pipeline to be rebuilt, and restart the lexer reading
   from the source files (NULL source selects pipeline mode). */
2141 extern void lexer_begin_pass(void)
2142 { no_hash_printed_yet = TRUE;
2143 hash_printed_since_newline = FALSE;
2145 pipeline_made = FALSE;
2147 restart_lexer(NULL, NULL);
/* End the pass: attribute any still-queued errors to the
   "making output" pseudo-file and report them. */
2150 extern void lexer_endpass(void)
2151 { CurrentLB = &MakingOutput;
2152 report_errors_at_current_line();
/* Allocate all storage the lexer needs: the growable source-file and
   lexeme-text lists, the keyword and local-variable hash tables, the
   tokeniser grid and keyword tables, and the head of the debug
   token-location chain. Everything allocated here is released again
   by lexer_free_arrays(). */
2155 extern void lexer_allocate_arrays(void)
/* Growable memory lists (resized on demand elsewhere). */
2157 initialise_memory_list(&FileStack_memlist,
2158 sizeof(Sourcefile), 4, (void**)&FileStack,
2159 "source file stack");
2162 initialise_memory_list(&lextexts_memlist,
2163 sizeof(lextext), 200, (void**)&lextexts,
2164 "lexeme texts list");
/* Keyword hashing: HASH_TAB_SIZE chain heads and tails, plus a
   linked-list data table of three ints per keyword. */
2167 keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
2168 "keyword hash table");
2169 keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
2170 "keyword hash end table");
2171 keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
2172 "keyword hashing linked list");
/* Local-variable name texts and their hash lookup structures. */
2174 local_variable_names = my_calloc(sizeof(identstruct), MAX_LOCAL_VARIABLES-1,
2175 "text of local variable names");
2176 local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
2177 "local variable hash table");
2178 local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
2179 "local variable hash codes");
2181 make_tokeniser_grid();
2182 make_keywords_tables();
/* The debug token-location chain begins as a single zeroed record;
   last_token_location tracks the tail of the chain. */
2184 first_token_locations =
2185 my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
2186 first_token_locations->location.file_index = 0;
2187 first_token_locations->location.beginning_byte_index = 0;
2188 first_token_locations->location.end_byte_index = 0;
2189 first_token_locations->location.beginning_line_number = 0;
2190 first_token_locations->location.end_line_number = 0;
2191 first_token_locations->location.beginning_character_number = 0;
2192 first_token_locations->location.end_character_number = 0;
2193 first_token_locations->location.orig_file_index = 0;
2194 first_token_locations->location.orig_beg_line_number = 0;
2195 first_token_locations->location.orig_beg_char_number = 0;
2196 first_token_locations->next = NULL;
2197 first_token_locations->reference_count = 0;
2198 last_token_location = first_token_locations;
/* Release everything lexer_allocate_arrays() created. The per-file
   buffers and per-lexeme texts held inside the two memory lists are
   freed individually before the lists themselves are deallocated;
   the token-location records are disposed of via
   cleanup_token_locations(NULL). */
2201 extern void lexer_free_arrays(void)
/* Free each source file's buffer before dropping the file stack. */
2206 for (ix=0; ix<FileStack_max; ix++) {
2207 my_free(&FileStack[ix].buffer, "source file buffer");
2209 deallocate_memory_list(&FileStack_memlist);
/* Likewise each lexeme text before dropping the lextexts list. */
2211 for (ix=0; ix<no_lextexts; ix++) {
2212 my_free(&lextexts[ix].text, "one lexeme text");
2214 deallocate_memory_list(&lextexts_memlist);
2216 my_free(&keywords_hash_table, "keyword hash table");
2217 my_free(&keywords_hash_ends_table, "keyword hash end table");
2218 my_free(&keywords_data_table, "keyword hashing linked list");
2220 my_free(&local_variable_names, "text of local variable names");
2221 my_free(&local_variable_hash_table, "local variable hash table");
2222 my_free(&local_variable_hash_codes, "local variable hash codes");
2224 cleanup_token_locations(NULL);
2227 /* ========================================================================= */