1 /* ------------------------------------------------------------------------- */
2 /* "lexer" : Lexical analyser */
4 /* Part of Inform 6.42 */
5 /* copyright (c) Graham Nelson 1993 - 2024 */
7 /* Inform is free software: you can redistribute it and/or modify */
8 /* it under the terms of the GNU General Public License as published by */
9 /* the Free Software Foundation, either version 3 of the License, or */
10 /* (at your option) any later version. */
12 /* Inform is distributed in the hope that it will be useful, */
13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
15 /* GNU General Public License for more details. */
17 /* You should have received a copy of the GNU General Public License */
18 /* along with Inform. If not, see https://gnu.org/licenses/ */
20 /* ------------------------------------------------------------------------- */
/* Global lexer state: line counters and mode flags read and written by the
   routines below (e.g. lexical_context() folds several of these into its
   context number). */
24 int total_source_line_count, /* Number of source lines so far */
26 no_hash_printed_yet, /* Have not yet printed the first # */
27 hash_printed_since_newline, /* A hash has been printed since the
28 most recent new-line was printed
29 (generally as a result of an error
30 message or the start of pass) */
31 dont_enter_into_symbol_table, /* Return names as text (with
32 token type UQ_TT) and not as
33 entries in the symbol table,
34 when TRUE. If -2, only the
35 keyword table is searched. */
36 return_sp_as_variable; /* When TRUE, the word "sp" denotes
37 the stack pointer variable
38 (used in assembly language only) */
39 int next_token_begins_syntax_line; /* When TRUE, start a new syntax
40 line (for error reporting, etc.)
41 on the source code line where
42 the next token appears */
44 int32 last_mapped_line; /* Last syntax line reported to debugging file */
46 /* ------------------------------------------------------------------------- */
47 /* The lexer's output is a sequence of structs, each called a "token", */
48 /* representing one lexical unit (or "lexeme") each. Instead of providing */
49 /* "lookahead" (that is, always having available the next token after the */
50 /* current one, so that syntax analysers higher up in Inform can have */
51 /* advance knowledge of what is coming), the lexer instead has a system */
52 /* where tokens can be read in and then "put back again". */
53 /* The meaning of the number (and to some extent the text) supplied with */
54 /* a token depends on its type: see "header.h" for the list of types. */
55 /* For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
56 /* number, and translated to the token: */
57 /* type NUMBER_TT, value 483, text "$1e3" */
58 /* ------------------------------------------------------------------------- */
59 /* These three variables are set to the current token on a call to */
60 /* get_next_token() (but are not changed by a call to put_token_back()). */
61 /* (It would be tidier to use a token_data structure, rather than having */
62 /* get_next_token() unpack three values. But this is the way it is.) */
63 /* ------------------------------------------------------------------------- */
69 /* ------------------------------------------------------------------------- */
70 /* The next two variables are the head and tail of a singly linked list. */
71 /* The tail stores the portion most recently read from the current */
72 /* lexical block; its end values therefore describe the location of the */
73 /* current token, and are updated whenever the three variables above are */
74 /* via set_token_location(...). Earlier vertices, if any, represent the */
75 /* regions of lexical blocks read beforehand, where new vertices are */
76 /* only introduced by interruptions like a file inclusion or an EOF. */
77 /* Vertices are deleted off of the front of the list once they are no */
78 /* longer referenced by pending debug information records. */
79 /* ------------------------------------------------------------------------- */
/* Head and tail of the singly linked list of token locations described in
   the comment banner above: the tail holds the region most recently read. */
81 static debug_locations *first_token_locations;
82 static debug_locations *last_token_location;
/* Build and return a zero-length debug_location positioned at the end point
   of the most recently recorded location (beginning fields == end fields).
   NOTE(review): the trailing "return result;" and closing brace appear to
   have been elided from this extract — confirm against the full file. */
84 extern debug_location get_token_location(void)
85 { debug_location result;
86 debug_location *location = &(last_token_location->location);
87 result.file_index = location->file_index;
88 result.beginning_byte_index = location->end_byte_index;
89 result.end_byte_index = location->end_byte_index;
90 result.beginning_line_number = location->end_line_number;
91 result.end_line_number = location->end_line_number;
92 result.beginning_character_number = location->end_character_number;
93 result.end_character_number = location->end_character_number;
94 result.orig_file_index = location->orig_file_index;
95 result.orig_beg_line_number = location->orig_beg_line_number;
96 result.orig_beg_char_number = location->orig_beg_char_number;
/* Wrap the current token location (see get_token_location above) in a
   debug_locations node with a zero reference count.
   NOTE(review): the assignment of result.next and the return appear to have
   been elided from this extract. */
100 extern debug_locations get_token_locations(void)
101 { debug_locations result;
102 result.location = get_token_location();
104 result.reference_count = 0;
/* Record the location of the token just read. If it lies in the same file
   as the current tail node, simply advance that node's end values;
   otherwise append a fresh node for the new lexical block. */
108 static void set_token_location(debug_location location)
109 { if (location.file_index == last_token_location->location.file_index)
110 { last_token_location->location.end_byte_index =
111 location.end_byte_index;
112 last_token_location->location.end_line_number =
113 location.end_line_number;
114 last_token_location->location.end_character_number =
115 location.end_character_number;
116 last_token_location->location.orig_file_index =
117 location.orig_file_index;
118 last_token_location->location.orig_beg_line_number =
119 location.orig_beg_line_number;
120 last_token_location->location.orig_beg_char_number =
121 location.orig_beg_char_number;
/* Different file: allocate a successor node and make it the new list tail.
   NOTE(review): the "else" keyword and the my_malloc call line appear to
   have been elided from this extract. */
123 { debug_locations*successor =
125 (sizeof(debug_locations),
126 "debug locations of recent tokens");
127 successor->location = location;
128 successor->next = NULL;
129 successor->reference_count = 0;
130 last_token_location->next = successor;
131 last_token_location = successor;
/* Capture the current end position as the *beginning* of a range about to
   be read, and take a reference on the tail node so that
   cleanup_token_locations() will not free it while the range is pending.
   Balanced later by discard_token_location() or get_token_location_end(). */
135 extern debug_location_beginning get_token_location_beginning(void)
136 { debug_location_beginning result;
137 ++(last_token_location->reference_count);
138 result.head = last_token_location;
139 result.beginning_byte_index =
140 last_token_location->location.end_byte_index;
141 result.beginning_line_number =
142 last_token_location->location.end_line_number;
143 result.beginning_character_number =
144 last_token_location->location.end_character_number;
145 result.orig_file_index = last_token_location->location.orig_file_index;
146 result.orig_beg_line_number = last_token_location->location.orig_beg_line_number;
147 result.orig_beg_char_number = last_token_location->location.orig_beg_char_number;
/* Free unreferenced nodes from the front of the token-location list, then
   (when a beginning is supplied) sanity-check that its head node is still
   allocated, raising a compiler error otherwise.
   NOTE(review): several lines of this routine (the post-loop "if" guard and
   the compiler_error call sites) appear to have been elided from this
   extract — the error-message strings below belong to those calls. */
152 static void cleanup_token_locations(debug_location_beginning*beginning)
153 { if (first_token_locations)
154 { while (first_token_locations &&
155 !first_token_locations->reference_count)
156 { debug_locations*moribund = first_token_locations;
157 first_token_locations = moribund->next;
158 my_free(&moribund, "debug locations of recent tokens");
160 (beginning->head == moribund || !first_token_locations))
162 ("Records needed by a debug_location_beginning are no "
163 "longer allocated, perhaps because of an invalid reuse "
164 "of this or an earlier beginning");
170 ("Attempt to use a debug_location_beginning when no token "
171 "locations are defined");
174 ("Attempt to clean up token locations when no token locations "
/* Release the reference taken by get_token_location_beginning() without
   producing a location range (the pending range is abandoned). */
180 extern void discard_token_location(debug_location_beginning beginning)
181 { --(beginning.head->reference_count);
/* Close off a range opened by get_token_location_beginning(): drop the
   reference, then build a debug_locations value spanning from the recorded
   beginning to the current end of the head node's location.
   NOTE(review): the "else" introducing the second branch, a later guard
   (presumably comparing beginning.head with last_token_location), and the
   final return appear to have been elided from this extract. */
184 extern debug_locations get_token_location_end
185 (debug_location_beginning beginning)
186 { debug_locations result;
187 cleanup_token_locations(&beginning);
188 --(beginning.head->reference_count);
189 /* Sometimes we know what we'll read before we switch to the lexical block
190 where we'll read it. In such cases the beginning will be placed in the
191 prior block and last exactly zero bytes there. It's misleading to
192 include such ranges, so we gobble them. */
193 if (beginning.head->location.end_byte_index ==
194 beginning.beginning_byte_index &&
195 beginning.head->next)
196 { beginning.head = beginning.head->next;
197 result.location = beginning.head->location;
198 result.location.beginning_byte_index = 0;
199 result.location.beginning_line_number = 1;
200 result.location.beginning_character_number = 1;
/* Normal case: the range starts where the beginning was recorded. */
202 { result.location = beginning.head->location;
203 result.location.beginning_byte_index =
204 beginning.beginning_byte_index;
205 result.location.beginning_line_number =
206 beginning.beginning_line_number;
207 result.location.beginning_character_number =
208 beginning.beginning_character_number;
211 result.location.orig_file_index =
212 beginning.orig_file_index;
213 result.location.orig_beg_line_number =
214 beginning.orig_beg_line_number;
215 result.location.orig_beg_char_number =
216 beginning.orig_beg_char_number;
218 result.next = beginning.head->next;
219 result.reference_count = 0;
223 /* ------------------------------------------------------------------------- */
224 /* In order to be able to put tokens back efficiently, the lexer stores */
225 /* tokens in a "circle": the variable circle_position ranges between */
226 /* 0 and CIRCLE_SIZE-1. We only need a circle size as large as the */
227 /* maximum number of tokens ever put back at once, plus 1 (in effect, the */
228 /* maximum token lookahead ever needed in syntax analysis, plus 1). */
230 /* Note that the circle struct type is lexeme_data, whereas the expression */
231 /* code all works in token_data. They have slightly different needs. The */
232 /* data is exported through the token_text, token_value, token_type */
233 /* globals, so there's no need to use the same struct at both levels. */
235 /* Unlike some compilers, Inform does not have a context-free lexer: in */
236 /* fact it has 12288 different possible states. However, the context only */
237 /* affects the interpretation of "identifiers": lexemes beginning with a */
238 /* letter and containing up to 32 chars of alphanumeric and underscore */
239 /* chars. (For example, "default" may refer to the directive or statement */
240 /* of that name, and which token values are returned depends on the */
241 /* current lexical context.) */
243 /* Along with each token, we also store the lexical context it was */
244 /* translated under; because if it is called for again, there may need */
245 /* to be a fresh interpretation of it if the context has changed. */
246 /* ------------------------------------------------------------------------- */
/* The token "circle": a ring buffer of the most recent lexemes, indexed by
   circle_position in the range 0..CIRCLE_SIZE-1 (see banner comment above). */
248 #define CIRCLE_SIZE 6
250 /* (The worst case for token lookahead is distinguishing between an
251 old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".) */
253 static int circle_position;
254 static lexeme_data circle[CIRCLE_SIZE];
256 /* ------------------------------------------------------------------------- */
257 /* A complication, however, is that the text of some lexemes needs to be */
258 /* held in Inform's memory for much longer periods: for example, a */
259 /* dictionary word lexeme (like "'south'") must have its text preserved */
260 /* until the code generation time for the expression it occurs in, when */
261 /* the dictionary reference is actually made. We handle this by keeping */
262 /* all lexeme text until the end of the statement (or, for top-level */
263 /* directives, until the end of the directive). Then we call */
264 /* release_token_texts() to start over. The lextexts array will therefore */
265 /* grow to the largest number of lexemes in a single statement or */
267 /* ------------------------------------------------------------------------- */
/* Storage pool for lexeme text, kept until release_token_texts() is called
   (see banner comment above).
   NOTE(review): at least one struct member (presumably the text pointer
   itself) and the closing "} lextext;" appear elided from this extract. */
269 typedef struct lextext_s {
271 size_t size; /* Allocated size (including terminal null) */
274 static lextext *lextexts; /* Allocated to no_lextexts */
275 static memory_list lextexts_memlist;
276 static int no_lextexts;
278 static int cur_lextexts; /* Number of lextexts in current use
279 (cur_lextexts <= no_lextexts) */
281 static int lex_index; /* Index of lextext being written to */
282 static int lex_pos; /* Current write position in that lextext */
284 /* ------------------------------------------------------------------------- */
285 /* The lexer itself needs up to 3 characters of lookahead (it uses an */
286 /* LR(3) grammar to translate characters into tokens). */
288 /* Past the end of the stream, we fill in zeros. This has the awkward */
289 /* side effect that a zero byte in a source file will silently terminate */
290 /* it, rather than producing an "illegal source character" error. */
291 /* On the up side, we can compile veneer routines (which are null- */
292 /* terminated strings) with no extra work. */
293 /* ------------------------------------------------------------------------- */
/* The character pipeline: the current character plus up to three characters
   of lookahead, fed by the get_next_char function pointer. */
295 #define LOOKAHEAD_SIZE 3
297 static int current, lookahead, /* The latest character read, and */
298 lookahead2, lookahead3; /* the three characters following it */
299 /* (zero means end-of-stream) */
301 static int pipeline_made; /* Whether or not the pipeline of
302 characters has been constructed
305 static int (* get_next_char)(void); /* Routine for reading the stream of
306 characters: the lexer does not
307 need any "ungetc" routine for
308 putting them back again. End of
309 stream is signalled by returning
312 static char *source_to_analyse; /* The current lexical source:
313 NULL for "load from source files",
314 otherwise this points to a string
315 containing Inform code */
317 static int tokens_put_back; /* Count of the number of backward
318 moves made from the last-read
321 /* This gets called for both token_data and lexeme_data structs. It prints
322 a description of the common part (the text, value, type fields).
/* NOTE(review): the switch(type) header, the "break;" statements between
   cases, and the closing braces appear to have been elided from this
   extract — each "case" below is one arm of a single switch on `type`. */
324 extern void describe_token_triple(const char *text, int32 value, int type)
326 /* Many of the token types are not set in this file, but later on in
327 Inform's higher stages (for example, in the expression evaluator);
328 but this routine describes them all. */
334 /* The following token types occur in lexer output: */
336 case SYMBOL_TT: printf("symbol ");
337 describe_symbol(value);
339 case NUMBER_TT: printf("literal number %d", value);
341 case DQ_TT: printf("string \"%s\"", text);
343 case SQ_TT: printf("string '%s'", text);
345 case UQ_TT: printf("barestring %s", text);
347 case SEP_TT: printf("separator '%s'", text);
349 case EOF_TT: printf("end of file");
352 case STATEMENT_TT: printf("statement name '%s'", text);
354 case SEGMENT_MARKER_TT: printf("object segment marker '%s'", text);
356 case DIRECTIVE_TT: printf("directive name '%s'", text);
358 case CND_TT: printf("textual conditional '%s'", text);
360 case OPCODE_NAME_TT: printf("opcode name '%s'", text);
362 case SYSFUN_TT: printf("built-in function name '%s'", text);
364 case LOCAL_VARIABLE_TT: printf("local variable name '%s'", text);
366 case MISC_KEYWORD_TT: printf("statement keyword '%s'", text);
368 case DIR_KEYWORD_TT: printf("directive keyword '%s'", text);
370 case TRACE_KEYWORD_TT: printf("'trace' keyword '%s'", text);
372 case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", text);
375 /* The remaining are etoken types, not set by the lexer */
377 case OP_TT: printf("operator '%s'",
378 operators[value].description);
380 case ENDEXP_TT: printf("end of expression");
382 case SUBOPEN_TT: printf("open bracket");
384 case SUBCLOSE_TT: printf("close bracket");
386 case LARGE_NUMBER_TT: printf("large number: '%s'=%d",text,value);
388 case SMALL_NUMBER_TT: printf("small number: '%s'=%d",text,value);
390 case VARIABLE_TT: printf("variable '%s'=%d", text, value);
392 case DICTWORD_TT: printf("dictionary word '%s'", text);
394 case ACTION_TT: printf("action name '%s'", text);
/* Fallback (default arm) for unrecognised token types. */
398 printf("** unknown token type %d, text='%s', value=%d **",
404 /* ------------------------------------------------------------------------- */
405 /* All but one of the Inform keywords (most of them opcode names used */
406 /* only by the assembler). (The one left over is "sp", a keyword used in */
407 /* assembly language only.) */
409 /* A "keyword group" is a set of keywords to be searched for. If a match */
410 /* is made on an identifier, the token type becomes that given in the KG */
411 /* and the token value is its index in the KG. */
413 /* The keyword ordering must correspond with the appropriate #define's in */
414 /* "header.h" but is otherwise not significant. */
415 /* ------------------------------------------------------------------------- */
417 /* This must exceed the total number of keywords across all groups,
418 including opcodes. */
419 #define MAX_KEYWORDS (500)
421 /* The values will be filled in at compile time, when we know
422 which opcode set to use. */
423 keyword_group opcode_names =
425 OPCODE_NAME_TT, FALSE, TRUE
/* Z-machine opcode names, copied into opcode_names.keywords by
   make_keywords_tables(). Ordering matters: the index of each name is the
   token value (see banner comment above). */
428 static char *opcode_list_z[] = {
429 "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
430 "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
431 "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
432 "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
433 "print_char", "print_num", "random", "push", "pull", "split_window",
434 "set_window", "output_stream", "input_stream", "sound_effect", "jz",
435 "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
436 "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
437 "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
438 "restore", "restart", "ret_popped", "pop", "quit", "new_line",
439 "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
440 "erase_window", "erase_line", "set_cursor", "get_cursor",
441 "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
442 "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
443 "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
444 "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
445 "restore_undo", "draw_picture", "picture_data", "erase_picture",
446 "set_margins", "move_window", "window_size", "window_style",
447 "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
448 "mouse_window", "push_stack", "put_wind_prop", "print_form",
449 "make_menu", "picture_table", "print_unicode", "check_unicode",
450 "set_true_colour", "buffer_screen",
/* Glulx opcode names, used instead of opcode_list_z when compiling for the
   Glulx VM (selected at compile time in make_keywords_tables()). */
454 static char *opcode_list_g[] = {
455 "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
456 "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
457 "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
458 "jltu", "jgeu", "jgtu", "jleu",
460 "catch", "throw", "tailcall",
461 "copy", "copys", "copyb", "sexs", "sexb", "aload",
462 "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
463 "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
464 "streamchar", "streamnum", "streamstr",
465 "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
466 "random", "setrandom", "quit", "verify",
467 "restart", "save", "restore", "saveundo", "restoreundo", "protect",
468 "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
469 "linearsearch", "binarysearch", "linkedsearch",
470 "callf", "callfi", "callfii", "callfiii",
472 "mzero", "mcopy", "malloc", "mfree",
473 "accelfunc", "accelparam",
474 "hasundo", "discardundo",
475 "numtof", "ftonumz", "ftonumn", "ceil", "floor",
476 "fadd", "fsub", "fmul", "fdiv", "fmod",
477 "sqrt", "exp", "log", "pow",
478 "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
479 "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
480 "numtod", "dtonumz", "dtonumn", "ftod", "dtof", "dceil", "dfloor",
481 "dadd", "dsub", "dmul", "ddiv", "dmodr", "dmodq",
482 "dsqrt", "dexp", "dlog", "dpow",
483 "dsin", "dcos", "dtan", "dasin", "dacos", "datan", "datan2",
484 "jdeq", "jdne", "jdlt", "jdle", "jdgt", "jdge", "jdisnan", "jdisinf",
/* Opcode macros: assembly-level shorthands, filled in like opcode_names. */
488 keyword_group opcode_macros =
490 OPCODE_MACRO_TT, FALSE, TRUE
493 static char *opmacro_list_z[] = { "" };
495 static char *opmacro_list_g[] = {
496 "pull", "push", "dload", "dstore",
/* The fixed keyword groups. Each initialiser lists the keywords (in the
   order required by the #define's in "header.h"), followed by the token
   type to assign on a match and two flags (see the keyword_group struct). */
500 keyword_group directives =
501 { { "abbreviate", "array", "attribute", "class", "constant",
502 "default", "dictionary", "end", "endif", "extend", "fake_action",
503 "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
504 "iffalse", "import", "include", "link", "lowstring", "message",
505 "nearby", "object", "origsource", "property", "release", "replace",
506 "serial", "switches", "statusline", "stub", "system_file", "trace",
507 "undef", "verb", "version", "zcharacter",
509 DIRECTIVE_TT, FALSE, FALSE
512 keyword_group trace_keywords =
513 { { "dictionary", "symbols", "objects", "verbs",
514 "assembly", "expressions", "lines", "tokens", "linker",
516 TRACE_KEYWORD_TT, FALSE, TRUE
519 keyword_group segment_markers =
520 { { "class", "has", "private", "with", "" },
521 SEGMENT_MARKER_TT, FALSE, TRUE
524 keyword_group directive_keywords =
525 { { "alias", "long", "additive",
527 "noun", "held", "multi", "multiheld", "multiexcept",
528 "multiinside", "creature", "special", "number", "scope", "topic",
529 "reverse", "meta", "only", "replace", "first", "last",
530 "string", "table", "buffer", "data", "initial", "initstr",
531 "with", "private", "has", "class",
532 "error", "fatalerror", "warning",
533 "terminating", "static", "individual",
535 DIR_KEYWORD_TT, FALSE, TRUE
538 keyword_group misc_keywords =
539 { { "char", "name", "the", "a", "an", "The", "number",
540 "roman", "reverse", "bold", "underline", "fixed", "on", "off",
541 "to", "address", "string", "object", "near", "from", "property", "A", "" },
542 MISC_KEYWORD_TT, FALSE, TRUE
545 keyword_group statements =
546 { { "box", "break", "continue", "default", "do", "else", "font", "for",
547 "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
548 "print", "print_ret", "quit", "read", "remove", "restore", "return",
549 "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
550 "until", "while", "" },
551 STATEMENT_TT, FALSE, TRUE
554 keyword_group conditions =
555 { { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
559 keyword_group system_functions =
560 { { "child", "children", "elder", "eldest", "indirect", "parent", "random",
561 "sibling", "younger", "youngest", "metaclass", "glk", "" },
562 SYSFUN_TT, FALSE, TRUE
565 keyword_group system_constants =
566 { { "adjectives_table", "actions_table", "classes_table",
567 "identifiers_table", "preactions_table", "version_number",
568 "largest_object", "strings_offset", "code_offset",
569 "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
570 "static_memory_offset", "array_names_offset", "readable_memory_offset",
571 "cpv__start", "cpv__end", "ipv__start", "ipv__end",
572 "array__start", "array__end",
573 "lowest_attribute_number", "highest_attribute_number",
574 "attribute_names_array",
575 "lowest_property_number", "highest_property_number",
576 "property_names_array",
577 "lowest_action_number", "highest_action_number",
578 "action_names_array",
579 "lowest_fake_action_number", "highest_fake_action_number",
580 "fake_action_names_array",
581 "lowest_routine_number", "highest_routine_number", "routines_array",
582 "routine_names_array", "routine_flags_array",
583 "lowest_global_number", "highest_global_number", "globals_array",
584 "global_names_array", "global_flags_array",
585 "lowest_array_number", "highest_array_number", "arrays_array",
586 "array_names_array", "array_flags_array",
587 "lowest_constant_number", "highest_constant_number", "constants_array",
588 "constant_names_array",
589 "lowest_class_number", "highest_class_number", "class_objects_array",
590 "lowest_object_number", "highest_object_number",
592 "grammar_table", "dictionary_table", "dynam_string_table",
594 SYSTEM_CONSTANT_TT, FALSE, TRUE
/* Master index of the groups: slot 0 unused; slots 1..11 are iterated by
   make_keywords_tables() and indexed by keywords_data_table entries. */
597 keyword_group *keyword_groups[12]
598 = { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
599 &directive_keywords, &misc_keywords, &statements, &conditions,
600 &system_functions, &system_constants, &opcode_macros};
602 /* These keywords are set to point to local_variable_names entries when
603 a routine header is parsed. See construct_local_variable_tables(). */
604 keyword_group local_variables =
606 LOCAL_VARIABLE_TT, FALSE, FALSE
/* Encode the lexer's current mode as a bitmask: one bit per enabled keyword
   group plus the two mode flags, so identical input text re-tokenises
   identically whenever the context number matches.
   NOTE(review): the declaration/initialisation of `c` and the final return
   appear to have been elided from this extract. */
609 static int lexical_context(void)
611 /* The lexical context is a number representing all of the context
612 information in the lexical analyser: the same input text will
613 always translate to the same output tokens whenever the context
616 (For many years, the "dont_enter_into_symbol_table" variable
617 was omitted from this number. But now we can include it.) */
620 if (opcode_names.enabled) c |= 1;
621 if (directives.enabled) c |= 2;
622 if (trace_keywords.enabled) c |= 4;
623 if (segment_markers.enabled) c |= 8;
624 if (directive_keywords.enabled) c |= 16;
625 if (misc_keywords.enabled) c |= 32;
626 if (statements.enabled) c |= 64;
627 if (conditions.enabled) c |= 128;
628 if (system_functions.enabled) c |= 256;
629 if (system_constants.enabled) c |= 512;
630 if (local_variables.enabled) c |= 1024;
632 if (return_sp_as_variable) c |= 2048;
633 if (dont_enter_into_symbol_table) c |= 4096;
/* Print a short mnemonic for each bit set in a lexical_context() value
   (debugging/trace output). Bit assignments mirror lexical_context above. */
638 static void print_context(int c)
644 if ((c & 1) != 0) printf("OPC ");
645 if ((c & 2) != 0) printf("DIR ");
646 if ((c & 4) != 0) printf("TK ");
647 if ((c & 8) != 0) printf("SEG ");
648 if ((c & 16) != 0) printf("DK ");
649 if ((c & 32) != 0) printf("MK ");
650 if ((c & 64) != 0) printf("STA ");
651 if ((c & 128) != 0) printf("CND ");
652 if ((c & 256) != 0) printf("SFUN ");
653 if ((c & 512) != 0) printf("SCON ");
654 if ((c & 1024) != 0) printf("LV ");
655 if ((c & 2048) != 0) printf("sp ");
656 if ((c & 4096) != 0) printf("dontent ");
/* Hash-chain tables for keyword lookup: keywords_data_table holds triples
   (group index, keyword index within group, next entry or -1), with the
   hash_table/hash_ends_table arrays giving each chain's first and last
   entry. Built by make_keywords_tables(). */
659 static int *keywords_hash_table;
660 static int *keywords_hash_ends_table;
661 static int *keywords_data_table;
/* Parallel lookup tables for the current routine's local variables,
   built by construct_local_variable_tables(). */
663 static int *local_variable_hash_table;
664 static int *local_variable_hash_codes;
666 /* Note that MAX_LOCAL_VARIABLES is the maximum number of local variables
667 for this VM, *including* "sp" (the stack pointer "local").
668 This used to be a memory setting. Now it is a constant: 16 for Z-code,
672 /* The number of local variables in the current routine. */
675 /* Names of local variables in the current routine.
676 The values are positions in local_variable_names_memlist.
677 This is allocated to MAX_LOCAL_VARIABLES-1. (The stack pointer "local"
678 is not included in this array.)
680 (This could be a memlist, growing as needed up to MAX_LOCAL_VARIABLES-1.
681 But right now we just allocate the max.)
683 int *local_variable_name_offsets;
685 static memory_list local_variable_names_memlist;
686 /* How much of local_variable_names_memlist is used by the no_local locals. */
687 static int local_variable_names_usage;
/* Fast path: maps a single-character name to its local-variable index,
   or MAX_LOCAL_VARIABLES when no such local exists. */
689 static char one_letter_locals[128];
/* Build the keyword hash tables: choose the Z-code or Glulx opcode/macro
   lists, copy them into opcode_names/opcode_macros, then chain every
   keyword of groups 1..11 into keywords_data_table by hash code.
   NOTE(review): several lines (variable declarations, the target-VM test
   selecting the Z vs Glulx lists, loop-closing braces, and the tp counter
   updates) appear to have been elided from this extract. */
691 static void make_keywords_tables(void)
693 char **oplist, **maclist;
696 oplist = opcode_list_z;
697 maclist = opmacro_list_z;
700 oplist = opcode_list_g;
701 maclist = opmacro_list_g;
704 for (j=0; *(oplist[j]); j++) {
705 if (j >= MAX_KEYWORD_GROUP_SIZE) {
706 /* Gotta increase MAX_KEYWORD_GROUP_SIZE */
707 compiler_error("opcode_list has overflowed opcode_names.keywords");
710 opcode_names.keywords[j] = oplist[j];
712 opcode_names.keywords[j] = "";
714 for (j=0; *(maclist[j]); j++) {
715 if (j >= MAX_KEYWORD_GROUP_SIZE) {
716 /* Gotta increase MAX_KEYWORD_GROUP_SIZE */
717 compiler_error("opmacro_list has overflowed opcode_macros.keywords");
720 opcode_macros.keywords[j] = maclist[j];
722 opcode_macros.keywords[j] = "";
/* Empty every hash chain before filling them. */
724 for (i=0; i<HASH_TAB_SIZE; i++)
725 { keywords_hash_table[i] = -1;
726 keywords_hash_ends_table[i] = -1;
729 for (i=1; i<=11; i++)
730 { keyword_group *kg = keyword_groups[i];
731 for (j=0; *(kg->keywords[j]) != 0; j++)
733 if (tp >= MAX_KEYWORDS) {
734 /* Gotta increase MAX_KEYWORDS */
735 compiler_error("keywords_data_table has overflowed MAX_KEYWORDS");
/* Append entry tp to the chain for hash code h: either start the chain
   or link it after the current chain tail. */
738 h = hash_code_from_string(kg->keywords[j]);
739 if (keywords_hash_table[h] == -1)
740 keywords_hash_table[h] = tp;
742 *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
743 keywords_hash_ends_table[h] = tp;
744 *(keywords_data_table + 3*tp) = i;
745 *(keywords_data_table + 3*tp+1) = j;
746 *(keywords_data_table + 3*tp+2) = -1;
/* Reset the per-routine local-variable state before parsing a new routine.
   NOTE(review): the opening brace, the reset of no_locals, and the closing
   brace appear to have been elided from this extract. */
752 extern void clear_local_variables(void)
755 local_variable_names_usage = 0;
/* Append one local-variable name for the routine being parsed: record its
   offset into local_variable_names_memlist and copy the name (with its
   terminal null) into the pool, growing the memlist as needed.
   NOTE(review): the opening brace, the declaration of `len`, an early
   return in the overflow case, and the closing brace appear elided. */
758 extern void add_local_variable(char *name)
762 if (no_locals >= MAX_LOCAL_VARIABLES-1) {
763 /* This should have been caught before we got here */
764 error("too many local variables");
768 len = strlen(name)+1;
769 ensure_memory_list_available(&local_variable_names_memlist, local_variable_names_usage + len);
770 local_variable_name_offsets[no_locals++] = local_variable_names_usage;
771 strcpy((char *)local_variable_names_memlist.data+local_variable_names_usage, name);
772 local_variable_names_usage += len;
/* Return the name of local variable `index` (0-based, excluding "sp"),
   or the placeholder "???" for an out-of-range index. */
775 extern char *get_local_variable_name(int index)
777 if (index < 0 || index >= no_locals)
778 return "???"; /* shouldn't happen */
780 return (char *)local_variable_names_memlist.data + local_variable_name_offsets[index];
783 /* Look at the strings stored in local_variable_names (from 0 to no_locals).
784 Set local_variables.keywords to point to these, and also prepare the
786 This must be called after add_local_variable(), but before we start
787 compiling function code. */
/* Point local_variables.keywords at the stored local names and build the
   lookup accelerators: the one_letter_locals fast path (case-insensitive
   for single-letter names) and the per-name hash codes/chains.
   NOTE(review): variable declarations, the single-character-name test
   guarding the one_letter_locals update, and several closing braces
   appear to have been elided from this extract. */
788 extern void construct_local_variable_tables(void)
790 for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
791 for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
793 for (i=0; i<no_locals; i++)
795 char *p = (char *)local_variable_names_memlist.data + local_variable_name_offsets[i];
796 local_variables.keywords[i] = p;
798 { one_letter_locals[(uchar)p[0]] = i;
799 if (isupper(p[0])) one_letter_locals[tolower(p[0])] = i;
800 if (islower(p[0])) one_letter_locals[toupper(p[0])] = i;
802 h = hash_code_from_string(p);
803 if (local_variable_hash_table[h] == -1)
804 local_variable_hash_table[h] = i;
805 local_variable_hash_codes[i] = h;
807 /* Clear the rest. */
808 for (;i<MAX_LOCAL_VARIABLES-1;i++) {
809 local_variables.keywords[i] = "";
810 local_variable_hash_codes[i] = 0;
/* Classify the identifier text `p` and store the resulting token type and
   value into circle[pos]. Checks, in order: the UQ_TT/directive-only modes,
   the "sp" pseudo-variable, local variables, the keyword groups, and
   finally the symbol table.
   NOTE(review): several lines (chain-walking loop headers of the form
   "while (index != -1)", `index = ...` chain-advance statements, returns
   after a match, and closing braces) appear elided from this extract. */
814 static void interpret_identifier(char *p, int pos)
815 { int index, hashcode;
817 /* An identifier is either a keyword or a "symbol", a name which the
818 lexical analyser leaves to higher levels of Inform to understand. */
820 circle[pos].newsymbol = FALSE;
822 hashcode = hash_code_from_string(p);
824 /* If dont_enter_into_symbol_table is true, we skip all keywords
825 (and variables) and just mark the name as an unquoted string.
826 Except that if dont_enter_into_symbol_table is -2, we recognize
827 directive keywords (only).
830 if (dont_enter_into_symbol_table) {
832 if (dont_enter_into_symbol_table == -2) {
833 /* This is a simplified version of the keyword-checking loop
835 index = keywords_hash_table[hashcode];
837 { int *i = keywords_data_table + 3*index;
838 keyword_group *kg = keyword_groups[*i];
839 if (kg == &directives)
840 { char *q = kg->keywords[*(i+1)];
841 if (((kg->case_sensitive) && (strcmp(p, q)==0))
842 || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
843 { circle[pos].type = kg->change_token_type;
844 circle[pos].value = *(i+1);
/* No directive matched (or mode is plain TRUE): return as bare string. */
852 circle[pos].type = UQ_TT;
853 circle[pos].value = 0;
857 /* If this is assembly language, perhaps it is "sp"? */
859 if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
860 { circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
864 /* Test for local variables first, quite quickly. */
866 if (local_variables.enabled)
868 { index = one_letter_locals[(uchar)p[0]];
869 if (index<MAX_LOCAL_VARIABLES)
870 { circle[pos].type = LOCAL_VARIABLE_TT;
871 circle[pos].value = index+1;
/* Slow path: scan hash-matching locals for a case-insensitive name match. */
875 index = local_variable_hash_table[hashcode];
877 { for (;index<no_locals;index++)
878 { if (hashcode == local_variable_hash_codes[index])
880 char *locname = (char *)local_variable_names_memlist.data + local_variable_name_offsets[index];
881 if (strcmpcis(p, locname)==0)
882 { circle[pos].type = LOCAL_VARIABLE_TT;
883 circle[pos].value = index+1;
891 /* Now the bulk of the keywords. Note that the lexer doesn't recognise
892 the name of a system function which has been Replaced. */
894 index = keywords_hash_table[hashcode];
896 { int *i = keywords_data_table + 3*index;
897 keyword_group *kg = keyword_groups[*i];
899 { char *q = kg->keywords[*(i+1)];
900 if (((kg->case_sensitive) && (strcmp(p, q)==0))
901 || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
902 { if ((kg != &system_functions)
903 || (system_function_usage[*(i+1)]!=2))
904 { circle[pos].type = kg->change_token_type;
905 circle[pos].value = *(i+1);
913 /* Search for the name; create it if necessary. */
915 circle[pos].value = symbol_index(p, hashcode, &circle[pos].newsymbol);
916 circle[pos].type = SYMBOL_TT;
920 /* ------------------------------------------------------------------------- */
921 /* The tokeniser grid aids a rapid decision about the consequences of a */
922 /* character reached in the buffer. In effect it is an efficiently stored */
923 /* transition table using an algorithm similar to that of S. C. Johnson's */
924 /* "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9). */
925 /* My thanks to Dilip Sequeira for suggesting this. */
927 /* tokeniser_grid[c] is (16*n + m) if c is the first character of */
928 /* separator numbers n, n+1, ..., n+m-1 */
929 /* or certain special values (QUOTE_CODE, etc) */
932 /* Since 1000/16 = 62, the code numbers below will need increasing if the */
933 /* number of separators supported exceeds 61. */
934 /* ------------------------------------------------------------------------- */
/* One grid entry per possible input character (see the comment above):
   either 16*n+m encoding a separator range, or one of the special codes
   below (all >= 1000, safely outside the separator encoding). */
936 static int tokeniser_grid[256];
938 #define QUOTE_CODE 1000
939 #define DQUOTE_CODE 1001
940 #define NULL_CODE 1002
941 #define SPACE_CODE 1003
942 #define NEGATIVE_CODE 1004
943 #define DIGIT_CODE 1005
944 #define RADIX_CODE 1006
945 #define KEYWORD_CODE 1007
946 #define EOF_CODE 1008
947 #define WHITESPACE_CODE 1009
948 #define COMMENT_CODE 1010
949 #define IDENTIFIER_CODE 1011
951 /* This list cannot safely be changed without also changing the header
952 separator #defines. The ordering is significant in that (i) all entries
953 beginning with the same character must be adjacent and (ii) that if
954 X is an initial substring of Y then X must come before Y.
956 E.g. --> must occur before -- to prevent "-->0" being tokenised
957 wrongly as "--", ">", "0" rather than "-->", "0". */
/* The separator lexemes, ordered as required by the comment above:
   entries sharing a first character are adjacent.  Indices into this
   table must stay in step with the separator #defines in the header.
   NOTE(review): one row and the closing brace of this initialiser are
   elided from this listing. */
959 static const char separators[NUMBER_SEPARATORS][4] =
960 { "->", "-->", "--", "-", "++", "+", "*", "/", "%",
961 "||", "|", "&&", "&", "~~",
962 "~=", "~", "==", "=", ">=", ">",
963 "<=", "<", "(", ")", ",",
964 ".&", ".#", "..&", "..#", "..", ".",
965 "::", ":", "@", ";", "[", "]", "{", "}",
967 "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
/* Build tokeniser_grid[]: first encode separator ranges (16*n+1 for the
   first separator starting with a given character, then +1 for each
   further one, giving 16*n+m), then overwrite entries for quotes,
   whitespace, digits, comments, and identifier characters. */
970 static void make_tokeniser_grid(void)
972 /* Construct the grid to the specification above. */
976 for (i=0; i<256; i++) tokeniser_grid[i]=0;
/* Relies on separators sharing a first character being adjacent. */
978 for (i=0; i<NUMBER_SEPARATORS; i++)
979 { j=separators[i][0];
980 if (tokeniser_grid[j]==0)
981 tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
983 tokeniser_grid['\''] = QUOTE_CODE;
984 tokeniser_grid['\"'] = DQUOTE_CODE;
985 tokeniser_grid[0] = EOF_CODE;
986 tokeniser_grid[' '] = WHITESPACE_CODE;
987 tokeniser_grid['\n'] = WHITESPACE_CODE;
988 tokeniser_grid['\r'] = WHITESPACE_CODE;
989 tokeniser_grid['$'] = RADIX_CODE;
990 tokeniser_grid['!'] = COMMENT_CODE;
992 tokeniser_grid['0'] = DIGIT_CODE;
993 tokeniser_grid['1'] = DIGIT_CODE;
994 tokeniser_grid['2'] = DIGIT_CODE;
995 tokeniser_grid['3'] = DIGIT_CODE;
996 tokeniser_grid['4'] = DIGIT_CODE;
997 tokeniser_grid['5'] = DIGIT_CODE;
998 tokeniser_grid['6'] = DIGIT_CODE;
999 tokeniser_grid['7'] = DIGIT_CODE;
1000 tokeniser_grid['8'] = DIGIT_CODE;
1001 tokeniser_grid['9'] = DIGIT_CODE;
/* Letters and underscore may begin an identifier.  (Spelled out one
   by one rather than with a loop: character codes for 'a'..'z' need
   not be contiguous on every historical platform.) */
1003 tokeniser_grid['a'] = IDENTIFIER_CODE;
1004 tokeniser_grid['b'] = IDENTIFIER_CODE;
1005 tokeniser_grid['c'] = IDENTIFIER_CODE;
1006 tokeniser_grid['d'] = IDENTIFIER_CODE;
1007 tokeniser_grid['e'] = IDENTIFIER_CODE;
1008 tokeniser_grid['f'] = IDENTIFIER_CODE;
1009 tokeniser_grid['g'] = IDENTIFIER_CODE;
1010 tokeniser_grid['h'] = IDENTIFIER_CODE;
1011 tokeniser_grid['i'] = IDENTIFIER_CODE;
1012 tokeniser_grid['j'] = IDENTIFIER_CODE;
1013 tokeniser_grid['k'] = IDENTIFIER_CODE;
1014 tokeniser_grid['l'] = IDENTIFIER_CODE;
1015 tokeniser_grid['m'] = IDENTIFIER_CODE;
1016 tokeniser_grid['n'] = IDENTIFIER_CODE;
1017 tokeniser_grid['o'] = IDENTIFIER_CODE;
1018 tokeniser_grid['p'] = IDENTIFIER_CODE;
1019 tokeniser_grid['q'] = IDENTIFIER_CODE;
1020 tokeniser_grid['r'] = IDENTIFIER_CODE;
1021 tokeniser_grid['s'] = IDENTIFIER_CODE;
1022 tokeniser_grid['t'] = IDENTIFIER_CODE;
1023 tokeniser_grid['u'] = IDENTIFIER_CODE;
1024 tokeniser_grid['v'] = IDENTIFIER_CODE;
1025 tokeniser_grid['w'] = IDENTIFIER_CODE;
1026 tokeniser_grid['x'] = IDENTIFIER_CODE;
1027 tokeniser_grid['y'] = IDENTIFIER_CODE;
1028 tokeniser_grid['z'] = IDENTIFIER_CODE;
1030 tokeniser_grid['A'] = IDENTIFIER_CODE;
1031 tokeniser_grid['B'] = IDENTIFIER_CODE;
1032 tokeniser_grid['C'] = IDENTIFIER_CODE;
1033 tokeniser_grid['D'] = IDENTIFIER_CODE;
1034 tokeniser_grid['E'] = IDENTIFIER_CODE;
1035 tokeniser_grid['F'] = IDENTIFIER_CODE;
1036 tokeniser_grid['G'] = IDENTIFIER_CODE;
1037 tokeniser_grid['H'] = IDENTIFIER_CODE;
1038 tokeniser_grid['I'] = IDENTIFIER_CODE;
1039 tokeniser_grid['J'] = IDENTIFIER_CODE;
1040 tokeniser_grid['K'] = IDENTIFIER_CODE;
1041 tokeniser_grid['L'] = IDENTIFIER_CODE;
1042 tokeniser_grid['M'] = IDENTIFIER_CODE;
1043 tokeniser_grid['N'] = IDENTIFIER_CODE;
1044 tokeniser_grid['O'] = IDENTIFIER_CODE;
1045 tokeniser_grid['P'] = IDENTIFIER_CODE;
1046 tokeniser_grid['Q'] = IDENTIFIER_CODE;
1047 tokeniser_grid['R'] = IDENTIFIER_CODE;
1048 tokeniser_grid['S'] = IDENTIFIER_CODE;
1049 tokeniser_grid['T'] = IDENTIFIER_CODE;
1050 tokeniser_grid['U'] = IDENTIFIER_CODE;
1051 tokeniser_grid['V'] = IDENTIFIER_CODE;
1052 tokeniser_grid['W'] = IDENTIFIER_CODE;
1053 tokeniser_grid['X'] = IDENTIFIER_CODE;
1054 tokeniser_grid['Y'] = IDENTIFIER_CODE;
1055 tokeniser_grid['Z'] = IDENTIFIER_CODE;
1057 tokeniser_grid['_'] = IDENTIFIER_CODE;
1060 /* ------------------------------------------------------------------------- */
1061 /* Definition of a lexical block: a source file or a string containing */
1062 /* text for lexical analysis; an independent source from the point of */
1063 /* view of issuing error reports. */
1064 /* ------------------------------------------------------------------------- */
/* One independent source of text (a file or an internal string), with
   enough position bookkeeping to report errors against it.
   NOTE(review): trailing fields (orig_file/orig_line/orig_char, judging
   by uses elsewhere in this file) and the closing of this struct are
   elided from this listing. */
1066 typedef struct LexicalBlock_s
1067 { char *filename; /* Full translated name */
1068 int main_flag; /* TRUE if the main file
1069 (the first one opened) */
1070 int sys_flag; /* TRUE if a System_File */
1071 int source_line; /* Line number count */
1072 int line_start; /* Char number within file
1073 where the current line
1075 int chars_read; /* Char number of read pos */
1076 int file_no; /* Or 255 if not from a
1077 file; used for debug
1079 char *orig_source; /* From #origsource direct */
/* Three fixed pseudo-blocks used when no real source file is current:
   before compilation, while constructing output, and while compiling a
   veneer routine from an internal string.  file_no 255 means "not from
   a file". */
1085 static LexicalBlock NoFileOpen =
1086 { "<before compilation>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
1088 static LexicalBlock MakingOutput =
1089 { "<constructing output>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };
1091 static LexicalBlock StringLB =
1092 { "<veneer routine>", FALSE, TRUE, 0, 0, 0, 255, NULL, 0, 0, 0 };
1094 static LexicalBlock *CurrentLB; /* The current lexical
1095 block of input text */
/* Mark the current lexical block as coming from a System_File. */
1097 extern void declare_systemfile(void)
1098 { CurrentLB->sys_flag = TRUE;
/* Return 1 if the current lexical block is a System_File, else 0. */
1101 extern int is_systemfile(void)
1102 { return ((CurrentLB->sys_flag)?1:0);
/* Record (or clear) an #origsource declaration on the current lexical
   block, so error locations can refer back to the true original file.
   NOTE(review): the early-return/branch lines between the clear and the
   set are elided from this listing. */
1105 extern void set_origsource_location(char *source, int32 line, int32 charnum)
1110 /* Clear the Origsource declaration. */
1111 CurrentLB->orig_file = 0;
1112 CurrentLB->orig_source = NULL;
1113 CurrentLB->orig_line = 0;
1114 CurrentLB->orig_char = 0;
1118 /* Get the file number for a new or existing InputFiles entry. */
1119 file_no = register_orig_sourcefile(source);
/* InputFiles is 1-based via file_no, hence the -1 here. */
1121 CurrentLB->orig_file = file_no;
1122 CurrentLB->orig_source = InputFiles[file_no-1].filename;
1123 CurrentLB->orig_line = line;
1124 CurrentLB->orig_char = charnum;
1127 /* Error locations. */
/* Build a debug_location for the current read position.  The position
   is backed off by LOOKAHEAD_SIZE because chars_read counts characters
   already pulled into the lookahead pipeline. */
1129 extern debug_location get_current_debug_location(void)
1130 { debug_location result;
1131 /* Assume that all input characters are one byte. */
1132 result.file_index = CurrentLB->file_no;
1133 result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
1134 result.end_byte_index = result.beginning_byte_index;
1135 result.beginning_line_number = CurrentLB->source_line;
1136 result.end_line_number = result.beginning_line_number;
1137 result.beginning_character_number =
1138 CurrentLB->chars_read - CurrentLB->line_start;
1139 result.end_character_number = result.beginning_character_number;
1140 result.orig_file_index = CurrentLB->orig_file;
1141 result.orig_beg_line_number = CurrentLB->orig_line;
1142 result.orig_beg_char_number = CurrentLB->orig_char;
/* Debug location captured at the last report_errors_at_current_line(). */
1146 static debug_location ErrorReport_debug_location;
/* Copy the current lexical position into the global ErrorReport, so
   subsequent error messages point at this line. */
1148 extern void report_errors_at_current_line(void)
1149 { ErrorReport.line_number = CurrentLB->source_line;
1150 ErrorReport.file_number = CurrentLB->file_no;
/* 255 is the "not from a file" sentinel; normalise it to -1. */
1151 if (ErrorReport.file_number == 255)
1152 ErrorReport.file_number = -1;
1153 ErrorReport.source = CurrentLB->filename;
1154 ErrorReport.main_flag = CurrentLB->main_flag;
1155 if (debugfile_switch)
1156 ErrorReport_debug_location = get_current_debug_location();
1157 ErrorReport.orig_file = CurrentLB->orig_file;
1158 ErrorReport.orig_source = CurrentLB->orig_source;
1159 ErrorReport.orig_line = CurrentLB->orig_line;
1160 ErrorReport.orig_char = CurrentLB->orig_char;
/* Accessor for the location saved above. */
1163 extern debug_location get_error_report_debug_location(void)
1164 { return ErrorReport_debug_location;
/* Character offset at which the current source line began. */
1167 extern int32 get_current_line_start(void)
1168 { return CurrentLB->line_start;
/* An all-default brief_location, used as a placeholder elsewhere. */
1171 brief_location blank_brief_location;
/* Pack the interesting fields of an ErrorPosition into the compact
   brief_location form. */
1173 extern brief_location get_brief_location(ErrorPosition *errpos)
1176 loc.file_index = errpos->file_number;
1177 loc.line_number = errpos->line_number;
1178 loc.orig_file_index = errpos->orig_file;
1179 loc.orig_line_number = errpos->orig_line;
/* Inverse of the above: unpack a brief_location into *errpos.
   NOTE(review): the branch taken when file_index == -1 is elided from
   this listing. */
1183 extern void export_brief_location(brief_location loc, ErrorPosition *errpos)
1185 if (loc.file_index != -1)
1186 { errpos->file_number = loc.file_index;
1187 errpos->line_number = loc.line_number;
/* File number 1 is, by convention here, the main source file. */
1188 errpos->main_flag = (errpos->file_number == 1);
1189 errpos->orig_source = NULL;
1190 errpos->orig_file = loc.orig_file_index;
1191 errpos->orig_line = loc.orig_line_number;
1192 errpos->orig_char = 0;
1196 /* ------------------------------------------------------------------------- */
1197 /* Hash printing and line counting */
1198 /* ------------------------------------------------------------------------- */
/* Emit one '#' progress marker (after an initial "::" the first time).
   NOTE(review): the fflush/stdout-flushing code implied by the final
   comment is elided from this listing. */
1200 static void print_hash(void)
1202 /* Hash-printing is the practice of printing a # character every 100
1203 lines of source code (the -x switch), reassuring the user that
1204 progress is being made */
1206 if (no_hash_printed_yet)
1207 { printf("::"); no_hash_printed_yet = FALSE;
1209 printf("#"); hash_printed_since_newline = TRUE;
1212 /* On some systems, text output is buffered to a line at a time, and
1213 this would frustrate the point of hash-printing, so: */
/* Advance all per-line counters; every 100 lines, optionally print a
   progress hash.  The SpinCursor/ProcessEvents calls are for classic
   Mac cooperative multitasking builds (presumably inside #ifdefs that
   are elided from this listing — confirm against the full source). */
1219 static void reached_new_line(void)
1221 /* Called to signal that a new line has been reached in the source code */
/* Reset the buffer used to echo the offending line in error reports. */
1223 forerrors_pointer = 0;
1225 CurrentLB->source_line++;
1226 CurrentLB->line_start = CurrentLB->chars_read;
1228 total_source_line_count++;
1230 if (total_source_line_count%100==0)
1231 { if (hash_switch) print_hash();
1233 SpinCursor(32); /* I.e., allow other tasks to run */
1238 if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
1239 { ProcessEvents (&g_proc);
/* User-initiated abort path: close the transcript and bail out. */
1243 abort_transcript_file();
1244 longjmp (g_fallback, 1);
/* Begin a fresh syntax line for error reporting: clear the echo buffer
   (only when lexing from an internal string) and re-anchor ErrorReport. */
1250 static void new_syntax_line(void)
1251 { if (source_to_analyse != NULL) forerrors_pointer = 0;
1252 report_errors_at_current_line();
1255 /* Return 10 raised to the expo power.
1257 * I'm avoiding the standard pow() function for a rather lame reason:
1258 * it's in the libmath (-lm) library, and I don't want to change the
1259 * build model for the compiler. So, this is implemented with a stupid
1260 * lookup table. It's faster than pow() for small values of expo.
1261 * Probably not as fast if expo is 200, but "$+1e200" is an overflow
1262 * anyway, so I don't expect that to be a problem.
1264 * (For some reason, frexp() and ldexp(), which are used later on, do
1265 * not require libmath to be linked in.)
/* Return 10^expo without linking libmath: walk expo into the range
   [-POW10_RANGE, POW10_RANGE] in POW10_RANGE-sized steps, multiplying
   an accumulator, then finish with one table lookup.
   NOTE(review): the accumulator declaration and the in-range fast path
   are elided from this listing. */
1267 static double pow10_cheap(int expo)
1269 #define POW10_RANGE (8)
1270 static double powers[POW10_RANGE*2+1] = {
1271 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
1273 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
/* Large negative exponent: scale down POW10_RANGE decades at a time. */
1279 for (; expo < -POW10_RANGE; expo += POW10_RANGE) {
1282 return res * powers[POW10_RANGE+expo];
/* Large positive exponent: scale up POW10_RANGE decades at a time. */
1285 for (; expo > POW10_RANGE; expo -= POW10_RANGE) {
1286 res *= powers[POW10_RANGE*2];
1288 return res * powers[POW10_RANGE+expo];
1292 /* Return the IEEE-754 single-precision encoding of a floating-point
1295 * The number is provided in the pieces it was parsed in:
1296 * [+|-] intv "." fracv "e" [+|-]expo
1298 * If the magnitude is too large (beyond about 3.4e+38), this returns
1299 * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
1300 * small (below about 1e-45), this returns a zero value (0x00000000 or
1301 * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
1302 * lexer should never do that).
1304 * Note that using a float constant does *not* set the uses_float_features
1305 * flag (which would cause the game file to be labelled 3.1.2). Same with
1306 * double constants and the uses_double_features flag. There's no VM
1307 * feature here, just an integer. Of course, any use of the float *opcodes*
1308 * will set the flag.
1310 * The math functions in this routine require #including <math.h>, but
1311 * they should not require linking the math library (-lm). At least,
1312 * they do not on OSX and Linux.
/* Encode (-1)^signbit * (intv + fracv) * 10^expo as an IEEE-754
   single-precision bit pattern (see the long comment above: overflow
   gives +/-inf, underflow gives +/-0, NaN input gives a quiet NaN).
   NOTE(review): several branch bodies (exponent clamping, the zero
   case) are elided from this listing. */
1314 static int32 construct_float(int signbit, double intv, double fracv, int expo)
1316 double absval = (intv + fracv) * pow10_cheap(expo);
1317 int32 sign = (signbit ? 0x80000000 : 0x0);
1321 if (isinf(absval)) {
1322 return sign | 0x7f800000; /* infinity */
1324 if (isnan(absval)) {
1325 return sign | 0x7fc00000;
/* frexp splits absval into mant * 2^expo with mant in [0.5, 1). */
1328 mant = frexp(absval, &expo);
1330 /* Normalize mantissa to be in the range [1.0, 2.0) */
1331 if (0.5 <= mant && mant < 1.0) {
1335 else if (mant == 0.0) {
1339 return sign | 0x7f800000; /* infinity */
/* Exponent too large for single precision: overflow to infinity. */
1343 return sign | 0x7f800000; /* infinity */
1345 else if (expo < -126) {
1346 /* Denormalized (very small) number */
1347 mant = ldexp(mant, 126 + expo);
1350 else if (!(expo == 0 && mant == 0.0)) {
1352 mant -= 1.0; /* Get rid of leading 1 */
1355 mant *= 8388608.0; /* 2^23 */
1356 fbits = (int32)(mant + 0.5); /* round mant to nearest int */
1358 /* The carry propagated out of a string of 23 1 bits. */
1362 return sign | 0x7f800000; /* infinity */
1366 /* At this point, expo is less than 2^8; fbits is less than 2^23; neither is negative. */
1367 return (sign) | ((int32)(expo << 23)) | (fbits);
1370 /* Same as the above, but we return *half* of a 64-bit double, depending on whether wanthigh is true (high half) or false (low half).
/* IEEE-754 double-precision analogue of construct_float(): the 52-bit
   mantissa is built as fhi (high 28 bits) + flo (low 24 bits), and the
   caller picks the half it wants via wanthigh.
   NOTE(review): the infinity/NaN tail labels at the end and several
   branch bodies are elided from this listing. */
1372 static int32 construct_double(int wanthigh, int signbit, double intv, double fracv, int expo)
1374 double absval = (intv + fracv) * pow10_cheap(expo);
1375 int32 sign = (signbit ? 0x80000000 : 0x0);
1379 if (isinf(absval)) {
1382 if (isnan(absval)) {
1386 mant = frexp(absval, &expo);
1388 /* Normalize mantissa to be in the range [1.0, 2.0) */
1389 if (0.5 <= mant && mant < 1.0) {
1393 else if (mant == 0.0) {
1403 else if (expo < -1022) {
1404 /* Denormalized (very small) number */
1405 mant = ldexp(mant, 1022 + expo);
1408 else if (!(expo == 0 && mant == 0.0)) {
1410 mant -= 1.0; /* Get rid of leading 1 */
1413 /* fhi receives the high 28 bits; flo the low 24 bits (total 52 bits) */
1414 mant *= 268435456.0; /* 2^28 */
1415 fhi = (uint32)mant; /* Truncate */
1416 mant -= (double)fhi;
1417 mant *= 16777216.0; /* 2^24 */
1418 flo = (uint32)(mant+0.5); /* Round */
1421 /* The carry propagated out of a string of 24 1 bits. */
1425 /* And it also propagated out of the next 28 bits. */
1434 /* At this point, expo is less than 2^11; fhi is less than 2^28; flo is less than 2^24; none are negative. */
/* High word: sign, 11-bit exponent, top 20 mantissa bits. */
1436 return (sign) | ((int32)(expo << 20)) | ((int32)(fhi >> 8));
/* Low word: remaining 8 bits of fhi, then all 24 bits of flo. */
1438 return (int32)((fhi & 0xFF) << 24) | (int32)(flo);
/* Infinity bit pattern (high word). */
1442 return sign | 0x7FF00000;
/* Quiet-NaN bit pattern (high word). */
1448 return sign | 0x7FF80000;
1453 /* ------------------------------------------------------------------------- */
1454 /* Characters are read via a "pipeline" of variables, allowing us to look */
1455 /* up to three characters ahead of the current position. */
1457 /* There are two possible sources: from the source files being loaded in, */
1458 /* and from a string inside Inform (which is where the code for veneer */
1459 /* routines comes from). Each source has its own get-next-character */
1461 /* ------------------------------------------------------------------------- */
1462 /* Source 1: from files */
1464 /* Note that file_load_chars(p, size) loads "size" bytes into buffer "p" */
1465 /* from the current input file. If the file runs out, then if it was */
1466 /* the last source file 4 null characters are placed in the buffer: if it */
1467 /* was only an Include file ending, then a '\n' character is placed there */
1468 /* (essentially to force termination of any comment line) followed by */
1469 /* three harmless spaces. */
1471 /* The routine returns the number of characters it has written, and note */
1472 /* that this conveniently ensures that all characters in the buffer come */
1473 /* from the same file. */
1474 /* ------------------------------------------------------------------------- */
1476 #define SOURCE_BUFFER_SIZE 4096 /* Typical disc block size */
/* One open source file on the include stack: its read buffer plus the
   three-character lookahead saved when an Include pushes a new file.
   NOTE(review): a trailing LexicalBlock member (referenced elsewhere
   as .LB) and the closing of this struct are elided from this listing. */
1478 typedef struct Sourcefile_s
1479 { char *buffer; /* Input buffer */
1480 int read_pos; /* Read position in buffer */
1481 int size; /* Number of meaningful
1482 characters in buffer */
1483 int la, la2, la3; /* Three characters of
1484 lookahead pipeline */
1485 int file_no; /* Internal file number
1490 static Sourcefile *FileStack; /* Allocated to FileStack_max */
1491 static memory_list FileStack_memlist;
1492 static int FileStack_max; /* The highest value that File_sp has
1494 (Filestack entries to this depth have
1495 a buffer allocated) */
1497 static int File_sp; /* Current stack pointer */
1498 static Sourcefile *CF; /* Top entry on stack (always equal to
1499 FileStack[File_sp-1]) */
/* Index of the most recent InputFiles entry added to the stack. */
1501 static int last_input_file;
1503 /* Set CF and CurrentLB.
1504 This does not increment File_sp; the caller must do that. */
/* Open FileStack[i] for file_no: allocate its buffer if this stack
   depth has never been used, save the current lookahead into the
   previous entry, load the first buffer-full, prime the three-character
   lookahead, and initialise the entry's LexicalBlock.  Finally warn if
   the same filename already appears lower on the include stack. */
1505 static void begin_buffering_file(int i, int file_no)
1506 { int j, cnt; uchar *p;
1511 ensure_memory_list_available(&FileStack_memlist, i+1);
/* Buffers are allocated lazily, once per stack depth, and reused;
   +4 leaves room for the end-of-file padding characters. */
1512 while (i >= FileStack_max) {
1513 FileStack[FileStack_max++].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
1516 p = (uchar *) FileStack[i].buffer;
/* Park the in-flight lookahead in the entry we are pushing past. */
1519 { FileStack[i-1].la = lookahead;
1520 FileStack[i-1].la2 = lookahead2;
1521 FileStack[i-1].la3 = lookahead3;
1524 FileStack[i].file_no = file_no;
1525 FileStack[i].size = file_load_chars(file_no,
1526 (char *) p, SOURCE_BUFFER_SIZE);
1527 /* If the file is shorter than SOURCE_BUFFER_SIZE, it's now closed already. We still need to set up the file entry though. */
1529 lookahead = source_to_iso_grid[p[0]];
1530 lookahead2 = source_to_iso_grid[p[1]];
1531 lookahead3 = source_to_iso_grid[p[2]];
1532 if (LOOKAHEAD_SIZE != 3)
1534 ("Lexer lookahead size does not match hard-coded lookahead code");
1535 FileStack[i].read_pos = LOOKAHEAD_SIZE;
1537 if (file_no==1) FileStack[i].LB.main_flag = TRUE;
1538 else FileStack[i].LB.main_flag = FALSE;
1539 FileStack[i].LB.sys_flag = FALSE;
1540 FileStack[i].LB.source_line = 1;
1541 FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
1542 FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
1543 FileStack[i].LB.filename = InputFiles[file_no-1].filename;
1544 FileStack[i].LB.file_no = file_no;
1545 FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0;
1546 FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0;
1548 InputFiles[file_no-1].initial_buffering = FALSE;
1550 CurrentLB = &(FileStack[i].LB);
1551 CF = &(FileStack[i]);
1553 /* Check for recursive inclusion */
1556 { if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
1560 warning_named("File included more than once",
1561 FileStack[j].LB.filename);
/* Start the character pipeline on the main source file (file_no 1). */
1564 static void create_char_pipeline(void)
1567 begin_buffering_file(File_sp++, 1);
1568 pipeline_made = TRUE;
1569 last_input_file = current_input_file;
/* Pull the next character from the current source file, pushing newly
   opened Include files and popping finished ones, refilling buffers as
   needed, and performing CR/LF newline normalisation.  Returns 0 at
   the end of all input.
   NOTE(review): several lines (loop bodies, pop logic, EOF negation of
   CF->size) are elided from this listing; comments here describe only
   what the visible lines show. */
1572 static int get_next_char_from_pipeline(void)
1575 while (last_input_file < current_input_file)
1577 /* An "Include" file must have opened since the last character
1578 was read. Perhaps more than one. We run forward through the
1579 list and add them to the include stack. But we ignore
1580 "Origsource" files (which were never actually opened for
1584 if (!InputFiles[last_input_file-1].is_input)
1587 begin_buffering_file(File_sp++, last_input_file);
1589 if (last_input_file != current_input_file)
1590 compiler_error("last_input_file did not match after Include");
/* Total end of input: flush the lookahead and report EOF. */
1593 { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
/* Buffer exhausted: load the next block from the same file. */
1596 if (CF->read_pos == CF->size)
1598 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
/* A negated size appears to mark a finished (Include) file; pop it.
   (TODO confirm against the elided lines.) */
1602 if (CF->read_pos == -(CF->size))
1603 { set_token_location(get_current_debug_location());
1606 { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
/* Resume the outer file and restore its saved lookahead. */
1608 CF = &(FileStack[File_sp-1]);
1609 CurrentLB = &(FileStack[File_sp-1].LB);
1610 lookahead = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
1611 if (CF->read_pos == CF->size)
1613 file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
1616 set_token_location(get_current_debug_location());
1619 p = (uchar *) (CF->buffer);
/* Advance the three-character lookahead pipeline by one. */
1621 current = lookahead;
1622 lookahead = lookahead2;
1623 lookahead2 = lookahead3;
1624 lookahead3 = source_to_iso_grid[p[CF->read_pos++]];
1626 CurrentLB->chars_read++;
/* Echo the character into the error-report line buffer. */
1627 if (forerrors_pointer < FORERRORS_SIZE-1)
1628 forerrors_buff[forerrors_pointer++] = current;
1630 /* The file is open in binary mode, so we have to do our own newline
1631 conversion. (We want to do it consistently across all platforms.)
1633 The strategy is to convert all \r (CR) characters to \n (LF), but
1634 *don't* advance the line counter for \r if it's followed by \n.
1635 The rest of the lexer treats multiple \n characters the same as
1636 one, so the simple conversion will work out okay.
1638 (Note that, for historical reasons, a ctrl-L (formfeed) is also
1639 treated as \r. This conversion has already been handled by
1640 source_to_iso_grid[].)
1642 if (current == '\n') {
1645 else if (current == '\r') {
1647 if (lookahead != '\n')
1654 /* ------------------------------------------------------------------------- */
1655 /* Source 2: from a (null-terminated) string */
1656 /* ------------------------------------------------------------------------- */
1658 static int source_to_analyse_pointer; /* Current read position */
/* String-source counterpart of get_next_char_from_pipeline(): read the
   next character of source_to_analyse, refreshing the three-character
   lookahead.  The staged null checks stop the lookahead from reading
   past the string's terminating 0. */
1660 static int get_next_char_from_string(void)
1661 { uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
1662 current = source_to_iso_grid[p[0]];
1664 if (current == 0) lookahead = 0;
1665 else lookahead = source_to_iso_grid[p[1]];
1666 if (lookahead == 0) lookahead2 = 0;
1667 else lookahead2 = source_to_iso_grid[p[2]];
1668 if (lookahead2 == 0) lookahead3 = 0;
1669 else lookahead3 = source_to_iso_grid[p[3]];
1671 CurrentLB->chars_read++;
/* Echo into the error-report line buffer, as the file path does. */
1672 if (forerrors_pointer < FORERRORS_SIZE-1)
1673 forerrors_buff[forerrors_pointer++] = current;
1675 /* We shouldn't have \r when compiling from string (veneer function).
1676 If we do, just shove it under the carpet. */
1677 if (current == '\r') current = '\n';
1678 if (current == '\n') reached_new_line();
1683 /* ========================================================================= */
1684 /* The interface between the lexer and Inform's higher levels: */
1686 /* put_token_back() (effectively) move the read position */
1687 /* back by one token */
1689 /* get_next_token() copy the token at the current read */
1690 /* position into the triple */
1691 /* (token_type, token_value, token_text) */
1692 /* and move the read position forward */
1695 /* release_token_texts() discard all the tokens that have been */
1696 /* read in, except for put-back ones */
1698 /* restart_lexer(source, name) if source is NULL, initialise the lexer */
1699 /* to read from source files; */
1700 /* otherwise, to read from this null- */
1701 /* terminated string. */
1702 /* ------------------------------------------------------------------------- */
/* Reset lextext usage at the start of each directive/statement, while
   preserving the text of any tokens that have been put back: their
   lextext slots are swapped to the head of the lextexts array and the
   circle entries re-pointed at the new indices. */
1704 extern void release_token_texts(void)
1706 /* This is called at the beginning of every (top-level) directive and
1707 every statement. It drops all token usage so that the lextexts
1708 array can be reused.
1710 Call this immediately before a get_next_token() call.
1712 This should *not* be called within parse_expression(). Expression
1713 code generation relies on token data being retained across the whole
/* Fast path: nothing put back, so every lextext slot is reusable. */
1720 if (tokens_put_back == 0) {
1725 /* If any tokens have been put back, we have to preserve their text.
1726 Move their lextext usage to the head of the lextexts array. */
1728 for (ix=0; ix<tokens_put_back; ix++) {
/* Circle position of the ix'th put-back token (wrapping). */
1731 int pos = circle_position - tokens_put_back + 1 + ix;
1732 if (pos < 0) pos += CIRCLE_SIZE;
1734 oldpos = circle[pos].lextext;
1735 circle[pos].lextext = ix;
1736 /* Swap the entire lextext structure (including size) */
1737 temp = lextexts[ix];
1738 lextexts[ix] = lextexts[oldpos];
1739 lextexts[oldpos] = temp;
1741 cur_lextexts = tokens_put_back;
/* Push the most recently read token back into the circle so the next
   get_next_token() returns it again.  If the token created a brand-new
   symbol, that symbol is retracted so a later re-read can reinterpret
   the name in its new context. */
1744 extern void put_token_back(void)
1745 { tokens_put_back++;
1747 int pos = circle_position - tokens_put_back + 1;
1748 if (pos<0) pos += CIRCLE_SIZE;
1750 if (tokens_trace_level > 0)
1753 if (tokens_trace_level > 1) {
1754 describe_token(&circle[pos]);
1759 if (circle[pos].type == SYMBOL_TT && circle[pos].newsymbol) {
1760 /* Remove the symbol from the symbol table. (Or mark it as unreachable
1762 end_symbol_scope(circle[pos].value, TRUE);
1763 /* Remove new-symbol flag, and force reinterpretation next time
1764 we see the symbol. */
/* context -1 can never match a real lexical context, forcing a
   fresh interpret_identifier() on re-read. */
1765 circle[pos].newsymbol = FALSE;
1766 circle[pos].context = -1;
1769 /* The following error, of course, should never happen! */
1771 if (tokens_put_back == CIRCLE_SIZE)
1772 { compiler_error("The lexical analyser has collapsed because of a wrong \
1773 assumption inside Inform");
1779 /* The get_next_token() code reads characters into the current lextext,
1780 which is lextexts[lex_index]. It uses these routines to add and remove
1781 characters, reallocing when necessary.
1783 lex_pos is the current number of characters in the lextext. It is
1784 not necessarily null-terminated until get_next_token() completes.
1787 /* Add one character */
/* Append ch to the current lextext, doubling its allocation on demand.
   The text is not necessarily null-terminated until the token ends. */
1788 static void lexaddc(char ch)
1790 if ((size_t)lex_pos >= lextexts[lex_index].size) {
1791 size_t newsize = lextexts[lex_index].size * 2;
1792 my_realloc(&lextexts[lex_index].text, lextexts[lex_index].size, newsize, "one lexeme text");
1793 lextexts[lex_index].size = newsize;
1795 lextexts[lex_index].text[lex_pos++] = ch;
1798 /* Remove the last character and ensure it's null-terminated */
1799 static void lexdelc(void)
1804 lextexts[lex_index].text[lex_pos] = 0;
1807 /* Return the last character */
1808 static char lexlastc(void)
1813 return lextexts[lex_index].text[lex_pos-1];
1816 /* Add a string of characters (including the null) */
1817 static void lexadds(char *str)
1826 extern void get_next_token(void)
1827 { int d, i, j, k, quoted_size, e, radix, context;
1831 int returning_a_put_back_token = TRUE;
1833 context = lexical_context();
1835 if (tokens_put_back > 0)
1836 { i = circle_position - tokens_put_back + 1;
1837 if (i<0) i += CIRCLE_SIZE;
1839 if (context != circle[i].context)
1840 { j = circle[i].type;
1841 if ((j==0) || ((j>=100) && (j<200)))
1842 interpret_identifier(circle[i].text, i);
1843 circle[i].context = context;
1847 returning_a_put_back_token = FALSE;
1849 if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
1850 else circle_position++;
1852 lex_index = cur_lextexts++;
1853 if (lex_index >= no_lextexts) {
1854 /* fresh lextext block; must init it */
1855 no_lextexts = lex_index+1;
1856 ensure_memory_list_available(&lextexts_memlist, no_lextexts);
1857 lextexts[lex_index].size = 64; /* this can grow */
1858 lextexts[lex_index].text = my_malloc(lextexts[lex_index].size, "one lexeme text");
1861 lextexts[lex_index].text[0] = 0; /* start with an empty string */
1863 circle[circle_position].lextext = lex_index;
1864 circle[circle_position].text = NULL; /* will fill in later */
1865 circle[circle_position].value = 0;
1866 circle[circle_position].type = 0;
1867 circle[circle_position].newsymbol = FALSE;
1868 circle[circle_position].context = context;
1871 d = (*get_next_char)();
1872 e = tokeniser_grid[d];
1874 if (next_token_begins_syntax_line)
1875 { if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
1876 { new_syntax_line();
1877 next_token_begins_syntax_line = FALSE;
1881 circle[circle_position].location = get_current_debug_location();
1884 { case 0: char_error("Illegal character found in source:", d);
1885 goto StartTokenAgain;
1887 case WHITESPACE_CODE:
1888 while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
1890 goto StartTokenAgain;
1893 while ((lookahead != '\n') && (lookahead != '\r') && (lookahead != 0))
1895 goto StartTokenAgain;
1898 circle[circle_position].type = EOF_TT;
1899 lexadds("<end of file>");
1907 { n = n*radix + character_digit_value[d];
1909 } while ((character_digit_value[lookahead] < radix)
1910 && (d = (*get_next_char)(), TRUE));
1913 circle[circle_position].type = NUMBER_TT;
1914 circle[circle_position].value = (int32)n;
1918 /* When we reach here, d is the sign bit ('+' or '-').
1919 If we're constructing a 32-bit float, floatend is 0;
1920 for a 64-bit double, floatend is '>' for high, '<' for low. */
1921 { int expo=0; double intv=0, fracv=0;
1922 int expocount=0, intcount=0, fraccount=0;
1923 int signbit = (d == '-');
1925 while (character_digit_value[lookahead] < 10) {
1926 intv = 10.0*intv + character_digit_value[lookahead];
1931 if (lookahead == '.') {
1932 double fracpow = 1.0;
1935 while (character_digit_value[lookahead] < 10) {
1937 fracv = fracv + fracpow*character_digit_value[lookahead];
1943 if (lookahead == 'e' || lookahead == 'E') {
1947 if (lookahead == '+' || lookahead == '-') {
1948 exposign = (lookahead == '-');
1952 while (character_digit_value[lookahead] < 10) {
1953 expo = 10*expo + character_digit_value[lookahead];
1959 error("Floating-point literal must have digits after the 'e'");
1960 if (exposign) { expo = -expo; }
1962 if (intcount + fraccount == 0)
1963 error("Floating-point literal must have digits");
1964 if (floatend == '>')
1965 n = construct_double(TRUE, signbit, intv, fracv, expo);
1966 else if (floatend == '<')
1967 n = construct_double(FALSE, signbit, intv, fracv, expo);
1969 n = construct_float(signbit, intv, fracv, expo);
1972 circle[circle_position].type = NUMBER_TT;
1973 circle[circle_position].value = n;
1974 if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
1978 radix = 16; d = (*get_next_char)();
1979 if (d == '-' || d == '+') {
1983 if (d == '<' || d == '>') {
1985 d = (*get_next_char)();
1986 if (d == '-' || d == '+') {
1989 error("Signed number expected after '$<' or '$>'");
1991 if (d == '$') { d = (*get_next_char)(); radix = 2; }
1992 if (character_digit_value[d] >= radix)
1994 error("Binary number expected after '$$'");
1996 error("Hexadecimal number expected after '$'");
2000 case QUOTE_CODE: /* Single-quotes: scan a literal string */
2003 { e = d; d = (*get_next_char)(); lexaddc(d);
2005 if ((d == '\'') && (e != '@'))
2006 { if (quoted_size == 1)
2007 { d = (*get_next_char)(); lexaddc(d);
2009 error("No text between quotation marks ''");
2014 if (d==0) ebf_error("'\''", "end of file");
2016 circle[circle_position].type = SQ_TT;
2019 case DQUOTE_CODE: /* Double-quotes: scan a literal string */
2021 { d = (*get_next_char)(); lexaddc(d);
2024 while (lexlastc() == ' ') lex_pos--;
2025 if (lexlastc() != '^') lexaddc(' ');
2026 while ((lookahead != 0) &&
2027 (tokeniser_grid[lookahead] == WHITESPACE_CODE))
2031 { int newline_passed = FALSE;
2033 while ((lookahead != 0) &&
2034 (tokeniser_grid[lookahead] == WHITESPACE_CODE))
2035 if ((d = (*get_next_char)()) == '\n')
2036 newline_passed = TRUE;
2037 if (!newline_passed)
2039 chb[0] = '\"'; chb[1] = lookahead;
2040 chb[2] = '\"'; chb[3] = 0;
2041 ebf_error("empty rest of line after '\\' in string",
2045 } while ((d != 0) && (d!='\"'));
2046 if (d==0) ebf_error("'\"'", "end of file");
2048 circle[circle_position].type = DQ_TT;
2051 case IDENTIFIER_CODE: /* Letter or underscore: an identifier */
2054 while (((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
2055 || (tokeniser_grid[lookahead] == DIGIT_CODE)))
2056 n++, lexaddc((*get_next_char)());
2060 interpret_identifier(lextexts[lex_index].text, circle_position);
2065 /* The character is initial to at least one of the separators */
2067 for (j=e>>4, k=j+(e&0x0f); j<k; j++)
2068 { r = (char *) separators[j];
2072 goto SeparatorMatched;
2076 { if (*(r+1) == lookahead)
2078 lexaddc((*get_next_char)());
2080 goto SeparatorMatched;
2084 { if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
2086 lexaddc((*get_next_char)());
2087 lexaddc((*get_next_char)());
2089 goto SeparatorMatched;
2094 /* The following contingency never in fact arises with the
2095 current set of separators, but might in future */
2097 lexaddc(d); lexaddc(lookahead); lexaddc(lookahead2);
2099 error_named("Unrecognised combination in source:",
2100 lextexts[lex_index].text);
2101 goto StartTokenAgain;
2105 circle[circle_position].type = SEP_TT;
2106 circle[circle_position].value = j;
2108 { case SEMICOLON_SEP: break;
2109 case HASHNDOLLAR_SEP:
2110 case HASHWDOLLAR_SEP:
2111 if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
2112 { error_named("Character expected after",
2113 lextexts[lex_index].text);
2117 lexaddc((*get_next_char)());
2118 while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
2119 || (tokeniser_grid[lookahead] == DIGIT_CODE))
2120 lexaddc((*get_next_char)());
2123 case HASHADOLLAR_SEP:
2124 case HASHGDOLLAR_SEP:
2125 case HASHRDOLLAR_SEP:
2127 if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
2128 { error_named("Alphabetic character expected after",
2129 lextexts[lex_index].text);
2133 while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
2134 || (tokeniser_grid[lookahead] == DIGIT_CODE))
2135 lexaddc((*get_next_char)());
2142 /* We can now assign the text pointer, since the lextext has finished reallocing. */
2143 circle[circle_position].text = lextexts[lex_index].text;
2144 i = circle_position;
2147 /* We've either parsed a new token or selected a put-back one.
2148 i is the circle-position of the token in question. Time to
2149 export the token data where higher-level code can find it. */
2150 token_value = circle[i].value;
2151 token_type = circle[i].type;
2152 token_text = circle[i].text;
2153 if (!returning_a_put_back_token)
2154 { set_token_location(circle[i].location);
2157 if (tokens_trace_level > 0)
2158 { if (tokens_trace_level == 1) {
2159 printf("'%s' ", circle[i].text);
2160 if (circle[i].type == EOF_TT) printf("\n");
2163 { printf("-> "); describe_token(&circle[i]);
2165 if (tokens_trace_level > 2) {
2166 if (circle[i].newsymbol) printf("newsym ");
2167 print_context(circle[i].context);
/* Buffer holding a synthetic "filename" used when lexing a veneer routine;
   must be static because StringLB.filename continues to point at it after
   restart_lexer() returns. */
2174 static char veneer_error_title[64];
/* Reset the lexer to a clean starting state.
   lexical_source: NULL to read tokens from the source-file pipeline,
                   or a string to lex directly (used for veneer routines).
   name:           routine name shown in error locations when lexing from
                   a string (ignored for pipeline input).
   NOTE(review): several lines of this routine are elided in this view;
   comments annotate only the visible statements. */
2176 extern void restart_lexer(char *lexical_source, char *name)
2178 circle_position = 0;
/* Wipe every slot of the token circle so stale tokens are never reused */
2179 for (i=0; i<CIRCLE_SIZE; i++)
2180 { circle[i].type = 0;
2181 circle[i].value = 0;
2182 circle[i].newsymbol = FALSE;
/* Placeholder text: should never reach the user */
2183 circle[i].text = "(if this is ever visible, there is a bug)";
2184 circle[i].lextext = -1;
2185 circle[i].context = 0;
2189 /* But we don't touch no_lextexts; those allocated blocks can be reused */
/* Reset put-back state and the mode flags documented at the top of file */
2193 tokens_put_back = 0;
2194 forerrors_pointer = 0;
2195 dont_enter_into_symbol_table = FALSE;
2196 return_sp_as_variable = FALSE;
2197 next_token_begins_syntax_line = TRUE;
2199 source_to_analyse = lexical_source;
/* Choose the character source: file pipeline vs. in-memory string */
2201 if (source_to_analyse == NULL)
2202 { get_next_char = get_next_char_from_pipeline;
2203 if (!pipeline_made) create_char_pipeline();
2204 forerrors_buff[0] = 0; forerrors_pointer = 0;
2207 { get_next_char = get_next_char_from_string;
2208 source_to_analyse_pointer = 0;
/* Report errors against a pseudo-file named for the veneer routine */
2209 CurrentLB = &StringLB;
2210 sprintf(veneer_error_title, "<veneer routine '%s'>", name);
2211 StringLB.filename = veneer_error_title;
/* Start位 reset: begin at line 1, offset 0 of whichever source was chosen */
2213 CurrentLB->source_line = 1;
2214 CurrentLB->line_start = 0;
2215 CurrentLB->chars_read = 0;
2219 /* ========================================================================= */
2220 /* Data structure management routines */
2221 /* ------------------------------------------------------------------------- */
/* Set all file-scope lexer variables to their initial values; called once
   at compiler start-up before any pass runs.
   NOTE(review): most of this routine's assignments are elided in this view;
   only the visible ones are annotated. */
2223 extern void init_lexer_vars(void)
2237 local_variable_names_usage = 0;
/* blank_brief_location is the "no location" sentinel: file_index -1 marks
   it as not referring to any real source file */
2239 blank_brief_location.file_index = -1;
2240 blank_brief_location.line_number = 0;
2241 blank_brief_location.orig_file_index = 0;
2242 blank_brief_location.orig_line_number = 0;
/* Prepare the lexer for the pre-pass: zero the global line counter and
   point error reporting at the "no file open" line-buffer sentinel. */
2245 extern void lexer_begin_prepass(void)
2246 { total_source_line_count = 0;
2247 CurrentLB = &NoFileOpen;
/* Flush any errors queued against the previous current line */
2248 report_errors_at_current_line();
/* Prepare the lexer for a full compilation pass: reset the '#' progress
   indicator flags, mark the character pipeline as not yet built, and
   restart the lexer on pipeline (file) input.
   NOTE(review): lines 2254-2258 are elided in this view. */
2251 extern void lexer_begin_pass(void)
2252 { no_hash_printed_yet = TRUE;
2253 hash_printed_since_newline = FALSE;
2255 pipeline_made = FALSE;
/* NULL source => read from the source-file pipeline (see restart_lexer) */
2259 restart_lexer(NULL, NULL);
/* Wind up the lexer at the end of a pass: switch error reporting to the
   "making output" sentinel line-buffer and flush any pending errors. */
2262 extern void lexer_endpass(void)
2263 { CurrentLB = &MakingOutput;
2264 report_errors_at_current_line();
/* Allocate every dynamic structure the lexer needs: the source-file stack,
   the lexeme-text list, the keyword and local-variable hash tables, and the
   head node of the debug token-location list.  Freed by lexer_free_arrays().
   NOTE(review): some lines of this routine are elided in this view. */
2267 extern void lexer_allocate_arrays(void)
/* Growable stack of open source files (Include nesting) */
2269 initialise_memory_list(&FileStack_memlist,
2270 sizeof(Sourcefile), 4, (void**)&FileStack,
2271 "source file stack");
/* Growable list of token text buffers */
2274 initialise_memory_list(&lextexts_memlist,
2275 sizeof(lextext), 200, (void**)&lextexts,
2276 "lexeme texts list");
/* Keyword lookup: open hash table with separate chaining through
   keywords_data_table (3 ints per keyword) */
2279 keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
2280 "keyword hash table");
2281 keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
2282 "keyword hash end table");
2283 keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
2284 "keyword hashing linked list");
/* Local-variable name storage and hash lookup */
2286 initialise_memory_list(&local_variable_names_memlist,
2287 sizeof(char), MAX_LOCAL_VARIABLES*32, NULL,
2288 "text of local variable names");
/* NOTE(review): sized MAX_LOCAL_VARIABLES-1 whereas the hash-code array
   below uses MAX_LOCAL_VARIABLES -- presumably slot 0 needs no name
   offset; confirm against the code that indexes these arrays. */
2289 local_variable_name_offsets = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES-1,
2290 "offsets of local variable names");
2291 local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
2292 "local variable hash table");
2293 local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
2294 "local variable hash codes");
/* Build the character-class grid and keyword tables used by the scanner */
2296 make_tokeniser_grid();
2297 make_keywords_tables();
/* Head node of the linked list of debug token locations; all fields
   zeroed so it acts as a harmless initial "last location" */
2299 first_token_locations =
2300 my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
2301 first_token_locations->location.file_index = 0;
2302 first_token_locations->location.beginning_byte_index = 0;
2303 first_token_locations->location.end_byte_index = 0;
2304 first_token_locations->location.beginning_line_number = 0;
2305 first_token_locations->location.end_line_number = 0;
2306 first_token_locations->location.beginning_character_number = 0;
2307 first_token_locations->location.end_character_number = 0;
2308 first_token_locations->location.orig_file_index = 0;
2309 first_token_locations->location.orig_beg_line_number = 0;
2310 first_token_locations->location.orig_beg_char_number = 0;
2311 first_token_locations->next = NULL;
2312 first_token_locations->reference_count = 0;
2313 last_token_location = first_token_locations;
/* Release everything allocated by lexer_allocate_arrays(), including the
   per-element buffers owned by the file stack and lexeme-text list.
   NOTE(review): some lines (loop closing braces etc.) are elided in this
   view; comments annotate only the visible statements. */
2316 extern void lexer_free_arrays(void)
/* Free each file's read buffer before dropping the stack itself */
2321 for (ix=0; ix<FileStack_max; ix++) {
2322 my_free(&FileStack[ix].buffer, "source file buffer");
2324 deallocate_memory_list(&FileStack_memlist);
/* Likewise each lexeme text buffer, then the list that owned them */
2326 for (ix=0; ix<no_lextexts; ix++) {
2327 my_free(&lextexts[ix].text, "one lexeme text");
2329 deallocate_memory_list(&lextexts_memlist);
2331 my_free(&keywords_hash_table, "keyword hash table");
2332 my_free(&keywords_hash_ends_table, "keyword hash end table");
2333 my_free(&keywords_data_table, "keyword hashing linked list");
2335 deallocate_memory_list(&local_variable_names_memlist);
2336 my_free(&local_variable_name_offsets, "offsets of local variable names");
2337 my_free(&local_variable_hash_table, "local variable hash table");
2338 my_free(&local_variable_hash_codes, "local variable hash codes");
/* NULL => release the entire token-location list */
2340 cleanup_token_locations(NULL);
2343 /* ========================================================================= */