Implement a Makefile for Inform.
[inform.git] / src / lexer.c
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644 (file)
index 0000000..f03aba5
--- /dev/null
@@ -0,0 +1,1917 @@
+/* ------------------------------------------------------------------------- */
+/*   "lexer" : Lexical analyser                                              */
+/*                                                                           */
+/* Copyright (c) Graham Nelson 1993 - 2018                                   */
+/*                                                                           */
+/* This file is part of Inform.                                              */
+/*                                                                           */
+/* Inform is free software: you can redistribute it and/or modify            */
+/* it under the terms of the GNU General Public License as published by      */
+/* the Free Software Foundation, either version 3 of the License, or         */
+/* (at your option) any later version.                                       */
+/*                                                                           */
+/* Inform is distributed in the hope that it will be useful,                 */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              */
+/* GNU General Public License for more details.                              */
+/*                                                                           */
+/* You should have received a copy of the GNU General Public License         */
+/* along with Inform. If not, see https://gnu.org/licenses/                  */
+/*                                                                           */
+/* ------------------------------------------------------------------------- */
+
+#include "header.h"
+
+/* File-scope state flags shared with the rest of the compiler.  (Grouped
+   into a single declaration purely for compactness.)                       */
+int total_source_line_count,            /* Number of source lines so far     */
+
+    no_hash_printed_yet,                /* Have not yet printed the first #  */
+    hash_printed_since_newline,         /* A hash has been printed since the
+                                           most recent new-line was printed
+                                           (generally as a result of an error
+                                           message or the start of pass)     */
+    dont_enter_into_symbol_table,       /* Return names as text (with
+                                           token type DQ_TT, i.e., as if
+                                           they had double-quotes around)
+                                           and not as entries in the symbol
+                                           table, when TRUE. If -2, only the
+                                           keyword table is searched.        */
+    return_sp_as_variable;              /* When TRUE, the word "sp" denotes
+                                           the stack pointer variable
+                                           (used in assembly language only)  */
+int next_token_begins_syntax_line;      /* When TRUE, start a new syntax
+                                           line (for error reporting, etc.)
+                                           on the source code line where
+                                           the next token appears            */
+
+int32 last_mapped_line;  /* Last syntax line reported to debugging file      */
+
+/* ------------------------------------------------------------------------- */
+/*   The lexer's output is a sequence of triples, each called a "token",     */
+/*   representing one lexical unit (or "lexeme") each.  Instead of providing */
+/*   "lookahead" (that is, always having available the next token after the  */
+/*   current one, so that syntax analysers higher up in Inform can have      */
+/*   advance knowledge of what is coming), the lexer instead has a system    */
+/*   where tokens can be read in and then "put back again".                  */
+/*   The meaning of the number (and to some extent the text) supplied with   */
+/*   a token depends on its type: see "header.h" for the list of types.      */
+/*   For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
+/*   number, and translated to the token:                                    */
+/*     type NUMBER_TT, value 483, text "$1e3"                                */
+/* ------------------------------------------------------------------------- */
+/*   These three variables are set to the current token on a call to         */
+/*   get_next_token() (but are not changed by a call to put_token_back()).   */
+/* ------------------------------------------------------------------------- */
+
+int token_type;                         /* One of the _TT codes in header.h  */
+int32 token_value;                      /* Meaning depends on token_type     */
+char *token_text;                       /* Points into lexeme_memory below   */
+
+/* ------------------------------------------------------------------------- */
+/*   The next two variables are the head and tail of a singly linked list.   */
+/*   The tail stores the portion most recently read from the current         */
+/*   lexical block; its end values therefore describe the location of the    */
+/*   current token, and are updated whenever the three variables above are   */
+/*   via set_token_location(...).  Earlier vertices, if any, represent the   */
+/*   regions of lexical blocks read beforehand, where new vertices are       */
+/*   only introduced by interruptions like a file inclusion or an EOF.       */
+/*   Vertices are deleted off of the front of the list once they are no      */
+/*   longer referenced by pending debug information records.                 */
+/* ------------------------------------------------------------------------- */
+
+/* Head and tail of the singly linked list of token locations described
+   in the comment above. */
+static debug_locations *first_token_locations;
+static debug_locations *last_token_location;
+
+extern debug_location get_token_location(void)
+{   /*  Return a zero-width location at the point where the most recent
+        token ended (i.e. where the next token will begin).                  */
+    debug_location *end = &(last_token_location->location);
+    debug_location result;
+    result.file_index = end->file_index;
+    result.beginning_byte_index = end->end_byte_index;
+    result.beginning_line_number = end->end_line_number;
+    result.beginning_character_number = end->end_character_number;
+    result.end_byte_index = end->end_byte_index;
+    result.end_line_number = end->end_line_number;
+    result.end_character_number = end->end_character_number;
+    result.orig_file_index = end->orig_file_index;
+    result.orig_beg_line_number = end->orig_beg_line_number;
+    result.orig_beg_char_number = end->orig_beg_char_number;
+    return result;
+}
+
+extern debug_locations get_token_locations(void)
+{   /*  Wrap the current token location in a single-element, unreferenced
+        list node (by value; nothing is allocated here).                     */
+    debug_locations result;
+    result.next = NULL;
+    result.reference_count = 0;
+    result.location = get_token_location();
+    return result;
+}
+
+static void set_token_location(debug_location location)
+{   /*  Record that the most recent token ended at "location".  If it lies
+        in the same file as the current tail vertex of the list, simply
+        advance that vertex's end-point; otherwise a new lexical block has
+        begun, so append a fresh vertex for it.                              */
+    debug_location *tail = &(last_token_location->location);
+    if (location.file_index == tail->file_index)
+    {   tail->end_byte_index = location.end_byte_index;
+        tail->end_line_number = location.end_line_number;
+        tail->end_character_number = location.end_character_number;
+        tail->orig_file_index = location.orig_file_index;
+        tail->orig_beg_line_number = location.orig_beg_line_number;
+        tail->orig_beg_char_number = location.orig_beg_char_number;
+    } else
+    {   debug_locations *fresh =
+            my_malloc
+                (sizeof(debug_locations),
+                 "debug locations of recent tokens");
+        fresh->location = location;
+        fresh->next = NULL;
+        fresh->reference_count = 0;
+        last_token_location->next = fresh;
+        last_token_location = fresh;
+    }
+}
+
+extern debug_location_beginning get_token_location_beginning(void)
+{   /*  Mark the start of a token range at the current end-point.  The tail
+        vertex is pinned with a reference count so that it survives until
+        the matching get_token_location_end() or discard_token_location().   */
+    debug_location *tail = &(last_token_location->location);
+    debug_location_beginning result;
+    result.head = last_token_location;
+    result.head->reference_count++;
+    result.beginning_byte_index = tail->end_byte_index;
+    result.beginning_line_number = tail->end_line_number;
+    result.beginning_character_number = tail->end_character_number;
+    result.orig_file_index = tail->orig_file_index;
+    result.orig_beg_line_number = tail->orig_beg_line_number;
+    result.orig_beg_char_number = tail->orig_beg_char_number;
+    return result;
+}
+
+/*  Delete unreferenced vertices from the front of the token-location list.
+    If "beginning" is supplied, its head vertex must survive the cleanup:
+    a compiler error is reported if it (or the entire list) is about to be
+    deleted, which would indicate an invalid reuse of a beginning.
+
+    Fix: the validity check is now made *before* my_free() releases the
+    vertex.  Previously the comparison "beginning->head == moribund" was
+    made after the free, i.e. against a freed (indeterminate) pointer —
+    undefined behaviour, and a check that could never fire reliably.        */
+static void cleanup_token_locations(debug_location_beginning*beginning)
+{   if (first_token_locations)
+    {   while (first_token_locations &&
+               !first_token_locations->reference_count)
+        {   debug_locations*moribund = first_token_locations;
+            first_token_locations = moribund->next;
+            if (beginning &&
+                (beginning->head == moribund || !first_token_locations))
+            {   compiler_error
+                    ("Records needed by a debug_location_beginning are no "
+                     "longer allocated, perhaps because of an invalid reuse "
+                     "of this or an earlier beginning");
+            }
+            my_free(&moribund, "debug locations of recent tokens");
+        }
+    } else
+    {   if (beginning)
+        {   compiler_error
+                ("Attempt to use a debug_location_beginning when no token "
+                 "locations are defined");
+        } else
+        {   compiler_error
+                ("Attempt to clean up token locations when no token locations "
+                 "are defined");
+        }
+    }
+}
+
+extern void discard_token_location(debug_location_beginning beginning)
+{   /*  Release the pin taken out by get_token_location_beginning().        */
+    beginning.head->reference_count--;
+}
+
+/*  Close the range opened by get_token_location_beginning(): clean up any
+    unpinned vertices, release the pin, and return a location running from
+    the recorded beginning to the current end-point.  The returned value is
+    by-value; its "next" field aliases the live list and its reference
+    count starts at zero.                                                   */
+extern debug_locations get_token_location_end
+    (debug_location_beginning beginning)
+{   debug_locations result;
+    cleanup_token_locations(&beginning);
+    --(beginning.head->reference_count);
+    /* Sometimes we know what we'll read before we switch to the lexical block
+       where we'll read it.  In such cases the beginning will be placed in the
+       prior block and last exactly zero bytes there.  It's misleading to
+       include such ranges, so we gobble them. */
+    if (beginning.head->location.end_byte_index ==
+          beginning.beginning_byte_index &&
+        beginning.head->next)
+    {   /* Zero-width prefix in the prior block: start the reported range at
+           the very beginning of the following block instead. */
+        beginning.head = beginning.head->next;
+        result.location = beginning.head->location;
+        result.location.beginning_byte_index = 0;
+        result.location.beginning_line_number = 1;
+        result.location.beginning_character_number = 1;
+    } else
+    {   result.location = beginning.head->location;
+        result.location.beginning_byte_index =
+            beginning.beginning_byte_index;
+        result.location.beginning_line_number =
+            beginning.beginning_line_number;
+        result.location.beginning_character_number =
+            beginning.beginning_character_number;
+    }
+
+    /* The "orig" (Origsource-directive) coordinates always come from the
+       recorded beginning, whichever branch was taken above. */
+    result.location.orig_file_index =
+        beginning.orig_file_index;
+    result.location.orig_beg_line_number =
+        beginning.orig_beg_line_number;
+    result.location.orig_beg_char_number =
+        beginning.orig_beg_char_number;
+    
+    result.next = beginning.head->next;
+    result.reference_count = 0;
+    return result;
+}
+
+/* ------------------------------------------------------------------------- */
+/*   In order to be able to put tokens back efficiently, the lexer stores    */
+/*   tokens in a "circle": the variable circle_position ranges between       */
+/*   0 and CIRCLE_SIZE-1.  We only need a circle size as large as the        */
+/*   maximum number of tokens ever put back at once, plus 1 (in effect, the  */
+/*   maximum token lookahead ever needed in syntax analysis, plus 1).        */
+/*                                                                           */
+/*   Unlike some compilers, Inform does not have a context-free lexer: in    */
+/*   fact it has 12288 different possible states.  However, the context only */
+/*   affects the interpretation of "identifiers": lexemes beginning with a   */
+/*   letter and containing up to 32 chars of alphanumeric and underscore     */
+/*   chars.  (For example, "default" may refer to the directive or statement */
+/*   of that name, and which token values are returned depends on the        */
+/*   current lexical context.)                                               */
+/*                                                                           */
+/*   Along with each token, we also store the lexical context it was         */
+/*   translated under; because if it is called for again, there may need     */
+/*   to be a fresh interpretation of it if the context has changed.          */
+/* ------------------------------------------------------------------------- */
+
+#define CIRCLE_SIZE 6
+
+/*   (The worst case for token lookahead is distinguishing between an
+     old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".)   */
+
+static int circle_position;             /* Index of current token in circle  */
+static token_data circle[CIRCLE_SIZE];  /* The token circle itself           */
+
+static int token_contexts[CIRCLE_SIZE]; /* Lexical context (see
+                                           lexical_context() below) under
+                                           which each token was translated   */
+
+/* ------------------------------------------------------------------------- */
+/*   A complication, however, is that the text of some lexemes needs to be   */
+/*   held in Inform's memory for much longer periods: for example, a         */
+/*   dictionary word lexeme (like "'south'") must have its text preserved    */
+/*   until the code generation time for the expression it occurs in, when    */
+/*   the dictionary reference is actually made.  Code generation in general  */
+/*   occurs as early as possible in Inform: pending some better method of    */
+/*   garbage collection, we simply use a buffer so large that unless         */
+/*   expressions spread across 10K of source code are found, there can be    */
+/*   no problem.                                                             */
+/* ------------------------------------------------------------------------- */
+
+static char *lexeme_memory;             /* The long-lived lexeme buffer
+                                           described above                   */
+static char *lex_p;                     /* Current write position            */
+
+/* ------------------------------------------------------------------------- */
+/*   The lexer itself needs up to 3 characters of lookahead (it uses an      */
+/*   LR(3) grammar to translate characters into tokens).                     */
+/* ------------------------------------------------------------------------- */
+
+#define LOOKAHEAD_SIZE 3
+
+static int current, lookahead,          /* The latest character read, and    */
+    lookahead2, lookahead3;             /* the three characters following it */
+
+static int pipeline_made;               /* Whether or not the pipeline of
+                                           characters has been constructed
+                                           yet (this pass)                   */
+
+static int (* get_next_char)(void);     /* Routine for reading the stream of
+                                           characters: the lexer does not
+                                           need any "ungetc" routine for
+                                           putting them back again.  End of
+                                           stream is signalled by returning
+                                           zero.                             */
+
+static char *source_to_analyse;         /* The current lexical source:
+                                           NULL for "load from source files",
+                                           otherwise this points to a string
+                                           containing Inform code            */
+
+static int tokens_put_back;             /* Count of the number of backward
+                                           moves made from the last-read
+                                           token                             */
+
+/*  Print a human-readable description of token t to stdout, for tracing
+    and diagnostic output.  (The literal strings below are diagnostic text
+    and must not be altered casually.)                                       */
+extern void describe_token(token_data t)
+{
+    /*  Many of the token types are not set in this file, but later on in
+        Inform's higher stages (for example, in the expression evaluator);
+        but this routine describes them all.                                 */
+
+    printf("{ ");
+
+    switch(t.type)
+    {
+        /*  The following token types occur in lexer output:                 */
+
+        case SYMBOL_TT:          printf("symbol ");
+                                 describe_symbol(t.value);
+                                 break;
+        case NUMBER_TT:          printf("literal number %d", t.value);
+                                 break;
+        case DQ_TT:              printf("string \"%s\"", t.text);
+                                 break;
+        case SQ_TT:              printf("string '%s'", t.text);
+                                 break;
+        case SEP_TT:             printf("separator '%s'", t.text);
+                                 break;
+        case EOF_TT:             printf("end of file");
+                                 break;
+
+        case STATEMENT_TT:       printf("statement name '%s'", t.text);
+                                 break;
+        case SEGMENT_MARKER_TT:  printf("object segment marker '%s'", t.text);
+                                 break;
+        case DIRECTIVE_TT:       printf("directive name '%s'", t.text);
+                                 break;
+        case CND_TT:             printf("textual conditional '%s'", t.text);
+                                 break;
+        case OPCODE_NAME_TT:     printf("opcode name '%s'", t.text);
+                                 break;
+        case SYSFUN_TT:          printf("built-in function name '%s'", t.text);
+                                 break;
+        case LOCAL_VARIABLE_TT:  printf("local variable name '%s'", t.text);
+                                 break;
+        case MISC_KEYWORD_TT:    printf("statement keyword '%s'", t.text);
+                                 break;
+        case DIR_KEYWORD_TT:     printf("directive keyword '%s'", t.text);
+                                 break;
+        case TRACE_KEYWORD_TT:   printf("'trace' keyword '%s'", t.text);
+                                 break;
+        case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
+                                 break;
+
+        /*  The remaining are etoken types, not set by the lexer             */
+
+        case OP_TT:              printf("operator '%s'",
+                                     operators[t.value].description);
+                                 break;
+        case ENDEXP_TT:          printf("end of expression");
+                                 break;
+        case SUBOPEN_TT:         printf("open bracket");
+                                 break;
+        case SUBCLOSE_TT:        printf("close bracket");
+                                 break;
+        case LARGE_NUMBER_TT:    printf("large number: '%s'=%d",t.text,t.value);
+                                 break;
+        case SMALL_NUMBER_TT:    printf("small number: '%s'=%d",t.text,t.value);
+                                 break;
+        case VARIABLE_TT:        printf("variable '%s'=%d", t.text, t.value);
+                                 break;
+        case DICTWORD_TT:        printf("dictionary word '%s'", t.text);
+                                 break;
+        case ACTION_TT:          printf("action name '%s'", t.text);
+                                 break;
+
+        default:
+            printf("** unknown token type %d, text='%s', value=%d **",
+            t.type, t.text, t.value);
+    }
+    printf(" }");
+}
+
+/* ------------------------------------------------------------------------- */
+/*   All but one of the 280 Inform keywords (118 of them opcode names used   */
+/*   only by the assembler).  (The one left over is "sp", a keyword used in  */
+/*   assembly language only.)                                                */
+/*                                                                           */
+/*   A "keyword group" is a set of keywords to be searched for.  If a match  */
+/*   is made on an identifier, the token type becomes that given in the KG   */
+/*   and the token value is its index in the KG.                             */
+/*                                                                           */
+/*   The keyword ordering must correspond with the appropriate #define's in  */
+/*   "header.h" but is otherwise not significant.                            */
+/* ------------------------------------------------------------------------- */
+
+#define MAX_KEYWORDS 350                /* Capacity of one keyword group     */
+
+/* The values will be filled in at compile time, when we know
+   which opcode set to use. */
+/* (That is: make_keywords_tables() copies either opcode_list_z or
+   opcode_list_g, below, into this group once the target VM is known.) */
+keyword_group opcode_names =
+{ { "" },
+    OPCODE_NAME_TT, FALSE, TRUE
+};
+
+/*  Z-machine opcode names, in opcode-number order; the empty string
+    terminates the list (copied into opcode_names when !glulx_mode).         */
+static char *opcode_list_z[] = {
+    "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
+    "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
+    "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
+    "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
+    "print_char", "print_num", "random", "push", "pull", "split_window",
+    "set_window", "output_stream", "input_stream", "sound_effect", "jz",
+    "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
+    "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
+    "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
+    "restore", "restart", "ret_popped", "pop", "quit", "new_line",
+    "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
+    "erase_window", "erase_line", "set_cursor", "get_cursor",
+    "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
+    "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
+    "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
+    "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
+    "restore_undo", "draw_picture", "picture_data", "erase_picture",
+    "set_margins", "move_window", "window_size", "window_style",
+    "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
+    "mouse_window", "push_stack", "put_wind_prop", "print_form",
+    "make_menu", "picture_table", "print_unicode", "check_unicode",
+    ""
+};
+
+/*  Glulx opcode names; the empty string terminates the list (copied into
+    opcode_names when glulx_mode).                                           */
+static char *opcode_list_g[] = {
+    "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
+    "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
+    "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle", 
+    "jltu", "jgeu", "jgtu", "jleu", 
+    "call", "return",
+    "catch", "throw", "tailcall", 
+    "copy", "copys", "copyb", "sexs", "sexb", "aload",
+    "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
+    "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
+    "streamchar", "streamnum", "streamstr", 
+    "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
+    "random", "setrandom", "quit", "verify",
+    "restart", "save", "restore", "saveundo", "restoreundo", "protect",
+    "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
+    "linearsearch", "binarysearch", "linkedsearch",
+    "callf", "callfi", "callfii", "callfiii", 
+    "streamunichar",
+    "mzero", "mcopy", "malloc", "mfree",
+    "accelfunc", "accelparam",
+    "numtof", "ftonumz", "ftonumn", "ceil", "floor",
+    "fadd", "fsub", "fmul", "fdiv", "fmod",
+    "sqrt", "exp", "log", "pow",
+    "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
+    "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
+    ""
+};
+
+/*  "Opcode macros": assembly names which expand to real opcodes.  Filled
+    in by make_keywords_tables() from one of the two lists below.            */
+keyword_group opcode_macros =
+{ { "" },
+  OPCODE_MACRO_TT, FALSE, TRUE
+};
+
+static char *opmacro_list_z[] = { "" };  /* None exist for the Z-machine     */
+
+static char *opmacro_list_g[] = {
+    "pull", "push",
+    ""
+};
+
+keyword_group directives =
+{ { "abbreviate", "array", "attribute", "class", "constant",
+    "default", "dictionary", "end", "endif", "extend", "fake_action",
+    "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
+    "iffalse", "import", "include", "link", "lowstring", "message",
+    "nearby", "object", "origsource", "property", "release", "replace",
+    "serial", "switches", "statusline", "stub", "system_file", "trace",
+    "undef", "verb", "version", "zcharacter",
+    "" },
+    DIRECTIVE_TT, FALSE, FALSE
+};
+
+keyword_group trace_keywords =
+{ { "dictionary", "symbols", "objects", "verbs",
+    "assembly", "expressions", "lines", "tokens", "linker",
+    "on", "off", "" },
+    TRACE_KEYWORD_TT, FALSE, TRUE
+};
+
+keyword_group segment_markers =
+{ { "class", "has", "private", "with", "" },
+    SEGMENT_MARKER_TT, FALSE, TRUE
+};
+
+keyword_group directive_keywords =
+{ { "alias", "long", "additive",
+    "score", "time",
+    "noun", "held", "multi", "multiheld", "multiexcept",
+    "multiinside", "creature", "special", "number", "scope", "topic",
+    "reverse", "meta", "only", "replace", "first", "last",
+    "string", "table", "buffer", "data", "initial", "initstr",
+    "with", "private", "has", "class",
+    "error", "fatalerror", "warning",
+    "terminating",
+    "" },
+    DIR_KEYWORD_TT, FALSE, TRUE
+};
+
+keyword_group misc_keywords =
+{ { "char", "name", "the", "a", "an", "The", "number",
+    "roman", "reverse", "bold", "underline", "fixed", "on", "off",
+    "to", "address", "string", "object", "near", "from", "property", "A", "" },
+    MISC_KEYWORD_TT, FALSE, TRUE
+};
+
+keyword_group statements =
+{ { "box", "break", "continue", "default", "do", "else", "font", "for",
+    "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
+    "print", "print_ret", "quit", "read", "remove", "restore", "return",
+    "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
+    "until", "while", "" },
+    STATEMENT_TT, FALSE, TRUE
+};
+
+keyword_group conditions =
+{ { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
+    CND_TT, FALSE, TRUE
+};
+
+keyword_group system_functions =
+{ { "child", "children", "elder", "eldest", "indirect", "parent", "random",
+    "sibling", "younger", "youngest", "metaclass", "glk", "" },
+    SYSFUN_TT, FALSE, TRUE
+};
+
+keyword_group system_constants =
+{ { "adjectives_table", "actions_table", "classes_table",
+    "identifiers_table", "preactions_table", "version_number",
+    "largest_object", "strings_offset", "code_offset",
+    "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
+    "static_memory_offset", "array_names_offset", "readable_memory_offset",
+    "cpv__start", "cpv__end", "ipv__start", "ipv__end",
+    "array__start", "array__end",
+    "lowest_attribute_number", "highest_attribute_number",
+    "attribute_names_array",
+    "lowest_property_number", "highest_property_number",
+    "property_names_array",
+    "lowest_action_number", "highest_action_number",
+    "action_names_array",
+    "lowest_fake_action_number", "highest_fake_action_number",
+    "fake_action_names_array",
+    "lowest_routine_number", "highest_routine_number", "routines_array",
+    "routine_names_array", "routine_flags_array",
+    "lowest_global_number", "highest_global_number", "globals_array",
+    "global_names_array", "global_flags_array",
+    "lowest_array_number", "highest_array_number", "arrays_array",
+    "array_names_array", "array_flags_array",
+    "lowest_constant_number", "highest_constant_number", "constants_array",
+    "constant_names_array",
+    "lowest_class_number", "highest_class_number", "class_objects_array",
+    "lowest_object_number", "highest_object_number",
+    "oddeven_packing",
+    "grammar_table", "dictionary_table", "dynam_string_table",
+    "" },
+    SYSTEM_CONSTANT_TT, FALSE, TRUE
+};
+
+/*  The index into this array is the group number stored as the first
+    member of each triple in keywords_data_table (see
+    make_keywords_tables() below); entry 0 is unused.                        */
+keyword_group *keyword_groups[12]
+= { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
+    &directive_keywords, &misc_keywords, &statements, &conditions,
+    &system_functions, &system_constants, &opcode_macros};
+
+keyword_group local_variables =
+{ { "" },                                 /* Filled in when routine declared */
+    LOCAL_VARIABLE_TT, FALSE, FALSE
+};
+
+static int lexical_context(void)
+{
+    /*  Encode the current lexical context as a single integer: the same
+        input text always translates to the same output tokens whenever
+        this number is the same.
+
+        For efficiency reasons this number omits the bit of information
+        held in the variable "dont_enter_into_symbol_table": Inform never
+        needs to backtrack through tokens parsed in that way (thankfully,
+        as it would be expensive indeed to check the tokens).                */
+
+    int i, c = 0;
+
+    /*  Bits 1, 2, 4, ..., 512: the "enabled" flags of keyword groups
+        1 to 10, in the order of the keyword_groups[] array above.           */
+    for (i=1; i<=10; i++)
+        if (keyword_groups[i]->enabled) c |= (1 << (i-1));
+
+    if (local_variables.enabled) c |= 1024;
+    if (return_sp_as_variable)   c |= 2048;
+    return c;
+}
+
+static void print_context(int c)
+{   /*  Print the mnemonic for each bit set in a lexical-context number
+        (one name per bit, in ascending bit order, as in the original
+        if-chain).                                                           */
+    static const char *flag_names[12] =
+    {   "OPC", "DIR", "TK", "SEG", "DK", "MK",
+        "STA", "CND", "SFUN", "SCON", "LV", "sp"
+    };
+    int i;
+    for (i=0; i<12; i++)
+        if ((c & (1 << i)) != 0) printf("%s ", flag_names[i]);
+}
+
+/*  Hash tables for keyword lookup, built by make_keywords_tables():
+    keywords_data_table holds triples (group number, index within group,
+    link to the next triple on the same hash chain, or -1); the two hash
+    tables give the first and last triple on each chain.                     */
+static int *keywords_hash_table;
+static int *keywords_hash_ends_table;
+static int *keywords_data_table;
+
+/*  Corresponding tables for the current routine's local variable names,
+    rebuilt by construct_local_variable_tables() below.                      */
+static int *local_variable_hash_table;
+static int *local_variable_hash_codes;
+char **local_variable_texts;
+static char *local_variable_text_table;
+
+/*  Fast lookup for single-letter local variable names (indexed by
+    character code).                                                         */
+static char one_letter_locals[128];
+
+/*  Build the keyword hash tables: first copy the opcode-name and
+    opcode-macro lists appropriate to the target VM into their groups,
+    then enter every keyword of groups 1 to 11 into keywords_data_table
+    as a triple (group number, index within group, link), chaining
+    together keywords which share a hash code.                               */
+static void make_keywords_tables(void)
+{   int i, j, h, tp=0;
+    char **oplist, **maclist;
+
+    if (!glulx_mode) {
+        oplist = opcode_list_z;
+        maclist = opmacro_list_z;
+    }
+    else {
+        oplist = opcode_list_g;
+        maclist = opmacro_list_g;
+    }
+
+    for (j=0; *(oplist[j]); j++) {
+        opcode_names.keywords[j] = oplist[j];
+    }
+    opcode_names.keywords[j] = "";
+    
+    for (j=0; *(maclist[j]); j++) {
+        opcode_macros.keywords[j] = maclist[j];
+    }
+    opcode_macros.keywords[j] = "";
+
+    /* Empty every hash chain. */
+    for (i=0; i<HASH_TAB_SIZE; i++)
+    {   keywords_hash_table[i] = -1;
+        keywords_hash_ends_table[i] = -1;
+    }
+
+    for (i=1; i<=11; i++)
+    {   keyword_group *kg = keyword_groups[i];
+        for (j=0; *(kg->keywords[j]) != 0; j++)
+        {   h = hash_code_from_string(kg->keywords[j]);
+            /* Append triple number tp to hash chain h (the ends table
+               makes the append O(1)). */
+            if (keywords_hash_table[h] == -1)
+                keywords_hash_table[h] = tp;
+            else
+              *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
+            keywords_hash_ends_table[h] = tp;
+            *(keywords_data_table + 3*tp) = i;
+            *(keywords_data_table + 3*tp+1) = j;
+            *(keywords_data_table + 3*tp+2) = -1;
+            tp++;
+        }
+    }
+}
+
+/*  Rebuild the lookup tables for the current routine's local variable
+    names (local_variables.keywords[0..no_locals-1]): a direct table for
+    single-letter names, a hash table for the rest, and a private copy of
+    each name's text in local_variable_text_table.                           */
+extern void construct_local_variable_tables(void)
+{   int i, h; char *p = local_variable_text_table;
+    for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
+    /* MAX_LOCAL_VARIABLES acts as a "no such local" sentinel below. */
+    for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
+
+    for (i=0; i<no_locals; i++)
+    {   char *q = local_variables.keywords[i];
+        if (q[1] == 0)
+        {   /* Single-letter name: enter it under both cases.
+               NOTE(review): isupper/islower/tolower/toupper receive a
+               plain char here — assumes identifier characters are
+               plain ASCII; confirm against the lexer's alphabet.    */
+            one_letter_locals[(uchar)q[0]] = i;
+            if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
+            if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
+        }
+        h = hash_code_from_string(q);
+        /* Only the first local with hash h is recorded; lookups then scan
+           forward comparing stored hash codes (see interpret_identifier). */
+        if (local_variable_hash_table[h] == -1)
+            local_variable_hash_table[h] = i;
+        local_variable_hash_codes[i] = h;
+        local_variable_texts[i] = p;
+        strcpy(p, q);
+        p += strlen(p)+1;
+    }
+    /* Fill the remaining slots with a harmless placeholder text. */
+    for (;i<MAX_LOCAL_VARIABLES-1;i++) 
+      local_variable_texts[i] = "<no such local variable>";
+}
+
static void interpret_identifier(int pos, int dirs_only_flag)
{   int index, hashcode; char *p = circle[pos].text;

    /*  An identifier is either a keyword or a "symbol", a name which the
        lexical analyser leaves to higher levels of Inform to understand.    */

    /*  Rewrites circle[pos].type and circle[pos].value in place.  With
        dirs_only_flag set, only the directives keyword group is consulted.  */

    hashcode = hash_code_from_string(p);

    if (dirs_only_flag) goto KeywordSearch;

    /*  If this is assembly language, perhaps it is "sp"?                    */

    if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
    {   circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
        return;
    }

    /*  Test for local variables first, quite quickly.                       */

    if (local_variables.enabled)
    {   if (p[1]==0)
        {   /*  Single-character names have their own direct lookup table.   */
            index = one_letter_locals[(uchar)p[0]];
            if (index<MAX_LOCAL_VARIABLES)
            {   circle[pos].type = LOCAL_VARIABLE_TT;
                circle[pos].value = index+1;
                return;
            }
        }
        index = local_variable_hash_table[hashcode];
        if (index >= 0)
        {   /*  Scan forward from the first local with this hash code.       */
            for (;index<no_locals;index++)
            {   if (hashcode == local_variable_hash_codes[index])
                {   if (strcmpcis(p, local_variable_texts[index])==0)
                    {   circle[pos].type = LOCAL_VARIABLE_TT;
                        circle[pos].value = index+1;
                        return;
                    }
                }
            }
        }
    }

    /*  Now the bulk of the keywords.  Note that the lexer doesn't recognise
        the name of a system function which has been Replaced.               */

    KeywordSearch:
    index = keywords_hash_table[hashcode];
    while (index >= 0)
    {   /*  Walk the chain of keyword entries sharing this hash code.        */
        int *i = keywords_data_table + 3*index;
        keyword_group *kg = keyword_groups[*i];
        if (((!dirs_only_flag) && (kg->enabled))
            || (dirs_only_flag && (kg == &directives)))
        {   char *q = kg->keywords[*(i+1)];
            if (((kg->case_sensitive) && (strcmp(p, q)==0))
                || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
            {   if ((kg != &system_functions)
                    || (system_function_usage[*(i+1)]!=2))
                {   circle[pos].type = kg->change_token_type;
                    circle[pos].value = *(i+1);
                    return;
                }
            }
        }
        index = *(i+2);
    }

    if (dirs_only_flag) return;

    /*  Search for the name; create it if necessary.                         */

    circle[pos].value = symbol_index(p, hashcode);
    circle[pos].type = SYMBOL_TT;
}
+
+
+/* ------------------------------------------------------------------------- */
+/*   The tokeniser grid aids a rapid decision about the consequences of a    */
+/*   character reached in the buffer.  In effect it is an efficiently stored */
+/*   transition table using an algorithm similar to that of S. C. Johnson's  */
+/*   "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9).       */
+/*   My thanks to Dilip Sequeira for suggesting this.                        */
+/*                                                                           */
+/*       tokeniser_grid[c]   is (16*n + m) if c is the first character of    */
+/*                               separator numbers n, n+1, ..., n+m-1        */
+/*                           or certain special values (QUOTE_CODE, etc)     */
+/*                           or 0 otherwise                                  */
+/*                                                                           */
+/*   Since 1000/16 = 62, the code numbers below will need increasing if the  */
+/*   number of separators supported exceeds 61.                              */
+/* ------------------------------------------------------------------------- */
+
+static int tokeniser_grid[256];
+
+#define QUOTE_CODE      1000
+#define DQUOTE_CODE     1001
+#define NULL_CODE       1002
+#define SPACE_CODE      1003
+#define NEGATIVE_CODE   1004
+#define DIGIT_CODE      1005
+#define RADIX_CODE      1006
+#define KEYWORD_CODE    1007
+#define EOF_CODE        1008
+#define WHITESPACE_CODE 1009
+#define COMMENT_CODE    1010
+#define IDENTIFIER_CODE 1011
+
+/*  This list cannot safely be changed without also changing the header
+    separator #defines.  The ordering is significant in that (i) all entries
+    beginning with the same character must be adjacent and (ii) that if
+    X is a an initial substring of Y then X must come before Y.
+
+    E.g. --> must occur before -- to prevent "-->0" being tokenised
+    wrongly as "--", ">", "0" rather than "-->", "0".                        */
+
/*  (See the ordering constraints in the comment immediately above: entries
    sharing a first character are adjacent, and longer separators precede
    their prefixes.)                                                         */
static const char separators[NUMBER_SEPARATORS][4] =
{   "->", "-->", "--", "-", "++", "+", "*", "/", "%",
    "||", "|", "&&", "&", "~~",
    "~=", "~", "==", "=", ">=", ">",
    "<=", "<", "(", ")", ",",
    ".&", ".#", "..&", "..#", "..", ".",
    "::", ":", "@", ";", "[", "]", "{", "}",
    "$", "?~", "?",
    "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
};
+
+static void make_tokeniser_grid(void)
+{
+    /*  Construct the grid to the specification above.                       */
+
+    int i, j;
+
+    for (i=0; i<256; i++) tokeniser_grid[i]=0;
+
+    for (i=0; i<NUMBER_SEPARATORS; i++)
+    {   j=separators[i][0];
+        if (tokeniser_grid[j]==0)
+            tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
+    }
+    tokeniser_grid['\''] = QUOTE_CODE;
+    tokeniser_grid['\"'] = DQUOTE_CODE;
+    tokeniser_grid[0]    = EOF_CODE;
+    tokeniser_grid[' ']  = WHITESPACE_CODE;
+    tokeniser_grid['\n'] = WHITESPACE_CODE;
+    tokeniser_grid['$']  = RADIX_CODE;
+    tokeniser_grid['!']  = COMMENT_CODE;
+
+    tokeniser_grid['0']  = DIGIT_CODE;
+    tokeniser_grid['1']  = DIGIT_CODE;
+    tokeniser_grid['2']  = DIGIT_CODE;
+    tokeniser_grid['3']  = DIGIT_CODE;
+    tokeniser_grid['4']  = DIGIT_CODE;
+    tokeniser_grid['5']  = DIGIT_CODE;
+    tokeniser_grid['6']  = DIGIT_CODE;
+    tokeniser_grid['7']  = DIGIT_CODE;
+    tokeniser_grid['8']  = DIGIT_CODE;
+    tokeniser_grid['9']  = DIGIT_CODE;
+
+    tokeniser_grid['a']  = IDENTIFIER_CODE;
+    tokeniser_grid['b']  = IDENTIFIER_CODE;
+    tokeniser_grid['c']  = IDENTIFIER_CODE;
+    tokeniser_grid['d']  = IDENTIFIER_CODE;
+    tokeniser_grid['e']  = IDENTIFIER_CODE;
+    tokeniser_grid['f']  = IDENTIFIER_CODE;
+    tokeniser_grid['g']  = IDENTIFIER_CODE;
+    tokeniser_grid['h']  = IDENTIFIER_CODE;
+    tokeniser_grid['i']  = IDENTIFIER_CODE;
+    tokeniser_grid['j']  = IDENTIFIER_CODE;
+    tokeniser_grid['k']  = IDENTIFIER_CODE;
+    tokeniser_grid['l']  = IDENTIFIER_CODE;
+    tokeniser_grid['m']  = IDENTIFIER_CODE;
+    tokeniser_grid['n']  = IDENTIFIER_CODE;
+    tokeniser_grid['o']  = IDENTIFIER_CODE;
+    tokeniser_grid['p']  = IDENTIFIER_CODE;
+    tokeniser_grid['q']  = IDENTIFIER_CODE;
+    tokeniser_grid['r']  = IDENTIFIER_CODE;
+    tokeniser_grid['s']  = IDENTIFIER_CODE;
+    tokeniser_grid['t']  = IDENTIFIER_CODE;
+    tokeniser_grid['u']  = IDENTIFIER_CODE;
+    tokeniser_grid['v']  = IDENTIFIER_CODE;
+    tokeniser_grid['w']  = IDENTIFIER_CODE;
+    tokeniser_grid['x']  = IDENTIFIER_CODE;
+    tokeniser_grid['y']  = IDENTIFIER_CODE;
+    tokeniser_grid['z']  = IDENTIFIER_CODE;
+
+    tokeniser_grid['A']  = IDENTIFIER_CODE;
+    tokeniser_grid['B']  = IDENTIFIER_CODE;
+    tokeniser_grid['C']  = IDENTIFIER_CODE;
+    tokeniser_grid['D']  = IDENTIFIER_CODE;
+    tokeniser_grid['E']  = IDENTIFIER_CODE;
+    tokeniser_grid['F']  = IDENTIFIER_CODE;
+    tokeniser_grid['G']  = IDENTIFIER_CODE;
+    tokeniser_grid['H']  = IDENTIFIER_CODE;
+    tokeniser_grid['I']  = IDENTIFIER_CODE;
+    tokeniser_grid['J']  = IDENTIFIER_CODE;
+    tokeniser_grid['K']  = IDENTIFIER_CODE;
+    tokeniser_grid['L']  = IDENTIFIER_CODE;
+    tokeniser_grid['M']  = IDENTIFIER_CODE;
+    tokeniser_grid['N']  = IDENTIFIER_CODE;
+    tokeniser_grid['O']  = IDENTIFIER_CODE;
+    tokeniser_grid['P']  = IDENTIFIER_CODE;
+    tokeniser_grid['Q']  = IDENTIFIER_CODE;
+    tokeniser_grid['R']  = IDENTIFIER_CODE;
+    tokeniser_grid['S']  = IDENTIFIER_CODE;
+    tokeniser_grid['T']  = IDENTIFIER_CODE;
+    tokeniser_grid['U']  = IDENTIFIER_CODE;
+    tokeniser_grid['V']  = IDENTIFIER_CODE;
+    tokeniser_grid['W']  = IDENTIFIER_CODE;
+    tokeniser_grid['X']  = IDENTIFIER_CODE;
+    tokeniser_grid['Y']  = IDENTIFIER_CODE;
+    tokeniser_grid['Z']  = IDENTIFIER_CODE;
+
+    tokeniser_grid['_']  = IDENTIFIER_CODE;
+}
+
+/* ------------------------------------------------------------------------- */
+/*   Definition of a lexical block: a source file or a string containing     */
+/*   text for lexical analysis; an independent source from the point of      */
+/*   view of issuing error reports.                                          */
+/* ------------------------------------------------------------------------- */
+
/*  One independent source of text being lexed: a source file, or an
    internal string (such as a veneer routine).                              */
typedef struct LexicalBlock_s
{   char *filename;                              /*  Full translated name    */
    int   main_flag;                             /*  TRUE if the main file
                                                     (the first one opened)  */
    int   sys_flag;                              /*  TRUE if a System_File   */
    int   source_line;                           /*  Line number count       */
    int   line_start;                            /*  Char number within file
                                                     where the current line
                                                     starts                  */
    int   chars_read;                            /*  Char number of read pos */
    int   file_no;                               /*  Or 255 if not from a
                                                     file; used for debug
                                                     information             */
    char *orig_source;                           /* From #origsource direct  */
    int orig_file;                               /* Origsource file number,
                                                    or 0 if none declared    */
    int32 orig_line;                             /* Origsource line number   */
    int32 orig_char;                             /* Origsource char number   */
} LexicalBlock;
+
/*  Dummy lexical blocks for moments when no real source file is the
    current input.                                                           */
static LexicalBlock NoFileOpen =
    {   "<before compilation>",  FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };

static LexicalBlock MakingOutput =
    {   "<constructing output>", FALSE, FALSE, 0, 0, 0, 255, NULL, 0, 0, 0 };

/*  Used when lexing an internal string (see restart_lexer); marked as a
    system file so errors in veneer code are reported accordingly.           */
static LexicalBlock StringLB =
    {   "<veneer routine>",      FALSE, TRUE,  0, 0, 0, 255, NULL, 0, 0, 0 };

static LexicalBlock *CurrentLB;                  /*  The current lexical
                                                     block of input text     */
+
+extern void declare_systemfile(void)
+{   CurrentLB->sys_flag = TRUE;
+}
+
+extern int is_systemfile(void)
+{   return ((CurrentLB->sys_flag)?1:0);
+}
+
+extern void set_origsource_location(char *source, int32 line, int32 charnum)
+{
+    if (!source) {
+        /* Clear the Origsource declaration. */
+        CurrentLB->orig_file = 0;
+        CurrentLB->orig_source = NULL;
+        CurrentLB->orig_line = 0;
+        CurrentLB->orig_char = 0;
+        return;
+    }
+
+    /* Get the file number for a new or existing InputFiles entry. */
+    int file_no = register_orig_sourcefile(source);
+
+    CurrentLB->orig_file = file_no;
+    CurrentLB->orig_source = InputFiles[file_no-1].filename;
+    CurrentLB->orig_line = line;
+    CurrentLB->orig_char = charnum;
+}
+
+/* Error locations. */
+
+extern debug_location get_current_debug_location(void)
+{   debug_location result;
+    /* Assume that all input characters are one byte. */
+    result.file_index = CurrentLB->file_no;
+    result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
+    result.end_byte_index = result.beginning_byte_index;
+    result.beginning_line_number = CurrentLB->source_line;
+    result.end_line_number = result.beginning_line_number;
+    result.beginning_character_number =
+        CurrentLB->chars_read - CurrentLB->line_start;
+    result.end_character_number = result.beginning_character_number;
+    result.orig_file_index = CurrentLB->orig_file;
+    result.orig_beg_line_number = CurrentLB->orig_line;
+    result.orig_beg_char_number = CurrentLB->orig_char;
+    return result;
+}
+
+static debug_location ErrorReport_debug_location;
+
+extern void report_errors_at_current_line(void)
+{   ErrorReport.line_number = CurrentLB->source_line;
+    ErrorReport.file_number = CurrentLB->file_no;
+    if (ErrorReport.file_number == 255)
+        ErrorReport.file_number = -1;
+    ErrorReport.source      = CurrentLB->filename;
+    ErrorReport.main_flag   = CurrentLB->main_flag;
+    if (debugfile_switch)
+        ErrorReport_debug_location = get_current_debug_location();
+    ErrorReport.orig_file = CurrentLB->orig_file;
+    ErrorReport.orig_source = CurrentLB->orig_source;
+    ErrorReport.orig_line = CurrentLB->orig_line;
+    ErrorReport.orig_char = CurrentLB->orig_char;
+}
+
+extern debug_location get_error_report_debug_location(void)
+{   return ErrorReport_debug_location;
+}
+
+extern int32 get_current_line_start(void)
+{   return CurrentLB->line_start;
+}
+
+brief_location blank_brief_location;
+
+extern brief_location get_brief_location(ErrorPosition *errpos)
+{
+    brief_location loc;
+    loc.file_index = errpos->file_number;
+    loc.line_number = errpos->line_number;
+    loc.orig_file_index = errpos->orig_file;
+    loc.orig_line_number = errpos->orig_line;
+    return loc;
+}
+
+extern void export_brief_location(brief_location loc, ErrorPosition *errpos)
+{
+    if (loc.file_index != -1)
+    {   errpos->file_number = loc.file_index;
+        errpos->line_number = loc.line_number;
+        errpos->main_flag = (errpos->file_number == 1);
+        errpos->orig_source = NULL;
+        errpos->orig_file = loc.orig_file_index;
+        errpos->orig_line = loc.orig_line_number;
+        errpos->orig_char = 0;
+    }
+}
+
+/* ------------------------------------------------------------------------- */
+/*   Hash printing and line counting                                         */
+/* ------------------------------------------------------------------------- */
+
+static void print_hash(void)
+{
+    /*  Hash-printing is the practice of printing a # character every 100
+        lines of source code (the -x switch), reassuring the user that
+        progress is being made                                               */
+
+    if (no_hash_printed_yet)
+    {   printf("::"); no_hash_printed_yet = FALSE;
+    }
+    printf("#"); hash_printed_since_newline = TRUE;
+
+#ifndef MAC_FACE
+    /*  On some systems, text output is buffered to a line at a time, and
+        this would frustrate the point of hash-printing, so:                 */
+
+    fflush(stdout);
+#endif
+}
+
static void reached_new_line(void)
{
    /*  Called to signal that a new line has been reached in the source code */

    /*  Discard the characters buffered for error reporting on the old line. */
    forerrors_pointer = 0;

    CurrentLB->source_line++;
    CurrentLB->line_start = CurrentLB->chars_read;

    total_source_line_count++;

    /*  Every 100 lines, print a progress hash if -x is on.                  */
    if (total_source_line_count%100==0)
    {   if (hash_switch) print_hash();
#ifdef MAC_MPW
        SpinCursor(32);                    /* I.e., allow other tasks to run */
#endif
    }

#ifdef MAC_FACE
    /*  Periodically poll for user events; if the user has cancelled, free
        all resources and bail out of compilation via longjmp.               */
    if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
    {   ProcessEvents (&g_proc);
        if (g_proc != true)
        {   free_arrays();
            close_all_source();
            if (temporary_files_switch)
                remove_temp_files();
            if (store_the_text)
                my_free(&all_text,"transcription text");
            abort_transcript_file();
            longjmp (g_fallback, 1);
        }
    }
#endif
}
+
+static void new_syntax_line(void)
+{   if (source_to_analyse != NULL) forerrors_pointer = 0;
+    report_errors_at_current_line();
+}
+
+/* Return 10 raised to the expo power.
+ *
+ * I'm avoiding the standard pow() function for a rather lame reason:
+ * it's in the libmath (-lm) library, and I don't want to change the
+ * build model for the compiler. So, this is implemented with a stupid
+ * lookup table. It's faster than pow() for small values of expo.
+ * Probably not as fast if expo is 200, but "$+1e200" is an overflow
+ * anyway, so I don't expect that to be a problem.
+ *
+ * (For some reason, frexp() and ldexp(), which are used later on, do
+ * not require libmath to be linked in.)
+ */
static double pow10_cheap(int expo)
{
    /*  Return 10 raised to the expo power, via the small lookup table and
        repeated scaling described in the comment above (avoiding pow()).    */
    #define POW10_RANGE (8)
    static double powers[POW10_RANGE*2+1] = {
        0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
        1.0,
        10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
    };

    double result = 1.0;

    if (expo >= 0) {
        /*  Scale down by 1e8 steps until the table covers the remainder.    */
        while (expo > POW10_RANGE) {
            result *= powers[POW10_RANGE*2];
            expo -= POW10_RANGE;
        }
        return result * powers[POW10_RANGE+expo];
    }

    while (expo < -POW10_RANGE) {
        result *= powers[0];
        expo += POW10_RANGE;
    }
    return result * powers[POW10_RANGE+expo];
}
+
+/* Return the IEEE-754 single-precision encoding of a floating-point
+ * number. See http://www.psc.edu/general/software/packages/ieee/ieee.php
+ * for an explanation.
+ *
+ * The number is provided in the pieces it was parsed in:
+ *    [+|-] intv "." fracv "e" [+|-]expo
+ *
+ * If the magnitude is too large (beyond about 3.4e+38), this returns
+ * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
+ * small (below about 1e-45), this returns a zero value (0x00000000 or 
+ * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
+ * lexer should never do that).
+ *
+ * Note that using a float constant does *not* set the uses_float_features
+ * flag (which would cause the game file to be labelled 3.1.2). There's
+ * no VM feature here, just an integer. Of course, any use of the float
+ * *opcodes* will set the flag.
+ *
+ * The math functions in this routine require #including <math.h>, but
+ * they should not require linking the math library (-lm). At least,
+ * they do not on OSX and Linux.
+ */
static int32 construct_float(int signbit, double intv, double fracv, int expo)
{
    /*  Assemble the IEEE-754 single-precision bit pattern for
        (-1)^signbit * (intv + fracv) * 10^expo.  See the comment above.     */
    double absval = (intv + fracv) * pow10_cheap(expo);
    int32 sign = (signbit ? 0x80000000 : 0x0);
    double mant;
    int32 fbits;
    if (isinf(absval)) {
        return sign | 0x7f800000; /* infinity */
    }
    if (isnan(absval)) {
        return sign | 0x7fc00000; /* quiet NaN */
    }

    /*  Split absval = mant * 2^expo with mant in [0.5, 1.0).                */
    mant = frexp(absval, &expo);

    /* Normalize mantissa to be in the range [1.0, 2.0) */
    if (0.5 <= mant && mant < 1.0) {
        mant *= 2.0;
        expo--;
    }
    else if (mant == 0.0) {
        expo = 0;
    }
    else {
        return sign | 0x7f800000; /* infinity */
    }

    if (expo >= 128) {
        return sign | 0x7f800000; /* infinity */
    }
    else if (expo < -126) {
        /* Denormalized (very small) number */
        mant = ldexp(mant, 126 + expo);
        expo = 0;
    }
    else if (!(expo == 0 && mant == 0.0)) {
        expo += 127; /* biased exponent */
        mant -= 1.0; /* Get rid of leading 1 */
    }

    mant *= 8388608.0; /* 2^23 */
    fbits = (int32)(mant + 0.5); /* round mant to nearest int */
    if (fbits >> 23) {
        /* The carry propagated out of a string of 23 1 bits. */
        fbits = 0;
        expo++;
        if (expo >= 255) {
            return sign | 0x7f800000; /* infinity */
        }
    }

    /*  Pack sign (1 bit), exponent (8 bits), fraction (23 bits).            */
    return (sign) | ((int32)(expo << 23)) | (fbits);
}
+
+/* ------------------------------------------------------------------------- */
+/*   Characters are read via a "pipeline" of variables, allowing us to look  */
+/*       up to three characters ahead of the current position.               */
+/*                                                                           */
+/*   There are two possible sources: from the source files being loaded in,  */
+/*   and from a string inside Inform (which is where the code for veneer     */
+/*   routines comes from).  Each source has its own get-next-character       */
+/*   routine.                                                                */
+/* ------------------------------------------------------------------------- */
+/*   Source 1: from files                                                    */
+/*                                                                           */
+/*   Note that file_load_chars(p, size) loads "size" bytes into buffer "p"   */
+/*   from the current input file.  If the file runs out, then if it was      */
+/*   the last source file 4 EOF characters are placed in the buffer: if it   */
+/*   was only an Include file ending, then a '\n' character is placed there  */
+/*   (essentially to force termination of any comment line) followed by      */
+/*   three harmless spaces.                                                  */
+/*                                                                           */
+/*   The routine returns the number of characters it has written, and note   */
+/*   that this conveniently ensures that all characters in the buffer come   */
+/*   from the same file.                                                     */
+/* ------------------------------------------------------------------------- */
+
+#define SOURCE_BUFFER_SIZE 4096                  /*  Typical disc block size */
+
/*  State of one open source file on the inclusion stack.                    */
typedef struct Sourcefile_s
{   char *buffer;                                /*  Input buffer            */
    int   read_pos;                              /*  Read position in buffer */
    int   size;                                  /*  Number of meaningful
                                                     characters in buffer    */
    int   la, la2, la3;                          /*  Three characters of
                                                     lookahead pipeline      */
    int   file_no;                               /*  Internal file number
                                                     (1, 2, 3, ...)          */
    LexicalBlock LB;                             /*  Position/identity data
                                                     for error reporting     */
} Sourcefile;
+
+static Sourcefile *FileStack;
+static int File_sp;                              /*  Stack pointer           */
+
+static Sourcefile *CF;                           /*  Top entry on stack      */
+
+static int last_input_file;
+
static void begin_buffering_file(int i, int file_no)
{   int j, cnt; uchar *p;

    /*  Make FileStack[i] (i = inclusion depth) the current input, reading
        from InputFiles[file_no-1]: load the first buffer-full and prime the
        three-character lookahead pipeline.                                  */

    if (i >= MAX_INCLUSION_DEPTH) 
       memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);

    p = (uchar *) FileStack[i].buffer;

    /*  Save the interrupted file's lookahead pipeline, to be restored when
        this file is popped again.                                           */
    if (i>0)
    {   FileStack[i-1].la  = lookahead;
        FileStack[i-1].la2 = lookahead2;
        FileStack[i-1].la3 = lookahead3;
    }

    FileStack[i].file_no = file_no;
    FileStack[i].size = file_load_chars(file_no,
        (char *) p, SOURCE_BUFFER_SIZE);
    lookahead  = source_to_iso_grid[p[0]];
    lookahead2 = source_to_iso_grid[p[1]];
    lookahead3 = source_to_iso_grid[p[2]];
    if (LOOKAHEAD_SIZE != 3)
        compiler_error
            ("Lexer lookahead size does not match hard-coded lookahead code");
    FileStack[i].read_pos = LOOKAHEAD_SIZE;

    if (file_no==1) FileStack[i].LB.main_flag = TRUE;
               else FileStack[i].LB.main_flag = FALSE;
    FileStack[i].LB.sys_flag = FALSE;
    FileStack[i].LB.source_line = 1;
    FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
    FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
    FileStack[i].LB.filename = InputFiles[file_no-1].filename;
    FileStack[i].LB.file_no = file_no;
    FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0; 
    FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0;

    CurrentLB = &(FileStack[i].LB);
    CF = &(FileStack[i]);

    /* Check for recursive inclusion */
    cnt = 0;
    for (j=0; j<i; j++)
    {   if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
            cnt++;
    }
    /*  NOTE(review): after the loop j==i, so the name printed below is that
        of the file just opened -- the same text as the duplicate found.     */
    if (cnt==1)
        warning_named("File included more than once",
            FileStack[j].LB.filename);
}
+
+static void create_char_pipeline(void)
+{
+    File_sp = 0;
+    begin_buffering_file(File_sp++, 1);
+    pipeline_made = TRUE;
+    last_input_file = current_input_file;
+}
+
static int get_next_char_from_pipeline(void)
{   uchar *p;

    /*  Deliver the next source character, shifting the three-character
        lookahead pipeline along, and deal with Include files being opened
        or running out in between characters.  Returns 0 at end of input.    */

    while (last_input_file < current_input_file)
    {
        /*  An "Include" file must have opened since the last character
            was read. Perhaps more than one. We run forward through the
            list and add them to the include stack. But we ignore
            "Origsource" files (which were never actually opened for
            reading). */

        last_input_file++;
        if (!InputFiles[last_input_file-1].is_input)
            continue;

        begin_buffering_file(File_sp++, last_input_file);
    }
    if (last_input_file != current_input_file)
        compiler_error("last_input_file did not match after Include");

    if (File_sp == 0)
    {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
    }

    /*  Buffer exhausted: load the next block from the file...               */
    if (CF->read_pos == CF->size)
    {   CF->size =
            file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
        CF->read_pos = 0;
    }
    else
    /*  ...or the file itself has run out (apparently signalled by a negated
        size -- TODO confirm against file_load_chars): pop the include stack
        and restore the interrupted file's lookahead pipeline.               */
    if (CF->read_pos == -(CF->size))
    {   set_token_location(get_current_debug_location());
        File_sp--;
        if (File_sp == 0)
        {   lookahead  = 0; lookahead2 = 0; lookahead3 = 0; return 0;
        }
        CF = &(FileStack[File_sp-1]);
        CurrentLB = &(FileStack[File_sp-1].LB);
        lookahead  = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
        if (CF->read_pos == CF->size)
        {   CF->size =
                file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
            CF->read_pos = 0;
        }
        set_token_location(get_current_debug_location());
    }

    p = (uchar *) (CF->buffer);

    /*  Shift the pipeline: the character delivered now was read three
        calls ago.                                                           */
    current = lookahead;
    lookahead = lookahead2;
    lookahead2 = lookahead3;
    lookahead3 = source_to_iso_grid[p[CF->read_pos++]];

    CurrentLB->chars_read++;
    if (forerrors_pointer < 511)
        forerrors_buff[forerrors_pointer++] = current;
    if (current == '\n') reached_new_line();
    return(current);
}
+
+/* ------------------------------------------------------------------------- */
+/*   Source 2: from a string                                                 */
+/* ------------------------------------------------------------------------- */
+
static int source_to_analyse_pointer;            /*  Current read position   */

static int get_next_char_from_string(void)
{   uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
    current = source_to_iso_grid[p[0]];

    /*  Refill the lookahead pipeline, stopping at the string's terminator
        so that p[1..3] are never read past the end (assumes
        source_to_iso_grid maps 0 to 0 -- TODO confirm).                     */
    if (current == 0)    lookahead  = 0;
                    else lookahead  = source_to_iso_grid[p[1]];
    if (lookahead == 0)  lookahead2 = 0;
                    else lookahead2 = source_to_iso_grid[p[2]];
    if (lookahead2 == 0) lookahead3 = 0;
                    else lookahead3 = source_to_iso_grid[p[3]];

    CurrentLB->chars_read++;
    if (forerrors_pointer < 511)
        forerrors_buff[forerrors_pointer++] = current;
    if (current == '\n') reached_new_line();
    return(current);
}
+
+/* ========================================================================= */
+/*   The interface between the lexer and Inform's higher levels:             */
+/*                                                                           */
+/*       put_token_back()            (effectively) move the read position    */
+/*                                       back by one token                   */
+/*                                                                           */
+/*       get_next_token()            copy the token at the current read      */
+/*                                       position into the triple            */
+/*                                   (token_type, token_value, token_text)   */
+/*                                       and move the read position forward  */
+/*                                       by one                              */
+/*                                                                           */
+/*       restart_lexer(source, name) if source is NULL, initialise the lexer */
+/*                                       to read from source files;          */
+/*                                   otherwise, to read from this string.    */
+/* ------------------------------------------------------------------------- */
+
+extern void put_token_back(void)
+{   tokens_put_back++;
+
+    if (tokens_trace_level > 0)
+    {   if (tokens_trace_level == 1) printf("<- ");
+        else printf("<-\n");
+    }
+
+    /*  The following error, of course, should never happen!                 */
+
+    if (tokens_put_back == CIRCLE_SIZE)
+    {   compiler_error("The lexical analyser has collapsed because of a wrong \
+assumption inside Inform");
+        tokens_put_back--;
+        return;
+    }
+}
+
+extern void get_next_token(void)
+{   int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
+    int returning_a_put_back_token = TRUE;
+
+    context = lexical_context();
+
+    if (tokens_put_back > 0)
+    {   i = circle_position - tokens_put_back + 1;
+        if (i<0) i += CIRCLE_SIZE;
+        tokens_put_back--;
+        if (context != token_contexts[i])
+        {   j = circle[i].type;
+            if ((j==0) || ((j>=100) && (j<200)))
+                interpret_identifier(i, FALSE);
+        }
+        goto ReturnBack;
+    }
+    returning_a_put_back_token = FALSE;
+
+    if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
+    else circle_position++;
+
+    if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
+        lex_p = lexeme_memory;
+
+    circle[circle_position].text = lex_p;
+    circle[circle_position].value = 0;
+    *lex_p = 0;
+
+    StartTokenAgain:
+    d = (*get_next_char)();
+    e = tokeniser_grid[d];
+
+    if (next_token_begins_syntax_line)
+    {   if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
+        {   new_syntax_line();
+            next_token_begins_syntax_line = FALSE;
+        }
+    }
+
+    circle[circle_position].location = get_current_debug_location();
+
+    switch(e)
+    {   case 0: char_error("Illegal character found in source:", d);
+            goto StartTokenAgain;
+
+        case WHITESPACE_CODE:
+            while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
+                (*get_next_char)();
+            goto StartTokenAgain;
+
+        case COMMENT_CODE:
+            while ((lookahead != '\n') && (lookahead != 0))
+                (*get_next_char)();
+            goto StartTokenAgain;
+
+        case EOF_CODE:
+            circle[circle_position].type = EOF_TT;
+            strcpy(lex_p, "<end of file>");
+            lex_p += strlen(lex_p) + 1;
+            break;
+
+        case DIGIT_CODE:
+            radix = 10;
+            ReturnNumber:
+            n=0;
+            do
+            {   n = n*radix + character_digit_value[d];
+                *lex_p++ = d;
+            } while ((character_digit_value[lookahead] < radix)
+                     && (d = (*get_next_char)(), TRUE));
+
+            *lex_p++ = 0;
+            circle[circle_position].type = NUMBER_TT;
+            circle[circle_position].value = n;
+            break;
+
+            FloatNumber:
+            {   int expo=0; double intv=0, fracv=0;
+                int expocount=0, intcount=0, fraccount=0;
+                int signbit = (d == '-');
+                *lex_p++ = d;
+                while (character_digit_value[lookahead] < 10) {
+                    intv = 10.0*intv + character_digit_value[lookahead];
+                    intcount++;
+                    *lex_p++ = lookahead;
+                    (*get_next_char)();
+                }
+                if (lookahead == '.') {
+                    double fracpow = 1.0;
+                    *lex_p++ = lookahead;
+                    (*get_next_char)();
+                    while (character_digit_value[lookahead] < 10) {
+                        fracpow *= 0.1;
+                        fracv = fracv + fracpow*character_digit_value[lookahead];
+                        fraccount++;
+                        *lex_p++ = lookahead;
+                        (*get_next_char)();
+                    }
+                }
+                if (lookahead == 'e' || lookahead == 'E') {
+                    int exposign = 0;
+                    *lex_p++ = lookahead;
+                    (*get_next_char)();
+                    if (lookahead == '+' || lookahead == '-') {
+                        exposign = (lookahead == '-');
+                        *lex_p++ = lookahead;
+                        (*get_next_char)();
+                    }
+                    while (character_digit_value[lookahead] < 10) {
+                        expo = 10*expo + character_digit_value[lookahead];
+                        expocount++;
+                        *lex_p++ = lookahead;
+                        (*get_next_char)();
+                    }
+                    if (expocount == 0)
+                        error("Floating-point literal must have digits after the 'e'");
+                    if (exposign) { expo = -expo; }
+                }
+                if (intcount + fraccount == 0)
+                    error("Floating-point literal must have digits");
+                n = construct_float(signbit, intv, fracv, expo);
+            }
+            *lex_p++ = 0;
+            circle[circle_position].type = NUMBER_TT;
+            circle[circle_position].value = n;
+            if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code");
+            break;
+
+        case RADIX_CODE:
+            radix = 16; d = (*get_next_char)();
+            if (d == '-' || d == '+') { goto FloatNumber; }
+            if (d == '$') { d = (*get_next_char)(); radix = 2; }
+            if (character_digit_value[d] >= radix)
+            {   if (radix == 2)
+                    error("Binary number expected after '$$'");
+                else
+                    error("Hexadecimal number expected after '$'");
+            }
+            goto ReturnNumber;
+
+        case QUOTE_CODE:     /* Single-quotes: scan a literal string */
+            quoted_size=0;
+            do
+            {   e = d; d = (*get_next_char)(); *lex_p++ = d;
+                if (quoted_size++==64)
+                {   error(
+                    "Too much text for one pair of quotations '...' to hold");
+                    *lex_p='\''; break;
+                }
+                if ((d == '\'') && (e != '@'))
+                {   if (quoted_size == 1)
+                    {   d = (*get_next_char)(); *lex_p++ = d;
+                        if (d != '\'')
+                            error("No text between quotation marks ''");
+                    }
+                    break;
+                }
+            } while (d != EOF);
+            if (d==EOF) ebf_error("'\''", "end of file");
+            *(lex_p-1) = 0;
+            circle[circle_position].type = SQ_TT;
+            break;
+
+        case DQUOTE_CODE:    /* Double-quotes: scan a literal string */
+            quoted_size=0;
+            do
+            {   d = (*get_next_char)(); *lex_p++ = d;
+                if (quoted_size++==MAX_QTEXT_SIZE)
+                {   memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE);
+                    break;
+                }
+                if (d == '\n')
+                {   lex_p--;
+                    while (*(lex_p-1) == ' ') lex_p--;
+                    if (*(lex_p-1) != '^') *lex_p++ = ' ';
+                    while ((lookahead != EOF) &&
+                          (tokeniser_grid[lookahead] == WHITESPACE_CODE))
+                    (*get_next_char)();
+                }
+                else if (d == '\\')
+                {   int newline_passed = FALSE;
+                    lex_p--;
+                    while ((lookahead != EOF) &&
+                          (tokeniser_grid[lookahead] == WHITESPACE_CODE))
+                        if ((d = (*get_next_char)()) == '\n')
+                            newline_passed = TRUE;
+                    if (!newline_passed)
+                    {   char chb[4];
+                        chb[0] = '\"'; chb[1] = lookahead;
+                        chb[2] = '\"'; chb[3] = 0;
+                        ebf_error("empty rest of line after '\\' in string",
+                            chb);
+                    }
+                }
+            }   while ((d != EOF) && (d!='\"'));
+            if (d==EOF) ebf_error("'\"'", "end of file");
+            *(lex_p-1) = 0;
+            circle[circle_position].type = DQ_TT;
+            break;
+
+        case IDENTIFIER_CODE:    /* Letter or underscore: an identifier */
+
+            *lex_p++ = d; n=1;
+            while ((n<=MAX_IDENTIFIER_LENGTH)
+                   && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
+                   || (tokeniser_grid[lookahead] == DIGIT_CODE)))
+                n++, *lex_p++ = (*get_next_char)();
+
+            *lex_p++ = 0;
+
+            if (n > MAX_IDENTIFIER_LENGTH)
+            {   char bad_length[100];
+                sprintf(bad_length,
+                    "Name exceeds the maximum length of %d characters:",
+                         MAX_IDENTIFIER_LENGTH);
+                error_named(bad_length, circle[circle_position].text);
+                /* Trim token so that it doesn't violate
+                   MAX_IDENTIFIER_LENGTH during error recovery */
+                circle[circle_position].text[MAX_IDENTIFIER_LENGTH] = 0;
+            }
+
+            if (dont_enter_into_symbol_table)
+            {   circle[circle_position].type = DQ_TT;
+                circle[circle_position].value = 0;
+                if (dont_enter_into_symbol_table == -2)
+                    interpret_identifier(circle_position, TRUE);
+                break;
+            }
+
+            interpret_identifier(circle_position, FALSE);
+            break;
+
+        default:
+
+            /*  The character is initial to at least one of the separators  */
+
+            for (j=e>>4, k=j+(e&0x0f); j<k; j++)
+            {   r = (char *) separators[j];
+                if (r[1]==0)
+                {   *lex_p++=d; *lex_p++=0;
+                    goto SeparatorMatched;
+                }
+                else
+                if (r[2]==0)
+                {   if (*(r+1) == lookahead)
+                    {   *lex_p++=d;
+                        *lex_p++=(*get_next_char)();
+                        *lex_p++=0;
+                        goto SeparatorMatched;
+                    }
+                }
+                else
+                {   if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
+                    {   *lex_p++=d;
+                        *lex_p++=(*get_next_char)();
+                        *lex_p++=(*get_next_char)();
+                        *lex_p++=0;
+                        goto SeparatorMatched;
+                    }
+                }
+            }
+
+            /*  The following contingency never in fact arises with the
+                current set of separators, but might in future  */
+
+            *lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
+            *lex_p++ = 0;
+            error_named("Unrecognised combination in source:", lex_p);
+            goto StartTokenAgain;
+
+            SeparatorMatched:
+
+            circle[circle_position].type = SEP_TT;
+            circle[circle_position].value = j;
+            switch(j)
+            {   case SEMICOLON_SEP: break;
+                case HASHNDOLLAR_SEP:
+                case HASHWDOLLAR_SEP:
+                    if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
+                    {   error_named("Character expected after",
+                            circle[circle_position].text);
+                        break;
+                    }
+                    lex_p--;
+                    *lex_p++ = (*get_next_char)();
+                    while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
+                           || (tokeniser_grid[lookahead] == DIGIT_CODE))
+                        *lex_p++ = (*get_next_char)();
+                    *lex_p++ = 0;
+                    break;
+                case HASHADOLLAR_SEP:
+                case HASHGDOLLAR_SEP:
+                case HASHRDOLLAR_SEP:
+                case HASHHASH_SEP:
+                    if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
+                    {   error_named("Alphabetic character expected after",
+                            circle[circle_position].text);
+                        break;
+                    }
+                    lex_p--;
+                    while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
+                           || (tokeniser_grid[lookahead] == DIGIT_CODE))
+                        *lex_p++ = (*get_next_char)();
+                    *lex_p++ = 0;
+                    break;
+            }
+            break;
+    }
+
+    i = circle_position;
+
+    ReturnBack:
+    token_value = circle[i].value;
+    token_type = circle[i].type;
+    token_text = circle[i].text;
+    if (!returning_a_put_back_token)
+    {   set_token_location(circle[i].location);
+    }
+    token_contexts[i] = context;
+
+    if (tokens_trace_level > 0)
+    {   if (tokens_trace_level == 1)
+            printf("'%s' ", circle[i].text);
+        else
+        {   printf("-> "); describe_token(circle[i]);
+            printf(" ");
+            if (tokens_trace_level > 2) print_context(token_contexts[i]);
+            printf("\n");
+        }
+    }
+}
+
+static char veneer_error_title[64];
+
+extern void restart_lexer(char *lexical_source, char *name)
+{   int i;
+    circle_position = 0;
+    for (i=0; i<CIRCLE_SIZE; i++)
+    {   circle[i].type = 0;
+        circle[i].value = 0;
+        circle[i].text = "(if this is ever visible, there is a bug)";
+        token_contexts[i] = 0;
+    }
+
+    lex_p = lexeme_memory;
+    tokens_put_back = 0;
+    forerrors_pointer = 0;
+    dont_enter_into_symbol_table = FALSE;
+    return_sp_as_variable = FALSE;
+    next_token_begins_syntax_line = TRUE;
+
+    source_to_analyse = lexical_source;
+
+    if (source_to_analyse == NULL)
+    {   get_next_char = get_next_char_from_pipeline;
+        if (!pipeline_made) create_char_pipeline();
+        forerrors_buff[0] = 0; forerrors_pointer = 0;
+    }
+    else
+    {   get_next_char = get_next_char_from_string;
+        source_to_analyse_pointer = 0;
+        CurrentLB = &StringLB;
+        sprintf(veneer_error_title, "<veneer routine '%s'>", name);
+        StringLB.filename = veneer_error_title;
+
+        CurrentLB->source_line = 1;
+        CurrentLB->line_start  = 0;
+        CurrentLB->chars_read  = 0;
+    }
+}
+
+/* ========================================================================= */
+/*   Data structure management routines                                      */
+/* ------------------------------------------------------------------------- */
+
+extern void init_lexer_vars(void)
+{
+    blank_brief_location.file_index = -1;
+    blank_brief_location.line_number = 0;
+    blank_brief_location.orig_file_index = 0;
+    blank_brief_location.orig_line_number = 0;
+}
+
+extern void lexer_begin_prepass(void)
+{   total_source_line_count = 0;
+    CurrentLB = &NoFileOpen;
+    report_errors_at_current_line();
+}
+
+extern void lexer_begin_pass(void)
+{   no_hash_printed_yet = TRUE;
+    hash_printed_since_newline = FALSE;
+
+    pipeline_made = FALSE;
+
+    restart_lexer(NULL, NULL);
+}
+
/*  Conclude a compilation pass: switch error reporting to the
    "making output" pseudo-file and flush any errors still pending
    against the final source line.                                        */

extern void lexer_endpass(void)
{   CurrentLB = &MakingOutput;
    report_errors_at_current_line();
}
+
+extern void lexer_allocate_arrays(void)
+{   int i;
+
+    FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
+        "filestack buffer");
+
+    for (i=0; i<MAX_INCLUSION_DEPTH; i++)
+    FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer");
+
+    lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");
+
+    keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
+        "keyword hash table");
+    keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
+        "keyword hash end table");
+    keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
+        "keyword hashing linked list");
+    local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
+        "local variable hash table");
+    local_variable_text_table = my_malloc(
+        (MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
+        "text of local variable names");
+
+    local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
+        "local variable hash codes");
+    local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
+        "local variable text pointers");
+
+    make_tokeniser_grid();
+    make_keywords_tables();
+
+    first_token_locations =
+        my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
+    first_token_locations->location.file_index = 0;
+    first_token_locations->location.beginning_byte_index = 0;
+    first_token_locations->location.end_byte_index = 0;
+    first_token_locations->location.beginning_line_number = 0;
+    first_token_locations->location.end_line_number = 0;
+    first_token_locations->location.beginning_character_number = 0;
+    first_token_locations->location.end_character_number = 0;
+    first_token_locations->location.orig_file_index = 0;
+    first_token_locations->location.orig_beg_line_number = 0;
+    first_token_locations->location.orig_beg_char_number = 0;
+    first_token_locations->next = NULL;
+    first_token_locations->reference_count = 0;
+    last_token_location = first_token_locations;
+}
+
+extern void lexer_free_arrays(void)
+{   int i; char *p;
+
+    for (i=0; i<MAX_INCLUSION_DEPTH; i++)
+    {   p = FileStack[i].buffer;
+        my_free(&p, "source file buffer");
+    }
+    my_free(&FileStack, "filestack buffer");
+    my_free(&lexeme_memory, "lexeme memory");
+
+    my_free(&keywords_hash_table, "keyword hash table");
+    my_free(&keywords_hash_ends_table, "keyword hash end table");
+    my_free(&keywords_data_table, "keyword hashing linked list");
+    my_free(&local_variable_hash_table, "local variable hash table");
+    my_free(&local_variable_text_table, "text of local variable names");
+
+    my_free(&local_variable_hash_codes, "local variable hash codes");
+    my_free(&local_variable_texts, "local variable text pointers");
+
+    cleanup_token_locations(NULL);
+}
+
+/* ========================================================================= */