X-Git-Url: https://jxself.org/git/?a=blobdiff_plain;f=src%2Flexer.c;h=58841268af2ebfd495b5d6f86ea75573a5041081;hb=8e63120c630c94c598d4e2d6ba823dac59bce8fa;hp=f79a6e6118c823aed23552c54b77431786ca41c0;hpb=d11f2f726ed7feea617476d99cf7505ddd9a27ce;p=inform.git diff --git a/src/lexer.c b/src/lexer.c index f79a6e6..5884126 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,8 +1,8 @@ /* ------------------------------------------------------------------------- */ /* "lexer" : Lexical analyser */ /* */ -/* Part of Inform 6.35 */ -/* copyright (c) Graham Nelson 1993 - 2021 */ +/* Part of Inform 6.40 */ +/* copyright (c) Graham Nelson 1993 - 2022 */ /* */ /* Inform is free software: you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ @@ -15,7 +15,7 @@ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ -/* along with Inform. If not, see https://gnu.org/licenses/ * +/* along with Inform. If not, see https://gnu.org/licenses/ */ /* */ /* ------------------------------------------------------------------------- */ @@ -45,7 +45,7 @@ int next_token_begins_syntax_line; /* When TRUE, start a new syntax int32 last_mapped_line; /* Last syntax line reported to debugging file */ /* ------------------------------------------------------------------------- */ -/* The lexer's output is a sequence of triples, each called a "token", */ +/* The lexer's output is a sequence of structs, each called a "token", */ /* representing one lexical unit (or "lexeme") each. Instead of providing */ /* "lookahead" (that is, always having available the next token after the */ /* current one, so that syntax analysers higher up in Inform can have */ @@ -59,6 +59,8 @@ int32 last_mapped_line; /* Last syntax line reported to debugging file */ /* ------------------------------------------------------------------------- */ /* These three variables are set to the current token on a call to */ /* get_next_token() (but are not changed by a call to put_token_back()). */ +/* (It would be tidier to use a token_data structure, rather than having */ +/* get_next_token() unpack three values. But this is the way it is.) */ /* ------------------------------------------------------------------------- */ int token_type; @@ -226,6 +228,11 @@ extern debug_locations get_token_location_end /* maximum number of tokens ever put back at once, plus 1 (in effect, the */ /* maximum token lookahead ever needed in syntax analysis, plus 1). */ /* */ +/* Note that the circle struct type is lexeme_data, whereas the expression */ +/* code all works in token_data. They have slightly different needs. The */ +/* data is exported through the token_text, token_value, token_type */ +/* globals, so there's no need to use the same struct at both levels. */ +/* */ /* Unlike some compilers, Inform does not have a context-free lexer: in */ /* fact it has 12288 different possible states. However, the context only */ /* affects the interpretation of "identifiers": lexemes beginning with a */ @@ -245,24 +252,36 @@ extern debug_locations get_token_location_end old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".) */ static int circle_position; -static token_data circle[CIRCLE_SIZE]; - -static int token_contexts[CIRCLE_SIZE]; +static lexeme_data circle[CIRCLE_SIZE]; /* ------------------------------------------------------------------------- */ /* A complication, however, is that the text of some lexemes needs to be */ /* held in Inform's memory for much longer periods: for example, a */ /* dictionary word lexeme (like "'south'") must have its text preserved */ /* until the code generation time for the expression it occurs in, when */ -/* the dictionary reference is actually made. Code generation in general */ -/* occurs as early as possible in Inform: pending some better method of */ -/* garbage collection, we simply use a buffer so large that unless */ -/* expressions spread across 10K of source code are found, there can be */ -/* no problem. */ +/* the dictionary reference is actually made. We handle this by keeping */ +/* all lexeme text until the end of the statement (or, for top-level */ +/* directives, until the end of the directive). Then we call */ +/* release_token_texts() to start over. The lextexts array will therefore */ +/* grow to the largest number of lexemes in a single statement or */ +/* directive. */ /* ------------------------------------------------------------------------- */ -static char *lexeme_memory; -static char *lex_p; /* Current write position */ +typedef struct lextext_s { + char *text; + size_t size; /* Allocated size (including terminal null) + This is always at least MAX_IDENTIFIER_LENGTH+1 */ +} lextext; + +static lextext *lextexts; /* Allocated to no_lextexts */ +static memory_list lextexts_memlist; +static int no_lextexts; + +static int cur_lextexts; /* Number of lextexts in current use + (cur_lextexts <= no_lextexts) */ + +static int lex_index; /* Index of lextext being written to */ +static int lex_pos; /* Current write position in that lextext */ /* ------------------------------------------------------------------------- */ /* The lexer itself needs up to 3 characters of lookahead (it uses an */ @@ -294,7 +313,10 @@ static int tokens_put_back; /* Count of the number of backward moves made from the last-read token */ -extern void describe_token(token_data t) +/* This gets called for both token_data and lexeme_data structs. It prints + a description of the common part (the text, value, type fields). +*/ +extern void describe_token_triple(const char *text, int32 value, int type) { /* Many of the token types are not set in this file, but later on in Inform's higher stages (for example, in the expression evaluator); @@ -302,51 +324,51 @@ extern void describe_token(token_data t) printf("{ "); - switch(t.type) + switch(type) { /* The following token types occur in lexer output: */ case SYMBOL_TT: printf("symbol "); - describe_symbol(t.value); + describe_symbol(value); break; - case NUMBER_TT: printf("literal number %d", t.value); + case NUMBER_TT: printf("literal number %d", value); break; - case DQ_TT: printf("string \"%s\"", t.text); + case DQ_TT: printf("string \"%s\"", text); break; - case SQ_TT: printf("string '%s'", t.text); + case SQ_TT: printf("string '%s'", text); break; - case SEP_TT: printf("separator '%s'", t.text); + case SEP_TT: printf("separator '%s'", text); break; case EOF_TT: printf("end of file"); break; - case STATEMENT_TT: printf("statement name '%s'", t.text); + case STATEMENT_TT: printf("statement name '%s'", text); break; - case SEGMENT_MARKER_TT: printf("object segment marker '%s'", t.text); + case SEGMENT_MARKER_TT: printf("object segment marker '%s'", text); break; - case DIRECTIVE_TT: printf("directive name '%s'", t.text); + case DIRECTIVE_TT: printf("directive name '%s'", text); break; - case CND_TT: printf("textual conditional '%s'", t.text); + case CND_TT: printf("textual conditional '%s'", text); break; - case OPCODE_NAME_TT: printf("opcode name '%s'", t.text); + case OPCODE_NAME_TT: printf("opcode name '%s'", text); break; - case SYSFUN_TT: printf("built-in function name '%s'", t.text); + case SYSFUN_TT: printf("built-in function name '%s'", text); break; - case LOCAL_VARIABLE_TT: printf("local variable name '%s'", t.text); + case LOCAL_VARIABLE_TT: printf("local variable name '%s'", text); break; - case MISC_KEYWORD_TT: printf("statement keyword '%s'", t.text); + case MISC_KEYWORD_TT: printf("statement keyword '%s'", text); break; - case DIR_KEYWORD_TT: printf("directive keyword '%s'", t.text); + case DIR_KEYWORD_TT: printf("directive keyword '%s'", text); break; - case TRACE_KEYWORD_TT: printf("'trace' keyword '%s'", t.text); + case TRACE_KEYWORD_TT: printf("'trace' keyword '%s'", text); break; - case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text); + case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", text); break; /* The remaining are etoken types, not set by the lexer */ case OP_TT: printf("operator '%s'", - operators[t.value].description); + operators[value].description); break; case ENDEXP_TT: printf("end of expression"); break; @@ -354,26 +376,26 @@ extern void describe_token(token_data t) break; case SUBCLOSE_TT: printf("close bracket"); break; - case LARGE_NUMBER_TT: printf("large number: '%s'=%d",t.text,t.value); + case LARGE_NUMBER_TT: printf("large number: '%s'=%d",text,value); break; - case SMALL_NUMBER_TT: printf("small number: '%s'=%d",t.text,t.value); + case SMALL_NUMBER_TT: printf("small number: '%s'=%d",text,value); break; - case VARIABLE_TT: printf("variable '%s'=%d", t.text, t.value); + case VARIABLE_TT: printf("variable '%s'=%d", text, value); break; - case DICTWORD_TT: printf("dictionary word '%s'", t.text); + case DICTWORD_TT: printf("dictionary word '%s'", text); break; - case ACTION_TT: printf("action name '%s'", t.text); + case ACTION_TT: printf("action name '%s'", text); break; default: printf("** unknown token type %d, text='%s', value=%d **", - t.type, t.text, t.value); + type, text, value); } printf(" }"); } /* ------------------------------------------------------------------------- */ -/* All but one of the 280 Inform keywords (118 of them opcode names used */ +/* All but one of the Inform keywords (most of them opcode names used */ /* only by the assembler). (The one left over is "sp", a keyword used in */ /* assembly language only.) */ /* */ @@ -385,7 +407,9 @@ extern void describe_token(token_data t) /* "header.h" but is otherwise not significant. */ /* ------------------------------------------------------------------------- */ -#define MAX_KEYWORDS 350 +/* This must exceed the total number of keywords across all groups, + including opcodes. */ +#define MAX_KEYWORDS (350) /* The values will be filled in at compile time, when we know which opcode set to use. */ @@ -444,6 +468,7 @@ static char *opcode_list_g[] = { "sqrt", "exp", "log", "pow", "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf", + "hasundo", "discardundo", "" }; @@ -492,7 +517,7 @@ keyword_group directive_keywords = "string", "table", "buffer", "data", "initial", "initstr", "with", "private", "has", "class", "error", "fatalerror", "warning", - "terminating", "static", + "terminating", "static", "individual", "" }, DIR_KEYWORD_TT, FALSE, TRUE }; @@ -561,8 +586,10 @@ keyword_group *keyword_groups[12] &directive_keywords, &misc_keywords, &statements, &conditions, &system_functions, &system_constants, &opcode_macros}; +/* These keywords are set to point to local_variable_names entries when + a routine header is parsed. See construct_local_variable_tables(). */ keyword_group local_variables = -{ { "" }, /* Filled in when routine declared */ +{ { "" }, LOCAL_VARIABLE_TT, FALSE, FALSE }; @@ -618,8 +645,21 @@ static int *keywords_data_table; static int *local_variable_hash_table; static int *local_variable_hash_codes; -char **local_variable_texts; -static char *local_variable_text_table; + +/* Note that MAX_LOCAL_VARIABLES is the maximum number of local variables + for this VM, *including* "sp" (the stack pointer "local"). + This used to be a memory setting. Now it is a constant: 16 for Z-code, + 119 for Glulx. +*/ + +/* Names of local variables in the current routine. + This is allocated to MAX_LOCAL_VARIABLES-1. (The stack pointer "local" + is not included in this array.) + + (This could be a memlist, growing as needed up to MAX_LOCAL_VARIABLES-1. + But right now we just allocate the max.) + */ +identstruct *local_variable_names; static char one_letter_locals[128]; @@ -637,11 +677,21 @@ static void make_keywords_tables(void) } for (j=0; *(oplist[j]); j++) { + if (j >= MAX_KEYWORD_GROUP_SIZE) { + /* Gotta increase MAX_KEYWORD_GROUP_SIZE */ + compiler_error("opcode_list has overflowed opcode_names.keywords"); + break; + } opcode_names.keywords[j] = oplist[j]; } opcode_names.keywords[j] = ""; for (j=0; *(maclist[j]); j++) { + if (j >= MAX_KEYWORD_GROUP_SIZE) { + /* Gotta increase MAX_KEYWORD_GROUP_SIZE */ + compiler_error("opmacro_list has overflowed opcode_macros.keywords"); + break; + } opcode_macros.keywords[j] = maclist[j]; } opcode_macros.keywords[j] = ""; @@ -654,7 +704,13 @@ static void make_keywords_tables(void) for (i=1; i<=11; i++) { keyword_group *kg = keyword_groups[i]; for (j=0; *(kg->keywords[j]) != 0; j++) - { h = hash_code_from_string(kg->keywords[j]); + { + if (tp >= MAX_KEYWORDS) { + /* Gotta increase MAX_KEYWORDS */ + compiler_error("keywords_data_table has overflowed MAX_KEYWORDS"); + break; + } + h = hash_code_from_string(kg->keywords[j]); if (keywords_hash_table[h] == -1) keywords_hash_table[h] = tp; else @@ -668,32 +724,37 @@ static void make_keywords_tables(void) } } +/* Look at the strings stored in local_variable_names (from 0 to no_locals). + Set local_variables.keywords to point to these, and also prepare the + hash tables. */ extern void construct_local_variable_tables(void) -{ int i, h; char *p = local_variable_text_table; +{ int i, h; for (i=0; i= 0) { for (;index= MAX_INCLUSION_DEPTH) - memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH); + CF = NULL; + CurrentLB = NULL; + + ensure_memory_list_available(&FileStack_memlist, i+1); + while (i >= FileStack_max) { + FileStack[FileStack_max++].buffer = my_malloc(SOURCE_BUFFER_SIZE+4, "source file buffer"); + } p = (uchar *) FileStack[i].buffer; @@ -1280,6 +1349,8 @@ static void begin_buffering_file(int i, int file_no) FileStack[i].file_no = file_no; FileStack[i].size = file_load_chars(file_no, (char *) p, SOURCE_BUFFER_SIZE); + /* If the file is shorter than SOURCE_BUFFER_SIZE, it's now closed already. We still need to set up the file entry though. */ + lookahead = source_to_iso_grid[p[0]]; lookahead2 = source_to_iso_grid[p[1]]; lookahead3 = source_to_iso_grid[p[2]]; @@ -1299,6 +1370,8 @@ static void begin_buffering_file(int i, int file_no) FileStack[i].LB.orig_source = NULL; FileStack[i].LB.orig_file = 0; FileStack[i].LB.orig_line = 0; FileStack[i].LB.orig_char = 0; + InputFiles[file_no-1].initial_buffering = FALSE; + CurrentLB = &(FileStack[i].LB); CF = &(FileStack[i]); @@ -1376,7 +1449,7 @@ static int get_next_char_from_pipeline(void) lookahead3 = source_to_iso_grid[p[CF->read_pos++]]; CurrentLB->chars_read++; - if (forerrors_pointer < 511) + if (forerrors_pointer < FORERRORS_SIZE-1) forerrors_buff[forerrors_pointer++] = current; if (current == '\n') reached_new_line(); return(current); @@ -1400,7 +1473,7 @@ static int get_next_char_from_string(void) else lookahead3 = source_to_iso_grid[p[3]]; CurrentLB->chars_read++; - if (forerrors_pointer < 511) + if (forerrors_pointer < FORERRORS_SIZE-1) forerrors_buff[forerrors_pointer++] = current; if (current == '\n') reached_new_line(); return(current); @@ -1418,11 +1491,54 @@ static int get_next_char_from_string(void) /* and move the read position forward */ /* by one */ /* */ +/* release_token_texts() discard all the tokens that have been */ +/* read in, except for put-back ones */ +/* */ /* restart_lexer(source, name) if source is NULL, initialise the lexer */ /* to read from source files; */ /* otherwise, to read from this string. */ /* ------------------------------------------------------------------------- */ +extern void release_token_texts(void) +{ + /* This is called at the beginning of every (top-level) directive and + every statement. It drops all token usage so that the lextexts + array can be reused. + + Call this immediately before a get_next_token() call. + + This should *not* be called within parse_expression(). Expression + code generation relies on token data being retained across the whole + expression. + */ + int ix; + + token_text = NULL; + + if (tokens_put_back == 0) { + cur_lextexts = 0; + return; + } + + /* If any tokens have been put back, we have to preserve their text. + Move their lextext usage to the head of the lextexts array. */ + + for (ix=0; ix= lextexts[lex_index].size) { + size_t newsize = lextexts[lex_index].size * 2; + my_realloc(&lextexts[lex_index].text, lextexts[lex_index].size, newsize, "one lexeme text"); + lextexts[lex_index].size = newsize; + } + lextexts[lex_index].text[lex_pos++] = ch; +} + +/* Remove the last character and ensure it's null-terminated */ +static void lexdelc(void) +{ + if (lex_pos > 0) { + lex_pos--; + } + lextexts[lex_index].text[lex_pos] = 0; +} + +/* Return the last character */ +static char lexlastc(void) +{ + if (lex_pos == 0) { + return 0; + } + return lextexts[lex_index].text[lex_pos-1]; +} + +/* Add a string of characters (including the null) */ +static void lexadds(char *str) +{ + while (*str) { + lexaddc(*str); + str++; + } + lexaddc(0); +} + extern void get_next_token(void) { int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r; int returning_a_put_back_token = TRUE; - + context = lexical_context(); if (tokens_put_back > 0) { i = circle_position - tokens_put_back + 1; if (i<0) i += CIRCLE_SIZE; tokens_put_back--; - if (context != token_contexts[i]) + if (context != circle[i].context) { j = circle[i].type; if ((j==0) || ((j>=100) && (j<200))) - interpret_identifier(i, FALSE); + interpret_identifier(circle[i].text, i, FALSE); + circle[i].context = context; } goto ReturnBack; } @@ -1463,12 +1627,22 @@ extern void get_next_token(void) if (circle_position == CIRCLE_SIZE-1) circle_position = 0; else circle_position++; - if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE) - lex_p = lexeme_memory; - - circle[circle_position].text = lex_p; + lex_index = cur_lextexts++; + if (lex_index >= no_lextexts) { + /* fresh lextext block; must init it */ + no_lextexts = lex_index+1; + ensure_memory_list_available(&lextexts_memlist, no_lextexts); + lextexts[lex_index].size = MAX_IDENTIFIER_LENGTH + 1; + lextexts[lex_index].text = my_malloc(lextexts[lex_index].size, "one lexeme text"); + } + lex_pos = 0; + lextexts[lex_index].text[0] = 0; /* start with an empty string */ + + circle[circle_position].lextext = lex_index; + circle[circle_position].text = NULL; /* will fill in later */ circle[circle_position].value = 0; - *lex_p = 0; + circle[circle_position].type = 0; + circle[circle_position].context = context; StartTokenAgain: d = (*get_next_char)(); @@ -1499,8 +1673,7 @@ extern void get_next_token(void) case EOF_CODE: circle[circle_position].type = EOF_TT; - strcpy(lex_p, ""); - lex_p += strlen(lex_p) + 1; + lexadds(""); break; case DIGIT_CODE: @@ -1509,11 +1682,11 @@ extern void get_next_token(void) n=0; do { n = n*radix + character_digit_value[d]; - *lex_p++ = d; + lexaddc(d); } while ((character_digit_value[lookahead] < radix) && (d = (*get_next_char)(), TRUE)); - *lex_p++ = 0; + lexaddc(0); circle[circle_position].type = NUMBER_TT; circle[circle_position].value = n; break; @@ -1522,38 +1695,38 @@ extern void get_next_token(void) { int expo=0; double intv=0, fracv=0; int expocount=0, intcount=0, fraccount=0; int signbit = (d == '-'); - *lex_p++ = d; + lexaddc(d); while (character_digit_value[lookahead] < 10) { intv = 10.0*intv + character_digit_value[lookahead]; intcount++; - *lex_p++ = lookahead; + lexaddc(lookahead); (*get_next_char)(); } if (lookahead == '.') { double fracpow = 1.0; - *lex_p++ = lookahead; + lexaddc(lookahead); (*get_next_char)(); while (character_digit_value[lookahead] < 10) { fracpow *= 0.1; fracv = fracv + fracpow*character_digit_value[lookahead]; fraccount++; - *lex_p++ = lookahead; + lexaddc(lookahead); (*get_next_char)(); } } if (lookahead == 'e' || lookahead == 'E') { int exposign = 0; - *lex_p++ = lookahead; + lexaddc(lookahead); (*get_next_char)(); if (lookahead == '+' || lookahead == '-') { exposign = (lookahead == '-'); - *lex_p++ = lookahead; + lexaddc(lookahead); (*get_next_char)(); } while (character_digit_value[lookahead] < 10) { expo = 10*expo + character_digit_value[lookahead]; expocount++; - *lex_p++ = lookahead; + lexaddc(lookahead); (*get_next_char)(); } if (expocount == 0) @@ -1564,7 +1737,7 @@ extern void get_next_token(void) error("Floating-point literal must have digits"); n = construct_float(signbit, intv, fracv, expo); } - *lex_p++ = 0; + lexaddc(0); circle[circle_position].type = NUMBER_TT; circle[circle_position].value = n; if (!glulx_mode && dont_enter_into_symbol_table != -2) error("Floating-point literals are not available in Z-code"); @@ -1585,15 +1758,15 @@ extern void get_next_token(void) case QUOTE_CODE: /* Single-quotes: scan a literal string */ quoted_size=0; do - { e = d; d = (*get_next_char)(); *lex_p++ = d; + { e = d; d = (*get_next_char)(); lexaddc(d); if (quoted_size++==64) { error( "Too much text for one pair of quotations '...' to hold"); - *lex_p='\''; break; + lexaddc('\''); break; } if ((d == '\'') && (e != '@')) { if (quoted_size == 1) - { d = (*get_next_char)(); *lex_p++ = d; + { d = (*get_next_char)(); lexaddc(d); if (d != '\'') error("No text between quotation marks ''"); } @@ -1601,29 +1774,25 @@ extern void get_next_token(void) } } while (d != EOF); if (d==EOF) ebf_error("'\''", "end of file"); - *(lex_p-1) = 0; + lexdelc(); circle[circle_position].type = SQ_TT; break; case DQUOTE_CODE: /* Double-quotes: scan a literal string */ quoted_size=0; do - { d = (*get_next_char)(); *lex_p++ = d; - if (quoted_size++==MAX_QTEXT_SIZE) - { memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE); - break; - } + { d = (*get_next_char)(); lexaddc(d); if (d == '\n') - { lex_p--; - while (*(lex_p-1) == ' ') lex_p--; - if (*(lex_p-1) != '^') *lex_p++ = ' '; + { lex_pos--; + while (lexlastc() == ' ') lex_pos--; + if (lexlastc() != '^') lexaddc(' '); while ((lookahead != EOF) && (tokeniser_grid[lookahead] == WHITESPACE_CODE)) (*get_next_char)(); } else if (d == '\\') { int newline_passed = FALSE; - lex_p--; + lex_pos--; while ((lookahead != EOF) && (tokeniser_grid[lookahead] == WHITESPACE_CODE)) if ((d = (*get_next_char)()) == '\n') @@ -1638,40 +1807,44 @@ extern void get_next_token(void) } } while ((d != EOF) && (d!='\"')); if (d==EOF) ebf_error("'\"'", "end of file"); - *(lex_p-1) = 0; + lexdelc(); circle[circle_position].type = DQ_TT; break; case IDENTIFIER_CODE: /* Letter or underscore: an identifier */ - *lex_p++ = d; n=1; + lexaddc(d); n=1; while ((n<=MAX_IDENTIFIER_LENGTH) && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE) || (tokeniser_grid[lookahead] == DIGIT_CODE))) - n++, *lex_p++ = (*get_next_char)(); + n++, lexaddc((*get_next_char)()); - *lex_p++ = 0; + lexaddc(0); if (n > MAX_IDENTIFIER_LENGTH) { char bad_length[100]; sprintf(bad_length, "Name exceeds the maximum length of %d characters:", MAX_IDENTIFIER_LENGTH); - error_named(bad_length, circle[circle_position].text); + error_named(bad_length, lextexts[lex_index].text); + /* Eat any further extra characters in the identifier */ + while (((tokeniser_grid[lookahead] == IDENTIFIER_CODE) + || (tokeniser_grid[lookahead] == DIGIT_CODE))) + (*get_next_char)(); /* Trim token so that it doesn't violate MAX_IDENTIFIER_LENGTH during error recovery */ - circle[circle_position].text[MAX_IDENTIFIER_LENGTH] = 0; + lextexts[lex_index].text[MAX_IDENTIFIER_LENGTH] = 0; } if (dont_enter_into_symbol_table) { circle[circle_position].type = DQ_TT; circle[circle_position].value = 0; if (dont_enter_into_symbol_table == -2) - interpret_identifier(circle_position, TRUE); + interpret_identifier(lextexts[lex_index].text, circle_position, TRUE); break; } - interpret_identifier(circle_position, FALSE); + interpret_identifier(lextexts[lex_index].text, circle_position, FALSE); break; default: @@ -1681,24 +1854,25 @@ extern void get_next_token(void) for (j=e>>4, k=j+(e&0x0f); j 0) - { if (tokens_trace_level == 1) + { if (tokens_trace_level == 1) { printf("'%s' ", circle[i].text); + if (circle[i].type == EOF_TT) printf("\n"); + } else - { printf("-> "); describe_token(circle[i]); + { printf("-> "); describe_token(&circle[i]); printf(" "); - if (tokens_trace_level > 2) print_context(token_contexts[i]); + if (tokens_trace_level > 2) print_context(circle[i].context); printf("\n"); } } @@ -1783,10 +1964,15 @@ extern void restart_lexer(char *lexical_source, char *name) { circle[i].type = 0; circle[i].value = 0; circle[i].text = "(if this is ever visible, there is a bug)"; - token_contexts[i] = 0; + circle[i].lextext = -1; + circle[i].context = 0; } - lex_p = lexeme_memory; + cur_lextexts = 0; + /* But we don't touch no_lextexts; those allocated blocks can be reused */ + lex_index = -1; + lex_pos = -1; + tokens_put_back = 0; forerrors_pointer = 0; dont_enter_into_symbol_table = FALSE; @@ -1819,6 +2005,17 @@ extern void restart_lexer(char *lexical_source, char *name) extern void init_lexer_vars(void) { + FileStack = NULL; + FileStack_max = 0; + CF = NULL; + CurrentLB = NULL; + + lextexts = NULL; + no_lextexts = 0; + cur_lextexts = 0; + lex_index = -1; + lex_pos = -1; + blank_brief_location.file_index = -1; blank_brief_location.line_number = 0; blank_brief_location.orig_file_index = 0; @@ -1846,15 +2043,16 @@ extern void lexer_endpass(void) } extern void lexer_allocate_arrays(void) -{ int i; - - FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile), - "filestack buffer"); - - for (i=0; i