X-Git-Url: https://jxself.org/git/?a=blobdiff_plain;f=src%2Flexer.c;h=1843b0aee63e2cbcc07a4a144c757085e7ad4356;hb=56a5292888e1d46fe3033cd1d5c636051692453f;hp=58841268af2ebfd495b5d6f86ea75573a5041081;hpb=8e63120c630c94c598d4e2d6ba823dac59bce8fa;p=inform.git diff --git a/src/lexer.c b/src/lexer.c index 5884126..1843b0a 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,8 +1,8 @@ /* ------------------------------------------------------------------------- */ /* "lexer" : Lexical analyser */ /* */ -/* Part of Inform 6.40 */ -/* copyright (c) Graham Nelson 1993 - 2022 */ +/* Part of Inform 6.42 */ +/* copyright (c) Graham Nelson 1993 - 2024 */ /* */ /* Inform is free software: you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ @@ -29,10 +29,9 @@ int total_source_line_count, /* Number of source lines so far */ (generally as a result of an error message or the start of pass) */ dont_enter_into_symbol_table, /* Return names as text (with - token type DQ_TT, i.e., as if - they had double-quotes around) - and not as entries in the symbol - table, when TRUE. If -2, only the + token type UQ_TT) and not as + entries in the symbol table, + when TRUE. If -2, only the keyword table is searched. */ return_sp_as_variable; /* When TRUE, the word "sp" denotes the stack pointer variable @@ -269,8 +268,7 @@ static lexeme_data circle[CIRCLE_SIZE]; typedef struct lextext_s { char *text; - size_t size; /* Allocated size (including terminal null) - This is always at least MAX_IDENTIFIER_LENGTH+1 */ + size_t size; /* Allocated size (including terminal null) */ } lextext; static lextext *lextexts; /* Allocated to no_lextexts */ @@ -286,12 +284,19 @@ static int lex_pos; /* Current write position in that lextext */ /* ------------------------------------------------------------------------- */ /* The lexer itself needs up to 3 characters of lookahead (it uses an */ /* LR(3) grammar to translate characters into tokens). */ +/* */ +/* Past the end of the stream, we fill in zeros. This has the awkward */ +/* side effect that a zero byte in a source file will silently terminate */ +/* it, rather than producing an "illegal source character" error. */ +/* On the up side, we can compile veneer routines (which are null- */ +/* terminated strings) with no extra work. */ /* ------------------------------------------------------------------------- */ #define LOOKAHEAD_SIZE 3 static int current, lookahead, /* The latest character read, and */ lookahead2, lookahead3; /* the three characters following it */ + /* (zero means end-of-stream) */ static int pipeline_made; /* Whether or not the pipeline of characters has been constructed @@ -337,6 +342,8 @@ extern void describe_token_triple(const char *text, int32 value, int type) break; case SQ_TT: printf("string '%s'", text); break; + case UQ_TT: printf("barestring %s", text); + break; case SEP_TT: printf("separator '%s'", text); break; case EOF_TT: printf("end of file"); @@ -409,7 +416,7 @@ extern void describe_token_triple(const char *text, int32 value, int type) /* This must exceed the total number of keywords across all groups, including opcodes. */ -#define MAX_KEYWORDS (350) +#define MAX_KEYWORDS (500) /* The values will be filled in at compile time, when we know which opcode set to use. */ @@ -440,6 +447,7 @@ static char *opcode_list_z[] = { "get_wind_prop", "scroll_window", "pop_stack", "read_mouse", "mouse_window", "push_stack", "put_wind_prop", "print_form", "make_menu", "picture_table", "print_unicode", "check_unicode", + "set_true_colour", "buffer_screen", "" }; @@ -463,12 +471,17 @@ static char *opcode_list_g[] = { "streamunichar", "mzero", "mcopy", "malloc", "mfree", "accelfunc", "accelparam", + "hasundo", "discardundo", "numtof", "ftonumz", "ftonumn", "ceil", "floor", "fadd", "fsub", "fmul", "fdiv", "fmod", "sqrt", "exp", "log", "pow", "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf", - "hasundo", "discardundo", + "numtod", "dtonumz", "dtonumn", "ftod", "dtof", "dceil", "dfloor", + "dadd", "dsub", "dmul", "ddiv", "dmodr", "dmodq", + "dsqrt", "dexp", "dlog", "dpow", + "dsin", "dcos", "dtan", "dasin", "dacos", "datan", "datan2", + "jdeq", "jdne", "jdlt", "jdle", "jdgt", "jdge", "jdisnan", "jdisinf", "" }; @@ -480,7 +493,7 @@ keyword_group opcode_macros = static char *opmacro_list_z[] = { "" }; static char *opmacro_list_g[] = { - "pull", "push", + "pull", "push", "dload", "dstore", "" }; @@ -600,11 +613,8 @@ static int lexical_context(void) always translate to the same output tokens whenever the context is the same. - In fact, for efficiency reasons this number omits the bit of - information held in the variable "dont_enter_into_symbol_table". - Inform never needs to backtrack through tokens parsed in that - way (thankfully, as it would be expensive indeed to check - the tokens). */ + (For many years, the "dont_enter_into_symbol_table" variable + was omitted from this number. But now we can include it.) */ int c = 0; if (opcode_names.enabled) c |= 1; @@ -620,11 +630,17 @@ static int lexical_context(void) if (local_variables.enabled) c |= 1024; if (return_sp_as_variable) c |= 2048; + if (dont_enter_into_symbol_table) c |= 4096; + return(c); } static void print_context(int c) { + if (c < 0) { + printf("??? "); + return; + } if ((c & 1) != 0) printf("OPC "); if ((c & 2) != 0) printf("DIR "); if ((c & 4) != 0) printf("TK "); @@ -637,6 +653,7 @@ static void print_context(int c) if ((c & 512) != 0) printf("SCON "); if ((c & 1024) != 0) printf("LV "); if ((c & 2048) != 0) printf("sp "); + if ((c & 4096) != 0) printf("dontent "); } static int *keywords_hash_table; @@ -652,14 +669,22 @@ static int *local_variable_hash_codes; 119 for Glulx. */ +/* The number of local variables in the current routine. */ +int no_locals; + /* Names of local variables in the current routine. + The values are positions in local_variable_names_memlist. This is allocated to MAX_LOCAL_VARIABLES-1. (The stack pointer "local" is not included in this array.) (This could be a memlist, growing as needed up to MAX_LOCAL_VARIABLES-1. But right now we just allocate the max.) */ -identstruct *local_variable_names; +int *local_variable_name_offsets; + +static memory_list local_variable_names_memlist; +/* How much of local_variable_names_memlist is used by the no_local locals. */ +static int local_variable_names_usage; static char one_letter_locals[128]; @@ -724,9 +749,42 @@ static void make_keywords_tables(void) } } +extern void clear_local_variables(void) +{ + no_locals = 0; + local_variable_names_usage = 0; +} + +extern void add_local_variable(char *name) +{ + int len; + + if (no_locals >= MAX_LOCAL_VARIABLES-1) { + /* This should have been caught before we got here */ + error("too many local variables"); + return; + } + + len = strlen(name)+1; + ensure_memory_list_available(&local_variable_names_memlist, local_variable_names_usage + len); + local_variable_name_offsets[no_locals++] = local_variable_names_usage; + strcpy((char *)local_variable_names_memlist.data+local_variable_names_usage, name); + local_variable_names_usage += len; +} + +extern char *get_local_variable_name(int index) +{ + if (index < 0 || index >= no_locals) + return "???"; /* shouldn't happen */ + + return (char *)local_variable_names_memlist.data + local_variable_name_offsets[index]; +} + /* Look at the strings stored in local_variable_names (from 0 to no_locals). Set local_variables.keywords to point to these, and also prepare the - hash tables. */ + hash tables. + This must be called after add_local_variable(), but before we start + compiling function code. */ extern void construct_local_variable_tables(void) { int i, h; for (i=0; i= 0) + { int *i = keywords_data_table + 3*index; + keyword_group *kg = keyword_groups[*i]; + if (kg == &directives) + { char *q = kg->keywords[*(i+1)]; + if (((kg->case_sensitive) && (strcmp(p, q)==0)) + || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0))) + { circle[pos].type = kg->change_token_type; + circle[pos].value = *(i+1); + return; + } + } + index = *(i+2); + } + } + + circle[pos].type = UQ_TT; + circle[pos].value = 0; + return; + } + /* If this is assembly language, perhaps it is "sp"? */ if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0)) @@ -785,7 +876,9 @@ static void interpret_identifier(char *p, int pos, int dirs_only_flag) if (index >= 0) { for (;index= 0) { int *i = keywords_data_table + 3*index; keyword_group *kg = keyword_groups[*i]; - if (((!dirs_only_flag) && (kg->enabled)) - || (dirs_only_flag && (kg == &directives))) + if (kg->enabled) { char *q = kg->keywords[*(i+1)]; if (((kg->case_sensitive) && (strcmp(p, q)==0)) || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0))) @@ -819,11 +910,9 @@ static void interpret_identifier(char *p, int pos, int dirs_only_flag) index = *(i+2); } - if (dirs_only_flag) return; - /* Search for the name; create it if necessary. */ - circle[pos].value = symbol_index(p, hashcode); + circle[pos].value = symbol_index(p, hashcode, &circle[pos].newsymbol); circle[pos].type = SYMBOL_TT; } @@ -896,6 +985,7 @@ static void make_tokeniser_grid(void) tokeniser_grid[0] = EOF_CODE; tokeniser_grid[' '] = WHITESPACE_CODE; tokeniser_grid['\n'] = WHITESPACE_CODE; + tokeniser_grid['\r'] = WHITESPACE_CODE; tokeniser_grid['$'] = RADIX_CODE; tokeniser_grid['!'] = COMMENT_CODE; @@ -1212,9 +1302,10 @@ static double pow10_cheap(int expo) * lexer should never do that). * * Note that using a float constant does *not* set the uses_float_features - * flag (which would cause the game file to be labelled 3.1.2). There's - * no VM feature here, just an integer. Of course, any use of the float - * *opcodes* will set the flag. + * flag (which would cause the game file to be labelled 3.1.2). Same with + * double constants and the uses_double_features flag. There's no VM + * feature here, just an integer. Of course, any use of the float *opcodes* + * will set the flag. * * The math functions in this routine require #including , but * they should not require linking the math library (-lm). At least, @@ -1272,9 +1363,93 @@ static int32 construct_float(int signbit, double intv, double fracv, int expo) } } + /* At this point, expo is less than 2^8; fbits is less than 2^23; neither is negative. */ return (sign) | ((int32)(expo << 23)) | (fbits); } +/* Same as the above, but we return *half* of a 64-bit double, depending on whether wanthigh is true (high half) or false (low half). + */ +static int32 construct_double(int wanthigh, int signbit, double intv, double fracv, int expo) +{ + double absval = (intv + fracv) * pow10_cheap(expo); + int32 sign = (signbit ? 0x80000000 : 0x0); + double mant; + uint32 fhi, flo; + + if (isinf(absval)) { + goto Infinity; + } + if (isnan(absval)) { + goto NotANumber; + } + + mant = frexp(absval, &expo); + + /* Normalize mantissa to be in the range [1.0, 2.0) */ + if (0.5 <= mant && mant < 1.0) { + mant *= 2.0; + expo--; + } + else if (mant == 0.0) { + expo = 0; + } + else { + goto Infinity; + } + + if (expo >= 1024) { + goto Infinity; + } + else if (expo < -1022) { + /* Denormalized (very small) number */ + mant = ldexp(mant, 1022 + expo); + expo = 0; + } + else if (!(expo == 0 && mant == 0.0)) { + expo += 1023; + mant -= 1.0; /* Get rid of leading 1 */ + } + + /* fhi receives the high 28 bits; flo the low 24 bits (total 52 bits) */ + mant *= 268435456.0; /* 2^28 */ + fhi = (uint32)mant; /* Truncate */ + mant -= (double)fhi; + mant *= 16777216.0; /* 2^24 */ + flo = (uint32)(mant+0.5); /* Round */ + + if (flo >> 24) { + /* The carry propagated out of a string of 24 1 bits. */ + flo = 0; + fhi++; + if (fhi >> 28) { + /* And it also propagated out of the next 28 bits. */ + fhi = 0; + expo++; + if (expo >= 2047) { + goto Infinity; + } + } + } + + /* At this point, expo is less than 2^11; fhi is less than 2^28; flo is less than 2^24; none are negative. */ + if (wanthigh) + return (sign) | ((int32)(expo << 20)) | ((int32)(fhi >> 8)); + else + return (int32)((fhi & 0xFF) << 24) | (int32)(flo); + + Infinity: + if (wanthigh) + return sign | 0x7FF00000; + else + return 0x00000000; + + NotANumber: + if (wanthigh) + return sign | 0x7FF80000; + else + return 0x00000001; +} + /* ------------------------------------------------------------------------- */ /* Characters are read via a "pipeline" of variables, allowing us to look */ /* up to three characters ahead of the current position. */ @@ -1288,7 +1463,7 @@ static int32 construct_float(int signbit, double intv, double fracv, int expo) /* */ /* Note that file_load_chars(p, size) loads "size" bytes into buffer "p" */ /* from the current input file. If the file runs out, then if it was */ -/* the last source file 4 EOF characters are placed in the buffer: if it */ +/* the last source file 4 null characters are placed in the buffer: if it */ /* was only an Include file ending, then a '\n' character is placed there */ /* (essentially to force termination of any comment line) followed by */ /* three harmless spaces. */ @@ -1451,12 +1626,33 @@ static int get_next_char_from_pipeline(void) CurrentLB->chars_read++; if (forerrors_pointer < FORERRORS_SIZE-1) forerrors_buff[forerrors_pointer++] = current; - if (current == '\n') reached_new_line(); + + /* The file is open in binary mode, so we have to do our own newline + conversion. (We want to do it consistently across all platforms.) + + The strategy is to convert all \r (CR) characters to \n (LF), but + *don't* advance the line counter for \r if it's followed by \n. + The rest of the lexer treats multiple \n characters the same as + one, so the simple conversion will work out okay. + + (Note that, for historical reasons, a ctrl-L (formfeed) is also + treated as \r. This conversion has already been handled by + source_to_iso_grid[].) + */ + if (current == '\n') { + reached_new_line(); + } + else if (current == '\r') { + current = '\n'; + if (lookahead != '\n') + reached_new_line(); + } + return(current); } /* ------------------------------------------------------------------------- */ -/* Source 2: from a string */ +/* Source 2: from a (null-terminated) string */ /* ------------------------------------------------------------------------- */ static int source_to_analyse_pointer; /* Current read position */ @@ -1475,7 +1671,12 @@ static int get_next_char_from_string(void) CurrentLB->chars_read++; if (forerrors_pointer < FORERRORS_SIZE-1) forerrors_buff[forerrors_pointer++] = current; + + /* We shouldn't have \r when compiling from string (veneer function). + If we do, just shove it under the carpet. */ + if (current == '\r') current = '\n'; if (current == '\n') reached_new_line(); + return(current); } @@ -1496,7 +1697,8 @@ static int get_next_char_from_string(void) /* */ /* restart_lexer(source, name) if source is NULL, initialise the lexer */ /* to read from source files; */ -/* otherwise, to read from this string. */ +/* otherwise, to read from this null- */ +/* terminated string. */ /* ------------------------------------------------------------------------- */ extern void release_token_texts(void) @@ -1542,11 +1744,28 @@ extern void release_token_texts(void) extern void put_token_back(void) { tokens_put_back++; + int pos = circle_position - tokens_put_back + 1; + if (pos<0) pos += CIRCLE_SIZE; + if (tokens_trace_level > 0) - { if (tokens_trace_level == 1) printf("<- "); - else printf("<-\n"); + { + printf("<- "); + if (tokens_trace_level > 1) { + describe_token(&circle[pos]); + printf("\n"); + } } + if (circle[pos].type == SYMBOL_TT && circle[pos].newsymbol) { + /* Remove the symbol from the symbol table. (Or mark it as unreachable + anyhow.) */ + end_symbol_scope(circle[pos].value, TRUE); + /* Remove new-symbol flag, and force reinterpretation next time + we see the symbol. */ + circle[pos].newsymbol = FALSE; + circle[pos].context = -1; + } + /* The following error, of course, should never happen! */ if (tokens_put_back == CIRCLE_SIZE) @@ -1605,7 +1824,10 @@ static void lexadds(char *str) } extern void get_next_token(void) -{ int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r; +{ int d, i, j, k, quoted_size, e, radix, context; + uint32 n; + char *r; + int floatend; int returning_a_put_back_token = TRUE; context = lexical_context(); @@ -1617,7 +1839,7 @@ extern void get_next_token(void) if (context != circle[i].context) { j = circle[i].type; if ((j==0) || ((j>=100) && (j<200))) - interpret_identifier(circle[i].text, i, FALSE); + interpret_identifier(circle[i].text, i); circle[i].context = context; } goto ReturnBack; @@ -1632,7 +1854,7 @@ extern void get_next_token(void) /* fresh lextext block; must init it */ no_lextexts = lex_index+1; ensure_memory_list_available(&lextexts_memlist, no_lextexts); - lextexts[lex_index].size = MAX_IDENTIFIER_LENGTH + 1; + lextexts[lex_index].size = 64; /* this can grow */ lextexts[lex_index].text = my_malloc(lextexts[lex_index].size, "one lexeme text"); } lex_pos = 0; @@ -1642,6 +1864,7 @@ extern void get_next_token(void) circle[circle_position].text = NULL; /* will fill in later */ circle[circle_position].value = 0; circle[circle_position].type = 0; + circle[circle_position].newsymbol = FALSE; circle[circle_position].context = context; StartTokenAgain: @@ -1667,7 +1890,7 @@ extern void get_next_token(void) goto StartTokenAgain; case COMMENT_CODE: - while ((lookahead != '\n') && (lookahead != 0)) + while ((lookahead != '\n') && (lookahead != '\r') && (lookahead != 0)) (*get_next_char)(); goto StartTokenAgain; @@ -1688,10 +1911,13 @@ extern void get_next_token(void) lexaddc(0); circle[circle_position].type = NUMBER_TT; - circle[circle_position].value = n; + circle[circle_position].value = (int32)n; break; FloatNumber: + /* When we reach here, d is the sign bit ('+' or '-'). + If we're constructing a 32-bit float, floatend is 0; + for a 64-bit double, floatend is '>' for high, '<' for low. */ { int expo=0; double intv=0, fracv=0; int expocount=0, intcount=0, fraccount=0; int signbit = (d == '-'); @@ -1735,7 +1961,12 @@ extern void get_next_token(void) } if (intcount + fraccount == 0) error("Floating-point literal must have digits"); - n = construct_float(signbit, intv, fracv, expo); + if (floatend == '>') + n = construct_double(TRUE, signbit, intv, fracv, expo); + else if (floatend == '<') + n = construct_double(FALSE, signbit, intv, fracv, expo); + else + n = construct_float(signbit, intv, fracv, expo); } lexaddc(0); circle[circle_position].type = NUMBER_TT; @@ -1745,7 +1976,18 @@ extern void get_next_token(void) case RADIX_CODE: radix = 16; d = (*get_next_char)(); - if (d == '-' || d == '+') { goto FloatNumber; } + if (d == '-' || d == '+') { + floatend = 0; + goto FloatNumber; + } + if (d == '<' || d == '>') { + floatend = d; + d = (*get_next_char)(); + if (d == '-' || d == '+') { + goto FloatNumber; + } + error("Signed number expected after '$<' or '$>'"); + } if (d == '$') { d = (*get_next_char)(); radix = 2; } if (character_digit_value[d] >= radix) { if (radix == 2) @@ -1759,11 +2001,7 @@ extern void get_next_token(void) quoted_size=0; do { e = d; d = (*get_next_char)(); lexaddc(d); - if (quoted_size++==64) - { error( - "Too much text for one pair of quotations '...' to hold"); - lexaddc('\''); break; - } + quoted_size++; if ((d == '\'') && (e != '@')) { if (quoted_size == 1) { d = (*get_next_char)(); lexaddc(d); @@ -1772,28 +2010,27 @@ extern void get_next_token(void) } break; } - } while (d != EOF); - if (d==EOF) ebf_error("'\''", "end of file"); + } while (d != 0); + if (d==0) ebf_error("'\''", "end of file"); lexdelc(); circle[circle_position].type = SQ_TT; break; case DQUOTE_CODE: /* Double-quotes: scan a literal string */ - quoted_size=0; do { d = (*get_next_char)(); lexaddc(d); if (d == '\n') { lex_pos--; while (lexlastc() == ' ') lex_pos--; if (lexlastc() != '^') lexaddc(' '); - while ((lookahead != EOF) && + while ((lookahead != 0) && (tokeniser_grid[lookahead] == WHITESPACE_CODE)) (*get_next_char)(); } else if (d == '\\') { int newline_passed = FALSE; lex_pos--; - while ((lookahead != EOF) && + while ((lookahead != 0) && (tokeniser_grid[lookahead] == WHITESPACE_CODE)) if ((d = (*get_next_char)()) == '\n') newline_passed = TRUE; @@ -1805,8 +2042,8 @@ extern void get_next_token(void) chb); } } - } while ((d != EOF) && (d!='\"')); - if (d==EOF) ebf_error("'\"'", "end of file"); + } while ((d != 0) && (d!='\"')); + if (d==0) ebf_error("'\"'", "end of file"); lexdelc(); circle[circle_position].type = DQ_TT; break; @@ -1814,37 +2051,13 @@ extern void get_next_token(void) case IDENTIFIER_CODE: /* Letter or underscore: an identifier */ lexaddc(d); n=1; - while ((n<=MAX_IDENTIFIER_LENGTH) - && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE) + while (((tokeniser_grid[lookahead] == IDENTIFIER_CODE) || (tokeniser_grid[lookahead] == DIGIT_CODE))) n++, lexaddc((*get_next_char)()); lexaddc(0); - if (n > MAX_IDENTIFIER_LENGTH) - { char bad_length[100]; - sprintf(bad_length, - "Name exceeds the maximum length of %d characters:", - MAX_IDENTIFIER_LENGTH); - error_named(bad_length, lextexts[lex_index].text); - /* Eat any further extra characters in the identifier */ - while (((tokeniser_grid[lookahead] == IDENTIFIER_CODE) - || (tokeniser_grid[lookahead] == DIGIT_CODE))) - (*get_next_char)(); - /* Trim token so that it doesn't violate - MAX_IDENTIFIER_LENGTH during error recovery */ - lextexts[lex_index].text[MAX_IDENTIFIER_LENGTH] = 0; - } - - if (dont_enter_into_symbol_table) - { circle[circle_position].type = DQ_TT; - circle[circle_position].value = 0; - if (dont_enter_into_symbol_table == -2) - interpret_identifier(lextexts[lex_index].text, circle_position, TRUE); - break; - } - - interpret_identifier(lextexts[lex_index].text, circle_position, FALSE); + interpret_identifier(lextexts[lex_index].text, circle_position); break; default: @@ -1949,7 +2162,10 @@ extern void get_next_token(void) else { printf("-> "); describe_token(&circle[i]); printf(" "); - if (tokens_trace_level > 2) print_context(circle[i].context); + if (tokens_trace_level > 2) { + if (circle[i].newsymbol) printf("newsym "); + print_context(circle[i].context); + } printf("\n"); } } @@ -1963,6 +2179,7 @@ extern void restart_lexer(char *lexical_source, char *name) for (i=0; i