1 /* ------------------------------------------------------------------------- */
2 /* "syntax" : Syntax analyser and compiler */
4 /* Part of Inform 6.40 */
5 /* copyright (c) Graham Nelson 1993 - 2022 */
7 /* Inform is free software: you can redistribute it and/or modify */
8 /* it under the terms of the GNU General Public License as published by */
9 /* the Free Software Foundation, either version 3 of the License, or */
10 /* (at your option) any later version. */
12 /* Inform is distributed in the hope that it will be useful, */
13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
15 /* GNU General Public License for more details. */
17 /* You should have received a copy of the GNU General Public License */
18 /* along with Inform. If not, see https://gnu.org/licenses/ */
20 /* ------------------------------------------------------------------------- */
/* Source text the lexer is currently reading. parse_program() stores its
   argument here; parse_routine() compares its own source argument against
   this and restarts the lexer when they differ (or for a veneer routine). */
24 static char *lexical_source;
/* Reset to 0 by syntax_begin_pass(). */
26 int no_syntax_lines; /* Syntax line count */
/* Set the lexer up for the start of a new syntax line.

   statement_mode chooses which keyword groups the lexer will recognise.
   Two groups of assignments are visible below: one enables statements,
   conditions, local variables and system functions; the other enables
   directives and segment markers. Each group disables everything else.
   NOTE(review): the lines that select between the two groups are not
   visible in this listing. Callers pass TRUE while parsing routine code
   and FALSE from parse_directive(), so TRUE presumably selects the first
   group -- confirm against the full source.

   Independently of the mode, this marks the next token as beginning a
   syntax line, clears the expression workspace, sets
   sequence_point_follows and records statement_debug_location. */
28 static void begin_syntax_line(int statement_mode)
30 next_token_begins_syntax_line = TRUE;
32 clear_expression_space();
/* Keyword groups used while parsing statements. */
34 { statements.enabled = TRUE;
35 conditions.enabled = TRUE;
36 local_variables.enabled = TRUE;
37 system_functions.enabled = TRUE;
39 misc_keywords.enabled = FALSE;
40 directive_keywords.enabled = FALSE;
41 directives.enabled = FALSE;
42 segment_markers.enabled = FALSE;
43 opcode_names.enabled = FALSE;
/* Keyword groups used while parsing directives. */
46 { directives.enabled = TRUE;
47 segment_markers.enabled = TRUE;
49 statements.enabled = FALSE;
50 misc_keywords.enabled = FALSE;
51 directive_keywords.enabled = FALSE;
52 local_variables.enabled = FALSE;
53 system_functions.enabled = FALSE;
54 conditions.enabled = FALSE;
55 opcode_names.enabled = FALSE;
58 sequence_point_follows = TRUE;
/* NOTE(review): the lines around this assignment are not visible here;
   it may be conditional -- confirm against the full source. */
62 statement_debug_location = get_token_location();
/* Error-recovery helper. Takes no arguments and returns nothing; it works
   on the lexer's current-token globals (token_type, token_value). */
67 extern void panic_mode_error_recovery(void)
69 /* Consume tokens until the next semicolon (or end of file).
70 This is typically called after a syntax error, in hopes of
71 getting parsing back on track. */
/* Loop while the current token is neither EOF_TT nor the separator
   SEMICOLON_SEP.
   NOTE(review): the loop body is not visible in this listing; presumably
   it fetches the next token. */
73 while ((token_type != EOF_TT)
74 && ((token_type != SEP_TT)||(token_value != SEMICOLON_SEP)))
/* Fetch a token, detecting a '#'-prefixed directive and handing it to
   parse_given_directive(). Takes no arguments and returns nothing; the
   result is left in the lexer's current-token globals.
   NOTE(review): this listing omits some short lines of the body (braces,
   token fetches, any enclosing loop); confirm the exact control flow
   against the full source. */
79 extern void get_next_token_with_directives(void)
81 /* A higher-level version of get_next_token(), which detects and
82 obeys directives such as #ifdef/#ifnot/#endif. (The # sign is
83 required in this case.)
85 This is called while parsing a long construct, such as Class or
86 Object, where we want to support internal #ifdefs. (Although
87 function-parsing predates this and doesn't make use of it.) */
/* Only three flags are saved. At the end of the function statements_save
   is also used to restore local_variables, misc_keywords and
   system_functions. */
89 int directives_save, segment_markers_save, statements_save;
95 /* If the first token is not a '#', return it directly. */
96 if ((token_type != SEP_TT) || (token_value != HASH_SEP))
99 /* Save the lexer flags, and set up for directive parsing. */
100 directives_save = directives.enabled;
101 segment_markers_save = segment_markers.enabled;
102 statements_save = statements.enabled;
/* While the token after '#' is read, only directives are enabled. */
104 directives.enabled = TRUE;
105 segment_markers.enabled = FALSE;
106 statements.enabled = FALSE;
107 conditions.enabled = FALSE;
108 local_variables.enabled = FALSE;
109 misc_keywords.enabled = FALSE;
110 system_functions.enabled = FALSE;
/* "#[" would open a routine definition, which is reported as an error
   in this context. */
114 if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
115 { error("It is illegal to nest a routine inside an object using '#['");
/* A directive token is obeyed (TRUE marks it as internal); the
   ebf_error() call below reports a token that is not a directive. */
119 if (token_type == DIRECTIVE_TT)
120 parse_given_directive(TRUE);
122 { ebf_error("directive", token_text);
126 /* Restore all the lexer flags. (We are squashing several of them
127 into a single save variable, which I think is safe because that's
130 directive_keywords.enabled = FALSE;
131 directives.enabled = directives_save;
132 segment_markers.enabled = segment_markers_save;
135 local_variables.enabled =
136 misc_keywords.enabled =
137 system_functions.enabled = statements_save;
// Compile a whole program from `source`.
// Records `source` as the current lexical source, then calls
// parse_directive(FALSE) repeatedly until it returns FALSE, which
// parse_directive documents as meaning end of file was reached.
141 extern void parse_program(char *source)
143 lexical_source = source;
// The loop body is intentionally empty: all the work happens in the call.
144 while (parse_directive(FALSE)) ;
147 extern int parse_directive(int internal_flag)
149 /* Internal_flag is FALSE if the directive is encountered normally
150 (at the top level of the program); TRUE if encountered with
151 a # prefix inside a routine or object definition.
153 (Only directives like #ifdef are permitted inside a definition.)
155 Returns: TRUE if program continues, FALSE if end of file reached. */
/* Visible flow: begin a syntax line in directive mode, return FALSE at
   end of file, then dispatch on the token. '[' starts a routine
   definition, a symbol of type CLASS_T starts an object of that class,
   and a directive token is passed to parse_given_directive().
   NOTE(review): this listing omits some short lines (braces, bare
   else/return lines, token fetches); confirm details in the full source. */
157 int routine_symbol, rep_symbol;
160 begin_syntax_line(FALSE);
161 if (!internal_flag) {
162 /* An internal directive can occur in the middle of an expression or
163 object definition. So we only release for top-level directives. */
164 release_token_texts();
168 if (token_type == EOF_TT) return(FALSE);
170 if ((token_type == SEP_TT) && (token_value == HASH_SEP))
/* '[' opens a routine definition. The error below rejects one that was
   opened with "#[" inside another definition. */
173 if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
175 { error("It is illegal to nest routines using '#['");
179 directives.enabled = FALSE;
180 directive_keywords.enabled = FALSE;
181 segment_markers.enabled = FALSE;
183 /* The upcoming symbol is a definition; don't count it as a
184 top-level reference *to* the function. */
185 df_dont_note_global_symbols = TRUE;
187 df_dont_note_global_symbols = FALSE;
188 if (token_type != SYMBOL_TT)
189 { ebf_error("routine name", token_text);
/* A symbol that is neither unknown nor marked for replacement is already
   defined as something else, so it is reported with its existing type
   and line. */
192 if ((!(symbols[token_value].flags & UNKNOWN_SFLAG))
193 && (!(symbols[token_value].flags & REPLACE_SFLAG)))
194 { ebf_symbol_error("routine name", token_text, typename(symbols[token_value].type), symbols[token_value].line);
198 routine_symbol = token_value;
/* rep_symbol starts as the routine's own symbol.
   NOTE(review): find_symbol_replacement() presumably rewrites it to the
   symbol the routine is renamed to and returns whether it did so. */
200 rep_symbol = routine_symbol;
201 is_renamed = find_symbol_replacement(&rep_symbol);
203 if ((symbols[routine_symbol].flags & REPLACE_SFLAG)
204 && !is_renamed && (is_systemfile()))
205 { /* The function is definitely being replaced (system_file
206 always loses priority in a replacement) but is not
207 being renamed to something else. Skip its definition
209 dont_enter_into_symbol_table = TRUE;
212 } while (!((token_type == EOF_TT)
213 || ((token_type==SEP_TT)
214 && (token_value==CLOSE_SQUARE_SEP))));
215 dont_enter_into_symbol_table = FALSE;
216 if (token_type == EOF_TT) return FALSE;
219 { /* Parse the function definition and assign its symbol. */
220 assign_symbol(routine_symbol,
221 parse_routine(lexical_source, FALSE,
222 symbols[routine_symbol].name, FALSE, routine_symbol),
224 symbols[routine_symbol].line = routine_starts_line;
228 /* This function was subject to a "Replace X Y" directive.
229 The first time we see a definition for symbol X, we
230 copy it to Y -- that's the "original" form of the
232 if (symbols[rep_symbol].value == 0) {
233 assign_symbol(rep_symbol, symbols[routine_symbol].value, ROUTINE_T);
238 if ((token_type != SEP_TT) || (token_value != SEMICOLON_SEP))
239 { ebf_error("';' after ']'", token_text);
245 if ((token_type == SYMBOL_TT) && (symbols[token_value].type == CLASS_T))
247 { error("It is illegal to nest an object in a routine using '#classname'");
250 symbols[token_value].flags |= USED_SFLAG;
251 make_object(FALSE, NULL, -1, -1, symbols[token_value].value);
255 if (token_type != DIRECTIVE_TT)
256 { /* If we're internal, we expect only a directive here. If
257 we're top-level, the possibilities are broader. */
259 ebf_error("directive", token_text);
261 ebf_error("directive, '[' or class name", token_text);
262 panic_mode_error_recovery();
/* The return value is the logical negation of what
   parse_given_directive() returns. */
266 return !(parse_given_directive(internal_flag));
269 /* Check what's coming up after a switch case value.
   Classifies the current token (token_type / token_value):
     1  for the separator ':'
     2  for the separator ','
     3  for the misc keyword 'to'
   NOTE(review): the return for any other token is not visible in this
   listing. Callers test the result with "> 0" and report an error in
   the remaining case, so it is presumably zero or negative. */
270 static int switch_sign(void)
272 if ((token_type == SEP_TT)&&(token_value == COLON_SEP)) return 1;
273 if ((token_type == SEP_TT)&&(token_value == COMMA_SEP)) return 2;
274 if ((token_type==MISC_KEYWORD_TT)&&(token_value==TO_MK)) return 3;
278 /* Info for the current switch statement. Both arrays indexed by spec_sp
   (a local of parse_switch_spec()).
   spec_stack[k] holds the operand for the k-th value of a case, and
   spec_type[k] holds the switch_sign() code of the token that followed
   that value.
   MAX_SPEC_STACK caps the number of values in one case. The error
   message in parse_switch_spec() spells the limit out as a literal 32,
   so keep the two in step if this is ever changed. */
279 #define MAX_SPEC_STACK (32)
280 static assembly_operand spec_stack[MAX_SPEC_STACK];
281 static int spec_type[MAX_SPEC_STACK];
/* Z-machine version: assemble a single je_zc test of switch_value against
   consecutive operands taken from spec_stack, starting at stack_level.
   label and flag are passed straight through to the branch assembler as
   the branch target and branch sense.
   The three visible calls compare against one, two and three operands.
   NOTE(review): the lines that select between them are not visible in
   this listing; presumably they switch on n. */
283 static void compile_alternatives_z(assembly_operand switch_value, int n,
284 int stack_level, int label, int flag)
/* One comparison value. */
287 assemblez_2_branch(je_zc, switch_value,
288 spec_stack[stack_level],
289 label, flag); return;
/* Two comparison values. */
291 assemblez_3_branch(je_zc, switch_value,
292 spec_stack[stack_level], spec_stack[stack_level+1],
293 label, flag); return;
/* Three comparison values. */
295 assemblez_4_branch(je_zc, switch_value,
296 spec_stack[stack_level], spec_stack[stack_level+1],
297 spec_stack[stack_level+2],
298 label, flag); return;
/* Glulx version: assemble an equality test of switch_value against
   spec_stack[stack_level].
   flag picks the opcode rather than being passed on: jeq_gc when flag is
   non-zero, jne_gc when it is zero.
   The error message below covers a request to test several values at
   once, which this function cannot generate.
   NOTE(review): the lines that distinguish the two cases, and the tail of
   the assembleg_2_branch() call, are not visible in this listing. */
302 static void compile_alternatives_g(assembly_operand switch_value, int n,
303 int stack_level, int label, int flag)
305 int the_zc = (flag) ? jeq_gc : jne_gc;
308 assembleg_2_branch(the_zc, switch_value,
309 spec_stack[stack_level],
313 error("*** Cannot generate multi-equality tests in Glulx ***");
/* Target-independent entry point: forwards all five arguments unchanged
   to either the Z-machine or the Glulx implementation.
   NOTE(review): the test that chooses between the two calls is not
   visible in this listing; presumably it checks glulx_mode. */
317 static void compile_alternatives(assembly_operand switch_value, int n,
318 int stack_level, int label, int flag)
321 compile_alternatives_z(switch_value, n, stack_level, label, flag);
323 compile_alternatives_g(switch_value, n, stack_level, label, flag);
/* Parse the values of one switch case into spec_stack / spec_type and
   assemble the tests for them.

   switch_value: the operand being switched on.
   label: branch target used with a FALSE branch sense in the equality
     tests, i.e. where execution goes when the value does not match.
   NOTE(review): a third parameter is declared on a line missing from
   this listing. parse_routine() passes TRUE for it and parse_code_block()
   passes FALSE; the action-name code below suggests it selects
   action-name parsing -- confirm against the full source.

   label_after is allocated on demand. It is placed at the end of the
   generated tests and is the target of the TRUE-sense equality branch
   below. */
326 static void parse_switch_spec(assembly_operand switch_value, int label,
329 int i, j, label_after = -1, spec_sp = 0;
/* Up to three values fit in one equality test outside Glulx mode, and
   only one in Glulx mode. */
330 int max_equality_args = ((!glulx_mode) ? 3 : 1);
332 sequence_point_follows = FALSE;
335 { if (spec_sp >= MAX_SPEC_STACK)
336 { error("At most 32 values can be given in a single 'switch' case");
337 panic_mode_error_recovery();
/* Quoted text is reported as not being an action name. */
343 if (token_type == SQ_TT || token_type == DQ_TT) {
344 ebf_error("action (or fake action) name", token_text);
347 spec_stack[spec_sp] = action_of_name(token_text);
/* A value of -1 from action_of_name() is replaced by 0 and reported. */
349 if (spec_stack[spec_sp].value == -1)
350 { spec_stack[spec_sp].value = 0;
351 ebf_error("action (or fake action) name", token_text);
/* The case value is parsed as a constant expression. */
355 spec_stack[spec_sp] =
356 code_generate(parse_expression(CONSTANT_CONTEXT), CONSTANT_CONTEXT, -1);
/* NOTE(review): misc keywords are switched on and straight off again
   here; presumably a token is fetched in between (on a line missing from
   this listing) so that 'to' can be recognised. */
358 misc_keywords.enabled = TRUE;
360 misc_keywords.enabled = FALSE;
/* Record which token followed this value, then act on it. */
362 spec_type[spec_sp++] = switch_sign();
363 switch(spec_type[spec_sp-1])
366 ebf_error("',' or ':'", token_text);
367 else ebf_error("',', ':' or 'to'", token_text);
368 panic_mode_error_recovery();
/* ':' ends the list of values. */
370 case 1: goto GenSpecCode;
/* 'to' introduces a range, which always needs label_after. */
371 case 3: if (label_after == -1) label_after = next_label++;
/* More values than one equality test can hold also needs label_after. */
377 if ((spec_sp > max_equality_args) && (label_after == -1))
378 label_after = next_label++;
/* No label_after: a single test covers everything, branching to label
   when nothing matches. */
380 if (label_after == -1)
381 { compile_alternatives(switch_value, spec_sp, 0, label, FALSE); return;
384 for (i=0; i<spec_sp;)
/* Advance j to the next entry followed by 'to', or to the end. */
386 j=i; while ((j<spec_sp) && (spec_type[j] != 3)) j++;
/* Limit the run to what one equality test can hold. */
389 { if (j-i > max_equality_args) j=i+max_equality_args;
392 compile_alternatives(switch_value, j-i, i, label, FALSE);
394 compile_alternatives(switch_value, j-i, i, label_after, TRUE);
/* Range test, Z-machine opcodes: spec_stack[i] is compared with jl_zc
   and spec_stack[i+1] with jg_zc. The i == spec_sp - 2 case is a range
   made of the last two entries. */
401 if (i == spec_sp - 2)
402 { assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
404 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
408 { assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
410 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
412 assemble_label_no(next_label++);
/* Range test, Glulx opcodes. The first variant uses jlt_gc and jgt_gc;
   the second uses jlt_gc and jle_gc. */
416 if (i == spec_sp - 2)
417 { assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
419 assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
423 { assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
425 assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
427 assemble_label_no(next_label++);
434 assemble_label_no(label_after);
/* Parse and compile one routine: its local variable list, then its
   statements up to the closing ']'.

   source: text to lex from. The lexer is restarted when this differs
     from lexical_source or when veneer_flag is set.
   embedded_flag: passed to assemble_routine_header(). It also picks the
     implicit return assembled at the end of a switch clause: rfalse_zc or
     zero_operand when set, rtrue_zc or one_operand otherwise.
   name: routine name, passed to restart_lexer() and
     assemble_routine_header().
   veneer_flag: forces a lexer restart (see source).
   r_symbol: index into symbols[]; passed to assemble_routine_header().
   Returns the value returned by assemble_routine_header().

   NOTE(review): this listing omits some short lines (braces, bare else
   lines, token fetches, loop headers); confirm details in the full
   source. */
437 extern int32 parse_routine(char *source, int embedded_flag, char *name,
438 int veneer_flag, int r_symbol)
439 { int32 packed_address; int i; int debug_flag = FALSE;
440 int switch_clause_made = FALSE, default_clause_made = FALSE,
442 debug_location_beginning beginning_debug_location =
443 get_token_location_beginning();
445 /* (switch_label needs no initialisation here, but it prevents some
446 compilers from issuing warnings) */
448 if ((source != lexical_source) || (veneer_flag))
449 { lexical_source = source;
450 restart_lexer(lexical_source, name);
/* Blank out the local variable names before reading this routine's. */
455 for (i=0;i<MAX_LOCAL_VARIABLES-1;i++)
456 local_variable_names[i].text[0] = 0;
459 { statements.enabled = TRUE;
460 dont_enter_into_symbol_table = TRUE;
462 dont_enter_into_symbol_table = FALSE;
/* A '*' seen before any local has been read sets debug_flag (once). */
463 if ((token_type == SEP_TT) && (token_value == TIMES_SEP)
464 && (no_locals == 0) && (!debug_flag))
465 { debug_flag = TRUE; continue;
/* Any other non-name token must be the ';' that ends the list. */
468 if (token_type != DQ_TT)
469 { if ((token_type == SEP_TT)
470 && (token_value == SEMICOLON_SEP)) break;
471 ebf_error("local variable name or ';'", token_text);
472 panic_mode_error_recovery();
476 if (strlen(token_text) > MAX_IDENTIFIER_LENGTH)
477 { error_named("Local variable identifier too long:", token_text);
478 panic_mode_error_recovery();
482 if (no_locals == MAX_LOCAL_VARIABLES-1)
483 { error_numbered("Too many local variables for a routine; max is",
484 MAX_LOCAL_VARIABLES-1);
485 panic_mode_error_recovery();
/* Report a name that matches one already read for this routine. */
489 for (i=0;i<no_locals;i++) {
490 if (strcmpcis(token_text, local_variable_names[i].text)==0)
491 error_named("Local variable defined twice:", token_text);
493 strcpy(local_variable_names[no_locals++].text, token_text);
496 /* Set up the local variable hash and the local_variables.keywords
498 construct_local_variable_tables();
500 if ((trace_fns_setting==3)
501 || ((trace_fns_setting==2) && (veneer_mode==FALSE))
502 || ((trace_fns_setting==1) && (is_systemfile()==FALSE)))
504 if ((embedded_flag == FALSE) && (veneer_mode == FALSE) && debug_flag)
505 symbols[r_symbol].flags |= STAR_SFLAG;
507 packed_address = assemble_routine_header(no_locals, debug_flag,
508 name, embedded_flag, r_symbol);
511 { begin_syntax_line(TRUE);
512 release_token_texts();
515 if (token_type == EOF_TT)
516 { ebf_error("']'", token_text);
519 get_token_location_end(beginning_debug_location));
524 if ((token_type == SEP_TT)
525 && (token_value == CLOSE_SQUARE_SEP))
526 { if (switch_clause_made && (!default_clause_made))
527 assemble_label_no(switch_label);
528 directives.enabled = TRUE;
529 sequence_point_follows = TRUE;
533 get_token_location_end(beginning_debug_location));
538 if ((token_type == STATEMENT_TT) && (token_value == SDEFAULT_CODE))
539 { if (default_clause_made)
540 error("Multiple 'default' clauses defined in same 'switch'");
541 default_clause_made = TRUE;
543 if (switch_clause_made)
544 { if (!execution_never_reaches_here)
545 { sequence_point_follows = FALSE;
547 assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
549 assembleg_1(return_gc,
550 ((embedded_flag)?zero_operand:one_operand));
552 assemble_label_no(switch_label);
554 switch_clause_made = TRUE;
557 if ((token_type == SEP_TT) &&
558 (token_value == COLON_SEP)) continue;
559 ebf_error("':' after 'default'", token_text);
560 panic_mode_error_recovery();
564 /* Only check for the form of a case switch if the initial token
565 isn't double-quoted text, as that would mean it was a print_ret
566 statement: this is a mild ambiguity in the grammar.
567 Action statements also cannot be cases. */
569 if ((token_type != DQ_TT) && (token_type != SEP_TT))
/* A positive switch_sign() means the tokens just read begin a case. */
571 if (switch_sign() > 0)
572 { assembly_operand AO;
573 if (default_clause_made)
574 error("'default' must be the last 'switch' case");
/* Close the previous clause: assemble an implicit return unless that
   point is unreachable, then place the label the previous clause's tests
   branch to on failure. */
576 if (switch_clause_made)
577 { if (!execution_never_reaches_here)
578 { sequence_point_follows = FALSE;
580 assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
582 assembleg_1(return_gc,
583 ((embedded_flag)?zero_operand:one_operand));
585 assemble_label_no(switch_label);
588 switch_label = next_label++;
589 switch_clause_made = TRUE;
/* Rewind two tokens so that parse_switch_spec() reads the case values
   from the start. */
590 put_token_back(); put_token_back();
/* AO is the operand the case values are tested against. It is set up in
   two ways below.
   NOTE(review): the test that chooses between them is not visible in
   this listing; presumably it is on glulx_mode. */
593 INITAOTV(&AO, VARIABLE_OT, 249);
596 INITAOTV(&AO, GLOBALVAR_OT, MAX_LOCAL_VARIABLES+6); /* sw__var */
598 parse_switch_spec(AO, switch_label, TRUE);
603 { put_token_back(); put_token_back(); get_next_token();
604 sequence_point_follows = TRUE;
/* Both label arguments are -1.
   NOTE(review): presumably parse_statement() treats -1 as meaning no
   break or continue target is available, as parse_code_block documents
   for its own label arguments. */
608 parse_statement(-1, -1);
612 return packed_address;
/* Summary of the code visible for parse_code_block():
   - On entry the EXECSTATE_ENTIRE bit of execution_never_reaches_here is
     remembered, and the bit is set if the block starts out unreachable.
     The remembered bit is put back at the end.
   - A block opening with a brace is parsed statement by statement, with
     '#' directives handled via parse_directive(TRUE). When switch_rule
     is non-zero, 'default' and case values are recognised as well.
   - Otherwise a single statement is parsed; with switch_rule non-zero
     that is reported as an error first.
   NOTE(review): the declaration of the third parameter (used below as
   switch_rule) and several short lines are missing from this listing;
   confirm details in the full source. */
615 /* Parse one block of code (a statement or brace-delimited stanza).
616 This is used by the IF, DO, FOR, OBJECTLOOP, SWITCH, and WHILE
618 (Note that this is *not* called by the top-level parse_routine()
620 The break_label and continue_label arguments are the labels in
621 the calling block to jump to on "break" or "continue". -1 means
622 we can't "break"/"continue" here (because we're not in a loop/switch).
623 If switch_rule is true, we're in a switch block; case labels are
626 extern void parse_code_block(int break_label, int continue_label,
628 { int switch_clause_made = FALSE, default_clause_made = FALSE, switch_label = 0;
629 int unary_minus_flag, saved_entire_flag;
631 saved_entire_flag = (execution_never_reaches_here & EXECSTATE_ENTIRE);
632 if (execution_never_reaches_here)
633 execution_never_reaches_here |= EXECSTATE_ENTIRE;
635 begin_syntax_line(TRUE);
636 release_token_texts();
639 if (token_type == SEP_TT && token_value == OPEN_BRACE_SEP)
641 /* Parse a braced stanza of statements. */
643 { begin_syntax_line(TRUE);
644 release_token_texts();
/* A '#' inside the block introduces an internal directive. */
647 if ((token_type == SEP_TT) && (token_value == HASH_SEP))
648 { parse_directive(TRUE);
/* At '}', a switch with cases but no 'default' still needs the last
   case's label placed. */
651 if (token_type == SEP_TT && token_value == CLOSE_BRACE_SEP)
652 { if (switch_clause_made && (!default_clause_made))
653 assemble_label_no(switch_label);
656 if (token_type == EOF_TT)
657 { ebf_error("'}'", token_text);
661 if (switch_rule != 0)
663 /* Within a 'switch' block */
665 if ((token_type==STATEMENT_TT)&&(token_value==SDEFAULT_CODE))
666 { if (default_clause_made)
667 error("Multiple 'default' clauses defined in same 'switch'");
668 default_clause_made = TRUE;
/* Close the previous clause: jump to break_label unless that point is
   unreachable, then place the label its tests branch to on failure. */
670 if (switch_clause_made)
671 { if (!execution_never_reaches_here)
672 { sequence_point_follows = FALSE;
673 assemble_jump(break_label);
675 assemble_label_no(switch_label);
677 switch_clause_made = TRUE;
680 if ((token_type == SEP_TT) &&
681 (token_value == COLON_SEP)) continue;
682 ebf_error("':' after 'default'", token_text);
683 panic_mode_error_recovery();
687 /* Decide: is this an ordinary statement, or the start
690 if (token_type == DQ_TT) goto NotASwitchCase;
693 = ((token_type == SEP_TT)&&(token_value == MINUS_SEP));
694 if (unary_minus_flag) get_next_token();
696 /* Now read the token _after_ any possible constant:
697 if that's a 'to', ',' or ':' then we have a case */
/* NOTE(review): misc keywords are switched on and straight off again
   here; presumably the token is fetched in between, on a line missing
   from this listing. */
699 misc_keywords.enabled = TRUE;
701 misc_keywords.enabled = FALSE;
703 if (switch_sign() > 0)
704 { assembly_operand AO;
706 if (default_clause_made)
707 error("'default' must be the last 'switch' case");
709 if (switch_clause_made)
710 { if (!execution_never_reaches_here)
711 { sequence_point_follows = FALSE;
712 assemble_jump(break_label);
714 assemble_label_no(switch_label);
717 switch_label = next_label++;
718 switch_clause_made = TRUE;
/* Rewind so that parse_switch_spec() reads the case values from the
   start: two tokens, plus one more if a unary minus was consumed. */
719 put_token_back(); put_token_back();
720 if (unary_minus_flag) put_token_back();
723 parse_switch_spec(AO, switch_label, FALSE);
/* Not a case after all: the same rewind is applied here. */
727 { put_token_back(); put_token_back();
728 if (unary_minus_flag) put_token_back();
/* In a switch block, a statement before any case is an error. */
733 if ((switch_rule != 0) && (!switch_clause_made))
734 ebf_error("switch value", token_text);
737 sequence_point_follows = TRUE;
738 parse_statement(break_label, continue_label);
/* A non-zero switch_rule is an error here, per the message below. */
743 if (switch_rule != 0)
744 ebf_error("braced code block after 'switch'", token_text);
746 /* Parse a single statement. */
747 parse_statement(break_label, continue_label);
/* Put back the EXECSTATE_ENTIRE bit as it was on entry. */
750 if (saved_entire_flag)
751 execution_never_reaches_here |= EXECSTATE_ENTIRE;
753 execution_never_reaches_here &= ~EXECSTATE_ENTIRE;
756 /* ========================================================================= */
757 /* Data structure management routines */
758 /* ------------------------------------------------------------------------- */
/* NOTE(review): no body is visible for this function in this listing;
   presumably it is empty. */
760 extern void init_syntax_vars(void)
/* Start-of-pass hook: resets the syntax line counter to zero. */
764 extern void syntax_begin_pass(void)
765 { no_syntax_lines = 0;
/* NOTE(review): no body is visible for this function in this listing;
   presumably it is empty. */
768 extern void syntax_allocate_arrays(void)
/* NOTE(review): no body is visible for this function in this listing;
   presumably it is empty. */
772 extern void syntax_free_arrays(void)
776 /* ========================================================================= */