1 /* ------------------------------------------------------------------------- */
2 /* "syntax" : Syntax analyser and compiler */
4 /* Part of Inform 6.42 */
5 /* copyright (c) Graham Nelson 1993 - 2024 */
7 /* Inform is free software: you can redistribute it and/or modify */
8 /* it under the terms of the GNU General Public License as published by */
9 /* the Free Software Foundation, either version 3 of the License, or */
10 /* (at your option) any later version. */
12 /* Inform is distributed in the hope that it will be useful, */
13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
15 /* GNU General Public License for more details. */
17 /* You should have received a copy of the GNU General Public License */
18 /* along with Inform. If not, see https://gnu.org/licenses/ */
20 /* ------------------------------------------------------------------------- */
/* The source pointer most recently recorded by parse_program() or
   parse_routine(). parse_routine() compares its own 'source' argument
   against this value when deciding whether to restart the lexer. */
24 static char *lexical_source;
/* Set back to 0 by syntax_begin_pass(). */
26 int no_syntax_lines; /* Syntax line count */
/* Set up lexer state for the start of a new syntax line.
   The visible code marks the next token as beginning a syntax line, calls
   clear_expression_space(), sets the lexer keyword-group enable flags to
   one of two configurations, sets sequence_point_follows, and records
   statement_debug_location from get_token_location().
   statement_mode: callers in this file pass TRUE before reading a
   statement and FALSE before reading a directive.
   NOTE(review): this extract has gaps in its line numbering. The test that
   chooses between the two flag groups, and whatever guards the debug
   location lookup, are not visible here -- presumably the former tests
   statement_mode; confirm against the full source. */
28 static void begin_syntax_line(int statement_mode)
30 next_token_begins_syntax_line = TRUE;
32 clear_expression_space();
/* First configuration: statement-level keyword groups on, directive-level
   groups off. */
34 { statements.enabled = TRUE;
35 conditions.enabled = TRUE;
36 local_variables.enabled = TRUE;
37 system_functions.enabled = TRUE;
39 misc_keywords.enabled = FALSE;
40 directive_keywords.enabled = FALSE;
41 directives.enabled = FALSE;
42 segment_markers.enabled = FALSE;
43 opcode_names.enabled = FALSE;
/* Second configuration: only directives and segment markers on. */
46 { directives.enabled = TRUE;
47 segment_markers.enabled = TRUE;
49 statements.enabled = FALSE;
50 misc_keywords.enabled = FALSE;
51 directive_keywords.enabled = FALSE;
52 local_variables.enabled = FALSE;
53 system_functions.enabled = FALSE;
54 conditions.enabled = FALSE;
55 opcode_names.enabled = FALSE;
58 sequence_point_follows = TRUE;
62 statement_debug_location = get_token_location();
/* Error-recovery helper. Takes no arguments and returns nothing; it reads
   the lexer globals token_type and token_value.
   NOTE(review): the loop body is not visible in this extract; per the
   comment below it presumably advances to the next token -- confirm. */
67 extern void panic_mode_error_recovery(void)
69 /* Consume tokens until the next semicolon (or end of file).
70 This is typically called after a syntax error, in hopes of
71 getting parsing back on track. */
/* Loop while the current token is neither end-of-file nor a ';'
   separator. */
73 while ((token_type != EOF_TT)
74 && ((token_type != SEP_TT)||(token_value != SEMICOLON_SEP)))
/* Token reader that also handles '#'-prefixed directives (see the comment
   in the body). Takes no arguments and returns nothing; the result is left
   in the lexer globals token_type and token_value.
   NOTE(review): this extract has gaps in its line numbering, so the token
   reads, the enclosing loop (if any) and the early returns are not visible
   here. */
79 extern void get_next_token_with_directives(void)
81 /* A higher-level version of get_next_token(), which detects and
82 obeys directives such as #ifdef/#ifnot/#endif. (The # sign is
83 required in this case.)
85 This is called while parsing a long construct, such as Class or
86 Object, where we want to support internal #ifdefs. (Although
87 function-parsing predates this and doesn't make use of it.) */
/* One saved copy per lexer flag that is overridden below. */
91 int directives_save, segment_markers_save, statements_save,
92 conditions_save, local_variables_save, misc_keywords_save,
93 system_functions_save;
97 /* If the first token is not a '#', return it directly. */
98 if ((token_type != SEP_TT) || (token_value != HASH_SEP))
101 /* Save the lexer flags, and set up for directive parsing. */
102 directives_save = directives.enabled;
103 segment_markers_save = segment_markers.enabled;
104 statements_save = statements.enabled;
105 conditions_save = conditions.enabled;
106 local_variables_save = local_variables.enabled;
107 misc_keywords_save = misc_keywords.enabled;
108 system_functions_save = system_functions.enabled;
/* Only the directives group stays enabled while the directive name is
   read. */
110 directives.enabled = TRUE;
111 segment_markers.enabled = FALSE;
112 statements.enabled = FALSE;
113 conditions.enabled = FALSE;
114 local_variables.enabled = FALSE;
115 misc_keywords.enabled = FALSE;
116 system_functions.enabled = FALSE;
/* A '[' at this point is reported as an illegal nested routine. */
120 if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
121 { error("It is illegal to nest a routine inside an object using '#['");
/* A directive token is handed to parse_given_directive() with its argument
   set to TRUE; anything else is reported as an error. */
125 if (token_type == DIRECTIVE_TT)
126 parse_given_directive(TRUE);
128 { ebf_curtoken_error("directive");
132 /* Restore all the lexer flags. */
/* directive_keywords has no saved copy above; it is simply switched off. */
133 directive_keywords.enabled = FALSE;
134 directives.enabled = directives_save;
135 segment_markers.enabled = segment_markers_save;
136 statements.enabled = statements_save;
137 conditions.enabled = conditions_save;
138 local_variables.enabled = local_variables_save;
139 misc_keywords.enabled = misc_keywords_save;
140 system_functions.enabled = system_functions_save;
/* Top-level driver: records 'source' in lexical_source, then calls
   parse_directive(FALSE) repeatedly until it returns a false value. */
144 extern void parse_program(char *source)
146 lexical_source = source;
147 while (parse_directive(FALSE)) ;
/* Parse one directive-level construct. The visible code handles three
   forms: a routine definition introduced by '[', an object definition
   introduced by a class-name symbol, and an ordinary directive passed on
   to parse_given_directive(). The comment inside the body describes
   internal_flag and the return value.
   NOTE(review): this extract has gaps in its line numbering, and two
   comments in the body ("The function is definitely being replaced" and
   "This function was subject to") have lost their closing lines. Braces,
   else branches and token reads between the visible lines are therefore
   not shown; confirm control flow against the full source. */
150 extern int parse_directive(int internal_flag)
152 /* Internal_flag is FALSE if the directive is encountered normally
153 (at the top level of the program); TRUE if encountered with
154 a # prefix inside a routine or object definition.
156 (Only directives like #ifdef are permitted inside a definition.)
158 Returns: TRUE if program continues, FALSE if end of file reached. */
160 int routine_symbol, rep_symbol;
163 begin_syntax_line(FALSE);
164 if (!internal_flag) {
165 /* An internal directive can occur in the middle of an expression or
166 object definition. So we only release for top-level directives. */
167 release_token_texts();
171 if (token_type == EOF_TT) return(FALSE);
173 if ((token_type == SEP_TT) && (token_value == HASH_SEP))
/* Routine definition form: '['. */
176 if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
178 { error("It is illegal to nest routines using '#['");
182 directives.enabled = FALSE;
183 directive_keywords.enabled = FALSE;
184 segment_markers.enabled = FALSE;
186 /* The upcoming symbol is a definition; don't count it as a
187 top-level reference *to* the function. */
188 df_dont_note_global_symbols = TRUE;
190 df_dont_note_global_symbols = FALSE;
191 if (token_type != SYMBOL_TT)
192 { ebf_curtoken_error("routine name");
/* A symbol that is neither unknown nor marked for replacement is reported
   as an error, quoting its existing type and line. */
195 if ((!(symbols[token_value].flags & UNKNOWN_SFLAG))
196 && (!(symbols[token_value].flags & REPLACE_SFLAG)))
197 { ebf_symbol_error("routine name", token_text, typename(symbols[token_value].type), symbols[token_value].line);
201 routine_symbol = token_value;
/* rep_symbol starts as the routine's own symbol and is passed by address to
   find_symbol_replacement(), whose result is kept in is_renamed. */
203 rep_symbol = routine_symbol;
204 is_renamed = find_symbol_replacement(&rep_symbol);
206 if ((symbols[routine_symbol].flags & REPLACE_SFLAG)
207 && !is_renamed && (is_systemfile()))
208 { /* The function is definitely being replaced (system_file
209 always loses priority in a replacement) but is not
210 being renamed to something else. Skip its definition
212 dont_enter_into_symbol_table = TRUE;
215 } while (!((token_type == EOF_TT)
216 || ((token_type==SEP_TT)
217 && (token_value==CLOSE_SQUARE_SEP))));
218 dont_enter_into_symbol_table = FALSE;
219 if (token_type == EOF_TT) return FALSE;
222 { /* Parse the function definition and assign its symbol. */
223 assign_symbol(routine_symbol,
224 parse_routine(lexical_source, FALSE,
225 symbols[routine_symbol].name, FALSE, routine_symbol),
227 symbols[routine_symbol].line = routine_starts_line;
231 /* This function was subject to a "Replace X Y" directive.
232 The first time we see a definition for symbol X, we
233 copy it to Y -- that's the "original" form of the
235 if (symbols[rep_symbol].value == 0) {
236 assign_symbol(rep_symbol, symbols[routine_symbol].value, ROUTINE_T);
241 if ((token_type != SEP_TT) || (token_value != SEMICOLON_SEP))
242 { ebf_curtoken_error("';' after ']'");
248 if ((token_type == SYMBOL_TT) && (symbols[token_value].type == CLASS_T))
250 { error("It is illegal to nest an object in a routine using '#classname'");
253 symbols[token_value].flags |= USED_SFLAG;
254 make_object(FALSE, NULL, -1, -1, symbols[token_value].value);
258 if (token_type != DIRECTIVE_TT)
259 { /* If we're internal, we expect only a directive here. If
260 we're top-level, the possibilities are broader. */
262 ebf_curtoken_error("directive");
264 ebf_curtoken_error("directive, '[' or class name");
265 panic_mode_error_recovery();
/* The result of parse_given_directive() is logically negated to form this
   function's return value. */
269 return !(parse_given_directive(internal_flag));
272 /* Check what's coming up after a switch case value.
273 (This is "switch sign" in the sense of "worm sign", not like a signed variable.) */
/* Classify the current token (token_type / token_value) as a delimiter that
   can follow a switch case value:
     1 for ':', 2 for ',', 3 for the keyword 'to'.
   NOTE(review): the fall-through return is not visible in this extract.
   Callers in this file test for a result greater than 0, so the remaining
   case is presumably 0 or negative -- confirm against the full source. */
275 static int switch_sign(void)
277 if ((token_type == SEP_TT)&&(token_value == COLON_SEP)) return 1;
278 if ((token_type == SEP_TT)&&(token_value == COMMA_SEP)) return 2;
279 if ((token_type==MISC_KEYWORD_TT)&&(token_value==TO_MK)) return 3;
283 /* Info for the current switch statement. Both arrays indexed by spec_sp */
/* MAX_SPEC_STACK bounds the number of values in one case; exceeding it is
   reported with the "At most %d values" error elsewhere in this file. */
284 #define MAX_SPEC_STACK (32)
/* Operand for each case value. */
285 static assembly_operand spec_stack[MAX_SPEC_STACK];
/* Delimiter code following each value, using the switch_sign() numbering
   (1 colon, 2 comma, 3 'to'). */
286 static int spec_type[MAX_SPEC_STACK];
/* Z-machine form of compile_alternatives(): emit one je_zc branch comparing
   switch_value with entries of spec_stack[] starting at stack_level. The
   three visible calls pass one, two and three entries respectively; label
   and flag are handed straight to the branch assembler.
   NOTE(review): the dispatch that selects among the three calls is not
   visible in this extract -- presumably a switch on n; confirm. */
288 static void compile_alternatives_z(assembly_operand switch_value, int n,
289 int stack_level, int label, int flag)
292 assemblez_2_branch(je_zc, switch_value,
293 spec_stack[stack_level],
294 label, flag); return;
296 assemblez_3_branch(je_zc, switch_value,
297 spec_stack[stack_level], spec_stack[stack_level+1],
298 label, flag); return;
300 assemblez_4_branch(je_zc, switch_value,
301 spec_stack[stack_level], spec_stack[stack_level+1],
302 spec_stack[stack_level+2],
303 label, flag); return;
/* Glulx form of compile_alternatives(): emit a branch comparing
   switch_value with spec_stack[stack_level]. The opcode is jeq_gc when
   flag is nonzero and jne_gc otherwise. The error message below shows that
   a test against several values at once is not generated on this path.
   NOTE(review): the condition on n that separates the two paths, and the
   remaining arguments of the branch call, are not visible in this
   extract. */
307 static void compile_alternatives_g(assembly_operand switch_value, int n,
308 int stack_level, int label, int flag)
310 int the_zc = (flag) ? jeq_gc : jne_gc;
313 assembleg_2_branch(the_zc, switch_value,
314 spec_stack[stack_level],
318 error("*** Cannot generate multi-equality tests in Glulx ***");
/* Emit an equality test of switch_value against n entries of spec_stack[]
   starting at stack_level, by forwarding all arguments unchanged to the
   Z-machine or the Glulx variant.
   NOTE(review): the condition choosing between the two calls is not
   visible in this extract -- presumably the target mode; confirm. */
322 static void compile_alternatives(assembly_operand switch_value, int n,
323 int stack_level, int label, int flag)
326 compile_alternatives_z(switch_value, n, stack_level, label, flag);
328 compile_alternatives_g(switch_value, n, stack_level, label, flag);
/* Forward declaration: generate_switch_spec() is defined later in the file
   but is called from parse_switch_spec(). */
331 static void generate_switch_spec(assembly_operand switch_value, int label, int label_after, int speccount);
/* Read the value list of one switch case into spec_stack[] / spec_type[],
   then call generate_switch_spec() to emit the tests.
   switch_value: operand holding the value being switched on.
   label: passed through to generate_switch_spec().
   NOTE(review): the line carrying the third parameter is missing from this
   extract. Callers in this file pass TRUE from parse_routine() and FALSE
   from parse_code_block(); it presumably selects the action-name form
   below -- confirm against the full source. Loop structure, token reads
   and the GenSpecCode label are likewise not visible. */
333 static void parse_switch_spec(assembly_operand switch_value, int label,
336 int label_after = -1, spec_sp = 0;
338 sequence_point_follows = FALSE;
/* Refuse to read more values than the spec arrays can hold. */
341 { if (spec_sp >= MAX_SPEC_STACK)
342 { error_fmt("At most %d values can be given in a single 'switch' case", MAX_SPEC_STACK);
343 panic_mode_error_recovery();
/* Action-name form: quoted text is rejected, otherwise the name is looked
   up with action_of_name(); a result value of -1 is replaced by 0 and
   reported. */
349 if (token_type == SQ_TT || token_type == DQ_TT) {
350 ebf_curtoken_error("action (or fake action) name");
353 spec_stack[spec_sp] = action_of_name(token_text);
355 if (spec_stack[spec_sp].value == -1)
356 { spec_stack[spec_sp].value = 0;
357 ebf_curtoken_error("action (or fake action) name");
/* Constant form: the value is parsed and code-generated as a constant
   expression. */
361 spec_stack[spec_sp] =
362 code_generate(parse_expression(CONSTANT_CONTEXT), CONSTANT_CONTEXT, -1);
/* misc_keywords is switched on and then off again here; the statement
   between these two lines is not visible in this extract. */
365 misc_keywords.enabled = TRUE;
367 misc_keywords.enabled = FALSE;
/* Record the delimiter that follows this value and advance spec_sp. */
369 spec_type[spec_sp++] = switch_sign();
370 switch(spec_type[spec_sp-1])
373 ebf_curtoken_error("',' or ':'");
374 else ebf_curtoken_error("',', ':' or 'to'");
375 panic_mode_error_recovery();
/* 1 is ':' -- the list is complete. */
377 case 1: goto GenSpecCode;
/* 3 is 'to' -- a range needs label_after, allocated on first use. */
378 case 3: if (label_after == -1) label_after = next_label++;
383 generate_switch_spec(switch_value, label, label_after, spec_sp);
386 /* Generate code for a switch case. The case values are in spec_stack[] and spec_type[]. */
/* Emit the comparison code for one switch case whose values are already in
   spec_stack[] and spec_type[].
   switch_value: operand holding the value being switched on.
   label: target of the equality branches issued with flag FALSE.
   label_after: -1 if not yet allocated; otherwise the label placed at the
     end of the generated tests and the target of the branches issued with
     flag TRUE. One is allocated here when speccount exceeds what a single
     equality test can take.
   speccount: number of entries in spec_stack[] / spec_type[] to use.
   NOTE(review): this extract has gaps in its line numbering; braces, else
   branches, the target-mode tests, the updates of i and the trailing
   arguments of several branch calls are not visible here. */
388 static void generate_switch_spec(assembly_operand switch_value, int label, int label_after, int speccount)
/* A single equality test takes up to three values on the Z-machine and one
   in Glulx mode. */
391 int max_equality_args = ((!glulx_mode) ? 3 : 1);
393 sequence_point_follows = FALSE;
395 if ((speccount > max_equality_args) && (label_after == -1))
396 label_after = next_label++;
/* Simple case: no label_after needed, so one test covers every value. */
398 if (label_after == -1)
399 { compile_alternatives(switch_value, speccount, 0, label, FALSE); return;
402 for (i=0; i<speccount;)
/* Find j, the index of the next entry marked 3 ('to'), or speccount if
   there is none. */
404 j=i; while ((j<speccount) && (spec_type[j] != 3)) j++;
/* Cap the run of plain values at what one equality test can take. */
407 { if (j-i > max_equality_args) j=i+max_equality_args;
410 compile_alternatives(switch_value, j-i, i, label, FALSE);
412 compile_alternatives(switch_value, j-i, i, label_after, TRUE);
/* Range test, Z-machine opcodes: jl against spec_stack[i] and jg against
   spec_stack[i+1]. The i == speccount - 2 test singles out a range formed
   by the last two entries. */
419 if (i == speccount - 2)
420 { assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
422 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
426 { assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
428 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
430 assemble_label_no(next_label++);
/* Range test, Glulx opcodes: jlt against spec_stack[i], then jgt for a
   final range or jle otherwise against spec_stack[i+1]. */
434 if (i == speccount - 2)
435 { assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
437 assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
441 { assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
443 assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
445 assemble_label_no(next_label++);
452 assemble_label_no(label_after);
/* Parse and compile one routine: its local-variable list, then its
   statements up to the closing ']'.
   source, name: when 'source' differs from lexical_source, or veneer_flag
     is set, lexical_source is updated and the lexer is restarted on it
     with 'name'. 'name' is also passed to assemble_routine_header().
   embedded_flag: chooses the implicit return emitted between switch
     clauses (rfalse / zero_operand when set, rtrue / one_operand when
     clear) and is passed to assemble_routine_header().
   veneer_flag: see 'source' above.
   r_symbol: symbol index passed to assemble_routine_header(); its
     STAR_SFLAG is set for a non-embedded, non-veneer routine whose local
     list began with '*'.
   Returns packed_address, the value obtained from
   assemble_routine_header().
   NOTE(review): this extract has gaps in its line numbering, and the
   comment beginning "Set up the local variable hash" has lost its closing
   line. Loop heads, braces, else branches and token reads between the
   visible lines are not shown; confirm control flow against the full
   source. */
455 extern int32 parse_routine(char *source, int embedded_flag, char *name,
456 int veneer_flag, int r_symbol)
457 { int32 packed_address; int i; int debug_flag = FALSE;
458 int switch_clause_made = FALSE, default_clause_made = FALSE,
460 debug_location_beginning beginning_debug_location =
461 get_token_location_beginning();
463 /* (switch_label needs no initialisation here, but it prevents some
464 compilers from issuing warnings) */
466 if ((source != lexical_source) || (veneer_flag))
467 { lexical_source = source;
468 restart_lexer(lexical_source, name);
471 clear_local_variables();
/* Local-variable list. Names are read with symbol-table entry
   suppressed. */
474 { statements.enabled = TRUE;
475 dont_enter_into_symbol_table = TRUE;
477 dont_enter_into_symbol_table = FALSE;
/* A '*' seen before any local has been declared, and only once, sets
   debug_flag. */
478 if ((token_type == SEP_TT) && (token_value == TIMES_SEP)
479 && (no_locals == 0) && (!debug_flag))
480 { debug_flag = TRUE; continue;
/* Anything other than an unquoted name must be the ';' that ends the
   list. */
483 if (token_type != UQ_TT)
484 { if ((token_type == SEP_TT)
485 && (token_value == SEMICOLON_SEP)) break;
486 ebf_curtoken_error("local variable name or ';'");
487 panic_mode_error_recovery();
491 if (no_locals == MAX_LOCAL_VARIABLES-1)
492 { error_fmt("Too many local variables for a routine; max is %d",
493 MAX_LOCAL_VARIABLES-1);
494 panic_mode_error_recovery();
/* Report a name that duplicates an earlier local (compared with
   strcmpcis); the name is still added afterwards. */
498 for (i=0;i<no_locals;i++) {
499 if (strcmpcis(token_text, get_local_variable_name(i))==0)
500 error_named("Local variable defined twice:", token_text);
502 add_local_variable(token_text);
505 /* Set up the local variable hash and the local_variables.keywords
507 construct_local_variable_tables();
509 if ((trace_fns_setting==3)
510 || ((trace_fns_setting==2) && (veneer_mode==FALSE))
511 || ((trace_fns_setting==1) && (is_systemfile()==FALSE)))
513 if ((embedded_flag == FALSE) && (veneer_mode == FALSE) && debug_flag)
514 symbols[r_symbol].flags |= STAR_SFLAG;
516 packed_address = assemble_routine_header(debug_flag,
517 name, embedded_flag, r_symbol);
520 { begin_syntax_line(TRUE);
521 release_token_texts();
524 if (token_type == EOF_TT)
525 { ebf_curtoken_error("']'");
528 get_token_location_end(beginning_debug_location));
533 if ((token_type == SEP_TT)
534 && (token_value == CLOSE_SQUARE_SEP))
535 { if (switch_clause_made && (!default_clause_made))
536 assemble_label_no(switch_label);
537 directives.enabled = TRUE;
538 sequence_point_follows = TRUE;
542 get_token_location_end(beginning_debug_location));
547 if ((token_type == STATEMENT_TT) && (token_value == SDEFAULT_CODE))
548 { if (default_clause_made)
549 error("Multiple 'default' clauses defined in same 'switch'");
550 default_clause_made = TRUE;
552 if (switch_clause_made)
553 { if (!execution_never_reaches_here)
554 { sequence_point_follows = FALSE;
556 assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
558 assembleg_1(return_gc,
559 ((embedded_flag)?zero_operand:one_operand));
561 assemble_label_no(switch_label);
563 switch_clause_made = TRUE;
566 if ((token_type == SEP_TT) &&
567 (token_value == COLON_SEP)) continue;
568 ebf_curtoken_error("':' after 'default'");
569 panic_mode_error_recovery();
573 /* Only check for the form of a case switch if the initial token
574 isn't double-quoted text, as that would mean it was a print_ret
575 statement: this is a mild ambiguity in the grammar.
576 Action statements also cannot be cases.
577 We don't try to handle parenthesized expressions as cases
580 if ((token_type != DQ_TT) && (token_type != SEP_TT))
582 if (switch_sign() > 0)
583 { assembly_operand AO;
584 if (default_clause_made)
585 error("'default' must be the last 'switch' case");
587 if (switch_clause_made)
588 { if (!execution_never_reaches_here)
589 { sequence_point_follows = FALSE;
591 assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
593 assembleg_1(return_gc,
594 ((embedded_flag)?zero_operand:one_operand));
596 assemble_label_no(switch_label);
599 switch_label = next_label++;
600 switch_clause_made = TRUE;
601 put_token_back(); put_token_back();
604 INITAOTV(&AO, VARIABLE_OT, 249);
607 INITAOTV(&AO, GLOBALVAR_OT, MAX_LOCAL_VARIABLES+6); /* sw__var */
/* The case values are parsed with the third argument TRUE. */
609 parse_switch_spec(AO, switch_label, TRUE);
614 { put_token_back(); put_token_back(); get_next_token();
615 sequence_point_follows = TRUE;
/* Ordinary statement: -1, -1 means there are no break / continue labels at
   routine level. */
619 parse_statement(-1, -1);
623 return packed_address;
626 /* Parse one block of code (a statement or brace-delimited stanza).
627 This is used by the IF, DO, FOR, OBJECTLOOP, SWITCH, and WHILE statements.
629 (Note that this is *not* called by the top-level parse_routine().)
631 The break_label and continue_label arguments are the labels in
632 the calling block to jump to on "break" or "continue". -1 means
633 we can't "break"/"continue" here (because we're not in a loop/switch).
634 If switch_rule is true, we're in a switch block; case labels are accepted. */
/* Parse either a braced sequence of statements or a single statement,
   handling switch cases when switch_rule is nonzero.
   break_label, continue_label: passed on to parse_statement(); break_label
     is also the jump target emitted between switch clauses when the
     preceding code is reachable.
   switch_rule: nonzero inside a 'switch' block. A non-braced block, or a
     statement before any case, is then reported as an error.
   Within a switch block the visible code recognises three case forms:
   'default', a parenthesised expression followed by ':', and a value
   (optionally preceded by unary minus) followed by 'to', ',' or ':'.
   The EXECSTATE_ENTIRE bit of execution_never_reaches_here is saved on
   entry, set if the block begins in unreachable code, and set or cleared
   again on the final lines.
   NOTE(review): the line carrying the third parameter is missing from this
   extract, which has gaps in its line numbering throughout; several
   comments in the body have lost their closing lines. Loop heads, braces,
   else branches, labels and token reads between the visible lines are not
   shown; confirm control flow against the full source. */
637 extern void parse_code_block(int break_label, int continue_label,
639 { int switch_clause_made = FALSE, default_clause_made = FALSE, switch_label = 0;
640 int unary_minus_flag, saved_entire_flag;
642 saved_entire_flag = (execution_never_reaches_here & EXECSTATE_ENTIRE);
643 if (execution_never_reaches_here)
644 execution_never_reaches_here |= EXECSTATE_ENTIRE;
646 begin_syntax_line(TRUE);
647 release_token_texts();
650 if (token_type == SEP_TT && token_value == OPEN_BRACE_SEP)
652 /* Parse a braced stanza of statements. */
654 { begin_syntax_line(TRUE);
655 release_token_texts();
/* A '#' inside the block is handed to parse_directive() as an internal
   directive. */
658 if ((token_type == SEP_TT) && (token_value == HASH_SEP))
659 { parse_directive(TRUE);
/* At '}', place the pending switch_label if a case was open and no
   'default' clause was seen. */
662 if (token_type == SEP_TT && token_value == CLOSE_BRACE_SEP)
663 { if (switch_clause_made && (!default_clause_made))
664 assemble_label_no(switch_label);
667 if (token_type == EOF_TT)
668 { ebf_curtoken_error("'}'");
672 if (switch_rule != 0)
674 /* Within a 'switch' block */
676 if ((token_type==STATEMENT_TT)&&(token_value==SDEFAULT_CODE))
677 { if (default_clause_made)
678 error("Multiple 'default' clauses defined in same 'switch'");
679 default_clause_made = TRUE;
681 if (switch_clause_made)
682 { if (!execution_never_reaches_here)
683 { sequence_point_follows = FALSE;
684 assemble_jump(break_label);
686 assemble_label_no(switch_label);
688 switch_clause_made = TRUE;
691 if ((token_type == SEP_TT) &&
692 (token_value == COLON_SEP)) continue;
693 ebf_curtoken_error("':' after 'default'");
694 panic_mode_error_recovery();
698 /* Decide: is this an ordinary statement, or the start
701 /* Again, double-quoted text is a print_ret statement. */
702 if (token_type == DQ_TT) goto NotASwitchCase;
704 if ((token_type == SEP_TT)&&(token_value == OPENB_SEP)) {
705 /* An open-paren means we need to parse a full
710 AO = parse_expression(VOID_CONTEXT);
711 /* If this expression is followed by a colon, we'll
712 handle it as a switch case. */
713 constcount = test_constant_op_list(&AO, spec_stack, MAX_SPEC_STACK);
714 if ((token_type == SEP_TT)&&(token_value == COLON_SEP)) {
719 ebf_error("constant", "<expression>");
720 panic_mode_error_recovery();
724 if (constcount > MAX_SPEC_STACK)
725 { error_fmt("At most %d values can be given in a single 'switch' case", MAX_SPEC_STACK);
726 panic_mode_error_recovery();
731 /* Gotta fill in the spec_type values for the
733 for (ix=0; ix<constcount-1; ix++)
734 spec_type[ix] = 2; /* comma */
735 spec_type[constcount-1] = 1; /* colon */
737 /* The rest of this is parallel to the
738 parse_switch_spec() case below. */
739 /* Before you ask: yes, the spec_stacks values
740 appear in the reverse order from how
741 parse_switch_spec() would do it. The results
742 are the same because we're just comparing
743 temp_var1 with a bunch of constants. */
744 if (default_clause_made)
745 error("'default' must be the last 'switch' case");
747 if (switch_clause_made)
748 { if (!execution_never_reaches_here)
749 { sequence_point_follows = FALSE;
750 assemble_jump(break_label);
752 assemble_label_no(switch_label);
755 switch_label = next_label++;
756 switch_clause_made = TRUE;
/* label_after is passed as -1, so generate_switch_spec() allocates one
   itself if the value count requires it. */
759 generate_switch_spec(AO, switch_label, -1, constcount);
763 /* Otherwise, treat this as a statement. Imagine
764 we've jumped down to NotASwitchCase, except that
765 we have the expression AO already parsed. */
766 sequence_point_follows = TRUE;
767 parse_statement_singleexpr(AO);
772 = ((token_type == SEP_TT)&&(token_value == MINUS_SEP));
773 if (unary_minus_flag) get_next_token();
775 /* Now read the token _after_ any possible constant:
776 if that's a 'to', ',' or ':' then we have a case */
778 misc_keywords.enabled = TRUE;
780 misc_keywords.enabled = FALSE;
782 if (switch_sign() > 0)
783 { assembly_operand AO;
785 if (default_clause_made)
786 error("'default' must be the last 'switch' case");
788 if (switch_clause_made)
789 { if (!execution_never_reaches_here)
790 { sequence_point_follows = FALSE;
791 assemble_jump(break_label);
793 assemble_label_no(switch_label);
796 switch_label = next_label++;
797 switch_clause_made = TRUE;
/* Push back the tokens read while looking ahead, including the unary minus
   when one was consumed. */
798 put_token_back(); put_token_back();
799 if (unary_minus_flag) put_token_back();
/* The case values are parsed with the third argument FALSE. */
802 parse_switch_spec(AO, switch_label, FALSE);
806 { put_token_back(); put_token_back();
807 if (unary_minus_flag) put_token_back();
/* Inside a switch block, a statement before any case is an error. */
812 if ((switch_rule != 0) && (!switch_clause_made))
813 ebf_curtoken_error("switch value");
816 sequence_point_follows = TRUE;
817 parse_statement(break_label, continue_label);
/* A switch body must be a braced block. */
822 if (switch_rule != 0)
823 ebf_curtoken_error("braced code block after 'switch'");
825 /* Parse a single statement. */
826 parse_statement(break_label, continue_label);
/* Set or clear EXECSTATE_ENTIRE according to the value saved on entry
   (the branch between the two assignments is not visible in this
   extract). */
829 if (saved_entire_flag)
830 execution_never_reaches_here |= EXECSTATE_ENTIRE;
832 execution_never_reaches_here &= ~EXECSTATE_ENTIRE;
835 /* ========================================================================= */
836 /* Data structure management routines */
837 /* ------------------------------------------------------------------------- */
/* Four no-argument entry points for this module's data management.
   The only statement visible in this extract is in syntax_begin_pass(),
   which resets no_syntax_lines to 0.
   NOTE(review): the bodies of the other three functions are not visible
   here; confirm their contents against the full source. */
839 extern void init_syntax_vars(void)
843 extern void syntax_begin_pass(void)
844 { no_syntax_lines = 0;
847 extern void syntax_allocate_arrays(void)
851 extern void syntax_free_arrays(void)
855 /* ========================================================================= */