Update to commit af5309356bfa197d7a7ea09101c317f94e9b856b
[inform.git] / src / syntax.c
1 /* ------------------------------------------------------------------------- */
2 /*   "syntax" : Syntax analyser and compiler                                 */
3 /*                                                                           */
4 /*   Part of Inform 6.35                                                     */
5 /*   copyright (c) Graham Nelson 1993 - 2021                                 */
6 /*                                                                           */
7 /* Inform is free software: you can redistribute it and/or modify            */
8 /* it under the terms of the GNU General Public License as published by      */
9 /* the Free Software Foundation, either version 3 of the License, or         */
10 /* (at your option) any later version.                                       */
11 /*                                                                           */
12 /* Inform is distributed in the hope that it will be useful,                 */
13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of            */
14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              */
15 /* GNU General Public License for more details.                              */
16 /*                                                                           */
17 /* You should have received a copy of the GNU General Public License         */
18 /* along with Inform. If not, see https://gnu.org/licenses/                  */
19 /*                                                                           */
20 /* ------------------------------------------------------------------------- */
21
22 #include "header.h"
23
24 static char *lexical_source;
25
26 int no_syntax_lines;                                  /*  Syntax line count  */
27
28 static void begin_syntax_line(int statement_mode)
29 {   no_syntax_lines++;
30     next_token_begins_syntax_line = TRUE;
31
32     clear_expression_space();
33     if (statement_mode)
34     {   statements.enabled = TRUE;
35         conditions.enabled = TRUE;
36         local_variables.enabled = TRUE;
37         system_functions.enabled = TRUE;
38
39         misc_keywords.enabled = FALSE;
40         directive_keywords.enabled = FALSE;
41         directives.enabled = FALSE;
42         segment_markers.enabled = FALSE;
43         opcode_names.enabled = FALSE;
44     }
45     else
46     {   directives.enabled = TRUE;
47         segment_markers.enabled = TRUE;
48
49         statements.enabled = FALSE;
50         misc_keywords.enabled = FALSE;
51         directive_keywords.enabled = FALSE;
52         local_variables.enabled = FALSE;
53         system_functions.enabled = FALSE;
54         conditions.enabled = FALSE;
55         opcode_names.enabled = FALSE;
56     }
57
58     sequence_point_follows = TRUE;
59
60     if (debugfile_switch)
61     {   get_next_token();
62         statement_debug_location = get_token_location();
63         put_token_back();
64     }
65 }
66
67 extern void panic_mode_error_recovery(void)
68 {
69     /* Consume tokens until the next semicolon (or end of file).
70        This is typically called after a syntax error, in hopes of
71        getting parsing back on track. */
72
73     while ((token_type != EOF_TT)
74            && ((token_type != SEP_TT)||(token_value != SEMICOLON_SEP)))
75
76         get_next_token();
77 }
78
79 extern void get_next_token_with_directives(void)
80 {
81     /* A higher-level version of get_next_token(), which detects and
82        obeys directives such as #ifdef/#ifnot/#endif. (The # sign is
83        required in this case.)
84
85        This is called while parsing a long construct, such as Class or
86        Object, where we want to support internal #ifdefs. (Although
87        function-parsing predates this and doesn't make use of it.) */
88
89     int directives_save, segment_markers_save, statements_save;
90
91     while (TRUE)
92     {
93         get_next_token();
94
95         /* If the first token is not a '#', return it directly. */
96         if ((token_type != SEP_TT) || (token_value != HASH_SEP))
97             return;
98
99         /* Save the lexer flags, and set up for directive parsing. */
100         directives_save = directives.enabled;
101         segment_markers_save = segment_markers.enabled;
102         statements_save = statements.enabled;
103
104         directives.enabled = TRUE;
105         segment_markers.enabled = FALSE;
106         statements.enabled = FALSE;
107         conditions.enabled = FALSE;
108         local_variables.enabled = FALSE;
109         misc_keywords.enabled = FALSE;
110         system_functions.enabled = FALSE;
111
112         get_next_token();
113
114         if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
115         {   error("It is illegal to nest a routine inside an object using '#['");
116             return;
117         }
118
119         if (token_type == DIRECTIVE_TT)
120             parse_given_directive(TRUE);
121         else
122         {   ebf_error("directive", token_text);
123             return;
124         }
125
126         /* Restore all the lexer flags. (We are squashing several of them
127            into a single save variable, which I think is safe because that's
128            what CKnight did.)
129         */
130         directive_keywords.enabled = FALSE;
131         directives.enabled = directives_save;
132         segment_markers.enabled = segment_markers_save;
133         statements.enabled =
134             conditions.enabled =
135             local_variables.enabled =
136             misc_keywords.enabled = 
137             system_functions.enabled = statements_save;
138     }
139 }
140
141 extern void parse_program(char *source)
142 {
143     lexical_source = source;
144     while (parse_directive(FALSE)) ;
145 }
146
147 extern int parse_directive(int internal_flag)
148 {
149     /*  Internal_flag is FALSE if the directive is encountered normally,
150         TRUE if encountered with a # prefix inside a routine or object
151         definition.
152
153         (Only directives like #ifdef are permitted inside a definition.)
154
155         Returns: TRUE if program continues, FALSE if end of file reached.    */
156
157     int routine_symbol, rep_symbol;
158     int is_renamed;
159
160     begin_syntax_line(FALSE);
161     get_next_token();
162
163     if (token_type == EOF_TT) return(FALSE);
164
165     if ((token_type == SEP_TT) && (token_value == HASH_SEP))
166         get_next_token();
167
168     if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
169     {   if (internal_flag)
170         {   error("It is illegal to nest routines using '#['");
171             return(TRUE);
172         }
173
174         directives.enabled = FALSE;
175         directive_keywords.enabled = FALSE;
176         segment_markers.enabled = FALSE;
177
178         /* The upcoming symbol is a definition; don't count it as a
179            top-level reference *to* the function. */
180         df_dont_note_global_symbols = TRUE;
181         get_next_token();
182         df_dont_note_global_symbols = FALSE;
183         if (token_type != SYMBOL_TT)
184         {   ebf_error("routine name", token_text);
185             return(FALSE);
186         }
187         if ((!(sflags[token_value] & UNKNOWN_SFLAG))
188             && (!(sflags[token_value] & REPLACE_SFLAG)))
189         {   ebf_symbol_error("routine name", token_text, typename(stypes[token_value]), slines[token_value]);
190             return(FALSE);
191         }
192
193         routine_symbol = token_value;
194
195         rep_symbol = routine_symbol;
196         is_renamed = find_symbol_replacement(&rep_symbol);
197
198         if ((sflags[routine_symbol] & REPLACE_SFLAG) 
199             && !is_renamed && (is_systemfile()))
200         {   /* The function is definitely being replaced (system_file
201                always loses priority in a replacement) but is not
202                being renamed to something else. Skip its definition
203                entirely. */
204             dont_enter_into_symbol_table = TRUE;
205             do
206             {   get_next_token();
207             } while (!((token_type == EOF_TT)
208                      || ((token_type==SEP_TT)
209                          && (token_value==CLOSE_SQUARE_SEP))));
210             dont_enter_into_symbol_table = FALSE;
211             if (token_type == EOF_TT) return FALSE;
212         }
213         else
214         {   /* Parse the function definition and assign its symbol. */
215             assign_symbol(routine_symbol,
216                 parse_routine(lexical_source, FALSE,
217                     (char *) symbs[routine_symbol], FALSE, routine_symbol),
218                 ROUTINE_T);
219             slines[routine_symbol] = routine_starts_line;
220         }
221
222         if (is_renamed) {
223             /* This function was subject to a "Replace X Y" directive.
224                The first time we see a definition for symbol X, we
225                copy it to Y -- that's the "original" form of the
226                function. */
227             if (svals[rep_symbol] == 0) {
228                 assign_symbol(rep_symbol, svals[routine_symbol], ROUTINE_T);
229             }
230         }
231
232         get_next_token();
233         if ((token_type != SEP_TT) || (token_value != SEMICOLON_SEP))
234         {   ebf_error("';' after ']'", token_text);
235             put_token_back();
236         }
237         return TRUE;
238     }
239
240     if ((token_type == SYMBOL_TT) && (stypes[token_value] == CLASS_T))
241     {   if (internal_flag)
242         {   error("It is illegal to nest an object in a routine using '#classname'");
243             return(TRUE);
244         }
245         sflags[token_value] |= USED_SFLAG;
246         make_object(FALSE, NULL, -1, -1, svals[token_value]);
247         return TRUE;
248     }
249
250     if (token_type != DIRECTIVE_TT)
251     {   /* If we're internal, we expect only a directive here. If
252            we're top-level, the possibilities are broader. */
253         if (internal_flag)
254             ebf_error("directive", token_text);
255         else
256             ebf_error("directive, '[' or class name", token_text);
257         panic_mode_error_recovery();
258         return TRUE;
259     }
260
261     return !(parse_given_directive(internal_flag));
262 }
263
264 static int switch_sign(void)
265 {
266     if ((token_type == SEP_TT)&&(token_value == COLON_SEP))   return 1;
267     if ((token_type == SEP_TT)&&(token_value == COMMA_SEP))   return 2;
268     if ((token_type==MISC_KEYWORD_TT)&&(token_value==TO_MK))  return 3;
269     return 0;
270 }
271
272 static assembly_operand spec_stack[32];
273 static int spec_type[32];
274
275 static void compile_alternatives_z(assembly_operand switch_value, int n,
276     int stack_level, int label, int flag)
277 {   switch(n)
278     {   case 1:
279             assemblez_2_branch(je_zc, switch_value,
280                 spec_stack[stack_level],
281                 label, flag); return;
282         case 2:
283             assemblez_3_branch(je_zc, switch_value,
284                 spec_stack[stack_level], spec_stack[stack_level+1],
285                 label, flag); return;
286         case 3:
287             assemblez_4_branch(je_zc, switch_value,
288                 spec_stack[stack_level], spec_stack[stack_level+1],
289                 spec_stack[stack_level+2],
290                 label, flag); return;
291     }
292 }
293
294 static void compile_alternatives_g(assembly_operand switch_value, int n,
295     int stack_level, int label, int flag)
296 {   
297     int the_zc = (flag) ? jeq_gc : jne_gc;
298
299     if (n == 1) {
300       assembleg_2_branch(the_zc, switch_value,
301         spec_stack[stack_level],
302         label); 
303     }
304     else {
305       error("*** Cannot generate multi-equality tests in Glulx ***");
306     }
307 }
308
309 static void compile_alternatives(assembly_operand switch_value, int n,
310     int stack_level, int label, int flag)
311 {
312   if (!glulx_mode)
313     compile_alternatives_z(switch_value, n, stack_level, label, flag);
314   else
315     compile_alternatives_g(switch_value, n, stack_level, label, flag);
316 }
317
318 static void parse_switch_spec(assembly_operand switch_value, int label,
319     int action_switch)
320 {
321     int i, j, label_after = -1, spec_sp = 0;
322     int max_equality_args = ((!glulx_mode) ? 3 : 1);
323
324     sequence_point_follows = FALSE;
325
326     do
327     {   if (spec_sp == 32)
328         {   error("At most 32 values can be given in a single 'switch' case");
329             panic_mode_error_recovery();
330             return;
331         }
332
333         if (action_switch)
334         {   get_next_token();
335             if (token_type == SQ_TT || token_type == DQ_TT) {
336                 ebf_error("action (or fake action) name", token_text);
337                 continue;
338             }
339             spec_stack[spec_sp] = action_of_name(token_text);
340
341             if (spec_stack[spec_sp].value == -1)
342             {   spec_stack[spec_sp].value = 0;
343                 ebf_error("action (or fake action) name", token_text);
344             }
345         }
346         else
347             spec_stack[spec_sp] =
348       code_generate(parse_expression(CONSTANT_CONTEXT), CONSTANT_CONTEXT, -1);
349
350         misc_keywords.enabled = TRUE;
351         get_next_token();
352         misc_keywords.enabled = FALSE;
353
354         spec_type[spec_sp++] = switch_sign();
355         switch(spec_type[spec_sp-1])
356         {   case 0:
357                 if (action_switch)
358                     ebf_error("',' or ':'", token_text);
359                 else ebf_error("',', ':' or 'to'", token_text);
360                 panic_mode_error_recovery();
361                 return;
362             case 1: goto GenSpecCode;
363             case 3: if (label_after == -1) label_after = next_label++;
364         }
365      } while(TRUE);
366
367      GenSpecCode:
368
369      if ((spec_sp > max_equality_args) && (label_after == -1))
370          label_after = next_label++;
371
372      if (label_after == -1)
373      {   compile_alternatives(switch_value, spec_sp, 0, label, FALSE); return;
374      }
375
376      for (i=0; i<spec_sp;)
377      {
378          j=i; while ((j<spec_sp) && (spec_type[j] != 3)) j++;
379
380          if (j > i)
381          {   if (j-i > max_equality_args) j=i+max_equality_args;
382
383              if (j == spec_sp)
384                  compile_alternatives(switch_value, j-i, i, label, FALSE);
385              else
386                  compile_alternatives(switch_value, j-i, i, label_after, TRUE);
387
388              i=j;
389          }
390          else
391          {   
392            if (!glulx_mode) {
393              if (i == spec_sp - 2)
394              {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
395                      label, TRUE);
396                  assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
397                      label, TRUE);
398              }
399              else
400              {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
401                      next_label, TRUE);
402                  assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
403                      label_after, FALSE);
404                  assemble_label_no(next_label++);
405              }
406            }
407            else {
408              if (i == spec_sp - 2)
409              {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
410                      label);
411                  assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
412                      label);
413              }
414              else
415              {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
416                      next_label);
417                  assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
418                      label_after);
419                  assemble_label_no(next_label++);
420              }
421            }
422            i = i+2;
423          }
424      }
425
426      assemble_label_no(label_after);
427 }
428
429 extern int32 parse_routine(char *source, int embedded_flag, char *name,
430     int veneer_flag, int r_symbol)
431 {   int32 packed_address; int i; int debug_flag = FALSE;
432     int switch_clause_made = FALSE, default_clause_made = FALSE,
433         switch_label = 0;
434     debug_location_beginning beginning_debug_location =
435         get_token_location_beginning();
436
437     /*  (switch_label needs no initialisation here, but it prevents some
438         compilers from issuing warnings)   */
439
440     if ((source != lexical_source) || (veneer_flag))
441     {   lexical_source = source;
442         restart_lexer(lexical_source, name);
443     }
444
445     no_locals = 0;
446
447     for (i=0;i<MAX_LOCAL_VARIABLES-1;i++) local_variables.keywords[i] = "";
448
449     do
450     {   statements.enabled = TRUE;
451         dont_enter_into_symbol_table = TRUE;
452         get_next_token();
453         dont_enter_into_symbol_table = FALSE;
454         if ((token_type == SEP_TT) && (token_value == TIMES_SEP)
455             && (no_locals == 0) && (!debug_flag))
456         {   debug_flag = TRUE; continue;
457         }
458
459         if (token_type != DQ_TT)
460         {   if ((token_type == SEP_TT)
461                 && (token_value == SEMICOLON_SEP)) break;
462             ebf_error("local variable name or ';'", token_text);
463             panic_mode_error_recovery();
464             break;
465         }
466
467         if (strlen(token_text) > MAX_IDENTIFIER_LENGTH)
468         {   error_named("Local variable identifier too long:", token_text);
469             panic_mode_error_recovery();
470             break;
471         }
472
473         if (no_locals == MAX_LOCAL_VARIABLES-1)
474         {   error_numbered("Too many local variables for a routine; max is",
475                 MAX_LOCAL_VARIABLES-1);
476             panic_mode_error_recovery();
477             break;
478         }
479
480         for (i=0;i<no_locals;i++)
481             if (strcmpcis(token_text, local_variables.keywords[i])==0)
482                 error_named("Local variable defined twice:", token_text);
483         local_variables.keywords[no_locals++] = token_text;
484     } while(TRUE);
485
486     construct_local_variable_tables();
487
488     if ((trace_fns_setting==3)
489         || ((trace_fns_setting==2) && (veneer_mode==FALSE))
490         || ((trace_fns_setting==1) && (is_systemfile()==FALSE)))
491         debug_flag = TRUE;
492     if ((embedded_flag == FALSE) && (veneer_mode == FALSE) && debug_flag)
493         sflags[r_symbol] |= STAR_SFLAG;
494
495     packed_address = assemble_routine_header(no_locals, debug_flag,
496         name, embedded_flag, r_symbol);
497
498     do
499     {   begin_syntax_line(TRUE);
500
501         get_next_token();
502
503         if (token_type == EOF_TT)
504         {   ebf_error("']'", token_text);
505             assemble_routine_end
506                 (embedded_flag,
507                  get_token_location_end(beginning_debug_location));
508             put_token_back();
509             break;
510         }
511
512         if ((token_type == SEP_TT)
513             && (token_value == CLOSE_SQUARE_SEP))
514         {   if (switch_clause_made && (!default_clause_made))
515                 assemble_label_no(switch_label);
516             directives.enabled = TRUE;
517             sequence_point_follows = TRUE;
518             get_next_token();
519             assemble_routine_end
520                 (embedded_flag,
521                  get_token_location_end(beginning_debug_location));
522             put_token_back();
523             break;
524         }
525
526         if ((token_type == STATEMENT_TT) && (token_value == SDEFAULT_CODE))
527         {   if (default_clause_made)
528                 error("Multiple 'default' clauses defined in same 'switch'");
529             default_clause_made = TRUE;
530
531             if (switch_clause_made)
532             {   if (!execution_never_reaches_here)
533                 {   sequence_point_follows = FALSE;
534                     if (!glulx_mode)
535                         assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
536                     else
537                         assembleg_1(return_gc, 
538                             ((embedded_flag)?zero_operand:one_operand));
539                 }
540                 assemble_label_no(switch_label);
541             }
542             switch_clause_made = TRUE;
543
544             get_next_token();
545             if ((token_type == SEP_TT) &&
546                 (token_value == COLON_SEP)) continue;
547             ebf_error("':' after 'default'", token_text);
548             panic_mode_error_recovery();
549             continue;
550         }
551
552         /*  Only check for the form of a case switch if the initial token
553             isn't double-quoted text, as that would mean it was a print_ret
554             statement: this is a mild ambiguity in the grammar. 
555             Action statements also cannot be cases. */
556
557         if ((token_type != DQ_TT) && (token_type != SEP_TT))
558         {   get_next_token();
559             if (switch_sign() > 0)
560             {   assembly_operand AO;
561                 if (default_clause_made)
562                     error("'default' must be the last 'switch' case");
563
564                 if (switch_clause_made)
565                 {   if (!execution_never_reaches_here)
566                     {   sequence_point_follows = FALSE;
567                         if (!glulx_mode)
568                             assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
569                         else
570                             assembleg_1(return_gc, 
571                                 ((embedded_flag)?zero_operand:one_operand));
572                     }
573                     assemble_label_no(switch_label);
574                 }
575
576                 switch_label = next_label++;
577                 switch_clause_made = TRUE;
578                 put_token_back(); put_token_back();
579
580                 if (!glulx_mode) {
581                     INITAOTV(&AO, VARIABLE_OT, 249);
582                 }
583                 else {
584                     INITAOTV(&AO, GLOBALVAR_OT, MAX_LOCAL_VARIABLES+6); /* sw__var */
585                 }
586                 parse_switch_spec(AO, switch_label, TRUE);
587
588                 continue;
589             }
590             else
591             {   put_token_back(); put_token_back(); get_next_token();
592                 sequence_point_follows = TRUE;
593             }
594         }
595
596         parse_statement(-1, -1);
597
598     } while (TRUE);
599
600     return packed_address;
601 }
602
603 extern void parse_code_block(int break_label, int continue_label,
604     int switch_rule)
605 {   int switch_clause_made = FALSE, default_clause_made = FALSE, switch_label = 0,
606         unary_minus_flag;
607
608     begin_syntax_line(TRUE);
609     get_next_token();
610
611     if (token_type == SEP_TT && token_value == OPEN_BRACE_SEP)
612     {   do
613         {   begin_syntax_line(TRUE);
614             get_next_token();
615             
616             if ((token_type == SEP_TT) && (token_value == HASH_SEP))
617             {   parse_directive(TRUE);
618                 continue;
619             }
620             if (token_type == SEP_TT && token_value == CLOSE_BRACE_SEP)
621             {   if (switch_clause_made && (!default_clause_made))
622                     assemble_label_no(switch_label);
623                 return;
624             }
625             if (token_type == EOF_TT)
626             {   ebf_error("'}'", token_text); return; }
627
628             if (switch_rule != 0)
629             {
630                 /*  Within a 'switch' block  */
631
632                 if ((token_type==STATEMENT_TT)&&(token_value==SDEFAULT_CODE))
633                 {   if (default_clause_made)
634                 error("Multiple 'default' clauses defined in same 'switch'");
635                     default_clause_made = TRUE;
636
637                     if (switch_clause_made)
638                     {   if (!execution_never_reaches_here)
639                         {   sequence_point_follows = FALSE;
640                             assemble_jump(break_label);
641                         }
642                         assemble_label_no(switch_label);
643                     }
644                     switch_clause_made = TRUE;
645
646                     get_next_token();
647                     if ((token_type == SEP_TT) &&
648                         (token_value == COLON_SEP)) continue;
649                     ebf_error("':' after 'default'", token_text);
650                     panic_mode_error_recovery();
651                     continue;
652                 }
653
654                 /*  Decide: is this an ordinary statement, or the start
655                     of a new case?  */
656
657                 if (token_type == DQ_TT) goto NotASwitchCase;
658
659                 unary_minus_flag
660                     = ((token_type == SEP_TT)&&(token_value == MINUS_SEP));
661                 if (unary_minus_flag) get_next_token();
662
663                 /*  Now read the token _after_ any possible constant:
664                     if that's a 'to', ',' or ':' then we have a case  */
665
666                 misc_keywords.enabled = TRUE;
667                 get_next_token();
668                 misc_keywords.enabled = FALSE;
669
670                 if (switch_sign() > 0)
671                 {   assembly_operand AO;
672
673                     if (default_clause_made)
674                         error("'default' must be the last 'switch' case");
675
676                     if (switch_clause_made)
677                     {   if (!execution_never_reaches_here)
678                         {   sequence_point_follows = FALSE;
679                             assemble_jump(break_label);
680                         }
681                         assemble_label_no(switch_label);
682                     }
683
684                     switch_label = next_label++;
685                     switch_clause_made = TRUE;
686                     put_token_back(); put_token_back();
687                     if (unary_minus_flag) put_token_back();
688
689                     AO = temp_var1;
690                     parse_switch_spec(AO, switch_label, FALSE);
691                     continue;
692                 }
693                 else
694                 {   put_token_back(); put_token_back();
695                     if (unary_minus_flag) put_token_back();
696                     get_next_token();
697                 }
698             }
699
700             if ((switch_rule != 0) && (!switch_clause_made))
701                 ebf_error("switch value", token_text);
702
703             NotASwitchCase:
704             sequence_point_follows = TRUE;
705             parse_statement(break_label, continue_label);
706         }
707         while(TRUE);
708     }
709
710     if (switch_rule != 0)
711         ebf_error("braced code block after 'switch'", token_text);
712
713     parse_statement(break_label, continue_label);
714     return;
715 }
716
717 /* ========================================================================= */
718 /*   Data structure management routines                                      */
719 /* ------------------------------------------------------------------------- */
720
721 extern void init_syntax_vars(void)
722 {
723 }
724
725 extern void syntax_begin_pass(void)
726 {   no_syntax_lines = 0;
727 }
728
729 extern void syntax_allocate_arrays(void)
730 {
731 }
732
733 extern void syntax_free_arrays(void)
734 {
735 }
736
737 /* ========================================================================= */