Update to Inform v6.42
[inform.git] / src / syntax.c
index ad7e121e62ffa0d2f83519fdb52016564b6b1d56..cae1bc47a087d2ee6aa7bedc7ba4d525dd865f81 100644 (file)
@@ -1,8 +1,8 @@
 /* ------------------------------------------------------------------------- */
 /*   "syntax" : Syntax analyser and compiler                                 */
 /*                                                                           */
-/*   Part of Inform 6.41                                                     */
-/*   copyright (c) Graham Nelson 1993 - 2022                                 */
+/*   Part of Inform 6.42                                                     */
+/*   copyright (c) Graham Nelson 1993 - 2024                                 */
 /*                                                                           */
 /* Inform is free software: you can redistribute it and/or modify            */
 /* it under the terms of the GNU General Public License as published by      */
@@ -86,10 +86,12 @@ extern void get_next_token_with_directives(void)
        Object, where we want to support internal #ifdefs. (Although
        function-parsing predates this and doesn't make use of it.) */
 
-    int directives_save, segment_markers_save, statements_save;
-
     while (TRUE)
     {
+        int directives_save, segment_markers_save, statements_save,
+            conditions_save, local_variables_save, misc_keywords_save,
+            system_functions_save;
+
         get_next_token();
 
         /* If the first token is not a '#', return it directly. */
@@ -100,6 +102,10 @@ extern void get_next_token_with_directives(void)
         directives_save = directives.enabled;
         segment_markers_save = segment_markers.enabled;
         statements_save = statements.enabled;
+        conditions_save = conditions.enabled;
+        local_variables_save = local_variables.enabled;
+        misc_keywords_save = misc_keywords.enabled;
+        system_functions_save = system_functions.enabled;
 
         directives.enabled = TRUE;
         segment_markers.enabled = FALSE;
@@ -119,22 +125,19 @@ extern void get_next_token_with_directives(void)
         if (token_type == DIRECTIVE_TT)
             parse_given_directive(TRUE);
         else
-        {   ebf_error("directive", token_text);
+        {   ebf_curtoken_error("directive");
             return;
         }
 
-        /* Restore all the lexer flags. (We are squashing several of them
-           into a single save variable, which I think is safe because that's
-           what CKnight did.)
-        */
+        /* Restore all the lexer flags. */
         directive_keywords.enabled = FALSE;
         directives.enabled = directives_save;
         segment_markers.enabled = segment_markers_save;
-        statements.enabled =
-            conditions.enabled =
-            local_variables.enabled =
-            misc_keywords.enabled = 
-            system_functions.enabled = statements_save;
+        statements.enabled = statements_save;
+        conditions.enabled = conditions_save;
+        local_variables.enabled = local_variables_save;
+        misc_keywords.enabled = misc_keywords_save; 
+        system_functions.enabled = system_functions_save;
     }
 }
 
@@ -186,7 +189,7 @@ extern int parse_directive(int internal_flag)
         get_next_token();
         df_dont_note_global_symbols = FALSE;
         if (token_type != SYMBOL_TT)
-        {   ebf_error("routine name", token_text);
+        {   ebf_curtoken_error("routine name");
             return(FALSE);
         }
         if ((!(symbols[token_value].flags & UNKNOWN_SFLAG))
@@ -236,7 +239,7 @@ extern int parse_directive(int internal_flag)
 
         get_next_token();
         if ((token_type != SEP_TT) || (token_value != SEMICOLON_SEP))
-        {   ebf_error("';' after ']'", token_text);
+        {   ebf_curtoken_error("';' after ']'");
             put_token_back();
         }
         return TRUE;
@@ -256,9 +259,9 @@ extern int parse_directive(int internal_flag)
     {   /* If we're internal, we expect only a directive here. If
            we're top-level, the possibilities are broader. */
         if (internal_flag)
-            ebf_error("directive", token_text);
+            ebf_curtoken_error("directive");
         else
-            ebf_error("directive, '[' or class name", token_text);
+            ebf_curtoken_error("directive, '[' or class name");
         panic_mode_error_recovery();
         return TRUE;
     }
@@ -266,7 +269,9 @@ extern int parse_directive(int internal_flag)
     return !(parse_given_directive(internal_flag));
 }
 
-/* Check what's coming up after a switch case value. */
+/* Check what's coming up after a switch case value.
+   (This is "switch sign" in the sense of "worm sign", not like a signed
+   variable.) */
 static int switch_sign(void)
 {
     if ((token_type == SEP_TT)&&(token_value == COLON_SEP))   return 1;
@@ -323,17 +328,18 @@ static void compile_alternatives(assembly_operand switch_value, int n,
     compile_alternatives_g(switch_value, n, stack_level, label, flag);
 }
 
+static void generate_switch_spec(assembly_operand switch_value, int label, int label_after, int speccount);
+
 static void parse_switch_spec(assembly_operand switch_value, int label,
     int action_switch)
 {
-    int i, j, label_after = -1, spec_sp = 0;
-    int max_equality_args = ((!glulx_mode) ? 3 : 1);
+    int label_after = -1, spec_sp = 0;
 
     sequence_point_follows = FALSE;
 
     do
     {   if (spec_sp >= MAX_SPEC_STACK)
-        {   error("At most 32 values can be given in a single 'switch' case");
+        {   error_fmt("At most %d values can be given in a single 'switch' case", MAX_SPEC_STACK);
             panic_mode_error_recovery();
             return;
         }
@@ -341,19 +347,20 @@ static void parse_switch_spec(assembly_operand switch_value, int label,
         if (action_switch)
         {   get_next_token();
             if (token_type == SQ_TT || token_type == DQ_TT) {
-                ebf_error("action (or fake action) name", token_text);
+                ebf_curtoken_error("action (or fake action) name");
                 continue;
             }
             spec_stack[spec_sp] = action_of_name(token_text);
 
             if (spec_stack[spec_sp].value == -1)
             {   spec_stack[spec_sp].value = 0;
-                ebf_error("action (or fake action) name", token_text);
+                ebf_curtoken_error("action (or fake action) name");
             }
         }
-        else
+        else {
             spec_stack[spec_sp] =
       code_generate(parse_expression(CONSTANT_CONTEXT), CONSTANT_CONTEXT, -1);
+        }
 
         misc_keywords.enabled = TRUE;
         get_next_token();
@@ -363,75 +370,86 @@ static void parse_switch_spec(assembly_operand switch_value, int label,
         switch(spec_type[spec_sp-1])
         {   case 0:
                 if (action_switch)
-                    ebf_error("',' or ':'", token_text);
-                else ebf_error("',', ':' or 'to'", token_text);
+                    ebf_curtoken_error("',' or ':'");
+                else ebf_curtoken_error("',', ':' or 'to'");
                 panic_mode_error_recovery();
                 return;
             case 1: goto GenSpecCode;
             case 3: if (label_after == -1) label_after = next_label++;
         }
-     } while(TRUE);
-
-     GenSpecCode:
-
-     if ((spec_sp > max_equality_args) && (label_after == -1))
-         label_after = next_label++;
-
-     if (label_after == -1)
-     {   compile_alternatives(switch_value, spec_sp, 0, label, FALSE); return;
-     }
-
-     for (i=0; i<spec_sp;)
-     {
-         j=i; while ((j<spec_sp) && (spec_type[j] != 3)) j++;
-
-         if (j > i)
-         {   if (j-i > max_equality_args) j=i+max_equality_args;
-
-             if (j == spec_sp)
-                 compile_alternatives(switch_value, j-i, i, label, FALSE);
-             else
-                 compile_alternatives(switch_value, j-i, i, label_after, TRUE);
-
-             i=j;
-         }
-         else
-         {   
-           if (!glulx_mode) {
-             if (i == spec_sp - 2)
-             {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
-                     label, TRUE);
-                 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
-                     label, TRUE);
-             }
-             else
-             {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
-                     next_label, TRUE);
-                 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
-                     label_after, FALSE);
-                 assemble_label_no(next_label++);
-             }
-           }
-           else {
-             if (i == spec_sp - 2)
-             {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
-                     label);
-                 assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
-                     label);
-             }
-             else
-             {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
-                     next_label);
-                 assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
-                     label_after);
-                 assemble_label_no(next_label++);
-             }
-           }
-           i = i+2;
-         }
-     }
-
-     assemble_label_no(label_after);
+    } while(TRUE);
+
+ GenSpecCode:
+    generate_switch_spec(switch_value, label, label_after, spec_sp);
+}
+
+/* Generate code for one switch case. The speccount case values are in spec_stack[] and spec_type[]; each is tested against
+   switch_value using label (supplied by the caller) and label_after (-1 if not yet allocated; placed after the tests). */
+static void generate_switch_spec(assembly_operand switch_value, int label, int label_after, int speccount)
+{
+    int i, j;   /* i: next entry to handle; j: end of the run starting at i */
+    int max_equality_args = ((!glulx_mode) ? 3 : 1);   /* most values handed to one compile_alternatives() call */
+
+    sequence_point_follows = FALSE;
+
+    if ((speccount > max_equality_args) && (label_after == -1))   /* -1 means no label allocated yet */
+        label_after = next_label++;   /* several calls will be needed; they share this label */
+
+    if (label_after == -1)   /* still unallocated: hand all the values to a single call */
+    {   compile_alternatives(switch_value, speccount, 0, label, FALSE); return;
+    }
+
+    for (i=0; i<speccount;)   /* no increment here: the body advances i */
+    {
+        j=i; while ((j<speccount) && (spec_type[j] != 3)) j++;   /* j: first type-3 entry at or after i, or speccount */
+
+        if (j > i)   /* entries i..j-1 are not type 3 */
+        {   if (j-i > max_equality_args) j=i+max_equality_args;   /* limit this batch */
+
+            if (j == speccount)   /* nothing follows this batch */
+                compile_alternatives(switch_value, j-i, i, label, FALSE);
+            else   /* more entries follow */
+                compile_alternatives(switch_value, j-i, i, label_after, TRUE);
+
+            i=j;
+        }
+        else
+        {   /* spec_type[i] == 3: entries i and i+1 form a pair (presumably the two ends of a 'to' range -- confirm) */
+          if (!glulx_mode) {   /* not Glulx */
+            if (i == speccount - 2)   /* the pair is the last two entries: both tests target label */
+            {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
+                    label, TRUE);
+                assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
+                    label, TRUE);
+            }
+            else   /* the pair is not last: the tests target next_label and label_after */
+            {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
+                    next_label, TRUE);
+                assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
+                    label_after, FALSE);
+                assemble_label_no(next_label++);   /* place the label the first test targeted, then consume it */
+            }
+          }
+          else {   /* Glulx: same shape, but no flag argument; jle_gc stands where jg_zc/FALSE is used above */
+            if (i == speccount - 2)   /* the pair is the last two entries: both tests target label */
+            {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
+                    label);
+                assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
+                    label);
+            }
+            else   /* the pair is not last: the tests target next_label and label_after */
+            {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
+                    next_label);
+                assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
+                    label_after);
+                assemble_label_no(next_label++);   /* place the label the first test targeted, then consume it */
+            }
+          }
+          i = i+2;   /* the pair occupied two entries */
+        }
+    }
+
+    assemble_label_no(label_after);   /* label_after is placed once every test has been emitted */
+}
 
 extern int32 parse_routine(char *source, int embedded_flag, char *name,
@@ -450,10 +468,7 @@ extern int32 parse_routine(char *source, int embedded_flag, char *name,
         restart_lexer(lexical_source, name);
     }
 
-    no_locals = 0;
-
-    for (i=0;i<MAX_LOCAL_VARIABLES-1;i++)
-        local_variable_names[i].text[0] = 0;
+    clear_local_variables();
 
     do
     {   statements.enabled = TRUE;
@@ -465,32 +480,26 @@ extern int32 parse_routine(char *source, int embedded_flag, char *name,
         {   debug_flag = TRUE; continue;
         }
 
-        if (token_type != DQ_TT)
+        if (token_type != UQ_TT)
         {   if ((token_type == SEP_TT)
                 && (token_value == SEMICOLON_SEP)) break;
-            ebf_error("local variable name or ';'", token_text);
-            panic_mode_error_recovery();
-            break;
-        }
-
-        if (strlen(token_text) > MAX_IDENTIFIER_LENGTH)
-        {   error_named("Local variable identifier too long:", token_text);
+            ebf_curtoken_error("local variable name or ';'");
             panic_mode_error_recovery();
             break;
         }
 
         if (no_locals == MAX_LOCAL_VARIABLES-1)
-        {   error_numbered("Too many local variables for a routine; max is",
+        {   error_fmt("Too many local variables for a routine; max is %d",
                 MAX_LOCAL_VARIABLES-1);
             panic_mode_error_recovery();
             break;
         }
 
         for (i=0;i<no_locals;i++) {
-            if (strcmpcis(token_text, local_variable_names[i].text)==0)
+            if (strcmpcis(token_text, get_local_variable_name(i))==0)
                 error_named("Local variable defined twice:", token_text);
         }
-        strcpy(local_variable_names[no_locals++].text, token_text);
+        add_local_variable(token_text);
     } while(TRUE);
 
     /* Set up the local variable hash and the local_variables.keywords
@@ -504,7 +513,7 @@ extern int32 parse_routine(char *source, int embedded_flag, char *name,
     if ((embedded_flag == FALSE) && (veneer_mode == FALSE) && debug_flag)
         symbols[r_symbol].flags |= STAR_SFLAG;
 
-    packed_address = assemble_routine_header(no_locals, debug_flag,
+    packed_address = assemble_routine_header(debug_flag,
         name, embedded_flag, r_symbol);
 
     do
@@ -513,7 +522,7 @@ extern int32 parse_routine(char *source, int embedded_flag, char *name,
         get_next_token();
 
         if (token_type == EOF_TT)
-        {   ebf_error("']'", token_text);
+        {   ebf_curtoken_error("']'");
             assemble_routine_end
                 (embedded_flag,
                  get_token_location_end(beginning_debug_location));
@@ -556,7 +565,7 @@ extern int32 parse_routine(char *source, int embedded_flag, char *name,
             get_next_token();
             if ((token_type == SEP_TT) &&
                 (token_value == COLON_SEP)) continue;
-            ebf_error("':' after 'default'", token_text);
+            ebf_curtoken_error("':' after 'default'");
             panic_mode_error_recovery();
             continue;
         }
@@ -564,7 +573,9 @@ extern int32 parse_routine(char *source, int embedded_flag, char *name,
         /*  Only check for the form of a case switch if the initial token
             isn't double-quoted text, as that would mean it was a print_ret
             statement: this is a mild ambiguity in the grammar. 
-            Action statements also cannot be cases. */
+            Action statements also cannot be cases.
+            We don't try to handle parenthesized expressions as cases
+            at the top level of a routine. */
 
         if ((token_type != DQ_TT) && (token_type != SEP_TT))
         {   get_next_token();
@@ -654,7 +665,7 @@ extern void parse_code_block(int break_label, int continue_label,
                 break;
             }
             if (token_type == EOF_TT)
-            {   ebf_error("'}'", token_text);
+            {   ebf_curtoken_error("'}'");
                 break;
             }
 
@@ -679,7 +690,7 @@ extern void parse_code_block(int break_label, int continue_label,
                     get_next_token();
                     if ((token_type == SEP_TT) &&
                         (token_value == COLON_SEP)) continue;
-                    ebf_error("':' after 'default'", token_text);
+                    ebf_curtoken_error("':' after 'default'");
                     panic_mode_error_recovery();
                     continue;
                 }
@@ -687,8 +698,76 @@ extern void parse_code_block(int break_label, int continue_label,
                 /*  Decide: is this an ordinary statement, or the start
                     of a new case?  */
 
+                /*  Again, double-quoted text is a print_ret statement. */
                 if (token_type == DQ_TT) goto NotASwitchCase;
 
+                if ((token_type == SEP_TT)&&(token_value == OPENB_SEP)) {
+                    /* An open-paren means we need to parse a full
+                       expression. */
+                    assembly_operand AO;
+                    int constcount;
+                    put_token_back();
+                    AO = parse_expression(VOID_CONTEXT);
+                    /* If this expression is followed by a colon, we'll
+                       handle it as a switch case. */
+                    constcount = test_constant_op_list(&AO, spec_stack, MAX_SPEC_STACK);
+                    if ((token_type == SEP_TT)&&(token_value == COLON_SEP)) {
+                        int ix;
+
+                        if (!constcount)
+                        {
+                            ebf_error("constant", "<expression>");
+                            panic_mode_error_recovery();
+                            continue;
+                        }
+
+                        if (constcount > MAX_SPEC_STACK)
+                        {   error_fmt("At most %d values can be given in a single 'switch' case", MAX_SPEC_STACK);
+                            panic_mode_error_recovery();
+                            continue;
+                        }
+
+                        get_next_token();
+                        /* Fill in the spec_type values matching the
+                           spec_stack entries. */
+                        for (ix=0; ix<constcount-1; ix++)
+                            spec_type[ix] = 2; /* comma */
+                        spec_type[constcount-1] = 1; /* colon */
+                        
+                        /* The rest of this is parallel to the
+                           parse_switch_spec() case below. */
+                        /* Note that the spec_stack values appear in the
+                           reverse order from how parse_switch_spec()
+                           would store them. The results are the same
+                           because we're just comparing temp_var1 with
+                           a list of constants. */
+                        if (default_clause_made)
+                            error("'default' must be the last 'switch' case");
+                        
+                        if (switch_clause_made)
+                        {   if (!execution_never_reaches_here)
+                                {   sequence_point_follows = FALSE;
+                                    assemble_jump(break_label);
+                                }
+                            assemble_label_no(switch_label);
+                        }
+                        
+                        switch_label = next_label++;
+                        switch_clause_made = TRUE;
+                        
+                        AO = temp_var1;
+                        generate_switch_spec(AO, switch_label, -1, constcount);
+                        continue;
+                    }
+                    
+                    /* Otherwise, treat this as a statement. Imagine
+                       we've jumped down to NotASwitchCase, except that
+                       we have the expression AO already parsed. */
+                    sequence_point_follows = TRUE;
+                    parse_statement_singleexpr(AO);
+                    continue;
+                }
+
                 unary_minus_flag
                     = ((token_type == SEP_TT)&&(token_value == MINUS_SEP));
                 if (unary_minus_flag) get_next_token();
@@ -731,7 +810,7 @@ extern void parse_code_block(int break_label, int continue_label,
             }
 
             if ((switch_rule != 0) && (!switch_clause_made))
-                ebf_error("switch value", token_text);
+                ebf_curtoken_error("switch value");
 
             NotASwitchCase:
             sequence_point_follows = TRUE;
@@ -741,7 +820,7 @@ extern void parse_code_block(int break_label, int continue_label,
     }
     else {
         if (switch_rule != 0)
-            ebf_error("braced code block after 'switch'", token_text);
+            ebf_curtoken_error("braced code block after 'switch'");
         
         /* Parse a single statement. */
         parse_statement(break_label, continue_label);