Update to commit e33eef4f8fab800eaf4a32b2d159cde6c4bbb38e

[inform.git] / src / text.c
diff --git a/src/text.c b/src/text.c

index 31612387929280e1516df021592ac09109cf2e4b..88ceaecba825ae552ad94f1f9fc53f69131dd06a 100644 (file)
--- a/src/text.c
+++ b/src/text.c
@@ -1,9 +1,8 @@
  /* ------------------------------------------------------------------------- */
  /*   "text" : Text translation, the abbreviations optimiser, the dictionary  */
  /*                                                                           */
-/* Copyright (c) Graham Nelson 1993 - 2020                                   */
-/*                                                                           */
-/* This file is part of Inform.                                              */
+/*   Part of Inform 6.35                                                     */
+/*   copyright (c) Graham Nelson 1993 - 2021                                 */
  /*                                                                           */
  /* Inform is free software: you can redistribute it and/or modify            */
  /* it under the terms of the GNU General Public License as published by      */
@@ -16,7 +15,7 @@
  /* GNU General Public License for more details.                              */
  /*                                                                           */
  /* You should have received a copy of the GNU General Public License         */
-/* along with Inform. If not, see https://gnu.org/licenses/                  */
+/* along with Inform. If not, see https://gnu.org/licenses/                  *
  /*                                                                           */
  /* ------------------------------------------------------------------------- */
  
@@ -39,13 +38,7 @@ char *all_text, *all_text_top;         /* Start and next byte free in (large)
                                            text buffer holding the entire text
                                            of the game, when it is being
                                            recorded                           */
-int put_strings_in_low_memory,         /* When TRUE, put static strings in
-                                          the low strings pool at 0x100 rather
-                                          than in the static strings area    */
-    is_abbreviation,                   /* When TRUE, the string being trans
-                                          is itself an abbreviation string
-                                          so can't make use of abbreviations */
-    abbrevs_lookup_table_made,         /* The abbreviations lookup table is
+int abbrevs_lookup_table_made,         /* The abbreviations lookup table is
                                            constructed when the first non-
                                            abbreviation string is translated:
                                            this flag is TRUE after that       */
@@ -217,9 +210,7 @@ extern void make_abbreviation(char *text)
      strcpy((char *)abbreviations_at
              + no_abbreviations*MAX_ABBREV_LENGTH, text);
  
-    is_abbreviation = TRUE;
-    abbrev_values[no_abbreviations] = compile_string(text, TRUE, TRUE);
-    is_abbreviation = FALSE;
+    abbrev_values[no_abbreviations] = compile_string(text, STRCTX_ABBREV);
  
      /*   The quality is the number of Z-chars saved by using this            */
      /*   abbreviation: note that it takes 2 Z-chars to print it.             */
@@ -228,30 +219,32 @@ extern void make_abbreviation(char *text)
  }
  
  /* ------------------------------------------------------------------------- */
-/*   The front end routine for text translation                              */
+/*   The front end routine for text translation.                             */
+/*   strctx indicates the purpose of the string. This is mostly used for     */
+/*   informational output (gametext.txt), but we treat some string contexts  */
+/*   specially during compilation.                                           */
  /* ------------------------------------------------------------------------- */
  
-extern int32 compile_string(char *b, int in_low_memory, int is_abbrev)
+extern int32 compile_string(char *b, int strctx)
  {   int i, j; uchar *c;
  
-    is_abbreviation = is_abbrev;
-
-    /* Put into the low memory pool (at 0x100 in the Z-machine) of strings   */
-    /* which may be wanted as possible entries in the abbreviations table    */
+    /* In Z-code, abbreviations go in the low memory pool (0x100). So
+       do strings explicitly defined with the Lowstring directive.
+       (In Glulx, the in_low_memory flag is ignored.) */
+    int in_low_memory = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING);
  
      if (!glulx_mode && in_low_memory)
      {   j=subtract_pointers(low_strings_top,low_strings);
-        low_strings_top=translate_text(low_strings_top, low_strings+MAX_LOW_STRINGS, b);
+        low_strings_top=translate_text(low_strings_top, low_strings+MAX_LOW_STRINGS, b, strctx);
          if (!low_strings_top)
              memoryerror("MAX_LOW_STRINGS", MAX_LOW_STRINGS);
-        is_abbreviation = FALSE;
          return(0x21+(j/2));
      }
  
      if (glulx_mode && done_compression)
          compiler_error("Tried to add a string after compression was done.");
  
-    c = translate_text(strings_holding_area, strings_holding_area+MAX_STATIC_STRINGS, b);
+    c = translate_text(strings_holding_area, strings_holding_area+MAX_STATIC_STRINGS, b, strctx);
      if (!c)
          memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);
  
@@ -286,8 +279,6 @@ extern int32 compile_string(char *b, int in_low_memory, int is_abbrev)
              write_byte_to_memory_block(&static_strings_area,
                  static_strings_extent, *c);
  
-    is_abbreviation = FALSE;
-
      if (!glulx_mode) {
          return(j/scale_factor);
      }
@@ -382,11 +373,20 @@ static void write_z_char_g(int i)
  /*   Note that the source text may be corrupted by this routine.             */
  /* ------------------------------------------------------------------------- */
  
-extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text)
+extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text, int strctx)
  {   int i, j, k, in_alphabet, lookup_value;
      int32 unicode; int zscii;
      unsigned char *text_in;
  
+    /* For STRCTX_ABBREV, the string being translated is itself an
+       abbreviation string, so it can't make use of abbreviations. Set
+       the is_abbreviation flag to indicate this.
+       The compiler has historically set this flag for the Lowstring
+       directive as well -- the in_low_memory and is_abbreviation flag were
+       always the same. I am preserving that convention. */
+    int is_abbreviation = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING);
+
+
      /*  Cast the input and output streams to unsigned char: text_out_pc will
          advance as bytes of Z-coded text are written, but text_in doesn't    */
  
@@ -425,9 +425,20 @@ extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text)
          all_text_top += strlen(all_text_top);
      }
  
-    if (transcript_switch && (!veneer_mode))
-        write_to_transcript_file(s_text);
-
+    if (transcript_switch) {
+        /* Omit veneer strings, unless we're using the new transcript format, which includes everything. */
+        if ((!veneer_mode) || TRANSCRIPT_FORMAT == 1) {
+            int label = strctx;
+            if (veneer_mode) {
+                if (label == STRCTX_GAME)
+                    label = STRCTX_VENEER;
+                else if (label == STRCTX_GAMEOPC)
+                    label = STRCTX_VENEEROPC;
+            }
+            write_to_transcript_file(s_text, label);
+        }
+    }
+    
    if (!glulx_mode) {
  
      /*  The empty string of Z-text is illegal, since it can't carry an end
@@ -460,8 +471,9 @@ extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text)
          if ((economy_switch) && (!is_abbreviation)
              && ((k=abbrevs_lookup[text_in[i]])!=-1))
          {   if ((j=try_abbreviations_from(text_in, i, k))!=-1)
-            {   if (j<32) { write_z_char_z(2); write_z_char_z(j); }
-                else { write_z_char_z(3); write_z_char_z(j-32); }
+            {   /* abbreviations run from MAX_DYNAMIC_STRINGS to 96 */
+                j += MAX_DYNAMIC_STRINGS;
+                write_z_char_z(j/32+1); write_z_char_z(j%32);
              }
          }
  
@@ -517,15 +529,25 @@ advance as part of 'Zcharacter table':", unicode);
              else if (isdigit(text_in[i+1])!=0)
              {   int d1, d2;
  
-                /*   @..   */
+                /*   @.. (dynamic string)   */
  
                  d1 = character_digit_value[text_in[i+1]];
                  d2 = character_digit_value[text_in[i+2]];
                  if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10))
                      error("'@..' must have two decimal digits");
                  else
-                {   i+=2;
-                    write_z_char_z(1); write_z_char_z(d1*10 + d2);
+                {
+                    j = d1*10 + d2;
+                    if (!glulx_mode && j >= 96)
+                    {   error("Z-machine dynamic strings are limited to 96");
+                        j = 0;
+                    }
+                    if (j >= MAX_DYNAMIC_STRINGS) {
+                        memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS);
+                        j = 0;
+                    }
+                    i+=2;
+                    write_z_char_z(j/32+1); write_z_char_z(j%32);
                  }
              }
              else
@@ -1488,8 +1510,8 @@ extern void optimise_abbreviations(void)
  /*   For Glulx, the form is instead: (But see below about Unicode-valued     */
  /*   dictionaries and my heinie.)                                            */
  /*                                                                           */
-/*        <plain text>      <flags>   <verbnumber>     <adjectivenumber>     */
-/*        DICT_WORD_SIZE     short       short            short              */
+/*        <tag>  <plain text>    <flags>  <verbnumber>   <adjectivenumber>   */
+/*         $60    DICT_WORD_SIZE  short    short          short              */
  /*                                                                           */
  /*   These records are stored in "accession order" (i.e. in order of their   */
  /*   first being received by these routines) and only alphabetically sorted  */
@@ -1573,6 +1595,8 @@ static void dictionary_prepare_z(char *dword, uchar *optresult)
         applying to the text of dictionary entries: first produce a sequence
         of 6 (v3) or 9 (v4+) Z-characters                                     */
  
+    int dictsize = (version_number==3) ? 6 : 9;
+
      number_and_case = 0;
  
      for (i=0, j=0; dword[j]!=0; i++, j++)
@@ -1588,7 +1612,7 @@ to give number of dictionary word", dword);
              }
              break;
          }
-        if (i>=9) break;
+        if (i>=dictsize) break;
  
          k=(int) dword[j];
          if (k==(int) '\'')
@@ -1637,7 +1661,7 @@ apostrophe in", dword);
  
      for (; i<9; i++) wd[i]=5;
  
-    /* The array of Z-chars is converted to three 2-byte blocks              */
+    /* The array of Z-chars is converted to two or three 2-byte blocks       */
  
      tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10);
      prepared_sort[1]=tot%0x100;
@@ -1645,7 +1669,10 @@ apostrophe in", dword);
      tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10);
      prepared_sort[3]=tot%0x100;
      prepared_sort[2]=(tot/0x100)%0x100;
-    tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
+    if (version_number==3)
+        tot = 0;
+    else
+        tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
      prepared_sort[5]=tot%0x100;
      prepared_sort[4]=(tot/0x100)%0x100;
  
@@ -2041,17 +2068,81 @@ extern void dictionary_set_verb_number(char *dword, int to)
  /*   by the linker.                                                          */
  /* ------------------------------------------------------------------------- */
  
-static char *d_show_to;
-static int d_show_total;
+/* In the dictionary-showing code, if d_show_buf is NULL, the text is
+   printed directly. (The "Trace dictionary" directive does this.)
+   If d_show_buf is not NULL, we add words to it (reallocing if necessary)
+   until it's a page-width. 
+*/
+static char *d_show_buf = NULL;
+static int d_show_size; /* allocated size */
+static int d_show_len;  /* current length */
  
  static void show_char(char c)
-{   if (d_show_to == NULL) printf("%c", c);
-    else
-    {   int i = strlen(d_show_to);
-        d_show_to[i] = c; d_show_to[i+1] = 0;
+{
+    if (d_show_buf == NULL) {
+        printf("%c", c);
+    }
+    else {
+        if (d_show_len+2 >= d_show_size) {
+            int newsize = 2 * d_show_len + 16;
+            my_realloc(&d_show_buf, d_show_size, newsize, "dictionary display buffer");
+            d_show_size = newsize;
+        }
+        d_show_buf[d_show_len++] = c;
+        d_show_buf[d_show_len] = '\0';
      }
  }
  
+/* Display a Unicode character in user-readable form. This uses the same
+   character encoding as the source code. */
+static void show_uchar(uint32 c)
+{
+    char buf[16];
+    int ix;
+    
+    if (c < 0x80) {
+        /* ASCII always works */
+        show_char(c);
+        return;
+    }
+    if (character_set_unicode) {
+        /* UTF-8 the character */
+        if (c < 0x80) {
+            show_char(c);
+        }
+        else if (c < 0x800) {
+            show_char((0xC0 | ((c & 0x7C0) >> 6)));
+            show_char((0x80 |  (c & 0x03F)     ));
+        }
+        else if (c < 0x10000) {
+            show_char((0xE0 | ((c & 0xF000) >> 12)));
+            show_char((0x80 | ((c & 0x0FC0) >>  6)));
+            show_char((0x80 |  (c & 0x003F)      ));
+        }
+        else if (c < 0x200000) {
+            show_char((0xF0 | ((c & 0x1C0000) >> 18)));
+            show_char((0x80 | ((c & 0x03F000) >> 12)));
+            show_char((0x80 | ((c & 0x000FC0) >>  6)));
+            show_char((0x80 |  (c & 0x00003F)      ));
+        }
+        else {
+            show_char('?');
+        }
+        return;
+    }
+    if (character_set_setting == 1 && c < 0x100) {
+        /* Fits in Latin-1 */
+        show_char(c);
+        return;
+    }
+    /* Supporting other character_set_setting is harder; not currently implemented. */
+    
+    /* Use the escaped form */
+    sprintf(buf, "@{%x}", c);
+    for (ix=0; buf[ix]; ix++)
+        show_char(buf[ix]);
+}
+
  extern void word_to_ascii(uchar *p, char *results)
  {   int i, shift, cc, zchar; uchar encoded_word[9];
      encoded_word[0] = (((int) p[0])&0x7c)/4;
@@ -2097,7 +2188,7 @@ extern void word_to_ascii(uchar *p, char *results)
  static void recursively_show_z(int node)
  {   int i, cprinted, flags; uchar *p;
      char textual_form[32];
-    int res = (version_number == 3)?4:6;
+    int res = (version_number == 3)?4:6; /* byte length of encoded text */
  
      if (dtree[node].branch[0] != VACANT)
          recursively_show_z(dtree[node].branch[0]);
@@ -2111,7 +2202,7 @@ static void recursively_show_z(int node)
      for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++)
          show_char(' ');
  
-    if (d_show_to == NULL)
+    if (d_show_buf == NULL)
      {   for (i=0; i<3+res; i++) printf("%02x ",p[i]);
  
          flags = (int) p[res];
@@ -2132,12 +2223,11 @@ static void recursively_show_z(int node)
          printf("\n");
      }
  
-    if (d_show_total++ == 5)
-    {   d_show_total = 0;
-        if (d_show_to != NULL)
-        {   write_to_transcript_file(d_show_to);
-            d_show_to[0] = 0;
-        }
+    /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
+    if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1))
+    {
+        write_to_transcript_file(d_show_buf, STRCTX_DICT);
+        d_show_len = 0;
      }
  
      if (dtree[node].branch[1] != VACANT)
@@ -2145,8 +2235,56 @@ static void recursively_show_z(int node)
  }
  
  static void recursively_show_g(int node)
-{
-  warning("### Glulx dictionary-show not yet implemented.\n");
+{   int i, cprinted;
+    uchar *p;
+
+    if (dtree[node].branch[0] != VACANT)
+        recursively_show_g(dtree[node].branch[0]);
+
+    p = (uchar *)dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*node;
+
+    for (cprinted = 0; cprinted<DICT_WORD_SIZE; cprinted++)
+    {
+        uint32 ch;
+        if (DICT_CHAR_SIZE == 1)
+            ch = p[1+cprinted];
+        else
+            ch = (p[4*cprinted+4] << 24) + (p[4*cprinted+5] << 16) + (p[4*cprinted+6] << 8) + (p[4*cprinted+7]);
+        if (!ch)
+            break;
+        show_uchar(ch);
+    }
+    for (; cprinted<DICT_WORD_SIZE+4; cprinted++)
+        show_char(' ');
+
+    if (d_show_buf == NULL)
+    {   for (i=0; i<DICT_ENTRY_BYTE_LENGTH; i++) printf("%02x ",p[i]);
+        int flagpos = (DICT_CHAR_SIZE == 1) ? (DICT_WORD_SIZE+1) : (DICT_WORD_BYTES+4);
+        int flags = (p[flagpos+0] << 8) | (p[flagpos+1]);
+        int verbnum = (p[flagpos+2] << 8) | (p[flagpos+3]);
+        if (flags & 128)
+        {   printf("noun ");
+            if (flags & 4)  printf("p"); else printf(" ");
+            printf(" ");
+        }
+        else printf("       ");
+        if (flags & 8)
+        {   printf("preposition    ");
+        }
+        if ((flags & 3) == 3) printf("metaverb:%d  ", verbnum);
+        else if ((flags & 3) == 1) printf("verb:%d  ", verbnum);
+        printf("\n");
+    }
+
+    /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
+    if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1))
+    {
+        write_to_transcript_file(d_show_buf, STRCTX_DICT);
+        d_show_len = 0;
+    }
+
+    if (dtree[node].branch[1] != VACANT)
+        recursively_show_g(dtree[node].branch[1]);
  }
  
  static void show_alphabet(int i)
@@ -2168,33 +2306,43 @@ static void show_alphabet(int i)
  extern void show_dictionary(void)
  {   printf("Dictionary contains %d entries:\n",dict_entries);
      if (dict_entries != 0)
-    {   d_show_total = 0; d_show_to = NULL; 
+    {   d_show_len = 0; d_show_buf = NULL; 
          if (!glulx_mode)    
              recursively_show_z(root);
          else
              recursively_show_g(root);
      }
-    printf("\nZ-machine alphabet entries:\n");
-    show_alphabet(0);
-    show_alphabet(1);
-    show_alphabet(2);
+    if (!glulx_mode)
+    {
+        printf("\nZ-machine alphabet entries:\n");
+        show_alphabet(0);
+        show_alphabet(1);
+        show_alphabet(2);
+    }
  }
  
  extern void write_dictionary_to_transcript(void)
-{   char d_buffer[81];
-
-    sprintf(d_buffer, "\n[Dictionary contains %d entries:]\n", dict_entries);
+{
+    d_show_size = 80; /* initial size */
+    d_show_buf = my_malloc(d_show_size, "dictionary display buffer");
  
-    d_buffer[0] = 0; write_to_transcript_file(d_buffer);
+    write_to_transcript_file("", STRCTX_INFO);
+    sprintf(d_show_buf, "[Dictionary contains %d entries:]", dict_entries);
+    write_to_transcript_file(d_show_buf, STRCTX_INFO);
+    
+    d_show_len = 0;
  
      if (dict_entries != 0)
-    {   d_show_total = 0; d_show_to = d_buffer; 
+    {
          if (!glulx_mode)    
              recursively_show_z(root);
          else
              recursively_show_g(root);
      }
-    if (d_show_total != 0) write_to_transcript_file(d_buffer);
+    if (d_show_len != 0) write_to_transcript_file(d_show_buf, STRCTX_DICT);
+
+    my_free(&d_show_buf, "dictionary display buffer");
+    d_show_len = 0; d_show_buf = NULL;
  }
  
  /* ========================================================================= */
@@ -2209,8 +2357,6 @@ extern void init_text_vars(void)
      grandtable = NULL;
      grandflags = NULL;
      no_chars_transcribed = 0;
-    is_abbreviation = FALSE;
-    put_strings_in_low_memory = FALSE;
  
      for (j=0; j<256; j++) abbrevs_lookup[j] = -1;
  
@@ -2265,6 +2411,10 @@ extern void text_allocate_arrays(void)
           = my_malloc(MAX_STATIC_STRINGS,"static strings holding area");
      low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings");
  
+    d_show_buf = NULL;
+    d_show_size = 0;
+    d_show_len = 0;
+
      huff_entities = NULL;
      hufflist = NULL;
      unicode_usage_entries = NULL;