X-Git-Url: https://jxself.org/git/?a=blobdiff_plain;f=src%2Ftext.c;h=88ceaecba825ae552ad94f1f9fc53f69131dd06a;hb=d8d68d0bd4c45af6f0dc69b4fc33d37d961aca85;hp=31612387929280e1516df021592ac09109cf2e4b;hpb=e536ce9e39cc1bfa82ecd1d6d73f874af655f9db;p=inform.git diff --git a/src/text.c b/src/text.c index 3161238..88ceaec 100644 --- a/src/text.c +++ b/src/text.c @@ -1,9 +1,8 @@ /* ------------------------------------------------------------------------- */ /* "text" : Text translation, the abbreviations optimiser, the dictionary */ /* */ -/* Copyright (c) Graham Nelson 1993 - 2020 */ -/* */ -/* This file is part of Inform. */ +/* Part of Inform 6.35 */ +/* copyright (c) Graham Nelson 1993 - 2021 */ /* */ /* Inform is free software: you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ @@ -16,7 +15,7 @@ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ -/* along with Inform. If not, see https://gnu.org/licenses/ */ +/* along with Inform. If not, see https://gnu.org/licenses/ * /* */ /* ------------------------------------------------------------------------- */ @@ -39,13 +38,7 @@ char *all_text, *all_text_top; /* Start and next byte free in (large) text buffer holding the entire text of the game, when it is being recorded */ -int put_strings_in_low_memory, /* When TRUE, put static strings in - the low strings pool at 0x100 rather - than in the static strings area */ - is_abbreviation, /* When TRUE, the string being trans - is itself an abbreviation string - so can't make use of abbreviations */ - abbrevs_lookup_table_made, /* The abbreviations lookup table is +int abbrevs_lookup_table_made, /* The abbreviations lookup table is constructed when the first non- abbreviation string is translated: this flag is TRUE after that */ @@ -217,9 +210,7 @@ extern void make_abbreviation(char *text) strcpy((char *)abbreviations_at + no_abbreviations*MAX_ABBREV_LENGTH, text); - is_abbreviation = TRUE; - abbrev_values[no_abbreviations] = compile_string(text, TRUE, TRUE); - is_abbreviation = FALSE; + abbrev_values[no_abbreviations] = compile_string(text, STRCTX_ABBREV); /* The quality is the number of Z-chars saved by using this */ /* abbreviation: note that it takes 2 Z-chars to print it. */ @@ -228,30 +219,32 @@ extern void make_abbreviation(char *text) } /* ------------------------------------------------------------------------- */ -/* The front end routine for text translation */ +/* The front end routine for text translation. */ +/* strctx indicates the purpose of the string. This is mostly used for */ +/* informational output (gametext.txt), but we treat some string contexts */ +/* specially during compilation. */ /* ------------------------------------------------------------------------- */ -extern int32 compile_string(char *b, int in_low_memory, int is_abbrev) +extern int32 compile_string(char *b, int strctx) { int i, j; uchar *c; - is_abbreviation = is_abbrev; - - /* Put into the low memory pool (at 0x100 in the Z-machine) of strings */ - /* which may be wanted as possible entries in the abbreviations table */ + /* In Z-code, abbreviations go in the low memory pool (0x100). So + do strings explicitly defined with the Lowstring directive. + (In Glulx, the in_low_memory flag is ignored.) */ + int in_low_memory = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING); if (!glulx_mode && in_low_memory) { j=subtract_pointers(low_strings_top,low_strings); - low_strings_top=translate_text(low_strings_top, low_strings+MAX_LOW_STRINGS, b); + low_strings_top=translate_text(low_strings_top, low_strings+MAX_LOW_STRINGS, b, strctx); if (!low_strings_top) memoryerror("MAX_LOW_STRINGS", MAX_LOW_STRINGS); - is_abbreviation = FALSE; return(0x21+(j/2)); } if (glulx_mode && done_compression) compiler_error("Tried to add a string after compression was done."); - c = translate_text(strings_holding_area, strings_holding_area+MAX_STATIC_STRINGS, b); + c = translate_text(strings_holding_area, strings_holding_area+MAX_STATIC_STRINGS, b, strctx); if (!c) memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS); @@ -286,8 +279,6 @@ extern int32 compile_string(char *b, int in_low_memory, int is_abbrev) write_byte_to_memory_block(&static_strings_area, static_strings_extent, *c); - is_abbreviation = FALSE; - if (!glulx_mode) { return(j/scale_factor); } @@ -382,11 +373,20 @@ static void write_z_char_g(int i) /* Note that the source text may be corrupted by this routine. */ /* ------------------------------------------------------------------------- */ -extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text) +extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text, int strctx) { int i, j, k, in_alphabet, lookup_value; int32 unicode; int zscii; unsigned char *text_in; + /* For STRCTX_ABBREV, the string being translated is itself an + abbreviation string, so it can't make use of abbreviations. Set + the is_abbreviation flag to indicate this. + The compiler has historically set this flag for the Lowstring + directive as well -- the in_low_memory and is_abbreviation flag were + always the same. I am preserving that convention. */ + int is_abbreviation = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING); + + /* Cast the input and output streams to unsigned char: text_out_pc will advance as bytes of Z-coded text are written, but text_in doesn't */ @@ -425,9 +425,20 @@ extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text) all_text_top += strlen(all_text_top); } - if (transcript_switch && (!veneer_mode)) - write_to_transcript_file(s_text); - + if (transcript_switch) { + /* Omit veneer strings, unless we're using the new transcript format, which includes everything. */ + if ((!veneer_mode) || TRANSCRIPT_FORMAT == 1) { + int label = strctx; + if (veneer_mode) { + if (label == STRCTX_GAME) + label = STRCTX_VENEER; + else if (label == STRCTX_GAMEOPC) + label = STRCTX_VENEEROPC; + } + write_to_transcript_file(s_text, label); + } + } + if (!glulx_mode) { /* The empty string of Z-text is illegal, since it can't carry an end @@ -460,8 +471,9 @@ extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text) if ((economy_switch) && (!is_abbreviation) && ((k=abbrevs_lookup[text_in[i]])!=-1)) { if ((j=try_abbreviations_from(text_in, i, k))!=-1) - { if (j<32) { write_z_char_z(2); write_z_char_z(j); } - else { write_z_char_z(3); write_z_char_z(j-32); } + { /* abbreviations run from MAX_DYNAMIC_STRINGS to 96 */ + j += MAX_DYNAMIC_STRINGS; + write_z_char_z(j/32+1); write_z_char_z(j%32); } } @@ -517,15 +529,25 @@ advance as part of 'Zcharacter table':", unicode); else if (isdigit(text_in[i+1])!=0) { int d1, d2; - /* @.. */ + /* @.. (dynamic string) */ d1 = character_digit_value[text_in[i+1]]; d2 = character_digit_value[text_in[i+2]]; if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10)) error("'@..' must have two decimal digits"); else - { i+=2; - write_z_char_z(1); write_z_char_z(d1*10 + d2); + { + j = d1*10 + d2; + if (!glulx_mode && j >= 96) + { error("Z-machine dynamic strings are limited to 96"); + j = 0; + } + if (j >= MAX_DYNAMIC_STRINGS) { + memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS); + j = 0; + } + i+=2; + write_z_char_z(j/32+1); write_z_char_z(j%32); } } else @@ -1488,8 +1510,8 @@ extern void optimise_abbreviations(void) /* For Glulx, the form is instead: (But see below about Unicode-valued */ /* dictionaries and my heinie.) */ /* */ -/* */ -/* DICT_WORD_SIZE short short short */ +/* */ +/* $60 DICT_WORD_SIZE short short short */ /* */ /* These records are stored in "accession order" (i.e. in order of their */ /* first being received by these routines) and only alphabetically sorted */ @@ -1573,6 +1595,8 @@ static void dictionary_prepare_z(char *dword, uchar *optresult) applying to the text of dictionary entries: first produce a sequence of 6 (v3) or 9 (v4+) Z-characters */ + int dictsize = (version_number==3) ? 6 : 9; + number_and_case = 0; for (i=0, j=0; dword[j]!=0; i++, j++) @@ -1588,7 +1612,7 @@ to give number of dictionary word", dword); } break; } - if (i>=9) break; + if (i>=dictsize) break; k=(int) dword[j]; if (k==(int) '\'') @@ -1637,7 +1661,7 @@ apostrophe in", dword); for (; i<9; i++) wd[i]=5; - /* The array of Z-chars is converted to three 2-byte blocks */ + /* The array of Z-chars is converted to two or three 2-byte blocks */ tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10); prepared_sort[1]=tot%0x100; @@ -1645,7 +1669,10 @@ apostrophe in", dword); tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10); prepared_sort[3]=tot%0x100; prepared_sort[2]=(tot/0x100)%0x100; - tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10); + if (version_number==3) + tot = 0; + else + tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10); prepared_sort[5]=tot%0x100; prepared_sort[4]=(tot/0x100)%0x100; @@ -2041,17 +2068,81 @@ extern void dictionary_set_verb_number(char *dword, int to) /* by the linker. */ /* ------------------------------------------------------------------------- */ -static char *d_show_to; -static int d_show_total; +/* In the dictionary-showing code, if d_show_buf is NULL, the text is + printed directly. (The "Trace dictionary" directive does this.) + If d_show_buf is not NULL, we add words to it (reallocing if necessary) + until it's a page-width. +*/ +static char *d_show_buf = NULL; +static int d_show_size; /* allocated size */ +static int d_show_len; /* current length */ static void show_char(char c) -{ if (d_show_to == NULL) printf("%c", c); - else - { int i = strlen(d_show_to); - d_show_to[i] = c; d_show_to[i+1] = 0; +{ + if (d_show_buf == NULL) { + printf("%c", c); + } + else { + if (d_show_len+2 >= d_show_size) { + int newsize = 2 * d_show_len + 16; + my_realloc(&d_show_buf, d_show_size, newsize, "dictionary display buffer"); + d_show_size = newsize; + } + d_show_buf[d_show_len++] = c; + d_show_buf[d_show_len] = '\0'; } } +/* Display a Unicode character in user-readable form. This uses the same + character encoding as the source code. */ +static void show_uchar(uint32 c) +{ + char buf[16]; + int ix; + + if (c < 0x80) { + /* ASCII always works */ + show_char(c); + return; + } + if (character_set_unicode) { + /* UTF-8 the character */ + if (c < 0x80) { + show_char(c); + } + else if (c < 0x800) { + show_char((0xC0 | ((c & 0x7C0) >> 6))); + show_char((0x80 | (c & 0x03F) )); + } + else if (c < 0x10000) { + show_char((0xE0 | ((c & 0xF000) >> 12))); + show_char((0x80 | ((c & 0x0FC0) >> 6))); + show_char((0x80 | (c & 0x003F) )); + } + else if (c < 0x200000) { + show_char((0xF0 | ((c & 0x1C0000) >> 18))); + show_char((0x80 | ((c & 0x03F000) >> 12))); + show_char((0x80 | ((c & 0x000FC0) >> 6))); + show_char((0x80 | (c & 0x00003F) )); + } + else { + show_char('?'); + } + return; + } + if (character_set_setting == 1 && c < 0x100) { + /* Fits in Latin-1 */ + show_char(c); + return; + } + /* Supporting other character_set_setting is harder; not currently implemented. */ + + /* Use the escaped form */ + sprintf(buf, "@{%x}", c); + for (ix=0; buf[ix]; ix++) + show_char(buf[ix]); +} + extern void word_to_ascii(uchar *p, char *results) { int i, shift, cc, zchar; uchar encoded_word[9]; encoded_word[0] = (((int) p[0])&0x7c)/4; @@ -2097,7 +2188,7 @@ extern void word_to_ascii(uchar *p, char *results) static void recursively_show_z(int node) { int i, cprinted, flags; uchar *p; char textual_form[32]; - int res = (version_number == 3)?4:6; + int res = (version_number == 3)?4:6; /* byte length of encoded text */ if (dtree[node].branch[0] != VACANT) recursively_show_z(dtree[node].branch[0]); @@ -2111,7 +2202,7 @@ static void recursively_show_z(int node) for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++) show_char(' '); - if (d_show_to == NULL) + if (d_show_buf == NULL) { for (i=0; i<3+res; i++) printf("%02x ",p[i]); flags = (int) p[res]; @@ -2132,12 +2223,11 @@ static void recursively_show_z(int node) printf("\n"); } - if (d_show_total++ == 5) - { d_show_total = 0; - if (d_show_to != NULL) - { write_to_transcript_file(d_show_to); - d_show_to[0] = 0; - } + /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */ + if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1)) + { + write_to_transcript_file(d_show_buf, STRCTX_DICT); + d_show_len = 0; } if (dtree[node].branch[1] != VACANT) @@ -2145,8 +2235,56 @@ static void recursively_show_z(int node) } static void recursively_show_g(int node) -{ - warning("### Glulx dictionary-show not yet implemented.\n"); +{ int i, cprinted; + uchar *p; + + if (dtree[node].branch[0] != VACANT) + recursively_show_g(dtree[node].branch[0]); + + p = (uchar *)dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*node; + + for (cprinted = 0; cprinted= 64 || TRANSCRIPT_FORMAT == 1)) + { + write_to_transcript_file(d_show_buf, STRCTX_DICT); + d_show_len = 0; + } + + if (dtree[node].branch[1] != VACANT) + recursively_show_g(dtree[node].branch[1]); } static void show_alphabet(int i) @@ -2168,33 +2306,43 @@ static void show_alphabet(int i) extern void show_dictionary(void) { printf("Dictionary contains %d entries:\n",dict_entries); if (dict_entries != 0) - { d_show_total = 0; d_show_to = NULL; + { d_show_len = 0; d_show_buf = NULL; if (!glulx_mode) recursively_show_z(root); else recursively_show_g(root); } - printf("\nZ-machine alphabet entries:\n"); - show_alphabet(0); - show_alphabet(1); - show_alphabet(2); + if (!glulx_mode) + { + printf("\nZ-machine alphabet entries:\n"); + show_alphabet(0); + show_alphabet(1); + show_alphabet(2); + } } extern void write_dictionary_to_transcript(void) -{ char d_buffer[81]; - - sprintf(d_buffer, "\n[Dictionary contains %d entries:]\n", dict_entries); +{ + d_show_size = 80; /* initial size */ + d_show_buf = my_malloc(d_show_size, "dictionary display buffer"); - d_buffer[0] = 0; write_to_transcript_file(d_buffer); + write_to_transcript_file("", STRCTX_INFO); + sprintf(d_show_buf, "[Dictionary contains %d entries:]", dict_entries); + write_to_transcript_file(d_show_buf, STRCTX_INFO); + + d_show_len = 0; if (dict_entries != 0) - { d_show_total = 0; d_show_to = d_buffer; + { if (!glulx_mode) recursively_show_z(root); else recursively_show_g(root); } - if (d_show_total != 0) write_to_transcript_file(d_buffer); + if (d_show_len != 0) write_to_transcript_file(d_show_buf, STRCTX_DICT); + + my_free(&d_show_buf, "dictionary display buffer"); + d_show_len = 0; d_show_buf = NULL; } /* ========================================================================= */ @@ -2209,8 +2357,6 @@ extern void init_text_vars(void) grandtable = NULL; grandflags = NULL; no_chars_transcribed = 0; - is_abbreviation = FALSE; - put_strings_in_low_memory = FALSE; for (j=0; j<256; j++) abbrevs_lookup[j] = -1; @@ -2265,6 +2411,10 @@ extern void text_allocate_arrays(void) = my_malloc(MAX_STATIC_STRINGS,"static strings holding area"); low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings"); + d_show_buf = NULL; + d_show_size = 0; + d_show_len = 0; + huff_entities = NULL; hufflist = NULL; unicode_usage_entries = NULL;