X-Git-Url: https://jxself.org/git/?p=inform.git;a=blobdiff_plain;f=src%2Ftext.c;fp=src%2Ftext.c;h=149f0f9a98a8ff45ea33b607c0a9deffd21b3a28;hp=03d11301d6668ecb284aea2990b9d1f2ea6c892c;hb=56a5292888e1d46fe3033cd1d5c636051692453f;hpb=20cbfff96015938809d0e3da6cd0d83b76d27f14 diff --git a/src/text.c b/src/text.c index 03d1130..149f0f9 100644 --- a/src/text.c +++ b/src/text.c @@ -1,8 +1,8 @@ /* ------------------------------------------------------------------------- */ /* "text" : Text translation, the abbreviations optimiser, the dictionary */ /* */ -/* Part of Inform 6.41 */ -/* copyright (c) Graham Nelson 1993 - 2022 */ +/* Part of Inform 6.42 */ +/* copyright (c) Graham Nelson 1993 - 2024 */ /* */ /* Inform is free software: you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ @@ -105,11 +105,10 @@ static int unicode_entity_index(int32 unicode); abbreviation *abbreviations; /* Allocated up to no_abbreviations */ static memory_list abbreviations_memlist; -/* Memory to hold the text of any abbreviation strings declared. This is - counted in units of MAX_ABBREV_LENGTH bytes. (An abbreviation must fit - in that many bytes, null included.) */ -uchar *abbreviations_at; /* Allocated up to no_abbreviations */ -static memory_list abbreviations_at_memlist; +/* Memory to hold the text of any abbreviation strings declared. */ +static int32 abbreviations_totaltext; +static char *abbreviations_text; /* Allocated up to abbreviations_totaltext */ +static memory_list abbreviations_text_memlist; static int *abbreviations_optimal_parse_schedule; static memory_list abbreviations_optimal_parse_schedule_memlist; @@ -137,6 +136,11 @@ uchar *translated_text; /* Area holding translated strings static_strings_area below */ static memory_list translated_text_memlist; +static char *temp_symbol; /* Temporary symbol name used while + processing "@(...)". */ +static memory_list temp_symbol_memlist; + + static int32 text_out_pos; /* The "program counter" during text translation: the next position to write Z-coded text output to */ @@ -162,26 +166,26 @@ static int text_out_overflow; /* During text translation, becomes /* ------------------------------------------------------------------------- */ static void make_abbrevs_lookup(void) -{ int bubble_sort, j, k, l; char p[MAX_ABBREV_LENGTH]; char *p1, *p2; +{ int bubble_sort, j, k; + char *p1, *p2; do { bubble_sort = FALSE; for (j=0; j=0; j--) - { p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH; + { p1=abbreviation_text(j); abbrevs_lookup[(uchar)p1[0]]=j; abbreviations[j].freq=0; } @@ -206,9 +210,13 @@ static void make_abbrevs_lookup(void) static int try_abbreviations_from(unsigned char *text, int i, int from) { int j, k; uchar *p, c; c=text[i]; - for (j=from, p=(uchar *)abbreviations_at+from*MAX_ABBREV_LENGTH; - (j= no_abbreviations) { + compiler_error("Invalid abbrev for abbreviation_text()"); + return ""; + } + + return abbreviations_text + abbreviations[num].textpos; +} + /* ------------------------------------------------------------------------- */ /* The front end routine for text translation. */ /* strctx indicates the purpose of the string. This is mostly used for */ @@ -256,6 +286,18 @@ extern void make_abbreviation(char *text) /* specially during compilation. */ /* ------------------------------------------------------------------------- */ +/* TODO: When called from a print statement (parse_print()), it would be + nice to detect if the generated string is exactly one character. In that + case, we could return the character value and a flag to indicate the + caller could use @print_char/@streamchar/@new_line/@streamunichar + instead of printing a compiled string. + + We'd need a new STRCTX value or two to distinguish direct-printed strings + from referenceable strings. + + Currently, parse_print() checks for the "^" case manually, which is a + bit icky. */ + extern int32 compile_string(char *b, int strctx) { int32 i, j, k; uchar *c; @@ -425,7 +467,9 @@ static void write_z_char_g(int i) /* Helper routine to compute the weight, in units, of a character handled by the Z-Machine */ static int zchar_weight(int c) { - int lookup = iso_to_alphabet_grid[c]; + int lookup; + if (c == ' ') return 1; + lookup = iso_to_alphabet_grid[c]; if (lookup < 0) return 4; if (lookup < 26) return 1; return 2; @@ -543,9 +587,12 @@ extern int32 translate_text(int32 p_limit, char *s_text, int strctx) { c = text_in[j]; /* Loop on all abbreviations starting with what is in c. */ - for (k=from, q=(uchar *)abbreviations_at+from*MAX_ABBREV_LENGTH; - (ku.val*MAX_ABBREV_LENGTH; + cx = abbreviation_text(ent->u.val); compression_table_size += (1 + 1 + strlen(cx)); break; case 4: @@ -1423,12 +1472,27 @@ typedef struct optab_s int32 popularity; int32 score; int32 location; - char text[MAX_ABBREV_LENGTH]; + char *text; /* allocated to textsize, min 4 */ + int32 textsize; } optab; static int32 MAX_BESTYET; static optab *bestyet; /* High-score entries (up to MAX_BESTYET used/allocated) */ static optab *bestyet2; /* The selected entries (up to selected used; allocated to MAX_ABBREVS) */ +static void optab_copy(optab *dest, const optab *src) +{ + dest->length = src->length; + dest->popularity = src->popularity; + dest->score = src->score; + dest->location = src->location; + if (src->length+1 > dest->textsize) { + int32 oldsize = dest->textsize; + dest->textsize = (src->length+1)*2; + my_realloc(&dest->text, oldsize, dest->textsize, "bestyet2.text"); + } + strcpy(dest->text, src->text); +} + static int pass_no; static void optimise_pass(void) @@ -1459,7 +1523,7 @@ static void optimise_pass(void) for (j=0; j=2)&&(nl=2) { nl++; for (j2=0; j2 bestyet[i].textsize) { + int32 oldsize = bestyet[i].textsize; + bestyet[i].textsize = (nl+1)*2; + my_realloc(&bestyet[i].text, oldsize, bestyet[i].textsize, "bestyet.text"); + } for (j2=0; j20) { char testtext[4]; - bestyet2[selected++]=bestyet[maxat]; + optab_copy(&bestyet2[selected++], &bestyet[maxat]); if (optabbrevs_trace_setting >= 1) { printf( @@ -1813,14 +1899,11 @@ int dict_entries; /* Total number of records entered */ /* In modifying the compiler for Glulx, I found it easier to discard the */ /* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */ /* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */ -/* as before. In Glulx, it can be any value up to MAX_DICT_WORD_SIZE. */ -/* (That limit is defined as 40 in the header; it exists only for a few */ -/* static buffers, and can be increased without using significant memory.) */ +/* as before. In Glulx, it can be any value. */ /* */ -/* ...Well, that certainly bit me on the butt, didn't it. In further */ -/* modifying the compiler to generate a Unicode dictionary, I have to */ -/* store four-byte values in the uchar array. This is handled by making */ -/* the array size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE).*/ +/* In further modifying the compiler to generate a Unicode dictionary, */ +/* I have to store four-byte values in the uchar array. We make the array */ +/* size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE). */ /* Then we store the 32-bit character value big-endian. This lets us */ /* continue to compare arrays bytewise, which is a nice simplification. */ /* ------------------------------------------------------------------------- */ @@ -1840,14 +1923,17 @@ extern void copy_sorts(uchar *d1, uchar *d2) d1[i] = d2[i]; } -static uchar prepared_sort[MAX_DICT_WORD_BYTES]; /* Holds the sort code - of current word */ +static memory_list prepared_sort_memlist; +static uchar *prepared_sort; /* Holds the sort code of current word */ -static int number_and_case; +static int prepared_dictflags_pos; /* Dict flags set by the current word */ +static int prepared_dictflags_neg; /* Dict flags *not* set by the word */ /* Also used by verbs.c */ static void dictionary_prepare_z(char *dword, uchar *optresult) -{ int i, j, k, k2, wd[13]; int32 tot; +{ int i, j, k, k2, wd[13]; + int32 tot; + int negflag; /* A rapid text translation algorithm using only the simplified rules applying to the text of dictionary entries: first produce a sequence @@ -1855,22 +1941,50 @@ static void dictionary_prepare_z(char *dword, uchar *optresult) int dictsize = (version_number==3) ? 6 : 9; - number_and_case = 0; + prepared_dictflags_pos = 0; + prepared_dictflags_neg = 0; - for (i=0, j=0; dword[j]!=0; i++, j++) - { if ((dword[j] == '/') && (dword[j+1] == '/')) - { for (j+=2; dword[j] != 0; j++) - { switch(dword[j]) - { case 'p': number_and_case |= 4; break; + for (i=0, j=0; dword[j]!=0; j++) + { + if ((dword[j] == '/') && (dword[j+1] == '/')) + { + /* The rest of the word is dict flags. Run through them. */ + negflag = FALSE; + for (j+=2; dword[j] != 0; j++) + { + switch(dword[j]) + { + case '~': + if (!dword[j+1]) + error_named("'//~' with no flag character (pn) in dict word", dword); + negflag = !negflag; + break; + case 'p': + if (!negflag) + prepared_dictflags_pos |= 4; + else + prepared_dictflags_neg |= 4; + negflag = FALSE; + break; + case 'n': + if (!negflag) + prepared_dictflags_pos |= 128; + else + prepared_dictflags_neg |= 128; + negflag = FALSE; + break; default: - error_named("Expected 'p' after '//' \ -to give number of dictionary word", dword); + error_named("Expected flag character (pn~) after '//' in dict word", dword); break; } } break; } - if (i>=dictsize) break; + + /* LONG_DICT_FLAG_BUG emulates the old behavior where we stop looping + at dictsize. */ + if (LONG_DICT_FLAG_BUG && i>=dictsize) + break; k=(int) dword[j]; if (k==(int) '\'') @@ -1901,26 +2015,37 @@ apostrophe in", dword); char_error("Character can be printed but not input:", k); else { /* Use 4 more Z-chars to encode a ZSCII escape sequence */ - - wd[i++] = 5; wd[i++] = 6; + if (i dictsize) + compiler_error("dict word buffer overflow"); + + /* Fill up to the end of the dictionary block with PAD characters + (for safety, we right-pad to 9 chars even in V3) */ for (; i<9; i++) wd[i]=5; /* The array of Z-chars is converted to two or three 2-byte blocks */ - + ensure_memory_list_available(&prepared_sort_memlist, DICT_WORD_BYTES); + tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10); prepared_sort[1]=tot%0x100; prepared_sort[0]=(tot/0x100)%0x100; @@ -1947,25 +2072,48 @@ static void dictionary_prepare_g(char *dword, uchar *optresult) { int i, j, k; int32 unicode; + int negflag; - number_and_case = 0; + prepared_dictflags_pos = 0; + prepared_dictflags_neg = 0; - for (i=0, j=0; (dword[j]!=0); i++, j++) { + for (i=0, j=0; (dword[j]!=0); j++) { if ((dword[j] == '/') && (dword[j+1] == '/')) { + /* The rest of the word is dict flags. Run through them. */ + negflag = FALSE; for (j+=2; dword[j] != 0; j++) { switch(dword[j]) { + case '~': + if (!dword[j+1]) + error_named("'//~' with no flag character (pn) in dict word", dword); + negflag = !negflag; + break; case 'p': - number_and_case |= 4; - break; + if (!negflag) + prepared_dictflags_pos |= 4; + else + prepared_dictflags_neg |= 4; + negflag = FALSE; + break; + case 'n': + if (!negflag) + prepared_dictflags_pos |= 128; + else + prepared_dictflags_neg |= 128; + negflag = FALSE; + break; default: - error_named("Expected 'p' after '//' \ -to give gender or number of dictionary word", dword); + error_named("Expected flag character (pn~) after '//' in dict word", dword); break; } } break; } - if (i>=DICT_WORD_SIZE) break; + + /* LONG_DICT_FLAG_BUG emulates the old behavior where we stop looping + at DICT_WORD_SIZE. */ + if (LONG_DICT_FLAG_BUG && i>=DICT_WORD_SIZE) + break; k= ((unsigned char *)dword)[j]; if (k=='\'') @@ -1996,17 +2144,27 @@ Define DICT_CHAR_SIZE=4 for a Unicode-compatible dictionary."); if (k >= (unsigned)'A' && k <= (unsigned)'Z') k += ('a' - 'A'); + ensure_memory_list_available(&prepared_sort_memlist, DICT_WORD_BYTES); + if (DICT_CHAR_SIZE == 1) { - prepared_sort[i] = k; + if (i> 24) & 0xFF; - prepared_sort[4*i+1] = (k >> 16) & 0xFF; - prepared_sort[4*i+2] = (k >> 8) & 0xFF; - prepared_sort[4*i+3] = (k) & 0xFF; + if (i> 24) & 0xFF; + prepared_sort[4*i+1] = (k >> 16) & 0xFF; + prepared_sort[4*i+2] = (k >> 8) & 0xFF; + prepared_sort[4*i+3] = (k) & 0xFF; + i++; + } } } + if (i > DICT_WORD_SIZE) + compiler_error("dict word buffer overflow"); + + /* Right-pad with zeroes */ if (DICT_CHAR_SIZE == 1) { for (; i 3) { p[4]=prepared_sort[4]; p[5]=prepared_sort[5]; } - p[res]=x; p[res+1]=y; - if (!ZCODE_LESS_DICT_DATA) p[res+2]=z; - if (x & 128) p[res] = (p[res])|number_and_case; + p[res]=flag1; p[res+1]=flag2; + if (!ZCODE_LESS_DICT_DATA) p[res+2]=flag3; dictionary_top += DICT_ENTRY_BYTE_LENGTH; @@ -2288,11 +2449,9 @@ extern int dictionary_add(char *dword, int x, int y, int z) p[i] = prepared_sort[i]; p += DICT_WORD_BYTES; - p[0] = 0; p[1] = x; - p[2] = y/256; p[3] = y%256; - p[4] = 0; p[5] = z; - if (x & 128) - p[1] |= number_and_case; + p[0] = (flag1/256); p[1] = (flag1%256); + p[2] = (flag2/256); p[3] = (flag2%256); + p[4] = (flag3/256); p[5] = (flag3%256); dictionary_top += DICT_ENTRY_BYTE_LENGTH; @@ -2512,11 +2671,13 @@ static void recursively_show_z(int node, int level) flags = (int) p[res]; if (flags & 128) - { printf("noun "); - if (flags & 4) printf("p"); else printf(" "); - printf(" "); - } - else printf(" "); + printf("noun "); + else + printf(" "); + if (flags & 4) + printf("p "); + else + printf(" "); if (flags & 8) { if (grammar_version_number == 1) printf("preposition:%d ", (int) p[res+2]); @@ -2571,11 +2732,13 @@ static void recursively_show_g(int node, int level) for (i=0; i