/* ------------------------------------------------------------------------- */
/* "text" : Text translation, the abbreviations optimiser, the dictionary */
/* */
-/* Part of Inform 6.35 */
-/* copyright (c) Graham Nelson 1993 - 2020 */
+/* Part of Inform 6.42 */
+/* copyright (c) Graham Nelson 1993 - 2024 */
/* */
/* Inform is free software: you can redistribute it and/or modify */
/* it under the terms of the GNU General Public License as published by */
/* GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
-/* along with Inform. If not, see https://gnu.org/licenses/ *
+/* along with Inform. If not, see https://gnu.org/licenses/ */
/* */
/* ------------------------------------------------------------------------- */
#include "header.h"
-uchar *low_strings, *low_strings_top; /* Start and next free byte in the low
- strings pool */
+uchar *low_strings; /* Allocated to low_strings_top */
+int32 low_strings_top;
+static memory_list low_strings_memlist;
int32 static_strings_extent; /* Number of bytes of static strings
made so far */
-memory_block static_strings_area; /* Used if (!temporary_files_switch) to
- hold the static strings area so far */
+uchar *static_strings_area; /* Used to hold the static strings
+ area so far
+ Allocated to static_strings_extent */
+memory_list static_strings_area_memlist;
-static uchar *strings_holding_area; /* Area holding translated strings
- until they are moved into either
- a temporary file, or the
- static_strings_area below */
-
-char *all_text, *all_text_top; /* Start and next byte free in (large)
- text buffer holding the entire text
+static char *all_text; /* Text buffer holding the entire text
of the game, when it is being
- recorded */
-int put_strings_in_low_memory, /* When TRUE, put static strings in
- the low strings pool at 0x100 rather
- than in the static strings area */
- is_abbreviation, /* When TRUE, the string being trans
- is itself an abbreviation string
- so can't make use of abbreviations */
- abbrevs_lookup_table_made, /* The abbreviations lookup table is
+ recorded
+ (Allocated to all_text_top) */
+static memory_list all_text_memlist;
+static int32 all_text_top;
+
+int abbrevs_lookup_table_made, /* The abbreviations lookup table is
constructed when the first non-
abbreviation string is translated:
this flag is TRUE after that */
with ASCII character n, or -1
if none of the abbreviations do */
int no_abbreviations; /* No of abbreviations defined so far */
-uchar *abbreviations_at; /* Memory to hold the text of any
- abbreviation strings declared */
/* ------------------------------------------------------------------------- */
/* Glulx string compression storage */
/* ------------------------------------------------------------------------- */
int no_unicode_chars; /* Number of distinct Unicode chars
used. (Beyond 0xFF.) */
-static int MAX_CHARACTER_SET; /* Number of possible entities */
huffentity_t *huff_entities; /* The list of entities (characters,
abbreviations, @.. escapes, and
the terminator) */
int32 *compressed_offsets; /* The beginning of every string in
the game, relative to the beginning
of the Huffman table. (So entry 0
- is equal to compression_table_size)*/
+ is equal to compression_table_size).
+ Allocated to no_strings at
+ compress_game_text() time. */
+static memory_list compressed_offsets_memlist;
+
+unicode_usage_t *unicode_usage_entries; /* Allocated to no_unicode_chars */
+static memory_list unicode_usage_entries_memlist;
/* Number of hash chains used to look up Unicode characters; a character
   is assigned to bucket (value % UNICODE_HASH_BUCKETS). */
#define UNICODE_HASH_BUCKETS (64)
-unicode_usage_t *unicode_usage_entries;
-static unicode_usage_t *unicode_usage_hash[UNICODE_HASH_BUCKETS];
/* Head of each hash chain, stored as an index into unicode_usage_entries
   rather than as a pointer. unicode_entity_index() stops walking a chain
   at the first negative index, so a negative value marks an empty bucket
   or the end of a chain.
   NOTE(review): the initialisation of this table is not visible here;
   presumably every bucket starts out negative -- confirm. */
+static int unicode_usage_hash[UNICODE_HASH_BUCKETS];
static int unicode_entity_index(int32 unicode);
/* Abbreviation arrays */
/* ------------------------------------------------------------------------- */
-int *abbrev_values;
-int *abbrev_quality;
-int *abbrev_freqs;
+abbreviation *abbreviations; /* Allocated up to no_abbreviations */
+static memory_list abbreviations_memlist;
+
+/* Memory to hold the text of any abbreviation strings declared. */
+static int32 abbreviations_totaltext;
+static char *abbreviations_text; /* Allocated up to abbreviations_totaltext */
+static memory_list abbreviations_text_memlist;
+
+static int *abbreviations_optimal_parse_schedule;
+static memory_list abbreviations_optimal_parse_schedule_memlist;
+
+static int *abbreviations_optimal_parse_scores;
+static memory_list abbreviations_optimal_parse_scores_memlist;
/* ------------------------------------------------------------------------- */
zchars_trans_in_last_string; /* Number of Z-chars in last string:
needed only for abbrev efficiency
calculation in "directs.c" */
-static int32 total_zchars_trans, /* Number of Z-chars of text out
+static int32 total_zchars_trans; /* Number of Z-chars of text out
(only used to calculate the above) */
- no_chars_transcribed; /* Number of ASCII chars written to
- the text transcription area (used
- for the -r and -u switches) */
static int zchars_out_buffer[3], /* During text translation, a buffer of
3 Z-chars at a time: when it's full
these are written as a 2-byte word */
zob_index; /* Index (0 to 2) into it */
-static unsigned char *text_out_pc; /* The "program counter" during text
- translation: the next address to
+uchar *translated_text; /* Area holding translated strings
+ until they are moved into the
+ static_strings_area below */
+static memory_list translated_text_memlist;
+
+static char *temp_symbol; /* Temporary symbol name used while
+ processing "@(...)". */
+static memory_list temp_symbol_memlist;
+
+
+static int32 text_out_pos; /* The "program counter" during text
+ translation: the next position to
write Z-coded text output to */
-static unsigned char *text_out_limit; /* The upper limit of text_out_pc
- during text translation */
+static int32 text_out_limit; /* The upper limit of text_out_pos
+ during text translation (or -1
+ for no limit) */
static int text_out_overflow; /* During text translation, becomes
- true if text_out_pc tries to pass
+ true if text_out_pos tries to pass
text_out_limit */
/* ------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------- */
/* Sort the abbreviations and build the first-character lookup table.

   After this routine:
     - the abbreviations are in descending strcmp() order of their text.
       Because a string compares greater than any proper prefix of itself,
       a longer abbreviation always precedes a shorter one that is its
       prefix;
     - abbrevs_lookup[c] holds the lowest index of an abbreviation whose
       text begins with byte c (entries for other bytes are not written
       here);
     - every abbreviation's usage count is reset to zero;
     - abbrevs_lookup_table_made is TRUE. */
static void make_abbrevs_lookup(void)
-{ int bubble_sort, j, k, l; char p[MAX_ABBREV_LENGTH]; char *p1, *p2;
+{ int bubble_sort, j, k;
+ char *p1, *p2;
/* Repeat full passes over all (j,k) pairs until a pass makes no swap. */
do
{ bubble_sort = FALSE;
for (j=0; j<no_abbreviations; j++)
for (k=j+1; k<no_abbreviations; k++)
- { p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
- p2=(char *)abbreviations_at+k*MAX_ABBREV_LENGTH;
+ { p1=abbreviation_text(j);
+ p2=abbreviation_text(k);
/* Earlier entry sorts lower than the later one: exchange them. */
if (strcmp(p1,p2)<0)
- { strcpy(p,p1); strcpy(p1,p2); strcpy(p2,p);
- l=abbrev_values[j]; abbrev_values[j]=abbrev_values[k];
- abbrev_values[k]=l;
- l=abbrev_quality[j]; abbrev_quality[j]=abbrev_quality[k];
- abbrev_quality[k]=l;
+ {
/* Swapping the whole record carries textpos along with it, so the
   text itself never has to be moved within the text pool. */
+ abbreviation temp = abbreviations[j];
+ abbreviations[j] = abbreviations[k];
+ abbreviations[k] = temp;
bubble_sort = TRUE;
}
}
} while (bubble_sort);
/* Walk downwards so that, for each first byte, the last value written
   (and therefore the one kept) is the lowest matching index. */
for (j=no_abbreviations-1; j>=0; j--)
- { p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
+ { p1=abbreviation_text(j);
abbrevs_lookup[(uchar)p1[0]]=j;
- abbrev_freqs[j]=0;
+ abbreviations[j].freq=0;
}
abbrevs_lookup_table_made = TRUE;
}
/* Try to match an abbreviation against the text starting at text[i].

   text : the (mutable) string being translated
   i    : offset in text at which to attempt the match
   from : index of the first abbreviation to try; the scan continues
          through consecutive abbreviations for as long as their first
          byte equals text[i]

   Returns the index of the first abbreviation whose whole text matches,
   or -1 if none does. On a match the abbreviation's usage count is
   incremented and, when not in Glulx mode, the matched bytes of text are
   overwritten with the value 1. */
static int try_abbreviations_from(unsigned char *text, int i, int from)
{ int j, k; uchar *p, c;
c=text[i];
- for (j=from, p=(uchar *)abbreviations_at+from*MAX_ABBREV_LENGTH;
- (j<no_abbreviations)&&(c==p[0]); j++, p+=MAX_ABBREV_LENGTH)
- { if (text[i+1]==p[1])
+ for (j=from;
+ j<no_abbreviations;
+ j++)
+ {
+ p=(uchar *)abbreviations_text+abbreviations[j].textpos;
/* Stop at the first abbreviation that begins with a different byte. */
+ if (c != p[0]) break;
/* NOTE(review): the comparison below starts at the second byte and the
   inner loop at k=2, which appears to assume every abbreviation has at
   least two characters; for a one-character abbreviation p[2] lies past
   its terminator -- confirm such abbreviations cannot reach this code. */
+ if (text[i+1]==p[1])
{ for (k=2; p[k]!=0; k++)
if (text[i+k]!=p[k]) goto NotMatched;
/* Blank out the consumed characters with the marker value 1. */
if (!glulx_mode) {
for (k=0; p[k]!=0; k++) text[i+k]=1;
}
- abbrev_freqs[j]++;
+ abbreviations[j].freq++;
return(j);
NotMatched: ;
}
return(-1);
}
+/* Create an abbreviation. */
/* text is the NUL-terminated abbreviation string. When economy_switch is
   off nothing is recorded. Otherwise the string is appended (with its
   terminator) to the abbreviations_text pool, compiled with the
   STRCTX_ABBREV context, and a new entry is filled in at index
   no_abbreviations, which is then incremented. A warning is issued when
   the computed quality is zero or negative. */
extern void make_abbreviation(char *text)
{
- strcpy((char *)abbreviations_at
- + no_abbreviations*MAX_ABBREV_LENGTH, text);
+ int alen;
+ int32 pos;
+
+ /* If -e mode is off, we won't waste space creating an abbreviation entry. */
+ if (!economy_switch)
+ return;
+
+ alen = strlen(text);
/* The new text is stored at the current end of the text pool. */
+ pos = abbreviations_totaltext;
+
/* Make room for one more entry and for alen bytes plus the terminator. */
+ ensure_memory_list_available(&abbreviations_memlist, no_abbreviations+1);
+ ensure_memory_list_available(&abbreviations_text_memlist, pos+alen+1);
- is_abbreviation = TRUE;
- abbrev_values[no_abbreviations] = compile_string(text, TRUE, TRUE);
- is_abbreviation = FALSE;
+ strcpy(abbreviations_text+pos, text);
+ abbreviations_totaltext += (alen+1);
+
+ abbreviations[no_abbreviations].textpos = pos;
+ abbreviations[no_abbreviations].textlen = alen;
+ abbreviations[no_abbreviations].value = compile_string(text, STRCTX_ABBREV);
+ abbreviations[no_abbreviations].freq = 0;
/* The quality is the number of Z-chars saved by using this */
/* abbreviation: note that it takes 2 Z-chars to print it. */
- abbrev_quality[no_abbreviations++] = zchars_trans_in_last_string - 2;
+ abbreviations[no_abbreviations].quality = zchars_trans_in_last_string - 2;
+
+ if (abbreviations[no_abbreviations].quality <= 0) {
+ warning_named("Abbreviation does not save any characters:", text);
+ }
+
/* Only now does the new entry become visible through no_abbreviations. */
+ no_abbreviations++;
+}
+
+/* Return a pointer to the (uncompressed) abbreviation text.
+ This should be treated as temporary; it is only valid until the next
+ make_abbreviation() call. */
/* num is the abbreviation index, 0 <= num < no_abbreviations. An index
   outside that range is reported through compiler_error() and an empty
   string is returned instead. */
+extern char *abbreviation_text(int num)
+{
+ if (num < 0 || num >= no_abbreviations) {
+ compiler_error("Invalid abbrev for abbreviation_text()");
/* Hand back a valid (empty) string so callers can still read it. */
+ return "";
+ }
+
/* The text lives in the shared pool at the offset recorded in the entry. */
+ return abbreviations_text + abbreviations[num].textpos;
}
/* ------------------------------------------------------------------------- */
-/* The front end routine for text translation */
+/* The front end routine for text translation. */
+/* strctx indicates the purpose of the string. This is mostly used for */
+/* informational output (gametext.txt), but we treat some string contexts */
+/* specially during compilation. */
/* ------------------------------------------------------------------------- */
-extern int32 compile_string(char *b, int in_low_memory, int is_abbrev)
-{ int i, j; uchar *c;
-
- is_abbreviation = is_abbrev;
-
- /* Put into the low memory pool (at 0x100 in the Z-machine) of strings */
- /* which may be wanted as possible entries in the abbreviations table */
+/* TODO: When called from a print statement (parse_print()), it would be
+ nice to detect if the generated string is exactly one character. In that
+ case, we could return the character value and a flag to indicate the
+ caller could use @print_char/@streamchar/@new_line/@streamunichar
+ instead of printing a compiled string.
+
+ We'd need a new STRCTX value or two to distinguish direct-printed strings
+ from referenceable strings.
+
+ Currently, parse_print() checks for the "^" case manually, which is a
+ bit icky. */
+
+extern int32 compile_string(char *b, int strctx)
+{ int32 i, j, k;
+ uchar *c;
+ int in_low_memory;
+
+ if (execution_never_reaches_here) {
+ /* No need to put strings into gametext.txt or the static/low
+ strings areas. */
+ if (strctx == STRCTX_GAME || strctx == STRCTX_GAMEOPC || strctx == STRCTX_LOWSTRING || strctx == STRCTX_INFIX) {
+ /* VENEER and VENEEROPC are only used at the translate_text level,
+ so we don't have to catch them here. */
+ return 0;
+ }
+ }
+
+ /* In Z-code, abbreviations go in the low memory pool (0x100). So
+ do strings explicitly defined with the Lowstring directive.
+ (In Glulx, the in_low_memory flag is ignored.) */
+ in_low_memory = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING);
if (!glulx_mode && in_low_memory)
- { j=subtract_pointers(low_strings_top,low_strings);
- low_strings_top=translate_text(low_strings_top, low_strings+MAX_LOW_STRINGS, b);
- if (!low_strings_top)
- memoryerror("MAX_LOW_STRINGS", MAX_LOW_STRINGS);
- is_abbreviation = FALSE;
+ {
+ k = translate_text(-1, b, strctx);
+ if (k<0) {
+ error("text translation failed");
+ k = 0;
+ }
+ ensure_memory_list_available(&low_strings_memlist, low_strings_top+k);
+ memcpy(low_strings+low_strings_top, translated_text, k);
+ j = low_strings_top;
+ low_strings_top += k;
return(0x21+(j/2));
}
if (glulx_mode && done_compression)
compiler_error("Tried to add a string after compression was done.");
- c = translate_text(strings_holding_area, strings_holding_area+MAX_STATIC_STRINGS, b);
- if (!c)
- memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);
-
- i = subtract_pointers(c, strings_holding_area);
+ i = translate_text(-1, b, strctx);
+ if (i < 0) {
+ error("text translation failed");
+ i = 0;
+ }
/* Insert null bytes as needed to ensure that the next static string */
/* also occurs at an address expressible as a packed address */
textalign = scale_factor;
while ((i%textalign)!=0)
{
- if (i+2 > MAX_STATIC_STRINGS)
- memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);
- i+=2; *c++ = 0; *c++ = 0;
+ ensure_memory_list_available(&translated_text_memlist, i+2);
+ translated_text[i++] = 0;
+ translated_text[i++] = 0;
}
}
j = static_strings_extent;
- if (temporary_files_switch)
- for (c=strings_holding_area; c<strings_holding_area+i;
- c++, static_strings_extent++)
- fputc(*c,Temp1_fp);
- else
- for (c=strings_holding_area; c<strings_holding_area+i;
- c++, static_strings_extent++)
- write_byte_to_memory_block(&static_strings_area,
- static_strings_extent, *c);
-
- is_abbreviation = FALSE;
+ ensure_memory_list_available(&static_strings_area_memlist, static_strings_extent+i);
+ for (c=translated_text; c<translated_text+i;
+ c++, static_strings_extent++)
+ static_strings_area[static_strings_extent] = *c;
if (!glulx_mode) {
return(j/scale_factor);
zob_index=0;
j= zchars_out_buffer[0]*0x0400 + zchars_out_buffer[1]*0x0020
+ zchars_out_buffer[2];
- if (text_out_pc+2 > text_out_limit) {
- text_out_overflow = TRUE;
- return;
+
+ if (text_out_limit >= 0) {
+ if (text_out_pos+2 > text_out_limit) {
+ text_out_overflow = TRUE;
+ return;
+ }
+ }
+ else {
+ ensure_memory_list_available(&translated_text_memlist, text_out_pos+2);
}
- text_out_pc[0] = j/256; text_out_pc[1] = j%256; text_out_pc+=2;
+
+ translated_text[text_out_pos++] = j/256; translated_text[text_out_pos++] = j%256;
total_bytes_trans+=2;
}
/* ------------------------------------------------------------------------- */
/* Finish the Z-coded string currently being written: record how many
   Z-chars it used, pad the pending output with Z-char 5 until the buffer
   has been flushed (zob_index is back to 0), and add 128 to the first
   byte of the last two-byte word written, which sets its top bit.
   If fewer than two bytes have been written, no word exists to mark, so
   text_out_overflow is set instead. */
static void end_z_chars(void)
-{ unsigned char *p;
+{
/* Turn the running total remembered earlier into this string's count. */
zchars_trans_in_last_string=total_zchars_trans-zchars_trans_in_last_string;
while (zob_index!=0) write_z_char_z(5);
- p=(unsigned char *) text_out_pc;
- *(p-2)= *(p-2)+128;
+ if (text_out_pos < 2) {
+ /* Something went wrong. */
+ text_out_overflow = TRUE;
+ return;
+ }
/* text_out_pos-2 is the high byte of the most recently written word. */
+ translated_text[text_out_pos-2] += 128;
}
/* Glulx handles this much more simply -- compression is done elsewhere. */
/* Append the single byte i to translated_text at text_out_pos.
   When text_out_limit is non-negative it is a hard cap: a write that
   would pass it sets text_out_overflow and is dropped. When it is
   negative the buffer is grown as needed instead. Each byte written
   bumps both total_zchars_trans and total_bytes_trans by one. */
static void write_z_char_g(int i)
{
- ASSERT_GLULX();
- if (text_out_pc+1 > text_out_limit) {
- text_out_overflow = TRUE;
- return;
- }
- total_zchars_trans++;
- text_out_pc[0] = i;
- text_out_pc++;
- total_bytes_trans++;
+ ASSERT_GLULX();
+ if (text_out_limit >= 0) {
+ if (text_out_pos+1 > text_out_limit) {
+ text_out_overflow = TRUE;
+ return;
+ }
+ }
+ else {
/* No limit: make sure the buffer can hold one more byte. */
+ ensure_memory_list_available(&translated_text_memlist, text_out_pos+1);
+ }
+ total_zchars_trans++;
+ translated_text[text_out_pos++] = i;
+ total_bytes_trans++;
+}
+
+/* Helper routine to compute the weight, in units, of a character handled by the Z-Machine */
/* The result is the cost charged for writing character c literally when
   translate_text() scores the possible parses of a string; an
   abbreviation is charged 2 units there, so the units are presumably
   Z-chars.
     space                          -> 1
     iso_to_alphabet_grid[c] < 0    -> 4
     iso_to_alphabet_grid[c] < 26   -> 1
     anything else                  -> 2
   NOTE(review): presumably a negative grid value means "not in any
   alphabet" and values below 26 are the first alphabet row -- confirm
   against the definition of iso_to_alphabet_grid.
   c is used directly as an array index, so it must be a valid index
   into iso_to_alphabet_grid; the visible caller passes an unsigned char
   value. */
+static int zchar_weight(int c)
+{
+ int lookup;
+ if (c == ' ') return 1;
+ lookup = iso_to_alphabet_grid[c];
+ if (lookup < 0) return 4;
+ if (lookup < 26) return 1;
+ return 2;
}
/* ------------------------------------------------------------------------- */
/* The main routine "text.c" provides to the rest of Inform: the text */
-/* translator. p is the address to write output to, s_text the source text */
-/* and the return value is the next free address to write output to. */
-/* The return value will not exceed p_limit. If the translation tries to */
-/* overflow this boundary, the return value will be NULL (and you should */
-/* display an error). */
+/* translator. s_text is the source text and the return value is the */
+/* number of bytes translated. */
+/* The translated text will be stored in translated_text. */
+/* */
+/* If p_limit is >= 0, the text length will not exceed that many bytes. */
+/* If the translation tries to overflow this boundary, the return value */
+/* will be -1. (You should display an error and not read translated_text.) */
+/* */
+/* If p_limit is negative, any amount of text is accepted (up to int32 */
+/* anyway). */
+/* */
/* Note that the source text may be corrupted by this routine. */
/* ------------------------------------------------------------------------- */
-extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text)
-{ int i, j, k, in_alphabet, lookup_value;
+extern int32 translate_text(int32 p_limit, char *s_text, int strctx)
+{ int i, j, k, in_alphabet, lookup_value, is_abbreviation;
int32 unicode; int zscii;
unsigned char *text_in;
- /* Cast the input and output streams to unsigned char: text_out_pc will
+ if (p_limit >= 0) {
+ ensure_memory_list_available(&translated_text_memlist, p_limit);
+ }
+
+ /* For STRCTX_ABBREV, the string being translated is itself an
+ abbreviation string, so it can't make use of abbreviations. Set
+ the is_abbreviation flag to indicate this.
+ The compiler has historically set this flag for the Lowstring
+ directive as well -- the in_low_memory and is_abbreviation flags were
+ always the same. I am preserving that convention. */
+ is_abbreviation = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING);
+
+
+ /* Cast the input and output streams to unsigned char: text_out_pos will
advance as bytes of Z-coded text are written, but text_in doesn't */
text_in = (unsigned char *) s_text;
- text_out_pc = (unsigned char *) p;
- text_out_limit = (unsigned char *) p_limit;
+ text_out_pos = 0;
+ text_out_limit = p_limit;
text_out_overflow = FALSE;
/* Remember the Z-chars total so that later we can subtract to find the
&& (!is_abbreviation))
make_abbrevs_lookup();
- /* If we're storing the whole game text to memory, then add this text */
+ /* If we're storing the whole game text to memory, then add this text.
+ We will put two newlines between each text and four at the very end.
+ (The optimise code does a lot of sloppy text[i+2], so the extra
+ two newlines past all_text_top are necessary.) */
if ((!is_abbreviation) && (store_the_text))
- { no_chars_transcribed += strlen(s_text)+2;
- if (no_chars_transcribed >= MAX_TRANSCRIPT_SIZE)
- memoryerror("MAX_TRANSCRIPT_SIZE", MAX_TRANSCRIPT_SIZE);
- sprintf(all_text_top, "%s\n\n", s_text);
- all_text_top += strlen(all_text_top);
+ { int addlen = strlen(s_text);
+ ensure_memory_list_available(&all_text_memlist, all_text_top+addlen+5);
+ sprintf(all_text+all_text_top, "%s\n\n\n\n", s_text);
+ /* Advance past two newlines. */
+ all_text_top += (addlen+2);
+ }
+
+ if (transcript_switch) {
+ /* Omit veneer strings, unless we're using the new transcript format, which includes everything. */
+ if ((!veneer_mode) || TRANSCRIPT_FORMAT == 1) {
+ int label = strctx;
+ if (veneer_mode) {
+ if (label == STRCTX_GAME)
+ label = STRCTX_VENEER;
+ else if (label == STRCTX_GAMEOPC)
+ label = STRCTX_VENEEROPC;
+ }
+ write_to_transcript_file(s_text, label);
+ }
+ }
+
+ /* Computing the optimal way to parse strings to insert abbreviations with dynamic programming */
+ /* (ref: R.A. Wagner , "Common phrases and minimum-space text storage", Commun. ACM, 16 (3) (1973)) */
+ /* We compute this optimal way here; it's stored in abbreviations_optimal_parse_schedule */
+ if (economy_switch)
+ {
+ uchar *q, c;
+ int l, min_score, from;
+ int text_in_length;
+
+ text_in_length = strlen( (char*) text_in);
+ ensure_memory_list_available(&abbreviations_optimal_parse_schedule_memlist, text_in_length);
+ ensure_memory_list_available(&abbreviations_optimal_parse_scores_memlist, text_in_length+1);
+
+ abbreviations_optimal_parse_scores[text_in_length] = 0;
+ for(j=text_in_length-1; j>=0; j--)
+ { /* Initial values: empty schedule, score = just write the letter without abbreviating. */
+ abbreviations_optimal_parse_schedule[j] = -1;
+ min_score = zchar_weight(text_in[j]) + abbreviations_optimal_parse_scores[j+1];
+ /* If there's an abbreviation starting with that letter... */
+ if ( (from = abbrevs_lookup[text_in[j]]) != -1)
+ {
+ c = text_in[j];
+ /* Loop on all abbreviations starting with what is in c. */
+ for (k=from;
+ k<no_abbreviations;
+ k++)
+ {
+ q=(uchar *)abbreviations_text+abbreviations[k].textpos;
+ if (c!=q[0]) break;
+ /* Let's compare; we also keep track of the length of the abbreviation. */
+ for (l=1; q[l]!=0; l++)
+ { if (text_in[j+l]!=q[l]) {goto NotMatched;}
+ }
+ /* We have a match (length l), but is it smaller in size? */
+ if (min_score > 2 + abbreviations_optimal_parse_scores[j+l])
+ { /* It is indeed smaller, so let's write it down in our schedule. */
+ min_score = 2 + abbreviations_optimal_parse_scores[j+l];
+ abbreviations_optimal_parse_schedule[j] = k;
+ }
+ NotMatched: ;
+ }
+ }
+ /* We gave it our best, this is the smallest we got. */
+ abbreviations_optimal_parse_scores[j] = min_score;
+ }
}
- if (transcript_switch && (!veneer_mode))
- write_to_transcript_file(s_text);
+
if (!glulx_mode) {
/* The empty string of Z-text is illegal, since it can't carry an end
}
}
- /* Try abbreviations if the economy switch set */
-
- if ((economy_switch) && (!is_abbreviation)
- && ((k=abbrevs_lookup[text_in[i]])!=-1))
- { if ((j=try_abbreviations_from(text_in, i, k))!=-1)
- { if (j<32) { write_z_char_z(2); write_z_char_z(j); }
- else { write_z_char_z(3); write_z_char_z(j-32); }
- }
+ /* Try abbreviations if the economy switch set. */
+ /* Look at the abbreviation schedule to see if we should abbreviate here. */
+ /* Note: Just because the schedule has something doesn't mean we should abbreviate there; */
+ /* sometimes you abbreviate before because it's better. If we have already replaced the */
+ /* char by a '1', it means we're in the middle of an abbreviation; don't try to abbreviate then. */
+ if ((economy_switch) && (!is_abbreviation) && text_in[i] != 1 &&
+ ((j = abbreviations_optimal_parse_schedule[i]) != -1))
+ {
+ /* Fill with 1s, which will get ignored by everyone else. */
+ uchar *p = (uchar *)abbreviation_text(j);
+ for (k=0; p[k]!=0; k++) text_in[i+k]=1;
+ /* Actually write the abbreviation in the story file. */
+ abbreviations[j].freq++;
+ /* Abbreviations run from MAX_DYNAMIC_STRINGS to 96. */
+ j += MAX_DYNAMIC_STRINGS;
+ write_z_char_z(j/32+1); write_z_char_z(j%32);
}
+
/* If Unicode switch set, use text_to_unicode to perform UTF-8
decoding */
/* '@' is the escape character in Inform string notation: the various
possibilities are:
- (printing only)
@@decimalnumber : write this ZSCII char (0 to 1023)
- @twodigits : write the abbreviation string with this
- decimal number
-
- (any string context)
+ @twodigits or : write the abbreviation string with this
+ @(digits) decimal number
+ @(symbol) : write the abbreviation string with this
+ (constant) value
@accentcode : this accented character: e.g.,
for @'e write an E-acute
@{...} : this Unicode char (in hex) */
if (text_in[i]=='@')
{ if (text_in[i+1]=='@')
{
- /* @@... */
+ /* @@... (ZSCII value) */
i+=2; j=atoi((char *) (text_in+i));
switch(j)
}
while (isdigit(text_in[i])) i++; i--;
}
+ else if (text_in[i+1]=='(')
+ {
+ /* @(...) (dynamic string) */
+ int len = 0, digits = 0;
+ i += 2;
+ /* This accepts "12xyz" as a symbol, which it really isn't,
+ but that just means it won't be found. */
+ while ((text_in[i] == '_' || isalnum(text_in[i]))) {
+ char ch = text_in[i++];
+ if (isdigit(ch)) digits++;
+ ensure_memory_list_available(&temp_symbol_memlist, len+1);
+ temp_symbol[len++] = ch;
+ }
+ ensure_memory_list_available(&temp_symbol_memlist, len+1);
+ temp_symbol[len] = '\0';
+ j = -1;
+ /* We would like to parse temp_symbol as *either* a decimal
+ number or a constant symbol. */
+ if (text_in[i] != ')' || len == 0) {
+ error("'@(...)' abbreviation must contain a symbol");
+ }
+ else if (digits == len) {
+ /* all digits; parse as decimal */
+ j = atoi(temp_symbol);
+ }
+ else {
+ int sym = get_symbol_index(temp_symbol);
+ if (sym < 0 || (symbols[sym].flags & UNKNOWN_SFLAG) || symbols[sym].type != CONSTANT_T || symbols[sym].marker) {
+ error_named("'@(...)' abbreviation expected a known constant value, but contained", temp_symbol);
+ }
+ else {
+ symbols[sym].flags |= USED_SFLAG;
+ j = symbols[sym].value;
+ }
+ }
+ if (!glulx_mode && j >= 96) {
+ error_max_dynamic_strings(j);
+ j = -1;
+ }
+ if (j >= MAX_DYNAMIC_STRINGS) {
+ error_max_dynamic_strings(j);
+ j = -1;
+ }
+ if (j >= 0) {
+ write_z_char_z(j/32+1); write_z_char_z(j%32);
+ }
+ else {
+ write_z_char_z(' '); /* error fallback */
+ }
+ }
else if (isdigit(text_in[i+1])!=0)
{ int d1, d2;
- /* @.. */
+ /* @.. (dynamic string) */
d1 = character_digit_value[text_in[i+1]];
d2 = character_digit_value[text_in[i+2]];
if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10))
error("'@..' must have two decimal digits");
else
- { i+=2;
- write_z_char_z(1); write_z_char_z(d1*10 + d2);
+ {
+ j = d1*10 + d2;
+ if (!glulx_mode && j >= 96) {
+ error_max_dynamic_strings(j);
+ j = -1;
+ }
+ if (j >= MAX_DYNAMIC_STRINGS) {
+ /* Shouldn't get here with two digits */
+ error_max_dynamic_strings(j);
+ j = -1;
+ }
+ i+=2;
+ if (j >= 0) {
+ write_z_char_z(j/32+1); write_z_char_z(j%32);
+ }
+ else {
+ write_z_char_z(' '); /* error fallback */
+ }
}
}
else
/* Flush the Z-characters output buffer and set the "end" bit */
end_z_chars();
-
}
else {
if ((economy_switch) && (compression_switch) && (!is_abbreviation)
&& ((k=abbrevs_lookup[text_in[i]])!=-1)
&& ((j=try_abbreviations_from(text_in, i, k)) != -1)) {
- char *cx = (char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
+ char *cx = abbreviation_text(j);
i += (strlen(cx)-1);
write_z_char_g('@');
write_z_char_g('A');
write_z_char_g(j);
while (isdigit(text_in[i])) i++; i--;
}
+ else if (text_in[i+1]=='(') {
+ int len = 0, digits = 0;
+ i += 2;
+ /* This accepts "12xyz" as a symbol, which it really isn't,
+ but that just means it won't be found. */
+ while ((text_in[i] == '_' || isalnum(text_in[i]))) {
+ char ch = text_in[i++];
+ if (isdigit(ch)) digits++;
+ ensure_memory_list_available(&temp_symbol_memlist, len+1);
+ temp_symbol[len++] = ch;
+ }
+ ensure_memory_list_available(&temp_symbol_memlist, len+1);
+ temp_symbol[len] = '\0';
+ j = -1;
+ /* We would like to parse temp_symbol as *either* a decimal
+ number or a constant symbol. */
+ if (text_in[i] != ')' || len == 0) {
+ error("'@(...)' abbreviation must contain a symbol");
+ }
+ else if (digits == len) {
+ /* all digits; parse as decimal */
+ j = atoi(temp_symbol);
+ }
+ else {
+ int sym = get_symbol_index(temp_symbol);
+ if (sym < 0 || (symbols[sym].flags & UNKNOWN_SFLAG) || symbols[sym].type != CONSTANT_T || symbols[sym].marker) {
+ error_named("'@(...)' abbreviation expected a known constant value, but contained", temp_symbol);
+ }
+ else {
+ symbols[sym].flags |= USED_SFLAG;
+ j = symbols[sym].value;
+ }
+ }
+ if (j >= MAX_DYNAMIC_STRINGS) {
+ error_max_dynamic_strings(j);
+ j = -1;
+ }
+ if (j+1 >= no_dynamic_strings)
+ no_dynamic_strings = j+1;
+ if (j >= 0) {
+ write_z_char_g('@');
+ write_z_char_g('D');
+ write_z_char_g('A' + ((j >>12) & 0x0F));
+ write_z_char_g('A' + ((j >> 8) & 0x0F));
+ write_z_char_g('A' + ((j >> 4) & 0x0F));
+ write_z_char_g('A' + ((j ) & 0x0F));
+ }
+ else {
+ write_z_char_g(' '); /* error fallback */
+ }
+ }
else if (isdigit(text_in[i+1])) {
int d1, d2;
d1 = character_digit_value[text_in[i+1]];
i += 2;
j = d1*10 + d2;
if (j >= MAX_DYNAMIC_STRINGS) {
- memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS);
- j = 0;
+ error_max_dynamic_strings(j);
+ j = -1;
}
if (j+1 >= no_dynamic_strings)
no_dynamic_strings = j+1;
- write_z_char_g('@');
- write_z_char_g('D');
- write_z_char_g('A' + ((j >>12) & 0x0F));
- write_z_char_g('A' + ((j >> 8) & 0x0F));
- write_z_char_g('A' + ((j >> 4) & 0x0F));
- write_z_char_g('A' + ((j ) & 0x0F));
+ if (j >= 0) {
+ write_z_char_g('@');
+ write_z_char_g('D');
+ write_z_char_g('A' + ((j >>12) & 0x0F));
+ write_z_char_g('A' + ((j >> 8) & 0x0F));
+ write_z_char_g('A' + ((j >> 4) & 0x0F));
+ write_z_char_g('A' + ((j ) & 0x0F));
+ }
+ else {
+ write_z_char_g(' '); /* error fallback */
+ }
}
}
else {
}
}
write_z_char_g(0);
+ zchars_trans_in_last_string=total_zchars_trans-zchars_trans_in_last_string;
}
if (text_out_overflow)
- return NULL;
+ return -1;
else
- return((uchar *) text_out_pc);
+ return text_out_pos;
}
/* Return the index in unicode_usage_entries of the entry for the given
   character value, creating a new entry if there is none yet.
   Entries are found through hash chains: unicode_usage_hash[] holds the
   index of the first entry in each bucket and each entry's next field
   holds the index of the following one; a negative index ends a chain.
   A new entry is appended at index no_unicode_chars (which is then
   incremented) and linked in at the head of its bucket's chain. */
static int unicode_entity_index(int32 unicode)
{
- unicode_usage_t *uptr;
int j;
/* NOTE(review): a negative unicode value would give a negative bucket
   number here -- presumably callers never pass one; confirm. */
int buck = unicode % UNICODE_HASH_BUCKETS;
- for (uptr = unicode_usage_hash[buck]; uptr; uptr=uptr->next) {
- if (uptr->ch == unicode)
+ for (j = unicode_usage_hash[buck]; j >= 0; j=unicode_usage_entries[j].next) {
+ if (unicode_usage_entries[j].ch == unicode)
break;
}
- if (uptr) {
- j = (uptr - unicode_usage_entries);
- }
- else {
- if (no_unicode_chars >= MAX_UNICODE_CHARS) {
- memoryerror("MAX_UNICODE_CHARS", MAX_UNICODE_CHARS);
- j = 0;
- }
- else {
- j = no_unicode_chars;
- no_unicode_chars++;
- uptr = unicode_usage_entries + j;
- uptr->ch = unicode;
- uptr->next = unicode_usage_hash[buck];
- unicode_usage_hash[buck] = uptr;
- }
/* j is negative only if the chain was exhausted without a match. */
+ if (j < 0) {
+ ensure_memory_list_available(&unicode_usage_entries_memlist, no_unicode_chars+1);
+ j = no_unicode_chars++;
+ unicode_usage_entries[j].ch = unicode;
/* Push the new entry onto the front of the bucket's chain. */
+ unicode_usage_entries[j].next = unicode_usage_hash[buck];
+ unicode_usage_hash[buck] = j;
}
return j;
int jx;
int ch;
int32 ix;
+ int max_char_set;
huffbitlist_t bits;
if (compression_switch) {
+ max_char_set = 257 + no_abbreviations + no_dynamic_strings + no_unicode_chars;
+
+ huff_entities = my_calloc(sizeof(huffentity_t), max_char_set*2+1,
+ "huffman entities");
+ hufflist = my_calloc(sizeof(huffentity_t *), max_char_set,
+ "huffman node list");
/* How many entities have we currently got? Well, 256 plus the
string-terminator plus Unicode chars plus abbreviations plus
huff_dynam_start = entities;
entities += no_dynamic_strings;
- if (entities > MAX_CHARACTER_SET)
- memoryerror("MAX_CHARACTER_SET",MAX_CHARACTER_SET);
+ if (entities > max_char_set)
+ compiler_error("Too many entities for max_char_set");
/* Characters */
for (jx=0; jx<256; jx++) {
no_huff_entities = 257;
huff_unicode_start = 257;
huff_abbrev_start = 257;
- huff_dynam_start = 257+MAX_ABBREVS;
+ huff_dynam_start = 257+no_abbreviations;
compression_table_size = 0;
}
- if (temporary_files_switch) {
- fclose(Temp1_fp);
- Temp1_fp=fopen(Temp1_Name,"rb");
- if (Temp1_fp==NULL)
- fatalerror("I/O failure: couldn't reopen temporary file 1");
- }
-
if (compression_switch) {
for (lx=0, ix=0; lx<no_strings; lx++) {
int done=FALSE;
int32 escapeval=0;
while (!done) {
- if (temporary_files_switch)
- ch = fgetc(Temp1_fp);
- else
- ch = read_byte_from_memory_block(&static_strings_area, ix);
+ ch = static_strings_area[ix];
ix++;
if (ix > static_strings_extent || ch < 0)
compiler_error("Read too much not-yet-compressed text.");
without actually doing the compression. */
compression_string_size = 0;
- if (temporary_files_switch) {
- fseek(Temp1_fp, 0, SEEK_SET);
- }
-
- if (no_strings >= MAX_NUM_STATIC_STRINGS)
- memoryerror("MAX_NUM_STATIC_STRINGS", MAX_NUM_STATIC_STRINGS);
+ ensure_memory_list_available(&compressed_offsets_memlist, no_strings);
for (lx=0, ix=0; lx<no_strings; lx++) {
int escapelen=0, escapetype=0;
compressed_offsets[lx] = compression_table_size + compression_string_size;
compression_string_size++; /* for the type byte */
while (!done) {
- if (temporary_files_switch)
- ch = fgetc(Temp1_fp);
- else
- ch = read_byte_from_memory_block(&static_strings_area, ix);
+ ch = static_strings_area[ix];
ix++;
if (ix > static_strings_extent || ch < 0)
compiler_error("Read too much not-yet-compressed text.");
compression_table_size += 2;
break;
case 3:
- cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH;
+ cx = abbreviation_text(ent->u.val);
compression_table_size += (1 + 1 + strlen(cx));
break;
case 4:
/* for compatibility with previous releases. */
/* ------------------------------------------------------------------------- */
+/* The complete game text. */
+static char *opttext;
+static int32 opttextlen;
+
typedef struct tlb_s /* One distinct three-letter block found in the game text */
{ char text[4]; /* the three characters followed by a terminating 0 */
int32 intab, occurrences; /* intab: index in grandtable of this block's first cross-reference;
                             occurrences: number of grandtable entries recorded for this block */
} tlb;
-static tlb *tlbtab;
+static tlb *tlbtab; /* Three-letter blocks (allocated up to no_occs) */
+static memory_list tlbtab_memlist;
static int32 no_occs;
static int32 *grandtable;
int32 popularity;
int32 score;
int32 location;
- char text[MAX_ABBREV_LENGTH];
+ char *text; /* allocated to textsize, min 4 */
+ int32 textsize;
} optab;
-static optab *bestyet, *bestyet2;
+static int32 MAX_BESTYET;
+static optab *bestyet; /* High-score entries (up to MAX_BESTYET used/allocated) */
+static optab *bestyet2; /* The selected entries (up to selected used; allocated to MAX_ABBREVS) */
-static int pass_no;
+/* Copy the optab entry *src into *dest. dest keeps its own text buffer:
+   when that buffer cannot hold src->length characters plus a terminator
+   it is reallocated to twice the needed size before the string is copied.
+   src->text must be a NUL-terminated string. */
+static void optab_copy(optab *dest, const optab *src)
+{
+    int32 needed = src->length + 1;
+
+    /* Make room for the text first, then copy it. */
+    if (needed > dest->textsize) {
+        int32 previous = dest->textsize;
+        dest->textsize = needed * 2;
+        my_realloc(&dest->text, previous, dest->textsize, "bestyet2.text");
+    }
+    strcpy(dest->text, src->text);
+
+    /* The scalar fields are plain copies. */
+    dest->location = src->location;
+    dest->score = src->score;
+    dest->popularity = src->popularity;
+    dest->length = src->length;
+}
-static char *sub_buffer;
+static int pass_no;
static void optimise_pass(void)
-{ int32 i; int t1, t2;
+{
+ TIMEVALUE t1, t2;
+ float duration;
+ int32 i;
int32 j, j2, k, nl, matches, noflags, score, min, minat=0, x, scrabble, c;
- for (i=0; i<256; i++) bestyet[i].length=0;
+ for (i=0; i<MAX_BESTYET; i++) bestyet[i].length=0;
for (i=0; i<no_occs; i++)
{ if ((*(tlbtab[i].text)!=(int) '\n')&&(tlbtab[i].occurrences!=0))
{
if (i%((**g_pm_hndl).linespercheck) == 0)
{ ProcessEvents (&g_proc);
if (g_proc != true)
- { free_arrays();
- if (store_the_text)
- my_free(&all_text,"transcription text");
+ { ao_free_arrays();
longjmp (g_fallback, 1);
}
}
#endif
- printf("Pass %d, %4ld/%ld '%s' (%ld occurrences) ",
- pass_no, (long int) i, (long int) no_occs, tlbtab[i].text,
- (long int) tlbtab[i].occurrences);
- t1=(int) (time(0));
+ if (optabbrevs_trace_setting >= 2) {
+ printf("Pass %d, %4ld/%ld '%s' (%ld occurrences) ",
+ pass_no, (long int) i, (long int) no_occs, tlbtab[i].text,
+ (long int) tlbtab[i].occurrences);
+ }
+ TIMEVALUE_NOW(&t1);
for (j=0; j<tlbtab[i].occurrences; j++)
{ for (j2=0; j2<tlbtab[i].occurrences; j2++) grandflags[j2]=1;
nl=2; noflags=tlbtab[i].occurrences;
- while ((noflags>=2)&&(nl<=62))
+ while (noflags>=2)
{ nl++;
for (j2=0; j2<nl; j2++)
- if (all_text[grandtable[tlbtab[i].intab+j]+j2]=='\n')
+ if (opttext[grandtable[tlbtab[i].intab+j]+j2]=='\n')
goto FinishEarly;
matches=0;
for (j2=j; j2<tlbtab[i].occurrences; j2++)
{ x=grandtable[tlbtab[i].intab+j2]
- grandtable[tlbtab[i].intab+j];
if (((x>-nl)&&(x<nl))
- || (memcmp(all_text+grandtable[tlbtab[i].intab+j],
- all_text+grandtable[tlbtab[i].intab+j2],
+ || (memcmp(opttext+grandtable[tlbtab[i].intab+j],
+ opttext+grandtable[tlbtab[i].intab+j2],
nl)!=0))
{ grandflags[j2]=0; noflags--; }
else matches++;
scrabble=0;
for (k=0; k<nl; k++)
{ scrabble++;
- c=all_text[grandtable[tlbtab[i].intab+j+k]];
+ c=opttext[grandtable[tlbtab[i].intab+j+k]];
if (c!=(int) ' ')
{ if (iso_to_alphabet_grid[c]<0)
scrabble+=2;
}
score=(matches-1)*(scrabble-2);
min=score;
- for (j2=0; j2<256; j2++)
+ for (j2=0; j2<MAX_BESTYET; j2++)
{ if ((nl==bestyet[j2].length)
- && (memcmp(all_text+bestyet[j2].location,
- all_text+grandtable[tlbtab[i].intab+j],
+ && (memcmp(opttext+bestyet[j2].location,
+ opttext+grandtable[tlbtab[i].intab+j],
nl)==0))
- { j2=256; min=score; }
+ { j2=MAX_BESTYET; min=score; }
else
{ if (bestyet[j2].score<min)
{ min=bestyet[j2].score; minat=j2;
bestyet[minat].length=nl;
bestyet[minat].location=grandtable[tlbtab[i].intab+j];
bestyet[minat].popularity=matches;
- for (j2=0; j2<nl; j2++) sub_buffer[j2]=
- all_text[bestyet[minat].location+j2];
- sub_buffer[nl]=0;
}
}
FinishEarly: ;
}
- t2=((int) time(0)) - t1;
- printf(" (%d seconds)\n",t2);
+ if (optabbrevs_trace_setting >= 2) {
+ TIMEVALUE_NOW(&t2);
+ duration = TIMEVALUE_DIFFERENCE(&t1, &t2);
+ printf(" (%.4f seconds)\n", duration);
+ }
}
}
}
return(0);
}
-#define MAX_TLBS 8000
-
extern void optimise_abbreviations(void)
-{ int32 i, j, t, max=0, MAX_GTABLE;
+{ int32 i, j, tcount, max=0, MAX_GTABLE;
int32 j2, selected, available, maxat=0, nl;
- tlb test;
+ if (opttext == NULL)
+ return;
+
+ /* We insist that the first two abbreviations will be ". " and ", ". */
+ if (MAX_ABBREVS < 2)
+ return;
+
+ /* Note that it's safe to access opttext[opttextlen+2]. There are
+ two newlines and a null beyond opttextlen. */
+
printf("Beginning calculation of optimal abbreviations...\n");
pass_no = 0;
- tlbtab=my_calloc(sizeof(tlb), MAX_TLBS, "tlb table"); no_occs=0;
- sub_buffer=my_calloc(sizeof(char), 4000, "sub_buffer");
- for (i=0; i<MAX_TLBS; i++) tlbtab[i].occurrences=0;
- bestyet=my_calloc(sizeof(optab), 256, "bestyet");
- bestyet2=my_calloc(sizeof(optab), 64, "bestyet2");
+ initialise_memory_list(&tlbtab_memlist,
+ sizeof(tlb), 1000, (void**)&tlbtab,
+ "three-letter-blocks buffer");
+
+ no_occs=0;
+
+ /* Not sure what the optimal size is for MAX_BESTYET. The original code always created 64 abbreviations and used MAX_BESTYET=256. I'm guessing that 4*MAX_ABBREVS is reasonable. */
+ MAX_BESTYET = 4 * MAX_ABBREVS;
+
+ bestyet=my_calloc(sizeof(optab), MAX_BESTYET, "bestyet");
+ for (i=0; i<MAX_BESTYET; i++) {
+ bestyet[i].length = 0;
+ bestyet[i].popularity = 0;
+ bestyet[i].score = 0;
+ bestyet[i].location = 0;
+ bestyet[i].textsize = 4;
+ bestyet[i].text = my_malloc(bestyet[i].textsize, "bestyet.text");
+ }
+
+ bestyet2=my_calloc(sizeof(optab), MAX_ABBREVS, "bestyet2");
+ for (i=0; i<MAX_ABBREVS; i++) {
+ bestyet2[i].length = 0;
+ bestyet2[i].popularity = 0;
+ bestyet2[i].score = 0;
+ bestyet2[i].location = 0;
+ bestyet2[i].textsize = 4;
+ bestyet2[i].text = my_malloc(bestyet2[i].textsize, "bestyet2.text");
+ }
bestyet2[0].text[0]='.';
bestyet2[0].text[1]=' ';
bestyet2[1].text[1]=' ';
bestyet2[1].text[2]=0;
- for (i=0; all_text+i<all_text_top; i++)
+ selected=2;
+
+ for (i=0; i<opttextlen; i++)
{
- if ((all_text[i]=='.') && (all_text[i+1]==' ') && (all_text[i+2]==' '))
- { all_text[i]='\n'; all_text[i+1]='\n'; all_text[i+2]='\n';
+ if ((opttext[i]=='.') && (opttext[i+1]==' ') && (opttext[i+2]==' '))
+ { opttext[i]='\n'; opttext[i+1]='\n'; opttext[i+2]='\n';
bestyet2[0].popularity++;
}
- if ((all_text[i]=='.') && (all_text[i+1]==' '))
- { all_text[i]='\n'; all_text[i+1]='\n';
+ if ((opttext[i]=='.') && (opttext[i+1]==' '))
+ { opttext[i]='\n'; opttext[i+1]='\n';
bestyet2[0].popularity++;
}
- if ((all_text[i]==',') && (all_text[i+1]==' '))
- { all_text[i]='\n'; all_text[i+1]='\n';
+ if ((opttext[i]==',') && (opttext[i+1]==' '))
+ { opttext[i]='\n'; opttext[i+1]='\n';
bestyet2[1].popularity++;
}
}
- MAX_GTABLE=subtract_pointers(all_text_top,all_text)+1;
+ MAX_GTABLE=opttextlen+1;
grandtable=my_calloc(4*sizeof(int32), MAX_GTABLE/4, "grandtable");
- for (i=0, t=0; all_text+i<all_text_top; i++)
- { test.text[0]=all_text[i];
- test.text[1]=all_text[i+1];
- test.text[2]=all_text[i+2];
+ for (i=0, tcount=0; i<opttextlen; i++)
+ {
+ tlb test;
+ test.text[0]=opttext[i];
+ test.text[1]=opttext[i+1];
+ test.text[2]=opttext[i+2];
test.text[3]=0;
if ((test.text[0]=='\n')||(test.text[1]=='\n')||(test.text[2]=='\n'))
goto DontKeep;
- for (j=0; j<no_occs; j++)
+ for (j=0; j<no_occs; j++) {
if (strcmp(test.text,tlbtab[j].text)==0)
goto DontKeep;
+ }
test.occurrences=0;
- for (j=i+3; all_text+j<all_text_top; j++)
+ test.intab=0;
+ for (j=i+3; j<opttextlen; j++)
{
#ifdef MAC_FACE
if (j%((**g_pm_hndl).linespercheck) == 0)
{ ProcessEvents (&g_proc);
if (g_proc != true)
- { free_arrays();
- if (store_the_text)
- my_free(&all_text,"transcription text");
+ { ao_free_arrays();
longjmp (g_fallback, 1);
}
}
#endif
- if ((all_text[i]==all_text[j])
- && (all_text[i+1]==all_text[j+1])
- && (all_text[i+2]==all_text[j+2]))
- { grandtable[t+test.occurrences]=j;
+ if ((opttext[i]==opttext[j])
+ && (opttext[i+1]==opttext[j+1])
+ && (opttext[i+2]==opttext[j+2]))
+ { grandtable[tcount+test.occurrences]=j;
test.occurrences++;
- if (t+test.occurrences==MAX_GTABLE)
+ if (tcount+test.occurrences==MAX_GTABLE)
{ printf("All %ld cross-references used\n",
(long int) MAX_GTABLE);
goto Built;
}
}
if (test.occurrences>=2)
- { tlbtab[no_occs]=test;
- tlbtab[no_occs].intab=t; t+=tlbtab[no_occs].occurrences;
+ {
+ ensure_memory_list_available(&tlbtab_memlist, no_occs+1);
+ tlbtab[no_occs]=test;
+ tlbtab[no_occs].intab=tcount;
+ tcount += tlbtab[no_occs].occurrences;
if (max<tlbtab[no_occs].occurrences)
max=tlbtab[no_occs].occurrences;
no_occs++;
- if (no_occs==MAX_TLBS)
- { printf("All %d three-letter-blocks used\n",
- MAX_TLBS);
- goto Built;
- }
}
DontKeep: ;
}
grandflags=my_calloc(sizeof(int), max, "grandflags");
- printf("Cross-reference table (%ld entries) built...\n",
- (long int) no_occs);
+ if (optabbrevs_trace_setting >= 1) {
+ printf("Cross-reference table (%ld entries) built...\n",
+ (long int) no_occs);
+ }
/* for (i=0; i<no_occs; i++)
printf("%4d %4d '%s' %d\n",i,tlbtab[i].intab,tlbtab[i].text,
tlbtab[i].occurrences);
*/
- for (i=0; i<64; i++) bestyet2[i].length=0; selected=2;
- available=256;
- while ((available>0)&&(selected<64))
- { printf("Pass %d\n", ++pass_no);
-
+ for (i=0; i<MAX_ABBREVS; i++) bestyet2[i].length=0;
+ available=MAX_BESTYET;
+ while ((available>0)&&(selected<MAX_ABBREVS))
+ {
+ pass_no++;
+ if (optabbrevs_trace_setting >= 1) {
+ printf("Pass %d\n", pass_no);
+ }
+
optimise_pass();
available=0;
- for (i=0; i<256; i++)
+ for (i=0; i<MAX_BESTYET; i++)
if (bestyet[i].score!=0)
{ available++;
nl=bestyet[i].length;
+ if (nl+1 > bestyet[i].textsize) {
+ int32 oldsize = bestyet[i].textsize;
+ bestyet[i].textsize = (nl+1)*2;
+ my_realloc(&bestyet[i].text, oldsize, bestyet[i].textsize, "bestyet.text");
+ }
for (j2=0; j2<nl; j2++) bestyet[i].text[j2]=
- all_text[bestyet[i].location+j2];
+ opttext[bestyet[i].location+j2];
bestyet[i].text[nl]=0;
}
/* printf("End of pass results:\n");
printf("\nno score freq string\n");
- for (i=0; i<256; i++)
+ for (i=0; i<MAX_BESTYET; i++)
if (bestyet[i].score>0)
printf("%02d: %4d %4d '%s'\n", i, bestyet[i].score,
bestyet[i].popularity, bestyet[i].text);
do
{ max=0;
- for (i=0; i<256; i++)
+ for (i=0; i<MAX_BESTYET; i++)
if (max<bestyet[i].score)
{ max=bestyet[i].score;
maxat=i;
}
if (max>0)
- { bestyet2[selected++]=bestyet[maxat];
-
- printf(
- "Selection %2ld: '%s' (repeated %ld times, scoring %ld)\n",
- (long int) selected,bestyet[maxat].text,
- (long int) bestyet[maxat].popularity,
- (long int) bestyet[maxat].score);
+ {
+ char testtext[4];
+ optab_copy(&bestyet2[selected++], &bestyet[maxat]);
+
+ if (optabbrevs_trace_setting >= 1) {
+ printf(
+ "Selection %2ld: '%s' (repeated %ld times, scoring %ld)\n",
+ (long int) selected,bestyet[maxat].text,
+ (long int) bestyet[maxat].popularity,
+ (long int) bestyet[maxat].score);
+ }
- test.text[0]=bestyet[maxat].text[0];
- test.text[1]=bestyet[maxat].text[1];
- test.text[2]=bestyet[maxat].text[2];
- test.text[3]=0;
+ testtext[0]=bestyet[maxat].text[0];
+ testtext[1]=bestyet[maxat].text[1];
+ testtext[2]=bestyet[maxat].text[2];
+ testtext[3]=0;
for (i=0; i<no_occs; i++)
- if (strcmp(test.text,tlbtab[i].text)==0)
+ if (strcmp(testtext,tlbtab[i].text)==0)
break;
for (j=0; j<tlbtab[i].occurrences; j++)
{ if (memcmp(bestyet[maxat].text,
- all_text+grandtable[tlbtab[i].intab+j],
+ opttext+grandtable[tlbtab[i].intab+j],
bestyet[maxat].length)==0)
{ for (j2=0; j2<bestyet[maxat].length; j2++)
- all_text[grandtable[tlbtab[i].intab+j]+j2]='\n';
+ opttext[grandtable[tlbtab[i].intab+j]+j2]='\n';
}
}
- for (i=0; i<256; i++)
+ for (i=0; i<MAX_BESTYET; i++)
if ((bestyet[i].score>0)&&
(any_overlap(bestyet[maxat].text,bestyet[i].text)==1))
{ bestyet[i].score=0;
bestyet[i].text); */
}
}
- } while ((max>0)&&(available>0)&&(selected<64));
+ } while ((max>0)&&(available>0)&&(selected<MAX_ABBREVS));
}
printf("\nChosen abbreviations (in Inform syntax):\n\n");
/* <Z-coded text> <flags> <verbnumber> <adjectivenumber> */
/* 4 or 6 bytes byte byte byte */
/* */
-/* For Glulx, the form is instead: (But see below about Unicode-valued */
-/* dictionaries and my heinie.) */
+/* For Glulx, the form is instead: (See below about Unicode-valued */
+/* dictionaries and DICT_WORD_BYTES.) */
/* */
-/* <plain text> <flags> <verbnumber> <adjectivenumber> */
-/* DICT_WORD_SIZE short short short */
+/* <tag> <plain text> <flags> <verbnumber> <adjectivenumber> */
+/* $60 DICT_WORD_BYTES short short short */
/* */
/* These records are stored in "accession order" (i.e. in order of their */
/* first being received by these routines) and only alphabetically sorted */
/* fields. (The high bytes are $DICT_WORD_SIZE+1/3/5.) */
/* ------------------------------------------------------------------------- */
-uchar *dictionary, /* (These two pointers are externally
+uchar *dictionary; /* (These two variables are externally
used only in "tables.c" when
building the story-file) */
- *dictionary_top; /* Pointer to next free record */
+static memory_list dictionary_memlist;
+int32 dictionary_top; /* Position of the next free record
+ in dictionary (i.e., the current
+ number of bytes) */
int dict_entries; /* Total number of records entered */
/* ------------------------------------------------------------------------- */
-/* dict_word is a typedef for a struct of 6 unsigned chars (defined in */
-/* "header.h"): it holds the (4 or) 6 bytes of Z-coded text of a word. */
+/* dict_word was originally a typedef for a struct of 6 unsigned chars. */
+/* It held the (4 or) 6 bytes of Z-coded text of a word. */
/* Usefully, because the PAD character 5 is < all alphabetic characters, */
/* alphabetic order corresponds to numeric order. For this reason, the */
/* dict_word is called the "sort code" of the original text word. */
/* */
-/* ###- In modifying the compiler, I've found it easier to discard the */
+/* In modifying the compiler for Glulx, I found it easier to discard the */
/* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */
/* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */
-/* as before. In Glulx, it can be any value up to MAX_DICT_WORD_SIZE. */
-/* (That limit is defined as 40 in the header; it exists only for a few */
-/* static buffers, and can be increased without using significant memory.) */
+/* as before. In Glulx, it can be any value. */
/* */
-/* ###- Well, that certainly bit me on the butt, didn't it. In further */
-/* modifying the compiler to generate a Unicode dictionary, I have to */
-/* store four-byte values in the uchar array. This is handled by making */
-/* the array size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE).*/
+/* In further modifying the compiler to generate a Unicode dictionary, */
+/* I have to store four-byte values in the uchar array. We make the array */
+/* size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE). */
/* Then we store the 32-bit character value big-endian. This lets us */
/* continue to compare arrays bytewise, which is a nice simplification. */
/* ------------------------------------------------------------------------- */
d1[i] = d2[i];
}
-static uchar prepared_sort[MAX_DICT_WORD_BYTES]; /* Holds the sort code
- of current word */
+static memory_list prepared_sort_memlist;
+static uchar *prepared_sort; /* Holds the sort code of current word */
-static int number_and_case;
+static int prepared_dictflags_pos; /* Dict flags set by the current word */
+static int prepared_dictflags_neg; /* Dict flags *not* set by the word */
/* Also used by verbs.c */
static void dictionary_prepare_z(char *dword, uchar *optresult)
-{ int i, j, k, k2, wd[13]; int32 tot;
+{ int i, j, k, k2, wd[13];
+ int32 tot;
+ int negflag;
/* A rapid text translation algorithm using only the simplified rules
applying to the text of dictionary entries: first produce a sequence
int dictsize = (version_number==3) ? 6 : 9;
- number_and_case = 0;
+ prepared_dictflags_pos = 0;
+ prepared_dictflags_neg = 0;
- for (i=0, j=0; dword[j]!=0; i++, j++)
- { if ((dword[j] == '/') && (dword[j+1] == '/'))
- { for (j+=2; dword[j] != 0; j++)
- { switch(dword[j])
- { case 'p': number_and_case |= 4; break;
+ for (i=0, j=0; dword[j]!=0; j++)
+ {
+ if ((dword[j] == '/') && (dword[j+1] == '/'))
+ {
+ /* The rest of the word is dict flags. Run through them. */
+ negflag = FALSE;
+ for (j+=2; dword[j] != 0; j++)
+ {
+ switch(dword[j])
+ {
+ case '~':
+ if (!dword[j+1])
+ error_named("'//~' with no flag character (pn) in dict word", dword);
+ negflag = !negflag;
+ break;
+ case 'p':
+ if (!negflag)
+ prepared_dictflags_pos |= 4;
+ else
+ prepared_dictflags_neg |= 4;
+ negflag = FALSE;
+ break;
+ case 'n':
+ if (!negflag)
+ prepared_dictflags_pos |= 128;
+ else
+ prepared_dictflags_neg |= 128;
+ negflag = FALSE;
+ break;
default:
- error_named("Expected 'p' after '//' \
-to give number of dictionary word", dword);
+ error_named("Expected flag character (pn~) after '//' in dict word", dword);
break;
}
}
break;
}
- if (i>=dictsize) break;
+
+ /* LONG_DICT_FLAG_BUG emulates the old behavior where we stop looping
+ at dictsize. */
+ if (LONG_DICT_FLAG_BUG && i>=dictsize)
+ break;
k=(int) dword[j];
if (k==(int) '\'')
char_error("Character can be printed but not input:", k);
else
{ /* Use 4 more Z-chars to encode a ZSCII escape sequence */
-
- wd[i++] = 5; wd[i++] = 6;
+ if (i<dictsize)
+ wd[i++] = 5;
+ if (i<dictsize)
+ wd[i++] = 6;
k2 = -k2;
- wd[i++] = k2/32; wd[i] = k2%32;
+ if (i<dictsize)
+ wd[i++] = k2/32;
+ if (i<dictsize)
+ wd[i++] = k2%32;
}
}
else
{ alphabet_used[k2] = 'Y';
- if ((k2/26)!=0)
+ if ((k2/26)!=0 && i<dictsize)
wd[i++]=3+(k2/26); /* Change alphabet for symbols */
- wd[i]=6+(k2%26); /* Write the Z character */
+ if (i<dictsize)
+ wd[i++]=6+(k2%26); /* Write the Z character */
}
}
- /* Fill up to the end of the dictionary block with PAD characters */
+ if (i > dictsize)
+ compiler_error("dict word buffer overflow");
- for (; i<9; i++) wd[i]=5;
+ /* Fill up to the end of the dictionary block with PAD characters
+ (for safety, we right-pad to 9 chars even in V3) */
- /* The array of Z-chars is converted to three 2-byte blocks */
+ for (; i<9; i++) wd[i]=5;
+ /* The array of Z-chars is converted to two or three 2-byte blocks */
+ ensure_memory_list_available(&prepared_sort_memlist, DICT_WORD_BYTES);
+
tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10);
prepared_sort[1]=tot%0x100;
prepared_sort[0]=(tot/0x100)%0x100;
tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10);
prepared_sort[3]=tot%0x100;
prepared_sort[2]=(tot/0x100)%0x100;
- tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
+ if (version_number==3)
+ tot = 0;
+ else
+ tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
prepared_sort[5]=tot%0x100;
prepared_sort[4]=(tot/0x100)%0x100;
{
int i, j, k;
int32 unicode;
+ int negflag;
- number_and_case = 0;
+ prepared_dictflags_pos = 0;
+ prepared_dictflags_neg = 0;
- for (i=0, j=0; (dword[j]!=0); i++, j++) {
+ for (i=0, j=0; (dword[j]!=0); j++) {
if ((dword[j] == '/') && (dword[j+1] == '/')) {
+ /* The rest of the word is dict flags. Run through them. */
+ negflag = FALSE;
for (j+=2; dword[j] != 0; j++) {
switch(dword[j]) {
+ case '~':
+ if (!dword[j+1])
+ error_named("'//~' with no flag character (pn) in dict word", dword);
+ negflag = !negflag;
+ break;
case 'p':
- number_and_case |= 4;
- break;
+ if (!negflag)
+ prepared_dictflags_pos |= 4;
+ else
+ prepared_dictflags_neg |= 4;
+ negflag = FALSE;
+ break;
+ case 'n':
+ if (!negflag)
+ prepared_dictflags_pos |= 128;
+ else
+ prepared_dictflags_neg |= 128;
+ negflag = FALSE;
+ break;
default:
- error_named("Expected 'p' after '//' \
-to give gender or number of dictionary word", dword);
+ error_named("Expected flag character (pn~) after '//' in dict word", dword);
break;
}
}
break;
}
- if (i>=DICT_WORD_SIZE) break;
+
+ /* LONG_DICT_FLAG_BUG emulates the old behavior where we stop looping
+ at DICT_WORD_SIZE. */
+ if (LONG_DICT_FLAG_BUG && i>=DICT_WORD_SIZE)
+ break;
k= ((unsigned char *)dword)[j];
if (k=='\'')
if (k >= (unsigned)'A' && k <= (unsigned)'Z')
k += ('a' - 'A');
+ ensure_memory_list_available(&prepared_sort_memlist, DICT_WORD_BYTES);
+
if (DICT_CHAR_SIZE == 1) {
- prepared_sort[i] = k;
+ if (i<DICT_WORD_SIZE)
+ prepared_sort[i++] = k;
}
else {
- prepared_sort[4*i] = (k >> 24) & 0xFF;
- prepared_sort[4*i+1] = (k >> 16) & 0xFF;
- prepared_sort[4*i+2] = (k >> 8) & 0xFF;
- prepared_sort[4*i+3] = (k) & 0xFF;
+ if (i<DICT_WORD_SIZE) {
+ prepared_sort[4*i] = (k >> 24) & 0xFF;
+ prepared_sort[4*i+1] = (k >> 16) & 0xFF;
+ prepared_sort[4*i+2] = (k >> 8) & 0xFF;
+ prepared_sort[4*i+3] = (k) & 0xFF;
+ i++;
+ }
}
}
+ if (i > DICT_WORD_SIZE)
+ compiler_error("dict word buffer overflow");
+
+ /* Right-pad with zeroes */
if (DICT_CHAR_SIZE == 1) {
for (; i<DICT_WORD_SIZE; i++)
prepared_sort[i] = 0;
char colour; /* The colour of the branch to the parent */
} dict_tree_node;
-static dict_tree_node *dtree;
+static dict_tree_node *dtree; /* Allocated to dict_entries */
+static memory_list dtree_memlist;
+
+static uchar *dict_sort_codes; /* Allocated to dict_entries*DICT_WORD_BYTES */
+static memory_list dict_sort_codes_memlist;
-int *final_dict_order;
-static uchar *dict_sort_codes;
+int *final_dict_order; /* Allocated at sort_dictionary() time */
static void dictionary_begin_pass(void)
{
/* Reset the dictionary to empty. dictionary_top starts just past the space
   kept for the header: 7 bytes when not compiling for Glulx.
   Glulx has a 4-byte header instead. */
if (!glulx_mode)
- dictionary_top=dictionary+7;
+ dictionary_top = 7;
else
- dictionary_top=dictionary+4;
+ dictionary_top = 4;
+ ensure_memory_list_available(&dictionary_memlist, dictionary_top); /* presumably grows the dictionary buffer to hold at least dictionary_top bytes -- confirm against the memory-list code */
+
root = VACANT; /* the tree has no root node yet */
dict_entries = 0; /* no records entered so far */
}
}
extern void sort_dictionary(void)
-{ int i;
- if (module_switch)
- { for (i=0; i<dict_entries; i++)
- final_dict_order[i] = i;
- return;
- }
-
+{ /* Allocate final_dict_order, one int per dictionary entry, and then
+     walk the tree from its root unless the tree is empty. */
+ final_dict_order = my_calloc(sizeof(int), dict_entries, "final dictionary ordering table");
+
if (root != VACANT) /* nothing to walk when no word has been entered */
{ fdo_count = 0; recursively_sort(root); /* recursively_sort is defined elsewhere; presumably it fills final_dict_order, counting with fdo_count -- confirm */
}
}
/* ------------------------------------------------------------------------- */
-/* Add "dword" to the dictionary with (x,y,z) as its data fields; unless */
-/* it already exists, in which case OR the data with (x,y,z) */
+/* Add "dword" to the dictionary with (flag1,flag2,flag3) as its data */
+/* fields; unless it already exists, in which case OR the data fields with */
+/* those flags. */
/* */
/* These fields are one byte each in Z-code, two bytes each in Glulx. */
/* */
/* Returns: the accession number. */
/* ------------------------------------------------------------------------- */
-extern int dictionary_add(char *dword, int x, int y, int z)
+extern int dictionary_add(char *dword, int flag1, int flag2, int flag3)
{ int n; uchar *p;
int ggfr = 0, gfr = 0, fr = 0, r = 0;
int ggf = VACANT, gf = VACANT, f = VACANT, at = root;
int a, b;
int res=((version_number==3)?4:6);
+ /* Fill in prepared_sort and prepared_dictflags. */
dictionary_prepare(dword, NULL);
+ /* Adjust flag1 according to prepared_dictflags. */
+ flag1 &= (~prepared_dictflags_neg);
+ flag1 |= prepared_dictflags_pos;
+
if (root == VACANT)
{ root = 0; goto CreateEntry;
}
if (n==0)
{
if (!glulx_mode) {
- p = dictionary+7 + at*(3+res) + res;
- p[0]=(p[0])|x; p[1]=(p[1])|y; p[2]=(p[2])|z;
- if (x & 128) p[0] = (p[0])|number_and_case;
+ p = dictionary+7 + at*DICT_ENTRY_BYTE_LENGTH + res;
+ p[0] |= flag1; p[1] |= flag2;
+ if (!ZCODE_LESS_DICT_DATA)
+ p[2] |= flag3;
}
else {
p = dictionary+4 + at*DICT_ENTRY_BYTE_LENGTH + DICT_ENTRY_FLAG_POS;
- p[0]=(p[0])|(x/256); p[1]=(p[1])|(x%256);
- p[2]=(p[2])|(y/256); p[3]=(p[3])|(y%256);
- p[4]=(p[4])|(z/256); p[5]=(p[5])|(z%256);
- if (x & 128) p[1] = (p[1]) | number_and_case;
+ p[0] |= (flag1/256); p[1] |= (flag1%256);
+ p[2] |= (flag2/256); p[3] |= (flag2%256);
+ p[4] |= (flag3/256); p[5] |= (flag3%256);
}
return at;
}
CreateEntry:
- if (dict_entries==MAX_DICT_ENTRIES)
- memoryerror("MAX_DICT_ENTRIES",MAX_DICT_ENTRIES);
+ ensure_memory_list_available(&dtree_memlist, dict_entries+1);
+ ensure_memory_list_available(&dict_sort_codes_memlist, (dict_entries+1)*DICT_WORD_BYTES);
dtree[dict_entries].branch[0] = VACANT;
dtree[dict_entries].branch[1] = VACANT;
if (!glulx_mode) {
- p = dictionary + (3+res)*dict_entries + 7;
+ ensure_memory_list_available(&dictionary_memlist, dictionary_top + DICT_ENTRY_BYTE_LENGTH);
+ p = dictionary + DICT_ENTRY_BYTE_LENGTH*dict_entries + 7;
/* So copy in the 4 (or 6) bytes of Z-coded text and the 3 data
bytes */
p[2]=prepared_sort[2]; p[3]=prepared_sort[3];
if (version_number > 3)
{ p[4]=prepared_sort[4]; p[5]=prepared_sort[5]; }
- p[res]=x; p[res+1]=y; p[res+2]=z;
- if (x & 128) p[res] = (p[res])|number_and_case;
+ p[res]=flag1; p[res+1]=flag2;
+ if (!ZCODE_LESS_DICT_DATA) p[res+2]=flag3;
- dictionary_top += res+3;
+ dictionary_top += DICT_ENTRY_BYTE_LENGTH;
}
else {
int i;
+ ensure_memory_list_available(&dictionary_memlist, dictionary_top + DICT_ENTRY_BYTE_LENGTH);
p = dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*dict_entries;
p[0] = 0x60; /* type byte -- dict word */
p[i] = prepared_sort[i];
p += DICT_WORD_BYTES;
- p[0] = 0; p[1] = x;
- p[2] = y/256; p[3] = y%256;
- p[4] = 0; p[5] = z;
- if (x & 128)
- p[1] |= number_and_case;
+ p[0] = (flag1/256); p[1] = (flag1%256);
+ p[2] = (flag2/256); p[3] = (flag2%256);
+ p[4] = (flag3/256); p[5] = (flag3%256);
dictionary_top += DICT_ENTRY_BYTE_LENGTH;
if (i!=0)
{
if (!glulx_mode) {
- p=dictionary+7+(i-1)*(3+res)+res;
+ p=dictionary+7+(i-1)*DICT_ENTRY_BYTE_LENGTH+res;
p[1]=to;
}
else {
}
/* ------------------------------------------------------------------------- */
-/* Tracing code for the dictionary: used not only by "trace" and text */
-/* transcription, but also (in the case of "word_to_ascii") in a vital */
-/* by the linker. */
+/* Tracing code for the dictionary: used by "trace" and text */
+/* transcription. */
/* ------------------------------------------------------------------------- */
-static char *d_show_to;
-static int d_show_total;
+/* In the dictionary-showing code, if d_show_buf is NULL, the text is
+ printed directly. (The "Trace dictionary" directive does this.)
+ If d_show_buf is not NULL, we add words to it (reallocing if necessary)
+ until it's a page-width.
+*/
+static char *d_show_buf = NULL;
+static int d_show_size; /* allocated size */
+static int d_show_len; /* current length */
static void show_char(char c)
-{ if (d_show_to == NULL) printf("%c", c);
- else
- { int i = strlen(d_show_to);
- d_show_to[i] = c; d_show_to[i+1] = 0;
+{ /* Emit one character of dictionary display. With d_show_buf NULL the
+     character is printed to stdout; otherwise it is appended to
+     d_show_buf, which is kept NUL-terminated and enlarged when it
+     runs short of room. */
+ if (d_show_buf == NULL) {
+ printf("%c", c);
+ }
+ else {
+ if (d_show_len+2 >= d_show_size) { /* c plus the terminator would leave no spare byte: grow first */
+ int newsize = 2 * d_show_len + 16;
+ my_realloc(&d_show_buf, d_show_size, newsize, "dictionary display buffer");
+ d_show_size = newsize;
+ }
+ d_show_buf[d_show_len++] = c;
+ d_show_buf[d_show_len] = '\0'; /* keep the buffer usable as a C string after every append */
+ }
+}
+
+/* Display a Unicode character in user-readable form. This uses the same
+   character encoding as the source code.
+
+   c is the character value. Output goes through show_char(), one byte
+   at a time:
+     - values below 0x80 are emitted as-is;
+     - if character_set_unicode is set, the value is emitted as UTF-8
+       (or '?' when it is 0x200000 or more);
+     - otherwise, if character_set_setting is 1 and the value is below
+       0x100, it is emitted as a single byte;
+     - otherwise the escaped form "@{hex}" is emitted. */
+static void show_uchar(uint32 c)
+{
+    char buf[16];
+    int ix;
+
+    if (c < 0x80) {
+        /* ASCII always works */
+        show_char(c);
+        return;
+    }
+    if (character_set_unicode) {
+        /* UTF-8 the character. c is at least 0x80 here (smaller values
+           returned above), so the shortest form is two bytes. */
+        if (c < 0x800) {
+            show_char((0xC0 | ((c & 0x7C0) >> 6)));
+            show_char((0x80 |  (c & 0x03F)     ));
+        }
+        else if (c < 0x10000) {
+            show_char((0xE0 | ((c & 0xF000) >> 12)));
+            show_char((0x80 | ((c & 0x0FC0) >>  6)));
+            show_char((0x80 |  (c & 0x003F)      ));
+        }
+        else if (c < 0x200000) {
+            show_char((0xF0 | ((c & 0x1C0000) >> 18)));
+            show_char((0x80 | ((c & 0x03F000) >> 12)));
+            show_char((0x80 | ((c & 0x000FC0) >>  6)));
+            show_char((0x80 |  (c & 0x00003F)      ));
+        }
+        else {
+            show_char('?');
+        }
+        return;
}
+    if (character_set_setting == 1 && c < 0x100) {
+        /* Fits in Latin-1 */
+        show_char(c);
+        return;
+    }
+    /* Supporting other character_set_setting is harder; not currently implemented. */
+
+    /* Use the escaped form. buf is large enough: "@{" + at most 8 hex
+       digits + "}" + terminator is 12 bytes. */
+    sprintf(buf, "@{%x}", c);
+    for (ix=0; buf[ix]; ix++)
+        show_char(buf[ix]);
}
extern void word_to_ascii(uchar *p, char *results)
encoded_word[7] = 8*(((int) p[4])&0x3) + (((int) p[5])&0xe0)/32;
encoded_word[8] = ((int) p[5])&0x1f;
}
+ else
+ {
+ encoded_word[6] = encoded_word[7] = encoded_word[8] = 0;
+ }
shift = 0; cc = 0;
for (i=0; i< ((version_number==3)?6:9); i++)
results[cc] = 0;
}
-static void recursively_show_z(int node)
+/* Print a dictionary word to stdout.
+   (This assumes that d_show_buf is null.)
+
+   node is the accession number of the word; its record is located at
+   DICT_ENTRY_BYTE_LENGTH*node past the dictionary header (7 bytes when
+   not in Glulx mode, 4 bytes in Glulx mode). Outside Glulx mode the
+   record is decoded with word_to_ascii(); in Glulx mode the stored
+   characters are shown one by one up to the first zero.
+ */
+void print_dict_word(int node)
+{
+    uchar *p;
+    int cprinted;
+
+    if (!glulx_mode) {
+        char textual_form[32];
+        p = (uchar *)dictionary + 7 + DICT_ENTRY_BYTE_LENGTH*node;
+
+        word_to_ascii(p, textual_form);
+
+        for (cprinted = 0; textual_form[cprinted]!=0; cprinted++)
+            show_char(textual_form[cprinted]);
+    }
+    else {
+        p = (uchar *)dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*node;
+
+        for (cprinted = 0; cprinted<DICT_WORD_SIZE; cprinted++)
+        {
+            uint32 ch;
+            if (DICT_CHAR_SIZE == 1)
+                ch = p[1+cprinted];
+            else
+                /* Big-endian 32-bit value. Each byte is widened to uint32
+                   before shifting: a bare uchar promotes to signed int,
+                   and shifting a byte >= 0x80 left by 24 would overflow
+                   it (undefined behaviour). */
+                ch = ((uint32)p[4*cprinted+4] << 24)
+                   | ((uint32)p[4*cprinted+5] << 16)
+                   | ((uint32)p[4*cprinted+6] << 8)
+                   |  (uint32)p[4*cprinted+7];
+            if (!ch)
+                break;
+            show_uchar(ch);
+        }
+    }
+}
+
+static void recursively_show_z(int node, int level)
{ int i, cprinted, flags; uchar *p;
char textual_form[32];
- int res = (version_number == 3)?4:6;
+ int res = (version_number == 3)?4:6; /* byte length of encoded text */
if (dtree[node].branch[0] != VACANT)
- recursively_show_z(dtree[node].branch[0]);
+ recursively_show_z(dtree[node].branch[0], level);
- p = (uchar *)dictionary + 7 + (3+res)*node;
+ p = (uchar *)dictionary + 7 + DICT_ENTRY_BYTE_LENGTH*node;
word_to_ascii(p, textual_form);
for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++)
show_char(' ');
- if (d_show_to == NULL)
- { for (i=0; i<3+res; i++) printf("%02x ",p[i]);
+ /* The level-1 info can only be printfed (d_show_buf must be null). */
+ if (d_show_buf == NULL && level >= 1)
+ {
+ if (level >= 2) {
+ for (i=0; i<DICT_ENTRY_BYTE_LENGTH; i++) printf("%02x ",p[i]);
+ }
flags = (int) p[res];
if (flags & 128)
- { printf("noun ");
- if (flags & 4) printf("p"); else printf(" ");
- printf(" ");
- }
- else printf(" ");
+ printf("noun ");
+ else
+ printf(" ");
+ if (flags & 4)
+ printf("p ");
+ else
+ printf(" ");
if (flags & 8)
{ if (grammar_version_number == 1)
printf("preposition:%d ", (int) p[res+2]);
printf("\n");
}
- if (d_show_total++ == 5)
- { d_show_total = 0;
- if (d_show_to != NULL)
- { write_to_transcript_file(d_show_to);
- d_show_to[0] = 0;
- }
+ /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
+ if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1))
+ {
+ write_to_transcript_file(d_show_buf, STRCTX_DICT);
+ d_show_len = 0;
}
if (dtree[node].branch[1] != VACANT)
- recursively_show_z(dtree[node].branch[1]);
+ recursively_show_z(dtree[node].branch[1], level);
}
-static void recursively_show_g(int node)
-{
- warning("### Glulx dictionary-show not yet implemented.\n");
+/* Show the Glulx dictionary subtree rooted at node, in tree order:
+   branch[0] first, then the word at node, then branch[1].
+
+   Each word is shown through show_uchar()/show_char() and padded with
+   spaces to DICT_WORD_SIZE+4 columns. When printing directly
+   (d_show_buf is NULL) and level >= 1, the flags and verb number follow;
+   level >= 2 also dumps the raw bytes of the entry. When collecting into
+   d_show_buf, the buffer is written to the transcript once it reaches
+   64 characters, or after every word if TRANSCRIPT_FORMAT is 1. */
+static void recursively_show_g(int node, int level)
+{   int i, cprinted;
+    uchar *p;
+
+    if (dtree[node].branch[0] != VACANT)
+        recursively_show_g(dtree[node].branch[0], level);
+
+    p = (uchar *)dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*node;
+
+    for (cprinted = 0; cprinted<DICT_WORD_SIZE; cprinted++)
+    {
+        uint32 ch;
+        if (DICT_CHAR_SIZE == 1)
+            ch = p[1+cprinted];
+        else
+            /* Big-endian 32-bit value. Each byte is widened to uint32
+               before shifting: a bare uchar promotes to signed int, and
+               shifting a byte >= 0x80 left by 24 would overflow it
+               (undefined behaviour). */
+            ch = ((uint32)p[4*cprinted+4] << 24)
+               | ((uint32)p[4*cprinted+5] << 16)
+               | ((uint32)p[4*cprinted+6] << 8)
+               |  (uint32)p[4*cprinted+7];
+        if (!ch)
+            break;
+        show_uchar(ch);
+    }
+    for (; cprinted<DICT_WORD_SIZE+4; cprinted++)
+        show_char(' ');
+
+    /* The level-1 info can only be printfed (d_show_buf must be null). */
+    if (d_show_buf == NULL && level >= 1)
+    {   int flagpos = (DICT_CHAR_SIZE == 1) ? (DICT_WORD_SIZE+1) : (DICT_WORD_BYTES+4);
+        int flags = (p[flagpos+0] << 8) | (p[flagpos+1]);
+        int verbnum = (p[flagpos+2] << 8) | (p[flagpos+3]);
+        if (level >= 2) {
+            for (i=0; i<DICT_ENTRY_BYTE_LENGTH; i++) printf("%02x ",p[i]);
+        }
+        if (flags & 128)
+            printf("noun ");
+        else
+            printf(" ");
+        if (flags & 4)
+            printf("p ");
+        else
+            printf(" ");
+        if (flags & 8)
+        {   printf("preposition ");
+        }
+        if ((flags & 3) == 3) printf("metaverb:%d ", verbnum);
+        else if ((flags & 3) == 1) printf("verb:%d ", verbnum);
+        printf("\n");
+    }
+
+    /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
+    if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1))
+    {
+        write_to_transcript_file(d_show_buf, STRCTX_DICT);
+        d_show_len = 0;
+    }
+
+    if (dtree[node].branch[1] != VACANT)
+        recursively_show_g(dtree[node].branch[1], level);
}
static void show_alphabet(int i)
printf("\n");
}
-extern void show_dictionary(void)
-{ printf("Dictionary contains %d entries:\n",dict_entries);
+extern void show_dictionary(int level)
+{
+ /* Print the entry count, then (if the dictionary is not empty) every
+ word by walking the tree from root with the Z-code or Glulx walker,
+ and finally, for Z-code only, the three alphabet rows.
+ Level 0: show words only. Level 1: show words and flags.
+ Level 2: also show bytes.*/
+ printf("Dictionary contains %d entries:\n",dict_entries);
if (dict_entries != 0)
- { d_show_total = 0; d_show_to = NULL;
+ { d_show_len = 0; d_show_buf = NULL; /* no display buffer is in use for this walk */
if (!glulx_mode)
- recursively_show_z(root);
+ recursively_show_z(root, level);
else
- recursively_show_g(root);
+ recursively_show_g(root, level);
+ }
+ if (!glulx_mode) /* the alphabet table is shown for the Z-machine only */
+ {
+ printf("\nZ-machine alphabet entries:\n");
+ show_alphabet(0);
+ show_alphabet(1);
+ show_alphabet(2);
}
- printf("\nZ-machine alphabet entries:\n");
- show_alphabet(0);
- show_alphabet(1);
- show_alphabet(2);
}
extern void write_dictionary_to_transcript(void)
-{ char d_buffer[81];
-
- sprintf(d_buffer, "\n[Dictionary contains %d entries:]\n", dict_entries);
+{ /* Write a "[Dictionary contains N entries:]" heading to the transcript
+ file, followed by the dictionary words (level 0: no flags or bytes).
+ d_show_buf is allocated here, used by the recursive walkers while
+ they run, and freed again before returning. */
+ d_show_size = 80; /* initial size */
+ d_show_buf = my_malloc(d_show_size, "dictionary display buffer");
- d_buffer[0] = 0; write_to_transcript_file(d_buffer);
+ write_to_transcript_file("", STRCTX_INFO); /* presumably produces an empty line ahead of the heading -- confirm */
+ sprintf(d_show_buf, "[Dictionary contains %d entries:]", dict_entries); /* at most about 43 bytes with a 32-bit int, so it fits the 80-byte buffer */
+ write_to_transcript_file(d_show_buf, STRCTX_INFO);
+
+ d_show_len = 0;
if (dict_entries != 0)
- { d_show_total = 0; d_show_to = d_buffer;
+ {
if (!glulx_mode)
- recursively_show_z(root);
+ recursively_show_z(root, 0);
else
- recursively_show_g(root);
+ recursively_show_g(root, 0);
}
- if (d_show_total != 0) write_to_transcript_file(d_buffer);
+ if (d_show_len != 0) write_to_transcript_file(d_show_buf, STRCTX_DICT); /* flush whatever the walk left in the buffer */
+
+ my_free(&d_show_buf, "dictionary display buffer");
+ d_show_len = 0; d_show_buf = NULL;
}
/* ========================================================================= */
extern void init_text_vars(void)
{ int j;
+ /* Reset the text code's globals: buffer and table pointers to NULL,
+ counters to 0, and every abbrevs_lookup slot to -1. Nothing is
+ allocated or freed here. */
+ opttext = NULL; /* stashed transcript, set by extract_all_text() */
+ opttextlen = 0;
bestyet = NULL;
bestyet2 = NULL;
tlbtab = NULL;
grandtable = NULL;
grandflags = NULL;
- no_chars_transcribed = 0;
- is_abbreviation = FALSE;
- put_strings_in_low_memory = FALSE;
+
+ translated_text = NULL;
+ temp_symbol = NULL;
+ all_text = NULL;
for (j=0; j<256; j++) abbrevs_lookup[j] = -1;
total_zchars_trans = 0;
+ dictionary = NULL;
+ dictionary_top = 0;
dtree = NULL;
final_dict_order = NULL;
dict_sort_codes = NULL;
+ prepared_sort = NULL;
dict_entries=0;
- initialise_memory_block(&static_strings_area);
+ static_strings_area = NULL;
+ abbreviations_optimal_parse_schedule = NULL;
+ abbreviations_optimal_parse_scores = NULL;
+
+ compressed_offsets = NULL;
+ huff_entities = NULL;
+ hufflist = NULL;
+ unicode_usage_entries = NULL;
}
extern void text_begin_pass(void)
{ abbrevs_lookup_table_made = FALSE;
no_abbreviations=0;
+ abbreviations_totaltext=0;
total_chars_trans=0; total_bytes_trans=0;
- if (store_the_text) all_text_top=all_text;
+ all_text_top=0;
dictionary_begin_pass();
- low_strings_top = low_strings;
+ low_strings_top = 0;
static_strings_extent = 0;
no_strings = 0;
/* Note: for allocation and deallocation of all_the_text, see inform.c */
extern void text_allocate_arrays(void)
-{ abbreviations_at = my_malloc(MAX_ABBREVS*MAX_ABBREV_LENGTH,
- "abbreviations");
- abbrev_values = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev values");
- abbrev_quality = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev quality");
- abbrev_freqs = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev freqs");
+{ /* Initialise the memory lists used for translated text, abbreviations,
+ the dictionary and the string areas, and reset the remaining state
+ (display buffer, Huffman tables, unicode usage hash).
+ NOTE(review): the initialise_memory_list() arguments read as
+ (list, element size, initial count, address of the tracked
+ pointer, description) -- confirm against its definition. */
+ int ix; /* index into unicode_usage_hash */
- dtree = my_calloc(sizeof(dict_tree_node), MAX_DICT_ENTRIES,
- "red-black tree for dictionary");
- final_dict_order = my_calloc(sizeof(int), MAX_DICT_ENTRIES,
- "final dictionary ordering table");
- dict_sort_codes = my_calloc(DICT_WORD_BYTES, MAX_DICT_ENTRIES,
- "dictionary sort codes");
+ initialise_memory_list(&translated_text_memlist,
+ sizeof(uchar), 8000, (void**)&translated_text,
+ "translated text holding area");
+
+ initialise_memory_list(&temp_symbol_memlist,
+ sizeof(char), 32, (void**)&temp_symbol,
+ "temporary symbol name");
+
+ initialise_memory_list(&all_text_memlist,
+ sizeof(char), 0, (void**)&all_text,
+ "transcription text for optimise");
+
+ initialise_memory_list(&static_strings_area_memlist,
+ sizeof(uchar), 128, (void**)&static_strings_area,
+ "static strings area");
+
+ initialise_memory_list(&abbreviations_text_memlist,
+ sizeof(char), 64, (void**)&abbreviations_text,
+ "abbreviation text");
- if (!glulx_mode)
- dictionary = my_malloc(9*MAX_DICT_ENTRIES+7,
- "dictionary");
- else
- dictionary = my_malloc(DICT_ENTRY_BYTE_LENGTH*MAX_DICT_ENTRIES+4,
- "dictionary");
+ initialise_memory_list(&abbreviations_memlist,
+ sizeof(abbreviation), 64, (void**)&abbreviations,
+ "abbreviations");
- strings_holding_area
- = my_malloc(MAX_STATIC_STRINGS,"static strings holding area");
- low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings");
+ initialise_memory_list(&abbreviations_optimal_parse_schedule_memlist,
+ sizeof(int), 0, (void**)&abbreviations_optimal_parse_schedule,
+ "abbreviations optimal parse schedule");
+ initialise_memory_list(&abbreviations_optimal_parse_scores_memlist,
+ sizeof(int), 0, (void**)&abbreviations_optimal_parse_scores,
+ "abbreviations optimal parse scores");
+
+ initialise_memory_list(&dtree_memlist,
+ sizeof(dict_tree_node), 1500, (void**)&dtree,
+ "red-black tree for dictionary");
+ initialise_memory_list(&dict_sort_codes_memlist,
+ sizeof(uchar), 1500*DICT_WORD_BYTES, (void**)&dict_sort_codes,
+ "dictionary sort codes");
+ initialise_memory_list(&prepared_sort_memlist,
+ sizeof(uchar), DICT_WORD_BYTES, (void**)&prepared_sort,
+ "prepared sort buffer");
+
+ final_dict_order = NULL; /* will be allocated at sort_dictionary() time */
+
+ /* The exact size will be 7+7*num for z3, 7+9*num for z4+,
+ 4+DICT_ENTRY_BYTE_LENGTH*num for Glulx. But this is just an initial
+ allocation; we don't have to be precise. */
+ initialise_memory_list(&dictionary_memlist,
+ sizeof(uchar), 1000*DICT_ENTRY_BYTE_LENGTH, (void**)&dictionary,
+ "dictionary");
+
+ initialise_memory_list(&low_strings_memlist,
+ sizeof(uchar), 1024, (void**)&low_strings,
+ "low (abbreviation) strings");
+
+ d_show_buf = NULL; /* the display buffer is only allocated inside write_dictionary_to_transcript() */
+ d_show_size = 0;
+ d_show_len = 0;
huff_entities = NULL;
hufflist = NULL;
compression_table_size = 0;
compressed_offsets = NULL;
- MAX_CHARACTER_SET = 0;
+ initialise_memory_list(&unicode_usage_entries_memlist,
+ sizeof(unicode_usage_t), 0, (void**)&unicode_usage_entries,
+ "unicode entity entries");
- if (glulx_mode) {
- if (compression_switch) {
- int ix;
- MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS
- + MAX_UNICODE_CHARS;
- huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1,
- "huffman entities");
- hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET,
- "huffman node list");
- unicode_usage_entries = my_calloc(sizeof(unicode_usage_t),
- MAX_UNICODE_CHARS, "unicode entity entries");
- for (ix=0; ix<UNICODE_HASH_BUCKETS; ix++)
- unicode_usage_hash[ix] = NULL;
- }
- compressed_offsets = my_calloc(sizeof(int32), MAX_NUM_STATIC_STRINGS,
+ /* hufflist and huff_entities will be allocated at compress_game_text() time. */
+
+ /* This hash table is only used in Glulx */
+ for (ix=0; ix<UNICODE_HASH_BUCKETS; ix++)
+ unicode_usage_hash[ix] = -1; /* -1 takes over from the old NULL; presumably the "empty bucket" marker -- confirm */
+
+ initialise_memory_list(&compressed_offsets_memlist,
+ sizeof(int32), 0, (void**)&compressed_offsets,
"static strings index table");
- }
+}
+
+extern void extract_all_text()
+{ /* Hand the recorded transcript (all_text, length all_text_top) over to
+ opttext/opttextlen, then re-initialise all_text_memlist so that it
+ no longer refers to that storage.
+ NOTE(review): the empty "()" is an old-style declaration rather
+ than a prototype; "(void)" would match the other functions here. */
+ /* optimise_abbreviations() is called after free_arrays(). Therefore,
+ we need to preserve the text transcript where it will not be
+ freed up. We do this by copying the pointer to opttext. */
+ opttext = all_text;
+ opttextlen = all_text_top;
+
+ /* Re-init all_text_memlist. This causes it to forget all about the
+ old pointer. Deallocating it in text_free_arrays() will be a no-op. */
+ initialise_memory_list(&all_text_memlist,
+ sizeof(char), 0, (void**)&all_text,
+ "dummy transcription text");
}
extern void text_free_arrays(void)
{
- my_free(&strings_holding_area, "static strings holding area");
- my_free(&low_strings, "low (abbreviation) strings");
- my_free(&abbreviations_at, "abbreviations");
- my_free(&abbrev_values, "abbrev values");
- my_free(&abbrev_quality, "abbrev quality");
- my_free(&abbrev_freqs, "abbrev freqs");
-
- my_free(&dtree, "red-black tree for dictionary");
+ deallocate_memory_list(&translated_text_memlist); /* from here on: release each memory list set up in text_allocate_arrays(), plus the my_malloc'd tables */
+ deallocate_memory_list(&temp_symbol_memlist);
+ /* If extract_all_text() ran, all_text_memlist was re-initialised there,
+ so the call below leaves the stashed transcript (opttext) alone. */
+ deallocate_memory_list(&all_text_memlist);
+
+ deallocate_memory_list(&low_strings_memlist);
+ deallocate_memory_list(&abbreviations_text_memlist);
+ deallocate_memory_list(&abbreviations_memlist);
+
+ deallocate_memory_list(&abbreviations_optimal_parse_schedule_memlist);
+ deallocate_memory_list(&abbreviations_optimal_parse_scores_memlist);
+
+ deallocate_memory_list(&dtree_memlist);
+ deallocate_memory_list(&dict_sort_codes_memlist);
+ deallocate_memory_list(&prepared_sort_memlist);
my_free(&final_dict_order, "final dictionary ordering table");
- my_free(&dict_sort_codes, "dictionary sort codes");
- my_free(&dictionary,"dictionary");
+ deallocate_memory_list(&dictionary_memlist);
- my_free(&compressed_offsets, "static strings index table");
+ deallocate_memory_list(&compressed_offsets_memlist);
my_free(&hufflist, "huffman node list");
my_free(&huff_entities, "huffman entities");
- my_free(&unicode_usage_entries, "unicode entity entities");
+
+ deallocate_memory_list(&unicode_usage_entries_memlist);
- deallocate_memory_block(&static_strings_area);
+ deallocate_memory_list(&static_strings_area_memlist);
}
extern void ao_free_arrays(void)
-{ my_free (&tlbtab,"tlb table");
- my_free (&sub_buffer,"sub_buffer");
+{
+ /* Called only after optimise_abbreviations() runs.
+ Frees the optimiser's working storage: the per-entry text of
+ bestyet and bestyet2, the stashed transcript opttext, the tables
+ themselves, and the tlbtab and all_text memory lists. */
+
+ int32 i;
+ if (bestyet) { /* each entry's text is freed first; the array itself is freed further down */
+ for (i=0; i<MAX_BESTYET; i++) {
+ my_free(&bestyet[i].text, "bestyet.text");
+ }
+ }
+ if (bestyet2) { /* same pattern, over MAX_ABBREVS entries */
+ for (i=0; i<MAX_ABBREVS; i++) {
+ my_free(&bestyet2[i].text, "bestyet2.text");
+ }
+ }
+
+ my_free (&opttext,"stashed transcript for optimisation");
my_free (&bestyet,"bestyet");
my_free (&bestyet2,"bestyet2");
my_free (&grandtable,"grandtable");
my_free (&grandflags,"grandflags");
+
+ deallocate_memory_list(&tlbtab_memlist);
+
+ /* This was re-inited, so we should re-deallocate it. */
+ deallocate_memory_list(&all_text_memlist);
}
/* ========================================================================= */