X-Git-Url: https://jxself.org/git/?p=inform.git;a=blobdiff_plain;f=text.c;fp=text.c;h=0000000000000000000000000000000000000000;hp=cd2fcd8c7b589fc8a642eb983621cb635f1d6aa1;hb=81ffe9a7de1db0b3a318a053b38882d1b7ab304c;hpb=d1090135a32de7b38b48c55d4e21f95da4c405bc diff --git a/text.c b/text.c deleted file mode 100644 index cd2fcd8..0000000 --- a/text.c +++ /dev/null @@ -1,2328 +0,0 @@ -/* ------------------------------------------------------------------------- */ -/* "text" : Text translation, the abbreviations optimiser, the dictionary */ -/* */ -/* Copyright (c) Graham Nelson 1993 - 2018 */ -/* */ -/* This file is part of Inform. */ -/* */ -/* Inform is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ -/* */ -/* Inform is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU General Public License */ -/* along with Inform. If not, see https://gnu.org/licenses/ */ -/* */ -/* ------------------------------------------------------------------------- */ - -#include "header.h" - -uchar *low_strings, *low_strings_top; /* Start and next free byte in the low - strings pool */ - -int32 static_strings_extent; /* Number of bytes of static strings - made so far */ -memory_block static_strings_area; /* Used if (!temporary_files_switch) to - hold the static strings area so far */ - -static uchar *strings_holding_area; /* Area holding translated strings - until they are moved into either - a temporary file, or the - static_strings_area below */ - -char *all_text, *all_text_top; /* Start and next byte free in (large) - text buffer holding the entire text - of the game, when it is being - recorded */ -int put_strings_in_low_memory, /* When TRUE, put static strings in - the low strings pool at 0x100 rather - than in the static strings area */ - is_abbreviation, /* When TRUE, the string being trans - is itself an abbreviation string - so can't make use of abbreviations */ - abbrevs_lookup_table_made, /* The abbreviations lookup table is - constructed when the first non- - abbreviation string is translated: - this flag is TRUE after that */ - abbrevs_lookup[256]; /* Once this has been constructed, - abbrevs_lookup[n] = the smallest - number of any abbreviation beginning - with ASCII character n, or -1 - if none of the abbreviations do */ -int no_abbreviations; /* No of abbreviations defined so far */ -uchar *abbreviations_at; /* Memory to hold the text of any - abbreviation strings declared */ -/* ------------------------------------------------------------------------- */ -/* Glulx string compression storage */ -/* ------------------------------------------------------------------------- */ - -int no_strings; /* No of strings in static strings - area. */ -int no_dynamic_strings; /* No. of @.. string escapes used - (actually, the highest value used - plus one) */ -int no_unicode_chars; /* Number of distinct Unicode chars - used. (Beyond 0xFF.) */ - -static int MAX_CHARACTER_SET; /* Number of possible entities */ -huffentity_t *huff_entities; /* The list of entities (characters, - abbreviations, @.. escapes, and - the terminator) */ -static huffentity_t **hufflist; /* Copy of the list, for sorting */ - -int no_huff_entities; /* The number of entities in the list */ -int huff_unicode_start; /* Position in the list where Unicode - chars begin. */ -int huff_abbrev_start; /* Position in the list where string - abbreviations begin. */ -int huff_dynam_start; /* Position in the list where @.. - entities begin. */ -int huff_entity_root; /* The position in the list of the root - entry (when considering the table - as a tree). */ - -int done_compression; /* Has the game text been compressed? */ -int32 compression_table_size; /* Length of the Huffman table, in - bytes */ -int32 compression_string_size; /* Length of the compressed string - data, in bytes */ -int32 *compressed_offsets; /* The beginning of every string in - the game, relative to the beginning - of the Huffman table. (So entry 0 - is equal to compression_table_size)*/ - -#define UNICODE_HASH_BUCKETS (64) -unicode_usage_t *unicode_usage_entries; -static unicode_usage_t *unicode_usage_hash[UNICODE_HASH_BUCKETS]; - -static int unicode_entity_index(int32 unicode); - -/* ------------------------------------------------------------------------- */ -/* Abbreviation arrays */ -/* ------------------------------------------------------------------------- */ - -int *abbrev_values; -int *abbrev_quality; -int *abbrev_freqs; - -/* ------------------------------------------------------------------------- */ - -int32 total_chars_trans, /* Number of ASCII chars of text in */ - total_bytes_trans, /* Number of bytes of Z-code text out */ - zchars_trans_in_last_string; /* Number of Z-chars in last string: - needed only for abbrev efficiency - calculation in "directs.c" */ -static int32 total_zchars_trans, /* Number of Z-chars of text out - (only used to calculate the above) */ - no_chars_transcribed; /* Number of ASCII chars written to - the text transcription area (used - for the -r and -u switches) */ - -static int zchars_out_buffer[3], /* During text translation, a buffer of - 3 Z-chars at a time: when it's full - these are written as a 2-byte word */ - zob_index; /* Index (0 to 2) into it */ - -static unsigned char *text_out_pc; /* The "program counter" during text - translation: the next address to - write Z-coded text output to */ - -static unsigned char *text_out_limit; /* The upper limit of text_out_pc - during text translation */ - -static int text_out_overflow; /* During text translation, becomes - true if text_out_pc tries to pass - text_out_limit */ - -/* ------------------------------------------------------------------------- */ -/* For variables/arrays used by the dictionary manager, see below */ -/* ------------------------------------------------------------------------- */ - -/* ------------------------------------------------------------------------- */ -/* Prepare the abbreviations lookup table (used to speed up abbreviation */ -/* detection in text translation). We first bubble-sort the abbrevs into */ -/* alphabetical order (this is necessary for the detection algorithm to */ -/* to work). Since the table is only prepared once, and for a table */ -/* of size at most 96, there's no point using an efficient sort algorithm. */ -/* ------------------------------------------------------------------------- */ - -static void make_abbrevs_lookup(void) -{ int bubble_sort, j, k, l; char p[MAX_ABBREV_LENGTH]; char *p1, *p2; - do - { bubble_sort = FALSE; - for (j=0; j=0; j--) - { p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH; - abbrevs_lookup[(uchar)p1[0]]=j; - abbrev_freqs[j]=0; - } - abbrevs_lookup_table_made = TRUE; -} - -/* ------------------------------------------------------------------------- */ -/* Search the abbreviations lookup table (a routine which must be fast). */ -/* The source text to compare is text[i], text[i+1], ... and this routine */ -/* is only called if text[i] is indeed the first character of at least one */ -/* abbreviation, "from" begin the least index into the abbreviations table */ -/* of an abbreviation for which text[i] is the first character. Recall */ -/* that the abbrevs table is in alphabetical order. */ -/* */ -/* The return value is -1 if there is no match. If there is a match, the */ -/* text to be abbreviated out is over-written by a string of null chars */ -/* with "ASCII" value 1, and the abbreviation number is returned. */ -/* */ -/* In Glulx, we *do not* do this overwriting with 1's. */ -/* ------------------------------------------------------------------------- */ - -static int try_abbreviations_from(unsigned char *text, int i, int from) -{ int j, k; uchar *p, c; - c=text[i]; - for (j=from, p=(uchar *)abbreviations_at+from*MAX_ABBREV_LENGTH; - (j MAX_STATIC_STRINGS) - memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS); - i+=2; *c++ = 0; *c++ = 0; - } - } - - j = static_strings_extent; - - if (temporary_files_switch) - for (c=strings_holding_area; c text_out_limit) { - text_out_overflow = TRUE; - return; - } - text_out_pc[0] = j/256; text_out_pc[1] = j%256; text_out_pc+=2; - total_bytes_trans+=2; -} - -static void write_zscii(int zsc) -{ - int lookup_value, in_alphabet; - - if (zsc==' ') - { write_z_char_z(0); - return; - } - - if (zsc < 0x100) lookup_value = zscii_to_alphabet_grid[zsc]; - - else lookup_value = -1; - - if (lookup_value >= 0) - { alphabet_used[lookup_value] = 'Y'; - in_alphabet = lookup_value/26; - if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */ - if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */ - write_z_char_z(lookup_value%26 + 6); - } - else - { write_z_char_z(5); write_z_char_z(6); - write_z_char_z(zsc/32); write_z_char_z(zsc%32); - } -} - -/* ------------------------------------------------------------------------- */ -/* Finish a Z-coded string, padding out with Z-char 5s if necessary and */ -/* setting the "end" bit on the final 2-byte word */ -/* ------------------------------------------------------------------------- */ - -static void end_z_chars(void) -{ unsigned char *p; - zchars_trans_in_last_string=total_zchars_trans-zchars_trans_in_last_string; - while (zob_index!=0) write_z_char_z(5); - p=(unsigned char *) text_out_pc; - *(p-2)= *(p-2)+128; -} - -/* Glulx handles this much more simply -- compression is done elsewhere. */ -static void write_z_char_g(int i) -{ - ASSERT_GLULX(); - if (text_out_pc+1 > text_out_limit) { - text_out_overflow = TRUE; - return; - } - total_zchars_trans++; - text_out_pc[0] = i; - text_out_pc++; - total_bytes_trans++; -} - -/* ------------------------------------------------------------------------- */ -/* The main routine "text.c" provides to the rest of Inform: the text */ -/* translator. p is the address to write output to, s_text the source text */ -/* and the return value is the next free address to write output to. */ -/* The return value will not exceed p_limit. If the translation tries to */ -/* overflow this boundary, the return value will be NULL (and you should */ -/* display an error). */ -/* Note that the source text may be corrupted by this routine. */ -/* ------------------------------------------------------------------------- */ - -extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text) -{ int i, j, k, in_alphabet, lookup_value; - int32 unicode; int zscii; - unsigned char *text_in; - - /* Cast the input and output streams to unsigned char: text_out_pc will - advance as bytes of Z-coded text are written, but text_in doesn't */ - - text_in = (unsigned char *) s_text; - text_out_pc = (unsigned char *) p; - text_out_limit = (unsigned char *) p_limit; - text_out_overflow = FALSE; - - /* Remember the Z-chars total so that later we can subtract to find the - number of Z-chars translated on this string */ - - zchars_trans_in_last_string = total_zchars_trans; - - /* Start with the Z-characters output buffer empty */ - - zob_index=0; - - /* If this is the first text translated since the abbreviations were - declared, and if some were declared, then it's time to make the - lookup table for abbreviations - - (Except: we don't if the text being translated is itself - the text of an abbreviation currently being defined) */ - - if ((!abbrevs_lookup_table_made) && (no_abbreviations > 0) - && (!is_abbreviation)) - make_abbrevs_lookup(); - - /* If we're storing the whole game text to memory, then add this text */ - - if ((!is_abbreviation) && (store_the_text)) - { no_chars_transcribed += strlen(s_text)+2; - if (no_chars_transcribed >= MAX_TRANSCRIPT_SIZE) - memoryerror("MAX_TRANSCRIPT_SIZE", MAX_TRANSCRIPT_SIZE); - sprintf(all_text_top, "%s\n\n", s_text); - all_text_top += strlen(all_text_top); - } - - if (transcript_switch && (!veneer_mode)) - write_to_transcript_file(s_text); - - if (!glulx_mode) { - - /* The empty string of Z-text is illegal, since it can't carry an end - bit: so we translate an empty string of ASCII text to just the - pad character 5. Printing this causes nothing to appear on screen. */ - - if (text_in[0]==0) write_z_char_z(5); - - /* Loop through the characters of the null-terminated input text: note - that if 1 is written over a character in the input text, it is - afterwards ignored */ - - for (i=0; text_in[i]!=0; i++) - { total_chars_trans++; - - /* Contract ". " into ". " if double-space-removing switch set: - likewise "? " and "! " if the setting is high enough */ - - if ((double_space_setting >= 1) - && (text_in[i+1]==' ') && (text_in[i+2]==' ')) - { if (text_in[i]=='.') text_in[i+2]=1; - if (double_space_setting >= 2) - { if (text_in[i]=='?') text_in[i+2]=1; - if (text_in[i]=='!') text_in[i+2]=1; - } - } - - /* Try abbreviations if the economy switch set */ - - if ((economy_switch) && (!is_abbreviation) - && ((k=abbrevs_lookup[text_in[i]])!=-1)) - { if ((j=try_abbreviations_from(text_in, i, k))!=-1) - { if (j<32) { write_z_char_z(2); write_z_char_z(j); } - else { write_z_char_z(3); write_z_char_z(j-32); } - } - } - - /* If Unicode switch set, use text_to_unicode to perform UTF-8 - decoding */ - if (character_set_unicode && (text_in[i] & 0x80)) - { unicode = text_to_unicode((char *) (text_in+i)); - zscii = unicode_to_zscii(unicode); - if (zscii != 5) write_zscii(zscii); - else - { unicode_char_error( - "Character can only be used if declared in \ -advance as part of 'Zcharacter table':", unicode); - } - i += textual_form_length - 1; - continue; - } - - /* '@' is the escape character in Inform string notation: the various - possibilities are: - - (printing only) - @@decimalnumber : write this ZSCII char (0 to 1023) - @twodigits : write the abbreviation string with this - decimal number - - (any string context) - @accentcode : this accented character: e.g., - for @'e write an E-acute - @{...} : this Unicode char (in hex) */ - - if (text_in[i]=='@') - { if (text_in[i+1]=='@') - { - /* @@... */ - - i+=2; j=atoi((char *) (text_in+i)); - switch(j) - { /* Prevent ~ and ^ from being translated to double-quote - and new-line, as they ordinarily would be */ - - case 94: write_z_char_z(5); write_z_char_z(6); - write_z_char_z(94/32); write_z_char_z(94%32); - break; - case 126: write_z_char_z(5); write_z_char_z(6); - write_z_char_z(126/32); write_z_char_z(126%32); - break; - - default: write_zscii(j); break; - } - while (isdigit(text_in[i])) i++; i--; - } - else if (isdigit(text_in[i+1])!=0) - { int d1, d2; - - /* @.. */ - - d1 = character_digit_value[text_in[i+1]]; - d2 = character_digit_value[text_in[i+2]]; - if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10)) - error("'@..' must have two decimal digits"); - else - { i+=2; - write_z_char_z(1); write_z_char_z(d1*10 + d2); - } - } - else - { - /* A string escape specifying an unusual character */ - - unicode = text_to_unicode((char *) (text_in+i)); - zscii = unicode_to_zscii(unicode); - if (zscii != 5) write_zscii(zscii); - else - { unicode_char_error( - "Character can only be used if declared in \ -advance as part of 'Zcharacter table':", unicode); - } - i += textual_form_length - 1; - } - } - else - { /* Skip a character which has been over-written with the null - value 1 earlier on */ - - if (text_in[i]!=1) - { if (text_in[i]==' ') write_z_char_z(0); - else - { j = (int) text_in[i]; - lookup_value = iso_to_alphabet_grid[j]; - if (lookup_value < 0) - { /* The character isn't in the standard alphabets, so - we have to use the ZSCII 4-Z-char sequence */ - - if (lookup_value == -5) - { /* Character isn't in the ZSCII set at all */ - - unicode = iso_to_unicode(j); - unicode_char_error( - "Character can only be used if declared in \ -advance as part of 'Zcharacter table':", unicode); - write_zscii(0x200 + unicode/0x100); - write_zscii(0x300 + unicode%0x100); - } - else write_zscii(-lookup_value); - } - else - { /* The character is in one of the standard alphabets: - write a SHIFT to temporarily change alphabet if - it isn't in alphabet 0, then write the Z-char */ - - alphabet_used[lookup_value] = 'Y'; - in_alphabet = lookup_value/26; - if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */ - if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */ - write_z_char_z(lookup_value%26 + 6); - } - } - } - } - } - - /* Flush the Z-characters output buffer and set the "end" bit */ - - end_z_chars(); - - } - else { - - /* The text storage here is, of course, temporary. Compression - will occur when we're finished compiling, so that all the - clever Huffman stuff will work. - In the stored text, we use "@@" to indicate @, - "@0" to indicate a zero byte, - "@ANNNN" to indicate an abbreviation, - "@DNNNN" to indicate a dynamic string thing. - "@UNNNN" to indicate a four-byte Unicode value (0x100 or higher). - (NNNN is a four-digit hex number using the letters A-P... an - ugly representation but a convenient one.) - */ - - for (i=0; text_in[i]!=0; i++) { - - /* Contract ". " into ". " if double-space-removing switch set: - likewise "? " and "! " if the setting is high enough. */ - if ((double_space_setting >= 1) - && (text_in[i+1]==' ') && (text_in[i+2]==' ')) { - if (text_in[i]=='.' - || (double_space_setting >= 2 - && (text_in[i]=='?' || text_in[i]=='!'))) { - text_in[i+1] = text_in[i]; - i++; - } - } - - total_chars_trans++; - - /* Try abbreviations if the economy switch set. We have to be in - compression mode too, since the abbreviation mechanism is part - of string decompression. */ - - if ((economy_switch) && (compression_switch) && (!is_abbreviation) - && ((k=abbrevs_lookup[text_in[i]])!=-1) - && ((j=try_abbreviations_from(text_in, i, k)) != -1)) { - char *cx = (char *)abbreviations_at+j*MAX_ABBREV_LENGTH; - i += (strlen(cx)-1); - write_z_char_g('@'); - write_z_char_g('A'); - write_z_char_g('A' + ((j >>12) & 0x0F)); - write_z_char_g('A' + ((j >> 8) & 0x0F)); - write_z_char_g('A' + ((j >> 4) & 0x0F)); - write_z_char_g('A' + ((j ) & 0x0F)); - } - else if (text_in[i] == '@') { - if (text_in[i+1]=='@') { - /* An ASCII code */ - i+=2; j=atoi((char *) (text_in+i)); - if (j == '@' || j == '\0') { - write_z_char_g('@'); - if (j == 0) { - j = '0'; - if (!compression_switch) - warning("Ascii @@0 will prematurely terminate non-compressed \ -string."); - } - } - write_z_char_g(j); - while (isdigit(text_in[i])) i++; i--; - } - else if (isdigit(text_in[i+1])) { - int d1, d2; - d1 = character_digit_value[text_in[i+1]]; - d2 = character_digit_value[text_in[i+2]]; - if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10)) { - error("'@..' must have two decimal digits"); - } - else { - if (!compression_switch) - warning("'@..' print variable will not work in non-compressed \ -string; substituting ' '."); - i += 2; - j = d1*10 + d2; - if (j >= MAX_DYNAMIC_STRINGS) { - memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS); - j = 0; - } - if (j+1 >= no_dynamic_strings) - no_dynamic_strings = j+1; - write_z_char_g('@'); - write_z_char_g('D'); - write_z_char_g('A' + ((j >>12) & 0x0F)); - write_z_char_g('A' + ((j >> 8) & 0x0F)); - write_z_char_g('A' + ((j >> 4) & 0x0F)); - write_z_char_g('A' + ((j ) & 0x0F)); - } - } - else { - unicode = text_to_unicode((char *) (text_in+i)); - i += textual_form_length - 1; - if (unicode == '@' || unicode == '\0') { - write_z_char_g('@'); - write_z_char_g(unicode ? '@' : '0'); - } - else if (unicode >= 0 && unicode < 256) { - write_z_char_g(unicode); - } - else { - if (!compression_switch) { - warning("Unicode characters will not work in non-compressed \ -string; substituting '?'."); - write_z_char_g('?'); - } - else { - j = unicode_entity_index(unicode); - write_z_char_g('@'); - write_z_char_g('U'); - write_z_char_g('A' + ((j >>12) & 0x0F)); - write_z_char_g('A' + ((j >> 8) & 0x0F)); - write_z_char_g('A' + ((j >> 4) & 0x0F)); - write_z_char_g('A' + ((j ) & 0x0F)); - } - } - } - } - else if (text_in[i] == '^') - write_z_char_g(0x0A); - else if (text_in[i] == '~') - write_z_char_g('"'); - else if (character_set_unicode) { - if (text_in[i] & 0x80) { - unicode = text_to_unicode((char *) (text_in+i)); - i += textual_form_length - 1; - if (unicode >= 0 && unicode < 256) { - write_z_char_g(unicode); - } - else { - if (!compression_switch) { - warning("Unicode characters will not work in non-compressed \ -string; substituting '?'."); - write_z_char_g('?'); - } - else { - j = unicode_entity_index(unicode); - write_z_char_g('@'); - write_z_char_g('U'); - write_z_char_g('A' + ((j >>12) & 0x0F)); - write_z_char_g('A' + ((j >> 8) & 0x0F)); - write_z_char_g('A' + ((j >> 4) & 0x0F)); - write_z_char_g('A' + ((j ) & 0x0F)); - } - } - } - else { - write_z_char_g(text_in[i]); - } - } - else { - unicode = iso_to_unicode_grid[text_in[i]]; - if (unicode >= 0 && unicode < 256) { - write_z_char_g(unicode); - } - else { - if (!compression_switch) { - warning("Unicode characters will not work in non-compressed \ -string; substituting '?'."); - write_z_char_g('?'); - } - else { - j = unicode_entity_index(unicode); - write_z_char_g('@'); - write_z_char_g('U'); - write_z_char_g('A' + ((j >>12) & 0x0F)); - write_z_char_g('A' + ((j >> 8) & 0x0F)); - write_z_char_g('A' + ((j >> 4) & 0x0F)); - write_z_char_g('A' + ((j ) & 0x0F)); - } - } - } - } - write_z_char_g(0); - - } - - if (text_out_overflow) - return NULL; - else - return((uchar *) text_out_pc); -} - -static int unicode_entity_index(int32 unicode) -{ - unicode_usage_t *uptr; - int j; - int buck = unicode % UNICODE_HASH_BUCKETS; - - for (uptr = unicode_usage_hash[buck]; uptr; uptr=uptr->next) { - if (uptr->ch == unicode) - break; - } - if (uptr) { - j = (uptr - unicode_usage_entries); - } - else { - if (no_unicode_chars >= MAX_UNICODE_CHARS) { - memoryerror("MAX_UNICODE_CHARS", MAX_UNICODE_CHARS); - j = 0; - } - else { - j = no_unicode_chars; - no_unicode_chars++; - uptr = unicode_usage_entries + j; - uptr->ch = unicode; - uptr->next = unicode_usage_hash[buck]; - unicode_usage_hash[buck] = uptr; - } - } - - return j; -} - -/* ------------------------------------------------------------------------- */ -/* Glulx compression code */ -/* ------------------------------------------------------------------------- */ - - -static void compress_makebits(int entnum, int depth, int prevbit, - huffbitlist_t *bits); - -/* The compressor. This uses the usual Huffman compression algorithm. */ -void compress_game_text() -{ - int entities=0, branchstart, branches; - int numlive; - int32 lx; - int jx; - int ch; - int32 ix; - huffbitlist_t bits; - - if (compression_switch) { - - /* How many entities have we currently got? Well, 256 plus the - string-terminator plus Unicode chars plus abbrevations plus - dynamic strings. */ - entities = 256+1; - huff_unicode_start = entities; - entities += no_unicode_chars; - huff_abbrev_start = entities; - if (economy_switch) - entities += no_abbreviations; - huff_dynam_start = entities; - entities += no_dynamic_strings; - - if (entities > MAX_CHARACTER_SET) - memoryerror("MAX_CHARACTER_SET",MAX_CHARACTER_SET); - - /* Characters */ - for (jx=0; jx<256; jx++) { - huff_entities[jx].type = 2; - huff_entities[jx].count = 0; - huff_entities[jx].u.ch = jx; - } - /* Terminator */ - huff_entities[256].type = 1; - huff_entities[256].count = 0; - for (jx=0; jx static_strings_extent || ch < 0) - compiler_error("Read too much not-yet-compressed text."); - if (escapelen == -1) { - escapelen = 0; - if (ch == '@') { - ch = '@'; - } - else if (ch == '0') { - ch = '\0'; - } - else if (ch == 'A' || ch == 'D' || ch == 'U') { - escapelen = 4; - escapetype = ch; - escapeval = 0; - continue; - } - else { - compiler_error("Strange @ escape in processed text."); - } - } - else if (escapelen) { - escapeval = (escapeval << 4) | ((ch-'A') & 0x0F); - escapelen--; - if (escapelen == 0) { - if (escapetype == 'A') { - ch = huff_abbrev_start+escapeval; - } - else if (escapetype == 'D') { - ch = huff_dynam_start+escapeval; - } - else if (escapetype == 'U') { - ch = huff_unicode_start+escapeval; - } - else { - compiler_error("Strange @ escape in processed text."); - } - } - else - continue; - } - else { - if (ch == '@') { - escapelen = -1; - continue; - } - if (ch == 0) { - ch = 256; - done = TRUE; - } - } - huff_entities[ch].count++; - } - } - - numlive = 0; - for (jx=0; jx 1) { - int best1, best2; - int best1num, best2num; - huffentity_t *bran; - - if (hufflist[0]->count < hufflist[1]->count) { - best1 = 0; - best2 = 1; - } - else { - best2 = 0; - best1 = 1; - } - - best1num = hufflist[best1]->count; - best2num = hufflist[best2]->count; - - for (jx=2; jxcount < best1num) { - best2 = best1; - best2num = best1num; - best1 = jx; - best1num = hufflist[best1]->count; - } - else if (hufflist[jx]->count < best2num) { - best2 = jx; - best2num = hufflist[best2]->count; - } - } - - bran = &(huff_entities[branchstart+branches]); - branches++; - bran->type = 0; - bran->count = hufflist[best1]->count + hufflist[best2]->count; - bran->u.branch[0] = (hufflist[best1] - huff_entities); - bran->u.branch[1] = (hufflist[best2] - huff_entities); - hufflist[best1] = bran; - if (best2 < numlive-1) { - memmove(&(hufflist[best2]), &(hufflist[best2+1]), - ((numlive-1) - best2) * sizeof(huffentity_t *)); - } - numlive--; - } - - huff_entity_root = (hufflist[0] - huff_entities); - - for (ix=0; ix= MAX_NUM_STATIC_STRINGS) - memoryerror("MAX_NUM_STATIC_STRINGS", MAX_NUM_STATIC_STRINGS); - - for (lx=0, ix=0; lx static_strings_extent || ch < 0) - compiler_error("Read too much not-yet-compressed text."); - if (escapelen == -1) { - escapelen = 0; - if (ch == '@') { - ch = '@'; - } - else if (ch == '0') { - ch = '\0'; - } - else if (ch == 'A' || ch == 'D' || ch == 'U') { - escapelen = 4; - escapetype = ch; - escapeval = 0; - continue; - } - else { - compiler_error("Strange @ escape in processed text."); - } - } - else if (escapelen) { - escapeval = (escapeval << 4) | ((ch-'A') & 0x0F); - escapelen--; - if (escapelen == 0) { - if (escapetype == 'A') { - ch = huff_abbrev_start+escapeval; - } - else if (escapetype == 'D') { - ch = huff_dynam_start+escapeval; - } - else if (escapetype == 'U') { - ch = huff_unicode_start+escapeval; - } - else { - compiler_error("Strange @ escape in processed text."); - } - } - else - continue; - } - else { - if (ch == '@') { - escapelen = -1; - continue; - } - if (ch == 0) { - ch = 256; - done = TRUE; - } - } - - if (compression_switch) { - jx += huff_entities[ch].depth; - compression_string_size += (jx/8); - jx = (jx % 8); - } - else { - if (ch >= huff_dynam_start) { - compression_string_size += 3; - } - else if (ch >= huff_unicode_start) { - compiler_error("Abbreviation/Unicode in non-compressed string \ -should be impossible."); - } - else - compression_string_size += 1; - } - } - if (compression_switch && jx) - compression_string_size++; - } - - done_compression = TRUE; -} - -static void compress_makebits(int entnum, int depth, int prevbit, - huffbitlist_t *bits) -{ - huffentity_t *ent = &(huff_entities[entnum]); - char *cx; - - no_huff_entities++; - ent->addr = compression_table_size; - ent->depth = depth; - ent->bits = *bits; - if (depth > 0) { - if (prevbit) - ent->bits.b[(depth-1) / 8] |= (1 << ((depth-1) % 8)); - } - - switch (ent->type) { - case 0: - compression_table_size += 9; - compress_makebits(ent->u.branch[0], depth+1, 0, &ent->bits); - compress_makebits(ent->u.branch[1], depth+1, 1, &ent->bits); - break; - case 1: - compression_table_size += 1; - break; - case 2: - compression_table_size += 2; - break; - case 3: - cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH; - compression_table_size += (1 + 1 + strlen(cx)); - break; - case 4: - case 9: - compression_table_size += 5; - break; - } -} - -/* ------------------------------------------------------------------------- */ -/* The abbreviations optimiser */ -/* */ -/* This is a very complex, memory and time expensive algorithm to */ -/* approximately solve the problem of which abbreviation strings would */ -/* minimise the total number of Z-chars to which the game text translates. */ -/* It is in some ways a quite separate program but remains inside Inform */ -/* for compatibility with previous releases. */ -/* ------------------------------------------------------------------------- */ - -typedef struct tlb_s -{ char text[4]; - int32 intab, occurrences; -} tlb; -static tlb *tlbtab; -static int32 no_occs; - -static int32 *grandtable; -static int32 *grandflags; -typedef struct optab_s -{ int32 length; - int32 popularity; - int32 score; - int32 location; - char text[MAX_ABBREV_LENGTH]; -} optab; -static optab *bestyet, *bestyet2; - -static int pass_no; - -static char *sub_buffer; - -static void optimise_pass(void) -{ int32 i; int t1, t2; - int32 j, j2, k, nl, matches, noflags, score, min, minat=0, x, scrabble, c; - for (i=0; i<256; i++) bestyet[i].length=0; - for (i=0; i=2)&&(nl<=62)) - { nl++; - for (j2=0; j2-nl)&&(x=26) - scrabble++; - } - } - score=(matches-1)*(scrabble-2); - min=score; - for (j2=0; j2<256; j2++) - { if ((nl==bestyet[j2].length) - && (memcmp(all_text+bestyet[j2].location, - all_text+grandtable[tlbtab[i].intab+j], - nl)==0)) - { j2=256; min=score; } - else - { if (bestyet[j2].score=2) - { tlbtab[no_occs]=test; - tlbtab[no_occs].intab=t; t+=tlbtab[no_occs].occurrences; - if (max0)&&(selected<64)) - { printf("Pass %d\n", ++pass_no); - - optimise_pass(); - available=0; - for (i=0; i<256; i++) - if (bestyet[i].score!=0) - { available++; - nl=bestyet[i].length; - for (j2=0; j20) - printf("%02d: %4d %4d '%s'\n", i, bestyet[i].score, - bestyet[i].popularity, bestyet[i].text); - */ - - do - { max=0; - for (i=0; i<256; i++) - if (max0) - { bestyet2[selected++]=bestyet[maxat]; - - printf( - "Selection %2ld: '%s' (repeated %ld times, scoring %ld)\n", - (long int) selected,bestyet[maxat].text, - (long int) bestyet[maxat].popularity, - (long int) bestyet[maxat].score); - - test.text[0]=bestyet[maxat].text[0]; - test.text[1]=bestyet[maxat].text[1]; - test.text[2]=bestyet[maxat].text[2]; - test.text[3]=0; - - for (i=0; i0)&& - (any_overlap(bestyet[maxat].text,bestyet[i].text)==1)) - { bestyet[i].score=0; - /* printf("Discarding '%s' as overlapping\n", - bestyet[i].text); */ - } - } - } while ((max>0)&&(available>0)&&(selected<64)); - } - - printf("\nChosen abbreviations (in Inform syntax):\n\n"); - for (i=0; i */ -/* 4 or 6 bytes byte byte byte */ -/* */ -/* For Glulx, the form is instead: (But see below about Unicode-valued */ -/* dictionaries and my heinie.) */ -/* */ -/* */ -/* DICT_WORD_SIZE short short short */ -/* */ -/* These records are stored in "accession order" (i.e. in order of their */ -/* first being received by these routines) and only alphabetically sorted */ -/* by construct_storyfile() (using the array below). */ -/* ------------------------------------------------------------------------- */ -/* */ -/* Further notes about the data fields... */ -/* The flags are currently: */ -/* bit 0: word is used as a verb (in verb grammar) */ -/* bit 1: word is used as a meta verb */ -/* bit 2: word is plural (set by '//p') */ -/* bit 3: word is used as a preposition (in verb grammar) */ -/* bit 6: set for all verbs, but not used by the parser? */ -/* bit 7: word is used as a noun (set for every word that appears in */ -/* code or in an object property) */ -/* */ -/* In grammar version 2, the third field (adjectivenumber) is unused (and */ -/* zero). */ -/* */ -/* The compiler generates special constants #dict_par1, #dict_par2, */ -/* #dict_par3 to refer to the byte offsets of the three fields. In */ -/* Z-code v3, these are 4/5/6; in v4+, they are 6/7/8. In Glulx, they */ -/* are $DICT_WORD_SIZE+2/4/6, referring to the *low* bytes of the three */ -/* fields. (The high bytes are $DICT_WORD_SIZE+1/3/5.) */ -/* ------------------------------------------------------------------------- */ - -uchar *dictionary, /* (These two pointers are externally - used only in "tables.c" when - building the story-file) */ - *dictionary_top; /* Pointer to next free record */ - -int dict_entries; /* Total number of records entered */ - -/* ------------------------------------------------------------------------- */ -/* dict_word is a typedef for a struct of 6 unsigned chars (defined in */ -/* "header.h"): it holds the (4 or) 6 bytes of Z-coded text of a word. */ -/* Usefully, because the PAD character 5 is < all alphabetic characters, */ -/* alphabetic order corresponds to numeric order. For this reason, the */ -/* dict_word is called the "sort code" of the original text word. */ -/* */ -/* ###- In modifying the compiler, I've found it easier to discard the */ -/* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */ -/* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */ -/* as before. In Glulx, it can be any value up to MAX_DICT_WORD_SIZE. */ -/* (That limit is defined as 40 in the header; it exists only for a few */ -/* static buffers, and can be increased without using significant memory.) */ -/* */ -/* ###- Well, that certainly bit me on the butt, didn't it. In further */ -/* modifying the compiler to generate a Unicode dictionary, I have to */ -/* store four-byte values in the uchar array. This is handled by making */ -/* the array size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE).*/ -/* Then we store the 32-bit character value big-endian. This lets us */ -/* continue to compare arrays bytewise, which is a nice simplification. */ -/* ------------------------------------------------------------------------- */ - -extern int compare_sorts(uchar *d1, uchar *d2) -{ int i; - for (i=0; i=9) break; - - k=(int) dword[j]; - if (k==(int) '\'') - warning_named("Obsolete usage: use the ^ character for the \ -apostrophe in", dword); - if (k==(int) '^') k=(int) '\''; - if (k=='\"') k='~'; - - if (k==(int) '@' || (character_set_unicode && (k & 0x80))) - { int unicode = text_to_unicode(dword+j); - if ((unicode < 128) && isupper(unicode)) unicode = tolower(unicode); - k = unicode_to_zscii(unicode); - j += textual_form_length - 1; - if ((k == 5) || (k >= 0x100)) - { unicode_char_error( - "Character can be printed but not input:", unicode); - k = '?'; - } - k2 = zscii_to_alphabet_grid[(uchar) k]; - } - else - { if (isupper(k)) k = tolower(k); - k2 = iso_to_alphabet_grid[(uchar) k]; - } - - if (k2 < 0) - { if ((k2 == -5) || (k2 <= -0x100)) - char_error("Character can be printed but not input:", k); - else - { /* Use 4 more Z-chars to encode a ZSCII escape sequence */ - - wd[i++] = 5; wd[i++] = 6; - k2 = -k2; - wd[i++] = k2/32; wd[i] = k2%32; - } - } - else - { alphabet_used[k2] = 'Y'; - if ((k2/26)!=0) - wd[i++]=3+(k2/26); /* Change alphabet for symbols */ - wd[i]=6+(k2%26); /* Write the Z character */ - } - } - - /* Fill up to the end of the dictionary block with PAD characters */ - - for (; i<9; i++) wd[i]=5; - - /* The array of Z-chars is converted to three 2-byte blocks */ - - tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10); - prepared_sort[1]=tot%0x100; - prepared_sort[0]=(tot/0x100)%0x100; - tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10); - prepared_sort[3]=tot%0x100; - prepared_sort[2]=(tot/0x100)%0x100; - tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10); - prepared_sort[5]=tot%0x100; - prepared_sort[4]=(tot/0x100)%0x100; - - /* Set the "end bit" on the 2nd (in v3) or the 3rd (v4+) 2-byte block */ - - if (version_number==3) prepared_sort[2]+=0x80; - else prepared_sort[4]+=0x80; - - if (optresult) copy_sorts(optresult, prepared_sort); -} - -/* Also used by verbs.c */ -static void dictionary_prepare_g(char *dword, uchar *optresult) -{ - int i, j, k; - int32 unicode; - - number_and_case = 0; - - for (i=0, j=0; (dword[j]!=0); i++, j++) { - if ((dword[j] == '/') && (dword[j+1] == '/')) { - for (j+=2; dword[j] != 0; j++) { - switch(dword[j]) { - case 'p': - number_and_case |= 4; - break; - default: - error_named("Expected 'p' after '//' \ -to give gender or number of dictionary word", dword); - break; - } - } - break; - } - if (i>=DICT_WORD_SIZE) break; - - k= ((unsigned char *)dword)[j]; - if (k=='\'') - warning_named("Obsolete usage: use the ^ character for the \ -apostrophe in", dword); - if (k=='^') - k='\''; - if (k=='~') /* as in iso_to_alphabet_grid */ - k='\"'; - - if (k=='@' || (character_set_unicode && (k & 0x80))) { - unicode = text_to_unicode(dword+j); - j += textual_form_length - 1; - } - else { - unicode = iso_to_unicode_grid[k]; - } - - if (DICT_CHAR_SIZE != 1 || (unicode >= 0 && unicode < 256)) { - k = unicode; - } - else { - error("The dictionary cannot contain Unicode characters beyond Latin-1. \ -Define DICT_CHAR_SIZE=4 for a Unicode-compatible dictionary."); - k = '?'; - } - - if (k >= (unsigned)'A' && k <= (unsigned)'Z') - k += ('a' - 'A'); - - if (DICT_CHAR_SIZE == 1) { - prepared_sort[i] = k; - } - else { - prepared_sort[4*i] = (k >> 24) & 0xFF; - prepared_sort[4*i+1] = (k >> 16) & 0xFF; - prepared_sort[4*i+2] = (k >> 8) & 0xFF; - prepared_sort[4*i+3] = (k) & 0xFF; - } - } - - if (DICT_CHAR_SIZE == 1) { - for (; i0) at = dtree[at].branch[1]; else at = dtree[at].branch[0]; - } - return 0; -} - -/* ------------------------------------------------------------------------- */ -/* Add "dword" to the dictionary with (x,y,z) as its data fields; unless */ -/* it already exists, in which case OR the data with (x,y,z) */ -/* */ -/* These fields are one byte each in Z-code, two bytes each in Glulx. */ -/* */ -/* Returns: the accession number. */ -/* ------------------------------------------------------------------------- */ - -extern int dictionary_add(char *dword, int x, int y, int z) -{ int n; uchar *p; - int ggfr = 0, gfr = 0, fr = 0, r = 0; - int ggf = VACANT, gf = VACANT, f = VACANT, at = root; - int a, b; - int res=((version_number==3)?4:6); - - dictionary_prepare(dword, NULL); - - if (root == VACANT) - { root = 0; goto CreateEntry; - } - while (TRUE) - { - n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_BYTES); - if (n==0) - { - if (!glulx_mode) { - p = dictionary+7 + at*(3+res) + res; - p[0]=(p[0])|x; p[1]=(p[1])|y; p[2]=(p[2])|z; - if (x & 128) p[0] = (p[0])|number_and_case; - } - else { - p = dictionary+4 + at*DICT_ENTRY_BYTE_LENGTH + DICT_ENTRY_FLAG_POS; - p[0]=(p[0])|(x/256); p[1]=(p[1])|(x%256); - p[2]=(p[2])|(y/256); p[3]=(p[3])|(y%256); - p[4]=(p[4])|(z/256); p[5]=(p[5])|(z%256); - if (x & 128) p[1] = (p[1]) | number_and_case; - } - return at; - } - if (n>0) r=1; else r=0; - - a = dtree[at].branch[0]; b = dtree[at].branch[1]; - if ((a != VACANT) && (dtree[a].colour == RED) && - (b != VACANT) && (dtree[b].colour == RED)) - { dtree[a].colour = BLACK; - dtree[b].colour = BLACK; - - dtree[at].colour = RED; - - /* A tree rotation may be needed to avoid two red links in a row: - e.g. - ggf (or else gf is root) ggf (or f is root) - | | - gf f - / \(red) / \ (both red) - f becomes gf at - / \(red) / \ / \ - at - / \ - - In effect we rehang the "gf" subtree from "f". - See the Technical Manual for further details. - */ - - if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED)) - { - if (fr == gfr) - { if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f; - dtree[gf].branch[gfr] = dtree[f].branch[1-fr]; - dtree[f].branch[1-fr] = gf; - dtree[f].colour = BLACK; - dtree[gf].colour = RED; - gf = ggf; gfr = ggfr; - } - else - { if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at; - dtree[at].colour = BLACK; - dtree[gf].colour = RED; - dtree[f].branch[fr] = dtree[at].branch[gfr]; - dtree[gf].branch[gfr] = dtree[at].branch[fr]; - dtree[at].branch[gfr] = f; - dtree[at].branch[fr] = gf; - - r = 1-r; n = at; if (r==fr) at = f; else at = gf; - f = n; gf = ggf; fr = 1-r; gfr = ggfr; - } - } - } - - if (dtree[at].branch[r] == VACANT) - { dtree[at].colour = RED; - - if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED)) - { if (fr == gfr) - { if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f; - dtree[gf].branch[gfr] = dtree[f].branch[1-fr]; - dtree[f].branch[1-fr] = gf; - dtree[f].colour = BLACK; - dtree[gf].colour = RED; - } - else - { if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at; - dtree[at].colour = BLACK; - dtree[gf].colour = RED; - dtree[f].branch[fr] = dtree[at].branch[gfr]; - dtree[gf].branch[gfr] = dtree[at].branch[fr]; - dtree[at].branch[gfr] = f; - dtree[at].branch[fr] = gf; - - r = 1-r; n = at; if (r==fr) at = f; else at = gf; - f = n; gf = ggf; - } - } - dtree[at].branch[r] = dict_entries; - goto CreateEntry; - } - ggf = gf; gf = f; f = at; at = dtree[at].branch[r]; - ggfr = gfr; gfr = fr; fr = r; - } - - CreateEntry: - - if (dict_entries==MAX_DICT_ENTRIES) - memoryerror("MAX_DICT_ENTRIES",MAX_DICT_ENTRIES); - - dtree[dict_entries].branch[0] = VACANT; - dtree[dict_entries].branch[1] = VACANT; - dtree[dict_entries].colour = BLACK; - - /* Address in Inform's own dictionary table to write the record to */ - - if (!glulx_mode) { - - p = dictionary + (3+res)*dict_entries + 7; - - /* So copy in the 4 (or 6) bytes of Z-coded text and the 3 data - bytes */ - - p[0]=prepared_sort[0]; p[1]=prepared_sort[1]; - p[2]=prepared_sort[2]; p[3]=prepared_sort[3]; - if (version_number > 3) - { p[4]=prepared_sort[4]; p[5]=prepared_sort[5]; } - p[res]=x; p[res+1]=y; p[res+2]=z; - if (x & 128) p[res] = (p[res])|number_and_case; - - dictionary_top += res+3; - - } - else { - int i; - p = dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*dict_entries; - p[0] = 0x60; /* type byte -- dict word */ - - p += DICT_CHAR_SIZE; - for (i=0; i 3) - { encoded_word[6] = (((int) p[4])&0x7c)/4; - encoded_word[7] = 8*(((int) p[4])&0x3) + (((int) p[5])&0xe0)/32; - encoded_word[8] = ((int) p[5])&0x1f; - } - - shift = 0; cc = 0; - for (i=0; i< ((version_number==3)?6:9); i++) - { zchar = encoded_word[i]; - - if (zchar == 4) shift = 1; - else - if (zchar == 5) shift = 2; - else - { if ((shift == 2) && (zchar == 6)) - { zchar = 32*encoded_word[i+1] + encoded_word[i+2]; - i += 2; - if ((zchar>=32) && (zchar<=126)) - results[cc++] = zchar; - else - { zscii_to_text(results+cc, zchar); - cc = strlen(results); - } - } - else - { zscii_to_text(results+cc, (alphabet[shift])[zchar-6]); - cc = strlen(results); - } - shift = 0; - } - } - results[cc] = 0; -} - -static void recursively_show_z(int node) -{ int i, cprinted, flags; uchar *p; - char textual_form[32]; - int res = (version_number == 3)?4:6; - - if (dtree[node].branch[0] != VACANT) - recursively_show_z(dtree[node].branch[0]); - - p = (uchar *)dictionary + 7 + (3+res)*node; - - word_to_ascii(p, textual_form); - - for (cprinted = 0; textual_form[cprinted]!=0; cprinted++) - show_char(textual_form[cprinted]); - for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++) - show_char(' '); - - if (d_show_to == NULL) - { for (i=0; i<3+res; i++) printf("%02x ",p[i]); - - flags = (int) p[res]; - if (flags & 128) - { printf("noun "); - if (flags & 4) printf("p"); else printf(" "); - printf(" "); - } - else printf(" "); - if (flags & 8) - { if (grammar_version_number == 1) - printf("preposition:%d ", (int) p[res+2]); - else - printf("preposition "); - } - if ((flags & 3) == 3) printf("metaverb:%d ", (int) p[res+1]); - else if ((flags & 3) == 1) printf("verb:%d ", (int) p[res+1]); - printf("\n"); - } - - if (d_show_total++ == 5) - { d_show_total = 0; - if (d_show_to != NULL) - { write_to_transcript_file(d_show_to); - d_show_to[0] = 0; - } - } - - if (dtree[node].branch[1] != VACANT) - recursively_show_z(dtree[node].branch[1]); -} - -static void recursively_show_g(int node) -{ - warning("### Glulx dictionary-show not yet implemented.\n"); -} - -static void show_alphabet(int i) -{ int j, c; char chartext[8]; - - for (j=0; j<26; j++) - { c = alphabet[i][j]; - - if (alphabet_used[26*i+j] == 'N') printf("("); else printf(" "); - - zscii_to_text(chartext, c); - printf("%s", chartext); - - if (alphabet_used[26*i+j] == 'N') printf(")"); else printf(" "); - } - printf("\n"); -} - -extern void show_dictionary(void) -{ printf("Dictionary contains %d entries:\n",dict_entries); - if (dict_entries != 0) - { d_show_total = 0; d_show_to = NULL; - if (!glulx_mode) - recursively_show_z(root); - else - recursively_show_g(root); - } - printf("\nZ-machine alphabet entries:\n"); - show_alphabet(0); - show_alphabet(1); - show_alphabet(2); -} - -extern void write_dictionary_to_transcript(void) -{ char d_buffer[81]; - - sprintf(d_buffer, "\n[Dictionary contains %d entries:]\n", dict_entries); - - d_buffer[0] = 0; write_to_transcript_file(d_buffer); - - if (dict_entries != 0) - { d_show_total = 0; d_show_to = d_buffer; - if (!glulx_mode) - recursively_show_z(root); - else - recursively_show_g(root); - } - if (d_show_total != 0) write_to_transcript_file(d_buffer); -} - -/* ========================================================================= */ -/* Data structure management routines */ -/* ------------------------------------------------------------------------- */ - -extern void init_text_vars(void) -{ int j; - bestyet = NULL; - bestyet2 = NULL; - tlbtab = NULL; - grandtable = NULL; - grandflags = NULL; - no_chars_transcribed = 0; - is_abbreviation = FALSE; - put_strings_in_low_memory = FALSE; - - for (j=0; j<256; j++) abbrevs_lookup[j] = -1; - - total_zchars_trans = 0; - - dtree = NULL; - final_dict_order = NULL; - dict_sort_codes = NULL; - dict_entries=0; - - initialise_memory_block(&static_strings_area); -} - -extern void text_begin_pass(void) -{ abbrevs_lookup_table_made = FALSE; - no_abbreviations=0; - total_chars_trans=0; total_bytes_trans=0; - if (store_the_text) all_text_top=all_text; - dictionary_begin_pass(); - low_strings_top = low_strings; - - static_strings_extent = 0; - no_strings = 0; - no_dynamic_strings = 0; - no_unicode_chars = 0; -} - -/* Note: for allocation and deallocation of all_the_text, see inform.c */ - -extern void text_allocate_arrays(void) -{ abbreviations_at = my_malloc(MAX_ABBREVS*MAX_ABBREV_LENGTH, - "abbreviations"); - abbrev_values = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev values"); - abbrev_quality = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev quality"); - abbrev_freqs = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev freqs"); - - dtree = my_calloc(sizeof(dict_tree_node), MAX_DICT_ENTRIES, - "red-black tree for dictionary"); - final_dict_order = my_calloc(sizeof(int), MAX_DICT_ENTRIES, - "final dictionary ordering table"); - dict_sort_codes = my_calloc(DICT_WORD_BYTES, MAX_DICT_ENTRIES, - "dictionary sort codes"); - - if (!glulx_mode) - dictionary = my_malloc(9*MAX_DICT_ENTRIES+7, - "dictionary"); - else - dictionary = my_malloc(DICT_ENTRY_BYTE_LENGTH*MAX_DICT_ENTRIES+4, - "dictionary"); - - strings_holding_area - = my_malloc(MAX_STATIC_STRINGS,"static strings holding area"); - low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings"); - - huff_entities = NULL; - hufflist = NULL; - unicode_usage_entries = NULL; - done_compression = FALSE; - compression_table_size = 0; - compressed_offsets = NULL; - - MAX_CHARACTER_SET = 0; - - if (glulx_mode) { - if (compression_switch) { - int ix; - MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS - + MAX_UNICODE_CHARS; - huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1, - "huffman entities"); - hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET, - "huffman node list"); - unicode_usage_entries = my_calloc(sizeof(unicode_usage_t), - MAX_UNICODE_CHARS, "unicode entity entries"); - for (ix=0; ix