1 /* ------------------------------------------------------------------------- */
2 /* "text" : Text translation, the abbreviations optimiser, the dictionary */
4 /* Part of Inform 6.35 */
5 /* copyright (c) Graham Nelson 1993 - 2021 */
7 /* Inform is free software: you can redistribute it and/or modify */
8 /* it under the terms of the GNU General Public License as published by */
9 /* the Free Software Foundation, either version 3 of the License, or */
10 /* (at your option) any later version. */
12 /* Inform is distributed in the hope that it will be useful, */
13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
15 /* GNU General Public License for more details. */
17 /* You should have received a copy of the GNU General Public License */
18 /* along with Inform. If not, see https://gnu.org/licenses/ *
20 /* ------------------------------------------------------------------------- */
24 uchar *low_strings, *low_strings_top; /* Start and next free byte in the low
27 int32 static_strings_extent; /* Number of bytes of static strings
29 memory_block static_strings_area; /* Used if (!temporary_files_switch) to
30 hold the static strings area so far */
32 static uchar *strings_holding_area; /* Area holding translated strings
33 until they are moved into either
34 a temporary file, or the
35 static_strings_area below */
37 char *all_text, *all_text_top; /* Start and next byte free in (large)
38 text buffer holding the entire text
39 of the game, when it is being
41 int abbrevs_lookup_table_made, /* The abbreviations lookup table is
42 constructed when the first non-
43 abbreviation string is translated:
44 this flag is TRUE after that */
45 abbrevs_lookup[256]; /* Once this has been constructed,
46 abbrevs_lookup[n] = the smallest
47 number of any abbreviation beginning
48 with ASCII character n, or -1
49 if none of the abbreviations do */
50 int no_abbreviations; /* No of abbreviations defined so far */
51 uchar *abbreviations_at; /* Memory to hold the text of any
52 abbreviation strings declared */
53 /* ------------------------------------------------------------------------- */
54 /* Glulx string compression storage */
55 /* ------------------------------------------------------------------------- */
57 int no_strings; /* No of strings in static strings
59 int no_dynamic_strings; /* No. of @.. string escapes used
60 (actually, the highest value used
62 int no_unicode_chars; /* Number of distinct Unicode chars
63 used. (Beyond 0xFF.) */
65 static int MAX_CHARACTER_SET; /* Number of possible entities */
66 huffentity_t *huff_entities; /* The list of entities (characters,
67 abbreviations, @.. escapes, and
69 static huffentity_t **hufflist; /* Copy of the list, for sorting */
71 int no_huff_entities; /* The number of entities in the list */
72 int huff_unicode_start; /* Position in the list where Unicode
74 int huff_abbrev_start; /* Position in the list where string
75 abbreviations begin. */
76 int huff_dynam_start; /* Position in the list where @..
78 int huff_entity_root; /* The position in the list of the root
79 entry (when considering the table
82 int done_compression; /* Has the game text been compressed? */
83 int32 compression_table_size; /* Length of the Huffman table, in
85 int32 compression_string_size; /* Length of the compressed string
87 int32 *compressed_offsets; /* The beginning of every string in
88 the game, relative to the beginning
89 of the Huffman table. (So entry 0
90 is equal to compression_table_size)*/
92 #define UNICODE_HASH_BUCKETS (64)
93 unicode_usage_t *unicode_usage_entries;
94 static unicode_usage_t *unicode_usage_hash[UNICODE_HASH_BUCKETS];
96 static int unicode_entity_index(int32 unicode);
98 /* ------------------------------------------------------------------------- */
99 /* Abbreviation arrays */
100 /* ------------------------------------------------------------------------- */
106 /* ------------------------------------------------------------------------- */
108 int32 total_chars_trans, /* Number of ASCII chars of text in */
109 total_bytes_trans, /* Number of bytes of Z-code text out */
110 zchars_trans_in_last_string; /* Number of Z-chars in last string:
111 needed only for abbrev efficiency
112 calculation in "directs.c" */
113 static int32 total_zchars_trans, /* Number of Z-chars of text out
114 (only used to calculate the above) */
115 no_chars_transcribed; /* Number of ASCII chars written to
116 the text transcription area (used
117 for the -r and -u switches) */
119 static int zchars_out_buffer[3], /* During text translation, a buffer of
120 3 Z-chars at a time: when it's full
121 these are written as a 2-byte word */
122 zob_index; /* Index (0 to 2) into it */
124 static unsigned char *text_out_pc; /* The "program counter" during text
125 translation: the next address to
126 write Z-coded text output to */
128 static unsigned char *text_out_limit; /* The upper limit of text_out_pc
129 during text translation */
131 static int text_out_overflow; /* During text translation, becomes
132 true if text_out_pc tries to pass
135 /* ------------------------------------------------------------------------- */
136 /* For variables/arrays used by the dictionary manager, see below */
137 /* ------------------------------------------------------------------------- */
139 /* ------------------------------------------------------------------------- */
140 /* Prepare the abbreviations lookup table (used to speed up abbreviation */
141 /* detection in text translation). We first bubble-sort the abbrevs into */
142 /* alphabetical order (this is necessary for the detection algorithm to */
143 /* to work). Since the table is only prepared once, and for a table */
144 /* of size at most 96, there's no point using an efficient sort algorithm. */
145 /* ------------------------------------------------------------------------- */
147 static void make_abbrevs_lookup(void)
148 { int bubble_sort, j, k, l; char p[MAX_ABBREV_LENGTH]; char *p1, *p2;
150 { bubble_sort = FALSE;
151 for (j=0; j<no_abbreviations; j++)
152 for (k=j+1; k<no_abbreviations; k++)
153 { p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
154 p2=(char *)abbreviations_at+k*MAX_ABBREV_LENGTH;
156 { strcpy(p,p1); strcpy(p1,p2); strcpy(p2,p);
157 l=abbrev_values[j]; abbrev_values[j]=abbrev_values[k];
159 l=abbrev_quality[j]; abbrev_quality[j]=abbrev_quality[k];
164 } while (bubble_sort);
166 for (j=no_abbreviations-1; j>=0; j--)
167 { p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
168 abbrevs_lookup[(uchar)p1[0]]=j;
171 abbrevs_lookup_table_made = TRUE;
174 /* ------------------------------------------------------------------------- */
175 /* Search the abbreviations lookup table (a routine which must be fast). */
176 /* The source text to compare is text[i], text[i+1], ... and this routine */
177 /* is only called if text[i] is indeed the first character of at least one */
178 /* abbreviation, "from" begin the least index into the abbreviations table */
179 /* of an abbreviation for which text[i] is the first character. Recall */
180 /* that the abbrevs table is in alphabetical order. */
182 /* The return value is -1 if there is no match. If there is a match, the */
183 /* text to be abbreviated out is over-written by a string of null chars */
184 /* with "ASCII" value 1, and the abbreviation number is returned. */
186 /* In Glulx, we *do not* do this overwriting with 1's. */
187 /* ------------------------------------------------------------------------- */
189 static int try_abbreviations_from(unsigned char *text, int i, int from)
190 { int j, k; uchar *p, c;
192 for (j=from, p=(uchar *)abbreviations_at+from*MAX_ABBREV_LENGTH;
193 (j<no_abbreviations)&&(c==p[0]); j++, p+=MAX_ABBREV_LENGTH)
194 { if (text[i+1]==p[1])
195 { for (k=2; p[k]!=0; k++)
196 if (text[i+k]!=p[k]) goto NotMatched;
198 for (k=0; p[k]!=0; k++) text[i+k]=1;
208 extern void make_abbreviation(char *text)
210 strcpy((char *)abbreviations_at
211 + no_abbreviations*MAX_ABBREV_LENGTH, text);
213 abbrev_values[no_abbreviations] = compile_string(text, STRCTX_ABBREV);
215 /* The quality is the number of Z-chars saved by using this */
216 /* abbreviation: note that it takes 2 Z-chars to print it. */
218 abbrev_quality[no_abbreviations++] = zchars_trans_in_last_string - 2;
221 /* ------------------------------------------------------------------------- */
222 /* The front end routine for text translation. */
223 /* strctx indicates the purpose of the string. This is mostly used for */
224 /* informational output (gametext.txt), but we treat some string contexts */
225 /* specially during compilation. */
226 /* ------------------------------------------------------------------------- */
228 extern int32 compile_string(char *b, int strctx)
229 { int i, j; uchar *c;
231 /* In Z-code, abbreviations go in the low memory pool (0x100). So
232 do strings explicitly defined with the Lowstring directive.
233 (In Glulx, the in_low_memory flag is ignored.) */
234 int in_low_memory = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING);
236 if (!glulx_mode && in_low_memory)
237 { j=subtract_pointers(low_strings_top,low_strings);
238 low_strings_top=translate_text(low_strings_top, low_strings+MAX_LOW_STRINGS, b, strctx);
239 if (!low_strings_top)
240 memoryerror("MAX_LOW_STRINGS", MAX_LOW_STRINGS);
244 if (glulx_mode && done_compression)
245 compiler_error("Tried to add a string after compression was done.");
247 c = translate_text(strings_holding_area, strings_holding_area+MAX_STATIC_STRINGS, b, strctx);
249 memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);
251 i = subtract_pointers(c, strings_holding_area);
253 /* Insert null bytes as needed to ensure that the next static string */
254 /* also occurs at an address expressible as a packed address */
258 if (oddeven_packing_switch)
259 textalign = scale_factor*2;
261 textalign = scale_factor;
262 while ((i%textalign)!=0)
264 if (i+2 > MAX_STATIC_STRINGS)
265 memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);
266 i+=2; *c++ = 0; *c++ = 0;
270 j = static_strings_extent;
272 if (temporary_files_switch)
273 for (c=strings_holding_area; c<strings_holding_area+i;
274 c++, static_strings_extent++)
277 for (c=strings_holding_area; c<strings_holding_area+i;
278 c++, static_strings_extent++)
279 write_byte_to_memory_block(&static_strings_area,
280 static_strings_extent, *c);
283 return(j/scale_factor);
286 /* The marker value is a one-based string number. (We reserve zero
287 to mean "not a string at all". */
288 return (++no_strings);
292 /* ------------------------------------------------------------------------- */
293 /* Output a single Z-character into the buffer, and flush it if full */
294 /* ------------------------------------------------------------------------- */
296 static void write_z_char_z(int i)
299 total_zchars_trans++;
300 zchars_out_buffer[zob_index++]=(i%32);
301 if (zob_index!=3) return;
303 j= zchars_out_buffer[0]*0x0400 + zchars_out_buffer[1]*0x0020
304 + zchars_out_buffer[2];
305 if (text_out_pc+2 > text_out_limit) {
306 text_out_overflow = TRUE;
309 text_out_pc[0] = j/256; text_out_pc[1] = j%256; text_out_pc+=2;
310 total_bytes_trans+=2;
313 static void write_zscii(int zsc)
315 int lookup_value, in_alphabet;
322 if (zsc < 0x100) lookup_value = zscii_to_alphabet_grid[zsc];
324 else lookup_value = -1;
326 if (lookup_value >= 0)
327 { alphabet_used[lookup_value] = 'Y';
328 in_alphabet = lookup_value/26;
329 if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */
330 if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */
331 write_z_char_z(lookup_value%26 + 6);
334 { write_z_char_z(5); write_z_char_z(6);
335 write_z_char_z(zsc/32); write_z_char_z(zsc%32);
339 /* ------------------------------------------------------------------------- */
340 /* Finish a Z-coded string, padding out with Z-char 5s if necessary and */
341 /* setting the "end" bit on the final 2-byte word */
342 /* ------------------------------------------------------------------------- */
344 static void end_z_chars(void)
346 zchars_trans_in_last_string=total_zchars_trans-zchars_trans_in_last_string;
347 while (zob_index!=0) write_z_char_z(5);
348 p=(unsigned char *) text_out_pc;
352 /* Glulx handles this much more simply -- compression is done elsewhere. */
353 static void write_z_char_g(int i)
356 if (text_out_pc+1 > text_out_limit) {
357 text_out_overflow = TRUE;
360 total_zchars_trans++;
366 /* ------------------------------------------------------------------------- */
367 /* The main routine "text.c" provides to the rest of Inform: the text */
368 /* translator. p is the address to write output to, s_text the source text */
369 /* and the return value is the next free address to write output to. */
370 /* The return value will not exceed p_limit. If the translation tries to */
371 /* overflow this boundary, the return value will be NULL (and you should */
372 /* display an error). */
373 /* Note that the source text may be corrupted by this routine. */
374 /* ------------------------------------------------------------------------- */
376 extern uchar *translate_text(uchar *p, uchar *p_limit, char *s_text, int strctx)
377 { int i, j, k, in_alphabet, lookup_value;
378 int32 unicode; int zscii;
379 unsigned char *text_in;
381 /* For STRCTX_ABBREV, the string being translated is itself an
382 abbreviation string, so it can't make use of abbreviations. Set
383 the is_abbreviation flag to indicate this.
384 The compiler has historically set this flag for the Lowstring
385 directive as well -- the in_low_memory and is_abbreviation flag were
386 always the same. I am preserving that convention. */
387 int is_abbreviation = (strctx == STRCTX_ABBREV || strctx == STRCTX_LOWSTRING);
390 /* Cast the input and output streams to unsigned char: text_out_pc will
391 advance as bytes of Z-coded text are written, but text_in doesn't */
393 text_in = (unsigned char *) s_text;
394 text_out_pc = (unsigned char *) p;
395 text_out_limit = (unsigned char *) p_limit;
396 text_out_overflow = FALSE;
398 /* Remember the Z-chars total so that later we can subtract to find the
399 number of Z-chars translated on this string */
401 zchars_trans_in_last_string = total_zchars_trans;
403 /* Start with the Z-characters output buffer empty */
407 /* If this is the first text translated since the abbreviations were
408 declared, and if some were declared, then it's time to make the
409 lookup table for abbreviations
411 (Except: we don't if the text being translated is itself
412 the text of an abbreviation currently being defined) */
414 if ((!abbrevs_lookup_table_made) && (no_abbreviations > 0)
415 && (!is_abbreviation))
416 make_abbrevs_lookup();
418 /* If we're storing the whole game text to memory, then add this text */
420 if ((!is_abbreviation) && (store_the_text))
421 { no_chars_transcribed += strlen(s_text)+2;
422 if (no_chars_transcribed >= MAX_TRANSCRIPT_SIZE)
423 memoryerror("MAX_TRANSCRIPT_SIZE", MAX_TRANSCRIPT_SIZE);
424 sprintf(all_text_top, "%s\n\n", s_text);
425 all_text_top += strlen(all_text_top);
428 if (transcript_switch) {
429 /* Omit veneer strings, unless we're using the new transcript format, which includes everything. */
430 if ((!veneer_mode) || TRANSCRIPT_FORMAT == 1) {
433 if (label == STRCTX_GAME)
434 label = STRCTX_VENEER;
435 else if (label == STRCTX_GAMEOPC)
436 label = STRCTX_VENEEROPC;
438 write_to_transcript_file(s_text, label);
444 /* The empty string of Z-text is illegal, since it can't carry an end
445 bit: so we translate an empty string of ASCII text to just the
446 pad character 5. Printing this causes nothing to appear on screen. */
448 if (text_in[0]==0) write_z_char_z(5);
450 /* Loop through the characters of the null-terminated input text: note
451 that if 1 is written over a character in the input text, it is
452 afterwards ignored */
454 for (i=0; text_in[i]!=0; i++)
455 { total_chars_trans++;
457 /* Contract ". " into ". " if double-space-removing switch set:
458 likewise "? " and "! " if the setting is high enough */
460 if ((double_space_setting >= 1)
461 && (text_in[i+1]==' ') && (text_in[i+2]==' '))
462 { if (text_in[i]=='.') text_in[i+2]=1;
463 if (double_space_setting >= 2)
464 { if (text_in[i]=='?') text_in[i+2]=1;
465 if (text_in[i]=='!') text_in[i+2]=1;
469 /* Try abbreviations if the economy switch set */
471 if ((economy_switch) && (!is_abbreviation)
472 && ((k=abbrevs_lookup[text_in[i]])!=-1))
473 { if ((j=try_abbreviations_from(text_in, i, k))!=-1)
474 { /* abbreviations run from MAX_DYNAMIC_STRINGS to 96 */
475 j += MAX_DYNAMIC_STRINGS;
476 write_z_char_z(j/32+1); write_z_char_z(j%32);
480 /* If Unicode switch set, use text_to_unicode to perform UTF-8
482 if (character_set_unicode && (text_in[i] & 0x80))
483 { unicode = text_to_unicode((char *) (text_in+i));
484 zscii = unicode_to_zscii(unicode);
485 if (zscii != 5) write_zscii(zscii);
487 { unicode_char_error(
488 "Character can only be used if declared in \
489 advance as part of 'Zcharacter table':", unicode);
491 i += textual_form_length - 1;
495 /* '@' is the escape character in Inform string notation: the various
499 @@decimalnumber : write this ZSCII char (0 to 1023)
500 @twodigits : write the abbreviation string with this
504 @accentcode : this accented character: e.g.,
505 for @'e write an E-acute
506 @{...} : this Unicode char (in hex) */
509 { if (text_in[i+1]=='@')
513 i+=2; j=atoi((char *) (text_in+i));
515 { /* Prevent ~ and ^ from being translated to double-quote
516 and new-line, as they ordinarily would be */
518 case 94: write_z_char_z(5); write_z_char_z(6);
519 write_z_char_z(94/32); write_z_char_z(94%32);
521 case 126: write_z_char_z(5); write_z_char_z(6);
522 write_z_char_z(126/32); write_z_char_z(126%32);
525 default: write_zscii(j); break;
527 while (isdigit(text_in[i])) i++; i--;
529 else if (isdigit(text_in[i+1])!=0)
532 /* @.. (dynamic string) */
534 d1 = character_digit_value[text_in[i+1]];
535 d2 = character_digit_value[text_in[i+2]];
536 if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10))
537 error("'@..' must have two decimal digits");
541 if (!glulx_mode && j >= 96)
542 { error("Z-machine dynamic strings are limited to 96");
545 if (j >= MAX_DYNAMIC_STRINGS) {
546 memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS);
550 write_z_char_z(j/32+1); write_z_char_z(j%32);
555 /* A string escape specifying an unusual character */
557 unicode = text_to_unicode((char *) (text_in+i));
558 zscii = unicode_to_zscii(unicode);
559 if (zscii != 5) write_zscii(zscii);
561 { unicode_char_error(
562 "Character can only be used if declared in \
563 advance as part of 'Zcharacter table':", unicode);
565 i += textual_form_length - 1;
569 { /* Skip a character which has been over-written with the null
570 value 1 earlier on */
573 { if (text_in[i]==' ') write_z_char_z(0);
575 { j = (int) text_in[i];
576 lookup_value = iso_to_alphabet_grid[j];
577 if (lookup_value < 0)
578 { /* The character isn't in the standard alphabets, so
579 we have to use the ZSCII 4-Z-char sequence */
581 if (lookup_value == -5)
582 { /* Character isn't in the ZSCII set at all */
584 unicode = iso_to_unicode(j);
586 "Character can only be used if declared in \
587 advance as part of 'Zcharacter table':", unicode);
588 write_zscii(0x200 + unicode/0x100);
589 write_zscii(0x300 + unicode%0x100);
591 else write_zscii(-lookup_value);
594 { /* The character is in one of the standard alphabets:
595 write a SHIFT to temporarily change alphabet if
596 it isn't in alphabet 0, then write the Z-char */
598 alphabet_used[lookup_value] = 'Y';
599 in_alphabet = lookup_value/26;
600 if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */
601 if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */
602 write_z_char_z(lookup_value%26 + 6);
609 /* Flush the Z-characters output buffer and set the "end" bit */
616 /* The text storage here is, of course, temporary. Compression
617 will occur when we're finished compiling, so that all the
618 clever Huffman stuff will work.
619 In the stored text, we use "@@" to indicate @,
620 "@0" to indicate a zero byte,
621 "@ANNNN" to indicate an abbreviation,
622 "@DNNNN" to indicate a dynamic string thing.
623 "@UNNNN" to indicate a four-byte Unicode value (0x100 or higher).
624 (NNNN is a four-digit hex number using the letters A-P... an
625 ugly representation but a convenient one.)
628 for (i=0; text_in[i]!=0; i++) {
630 /* Contract ". " into ". " if double-space-removing switch set:
631 likewise "? " and "! " if the setting is high enough. */
632 if ((double_space_setting >= 1)
633 && (text_in[i+1]==' ') && (text_in[i+2]==' ')) {
635 || (double_space_setting >= 2
636 && (text_in[i]=='?' || text_in[i]=='!'))) {
637 text_in[i+1] = text_in[i];
644 /* Try abbreviations if the economy switch set. We have to be in
645 compression mode too, since the abbreviation mechanism is part
646 of string decompression. */
648 if ((economy_switch) && (compression_switch) && (!is_abbreviation)
649 && ((k=abbrevs_lookup[text_in[i]])!=-1)
650 && ((j=try_abbreviations_from(text_in, i, k)) != -1)) {
651 char *cx = (char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
655 write_z_char_g('A' + ((j >>12) & 0x0F));
656 write_z_char_g('A' + ((j >> 8) & 0x0F));
657 write_z_char_g('A' + ((j >> 4) & 0x0F));
658 write_z_char_g('A' + ((j ) & 0x0F));
660 else if (text_in[i] == '@') {
661 if (text_in[i+1]=='@') {
663 i+=2; j=atoi((char *) (text_in+i));
664 if (j == '@' || j == '\0') {
668 if (!compression_switch)
669 warning("Ascii @@0 will prematurely terminate non-compressed \
674 while (isdigit(text_in[i])) i++; i--;
676 else if (isdigit(text_in[i+1])) {
678 d1 = character_digit_value[text_in[i+1]];
679 d2 = character_digit_value[text_in[i+2]];
680 if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10)) {
681 error("'@..' must have two decimal digits");
684 if (!compression_switch)
685 warning("'@..' print variable will not work in non-compressed \
686 string; substituting ' '.");
689 if (j >= MAX_DYNAMIC_STRINGS) {
690 memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS);
693 if (j+1 >= no_dynamic_strings)
694 no_dynamic_strings = j+1;
697 write_z_char_g('A' + ((j >>12) & 0x0F));
698 write_z_char_g('A' + ((j >> 8) & 0x0F));
699 write_z_char_g('A' + ((j >> 4) & 0x0F));
700 write_z_char_g('A' + ((j ) & 0x0F));
704 unicode = text_to_unicode((char *) (text_in+i));
705 i += textual_form_length - 1;
706 if (unicode == '@' || unicode == '\0') {
708 write_z_char_g(unicode ? '@' : '0');
710 else if (unicode >= 0 && unicode < 256) {
711 write_z_char_g(unicode);
714 if (!compression_switch) {
715 warning("Unicode characters will not work in non-compressed \
716 string; substituting '?'.");
720 j = unicode_entity_index(unicode);
723 write_z_char_g('A' + ((j >>12) & 0x0F));
724 write_z_char_g('A' + ((j >> 8) & 0x0F));
725 write_z_char_g('A' + ((j >> 4) & 0x0F));
726 write_z_char_g('A' + ((j ) & 0x0F));
731 else if (text_in[i] == '^')
732 write_z_char_g(0x0A);
733 else if (text_in[i] == '~')
735 else if (character_set_unicode) {
736 if (text_in[i] & 0x80) {
737 unicode = text_to_unicode((char *) (text_in+i));
738 i += textual_form_length - 1;
739 if (unicode >= 0 && unicode < 256) {
740 write_z_char_g(unicode);
743 if (!compression_switch) {
744 warning("Unicode characters will not work in non-compressed \
745 string; substituting '?'.");
749 j = unicode_entity_index(unicode);
752 write_z_char_g('A' + ((j >>12) & 0x0F));
753 write_z_char_g('A' + ((j >> 8) & 0x0F));
754 write_z_char_g('A' + ((j >> 4) & 0x0F));
755 write_z_char_g('A' + ((j ) & 0x0F));
760 write_z_char_g(text_in[i]);
764 unicode = iso_to_unicode_grid[text_in[i]];
765 if (unicode >= 0 && unicode < 256) {
766 write_z_char_g(unicode);
769 if (!compression_switch) {
770 warning("Unicode characters will not work in non-compressed \
771 string; substituting '?'.");
775 j = unicode_entity_index(unicode);
778 write_z_char_g('A' + ((j >>12) & 0x0F));
779 write_z_char_g('A' + ((j >> 8) & 0x0F));
780 write_z_char_g('A' + ((j >> 4) & 0x0F));
781 write_z_char_g('A' + ((j ) & 0x0F));
790 if (text_out_overflow)
793 return((uchar *) text_out_pc);
796 static int unicode_entity_index(int32 unicode)
798 unicode_usage_t *uptr;
800 int buck = unicode % UNICODE_HASH_BUCKETS;
802 for (uptr = unicode_usage_hash[buck]; uptr; uptr=uptr->next) {
803 if (uptr->ch == unicode)
807 j = (uptr - unicode_usage_entries);
810 if (no_unicode_chars >= MAX_UNICODE_CHARS) {
811 memoryerror("MAX_UNICODE_CHARS", MAX_UNICODE_CHARS);
815 j = no_unicode_chars;
817 uptr = unicode_usage_entries + j;
819 uptr->next = unicode_usage_hash[buck];
820 unicode_usage_hash[buck] = uptr;
827 /* ------------------------------------------------------------------------- */
828 /* Glulx compression code */
829 /* ------------------------------------------------------------------------- */
832 static void compress_makebits(int entnum, int depth, int prevbit,
833 huffbitlist_t *bits);
835 /* The compressor. This uses the usual Huffman compression algorithm. */
836 void compress_game_text()
838 int entities=0, branchstart, branches;
846 if (compression_switch) {
848 /* How many entities have we currently got? Well, 256 plus the
849 string-terminator plus Unicode chars plus abbrevations plus
852 huff_unicode_start = entities;
853 entities += no_unicode_chars;
854 huff_abbrev_start = entities;
856 entities += no_abbreviations;
857 huff_dynam_start = entities;
858 entities += no_dynamic_strings;
860 if (entities > MAX_CHARACTER_SET)
861 memoryerror("MAX_CHARACTER_SET",MAX_CHARACTER_SET);
864 for (jx=0; jx<256; jx++) {
865 huff_entities[jx].type = 2;
866 huff_entities[jx].count = 0;
867 huff_entities[jx].u.ch = jx;
870 huff_entities[256].type = 1;
871 huff_entities[256].count = 0;
872 for (jx=0; jx<no_unicode_chars; jx++) {
873 huff_entities[huff_unicode_start+jx].type = 4;
874 huff_entities[huff_unicode_start+jx].count = 0;
875 huff_entities[huff_unicode_start+jx].u.val = jx;
877 if (economy_switch) {
878 for (jx=0; jx<no_abbreviations; jx++) {
879 huff_entities[huff_abbrev_start+jx].type = 3;
880 huff_entities[huff_abbrev_start+jx].count = 0;
881 huff_entities[huff_abbrev_start+jx].u.val = jx;
884 for (jx=0; jx<no_dynamic_strings; jx++) {
885 huff_entities[huff_dynam_start+jx].type = 9;
886 huff_entities[huff_dynam_start+jx].count = 0;
887 huff_entities[huff_dynam_start+jx].u.val = jx;
891 /* No compression; use defaults that will make it easy to check
893 no_huff_entities = 257;
894 huff_unicode_start = 257;
895 huff_abbrev_start = 257;
896 huff_dynam_start = 257+MAX_ABBREVS;
897 compression_table_size = 0;
900 if (temporary_files_switch) {
902 Temp1_fp=fopen(Temp1_Name,"rb");
904 fatalerror("I/O failure: couldn't reopen temporary file 1");
907 if (compression_switch) {
909 for (lx=0, ix=0; lx<no_strings; lx++) {
910 int escapelen=0, escapetype=0;
914 if (temporary_files_switch)
915 ch = fgetc(Temp1_fp);
917 ch = read_byte_from_memory_block(&static_strings_area, ix);
919 if (ix > static_strings_extent || ch < 0)
920 compiler_error("Read too much not-yet-compressed text.");
921 if (escapelen == -1) {
926 else if (ch == '0') {
929 else if (ch == 'A' || ch == 'D' || ch == 'U') {
936 compiler_error("Strange @ escape in processed text.");
939 else if (escapelen) {
940 escapeval = (escapeval << 4) | ((ch-'A') & 0x0F);
942 if (escapelen == 0) {
943 if (escapetype == 'A') {
944 ch = huff_abbrev_start+escapeval;
946 else if (escapetype == 'D') {
947 ch = huff_dynam_start+escapeval;
949 else if (escapetype == 'U') {
950 ch = huff_unicode_start+escapeval;
953 compiler_error("Strange @ escape in processed text.");
969 huff_entities[ch].count++;
974 for (jx=0; jx<entities; jx++) {
975 if (huff_entities[jx].count) {
976 hufflist[numlive] = &(huff_entities[jx]);
981 branchstart = entities;
984 while (numlive > 1) {
986 int best1num, best2num;
989 if (hufflist[0]->count < hufflist[1]->count) {
998 best1num = hufflist[best1]->count;
999 best2num = hufflist[best2]->count;
1001 for (jx=2; jx<numlive; jx++) {
1002 if (hufflist[jx]->count < best1num) {
1004 best2num = best1num;
1006 best1num = hufflist[best1]->count;
1008 else if (hufflist[jx]->count < best2num) {
1010 best2num = hufflist[best2]->count;
1014 bran = &(huff_entities[branchstart+branches]);
1017 bran->count = hufflist[best1]->count + hufflist[best2]->count;
1018 bran->u.branch[0] = (hufflist[best1] - huff_entities);
1019 bran->u.branch[1] = (hufflist[best2] - huff_entities);
1020 hufflist[best1] = bran;
1021 if (best2 < numlive-1) {
1022 memmove(&(hufflist[best2]), &(hufflist[best2+1]),
1023 ((numlive-1) - best2) * sizeof(huffentity_t *));
1028 huff_entity_root = (hufflist[0] - huff_entities);
1030 for (ix=0; ix<MAXHUFFBYTES; ix++)
1032 compression_table_size = 12;
1034 no_huff_entities = 0; /* compress_makebits will total this up */
1035 compress_makebits(huff_entity_root, 0, -1, &bits);
1038 /* Now, sadly, we have to compute the size of the string section,
1039 without actually doing the compression. */
1040 compression_string_size = 0;
1042 if (temporary_files_switch) {
1043 fseek(Temp1_fp, 0, SEEK_SET);
1046 if (no_strings >= MAX_NUM_STATIC_STRINGS)
1047 memoryerror("MAX_NUM_STATIC_STRINGS", MAX_NUM_STATIC_STRINGS);
1049 for (lx=0, ix=0; lx<no_strings; lx++) {
1050 int escapelen=0, escapetype=0;
1054 compressed_offsets[lx] = compression_table_size + compression_string_size;
1055 compression_string_size++; /* for the type byte */
1057 if (temporary_files_switch)
1058 ch = fgetc(Temp1_fp);
1060 ch = read_byte_from_memory_block(&static_strings_area, ix);
1062 if (ix > static_strings_extent || ch < 0)
1063 compiler_error("Read too much not-yet-compressed text.");
1064 if (escapelen == -1) {
1069 else if (ch == '0') {
1072 else if (ch == 'A' || ch == 'D' || ch == 'U') {
1079 compiler_error("Strange @ escape in processed text.");
1082 else if (escapelen) {
1083 escapeval = (escapeval << 4) | ((ch-'A') & 0x0F);
1085 if (escapelen == 0) {
1086 if (escapetype == 'A') {
1087 ch = huff_abbrev_start+escapeval;
1089 else if (escapetype == 'D') {
1090 ch = huff_dynam_start+escapeval;
1092 else if (escapetype == 'U') {
1093 ch = huff_unicode_start+escapeval;
1096 compiler_error("Strange @ escape in processed text.");
1113 if (compression_switch) {
1114 jx += huff_entities[ch].depth;
1115 compression_string_size += (jx/8);
1119 if (ch >= huff_dynam_start) {
1120 compression_string_size += 3;
1122 else if (ch >= huff_unicode_start) {
1123 compiler_error("Abbreviation/Unicode in non-compressed string \
1124 should be impossible.");
1127 compression_string_size += 1;
1130 if (compression_switch && jx)
1131 compression_string_size++;
1134 done_compression = TRUE;
1137 static void compress_makebits(int entnum, int depth, int prevbit,
1138 huffbitlist_t *bits)
1140 huffentity_t *ent = &(huff_entities[entnum]);
1144 ent->addr = compression_table_size;
1149 ent->bits.b[(depth-1) / 8] |= (1 << ((depth-1) % 8));
1152 switch (ent->type) {
1154 compression_table_size += 9;
1155 compress_makebits(ent->u.branch[0], depth+1, 0, &ent->bits);
1156 compress_makebits(ent->u.branch[1], depth+1, 1, &ent->bits);
1159 compression_table_size += 1;
1162 compression_table_size += 2;
1165 cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH;
1166 compression_table_size += (1 + 1 + strlen(cx));
1170 compression_table_size += 5;
1175 /* ------------------------------------------------------------------------- */
1176 /* The abbreviations optimiser */
1178 /* This is a very complex, memory and time expensive algorithm to */
1179 /* approximately solve the problem of which abbreviation strings would */
1180 /* minimise the total number of Z-chars to which the game text translates. */
1181 /* It is in some ways a quite separate program but remains inside Inform */
1182 /* for compatibility with previous releases. */
1183 /* ------------------------------------------------------------------------- */
1185 typedef struct tlb_s
1187 int32 intab, occurrences;
1190 static int32 no_occs;
1192 static int32 *grandtable;
1193 static int32 *grandflags;
1194 typedef struct optab_s
1199 char text[MAX_ABBREV_LENGTH];
1201 static optab *bestyet, *bestyet2;
1205 static char *sub_buffer;
1207 static void optimise_pass(void)
1208 { int32 i; int t1, t2;
1209 int32 j, j2, k, nl, matches, noflags, score, min, minat=0, x, scrabble, c;
1210 for (i=0; i<256; i++) bestyet[i].length=0;
1211 for (i=0; i<no_occs; i++)
1212 { if ((*(tlbtab[i].text)!=(int) '\n')&&(tlbtab[i].occurrences!=0))
1215 if (i%((**g_pm_hndl).linespercheck) == 0)
1216 { ProcessEvents (&g_proc);
1220 my_free(&all_text,"transcription text");
1221 longjmp (g_fallback, 1);
1225 printf("Pass %d, %4ld/%ld '%s' (%ld occurrences) ",
1226 pass_no, (long int) i, (long int) no_occs, tlbtab[i].text,
1227 (long int) tlbtab[i].occurrences);
1229 for (j=0; j<tlbtab[i].occurrences; j++)
1230 { for (j2=0; j2<tlbtab[i].occurrences; j2++) grandflags[j2]=1;
1231 nl=2; noflags=tlbtab[i].occurrences;
1232 while ((noflags>=2)&&(nl<=62))
1234 for (j2=0; j2<nl; j2++)
1235 if (all_text[grandtable[tlbtab[i].intab+j]+j2]=='\n')
1238 for (j2=j; j2<tlbtab[i].occurrences; j2++)
1239 { if (grandflags[j2]==1)
1240 { x=grandtable[tlbtab[i].intab+j2]
1241 - grandtable[tlbtab[i].intab+j];
1242 if (((x>-nl)&&(x<nl))
1243 || (memcmp(all_text+grandtable[tlbtab[i].intab+j],
1244 all_text+grandtable[tlbtab[i].intab+j2],
1246 { grandflags[j2]=0; noflags--; }
1251 for (k=0; k<nl; k++)
1253 c=all_text[grandtable[tlbtab[i].intab+j+k]];
1255 { if (iso_to_alphabet_grid[c]<0)
1258 if (iso_to_alphabet_grid[c]>=26)
1262 score=(matches-1)*(scrabble-2);
1264 for (j2=0; j2<256; j2++)
1265 { if ((nl==bestyet[j2].length)
1266 && (memcmp(all_text+bestyet[j2].location,
1267 all_text+grandtable[tlbtab[i].intab+j],
1269 { j2=256; min=score; }
1271 { if (bestyet[j2].score<min)
1272 { min=bestyet[j2].score; minat=j2;
1277 { bestyet[minat].score=score;
1278 bestyet[minat].length=nl;
1279 bestyet[minat].location=grandtable[tlbtab[i].intab+j];
1280 bestyet[minat].popularity=matches;
1281 for (j2=0; j2<nl; j2++) sub_buffer[j2]=
1282 all_text[bestyet[minat].location+j2];
1288 t2=((int) time(0)) - t1;
1289 printf(" (%d seconds)\n",t2);
1294 static int any_overlap(char *s1, char *s2)
1295 { int a, b, i, j, flag;
1296 a=strlen(s1); b=strlen(s2);
1297 for (i=1-b; i<a; i++)
1300 if ((0<=i+j)&&(i+j<=a-1))
1301 if (s1[i+j]!=s2[j]) flag=1;
1302 if (flag==0) return(1);
1307 #define MAX_TLBS 8000
1309 extern void optimise_abbreviations(void)
1310 { int32 i, j, t, max=0, MAX_GTABLE;
1311 int32 j2, selected, available, maxat=0, nl;
1314 printf("Beginning calculation of optimal abbreviations...\n");
1317 tlbtab=my_calloc(sizeof(tlb), MAX_TLBS, "tlb table"); no_occs=0;
1318 sub_buffer=my_calloc(sizeof(char), 4000, "sub_buffer");
1319 for (i=0; i<MAX_TLBS; i++) tlbtab[i].occurrences=0;
1321 bestyet=my_calloc(sizeof(optab), 256, "bestyet");
1322 bestyet2=my_calloc(sizeof(optab), 64, "bestyet2");
1324 bestyet2[0].text[0]='.';
1325 bestyet2[0].text[1]=' ';
1326 bestyet2[0].text[2]=0;
1328 bestyet2[1].text[0]=',';
1329 bestyet2[1].text[1]=' ';
1330 bestyet2[1].text[2]=0;
1332 for (i=0; all_text+i<all_text_top; i++)
1334 if ((all_text[i]=='.') && (all_text[i+1]==' ') && (all_text[i+2]==' '))
1335 { all_text[i]='\n'; all_text[i+1]='\n'; all_text[i+2]='\n';
1336 bestyet2[0].popularity++;
1339 if ((all_text[i]=='.') && (all_text[i+1]==' '))
1340 { all_text[i]='\n'; all_text[i+1]='\n';
1341 bestyet2[0].popularity++;
1344 if ((all_text[i]==',') && (all_text[i+1]==' '))
1345 { all_text[i]='\n'; all_text[i+1]='\n';
1346 bestyet2[1].popularity++;
1350 MAX_GTABLE=subtract_pointers(all_text_top,all_text)+1;
1351 grandtable=my_calloc(4*sizeof(int32), MAX_GTABLE/4, "grandtable");
1353 for (i=0, t=0; all_text+i<all_text_top; i++)
1354 { test.text[0]=all_text[i];
1355 test.text[1]=all_text[i+1];
1356 test.text[2]=all_text[i+2];
1358 if ((test.text[0]=='\n')||(test.text[1]=='\n')||(test.text[2]=='\n'))
1360 for (j=0; j<no_occs; j++)
1361 if (strcmp(test.text,tlbtab[j].text)==0)
1364 for (j=i+3; all_text+j<all_text_top; j++)
1367 if (j%((**g_pm_hndl).linespercheck) == 0)
1368 { ProcessEvents (&g_proc);
1372 my_free(&all_text,"transcription text");
1373 longjmp (g_fallback, 1);
1377 if ((all_text[i]==all_text[j])
1378 && (all_text[i+1]==all_text[j+1])
1379 && (all_text[i+2]==all_text[j+2]))
1380 { grandtable[t+test.occurrences]=j;
1382 if (t+test.occurrences==MAX_GTABLE)
1383 { printf("All %ld cross-references used\n",
1384 (long int) MAX_GTABLE);
1389 if (test.occurrences>=2)
1390 { tlbtab[no_occs]=test;
1391 tlbtab[no_occs].intab=t; t+=tlbtab[no_occs].occurrences;
1392 if (max<tlbtab[no_occs].occurrences)
1393 max=tlbtab[no_occs].occurrences;
1395 if (no_occs==MAX_TLBS)
1396 { printf("All %d three-letter-blocks used\n",
1405 grandflags=my_calloc(sizeof(int), max, "grandflags");
1408 printf("Cross-reference table (%ld entries) built...\n",
1409 (long int) no_occs);
1410 /* for (i=0; i<no_occs; i++)
1411 printf("%4d %4d '%s' %d\n",i,tlbtab[i].intab,tlbtab[i].text,
1412 tlbtab[i].occurrences);
1415 for (i=0; i<64; i++) bestyet2[i].length=0; selected=2;
1417 while ((available>0)&&(selected<64))
1418 { printf("Pass %d\n", ++pass_no);
1422 for (i=0; i<256; i++)
1423 if (bestyet[i].score!=0)
1425 nl=bestyet[i].length;
1426 for (j2=0; j2<nl; j2++) bestyet[i].text[j2]=
1427 all_text[bestyet[i].location+j2];
1428 bestyet[i].text[nl]=0;
1431 /* printf("End of pass results:\n");
1432 printf("\nno score freq string\n");
1433 for (i=0; i<256; i++)
1434 if (bestyet[i].score>0)
1435 printf("%02d: %4d %4d '%s'\n", i, bestyet[i].score,
1436 bestyet[i].popularity, bestyet[i].text);
1441 for (i=0; i<256; i++)
1442 if (max<bestyet[i].score)
1443 { max=bestyet[i].score;
1448 { bestyet2[selected++]=bestyet[maxat];
1451 "Selection %2ld: '%s' (repeated %ld times, scoring %ld)\n",
1452 (long int) selected,bestyet[maxat].text,
1453 (long int) bestyet[maxat].popularity,
1454 (long int) bestyet[maxat].score);
1456 test.text[0]=bestyet[maxat].text[0];
1457 test.text[1]=bestyet[maxat].text[1];
1458 test.text[2]=bestyet[maxat].text[2];
1461 for (i=0; i<no_occs; i++)
1462 if (strcmp(test.text,tlbtab[i].text)==0)
1465 for (j=0; j<tlbtab[i].occurrences; j++)
1466 { if (memcmp(bestyet[maxat].text,
1467 all_text+grandtable[tlbtab[i].intab+j],
1468 bestyet[maxat].length)==0)
1469 { for (j2=0; j2<bestyet[maxat].length; j2++)
1470 all_text[grandtable[tlbtab[i].intab+j]+j2]='\n';
1474 for (i=0; i<256; i++)
1475 if ((bestyet[i].score>0)&&
1476 (any_overlap(bestyet[maxat].text,bestyet[i].text)==1))
1477 { bestyet[i].score=0;
1478 /* printf("Discarding '%s' as overlapping\n",
1479 bestyet[i].text); */
1482 } while ((max>0)&&(available>0)&&(selected<64));
1485 printf("\nChosen abbreviations (in Inform syntax):\n\n");
1486 for (i=0; i<selected; i++)
1487 printf("Abbreviate \"%s\";\n", bestyet2[i].text);
1492 /* ------------------------------------------------------------------------- */
1493 /* The dictionary manager begins here. */
1495 /* Speed is extremely important in these algorithms. If a linear-time */
1496 /* routine were used to search the dictionary words so far built up, then */
1497 /* Inform would crawl. */
1499 /* Instead, the dictionary is stored as a binary tree, which is kept */
1500 /* balanced with the red-black algorithm. */
1501 /* ------------------------------------------------------------------------- */
1502 /* A dictionary table similar to the Z-machine format is kept: there is a */
1503 /* 7-byte header (left blank here to be filled in at the */
1504 /* construct_storyfile() stage in "tables.c") and then a sequence of */
1505 /* records, one per word, in the form */
1507 /* <Z-coded text> <flags> <verbnumber> <adjectivenumber> */
1508 /* 4 or 6 bytes byte byte byte */
1510 /* For Glulx, the form is instead: (But see below about Unicode-valued */
1511 /* dictionaries and my heinie.) */
1513 /* <tag> <plain text> <flags> <verbnumber> <adjectivenumber> */
1514 /* $60 DICT_WORD_SIZE short short short */
1516 /* These records are stored in "accession order" (i.e. in order of their */
1517 /* first being received by these routines) and only alphabetically sorted */
1518 /* by construct_storyfile() (using the array below). */
1519 /* ------------------------------------------------------------------------- */
1521 /* Further notes about the data fields... */
1522 /* The flags are currently: */
1523 /* bit 0: word is used as a verb (in verb grammar) */
1524 /* bit 1: word is used as a meta verb */
1525 /* bit 2: word is plural (set by '//p') */
1526 /* bit 3: word is used as a preposition (in verb grammar) */
1527 /* bit 6: set for all verbs, but not used by the parser? */
1528 /* bit 7: word is used as a noun (set for every word that appears in */
1529 /* code or in an object property) */
1531 /* In grammar version 2, the third field (adjectivenumber) is unused (and */
1534 /* The compiler generates special constants #dict_par1, #dict_par2, */
1535 /* #dict_par3 to refer to the byte offsets of the three fields. In */
1536 /* Z-code v3, these are 4/5/6; in v4+, they are 6/7/8. In Glulx, they */
1537 /* are $DICT_WORD_SIZE+2/4/6, referring to the *low* bytes of the three */
1538 /* fields. (The high bytes are $DICT_WORD_SIZE+1/3/5.) */
1539 /* ------------------------------------------------------------------------- */
1541 uchar *dictionary, /* (These two pointers are externally
1542 used only in "tables.c" when
1543 building the story-file) */
1544 *dictionary_top; /* Pointer to next free record */
1546 int dict_entries; /* Total number of records entered */
1548 /* ------------------------------------------------------------------------- */
1549 /* dict_word is a typedef for a struct of 6 unsigned chars (defined in */
1550 /* "header.h"): it holds the (4 or) 6 bytes of Z-coded text of a word. */
1551 /* Usefully, because the PAD character 5 is < all alphabetic characters, */
1552 /* alphabetic order corresponds to numeric order. For this reason, the */
1553 /* dict_word is called the "sort code" of the original text word. */
1555 /* ###- In modifying the compiler, I've found it easier to discard the */
1556 /* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */
1557 /* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */
1558 /* as before. In Glulx, it can be any value up to MAX_DICT_WORD_SIZE. */
1559 /* (That limit is defined as 40 in the header; it exists only for a few */
1560 /* static buffers, and can be increased without using significant memory.) */
1562 /* ###- Well, that certainly bit me on the butt, didn't it. In further */
1563 /* modifying the compiler to generate a Unicode dictionary, I have to */
1564 /* store four-byte values in the uchar array. This is handled by making */
1565 /* the array size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE).*/
1566 /* Then we store the 32-bit character value big-endian. This lets us */
1567 /* continue to compare arrays bytewise, which is a nice simplification. */
1568 /* ------------------------------------------------------------------------- */
1570 extern int compare_sorts(uchar *d1, uchar *d2)
1572 for (i=0; i<DICT_WORD_BYTES; i++)
1573 if (d1[i]!=d2[i]) return((int)(d1[i]) - (int)(d2[i]));
1574 /* (since memcmp(d1, d2, DICT_WORD_BYTES); runs into a bug on some Unix
1579 extern void copy_sorts(uchar *d1, uchar *d2)
1581 for (i=0; i<DICT_WORD_BYTES; i++)
1585 static uchar prepared_sort[MAX_DICT_WORD_BYTES]; /* Holds the sort code
1588 static int number_and_case;
1590 /* Also used by verbs.c */
1591 static void dictionary_prepare_z(char *dword, uchar *optresult)
1592 { int i, j, k, k2, wd[13]; int32 tot;
1594 /* A rapid text translation algorithm using only the simplified rules
1595 applying to the text of dictionary entries: first produce a sequence
1596 of 6 (v3) or 9 (v4+) Z-characters */
1598 int dictsize = (version_number==3) ? 6 : 9;
1600 number_and_case = 0;
1602 for (i=0, j=0; dword[j]!=0; i++, j++)
1603 { if ((dword[j] == '/') && (dword[j+1] == '/'))
1604 { for (j+=2; dword[j] != 0; j++)
1606 { case 'p': number_and_case |= 4; break;
1608 error_named("Expected 'p' after '//' \
1609 to give number of dictionary word", dword);
1615 if (i>=dictsize) break;
1619 warning_named("Obsolete usage: use the ^ character for the \
1620 apostrophe in", dword);
1621 if (k==(int) '^') k=(int) '\'';
1624 if (k==(int) '@' || (character_set_unicode && (k & 0x80)))
1625 { int unicode = text_to_unicode(dword+j);
1626 if ((unicode < 128) && isupper(unicode)) unicode = tolower(unicode);
1627 k = unicode_to_zscii(unicode);
1628 j += textual_form_length - 1;
1629 if ((k == 5) || (k >= 0x100))
1630 { unicode_char_error(
1631 "Character can be printed but not input:", unicode);
1634 k2 = zscii_to_alphabet_grid[(uchar) k];
1637 { if (isupper(k)) k = tolower(k);
1638 k2 = iso_to_alphabet_grid[(uchar) k];
1642 { if ((k2 == -5) || (k2 <= -0x100))
1643 char_error("Character can be printed but not input:", k);
1645 { /* Use 4 more Z-chars to encode a ZSCII escape sequence */
1647 wd[i++] = 5; wd[i++] = 6;
1649 wd[i++] = k2/32; wd[i] = k2%32;
1653 { alphabet_used[k2] = 'Y';
1655 wd[i++]=3+(k2/26); /* Change alphabet for symbols */
1656 wd[i]=6+(k2%26); /* Write the Z character */
1660 /* Fill up to the end of the dictionary block with PAD characters */
1662 for (; i<9; i++) wd[i]=5;
1664 /* The array of Z-chars is converted to two or three 2-byte blocks */
1666 tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10);
1667 prepared_sort[1]=tot%0x100;
1668 prepared_sort[0]=(tot/0x100)%0x100;
1669 tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10);
1670 prepared_sort[3]=tot%0x100;
1671 prepared_sort[2]=(tot/0x100)%0x100;
1672 if (version_number==3)
1675 tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
1676 prepared_sort[5]=tot%0x100;
1677 prepared_sort[4]=(tot/0x100)%0x100;
1679 /* Set the "end bit" on the 2nd (in v3) or the 3rd (v4+) 2-byte block */
1681 if (version_number==3) prepared_sort[2]+=0x80;
1682 else prepared_sort[4]+=0x80;
1684 if (optresult) copy_sorts(optresult, prepared_sort);
1687 /* Also used by verbs.c */
1688 static void dictionary_prepare_g(char *dword, uchar *optresult)
1693 number_and_case = 0;
1695 for (i=0, j=0; (dword[j]!=0); i++, j++) {
1696 if ((dword[j] == '/') && (dword[j+1] == '/')) {
1697 for (j+=2; dword[j] != 0; j++) {
1700 number_and_case |= 4;
1703 error_named("Expected 'p' after '//' \
1704 to give gender or number of dictionary word", dword);
1710 if (i>=DICT_WORD_SIZE) break;
1712 k= ((unsigned char *)dword)[j];
1714 warning_named("Obsolete usage: use the ^ character for the \
1715 apostrophe in", dword);
1718 if (k=='~') /* as in iso_to_alphabet_grid */
1721 if (k=='@' || (character_set_unicode && (k & 0x80))) {
1722 unicode = text_to_unicode(dword+j);
1723 j += textual_form_length - 1;
1726 unicode = iso_to_unicode_grid[k];
1729 if (DICT_CHAR_SIZE != 1 || (unicode >= 0 && unicode < 256)) {
1733 error("The dictionary cannot contain Unicode characters beyond Latin-1. \
1734 Define DICT_CHAR_SIZE=4 for a Unicode-compatible dictionary.");
1738 if (k >= (unsigned)'A' && k <= (unsigned)'Z')
1741 if (DICT_CHAR_SIZE == 1) {
1742 prepared_sort[i] = k;
1745 prepared_sort[4*i] = (k >> 24) & 0xFF;
1746 prepared_sort[4*i+1] = (k >> 16) & 0xFF;
1747 prepared_sort[4*i+2] = (k >> 8) & 0xFF;
1748 prepared_sort[4*i+3] = (k) & 0xFF;
1752 if (DICT_CHAR_SIZE == 1) {
1753 for (; i<DICT_WORD_SIZE; i++)
1754 prepared_sort[i] = 0;
1757 for (; i<DICT_WORD_SIZE; i++) {
1758 prepared_sort[4*i] = 0;
1759 prepared_sort[4*i+1] = 0;
1760 prepared_sort[4*i+2] = 0;
1761 prepared_sort[4*i+3] = 0;
1765 if (optresult) copy_sorts(optresult, prepared_sort);
1768 extern void dictionary_prepare(char *dword, uchar *optresult)
1771 dictionary_prepare_z(dword, optresult);
1773 dictionary_prepare_g(dword, optresult);
1776 /* ------------------------------------------------------------------------- */
1777 /* The arrays below are all concerned with the problem of alphabetically */
1778 /* sorting the dictionary during the compilation pass. */
1779 /* Note that it is not enough simply to apply qsort to the dictionary at */
1780 /* the end of the pass: we need to ensure that no duplicates are ever */
1783 /* dict_sort_codes[n] the sort code of record n: i.e., of the nth */
1784 /* word to be entered into the dictionary, where */
1785 /* n counts upward from 0 */
1786 /* (n is also called the "accession number") */
1788 /* The tree structure encodes an ordering. The special value VACANT means */
1789 /* "no node here": otherwise, node numbers are the same as accession */
1790 /* numbers. At all times, "root" holds the node number of the top of the */
1791 /* tree; each node has up to two branches, such that the subtree of the */
1792 /* left branch is always alphabetically before what's at the node, and */
1793 /* the subtree to the right is always after; and all branches are coloured */
1794 /* either "black" or "red". These colours are used to detect points where */
1795 /* the tree is growing asymmetrically (and therefore becoming inefficient */
1797 /* ------------------------------------------------------------------------- */
1804 typedef struct dict_tree_node_s
1805 { int branch[2]; /* Branch 0 is "left", 1 is "right" */
1806 char colour; /* The colour of the branch to the parent */
1809 static dict_tree_node *dtree;
1811 int *final_dict_order;
1812 static uchar *dict_sort_codes;
1814 static void dictionary_begin_pass(void)
1816 /* Leave room for the 7-byte header (added in "tables.c" much later) */
1817 /* Glulx has a 4-byte header instead. */
1820 dictionary_top=dictionary+7;
1822 dictionary_top=dictionary+4;
1828 static int fdo_count;
1829 static void recursively_sort(int node)
1830 { if (dtree[node].branch[0] != VACANT)
1831 recursively_sort(dtree[node].branch[0]);
1832 final_dict_order[node] = fdo_count++;
1833 if (dtree[node].branch[1] != VACANT)
1834 recursively_sort(dtree[node].branch[1]);
1837 extern void sort_dictionary(void)
1840 { for (i=0; i<dict_entries; i++)
1841 final_dict_order[i] = i;
1846 { fdo_count = 0; recursively_sort(root);
1850 /* ------------------------------------------------------------------------- */
1851 /* If "dword" is in the dictionary, return its accession number plus 1; */
1852 /* If not, return 0. */
1853 /* ------------------------------------------------------------------------- */
1855 static int dictionary_find(char *dword)
1858 dictionary_prepare(dword, NULL);
1860 while (at != VACANT)
1861 { n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_BYTES);
1862 if (n==0) return at + 1;
1863 if (n>0) at = dtree[at].branch[1]; else at = dtree[at].branch[0];
1868 /* ------------------------------------------------------------------------- */
1869 /* Add "dword" to the dictionary with (x,y,z) as its data fields; unless */
1870 /* it already exists, in which case OR the data with (x,y,z) */
1872 /* These fields are one byte each in Z-code, two bytes each in Glulx. */
1874 /* Returns: the accession number. */
1875 /* ------------------------------------------------------------------------- */
1877 extern int dictionary_add(char *dword, int x, int y, int z)
1879 int ggfr = 0, gfr = 0, fr = 0, r = 0;
1880 int ggf = VACANT, gf = VACANT, f = VACANT, at = root;
1882 int res=((version_number==3)?4:6);
1884 dictionary_prepare(dword, NULL);
1887 { root = 0; goto CreateEntry;
1891 n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_BYTES);
1895 p = dictionary+7 + at*(3+res) + res;
1896 p[0]=(p[0])|x; p[1]=(p[1])|y; p[2]=(p[2])|z;
1897 if (x & 128) p[0] = (p[0])|number_and_case;
1900 p = dictionary+4 + at*DICT_ENTRY_BYTE_LENGTH + DICT_ENTRY_FLAG_POS;
1901 p[0]=(p[0])|(x/256); p[1]=(p[1])|(x%256);
1902 p[2]=(p[2])|(y/256); p[3]=(p[3])|(y%256);
1903 p[4]=(p[4])|(z/256); p[5]=(p[5])|(z%256);
1904 if (x & 128) p[1] = (p[1]) | number_and_case;
1908 if (n>0) r=1; else r=0;
1910 a = dtree[at].branch[0]; b = dtree[at].branch[1];
1911 if ((a != VACANT) && (dtree[a].colour == RED) &&
1912 (b != VACANT) && (dtree[b].colour == RED))
1913 { dtree[a].colour = BLACK;
1914 dtree[b].colour = BLACK;
1916 dtree[at].colour = RED;
1918 /* A tree rotation may be needed to avoid two red links in a row:
1920 ggf (or else gf is root) ggf (or f is root)
1923 / \(red) / \ (both red)
1929 In effect we rehang the "gf" subtree from "f".
1930 See the Technical Manual for further details.
1933 if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED))
1936 { if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f;
1937 dtree[gf].branch[gfr] = dtree[f].branch[1-fr];
1938 dtree[f].branch[1-fr] = gf;
1939 dtree[f].colour = BLACK;
1940 dtree[gf].colour = RED;
1941 gf = ggf; gfr = ggfr;
1944 { if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at;
1945 dtree[at].colour = BLACK;
1946 dtree[gf].colour = RED;
1947 dtree[f].branch[fr] = dtree[at].branch[gfr];
1948 dtree[gf].branch[gfr] = dtree[at].branch[fr];
1949 dtree[at].branch[gfr] = f;
1950 dtree[at].branch[fr] = gf;
1952 r = 1-r; n = at; if (r==fr) at = f; else at = gf;
1953 f = n; gf = ggf; fr = 1-r; gfr = ggfr;
1958 if (dtree[at].branch[r] == VACANT)
1959 { dtree[at].colour = RED;
1961 if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED))
1963 { if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f;
1964 dtree[gf].branch[gfr] = dtree[f].branch[1-fr];
1965 dtree[f].branch[1-fr] = gf;
1966 dtree[f].colour = BLACK;
1967 dtree[gf].colour = RED;
1970 { if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at;
1971 dtree[at].colour = BLACK;
1972 dtree[gf].colour = RED;
1973 dtree[f].branch[fr] = dtree[at].branch[gfr];
1974 dtree[gf].branch[gfr] = dtree[at].branch[fr];
1975 dtree[at].branch[gfr] = f;
1976 dtree[at].branch[fr] = gf;
1978 r = 1-r; n = at; if (r==fr) at = f; else at = gf;
1982 dtree[at].branch[r] = dict_entries;
1985 ggf = gf; gf = f; f = at; at = dtree[at].branch[r];
1986 ggfr = gfr; gfr = fr; fr = r;
1991 if (dict_entries==MAX_DICT_ENTRIES)
1992 memoryerror("MAX_DICT_ENTRIES",MAX_DICT_ENTRIES);
1994 dtree[dict_entries].branch[0] = VACANT;
1995 dtree[dict_entries].branch[1] = VACANT;
1996 dtree[dict_entries].colour = BLACK;
1998 /* Address in Inform's own dictionary table to write the record to */
2002 p = dictionary + (3+res)*dict_entries + 7;
2004 /* So copy in the 4 (or 6) bytes of Z-coded text and the 3 data
2007 p[0]=prepared_sort[0]; p[1]=prepared_sort[1];
2008 p[2]=prepared_sort[2]; p[3]=prepared_sort[3];
2009 if (version_number > 3)
2010 { p[4]=prepared_sort[4]; p[5]=prepared_sort[5]; }
2011 p[res]=x; p[res+1]=y; p[res+2]=z;
2012 if (x & 128) p[res] = (p[res])|number_and_case;
2014 dictionary_top += res+3;
2019 p = dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*dict_entries;
2020 p[0] = 0x60; /* type byte -- dict word */
2022 p += DICT_CHAR_SIZE;
2023 for (i=0; i<DICT_WORD_BYTES; i++)
2024 p[i] = prepared_sort[i];
2026 p += DICT_WORD_BYTES;
2028 p[2] = y/256; p[3] = y%256;
2031 p[1] |= number_and_case;
2033 dictionary_top += DICT_ENTRY_BYTE_LENGTH;
2037 copy_sorts(dict_sort_codes+dict_entries*DICT_WORD_BYTES, prepared_sort);
2039 return dict_entries++;
2042 /* ------------------------------------------------------------------------- */
2043 /* Used in "tables.c" for "Extend ... only", to renumber a verb-word to a */
2044 /* new verb syntax of its own. (Otherwise existing verb-words never */
2045 /* change their verb-numbers.) */
2046 /* ------------------------------------------------------------------------- */
2048 extern void dictionary_set_verb_number(char *dword, int to)
2050 int res=((version_number==3)?4:6);
2051 i=dictionary_find(dword);
2055 p=dictionary+7+(i-1)*(3+res)+res;
2059 p=dictionary+4 + (i-1)*DICT_ENTRY_BYTE_LENGTH + DICT_ENTRY_FLAG_POS;
2060 p[2]=to/256; p[3]=to%256;
2065 /* ------------------------------------------------------------------------- */
2066 /* Tracing code for the dictionary: used not only by "trace" and text */
2067 /* transcription, but also (in the case of "word_to_ascii") in a vital */
2068 /* by the linker. */
2069 /* ------------------------------------------------------------------------- */
2071 /* In the dictionary-showing code, if d_show_buf is NULL, the text is
2072 printed directly. (The "Trace dictionary" directive does this.)
2073 If d_show_buf is not NULL, we add words to it (reallocing if necessary)
2074 until it's a page-width.
2076 static char *d_show_buf = NULL;
2077 static int d_show_size; /* allocated size */
2078 static int d_show_len; /* current length */
2080 static void show_char(char c)
2082 if (d_show_buf == NULL) {
2086 if (d_show_len+2 >= d_show_size) {
2087 int newsize = 2 * d_show_len + 16;
2088 my_realloc(&d_show_buf, d_show_size, newsize, "dictionary display buffer");
2089 d_show_size = newsize;
2091 d_show_buf[d_show_len++] = c;
2092 d_show_buf[d_show_len] = '\0';
2096 /* Display a Unicode character in user-readable form. This uses the same
2097 character encoding as the source code. */
2098 static void show_uchar(uint32 c)
2104 /* ASCII always works */
2108 if (character_set_unicode) {
2109 /* UTF-8 the character */
2113 else if (c < 0x800) {
2114 show_char((0xC0 | ((c & 0x7C0) >> 6)));
2115 show_char((0x80 | (c & 0x03F) ));
2117 else if (c < 0x10000) {
2118 show_char((0xE0 | ((c & 0xF000) >> 12)));
2119 show_char((0x80 | ((c & 0x0FC0) >> 6)));
2120 show_char((0x80 | (c & 0x003F) ));
2122 else if (c < 0x200000) {
2123 show_char((0xF0 | ((c & 0x1C0000) >> 18)));
2124 show_char((0x80 | ((c & 0x03F000) >> 12)));
2125 show_char((0x80 | ((c & 0x000FC0) >> 6)));
2126 show_char((0x80 | (c & 0x00003F) ));
2133 if (character_set_setting == 1 && c < 0x100) {
2134 /* Fits in Latin-1 */
2138 /* Supporting other character_set_setting is harder; not currently implemented. */
2140 /* Use the escaped form */
2141 sprintf(buf, "@{%x}", c);
2142 for (ix=0; buf[ix]; ix++)
2146 extern void word_to_ascii(uchar *p, char *results)
2147 { int i, shift, cc, zchar; uchar encoded_word[9];
2148 encoded_word[0] = (((int) p[0])&0x7c)/4;
2149 encoded_word[1] = 8*(((int) p[0])&0x3) + (((int) p[1])&0xe0)/32;
2150 encoded_word[2] = ((int) p[1])&0x1f;
2151 encoded_word[3] = (((int) p[2])&0x7c)/4;
2152 encoded_word[4] = 8*(((int) p[2])&0x3) + (((int) p[3])&0xe0)/32;
2153 encoded_word[5] = ((int) p[3])&0x1f;
2154 if (version_number > 3)
2155 { encoded_word[6] = (((int) p[4])&0x7c)/4;
2156 encoded_word[7] = 8*(((int) p[4])&0x3) + (((int) p[5])&0xe0)/32;
2157 encoded_word[8] = ((int) p[5])&0x1f;
2161 for (i=0; i< ((version_number==3)?6:9); i++)
2162 { zchar = encoded_word[i];
2164 if (zchar == 4) shift = 1;
2166 if (zchar == 5) shift = 2;
2168 { if ((shift == 2) && (zchar == 6))
2169 { zchar = 32*encoded_word[i+1] + encoded_word[i+2];
2171 if ((zchar>=32) && (zchar<=126))
2172 results[cc++] = zchar;
2174 { zscii_to_text(results+cc, zchar);
2175 cc = strlen(results);
2179 { zscii_to_text(results+cc, (alphabet[shift])[zchar-6]);
2180 cc = strlen(results);
2188 static void recursively_show_z(int node)
2189 { int i, cprinted, flags; uchar *p;
2190 char textual_form[32];
2191 int res = (version_number == 3)?4:6; /* byte length of encoded text */
2193 if (dtree[node].branch[0] != VACANT)
2194 recursively_show_z(dtree[node].branch[0]);
2196 p = (uchar *)dictionary + 7 + (3+res)*node;
2198 word_to_ascii(p, textual_form);
2200 for (cprinted = 0; textual_form[cprinted]!=0; cprinted++)
2201 show_char(textual_form[cprinted]);
2202 for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++)
2205 if (d_show_buf == NULL)
2206 { for (i=0; i<3+res; i++) printf("%02x ",p[i]);
2208 flags = (int) p[res];
2211 if (flags & 4) printf("p"); else printf(" ");
2216 { if (grammar_version_number == 1)
2217 printf("preposition:%d ", (int) p[res+2]);
2219 printf("preposition ");
2221 if ((flags & 3) == 3) printf("metaverb:%d ", (int) p[res+1]);
2222 else if ((flags & 3) == 1) printf("verb:%d ", (int) p[res+1]);
2226 /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
2227 if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1))
2229 write_to_transcript_file(d_show_buf, STRCTX_DICT);
2233 if (dtree[node].branch[1] != VACANT)
2234 recursively_show_z(dtree[node].branch[1]);
2237 static void recursively_show_g(int node)
2241 if (dtree[node].branch[0] != VACANT)
2242 recursively_show_g(dtree[node].branch[0]);
2244 p = (uchar *)dictionary + 4 + DICT_ENTRY_BYTE_LENGTH*node;
2246 for (cprinted = 0; cprinted<DICT_WORD_SIZE; cprinted++)
2249 if (DICT_CHAR_SIZE == 1)
2252 ch = (p[4*cprinted+4] << 24) + (p[4*cprinted+5] << 16) + (p[4*cprinted+6] << 8) + (p[4*cprinted+7]);
2257 for (; cprinted<DICT_WORD_SIZE+4; cprinted++)
2260 if (d_show_buf == NULL)
2261 { for (i=0; i<DICT_ENTRY_BYTE_LENGTH; i++) printf("%02x ",p[i]);
2262 int flagpos = (DICT_CHAR_SIZE == 1) ? (DICT_WORD_SIZE+1) : (DICT_WORD_BYTES+4);
2263 int flags = (p[flagpos+0] << 8) | (p[flagpos+1]);
2264 int verbnum = (p[flagpos+2] << 8) | (p[flagpos+3]);
2267 if (flags & 4) printf("p"); else printf(" ");
2272 { printf("preposition ");
2274 if ((flags & 3) == 3) printf("metaverb:%d ", verbnum);
2275 else if ((flags & 3) == 1) printf("verb:%d ", verbnum);
2279 /* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
2280 if (d_show_buf && (d_show_len >= 64 || TRANSCRIPT_FORMAT == 1))
2282 write_to_transcript_file(d_show_buf, STRCTX_DICT);
2286 if (dtree[node].branch[1] != VACANT)
2287 recursively_show_g(dtree[node].branch[1]);
2290 static void show_alphabet(int i)
2291 { int j, c; char chartext[8];
2293 for (j=0; j<26; j++)
2294 { c = alphabet[i][j];
2296 if (alphabet_used[26*i+j] == 'N') printf("("); else printf(" ");
2298 zscii_to_text(chartext, c);
2299 printf("%s", chartext);
2301 if (alphabet_used[26*i+j] == 'N') printf(")"); else printf(" ");
2306 extern void show_dictionary(void)
2307 { printf("Dictionary contains %d entries:\n",dict_entries);
2308 if (dict_entries != 0)
2309 { d_show_len = 0; d_show_buf = NULL;
2311 recursively_show_z(root);
2313 recursively_show_g(root);
2317 printf("\nZ-machine alphabet entries:\n");
2324 extern void write_dictionary_to_transcript(void)
2326 d_show_size = 80; /* initial size */
2327 d_show_buf = my_malloc(d_show_size, "dictionary display buffer");
2329 write_to_transcript_file("", STRCTX_INFO);
2330 sprintf(d_show_buf, "[Dictionary contains %d entries:]", dict_entries);
2331 write_to_transcript_file(d_show_buf, STRCTX_INFO);
2335 if (dict_entries != 0)
2338 recursively_show_z(root);
2340 recursively_show_g(root);
2342 if (d_show_len != 0) write_to_transcript_file(d_show_buf, STRCTX_DICT);
2344 my_free(&d_show_buf, "dictionary display buffer");
2345 d_show_len = 0; d_show_buf = NULL;
2348 /* ========================================================================= */
2349 /* Data structure management routines */
2350 /* ------------------------------------------------------------------------- */
2352 extern void init_text_vars(void)
2359 no_chars_transcribed = 0;
2361 for (j=0; j<256; j++) abbrevs_lookup[j] = -1;
2363 total_zchars_trans = 0;
2366 final_dict_order = NULL;
2367 dict_sort_codes = NULL;
2370 initialise_memory_block(&static_strings_area);
2373 extern void text_begin_pass(void)
2374 { abbrevs_lookup_table_made = FALSE;
2376 total_chars_trans=0; total_bytes_trans=0;
2377 if (store_the_text) all_text_top=all_text;
2378 dictionary_begin_pass();
2379 low_strings_top = low_strings;
2381 static_strings_extent = 0;
2383 no_dynamic_strings = 0;
2384 no_unicode_chars = 0;
2387 /* Note: for allocation and deallocation of all_the_text, see inform.c */
2389 extern void text_allocate_arrays(void)
2390 { abbreviations_at = my_malloc(MAX_ABBREVS*MAX_ABBREV_LENGTH,
2392 abbrev_values = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev values");
2393 abbrev_quality = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev quality");
2394 abbrev_freqs = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev freqs");
2396 dtree = my_calloc(sizeof(dict_tree_node), MAX_DICT_ENTRIES,
2397 "red-black tree for dictionary");
2398 final_dict_order = my_calloc(sizeof(int), MAX_DICT_ENTRIES,
2399 "final dictionary ordering table");
2400 dict_sort_codes = my_calloc(DICT_WORD_BYTES, MAX_DICT_ENTRIES,
2401 "dictionary sort codes");
2404 dictionary = my_malloc(9*MAX_DICT_ENTRIES+7,
2407 dictionary = my_malloc(DICT_ENTRY_BYTE_LENGTH*MAX_DICT_ENTRIES+4,
2410 strings_holding_area
2411 = my_malloc(MAX_STATIC_STRINGS,"static strings holding area");
2412 low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings");
2418 huff_entities = NULL;
2420 unicode_usage_entries = NULL;
2421 done_compression = FALSE;
2422 compression_table_size = 0;
2423 compressed_offsets = NULL;
2425 MAX_CHARACTER_SET = 0;
2428 if (compression_switch) {
2430 MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS
2431 + MAX_UNICODE_CHARS;
2432 huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1,
2433 "huffman entities");
2434 hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET,
2435 "huffman node list");
2436 unicode_usage_entries = my_calloc(sizeof(unicode_usage_t),
2437 MAX_UNICODE_CHARS, "unicode entity entries");
2438 for (ix=0; ix<UNICODE_HASH_BUCKETS; ix++)
2439 unicode_usage_hash[ix] = NULL;
2441 compressed_offsets = my_calloc(sizeof(int32), MAX_NUM_STATIC_STRINGS,
2442 "static strings index table");
2446 extern void text_free_arrays(void)
2448 my_free(&strings_holding_area, "static strings holding area");
2449 my_free(&low_strings, "low (abbreviation) strings");
2450 my_free(&abbreviations_at, "abbreviations");
2451 my_free(&abbrev_values, "abbrev values");
2452 my_free(&abbrev_quality, "abbrev quality");
2453 my_free(&abbrev_freqs, "abbrev freqs");
2455 my_free(&dtree, "red-black tree for dictionary");
2456 my_free(&final_dict_order, "final dictionary ordering table");
2457 my_free(&dict_sort_codes, "dictionary sort codes");
2459 my_free(&dictionary,"dictionary");
2461 my_free(&compressed_offsets, "static strings index table");
2462 my_free(&hufflist, "huffman node list");
2463 my_free(&huff_entities, "huffman entities");
2464 my_free(&unicode_usage_entries, "unicode entity entities");
2466 deallocate_memory_block(&static_strings_area);
2469 extern void ao_free_arrays(void)
2470 { my_free (&tlbtab,"tlb table");
2471 my_free (&sub_buffer,"sub_buffer");
2472 my_free (&bestyet,"bestyet");
2473 my_free (&bestyet2,"bestyet2");
2474 my_free (&grandtable,"grandtable");
2475 my_free (&grandflags,"grandflags");
2478 /* ========================================================================= */