2 * Copyright (c) 2010-2011 Chris Spiegel
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* Number of elements in the array “a”.  Only meaningful for real
 * arrays; a pointer argument gives a meaningless result.
 */
#define ASIZE(a) (sizeof (a) / sizeof (a)[0])
/* These are for system/usage errors (malloc() failure, invalid
 * command-line argument, etc.).
 *
 * Both take printf()-style arguments, print the message to stderr and
 * exit with status 1.  err() additionally appends ": " and the
 * strerror() text for errno; errx() just terminates the line.
 *
 * errno is captured before anything is printed because fprintf() is
 * itself allowed to change it, which would make err() describe the
 * wrong failure.
 */
#define err(...) do { int err_saved_errno = errno; fprintf(stderr, __VA_ARGS__); fprintf(stderr, ": %s\n", strerror(err_saved_errno)); exit(1); } while(0)
#define errx(...) do { fprintf(stderr, __VA_ARGS__); fputc('\n', stderr); exit(1); } while(0)
49 /* These are for syntax errors. */
50 static const char *current_file;
51 static char current_line[1024];
52 static int current_lineno;
53 static void msg(const char *type, const char *fmt, ...)
59 /* Messages can be generated before the file is parsed. */
60 if(current_file == NULL)
62 fprintf(stderr, "internal %s: ", type);
63 vfprintf(stderr, fmt, ap);
68 fprintf(stderr, "%s:%d: %s: ", current_file, current_lineno, type);
69 vfprintf(stderr, fmt, ap);
70 fprintf(stderr, "\n\t%s\n\t^\n", current_line);
75 #define die(...) do { msg("error", __VA_ARGS__); exit(1); } while(0)
76 #define warning(...) msg("warning", __VA_ARGS__)
78 static long release = 1;
79 static char serial[7];
81 static long zversion = 8;
83 /* Return routine alignment for a particular Z-machine version. In
84 * addition to being used when a routine is created, this also is used
85 * as the divisor when storing the file size and as the default
86 * alignment for the “align” directive.
88 static int alignment(void)
92 case 1: case 2: case 3:
94 case 4: case 5: case 6: case 7:
99 errx("unsupported version %ld", zversion);
103 /* Pack an address; this does not currently understand routine/string
104 * offsets of V6 and V7 (they are both set to zero), so routines at the
105 * top of memory will not properly pack.
107 static uint32_t pack(uint32_t a)
109 return a / alignment();
112 static unsigned long max_size(void)
116 case 1: case 2: case 3:
125 errx("unsupported version %ld", zversion);
131 static void WRITE(const void *b, ssize_t l) { if(write(fd, b, l) != l) errx("short write"); }
132 static void PWRITE(const void *b, ssize_t l, off_t o) { if(pwrite(fd, b, l, o) != l) errx("short write"); }
/* Emit one byte through WRITE(). */
static void BYTE(uint8_t v)
{
  WRITE(&v, sizeof v);
}

/* Store one byte at offset “o” through PWRITE(). */
static void PBYTE(uint8_t v, off_t o)
{
  PWRITE(&v, sizeof v, o);
}

/* Emit a 16-bit value, most significant byte first. */
static void WORD(uint16_t v)
{
  const uint8_t high = v >> 8;
  const uint8_t low = v & 0xff;

  BYTE(high);
  BYTE(low);
}

/* Store a 16-bit value at offset “o”, most significant byte first. */
static void PWORD(uint16_t v, off_t o)
{
  const uint8_t high = v >> 8;
  const uint8_t low = v & 0xff;

  PBYTE(high, o);
  PBYTE(low, o + 1);
}
137 static void SEEK(off_t o, int w) { if(lseek(fd, o, w) == -1) err("lseek"); }
138 static off_t TELL(void) { off_t r = lseek(fd, 0, SEEK_CUR); if(r == -1) err("lseek"); return r; }
/* Round “v” up to the nearest multiple of “multiple”.  A value that is
 * already a multiple (0 included) is returned unchanged, and a
 * multiple of 0 yields 0.
 *
 * The remainder is used rather than the (v - 1) / multiple form so
 * that v == 0 does not wrap around in unsigned arithmetic and produce
 * a bogus result for multiples that are not powers of two.
 */
static unsigned long roundup(unsigned long v, unsigned long multiple)
{
  unsigned long excess;

  if(multiple == 0) return 0;

  excess = v % multiple;
  if(excess == 0) return v;

  return v + (multiple - excess);
}
/* Round “v” up to a multiple of alignment(). */
static uint32_t ALIGN(uint32_t v)
{
  const unsigned long boundary = alignment();

  return roundup(v, boundary);
}

/* Move the output file offset to ALIGN() of its current position. */
static void SEEKALIGN(void)
{
  const uint32_t aligned = ALIGN(TELL());

  SEEK(aligned, SEEK_SET);
}
#define UNICODE_TABLE_SIZE 97
static uint16_t unicode_table[UNICODE_TABLE_SIZE];
static int unicode_index;

/* Map the Unicode value “u” to a character code through the unicode
 * table.  Slot i of the table corresponds to code 155 + i.  A value
 * already in the table yields its existing code; otherwise it is
 * appended and the code of the new slot is returned.  Dies when the
 * table is already full.
 */
static uint8_t unicode_to_zscii(uint16_t u)
{
  int slot = 0;

  while(slot < unicode_index)
  {
    if(unicode_table[slot] == u) return slot + 155;
    slot++;
  }

  if(unicode_index == UNICODE_TABLE_SIZE) die("too many unicode characters for the table (max %d)", UNICODE_TABLE_SIZE);

  unicode_table[unicode_index] = u;
  unicode_index++;

  /* unicode_index is now one past the new slot, hence 154 and not 155. */
  return unicode_index + 154;
}
169 static size_t decode_utf8(const char *string, uint16_t *utf)
171 uint16_t *saved = utf;
174 for(const char *p = string; *p != 0; p++)
176 if((*p & 0x80) == 0) /* One byte. */
180 else if((*p & 0xe0) == 0xc0) /* Two bytes. */
182 if(p[1] == 0) die("invalid utf-8 sequence at byte %d", (int)(p - string));
184 ret = (*p++ & 0x1f) << 6;
187 else if((*p & 0xf0) == 0xe0) /* Three bytes. */
189 if(p[1] == 0 || p[2] == 0) die("invalid utf-8 sequence at byte %d", (int)(p - string));
191 ret = (*p++ & 0x0f) << 12;
192 ret |= (*p++ & 0x3f) << 6;
195 else if((*p & 0xf8) == 0xf0)
197 die("4-byte utf-8 is not supported, byte %d", (int)(p - string));
201 die("invalid utf-8 sequence at byte %d", (int)(p - string));
204 if(ret > UINT16_MAX) die("too-large unicode value");
212 #define F_INDIRECT 0x01
215 #define F_DOUBLE 0x08
219 enum count { C_ZERO, C_ONE, C_TWO, C_VAR, C_EXT } count;
223 const char *prototype;
228 static struct opcode opcodes[256];
229 static size_t nopcodes = 0;
231 static void OP_(enum count count, const char *name, int min, int max, int number, const char *prototype, int flags)
233 int v = zversion > 6 ? 5 : zversion;
236 if(v < min || v > max) return;
238 /* An extra slot at the end of the opcodes list is reserved as a
239 * sentinel: the “name” member will be NULL.
241 if(nopcodes >= (sizeof opcodes / sizeof *opcodes) - 1) errx("internal error: opcode overflow");
243 opcodes[nopcodes] = (struct opcode){ .count = count, .name = name, .number = number, .prototype = prototype, .flags = flags };
245 e = regcomp(&opcodes[nopcodes].re, opcodes[nopcodes].prototype, REG_EXTENDED | REG_NOSUB);
249 regerror(e, &opcodes[nopcodes].re, emsg, sizeof emsg);
250 errx("error compiling %s: %s", opcodes[nopcodes].prototype, emsg);
256 #define OP(count, name, min, max, number, prototype, flags) OP_(count, name, min, max, number, "^" prototype "$", flags)
258 #define ZEROOP(...) OP(C_ZERO, __VA_ARGS__)
259 #define ONEOP(...) OP(C_ONE, __VA_ARGS__)
260 #define TWOOP(...) OP(C_TWO, __VA_ARGS__)
261 #define VAROP(...) OP(C_VAR, __VA_ARGS__)
262 #define EXTOP(...) OP(C_EXT, __VA_ARGS__)
264 /* Convenience macros. The arguments to most opcodes can be of any
265 * type, so provide easy ways to accomplish this.
269 #define TWO "[vn][vn]"
270 #define THREE "[vn][vn][vn]"
271 #define FOUR "[vn][vn][vn][vn]"
275 static void setup_opcodes(void)
277 ZEROOP("rtrue", 1, 6, 0x00, NONE, 0);
278 ZEROOP("rfalse", 1, 6, 0x01, NONE, 0);
279 ZEROOP("nop", 1, 6, 0x04, NONE, 0);
280 ZEROOP("save", 1, 3, 0x05, BRANCH, 0);
281 ZEROOP("save", 4, 4, 0x05, STORE, 0);
282 ZEROOP("restore", 1, 3, 0x06, BRANCH, 0);
283 ZEROOP("restore", 4, 4, 0x06, STORE, 0);
284 ZEROOP("restart", 1, 6, 0x07, NONE, 0);
285 ZEROOP("ret_popped", 1, 6, 0x08, NONE, 0);
286 ZEROOP("pop", 1, 4, 0x09, NONE, 0);
287 ZEROOP("catch", 5, 6, 0x09, NONE STORE, 0);
288 ZEROOP("quit", 1, 6, 0x0a, NONE, 0);
289 ZEROOP("new_line", 1, 6, 0x0b, NONE, 0);
290 ZEROOP("show_status", 3, 3, 0x0c, NONE, 0);
291 ZEROOP("verify", 3, 6, 0x0d, NONE BRANCH, 0);
292 ZEROOP("piracy", 5, 6, 0x0f, NONE BRANCH, 0);
294 ONEOP("jz", 1, 6, 0x00, ONE BRANCH, 0);
295 ONEOP("get_sibling", 1, 6, 0x01, ONE BRANCH STORE, 0);
296 ONEOP("get_child", 1, 6, 0x02, ONE BRANCH STORE, 0);
297 ONEOP("get_parent", 1, 6, 0x03, ONE STORE, 0);
298 ONEOP("get_prop_len", 1, 6, 0x04, ONE STORE, 0);
299 ONEOP("inc", 1, 6, 0x05, "v", F_INDIRECT);
300 ONEOP("dec", 1, 6, 0x06, "v", F_INDIRECT);
301 ONEOP("print_addr", 1, 6, 0x07, ONE, 0);
302 ONEOP("call_1s", 4, 6, 0x08, ONE STORE, 0);
303 ONEOP("remove_obj", 1, 6, 0x09, ONE, 0);
304 ONEOP("print_obj", 1, 6, 0x0a, ONE, 0);
305 ONEOP("ret", 1, 6, 0x0b, ONE, 0);
306 ONEOP("jump", 1, 6, 0x0c, BRANCH, F_JUMP);
307 ONEOP("print_paddr", 1, 6, 0x0d, ONE, 0);
308 ONEOP("load", 1, 6, 0x0e, "v" STORE, F_INDIRECT);
309 ONEOP("not", 1, 4, 0x0f, ONE STORE, 0);
310 ONEOP("call_1n", 5, 6, 0x0f, ONE, 0);
312 TWOOP("je", 1, 6, 0x01, "[vn][vn]{0,3}" BRANCH, F_2VAR);
313 TWOOP("jl", 1, 6, 0x02, TWO BRANCH, 0);
314 TWOOP("jg", 1, 6, 0x03, TWO BRANCH, 0);
315 TWOOP("dec_chk", 1, 6, 0x04, "v[vn]" BRANCH, F_INDIRECT);
316 TWOOP("inc_chk", 1, 6, 0x05, "v[vn]" BRANCH, F_INDIRECT);
317 TWOOP("jin", 1, 6, 0x06, TWO BRANCH, 0);
318 TWOOP("test", 1, 6, 0x07, TWO BRANCH, 0);
319 TWOOP("or", 1, 6, 0x08, TWO STORE, 0);
320 TWOOP("and", 1, 6, 0x09, TWO STORE, 0);
321 TWOOP("test_attr", 1, 6, 0x0a, TWO BRANCH, 0);
322 TWOOP("set_attr", 1, 6, 0x0b, TWO, 0);
323 TWOOP("clear_attr", 1, 6, 0x0c, TWO, 0);
324 TWOOP("store", 1, 6, 0x0d, "v[vn]", F_INDIRECT);
325 TWOOP("insert_obj", 1, 6, 0x0e, TWO, 0);
326 TWOOP("loadw", 1, 6, 0x0f, TWO STORE, 0);
327 TWOOP("loadb", 1, 6, 0x10, TWO STORE, 0);
328 TWOOP("get_prop", 1, 6, 0x11, TWO STORE, 0);
329 TWOOP("get_prop_addr", 1, 6, 0x12, TWO STORE, 0);
330 TWOOP("get_next_prop", 1, 6, 0x13, TWO STORE, 0);
331 TWOOP("add", 1, 6, 0x14, TWO STORE, 0);
332 TWOOP("sub", 1, 6, 0x15, TWO STORE, 0);
333 TWOOP("mul", 1, 6, 0x16, TWO STORE, 0);
334 TWOOP("div", 1, 6, 0x17, TWO STORE, 0);
335 TWOOP("mod", 1, 6, 0x18, TWO STORE, 0);
336 TWOOP("call_2s", 4, 6, 0x19, TWO STORE, 0);
337 TWOOP("call_2n", 5, 6, 0x1a, TWO, 0);
338 TWOOP("set_colour", 5, 5, 0x1b, TWO, 0);
339 TWOOP("set_colour", 6, 6, 0x1b, "[vn][vn][vn]?", F_2VAR);
340 TWOOP("throw", 5, 6, 0x1c, TWO, 0);
342 VAROP("call_vs", 1, 6, 0x00, "[vn][vn]{0,3}" STORE, 0);
343 VAROP("storew", 1, 6, 0x01, THREE, 0);
344 VAROP("storeb", 1, 6, 0x02, THREE, 0);
345 VAROP("put_prop", 1, 6, 0x03, THREE, 0);
346 VAROP("read", 1, 3, 0x04, TWO, 0);
347 VAROP("read", 4, 4, 0x04, "[vn][vn]([vn][vn])?", 0);
348 VAROP("read", 5, 6, 0x04, "[vn][vn]([vn][vn])?" STORE, 0);
349 VAROP("print_char", 1, 6, 0x05, ONE, 0);
350 VAROP("print_num", 1, 6, 0x06, ONE, 0);
351 VAROP("random", 1, 6, 0x07, ONE STORE, 0);
352 VAROP("push", 1, 6, 0x08, ONE, 0);
353 VAROP("pull", 1, 5, 0x09, "v", F_INDIRECT);
354 VAROP("pull", 6, 6, 0x09, "[vn]?" STORE, 0);
355 VAROP("split_window", 3, 6, 0x0a, ONE, 0);
356 VAROP("set_window", 3, 6, 0x0b, ONE, 0);
357 VAROP("call_vs2", 4, 6, 0x0c, "[vn][vn]{0,7}" STORE, F_DOUBLE);
358 VAROP("erase_window", 4, 6, 0x0d, ONE, 0);
359 VAROP("erase_line", 4, 6, 0x0e, ONE, 0);
360 VAROP("set_cursor", 4, 5, 0x0f, TWO, 0);
361 VAROP("set_cursor", 6, 6, 0x0f, THREE, 0);
362 VAROP("get_cursor", 4, 6, 0x10, ONE, 0);
363 VAROP("set_text_style", 4, 6, 0x11, ONE, 0);
364 VAROP("buffer_mode", 4, 6, 0x12, ONE, 0);
365 VAROP("output_stream", 3, 4, 0x13, ONE, 0);
366 VAROP("output_stream", 5, 5, 0x13, "[vn][vn]?", 0);
367 VAROP("output_stream", 6, 6, 0x13, "[vn][vn]{0,2}", 0);
368 VAROP("input_stream", 3, 6, 0x14, ONE, 0);
369 VAROP("sound_effect", 3, 6, 0x15, FOUR, 0);
370 VAROP("read_char", 4, 6, 0x16, "[vn]([vn][vn])?" STORE, 0);
371 VAROP("scan_table", 4, 6, 0x17, "[vn][vn][vn][vn]?" BRANCH STORE, 0);
372 VAROP("not", 5, 6, 0x18, ONE STORE, 0);
373 VAROP("call_vn", 5, 6, 0x19, "[vn][vn]{0,3}", 0);
374 VAROP("call_vn2", 5, 6, 0x1a, "[vn][vn]{0,7}", F_DOUBLE);
375 VAROP("tokenise", 5, 6, 0x1b, FOUR, 0);
376 VAROP("encode_text", 5, 6, 0x1c, FOUR, 0);
377 VAROP("copy_table", 5, 6, 0x1d, THREE, 0);
378 VAROP("print_table", 5, 6, 0x1e, "[vn][vn][vn]{0,2}", 0);
379 VAROP("check_arg_count", 5, 6, 0x1f, ONE BRANCH, 0);
381 EXTOP("save", 5, 6, 0x00, "([vn]{3})?" STORE, 0);
382 EXTOP("restore", 5, 6, 0x01, "([vn]{3})?" STORE, 0);
383 EXTOP("log_shift", 5, 6, 0x02, TWO STORE, 0);
384 EXTOP("art_shift", 5, 6, 0x03, TWO STORE, 0);
385 EXTOP("set_font", 5, 6, 0x04, ONE STORE, 0);
386 EXTOP("draw_picture", 6, 6, 0x05, "[vn][vn]{0,2}", 0);
387 EXTOP("picture_data", 6, 6, 0x06, TWO BRANCH, 0);
388 EXTOP("erase_picture", 6, 6, 0x07, THREE, 0);
389 EXTOP("set_margins", 6, 6, 0x08, THREE, 0);
390 EXTOP("save_undo", 5, 6, 0x09, STORE, 0);
391 EXTOP("restore_undo", 5, 6, 0x0a, STORE, 0);
392 EXTOP("print_unicode", 5, 6, 0x0b, ONE, 0);
393 EXTOP("check_unicode", 5, 6, 0x0c, ONE STORE, 0);
394 EXTOP("set_true_colour", 5, 5, 0x0d, TWO, 0);
395 EXTOP("set_true_colour", 6, 6, 0x0d, THREE, 0);
396 EXTOP("move_window", 6, 6, 0x10, THREE, 0);
397 EXTOP("window_size", 6, 6, 0x11, THREE, 0);
398 EXTOP("window_style", 6, 6, 0x12, "[vn][vn][vn]?", 0);
399 EXTOP("get_wind_prop", 6, 6, 0x13, TWO STORE, 0);
400 EXTOP("scroll_window", 6, 6, 0x14, TWO, 0);
401 EXTOP("pop_stack", 6, 6, 0x15, "[vn][vn]?", 0);
402 EXTOP("read_mouse", 6, 6, 0x16, ONE, 0);
403 EXTOP("mouse_window", 6, 6, 0x17, "[vn]?", 0);
404 EXTOP("push_stack", 6, 6, 0x18, TWO BRANCH, 0);
405 EXTOP("put_wind_prop", 6, 6, 0x19, THREE, 0);
406 EXTOP("print_form", 6, 6, 0x1a, ONE, 0);
407 EXTOP("make_menu", 6, 6, 0x1b, TWO BRANCH, 0);
408 EXTOP("picture_table", 6, 6, 0x1c, ONE, 0);
410 /* Zoom extensions. */
411 EXTOP("start_timer", 5, 6, 0x80, NONE, 0);
412 EXTOP("stop_timer", 5, 6, 0x81, NONE, 0);
413 EXTOP("read_timer", 5, 6, 0x82, STORE, 0);
414 EXTOP("print_timer", 5, 6, 0x83, NONE, 0);
432 static char *xstrdup(const char *s)
440 if(r == NULL) err("malloc");
447 /* Each operand to an opcode is represented by a “struct arg”.
448 * There are four types:
449 * • none, indicating no argument
450 * • jump, indicating an argument that is a label
451 * • numeric, indicating a constant (large or small)
452 * • variable, indicating a variable.
454 * jump_type indicates whether the jump is a routine, label, or branch.
456 * value indicates the value for a numeric or variable (for a variable,
457 * 0 is the stack pointer, 1 is local var 1, etc., as in the standard.)
459 * name is the name of the label to branch to.
461 * small is true if this is a one-byte (6-bit) offset as opposed to a
462 * two-byte (14-bit) offset.
464 * invert is true if the argument is a branch target and the test should
465 * be inverted (branch on false).
467 * ret is set to 0 or 1 if the branch, instead of branching, should
468 * return false or true; this value should be checked iff name is NULL.
472 enum { A_NONE, A_JUMP, A_NUMERIC, A_VARIABLE } type;
473 enum jump_type { J_BRANCH, J_JUMP, J_LABEL, J_PACKED } jump_type;
483 /* Information on each label that’s found (either a label for jumping
484 * to, or a routine name).
485 * “name” is the name, and “addr” is the location of the label. Note
486 * that a single list is used so routines and labels cannot have the
497 static struct label *labels = NULL;
499 static void add_label(const char *name, uint32_t addr)
503 for(new = labels; new != NULL; new = new->next)
505 if(strcmp(new->name, name) == 0) die("label %s already exists", name);
508 new = malloc(sizeof *new);
509 if(new == NULL) err("malloc");
511 new->name = xstrdup(name);
518 /* Each jump struct represents a jump: either a routine call, a jump, or
519 * a branch instruction.
520 * “from” is the address whence the jump occurs, and “to” is the label
521 * to which a jump is requested.
522 * If the type is a branch, “small” is set if this is a one-byte (6-bit)
523 * offset, and “invert” is set if the branch should be taken in the
542 static struct jump *jumps = NULL;
544 static void add_jump(struct arg jump)
548 if(jump.jump_type == J_BRANCH && jump.name == NULL)
550 BYTE((!jump.invert << 7) | 0x40 | jump.ret);
554 /* @jump ? is special-cased because the syntax is natural, but
555 * allowing ? also allows ?0, ?~, and %, so catch those errors here.
557 if(jump.jump_type == J_JUMP)
559 if(jump.name == NULL || jump.invert || jump.small) die("syntax: jump ?Label");
562 new = malloc(sizeof *new);
563 if(new == NULL) err("malloc");
565 new->type = jump.jump_type;
567 new->to = xstrdup(jump.name);
568 if(jump.jump_type == J_BRANCH)
570 new->small = jump.small;
571 new->invert = jump.invert;
574 new->file = xstrdup(current_file);
575 new->line = xstrdup(current_line);
576 new->lineno = current_lineno;
581 if(jump.small) BYTE(0);
585 static void apply_jumps(void)
587 for(struct jump *j = jumps; j != NULL; j = j->next)
591 current_file = j->file;
592 strcpy(current_line, j->line); /* guaranteed to be large enough because j->line was originally copied from current_line. */
593 current_lineno = j->lineno;
595 for(struct label *l = labels; l != NULL; l = l->next)
597 if(strcmp(l->name, j->to) == 0)
606 tempo = (long long)l->addr - (long long)j->from;
610 /* The offset needs to have two added to it (decoding the
611 * offset is done as PC = PC + Offset - 2); at this point
612 * j->from is at the part right where the offset is
613 * written. It is therefore, if the offset is 16 bits,
614 * already two bytes farther than is “correct”, or if the
615 * offset is 8 bits, one byte farther; so in this case one
616 * needs to be added on.
621 * 61 is @je, 01 01 is L01 L01. The first arrow points to
622 * where the offset is stored, which is also where j->from
623 * is currently pointing. The second arrow is the
624 * location whence the offset should be counted. Since
625 * the offset is calculated by using j->from as the
626 * starting address, it is already one byte longer than is
 * the “actual” offset, which means it is one byte shorter
628 * than the *stored* offset needs to be. Adding one gives
633 if(tempo < 0 || tempo > 63) die("offset (%lld) does not fit into unsigned 6-bit value", tempo);
639 if(!j->invert) offset |= 0x80;
641 PBYTE(offset, j->from);
645 /* A quick example of the offset wackiness as described
646 * above, for a 14-bit offset.
651 * Here again the first arrow is where j->from is
652 * pointing, whereas the second arrow is whence the offset
653 * is counted. This time, the two bytes needed for the
654 * stored value are already present if j->from is used as
655 * the starting point, so nothing special needs to be
658 if(tempo < -8192 || tempo > 8191) die("offset (%lld) does not fit into signed 14-bit value", tempo);
662 if(!j->invert) offset |= 0x8000;
664 PWORD(offset, j->from);
669 tempo = (long long)l->addr - (long long)j->from;
670 if(tempo < INT16_MIN || tempo > INT16_MAX) die("offset (%lld) does not fit into signed 16-bit value", tempo);
672 PWORD(tempo, j->from);
676 if(l->addr > UINT16_MAX) die("address of %s is too large (%lx)", l->name, (unsigned long)l->addr);
677 PWORD(l->addr, j->from);
681 if(pack(l->addr) > UINT16_MAX) die("address of %s is too large to pack (%lx -> %lx)", l->name, (unsigned long)l->addr, (unsigned long)pack(l->addr));
682 PWORD(pack(l->addr), j->from);
691 if(!found) die("no label %s", j->to);
695 static struct arg NONE(void) { return (struct arg){ .type = A_NONE }; }
696 static struct arg N(uint16_t n) { return (struct arg){ .type = A_NUMERIC, .value = n }; }
697 static struct arg SP(void) { return (struct arg){ .type = A_VARIABLE, .value = 0 }; }
698 static struct arg L(uint8_t n) { return (struct arg){ .type = A_VARIABLE, .value = n + 0x01 }; }
699 static struct arg G(uint8_t n) { return (struct arg){ .type = A_VARIABLE, .value = n + 0x10 }; }
700 static struct arg LBL(const char *n) { return (struct arg){ .type = A_JUMP, .jump_type = J_LABEL, .name = xstrdup(n) }; }
701 static struct arg PCK(const char *n) { return (struct arg){ .type = A_JUMP, .jump_type = J_PACKED, .name = xstrdup(n) }; }
703 static struct arg BRANCH(const char *n, int small)
705 struct arg arg = { .type = A_JUMP, .jump_type = J_BRANCH, .small = small };
713 if(*n == '0' || *n == '1') arg.ret = *n - '0';
714 else arg.name = xstrdup(n);
719 static uint8_t make_type(struct arg arg)
724 return 3; /* omitted */
727 if(arg.value <= 255) return 1; /* small constant */
728 else return 0; /* large constant */
731 return 0; /* large constant */
734 return 2; /* variable */
737 die("invalid type: %d", (int)arg.type);
741 static void write_arg(struct arg arg)
743 if(arg.type == A_NONE) return;
745 else if(arg.type == A_JUMP) add_jump(arg);
747 else switch(make_type(arg))
749 case 0: /* large constant */
752 case 1: /* small constant */
753 case 2: /* variable */
759 static void make(const struct opcode *op, int znargs, struct arg zargs[], struct arg branch)
761 uint8_t varbyte = 0xe0;
762 int count = op->count;
764 /* Special case for a few opcodes.
765 * These require the first argument to be a reference to a variable,
766 * so a small constant value of 1 would mean local variable 1;
767 * however, the user should be able to use L01, SP, etc to refer to
768 * them. Thus rewrite the first argument for these few opcodes.
770 if(op->flags & F_INDIRECT)
772 zargs[0] = N(zargs[0].value);
775 /* @jump takes a label but it doesn’t act like a branch: it’s a 16-bit
776 * offset, not 14-bit; the top two bits are not special as they are in
777 * branch instructions. When parsing, ?Foo is treated as a branch,
778 * but it should be acceptable to @jump, so rewrite it.
780 if(op->flags & F_JUMP)
783 branch.jump_type = J_JUMP;
788 /* @je and @set_colour are both 2OP, but can take a variable number of
789 * operands. If there are not two operands, these should be assembled
790 * as a variable VAR would, except the top bits will be 110 instead of
791 * 111, indicating 2OP.
793 if((op->flags & F_2VAR) && znargs != 2)
799 /* All 0OP are short */
802 if(znargs != 0) die("0OP called with arguments");
804 BYTE(0xb0 | op->number);
806 /* All 1OP are short */
807 else if(count == C_ONE)
810 (make_type(zargs[0]) << 4) |
815 /* 2OP are either long (if no large constant is required) or variable (if one is) */
816 else if(count == C_TWO)
820 type1 = make_type(zargs[0]);
821 type2 = make_type(zargs[1]);
824 if(type1 == 0 || type2 == 0)
826 BYTE(0xc0 | op->number);
827 BYTE( (type1 << 6 ) |
835 BYTE( ((type1 - 1) << 6) |
843 /* VAR are all variable form */
844 else if(count == C_VAR || count == C_EXT)
846 struct arg args[8] = { NONE(), NONE(), NONE(), NONE(), NONE(), NONE(), NONE(), NONE() };
848 for(int i = 0; i < znargs; i++) args[i] = zargs[i];
857 BYTE(varbyte | op->number);
860 BYTE((make_type(args[0]) << 6) |
861 (make_type(args[1]) << 4) |
862 (make_type(args[2]) << 2) |
863 (make_type(args[3]) << 0));
865 if(op->flags & F_DOUBLE)
867 BYTE((make_type(args[4]) << 6) |
868 (make_type(args[5]) << 4) |
869 (make_type(args[6]) << 2) |
870 (make_type(args[7]) << 0));
878 if(op->flags & F_DOUBLE)
887 if(strchr(op->prototype, '>') != NULL) BYTE(zargs[znargs].value); /* guaranteed to be a variable due to the pattern matching */
889 if(branch.type == A_JUMP) add_jump(branch);
/* Convert the string “s” to a long in the given base (interpreted as
 * by strtol()), storing the result in “*v”.
 *
 * Returns 1 if the entire string is a valid number within [min, max],
 * and 0 otherwise: empty string, trailing garbage, a value outside
 * the requested range, or a value that does not fit in a long.
 */
static int stol(const char *s, int base, long *v, long min, long max)
{
  char *endp;

  /* On overflow strtol() clamps to LONG_MIN/LONG_MAX, which would
   * pass the range check below when the caller's limits are that
   * wide; only errno tells the two cases apart.
   */
  errno = 0;
  *v = strtol(s, &endp, base);

  if(errno == ERANGE) return 0;
  if(endp == s || *endp != 0) return 0;
  if(*v < min || *v > max) return 0;

  return 1;
}
903 static int started = 0;
908 void (*proc)(int, const char **);
/* Directive handler: record a label named args[1] at the current
 * output offset.  Exactly one argument is required, and the name may
 * not begin with a digit.
 */
static void label_directive(int nargs, const char **args)
{
  const char *name;

  if(nargs != 2) die("invalid label syntax");

  name = args[1];
  if(name[0] >= '0' && name[0] <= '9') die("label names cannot start with a digit");

  add_label(name, TELL());
}
918 static void alabel_directive(int nargs, const char **args)
921 label_directive(nargs, args);
924 static void routine_directive(int nargs, const char **args)
928 if(nargs < 3) die("invalid routine syntax");
929 if(args[1][0] >= '0' && args[1][0] <= '9') die("routine names cannot start with a digit");
931 add_label(args[1], TELL());
932 if(!stol(args[2], 10, &val, 0, 15)) die("invalid number of locals (must be a number between 0 and 15)");
937 for(int i = 3; i < nargs; i++)
941 if(!stol(args[i], 0, &local, INT16_MIN, UINT16_MAX)) die("invalid local (must be a number between %ld and %ld", (long)INT16_MIN, (long)UINT16_MAX);
948 if(val < 0) die("too many local values provided");
950 for(long i = 0; i < val; i++) WORD(0);
954 if(nargs != 3) die("only V1-4 allow initial local values to be provided");
958 static void byte_directive(int nargs, const char **args)
962 for(int i = 1; i < nargs; i++)
964 if(!stol(args[i], 16, &val, 0, 255)) die("invalid byte %s (must be a number between 0x00 and 0xff)", args[i]);
969 static void align_directive(int nargs, const char **args)
979 if(!stol(args[1], 0, &val, 0, LONG_MAX)) die("invalid alignment %s (must be a positive number)", args[1]);
981 SEEK(roundup(TELL(), val), SEEK_SET);
985 die("syntax: align [alignment]");
989 static void seek_directive(int nargs, const char **args)
993 if(nargs != 2) die("syntax: seek bytes");
994 if(!stol(args[1], 0, &val, 0, max_size())) die("invalid seek %s (must be a positive number less than %lu)", args[1], max_size());
998 static void seeknop_directive(int nargs, const char **args)
1003 if(nargs != 2) die("syntax: seeknop bytes");
1004 if(!stol(args[1], 0, &val, 0, max_size())) die("invalid seek %s (must be a positive number less than %lu)", args[1], max_size());
1006 /* Try to write the block out in one fell swoop. */
1007 buffer = malloc(val);
1010 memset(buffer, 0xb4, val);
1016 for(long i = 0; i < val; i++) BYTE(0xb4);
1020 static void status_directive(int nargs, const char **args)
1022 if(zversion != 3) die("status type can only be set in V3");
1023 if(nargs != 2) die("syntax: status (score|time)");
1025 if (strcmp(args[1], "score") == 0) PBYTE(0x00, 0x01);
1026 else if(strcmp(args[1], "time") == 0) PBYTE(0x02, 0x01);
1027 else die("syntax: status (score|time)");
1030 static void start_directive(int nargs, const char **args)
1032 if(started) die("only one start directive can be used");
1035 if(TELL() > UINT16_MAX) errx("dynamic memory overflow");
1037 PWORD(TELL(), 0x04); /* base of high memory */
1038 PWORD(TELL(), 0x0e); /* overlap static and high for now */
1041 PWORD(TELL(), 0x08);
/* Temp hack to put packed functions above address 255 */
1047 SEEK(0x3450, SEEK_SET);
1049 /* Packed address of initial routine (V6) or initial PC value (otherwise). */
1053 PWORD(pack(TELL()), 0x06);
1054 routine_directive(nargs, args);
1058 PWORD(TELL(), 0x06);
1062 #define DIRECTIVE(name_) { .name = #name_, .proc = name_##_directive }
1064 static const struct directive directives[] =
1081 static void parse_args(int nargs, const char **args)
1084 char prototype[nargs];
1086 struct arg zargs[nargs];
1087 struct arg branch = NONE();
1088 const struct opcode *op;
1091 for(size_t i = 0; i < ASIZE(zargs); i++) zargs[i] = NONE();
1093 for(op = opcodes; op->name != NULL; op++)
1095 if(strcmp(op->name, args[0]) == 0) break;
1098 if(op->name == NULL)
1100 const struct directive *dir;
1102 for(dir = directives; dir->name != NULL; dir++)
1104 if(strcmp(dir->name, args[0]) == 0) break;
1107 if(dir->name == NULL) die("invalid instruction: %s", args[0]);
1109 dir->proc(nargs, args);
1114 for(int i = 1; i < nargs; i++)
1116 if(strcmp(args[i], "sp") == 0 || strcmp(args[i], "SP") == 0)
1118 prototype[n++] = 'v';
1119 zargs[znargs++] = SP();
1121 else if(args[i][0] == 'G')
1123 prototype[n++] = 'v';
1124 if(!stol(&args[i][1], 16, &val, 0, 239)) die("invalid global %s (must be a number between 0x00 and 0xef)", args[i]);
1125 zargs[znargs++] = G(val);
1127 else if(args[i][0] == 'L')
1129 prototype[n++] = 'v';
1130 if(!stol(&args[i][1], 10, &val, 0, 15)) die("invalid local %s (must be a number between 0 and 15)", args[i]);
1131 zargs[znargs++] = L(val);
1133 else if(args[i][0] == '?')
1135 prototype[n++] = '?';
1136 branch = BRANCH(&args[i][1], 0);
1138 else if(args[i][0] == '%')
1140 prototype[n++] = '?';
1141 branch = BRANCH(&args[i][1], 1);
1143 else if(args[i][0] == '!')
1145 prototype[n++] = 'n';
1146 zargs[znargs++] = PCK(&args[i][1]);
1148 else if(args[i][0] == '&')
1150 prototype[n++] = 'n';
1151 zargs[znargs++] = LBL(&args[i][1]);
1153 else if(strcmp(args[i], "->") == 0)
1155 prototype[n++] = '>';
1159 if(!stol(args[i], 0, &val, LONG_MIN, LONG_MAX)) die("syntax error: %s", args[i]);
1161 if(val < INT16_MIN || val > UINT16_MAX) die("number out of range: must be in the range %ld to %ld", (long)INT16_MIN, (long)UINT16_MAX);
1163 zargs[znargs++] = N(val);
1165 prototype[n++] = 'n';
1171 /* If there is a store, don't count the target variable as an
1172 * argument, because it's not one. In the case of a store,
1173 * zargs[znargs] will be the variable in which to store.
1175 if(strchr(prototype, '>') != NULL) znargs--;
1177 if(regexec(&op->re, prototype, 0, NULL, 0) != 0) die("no matching pattern: expected %s, found %s", op->prototype, prototype);
1179 make(op, znargs, zargs, branch);
1181 for(size_t i = 0; i < ASIZE(zargs); i++) free(zargs[i].name);
1186 static uint8_t atable[26 * 3] =
1189 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
1190 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
1193 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
1194 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
1197 0x0, '^', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.',
1198 ',', '!', '?', '_', '#', '\'','"', '/', '\\','-', ':', '(', ')',
1201 static long etable_unicode_addr;
1203 static int in_atable(int c)
1205 /* 52 is A2 character 6, which is special and should not be matched,
1208 for(int i = 0; i < 52 ; i++) if(atable[i] == c) return i;
1209 for(int i = 53; i < 26 * 3; i++) if(atable[i] == c) return i;
1214 /* Encode a string, passing each Z-character (and its index) to “cb”. */
1215 static int encode_string_backend(const uint16_t *s, size_t len, void (*cb)(uint16_t, int))
1217 const int shiftbase = zversion <= 2 ? 1 : 3;
1220 for(size_t i = 0; i < len; i++)
1222 int pos = in_atable(s[i]);
1224 if(zversion == 1 && s[i] == '^')
1230 int shift = pos / 26;
1233 if(shift) cb(shiftbase + shift, n++);
1236 else if(s[i] == ' ')
1242 cb(shiftbase + 2, n++);
1247 uint8_t c = unicode_to_zscii(s[i]);
1255 cb(s[i] & 0x1f, n++);
1261 while(n == 0 || (n % 3) != 0) cb(5, n++);
1263 /* Convert Z-character count to bytes. */
1267 /* Stub function, used to calculate how long an encoded string will be. */
1268 static void encode_length_cb(uint16_t c, int n)
/* Read a 16-bit value stored most significant byte first at “base”. */
static uint16_t GET_WORD(uint8_t *base)
{
  const uint16_t high = base[0];
  const uint16_t low = base[1];

  return (high << 8) | low;
}
/* Store “val” as a 16-bit value, most significant byte first, in the
 * two bytes at “base”.  Both bytes must be written so that the value
 * can be read back in the same byte order.
 */
static void MAKE_WORD(uint8_t *base, uint16_t val)
{
  base[0] = val >> 8;
  base[1] = val & 0xff;
}
static uint8_t *encoded;

/* Callback that ORs the low five bits of “c”, the “n”th character of
 * the string, into the buffer “encoded”.  Three characters share each
 * 16-bit word: the first occupies bits 10-14, the second bits 5-9 and
 * the third bits 0-4.
 */
static void encode_string_cb(uint16_t c, int n)
{
  uint8_t *word = &encoded[2 * (n / 3)];
  const int shift = 5 * (2 - (n % 3));

  MAKE_WORD(word, GET_WORD(word) | ((c & 0x1f) << shift));
}
1291 static int encode_string(const uint16_t *s, size_t len)
1297 enclen = encode_string_backend(s, len, encode_length_cb);
1298 encoded = calloc(enclen, 1);
1299 if(encoded == NULL) err("calloc");
1301 encode_string_backend(s, len, encode_string_cb);
1304 encoded[enclen - 2] |= 0x80;
1306 WRITE(encoded, enclen);
1311 static int print_handler(const char *string)
1313 uint16_t utf[strlen(string)];
1316 n = decode_utf8(string, utf);
1318 return encode_string(utf, n);
1321 /* Write out an object name, which is a byte describing how long the
1322 * (encoded) name is, followed by the encoded name.
1324 static void object_name(const char *name)
1328 BYTE(0); /* placeholder for text-length */
1329 bytes = print_handler(name);
1330 if(bytes > 255) die("object name too long");
1331 PBYTE(bytes, TELL() - bytes - 1); /* write text-length */
/* Create (or truncate) the output file named by filename and lay out the
 * leading part of the story file: the 64-byte header, the alphabet table
 * at 0x40, the header extension table, the area addressed by header word
 * 0x0c, the property defaults, and a single object named "Default object".
 *
 * Header fields stored here (offset: value):
 *   0x00 zversion, 0x02 release, 0x12 the six-byte serial,
 *   0x34 address of the alphabet table,
 *   0x36 address of the header extension table,
 *   0x0c and 0x0a addresses taken from TELL() at the points shown,
 *   0x28 and 0x2a zero, for versions 6 and 7 only.
 * etable_unicode_addr records a position inside the header extension table
 * that end_file() later patches with the Unicode table address.
 *
 * Failure to open the file is fatal via err().
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, including the tests that choose between the 478- and 480-byte
 * seeks and between the two object layouts (31 defaults / byte-sized
 * links versus 63 defaults / word-sized links); presumably these depend on
 * zversion <= 3 -- confirm before relying on this description. The block
 * comment about the first global also has no visible terminator here.
 * NOTE(review): header word 0x0a is stored twice, once before the
 * 62/126-byte seek and again before the object table; the later store
 * wins -- confirm that the first one is intentional.
 */
1334 static void start_file(const char *filename)
1336 fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
1337 if(fd == -1) err("out.z5"); /* NOTE(review): reports "out.z5" even when filename is something else; should probably name filename */
1339 /* Zero the header out. */
1340 for(int i = 0; i < 64; i++) PBYTE(0x00, i);
1342 PBYTE(zversion, 0x00); /* version */
1343 PWORD(release, 0x02); /* release */
1344 PWRITE(serial, 6, 0x12); /* serial number */
1346 /* Alphabet table. */
1347 SEEK(0x40, SEEK_SET);
1348 PWORD(TELL(), 0x34);
1349 WRITE(atable, 26 * 3);
1351 /* Header extension table. */
1354 PWORD(TELL(), 0x36);
1358 etable_unicode_addr = TELL();
1363 PWORD(TELL(), 0x0c);
1365 /* The first global needs to be set to a valid object for show_status;
1366 * point it to the default object. In the future, when objects are
1367 * fully supported, this should be configurable by the user.
1372 SEEK(478, SEEK_CUR);
1376 SEEK(480, SEEK_CUR);
1379 /* Property defaults table. */
1380 PWORD(TELL(), 0x0a);
1381 SEEK(zversion <= 3 ? 62 : 126, SEEK_CUR);
1383 if(zversion == 6 || zversion == 7)
1385 PWORD(0x0000, 0x28); /* Routines offset. */
1386 PWORD(0x0000, 0x2a); /* Static strings offset. */
1389 /* Object table (just one object is created for now). */
1390 PWORD(TELL(), 0x0a);
1395 for(int i = 0; i < 31; i++) WORD(0x0000); /* Property defaults table. */
1396 for(int i = 0; i < 2; i++) WORD(0x0000); /* Attribute flags. */
1397 for(int i = 0; i < 3; i++) BYTE(0x00); /* Parent, sibling, child. */
1398 WORD(TELL() + 2); /* Properties. */
1402 for(int i = 0; i < 63; i++) WORD(0x0000); /* Property defaults table. */
1403 for(int i = 0; i < 3; i++) WORD(0x0000); /* Attribute flags. */
1404 for(int i = 0; i < 3; i++) WORD(0x0000); /* Parent, sibling, child. */
1405 WORD(TELL() + 2); /* Properties. */
1408 /* Property table (just the name and a terminating marker). */
1409 object_name("Default object");
/* Finish the story file.
 *
 * Steps visible here, in order:
 *   - if unicode_index is non-zero, store the current position at
 *     etable_unicode_addr and append the Unicode table (a count byte
 *     followed by unicode_index words from unicode_table);
 *   - seek to 0x40 and read the rest of the file in 8192-byte chunks,
 *     summing every byte into a uint16_t checksum (so the sum wraps modulo
 *     0x10000); a read error is fatal via err();
 *   - pad with zero bytes up to ALIGN(file_size) and reject, via errx(), a
 *     size larger than max_size();
 *   - store the scaled file length at header 0x1a and the checksum at
 *     header 0x1c.
 *
 * NOTE(review): the declaration of n and the statements that compute
 * file_size are not visible in this excerpt.
 * NOTE(review): the length divisor is 2 only when zversion == 3, so
 * versions 1 and 2 (both accepted by the -v check in main) are divided by
 * 4. Elsewhere the file groups the early versions as zversion <= 3 --
 * confirm whether that test was intended here as well.
 */
1413 static void end_file(void)
1417 unsigned char buf[8192];
1418 uint16_t checksum = 0;
1420 /* Unicode table. */
1421 if(unicode_index != 0)
1423 PWORD(TELL(), etable_unicode_addr);
1424 BYTE(unicode_index);
1425 for(int i = 0; i < unicode_index; i++) WORD(unicode_table[i]);
1430 SEEK(0x40, SEEK_SET);
1432 while((n = read(fd, buf, sizeof buf)) > 0)
1434 for(ssize_t i = 0; i < n; i++) checksum += buf[i];
1439 if(n < 0) err("read");
1441 for(size_t i = 0; i < ALIGN(file_size) - file_size; i++) BYTE(0);
1442 file_size = ALIGN(file_size);
1444 if(file_size > max_size()) errx("file size too large (%zu)", file_size);
1446 PWORD(file_size / (zversion == 3 ? 2 : zversion <= 5 ? 4 : 8), 0x1a);
1447 PWORD(checksum, 0x1c);
/* Assemble the source text read from fp, one line at a time. fn is the
 * name that goes with fp; on the recursive call for an include it is the
 * included file's name. (How fn is used is not visible in this excerpt.)
 *
 * Per line, as far as the visible code shows:
 *   - a line without a newline is rejected through die() ("no newline;
 *     line too long?"); lines are read into a buffer the size of
 *     current_line;
 *   - the line is copied into current_line;
 *   - everything from the first '#' onward is cut off, and a line that is
 *     empty at that point is skipped;
 *   - lines starting with "print ", "print_ret " or "string " are matched
 *     by prefix, and the text after the first space is passed to
 *     print_handler() without tokenizing, because spaces are significant;
 *   - a first token of "include" takes the rest of the line as a file
 *     name, opens it, recurses, and restores current_lineno afterwards;
 *   - otherwise the remaining blank- or tab-separated tokens are walked,
 *     with more than ASIZE(args) of them rejected through die(), and
 *     parse_args() is called with the count and the array.
 * A missing start directive is fatal via errx(); that check appears to
 * follow the read loop.
 *
 * NOTE(review): strtok() returns NULL for a line that holds only blanks or
 * tabs, and the strcmp() against "include" would then dereference NULL
 * unless a check sits in a line not visible here -- confirm.
 * NOTE(review): no fclose() of the include stream and no store into args
 * are visible in this excerpt; verify both exist in the full source.
 */
1453 static void process_file(FILE *fp, const char *fn)
1455 char buf[sizeof current_line];
1461 while(fgets(buf, sizeof buf, fp) != NULL)
1465 const char *args[32];
1470 p = strchr(buf, '\n');
1471 if(p == NULL) die("no newline; line too long?");
1474 strcpy(current_line, buf);
1476 p = strchr(buf, '#');
1477 if(p != NULL) *p = 0;
1479 if(buf[0] == 0) continue;
1481 /* @print and @print_ret must be handled here, before tokenization,
1482 * because spaces are significant. The string directive is similar.
1484 if(strncmp(buf, "print ", 6) == 0)
1487 print_handler(strchr(buf, ' ') + 1);
1491 if(strncmp(buf, "print_ret ", 10) == 0)
1494 print_handler(strchr(buf, ' ') + 1);
1498 if(strncmp(buf, "string ", 7) == 0)
1501 print_handler(strchr(buf, ' ') + 1);
1505 p = strtok(buf, " \t");
1507 /* This does NOT detect recursion! */
1508 if(strcmp(p, "include") == 0)
1510 const char *file = strtok(NULL, "");
1513 if(file == NULL) die("bad line");
1515 fp2 = fopen(file, "r");
1516 if(fp2 == NULL) die("fopen: %s", strerror(errno));
1518 saved_lineno = current_lineno;
1520 process_file(fp2, file);
1525 current_lineno = saved_lineno;
1533 for(p = strtok(NULL, " \t"); p != NULL; p = strtok(NULL, " \t"))
1535 if(i == ASIZE(args)) die("too many tokens");
1539 parse_args(i, args);
1542 if(!started) errx("no start directive found");
1545 int main(int argc, char **argv)
1548 const char *infile, *outfile = "out.z5";
1551 if(strftime(serial, sizeof serial, "%y%m%d", localtime(&(time_t){ time(NULL) })) != 6) strcpy(serial, "000000");
1553 while((c = getopt(argc, argv, "o:r:s:v:")) != -1)
1561 if(!stol(optarg, 10, &release, 0, UINT16_MAX)) errx("release must be a number from 0 to %lu", (unsigned long)UINT16_MAX);
1564 if(strlen(optarg) != 6) errx("serial number must be a six-digit string");
1565 strcpy(serial, optarg);
1568 if(!stol(optarg, 10, &zversion, 1, 8)) errx("invalid z-machine version: must be 1 to 8");
1575 if(zversion == 1) memcpy(&atable[26 * 2], " 0123456789.,!?_#'\"/\\<-:()", 26);
1577 if(argc <= optind) exit(1);
1579 infile = argv[optind];
1583 start_file(outfile);
1585 fp = fopen(infile, "r");
1586 if(fp == NULL) err("%s", infile);
1588 process_file(fp, infile);