From: Jason Self Date: Wed, 3 Aug 2016 02:14:12 +0000 (-0700) Subject: Importing zdevtools release 20110529 from https://zdevtools.codeplex.com/ minus the... X-Git-Url: https://jxself.org/git/?a=commitdiff_plain;ds=inline;p=zdevtools.git Importing zdevtools release 20110529 from https://zdevtools.codeplex.com/ minus the unlicensed Ruby things --- 3cfb2966dcd22bcd1274def688053934bc3d2646 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1b0be3c --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +SUBDIRS= za zd + +.PHONY: subdirs $(SUBDIRS) + +subdirs: $(SUBDIRS) + +$(SUBDIRS): + $(MAKE) -C $@ + +clean: + for subdir in $(SUBDIRS); do $(MAKE) -C $$subdir clean; done diff --git a/README b/README new file mode 100644 index 0000000..5caa240 --- /dev/null +++ b/README @@ -0,0 +1,10 @@ +To build, gmake is required, as is a compiler that supports C99, or at least the +subset of C99 that these tools make use of. Most modern compilers should work. + +A POSIX environment is also required. These should port rather easily to other +environments. The most onerous requirement is probably that seeking beyond the +end of a file is expected to work and create a hole that will be interpreted as +zeroes. Patches to support non-POSIX environments are welcome. + +Chris Spiegel +cspiegel@gmail.com diff --git a/compiler.mk b/compiler.mk new file mode 100644 index 0000000..444efae --- /dev/null +++ b/compiler.mk @@ -0,0 +1,16 @@ +ifeq ($(CC), gcc) +CFLAGS+= -Wall -Wshadow -std=c99 -pedantic + +else ifeq ($(CC), clang) +CFLAGS+= -Wall -std=c99 -pedantic + +else ifeq ($(CC), icc) +CFLAGS+= -w2 -wd2259,2557,869,981 -std=c99 + +else ifeq ($(CC), suncc) +CFLAGS+= -xc99=all -Xc + +else ifeq ($(CC), opencc) +CFLAGS+= -Wall -std=c99 + +endif diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..364fb20 --- /dev/null +++ b/config.mk @@ -0,0 +1,2 @@ +CC= gcc +OPT= -O2 diff --git a/prog.mk b/prog.mk new file mode 100644 index 0000000..fd0d904 --- /dev/null +++ b/prog.mk @@ -0,0 +1,8 @@ +%.o: %.c + $(CC) -g $(OPT) $(CFLAGS) $(MACROS) -c $< -o $@ + +$(PROG): $(PROG).o + $(CC) $(OPT) $< -o $@ $(LDADD) + +clean: + rm -f $(PROG) *.o diff --git a/za/Makefile b/za/Makefile new file mode 100644 index 0000000..c5e6929 --- /dev/null +++ b/za/Makefile @@ -0,0 +1,8 @@ +include ../config.mk +include ../compiler.mk + +PROG= za + +MACROS= -D_XOPEN_SOURCE=600 + +include ../prog.mk diff --git a/za/README b/za/README new file mode 100644 index 0000000..4f03b4e --- /dev/null +++ b/za/README @@ -0,0 +1,246 @@ +It is assumed that the user has a good working knowledge of the Z-machine; this +document is not meant as a tutorial, nor is this assembler meant to be useful +for anything beyond the testing of interpreters. + +All Z-machine instructions for all Z-machine versions are implemented. However, +there are some major features of the Z-machine that are not currently +implemented. These include (but surely are not limited to) objects, the +dictionary, and abbreviations. There is no direct access to the Unicode table, +but if you use UTF-8 characters in a string, they will automatically be added to +the Unicode table. + +Diagnostics are generally useful, but might sometimes be cryptic. If all else +fails, look at the source code. This assembler is quite rough around the edges. + +Source files must be encoded in UTF-8. If you're only using ASCII, that will be +fine. + +In this file, the term "C-style constant" refers to a number as written in C: a +leading 0x means the number is hexadecimal, a leading 0 means octal, otherwise, +decimal. + +Instruction names are identical to those given in §15 of the Z-machine standard. +Unlike Inform, instructions are not prefixed with @. + +Comments are introduced with the # character and extend to the end of the line. + +A label is introduced with the "label" directive: +label LabelName + +An aligned label (which is the same as calling "align" then "label") has the +same syntax: +alabel ALabelName + +A routine is introduced with the "routine" directive, and includes the number of +locals the routune has (this cannot be omitted even if there are no locals): +routune RoutineName 5 + +If a version 1-4 story file is being created, initial values can be given to +each local variable by listing their values after the number of locals. The +following gives the value 1 to L00, 2 to L01, 3 to L02 and, because values were +omitted, L03 and L04 are set to zero: +routune RoutineName 5 1 2 3 + +An arbitrary byte sequence is introduced with the "byte" directive, each byte +specified in hex, separated by space: +bytes fa ff fa + +The "align" directive, when given no arguments, forces routine alignment, +ensuring that the next instruction is on a 2-, 4-, or 8-byte boundardy, +depending on the Z-Machine version (1-3, 4-7, and 8, respectively). When given +a C-style constant as an argument, align to that value instead: +align +align 0x10 + +The "include" directive causes the contents of another file to be included for +assemby, analogous to #include in C (recursion is not detected): +include anotherfile + +The "seek" directive inserts the specified number of zeroes into the output +file; the argument is a C-style constant: +seek 0x100 + +The "seeknop" directive is identical to "seek", except instead of zeroes, nop +instructions are inserted. + +The "status" directive, available only in V3 stories, indicates whether this is +a "score game" or a "time game". The syntax is: +status score # or +status time + +The "status" directive may be specified at any point in the source file any +number of times. The last takes precedence. The default game type is a score +game. Although objects are not supported by this assembler, object 1 is created +so that interpreters can properly display the status bar. This object has the +name "Default object" and no properties. + +The instructions "print" and "print_ret" take a string as their argument; this +should not be enclosed in quotes unless you want to print out quotes. However, +a ^ character does indicate newline as in Inform: +print "Try" our hot dog's^Try "our" hot dog's^ + +In order to allow strings to start with space, the two printing instructions are +tokenized slightly different from other instructions. Rather than skipping all +whicespace after the instruction name, a single space is skipped; and this must +be a space character, not a tab. So these could be thought of as the "print " +and "print_ret " instructions, in a sense: +print Foo + +This will print " Foo" because there are two spaces; one to separate the +arguments from the instruction name, and then one before the 'F' of "Foo". + +The directive "string " is treated in the same manner. This directive will +directly insert an encoded string, suitable for use with print_paddr. + +The instruction "print_char" cannot be used with literal characters, but instead +must be supplied with a ZSCII value: +print_char 65 + +The "aread" and "sread" opcodes are just called "read" in this assembler. + +If you are in a routine, local variables can be accessed as L00, L01, ... L15. +The numbers are decimal. + +Global variables are available in G00, G01, ... Gef. The numbers are hex. + +The stack pointer is called sp or SP. + +For instructions that require a store, use -> to indicate that: +call_1s Routine -> sp + +Literal numbers are stored appropriately, meaning as a small constant for values +that fit into 8 bits, a large constant otherwise. Only values that will fit +into a 16-bit integer (signed or unsigned) are allowed, meaning -32768 to 65535. +Numbers are parsed as C-style constants. + +The "start" directive must be used once, before any opcodes are used. Until +"start" is seen, the assembler writes to dynamic memory. Once "start" is +encountered, static memory begins, and the starting program counter is set to +that address. The reason for this is chiefly to allow arrays to be placed in +dynamic memory: +# Combine label and seek to produce arrays. +label Array +seek 100 +# Execution will begin here. +start +storew &Array 0 0x1234 + +In V6, the starting point of the story must be a routine, not an instruction. +To accomplish this, "start" has a special syntax for V6, which looks identical +to the syntax for "routine": +start main 0 + +This causes a routine called "main" with zero locals to be created, and uses +this as the entry point. These arguments are allowed in non-V6 games, but are +ignored. + + +Labels in the Z-Machine are slightly convoluted. One might think that a label +could be both branched to and called, but this is not exactly the case. When +using a call instruction, the address of the routine is packed, meaning (in V8) +that it is divided by 8 and stored in a 16-bit word. When branching, the +address is stored either as a 14-bit signed offset or a 6-bit unsigned offset. +And, finally, the @jump instruction takes a 16-bit signed offset. + +Above it was noted that routune labels are produced differently than "normal" +labels; this is because routunes start with a byte that specifies the number of +local variables, and this byte needs to be jumped past when calling the routine. +Branching to a routine label would make no sense, just as calling a normal label +would make no sense. + +How to encode a label depends entirely on how it's going to be used: routine +call (or print_paddr), branch, or jump. + +The type of a label name could be inferred from how it was defined, but due to a +design choice in this assembler, when branching to a label, a prefix is +required. So as not to deviate from Inform's syntax too much, this prefix is a +question mark: +label Label +je 0 0 ?Label + +This will branch to the label Label if 0 is equal to 0. As with Inform a prefix +of ?~ will invert the test. This stores a 14-bit offset. By using % instead of +?, an unsigned 6-bit offset can be chosen, which means one fewer byte used in +the output. Of course, this also means that you can only jump about 63 bytes +forward. The assembler will tell you if you're trying to jump too far. + +Branching instructions can, instead of branching when the test passes, return +true or false from the current routine. This can be accomplished with ?1 and +?0, respectively. + +When a packed address is desired (e.g. when you are trying to call a routine), +an exclamation point should be used: +routine Routine 0 +quit + +call_1n !Routine + +This syntax should also be used when a packed string is required for use with +the print_paddr opcode: +print_paddr !String +quit + +alabel String +string This is a string. + +Note that routine and string offsets (for V6 and V7 games) are not supported, so +the packing of strings and routines at the top of memory (beyond 256K) will +fail. This should not be an issue unless you deliberately seek this far before +creating a string or routine. + +The actual address of a label can be retrieved with the ampersand prefix. This +means either an unpacked routine, or when used with a label, the absolute +address of the label, not an offset. So to print the address of an instruction: +label Label +print_num &Label + +Or to use an array (as seen in the "start" directive above): +label Array +seek 100 +start +storew &Array 0 0x1234 + +Note that it is up to you to use the proper prefix with labels. The assembler +will cheerfully let you call a label, or jump to a routine, neither of which +makes sense. So the following assembles, but is nonsensical: +label Label +call_1n !Label +routine Routine 0 +je 0 0 ?Routine + + +How should the jump instruction be used, since it is neither a branch nor a +routine call? This instruction is special-cased by pretending that it is a +branch instruction. Before this nefarious lie can cause problems, however, the +assembler steps and and writes a proper jump offset. So: +label Label +jump ?Label + + +Here is a full working sample: + +start + +call_1n !main +quit + +routine main 0 +je 0 0 ?Equal +print This will not be seen.^ +label Equal + +jump ?Past +print This will not be seen either.^ +label Past + +print The main routine is: +print_num &main +new_line +print Packed, the main routine is: +print_num !main +new_line +print The Equal label is: +print_num &Equal +new_line + +quit diff --git a/za/za.1 b/za/za.1 new file mode 100644 index 0000000..b1b4573 --- /dev/null +++ b/za/za.1 @@ -0,0 +1,63 @@ +.\" Copyright (c) 2011 Chris Spiegel. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd May 29, 2011 +.Dt ZA 1 +.Os +.Sh NAME +.Nm za +.Nd Z\-machine assembler +.Sh SYNOPSIS +.Nm +.Op Fl o Ar outfile +.Op Fl r Ar release +.Op Fl s Ar serial +.Op Fl v Ar version +.Ar file +.Sh DESCRIPTION +The +.Nm +utility assembles Z\-machine stories. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl o Ar outfile +Specify the filename of the resulting story file. The default is +.Pa out.z5 , +regardless of the Z-machine version. +.It Fl r Ar release +Set the release number. This must be a value between 0 and 65535. The default +is +.Ar 1 . +.It Fl s Ar serial +Set the serial number. This must be a string of six characters. The default is +the current date in the traditional Z\-machine format. +.It Fl v Ar version +Set the Z\-machine version. Versions 1\-8 are supported. The default is +.Ar 8 . +.El +.Sh AUTHORS +.An "Chris Spiegel" Aq cspiegel@gmail.com +.Sh SEE ALSO +.Xr zd 1 diff --git a/za/za.c b/za/za.c new file mode 100644 index 0000000..a3d7639 --- /dev/null +++ b/za/za.c @@ -0,0 +1,1597 @@ +/*- + * Copyright (c) 2010-2011 Chris Spiegel + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define ASIZE(a) (sizeof (a) / sizeof *(a)) + +/* These are for system/usage errors (malloc() failure, invalid + * command-line argument, etc.). + */ +#define err(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, ": %s\n", strerror(errno)); exit(1); } while(0) +#define errx(...) do { fprintf(stderr, __VA_ARGS__); fputc('\n', stderr); exit(1); } while(0) + +/* These are for syntax errors. */ +static const char *current_file; +static char current_line[1024]; +static int current_lineno; +static void msg(const char *type, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + + /* Messages can be generated before the file is parsed. */ + if(current_file == NULL) + { + fprintf(stderr, "internal %s: ", type); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + } + else + { + fprintf(stderr, "%s:%d: %s: ", current_file, current_lineno, type); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n\t%s\n\t^\n", current_line); + } + + va_end(ap); +} +#define die(...) do { msg("error", __VA_ARGS__); exit(1); } while(0) +#define warning(...) msg("warning", __VA_ARGS__) + +static long release = 1; +static char serial[7]; + +static long zversion = 8; + +/* Return routine alignment for a particular Z-machine version. In + * addition to being used when a routine is created, this also is used + * as the divisor when storing the file size and as the default + * alignment for the “align” directive. + */ +static int alignment(void) +{ + switch(zversion) + { + case 1: case 2: case 3: + return 2; + case 4: case 5: case 6: case 7: + return 4; + case 8: + return 8; + default: + errx("unsupported version %ld", zversion); + } +} + +/* Pack an address; this does not currently understand routine/string + * offsets of V6 and V7 (they are both set to zero), so routines at the + * top of memory will not properly pack. + */ +static uint32_t pack(uint32_t a) +{ + return a / alignment(); +} + +static unsigned long max_size(void) +{ + switch(zversion) + { + case 1: case 2: case 3: + return 131072UL; + case 4: case 5: + return 262144UL; + case 7: + return 327680UL; + case 6: case 8: + return 524288UL; + default: + errx("unsupported version %ld", zversion); + } +} + +static int fd; + +static void WRITE(const void *b, ssize_t l) { if(write(fd, b, l) != l) errx("short write"); } +static void PWRITE(const void *b, ssize_t l, off_t o) { if(pwrite(fd, b, l, o) != l) errx("short write"); } +static void BYTE(uint8_t v) { WRITE(&v, 1); } +static void PBYTE(uint8_t v, off_t o) { PWRITE(&v, 1, o); } +static void WORD(uint16_t v) { BYTE(v >> 8); BYTE(v & 0xff); } +static void PWORD(uint16_t v, off_t o) { PBYTE(v >> 8, o); PBYTE(v & 0xff, o + 1); } +static void SEEK(off_t o, int w) { if(lseek(fd, o, w) == -1) err("lseek"); } +static off_t TELL(void) { off_t r = lseek(fd, 0, SEEK_CUR); if(r == -1) err("lseek"); return r; } + +static unsigned long roundup(unsigned long v, unsigned long multiple) +{ + if(multiple == 0) return 0; + + return multiple * (((v - 1) / multiple) + 1); +} + +static uint32_t ALIGN(uint32_t v) { return roundup(v, alignment()); } +static void SEEKALIGN(void) { SEEK(ALIGN(TELL()), SEEK_SET); } + +#define UNICODE_TABLE_SIZE 97 +static uint16_t unicode_table[UNICODE_TABLE_SIZE]; +static int unicode_index; +static uint8_t unicode_to_zscii(uint16_t u) +{ + if(unicode_index > 0) + { + for(int i = 0; i < unicode_index; i++) + { + if(unicode_table[i] == u) return i + 155; + } + } + + if(unicode_index == UNICODE_TABLE_SIZE) die("too many unicode characters for the table (max %d)", UNICODE_TABLE_SIZE); + unicode_table[unicode_index++] = u; + + return unicode_index + 154; +} + +static size_t decode_utf8(const char *string, uint16_t *utf) +{ + uint16_t *saved = utf; + uint32_t ret; + + for(const char *p = string; *p != 0; p++) + { + if((*p & 0x80) == 0) /* One byte. */ + { + ret = *p; + } + else if((*p & 0xe0) == 0xc0) /* Two bytes. */ + { + if(p[1] == 0) die("invalid utf-8 sequence at byte %d", (int)(p - string)); + + ret = (*p++ & 0x1f) << 6; + ret |= (*p & 0x3f); + } + else if((*p & 0xf0) == 0xe0) /* Three bytes. */ + { + if(p[1] == 0 || p[2] == 0) die("invalid utf-8 sequence at byte %d", (int)(p - string)); + + ret = (*p++ & 0x0f) << 12; + ret |= (*p++ & 0x3f) << 6; + ret |= (*p & 0x3f); + } + else if((*p & 0xf8) == 0xf0) + { + die("4-byte utf-8 is not supported, byte %d", (int)(p - string)); + } + else + { + die("invalid utf-8 sequence at byte %d", (int)(p - string)); + } + + if(ret > UINT16_MAX) die("too-large unicode value"); + + *utf++ = ret; + } + + return utf - saved; +} + +#define F_INDIRECT 0x01 +#define F_JUMP 0x02 +#define F_2VAR 0x04 +#define F_DOUBLE 0x08 + +struct opcode +{ + enum count { C_ZERO, C_ONE, C_TWO, C_VAR, C_EXT } count; + int number; + + const char *name; + const char *prototype; + regex_t re; + int flags; +}; + +static struct opcode opcodes[256]; +static size_t nopcodes = 0; + +static void OP_(enum count count, const char *name, int min, int max, int number, const char *prototype, int flags) +{ + int v = zversion > 6 ? 5 : zversion; + int e; + + if(v < min || v > max) return; + + /* An extra slot at the end of the opcodes list is reserved as a + * sentinel: the “name” member will be NULL. + */ + if(nopcodes >= (sizeof opcodes / sizeof *opcodes) - 1) errx("internal error: opcode overflow"); + + opcodes[nopcodes] = (struct opcode){ .count = count, .name = name, .number = number, .prototype = prototype, .flags = flags }; + + e = regcomp(&opcodes[nopcodes].re, opcodes[nopcodes].prototype, REG_EXTENDED | REG_NOSUB); + if(e != 0) + { + char emsg[256]; + regerror(e, &opcodes[nopcodes].re, emsg, sizeof emsg); + errx("error compiling %s: %s", opcodes[nopcodes].prototype, emsg); + } + + nopcodes++; +} + +#define OP(count, name, min, max, number, prototype, flags) OP_(count, name, min, max, number, "^" prototype "$", flags) + +#define ZEROOP(...) OP(C_ZERO, __VA_ARGS__) +#define ONEOP(...) OP(C_ONE, __VA_ARGS__) +#define TWOOP(...) OP(C_TWO, __VA_ARGS__) +#define VAROP(...) OP(C_VAR, __VA_ARGS__) +#define EXTOP(...) OP(C_EXT, __VA_ARGS__) + +/* Convenience macros. The arguments to most opcodes can be of any + * type, so provide easy ways to accomplish this. + */ +#define NONE "" +#define ONE "[vn]" +#define TWO "[vn][vn]" +#define THREE "[vn][vn][vn]" +#define FOUR "[vn][vn][vn][vn]" +#define BRANCH "\\?" +#define STORE ">v" + +static void setup_opcodes(void) +{ + ZEROOP("rtrue", 1, 6, 0x00, NONE, 0); + ZEROOP("rfalse", 1, 6, 0x01, NONE, 0); + ZEROOP("nop", 1, 6, 0x04, NONE, 0); + ZEROOP("save", 1, 3, 0x05, BRANCH, 0); + ZEROOP("save", 4, 4, 0x05, STORE, 0); + ZEROOP("restore", 1, 3, 0x06, BRANCH, 0); + ZEROOP("restore", 4, 4, 0x06, STORE, 0); + ZEROOP("restart", 1, 6, 0x07, NONE, 0); + ZEROOP("ret_popped", 1, 6, 0x08, NONE, 0); + ZEROOP("pop", 1, 4, 0x09, NONE, 0); + ZEROOP("catch", 5, 6, 0x09, NONE STORE, 0); + ZEROOP("quit", 1, 6, 0x0a, NONE, 0); + ZEROOP("new_line", 1, 6, 0x0b, NONE, 0); + ZEROOP("show_status", 3, 3, 0x0c, NONE, 0); + ZEROOP("verify", 3, 6, 0x0d, NONE BRANCH, 0); + ZEROOP("piracy", 5, 6, 0x0f, NONE BRANCH, 0); + + ONEOP("jz", 1, 6, 0x00, ONE BRANCH, 0); + ONEOP("get_sibling", 1, 6, 0x01, ONE BRANCH STORE, 0); + ONEOP("get_child", 1, 6, 0x02, ONE BRANCH STORE, 0); + ONEOP("get_parent", 1, 6, 0x03, ONE STORE, 0); + ONEOP("get_prop_len", 1, 6, 0x04, ONE STORE, 0); + ONEOP("inc", 1, 6, 0x05, "v", F_INDIRECT); + ONEOP("dec", 1, 6, 0x06, "v", F_INDIRECT); + ONEOP("print_addr", 1, 6, 0x07, ONE, 0); + ONEOP("call_1s", 4, 6, 0x08, ONE STORE, 0); + ONEOP("remove_obj", 1, 6, 0x09, ONE, 0); + ONEOP("print_obj", 1, 6, 0x0a, ONE, 0); + ONEOP("ret", 1, 6, 0x0b, ONE, 0); + ONEOP("jump", 1, 6, 0x0c, BRANCH, F_JUMP); + ONEOP("print_paddr", 1, 6, 0x0d, ONE, 0); + ONEOP("load", 1, 6, 0x0e, "v" STORE, F_INDIRECT); + ONEOP("not", 1, 4, 0x0f, ONE STORE, 0); + ONEOP("call_1n", 5, 6, 0x0f, ONE, 0); + + TWOOP("je", 1, 6, 0x01, "[vn][vn]{0,3}" BRANCH, F_2VAR); + TWOOP("jl", 1, 6, 0x02, TWO BRANCH, 0); + TWOOP("jg", 1, 6, 0x03, TWO BRANCH, 0); + TWOOP("dec_chk", 1, 6, 0x04, "v[vn]" BRANCH, F_INDIRECT); + TWOOP("inc_chk", 1, 6, 0x05, "v[vn]" BRANCH, F_INDIRECT); + TWOOP("jin", 1, 6, 0x06, TWO BRANCH, 0); + TWOOP("test", 1, 6, 0x07, TWO BRANCH, 0); + TWOOP("or", 1, 6, 0x08, TWO STORE, 0); + TWOOP("and", 1, 6, 0x09, TWO STORE, 0); + TWOOP("test_attr", 1, 6, 0x0a, TWO BRANCH, 0); + TWOOP("set_attr", 1, 6, 0x0b, TWO, 0); + TWOOP("clear_attr", 1, 6, 0x0c, TWO, 0); + TWOOP("store", 1, 6, 0x0d, "v[vn]", F_INDIRECT); + TWOOP("insert_obj", 1, 6, 0x0e, TWO, 0); + TWOOP("loadw", 1, 6, 0x0f, TWO STORE, 0); + TWOOP("loadb", 1, 6, 0x10, TWO STORE, 0); + TWOOP("get_prop", 1, 6, 0x11, TWO STORE, 0); + TWOOP("get_prop_addr", 1, 6, 0x12, TWO STORE, 0); + TWOOP("get_next_prop", 1, 6, 0x13, TWO STORE, 0); + TWOOP("add", 1, 6, 0x14, TWO STORE, 0); + TWOOP("sub", 1, 6, 0x15, TWO STORE, 0); + TWOOP("mul", 1, 6, 0x16, TWO STORE, 0); + TWOOP("div", 1, 6, 0x17, TWO STORE, 0); + TWOOP("mod", 1, 6, 0x18, TWO STORE, 0); + TWOOP("call_2s", 4, 6, 0x19, TWO STORE, 0); + TWOOP("call_2n", 5, 6, 0x1a, TWO, 0); + TWOOP("set_colour", 5, 5, 0x1b, TWO, 0); + TWOOP("set_colour", 6, 6, 0x1b, "[vn][vn][vn]?", F_2VAR); + TWOOP("throw", 5, 6, 0x1c, TWO, 0); + + VAROP("call_vs", 1, 6, 0x00, "[vn][vn]{0,3}" STORE, 0); + VAROP("storew", 1, 6, 0x01, THREE, 0); + VAROP("storeb", 1, 6, 0x02, THREE, 0); + VAROP("put_prop", 1, 6, 0x03, THREE, 0); + VAROP("read", 1, 3, 0x04, TWO, 0); + VAROP("read", 4, 4, 0x04, "[vn][vn]([vn][vn])?", 0); + VAROP("read", 5, 6, 0x04, "[vn][vn]([vn][vn])?" STORE, 0); + VAROP("print_char", 1, 6, 0x05, ONE, 0); + VAROP("print_num", 1, 6, 0x06, ONE, 0); + VAROP("random", 1, 6, 0x07, ONE STORE, 0); + VAROP("push", 1, 6, 0x08, ONE, 0); + VAROP("pull", 1, 5, 0x09, "v", F_INDIRECT); + VAROP("pull", 6, 6, 0x09, "[vn]?" STORE, 0); + VAROP("split_window", 3, 6, 0x0a, ONE, 0); + VAROP("set_window", 3, 6, 0x0b, ONE, 0); + VAROP("call_vs2", 4, 6, 0x0c, "[vn][vn]{0,7}" STORE, F_DOUBLE); + VAROP("erase_window", 4, 6, 0x0d, ONE, 0); + VAROP("erase_line", 4, 6, 0x0e, ONE, 0); + VAROP("set_cursor", 4, 5, 0x0f, TWO, 0); + VAROP("set_cursor", 6, 6, 0x0f, THREE, 0); + VAROP("get_cursor", 4, 6, 0x10, ONE, 0); + VAROP("set_text_style", 4, 6, 0x11, ONE, 0); + VAROP("buffer_mode", 4, 6, 0x12, ONE, 0); + VAROP("output_stream", 3, 4, 0x13, ONE, 0); + VAROP("output_stream", 5, 5, 0x13, "[vn][vn]?", 0); + VAROP("output_stream", 6, 6, 0x13, "[vn][vn]{0,2}", 0); + VAROP("input_stream", 3, 6, 0x14, ONE, 0); + VAROP("sound_effect", 3, 6, 0x15, FOUR, 0); + VAROP("read_char", 4, 6, 0x16, "[vn]([vn][vn])?" STORE, 0); + VAROP("scan_table", 4, 6, 0x17, "[vn][vn][vn][vn]?" BRANCH STORE, 0); + VAROP("not", 5, 6, 0x18, ONE STORE, 0); + VAROP("call_vn", 5, 6, 0x19, "[vn][vn]{0,3}", 0); + VAROP("call_vn2", 5, 6, 0x1a, "[vn][vn]{0,7}", F_DOUBLE); + VAROP("tokenise", 5, 6, 0x1b, FOUR, 0); + VAROP("encode_text", 5, 6, 0x1c, FOUR, 0); + VAROP("copy_table", 5, 6, 0x1d, THREE, 0); + VAROP("print_table", 5, 6, 0x1e, "[vn][vn][vn]{0,2}", 0); + VAROP("check_arg_count", 5, 6, 0x1f, ONE BRANCH, 0); + + EXTOP("save", 5, 6, 0x00, "([vn]{3})?" STORE, 0); + EXTOP("restore", 5, 6, 0x01, "([vn]{3})?" STORE, 0); + EXTOP("log_shift", 5, 6, 0x02, TWO STORE, 0); + EXTOP("art_shift", 5, 6, 0x03, TWO STORE, 0); + EXTOP("set_font", 5, 6, 0x04, ONE STORE, 0); + EXTOP("draw_picture", 6, 6, 0x05, "[vn][vn]{0,2}", 0); + EXTOP("picture_data", 6, 6, 0x06, TWO BRANCH, 0); + EXTOP("erase_picture", 6, 6, 0x07, THREE, 0); + EXTOP("set_margins", 6, 6, 0x08, THREE, 0); + EXTOP("save_undo", 5, 6, 0x09, STORE, 0); + EXTOP("restore_undo", 5, 6, 0x0a, STORE, 0); + EXTOP("print_unicode", 5, 6, 0x0b, ONE, 0); + EXTOP("check_unicode", 5, 6, 0x0c, ONE STORE, 0); + EXTOP("set_true_colour", 5, 5, 0x0d, TWO, 0); + EXTOP("set_true_colour", 6, 6, 0x0d, THREE, 0); + EXTOP("move_window", 6, 6, 0x10, THREE, 0); + EXTOP("window_size", 6, 6, 0x11, THREE, 0); + EXTOP("window_style", 6, 6, 0x12, "[vn][vn][vn]?", 0); + EXTOP("get_wind_prop", 6, 6, 0x13, TWO STORE, 0); + EXTOP("scroll_window", 6, 6, 0x14, TWO, 0); + EXTOP("pop_stack", 6, 6, 0x15, "[vn][vn]?", 0); + EXTOP("read_mouse", 6, 6, 0x16, ONE, 0); + EXTOP("mouse_window", 6, 6, 0x17, "[vn]?", 0); + EXTOP("push_stack", 6, 6, 0x18, TWO BRANCH, 0); + EXTOP("put_wind_prop", 6, 6, 0x19, THREE, 0); + EXTOP("print_form", 6, 6, 0x1a, ONE, 0); + EXTOP("make_menu", 6, 6, 0x1b, TWO BRANCH, 0); + EXTOP("picture_table", 6, 6, 0x1c, ONE, 0); + + /* Zoom extensions. */ + EXTOP("start_timer", 5, 6, 0x80, NONE, 0); + EXTOP("stop_timer", 5, 6, 0x81, NONE, 0); + EXTOP("read_timer", 5, 6, 0x82, STORE, 0); + EXTOP("print_timer", 5, 6, 0x83, NONE, 0); +} + +#undef NONE +#undef ONE +#undef TWO +#undef THREE +#undef FOUR +#undef BRANCH +#undef STORE + +#undef ZEROOP +#undef ONEOP +#undef TWOOP +#undef VAROP +#undef EXTOP +#undef OP + +static char *xstrdup(const char *s) +{ + char *r; + size_t l; + + l = strlen(s); + + r = malloc(l + 1); + if(r == NULL) err("malloc"); + + memcpy(r, s, l + 1); + + return r; +} + +/* Each operand to an opcode is represented by a “struct arg”. + * There are four types: + * • none, indicating no argument + * • jump, indicating an argument that is a label + * • numeric, indicating a constant (large or small) + * • variable, indicating a variable. + * + * jump_type indicates whether the jump is a routine, label, or branch. + * + * value indicates the value for a numeric or variable (for a variable, + * 0 is the stack pointer, 1 is local var 1, etc., as in the standard.) + * + * name is the name of the label to branch to. + * + * small is true if this is a one-byte (6-bit) offset as opposed to a + * two-byte (14-bit) offset. + * + * invert is true if the argument is a branch target and the test should + * be inverted (branch on false). + * + * ret is set to 0 or 1 if the branch, instead of branching, should + * return false or true; this value should be checked iff name is NULL. + */ +struct arg +{ + enum { A_NONE, A_JUMP, A_NUMERIC, A_VARIABLE } type; + enum jump_type { J_BRANCH, J_JUMP, J_LABEL, J_PACKED } jump_type; + + uint16_t value; + char *name; + + int small; + int invert; + int ret; +}; + +/* Information on each label that’s found (either a label for jumping + * to, or a routine name). + * “name” is the name, and “addr” is the location of the label. Note + * that a single list is used so routines and labels cannot have the + * same name. + */ +struct label +{ + const char *name; + uint32_t addr; + + struct label *next; +}; + +static struct label *labels = NULL; + +static void add_label(const char *name, uint32_t addr) +{ + struct label *new; + + for(new = labels; new != NULL; new = new->next) + { + if(strcmp(new->name, name) == 0) die("label %s already exists", name); + } + + new = malloc(sizeof *new); + if(new == NULL) err("malloc"); + + new->name = xstrdup(name); + new->addr = addr; + + new->next = labels; + labels = new; +} + +/* Each jump struct represents a jump: either a routine call, a jump, or + * a branch instruction. + * “from” is the address whence the jump occurs, and “to” is the label + * to which a jump is requested. + * If the type is a branch, “small” is set if this is a one-byte (6-bit) + * offset, and “invert” is set if the branch should be taken in the + * false case. + */ +struct jump +{ + enum jump_type type; + uint32_t from; + const char *to; + + int small; + int invert; + + char *file; + char *line; + int lineno; + + struct jump *next; +}; + +static struct jump *jumps = NULL; + +static void add_jump(struct arg jump) +{ + struct jump *new; + + if(jump.jump_type == J_BRANCH && jump.name == NULL) + { + BYTE((!jump.invert << 7) | 0x40 | jump.ret); + return; + } + + /* @jump ? is special-cased because the syntax is natural, but + * allowing ? also allows ?0, ?~, and %, so catch those errors here. + */ + if(jump.jump_type == J_JUMP) + { + if(jump.name == NULL || jump.invert || jump.small) die("syntax: jump ?Label"); + } + + new = malloc(sizeof *new); + if(new == NULL) err("malloc"); + + new->type = jump.jump_type; + new->from = TELL(); + new->to = xstrdup(jump.name); + if(jump.jump_type == J_BRANCH) + { + new->small = jump.small; + new->invert = jump.invert; + } + + new->file = xstrdup(current_file); + new->line = xstrdup(current_line); + new->lineno = current_lineno; + + new->next = jumps; + jumps = new; + + if(jump.small) BYTE(0); + else WORD(0); +} + +static void apply_jumps(void) +{ + for(struct jump *j = jumps; j != NULL; j = j->next) + { + int found = 0; + + current_file = j->file; + strcpy(current_line, j->line); /* guaranteed to be large enough because j->line was originally copied from current_line. */ + current_lineno = j->lineno; + + for(struct label *l = labels; l != NULL; l = l->next) + { + if(strcmp(l->name, j->to) == 0) + { + found = 1; + switch(j->type) + { + uint16_t offset; + long long tempo; + + case J_BRANCH: + tempo = (long long)l->addr - (long long)j->from; + + if(j->small) + { + /* The offset needs to have two added to it (decoding the + * offset is done as PC = PC + Offset - 2); at this point + * j->from is at the part right where the offset is + * written. It is therefore, if the offset is 16 bits, + * already two bytes farther than is “correct”, or if the + * offset is 8 bits, one byte farther; so in this case one + * needs to be added on. + * + * 61 01 01 c2 + * ^ ^ + * + * 61 is @je, 01 01 is L01 L01. The first arrow points to + * where the offset is stored, which is also where j->from + * is currently pointing. The second arrow is the + * location whence the offset should be counted. Since + * the offset is calculated by using j->from as the + * starting address, it is already one byte longer than is + * the “actual” offset, which means it it one byte shorter + * than the *stored* offset needs to be. Adding one gives + * the proper value. + */ + tempo++; + + if(tempo < 0 || tempo > 63) die("offset (%lld) does not fit into unsigned 6-bit value", tempo); + + offset = tempo; + offset &= 0x3f; + + offset |= 0x40; + if(!j->invert) offset |= 0x80; + + PBYTE(offset, j->from); + } + else + { + /* A quick example of the offset wackiness as described + * above, for a 14-bit offset. + * + * 61 01 01 80 02 + * ^ ^ + * + * Here again the first arrow is where j->from is + * pointing, whereas the second arrow is whence the offset + * is counted. This time, the two bytes needed for the + * stored value are already present if j->from is used as + * the starting point, so nothing special needs to be + * done. + */ + if(tempo < -8192 || tempo > 8191) die("offset (%lld) does not fit into signed 14-bit value", tempo); + + offset = tempo; + offset &= 0x3fff; + if(!j->invert) offset |= 0x8000; + + PWORD(offset, j->from); + } + + break; + case J_JUMP: + tempo = (long long)l->addr - (long long)j->from; + if(tempo < INT16_MIN || tempo > INT16_MAX) die("offset (%lld) does not fit into signed 16-bit value", tempo); + + PWORD(tempo, j->from); + + break; + case J_LABEL: + if(l->addr > UINT16_MAX) die("address of %s is too large (%lx)", l->name, (unsigned long)l->addr); + PWORD(l->addr, j->from); + + break; + case J_PACKED: + if(pack(l->addr) > UINT16_MAX) die("address of %s is too large to pack (%lx -> %lx)", l->name, (unsigned long)l->addr, (unsigned long)pack(l->addr)); + PWORD(pack(l->addr), j->from); + + break; + } + + break; + } + } + + if(!found) die("no label %s", j->to); + } +} + +static struct arg NONE(void) { return (struct arg){ .type = A_NONE }; } +static struct arg N(uint16_t n) { return (struct arg){ .type = A_NUMERIC, .value = n }; } +static struct arg SP(void) { return (struct arg){ .type = A_VARIABLE, .value = 0 }; } +static struct arg L(uint8_t n) { return (struct arg){ .type = A_VARIABLE, .value = n + 0x01 }; } +static struct arg G(uint8_t n) { return (struct arg){ .type = A_VARIABLE, .value = n + 0x10 }; } +static struct arg LBL(const char *n) { return (struct arg){ .type = A_JUMP, .jump_type = J_LABEL, .name = xstrdup(n) }; } +static struct arg PCK(const char *n) { return (struct arg){ .type = A_JUMP, .jump_type = J_PACKED, .name = xstrdup(n) }; } + +static struct arg BRANCH(const char *n, int small) +{ + struct arg arg = { .type = A_JUMP, .jump_type = J_BRANCH, .small = small }; + + if(*n == '~') + { + arg.invert = 1; + n++; + } + + if(*n == '0' || *n == '1') arg.ret = *n - '0'; + else arg.name = xstrdup(n); + + return arg; +} + +static uint8_t make_type(struct arg arg) +{ + switch(arg.type) + { + case A_NONE: + return 3; /* omitted */ + + case A_NUMERIC: + if(arg.value <= 255) return 1; /* small constant */ + else return 0; /* large constant */ + + case A_JUMP: + return 0; /* large constant */ + + case A_VARIABLE: + return 2; /* variable */ + + default: + die("invalid type: %d", (int)arg.type); + } +} + +static void write_arg(struct arg arg) +{ + if(arg.type == A_NONE) return; + + else if(arg.type == A_JUMP) add_jump(arg); + + else switch(make_type(arg)) + { + case 0: /* large constant */ + WORD(arg.value); + break; + case 1: /* small constant */ + case 2: /* variable */ + BYTE(arg.value); + break; + } +} + +static void make(const struct opcode *op, int znargs, struct arg zargs[], struct arg branch) +{ + uint8_t varbyte = 0xe0; + int count = op->count; + + /* Special case for a few opcodes. + * These require the first argument to be a reference to a variable, + * so a small constant value of 1 would mean local variable 1; + * however, the user should be able to use L01, SP, etc to refer to + * them. Thus rewrite the first argument for these few opcodes. + */ + if(op->flags & F_INDIRECT) + { + zargs[0] = N(zargs[0].value); + } + + /* @jump takes a label but it doesn’t act like a branch: it’s a 16-bit + * offset, not 14-bit; the top two bits are not special as they are in + * branch instructions. When parsing, ?Foo is treated as a branch, + * but it should be acceptable to @jump, so rewrite it. + */ + if(op->flags & F_JUMP) + { + BYTE(0x8c); + branch.jump_type = J_JUMP; + add_jump(branch); + return; + } + + /* @je and @set_colour are both 2OP, but can take a variable number of + * operands. If there are not two operands, these should be assembled + * as a variable VAR would, except the top bits will be 110 instead of + * 111, indicating 2OP. + */ + if((op->flags & F_2VAR) && znargs != 2) + { + varbyte = 0xc0; + count = C_VAR; + } + + /* All 0OP are short */ + if(count == C_ZERO) + { + if(znargs != 0) die("0OP called with arguments"); + + BYTE(0xb0 | op->number); + } + /* All 1OP are short */ + else if(count == C_ONE) + { + BYTE(0x80 | + (make_type(zargs[0]) << 4) | + op->number + ); + write_arg(zargs[0]); + } + /* 2OP are either long (if no large constant is required) or variable (if one is) */ + else if(count == C_TWO) + { + int type1, type2; + + type1 = make_type(zargs[0]); + type2 = make_type(zargs[1]); + + /* Variable. */ + if(type1 == 0 || type2 == 0) + { + BYTE(0xc0 | op->number); + BYTE( (type1 << 6 ) | + (type2 << 4 ) | + 0xf + ); + } + /* Long. */ + else + { + BYTE( ((type1 - 1) << 6) | + ((type2 - 1) << 5) | + op->number + ); + } + write_arg(zargs[0]); + write_arg(zargs[1]); + } + /* VAR are all variable form */ + else if(count == C_VAR || count == C_EXT) + { + struct arg args[8] = { NONE(), NONE(), NONE(), NONE(), NONE(), NONE(), NONE(), NONE() }; + + for(int i = 0; i < znargs; i++) args[i] = zargs[i]; + + if(count == C_EXT) + { + BYTE(0xbe); + BYTE(op->number); + } + else + { + BYTE(varbyte | op->number); + } + + BYTE((make_type(args[0]) << 6) | + (make_type(args[1]) << 4) | + (make_type(args[2]) << 2) | + (make_type(args[3]) << 0)); + + if(op->flags & F_DOUBLE) + { + BYTE((make_type(args[4]) << 6) | + (make_type(args[5]) << 4) | + (make_type(args[6]) << 2) | + (make_type(args[7]) << 0)); + } + + write_arg(args[0]); + write_arg(args[1]); + write_arg(args[2]); + write_arg(args[3]); + + if(op->flags & F_DOUBLE) + { + write_arg(args[4]); + write_arg(args[5]); + write_arg(args[6]); + write_arg(args[7]); + } + } + + if(strchr(op->prototype, '>') != NULL) BYTE(zargs[znargs].value); /* guaranteed to be a variable due to the pattern matching */ + + if(branch.type == A_JUMP) add_jump(branch); +} + +static int stol(const char *s, int base, long *v, long min, long max) +{ + char *endp; + + *v = strtol(s, &endp, base); + + if(endp == s || *endp != 0 || *v < min || *v > max) return 0; + + return 1; +} + +static int started = 0; + +struct directive +{ + const char *name; + void (*proc)(int, const char **); +}; + +static void label_directive(int nargs, const char **args) +{ + if(nargs != 2) die("invalid label syntax"); + if(args[1][0] >= '0' && args[1][0] <= '9') die("label names cannot start with a digit"); + add_label(args[1], TELL()); +} + +static void alabel_directive(int nargs, const char **args) +{ + SEEKALIGN(); + label_directive(nargs, args); +} + +static void routine_directive(int nargs, const char **args) +{ + long val; + + if(nargs < 3) die("invalid routine syntax"); + if(args[1][0] >= '0' && args[1][0] <= '9') die("routine names cannot start with a digit"); + SEEKALIGN(); + add_label(args[1], TELL()); + if(!stol(args[2], 10, &val, 0, 15)) die("invalid number of locals (must be a number between 0 and 15)"); + BYTE(val); + + if(zversion <= 4) + { + for(int i = 3; i < nargs; i++) + { + long local; + + if(!stol(args[i], 0, &local, INT16_MIN, UINT16_MAX)) die("invalid local (must be a number between %ld and %ld", (long)INT16_MIN, (long)UINT16_MAX); + + WORD(local); + + val--; + } + + if(val < 0) die("too many local values provided"); + + for(long i = 0; i < val; i++) WORD(0); + } + else + { + if(nargs != 3) die("only V1-4 allow initial local values to be provided"); + } +} + +static void byte_directive(int nargs, const char **args) +{ + long val; + + for(int i = 1; i < nargs; i++) + { + if(!stol(args[i], 16, &val, 0, 255)) die("invalid byte %s (must be a number between 0x00 and 0xff)", args[i]); + BYTE(val); + } +} + +static void align_directive(int nargs, const char **args) +{ + if(nargs == 1) + { + SEEKALIGN(); + } + else if(nargs == 2) + { + long val; + + if(!stol(args[1], 0, &val, 0, LONG_MAX)) die("invalid alignment %s (must be a positive number)", args[1]); + + SEEK(roundup(TELL(), val), SEEK_SET); + } + else + { + die("syntax: align [alignment]"); + } +} + +static void seek_directive(int nargs, const char **args) +{ + long val; + + if(nargs != 2) die("syntax: seek bytes"); + if(!stol(args[1], 0, &val, 0, max_size())) die("invalid seek %s (must be a positive number less than %lu)", args[1], max_size()); + SEEK(val, SEEK_CUR); +} + +static void seeknop_directive(int nargs, const char **args) +{ + long val; + uint8_t *buffer; + + if(nargs != 2) die("syntax: seeknop bytes"); + if(!stol(args[1], 0, &val, 0, max_size())) die("invalid seek %s (must be a positive number less than %lu)", args[1], max_size()); + + /* Try to write the block out in one fell swoop. */ + buffer = malloc(val); + if(buffer != NULL) + { + memset(buffer, 0xb4, val); + WRITE(buffer, val); + free(buffer); + } + else + { + for(long i = 0; i < val; i++) BYTE(0xb4); + } +} + +static void status_directive(int nargs, const char **args) +{ + if(zversion != 3) die("status type can only be set in V3"); + if(nargs != 2) die("syntax: status (score|time)"); + + if (strcmp(args[1], "score") == 0) PBYTE(0x00, 0x01); + else if(strcmp(args[1], "time") == 0) PBYTE(0x02, 0x01); + else die("syntax: status (score|time)"); +} + +static void start_directive(int nargs, const char **args) +{ + if(started) die("only one start directive can be used"); + started = 1; + + if(TELL() > UINT16_MAX) errx("dynamic memory overflow"); + + PWORD(TELL(), 0x04); /* base of high memory */ + PWORD(TELL(), 0x0e); /* overlap static and high for now */ + + /* Dictionary. */ + PWORD(TELL(), 0x08); + BYTE(0x00); + BYTE(0x06); + WORD(0x0000); + + /* Temp hack to put packed fuctions above address 255 */ + SEEK(0x3450, SEEK_SET); + + /* Packed address of initial routine (V6) or initial PC value (otherwise). */ + if(zversion == 6) + { + SEEKALIGN(); + PWORD(pack(TELL()), 0x06); + routine_directive(nargs, args); + } + else + { + PWORD(TELL(), 0x06); + } +} + +#define DIRECTIVE(name_) { .name = #name_, .proc = name_##_directive } + +static const struct directive directives[] = +{ + DIRECTIVE(label), + DIRECTIVE(alabel), + DIRECTIVE(routine), + DIRECTIVE(byte), + DIRECTIVE(align), + DIRECTIVE(seek), + DIRECTIVE(seeknop), + DIRECTIVE(status), + DIRECTIVE(start), + + { NULL }, +}; + +#undef DIRECTIVE + +static void parse_args(int nargs, const char **args) +{ + int n = 0; + char prototype[nargs]; + int znargs = 0; + struct arg zargs[nargs]; + struct arg branch = NONE(); + const struct opcode *op; + long val; + + for(size_t i = 0; i < ASIZE(zargs); i++) zargs[i] = NONE(); + + for(op = opcodes; op->name != NULL; op++) + { + if(strcmp(op->name, args[0]) == 0) break; + } + + if(op->name == NULL) + { + const struct directive *dir; + + for(dir = directives; dir->name != NULL; dir++) + { + if(strcmp(dir->name, args[0]) == 0) break; + } + + if(dir->name == NULL) die("invalid instruction: %s", args[0]); + + dir->proc(nargs, args); + + return; + } + + for(int i = 1; i < nargs; i++) + { + if(strcmp(args[i], "sp") == 0 || strcmp(args[i], "SP") == 0) + { + prototype[n++] = 'v'; + zargs[znargs++] = SP(); + } + else if(args[i][0] == 'G') + { + prototype[n++] = 'v'; + if(!stol(&args[i][1], 16, &val, 0, 239)) die("invalid global %s (must be a number between 0x00 and 0xef)", args[i]); + zargs[znargs++] = G(val); + } + else if(args[i][0] == 'L') + { + prototype[n++] = 'v'; + if(!stol(&args[i][1], 10, &val, 0, 15)) die("invalid local %s (must be a number between 0 and 15)", args[i]); + zargs[znargs++] = L(val); + } + else if(args[i][0] == '?') + { + prototype[n++] = '?'; + branch = BRANCH(&args[i][1], 0); + } + else if(args[i][0] == '%') + { + prototype[n++] = '?'; + branch = BRANCH(&args[i][1], 1); + } + else if(args[i][0] == '!') + { + prototype[n++] = 'n'; + zargs[znargs++] = PCK(&args[i][1]); + } + else if(args[i][0] == '&') + { + prototype[n++] = 'n'; + zargs[znargs++] = LBL(&args[i][1]); + } + else if(strcmp(args[i], "->") == 0) + { + prototype[n++] = '>'; + } + else + { + if(!stol(args[i], 0, &val, LONG_MIN, LONG_MAX)) die("syntax error: %s", args[i]); + + if(val < INT16_MIN || val > UINT16_MAX) die("number out of range: must be in the range %ld to %ld", (long)INT16_MIN, (long)UINT16_MAX); + + zargs[znargs++] = N(val); + + prototype[n++] = 'n'; + } + } + + prototype[n] = 0; + + /* If there is a store, don't count the target variable as an + * argument, because it's not one. In the case of a store, + * zargs[znargs] will be the variable in which to store. + */ + if(strchr(prototype, '>') != NULL) znargs--; + + if(regexec(&op->re, prototype, 0, NULL, 0) != 0) die("no matching pattern: expected %s, found %s", op->prototype, prototype); + + make(op, znargs, zargs, branch); + + for(size_t i = 0; i < ASIZE(zargs); i++) free(zargs[i].name); + + free(branch.name); +} + +static uint8_t atable[26 * 3] = +{ + /* A0 */ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + /* A1 */ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + /* A2 */ + 0x0, '^', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', + ',', '!', '?', '_', '#', '\'','"', '/', '\\','-', ':', '(', ')', +}; + +static long etable_unicode_addr; + +static int in_atable(int c) +{ + /* 52 is A2 character 6, which is special and should not be matched, + * so skip over it. + */ + for(int i = 0; i < 52 ; i++) if(atable[i] == c) return i; + for(int i = 53; i < 26 * 3; i++) if(atable[i] == c) return i; + + return -1; +} + +/* Encode a string, passing each Z-character (and its index) to “cb”. */ +static int encode_string_backend(const uint16_t *s, size_t len, void (*cb)(uint16_t, int)) +{ + const int shiftbase = zversion <= 2 ? 1 : 3; + int n = 0; + + for(size_t i = 0; i < len; i++) + { + int pos = in_atable(s[i]); + + if(zversion == 1 && s[i] == '^') + { + cb(1, n++); + } + else if(pos >= 0) + { + int shift = pos / 26; + int c = pos % 26; + + if(shift) cb(shiftbase + shift, n++); + cb(c + 6, n++); + } + else if(s[i] == ' ') + { + cb(0, n++); + } + else + { + cb(shiftbase + 2, n++); + cb(6, n++); + + if(s[i] > 126) + { + uint8_t c = unicode_to_zscii(s[i]); + + cb(c >> 5, n++); + cb(c & 0x1f, n++); + } + else + { + cb(s[i] >> 5, n++); + cb(s[i] & 0x1f, n++); + } + } + } + + /* Pad */ + while(n == 0 || (n % 3) != 0) cb(5, n++); + + /* Convert Z-character count to bytes. */ + return 2 * (n / 3); +} + +/* Stub function, used to calculate how long an encoded string will be. */ +static void encode_length_cb(uint16_t c, int n) +{ +} + +static uint16_t GET_WORD(uint8_t *base) +{ + return (base[0] << 8) | base[1]; +} +static void MAKE_WORD(uint8_t *base, uint16_t val) +{ + base[0] = val >> 8; + base[1] = val & 0xff; +} + +static uint8_t *encoded; +static void encode_string_cb(uint16_t c, int n) +{ + uint16_t w = GET_WORD(&encoded[2 * (n / 3)]); + + w |= (c & 0x1f) << (5 * (2 - (n % 3))); + + MAKE_WORD(&encoded[2 * (n / 3)], w); +} +static int encode_string(const uint16_t *s, size_t len) +{ + int enclen; + + free(encoded); + + enclen = encode_string_backend(s, len, encode_length_cb); + encoded = calloc(enclen, 1); + if(encoded == NULL) err("calloc"); + + encode_string_backend(s, len, encode_string_cb); + + /* Mark the end */ + encoded[enclen - 2] |= 0x80; + + WRITE(encoded, enclen); + + return enclen; +} + +static int print_handler(const char *string) +{ + uint16_t utf[strlen(string)]; + size_t n; + + n = decode_utf8(string, utf); + + return encode_string(utf, n); +} + +/* Write out an object name, which is a byte describing how long the + * (encoded) name is, followed by the encoded name. + */ +static void object_name(const char *name) +{ + int bytes; + + BYTE(0); /* placeholder for text-length */ + bytes = print_handler(name); + if(bytes > 255) die("object name too long"); + PBYTE(bytes, TELL() - bytes - 1); /* write text-length */ +} + +static void start_file(const char *filename) +{ + fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644); + if(fd == -1) err("out.z5"); + + /* Zero the header out. */ + for(int i = 0; i < 64; i++) PBYTE(0x00, i); + + PBYTE(zversion, 0x00); /* version */ + PWORD(release, 0x02); /* release */ + PWRITE(serial, 6, 0x12); /* serial number */ + + /* Alphabet table. */ + SEEK(0x40, SEEK_SET); + PWORD(TELL(), 0x34); + WRITE(atable, 26 * 3); + + /* Header extension table. */ + if(zversion >= 5) + { + PWORD(TELL(), 0x36); + WORD(0x0003); + WORD(0x0000); + WORD(0x0000); + etable_unicode_addr = TELL(); + WORD(0x0000); + } + + /* Globals. */ + PWORD(TELL(), 0x0c); + + /* The first global needs to be set to a valid object for show_status; + * point it to the default object. In the future, when objects are + * fully supported, this should be configurable by the user. + */ + if(zversion <= 3) + { + WORD(0x0001); + SEEK(478, SEEK_CUR); + } + else + { + SEEK(480, SEEK_CUR); + } + + /* Property defaults table. */ + PWORD(TELL(), 0x0a); + SEEK(zversion <= 3 ? 62 : 126, SEEK_CUR); + + if(zversion == 6 || zversion == 7) + { + PWORD(0x0000, 0x28); /* Routines offset. */ + PWORD(0x0000, 0x2a); /* Static strings offset. */ + } + + /* Object table (just one object is created for now). */ + PWORD(TELL(), 0x0a); + + /* Object 1. */ + if(zversion <= 3) + { + for(int i = 0; i < 31; i++) WORD(0x0000); /* Property defaults table. */ + for(int i = 0; i < 2; i++) WORD(0x0000); /* Attribute flags. */ + for(int i = 0; i < 3; i++) BYTE(0x00); /* Parent, sibling, child. */ + WORD(TELL() + 2); /* Properties. */ + } + else + { + for(int i = 0; i < 63; i++) WORD(0x0000); /* Property defaults table. */ + for(int i = 0; i < 3; i++) WORD(0x0000); /* Attribute flags. */ + for(int i = 0; i < 3; i++) WORD(0x0000); /* Parent, sibling, child. */ + WORD(TELL() + 2); /* Properties. */ + } + + /* Property table (just the name and a terminating marker). */ + object_name("Default object"); + BYTE(0); +} + +static void end_file(void) +{ + ssize_t n; + size_t file_size; + unsigned char buf[8192]; + uint16_t checksum = 0; + + /* Unicode table. */ + if(unicode_index != 0) + { + PWORD(TELL(), etable_unicode_addr); + BYTE(unicode_index); + for(int i = 0; i < unicode_index; i++) WORD(unicode_table[i]); + } + + if(zversion >= 3) + { + SEEK(0x40, SEEK_SET); + file_size = 0x40; + while((n = read(fd, buf, sizeof buf)) > 0) + { + for(ssize_t i = 0; i < n; i++) checksum += buf[i]; + + file_size += n; + } + + if(n < 0) err("read"); + + for(size_t i = 0; i < ALIGN(file_size) - file_size; i++) BYTE(0); + file_size = ALIGN(file_size); + + if(file_size > max_size()) errx("file size too large (%zu)", file_size); + + PWORD(file_size / (zversion == 3 ? 2 : zversion <= 5 ? 4 : 8), 0x1a); + PWORD(checksum, 0x1c); + } + + close(fd); +} + +static void process_file(FILE *fp, const char *fn) +{ + char buf[sizeof current_line]; + int saved_lineno; + + current_file = fn; + current_lineno = 0; + + while(fgets(buf, sizeof buf, fp) != NULL) + { + char *p; + int i; + const char *args[32]; + + *current_line = 0; + current_lineno++; + + p = strchr(buf, '\n'); + if(p == NULL) die("no newline; line too long?"); + *p = 0; + + strcpy(current_line, buf); + + p = strchr(buf, '#'); + if(p != NULL) *p = 0; + + if(buf[0] == 0) continue; + + /* @print and @print_ret must be handled here, before tokenization, + * because spaces are significant. The string directive is similar. + */ + if(strncmp(buf, "print ", 6) == 0) + { + BYTE(0xb2); + print_handler(strchr(buf, ' ') + 1); + continue; + } + + if(strncmp(buf, "print_ret ", 10) == 0) + { + BYTE(0xb3); + print_handler(strchr(buf, ' ') + 1); + continue; + } + + if(strncmp(buf, "string ", 7) == 0) + { + SEEKALIGN(); + print_handler(strchr(buf, ' ') + 1); + continue; + } + + p = strtok(buf, " \t"); + + /* This does NOT detect recursion! */ + if(strcmp(p, "include") == 0) + { + const char *file = strtok(NULL, ""); + FILE *fp2; + + if(file == NULL) die("bad line"); + + fp2 = fopen(file, "r"); + if(fp2 == NULL) die("fopen: %s", strerror(errno)); + + saved_lineno = current_lineno; + + process_file(fp2, file); + + fclose(fp2); + + current_file = fn; + current_lineno = saved_lineno; + + continue; + } + + i = 0; + args[i++] = p; + + for(p = strtok(NULL, " \t"); p != NULL; p = strtok(NULL, " \t")) + { + if(i == ASIZE(args)) die("too many tokens"); + args[i++] = p; + } + + parse_args(i, args); + } + + if(!started) errx("no start directive found"); +} + +int main(int argc, char **argv) +{ + int c; + const char *infile, *outfile = "out.z5"; + FILE *fp; + + if(strftime(serial, sizeof serial, "%y%m%d", localtime(&(time_t){ time(NULL) })) != 6) strcpy(serial, "000000"); + + while((c = getopt(argc, argv, "o:r:s:v:")) != -1) + { + switch(c) + { + case 'o': + outfile = optarg; + break; + case 'r': + if(!stol(optarg, 10, &release, 0, UINT16_MAX)) errx("release must be a number from 0 to %lu", (unsigned long)UINT16_MAX); + break; + case 's': + if(strlen(optarg) != 6) errx("serial number must be a six-digit string"); + strcpy(serial, optarg); + break; + case 'v': + if(!stol(optarg, 10, &zversion, 1, 8)) errx("invalid z-machine version: must be 1 to 8"); + break; + default: + exit(1); + } + } + + if(zversion == 1) memcpy(&atable[26 * 2], " 0123456789.,!?_#'\"/\\<-:()", 26); + + if(argc <= optind) exit(1); + + infile = argv[optind]; + + setup_opcodes(); + + start_file(outfile); + + fp = fopen(infile, "r"); + if(fp == NULL) err("%s", infile); + + process_file(fp, infile); + + fclose(fp); + + apply_jumps(); + + end_file(); + + return 0; +} diff --git a/zd/Makefile b/zd/Makefile new file mode 100644 index 0000000..435f458 --- /dev/null +++ b/zd/Makefile @@ -0,0 +1,8 @@ +include ../config.mk +include ../compiler.mk + +PROG= zd + +MACROS= -D_XOPEN_SOURCE=600 + +include ../prog.mk diff --git a/zd/README b/zd/README new file mode 100644 index 0000000..f393a6a --- /dev/null +++ b/zd/README @@ -0,0 +1,8 @@ +The man page documents the useful parts of the disassembler. + +The included interp.rb Ruby script can be used to extract action and preaction +routine addresses from the output of infodump -g, allowing zd to disassemble +these routines (it cannot find them on its own). The resulting file can be +passed to zd's -A flag. For example: + +infodump -g zork1.z3 | ./interp.rb | ./zd -a 0 -A - zork1.z3 diff --git a/zd/zd.1 b/zd/zd.1 new file mode 100644 index 0000000..80596bb --- /dev/null +++ b/zd/zd.1 @@ -0,0 +1,156 @@ +.\" Copyright (c) 2011 Chris Spiegel. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd May 3, 2011 +.Dt ZD 1 +.Os +.Sh NAME +.Nm zd +.Nd disassemble Z\-machine stories +.Sh SYNOPSIS +.Nm +.Op Fl bdDinv +.Op Fl a Ar address +.Op Fl A Ar address_file +.Op Fl w Ar width +.Ar file +.Sh DESCRIPTION +The +.Nm +utility disassembles Z\-machine stories, displaying the results in a +manner similar to the +.Xr txd 1 +disassembler. The main difference to +.Xr txd 1 +is that +.Xr txd 1 +has knowledge of how Infocom and Inform lay out story files. As such, +.Xr txd 1 +can get confused by stories that do not conform to a layout it expects. +On the other hand, +.Xr txd 1 +is able to find code (such as action routines) that +.Nm +will not detect, because +.Nm +only finds code that is reachable directly from the initial program +counter address. If you know the address of code that is not reachable +from the initial program counter address, +.Nm +can decode it if you use the +.Fl a +or +.Fl A +option. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl a Ar address +By default, +.Nm +starts disassembling at the address at byte +.Li 0x06 +in the header, which is the starting location of the program counter. +The +.Fl a +option allows other addresses to be used as a starting location. +Any number of +.Fl a +options can be given, allowing multiple addresses to be examined. +Addresses must be specified in hexadecimal, with an optional leading +.Li 0x . +.Pp +If the +.Fl a +option is used, the story's reported program counter starting address +will not be examined. If you wish to include it as well, specify an +address of 0. +.It Fl A Ar address_file +This is identical to the +.Fl a +option, except that addresses are read, one per line, from the file +.Ar address_file . +If +.Ar address_file +is a single dash +.Pq Sq Fl , +read from standard input. +.It Fl b +Disable buffered output. This is intended to be used with the +.Fl v +option, to ensure that in case of abnormal program termination (such as +a segfault), the last-processed instruction will be seen. Such abnormal +terminations are the result of bugs in +.Nm , +so this is primarily an option to aid in development and debugging. +.It Fl d +In addition to the instructions, print out a hexadecimal representation +of the constituent bytes of each instruction. See also the +.Fl D +option. +.It Fl D +By default, the +.Fl d +option will not dump the constituent bytes of strings, because these +tend to be very long. The +.Fl D +option instructs +.Nm +to dump the strings as well as instructions. +.It Fl i +If an unknown opcode is found or an attempt is made to jump outside of +memory, +.Nm +will exit with an error message. This option instructs it to continue +to attempt to disassemble. All bets are off at this point, although in +some cases +.Nm +will continue to disassemble correctly. +.It Fl n +When printing strings, use a +.Li ^ +character to represent a newline instead +of an actual newline characer. +.It Fl v +.Nm +runs in two phases. First, it scans through a story file, decoding +everything that appears to be an instruction. Next, it displays the +decoded instructions in order. This second phase is what's normally +displayed. The +.Fl v +option also displays the first phase. +.It Fl w Ar width +When the +.Fl d +option is selected, the display of bytes is wrapped at +.Ar width +bytes per line, so the screen is less cluttered. If no +.Fl w +option is given, a default of 8 is used. +.El +.Sh AUTHORS +.An "Chris Spiegel" Aq cspiegel@gmail.com +.Sh SEE ALSO +.Xr txd 1 , +.Xr za 1 diff --git a/zd/zd.c b/zd/zd.c new file mode 100644 index 0000000..7014e62 --- /dev/null +++ b/zd/zd.c @@ -0,0 +1,1100 @@ +/*- + * Copyright (c) 2010-2011 Chris Spiegel + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define err(...) do { fprintf(stderr, __VA_ARGS__); fprintf(stderr, ": %s\n", strerror(errno)); exit(1); } while(0) +#define errx(...) do { fprintf(stderr, __VA_ARGS__); fputc('\n', stderr); exit(1); } while(0) + +static int dumphex = 0; +static int dumpstring = 0; +static int print_width = 8; +static char newlinechar = '\n'; + +static uint8_t *memory; +static size_t memory_size; +static uint16_t abbr_table; +static uint32_t R_O, S_O; +static uint32_t main_routine; + +static int ignore_errors = 0; + +static uint8_t atable[26 * 3] = +{ + /* A0 */ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + + /* A1 */ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + + /* A2 */ + 0x0, 0xd, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', + ',', '!', '?', '_', '#', '\'','"', '/', '\\','-', ':', '(', ')', +}; + + +static int nargs; +static long args[8]; + +static uint8_t BYTE(uint32_t addr) +{ + if(addr >= memory_size) errx("byte %lx is out of bounds", (unsigned long)addr); + + return memory[addr]; +} + +static uint16_t WORD(uint32_t addr) +{ + if(addr + 1 >= memory_size) errx("word %lx is out of bounds", (unsigned long)addr); + + return memory[addr] << 8 | memory[addr + 1]; +} + +static int zversion; + +#define F_STORE 0x001 +#define F_BRANCH 0x002 +#define F_STRING 0x004 +#define F_PSTRING 0x008 +#define F_CALL1 0x010 +#define F_CALL2 0x020 +#define F_CALL3 0x040 +#define F_JUMP 0x080 +#define F_INDIRECT 0x100 +#define F_RETURN 0x200 + +struct opcode +{ + enum count { ZERO, ONE, TWO, VAR, EXT } count; + int number; + + const char *name; + int flags; +}; + +struct instruction +{ + unsigned long addr, nextaddr; + + const struct opcode *opcode; + + int nargs; + long args[8]; + + int invbranch; + unsigned long branchto; + + long storeto; + + char *string; + + struct instruction *next; +}; + +static struct instruction *instructions; +static size_t ninst; + +static void add_instruction(struct instruction inst) +{ + struct instruction *new; + + new = malloc(sizeof *new); + if(new == NULL) err("malloc"); + + *new = inst; + new->next = instructions; + instructions = new; + + ninst++; +} + +static void append(struct instruction **list, struct instruction **tail, struct instruction *node) +{ + node->next = NULL; + + if(*list == NULL) + { + *list = *tail = node; + } + else + { + (*tail)->next = node; + *tail = node; + } +} + +static struct instruction *merge(struct instruction *l, struct instruction *r) +{ + struct instruction *ret = NULL, *tail = NULL, *tmp; + + while(l != NULL && r != NULL) + { + if(l->addr < r->addr) + { + tmp = l; + l = l->next; + append(&ret, &tail, tmp); + } + else + { + tmp = r; + r = r->next; + append(&ret, &tail, tmp); + } + } + + if(l == NULL) tail->next = r; + else tail->next = l; + + return ret; +} + +static struct instruction *merge_sort(struct instruction *list, size_t n) +{ + struct instruction *left = NULL, *lt = NULL, *right = NULL, *rt = NULL; + + if(n <= 1) return list; + + for(size_t i = 0; i < n; i++) + { + struct instruction *tmp; + + tmp = list; + list = list->next; + + if(i < n / 2) append(&left, <, tmp); + else append(&right, &rt, tmp); + } + + left = merge_sort(left, n / 2); + right = merge_sort(right, n - (n / 2)); + + return merge(left, right); +} + +static void sort_instructions(void) +{ + instructions = merge_sort(instructions, ninst); +} + +static void free_instructions(void) +{ + struct instruction *tmp; + + while(instructions != NULL) + { + tmp = instructions; + instructions = instructions->next; + free(tmp->string); + free(tmp); + } +} + +#define MAX_STORY_SIZE 524288 + +static unsigned long routines[MAX_STORY_SIZE]; +static unsigned char seen_address[MAX_STORY_SIZE]; + +static struct opcode opcodes[5][256]; + +static void OP(enum count count, const char *name, int min, int max, int number, int flags) +{ + int v = zversion > 6 ? 5 : zversion; + + if(count > 4 || number > 255) errx("internal error: opcode %d %d is out of range", (int)count, number); + + if(v >= min && v <= max) opcodes[count][number] = (struct opcode){ .count = count, .number = number, .name = name, .flags = flags }; +} + +#define ZEROOP(...) OP(ZERO, __VA_ARGS__) +#define ONEOP(...) OP(ONE, __VA_ARGS__) +#define TWOOP(...) OP(TWO, __VA_ARGS__) +#define VAROP(...) OP(VAR, __VA_ARGS__) +#define EXTOP(...) OP(EXT, __VA_ARGS__) + +static void setup_opcodes(void) +{ + ZEROOP("rtrue", 1, 6, 0x00, F_RETURN); + ZEROOP("rfalse", 1, 6, 0x01, F_RETURN); + ZEROOP("print", 1, 6, 0x02, F_STRING); + ZEROOP("print_ret", 1, 6, 0x03, F_RETURN | F_STRING); + ZEROOP("nop", 1, 6, 0x04, 0); + ZEROOP("save", 1, 3, 0x05, F_BRANCH); + ZEROOP("save", 4, 4, 0x05, F_STORE); + ZEROOP("restore", 1, 3, 0x06, F_BRANCH); + ZEROOP("restore", 4, 4, 0x06, F_STORE); + ZEROOP("restart", 1, 6, 0x07, F_RETURN); + ZEROOP("ret_popped", 1, 6, 0x08, F_RETURN); + ZEROOP("pop", 1, 4, 0x09, 0); + ZEROOP("catch", 5, 6, 0x09, F_STORE); + ZEROOP("quit", 1, 6, 0x0a, F_RETURN); + ZEROOP("new_line", 1, 6, 0x0b, 0); + ZEROOP("show_status", 3, 3, 0x0c, 0); + ZEROOP("nop", 4, 6, 0x0c, 0); + ZEROOP("verify", 3, 6, 0x0d, F_BRANCH); + ZEROOP("piracy", 5, 6, 0x0f, F_BRANCH); + + ONEOP("jz", 1, 6, 0x00, F_BRANCH); + ONEOP("get_sibling", 1, 6, 0x01, F_STORE | F_BRANCH); + ONEOP("get_child", 1, 6, 0x02, F_STORE | F_BRANCH); + ONEOP("get_parent", 1, 6, 0x03, F_STORE); + ONEOP("get_prop_len", 1, 6, 0x04, F_STORE); + ONEOP("inc", 1, 6, 0x05, F_INDIRECT); + ONEOP("dec", 1, 6, 0x06, F_INDIRECT); + ONEOP("print_addr", 1, 6, 0x07, 0); + ONEOP("call_1s", 4, 6, 0x08, F_STORE | F_CALL1); + ONEOP("remove_obj", 1, 6, 0x09, 0); + ONEOP("print_obj", 1, 6, 0x0a, 0); + ONEOP("ret", 1, 6, 0x0b, F_RETURN); + ONEOP("jump", 1, 6, 0x0c, F_RETURN | F_JUMP); + ONEOP("print_paddr", 1, 6, 0x0d, F_PSTRING); + ONEOP("load", 1, 6, 0x0e, F_STORE | F_INDIRECT); + ONEOP("not", 1, 4, 0x0f, F_STORE); + ONEOP("call_1n", 5, 6, 0x0f, F_CALL1); + + TWOOP("je", 1, 6, 0x01, F_BRANCH); + TWOOP("jl", 1, 6, 0x02, F_BRANCH); + TWOOP("jg", 1, 6, 0x03, F_BRANCH); + TWOOP("dec_chk", 1, 6, 0x04, F_BRANCH | F_INDIRECT); + TWOOP("inc_chk", 1, 6, 0x05, F_BRANCH | F_INDIRECT); + TWOOP("jin", 1, 6, 0x06, F_BRANCH); + TWOOP("test", 1, 6, 0x07, F_BRANCH); + TWOOP("or", 1, 6, 0x08, F_STORE); + TWOOP("and", 1, 6, 0x09, F_STORE); + TWOOP("test_attr", 1, 6, 0x0a, F_BRANCH); + TWOOP("set_attr", 1, 6, 0x0b, 0); + TWOOP("clear_attr", 1, 6, 0x0c, 0); + TWOOP("store", 1, 6, 0x0d, F_INDIRECT); + TWOOP("insert_obj", 1, 6, 0x0e, 0); + TWOOP("loadw", 1, 6, 0x0f, F_STORE); + TWOOP("loadb", 1, 6, 0x10, F_STORE); + TWOOP("get_prop", 1, 6, 0x11, F_STORE); + TWOOP("get_prop_addr", 1, 6, 0x12, F_STORE); + TWOOP("get_next_prop", 1, 6, 0x13, F_STORE); + TWOOP("add", 1, 6, 0x14, F_STORE); + TWOOP("sub", 1, 6, 0x15, F_STORE); + TWOOP("mul", 1, 6, 0x16, F_STORE); + TWOOP("div", 1, 6, 0x17, F_STORE); + TWOOP("mod", 1, 6, 0x18, F_STORE); + TWOOP("call_2s", 4, 6, 0x19, F_STORE | F_CALL1); + TWOOP("call_2n", 5, 6, 0x1a, F_CALL1); + TWOOP("set_colour", 5, 6, 0x1b, 0); + TWOOP("throw", 5, 6, 0x1c, 0); + + VAROP("call_vs", 1, 6, 0x00, F_STORE | F_CALL1); + VAROP("storew", 1, 6, 0x01, 0); + VAROP("storeb", 1, 6, 0x02, 0); + VAROP("put_prop", 1, 6, 0x03, 0); + VAROP("read", 1, 3, 0x04, 0); + VAROP("read", 4, 4, 0x04, F_CALL3); + VAROP("read", 5, 6, 0x04, F_STORE | F_CALL3); + VAROP("print_char", 1, 6, 0x05, 0); + VAROP("print_num", 1, 6, 0x06, 0); + VAROP("random", 1, 6, 0x07, F_STORE); + VAROP("push", 1, 6, 0x08, 0); + VAROP("pull", 1, 5, 0x09, F_INDIRECT); + VAROP("pull", 6, 6, 0x09, F_STORE); + VAROP("split_window", 3, 6, 0x0a, 0); + VAROP("set_window", 3, 6, 0x0b, 0); + VAROP("call_vs2", 4, 6, 0x0c, F_STORE | F_CALL1); + VAROP("erase_window", 4, 6, 0x0d, 0); + VAROP("erase_line", 4, 6, 0x0e, 0); + VAROP("set_cursor", 4, 6, 0x0f, 0); + VAROP("get_cursor", 4, 6, 0x10, 0); + VAROP("set_text_style", 4, 6, 0x11, 0); + VAROP("buffer_mode", 4, 6, 0x12, 0); + VAROP("output_stream", 3, 6, 0x13, 0); + VAROP("input_stream", 3, 6, 0x14, 0); + VAROP("sound_effect", 3, 4, 0x15, 0); + VAROP("sound_effect", 5, 6, 0x15, F_CALL3); + VAROP("read_char", 4, 6, 0x16, F_STORE | F_CALL2); + VAROP("scan_table", 4, 6, 0x17, F_STORE | F_BRANCH); + VAROP("not", 5, 6, 0x18, F_STORE); + VAROP("call_vn", 5, 6, 0x19, F_CALL1); + VAROP("call_vn2", 5, 6, 0x1a, F_CALL1); + VAROP("tokenise", 5, 6, 0x1b, 0); + VAROP("encode_text", 5, 6, 0x1c, 0); + VAROP("copy_table", 5, 6, 0x1d, 0); + VAROP("print_table", 5, 6, 0x1e, 0); + VAROP("check_arg_count", 5, 6, 0x1f, F_BRANCH); + + EXTOP("save", 5, 6, 0x00, F_STORE); + EXTOP("restore", 5, 6, 0x01, F_STORE); + EXTOP("log_shift", 5, 6, 0x02, F_STORE); + EXTOP("art_shift", 5, 6, 0x03, F_STORE); + EXTOP("set_font", 5, 6, 0x04, F_STORE); + EXTOP("draw_picture", 6, 6, 0x05, 0); + EXTOP("picture_data", 6, 6, 0x06, F_BRANCH); + EXTOP("erase_picture", 6, 6, 0x07, 0); + EXTOP("set_margins", 6, 6, 0x08, 0); + EXTOP("save_undo", 5, 6, 0x09, F_STORE); + EXTOP("restore_undo", 5, 6, 0x0a, F_STORE); + EXTOP("print_unicode", 5, 6, 0x0b, 0); + EXTOP("check_unicode", 5, 6, 0x0c, F_STORE); + EXTOP("set_true_colour", 5, 6, 0x0d, 0); + EXTOP("move_window", 6, 6, 0x10, 0); + EXTOP("window_size", 6, 6, 0x11, 0); + EXTOP("window_style", 6, 6, 0x12, 0); + EXTOP("get_wind_prop", 6, 6, 0x13, F_STORE); + EXTOP("scroll_window", 6, 6, 0x14, 0); + EXTOP("pop_stack", 6, 6, 0x15, 0); + EXTOP("read_mouse", 6, 6, 0x16, 0); + EXTOP("mouse_window", 6, 6, 0x17, 0); + EXTOP("push_stack", 6, 6, 0x18, F_BRANCH); + EXTOP("put_wind_prop", 6, 6, 0x19, 0); + EXTOP("print_form", 6, 6, 0x1a, 0); + EXTOP("make_menu", 6, 6, 0x1b, F_BRANCH); + EXTOP("picture_table", 6, 6, 0x1c, 0); + EXTOP("buffer_screen", 6, 6, 0x1d, 0); + + /* Zoom extensions. */ + EXTOP("start_timer", 5, 6, 0x80, 0); + EXTOP("stop_timer", 5, 6, 0x81, 0); + EXTOP("read_timer", 5, 6, 0x82, F_STORE); + EXTOP("print_timer", 5, 6, 0x83, 0); +} + +#undef ZEROOP +#undef ONEOP +#undef TWOOP +#undef VAROP +#undef EXTOP +#undef OP + +static unsigned long unpack(uint32_t addr, int string) +{ + switch(zversion) + { + case 1: case 2: case 3: + return addr * 2UL; + case 4: case 5: + return addr * 4UL; + case 6: case 7: + return (addr * 4UL) + (string ? S_O : R_O); + case 8: + return addr * 8UL; + } + + errx("bad version %d", zversion); +} + +/* Both constants and variables are stored in a long. They are + * differentiated by making sure that variables are always negative, + * while the unsigned representations of constants are used, meaning + * they will be positive (or zero). Variables are encoded as the + * negative value of the variable, minus one, so the stack pointer, + * which is variable number 0, is stored as -1, the first local + * (variable 1) is stored as -2, and so on. + */ +static long variable(int var) +{ + return -var - 1; +} + +static void decode_var(uint8_t types, uint32_t *pc) +{ + for(int i = 6; i >= 0; i -= 2) + { + int type = (types >> i) & 0x03; + + if (type == 0) args[nargs++] = WORD((*pc)++); + else if(type == 1) args[nargs++] = BYTE(*pc); + else if(type == 2) args[nargs++] = variable(BYTE(*pc)); + else return; + + (*pc)++; + } +} + +/* Given the (packed) address of a routine, this function returns the + * address of the first instruction in the routine. In addition, it + * stores the address of the routine in a table so it can be identified + * during the printout of the disassembly. + */ +static uint32_t handle_routine(uint32_t addr) +{ + uint8_t nlocals; + uint32_t new; + + if(addr == 0) return 0; + + addr = unpack(addr, 0); + new = addr; + nlocals = BYTE(new++); + if(zversion <= 4) new += (nlocals * 2); + + routines[new] = addr; + + return new; +} + +static size_t znchars, zindex; +static char *zchars; + +static void add_zchar(uint8_t c) +{ + if(zindex == znchars) + { + zchars = realloc(zchars, znchars += 256); + if(zchars == NULL) err("realloc"); + } + + if(c == 13) c = newlinechar; + + zchars[zindex++] = c; +} + +static int print_zcode(uint32_t addr, int in_abbr, void (*outc)(uint8_t)) +{ + int abbrev = 0, shift = 0, special = 0; + int c, lastc = 0; /* Initialize lastc to shut gcc up */ + uint16_t w; + uint32_t counter = addr; + int current_alphabet = 0; + + do + { + w = WORD(counter); + + for(int i = 10; i >= 0; i -= 5) + { + c = (w >> i) & 0x1f; + + if(special) + { + if(special == 2) lastc = c; + else outc((lastc << 5) | c); + + special--; + } + + else if(abbrev) + { + uint32_t new_addr; + + new_addr = WORD(abbr_table + 64 * (abbrev - 1) + 2 * c); + + /* new_addr is a word address, so multiply by 2 */ + print_zcode(new_addr * 2, 1, outc); + + abbrev = 0; + } + + else switch(c) + { + case 0: + outc(' '); + shift = 0; + break; + case 1: + if(zversion == 1) + { + outc('\n'); + shift = 0; + break; + } + /* fallthrough */ + case 2: case 3: + if(zversion > 2 || (zversion == 2 && c == 1)) + { + if(in_abbr) errx("abbreviation being used recursively"); + abbrev = c; + shift = 0; + } + else + { + shift = c - 1; + } + break; + case 4: + if(zversion <= 2) + { + current_alphabet = (current_alphabet + 1) % 3; + shift = 0; + } + else + { + shift = 1; + } + break; + case 5: + if(zversion <= 2) + { + current_alphabet = (current_alphabet + 2) % 3; + shift = 0; + } + else + { + shift = 2; + } + break; + case 6: + if(zversion <= 2) shift = (current_alphabet + shift) % 3; + if(shift == 2) + { + shift = 0; + special = 2; + break; + } + /* fallthrough */ + default: + if(zversion <= 2 && c != 6) shift = (current_alphabet + shift) % 3; + outc(atable[(26 * shift) + (c - 6)]); + shift = 0; + break; + } + } + + counter += 2; + } while((w & 0x8000) == 0); + + return counter - addr; +} + +/* Turn an argument into a string representation. Values zero and above + * are constants, whereas those below zero are variables. + */ +static const char *decode(long var, int store) +{ + static char ret[12]; + + if(var < -256 || var > 65535) errx("invalid variable %ld", var); + + if(var >= 0) + { + snprintf(ret, sizeof ret, "#%02lx", (unsigned long)var); + } + else + { + var = -(var + 1); + if(var == 0) strcpy(ret, store ? "-(SP)" : "(SP)+"); + else if(var <= 0x0f) snprintf(ret, sizeof ret, "L%02ld", var - 1); + else snprintf(ret, sizeof ret, "G%02lx", (unsigned long)(var - 0x10)); + } + + return ret; +} + +/* Begin interpreting at address “pc”. If a call or branch instruction + * is encountered, recursively call this function with the new address. + * Because there is no “end of code” marker, interpretation has to stop + * whenever we cannot be sure there is more code, which means opcodes + * such as @rtrue, @quit, and @jump will terminate the current + * interpretation loop, either returning to the previous loop or, if + * this is the first call, returning to the initial caller. Opcodes + * that trigger this behavior have F_RETURN set. + */ +static void interpret(uint32_t pc, int indent, int verbose) +{ +#define iprintf(...) do { if(verbose) printf(__VA_ARGS__); } while(0) + if(pc >= memory_size && verbose) + { + iprintf("%*s%04lx: outside of memory\n", indent * 2, "", (unsigned long)pc); + if(ignore_errors) return; + errx("cannot continiue"); + } + + while(1) + { + uint8_t opcode; + unsigned long orig_pc; + struct instruction inst = { 0 }; + int count, number; + + uint32_t newpc = 0; + + if(seen_address[pc]) return; + + seen_address[pc] = 1; + + orig_pc = pc; + + opcode = BYTE(pc++); + + /* long 2OP */ + if(opcode < 0x80) + { + nargs = 2; + if(opcode & 0x40) args[0] = variable(BYTE(pc++)); + else args[0] = BYTE(pc++); + + if(opcode & 0x20) args[1] = variable(BYTE(pc++)); + else args[1] = BYTE(pc++); + + count = TWO; + number = opcode & 0x1f; + } + + /* short 1OP */ + else if(opcode < 0xb0) + { + nargs = 1; + if(opcode < 0x90) + { + args[0] = WORD(pc); + pc += 2; + } + else if(opcode < 0xa0) + { + args[0] = BYTE(pc++); + } + else + { + args[0] = variable(BYTE(pc++)); + } + + count = ONE; + number = opcode & 0x0f; + } + + /* 0OP */ + else if(opcode < 0xc0) + { + nargs = 0; + + count = ZERO; + number = opcode & 0x0f; + } + + /* variable 2OP */ + else if(opcode < 0xe0) + { + nargs = 0; + + decode_var(BYTE(pc++), &pc); + + count = TWO; + number = opcode & 0x1f; + } + + /* Double variable VAR */ + else if(opcode == 0xec || opcode == 0xfa) + { + uint8_t types1, types2; + + nargs = 0; + + types1 = BYTE(pc++); + types2 = BYTE(pc++); + decode_var(types1, &pc); + decode_var(types2, &pc); + + count = VAR; + number = opcode & 0x1f; + } + + /* variable VAR */ + else + { + nargs = 0; + + decode_var(BYTE(pc++), &pc); + + count = VAR; + number = opcode & 0x1f; + } + + /* extended */ + if(count == ZERO && number == 0x0e) + { + nargs = 0; + + count = EXT; + number = BYTE(pc++); + + decode_var(BYTE(pc++), &pc); + } + + iprintf("%*s%04lx: ", indent * 2, "", orig_pc); + + inst.opcode = &opcodes[count][number]; + if(inst.opcode->name == NULL) + { + iprintf("unknown opcode %d %d\n", count, number); + + if(ignore_errors) return; + + if(verbose) errx("cannot continiue"); + else errx("unknown opcode %d %d @%lx", count, number, orig_pc); + } + + iprintf("%s ", inst.opcode->name); + + for(int i = 0; i < nargs; i++) iprintf("%s ", decode(args[i], 0)); + + if(inst.opcode->flags & F_STORE) + { + inst.storeto = variable(BYTE(pc++)); + iprintf("-> %s", decode(inst.storeto, 1)); + } + + if(inst.opcode->flags & F_BRANCH) + { + uint8_t branch; + uint16_t offset; + + branch = BYTE(pc++); + + offset = branch & 0x3f; + + if((branch & 0x40) == 0) + { + offset = (offset << 8) | BYTE(pc++); + + /* Get the sign right. */ + if(offset & 0x2000) offset |= 0xc000; + } + + inst.invbranch = !(branch & 0x80); + + /* Branch to new offset from pc. */ + if(offset > 1) + { + newpc = pc + (int16_t)offset - 2; + inst.branchto = newpc; + } + else + { + inst.branchto = offset; + } + + if(inst.branchto > 1) iprintf("[%s] %lx", inst.invbranch ? "FALSE" : "TRUE", inst.branchto); + else iprintf("[%s] %s", inst.invbranch ? "FALSE" : "TRUE", inst.branchto == 1 ? "RTRUE" : "RFALSE"); + } + + zindex = 0; + + if(inst.opcode->flags & F_STRING) + { + pc += print_zcode(pc, 0, add_zchar); + } + + if((inst.opcode->flags & F_PSTRING) && args[0] > 0) + { + print_zcode(unpack(args[0], 1), 0, add_zchar); + nargs = 0; + } + + if(zindex != 0) + { + inst.string = malloc(zindex + 1); + if(inst.string == NULL) err("malloc"); + memcpy(inst.string, zchars, zindex); + inst.string[zindex] = 0; + } + + if(inst.opcode->flags & F_CALL1) + { + if(args[0] > 0) newpc = handle_routine(args[0]); + } + + if(inst.opcode->flags & F_CALL2) + { + if(nargs == 3 && args[2] > 0) newpc = handle_routine(args[2]); + } + + if(inst.opcode->flags & F_CALL3) + { + if(nargs == 4 && args[3] > 0) newpc = handle_routine(args[3]); + } + + if((inst.opcode->flags & F_JUMP) && args[0] >= 0) + { + newpc = pc + (int16_t)args[0] - 2; + } + + if(inst.opcode->flags & F_INDIRECT) + { + args[0] = variable(args[0]); + } + + inst.addr = orig_pc; + inst.nextaddr = pc; + + inst.nargs = nargs; + for(int i = 0; i < nargs; i++) inst.args[i] = args[i]; + + add_instruction(inst); + + iprintf("\n"); + + if(newpc != 0) interpret(newpc, indent + 1, verbose); + + if(inst.opcode->flags & F_RETURN) return; + } +} + +static unsigned long roundup(unsigned long v, unsigned long multiple) +{ + return ((v + multiple - 1) / multiple) * multiple; +} + +static void print_code(void) +{ + uint32_t lastaddr = 0; + + sort_instructions(); + + for(struct instruction *inst = instructions; inst != NULL; inst = inst->next) + { + /* If the last instruction does not directly abut the current + * instruction, there is a gap (probably caused by a jump of some + * sort); represent that with a newline... + */ + if(lastaddr != inst->addr) putchar('\n'); + + /* ... except that it’s possible for the start of the main routine + * (which really isn’t a routine since it has no header) to be + * adjacent to a routine placed right before it. In this case, + * there should be a newline, so add it. + */ + if(inst->addr == main_routine) printf("%sMAIN ROUTINE %lx\n", lastaddr == inst->addr ? "\n" : "", inst->addr - 1); + else if(routines[inst->addr]) printf("ROUTINE %lx\n", routines[inst->addr]); + + lastaddr = inst->nextaddr; + + printf("%6lx: ", inst->addr); + + if(dumphex) + { + /* Strings can be very long, so don’t dump unless requested. */ + if(!dumpstring && inst->opcode->flags & F_STRING) + { + printf("%02x ...%*s", (unsigned)memory[inst->addr], (print_width * 3) - 6, ""); + } + else + { + for(unsigned long i = 0; i < roundup(inst->nextaddr - inst->addr, print_width); i++) + { + if(i > 0 && i % print_width == 0) printf("\n%8s", ""); + + if(inst->addr + i < inst->nextaddr) printf("%02x ", (unsigned)memory[inst->addr + i]); + else printf(" "); + } + } + + putchar(' '); + } + + printf("%-16s ", inst->opcode->name); + + if(inst->string != NULL) printf("%s ", inst->string); + + if((inst->opcode->flags & F_CALL1) && inst->args[0] >= 0) + { + printf("#%lx ", unpack(inst->args[0], 0)); + for(int i = 1; i < inst->nargs; i++) printf("%s ", decode(inst->args[i], 0)); + } + else if((inst->opcode->flags & F_JUMP) && inst->args[0] >= 0) + { + printf("#%lx ", inst->addr + (int16_t)inst->args[0] + 1); + } + else + { + for(int i = 0; i < inst->nargs; i++) printf("%s ", decode(inst->args[i], 0)); + } + + if(inst->opcode->flags & F_STORE) printf("-> %s ", decode(inst->storeto, 1)); + + if(inst->opcode->flags & F_BRANCH) + { + if(inst->branchto > 1) printf("[%s] %lx", inst->invbranch ? "FALSE" : "TRUE", inst->branchto); + else printf("[%s] %s", inst->invbranch ? "FALSE" : "TRUE", inst->branchto == 1 ? "RTRUE" : "RFALSE"); + } + + putchar('\n'); + } +} + +int main(int argc, char **argv) +{ + int c; + int vflag = 0; + FILE *fp; + uint32_t start; + static const size_t MAXADDR = 1024; + size_t naddr = 0; + uint32_t addr[MAXADDR]; + struct stat st; + + while((c = getopt(argc, argv, "a:A:bdDinvw:")) != -1) + { + switch(c) + { + case 'a': + if(naddr == MAXADDR) errx("too many addresses: max %zu", MAXADDR); + addr[naddr++] = strtoul(optarg, NULL, 16); + break; + case 'A': + { + char line[16]; + + if(strcmp(optarg, "-") == 0) + { + fp = stdin; + } + else + { + fp = fopen(optarg, "r"); + if(fp == NULL) err("%s", optarg); + } + + while(fgets(line, sizeof line, fp) != NULL) + { + if(naddr == MAXADDR) errx("too many addresses: max %zu", MAXADDR); + addr[naddr++] = strtoul(line, NULL, 16); + } + + if(fp != stdin) fclose(fp); + } + break; + case 'b': + setvbuf(stdout, NULL, _IONBF, 0); + break; + case 'd': + dumphex = 1; + break; + case 'D': + dumpstring = 1; + break; + case 'i': + ignore_errors = 1; + break; + case 'n': + newlinechar = '^'; + break; + case 'v': + vflag = 1; + break; + case 'w': + print_width = strtol(optarg, NULL, 10); + if(print_width <= 0) print_width = 8; + break; + default: + exit(1); + } + } + + if(argc - optind != 1) errx("bad call"); + + fp = fopen(argv[optind], "rb"); + if(fp == NULL) err("%s", argv[optind]); + + if(fstat(fileno(fp), &st) == -1) err("fstat"); + + if(st.st_size > MAX_STORY_SIZE) errx("file too large"); + + memory = malloc(memory_size = st.st_size); + if(memory == NULL) err("malloc"); + + if(fread(memory, memory_size, 1, fp) != 1) errx("short read"); + + fclose(fp); + + zversion = BYTE(0); + start = WORD(0x06); + abbr_table = WORD(0x18); + + if(zversion == 0 || zversion > 8) errx("unknown z-machine version %d", zversion); + + if(zversion == 6 || zversion == 7) + { + R_O = WORD(0x28) * 8UL; + S_O = WORD(0x2a) * 8UL; + } + + if(zversion == 1) + { + memcpy(&atable[26 * 2], " 0123456789.,!?_#'\"/\\<-:()", 26); + } + else if(zversion >= 5 && WORD(0x34) != 0) + { + if(WORD(0x34) + 26 * 3 >= memory_size) errx("corrupted story: alphabet table out of range"); + + memcpy(atable, &memory[WORD(0x34)], 26 * 3); + atable[52] = 0x00; + atable[53] = 0x0d; + } + + if(zversion == 6) start = handle_routine(start); + + main_routine = start; + + setup_opcodes(); + + if(naddr == 0) + { + addr[naddr++] = start; + } + else + { + for(size_t i = 0; i < naddr; i++) + { + if(addr[i] == 0) + { + addr[i] = start; + } + else + { + uint32_t orig = addr[i]; + + if(zversion <= 4) addr[i] += (2 * BYTE(addr[i])); + + addr[i]++; + routines[addr[i]] = orig; + } + } + } + + for(size_t i = 0; i < naddr; i++) + { + if(vflag) printf("Beginning disassembly at %lx\n" + "-----------------------------\n", + (unsigned long)addr[i]); + interpret(addr[i], 0, vflag); + if(vflag) putchar('\n'); + } + + print_code(); + + free(memory); + free(zchars); + + free_instructions(); + + return 0; +}