From d7bc793189eb934ffac3fa1fc0b39f3dbbeca1e7 Mon Sep 17 00:00:00 2001 From: Jason Self Date: Mon, 19 Aug 2019 05:40:15 -0700 Subject: [PATCH 1/1] Import baZic by David Given Version 0.1 From http://cowlark.com/bazic/ --- .gitingore | 1 + Makefile | 11 + README | 104 +++++ bazic.inf | 111 +++++ interpreter.h | 1099 +++++++++++++++++++++++++++++++++++++++++++++++++ malloc.h | 202 +++++++++ mktokens.awk | 39 ++ script.h | 266 ++++++++++++ store.h | 203 +++++++++ string.h | 180 ++++++++ token-table.h | 203 +++++++++ tokeniser.h | 323 +++++++++++++++ tokens.dat | 25 ++ utils.h | 100 +++++ 14 files changed, 2867 insertions(+) create mode 100644 .gitingore create mode 100644 Makefile create mode 100644 README create mode 100644 bazic.inf create mode 100644 interpreter.h create mode 100644 malloc.h create mode 100644 mktokens.awk create mode 100644 script.h create mode 100644 store.h create mode 100644 string.h create mode 100644 token-table.h create mode 100644 tokeniser.h create mode 100644 tokens.dat create mode 100644 utils.h diff --git a/.gitingore b/.gitingore new file mode 100644 index 0000000..bddb1e1 --- /dev/null +++ b/.gitingore @@ -0,0 +1 @@ +bazic.z* \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8dcfa1b --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +INFORM = inform +IFLAGS = +include_path= '$$MAX_STATIC_DATA=65535' -D + +all: bazic + +token-table.h: tokens.dat mktokens.awk + awk -f mktokens.awk tokens.dat > token-table.h + +bazic: *.inf *.h token-table.h + $(INFORM) $(IFLAGS) bazic.inf bazic.z5 + diff --git a/README b/README new file mode 100644 index 0000000..73f2530 --- /dev/null +++ b/README @@ -0,0 +1,104 @@ +baZic version 0.1 +================= + +(C) 2001 David Given +http://www.cowlark.com +dg@cowlark.com + +Introduction +------------ + +Welcome to baZic, my totally mad Basic interpreter for the Z-machine! + +Please note that this is unfinished. While enough exists to run a decent game +of Hunt the Wumpus, there are big chunks of functionality missing; roughly 50% +of the Basic language. Notably, unimplemented features are: + +* String operators (the hooks are there, I just need to write the code) +* Subroutines (GOSUB & friends) +* Procedures (SUB & friends) +* Proper arrays + +I am unlikely ever to finish this; the Basic language is just too annoying for +words. It's even more annoying to implement than to write in. (It uses the +Parser from Hell.) If you wish to continue it, be my guest. + +What you *do* get, however, is: + +* Dynamic memory allocation, complete with block coalescing +* A full mark/sweep garbage collector +* Dynamic typing +* Full tokenisation for fast(!) execution of programs +* Full detokenisation when listing them again +* An interactive Basic development environment (cough, cough) +* State-of-the-art implementations of Hunt the Wumpus and Guess the Number + built in to the very interpreter +* Pseudo-arrays + +The memory allocator is nicely modular if you feel like ripping it out. Ditto +the garbage collector. + +Usage +----- + +Run the program in the normal way. You get presented with a Basic prompt. You +can now type in lines of Basic code and they will get executed. You can add +lines to the current program by prefixing them with a line number. Most basic +Basic keywords work. + +SAVE, LOAD and QUIT work as expected (for a text adventure). LIST has some +extra features; LIST -1 will display the program's byte code (for those with a +morbid curiosity). LIST -2 will dump the currently defined variables. + +If you modify the current program, all currently defined variables are lost. +Variables are dynamically typed; while you can put $ and % on the end of the +names if you want, they're meaningless. + +Arrays are funny. Rather than implement DIM, I just added a quick hack to +concatenate the array index on the end of the array name. So you don't need to +declare an array, and storage is only taken up for those elements that actually +exist. If you have any array entries defined, LIST -2 may do strange things +(the name of the array entry variables aren't strictly printable). + +The SCRIPT keyword allows you to load one of several predefined programs into +the interpreter's memory. Use it with no arguments to list the available +programs. Give it the program index for more information. SCRIPT 0 gives a +brief guide to baZic (even briefer than this, although written when I had more +sleep). + +Bugs +---- + +This program contains no bugs whatsoever. Excuse me, the New World Order made +me say that. + +Credits +------- + +You can blame Graham Nelson, for the Inform compiler with which this is +written, and Acorn, for producing the BBC Microcomputer on which I got the hack +habit. + +License +------- + +This code is licensed under the MIT open source license. + +Copyright (c) 2001, David Given All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS +IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE +AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/bazic.inf b/bazic.inf new file mode 100644 index 0000000..b727dc1 --- /dev/null +++ b/bazic.inf @@ -0,0 +1,111 @@ +include "malloc.h"; +include "tokeniser.h"; +include "store.h"; +include "string.h"; +include "interpreter.h"; +include "utils.h"; +include "script.h"; + +constant HEAP_SIZE $7FFF; +array heap -> HEAP_SIZE; +array parsebuffer -> 256; + +! Draw the status line. + +[ draw_status; + @split_window 1; + @set_window 1; + @set_cursor 1 1; + style reverse; + spaces (0->33)-1; + @set_cursor 1 2; + print "baZic 0.1 (C) 2001 David Given. "; + ! We add on some stuff in the next line, because we don't want to + ! include the input buffer in the memory count. + print (mem_countfree()+255+AB__SIZE), " bytes free."; + @set_cursor 1 1; + style roman; + @set_window 0; +]; + +! Read in a command from the user into the parse buffer. + +[ read_interactive_command buf in; + buf = mem_alloc(255); + if (buf == 0) + return -2; + + ! Prompt the user for input. + + buf->0 = 255; + print "> "; + read buf 0 draw_status; + + ! Ensure the string is zero-terminated. + + in = buf+2; + in->(buf->1) = 0; + + ! Tokenise the stream. + + in = tokenise_stream(in, parsebuffer); + + mem_free(buf); + return in; +]; + +! Clear all variables and reinit the heap. + +[ cmd_clear; + mem_init(store_eop+1, heap+HEAP_SIZE); + store_heapclean(); + return ESTATE_NORMAL; +]; + +! Invoke a command loop. + +[ command_loop func i; + cmd_clear(); + do { + string_gc(); + i = indirect(func); + switch (i) + { + -1: ! success + !detokenise_stream(parsebuffer+1); + i = execute_command(parsebuffer); + break; + + -2: ! out of memory + print "Insufficient memory for interactive prompt!^"; + print "Dumping variables and trying again.^"; + i = ESTATE_DANGEROUS; + break; + + -3: ! end of program or script or whatever + i = ESTATE_ERROR; + break; + + default: ! parse error + print "Parse error at offset ", i, ".^"; + i = ESTATE_ERROR; + break; + } + } until (i < 0); + return i; +]; + +[ Main i; + print "^^^^^^^^baZic v0.1^(C) 2001 David Given^^"; + print "To play ~Hunt the Wumpus~, type ~script 2~ to load "; + print "the program, and then ~run~ to start it. Try ~script~ on its "; + print "own to see what else is available.^^"; + + do { + store_init(heap, heap+HEAP_SIZE); + do { + i = command_loop(read_interactive_command); + } until ((i == ESTATE_QUIT) || (i == ESTATE_NEW)); + } until (i == ESTATE_QUIT); +]; + diff --git a/interpreter.h b/interpreter.h new file mode 100644 index 0000000..47cdce1 --- /dev/null +++ b/interpreter.h @@ -0,0 +1,1099 @@ +constant ESTATE_NEW -3; +constant ESTATE_QUIT -2; +constant ESTATE_DANGEROUS -1; +constant ESTATE_NORMAL 0; +constant ESTATE_ERROR 1; + +constant TYPE_INT = 0; +constant TYPE_STRING = 1; + +constant STACK_NONE = 0; +constant STACK_FOR = 1; +constant STACK_REPEAT = 2; +constant STACK_WHILE = 3; +constant STACK_GOSUB = 4; + +constant STACK_SIZE = 16; +array stack_type table STACK_SIZE; +array stack_cmd table STACK_SIZE; +array stack_line table STACK_SIZE; +global stack_ptr = 1; + +global cmd; +global program_ptr = 0; +global program_lineno; + +! An error has occurred. + +[ error msg; + print "^", (string) msg; + if (program_ptr) + print " at line ", program_lineno; + print ".^"; +]; + +! Miscellaneous errors. + +[ error_nomore; error("Token didn't expect more input"); ]; +[ error_typemismatch; error("Type mismatch"); ]; +[ error_missingopenp; error("Missing open parenthesis"); ]; +[ error_missingclosep; error("Missing close parenthesis"); ]; +[ error_syntaxerror; error("Syntax error"); ]; +[ error_outofmemory; error("Out of memory"); ]; +[ error_notrunning; error("Not running a program"); ]; +[ error_stackoverflow; error("Stack overflow"); ]; + +! End of statement token? + +[ eos t; + return ((t == TOKEN__EOL) || (t == TOKEN__COLON)); +]; + +! Skip white space. + +[ skipwhite; + while (cmd->0 == TOKEN__SPACE) + cmd++; +]; + +! Read and evaluate an lvalue. + +[ eval_lvalue varname i j val; + skipwhite(); + + varname = cmd; + while ((cmd++)->0); + + skipwhite(); + if (cmd->0 ~= TOKEN__LPAREN) + { + ! It's not an array, so we can return the raw name. + return strdup(varname); + } + + cmd++; + val = eval_expression(); + if (val == 0) + return 0; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + mem_free(val); + return 0; + } + i = val-->1; + mem_free(val); + val = i; + + skipwhite(); + if (cmd->0 == TOKEN__COMMA) + { + mem_free(val); + error("Multidimensional arrays are not supported"); + return 0; + } + if (cmd->0 ~= TOKEN__RPAREN) + { + mem_free(val); + error_syntaxerror(); + return 0; + } + cmd++; + + i = strlen(varname); + j = mem_alloc(i+2); + strcpy(j, varname); + j->i = val; + j->(i+1) = 0; + + return j; +]; + +! Read and evaluate a leaf expression. + +[ eval_leaf i j ret; + skipwhite(); + + i = ((cmd++)->0); + switch (i) + { + TOKEN__NUMBER: + ret = mem_alloc(4); + ret-->0 = TYPE_INT; + ret-->1 = cmd-->0; + cmd = cmd + 2; + return ret; + + TOKEN__STRING: + ret = mem_alloc(4); + ret-->0 = TYPE_STRING; + ret-->1 = string_alloc(cmd+1, cmd->0); + cmd = cmd + cmd->0 + 1; + return ret; + + TOKEN__MINUS: + ret = eval_leaf(); + if (ret-->0 ~= TYPE_INT) + { + mem_free(ret); + error("Can only use - operator on strings"); + return 0; + } + ret-->1 = -(ret-->1); + return ret; + + TOKEN__VAR: + i = cmd; + while ((cmd++)->0); + skipwhite(); + if (cmd->0 ~= TOKEN__LPAREN) + { + ret = store_lookup(i); + if (ret == 0) + { + error("Variable not found"); + return 0; + } + return ret; + } + cmd++; + + ret = eval_expression(); + if (ret == 0) + return 0; + if (ret-->0 ~= TYPE_INT) + { + mem_free(ret); + error_typemismatch(); + return 0; + } + + skipwhite(); + if ((cmd++)->0 ~= TOKEN__RPAREN) + { + mem_free(ret); + error_syntaxerror(); + return 0; + } + + j = mem_alloc(2+cmd-i); + strcpy(j, i); + i = strlen(j); + j->i = ret-->1; + j->(i+1) = 0; + mem_free(ret); + + ret = store_lookup(j); + mem_free(j); + if (ret == 0) + { + error("Array or array index not found"); + return 0; + } + return ret; + + TOKEN__LPAREN: + ret = eval_expression(); + if (ret == 0) + return 0; + if ((cmd++)->0 ~= TOKEN__RPAREN) + { + error_missingclosep(); + return 0; + } + return ret; + + ! Simple function. + + TOKEN_RND, TOKEN_VAL: + skipwhite(); + if ((cmd++)->0 ~= TOKEN__LPAREN) + { + error_missingopenp(); + return 0; + } + ret = eval_expression(); + if (ret == 0) + return 0; + if ((cmd++)->0 ~= TOKEN__RPAREN) + { + error_missingclosep(); + return 0; + } + + switch (i) + { + TOKEN_RND: + if (ret-->0 ~= TYPE_INT) + { + error_typemismatch(); + mem_free(ret); + return 0; + } + + ret-->1 = random(ret-->1) - 1; + break; + + TOKEN_VAL: + if (ret-->0 ~= TYPE_STRING) + { + error_typemismatch(); + mem_free(ret); + return 0; + } + + ret-->0 = TYPE_INT; + ret-->1 = string_toint(ret-->1); + break; + } + + return ret; + } + + error("Botched leaf expression"); + return 0; +]; + +! Evaluate an expression. + +[ eval_expression ret val i; + ret = eval_leaf(); + if (ret == 0) + return ret; + skipwhite(); + + i = cmd->0; + switch (i) + { + TOKEN__EOL, TOKEN__COLON, TOKEN__SEMICOLON, TOKEN__COMMA, TOKEN__RPAREN, + TOKEN_THEN, TOKEN_TO, TOKEN_STEP: + return ret; + + ! Operators that can work on any type. + + TOKEN__PLUS, TOKEN__LARROW, TOKEN__RARROW, TOKEN__EQUALS, TOKEN__NEQUAL: + cmd++; + val = eval_expression(); + if (val == 0) + jump reterror; + if (ret-->0 ~= val-->0) + jump typemismatch; + + switch (ret-->0) + { + TYPE_INT: + switch (i) + { + TOKEN__PLUS: + ret-->1 = (ret-->1 + val-->1); + break; + + TOKEN__LARROW: + ret-->1 = (ret-->1 < val-->1); + break; + + TOKEN__RARROW: + ret-->1 = (ret-->1 > val-->1); + break; + + TOKEN__EQUALS: + ret-->1 = (ret-->1 == val-->1); + break; + + TOKEN__NEQUAL: + ret-->1 = (ret-->1 ~= val-->1); + break; + } + break; + + TYPE_STRING: + switch (i) + { + TOKEN__EQUALS: + ret-->0 = TYPE_INT; + ret-->1 = (string_compare(ret-->1, val-->1) == 0); + break; + + TOKEN__NEQUAL: + ret-->0 = TYPE_INT; + ret-->1 = (string_compare(ret-->1, val-->1) ~= 0); + break; + + TOKEN__PLUS, TOKEN__LARROW, TOKEN__RARROW: + error("Unimplemented opcode"); + jump valreterror; + } + break; + } + mem_free(val); + break; + + ! Operators that only work on ints. + + TOKEN__MINUS, TOKEN__STAR, TOKEN__SLASH, TOKEN_AND, TOKEN_OR: + cmd++; + val = eval_expression(); + if (val == 0) + jump reterror; + if ((ret-->0 ~= TYPE_INT) || (val-->0 ~= TYPE_INT)) + jump typemismatch; + + switch (i) + { + TOKEN__MINUS: + ret-->1 = ret-->1 - val-->1; + break; + + TOKEN__STAR: + ret-->1 = ret-->1 * val-->1; + break; + + TOKEN__SLASH: + if (val-->1 == 0) + { + error("Division by zero"); + jump valreterror; + } + ret-->1 = ret-->1 / val-->1; + break; + + TOKEN_AND: + ret-->1 = ret-->1 && val-->1; + break; + + TOKEN_OR: + ret-->1 = ret-->1 || val-->1; + break; + } + mem_free(val); + break; + + default: + error("Botched complex expression"); + jump reterror; + } + + return ret; + +.typemismatch; + error_typemismatch(); +.valreterror; + mem_free(val); +.reterror; + mem_free(ret); + return 0; +]; + +! List the current program. + +[ cmd_list val; + skipwhite(); + if (eos(cmd->0) == 0) + val = eval_expression(); + else + { + val = mem_alloc(4); + val-->0 = TYPE_INT; + val-->1 = 0; + } + + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + mem_free(val); + return ESTATE_ERROR; + } + + switch (val-->1) + { + -2: + store_listvars(); + break; + + -1: + store_listprogramhex(); + break; + + default: + store_listprogram(); + break; + } + + mem_free(val); + return ESTATE_NORMAL; +]; + +! Prints out an expression. + +[ cmd_print val; + while (1) + { + skipwhite(); + if (eos(cmd->0)) + { + print "^"; + break; + } + + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + switch (val-->0) + { + TYPE_INT: + print val-->1; + break; + + TYPE_STRING: + string_print(val-->1); + break; + + default: + mem_free(val); + error("Internal error --- invalid type in print!"); + return ESTATE_ERROR; + } + mem_free(val); + + switch (cmd->0) + { + TOKEN__COMMA: + print " "; + cmd++; + break; + + TOKEN__SEMICOLON: + cmd++; + if (eos(cmd->0)) + return ESTATE_NORMAL; + break; + } + } + + return ESTATE_NORMAL; +]; + +! Invoke a script. + +[ cmd_script val id; + skipwhite(); + if (eos(cmd->0)) + { + script_list(); + return ESTATE_NORMAL; + } + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 ~= TYPE_INT) + { + mem_free(val); + jump typemismatch; + } + id = val-->1; + mem_free(val); + ! When we call this, it's entirely possible that the heap will be + ! trashed. + return script_invoke(id); + +.typemismatch; + error_typemismatch(); + return ESTATE_ERROR; +]; + +! Variable assignment. + +[ cmd_varassignment varname val; + varname = eval_lvalue(); + if (varname == 0) + return ESTATE_ERROR; + skipwhite(); + + if ((cmd++)->0 ~= TOKEN__EQUALS) + { + mem_free(varname); + error("Unrecognised keyword"); + return ESTATE_ERROR; + } + skipwhite(); + + val = eval_expression(); + if (val == 0) + { + mem_free(varname); + return ESTATE_ERROR; + } + + store_assign(varname, val-->0, val-->1); + mem_free(varname); + mem_free(val); + + return ESTATE_NORMAL; +]; + +! Run the program. + +[ cmd_run i p; + cmd_clear(); + stack_ptr = 1; + stack_type-->1 = STACK_NONE; + program_ptr = store_bottom; + + ! As the program is already tokenised, we can directly run the + ! bytecode in the store. + + do { + ! Reached the end of the program? + + if (program_ptr->0 == 0) + { + i = ESTATE_NORMAL; + break; + } + + ! Read in the line number and execute the line.. + + p = program_ptr + 1; + program_ptr = program_ptr + program_ptr->0; + program_lineno = p-->0; + p++; + + ! Execute the line. Remember execute_command needs to be + ! pointed at the byte *before* the bytecode... + + i = execute_command(p); + } until (i ~= ESTATE_NORMAL); + + program_ptr = 0; + return i; +]; + +! Read in a string. + +[ cmd_input val varname buf; + skipwhite(); + + ! Is there a label? + + if (cmd->0 == TOKEN__STRING) + { + val = eval_leaf(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 == TYPE_STRING) + string_print(val-->1); + else + { + error_typemismatch(); + mem_free(val); + return ESTATE_ERROR; + } + mem_free(val); + + skipwhite(); + switch (cmd->0) + { + TOKEN__COMMA: + print " "; + break; + + TOKEN__SEMICOLON: + break; + + default: + error_syntaxerror(); + return ESTATE_ERROR; + } + cmd++; + + skipwhite(); + } + else + print "? "; + + ! Get the variable name to put the result into. + + if ((cmd++)->0 ~= TOKEN__VAR) + { + error_syntaxerror(); + return ESTATE_ERROR; + } + varname = eval_lvalue(); + if (varname == 0) + return ESTATE_ERROR; + + ! Get the user's input. + + buf = mem_alloc(255); + if (buf == 0) + { + mem_free(varname); + error_outofmemory(); + return ESTATE_ERROR; + } + + buf->0 = 255; + read buf 0; + + ! Assign to the variable. + + store_assign(varname, TYPE_STRING, string_alloc(buf+2, buf->1)); + + ! Free the temporary buffer. + + mem_free(varname); + mem_free(buf); + + return ESTATE_NORMAL; +]; + +! Jump to a line number. + +[ cmd_goto val i; + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 ~= TYPE_INT) + { + mem_free(val); + error_typemismatch(); + return ESTATE_ERROR; + } + + i = store_findline(val-->1); + mem_free(val); + if (i == 0) + { + error("No such line number"); + return ESTATE_ERROR; + } + + program_ptr = i; + return ESTATE_NORMAL; +]; + + +! Conditional execution. + +[ cmd_if val; + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + skipwhite(); + if ((cmd++)->0 ~= TOKEN_THEN) + { + mem_free(val); + error_syntaxerror(); + return ESTATE_ERROR; + } + + if ((val-->0 == TYPE_INT) && (val-->1 == 0)) + cmd = 0; + + mem_free(val); + return ESTATE_NORMAL; +]; + +! Top half of a FOR loop. + +[ cmd_for varname val initialval targetval stepval cmdptr; + ! FOR can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + ! Store the address of the FOR instruction. + + cmdptr = cmd-1; + + ! Read the variable name. + + skipwhite(); + if ((cmd++)->0 ~= TOKEN__VAR) + { + error_syntaxerror(); + return ESTATE_ERROR; + } + varname = eval_lvalue(); + if (varname == 0) + return ESTATE_ERROR; + + ! Skip over the =. + + skipwhite(); + if ((cmd++)->0 ~= TOKEN__EQUALS) + { + error_syntaxerror(); + jump varnameexit; + } + + ! Read the initial value. + + val = eval_expression(); + if (val == 0) + jump varnameexit; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump varnameexit; + } + initialval = val-->1; + mem_free(val); + + ! Read the TO. + + skipwhite(); + if ((cmd++)->0 ~= TOKEN_TO) + { + error_syntaxerror(); + return ESTATE_ERROR; + } + + ! Read the target value. + + val = eval_expression(); + if (val == 0) + jump varnameexit; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump varnameexit; + } + targetval = val-->1; + mem_free(val); + + ! Is there a STEP clause? + + skipwhite(); + if (cmd->0 == TOKEN_STEP) + { + cmd++; + skipwhite(); + + ! Read the STEP value. + + val = eval_expression(); + if (val == 0) + jump varnameexit; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump valexit; + } + stepval = val-->1; + mem_free(val); + } + else + { + ! Otherwise, default to 1. + + stepval = 1; + } + + ! Is this a new loop? + + if (stack_type-->stack_ptr == STACK_NONE) + { + ! Yes. Ensure there's room on the stack. + + if ((stack_ptr+1) >= STACK_SIZE) + { + error_stackoverflow(); + jump varnameexit; + } + stack_ptr-->stack_type = STACK_NONE; + + ! ...and set the initial value. + + store_assign(varname, TYPE_INT, initialval); + } + else + { + ! Otherwise, load the loop counter. + + val = store_lookup(varname); + if (val == 0) + { + error("FOR loop counter has disappeared"); + jump varnameexit; + } + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump valexit; + } + initialval = val-->1; + mem_free(val); + + ! Increment it. + + initialval = initialval + stepval; + + ! Test. + + if (((stepval < 0) && (initialval < targetval)) || + ((stepval >= 0) && (initialval > targetval))) + { + ! Abort! The NEXT keyword has placed the pointer to + ! to the next instruction after the loop on the stack. + + cmd = stack_cmd-->stack_ptr; + program_ptr = stack_line-->stack_ptr; + stack_type-->stack_ptr = 0; + return ESTATE_NORMAL; + } + else + { + ! Write back the new loop counter. + + store_assign(varname, TYPE_INT, initialval); + } + } + + mem_free(varname); + stack_type-->stack_ptr = STACK_FOR; + stack_cmd-->stack_ptr = cmdptr; + stack_line-->stack_ptr = program_ptr; + stack_ptr++; + return ESTATE_NORMAL; + +.valexit; + mem_free(val); +.varnameexit; + mem_free(varname); + return ESTATE_ERROR; +]; + +! Bottom half of a FOR loop. + +[ cmd_next i j; + ! NEXT can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + stack_ptr--; + if ((stack_ptr == 0) || (stack_type-->stack_ptr ~= STACK_FOR)) + { + error("NEXT without FOR"); + return ESTATE_ERROR; + } + + i = stack_cmd-->stack_ptr; + j = stack_line-->stack_ptr; + stack_cmd-->stack_ptr = cmd; + stack_line-->stack_ptr = program_ptr; + cmd = i; + program_ptr = j; + + return ESTATE_NORMAL; +]; + +! Top half of a REPEAT..UNTIL loop. + +[ cmd_repeat; + ! REPEAT can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + if ((stack_ptr+1) >= STACK_SIZE) + { + error_stackoverflow(); + return ESTATE_ERROR; + } + + stack_type-->stack_ptr = STACK_REPEAT; + stack_cmd-->stack_ptr = cmd+1; + stack_line-->stack_ptr = program_ptr; + stack_ptr++; + return ESTATE_NORMAL; +]; + +! Bottom half of a REPEAT..UNTIL loop. + +[ cmd_until val; + ! REPEAT can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + stack_ptr--; + if ((stack_ptr == 0) || (stack_type-->stack_ptr ~= STACK_REPEAT)) + { + error("UNTIL without REPEAT"); + return ESTATE_ERROR; + } + + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 ~= TYPE_INT) + { + mem_free(val); + error_typemismatch(); + return ESTATE_ERROR; + } + + if (val-->1 == 0) + { + cmd = stack_cmd-->stack_ptr; + program_ptr = stack_line-->stack_ptr; + stack_ptr++; + } + else + stack_type-->stack_ptr = STACK_NONE; + + return ESTATE_NORMAL; +]; + +! Execute a command. + +[ execute_command _cmd i; + cmd = _cmd; + cmd++; + while (1) + { + i = (cmd++)->0; + + switch (i) + { + TOKEN__EOL: return ESTATE_NORMAL; + TOKEN__SPACE: continue; + + TOKEN__NUMBER: + i = cmd-->0; + cmd = cmd + 2; + if (store_addline(i, cmd, _cmd->0 - 4)) + return ESTATE_ERROR; + ! Don't execute anything else on this line. + return ESTATE_DANGEROUS; + + TOKEN__VAR: + i = cmd_varassignment(); + jump checkresult; + + TOKEN_CLEAR: + i = cmd_clear(); + jump checkresult; + + TOKEN_CLS: + @erase_window -1; + break; + + TOKEN_FOR: + i = cmd_for(); + jump checkresult; + + TOKEN_GOTO: + i = cmd_goto(); + jump checkresult; + + TOKEN_IF: + i = cmd_if(); + if (i ~= ESTATE_NORMAL) + return i; + if (cmd == 0) + return ESTATE_NORMAL; + continue; + + TOKEN_INPUT: + i = cmd_input(); + jump checkresult; + + TOKEN_LIST: + i = cmd_list(); + jump checkresult; + + TOKEN_LOAD: + restore endofstatement; + error("Load failed"); + return ESTATE_ERROR; + + TOKEN_NEW: + return ESTATE_NEW; + + TOKEN_NEXT: + i = cmd_next(); + if (i == ESTATE_NORMAL) + continue; + return i; + + TOKEN_PRINT: + i = cmd_print(); + jump checkresult; + + TOKEN_QUIT: + error("Quit"); + return ESTATE_QUIT; + + TOKEN_REPEAT: + i = cmd_repeat(); + jump checkresult; + + TOKEN_RUN: + i = cmd_run(); + return i; + + TOKEN_SAVE: + save endofstatement; + error("Save failed"); + return ESTATE_ERROR; + + TOKEN_SCRIPT: + i = cmd_script(); + jump checkresult; + + TOKEN_STOP: + error("Stop"); + return ESTATE_ERROR; + + TOKEN_UNTIL: + i = cmd_until(); + if (i ~= ESTATE_ERROR) + return i; + continue; + jump checkresult; + + default: + error("Unimplemented token"); + return ESTATE_ERROR; + + .checkresult; + if (i == ESTATE_ERROR) + return ESTATE_ERROR; + break; + } + + .endofstatement; + if (eos(cmd->0) == 0) + { + error_nomore(); + return ESTATE_ERROR; + } + + if (cmd->0 == TOKEN__COLON) + cmd++; + } + + return ESTATE_NORMAL; +]; + diff --git a/malloc.h b/malloc.h new file mode 100644 index 0000000..4716918 --- /dev/null +++ b/malloc.h @@ -0,0 +1,202 @@ +constant FREE_MAGIC $FAEE; +constant ALLOC_MAGIC $A10C; + +constant FB_MAGIC 0; +constant FB_PREV 1; +constant FB_NEXT 2; +constant FB_SIZE 3; +constant FB__SIZE (4*2); + +constant AB_MAGIC 0; +constant AB_SIZE 1; +constant AB__SIZE (2*2); + +global mem_top = 0; +global mem_bottom = 0; +global mem_firstfree = 0; + +! Initialise the heap manager, given a value for TOP. It's assumed that the +! heap will stretch to HIMEM. + +[ mem_init bottom top; + mem_bottom = bottom; + mem_top = top; + ! Create a free chunk spanning the length of the block. + mem_firstfree = mem_bottom; + mem_firstfree-->FB_MAGIC = FREE_MAGIC; + mem_firstfree-->FB_PREV = 0; + mem_firstfree-->FB_NEXT = 0; + mem_firstfree-->FB_SIZE = mem_top - mem_bottom; +]; + +! Zero a block of memory. + +[ mem_zero ptr size; + while (size) + { + (ptr++)->0 = 0; + size--; + } +]; + +! Remove a node from the list. + +[ mem_node_remove p; + if (p == mem_firstfree) + mem_firstfree = p-->FB_NEXT; + if (p-->FB_PREV) + p-->FB_PREV-->FB_NEXT = p-->FB_NEXT; + if (p-->FB_NEXT) + p-->FB_NEXT-->FB_PREV = p-->FB_PREV; +]; + +! Try and allocate a block. + +[ mem_alloc size p; + ! Add space for the AB_ header. + + size = size + AB__SIZE; + + ! This block will eventually have to be a FB, once it's freed. So it + ! has to be big enough for the FB_ structure. + + if (size < FB__SIZE) + size = FB__SIZE; + + ! Iterate through the list trying to find a free chunk large enough. + + p = mem_firstfree; + while ((p ~= 0) && (p-->FB_SIZE < size)) + { + p = p-->FB_NEXT; + } + + if (p == 0) + { + ! No sufficiently large chunk could be found. + return 0; + } + + ! Can the block be shrunk, or is there not enough room? + + if ((p-->FB_SIZE - size) < FB__SIZE) + { + ! Yes; remove the node completely. + size = p-->FB_SIZE; + mem_node_remove(p); + } + else + { + ! No. Instead of removing the node, we shrink it + p-->FB_SIZE = p-->FB_SIZE - size; + p = p + p-->FB_SIZE; + } + + ! Initialise the allocated node. + + mem_zero(p, size); + p-->AB_MAGIC = ALLOC_MAGIC; + p-->AB_SIZE = size; + + return p+AB__SIZE; +]; + +! Try to free a block. + +[ mem_free p q; + ! Adjust the pointer to point to the alloc node itself. + p = p - AB__SIZE; + +#ifdef DEBUG; + ! Check the magic number. + if (p-->AB_MAGIC ~= ALLOC_MAGIC) + { + print "Trying to free invalid node ", p, "!^"; + print "Magic was "; phex(p-->AB_MAGIC, 4); + print " when it should have been "; phex(ALLOC_MAGIC, 4); + print ".^"; + return; + } +#endif; + + ! Turn the alloc node into a free node. + + q = p-->AB_SIZE; +#ifdef DEBUG; + memset(p, $55, q); +#endif; + p-->FB_MAGIC = FREE_MAGIC; + p-->FB_NEXT = mem_firstfree; + p-->FB_PREV = 0; + p-->FB_SIZE = q; + if (mem_firstfree) + mem_firstfree-->FB_PREV = p; + mem_firstfree = p; + + ! Right. We've successfully freed the block; p points to the FB + ! structure. + ! + ! Unfortunately, they way we use memory leads to lots of + ! fragmentation, which is bad. So we need to find out if we can coalesce + ! with the block immediately afterwards. + + if ((p+q)-->0 ~= FREE_MAGIC) + { + ! Nothing coalescable. + return; + } + + ! Change the size of our block to encompass the next block... + + p-->FB_SIZE = q + (p+q)-->FB_SIZE; + + ! ...and remove the next block from the free list. + + mem_node_remove(p+q); +]; + +! Get amount of free memory. + +[ mem_countfree p size; + size = 0; + p = mem_firstfree; + while (p ~= 0) + { + size = size + p-->FB_SIZE; + p = p-->FB_NEXT; + } + return size; +]; + +! Get total amount of memory. + +[ mem_counttotal; + return (mem_top - mem_bottom); +]; + +! Get amount of used memory. + +[ mem_countused; + return mem_counttotal() - mem_countfree(); +]; + +#ifdef DEBUG; +! Dump the free list. + +[ mem_show_free_list p; + print "Free list start^"; + p = mem_firstfree; + while (p ~= 0) + { + print " node ", p, " prev=", p-->FB_PREV, " next=", p-->FB_NEXT; + print " size=", p-->FB_SIZE; + if (p-->FB_MAGIC ~= FREE_MAGIC) + print " invalid magic"; + print "^"; + p = p-->FB_NEXT; + } + print "Free list end; used=", mem_countused(), " total=", mem_counttotal(), "^"; +]; + +#endif; + diff --git a/mktokens.awk b/mktokens.awk new file mode 100644 index 0000000..fc4da64 --- /dev/null +++ b/mktokens.awk @@ -0,0 +1,39 @@ +BEGIN { + count = 255 +} + +{ + t = $1 + token[t] = count + count-- + if (length(t) > 1) + print "constant TOKEN_" toupper(t) " " token[t] ";" +} + +END { + print "[ token_decode t;" + print "switch(t) {" + for (i in token) + print token[i] ": return \"" i "\";" + print "}" + print "return 0;" + print "];" + + print "[ token_encode p d;" + for (i in token) + { + if (length(i) > 1) + { + print "if (" + for (j=1; j<=length(i); j++) + print "(p->" (j-1) " == '" substr(i, j, 1) "') &&" + print " token_invalidchar(p->" (j-1) ")) { d->0 =" token[i] "; return " length(i) "; }" + } + else + print "if (p->0 == '" i "') { d->0 = " token[i] "; return 1; }" + } + print "return 0;" + print "];" +} + + diff --git a/script.h b/script.h new file mode 100644 index 0000000..a2103c0 --- /dev/null +++ b/script.h @@ -0,0 +1,266 @@ +array about_script --> + "About baZic" + "cls" + "print:print:print" + "print ~baZic is a downright scary implementation of ~;" + "print ~Basic for the Z-machine. It features proper dynamic ~;" + "print ~memory allocation, mark-sweep garbage collection, ~;" + "print ~fully tokenised program storage, a built-in program ~;" + "print ~editor (cough, cough) and is almost entirely useless. ~;" + "print ~It is complete enough to play Hunt the Wumpus on, and ~;" + "print ~a copy is provided.~" + "print" + "print ~The language is not complete. I gave up after a certain ~;" + "print ~point when I realised that Basic is so crappy a language ~;" + "print ~that it's not fun to implement, just annoying. I am now ~;" + "print ~even more impressed with the authors of things like 8kB ~;" + "print ~Microsoft Basic and BBC Basic; impressive work.~" + "print" + "print ~The main lacks to baZic are string operations, proper arrays, and ~;" + "print ~subroutines. The bulk of the string code is in there --- the ~;" + "print ~scary garbage collector --- but I haven't done the string ~;" + "print ~operators themselves. Arrays are sort of implemented, but they're ~;" + "print ~done by munging the variable name with the index. (See the Wumpus ~;" + "print ~program for an example.) Subroutines just haven't been done.~" + "print" + "print ~If you're interested in the innards of the program, you can do ~;" + "print ~'list -1' to list the stored Basic program with byte-code, or ~;" + "print ~'list -2' to view the variables in memory. If you do this while ~;" + "print ~an array is in use, odd things will happen.~" + "print" + "print ~baZic is licensed under the MIT public license. For more information, contact ~;" + "print ~dg@@64cowlark.com.~" + "print:print" + 0; + +array higherlower_script --> + "Higher / Lower" + "new" + "10print ~Higher, Lower~" + "15print" + "20target = rnd(100)" + "30tries = 1" + "40input ~What is your guess? ~; guess" + "50guess = val(guess)" + "60if guess < target then print ~Too low!~" + "70if guess > target then print ~Too high!~" + "80if guess = target then goto 110" + "90tries = tries + 1" + "100goto 40" + "110print ~You got it!~" + "120print ~In only~, tries, ~tries, too.~" + 0; + +array wumpus_script --> + "Hunt the Wumpus" + "new" + "10cls" + "20print ~Hunt the Wumpus --- loading data~" + "30cave1(0)=1:cave2(0)=4:cave3(0)=7" + "40cave1(1)=0:cave2(1)=2:cave3(1)=9" + "50cave1(2)=1:cave2(2)=3:cave3(2)=11" + "60cave1(3)=2:cave2(3)=4:cave3(3)=13" + "70cave1(4)=0:cave2(4)=3:cave3(4)=5" + "80cave1(5)=4:cave2(5)=6:cave3(5)=14" + "90cave1(6)=5:cave2(6)=7:cave3(6)=16" + "100cave1(7)=0:cave2(7)=6:cave3(7)=8" + "110cave1(8)=7:cave2(8)=9:cave3(8)=17" + "120cave1(9)=1:cave2(9)=8:cave3(9)=10" + "130cave1(10)=9:cave2(10)=11:cave3(10)=18" + "140cave1(11)=2:cave2(11)=10:cave3(11)=12" + "150cave1(12)=11:cave2(12)=13:cave3(12)=19" + "160cave1(13)=3:cave2(13)=12:cave3(13)=14" + "170cave1(14)=5:cave2(14)=13:cave3(14)=15" + "180cave1(15)=14:cave2(15)=16:cave3(15)=19" + "190cave1(16)=6:cave2(16)=15:cave3(16)=17" + "200cave1(17)=8:cave2(17)=16:cave3(17)=18" + "210cave1(18)=10:cave2(18)=17:cave3(18)=19" + "220cave1(19)=12:cave2(19)=15:cave3(19)=18" + "225print:input ~Do you want instructions? (y/n) ~; i$" + "226if i$ <> ~y~ then goto 1000" + "230cls" + "240print ~Welcome to HUNT THE WUMPUS!~" + "250print:print ~The wumpus lives in a cave of twenty rooms. ~;" + "260print ~Each room has three tunnels leading to other rooms. ~;" + "270print ~Two rooms have bottomless pits --- don't fall on. ~;" + "280print ~Two more contain Alien Space Bats. Disturb them, ~;" + "290print ~and they will whisk you away to some other room. ~;" + "300print:print:print ~Naturally, the wumpus is immune to all hazards. ~;" + "310print ~Usually, he's asleep: he'll wake up if he hears you ~;" + "320print ~shooting an arrow, or if you walk in on him. ~;" + "330print ~Once awake, he'll move around most turns (1 in 4 ~;" + "340print ~probability). If *he* walks in on *you*, he'll eat yer. ~;" + "350print:print:print ~Your only defence is your arrows. These arrows, ~;" + "360print ~which contain a Z-machine microcontroller, can be ~;" + "370print ~programmed to follow a certain course for up to five ~;" + "380print ~rooms. If they can't go where you told them, they'll ~;" + "390print ~move randomly. Did I mention they'll kill you, too, ~;" + "400print ~if you program them incorrectly? ~;" + "410print:print:print ~The wumpus has smelly feet; you can smell him in the ~;" + "420print ~next room. The bats rustle, you can hear them; the pits are ~;" + "430print ~drafty, and you can feel that.~" + "1000print:print ~Placing objects in maze...~" + "1010wumpus=rnd(20):player=rnd(20):pit1=rnd(20):pit2=rnd(20)" + "1020bat1=rnd(20):bat2=rnd(20)" + "1030if wumpus = player then goto 1000" + "1040if wumpus = pit1 then goto 1000" + "1050if wumpus = pit2 then goto 1000" + "1060if wumpus = bat1 then goto 1000" + "1070if wumpus = bat2 then goto 1000" + "1080if player = pit1 then goto 1000" + "1090if player = pit2 then goto 1000" + "1100if player = bat1 then goto 1000" + "1110if player = bat2 then goto 1000" + "1120if pit1 = pit2 then goto 1000" + "1130if pit1 = bat1 then goto 1000" + "1140if pit1 = bat2 then goto 1000" + "1150if pit2 = bat1 then goto 1000" + "1160if pit2 = bat2 then goto 1000" + "1170if bat1 = bat2 then goto 1000" + "1180awake=0" + "2000print" + "2010print ~You are in room number ~; player; ~. Exits lead off to rooms ~;" + "2020print cave1(player); ~, ~; cave2(player); ~ and ~; cave3(player); ~. ~" + "2030if cave1(player) = wumpus then goto 2070" + "2040if cave2(player) = wumpus then goto 2070" + "2050if cave3(player) = wumpus then goto 2070" + "2060goto 2080" + "2070print ~You can smell the wumpus!~" + "2080if (cave1(player) = pit1) or (cave1(player) = pit2) then goto 2120" + "2090if (cave2(player) = pit1) or (cave2(player) = pit2) then goto 2120" + "2100if (cave3(player) = pit1) or (cave3(player) = pit2) then goto 2120" + "2110goto 2121" + "2120print ~You feel a draught!~" + "2121if (cave1(player) = bat1) or (cave1(player) = bat2) then goto 2125" + "2122if (cave2(player) = bat1) or (cave2(player) = bat2) then goto 2125" + "2123if (cave3(player) = bat1) or (cave3(player) = bat2) then goto 2125" + "2124goto 2130" + "2125print ~You hear rustling!~" + "2130input ~Which room do you want to go to (or -1 to fire an arrow)? ~; i$" + "2135dest = val(i$)" + "2140if dest = -1 then goto 3000" + "2150if (cave1(player) = dest) or (cave2(player) = dest) or (cave3(player) = dest) then goto 2200" + "2160print:print ~You can't go that way.~" + "2170goto 2130" + "2200print" + "2210if dest=wumpus then goto 2600" + "2220if (dest=pit1) or (dest=pit2) then print ~*** AAAAAAaaaaaah ***~:print:print ~You have fallen down a pit.~:goto 5000" + "2230if (dest=bat1) or (dest=bat2) then print ~You have been abducted by alien space bats.~:dest=rnd(20)" + "2240player=dest" + "2500if awake=0 then goto 2000" + "2510i = rnd(4)" + "2520if i<>0 then goto 2000" + "2530print ~You hear the patter of tiny feet as the wumpus moves.~" + "2540i = rnd(3)" + "2550if i=0 then wumpus = cave1(wumpus)" + "2560if i=1 then wumpus = cave2(wumpus)" + "2570if i=2 then wumpus = cave3(wumpus)" + "2580goto 2000" + "2600player=dest" + "2210if awake=1 then print ~*** CRUNCH ***~:print:print ~You have been eaten by the wumpus.~:goto 5000" + "2620awake=1" + "2630print ~The wumpus is here! Asleep, though it won't stay like that for long.~" + "2640goto 2000" + "3000print" + "3005arrow=player" + "3010for i=1 to 5" + "3020input ~Please enter the next room for the arrow to go to: ~; i$" + "3025j = val(i$)" + "3030if (j = cave1(arrow)) or (j = cave2(arrow)) or (j = cave3(arrow)) then arrow=j:goto 3071" + "3040j = rnd(3)" + "3050if j = 0 then arrow = cave1(arrow)" + "3060if j = 1 then arrow = cave2(arrow)" + "3070if j = 2 then arrow = cave3(arrow)" + "3071if j = wumpus then print:print ~You hear a distant scream... the wumpus is dead! Now you just have to deal with the RSPCA lawyers.~:goto 5010" + "3080if j = player then print:print ~You have just been impaled on one of your own arrows.~:goto 5000" + "3090next" + "3095awake=1" + "3100goto 2500" + "5000print:input ~You have died. Play again? (y/n) ~; i$:goto 5020" + "5010print:input ~You have won. Play again? (y/n) ~; i$" + "5020if i$=~y~ then goto 1000" + "5030print:print ~Bye bye!~" + 0; + +array scripts --> + about_script + higherlower_script + wumpus_script + 0; +constant NUM_SCRIPTS 3; + +global script_id = 0; +global script_lineno = 0; + +! List the available scripts. + +[ script_list i; + print "Available scripts:^"; + i = 0; + while (scripts-->i) + { + print " ", i, ": ", (string) scripts-->i-->0, "^"; + i++; + } + return ESTATE_NORMAL; +]; + +! Invoke a script. + +[ script_invoke i; + if ((i < 0) || (i >= NUM_SCRIPTS)) + { + error("Script ID out of range"); + return ESTATE_ERROR; + } + + print "Invoking script ~", (string) scripts-->i-->0, "~...^"; + script_id = i; + script_lineno = 1; + + do { + store_init(heap, heap+HEAP_SIZE); + do { + i = command_loop(script_reader); + } until ((i == ESTATE_QUIT) || (i == ESTATE_NEW)); + } until (i == ESTATE_QUIT); + + print "Script finished.^"; + return ESTATE_NORMAL; +]; + +! Read in a command from a script into the parse buffer. + +[ script_reader buf in l; + buf = mem_alloc(255); + if (buf == 0) + return -2; + + ! Get the compressed data. + + l = scripts-->script_id-->script_lineno; + if (l == 0) + l = "quit"; + else + script_lineno++; + + ! Decompress it into the temporary buffer. + + buf-->0 = 255; + @output_stream 3 buf; + print (string) l; + @output_stream -3; + + ! Ensure the string is zero-terminated. + + in = buf+2; + in->(buf-->0) = 0; + + ! Tokenise the stream. + + in = tokenise_stream(in, parsebuffer); + + mem_free(buf); + return in; +]; + diff --git a/store.h b/store.h new file mode 100644 index 0000000..2792b00 --- /dev/null +++ b/store.h @@ -0,0 +1,203 @@ +global store_bottom = 0; +global store_eop = 0; +global store_top = 0; + +constant VN_NEXT = 0; +constant VN_TYPE = 1; +constant VN_VALUE = 2; +constant VN__SIZE = (3*2); + +global store_firstvar = 0; + +! Initialise the program store. + +[ store_init bottom top; + store_bottom = bottom; + store_top = top; + store_bottom->0 = 0; + store_eop = store_bottom; + string_init(); +]; + +! --- Heap management ------------------------------------------------------- + +! Heap reinitialisation; all heap pointers are invalidated. + +[ store_heapclean; + store_firstvar = 0; +]; + +! Create a new variable. + +[ store_newvar varname type value node; + node = mem_alloc(VN__SIZE + strlen(varname) + 1); + node-->VN_NEXT = store_firstvar; + node-->VN_TYPE = type; + node-->VN_VALUE = value; + strcpy(node+VN__SIZE, varname); + store_firstvar = node; +]; + +! Look for an old variable. + +[ store_searchvar varname node; + node = store_firstvar; + while (node) + { + if (strcmp(varname, node+VN__SIZE) == 0) + return node; + node = node-->VN_NEXT; + } + return 0; +]; + +! Assign a value to a variable. + +[ store_assign varname type value node; + node = store_searchvar(varname); + if (node) + { + node-->VN_TYPE = type; + node-->VN_VALUE = value; + return; + } + store_newvar(varname, type, value); +]; + +! Get the contents of a variable. + +[ store_lookup varname node val; + node = store_searchvar(varname); + if (node) + { + val = mem_alloc(4); + val-->0 = node-->VN_TYPE; + val-->1 = node-->VN_VALUE; + return val; + } + return 0; +]; + +! List all variables. + +[ store_listvars var count; + var = store_firstvar; + count = 0; + print "Variables:^"; + while (var) + { + print " ", (astring) var+VN__SIZE; + switch (var-->VN_TYPE) + { + TYPE_INT: + print " integer ", var-->VN_VALUE; + break; + + TYPE_STRING: + print " string ~"; + string_print(var-->VN_VALUE); + print "~"; + break; + + default: + print " unknown type"; + } + print "^"; + + var = var-->VN_NEXT; + count++; + } + print "Total: ", count, "^"; +]; + +! --- Program storage ------------------------------------------------------- + +! List the current program. + +[ store_listprogram line linelen; + line = store_bottom; + while (linelen = line->0) + { + print " ", (line+1)-->0; + detokenise_stream(line+3); + line = line + linelen; + } +]; + +[ store_listprogramhex line linelen; + line = store_bottom; + while (linelen = line->0) + { + print " ", (line+1)-->0; + detokenise_stream(line+3); + print "-> "; + hexdump(line, linelen); + line = line + linelen; + } +]; + +! Add a line to the program. + +[ store_addline linenum cmd linelen line i; + line = store_bottom; + while (line->0) + { + if ((line+1)-->0 >= linenum) + break; + line = line + line->0; + } + + ! Do we need to append this line to the end of the program? + + if (line->0 == 0) + { + ! Yes. + + line = store_eop; + store_eop->(linelen+3) = 0; + store_eop = store_eop + linelen + 3; + } + else + { + ! Do we need to remove an existing line first? + + if ((line+1)-->0 == linenum) + { + i = line->0; + memcpy(line, line+i, store_eop-line-i); + store_eop = store_eop - i; + store_eop->0 = 0; + } + + ! If there's no actual data in the new line, give up. + + if (linelen == 1) + return 0; + + ! Open up space for the new line. + + memcpy(line+linelen+3, line, store_eop-line); + store_eop = store_eop + linelen + 3; + store_eop->0 = 0; + } + + ! Copy the line in. + + line->0 = linelen+3; + (line+1)-->0 = linenum; + memcpy(line+3, cmd, linelen); + + return 0; +]; + +! Find a line, by number. + +[ store_findline lineno line; + line = store_bottom; + while (line->0 && ((line+1)-->0 ~= lineno)) + line = line + line->0; + if (line->0 == 0) + return 0; + return line; +]; + diff --git a/string.h b/string.h new file mode 100644 index 0000000..6e29a39 --- /dev/null +++ b/string.h @@ -0,0 +1,180 @@ +global string_first; + +constant STRING_NEXT 0; +constant STRING_PTR 1; +constant STRING_LEN 2; +constant STRING_REF 3; +constant STRING__SIZE (4*2); + +! Initialise the string store. + +[ string_init; + string_first = 0; +]; + +! Returns false if the string is constant (i.e., it points to a literal in +! the byte-code itself). + +[ string_isvar s; + return ((s < store_bottom) || (s > store_eop)); +]; + +! Add an entry for a new string. + +[ string_alloc str len s; + if (string_isvar(str)) + { + ! Need to duplicate the string. + s = mem_alloc(len); + memcpy(s, str, len); + str = s; + } + s = mem_alloc(STRING__SIZE); + s-->STRING_NEXT = string_first; + s-->STRING_PTR = str; + s-->STRING_LEN = len; + string_first = s; + return s; +]; + +! Print a string. + +[ string_print s i; + i = s-->STRING_LEN; + s = s-->STRING_PTR; + while (i--) + print (char) (s++)->0; +]; + +! Compare two strings. + +[ string_compare s1 s2 i; + i = s1-->STRING_LEN; + if (i ~= s2-->STRING_LEN) + return -1; + s1 = s1-->STRING_PTR; + s2 = s2-->STRING_PTR; + while (i--) + if ((s1++)->0 ~= (s2++)->0) + return -1; + return 0; +]; + +! Garbage collection: clean. + +[ string_clean s; + s = string_first; + while (s) + { + s-->STRING_REF = 0; + s = s-->STRING_NEXT; + } +]; + +! Garbage collection: mark. + +[ string_mark var; + var = store_firstvar; + while (var) + { + if (var-->VN_TYPE == TYPE_STRING) + var-->VN_VALUE-->STRING_REF = 1; + var = var-->VN_NEXT; + } +]; + +#ifdef DEBUG; +! Garbage collection: list status. + +[ string_status s; + s = string_first; + while (s) + { + print s, " ", s-->STRING_NEXT, " "; + string_print(s); + print " ", s-->STRING_REF, "^"; + s = s-->STRING_NEXT; + } +]; +#endif; + +! Garbage collection: sweep. + +[ string_sweep s olds news; + olds = 0; + s = string_first; + while (s) + { + news = s-->STRING_NEXT; + if (s-->STRING_REF == 0) + { + if (string_isvar(s-->STRING_PTR)) + mem_free(s-->STRING_PTR); + if (olds) + olds-->STRING_NEXT = news; + else + { + ! If olds is null, then we're on the first + ! string in the list; which means we need to + ! change string_first when we remove it. + string_first = news; + } + mem_free(s); + } + else + olds = s; + + s = news; + } +]; + +! Garbage collector. + +[ string_gc; + ! Phase one: clean. + string_clean(); + ! Phase two: mark. + string_mark(); +#ifdef DEBUG; + !string_status(); +#endif; + ! Phase three: sweep. + string_sweep(); +]; + +! Turn a string into an int. + +[ string_toint s len i neg; + len = s-->STRING_LEN; + s = s-->STRING_PTR; + i = 0; + neg = 1; + + while (len && (s->0 == 32 or 9)) + { + s++; + len--; + } + + if (len == 0) + return i; + + if (s->0 == '-') + { + neg = -1; + s++; + len--; + } + + if (len == 0) + return i; + + while (len && (s->0 >= '0') && (s->0 <= '9')) + { + i = i*10 + (s->0 - '0'); + s++; + len--; + } + + return i*neg; +]; diff --git a/token-table.h b/token-table.h new file mode 100644 index 0000000..b17114f --- /dev/null +++ b/token-table.h @@ -0,0 +1,203 @@ +constant TOKEN_AND 255; +constant TOKEN_CLEAR 254; +constant TOKEN_CLS 253; +constant TOKEN_IF 252; +constant TOKEN_INPUT 251; +constant TOKEN_FOR 250; +constant TOKEN_GOTO 249; +constant TOKEN_LIST 248; +constant TOKEN_LOAD 247; +constant TOKEN_RND 246; +constant TOKEN_NEW 245; +constant TOKEN_NEXT 244; +constant TOKEN_OR 243; +constant TOKEN_PRINT 242; +constant TOKEN_QUIT 241; +constant TOKEN_REPEAT 240; +constant TOKEN_RUN 239; +constant TOKEN_SAVE 238; +constant TOKEN_SCRIPT 237; +constant TOKEN_STEP 236; +constant TOKEN_STOP 235; +constant TOKEN_THEN 234; +constant TOKEN_TO 233; +constant TOKEN_UNTIL 232; +constant TOKEN_VAL 231; +[ token_decode t; +switch(t) { +255: return "and"; +236: return "step"; +251: return "input"; +243: return "or"; +244: return "next"; +237: return "script"; +245: return "new"; +242: return "print"; +239: return "run"; +233: return "to"; +249: return "goto"; +253: return "cls"; +247: return "load"; +235: return "stop"; +254: return "clear"; +238: return "save"; +232: return "until"; +240: return "repeat"; +252: return "if"; +231: return "val"; +250: return "for"; +246: return "rnd"; +248: return "list"; +234: return "then"; +241: return "quit"; +} +return 0; +]; +[ token_encode p d; +if ( +(p->0 == 'a') && +(p->1 == 'n') && +(p->2 == 'd') && + token_invalidchar(p->3)) { d->0 =255; return 3; } +if ( +(p->0 == 's') && +(p->1 == 't') && +(p->2 == 'e') && +(p->3 == 'p') && + token_invalidchar(p->4)) { d->0 =236; return 4; } +if ( +(p->0 == 'i') && +(p->1 == 'n') && +(p->2 == 'p') && +(p->3 == 'u') && +(p->4 == 't') && + token_invalidchar(p->5)) { d->0 =251; return 5; } +if ( +(p->0 == 'n') && +(p->1 == 'e') && +(p->2 == 'x') && +(p->3 == 't') && + token_invalidchar(p->4)) { d->0 =244; return 4; } +if ( +(p->0 == 'o') && +(p->1 == 'r') && + token_invalidchar(p->2)) { d->0 =243; return 2; } +if ( +(p->0 == 'n') && +(p->1 == 'e') && +(p->2 == 'w') && + token_invalidchar(p->3)) { d->0 =245; return 3; } +if ( +(p->0 == 's') && +(p->1 == 'c') && +(p->2 == 'r') && +(p->3 == 'i') && +(p->4 == 'p') && +(p->5 == 't') && + token_invalidchar(p->6)) { d->0 =237; return 6; } +if ( +(p->0 == 'p') && +(p->1 == 'r') && +(p->2 == 'i') && +(p->3 == 'n') && +(p->4 == 't') && + token_invalidchar(p->5)) { d->0 =242; return 5; } +if ( +(p->0 == 'r') && +(p->1 == 'u') && +(p->2 == 'n') && + token_invalidchar(p->3)) { d->0 =239; return 3; } +if ( +(p->0 == 'c') && +(p->1 == 'l') && +(p->2 == 's') && + token_invalidchar(p->3)) { d->0 =253; return 3; } +if ( +(p->0 == 'g') && +(p->1 == 'o') && +(p->2 == 't') && +(p->3 == 'o') && + token_invalidchar(p->4)) { d->0 =249; return 4; } +if ( +(p->0 == 't') && +(p->1 == 'o') && + token_invalidchar(p->2)) { d->0 =233; return 2; } +if ( +(p->0 == 'l') && +(p->1 == 'o') && +(p->2 == 'a') && +(p->3 == 'd') && + token_invalidchar(p->4)) { d->0 =247; return 4; } +if ( +(p->0 == 's') && +(p->1 == 't') && +(p->2 == 'o') && +(p->3 == 'p') && + token_invalidchar(p->4)) { d->0 =235; return 4; } +if ( +(p->0 == 'c') && +(p->1 == 'l') && +(p->2 == 'e') && +(p->3 == 'a') && +(p->4 == 'r') && + token_invalidchar(p->5)) { d->0 =254; return 5; } +if ( +(p->0 == 's') && +(p->1 == 'a') && +(p->2 == 'v') && +(p->3 == 'e') && + token_invalidchar(p->4)) { d->0 =238; return 4; } +if ( +(p->0 == 'u') && +(p->1 == 'n') && +(p->2 == 't') && +(p->3 == 'i') && +(p->4 == 'l') && + token_invalidchar(p->5)) { d->0 =232; return 5; } +if ( +(p->0 == 'r') && +(p->1 == 'e') && +(p->2 == 'p') && +(p->3 == 'e') && +(p->4 == 'a') && +(p->5 == 't') && + token_invalidchar(p->6)) { d->0 =240; return 6; } +if ( +(p->0 == 'i') && +(p->1 == 'f') && + token_invalidchar(p->2)) { d->0 =252; return 2; } +if ( +(p->0 == 'f') && +(p->1 == 'o') && +(p->2 == 'r') && + token_invalidchar(p->3)) { d->0 =250; return 3; } +if ( +(p->0 == 'v') && +(p->1 == 'a') && +(p->2 == 'l') && + token_invalidchar(p->3)) { d->0 =231; return 3; } +if ( +(p->0 == 'r') && +(p->1 == 'n') && +(p->2 == 'd') && + token_invalidchar(p->3)) { d->0 =246; return 3; } +if ( +(p->0 == 'l') && +(p->1 == 'i') && +(p->2 == 's') && +(p->3 == 't') && + token_invalidchar(p->4)) { d->0 =248; return 4; } +if ( +(p->0 == 'q') && +(p->1 == 'u') && +(p->2 == 'i') && +(p->3 == 't') && + token_invalidchar(p->4)) { d->0 =241; return 4; } +if ( +(p->0 == 't') && +(p->1 == 'h') && +(p->2 == 'e') && +(p->3 == 'n') && + token_invalidchar(p->4)) { d->0 =234; return 4; } +return 0; +]; diff --git a/tokeniser.h b/tokeniser.h new file mode 100644 index 0000000..dc1d7e8 --- /dev/null +++ b/tokeniser.h @@ -0,0 +1,323 @@ +include "token-table.h"; +constant TOKEN__EOL 0; +constant TOKEN__NUMBER 1; +constant TOKEN__STRING 2; +constant TOKEN__VAR 3; +constant TOKEN__SPACE 4; +constant TOKEN__PLUS 5; +constant TOKEN__MINUS 6; +constant TOKEN__STAR 7; +constant TOKEN__SLASH 8; +constant TOKEN__COLON 9; +constant TOKEN__EQUALS 10; +constant TOKEN__COMMA 11; +constant TOKEN__LPAREN 12; +constant TOKEN__RPAREN 13; +constant TOKEN__LARROW 14; +constant TOKEN__RARROW 15; +constant TOKEN__SEMICOLON 16; +constant TOKEN__GEQUAL 17; +constant TOKEN__LEQUAL 18; +constant TOKEN__NEQUAL 19; + +! Is this an invalid alphabetical token character? + +[ token_invalidchar c; + return ((c < 'a') || (c > 'z')); +]; + +! Is this a valid variable name character? + +[ token_validvarnamechar c; + return (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || + (c == '_') || (c == '%') || (c == '$')); +]; + +! Is this a number? + +[ token_isnumber c; + return ((c >= '0') && (c <= '9')); +]; + +! Is this whitespace? + +[ token_isspace c; + return ((c == 32) || (c == 9)); +]; + +! Tokenise an input stream. +! +! The input and output pointers must point to different regions of memory. + +[ tokenise_stream in out incount outcount i j k; + out->0 = 0; + outcount = 1; + incount = 0; + + while (in->incount) + { + i = token_encode(in+incount, out+outcount); + if (i ~= 0) + { + incount = incount + i; + outcount = outcount + 1; + } + else + { + ! Not a recognised token. We test against all the + ! other things we recognise. Note the order! This + ! is important. + + i = in->incount; + + ! Is it white space? + + if (token_isspace(i)) + { + while (token_isspace(in->(incount))) + incount++; + out->(outcount++) = TOKEN__SPACE; + continue; + } + + ! Is it a number? + + if (token_isnumber(i)) + { + out->(outcount++) = TOKEN__NUMBER; + i = 0; + do { + i = i*10 + (in->incount - '0'); + incount++; + } until (token_isnumber(in->incount) == 0); + (out+outcount)-->0 = i; + outcount = outcount + 2; + continue; + } + + ! Is it a string? + + if (i == '"') + { + ! Work out the size of the string. + + incount++; + i = incount; + do { + k = in->(incount++); + } until ((k == '"') || (k == 0)); + j = incount-i-1; + + ! Emit the opcode. + + out->(outcount++) = TOKEN__STRING; + out->(outcount++) = j; + + ! And now emit the string itself. + + memcpy(out+outcount, in+i, j); + outcount = outcount + j; + + ! Remember to skip over the close quote + ! before exiting. + + !incount++; + continue; + } + + ! Is it an operator? + + switch (i) + { + '+': out->(outcount++) = TOKEN__PLUS; + incount++; + continue; + + '-': out->(outcount++) = TOKEN__MINUS; + incount++; + continue; + + '*': out->(outcount++) = TOKEN__STAR; + incount++; + continue; + + '/': out->(outcount++) = TOKEN__SLASH; + incount++; + continue; + + ':': out->(outcount++) = TOKEN__COLON; + incount++; + continue; + + '=': out->(outcount++) = TOKEN__EQUALS; + incount++; + continue; + + ',': out->(outcount++) = TOKEN__COMMA; + incount++; + continue; + + '(': out->(outcount++) = TOKEN__LPAREN; + incount++; + continue; + + ')': out->(outcount++) = TOKEN__RPAREN; + incount++; + continue; + + '<': switch (in->(++incount)) + { + '>': out->(outcount++) = TOKEN__NEQUAL; + incount++; + break; + + '=': out->(outcount++) = TOKEN__LEQUAL; + incount++; + break; + + default: out->(outcount++) = TOKEN__LARROW; + break; + } + continue; + + '>': switch (in->(++incount)) + { + '=': out->(outcount++) = TOKEN__GEQUAL; + incount++; + break; + + default: out->(outcount++) = TOKEN__RARROW; + break; + } + continue; + + ';': out->(outcount++) = TOKEN__SEMICOLON; + incount++; + continue; + } + + ! Is it a variable name? + + if (token_validvarnamechar(i)) + { + out->(outcount++) = TOKEN__VAR; + do { + out->(outcount++) = in->(incount++); + } until (token_validvarnamechar(in->incount) == 0); + out->(outcount++) = 0; + continue; + } + + return incount; + } + } + + ! Patch up the line length. + + out->outcount = TOKEN__EOL; + out->0 = outcount + 1; + + return -1; +]; + +! Detokenise a stream. + +[ detokenise_stream in i; + while (1) + { + i = (in++)->0; + switch(i) + { + TOKEN__EOL: + print "^"; + return; + + TOKEN__VAR: + while (i = (in++)->0) + print (char) i; + break; + + TOKEN__NUMBER: + print in-->0; + in = in + 2; + break; + + TOKEN__SPACE: + print " "; + break; + + TOKEN__STRING: + i = (in++)->0; + print "~"; + while (i--) + print (char) (in++)->0; + print "~"; + break; + + TOKEN__PLUS: + print "+"; + break; + + TOKEN__MINUS: + print "-"; + break; + + TOKEN__STAR: + print "*"; + break; + + TOKEN__SLASH: + print "/"; + break; + + TOKEN__COLON: + print ":"; + break; + + TOKEN__EQUALS: + print "="; + break; + + TOKEN__COMMA: + print ","; + break; + + TOKEN__LPAREN: + print "("; + break; + + TOKEN__RPAREN: + print ")"; + break; + + TOKEN__LARROW: + print "<"; + break; + + TOKEN__RARROW: + print ">"; + break; + + TOKEN__SEMICOLON: + print ";"; + break; + + TOKEN__GEQUAL: + print ">="; + break; + + TOKEN__LEQUAL: + print "<="; + break; + + TOKEN__NEQUAL: + print "<>"; + break; + default: + print (string) token_decode(i); + } + } +]; + diff --git a/tokens.dat b/tokens.dat new file mode 100644 index 0000000..7ae533c --- /dev/null +++ b/tokens.dat @@ -0,0 +1,25 @@ +and +clear +cls +if +input +for +goto +list +load +rnd +new +next +or +print +quit +repeat +run +save +script +step +stop +then +to +until +val diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..aed486f --- /dev/null +++ b/utils.h @@ -0,0 +1,100 @@ +array hextable -> '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F'; + +! Fills a chunk of memory. + +[ memset ptr i size; + while (size--) + (ptr++)->0 = i; +]; + +! Copies a chunk of memory. + +[ memcpy dest src len; + if (dest > src) + { + ! Copy down. + dest = dest + len; + src = src + len; + while (len--) + (--dest)->0 = (--src)->0; + } + else + { + ! Copy up. + while (len--) + (dest++)->0 = (src++)->0; + } +]; + +! Emits a string. + +[ astring s; + while (s->0) + print (char) (s++)->0; +]; + +! Duplicates a string. + +[ strdup s s1 i; + i = strlen(s); + s1 = mem_alloc(i+1); + strcpy(s1, s); + return s1; +]; + +! Compares two strings. + +[ strcmp s1 s2; + while (s1->0 == s2->0) + { + if (s1->0 == 0) + return 0; + s1++; + s2++; + } + return 1; +]; + +! Counts the length of a string. + +[ strlen s i; + i = 0; + while ((s++)->0) + i++; + return i; +]; + +! Copies a string from one place to another. + +[ strcpy dest src; + do { + (dest++)->0 = (src++)->0; + } until (src->0 == 0); + dest->0 = 0; +]; + +! Outputs a hex number. + +[ phex i digits; + if (digits == 4) + { + print (char) hextable->((i / 4096) & 15); + print (char) hextable->((i / 256) & 15); + } + print (char) hextable->((i / 16) & 15); + print (char) hextable->(i & 15); +]; + +! Dumps some memory in hex. + +[ hexdump p size; + while (size) + { + phex(p->0, 2); + print " "; + size--; + p++; + } + print "^"; +]; + -- 2.31.1