From: Jason Self Date: Mon, 19 Aug 2019 12:40:15 +0000 (-0700) Subject: Import baZic by David Given X-Git-Url: https://jxself.org/git/?p=bazic.git;a=commitdiff_plain;h=d7bc793189eb934ffac3fa1fc0b39f3dbbeca1e7 Import baZic by David Given Version 0.1 From http://cowlark.com/bazic/ --- d7bc793189eb934ffac3fa1fc0b39f3dbbeca1e7 diff --git a/.gitingore b/.gitingore new file mode 100644 index 0000000..bddb1e1 --- /dev/null +++ b/.gitingore @@ -0,0 +1 @@ +bazic.z* \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8dcfa1b --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +INFORM = inform +IFLAGS = +include_path= '$$MAX_STATIC_DATA=65535' -D + +all: bazic + +token-table.h: tokens.dat mktokens.awk + awk -f mktokens.awk tokens.dat > token-table.h + +bazic: *.inf *.h token-table.h + $(INFORM) $(IFLAGS) bazic.inf bazic.z5 + diff --git a/README b/README new file mode 100644 index 0000000..73f2530 --- /dev/null +++ b/README @@ -0,0 +1,104 @@ +baZic version 0.1 +================= + +(C) 2001 David Given +http://www.cowlark.com +dg@cowlark.com + +Introduction +------------ + +Welcome to baZic, my totally mad Basic interpreter for the Z-machine! + +Please note that this is unfinished. While enough exists to run a decent game +of Hunt the Wumpus, there are big chunks of functionality missing; roughly 50% +of the Basic language. Notably, unimplemented features are: + +* String operators (the hooks are there, I just need to write the code) +* Subroutines (GOSUB & friends) +* Procedures (SUB & friends) +* Proper arrays + +I am unlikely ever to finish this; the Basic language is just too annoying for +words. It's even more annoying to implement than to write in. (It uses the +Parser from Hell.) If you wish to continue it, be my guest. + +What you *do* get, however, is: + +* Dynamic memory allocation, complete with block coalescing +* A full mark/sweep garbage collector +* Dynamic typing +* Full tokenisation for fast(!) execution of programs +* Full detokenisation when listing them again +* An interactive Basic development environment (cough, cough) +* State-of-the-art implementations of Hunt the Wumpus and Guess the Number + built in to the very interpreter +* Pseudo-arrays + +The memory allocator is nicely modular if you feel like ripping it out. Ditto +the garbage collector. + +Usage +----- + +Run the program in the normal way. You get presented with a Basic prompt. You +can now type in lines of Basic code and they will get executed. You can add +lines to the current program by prefixing them with a line number. Most basic +Basic keywords work. + +SAVE, LOAD and QUIT work as expected (for a text adventure). LIST has some +extra features; LIST -1 will display the program's byte code (for those with a +morbid curiosity). LIST -2 will dump the currently defined variables. + +If you modify the current program, all currently defined variables are lost. +Variables are dynamically typed; while you can put $ and % on the end of the +names if you want, they're meaningless. + +Arrays are funny. Rather than implement DIM, I just added a quick hack to +concatenate the array index on the end of the array name. So you don't need to +declare an array, and storage is only taken up for those elements that actually +exist. If you have any array entries defined, LIST -2 may do strange things +(the name of the array entry variables aren't strictly printable). + +The SCRIPT keyword allows you to load one of several predefined programs into +the interpreter's memory. Use it with no arguments to list the available +programs. Give it the program index for more information. SCRIPT 0 gives a +brief guide to baZic (even briefer than this, although written when I had more +sleep). + +Bugs +---- + +This program contains no bugs whatsoever. Excuse me, the New World Order made +me say that. + +Credits +------- + +You can blame Graham Nelson, for the Inform compiler with which this is +written, and Acorn, for producing the BBC Microcomputer on which I got the hack +habit. + +License +------- + +This code is licensed under the MIT open source license. + +Copyright (c) 2001, David Given All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS +IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE +AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/bazic.inf b/bazic.inf new file mode 100644 index 0000000..b727dc1 --- /dev/null +++ b/bazic.inf @@ -0,0 +1,111 @@ +include "malloc.h"; +include "tokeniser.h"; +include "store.h"; +include "string.h"; +include "interpreter.h"; +include "utils.h"; +include "script.h"; + +constant HEAP_SIZE $7FFF; +array heap -> HEAP_SIZE; +array parsebuffer -> 256; + +! Draw the status line. + +[ draw_status; + @split_window 1; + @set_window 1; + @set_cursor 1 1; + style reverse; + spaces (0->33)-1; + @set_cursor 1 2; + print "baZic 0.1 (C) 2001 David Given. "; + ! We add on some stuff in the next line, because we don't want to + ! include the input buffer in the memory count. + print (mem_countfree()+255+AB__SIZE), " bytes free."; + @set_cursor 1 1; + style roman; + @set_window 0; +]; + +! Read in a command from the user into the parse buffer. + +[ read_interactive_command buf in; + buf = mem_alloc(255); + if (buf == 0) + return -2; + + ! Prompt the user for input. + + buf->0 = 255; + print "> "; + read buf 0 draw_status; + + ! Ensure the string is zero-terminated. + + in = buf+2; + in->(buf->1) = 0; + + ! Tokenise the stream. + + in = tokenise_stream(in, parsebuffer); + + mem_free(buf); + return in; +]; + +! Clear all variables and reinit the heap. + +[ cmd_clear; + mem_init(store_eop+1, heap+HEAP_SIZE); + store_heapclean(); + return ESTATE_NORMAL; +]; + +! Invoke a command loop. + +[ command_loop func i; + cmd_clear(); + do { + string_gc(); + i = indirect(func); + switch (i) + { + -1: ! success + !detokenise_stream(parsebuffer+1); + i = execute_command(parsebuffer); + break; + + -2: ! out of memory + print "Insufficient memory for interactive prompt!^"; + print "Dumping variables and trying again.^"; + i = ESTATE_DANGEROUS; + break; + + -3: ! end of program or script or whatever + i = ESTATE_ERROR; + break; + + default: ! parse error + print "Parse error at offset ", i, ".^"; + i = ESTATE_ERROR; + break; + } + } until (i < 0); + return i; +]; + +[ Main i; + print "^^^^^^^^baZic v0.1^(C) 2001 David Given^^"; + print "To play ~Hunt the Wumpus~, type ~script 2~ to load "; + print "the program, and then ~run~ to start it. Try ~script~ on its "; + print "own to see what else is available.^^"; + + do { + store_init(heap, heap+HEAP_SIZE); + do { + i = command_loop(read_interactive_command); + } until ((i == ESTATE_QUIT) || (i == ESTATE_NEW)); + } until (i == ESTATE_QUIT); +]; + diff --git a/interpreter.h b/interpreter.h new file mode 100644 index 0000000..47cdce1 --- /dev/null +++ b/interpreter.h @@ -0,0 +1,1099 @@ +constant ESTATE_NEW -3; +constant ESTATE_QUIT -2; +constant ESTATE_DANGEROUS -1; +constant ESTATE_NORMAL 0; +constant ESTATE_ERROR 1; + +constant TYPE_INT = 0; +constant TYPE_STRING = 1; + +constant STACK_NONE = 0; +constant STACK_FOR = 1; +constant STACK_REPEAT = 2; +constant STACK_WHILE = 3; +constant STACK_GOSUB = 4; + +constant STACK_SIZE = 16; +array stack_type table STACK_SIZE; +array stack_cmd table STACK_SIZE; +array stack_line table STACK_SIZE; +global stack_ptr = 1; + +global cmd; +global program_ptr = 0; +global program_lineno; + +! An error has occurred. + +[ error msg; + print "^", (string) msg; + if (program_ptr) + print " at line ", program_lineno; + print ".^"; +]; + +! Miscellaneous errors. + +[ error_nomore; error("Token didn't expect more input"); ]; +[ error_typemismatch; error("Type mismatch"); ]; +[ error_missingopenp; error("Missing open parenthesis"); ]; +[ error_missingclosep; error("Missing close parenthesis"); ]; +[ error_syntaxerror; error("Syntax error"); ]; +[ error_outofmemory; error("Out of memory"); ]; +[ error_notrunning; error("Not running a program"); ]; +[ error_stackoverflow; error("Stack overflow"); ]; + +! End of statement token? + +[ eos t; + return ((t == TOKEN__EOL) || (t == TOKEN__COLON)); +]; + +! Skip white space. + +[ skipwhite; + while (cmd->0 == TOKEN__SPACE) + cmd++; +]; + +! Read and evaluate an lvalue. + +[ eval_lvalue varname i j val; + skipwhite(); + + varname = cmd; + while ((cmd++)->0); + + skipwhite(); + if (cmd->0 ~= TOKEN__LPAREN) + { + ! It's not an array, so we can return the raw name. + return strdup(varname); + } + + cmd++; + val = eval_expression(); + if (val == 0) + return 0; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + mem_free(val); + return 0; + } + i = val-->1; + mem_free(val); + val = i; + + skipwhite(); + if (cmd->0 == TOKEN__COMMA) + { + mem_free(val); + error("Multidimensional arrays are not supported"); + return 0; + } + if (cmd->0 ~= TOKEN__RPAREN) + { + mem_free(val); + error_syntaxerror(); + return 0; + } + cmd++; + + i = strlen(varname); + j = mem_alloc(i+2); + strcpy(j, varname); + j->i = val; + j->(i+1) = 0; + + return j; +]; + +! Read and evaluate a leaf expression. + +[ eval_leaf i j ret; + skipwhite(); + + i = ((cmd++)->0); + switch (i) + { + TOKEN__NUMBER: + ret = mem_alloc(4); + ret-->0 = TYPE_INT; + ret-->1 = cmd-->0; + cmd = cmd + 2; + return ret; + + TOKEN__STRING: + ret = mem_alloc(4); + ret-->0 = TYPE_STRING; + ret-->1 = string_alloc(cmd+1, cmd->0); + cmd = cmd + cmd->0 + 1; + return ret; + + TOKEN__MINUS: + ret = eval_leaf(); + if (ret-->0 ~= TYPE_INT) + { + mem_free(ret); + error("Can only use - operator on strings"); + return 0; + } + ret-->1 = -(ret-->1); + return ret; + + TOKEN__VAR: + i = cmd; + while ((cmd++)->0); + skipwhite(); + if (cmd->0 ~= TOKEN__LPAREN) + { + ret = store_lookup(i); + if (ret == 0) + { + error("Variable not found"); + return 0; + } + return ret; + } + cmd++; + + ret = eval_expression(); + if (ret == 0) + return 0; + if (ret-->0 ~= TYPE_INT) + { + mem_free(ret); + error_typemismatch(); + return 0; + } + + skipwhite(); + if ((cmd++)->0 ~= TOKEN__RPAREN) + { + mem_free(ret); + error_syntaxerror(); + return 0; + } + + j = mem_alloc(2+cmd-i); + strcpy(j, i); + i = strlen(j); + j->i = ret-->1; + j->(i+1) = 0; + mem_free(ret); + + ret = store_lookup(j); + mem_free(j); + if (ret == 0) + { + error("Array or array index not found"); + return 0; + } + return ret; + + TOKEN__LPAREN: + ret = eval_expression(); + if (ret == 0) + return 0; + if ((cmd++)->0 ~= TOKEN__RPAREN) + { + error_missingclosep(); + return 0; + } + return ret; + + ! Simple function. + + TOKEN_RND, TOKEN_VAL: + skipwhite(); + if ((cmd++)->0 ~= TOKEN__LPAREN) + { + error_missingopenp(); + return 0; + } + ret = eval_expression(); + if (ret == 0) + return 0; + if ((cmd++)->0 ~= TOKEN__RPAREN) + { + error_missingclosep(); + return 0; + } + + switch (i) + { + TOKEN_RND: + if (ret-->0 ~= TYPE_INT) + { + error_typemismatch(); + mem_free(ret); + return 0; + } + + ret-->1 = random(ret-->1) - 1; + break; + + TOKEN_VAL: + if (ret-->0 ~= TYPE_STRING) + { + error_typemismatch(); + mem_free(ret); + return 0; + } + + ret-->0 = TYPE_INT; + ret-->1 = string_toint(ret-->1); + break; + } + + return ret; + } + + error("Botched leaf expression"); + return 0; +]; + +! Evaluate an expression. + +[ eval_expression ret val i; + ret = eval_leaf(); + if (ret == 0) + return ret; + skipwhite(); + + i = cmd->0; + switch (i) + { + TOKEN__EOL, TOKEN__COLON, TOKEN__SEMICOLON, TOKEN__COMMA, TOKEN__RPAREN, + TOKEN_THEN, TOKEN_TO, TOKEN_STEP: + return ret; + + ! Operators that can work on any type. + + TOKEN__PLUS, TOKEN__LARROW, TOKEN__RARROW, TOKEN__EQUALS, TOKEN__NEQUAL: + cmd++; + val = eval_expression(); + if (val == 0) + jump reterror; + if (ret-->0 ~= val-->0) + jump typemismatch; + + switch (ret-->0) + { + TYPE_INT: + switch (i) + { + TOKEN__PLUS: + ret-->1 = (ret-->1 + val-->1); + break; + + TOKEN__LARROW: + ret-->1 = (ret-->1 < val-->1); + break; + + TOKEN__RARROW: + ret-->1 = (ret-->1 > val-->1); + break; + + TOKEN__EQUALS: + ret-->1 = (ret-->1 == val-->1); + break; + + TOKEN__NEQUAL: + ret-->1 = (ret-->1 ~= val-->1); + break; + } + break; + + TYPE_STRING: + switch (i) + { + TOKEN__EQUALS: + ret-->0 = TYPE_INT; + ret-->1 = (string_compare(ret-->1, val-->1) == 0); + break; + + TOKEN__NEQUAL: + ret-->0 = TYPE_INT; + ret-->1 = (string_compare(ret-->1, val-->1) ~= 0); + break; + + TOKEN__PLUS, TOKEN__LARROW, TOKEN__RARROW: + error("Unimplemented opcode"); + jump valreterror; + } + break; + } + mem_free(val); + break; + + ! Operators that only work on ints. + + TOKEN__MINUS, TOKEN__STAR, TOKEN__SLASH, TOKEN_AND, TOKEN_OR: + cmd++; + val = eval_expression(); + if (val == 0) + jump reterror; + if ((ret-->0 ~= TYPE_INT) || (val-->0 ~= TYPE_INT)) + jump typemismatch; + + switch (i) + { + TOKEN__MINUS: + ret-->1 = ret-->1 - val-->1; + break; + + TOKEN__STAR: + ret-->1 = ret-->1 * val-->1; + break; + + TOKEN__SLASH: + if (val-->1 == 0) + { + error("Division by zero"); + jump valreterror; + } + ret-->1 = ret-->1 / val-->1; + break; + + TOKEN_AND: + ret-->1 = ret-->1 && val-->1; + break; + + TOKEN_OR: + ret-->1 = ret-->1 || val-->1; + break; + } + mem_free(val); + break; + + default: + error("Botched complex expression"); + jump reterror; + } + + return ret; + +.typemismatch; + error_typemismatch(); +.valreterror; + mem_free(val); +.reterror; + mem_free(ret); + return 0; +]; + +! List the current program. + +[ cmd_list val; + skipwhite(); + if (eos(cmd->0) == 0) + val = eval_expression(); + else + { + val = mem_alloc(4); + val-->0 = TYPE_INT; + val-->1 = 0; + } + + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + mem_free(val); + return ESTATE_ERROR; + } + + switch (val-->1) + { + -2: + store_listvars(); + break; + + -1: + store_listprogramhex(); + break; + + default: + store_listprogram(); + break; + } + + mem_free(val); + return ESTATE_NORMAL; +]; + +! Prints out an expression. + +[ cmd_print val; + while (1) + { + skipwhite(); + if (eos(cmd->0)) + { + print "^"; + break; + } + + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + switch (val-->0) + { + TYPE_INT: + print val-->1; + break; + + TYPE_STRING: + string_print(val-->1); + break; + + default: + mem_free(val); + error("Internal error --- invalid type in print!"); + return ESTATE_ERROR; + } + mem_free(val); + + switch (cmd->0) + { + TOKEN__COMMA: + print " "; + cmd++; + break; + + TOKEN__SEMICOLON: + cmd++; + if (eos(cmd->0)) + return ESTATE_NORMAL; + break; + } + } + + return ESTATE_NORMAL; +]; + +! Invoke a script. + +[ cmd_script val id; + skipwhite(); + if (eos(cmd->0)) + { + script_list(); + return ESTATE_NORMAL; + } + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 ~= TYPE_INT) + { + mem_free(val); + jump typemismatch; + } + id = val-->1; + mem_free(val); + ! When we call this, it's entirely possible that the heap will be + ! trashed. + return script_invoke(id); + +.typemismatch; + error_typemismatch(); + return ESTATE_ERROR; +]; + +! Variable assignment. + +[ cmd_varassignment varname val; + varname = eval_lvalue(); + if (varname == 0) + return ESTATE_ERROR; + skipwhite(); + + if ((cmd++)->0 ~= TOKEN__EQUALS) + { + mem_free(varname); + error("Unrecognised keyword"); + return ESTATE_ERROR; + } + skipwhite(); + + val = eval_expression(); + if (val == 0) + { + mem_free(varname); + return ESTATE_ERROR; + } + + store_assign(varname, val-->0, val-->1); + mem_free(varname); + mem_free(val); + + return ESTATE_NORMAL; +]; + +! Run the program. + +[ cmd_run i p; + cmd_clear(); + stack_ptr = 1; + stack_type-->1 = STACK_NONE; + program_ptr = store_bottom; + + ! As the program is already tokenised, we can directly run the + ! bytecode in the store. + + do { + ! Reached the end of the program? + + if (program_ptr->0 == 0) + { + i = ESTATE_NORMAL; + break; + } + + ! Read in the line number and execute the line.. + + p = program_ptr + 1; + program_ptr = program_ptr + program_ptr->0; + program_lineno = p-->0; + p++; + + ! Execute the line. Remember execute_command needs to be + ! pointed at the byte *before* the bytecode... + + i = execute_command(p); + } until (i ~= ESTATE_NORMAL); + + program_ptr = 0; + return i; +]; + +! Read in a string. + +[ cmd_input val varname buf; + skipwhite(); + + ! Is there a label? + + if (cmd->0 == TOKEN__STRING) + { + val = eval_leaf(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 == TYPE_STRING) + string_print(val-->1); + else + { + error_typemismatch(); + mem_free(val); + return ESTATE_ERROR; + } + mem_free(val); + + skipwhite(); + switch (cmd->0) + { + TOKEN__COMMA: + print " "; + break; + + TOKEN__SEMICOLON: + break; + + default: + error_syntaxerror(); + return ESTATE_ERROR; + } + cmd++; + + skipwhite(); + } + else + print "? "; + + ! Get the variable name to put the result into. + + if ((cmd++)->0 ~= TOKEN__VAR) + { + error_syntaxerror(); + return ESTATE_ERROR; + } + varname = eval_lvalue(); + if (varname == 0) + return ESTATE_ERROR; + + ! Get the user's input. + + buf = mem_alloc(255); + if (buf == 0) + { + mem_free(varname); + error_outofmemory(); + return ESTATE_ERROR; + } + + buf->0 = 255; + read buf 0; + + ! Assign to the variable. + + store_assign(varname, TYPE_STRING, string_alloc(buf+2, buf->1)); + + ! Free the temporary buffer. + + mem_free(varname); + mem_free(buf); + + return ESTATE_NORMAL; +]; + +! Jump to a line number. + +[ cmd_goto val i; + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 ~= TYPE_INT) + { + mem_free(val); + error_typemismatch(); + return ESTATE_ERROR; + } + + i = store_findline(val-->1); + mem_free(val); + if (i == 0) + { + error("No such line number"); + return ESTATE_ERROR; + } + + program_ptr = i; + return ESTATE_NORMAL; +]; + + +! Conditional execution. + +[ cmd_if val; + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + skipwhite(); + if ((cmd++)->0 ~= TOKEN_THEN) + { + mem_free(val); + error_syntaxerror(); + return ESTATE_ERROR; + } + + if ((val-->0 == TYPE_INT) && (val-->1 == 0)) + cmd = 0; + + mem_free(val); + return ESTATE_NORMAL; +]; + +! Top half of a FOR loop. + +[ cmd_for varname val initialval targetval stepval cmdptr; + ! FOR can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + ! Store the address of the FOR instruction. + + cmdptr = cmd-1; + + ! Read the variable name. + + skipwhite(); + if ((cmd++)->0 ~= TOKEN__VAR) + { + error_syntaxerror(); + return ESTATE_ERROR; + } + varname = eval_lvalue(); + if (varname == 0) + return ESTATE_ERROR; + + ! Skip over the =. + + skipwhite(); + if ((cmd++)->0 ~= TOKEN__EQUALS) + { + error_syntaxerror(); + jump varnameexit; + } + + ! Read the initial value. + + val = eval_expression(); + if (val == 0) + jump varnameexit; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump varnameexit; + } + initialval = val-->1; + mem_free(val); + + ! Read the TO. + + skipwhite(); + if ((cmd++)->0 ~= TOKEN_TO) + { + error_syntaxerror(); + return ESTATE_ERROR; + } + + ! Read the target value. + + val = eval_expression(); + if (val == 0) + jump varnameexit; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump varnameexit; + } + targetval = val-->1; + mem_free(val); + + ! Is there a STEP clause? + + skipwhite(); + if (cmd->0 == TOKEN_STEP) + { + cmd++; + skipwhite(); + + ! Read the STEP value. + + val = eval_expression(); + if (val == 0) + jump varnameexit; + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump valexit; + } + stepval = val-->1; + mem_free(val); + } + else + { + ! Otherwise, default to 1. + + stepval = 1; + } + + ! Is this a new loop? + + if (stack_type-->stack_ptr == STACK_NONE) + { + ! Yes. Ensure there's room on the stack. + + if ((stack_ptr+1) >= STACK_SIZE) + { + error_stackoverflow(); + jump varnameexit; + } + stack_ptr-->stack_type = STACK_NONE; + + ! ...and set the initial value. + + store_assign(varname, TYPE_INT, initialval); + } + else + { + ! Otherwise, load the loop counter. + + val = store_lookup(varname); + if (val == 0) + { + error("FOR loop counter has disappeared"); + jump varnameexit; + } + if (val-->0 ~= TYPE_INT) + { + error_typemismatch(); + jump valexit; + } + initialval = val-->1; + mem_free(val); + + ! Increment it. + + initialval = initialval + stepval; + + ! Test. + + if (((stepval < 0) && (initialval < targetval)) || + ((stepval >= 0) && (initialval > targetval))) + { + ! Abort! The NEXT keyword has placed the pointer to + ! to the next instruction after the loop on the stack. + + cmd = stack_cmd-->stack_ptr; + program_ptr = stack_line-->stack_ptr; + stack_type-->stack_ptr = 0; + return ESTATE_NORMAL; + } + else + { + ! Write back the new loop counter. + + store_assign(varname, TYPE_INT, initialval); + } + } + + mem_free(varname); + stack_type-->stack_ptr = STACK_FOR; + stack_cmd-->stack_ptr = cmdptr; + stack_line-->stack_ptr = program_ptr; + stack_ptr++; + return ESTATE_NORMAL; + +.valexit; + mem_free(val); +.varnameexit; + mem_free(varname); + return ESTATE_ERROR; +]; + +! Bottom half of a FOR loop. + +[ cmd_next i j; + ! NEXT can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + stack_ptr--; + if ((stack_ptr == 0) || (stack_type-->stack_ptr ~= STACK_FOR)) + { + error("NEXT without FOR"); + return ESTATE_ERROR; + } + + i = stack_cmd-->stack_ptr; + j = stack_line-->stack_ptr; + stack_cmd-->stack_ptr = cmd; + stack_line-->stack_ptr = program_ptr; + cmd = i; + program_ptr = j; + + return ESTATE_NORMAL; +]; + +! Top half of a REPEAT..UNTIL loop. + +[ cmd_repeat; + ! REPEAT can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + if ((stack_ptr+1) >= STACK_SIZE) + { + error_stackoverflow(); + return ESTATE_ERROR; + } + + stack_type-->stack_ptr = STACK_REPEAT; + stack_cmd-->stack_ptr = cmd+1; + stack_line-->stack_ptr = program_ptr; + stack_ptr++; + return ESTATE_NORMAL; +]; + +! Bottom half of a REPEAT..UNTIL loop. + +[ cmd_until val; + ! REPEAT can only be used when running a program. + + if (program_ptr == 0) + { + error_notrunning(); + return ESTATE_ERROR; + } + + stack_ptr--; + if ((stack_ptr == 0) || (stack_type-->stack_ptr ~= STACK_REPEAT)) + { + error("UNTIL without REPEAT"); + return ESTATE_ERROR; + } + + val = eval_expression(); + if (val == 0) + return ESTATE_ERROR; + if (val-->0 ~= TYPE_INT) + { + mem_free(val); + error_typemismatch(); + return ESTATE_ERROR; + } + + if (val-->1 == 0) + { + cmd = stack_cmd-->stack_ptr; + program_ptr = stack_line-->stack_ptr; + stack_ptr++; + } + else + stack_type-->stack_ptr = STACK_NONE; + + return ESTATE_NORMAL; +]; + +! Execute a command. + +[ execute_command _cmd i; + cmd = _cmd; + cmd++; + while (1) + { + i = (cmd++)->0; + + switch (i) + { + TOKEN__EOL: return ESTATE_NORMAL; + TOKEN__SPACE: continue; + + TOKEN__NUMBER: + i = cmd-->0; + cmd = cmd + 2; + if (store_addline(i, cmd, _cmd->0 - 4)) + return ESTATE_ERROR; + ! Don't execute anything else on this line. + return ESTATE_DANGEROUS; + + TOKEN__VAR: + i = cmd_varassignment(); + jump checkresult; + + TOKEN_CLEAR: + i = cmd_clear(); + jump checkresult; + + TOKEN_CLS: + @erase_window -1; + break; + + TOKEN_FOR: + i = cmd_for(); + jump checkresult; + + TOKEN_GOTO: + i = cmd_goto(); + jump checkresult; + + TOKEN_IF: + i = cmd_if(); + if (i ~= ESTATE_NORMAL) + return i; + if (cmd == 0) + return ESTATE_NORMAL; + continue; + + TOKEN_INPUT: + i = cmd_input(); + jump checkresult; + + TOKEN_LIST: + i = cmd_list(); + jump checkresult; + + TOKEN_LOAD: + restore endofstatement; + error("Load failed"); + return ESTATE_ERROR; + + TOKEN_NEW: + return ESTATE_NEW; + + TOKEN_NEXT: + i = cmd_next(); + if (i == ESTATE_NORMAL) + continue; + return i; + + TOKEN_PRINT: + i = cmd_print(); + jump checkresult; + + TOKEN_QUIT: + error("Quit"); + return ESTATE_QUIT; + + TOKEN_REPEAT: + i = cmd_repeat(); + jump checkresult; + + TOKEN_RUN: + i = cmd_run(); + return i; + + TOKEN_SAVE: + save endofstatement; + error("Save failed"); + return ESTATE_ERROR; + + TOKEN_SCRIPT: + i = cmd_script(); + jump checkresult; + + TOKEN_STOP: + error("Stop"); + return ESTATE_ERROR; + + TOKEN_UNTIL: + i = cmd_until(); + if (i ~= ESTATE_ERROR) + return i; + continue; + jump checkresult; + + default: + error("Unimplemented token"); + return ESTATE_ERROR; + + .checkresult; + if (i == ESTATE_ERROR) + return ESTATE_ERROR; + break; + } + + .endofstatement; + if (eos(cmd->0) == 0) + { + error_nomore(); + return ESTATE_ERROR; + } + + if (cmd->0 == TOKEN__COLON) + cmd++; + } + + return ESTATE_NORMAL; +]; + diff --git a/malloc.h b/malloc.h new file mode 100644 index 0000000..4716918 --- /dev/null +++ b/malloc.h @@ -0,0 +1,202 @@ +constant FREE_MAGIC $FAEE; +constant ALLOC_MAGIC $A10C; + +constant FB_MAGIC 0; +constant FB_PREV 1; +constant FB_NEXT 2; +constant FB_SIZE 3; +constant FB__SIZE (4*2); + +constant AB_MAGIC 0; +constant AB_SIZE 1; +constant AB__SIZE (2*2); + +global mem_top = 0; +global mem_bottom = 0; +global mem_firstfree = 0; + +! Initialise the heap manager, given a value for TOP. It's assumed that the +! heap will stretch to HIMEM. + +[ mem_init bottom top; + mem_bottom = bottom; + mem_top = top; + ! Create a free chunk spanning the length of the block. + mem_firstfree = mem_bottom; + mem_firstfree-->FB_MAGIC = FREE_MAGIC; + mem_firstfree-->FB_PREV = 0; + mem_firstfree-->FB_NEXT = 0; + mem_firstfree-->FB_SIZE = mem_top - mem_bottom; +]; + +! Zero a block of memory. + +[ mem_zero ptr size; + while (size) + { + (ptr++)->0 = 0; + size--; + } +]; + +! Remove a node from the list. + +[ mem_node_remove p; + if (p == mem_firstfree) + mem_firstfree = p-->FB_NEXT; + if (p-->FB_PREV) + p-->FB_PREV-->FB_NEXT = p-->FB_NEXT; + if (p-->FB_NEXT) + p-->FB_NEXT-->FB_PREV = p-->FB_PREV; +]; + +! Try and allocate a block. + +[ mem_alloc size p; + ! Add space for the AB_ header. + + size = size + AB__SIZE; + + ! This block will eventually have to be a FB, once it's freed. So it + ! has to be big enough for the FB_ structure. + + if (size < FB__SIZE) + size = FB__SIZE; + + ! Iterate through the list trying to find a free chunk large enough. + + p = mem_firstfree; + while ((p ~= 0) && (p-->FB_SIZE < size)) + { + p = p-->FB_NEXT; + } + + if (p == 0) + { + ! No sufficiently large chunk could be found. + return 0; + } + + ! Can the block be shrunk, or is there not enough room? + + if ((p-->FB_SIZE - size) < FB__SIZE) + { + ! Yes; remove the node completely. + size = p-->FB_SIZE; + mem_node_remove(p); + } + else + { + ! No. Instead of removing the node, we shrink it + p-->FB_SIZE = p-->FB_SIZE - size; + p = p + p-->FB_SIZE; + } + + ! Initialise the allocated node. + + mem_zero(p, size); + p-->AB_MAGIC = ALLOC_MAGIC; + p-->AB_SIZE = size; + + return p+AB__SIZE; +]; + +! Try to free a block. + +[ mem_free p q; + ! Adjust the pointer to point to the alloc node itself. + p = p - AB__SIZE; + +#ifdef DEBUG; + ! Check the magic number. + if (p-->AB_MAGIC ~= ALLOC_MAGIC) + { + print "Trying to free invalid node ", p, "!^"; + print "Magic was "; phex(p-->AB_MAGIC, 4); + print " when it should have been "; phex(ALLOC_MAGIC, 4); + print ".^"; + return; + } +#endif; + + ! Turn the alloc node into a free node. + + q = p-->AB_SIZE; +#ifdef DEBUG; + memset(p, $55, q); +#endif; + p-->FB_MAGIC = FREE_MAGIC; + p-->FB_NEXT = mem_firstfree; + p-->FB_PREV = 0; + p-->FB_SIZE = q; + if (mem_firstfree) + mem_firstfree-->FB_PREV = p; + mem_firstfree = p; + + ! Right. We've successfully freed the block; p points to the FB + ! structure. + ! + ! Unfortunately, they way we use memory leads to lots of + ! fragmentation, which is bad. So we need to find out if we can coalesce + ! with the block immediately afterwards. + + if ((p+q)-->0 ~= FREE_MAGIC) + { + ! Nothing coalescable. + return; + } + + ! Change the size of our block to encompass the next block... + + p-->FB_SIZE = q + (p+q)-->FB_SIZE; + + ! ...and remove the next block from the free list. + + mem_node_remove(p+q); +]; + +! Get amount of free memory. + +[ mem_countfree p size; + size = 0; + p = mem_firstfree; + while (p ~= 0) + { + size = size + p-->FB_SIZE; + p = p-->FB_NEXT; + } + return size; +]; + +! Get total amount of memory. + +[ mem_counttotal; + return (mem_top - mem_bottom); +]; + +! Get amount of used memory. + +[ mem_countused; + return mem_counttotal() - mem_countfree(); +]; + +#ifdef DEBUG; +! Dump the free list. + +[ mem_show_free_list p; + print "Free list start^"; + p = mem_firstfree; + while (p ~= 0) + { + print " node ", p, " prev=", p-->FB_PREV, " next=", p-->FB_NEXT; + print " size=", p-->FB_SIZE; + if (p-->FB_MAGIC ~= FREE_MAGIC) + print " invalid magic"; + print "^"; + p = p-->FB_NEXT; + } + print "Free list end; used=", mem_countused(), " total=", mem_counttotal(), "^"; +]; + +#endif; + diff --git a/mktokens.awk b/mktokens.awk new file mode 100644 index 0000000..fc4da64 --- /dev/null +++ b/mktokens.awk @@ -0,0 +1,39 @@ +BEGIN { + count = 255 +} + +{ + t = $1 + token[t] = count + count-- + if (length(t) > 1) + print "constant TOKEN_" toupper(t) " " token[t] ";" +} + +END { + print "[ token_decode t;" + print "switch(t) {" + for (i in token) + print token[i] ": return \"" i "\";" + print "}" + print "return 0;" + print "];" + + print "[ token_encode p d;" + for (i in token) + { + if (length(i) > 1) + { + print "if (" + for (j=1; j<=length(i); j++) + print "(p->" (j-1) " == '" substr(i, j, 1) "') &&" + print " token_invalidchar(p->" (j-1) ")) { d->0 =" token[i] "; return " length(i) "; }" + } + else + print "if (p->0 == '" i "') { d->0 = " token[i] "; return 1; }" + } + print "return 0;" + print "];" +} + + diff --git a/script.h b/script.h new file mode 100644 index 0000000..a2103c0 --- /dev/null +++ b/script.h @@ -0,0 +1,266 @@ +array about_script --> + "About baZic" + "cls" + "print:print:print" + "print ~baZic is a downright scary implementation of ~;" + "print ~Basic for the Z-machine. It features proper dynamic ~;" + "print ~memory allocation, mark-sweep garbage collection, ~;" + "print ~fully tokenised program storage, a built-in program ~;" + "print ~editor (cough, cough) and is almost entirely useless. ~;" + "print ~It is complete enough to play Hunt the Wumpus on, and ~;" + "print ~a copy is provided.~" + "print" + "print ~The language is not complete. I gave up after a certain ~;" + "print ~point when I realised that Basic is so crappy a language ~;" + "print ~that it's not fun to implement, just annoying. I am now ~;" + "print ~even more impressed with the authors of things like 8kB ~;" + "print ~Microsoft Basic and BBC Basic; impressive work.~" + "print" + "print ~The main lacks to baZic are string operations, proper arrays, and ~;" + "print ~subroutines. The bulk of the string code is in there --- the ~;" + "print ~scary garbage collector --- but I haven't done the string ~;" + "print ~operators themselves. Arrays are sort of implemented, but they're ~;" + "print ~done by munging the variable name with the index. (See the Wumpus ~;" + "print ~program for an example.) Subroutines just haven't been done.~" + "print" + "print ~If you're interested in the innards of the program, you can do ~;" + "print ~'list -1' to list the stored Basic program with byte-code, or ~;" + "print ~'list -2' to view the variables in memory. If you do this while ~;" + "print ~an array is in use, odd things will happen.~" + "print" + "print ~baZic is licensed under the MIT public license. For more information, contact ~;" + "print ~dg@@64cowlark.com.~" + "print:print" + 0; + +array higherlower_script --> + "Higher / Lower" + "new" + "10print ~Higher, Lower~" + "15print" + "20target = rnd(100)" + "30tries = 1" + "40input ~What is your guess? ~; guess" + "50guess = val(guess)" + "60if guess < target then print ~Too low!~" + "70if guess > target then print ~Too high!~" + "80if guess = target then goto 110" + "90tries = tries + 1" + "100goto 40" + "110print ~You got it!~" + "120print ~In only~, tries, ~tries, too.~" + 0; + +array wumpus_script --> + "Hunt the Wumpus" + "new" + "10cls" + "20print ~Hunt the Wumpus --- loading data~" + "30cave1(0)=1:cave2(0)=4:cave3(0)=7" + "40cave1(1)=0:cave2(1)=2:cave3(1)=9" + "50cave1(2)=1:cave2(2)=3:cave3(2)=11" + "60cave1(3)=2:cave2(3)=4:cave3(3)=13" + "70cave1(4)=0:cave2(4)=3:cave3(4)=5" + "80cave1(5)=4:cave2(5)=6:cave3(5)=14" + "90cave1(6)=5:cave2(6)=7:cave3(6)=16" + "100cave1(7)=0:cave2(7)=6:cave3(7)=8" + "110cave1(8)=7:cave2(8)=9:cave3(8)=17" + "120cave1(9)=1:cave2(9)=8:cave3(9)=10" + "130cave1(10)=9:cave2(10)=11:cave3(10)=18" + "140cave1(11)=2:cave2(11)=10:cave3(11)=12" + "150cave1(12)=11:cave2(12)=13:cave3(12)=19" + "160cave1(13)=3:cave2(13)=12:cave3(13)=14" + "170cave1(14)=5:cave2(14)=13:cave3(14)=15" + "180cave1(15)=14:cave2(15)=16:cave3(15)=19" + "190cave1(16)=6:cave2(16)=15:cave3(16)=17" + "200cave1(17)=8:cave2(17)=16:cave3(17)=18" + "210cave1(18)=10:cave2(18)=17:cave3(18)=19" + "220cave1(19)=12:cave2(19)=15:cave3(19)=18" + "225print:input ~Do you want instructions? (y/n) ~; i$" + "226if i$ <> ~y~ then goto 1000" + "230cls" + "240print ~Welcome to HUNT THE WUMPUS!~" + "250print:print ~The wumpus lives in a cave of twenty rooms. ~;" + "260print ~Each room has three tunnels leading to other rooms. ~;" + "270print ~Two rooms have bottomless pits --- don't fall on. ~;" + "280print ~Two more contain Alien Space Bats. Disturb them, ~;" + "290print ~and they will whisk you away to some other room. ~;" + "300print:print:print ~Naturally, the wumpus is immune to all hazards. ~;" + "310print ~Usually, he's asleep: he'll wake up if he hears you ~;" + "320print ~shooting an arrow, or if you walk in on him. ~;" + "330print ~Once awake, he'll move around most turns (1 in 4 ~;" + "340print ~probability). If *he* walks in on *you*, he'll eat yer. ~;" + "350print:print:print ~Your only defence is your arrows. These arrows, ~;" + "360print ~which contain a Z-machine microcontroller, can be ~;" + "370print ~programmed to follow a certain course for up to five ~;" + "380print ~rooms. If they can't go where you told them, they'll ~;" + "390print ~move randomly. Did I mention they'll kill you, too, ~;" + "400print ~if you program them incorrectly? ~;" + "410print:print:print ~The wumpus has smelly feet; you can smell him in the ~;" + "420print ~next room. The bats rustle, you can hear them; the pits are ~;" + "430print ~drafty, and you can feel that.~" + "1000print:print ~Placing objects in maze...~" + "1010wumpus=rnd(20):player=rnd(20):pit1=rnd(20):pit2=rnd(20)" + "1020bat1=rnd(20):bat2=rnd(20)" + "1030if wumpus = player then goto 1000" + "1040if wumpus = pit1 then goto 1000" + "1050if wumpus = pit2 then goto 1000" + "1060if wumpus = bat1 then goto 1000" + "1070if wumpus = bat2 then goto 1000" + "1080if player = pit1 then goto 1000" + "1090if player = pit2 then goto 1000" + "1100if player = bat1 then goto 1000" + "1110if player = bat2 then goto 1000" + "1120if pit1 = pit2 then goto 1000" + "1130if pit1 = bat1 then goto 1000" + "1140if pit1 = bat2 then goto 1000" + "1150if pit2 = bat1 then goto 1000" + "1160if pit2 = bat2 then goto 1000" + "1170if bat1 = bat2 then goto 1000" + "1180awake=0" + "2000print" + "2010print ~You are in room number ~; player; ~. Exits lead off to rooms ~;" + "2020print cave1(player); ~, ~; cave2(player); ~ and ~; cave3(player); ~. ~" + "2030if cave1(player) = wumpus then goto 2070" + "2040if cave2(player) = wumpus then goto 2070" + "2050if cave3(player) = wumpus then goto 2070" + "2060goto 2080" + "2070print ~You can smell the wumpus!~" + "2080if (cave1(player) = pit1) or (cave1(player) = pit2) then goto 2120" + "2090if (cave2(player) = pit1) or (cave2(player) = pit2) then goto 2120" + "2100if (cave3(player) = pit1) or (cave3(player) = pit2) then goto 2120" + "2110goto 2121" + "2120print ~You feel a draught!~" + "2121if (cave1(player) = bat1) or (cave1(player) = bat2) then goto 2125" + "2122if (cave2(player) = bat1) or (cave2(player) = bat2) then goto 2125" + "2123if (cave3(player) = bat1) or (cave3(player) = bat2) then goto 2125" + "2124goto 2130" + "2125print ~You hear rustling!~" + "2130input ~Which room do you want to go to (or -1 to fire an arrow)? ~; i$" + "2135dest = val(i$)" + "2140if dest = -1 then goto 3000" + "2150if (cave1(player) = dest) or (cave2(player) = dest) or (cave3(player) = dest) then goto 2200" + "2160print:print ~You can't go that way.~" + "2170goto 2130" + "2200print" + "2210if dest=wumpus then goto 2600" + "2220if (dest=pit1) or (dest=pit2) then print ~*** AAAAAAaaaaaah ***~:print:print ~You have fallen down a pit.~:goto 5000" + "2230if (dest=bat1) or (dest=bat2) then print ~You have been abducted by alien space bats.~:dest=rnd(20)" + "2240player=dest" + "2500if awake=0 then goto 2000" + "2510i = rnd(4)" + "2520if i<>0 then goto 2000" + "2530print ~You hear the patter of tiny feet as the wumpus moves.~" + "2540i = rnd(3)" + "2550if i=0 then wumpus = cave1(wumpus)" + "2560if i=1 then wumpus = cave2(wumpus)" + "2570if i=2 then wumpus = cave3(wumpus)" + "2580goto 2000" + "2600player=dest" + "2210if awake=1 then print ~*** CRUNCH ***~:print:print ~You have been eaten by the wumpus.~:goto 5000" + "2620awake=1" + "2630print ~The wumpus is here! Asleep, though it won't stay like that for long.~" + "2640goto 2000" + "3000print" + "3005arrow=player" + "3010for i=1 to 5" + "3020input ~Please enter the next room for the arrow to go to: ~; i$" + "3025j = val(i$)" + "3030if (j = cave1(arrow)) or (j = cave2(arrow)) or (j = cave3(arrow)) then arrow=j:goto 3071" + "3040j = rnd(3)" + "3050if j = 0 then arrow = cave1(arrow)" + "3060if j = 1 then arrow = cave2(arrow)" + "3070if j = 2 then arrow = cave3(arrow)" + "3071if j = wumpus then print:print ~You hear a distant scream... the wumpus is dead! Now you just have to deal with the RSPCA lawyers.~:goto 5010" + "3080if j = player then print:print ~You have just been impaled on one of your own arrows.~:goto 5000" + "3090next" + "3095awake=1" + "3100goto 2500" + "5000print:input ~You have died. Play again? (y/n) ~; i$:goto 5020" + "5010print:input ~You have won. Play again? (y/n) ~; i$" + "5020if i$=~y~ then goto 1000" + "5030print:print ~Bye bye!~" + 0; + +array scripts --> + about_script + higherlower_script + wumpus_script + 0; +constant NUM_SCRIPTS 3; + +global script_id = 0; +global script_lineno = 0; + +! List the available scripts. + +[ script_list i; + print "Available scripts:^"; + i = 0; + while (scripts-->i) + { + print " ", i, ": ", (string) scripts-->i-->0, "^"; + i++; + } + return ESTATE_NORMAL; +]; + +! Invoke a script. + +[ script_invoke i; + if ((i < 0) || (i >= NUM_SCRIPTS)) + { + error("Script ID out of range"); + return ESTATE_ERROR; + } + + print "Invoking script ~", (string) scripts-->i-->0, "~...^"; + script_id = i; + script_lineno = 1; + + do { + store_init(heap, heap+HEAP_SIZE); + do { + i = command_loop(script_reader); + } until ((i == ESTATE_QUIT) || (i == ESTATE_NEW)); + } until (i == ESTATE_QUIT); + + print "Script finished.^"; + return ESTATE_NORMAL; +]; + +! Read in a command from a script into the parse buffer. + +[ script_reader buf in l; + buf = mem_alloc(255); + if (buf == 0) + return -2; + + ! Get the compressed data. + + l = scripts-->script_id-->script_lineno; + if (l == 0) + l = "quit"; + else + script_lineno++; + + ! Decompress it into the temporary buffer. + + buf-->0 = 255; + @output_stream 3 buf; + print (string) l; + @output_stream -3; + + ! Ensure the string is zero-terminated. + + in = buf+2; + in->(buf-->0) = 0; + + ! Tokenise the stream. + + in = tokenise_stream(in, parsebuffer); + + mem_free(buf); + return in; +]; + diff --git a/store.h b/store.h new file mode 100644 index 0000000..2792b00 --- /dev/null +++ b/store.h @@ -0,0 +1,203 @@ +global store_bottom = 0; +global store_eop = 0; +global store_top = 0; + +constant VN_NEXT = 0; +constant VN_TYPE = 1; +constant VN_VALUE = 2; +constant VN__SIZE = (3*2); + +global store_firstvar = 0; + +! Initialise the program store. + +[ store_init bottom top; + store_bottom = bottom; + store_top = top; + store_bottom->0 = 0; + store_eop = store_bottom; + string_init(); +]; + +! --- Heap management ------------------------------------------------------- + +! Heap reinitialisation; all heap pointers are invalidated. + +[ store_heapclean; + store_firstvar = 0; +]; + +! Create a new variable. + +[ store_newvar varname type value node; + node = mem_alloc(VN__SIZE + strlen(varname) + 1); + node-->VN_NEXT = store_firstvar; + node-->VN_TYPE = type; + node-->VN_VALUE = value; + strcpy(node+VN__SIZE, varname); + store_firstvar = node; +]; + +! Look for an old variable. + +[ store_searchvar varname node; + node = store_firstvar; + while (node) + { + if (strcmp(varname, node+VN__SIZE) == 0) + return node; + node = node-->VN_NEXT; + } + return 0; +]; + +! Assign a value to a variable. + +[ store_assign varname type value node; + node = store_searchvar(varname); + if (node) + { + node-->VN_TYPE = type; + node-->VN_VALUE = value; + return; + } + store_newvar(varname, type, value); +]; + +! Get the contents of a variable. + +[ store_lookup varname node val; + node = store_searchvar(varname); + if (node) + { + val = mem_alloc(4); + val-->0 = node-->VN_TYPE; + val-->1 = node-->VN_VALUE; + return val; + } + return 0; +]; + +! List all variables. + +[ store_listvars var count; + var = store_firstvar; + count = 0; + print "Variables:^"; + while (var) + { + print " ", (astring) var+VN__SIZE; + switch (var-->VN_TYPE) + { + TYPE_INT: + print " integer ", var-->VN_VALUE; + break; + + TYPE_STRING: + print " string ~"; + string_print(var-->VN_VALUE); + print "~"; + break; + + default: + print " unknown type"; + } + print "^"; + + var = var-->VN_NEXT; + count++; + } + print "Total: ", count, "^"; +]; + +! --- Program storage ------------------------------------------------------- + +! List the current program. + +[ store_listprogram line linelen; + line = store_bottom; + while (linelen = line->0) + { + print " ", (line+1)-->0; + detokenise_stream(line+3); + line = line + linelen; + } +]; + +[ store_listprogramhex line linelen; + line = store_bottom; + while (linelen = line->0) + { + print " ", (line+1)-->0; + detokenise_stream(line+3); + print "-> "; + hexdump(line, linelen); + line = line + linelen; + } +]; + +! Add a line to the program. + +[ store_addline linenum cmd linelen line i; + line = store_bottom; + while (line->0) + { + if ((line+1)-->0 >= linenum) + break; + line = line + line->0; + } + + ! Do we need to append this line to the end of the program? + + if (line->0 == 0) + { + ! Yes. + + line = store_eop; + store_eop->(linelen+3) = 0; + store_eop = store_eop + linelen + 3; + } + else + { + ! Do we need to remove an existing line first? + + if ((line+1)-->0 == linenum) + { + i = line->0; + memcpy(line, line+i, store_eop-line-i); + store_eop = store_eop - i; + store_eop->0 = 0; + } + + ! If there's no actual data in the new line, give up. + + if (linelen == 1) + return 0; + + ! Open up space for the new line. + + memcpy(line+linelen+3, line, store_eop-line); + store_eop = store_eop + linelen + 3; + store_eop->0 = 0; + } + + ! Copy the line in. + + line->0 = linelen+3; + (line+1)-->0 = linenum; + memcpy(line+3, cmd, linelen); + + return 0; +]; + +! Find a line, by number. + +[ store_findline lineno line; + line = store_bottom; + while (line->0 && ((line+1)-->0 ~= lineno)) + line = line + line->0; + if (line->0 == 0) + return 0; + return line; +]; + diff --git a/string.h b/string.h new file mode 100644 index 0000000..6e29a39 --- /dev/null +++ b/string.h @@ -0,0 +1,180 @@ +global string_first; + +constant STRING_NEXT 0; +constant STRING_PTR 1; +constant STRING_LEN 2; +constant STRING_REF 3; +constant STRING__SIZE (4*2); + +! Initialise the string store. + +[ string_init; + string_first = 0; +]; + +! Returns false if the string is constant (i.e., it points to a literal in +! the byte-code itself). + +[ string_isvar s; + return ((s < store_bottom) || (s > store_eop)); +]; + +! Add an entry for a new string. + +[ string_alloc str len s; + if (string_isvar(str)) + { + ! Need to duplicate the string. + s = mem_alloc(len); + memcpy(s, str, len); + str = s; + } + s = mem_alloc(STRING__SIZE); + s-->STRING_NEXT = string_first; + s-->STRING_PTR = str; + s-->STRING_LEN = len; + string_first = s; + return s; +]; + +! Print a string. + +[ string_print s i; + i = s-->STRING_LEN; + s = s-->STRING_PTR; + while (i--) + print (char) (s++)->0; +]; + +! Compare two strings. + +[ string_compare s1 s2 i; + i = s1-->STRING_LEN; + if (i ~= s2-->STRING_LEN) + return -1; + s1 = s1-->STRING_PTR; + s2 = s2-->STRING_PTR; + while (i--) + if ((s1++)->0 ~= (s2++)->0) + return -1; + return 0; +]; + +! Garbage collection: clean. + +[ string_clean s; + s = string_first; + while (s) + { + s-->STRING_REF = 0; + s = s-->STRING_NEXT; + } +]; + +! Garbage collection: mark. + +[ string_mark var; + var = store_firstvar; + while (var) + { + if (var-->VN_TYPE == TYPE_STRING) + var-->VN_VALUE-->STRING_REF = 1; + var = var-->VN_NEXT; + } +]; + +#ifdef DEBUG; +! Garbage collection: list status. + +[ string_status s; + s = string_first; + while (s) + { + print s, " ", s-->STRING_NEXT, " "; + string_print(s); + print " ", s-->STRING_REF, "^"; + s = s-->STRING_NEXT; + } +]; +#endif; + +! Garbage collection: sweep. + +[ string_sweep s olds news; + olds = 0; + s = string_first; + while (s) + { + news = s-->STRING_NEXT; + if (s-->STRING_REF == 0) + { + if (string_isvar(s-->STRING_PTR)) + mem_free(s-->STRING_PTR); + if (olds) + olds-->STRING_NEXT = news; + else + { + ! If olds is null, then we're on the first + ! string in the list; which means we need to + ! change string_first when we remove it. + string_first = news; + } + mem_free(s); + } + else + olds = s; + + s = news; + } +]; + +! Garbage collector. + +[ string_gc; + ! Phase one: clean. + string_clean(); + ! Phase two: mark. + string_mark(); +#ifdef DEBUG; + !string_status(); +#endif; + ! Phase three: sweep. + string_sweep(); +]; + +! Turn a string into an int. + +[ string_toint s len i neg; + len = s-->STRING_LEN; + s = s-->STRING_PTR; + i = 0; + neg = 1; + + while (len && (s->0 == 32 or 9)) + { + s++; + len--; + } + + if (len == 0) + return i; + + if (s->0 == '-') + { + neg = -1; + s++; + len--; + } + + if (len == 0) + return i; + + while (len && (s->0 >= '0') && (s->0 <= '9')) + { + i = i*10 + (s->0 - '0'); + s++; + len--; + } + + return i*neg; +]; diff --git a/token-table.h b/token-table.h new file mode 100644 index 0000000..b17114f --- /dev/null +++ b/token-table.h @@ -0,0 +1,203 @@ +constant TOKEN_AND 255; +constant TOKEN_CLEAR 254; +constant TOKEN_CLS 253; +constant TOKEN_IF 252; +constant TOKEN_INPUT 251; +constant TOKEN_FOR 250; +constant TOKEN_GOTO 249; +constant TOKEN_LIST 248; +constant TOKEN_LOAD 247; +constant TOKEN_RND 246; +constant TOKEN_NEW 245; +constant TOKEN_NEXT 244; +constant TOKEN_OR 243; +constant TOKEN_PRINT 242; +constant TOKEN_QUIT 241; +constant TOKEN_REPEAT 240; +constant TOKEN_RUN 239; +constant TOKEN_SAVE 238; +constant TOKEN_SCRIPT 237; +constant TOKEN_STEP 236; +constant TOKEN_STOP 235; +constant TOKEN_THEN 234; +constant TOKEN_TO 233; +constant TOKEN_UNTIL 232; +constant TOKEN_VAL 231; +[ token_decode t; +switch(t) { +255: return "and"; +236: return "step"; +251: return "input"; +243: return "or"; +244: return "next"; +237: return "script"; +245: return "new"; +242: return "print"; +239: return "run"; +233: return "to"; +249: return "goto"; +253: return "cls"; +247: return "load"; +235: return "stop"; +254: return "clear"; +238: return "save"; +232: return "until"; +240: return "repeat"; +252: return "if"; +231: return "val"; +250: return "for"; +246: return "rnd"; +248: return "list"; +234: return "then"; +241: return "quit"; +} +return 0; +]; +[ token_encode p d; +if ( +(p->0 == 'a') && +(p->1 == 'n') && +(p->2 == 'd') && + token_invalidchar(p->3)) { d->0 =255; return 3; } +if ( +(p->0 == 's') && +(p->1 == 't') && +(p->2 == 'e') && +(p->3 == 'p') && + token_invalidchar(p->4)) { d->0 =236; return 4; } +if ( +(p->0 == 'i') && +(p->1 == 'n') && +(p->2 == 'p') && +(p->3 == 'u') && +(p->4 == 't') && + token_invalidchar(p->5)) { d->0 =251; return 5; } +if ( +(p->0 == 'n') && +(p->1 == 'e') && +(p->2 == 'x') && +(p->3 == 't') && + token_invalidchar(p->4)) { d->0 =244; return 4; } +if ( +(p->0 == 'o') && +(p->1 == 'r') && + token_invalidchar(p->2)) { d->0 =243; return 2; } +if ( +(p->0 == 'n') && +(p->1 == 'e') && +(p->2 == 'w') && + token_invalidchar(p->3)) { d->0 =245; return 3; } +if ( +(p->0 == 's') && +(p->1 == 'c') && +(p->2 == 'r') && +(p->3 == 'i') && +(p->4 == 'p') && +(p->5 == 't') && + token_invalidchar(p->6)) { d->0 =237; return 6; } +if ( +(p->0 == 'p') && +(p->1 == 'r') && +(p->2 == 'i') && +(p->3 == 'n') && +(p->4 == 't') && + token_invalidchar(p->5)) { d->0 =242; return 5; } +if ( +(p->0 == 'r') && +(p->1 == 'u') && +(p->2 == 'n') && + token_invalidchar(p->3)) { d->0 =239; return 3; } +if ( +(p->0 == 'c') && +(p->1 == 'l') && +(p->2 == 's') && + token_invalidchar(p->3)) { d->0 =253; return 3; } +if ( +(p->0 == 'g') && +(p->1 == 'o') && +(p->2 == 't') && +(p->3 == 'o') && + token_invalidchar(p->4)) { d->0 =249; return 4; } +if ( +(p->0 == 't') && +(p->1 == 'o') && + token_invalidchar(p->2)) { d->0 =233; return 2; } +if ( +(p->0 == 'l') && +(p->1 == 'o') && +(p->2 == 'a') && +(p->3 == 'd') && + token_invalidchar(p->4)) { d->0 =247; return 4; } +if ( +(p->0 == 's') && +(p->1 == 't') && +(p->2 == 'o') && +(p->3 == 'p') && + token_invalidchar(p->4)) { d->0 =235; return 4; } +if ( +(p->0 == 'c') && +(p->1 == 'l') && +(p->2 == 'e') && +(p->3 == 'a') && +(p->4 == 'r') && + token_invalidchar(p->5)) { d->0 =254; return 5; } +if ( +(p->0 == 's') && +(p->1 == 'a') && +(p->2 == 'v') && +(p->3 == 'e') && + token_invalidchar(p->4)) { d->0 =238; return 4; } +if ( +(p->0 == 'u') && +(p->1 == 'n') && +(p->2 == 't') && +(p->3 == 'i') && +(p->4 == 'l') && + token_invalidchar(p->5)) { d->0 =232; return 5; } +if ( +(p->0 == 'r') && +(p->1 == 'e') && +(p->2 == 'p') && +(p->3 == 'e') && +(p->4 == 'a') && +(p->5 == 't') && + token_invalidchar(p->6)) { d->0 =240; return 6; } +if ( +(p->0 == 'i') && +(p->1 == 'f') && + token_invalidchar(p->2)) { d->0 =252; return 2; } +if ( +(p->0 == 'f') && +(p->1 == 'o') && +(p->2 == 'r') && + token_invalidchar(p->3)) { d->0 =250; return 3; } +if ( +(p->0 == 'v') && +(p->1 == 'a') && +(p->2 == 'l') && + token_invalidchar(p->3)) { d->0 =231; return 3; } +if ( +(p->0 == 'r') && +(p->1 == 'n') && +(p->2 == 'd') && + token_invalidchar(p->3)) { d->0 =246; return 3; } +if ( +(p->0 == 'l') && +(p->1 == 'i') && +(p->2 == 's') && +(p->3 == 't') && + token_invalidchar(p->4)) { d->0 =248; return 4; } +if ( +(p->0 == 'q') && +(p->1 == 'u') && +(p->2 == 'i') && +(p->3 == 't') && + token_invalidchar(p->4)) { d->0 =241; return 4; } +if ( +(p->0 == 't') && +(p->1 == 'h') && +(p->2 == 'e') && +(p->3 == 'n') && + token_invalidchar(p->4)) { d->0 =234; return 4; } +return 0; +]; diff --git a/tokeniser.h b/tokeniser.h new file mode 100644 index 0000000..dc1d7e8 --- /dev/null +++ b/tokeniser.h @@ -0,0 +1,323 @@ +include "token-table.h"; +constant TOKEN__EOL 0; +constant TOKEN__NUMBER 1; +constant TOKEN__STRING 2; +constant TOKEN__VAR 3; +constant TOKEN__SPACE 4; +constant TOKEN__PLUS 5; +constant TOKEN__MINUS 6; +constant TOKEN__STAR 7; +constant TOKEN__SLASH 8; +constant TOKEN__COLON 9; +constant TOKEN__EQUALS 10; +constant TOKEN__COMMA 11; +constant TOKEN__LPAREN 12; +constant TOKEN__RPAREN 13; +constant TOKEN__LARROW 14; +constant TOKEN__RARROW 15; +constant TOKEN__SEMICOLON 16; +constant TOKEN__GEQUAL 17; +constant TOKEN__LEQUAL 18; +constant TOKEN__NEQUAL 19; + +! Is this an invalid alphabetical token character? + +[ token_invalidchar c; + return ((c < 'a') || (c > 'z')); +]; + +! Is this a valid variable name character? + +[ token_validvarnamechar c; + return (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || + (c == '_') || (c == '%') || (c == '$')); +]; + +! Is this a number? + +[ token_isnumber c; + return ((c >= '0') && (c <= '9')); +]; + +! Is this whitespace? + +[ token_isspace c; + return ((c == 32) || (c == 9)); +]; + +! Tokenise an input stream. +! +! The input and output pointers must point to different regions of memory. + +[ tokenise_stream in out incount outcount i j k; + out->0 = 0; + outcount = 1; + incount = 0; + + while (in->incount) + { + i = token_encode(in+incount, out+outcount); + if (i ~= 0) + { + incount = incount + i; + outcount = outcount + 1; + } + else + { + ! Not a recognised token. We test against all the + ! other things we recognise. Note the order! This + ! is important. + + i = in->incount; + + ! Is it white space? + + if (token_isspace(i)) + { + while (token_isspace(in->(incount))) + incount++; + out->(outcount++) = TOKEN__SPACE; + continue; + } + + ! Is it a number? + + if (token_isnumber(i)) + { + out->(outcount++) = TOKEN__NUMBER; + i = 0; + do { + i = i*10 + (in->incount - '0'); + incount++; + } until (token_isnumber(in->incount) == 0); + (out+outcount)-->0 = i; + outcount = outcount + 2; + continue; + } + + ! Is it a string? + + if (i == '"') + { + ! Work out the size of the string. + + incount++; + i = incount; + do { + k = in->(incount++); + } until ((k == '"') || (k == 0)); + j = incount-i-1; + + ! Emit the opcode. + + out->(outcount++) = TOKEN__STRING; + out->(outcount++) = j; + + ! And now emit the string itself. + + memcpy(out+outcount, in+i, j); + outcount = outcount + j; + + ! Remember to skip over the close quote + ! before exiting. + + !incount++; + continue; + } + + ! Is it an operator? + + switch (i) + { + '+': out->(outcount++) = TOKEN__PLUS; + incount++; + continue; + + '-': out->(outcount++) = TOKEN__MINUS; + incount++; + continue; + + '*': out->(outcount++) = TOKEN__STAR; + incount++; + continue; + + '/': out->(outcount++) = TOKEN__SLASH; + incount++; + continue; + + ':': out->(outcount++) = TOKEN__COLON; + incount++; + continue; + + '=': out->(outcount++) = TOKEN__EQUALS; + incount++; + continue; + + ',': out->(outcount++) = TOKEN__COMMA; + incount++; + continue; + + '(': out->(outcount++) = TOKEN__LPAREN; + incount++; + continue; + + ')': out->(outcount++) = TOKEN__RPAREN; + incount++; + continue; + + '<': switch (in->(++incount)) + { + '>': out->(outcount++) = TOKEN__NEQUAL; + incount++; + break; + + '=': out->(outcount++) = TOKEN__LEQUAL; + incount++; + break; + + default: out->(outcount++) = TOKEN__LARROW; + break; + } + continue; + + '>': switch (in->(++incount)) + { + '=': out->(outcount++) = TOKEN__GEQUAL; + incount++; + break; + + default: out->(outcount++) = TOKEN__RARROW; + break; + } + continue; + + ';': out->(outcount++) = TOKEN__SEMICOLON; + incount++; + continue; + } + + ! Is it a variable name? + + if (token_validvarnamechar(i)) + { + out->(outcount++) = TOKEN__VAR; + do { + out->(outcount++) = in->(incount++); + } until (token_validvarnamechar(in->incount) == 0); + out->(outcount++) = 0; + continue; + } + + return incount; + } + } + + ! Patch up the line length. + + out->outcount = TOKEN__EOL; + out->0 = outcount + 1; + + return -1; +]; + +! Detokenise a stream. + +[ detokenise_stream in i; + while (1) + { + i = (in++)->0; + switch(i) + { + TOKEN__EOL: + print "^"; + return; + + TOKEN__VAR: + while (i = (in++)->0) + print (char) i; + break; + + TOKEN__NUMBER: + print in-->0; + in = in + 2; + break; + + TOKEN__SPACE: + print " "; + break; + + TOKEN__STRING: + i = (in++)->0; + print "~"; + while (i--) + print (char) (in++)->0; + print "~"; + break; + + TOKEN__PLUS: + print "+"; + break; + + TOKEN__MINUS: + print "-"; + break; + + TOKEN__STAR: + print "*"; + break; + + TOKEN__SLASH: + print "/"; + break; + + TOKEN__COLON: + print ":"; + break; + + TOKEN__EQUALS: + print "="; + break; + + TOKEN__COMMA: + print ","; + break; + + TOKEN__LPAREN: + print "("; + break; + + TOKEN__RPAREN: + print ")"; + break; + + TOKEN__LARROW: + print "<"; + break; + + TOKEN__RARROW: + print ">"; + break; + + TOKEN__SEMICOLON: + print ";"; + break; + + TOKEN__GEQUAL: + print ">="; + break; + + TOKEN__LEQUAL: + print "<="; + break; + + TOKEN__NEQUAL: + print "<>"; + break; + default: + print (string) token_decode(i); + } + } +]; + diff --git a/tokens.dat b/tokens.dat new file mode 100644 index 0000000..7ae533c --- /dev/null +++ b/tokens.dat @@ -0,0 +1,25 @@ +and +clear +cls +if +input +for +goto +list +load +rnd +new +next +or +print +quit +repeat +run +save +script +step +stop +then +to +until +val diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..aed486f --- /dev/null +++ b/utils.h @@ -0,0 +1,100 @@ +array hextable -> '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F'; + +! Fills a chunk of memory. + +[ memset ptr i size; + while (size--) + (ptr++)->0 = i; +]; + +! Copies a chunk of memory. + +[ memcpy dest src len; + if (dest > src) + { + ! Copy down. + dest = dest + len; + src = src + len; + while (len--) + (--dest)->0 = (--src)->0; + } + else + { + ! Copy up. + while (len--) + (dest++)->0 = (src++)->0; + } +]; + +! Emits a string. + +[ astring s; + while (s->0) + print (char) (s++)->0; +]; + +! Duplicates a string. + +[ strdup s s1 i; + i = strlen(s); + s1 = mem_alloc(i+1); + strcpy(s1, s); + return s1; +]; + +! Compares two strings. + +[ strcmp s1 s2; + while (s1->0 == s2->0) + { + if (s1->0 == 0) + return 0; + s1++; + s2++; + } + return 1; +]; + +! Counts the length of a string. + +[ strlen s i; + i = 0; + while ((s++)->0) + i++; + return i; +]; + +! Copies a string from one place to another. + +[ strcpy dest src; + do { + (dest++)->0 = (src++)->0; + } until (src->0 == 0); + dest->0 = 0; +]; + +! Outputs a hex number. + +[ phex i digits; + if (digits == 4) + { + print (char) hextable->((i / 4096) & 15); + print (char) hextable->((i / 256) & 15); + } + print (char) hextable->((i / 16) & 15); + print (char) hextable->(i & 15); +]; + +! Dumps some memory in hex. + +[ hexdump p size; + while (size) + { + phex(p->0, 2); + print " "; + size--; + p++; + } + print "^"; +]; +