From 8d55156675587a770c5654362bcbd3d2a98e4aa9 Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Fri, 12 Jan 2018 17:02:40 -0800 Subject: [PATCH] beginnings of REPL Read and print expressions composed of lists/forms/vectors/the number 4. E.g., (4 <[4] 4 ()>). Eval is a WIP and included but disabled for now. Signed-off-by: Kaz Wesley --- .gitignore | 19 ++++ Makefile.am | 2 + configure.ac | 9 ++ doc/DESIGN.md | 109 +++++++++++++++++++ doc/INTRO.md | 122 +++++++++++++++++++++ doc/MISC_NOTES.md | 163 ++++++++++++++++++++++++++++ doc/ROADMAP.md | 28 +++++ src/Makefile.am | 3 + src/alloc.c | 47 ++++++++ src/alloc.h | 101 +++++++++++++++++ src/eval.c | 113 +++++++++++++++++++ src/eval.h | 47 ++++++++ src/main.c | 124 +++++++++++++++++++++ src/object.c | 66 ++++++++++++ src/object.h | 270 ++++++++++++++++++++++++++++++++++++++++++++++ src/print.c | 85 +++++++++++++++ src/print.h | 25 +++++ src/read.c | 207 +++++++++++++++++++++++++++++++++++ src/read.h | 35 ++++++ 19 files changed, 1575 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile.am create mode 100644 configure.ac create mode 100644 doc/DESIGN.md create mode 100644 doc/INTRO.md create mode 100644 doc/MISC_NOTES.md create mode 100644 doc/ROADMAP.md create mode 100644 src/Makefile.am create mode 100644 src/alloc.c create mode 100644 src/alloc.h create mode 100644 src/eval.c create mode 100644 src/eval.h create mode 100644 src/main.c create mode 100644 src/object.c create mode 100644 src/object.h create mode 100644 src/print.c create mode 100644 src/print.h create mode 100644 src/read.c create mode 100644 src/read.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4fb84f --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +/Makefile +/Makefile.in +/aclocal.m4 +/autom4te.cache/ +/compile +/config.h +/config.h.in +/config.log +/config.status +/configure +/depcomp +/install-sh +/missing +/src/*.o +/src/.deps +/src/Makefile +/src/Makefile.in +/src/muddle +/stamp-h1 diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..98ed79c --- /dev/null +++ b/Makefile.am @@ -0,0 +1,2 @@ +SUBDIRS = src +# dist_doc_DATA = README diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..e26493e --- /dev/null +++ b/configure.ac @@ -0,0 +1,9 @@ +AC_INIT([muddle], [1.0], [keziahw@gmail.com]) +AM_INIT_AUTOMAKE([-Wall -Werror foreign]) +AC_PROG_CC +AC_CONFIG_HEADERS([config.h]) +AC_CONFIG_FILES([ + Makefile + src/Makefile +]) +AC_OUTPUT diff --git a/doc/DESIGN.md b/doc/DESIGN.md new file mode 100644 index 0000000..61a5aa3 --- /dev/null +++ b/doc/DESIGN.md @@ -0,0 +1,109 @@ +# Object format and target platform + +The semantics of the language are closely tied to the layout of its +objects; it is critical for efficient interpretation that the new +layout be similar to the original design. + +Key features of the original format: +* `WORD` size: 36-bit +* object size: 2 words +* `WORD`:pointer size ratio: 2:1 + +## `WORD` size + +Shrinking `WORD`s to 32 bits would pose more backward compatibility +hazards than extending them to 64 bits. `WORD`, and derived types like +`FIX`, must support 64-bit operations. + +## Pointer size + +On modern platforms, we have two available pointer sizes: 32-bit or +64-bit. With 64-bit pointers all objects would be 4-words and +256-bits, even though most of that would be dead space for most +`TYPE`s. The Muddle object layout is only reasonable with 32-bit +pointers. + +## 64-bit `WORD`s, 32-bit pointers + +So in order to maintain compatibility with objects designed for a 2:1 +pointer size ratio, our target platform is constrained to look like +the x32 ABI: an ILP32 environment with access to instructions that +operate on 64-bit words. + +It is not necessary to compile for the actual x32 ABI, as support for +x32 executables is not widespread. The implementation can be compiled +for x86-64, but internally ensure that its memory for storing objects +is mapped in the low 32-bits of its address space and then cast 64-bit +pointers to 32-bit values for storage in objects. + +## Implications for target platforms: + +### x86-64 (primary target) + +Muddle processes will be restricted to 4GB of address space per +process. That "should be enough for anybody," right? + +Nonstandard pointers add a wrinkle to FFI, but only a superficial one: +Muddle objects need to be GC-managed, so they can't be externally +allocated anyway; and FFI-pointers belong in `WORD`s, not object +pointers. + +If using an off-the-shelf GC like BDW, it does complicate things: the +library would need modification to recognize the packed pointers. + +### x86-32 (possible port) + +It wouldn't be hard to port the interpreter to 32-bit systems, but if +`FIX`es are allowed to use 32-bit arithmetic that could break old code +that assumes they are at least 36 bits, and any new code that assumed +they were 64 bits. In the future, we should consider switching to +explicitly-sized `FIX32`/`FIX64` or the like; for now let's just make +`FIX` 64-bit. + +# Interpretation + +The semantics documented in M-PL effectively mandate that the +interpreter act like an AST-interpreter: +* any `ATOM` could be rebound at any time to something that takes its + arguments as a `CALL` and edits, as syntax, the code of its caller + -- so a non-AST interpreter would still have to keep the AST + available, and would have to detect whether a `CALL` modified its + callee (e.g. by setting a `WRITE` monitor for all AST values) +* debugging mechanisms like `FRAME` allow direct inspection and even + modification of the call stack, which requires non-internal + subroutine calls to use the MCALL calling convention -- so most of + the optimization a JIT or bytecode interpreter could perform is + rendered moot + +The central implementation decision is how to overcome or avoid the +difficulties inherent in implementing an interpreter with more +advanced control flow than its host language. In this case: +coroutines, re-entrant stack-allocated continuations, and non-local +return are not straightforward in C-like languages. Options include: +* stackless interpreter +* assembly-language interpreter that can follow the program's complex + control flow +* implement just the compiler and use a metacircular evaluator to get + an interpreter + +The stackless approach goes well with an AST interpreter, and from the +wording in M-PL's Appendix I think it was the original +implementation's approach (although it's also possible they matched +the program's control flow, since that wouldn't have been hard for a +program written in PDP assembly). + +## Stackless interpreter + +Implement the interpreter core in plain C. Decouple the interpreter's +control flow from the interpreted program: +* Allocate the CONTROL STACKs on the heap. The interpreter core + subroutines use slots in their `FRAME` for local variables that may + need to be persisted across a `RESUME`. +* Track program execution explicitly as a state machine. + +Advantages: +* portability and readability of C + +Disadvantages: +* using state machines in place of direct control flow and eliminating + local variables makes for tricky C (cf. Boost's ASIO) diff --git a/doc/INTRO.md b/doc/INTRO.md new file mode 100644 index 0000000..452e6b1 --- /dev/null +++ b/doc/INTRO.md @@ -0,0 +1,122 @@ +# _Muddling_ for Lispers and Schemers + +This is an overview of the key differences between Muddle and other +lispy languages. Good Muddle is easy for humans to read, but it is +also tasteful: it efficiently matches the operation of the program to +the operation of the Muddle interpreter. Each semantic note is +followed by a review of the performance implications. + +## Objects + +Muddle code deals exclusively with Muddle _objects_. Objects are +always passed by value. An object can be a _scalar_ (e.g. `FIX`), with +its value stored directly inside it, or a _collection_ (e.g. `LIST`, +`VECTOR`, `TEMPLATE`), holding a reference to external data. Copying a +collection object yields a new object that refers to the same +collection _body_, but the object itself can have metadata that isn't +shared -- like a `VECTOR`'s current length (see below). + +### Performance + +An object is bigger than a pointer, but smaller enough to copy around +easily. Every object is tagged with a type, so dispatching on types +never requires following references. Collection bodies are allocated +in heaplike spaces and managed by a tracing GC; copying references has +no special overhead, but there is a write barrier when modifying +objects stored in collections. + +## Calls + +A `FUNCTION` or `FSUBR` may not evaluate all of its arguments; as a +result, the normal call process of evaluating arguments before passing +them to the callee is not practical. Arguments are passed unevaluated, +and may then be evaluated by the callee (usually implicitly, based on +its argument declarations). The main programmer-visible consequences +of this are: +* there are no "special forms"; first-class `FUNCTION`s can operate on + syntax at runtime (and even modify it) +* arguments are evaluated in a deterministic sequence (left-to-right + is the default, but the callee can do anything; adhering to the + Principle of Least Astonishment is recommended) +* stack traces show arguments evaluated in the callee's frame + +### Performance + +The by-name calling strategy combined with the ability to redefine any +global allow incredible flexibility for debugging, but neither feature +is recommended to be used widely in production. Accordingly, while the +functionality is always available to interpreted programs, performance +of code that doesn't use such dynamism is the implementation priority; +certain unusual operations, like replacing an applicable function with +a `"CALL"` function, may carry a high runtime cost. + +## Scope + +Scoping is dynamic by default. You can create nested bindings to keep +variables relatively local, e.g. with `"AUX"` pseudo-arguments. You +can also create a full lexical scope with `BLOCK`. + +## `LIST` + +Unlike other lisps that implement their "lists" with binary trees, +Muddle's `LIST`s are intrusive singly-linked lists: the value is not +behind a pointer like a "car", and *every* object has a `REST` +field. Lists can share tails, but no object can otherwise be an +element of more than one list, or in a list and also anywhere +else. Cyclic or self-containing lists are possible, and must be +handled with care. + +### Performance + +`LIST`s can be very efficient for true lists (including lists of +objects that may be `LIST`s, like Muddle code itself). The GC handles +them specially in order to store objects next to their `REST`s +whenever possible. To get efficient allocations, help the GC predict +what you want by building your list idiomatically: +* LIFO order (fastest): consecutive `CONS`es to one list +* FIFO order (slow): `PUTREST` 1-element lists +* fixed-size: `` (including `MAPF ,LIST`), but if you expect + to extend it right away, you should build it with `CONS` or + `PUTREST` + +## `VECTOR` + +Muddle's `VECTOR` is an array-structured container of objects. Like +similar types in other languages, it supports efficient random access +and expensive resizing to a larger allocation. More unusually, it has +stack-like behavior: each `VECTOR` object maintains a current length +that is less than or equal to the allocated length of its body. The +length can be decreased with `REST` or increased (to at most the +body's actual size) with `BACK` / `TOP`, which make objects accessible +again, with their original values. Because all of its allocated +objects are reachable with `TOP` (or potentially through a different +`VECTOR` to the same body), its full allocation is always composed of +initialized, live objects (although those objects can be `LOSE`s). + +### Performance + +Choose a `VECTOR` if you want random access, or the stacklike +features. + +`REST` is non-destructive; if you won't be using the objects again, +overwriting them with `LOSE`s will allow the GC to free anything they +refer to. + +## User-defined data types + +A common idiom for user-defined data types in Muddle is to use a type +with a `TYPEPRIM` of `VECTOR`, and use `ATOM`s that resolve to `FIX`es +as accessors. + +## Control flow + +[coroutines, continuations, ...] + +There is no TCE for normal calls, but `AGAIN` resembles a +self-tailcall. + +## Misc + +Reference semantics can be had by indirecting through a collection. If +you need to accept a value by reference without knowing what kind of +collection it's in, you want a `LOCATIVE`. diff --git a/doc/MISC_NOTES.md b/doc/MISC_NOTES.md new file mode 100644 index 0000000..58a9248 --- /dev/null +++ b/doc/MISC_NOTES.md @@ -0,0 +1,163 @@ +# Design goals + +## Compatibility + +Support the same language as the original interpreter, except for +platform differences. + +## Architectural faithfulness + +Try to maintain the spirit of the initial implementation. + +The original was written in assembly and ran in environments we'd now +consider extremely memory constrained. Update the design to fit new +platforms, but don't expand its resource requirements to fit the glut +of available memory. + +## Suitability for new projects + +The implementation should not be optimized for the PDP at the expense +of performance on modern hardware. + +The language should not be complicated solely for backward +compatibility. + +The runtime should not be larger or slower than necessitated by the +dynamism of the language. + +# Distinctive features + +## Package system + +The package-oriented nature of the language is a huge plus. Easy +sharing of packages along the lines of Quicklisp/MELPA/Cargo is a key +feature for building a community. + +## Coroutines (stackful) + +## Interrupt mechanism + +Very powerful. In combination with coroutines, could be used to +implement pre-emptively multitasked green threads. Supports setting +"breakpoints" on variable accesses. + +## Fexprs + +In Muddle there are no "magic" forms that can only be provided by the +runtime (or, anything can be magic). Disciplined use is required to +avoid madness, but sometimes FEXPRs can be clearer than macros. + +## First-class term-rewriting fexprs... + +"CALL" is the most (dangerously) powerful language feature I've ever +heard of. + +## Re-entrant first-class stack-allocated continuations + +That can jump forward. + +With UNWIND. + +## Powerful type system + +### Slot accessors + +Simple accessors just evaluate to numbers that perform access into +vector slots, but the caller sees the same interface as if it were a +function. + +### Truthiness is an `Either` type + +# language features + +## no tail call elimination + +TCE would elide FRAMEs, and mess with ACTIVATIONs and stack introspection. + +## variable scope + +* primarily dynamic, with namespaces +* lexical scoping features available + * e.g. PROG + +## coroutines (stackful) + +## evaluation model + +### fexprs: FSUBR, CALL/QUOTE/ARGS + +Any "function" can accept arguments unevaluated, and then it may BIND +and EVAL them. Hence short-circuiting operators such as AND could be +defined as library functions. + +## locatives + +* reference to a struct field + +## interrupts (PL-21) + +* operationally similar to UNIX signals +* checked for a defined times (PL-21.7.1) +* could be implemented with an atomic flag + +## fixnums + +* FIX is defined as a 36-bit integer +* by default, an error will be produced on overflow! this makes + compatibility easier than if software counted on wrapping... +* RANDOM returns a 36-bit random + * though code that compares to may be ok +* out of a 64-bit word, this leaves a lot of room for tags or something... + +## truth + +* false/true is actually: left(list) / any +* T exists as true value carrying no additional information + +## uvector + +* type-homogenous vector + +## closures + +* since scope is dynamic, closure variables are captured explicitly + +## macros (PL-17) + +# Packages + +Package metadata: +* MANIFEST GVALs +* NEWTYPE definitions +* MACROs that don't depend on the package's functions +* RSUBR DECLs + +# Mediation + +The documentation refers to "mediation" when compiled code calls in to +the interpreter or interpreted code calls a compiled subroutine. + +## Mediated subroutine calls + +Calls to `RSUBR`s that haven't been `SUBRIFY`'d are _mediated_ calls; +the interpreter does a full `MCALL`, and the compiled code has an +entry point that translates to a `BINCALL`. + +# State machine + +## Non-local control flow + +### `ERRET` / return from +Check `PROCESS` id. Unwind stack until the specified `FRAME`. Return. + +### `RESUME` +Change `PROCESS` id. Return from new `PROCESS`es previous `RESUME` +frame with specified value. + +### `GO`/`TAG` +`TAG`: holds a pointer to an AST node, and the call depth of that +node's enclosing `PROG`. +`GO`: unwind to depth specified in `TAG`; resume execution. + +### `REPEAT` +Uwind to enclosing `PROG`; continue. diff --git a/doc/ROADMAP.md b/doc/ROADMAP.md new file mode 100644 index 0000000..296efed --- /dev/null +++ b/doc/ROADMAP.md @@ -0,0 +1,28 @@ +Roughly dependency-ordered todo list: + +# Minimal core language: +* [ ] *PROCESS/MCALL* +* [ ] EVAL; initial interpreter and REPL +* [ ] atoms, global bindings, normal FUNCTIONs +* [ ] *GC* +* [ ] local bindings +* [ ] control flow +* [ ] advanced FUNCTIONs; macros +*Bold*: APIs everything else is built on, impl as early as possible + +# Other essential language components: +* [ ] atom OBLISTs, BLOCK +* [ ] INTERRUPTs +* [ ] DECLs checking +* [ ] PACKAGEs +* [ ] STRINGs +* [ ] checkpointing? +* [ ] FFI + +# Bonus features: +* [ ] package management? +* [ ] COMPILE function? +* [ ] I/O threadpool / BLOCKED process state? +* [ ] DECL TEMPLATEs? +* [ ] optimizing AOT compiler? +* [ ] heapstacks? diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..2da8bbd --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,3 @@ +bin_PROGRAMS = muddle +muddle_SOURCES = main.c read.c eval.c print.c alloc.c object.c +muddle_CFLAGS = -Wall -Wno-unused-function -Werror=implicit-function-declaration -Werror=incompatible-pointer-types diff --git a/src/alloc.c b/src/alloc.c new file mode 100644 index 0000000..7209060 --- /dev/null +++ b/src/alloc.c @@ -0,0 +1,47 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "alloc.h" +#include "object.h" + +pool_ptr +pool_copy_array_rev (const pool_object * objs, uint32_t len) +{ + if (!len) + return 0; + pool_object *xs = pool_alloc (len); + for (int i = 0; i < (int) len; i++) + { + xs[i].type = objs[len - 1 - (unsigned) i].type; + xs[i].rest = POOL_PTR (&xs[i + 1]); + xs[i].val = objs[len - 1 - (unsigned) i].val; + } + xs[len - 1].rest = 0; + return POOL_PTR (xs); +} + +heap_ptr +heap_copy_array_rev (const object * objs, uint32_t len) +{ + object *xs = heap_alloc (len); + for (int i = 0; i < (int) len; i++) + { + xs[i] = objs[len - 1 - (unsigned) i]; + } + return HEAP_PTR_OF_OBJECT (xs); +} diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..239a7e0 --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,101 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef ALLOC_H +#define ALLOC_H + +#include +#include +#include + +/// 0, or a "pointer" to an object allocated in the pool and fully-initialized +typedef uint32_t pool_ptr; +/// 0, or a "pointer" to an object allocated in the heap and fully-initialized +typedef int32_t heap_ptr; + +typedef union pool_object pool_object; +typedef union object object; + +extern char *pool; // pool_object +extern char *vhp_base; // object +extern char *vhp; // object + +static inline pool_object * +POOL_OBJECT (pool_ptr p) +{ + return (pool_object *) (uintptr_t) p; +} + +static inline bool +IS_VALID_POOL_OBJECT (pool_object * p) +{ + pool_ptr pp = (pool_ptr) (uintptr_t) p; + return (uintptr_t) pp == (uintptr_t) p; +} + +static inline pool_ptr +POOL_PTR (pool_object * p) +{ + pool_ptr pp = (pool_ptr) (uintptr_t) p; + assert (IS_VALID_POOL_OBJECT (p)); + return pp; +} + +// TODO make (heap_ptr)0 nullish +static inline object * +OBJECT_OF_HEAP_PTR (heap_ptr p) +{ + assert (p >= 0); + return (object *) (vhp_base + (p << 4)); +} + +static inline pool_object * +pool_alloc (uint32_t len) +{ + char *pp = pool; + pool += (len << 4); + return (pool_object *) pp; +} + +static inline heap_ptr +HEAP_PTR_OF_OBJECT (object * p) +{ + assert ((uintptr_t) p >= (uintptr_t) vhp_base); + heap_ptr h = (heap_ptr) ((uintptr_t) p - (uintptr_t) vhp_base); + return h; +} + +static inline object * +heap_alloc (uint32_t len) +{ + enum + { DOPE_LEN = 1 }; + char *p = vhp; + vhp += (len + DOPE_LEN) << 4; + return (object *) p; +} + +// given a headerless array of objects of known size, +// copy it backwards into newly-allocated pool space +pool_ptr pool_copy_array_rev (const pool_object * objs, uint32_t len); + +// given a headerless array of objects of known size, +// copy it backwards into a newly-allocated vector body +heap_ptr heap_copy_array_rev (const object * objs, uint32_t len); + +#endif // ALLOC_H diff --git a/src/eval.c b/src/eval.c new file mode 100644 index 0000000..7bbcad7 --- /dev/null +++ b/src/eval.c @@ -0,0 +1,113 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "eval.h" + +#include "alloc.h" + +// globals for now +extern object ret; +extern frame *cf; +extern object *cst; + +void +push_frame (void (*fn) (), tuple_object args, void (*cont) ()) +{ + // update current frame's continuation + cf->cont = new_subr (cont); + + // allocate new frame, make current + frame *prev = cf; + cf = (frame *) cst; + cst += sizeof (frame) / sizeof (object); + + // set new frame's frametop + cf->cont = new_subr (fn); + cf->args = args; + cf->prevframe = + new_tuple ((object *) prev, sizeof (frame) / sizeof (object)); +} + +void +pop_frame () +{ + cf = (frame *) cf->prevframe.body; + cst = (object *) cf - sizeof (frame) / sizeof (object); +} + +#define RETURN(x) do { ret = (x); pop_frame(); return; } while (0) +#define CALL_THEN(fn, args, cont) do { push_frame(fn, args, cont); return; } while (0) +#define TAILCALL(fn) do { cf->cont = fn; return; } while (0) + +/* + uint32_t len = 0; + pool_object *x = POOL_OBJECT(o->head); + while (x) { + *--cst = *x; + x = POOL_OBJECT(o->rest); + len++; + } + return mcall(evaluator, len); +*/ + +static void +eval_list () +{ + // store result of previous call + cf->locals[1] = ret; + + // get next input, and advance input pointer + pool_ptr rest_in = as_list(&cf->args.body[0])->head; + if (!rest_in) + RETURN(cf->locals[0]); + POOL_OBJECT(as_list(&cf->locals[1])->head)->rest = + as_list(&cf->locals[1])->head + (pool_ptr)sizeof(pool_object); + + // eval next element + CALL_THEN (eval, new_tuple ((object*)POOL_OBJECT(rest_in), 1), eval_list); +} + +void +eval () +{ + assert (cf->args.len == 1); + switch (cf->args.body[0].type) + { + case EVALTYPE_FIX32: + case EVALTYPE_FIX64: + RETURN (cf->args.body[0]); + case EVALTYPE_LIST: + // Handle `head` now; then iterate on `.rest`. + if (!cf->args.body[0].list.head) + RETURN (cf->args.body[0]); + // locals: { list_object list, list_object tail } + cst += 2; + // Allocate the new list contiguously and keep track of the + // current tail so we can build it in forward order. + cf->locals[0].list = + new_list (POOL_PTR (pool_alloc (list_length (&cf->args.body[0].list)))); + cf->locals[1] = cf->locals[0]; + CALL_THEN (eval, new_tuple ((object*)POOL_OBJECT (cf->args.body[0].list.head), 1), eval_list); + /* + case EVALTYPE_FORM: TAILCALL(eval_form); + case EVALTYPE_VECTOR: TAILCALL(eval_vector); + */ + default: + assert (0 && "I don't know how to eval that"); + } +} diff --git a/src/eval.h b/src/eval.h new file mode 100644 index 0000000..22b5374 --- /dev/null +++ b/src/eval.h @@ -0,0 +1,47 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef EVAL_H +#define EVAL_H + +#include "object.h" + +void eval (); +void push_frame (void (*fn) (), tuple_object args, void (*cont) ()); + +// stack: +// <0> args... +// <0> frametop +// <0> framebottom +// <0> temps... +// <1> args... +// <1> frametop +typedef struct frame frame; +struct frame +{ + // <0> frametop (set for this call) + subr_object cont; + tuple_object args; + tuple_object prevframe; + // <0> framebottom (state saved before child call) + + // <0> temps, <1> args + object locals[]; +}; + +#endif // EVAL_H diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..b57ee6d --- /dev/null +++ b/src/main.c @@ -0,0 +1,124 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "read.h" +#include "eval.h" +#include "print.h" +#include "object.h" + +#include +#include +#include + +// TODO: put these in interpreter-wide ctx object +char *pool; +char *vhp_base; +char *vhp; + +// TODO: store these in current PROCESS +frame *cf; +object ret; +object *cst; + +// Define the address spaces. +enum +{ + // Max objects that can be in linked lists (must be < 2^32). + POOL_OBJCT = 1024, + // Max size, in objects, of control stack segment. + STACK_OBJCT = 256, + // Max size, in objects, of VECTOR heap. + VECTOR_OBJCT = 1024, + // Max objects reader can handle at once. + // TODO: allocate as VECTOR and eliminate this arbitrary limit + READER_OBJCT = 64 +}; + +int +main () +{ + // The REST pool (in low mem). + char *pool_base = + mmap (0, POOL_OBJCT * sizeof (object), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + pool = pool_base; + + // The CONTROL STACKs (TODO: per-PROCESS). + object *cst_base = + mmap (0, STACK_OBJCT * sizeof (object), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + cst = cst_base; + cf = (frame *) cst; + cst += sizeof (frame) / sizeof (object); + + // The VECTOR heap. + vhp_base = + mmap (0, VECTOR_OBJCT * sizeof (object), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + vhp = vhp_base; + + // Reader stack (TODO: dynamically allocate as VECTOR). + object rst_base[READER_OBJCT]; + object *rst = rst_base + READER_OBJCT; + + // TODO: push frames for `>>>`. + // Entire toplevel becomes `for (;;) cf->cont.fn();` + char buf[512]; + ssize_t n; + while ((n = read (STDIN_FILENO, buf, sizeof (buf))) > 0) + { + // mock GC (no object persistence) + pool = pool_base; + vhp = vhp_base; + // terminate input + assert (buf[n - 1] == '\n'); + buf[n - 1] = '\0'; + // Read a thing + reader_stack st; + st.pos = rst; + st.framelen = 0; + const char *p = buf; + while (p && p[0]) + { + p = read_token (p, &st); + } + assert (p); + if (!st.framelen) + continue; + assert (st.framelen == 1); + /* + // Eval the thing + push_frame (eval, new_tuple (st.pos, 1), 0); + while (cf->cont.fn) + { + cf->cont.fn (); + } + // Print the thing + print_object (&ret); + */ + // debugging: print without eval + print_object (st.pos); + printf ("\n"); + // Loop! + } + + munmap (cst_base, STACK_OBJCT * sizeof (object)); + munmap (vhp_base, VECTOR_OBJCT * sizeof (object)); + munmap (pool_base, POOL_OBJCT * sizeof (object)); + return 0; +} diff --git a/src/object.c b/src/object.c new file mode 100644 index 0000000..a2a81a6 --- /dev/null +++ b/src/object.c @@ -0,0 +1,66 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "object.h" + +uint32_t +list_length (const list_object * o) +{ + const pool_object *p = POOL_OBJECT (o->head); + uint32_t n = 0; + while (p) + { + n++; + p = POOL_OBJECT (p->rest); + } + return n; +} + +/* +static object *cons_new(evaltype type, value v, object *cdr) { + object o; + o.type = type; + o.v = v; + return cons(&o, cdr); +} +*/ + +/* +object *cons(const object *car, const object *cdr) { + assert(car); + assert(cdr); + object *head = pool_alloc(1); + head->type = car->type; + head->rest = cdr->v.head; + head->v = car->v; + return head; +} +*/ + +/* +static object rest(const object *lst) { + assert(lst); + object *head = OBJECT_OF_POOL_PTR(lst->v.head); + assert(head); + object o; + o.type = EVALTYPE_LIST; + o.rest = 0; + o.v.head = ((object*)head)->rest; + return o; +} +*/ diff --git a/src/object.h b/src/object.h new file mode 100644 index 0000000..cb1ade1 --- /dev/null +++ b/src/object.h @@ -0,0 +1,270 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef OBJECT_H +#define OBJECT_H + +#include "alloc.h" + +#include +#include +#include +#include + +typedef uint32_t evaltype; + +enum +{ +// pool OK + TYPEPRIM_FIX32 = 0x00010000, + TYPEPRIM_FIX64 = 0x00020000, + TYPEPRIM_LIST = 0x00030000, + TYPEPRIM_VECTOR = 0x00040000, + TYPEPRIM_SUBR = 0x00050000, + +// can't be in pool + TYPEPRIM_NOPOOL_MASK = 0x70000000, + TYPEPRIM_TUPLE = 0x70010000, + +// TYPEPRIM is half of EVALTYPE + TYPEPRIM_MASK = 0x7fff0000 +}; + +enum +{ + EVALTYPE_FIX32 = TYPEPRIM_FIX32, + + EVALTYPE_FIX64 = TYPEPRIM_FIX64, + + EVALTYPE_LIST = TYPEPRIM_LIST, + EVALTYPE_FORM, + + EVALTYPE_VECTOR = TYPEPRIM_VECTOR, + + EVALTYPE_SUBR = TYPEPRIM_SUBR, + + EVALTYPE_TUPLE = TYPEPRIM_TUPLE, +}; + +static inline uint32_t +TYPEPRIM (evaltype x) +{ + return x & TYPEPRIM_MASK; +} + +static inline bool +TYPEPRIM_EQ (evaltype a, evaltype b) +{ + return !((a ^ b) & TYPEPRIM_MASK); +} + +typedef struct +{ + uint32_t _dummy; +} opaque32; +typedef struct +{ + uint64_t _dummy; +} opaque64; + +/** +Object types. + +An Object's value is accessed through a concrete `foo_object` +type. + +`object` can be used to refer to Objects of unspecified type, which +are opaque except for their `type` field. Checked downcasts can be +performed via the `as_foo` functions; unchecked downcasts via +`object.foo` (use only when type information is locally +obvious). Some objects can be upcast to more specific supertypes, +such as `pool_object` for objects that are known to be storeable in +the pool. + +The generic `object` type should not be used to accept parameters +that have constraints on their type, and should not be used to +return objects that are of a statically-known type. Encoding type +information in function signatures allows strictly local reasoning +about types. +*/ + +typedef union object object; + +typedef struct +{ + alignas (16) evaltype type; + pool_ptr rest; + uint32_t _pad; + uint32_t val; +} fix32_object; + +typedef struct +{ + alignas (16) evaltype type; + pool_ptr rest; + uint64_t val; +} fix64_object; + +typedef struct +{ + alignas (16) evaltype type; + pool_ptr rest; + uint32_t _pad; + pool_ptr head; +} list_object; + +typedef struct +{ + alignas (16) evaltype type; + pool_ptr rest; + uint32_t len; + heap_ptr body; +} vector_object; + +typedef struct +{ + alignas (16) evaltype type; + pool_ptr rest; + void (*fn) (); +} subr_object; + +typedef struct +{ + alignas (16) + /// no rest; is a NOPOOL type + evaltype type; + uint32_t len; + /// allocation can be anywhere + object *body; + // uniq_id uid ?? +} tuple_object; + +/// Object of a type that can be stored in the pool. +/// NB. a pool_object* can point outside the pool; contrast with pool_ptr. +typedef union pool_object +{ + /// any pool object has a type and a rest + struct + { + alignas (16) evaltype type; + pool_ptr rest; + opaque64 val; + }; + /// objects of statically known type + fix32_object fix32; + fix64_object fix64; + list_object list; + vector_object vector; +} pool_object; + +union object +{ + /// any object has a type + struct + { + alignas (16) evaltype type; + opaque32 _unknown0; + opaque64 _unknown1; + }; + /// objects of statically known type + /// use as_X() for checked downcast + pool_object pool; + fix32_object fix32; + fix64_object fix64; + list_object list; + vector_object vector; + tuple_object tuple; +}; + +/** +Initialization helpers. +*/ + +static inline fix64_object +new_fix64 (uint64_t n) +{ + return (fix64_object) + { + .type = EVALTYPE_FIX64,.rest = 0,.val = n,}; +} + +static inline list_object +new_list (pool_ptr head) +{ + return (list_object) + { + .type = EVALTYPE_LIST,.rest = 0,.head = head,}; +} + +static inline vector_object +new_vector (heap_ptr body, uint32_t length) +{ + return (vector_object) + { + .type = EVALTYPE_VECTOR,.rest = 0,.len = length,.body = body,}; +} + +static inline tuple_object +new_tuple (object * body, uint32_t length) +{ + return (tuple_object) + { + .type = EVALTYPE_TUPLE,.len = length,.body = body,}; +} + +static inline subr_object +new_subr (void (*fn) ()) +{ + return (subr_object) + { + .type = EVALTYPE_SUBR,.rest = 0,.fn = fn,}; +} + +/** +Common object operations. +*/ + +uint32_t list_length (const list_object * o); + +/** +Checked downcasts. +*/ + +static inline list_object * +as_list (object * o) +{ + assert (TYPEPRIM_EQ (o->type, EVALTYPE_LIST)); + return &o->list; +} + +static inline vector_object * +as_vector (object * o) +{ + assert (TYPEPRIM_EQ (o->type, EVALTYPE_VECTOR)); + return &o->vector; +} + +static inline pool_object * +as_pool (object * p) +{ + if (TYPEPRIM (p->type) & TYPEPRIM_NOPOOL_MASK) + return 0; + return (pool_object *) p; +} + +#endif // OBJECT_H diff --git a/src/print.c b/src/print.c new file mode 100644 index 0000000..64a6115 --- /dev/null +++ b/src/print.c @@ -0,0 +1,85 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "print.h" +#include "object.h" + +// TODO: "print" into buffer +#include + +#include + +static void +print_vector_body (const vector_object * o) +{ + const object *p = OBJECT_OF_HEAP_PTR (o->body); + if (!p) + return; + if (o->len) + print_object (&p[0]); + for (uint32_t i = 1; i < o->len; i++) + { + printf (" "); + print_object (&p[i]); + } +} + +static void +print_list_body (const list_object * o) +{ + const pool_object *p = POOL_OBJECT (o->head); + if (!p) + return; + print_object ((const object *) p); + while ((p = POOL_OBJECT (p->rest))) + { + printf (" "); + print_object ((const object *) p); + } +} + +void +print_object (const object * o) +{ + switch (o->type) + { + case EVALTYPE_FIX32: + printf ("%u", o->fix32.val); + break; + case EVALTYPE_FIX64: + printf ("%lu", o->fix64.val); + break; + case EVALTYPE_LIST: + printf ("("); + print_list_body (&o->list); + printf (")"); + break; + case EVALTYPE_FORM: + printf ("<"); + print_list_body (&o->list); + printf (">"); + break; + case EVALTYPE_VECTOR: + printf ("["); + print_vector_body (&o->vector); + printf ("]"); + break; + default: + assert (0 && "I don't know how to print that"); + } +} diff --git a/src/print.h b/src/print.h new file mode 100644 index 0000000..9241ee2 --- /dev/null +++ b/src/print.h @@ -0,0 +1,25 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef PRINT_H +#define PRINT_H + +typedef union object object; +void print_object (const object * o); + +#endif // PRINT_H diff --git a/src/read.c b/src/read.c new file mode 100644 index 0000000..57dd81f --- /dev/null +++ b/src/read.c @@ -0,0 +1,207 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "read.h" +#include "object.h" + +#include +#include + +/** +Tokenization +*/ + +// Return the number of whitespace characters at the beginning of the input. +static int +count_whitespace (const char *p) +{ + for (int i = 0;; i++) + { + switch (p[i]) + { + case 0: + case ' ': + case '\n': + case '\r': + case '\t': + case '\v': + case '\f': + break; + default: + return i; + } + } +} + +// Return the number of characters at the beginning of the input +// constituting a valid PNAME. +// If the input ends with '\', it is incomplete, and -1 is returned. +static int +count_pname (const char *p) +{ + // rule (1): valid FLOAT / FIX isn't a PNAME. + // Caller must try to read as number first! + + // rule (2): dot can't be first + if (p[0] == '.') + return 0; + + // rule (3): "if you can't type it interactively, it's only valid non-interactively" + // (not currently enforced; just don't give your atoms names like ^L) + + for (int i = 0;; i++) + { + switch (p[i]) + { + // separators end the PNAME + // rule (4): whitespace + // rule (5): special characters + // rule (6): brackets + case ' ': + case '\n': + case '\r': + case '\t': + case '\v': + case '\f': + case ',': + case '#': + case '\'': + case ';': + case '%': + case '(': + case ')': + case '[': + case ']': + case '<': + case '>': + case '{': + case '}': + case '"': + case 0: + return i; + // escape: next char becomes normal + case '\\': + if (!p[++i]) + return -1; + } + } +} + +/** +State machine +*/ + +/* +static uint32_t obj_get_fix32(const object *o) { + assert(o->type == EVALTYPE_FIX32); + return o->v.fix32; +} +*/ + +// stack[0..len]: objs in current list +// stack[len]: parent len +const char * +read_token (const char *p, reader_stack * st) +{ + p += count_whitespace (p); + switch (p[0]) + { + case '\0': + break; + case '(': + case '<': + case '[': + { + // opener: push current framelen; start new frame + // store child type and parent framelen in a pseudo-object together + evaltype type; + switch (p++[0]) + { + case '(': + type = EVALTYPE_LIST; + break; + case '<': + type = EVALTYPE_FORM; + break; + case '[': + type = EVALTYPE_VECTOR; + break; + default: + assert (0 && "martian opener token?"); + } + *--(st->pos) = (object) + { + .fix32.type = type,.fix32.rest = 0,.fix32.val = st->framelen,}; + st->framelen = 0; + break; + } + case ')': + case '>': + { + evaltype type; + switch (p++[0]) + { + case ')': + type = EVALTYPE_LIST; + break; + case '>': + type = EVALTYPE_FORM; + break; + default: + assert (0 && "martian list closer token?"); + } + // build list from current stack frame + pool_ptr o = + pool_copy_array_rev ((pool_object *) st->pos, st->framelen); + // pop frame, push new LIST + st->pos += st->framelen; + assert (st->pos->type == type); + st->framelen = st->pos->fix32.val + 1; + // overwrite the frame marker with the collection it became + st->pos->list = (list_object) + { + .type = type,.rest = 0,.head = o}; + break; + } + case ']': + { + p++; + // build vector from current stack frame + heap_ptr h = heap_copy_array_rev (st->pos, st->framelen); + // pop frame, push new VECTOR + uint32_t len = st->framelen; + st->pos += st->framelen; + assert (st->pos->type == EVALTYPE_VECTOR); + st->framelen = st->pos->fix32.val + 1; + st->pos->vector = new_vector (h, len); + break; + } + case '4': + { + p++; + // push fix obj, extending frame + (--(st->pos))->fix64 = new_fix64 (4); + st->framelen++; + break; + } + default: + fprintf (stderr, "read unimplemented for char: '%c'\n", *p); + assert (0 && "read unimplemented for char"); + } + return p; +} diff --git a/src/read.h b/src/read.h new file mode 100644 index 0000000..2d87835 --- /dev/null +++ b/src/read.h @@ -0,0 +1,35 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef READ_H +#define READ_H + +#include + +typedef union object object; +typedef struct +{ + object *pos; + uint32_t framelen; + uint32_t _pad; +} reader_stack; + +// TODO: hide this, export higher-level interface +const char *read_token (const char *p, reader_stack * st); + +#endif // READ_H -- 2.31.1