From 081a7c4eedd6e6aa9da616ed88c82ab85efdb98f Mon Sep 17 00:00:00 2001 From: Kaz Wesley Date: Fri, 2 Feb 2018 13:20:06 -0800 Subject: [PATCH] Implement OBLISTs Signed-off-by: Kaz Wesley --- src/Makefile.am | 4 +- src/alloc.c | 15 +++++++ src/alloc.h | 5 ++- src/atom.c | 37 ++++++++++++++++ src/atom.h | 23 ++++++---- src/eval.c | 1 + src/hash.h | 47 +++++++++++++++++++++ src/main.c | 10 +++++ src/object.c | 7 ++++ src/object.h | 66 +++++++++++++++++++++-------- src/oblist.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ src/oblist.h | 40 ++++++++++++++++++ src/print.c | 36 ++++++++++++++++ src/read.c | 3 +- src/test_oblists | 9 ++++ 15 files changed, 382 insertions(+), 28 deletions(-) create mode 100644 src/hash.h create mode 100644 src/oblist.c create mode 100644 src/oblist.h create mode 100755 src/test_oblists diff --git a/src/Makefile.am b/src/Makefile.am index 922b9b6..68f749c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,3 +1,5 @@ bin_PROGRAMS = muddle -muddle_SOURCES = main.c read.c eval.c print.c alloc.c object.c atom.c +muddle_SOURCES = main.c read.c eval.c print.c alloc.c object.c atom.c oblist.c muddle_CFLAGS = -Wall -Wno-unused-function -Werror=implicit-function-declaration -Werror=incompatible-pointer-types + +TESTS = test_oblists diff --git a/src/alloc.c b/src/alloc.c index ad55630..356a038 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -17,6 +17,7 @@ License along with this file. If not, see */ #include "alloc.h" +#include "atom.h" #include "object.h" extern pool_object *pool; @@ -86,3 +87,17 @@ heap_copy_array_rev (const object * objs, uint32_t len) } return p; } + +uv_val * +UV_VAL (heap_ptr p) +{ + assert (p > 0); + return (uv_val *) & vhp_base[p]; +} + +atom_body * +ATOM_BODY (heap_ptr p) +{ + assert (p); + return (atom_body *) (&vhp_base[p]); +} diff --git a/src/alloc.h b/src/alloc.h index 58ee808..bd7c46b 100644 --- a/src/alloc.h +++ b/src/alloc.h @@ -28,16 +28,19 @@ typedef int32_t heap_ptr; typedef union pool_object pool_object; typedef union object object; +typedef union uv_val uv_val; pool_object *POOL_OBJECT (pool_ptr p); object *HEAP_OBJECT (heap_ptr p); +uv_val *UV_VAL (heap_ptr p); pool_ptr pool_alloc (uint32_t len); heap_ptr heap_alloc (uint32_t len); inline static heap_ptr heap_alloc_uv (uint32_t len) { - return heap_alloc ((len + 1) >> 1); + // divide by 2 (rounding up), then add one for dope + return heap_alloc (((len + 1) >> 1) + 1); } // given a headerless array of objects of known size, diff --git a/src/atom.c b/src/atom.c index 38cadc0..5015455 100644 --- a/src/atom.c +++ b/src/atom.c @@ -18,3 +18,40 @@ License along with this file. If not, see #include "alloc.h" #include "atom.h" + +#include + +typedef struct atom_body +{ + evaltype type; // UNBOUND/LOCI + // bindid + // value ptr + // oblist ptr + // type ptr + char pname[]; +} atom_body; + +atom_object +atom_create (const char *name, uint32_t namelen) +{ + // C-compatible strings for simplicity + namelen += 1; + heap_ptr body = atom_body_alloc (namelen); + atom_body *content = (atom_body *) HEAP_OBJECT (body); + memcpy (&content->pname, name, namelen - 1); + content->pname[namelen - 1] = '\0'; + atom_object new = new_atom (body, namelen); + return new; +} + +heap_ptr +atom_body_alloc (uint32_t namelen) +{ + return heap_alloc_uv ((sizeof (atom_body) + namelen + 63) / 64); +} + +const char * +atom_pname (atom_object o) +{ + return ATOM_BODY (o.val.body)->pname; +} diff --git a/src/atom.h b/src/atom.h index 5b71ad3..787fece 100644 --- a/src/atom.h +++ b/src/atom.h @@ -21,14 +21,21 @@ License along with this file. If not, see #include "object.h" -typedef struct +typedef struct atom_body atom_body; + +// Return an atom with a newly-allocated body. +atom_object atom_create (const char *name, uint32_t namelen); + +atom_body *ATOM_BODY (heap_ptr p); + +const char *atom_pname (atom_object o); + +inline static uint32_t +atom_namelen (atom_object body) { - evaltype type; // UNBOUND/LOCI - // bindid - // value ptr - // oblist ptr - // type ptr - const char pname[]; -} atom_body; + return body.val.namelen; +} + +heap_ptr atom_body_alloc (uint32_t ct); #endif diff --git a/src/eval.c b/src/eval.c index f322946..7d28057 100644 --- a/src/eval.c +++ b/src/eval.c @@ -141,6 +141,7 @@ eval () { case EVALTYPE_FIX32: case EVALTYPE_FIX64: + case EVALTYPE_ATOM: RETURN (cf->args.body[0]); case EVALTYPE_LIST: // Handle `head` now; then iterate on `.rest`. diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 0000000..478d33c --- /dev/null +++ b/src/hash.h @@ -0,0 +1,47 @@ +/* +Copyright (C) 2018 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef HASH_H +#define HASH_H + +#include + +// Very fast, non collision-resistant hash + +inline static uint32_t +fnv_32a_init () +{ + return 0x811c9dc5; +} + +inline static uint32_t +fnv_32a_extend (const void *buf, size_t len, uint32_t hval) +{ + unsigned char *bp = (unsigned char *) buf; + unsigned char *be = bp + len; + while (bp < be) + { + hval ^= (uint32_t) * bp++; + /* multiply by the 32 bit FNV magic prime mod 2^32 */ + hval += + (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); + } + return hval; +} + +#endif // HASH_H diff --git a/src/main.c b/src/main.c index 6645f6e..c9876ce 100644 --- a/src/main.c +++ b/src/main.c @@ -20,6 +20,7 @@ License along with this file. If not, see #include "eval.h" #include "print.h" #include "object.h" +#include "oblist.h" #include #include @@ -31,6 +32,9 @@ pool_ptr ptop; object *vhp_base; heap_ptr vhp; +// oblists (move to ASOCs once implemented) +uvector_object root; + // TODO: store these in current PROCESS frame *cf; object ret; @@ -86,6 +90,7 @@ main () // mock GC (no object persistence) ptop = 1; vhp = 1; + root = oblist_create (13); // terminate input assert (buf[n - 1] == '\n'); buf[n - 1] = '\0'; @@ -112,6 +117,11 @@ main () // Print the thing print_object (&ret); printf ("\n"); + /* + // debugging oblists... + print_object ((object*) &root); + printf ("\n"); + */ // Loop! } diff --git a/src/object.c b/src/object.c index ff65f41..e64e68e 100644 --- a/src/object.c +++ b/src/object.c @@ -64,3 +64,10 @@ static object rest(const object *lst) { return o; } */ + +dope_object * +uv_dope (const uvector_object * o) +{ + return (dope_object *) & HEAP_OBJECT (o->val.body)[(o->val.len + 1) / 2 + + 1]; +} diff --git a/src/object.h b/src/object.h index ec322b5..8ebd807 100644 --- a/src/object.h +++ b/src/object.h @@ -192,7 +192,7 @@ typedef struct typedef struct { - alignas (8) uint32_t _pad; + alignas (8) uint32_t namelen; heap_ptr body; } atom_val; typedef struct @@ -221,6 +221,18 @@ typedef struct uint32_t gc; } dope_object; +/// Value half of a poolable object, for storage in a uvector. +typedef union uv_val +{ + fix32_val fix32; + fix64_val fix64; + list_val list; + vector_val vector; + uvector_val uvector; + subr_val subr; + atom_val atom; +} uv_val; + /// Object of a type that can be stored in the pool. /// NB. a pool_object* can point outside the pool; contrast with pool_ptr. typedef union pool_object @@ -231,7 +243,7 @@ typedef union pool_object // NB. never take the address of these type-punned fields! alignas (16) evaltype type; pool_ptr rest; - opaque64 val; + uv_val val; }; /// objects of statically known type fix32_object fix32; @@ -242,18 +254,6 @@ typedef union pool_object atom_object atom; } pool_object; -/// Value half of a poolable object, for storage in a uvector. -typedef union -{ - fix32_val fix32; - fix64_val fix64; - list_val list; - vector_val vector; - uvector_val uvector; - subr_val subr; - atom_val atom; -} uv_val; - union object { /// any object has a type @@ -313,6 +313,7 @@ new_list (pool_ptr head) ,}; } +// TODO: take a dope_object like uvector static inline vector_object new_vector (heap_ptr body, uint32_t length) { @@ -329,7 +330,7 @@ new_uvector (heap_ptr body, uint32_t length) { return (uvector_object) { - .type = EVALTYPE_VECTOR,.rest = 0,.val = (uvector_val) + .type = EVALTYPE_UVECTOR,.rest = 0,.val = (uvector_val) { .len = length,.body = body} }; @@ -355,22 +356,46 @@ new_subr (void (*fn) ()) } static inline atom_object -new_atom (pool_ptr body) +new_atom (pool_ptr body, uint32_t namelen) { return (atom_object) { .type = EVALTYPE_ATOM,.rest = 0,.val = (atom_val) { - .body = body} + .body = body,.namelen = namelen} }; } +static inline dope_object +new_dope (uint32_t len, evaltype type) +{ + return (dope_object) + { + .type = type,.grow = 0,.len = len,.gc = 0}; +} + /** Common object operations. */ uint32_t list_length (const list_object * o); +dope_object *uv_dope (const uvector_object * o); + +static inline evaltype +utype (const uvector_object * o) +{ + return uv_dope (o)->type; +} + +// Change the EVALTYPE of an object. New type must have same PRIMTYPE. +static inline void +chtype (object * o, evaltype type) +{ + assert (TYPEPRIM_EQ (o->type, type)); + o->type = type; +} + /** Checked downcasts. */ @@ -403,4 +428,11 @@ as_pool (object * p) return (pool_object *) p; } +static inline atom_object * +as_atom (object * o) +{ + assert (TYPEPRIM_EQ (o->type, EVALTYPE_ATOM)); + return &o->atom; +} + #endif // OBJECT_H diff --git a/src/oblist.c b/src/oblist.c new file mode 100644 index 0000000..895920a --- /dev/null +++ b/src/oblist.c @@ -0,0 +1,107 @@ +/* +Copyright (C) 2017-2018 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "alloc.h" +#include "atom.h" +#include "hash.h" +#include "object.h" +#include "oblist.h" + +#include +#include + +uvector_object +oblist_create (uint32_t buckct) +{ + heap_ptr body = heap_alloc_uv (buckct); + memset (UV_VAL (body), '\0', buckct * sizeof (uv_val)); + uvector_object oblist = new_uvector (body, buckct); + *uv_dope (&oblist) = new_dope (buckct, EVALTYPE_LIST); + chtype ((object *) & oblist, EVALTYPE_OBLIST); + return oblist; +} + +// TODO: define for other numeric types, move to general utility library +inline static uint32_t +u32_min (uint32_t x, uint32_t y) +{ + return (x < y) ? x : y; +} + +#define MIN(x, y) \ + _Generic((x+y), \ + uint32_t: u32_min(x, y) \ + ) + +// debugging: +#include +atom_object +oblist_find_or_insert (uvector_object oblist, const char *name, + uint32_t namelen) +{ + assert (oblist.val.len); + uint32_t hash = fnv_32a_extend (name, namelen, fnv_32a_init ()); + uint32_t nbucket = hash % oblist.val.len; + list_val *bucket = &UV_VAL (oblist.val.body)[nbucket].list; + // linear search in the bucket + pool_ptr *prev = &bucket->head; + pool_ptr tail = 0; + while (*prev) + { + atom_object *other = &POOL_OBJECT (*prev)->atom; + int cmp = + strncmp (name, atom_pname (*other), + MIN (namelen, atom_namelen (*other))); + if (cmp < 0) + { + tail = *prev; + break; + } + else if (!cmp) + return *other; + prev = &POOL_OBJECT (*prev)->rest; + } + // Either: + // - reached end of list without finding; tail==0 + // - found where it should be in the list; tail is the rest (if any) + *prev = pool_alloc (1); + atom_object new = atom_create (name, namelen); + POOL_OBJECT (*prev)->atom = new; + POOL_OBJECT (*prev)->atom.rest = tail; + return new; +} + +/* +object +oblist_lookup (uvector_object oblist, string_object name) +{ +} +*/ + +#if 0 +atom_object +intern_atom (const char *p, int len) +{ + // TODO: look up in existing oblists first + + uint32_t lenobjs = (len + sizeof (object) - 1) / sizeof (object); + //heap_ptr body = heap_alloc (sizeof (atom_body) + lenobjs); + heap_ptr body = atom_body_alloc (1); + return new_atom (0); +} +#endif diff --git a/src/oblist.h b/src/oblist.h new file mode 100644 index 0000000..590a38d --- /dev/null +++ b/src/oblist.h @@ -0,0 +1,40 @@ +/* +Copyright (C) 2017-2018 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#ifndef OBLIST_H +#define OBLIST_H + +#include "object.h" + +// UVECTOR of LISTs of ATOMs +// each LIST is a hash bucket +// each LIST is sorted by PNAME +// each LIST is headed by a LOSE sentry object + +// for now, 1 global oblist +extern uvector_object root; + +uvector_object oblist_create (uint32_t buckct); + +atom_object oblist_find_or_insert (uvector_object oblist, const char *name, + uint32_t namelen); + +// returns a ATOM object or #FALSE () +//object oblist_lookup (uvector_object oblist, const char * name, uint32_t namelen); + +#endif diff --git a/src/print.c b/src/print.c index 3a5866a..02456bc 100644 --- a/src/print.c +++ b/src/print.c @@ -16,6 +16,7 @@ License along with this file. If not, see . */ +#include "atom.h" #include "print.h" #include "object.h" @@ -39,6 +40,28 @@ print_vector_body (const vector_object * o) } } +static void +print_uvector_body (const uvector_object * o) +{ + const uv_val *p = UV_VAL (o->val.body); + if (!p) + return; + pool_object x; + x.type = utype (o); + x.rest = 0; + if (o->val.len) + { + x.val = p[0]; + print_object ((object *) & x); + } + for (uint32_t i = 1; i < o->val.len; i++) + { + printf (" "); + x.val = p[i]; + print_object ((object *) & x); + } +} + static void print_list_body (const list_object * o) { @@ -79,7 +102,20 @@ print_object (const object * o) print_vector_body (&o->vector); printf ("]"); break; + case EVALTYPE_OBLIST: + // for now, handle non-primtype print as special case + printf ("#OBLIST "); + // FALLTHROUGH + case EVALTYPE_UVECTOR: + printf ("!["); + print_uvector_body (&o->uvector); + printf ("!]"); + break; + case EVALTYPE_ATOM: + printf ("%s", atom_pname (o->atom)); + break; default: + fprintf (stderr, "Tried to print the unprintable: 0x%x\n", o->type); assert (0 && "I don't know how to print that"); } } diff --git a/src/read.c b/src/read.c index b8570f3..0f6a63d 100644 --- a/src/read.c +++ b/src/read.c @@ -18,6 +18,7 @@ License along with this file. If not, see #include "read.h" #include "object.h" +#include "oblist.h" #include #include @@ -262,7 +263,7 @@ read_token (const char *p, reader_stack * st) n = count_pname (p); if (n > 0) { - (--(st->pos))->atom = new_atom (0); + (--(st->pos))->atom = oblist_find_or_insert (root, p, n); st->framelen++; return p + n; } diff --git a/src/test_oblists b/src/test_oblists new file mode 100755 index 0000000..7ce5674 --- /dev/null +++ b/src/test_oblists @@ -0,0 +1,9 @@ +#!/bin/sh + +assert_eq () { + [ "$1" = "$2" ] || ( echo Assertion failed: "\"$1\"" = "\"$2\""; exit 1 ) +} + +assert_eq foo "$(echo foo | ./muddle)" +assert_eq '(foo bar baz)' "$(echo '(foo bar baz)' | ./muddle)" +assert_eq '(foo foo foo)' "$(echo '(foo foo foo)' | ./muddle)" -- 2.31.1