X-Git-Url: https://jxself.org/git/?a=blobdiff_plain;f=src%2Fread.c;fp=src%2Fread.c;h=57dd81f511d8f3538e73ee7efed7bd1bd7945029;hb=8d55156675587a770c5654362bcbd3d2a98e4aa9;hp=0000000000000000000000000000000000000000;hpb=d6e1960007fcac962144382332f7a9c8c56fa3a1;p=muddle-interpreter.git diff --git a/src/read.c b/src/read.c new file mode 100644 index 0000000..57dd81f --- /dev/null +++ b/src/read.c @@ -0,0 +1,207 @@ +/* +Copyright (C) 2017 Keziah Wesley + +You can redistribute and/or modify this file under the terms of the +GNU Affero General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with this file. If not, see +. +*/ + +#include "read.h" +#include "object.h" + +#include +#include + +/** +Tokenization +*/ + +// Return the number of whitespace characters at the beginning of the input. +static int +count_whitespace (const char *p) +{ + for (int i = 0;; i++) + { + switch (p[i]) + { + case 0: + case ' ': + case '\n': + case '\r': + case '\t': + case '\v': + case '\f': + break; + default: + return i; + } + } +} + +// Return the number of characters at the beginning of the input +// constituting a valid PNAME. +// If the input ends with '\', it is incomplete, and -1 is returned. +static int +count_pname (const char *p) +{ + // rule (1): valid FLOAT / FIX isn't a PNAME. + // Caller must try to read as number first! + + // rule (2): dot can't be first + if (p[0] == '.') + return 0; + + // rule (3): "if you can't type it interactively, it's only valid non-interactively" + // (not currently enforced; just don't give your atoms names like ^L) + + for (int i = 0;; i++) + { + switch (p[i]) + { + // separators end the PNAME + // rule (4): whitespace + // rule (5): special characters + // rule (6): brackets + case ' ': + case '\n': + case '\r': + case '\t': + case '\v': + case '\f': + case ',': + case '#': + case '\'': + case ';': + case '%': + case '(': + case ')': + case '[': + case ']': + case '<': + case '>': + case '{': + case '}': + case '"': + case 0: + return i; + // escape: next char becomes normal + case '\\': + if (!p[++i]) + return -1; + } + } +} + +/** +State machine +*/ + +/* +static uint32_t obj_get_fix32(const object *o) { + assert(o->type == EVALTYPE_FIX32); + return o->v.fix32; +} +*/ + +// stack[0..len]: objs in current list +// stack[len]: parent len +const char * +read_token (const char *p, reader_stack * st) +{ + p += count_whitespace (p); + switch (p[0]) + { + case '\0': + break; + case '(': + case '<': + case '[': + { + // opener: push current framelen; start new frame + // store child type and parent framelen in a pseudo-object together + evaltype type; + switch (p++[0]) + { + case '(': + type = EVALTYPE_LIST; + break; + case '<': + type = EVALTYPE_FORM; + break; + case '[': + type = EVALTYPE_VECTOR; + break; + default: + assert (0 && "martian opener token?"); + } + *--(st->pos) = (object) + { + .fix32.type = type,.fix32.rest = 0,.fix32.val = st->framelen,}; + st->framelen = 0; + break; + } + case ')': + case '>': + { + evaltype type; + switch (p++[0]) + { + case ')': + type = EVALTYPE_LIST; + break; + case '>': + type = EVALTYPE_FORM; + break; + default: + assert (0 && "martian list closer token?"); + } + // build list from current stack frame + pool_ptr o = + pool_copy_array_rev ((pool_object *) st->pos, st->framelen); + // pop frame, push new LIST + st->pos += st->framelen; + assert (st->pos->type == type); + st->framelen = st->pos->fix32.val + 1; + // overwrite the frame marker with the collection it became + st->pos->list = (list_object) + { + .type = type,.rest = 0,.head = o}; + break; + } + case ']': + { + p++; + // build vector from current stack frame + heap_ptr h = heap_copy_array_rev (st->pos, st->framelen); + // pop frame, push new VECTOR + uint32_t len = st->framelen; + st->pos += st->framelen; + assert (st->pos->type == EVALTYPE_VECTOR); + st->framelen = st->pos->fix32.val + 1; + st->pos->vector = new_vector (h, len); + break; + } + case '4': + { + p++; + // push fix obj, extending frame + (--(st->pos))->fix64 = new_fix64 (4); + st->framelen++; + break; + } + default: + fprintf (stderr, "read unimplemented for char: '%c'\n", *p); + assert (0 && "read unimplemented for char"); + } + return p; +}