src/read.c

   1 /*
   2 Copyright (C) 2017 Keziah Wesley
   3
   4 You can redistribute and/or modify this file under the terms of the
   5 GNU Affero General Public License as published by the Free Software
   6 Foundation, either version 3 of the License, or (at your option) any
   7 later version.
   8
   9 This file is distributed in the hope that it will be useful, but
  10 WITHOUT ANY WARRANTY; without even the implied warranty of
  11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12 Affero General Public License for more details.
  13
  14 You should have received a copy of the GNU Affero General Public
  15 License along with this file. If not, see
  16 <http://www.gnu.org/licenses/>.
  17 */
  18
  19 #include "read.h"
  20 #include "object.h"
  21
  22 #include <assert.h>
  23 #include <stdio.h>
  24
  25 /**
  26 Tokenization
  27 */
  28
  29 // Return the number of whitespace characters at the beginning of the input.
  30 static int
  31 count_whitespace (const char *p)
  32 {
  33   for (int i = 0;; i++)
  34     {
  35       switch (p[i])
  36         {
  37         case 0:
  38         case ' ':
  39         case '\n':
  40         case '\r':
  41         case '\t':
  42         case '\v':
  43         case '\f':
  44           break;
  45         default:
  46           return i;
  47         }
  48     }
  49 }
  50
  51 // Return the number of characters at the beginning of the input
  52 // constituting a valid PNAME.
  53 // If the input ends with '\', it is incomplete, and -1 is returned.
  54 static int
  55 count_pname (const char *p)
  56 {
  57   // rule (1): valid FLOAT / FIX isn't a PNAME.
  58   // Caller must try to read as number first!
  59
  60   // rule (2): dot can't be first
  61   if (p[0] == '.')
  62     return 0;
  63
  64   // rule (3): "if you can't type it interactively, it's only valid non-interactively"
  65   // (not currently enforced; just don't give your atoms names like ^L)
  66
  67   for (int i = 0;; i++)
  68     {
  69       switch (p[i])
  70         {
  71           // separators end the PNAME
  72           // rule (4): whitespace
  73           // rule (5): special characters
  74           // rule (6): brackets
  75         case ' ':
  76         case '\n':
  77         case '\r':
  78         case '\t':
  79         case '\v':
  80         case '\f':
  81         case ',':
  82         case '#':
  83         case '\'':
  84         case ';':
  85         case '%':
  86         case '(':
  87         case ')':
  88         case '[':
  89         case ']':
  90         case '<':
  91         case '>':
  92         case '{':
  93         case '}':
  94         case '"':
  95         case 0:
  96           return i;
  97           // escape: next char becomes normal
  98         case '\\':
  99           if (!p[++i])
 100             return -1;
 101         }
 102     }
 103 }
 104
 105 /**
 106 State machine
 107 */
 108
 109 /*
 110 static uint32_t obj_get_fix32(const object *o) {
 111     assert(o->type == EVALTYPE_FIX32);
 112     return o->v.fix32;
 113 }
 114 */
 115
 116 static int
 117 read_num (const char *p, reader_stack *st)
 118 {
 119   int i = 0;
 120   // Use an unsigned intermediate to simplify overflow checks.
 121   uint64_t x = 0;
 122   // Disallow "number" composed of ancillary number components only.
 123   bool gotdigits = false;
 124   // Skip for now, later we'll check again to potentially negate.
 125   if (p[0] == '-')
 126     i++;
 127   // TODO: asterisk-deliminated *octal*
 128   // TODO: other bases?
 129   for (; p[i]; i++)
 130     {
 131       if (p[i] >= '0' && p[i] <= '9')
 132         {
 133           if (x * 10 + (p[i] - '0') < x)
 134             goto read_float;
 135           x = x * 10 + (p[i] - '0');
 136           gotdigits = true;
 137           continue;
 138         }
 139
 140       // TODO: decimal points, exponent notation
 141       // NB. a terminal decimal denotes a FIX
 142       // NB. exponent notation doesn't necessarily indicate a float
 143
 144       // TODO: distinguish 'delimiter' characters that terminate
 145       // number from 'identifier' chars that cause parsing to fail
 146       // (and potentially be parsed as an atom)
 147       break;
 148     }
 149   if (!gotdigits)
 150     return 0;
 151   if (p[0] != '-')
 152     {
 153       if (x <= INT32_MAX)
 154         (--(st->pos))->fix32 = new_fix32 ((int32_t)x);
 155       else if (x <= INT64_MAX)
 156         (--(st->pos))->fix64 = new_fix64 (x);
 157       else
 158         goto read_float;
 159     }
 160   else
 161     {
 162       if (-x >= (uint64_t)INT32_MIN)
 163         (--(st->pos))->fix32 = new_fix32 (0 - (int32_t)x);
 164       else if (-x >= (uint64_t)INT64_MIN)
 165         (--(st->pos))->fix64 = new_fix64 (0 - (int64_t)x);
 166       else
 167         goto read_float;
 168     }
 169   st->framelen++;
 170   return i;
 171  read_float:
 172   assert(0 && "unimplemented: promote num to float");
 173   return i;
 174 }
 175
 176 // stack[0..len]: objs in current list
 177 // stack[len]: parent len
 178 const char *
 179 read_token (const char *p, reader_stack *st)
 180 {
 181   p += count_whitespace (p);
 182   switch (p[0])
 183     {
 184     case '\0':
 185       break;
 186     case '(':
 187     case '<':
 188     case '[':
 189       {
 190         // opener: push current framelen; start new frame
 191         // store child type and parent framelen in a pseudo-object together
 192         evaltype type;
 193         switch (p++[0])
 194           {
 195           case '(':
 196             type = EVALTYPE_LIST;
 197             break;
 198           case '<':
 199             type = EVALTYPE_FORM;
 200             break;
 201           case '[':
 202             type = EVALTYPE_VECTOR;
 203             break;
 204           default:
 205             assert (0 && "martian opener token?");
 206           }
 207         *--(st->pos) = (object)
 208         {
 209         .fix32.type = type,.fix32.rest = 0,.fix32.val = st->framelen,};
 210         st->framelen = 0;
 211         break;
 212       }
 213     case ')':
 214     case '>':
 215       {
 216         evaltype type;
 217         switch (p++[0])
 218           {
 219           case ')':
 220             type = EVALTYPE_LIST;
 221             break;
 222           case '>':
 223             type = EVALTYPE_FORM;
 224             break;
 225           default:
 226             assert (0 && "martian list closer token?");
 227           }
 228         // build list from current stack frame
 229         pool_ptr o =
 230           pool_copy_array_rev ((pool_object *) st->pos, st->framelen);
 231         // pop frame, push new LIST
 232         st->pos += st->framelen;
 233         assert (st->pos->type == type);
 234         st->framelen = st->pos->fix32.val + 1;
 235         // overwrite the frame marker with the collection it became
 236         st->pos->list = (list_object)
 237         {
 238         .type = type,.rest = 0,.head = o};
 239         break;
 240       }
 241     case ']':
 242       {
 243         p++;
 244         // build vector from current stack frame
 245         heap_ptr h = heap_copy_array_rev (st->pos, st->framelen);
 246         // pop frame, push new VECTOR
 247         uint32_t len = st->framelen;
 248         st->pos += st->framelen;
 249         assert (st->pos->type == EVALTYPE_VECTOR);
 250         st->framelen = st->pos->fix32.val + 1;
 251         st->pos->vector = new_vector (h, len);
 252         break;
 253       }
 254     default:
 255       {
 256         int n = read_num (p, st);
 257         if (n)
 258           return p + n;
 259
 260         // TODO: try read pname
 261
 262         fprintf (stderr, "read unimplemented for char: '%c'\n", *p);
 263         assert (0 && "read unimplemented for char");
 264       }
 265     }
 266   return p;
 267 }