+;"Copyright (C) 2018 Keziah Wesley
+
+You can redistribute and/or modify this file under the terms of the
+GNU Affero General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with this file. If not, see
+<http://www.gnu.org/licenses/>."
+
+<FLOAD "chartable.mud">
+
+<MOBLIST PARSER>
+
+parse!-
+PARSE-TABLE!-
+
+DEFAULT-TABLE!-PARSER
+
+read-all!-PARSER
+
+%<AND <BLOCK (<MOBLIST pp> <GET PARSER OBLIST> <GET CHARTABLE OBLIST> <ROOT>)> <>>
+
+;"Operation:
+ Input string is provided to parser. Parser consumes from input and updates state.
+ Beginning and end of input will be treated as token boundaries.
+ Parser will return producing a value after reading 1 complete expression
+ (possibly leaving some input leftover)."
+
+;"If PARSE receives a partial input, it emits an ERROR. READ installs a
+ handler for the ERROR INTERRUPT that attempts to get more data and
+ ERRET."
+
+<DEFINE CHARSTREAM (s) [.s 1 1]>
+<DEFINE COPY-CHARSTREAM (s) <MAPF ,VECTOR ,AND? .s>>
+<DEFINE SET-CHARSTREAM (x y) <PUT .x 1 <1 .y>> <PUT .x 2 <2 .y>> <PUT .x 3 <3 .y>>>
+<DEFINE NEWLINE-POS (s) <PUT .s 2 <+ 1 <2 .s>>> <PUT .s 3 0>>
+<DEFINE GET-POS (x) (<2 .x> <3 .x>)>
+<DEFINE EAT-CHAR (s) <PUT .s 1 <REST <1 .s>>> <PUT .s 3 <+ 1 <3 .s>>>>
+<DEFINE PEEK-CHAR (s) <ASCII <1 <1 .s>>>>
+<DEFINE NEXT-CHAR (s "AUX" (c <PEEK-CHAR .s>))
+ <EAT-CHAR .s>
+ <COND (<==? 33 .c> <SET c <bang <PEEK-CHAR .s>>> <EAT-CHAR .s>)>
+ .c>
+;"TODO: eliminate this and rely on peeking"
+<DEFINE RETURN-CHAR (s "AUX")
+ <COND (<==? %<ASCII 10> <1 <BACK <1 .s>>>> <PUT .s 2 <- <2 .s> 1>>)>
+ <PUT .s 1 <BACK <1 .s>>>>
+
+<DEFINE parse (t s "AUX" c sp "NAME" a)
+ <COND (<EMPTY? <1 .s>> <RETURN EOF .a>)>
+ <SET c <NEXT-CHAR .s>>
+ <SET sp <SPECIAL? .t .c>>
+ <COND (.sp <APPLY .sp .t .s .a>)
+ (ELSE <APPLY <DEFAULT-HANDLER .t> .t .c .s .a>)>>
+
+;"BLOCK is documented as modifying the value of .OBLIST, so I'm
+ assuming passing a lookup param to READ has a similar effect rather
+ than setting an internal variable."
+
+<DEFINE read (s "OPTIONAL" (OBLIST .OBLIST))
+ <parse ,default <CHARSTREAM .s>>>
+
+<DEFINE read-all (s "OPTIONAL" (OBLIST .OBLIST))
+ <SET s <CHARSTREAM .s>>
+ <MAPF ,LIST <FUNCTION ("AUX" res)
+ <COND (<EMPTY? <1 .s>> <MAPSTOP>)
+ (ELSE <SET res <parse ,default .s>>
+ <COND (<==? .res EOF> <MAPSTOP>) (ELSE .res)>)>>>>
+
+;"Don't try to READ in objects of these types."
+<NEWTYPE closer FIX>
+<NEWTYPE prototemplate VECTOR>
+;"TODO: set appropriate ERROR as closer's EVALTYPE."
+
+<DEFINE bang (c) <+ .c #FIX %<LSH 1 21>>>
+<DEFINE banged? (c) <G=? .c #FIX %<LSH 1 21>>>
+<DEFINE unbang (c) <- .c #FIX %<LSH 1 21>>>
+<DEFINE explode-bang (c)
+ <COND (<banged? .c> <STRING %<1 "!"> <CHTYPE .c CHARACTER>>) (ELSE <CHTYPE .c CHARACTER>)>>
+
+;"Create the appropriate ERROR if there's a brace mismatch."
+;"Otherwise do nothing. Return value is ignored."
+<DEFINE check-closer (pos c opener s "TUPLE" closers)
+ <COND
+ (<TYPE? .c closer>
+ <COND
+ (<MEMQ <CHTYPE .c FIX> .closers>)
+ ;"TODO: position info"
+ (ELSE <ERROR BRACE-MISMATCH (POS .pos
+ OPENER <explode-bang .opener>
+ EXPECTED <MAPF ,LIST ,explode-bang .closers>
+ FOUND <explode-bang <CHTYPE .c FIX>>)>)>)
+ (<==? .c EOF> <ERROR UNCLOSED-BRACKET <explode-bang .opener> .pos>)>>
+
+;"TODO: accept radix in PARSE"
+<SETG radix 10>
+
+;"Create a standard Muddle CHARTABLE."
+<DEFINE make-default ("AUX" (t <CHARTABLE>))
+ <BULK-UPDATE .t
+ <SET-SPECIAL .t 9 T <FUNCTION (t s a) <AGAIN .a>>> ;"HT"
+ <SET-SPECIAL .t 10 T <FUNCTION (t s a) <NEWLINE-POS .s> <AGAIN .a>>> ;"LF"
+ <SET-SPECIAL .t 11 T <FUNCTION (t s a) <AGAIN .a>>> ;"VT"
+ <SET-SPECIAL .t 12 T <FUNCTION (t s a) <AGAIN .a>>> ;"FF"
+ <SET-SPECIAL .t 13 T <FUNCTION (t s a) <AGAIN .a>>> ;"CR"
+ <SET-SPECIAL .t 32 T <FUNCTION (t s a) <AGAIN .a>>> ;"space"
+ <SET-SPECIAL .t <ASCII !\.> <> <FUNCTION (t s a) <FORM LVAL <parse .t .s>>>>
+ <SET-SPECIAL .t <ASCII !\,> T <FUNCTION (t s a) <FORM GVAL <parse .t .s>>>>
+ <SET-SPECIAL .t <bang <ASCII !\.>> <> <FUNCTION (t s a) <CHTYPE <FORM LVAL <parse .t .s>> SEGMENT>>>
+ <SET-SPECIAL .t <bang <ASCII !\,>> T <FUNCTION (t s a) <CHTYPE <FORM GVAL <parse .t .s>> SEGMENT>>>
+ <SET-SPECIAL .t <ASCII !\'> T <FUNCTION (t s a) <FORM QUOTE <parse .t .s>>>>
+ <SET-SPECIAL .t <ASCII !\;> T <FUNCTION (t s a) <parse .t .s> <AGAIN .a>>>
+ <SET-SPECIAL .t <ASCII !\%> T <FUNCTION (t s a)
+ <COND (<==? %<ASCII !\%> <PEEK-CHAR .s>> <EAT-CHAR .s> <EVAL <parse .t .s>> <AGAIN .a>)>
+ <EVAL <parse .t .s>>>>
+ <SET-SPECIAL .t <ASCII !\#> T <FUNCTION (t s a "AUX" (type <parse .t .s>)) <CHTYPE <parse .t .s> .type>>>
+ <SET-SPECIAL .t <bang <ASCII !\\>> T <FUNCTION (t s a) <NEXT-CHAR .s>>>
+ <SET-SPECIAL .t <ASCII !\*> <> <FUNCTION (t s a) <parse-octal .t .s .a>>>
+
+ <SET-SPECIAL .t <ASCII !\"> T <FUNCTION (t s a)
+ <MAPF ,STRING <FUNCTION ("AUX" (c <NEXT-CHAR .s>))
+ <COND (<==? .c %<ASCII !\\>> <MAPRET <CHTYPE <NEXT-CHAR .s> CHARACTER>>)
+ (<banged? .c> <MAPRET %<1 "!"> <CHTYPE <unbang .c> CHARACTER>>)
+ (<==? .c %<ASCII !\">> <MAPSTOP>)
+ (<==? .c 10> <NEWLINE-POS .s> <MAPRET <CHTYPE .c CHARACTER>>)
+ (ELSE <MAPRET <CHTYPE .c CHARACTER>>)>>>>>
+
+ ;"TODO: macros for reducing the redundancy of all these openers and closers."
+
+ ;"A closer is an illegal object for anything except the corresponding opener."
+ <SET-SPECIAL .t <ASCII !\>> T <FUNCTION (t s a) <CHTYPE <ASCII !\>> closer>>>
+ <SET-SPECIAL .t <ASCII !\)> T <FUNCTION (t s a) <CHTYPE <ASCII !\)> closer>>>
+ <SET-SPECIAL .t <ASCII !\]> T <FUNCTION (t s a) <CHTYPE <ASCII !\]> closer>>>
+ <SET-SPECIAL .t <ASCII !\}> T <FUNCTION (t s a) <CHTYPE <ASCII !\}> closer>>>
+ <SET-SPECIAL .t <bang <ASCII !\>>> T <FUNCTION (t s a) <CHTYPE <bang <ASCII !\>>> closer>>>
+ <SET-SPECIAL .t <bang <ASCII !\)>> T <FUNCTION (t s a) <CHTYPE <bang <ASCII !\)>> closer>>>
+ <SET-SPECIAL .t <bang <ASCII !\]>> T <FUNCTION (t s a) <CHTYPE <bang <ASCII !\]>> closer>>>
+ <SET-SPECIAL .t <bang <ASCII !\}>> T <FUNCTION (t s a) <CHTYPE <bang <ASCII !\}>> closer>>>
+
+ <SET-SPECIAL .t <ASCII !\(> T <FUNCTION (t s a "AUX" pos)
+ <SET pos <GET-POS .s>>
+ <MAPF ,LIST <FUNCTION ("AUX" (x <parse .t .s>))
+ <COND (<check-closer .x <ASCII !\(> .s <ASCII !\)>> <MAPSTOP>)
+ (ELSE <MAPRET .x>)>>>>>
+
+ <SET-SPECIAL .t <ASCII !\<> T <FUNCTION (t s a "AUX" pos)
+ <SET pos <GET-POS .s>>
+ <MAPF ,FORM <FUNCTION ("AUX" (x <parse .t .s>))
+ <COND (<check-closer .pos .x <ASCII !\<> .s <ASCII !\>>> <MAPSTOP>)
+ (ELSE <MAPRET .x>)>>>>>
+
+ <SET-SPECIAL .t <ASCII !\[> T <FUNCTION (t s a "AUX" pos)
+ <SET pos <GET-POS .s>>
+ <MAPF ,VECTOR <FUNCTION ("AUX" (x <parse .t .s>))
+ <COND (<check-closer .pos .x <ASCII !\[> .s <ASCII !\]>> <MAPSTOP>)
+ (ELSE <MAPRET .x>)>>>>>
+
+ <SET-SPECIAL .t <bang <ASCII !\[>> T <FUNCTION (t s a "AUX" pos)
+ <SET pos <GET-POS .s>>
+ <MAPF ,UVECTOR <FUNCTION ("AUX" (x <parse .t .s>))
+ <COND (<check-closer .pos .x <bang <ASCII !\[>> .s <ASCII !\]>> <bang <ASCII !\]>> <MAPSTOP>)
+ (ELSE <MAPRET .x>)>>>>>
+
+ <SET-SPECIAL .t <bang <ASCII !\<>> T <FUNCTION (t s a "AUX" pos)
+ <SET pos <GET-POS .s>>
+ <CHTYPE <MAPF ,FORM <FUNCTION ("AUX" (x <parse .t .s>))
+ <COND (<check-closer .pos .x <bang <ASCII !\<>> .s <ASCII !\>>> <bang <ASCII !\>>> <MAPSTOP>)
+ (ELSE <MAPRET .x>)>>> SEGMENT>>>
+
+ ;"The prototemplate returned is an illegal object for anything except #-syntax."
+ <SET-SPECIAL .t <ASCII !\{> T <FUNCTION (t s a "AUX" pos)
+ <SET pos <GET-POS .s>>
+ <CHTYPE <MAPF ,VECTOR <FUNCTION ("AUX" (x <parse .t .s>))
+ <COND (<check-closer .pos .x <ASCII !\{> .s <ASCII !\}>> <MAPSTOP>)
+ (ELSE <MAPRET .x>)>>> prototemplate>>>
+
+ ;"Plain backslash just gets default handling in the lexer. It's only
+ special to the scalar sublexer."
+
+ <SET-DEFAULT .t ,parse-scalar>>>
+
+;"Lookup a string in an OBLIST / OBLIST list. If it isn't found,
+ insert it into the OBLIST / head of the OBLIST list."
+<DEFINE lookup-multi (a os)
+ <COND (<TYPE? .os OBLIST> <OR <LOOKUP .a .os> <INSERT .a .os>>)
+ (ELSE <OR <MAPF <> <FUNCTION (o "AUX" (atm <LOOKUP .a .o>))
+ <COND (.atm <MAPLEAVE .atm>)>> .os>
+ <INSERT .a <1 .os>>>)>>
+
+;"TODO: BLOCK..."
+<DEFINE parse-atom (t s a "AUX" (atoms ()) atm pn is-trailed "NAME" getpn)
+ ;"Copy up to any trailer into a temporary STRING."
+ <SET is-trailed <>>
+ <SET pn <MAPF ,STRING <FUNCTION ("AUX" (k <PEEK-CHAR .s>))
+ <COND (<BREAKER? .t .k> <MAPSTOP>)
+ (<==? .k %<bang <ASCII !\->>> <SET is-trailed T> <EAT-CHAR .s> <MAPSTOP>)
+ (<banged? .k> <EAT-CHAR .s> <MAPRET %<1 "!"> <CHTYPE <unbang .k> CHARACTER>>)
+ (ELSE <EAT-CHAR .s> <MAPRET <CHTYPE .k CHARACTER>>)>>>>
+ ;"Gather trailer-separated PNAMEs into a stack."
+ <COND (.is-trailed <SET atoms (.pn !.atoms)> <AGAIN .getpn>)>
+ ;"Now we have a LIST of STRINGs containing our trailed PNAMEs, with
+ the last still in .pn. The final PNAME is special: because it has
+ no trailer, we look it up in our oblist list."
+ <COND
+ (<EMPTY? .pn> <ROOT>
+ ;"Special case: we have an OBLIST, not an ATOM identifying an OBLIST."
+ <SET atm <OR <LOOKUP <1 .atoms> .atm> <INSERT <1 .atoms> .atm>>>
+ <SET atoms <REST .atoms>>)
+ (ELSE <SET atm <lookup-multi .pn .OBLIST>>)>
+ <COND (<EMPTY? .atoms> <RETURN .atm .getpn>)>
+ ;"Lookup each atom in the oblist denoted the previous (trailer),"
+ ;"except the last, which is our ATOM."
+ <MAPF <>
+ <FUNCTION (x "AUX" ob)
+ <SET ob <MOBLIST .atm>>
+ <SET atm <OR <LOOKUP .x .ob> <INSERT .x .ob>>>>
+ .atoms>>
+
+<DEFINE parse-octal (t s a "AUX" (z <COPY-CHARSTREAM .s>) c (n 0) (havedigits <>) "NAME" oct)
+ <SET c <NEXT-CHAR .z>>
+ <COND
+ ;"Ended without a trailing *?"
+ (<BREAKER? .t .c> <RETURN-CHAR .s> <parse-atom .t .s .a>)
+ ;"Made it to *?"
+ (<==? .c %<ASCII !\*>>
+ <COND (.havedigits <SET-CHARSTREAM .s .z> <RETURN <CHTYPE .n FIX> .a>)
+ (ELSE <RETURN-CHAR .s> <parse-atom .t .s .a>)>)
+ ;"Got another octit?"
+ (<AND <L=? %<ASCII !\0> .c> <L=? .c %<ASCII !\9>>>
+ <SET n <ORB <- .c %<ASCII !\0>> <LSH .n 3>>>
+ <SET havedigits T>
+ <AGAIN .oct>)
+ ;"Not octal after all."
+ (ELSE <RETURN-CHAR .s> <parse-atom .t .s .a>)>>
+
+;"If current radix isn't 10, pre-scan to see whether there's a decimal point."
+<DEFINE num-rad (s)
+ <COND
+ (<==? ,radix 10> 10)
+ (ELSE
+ <REPEAT ((z <COPY-CHARSTREAM .s>) "NAME" rdx)
+ <SET c <NEXT-CHAR .z>>
+ <COND (<BREAKER? .t .c> <RETURN ,radix .rdx>)
+ ;"Docs aren't clear on exponent-notation fixes without decimal points."
+ ;"Decimal makes more sense..."
+ (<==? %<ASCII !\e> .c> <RETURN 10 .rdx>)
+ (<==? %<ASCII !\E> .c> <RETURN 10 .rdx>)
+ (<==? %<ASCII !\.> .c> <RETURN 10 .rdx>)>>)>>
+
+<DEFINE parse-decimal (t c s a "AUX" (havedigits <>) (n 0) (sgn 1) (z <COPY-CHARSTREAM .s>) (rad <num-rad .s>))
+ <COND (<==? .c %<ASCII !\->> <SET sgn -1>)
+ (ELSE <RETURN-CHAR .z> <SET havedigits T>)>
+ <REPEAT ("NAME" decimal)
+ <SET c <NEXT-CHAR .z>>
+ <COND
+ ;"Done?"
+ (<BREAKER? .t .c>
+ <COND (.havedigits <RETURN-CHAR .z> <SET-CHARSTREAM .s .z> <RETURN <* .n .sgn> .a>)
+ (ELSE <RETURN-CHAR .s> <RETURN <parse-atom .t .s .a> .a>)>)
+ ;"Got a digit?"
+ (<AND <L=? %<ASCII !\0> .c> <L? .c <+ %<ASCII !\0> .rad>>>
+ <SET n <+ <* .n .rad> <- .c %<ASCII !\0>>>>
+ <SET havedigits T>)
+ ;".?"
+ ;"[eE]?"
+ ;"Not a decimal."
+ (ELSE <RETURN-CHAR .s> <RETURN <parse-atom .t .s .a> .a>)>>>
+
+;"The Scalar Sublexer."
+;"Parse an object that begins with a non-special character: FIX/FLOAT/ATOM."
+<DEFINE parse-scalar (t c s a "AUX" rad)
+ <COND
+ (<OR <==? .c %<ASCII !\->> <AND <L=? %<ASCII !\0> .c> <L=? .c %<ASCII !\9>>>>
+ <parse-decimal .t .c .s .a>)
+ (ELSE <RETURN-CHAR .s> <parse-atom .t .s .a>)>>
+
+;"Fallback to use if PARSE-TABLE!- is empty and no argument is provided."
+<SETG default <make-default>>
+<DEFINE DEFAULT-TABLE () ,default>
+
+%<AND <ENDBLOCK> <>>
+
+;"Like MAPF ,AND with early termination."
+<DEFINE ALL? (fn "TUPLE" argses "NAME" all)
+ <MAPF <> <FUNCTION ("TUPLE" xs) <COND (<NOT <APPLY .fn !.xs>> <RETURN <> .all>)>> !.argses>
+ T>
+
+;"This belongs in a PRINT module. Here for testing."
+<DEFINE PPRINT (x "OPTIONAL" (lvl 0))
+ <COND
+ (<TYPE? .x LIST> <PRINC !\(> <PPRINT-SEQ .x <+ .lvl 1>> <PRINC !\)>)
+ (<AND <TYPE? .x FORM> <NOT <OR <EMPTY? .x> <==? <1 .x> LVAL> <==? <1 .x> GVAL>>>>
+ <PRINC !\<> <PPRINT-SEQ .x <+ .lvl 1>> <PRINC !\>>)
+ (ELSE <PRIN1 .x>)>>
+<DEFINE PPRINT-SEQ (x lvl)
+ <COND (<EMPTY? .x>)
+ ;"TODO: more aggressive single-lining"
+ (<ALL? <FUNCTION (y) <OR <NOT <TYPE? .y FORM LIST>>
+ <EMPTY? .y>
+ <==? <1 .y> LVAL>
+ <==? <1 .y> GVAL>>> .x>
+ <PRIN1 <1 .x>>
+ <MAPF <> <FUNCTION (y) <PRINC !\ > <PRIN1 .y>> <REST .x>>)
+ (ELSE
+ <PPRINT <1 .x> .lvl>
+ <MAPF <> <FUNCTION (y) <CRLF> <INDENT .lvl> <PPRINT .y .lvl>> <REST .x>>)>>
+<DEFINE INDENT (lvl) <PRINC <ISTRING .lvl !\ >>>
+
+<PROG ()
+ <SET sss <ISTRING 40000>>
+ <SET sss <SUBSTRUC .sss 0 <READSTRING .sss <OPEN "READB" "./chartable.mud">>>>
+ <PPRINT <read-all!-PARSER .sss>>
+ >
+
+<QUIT>