1 ;"Copyright (C) 2018 Keziah Wesley
3 You can redistribute and/or modify this file under the terms of the
4 GNU Affero General Public License as published by the Free Software
5 Foundation, either version 3 of the License, or (at your option) any
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Affero General Public License for more details.
13 You should have received a copy of the GNU Affero General Public
14 License along with this file. If not, see
15 <http://www.gnu.org/licenses/>."
19 ;"A CHARTABLE maps reader-chars (Unicode chars optionally prefixed by
20 a !) to properties. All special handling of any character is defined
21 through CHARTABLE properties."
23 ;"Properties are of two varieties:
24 - Lexical classes. Each character is a member of 0 or more classes.
25 Class membership alone determines tokenization.
26 - Parse functions. Every SPECIAL character has an associated
27 function that handles creating any resultant object, including
28 PARSEing any children and matching delimiters, in
29 recursive-descent fashion."
31 ;"Lexical class combinations used by the standard parser:
32 [.] Special non-breaking: (SP)
33 [,] Special breaking: (SP BR)
35 The behavior of a hypothetical non-special breaker is currently
36 unspecified: e.g. defining * as (BR) may cause *a* to parse as 2 or 3
39 ;"One category is particularly interesting: unspecial tokens, parsed
40 by DEFAULT-HANDLER. The parser for unspecials has to distinguish
41 between atoms and the various numeric encodings, but in all cases
42 its outward behavior is the same: it consumes a non-SP followed by
43 zero or more non-BR, and then pushes some kind of object onto the
44 result stack. It's important to note that this scalar-decoding
45 mechanism is independent of lexical classes, other than those
46 necessary to delimit the sequence--a character may have a different
47 meaning as part of an unspecial token."
49 ;"The current implementation is simple, and reasonably efficient under
50 typical circumstances (lots of low codepoints in input, not too many
51 characters with properties defined). Rules are 'compiled' from an
52 editable format into a query-optimized data structure, which is
53 transparent to the user except that you'll want to use a BULK-UPDATE
54 block if you have many edits to make and want to avoid spending time
55 on unneeded intermediate recompiles."
59 ;"Querying table contents"
62 DEFAULT-HANDLER!-CHARTABLE
64 ;"Creating / updating tables"
66 SET-SPECIAL!-CHARTABLE
67 SET-UNSPECIAL!-CHARTABLE
68 SET-DEFAULT!-CHARTABLE
69 BULK-UPDATE!-CHARTABLE
72 %<AND <BLOCK (<MOBLIST ct> <GET CHARTABLE OBLIST> <ROOT>)> <>>
74 ;"Lowest 21-bits: Unicode codepoint"
75 ;"Highest non-sign bit: flag indicating ! prefix"
78 <NEWTYPE CTABLE VECTOR>
80 ;"compiled: range table [start . post]"
83 ;"compiled: extended range table [start . post . index]"
85 ;"SRC: LIST ((rchar handler) !rest)"
86 ;"compiled: vector of functions, ordered by char"
88 ;"Compiled: points to read table data in editable data structures."
89 ;"Simpler to keep the source data than decompile the range tables."
91 ;"Keep track of BULK-UPDATEs in progress."
96 ;"--- edit operations (SRC tables) ---"
98 ;"TODO: factor out redundancy between all these list-removers. Macro?"
100 <DEFINE set-is-breaker (src char val)
101 ;"Remove from breakers whether setting or clearing. (Prevent dupes)."
102 <PUT .src ,is-breaker
103 <MAPF ,LIST <FUNCTION (x)
104 <COND (<==? .x .char> <MAPRET>) (ELSE <MAPRET .x>)>>
106 ;"(Re-)add to breakers if setting."
107 <COND (.val <PUT .src ,is-breaker (.char !<is-breaker .src>)>)>>
109 <DEFINE remove-special (src char)
110 <PUT .src ,is-special
111 <MAPF ,LIST <FUNCTION (x)
112 <COND (<==? .x .char> <MAPRET>) (ELSE <MAPRET .x>)>>
115 <DEFINE remove-handler (src char)
117 <MAPF ,LIST <FUNCTION (x)
118 <COND (<==? <1 .x> .char> <MAPRET>) (ELSE <MAPRET .x>)>>
121 <DEFINE SET-UNSPECIAL (tab char "OPTIONAL" (is-breaker #FALSE ()) "AUX" (src <rt-src .tab>))
122 <remove-special .src .char>
123 <remove-handler .src .char>
124 <set-is-breaker .src .char .is-breaker>
125 <maybe-recompile .tab>>
127 <DEFINE SET-SPECIAL (tab char is-breaker fn "AUX" (src <rt-src .tab>))
128 ;"remove any previous specialness properties and update is-breaker"
129 <SET-UNSPECIAL .tab .char .is-breaker>
130 <PUT .src ,is-special (.char !<is-special .src>)>
131 <PUT .src ,handlers ((.char .fn) !<handlers .src>)>
132 <maybe-recompile .tab>>
134 <DEFINE SET-DEFAULT (tab fn) <PUT .tab ,default .fn>>
135 <DEFINE DEFAULT-HANDLER (tab) <NTH .tab ,default>>
137 ;"--- query operations (compiled tables) ---"
139 ;"Simple linear search implementation."
141 ;"Look up whether the character is a breaker."
142 <DEFINE BREAKER? (t c "AUX" (breakers <is-breaker .t>) "NAME" act)
144 (<EMPTY? .breakers> <>)
145 (<L? .c <1 .breakers>> <>)
146 (<L? .c <2 .breakers>> T)
147 (ELSE <SET breakers <REST .breakers 2>> <AGAIN .act>)>>
149 ;"Look up whether the character is special. If it is, return its handler."
150 <DEFINE SPECIAL? (t c "AUX" (specials <is-special .t>) "NAME" act)
152 (<EMPTY? .specials> <>)
153 (<L? .c <1 .specials>> <>)
154 (<L? .c <2 .specials>> <NTH <handlers .t> <- .c <3 .specials>>>)
155 (ELSE <SET specials <REST .specials 3>> <AGAIN .act>)>>
159 ;"Create a new empty CHARTABLE."
160 <DEFINE CHARTABLE () <CHTYPE [![] ![] ![] (() () ()) 0 <> <>] CTABLE>>
162 <DEFINE end-update (tab)
163 <PUT .tab ,batch-depth <- <batch-depth .tab> 1>>
165 <maybe-recompile .tab>
168 ;"defer any recompiles to the end of the block"
169 <DEFINE BULK-UPDATE (tab "ARGS" stmts)
170 <PUT .tab ,batch-depth <+ <batch-depth .tab> 1>>
171 <UNWIND <MAPF <> ,EVAL .stmts> <end-update .tab>>
174 ;"Call after modifying the table. Will recompile now, or later if
175 there's a BULK-UPDATE in progress."
176 <DEFINE maybe-recompile (t)
177 <COND (<0? <batch-depth .t>> <recompile .t>)
178 (ELSE <PUT .t ,is-dirty T>)>>
180 <DEFINE sorted (xs) <SORT <> ![!.xs!]>>
182 <DEFINE count-spans (v "AUX" (pv -2) (n 0) "NAME" act)
183 <COND (<EMPTY? .v> <RETURN .n .act>)
184 (<L? 1 <- <1 .v> .pv>> <SET n <+ 1 .n>>)>
192 ;"Given a list of values, return a rangetable: [begin end...]"
194 <COND (<EMPTY? .xs> ![])
195 (ELSE <make-rt-imp std <sorted .xs>>)>>
197 ;"Given a list of values, return an extended rangetable: [begin end running-excluded...]"
198 <DEFINE make-ext-rt (xs)
199 <COND (<EMPTY? .xs> ![])
200 (ELSE <make-rt-imp ext <sorted .xs>>)>>
202 <DEFINE make-rt-imp (type v "AUX"
203 (rt <IUVECTOR <* ,.type <count-spans .v>> 0>)
209 <PUT .t 1 <SET pv <1 .v>>>
211 ;"Advance v until it points to a value beyond the span."
212 <REPEAT ("NAME" span)
213 <SET pv <+ 1 .pv>> ;"increment prev in advance"
214 <COND (<COND (<EMPTY? .v> <RETURN <> .span>)>)
215 (<==? <1 .v> .pv> <SET v <REST .v>>)
216 (ELSE <RETURN <> .span>)>>
217 ;"End the span. pv is the first value missing. <1 .v> will begin the next span."
219 <COND (<==? .type ext> <PUT .t 3 .gaps>)>
220 <SET t <REST .t ,.type>>
221 <COND (<EMPTY? .v> <RETURN .rt .act>)>
222 <SET gaps <+ .gaps <- <1 .v> .pv>>>
225 ;"XXX: Confusion's sort is crashy with predicates."
226 ;<DEFINE sort-handlers (xs) <MAPF ,UVECTOR 2 <SORT <FUNCTION (a b) <G? <1 .a> <1 .b>>> ![!.xs]>>>
227 ;"XXX: Confusion's sort ignores the second sequence."
228 ;<DEFINE sort-handlers (xs "AUX" (keys <MAPF ,UVECTOR 1 .xs>) (vals <MAPF ,UVECTOR 2 .xs>))
229 <SORT <> .keys 1 0 .vals>
232 ;"Given a sequence of (key value) pairs, return a vector of the values
233 sorted by their keys."
234 <DEFINE sort-handlers (xs "AUX"
236 (kv <MAPF ,VECTOR ,MAPRET <MAPF ,UVECTOR 1 .xs> <MAPF ,UVECTOR 2 .xs>>))
237 ;"Sort zipped inputs."
239 ;"Gather every 2nd value."
240 <MAPR ,UVECTOR <FUNCTION (xs)
241 <COND (<==? #WORD 0 <ANDB 1 <LENGTH .xs>>> <MAPRET>)
242 (ELSE <MAPRET <1 .xs>>)>> .kv>>
244 ;"Rebuild the table to incorporate any new modifications."
245 <DEFINE recompile (t "AUX" (src <rt-src .t>))
246 <PUT .t ,is-breaker <make-rt <is-breaker .src>>>
247 <PUT .t ,is-special <make-ext-rt <is-special .src>>>
248 <PUT .t ,handlers <sort-handlers <handlers .src>>>
249 <PUT .t ,is-dirty #FALSE ()>>
253 ;<DEFINE assert ('x "AUX" (ex <EVAL .x>)) <COND (<NOT .ex> <ERROR ASSERT-FAILED .x .ex>)>>
254 ;<DEFINE assert-eq ('x 'y "AUX" (ex <EVAL .x>) (ey <EVAL .y>))
255 <COND (<N=? .ex .ey> <ERROR ASSERT-FAILED (.x .y) (.ex .ey)>)>>
257 ;<PROG ((t <CHARTABLE!-CHARTABLE>))
259 <assert <NOT <SPECIAL?!-CHARTABLE .t *007*>>>
260 <assert <NOT <BREAKER?!-CHARTABLE .t *007*>>>
261 <SET-SPECIAL!-CHARTABLE .t *007* T <FUNCTION () 5>>
262 <SET-SPECIAL!-CHARTABLE .t *006* T <FUNCTION () 3>>
264 <assert <SPECIAL?!-CHARTABLE .t *007*>>
265 <assert <BREAKER?!-CHARTABLE .t *007*>>
266 <BULK-UPDATE!-CHARTABLE .t
267 <SET-UNSPECIAL!-CHARTABLE .t *007*>>
269 <assert <NOT <SPECIAL?!-CHARTABLE .t *007*>>>
270 <assert <NOT <BREAKER?!-CHARTABLE .t *007*>>>
273 ;"Confusion needs this."