3 ;;; Copyright (C) 2015-2017 Matthew R. Wette
5 ;;; This program is free software: you can redistribute it and/or modify
6 ;;; it under the terms of the GNU General Public License as published by
7 ;;; the Free Software Foundation, either version 3 of the License, or
8 ;;; (at your option) any later version.
10 ;;; This program is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;;; GNU General Public License for more details.
15 ;;; You should have received a copy of the GNU General Public License
16 ;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
18 ;; Notes on the code design may be found in doc/nyacc/lang/c99-hg.info
20 ;; @section The C99 Parser Body
21 ;; This code provides the front end to the C99 parser, including the lexical
22 ;; analyzer and optional CPP processing. In @code{'file} mode the lex'er
23 ;; passes CPP statements to the parser; in @code{'code} mode the lex'er
24 ;; parses and evaluates the CPP statements. In the case of included files
25 ;; (e.g., via @code{#include <file.h>}) the include files are parsed if
26 ;; not in @code{inc-help}. The a-list @code{inc-help} maps
27 ;; include file names to typenames (e.g., @code{stdio.h} to @code{FILE}) and
28 ;; CPP defines (e.g., "INT_MAX=12344").
30 (use-modules ((srfi srfi-9) #:select (define-record-type)))
31 (use-modules ((sxml xpath) #:select (sxpath)))
32 (use-modules (ice-9 regex))
34 (define-record-type cpi
37 (debug cpi-debug set-cpi-debug!) ; debug #t #f
38 (defines cpi-defs set-cpi-defs!) ; #defines
39 (incdirs cpi-incs set-cpi-incs!) ; #includes
40 (inc-tynd cpi-itynd set-cpi-itynd!) ; a-l of incfile => typenames
41 (inc-defd cpi-idefd set-cpi-idefd!) ; a-l of incfile => defines
42 (ptl cpi-ptl set-cpi-ptl!) ; parent typename list
43 (ctl cpi-ctl set-cpi-ctl!) ; current typename list
46 ;;.@deffn Procedure split-cppdef defstr => (<name> . <repl>)| \
47 ;; ((<name> <args> . <repl>)|#f
48 ;; Convert define string to a dict item. Examples:
50 ;; "ABC=123" => '("ABC" . "123")
51 ;; "MAX(X,Y)=((X)>(Y)?(X):(Y))" => ("MAX" ("X" "Y") . "((X)>(Y)?(X):(Y))")
57 (let ((rx1 (make-regexp "^([A-Za-z0-9_]+)\\([^)]*\\)=(.*)$"))
58 (rx2 (make-regexp "^([A-Za-z0-9_]+)=(.*)$")))
60 (let* ((m1 (regexp-exec rx1 defstr))
61 (m2 (or m1 (regexp-exec rx2 defstr))))
63 ((regexp-exec rx1 defstr) =>
65 (let* ((s1 (match:substring m1 1))
66 (s2 (match:substring m1 2))
67 (s3 (match:substring m1 3)))
68 (cons s1 (cons s2 s3)))))
69 ((regexp-exec rx2 defstr) =>
71 (let* ((s1 (match:substring m2 1))
72 (s2 (match:substring m2 2)))
76 (define (split-cppdef s)
77 (apply cons (string-split s #\=)))))
79 ;; @deffn Procedure make-cpi debug defines incdirs inchelp
81 (define (make-cpi debug defines incdirs inchelp)
82 ;; convert inchelp into inc-file->typenames and inc-file->defines
83 ;; Any entry for an include file which contains `=' is considered
84 ;; a define; otherwise, the entry is a typename.
86 (define (split-helper helper)
87 (let ((file (car helper)))
88 (let iter ((tyns '()) (defs '()) (ents (cdr helper)))
90 ((null? ents) (values (cons file tyns) (cons file defs)))
91 ((split-cppdef (car ents)) =>
92 (lambda (def) (iter tyns (cons def defs) (cdr ents))))
93 (else (iter (cons (car ents) tyns) defs (cdr ents)))))))
95 (let* ((cpi (make-cpi-1)))
96 (set-cpi-debug! cpi debug) ; print states debug
97 (set-cpi-defs! cpi (map split-cppdef defines)) ; list of define strings
98 (set-cpi-incs! cpi incdirs) ; list of include dir's
99 (set-cpi-ptl! cpi '()) ; list of lists of typenames
100 (set-cpi-ctl! cpi '()) ; list of typenames
102 (let iter ((itynd '()) (idefd '()) (helpers inchelp))
103 (cond ((null? helpers)
104 (set-cpi-itynd! cpi itynd)
105 (set-cpi-idefd! cpi idefd))
108 (lambda () (split-helper (car helpers)))
109 (lambda (ityns idefs)
110 (iter (cons ityns itynd) (cons idefs idefd) (cdr helpers)))))))
113 (define *info* (make-fluid #f))
115 ;; @deffn {Procedure} typename? name
116 ;; Called by lexer to determine if symbol is a typename.
117 ;; Check current sibling for each generation.
119 (define (typename? name)
120 (let ((cpi (fluid-ref *info*)))
121 (if (member name (cpi-ctl cpi)) #t
122 (let iter ((ptl (cpi-ptl cpi)))
124 (if (member name (car ptl)) #t
125 (iter (cdr ptl))))))))
127 ;; @deffn {Procedure} add-typename name
128 ;; Helper for @code{save-typenames}.
130 (define (add-typename name)
131 (let ((cpi (fluid-ref *info*)))
132 (set-cpi-ctl! cpi (cons name (cpi-ctl cpi)))))
134 (define (cpi-push) ;; on #if
135 (let ((cpi (fluid-ref *info*)))
136 (set-cpi-ptl! cpi (cons (cpi-ctl cpi) (cpi-ptl cpi)))
137 (set-cpi-ctl! cpi '())))
139 (define (cpi-shift) ;; on #elif #else
140 (set-cpi-ctl! (fluid-ref *info*) '()))
142 (define (cpi-pop) ;; on #endif
143 (let ((cpi (fluid-ref *info*)))
144 (set-cpi-ctl! cpi (append (cpi-ctl cpi) (car (cpi-ptl cpi))))
145 (set-cpi-ptl! cpi (cdr (cpi-ptl cpi)))))
147 ;; @deffn {Procedure} find-new-typenames decl
148 ;; Helper for @code{save-typenames}.
149 ;; Given declaration return a list of new typenames (via @code{typedef}).
151 (define (find-new-typenames decl)
153 ;; like declr->ident in util2.scm
154 (define (declr->id-name declr)
156 ((ident) (sx-ref declr 1))
157 ((init-declr) (declr->id-name (sx-ref declr 1)))
158 ((comp-declr) (declr->id-name (sx-ref declr 1)))
159 ((array-of) (declr->id-name (sx-ref declr 1)))
160 ((ptr-declr) (declr->id-name (sx-ref declr 2)))
161 ((ftn-declr) (declr->id-name (sx-ref declr 1)))
162 ((scope) (declr->id-name (sx-ref declr 1)))
163 (else (error "coding bug: " declr))))
165 (let* ((spec (sx-ref decl 1))
166 (stor (sx-find 'stor-spec spec))
167 (id-l (sx-ref decl 2)))
168 (if (and stor (eqv? 'typedef (caadr stor)))
169 (let iter ((res '()) (idl (cdr id-l)))
171 (iter (cons (declr->id-name (sx-ref (car idl) 1)) res)
175 ;; @deffn {Procedure} save-typenames decl
176 ;; Save the typenames for the lexical analyzer and return the decl.
178 (define (save-typenames decl)
179 ;; This finds typenames using @code{find-new-typenames} and adds via
180 ;; @code{add-typename}. Then return the decl.
181 (for-each add-typename (find-new-typenames decl))
184 ;; ------------------------------------------------------------------------
186 (define (p-err . args)
187 (apply throw 'c99-error args))
189 ;; @deffn {Procedure} read-cpp-line ch => #f | (cpp-xxxx)??
190 ;; Given if ch is #\# read a cpp-statement.
191 ;; The standard implies that comments are tossed here but we keep them
192 ;; so that they can end up in the pretty-print output.
194 (define (read-cpp-line ch)
195 (if (not (eq? ch #\#)) #f
196 (let iter ((cl '()) (ch (read-char)))
198 ((eof-object? ch) (throw 'cpp-error "CPP lines must end in newline"))
199 ((eq? ch #\newline) (unread-char ch) (list->string (reverse cl)))
201 (let ((c2 (read-char)))
202 (if (eq? c2 #\newline)
203 (iter cl (read-char))
204 (iter (cons* c2 ch cl) (read-char)))))
205 ((eq? ch #\/) ;; swallow comments, event w/ newlines
206 (let ((c2 (read-char)))
209 (let iter2 ((cl2 (cons* #\* #\/ cl)) (ch (read-char)))
212 (let ((c2 (read-char)))
214 (iter (cons* #\/ #\* cl2) (read-char)) ;; keep comment
215 (iter2 (cons #\* cl2) c2))))
217 (iter2 (cons ch cl2) (read-char))))))
219 (iter (cons #\/ cl) c2)))))
220 (else (iter (cons ch cl) (read-char)))))))
222 ;; @deffn {Procedure} find-file-in-dirl file dirl => path
224 (define (find-file-in-dirl file dirl)
225 (let iter ((dirl dirl))
227 (let ((p (string-append (car dirl) "/" file)))
228 (if (access? p R_OK) p (iter (cdr dirl)))))))
230 (define (def-xdef? name mode)
233 ;; @deffn {Procedure} gen-c-lexer [#:mode mode] [#:xdef? proc] => procedure
234 ;; Generate a context-sensitive lexer for the C99 language. The generated
235 ;; lexical analyzer reads and passes comments and optionally CPP statements
236 ;; to the parser. The keyword argument @var{mode} will determine if CPP
237 ;; statements are passed (@code{'file} mode) or parsed and executed
238 ;; (@code{'file} mode) as described above. Comments will be passed as
239 ;; ``line'' comments or ``lone'' comments: lone comments appear on a line
240 ;; without code. The @code{xdef?} keyword argument allows user to pass
241 ;; a predicate which determines whether CPP symbols in code are expanded.
242 ;; The default predicate is
244 ;; (define (def-xdef? mode name) (eqv? mode 'code))
248 ;; This gets ugly in order to handle cpp.
249 ;;.need to add support for num's w/ letters like @code{14L} and @code{1.3f}.
250 ;; todo: I think there is a bug wrt the comment reader because // ... \n
251 ;; will end up in same mode... so after
253 ;; the lexer will think we are not at BOL.
254 (let* ((match-table mtab)
255 (read-ident read-c-ident)
256 (read-comm read-c-comm)
258 (ident-like? (make-ident-like-p read-ident))
260 (strtab (filter-mt string? match-table)) ; strings in grammar
261 (kwstab (filter-mt ident-like? strtab)) ; keyword strings =>
262 (keytab (map-mt string->symbol kwstab)) ; keywords in grammar
263 (chrseq (remove-mt ident-like? strtab)) ; character sequences
264 (symtab (filter-mt symbol? match-table)) ; symbols in grammar
265 (chrtab (filter-mt char? match-table)) ; characters in grammar
267 (read-chseq (make-chseq-reader chrseq))
268 (assc-$ (lambda (pair) (cons (assq-ref symtab (car pair)) (cdr pair))))
270 (t-ident (assq-ref symtab '$ident))
271 (t-typename (assq-ref symtab 'typename))
272 (xp1 (sxpath '(cpp-stmt define)))
273 (xp2 (sxpath '(decl))))
275 ;; xdef?: (proc name mode) => #t|#f : do we expand #define?
276 (lambda* (#:key (mode 'code) (xdef? #f))
277 (let ((bol #t) ; begin-of-line condition
278 (ppxs (list 'keep)) ; CPP execution state stack
279 (info (fluid-ref *info*)) ; assume make and run in same thread
280 (x-def? (or xdef? def-xdef?)))
281 ;; Return the first (tval . lval) pair not excluded by the CPP.
284 (define (exec-cpp?) ; exec (vs pass to parser) CPP stmts?
287 (define (cpp-flow? keyw)
288 (memq keyw '(if elif else)))
290 (define (add-define tree)
291 (let* ((tail (cdr tree))
292 (name (car (assq-ref tail 'name)))
293 (args (assq-ref tail 'args))
294 (repl (car (assq-ref tail 'repl)))
295 (cell (cons name (if args (cons args repl) repl))))
296 (set-cpi-defs! info (cons cell (cpi-defs info)))))
298 (define (rem-define name)
299 (set-cpi-defs! info (delete name (cpi-defs info))))
301 (define (apply-helper file)
302 (let* ((tyns (assoc-ref (cpi-itynd info) file))
303 (defs (assoc-ref (cpi-idefd info) file)))
305 (for-each add-typename tyns)
306 (set-cpi-defs! info (append defs (cpi-defs info))))
309 ;; Evaluate expression text in #if of #elif statement.
310 (define (eval-cpp-cond-text text)
314 (let* ((defs (cpi-defs info))
315 (rhs (cpp-expand-text text defs))
316 (exp (parse-cpp-expr rhs)))
317 (eval-cpp-expr exp defs)))
318 (lambda (key fmt . args)
319 (report-error fmt args)
320 (throw 'c99-error "CPP error"))))
322 (define (inc-stmt->file stmt)
323 (let* ((arg (cadr stmt)) (len (string-length arg)))
324 (substring arg 1 (1- len))))
326 (define (inc-file->path file)
327 (find-file-in-dirl file (cpi-incs info)))
329 (define (eval-cpp-stmt-1/code stmt)
330 ;; eval control flow states: {skip-look, keep, skip-done, skip}
334 ((skip-look skip-done skip) ;; don't eval if excluded
335 (set! ppxs (cons 'skip ppxs)))
337 (let ((val (eval-cpp-cond-text (cadr stmt))))
338 (if (not val) (p-err "unresolved: ~S" (cadr stmt)))
339 (if (eq? 'keep (car ppxs))
341 (set! ppxs (cons 'skip-look ppxs))
342 (set! ppxs (cons 'keep ppxs)))
343 (set! ppxs (cons 'skip-done ppxs)))))))
346 ((skip) #t) ;; don't eval if excluded
348 (let ((val (eval-cpp-cond-text (cadr stmt))))
349 (if (not val) (p-err "unresolved: ~S" (cadr stmt)))
351 ((skip-look) (if (not (zero? val)) (set-car! ppxs 'keep)))
352 ((keep) (set-car! ppxs 'skip-done)))))))
355 ((skip-look) (set-car! ppxs 'keep))
356 ((keep) (set-car! ppxs 'skip-done))))
358 (set! ppxs (cdr ppxs)))
360 (if (eqv? 'keep (car ppxs))
361 (eval-cpp-stmt-2/code stmt)))))
363 (define (eval-cpp-stmt-2/code stmt)
364 ;; eval non-control flow
368 (let* ((file (inc-stmt->file stmt))
369 (path (inc-file->path file)))
371 ((apply-helper file)) ; use helper
372 ((not path) (p-err "not found: ~S" file)) ; file not found
373 (else (set! bol #t) (push-input (open-input-file path))))))
374 ((define) (add-define stmt))
375 ((undef) (rem-define (cadr stmt)))
376 ((error) (p-err "error: #error ~A" (cadr stmt)))
377 ((pragma) #t) ;; ignore for now
378 (else (error "bad cpp flow stmt"))))
380 (define (eval-cpp-stmt-1/file stmt)
383 ((elif else) (cpi-shift))
385 (else (eval-cpp-stmt-2/file stmt))))
387 (define (eval-cpp-stmt-2/file stmt)
388 ;; eval non-control flow
392 (let* ((file (inc-stmt->file stmt))
393 (path (inc-file->path file)))
395 ((apply-helper file)) ; use helper
396 ((not path) (p-err "not found: ~S" file)) ; file not found
397 ((with-input-from-file path run-parse) => ; include tree
398 (lambda (tree) (for-each add-define (xp1 tree))))
399 (else (p-err "included from ~S" path)))))
400 ((define) (add-define stmt))
401 ((undef) (rem-define (cadr stmt)))
403 ((pragma) #t) ;; need to work this
404 (else (error "bad cpp flow stmt"))))
406 (define (eval-cpp-stmt stmt)
411 ((code) (eval-cpp-stmt-1/code stmt))
412 ((file) (eval-cpp-stmt-1/file stmt))))
413 (lambda (key fmt . rest)
414 (report-error fmt rest)
415 (throw 'c99-error "CPP error"))))
417 ;; Composition of @code{read-cpp-line} and @code{eval-cpp-line}.
418 (define (read-cpp-stmt ch)
419 (and=> (read-cpp-line ch) cpp-line->stmt))
422 (let iter ((ch (read-char)))
425 (if (pop-input) (iter (read-char)) (assc-$ '($end . ""))))
426 ((eq? ch #\newline) (set! bol #t) (iter (read-char)))
427 ((char-set-contains? c:ws ch) (iter (read-char)))
430 (cond ;; things that depend on bol only
431 ((read-comm ch #t) => assc-$)
432 ((read-cpp-stmt ch) =>
436 ((code) (iter (read-char)))
437 ((file) (assc-$ `(cpp-stmt . ,stmt))))))
441 (let ((symb (string->symbol name)))
443 ((and (x-def? name mode)
444 (expand-cpp-macro-ref name (cpi-defs info)))
446 (push-input (open-input-string st))
448 ((assq-ref keytab symb)
449 => (lambda (t) (cons t name)))
451 (cons (assq-ref symtab 'typename) name))
453 (cons (assq-ref symtab '$ident) name))))))
454 ((read-c-num ch) => assc-$)
455 ((read-c-string ch) => assc-$)
456 ((read-c-chlit ch) => assc-$)
457 ((read-comm ch #f) => assc-$)
458 ((read-chseq ch) => identity)
459 ((assq-ref chrtab ch) => (lambda (t) (cons t (string ch))))
460 ((eqv? ch #\\) ;; C allows \ at end of line to continue
461 (let ((ch (read-char)))
462 (cond ((eqv? #\newline ch) (iter (read-char))) ;; extend line
463 (else (unread-char ch) (cons #\\ "\\"))))) ;; parse err
464 (else (cons ch (string ch))))))
466 ;; Loop between reading tokens and skipping tokens via CPP logic.
467 (let iter ((pair (read-token)))
469 ((keep) ;;(simple-format #t "lx=>~S\n" pair)
471 ((skip-done skip-look skip)
473 (else (error "coding error"))))