(define ((in? l) x)
(match l
((h . t)
(or (eq? x h)
((in? t) x)))
(_
#f)))
(define (non-terminals grammar)
(delete-duplicates
(map (λ ((non-terminal => . production))
non-terminal)
grammar)))
(define (terminals grammar)
(let ((non-terminals (non-terminals grammar))
(productions (append-map (λ ((non-terminal => . production))
production)
grammar)))
(delete-duplicates (lset-difference eq? productions non-terminals))))
(define ((recursive-descent-parser grammar) input)
(let ((non-terminals (non-terminals grammar))
(terminals (terminals grammar))
(((initial-rule => . _) . _) grammar))
(define (initial-match? rule input)
(let* ((prefix _ (span (in? terminals) rule))
(prefix-length (length prefix)))
(and (>= (length input) prefix-length)
(equal? prefix (take input prefix-length)))))
(define (match-rule rule input)
(match rule
(()
(values
'()
input))
(((? (in? terminals) t) . next)
(let (((token . input) input))
(if (eq? token t)
(let ((rest input (match-rule next input)))
(values
`(,token . ,rest)
input))
(throw 'parse-error input))))
(((? (in? non-terminals) A) . next)
(let* ((parsed input (parse-rule A input))
(rest input (match-rule next input)))
(values
`(,parsed . ,rest)
input)))))
(define (parse-rule rule-name input)
(let try ((variants (filter (λ ((name => . rule))
(and (eq? name rule-name)
(initial-match? rule input)))
grammar)))
(catch 'parse-error
(λ ()
(match variants
(((A => . first-rule) . _)
(let ((parsed input (match-rule first-rule input)))
(values
`(,A . ,parsed)
input)))
(_
(throw 'parse-error input))))
(λ errors
(match variants
((failed . remaining)
(try remaining))
(_
(throw 'parse-error input)))))))
(parse-rule initial-rule input)))
;; for example:
((recursive-descent-parser '((A => a A b)
(A => c)))
'(a a a c b b b))
;; => (A a (A a (A a (A c) b) b) b)
((recursive-descent-parser '((<S> => a <S> d)
(<S> => <B>)
(<B> => b <B> c)
(<B> => e)))
'(a a b b e c c d d))
; => (<S> a (<S> a (<S> (<B> b (<B> b (<B> e) c) c)) d) d)
((recursive-descent-parser '((<S> => <A> <B>)
(<A> => a <A>)
(<A> => x)
(<B> => b <B>)
(<B> => x)))
'(a a a a x b b x))
; => (<S> (<A> a (<A> a (<A> a (<A> a (<A> x))))) (<B> b (<B> b (<B> x))))
;; note that the parser doesn''t always work;
;; it only works for the so-called LL grammars
;; for instance, the following grammar will result
;; in stack overflow:
((recursive-descent-parser '((<A> => <A> + <B>)
(<A> => <B>)
(<B> => a)
(<B> => c)))
'(a + c + a + c + a + a))
;; ~~~> <boom!>
;; However, the grammar can be transformed to an equivalent form:
((recursive-descent-parser '((<A> => <B> +<B>*)
(+<B>* => + <B> +<B>*)
(+<B>* => )
(<B> => a)
(<B> => c)))
'(a + c + a + c + a + a))
;; but then, some strange non-terminal symbols appear:
;=> (<A> (<B> a) (+<B>* + (<B> c)
; (+<B>* + (<B> a) (+<B>* + (<B> c)
; (+<B>* + (<B> a)
; (+<B>* + (<B> a) (+<B>*)))))))
; We need to introduce additional transformation on the ouput:
(define (eliminate+<B>* tree)
(match tree
(('<A> <B> +<B>*)
`(<A> ,<B> . ,(eliminate+<B>* +<B>*)))
(('+<B>* '+ <B> +<B>*)
`(+ ,<B> . ,(eliminate+<B>* +<B>*)))
(('+<B>*)
'())
(_
tree)))
(eliminate+<B>*
((recursive-descent-parser '((<A> => <B> +<B>*)
(+<B>* => + <B> +<B>*)
(+<B>* => )
(<B> => a)
(<B> => c)))
'(a + c + a + c + a + a)))
;=> (<A> (<B> a) + (<B> c) + (<B> a) + (<B> c) + (<B> a) + (<B> a))
Happy birthday, Guile!