emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] elpa/parseclj 47cf208a91 091/185: Fix parsing of tags/discard w


From: ELPA Syncer
Subject: [nongnu] elpa/parseclj 47cf208a91 091/185: Fix parsing of tags/discard with :lexical-preservation
Date: Tue, 28 Dec 2021 14:05:21 -0500 (EST)

branch: elpa/parseclj
commit 47cf208a91b11499ccc58642c4728e8ce0be1f38
Author: Arne Brasseur <arne@arnebrasseur.net>
Commit: Arne Brasseur <arne@arnebrasseur.net>

    Fix parsing of tags/discard with :lexical-preservation
    
    When nodes on the stack can be "empty" (i.e. whitespace, comments), then 
extra
    care needs to be taken to prevent e.g. a #_ from discarding the whitespace 
node
    that follows it in "#_ 1 2 3".
    
    This introduces parseclj--take-token and parseclj--take-value, which should 
also
    come in handy to implement three-element reduction rule for metadata 
literals.
---
 parseclj.el           | 116 ++++++++++++++++++++++++++++++++++++++++++--------
 test/parseclj-test.el |  70 +++++++++++++++++++++++++++++-
 2 files changed, 168 insertions(+), 18 deletions(-)

diff --git a/parseclj.el b/parseclj.el
index 6119058514..7f0e662e44 100644
--- a/parseclj.el
+++ b/parseclj.el
@@ -130,25 +130,85 @@ can be handled with `condition-case'."
   "Reduce collection based on the top of the stack"
   (let ((opening-token-type (parseclj--find-opening-token stack closing-token))
         (fail-fast (a-get options :fail-fast t))
-        (coll nil))
+        (collection nil))
+
+    ;; unwind the stack until opening-token-type is found, adding to collection
     (while (and stack (not (eq (parseclj-lex-token-type (car stack)) 
opening-token-type)))
-      (push (pop stack) coll))
+      (push (pop stack) collection))
 
+    ;; did we find the right token?
     (if (eq (parseclj-lex-token-type (car stack)) opening-token-type)
-        (let ((node (pop stack)))
+        (progn
           (when fail-fast
-            (when-let ((token (seq-find #'parseclj-lex-token? coll)))
+            ;; any unreduced tokens left: bail early
+            (when-let ((token (seq-find #'parseclj-lex-token? collection)))
               (parseclj--error "parseclj: Syntax Error at position %s, 
unmatched %S"
                                (a-get token :pos)
                                (parseclj-lex-token-type token))))
-          (funcall reduce-branch stack node coll))
 
+          ;; all good, call the reducer so it can return an updated stack with 
a
+          ;; new node at the top.
+          (let ((opening-token (pop stack)))
+            (funcall reduce-branch stack opening-token collection)))
+
+      ;; Unwound the stack without finding a matching paren: either bail early
+      ;; or return the original stack and continue parsing
       (if fail-fast
           (parseclj--error "parseclj: Syntax Error at position %s, unmatched 
%S"
                            (a-get closing-token :pos)
                            (parseclj-lex-token-type closing-token))
-        ;; Unwound the stack without finding a matching paren: return the 
original stack and continue parsing
-        (reverse coll)))))
+
+        (reverse collection)))))
+
+(defun parseclj--take-value (stack value-p)
+  "Scan until a value is found.
+Return everything up to the value in reversed order (meaning the
+value comes first in the result).
+
+STACK is the current parse stack to scan.
+
+VALUE-P a predicate to distinguish reduced values from
+non-values (tokens and whitespace)."
+  (let ((result nil))
+    (cl-block nil
+      (while stack
+        (cond
+         ((parseclj-lex-token? (car stack))
+          (cl-return nil))
+
+         ((funcall value-p (car stack))
+          (cl-return (cons (car stack) result)))
+
+         (t
+          (push (pop stack) result)))))))
+
+(defun parseclj--take-token (stack value-p token-types)
+  "Scan until a token of a certain type is found.
+Returns nil if a value is encountered before a matching token is
+found. Return everything up to the token in reversed
+order (meaning the token comes first in the result).
+
+STACK is the current parse stack to scan.
+
+VALUE-P a predicate to distinguish reduced values from
+non-values (tokens and whitespace).
+
+TOKEN-TYPES are the token types to look for."
+  (let ((result nil))
+    (cl-block nil
+      (while stack
+        (cond
+         ((member (parseclj-lex-token-type (car stack)) token-types)
+          (cl-return (cons (car stack) result)))
+
+         ((funcall value-p (car stack))
+          (cl-return nil))
+
+         ((parseclj-lex-token? (car stack))
+          (cl-return nil))
+
+         (t
+          (push (pop stack) result)))))))
 
 (defun parseclj-parse (reduce-leaf reduce-branch &optional options)
   "Clojure/EDN stack-based shift-reduce parser.
@@ -172,12 +232,25 @@ parser relies on the presence or absence of these to 
detect parse
 errors.
 
 OPTIONS is an association list which is passed on to the reducing
-functions.
-"
+functions. Additionally the following options are recognized
+
+- :fail-fast
+  Raise an error when a parse error is encountered, rather than
+  continuing with a partial result.
+- :value-p
+  A predicate function to differentiate values from tokens and
+  whitespace. This is needed when scanning the stack to see if
+  any reductions can be performed. By default anything that isn't
+  a token is considered a value. This can be problematic when
+  parsing with `:lexical-preservation', and which case you should
+  provide an implementation that also returns falsy for
+  :whitespace, :comment, and :discard AST nodes. "
   (let ((fail-fast (a-get options :fail-fast t))
-        (stack nil))
+        (value-p (a-get options :value-p (lambda (e) (not (parseclj-lex-token? 
e)))))
+        (stack nil)
+        (token (parseclj-lex-next)))
 
-    (while (not (eq (parseclj-lex-token-type (setq token (parseclj-lex-next))) 
:eof))
+    (while (not (eq (parseclj-lex-token-type token) :eof))
       ;; (message "STACK: %S" stack)
       ;; (message "TOKEN: %S\n" token)
 
@@ -192,11 +265,17 @@ functions.
        (t (push token stack)))
 
       ;; Reduce based on top two items on the stack (special prefixed elements)
-      (seq-let [top lookup] stack
-        (when (and (parseclj-lex-token? lookup)
-                   (not (parseclj-lex-token? top)) ;; top is fully reduced
-                   (member (parseclj-lex-token-type lookup) '(:discard :tag)))
-          (setf stack (funcall reduce-branch (cddr stack) lookup (list 
top))))))
+      (let* ((top-value (parseclj--take-value stack value-p))
+             (opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p '(:discard :tag)))
+             (new-stack (nthcdr (+ (length top-value) (length opening-token)) 
stack)))
+        (when (and top-value opening-token)
+          ;; (message "Reducing...")
+          ;; (message "  - STACK %S" stack)
+          ;; (message "  - OPENING_TOKEN %S" opening-token)
+          ;; (message "  - TOP_VALUE %S\n" top-value)
+          (setq stack (funcall reduce-branch new-stack (car opening-token) 
(append (cdr opening-token) top-value)))))
+
+      (setq token (parseclj-lex-next)))
 
     ;; reduce root
     (when fail-fast
@@ -229,7 +308,10 @@ key-value pairs to specify parsing options.
         (insert (car string-and-options))
         (goto-char 1)
         (apply 'parseclj-parse-clojure (cdr string-and-options)))
-    (let* ((options (apply 'a-list string-and-options))
+    (let* ((value-p (lambda (e)
+                      (and (parseclj-ast-node? e)
+                           (not (member (parseclj-ast-node-type e) 
'(:whitespace :comment :discard))))))
+           (options (apply 'a-list :value-p value-p string-and-options))
            (lexical? (a-get options :lexical-preservation)))
       (parseclj-parse (if lexical?
                           #'parseclj-ast--reduce-leaf-with-lexical-preservation
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index a2773157df..1809464141 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -194,7 +194,75 @@
                         ((:node-type . :true)
                          (:position . 12)
                          (:form . "true")
-                         (:value . t)))))))
+                         (:value . t))))))
+
+  (should (equal
+           (parseclj-parse-clojure "#_#_4 5" :lexical-preservation t)
+           '((:node-type . :root)
+             (:lexical-preservation . t)
+             (:position . 1)
+             (:children ((:node-type . :discard)
+                         (:position . 1)
+                         (:children ((:node-type . :discard)
+                                     (:position . 3)
+                                     (:children ((:node-type . :number) 
(:position . 5) (:form . "4") (:value . 4))))
+                                    ((:node-type . :whitespace) (:position . 
6) (:form . " "))
+                                    ((:node-type . :number) (:position . 7) 
(:form . "5") (:value . 5)))))))))
+
+(ert-deftest parseclj--take-token-test ()
+  (should (equal
+           (parseclj--take-token
+            (list (parseclj-ast-node :whitespace 10)
+                  (parseclj-ast-node :comment 20)
+                  (parseclj-lex-token :discard "#_" 30)
+                  (parseclj-ast-node :comment 20))
+            (lambda (e)
+              (and (parseclj-ast-node? e)
+                   (not (member (parseclj-ast-node-type e) '(:whitespace 
:comment :discard)))))
+            '(:discard))
+           '(((:token-type . :discard) (:form . "#_") (:pos . 30))
+             ((:node-type . :comment) (:position . 20))
+             ((:node-type . :whitespace) (:position . 10)))))
+
+  (should (equal
+           (parseclj--take-token
+            (list (parseclj-ast-node :whitespace 10)
+                  (parseclj-ast-node :number 20)
+                  (parseclj-lex-token :discard "#_" 30)
+                  (parseclj-ast-node :comment 20))
+            (lambda (e)
+              (and (parseclj-ast-node? e)
+                   (not (member (parseclj-ast-node-type e) '(:whitespace 
:comment :discard)))))
+            '(:discard))
+           nil)))
+
+(ert-deftest parseclj--take-value-test ()
+  (let ((stack '(((:node-type . :number) (:position . 3) (:form . "4") (:value 
. 4))
+                 ((:token-type . :discard) (:form . "#_") (:pos . 1))))
+        (value-p (lambda (e)
+                   (and (parseclj-ast-node? e)
+                        (not (member (parseclj-ast-node-type e) '(:whitespace 
:comment :discard)))))))
+    (should (equal (parseclj--take-value stack value-p)
+                   '(((:node-type . :number) (:position . 3) (:form . "4") 
(:value . 4)))))
+
+    (let* ((top-value (parseclj--take-value stack value-p))
+           (opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p '(:discard :tag)))
+           (new-stack (nthcdr (+ (length top-value) (length opening-token)) 
stack)))
+
+      (should (equal top-value '(((:node-type . :number) (:position . 3) 
(:form . "4") (:value . 4)))))
+      (should (equal opening-token '(((:token-type . :discard) (:form . "#_") 
(:pos . 1)))))
+      (should (equal new-stack nil))))
+
+  (let ((stack '(((:node-type . :whitespace) (:position . 3) (:form . " "))
+                 ((:token-type . :discard) (:form . "#_") (:pos . 1))))
+        (value-p (lambda (e)
+                   (and (parseclj-ast-node? e)
+                        (not (member (parseclj-ast-node-type e) '(:whitespace 
:comment :discard)))))))
+
+    (let* ((top-value (parseclj--take-value stack value-p))
+           (opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p '(:discard :tag)))
+           (new-stack (nthcdr (+ (length top-value) (length opening-token)) 
stack)))
+      top-value)))
 
 (provide 'parseclj-test)
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]