emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] elpa/parseclj 93a0f43529 150/185: Merge pull request #21 from c


From: ELPA Syncer
Subject: [nongnu] elpa/parseclj 93a0f43529 150/185: Merge pull request #21 from clojure-emacs/add-more-syntax-features
Date: Tue, 28 Dec 2021 14:05:31 -0500 (EST)

branch: elpa/parseclj
commit 93a0f43529598984835f88e64b62fa68bebda89b
Merge: af6102c4a8 b40670a561
Author: Arne Brasseur <arne.brasseur@gmail.com>
Commit: GitHub <noreply@github.com>

    Merge pull request #21 from clojure-emacs/add-more-syntax-features
    
    Add enough feature to be able to parse clojure.core
---
 parseclj-ast.el           |   7 +++
 parseclj-lex.el           | 135 +++++++++++++++++++++++++++++++++++++++++-----
 parseclj-parser.el        |  32 +++++++++--
 test/parseclj-lex-test.el |  43 ++++++++++++---
 test/parseclj-test.el     |  79 +++++++++++++++++++++++++++
 5 files changed, 273 insertions(+), 23 deletions(-)

diff --git a/parseclj-ast.el b/parseclj-ast.el
index 5719659068..e7b3cebc71 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -133,6 +133,13 @@ on available options."
                                      :tag (intern (substring (a-get 
opening-token :form) 1))
                                      :children children)
                   stack))
+      (:metadata (cons (parseclj-ast-node :with-meta
+                                          pos
+                                          :children children)
+                       stack))
+      (:map-prefix (cons (a-assoc (car children)
+                                  :map-prefix opening-token)
+                         stack))
       (t (cons
           (parseclj-ast-node type pos :children children)
           stack)))))
diff --git a/parseclj-lex.el b/parseclj-lex.el
index 2f5f8f57b4..5f73d90d43 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -36,6 +36,7 @@
                                     :symbol
                                     :keyword
                                     :string
+                                    :regex
                                     :character)
   "Types of tokens that represent leaf nodes in the AST.")
 
@@ -44,6 +45,22 @@
                                        :rbrace)
   "Types of tokens that mark the end of a non-atomic form.")
 
+(defvar parseclj-lex--prefix-tokens '(:quote
+                                      :backquote
+                                      :unquote
+                                      :unquote-splice
+                                      :discard
+                                      :tag
+                                      :reader-conditional
+                                      :reader-conditional-splice
+                                      :var
+                                      :deref
+                                      :map-prefix)
+  "Tokens that modify the form that follows.")
+
+(defvar parseclj-lex--prefix-2-tokens '(:metadata)
+  "Tokens that modify the two forms that follow.")
+
 ;; Token interface
 
 (defun parseclj-lex-token (type form pos &rest attributes)
@@ -81,6 +98,11 @@ A token is an association list with :token-type as its first 
key."
   (and (consp token)
        (cdr (assq :token-type token))))
 
+(defun parseclj-lex-token-form (token)
+  "Get the form of TOKEN."
+  (and (consp token)
+       (cdr (assq :form token))))
+
 (defun parseclj-lex-leaf-token-p (token)
   "Return t if the given AST TOKEN is a leaf node."
   (member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens))
@@ -89,6 +111,9 @@ A token is an association list with :token-type as its first 
key."
   "Return t if the given ast TOKEN is a closing token."
   (member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
 
+(defun parseclj-lex-error-p (token)
+  "Return t if the TOKEN represents a lexing error token."
+  (eq (parseclj-lex-token-type token) :lex-error))
 
 ;; Elisp values from tokens
 
@@ -177,18 +202,32 @@ S goes through three transformations:
               (<= (char-after (point)) ?9))
     (right-char)))
 
+(defun parseclj-lex-skip-hex ()
+  "Skip all consecutive hex digits after point."
+  (while (and (char-after (point))
+              (or (<= ?0 (char-after (point)) ?9)
+                  (<= ?a (char-after (point)) ?f)
+                  (<= ?A (char-after (point)) ?F)))
+    (right-char)))
+
 (defun parseclj-lex-skip-number ()
   "Skip a number at point."
   ;; [\+\-]?\d+\.\d+
-  (when (member (char-after (point)) '(?+ ?-))
-    (right-char))
+  (if (and (eq ?0 (char-after (point)))
+           (eq ?x (char-after (1+ (point)))))
+      (progn
+        (right-char 2)
+        (parseclj-lex-skip-hex))
+    (progn
+      (when (member (char-after (point)) '(?+ ?-))
+        (right-char))
 
-  (parseclj-lex-skip-digits)
+      (parseclj-lex-skip-digits)
 
-  (when (eq (char-after (point)) ?.)
-    (right-char))
+      (when (eq (char-after (point)) ?.)
+        (right-char))
 
-  (parseclj-lex-skip-digits))
+      (parseclj-lex-skip-digits))))
 
 (defun parseclj-lex-number ()
   "Consume a number and return a `:number' token representing it."
@@ -270,22 +309,39 @@ are returned as their own lex tokens."
        ((equal sym "false") (parseclj-lex-token :false "false" pos))
        (t (parseclj-lex-token :symbol sym pos))))))
 
-(defun parseclj-lex-string ()
-  "Return a lex token representing a string.
-If EOF is reached without finding a closing double quote, a :lex-error
-token is returned."
+(defun parseclj-lex-string* ()
+  "Helper for string/regex lexing.
+Returns either the string, or an error token"
   (let ((pos (point)))
     (right-char)
     (while (not (or (equal (char-after (point)) ?\") (parseclj-lex-at-eof-p)))
       (if (equal (char-after (point)) ?\\)
           (right-char 2)
         (right-char)))
-    (if (equal (char-after (point)) ?\")
-        (progn
-          (right-char)
-          (parseclj-lex-token :string (buffer-substring-no-properties pos 
(point)) pos))
+    (when (equal (char-after (point)) ?\")
+      (right-char)
+      (buffer-substring-no-properties pos (point)))))
+
+(defun parseclj-lex-string ()
+  "Return a lex token representing a string.
+If EOF is reached without finding a closing double quote, a :lex-error
+token is returned."
+  (let ((pos (point))
+        (str (parseclj-lex-string*)))
+    (if str
+        (parseclj-lex-token :string str pos)
       (parseclj-lex-error-token pos :invalid-string))))
 
+(defun parseclj-lex-regex ()
+  "Return a lex token representing a regular expression.
+If EOF is reached without finding a closing double quote, a :lex-error
+token is returned."
+  (let ((pos (1- (point)))
+        (str (parseclj-lex-string*)))
+    (if str
+        (parseclj-lex-token :regex (concat "#" str) pos)
+      (parseclj-lex-error-token pos :invalid-regex))))
+
 (defun parseclj-lex-lookahead (n)
   "Return a lookahead string of N characters after point."
   (buffer-substring-no-properties (point) (min (+ (point) n) (point-max))))
@@ -351,6 +407,16 @@ See `parseclj-lex-symbol', `parseclj-lex-symbol-start-p'."
       (right-char))
     (parseclj-lex-token :comment (buffer-substring-no-properties pos (point)) 
pos)))
 
+(defun parseclj-lex-map-prefix ()
+  "Return a lex token representing a map prefix."
+  (let ((pos (1- (point))))
+    (right-char)
+    (when (equal (char-after (point)) ?:)
+      (right-char))
+    (while (parseclj-lex-symbol-rest-p (char-after (point)))
+      (right-char))
+    (parseclj-lex-token :map-prefix (buffer-substring-no-properties pos 
(point)) pos)))
+
 (defun parseclj-lex-next ()
   "Consume characters at point and return the next lexical token.
 
@@ -387,6 +453,22 @@ See `parseclj-lex-token'."
         (right-char)
         (parseclj-lex-token :rbrace "}" pos))
 
+       ((equal char ?')
+        (right-char)
+        (parseclj-lex-token :quote "'" pos))
+
+       ((equal char ?`)
+        (right-char)
+        (parseclj-lex-token :backquote "`" pos))
+
+       ((equal char ?~)
+        (right-char)
+        (if (eq ?@ (char-after (point)))
+            (progn
+              (right-char)
+              (parseclj-lex-token :unquote-splice "~@" pos))
+          (parseclj-lex-token :unquote "~" pos)))
+
        ((parseclj-lex-at-number-p)
         (parseclj-lex-number))
 
@@ -405,6 +487,14 @@ See `parseclj-lex-token'."
        ((equal char ?\;)
         (parseclj-lex-comment))
 
+       ((equal char ?^)
+        (right-char)
+        (parseclj-lex-token :metadata "^" pos))
+
+       ((equal char ?@)
+        (right-char)
+        (parseclj-lex-token :deref "@" pos))
+
        ((equal char ?#)
         (right-char)
         (let ((char (char-after (point))))
@@ -415,6 +505,23 @@ See `parseclj-lex-token'."
            ((equal char ?_)
             (right-char)
             (parseclj-lex-token :discard "#_" pos))
+           ((equal char ?\()
+            (right-char)
+            (parseclj-lex-token :lambda "#(" pos))
+           ((equal char ?')
+            (right-char)
+            (parseclj-lex-token :var "#'" pos))
+           ((equal char ?\")
+            (parseclj-lex-regex))
+           ((equal char ?:)
+            (parseclj-lex-map-prefix))
+           ((equal char ?\?)
+            (right-char)
+            (if (eq ?@ (char-after (point)))
+                (progn
+                  (right-char)
+                  (parseclj-lex-token :reader-conditional-splice "#?@" pos))
+              (parseclj-lex-token :reader-conditional "#?" pos)))
            ((parseclj-lex-symbol-start-p char t)
             (right-char)
             (parseclj-lex-token :tag (concat "#" 
(parseclj-lex-get-symbol-at-point (1+ pos))) pos))
diff --git a/parseclj-parser.el b/parseclj-parser.el
index 34ed9d4694..ef04e16ef7 100644
--- a/parseclj-parser.el
+++ b/parseclj-parser.el
@@ -47,7 +47,11 @@ can be handled with `condition-case'."
 (defun parseclj--find-opening-token (stack closing-token)
   "Scan STACK for an opening-token matching CLOSING-TOKEN."
   (cl-case (parseclj-lex-token-type closing-token)
-    (:rparen :lparen)
+    (:rparen (parseclj-lex-token-type
+              (seq-find (lambda (token)
+                          (member (parseclj-lex-token-type token)
+                                  '(:lparen :lambda)))
+                        stack)))
     (:rbracket :lbracket)
     (:rbrace (parseclj-lex-token-type
               (seq-find (lambda (token)
@@ -192,6 +196,11 @@ functions. Additionally the following options are 
recognized
       ;; (message "STACK: %S" stack)
       ;; (message "TOKEN: %S\n" token)
 
+      (when (and fail-fast (parseclj-lex-error-p token))
+        (parseclj--error "Invalid token at %s: %S"
+                         (a-get token :pos)
+                         (parseclj-lex-token-form token)))
+
       ;; Reduce based on the top item on the stack (collections)
       (cond
        ((parseclj-lex-leaf-token-p token)
@@ -204,7 +213,7 @@ functions. Additionally the following options are recognized
 
       ;; Reduce based on top two items on the stack (special prefixed elements)
       (let* ((top-value (parseclj--take-value stack value-p))
-             (opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p '(:discard :tag)))
+             (opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p parseclj-lex--prefix-tokens))
              new-stack)
         (while (and top-value opening-token)
           ;; (message "Reducing...")
@@ -214,8 +223,25 @@ functions. Additionally the following options are 
recognized
           (setq new-stack (nthcdr (+ (length top-value) (length 
opening-token)) stack))
           (setq stack (funcall reduce-branch new-stack (car opening-token) 
(append (cdr opening-token) top-value) options))
 
+          ;; recur
           (setq top-value (parseclj--take-value stack value-p))
-          (setq opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p '(:discard :tag)))))
+          (setq opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p parseclj-lex--prefix-tokens))))
+
+      ;; Reduce based on top three items on the stack (metadata, namespaced 
maps)
+      (let* ((top-value-1 (parseclj--take-value stack value-p))
+             (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) 
stack) value-p))
+             (opening-token (parseclj--take-token (nthcdr (+ (length 
top-value-1)
+                                                             (length 
top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
+             new-stack)
+        (while (and top-value-1 top-value-2 opening-token)
+          (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list 
top-value-1 top-value-2 opening-token))) stack))
+          (setq stack (funcall reduce-branch new-stack (car opening-token) 
(append (cdr opening-token) top-value-2 top-value-1) options))
+
+          ;; recur
+          (setq top-value-1 (parseclj--take-value stack value-p))
+          (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) 
stack) value-p))
+          (setq opening-token (parseclj--take-token (nthcdr (+ (length 
top-value-1)
+                                                               (length 
top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
 
       (setq token (parseclj-lex-next)))
 
diff --git a/test/parseclj-lex-test.el b/test/parseclj-lex-test.el
index 183c6caa42..289fa72026 100644
--- a/test/parseclj-lex-test.el
+++ b/test/parseclj-lex-test.el
@@ -52,6 +52,25 @@
                                          (:form . "123e34M")
                                          (:pos . 1)))))
 
+  (with-temp-buffer
+    (insert "0xff00AA")
+    (goto-char 1)
+    (should (equal (parseclj-lex-next) '((:token-type . :number)
+                                         (:form . "0xff00AA")
+                                         (:pos . 1)))))
+
+  (with-temp-buffer
+    (insert "#?(:clj 1 :cljs 2)")
+    (goto-char 1)
+    (should (equal (parseclj-lex-next)
+                   '((:token-type . :reader-conditional) (:form . "#?") (:pos 
. 1)))))
+
+  (with-temp-buffer
+    (insert "#?@(:clj [1] :cljs [2])")
+    (goto-char 1)
+    (should (equal (parseclj-lex-next)
+                   '((:token-type . :reader-conditional-splice) (:form . 
"#?@") (:pos . 1)))))
+
   (with-temp-buffer
     (insert "123x")
     (goto-char 1)
@@ -203,12 +222,7 @@
     (should (equal (parseclj-lex-next) (parseclj-lex-token :number "13" 18)))
     (should (equal (parseclj-lex-next) (parseclj-lex-token :whitespace " " 
20)))
     (should (equal (parseclj-lex-next) (parseclj-lex-token :number "14" 21)))
-    (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23))))
-
-  (with-temp-buffer
-    (insert "~")
-    (goto-char 1)
-    (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "~" 
1)))))
+    (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23)))))
 
 (ert-deftest parseclj-lex-test-at-number-p ()
   (dolist (str '("123" ".9" "+1" "0" "-456"))
@@ -295,6 +309,12 @@
     (goto-char 1)
     (should (equal (parseclj-lex-string) (parseclj-lex-token :string 
"\"abc\\\"\"" 1)))))
 
+(ert-deftest parseclj-lex-test-regex ()
+  (with-temp-buffer
+    (insert "#\"abc\"")
+    (goto-char 1)
+    (should (equal (parseclj-lex-next) (parseclj-lex-token :regex "#\"abc\"" 
1)))))
+
 (ert-deftest parseclj-lex-test-tag ()
   (with-temp-buffer
     (insert "#inst")
@@ -306,6 +326,17 @@
     (goto-char 1)
     (should (equal (parseclj-lex-next) (parseclj-lex-token :tag "#foo/bar" 
1)))))
 
+(ert-deftest parseclj-lex-test-quote ()
+  (with-temp-buffer
+    (insert "'foo")
+    (goto-char 1)
+    (should (equal (parseclj-lex-next) (parseclj-lex-token :quote "'" 1))))
+
+  (with-temp-buffer
+    (insert "`foo")
+    (goto-char 1)
+    (should (equal (parseclj-lex-next) (parseclj-lex-token :backquote "`" 
1)))))
+
 (provide 'parseclj-lex-test)
 
 ;;; parseclj-lex-test.el ends here
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index 190922abb1..a25d2e4e3b 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -185,6 +185,85 @@
                                     ((:node-type . :whitespace) (:position . 
6) (:form . " "))
                                     ((:node-type . :number) (:position . 7) 
(:form . "5") (:value . 5)))))))))
 
+(ert-deftest parseclj--parse-reader-conditionals-test ()
+  (should (equal
+           (parseclj-parse-clojure "#?(:clj 1 :cljs 2)")
+           '((:node-type . :root)
+             (:position . 1)
+             (:children ((:node-type . :reader-conditional)
+                         (:position . 1)
+                         (:children ((:node-type . :list)
+                                     (:position . 3)
+                                     (:children ((:node-type . :keyword)
+                                                 (:position . 4)
+                                                 (:form . ":clj")
+                                                 (:value . :clj))
+                                                ((:node-type . :number)
+                                                 (:position . 9)
+                                                 (:form . "1")
+                                                 (:value . 1))
+                                                ((:node-type . :keyword)
+                                                 (:position . 11)
+                                                 (:form . ":cljs")
+                                                 (:value . :cljs))
+                                                ((:node-type . :number)
+                                                 (:position . 17)
+                                                 (:form . "2")
+                                                 (:value . 2)))))))))))
+
+(ert-deftest parseclj--parse-metadata-test ()
+  (should (equal
+           (parseclj-parse-clojure "^{} [123]")
+           '((:node-type . :root)
+             (:position . 1)
+             (:children ((:node-type . :with-meta)
+                         (:position . 1)
+                         (:children ((:node-type . :map)
+                                     (:position . 2)
+                                     (:children))
+                                    ((:node-type . :vector)
+                                     (:position . 5)
+                                     (:children ((:node-type . :number)
+                                                 (:position . 6)
+                                                 (:form . "123")
+                                                 (:value . 123)))))))))))
+
+(ert-deftest parseclj--parse-var-test ()
+  (should (equal
+           (parseclj-parse-clojure "#'foo")
+           '((:node-type . :root)
+             (:position . 1)
+             (:children ((:node-type . :var)
+                         (:position . 1)
+                         (:children ((:node-type . :symbol)
+                                     (:position . 3)
+                                     (:form . "foo")
+                                     (:value . foo)))))))))
+
+(ert-deftest parseclj--parse-lambda-test ()
+  (should (equal
+           (parseclj-parse-clojure "#(foo)")
+           '((:node-type . :root)
+             (:position . 1)
+             (:children ((:node-type . :lambda)
+                         (:position . 1)
+                         (:children ((:node-type . :symbol)
+                                     (:position . 3)
+                                     (:form . "foo")
+                                     (:value . foo)))))))))
+
+(ert-deftest parseclj--parse-namespaced-map-test ()
+  (should (equal
+           (parseclj-parse-clojure "#:foo.bar{}")
+           '((:node-type . :root)
+             (:position . 1)
+             (:children ((:map-prefix . ((:token-type . :map-prefix)
+                                         (:form . "#:foo.bar")
+                                         (:pos . 1)))
+                         (:node-type . :map)
+                         (:position . 10)
+                         (:children)))))))
+
 (ert-deftest parseclj--take-token-test ()
   (should (equal
            (parseclj--take-token



reply via email to

[Prev in Thread] Current Thread [Next in Thread]