[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[nongnu] elpa/parseclj 93a0f43529 150/185: Merge pull request #21 from c
From: |
ELPA Syncer |
Subject: |
[nongnu] elpa/parseclj 93a0f43529 150/185: Merge pull request #21 from clojure-emacs/add-more-syntax-features |
Date: |
Tue, 28 Dec 2021 14:05:31 -0500 (EST) |
branch: elpa/parseclj
commit 93a0f43529598984835f88e64b62fa68bebda89b
Merge: af6102c4a8 b40670a561
Author: Arne Brasseur <arne.brasseur@gmail.com>
Commit: GitHub <noreply@github.com>
Merge pull request #21 from clojure-emacs/add-more-syntax-features
Add enough feature to be able to parse clojure.core
---
parseclj-ast.el | 7 +++
parseclj-lex.el | 135 +++++++++++++++++++++++++++++++++++++++++-----
parseclj-parser.el | 32 +++++++++--
test/parseclj-lex-test.el | 43 ++++++++++++---
test/parseclj-test.el | 79 +++++++++++++++++++++++++++
5 files changed, 273 insertions(+), 23 deletions(-)
diff --git a/parseclj-ast.el b/parseclj-ast.el
index 5719659068..e7b3cebc71 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -133,6 +133,13 @@ on available options."
:tag (intern (substring (a-get
opening-token :form) 1))
:children children)
stack))
+ (:metadata (cons (parseclj-ast-node :with-meta
+ pos
+ :children children)
+ stack))
+ (:map-prefix (cons (a-assoc (car children)
+ :map-prefix opening-token)
+ stack))
(t (cons
(parseclj-ast-node type pos :children children)
stack)))))
diff --git a/parseclj-lex.el b/parseclj-lex.el
index 2f5f8f57b4..5f73d90d43 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -36,6 +36,7 @@
:symbol
:keyword
:string
+ :regex
:character)
"Types of tokens that represent leaf nodes in the AST.")
@@ -44,6 +45,22 @@
:rbrace)
"Types of tokens that mark the end of a non-atomic form.")
+(defvar parseclj-lex--prefix-tokens '(:quote
+ :backquote
+ :unquote
+ :unquote-splice
+ :discard
+ :tag
+ :reader-conditional
+ :reader-conditional-splice
+ :var
+ :deref
+ :map-prefix)
+ "Tokens that modify the form that follows.")
+
+(defvar parseclj-lex--prefix-2-tokens '(:metadata)
+ "Tokens that modify the two forms that follow.")
+
;; Token interface
(defun parseclj-lex-token (type form pos &rest attributes)
@@ -81,6 +98,11 @@ A token is an association list with :token-type as its first
key."
(and (consp token)
(cdr (assq :token-type token))))
+(defun parseclj-lex-token-form (token)
+ "Get the form of TOKEN."
+ (and (consp token)
+ (cdr (assq :form token))))
+
(defun parseclj-lex-leaf-token-p (token)
"Return t if the given AST TOKEN is a leaf node."
(member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens))
@@ -89,6 +111,9 @@ A token is an association list with :token-type as its first
key."
"Return t if the given ast TOKEN is a closing token."
(member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
+(defun parseclj-lex-error-p (token)
+ "Return t if the TOKEN represents a lexing error token."
+ (eq (parseclj-lex-token-type token) :lex-error))
;; Elisp values from tokens
@@ -177,18 +202,32 @@ S goes through three transformations:
(<= (char-after (point)) ?9))
(right-char)))
+(defun parseclj-lex-skip-hex ()
+ "Skip all consecutive hex digits after point."
+ (while (and (char-after (point))
+ (or (<= ?0 (char-after (point)) ?9)
+ (<= ?a (char-after (point)) ?f)
+ (<= ?A (char-after (point)) ?F)))
+ (right-char)))
+
(defun parseclj-lex-skip-number ()
"Skip a number at point."
;; [\+\-]?\d+\.\d+
- (when (member (char-after (point)) '(?+ ?-))
- (right-char))
+ (if (and (eq ?0 (char-after (point)))
+ (eq ?x (char-after (1+ (point)))))
+ (progn
+ (right-char 2)
+ (parseclj-lex-skip-hex))
+ (progn
+ (when (member (char-after (point)) '(?+ ?-))
+ (right-char))
- (parseclj-lex-skip-digits)
+ (parseclj-lex-skip-digits)
- (when (eq (char-after (point)) ?.)
- (right-char))
+ (when (eq (char-after (point)) ?.)
+ (right-char))
- (parseclj-lex-skip-digits))
+ (parseclj-lex-skip-digits))))
(defun parseclj-lex-number ()
"Consume a number and return a `:number' token representing it."
@@ -270,22 +309,39 @@ are returned as their own lex tokens."
((equal sym "false") (parseclj-lex-token :false "false" pos))
(t (parseclj-lex-token :symbol sym pos))))))
-(defun parseclj-lex-string ()
- "Return a lex token representing a string.
-If EOF is reached without finding a closing double quote, a :lex-error
-token is returned."
+(defun parseclj-lex-string* ()
+ "Helper for string/regex lexing.
+Returns either the string, or an error token"
(let ((pos (point)))
(right-char)
(while (not (or (equal (char-after (point)) ?\") (parseclj-lex-at-eof-p)))
(if (equal (char-after (point)) ?\\)
(right-char 2)
(right-char)))
- (if (equal (char-after (point)) ?\")
- (progn
- (right-char)
- (parseclj-lex-token :string (buffer-substring-no-properties pos
(point)) pos))
+ (when (equal (char-after (point)) ?\")
+ (right-char)
+ (buffer-substring-no-properties pos (point)))))
+
+(defun parseclj-lex-string ()
+ "Return a lex token representing a string.
+If EOF is reached without finding a closing double quote, a :lex-error
+token is returned."
+ (let ((pos (point))
+ (str (parseclj-lex-string*)))
+ (if str
+ (parseclj-lex-token :string str pos)
(parseclj-lex-error-token pos :invalid-string))))
+(defun parseclj-lex-regex ()
+ "Return a lex token representing a regular expression.
+If EOF is reached without finding a closing double quote, a :lex-error
+token is returned."
+ (let ((pos (1- (point)))
+ (str (parseclj-lex-string*)))
+ (if str
+ (parseclj-lex-token :regex (concat "#" str) pos)
+ (parseclj-lex-error-token pos :invalid-regex))))
+
(defun parseclj-lex-lookahead (n)
"Return a lookahead string of N characters after point."
(buffer-substring-no-properties (point) (min (+ (point) n) (point-max))))
@@ -351,6 +407,16 @@ See `parseclj-lex-symbol', `parseclj-lex-symbol-start-p'."
(right-char))
(parseclj-lex-token :comment (buffer-substring-no-properties pos (point))
pos)))
+(defun parseclj-lex-map-prefix ()
+ "Return a lex token representing a map prefix."
+ (let ((pos (1- (point))))
+ (right-char)
+ (when (equal (char-after (point)) ?:)
+ (right-char))
+ (while (parseclj-lex-symbol-rest-p (char-after (point)))
+ (right-char))
+ (parseclj-lex-token :map-prefix (buffer-substring-no-properties pos
(point)) pos)))
+
(defun parseclj-lex-next ()
"Consume characters at point and return the next lexical token.
@@ -387,6 +453,22 @@ See `parseclj-lex-token'."
(right-char)
(parseclj-lex-token :rbrace "}" pos))
+ ((equal char ?')
+ (right-char)
+ (parseclj-lex-token :quote "'" pos))
+
+ ((equal char ?`)
+ (right-char)
+ (parseclj-lex-token :backquote "`" pos))
+
+ ((equal char ?~)
+ (right-char)
+ (if (eq ?@ (char-after (point)))
+ (progn
+ (right-char)
+ (parseclj-lex-token :unquote-splice "~@" pos))
+ (parseclj-lex-token :unquote "~" pos)))
+
((parseclj-lex-at-number-p)
(parseclj-lex-number))
@@ -405,6 +487,14 @@ See `parseclj-lex-token'."
((equal char ?\;)
(parseclj-lex-comment))
+ ((equal char ?^)
+ (right-char)
+ (parseclj-lex-token :metadata "^" pos))
+
+ ((equal char ?@)
+ (right-char)
+ (parseclj-lex-token :deref "@" pos))
+
((equal char ?#)
(right-char)
(let ((char (char-after (point))))
@@ -415,6 +505,23 @@ See `parseclj-lex-token'."
((equal char ?_)
(right-char)
(parseclj-lex-token :discard "#_" pos))
+ ((equal char ?\()
+ (right-char)
+ (parseclj-lex-token :lambda "#(" pos))
+ ((equal char ?')
+ (right-char)
+ (parseclj-lex-token :var "#'" pos))
+ ((equal char ?\")
+ (parseclj-lex-regex))
+ ((equal char ?:)
+ (parseclj-lex-map-prefix))
+ ((equal char ?\?)
+ (right-char)
+ (if (eq ?@ (char-after (point)))
+ (progn
+ (right-char)
+ (parseclj-lex-token :reader-conditional-splice "#?@" pos))
+ (parseclj-lex-token :reader-conditional "#?" pos)))
((parseclj-lex-symbol-start-p char t)
(right-char)
(parseclj-lex-token :tag (concat "#"
(parseclj-lex-get-symbol-at-point (1+ pos))) pos))
diff --git a/parseclj-parser.el b/parseclj-parser.el
index 34ed9d4694..ef04e16ef7 100644
--- a/parseclj-parser.el
+++ b/parseclj-parser.el
@@ -47,7 +47,11 @@ can be handled with `condition-case'."
(defun parseclj--find-opening-token (stack closing-token)
"Scan STACK for an opening-token matching CLOSING-TOKEN."
(cl-case (parseclj-lex-token-type closing-token)
- (:rparen :lparen)
+ (:rparen (parseclj-lex-token-type
+ (seq-find (lambda (token)
+ (member (parseclj-lex-token-type token)
+ '(:lparen :lambda)))
+ stack)))
(:rbracket :lbracket)
(:rbrace (parseclj-lex-token-type
(seq-find (lambda (token)
@@ -192,6 +196,11 @@ functions. Additionally the following options are
recognized
;; (message "STACK: %S" stack)
;; (message "TOKEN: %S\n" token)
+ (when (and fail-fast (parseclj-lex-error-p token))
+ (parseclj--error "Invalid token at %s: %S"
+ (a-get token :pos)
+ (parseclj-lex-token-form token)))
+
;; Reduce based on the top item on the stack (collections)
(cond
((parseclj-lex-leaf-token-p token)
@@ -204,7 +213,7 @@ functions. Additionally the following options are recognized
;; Reduce based on top two items on the stack (special prefixed elements)
(let* ((top-value (parseclj--take-value stack value-p))
- (opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p '(:discard :tag)))
+ (opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p parseclj-lex--prefix-tokens))
new-stack)
(while (and top-value opening-token)
;; (message "Reducing...")
@@ -214,8 +223,25 @@ functions. Additionally the following options are
recognized
(setq new-stack (nthcdr (+ (length top-value) (length
opening-token)) stack))
(setq stack (funcall reduce-branch new-stack (car opening-token)
(append (cdr opening-token) top-value) options))
+ ;; recur
(setq top-value (parseclj--take-value stack value-p))
- (setq opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p '(:discard :tag)))))
+ (setq opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p parseclj-lex--prefix-tokens))))
+
+ ;; Reduce based on top three items on the stack (metadata, namespaced
maps)
+ (let* ((top-value-1 (parseclj--take-value stack value-p))
+ (top-value-2 (parseclj--take-value (nthcdr (length top-value-1)
stack) value-p))
+ (opening-token (parseclj--take-token (nthcdr (+ (length
top-value-1)
+ (length
top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
+ new-stack)
+ (while (and top-value-1 top-value-2 opening-token)
+ (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list
top-value-1 top-value-2 opening-token))) stack))
+ (setq stack (funcall reduce-branch new-stack (car opening-token)
(append (cdr opening-token) top-value-2 top-value-1) options))
+
+ ;; recur
+ (setq top-value-1 (parseclj--take-value stack value-p))
+ (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1)
stack) value-p))
+ (setq opening-token (parseclj--take-token (nthcdr (+ (length
top-value-1)
+ (length
top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
(setq token (parseclj-lex-next)))
diff --git a/test/parseclj-lex-test.el b/test/parseclj-lex-test.el
index 183c6caa42..289fa72026 100644
--- a/test/parseclj-lex-test.el
+++ b/test/parseclj-lex-test.el
@@ -52,6 +52,25 @@
(:form . "123e34M")
(:pos . 1)))))
+ (with-temp-buffer
+ (insert "0xff00AA")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next) '((:token-type . :number)
+ (:form . "0xff00AA")
+ (:pos . 1)))))
+
+ (with-temp-buffer
+ (insert "#?(:clj 1 :cljs 2)")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next)
+ '((:token-type . :reader-conditional) (:form . "#?") (:pos
. 1)))))
+
+ (with-temp-buffer
+ (insert "#?@(:clj [1] :cljs [2])")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next)
+ '((:token-type . :reader-conditional-splice) (:form .
"#?@") (:pos . 1)))))
+
(with-temp-buffer
(insert "123x")
(goto-char 1)
@@ -203,12 +222,7 @@
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "13" 18)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :whitespace " "
20)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "14" 21)))
- (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23))))
-
- (with-temp-buffer
- (insert "~")
- (goto-char 1)
- (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "~"
1)))))
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23)))))
(ert-deftest parseclj-lex-test-at-number-p ()
(dolist (str '("123" ".9" "+1" "0" "-456"))
@@ -295,6 +309,12 @@
(goto-char 1)
(should (equal (parseclj-lex-string) (parseclj-lex-token :string
"\"abc\\\"\"" 1)))))
+(ert-deftest parseclj-lex-test-regex ()
+ (with-temp-buffer
+ (insert "#\"abc\"")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :regex "#\"abc\""
1)))))
+
(ert-deftest parseclj-lex-test-tag ()
(with-temp-buffer
(insert "#inst")
@@ -306,6 +326,17 @@
(goto-char 1)
(should (equal (parseclj-lex-next) (parseclj-lex-token :tag "#foo/bar"
1)))))
+(ert-deftest parseclj-lex-test-quote ()
+ (with-temp-buffer
+ (insert "'foo")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :quote "'" 1))))
+
+ (with-temp-buffer
+ (insert "`foo")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :backquote "`"
1)))))
+
(provide 'parseclj-lex-test)
;;; parseclj-lex-test.el ends here
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index 190922abb1..a25d2e4e3b 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -185,6 +185,85 @@
((:node-type . :whitespace) (:position .
6) (:form . " "))
((:node-type . :number) (:position . 7)
(:form . "5") (:value . 5)))))))))
+(ert-deftest parseclj--parse-reader-conditionals-test ()
+ (should (equal
+ (parseclj-parse-clojure "#?(:clj 1 :cljs 2)")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :reader-conditional)
+ (:position . 1)
+ (:children ((:node-type . :list)
+ (:position . 3)
+ (:children ((:node-type . :keyword)
+ (:position . 4)
+ (:form . ":clj")
+ (:value . :clj))
+ ((:node-type . :number)
+ (:position . 9)
+ (:form . "1")
+ (:value . 1))
+ ((:node-type . :keyword)
+ (:position . 11)
+ (:form . ":cljs")
+ (:value . :cljs))
+ ((:node-type . :number)
+ (:position . 17)
+ (:form . "2")
+ (:value . 2)))))))))))
+
+(ert-deftest parseclj--parse-metadata-test ()
+ (should (equal
+ (parseclj-parse-clojure "^{} [123]")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :with-meta)
+ (:position . 1)
+ (:children ((:node-type . :map)
+ (:position . 2)
+ (:children))
+ ((:node-type . :vector)
+ (:position . 5)
+ (:children ((:node-type . :number)
+ (:position . 6)
+ (:form . "123")
+ (:value . 123)))))))))))
+
+(ert-deftest parseclj--parse-var-test ()
+ (should (equal
+ (parseclj-parse-clojure "#'foo")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :var)
+ (:position . 1)
+ (:children ((:node-type . :symbol)
+ (:position . 3)
+ (:form . "foo")
+ (:value . foo)))))))))
+
+(ert-deftest parseclj--parse-lambda-test ()
+ (should (equal
+ (parseclj-parse-clojure "#(foo)")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :lambda)
+ (:position . 1)
+ (:children ((:node-type . :symbol)
+ (:position . 3)
+ (:form . "foo")
+ (:value . foo)))))))))
+
+(ert-deftest parseclj--parse-namespaced-map-test ()
+ (should (equal
+ (parseclj-parse-clojure "#:foo.bar{}")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:map-prefix . ((:token-type . :map-prefix)
+ (:form . "#:foo.bar")
+ (:pos . 1)))
+ (:node-type . :map)
+ (:position . 10)
+ (:children)))))))
+
(ert-deftest parseclj--take-token-test ()
(should (equal
(parseclj--take-token
- [nongnu] elpa/parseclj 903d60284e 130/185: Update the Travis CI badge, (continued)
- [nongnu] elpa/parseclj 903d60284e 130/185: Update the Travis CI badge, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj d691df5d63 131/185: Update the copyright years, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj acf4a29778 132/185: Update the README, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj d42b4ad744 144/185: Require a couple of dependencies to `parseclj-ast.el`, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj c32017ecc9 143/185: Merge pull request #20 from clojure-emacs/remove-parseedn, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj ece9648128 179/185: Merge pull request #33 from clojure-emacs/arne/remove-a-from-tests, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj c7f50e3414 178/185: Update CHANGELOG, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 30c950a4ad 167/185: Use map-contains-key, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 0d157d759b 163/185: Merge branch 'master' into shebang-and-symbolic-values, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 089160a487 169/185: Drop a.el dependency, bump copyright year, bump version, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 93a0f43529 150/185: Merge pull request #21 from clojure-emacs/add-more-syntax-features,
ELPA Syncer <=
- [nongnu] elpa/parseclj adb55fa579 183/185: Merge pull request #34 from clojure-emacs/cl-case-to-cond, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj dec638c5ca 153/185: Tweak the keywords, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 5cf5cd5f53 182/185: Replace seq-doseq with mapcar... IDK, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 3de700b057 154/185: Add a changelog, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj f5f7ec1660 173/185: Release v1.0.1, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj b34d3e13a2 156/185: Support eval #=(foo...) forms, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj d659079598 174/185: Remove the remaining a.el dependency from non-test code, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 507720a632 170/185: Release 1.0, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 6d1c9c348a 184/185: Update CHANGELOG, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 61513d2c99 164/185: Merge pull request #27 from clojure-emacs/shebang-and-symbolic-values, ELPA Syncer, 2021/12/28