[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[nongnu] elpa/parseclj a9dba19760 086/185: Clean up node and token types
From: |
ELPA Syncer |
Subject: |
[nongnu] elpa/parseclj a9dba19760 086/185: Clean up node and token types |
Date: |
Tue, 28 Dec 2021 14:05:21 -0500 (EST) |
branch: elpa/parseclj
commit a9dba19760a3b9801883449893fcb312d7b0abb5
Author: Arne Brasseur <arne@arnebrasseur.net>
Commit: Arne Brasseur <arne@arnebrasseur.net>
Clean up node and token types
Make token keys keywords, as with nodes.
Use :token-type instead of just 'type, analogous with :node-type.
Provide similar helper functions for tokens and nodes.
---
parseclj-ast.el | 44 ++++++++++++++++++++++++++-----------
parseclj-lex.el | 49 +++++++++++++++++++++++++----------------
parseclj.el | 48 +++++++++++++++++++++--------------------
parseedn.el | 2 +-
test/parseclj-lex-test.el | 55 +++++++++++++++++++++++++----------------------
test/parseclj-test.el | 2 +-
6 files changed, 118 insertions(+), 82 deletions(-)
diff --git a/parseclj-ast.el b/parseclj-ast.el
index 29a0207d5e..a56cfd8c00 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -27,20 +27,38 @@
;;; Code:
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Parser
+;; AST helper functions
+
+(defun parseclj-ast-node (type position &rest attributes)
+ "Create an AST node with given TYPE and POSITION.
+
+Other ATTRIBUTES can be given as a flat list of key-value pairs. "
+ (apply 'a-list :node-type type :position position attributes))
+
+(defun parseclj-ast-node? (node)
+ "Return `t' if the given NODE is a Clojure AST node."
+ (and (consp node)
+ (consp (car node))
+ (eq :node-type (caar node))))
+
+(defun parseclj-ast-node-type (node)
+ "Return the type of the AST node NODE."
+ (a-get node :node-type))
+
+(defun parseclj-ast-leaf-node? (node)
+ "Return `t' if the given ast NODE is a leaf node."
+ (member (parseclj-ast-node-type node) parseclj--leaf-tokens))
-(defun parseclj-ast--node (type position &rest kvs)
- (apply 'a-list ':node-type type ':position position kvs))
+;; Parse/reduce strategy functions
(defun parseclj-ast--reduce-leaf (stack token)
(if (member (parseclj-lex-token-type token) '(:whitespace :comment))
stack
(cons
- (parseclj-ast--node (parseclj-lex-token-type token)
- (a-get token 'pos)
- ':form (a-get token 'form)
- ':value (parseclj--leaf-token-value token))
+ (parseclj-ast-node (parseclj-lex-token-type token)
+ (a-get token :pos)
+ :form (a-get token :form)
+ :value (parseclj--leaf-token-value token))
stack)))
(defun parseclj-ast--reduce-leaf-with-lexical-preservation (stack token)
@@ -49,16 +67,16 @@
(if (member token-type '(:whitespace :comment))
;; merge consecutive whitespace or comment tokens
(if (eq token-type (a-get top :node-type))
- (cons (a-update top :form #'concat (a-get token 'form))
+ (cons (a-update top :form #'concat (a-get token :form))
(cdr stack))
- (cons (parseclj-ast--node (parseclj-lex-token-type token)
- (a-get token 'pos)
- ':form (a-get token 'form))
+ (cons (parseclj-ast-node (parseclj-lex-token-type token)
+ (a-get token :pos)
+ :form (a-get token :form))
stack))
(parseclj-ast--reduce-leaf stack token))))
(defun parseclj-ast--reduce-branch (stack opener-token children)
- (let* ((pos (a-get opener-token 'pos))
+ (let* ((pos (a-get opener-token :pos))
(type (parseclj-lex-token-type opener-token))
(type (cl-case type
(:lparen :list)
diff --git a/parseclj-lex.el b/parseclj-lex.el
index 9e72a2972c..df59881f67 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -25,26 +25,39 @@
;; A reader for EDN data files and parser for Clojure source files.
-(defun parseclj-lex-token (type form pos &rest args)
- `((type . ,type)
- (form . ,form)
- (pos . ,pos)
- ,@(mapcar (lambda (pair)
- (cons (car pair) (cadr pair)))
- (seq-partition args 2))))
+;;; Code
-(defun parseclj-lex-token? (token)
- (and (consp token) (consp (car token)) (eq 'type (caar token))))
+(defun parseclj-lex-token (type form pos &rest attributes)
+ "Create a lexer token with the specified attributes.
-(defun parseclj-lex-token-type (token)
- (and (listp token)
- (cdr (assq 'type token))))
+Tokens at a mimimum have these attributes
+- TYPE: the type of token, like :whitespace or :lparen
+- FORM: the source form, a string
+- POS: the position in the input, starts from 1 (like point)
+
+Other ATTRIBUTES can be given as a flat list of key-value pairs."
+ (apply 'a-list :token-type type :form form :pos pos attributes))
(defun parseclj-lex-token? (token)
- (and (listp token)
+ "Is the given TOKEN a parseclj-lex TOKEN.
+
+A token is an association list with :token-type as its first key. "
+ (and (consp token)
(consp (car token))
- (eq 'type (caar token))
- (not (listp (cdar token)))))
+ (eq :token-type (caar token))))
+
+(defun parseclj-lex-token-type (token)
+ "Get the type of TOKEN."
+ (and (consp token)
+ (cdr (assq :token-type token))))
+
+(defun parseclj-lex-leaf-token? (token)
+ "Return `t' if the given ast TOKEN is a leaf node."
+ (member (parseclj-lex-token-type token) parseclj--leaf-tokens))
+
+(defun parseclj-lex-closing-token? (token)
+ "Return `t' if the given ast TOKEN is a closing toking."
+ (member (parseclj-lex-token-type token) parseclj--closing-tokens))
(defun parseclj-lex-at-whitespace? ()
(let ((char (char-after (point))))
@@ -106,7 +119,7 @@
(parseclj-lex-token :lex-error
(buffer-substring-no-properties pos (point))
pos
- 'error-type :invalid-number-format))
+ :error-type :invalid-number-format))
(parseclj-lex-token :number
(buffer-substring-no-properties pos (point))
@@ -214,7 +227,7 @@ behavior."
(if (equal (char-after (point)) ?:) ;; three colons in a row => lex-error
(progn
(right-char)
- (parseclj-lex-token :lex-error (buffer-substring-no-properties pos
(point)) pos 'error-type :invalid-keyword))
+ (parseclj-lex-token :lex-error (buffer-substring-no-properties pos
(point)) pos :error-type :invalid-keyword))
(progn
(while (or (parseclj-lex-symbol-rest? (char-after (point)))
(equal (char-after (point)) ?#))
@@ -296,7 +309,7 @@ behavior."
(while (not (or (parseclj-lex-at-whitespace?)
(parseclj-lex-at-eof?)))
(right-char))
- (parseclj-lex-token :lex-error (buffer-substring-no-properties pos
(point)) pos 'error-type :invalid-hashtag-dispatcher)))))
+ (parseclj-lex-token :lex-error (buffer-substring-no-properties pos
(point)) pos :error-type :invalid-hashtag-dispatcher)))))
(t
(concat ":(" (char-to-string char)))))))
diff --git a/parseclj.el b/parseclj.el
index 306ceff31c..1d98452502 100644
--- a/parseclj.el
+++ b/parseclj.el
@@ -47,16 +47,12 @@
:keyword
:string
:character)
- "Tokens that represent leaf nodes in the AST.")
+ "Types of tokens that represent leaf nodes in the AST.")
-(defvar parseclj--closer-tokens '(:rparen
- :rbracket
- :rbrace)
- "Tokens that represent closing of an AST branch.")
-
-(defun parseclj--leaf? (node)
- "Return `t' if the given ast NODE is a leaf node."
- (member (a-get node ':node-type) parseclj--leaf-tokens))
+(defvar parseclj--closing-tokens '(:rparen
+ :rbracket
+ :rbrace)
+ "Types of tokens that mark the end of a non-atomic form.")
;; The EDN spec is not clear about wether \u0123 and \o012 are supported in
;; strings. They are described as character literals, but not as string escape
@@ -99,14 +95,14 @@
(defun parseclj--leaf-token-value (token)
(cl-case (parseclj-lex-token-type token)
- (:number (string-to-number (alist-get 'form token)))
+ (:number (string-to-number (alist-get :form token)))
(:nil nil)
(:true t)
(:false nil)
- (:symbol (intern (alist-get 'form token)))
- (:keyword (intern (alist-get 'form token)))
- (:string (parseclj--string (alist-get 'form token)))
- (:character (parseclj--character (alist-get 'form token)))))
+ (:symbol (intern (alist-get :form token)))
+ (:keyword (intern (alist-get :form token)))
+ (:string (parseclj--string (alist-get :form token)))
+ (:character (parseclj--character (alist-get :form token)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Shift-Reduce Parser
@@ -125,7 +121,10 @@ can be handled with `condition-case'."
(:rparen :lparen)
(:rbracket :lbracket)
(:rbrace (parseclj-lex-token-type
- (seq-find (lambda (token) (member (parseclj-lex-token-type
token) '(:lbrace :set))) stack)))))
+ (seq-find (lambda (token)
+ (member (parseclj-lex-token-type token)
+ '(:lbrace :set)))
+ stack)))))
(defun parseclj--reduce-coll (stack closer-token reduce-branch options)
"Reduce collection based on the top of the stack"
@@ -140,13 +139,13 @@ can be handled with `condition-case'."
(when fail-fast
(when-let ((token (seq-find #'parseclj-lex-token? coll)))
(parseclj--error "parseclj: Syntax Error at position %s,
unmatched %S"
- (a-get token 'pos)
+ (a-get token :pos)
(parseclj-lex-token-type token))))
(funcall reduce-branch stack node coll))
(if fail-fast
(parseclj--error "parseclj: Syntax Error at position %s, unmatched
%S"
- (a-get closer-token 'pos)
+ (a-get closer-token :pos)
(parseclj-lex-token-type closer-token))
;; Unwound the stack without finding a matching paren: return the
original stack and continue parsing
(reverse coll)))))
@@ -183,11 +182,14 @@ functions.
;; (message "TOKEN: %S\n" token)
;; Reduce based on the top item on the stack (collections)
- (let ((token-type (parseclj-lex-token-type token)))
- (cond
- ((member token-type parseclj--leaf-tokens) (setf stack (funcall
reduce-leaf stack token)))
- ((member token-type parseclj--closer-tokens) (setf stack
(parseclj--reduce-coll stack token reduce-branch options)))
- (t (push token stack))))
+ (cond
+ ((parseclj-lex-leaf-token? token)
+ (setf stack (funcall reduce-leaf stack token)))
+
+ ((parseclj-lex-closing-token? token)
+ (setf stack (parseclj--reduce-coll stack token reduce-branch options)))
+
+ (t (push token stack)))
;; Reduce based on top two items on the stack (special prefixed elements)
(seq-let [top lookup] stack
@@ -200,7 +202,7 @@ functions.
(when fail-fast
(when-let ((token (seq-find #'parseclj-lex-token? stack)))
(parseclj--error "parseclj: Syntax Error at position %s, unmatched %S"
- (a-get token 'pos)
+ (a-get token :pos)
(parseclj-lex-token-type token))))
(funcall reduce-branch stack '((type . :root) (pos . 1)) (reverse stack))))
diff --git a/parseedn.el b/parseedn.el
index 510d5536c7..be15573824 100644
--- a/parseedn.el
+++ b/parseedn.el
@@ -62,7 +62,7 @@ handlers as an optional argument to the reader functions.")
(puthash (car pair) (cadr pair) hash-map))
kvs)
hash-map))
- (:tag (let* ((tag (intern (substring (a-get opener-token 'form) 1)))
+ (:tag (let* ((tag (intern (substring (a-get opening-token :form)
1)))
(reader (a-get tag-readers tag :missing)))
(when (eq :missing reader)
(user-error "No reader for tag #%S in %S" tag (a-keys
tag-readers)))
diff --git a/test/parseclj-lex-test.el b/test/parseclj-lex-test.el
index 56e5c195cb..6e96522fbc 100644
--- a/test/parseclj-lex-test.el
+++ b/test/parseclj-lex-test.el
@@ -34,73 +34,73 @@
(with-temp-buffer
(insert "()")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :lparen) (form . "(") (pos .
1))))
- (should (equal (parseclj-lex-next) '((type . :rparen) (form . ")") (pos .
2))))
- (should (equal (parseclj-lex-next) '((type . :eof) (form . nil) (pos .
3)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :lparen) (:form . "(")
(:pos . 1))))
+ (should (equal (parseclj-lex-next) '((:token-type . :rparen) (:form . ")")
(:pos . 2))))
+ (should (equal (parseclj-lex-next) '((:token-type . :eof) (:form . nil)
(:pos . 3)))))
(with-temp-buffer
(insert "123")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :number)
- (form . "123")
- (pos . 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :number)
+ (:form . "123")
+ (:pos . 1)))))
(with-temp-buffer
(insert "123e34M")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :number)
- (form . "123e34M")
- (pos . 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :number)
+ (:form . "123e34M")
+ (:pos . 1)))))
(with-temp-buffer
(insert "123x")
(goto-char 1)
- (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "123x" 1
'error-type :invalid-number-format))))
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "123x" 1
:error-type :invalid-number-format))))
(with-temp-buffer
(insert " \t \n")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :whitespace) (form . " \t
\n") (pos . 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :whitespace) (:form .
" \t \n") (:pos . 1)))))
(with-temp-buffer
(insert "nil")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :nil) (form . "nil") (pos .
1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :nil) (:form . "nil")
(:pos . 1)))))
(with-temp-buffer
(insert "true")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :true) (form . "true") (pos .
1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :true) (:form .
"true") (:pos . 1)))))
(with-temp-buffer
(insert "false")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :false) (form . "false") (pos
. 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :false) (:form .
"false") (:pos . 1)))))
(with-temp-buffer
(insert "hello-world")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :symbol) (form .
"hello-world") (pos . 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :symbol) (:form .
"hello-world") (:pos . 1)))))
(with-temp-buffer
(insert "-hello-world")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :symbol) (form .
"-hello-world") (pos . 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :symbol) (:form .
"-hello-world") (:pos . 1)))))
(with-temp-buffer
(insert "foo#")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :symbol) (form . "foo#") (pos
. 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :symbol) (:form .
"foo#") (:pos . 1)))))
(with-temp-buffer
(insert "#inst")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :tag) (form . "#inst") (pos .
1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :tag) (:form .
"#inst") (:pos . 1)))))
(with-temp-buffer
(insert "#qualified/tag")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :tag) (form .
"#qualified/tag") (pos . 1)))))
+ (should (equal (parseclj-lex-next) '((:token-type . :tag) (:form .
"#qualified/tag") (:pos . 1)))))
(with-temp-buffer
(insert "\\newline\\return\\space\\tab\\a\\b\\c")
@@ -153,7 +153,7 @@
(with-temp-buffer
(insert ":::hello-world")
(goto-char 1)
- (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error ":::" 1
'error-type :invalid-keyword))))
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error ":::" 1
:error-type :invalid-keyword))))
(with-temp-buffer
(insert "[123]")
@@ -212,9 +212,9 @@
(ert-deftest parseclj-lex-test-token ()
(should (equal (parseclj-lex-token :whitespace ",,," 10)
- '((type . :whitespace)
- (form . ",,,")
- (pos . 10)))))
+ '((:token-type . :whitespace)
+ (:form . ",,,")
+ (:pos . 10)))))
(ert-deftest parseclj-lex-test-digit? ()
(should (equal (parseclj-lex-digit? ?0) t))
@@ -251,17 +251,20 @@
(with-temp-buffer
(insert "#.not-a-tag")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :lex-error) (form .
"#.not-a-tag") (pos . 1) (error-type . :invalid-hashtag-dispatcher)))))
+ (should (equal (parseclj-lex-next)
+ (parseclj-lex-token :lex-error "#.not-a-tag" 1 :error-type
:invalid-hashtag-dispatcher))))
(with-temp-buffer
(insert "#-not-a-tag")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :lex-error) (form .
"#-not-a-tag") (pos . 1) (error-type . :invalid-hashtag-dispatcher)))))
+ (should (equal (parseclj-lex-next)
+ (parseclj-lex-token :lex-error "#-not-a-tag" 1 :error-type
:invalid-hashtag-dispatcher))))
(with-temp-buffer
(insert "#+not-a-tag")
(goto-char 1)
- (should (equal (parseclj-lex-next) '((type . :lex-error) (form .
"#+not-a-tag") (pos . 1) (error-type . :invalid-hashtag-dispatcher))))))
+ (should (equal (parseclj-lex-next)
+ (parseclj-lex-token :lex-error "#+not-a-tag" 1 :error-type
:invalid-hashtag-dispatcher)))))
(ert-deftest parseclj-lex-test-string ()
(with-temp-buffer
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index 39658d642a..35f563c8ff 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -138,7 +138,7 @@
((:node-type . :map)
(:position . 7)
(:children ((:node-type
. :number) (:position . 8) (:form . "3") (:value . 3))
- ((type .
:lparen) (form . "(") (pos . 10))
+
((:token-type . :lparen) (:form . "(") (:pos . 10))
((:node-type
. :number) (:position . 12) (:form . "4") (:value . 4))))))))))))
;; TODO: uneven map forms
- [nongnu] elpa/parseclj 62c9bf9126 177/185: Remove the last remains of a.el, restructure requires, (continued)
- [nongnu] elpa/parseclj 62c9bf9126 177/185: Remove the last remains of a.el, restructure requires, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 517a371616 166/185: Use map-elt instead of parseclj-alist-get, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj ba04dbe334 171/185: Remove remaining a-list call, move a.el to be test-only, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 48abe456c6 155/185: Update the installation instructions, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj b234692020 157/185: error on unmatched closing paren/brace, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 1bb3800f8f 162/185: Update CHANGELOG, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj fcebf65075 180/185: Provide parseclj-alist-merge for older Emacsen, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj eff9411268 161/185: Merge pull request #26 from ikappaki/master, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 689ebddbdd 158/185: Add support for shebang and symbolic values, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 1e46607912 015/185: Split files into packages, More test setup, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj a9dba19760 086/185: Clean up node and token types,
ELPA Syncer <=
- [nongnu] elpa/parseclj 9480ae09b3 068/185: push is destructive, in this case cons will do, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj f395b9cbcc 097/185: Move `parseclj--leaf-token-value` to `parseedn` module, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 91dd43667c 110/185: Fix `parseclj-ast--reduce-branch` for tags., ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 9a586f267d 103/185: Remove `parseedn` requirement from `parseclj`, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj e65eb085ad 114/185: Remove duplicated test, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 811f35e05a 117/185: Loops reduction over the first 2 elements of the stack, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 5b4b222b4f 124/185: Return error token when there's invalid input in `parseclj-lex-next`, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj b26fadbc05 128/185: Get rid of `parseclj-lex-error-token` side-effect, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 2ffadc6239 134/185: Mark OPTIONS as unused in `parseedn-reduce-leaf`, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 45cd754c32 140/185: Remove parseedn files, ELPA Syncer, 2021/12/28