emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] elpa/parseclj a9dba19760 086/185: Clean up node and token types


From: ELPA Syncer
Subject: [nongnu] elpa/parseclj a9dba19760 086/185: Clean up node and token types
Date: Tue, 28 Dec 2021 14:05:21 -0500 (EST)

branch: elpa/parseclj
commit a9dba19760a3b9801883449893fcb312d7b0abb5
Author: Arne Brasseur <arne@arnebrasseur.net>
Commit: Arne Brasseur <arne@arnebrasseur.net>

    Clean up node and token types
    
    Make token keys keywords, as with nodes.
    Use :token-type instead of just 'type, analogous with :node-type.
    Provide similar helper functions for tokens and nodes.
---
 parseclj-ast.el           | 44 ++++++++++++++++++++++++++-----------
 parseclj-lex.el           | 49 +++++++++++++++++++++++++----------------
 parseclj.el               | 48 +++++++++++++++++++++--------------------
 parseedn.el               |  2 +-
 test/parseclj-lex-test.el | 55 +++++++++++++++++++++++++----------------------
 test/parseclj-test.el     |  2 +-
 6 files changed, 118 insertions(+), 82 deletions(-)

diff --git a/parseclj-ast.el b/parseclj-ast.el
index 29a0207d5e..a56cfd8c00 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -27,20 +27,38 @@
 
 ;;; Code:
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Parser
+;; AST helper functions
+
+(defun parseclj-ast-node (type position &rest attributes)
+  "Create an AST node with given TYPE and POSITION.
+
+Other ATTRIBUTES can be given as a flat list of key-value pairs. "
+  (apply 'a-list :node-type type :position position attributes))
+
+(defun parseclj-ast-node? (node)
+  "Return `t' if the given NODE is a Clojure AST node."
+  (and (consp node)
+       (consp (car node))
+       (eq :node-type (caar node))))
+
+(defun parseclj-ast-node-type (node)
+  "Return the type of the AST node NODE."
+  (a-get node :node-type))
+
+(defun parseclj-ast-leaf-node? (node)
+  "Return `t' if the given ast NODE is a leaf node."
+  (member (parseclj-ast-node-type node) parseclj--leaf-tokens))
 
-(defun parseclj-ast--node (type position &rest kvs)
-  (apply 'a-list ':node-type type ':position position kvs))
+;; Parse/reduce strategy functions
 
 (defun parseclj-ast--reduce-leaf (stack token)
   (if (member (parseclj-lex-token-type token) '(:whitespace :comment))
       stack
     (cons
-     (parseclj-ast--node (parseclj-lex-token-type token)
-                         (a-get token 'pos)
-                         ':form (a-get token 'form)
-                         ':value (parseclj--leaf-token-value token))
+     (parseclj-ast-node (parseclj-lex-token-type token)
+                        (a-get token :pos)
+                        :form (a-get token :form)
+                        :value (parseclj--leaf-token-value token))
      stack)))
 
 (defun parseclj-ast--reduce-leaf-with-lexical-preservation (stack token)
@@ -49,16 +67,16 @@
     (if (member token-type '(:whitespace :comment))
         ;; merge consecutive whitespace or comment tokens
         (if (eq token-type (a-get top :node-type))
-            (cons (a-update top :form #'concat (a-get token 'form))
+            (cons (a-update top :form #'concat (a-get token :form))
                   (cdr stack))
-          (cons (parseclj-ast--node (parseclj-lex-token-type token)
-                                    (a-get token 'pos)
-                                    ':form (a-get token 'form))
+          (cons (parseclj-ast-node (parseclj-lex-token-type token)
+                                   (a-get token :pos)
+                                   :form (a-get token :form))
                 stack))
       (parseclj-ast--reduce-leaf stack token))))
 
 (defun parseclj-ast--reduce-branch (stack opener-token children)
-  (let* ((pos (a-get opener-token 'pos))
+  (let* ((pos (a-get opener-token :pos))
          (type (parseclj-lex-token-type opener-token))
          (type (cl-case type
                  (:lparen :list)
diff --git a/parseclj-lex.el b/parseclj-lex.el
index 9e72a2972c..df59881f67 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -25,26 +25,39 @@
 
 ;; A reader for EDN data files and parser for Clojure source files.
 
-(defun parseclj-lex-token (type form pos &rest args)
-  `((type . ,type)
-    (form . ,form)
-    (pos  . ,pos)
-    ,@(mapcar (lambda (pair)
-                (cons (car pair) (cadr pair)))
-              (seq-partition args 2))))
+;;; Code
 
-(defun parseclj-lex-token? (token)
-  (and (consp token) (consp (car token)) (eq 'type (caar token))))
+(defun parseclj-lex-token (type form pos &rest attributes)
+  "Create a lexer token with the specified attributes.
 
-(defun parseclj-lex-token-type (token)
-  (and (listp token)
-       (cdr (assq 'type token))))
+Tokens at a mimimum have these attributes
+- TYPE: the type of token, like :whitespace or :lparen
+- FORM: the source form, a string
+- POS: the position in the input, starts from 1 (like point)
+
+Other ATTRIBUTES can be given as a flat list of key-value pairs."
+  (apply 'a-list :token-type type :form form :pos pos attributes))
 
 (defun parseclj-lex-token? (token)
-  (and (listp token)
+  "Is the given TOKEN a parseclj-lex TOKEN.
+
+A token is an association list with :token-type as its first key. "
+  (and (consp token)
        (consp (car token))
-       (eq 'type (caar token))
-       (not (listp (cdar token)))))
+       (eq :token-type (caar token))))
+
+(defun parseclj-lex-token-type (token)
+  "Get the type of TOKEN."
+  (and (consp token)
+       (cdr (assq :token-type token))))
+
+(defun parseclj-lex-leaf-token? (token)
+  "Return `t' if the given ast TOKEN is a leaf node."
+  (member (parseclj-lex-token-type token) parseclj--leaf-tokens))
+
+(defun parseclj-lex-closing-token? (token)
+  "Return `t' if the given ast TOKEN is a closing toking."
+  (member (parseclj-lex-token-type token) parseclj--closing-tokens))
 
 (defun parseclj-lex-at-whitespace? ()
   (let ((char (char-after (point))))
@@ -106,7 +119,7 @@
             (parseclj-lex-token :lex-error
                            (buffer-substring-no-properties pos (point))
                            pos
-                           'error-type :invalid-number-format))
+                           :error-type :invalid-number-format))
 
         (parseclj-lex-token :number
                        (buffer-substring-no-properties pos (point))
@@ -214,7 +227,7 @@ behavior."
     (if (equal (char-after (point)) ?:) ;; three colons in a row => lex-error
         (progn
           (right-char)
-          (parseclj-lex-token :lex-error (buffer-substring-no-properties pos 
(point)) pos 'error-type :invalid-keyword))
+          (parseclj-lex-token :lex-error (buffer-substring-no-properties pos 
(point)) pos :error-type :invalid-keyword))
       (progn
         (while (or (parseclj-lex-symbol-rest? (char-after (point)))
                    (equal (char-after (point)) ?#))
@@ -296,7 +309,7 @@ behavior."
             (while (not (or (parseclj-lex-at-whitespace?)
                             (parseclj-lex-at-eof?)))
               (right-char))
-            (parseclj-lex-token :lex-error (buffer-substring-no-properties pos 
(point)) pos 'error-type :invalid-hashtag-dispatcher)))))
+            (parseclj-lex-token :lex-error (buffer-substring-no-properties pos 
(point)) pos :error-type :invalid-hashtag-dispatcher)))))
 
        (t
         (concat ":(" (char-to-string char)))))))
diff --git a/parseclj.el b/parseclj.el
index 306ceff31c..1d98452502 100644
--- a/parseclj.el
+++ b/parseclj.el
@@ -47,16 +47,12 @@
                                 :keyword
                                 :string
                                 :character)
-  "Tokens that represent leaf nodes in the AST.")
+  "Types of tokens that represent leaf nodes in the AST.")
 
-(defvar parseclj--closer-tokens '(:rparen
-                                  :rbracket
-                                  :rbrace)
-  "Tokens that represent closing of an AST branch.")
-
-(defun parseclj--leaf? (node)
-  "Return `t' if the given ast NODE is a leaf node."
-  (member (a-get node ':node-type) parseclj--leaf-tokens))
+(defvar parseclj--closing-tokens '(:rparen
+                                   :rbracket
+                                   :rbrace)
+  "Types of tokens that mark the end of a non-atomic form.")
 
 ;; The EDN spec is not clear about wether \u0123 and \o012 are supported in
 ;; strings. They are described as character literals, but not as string escape
@@ -99,14 +95,14 @@
 
 (defun parseclj--leaf-token-value (token)
   (cl-case (parseclj-lex-token-type token)
-    (:number (string-to-number (alist-get 'form token)))
+    (:number (string-to-number (alist-get :form token)))
     (:nil nil)
     (:true t)
     (:false nil)
-    (:symbol (intern (alist-get 'form token)))
-    (:keyword (intern (alist-get 'form token)))
-    (:string (parseclj--string (alist-get 'form token)))
-    (:character (parseclj--character (alist-get 'form token)))))
+    (:symbol (intern (alist-get :form token)))
+    (:keyword (intern (alist-get :form token)))
+    (:string (parseclj--string (alist-get :form token)))
+    (:character (parseclj--character (alist-get :form token)))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; Shift-Reduce Parser
@@ -125,7 +121,10 @@ can be handled with `condition-case'."
     (:rparen :lparen)
     (:rbracket :lbracket)
     (:rbrace (parseclj-lex-token-type
-              (seq-find (lambda (token) (member (parseclj-lex-token-type 
token) '(:lbrace :set))) stack)))))
+              (seq-find (lambda (token)
+                          (member (parseclj-lex-token-type token)
+                                  '(:lbrace :set)))
+                        stack)))))
 
 (defun parseclj--reduce-coll (stack closer-token reduce-branch options)
   "Reduce collection based on the top of the stack"
@@ -140,13 +139,13 @@ can be handled with `condition-case'."
           (when fail-fast
             (when-let ((token (seq-find #'parseclj-lex-token? coll)))
               (parseclj--error "parseclj: Syntax Error at position %s, 
unmatched %S"
-                               (a-get token 'pos)
+                               (a-get token :pos)
                                (parseclj-lex-token-type token))))
           (funcall reduce-branch stack node coll))
 
       (if fail-fast
           (parseclj--error "parseclj: Syntax Error at position %s, unmatched 
%S"
-                           (a-get closer-token 'pos)
+                           (a-get closer-token :pos)
                            (parseclj-lex-token-type closer-token))
         ;; Unwound the stack without finding a matching paren: return the 
original stack and continue parsing
         (reverse coll)))))
@@ -183,11 +182,14 @@ functions.
       ;; (message "TOKEN: %S\n" token)
 
       ;; Reduce based on the top item on the stack (collections)
-      (let ((token-type (parseclj-lex-token-type token)))
-        (cond
-         ((member token-type parseclj--leaf-tokens) (setf stack (funcall 
reduce-leaf stack token)))
-         ((member token-type parseclj--closer-tokens) (setf stack 
(parseclj--reduce-coll stack token reduce-branch options)))
-         (t (push token stack))))
+      (cond
+       ((parseclj-lex-leaf-token? token)
+        (setf stack (funcall reduce-leaf stack token)))
+
+       ((parseclj-lex-closing-token? token)
+        (setf stack (parseclj--reduce-coll stack token reduce-branch options)))
+
+       (t (push token stack)))
 
       ;; Reduce based on top two items on the stack (special prefixed elements)
       (seq-let [top lookup] stack
@@ -200,7 +202,7 @@ functions.
     (when fail-fast
       (when-let ((token (seq-find #'parseclj-lex-token? stack)))
         (parseclj--error "parseclj: Syntax Error at position %s, unmatched %S"
-                         (a-get token 'pos)
+                         (a-get token :pos)
                          (parseclj-lex-token-type token))))
 
     (funcall reduce-branch stack '((type . :root) (pos . 1)) (reverse stack))))
diff --git a/parseedn.el b/parseedn.el
index 510d5536c7..be15573824 100644
--- a/parseedn.el
+++ b/parseedn.el
@@ -62,7 +62,7 @@ handlers as an optional argument to the reader functions.")
                                 (puthash (car pair) (cadr pair) hash-map))
                               kvs)
                       hash-map))
-           (:tag (let* ((tag (intern (substring (a-get opener-token 'form) 1)))
+           (:tag (let* ((tag (intern (substring (a-get opening-token :form) 
1)))
                         (reader (a-get tag-readers tag :missing)))
                    (when (eq :missing reader)
                      (user-error "No reader for tag #%S in %S" tag (a-keys 
tag-readers)))
diff --git a/test/parseclj-lex-test.el b/test/parseclj-lex-test.el
index 56e5c195cb..6e96522fbc 100644
--- a/test/parseclj-lex-test.el
+++ b/test/parseclj-lex-test.el
@@ -34,73 +34,73 @@
   (with-temp-buffer
     (insert "()")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :lparen) (form . "(") (pos . 
1))))
-    (should (equal (parseclj-lex-next) '((type . :rparen) (form . ")") (pos . 
2))))
-    (should (equal (parseclj-lex-next) '((type . :eof) (form . nil) (pos . 
3)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :lparen) (:form . "(") 
(:pos . 1))))
+    (should (equal (parseclj-lex-next) '((:token-type . :rparen) (:form . ")") 
(:pos . 2))))
+    (should (equal (parseclj-lex-next) '((:token-type . :eof) (:form . nil) 
(:pos . 3)))))
 
   (with-temp-buffer
     (insert "123")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :number)
-                                    (form . "123")
-                                    (pos . 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :number)
+                                         (:form . "123")
+                                         (:pos . 1)))))
 
   (with-temp-buffer
     (insert "123e34M")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :number)
-                                    (form . "123e34M")
-                                    (pos . 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :number)
+                                         (:form . "123e34M")
+                                         (:pos . 1)))))
 
   (with-temp-buffer
     (insert "123x")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "123x" 1 
'error-type :invalid-number-format))))
+    (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "123x" 1 
:error-type :invalid-number-format))))
 
   (with-temp-buffer
     (insert " \t  \n")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :whitespace) (form . " \t  
\n") (pos . 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :whitespace) (:form . 
" \t  \n") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "nil")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :nil) (form . "nil") (pos . 
1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :nil) (:form . "nil") 
(:pos . 1)))))
 
   (with-temp-buffer
     (insert "true")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :true) (form . "true") (pos . 
1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :true) (:form . 
"true") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "false")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :false) (form . "false") (pos 
. 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :false) (:form . 
"false") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "hello-world")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :symbol) (form . 
"hello-world") (pos . 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :symbol) (:form . 
"hello-world") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "-hello-world")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :symbol) (form . 
"-hello-world") (pos . 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :symbol) (:form . 
"-hello-world") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "foo#")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :symbol) (form . "foo#") (pos 
. 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :symbol) (:form . 
"foo#") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "#inst")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :tag) (form . "#inst") (pos . 
1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :tag) (:form . 
"#inst") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "#qualified/tag")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :tag) (form . 
"#qualified/tag") (pos . 1)))))
+    (should (equal (parseclj-lex-next) '((:token-type . :tag) (:form . 
"#qualified/tag") (:pos . 1)))))
 
   (with-temp-buffer
     (insert "\\newline\\return\\space\\tab\\a\\b\\c")
@@ -153,7 +153,7 @@
   (with-temp-buffer
     (insert ":::hello-world")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error ":::" 1 
'error-type :invalid-keyword))))
+    (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error ":::" 1 
:error-type :invalid-keyword))))
 
   (with-temp-buffer
     (insert "[123]")
@@ -212,9 +212,9 @@
 
 (ert-deftest parseclj-lex-test-token ()
   (should (equal (parseclj-lex-token :whitespace ",,," 10)
-                 '((type . :whitespace)
-                   (form . ",,,")
-                   (pos . 10)))))
+                 '((:token-type . :whitespace)
+                   (:form . ",,,")
+                   (:pos . 10)))))
 
 (ert-deftest parseclj-lex-test-digit? ()
   (should (equal (parseclj-lex-digit? ?0) t))
@@ -251,17 +251,20 @@
   (with-temp-buffer
     (insert "#.not-a-tag")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :lex-error) (form . 
"#.not-a-tag") (pos . 1) (error-type . :invalid-hashtag-dispatcher)))))
+    (should (equal (parseclj-lex-next)
+                   (parseclj-lex-token :lex-error "#.not-a-tag" 1 :error-type 
:invalid-hashtag-dispatcher))))
 
   (with-temp-buffer
     (insert "#-not-a-tag")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :lex-error) (form . 
"#-not-a-tag") (pos . 1) (error-type . :invalid-hashtag-dispatcher)))))
+    (should (equal (parseclj-lex-next)
+                   (parseclj-lex-token :lex-error "#-not-a-tag" 1 :error-type 
:invalid-hashtag-dispatcher))))
 
   (with-temp-buffer
     (insert "#+not-a-tag")
     (goto-char 1)
-    (should (equal (parseclj-lex-next) '((type . :lex-error) (form . 
"#+not-a-tag") (pos . 1) (error-type . :invalid-hashtag-dispatcher))))))
+    (should (equal (parseclj-lex-next)
+                   (parseclj-lex-token :lex-error "#+not-a-tag" 1 :error-type 
:invalid-hashtag-dispatcher)))))
 
 (ert-deftest parseclj-lex-test-string ()
   (with-temp-buffer
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index 39658d642a..35f563c8ff 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -138,7 +138,7 @@
                                                       ((:node-type . :map)
                                                        (:position . 7)
                                                        (:children ((:node-type 
. :number) (:position . 8) (:form . "3") (:value . 3))
-                                                                  ((type . 
:lparen) (form . "(") (pos . 10))
+                                                                  
((:token-type . :lparen) (:form . "(") (:pos . 10))
                                                                   ((:node-type 
. :number) (:position . 12) (:form . "4") (:value . 4))))))))))))
 
   ;; TODO: uneven map forms



reply via email to

[Prev in Thread] Current Thread [Next in Thread]