emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nongnu] elpa/parseclj 1be462e0f2 108/185: Move parser to its own module


From: ELPA Syncer
Subject: [nongnu] elpa/parseclj 1be462e0f2 108/185: Move parser to its own module
Date: Tue, 28 Dec 2021 14:05:25 -0500 (EST)

branch: elpa/parseclj
commit 1be462e0f2b247c22fe89aa4c7555d90c100d3e1
Author: Daniel Barreto <daniel.barreto.n@gmail.com>
Commit: Daniel Barreto <daniel.barreto.n@gmail.com>

    Move parser to its own module
    
    This commit also moves `leaf-token-value` to `parseclj-lex`
---
 parseclj-ast.el                   |   2 +-
 parseclj-lex.el                   |  53 +++++++++
 parseclj.el => parseclj-parser.el |  81 ++------------
 parseclj.el                       | 218 ++------------------------------------
 parseedn.el                       |  56 +---------
 5 files changed, 78 insertions(+), 332 deletions(-)

diff --git a/parseclj-ast.el b/parseclj-ast.el
index 51b00be35a..9d16549806 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -67,7 +67,7 @@ on available options."
      (parseclj-ast-node (parseclj-lex-token-type token)
                         (a-get token :pos)
                         :form (a-get token :form)
-                        :value (parseedn--leaf-token-value token))
+                        :value (parseclj-lex--leaf-token-value token))
      stack)))
 
 (defun parseclj-ast--reduce-leaf-with-lexical-preservation (stack token 
options)
diff --git a/parseclj-lex.el b/parseclj-lex.el
index dc63737336..d835457903 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -44,6 +44,7 @@
                                        :rbrace)
   "Types of tokens that mark the end of a non-atomic form.")
 
+;; Token interface
 
 (defun parseclj-lex-token (type form pos &rest attributes)
   "Create a lexer token with the specified attributes.
@@ -77,6 +78,58 @@ A token is an association list with :token-type as its first 
key."
   "Return t if the given ast TOKEN is a closing token."
   (member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
 
+
+;; Elisp values from tokens
+
+(defun parseclj-lex--string-value (s)
+  ""
+  (replace-regexp-in-string
+   "\\\\o[0-8]\\{3\\}"
+   (lambda (x)
+     (make-string 1 (string-to-number (substring x 2) 8) ))
+   (replace-regexp-in-string
+    "\\\\u[0-9a-fA-F]\\{4\\}"
+    (lambda (x)
+      (make-string 1 (string-to-number (substring x 2) 16)))
+    (replace-regexp-in-string "\\\\[tbnrf'\"\\]"
+                              (lambda (x)
+                                (cl-case (elt x 1)
+                                  (?t "\t")
+                                  (?f "\f")
+                                  (?\" "\"")
+                                  (?r "\r")
+                                  (?n "\n")
+                                  (?\\ "\\\\")
+                                  (t (substring x 1))))
+                              (substring s 1 -1)))))
+
+(defun parseclj-lex--character-value (c)
+  "Parse a EDN character C into an Emacs Lisp character."
+  (let ((first-char (elt c 1)))
+    (cond
+     ((equal c "\\newline") ?\n)
+     ((equal c "\\return") ?\r)
+     ((equal c "\\space") ?\ )
+     ((equal c "\\tab") ?\t)
+     ((eq first-char ?u) (string-to-number (substring c 2) 16))
+     ((eq first-char ?o) (string-to-number (substring c 2) 8))
+     (t first-char))))
+
+(defun parseclj-lex--leaf-token-value (token)
+  "Parse the given leaf TOKEN to an Emacs Lisp value."
+  (cl-case (parseclj-lex-token-type token)
+    (:number (string-to-number (alist-get :form token)))
+    (:nil nil)
+    (:true t)
+    (:false nil)
+    (:symbol (intern (alist-get :form token)))
+    (:keyword (intern (alist-get :form token)))
+    (:string (parseclj-lex--string-value (alist-get :form token)))
+    (:character (parseclj-lex--character-value (alist-get :form token)))))
+
+
+;; Stream tokenization
+
 (defun parseclj-lex-at-whitespace-p ()
   "Return t if char at point is white space."
   (let ((char (char-after (point))))
diff --git a/parseclj.el b/parseclj-parser.el
similarity index 74%
copy from parseclj.el
copy to parseclj-parser.el
index fd2d8e32ff..1e07d2d0a7 100644
--- a/parseclj.el
+++ b/parseclj-parser.el
@@ -1,4 +1,4 @@
-;;; parseclj.el --- Clojure/EDN parser              -*- lexical-binding: t; -*-
+;;; parseclj-parser.el --- Clojure/EDN parser              -*- 
lexical-binding: t; -*-
 
 ;; Copyright (C) 2017  Arne Brasseur
 
@@ -26,7 +26,7 @@
 
 ;;; Commentary:
 
-;; A reader for EDN data files and parser for Clojure source files.
+;; A shift/reduce parser for Clojure source.
 
 ;;; Code:
 
@@ -34,19 +34,15 @@
 (require 'subr-x)
 (require 'a)
 (require 'parseclj-lex)
-(require 'parseclj-ast)
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Shift-Reduce Parser
-
-(define-error 'parseclj-parse-error "parseclj: Syntax error")
+(define-error 'parseclj-parser-error "parseclj: Syntax error")
 
 (defun parseclj--error (format &rest args)
   "Signal a parse error.
 Takes a FORMAT string and optional ARGS to be passed to
-`format-message'.  Signals a 'parseclj-parse-error signal, which
+`format-message'.  Signals a 'parseclj-parser-error signal, which
 can be handled with `condition-case'."
-  (signal 'parseclj-parse-error (list (apply #'format-message format args))))
+  (signal 'parseclj-parser-error (list (apply #'format-message format args))))
 
 (defun parseclj--find-opening-token (stack closing-token)
   "Scan STACK for an opening-token matching CLOSING-TOKEN."
@@ -68,7 +64,7 @@ CLOSING-TOKEN.  This function should return an AST token 
representing such
 collection.
 
 OPTIONS is an association list.  This list is also passed down to the
-REDUCE-BRANCH function.  See `parseclj-parse' for more information on
+REDUCE-BRANCH function.  See `parseclj-parser' for more information on
 available options."
   (let ((opening-token-type (parseclj--find-opening-token stack closing-token))
         (fail-fast (a-get options :fail-fast t))
@@ -149,7 +145,7 @@ TOKEN-TYPES are the token types to look for."
          (t
           (push (pop stack) result)))))))
 
-(defun parseclj-parse (reduce-leaf reduce-branch &optional options)
+(defun parseclj-parser (reduce-leaf reduce-branch &optional options)
   "Clojure/EDN stack-based shift-reduce parser.
 
 REDUCE-LEAF does reductions for leaf nodes.  It is a function that takes
@@ -163,7 +159,7 @@ and a list of child nodes, and returns an updated stack, 
with the new node
 at the top (front).
 
 What \"node\" means in this case is up to the reducing functions, it could
-be AST nodes (as in the case of `parseclj-parse-clojure'), or plain
+be AST nodes (as in the case of `parseclj-parser-clojure'), or plain
 values/sexps (as in the case of `parseedn-read'), or something else. The
 only requirement is that they should not put raw tokens back on the stack,
 as the parser relies on the presence or absence of these to detect parse
@@ -230,62 +226,5 @@ functions. Additionally the following options are 
recognized
                   (reverse stack)
                   options))))
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Top level API
-
-(defun parseclj-parse-clojure (&rest string-and-options)
-  "Parse Clojure source to AST.
-
-Reads either from the current buffer, starting from point, until
-`point-max', or reads from the optional string argument.
-
-STRING-AND-OPTIONS can be an optional string, followed by
-key-value pairs to specify parsing options.
-
-- `:lexical-preservation' Retain whitespace, comments, and
-  discards.  Defaults to nil.
-- `:fail-fast' Raise an error
-  when encountering invalid syntax.  Defaults to t."
-  (if (stringp (car string-and-options))
-      (with-temp-buffer
-        (insert (car string-and-options))
-        (goto-char 1)
-        (apply 'parseclj-parse-clojure (cdr string-and-options)))
-    (let* ((value-p (lambda (e)
-                      (and (parseclj-ast-node-p e)
-                           (not (member (parseclj-ast-node-type e) 
'(:whitespace :comment :discard))))))
-           (options (apply 'a-list :value-p value-p string-and-options))
-           (lexical? (a-get options :lexical-preservation)))
-      (parseclj-parse (if lexical?
-                          #'parseclj-ast--reduce-leaf-with-lexical-preservation
-                        #'parseclj-ast--reduce-leaf)
-                      (if lexical?
-                          
#'parseclj-ast--reduce-branch-with-lexical-preservation
-                        #'parseclj-ast--reduce-branch)
-                      options))))
-
-(defun parseclj-unparse-clojure (ast)
-  "Parse Clojure AST to source code.
-
-Given an abstract syntax tree AST (as returned by
-parseclj-parse-clojure), turn it back into source code, and
-insert it into the current buffer."
-  (if (parseclj-ast-leaf-node-p ast)
-      (insert (a-get ast :form))
-    (if (eql (parseclj-ast-node-type ast) :tag)
-        (parseclj-ast--unparse-tag ast)
-      (parseclj-ast--unparse-collection ast))))
-
-(defun parseclj-unparse-clojure-to-string (ast)
-  "Parse Clojure AST to a source code string.
-
-Given an abstract syntax tree AST (as returned by
-parseclj-parse-clojure), turn it back into source code, and
-return it as a string"
-  (with-temp-buffer
-    (parseclj-unparse-clojure ast)
-    (buffer-substring-no-properties (point-min) (point-max))))
-
-(provide 'parseclj)
-
-;;; parseclj.el ends here
+(provide 'parseclj-parser)
+;;; parseclj-parser.el ends here
diff --git a/parseclj.el b/parseclj.el
index fd2d8e32ff..515a5bbe80 100644
--- a/parseclj.el
+++ b/parseclj.el
@@ -26,213 +26,13 @@
 
 ;;; Commentary:
 
-;; A reader for EDN data files and parser for Clojure source files.
+;; Top level API for the Clojure parser.
 
 ;;; Code:
 
-(require 'cl-lib)
-(require 'subr-x)
-(require 'a)
-(require 'parseclj-lex)
+(require 'parseclj-parser)
 (require 'parseclj-ast)
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; Shift-Reduce Parser
-
-(define-error 'parseclj-parse-error "parseclj: Syntax error")
-
-(defun parseclj--error (format &rest args)
-  "Signal a parse error.
-Takes a FORMAT string and optional ARGS to be passed to
-`format-message'.  Signals a 'parseclj-parse-error signal, which
-can be handled with `condition-case'."
-  (signal 'parseclj-parse-error (list (apply #'format-message format args))))
-
-(defun parseclj--find-opening-token (stack closing-token)
-  "Scan STACK for an opening-token matching CLOSING-TOKEN."
-  (cl-case (parseclj-lex-token-type closing-token)
-    (:rparen :lparen)
-    (:rbracket :lbracket)
-    (:rbrace (parseclj-lex-token-type
-              (seq-find (lambda (token)
-                          (member (parseclj-lex-token-type token)
-                                  '(:lbrace :set)))
-                        stack)))))
-
-(defun parseclj--reduce-coll (stack closing-token reduce-branch options)
-  "Reduce collection based on the top of the STACK and a CLOSING-TOKEN.
-
-REDUCE-BRANCH is a function to be applied to the collection of tokens found
-from the top of the stack until an opening token that matches
-CLOSING-TOKEN.  This function should return an AST token representing such
-collection.
-
-OPTIONS is an association list.  This list is also passed down to the
-REDUCE-BRANCH function.  See `parseclj-parse' for more information on
-available options."
-  (let ((opening-token-type (parseclj--find-opening-token stack closing-token))
-        (fail-fast (a-get options :fail-fast t))
-        (collection nil))
-
-    ;; unwind the stack until opening-token-type is found, adding to collection
-    (while (and stack (not (eq (parseclj-lex-token-type (car stack)) 
opening-token-type)))
-      (push (pop stack) collection))
-
-    ;; did we find the right token?
-    (if (eq (parseclj-lex-token-type (car stack)) opening-token-type)
-        (progn
-          (when fail-fast
-            ;; any unreduced tokens left: bail early
-            (when-let ((token (seq-find #'parseclj-lex-token-p collection)))
-              (parseclj--error "At position %s, unmatched %S"
-                               (a-get token :pos)
-                               (parseclj-lex-token-type token))))
-
-          ;; all good, call the reducer so it can return an updated stack with 
a
-          ;; new node at the top.
-          (let ((opening-token (pop stack)))
-            (funcall reduce-branch stack opening-token collection options)))
-
-      ;; Unwound the stack without finding a matching paren: either bail early
-      ;; or return the original stack and continue parsing
-      (if fail-fast
-          (parseclj--error "At position %s, unmatched %S"
-                           (a-get closing-token :pos)
-                           (parseclj-lex-token-type closing-token))
-
-        (reverse collection)))))
-
-(defun parseclj--take-value (stack value-p)
-  "Scan STACK until a value is found.
-Return everything up to the value in reversed order (meaning the value
-comes first in the result).
-
-STACK is the current parse stack to scan.
-
-VALUE-P a predicate to distinguish reduced values from non-values (tokens
-and whitespace)."
-  (let ((result nil))
-    (cl-block nil
-      (while stack
-        (cond
-         ((parseclj-lex-token-p (car stack))
-          (cl-return nil))
-
-         ((funcall value-p (car stack))
-          (cl-return (cons (car stack) result)))
-
-         (t
-          (push (pop stack) result)))))))
-
-(defun parseclj--take-token (stack value-p token-types)
-  "Scan STACK until a token of a certain type is found.
-Returns nil if a value is encountered before a matching token is found.
-Return everything up to the token in reversed order (meaning the token
-comes first in the result).
-
-STACK is the current parse stack to scan.
-
-VALUE-P a predicate to distinguish reduced values from non-values (tokens
-and whitespace).
-
-TOKEN-TYPES are the token types to look for."
-  (let ((result nil))
-    (cl-block nil
-      (while stack
-        (cond
-         ((member (parseclj-lex-token-type (car stack)) token-types)
-          (cl-return (cons (car stack) result)))
-         ((funcall value-p (car stack))
-          (cl-return nil))
-         ((parseclj-lex-token-p (car stack))
-          (cl-return nil))
-         (t
-          (push (pop stack) result)))))))
-
-(defun parseclj-parse (reduce-leaf reduce-branch &optional options)
-  "Clojure/EDN stack-based shift-reduce parser.
-
-REDUCE-LEAF does reductions for leaf nodes.  It is a function that takes
-the current value of the stack and a token, and either returns an updated
-stack, with a new leaf node at the top (front), or returns the stack
-unmodified.
-
-REDUCE-BRANCH does reductions for branch nodes.  It is a function that
-takes the current value of the stack, the type of branch node to create,
-and a list of child nodes, and returns an updated stack, with the new node
-at the top (front).
-
-What \"node\" means in this case is up to the reducing functions, it could
-be AST nodes (as in the case of `parseclj-parse-clojure'), or plain
-values/sexps (as in the case of `parseedn-read'), or something else. The
-only requirement is that they should not put raw tokens back on the stack,
-as the parser relies on the presence or absence of these to detect parse
-errors.
-
-OPTIONS is an association list which is passed on to the reducing
-functions. Additionally the following options are recognized
-
-- `:fail-fast'
-  Raise an error when a parse error is encountered, rather than continuing
-  with a partial result.
-- `:value-p'
-  A predicate function to differentiate values from tokens and
-  whitespace. This is needed when scanning the stack to see if any
-  reductions can be performed. By default anything that isn't a token is
-  considered a value. This can be problematic when parsing with
-  `:lexical-preservation', and which case you should provide an
-  implementation that also returns falsy for :whitespace, :comment, and
-  :discard AST nodes.
-- `:tag-readers'
-  An association list that describes tag handler functions for any possible
-  tag.  This options in only available in `parseedn-read', for more
-  information, please refer to its documentation."
-  (let ((fail-fast (a-get options :fail-fast t))
-        (value-p (a-get options :value-p (lambda (e) (not 
(parseclj-lex-token-p e)))))
-        (stack nil)
-        (token (parseclj-lex-next)))
-
-    (while (not (eq (parseclj-lex-token-type token) :eof))
-      ;; (message "STACK: %S" stack)
-      ;; (message "TOKEN: %S\n" token)
-
-      ;; Reduce based on the top item on the stack (collections)
-      (cond
-       ((parseclj-lex-leaf-token-p token)
-        (setf stack (funcall reduce-leaf stack token options)))
-
-       ((parseclj-lex-closing-token-p token)
-        (setf stack (parseclj--reduce-coll stack token reduce-branch options)))
-
-       (t (push token stack)))
-
-      ;; Reduce based on top two items on the stack (special prefixed elements)
-      (let* ((top-value (parseclj--take-value stack value-p))
-             (opening-token (parseclj--take-token (nthcdr (length top-value) 
stack) value-p '(:discard :tag)))
-             (new-stack (nthcdr (+ (length top-value) (length opening-token)) 
stack)))
-        (when (and top-value opening-token)
-          ;; (message "Reducing...")
-          ;; (message "  - STACK %S" stack)
-          ;; (message "  - OPENING_TOKEN %S" opening-token)
-          ;; (message "  - TOP_VALUE %S\n" top-value)
-          (setq stack (funcall reduce-branch new-stack (car opening-token) 
(append (cdr opening-token) top-value) options))))
-
-      (setq token (parseclj-lex-next)))
-
-    ;; reduce root
-    (when fail-fast
-      (when-let ((token (seq-find #'parseclj-lex-token-p stack)))
-        (parseclj--error "At position %s, unmatched %S"
-                         (a-get token :pos)
-                         (parseclj-lex-token-type token))))
-
-    (car (funcall reduce-branch nil (parseclj-lex-token :root "" 1)
-                  (reverse stack)
-                  options))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Top level API
-
 (defun parseclj-parse-clojure (&rest string-and-options)
   "Parse Clojure source to AST.
 
@@ -256,13 +56,13 @@ key-value pairs to specify parsing options.
                            (not (member (parseclj-ast-node-type e) 
'(:whitespace :comment :discard))))))
            (options (apply 'a-list :value-p value-p string-and-options))
            (lexical? (a-get options :lexical-preservation)))
-      (parseclj-parse (if lexical?
-                          #'parseclj-ast--reduce-leaf-with-lexical-preservation
-                        #'parseclj-ast--reduce-leaf)
-                      (if lexical?
-                          
#'parseclj-ast--reduce-branch-with-lexical-preservation
-                        #'parseclj-ast--reduce-branch)
-                      options))))
+      (parseclj-parser (if lexical?
+                           
#'parseclj-ast--reduce-leaf-with-lexical-preservation
+                         #'parseclj-ast--reduce-leaf)
+                       (if lexical?
+                           
#'parseclj-ast--reduce-branch-with-lexical-preservation
+                         #'parseclj-ast--reduce-branch)
+                       options))))
 
 (defun parseclj-unparse-clojure (ast)
   "Parse Clojure AST to source code.
diff --git a/parseedn.el b/parseedn.el
index a225b1635f..b1ee4654f7 100644
--- a/parseedn.el
+++ b/parseedn.el
@@ -36,53 +36,7 @@
 ;; don't have the right forms.
 
 (require 'a)
-(require 'parseclj-lex)
-
-(defun parseedn--string (s)
-  ""
-  (replace-regexp-in-string
-   "\\\\o[0-8]\\{3\\}"
-   (lambda (x)
-     (make-string 1 (string-to-number (substring x 2) 8) ))
-   (replace-regexp-in-string
-    "\\\\u[0-9a-fA-F]\\{4\\}"
-    (lambda (x)
-      (make-string 1 (string-to-number (substring x 2) 16)))
-    (replace-regexp-in-string "\\\\[tbnrf'\"\\]"
-                              (lambda (x)
-                                (cl-case (elt x 1)
-                                  (?t "\t")
-                                  (?f "\f")
-                                  (?\" "\"")
-                                  (?r "\r")
-                                  (?n "\n")
-                                  (?\\ "\\\\")
-                                  (t (substring x 1))))
-                              (substring s 1 -1)))))
-
-(defun parseedn--character (c)
-  "Parse a EDN character C into an Emacs Lisp character."
-  (let ((first-char (elt c 1)))
-    (cond
-     ((equal c "\\newline") ?\n)
-     ((equal c "\\return") ?\r)
-     ((equal c "\\space") ?\ )
-     ((equal c "\\tab") ?\t)
-     ((eq first-char ?u) (string-to-number (substring c 2) 16))
-     ((eq first-char ?o) (string-to-number (substring c 2) 8))
-     (t first-char))))
-
-(defun parseedn--leaf-token-value (token)
-  "Parse the given leaf TOKEN to an Emacs Lisp value."
-  (cl-case (parseclj-lex-token-type token)
-    (:number (string-to-number (alist-get :form token)))
-    (:nil nil)
-    (:true t)
-    (:false nil)
-    (:symbol (intern (alist-get :form token)))
-    (:keyword (intern (alist-get :form token)))
-    (:string (parseedn--string (alist-get :form token)))
-    (:character (parseedn--character (alist-get :form token)))))
+(require 'parseclj-parser)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Reader
@@ -105,7 +59,7 @@ OPTIONS is an association list.  See `parseclj-parse' for 
more information
 on available options."
   (if (member (parseclj-lex-token-type token) (list :whitespace :comment))
       stack
-    (cons (parseedn--leaf-token-value token) stack)))
+    (cons (parseclj-lex--leaf-token-value token) stack)))
 
 (defun parseedn-reduce-branch (stack opening-token children options)
   "Reduce STACK with an sequence containing a collection of other elisp values.
@@ -147,9 +101,9 @@ Returns an Emacs Lisp value.
 TAG-READERS is an optional association list where keys are symbols
 identifying *tags*, and values are tag handler functions that receive one
 argument: *the tagged element*, and specify how to interpret it."
-  (parseclj-parse #'parseedn-reduce-leaf
-                  #'parseedn-reduce-branch
-                  (a-list :tag-readers tag-readers)))
+  (parseclj-parser #'parseedn-reduce-leaf
+                   #'parseedn-reduce-branch
+                   (a-list :tag-readers tag-readers)))
 
 (defun parseedn-read-str (s &optional tag-readers)
   "Parse string S as EDN.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]