[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
feature/tree-sitter f331be1f07 2/3: Add handling of contextual entities
From: |
Yuan Fu |
Subject: |
feature/tree-sitter f331be1f07 2/3: Add handling of contextual entities in tree-sitter font-lock |
Date: |
Wed, 2 Nov 2022 20:23:02 -0400 (EDT) |
branch: feature/tree-sitter
commit f331be1f074d68e7e5cdbac324419e07c186492a
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>
Add handling of contextual entities in tree-sitter font-lock
* lisp/progmodes/python.el (python--treesit-fontify-string-end): Remove function.
(python--treesit-settings): Capture contextual node.
* lisp/treesit.el (treesit--set-nonsticky):
(treesit-font-lock-contextual-post-process): New functions.
(treesit-font-lock-fontify-region): Change local variable START and
END to NODE-START and NODE-END, handle special capture name
"contextual".
* doc/lispref/modes.texi (Parser-based Font Lock): Update manual.
---
doc/lispref/modes.texi | 20 +++++++++++
lisp/progmodes/python.el | 10 ++----
lisp/treesit.el | 88 ++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 107 insertions(+), 11 deletions(-)
diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi
index 3c91893bbf..c6f848ffb2 100644
--- a/doc/lispref/modes.texi
+++ b/doc/lispref/modes.texi
@@ -3992,6 +3992,26 @@ priority. If a capture name is neither a face nor a
function, it is
ignored.
@end defun
+Contextual entities, like multi-line strings, or @code{/* */} style
+comments, need special care, because a change in these entities might
+cause changes in a large portion of the buffer. For example, inserting
+the closing comment delimiter @code{*/} will change all the text
+between it and the opening delimiter to comment face. Such entities
+should be captured with the special name @code{contextual}, so Emacs can
+correctly update their fontification. Here is an example for
+comments:
+
+@example
+@group
+(treesit-font-lock-rules
+ :language 'javascript
+ :feature 'comment
+ :override t
+ '((comment) @@font-lock-comment-face
+ (comment) @@contextual))
+@end group
+@end example
+
@defvar treesit-font-lock-feature-list
This is a list of lists of feature symbols. Each element of the list
is a list that represents a decoration level.
diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el
index 46559db2cd..603cdb14e1 100644
--- a/lisp/progmodes/python.el
+++ b/lisp/progmodes/python.el
@@ -1035,12 +1035,6 @@ f for f-strings. OVERRIDE is the override flag
described in
(cl-incf string-beg))
(treesit-fontify-with-override string-beg string-end face override)))
-(defun python--treesit-fontify-string-end (node &rest _)
- "Mark the whole string as to-be-fontified.
-NODE is the ending quote of a string."
- (let ((string (treesit-node-parent node)))
- (setq jit-lock-context-unfontify-pos (treesit-node-start string))))
-
(defvar python--treesit-settings
(treesit-font-lock-rules
:feature 'comment
@@ -1051,8 +1045,8 @@ NODE is the ending quote of a string."
:language 'python
:override t
;; TODO Document on why we do this.
- '((string "\"" @python--treesit-fontify-string-end :anchor)
- (string :anchor "\"" @python--treesit-fontify-string :anchor))
+ '((string :anchor "\"" @python--treesit-fontify-string)
+ (string) @contextual)
:feature 'string-interpolation
:language 'python
diff --git a/lisp/treesit.el b/lisp/treesit.el
index 248c23bf88..6a7ba87e83 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -636,6 +636,82 @@ See `treesit-font-lock-rules' for their semantic."
"Unrecognized value of :override option"
override)))))
+(defun treesit--set-nonsticky (start end sym &optional remove)
+ "Set `rear-nonsticky' property between START and END.
+Set the property to a list containing SYM. If there is already a
+list, add SYM to that list. If REMOVE is non-nil, remove SYM
+instead."
+ (let* ((prop (get-text-property start 'rear-nonsticky))
+ (new-prop
+ (pcase prop
+ ((pred listp) ; PROP is a list or nil.
+ (if remove
+ (remove sym prop)
+ ;; We should make sure PROP doesn't contain SYM, but
+ ;; whatever.
+ (cons sym prop)))
+ ;; PROP is t.
+ (_ (if remove
+ nil
+ (list sym))))))
+ (if (null new-prop)
+ (remove-text-properties start end '(rear-nonsticky nil))
+ (put-text-property start end 'rear-nonsticky new-prop))))
+
+;; This post-processing tries to deal with the following scenario:
+;; User inserts "/*", then goes down the buffer and inserts "*/".
+;; Before the user inserts "*/", tree-sitter cannot construct a
+;; comment node and the parse tree is incomplete, and we can't fontify
+;; the comment. But once the user inserts the "*/", the parse-tree is
+;; complete and we want to refontify the whole comment, and possibly
+;; text after comment (the "/*" could damage the parse tree enough
+;; that makes tree-sitter unable to produce reasonable information for
+;; text after it).
+;;
+;; So we set jit-lock-context-unfontify-pos to comment start, and
+;; jit-lock-context will refontify text after that position in a
+;; timer. Refontifying that text will end up calling this function
+;; again, and we don't want to fall into infinite recursion. So we
+;; mark the end of the comment with a text property, so we can
+;; distinguish between the initial and follow-up invocations of this
+;; function.
+(defun treesit-font-lock-contextual-post-process
+ (node start end &optional verbose)
+ "Post-processing for contextual syntax nodes.
+NODE is a comment or string node, START and END are the region
+being fontified.
+
+If VERBOSE is non-nil, print debugging information."
+ (let* ((node-start (treesit-node-start node))
+ (node-end (treesit-node-end node))
+ (node-end-1 (max (point-min) (1- node-end)))
+ (prop-sym 'treesit-context-refontify-in-progress))
+ (when verbose
+ (message "Contextual: region: %s-%s, node: %s-%s"
+ start end node-start node-end))
+ (when (<= node-end end)
+ (if (get-text-property node-end-1 prop-sym)
+ ;; We are called from a refontification by jit-lock-context,
+ ;; caused by a previous call to this function.
+ (progn (when verbose
+ (message "Contextual: in progress"))
+ (remove-text-properties
+ node-end-1 node-end `(,prop-sym nil))
+ (treesit--set-nonsticky node-end-1 node-end prop-sym t))
+ ;; We are called from a normal fontification.
+ (when verbose
+ (message "Contextual: initial"))
+ (setq jit-lock-context-unfontify-pos node-start)
+ (put-text-property node-end-1 node-end prop-sym t)
+ (treesit--set-nonsticky node-end-1 node-end prop-sym)))))
+
+;; Some details worth explaining:
+;;
+;; 1. When we apply face to a node, we clip the face into the
+;; currently fontifying region, this way we don't overwrite faces
+;; applied by regexp-based font-lock. The clipped part will be
+;; fontified fine when Emacs fontifies the region containing it.
+;;
(defun treesit-font-lock-fontify-region
(start end &optional loudly)
"Fontify the region between START and END.
@@ -666,11 +742,17 @@ If LOUDLY is non-nil, display some debugging information."
(dolist (capture captures)
(let* ((face (car capture))
(node (cdr capture))
- (start (treesit-node-start node))
- (end (treesit-node-end node)))
+ (node-start (treesit-node-start node))
+ (node-end (treesit-node-end node)))
(cond
+ ((eq face 'contextual)
+ (treesit-font-lock-contextual-post-process
+ node start end
+ (or loudly treesit--font-lock-verbose)))
((facep face)
- (treesit-fontify-with-override start end face override))
+ (treesit-fontify-with-override
+ (max node-start start) (min node-end end)
+ face override))
((functionp face)
(funcall face node override)))
;; Don't raise an error if FACE is neither a face nor