[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 3d49ad7: cperl-mode.el: Allow non-ASCII Perl identifiers
From: |
Harald Jörg |
Subject: |
master 3d49ad7: cperl-mode.el: Allow non-ASCII Perl identifiers |
Date: |
Tue, 14 Sep 2021 11:57:20 -0400 (EDT) |
branch: master
commit 3d49ad73e5a93625629c96b6c0b921bb019ea9da
Author: Harald Jörg <haj@posteo.de>
Commit: Harald Jörg <haj@posteo.de>
cperl-mode.el: Allow non-ASCII Perl identifiers
Replace all "A-Z" regexp literals with unicode-aware rx constructs
wherever Perl allows non-ASCII identifiers.
* lisp/progmodes/cperl-mode.el (cperl-after-sub-regexp)
(cperl-after-label. cperl-sniff-for-indent)
(cperl-find-pods-heres, cperl-indent-exp)
(cperl-fix-line-spacing, cperl-imenu--create-perl-index)
(cperl-init-faces, cperl-find-tags):
Replace ASCII regex literals by unicode-aware rx constructs.
(cperl-init-faces): Eliminate unused lexical `font-lock-anchored'.
(cperl-have-help-regexp, cperl-word-at-point-hard): Allow non-ASCII
word characters.
* test/lisp/progmodes/cperl-mode-tests.el
(cperl-test-fontify-special-variables): New test for $^T
and $^{VARNAME}.
(cperl-test-ws-rx cperl-test-ws+-rx),
(cperl-test-version-regexp, cperl-test-package-regexp): Skip
for perl-mode.
(cperl-test-identifier-rx, cperl--test-unicode-setup)
(cperl-test-unicode-labels, cperl-test-unicode-sub)
(cperl-test-unicode-varname)
(cperl-test-unicode-varname-list, cperl-test-unicode-arrays)
(cperl-test-unicode-hashes, cperl-test-unicode-hashref)
(cperl-test-unicode-proto, cperl-test-unicode-fhs)
(cperl-test-unicode-hashkeys, cperl-test-word-at-point):
New tests for unicode identifiers.
(cperl-test-imenu-index): Add a unicode identifier to the test.
* test/lisp/progmodes/cperl-mode-resources/grammar.pl: Add a
function with non-ASCII name for imenu tests.
---
lisp/progmodes/cperl-mode.el | 330 ++++++++++++++-------
.../lisp/progmodes/cperl-mode-resources/grammar.pl | 14 +
test/lisp/progmodes/cperl-mode-tests.el | 301 ++++++++++++++++++-
3 files changed, 545 insertions(+), 100 deletions(-)
diff --git a/lisp/progmodes/cperl-mode.el b/lisp/progmodes/cperl-mode.el
index 76c82f8..1147889 100644
--- a/lisp/progmodes/cperl-mode.el
+++ b/lisp/progmodes/cperl-mode.el
@@ -1407,7 +1407,7 @@ the last)."
(concat ; Assume n groups before this...
"\\(" ; n+1=name-group
cperl-white-and-comment-rex ; n+2=pre-name
- "\\(::[a-zA-Z_0-9:']+\\|[a-zA-Z_'][a-zA-Z_0-9:']*\\)" ; n+3=name
+ (rx-to-string `(group ,cperl--normal-identifier-rx))
"\\)" ; END n+1=name-group
(if named "" "?")
"\\(" ; n+4=proto-group
@@ -2573,7 +2573,8 @@ Return the amount the indentation changed by."
'(?w ?_))
(progn
(backward-sexp)
- (looking-at "[a-zA-Z_][a-zA-Z0-9_]*:[^:]"))))
+ (looking-at (rx (sequence (eval cperl--label-rx)
+ (not (in ":"))))))))
(defun cperl-get-state (&optional parse-start start-state)
"Return list (START STATE DEPTH PRESTART),
@@ -2740,7 +2741,9 @@ Will not look before LIM."
(progn
(forward-sexp -1)
(skip-chars-backward " \t")
- (looking-at "[ \t]*[a-zA-Z_][a-zA-Z_0-9]*[
\t]*:")))
+ (looking-at
+ (rx (sequence (0+ blank)
+ (eval cperl--label-rx))))))
(get-text-property (point) 'first-format-line)))
;; Look at previous line that's at column 0
@@ -3836,7 +3839,8 @@ recursive calls in starting lines of here-documents."
"\\<" cperl-sub-regexp "\\>" ; sub with proto/attr
"\\("
cperl-white-and-comment-rex
- "\\(::[a-zA-Z_:'0-9]*\\|[a-zA-Z_'][a-zA-Z_:'0-9]*\\)\\)?" ;
name
+ (rx (group (eval cperl--normal-identifier-rx)))
+ "\\)"
"\\("
cperl-maybe-white-and-comment-rex
"\\(([^()]*)\\|:[^:]\\)\\)" ; prototype or attribute start
@@ -4111,10 +4115,12 @@ recursive calls in starting lines of here-documents."
(t t))))
;; <file> or <$file>
(and (eq c ?\<)
- ;; Do not stringify <FH>, <$fh> :
+ ;; Stringify what looks like a glob, but
+ ;; do not stringify file handles <FH>, <$fh> :
(save-match-data
(looking-at
- "\\$?\\([_a-zA-Z:][_a-zA-Z0-9:]*\\)?>"))))
+ (rx (sequence (opt "$")
+ (eval
cperl--normal-identifier-rx)))))))
tb (match-beginning 0))
(goto-char (match-beginning b1))
(cperl-backward-to-noncomment (point-min))
@@ -4184,7 +4190,16 @@ recursive calls in starting lines of here-documents."
(error nil)))
(if (or bb
(looking-at ; $foo -> {s}
- "[$@]\\$*\\([a-zA-Z0-9_:]+\\|[^{]\\)\\([
\t\n]*->\\)?[ \t\n]*{")
+ (rx
+ (sequence
+ (in "$@") (0+ "$")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (not (in "{")))
+ (opt (sequence (eval cperl--ws*-rx))
+ "->")
+ (eval cperl--ws*-rx)
+ "{")))
(and ; $foo[12] -> {s}
(memq (following-char) '(?\{ ?\[))
(progn
@@ -4199,7 +4214,12 @@ recursive calls in starting lines of here-documents."
(setq bb t))
((and (eq (following-char) ?:)
(eq b1 ?\{) ; Check for $ { s::bar }
- (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
+ ;; (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
+ (looking-at
+ (rx (sequence "::"
+ (eval
cperl--normal-identifier-rx)
+ (eval cperl--ws*-rx)
+ "}")))
(progn
(goto-char (1- go))
(skip-chars-backward " \t\n\f")
@@ -4364,7 +4384,7 @@ recursive calls in starting lines of here-documents."
"\\(" ;; XXXX 1-char variables, exc. |()\s
"[$@]"
"\\("
- "[_a-zA-Z:][_a-zA-Z0-9:]*"
+ (rx (eval
cperl--normal-identifier-rx))
"\\|"
"{[^{}]*}" ; only one-level allowed
"\\|"
@@ -4820,6 +4840,7 @@ recursive calls in starting lines of here-documents."
(progn
(backward-sexp)
;; sub {BLK}, print {BLK} $data, but NOT `bless', `return', `tr',
`constant'
+ ;; a-zA-Z is fine here, these are Perl keywords
(or (and (looking-at "[a-zA-Z0-9_:]+[ \t\n\f]*[{#]") ; Method call
syntax
(not (looking-at
"\\(bless\\|return\\|q[wqrx]?\\|tr\\|[smy]\\|constant\\)\\>")))
;; sub bless::foo {}
@@ -5028,7 +5049,11 @@ conditional/loop constructs."
cperl-maybe-white-and-comment-rex
"\\(state\\|my\\|local\\|our\\)\\)?"
cperl-maybe-white-and-comment-rex
- "\\$[_a-zA-Z0-9]+\\)?\\)\\>"))
+ (rx
+ (sequence
+ "$"
+ (eval cperl--basic-identifier-rx)))
+ "\\)?\\)\\>"))
(progn
(goto-char top)
(forward-sexp 1)
@@ -5122,7 +5147,14 @@ Returns some position at the last line."
;; Looking at:
;; foreach my $var (
(if (looking-at
- "[ \t]*\\<for\\(each\\)?[ \t]+\\(state\\|my\\|local\\|our\\)[
\t]*\\$[_a-zA-Z0-9]+\\(\t*\\|[ \t][ \t]+\\)[^ \t\n#]")
+ (rx (sequence (0+ blank) symbol-start
+ "for" (opt "each")
+ (1+ blank)
+ (or "state" "my" "local" "our")
+ (0+ blank)
+ "$" (eval cperl--basic-identifier-rx)
+ (1+ blank)
+ (not (in " \t\n#")))))
(progn
(forward-sexp 3)
(delete-horizontal-space)
@@ -5132,9 +5164,25 @@ Returns some position at the last line."
;; Looking at (with or without "}" at start, ending after "({"):
;; } foreach my $var () OR {
(if (looking-at
- "[ \t]*\\(}[
\t]*\\)?\\<\\(els\\(e\\|if\\)\\|continue\\|if\\|unless\\|while\\|for\\(each\\)?\\(\\([
\t]+\\(state\\|my\\|local\\|our\\)\\)?[
\t]*\\$[_a-zA-Z0-9]+\\)?\\|until\\)\\>\\([ \t]*(\\|[ \t\n]*{\\)\\|[ \t]*{")
+ (rx (sequence
+ (0+ blank)
+ (opt (sequence "}" (0+ blank) ))
+ symbol-start
+ (or "else" "elsif" "continue" "if" "unless" "while" "until"
+ (sequence (or "for" "foreach")
+ (opt
+ (opt (sequence (1+ blank)
+ (or "state" "my" "local"
"our")))
+ (0+ blank)
+ "$" (eval cperl--basic-identifier-rx))))
+ symbol-end
+ (group-n 1
+ (or
+ (or (sequence (0+ blank) "(")
+ (sequence (eval cperl--ws*-rx) "{"))
+ (sequence (0+ blank) "{"))))))
(progn
- (setq ml (match-beginning 8)) ; "(" or "{" after control word
+ (setq ml (match-beginning 1)) ; "(" or "{" after control word
(re-search-forward "[({]")
(forward-char -1)
(setq p (point))
@@ -5544,7 +5592,11 @@ comment, or POD."
(setq lst index-sub-alist)
(while lst
(setq elt (car lst) lst (cdr lst))
- (cond ((string-match "\\(::\\|'\\)[_a-zA-Z0-9]+$" (car elt))
+ (cond ((string-match
+ (rx (sequence (or "::" "'")
+ (eval cperl--basic-identifier-rx)
+ string-end))
+ (car elt))
(setq pack (substring (car elt) 0 (match-beginning 0)))
(if (setq group (assoc pack hier-list))
(if (listp (cdr group))
@@ -5646,8 +5698,7 @@ default function."
(defun cperl-init-faces ()
(condition-case errs
(progn
- (let (t-font-lock-keywords t-font-lock-keywords-1 font-lock-anchored)
- (setq font-lock-anchored t)
+ (let (t-font-lock-keywords t-font-lock-keywords-1)
(setq
t-font-lock-keywords
(list
@@ -5760,20 +5811,41 @@ default function."
(if (eq (char-after (cperl-1- (match-end 0))) ?\{ )
'font-lock-function-name-face
'font-lock-variable-name-face))))
- '("\\<\\(package\\|require\\|use\\|import\\|no\\|bootstrap\\)[
\t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t;]" ; require A if B;
- 2 font-lock-function-name-face)
+ `(,(rx (sequence symbol-start
+ (or "package" "require" "use" "import"
+ "no" "bootstrap")
+ (eval cperl--ws+-rx)
+ (group-n 1 (eval cperl--normal-identifier-rx))
+ (any " \t;"))) ; require A if B;
+ 1 font-lock-function-name-face)
'("^[ \t]*format[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t]*=[ \t]*$"
1 font-lock-function-name-face)
- (cond (font-lock-anchored
- '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[
\t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- (2 font-lock-string-face t)
- ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- nil nil
- (1 font-lock-string-face t))))
- (t '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[
\t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- 2 font-lock-string-face t)))
- '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
- font-lock-string-face t)
+ ;; bareword hash key: $foo{bar}
+ `(,(rx (or (in "]}\\%@>*&") ; What Perl is this?
+ (sequence "$" (eval cperl--normal-identifier-rx)))
+ (0+ blank) "{" (0+ blank)
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "}")
+;; '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[
\t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
+ (1 font-lock-string-face t)
+ ;; anchored bareword hash key: $foo{bar}{baz}
+ (,(rx point
+ (0+ blank) "{" (0+ blank)
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "}")
+ ;; ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
+ nil nil
+ (1 font-lock-string-face t)))
+ ;; hash element assignments with bareword key => value
+ `(,(rx (in "[ \t{,()")
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "=>")
+ 1 font-lock-string-face t)
+;; '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
+;; font-lock-string-face t)
;; labels
`(,(rx
(sequence
@@ -5797,83 +5869,130 @@ default function."
;;; '("[$*]{?\\(\\sw+\\)" 1 font-lock-variable-name-face)
;;; '("\\([@%]\\|\\$#\\)\\(\\sw+\\)"
;;; (2 (cons font-lock-variable-name-face '(underline))))
- (cond (font-lock-anchored
;; 1=my_etc, 2=white? 3=(+white? 4=white? 5=var
- `(,(concat "\\<\\(state\\|my\\|local\\|our\\)"
- cperl-maybe-white-and-comment-rex
- "\\(("
- cperl-maybe-white-and-comment-rex
-
"\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
- (5 ,(if cperl-font-lock-multiline
- 'font-lock-variable-name-face
- '(progn (setq cperl-font-lock-multiline-start
- (match-beginning 0))
- 'font-lock-variable-name-face)))
- (,(concat "\\="
- cperl-maybe-white-and-comment-rex
- ","
- cperl-maybe-white-and-comment-rex
-
"\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
- ;; Bug in font-lock: limit is used not only to limit
- ;; searches, but to set the "extend window for
- ;; facification" property. Thus we need to minimize.
- ,(if cperl-font-lock-multiline
- '(if (match-beginning 3)
- (save-excursion
- (goto-char (match-beginning 3))
- (condition-case nil
- (forward-sexp 1)
- (error
- (condition-case nil
- (forward-char 200)
- (error nil)))) ; typeahead
- (1- (point))) ; report limit
- (forward-char -2)) ; disable continued expr
- '(if (match-beginning 3)
- (point-max) ; No limit for continuation
- (forward-char -2))) ; disable continued expr
- ,(if cperl-font-lock-multiline
- nil
- '(progn ; Do at end
- ;; "my" may be already fontified (POD),
- ;; so cperl-font-lock-multiline-start is nil
- (if (or (not cperl-font-lock-multiline-start)
- (> 2 (count-lines
- cperl-font-lock-multiline-start
- (point))))
- nil
- (put-text-property
- (1+ cperl-font-lock-multiline-start) (point)
- 'syntax-type 'multiline))
- (setq cperl-font-lock-multiline-start nil)))
- (3 font-lock-variable-name-face))))
- (t '("^[ \t{}]*\\(state\\|my\\|local\\|our\\)[ \t]*\\(([
\t]*\\)?\\([$@%*][a-zA-Z0-9_:]+\\)"
- 3 font-lock-variable-name-face)))
- '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[
\t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
+ `(,(rx (sequence (or "state" "my" "local" "our"))
+ (eval cperl--ws*-rx)
+ (opt (sequence "(" (eval cperl--ws*-rx)))
+ (group
+ (in "$@%*")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--special-identifier-rx))
+ )
+ )
+ ;; (concat "\\<\\(state\\|my\\|local\\|our\\)"
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\(("
+ ;; cperl-maybe-white-and-comment-rex
+ ;;
"\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
+ ;; (5 ,(if cperl-font-lock-multiline
+ (1 ,(if cperl-font-lock-multiline
+ 'font-lock-variable-name-face
+ '(progn (setq cperl-font-lock-multiline-start
+ (match-beginning 0))
+ 'font-lock-variable-name-face)))
+ (,(rx (sequence point
+ (eval cperl--ws*-rx)
+ ","
+ (eval cperl--ws*-rx)
+ (group
+ (in "$@%*")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--special-identifier-rx))
+ )
+ )
+ )
+ ;; ,(concat "\\="
+ ;; cperl-maybe-white-and-comment-rex
+ ;; ","
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
+ ;; Bug in font-lock: limit is used not only to limit
+ ;; searches, but to set the "extend window for
+ ;; facification" property. Thus we need to minimize.
+ ,(if cperl-font-lock-multiline
+ '(if (match-beginning 1)
+ (save-excursion
+ (goto-char (match-beginning 1))
+ (condition-case nil
+ (forward-sexp 1)
+ (error
+ (condition-case nil
+ (forward-char 200)
+ (error nil)))) ; typeahead
+ (1- (point))) ; report limit
+ (forward-char -2)) ; disable continued expr
+ '(if (match-beginning 1)
+ (point-max) ; No limit for continuation
+ (forward-char -2))) ; disable continued expr
+ ,(if cperl-font-lock-multiline
+ nil
+ '(progn ; Do at end
+ ;; "my" may be already fontified (POD),
+ ;; so cperl-font-lock-multiline-start is nil
+ (if (or (not cperl-font-lock-multiline-start)
+ (> 2 (count-lines
+ cperl-font-lock-multiline-start
+ (point))))
+ nil
+ (put-text-property
+ (1+ cperl-font-lock-multiline-start) (point)
+ 'syntax-type 'multiline))
+ (setq cperl-font-lock-multiline-start nil)))
+ (1 font-lock-variable-name-face)))
+ ;; foreach my $foo (
+ `(,(rx symbol-start "for" (opt "each")
+ (opt (sequence (1+ blank)
+ (or "state" "my" "local" "our")))
+ (0+ blank)
+ (group-n 1 (sequence "$"
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "(")
+;; '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[
\t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
4 font-lock-variable-name-face)
;; Avoid $!, and s!!, qq!! etc. when not fontifying syntactically
'("\\(?:^\\|[^smywqrx$]\\)\\(!\\)" 1 font-lock-negation-char-face)
'("\\[\\(\\^\\)" 1 font-lock-negation-char-face prepend)))
(setq
t-font-lock-keywords-1
- '(
- ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
+ `(
+ ;; arrays and hashes. Access to elements is fixed below
+ (,(rx (group-n 1 (group-n 2 (or (in "@%") "$#"))
+ (eval cperl--normal-identifier-rx)))
+ 1
+;; ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
(if (eq (char-after (match-beginning 2)) ?%)
'cperl-hash-face
'cperl-array-face)
nil) ; arrays and hashes
- ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
+ ;; access to array/hash elements
+ (,(rx (group-n 1 (group-n 2 (in "$@%"))
+ (eval cperl--normal-identifier-rx))
+ (0+ blank)
+ (group-n 3 (in "[{")))
+;; ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
1
(if (= (- (match-end 2) (match-beginning 2)) 1)
(if (eq (char-after (match-beginning 3)) ?{)
'cperl-hash-face
'cperl-array-face) ; arrays and hashes
font-lock-variable-name-face) ; Just to put something
- t)
- ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^
\t\n]\\)\\)"
+ t) ; override previous
+ ;; @$ array dereferences, $#$ last array index
+ (,(rx (group-n 1 (or "@" "$#"))
+ (group-n 2 (sequence "$"
+ (or (eval cperl--normal-identifier-rx)
+ (not (in " \t\n"))))))
+ ;; ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^
\t\n]\\)\\)"
(1 'cperl-array-face)
(2 font-lock-variable-name-face))
- ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
+ ;; %$ hash dereferences
+ (,(rx (group-n 1 "%")
+ (group-n 2 (sequence "$"
+ (or (eval cperl--normal-identifier-rx)
+ (not (in " \t\n"))))))
+ ;; ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-hash-face)
(2 font-lock-variable-name-face))
;;("\\([smy]\\|tr\\)\\([^a-z_A-Z0-9]\\)\\(\\([^\n\\]*||\\)\\)\\2")
@@ -6435,6 +6554,8 @@ Will not move the position at the start to the left."
(indent-region beg end nil)
(goto-char beg)
(setq col (current-column))
+ ;; Assuming that lineup is done on Perl syntax, this regexp
+ ;; doesn't need to be unicode aware -- haj, 2021-09-10
(if (looking-at "[a-zA-Z0-9_]")
(if (looking-at "\\<[a-zA-Z0-9_]+\\>")
(setq search
@@ -6472,6 +6593,9 @@ Will not move the position at the start to the left."
"Run etags with appropriate options for Perl files.
If optional argument ALL is `recursive', will process Perl files
in subdirectories too."
+ ;; Apparently etags doesn't support UTF-8 encoded sources, and usage
+ ;; of etags has been commented out in the menu since ... well,
+ ;; forever. So, let's just stick to ASCII here. -- haj, 2021-09-14
(interactive)
(let ((cmd "etags")
(args `("-l" "none" "-r"
@@ -6611,6 +6735,9 @@ Does not move point."
;; Search for the function
(progn ;;save-match-data
(while (re-search-forward
+ ;; FIXME: Should XS code be unicode aware? Recent C
+ ;; compilers (Gcc 10+) are, but I guess this isn't used
+ ;; much. -- haj, 2021-09-14
"^\\([ \t]*MODULE\\>[^\n]*\\<PACKAGE[ \t]*=[
\t]*\\([a-zA-Z_][a-zA-Z_0-9:]*\\)\\>\\|\\([a-zA-Z_][a-zA-Z_0-9]*\\)(\\|[
\t]*BOOT:\\)"
nil t)
(cond
@@ -6673,7 +6800,7 @@ Does not move point."
(setq lst
(mapcar
(lambda (elt)
- (cond ((string-match "^[_a-zA-Z]" (car elt))
+ (cond ((string-match (rx line-start (or alpha "_")) (car elt))
(goto-char (cdr elt))
(beginning-of-line) ; pos should be of the start of
the line
(list (car elt)
@@ -6703,9 +6830,14 @@ Does not move point."
","
(number-to-string (1- (elt elt 1))) ; Char pos 0-based
"\n")
- (if (and (string-match "^[_a-zA-Z]+::" (car elt))
- (string-match (concat "^" cperl-sub-regexp "[
\t]+\\([_a-zA-Z]+\\)[^:_a-zA-Z]")
- (elt elt 3)))
+ (if (and (string-match (rx line-start
+ (eval cperl--basic-identifier-rx)
"++")
+ (car elt))
+ (string-match (rx-to-string `(sequence line-start
+ (regexp
,cperl-sub-regexp)
+ (1+ (in " \t"))
+
,cperl--normal-identifier-rx))
+ (elt elt 3)))
;; Need to insert the name without package as well
(setq lst (cons (cons (substring (elt elt 3)
(match-beginning 1)
@@ -7155,14 +7287,14 @@ Currently it is tuned to C and Perl syntax."
;;(concat "\\("
(mapconcat
#'identity
- '("[$@%*&][0-9a-zA-Z_:]+\\([ \t]*[[{]\\)?" ; Usual variable
+ '("[$@%*&][[:alnum:]_:]+\\([ \t]*[[{]\\)?" ; Usual variable
"[$@]\\^[a-zA-Z]" ; Special variable
"[$@][^ \n\t]" ; Special variable
"-[a-zA-Z]" ; File test
"\\\\[a-zA-Z0]" ; Special chars
"^=[a-z][a-zA-Z0-9_]*" ; POD sections
"[-!&*+,./<=>?\\^|~]+" ; Operator
- "[a-zA-Z_0-9:]+" ; symbol or number
+ "[[:alnum:]_:]+" ; symbol or number
"x="
"#!")
;;"\\)\\|\\("
@@ -7178,7 +7310,7 @@ Currently it is tuned to C and Perl syntax."
;; Does not save-excursion
;; Get to the something meaningful
(or (eobp) (eolp) (forward-char 1))
- (re-search-backward "[-a-zA-Z0-9_:!&*+,./<=>?\\^|~$%@]"
+ (re-search-backward "[-[:alnum:]_:!&*+,./<=>?\\^|~$%@]"
(point-at-bol)
'to-beg)
;; (cond
@@ -7187,8 +7319,8 @@ Currently it is tuned to C and Perl syntax."
;; (or (bobp) (backward-char 1))))
;; Try to backtrace
(cond
- ((looking-at "[a-zA-Z0-9_:]") ; symbol
- (skip-chars-backward "a-zA-Z0-9_:")
+ ((looking-at "[[:alnum:]_:]") ; symbol
+ (skip-chars-backward "[:alnum:]_:")
(cond
((and (eq (preceding-char) ?^) ; $^I
(eq (char-after (- (point) 2)) ?\$))
@@ -7199,7 +7331,7 @@ Currently it is tuned to C and Perl syntax."
(eq (current-column) 1))
(forward-char -1))) ; =head1
(if (and (eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <FH>
+ (looking-at "\\$?[[:alnum:]_:]+>")) ; <FH>
(forward-char -1)))
((and (looking-at "=") (eq (preceding-char) ?x)) ; x=
(forward-char -1))
@@ -7212,15 +7344,15 @@ Currently it is tuned to C and Perl syntax."
(not (eq (char-after (- (point) 2)) ?\$))) ; $-
(forward-char -1))
((and (eq (following-char) ?\>)
- (string-match "[a-zA-Z0-9_]" (char-to-string (preceding-char)))
+ (string-match "[[:alnum:]_]" (char-to-string (preceding-char)))
(save-excursion
(forward-sexp -1)
(and (eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")))) ; <FH>
+ (looking-at "\\$?[[:alnum:]_:]+>")))) ; <FH>
(search-backward "<"))))
((and (eq (following-char) ?\$)
(eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <$fh>
+ (looking-at "\\$?[[:alnum:]_:]+>")) ; <$fh>
(forward-char -1)))
(if (looking-at cperl-have-help-regexp)
(buffer-substring (match-beginning 0) (match-end 0))))
diff --git a/test/lisp/progmodes/cperl-mode-resources/grammar.pl
b/test/lisp/progmodes/cperl-mode-resources/grammar.pl
index c05fd7e..96a8699 100644
--- a/test/lisp/progmodes/cperl-mode-resources/grammar.pl
+++ b/test/lisp/progmodes/cperl-mode-resources/grammar.pl
@@ -1,6 +1,7 @@
use 5.024;
use strict;
use warnings;
+use utf8;
sub outside {
say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}'";
@@ -155,4 +156,17 @@ package :: {
Shoved::elsewhere();
+# Finally, try unicode identifiers.
+package Erdős::Number;
+
+sub erdős_number {
+ my $name = shift;
+ if ($name eq "Erdős Pál") {
+ return 0;
+ }
+ else {
+ die "No access to the database. Sorry.";
+ }
+}
+
1;
diff --git a/test/lisp/progmodes/cperl-mode-tests.el
b/test/lisp/progmodes/cperl-mode-tests.el
index 54012c3..29b9e3f 100644
--- a/test/lisp/progmodes/cperl-mode-tests.el
+++ b/test/lisp/progmodes/cperl-mode-tests.el
@@ -154,6 +154,22 @@ point in the distant past, and is still broken in
perl-mode. "
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-keyword-face))))
+(ert-deftest cperl-test-fontify-special-variables ()
+ "Test fontification of variables like $^T or ${^ENCODING}.
+These can occur as \"local\" aliases."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (insert "local ($^I, ${^UNICODE});\n")
+ (goto-char (point-min))
+ (funcall cperl-test-mode)
+ (font-lock-ensure)
+ (search-forward "$")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "$")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))))
+
(ert-deftest cperl-test-identify-heredoc ()
"Test whether a construct containing \"<<\" followed by a
bareword is properly identified for a here-document if
@@ -297,6 +313,7 @@ the whole string."
(ert-deftest cperl-test-ws-rx ()
"Tests capture of very simple regular expressions (yawn)."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'(" " "\t" "\n"))
(invalid
@@ -306,6 +323,7 @@ the whole string."
(ert-deftest cperl-test-ws+-rx ()
"Tests sequences of whitespace and comment lines."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
`(" " "\t#\n" "\n# \n"
,(concat "# comment\n" "# comment\n" "\n" "#comment\n")))
@@ -316,6 +334,7 @@ the whole string."
(ert-deftest cperl-test-version-regexp ()
"Tests the regexp for recommended syntax of versions in Perl."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("1" "1.1" "1.1_1" "5.032001"
"v120.100.103"))
@@ -331,6 +350,7 @@ the whole string."
(ert-deftest cperl-test-package-regexp ()
"Tests the regular expression of Perl package names with versions.
Also includes valid cases with whitespace in strange places."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("package Foo"
"package Foo::Bar"
@@ -346,6 +366,284 @@ Also includes valid cases with whitespace in strange
places."
(cperl-test--validate-regexp (rx (eval cperl--package-rx))
valid invalid)))
+(ert-deftest cperl-test-identifier-rx ()
+ "Test valid and invalid identifiers (no sigils)."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (let ((valid
+ '("foo" "FOO" "f_oo" "a123"
+ "manÄťis")) ; Unicode is allowed!
+ (invalid
+ '("$foo" ; no sigils allowed (yet)
+ "Foo::bar" ; no package qualifiers allowed
+ "lots_of_€"))) ; € is not alphabetic
+ (cperl-test--validate-regexp (rx (eval cperl--basic-identifier-rx))
+ valid invalid)))
+
+;;; Test unicode identifier in various places
+
+(defun cperl--test-unicode-setup (code string)
+ "Insert CODE, prepare it for tests, and find STRING.
+Invoke the appropriate major mode, ensure fontification, and set
+point after the first occurrence of STRING (no regexp!)."
+ (insert code)
+ (funcall cperl-test-mode)
+ (font-lock-ensure)
+ (goto-char (point-min))
+ (search-forward string))
+
+(ert-deftest cperl-test-unicode-labels ()
+ "Verify that non-ASCII labels are processed correctly."
+ (with-temp-buffer
+ (cperl--test-unicode-setup "LABEĹ‚: for ($manÄťi) { say; }" "LAB")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-constant-face))))
+
+(ert-deftest cperl-test-unicode-sub ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "use strict;\n" ; distinguish bob from b-o-f
+ "sub â„Ź {\n"
+ " 6.62607015e-34\n"
+ "};")
+ "sub ") ; point is before "â„Ź"
+
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: This tests succeeds because cperl-mode
+ ;; accepts almost anything as a sub name for fontification. For
+ ;; example, it fontifies "sub @ {...;}" which is a syntax error in
+ ;; Perl. I let this pass for the moment.
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-function-name-face))
+
+ ;; Testing `beginning-of-defun'. Not available in perl-mode,
+ ;; where it jumps to the beginning of the buffer.
+ (when (eq cperl-test-mode #'cperl-mode)
+ (goto-char (point-min))
+ (search-forward "-34")
+ (beginning-of-defun)
+ (should (looking-at "sub")))))
+
+(ert-deftest cperl-test-unicode-varname ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "use strict;\n"
+ "my $Ď€ = 3.1415926535897932384626433832795028841971;\n"
+ "\n"
+ "my $manÄťi = $Ď€;\n"
+ "__END__\n")
+ "my $") ; perl-mode doesn't fontify the sigil, so include it here
+
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: This test succeeds in cperl-mode because the
+ ;; π character is "not ASCII alphabetic", so it treats $π as a
+ ;; punctuation variable. The following two `should' forms with a
+ ;; longer variable name were added for stronger verification.
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ ;; Test both ends of a longer variable name
+ (search-forward "my $") ; again skip the sigil
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "manÄťi")
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-varname-list ()
+ "Verify that all elements of a variable list are fontified."
+
+ (let ((hash-face (if (eq cperl-test-mode #'perl-mode)
+ 'perl-non-scalar-variable
+ 'cperl-hash-face))
+ (array-face (if (eq cperl-test-mode #'perl-mode)
+ 'perl-non-scalar-variable
+ 'cperl-array-face)))
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "my (%äsh,@ärräy,$scâlâr);" "%")
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "@")
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "scâlâr")
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face)))
+
+ ;; Now with package-qualified variables
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "local (%Søme::äsh,@Søme::ärräy,$Søme::scâlâr);" "%")
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "Søme::") ; test basic identifier
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "@") ; test package name
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "Søme::") ; test basic identifier
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "Søme") ; test package name
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "scâlâr") ; test basic identifier
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face)))))
+
+(ert-deftest cperl-test-unicode-arrays ()
+ "Test fontification of array access."
+ ;; Perl mode just looks at the sigil, for element access
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; simple array element
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$ärräy[1] = 7;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array slice
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@ärräy[(1..3)] = (4..6);" "@")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array max index
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$#ärräy = 1;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array dereference
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@$ärräy = (1,2,3)" "@")
+ (should (equal (get-text-property (1- (point)) 'face)
+ 'cperl-array-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-hashes ()
+ "Test fontification of hash access."
+ ;; Perl mode just looks at the sigil, for element access
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; simple hash element
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh{'a'} = 7;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash array slice
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@häsh{(1..3)} = (4..6);" "@")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash subset
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "my %hash = %häsh{'a',2,3};" "= %")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash dereference
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "%$äsh = (key => 'value');" "%")
+ (should (equal (get-text-property (1- (point)) 'face)
+ 'cperl-hash-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-hashref ()
+ "Verify that a hashref access disambiguates {s}.
+CPerl mode takes the token \"s\" as a substitution unless
+detected otherwise. Not for perl-mode: it doesn't stringify
+bareword hash keys and doesn't recognize a substitution
+\"s}foo}bar}\""
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (cperl--test-unicode-setup "$häshref->{s} # }}" "{")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ nil))))
+
+(ert-deftest cperl-test-unicode-proto ()
+ ;; perl-mode doesn't fontify prototypes at all
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "sub prötötyped ($) {\n"
+ " ...;"
+ "}\n")
+ "prötötyped (")
+
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))))
+
+(ert-deftest cperl-test-unicode-fhs ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "while (<BAREWĂ–RD>) {\n"
+ " ...;)\n"
+ "}\n")
+ "while (<") ; point is before the first char of the handle
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: perl-mode.el and cperl-mode.el handle these
+ ;; completely differently. perl-mode interprets barewords as
+ ;; constants, cperl-mode does not fontify them. Both treat
+ ;; non-barewords as globs, which are not fontified by perl-mode,
+ ;; but fontified as strings in cperl-mode. We keep (and test)
+ ;; that behavior "as is" because both bareword filehandles and
+ ;; <glob> syntax are no longer recommended.
+ (let ((bareword-face
+ (if (equal cperl-test-mode 'perl-mode) 'font-lock-constant-face
+ nil)))
+ (should (equal (get-text-property (point) 'face)
+ bareword-face)))))
+
+(ert-deftest cperl-test-unicode-hashkeys ()
+ "Test stringification of bareword hash keys. Not in perl-mode.
+perl-mode generally does not stringify bareword hash keys."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; Plain hash key
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh { kéy }" "{ ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face)))
+ ;; Nested hash key
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh { kéy } { kèy }" "} { ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face)))
+ ;; Key => value
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "( kéy => 'value'," "( ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))))
+
+(ert-deftest cperl-test-word-at-point ()
+ "Test whether the function captures non-ASCII words."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (let ((words '("rôle" "café" "ångström"
+ "Data::Dump::dump"
+ "_underscore")))
+ (dolist (word words)
+ (with-temp-buffer
+ (insert " + ") ; this will be the suffix
+ (beginning-of-line)
+ (insert ")") ; A non-word char
+ (insert word)
+ (should (string= word (cperl-word-at-point-hard)))))))
+
;;; Function test: Building an index for imenu
(ert-deftest cperl-test-imenu-index ()
@@ -369,7 +667,8 @@ created by CPerl mode, so skip it for Perl mode."
"Versioned::Package::outer"
"lexical"
"Versioned::Block::signatured"
- "Package::in_package_again")))
+ "Package::in_package_again"
+ "Erdős::Number::erdős_number")))
(dolist (sub expected)
(should (assoc-string sub index)))))))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master 3d49ad7: cperl-mode.el: Allow non-ASCII Perl identifiers,
Harald Jörg <=