[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: ASCII-folded search [was: Re: Upcoming loss of usability ...]
From: |
Artur Malabarba |
Subject: |
Re: ASCII-folded search [was: Re: Upcoming loss of usability ...] |
Date: |
Mon, 22 Jun 2015 22:03:56 +0100 |
> Shall I merge? (It adds about 5 seconds of compile time on my laptop)
Inlining the patch attached above (sorry, force of habit).
From: Artur Malabarba <address@hidden>
Date: Tue, 27 Jan 2015 14:08:01 -0200
Subject: [PATCH] * lisp/isearch.el: Fold many unicode characters to ASCII
(isearch-character-fold-search, isearch--character-fold-extras)
(isearch--character-fold-table): New variable.
(isearch--character-folded-regexp): New function.
(isearch-search-fun-default): Use them.
* lisp/replace.el (replace-character-fold): New variable.
(replace-search): Use it.
---
lisp/isearch.el | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lisp/replace.el | 9 +++++++
2 files changed, 84 insertions(+)
diff --git a/lisp/isearch.el b/lisp/isearch.el
index d1b92bd..eb0f965 100644
--- a/lisp/isearch.el
+++ b/lisp/isearch.el
@@ -272,6 +272,74 @@ Default value, nil, means edit the string instead."
:version "23.1"
:group 'isearch)
+(defvar isearch-character-fold-search t
+ "Whether regular isearch should fold similar characters.
+This means some characters will match entire groups of characters,
+such as \" matching all variants of double quotes.")
+
+(defconst isearch--character-fold-extras
+ '((?\" "＂" "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝"
"〟" "🙷" "🙶" "🙸" "«" "»")
+ (?' "`" "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "\U000E0022" "❮" "❯" "‹" "›")
+ (?` "❛" "‘" "‛" "\U000E0022" "❮" "‹")
+ ;; `isearch-character-fold-search' doesn't interact with
+ ;; `isearch-lax-whitespace' yet. So we need to add this here.
+ (?\s " " "\r" "\n"))
+ "Extra entries to add to `isearch--character-fold-table'.
+Used to specify character folding not covered by Unicode
+decomposition. Each car is a character and each cdr is a list of
+strings that it should match (itself excluded).")
+
+(defvar isearch--character-fold-table
+ (eval-when-compile (funcall (byte-compile (lambda ()
+ (require 'subr-x)
+ (let ((equiv (make-char-table 'character-fold-table)))
+ ;; Compile a list of all complex characters that each simple
+ ;; character should match.
+ (dotimes (i (length equiv))
+ (let ((dd (get-char-code-property i 'decomposition))
+ d k found)
+ ;; Skip trivial cases (?a decomposes to (?a)).
+ (unless (and (eq i (car dd)))
+ ;; Discard a possible formatting tag.
+ (when (symbolp (car-safe dd))
+ (setq dd (cdr dd)))
+ ;; Is k a number or letter, per unicode standard?
+ (setq d dd)
+ (while (and d (not found))
+ (setq k (pop d))
+ (setq found (and (characterp k)
+ (memq (get-char-code-property k
'general-category)
+ '(Lu Ll Lt Lm Lo Nd Nl No)))))
+ ;; If there's no number or letter in the
+ ;; decomposition, find the first character in it.
+ (setq d dd)
+ (while (and d (not found))
+ (setq k (pop d))
+ (setq found (characterp k)))
+ ;; Add i to the list of characters that k can
+ ;; represent. Also add its decomposition, so we can
+ ;; match multi-char representations like (format "a%c" 769)
+ (when (and found (not (eq i k)))
+ (aset equiv k (cons (apply #'string dd)
+ (cons (string i)
+ (aref equiv k))))))))
+ (dotimes (i (length equiv))
+ (when-let ((chars (append (cdr (assq i isearch--character-fold-extras))
+ (aref equiv i))))
+ (aset equiv i (regexp-opt (cons (string i) chars)))))
+ equiv)))))
+ "Used for folding characters of the same group during search.")
+
+(defun isearch--character-folded-regexp (string)
+ "Return a regexp matching anything that character-folds into STRING.
+That is, any character in STRING that has an entry in
+`isearch--character-fold-table' is replaced with that entry (which is a
+regexp). Other characters are `regexp-quote'd."
+ (apply #'concat
+ (mapcar (lambda (c) (or (aref isearch--character-fold-table c)
+ (regexp-quote (string c))))
+ string)))
+
(defcustom isearch-lazy-highlight t
"Controls the lazy-highlighting during incremental search.
When non-nil, all text in the buffer matching the current search
@@ -2607,6 +2675,13 @@ Can be changed via
`isearch-search-fun-function' for special needs."
're-search-backward-lax-whitespace))
(isearch-regexp
(if isearch-forward 're-search-forward 're-search-backward))
+ ;; `isearch-regexp' is essentially a superset of
+ ;; `isearch-character-fold-search', so character folding comes after it.
+ (isearch-character-fold-search
+ (lambda (string &optional bound noerror count)
+ (funcall (if isearch-forward #'re-search-forward #'re-search-backward)
+ (isearch--character-folded-regexp string)
+ bound noerror count)))
((and isearch-lax-whitespace search-whitespace-regexp)
(if isearch-forward
'search-forward-lax-whitespace
diff --git a/lisp/replace.el b/lisp/replace.el
index 1bf1343..96bbd61 100644
--- a/lisp/replace.el
+++ b/lisp/replace.el
@@ -33,6 +33,14 @@
:type 'boolean
:group 'matching)
+(defcustom replace-character-fold t
+ "Non-nil means `query-replace' should do character folding in matches.
+For instance, this means that ' will match a large variety of
+Unicode quotes."
+ :type 'boolean
+ :group 'matching
+ :version "25.1")
+
(defcustom replace-lax-whitespace nil
"Non-nil means `query-replace' matches a sequence of whitespace chars.
When you enter a space or spaces in the strings to be replaced,
@@ -2003,6 +2011,7 @@ It is called with three arguments, as if it were
;; used after `recursive-edit' might override them.
(let* ((isearch-regexp regexp-flag)
(isearch-word delimited-flag)
+ (isearch-character-fold-search replace-character-fold)
(isearch-lax-whitespace
replace-lax-whitespace)
(isearch-regexp-lax-whitespace
--
2.4.4
- ASCII-folded search [was: Re: Upcoming loss of usability ...], (continued)
- ASCII-folded search [was: Re: Upcoming loss of usability ...], Stephen J. Turnbull, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Eli Zaretskii, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Stephen J. Turnbull, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Eli Zaretskii, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Eli Zaretskii, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Eli Zaretskii, 2015/06/18
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/22
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/22
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...],
Artur Malabarba <=
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Juri Linkov, 2015/06/22
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/22
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Juri Linkov, 2015/06/23
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/24
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Eli Zaretskii, 2015/06/24
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/24
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Stefan Monnier, 2015/06/24
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Juri Linkov, 2015/06/24
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Juri Linkov, 2015/06/24
- Re: ASCII-folded search [was: Re: Upcoming loss of usability ...], Artur Malabarba, 2015/06/24