[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim bf92514: 拼音输入法可以搜索当前 buffer 来获取词条。
From: |
ELPA Syncer |
Subject: |
[elpa] externals/pyim bf92514: 拼音输入法可以搜索当前 buffer 来获取词条。 |
Date: |
Fri, 10 Dec 2021 09:57:35 -0500 (EST) |
branch: externals/pyim
commit bf925142a53a36179ec42e7d93e389da0487dc8c
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
拼音输入法可以搜索当前 buffer 来获取词条。
* tests/pyim-tests.el (pyim-tests-pyim-cregexp): Test chinese-only
argument.
* pyim-cregexp.el (pyim-cregexp-build, pyim-cregexp-build-1): Add
chinese-only argument.
* pyim-common.el (pyim-time-limit-while): New macro.
* pyim-candidates.el (pyim-candidates-create:quanpin): Support async.
(pyim-candidates-search-buffer): New function.
---
pyim-candidates.el | 28 +++++++++++++++++++++++++++-
pyim-common.el | 13 +++++++++++++
pyim-cregexp.el | 20 ++++++++++++--------
tests/pyim-tests.el | 6 ++++++
4 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/pyim-candidates.el b/pyim-candidates.el
index c9d1232..afe61de 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -91,7 +91,17 @@ IMOBJS 获得候选词条。"
(defun pyim-candidates-create:quanpin (imobjs scheme-name &optional async)
"`pyim-candidates-create' 处理全拼输入法的函数."
- (unless async
+ (if async
+ ;; 使用当前的 entered 构建一个搜索中文的正则表达式, 然后使用这个正则表达式
+ ;; 在当前 buffer 中搜索词条。
+ (let ((str (pyim-entered-get)))
+ (if (< (length str) 1)
+ pyim-candidates
+ ;; NOTE: 让第一个词保持不变是不是合理,有待进一步的观察。
+ `(,(car pyim-candidates)
+ ,@(pyim-candidates-search-buffer
+ (pyim-cregexp-build str 3 t))
+ ,@(cdr pyim-candidates))))
;; 这段代码主要实现以下功能:假如用户输入 nihaomazheshi, 但词库里面找不到对
;; 应的词条,那么输入法自动用 nihaoma 和 zheshi 的第一个词条:"你好吗" 和 "
;; 这是" 连接成一个新的字符串 "你好吗这是" 做为第一个候选词。
@@ -110,6 +120,22 @@ IMOBJS 获得候选词条。"
(append (pyim-subconcat (nreverse output) "")
candidates))))
+(defun pyim-candidates-search-buffer (regexp)
+ "在当前 buffer 中使用 REGEXP 搜索词条。"
+ (save-excursion
+ (let ((start (current-time))
+ words)
+ (goto-char (point-min))
+ ;; Search forward from the beginning of the buffer.
+ (pyim-time-limit-while (and (not (input-pending-p))
+ (re-search-forward regexp nil t))
+ start 0.1 25
+ (let ((match (match-string-no-properties 0)))
+ ;; NOTE: 单个汉字我觉得不值得收集。
+ (when (>= (length match) 2)
+ (cl-pushnew match words :test #'equal))))
+ words)))
+
(defun pyim-candidates-create-quanpin (imobjs scheme-name &optional
fast-search)
"`pyim-candidates-create:quanpin' 内部使用的函数。"
(let (jianpin-words znabc-words personal-words common-words pinyin-chars-1
pinyin-chars-2)
diff --git a/pyim-common.el b/pyim-common.el
index 5158da3..0173c12 100644
--- a/pyim-common.el
+++ b/pyim-common.el
@@ -178,6 +178,19 @@ When CARE-FIRST-ONE is no-nil, ((a b c) (d e)) => (a d)."
(append key nil))
unread-command-events))))
+;; Forked from `company-dabbrev--time-limit-while' in company-mode.
+(defmacro pyim-time-limit-while (test start limit freq &rest body)
+ (declare (indent 3) (debug t))
+ `(let ((pyim-time-limit-while-counter 0))
+ (catch 'done
+ (while ,test
+ ,@body
+ (and ,limit
+ (= (cl-incf pyim-time-limit-while-counter) ,freq)
+ (setq pyim-time-limit-while-counter 0)
+ (> (float-time (time-since ,start)) ,limit)
+ (throw 'done 'pyim-time-out))))))
+
;; * Footer
(provide 'pyim-common)
diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 56a5a1b..2a999a1 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -49,7 +49,7 @@
(max (min num 4) 1)
4))
-(defun pyim-cregexp-build (string &optional char-level-num)
+(defun pyim-cregexp-build (string &optional char-level-num chinese-only)
"根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\".
比如:\"nihao\" -> \"[你呢...][好号...] \\| nihao\"
@@ -60,6 +60,8 @@ CHAR-LEVEL-NUM 代表汉字常用级别,pyim 中根据汉字的使用频率,
如果这个参数设置为3, 那么代表在构建 regexp 时,只使用常用级别小于
等于3的汉字。
+如果 CHINESE-ONLY 为真,那么生成的 regexp 只能搜索汉字。
+
注意事项:如果生成的 regexp 太长,Emacs 无法处理,那么,这个命令
会抛弃一些不常用的汉字,重新生成,直到生成一个 Emacs 可以处理的
regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子的时候,
@@ -77,7 +79,7 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
(pyim-cregexp-build-from-rx
(lambda (x)
(if (stringp x)
- (xr (pyim-cregexp-build-1 x num))
+ (xr (pyim-cregexp-build-1 x num chinese-only))
x))
(xr string))))
string))
@@ -104,7 +106,7 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
rx-form))
(_ (funcall fn rx-form))))
-(defun pyim-cregexp-build-1 (str &optional char-level-num)
+(defun pyim-cregexp-build-1 (str &optional char-level-num chinese-only)
(let* ((num (pyim-cregexp-char-level-num char-level-num))
(scheme-name (pyim-scheme-name))
(class (pyim-scheme-get-option scheme-name :class))
@@ -139,11 +141,13 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
(delq nil regexp-list)
"\\|")))
(regexp
- (if (> (length regexp) 0)
- (if (equal string string1)
- (concat string "\\|" regexp)
- (concat string "\\|" string1 "\\|" regexp))
- string)))
+ (if chinese-only
+ regexp
+ (if (> (length regexp) 0)
+ (if (equal string string1)
+ (concat string "\\|" regexp)
+ (concat string "\\|" string1 "\\|" regexp))
+ string))))
(format "\\(?:%s\\)" regexp))))
lst "")))
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index da23ba2..ca552e4 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -429,6 +429,12 @@
(should (string-match-p regexp "你好"))
(should (string-match-p regexp "哈哈你好吗")))
+ (let ((regexp (pyim-cregexp-build "nihao" nil t)))
+ (should-not (string-match-p regexp "nihao"))
+ (should-not (string-match-p regexp "anihaob"))
+ (should (string-match-p regexp "你好"))
+ (should (string-match-p regexp "哈哈你好吗")))
+
(let ((regexp (pyim-cregexp-build "beng")))
(should (string-match-p regexp "痭"))
(should (string-match-p regexp "泵"))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [elpa] externals/pyim bf92514: 拼音输入法可以搜索当前 buffer 来获取词条。,
ELPA Syncer <=