[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim 4dd1d12 1/2: 实现拼音连续联想。
From: ELPA Syncer
Subject: [elpa] externals/pyim 4dd1d12 1/2: 实现拼音连续联想。
Date: Sun, 5 Dec 2021 06:57:27 -0500 (EST)
branch: externals/pyim
commit 4dd1d121351c9ccf3fae6a9f9f91bfc75620713b
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
实现拼音连续联想。
* pyim-process.el (pyim-process-create-word): 不保存8个汉字以上的词条。
* pyim-common.el (pyim-zip): 添加 care-first-one 参数.
* pyim-candidates.el (pyim-candidates-create-quanpin): new function.
(pyim-candidates-create:quanpin): 实现连续联想。
主要实现以下功能:假如用户输入 nihaomazheshi, 但词库里面找不到对应的词条,那么输
入法自动用 nihaoma 和 zheshi 的第一个词条:"你好吗" 和 "这是" 连接成一个新的字符
串 "你好吗这是" 做为第一个候选词。
---
pyim-candidates.el | 149 ++++++++++++++++++++++++++++++-----------------------
pyim-common.el | 10 +++-
pyim-process.el | 5 +-
3 files changed, 97 insertions(+), 67 deletions(-)
diff --git a/pyim-candidates.el b/pyim-candidates.el
index 8c81237..51cee3a 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -91,70 +91,91 @@ IMOBJS 获得候选词条。"
(defun pyim-candidates-create:quanpin (imobjs scheme-name &optional async)
"`pyim-candidates-create' 处理全拼输入法的函数."
(unless async
- (let (znabc-words pinyin-chars personal-words common-words)
- ;; 智能ABC模式,得到尽可能的拼音组合,查询这些组合,得到的词条做为联想词。
- (let ((codes (mapcar (lambda (x)
- (pyim-subconcat x "-"))
- (mapcar (lambda (imobj)
- (pyim-codes-create imobj scheme-name))
- imobjs))))
- (setq znabc-words
- (pyim-zip (mapcar #'pyim-dcache-get
- (pyim-zip codes)))))
-
- ;; 获取个人词条,词库词条和第一汉字列表。
- (dolist (imobj imobjs)
- (let* (;; 个人词条
- (w1 (pyim-dcache-get
- (mapconcat #'identity
- (pyim-codes-create imobj scheme-name)
- "-")
- (if pyim-enable-shortcode
- '(icode2word ishortcode2word)
- '(icode2word))))
- ;; 词库词条
- (w2 (pyim-dcache-get
- (mapconcat #'identity
- (pyim-codes-create imobj scheme-name)
- "-")
- (if pyim-enable-shortcode
- '(code2word shortcode2word)
- '(code2word))))
- ;; 第一个汉字
- (w3 (pyim-dcache-get
- (car (pyim-codes-create imobj scheme-name)))))
- (push w1 personal-words)
- (push w2 common-words)
- (push w3 pinyin-chars)))
-
- (setq personal-words (pyim-zip (nreverse personal-words)))
- (setq common-words (pyim-zip (nreverse common-words)))
- (setq pinyin-chars (pyim-zip (nreverse pinyin-chars)))
-
- ;; 个人词条排序:使用词频信息对个人词库得到的候选词排序,第一个词条的位置
- ;; 比较特殊,不参与排序,具体原因请参考 `pyim-page-select-word' 中的
- ;; comment.
- (setq personal-words
- `(,(car personal-words)
- ,@(pyim-dcache-call-api
- 'sort-words (cdr personal-words))))
-
- ;; 调试输出
- (when pyim-debug
- (print (list :imobjs imobjs
- :personal-words personal-words
- :common-words common-words
- :znabc-words znabc-words
- :pinyin-chars
- (cl-subseq pinyin-chars
- 0 (min (length pinyin-chars) 5)))))
-
- (delete-dups
- (delq nil
- `(,@personal-words
- ,@common-words
- ,@znabc-words
- ,@pinyin-chars))))))
+ ;; 这段代码主要实现以下功能:假如用户输入 nihaomazheshi, 但词库里面找不到对
+ ;; 应的词条,那么输入法自动用 nihaoma 和 zheshi 的第一个词条:"你好吗" 和 "
+ ;; 这是" 连接成一个新的字符串 "你好吗这是" 做为第一个候选词。
+ (let* ((candidates (pyim-candidates-create-quanpin imobjs scheme-name))
+ (n (length (car candidates)))
+ output)
+ (push (car candidates) output)
+ (while (and (> n 0)
+ (car (setq imobjs
+ (mapcar (lambda (imobj)
+ (nthcdr n imobj))
+ imobjs))))
+ (let ((candidates (pyim-candidates-create-quanpin imobjs scheme-name)))
+ (push (car (pyim-candidates-create-quanpin imobjs scheme-name t)) output)
+ (setq n (length (car candidates)))))
+ `(,(mapconcat #'identity (nreverse output) "")
+ ,@candidates))))
+
+(defun pyim-candidates-create-quanpin (imobjs scheme-name &optional fast-search)
+ "`pyim-candidates-create:quanpin' 内部使用的函数。"
+ (let (znabc-words pinyin-chars personal-words common-words)
+ ;; 智能ABC模式,得到尽可能的拼音组合,查询这些组合,得到的词条做为联想词。
+ (let ((codes (mapcar (lambda (x)
+ (pyim-subconcat x "-"))
+ (mapcar (lambda (imobj)
+ (pyim-codes-create imobj scheme-name))
+ imobjs))))
+ (setq znabc-words
+ (pyim-zip (mapcar #'pyim-dcache-get
+ (pyim-zip codes))
+ fast-search)))
+
+ ;; 获取个人词条,词库词条和第一汉字列表。
+ (dolist (imobj imobjs)
+ (let* (;; 个人词条
+ (w1 (pyim-dcache-get
+ (mapconcat #'identity
+ (pyim-codes-create imobj scheme-name)
+ "-")
+ (if pyim-enable-shortcode
+ '(icode2word ishortcode2word)
+ '(icode2word))))
+ ;; 词库词条
+ (w2 (pyim-dcache-get
+ (mapconcat #'identity
+ (pyim-codes-create imobj scheme-name)
+ "-")
+ (if pyim-enable-shortcode
+ '(code2word shortcode2word)
+ '(code2word))))
+ ;; 第一个汉字
+ (w3 (pyim-dcache-get
+ (car (pyim-codes-create imobj scheme-name)))))
+ (push w1 personal-words)
+ (push w2 common-words)
+ (push w3 pinyin-chars)))
+
+ (setq personal-words (pyim-zip (nreverse personal-words) fast-search))
+ (setq common-words (pyim-zip (nreverse common-words) fast-search))
+ (setq pinyin-chars (pyim-zip (nreverse pinyin-chars) fast-search))
+
+ ;; 个人词条排序:使用词频信息对个人词库得到的候选词排序,第一个词条的位置
+ ;; 比较特殊,不参与排序,具体原因请参考 `pyim-page-select-word' 中的
+ ;; comment.
+ (setq personal-words
+ `(,(car personal-words)
+ ,@(pyim-dcache-call-api
+ 'sort-words (cdr personal-words))))
+
+ ;; 调试输出
+ (when pyim-debug
+ (print (list :imobjs imobjs
+ :personal-words personal-words
+ :common-words common-words
+ :znabc-words znabc-words
+ :pinyin-chars
+ (cl-subseq pinyin-chars
+ 0 (min (length pinyin-chars) 5)))))
+
+ (delete-dups
+ (delq nil
+ `(,@personal-words
+ ,@common-words
+ ,@znabc-words
+ ,@pinyin-chars)))))
(defun pyim-candidates-create:shuangpin (imobjs _scheme-name &optional async)
"`pyim-candidates-create' 处理双拼输入法的函数."
diff --git a/pyim-common.el b/pyim-common.el
index dd29d30..63b9f4c 100644
--- a/pyim-common.el
+++ b/pyim-common.el
@@ -85,8 +85,14 @@
append (mapcar (lambda (l) (cons element l))
(pyim-permutate-list list-tail)))))))
-(defun pyim-zip (lists)
- "Zip LISTS and delete dups: ((a b c) (d e)) => (a d b e c)."
+(defun pyim-zip (lists &optional care-first-one)
+ "Zip LISTS and delete dups: ((a b c) (d e)) => (a d b e c).
+When CARE-FIRST-ONE is no-nil, ((a b c) (d e)) => (a d)."
+ (when care-first-one
+ (setq lists
+ (mapcar (lambda (x)
+ (list (car x)))
+ lists)))
(setq lists (remove nil lists))
(if (< (length lists) 2)
(car lists)
diff --git a/pyim-process.el b/pyim-process.el
index 1c814eb..e0bbe53 100644
--- a/pyim-process.el
+++ b/pyim-process.el
@@ -533,7 +533,10 @@ WORDCOUNT-HANDLER 也可以是一个函数,其返回值将设置为 WORD 的
BUG:拼音无法有效地处理多音字。"
(when (and (> (length word) 0)
- (< (length word) 11) ;十个汉字以上的词条,加到个人词库里面用处不大,忽略。
+ ;; 8个汉字以上的词条不加入个人缓存,原因有:
+ ;; 1. 比较长的词一般用的比较少。
+ ;; 2. 由于 criteria 比较长的原因,会出现严重卡顿。
+ (< (length word) 8)
(not (pyim-string-match-p "\\CC" word)))
;; 记录最近创建的词条,用于快速删词功能。
(setq pyim-process-last-created-word word)