emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 4dd1d12 1/2: 实现拼音连续联想。


From: ELPA Syncer
Subject: [elpa] externals/pyim 4dd1d12 1/2: 实现拼音连续联想。
Date: Sun, 5 Dec 2021 06:57:27 -0500 (EST)

branch: externals/pyim
commit 4dd1d121351c9ccf3fae6a9f9f91bfc75620713b
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    实现拼音连续联想。
    
        * pyim-process.el (pyim-process-create-word): 不保存8个汉字以上的词条。
    
        * pyim-common.el (pyim-zip): 添加 care-first-one 参数.
    
        * pyim-candidates.el (pyim-candidates-create-quanpin): new function.
        (pyim-candidates-create:quanpin): 实现连续联想。
    
    主要实现以下功能:假如用户输入 nihaomazheshi, 但词库里面找不到对应的词条,那么输
    入法自动用 nihaoma 和 zheshi 的第一个词条:"你好吗" 和 "这是" 连接成一个新的字符
    串 "你好吗这是" 做为第一个候选词。
---
 pyim-candidates.el | 149 ++++++++++++++++++++++++++++++-----------------------
 pyim-common.el     |  10 +++-
 pyim-process.el    |   5 +-
 3 files changed, 97 insertions(+), 67 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 8c81237..51cee3a 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -91,70 +91,91 @@ IMOBJS 获得候选词条。"
 (defun pyim-candidates-create:quanpin (imobjs scheme-name &optional async)
   "`pyim-candidates-create' 处理全拼输入法的函数."
   (unless async
-    (let (znabc-words pinyin-chars personal-words common-words)
-      ;; 智能ABC模式,得到尽可能的拼音组合,查询这些组合,得到的词条做为联想词。
-      (let ((codes (mapcar (lambda (x)
-                             (pyim-subconcat x "-"))
-                           (mapcar (lambda (imobj)
-                                     (pyim-codes-create imobj scheme-name))
-                                   imobjs))))
-        (setq znabc-words
-              (pyim-zip (mapcar #'pyim-dcache-get
-                                (pyim-zip codes)))))
-
-      ;; 获取个人词条,词库词条和第一汉字列表。
-      (dolist (imobj imobjs)
-        (let* (;; 个人词条
-               (w1 (pyim-dcache-get
-                    (mapconcat #'identity
-                               (pyim-codes-create imobj scheme-name)
-                               "-")
-                    (if pyim-enable-shortcode
-                        '(icode2word ishortcode2word)
-                      '(icode2word))))
-               ;; 词库词条
-               (w2 (pyim-dcache-get
-                    (mapconcat #'identity
-                               (pyim-codes-create imobj scheme-name)
-                               "-")
-                    (if pyim-enable-shortcode
-                        '(code2word shortcode2word)
-                      '(code2word))))
-               ;; 第一个汉字
-               (w3 (pyim-dcache-get
-                    (car (pyim-codes-create imobj scheme-name)))))
-          (push w1 personal-words)
-          (push w2 common-words)
-          (push w3 pinyin-chars)))
-
-      (setq personal-words (pyim-zip (nreverse personal-words)))
-      (setq common-words (pyim-zip (nreverse common-words)))
-      (setq pinyin-chars (pyim-zip (nreverse pinyin-chars)))
-
-      ;; 个人词条排序:使用词频信息对个人词库得到的候选词排序,第一个词条的位置
-      ;; 比较特殊,不参与排序,具体原因请参考 `pyim-page-select-word' 中的
-      ;; comment.
-      (setq personal-words
-            `(,(car personal-words)
-              ,@(pyim-dcache-call-api
-                 'sort-words (cdr personal-words))))
-
-      ;; 调试输出
-      (when pyim-debug
-        (print (list :imobjs imobjs
-                     :personal-words personal-words
-                     :common-words common-words
-                     :znabc-words znabc-words
-                     :pinyin-chars
-                     (cl-subseq pinyin-chars
-                                0 (min (length pinyin-chars) 5)))))
-
-      (delete-dups
-       (delq nil
-             `(,@personal-words
-               ,@common-words
-               ,@znabc-words
-               ,@pinyin-chars))))))
+    ;; 这段代码主要实现以下功能:假如用户输入 nihaomazheshi, 但词库里面找不到对
+    ;; 应的词条,那么输入法自动用 nihaoma 和 zheshi 的第一个词条:"你好吗" 和 "
+    ;; 这是" 连接成一个新的字符串 "你好吗这是" 做为第一个候选词。
+    (let* ((candidates (pyim-candidates-create-quanpin imobjs scheme-name))
+           (n (length (car candidates)))
+           output)
+      (push (car candidates) output)
+      (while (and (> n 0)
+                  (car (setq imobjs
+                             (mapcar (lambda (imobj)
+                                       (nthcdr n imobj))
+                                     imobjs))))
+        (let ((candidates (pyim-candidates-create-quanpin imobjs scheme-name)))
+          (push (car (pyim-candidates-create-quanpin imobjs scheme-name t)) output)
+          (setq n (length (car candidates)))))
+      `(,(mapconcat #'identity (nreverse output) "")
+        ,@candidates))))
+
+(defun pyim-candidates-create-quanpin (imobjs scheme-name &optional fast-search)
+  "`pyim-candidates-create:quanpin' 内部使用的函数。"
+  (let (znabc-words pinyin-chars personal-words common-words)
+    ;; 智能ABC模式,得到尽可能的拼音组合,查询这些组合,得到的词条做为联想词。
+    (let ((codes (mapcar (lambda (x)
+                           (pyim-subconcat x "-"))
+                         (mapcar (lambda (imobj)
+                                   (pyim-codes-create imobj scheme-name))
+                                 imobjs))))
+      (setq znabc-words
+            (pyim-zip (mapcar #'pyim-dcache-get
+                              (pyim-zip codes))
+                      fast-search)))
+
+    ;; 获取个人词条,词库词条和第一汉字列表。
+    (dolist (imobj imobjs)
+      (let* (;; 个人词条
+             (w1 (pyim-dcache-get
+                  (mapconcat #'identity
+                             (pyim-codes-create imobj scheme-name)
+                             "-")
+                  (if pyim-enable-shortcode
+                      '(icode2word ishortcode2word)
+                    '(icode2word))))
+             ;; 词库词条
+             (w2 (pyim-dcache-get
+                  (mapconcat #'identity
+                             (pyim-codes-create imobj scheme-name)
+                             "-")
+                  (if pyim-enable-shortcode
+                      '(code2word shortcode2word)
+                    '(code2word))))
+             ;; 第一个汉字
+             (w3 (pyim-dcache-get
+                  (car (pyim-codes-create imobj scheme-name)))))
+        (push w1 personal-words)
+        (push w2 common-words)
+        (push w3 pinyin-chars)))
+
+    (setq personal-words (pyim-zip (nreverse personal-words) fast-search))
+    (setq common-words (pyim-zip (nreverse common-words) fast-search))
+    (setq pinyin-chars (pyim-zip (nreverse pinyin-chars) fast-search))
+
+    ;; 个人词条排序:使用词频信息对个人词库得到的候选词排序,第一个词条的位置
+    ;; 比较特殊,不参与排序,具体原因请参考 `pyim-page-select-word' 中的
+    ;; comment.
+    (setq personal-words
+          `(,(car personal-words)
+            ,@(pyim-dcache-call-api
+               'sort-words (cdr personal-words))))
+
+    ;; 调试输出
+    (when pyim-debug
+      (print (list :imobjs imobjs
+                   :personal-words personal-words
+                   :common-words common-words
+                   :znabc-words znabc-words
+                   :pinyin-chars
+                   (cl-subseq pinyin-chars
+                              0 (min (length pinyin-chars) 5)))))
+
+    (delete-dups
+     (delq nil
+           `(,@personal-words
+             ,@common-words
+             ,@znabc-words
+             ,@pinyin-chars)))))
 
 (defun pyim-candidates-create:shuangpin (imobjs _scheme-name &optional async)
   "`pyim-candidates-create' 处理双拼输入法的函数."
diff --git a/pyim-common.el b/pyim-common.el
index dd29d30..63b9f4c 100644
--- a/pyim-common.el
+++ b/pyim-common.el
@@ -85,8 +85,14 @@
                       append (mapcar (lambda (l) (cons element l))
                                      (pyim-permutate-list list-tail)))))))
 
-(defun pyim-zip (lists)
-  "Zip LISTS and delete dups: ((a b c) (d e)) => (a d b e c)."
+(defun pyim-zip (lists &optional care-first-one)
+  "Zip LISTS and delete dups: ((a b c) (d e)) => (a d b e c).
+When CARE-FIRST-ONE is non-nil, ((a b c) (d e)) => (a d)."
+  (when care-first-one
+    (setq lists
+          (mapcar (lambda (x)
+                    (list (car x)))
+                  lists)))
   (setq lists (remove nil lists))
   (if (< (length lists) 2)
       (car lists)
diff --git a/pyim-process.el b/pyim-process.el
index 1c814eb..e0bbe53 100644
--- a/pyim-process.el
+++ b/pyim-process.el
@@ -533,7 +533,10 @@ WORDCOUNT-HANDLER 也可以是一个函数,其返回值将设置为 WORD 的
 
 BUG:拼音无法有效地处理多音字。"
   (when (and (> (length word) 0)
-             (< (length word) 11) ;十个汉字以上的词条,加到个人词库里面用处不大,忽略。
+             ;; 8个汉字以上的词条不加入个人缓存,原因有:
+             ;; 1. 比较长的词一般用的比较少。
+             ;; 2. 由于 criteria 比较长的原因,会出现严重卡顿。
+             (< (length word) 8)
              (not (pyim-string-match-p "\\CC" word)))
     ;; 记录最近创建的词条,用于快速删词功能。
     (setq pyim-process-last-created-word word)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]