emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 5289127408 3/3: pyim-cstring-to-pinyin: use dcache when possible.


From: ELPA Syncer
Subject: [elpa] externals/pyim 5289127408 3/3: pyim-cstring-to-pinyin: use dcache when possible.
Date: Mon, 16 Jan 2023 02:58:10 -0500 (EST)

branch: externals/pyim
commit 5289127408140f6fec442e61b8e3c3134a448ccb
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    pyim-cstring-to-pinyin: use dcache when possible.
---
 pyim-cstring.el    | 49 +++++++++++++++++++++++++++++++++----------------
 pyim-dhashcache.el | 50 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/pyim-cstring.el b/pyim-cstring.el
index a0cb3b2c5c..5f8c27cb85 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -109,22 +109,10 @@ t, 遇到多音字时,只使用第一个拼音,其它拼音忽略。
 BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结果会
 包含多余的连接符:比如: \"你=好\" --> \"ni-=-hao\""
   (if (not (pyim-string-match-p "\\cc" string))
-      (if return-list
-          (list string)
-        string)
-    (let* ((string-parts (pyim-cstring--partition string t))
-           (pinyins-list
-            ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
-            (mapcar (lambda (str)
-                      (if (pyim-string-match-p "\\cc" str)
-                          (pyim-pymap-cchar2py-get str)
-                        (list str)))
-                    string-parts))
-           ;; 通过排列组合的方式, 重排 pinyins-list。
-           ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
-           (pinyins-list (pyim-permutate-list
-                          (pyim-cstring--adjust-duoyinzi
-                           string-parts pinyins-list)))
+      (if return-list (list string) string)
+    (let* ((pinyins-list
+            (or (pyim-cstring-to-pinyin--from-dcache string)
+                (pyim-cstring-to-pinyin--from-pymap string)))
            (list (mapcar (lambda (x)
                            (mapconcat (lambda (str)
                                         (if shou-zi-mu
@@ -139,6 +127,35 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
           list
         (string-join list " ")))))
 
+(defun pyim-cstring-to-pinyin--from-dcache (cstring)
+  "从 Dcache 中搜索 CSTRING 对应的拼音。"
+  (let* ((string-parts (pyim-cstring--partition cstring))
+         (pinyins-list
+          (mapcar (lambda (str)
+                    (if (pyim-string-match-p "\\cc" str)
+                        (when-let ((code (car (pyim-dcache-get str '(word2code)))))
+                          (split-string code "-"))
+                      (list str)))
+                  string-parts)))
+    (unless (member nil pinyins-list)
+      (list (apply #'append pinyins-list)))))
+
+(defun pyim-cstring-to-pinyin--from-pymap (cstring)
+  "使用 PYMAP 提供的工具来搜索 CSTRING 对应的拼音。"
+  (let* ((string-parts (pyim-cstring--partition cstring t))
+         (pinyins-list
+          ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
+          (mapcar (lambda (str)
+                    (if (pyim-string-match-p "\\cc" str)
+                        (pyim-pymap-cchar2py-get str)
+                      (list str)))
+                  string-parts)))
+    ;; 通过排列组合的方式, 重排 pinyins-list。
+    ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
+    (pyim-permutate-list
+     (pyim-cstring--adjust-duoyinzi
+      string-parts pinyins-list))))
+
 (defun pyim-cstring--adjust-duoyinzi (string-parts pinyins-list)
   "根据 STRING-PARTS 对 PINYINS-LIST 进行校正。
 
diff --git a/pyim-dhashcache.el b/pyim-dhashcache.el
index 7f4b463bba..bd216ab42e 100644
--- a/pyim-dhashcache.el
+++ b/pyim-dhashcache.el
@@ -40,6 +40,7 @@
 (require 'pyim-dcache)
 (require 'pyim-dict)
 (require 'pyim-scheme)
+(require 'pyim-pymap)
 (require 'sort)
 
 (defvar pyim-dhashcache--count-types
@@ -426,25 +427,42 @@ DCACHE 是一个 code -> words 的 hashtable.
     (let ((hashtable (make-hash-table :size 1000000 :test #'equal)))
       (maphash
        (lambda (code words)
-         ;; 这里主要考虑五笔仓颉等形码输入法,也就是 code-prefix 中包含 "/" 的输
-         ;; 入法,全拼输入法反查功能主要使用 pymap 实现,不使用这个表。
-         (when (pyim-string-match-p "/" code)
+         (if (pyim-string-match-p "/" code)
+             ;; 这里主要考虑五笔仓颉等形码输入法,也就是 code-prefix 中包含 "/"
+             ;; 的输入法,
+             (dolist (word words)
+               (let ((value (gethash word hashtable))
+                     ;; NOTE: 这里使用 `cl-copy-seq', 可以让保存的文件内容类似:
+                     ;;
+                     ;;   "呵" ("he" "a")
+                     ;;
+                     ;; 而不是:
+                     ;;
+                     ;;   "呵" (#9="he" #2#)
+                     ;;
+                     (code (cl-copy-seq code)))
+                 (puthash word
+                          (if value
+                              `(,code ,@value)
+                            (list code))
+                          hashtable)))
+           ;; 使用拼音输入法时,构建词条到拼音的哈希表非常消耗内存,在这里只处理
+           ;; 包含多音字的词条(2-4个字),测试发现,生成的哈希表也不小,大约是
+           ;; code2word 的 1/4.
+           ;;
+           ;; 除了包含多音字的 2-4 字词条,其余词条的拼音反查功能主要使用 pymap
+           ;; 实现,不使用这个表。
            (dolist (word words)
              (let ((value (gethash word hashtable))
-                   ;; NOTE: 这里使用 `cl-copy-seq', 可以让保存的文件内容类似:
-                   ;;
-                   ;;   "呵" ("he" "a")
-                   ;;
-                   ;; 而不是:
-                   ;;
-                   ;;   "呵" (#9="he" #2#)
-                   ;;
                    (code (cl-copy-seq code)))
-               (puthash word
-                        (if value
-                            `(,code ,@value)
-                          (list code))
-                        hashtable)))))
+               (when (and (> (length word) 1)
+                          (< (length word) 5)
+                          (pyim-pymap-duoyinzi-include-p word))
+                 (puthash word
+                          (if value
+                              `(,code ,@value)
+                            (list code))
+                          hashtable))))))
        dcache)
       (pyim-dcache-save-value-to-file hashtable file))))
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]