emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 172e8e14e4: Add pyim-pymap-cchars2pys-get and use it.


From: ELPA Syncer
Subject: [elpa] externals/pyim 172e8e14e4: Add pyim-pymap-cchars2pys-get and use it.
Date: Mon, 16 Jan 2023 20:58:14 -0500 (EST)

branch: externals/pyim
commit 172e8e14e44c91c471dcabdbd9798d211f90bca6
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Add pyim-pymap-cchars2pys-get and use it.
---
 pyim-cstring.el     | 83 +++++++++++------------------------------------------
 pyim-pymap.el       | 69 ++++++++++++++++++++++++++++++++++++++++++++
 tests/pyim-tests.el | 10 +++++--
 3 files changed, 92 insertions(+), 70 deletions(-)

diff --git a/pyim-cstring.el b/pyim-cstring.el
index 1d1e4df051..2d718a72dd 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -131,79 +131,28 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
   "从 Dcache 中搜索 CSTRING 对应的拼音。"
   (let* ((string-parts (pyim-cstring--partition cstring))
          (pinyins-list
-          (mapcar (lambda (str)
-                    (if (pyim-string-match-p "\\cc" str)
-                        (when-let ((code (cl-find-if-not
-                                          (lambda (c)
-                                            ;; 注意:Pinyin 词库中不包含 "/" 字符。
-                                            (string-match-p c "/"))
-                                          (pyim-dcache-get str '(word2code)))))
-                          (split-string code "-"))
-                      (list str)))
+          (mapcar #'pyim-cstring--get-pinyin-code
                   string-parts)))
     (unless (member nil pinyins-list)
       (list (apply #'append pinyins-list)))))
 
+(defun pyim-cstring--get-pinyin-code (str)
+  "从 Dcache 中获取中文字符串 STR 对应的拼音。
+
+如果 STR 不包含中文,不做特殊处理。"
+  (if (pyim-string-match-p "\\cc" str)
+      (when-let ((code (cl-find-if-not
+                        (lambda (c)
+                          ;; 注意:Pinyin 词库中不包含 "/" 字符。
+                          (string-match-p c "/"))
+                        (pyim-dcache-get str '(word2code)))))
+        (split-string code "-"))
+    (list str)))
+
 (defun pyim-cstring-to-pinyin--from-pymap (cstring)
   "使用 PYMAP 提供的工具来搜索 CSTRING 对应的拼音。"
-  (let* ((string-parts (pyim-cstring--partition cstring t))
-         (pinyins-list
-          ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
-          (mapcar (lambda (str)
-                    (if (pyim-string-match-p "\\cc" str)
-                        (pyim-pymap-cchar2py-get str)
-                      (list str)))
-                  string-parts)))
-    ;; 通过排列组合的方式, 重排 pinyins-list。
-    ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
-    (pyim-permutate-list
-     (pyim-cstring--adjust-duoyinzi
-      string-parts pinyins-list))))
-
-(defun pyim-cstring--adjust-duoyinzi (string-parts pinyins-list)
-  "根据 STRING-PARTS 对 PINYINS-LIST 进行校正。
-
-比如:
-
-1. STRING-PARTS: (\"人\" \"民\" \"银\" \"行\")
-2. PINYINS-LIST: ((\"ren\") (\"min\") (\"yin\") (\"hang\" \"xing\"))
-3. 输出结果为:  ((\"ren\") (\"min\") (\"yin\") (\"hang\"))
-
-这个函数依赖 `pyim-pymap-duoyinzi' 提供的多音字数据。"
-  (let ((n (length pinyins-list))
-        output)
-    (dotimes (i n)
-      (let ((pinyins (nth i pinyins-list))
-            ;; 当前位置对应的汉字和位置前后汉字组成的两字词语。
-            (words-list (list (when (>= (- i 1) 0)
-                                (concat (nth (- i 1) string-parts)
-                                        (nth i string-parts)))
-                              (when (< (+ i 1) n)
-                                (concat (nth i string-parts)
-                                        (nth (+ i 1) string-parts)))))
-            ;; 当前位置汉字
-            (char-list (list (nth i string-parts))))
-        (if (= (length pinyins) 1)
-            (push pinyins output)
-          (let ((py-adjusted
-                 (or
-                  ;; NOTE: 多音字校正规则:
-                  ;; 1. 首先通过 pyim 自带的多音字词语来校正,具体见:
-                  ;; `pyim-pymap-duoyinzi-words'
-                  (pyim-pymap-possible-cchar-pinyin pinyins words-list)
-                  ;; 2. 然后通过 pyim 自带的多音字常用读音进行校正, 具体见:
-                  ;; `pyim-pymap-duoyinzi-chars',
-                  ;;
-                  ;; NOTE: 如果用户想要使用某个汉字的偏僻读音,这样处理是有问题
-                  ;; 的,但大多数情况我们还是使用汉字的常用读音,让偏僻的读音进
-                  ;; 入用户个人词库似乎也没有什么好处。
-                  (pyim-pymap-possible-cchar-pinyin pinyins char-list t))))
-            ;; 3. 如果多音字校正没有结果,就使用未校正的信息。
-            (push (if py-adjusted
-                      (list py-adjusted)
-                    pinyins)
-                  output)))))
-    (reverse output)))
+  (pyim-pymap-cchars2pys-get
+   (pyim-cstring--partition cstring t)))
 
 ;;;###autoload
 (defun pyim-cstring-to-pinyin-simple (string &optional shou-zi-mu separator return-list)
diff --git a/pyim-pymap.el b/pyim-pymap.el
index 4d014d9025..66dfae4069 100644
--- a/pyim-pymap.el
+++ b/pyim-pymap.el
@@ -1044,6 +1044,30 @@ If FORCE is non-nil, FORCE build."
           output
         (remove "|" output)))))
 
+(defun pyim-pymap-cchars2pys-get (cchars)
+  "将汉字列表转换为拼音列表,转换过程中矫正多音字。
+
+比如:
+1. CCHARS:  (\"你\" \"好\")
+2. OUTPUTS: ((\"ni\" \"hao\"))
+
+注意事项:
+1. 这个函数遇到非汉字字符串时,原样输出。
+2. 多音字矫正依赖 pymap 自带的多音字矫正信息的完善程度,可能会出
+   现矫正不正确的情况,这个函数为了保证性能,只处理常用多音字。"
+  (let* ((pinyins-list
+          ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
+          (mapcar (lambda (str)
+                    (if (pyim-string-match-p "\\cc" str)
+                        (pyim-pymap-cchar2py-get str)
+                      (list str)))
+                  cchars)))
+    ;; 通过排列组合的方式, 重排 pinyins-list。
+    ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
+    (pyim-permutate-list
+     (pyim-pymap--adjust-duoyinzi
+      cchars pinyins-list))))
+
 (defun pyim-pymap-cchar2py-get (char-or-str)
   "获取字符或者字符串 CHAR-OR-STR 对应的拼音 code.
 
@@ -1062,6 +1086,51 @@ pyim 在特定的时候需要读取一个汉字的拼音,这个工作由此完
     (when (= (length key) 1)
       (gethash key pyim-pymap--cchar2py-cache))))
 
+(defun pyim-pymap--adjust-duoyinzi (cchars-list pinyins-list)
+  "根据 CCHARS-LIST 对 PINYINS-LIST 进行校正。
+
+比如:
+
+1. CCHARS-LIST:  (\"人\" \"民\" \"银\" \"行\")
+2. PINYINS-LIST: ((\"ren\") (\"min\") (\"yin\") (\"hang\" \"xing\"))
+3. 输出结果为:  ((\"ren\") (\"min\") (\"yin\") (\"hang\"))
+
+这个函数依赖 `pyim-pymap-duoyinzi' 提供的多音字数据。"
+  (let ((n (length pinyins-list))
+        output)
+    (dotimes (i n)
+      (let ((pinyins (nth i pinyins-list))
+            ;; 当前位置对应的汉字和位置前后汉字组成的两字词语。
+            (words-list (list (when (>= (- i 1) 0)
+                                (concat (nth (- i 1) cchars-list)
+                                        (nth i cchars-list)))
+                              (when (< (+ i 1) n)
+                                (concat (nth i cchars-list)
+                                        (nth (+ i 1) cchars-list)))))
+            ;; 当前位置汉字
+            (char-list (list (nth i cchars-list))))
+        (if (= (length pinyins) 1)
+            (push pinyins output)
+          (let ((py-adjusted
+                 (or
+                  ;; NOTE: 多音字校正规则:
+                  ;; 1. 首先通过 pyim 自带的多音字词语来校正,具体见:
+                  ;; `pyim-pymap-duoyinzi-words'
+                  (pyim-pymap-possible-cchar-pinyin pinyins words-list)
+                  ;; 2. 然后通过 pyim 自带的多音字常用读音进行校正, 具体见:
+                  ;; `pyim-pymap-duoyinzi-chars',
+                  ;;
+                  ;; NOTE: 如果用户想要使用某个汉字的偏僻读音,这样处理是有问题
+                  ;; 的,但大多数情况我们还是使用汉字的常用读音,让偏僻的读音进
+                  ;; 入用户个人词库似乎也没有什么好处。
+                  (pyim-pymap-possible-cchar-pinyin pinyins char-list t))))
+            ;; 3. 如果多音字校正没有结果,就使用未校正的信息。
+            (push (if py-adjusted
+                      (list py-adjusted)
+                    pinyins)
+                  output)))))
+    (reverse output)))
+
 (defun pyim-pymap-possible-cchar-pinyin (cchar-pinyins cchar-words &optional search-char)
   "寻找一个汉字当前最可能的读音。
 
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 87f145f496..818ab5ce31 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -297,6 +297,10 @@
   (should-not (pyim-pymap-duoyinzi-include-p "银子"))
   (should (equal (pyim-pymap-py2duoyinzi-get "ai" t)
                  '("艾")))
+
+  (should (equal (pyim-pymap-cchars2pys-get '("hello" "你" "好" "ma"))
+                 '(("hello" "ni" "hao" "ma"))))
+
   (should (equal (mapcar (lambda (x)
                            (concat (substring x 0 1)
                                    (substring x -1)))
@@ -808,19 +812,19 @@
                   '("bu" "pi") '("不") t)
                  "bu"))
 
-  (should (equal (pyim-cstring--adjust-duoyinzi
+  (should (equal (pyim-pymap--adjust-duoyinzi
                   '("银" "行" "传" "说")
                   '(("yin") ("xing" "heng" "hang")
                     ("zhuan" "chuan") ("yue" "shuo" "shui")))
                  '(("yin") ("hang") ("chuan") ("shuo"))))
 
-  (should (equal (pyim-cstring--adjust-duoyinzi
+  (should (equal (pyim-pymap--adjust-duoyinzi
                   '("银" "行" "很" "行")
                   '(("yin") ("xing" "heng" "hang")
                     ("hen") ("xing" "heng" "hang")))
                  '(("yin") ("hang") ("hen") ("xing"))))
 
-  (should (equal (pyim-cstring--adjust-duoyinzi
+  (should (equal (pyim-pymap--adjust-duoyinzi
                   '("银" "行" "行" "业" "很" "行"
                     "不" "行" "也" "行"
                     "行" "也" "行")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]