emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 5e7a92f: 添加 pyim-pinyin-valid-charpy-p 和 pyim-pinyin 相关 test.


From: ELPA Syncer
Subject: [elpa] externals/pyim 5e7a92f: 添加 pyim-pinyin-valid-charpy-p 和 pyim-pinyin 相关 test.
Date: Wed, 8 Dec 2021 21:57:31 -0500 (EST)

branch: externals/pyim
commit 5e7a92f4b82d800c17a76312b4bc60ead2513480
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    添加 pyim-pinyin-valid-charpy-p 和 pyim-pinyin 相关 test.
    
        * tests/pyim-tests.el (pyim-test-pyim-pinyin): New test.
    
        * pyim-pinyin.el (pyim-pinyin-valid-charpy-p): New function.
        (pyim-pinyin-get-charpy): Use pyim-pinyin-valid-charpy-p.
---
 pyim-pinyin.el      | 74 ++++++++++++++++++++++++++++-------------------------
 tests/pyim-tests.el | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 35 deletions(-)

diff --git a/pyim-pinyin.el b/pyim-pinyin.el
index 666ef2e..0958996 100644
--- a/pyim-pinyin.el
+++ b/pyim-pinyin.el
@@ -78,8 +78,10 @@
 
 (defun pyim-pinyin-build-regexp (pinyin &optional match-beginning first-equal all-equal)
   "从 PINYIN 构建一个 regexp,用于搜索联想词,
-比如:ni-hao-si-j --> ^ni-hao[a-z]*-si[a-z]*-j[a-z]* , when FIRST-EQUAL set to `t'
-                  --> ^ni[a-z]*-hao[a-z]*-si[a-z]*-j[a-z]* , when FIRST-EQUAL set to `nil'"
+
+比如:ni-hao:
+1. ^ni-hao[a-z]* , when FIRST-EQUAL set to `t'
+2. ^ni[a-z]*-hao[a-z]* , when FIRST-EQUAL set to `nil'"
   (when (and pinyin (stringp pinyin))
     (let ((pinyin-list (split-string pinyin "-"))
           (count 0))
@@ -108,6 +110,17 @@
     (cons shenmu
           (substring pinyin (length shenmu)))))
 
+(defun pyim-pinyin-valid-charpy-p (shenmu yunmu)
+  "测试由 SHENMU 和 YUNMU 组成的拼音,是否是一个有效的汉字拼音。
+这个函数尊重 `pyim-pinyin-fuzzy-alist' 模糊音设置。"
+  (cl-some
+   (lambda (char-pinyin)
+     (pyim-pymap-py2cchar-get char-pinyin t))
+   (mapcar (lambda (x)
+             (concat (nth 0 x) (nth 1 x)))
+           (pyim-pinyin-find-fuzzy
+            (list shenmu yunmu shenmu yunmu)))))
+
 (defun pyim-pinyin-get-charpy (pinyin)
   "将拼音字符串 PINYIN 分解成声母,韵母和剩余部分."
   (let* ((x (pyim-pinyin-get-shenmu pinyin))
@@ -115,40 +128,31 @@
          (yunmu-and-rest (cdr x))
          (i (min (length yunmu-and-rest) 5))
          yunmu rest)
-    (cl-flet ((pinyin-valid-p
-               (shenmu yunmu)
-               (cl-some
-                (lambda (char-pinyin)
-                  (pyim-pymap-py2cchar-get char-pinyin t))
-                (mapcar (lambda (x)
-                          (concat (nth 0 x) (nth 1 x)))
-                        (pyim-pinyin-find-fuzzy
-                         (list shenmu yunmu shenmu yunmu))))))
-      (while (> i 0)
-        (setq yunmu (substring yunmu-and-rest 0 i))
-        (setq rest (substring yunmu-and-rest i))
-        (if (member yunmu pyim-pinyin-yunmu)
-            (cond (;; 如果声母和韵母组成的拼音不是一个有效的拼音,
-                   ;; 就继续缩短,如果是,就进一步检测。
-                   (not (pinyin-valid-p shenmu yunmu))
+    (while (> i 0)
+      (setq yunmu (substring yunmu-and-rest 0 i))
+      (setq rest (substring yunmu-and-rest i))
+      (if (member yunmu pyim-pinyin-yunmu)
+          (cond (;; 如果声母和韵母组成的拼音不是一个有效的拼音,
+                 ;; 就继续缩短,如果是,就进一步检测。
+                 (not (pyim-pinyin-valid-charpy-p shenmu yunmu))
+                 (setq i (1- i))
+                 (setq yunmu ""))
+                ((and (string< "" rest)
+                      ;; 截取后剩余的字符串 rest 找不出声母
+                      (equal (car (pyim-pinyin-get-shenmu rest)) "")
+                      ;; 截取后的韵母最后一个字符是一个有效声母
+                      (member (substring yunmu -1) pyim-pinyin-shenmu)
+                      ;; 截取得到的韵母如果去掉最后一个字符,还是有效的韵母
+                      (member (substring yunmu 0 -1) pyim-pinyin-yunmu))
+                 (if (not (pyim-pinyin-valid-charpy-p shenmu (substring yunmu 0 -1)))
+                     ;; 如果去掉韵母最后一个字符后,无法组成一个有效的拼音。
+                     ;; 就不要缩短了。
+                     (setq i 0)
                    (setq i (1- i))
-                   (setq yunmu ""))
-                  ((and (string< "" rest)
-                        ;; 截取后剩余的字符串 rest 找不出声母
-                        (equal (car (pyim-pinyin-get-shenmu rest)) "")
-                        ;; 截取后的韵母最后一个字符是一个有效声母
-                        (member (substring yunmu -1) pyim-pinyin-shenmu)
-                        ;; 截取得到的韵母如果去掉最后一个字符,还是有效的韵母
-                        (member (substring yunmu 0 -1) pyim-pinyin-yunmu))
-                   (if (not (pinyin-valid-p shenmu (substring yunmu 0 -1)))
-                       ;; 如果去掉韵母最后一个字符后,无法组成一个有效的拼音。
-                       ;; 就不要缩短了。
-                       (setq i 0)
-                     (setq i (1- i))
-                     (setq yunmu "")))
-                  (t (setq i 0)))
-          (setq i (1- i))
-          (setq yunmu ""))))
+                   (setq yunmu "")))
+                (t (setq i 0)))
+        (setq i (1- i))
+        (setq yunmu "")))
     (cons (list shenmu yunmu shenmu yunmu)
           (substring yunmu-and-rest (length yunmu)))))
 
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index fcf3bf7..ebd1add 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -96,6 +96,72 @@
   (should (equal (length (pyim-pymap-py2cchar-get "a")) 5))
   (should (equal (length (pyim-pymap-py2cchar-get "z")) 36)))
 
+;; ** pyim-pinyin 相关单元测试
+(ert-deftest pyim-test-pyim-pinyin ()
+  ;; pyim-pinyin-get-shenmu
+  (should (equal (pyim-pinyin-get-shenmu "nihao")
+                 '("n" . "ihao")))
+  (should (equal (pyim-pinyin-get-shenmu "ao")
+                 '("" . "ao")))
+  (should (equal (pyim-pinyin-get-shenmu "")
+                 '(nil . "")))
+
+  ;; pyim-pinyin-valid-charpy-p
+  (should (pyim-pinyin-valid-charpy-p "n" "i"))
+  (should (pyim-pinyin-valid-charpy-p "" "a"))
+  (should (pyim-pinyin-valid-charpy-p "" "ao"))
+  (should-not (pyim-pinyin-valid-charpy-p "n" "k"))
+  (should-not (pyim-pinyin-valid-charpy-p "a" "k"))
+
+  ;; pyim-pinyin-get-charpy
+  (should (equal (pyim-pinyin-get-charpy "nihao")
+                 '(("n" "i" "n" "i") . "hao")))
+  (should (equal (pyim-pinyin-get-charpy "ao")
+                 '(("" "ao" "" "ao") . "")))
+  (should (equal (pyim-pinyin-get-charpy "nh")
+                 '(("n" "" "n" "") . "h")))
+
+  ;; pyim-pinyin-split
+  (should (equal (pyim-pinyin-split "n")
+                 '(("n" nil "n" nil))))
+  (should (equal (pyim-pinyin-split "ni")
+                 '(("n" "i" "n" "i"))))
+  (should (equal (pyim-pinyin-split "nih")
+                 '(("n" "i" "n" "i")
+                   ("h" nil "h" nil))))
+  (should (equal (pyim-pinyin-split "nihao")
+                 '(("n" "i" "n" "i")
+                   ("h" "ao" "h" "ao"))))
+  (should (equal (pyim-pinyin-split "a")
+                 '(("" "a" "" "a"))))
+  (should (equal (pyim-pinyin-split "a")
+                 '(("" "a" "" "a"))))
+  (should (equal (pyim-pinyin-split "xian")
+                 '(("x" "ian" "x" "ian"))))
+  (should (equal (pyim-pinyin-split "xi'an")
+                 '(("" "xi'an" "" "xi'an"))))
+
+  ;; pyim-pinyin-find-fuzzy
+  (let ((pyim-pinyin-fuzzy-alist
+         '(("en" "eng")
+           ("f" "h"))))
+    (should (equal (pyim-pinyin-find-fuzzy '("f" "en" "f" "en"))
+                   '(("f" "en" "f" "en")
+                     ("f" "eng" "f" "en")
+                     ("h" "en" "f" "en")
+                     ("h" "eng" "f" "en")))))
+  ;; pyim-pinyin-build-regexp
+  (should (equal (pyim-pinyin-build-regexp "ni-hao")
+                 "ni[a-z]*-hao[a-z]*"))
+  (should (equal (pyim-pinyin-build-regexp "ni-hao" t)
+                 "^ni[a-z]*-hao[a-z]*"))
+  (should (equal (pyim-pinyin-build-regexp "ni-hao" nil t)
+                 "ni-hao[a-z]*"))
+  (should (equal (pyim-pinyin-build-regexp "ni-hao" nil nil t)
+                 "ni-hao"))
+  (should (equal (pyim-pinyin-build-regexp "ni-hao" t t)
+                 "^ni-hao[a-z]*")))
+
 ;; ** pyim-cstring 相关单元测试
 (ert-deftest pyim-test-pyim-cstring-partition ()
   (should (equal (pyim-cstring-partition "你好 hello 你好")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]