[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim 172e8e14e4: Add pyim-pymap-cchars2pys-get and use it.
From: ELPA Syncer
Subject: [elpa] externals/pyim 172e8e14e4: Add pyim-pymap-cchars2pys-get and use it.
Date: Mon, 16 Jan 2023 20:58:14 -0500 (EST)
branch: externals/pyim
commit 172e8e14e44c91c471dcabdbd9798d211f90bca6
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
Add pyim-pymap-cchars2pys-get and use it.
---
pyim-cstring.el | 83 +++++++++++------------------------------------------
pyim-pymap.el | 69 ++++++++++++++++++++++++++++++++++++++++++++
tests/pyim-tests.el | 10 +++++--
3 files changed, 92 insertions(+), 70 deletions(-)
diff --git a/pyim-cstring.el b/pyim-cstring.el
index 1d1e4df051..2d718a72dd 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -131,79 +131,28 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
"从 Dcache 中搜索 CSTRING 对应的拼音。"
(let* ((string-parts (pyim-cstring--partition cstring))
(pinyins-list
- (mapcar (lambda (str)
- (if (pyim-string-match-p "\\cc" str)
- (when-let ((code (cl-find-if-not
- (lambda (c)
- ;; 注意:Pinyin 词库中不包含 "/" 字符。
- (string-match-p c "/"))
- (pyim-dcache-get str '(word2code)))))
- (split-string code "-"))
- (list str)))
+ (mapcar #'pyim-cstring--get-pinyin-code
string-parts)))
(unless (member nil pinyins-list)
(list (apply #'append pinyins-list)))))
+(defun pyim-cstring--get-pinyin-code (str)
+ "从 Dcache 中获取中文字符串 STR 对应的拼音。
+
+如果 STR 不包含中文,不做特殊处理。"
+ (if (pyim-string-match-p "\\cc" str)
+ (when-let ((code (cl-find-if-not
+ (lambda (c)
+ ;; 注意:Pinyin 词库中不包含 "/" 字符。
+ (string-match-p c "/"))
+ (pyim-dcache-get str '(word2code)))))
+ (split-string code "-"))
+ (list str)))
+
(defun pyim-cstring-to-pinyin--from-pymap (cstring)
"使用 PYMAP 提供的工具来搜索 CSTRING 对应的拼音。"
- (let* ((string-parts (pyim-cstring--partition cstring t))
- (pinyins-list
- ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
- (mapcar (lambda (str)
- (if (pyim-string-match-p "\\cc" str)
- (pyim-pymap-cchar2py-get str)
- (list str)))
- string-parts)))
- ;; 通过排列组合的方式, 重排 pinyins-list。
- ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
- (pyim-permutate-list
- (pyim-cstring--adjust-duoyinzi
- string-parts pinyins-list))))
-
-(defun pyim-cstring--adjust-duoyinzi (string-parts pinyins-list)
- "根据 STRING-PARTS 对 PINYINS-LIST 进行校正。
-
-比如:
-
-1. STRING-PARTS: (\"人\" \"民\" \"银\" \"行\")
-2. PINYINS-LIST: ((\"ren\") (\"min\") (\"yin\") (\"hang\" \"xing\"))
-3. 输出结果为: ((\"ren\") (\"min\") (\"yin\") (\"hang\"))
-
-这个函数依赖 `pyim-pymap-duoyinzi' 提供的多音字数据。"
- (let ((n (length pinyins-list))
- output)
- (dotimes (i n)
- (let ((pinyins (nth i pinyins-list))
- ;; 当前位置对应的汉字和位置前后汉字组成的两字词语。
- (words-list (list (when (>= (- i 1) 0)
- (concat (nth (- i 1) string-parts)
- (nth i string-parts)))
- (when (< (+ i 1) n)
- (concat (nth i string-parts)
- (nth (+ i 1) string-parts)))))
- ;; 当前位置汉字
- (char-list (list (nth i string-parts))))
- (if (= (length pinyins) 1)
- (push pinyins output)
- (let ((py-adjusted
- (or
- ;; NOTE: 多音字校正规则:
- ;; 1. 首先通过 pyim 自带的多音字词语来校正,具体见:
- ;; `pyim-pymap-duoyinzi-words'
- (pyim-pymap-possible-cchar-pinyin pinyins words-list)
- ;; 2. 然后通过 pyim 自带的多音字常用读音进行校正, 具体见:
- ;; `pyim-pymap-duoyinzi-chars',
- ;;
- ;; NOTE: 如果用户想要使用某个汉字的偏僻读音,这样处理是有问题
- ;; 的,但大多数情况我们还是使用汉字的常用读音,让偏僻的读音进
- ;; 入用户个人词库似乎也没有什么好处。
- (pyim-pymap-possible-cchar-pinyin pinyins char-list t))))
- ;; 3. 如果多音字校正没有结果,就使用未校正的信息。
- (push (if py-adjusted
- (list py-adjusted)
- pinyins)
- output)))))
- (reverse output)))
+ (pyim-pymap-cchars2pys-get
+ (pyim-cstring--partition cstring t)))
;;;###autoload
(defun pyim-cstring-to-pinyin-simple (string &optional shou-zi-mu separator
return-list)
diff --git a/pyim-pymap.el b/pyim-pymap.el
index 4d014d9025..66dfae4069 100644
--- a/pyim-pymap.el
+++ b/pyim-pymap.el
@@ -1044,6 +1044,30 @@ If FORCE is non-nil, FORCE build."
output
(remove "|" output)))))
+(defun pyim-pymap-cchars2pys-get (cchars)
+ "将汉字列表转换为拼音列表,转换过程中矫正多音字。
+
+比如:
+1. CCHARS: (\"你\" \"好\")
+2. OUTPUTS: ((\"ni\" \"hao\"))
+
+注意事项:
+1. 这个函数遇到非汉字字符串时,原样输出。
+2. 多音字矫正依赖 pymap 自带的多音字矫正信息的完善程度,可能会出
+ 现矫正不正确的情况,这个函数为了保证性能,只处理常用多音字。"
+ (let* ((pinyins-list
+ ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
+ (mapcar (lambda (str)
+ (if (pyim-string-match-p "\\cc" str)
+ (pyim-pymap-cchar2py-get str)
+ (list str)))
+ cchars)))
+ ;; 通过排列组合的方式, 重排 pinyins-list。
+ ;; 比如:(("Hello") ("yin") ("hang")) -> (("Hello" "yin" "hang"))
+ (pyim-permutate-list
+ (pyim-pymap--adjust-duoyinzi
+ cchars pinyins-list))))
+
(defun pyim-pymap-cchar2py-get (char-or-str)
"获取字符或者字符串 CHAR-OR-STR 对应的拼音 code.
@@ -1062,6 +1086,51 @@ pyim 在特定的时候需要读取一个汉字的拼音,这个工作由此完
(when (= (length key) 1)
(gethash key pyim-pymap--cchar2py-cache))))
+(defun pyim-pymap--adjust-duoyinzi (cchars-list pinyins-list)
+ "根据 CCHARS-LIST 对 PINYINS-LIST 进行校正。
+
+比如:
+
+1. CCHARS-LIST: (\"人\" \"民\" \"银\" \"行\")
+2. PINYINS-LIST: ((\"ren\") (\"min\") (\"yin\") (\"hang\" \"xing\"))
+3. 输出结果为: ((\"ren\") (\"min\") (\"yin\") (\"hang\"))
+
+这个函数依赖 `pyim-pymap-duoyinzi' 提供的多音字数据。"
+ (let ((n (length pinyins-list))
+ output)
+ (dotimes (i n)
+ (let ((pinyins (nth i pinyins-list))
+ ;; 当前位置对应的汉字和位置前后汉字组成的两字词语。
+ (words-list (list (when (>= (- i 1) 0)
+ (concat (nth (- i 1) cchars-list)
+ (nth i cchars-list)))
+ (when (< (+ i 1) n)
+ (concat (nth i cchars-list)
+ (nth (+ i 1) cchars-list)))))
+ ;; 当前位置汉字
+ (char-list (list (nth i cchars-list))))
+ (if (= (length pinyins) 1)
+ (push pinyins output)
+ (let ((py-adjusted
+ (or
+ ;; NOTE: 多音字校正规则:
+ ;; 1. 首先通过 pyim 自带的多音字词语来校正,具体见:
+ ;; `pyim-pymap-duoyinzi-words'
+ (pyim-pymap-possible-cchar-pinyin pinyins words-list)
+ ;; 2. 然后通过 pyim 自带的多音字常用读音进行校正, 具体见:
+ ;; `pyim-pymap-duoyinzi-chars',
+ ;;
+ ;; NOTE: 如果用户想要使用某个汉字的偏僻读音,这样处理是有问题
+ ;; 的,但大多数情况我们还是使用汉字的常用读音,让偏僻的读音进
+ ;; 入用户个人词库似乎也没有什么好处。
+ (pyim-pymap-possible-cchar-pinyin pinyins char-list t))))
+ ;; 3. 如果多音字校正没有结果,就使用未校正的信息。
+ (push (if py-adjusted
+ (list py-adjusted)
+ pinyins)
+ output)))))
+ (reverse output)))
+
(defun pyim-pymap-possible-cchar-pinyin (cchar-pinyins cchar-words &optional
search-char)
"寻找一个汉字当前最可能的读音。
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 87f145f496..818ab5ce31 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -297,6 +297,10 @@
(should-not (pyim-pymap-duoyinzi-include-p "银子"))
(should (equal (pyim-pymap-py2duoyinzi-get "ai" t)
'("艾")))
+
+ (should (equal (pyim-pymap-cchars2pys-get '("hello" "你" "好" "ma"))
+ '(("hello" "ni" "hao" "ma"))))
+
(should (equal (mapcar (lambda (x)
(concat (substring x 0 1)
(substring x -1)))
@@ -808,19 +812,19 @@
'("bu" "pi") '("不") t)
"bu"))
- (should (equal (pyim-cstring--adjust-duoyinzi
+ (should (equal (pyim-pymap--adjust-duoyinzi
'("银" "行" "传" "说")
'(("yin") ("xing" "heng" "hang")
("zhuan" "chuan") ("yue" "shuo" "shui")))
'(("yin") ("hang") ("chuan") ("shuo"))))
- (should (equal (pyim-cstring--adjust-duoyinzi
+ (should (equal (pyim-pymap--adjust-duoyinzi
'("银" "行" "很" "行")
'(("yin") ("xing" "heng" "hang")
("hen") ("xing" "heng" "hang")))
'(("yin") ("hang") ("hen") ("xing"))))
- (should (equal (pyim-cstring--adjust-duoyinzi
+ (should (equal (pyim-pymap--adjust-duoyinzi
'("银" "行" "行" "业" "很" "行"
"不" "行" "也" "行"
"行" "也" "行")
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [elpa] externals/pyim 172e8e14e4: Add pyim-pymap-cchars2pys-get and use it.,
ELPA Syncer <=