[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim 7b96b82: Add pyim-cstring-to-code-criteria
From: |
ELPA Syncer |
Subject: |
[elpa] externals/pyim 7b96b82: Add pyim-cstring-to-code-criteria |
Date: |
Fri, 7 May 2021 08:57:09 -0400 (EDT) |
branch: externals/pyim
commit 7b96b826a2e0f5ea9267fc031c0a3431888a78e9
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
Add pyim-cstring-to-code-criteria
* pyim.el (pyim-terminate-translation, pyim-create-pyim-word)
(pyim-select-word): use pyim-cstring-to-code-criteria.
* pyim-cstring.el (pyim-cstring-to-code-criteria): New variable.
---
pyim-cstring.el | 19 ++++++++++++++-----
pyim.el | 19 ++++++++++---------
2 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/pyim-cstring.el b/pyim-cstring.el
index 31a7f8d..7a8131e 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -33,6 +33,15 @@
"Chinese string tools for pyim."
:group 'pyim)
+(defvar pyim-cstring-to-code-criteria nil
+ "用于 code 选取的基准字符串。
+
+`pyim-cstring-to-codes' 获取到一个词条的多个 codes 时,会将所有的
+codes 与这个字符串进行比较,然后选择一个最相似的 code 输出.
+
+这个字符串主要用于全拼和双拼输入法的多音字矫正,一般使用用户输入
+生成的 imobjs 转换得到,保留了用户原始输入的许多信息。")
+
;; ** 中文字符串分词相关功能
(defun pyim-cstring-split-to-list (chinese-string &optional max-word-length
delete-dups prefer-short-word)
"一个基于 pyim 的中文分词函数。这个函数可以将中文字符
@@ -350,7 +359,7 @@ code-prefix)。当RETURN-LIST 设置为 t 时,返回一个 code list。"
(substring s4 0 1))))))
(t nil))))
-(defun pyim-cstring-to-codes (string scheme-name &optional entered)
+(defun pyim-cstring-to-codes (string scheme-name &optional criteria)
"将 STRING 转换为 SCHEME-NAME 对应的 codes."
(let ((class (pyim-scheme-get-option scheme-name :class)))
(cond ((eq class 'xingma)
@@ -358,15 +367,15 @@ code-prefix)。当RETURN-LIST 设置为 t 时,返回一个 code list。"
;;拼音使用了多音字校正
(t (let ((codes (pyim-cstring-to-pinyin string nil "-" t nil t))
codes-sorted)
- (if (< (length entered) 1)
+ (if (< (length criteria) 1)
codes
- ;; 将 code 与用户输入 entered 比对,选取一个与用户输入最类似的
+ ;; 将 所有 codes 与 criteria 字符串比对,选取相似度最高的一个
;; code. 这种处理方式适合拼音输入法。
(setq codes-sorted
(sort codes
(lambda (a b)
- (< (string-distance a entered)
- (string-distance b entered)))))
+ (< (string-distance a criteria)
+ (string-distance b criteria)))))
(list (car codes-sorted))))))))
;; ** 获取光标处中文字符串或者中文词条的功能
diff --git a/pyim.el b/pyim.el
index ae7c29a..168b33b 100644
--- a/pyim.el
+++ b/pyim.el
@@ -434,7 +434,7 @@ REFRESH-COMMON-DCACHE 已经废弃,不要再使用了。"
(setq pyim-force-input-chinese nil)
(pyim-page-hide)
(pyim-entered-erase-buffer)
- (setq pyim-entered-longest nil)
+ (setq pyim-cstring-to-code-criteria nil)
(pyim-entered-refresh-timer-reset)
(let* ((class (pyim-scheme-get-option (pyim-scheme-name) :class))
(func (intern (format "pyim-terminate-translation:%S" class))))
@@ -467,7 +467,7 @@ BUG:拼音无法有效地处理多音字。"
(let* ((scheme-name (pyim-scheme-name))
(class (pyim-scheme-get-option scheme-name :class))
(code-prefix (pyim-scheme-get-option scheme-name :code-prefix))
- (codes (pyim-cstring-to-codes word scheme-name pyim-entered-longest)))
+ (codes (pyim-cstring-to-codes word scheme-name pyim-cstring-to-code-criteria)))
;; 保存对应词条的词频
(when (> (length word) 0)
(pyim-dcache-update-iword2count word prepend wordcount-handler))
@@ -590,13 +590,14 @@ FILE 的格式与 `pyim-dcache-export' 生成的文件格式相同,
(defun pyim-select-word ()
"从选词框中选择当前词条,然后删除该词条对应拼音。"
(interactive)
- ;; 记录用户在没有多次选词前的输入,用于多音字矫正。
- (setq pyim-entered-longest
- (let ((entered (pyim-entered-get 'point-before)))
- (if (> (length pyim-entered-longest)
- (length entered))
- pyim-entered-longest
- entered)))
+ (setq pyim-cstring-to-code-criteria
+ (let ((str (mapconcat #'identity
+ (pyim-codes-create (car pyim-imobjs) (pyim-scheme-name))
+ "")))
+ (if (> (length pyim-cstring-to-code-criteria)
+ (length str))
+ pyim-cstring-to-code-criteria
+ str)))
(if (null pyim-candidates) ; 如果没有选项,输入空格
(progn
(pyim-outcome-handle 'last-char)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [elpa] externals/pyim 7b96b82: Add pyim-cstring-to-code-criteria,
ELPA Syncer <=