emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim 4b1ba0e47e 2/8: Add pyim-cstring-utils.el


From: ELPA Syncer
Subject: [elpa] externals/pyim 4b1ba0e47e 2/8: Add pyim-cstring-utils.el
Date: Sat, 28 May 2022 18:57:48 -0400 (EDT)

branch: externals/pyim
commit 4b1ba0e47e2856c32455662b5c05fac42cad4925
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Add pyim-cstring-utils.el
---
 pyim-cregexp-utils.el                    |   3 -
 pyim-cregexp.el                          |   2 +-
 pyim-cstring.el => pyim-cstring-utils.el | 211 +---------------------------
 pyim-cstring.el                          | 230 +------------------------------
 pyim.el                                  |   2 +-
 tests/pyim-tests.el                      |   1 +
 6 files changed, 7 insertions(+), 442 deletions(-)

diff --git a/pyim-cregexp-utils.el b/pyim-cregexp-utils.el
index f4dbc8453c..d8c2cbf1f1 100644
--- a/pyim-cregexp-utils.el
+++ b/pyim-cregexp-utils.el
@@ -107,9 +107,6 @@ buffer."
 ;; 让 ivy 支持 code 搜索。
 (declare-function ivy--regex-plus "ivy")
 
-;; pyim-cregexp.el 是核心库,为保持稳定,不能添加太多的依赖,所以我把这个函数从
-;; pyim-cregexp.el 移到这里,也许我应该更改一下这个函数的名称,让其不使用
-;; pyim-cregexp 前缀,但由于向后兼容的原因,我决定再观察观察。
 (defun pyim-cregexp-ivy (str)
   "Let ivy support search Chinese with pinyin feature."
   (let ((x (ivy--regex-plus str))
diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 69a9829a12..540f2ca513 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -1,4 +1,4 @@
-;;; pyim-cregexp.el --- Chinese regexp core for pyim.        -*- 
lexical-binding: t; -*-
+;;; pyim-cregexp.el --- Chinese regexp core tools for pyim.        -*- 
lexical-binding: t; -*-
 
 ;; * Header
 ;; Copyright (C) 2021 Free Software Foundation, Inc.
diff --git a/pyim-cstring.el b/pyim-cstring-utils.el
similarity index 53%
copy from pyim-cstring.el
copy to pyim-cstring-utils.el
index 2c1de8af70..40f1a49251 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring-utils.el
@@ -1,4 +1,4 @@
-;;; pyim-cstring.el --- Chinese string tools for pyim.        -*- 
lexical-binding: t; -*-
+;;; pyim-cstring-utils.el --- Chinese string tools for pyim.        -*- 
lexical-binding: t; -*-
 
 ;; * Header
 ;; Copyright (C) 2021 Free Software Foundation, Inc.
@@ -28,66 +28,12 @@
 ;;; Code:
 ;; * 代码                                                           :code:
 (require 'cl-lib)
-(require 'pyim-common)
-(require 'pyim-dcache)
-(require 'pyim-scheme)
-(require 'pyim-pymap)
+(require 'pyim-cstring)
 
 (defgroup pyim-cstring nil
   "Chinese string tools for pyim."
   :group 'pyim)
 
-(defvar pyim-cstring-to-code-criteria nil
-  "用于 code 选取的基准字符串。
-
-`pyim-cstring-to-codes' 获取到一个词条的多个 codes 时,会将所有的
-codes 与这个字符串进行比较,然后选择一个最相似的 code 输出.
-
-这个字符串主要用于全拼和双拼输入法的多音字矫正,一般使用用户输入
-生成的 imobjs 转换得到,保留了用户原始输入的许多信息。")
-
-(defun pyim-cstring-partition (string &optional to-cchar)
-  "STRING partition.
-
-1. Hello你好 -> (\"Hello\" \"你\" \"好\"), when TO-CCHAR is non-nil.
-2. Hello你好 -> (\"Hello\" \"你好\"), when TO-CCHAR is nil."
-  ;; NOTE: 使用5个\0作为分割符有没有其它副作用?有待观察。
-  (let ((sep (make-string 5 ?\0)))
-    (if (pyim-string-match-p "\\CC" string)
-        ;; 处理中英文混合的情况
-        (remove "" (split-string
-                    (replace-regexp-in-string
-                     (if to-cchar "\\(\\cc\\)" "\\(\\cc+\\)")
-                     (concat sep "\\1" sep) string)
-                    sep))
-      (if to-cchar
-          (cl-mapcar #'char-to-string string)
-        (list string)))))
-
-(defun pyim-cstring-substrings (cstring &optional max-length number)
-  "找出 CSTRING 中所有长度不超过 MAX-LENGTH 的子字符串,生成一个 alist。
-
-这个 alist 中的每个元素为:(子字符串 开始位置 结束位置), 参数
-NUMBER 用于递归,表示子字符串在 CSTRING 中的位置。"
-  (let ((number (or number 0)))
-    (cond
-     ((= (length cstring) 0) nil)
-     (t (append (pyim-cstring-substrings-1 cstring max-length number)
-                (pyim-cstring-substrings (substring cstring 1)
-                                         max-length (1+ number)))))))
-
-(defun pyim-cstring-substrings-1 (cstring max-length number)
-  "`pyim-cstring-substrings' 的内部函数。"
-  (cond
-   ((< (length cstring) 2) nil)
-   (t (append
-       (let ((length (length cstring)))
-         (when (<= length (or max-length 6))
-           (list (list cstring number (+ number length)))))
-       (pyim-cstring-substrings-1
-        (substring cstring 0 -1)
-        max-length number)))))
-
 ;; ** 中文字符串分词相关功能
 (defun pyim-cstring-split-to-list (chinese-string &optional max-word-length 
delete-dups prefer-short-word)
   "一个基于 pyim 的中文分词函数。这个函数可以将中文字符串
@@ -194,158 +140,6 @@ CHINESE-STRING 分词,得到一个词条 alist,这个 alist 的元素都是
   (goto-char (point-min))
   (message "分词完成!"))
 
-;; ** 中文字符串到拼音的转换工具
-;;;###autoload
-(defun pyim-cstring-to-pinyin (string &optional shou-zi-mu separator
-                                      return-list ignore-duo-yin-zi 
adjust-duo-yin-zi)
-  "将汉字字符串转换为对应的拼音字符串的工具.
-
-如果 SHOU-ZI-MU 设置为 t, 转换仅得到拼音首字母字符串。当
-RETURN-LIST 设置为 t 时,返回一个拼音列表,这个列表包含词条的一个
-或者多个拼音(词条包含多音字时);如果 IGNORE-DUO-YIN-ZI 设置为
-t, 遇到多音字时,只使用第一个拼音,其它拼音忽略;当
-ADJUST-DUO-YIN-Zi 设置为 t 时, `pyim-cstring-to-pinyin' 会使用 pyim 已
-安装的词库来校正多音字,但这个功能有一定的限制:
-
-1. pyim 普通词库中不存在的词条不能较正
-2. 多音字校正速度比较慢,实时转换会产生卡顿。
-
-BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结果会
-包含多余的连接符:比如: '你=好' --> 'ni-=-hao'"
-  (if (not (pyim-string-match-p "\\cc" string))
-      (if return-list
-          (list string)
-        string)
-    (let (pinyins-list pinyins-list-adjusted)
-      ;; ("Hello" "银" "行") -> (("Hello") ("yin") ("hang" "xing"))
-      (setq pinyins-list
-            (mapcar (lambda (str)
-                      (if (pyim-string-match-p "\\cc" str)
-                          (pyim-pymap-cchar2py-get str)
-                        (list str)))
-                    (pyim-cstring-partition string t)))
-
-      ;; 通过排列组合的方式, 重排 pinyins-list。
-      ;; 比如:(("Hello") ("yin") ("hang" "xing")) -> (("Hello" "yin" "hang") 
("Hello" "yin" "xing"))
-      (setq pinyins-list
-            (pyim-permutate-list pinyins-list))
-
-      ;; 使用 pyim 的安装的词库来校正多音字。
-      ;; FIXME:如果 string 包含非中文的字符,那么多音字矫正将不起作用。
-      (when adjust-duo-yin-zi
-        (pyim-dcache-init-variables)
-        (dolist (pylist pinyins-list)
-          (let* ((py-str (mapconcat #'identity pylist "-"))
-                 (words-from-dicts
-                  (pyim-dcache-get py-str '(code2word))))
-            (when (member string words-from-dicts)
-              (push pylist pinyins-list-adjusted))))
-        (setq pinyins-list-adjusted
-              (nreverse pinyins-list-adjusted)))
-
-      ;; 返回拼音字符串或者拼音列表
-      (let* ((pinyins-list
-              (or pinyins-list-adjusted
-                  pinyins-list))
-             (list (mapcar (lambda (x)
-                             (mapconcat (lambda (str)
-                                          (if shou-zi-mu
-                                              (substring str 0 1)
-                                            str))
-                                        x separator))
-                           (if ignore-duo-yin-zi
-                               (list (car pinyins-list))
-                             pinyins-list))))
-        (if return-list
-            list
-          (string-join list " "))))))
-
-;;;###autoload
-(defun pyim-cstring-to-pinyin-simple (string &optional shou-zi-mu separator 
return-list)
-  "简化版的 `pyim-cstring-to-pinyin', 不处理多音字。"
-  (pyim-cstring-to-pinyin string shou-zi-mu separator return-list t))
-
-;; ** 中文字符串到形码的转换工具
-(defun pyim-cstring-to-xingma (string scheme-name &optional return-list)
-  "返回汉字 STRING 对应形码方案 SCHEME-NAME 的 code (不包括
-code-prefix)。当RETURN-LIST 设置为 t 时,返回一个 code list。"
-  (when (string-match-p "^\\cc+\\'" string)
-    (let* ((prefix (pyim-scheme-get-option scheme-name :code-prefix))
-           (func (intern (concat "pyim-cstring-to-xingma:" (symbol-name 
scheme-name))))
-           (dcache-codes (mapcar (lambda (x)
-                                   (when (string-prefix-p prefix x)
-                                     (string-remove-prefix prefix x)))
-                                 (sort (cl-copy-list (pyim-dcache-call-api 
'search-word-code string))
-                                       (lambda (a b) (> (length a) (length 
b))))))
-           (codes (or (remove nil dcache-codes)
-                      (and (functionp func)
-                           (funcall func string scheme-name)))))
-      (when codes
-        (if return-list
-            codes
-          ;; 如果要返回一个字符串,那就返回第一个,也就是最长的那个编码。
-          (car codes))))))
-
-(defun pyim-cstring-to-xingma:wubi (string &optional scheme-name)
-  "返回汉字 STRING 的五笔编码 (不包括 code-prefix) 编码列表。"
-  (let ((length (length string))
-        (string (split-string string "" t)))
-    (cond
-     ;; 双字词,分别取两个字的前两个编码
-     ((eq length 2)
-      (let ((s1 (pyim-cstring-to-xingma (nth 0 string) scheme-name))
-            (s2 (pyim-cstring-to-xingma (nth 1 string) scheme-name)))
-        (when (and s1 s2)
-          (list (concat (substring s1 0 2)
-                        (substring s2 0 2))))))
-     ;; 三字词,取前二字的首编码,及第三个字的前两个编码
-     ((eq length 3)
-      (let ((s1 (pyim-cstring-to-xingma (nth 0 string) scheme-name))
-            (s2 (pyim-cstring-to-xingma (nth 1 string) scheme-name))
-            (s3 (pyim-cstring-to-xingma (nth 2 string) scheme-name)))
-        (when (and s1 s2 s3)
-          (list (concat (substring s1 0 1)
-                        (substring s2 0 1)
-                        (substring s3 0 2))))))
-     ;; 四字词及以上,分别前三个字及最后一个字的首编码
-     ((> length 3)
-      (let ((s1 (pyim-cstring-to-xingma (nth 0 string) scheme-name))
-            (s2 (pyim-cstring-to-xingma (nth 1 string) scheme-name))
-            (s3 (pyim-cstring-to-xingma (nth 2 string) scheme-name))
-            (s4 (pyim-cstring-to-xingma (nth (1- length) string) scheme-name)))
-        (when (and s1 s2 s3 s4)
-          (list (concat (substring s1 0 1)
-                        (substring s2 0 1)
-                        (substring s3 0 1)
-                        (substring s4 0 1))))))
-     (t nil))))
-
-(defun pyim-cstring-to-codes (string scheme-name &optional criteria)
-  "将 STRING 转换为 SCHEME-NAME 对应的 codes.
-
-当 pyim class 为拼音,并且提供 CRITERIA 字符串时,检索到的所有
-codes 会和这个字符串进行比较,然后选择一个相似度最高的 code 作为
-输出,这种处理方式适合拼音输入法,形码输入法一般不需要类似的操作。
-
-CRITERIA 字符串一般是通过 imobjs 构建的,它保留了用户原始的输入信
-息。"
-  (let ((class (pyim-scheme-get-option scheme-name :class)))
-    (cond ((eq class 'xingma)
-           (pyim-cstring-to-xingma string scheme-name t))
-          ;;拼音使用了多音字校正
-          (t (let ((codes (pyim-cstring-to-pinyin string nil "-" t nil t))
-                   codes-sorted)
-               (if (< (length criteria) 1)
-                   codes
-                 ;; 将 所有 codes 与 criteria 字符串比对,选取相似度最高的一个
-                 ;; code. 这种处理方式适合拼音输入法。
-                 (setq codes-sorted
-                       (sort codes
-                             (lambda (a b)
-                               (< (pyim-string-distance a criteria)
-                                  (pyim-string-distance b criteria)))))
-                 (list (car codes-sorted))))))))
-
 ;; ** 获取光标处中文字符串或者中文词条的功能
 (defun pyim-cstring-at-point (&optional number)
   "获取光标一个中文字符串,字符数量为:NUMBER."
@@ -472,3 +266,4 @@ CRITERIA 字符串一般是通过 imobjs 构建的,它保留了用户原始的
 (provide 'pyim-cstring)
 
 ;;; pyim-cstring.el ends here
+
diff --git a/pyim-cstring.el b/pyim-cstring.el
index 2c1de8af70..331124941e 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -1,4 +1,4 @@
-;;; pyim-cstring.el --- Chinese string tools for pyim.        -*- 
lexical-binding: t; -*-
+;;; pyim-cstring.el --- Chinese string core tools for pyim.        -*- 
lexical-binding: t; -*-
 
 ;; * Header
 ;; Copyright (C) 2021 Free Software Foundation, Inc.
@@ -88,112 +88,6 @@ NUMBER 用于递归,表示子字符串在 CSTRING 中的位置。"
         (substring cstring 0 -1)
         max-length number)))))
 
-;; ** 中文字符串分词相关功能
-(defun pyim-cstring-split-to-list (chinese-string &optional max-word-length 
delete-dups prefer-short-word)
-  "一个基于 pyim 的中文分词函数。这个函数可以将中文字符串
-CHINESE-STRING 分词,得到一个词条 alist,这个 alist 的元素都是列
-表,其中第一个元素为分词得到的词条,第二个元素为词条相对于字符串
-中的起始位置,第三个元素为结束位置。分词时,默认词条不超过6个字符,
-用户可以通过 MAX-WORD-LENGTH 来自定义,但值得注意的是:这个值设置
-越大,分词速度越慢。
-
-如果 DELETE-DUPS 设置为 non-nil, 一个中文字符串只保留一种分割方式。
-比如:
-
-     我爱北京天安门 => 我爱 北京 天安门
-
-如果 PREFER-SHORT-WORD 为 non-nil, 去重的时候则优先保留较短的词。
-
-注意事项:
-1. 这个工具使用暴力匹配模式来分词,*不能检测出* pyim 词库中不存在
-的中文词条。
-2. 这个函数的分词速度比较慢,仅仅适用于中文短句的分词,不适用于文
-章分词。根据评估,20个汉字组成的字符串需要大约0.3s, 40个汉字消耗
-1s,随着字符串长度的增大消耗的时间呈几何倍数增加。"
-  ;; 如果 pyim 词库没有加载,加载 pyim 词库,确保 `pyim-dcache-get' 可以正常运行。
-  (pyim-dcache-init-variables)
-
-  (let (result)
-    (dolist (string-list (pyim-cstring-substrings chinese-string 
max-word-length))
-      (let ((pinyin-list (pyim-cstring-to-pinyin (car string-list) nil "-" t)))
-        (dolist (pinyin pinyin-list)
-          (let ((words (pyim-dcache-get pinyin '(code2word)))) ; 忽略个人词库可以提高速度
-            (dolist (word words)
-              (when (equal word (car string-list))
-                (push string-list result)))))))
-
-    (if delete-dups
-        ;;  判断两个词条在字符串中的位置是否冲突,如果冲突,仅保留一个。
-        (cl-delete-duplicates
-         result
-         :test (lambda (x1 x2)
-                 (let ((begin1 (nth 1 x1))
-                       (begin2 (nth 1 x2))
-                       (end1 (nth 2 x1))
-                       (end2 (nth 2 x2)))
-                   (not (or (<= end1 begin2)
-                            (<= end2 begin1)))))
-         :from-end prefer-short-word)
-      result)))
-
-(defun pyim-cstring-split-to-string (string &optional prefer-short-word
-                                            separator max-word-length)
-  "将中文字符串 STRING 分词.
-
-在分词的位置插入空格或者自定义分隔符 SEPERATERS,默认情况下较长的
-词条优先使用,如果 PREFER-SHORT-WORD 设置为 t,则优先使用较短的
-词条。默认最长词条不超过6个字符,用户可以通 MAX-WORD-LENGTH 来
-自定义词条的最大长度,但值得注意的是,这个值设置越大,分词速度越
-慢。"
-  (mapconcat (lambda (str)
-               (when (> (length str) 0)
-                 (if (not (pyim-string-match-p "\\CC" str))
-                     (pyim-cstring-split-to-string-1
-                      str prefer-short-word separator max-word-length)
-                   str)))
-             (pyim-cstring-partition string) (or separator " ")))
-
-(defun pyim-cstring-split-to-string-1 (chinese-string &optional 
prefer-short-word
-                                                      separator 
max-word-length)
-  "`pyim-cstring-split-to-string' 内部函数。"
-  (let ((str-length (length chinese-string))
-        (word-list (pyim-cstring-split-to-list
-                    chinese-string max-word-length t prefer-short-word))
-        position-list result)
-
-    ;; 提取词条相对于字符串的位置信息。
-    (dolist (word word-list)
-      (push (nth 1 word) position-list)
-      (push (nth 2 word) position-list))
-
-    ;; 将位置信息由小到大排序。
-    (setq position-list
-          (cl-delete-duplicates (sort position-list #'<)))
-
-    ;; 在分词的位置插入空格或者用户指定的分隔符。
-    (dotimes (i str-length)
-      (when (and (> i 0) (member i position-list))
-        (push (or separator " ") result))
-      (push (substring chinese-string i (1+ i)) result))
-    (setq result (nreverse result))
-    (string-join result)))
-
-(defun pyim-cstring-split-buffer ()
-  "将一个 buffer 中的中文文章,进行分词操作。"
-  (interactive)
-  (message "分词开始!")
-  (goto-char (point-min))
-  (while (not (eobp))
-    (let ((string (buffer-substring-no-properties
-                   (line-beginning-position)
-                   (line-end-position))))
-      (delete-region (line-beginning-position)
-                     (min (+ (line-end-position) 1) (point-max)))
-      (insert (pyim-cstring-split-to-string string))
-      (insert "\n")))
-  (goto-char (point-min))
-  (message "分词完成!"))
-
 ;; ** 中文字符串到拼音的转换工具
 ;;;###autoload
 (defun pyim-cstring-to-pinyin (string &optional shou-zi-mu separator
@@ -346,128 +240,6 @@ CRITERIA 字符串一般是通过 imobjs 构建的,它保留了用户原始的
                                   (pyim-string-distance b criteria)))))
                  (list (car codes-sorted))))))))
 
-;; ** 获取光标处中文字符串或者中文词条的功能
-(defun pyim-cstring-at-point (&optional number)
-  "获取光标一个中文字符串,字符数量为:NUMBER."
-  (save-excursion
-    (let* ((point (point))
-           (begin (- point number))
-           (begin (if (> begin 0)
-                      begin
-                    (point-min)))
-           (string (buffer-substring-no-properties
-                    point begin)))
-      (when (and (stringp string)
-                 (= (length string) number)
-                 (not (pyim-string-match-p "\\CC" string)))
-        string))))
-
-(defun pyim-cstring-words-at-point (&optional end-of-point)
-  "获取光标当前的词条列表,当 END-OF-POINT 设置为 t 时,获取光标后的词条列表。
-词条列表的每一个元素都是列表,这些列表的第一个元素为词条,第二个元素为光标处到词条
-头部的距离,第三个元素为光标处到词条尾部的距离。
-
-其工作原理是:
-
-1. 使用 `thing-at-point' 获取当前光标处的一个字符串,一般而言:英文会得到
-   一个单词,中文会得到一个句子。
-2. 英文单词直接返回这个单词的列表。
-3. 中文句子首先用 `pyim-cstring-split-to-list' 分词,然后根据光标在中文句子
-   中的位置,筛选出符合要求的中文词条。得到并返回 *一个* 或者 *多个* 词条
-   的列表。"
-  ;;
-  ;;                                光标到词 光标到词
-  ;;                                首的距离 尾的距离
-  ;;                                       | |
-  ;; 获取光标当前的词<I>条列表 -> (("的词" 2 0) ("词条" 1 1))
-  ;;
-  (let* ((case-fold-search t)
-         (current-pos (point))
-         (current-char
-          (if end-of-point
-              (string (following-char))
-            (string (preceding-char))))
-         (str (thing-at-point 'word t))
-         (str-length (length str))
-         (str-boundary (bounds-of-thing-at-point 'word))
-         (str-beginning-pos (when str-boundary
-                              (car str-boundary)))
-         (str-end-pos (when str-boundary
-                        (cdr str-boundary)))
-         (str-offset
-          (when (and str-beginning-pos str-end-pos)
-            (if (= current-pos str-end-pos)
-                (- str-end-pos str-beginning-pos)
-              (- current-pos str-beginning-pos))))
-         str-offset-adjusted words-alist results)
-
-    ;; 当字符串长度太长时, `pyim-cstring-split-to-list'
-    ;; 的速度比较慢,这里确保待分词的字符串长度不超过10.
-    (when (and str (not (pyim-string-match-p "\\CC" str)))
-      (if (> str-offset 5)
-          (progn (setq str-offset-adjusted 5)
-                 (setq str (substring str
-                                      (- str-offset 5)
-                                      (min (+ str-offset 5) str-length))))
-        (setq str-offset-adjusted str-offset)
-        (setq str (substring str 0 (min 9 str-length)))))
-
-    (cond
-     (;; FIXME: 在光标在 buffer 开头或者行首时,直接返回 nil.
-      ;; 也许这块应该更好的处理。
-      (or (bobp) (eq (point) (line-beginning-position))) nil)
-     ((and str (not (pyim-string-match-p "\\CC" str)))
-      (setq words-alist
-            (pyim-cstring-split-to-list str))
-      (dolist (word-list words-alist)
-        (let ((word-begin (nth 1 word-list))
-              (word-end (nth 2 word-list)))
-          (if (if end-of-point
-                  (and (< str-offset-adjusted word-end)
-                       (>= str-offset-adjusted word-begin))
-                (and (<= str-offset-adjusted word-end)
-                     (> str-offset-adjusted word-begin)))
-              (push (list (car word-list)
-                          (- str-offset-adjusted word-begin) ;; 例如: ("你好" 1 1)
-                          (- word-end str-offset-adjusted))
-                    results))))
-      (or results
-          (list (if end-of-point
-                    (list current-char 0 1)
-                  (list current-char 1 0)))))
-     (str (list (list str
-                      (- current-pos str-beginning-pos)
-                      (- str-end-pos current-pos)))))))
-
-;; ** 让 forward/backward 支持中文
-(defun pyim-cstring-forward-word (&optional arg)
-  "向前移动 ARG 英文或者中文词,向前移动时基于 *最长* 的词移动。"
-  (interactive "P")
-  (or arg (setq arg 1))
-  (dotimes (_ arg)
-    (let* ((words (pyim-cstring-words-at-point t))
-           (max-length
-            (cl-reduce #'max
-                       (cons 0 (mapcar (lambda (word)
-                                         (nth 2 word))
-                                       words))))
-           (max-length (max (or max-length 1) 1)))
-      (forward-char max-length))))
-
-(defun pyim-cstring-backward-word (&optional arg)
-  "向后移动 ARG 个英文或者中文词,向后移动时基于 *最长* 的词移动。"
-  (interactive "P")
-  (or arg (setq arg 1))
-  (dotimes (_ arg)
-    (let* ((words (pyim-cstring-words-at-point))
-           (max-length
-            (cl-reduce #'max
-                       (cons 0 (mapcar (lambda (word)
-                                         (nth 1 word))
-                                       words))))
-           (max-length (max (or max-length 1) 1)))
-      (backward-char max-length))))
-
 ;; * Footer
 (provide 'pyim-cstring)
 
diff --git a/pyim.el b/pyim.el
index e016c050d1..82d642f288 100644
--- a/pyim.el
+++ b/pyim.el
@@ -839,7 +839,7 @@ FILE 的格式与 `pyim-dcache-export' 生成的文件格式相同,
       (deactivate-mark))))
 
 ;; ** pyim 中文字符串工具
-(require 'pyim-cstring)
+(require 'pyim-cstring-utils)
 (defalias 'pyim-forward-word 'pyim-cstring-forward-word)
 (defalias 'pyim-backward-word 'pyim-cstring-backward-word)
 ;; PYIM 重构以前使用的一些函数名称,alias 一下,便于兼容。
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 0f48bc54d8..3fe4b68708 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -33,6 +33,7 @@
 (require 'pyim)
 (require 'pyim-basedict) ;pyim-test.el use pyim-basedict v0.5.0.
 (require 'pyim-cstring-utils)
+(require 'pyim-cregexp-utils)
 (require 'pyim-dhashcache)
 (require 'pyim-dregcache)
 (require 'pyim-pymap-utils)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]