[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 4eebf528fc: Add textsec predicates for different types of confusability
From: |
Lars Ingebrigtsen |
Subject: |
master 4eebf528fc: Add textsec predicates for different types of confusability |
Date: |
Tue, 18 Jan 2022 04:27:27 -0500 (EST) |
branch: master
commit 4eebf528fca6f6f16168c4f76a653353f3598a35
Author: Lars Ingebrigtsen <larsi@gnus.org>
Commit: Lars Ingebrigtsen <larsi@gnus.org>
Add textsec predicates for different types of confusability
* lisp/international/textsec.el (textsec-resolved-script-set)
(textsec-single-script-confusable-p)
(textsec-mixed-script-confusable-p)
(textsec-whole-script-confusable-p): New functions.
---
lisp/international/textsec.el | 30 ++++++++++++++++++++++++++++++
test/lisp/international/textsec-tests.el | 19 ++++++++++++++++++-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el
index 304d69cb89..8095733e09 100644
--- a/lisp/international/textsec.el
+++ b/lisp/international/textsec.el
@@ -192,6 +192,36 @@ This algorithm is described in:
(string char)))
(ucs-normalize-NFD-string string)))))
+(defun textsec-resolved-script-set (string)
+ "Return the resolved script set for STRING.
+This is the minimal covering script set for STRING, but is nil if
+STRING isn't a single script string."
+ (and (textsec-single-script-p string)
+ (textsec-covering-scripts string)))
+
+(defun textsec-single-script-confusable-p (string1 string2)
+ "Return non-nil if STRING1 and STRING2 are single-script confusables."
+ (and (equal (textsec-unconfuse-string string1)
+ (textsec-unconfuse-string string2))
+ ;; Besides unconfusing to the same string, they must share at
+ ;; least one resolved script.
+ (seq-intersection (textsec-resolved-script-set string1)
+ (textsec-resolved-script-set string2))))
+
+(defun textsec-mixed-script-confusable-p (string1 string2)
+ "Return t if STRING1 and STRING2 are mixed-script confusables, else nil."
+ (and (equal (textsec-unconfuse-string string1)
+ (textsec-unconfuse-string string2))
+ ;; And they have no resolved scripts in common (a nil set shares none).
+ (null (seq-intersection (textsec-resolved-script-set string1)
+ (textsec-resolved-script-set string2)))))
+
+(defun textsec-whole-script-confusable-p (string1 string2)
+ "Return non-nil if STRING1 and STRING2 are whole-script confusables."
+ (and (textsec-mixed-script-confusable-p string1 string2)
+ (textsec-single-script-p string1)
+ (textsec-single-script-p string2)))
+
(provide 'textsec)
;;; textsec.el ends here
diff --git a/test/lisp/international/textsec-tests.el b/test/lisp/international/textsec-tests.el
index 50106bb955..15b6b21b34 100644
--- a/test/lisp/international/textsec-tests.el
+++ b/test/lisp/international/textsec-tests.el
@@ -86,11 +86,28 @@
(should-not (textsec-mixed-numbers-p "8foo8"))
(should (textsec-mixed-numbers-p "8foo৪")))
+(ert-deftest test-resolved ()
+ (should (equal (textsec-resolved-script-set "ǉeto") ; U+01C9 digraph, not ASCII "lj"
+ '(latin)))
+ (should-not (textsec-resolved-script-set "Сirсlе"))) ; mixes Cyrillic and Latin letters
+
(ert-deftest test-confusable ()
(should (equal (textsec-unconfuse-string "ǉeto") "ljeto")) ; input starts with U+01C9, result is ASCII
(should (textsec-ascii-confusable-p "ǉeto"))
(should-not (textsec-ascii-confusable-p "ljeto"))
(should (equal (textsec-unconfuse-string "～") "〜")) ; U+FF5E fullwidth tilde -> U+301C wave dash
- (should-not (textsec-ascii-confusable-p "～")))
+ (should-not (textsec-ascii-confusable-p "～"))
+
+ (should (textsec-single-script-confusable-p "ǉeto" "ljeto"))
+ (should-not (textsec-single-script-confusable-p "paypal" "pаypаl"))
+ (should-not (textsec-single-script-confusable-p "scope""ѕсоре"))
+
+ (should-not (textsec-mixed-script-confusable-p "ǉeto" "ljeto"))
+ (should (textsec-mixed-script-confusable-p "paypal" "pаypаl"))
+ (should (textsec-mixed-script-confusable-p "scope""ѕсоре"))
+
+ (should-not (textsec-whole-script-confusable-p "ǉeto" "ljeto"))
+ (should-not (textsec-whole-script-confusable-p "paypal" "pаypаl"))
+ (should (textsec-whole-script-confusable-p "scope""ѕсоре")))
;;; textsec-tests.el ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master 4eebf528fc: Add textsec predicates for different types of confusability,
Lars Ingebrigtsen <=