emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master a1ffee1e82: Add textsec-restriction-level function


From: Lars Ingebrigtsen
Subject: master a1ffee1e82: Add textsec-restriction-level function
Date: Mon, 17 Jan 2022 10:24:30 -0500 (EST)

branch: master
commit a1ffee1e82b7152772da86a3adc7513128ffefdf
Author: Lars Ingebrigtsen <larsi@gnus.org>
Commit: Lars Ingebrigtsen <larsi@gnus.org>

    Add textsec-restriction-level function
    
    * lisp/international/textsec.el (textsec-restriction-level): New
    function.
---
 lisp/international/textsec.el            | 52 ++++++++++++++++++++++++++++++++
 test/lisp/international/textsec-tests.el | 12 ++++++++
 2 files changed, 64 insertions(+)

diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el
index 884425d492..fc809d52c1 100644
--- a/lisp/international/textsec.el
+++ b/lisp/international/textsec.el
@@ -97,6 +97,58 @@ Not that a string may have several different minimal cover 
sets."
       (setq set (seq-union set (seq-difference s set))))
     (sort (delq 'common (delq 'inherited set)) #'string<)))
 
+(defun textsec-restriction-level (string)
+  "Say what restriction level STRING qualifies for.
+Levels are (in order of restrictiveness) `ascii-only',
+`single-script', `highly-restrictive', `moderately-restrictive',
+`minimally-restrictive' and `unrestricted'."
+  (let ((scripts (textsec-covering-scripts string)))
+  (cond
+   ((string-match "\\`[[:ascii:]]+\\'" string)
+    'ascii-only)
+   ((textsec-single-script-p string)
+    'single-script)
+   ((or (null (seq-difference scripts '(latin han hiragana katakana)))
+        (null (seq-difference scripts '(latin han bopomofo)))
+        (null (seq-difference scripts '(latin han hangul))))
+    'highly-restrictive)
+   ((and (= (length scripts) 2)
+         (memq 'latin scripts)
+         (seq-intersection scripts
+                           '(arabic
+                             armenian
+                             bengali
+                             bopomofo
+                             devanagari
+                             ethiopic
+                             georgian
+                             gujarati
+                             gurmukhi
+                             hangul
+                             han
+                             hebrew
+                             hiragana
+                             katakana
+                             kannada
+                             khmer
+                             lao
+                             malayalam
+                             myanmar
+                             oriya
+                             sinhala
+                             tamil
+                             telugu
+                             thaana
+                             thai
+                             tibetan)))
+    ;; The string is covered by Latin and any one other Recommended
+    ;; script, except Cyrillic, Greek.
+    'moderately-retrictive)
+   ;; Fixme `minimally-restrictive' -- needs well-formedness criteria
+   ;; and Identifier Profile.
+   (t
+    'unrestricted))))
+
 (provide 'textsec)
 
 ;;; textsec.el ends here
diff --git a/test/lisp/international/textsec-tests.el 
b/test/lisp/international/textsec-tests.el
index c80b2ba0fd..7c56229e98 100644
--- a/test/lisp/international/textsec-tests.el
+++ b/test/lisp/international/textsec-tests.el
@@ -69,4 +69,16 @@
   (should (equal (textsec-covering-scripts "〆切")
                  '(han))))
 
+(ert-deftest test-restriction-level ()
+  (should (eq (textsec-restriction-level "foo")
+              'ascii-only))
+  (should (eq (textsec-restriction-level "C𝗂𝗋𝖼𝗅𝖾")
+              'single-script))
+  (should (eq (textsec-restriction-level "切foo")
+              'highly-restrictive))
+  (should (eq (textsec-restriction-level "հfoo")
+              'moderately-retrictive))
+  (should (eq (textsec-restriction-level "Сirсlе")
+              'unrestricted)))
+
 ;;; textsec-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]