emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master b968553: Better detection of potentially malicious bidi text


From: Eli Zaretskii
Subject: master b968553: Better detection of potentially malicious bidi text
Date: Thu, 4 Nov 2021 14:34:00 -0400 (EDT)

branch: master
commit b96855310efed13e0db1403759b686b9bc3e7490
Author: Eli Zaretskii <eliz@gnu.org>
Commit: Eli Zaretskii <eliz@gnu.org>

    Better detection of potentially malicious bidi text
    
    * src/bidi.c (bidi_find_first_overridden): Extend to detect more
    subtle effects of directional formatting controls, to include
    embeddings and isolates.
    * src/xdisp.c (Fbidi_find_overridden_directionality): Accept an
    additional argument BASE-DIR to specify the base direction of the
    paragraphs.
    
    * lisp/international/mule-cmds.el (confusingly-reordered): New
    face.
    (reorder-starters, reorder-enders): New variables.
    (highlight-confusing-reorderings): New command to detect and
    highlight suspiciously reordered text.
    
    * test/src/xdisp-tests.el
    (xdisp-tests--find-directional-overrides): New test.
    
    * etc/NEWS: Announce the new and improved features.
    * etc/tutorials/TUTORIAL.he: Fix embeddings with incorrect
    directions.
---
 etc/NEWS                        |  8 +++++++
 etc/tutorials/TUTORIAL.he       |  4 ++--
 lisp/international/mule-cmds.el | 47 +++++++++++++++++++++++++++++++++++++++++
 src/bidi.c                      | 20 ++++++++++++++++--
 src/xdisp.c                     | 19 +++++++++++------
 test/src/xdisp-tests.el         | 15 +++++++++++++
 6 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index bd5dbf1..30df529 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -93,6 +93,14 @@ Image specifiers can now use ':type webp'.
 For example, an alist entry as '(window-width . (body-columns . 40))'
 will make the body of the chosen window 40 columns wide.
 
+** Better detection of text suspiciously reordered on display.
+The function 'bidi-find-overridden-directionality' has been extended
+to detect reordering effects produced by embeddings and isolates
+(started by directional formatting control characters such as RLO and
+LRI).  The new command 'highlight-confusing-reorderings' finds and
+highlights segments of buffer text whose reordering for display is
+suspicious and could be malicious.
+
 
 * Editing Changes in Emacs 29.1
 
diff --git a/etc/tutorials/TUTORIAL.he b/etc/tutorials/TUTORIAL.he
index 2ee4f74..465768a 100644
--- a/etc/tutorials/TUTORIAL.he
+++ b/etc/tutorials/TUTORIAL.he
@@ -1,4 +1,4 @@
-שיעור ראשון בשימוש ב־‫Emacs‬. זכויות שימוש ראה בסוף המסמך.
+שיעור ראשון בשימוש ב־‪Emacs‬. זכויות שימוש ראה בסוף המסמך.
 
 פקודות רבות של Emacs משתמשות במקש CONTROL (בדרך־כלל מסומן ב־CTRL)
 או במקש META (בדרך־כלל מסומן ALT). במקום לציין את כל השמות האפשריים
@@ -24,7 +24,7 @@
 שימו לב לחפיפה של שתי שורות כאשר אתם עוברים ממסך למסך, מה שמבטיח רציפות
 מסוימת בעת קריאת הטקסט.
 
-הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־‫Emacs‬ שהותאם קלות עבורכם.
+הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־‪Emacs‬ שהותאם קלות עבורכם.
 בהמשך תקבלו הוראות לנסות פקודות שונות כדי לבצע שינויים בטקסט הזה. אם
 במקרה תשנו את הטקסט לפני שנבקש, אל דאגה: זוהי "עריכה" שהיא יעודו של
 Emacs.
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index be4a4eb..8a64d61 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -3259,4 +3259,51 @@ as names, not numbers."
 (define-obsolete-function-alias 'ucs-insert 'insert-char "24.3")
 (define-key ctl-x-map "8\r" 'insert-char)
 
+(defface confusingly-reordered
+  '((t :inherit underline :underline (:style wave :color "Red1")))
+  "Face for highlighting text that was bidi-reordered in confusing ways."
+  :version "29.1")
+
+(defvar reorder-starters "[\u202A\u202B\u202D\u202E\u2066-\u2068]+"
+  "Regular expression for characters that start forced-reordered text.")
+(defvar reorder-enders "[\u202C\u2069]+\\|\n"
+  "Regular expression for characters that end forced-reordered text.")
+
+(defun highlight-confusing-reorderings (beg end)
+  "Highlight text in region that might be bidi-reordered in suspicious ways.
+This command finds and highlights segments of buffer text that could have
+been reordered on display by using directional control characters, such
+as RLO and LRI, in a way that their display is deliberately meant to
+confuse the reader.  These techniques can be used for obfuscating
+malicious source code.  The suspicious stretches of buffer text are
+highlighted using the `confusingly-reordered' face.
+
+If the region is active, check the text inside the region.  Otherwise
+check the entire buffer.  When called from Lisp, pass BEG and END to
+specify the portion of the buffer to check."
+  (interactive
+   (if (use-region-p)
+       (list (region-beginning) (region-end))
+     (list (point-min) (point-max))))
+  (save-excursion
+    (let (next)
+      (goto-char beg)
+      (while (setq next
+                   (bidi-find-overridden-directionality
+                    (point) end nil
+                    (current-bidi-paragraph-direction)))
+        (goto-char next)
+        (let ((start
+               (save-excursion
+                 (re-search-backward reorder-starters nil t)))
+              (finish
+               (save-excursion
+                 (re-search-forward reorder-enders nil t))))
+          (with-silent-modifications
+            (add-text-properties start (1- finish)
+                                 '(font-lock-face
+                                   'confusingly-reordered
+                                   face 'confusingly-reordered)))
+          (goto-char finish))))))
+
 ;;; mule-cmds.el ends here
diff --git a/src/bidi.c b/src/bidi.c
index 1413ba6..3cdcd7d 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -3564,11 +3564,17 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
 }
 
 /* Utility function for looking for strong directional characters
-   whose bidi type was overridden by a directional override.  */
+   whose bidi type was overridden by directional override or embedding
+   or isolate control characters.  */
 ptrdiff_t
 bidi_find_first_overridden (struct bidi_it *bidi_it)
 {
   ptrdiff_t found_pos = ZV;
+  /* Maximum bidi levels we allow for L2R and R2L characters.  Note
+     that these are levels after resolving explicit embeddings,
+     overrides, and isolates, i.e. before resolving implicit levels.  */
+  int max_l2r = bidi_it->paragraph_dir == L2R ? 0 : 2;
+  int max_r2l = 1;
 
   do
     {
@@ -3577,10 +3583,20 @@ bidi_find_first_overridden (struct bidi_it *bidi_it)
         former.  */
       bidi_type_t type = bidi_resolve_weak (bidi_it);
 
+      /* Detect strong L or R types that have been overridden by
+        explicit overrides.  */
       if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
          || (type == STRONG_L
              && (bidi_it->orig_type == STRONG_R
-                 || bidi_it->orig_type == STRONG_AL)))
+                 || bidi_it->orig_type == STRONG_AL))
+         /* Detect strong L or R types that were pushed into higher
+            embedding levels (and will thus reorder) by explicit
+            embeddings and isolates.  */
+         || (bidi_it->orig_type == STRONG_L
+             && bidi_it->level_stack[bidi_it->stack_idx].level > max_l2r)
+         || ((bidi_it->orig_type == STRONG_R
+              || bidi_it->orig_type == STRONG_AL)
+             && bidi_it->level_stack[bidi_it->stack_idx].level > max_r2l))
        found_pos = bidi_it->charpos;
     } while (found_pos == ZV
             && bidi_it->charpos < ZV
diff --git a/src/xdisp.c b/src/xdisp.c
index 39ede3c..646beed 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -24511,7 +24511,7 @@ See also `bidi-paragraph-direction'.  */)
 
 DEFUN ("bidi-find-overridden-directionality",
        Fbidi_find_overridden_directionality,
-       Sbidi_find_overridden_directionality, 2, 3, 0,
+       Sbidi_find_overridden_directionality, 3, 4, 0,
        doc: /* Return position between FROM and TO where directionality was overridden.
 
 This function returns the first character position in the specified
@@ -24530,12 +24530,18 @@ a buffer is preferable when the buffer is displayed in some window,
 because this function will then be able to correctly account for
 window-specific overlays, which can affect the results.
 
+Optional argument BASE-DIR specifies the base paragraph direction
+of the text.  It should be a symbol, either `left-to-right'
+or `right-to-left', and defaults to `left-to-right'.
+
 Strong directional characters `L', `R', and `AL' can have their
 intrinsic directionality overridden by directional override
-control characters RLO (u+202e) and LRO (u+202d).  See the
-function `get-char-code-property' for a way to inquire about
+control characters RLO (u+202E) and LRO (u+202D).  They can also
+have their directionality affected by other formatting control
+characters: LRE (u+202A), RLE (u+202B), LRI (u+2066), and RLI (u+2067).
+See the function `get-char-code-property' for a way to inquire about
 the `bidi-class' property of a character.  */)
-  (Lisp_Object from, Lisp_Object to, Lisp_Object object)
+  (Lisp_Object from, Lisp_Object to, Lisp_Object object, Lisp_Object base_dir)
 {
   struct buffer *buf = current_buffer;
   struct buffer *old = buf;
@@ -24632,10 +24638,9 @@ the `bidi-class' property of a character.  */)
     }
 
   ptrdiff_t found;
+  bidi_dir_t bdir = EQ (base_dir, Qright_to_left) ? R2L : L2R;
   do {
-    /* For the purposes of this function, the actual base direction of
-       the paragraph doesn't matter, so just set it to L2R.  */
-    bidi_paragraph_init (L2R, &itb, false);
+    bidi_paragraph_init (bdir, &itb, false);
     while ((found = bidi_find_first_overridden (&itb)) < from_pos)
       ;
   } while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos);
diff --git a/test/src/xdisp-tests.el b/test/src/xdisp-tests.el
index 4e7d2ad..a385ded 100644
--- a/test/src/xdisp-tests.el
+++ b/test/src/xdisp-tests.el
@@ -99,4 +99,19 @@
            (width-in-chars (/ (car size) char-width)))
       (should (equal width-in-chars 3)))))
 
+(ert-deftest xdisp-tests--find-directional-overrides ()
+  (with-temp-buffer
+    (insert "\
+int main() {
+  bool isAdmin = false;
+  /*‮ }⁦if (isAdmin)⁩ ⁦ begin admins only */
+  printf(\"You are an admin.\\n\");
+  /* end admins only ‮ { ⁦*/
+  return 0;
+}")
+    (goto-char (point-min))
+    (should (eq (bidi-find-overridden-directionality (point-min) (point-max)
+                                                     nil)
+                46))))
+
 ;;; xdisp-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]