[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master b968553: Better detection of potentially malicious bidi text
From: |
Eli Zaretskii |
Subject: |
master b968553: Better detection of potentially malicious bidi text |
Date: |
Thu, 4 Nov 2021 14:34:00 -0400 (EDT) |
branch: master
commit b96855310efed13e0db1403759b686b9bc3e7490
Author: Eli Zaretskii <eliz@gnu.org>
Commit: Eli Zaretskii <eliz@gnu.org>
Better detection of potentially malicious bidi text
* src/bidi.c (bidi_find_first_overridden): Extend to detect more
subtle effects of directional formatting controls, to include
embeddings and isolates.
* src/xdisp.c (Fbidi_find_overridden_directionality): Accept an
additional argument BASE-DIR to specify the base direction of the
paragraphs.
* lisp/international/mule-cmds.el (confusingly-reordered): New
face.
(reorder-starters, reorder-enders): New variables.
(highlight-confusing-reorderings): New command to detect and
highlight suspiciously reordered text.
* test/src/xdisp-tests.el
(xdisp-tests--find-directional-overrides): New test.
* etc/NEWS: Announce the new and improved features.
* etc/tutorials/TUTORIAL.he: Fix embeddings with incorrect
directions.
---
etc/NEWS | 8 +++++++
etc/tutorials/TUTORIAL.he | 4 ++--
lisp/international/mule-cmds.el | 47 +++++++++++++++++++++++++++++++++++++++++
src/bidi.c | 20 ++++++++++++++++--
src/xdisp.c | 19 +++++++++++------
test/src/xdisp-tests.el | 15 +++++++++++++
6 files changed, 102 insertions(+), 11 deletions(-)
diff --git a/etc/NEWS b/etc/NEWS
index bd5dbf1..30df529 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -93,6 +93,14 @@ Image specifiers can now use ':type webp'.
For example, an alist entry as '(window-width . (body-columns . 40))'
will make the body of the chosen window 40 columns wide.
+** Better detection of text suspiciously reordered on display.
+The function 'bidi-find-overridden-directionality' has been extended
+to detect reordering effects produced by embeddings and isolates
+(started by directional formatting control characters such as RLO and
+LRI). The new command 'highlight-confusing-reorderings' finds and
+highlights segments of buffer text whose reordering for display is
+suspicious and could be malicious.
+
* Editing Changes in Emacs 29.1
diff --git a/etc/tutorials/TUTORIAL.he b/etc/tutorials/TUTORIAL.he
index 2ee4f74..465768a 100644
--- a/etc/tutorials/TUTORIAL.he
+++ b/etc/tutorials/TUTORIAL.he
@@ -1,4 +1,4 @@
-שיעור ראשון בשימוש ב־Emacs. זכויות שימוש ראה בסוף המסמך.
+שיעור ראשון בשימוש ב־Emacs. זכויות שימוש ראה בסוף המסמך.
פקודות רבות של Emacs משתמשות במקש CONTROL (בדרך־כלל מסומן ב־CTRL)
או במקש META (בדרך־כלל מסומן ALT). במקום לציין את כל השמות האפשריים
@@ -24,7 +24,7 @@
שימו לב לחפיפה של שתי שורות כאשר אתם עוברים ממסך למסך, מה שמבטיח רציפות
מסוימת בעת קריאת הטקסט.
-הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־Emacs שהותאם קלות עבורכם.
+הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־Emacs שהותאם קלות עבורכם.
בהמשך תקבלו הוראות לנסות פקודות שונות כדי לבצע שינויים בטקסט הזה. אם
במקרה תשנו את הטקסט לפני שנבקש, אל דאגה: זוהי "עריכה" שהיא יעודו של
Emacs.
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index be4a4eb..8a64d61 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -3259,4 +3259,51 @@ as names, not numbers."
(define-obsolete-function-alias 'ucs-insert 'insert-char "24.3")
(define-key ctl-x-map "8\r" 'insert-char)
+(defface confusingly-reordered
+ '((t :inherit underline :underline (:style wave :color "Red1")))
+ "Face for highlighting text that was bidi-reordered in confusing ways."
+ :version "29.1")
+
+(defvar reorder-starters "[\u202A\u202B\u202D\u202E\u2066-\u2068]+"
+ "Regular expression for characters that start forced-reordered text.")
+(defvar reorder-enders "[\u202C\u2069]+\\|\n"
+ "Regular expression for characters that end forced-reordered text.")
+
+(defun highlight-confusing-reorderings (beg end)
+  "Highlight text in region that might be bidi-reordered in suspicious ways.
+This command finds and highlights segments of buffer text that could have
+been reordered on display by using directional control characters, such
+as RLO and LRI, in a way that their display is deliberately meant to
+confuse the reader.  These techniques can be used for obfuscating
+malicious source code.  The suspicious stretches of buffer text are
+highlighted using the `confusingly-reordered' face.
+
+If the region is active, check the text inside the region.  Otherwise
+check the entire buffer.  When called from Lisp, pass BEG and END to
+specify the portion of the buffer to check."
+  (interactive
+   (if (use-region-p)
+       (list (region-beginning) (region-end))
+     (list (point-min) (point-max))))
+  (save-excursion
+    (goto-char beg)
+    (let (next)
+      ;; Stop at END: the terminator search below may move point past it.
+      (while (and (< (point) end)
+                  (setq next (bidi-find-overridden-directionality
+                              (point) end nil
+                              (current-bidi-paragraph-direction))))
+        (goto-char next)
+        ;; Either search may fail; fall back to NEXT or the buffer end.
+        (let* ((start (save-excursion
+                        (or (re-search-backward reorder-starters nil t) next)))
+               (fin (save-excursion (re-search-forward reorder-enders nil t)))
+               (finish (or fin (point-max))))
+          (with-silent-modifications
+            ;; The plist is already quoted; don't quote the face names again.
+            (add-text-properties start (if fin (1- fin) finish)
+                                 '(font-lock-face confusingly-reordered
+                                   face confusingly-reordered)))
+          (goto-char finish))))))
+
;;; mule-cmds.el ends here
diff --git a/src/bidi.c b/src/bidi.c
index 1413ba6..3cdcd7d 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -3564,11 +3564,17 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
}
/* Utility function for looking for strong directional characters
- whose bidi type was overridden by a directional override. */
+ whose bidi type was overridden by directional override or embedding
+ or isolate control characters. */
ptrdiff_t
bidi_find_first_overridden (struct bidi_it *bidi_it)
{
ptrdiff_t found_pos = ZV;
+ /* Maximum bidi levels we allow for L2R and R2L characters. Note
+ that these are levels after resolving explicit embeddings,
+ overrides, and isolates, i.e. before resolving implicit levels. */
+ int max_l2r = bidi_it->paragraph_dir == L2R ? 0 : 2;
+ int max_r2l = 1;
do
{
@@ -3577,10 +3583,20 @@ bidi_find_first_overridden (struct bidi_it *bidi_it)
former. */
bidi_type_t type = bidi_resolve_weak (bidi_it);
+ /* Detect strong L or R types that have been overridden by
+ explicit overrides. */
if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
|| (type == STRONG_L
&& (bidi_it->orig_type == STRONG_R
- || bidi_it->orig_type == STRONG_AL)))
+ || bidi_it->orig_type == STRONG_AL))
+ /* Detect strong L or R types that were pushed into higher
+ embedding levels (and will thus reorder) by explicit
+ embeddings and isolates. */
+ || (bidi_it->orig_type == STRONG_L
+ && bidi_it->level_stack[bidi_it->stack_idx].level > max_l2r)
+ || ((bidi_it->orig_type == STRONG_R
+ || bidi_it->orig_type == STRONG_AL)
+ && bidi_it->level_stack[bidi_it->stack_idx].level > max_r2l))
found_pos = bidi_it->charpos;
} while (found_pos == ZV
&& bidi_it->charpos < ZV
diff --git a/src/xdisp.c b/src/xdisp.c
index 39ede3c..646beed 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -24511,7 +24511,7 @@ See also `bidi-paragraph-direction'. */)
DEFUN ("bidi-find-overridden-directionality",
Fbidi_find_overridden_directionality,
- Sbidi_find_overridden_directionality, 2, 3, 0,
+ Sbidi_find_overridden_directionality, 3, 4, 0,
doc: /* Return position between FROM and TO where directionality was
overridden.
This function returns the first character position in the specified
@@ -24530,12 +24530,18 @@ a buffer is preferable when the buffer is displayed in some window,
because this function will then be able to correctly account for
window-specific overlays, which can affect the results.
+Optional argument BASE-DIR specifies the base paragraph direction
+of the text. It should be a symbol, either `left-to-right'
+or `right-to-left', and defaults to `left-to-right'.
+
Strong directional characters `L', `R', and `AL' can have their
intrinsic directionality overridden by directional override
-control characters RLO (u+202e) and LRO (u+202d). See the
-function `get-char-code-property' for a way to inquire about
+control characters RLO (u+202E) and LRO (u+202D). They can also
+have their directionality affected by other formatting control
+characters: LRE (u+202A), RLE (u+202B), LRI (u+2066), and RLI (u+2067).
+See the function `get-char-code-property' for a way to inquire about
the `bidi-class' property of a character. */)
- (Lisp_Object from, Lisp_Object to, Lisp_Object object)
+ (Lisp_Object from, Lisp_Object to, Lisp_Object object, Lisp_Object base_dir)
{
struct buffer *buf = current_buffer;
struct buffer *old = buf;
@@ -24632,10 +24638,9 @@ the `bidi-class' property of a character. */)
}
ptrdiff_t found;
+ bidi_dir_t bdir = EQ (base_dir, Qright_to_left) ? R2L : L2R;
do {
- /* For the purposes of this function, the actual base direction of
- the paragraph doesn't matter, so just set it to L2R. */
- bidi_paragraph_init (L2R, &itb, false);
+ bidi_paragraph_init (bdir, &itb, false);
while ((found = bidi_find_first_overridden (&itb)) < from_pos)
;
} while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos);
diff --git a/test/src/xdisp-tests.el b/test/src/xdisp-tests.el
index 4e7d2ad..a385ded 100644
--- a/test/src/xdisp-tests.el
+++ b/test/src/xdisp-tests.el
@@ -99,4 +99,19 @@
(width-in-chars (/ (car size) char-width)))
(should (equal width-in-chars 3)))))
+(ert-deftest xdisp-tests--find-directional-overrides ()
+ (with-temp-buffer
+ (insert "\
+int main() {
+ bool isAdmin = false;
+ /* }if (isAdmin) begin admins only */
+ printf(\"You are an admin.\\n\");
+ /* end admins only { */
+ return 0;
+}")
+ (goto-char (point-min))
+ (should (eq (bidi-find-overridden-directionality (point-min) (point-max)
+ nil)
+ 46))))
+
;;; xdisp-tests.el ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master b968553: Better detection of potentially malicious bidi text,
Eli Zaretskii <=