emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master faf996d 1/2: Fix decoding ASCII strings with embedded CR characters


From: Eli Zaretskii
Subject: master faf996d 1/2: Fix decoding ASCII strings with embedded CR characters
Date: Thu, 9 Apr 2020 05:22:01 -0400 (EDT)

branch: master
commit faf996dc6e963a8dd74e9e794ded0467dd78ea18
Author: Eli Zaretskii <address@hidden>
Commit: Eli Zaretskii <address@hidden>

    Fix decoding ASCII strings with embedded CR characters
    
    * src/coding.c (string_ascii_p): Return a negative value if an
    all-ASCII string STR includes the CR character, otherwise a
    positive value.
    (code_convert_string): If the string is ASCII, but includes CR
    characters, use the fast path only if EOL doesn't need to be
    decoded.  (Bug#40519)
    
    * test/src/coding-tests.el (coding-nocopy-ascii): Add tests for
    bug#40519.
---
 src/coding.c             | 37 ++++++++++++++++++++++++++-----------
 test/src/coding-tests.el | 17 +++++++++++++++++
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/src/coding.c b/src/coding.c
index 49c1e62..24a832f 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9471,15 +9471,22 @@ not fully specified.)  */)
   return code_convert_region (start, end, coding_system, destination, 1, 0);
 }
 
-/* Whether a string only contains chars in the 0..127 range.  */
-static bool
+/* Non-zero if STR contains only characters in the 0..127 range.
+   Positive if STR includes no CR characters, so decoding it needs
+   no EOL conversion; negative if it includes at least one CR.  */
+static int
 string_ascii_p (Lisp_Object str)
 {
   ptrdiff_t nbytes = SBYTES (str);
+  bool CR_Seen = false;
   for (ptrdiff_t i = 0; i < nbytes; i++)
-    if (SREF (str, i) > 127)
-      return false;
-  return true;
+    {
+      if (SREF (str, i) > 127)
+       return 0;
+      if (SREF (str, i) == '\r')
+       CR_Seen = true;
+    }
+  return CR_Seen ? -1 : 1;
 }
 
 Lisp_Object
@@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
     {
       /* Fast path for ASCII-only input and an ASCII-compatible coding:
          act as identity.  */
+      int ascii_p;
       Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
       if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
           && (STRING_MULTIBYTE (string)
-              ? (chars == bytes) : string_ascii_p (string)))
-       return (nocopy
-                ? string
-                : (encodep
-                   ? make_unibyte_string (SSDATA (string), bytes)
-                   : make_multibyte_string (SSDATA (string), bytes, bytes)));
+              ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
+       {
+         if (ascii_p > 0
+             || (ascii_p < 0
+                 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+                     || inhibit_eol_conversion)))
+           return (nocopy
+                   ? string
+                   : (encodep
+                      ? make_unibyte_string (SSDATA (string), bytes)
+                      : make_multibyte_string (SSDATA (string),
+                                               bytes, bytes)));
+       }
     }
   else if (BUFFERP (dst_object))
     {
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 93e6709..83a06b8 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -388,6 +388,23 @@
   (let* ((uni (apply #'string (number-sequence 0 127)))
          (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
+      (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
+        (should-not (eq (decode-coding-string s coding nil) s))
+        (should-not (eq (encode-coding-string s coding nil) s))
+        (should (eq (decode-coding-string s coding t) s))
+        (should (eq (encode-coding-string s coding t) s)))))
+  (let* ((uni (apply #'string (number-sequence 15 127)))
+         (multi (string-to-multibyte uni)))
+    (dolist (s (list uni multi))
+      (dolist (coding '(us-ascii iso-latin-1 utf-8))
+        (should-not (eq (decode-coding-string s coding nil) s))
+        (should-not (eq (encode-coding-string s coding nil) s))
+        (should (eq (decode-coding-string s coding t) s))
+        (should (eq (encode-coding-string s coding t) s)))))
+  (let* ((uni (apply #'string (number-sequence 0 127)))
+         (multi (string-to-multibyte uni))
+         (inhibit-eol-conversion t))
+    (dolist (s (list uni multi))
       (dolist (coding '(us-ascii iso-latin-1 utf-8))
         (should-not (eq (decode-coding-string s coding nil) s))
         (should-not (eq (encode-coding-string s coding nil) s))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]