emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master d3e2c88 4/4: Fix ASCII-only conversion logic (bug#40407)


From: Mattias Engdegård
Subject: master d3e2c88 4/4: Fix ASCII-only conversion logic (bug#40407)
Date: Thu, 9 Apr 2020 07:04:09 -0400 (EDT)

branch: master
commit d3e2c88041b4844422bda64b1ee51678dc8a2e88
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>

    Fix ASCII-only conversion logic (bug#40407)
    
    To sidestep conversion altogether when EOL conversion applies, we must
    either be encoding a string without NL, or decoding without CR.
    
    * src/coding.c (string_ascii_p): Revert to a pure predicate.
    (code_convert_string): Fix logic.  Don't use uninitialised
    ascii_p (removed).  Use memchr to detect CR or LF in string when needed.
    * test/src/coding-tests.el (coding-nocopy-ascii):
    Update tests to include encodings with explicit EOL conversions.
---
 src/coding.c             | 46 +++++++++++++++++-----------------------------
 test/src/coding-tests.el | 33 +++++++++++++++++++++------------
 2 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/src/coding.c b/src/coding.c
index ffcb9cf..450c498 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9474,22 +9474,15 @@ not fully specified.)  */)
   return code_convert_region (start, end, coding_system, destination, 1, 0);
 }
 
-/* Non-zero if STR contains only characters in the 0..127 range.
-   Positive if STR includes characters that don't need EOL conversion
-   on decoding, negative otherwise.  */
-static int
-string_ascii_p (Lisp_Object str)
+/* Whether STRING only contains chars in the 0..127 range.  */
+static bool
+string_ascii_p (Lisp_Object string)
 {
-  ptrdiff_t nbytes = SBYTES (str);
-  bool CR_Seen = false;
+  ptrdiff_t nbytes = SBYTES (string);
   for (ptrdiff_t i = 0; i < nbytes; i++)
-    {
-      if (SREF (str, i) > 127)
-       return 0;
-      if (SREF (str, i) == '\r')
-       CR_Seen = true;
-    }
-  return CR_Seen ? -1 : 1;
+    if (SREF (string, i) > 127)
+      return false;
+  return true;
 }
 
 Lisp_Object
@@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
   if (EQ (dst_object, Qt))
     {
       /* Fast path for ASCII-only input and an ASCII-compatible coding:
-         act as identity if no EOL conversion is neede.  */
-      int ascii_p;
+         act as identity if no EOL conversion is needed.  */
       Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
       if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
           && (STRING_MULTIBYTE (string)
-              ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
-       {
-         if (ascii_p > 0
-             || (ascii_p < 0
-                 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
-                     || inhibit_eol_conversion)))
-           return (nocopy
-                   ? string
-                   : (encodep
-                      ? make_unibyte_string (SSDATA (string), bytes)
-                      : make_multibyte_string (SSDATA (string),
-                                               bytes, bytes)));
-       }
+              ? (chars == bytes) : string_ascii_p (string))
+          && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+              || inhibit_eol_conversion
+              || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
+        return (nocopy
+                ? string
+                : (encodep
+                   ? make_unibyte_string (SSDATA (string), bytes)
+                   : make_multibyte_string (SSDATA (string), bytes, bytes)));
     }
   else if (BUFFERP (dst_object))
     {
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 8d92bcd..9f6fac3 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -388,29 +388,38 @@
   (let* ((uni (apply #'string (number-sequence 0 127)))
          (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
+      ;; Encodings without EOL conversion.
       (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
         (should-not (eq (decode-coding-string s coding nil) s))
         (should-not (eq (encode-coding-string s coding nil) s))
         (should (eq (decode-coding-string s coding t) s))
-        (should (eq (encode-coding-string s coding t) s)))))
-  (let* ((uni (apply #'string (number-sequence 15 127)))
+        (should (eq (encode-coding-string s coding t) s)))
+
+      ;; With EOL conversion inhibited.
+      (let ((inhibit-eol-conversion t))
+        (dolist (coding '(us-ascii iso-latin-1 utf-8))
+          (should-not (eq (decode-coding-string s coding nil) s))
+          (should-not (eq (encode-coding-string s coding nil) s))
+          (should (eq (decode-coding-string s coding t) s))
+          (should (eq (encode-coding-string s coding t) s))))))
+
+  ;; Check identity decoding with EOL conversion for ASCII except CR.
+  (let* ((uni (apply #'string (delq ?\r (number-sequence 0 127))))
          (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
-      (dolist (coding '(us-ascii iso-latin-1 utf-8))
+      (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
         (should-not (eq (decode-coding-string s coding nil) s))
-        (should-not (eq (encode-coding-string s coding nil) s))
-        (should (eq (decode-coding-string s coding t) s))
-        (should (eq (encode-coding-string s coding t) s)))))
-  (let* ((uni (apply #'string (number-sequence 0 127)))
-         (multi (string-to-multibyte uni))
-         (inhibit-eol-conversion t))
+        (should (eq (decode-coding-string s coding t) s)))))
+
+  ;; Check identity encoding with EOL conversion for ASCII except LF.
+  (let* ((uni (apply #'string (delq ?\n (number-sequence 0 127))))
+         (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
-      (dolist (coding '(us-ascii iso-latin-1 utf-8))
-        (should-not (eq (decode-coding-string s coding nil) s))
+      (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
         (should-not (eq (encode-coding-string s coding nil) s))
-        (should (eq (decode-coding-string s coding t) s))
         (should (eq (encode-coding-string s coding t) s))))))
 
+
 (ert-deftest coding-check-coding-systems-region ()
   (should (equal (check-coding-systems-region "aå" nil '(utf-8))
                  nil))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]