[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master a34650a: Fix sloppy base64 acceptance of some multibyte characters
From: |
Mattias Engdegård |
Subject: |
master a34650a: Fix sloppy base64 acceptance of some multibyte characters |
Date: |
Mon, 20 Dec 2021 14:24:09 -0500 (EST) |
branch: master
commit a34650acff3740980ef23d900d35004bcfe2ef04
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Fix sloppy base64 acceptance of some multibyte characters
The base64 encoding functions incorrectly accepted some multibyte
characters; stop doing that (bug#52670).
* src/fns.c (base64_encode_1): Reject all multibyte characters.
* test/src/fns-tests.el (fns-tests-base64-encode-string)
(fns-test-base64url-encode-region)
(fns-test-base64url-encode-string): Add tests.
* doc/lispref/text.texi (Base 64): Rephrase outdated manual text.
* etc/NEWS: Add a notice.
---
doc/lispref/text.texi | 5 ++---
etc/NEWS | 8 ++++++++
src/fns.c | 6 +++---
test/src/fns-tests.el | 16 +++++++++++++---
4 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi
index 5ab5e57..9771d8a 100644
--- a/doc/lispref/text.texi
+++ b/doc/lispref/text.texi
@@ -4793,9 +4793,8 @@ converting to and from this code.
This function converts the region from @var{beg} to @var{end} into base
64 code. It returns the length of the encoded text. An error is
signaled if a character in the region is multibyte, i.e., in a
-multibyte buffer the region must contain only characters from the
-charsets @code{ascii}, @code{eight-bit-control} and
-@code{eight-bit-graphic}.
+multibyte buffer the region must contain only ASCII characters or raw
+bytes.
Normally, this function inserts newline characters into the encoded
text, to avoid overlong lines. However, if the optional argument
diff --git a/etc/NEWS b/etc/NEWS
index 24f3da8..57fe40c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -848,6 +848,14 @@ This change is now applied in 'dired-insert-directory'.
'unify-8859-on-decoding-mode', 'unify-8859-on-encoding-mode',
'vc-arch-command'.
++++
+** Base64 encoding no longer tolerates latin-1 input.
+The functions 'base64-encode-string', 'base64url-encode-string',
+'base64-encode-region' and 'base64url-encode-region' no longer accept
+characters in the range U+0080..U+00FF as substitutes for single bytes
+in the range 128..255, but signal an error for all multibyte characters.
+The input must be encoded text.
+
* Lisp Changes in Emacs 29.1
diff --git a/src/fns.c b/src/fns.c
index 76c76c9..2372133 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -3653,7 +3653,7 @@ base64_encode_1 (const char *from, char *to, ptrdiff_t
length,
c = string_char_and_length ((unsigned char *) from + i, &bytes);
if (CHAR_BYTE8_P (c))
c = CHAR_TO_BYTE8 (c);
- else if (c >= 256)
+ else if (c >= 128)
return -1;
i += bytes;
}
@@ -3696,7 +3696,7 @@ base64_encode_1 (const char *from, char *to, ptrdiff_t
length,
c = string_char_and_length ((unsigned char *) from + i, &bytes);
if (CHAR_BYTE8_P (c))
c = CHAR_TO_BYTE8 (c);
- else if (c >= 256)
+ else if (c >= 128)
return -1;
i += bytes;
}
@@ -3721,7 +3721,7 @@ base64_encode_1 (const char *from, char *to, ptrdiff_t
length,
c = string_char_and_length ((unsigned char *) from + i, &bytes);
if (CHAR_BYTE8_P (c))
c = CHAR_TO_BYTE8 (c);
- else if (c >= 256)
+ else if (c >= 128)
return -1;
i += bytes;
}
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index bec5c03..63423f6 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -318,7 +318,10 @@
(should (equal (base64-encode-string "fooba") "Zm9vYmE="))
(should (equal (base64-encode-string "foobar") "Zm9vYmFy"))
(should (equal (base64-encode-string "\x14\xfb\x9c\x03\xd9\x7e") "FPucA9l+"))
- (should (equal (base64-encode-string "\x14\xfb\x9c\x03\xd9\x7f")
"FPucA9l/")))
+ (should (equal (base64-encode-string "\x14\xfb\x9c\x03\xd9\x7f") "FPucA9l/"))
+
+ (should-error (base64-encode-string "ƒ"))
+ (should-error (base64-encode-string "ü")))
(ert-deftest fns-test-base64url-encode-region ()
;; url variant with padding
@@ -360,7 +363,11 @@
(should (equal (fns-tests--with-region base64url-encode-region
(fns-tests--string-repeat "\x14\xfb\x9c\x03\xd9\x7e" 10) t)
(fns-tests--string-repeat "FPucA9l-" 10)))
(should (equal (fns-tests--with-region base64url-encode-region
(fns-tests--string-repeat "\x14\xfb\x9c\x03\xd9\x7f" 10) t)
- (fns-tests--string-repeat "FPucA9l_" 10))))
+ (fns-tests--string-repeat "FPucA9l_" 10)))
+
+ (should-error (fns-tests--with-region base64url-encode-region "ƒ"))
+ (should-error (fns-tests--with-region base64url-encode-region "ü")))
+
(ert-deftest fns-test-base64url-encode-string ()
;; url variant with padding
@@ -394,7 +401,10 @@
(should (equal (base64url-encode-string (fns-tests--string-repeat "fooba"
15) t) (fns-tests--string-repeat "Zm9vYmFmb29iYWZvb2Jh" 5)))
(should (equal (base64url-encode-string (fns-tests--string-repeat "foobar"
15) t) (concat (fns-tests--string-repeat "Zm9vYmFyZm9vYmFy" 7) "Zm9vYmFy")))
(should (equal (base64url-encode-string (fns-tests--string-repeat
"\x14\xfb\x9c\x03\xd9\x7e" 10) t) (fns-tests--string-repeat "FPucA9l-" 10)))
- (should (equal (base64url-encode-string (fns-tests--string-repeat
"\x14\xfb\x9c\x03\xd9\x7f" 10) t) (fns-tests--string-repeat "FPucA9l_" 10))))
+ (should (equal (base64url-encode-string (fns-tests--string-repeat
"\x14\xfb\x9c\x03\xd9\x7f" 10) t) (fns-tests--string-repeat "FPucA9l_" 10)))
+
+ (should-error (base64url-encode-string "ƒ"))
+ (should-error (base64url-encode-string "ü")))
(ert-deftest fns-tests-base64-decode-string ()
;; standard variant RFC2045
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master a34650a: Fix sloppy base64 acceptance of some multibyte characters,
Mattias Engdegård <=