emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] trunk r113220: Add the coding system prefer-utf-8.


From: Kenichi Handa
Subject: [Emacs-diffs] trunk r113220: Add the coding system prefer-utf-8.
Date: Sat, 29 Jun 2013 03:46:45 +0000
User-agent: Bazaar (2.6b2)

------------------------------------------------------------
revno: 113220 [merge]
revision-id: address@hidden
parent: address@hidden
parent: address@hidden
committer: K. Handa <address@hidden>
branch nick: trunk
timestamp: Sat 2013-06-29 12:46:39 +0900
message:
  Add the coding system prefer-utf-8.
modified:
  lisp/ChangeLog                 changelog-20091113204419-o5vbwnq5f7feedwu-1432
  lisp/international/mule-cmds.el mulecmds.el-20091113204419-o5vbwnq5f7feedwu-1043
  lisp/international/mule-conf.el muleconf.el-20091113204419-o5vbwnq5f7feedwu-1081
  lisp/international/mule.el     mule.el-20091113204419-o5vbwnq5f7feedwu-1046
  src/ChangeLog                  changelog-20091113204419-o5vbwnq5f7feedwu-1438
  src/coding.c                   coding.c-20091113204419-o5vbwnq5f7feedwu-1077
  src/coding.h                   coding.h-20091113204419-o5vbwnq5f7feedwu-1078
  test/ChangeLog                 changelog-20091113204419-o5vbwnq5f7feedwu-8588
  test/automated/decoder-tests.el decodertests.el-20130523111713-8bd70mdz8ezekunl-1
=== modified file 'lisp/ChangeLog'
--- a/lisp/ChangeLog    2013-06-28 07:57:49 +0000
+++ b/lisp/ChangeLog    2013-06-29 03:31:15 +0000
@@ -1,3 +1,20 @@
+2013-06-28  Kenichi Handa  <address@hidden>
+
+       * international/mule.el (define-coding-system): New coding system
+       properties :inhibit-null-byte-detection,
+       :inhibit-iso-escape-detection, and :prefer-utf-8.
+       (set-buffer-file-coding-system): If :charset-list property of
+       CODING-SYSTEM is `emacs', do not check if CODING-SYSTEM is
+       appropriate for setting.
+
+       * international/mule-cmds.el (select-safe-coding-system): If
+       DEFAULT-CODING-SYSTEM is prefer-utf-8 and the buffer contains
+       multibyte characters, return utf-8 (or one of its siblings).
+
+       * international/mule-conf.el (prefer-utf-8): New coding system.
+       (file-coding-system-alist): Use prefer-utf-8 as default for Elisp
+       files.
+
 2013-06-28  Ivan Kanis  <address@hidden>
 
        * net/shr.el (shr-render-region): New function.

=== modified file 'lisp/international/mule-cmds.el'
--- a/lisp/international/mule-cmds.el   2013-03-12 02:25:36 +0000
+++ b/lisp/international/mule-cmds.el   2013-06-28 14:42:55 +0000
@@ -972,7 +972,7 @@
 
        ;; Classify the defaults into safe, rejected, and unsafe.
        (dolist (elt default-coding-system)
-         (if (or (eq (car codings) 'undecided)
+         (if (or (eq (coding-system-type (car elt)) 'undecided)
                  (memq (cdr elt) codings))
              (if (and (functionp accept-default-p)
                       (not (funcall accept-default-p (cdr elt))))
@@ -1029,6 +1029,11 @@
              (error "Save aborted"))))
       (when (and tick (/= tick (buffer-chars-modified-tick)))
        (error "Canceled because the buffer was modified"))
+      (if (and (eq (coding-system-type coding-system) 'undecided)
+              (coding-system-get coding-system :prefer-utf-8)
+              (< (- to from) (- (position-bytes to) (position-bytes from))))
+         (setq coding-system
+               (coding-system-change-text-conversion coding-system 'utf-8)))
       coding-system)))
 
 (setq select-safe-coding-system-function 'select-safe-coding-system)

=== modified file 'lisp/international/mule-conf.el'
--- a/lisp/international/mule-conf.el   2013-06-11 12:51:18 +0000
+++ b/lisp/international/mule-conf.el   2013-06-28 14:41:14 +0000
@@ -1225,6 +1225,18 @@
 (define-coding-system-alias 'dos 'undecided-dos)
 (define-coding-system-alias 'mac 'undecided-mac)
 
+(define-coding-system 'prefer-utf-8
+  "Like `undecided' but prefer UTF-8 when appropriate.
+On decoding, if the source contains 8-bit codes and they all
+are valid UTF-8 sequences, detect the source as UTF-8 encoding
+regardless of the coding priority.
+On encoding, if the source contains non-ASCII characters, encode them
+by UTF-8."
+  :coding-type 'undecided
+  :mnemonic ?-
+  :charset-list '(emacs)
+  :prefer-utf-8 t)
+
 (define-coding-system 'raw-text
   "Raw text, which means text contains random 8-bit codes.
 Encoding text with this coding system produces the actual byte
@@ -1508,7 +1520,7 @@
 (setq file-coding-system-alist
       (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
       '(("\\.elc\\'" . utf-8-emacs)
-       ("\\.el\\'" . utf-8)
+       ("\\.el\\'" . prefer-utf-8)
        ("\\.utf\\(-8\\)?\\'" . utf-8)
        ("\\.xml\\'" . xml-find-file-coding-system)
        ;; We use raw-text for reading loaddefs.el so that if it

=== modified file 'lisp/international/mule.el'
--- a/lisp/international/mule.el        2013-06-12 10:31:24 +0000
+++ b/lisp/international/mule.el        2013-06-28 14:53:44 +0000
@@ -732,7 +732,26 @@
 
 VALUE is a symbol representing the registered CCL program used for
 encoding.  This attribute has a meaning only when `:coding-type' is
-`ccl'."
+`ccl'.
+
+:inhibit-null-byte-detection
+
+VALUE non-nil means Emacs ignore null bytes on code detection.
+See the variable `inhibit-null-byte-detection'.  This attribute
+has a meaning only when `:coding-type' is `undecided'.
+
+:inhibit-iso-escape-detection
+
+VALUE non-nil means Emacs ignores ISO-2022 escape sequences on
+code detection.  See the variable `inhibit-iso-escape-detection'.
+This attribute has a meaning only when `:coding-type' is
+`undecided'.
+
+:prefer-utf-8
+
+VALUE non-nil means Emacs prefers UTF-8 on code detection for
+non-ASCII files.  This attribute has a meaning only when
+`:coding-type' is `undecided'."
   (let* ((common-attrs (mapcar 'list
                               '(:mnemonic
                                 :coding-type
@@ -761,7 +780,11 @@
                                   ((eq coding-type 'ccl)
                                    '(:ccl-decoder
                                      :ccl-encoder
-                                     :valids))))))
+                                     :valids))
+                                  ((eq coding-type 'undecided)
+                                   '(:inhibit-null-byte-detection
+                                     :inhibit-iso-escape-detection
+                                     :prefer-utf-8))))))
 
     (dolist (slot common-attrs)
       (setcdr slot (plist-get props (car slot))))
@@ -1236,7 +1259,9 @@
   (if (and coding-system buffer-file-coding-system (null force))
       (setq coding-system
            (merge-coding-systems coding-system buffer-file-coding-system)))
-  (when (called-interactively-p 'interactive)
+  (when (and (called-interactively-p 'interactive)
+            (not (memq 'emacs (coding-system-get coding-system
+                                                 :charset-list))))
     ;; Check whether save would succeed, and jump to the offending char(s)
     ;; if not.
     (let ((css (find-coding-systems-region (point-min) (point-max))))

=== modified file 'src/ChangeLog'
--- a/src/ChangeLog     2013-06-28 05:48:53 +0000
+++ b/src/ChangeLog     2013-06-29 03:31:15 +0000
@@ -1,3 +1,22 @@
+2013-06-28  Kenichi Handa  <address@hidden>
+
+       * coding.h (define_coding_undecided_arg_index): New enum.
+       (coding_attr_index): New members
+       coding_attr_undecided_inhibit_null_byte_detection,
+       coding_attr_undecided_inhibit_iso_escape_detection,
+       coding_attr_undecided_prefer_utf_8.
+       (undecided_spec): New struct.
+       (struct coding_system): New member `undecided' of the member
+       `spec'.
+
+       * coding.c (setup_coding_system): Handle CODING->spec.undecided.
+       (detect_coding): Likewise.
+       (detect_coding_system): Likewise.
+       (Fdefine_coding_system_internal): New coding system properties
+       :inhibit-null-byte-detection, :inhibit-iso-escape-detection, and
+       :prefer-utf-8.
+       (syms_of_coding): Adjusted for coding_arg_undecided_max.
+
 2013-06-28  Paul Eggert  <address@hidden>
 
        * image.c (x_from_xcolors): Remove unused local.

=== modified file 'src/coding.c'
--- a/src/coding.c      2013-06-17 06:03:19 +0000
+++ b/src/coding.c      2013-06-28 15:00:17 +0000
@@ -5705,6 +5705,20 @@
       coding->decoder = decode_coding_raw_text;
       coding->encoder = encode_coding_raw_text;
       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
+      coding->spec.undecided.inhibit_nbd
+       = (NILP (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection))
+          ? -1
+          : EQ (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection), Qt)
+          ? 1
+          : 0);
+      coding->spec.undecided.inhibit_ied
+       = (NILP (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection))
+          ? -1
+          : EQ (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection), Qt)
+          ? 1
+          : 0);
+      coding->spec.undecided.prefer_utf_8
+       = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
     }
   else if (EQ (coding_type, Qiso_2022))
     {
@@ -6462,6 +6476,16 @@
       int c, i;
       struct coding_detection_info detect_info;
       bool null_byte_found = 0, eight_bit_found = 0;
+      int inhibit_nbd          /* null byte detection */
+       = (coding->spec.undecided.inhibit_nbd > 0
+          | (coding->spec.undecided.inhibit_nbd == 0
+             & inhibit_null_byte_detection));
+      int inhibit_ied          /* iso escape detection */
+       = (coding->spec.undecided.inhibit_ied > 0
+          | (coding->spec.undecided.inhibit_ied == 0
+             & inhibit_iso_escape_detection));
+      int prefer_utf_8
+       = coding->spec.undecided.prefer_utf_8;
 
       coding->head_ascii = 0;
       detect_info.checked = detect_info.found = detect_info.rejected = 0;
@@ -6477,7 +6501,7 @@
          else if (c < 0x20)
            {
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-                 && ! inhibit_iso_escape_detection
+                 && ! inhibit_ied
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (coding, &detect_info))
@@ -6496,7 +6520,7 @@
                      break;
                    }
                }
-             else if (! c && !inhibit_null_byte_detection)
+             else if (! c && !inhibit_nbd)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
@@ -6553,6 +6577,12 @@
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
+             else if (prefer_utf_8
+                      && detect_coding_utf_8 (coding, &detect_info))
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+               }
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
@@ -8514,6 +8544,17 @@
       enum coding_category category IF_LINT (= 0);
       struct coding_system *this IF_LINT (= NULL);
       int c, i;
+      int inhibit_nbd          /* null byte detection */
+       = (coding.spec.undecided.inhibit_nbd > 0
+          | (coding.spec.undecided.inhibit_nbd == 0
+             & inhibit_null_byte_detection));
+      int inhibit_ied          /* iso escape detection */
+       = (coding.spec.undecided.inhibit_ied > 0
+          | (coding.spec.undecided.inhibit_ied == 0
+             & inhibit_iso_escape_detection));
+      int prefer_utf_8
+       = coding.spec.undecided.prefer_utf_8;
+
 
       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
       for (; src < src_end; src++)
@@ -8528,7 +8569,7 @@
          else if (c < 0x20)
            {
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-                 && ! inhibit_iso_escape_detection
+                 && ! inhibit_ied
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (&coding, &detect_info))
@@ -8547,7 +8588,7 @@
                      break;
                    }
                }
-             else if (! c && !inhibit_null_byte_detection)
+             else if (! c && !inhibit_nbd)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
@@ -8580,6 +8621,12 @@
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
+             else if (prefer_utf_8
+                      && detect_coding_utf_8 (&coding, &detect_info))
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+               }
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
@@ -8918,8 +8965,7 @@
        Lisp_Object attrs;
 
        attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
-       if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
-           && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+       if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
          {
            ASET (attrs, coding_attr_trans_tbl,
                  get_translation_table (attrs, 1, NULL));
@@ -10333,7 +10379,17 @@
                  : coding_category_utf_8_sig);
     }
   else if (EQ (coding_type, Qundecided))
-    category = coding_category_undecided;
+    {
+      if (nargs < coding_arg_undecided_max)
+       goto short_args;
+      ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
+           args[coding_arg_undecided_inhibit_null_byte_detection]);
+      ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
+           args[coding_arg_undecided_inhibit_iso_escape_detection]);
+      ASET (attrs, coding_attr_undecided_prefer_utf_8,
+           args[coding_arg_undecided_prefer_utf_8]);
+      category = coding_category_undecided;
+    }
   else
     error ("Invalid coding system type: %s",
           SDATA (SYMBOL_NAME (coding_type)));
@@ -11121,11 +11177,11 @@
     Vtranslation_table_for_input = Qnil;
 
   {
-    Lisp_Object args[coding_arg_max];
+    Lisp_Object args[coding_arg_undecided_max];
     Lisp_Object plist[16];
     int i;
 
-    for (i = 0; i < coding_arg_max; i++)
+    for (i = 0; i < coding_arg_undecided_max; i++)
       args[i] = Qnil;
 
     plist[0] = intern_c_string (":name");
@@ -11162,7 +11218,7 @@
     plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
     plist[15] = args[coding_arg_eol_type] = Qnil;
     args[coding_arg_plist] = Flist (16, plist);
-    Fdefine_coding_system_internal (coding_arg_max, args);
+    Fdefine_coding_system_internal (coding_arg_undecided_max, args);
   }
 
   setup_coding_system (Qno_conversion, &safe_terminal_coding);

=== modified file 'src/coding.h'
--- a/src/coding.h      2013-05-22 14:53:21 +0000
+++ b/src/coding.h      2013-06-28 14:59:33 +0000
@@ -76,6 +76,14 @@
     coding_arg_ccl_max
   };
 
+enum define_coding_undecided_arg_index
+  {
+    coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
+    coding_arg_undecided_inhibit_iso_escape_detection,
+    coding_arg_undecided_prefer_utf_8,
+    coding_arg_undecided_max
+  };
+
 /* Hash table for all coding systems.  Keys are coding system symbols
    and values are spec vectors of the corresponding coding system.  A
    spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
@@ -158,6 +166,10 @@
 
     coding_attr_emacs_mule_full,
 
+    coding_attr_undecided_inhibit_null_byte_detection,
+    coding_attr_undecided_inhibit_iso_escape_detection,
+    coding_attr_undecided_prefer_utf_8,
+
     coding_attr_last_index
   };
 
@@ -368,6 +380,13 @@
 
 struct ccl_spec;
 
+struct undecided_spec
+{
+  int inhibit_nbd;             /* nbd: null byte detection */
+  int inhibit_ied;             /* ied: iso escape detection */
+  int prefer_utf_8;
+};
+
 enum utf_bom_type
   {
     utf_detect_bom,
@@ -425,6 +444,7 @@
       struct utf_16_spec utf_16;
       enum utf_bom_type utf_8_bom;
       struct emacs_mule_spec emacs_mule;
+      struct undecided_spec undecided;
     } spec;
 
   int max_charset_id;

=== modified file 'test/ChangeLog'
--- a/test/ChangeLog    2013-06-27 09:51:55 +0000
+++ b/test/ChangeLog    2013-06-29 03:31:15 +0000
@@ -1,3 +1,13 @@
+2013-06-28  Kenichi Handa  <address@hidden>
+
+       * automated/decoder-tests.el (decoder-tests-gen-file): New arg FILE.
+       (decoder-tests-ao-gen-file): Renamed from decoder-tests-gen-file.
+       Callers changed.
+       (decoder-tests-filename): Moved; new optional arg EXT.
+       (decoder-tests-prefer-utf-8-read)
+       (decoder-tests-prefer-utf-8-write): New functions.
+       (ert-test-decoder-prefer-utf-8): New test.
+
 2013-06-27  Dmitry Gutov  <address@hidden>
 
        * automated/package-x-test.el: Change the commentary.

=== modified file 'test/automated/decoder-tests.el'
--- a/test/automated/decoder-tests.el   2013-05-23 11:18:43 +0000
+++ b/test/automated/decoder-tests.el   2013-06-28 14:39:08 +0000
@@ -23,12 +23,14 @@
 
 (require 'ert)
 
-;;; Check ASCII optimizing decoder
-
 ;; Directory to hold test data files.
 (defvar decoder-tests-workdir
   (expand-file-name "decoder-tests" temporary-file-directory))
 
+;; Remove all generated test files.
+(defun decoder-tests-remove-files ()
+  (delete-directory decoder-tests-workdir t))
+
 ;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or
 ;; binary) of a test file.
 (defun decoder-tests-file-contents (content-type)
@@ -43,25 +45,16 @@
          (t
           (error "Invalid file content type: %s" content-type)))))
 
-;; Return the name of test file whose contents specified by
-;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM.
-(defun decoder-tests-filename (content-type coding-system)
-  (expand-file-name (format "%s-%s" content-type coding-system)
-                   decoder-tests-workdir))
-
-;; Generate a test file whose contents specified by CONTENT-TYPE and
+;; Generate FILE with CONTENTS encoded by CODING-SYSTEM.
 ;; whose encoding specified by CODING-SYSTEM.
-(defun decoder-tests-gen-file (content-type coding-system)
+(defun decoder-tests-gen-file (file contents coding-system)
   (or (file-directory-p decoder-tests-workdir)
       (mkdir decoder-tests-workdir t))
-  (let ((file (decoder-tests-filename content-type coding-system)))
-    (with-temp-file file
-      (set-buffer-file-coding-system coding-system)
-      (insert (decoder-tests-file-contents content-type)))))
-
-;; Remove all generated test files.
-(defun decoder-tests-remove-files ()
-  (delete-directory decoder-tests-workdir t))
+  (setq file (expand-file-name file decoder-tests-workdir))
+  (with-temp-file file
+    (set-buffer-file-coding-system coding-system)
+    (insert contents))
+  file)
 
 ;;; The following three functions are filters for contents of a test
 ;;; file.
@@ -96,6 +89,26 @@
 (defun decoder-tests-add-bom (str)
   (concat "\xfeff" str))
 
+;; Return the name of test file whose contents specified by
+;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM.
+(defun decoder-tests-filename (content-type coding-system &optional ext)
+  (if ext
+      (expand-file-name (format "%s-%s.%s" content-type coding-system ext)
+                       decoder-tests-workdir)
+    (expand-file-name (format "%s-%s" content-type coding-system)
+                     decoder-tests-workdir)))
+
+
+;;; Check ASCII optimizing decoder
+
+;; Generate a test file whose contents specified by CONTENT-TYPE and
+;; whose encoding specified by CODING-SYSTEM.
+(defun decoder-tests-ao-gen-file (content-type coding-system)
+  (let ((file (decoder-tests-filename content-type coding-system)))
+    (decoder-tests-gen-file file 
+                           (decoder-tests-file-contents content-type)
+                           coding-system)))
+
 ;; Test the decoding of a file whose contents and encoding are
 ;; specified by CONTENT-TYPE and WRITE-CODING.  The test passes if the
 ;; file is read by READ-CODING and detected as DETECTED-CODING and the
@@ -127,7 +140,7 @@
   (unwind-protect
       (progn
        (dolist (eol-type '(unix dos mac))
-         (decoder-tests-gen-file 'ascii eol-type))
+         (decoder-tests-ao-gen-file 'ascii eol-type))
        (should-not (decoder-tests 'ascii 'unix 'undecided 'unix))
        (should-not (decoder-tests 'ascii 'dos 'undecided 'dos))
        (should-not (decoder-tests 'ascii 'dos 'dos 'dos))
@@ -147,8 +160,8 @@
       (progn
        (dolist (coding '("utf-8" "utf-8-with-signature"))
          (dolist (eol-type '("unix" "dos" "mac"))
-           (decoder-tests-gen-file 'latin
-                                   (intern (concat coding "-" eol-type)))))
+           (decoder-tests-ao-gen-file 'latin
+                                      (intern (concat coding "-" eol-type)))))
        (should-not (decoder-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix))
        (should-not (decoder-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix))
        (should-not (decoder-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos))
@@ -177,8 +190,8 @@
   (unwind-protect
       (progn
        (dolist (eol-type '("unix" "dos" "mac"))
-         (decoder-tests-gen-file 'binary
-                                 (intern (concat "raw-text" "-" eol-type))))
+         (decoder-tests-ao-gen-file 'binary
+                                    (intern (concat "raw-text" "-" eol-type))))
        (should-not (decoder-tests 'binary 'raw-text-unix 'undecided
                                   'raw-text-unix))
        (should-not (decoder-tests 'binary 'raw-text-dos 'undecided
@@ -193,6 +206,64 @@
                                   'raw-text-mac 'decoder-tests-lf-to-lflf)))
     (decoder-tests-remove-files)))
 
+
+;;; Check the coding system `prefer-utf-8'.
+
+;; Read FILE.  Check if the encoding was detected as DETECT.  If
+;; PREFER is non-nil, prefer that coding system before reading.
+
+(defun decoder-tests-prefer-utf-8-read (file detect prefer)
+  (if prefer
+      (prefer-coding-system prefer))
+  (with-temp-buffer
+    (insert-file-contents file)
+    (if (eq buffer-file-coding-system detect)
+       nil
+      (format "Invalid detection: %s" buffer-file-coding-system))))
+
+;; Read FILE, modify it, and write it.  Check if the coding system
+;; used for writing was CODING.  If CODING-TAG is non-nil, insert
+;; coding tag with it before writing.  If STR is non-nil, insert it
+;; before writing.
+
+(defun decoder-tests-prefer-utf-8-write (file coding-tag coding
+                                             &optional str)
+  (with-temp-buffer
+    (insert-file-contents file)
+    (goto-char (point-min))
+    (if coding-tag
+       (insert (format ";; -*- coding: %s; -*-\n" coding-tag))
+      (insert ";;\n"))
+    (if str
+       (insert str))
+    (write-file (decoder-tests-filename 'test 'test "el"))
+    (if (coding-system-equal buffer-file-coding-system coding)
+       nil
+      (format "Incorrect encoding: %s" last-coding-system-used))))
+
+(ert-deftest ert-test-decoder-prefer-utf-8 ()
+  (unwind-protect
+      (let ((ascii (decoder-tests-gen-file "ascii.el"
+                                          (decoder-tests-file-contents 'ascii)
+                                          'unix))
+           (latin (decoder-tests-gen-file "utf-8.el"
+                                          (decoder-tests-file-contents 'latin)
+                                          'utf-8)))
+       (should-not (decoder-tests-prefer-utf-8-read
+                    ascii 'prefer-utf-8-unix nil))
+       (should-not (decoder-tests-prefer-utf-8-read
+                    latin 'utf-8-unix nil))
+       (should-not (decoder-tests-prefer-utf-8-read
+                    latin 'utf-8-unix 'iso-8859-1))
+       (should-not (decoder-tests-prefer-utf-8-read
+                    latin 'utf-8-unix 'sjis))
+       (should-not (decoder-tests-prefer-utf-8-write
+                    ascii nil 'prefer-utf-8-unix))
+       (should-not (decoder-tests-prefer-utf-8-write
+                    ascii 'iso-8859-1 'iso-8859-1-unix))
+       (should-not (decoder-tests-prefer-utf-8-write
+                    ascii nil 'utf-8-unix "À")))
+    (decoder-tests-remove-files)))
 
 
 ;;; The following is for benchmark testing of the new optimized


reply via email to

[Prev in Thread] Current Thread [Next in Thread]