emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] /srv/bzr/emacs/emacs-23 r99966: Modify the coding system c


From: Kenichi Handa
Subject: [Emacs-diffs] /srv/bzr/emacs/emacs-23 r99966: Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text.
Date: Fri, 06 Aug 2010 19:58:51 +0900
User-agent: Bazaar (2.0.3)

------------------------------------------------------------
revno: 99966 [merge]
committer: Kenichi Handa <address@hidden>
branch nick: emacs-23
timestamp: Fri 2010-08-06 19:58:51 +0900
message:
  Modify the coding system compound-text-with-extensions to conform to the spec 
of Compound Text.
modified:
  lisp/ChangeLog
  lisp/international/mule-conf.el
  lisp/international/mule.el
  lisp/language/cyrillic.el
  src/ChangeLog
  src/charset.c
  src/coding.c
=== modified file 'lisp/ChangeLog'
--- a/lisp/ChangeLog    2010-08-05 21:31:03 +0000
+++ b/lisp/ChangeLog    2010-08-06 08:15:12 +0000
@@ -1,9 +1,37 @@
+2010-08-06  Kenichi Handa  <address@hidden>
+
+       * international/mule.el (define-charset): Store NAME as :base
+       property.
+       (ctext-non-standard-encodings-table): Pay attention to charset
+       aliases.
+       (ctext-pre-write-conversion): Sort ctext-standard-encodings by the
+       current priority.  Force using the designation of the specific
+       charset by adding `charset' text property.  Improve the whole
+       algorithm.
+
 2010-08-05  Juanma Barranquero  <address@hidden>
 
        * emulation/pc-select.el (pc-selection-mode-hook)
        (copy-region-as-kill-nomark, beginning-of-buffer-mark)
        (pc-selection-mode): Fix typos in docstrings.
 
+2010-08-04  Kenichi Handa  <address@hidden>
+
+       * language/cyrillic.el: Don't add "microsoft-cp1251" to
+       ctext-non-standard-encodings-alist here.
+
+       * international/mule.el (ctext-non-standard-encodings-alist): Add
+       "koi8-r" and "microsoft-cp1251".
+       (ctext-standard-encodings): New variable.
+       (ctext-non-standard-encodings-table): List only elements for
+       non-standard encodings.
+       (ctext-pre-write-conversion): Adjusted for the above change.
+       Check ctext-standard-encodings.
+
+       * international/mule-conf.el (compound-text): Doc fix.
+       (ctext-no-compositions): Doc fix.
+       (compound-text-with-extensions): Doc fix.
+
 2010-08-04  Stefan Monnier  <address@hidden>
 
        * simple.el (exchange-dot-and-mark): Mark obsolete, finally.

=== modified file 'lisp/international/mule-conf.el'
--- a/lisp/international/mule-conf.el   2010-01-13 08:35:10 +0000
+++ b/lisp/international/mule-conf.el   2010-08-04 08:06:52 +0000
@@ -1410,9 +1410,10 @@
   :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
 
 (define-coding-system 'compound-text
-  "Compound text based generic encoding for decoding unknown messages.
-
-This coding system does not support extended segments of CTEXT."
+  "Compound text based generic encoding.
+This coding system is an extension of X's \"Compound Text Encoding\".
+It encodes many characters using the normal ISO-2022 designation sequences,
+but it doesn't support extended segments of CTEXT."
   :coding-type 'iso-2022
   :mnemonic ?x
   :charset-list 'iso-2022
@@ -1432,7 +1433,7 @@
 ;; not have a mime-charset property, to prevent it from showing up
 ;; close to the beginning of coding systems ordered by priority.
 (define-coding-system 'ctext-no-compositions
- "Compound text based generic encoding for decoding unknown messages.
+ "Compound text based generic encoding.
 
 Like `compound-text', but does not produce escape sequences for compositions."
   :coding-type 'iso-2022
@@ -1445,8 +1446,9 @@
 (define-coding-system 'compound-text-with-extensions
  "Compound text encoding with ICCCM Extended Segment extensions.
 
-See the variable `ctext-non-standard-encodings-alist' for the
-detail about how extended segments are handled.
+See the variables `ctext-standard-encodings' and
+`ctext-non-standard-encodings-alist' for the detail about how
+extended segments are handled.
 
 This coding system should be used only for X selections.  It is inappropriate
 for decoding and encoding files, process I/O, etc."

=== modified file 'lisp/international/mule.el'
--- a/lisp/international/mule.el        2010-05-03 01:55:01 +0000
+++ b/lisp/international/mule.el        2010-08-06 08:11:19 +0000
@@ -282,6 +282,7 @@
        (plist-put props :short-name (symbol-name name)))
     (or (plist-get props :long-name)
        (plist-put props :long-name (plist-get props :short-name)))
+    (plist-put props :base name)
     ;; We can probably get a worthwhile amount in purespace.
     (setq props
          (mapcar (lambda (elt)
@@ -1408,7 +1409,9 @@
   '(("big5-0" big5 2 big5)
     ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
     ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
-    ("gbk-0" gbk 2 chinese-gbk)))
+    ("gbk-0" gbk 2 chinese-gbk)
+    ("koi8-r" koi8-r 1 koi8-r)
+    ("microsoft-cp1251" windows-1251 1 windows-1251)))
   "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
 
 It controls how extended segments of a compound text are handled
@@ -1497,6 +1500,20 @@
       (goto-char (point-min))
       (- (point-max) (point)))))
 
+(defvar ctext-standard-encodings
+  '(ascii latin-jisx0201 katakana-jisx0201
+         latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
+         greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
+         latin-iso8859-9
+         chinese-gb2312 japanese-jisx0208 korean-ksc5601)
+  "List of approved standard encodings (i.e. charsets) of X's Compound Text.
+Coding-system `compound-text-with-extensions' encodes a character
+belonging to any of those charsets using the normal ISO2022
+designation sequence unless the current language environment or
+the variable `ctext-non-standard-encodings' decide to use an extended
+segment of CTEXT for that character.  See also the documentation
+of `ctext-non-standard-encodings-alist'.")
+
 ;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
 ;; `ctext-non-standard-encodings' and a list specified by the key
 ;; `ctext-non-standard-encodings' for the currrent language
@@ -1508,115 +1525,94 @@
 ;; is encoded using UTF-8 encoding extention.
 
 (defun ctext-non-standard-encodings-table ()
-  (let (table)
-    ;; Setup charsets specified by the key
-    ;; `ctext-non-standard-encodings' for the current language
-    ;; environment and in `ctext-non-standard-encodings'.
-    (dolist (encoding (append
-                       (get-language-info current-language-environment
-                                          'ctext-non-standard-encodings)
-                       ctext-non-standard-encodings))
-      (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
+  (let* ((table (append ctext-non-standard-encodings
+                       (copy-sequence
+                        (get-language-info current-language-environment
+                                           'ctext-non-standard-encodings))))
+        (tail table)
+        elt)
+    (while tail
+      (setq elt (car tail))
+      (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
             (charset (nth 3 slot)))
        (if (charsetp charset)
-           (push (cons charset slot) table)
-         (dolist (cs charset)
-           (push (cons cs slot) table)))))
-
-    ;; Next prepend charsets for ISO2022 designation sequence.
-    (dolist (charset charset-list)
-      (let ((final (plist-get (charset-plist charset) :iso-final-char)))
-       (if (and (integerp final)
-                (>= final #x40) (<= final #x7e)
-                ;; Exclude ascii and chinese-cns11643-X.
-                (not (eq charset 'ascii))
-                (not (string-match "cns11643" (symbol-name charset))))
-           (push (cons charset nil) table))))
-
-    ;; Returned reversed list so that the charsets specified by the
-    ;; key `ctext-non-standard-encodings' for the current language
-    ;; have the highest priority.
-    (nreverse table)))
+           (setcar tail
+                   (cons (plist-get (charset-plist charset) :base) slot))
+         (setcar tail (cons (car charset) slot))
+         (dolist (cs (cdr charset))
+           (setcdr tail
+                   (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
+                         (cdr tail)))
+           (setq tail (cdr tail))))
+       (setq tail (cdr tail))))
+    table))
 
 (defun ctext-pre-write-conversion (from to)
   "Encode characters between FROM and TO as Compound Text w/Extended Segments.
 
-If FROM is a string, or if the current buffer is not the one set up for us
-by `encode-coding-string', generate a new temp buffer, insert the text,
-and convert it in the temporary buffer.  Otherwise, convert in-place."
+If FROM is a string, generate a new temp buffer, insert the text,
+and convert it in the temporary buffer.  Otherwise, convert
+in-place."
   (save-match-data
     ;; Setup a working buffer if necessary.
     (when (stringp from)
       (set-buffer (generate-new-buffer " *temp"))
       (set-buffer-multibyte (multibyte-string-p from))
-      (insert from))
-
-    ;; Now we can encode the whole buffer.
-    (let ((encoding-table (ctext-non-standard-encodings-table))
-         last-coding-system-used
-         last-pos last-encoding-info
-         encoding-info end-pos ch)
-      (goto-char (setq last-pos (point-min)))
-      (setq end-pos (point-marker))
-      (while (re-search-forward "[^\000-\177]+" nil t)
-       ;; Found a sequence of non-ASCII characters.
-       (setq last-pos (match-beginning 0)
-             ch (char-after last-pos)
-             last-encoding-info (catch 'tag
-                                  (dolist (elt encoding-table)
-                                    (if (encode-char ch (car elt))
-                                        (throw 'tag (cdr elt))))
-                                  'utf-8))
-       (set-marker end-pos (match-end 0))
-       (goto-char (1+ last-pos))
-       (catch 'tag
-         (while t
-           (setq encoding-info
-                 (if (< (point) end-pos)
-                     (catch 'tag
-                       (setq ch (following-char))
-                       (dolist (elt encoding-table)
-                         (if (encode-char ch (car elt))
-                             (throw 'tag (cdr elt))))
-                       'utf-8)))
-           (unless (eq last-encoding-info encoding-info)
-             (cond ((consp last-encoding-info)
-                    ;; Encode the previous range using an extended
-                    ;; segment.
-                    (let ((encoding-name (car last-encoding-info))
-                          (coding-system (nth 1 last-encoding-info))
-                          (noctets (nth 2 last-encoding-info))
-                          len)
-                      (encode-coding-region last-pos (point) coding-system)
-                      (setq len (+ (length encoding-name) 1
-                                   (- (point) last-pos)))
-                      ;; According to the spec of CTEXT, it is not
-                      ;; necessary to produce this extra designation
-                      ;; sequence, but some buggy application
-                      ;; (e.g. crxvt-gb) requires it.
-                      (insert "\e(B")
-                      (save-excursion
-                        (goto-char last-pos)
-                        (insert (format "\e%%/%d" noctets))
-                        (insert-byte (+ (/ len 128) 128) 1)
-                        (insert-byte (+ (% len 128) 128) 1)
-                        (insert encoding-name)
-                        (insert 2))))
-                   ((eq last-encoding-info 'utf-8)
-                    ;; Encode the previous range using UTF-8 encoding
-                    ;; extention.
-                    (encode-coding-region last-pos (point) 'mule-utf-8)
-                    (save-excursion
-                      (goto-char last-pos)
-                      (insert "\e%G"))
-                    (insert "\e%@")))
-             (setq last-pos (point)
-                   last-encoding-info encoding-info))
-           (if (< (point) end-pos)
-               (forward-char 1)
-             (throw 'tag nil)))))
-      (set-marker end-pos nil)
-      (goto-char (point-min))))
+      (insert from)
+      (setq from 1 to (point-max)))
+    (save-restriction
+      (narrow-to-region from to)
+      (goto-char from)
+      (let ((encoding-table (ctext-non-standard-encodings-table))
+           (charset-list (sort-charsets
+                          (copy-sequence ctext-standard-encodings)))
+           (end-pos (make-marker))
+           last-coding-system-used
+           last-pos charset encoding-info)
+       (dolist (elt encoding-table)
+         (push (car elt) charset-list))
+       (setq end-pos (point-marker))
+       (while (re-search-forward "[^\0-\177]+" nil t)
+         ;; Found a sequence of non-ASCII characters.
+         (set-marker end-pos (match-end 0))
+         (goto-char (match-beginning 0))
+         (setq last-pos (point)
+               charset (char-charset (following-char) charset-list))
+         (forward-char 1)
+         (while (and (< (point) end-pos)
+                     (eq charset (char-charset (following-char) charset-list)))
+           (forward-char 1))
+         (if charset
+             (if (setq encoding-info (cdr (assq charset encoding-table)))
+                 ;; Encode this range using an extended segment.
+                 (let ((encoding-name (car encoding-info))
+                       (coding-system (nth 1 encoding-info))
+                       (noctets (nth 2 encoding-info))
+                       len)
+                   (encode-coding-region last-pos (point) coding-system)
+                   (setq len (+ (length encoding-name) 1
+                                (- (point) last-pos)))
+                   ;; According to the spec of CTEXT, it is not
+                   ;; necessary to produce this extra designation
+                   ;; sequence, but some buggy application
+                   ;; (e.g. crxvt-gb) requires it.
+                   (insert "\e(B")
+                   (save-excursion
+                     (goto-char last-pos)
+                     (insert (format "\e%%/%d" noctets))
+                     (insert-byte (+ (/ len 128) 128) 1)
+                     (insert-byte (+ (% len 128) 128) 1)
+                     (insert encoding-name)
+                     (insert 2)))
+               ;; Encode this range as characters in CHARSET.
+               (put-text-property last-pos (point) 'charset charset))
+           ;; Encode this range using UTF-8 encoding extention.
+           (encode-coding-region last-pos (point) 'mule-utf-8)
+           (save-excursion
+             (goto-char last-pos)
+             (insert "\e%G"))
+           (insert "\e%@")))
+       (goto-char (point-min)))))
   ;; Must return nil, as build_annotations_2 expects that.
   nil)
 

=== modified file 'lisp/language/cyrillic.el'
--- a/lisp/language/cyrillic.el 2010-01-13 08:35:10 +0000
+++ b/lisp/language/cyrillic.el 2010-08-04 08:06:52 +0000
@@ -239,13 +239,6 @@
           (documentation . "Support for Tajik using KOI8-T."))
  '("Cyrillic"))
 
-(let ((elt `("microsoft-cp1251" windows-1251 1
-            ,(get 'encode-windows-1251 'translation-table)))
-      (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
-  (if slot
-      (setcdr slot (cdr elt))
-    (push elt ctext-non-standard-encodings-alist)))
-
 (set-language-info-alist
  "Bulgarian" `((coding-system windows-1251)
               (coding-priority windows-1251)

=== modified file 'src/ChangeLog'
--- a/src/ChangeLog     2010-08-01 04:28:09 +0000
+++ b/src/ChangeLog     2010-08-06 08:11:19 +0000
@@ -1,3 +1,14 @@
+2010-08-06  Kenichi Handa  <address@hidden>
+
+       * charset.c: Include <stdlib.h>
+       (struct charset_sort_data): New struct.
+       (charset_compare): New function.
+       (Fsort_charsets): New funciton.
+       (syms_of_charset): Declare Fsort_charsets as a Lisp function.
+
+       * coding.c (decode_coding_iso_2022): Fix checking of dimension
+       number in CTEXT extended segment.
+
 2010-08-01  Juanma Barranquero  <address@hidden>
 
        * w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring.

=== modified file 'src/charset.c'
--- a/src/charset.c     2010-02-28 14:19:31 +0000
+++ b/src/charset.c     2010-08-06 08:11:19 +0000
@@ -28,6 +28,7 @@
 #include <config.h>
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <ctype.h>
 #include <sys/types.h>
@@ -2139,23 +2140,22 @@
     charset = CHAR_CHARSET (XINT (ch));
   else
     {
-      Lisp_Object charset_list;
-
       if (CONSP (restriction))
        {
-         for (charset_list = Qnil; CONSP (restriction);
-              restriction = XCDR (restriction))
+         int c = XFASTINT (ch);
+
+         for (; CONSP (restriction); restriction = XCDR (restriction))
            {
-             int id;
+             struct charset *charset;
 
-             CHECK_CHARSET_GET_ID (XCAR (restriction), id);
-             charset_list = Fcons (make_number (id), charset_list);
+             CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
+             if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
+               return XCAR (restriction);
            }
-         charset_list = Fnreverse (charset_list);
+         return Qnil;
        }
-      else
-       charset_list = coding_system_charset_list (restriction);
-      charset = char_charset (XINT (ch), charset_list, NULL);
+      restriction = coding_system_charset_list (restriction);
+      charset = char_charset (XINT (ch), restriction, NULL);
       if (! charset)
        return Qnil;
     }
@@ -2312,6 +2312,69 @@
   return make_number (id);
 }
 
+struct charset_sort_data
+{
+  Lisp_Object charset;
+  int id;
+  int priority;
+};
+
+static int
+charset_compare (const void *d1, const void *d2)
+{
+  const struct charset_sort_data *data1 = d1, *data2 = d2;
+  return (data1->priority - data2->priority);
+}
+
+DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
+       doc: /* Sort charset list CHARSETS by a priority of each charset.
+Return the sorted list.  CHARSETS is modified by side effects.
+See also `charset-priority-list' and `set-charset-priority'.  */)
+     (Lisp_Object charsets)
+{
+  Lisp_Object len = Flength (charsets);
+  int n = XFASTINT (len), i, j, done;
+  Lisp_Object tail, elt, attrs;
+  struct charset_sort_data *sort_data;
+  int id, min_id, max_id;
+  USE_SAFE_ALLOCA;
+
+  if (n == 0)
+    return Qnil;
+  SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
+  for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
+    {
+      elt = XCAR (tail);
+      CHECK_CHARSET_GET_ATTR (elt, attrs);
+      sort_data[i].charset = elt;
+      sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
+      if (i == 0)
+       min_id = max_id = id;
+      else if (id < min_id)
+       min_id = id;
+      else if (id > max_id)
+       max_id = id;
+    }
+  for (done = 0, tail = Vcharset_ordered_list, i = 0;
+       done < n && CONSP (tail); tail = XCDR (tail), i++)
+    {
+      elt = XCAR (tail);
+      id = XFASTINT (elt);
+      if (id >= min_id && id <= max_id)
+       for (j = 0; j < n; j++)
+         if (sort_data[j].id == id)
+           {
+             sort_data[j].priority = i;
+             done++;
+           }
+    }
+  qsort (sort_data, n, sizeof *sort_data, charset_compare);
+  for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
+    XSETCAR (tail, sort_data[i].charset);
+  SAFE_FREE ();
+  return charsets;
+}
+
 
 void
 init_charset ()
@@ -2414,6 +2477,7 @@
   defsubr (&Scharset_priority_list);
   defsubr (&Sset_charset_priority);
   defsubr (&Scharset_id_internal);
+  defsubr (&Ssort_charsets);
 
   DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
               doc: /* *List of directories to search for charset map files.  
*/);

=== modified file 'src/coding.c'
--- a/src/coding.c      2010-03-16 05:48:41 +0000
+++ b/src/coding.c      2010-08-06 08:11:19 +0000
@@ -3935,7 +3935,7 @@
                  int size;
 
                  ONE_MORE_BYTE (dim);
-                 if (dim < 0 || dim > 4)
+                 if (dim < '0' || dim > '4')
                    goto invalid_code;
                  ONE_MORE_BYTE (M);
                  if (M < 128)


reply via email to

[Prev in Thread] Current Thread [Next in Thread]