[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] /srv/bzr/emacs/emacs-23 r99966: Modify the coding system c
From: |
Kenichi Handa |
Subject: |
[Emacs-diffs] /srv/bzr/emacs/emacs-23 r99966: Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text. |
Date: |
Fri, 06 Aug 2010 19:58:51 +0900 |
User-agent: |
Bazaar (2.0.3) |
------------------------------------------------------------
revno: 99966 [merge]
committer: Kenichi Handa <address@hidden>
branch nick: emacs-23
timestamp: Fri 2010-08-06 19:58:51 +0900
message:
Modify the coding system compound-text-with-extensions to conform to the spec
of Compound Text.
modified:
lisp/ChangeLog
lisp/international/mule-conf.el
lisp/international/mule.el
lisp/language/cyrillic.el
src/ChangeLog
src/charset.c
src/coding.c
=== modified file 'lisp/ChangeLog'
--- a/lisp/ChangeLog 2010-08-05 21:31:03 +0000
+++ b/lisp/ChangeLog 2010-08-06 08:15:12 +0000
@@ -1,9 +1,37 @@
+2010-08-06 Kenichi Handa <address@hidden>
+
+ * international/mule.el (define-charset): Store NAME as :base
+ property.
+ (ctext-non-standard-encodings-table): Pay attention to charset
+ aliases.
+ (ctext-pre-write-conversion): Sort ctext-standard-encodings by the
+ current priority. Force using the designation of the specific
+ charset by adding `charset' text property. Improve the whole
+ algorithm.
+
2010-08-05 Juanma Barranquero <address@hidden>
* emulation/pc-select.el (pc-selection-mode-hook)
(copy-region-as-kill-nomark, beginning-of-buffer-mark)
(pc-selection-mode): Fix typos in docstrings.
+2010-08-04 Kenichi Handa <address@hidden>
+
+ * language/cyrillic.el: Don't add "microsoft-cp1251" to
+ ctext-non-standard-encodings-alist here.
+
+ * international/mule.el (ctext-non-standard-encodings-alist): Add
+ "koi8-r" and "microsoft-cp1251".
+ (ctext-standard-encodings): New variable.
+ (ctext-non-standard-encodings-table): List only elements for
+ non-standard encodings.
+ (ctext-pre-write-conversion): Adjusted for the above change.
+ Check ctext-standard-encodings.
+
+ * international/mule-conf.el (compound-text): Doc fix.
+ (ctext-no-compositions): Doc fix.
+ (compound-text-with-extensions): Doc fix.
+
2010-08-04 Stefan Monnier <address@hidden>
* simple.el (exchange-dot-and-mark): Mark obsolete, finally.
=== modified file 'lisp/international/mule-conf.el'
--- a/lisp/international/mule-conf.el 2010-01-13 08:35:10 +0000
+++ b/lisp/international/mule-conf.el 2010-08-04 08:06:52 +0000
@@ -1410,9 +1410,10 @@
:flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
(define-coding-system 'compound-text
- "Compound text based generic encoding for decoding unknown messages.
-
-This coding system does not support extended segments of CTEXT."
+ "Compound text based generic encoding.
+This coding system is an extension of X's \"Compound Text Encoding\".
+It encodes many characters using the normal ISO-2022 designation sequences,
+but it doesn't support extended segments of CTEXT."
:coding-type 'iso-2022
:mnemonic ?x
:charset-list 'iso-2022
@@ -1432,7 +1433,7 @@
;; not have a mime-charset property, to prevent it from showing up
;; close to the beginning of coding systems ordered by priority.
(define-coding-system 'ctext-no-compositions
- "Compound text based generic encoding for decoding unknown messages.
+ "Compound text based generic encoding.
Like `compound-text', but does not produce escape sequences for compositions."
:coding-type 'iso-2022
@@ -1445,8 +1446,9 @@
(define-coding-system 'compound-text-with-extensions
"Compound text encoding with ICCCM Extended Segment extensions.
-See the variable `ctext-non-standard-encodings-alist' for the
-detail about how extended segments are handled.
+See the variables `ctext-standard-encodings' and
+`ctext-non-standard-encodings-alist' for the detail about how
+extended segments are handled.
This coding system should be used only for X selections. It is inappropriate
for decoding and encoding files, process I/O, etc."
=== modified file 'lisp/international/mule.el'
--- a/lisp/international/mule.el 2010-05-03 01:55:01 +0000
+++ b/lisp/international/mule.el 2010-08-06 08:11:19 +0000
@@ -282,6 +282,7 @@
(plist-put props :short-name (symbol-name name)))
(or (plist-get props :long-name)
(plist-put props :long-name (plist-get props :short-name)))
+ (plist-put props :base name)
;; We can probably get a worthwhile amount in purespace.
(setq props
(mapcar (lambda (elt)
@@ -1408,7 +1409,9 @@
'(("big5-0" big5 2 big5)
("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
- ("gbk-0" gbk 2 chinese-gbk)))
+ ("gbk-0" gbk 2 chinese-gbk)
+ ("koi8-r" koi8-r 1 koi8-r)
+ ("microsoft-cp1251" windows-1251 1 windows-1251)))
"Alist of non-standard encoding names vs the corresponding usages in CTEXT.
It controls how extended segments of a compound text are handled
@@ -1497,6 +1500,20 @@
(goto-char (point-min))
(- (point-max) (point)))))
+(defvar ctext-standard-encodings
+ '(ascii latin-jisx0201 katakana-jisx0201
+ latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
+ greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
+ latin-iso8859-9
+ chinese-gb2312 japanese-jisx0208 korean-ksc5601)
+ "List of approved standard encodings (i.e. charsets) of X's Compound Text.
+Coding-system `compound-text-with-extensions' encodes a character
+belonging to any of those charsets using the normal ISO2022
+designation sequence unless the current language environment or
+the variable `ctext-non-standard-encodings' decide to use an extended
+segment of CTEXT for that character. See also the documentation
+of `ctext-non-standard-encodings-alist'.")
+
;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
;; `ctext-non-standard-encodings' and a list specified by the key
;; `ctext-non-standard-encodings' for the currrent language
@@ -1508,115 +1525,94 @@
;; is encoded using UTF-8 encoding extention.
(defun ctext-non-standard-encodings-table ()
- (let (table)
- ;; Setup charsets specified by the key
- ;; `ctext-non-standard-encodings' for the current language
- ;; environment and in `ctext-non-standard-encodings'.
- (dolist (encoding (append
- (get-language-info current-language-environment
- 'ctext-non-standard-encodings)
- ctext-non-standard-encodings))
- (let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
+ (let* ((table (append ctext-non-standard-encodings
+ (copy-sequence
+ (get-language-info current-language-environment
+ 'ctext-non-standard-encodings))))
+ (tail table)
+ elt)
+ (while tail
+ (setq elt (car tail))
+ (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
(charset (nth 3 slot)))
(if (charsetp charset)
- (push (cons charset slot) table)
- (dolist (cs charset)
- (push (cons cs slot) table)))))
-
- ;; Next prepend charsets for ISO2022 designation sequence.
- (dolist (charset charset-list)
- (let ((final (plist-get (charset-plist charset) :iso-final-char)))
- (if (and (integerp final)
- (>= final #x40) (<= final #x7e)
- ;; Exclude ascii and chinese-cns11643-X.
- (not (eq charset 'ascii))
- (not (string-match "cns11643" (symbol-name charset))))
- (push (cons charset nil) table))))
-
- ;; Returned reversed list so that the charsets specified by the
- ;; key `ctext-non-standard-encodings' for the current language
- ;; have the highest priority.
- (nreverse table)))
+ (setcar tail
+ (cons (plist-get (charset-plist charset) :base) slot))
+ (setcar tail (cons (car charset) slot))
+ (dolist (cs (cdr charset))
+ (setcdr tail
+ (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
+ (cdr tail)))
+ (setq tail (cdr tail))))
+ (setq tail (cdr tail))))
+ table))
(defun ctext-pre-write-conversion (from to)
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
-If FROM is a string, or if the current buffer is not the one set up for us
-by `encode-coding-string', generate a new temp buffer, insert the text,
-and convert it in the temporary buffer. Otherwise, convert in-place."
+If FROM is a string, generate a new temp buffer, insert the text,
+and convert it in the temporary buffer. Otherwise, convert
+in-place."
(save-match-data
;; Setup a working buffer if necessary.
(when (stringp from)
(set-buffer (generate-new-buffer " *temp"))
(set-buffer-multibyte (multibyte-string-p from))
- (insert from))
-
- ;; Now we can encode the whole buffer.
- (let ((encoding-table (ctext-non-standard-encodings-table))
- last-coding-system-used
- last-pos last-encoding-info
- encoding-info end-pos ch)
- (goto-char (setq last-pos (point-min)))
- (setq end-pos (point-marker))
- (while (re-search-forward "[^\000-\177]+" nil t)
- ;; Found a sequence of non-ASCII characters.
- (setq last-pos (match-beginning 0)
- ch (char-after last-pos)
- last-encoding-info (catch 'tag
- (dolist (elt encoding-table)
- (if (encode-char ch (car elt))
- (throw 'tag (cdr elt))))
- 'utf-8))
- (set-marker end-pos (match-end 0))
- (goto-char (1+ last-pos))
- (catch 'tag
- (while t
- (setq encoding-info
- (if (< (point) end-pos)
- (catch 'tag
- (setq ch (following-char))
- (dolist (elt encoding-table)
- (if (encode-char ch (car elt))
- (throw 'tag (cdr elt))))
- 'utf-8)))
- (unless (eq last-encoding-info encoding-info)
- (cond ((consp last-encoding-info)
- ;; Encode the previous range using an extended
- ;; segment.
- (let ((encoding-name (car last-encoding-info))
- (coding-system (nth 1 last-encoding-info))
- (noctets (nth 2 last-encoding-info))
- len)
- (encode-coding-region last-pos (point) coding-system)
- (setq len (+ (length encoding-name) 1
- (- (point) last-pos)))
- ;; According to the spec of CTEXT, it is not
- ;; necessary to produce this extra designation
- ;; sequence, but some buggy application
- ;; (e.g. crxvt-gb) requires it.
- (insert "\e(B")
- (save-excursion
- (goto-char last-pos)
- (insert (format "\e%%/%d" noctets))
- (insert-byte (+ (/ len 128) 128) 1)
- (insert-byte (+ (% len 128) 128) 1)
- (insert encoding-name)
- (insert 2))))
- ((eq last-encoding-info 'utf-8)
- ;; Encode the previous range using UTF-8 encoding
- ;; extention.
- (encode-coding-region last-pos (point) 'mule-utf-8)
- (save-excursion
- (goto-char last-pos)
- (insert "\e%G"))
- (insert "\e%@")))
- (setq last-pos (point)
- last-encoding-info encoding-info))
- (if (< (point) end-pos)
- (forward-char 1)
- (throw 'tag nil)))))
- (set-marker end-pos nil)
- (goto-char (point-min))))
+ (insert from)
+ (setq from 1 to (point-max)))
+ (save-restriction
+ (narrow-to-region from to)
+ (goto-char from)
+ (let ((encoding-table (ctext-non-standard-encodings-table))
+ (charset-list (sort-charsets
+ (copy-sequence ctext-standard-encodings)))
+ (end-pos (make-marker))
+ last-coding-system-used
+ last-pos charset encoding-info)
+ (dolist (elt encoding-table)
+ (push (car elt) charset-list))
+ (setq end-pos (point-marker))
+ (while (re-search-forward "[^\0-\177]+" nil t)
+ ;; Found a sequence of non-ASCII characters.
+ (set-marker end-pos (match-end 0))
+ (goto-char (match-beginning 0))
+ (setq last-pos (point)
+ charset (char-charset (following-char) charset-list))
+ (forward-char 1)
+ (while (and (< (point) end-pos)
+ (eq charset (char-charset (following-char) charset-list)))
+ (forward-char 1))
+ (if charset
+ (if (setq encoding-info (cdr (assq charset encoding-table)))
+ ;; Encode this range using an extended segment.
+ (let ((encoding-name (car encoding-info))
+ (coding-system (nth 1 encoding-info))
+ (noctets (nth 2 encoding-info))
+ len)
+ (encode-coding-region last-pos (point) coding-system)
+ (setq len (+ (length encoding-name) 1
+ (- (point) last-pos)))
+ ;; According to the spec of CTEXT, it is not
+ ;; necessary to produce this extra designation
+ ;; sequence, but some buggy application
+ ;; (e.g. crxvt-gb) requires it.
+ (insert "\e(B")
+ (save-excursion
+ (goto-char last-pos)
+ (insert (format "\e%%/%d" noctets))
+ (insert-byte (+ (/ len 128) 128) 1)
+ (insert-byte (+ (% len 128) 128) 1)
+ (insert encoding-name)
+ (insert 2)))
+ ;; Encode this range as characters in CHARSET.
+ (put-text-property last-pos (point) 'charset charset))
+ ;; Encode this range using UTF-8 encoding extention.
+ (encode-coding-region last-pos (point) 'mule-utf-8)
+ (save-excursion
+ (goto-char last-pos)
+ (insert "\e%G"))
+ (insert "\e%@")))
+ (goto-char (point-min)))))
;; Must return nil, as build_annotations_2 expects that.
nil)
=== modified file 'lisp/language/cyrillic.el'
--- a/lisp/language/cyrillic.el 2010-01-13 08:35:10 +0000
+++ b/lisp/language/cyrillic.el 2010-08-04 08:06:52 +0000
@@ -239,13 +239,6 @@
(documentation . "Support for Tajik using KOI8-T."))
'("Cyrillic"))
-(let ((elt `("microsoft-cp1251" windows-1251 1
- ,(get 'encode-windows-1251 'translation-table)))
- (slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
- (if slot
- (setcdr slot (cdr elt))
- (push elt ctext-non-standard-encodings-alist)))
-
(set-language-info-alist
"Bulgarian" `((coding-system windows-1251)
(coding-priority windows-1251)
=== modified file 'src/ChangeLog'
--- a/src/ChangeLog 2010-08-01 04:28:09 +0000
+++ b/src/ChangeLog 2010-08-06 08:11:19 +0000
@@ -1,3 +1,14 @@
+2010-08-06 Kenichi Handa <address@hidden>
+
+ * charset.c: Include <stdlib.h>
+ (struct charset_sort_data): New struct.
+ (charset_compare): New function.
+ (Fsort_charsets): New funciton.
+ (syms_of_charset): Declare Fsort_charsets as a Lisp function.
+
+ * coding.c (decode_coding_iso_2022): Fix checking of dimension
+ number in CTEXT extended segment.
+
2010-08-01 Juanma Barranquero <address@hidden>
* w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring.
=== modified file 'src/charset.c'
--- a/src/charset.c 2010-02-28 14:19:31 +0000
+++ b/src/charset.c 2010-08-06 08:11:19 +0000
@@ -28,6 +28,7 @@
#include <config.h>
#include <stdio.h>
+#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <sys/types.h>
@@ -2139,23 +2140,22 @@
charset = CHAR_CHARSET (XINT (ch));
else
{
- Lisp_Object charset_list;
-
if (CONSP (restriction))
{
- for (charset_list = Qnil; CONSP (restriction);
- restriction = XCDR (restriction))
+ int c = XFASTINT (ch);
+
+ for (; CONSP (restriction); restriction = XCDR (restriction))
{
- int id;
+ struct charset *charset;
- CHECK_CHARSET_GET_ID (XCAR (restriction), id);
- charset_list = Fcons (make_number (id), charset_list);
+ CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
+ if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
+ return XCAR (restriction);
}
- charset_list = Fnreverse (charset_list);
+ return Qnil;
}
- else
- charset_list = coding_system_charset_list (restriction);
- charset = char_charset (XINT (ch), charset_list, NULL);
+ restriction = coding_system_charset_list (restriction);
+ charset = char_charset (XINT (ch), restriction, NULL);
if (! charset)
return Qnil;
}
@@ -2312,6 +2312,69 @@
return make_number (id);
}
+struct charset_sort_data
+{
+ Lisp_Object charset;
+ int id;
+ int priority;
+};
+
+static int
+charset_compare (const void *d1, const void *d2)
+{
+ const struct charset_sort_data *data1 = d1, *data2 = d2;
+ return (data1->priority - data2->priority);
+}
+
+DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
+ doc: /* Sort charset list CHARSETS by a priority of each charset.
+Return the sorted list. CHARSETS is modified by side effects.
+See also `charset-priority-list' and `set-charset-priority'. */)
+ (Lisp_Object charsets)
+{
+ Lisp_Object len = Flength (charsets);
+ int n = XFASTINT (len), i, j, done;
+ Lisp_Object tail, elt, attrs;
+ struct charset_sort_data *sort_data;
+ int id, min_id, max_id;
+ USE_SAFE_ALLOCA;
+
+ if (n == 0)
+ return Qnil;
+ SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
+ for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
+ {
+ elt = XCAR (tail);
+ CHECK_CHARSET_GET_ATTR (elt, attrs);
+ sort_data[i].charset = elt;
+ sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
+ if (i == 0)
+ min_id = max_id = id;
+ else if (id < min_id)
+ min_id = id;
+ else if (id > max_id)
+ max_id = id;
+ }
+ for (done = 0, tail = Vcharset_ordered_list, i = 0;
+ done < n && CONSP (tail); tail = XCDR (tail), i++)
+ {
+ elt = XCAR (tail);
+ id = XFASTINT (elt);
+ if (id >= min_id && id <= max_id)
+ for (j = 0; j < n; j++)
+ if (sort_data[j].id == id)
+ {
+ sort_data[j].priority = i;
+ done++;
+ }
+ }
+ qsort (sort_data, n, sizeof *sort_data, charset_compare);
+ for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
+ XSETCAR (tail, sort_data[i].charset);
+ SAFE_FREE ();
+ return charsets;
+}
+
void
init_charset ()
@@ -2414,6 +2477,7 @@
defsubr (&Scharset_priority_list);
defsubr (&Sset_charset_priority);
defsubr (&Scharset_id_internal);
+ defsubr (&Ssort_charsets);
DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
doc: /* *List of directories to search for charset map files.
*/);
=== modified file 'src/coding.c'
--- a/src/coding.c 2010-03-16 05:48:41 +0000
+++ b/src/coding.c 2010-08-06 08:11:19 +0000
@@ -3935,7 +3935,7 @@
int size;
ONE_MORE_BYTE (dim);
- if (dim < 0 || dim > 4)
+ if (dim < '0' || dim > '4')
goto invalid_code;
ONE_MORE_BYTE (M);
if (M < 128)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] /srv/bzr/emacs/emacs-23 r99966: Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text.,
Kenichi Handa <=