emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/international/utf-8.el


From: Kenichi Handa
Subject: [Emacs-diffs] Changes to emacs/lisp/international/utf-8.el
Date: Tue, 11 Mar 2003 19:45:50 -0500

Index: emacs/lisp/international/utf-8.el
diff -c emacs/lisp/international/utf-8.el:1.24 emacs/lisp/international/utf-8.el:1.25
*** emacs/lisp/international/utf-8.el:1.24      Tue Feb  4 08:09:38 2003
--- emacs/lisp/international/utf-8.el   Tue Mar 11 19:45:49 2003
***************
*** 308,325 ****
      ((r5 = ,(charset-id 'eight-bit-control))
       (r6 = ,(charset-id 'eight-bit-graphic))
       (loop
        (read r0)
  
        ;; 1byte encoding, i.e., ascii
        (if (r0 < #x80)
!         (write r0)
        (if (r0 < #xc0)             ; continuation byte (invalid here)
!           (if (r0 < #xa0)
!               (write-multibyte-character r5 r0)
!             (write-multibyte-character r6 r0))
          ;; 2 byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
          (if (r0 < #xe0)
!             ((read r1)
  
               (if ((r1 & #b11000000) != #b10000000)
                   ;; Invalid 2-byte sequence
--- 308,327 ----
      ((r5 = ,(charset-id 'eight-bit-control))
       (r6 = ,(charset-id 'eight-bit-graphic))
       (loop
+       (r0 = -1)
        (read r0)
  
        ;; 1byte encoding, i.e., ascii
        (if (r0 < #x80)
!         ((write r0))
        (if (r0 < #xc0)             ; continuation byte (invalid here)
!           ((if (r0 < #xa0)
!                (write-multibyte-character r5 r0)
!              (write-multibyte-character r6 r0)))
          ;; 2 byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
          (if (r0 < #xe0)
!             ((r1 = -1)
!              (read r1)
  
               (if ((r1 & #b11000000) != #b10000000)
                   ;; Invalid 2-byte sequence
***************
*** 373,379 ****
            ;; 3byte encoding
            ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
            (if (r0 < #xf0)
!               ((read r1 r2)
  
                 ;; This is set to 1 if the encoding is invalid.
                 (r4 = 0)
--- 375,383 ----
            ;; 3byte encoding
            ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
            (if (r0 < #xf0)
!               ((r1 = -1)
!                (r2 = -1)
!                (read r1 r2)
  
                 ;; This is set to 1 if the encoding is invalid.
                 (r4 = 0)
***************
*** 478,484 ****
                  ;; 4byte encoding
                  ;; keep those bytes as eight-bit-{control|graphic}
                  ;; Fixme: allow lookup in utf-subst-table-for-decode.
!                 ((read r1 r2 r3)
                   ;; r0 > #xf0, thus eight-bit-graphic
                   (write-multibyte-character r6 r0)
                   (if (r1 < #xa0)
--- 482,491 ----
                  ;; 4byte encoding
                  ;; keep those bytes as eight-bit-{control|graphic}
                  ;; Fixme: allow lookup in utf-subst-table-for-decode.
!                 ((r1 = -1)
!                  (r2 = -1)
!                  (r3 = -1)
!                  (read r1 r2 r3)
                   ;; r0 > #xf0, thus eight-bit-graphic
                   (write-multibyte-character r6 r0)
                   (if (r1 < #xa0)
***************
*** 512,518 ****
                               (write-multibyte-character r6 r1)))))))
                ;; else invalid byte >= #xfe
                (write-multibyte-character r6 r0))))))
!       (repeat))))
  
    "CCL program to decode UTF-8.
  Basic decoding is done into the charsets ascii, latin-iso8859-1 and
--- 519,551 ----
                               (write-multibyte-character r6 r1)))))))
                ;; else invalid byte >= #xfe
                (write-multibyte-character r6 r0))))))
!       (repeat)))
! 
!     ;; At EOF...
!     (if (r0 >= 0)
!       ((if (r0 < #x80)
!            (write r0)
!          (if (r0 < #xa0)
!              (write-multibyte-character r5 r0)
!            ((write-multibyte-character r6 r0))))
!        (if (r1 >= 0)
!            ((if (r1 < #x80)
!                 (write r1)
!               (if (r1 < #xa0)
!                   (write-multibyte-character r5 r1)
!                 ((write-multibyte-character r6 r1))))
!             (if (r2 >= 0)
!                 ((if (r2 < #x80)
!                      (write r2)
!                    (if (r2 < #xa0)
!                        (write-multibyte-character r5 r2)
!                      ((write-multibyte-character r6 r2))))
!                  (if (r3 >= 0)
!                      (if (r3 < #x80)
!                          (write r3)
!                        (if (r3 < #xa0)
!                            (write-multibyte-character r5 r3)
!                          ((write-multibyte-character r6 r3))))))))))))
  
    "CCL program to decode UTF-8.
  Basic decoding is done into the charsets ascii, latin-iso8859-1 and




reply via email to

[Prev in Thread] Current Thread [Next in Thread]