[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] emacs/lisp/language indian.el
From: Kenichi Handa
Subject: [Emacs-diffs] emacs/lisp/language indian.el
Date: Fri, 25 Dec 2009 02:45:47 +0000
CVSROOT: /cvsroot/emacs
Module name: emacs
Changes by: Kenichi Handa <handa> 09/12/25 02:45:47
Modified files:
lisp/language : indian.el
Log message:
(devanagari-composable-pattern): Fixed to
handle ZWNJ and ZWJ. Use it in composition-function-table for
Devanagari.
(malayalam-composable-pattern): Fix previous change.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/emacs/lisp/language/indian.el?cvsroot=emacs&r1=1.41&r2=1.42
Patches:
Index: indian.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/language/indian.el,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -b -r1.41 -r1.42
--- indian.el 12 Dec 2009 02:28:05 -0000 1.41
+++ indian.el 25 Dec 2009 02:45:47 -0000 1.42
@@ -139,12 +139,34 @@
regexp))
(defconst devanagari-composable-pattern
+ (let ((table
+ '(("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel
+ ("C" . "[\u0915-\u0939]") ; consonant
+ ("R" . "\u0930") ; RA
+ ("n" . "\u093C") ; NUKTA
+ ("H" . "\u094D") ; HALANT
+ ("m" . "\u093F") ; vowel sign (pre)
+ ("u" . "[\u0945-\u0948\u0955]") ; vowel sign (above)
+ ("b" . "[\u0941-\u0944\u0962-\u0963]") ; vowel sign (below)
+ ("p" . "[\u093E\u0940\u0949-\u094C]") ; vowel sign (post)
+ ("A" . "[\u0900-\u0902\u0953-\u0954]") ; vowel modifier (above)
+ ("a" . "\u0903") ; vowel modifier (post)
+ ("S" . "\u0951") ; stress sign (above)
+ ("s" . "\u0952") ; stress sign (below)
+ ("J" . "\u200D") ; ZWJ
+ ("N" . "\u200C") ; ZWNJ
+ ("X" . "[\u0900-\u097F]")))) ; all coverage
+ (indian-compose-regexp
(concat
- "\\([à¤
-à¤à¥ ॡ][à¤à¤]?\\)\\|[à¤à¥¤]"
- "\\|\\("
-
"\\(?:\\(?:[à¤-हà¥-à¥]à¥\\)?\\(?:[à¤-हà¥-à¥]à¥\\)?\\(?:[à¤-हà¥-à¥]à¥\\)?[à¤-हà¥-à¥]à¥\\)?"
- "[à¤-हà¥-à¥]\\(?:à¥\\|[ा-à¥à¥¢à¥£]?[à¤à¤]?\\)?"
- "\\)")
+ ;; syllables with an independent vowel, or
+ "\\(?:RH\\)?Vn?m?b?u?p?n?A?s?S?a?\\|"
+ ;; consonant-based syllables, or
+ "\\(?:Cn?J?HJ?\\)*Cn?\\(?:H[NJ]?\\|m?b?u?p?n?A?s?S?a?\\)\\|"
+ ;; special consonant form, or
+ "JHR\\|"
+ ;; any other singleton characters
+ "X")
+ table))
"Regexp matching a composable sequence of Devanagari characters.")
(defconst tamil-composable-pattern
@@ -165,23 +187,24 @@
"Regexp matching a composable sequence of Kannada characters.")
(defconst malayalam-composable-pattern
- (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
+ (let ((table
+ '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
("C" . "[\u0D15-\u0D39]") ; consonant
("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra
- ("p" . "[\u0D3E-\u0D44\u0D57]") ; postname matra
+ ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra
("b" . "[\u0D62-\u0D63]") ; belowbase matra
("a" . "[\u0D02-\u0D03]") ; abovebase sign
- ("H" . "്") ; virama sign
+ ("H" . "\u0D4D") ; virama sign
("N" . "\u200D") ; ZWJ
("J" . "\u200C") ; ZWNJ
("X" . "[\u0D00-\u0D7F]")))) ; all coverage
(indian-compose-regexp
(concat
- ;; consonant-based syllables
- "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|"
- ;; syllables with an independent vowel
- "V\\(J?HC\\)?m?b?p?a?\\|"
- ;; special consonant form
+ ;; syllables with an independent vowel, or
+ "V\\(?:J?HC\\)?m?b?p?a?\\|"
+ ;; consonant-based syllables, or
+ "\\(?:CJ?HJ?\\)\\{0,4\\}C\\(?:H[NJ]?\\|m?b?p?a?\\)\\|"
+ ;; special consonant form, or
"JHC\\|"
;; any other singleton characters
"X")
@@ -189,7 +212,7 @@
"Regexp matching a composable sequence of Malayalam characters.")
(let ((script-regexp-alist
- `((devanagari . "[\x900-\x97F\x200C\x200D]+")
+ `((devanagari . ,devanagari-composable-pattern)
(bengali . "[\x980-\x9FF\x200C\x200D]+")
(gurmukhi . "[\xA00-\xA7F\x200C\x200D]+")
(gujarati . "[\xA80-\xAFF\x200C\x200D]+")