[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] emacs/lisp/language indian.el
From: |
Kenichi Handa |
Subject: |
[Emacs-diffs] emacs/lisp/language indian.el |
Date: |
Sat, 12 Dec 2009 02:17:48 +0000 |
CVSROOT: /cvsroot/emacs
Module name: emacs
Changes by: Kenichi Handa <handa> 09/12/12 02:17:48
Modified files:
lisp/language : indian.el
Log message:
(indian-compose-regexp): New function.
(malayalam-composable-pattern): Fix the pattern.
(composition-function-table): Set malayalam-composable-pattern for
Malayalam characters.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/emacs/lisp/language/indian.el?cvsroot=emacs&r1=1.39&r2=1.40
Patches:
Index: indian.el
===================================================================
RCS file: /cvsroot/emacs/emacs/lisp/language/indian.el,v
retrieving revision 1.39
retrieving revision 1.40
diff -u -b -r1.39 -r1.40
--- indian.el 2 Dec 2009 07:59:51 -0000 1.39
+++ indian.el 12 Dec 2009 02:17:48 -0000 1.40
@@ -129,6 +129,15 @@
South Indian language Malayalam is supported in this language environment."))
'("Indian"))
+;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is
+;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
+
+(defun indian-compose-regexp (regexp table)
+  "Return REGEXP with its mnemonic strings replaced according to TABLE.
+TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
+
+Mnemonics are matched literally and case-sensitively.  All entries
+are substituted in a single pass over REGEXP, so text inserted by one
+entry is never rescanned by another and the order of TABLE does not
+affect the result.  If TABLE is nil, REGEXP is returned unchanged."
+  (if (null table)
+      ;; Nothing to substitute; also avoids building a matcher from an
+      ;; empty list of mnemonics.
+      regexp
+    ;; Mnemonics such as "N" and "n" must stay distinct.
+    (let ((case-fold-search nil))
+      (replace-regexp-in-string
+       ;; One alternation of all mnemonics, each quoted as a literal.
+       (regexp-opt (mapcar #'car table))
+       ;; Look up the replacement for whichever mnemonic matched.
+       (lambda (mnemonic) (cdr (assoc mnemonic table)))
+       ;; FIXEDCASE and LITERAL: insert the replacement verbatim.
+       regexp t t))))
+
(defconst devanagari-composable-pattern
(concat
"\\([à¤
-à¤à¥ ॡ][à¤à¤]?\\)\\|[à¤à¥¤]"
@@ -156,12 +165,27 @@
"Regexp matching a composable sequence of Kannada characters.")
(defconst malayalam-composable-pattern
+ ;; TABLE maps each one-letter mnemonic used in the pattern below to
+ ;; the character class (or single character) it stands for.
+ (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
+ ("C" . "[\u0D15-\u0D39]") ; consonant
+ ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra
+ ("p" . "[\u0D3E-\u0D44\u0D57]") ; postbase matra
+ ("b" . "[\u0D62-\u0D63]") ; belowbase matra
+ ("a" . "[\u0D02-\u0D03]") ; abovebase sign
+ ("H" . "\u0D4D") ; virama sign
+ ("N" . "\u200D") ; ZWJ
+ ("J" . "\u200C") ; ZWNJ
+ ("X" . "[\u0D00-\u0D7F]")))) ; all coverage
+ (indian-compose-regexp
(concat
- "\\([à´
-à´][à´]?\\)\\|à´"
- "\\|\\("
-
"\\(?:\\(?:[à´-à´¹]àµ\\)?\\(?:[à´-à´¹]àµ\\)?\\(?:[à´-à´¹]àµ\\)?[à´-à´¹]àµ\\)?"
- "[à´-à´¹]\\(?:àµ\\|[à´¾-àµàµàµàµàµàµàµàµ]?[à´àµ]?\\)?"
- "\\)")
+ ;; Every letter in the strings below must be a mnemonic defined in
+ ;; TABLE; a letter without an entry is left in the regexp and matches
+ ;; itself literally.
+ ;; consonant-based syllables
+ "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|"
+ ;; syllables with an independent vowel
+ "V\\(J?HC\\)?m?b?p?a?\\|"
+ ;; special consonant form
+ "JHC\\|"
+ ;; any other singleton characters
+ "X")
+ table))
"Regexp matching a composable sequence of Malayalam characters.")
(let ((script-regexp-alist
@@ -173,7 +197,7 @@
(tamil . "[\xB80-\xBFF\x200C\x200D]+")
(telugu . "[\xC00-\xC7F\x200C\x200D]+")
(kannada . "[\xC80-\xCFF\x200C\x200D]+")
- (malayalam . "[\xD00-\xD7F\x200C\x200D]+"))))
+ (malayalam . ,malayalam-composable-pattern))))
(map-char-table
#'(lambda (key val)
(let ((slot (assq val script-regexp-alist)))