[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: how to convert old 7bit chars to modern utf8
From: |
gojjoe2 |
Subject: |
Re: how to convert old 7bit chars to modern utf8 |
Date: |
Wed, 8 Sep 2021 21:39:30 +0200 |
User-agent: |
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Thunderbird/78.14.0 |
Hi Uwe,
not sure if this is what you'd like to do, but I use the functions "iso-tex2iso" and
"iso-iso2tex" to convert TeX macros to UTF-8 and vice versa. See below what I added to my
init file; maybe it can help you too.
Cheers!
;; Extend iso-cvt's TeX -> character translation table once the library
;; is loaded.  Each entry is (REGEXP REPLACEMENT).  Inside these Lisp
;; strings "\\\\" is a regexp matching one literal backslash, and "\\1"
;; in a replacement refers to the first regexp group.
;;
;; The new entries are prepended, so they are tried before the entries
;; already present in `iso-tex2iso-trans-tab'.  Within one group the order
;; matters: more specific spellings must come before more general ones,
;; because the first rule that matches rewrites the text.
(eval-after-load "iso-cvt"
  '(progn
     (setq iso-tex2iso-trans-tab
           (append
            '(
              ;; --- Control words: \o \O \aa \AA \ae \AE \oe \OE \ss \l \L ---
              ;; Rule order used by every group below:
              ;;   1. macro followed by "\ " (control space): keep one space
              ;;   2. macro followed by "{}"
              ;;   3. macro wrapped in braces
              ;;   4. macro, optional blanks, blank line: keep the blank line
              ;;   5. macro, optional blanks, newline: drop the newline
              ;;   6. macro followed by blanks: drop the blanks
              ;;   7. macro followed by any non-word character: keep it
              ;; Rule 6 has to precede rule 7; otherwise rule 7 matches the
              ;; blank first and keeps it, and rule 6 can never fire.

              ;; o with stroke
              ("\\\\o\\\\ " "ø ")
              ("\\\\o{}" "ø")
              ("{\\\\o}" "ø")
              ("\\\\o *\n\n" "ø\n\n")
              ("\\\\o *\n" "ø")
              ("\\\\o +\n?" "ø")
              ("\\\\o\\(\\W\\)" "ø\\1")
              ("\\\\O\\\\ " "Ø ")
              ("\\\\O{}" "Ø")
              ("{\\\\O}" "Ø")
              ("\\\\O *\n\n" "Ø\n\n")
              ("\\\\O *\n" "Ø")
              ("\\\\O +\n?" "Ø")
              ("\\\\O\\(\\W\\)" "Ø\\1")
              ;; a with ring
              ("\\\\aa\\\\ " "å ")
              ("\\\\aa{}" "å")
              ("{\\\\aa}" "å")
              ("\\\\aa *\n\n" "å\n\n")
              ("\\\\aa *\n" "å")
              ("\\\\aa +\n?" "å")
              ("\\\\aa\\(\\W\\)" "å\\1")
              ("\\\\AA\\\\ " "Å ")
              ("\\\\AA{}" "Å")
              ("{\\\\AA}" "Å")
              ("\\\\AA *\n\n" "Å\n\n")
              ("\\\\AA *\n" "Å")
              ("\\\\AA +\n?" "Å")
              ("\\\\AA\\(\\W\\)" "Å\\1")
              ;; ae ligature
              ("\\\\ae\\\\ " "æ ")
              ("\\\\ae{}" "æ")
              ("{\\\\ae}" "æ")
              ("\\\\ae *\n\n" "æ\n\n")
              ("\\\\ae *\n" "æ")
              ("\\\\ae +\n?" "æ")
              ("\\\\ae\\(\\W\\)" "æ\\1")
              ("\\\\AE\\\\ " "Æ ")
              ("\\\\AE{}" "Æ")
              ("{\\\\AE}" "Æ")
              ("\\\\AE *\n\n" "Æ\n\n")
              ("\\\\AE *\n" "Æ")
              ("\\\\AE +\n?" "Æ")
              ("\\\\AE\\(\\W\\)" "Æ\\1")
              ;; oe ligature
              ("\\\\oe\\\\ " "œ ")
              ("\\\\oe{}" "œ")
              ("{\\\\oe}" "œ")
              ("\\\\oe *\n\n" "œ\n\n")
              ("\\\\oe *\n" "œ")
              ("\\\\oe +\n?" "œ")
              ("\\\\oe\\(\\W\\)" "œ\\1")
              ("\\\\OE\\\\ " "Œ ")
              ("\\\\OE{}" "Œ")
              ("{\\\\OE}" "Œ")
              ("\\\\OE *\n\n" "Œ\n\n")
              ("\\\\OE *\n" "Œ")
              ("\\\\OE +\n?" "Œ")
              ("\\\\OE\\(\\W\\)" "Œ\\1")
              ;; sharp s
              ("\\\\ss\\\\ " "ß ")
              ("\\\\ss{}" "ß")
              ("{\\\\ss}" "ß")
              ("\\\\ss *\n\n" "ß\n\n")
              ("\\\\ss *\n" "ß")
              ("\\\\ss +\n?" "ß")
              ("\\\\ss\\(\\W\\)" "ß\\1")
              ;; l with stroke
              ("\\\\l\\\\ " "ł ")
              ("\\\\l{}" "ł")
              ("{\\\\l}" "ł")
              ("\\\\l *\n\n" "ł\n\n")
              ("\\\\l *\n" "ł")
              ("\\\\l +\n?" "ł")
              ("\\\\l\\(\\W\\)" "ł\\1")
              ("\\\\L\\\\ " "Ł ")
              ("\\\\L{}" "Ł")
              ("{\\\\L}" "Ł")
              ("\\\\L *\n\n" "Ł\n\n")
              ("\\\\L *\n" "Ł")
              ("\\\\L +\n?" "Ł")
              ("\\\\L\\(\\W\\)" "Ł\\1")

              ;; --- Acute accent \' and dot accent \. ---
              ;; The doubly braced spelling (e.g. {\'{c}}) is listed before
              ;; the inner-brace spelling (\'{c}); otherwise the latter
              ;; would match inside it and leave the outer braces behind.

              ;; y with acute
              ("{\\\\'y}" "ý")
              ("{\\\\'{y}}" "ý")
              ("\\\\'{y}" "ý")
              ("\\\\'y" "ý")
              ("{\\\\'Y}" "Ý")
              ("{\\\\'{Y}}" "Ý")
              ("\\\\'{Y}" "Ý")
              ("\\\\'Y" "Ý")
              ;; c with acute
              ("{\\\\'c}" "ć")
              ("{\\\\'{c}}" "ć")
              ("\\\\'{c}" "ć")
              ("\\\\'c" "ć")
              ("{\\\\'C}" "Ć")
              ("{\\\\'{C}}" "Ć")
              ("\\\\'{C}" "Ć")
              ("\\\\'C" "Ć")
              ;; s with acute
              ("{\\\\'s}" "ś")
              ("{\\\\'{s}}" "ś")
              ("\\\\'{s}" "ś")
              ("\\\\'s" "ś")
              ("{\\\\'S}" "Ś")
              ("{\\\\'{S}}" "Ś")
              ("\\\\'{S}" "Ś")
              ("\\\\'S" "Ś")
              ;; z with dot above ("\\\\\\." = literal backslash + literal dot)
              ("{\\\\\\.z}" "ż")
              ("{\\\\\\.{z}}" "ż")
              ("\\\\\\.{z}" "ż")
              ("\\\\\\.z" "ż")
              ("{\\\\\\.Z}" "Ż")
              ("{\\\\\\.{Z}}" "Ż")
              ("\\\\\\.{Z}" "Ż")
              ("\\\\\\.Z" "Ż")

              ;; --- Letter-named accents \k \c \v \H ---
              ;; Braced spellings come before the bare ones for the same
              ;; reason as above.

              ;; a with ogonek
              ("{\\\\k a}" "ą")
              ("{\\\\k{a}}" "ą")
              ("\\\\k a" "ą")
              ("\\\\k{a}" "ą")
              ("{\\\\k A}" "Ą")
              ("{\\\\k{A}}" "Ą")
              ("\\\\k A" "Ą")
              ("\\\\k{A}" "Ą")
              ;; e with ogonek
              ("{\\\\k e}" "ę")
              ("{\\\\k{e}}" "ę")
              ("\\\\k e" "ę")
              ("\\\\k{e}" "ę")
              ("{\\\\k E}" "Ę")
              ("{\\\\k{E}}" "Ę")
              ("\\\\k E" "Ę")
              ("\\\\k{E}" "Ę")
              ;; e with cedilla
              ("{\\\\c e}" "ȩ")
              ("{\\\\c{e}}" "ȩ")
              ("\\\\c e" "ȩ")
              ("\\\\c{e}" "ȩ")
              ("{\\\\c E}" "Ȩ")
              ("{\\\\c{E}}" "Ȩ")
              ("\\\\c E" "Ȩ")
              ("\\\\c{E}" "Ȩ")
              ;; c with caron
              ("{\\\\v c}" "č")
              ("{\\\\v{c}}" "č")
              ("\\\\v c" "č")
              ("\\\\v{c}" "č")
              ("{\\\\v C}" "Č")
              ("{\\\\v{C}}" "Č")
              ("\\\\v C" "Č")
              ("\\\\v{C}" "Č")
              ;; e with caron
              ("{\\\\v e}" "ě")
              ("{\\\\v{e}}" "ě")
              ("\\\\v e" "ě")
              ("\\\\v{e}" "ě")
              ("{\\\\v E}" "Ě")
              ("{\\\\v{E}}" "Ě")
              ("\\\\v E" "Ě")
              ("\\\\v{E}" "Ě")
              ;; r with caron
              ("{\\\\v r}" "ř")
              ("{\\\\v{r}}" "ř")
              ("\\\\v r" "ř")
              ("\\\\v{r}" "ř")
              ("{\\\\v R}" "Ř")
              ("{\\\\v{R}}" "Ř")
              ("\\\\v R" "Ř")
              ("\\\\v{R}" "Ř")
              ;; s with caron
              ("{\\\\v s}" "š")
              ("{\\\\v{s}}" "š")
              ("\\\\v s" "š")
              ("\\\\v{s}" "š")
              ("{\\\\v S}" "Š")
              ("{\\\\v{S}}" "Š")
              ("\\\\v S" "Š")
              ("\\\\v{S}" "Š")
              ;; z with caron
              ("{\\\\v z}" "ž")
              ("{\\\\v{z}}" "ž")
              ("\\\\v z" "ž")
              ("\\\\v{z}" "ž")
              ("{\\\\v Z}" "Ž")
              ("{\\\\v{Z}}" "Ž")
              ("\\\\v Z" "Ž")
              ("\\\\v{Z}" "Ž")
              ;; o with double acute
              ("{\\\\H o}" "ő")
              ("{\\\\H{o}}" "ő")
              ("\\\\H o" "ő")
              ("\\\\H{o}" "ő")
              ("{\\\\H O}" "Ő")
              ("{\\\\H{O}}" "Ő")
              ("\\\\H O" "Ő")
              ("\\\\H{O}" "Ő")
              ;; u with double acute
              ("{\\\\H u}" "ű")
              ("{\\\\H{u}}" "ű")
              ("\\\\H u" "ű")
              ("\\\\H{u}" "ű")
              ("{\\\\H U}" "Ű")
              ("{\\\\H{U}}" "Ű")
              ("\\\\H U" "Ű")
              ("\\\\H{U}" "Ű")
              )
            iso-tex2iso-trans-tab))))
;; Extend iso-cvt's character -> TeX translation table once the library
;; is loaded.  Each entry is (CHARACTER REPLACEMENT); "\\\\" in a
;; replacement string yields one literal backslash in the buffer.
;; The new entries are placed in front of the existing ones.
(eval-after-load "iso-cvt"
  '(setq iso-iso2tex-trans-tab
         (append
          '(;; Special letters and ligatures (lowercase, uppercase)
            ("ø" "{\\\\o}")      ("Ø" "{\\\\O}")
            ("å" "{\\\\aa}")     ("Å" "{\\\\AA}")
            ("æ" "{\\\\ae}")     ("Æ" "{\\\\AE}")
            ("œ" "{\\\\oe}")     ("Œ" "{\\\\OE}")
            ("ß" "{\\\\ss}")
            ("ł" "{\\\\l}")      ("Ł" "{\\\\L}")
            ;; Acute accent
            ("ý" "{\\\\'y}")     ("Ý" "{\\\\'Y}")
            ("ć" "{\\\\'c}")     ("Ć" "{\\\\'C}")
            ("ś" "{\\\\'s}")     ("Ś" "{\\\\'S}")
            ;; Dot above
            ("ż" "{\\\\.z}")     ("Ż" "{\\\\.Z}")
            ;; Ogonek
            ("ą" "{\\\\k{a}}")   ("Ą" "{\\\\k{A}}")
            ("ę" "{\\\\k{e}}")   ("Ę" "{\\\\k{E}}")
            ;; Cedilla
            ("ȩ" "{\\\\c{e}}")   ("Ȩ" "{\\\\c{E}}")
            ("ç" "{\\\\c{c}}")   ("Ç" "{\\\\c{C}}")
            ;; Caron
            ("č" "{\\\\v{c}}")   ("Č" "{\\\\v{C}}")
            ("ě" "{\\\\v{e}}")   ("Ě" "{\\\\v{E}}")
            ("ř" "{\\\\v{r}}")   ("Ř" "{\\\\v{R}}")
            ("š" "{\\\\v{s}}")   ("Š" "{\\\\v{S}}")
            ("ž" "{\\\\v{z}}")   ("Ž" "{\\\\v{Z}}")
            ;; Double acute
            ("ő" "{\\\\H{o}}")   ("Ő" "{\\\\H{O}}")
            ("ű" "{\\\\H{u}}")   ("Ű" "{\\\\H{U}}"))
          iso-iso2tex-trans-tab)))
On 2021-09-08 19:05, Uwe Brauer wrote:
Hi
Not sure that is off topic but anyhow.
Some of the BibTeX entries I receive have fields of the form
Had{\v{z}}i{\'{c}}, while the UTF-8 version would be Hadžić
(OK, it could also be some ISO-8859-x character). I know about the old iso-cvt.el, which
converts some 7-bit sequences to modern ISO/UTF-8, but it does not include Croatian. I can of
course type it using the Croatian input method, but here I want to convert
them.
Any ideas?
Uwe Brauer