[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
striconveha: add support for transliteration
From: |
Bruno Haible |
Subject: |
striconveha: add support for transliteration |
Date: |
Wed, 24 Jan 2007 01:55:43 +0100 (MET) |
User-agent: |
KMail/1.5.4 |
This adds one more option to the string iconv modules: support for
transliteration, as implemented in glibc and GNU libiconv.
2007-01-23 Bruno Haible <address@hidden>
* lib/striconveha.h: Include <stdbool.h>.
(mem_iconveha, str_iconveha): Add 'transliterate' argument.
* lib/striconveha.c: Include allocsa.h, strdup.h, c-strcase.h.
(mem_iconveha_notranslit): Renamed from mem_iconveha.
(mem_iconveha): New function.
(str_iconveha_notranslit): Renamed from str_iconveha.
(str_iconveha): New function.
* modules/striconveha (Depends-on): Add stdbool, allocsa, strdup,
c-strcase.
*** lib/striconveha.h 23 Jan 2007 01:17:42 -0000 1.3
--- lib/striconveha.h 24 Jan 2007 00:49:48 -0000
***************
*** 19,24 ****
--- 19,26 ----
#ifndef _STRICONVEHA_H
#define _STRICONVEHA_H
+ #include <stdbool.h>
+
#include "striconveh.h"
***************
*** 30,35 ****
--- 32,40 ----
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
The "from" encoding can also be a name defined for autodetection.
+ If TRANSLITERATE is true, transliteration will be attempted to avoid conversion
+ errors, for iconv implementations that support this. Usually you'll choose
+ TRANSLITERATE = true if HANDLER != iconveh_error.
If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
array is filled with offsets into the result, i.e. the character starting
at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
***************
*** 44,49 ****
--- 49,55 ----
extern int
mem_iconveha (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
+ bool transliterate,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp);
***************
*** 53,64 ****
--- 59,74 ----
Both the "from" and the "to" encoding must use a single NUL byte at the
end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
The "from" encoding can also be a name defined for autodetection.
+ If TRANSLITERATE is true, transliteration will be attempted to avoid conversion
+ errors, for iconv implementations that support this. Usually you'll choose
+ TRANSLITERATE = true if HANDLER != iconveh_error.
Allocate a malloced memory block for the result.
Return value: the freshly allocated resulting NUL-terminated string if
successful, otherwise NULL and errno set. */
extern char *
str_iconveha (const char *src,
const char *from_codeset, const char *to_codeset,
+ bool transliterate,
enum iconv_ilseq_handler handler);
*** lib/striconveha.c 24 Jan 2007 00:48:01 -0000 1.3
--- lib/striconveha.c 24 Jan 2007 00:49:48 -0000
***************
*** 25,30 ****
--- 25,34 ----
#include <stdlib.h>
#include <string.h>
+ #include "allocsa.h"
+ #include "strdup.h"
+ #include "c-strcase.h"
+
#define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
***************
*** 143,154 ****
}
}
! int
! mem_iconveha (const char *src, size_t srclen,
! const char *from_codeset, const char *to_codeset,
! enum iconv_ilseq_handler handler,
! size_t *offsets,
! char **resultp, size_t *lengthp)
{
int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
offsets, resultp, lengthp);
--- 147,159 ----
}
}
! /* Like mem_iconveha, except no handling of transliteration. */
! static int
! mem_iconveha_notranslit (const char *src, size_t srclen,
! const char *from_codeset, const char *to_codeset,
! enum iconv_ilseq_handler handler,
! size_t *offsets,
! char **resultp, size_t *lengthp)
{
int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
offsets, resultp, lengthp);
***************
*** 171,180 ****
encodings = alias->encodings_to_try;
do
{
! retval = mem_iconveha (src, srclen,
! *encodings, to_codeset,
! iconveh_error, offsets,
! resultp, lengthp);
if (!(retval < 0 && errno == EILSEQ))
return retval;
encodings++;
--- 176,185 ----
encodings = alias->encodings_to_try;
do
{
! retval = mem_iconveha_notranslit (src, srclen,
! *encodings, to_codeset,
! iconveh_error, offsets,
! resultp, lengthp);
if (!(retval < 0 && errno == EILSEQ))
return retval;
encodings++;
***************
*** 185,194 ****
encodings = alias->encodings_to_try;
do
{
! retval = mem_iconveha (src, srclen,
! *encodings, to_codeset,
! handler, offsets,
! resultp, lengthp);
if (!(retval < 0 && errno == EILSEQ))
return retval;
encodings++;
--- 190,199 ----
encodings = alias->encodings_to_try;
do
{
! retval = mem_iconveha_notranslit (src, srclen,
! *encodings, to_codeset,
! handler, offsets,
! resultp, lengthp);
if (!(retval < 0 && errno == EILSEQ))
return retval;
encodings++;
***************
*** 205,214 ****
}
}
! char *
! str_iconveha (const char *src,
const char *from_codeset, const char *to_codeset,
! enum iconv_ilseq_handler handler)
{
char *result = str_iconveh (src, from_codeset, to_codeset, handler);
--- 210,261 ----
}
}
! int
! mem_iconveha (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
! bool transliterate,
! enum iconv_ilseq_handler handler,
! size_t *offsets,
! char **resultp, size_t *lengthp)
! {
! if (srclen == 0)
! {
! /* Nothing to convert. */
! *lengthp = 0;
! return 0;
! }
!
! /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
! we want to use transliteration. */
! #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
! if (transliterate)
! {
! int retval;
! size_t len = strlen (to_codeset);
! char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1);
! memcpy (to_codeset_suffixed, to_codeset, len);
! memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
!
! retval = mem_iconveha_notranslit (src, srclen,
! from_codeset, to_codeset_suffixed,
! handler, offsets, resultp, lengthp);
!
! freesa (to_codeset_suffixed);
!
! return retval;
! }
! else
! #endif
! return mem_iconveha_notranslit (src, srclen,
! from_codeset, to_codeset,
! handler, offsets, resultp, lengthp);
! }
!
! /* Like str_iconveha, except no handling of transliteration. */
! static char *
! str_iconveha_notranslit (const char *src,
! const char *from_codeset, const char *to_codeset,
! enum iconv_ilseq_handler handler)
{
char *result = str_iconveh (src, from_codeset, to_codeset, handler);
***************
*** 231,239 ****
encodings = alias->encodings_to_try;
do
{
! result = str_iconveha (src,
! *encodings, to_codeset,
! iconveh_error);
if (!(result == NULL && errno == EILSEQ))
return result;
encodings++;
--- 278,286 ----
encodings = alias->encodings_to_try;
do
{
! result = str_iconveha_notranslit (src,
! *encodings, to_codeset,
! iconveh_error);
if (!(result == NULL && errno == EILSEQ))
return result;
encodings++;
***************
*** 244,252 ****
encodings = alias->encodings_to_try;
do
{
! result = str_iconveha (src,
! *encodings, to_codeset,
! handler);
if (!(result == NULL && errno == EILSEQ))
return result;
encodings++;
--- 291,299 ----
encodings = alias->encodings_to_try;
do
{
! result = str_iconveha_notranslit (src,
! *encodings, to_codeset,
! handler);
if (!(result == NULL && errno == EILSEQ))
return result;
encodings++;
***************
*** 262,264 ****
--- 309,349 ----
return NULL;
}
}
+
+ char *
+ str_iconveha (const char *src,
+ const char *from_codeset, const char *to_codeset,
+ bool transliterate,
+ enum iconv_ilseq_handler handler)
+ {
+ if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
+ {
+ char *result = strdup (src);
+
+ if (result == NULL)
+ errno = ENOMEM;
+ return result;
+ }
+
+ /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+ we want to use transliteration. */
+ #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+ if (transliterate)
+ {
+ char *result;
+ size_t len = strlen (to_codeset);
+ char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1);
+ memcpy (to_codeset_suffixed, to_codeset, len);
+ memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
+
+ result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
+ handler);
+
+ freesa (to_codeset_suffixed);
+
+ return result;
+ }
+ else
+ #endif
+ return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);
+ }
*** modules/striconveha 21 Jan 2007 22:59:19 -0000 1.1
--- modules/striconveha 24 Jan 2007 00:49:49 -0000
***************
*** 7,13 ****
--- 7,17 ----
lib/striconveha.c
Depends-on:
+ stdbool
striconveh
+ allocsa
+ strdup
+ c-strcase
configure.ac:
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- striconveha: add support for transliteration,
Bruno Haible <=