2009-08-24 Vladimir Serbinenko UTF-8 to UTF-16 transformation. * conf/common.rmk (pkglib_MODULES): Add utf.mod (utf_mod_SOURCES): New variable. (utf_mod_CFLAGS): Likewise. (utf_mod_LDFLAGS): Likewise. * include/grub/utf.h: New file. * lib/utf.c: New file. (Based on grub_utf8_to_ucs4 from kern/misc.c) diff --git a/conf/common.rmk b/conf/common.rmk index b0d3785..b5a6048 100644 --- a/conf/common.rmk +++ b/conf/common.rmk @@ -617,3 +617,8 @@ pkglib_MODULES += setjmp.mod setjmp_mod_SOURCES = lib/$(target_cpu)/setjmp.S setjmp_mod_ASFLAGS = $(COMMON_ASFLAGS) setjmp_mod_LDFLAGS = $(COMMON_LDFLAGS) + +pkglib_MODULES += utf.mod +utf_mod_SOURCES = lib/utf.c +utf_mod_CFLAGS = $(COMMON_CFLAGS) +utf_mod_LDFLAGS = $(COMMON_LDFLAGS) diff --git a/include/grub/utf.h b/include/grub/utf.h new file mode 100644 index 0000000..2091916 --- /dev/null +++ b/include/grub/utf.h @@ -0,0 +1,29 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2009 Free Software Foundation, Inc. + * + * GRUB is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GRUB is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GRUB. If not, see . + */ + +#ifndef GRUB_UTF_HEADER +#define GRUB_UTF_HEADER 1 + +#include + +grub_ssize_t +grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, + const grub_uint8_t *src, grub_size_t srcsize, + const grub_uint8_t **srcend); + +#endif diff --git a/lib/utf.c b/lib/utf.c new file mode 100644 index 0000000..1f89f2f --- /dev/null +++ b/lib/utf.c @@ -0,0 +1,116 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc. + * + * GRUB is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GRUB is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GRUB. If not, see . + */ + +/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE + bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string. + Return the number of characters converted. DEST must be able to hold + at least DESTSIZE characters. If an invalid sequence is found, return -1. + If SRCEND is not NULL, then *SRCEND is set to the next byte after the + last byte used in SRC. */ + +#include + +grub_ssize_t +grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, + const grub_uint8_t *src, grub_size_t srcsize, + const grub_uint8_t **srcend) +{ + grub_uint16_t *p = dest; + int count = 0; + grub_uint32_t code = 0; + + if (srcend) + *srcend = src; + + while (srcsize && destsize) + { + grub_uint32_t c = *src++; + if (srcsize != (grub_size_t)-1) + srcsize--; + if (count) + { + if ((c & 0xc0) != 0x80) + { + /* invalid */ + return -1; + } + else + { + code <<= 6; + code |= (c & 0x3f); + count--; + } + } + else + { + if (c == 0) + break; + + if ((c & 0x80) == 0x00) + code = c; + else if ((c & 0xe0) == 0xc0) + { + count = 1; + code = c & 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + count = 2; + code = c & 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + count = 3; + code = c & 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + count = 4; + code = c & 0x03; + } + else if ((c & 0xfe) == 0xfc) + { + count = 5; + code = c & 0x01; + } + else + return -1; + } + + if (count == 0) + { + if (destsize < 2 && code > 0x10000) + break; + if (code > 0x10000) + { + *p++ = 0xD800 + (((code - 0x10000) >> 12) & 0xfff); + *p++ = 0xDC00 + ((code - 0x10000) & 0xfff); + destsize -= 2; + } + else + { + *p++ = code; + destsize--; + } + } + } + + if (srcend) + *srcend = src; + return p - dest; +}