diff --git a/ChangeLog b/ChangeLog index ab542e2..367ab05 100644 --- a/ChangeLog +++ b/ChangeLog @@ -158,6 +158,17 @@ 2009-08-24 Vladimir Serbinenko + UTF-8 to UTF-16 transformation. + + * conf/common.rmk (pkglib_MODULES): Add charset.mod + (charset_mod_SOURCES): New variable. + (charset_mod_CFLAGS): Likewise. + (charset_mod_LDFLAGS): Likewise. + * include/grub/charset.h: New file. + * lib/charset.c: New file. (Based on grub_utf8_to_ucs4 from kern/misc.c) + +2009-08-24 Vladimir Serbinenko + * script/sh/function.c (grub_script_function_find): Cut error message not to flood terminal. * script/sh/lexer.c (grub_script_yylex): Remove command line length diff --git a/conf/common.rmk b/conf/common.rmk index 7727f19..735e57a 100644 --- a/conf/common.rmk +++ b/conf/common.rmk @@ -633,3 +633,8 @@ pkglib_MODULES += setjmp.mod setjmp_mod_SOURCES = lib/$(target_cpu)/setjmp.S setjmp_mod_ASFLAGS = $(COMMON_ASFLAGS) setjmp_mod_LDFLAGS = $(COMMON_LDFLAGS) + +pkglib_MODULES += charset.mod +charset_mod_SOURCES = lib/charset.c +charset_mod_CFLAGS = $(COMMON_CFLAGS) +charset_mod_LDFLAGS = $(COMMON_LDFLAGS) diff --git a/include/grub/charset.h b/include/grub/charset.h new file mode 100644 index 0000000..22b6724 --- /dev/null +++ b/include/grub/charset.h @@ -0,0 +1,50 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2009 Free Software Foundation, Inc. + * + * GRUB is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GRUB is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef GRUB_CHARSET_HEADER +#define GRUB_CHARSET_HEADER 1 + +#include + +#define GRUB_UINT8_1_LEADINGBIT 0x80 +#define GRUB_UINT8_2_LEADINGBITS 0xc0 +#define GRUB_UINT8_3_LEADINGBITS 0xe0 +#define GRUB_UINT8_4_LEADINGBITS 0xf0 +#define GRUB_UINT8_5_LEADINGBITS 0xf8 +#define GRUB_UINT8_6_LEADINGBITS 0xfc +#define GRUB_UINT8_7_LEADINGBITS 0xfe + +#define GRUB_UINT8_1_TRAILINGBIT 0x01 +#define GRUB_UINT8_2_TRAILINGBITS 0x03 +#define GRUB_UINT8_3_TRAILINGBITS 0x07 +#define GRUB_UINT8_4_TRAILINGBITS 0x0f +#define GRUB_UINT8_5_TRAILINGBITS 0x1f +#define GRUB_UINT8_6_TRAILINGBITS 0x3f + +#define GRUB_UCS2_LIMIT 0x10000 +#define GRUB_UTF16_UPPER_SURROGATE(code) \ + (0xD800 + ((((code) - GRUB_UCS2_LIMIT) >> 12) & 0xfff)) +#define GRUB_UTF16_LOWER_SURROGATE(code) \ + (0xDC00 + (((code) - GRUB_UCS2_LIMIT) & 0xfff)) + +grub_ssize_t +grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, + const grub_uint8_t *src, grub_size_t srcsize, + const grub_uint8_t **srcend); + +#endif diff --git a/lib/charset.c b/lib/charset.c new file mode 100644 index 0000000..8bc5b91 --- /dev/null +++ b/lib/charset.c @@ -0,0 +1,116 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc. + * + * GRUB is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GRUB is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GRUB. If not, see . 
+ */ + +/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE + bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string. + Return the number of characters converted. DEST must be able to hold + at least DESTSIZE characters. If an invalid sequence is found, return -1. + If SRCEND is not NULL, then *SRCEND is set to the next byte after the + last byte used in SRC. */ + +#include + +grub_ssize_t +grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, + const grub_uint8_t *src, grub_size_t srcsize, + const grub_uint8_t **srcend) +{ + grub_uint16_t *p = dest; + int count = 0; + grub_uint32_t code = 0; + + if (srcend) + *srcend = src; + + while (srcsize && destsize) + { + grub_uint32_t c = *src++; + if (srcsize != (grub_size_t)-1) + srcsize--; + if (count) + { + if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT) + { + /* invalid */ + return -1; + } + else + { + code <<= 6; + code |= (c & GRUB_UINT8_6_TRAILINGBITS); + count--; + } + } + else + { + if (c == 0) + break; + + if ((c & GRUB_UINT8_1_LEADINGBIT) == 0) + code = c; + else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS) + { + count = 1; + code = c & GRUB_UINT8_5_TRAILINGBITS; + } + else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS) + { + count = 2; + code = c & GRUB_UINT8_4_TRAILINGBITS; + } + else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS) + { + count = 3; + code = c & GRUB_UINT8_3_TRAILINGBITS; + } + else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS) + { + count = 4; + code = c & GRUB_UINT8_2_TRAILINGBITS; + } + else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS) + { + count = 5; + code = c & GRUB_UINT8_1_TRAILINGBIT; + } + else + return -1; + } + + if (count == 0) + { + if (destsize < 2 && code >= GRUB_UCS2_LIMIT) + break; + if (code >= GRUB_UCS2_LIMIT) + { + *p++ = GRUB_UTF16_UPPER_SURROGATE (code); + *p++ = GRUB_UTF16_LOWER_SURROGATE (code); + destsize -= 2; + } + else + 
{ + *p++ = code; + destsize--; + } + } + } + + if (srcend) + *srcend = src; + return p - dest; +}