help-libidn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 4/4] nfkc.c: reuse internally calculated lengths for buffer ov


From: Karlson2k
Subject: [PATCH v2 4/4] nfkc.c: reuse internally calculated lengths for buffer overrun protection, adjust internal API to public API: use either string length OR null-termination, and do not stop processing on null char if length is specified.
Date: Mon, 4 Apr 2016 16:05:40 +0300

---
 lib/nfkc.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/lib/nfkc.c b/lib/nfkc.c
index 77ebc04..e6557ec 100644
--- a/lib/nfkc.c
+++ b/lib/nfkc.c
@@ -440,7 +440,7 @@ g_utf8_to_ucs4_fast (const gchar * str, gssize len, gsize * 
items_written)
 /*
  * g_ucs4_to_utf8:
  * @str: a UCS-4 encoded string
- * @len: the maximum length (number of characters) of @str to use.
+ * @len: the length (number of characters) of @str to use.
  *       If @len < 0, then the string is nul-terminated.
  * @items_read: location to store number of characters read, or %NULL.
  * @items_written: location to store number of bytes written or %NULL.
@@ -471,11 +471,8 @@ g_ucs4_to_utf8 (const gunichar * str,
   gsize i;
 
   result_length = 0;
-  for (i = 0; len < 0 || i < len; i++)
+  for (i = 0; (len < 0) ? (!str[i]) : (i < len); i++)
     {
-      if (!str[i])
-       break;
-
       if (str[i] > 0x10FFFF)
        goto err_out;
 
@@ -761,10 +758,12 @@ combine (gunichar a, gunichar b, gunichar * result)
   return FALSE;
 }
 
+/* result_len is in wide chars, not including terminating zero */
 static gunichar *
-_g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
+_g_utf8_normalize_wc (const gchar * str, gssize len, GNormalizeMode mode, 
gsize * result_len)
 {
   gsize n_wc;
+  gsize calc_len;
   gunichar *wc_buffer;
   const char *p;
   gsize last_start;
@@ -773,7 +772,7 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, 
GNormalizeMode mode)
 
   n_wc = 0;
   p = str;
-  while ((max_len < 0 || p < str + max_len) && *p)
+  while ((len < 0) ?  *p : (p < str + len))
     {
       const gchar *decomp;
       gunichar wc = g_utf8_get_char (p);
@@ -797,6 +796,7 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, 
GNormalizeMode mode)
       p = g_utf8_next_char (p);
     }
 
+  calc_len = p - str;
   wc_buffer = g_malloc (sizeof (gunichar) * (n_wc + 1));
   if (!wc_buffer)
     return NULL;
@@ -804,7 +804,7 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, 
GNormalizeMode mode)
   last_start = 0;
   n_wc = 0;
   p = str;
-  while ((max_len < 0 || p < str + max_len) && *p)
+  while (p < str + calc_len)
     {
       gunichar wc = g_utf8_get_char (p);
       const gchar *decomp;
@@ -893,6 +893,8 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, 
GNormalizeMode mode)
     }
 
   wc_buffer[n_wc] = 0;
+  if (result_len)
+    *result_len = n_wc;
 
   return wc_buffer;
 }
@@ -902,6 +904,9 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, 
GNormalizeMode mode)
  * @str: a UTF-8 encoded string.
  * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
  * @mode: the type of normalization to perform.
+ * @result_len: location to store length of returned string
+ *              in bytes (not including terminating zero),
+ *              ignored if %NULL
  *
  * Converts a string into canonical form, standardizing
  * such issues as whether a character with an accent
@@ -934,12 +939,14 @@ _g_utf8_normalize_wc (const gchar * str, gssize max_len, 
GNormalizeMode mode)
  *   valid UTF-8.
  **/
 static gchar *
-g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode)
+g_utf8_normalize (const gchar * str, gssize len, GNormalizeMode mode,
+                  gsize * result_len)
 {
-  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
+  gsize wc_len;
+  gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode, &wc_len);
   gchar *result;
 
-  result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL);
+  result = g_ucs4_to_utf8 (result_wc, (gssize)wc_len, NULL, result_len);
   g_free (result_wc);
 
   return result;
@@ -1011,13 +1018,13 @@ stringprep_utf8_to_ucs4 (const char *str, ssize_t len, 
size_t * items_written)
   if (u8_check ((const uint8_t *) str, n))
     return NULL;
 
-  return g_utf8_to_ucs4_fast (str, len, items_written);
+  return g_utf8_to_ucs4_fast (str, (ssize_t)n, items_written);
 }
 
 /**
  * stringprep_ucs4_to_utf8:
  * @str: a UCS-4 encoded string
- * @len: the maximum length of @str to use. If @len < 0, then
+ * @len: the length of @str to use. If @len < 0, then
  *       the string is terminated with a 0 character.
  * @items_read: location to store number of characters read read, or %NULL.
  * @items_written: location to store number of bytes written or %NULL.
@@ -1073,7 +1080,7 @@ stringprep_utf8_nfkc_normalize (const char *str, ssize_t 
len)
   if (u8_check ((const uint8_t *) str, n))
     return NULL;
 
-  return g_utf8_normalize (str, len, G_NORMALIZE_NFKC);
+  return g_utf8_normalize (str, (ssize_t)n, G_NORMALIZE_NFKC, NULL);
 }
 
 /**
@@ -1092,9 +1099,10 @@ stringprep_ucs4_nfkc_normalize (const uint32_t * str, 
ssize_t len)
 {
   char *p;
   uint32_t *result_wc;
+  size_t wc_len;
 
-  p = stringprep_ucs4_to_utf8 (str, len, 0, 0);
-  result_wc = _g_utf8_normalize_wc (p, -1, G_NORMALIZE_NFKC);
+  p = stringprep_ucs4_to_utf8 (str, len, 0, &wc_len);
+  result_wc = _g_utf8_normalize_wc (p, (ssize_t)wc_len, G_NORMALIZE_NFKC, 
NULL);
   free (p);
 
   return result_wc;
-- 
2.8.0.windows.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]