[libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding

gnunet-svn

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding

From:	gnunet
Subject:	[libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding
Date:	Tue, 19 Jul 2022 16:51:09 +0200

This is an automated email from the git hooks/post-receive script.

karlson2k pushed a commit to branch master
in repository libmicrohttpd.

commit 420ed698d63401e4cc6d51913145067691f050b7
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
AuthorDate: Mon Jun 20 18:25:57 2022 +0300

    mhd_str: added functions for percent-decoding
---
 src/microhttpd/mhd_str.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++
 src/microhttpd/mhd_str.h |  93 ++++++++++++++
 2 files changed, 401 insertions(+)

diff --git a/src/microhttpd/mhd_str.c b/src/microhttpd/mhd_str.c
index 0c2b2f46..ef870e61 100644
--- a/src/microhttpd/mhd_str.c
+++ b/src/microhttpd/mhd_str.c
@@ -1387,6 +1387,314 @@ MHD_bin_to_hex (const void *bin,
 }
 
 
+size_t
+MHD_str_pct_decode_strict_n_ (const char *pct_encoded,
+                              size_t pct_encoded_len,
+                              char *decoded,
+                              size_t buf_size)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  bool broken;
+  size_t res;
+
+  res = MHD_str_pct_decode_lenient_n_ (pct_encoded, pct_encoded_len, decoded,
+                                       buf_size, &broken);
+  if (broken)
+    return 0;
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+
+  if (buf_size >= pct_encoded_len)
+  {
+    while (r < pct_encoded_len)
+    {
+      const char chr = pct_encoded[r];
+      if ('%' == chr)
+      {
+        if (2 > pct_encoded_len - r)
+          return 0;
+        else
+        {
+          const int h = toxdigitvalue (pct_encoded[++r]);
+          const int l = toxdigitvalue (pct_encoded[++r]);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+            return 0;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          decoded[w] = (char) out;
+        }
+      }
+      else
+        decoded[w] = chr;
+      ++r;
+      ++w;
+    }
+    return w;
+  }
+
+  while (r < pct_encoded_len)
+  {
+    const char chr = pct_encoded[r];
+    if (w >= buf_size)
+      return 0;
+    if ('%' == chr)
+    {
+      if (2 > pct_encoded_len - r)
+        return 0;
+      else
+      {
+        const int h = toxdigitvalue (pct_encoded[++r]);
+        const int l = toxdigitvalue (pct_encoded[++r]);
+        unsigned char out;
+        if ((0 > h) || (0 > l))
+          return 0;
+        out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                | ((uint8_t) ((unsigned int) l)) );
+        decoded[w] = (char) out;
+      }
+    }
+    else
+      decoded[w] = chr;
+    ++r;
+    ++w;
+  }
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
+size_t
+MHD_str_pct_decode_lenient_n_ (const char *pct_encoded,
+                               size_t pct_encoded_len,
+                               char *decoded,
+                               size_t buf_size,
+                               bool *broken_encoding)
+{
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+  if (NULL != broken_encoding)
+    *broken_encoding = false;
+#ifndef MHD_FAVOR_SMALL_CODE
+  if (buf_size >= pct_encoded_len)
+  {
+    while (r < pct_encoded_len)
+    {
+      const char chr = pct_encoded[r];
+      if ('%' == chr)
+      {
+        if (2 > pct_encoded_len - r)
+        {
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          decoded[w] = chr; /* Copy "as is" */
+        }
+        else
+        {
+          const int h = toxdigitvalue (pct_encoded[++r]);
+          const int l = toxdigitvalue (pct_encoded[++r]);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+          {
+            r -= 2;
+            if (NULL != broken_encoding)
+              *broken_encoding = true;
+            decoded[w] = chr; /* Copy "as is" */
+          }
+          else
+          {
+            out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                    | ((uint8_t) ((unsigned int) l)) );
+            decoded[w] = (char) out;
+          }
+        }
+      }
+      else
+        decoded[w] = chr;
+      ++r;
+      ++w;
+    }
+    return w;
+  }
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+  while (r < pct_encoded_len)
+  {
+    const char chr = pct_encoded[r];
+    if (w >= buf_size)
+      return 0;
+    if ('%' == chr)
+    {
+      if (2 > pct_encoded_len - r)
+      {
+        if (NULL != broken_encoding)
+          *broken_encoding = true;
+        decoded[w] = chr; /* Copy "as is" */
+      }
+      else
+      {
+        const int h = toxdigitvalue (pct_encoded[++r]);
+        const int l = toxdigitvalue (pct_encoded[++r]);
+        if ((0 > h) || (0 > l))
+        {
+          r -= 2;
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          decoded[w] = chr; /* Copy "as is" */
+        }
+        else
+        {
+          unsigned char out;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          decoded[w] = (char) out;
+        }
+      }
+    }
+    else
+      decoded[w] = chr;
+    ++r;
+    ++w;
+  }
+  return w;
+}
+
+
+size_t
+MHD_str_pct_decode_in_place_strict_ (char *str)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  size_t res;
+  bool broken;
+
+  res = MHD_str_pct_decode_in_place_lenient_ (str, &broken);
+  if (broken)
+  {
+    res = 0;
+    str[0] = 0;
+  }
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+
+  while (0 != str[r])
+  {
+    const char chr = str[r++];
+    if ('%' == chr)
+    {
+      const char d1 = str[r++];
+      if (0 == d1)
+        return 0;
+      else
+      {
+        const char d2 = str[r++];
+        if (0 == d2)
+          return 0;
+        else
+        {
+          const int h = toxdigitvalue (d1);
+          const int l = toxdigitvalue (d2);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+            return 0;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          str[w++] = (char) out;
+        }
+      }
+    }
+    else
+      str[w++] = chr;
+  }
+  str[w] = 0;
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
+size_t
+MHD_str_pct_decode_in_place_lenient_ (char *str,
+                                      bool *broken_encoding)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  size_t len;
+  size_t res;
+
+  len = strlen (str);
+  res = MHD_str_pct_decode_lenient_n_ (str, len, str, len, broken_encoding);
+  str[res] = 0;
+
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  if (NULL != broken_encoding)
+    *broken_encoding = false;
+  r = 0;
+  w = 0;
+  while (0 != str[r])
+  {
+    const char chr = str[r++];
+    if ('%' == chr)
+    {
+      const char d1 = str[r++];
+      if (0 == d1)
+      {
+        if (NULL != broken_encoding)
+          *broken_encoding = true;
+        str[w++] = chr; /* Copy "as is" */
+        str[w] = 0;
+        return w;
+      }
+      else
+      {
+        const char d2 = str[r++];
+        if (0 == d2)
+        {
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          str[w++] = chr; /* Copy "as is" */
+          str[w++] = d1; /* Copy "as is" */
+          str[w] = 0;
+          return w;
+        }
+        else
+        {
+          const int h = toxdigitvalue (d1);
+          const int l = toxdigitvalue (d2);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+          {
+            if (NULL != broken_encoding)
+              *broken_encoding = true;
+            str[w++] = chr; /* Copy "as is" */
+            str[w++] = d1;
+            str[w++] = d2;
+            continue;
+          }
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          str[w++] = (char) out;
+          continue;
+        }
+      }
+    }
+    str[w++] = chr;
+  }
+  str[w] = 0;
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
 #ifdef DAUTH_SUPPORT
 bool
 MHD_str_equal_quoted_bin_n (const char *quoted,
diff --git a/src/microhttpd/mhd_str.h b/src/microhttpd/mhd_str.h
index 9e3a0334..ec8b5cf4 100644
--- a/src/microhttpd/mhd_str.h
+++ b/src/microhttpd/mhd_str.h
@@ -493,6 +493,99 @@ MHD_bin_to_hex (const void *bin,
                 size_t size,
                 char *hex);
 
+/**
+ * Decode string with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying all other characters without extra
+ * processing.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of the @a pct_encoded
+ * @param[out] decoded the output buffer, NOT zero-terminated, can point
+ *                     to the same buffer as @a pct_encoded
+ * @param buf_size the size of the output buffer
+ * @return the number of characters written to the output buffer or
+ *         zero if any percent-encoded characters is broken ('%' followed
+ *         by less than two hexadecimal digits) or output buffer is too
+ *         small to hold the result
+ */
+size_t
+MHD_str_pct_decode_strict_n_ (const char *pct_encoded,
+                              size_t pct_encoded_len,
+                              char *decoded,
+                              size_t buf_size);
+
+/**
+ * Decode string with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying all other characters without extra
+ * processing.
+ *
+ * Any invalid percent-encoding sequences ('%' symbol not followed by two
+ * valid hexadecimal digits) are copied to the output string without decoding.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of the @a pct_encoded
+ * @param[out] decoded the output buffer, NOT zero-terminated, can point
+ *                     to the same buffer as @a pct_encoded
+ * @param buf_size the size of the output buffer
+ * @param[out] broken_encoding will be set to true if any '%' symbol is not
+ *                             followed by two valid hexadecimal digits,
+ *                             optional, can be NULL
+ * @return the number of characters written to the output buffer or
+ *         zero if output buffer is too small to hold the result
+ */
+size_t
+MHD_str_pct_decode_lenient_n_ (const char *pct_encoded,
+                               size_t pct_encoded_len,
+                               char *decoded,
+                               size_t buf_size,
+                               bool *broken_encoding);
+
+
+/**
+ * Decode string in-place with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying back all other characters without extra
+ * processing.
+ *
+ * @param[in,out] str the string to be updated in-place, must be 
zero-terminated
+ *                    on input, the output is zero-terminated; the string is
+ *                    truncated to zero length if broken encoding is found
+ * @return the number of character in decoded string
+ */
+size_t
+MHD_str_pct_decode_in_place_strict_ (char *str);
+
+
+/**
+ * Decode string in-place with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying back all other characters without extra
+ * processing.
+ *
+ * Any invalid percent-encoding sequences ('%' symbol not followed by two
+ * valid hexadecimal digits) are copied to the output string without decoding.
+ *
+ * @param[in,out] str the string to be updated in-place, must be 
zero-terminated
+ *                    on input, the output is zero-terminated
+ * @param[out] broken_encoding will be set to true if any '%' symbol is not
+ *                             followed by two valid hexadecimal digits,
+ *                             optional, can be NULL
+ * @return the number of character in decoded string
+ */
+size_t
+MHD_str_pct_decode_in_place_lenient_ (char *str,
+                                      bool *broken_encoding);
+
 #ifdef DAUTH_SUPPORT
 /**
  * Check two strings for equality, "unquoting" the first string from quoted

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.

[Prev in Thread]

Current Thread

[Next in Thread]

[libmicrohttpd] branch master updated (d629ada1 -> 76b5b195), gnunet, 2022/07/19
- [libmicrohttpd] 03/15: Use new functions for decode request URLs, gnunet, 2022/07/19
- [libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding, gnunet <=
- [libmicrohttpd] 02/15: Added tests for percent-decoding functions, gnunet, 2022/07/19
- [libmicrohttpd] 05/15: Added tests for hex <-> binary functions, gnunet, 2022/07/19
- [libmicrohttpd] 12/15: microhttpd.h: added special enum for hash types, gnunet, 2022/07/19
- [libmicrohttpd] 04/15: mhd_str: added MHD_hex_to_bin() internal function, gnunet, 2022/07/19
- [libmicrohttpd] 09/15: Digest: check whether all required parameters are present before doing heavy calculations, gnunet, 2022/07/19
- [libmicrohttpd] 06/15: mhd_str: added macros for simple comparison against static strings, gnunet, 2022/07/19
- [libmicrohttpd] 08/15: Digest: use binary zero to separate get params in digest, gnunet, 2022/07/19
- [libmicrohttpd] 07/15: Added new functions MHD_digest_auth_get_request_info3() and MHD_digest_auth_get_username3(), gnunet, 2022/07/19
- [libmicrohttpd] 15/15: microhttpd.h: formatted some deprecation warnings, gnunet, 2022/07/19
- [libmicrohttpd] 10/15: Digest: moved URI match check to separate function, avoid one memcpy(), gnunet, 2022/07/19

Prev by Date: [libmicrohttpd] 03/15: Use new functions for decode request URLs
Next by Date: [libmicrohttpd] 02/15: Added tests for percent-decoding functions
Previous by thread: [libmicrohttpd] 03/15: Use new functions for decode request URLs
Next by thread: [libmicrohttpd] 02/15: Added tests for percent-decoding functions
Index(es):
- Date
- Thread