[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding
From: |
gnunet |
Subject: |
[libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding |
Date: |
Tue, 19 Jul 2022 16:51:09 +0200 |
This is an automated email from the git hooks/post-receive script.
karlson2k pushed a commit to branch master
in repository libmicrohttpd.
commit 420ed698d63401e4cc6d51913145067691f050b7
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
AuthorDate: Mon Jun 20 18:25:57 2022 +0300
mhd_str: added functions for percent-decoding
---
src/microhttpd/mhd_str.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++
src/microhttpd/mhd_str.h | 93 ++++++++++++++
2 files changed, 401 insertions(+)
diff --git a/src/microhttpd/mhd_str.c b/src/microhttpd/mhd_str.c
index 0c2b2f46..ef870e61 100644
--- a/src/microhttpd/mhd_str.c
+++ b/src/microhttpd/mhd_str.c
@@ -1387,6 +1387,314 @@ MHD_bin_to_hex (const void *bin,
}
+/**
+ * Decode a percent-encoded string (RFC 3986, section 2.1), strict mode.
+ *
+ * Every "%XX" triplet is replaced by the single byte it encodes, all other
+ * characters are copied unchanged.  The output is NOT zero-terminated.
+ * The write position never gets ahead of the read position, so @a decoded
+ * may be the same buffer as @a pct_encoded.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of @a pct_encoded
+ * @param[out] decoded the output buffer
+ * @param buf_size the size of the output buffer
+ * @return the number of characters written to the output buffer, or
+ *         zero if any '%' is not followed by two hexadecimal digits
+ *         located inside the input, or if the output buffer is too small
+ *         (an empty input yields zero as well)
+ */
+size_t
+MHD_str_pct_decode_strict_n_ (const char *pct_encoded,
+                              size_t pct_encoded_len,
+                              char *decoded,
+                              size_t buf_size)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  bool broken;
+  size_t res;
+
+  /* Reuse the lenient decoder and reject the result if it saw any
+     malformed sequence. */
+  res = MHD_str_pct_decode_lenient_n_ (pct_encoded, pct_encoded_len, decoded,
+                                       buf_size, &broken);
+  if (broken)
+    return 0;
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+
+  if (buf_size >= pct_encoded_len)
+  {
+    /* The output is never longer than the input, so no per-character
+       space check is needed in this loop. */
+    while (r < pct_encoded_len)
+    {
+      const char chr = pct_encoded[r];
+      if ('%' == chr)
+      {
+        /* The '%' and both digits (three characters) must all be inside
+           the input; otherwise the second digit would be read from
+           beyond the end of the buffer. */
+        if (3 > pct_encoded_len - r)
+          return 0;
+        else
+        {
+          const int h = toxdigitvalue (pct_encoded[++r]);
+          const int l = toxdigitvalue (pct_encoded[++r]);
+          unsigned char out;
+          /* A negative value is treated as "not a hexadecimal digit" */
+          if ((0 > h) || (0 > l))
+            return 0;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          decoded[w] = (char) out;
+        }
+      }
+      else
+        decoded[w] = chr;
+      ++r;
+      ++w;
+    }
+    return w;
+  }
+
+  /* The output buffer is smaller than the input: check the free space
+     before every write. */
+  while (r < pct_encoded_len)
+  {
+    const char chr = pct_encoded[r];
+    if (w >= buf_size)
+      return 0;
+    if ('%' == chr)
+    {
+      /* Same three-character requirement as in the loop above */
+      if (3 > pct_encoded_len - r)
+        return 0;
+      else
+      {
+        const int h = toxdigitvalue (pct_encoded[++r]);
+        const int l = toxdigitvalue (pct_encoded[++r]);
+        unsigned char out;
+        if ((0 > h) || (0 > l))
+          return 0;
+        out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                | ((uint8_t) ((unsigned int) l)) );
+        decoded[w] = (char) out;
+      }
+    }
+    else
+      decoded[w] = chr;
+    ++r;
+    ++w;
+  }
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
+/**
+ * Decode a percent-encoded string (RFC 3986, section 2.1), lenient mode.
+ *
+ * Every valid "%XX" triplet is replaced by the single byte it encodes, all
+ * other characters are copied unchanged.  A '%' that is not followed by two
+ * hexadecimal digits located inside the input is copied "as is" and the
+ * characters after it are processed as ordinary input.
+ * The output is NOT zero-terminated.  The write position never gets ahead
+ * of the read position, so @a decoded may be the same buffer as
+ * @a pct_encoded.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of @a pct_encoded
+ * @param[out] decoded the output buffer
+ * @param buf_size the size of the output buffer
+ * @param[out] broken_encoding set to false on entry and to true if any
+ *                             malformed '%' sequence is found,
+ *                             optional, can be NULL
+ * @return the number of characters written to the output buffer, or
+ *         zero if the output buffer is too small to hold the result
+ *         (an empty input yields zero as well)
+ */
+size_t
+MHD_str_pct_decode_lenient_n_ (const char *pct_encoded,
+                               size_t pct_encoded_len,
+                               char *decoded,
+                               size_t buf_size,
+                               bool *broken_encoding)
+{
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+  if (NULL != broken_encoding)
+    *broken_encoding = false;
+#ifndef MHD_FAVOR_SMALL_CODE
+  if (buf_size >= pct_encoded_len)
+  {
+    /* The output is never longer than the input, so no per-character
+       space check is needed in this loop. */
+    while (r < pct_encoded_len)
+    {
+      const char chr = pct_encoded[r];
+      if ('%' == chr)
+      {
+        /* The '%' and both digits (three characters) must all be inside
+           the input; otherwise the second digit would be read from
+           beyond the end of the buffer. */
+        if (3 > pct_encoded_len - r)
+        {
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          decoded[w] = chr; /* Copy "as is" */
+        }
+        else
+        {
+          const int h = toxdigitvalue (pct_encoded[++r]);
+          const int l = toxdigitvalue (pct_encoded[++r]);
+          /* A negative value is treated as "not a hexadecimal digit" */
+          if ((0 > h) || (0 > l))
+          {
+            /* Step back to the '%' so that the two following characters
+               are handled as ordinary input by the next iterations. */
+            r -= 2;
+            if (NULL != broken_encoding)
+              *broken_encoding = true;
+            decoded[w] = chr; /* Copy "as is" */
+          }
+          else
+          {
+            unsigned char out;
+            out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                    | ((uint8_t) ((unsigned int) l)) );
+            decoded[w] = (char) out;
+          }
+        }
+      }
+      else
+        decoded[w] = chr;
+      ++r;
+      ++w;
+    }
+    return w;
+  }
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+  /* Generic loop: check the free space before every write. */
+  while (r < pct_encoded_len)
+  {
+    const char chr = pct_encoded[r];
+    if (w >= buf_size)
+      return 0;
+    if ('%' == chr)
+    {
+      /* Same three-character requirement as described above */
+      if (3 > pct_encoded_len - r)
+      {
+        if (NULL != broken_encoding)
+          *broken_encoding = true;
+        decoded[w] = chr; /* Copy "as is" */
+      }
+      else
+      {
+        const int h = toxdigitvalue (pct_encoded[++r]);
+        const int l = toxdigitvalue (pct_encoded[++r]);
+        if ((0 > h) || (0 > l))
+        {
+          /* Step back to the '%'; the next iterations copy the rest */
+          r -= 2;
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          decoded[w] = chr; /* Copy "as is" */
+        }
+        else
+        {
+          unsigned char out;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          decoded[w] = (char) out;
+        }
+      }
+    }
+    else
+      decoded[w] = chr;
+    ++r;
+    ++w;
+  }
+  return w;
+}
+
+
+/**
+ * Decode a zero-terminated percent-encoded string in-place
+ * (RFC 3986, section 2.1), strict mode.
+ *
+ * Every "%XX" triplet is replaced by the single byte it encodes, all other
+ * characters are moved back unchanged.  The result is zero-terminated.
+ * If any '%' is not followed by two hexadecimal digits, decoding stops and
+ * the string is truncated to zero length.
+ *
+ * @param[in,out] str the zero-terminated string to be updated in-place
+ * @return the number of characters in the decoded string,
+ *         zero if broken encoding is found (or if the input is empty)
+ */
+size_t
+MHD_str_pct_decode_in_place_strict_ (char *str)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  size_t res;
+  bool broken;
+
+  res = MHD_str_pct_decode_in_place_lenient_ (str, &broken);
+  if (broken)
+  {
+    res = 0;
+    str[0] = 0;
+  }
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+
+  while (0 != str[r])
+  {
+    const char chr = str[r++];
+    if ('%' == chr)
+    {
+      const char d1 = str[r++];
+      if (0 == d1)
+      {
+        /* String ended right after '%': truncate, as the small-code
+           variant above does, so that no partial result is left. */
+        str[0] = 0;
+        return 0;
+      }
+      else
+      {
+        const char d2 = str[r++];
+        if (0 == d2)
+        {
+          /* String ended after a single digit */
+          str[0] = 0;
+          return 0;
+        }
+        else
+        {
+          const int h = toxdigitvalue (d1);
+          const int l = toxdigitvalue (d2);
+          unsigned char out;
+          /* A negative value is treated as "not a hexadecimal digit" */
+          if ((0 > h) || (0 > l))
+          {
+            str[0] = 0;
+            return 0;
+          }
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          str[w++] = (char) out;
+        }
+      }
+    }
+    else
+      str[w++] = chr;
+  }
+  str[w] = 0;
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
+/**
+ * Decode a zero-terminated percent-encoded string in-place
+ * (RFC 3986, section 2.1), lenient mode.
+ *
+ * Valid "%XX" triplets are replaced by the byte they encode; a '%' that is
+ * not followed by two hexadecimal digits is kept, together with whatever
+ * follows it, without decoding.  The result is zero-terminated.
+ *
+ * @param[in,out] str the zero-terminated string to be updated in-place
+ * @param[out] broken_encoding set to false on entry and to true if any
+ *                             malformed '%' sequence is found,
+ *                             optional, can be NULL
+ * @return the number of characters in the decoded string
+ */
+size_t
+MHD_str_pct_decode_in_place_lenient_ (char *str,
+                                      bool *broken_encoding)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  const size_t in_len = strlen (str);
+  const size_t out_len =
+    MHD_str_pct_decode_lenient_n_ (str, in_len, str, in_len, broken_encoding);
+
+  str[out_len] = 0;
+  return out_len;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  const char *src = str; /* read cursor */
+  char *dst = str;       /* write cursor, never ahead of the read cursor */
+
+  if (NULL != broken_encoding)
+    *broken_encoding = false;
+
+  while (0 != *src)
+  {
+    const char cur = *src++;
+    char digit_hi;
+    char digit_lo;
+    int val_hi;
+    int val_lo;
+
+    if ('%' != cur)
+    {
+      *dst++ = cur; /* Ordinary character */
+      continue;
+    }
+
+    digit_hi = *src++;
+    if (0 == digit_hi)
+    { /* The string ends right after the '%' */
+      if (NULL != broken_encoding)
+        *broken_encoding = true;
+      *dst++ = cur; /* Copy "as is" */
+      break;
+    }
+
+    digit_lo = *src++;
+    if (0 == digit_lo)
+    { /* The string ends after a single character following the '%' */
+      if (NULL != broken_encoding)
+        *broken_encoding = true;
+      *dst++ = cur;      /* Copy "as is" */
+      *dst++ = digit_hi; /* Copy "as is" */
+      break;
+    }
+
+    val_hi = toxdigitvalue (digit_hi);
+    val_lo = toxdigitvalue (digit_lo);
+    if ((0 <= val_hi) && (0 <= val_lo))
+    {
+      const uint8_t nibble_hi = (uint8_t) ((unsigned int) val_hi);
+      const uint8_t nibble_lo = (uint8_t) ((unsigned int) val_lo);
+
+      *dst++ = (char) ((unsigned char) ((nibble_hi << 4) | nibble_lo));
+    }
+    else
+    { /* Not two hexadecimal digits: keep all three characters */
+      if (NULL != broken_encoding)
+        *broken_encoding = true;
+      *dst++ = cur; /* Copy "as is" */
+      *dst++ = digit_hi;
+      *dst++ = digit_lo;
+    }
+  }
+
+  *dst = 0;
+  return (size_t) (dst - str);
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
#ifdef DAUTH_SUPPORT
bool
MHD_str_equal_quoted_bin_n (const char *quoted,
diff --git a/src/microhttpd/mhd_str.h b/src/microhttpd/mhd_str.h
index 9e3a0334..ec8b5cf4 100644
--- a/src/microhttpd/mhd_str.h
+++ b/src/microhttpd/mhd_str.h
@@ -493,6 +493,99 @@ MHD_bin_to_hex (const void *bin,
size_t size,
char *hex);
+/**
+ * Decode string with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decodes the string by converting percent-encoded characters
+ * to their decoded versions and copying all other characters without extra
+ * processing.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of the @a pct_encoded
+ * @param[out] decoded the output buffer, NOT zero-terminated, can point
+ *                     to the same buffer as @a pct_encoded
+ * @param buf_size the size of the output buffer
+ * @return the number of characters written to the output buffer or
+ *         zero if any percent-encoded character is broken ('%' followed
+ *         by fewer than two hexadecimal digits) or the output buffer is
+ *         too small to hold the result; note that an empty input also
+ *         results in zero
+ */
+size_t
+MHD_str_pct_decode_strict_n_ (const char *pct_encoded,
+ size_t pct_encoded_len,
+ char *decoded,
+ size_t buf_size);
+
+/**
+ * Decode string with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decodes the string by converting percent-encoded characters
+ * to their decoded versions and copying all other characters without extra
+ * processing.
+ *
+ * Any invalid percent-encoding sequences ('%' symbol not followed by two
+ * valid hexadecimal digits) are copied to the output string without decoding.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of the @a pct_encoded
+ * @param[out] decoded the output buffer, NOT zero-terminated, can point
+ *                     to the same buffer as @a pct_encoded
+ * @param buf_size the size of the output buffer
+ * @param[out] broken_encoding will be set to true if any '%' symbol is not
+ *                             followed by two valid hexadecimal digits,
+ *                             set to false otherwise,
+ *                             optional, can be NULL
+ * @return the number of characters written to the output buffer or
+ *         zero if the output buffer is too small to hold the result;
+ *         note that an empty input also results in zero
+ */
+size_t
+MHD_str_pct_decode_lenient_n_ (const char *pct_encoded,
+ size_t pct_encoded_len,
+ char *decoded,
+ size_t buf_size,
+ bool *broken_encoding);
+
+
+/**
+ * Decode string in-place with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decodes the string by converting percent-encoded characters
+ * to their decoded versions and copying back all other characters without
+ * extra processing.
+ *
+ * @param[in,out] str the string to be updated in-place, must be
+ *                    zero-terminated on input, the output is
+ *                    zero-terminated; the string is truncated to zero
+ *                    length if broken encoding is found
+ * @return the number of characters in the decoded string,
+ *         zero if broken encoding is found
+ */
+size_t
+MHD_str_pct_decode_in_place_strict_ (char *str);
+
+
+/**
+ * Decode string in-place with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decodes the string by converting percent-encoded characters
+ * to their decoded versions and copying back all other characters without
+ * extra processing.
+ *
+ * Any invalid percent-encoding sequences ('%' symbol not followed by two
+ * valid hexadecimal digits) are copied to the output string without decoding.
+ *
+ * @param[in,out] str the string to be updated in-place, must be
+ *                    zero-terminated on input, the output is
+ *                    zero-terminated
+ * @param[out] broken_encoding will be set to true if any '%' symbol is not
+ *                             followed by two valid hexadecimal digits,
+ *                             set to false otherwise,
+ *                             optional, can be NULL
+ * @return the number of characters in the decoded string
+ */
+size_t
+MHD_str_pct_decode_in_place_lenient_ (char *str,
+ bool *broken_encoding);
+
#ifdef DAUTH_SUPPORT
/**
* Check two strings for equality, "unquoting" the first string from quoted
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [libmicrohttpd] branch master updated (d629ada1 -> 76b5b195), gnunet, 2022/07/19
- [libmicrohttpd] 03/15: Use new functions for decode request URLs, gnunet, 2022/07/19
- [libmicrohttpd] 01/15: mhd_str: added functions for percent-decoding,
gnunet <=
- [libmicrohttpd] 02/15: Added tests for percent-decoding functions, gnunet, 2022/07/19
- [libmicrohttpd] 05/15: Added tests for hex <-> binary functions, gnunet, 2022/07/19
- [libmicrohttpd] 12/15: microhttpd.h: added special enum for hash types, gnunet, 2022/07/19
- [libmicrohttpd] 04/15: mhd_str: added MHD_hex_to_bin() internal function, gnunet, 2022/07/19
- [libmicrohttpd] 09/15: Digest: check whether all required parameters are present before doing heavy calculations, gnunet, 2022/07/19
- [libmicrohttpd] 06/15: mhd_str: added macros for simple comparison against static strings, gnunet, 2022/07/19
- [libmicrohttpd] 08/15: Digest: use binary zero to separate get params in digest, gnunet, 2022/07/19
- [libmicrohttpd] 07/15: Added new functions MHD_digest_auth_get_request_info3() and MHD_digest_auth_get_username3(), gnunet, 2022/07/19
- [libmicrohttpd] 15/15: microhttpd.h: formatted some deprecation warnings, gnunet, 2022/07/19
- [libmicrohttpd] 10/15: Digest: moved URI match check to separate function, avoid one memcpy(), gnunet, 2022/07/19