>From b062d348e50ac1a7789e1f322de9981dd2fb3947 Mon Sep 17 00:00:00 2001
From: Matthew White
Date: Wed, 24 Aug 2016 12:43:24 +0200
Subject: [PATCH 16/25] Bugfix: Remove surrounding quotes from Metalink/HTTP
key's value
* src/metalink.h: Add declaration of function dequote_metalink_string()
* src/metalink.c: Add function dequote_metalink_string() to remove
the surrounding quotes, \' or \", from a string
* src/metalink.c (find_key_value, find_key_values): Call dequote_metalink_string()
to remove the surrounding quotes from the parsed value
* src/metalink.c (test_find_key_value, test_find_key_values): Add
quoted key's values for unit-tests
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-http-quoted.py: New file. Metalink/HTTP quoted
values tests
Some Metalink/HTTP keys, like "type" [2], may have a quoted value [1]:
Link: <http://example.com/example.ext.meta4>; rel=describedby;
type="application/metalink4+xml"
Wget was expecting a dequoted value from the Metalink module. This
patch addresses this problem.
References:
[1] Metalink/HTTP: Mirrors and Hashes
1.1. Example Metalink Server Response
https://tools.ietf.org/html/rfc6249#section-1.1
[2] Additional Link Relations
6. "type"
https://tools.ietf.org/html/rfc6903#section-6
---
src/metalink.c | 49 ++++++++++++--
src/metalink.h | 1 +
testenv/Makefile.am | 1 +
testenv/Test-metalink-http-quoted.py | 127 +++++++++++++++++++++++++++++++++++
4 files changed, 171 insertions(+), 7 deletions(-)
create mode 100755 testenv/Test-metalink-http-quoted.py
diff --git a/src/metalink.c b/src/metalink.c
index 01d824b..b23efad 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -819,6 +819,39 @@ clean_metalink_string (char **str)
*str = new;
}
+/*
+ Strip one pair of matching quotes, "..." or '...', that surrounds *STR.
+ If STR or *STR is NULL, *STR is shorter than two characters, or its first and last characters are not the same kind of quote, nothing is changed.
+ Otherwise *STR is freed and replaced by a newly allocated copy of the text between the quotes (xmemdup0 is expected to NUL-terminate it).
+ */
+void
+dequote_metalink_string (char **str)
+{
+ char *new, *beg, *end;
+ size_t str_len, new_len;
+
+ if (!str || !*str)
+ return; /* no string to work on */
+
+ str_len = strlen (*str); /* length including any surrounding quotes */
+
+ if (str_len < 2) /* too short to hold both an opening and a closing quote */
+ return;
+
+ new_len = str_len - 2; /* length once both quotes are dropped; may be 0 */
+
+ beg = *str; /* first character of the string */
+ end = *str + (str_len - 1); /* last character, just before the NUL */
+
+ /* Go on only if both ends carry the same kind of quote; mixed quotes are left alone. */
+ if (!(*beg == '\"' && *end == '\"') && !(*beg == '\'' && *end == '\''))
+ return;
+
+ new = xmemdup0 (beg + 1, new_len); /* copy the text between the quotes */
+ xfree (*str); /* release the quoted original */
+ *str = new;
+}
+
/* Append the suffix ".badhash" to the file NAME, except without
overwriting an existing file with that name and suffix. */
void
@@ -966,6 +999,7 @@ find_key_value (const char *start, const char *end, const char *key, char **valu
while (val_end < end && *val_end != ';' && !c_isspace (*val_end))
val_end++;
*value = xstrndup (val_beg, val_end - val_beg);
+ dequote_metalink_string (value);
return true;
}
}
@@ -1066,6 +1100,7 @@ find_key_values (const char *start, const char *end, char **key, char **value)
*key = xstrndup (key_start, key_end - key_start);
*value = xstrndup (val_start, val_end - val_start);
+ dequote_metalink_string (value);
/* Skip trailing whitespaces. */
while (val_end < end && c_isspace (*val_end))
@@ -1078,10 +1113,10 @@ find_key_values (const char *start, const char *end, char **key, char **value)
const char *
test_find_key_values (void)
{
- static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
- " ; key5=val5;key6 =val6;key7= val7; "\
- "key8 = val8 ; key9 = val9 "\
- " ,key10= val10,key11,key12=val12";
+ static const char *header_data = "key1=val1;key2=\"val2\" ;key3=val3; key4=val4"\
+ " ; key5=val5;key6 ='val6';key7= val7; "\
+ "key8 = val8 ; key9 = \"val9\" "\
+ " ,key10= 'val10',key11,key12=val12";
static const struct
{
const char *key;
@@ -1122,9 +1157,9 @@ test_find_key_values (void)
const char *
test_find_key_value (void)
{
- static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
- " ; key5=val5;key6 =val6;key7= val7; "\
- "key8 = val8 ; key9 = val9 ";
+ static const char *header_data = "key1=val1;key2=val2 ;key3='val3'; key4=val4"\
+ " ; key5='val5';key6 =val6;key7= \"val7\"; "\
+ "key8 = \"val8\" ; key9 = val9 ";
static const struct
{
const char *key;
diff --git a/src/metalink.h b/src/metalink.h
index 3244b83..e15cbda 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -52,6 +52,7 @@ int metalink_check_safe_path(const char *path);
char *get_metalink_basename (char *name);
void append_suffix_number (char **str, const char *sep, wgint num);
void clean_metalink_string (char **str);
+void dequote_metalink_string (char **str);
void badhash_suffix (char *name);
void badhash_or_remove (char *name);
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index a896cca..8272734 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -28,6 +28,7 @@
if METALINK_IS_ENABLED
METALINK_TESTS = Test-metalink-http.py \
+ Test-metalink-http-quoted.py \
Test-metalink-xml.py \
Test-metalink-xml-relpath.py \
Test-metalink-xml-abspath.py \
diff --git a/testenv/Test-metalink-http-quoted.py b/testenv/Test-metalink-http-quoted.py
new file mode 100755
index 0000000..836b836
--- /dev/null
+++ b/testenv/Test-metalink-http-quoted.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import re
+import hashlib
+from base64 import b64encode
+
+"""
+ This is to test Metalink/HTTP quoted values support in Wget.
+"""
+
+# Return TEXT with the {{FILE1_HASH}}, {{SRV_HOST}} and {{SRV_PORT}} placeholders replaced
+def SubstituteServerInfo (text, host, port, digest):
+ text = re.sub (r'{{FILE1_HASH}}', digest, text)
+ text = re.sub (r'{{SRV_HOST}}', host, text)
+ text = re.sub (r'{{SRV_PORT}}', str (port), text)  # port is converted to str before substitution
+ return text
+
+############# File Definitions ###############################################
+File1 = "Would you like some Tea?"  # the content File1_sha256 is computed from
+File1_corrupted = "Would you like some Coffee?"  # differs from File1, so its hash cannot equal File1_sha256
+File1_lowPref = "Do not take this"
+File1_sha256 = b64encode (hashlib.sha256 (File1.encode ('UTF-8')).digest ()).decode ('ascii')  # base64 of the raw SHA-256 digest of File1
+Signature = '''-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.0.7 (GNU/Linux)
+
+This is no valid signature. But it should be downloaded.
+The attempt to verify should fail but should not prevent
+a successful metalink resource retrieval (the sig failure
+should not be fatal).
+-----END PGP SIGNATURE-----
+'''
+File2 = "No meta data for this file."
+
+LinkHeaders = [  # Link header values; {{SRV_HOST}}/{{SRV_PORT}} are filled in by SubstituteServerInfo
+ # This file has low priority and should not be picked.
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref>; rel=\"duplicate\"; pri=\"9\"; geo=\"pl\"",
+ # This file should be picked second, after hash failure.
+ "<http://0.0.0.0/File1_try2_badconnection>; rel =\"duplicate\";pref; pri=\"7\"",
+ # This signature download will fail.
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/Sig2.asc>; rel=\"describedby\"; type=\"application/pgp-signature\"",
+ # Two good signatures
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/Sig.asc>; rel=\"describedby\"; type=\"application/pgp-signature\"",
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/Sig.asc>; rel=\"describedby\"; type=\"application/pgp-signature\"",
+ # Bad URL scheme
+ "<invalid_url>; rel=\"duplicate\"; pri=\"4\"",
+ # rel missing
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1>; pri=\"1\"; pref",
+ # invalid rel
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1>; rel=\"strange\"; pri=\"4\"",
+ # This file should be picked first, because it has the lowest pri among preferred.
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_try1_corrupted>; rel=\"duplicate\"; geo=\"su\"; pri=\"4\"; pref",
+ # This file should NOT be picked third due to preferred location set to 'uk'
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_badgeo>; rel =\"duplicate\";pri=\"5\"",
+ # This file should be picked as third try, and it should succeed
+ "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_try3_ok>; rel=\'duplicate\'; pri=\"5\";geo=\"uk\""
+ ]
+DigestHeader = "SHA-256=\'{{FILE1_HASH}}\'"  # digest value is single-quoted; this test covers quoted values
+
+# Placeholder: "SendHeader" is filled in once the server hostname and port are known
+MetaFileRules = {'SendHeader' : {}}
+
+FileOkServer = WgetFile ("File1_try3_ok", File1)
+FileBadPref = WgetFile ("File1_lowPref", File1_lowPref)
+FileBadHash = WgetFile ("File1_try1_corrupted", File1_corrupted)
+MetaFile = WgetFile ("test.meta", rules=MetaFileRules)  # shares the MetaFileRules dict, which is updated later
+# In case of Metalink over HTTP, the local file name is
+# derived from the URL suffix.
+FileOkLocal = WgetFile ("test.meta", File1)  # local "test.meta" is expected to hold File1's content
+SigFile = WgetFile ("Sig.asc", Signature)
+FileNoMeta = WgetFile ("File2", File2)
+
+WGET_OPTIONS = "--metalink-over-http --preferred-location=uk"
+WGET_URLS = [["test.meta", "File2"]]
+
+Files = [[FileOkServer, FileBadPref, FileBadHash, MetaFile, SigFile, FileNoMeta]]
+Existing_Files = []
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [FileNoMeta, FileOkLocal]
+
+RequestList = [  # passed to the test as the "FilesCrawled" expectation
+ [
+ "HEAD /test.meta",
+ "GET /Sig2.asc",
+ "GET /Sig.asc",
+ "GET /File1_try1_corrupted",
+ "GET /File1_try3_ok",
+ "HEAD /File2",
+ "GET /File2",
+ ]
+]
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+ "ServerFiles" : Files,
+ "LocalFiles" : Existing_Files
+}
+test_options = {
+ "WgetCommands" : WGET_OPTIONS,
+ "Urls" : WGET_URLS
+}
+post_test = {
+ "ExpectedFiles" : ExpectedDownloadedFiles,
+ "ExpectedRetcode" : ExpectedReturnCode,
+ "FilesCrawled" : RequestList,
+}
+
+http_test = HTTPTest (
+ pre_hook=pre_test,
+ test_params=test_options,
+ post_hook=post_test,
+)
+
+http_test.server_setup()  # set the server up before the run so its address can be queried
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()  # address of the first server's socket
+
+MetaFileRules["SendHeader"] = {  # updates the dict already handed to MetaFile as its rules
+ 'Link': [ SubstituteServerInfo (LinkHeader, srv_host, srv_port, File1_sha256)
+ for LinkHeader in LinkHeaders ],
+ 'Digest': SubstituteServerInfo (DigestHeader, srv_host, srv_port, File1_sha256),
+}
+
+err = http_test.begin ()  # run the test; the result is used as the exit status below
+
+exit (err)
--
2.7.3