>From b062d348e50ac1a7789e1f322de9981dd2fb3947 Mon Sep 17 00:00:00 2001
From: Matthew White
Date: Wed, 24 Aug 2016 12:43:24 +0200
Subject: [PATCH 16/25] Bugfix: Remove surrounding quotes from Metalink/HTTP key's value

* src/metalink.h: Add declaration of function dequote_metalink_string()
* src/metalink.c: Add function dequote_metalink_string() remove
  surrounding quotes from string, \' or \"
* src/metalink.c (find_key_value, find_key_values): Call
  dequote_metalink_string() to remove the surrounding quotes from the
  parsed value
* src/metalink.c (test_find_key_value, test_find_key_values): Add
  quoted key's values for unit-tests
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-http-quoted.py: New file. Metalink/HTTP quoted
  values tests

Some Metalink/HTTP keys, like "type" [2], may have a quoted value [1]:
Link: ; rel=describedby; type="application/metalink4+xml"

Wget was expecting a dequoted value from the Metalink module. This
patch addresses this problem.

References:
 [1] Metalink/HTTP: Mirrors and Hashes
     1.1. Example Metalink Server Response
     https://tools.ietf.org/html/rfc6249#section-1.1

 [2] Additional Link Relations
     6. "type"
     https://tools.ietf.org/html/rfc6903#section-6
---
 src/metalink.c | 49 ++++++++++++--
 src/metalink.h | 1 +
 testenv/Makefile.am | 1 +
 testenv/Test-metalink-http-quoted.py | 127 +++++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+), 7 deletions(-)
 create mode 100755 testenv/Test-metalink-http-quoted.py

diff --git a/src/metalink.c b/src/metalink.c
index 01d824b..b23efad 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -819,6 +819,39 @@ clean_metalink_string (char **str)
   *str = new;
 }
 
+/*
+  Remove one pair of matching quotes, \" or \', surrounding *STR.  A NULL,
+  shorter than two characters, or unquoted string is left untouched.
+  Otherwise *STR is freed and replaced with the dequoted copy.
+ */
+void
+dequote_metalink_string (char **str)
+{
+  char *new, *beg, *end;
+  size_t str_len, new_len;
+
+  if (!str || !*str)
+    return;
+
+  str_len = strlen (*str); /* current string length */
+
+  if (str_len < 2)
+    return;
+
+  new_len = str_len - 2; /* predict dequoted length */
+
+  beg = *str; /* begin of current string */
+  end = *str + (str_len - 1); /* end of current string */
+
+  /* Verify if the current string is surrounded by quotes. */
+  if (!(*beg == '\"' && *end == '\"') && !(*beg == '\'' && *end == '\''))
+    return;
+
+  new = xmemdup0 (beg + 1, new_len);
+  xfree (*str);
+  *str = new;
+}
+
 /* Append the suffix ".badhash" to the file NAME, except without
    overwriting an existing file with that name and suffix. */
 void
@@ -966,6 +999,7 @@ find_key_value (const char *start, const char *end, const char *key, char **valu
           while (val_end < end && *val_end != ';' && !c_isspace (*val_end))
             val_end++;
           *value = xstrndup (val_beg, val_end - val_beg);
+          dequote_metalink_string (value);
           return true;
         }
     }
@@ -1066,6 +1100,7 @@ find_key_values (const char *start, const char *end, char **key, char **value)
 
   *key = xstrndup (key_start, key_end - key_start);
   *value = xstrndup (val_start, val_end - val_start);
+  dequote_metalink_string (value);
 
   /* Skip trailing whitespaces. */
   while (val_end < end && c_isspace (*val_end))
@@ -1078,10 +1113,10 @@ find_key_values (const char *start, const char *end, char **key, char **value)
 const char *
 test_find_key_values (void)
 {
-  static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
-                                   " ; key5=val5;key6 =val6;key7= val7; "\
-                                   "key8 = val8 ; key9 = val9 "\
-                                   " ,key10= val10,key11,key12=val12";
+  static const char *header_data = "key1=val1;key2=\"val2\" ;key3=val3; key4=val4"\
+                                   " ; key5=val5;key6 ='val6';key7= val7; "\
+                                   "key8 = val8 ; key9 = \"val9\" "\
+                                   " ,key10= 'val10',key11,key12=val12";
   static const struct
   {
     const char *key;
@@ -1122,9 +1157,9 @@ test_find_key_values (void)
 const char *
 test_find_key_value (void)
 {
-  static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
-                                   " ; key5=val5;key6 =val6;key7= val7; "\
-                                   "key8 = val8 ; key9 = val9 ";
+  static const char *header_data = "key1=val1;key2=val2 ;key3='val3'; key4=val4"\
+                                   " ; key5='val5';key6 =val6;key7= \"val7\"; "\
+                                   "key8 = \"val8\" ; key9 = val9 ";
   static const struct
   {
     const char *key;
diff --git a/src/metalink.h b/src/metalink.h
index 3244b83..e15cbda 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -52,6 +52,7 @@ int metalink_check_safe_path(const char *path);
 char *get_metalink_basename (char *name);
 void append_suffix_number (char **str, const char *sep, wgint num);
 void clean_metalink_string (char **str);
+void dequote_metalink_string (char **str);
 void badhash_suffix (char *name);
 void badhash_or_remove (char *name);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index a896cca..8272734 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -28,6 +28,7 @@
 
 if METALINK_IS_ENABLED
   METALINK_TESTS = Test-metalink-http.py \
+    Test-metalink-http-quoted.py \
     Test-metalink-xml.py \
     Test-metalink-xml-relpath.py \
     Test-metalink-xml-abspath.py \
diff --git a/testenv/Test-metalink-http-quoted.py b/testenv/Test-metalink-http-quoted.py
new file mode 100755
index 0000000..836b836
--- /dev/null
+++ b/testenv/Test-metalink-http-quoted.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import re
+import hashlib
+from base64 import b64encode
+
+"""
+    This is to test Metalink/HTTP quoted values support in Wget.
+"""
+
+# Helper for hostname, port and digest substitution.  Uses str.replace so the
+# values are inserted literally (re.sub would interpret backslashes in them).
+def SubstituteServerInfo (text, host, port, digest):
+    return (text.replace ('{{FILE1_HASH}}', digest)
+                .replace ('{{SRV_HOST}}', host)
+                .replace ('{{SRV_PORT}}', str (port)))
+
+############# File Definitions ###############################################
+File1 = "Would you like some Tea?"
+File1_corrupted = "Would you like some Coffee?"
+File1_lowPref = "Do not take this"
+File1_sha256 = b64encode (hashlib.sha256 (File1.encode ('UTF-8')).digest ()).decode ('ascii')
+Signature = '''-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.0.7 (GNU/Linux)
+
+This is no valid signature. But it should be downloaded.
+The attempt to verify should fail but should not prevent
+a successful metalink resource retrieval (the sig failure
+should not be fatal).
+-----END PGP SIGNATURE-----
+'''
+File2 = "No meta data for this file."
+
+LinkHeaders = [
+    # This file has low priority and should not be picked.
+    "; rel=\"duplicate\"; pri=\"9\"; geo=\"pl\"",
+    # This file should be picked second, after hash failure.
+    "; rel =\"duplicate\";pref; pri=\"7\"",
+    # This signature download will fail.
+    "; rel=\"describedby\"; type=\"application/pgp-signature\"",
+    # Two good signatures
+    "; rel=\"describedby\"; type=\"application/pgp-signature\"",
+    "; rel=\"describedby\"; type=\"application/pgp-signature\"",
+    # Bad URL scheme
+    "; rel=\"duplicate\"; pri=\"4\"",
+    # rel missing
+    "; pri=\"1\"; pref",
+    # invalid rel
+    "; rel=\"strange\"; pri=\"4\"",
+    # This file should be picked first, because it has the lowest pri among preferred.
+    "; rel=\"duplicate\"; geo=\"su\"; pri=\"4\"; pref",
+    # This file should NOT be picked third due to preferred location set to 'uk'
+    "; rel =\"duplicate\";pri=\"5\"",
+    # This file should be picked as third try, and it should succeed
+    "; rel=\'duplicate\'; pri=\"5\";geo=\"uk\""
+    ]
+DigestHeader = "SHA-256=\'{{FILE1_HASH}}\'"
+
+# This will be filled as soon as we know server hostname and port
+MetaFileRules = {'SendHeader' : {}}
+
+FileOkServer = WgetFile ("File1_try3_ok", File1)
+FileBadPref = WgetFile ("File1_lowPref", File1_lowPref)
+FileBadHash = WgetFile ("File1_try1_corrupted", File1_corrupted)
+MetaFile = WgetFile ("test.meta", rules=MetaFileRules)
+# In case of Metalink over HTTP, the local file name is
+# derived from the URL suffix.
+FileOkLocal = WgetFile ("test.meta", File1)
+SigFile = WgetFile ("Sig.asc", Signature)
+FileNoMeta = WgetFile ("File2", File2)
+
+WGET_OPTIONS = "--metalink-over-http --preferred-location=uk"
+WGET_URLS = [["test.meta", "File2"]]
+
+Files = [[FileOkServer, FileBadPref, FileBadHash, MetaFile, SigFile, FileNoMeta]]
+Existing_Files = []
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [FileNoMeta, FileOkLocal]
+
+RequestList = [
+    [
+        "HEAD /test.meta",
+        "GET /Sig2.asc",
+        "GET /Sig.asc",
+        "GET /File1_try1_corrupted",
+        "GET /File1_try3_ok",
+        "HEAD /File2",
+        "GET /File2",
+    ]
+]
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+    "ServerFiles" : Files,
+    "LocalFiles" : Existing_Files
+}
+test_options = {
+    "WgetCommands" : WGET_OPTIONS,
+    "Urls" : WGET_URLS
+}
+post_test = {
+    "ExpectedFiles" : ExpectedDownloadedFiles,
+    "ExpectedRetcode" : ExpectedReturnCode,
+    "FilesCrawled" : RequestList,
+}
+
+http_test = HTTPTest (
+    pre_hook=pre_test,
+    test_params=test_options,
+    post_hook=post_test,
+)
+
+http_test.server_setup()
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaFileRules["SendHeader"] = {
+    'Link': [ SubstituteServerInfo (LinkHeader, srv_host, srv_port, File1_sha256)
+              for LinkHeader in LinkHeaders ],
+    'Digest': SubstituteServerInfo (DigestHeader, srv_host, srv_port, File1_sha256),
+}
+
+err = http_test.begin ()
+
+exit (err)
-- 
2.7.3