[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Bug-wget] [PATCH 16/27] Bugfix: Process Metalink/XML url strings containing white spaces and CRLF
From: |
Matthew White |
Subject: |
[Bug-wget] [PATCH 16/27] Bugfix: Process Metalink/XML url strings containing white spaces and CRLF |
Date: |
Thu, 29 Sep 2016 06:02:56 +0200 |
* src/metalink.h: Add declaration of function clean_metalink_string()
* src/metalink.c: Add directive #include "xmemdup0.h"
* src/metalink.c: Add function clean_metalink_string() to remove
leading and trailing white spaces and CRLF from a string
* src/metalink.c (retrieve_from_metalink): Remove leading and trailing
white spaces and CRLF from url resource mres->url
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-urlbreak.py: New test. Metalink/XML white
spaces and CRLF in url resources tests
libmetalink does not automatically remove white spaces and CRLF
from url strings, and Wget's Metalink module was unable to process
such url strings. This patch makes the module process them by
stripping leading and trailing white spaces and CRLF.
If a parsed Metalink/XML url string contains several strings
separated by CRLF, only the first of the series is accepted.
---
src/metalink.c | 43 +++++++
src/metalink.h | 1 +
testenv/Makefile.am | 3 +-
testenv/Test-metalink-xml-urlbreak.py | 236 ++++++++++++++++++++++++++++++++++
4 files changed, 282 insertions(+), 1 deletion(-)
create mode 100755 testenv/Test-metalink-xml-urlbreak.py
diff --git a/src/metalink.c b/src/metalink.c
index 5108a5e..5212742 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -41,6 +41,7 @@ as that of the covered work. */
#include "sha256.h"
#include "sha512.h"
#include "dosname.h"
+#include "xmemdup0.h"
#include "xstrndup.h"
#include "c-strcase.h"
#include <errno.h>
@@ -197,6 +198,8 @@ retrieve_from_metalink (const metalink_t* metalink)
struct url *url;
int url_err;
+ clean_metalink_string (&mres->url);
+
if (!RES_TYPE_SUPPORTED (mres->type))
{
logprintf (LOG_VERBOSE,
@@ -780,6 +783,46 @@ append_suffix_number (char **str, const char *sep, wgint num)
*str = new;
}
+/*
+  Remove the string's leading/trailing whitespaces and line breaks.
+
+  When the text spans several lines, only the first one is kept.  The
+  string is permanently modified: *STR is freed and replaced with a
+  newly allocated copy.  A NULL STR or *STR is left untouched; an
+  empty or all-blank string becomes "".
+*/
+void
+clean_metalink_string (char **str)
+{
+  size_t len;
+  char *new, *beg, *end;
+
+  if (!str || !*str)
+    return;
+
+  beg = *str;
+
+  while (*beg == '\n' || *beg == '\r' || *beg == '\t' || *beg == ' ')
+    beg++;
+
+  /* To not truncate a string containing spaces, search the first '\r'
+     or '\n' which hypothetically marks the end of the string.  */
+  end = beg;
+  while (*end != '\0' && *end != '\r' && *end != '\n')
+    end++;
+
+  /* Drop the blanks left before END (no '\r' or '\n' can be there).
+     Stop at BEG: for an empty or all-blank string END == BEG, and
+     reading *(end - 1) would leave the text and underflow LEN.  */
+  while (end > beg && (end[-1] == '\t' || end[-1] == ' '))
+    end--;
+
+  len = end - beg;
+  new = xmemdup0 (beg, len);
+  xfree (*str);
+  *str = new;
+}
+
/* Append the suffix ".badhash" to the file NAME, except without
overwriting an existing file with that name and suffix. */
void
diff --git a/src/metalink.h b/src/metalink.h
index c9dd73e..4095262 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -52,6 +52,7 @@ int metalink_check_safe_path (const char *path);
char *last_component (char const *name);
char *get_metalink_basename (char *name);
void append_suffix_number (char **str, const char *sep, wgint num);
+void clean_metalink_string (char **str);
void badhash_suffix (char *name);
void badhash_or_remove (char *name);
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index e6f9a23..4ad7d0a 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -46,7 +46,8 @@ if METALINK_IS_ENABLED
Test-metalink-xml-absprefix-trust.py \
Test-metalink-xml-homeprefix-trust.py \
Test-metalink-xml-size.py \
- Test-metalink-xml-nourls.py
+ Test-metalink-xml-nourls.py \
+ Test-metalink-xml-urlbreak.py
else
METALINK_TESTS =
endif
diff --git a/testenv/Test-metalink-xml-urlbreak.py b/testenv/Test-metalink-xml-urlbreak.py
new file mode 100755
index 0000000..e53ae11
--- /dev/null
+++ b/testenv/Test-metalink-xml-urlbreak.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+ This is to test Metalink/XML white spaces in url resources.
+
+ With --trust-server-names, trust the metalink:file names.
+
+ Without --trust-server-names, don't trust the metalink:file names:
+ use the basename of --input-metalink, and add a sequential number
+ (e.g. .#1, .#2, etc.).
+
+ Strip the directory from unsafe paths.
+"""
+############# File Definitions ###############################################
+bad = "Ouch!"  # body of "wrong_file"; not the text behind any FILEn_HASH
+
+File1 = "Would you like some Tea?"  # body expected in the downloaded file
+File1_lowPref = "Do not take this"  # body served at /File1_lowPref
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()  # fills {{FILE1_HASH}}
+
+File2 = "This is gonna be good"  # File2..File5 follow the File1 layout
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()  # fills {{FILE2_HASH}}
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()  # fills {{FILE3_HASH}}
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()  # fills {{FILE4_HASH}}
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()  # fills {{FILE5_HASH}}
+
+MetaXml = \
+"""<?xml version="1.0" encoding="utf-8"?>
+<metalink version="3.0" xmlns="http://www.metalinker.org/">
+ <publisher>
+ <name>GNU Wget</name>
+ </publisher>
+ <license>
+ <name>GNU GPL</name>
+ <url>http://www.gnu.org/licenses/gpl.html</url>
+ </license>
+ <identity>Wget Test Files</identity>
+ <version>1.2.3</version>
+ <description>Wget Test Files description</description>
+ <files>
+ <file name="File1">
+ <verification>
+ <hash type="sha256">{{FILE1_HASH}}</hash>
+ </verification>
+ <resources>
+ <url type="http" preference="35">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+ </url>
+ <url type="http" preference="40">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/404
+ </url>
+ <url type="http" preference="25">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+ </url>
+ <url type="http" preference="30">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+ </url>
+ </resources>
+ </file>
+ <file name="File2">
+ <verification>
+ <hash type="sha256">{{FILE2_HASH}}</hash>
+ </verification>
+ <resources>
+ <url type="http" preference="35">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+ </url>
+ <url type="http" preference="40">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/404
+ </url>
+ <url type="http" preference="25">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref
+ </url>
+ <url type="http" preference="30">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File2
+ </url>
+ </resources>
+ </file>
+ <file name="File3">
+ <verification>
+ <hash type="sha256">{{FILE3_HASH}}</hash>
+ </verification>
+ <resources>
+ <url type="http" preference="35">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+ </url>
+ <url type="http" preference="40">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/404
+ </url>
+ <url type="http" preference="25">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref
+ </url>
+ <url type="http" preference="30">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File3
+ </url>
+ </resources>
+ </file>
+ <file name="File4">
+ <verification>
+ <hash type="sha256">{{FILE4_HASH}}</hash>
+ </verification>
+ <resources>
+ <url type="http" preference="35">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+ </url>
+ <url type="http" preference="40">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/404
+ </url>
+ <url type="http" preference="25">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref
+ </url>
+ <url type="http" preference="30">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File4
+ </url>
+ </resources>
+ </file>
+ <file name="File5">
+ <verification>
+ <hash type="sha256">{{FILE5_HASH}}</hash>
+ </verification>
+ <resources>
+ <url type="http" preference="35">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+ </url>
+ <url type="http" preference="40">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/404
+ </url>
+ <url type="http" preference="25">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref
+ </url>
+ <url type="http" preference="30">
+ http://{{SRV_HOST}}:{{SRV_PORT}}/File5
+ </url>
+ </resources>
+ </file>
+ </files>
+</metalink>
+"""
+
+wrong_file = WgetFile ("wrong_file", bad)  # server file holding the "Ouch!" body
+
+File1_orig = WgetFile ("File1", File1)  # server-side file behind the /File1 url
+File1_down = WgetFile ("test.metalink.#1", File1)  # expected local result: metalink basename + ".#1"
+File1_nono = WgetFile ("File1_lowPref", File1_lowPref)  # server-side file that is not expected locally
+
+File2_orig = WgetFile ("File2", File2)  # File2..File5 repeat the File1 triple
+File2_down = WgetFile ("test.metalink.#2", File2)
+File2_nono = WgetFile ("File2_lowPref", File2_lowPref)
+
+File3_orig = WgetFile ("File3", File3)
+File3_down = WgetFile ("test.metalink.#3", File3)
+File3_nono = WgetFile ("File3_lowPref", File3_lowPref)
+
+File4_orig = WgetFile ("File4", File4)
+File4_down = WgetFile ("test.metalink.#4", File4)
+File4_nono = WgetFile ("File4_lowPref", File4_lowPref)
+
+File5_orig = WgetFile ("File5", File5)
+File5_down = WgetFile ("test.metalink.#5", File5)
+File5_nono = WgetFile ("File5_lowPref", File5_lowPref)
+
+MetaFile = WgetFile ("test.metalink", MetaXml)  # still the unfilled template; content is reassigned before the run
+
+WGET_OPTIONS = "--input-metalink test.metalink"
+WGET_URLS = [[]]  # presumably: no url arguments, the Metalink file is the only input
+
+Files = [[  # passed to the test as "ServerFiles"
+ wrong_file,
+ File1_orig, File1_nono,
+ File2_orig, File2_nono,
+ File3_orig, File3_nono,
+ File4_orig, File4_nono,
+ File5_orig, File5_nono
+]]
+Existing_Files = [MetaFile]  # passed to the test as "LocalFiles"
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [  # the five numbered downloads plus the Metalink file itself
+ File1_down,
+ File2_down,
+ File3_down,
+ File4_down,
+ File5_down,
+ MetaFile
+]
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+ "ServerFiles" : Files,
+ "LocalFiles" : Existing_Files
+}
+test_options = {
+ "WgetCommands" : WGET_OPTIONS,
+ "Urls" : WGET_URLS
+}
+post_test = {
+ "ExpectedFiles" : ExpectedDownloadedFiles,
+ "ExpectedRetcode" : ExpectedReturnCode
+}
+
+http_test = HTTPTest (
+ pre_hook=pre_test,
+ test_params=test_options,
+ post_hook=post_test,
+)
+
+http_test.server_setup()  # must precede the getsockname () call: the address fills MetaXml
+### Get and use dynamic server sockname
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)  # fill the template placeholders
+MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256)
+MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256)
+MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256)
+MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256)
+MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
+MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))  # port is not a str, replace () needs one
+MetaFile.content = MetaXml  # MetaFile was created from the unfilled template
+
+err = http_test.begin ()  # presumably runs the test; the result is the exit status
+
+exit (err)
--
2.7.3
- [Bug-wget] [PATCH 05/27] Bugfix: Fix NULL filename and output_stream in Metalink module, (continued)
- [Bug-wget] [PATCH 05/27] Bugfix: Fix NULL filename and output_stream in Metalink module, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 06/27] Bugfix: Keep the download progress when alternating metalink:url, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 07/27] Update Metalink/XML tests and add a new test for home paths, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 09/27] Change mfile->name to filename in Metalink module's messages, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 08/27] Add file size computation in Metalink module, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 10/27] Implement Metalink/XML --directory-prefix option in Metalink module, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 11/27] Enforce Metalink file name verification, strip directory if necessary, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 12/27] New document: Metalink/XML and Metalink/HTTP standard reference, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 14/27] New: Metalink file size mismatch returns error code METALINK_SIZE_ERROR, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 15/27] New test: Detect when there are no good Metalink url resources, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 16/27] Bugfix: Process Metalink/XML url strings containing white spaces and CRLF,
Matthew White <=
- [Bug-wget] [PATCH 17/27] Bugfix: Remove surrounding quotes from Metalink/HTTP key's value, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 18/27] New test: Metalink shall not concatenate '/' to an empty directory prefix, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 19/27] New: Parse Metalink/HTTP header for application/metalink4+xml, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 20/27] Bugfix: Prevent sorting when there are less than two elements, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 13/27] New: Metalink/XML and Metalink/HTTP file naming safety rules, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 23/27] Bugfix: Detect when a metalink:file doesn't have any hash, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 22/27] Bugfix: Detect malformed base64 Metalink/HTTP Digest header, Matthew White, 2016/09/29
- [Bug-wget] [PATCH 21/27] New option --metalink-index to process Metalink application/metalink4+xml, Matthew White, 2016/09/29