[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Bug-wget] [PATCH] Disable automatic wget headers.
From: |
adham elkarn |
Subject: |
[Bug-wget] [PATCH] Disable automatic wget headers. |
Date: |
Sat, 27 Apr 2019 16:34:43 +0000 |
From: sulfastor <address@hidden>
Hello,
We've worked on this feature (bug #54769
(https://savannah.gnu.org/bugs/?54769)), taking into account the comments
Darshit made some time ago. We would like you to review our changes and code.
Thank you for helping us improve our programming skills.
* src/http.c: skip disabled headers when the request is built
* src/init.c: added new functions to validate and record user-disabled
headers
* src/main.c: added new option disable-header, added help description
* src/options.h: added new option disable-header
From bug #54769 (https://savannah.gnu.org/bugs/?54769).
Some servers don't handle some headers well. The --disable-header option
ensures that a given request header
is not included in the request. In addition, an empty header value, as in
--header="headername: ", also disables
that request header.
Signed-off-by: sulfastor <address@hidden>, adham elkarn <address@hidden>
---
src/http.c | 137 ++++++++++++++++++++++++++++++++++----------------
src/init.c | 43 ++++++++++++++++
src/main.c | 4 ++
src/options.h | 1 +
4 files changed, 142 insertions(+), 43 deletions(-)
diff --git a/src/http.c b/src/http.c
index 289d1101..79cd2168 100644
--- a/src/http.c
+++ b/src/http.c
@@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, const
char *);
static bool known_authentication_scheme_p (const char *, const char *);
static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);
+static bool disabled_header(char*);
static bool cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;
@@ -152,6 +153,8 @@ struct request {
int hcount, hcapacity;
};
+/* Forward decls. */
+static bool request_remove_header (struct request*, const char*);
/* Create a new, empty request. Set the request's method and its
arguments. METHOD should be a literal string (or it should outlive
@@ -245,6 +248,13 @@ request_set_header (struct request *req, const char *name,
const char *value,
return;
}
+ /* An empty value means the header is disabled, so remove it from the request. */
+ if (!*value)
+ {
+ request_remove_header(req, name);
+ return;
+ }
+
for (i = 0; i < req->hcount; i++)
{
hdr = &req->headers[i];
@@ -436,7 +446,7 @@ maybe_send_basic_creds (const char *hostname, const char
*user,
DEBUGP (("Host %s has not issued a general basic challenge.\n",
quote (hostname)));
}
- if (do_challenge)
+ if (!disabled_header("Authorization") && do_challenge)
{
request_set_header (req, "Authorization",
basic_authentication_encode (user, passwd),
@@ -1770,23 +1780,29 @@ read_response_body (struct http_stat *hs, int sock,
FILE *fp, wgint contlen,
#ifdef __VMS
#define SET_USER_AGENT(req) do { \
- if (!opt.useragent) \
- request_set_header (req, "User-Agent", \
- aprintf ("Wget/%s (VMS %s %s)", \
- version_string, vms_arch(), vms_vers()), \
- rel_value); \
- else if (*opt.useragent) \
- request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+ if(!disabled_header("User-Agent")) \
+ { \
+ if (!opt.useragent) \
+ request_set_header (req, "User-Agent", \
+ aprintf ("Wget/%s (VMS %s %s)", \
+ version_string, vms_arch(), vms_vers()), \
+ rel_value); \
+ else if (*opt.useragent) \
+ request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+ } \
} while (0)
#else /* def __VMS */
#define SET_USER_AGENT(req) do { \
- if (!opt.useragent) \
- request_set_header (req, "User-Agent", \
- aprintf ("Wget/%s (%s)", \
- version_string, OS_TYPE), \
- rel_value); \
- else if (*opt.useragent) \
- request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+ if(!disabled_header("User-Agent")) \
+ { \
+ if (!opt.useragent) \
+ request_set_header (req, "User-Agent", \
+ aprintf ("Wget/%s (%s)", \
+ version_string, OS_TYPE), \
+ rel_value); \
+ else if (*opt.useragent) \
+ request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+ } \
} while (0)
#endif /* def __VMS [else] */
@@ -1842,6 +1858,24 @@ time_to_rfc1123 (time_t time, char *buf, size_t bufsize)
return RETROK;
}
+/* Return true if the header named by HEADER_NAME appears in
+   opt.disabled_headers, the NULL-terminated list filled in by the
+   disable-header option; return false otherwise, including when the
+   list is unset.
+
+   HEADER_NAME is either a bare field name ("User-Agent") or a complete
+   header line ("User-Agent: foo"); only the part before the first ':'
+   is considered, and list entries are treated the same way.  Field
+   names are compared in full, so a disabled "Accept-Encoding" does not
+   also disable "Accept", and without regard to ASCII case, because
+   HTTP field names are case-insensitive.  */
+static bool
+disabled_header (char *header_name)
+{
+  char **p = opt.disabled_headers;
+  size_t name_len;
+
+  if (!p)
+    return false;
+
+  /* The length of the field name does not depend on the list entry,
+     so compute it once.  strcspn stops at ':' or at the terminating
+     NUL, whichever comes first.  */
+  name_len = strcspn (header_name, ":");
+
+  for (; *p != NULL; ++p)
+    {
+      /* Require equal name lengths: comparing just NAME_LEN bytes would
+         match every entry that HEADER_NAME is merely a prefix of.  */
+      if (strcspn (*p, ":") == name_len
+          && c_strncasecmp (header_name, *p, name_len) == 0)
+        return true;
+    }
+
+  return false;
+}
+
static struct request *
initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct
url *proxy,
bool inhibit_keep_alive, bool *basic_auth_finished,
@@ -1874,15 +1908,17 @@ initialize_request (const struct url *u, struct
http_stat *hs, int *dt, struct u
meth_arg = url_full_path (u);
req = request_new (meth, meth_arg);
}
-
- request_set_header (req, "Referer", (char *) hs->referer, rel_none);
+ if(!disabled_header("Referer"))
+ request_set_header (req, "Referer", (char *) hs->referer, rel_none);
if (*dt & SEND_NOCACHE)
{
/* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */
- request_set_header (req, "Cache-Control", "no-cache", rel_none);
+ if(!disabled_header("Cache-Control"))
+ request_set_header (req, "Cache-Control", "no-cache", rel_none);
/* ... but some HTTP/1.0 caches doesn't implement Cache-Control. */
- request_set_header (req, "Pragma", "no-cache", rel_none);
+ if(!disabled_header("Pragma"))
+ request_set_header (req, "Pragma", "no-cache", rel_none);
}
if (*dt & IF_MODIFIED_SINCE)
{
@@ -1896,21 +1932,27 @@ initialize_request (const struct url *u, struct
http_stat *hs, int *dt, struct u
"time.\n"));
strcpy (strtime, "Thu, 01 Jan 1970 00:00:00 GMT");
}
- request_set_header (req, "If-Modified-Since", xstrdup (strtime),
rel_value);
+ if(!disabled_header("If-Modified-Since"))
+ request_set_header (req, "If-Modified-Since", xstrdup (strtime),
rel_value);
}
- if (hs->restval)
+ if (!disabled_header("Range") && hs->restval)
request_set_header (req, "Range",
aprintf ("bytes=%s-",
number_to_static_string (hs->restval)),
rel_value);
+
SET_USER_AGENT (req);
- request_set_header (req, "Accept", "*/*", rel_none);
+
+ if(!disabled_header("Accept"))
+ request_set_header (req, "Accept", "*/*", rel_none);
+
#ifdef HAVE_LIBZ
- if (opt.compression != compression_none)
+ if (!disabled_header("Accept-Encoding") && opt.compression !=
compression_none)
request_set_header (req, "Accept-Encoding", "gzip", rel_none);
else
#endif
- request_set_header (req, "Accept-Encoding", "identity", rel_none);
+ if(!disabled_header("Accept-Encoding"))
+ request_set_header (req, "Accept-Encoding", "identity", rel_none);
/* Find the username with priority */
if (u->user)
@@ -1966,17 +2008,19 @@ initialize_request (const struct url *u, struct
http_stat *hs, int *dt, struct u
};
int add_port = u->port != scheme_default_port (u->scheme);
int add_squares = strchr (u->host, ':') != NULL;
- request_set_header (req, "Host",
- aprintf (hfmt[add_port][add_squares], u->host,
u->port),
- rel_value);
+ if(!disabled_header("Host"))
+ request_set_header (req, "Host",
+ aprintf (hfmt[add_port][add_squares], u->host,
u->port),
+ rel_value);
}
- if (inhibit_keep_alive)
+ if (!disabled_header("Connection") && inhibit_keep_alive)
request_set_header (req, "Connection", "Close", rel_none);
else
{
- request_set_header (req, "Connection", "Keep-Alive", rel_none);
- if (proxy)
+ if (!disabled_header("Connection"))
+ request_set_header (req, "Connection", "Keep-Alive", rel_none);
+ if (!disabled_header("Proxy-Connection") && proxy)
request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
}
@@ -1985,8 +2029,9 @@ initialize_request (const struct url *u, struct http_stat
*hs, int *dt, struct u
if (opt.body_data || opt.body_file)
{
- request_set_header (req, "Content-Type",
- "application/x-www-form-urlencoded", rel_none);
+ if (!disabled_header("Content-Type"))
+ request_set_header (req, "Content-Type",
+ "application/x-www-form-urlencoded", rel_none);
if (opt.body_data)
*body_data_size = strlen (opt.body_data);
@@ -2002,11 +2047,13 @@ initialize_request (const struct url *u, struct
http_stat *hs, int *dt, struct u
return NULL;
}
}
- request_set_header (req, "Content-Length",
- xstrdup (number_to_static_string
(*body_data_size)),
- rel_value);
+ if (!disabled_header("Content-Length"))
+ request_set_header (req, "Content-Length",
+ xstrdup (number_to_static_string
(*body_data_size)),
+ rel_value);
}
- else if (c_strcasecmp (opt.method, "post") == 0
+ else if (!disabled_header("Content-Length")
+ && c_strcasecmp (opt.method, "post") == 0
|| c_strcasecmp (opt.method, "put") == 0
|| c_strcasecmp (opt.method, "patch") == 0)
request_set_header (req, "Content-Length", "0", rel_none);
@@ -2043,7 +2090,8 @@ initialize_proxy_configuration (const struct url *u,
struct request *req,
#ifdef HAVE_SSL
if (u->scheme != SCHEME_HTTPS)
#endif
- request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
+ if (!disabled_header("Proxy-Authorization"))
+ request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
}
static uerr_t
@@ -2133,8 +2181,9 @@ establish_connection (const struct url *u, const struct
url **conn_ref,
CONNECT method to request passthrough. */
struct request *connreq = request_new ("CONNECT",
aprintf ("%s:%d", u->host, u->port));
+
SET_USER_AGENT (connreq);
- if (proxyauth)
+ if (!disabled_header("Proxy-Authorization") && proxyauth)
{
request_set_header (connreq, "Proxy-Authorization",
*proxyauth, rel_value);
@@ -2143,9 +2192,10 @@ establish_connection (const struct url *u, const struct
url **conn_ref,
the regular request below. */
*proxyauth = NULL;
}
- request_set_header (connreq, "Host",
- aprintf ("%s:%d", u->host, u->port),
- rel_value);
+ if (!disabled_header("Host"))
+ request_set_header (connreq, "Host",
+ aprintf ("%s:%d", u->host, u->port),
+ rel_value);
write_error = request_send (connreq, sock, 0);
request_free (&connreq);
@@ -2456,7 +2506,7 @@ check_auth (const struct url *u, char *user, char
*passwd, struct response *resp
auth_err = *auth_stat;
xfree (auth_stat);
xfree (pth);
- if (auth_err == RETROK)
+ if (!disabled_header("Authorisation") && auth_err == RETROK)
{
request_set_header (req, "Authorization", value, rel_value);
@@ -3264,7 +3314,8 @@ gethttp (const struct url *u, struct url *original_url,
struct http_stat *hs,
{
int i;
for (i = 0; opt.user_headers[i]; i++)
- request_set_user_header (req, opt.user_headers[i]);
+ if (!disabled_header(opt.user_headers[i]))
+ request_set_user_header (req, opt.user_headers[i]);
}
proxyauth = NULL;
diff --git a/src/init.c b/src/init.c
index 9b6665a6..a2b6f311 100644
--- a/src/init.c
+++ b/src/init.c
@@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression);
#endif
CMD_DECLARE (cmd_spec_dirstruct);
CMD_DECLARE (cmd_spec_header);
+CMD_DECLARE (cmd_dis_header);
CMD_DECLARE (cmd_spec_warc_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
@@ -183,6 +184,7 @@ static const struct {
{ "deleteafter", &opt.delete_after, cmd_boolean },
{ "dirprefix", &opt.dir_prefix, cmd_directory },
{ "dirstruct", NULL, cmd_spec_dirstruct },
+ { "disableheader", NULL, cmd_dis_header},
{ "dnscache", &opt.dns_cache, cmd_boolean },
#ifdef HAVE_LIBCARES
{ "dnsservers", &opt.dns_servers, cmd_string },
@@ -398,6 +400,7 @@ defaults (void)
opt.metalink_index = -1;
#endif
+ opt.disabled_headers = NULL;
opt.cookies = true;
opt.verbose = -1;
opt.ntry = 20;
@@ -1459,6 +1462,7 @@ cmd_cert_type (const char *com, const char *val, void
*place)
options specially. */
static bool check_user_specified_header (const char *);
+static bool check_user_disabled_header (const char *);
#ifdef HAVE_LIBZ
static bool
@@ -1493,6 +1497,27 @@ cmd_spec_dirstruct (const char *com, const char *val,
void *place_ignored _GL_UN
return true;
}
+/* Handler for the "disableheader" command.  An empty VAL clears
+   opt.disabled_headers; any other VAL is checked with
+   check_user_disabled_header and, if accepted, appended to that list.
+   Returns false, after printing a diagnostic to stderr, when VAL is
+   rejected; returns true otherwise.  */
+static bool
+cmd_dis_header (const char *com, const char *val,
+                void *place_ignored _GL_UNUSED)
+{
+  if (*val != '\0')
+    {
+      if (check_user_disabled_header (val))
+        {
+          opt.disabled_headers = vec_append (opt.disabled_headers, val);
+          return true;
+        }
+
+      fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
+               exec_name, com, quote (val));
+      return false;
+    }
+
+  /* Empty value means reset the list of headers.  */
+  free_vec (opt.disabled_headers);
+  opt.disabled_headers = NULL;
+  return true;
+}
+
static bool
cmd_spec_header (const char *com, const char *val, void *place_ignored
_GL_UNUSED)
{
@@ -1850,6 +1875,23 @@ simple_atof (const char *beg, const char *end, double
*dest)
contain a colon preceded by non-white-space characters and must not
contain newlines. */
+
+/* Validate S as the argument of the disable-header option.  S is
+   rejected when it is empty, when it begins with white space, or when
+   it contains a newline; anything else is accepted.  */
+static bool
+check_user_disabled_header (const char *s)
+{
+  /* There must be at least one leading non-white-space character.  */
+  if (*s == '\0' || c_isspace (*s))
+    return false;
+
+  /* The header MUST NOT contain newlines.  */
+  return strchr (s, '\n') == NULL;
+}
+
static bool
check_user_specified_header (const char *s)
{
@@ -1977,6 +2019,7 @@ cleanup (void)
xfree (opt.http_passwd);
xfree (opt.dot_style);
free_vec (opt.user_headers);
+ free_vec (opt.disabled_headers);
free_vec (opt.warc_user_headers);
# ifdef HAVE_SSL
xfree (opt.cert_file);
diff --git a/src/main.c b/src/main.c
index 65b7f3f3..a73b39d9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -304,6 +304,7 @@ static struct cmdline_option option_data[] =
{ "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
{ "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
{ "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
+ { "disable-header", 0, OPT_VALUE, "disableheader", -1 },
{ "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
#ifdef HAVE_LIBCARES
{ "dns-servers", 0, OPT_VALUE, "dnsservers", -1 },
@@ -544,6 +545,7 @@ init_switches (void)
identical to "--foo", except it has opposite meaning and
it doesn't allow an argument. */
longopt = &long_options[o++];
+
longopt->name = no_prefix (cmdopt->long_name);
longopt->has_arg = no_argument;
/* Mask the value so we'll be able to recognize that we're
@@ -792,6 +794,8 @@ HTTP options:\n"),
--ignore-length ignore 'Content-Length' header field\n"),
N_("\
--header=STRING insert STRING among the headers\n"),
+ N_("\
+ --disable-header=STRING disable STRING among the headers\n"),
#ifdef HAVE_LIBZ
N_("\
--compression=TYPE choose compression, one of auto, gzip and
none. (default: none)\n"),
diff --git a/src/options.h b/src/options.h
index 881e2b2e..5559c6fa 100644
--- a/src/options.h
+++ b/src/options.h
@@ -147,6 +147,7 @@ struct options
char *http_user; /* HTTP username. */
char *http_passwd; /* HTTP password. */
char **user_headers; /* User-defined header(s). */
+ char **disabled_headers; /* User-disabled header(s) */
bool http_keep_alive; /* whether we use keep-alive */
bool use_proxy; /* Do we use proxy? */
--
2.21.0
- [Bug-wget] [PATCH] Disable automatic wget headers.,
adham elkarn <=