From: sulfastor <[email protected]> Hello, We've worked on this feature (bug #54769 (https://savannah.gnu.org/bugs/?54769)), taking into account the comments made by Darshit some time ago. We would like you to review our changes and code. Thank you for helping us improve our programming skills.
* src/http.c: removed disabled headers before its creation * src/init.c: added new functions to check user disabled headers, disable headers * src/main.c: added new option disable-header, added help description * src/options.h: added new option disable-header From bug #54769 (https://savannah.gnu.org/bugs/?54769). Some servers don't handle some headers well. A --disable-header option will ensure a request header will not be included in the request. In addition, an empty header value in --header="headername: " will also disable the request header. Signed-off-by: sulfastor <[email protected]>, adham elkarn <[email protected]> --- src/http.c | 137 ++++++++++++++++++++++++++++++++++---------------- src/init.c | 43 ++++++++++++++++ src/main.c | 4 ++ src/options.h | 1 + 4 files changed, 142 insertions(+), 43 deletions(-) diff --git a/src/http.c b/src/http.c index 289d1101..79cd2168 100644 --- a/src/http.c +++ b/src/http.c @@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, const char *); static bool known_authentication_scheme_p (const char *, const char *); static void ensure_extension (struct http_stat *, const char *, int *); static void load_cookies (void); +static bool disabled_header(char*); static bool cookies_loaded_p; static struct cookie_jar *wget_cookie_jar; @@ -152,6 +153,8 @@ struct request { int hcount, hcapacity; }; +/* Forward decls. */ +static bool request_remove_header (struct request*, const char*); /* Create a new, empty request. Set the request's method and its arguments. 
METHOD should be a literal string (or it should outlive @@ -245,6 +248,13 @@ request_set_header (struct request *req, const char *name, const char *value, return; } + /* A empty value is a disabled header; so remove it from the request */ + if (!*value) + { + request_remove_header(req, name); + return; + } + for (i = 0; i < req->hcount; i++) { hdr = &req->headers[i]; @@ -436,7 +446,7 @@ maybe_send_basic_creds (const char *hostname, const char *user, DEBUGP (("Host %s has not issued a general basic challenge.\n", quote (hostname))); } - if (do_challenge) + if (!disabled_header("Authorization") && do_challenge) { request_set_header (req, "Authorization", basic_authentication_encode (user, passwd), @@ -1770,23 +1780,29 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen, #ifdef __VMS #define SET_USER_AGENT(req) do { \ - if (!opt.useragent) \ - request_set_header (req, "User-Agent", \ - aprintf ("Wget/%s (VMS %s %s)", \ - version_string, vms_arch(), vms_vers()), \ - rel_value); \ - else if (*opt.useragent) \ - request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + if(!disabled_header("User-Agent")) \ + { \ + if (!opt.useragent) \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s (VMS %s %s)", \ + version_string, vms_arch(), vms_vers()), \ + rel_value); \ + else if (*opt.useragent) \ + request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + } \ } while (0) #else /* def __VMS */ #define SET_USER_AGENT(req) do { \ - if (!opt.useragent) \ - request_set_header (req, "User-Agent", \ - aprintf ("Wget/%s (%s)", \ - version_string, OS_TYPE), \ - rel_value); \ - else if (*opt.useragent) \ - request_set_header (req, "User-Agent", opt.useragent, rel_none); \ + if(!disabled_header("User-Agent")) \ + { \ + if (!opt.useragent) \ + request_set_header (req, "User-Agent", \ + aprintf ("Wget/%s (%s)", \ + version_string, OS_TYPE), \ + rel_value); \ + else if (*opt.useragent) \ + request_set_header (req, "User-Agent", 
opt.useragent, rel_none); \ + } \ } while (0) #endif /* def __VMS [else] */ @@ -1842,6 +1858,24 @@ time_to_rfc1123 (time_t time, char *buf, size_t bufsize) return RETROK; } +static bool +disabled_header (char* header_name) +{ + char** p = opt.disabled_headers; + char *s; + + if(!p) + return 0; + + for(;*p != NULL;++p) { + s = strchrnul(header_name, ':'); + if (!strncmp(header_name, *p, (size_t)(s - header_name))) + return 1; + } + + return 0; +} + static struct request * initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct url *proxy, bool inhibit_keep_alive, bool *basic_auth_finished, @@ -1874,15 +1908,17 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u meth_arg = url_full_path (u); req = request_new (meth, meth_arg); } - - request_set_header (req, "Referer", (char *) hs->referer, rel_none); + if(!disabled_header("Referer")) + request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) { /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */ - request_set_header (req, "Cache-Control", "no-cache", rel_none); + if(!disabled_header("Cache-Control")) + request_set_header (req, "Cache-Control", "no-cache", rel_none); /* ... but some HTTP/1.0 caches doesn't implement Cache-Control. 
*/ - request_set_header (req, "Pragma", "no-cache", rel_none); + if(!disabled_header("Pragma")) + request_set_header (req, "Pragma", "no-cache", rel_none); } if (*dt & IF_MODIFIED_SINCE) { @@ -1896,21 +1932,27 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u "time.\n")); strcpy (strtime, "Thu, 01 Jan 1970 00:00:00 GMT"); } - request_set_header (req, "If-Modified-Since", xstrdup (strtime), rel_value); + if(!disabled_header("If-Modified-Since")) + request_set_header (req, "If-Modified-Since", xstrdup (strtime), rel_value); } - if (hs->restval) + if (!disabled_header("Range") && hs->restval) request_set_header (req, "Range", aprintf ("bytes=%s-", number_to_static_string (hs->restval)), rel_value); + SET_USER_AGENT (req); - request_set_header (req, "Accept", "*/*", rel_none); + + if(!disabled_header("Accept")) + request_set_header (req, "Accept", "*/*", rel_none); + #ifdef HAVE_LIBZ - if (opt.compression != compression_none) + if (!disabled_header("Accept-Encoding") && opt.compression != compression_none) request_set_header (req, "Accept-Encoding", "gzip", rel_none); else #endif - request_set_header (req, "Accept-Encoding", "identity", rel_none); + if(!disabled_header("Accept-Encoding")) + request_set_header (req, "Accept-Encoding", "identity", rel_none); /* Find the username with priority */ if (u->user) @@ -1966,17 +2008,19 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u }; int add_port = u->port != scheme_default_port (u->scheme); int add_squares = strchr (u->host, ':') != NULL; - request_set_header (req, "Host", - aprintf (hfmt[add_port][add_squares], u->host, u->port), - rel_value); + if(!disabled_header("Host")) + request_set_header (req, "Host", + aprintf (hfmt[add_port][add_squares], u->host, u->port), + rel_value); } - if (inhibit_keep_alive) + if (!disabled_header("Connection") && inhibit_keep_alive) request_set_header (req, "Connection", "Close", rel_none); else { - request_set_header 
(req, "Connection", "Keep-Alive", rel_none); - if (proxy) + if (!disabled_header("Connection")) + request_set_header (req, "Connection", "Keep-Alive", rel_none); + if (!disabled_header("Proxy-Connection") && proxy) request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none); } @@ -1985,8 +2029,9 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u if (opt.body_data || opt.body_file) { - request_set_header (req, "Content-Type", - "application/x-www-form-urlencoded", rel_none); + if (!disabled_header("Content-Type")) + request_set_header (req, "Content-Type", + "application/x-www-form-urlencoded", rel_none); if (opt.body_data) *body_data_size = strlen (opt.body_data); @@ -2002,11 +2047,13 @@ initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct u return NULL; } } - request_set_header (req, "Content-Length", - xstrdup (number_to_static_string (*body_data_size)), - rel_value); + if (!disabled_header("Content-Length")) + request_set_header (req, "Content-Length", + xstrdup (number_to_static_string (*body_data_size)), + rel_value); } - else if (c_strcasecmp (opt.method, "post") == 0 + else if (!disabled_header("Content-Length") + && c_strcasecmp (opt.method, "post") == 0 || c_strcasecmp (opt.method, "put") == 0 || c_strcasecmp (opt.method, "patch") == 0) request_set_header (req, "Content-Length", "0", rel_none); @@ -2043,7 +2090,8 @@ initialize_proxy_configuration (const struct url *u, struct request *req, #ifdef HAVE_SSL if (u->scheme != SCHEME_HTTPS) #endif - request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value); + if (!disabled_header("Proxy-Authorization")) + request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value); } static uerr_t @@ -2133,8 +2181,9 @@ establish_connection (const struct url *u, const struct url **conn_ref, CONNECT method to request passthrough. 
*/ struct request *connreq = request_new ("CONNECT", aprintf ("%s:%d", u->host, u->port)); + SET_USER_AGENT (connreq); - if (proxyauth) + if (!disabled_header("Proxy-Authorization") && proxyauth) { request_set_header (connreq, "Proxy-Authorization", *proxyauth, rel_value); @@ -2143,9 +2192,10 @@ establish_connection (const struct url *u, const struct url **conn_ref, the regular request below. */ *proxyauth = NULL; } - request_set_header (connreq, "Host", - aprintf ("%s:%d", u->host, u->port), - rel_value); + if (!disabled_header("Host")) + request_set_header (connreq, "Host", + aprintf ("%s:%d", u->host, u->port), + rel_value); write_error = request_send (connreq, sock, 0); request_free (&connreq); @@ -2456,7 +2506,7 @@ check_auth (const struct url *u, char *user, char *passwd, struct response *resp auth_err = *auth_stat; xfree (auth_stat); xfree (pth); - if (auth_err == RETROK) + if (!disabled_header("Authorisation") && auth_err == RETROK) { request_set_header (req, "Authorization", value, rel_value); @@ -3264,7 +3314,8 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs, { int i; for (i = 0; opt.user_headers[i]; i++) - request_set_user_header (req, opt.user_headers[i]); + if (!disabled_header(opt.user_headers[i])) + request_set_user_header (req, opt.user_headers[i]); } proxyauth = NULL; diff --git a/src/init.c b/src/init.c index 9b6665a6..a2b6f311 100644 --- a/src/init.c +++ b/src/init.c @@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression); #endif CMD_DECLARE (cmd_spec_dirstruct); CMD_DECLARE (cmd_spec_header); +CMD_DECLARE (cmd_dis_header); CMD_DECLARE (cmd_spec_warc_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); @@ -183,6 +184,7 @@ static const struct { { "deleteafter", &opt.delete_after, cmd_boolean }, { "dirprefix", &opt.dir_prefix, cmd_directory }, { "dirstruct", NULL, cmd_spec_dirstruct }, + { "disableheader", NULL, cmd_dis_header}, { "dnscache", &opt.dns_cache, cmd_boolean }, #ifdef HAVE_LIBCARES { 
"dnsservers", &opt.dns_servers, cmd_string }, @@ -398,6 +400,7 @@ defaults (void) opt.metalink_index = -1; #endif + opt.disabled_headers = NULL; opt.cookies = true; opt.verbose = -1; opt.ntry = 20; @@ -1459,6 +1462,7 @@ cmd_cert_type (const char *com, const char *val, void *place) options specially. */ static bool check_user_specified_header (const char *); +static bool check_user_disabled_header (const char *); #ifdef HAVE_LIBZ static bool @@ -1493,6 +1497,27 @@ cmd_spec_dirstruct (const char *com, const char *val, void *place_ignored _GL_UN return true; } +static bool +cmd_dis_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) +{ + /* Empty value means reset the list of headers. */ + if (*val == '\0') + { + free_vec (opt.disabled_headers); + opt.disabled_headers = NULL; + return true; + } + + if (!check_user_disabled_header (val)) + { + fprintf (stderr, _("%s: %s: Invalid header %s.\n"), + exec_name, com, quote (val)); + return false; + } + opt.disabled_headers = vec_append (opt.disabled_headers, val); + return true; +} + static bool cmd_spec_header (const char *com, const char *val, void *place_ignored _GL_UNUSED) { @@ -1850,6 +1875,23 @@ simple_atof (const char *beg, const char *end, double *dest) contain a colon preceded by non-white-space characters and must not contain newlines. */ + +static bool +check_user_disabled_header (const char* s) +{ + const char *p; + + for (p = s; *p && *p != '\0' && !c_isspace (*p); p++) + ; + + if (p == s) + return false; + /* The header MUST NOT contain newlines. 
*/ + if (strchr (s, '\n')) + return false; + return true; +} + static bool check_user_specified_header (const char *s) { @@ -1977,6 +2019,7 @@ cleanup (void) xfree (opt.http_passwd); xfree (opt.dot_style); free_vec (opt.user_headers); + free_vec (opt.disabled_headers); free_vec (opt.warc_user_headers); # ifdef HAVE_SSL xfree (opt.cert_file); diff --git a/src/main.c b/src/main.c index 65b7f3f3..a73b39d9 100644 --- a/src/main.c +++ b/src/main.c @@ -304,6 +304,7 @@ static struct cmdline_option option_data[] = { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 }, { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 }, { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 }, + { "disable-header", 0, OPT_VALUE, "disableheader", -1 }, { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 }, #ifdef HAVE_LIBCARES { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 }, @@ -544,6 +545,7 @@ init_switches (void) identical to "--foo", except it has opposite meaning and it doesn't allow an argument. */ longopt = &long_options[o++]; + longopt->name = no_prefix (cmdopt->long_name); longopt->has_arg = no_argument; /* Mask the value so we'll be able to recognize that we're @@ -792,6 +794,8 @@ HTTP options:\n"), --ignore-length ignore 'Content-Length' header field\n"), N_("\ --header=STRING insert STRING among the headers\n"), + N_("\ + --disable-header=STRING disable STRING among the headers\n"), #ifdef HAVE_LIBZ N_("\ --compression=TYPE choose compression, one of auto, gzip and none. (default: none)\n"), diff --git a/src/options.h b/src/options.h index 881e2b2e..5559c6fa 100644 --- a/src/options.h +++ b/src/options.h @@ -147,6 +147,7 @@ struct options char *http_user; /* HTTP username. */ char *http_passwd; /* HTTP password. */ char **user_headers; /* User-defined header(s). */ + char **disabled_headers; /* User-disabled header(s) */ bool http_keep_alive; /* whether we use keep-alive */ bool use_proxy; /* Do we use proxy? */ -- 2.21.0
