Hi, Comments inline
* adham elkarn <[email protected]> [190504 17:41]: > From: sulfastor <[email protected]> > > Hi, Thank you Darshit for your feedback and code review. > Those are the updates: > > * doc/wget.texi: Added --disable-header documentation. > * fuzz/wget_options_fuzzer.dict: Update with --disable-header inputs. > * src/http.c (disabled_header): Checks for disabled headers > (request_set_header): Doesn't let header to be set if disabled > (gethttp): frees disabled header to let overriding > * src/init.c (cmd_dis_header), (check_user_disabled_header), > (vec_remove_header): added new option disabled_headers. > * src/main.c: added new option --disable-header, added help > description > * src/options.h: added new option --disable-header > * testenv/Makefile.am: Added new test files > * testenv/server/http/http_server.py: Added new rule RejectHeaderField > * testenv/conf/reject_header_field.py: Added new rule > RejectHeaderField > * testenv/README: Added help description for new rule > * testenv/Test-disable-default-headers.py: Test without using --header > * testenv/Test-disable-headers-after.py: Test using --header before > --disable-header > * testenv/Test-disable-headers-before.py: Test using --header after > --disable-header > > Signed-off-by: sulfastor <[email protected]>, adham elkarn > <[email protected]> > --- > doc/wget.texi | 19 ++++++ > fuzz/wget_options_fuzzer.dict | 17 +++++ > src/http.c | 31 +++++++++ > src/init.c | 88 +++++++++++++++++++++++++ > src/main.c | 4 ++ > src/options.h | 1 + > testenv/Makefile.am | 3 + > testenv/README | 4 ++ > testenv/Test-disable-default-headers.py | 73 ++++++++++++++++++++ > testenv/Test-disable-headers-after.py | 77 ++++++++++++++++++++++ > testenv/Test-disable-headers-before.py | 77 ++++++++++++++++++++++ > testenv/conf/reject_header_field.py | 12 ++++ > testenv/server/http/http_server.py | 8 +++ > 13 files changed, 414 insertions(+) > create mode 100644 testenv/Test-disable-default-headers.py > create mode 100644 testenv/Test-disable-headers-after.py > create mode 100644 testenv/Test-disable-headers-before.py > create mode 100644 testenv/conf/reject_header_field.py > > diff --git a/doc/wget.texi b/doc/wget.texi > index 7eada2dd..a43224a3 100644 > --- a/doc/wget.texi > +++ b/doc/wget.texi > @@ -1542,6 +1542,25 @@ wget --header="Host: foo.bar" http://localhost/ > In versions of Wget prior to 1.10 such use of @samp{--header} caused > sending of duplicate headers. > > +@cindex disable header, choose > +@item --disable-header=@var{header-field} > +Remove @var{header-field} among the headers in each @sc{http} request. > + > +You may define more than one additional header field by specifying > +@samp{--disable-header} more than once as in @samp{--header}. > + > +@example > +@group > +wget --disable-header='Accept' \ > + --disable-header='User-Agent' \ > + --disable-header='Accept-Encoding' \ > + https://example.com/ > +@end group > +@end example > + > +Specifying a header field with @samp{--header} after disabling it > +will override it and include it in the @sc{http} request headers. > + > @cindex Content-Encoding, choose > @item --compression=@var{type} > Choose the type of compression to be used. Legal values are > diff --git a/fuzz/wget_options_fuzzer.dict b/fuzz/wget_options_fuzzer.dict > index 9a2dbd8e..12d54d60 100644 > --- a/fuzz/wget_options_fuzzer.dict > +++ b/fuzz/wget_options_fuzzer.dict > @@ -30,6 +30,22 @@ > "human" > "csv" > "json" > +"Authorization" > +"User-Agent" > +"Referer" > +"Cache-Control" > +"Pragma" > +"If-Modified-Since" > +"Range" > +"Accept" > +"Accept-Encoding" > +"Host" > +"Connection" > +"Proxy-Connection" > +"Content-Type" > +"Content-Length" > +"Proxy-Authorization" > +"Cookie" > "accept=" > "accept-regex=" > "adjust-extension=" > @@ -66,6 +82,7 @@ > "delete-after=" > "directories=" > "directory-prefix=" > +"disable-header=" > "dns-caching=" > "dns-timeout=" > "domains=" > diff --git a/src/http.c b/src/http.c > index 289d1101..225be265 100644 > --- a/src/http.c > +++ b/src/http.c > @@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, > const char *); > static bool known_authentication_scheme_p (const char *, const char *); > static void ensure_extension (struct http_stat *, const char *, int *); > static void load_cookies (void); > +static bool disabled_header (char*); > > static bool cookies_loaded_p; > static struct cookie_jar *wget_cookie_jar; > @@ -236,6 +237,9 @@ request_set_header (struct request *req, const char > *name, const char *value, > struct request_header *hdr; > int i; > > + if (disabled_header ((char*) name)) Why do you need to explicitly cast the const out? Please retain the const attribute here. > + return; > + > if (!value) > { > /* A NULL value is a no-op; if freeing the name is requested, > @@ -1842,6 +1846,27 @@ time_to_rfc1123 (time_t time, char *buf, size_t > bufsize) > return RETROK; > } > There is nothing in this function that forces `header_name` to not be const. > +static bool > +disabled_header (char* header_name) > +{ > + char** p = opt.disabled_headers; > + char *s; > + size_t n; > + > + if (!p) > + return 0; > + > + for (; *p; ++p) > + { > + s = strchrnul (header_name, ':'); > + n = (size_t) (s - header_name); > + if (n == strlen (*p) && 0 == strncmp (header_name, *p, n)) > + return 1; > + } > + > + return 0; > +} > + > static struct request * > initialize_request (const struct url *u, struct http_stat *hs, int *dt, > struct url *proxy, > bool inhibit_keep_alive, bool *basic_auth_finished, > @@ -3263,6 +3288,11 @@ gethttp (const struct url *u, struct url > *original_url, struct http_stat *hs, > if (opt.user_headers) > { > int i; > + /* Empty the disabled headers as they are no longer used > + and this will let headers to be overriden by the user */ > + free_vec (opt.disabled_headers); > + opt.disabled_headers = NULL; > + > for (i = 0; opt.user_headers[i]; i++) > request_set_user_header (req, opt.user_headers[i]); > } > @@ -5274,6 +5304,7 @@ ensure_extension (struct http_stat *hs, const char > *ext, int *dt) > shortext[0] = '\0'; > len = strlen (ext); > if (len == 5) > + Extra newline here can be confusing > { > memcpy (shortext, ext, len - 1); > shortext[len - 1] = '\0'; > diff --git a/src/init.c b/src/init.c > index 9b6665a6..aaf99ba6 100644 > --- a/src/init.c > +++ b/src/init.c > @@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression); > #endif > CMD_DECLARE (cmd_spec_dirstruct); > CMD_DECLARE (cmd_spec_header); > +CMD_DECLARE (cmd_dis_header); > CMD_DECLARE (cmd_spec_warc_header); > CMD_DECLARE (cmd_spec_htmlify); > CMD_DECLARE (cmd_spec_mirror); > @@ -183,6 +184,7 @@ static const struct { > { "deleteafter", &opt.delete_after, cmd_boolean }, > { "dirprefix", &opt.dir_prefix, cmd_directory }, > { "dirstruct", NULL, cmd_spec_dirstruct }, > + { "disableheader", NULL, cmd_dis_header}, > { "dnscache", &opt.dns_cache, cmd_boolean }, > #ifdef HAVE_LIBCARES > { "dnsservers", &opt.dns_servers, cmd_string }, > @@ -398,6 +400,7 @@ defaults (void) > opt.metalink_index = -1; > #endif > > + opt.disabled_headers = NULL; > opt.cookies = true; > opt.verbose = -1; > opt.ntry = 20; > @@ -990,6 +993,7 @@ struct decode_item { > static bool decode_string (const char *, const struct decode_item *, int, > int *); > static bool simple_atof (const char *, const char *, double *); > > + Please try to not add extra newlines where they're not needed. > #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0') > > #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \ > @@ -1459,6 +1463,8 @@ cmd_cert_type (const char *com, const char *val, void > *place) > options specially. */ > > static bool check_user_specified_header (const char *); > +static bool check_user_disabled_header (const char *); > +static char ** vec_remove_header (char **, const char *); > > #ifdef HAVE_LIBZ > static bool > @@ -1493,6 +1499,29 @@ cmd_spec_dirstruct (const char *com, const char *val, > void *place_ignored _GL_UN > return true; > } > > +static bool > +cmd_dis_header (const char *com, const char *val, void *place_ignored > _GL_UNUSED) > +{ > + /* Empty value means reset the list of headers. */ > + if (*val == '\0') > + { > + free_vec (opt.disabled_headers); > + opt.disabled_headers = NULL; > + return true; > + } > + > + if (!check_user_disabled_header (val)) > + { > + fprintf (stderr, _("%s: %s: Invalid header %s.\n"), > + exec_name, com, quote (val)); > + return false; > + } > + /* Removes disabled headers from user defined headers */ > + opt.user_headers = vec_remove_header (opt.user_headers, val); > + opt.disabled_headers = vec_append (opt.disabled_headers, val); > + return true; > +} Unlike the `--header` command, this one should be capable of accepting a stringlist input. For e.g.: --disable-header=Accept,authorization > + > static bool > cmd_spec_header (const char *com, const char *val, void *place_ignored > _GL_UNUSED) > { > @@ -1850,6 +1879,23 @@ simple_atof (const char *beg, const char *end, double > *dest) > contain a colon preceded by non-white-space characters and must not > contain newlines. */ > > + > +static bool > +check_user_disabled_header (const char* s) > +{ > + const char *p; > + > + for (p = s; *p && !c_isspace (*p); p++) > + ; > + > + if (p == s) > + return false; > + /* The header MUST NOT contain newlines. */ > + if (strchr (s, '\n')) > + return false; > + return true; > +} Would be nice if you could reduce the code duplication here. I would do something like: static bool check_user_specified_header(const char *s, bool full_header) { const char *p; for (p = s; *p && !c_isspace (*p); p++) ; if (full_header) { /* The header MUST contain `:' preceded by at least one non-whitespace character. */ if (*p != ':' || p == s) return false; } /* The header MUST NOT contain newlines. */ if (strchr (s, '\n')) return false; return true; } Replace the existing function with this. Now we don't have additional code duplication > + > static bool > check_user_specified_header (const char *s) > { > @@ -1867,6 +1913,47 @@ check_user_specified_header (const char *s) > return true; > } > > +/* Removes a header from a request headers vector */ > + > +static char ** > +vec_remove_header (char **vec, const char *str) > +{ > + char* s; > + int i, cnt; /* count of vector elements */ > + size_t n; > + > + if (vec != NULL) > + { > + for (cnt = 0; vec[cnt]; cnt++) > + ; > + /* remove all duplicates */ > + i = 0; > + while (vec[i]) > + { > + s = strchrnul (vec[i], ':'); > + n = (size_t) (s - vec[i]); > + if (n == strlen (str) && 0 == strncmp (vec[i], str, n)) > + { > + if (cnt == 1) > + { > + vec[i] = NULL; > + return vec; > + } > + else > + { > + vec[i] = xstrdup (vec[cnt - 1]); > + vec = xrealloc (vec, (cnt + 1) * sizeof (char *)); > + vec[cnt] = NULL; > + --cnt; > + } > + } > + else > + ++i; > + } > + } > + return vec; > +} This method is better suited in src/utils.c along with the other vector operations. Also, I'm fairly confident that you're leaking memory in here I just noticed you're using tabs. Please use spaces instead > + > /* Decode VAL into a number, according to ITEMS. */ > > static bool > @@ -1977,6 +2064,7 @@ cleanup (void) > xfree (opt.http_passwd); > xfree (opt.dot_style); > free_vec (opt.user_headers); > + free_vec (opt.disabled_headers); > free_vec (opt.warc_user_headers); > # ifdef HAVE_SSL > xfree (opt.cert_file); > diff --git a/src/main.c b/src/main.c > index 65b7f3f3..a73b39d9 100644 > --- a/src/main.c > +++ b/src/main.c > @@ -304,6 +304,7 @@ static struct cmdline_option option_data[] = > { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 }, > { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 }, > { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 }, > + { "disable-header", 0, OPT_VALUE, "disableheader", -1 }, > { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 }, > #ifdef HAVE_LIBCARES > { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 }, > @@ -544,6 +545,7 @@ init_switches (void) > identical to "--foo", except it has opposite meaning and > it doesn't allow an argument. */ > longopt = &long_options[o++]; > + Again, random newline inserted > longopt->name = no_prefix (cmdopt->long_name); > longopt->has_arg = no_argument; > /* Mask the value so we'll be able to recognize that we're > @@ -792,6 +794,8 @@ HTTP options:\n"), > --ignore-length ignore 'Content-Length' header field\n"), > N_("\ > --header=STRING insert STRING among the headers\n"), > + N_("\ > + --disable-header=STRING disable STRING among the headers\n"), > #ifdef HAVE_LIBZ > N_("\ > --compression=TYPE choose compression, one of auto, gzip and > none. (default: none)\n"), > diff --git a/src/options.h b/src/options.h > index 881e2b2e..5559c6fa 100644 > --- a/src/options.h > +++ b/src/options.h > @@ -147,6 +147,7 @@ struct options > char *http_user; /* HTTP username. */ > char *http_passwd; /* HTTP password. */ > char **user_headers; /* User-defined header(s). */ > + char **disabled_headers; /* User-disabled header(s) */ > bool http_keep_alive; /* whether we use keep-alive */ > > bool use_proxy; /* Do we use proxy? */ > diff --git a/testenv/Makefile.am b/testenv/Makefile.am > index b5a39ad2..4b3e2d08 100644 > --- a/testenv/Makefile.am > +++ b/testenv/Makefile.am > @@ -95,6 +95,9 @@ if HAVE_PYTHON3 > Test-cookie-domain-mismatch.py \ > Test-cookie-expires.py \ > Test-cookie.py \ > + Test-disable-default-headers.py \ > + Test-disable-headers-after.py \ > + Test-disable-headers-before.py \ > Test-Head.py \ > Test-hsts.py \ > Test--https.py \ > diff --git a/testenv/README b/testenv/README > index 6580bc99..d2f38a67 100644 > --- a/testenv/README > +++ b/testenv/README > @@ -182,6 +182,10 @@ This section lists the currently supported File Rules > and their structure. > * RejectHeader : This list of Headers must NEVER occur in a request. It > uses the same value format as ExpectHeader. > > + * RejectHeaderField : This list of Headers Fields must NOT appear in a > request. > + The value for this key is a list of strings where each header field is > represented as: > + |-->Header Field: <Header Field Name> What am I missing here? How is this different from RejectHeader? > + > * SendHeader : This list of Headers will be sent in EVERY response to > a > request for the respective file. It follows the same value format as > ExpectHeader. Additionally you can specify a list of strings as <Header > Data> > diff --git a/testenv/Test-disable-default-headers.py > b/testenv/Test-disable-default-headers.py > new file mode 100644 > index 00000000..22ea54ad > --- /dev/null > +++ b/testenv/Test-disable-default-headers.py > @@ -0,0 +1,73 @@ > +#!/usr/bin/env python3 > +from sys import exit > +from test.http_test import HTTPTest > +from test.base_test import HTTP, HTTPS > +from misc.wget_file import WgetFile > + > +""" > + This is test ensures that the --disable-header option removes default > request > + headers. There aren't any user defined header. > +""" > +############# File Definitions > ############################################### > +file_content = """Les paroles de la bouche d'un homme sont des eaux > profondes; > +La source de la sagesse est un torrent qui jaillit.""" > + > +Headers = { > + 'Authorization', > + 'User-Agent', > + 'Referer', > + 'Cache-Control', > + 'Pragma', > + 'If-Modified-Since', > + 'Range', > + 'Accept', > + 'Accept-Encoding', > + 'Host', > + 'Connection', > + 'Proxy-Connection', > + 'Content-Type', > + 'Content-Length', > + 'Proxy-Authorization', > + 'Cookie', > + 'MyHeader', > +} > + > +WGET_OPTIONS = '' > +WGET_URLS = [[]] > +Files = [[]] > + > +for index, header in enumerate(Headers, start=1): > + File_rules = { > + "RejectHeaderField" : { > + header > + } > + } > + file_name = "File" + str(index) > + Files[0].append (WgetFile(file_name, file_content, rules=File_rules)) > + WGET_OPTIONS += ' --disable-header="' + header + '"' > + WGET_URLS[0].append (file_name) > + > +Servers = [HTTP] > + > +ExpectedReturnCode = 0 > + > +################ Pre and Post Test Hooks > ##################################### > +pre_test = { > + "ServerFiles" : Files > +} > +test_options = { > + "WgetCommands" : WGET_OPTIONS, > + "Urls" : WGET_URLS > +} > +post_test = { > + "ExpectedRetcode" : ExpectedReturnCode > +} > + > +err = HTTPTest ( > + pre_hook=pre_test, > + test_params=test_options, > + post_hook=post_test, > + protocols=Servers > +).begin () > + > +exit (err) > diff --git a/testenv/Test-disable-headers-after.py > b/testenv/Test-disable-headers-after.py > new file mode 100644 > index 00000000..c0ffc84d > --- /dev/null > +++ b/testenv/Test-disable-headers-after.py > @@ -0,0 +1,77 @@ > +#!/usr/bin/env python3 > +from sys import exit > +from test.http_test import HTTPTest > +from test.base_test import HTTP, HTTPS > +from misc.wget_file import WgetFile > + > +""" > + This is test ensures that the --disable-header option removes user > headers > + from the HTTP request when it's placed after --header="header: value". > +""" > +############# File Definitions > ############################################### > +file_content = """Les paroles de la bouche d'un homme sont des eaux > profondes; > +La source de la sagesse est un torrent qui jaillit.""" > + > +Headers = { > + 'Authorization', > + 'User-Agent', > + 'Referer', > + 'Cache-Control', > + 'Pragma', > + 'If-Modified-Since', > + 'Range', > + 'Accept', > + 'Accept-Encoding', > + 'Host', > + 'Connection', > + 'Proxy-Connection', > + 'Content-Type', > + 'Content-Length', > + 'Proxy-Authorization', > + 'Cookie', > + 'MyHeader', > +} > + > +WGET_OPTIONS = '' > +WGET_URLS = [[]] > +Files = [[]] > + > +# Define user defined headers > +for header in Headers: > + WGET_OPTIONS += ' --header="' + header + ': any"' > + > +for index, header in enumerate(Headers, start=1): > + File_rules = { > + "RejectHeader" : { > + header : 'any' > + } > + } > + file_name = "File" + str(index) > + Files[0].append(WgetFile(file_name, file_content, rules=File_rules)) > + WGET_OPTIONS += ' --disable-header="' + header + '"' > + WGET_URLS[0].append(file_name) > + > +Servers = [HTTP] > + > +ExpectedReturnCode = 0 > + > +################ Pre and Post Test Hooks > ##################################### > +pre_test = { > + "ServerFiles" : Files > +} > +test_options = { > + "WgetCommands" : WGET_OPTIONS, > + "Urls" : WGET_URLS > +} > +post_test = { > + "ExpectedRetcode" : ExpectedReturnCode > +} > + > +err = HTTPTest ( > + pre_hook=pre_test, > + test_params=test_options, > + post_hook=post_test, > + protocols=Servers > +).begin () > + > +exit (err) > diff --git a/testenv/Test-disable-headers-before.py > b/testenv/Test-disable-headers-before.py > new file mode 100644 > index 00000000..d442b008 > --- /dev/null > +++ b/testenv/Test-disable-headers-before.py > @@ -0,0 +1,77 @@ > +#!/usr/bin/env python3 > +from sys import exit > +from test.http_test import HTTPTest > +from test.base_test import HTTP, HTTPS > +from misc.wget_file import WgetFile > + > +""" > + This is test ensures that the --disable-header option doesn't remove > user headers > + from the HTTP request when it's placed before --header="header: value". > +""" > +############# File Definitions > ############################################### > +file_content = """Les paroles de la bouche d'un homme sont des eaux > profondes; > +La source de la sagesse est un torrent qui jaillit.""" > + > +Headers = { > + 'Authorization', > + 'User-Agent', > + 'Referer', > + 'Cache-Control', > + 'Pragma', > + 'If-Modified-Since', > + 'Range', > + 'Accept', > + 'Accept-Encoding', > + 'Host', > + 'Connection', > + 'Proxy-Connection', > + 'Content-Type', > + 'Content-Length', > + 'Proxy-Authorization', > + 'Cookie', > + 'MyHeader', > +} > + > +WGET_OPTIONS = '' > +WGET_URLS = [[]] > +Files = [[]] > + > +for index, header in enumerate(Headers, start=1): > + File_rules = { > + "ExpectHeader" : { > + header : 'any' > + } > + } > + file_name = "File" + str(index) > + Files[0].append (WgetFile(file_name, file_content, rules=File_rules)) > + WGET_OPTIONS += ' --disable-header="' + header + '"' > + WGET_URLS[0].append (file_name) > + > +# Define user defined headers > +for header in Headers: > + WGET_OPTIONS += ' --header="' + header + ': any"' > + > +Servers = [HTTP] > + > +ExpectedReturnCode = 0 > + > +################ Pre and Post Test Hooks > ##################################### > +pre_test = { > + "ServerFiles" : Files > +} > +test_options = { > + "WgetCommands" : WGET_OPTIONS, > + "Urls" : WGET_URLS > +} > +post_test = { > + "ExpectedRetcode" : ExpectedReturnCode > +} > + > +err = HTTPTest ( > + pre_hook=pre_test, > + test_params=test_options, > + post_hook=post_test, > + protocols=Servers > +).begin () > + > +exit (err) > diff --git a/testenv/conf/reject_header_field.py > b/testenv/conf/reject_header_field.py > new file mode 100644 > index 00000000..e1009cdd > --- /dev/null > +++ b/testenv/conf/reject_header_field.py > @@ -0,0 +1,12 @@ > +from conf import rule > + > +""" Rule: RejectHeaderField > +This is a server side rule which expects a string list of Header Fields > +which should be blacklisted by the server for a particular file's requests. > +""" > + > + > +@rule() > +class RejectHeaderField: > + def __init__(self, header_fields): > + self.header_fields = header_fields > diff --git a/testenv/server/http/http_server.py > b/testenv/server/http/http_server.py > index 2cc82fb9..6f358335 100644 > --- a/testenv/server/http/http_server.py > +++ b/testenv/server/http/http_server.py > @@ -370,6 +370,14 @@ class _Handler(BaseHTTPRequestHandler): > header_line) > raise ServerError("Header " + header_line + ' received') > > + def RejectHeaderField(self, header_fields_obj): > + rej_header_fields = header_fields_obj.header_fields > + for field in rej_header_fields: > + if field in self.headers: > + self.send_error(400, 'Blacklisted Header Field %s received' % > + field) > + raise ServerError('Header Field %s received' % field) > + > def __log_request(self, method): > req = method + " " + self.path > self.server.request_headers.append(req) > -- > 2.21.0 > -- Thanking You, Darshit Shah PGP Fingerprint: 7845 120B 07CB D8D6 ECE5 FF2B 2A17 43ED A91A 35B6
signature.asc
Description: PGP signature
