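I was in a situation where a website generated a different access token
for the same file on each refresh. The token was passed in the URL query
string, so wget could not detect that the file already existed, which
caused many duplicate downloads whose local file names differed only in
the URL query. This patch adds a --filename-exclude-url-query option that
leaves the ?query part out of the generated local file name.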
diff -ru wget-1.20.3/src/init.c wget-1.20.3_batch/src/init.c
--- wget-1.20.3/src/init.c 2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/init.c 2019-09-28 18:02:02.617725486 -0400
@@ -198,6 +198,7 @@
#endif
{ "excludedirectories", &opt.excludes, cmd_directory_vector },
{ "excludedomains", &opt.exclude_domains, cmd_vector },
+ { "filenameexcludeurlquery", &opt.filename_exclude_url_query,
cmd_boolean },
{ "followftp", &opt.follow_ftp, cmd_boolean },
{ "followtags", &opt.follow_tags, cmd_vector },
{ "forcehtml", &opt.force_html, cmd_boolean },
diff -ru wget-1.20.3/src/main.c wget-1.20.3_batch/src/main.c
--- wget-1.20.3/src/main.c 2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/main.c 2019-09-28 18:03:36.812760350 -0400
@@ -316,6 +316,7 @@
{ "exclude-directories", 'X', OPT_VALUE, "excludedirectories", -1 },
{ "exclude-domains", 0, OPT_VALUE, "excludedomains", -1 },
{ "execute", 'e', OPT__EXECUTE, NULL, required_argument },
+ { "filename-exclude-url-query", 0, OPT_BOOLEAN, "filenameexcludeurlquery",
-1 },
{ "follow-ftp", 0, OPT_BOOLEAN, "followftp", -1 },
{ "follow-tags", 0, OPT_VALUE, "followtags", -1 },
{ "force-directories", 'x', OPT_BOOLEAN, "dirstruct", -1 },
diff -ru wget-1.20.3/src/options.h wget-1.20.3_batch/src/options.h
--- wget-1.20.3/src/options.h 2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/options.h 2019-09-28 18:01:57.889572989 -0400
@@ -43,6 +43,7 @@
int ntry; /* Number of tries per URL */
bool retry_connrefused; /* Treat CONNREFUSED as non-fatal. */
bool retry_on_host_error; /* Treat host errors as non-fatal. */
+ bool filename_exclude_url_query; /* Do we exclude the ?query in generating
local file name from urls? */
char *retry_on_http_error; /* Treat given HTTP errors as non-fatal. */
bool background; /* Whether we should work in background. */
bool ignore_length; /* Do we heed content-length at all? */
diff -ru wget-1.20.3/src/url.c wget-1.20.3_batch/src/url.c
--- wget-1.20.3/src/url.c 2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/url.c 2019-09-28 18:08:54.890973790 -0400
@@ -1752,9 +1752,9 @@
/* Create the filename. */
u_file = *u->file ? u->file : index_filename;
- /* Append "?query" to the file name, even if empty,
+ /* Append "?query" to the file name, even if empty, unless
--filename-exclude-url-query is true
* and create fname_len_check. */
- if (u->query)
+ if (u->query && !opt.filename_exclude_url_query)
fname_len_check = concat_strings (u_file, FN_QUERY_SEP_STR, u->query,
NULL);
else
fname_len_check = strdupdelim (u_file, u_file + strlen (u_file));