I was in a situation where a website generates a different access token
   for the same file on each refresh. The token is passed in the URL
   query, so wget wasn't detecting that the file already exists,
   causing tons of duplicate downloads, each with a different URL query.
   The patch below adds a --filename-exclude-url-query option that leaves
   the ?query part out of the generated local file name.
diff -ru wget-1.20.3/src/init.c wget-1.20.3_batch/src/init.c
--- wget-1.20.3/src/init.c      2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/init.c        2019-09-28 18:02:02.617725486 -0400
@@ -198,6 +198,7 @@
 #endif
   { "excludedirectories", &opt.excludes,        cmd_directory_vector },
   { "excludedomains",   &opt.exclude_domains,   cmd_vector },
+  { "filenameexcludeurlquery",       &opt.filename_exclude_url_query,       cmd_boolean },
   { "followftp",        &opt.follow_ftp,        cmd_boolean },
   { "followtags",       &opt.follow_tags,       cmd_vector },
   { "forcehtml",        &opt.force_html,        cmd_boolean },
diff -ru wget-1.20.3/src/main.c wget-1.20.3_batch/src/main.c
--- wget-1.20.3/src/main.c      2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/main.c        2019-09-28 18:03:36.812760350 -0400
@@ -316,6 +316,7 @@
     { "exclude-directories", 'X', OPT_VALUE, "excludedirectories", -1 },
     { "exclude-domains", 0, OPT_VALUE, "excludedomains", -1 },
     { "execute", 'e', OPT__EXECUTE, NULL, required_argument },
+    { "filename-exclude-url-query", 0, OPT_BOOLEAN, "filenameexcludeurlquery", -1 },
     { "follow-ftp", 0, OPT_BOOLEAN, "followftp", -1 },
     { "follow-tags", 0, OPT_VALUE, "followtags", -1 },
     { "force-directories", 'x', OPT_BOOLEAN, "dirstruct", -1 },
diff -ru wget-1.20.3/src/options.h wget-1.20.3_batch/src/options.h
--- wget-1.20.3/src/options.h   2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/options.h     2019-09-28 18:01:57.889572989 -0400
@@ -43,6 +43,7 @@
   int ntry;                     /* Number of tries per URL */
   bool retry_connrefused;       /* Treat CONNREFUSED as non-fatal. */
   bool retry_on_host_error;     /* Treat host errors as non-fatal. */
+  bool filename_exclude_url_query; /* Do we exclude the ?query in generating local file name from urls? */
   char *retry_on_http_error;    /* Treat given HTTP errors as non-fatal. */
   bool background;              /* Whether we should work in background. */
   bool ignore_length;           /* Do we heed content-length at all?  */
diff -ru wget-1.20.3/src/url.c wget-1.20.3_batch/src/url.c
--- wget-1.20.3/src/url.c       2019-04-05 05:59:19.000000000 -0400
+++ wget-1.20.3_batch/src/url.c 2019-09-28 18:08:54.890973790 -0400
@@ -1752,9 +1752,9 @@
       /* Create the filename. */
       u_file = *u->file ? u->file : index_filename;

-      /* Append "?query" to the file name, even if empty,
+      /* Append "?query" to the file name, even if empty, unless --filename-exclude-url-query is true
        * and create fname_len_check. */
-      if (u->query)
+      if (u->query && !opt.filename_exclude_url_query)
         fname_len_check = concat_strings (u_file, FN_QUERY_SEP_STR, u->query, NULL);
       else
         fname_len_check = strdupdelim (u_file, u_file + strlen (u_file));

Reply via email to