As a user of wget, I am sometimes confronted with servers rejecting connections because of too many users while I am mirroring a site. When this happens, I have to search the tree of successful downloads in order to find out what has failed. So I wrote a little patch for wget (actually a patch against wget 1.5.3 patched with the new-percentage function) that adds a new option (-M) which enables logging of missed downloads. With -M enabled, wget creates a file named 'wget-miss' that logs the list of URLs that failed to be retrieved. This addition is a bit buggy, because it logs failures against proxy connections too. Once you have removed the proxy failures from this file, you can use it as an input file for wget (in order to fetch the missing files). I am submitting the patch against the 1.5.3-newpercentage version, hoping it is of interest. If I find the time, I will make the log more accurate, so that it logs only rejected connections and not missing links. -- Pascal Valois ([EMAIL PROTECTED]) Département système réseaux ([EMAIL PROTECTED]) Ingénieur Système Unix / Pôle Universitaire Léonard de Vinci Poste : 7164
*** wget-1.5.3/src/main.c	Fri Sep 11 03:41:53 1998
--- wget-1.5.3/src/main.c	Fri Apr 13 13:25:09 2001
*************** Download:\n\
*** 144,149 ****
--- 144,150 ----
    -w, --wait=SECONDS wait SECONDS between retrievals.\n\
    -Y, --proxy=on/off turn proxy on or off.\n\
    -Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
+   -M log missed downloads in the file 'wget-miss'.\n\
  \n"), _("\
  Directories:\n\
    -nd --no-directories don\'t create directories.\n\
*************** main (int argc, char *const *argv)
*** 286,292 ****
    initialize ();
  
    while ((c = getopt_long (argc, argv, "\
! hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:",
  			   long_options, (int *)0)) != EOF)
      {
        switch (c)
--- 287,293 ----
    initialize ();
  
    while ((c = getopt_long (argc, argv, "\
! hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:M",
  			   long_options, (int *)0)) != EOF)
      {
        switch (c)
*************** hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t
*** 374,379 ****
--- 375,383 ----
  	  break;
  	case 'm':
  	  setval ("mirror", "on");
+ 	  break;
+ 	case 'M':
+ 	  setval ("logmissings", "on");
  	  break;
  	case 'N':
  	  setval ("timestamping", "on");
*** wget-1.5.3/src/init.c	Fri Sep 11 02:20:23 1998
--- wget-1.5.3/src/init.c	Fri Apr 13 13:25:02 2001
*************** extern int errno;
*** 53,59 ****
    PARAMS ((const char *, const char *, void *))
  
  CMD_DECLARE (cmd_boolean);
- CMD_DECLARE (cmd_boolean);
  CMD_DECLARE (cmd_number);
  CMD_DECLARE (cmd_number_inf);
  CMD_DECLARE (cmd_string);
--- 53,58 ----
*************** static struct {
*** 118,123 ****
--- 117,123 ----
    { "killlonger", &opt.kill_longer, cmd_boolean },
    { "logfile", &opt.lfilename, cmd_string },
    { "login", &opt.ftp_acc, cmd_string },
+   { "logmissings", &opt.log_missings, cmd_boolean },
    { "mirror", NULL, cmd_spec_mirror },
    { "netrc", &opt.netrc, cmd_boolean },
    { "noclobber", &opt.noclobber, cmd_boolean },
*** wget-1.5.3/src/options.h	Tue Apr 28 23:29:40 1998
--- wget-1.5.3/src/options.h	Fri Apr 13 13:23:58 2001
*************** struct options
*** 132,137 ****
--- 132,140 ----
  
    int delete_after; /* Whether the files will be deleted
  		       after download. */
+ 
+   int log_missings; /* Whether the non retrieved url
+ 		       shall be logged in wget-miss */
  };
  
  #ifndef OPTIONS_DEFINED_HERE
*** wget-1.5.3/src/retr.c	Sun Dec 12 10:36:53 1999
--- wget-1.5.3/src/retr.c	Fri Apr 13 08:48:20 2001
*************** retrieve_url (const char *origurl, char
*** 359,365 ****
    mynewloc = NULL;
  
    if (u->proto == URLHTTP)
!     result = http_loop (u, &mynewloc, dt);
    else if (u->proto == URLFTP)
      {
        /* If this is a redirection, we must not allow recursive FTP
--- 359,368 ----
    mynewloc = NULL;
  
    if (u->proto == URLHTTP)
!     {
!       result = http_loop (u, &mynewloc, dt);
!       if (result != RETROK) LOG_MISS;
!     }
    else if (u->proto == URLFTP)
      {
        /* If this is a redirection, we must not allow recursive FTP
*************** retrieve_url (const char *origurl, char
*** 369,374 ****
--- 372,378 ----
        if (already_redirected)
  	opt.recursive = 0;
        result = ftp_loop (u, dt);
+       if (result != RETROK) LOG_MISS;
        opt.recursive = oldrec;
        /* There is a possibility of having HTTP being redirected to
  	 FTP. In these cases we must decide whether the text is HTML
*** wget-1.5.3/src/retr.h	Sat Jan 31 02:22:34 1998
--- wget-1.5.3/src/retr.h	Fri Apr 13 13:25:49 2001
*************** char *rate PARAMS ((long, long));
*** 34,37 ****
--- 34,57 ----
  
  void printwhat PARAMS ((int, int));
  
+ /* Append the URL described by `u' (taken from the expansion site) to
+    the file "wget-miss" when opt.log_missings is set.  Expands to one
+    statement, so `LOG_MISS;' is safe in an unbraced if/else.  If the
+    file cannot be opened the URL is skipped rather than written
+    through a null FILE pointer.
+    NOTE(review): if str_url() returns allocated storage it should be
+    released after the fprintf -- confirm against url.c.  */
+ #define LOG_MISS do { \
+   if (opt.log_missings) \
+     { \
+       FILE *miss = fopen ("wget-miss", "a"); \
+       if (miss != NULL) \
+         { \
+           fprintf (miss, "%s\n", str_url (u, 1)); \
+           fclose (miss); \
+         } \
+     } \
+ } while (0)
+ 
  #endif /* RETR_H */
+ 