As a wget user, I am sometimes confronted with servers rejecting connections (too many 
users) while I am mirroring a site. When this happens, I have to search the tree of 
successful downloads to find out what failed.

So I wrote a little patch for wget (actually a patch against wget 1.5.3 already patched 
with the new-percentage function) that adds a new option (-M) enabling the logging 
of missed downloads.

With -M enabled, wget creates a file named 'wget-miss' that lists the URLs whose 
retrieval failed. This addition is a bit buggy, because it also logs failures of the 
proxy connection.

Once you have cleaned the proxy failures out of this file, you can use it as an input 
file for wget (with -i, in order to fetch the missing files).
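
For example, a session could look like this (the URL is only a placeholder):

  # mirror the site, logging every URL that could not be retrieved
  wget -M -m http://www.example.com/

  # edit wget-miss to drop the proxy-failure lines, then retry the rest
  wget -i wget-miss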

I am submitting the patch against the 1.5.3-newpercentage version, hoping it is of 
interest. If I find the time, I will make the log more accurate, so that it records 
only rejected connections and not missing links; a rough sketch of what I have in 
mind follows.
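
The uerr_t names in this sketch (CONREFUSED, CONERROR, HOSTERR, PROXERR) are from 
memory and must be checked against src/wget.h before use; the code is untested and 
only shows the idea:

  /* Rough sketch, to go in retr.c.  Log a URL only when the connection
     itself was rejected, so that dead links (404 and the like) stay out
     of wget-miss.  The uerr_t values below are placeholders to verify. */
  static int
  connection_was_rejected (uerr_t err)
  {
    switch (err)
      {
      case CONREFUSED:          /* server refused the connection */
      case CONERROR:            /* connect () itself failed */
      case HOSTERR:             /* host lookup failed */
      case PROXERR:             /* proxy could not be contacted */
        return 1;
      default:
        return 0;
      }
  }

  /* LOG_MISS would then take the result code instead of firing on every
     non-RETROK result; as before, it relies on the 'u' pointer from the
     enclosing retrieve_url () scope. */
  #define LOG_MISS(err) do {                                    \
    if (opt.log_missings && connection_was_rejected (err))      \
      {                                                         \
        FILE *miss = fopen ("wget-miss", "a");                  \
        if (miss)                                               \
          {                                                     \
            fprintf (miss, "%s\n", str_url (u, 1));             \
            fclose (miss);                                      \
          }                                                     \
      }                                                         \
  } while (0)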

-- 
Pascal Valois ([EMAIL PROTECTED])
Systems and Networks Department ([EMAIL PROTECTED])
Unix Systems Engineer / Pôle Universitaire Léonard de Vinci
Extension: 7164
*** wget-1.5.3/src/main.c       Fri Sep 11 03:41:53 1998
--- wget-1.5.3/src/main.c       Fri Apr 13 13:25:09 2001
*************** Download:\n\
*** 144,149 ****
--- 144,150 ----
    -w,  --wait=SECONDS           wait SECONDS between retrievals.\n\
    -Y,  --proxy=on/off           turn proxy on or off.\n\
    -Q,  --quota=NUMBER           set retrieval quota to NUMBER.\n\
+   -M                            log missed downloads in the file 'wget-miss'.\n\
  \n"),  _("\
  Directories:\n\
    -nd  --no-directories            don\'t create directories.\n\
*************** main (int argc, char *const *argv)
*** 286,292 ****
    initialize ();
  
    while ((c = getopt_long (argc, argv, "\
! hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:",
                           long_options, (int *)0)) != EOF)
      {
        switch (c)
--- 287,293 ----
    initialize ();
  
    while ((c = getopt_long (argc, argv, "\
! hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:M",
                           long_options, (int *)0)) != EOF)
      {
        switch (c)
*************** hVqvdksxmNWrHSLcFbEY:g:T:U:O:l:n:i:o:a:t
*** 374,379 ****
--- 375,383 ----
          break;
        case 'm':
          setval ("mirror", "on");
+         break;
+       case 'M':
+         setval ("logmissings", "on");
          break;
        case 'N':
          setval ("timestamping", "on");
*** wget-1.5.3/src/init.c       Fri Sep 11 02:20:23 1998
--- wget-1.5.3/src/init.c       Fri Apr 13 13:25:02 2001
*************** extern int errno;
*** 53,59 ****
    PARAMS ((const char *, const char *, void *))
  
  CMD_DECLARE (cmd_boolean);
- CMD_DECLARE (cmd_boolean);
  CMD_DECLARE (cmd_number);
  CMD_DECLARE (cmd_number_inf);
  CMD_DECLARE (cmd_string);
--- 53,58 ----
*************** static struct {
*** 118,123 ****
--- 117,123 ----
    { "killlonger",     &opt.kill_longer,       cmd_boolean },
    { "logfile",                &opt.lfilename,         cmd_string },
    { "login",          &opt.ftp_acc,           cmd_string },
+   { "logmissings",    &opt.log_missings,      cmd_boolean },
    { "mirror",         NULL,                   cmd_spec_mirror },
    { "netrc",          &opt.netrc,             cmd_boolean },
    { "noclobber",      &opt.noclobber,         cmd_boolean },
*** wget-1.5.3/src/options.h    Tue Apr 28 23:29:40 1998
--- wget-1.5.3/src/options.h    Fri Apr 13 13:23:58 2001
*************** struct options
*** 132,137 ****
--- 132,140 ----
  
    int delete_after;           /* Whether the files will be deleted
                                   after download. */
+ 
+   int log_missings;           /* Whether URLs that could not be
+                                  retrieved are logged in wget-miss. */
  };
  
  #ifndef OPTIONS_DEFINED_HERE
*** wget-1.5.3/src/retr.c       Sun Dec 12 10:36:53 1999
--- wget-1.5.3/src/retr.c       Fri Apr 13 08:48:20 2001
*************** retrieve_url (const char *origurl, char 
*** 359,365 ****
    mynewloc = NULL;
  
    if (u->proto == URLHTTP)
!     result = http_loop (u, &mynewloc, dt);
    else if (u->proto == URLFTP)
      {
        /* If this is a redirection, we must not allow recursive FTP
--- 359,368 ----
    mynewloc = NULL;
  
    if (u->proto == URLHTTP)
!     {
!       result = http_loop (u, &mynewloc, dt);
!       if (result != RETROK) LOG_MISS;
!     }
    else if (u->proto == URLFTP)
      {
        /* If this is a redirection, we must not allow recursive FTP
*************** retrieve_url (const char *origurl, char 
*** 369,374 ****
--- 372,378 ----
        if (already_redirected)
        opt.recursive = 0;
        result = ftp_loop (u, dt);
+       if (result != RETROK) LOG_MISS;
        opt.recursive = oldrec;
        /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
*** wget-1.5.3/src/retr.h       Sat Jan 31 02:22:34 1998
--- wget-1.5.3/src/retr.h       Fri Apr 13 13:25:49 2001
*************** char *rate PARAMS ((long, long));
*** 34,37 ****
--- 34,43 ----
  
  void printwhat PARAMS ((int, int));
  
+ #define LOG_MISS { if (opt.log_missings) { FILE *miss; \
+                    miss = fopen ("wget-miss", "a"); \
+                    if (miss) { fprintf (miss, "%s\n", str_url (u, 1)); \
+                                fclose (miss); } } }
+ 
  #endif /* RETR_H */
+ 
