Hi,
The attached diff is only a proof of concept, since it's a quick hack, but it 
works for my needs.
It adds a "--transform-html" option, which specifies a shell command to use as 
a preprocessor on the HTML files being downloaded.

I wanted to download a site whose URL had a domain redirecting to it. 
However, that domain has since expired, and all of the site's links relied on 
it, so the site became unusable.

Solution with the option I'm proposing:
wget -rkpE --transform-html "sed s/www.bad-domain.net/www.good-url.net/g" 
www.good-url.net


--- http.c
+++ http.c
1345a1346,1352
> 
> FILE *topen (const char *, const char *); /* --transform-html */
> 
> #ifdef WINDOWS
> #define pclose _pclose
> #endif
> 
2224c2231
<         fp = fopen (hs->local_file, "ab");
---
>         fp = topen (hs->local_file, "ab");
2226c2233
<         fp = fopen (hs->local_file, "wb");
---
>         fp = topen (hs->local_file, "wb");
2294a2302,2303
>   {
>     pclose (fp);                  /* --transform-html */
2295a2305
>   }
+++ init.c
239a240
>   { "transformhtml",  &opt.transform_html,    cmd_string },
--- main.c
+++ main.c
240a241
>     { "transform-html", 0, OPT_VALUE, "transformhtml", required_argument },
587c588
<   -r,  --recursive          specify recursive download.\n"),
---
>   -r,  --recursive                 specify recursive download.\n"),
589c590
<   -l,  --level=NUMBER       maximum recursion depth (inf or 0 for 
infinite).\n"),
---
>   -l,  --level=NUMBER              maximum recursion depth (inf or 0 for 
> infinite).\n"),
591c592
<        --delete-after       delete files locally after downloading them.\n"),
---
>        --delete-after              delete files locally after downloading 
> them.\n"),
593c594
<   -k,  --convert-links      make links in downloaded HTML point to local 
files.\n"),
---
>   -k,  --convert-links             make links in downloaded HTML point to 
> local files.\n"),
595c596
<   -K,  --backup-converted   before converting file X, back up as X.orig.\n"),
---
>   -K,  --backup-converted          before converting file X, back up as 
> X.orig.\n"),
597c598
<   -m,  --mirror             shortcut for -N -r -l inf 
--no-remove-listing.\n"),
---
>   -m,  --mirror                    shortcut for -N -r -l inf 
> --no-remove-listing.\n"),
599c600
<   -p,  --page-requisites    get all images, etc. needed to display HTML 
page.\n"),
---
>   -p,  --page-requisites           get all images, etc. needed to display 
> HTML page.\n"),
601c602,604
<        --strict-comments    turn on strict (SGML) handling of HTML 
comments.\n"),
---
>        --strict-comments           turn on strict (SGML) handling of HTML 
> comments.\n"),
>     N_("\
>        --transform-html=SHELLCMD   preprocess html files through a shell 
> command.\n"),
+++ options.h
236a237,238
> 
>   char *transform_html;               /* preprocess html files through a 
> shell command */
--- utils.c
+++ utils.c
531a531,551
> 
> #undef O_EXCL                 /* hack --transform-html */
> 
> #ifdef WINDOWS
> #define popen _popen
> #endif
> 
> /* Like fopen, but pipe HTML files through the --transform-html command;
>    returns NULL if "CMD>FNAME" would overflow the command buffer.  */
> FILE *
> topen (const char *fname, const char *mode)
> {
>   char s[512];
>   int n;
>   if (!opt.transform_html || !has_html_suffix_p (fname))
>     return fopen (fname, mode);
>   n = snprintf (s, sizeof s, "%s>%s", opt.transform_html, fname);
>   /* A truncated shell command must never be executed.  */
>   return (n < 0 || (size_t) n >= sizeof s) ? NULL : popen (s, mode);
> }
> 
555c575
<   return fopen (fname, binary ? "wb" : "w");
---
>   return topen (fname, binary ? "wb" : "w");

_________________________________________________________________
Tecnología, moda, motor, viajes,…suscríbete a nuestros boletines para estar 
siempre a la última
Guapos y guapas, clips musicales y estrenos de cine. 

Reply via email to