Hello,

I created a patch to store the URL inside the user xattrs of the downloaded
file; this way, its origin can be identified afterwards.

I uploaded the change to my GitHub account and attached the diff, and I am
still working on portability issues, but I'd like to hear some opinions on
this:

http://github.com/wertarbyte/wget/tree/xattrurl

Sincerely,
Stefan Tomanek
diff --git a/src/Makefile.am b/src/Makefile.am
index ac9c9c9..31d0032 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -50,7 +50,7 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c    		  \
 	       http.h http-ntlm.h init.h log.h mswindows.h netrc.h        \
 	       options.h progress.h ptimer.h recur.h res.h retr.h         \
 	       spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h 	  \
-	       exits.h gettext.h
+	       exits.h gettext.h xattr.c xattr.h
 nodist_wget_SOURCES = version.c
 EXTRA_wget_SOURCES = mswindows.c iri.c
 LDADD = $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@
diff --git a/src/Makefile.in b/src/Makefile.in
index 46e9b28..d3b451f 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -120,7 +120,7 @@ am__libunittest_a_SOURCES_DIST = cmpt.c connect.c convert.c cookies.c \
 	hash.h host.h html-parse.h html-url.h http.h http-ntlm.h \
 	init.h log.h mswindows.h netrc.h options.h progress.h ptimer.h \
 	recur.h res.h retr.h spider.h ssl.h sysdep.h url.h utils.h \
-	wget.h iri.h exits.h gettext.h test.c test.h
+	wget.h iri.h exits.h gettext.h test.c test.h xattr.c xattr.h
 @iri_is_enabled_t...@am__objects_1 = libunittest_a-iri.$(OBJEXT)
 am__objects_2 = libunittest_a-cmpt.$(OBJEXT) \
 	libunittest_a-connect.$(OBJEXT) \
@@ -159,7 +159,7 @@ am__wget_SOURCES_DIST = cmpt.c connect.c convert.c cookies.c ftp.c \
 	hash.h host.h html-parse.h html-url.h http.h http-ntlm.h \
 	init.h log.h mswindows.h netrc.h options.h progress.h ptimer.h \
 	recur.h res.h retr.h spider.h ssl.h sysdep.h url.h utils.h \
-	wget.h iri.h exits.h gettext.h
+	wget.h iri.h exits.h gettext.h xattr.h xattr.c
 @iri_is_enabled_t...@am__objects_3 = iri.$(OBJEXT)
 am_wget_OBJECTS = cmpt.$(OBJEXT) connect.$(OBJEXT) convert.$(OBJEXT) \
 	cookies.$(OBJEXT) ftp.$(OBJEXT) css.$(OBJEXT) \
@@ -170,6 +170,7 @@ am_wget_OBJECTS = cmpt.$(OBJEXT) connect.$(OBJEXT) convert.$(OBJEXT) \
 	ptimer.$(OBJEXT) recur.$(OBJEXT) res.$(OBJEXT) retr.$(OBJEXT) \
 	snprintf.$(OBJEXT) spider.$(OBJEXT) url.$(OBJEXT) \
 	utils.$(OBJEXT) exits.$(OBJEXT) build_info.$(OBJEXT) \
+	xattr.$(OBJEXT) \
 	$(am__objects_3)
 nodist_wget_OBJECTS = version.$(OBJEXT)
 wget_OBJECTS = $(am_wget_OBJECTS) $(nodist_wget_OBJECTS)
@@ -678,7 +679,7 @@ wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c    		  \
 	       http.h http-ntlm.h init.h log.h mswindows.h netrc.h        \
 	       options.h progress.h ptimer.h recur.h res.h retr.h         \
 	       spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h 	  \
-	       exits.h gettext.h
+	       exits.h gettext.h xattr.h xattr.c
 
 nodist_wget_SOURCES = version.c
 EXTRA_wget_SOURCES = mswindows.c iri.c
diff --git a/src/ftp.c b/src/ftp.c
index dfdd83c..4a4e84a 100644
--- a/src/ftp.c
+++ b/src/ftp.c
@@ -51,6 +51,8 @@ as that of the covered work.  */
 #include "convert.h"            /* for downloaded_file */
 #include "recur.h"              /* for INFINITE_RECURSION */
 
+#include "xattr.h"
+
 #ifdef __VMS
 # include "vms.h"
 #endif /* def __VMS */
@@ -1206,6 +1208,11 @@ Error in server response, closing control connection.\n"));
   else
     fp = output_stream;
 
+  if (opt.xattr_url && file_exists_p(con->target))
+    {
+      set_xattr( u, con->target );
+    }
+
   if (passed_expected_bytes)
     {
       print_length (passed_expected_bytes, restval, true);
diff --git a/src/http.c b/src/http.c
index 3a46764..42f7900 100644
--- a/src/http.c
+++ b/src/http.c
@@ -62,6 +62,8 @@ as that of the covered work.  */
 #include "convert.h"
 #include "spider.h"
 
+#include "xattr.h"
+
 #ifdef TESTING
 #include "test.h"
 #endif
@@ -2359,6 +2361,11 @@ File %s already there; not retrieving.\n\n"), quote (hs->local_file));
   else
     fp = output_stream;
 
+  if (opt.xattr_url && file_exists_p(hs->local_file))
+    {
+      set_xattr( u, hs->local_file );
+    }
+
   /* Print fetch message, if opt.verbose.  */
   if (opt.verbose)
     {
diff --git a/src/init.c b/src/init.c
index 5a05d03..6565655 100644
--- a/src/init.c
+++ b/src/init.c
@@ -252,6 +252,7 @@ static const struct {
 #ifdef USE_WATT32
   { "wdebug",           &opt.wdebug,            cmd_boolean },
 #endif
+  { "xattrurl",         &opt.xattr_url,         cmd_boolean },
 };
 
 /* Look up CMDNAME in the commands[] and return its position in the
diff --git a/src/main.c b/src/main.c
index dddc4b2..06e3504 100644
--- a/src/main.c
+++ b/src/main.c
@@ -55,6 +55,7 @@ as that of the covered work.  */
 #include "convert.h"
 #include "spider.h"
 #include "http.h"               /* for save_cookies */
+#include "xattr.h"
 
 #include <getopt.h>
 #include <getpass.h>
@@ -258,6 +259,7 @@ static struct cmdline_option option_data[] =
     { "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
     { "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
     { "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
+    { "xattr-url", 0, OPT_BOOLEAN, "xattrurl", -1 },
     { IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
     { "server-response", 'S', OPT_BOOLEAN, "serverresponse", -1 },
     { "span-hosts", 'H', OPT_BOOLEAN, "spanhosts", -1 },
@@ -555,6 +557,8 @@ HTTP options:\n"),
     N_("\
        --save-headers          save the HTTP headers to file.\n"),
     N_("\
+       --xattr-url             save the URL to extended file attributes.\n"),
+    N_("\
   -U,  --user-agent=AGENT      identify as AGENT instead of Wget/VERSION.\n"),
     N_("\
        --no-http-keep-alive    disable HTTP keep-alive (persistent connections).\n"),
diff --git a/src/options.h b/src/options.h
index a895863..5e4b6f4 100644
--- a/src/options.h
+++ b/src/options.h
@@ -127,6 +127,7 @@ struct options
   bool server_response;		/* Do we print server response? */
   bool save_headers;		/* Do we save headers together with
 				   file? */
+  bool xattr_url;
 
 #ifdef ENABLE_DEBUG
   bool debug;			/* Debugging on/off */
diff --git a/src/xattr.c b/src/xattr.c
new file mode 100644
index 0000000..5eba265
--- /dev/null
+++ b/src/xattr.c
@@ -0,0 +1,37 @@
+/* Record the origin URL of a downloaded file in its extended attributes.  */
+
+#include "wget.h"
+
+#include <errno.h>
+#include <string.h>
+#include <attr/xattr.h>
+
+#include "url.h"
+#include "xattr.h"
+
+/* Store the URL described by ORIGIN in the "user.wget.origin" extended
+   attribute of FILENAME.  The URL text is produced by url_string with
+   URL_AUTH_HIDE (presumably so credentials are not written out; confirm).
+
+   A setxattr failure is logged at LOG_NOTQUIET together with the
+   strerror text; it is not treated as fatal here.
+
+   Returns the setxattr result: zero on success, nonzero on failure.  */
+int
+set_xattr (struct url *origin, const char *filename)
+{
+  char *url = url_string (origin, URL_AUTH_HIDE);
+  int err;
+
+  logprintf (LOG_VERBOSE, _("Placing URL '%s' in extended attributes.\n"), url);
+  err = setxattr (filename, "user.wget.origin", url, strlen (url), 0);
+  if (err)
+    {
+      logprintf (LOG_NOTQUIET, "setxattr: %s\n", strerror (errno));
+    }
+
+  /* url_string returns a newly allocated string that the caller owns;
+     release it so that each download does not leak one URL.  */
+  xfree (url);
+  return err;
+}
diff --git a/src/xattr.h b/src/xattr.h
new file mode 100644
index 0000000..d0f5e06
--- /dev/null
+++ b/src/xattr.h
@@ -0,0 +1,13 @@
+/* Declarations for xattr.c.  */
+
+#ifndef XATTR_H
+#define XATTR_H
+
+/* Forward declaration so this header does not depend on include order.  */
+struct url;
+
+/* Store the URL described by ORIGIN in the "user.wget.origin" extended
+   attribute of FILENAME.  Returns zero on success, nonzero on failure.  */
+int set_xattr (struct url *origin, const char *filename);
+
+#endif /* XATTR_H */
-- 
1.7.1

Reply via email to