On 19.04.2012 00:30, Ángel González wrote:
[...]
> Some minor comments:
> In line with the recent const discussion in this list, direct should be
> const char[].
> The wget code generally uses C89 variable definition before statements,
> which would be nice to keep by this patch.
> The two asprintf() are just strdup() in disguise, so I'd use the later
> function.
> Moreover, they're both unneeded.
> 
> As you moved the libproxy code above wget proxy code, it would now be
> possible
> for the pac file to return "do a direct connection", and for wget to try
> nonetheless
> a proxy from http_proxy env variable (probably a default for
> non-libproxy aware
> programs).

Don't you think it would be less confusing to the user to avoid mixing
libproxy handling with wget's own? Libproxy may get it's configuration
from various sources (kde, gnome settings, some pacrunner, not only the
standard environment variables from which various programs including
wget can get them), therefore I think separating the code even more
would be better. Maybe some user may have to explicitely bypass libproxy
on a binary distribution like most of them are (I'm in the lucky
position to use gentoo and compile libs and programs the way I want to
combine their features), for that I think a user option like provided in
the current patch (--no-libproxy, only effective if proxy turned on) may
fit. If some "..tp_proxy" variable is set to pac+http://... and wget is
called without libproxy support, it will simply complain about
unsupported scheme. Maybe that user message should be detailed a bit to
suggest using libproxy if compiled in.

> Not sure if httpproxy from .wgetrc should be honored in that case or not.

See above, my thoughts about mixing. Nevertheless, I think I extended
the libproxy option in the rc file as well

> A further enhacement would be having wget support multiple proxies, as
> is expected
> by libproxy ("If the first proxy fails, the second should be tried,
> etc..."), but just
> supporting the first proxy (as in openSUSE patch) seems a good enough
> change for
> the first addition of libproxy support.

I implemented at least finding a proxy to which wget can establish a
connection, out of the possibly larger list libproxy provides, I noticed
wget reuses that connection then. Is that ok?

Regards,
Lucian
diff -Naur wget-1.13.4_orig/configure.ac wget-1.13.4/configure.ac
--- wget-1.13.4_orig/configure.ac       2011-09-04 13:58:44.000000000 +0200
+++ wget-1.13.4/configure.ac    2012-04-19 17:34:25.633758176 +0200
@@ -338,6 +338,22 @@
   fi
 fi
 
+dnl
+dnl libproxy support
+dnl
+AC_ARG_ENABLE(libproxy,
+  [  --enable-libproxy       libproxy support for system wide proxy 
configuration])
+if test "${enable_libproxy}" != "no"
+then
+  PKG_CHECK_MODULES([libproxy], [libproxy-1.0], [enable_libproxy=yes], 
[enable_libproxy=no])
+fi
+if test "${enable_libproxy}" = "yes"
+then
+  AC_SUBST(libproxy_CFLAGS)
+  AC_SUBST(libproxy_LIBS)
+  AC_DEFINE([HAVE_LIBPROXY], 1, [Define when using libproxy])
+fi
+
 dnl **********************************************************************
 dnl Checks for IPv6
 dnl **********************************************************************
diff -Naur wget-1.13.4_orig/doc/sample.wgetrc wget-1.13.4/doc/sample.wgetrc
--- wget-1.13.4_orig/doc/sample.wgetrc  2011-01-01 13:12:33.000000000 +0100
+++ wget-1.13.4/doc/sample.wgetrc       2012-04-19 17:34:25.633758176 +0200
@@ -82,6 +82,9 @@
 # If you do not want to use proxy at all, set this to off.
 #use_proxy = on
 
+# If you do not want to let libproxy handle proxies, set this to off.
+#use_proxylib = on
+
 # You can customize the retrieval outlook.  Valid options are default,
 # binary, mega and micro.
 #dot_style = default
diff -Naur wget-1.13.4_orig/doc/sample.wgetrc.munged_for_texi_inclusion 
wget-1.13.4/doc/sample.wgetrc.munged_for_texi_inclusion
--- wget-1.13.4_orig/doc/sample.wgetrc.munged_for_texi_inclusion        
2011-09-13 10:17:45.000000000 +0200
+++ wget-1.13.4/doc/sample.wgetrc.munged_for_texi_inclusion     2012-04-19 
17:34:25.634758164 +0200
@@ -82,6 +82,9 @@
 # If you do not want to use proxy at all, set this to off.
 #use_proxy = on
 
+# If you do not want to let libproxy handle proxies, set this to off.
+#use_proxylib = on
+
 # You can customize the retrieval outlook.  Valid options are default,
 # binary, mega and micro.
 #dot_style = default
diff -Naur wget-1.13.4_orig/doc/wget.info wget-1.13.4/doc/wget.info
--- wget-1.13.4_orig/doc/wget.info      2011-09-13 10:17:45.000000000 +0200
+++ wget-1.13.4/doc/wget.info   2012-04-19 17:34:25.636758140 +0200
@@ -2866,6 +2866,10 @@
      environment variables are set.  In that case it is the same as
      using `--no-proxy'.
 
+use_proxylib = on/off
+     When set to off, don't use libproxy to handle proxies.
+     It only has effect if use_proxy above is on.
+
 user = STRING
      Specify username STRING for both FTP and HTTP file retrieval.
      This command can be overridden using the `ftp_user' and
@@ -2985,6 +2989,10 @@
      # If you do not want to use proxy at all, set this to off.
      #use_proxy = on
 
+     # If you use proxies but do not want them to be hadled by libproxy,
+     # set this to off.
+     #use_proxylib = on
+
      # You can customize the retrieval outlook.  Valid options are default,
      # binary, mega and micro.
      #dot_style = default
diff -Naur wget-1.13.4_orig/src/init.c wget-1.13.4/src/init.c
--- wget-1.13.4_orig/src/init.c 2011-08-19 12:06:20.000000000 +0200
+++ wget-1.13.4/src/init.c      2012-04-19 17:34:25.653757927 +0200
@@ -257,6 +257,9 @@
   { "trustservernames", &opt.trustservernames,  cmd_boolean },
   { "unlink",           &opt.unlink,            cmd_boolean },
   { "useproxy",         &opt.use_proxy,         cmd_boolean },
+#ifdef HAVE_LIBPROXY
+  { "useproxylib",      &opt.use_proxylib,      cmd_boolean },
+#endif
   { "user",             &opt.user,              cmd_string },
   { "useragent",        NULL,                   cmd_spec_useragent },
   { "useservertimestamps", &opt.useservertimestamps, cmd_boolean },
@@ -315,6 +318,9 @@
   opt.htmlify = true;
   opt.http_keep_alive = true;
   opt.use_proxy = true;
+#ifdef HAVE_LIBPROXY
+  opt.use_proxylib = true;
+#endif
   tmp = getenv ("no_proxy");
   if (tmp)
     opt.no_proxy = sepstring (tmp);
diff -Naur wget-1.13.4_orig/src/log.c wget-1.13.4/src/log.c
--- wget-1.13.4_orig/src/log.c  2011-07-29 15:43:44.000000000 +0200
+++ wget-1.13.4/src/log.c       2012-04-19 17:34:25.658757866 +0200
@@ -41,6 +41,7 @@
 #include "utils.h"
 #include "log.h"
 
+
 /* 2005-10-25 SMS.
    VMS log files are often VFC record format, not stream, so fputs() can
    produce multiple records, even when there's no newline terminator in
@@ -506,12 +507,12 @@
   while (!done);
 }
 
-#ifdef ENABLE_DEBUG
 /* The same as logprintf(), but does anything only if opt.debug is
    true.  */
 void
 debug_logprintf (const char *fmt, ...)
 {
+#ifdef ENABLE_DEBUG
   if (opt.debug)
     {
       va_list args;
@@ -531,8 +532,8 @@
         }
       while (!done);
     }
-}
 #endif /* ENABLE_DEBUG */
+}
 
 /* Open FILE and set up a logging stream.  If FILE cannot be opened,
    exit with status of 1.  */
diff -Naur wget-1.13.4_orig/src/main.c wget-1.13.4/src/main.c
--- wget-1.13.4_orig/src/main.c 2011-09-06 15:50:11.000000000 +0200
+++ wget-1.13.4/src/main.c      2012-04-19 17:38:24.865767363 +0200
@@ -225,6 +225,9 @@
     { "iri", 0, OPT_BOOLEAN, "iri", -1 },
     { "keep-session-cookies", 0, OPT_BOOLEAN, "keepsessioncookies", -1 },
     { "level", 'l', OPT_VALUE, "reclevel", -1 },
+#ifdef HAVE_LIBPROXY
+    { "libproxy", 0, OPT_BOOLEAN, "useproxylib", -1 },
+#endif
     { "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
     { "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
     { "local-encoding", 0, OPT_VALUE, "localencoding", -1 },
@@ -495,6 +498,10 @@
        --random-wait             wait from 0.5*WAIT...1.5*WAIT secs between 
retrievals.\n"),
     N_("\
        --no-proxy                explicitly turn off proxy.\n"),
+#ifdef HAVE_LIBPROXY
+    N_("\
+       --no-libproxy             explicitly disable proxy handling by libproxy 
(only effective if proxy turned on).\n"),
+#endif
     N_("\
   -Q,  --quota=NUMBER            set retrieval quota to NUMBER.\n"),
     N_("\
diff -Naur wget-1.13.4_orig/src/Makefile.am wget-1.13.4/src/Makefile.am
--- wget-1.13.4_orig/src/Makefile.am    2011-08-18 13:44:47.000000000 +0200
+++ wget-1.13.4/src/Makefile.am 2012-04-19 17:34:25.663757803 +0200
@@ -37,7 +37,7 @@
 
 # The following line is losing on some versions of make!
 DEFS     = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" 
-DLOCALEDIR=\"$(localedir)\"
-LIBS     = @LIBICONV@ @LIBINTL@ @LIBS@ $(LIB_CLOCK_GETTIME)
+LIBS     = @LIBICONV@ @LIBINTL@ @LIBS@ $(LIB_CLOCK_GETTIME) @libproxy_LIBS@
 
 EXTRA_DIST = css.l css.c css_.c build_info.c.in
 
diff -Naur wget-1.13.4_orig/src/options.h wget-1.13.4/src/options.h
--- wget-1.13.4_orig/src/options.h      2011-08-06 12:24:32.000000000 +0200
+++ wget-1.13.4/src/options.h   2012-04-19 17:34:25.663757803 +0200
@@ -105,6 +105,9 @@
   bool http_keep_alive;                /* whether we use keep-alive */
 
   bool use_proxy;              /* Do we use proxy? */
+#ifdef HAVE_LIBPROXY
+  bool use_proxylib;           /* Do we let libproxy do proxy handling? (only 
relevant if use_proxy == true) */
+#endif
   bool allow_cache;            /* Do we allow server-side caching? */
   char *http_proxy, *ftp_proxy, *https_proxy;
   char **no_proxy;
diff -Naur wget-1.13.4_orig/src/retr.c wget-1.13.4/src/retr.c
--- wget-1.13.4_orig/src/retr.c 2011-08-30 15:47:33.000000000 +0200
+++ wget-1.13.4/src/retr.c      2012-04-19 17:35:04.756269077 +0200
@@ -54,6 +54,10 @@
 #include "html-url.h"
 #include "iri.h"
 
+#ifdef HAVE_LIBPROXY
+#include "proxy.h"
+#endif
+
 /* Total size of downloaded files.  Used to enforce quota.  */
 SUM_SIZE_INT total_downloaded_bytes;
 
@@ -1165,7 +1169,68 @@
   if (no_proxy_match (u->host, (const char **)opt.no_proxy))
     return NULL;
 
-  switch (u->scheme)
+  bool use_libproxy = false;
+
+#ifdef HAVE_LIBPROXY
+  use_libproxy = opt.use_proxylib;
+  if(use_libproxy)
+  {
+    pxProxyFactory *pf = px_proxy_factory_new();
+    if (!pf)
+    {
+      debug_logprintf (_("retr.c, getproxy: Allocating memory for libproxy 
failed"));
+      return NULL;
+    }
+    int i; 
+    const char direct[] = "direct://";
+    struct url *proxy_url;
+    int up_error_code;
+    char *up_error;
+
+    debug_logprintf (_("retr.c, getproxy: asking libproxy which proxy to use 
for url '%s'\n"), u->url);
+    char **proxies = px_proxy_factory_get_proxies(pf, u->url);
+    for(i=0; proxies[i]; i++)
+    {
+      if(strcmp(proxies[i] ,direct) != 0)
+      {
+        // verify we can connect to that proxy, otherwise move along to the 
next one
+        proxy_url = url_parse (proxies[i], &up_error_code, NULL, true);
+        if (proxy_url)
+        {
+          if (connect_to_host(proxy_url->host, proxy_url->port) > 0)
+          {
+            proxy = strdup(proxies[i]);
+          }
+          else
+          {
+            logprintf (LOG_NOTQUIET, _("retr.c, getproxy: Failed to connect to 
proxy URL %s\n"), proxies[i]);
+          }
+        }
+        else
+        {
+          up_error = url_error (proxies[i], up_error_code);
+          logprintf (LOG_NOTQUIET, _("retr.c, getproxy: Error parsing proxy 
URL %s: %s.\n"),
+                     proxies[i], up_error);
+          xfree (up_error);
+        }
+        url_free (proxy_url);
+        if (proxy && *proxy)
+        {
+          // found a valid proxy, interrupt the loop
+          break;
+        }
+      }
+    }
+    debug_logprintf (_("retr.c, getproxy: libproxy suggests to use '%s'\n"), 
proxy ? proxy : direct);
+    for(i=0; proxies[i]; i++) free(proxies[i]);
+    free(proxies);
+    px_proxy_factory_free(pf);
+  }
+#endif
+
+  if (!use_libproxy && (!proxy || !*proxy))
+  {
+    switch (u->scheme)
     {
     case SCHEME_HTTP:
       proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
@@ -1181,6 +1246,7 @@
     case SCHEME_INVALID:
       break;
     }
+  }
   if (!proxy || !*proxy)
     return NULL;
 

Reply via email to