According to Frank Guangxin Liu:
> 
> What I did is
> 1) setup an internal proxy/cache server using "squid".
> 2) configure "squid" so that it connects directly to Intranet hosts
>    and uses firewall proxy for Internet hosts.
> 3) tell htdig to use "squid" for all hosts.

I hadn't thought of that!  I couldn't resist the challenge, though, and as
it's a quiet day I dove in and came up with my own patch, included below.
It's to be applied to the 3.1.0b2 sources, and includes Geoff's fix
from Nov. 8 for not deleting the proxy name prematurely.  With a bit
of fiddling, it could probably also be applied to the current CVS source
tree, but I haven't grabbed a copy of it yet.  Deleting the first two
hunks of the Document.cc diff below would probably do it, unless this
file has seen a lot of recent changes.

Denis, give this a try, with a line in your htdig.conf that specifies
what to exclude, e.g.:

http_proxy_exclude:     http://my.intranet.com/ \
        http://our.other.intranet.com/

It uses a case-insensitive substring match, so you could even specify just
a domain, provided that domain isn't going to show up anywhere else in URLs
that should go through the proxy.  E.g. ".intranet.com/" would exclude
anything that starts with "http://SOMETHING.intranet.com/...", but it would
also exclude something like
"http://outside.our.net/blah/stuff/from/other.intranet.com/hello.html".
In other words, it works like the exclude_urls parameter.

Here's the patch.  I tested the logic of UseProxy(), but I didn't have
a proxy server to test everything thoroughly, so let me know if it works.

--- htdig-3.1.0b2/htcommon/defaults.cc.proxy    Mon Nov  2 18:21:51 1998
+++ htdig-3.1.0b2/htcommon/defaults.cc  Thu Dec  3 13:42:13 1998
@@ -119,6 +119,7 @@
     {"heading_factor_6",               "0"},
     {"htnotify_sender",                        "webmaster@www"},
     {"http_proxy",                     ""},
+    {"http_proxy_exclude",             ""},
     {"image_list",                     "${database_base}.images"},
     {"iso_8601",                        "false"},
     {"keywords_factor",                        "100"},
--- htdig-3.1.0b2/htdig/Document.h.proxy        Mon Nov  2 18:21:51 1998
+++ htdig-3.1.0b2/htdig/Document.h      Thu Dec  3 15:45:05 1998
@@ -132,6 +132,7 @@
 
     int                                readHeader(Connection &);
     time_t                     getdate(char *datestring);
+    int                                UseProxy();
 };
 
 #endif
--- htdig-3.1.0b2/htdig/Document.cc.proxy       Mon Nov  2 18:21:51 1998
+++ htdig-3.1.0b2/htdig/Document.cc     Thu Dec  3 15:45:12 1998
@@ -167,9 +167,6 @@
     if (!url)
       delete url;
     url = 0;
-    if (!proxy)
-      delete proxy;
-    proxy = 0;
     referer = 0;
     if(config.Boolean("modification_time_is_now"))
        modtime = time(NULL);
@@ -182,6 +179,10 @@
 
     // Don't reset the authorization since it's a pain to set up again.
     //    authorization = 0;
+    // Don't reset the proxy since it's a pain to set up too.
+    //    if (!proxy)
+    //      delete proxy;
+    //    proxy = 0;
 }
 
 
@@ -305,6 +306,43 @@
 
 
 //*****************************************************************************
+// int Document::UseProxy()
+//   Returns 1 if the given url is to be retrieved from the proxy server,
+//   or 0 if it's not.
+//
+int
+Document::UseProxy()
+{
+    static StringMatch *excludeproxy = 0;
+
+    //
+    // Initialize excludeproxy list if this is the first time.
+    //
+    if (!excludeproxy)
+    {
+       excludeproxy = new StringMatch();
+
+       String t = config["http_proxy_exclude"];
+       String pattern;
+       char *p = strtok(t, " \t");
+       while (p)       
+       {
+           if (pattern.length())
+               pattern << '|';
+           pattern << p;
+           p = strtok(0, " \t");
+       }
+       excludeproxy->IgnoreCase();
+       excludeproxy->Pattern(pattern);
+    }
+
+    if (proxy && excludeproxy->FindFirst(url->get()) < 0)
+       return 1;
+    return 0;
+}
+
+
+//*****************************************************************************
 // DocStatus Document::RetrieveHTTP(time_t date)
 //   Attempt to retrieve the document pointed to by our internal URL
 //
@@ -315,7 +353,8 @@
     if (c.open() == NOTOK)
        return Document_not_found;
 
-    if (proxy)
+    int                useproxy = UseProxy();
+    if (useproxy)
     {
        if (c.assign_port(proxy->port()) == NOTOK)
            return Document_not_found;
@@ -335,6 +374,10 @@
        if (debug > 1)
        {
            cout << "Unable to build connection with " << url->host() << ':' << url->port() << endl;
+           if (useproxy)
+           {
+               cout << "(Via proxy " << proxy->host() << ':' << proxy->port() << ')' << endl;
+           }
        }
        return Document_no_server;
     }
@@ -344,7 +387,7 @@
     //
     String        command = "GET ";
 
-    if (proxy)
+    if (useproxy)
     {
        command << url->get() << " HTTP/1.0\r\n";
     }

-- 
Gilles R. Detillieux              E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930
----------------------------------------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the body of the message.

Reply via email to