I've attached a patch that adds support for using libpsl for cookie
domain checking in Wget.

The old heuristic checks still remain as a fallback. When the libpsl
library on the system is built without the built-in list, Wget simply
falls back to the old heuristic checks. Similarly, if Wget is built
without libpsl support, it continues to use the old cookie domain
checking code.

I've removed the check for numeric addresses since it seems unneeded.
The host and cookie_host variables will be compared for an exact match
either way.

-- 
Thanking You,
Darshit Shah
From 0fcfb1d85545feee30abd964ec3346a01483a3aa Mon Sep 17 00:00:00 2001
From: Darshit Shah <dar...@gmail.com>
Date: Fri, 30 May 2014 22:10:12 +0530
Subject: [PATCH] Support libpsl for cookie domain checking

---
 ChangeLog           |  5 +++++
 NEWS                |  2 ++
 README.checkout     | 39 +++++++++++++++++++++------------------
 configure.ac        | 11 +++++++++++
 src/ChangeLog       |  6 +++++-
 src/build_info.c.in |  1 +
 src/cookies.c       | 24 +++++++++++++++++++-----
 7 files changed, 64 insertions(+), 24 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a48e469..cb3114d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2014-05-30  Darshit Shah  <dar...@gmail.com>
+
+	* configure.ac: Allow compilation without libpsl.
+	* README.checkout: Add libpsl as a dependency.
+
 2014-05-24  Giuseppe Scrivano  <gscri...@redhat.com>
 
 	* gnulib: update module.
diff --git a/NEWS b/NEWS
index d911feb..2922cde 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
 
 * Changes in Wget X.Y.Z
 
+** Use libpsl for verifying cookie domains
+
 ** Default progress bar output changed
 
 ** Introduce --show-progress to force display the progress bar
diff --git a/README.checkout b/README.checkout
index 7c0f42b..ef0eded 100644
--- a/README.checkout
+++ b/README.checkout
@@ -68,6 +68,8 @@ Compiling From Repository Sources
 
      * [34]git is used to fetch gnulib files trough the bootstrap.sh script.
 
+     * [35]libpsl is (optionally) required for checking cookie domains.
+
    For those who might be confused as to what to do once they check out
    the source code, considering configure and Makefile do not yet exist at
    that point, a shell script called bootstrap.sh has been provided. After
@@ -96,21 +98,21 @@ Compiling From Repository Sources
 
  Originally written by Hrvoje Niksic <hnik...@xemacs.org>.
 
-     * [35]Edit
-     * [36]Comments
-     * [37]Info
-     * [38]Attachments
+     * [36]Edit
+     * [37]Comments
+     * [38]Info
+     * [39]Attachments
      * More Actions:
        [Raw Text................] Do
 
-     * [39]MoinMoin Powered
-     * [40]Python Powered
-     * [41]GPL licensed
-     * [42]Valid HTML 4.01
+     * [40]MoinMoin Powered
+     * [41]Python Powered
+     * [42]GPL licensed
+     * [43]Valid HTML 4.01
      __________________________________________________________________
 
    All content © 2007 Free Software Foundation. For terms of use,
-   redistribution, and modification, please see the [43]WikiLicense page.
+   redistribution, and modification, please see the [44]WikiLicense page.
 
 References
 
@@ -129,12 +131,13 @@ References
   32. http://www.gnu.org/software/libidn/
   33. http://www.gnu.org/software/libiconv/
   34. http://git-scm.com/
-  35. http://wget.addictivecode.org/CompilingRepoSources?action=edit&editor=text
-  36. http://wget.addictivecode.org/CompilingRepoSources
-  37. http://wget.addictivecode.org/CompilingRepoSources?action=info
-  38. http://wget.addictivecode.org/CompilingRepoSources?action=AttachFile
-  39. http://moinmo.in/
-  40. http://moinmo.in/Python
-  41. http://moinmo.in/GPL
-  42. http://validator.w3.org/check?uri=referer
-  43. http://wget.addictivecode.org/WikiLicense
+  35. https://github.com/rockdaboot/libpsl
+  36. http://wget.addictivecode.org/CompilingRepoSources?action=edit&editor=text
+  37. http://wget.addictivecode.org/CompilingRepoSources
+  38. http://wget.addictivecode.org/CompilingRepoSources?action=info
+  39. http://wget.addictivecode.org/CompilingRepoSources?action=AttachFile
+  40. http://moinmo.in/
+  41. http://moinmo.in/Python
+  42. http://moinmo.in/GPL
+  43. http://validator.w3.org/check?uri=referer
+  44. http://wget.addictivecode.org/WikiLicense
diff --git a/configure.ac b/configure.ac
index c5437bf..b9b5253 100644
--- a/configure.ac
+++ b/configure.ac
@@ -61,6 +61,10 @@ dnl
 dnl Process features.
 dnl
 
+AC_ARG_WITH(libpsl,
+    AS_HELP_STRING([--without-libpsl],
+                   [disable support for libpsl cookie checking.]))
+
 AC_ARG_WITH(ssl,
 [[  --without-ssl           disable SSL autodetection
   --with-ssl={gnutls,openssl} specify the SSL backend.  GNU TLS is the default.]])
@@ -237,6 +241,11 @@ dnl
 dnl Checks for libraries.
 dnl
 
+AS_IF([test x"$with_libpsl" != xno], [
+  with_libpsl=yes
+  AC_CHECK_LIB([psl-0.2], [psl_builtin])
+])
+
 AS_IF([test x"$with_zlib" != xno], [
   with_zlib=yes
   AC_CHECK_LIB(z, compress)
@@ -358,6 +367,7 @@ else
   fi
 fi
 
+
 dnl **********************************************************************
 dnl Checks for IPv6
 dnl **********************************************************************
@@ -580,6 +590,7 @@ AC_MSG_NOTICE([Summary of build options:
   Libs:              $LIBS
   SSL:               $with_ssl
   Zlib:              $with_zlib
+  PSL:               $with_libpsl
   Digest:            $ENABLE_DIGEST
   NTLM:              $ENABLE_NTLM
   OPIE:              $ENABLE_OPIE
diff --git a/src/ChangeLog b/src/ChangeLog
index ceac5aa..10dd42b 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,10 @@
 2014-05-30  Darshit Shah  <dar...@gmail.com>
 
+	* cookies.c (check_domain_match): Use libpsl to check if the cookie domain
+	is valid. Also remove unneeded test for numeric addresses.
+
+2014-05-30  Darshit Shah  <dar...@gmail.com>
+
 	* connect.{c,h}, convert.{c,h}, cookies.{c,h}, ftp-ls.c, ftp.h, gettext.h,
 	hash.h, host.h, html-parse.h, html-url.h, http.c, init.c, main.c, mswindows.c,
 	netrc.h, openssl.c, options.h, ptimer.h, recur.c, retr.c, sysdep.h, url.h,
@@ -22,7 +27,6 @@
 	needs to modify the string.
 	(bar_set_params): Add support for noscroll parameter to bar.
 
-
 2014-05-03  Tim Ruehsen  <tim.rueh...@gmx.de>
 
 	* ftp-ls.c (ftp_parse_vms_ls): Explicitly typecast strlen's output
diff --git a/src/build_info.c.in b/src/build_info.c.in
index c0b1677..1078af2 100644
--- a/src/build_info.c.in
+++ b/src/build_info.c.in
@@ -7,6 +7,7 @@ large-file      SIZEOF_OFF_T >= 8
 nls             defined ENABLE_NLS
 ntlm            defined ENABLE_NTLM
 opie            defined ENABLE_OPIE
+psl             defined HAVE_LIBPSL_0_2
 
 ssl choice:
     openssl     defined HAVE_LIBSSL || defined HAVE_LIBSSL32
diff --git a/src/cookies.c b/src/cookies.c
index 7f5ba96..3dfc382 100644
--- a/src/cookies.c
+++ b/src/cookies.c
@@ -51,6 +51,7 @@ as that of the covered work.  */
 #include <assert.h>
 #include <errno.h>
 #include <time.h>
+#include <libpsl.h>
 #include "utils.h"
 #include "hash.h"
 #include "cookies.h"
@@ -503,14 +504,27 @@ numeric_address_p (const char *addr)
 static bool
 check_domain_match (const char *cookie_domain, const char *host)
 {
+
+#ifdef HAVE_LIBPSL_0_2
   DEBUGP (("cdm: 1"));
+  const psl_ctx_t *psl;
+  int is_acceptable;
+
+  if (!(psl = psl_builtin()))
+    {
+      DEBUGP (("\nlibpsl not built with a public suffix list. "
+               "Falling back to simple heuristics.\n"));
+      goto no_psl;
+    }
+
+  is_acceptable = psl_is_cookie_domain_acceptable (psl, host, cookie_domain);
+  return true ? (is_acceptable == 1) : false;
 
-  /* Numeric address requires exact match.  It also requires HOST to
-     be an IP address.  */
-  if (numeric_address_p (cookie_domain))
-    return 0 == strcmp (cookie_domain, host);
+no_psl:
+#endif
 
-  DEBUGP ((" 2"));
+  /* For efficiency make some elementary checks first */
+  DEBUGP (("cdm: 2"));
 
   /* For the sake of efficiency, check for exact match first. */
   if (0 == strcasecmp (cookie_domain, host))
-- 
1.9.3

Reply via email to