On Sun, 03 Jul 2016 at 07:54:05 +0200, Chris Lamb wrote:
> libhtml-tidy-perl fails to build from source in unstable/amd64

I don't think it was necessarily a good idea to forward this upstream:
the build failure is because Debian forked this Perl module to stop using
its author's fork (tidyp) of the underlying C library (tidy), and now we've
also replaced the C library with a different fork (tidy-html5), which is
what's making the tests fail.

The author of HTML::Tidy seems quite likely to respond "if you had
packaged my fork of tidy like the documentation told you to, you wouldn't
have this problem".

However, tidy-html5 seems likely to be better than either tidy or tidyp,
so hopefully the HTML::Tidy author will be somewhat receptive to the idea
of supporting tidy-html5.

Possible patches for the Debian packaging attached, also available from
<git+ssh://git.debian.org/git/users/smcv/libhtml-tidy-perl.git -b master>.

    S
From 2bca8ddf43494c9f6d2b5c516088e0c3cf5682ac Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 22 Jul 2016 09:45:53 +0100
Subject: [PATCH 1/4] d/p/fix-error-message-in-webtidy: move to end of patch
 series

This is a firmly Debian-specific change that is not suitable for upstream.
---
 debian/changelog      | 5 +++++
 debian/patches/series | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/debian/changelog b/debian/changelog
index 1eeac67..d0db8e1 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,11 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
   [ gregor herrmann ]
   * debian/copyright: change Copyright-Format 1.0 URL to HTTPS.
 
+  [ Simon McVittie ]
+  * d/p/fix-error-message-in-webtidy: move to end of patch series.
+    This is a firmly Debian-specific change that is not suitable for
+    upstream.
+
  -- gregor herrmann <gre...@debian.org>  Thu, 27 Feb 2014 22:36:29 +0100
 
 libhtml-tidy-perl (1.56-1) unstable; urgency=low
diff --git a/debian/patches/series b/debian/patches/series
index 57718d6..4adbeaa 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,3 @@
-fix-error-message-in-webtidy
 remove-tidy_version.patch
 tidy-not-tidyp.patch
+fix-error-message-in-webtidy
-- 
2.8.1

From 447aa2e692a4276dc51c0e1d958c19f231c54279 Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 22 Jul 2016 09:46:18 +0100
Subject: [PATCH 2/4] d/patches: put all patches in the git style allowed by
 DEP-3, so they can be manipulated with gbp-pq

---
 debian/changelog                            |  2 +
 debian/patches/fix-error-message-in-webtidy | 13 +++--
 debian/patches/remove-tidy_version.patch    | 79 ++++++++++++++++++-----------
 debian/patches/tidy-not-tidyp.patch         | 19 +++++--
 4 files changed, 76 insertions(+), 37 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index d0db8e1..2c0a999 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -14,6 +14,8 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
   * d/p/fix-error-message-in-webtidy: move to end of patch series.
     This is a firmly Debian-specific change that is not suitable for
     upstream.
+  * d/patches: put all patches in the git style allowed by DEP-3,
+    so they can be manipulated with gbp-pq
 
  -- gregor herrmann <gre...@debian.org>  Thu, 27 Feb 2014 22:36:29 +0100
 
diff --git a/debian/patches/fix-error-message-in-webtidy b/debian/patches/fix-error-message-in-webtidy
index afdfb7c..0efb8d3 100644
--- a/debian/patches/fix-error-message-in-webtidy
+++ b/debian/patches/fix-error-message-in-webtidy
@@ -1,10 +1,17 @@
-Description: make webtidy error message more debianish
-Author: Ryan Niebur <r...@debian.org>
+From: Ryan Niebur <r...@debian.org>
+Date: Tue, 2 Jun 2009 21:15:36 -0700
+Subject: make webtidy error message more debianish
+
 Forwarded: not-needed
+---
+ bin/webtidy | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
 
+diff --git a/bin/webtidy b/bin/webtidy
+index be57814..4a92423 100755
 --- a/bin/webtidy
 +++ b/bin/webtidy
-@@ -29,7 +29,7 @@
+@@ -29,7 +29,7 @@ for my $url ( @ARGV ) {
      my @lines;
      if ( $url =~ /^https?:/ ) {
          if ( !eval { require LWP::Simple; 1; } ) {
diff --git a/debian/patches/remove-tidy_version.patch b/debian/patches/remove-tidy_version.patch
index 6bbeffa..7b553f2 100644
--- a/debian/patches/remove-tidy_version.patch
+++ b/debian/patches/remove-tidy_version.patch
@@ -1,15 +1,26 @@
-Description: remove tidyVersion as it is a special call
- to Andy Lester's modified version of libtidy. Also remove
- the corresponding call from Perl, the documentation, and
- the tests.
-Author: gregor herrmann <gre...@debian.org>
+From: gregor herrmann <gre...@debian.org>
+Date: Sat, 20 Feb 2010 09:50:30 -0500
+Subject: remove tidyVersion
+
+It is a special call to Andy Lester's modified version of libtidy. Also
+remove the corresponding call from Perl, the documentation, and the tests.
+
 Reviewed-by: Jonathan Yu <jaw...@cpan.org>
 Origin: vendor
 Forwarded: not-needed
+---
+ Tidy.xs          | 11 -----------
+ bin/webtidy      |  2 +-
+ lib/HTML/Tidy.pm | 17 -----------------
+ t/00-load.t      |  2 +-
+ t/version.t      |  5 +++++
+ 5 files changed, 7 insertions(+), 30 deletions(-)
 
+diff --git a/Tidy.xs b/Tidy.xs
+index 2238b0b..630b550 100644
 --- a/Tidy.xs
 +++ b/Tidy.xs
-@@ -189,14 +189,3 @@
+@@ -189,14 +189,3 @@ _tidy_clean(input, configfile, tidy_options)
          if ( rc < 0 ) {
              XSRETURN_UNDEF;
          }
@@ -24,24 +35,24 @@ Forwarded: not-needed
 -        RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
 -    OUTPUT:
 -        RETVAL
---- a/t/version.t
-+++ b/t/version.t
-@@ -7,7 +7,12 @@
- 
- use HTML::Tidy;
+diff --git a/bin/webtidy b/bin/webtidy
+index dc1a979..be57814 100755
+--- a/bin/webtidy
++++ b/bin/webtidy
+@@ -19,7 +19,7 @@ GetOptions(
+ ) or $help = 1;
  
-+SKIP: {
-+    skip 'libtidy_version has been removed in Debian', 4;
-+
- for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
-     like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
-     cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
+ if ( !@ARGV || $help ) {
+-    print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
++    print "webtidy v$HTML::Tidy::VERSION \n";
+     print <DATA>;
+     exit 1;
  }
-+
-+}
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index 32a5005..a172ea2 100644
 --- a/lib/HTML/Tidy.pm
 +++ b/lib/HTML/Tidy.pm
-@@ -341,23 +341,6 @@
+@@ -341,23 +341,6 @@ sub _is_keeper {
      return 1;
  }
  
@@ -65,23 +76,31 @@ Forwarded: not-needed
  require XSLoader;
  XSLoader::load('HTML::Tidy', $VERSION);
  
+diff --git a/t/00-load.t b/t/00-load.t
+index b40452d..d10902e 100644
 --- a/t/00-load.t
 +++ b/t/00-load.t
-@@ -8,5 +8,5 @@
+@@ -8,5 +8,5 @@ use Test::More tests => 1;
  use HTML::Tidy;
  use HTML::Tidy::Message;
  
 -diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $]; tidyp " . HTML::Tidy->tidyp_version() );
 +diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $];" );
  pass( 'Modules loaded' );
---- a/bin/webtidy
-+++ b/bin/webtidy
-@@ -19,7 +19,7 @@
- ) or $help = 1;
+diff --git a/t/version.t b/t/version.t
+index f225e3e..b05a7da 100644
+--- a/t/version.t
++++ b/t/version.t
+@@ -7,7 +7,12 @@ use Test::More tests => 4;
  
- if ( !@ARGV || $help ) {
--    print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
-+    print "webtidy v$HTML::Tidy::VERSION \n";
-     print <DATA>;
-     exit 1;
+ use HTML::Tidy;
+ 
++SKIP: {
++    skip 'libtidy_version has been removed in Debian', 4;
++
+ for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
+     like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
+     cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
  }
++
++}
diff --git a/debian/patches/tidy-not-tidyp.patch b/debian/patches/tidy-not-tidyp.patch
index 1b781cd..9875169 100644
--- a/debian/patches/tidy-not-tidyp.patch
+++ b/debian/patches/tidy-not-tidyp.patch
@@ -1,9 +1,18 @@
-Description: look for and use tidy.h in /usr/include/tidy (from libtidy-dev)
-Author: Florian Schlichting <f...@debian.org>
+From: Florian Schlichting <f...@debian.org>
+Date: Fri, 4 Oct 2013 23:39:39 +0200
+Subject: look for and use tidy.h in /usr/include/tidy (from libtidy-dev)
+
 Forwarded: not-needed
+---
+ Makefile.PL | 6 +++---
+ Tidy.xs     | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/Makefile.PL b/Makefile.PL
+index 1bca2ef..6ad1b7f 100644
 --- a/Makefile.PL
 +++ b/Makefile.PL
-@@ -10,7 +10,7 @@
+@@ -10,7 +10,7 @@ use ExtUtils::Liblist;
  use Config;
  
  my $libs = '-ltidyp';
@@ -12,7 +21,7 @@ Forwarded: not-needed
  
  eval { require Alien::Tidyp; };
  
-@@ -20,8 +20,8 @@
+@@ -20,8 +20,8 @@ if ( !$@ ) {
      $inc = Alien::Tidyp->config('INC');
  }
  else {
@@ -23,6 +32,8 @@ Forwarded: not-needed
      $libs = $vars[2];
  
      if ( !$libs ) {
+diff --git a/Tidy.xs b/Tidy.xs
+index 630b550..a8bbe82 100644
 --- a/Tidy.xs
 +++ b/Tidy.xs
 @@ -2,7 +2,7 @@
-- 
2.8.1

From f2652c498511e9de6ce6b06eb0445cfa532ce20e Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 22 Jul 2016 09:49:52 +0100
Subject: [PATCH 3/4] Add patches to make the tests pass with tidy-html5
 providing libtidy

Closes: #829409
---
 debian/changelog                                   |   8 ++
 debian/control                                     |   2 +-
 ...tidy-html5-s-differently-formatted-summar.patch |  24 ++++
 debian/patches/series                              |   6 +
 .../patches/t-allow-tidy-html5-as-generator.patch  |  47 ++++++++
 ...ert-that-tidy-will-add-a-HTML-3.2-doctype.patch |  52 +++++++++
 ...-don-t-assert-that-the-DOCTYPE-is-preserv.patch |  36 ++++++
 ...-HTML5-DOCTYPE-to-get-warnings-about-unes.patch | 128 +++++++++++++++++++++
 ...s.t-adjust-expected-result-for-tidy-html5.patch |  58 ++++++++++
 9 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
 create mode 100644 debian/patches/t-allow-tidy-html5-as-generator.patch
 create mode 100644 debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
 create mode 100644 debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
 create mode 100644 debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
 create mode 100644 debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch

diff --git a/debian/changelog b/debian/changelog
index 2c0a999..2101180 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,6 +16,14 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
     upstream.
   * d/patches: put all patches in the git style allowed by DEP-3,
     so they can be manipulated with gbp-pq
+  * d/p/lib-ignore-tidy-html5-s-differently-formatted-summar.patch,
+    d/p/t-allow-tidy-html5-as-generator.patch,
+    d/p/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch,
+    d/p/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch,
+    d/p/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch,
+    d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch:
+    add patches to make the tests pass with tidy-html5 providing libtidy
+    (Closes: #829409)
 
  -- gregor herrmann <gre...@debian.org>  Thu, 27 Feb 2014 22:36:29 +0100
 
diff --git a/debian/control b/debian/control
index 0ca02f8..072fde5 100644
--- a/debian/control
+++ b/debian/control
@@ -7,7 +7,7 @@ Section: perl
 Priority: optional
 Build-Depends: debhelper (>= 9.20120312),
                help2man,
-               libtidy-dev,
+               libtidy-dev (>= 1:5.2.0),
                libtest-pod-perl,
                libtest-pod-coverage-perl,
                perl (>= 5.13.11) | libtest-simple-perl (>= 0.98),
diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
new file mode 100644
index 0000000..04801b2
--- /dev/null
+++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
@@ -0,0 +1,24 @@
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 09:19:08 +0100
+Subject: lib: ignore tidy-html5's differently-formatted summary line
+
+Signed-off-by: Simon McVittie <s...@debian.org>
+---
+ lib/HTML/Tidy.pm | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index a172ea2..aa16c79 100644
+--- a/lib/HTML/Tidy.pm
++++ b/lib/HTML/Tidy.pm
+@@ -265,6 +265,10 @@ sub _parse_errors {
+             # Summary line we don't want
+ 
+         }
++        elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) {
++            # Summary line we don't want
++
++        }
+         elsif ( $line eq 'No warnings or errors were found.' ) {
+             # Summary line we don't want
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 4adbeaa..0fa4da0 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,9 @@
 remove-tidy_version.patch
 tidy-not-tidyp.patch
+lib-ignore-tidy-html5-s-differently-formatted-summar.patch
+t-allow-tidy-html5-as-generator.patch
+t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
+t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
+t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
+t-venus.t-adjust-expected-result-for-tidy-html5.patch
 fix-error-message-in-webtidy
diff --git a/debian/patches/t-allow-tidy-html5-as-generator.patch b/debian/patches/t-allow-tidy-html5-as-generator.patch
new file mode 100644
index 0000000..c3d6f30
--- /dev/null
+++ b/debian/patches/t-allow-tidy-html5-as-generator.patch
@@ -0,0 +1,47 @@
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 09:21:39 +0100
+Subject: t: allow tidy-html5 as generator
+
+Signed-off-by: Simon McVittie <s...@debian.org>
+---
+ t/roundtrip.t | 2 +-
+ t/unicode.t   | 6 ++----
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/t/roundtrip.t b/t/roundtrip.t
+index e37cb88..2590795 100644
+--- a/t/roundtrip.t
++++ b/t/roundtrip.t
+@@ -25,7 +25,7 @@ my @messages = $tidy->messages( $clean );
+ 
+ is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
+ 
+-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
+ 
+ my $expected = do { local $/ = undef; <DATA> };
+ is( $clean, $expected, 'Cleaned up properly' );
+diff --git a/t/unicode.t b/t/unicode.t
+index 2f45384..9ca6370 100644
+--- a/t/unicode.t
++++ b/t/unicode.t
+@@ -30,8 +30,7 @@ ok(utf8::is_utf8($reference), 'reference is utf8');
+ my $clean = $tidy->clean( $html );
+ ok(utf8::is_utf8($clean), 'cleaned output is also unicode');
+ 
+-$clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
+ is($clean, $reference, q{Cleanup didn't break anything});
+ 
+ my @messages = $tidy->messages;
+@@ -49,8 +48,7 @@ subtest 'Try send bytes to clean method.' => sub {
+     ok(!utf8::is_utf8($html), 'html is row bytes');
+     my $clean = $tidy->clean( $html );
+     ok(utf8::is_utf8($clean), 'but cleaned output is string');
+-    $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+-    $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++    $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
+     is($clean, $reference, q{Cleanup didn't break anything});
+ };
+ 
diff --git a/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
new file mode 100644
index 0000000..abbaffc
--- /dev/null
+++ b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
@@ -0,0 +1,52 @@
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 09:32:09 +0100
+Subject: t: don't assert that tidy will add a HTML 3.2 doctype
+
+tidy-html5 adds the HTML5 doctype, <!DOCTYPE html>.
+
+Signed-off-by: Simon McVittie <s...@debian.org>
+---
+ t/roundtrip.t | 3 ++-
+ t/wordwrap.t  | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/t/roundtrip.t b/t/roundtrip.t
+index 2590795..68be07f 100644
+--- a/t/roundtrip.t
++++ b/t/roundtrip.t
+@@ -26,12 +26,13 @@ my @messages = $tidy->messages( $clean );
+ is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
+ 
+ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
++$clean =~ s/<!DOCTYPE html[^>]*>/<!DOCTYPE html>/;
+ 
+ my $expected = do { local $/ = undef; <DATA> };
+ is( $clean, $expected, 'Cleaned up properly' );
+ 
+ __DATA__
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="generator" content="Tidy">
+diff --git a/t/wordwrap.t b/t/wordwrap.t
+index 717d895..3f4daaf 100644
+--- a/t/wordwrap.t
++++ b/t/wordwrap.t
+@@ -11,7 +11,7 @@ my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp};
+ 
+ my $expected=<<'EOD';
+ <!DOCTYPE 
+-html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++html>
+ <html>
+ <head>
+ <title>
+@@ -32,6 +32,7 @@ my $cfg = 't/wordwrap.cfg';
+ my $tidy = HTML::Tidy->new( {config_file => $cfg} );
+ 
+ my $result = $tidy->clean( $input );
++$result =~ s/<!DOCTYPE ?\nhtml[^>]*>/<!DOCTYPE \nhtml>/;
+ my @result = split(/\n/, $result);
+ is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected');
+ 
diff --git a/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
new file mode 100644
index 0000000..48dda49
--- /dev/null
+++ b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
@@ -0,0 +1,36 @@
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 09:29:39 +0100
+Subject: t/unicode.t: don't assert that the DOCTYPE is preserved
+
+tidy-html5 currently doesn't preserve user-supplied DOCTYPEs
+in output: <https://github.com/htacg/tidy-html5/issues/435>
+
+Signed-off-by: Simon McVittie <s...@debian.org>
+---
+ t/unicode.html | 2 +-
+ t/unicode.t    | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/t/unicode.html b/t/unicode.html
+index a90f83f..c8d1804 100644
+--- a/t/unicode.html
++++ b/t/unicode.html
+@@ -1,4 +1,4 @@
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>日本語のホムページ</title>
+diff --git a/t/unicode.t b/t/unicode.t
+index 9ca6370..679b48a 100644
+--- a/t/unicode.t
++++ b/t/unicode.t
+@@ -53,7 +53,7 @@ subtest 'Try send bytes to clean method.' => sub {
+ };
+ 
+ __DATA__
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="generator" content="Tidy">
diff --git a/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
new file mode 100644
index 0000000..e010fc9
--- /dev/null
+++ b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
@@ -0,0 +1,128 @@
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 09:27:26 +0100
+Subject: t: use a pre-HTML5 DOCTYPE to get warnings about unescaped ampersand
+
+HTML5 defines an ampersand followed by whitespace to be unambiguously
+an ampersand, matching what browsers have always done in practice.
+As a result, tidy-html5 does not warn about them when the doctype
+is either HTML5 or missing (lack of a DOCTYPE is treated as HTML5,
+on the basis that HTML5 is a closer match for what browsers actually
+do than any previous standard). Discussion here:
+<https://github.com/htacg/tidy-html5/issues/207>
+
+Adding the DOCTYPE throws off some of the line numbering, which needs
+adjusting.
+
+t/ignore-text.t also seems to rely on the missing DOCTYPE provoking a
+warning, which is obviously not going to happen now that we've
+added one, to be able to verify that case-insensitive ignoring
+can work. Add a new error so we can ignore that instead.
+
+Signed-off-by: Simon McVittie <s...@debian.org>
+---
+ t/ignore-text.t |  8 +++++---
+ t/ignore.t      | 10 +++++-----
+ t/levels.t      | 10 +++++-----
+ 3 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/t/ignore-text.t b/t/ignore-text.t
+index 9695a5a..a95e0e0 100644
+--- a/t/ignore-text.t
++++ b/t/ignore-text.t
+@@ -10,8 +10,8 @@ use HTML::Tidy;
+ my $html = do { local $/; <DATA> };
+ 
+ my @expected_messages = split /\n/, q{
+-DATA (24:XX) Warning: unescaped & which should be written as &amp;
+-DATA (24:XX) Warning: unescaped & which should be written as &amp;
++DATA (26:XX) Warning: unescaped & which should be written as &amp;
++DATA (26:XX) Warning: unescaped & which should be written as &amp;
+ };
+ 
+ chomp @expected_messages;
+@@ -22,7 +22,7 @@ IGNORE_BOGOTAG: {
+     isa_ok( $tidy, 'HTML::Tidy' );
+ 
+     $tidy->ignore( text => qr/bogotag/ );
+-    $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] );
++    $tidy->ignore( text => [ qr/UNESCAPED/, qr/case-insensitive/i ] );
+     # The qr/UNESCAPED/ should not ignore anything because there's no /i
+     my $rc = $tidy->parse( 'DATA', $html );
+     ok( $rc, 'Parsed OK' );
+@@ -44,6 +44,7 @@ sub munge_returned {
+     }
+ }
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+@@ -67,6 +68,7 @@ DIV.TOC P {
+ </HEAD>
+ <BODY BGCOLOR="white">
+ <BOGOTAG>
++<CASE-INSENSITIVE>
+     <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
+ 	<DIV CLASS="TOC">
+ 	<h2>Perl, Programming &amp; Writing</h2>
+diff --git a/t/ignore.t b/t/ignore.t
+index 3991733..c0a1317 100644
+--- a/t/ignore.t
++++ b/t/ignore.t
+@@ -10,16 +10,15 @@ use HTML::Tidy;
+ my $html = do { local $/ = undef; <DATA> };
+ 
+ my @expected_warnings = split /\n/, q{
+-- (1:1) Warning: missing <!DOCTYPE> declaration
+-- (23:1) Warning: discarding unexpected <bogotag>
+-- (24:XX) Warning: unescaped & which should be written as &amp;
+-- (24:XX) Warning: unescaped & which should be written as &amp;
++- (24:1) Warning: discarding unexpected <bogotag>
++- (25:XX) Warning: unescaped & which should be written as &amp;
++- (25:XX) Warning: unescaped & which should be written as &amp;
+ };
+ chomp @expected_warnings;
+ shift @expected_warnings; # First one's blank
+ 
+ my @expected_errors = split /\n/, q{
+-- (23:1) Error: <bogotag> is not recognized!
++- (24:1) Error: <bogotag> is not recognized!
+ };
+ chomp @expected_errors;
+ shift @expected_errors; # First one's blank
+@@ -71,6 +70,7 @@ sub munge_returned {
+     }
+ }
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+diff --git a/t/levels.t b/t/levels.t
+index 01aeb3b..2ee3162 100644
+--- a/t/levels.t
++++ b/t/levels.t
+@@ -13,11 +13,10 @@ my $rc = $tidy->parse( '-', <DATA> );
+ ok( $rc, 'Parsed OK' );
+ 
+ my @expected = split /\n/, q{
+-- (1:1) Warning: missing <!DOCTYPE> declaration
+-- (23:1) Error: <bogotag> is not recognized!
+-- (23:1) Warning: discarding unexpected <bogotag>
+-- (24:XX) Warning: unescaped & which should be written as &amp;
+-- (24:XX) Warning: unescaped & which should be written as &amp;
++- (24:1) Error: <bogotag> is not recognized!
++- (24:1) Warning: discarding unexpected <bogotag>
++- (25:XX) Warning: unescaped & which should be written as &amp;
++- (25:XX) Warning: unescaped & which should be written as &amp;
+ };
+ chomp @expected;
+ shift @expected; # First one's blank
+@@ -41,6 +40,7 @@ sub munge_returned {
+ }
+ 
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
diff --git a/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
new file mode 100644
index 0000000..004b09f
--- /dev/null
+++ b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
@@ -0,0 +1,58 @@
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 09:34:53 +0100
+Subject: t/venus.t: adjust expected result for tidy-html5
+
+tidy-html5 allows arbitrary block content inside <address>, whereas
+traditional tidy only allowed inline content.
+Reference: <https://github.com/htacg/tidy-html5/issues/55>
+
+This change will break with traditional tidy, and it isn't clear
+to me how to remain compatible with both.
+
+Signed-off-by: Simon McVittie <s...@debian.org>
+---
+ t/venus.t | 32 +++++++++++++++++++-------------
+ 1 file changed, 19 insertions(+), 13 deletions(-)
+
+diff --git a/t/venus.t b/t/venus.t
+index 41ee597..bd94d4b 100755
+--- a/t/venus.t
++++ b/t/venus.t
+@@ -72,18 +72,24 @@ __DATA__
+       <a href="../../General/Credits.html">Credits</a> | 
+       <a href="../../General/Feedback.html">Feedback</a> |</h4>
+     </center>
+-    <center>
+-      <p>
+-        <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
+-      </p>
+-    </center>
+-    <div align="center"></div>
+-    <center>
+-      <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
+-      <address> </address>
+-      <address>Authors: Twin Groves Museums in the Classroom Team,</address>
+-      <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
+-    </center>
+-    <center>Created: 27 June 1998- Updated: 6 October 2003</center>
++    <address>
++      <center>
++        <p>
++          <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
++        </p>
++      </center>
++      <div align="center"></div>
++      <address>
++        <center>
++          <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
++          <address> </address>
++          <address>Authors: Twin Groves Museums in the Classroom Team,</address>
++          <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
++        </center>
++      </address>
++      <address>
++        <center>Created: 27 June 1998- Updated: 6 October 2003</center>
++      </address>
++    </address>
+   </body>
+ </html>
-- 
2.8.1

From 357626ff2e00753b2980f86e3884eed3b092c7f4 Mon Sep 17 00:00:00 2001
From: Simon McVittie <s...@debian.org>
Date: Fri, 22 Jul 2016 10:38:10 +0100
Subject: [PATCH 4/4] d/p/tidy-not-tidyp.patch: alter to support either tidyp
 or tidy-html5

This reinstates the (now somewhat misleadingly named) _tidyp_version
function by using tidy-html5's tidyLibraryVersion(). This is hopefully
more palatable to upstream.

d/p/remove-tidy_version.patch: drop, no longer needed.
---
 debian/changelog                                   |   5 +
 debian/patches/fix-error-message-in-webtidy        |   2 +-
 ...tidy-html5-s-differently-formatted-summar.patch |   4 +-
 debian/patches/remove-tidy_version.patch           | 106 -------------------
 debian/patches/series                              |   1 -
 debian/patches/tidy-not-tidyp.patch                | 115 +++++++++++++++------
 6 files changed, 94 insertions(+), 139 deletions(-)
 delete mode 100644 debian/patches/remove-tidy_version.patch

diff --git a/debian/changelog b/debian/changelog
index 2101180..3eb2bbe 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -24,6 +24,11 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
     d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch:
     add patches to make the tests pass with tidy-html5 providing libtidy
     (Closes: #829409)
+  * d/p/tidy-not-tidyp.patch: alter to support either tidyp or
+    tidy-html5, reinstating the (now somewhat misleadingly named)
+    _tidyp_version function by using tidy-html5's tidyLibraryVersion().
+    This is hopefully more palatable to upstream.
+    - d/p/remove-tidy_version.patch: drop
 
  -- gregor herrmann <gre...@debian.org>  Thu, 27 Feb 2014 22:36:29 +0100
 
diff --git a/debian/patches/fix-error-message-in-webtidy b/debian/patches/fix-error-message-in-webtidy
index 0efb8d3..1f808ce 100644
--- a/debian/patches/fix-error-message-in-webtidy
+++ b/debian/patches/fix-error-message-in-webtidy
@@ -8,7 +8,7 @@ Forwarded: not-needed
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/bin/webtidy b/bin/webtidy
-index be57814..4a92423 100755
+index dc1a979..5338d09 100755
 --- a/bin/webtidy
 +++ b/bin/webtidy
 @@ -29,7 +29,7 @@ for my $url ( @ARGV ) {
diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
index 04801b2..31c6042 100644
--- a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
+++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
@@ -8,10 +8,10 @@ Signed-off-by: Simon McVittie <s...@debian.org>
  1 file changed, 4 insertions(+)
 
 diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
-index a172ea2..aa16c79 100644
+index e19a33d..e527f94 100644
 --- a/lib/HTML/Tidy.pm
 +++ b/lib/HTML/Tidy.pm
-@@ -265,6 +265,10 @@ sub _parse_errors {
+@@ -266,6 +266,10 @@ sub _parse_errors {
              # Summary line we don't want
  
          }
diff --git a/debian/patches/remove-tidy_version.patch b/debian/patches/remove-tidy_version.patch
deleted file mode 100644
index 7b553f2..0000000
--- a/debian/patches/remove-tidy_version.patch
+++ /dev/null
@@ -1,106 +0,0 @@
-From: gregor herrmann <gre...@debian.org>
-Date: Sat, 20 Feb 2010 09:50:30 -0500
-Subject: remove tidyVersion
-
-It is a special call to Andy Lester's modified version of libtidy. Also
-remove the corresponding call from Perl, the documentation, and the tests.
-
-Reviewed-by: Jonathan Yu <jaw...@cpan.org>
-Origin: vendor
-Forwarded: not-needed
----
- Tidy.xs          | 11 -----------
- bin/webtidy      |  2 +-
- lib/HTML/Tidy.pm | 17 -----------------
- t/00-load.t      |  2 +-
- t/version.t      |  5 +++++
- 5 files changed, 7 insertions(+), 30 deletions(-)
-
-diff --git a/Tidy.xs b/Tidy.xs
-index 2238b0b..630b550 100644
---- a/Tidy.xs
-+++ b/Tidy.xs
-@@ -189,14 +189,3 @@ _tidy_clean(input, configfile, tidy_options)
-         if ( rc < 0 ) {
-             XSRETURN_UNDEF;
-         }
--
--
--SV*
--_tidyp_version()
--    PREINIT:
--        const char* version;
--    CODE:
--        version = tidyVersion();
--        RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
--    OUTPUT:
--        RETVAL
-diff --git a/bin/webtidy b/bin/webtidy
-index dc1a979..be57814 100755
---- a/bin/webtidy
-+++ b/bin/webtidy
-@@ -19,7 +19,7 @@ GetOptions(
- ) or $help = 1;
- 
- if ( !@ARGV || $help ) {
--    print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
-+    print "webtidy v$HTML::Tidy::VERSION \n";
-     print <DATA>;
-     exit 1;
- }
-diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
-index 32a5005..a172ea2 100644
---- a/lib/HTML/Tidy.pm
-+++ b/lib/HTML/Tidy.pm
-@@ -341,23 +341,6 @@ sub _is_keeper {
-     return 1;
- }
- 
--=head2 tidyp_version()
--
--=head2 libtidyp_version()
--
--Returns the version of the underling tidyp library.
--
--=cut
--
--# backcompat
--sub libtidyp_version { return shift->tidyp_version }
--
--sub tidyp_version {
--    my $version_str = _tidyp_version();
--
--    return $version_str;
--}
--
- require XSLoader;
- XSLoader::load('HTML::Tidy', $VERSION);
- 
-diff --git a/t/00-load.t b/t/00-load.t
-index b40452d..d10902e 100644
---- a/t/00-load.t
-+++ b/t/00-load.t
-@@ -8,5 +8,5 @@ use Test::More tests => 1;
- use HTML::Tidy;
- use HTML::Tidy::Message;
- 
--diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $]; tidyp " . HTML::Tidy->tidyp_version() );
-+diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $];" );
- pass( 'Modules loaded' );
-diff --git a/t/version.t b/t/version.t
-index f225e3e..b05a7da 100644
---- a/t/version.t
-+++ b/t/version.t
-@@ -7,7 +7,12 @@ use Test::More tests => 4;
- 
- use HTML::Tidy;
- 
-+SKIP: {
-+    skip 'libtidy_version has been removed in Debian', 4;
-+
- for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
-     like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
-     cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
- }
-+
-+}
diff --git a/debian/patches/series b/debian/patches/series
index 0fa4da0..51603c6 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,3 @@
-remove-tidy_version.patch
 tidy-not-tidyp.patch
 lib-ignore-tidy-html5-s-differently-formatted-summar.patch
 t-allow-tidy-html5-as-generator.patch
diff --git a/debian/patches/tidy-not-tidyp.patch b/debian/patches/tidy-not-tidyp.patch
index 9875169..554c6a0 100644
--- a/debian/patches/tidy-not-tidyp.patch
+++ b/debian/patches/tidy-not-tidyp.patch
@@ -1,47 +1,104 @@
-From: Florian Schlichting <f...@debian.org>
-Date: Fri, 4 Oct 2013 23:39:39 +0200
-Subject: look for and use tidy.h in /usr/include/tidy (from libtidy-dev)
+From: Simon McVittie <s...@debian.org>
+Date: Fri, 22 Jul 2016 10:33:50 +0100
+Subject: Look for tidy if tidyp is not found
 
-Forwarded: not-needed
+tidy-html5 is an actively-maintained, HTML5-supporting variant of
+the tidy library from which tidyp was forked.
+
+Based on Debian-specific patches by Florian Schlichting and
+gregor herrmann, which unconditionally switched from tidyp to tidy.
 ---
- Makefile.PL | 6 +++---
- Tidy.xs     | 2 +-
- 2 files changed, 4 insertions(+), 4 deletions(-)
+ Makefile.PL      |  6 ++++++
+ Tidy.xs          | 15 +++++++++++++--
+ lib/HTML/Tidy.pm |  7 +++++++
+ t/version.t      |  2 +-
+ 4 files changed, 27 insertions(+), 3 deletions(-)
 
 diff --git a/Makefile.PL b/Makefile.PL
-index 1bca2ef..6ad1b7f 100644
+index 1bca2ef..e3f64a0 100644
 --- a/Makefile.PL
 +++ b/Makefile.PL
-@@ -10,7 +10,7 @@ use ExtUtils::Liblist;
- use Config;
- 
- my $libs = '-ltidyp';
--my $inc = "-I. -I/usr/include/tidyp -I/usr/local/include/tidyp -I$Config{usrinc}/tidyp";
-+my $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy";
- 
- eval { require Alien::Tidyp; };
- 
-@@ -20,8 +20,8 @@ if ( !$@ ) {
-     $inc = Alien::Tidyp->config('INC');
- }
- else {
--    print "Alien::Tidyp not found. Looking for for tidyp on your system.\n";
--    my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidyp', 0, 1 );
-+    print "Alien::Tidyp not found. Looking for for tidy on your system.\n";
-+    my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidy', 0, 1 );
+@@ -25,6 +25,12 @@ else {
      $libs = $vars[2];
  
      if ( !$libs ) {
++        @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidy', 0, 1 );
++        $libs = $vars[2];
++        $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy -DWITH_TIDY";
++    }
++
++    if ( !$libs ) {
+         $libs = '-ltidyp';
+         print <<'EOF';
+ 
 diff --git a/Tidy.xs b/Tidy.xs
-index 630b550..a8bbe82 100644
+index 2238b0b..0b86116 100644
 --- a/Tidy.xs
 +++ b/Tidy.xs
-@@ -2,7 +2,7 @@
+@@ -2,8 +2,14 @@
  #include "perl.h"
  #include "XSUB.h"
  
 -#include <tidyp.h>
-+#include <tidy.h>
- #include <buffio.h>
+-#include <buffio.h>
++#ifdef WITH_TIDY
++# include <tidy.h>
++# include <tidybuffio.h>
++#else
++# include <tidyp.h>
++# include <buffio.h>
++#endif
++
  #include <stdio.h>
  #include <errno.h>
+ 
+@@ -196,7 +202,12 @@ _tidyp_version()
+     PREINIT:
+         const char* version;
+     CODE:
++#ifdef WITH_TIDY
++        /* tidy-html5 is required */
++        version = tidyLibraryVersion();
++#else
+         version = tidyVersion();
++#endif
+         RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
+     OUTPUT:
+         RETVAL
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index 32a5005..e19a33d 100644
+--- a/lib/HTML/Tidy.pm
++++ b/lib/HTML/Tidy.pm
+@@ -4,6 +4,7 @@ use 5.008;
+ use strict;
+ use warnings;
+ use Carp ();
++use version 0.77 ();
+ 
+ use HTML::Tidy::Message;
+ 
+@@ -355,6 +356,12 @@ sub libtidyp_version { return shift->tidyp_version }
+ sub tidyp_version {
+     my $version_str = _tidyp_version();
+ 
++    # Convert tidy-html5 versions to the 5.002001 form so they work
++    # with naive numeric comparison
++    if ($version_str !~ m/^0\./) {
++        $version_str = version->parse("v$version_str")->numify;
++    }
++
+     return $version_str;
+ }
+ 
+diff --git a/t/version.t b/t/version.t
+index f225e3e..c2528cd 100644
+--- a/t/version.t
++++ b/t/version.t
+@@ -8,6 +8,6 @@ use Test::More tests => 4;
+ use HTML::Tidy;
+ 
+ for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
+-    like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
++    like( $version_string, qr/^\d\.\d+$/, 'Valid version string' );
+     cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
+ }
-- 
2.8.1

Reply via email to