On Sun, 03 Jul 2016 at 07:54:05 +0200, Chris Lamb wrote:
> libhtml-tidy-perl fails to build from source in unstable/amd64
I don't think it was necessarily a good idea to forward this upstream:
the build failure is because Debian forked this Perl module to stop using
its author's fork (tidyp) of the underlying C library (tidy), and now we've
also replaced the C library with a different fork (tidy-html5), which is
what's making the tests fail.
The author of HTML::Tidy seems quite likely to respond "if you had
packaged my fork of tidy like the documentation told you to, you wouldn't
have this problem".
However, tidy-html5 seems likely to be better than either tidy or tidyp,
so hopefully the HTML::Tidy author will be somewhat receptive to the idea
of supporting tidy-html5.
Possible patches for the Debian packaging attached, also available from
<git+ssh://git.debian.org/git/users/smcv/libhtml-tidy-perl.git -b master>.
S
>From 2bca8ddf43494c9f6d2b5c516088e0c3cf5682ac Mon Sep 17 00:00:00 2001
From: Simon McVittie <[email protected]>
Date: Fri, 22 Jul 2016 09:45:53 +0100
Subject: [PATCH 1/4] d/p/fix-error-message-in-webtidy: move to end of patch
series
This is a firmly Debian-specific change that is not suitable for upstream.
---
debian/changelog | 5 +++++
debian/patches/series | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/debian/changelog b/debian/changelog
index 1eeac67..d0db8e1 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,11 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
[ gregor herrmann ]
* debian/copyright: change Copyright-Format 1.0 URL to HTTPS.
+ [ Simon McVittie ]
+ * d/p/fix-error-message-in-webtidy: move to end of patch series.
+ This is a firmly Debian-specific change that is not suitable for
+ upstream.
+
-- gregor herrmann <[email protected]> Thu, 27 Feb 2014 22:36:29 +0100
libhtml-tidy-perl (1.56-1) unstable; urgency=low
diff --git a/debian/patches/series b/debian/patches/series
index 57718d6..4adbeaa 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,3 @@
-fix-error-message-in-webtidy
remove-tidy_version.patch
tidy-not-tidyp.patch
+fix-error-message-in-webtidy
--
2.8.1
>From 447aa2e692a4276dc51c0e1d958c19f231c54279 Mon Sep 17 00:00:00 2001
From: Simon McVittie <[email protected]>
Date: Fri, 22 Jul 2016 09:46:18 +0100
Subject: [PATCH 2/4] d/patches: put all patches in the git style allowed by
DEP-3, so they can be manipulated with gbp-pq
---
debian/changelog | 2 +
debian/patches/fix-error-message-in-webtidy | 13 +++--
debian/patches/remove-tidy_version.patch | 79 ++++++++++++++++++-----------
debian/patches/tidy-not-tidyp.patch | 19 +++++--
4 files changed, 76 insertions(+), 37 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index d0db8e1..2c0a999 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -14,6 +14,8 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
* d/p/fix-error-message-in-webtidy: move to end of patch series.
This is a firmly Debian-specific change that is not suitable for
upstream.
+ * d/patches: put all patches in the git style allowed by DEP-3,
+ so they can be manipulated with gbp-pq
-- gregor herrmann <[email protected]> Thu, 27 Feb 2014 22:36:29 +0100
diff --git a/debian/patches/fix-error-message-in-webtidy b/debian/patches/fix-error-message-in-webtidy
index afdfb7c..0efb8d3 100644
--- a/debian/patches/fix-error-message-in-webtidy
+++ b/debian/patches/fix-error-message-in-webtidy
@@ -1,10 +1,17 @@
-Description: make webtidy error message more debianish
-Author: Ryan Niebur <[email protected]>
+From: Ryan Niebur <[email protected]>
+Date: Tue, 2 Jun 2009 21:15:36 -0700
+Subject: make webtidy error message more debianish
+
Forwarded: not-needed
+---
+ bin/webtidy | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+diff --git a/bin/webtidy b/bin/webtidy
+index be57814..4a92423 100755
--- a/bin/webtidy
+++ b/bin/webtidy
-@@ -29,7 +29,7 @@
+@@ -29,7 +29,7 @@ for my $url ( @ARGV ) {
my @lines;
if ( $url =~ /^https?:/ ) {
if ( !eval { require LWP::Simple; 1; } ) {
diff --git a/debian/patches/remove-tidy_version.patch b/debian/patches/remove-tidy_version.patch
index 6bbeffa..7b553f2 100644
--- a/debian/patches/remove-tidy_version.patch
+++ b/debian/patches/remove-tidy_version.patch
@@ -1,15 +1,26 @@
-Description: remove tidyVersion as it is a special call
- to Andy Lester's modified version of libtidy. Also remove
- the corresponding call from Perl, the documentation, and
- the tests.
-Author: gregor herrmann <[email protected]>
+From: gregor herrmann <[email protected]>
+Date: Sat, 20 Feb 2010 09:50:30 -0500
+Subject: remove tidyVersion
+
+It is a special call to Andy Lester's modified version of libtidy. Also
+remove the corresponding call from Perl, the documentation, and the tests.
+
Reviewed-by: Jonathan Yu <[email protected]>
Origin: vendor
Forwarded: not-needed
+---
+ Tidy.xs | 11 -----------
+ bin/webtidy | 2 +-
+ lib/HTML/Tidy.pm | 17 -----------------
+ t/00-load.t | 2 +-
+ t/version.t | 5 +++++
+ 5 files changed, 7 insertions(+), 30 deletions(-)
+diff --git a/Tidy.xs b/Tidy.xs
+index 2238b0b..630b550 100644
--- a/Tidy.xs
+++ b/Tidy.xs
-@@ -189,14 +189,3 @@
+@@ -189,14 +189,3 @@ _tidy_clean(input, configfile, tidy_options)
if ( rc < 0 ) {
XSRETURN_UNDEF;
}
@@ -24,24 +35,24 @@ Forwarded: not-needed
- RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
- OUTPUT:
- RETVAL
---- a/t/version.t
-+++ b/t/version.t
-@@ -7,7 +7,12 @@
-
- use HTML::Tidy;
+diff --git a/bin/webtidy b/bin/webtidy
+index dc1a979..be57814 100755
+--- a/bin/webtidy
++++ b/bin/webtidy
+@@ -19,7 +19,7 @@ GetOptions(
+ ) or $help = 1;
-+SKIP: {
-+ skip 'libtidy_version has been removed in Debian', 4;
-+
- for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
- like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
- cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
+ if ( !@ARGV || $help ) {
+- print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
++ print "webtidy v$HTML::Tidy::VERSION \n";
+ print <DATA>;
+ exit 1;
}
-+
-+}
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index 32a5005..a172ea2 100644
--- a/lib/HTML/Tidy.pm
+++ b/lib/HTML/Tidy.pm
-@@ -341,23 +341,6 @@
+@@ -341,23 +341,6 @@ sub _is_keeper {
return 1;
}
@@ -65,23 +76,31 @@ Forwarded: not-needed
require XSLoader;
XSLoader::load('HTML::Tidy', $VERSION);
+diff --git a/t/00-load.t b/t/00-load.t
+index b40452d..d10902e 100644
--- a/t/00-load.t
+++ b/t/00-load.t
-@@ -8,5 +8,5 @@
+@@ -8,5 +8,5 @@ use Test::More tests => 1;
use HTML::Tidy;
use HTML::Tidy::Message;
-diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $]; tidyp " . HTML::Tidy->tidyp_version() );
+diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $];" );
pass( 'Modules loaded' );
---- a/bin/webtidy
-+++ b/bin/webtidy
-@@ -19,7 +19,7 @@
- ) or $help = 1;
+diff --git a/t/version.t b/t/version.t
+index f225e3e..b05a7da 100644
+--- a/t/version.t
++++ b/t/version.t
+@@ -7,7 +7,12 @@ use Test::More tests => 4;
- if ( !@ARGV || $help ) {
-- print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
-+ print "webtidy v$HTML::Tidy::VERSION \n";
- print <DATA>;
- exit 1;
+ use HTML::Tidy;
+
++SKIP: {
++ skip 'libtidy_version has been removed in Debian', 4;
++
+ for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
+ like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
+ cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
}
++
++}
diff --git a/debian/patches/tidy-not-tidyp.patch b/debian/patches/tidy-not-tidyp.patch
index 1b781cd..9875169 100644
--- a/debian/patches/tidy-not-tidyp.patch
+++ b/debian/patches/tidy-not-tidyp.patch
@@ -1,9 +1,18 @@
-Description: look for and use tidy.h in /usr/include/tidy (from libtidy-dev)
-Author: Florian Schlichting <[email protected]>
+From: Florian Schlichting <[email protected]>
+Date: Fri, 4 Oct 2013 23:39:39 +0200
+Subject: look for and use tidy.h in /usr/include/tidy (from libtidy-dev)
+
Forwarded: not-needed
+---
+ Makefile.PL | 6 +++---
+ Tidy.xs | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/Makefile.PL b/Makefile.PL
+index 1bca2ef..6ad1b7f 100644
--- a/Makefile.PL
+++ b/Makefile.PL
-@@ -10,7 +10,7 @@
+@@ -10,7 +10,7 @@ use ExtUtils::Liblist;
use Config;
my $libs = '-ltidyp';
@@ -12,7 +21,7 @@ Forwarded: not-needed
eval { require Alien::Tidyp; };
-@@ -20,8 +20,8 @@
+@@ -20,8 +20,8 @@ if ( !$@ ) {
$inc = Alien::Tidyp->config('INC');
}
else {
@@ -23,6 +32,8 @@ Forwarded: not-needed
$libs = $vars[2];
if ( !$libs ) {
+diff --git a/Tidy.xs b/Tidy.xs
+index 630b550..a8bbe82 100644
--- a/Tidy.xs
+++ b/Tidy.xs
@@ -2,7 +2,7 @@
--
2.8.1
>From f2652c498511e9de6ce6b06eb0445cfa532ce20e Mon Sep 17 00:00:00 2001
From: Simon McVittie <[email protected]>
Date: Fri, 22 Jul 2016 09:49:52 +0100
Subject: [PATCH 3/4] Add patches to make the tests pass with tidy-html5
providing libtidy
Closes: #829409
---
debian/changelog | 8 ++
debian/control | 2 +-
...tidy-html5-s-differently-formatted-summar.patch | 24 ++++
debian/patches/series | 6 +
.../patches/t-allow-tidy-html5-as-generator.patch | 47 ++++++++
...ert-that-tidy-will-add-a-HTML-3.2-doctype.patch | 52 +++++++++
...-don-t-assert-that-the-DOCTYPE-is-preserv.patch | 36 ++++++
...-HTML5-DOCTYPE-to-get-warnings-about-unes.patch | 128 +++++++++++++++++++++
...s.t-adjust-expected-result-for-tidy-html5.patch | 58 ++++++++++
9 files changed, 360 insertions(+), 1 deletion(-)
create mode 100644 debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
create mode 100644 debian/patches/t-allow-tidy-html5-as-generator.patch
create mode 100644 debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
create mode 100644 debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
create mode 100644 debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
create mode 100644 debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
diff --git a/debian/changelog b/debian/changelog
index 2c0a999..2101180 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -16,6 +16,14 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
upstream.
* d/patches: put all patches in the git style allowed by DEP-3,
so they can be manipulated with gbp-pq
+ * d/p/lib-ignore-tidy-html5-s-differently-formatted-summar.patch,
+ d/p/t-allow-tidy-html5-as-generator.patch,
+ d/p/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch,
+ d/p/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch,
+ d/p/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch,
+ d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch:
+ add patches to make the tests pass with tidy-html5 providing libtidy
+ (Closes: #829409)
-- gregor herrmann <[email protected]> Thu, 27 Feb 2014 22:36:29 +0100
diff --git a/debian/control b/debian/control
index 0ca02f8..072fde5 100644
--- a/debian/control
+++ b/debian/control
@@ -7,7 +7,7 @@ Section: perl
Priority: optional
Build-Depends: debhelper (>= 9.20120312),
help2man,
- libtidy-dev,
+ libtidy-dev (>= 1:5.2.0),
libtest-pod-perl,
libtest-pod-coverage-perl,
perl (>= 5.13.11) | libtest-simple-perl (>= 0.98),
diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
new file mode 100644
index 0000000..04801b2
--- /dev/null
+++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
@@ -0,0 +1,24 @@
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 09:19:08 +0100
+Subject: lib: ignore tidy-html5's differently-formatted summary line
+
+Signed-off-by: Simon McVittie <[email protected]>
+---
+ lib/HTML/Tidy.pm | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index a172ea2..aa16c79 100644
+--- a/lib/HTML/Tidy.pm
++++ b/lib/HTML/Tidy.pm
+@@ -265,6 +265,10 @@ sub _parse_errors {
+ # Summary line we don't want
+
+ }
++ elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) {
++ # Summary line we don't want
++
++ }
+ elsif ( $line eq 'No warnings or errors were found.' ) {
+ # Summary line we don't want
+
diff --git a/debian/patches/series b/debian/patches/series
index 4adbeaa..0fa4da0 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,9 @@
remove-tidy_version.patch
tidy-not-tidyp.patch
+lib-ignore-tidy-html5-s-differently-formatted-summar.patch
+t-allow-tidy-html5-as-generator.patch
+t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
+t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
+t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
+t-venus.t-adjust-expected-result-for-tidy-html5.patch
fix-error-message-in-webtidy
diff --git a/debian/patches/t-allow-tidy-html5-as-generator.patch b/debian/patches/t-allow-tidy-html5-as-generator.patch
new file mode 100644
index 0000000..c3d6f30
--- /dev/null
+++ b/debian/patches/t-allow-tidy-html5-as-generator.patch
@@ -0,0 +1,47 @@
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 09:21:39 +0100
+Subject: t: allow tidy-html5 as generator
+
+Signed-off-by: Simon McVittie <[email protected]>
+---
+ t/roundtrip.t | 2 +-
+ t/unicode.t | 6 ++----
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/t/roundtrip.t b/t/roundtrip.t
+index e37cb88..2590795 100644
+--- a/t/roundtrip.t
++++ b/t/roundtrip.t
+@@ -25,7 +25,7 @@ my @messages = $tidy->messages( $clean );
+
+ is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
+
+-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
+
+ my $expected = do { local $/ = undef; <DATA> };
+ is( $clean, $expected, 'Cleaned up properly' );
+diff --git a/t/unicode.t b/t/unicode.t
+index 2f45384..9ca6370 100644
+--- a/t/unicode.t
++++ b/t/unicode.t
+@@ -30,8 +30,7 @@ ok(utf8::is_utf8($reference), 'reference is utf8');
+ my $clean = $tidy->clean( $html );
+ ok(utf8::is_utf8($clean), 'cleaned output is also unicode');
+
+-$clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
+ is($clean, $reference, q{Cleanup didn't break anything});
+
+ my @messages = $tidy->messages;
+@@ -49,8 +48,7 @@ subtest 'Try send bytes to clean method.' => sub {
+ ok(!utf8::is_utf8($html), 'html is row bytes');
+ my $clean = $tidy->clean( $html );
+ ok(utf8::is_utf8($clean), 'but cleaned output is string');
+- $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
+- $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
++ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/;
+ is($clean, $reference, q{Cleanup didn't break anything});
+ };
+
diff --git a/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
new file mode 100644
index 0000000..abbaffc
--- /dev/null
+++ b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch
@@ -0,0 +1,52 @@
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 09:32:09 +0100
+Subject: t: don't assert that tidy will add a HTML 3.2 doctype
+
+tidy-html5 adds the HTML5 doctype, <!DOCTYPE html>.
+
+Signed-off-by: Simon McVittie <[email protected]>
+---
+ t/roundtrip.t | 3 ++-
+ t/wordwrap.t | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/t/roundtrip.t b/t/roundtrip.t
+index 2590795..68be07f 100644
+--- a/t/roundtrip.t
++++ b/t/roundtrip.t
+@@ -26,12 +26,13 @@ my @messages = $tidy->messages( $clean );
+ is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
+
+ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/;
++$clean =~ s/<!DOCTYPE html[^>]*>/<!DOCTYPE html>/;
+
+ my $expected = do { local $/ = undef; <DATA> };
+ is( $clean, $expected, 'Cleaned up properly' );
+
+ __DATA__
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="generator" content="Tidy">
+diff --git a/t/wordwrap.t b/t/wordwrap.t
+index 717d895..3f4daaf 100644
+--- a/t/wordwrap.t
++++ b/t/wordwrap.t
+@@ -11,7 +11,7 @@ my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp};
+
+ my $expected=<<'EOD';
+ <!DOCTYPE
+-html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++html>
+ <html>
+ <head>
+ <title>
+@@ -32,6 +32,7 @@ my $cfg = 't/wordwrap.cfg';
+ my $tidy = HTML::Tidy->new( {config_file => $cfg} );
+
+ my $result = $tidy->clean( $input );
++$result =~ s/<!DOCTYPE ?\nhtml[^>]*>/<!DOCTYPE \nhtml>/;
+ my @result = split(/\n/, $result);
+ is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected');
+
diff --git a/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
new file mode 100644
index 0000000..48dda49
--- /dev/null
+++ b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch
@@ -0,0 +1,36 @@
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 09:29:39 +0100
+Subject: t/unicode.t: don't assert that the DOCTYPE is preserved
+
+tidy-html5 currently doesn't preserve user-supplied DOCTYPEs
+in output: <https://github.com/htacg/tidy-html5/issues/435>
+
+Signed-off-by: Simon McVittie <[email protected]>
+---
+ t/unicode.html | 2 +-
+ t/unicode.t | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/t/unicode.html b/t/unicode.html
+index a90f83f..c8d1804 100644
+--- a/t/unicode.html
++++ b/t/unicode.html
+@@ -1,4 +1,4 @@
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <title>日本語のホムページ</title>
+diff --git a/t/unicode.t b/t/unicode.t
+index 9ca6370..679b48a 100644
+--- a/t/unicode.t
++++ b/t/unicode.t
+@@ -53,7 +53,7 @@ subtest 'Try send bytes to clean method.' => sub {
+ };
+
+ __DATA__
+-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
++<!DOCTYPE html>
+ <html>
+ <head>
+ <meta name="generator" content="Tidy">
diff --git a/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
new file mode 100644
index 0000000..e010fc9
--- /dev/null
+++ b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch
@@ -0,0 +1,128 @@
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 09:27:26 +0100
+Subject: t: use a pre-HTML5 DOCTYPE to get warnings about unescaped ampersand
+
+HTML5 defines an ampersand followed by whitespace to be unambiguously
+an ampersand, matching what browsers have always done in practice.
+As a result, tidy-html5 does not warn about them when the doctype
+is either HTML5 or missing (lack of a DOCTYPE is treated as HTML5,
+on the basis that HTML5 is a closer match for what browsers actually
+do than any previous standard). Discussion here:
+<https://github.com/htacg/tidy-html5/issues/207>
+
+Adding the DOCTYPE throws off some of the line numbering, which needs
+adjusting.
+
+t/ignore-text.t also seems to rely on the missing DOCTYPE provoking a
+warning, which is obviously not going to happen now that we've
+added one, to be able to verify that case-insensitive ignoring
+can work. Add a new error so we can ignore that instead.
+
+Signed-off-by: Simon McVittie <[email protected]>
+---
+ t/ignore-text.t | 8 +++++---
+ t/ignore.t | 10 +++++-----
+ t/levels.t | 10 +++++-----
+ 3 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/t/ignore-text.t b/t/ignore-text.t
+index 9695a5a..a95e0e0 100644
+--- a/t/ignore-text.t
++++ b/t/ignore-text.t
+@@ -10,8 +10,8 @@ use HTML::Tidy;
+ my $html = do { local $/; <DATA> };
+
+ my @expected_messages = split /\n/, q{
+-DATA (24:XX) Warning: unescaped & which should be written as &amp;
+-DATA (24:XX) Warning: unescaped & which should be written as &amp;
++DATA (26:XX) Warning: unescaped & which should be written as &amp;
++DATA (26:XX) Warning: unescaped & which should be written as &amp;
+ };
+
+ chomp @expected_messages;
+@@ -22,7 +22,7 @@ IGNORE_BOGOTAG: {
+ isa_ok( $tidy, 'HTML::Tidy' );
+
+ $tidy->ignore( text => qr/bogotag/ );
+- $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] );
++ $tidy->ignore( text => [ qr/UNESCAPED/, qr/case-insensitive/i ] );
+ # The qr/UNESCAPED/ should not ignore anything because there's no /i
+ my $rc = $tidy->parse( 'DATA', $html );
+ ok( $rc, 'Parsed OK' );
+@@ -44,6 +44,7 @@ sub munge_returned {
+ }
+ }
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+@@ -67,6 +68,7 @@ DIV.TOC P {
+ </HEAD>
+ <BODY BGCOLOR="white">
+ <BOGOTAG>
++<CASE-INSENSITIVE>
+ <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
+ <DIV CLASS="TOC">
+ <h2>Perl, Programming &amp; Writing</h2>
+diff --git a/t/ignore.t b/t/ignore.t
+index 3991733..c0a1317 100644
+--- a/t/ignore.t
++++ b/t/ignore.t
+@@ -10,16 +10,15 @@ use HTML::Tidy;
+ my $html = do { local $/ = undef; <DATA> };
+
+ my @expected_warnings = split /\n/, q{
+-- (1:1) Warning: missing <!DOCTYPE> declaration
+-- (23:1) Warning: discarding unexpected <bogotag>
+-- (24:XX) Warning: unescaped & which should be written as &amp;
+-- (24:XX) Warning: unescaped & which should be written as &amp;
++- (24:1) Warning: discarding unexpected <bogotag>
++- (25:XX) Warning: unescaped & which should be written as &amp;
++- (25:XX) Warning: unescaped & which should be written as &amp;
+ };
+ chomp @expected_warnings;
+ shift @expected_warnings; # First one's blank
+
+ my @expected_errors = split /\n/, q{
+-- (23:1) Error: <bogotag> is not recognized!
++- (24:1) Error: <bogotag> is not recognized!
+ };
+ chomp @expected_errors;
+ shift @expected_errors; # First one's blank
+@@ -71,6 +70,7 @@ sub munge_returned {
+ }
+ }
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+diff --git a/t/levels.t b/t/levels.t
+index 01aeb3b..2ee3162 100644
+--- a/t/levels.t
++++ b/t/levels.t
+@@ -13,11 +13,10 @@ my $rc = $tidy->parse( '-', <DATA> );
+ ok( $rc, 'Parsed OK' );
+
+ my @expected = split /\n/, q{
+-- (1:1) Warning: missing <!DOCTYPE> declaration
+-- (23:1) Error: <bogotag> is not recognized!
+-- (23:1) Warning: discarding unexpected <bogotag>
+-- (24:XX) Warning: unescaped & which should be written as &amp;
+-- (24:XX) Warning: unescaped & which should be written as &amp;
++- (24:1) Error: <bogotag> is not recognized!
++- (24:1) Warning: discarding unexpected <bogotag>
++- (25:XX) Warning: unescaped & which should be written as &amp;
++- (25:XX) Warning: unescaped & which should be written as &amp;
+ };
+ chomp @expected;
+ shift @expected; # First one's blank
+@@ -41,6 +40,7 @@ sub munge_returned {
+ }
+
+ __DATA__
++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
+ <HTML>
+ <HEAD>
+ <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
diff --git a/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
new file mode 100644
index 0000000..004b09f
--- /dev/null
+++ b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch
@@ -0,0 +1,58 @@
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 09:34:53 +0100
+Subject: t/venus.t: adjust expected result for tidy-html5
+
+tidy-html5 allows arbitrary block content inside <address>, whereas
+traditional tidy only allowed inline content.
+Reference: <https://github.com/htacg/tidy-html5/issues/55>
+
+This change will break with traditional tidy, and it isn't clear
+to me how to remain compatible with both.
+
+Signed-off-by: Simon McVittie <[email protected]>
+---
+ t/venus.t | 32 +++++++++++++++++++-------------
+ 1 file changed, 19 insertions(+), 13 deletions(-)
+
+diff --git a/t/venus.t b/t/venus.t
+index 41ee597..bd94d4b 100755
+--- a/t/venus.t
++++ b/t/venus.t
+@@ -72,18 +72,24 @@ __DATA__
+ <a href="../../General/Credits.html">Credits</a> |
+ <a href="../../General/Feedback.html">Feedback</a> |</h4>
+ </center>
+- <center>
+- <p>
+- <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
+- </p>
+- </center>
+- <div align="center"></div>
+- <center>
+- <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
+- <address> </address>
+- <address>Authors: Twin Groves Museums in the Classroom Team,</address>
+- <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
+- </center>
+- <center>Created: 27 June 1998- Updated: 6 October 2003</center>
++ <address>
++ <center>
++ <p>
++ <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
++ </p>
++ </center>
++ <div align="center"></div>
++ <address>
++ <center>
++ <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
++ <address> </address>
++ <address>Authors: Twin Groves Museums in the Classroom Team,</address>
++ <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
++ </center>
++ </address>
++ <address>
++ <center>Created: 27 June 1998- Updated: 6 October 2003</center>
++ </address>
++ </address>
+ </body>
+ </html>
--
2.8.1
>From 357626ff2e00753b2980f86e3884eed3b092c7f4 Mon Sep 17 00:00:00 2001
From: Simon McVittie <[email protected]>
Date: Fri, 22 Jul 2016 10:38:10 +0100
Subject: [PATCH 4/4] d/p/tidy-not-tidyp.patch: alter to support either tidyp
or tidy-html5
This reinstates the (now somewhat misleadingly named) _tidyp_version
function by using tidy-html5's tidyLibraryVersion(). This is hopefully
more palatable to upstream.
d/p/remove-tidy_version.patch: drop, no longer needed.
---
debian/changelog | 5 +
debian/patches/fix-error-message-in-webtidy | 2 +-
...tidy-html5-s-differently-formatted-summar.patch | 4 +-
debian/patches/remove-tidy_version.patch | 106 -------------------
debian/patches/series | 1 -
debian/patches/tidy-not-tidyp.patch | 115 +++++++++++++++------
6 files changed, 94 insertions(+), 139 deletions(-)
delete mode 100644 debian/patches/remove-tidy_version.patch
diff --git a/debian/changelog b/debian/changelog
index 2101180..3eb2bbe 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -24,6 +24,11 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium
d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch:
add patches to make the tests pass with tidy-html5 providing libtidy
(Closes: #829409)
+ * d/p/tidy-not-tidyp.patch: alter to support either tidyp or
+ tidy-html5, reinstating the (now somewhat misleadingly named)
+ _tidyp_version function by using tidy-html5's tidyLibraryVersion().
+ This is hopefully more palatable to upstream.
+ - d/p/remove-tidy_version.patch: drop
-- gregor herrmann <[email protected]> Thu, 27 Feb 2014 22:36:29 +0100
diff --git a/debian/patches/fix-error-message-in-webtidy b/debian/patches/fix-error-message-in-webtidy
index 0efb8d3..1f808ce 100644
--- a/debian/patches/fix-error-message-in-webtidy
+++ b/debian/patches/fix-error-message-in-webtidy
@@ -8,7 +8,7 @@ Forwarded: not-needed
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bin/webtidy b/bin/webtidy
-index be57814..4a92423 100755
+index dc1a979..5338d09 100755
--- a/bin/webtidy
+++ b/bin/webtidy
@@ -29,7 +29,7 @@ for my $url ( @ARGV ) {
diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
index 04801b2..31c6042 100644
--- a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
+++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch
@@ -8,10 +8,10 @@ Signed-off-by: Simon McVittie <[email protected]>
1 file changed, 4 insertions(+)
diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
-index a172ea2..aa16c79 100644
+index e19a33d..e527f94 100644
--- a/lib/HTML/Tidy.pm
+++ b/lib/HTML/Tidy.pm
-@@ -265,6 +265,10 @@ sub _parse_errors {
+@@ -266,6 +266,10 @@ sub _parse_errors {
# Summary line we don't want
}
diff --git a/debian/patches/remove-tidy_version.patch b/debian/patches/remove-tidy_version.patch
deleted file mode 100644
index 7b553f2..0000000
--- a/debian/patches/remove-tidy_version.patch
+++ /dev/null
@@ -1,106 +0,0 @@
-From: gregor herrmann <[email protected]>
-Date: Sat, 20 Feb 2010 09:50:30 -0500
-Subject: remove tidyVersion
-
-It is a special call to Andy Lester's modified version of libtidy. Also
-remove the corresponding call from Perl, the documentation, and the tests.
-
-Reviewed-by: Jonathan Yu <[email protected]>
-Origin: vendor
-Forwarded: not-needed
----
- Tidy.xs | 11 -----------
- bin/webtidy | 2 +-
- lib/HTML/Tidy.pm | 17 -----------------
- t/00-load.t | 2 +-
- t/version.t | 5 +++++
- 5 files changed, 7 insertions(+), 30 deletions(-)
-
-diff --git a/Tidy.xs b/Tidy.xs
-index 2238b0b..630b550 100644
---- a/Tidy.xs
-+++ b/Tidy.xs
-@@ -189,14 +189,3 @@ _tidy_clean(input, configfile, tidy_options)
- if ( rc < 0 ) {
- XSRETURN_UNDEF;
- }
--
--
--SV*
--_tidyp_version()
-- PREINIT:
-- const char* version;
-- CODE:
-- version = tidyVersion();
-- RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
-- OUTPUT:
-- RETVAL
-diff --git a/bin/webtidy b/bin/webtidy
-index dc1a979..be57814 100755
---- a/bin/webtidy
-+++ b/bin/webtidy
-@@ -19,7 +19,7 @@ GetOptions(
- ) or $help = 1;
-
- if ( !@ARGV || $help ) {
-- print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
-+ print "webtidy v$HTML::Tidy::VERSION \n";
- print <DATA>;
- exit 1;
- }
-diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
-index 32a5005..a172ea2 100644
---- a/lib/HTML/Tidy.pm
-+++ b/lib/HTML/Tidy.pm
-@@ -341,23 +341,6 @@ sub _is_keeper {
- return 1;
- }
-
--=head2 tidyp_version()
--
--=head2 libtidyp_version()
--
--Returns the version of the underling tidyp library.
--
--=cut
--
--# backcompat
--sub libtidyp_version { return shift->tidyp_version }
--
--sub tidyp_version {
-- my $version_str = _tidyp_version();
--
-- return $version_str;
--}
--
- require XSLoader;
- XSLoader::load('HTML::Tidy', $VERSION);
-
-diff --git a/t/00-load.t b/t/00-load.t
-index b40452d..d10902e 100644
---- a/t/00-load.t
-+++ b/t/00-load.t
-@@ -8,5 +8,5 @@ use Test::More tests => 1;
- use HTML::Tidy;
- use HTML::Tidy::Message;
-
--diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $]; tidyp " . HTML::Tidy->tidyp_version() );
-+diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $];" );
- pass( 'Modules loaded' );
-diff --git a/t/version.t b/t/version.t
-index f225e3e..b05a7da 100644
---- a/t/version.t
-+++ b/t/version.t
-@@ -7,7 +7,12 @@ use Test::More tests => 4;
-
- use HTML::Tidy;
-
-+SKIP: {
-+ skip 'libtidy_version has been removed in Debian', 4;
-+
- for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
- like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
- cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
- }
-+
-+}
diff --git a/debian/patches/series b/debian/patches/series
index 0fa4da0..51603c6 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,4 +1,3 @@
-remove-tidy_version.patch
tidy-not-tidyp.patch
lib-ignore-tidy-html5-s-differently-formatted-summar.patch
t-allow-tidy-html5-as-generator.patch
diff --git a/debian/patches/tidy-not-tidyp.patch b/debian/patches/tidy-not-tidyp.patch
index 9875169..554c6a0 100644
--- a/debian/patches/tidy-not-tidyp.patch
+++ b/debian/patches/tidy-not-tidyp.patch
@@ -1,47 +1,104 @@
-From: Florian Schlichting <[email protected]>
-Date: Fri, 4 Oct 2013 23:39:39 +0200
-Subject: look for and use tidy.h in /usr/include/tidy (from libtidy-dev)
+From: Simon McVittie <[email protected]>
+Date: Fri, 22 Jul 2016 10:33:50 +0100
+Subject: Look for tidy if tidyp is not found
-Forwarded: not-needed
+tidy-html5 is an actively-maintained, HTML5-supporting variant of
+the tidy library from which tidyp was forked.
+
+Based on Debian-specific patches by Florian Schlichting and
+gregor herrmann, which unconditionally switched from tidyp to tidy.
---
- Makefile.PL | 6 +++---
- Tidy.xs | 2 +-
- 2 files changed, 4 insertions(+), 4 deletions(-)
+ Makefile.PL | 6 ++++++
+ Tidy.xs | 15 +++++++++++++--
+ lib/HTML/Tidy.pm | 7 +++++++
+ t/version.t | 2 +-
+ 4 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/Makefile.PL b/Makefile.PL
-index 1bca2ef..6ad1b7f 100644
+index 1bca2ef..e3f64a0 100644
--- a/Makefile.PL
+++ b/Makefile.PL
-@@ -10,7 +10,7 @@ use ExtUtils::Liblist;
- use Config;
-
- my $libs = '-ltidyp';
--my $inc = "-I. -I/usr/include/tidyp -I/usr/local/include/tidyp -I$Config{usrinc}/tidyp";
-+my $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy";
-
- eval { require Alien::Tidyp; };
-
-@@ -20,8 +20,8 @@ if ( !$@ ) {
- $inc = Alien::Tidyp->config('INC');
- }
- else {
-- print "Alien::Tidyp not found. Looking for for tidyp on your system.\n";
-- my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidyp', 0, 1 );
-+ print "Alien::Tidyp not found. Looking for for tidy on your system.\n";
-+ my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidy', 0, 1 );
+@@ -25,6 +25,12 @@ else {
$libs = $vars[2];
if ( !$libs ) {
++ @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidy', 0, 1 );
++ $libs = $vars[2];
++ $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy -DWITH_TIDY";
++ }
++
++ if ( !$libs ) {
+ $libs = '-ltidyp';
+ print <<'EOF';
+
diff --git a/Tidy.xs b/Tidy.xs
-index 630b550..a8bbe82 100644
+index 2238b0b..0b86116 100644
--- a/Tidy.xs
+++ b/Tidy.xs
-@@ -2,7 +2,7 @@
+@@ -2,8 +2,14 @@
#include "perl.h"
#include "XSUB.h"
-#include <tidyp.h>
-+#include <tidy.h>
- #include <buffio.h>
+-#include <buffio.h>
++#ifdef WITH_TIDY
++# include <tidy.h>
++# include <tidybuffio.h>
++#else
++# include <tidyp.h>
++# include <buffio.h>
++#endif
++
#include <stdio.h>
#include <errno.h>
+
+@@ -196,7 +202,12 @@ _tidyp_version()
+ PREINIT:
+ const char* version;
+ CODE:
++#ifdef WITH_TIDY
++ /* tidy-html5 is required */
++ version = tidyLibraryVersion();
++#else
+ version = tidyVersion();
++#endif
+ RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
+ OUTPUT:
+ RETVAL
+diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm
+index 32a5005..e19a33d 100644
+--- a/lib/HTML/Tidy.pm
++++ b/lib/HTML/Tidy.pm
+@@ -4,6 +4,7 @@ use 5.008;
+ use strict;
+ use warnings;
+ use Carp ();
++use version 0.77 ();
+
+ use HTML::Tidy::Message;
+
+@@ -355,6 +356,12 @@ sub libtidyp_version { return shift->tidyp_version }
+ sub tidyp_version {
+ my $version_str = _tidyp_version();
+
++ # Convert tidy-html5 versions to the 5.002001 form so they work
++ # with naive numeric comparison
++ if ($version_str !~ m/^0\./) {
++ $version_str = version->parse("v$version_str")->numify;
++ }
++
+ return $version_str;
+ }
+
+diff --git a/t/version.t b/t/version.t
+index f225e3e..c2528cd 100644
+--- a/t/version.t
++++ b/t/version.t
+@@ -8,6 +8,6 @@ use Test::More tests => 4;
+ use HTML::Tidy;
+
+ for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
+- like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
++ like( $version_string, qr/^\d\.\d+$/, 'Valid version string' );
+ cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
+ }
--
2.8.1