On Sun, 03 Jul 2016 at 07:54:05 +0200, Chris Lamb wrote:
> libhtml-tidy-perl fails to build from source in unstable/amd64

I don't think it was necessarily a good idea to forward this upstream: the build fails because Debian forked this Perl module to stop using its author's fork (tidyp) of the underlying C library (tidy), and now we've also replaced the C library with a different fork (tidy-html5), which is what's making the tests fail. The author of HTML::Tidy seems quite likely to respond "if you had packaged my fork of tidy like the documentation told you to, you wouldn't have this problem".

However, tidy-html5 seems likely to be better than either tidy or tidyp, so hopefully the HTML::Tidy author will be somewhat receptive to the idea of supporting tidy-html5.

Possible patches for the Debian packaging are attached, and are also available from <git+ssh://git.debian.org/git/users/smcv/libhtml-tidy-perl.git -b master>.

    S

>From 2bca8ddf43494c9f6d2b5c516088e0c3cf5682ac Mon Sep 17 00:00:00 2001 From: Simon McVittie <s...@debian.org> Date: Fri, 22 Jul 2016 09:45:53 +0100 Subject: [PATCH 1/4] d/p/fix-error-message-in-webtidy: move to end of patch series This is a firmly Debian-specific change that is not suitable for upstream. --- debian/changelog | 5 +++++ debian/patches/series | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 1eeac67..d0db8e1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,11 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium [ gregor herrmann ] * debian/copyright: change Copyright-Format 1.0 URL to HTTPS. + [ Simon McVittie ] + * d/p/fix-error-message-in-webtidy: move to end of patch series. + This is a firmly Debian-specific change that is not suitable for + upstream. + -- gregor herrmann <gre...@debian.org> Thu, 27 Feb 2014 22:36:29 +0100 libhtml-tidy-perl (1.56-1) unstable; urgency=low diff --git a/debian/patches/series b/debian/patches/series index 57718d6..4adbeaa 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,3 @@ -fix-error-message-in-webtidy remove-tidy_version.patch tidy-not-tidyp.patch +fix-error-message-in-webtidy -- 2.8.1
>From 447aa2e692a4276dc51c0e1d958c19f231c54279 Mon Sep 17 00:00:00 2001 From: Simon McVittie <s...@debian.org> Date: Fri, 22 Jul 2016 09:46:18 +0100 Subject: [PATCH 2/4] d/patches: put all patches in the git style allowed by DEP-3, so they can be manipulated with gbp-pq --- debian/changelog | 2 + debian/patches/fix-error-message-in-webtidy | 13 +++-- debian/patches/remove-tidy_version.patch | 79 ++++++++++++++++++----------- debian/patches/tidy-not-tidyp.patch | 19 +++++-- 4 files changed, 76 insertions(+), 37 deletions(-) diff --git a/debian/changelog b/debian/changelog index d0db8e1..2c0a999 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,6 +14,8 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium * d/p/fix-error-message-in-webtidy: move to end of patch series. This is a firmly Debian-specific change that is not suitable for upstream. + * d/patches: put all patches in the git style allowed by DEP-3, + so they can be manipulated with gbp-pq -- gregor herrmann <gre...@debian.org> Thu, 27 Feb 2014 22:36:29 +0100 diff --git a/debian/patches/fix-error-message-in-webtidy b/debian/patches/fix-error-message-in-webtidy index afdfb7c..0efb8d3 100644 --- a/debian/patches/fix-error-message-in-webtidy +++ b/debian/patches/fix-error-message-in-webtidy @@ -1,10 +1,17 @@ -Description: make webtidy error message more debianish -Author: Ryan Niebur <r...@debian.org> +From: Ryan Niebur <r...@debian.org> +Date: Tue, 2 Jun 2009 21:15:36 -0700 +Subject: make webtidy error message more debianish + Forwarded: not-needed +--- + bin/webtidy | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +diff --git a/bin/webtidy b/bin/webtidy +index be57814..4a92423 100755 --- a/bin/webtidy +++ b/bin/webtidy -@@ -29,7 +29,7 @@ +@@ -29,7 +29,7 @@ for my $url ( @ARGV ) { my @lines; if ( $url =~ /^https?:/ ) { if ( !eval { require LWP::Simple; 1; } ) { diff --git a/debian/patches/remove-tidy_version.patch b/debian/patches/remove-tidy_version.patch index 6bbeffa..7b553f2 100644 --- 
a/debian/patches/remove-tidy_version.patch +++ b/debian/patches/remove-tidy_version.patch @@ -1,15 +1,26 @@ -Description: remove tidyVersion as it is a special call - to Andy Lester's modified version of libtidy. Also remove - the corresponding call from Perl, the documentation, and - the tests. -Author: gregor herrmann <gre...@debian.org> +From: gregor herrmann <gre...@debian.org> +Date: Sat, 20 Feb 2010 09:50:30 -0500 +Subject: remove tidyVersion + +It is a special call to Andy Lester's modified version of libtidy. Also +remove the corresponding call from Perl, the documentation, and the tests. + Reviewed-by: Jonathan Yu <jaw...@cpan.org> Origin: vendor Forwarded: not-needed +--- + Tidy.xs | 11 ----------- + bin/webtidy | 2 +- + lib/HTML/Tidy.pm | 17 ----------------- + t/00-load.t | 2 +- + t/version.t | 5 +++++ + 5 files changed, 7 insertions(+), 30 deletions(-) +diff --git a/Tidy.xs b/Tidy.xs +index 2238b0b..630b550 100644 --- a/Tidy.xs +++ b/Tidy.xs -@@ -189,14 +189,3 @@ +@@ -189,14 +189,3 @@ _tidy_clean(input, configfile, tidy_options) if ( rc < 0 ) { XSRETURN_UNDEF; } @@ -24,24 +35,24 @@ Forwarded: not-needed - RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */ - OUTPUT: - RETVAL ---- a/t/version.t -+++ b/t/version.t -@@ -7,7 +7,12 @@ - - use HTML::Tidy; +diff --git a/bin/webtidy b/bin/webtidy +index dc1a979..be57814 100755 +--- a/bin/webtidy ++++ b/bin/webtidy +@@ -19,7 +19,7 @@ GetOptions( + ) or $help = 1; -+SKIP: { -+ skip 'libtidy_version has been removed in Debian', 4; -+ - for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) { - like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' ); - cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' ); + if ( !@ARGV || $help ) { +- print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . 
"\n"; ++ print "webtidy v$HTML::Tidy::VERSION \n"; + print <DATA>; + exit 1; } -+ -+} +diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm +index 32a5005..a172ea2 100644 --- a/lib/HTML/Tidy.pm +++ b/lib/HTML/Tidy.pm -@@ -341,23 +341,6 @@ +@@ -341,23 +341,6 @@ sub _is_keeper { return 1; } @@ -65,23 +76,31 @@ Forwarded: not-needed require XSLoader; XSLoader::load('HTML::Tidy', $VERSION); +diff --git a/t/00-load.t b/t/00-load.t +index b40452d..d10902e 100644 --- a/t/00-load.t +++ b/t/00-load.t -@@ -8,5 +8,5 @@ +@@ -8,5 +8,5 @@ use Test::More tests => 1; use HTML::Tidy; use HTML::Tidy::Message; -diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $]; tidyp " . HTML::Tidy->tidyp_version() ); +diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $];" ); pass( 'Modules loaded' ); ---- a/bin/webtidy -+++ b/bin/webtidy -@@ -19,7 +19,7 @@ - ) or $help = 1; +diff --git a/t/version.t b/t/version.t +index f225e3e..b05a7da 100644 +--- a/t/version.t ++++ b/t/version.t +@@ -7,7 +7,12 @@ use Test::More tests => 4; - if ( !@ARGV || $help ) { -- print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . 
"\n"; -+ print "webtidy v$HTML::Tidy::VERSION \n"; - print <DATA>; - exit 1; + use HTML::Tidy; + ++SKIP: { ++ skip 'libtidy_version has been removed in Debian', 4; ++ + for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) { + like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' ); + cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' ); } ++ ++} diff --git a/debian/patches/tidy-not-tidyp.patch b/debian/patches/tidy-not-tidyp.patch index 1b781cd..9875169 100644 --- a/debian/patches/tidy-not-tidyp.patch +++ b/debian/patches/tidy-not-tidyp.patch @@ -1,9 +1,18 @@ -Description: look for and use tidy.h in /usr/include/tidy (from libtidy-dev) -Author: Florian Schlichting <f...@debian.org> +From: Florian Schlichting <f...@debian.org> +Date: Fri, 4 Oct 2013 23:39:39 +0200 +Subject: look for and use tidy.h in /usr/include/tidy (from libtidy-dev) + Forwarded: not-needed +--- + Makefile.PL | 6 +++--- + Tidy.xs | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/Makefile.PL b/Makefile.PL +index 1bca2ef..6ad1b7f 100644 --- a/Makefile.PL +++ b/Makefile.PL -@@ -10,7 +10,7 @@ +@@ -10,7 +10,7 @@ use ExtUtils::Liblist; use Config; my $libs = '-ltidyp'; @@ -12,7 +21,7 @@ Forwarded: not-needed eval { require Alien::Tidyp; }; -@@ -20,8 +20,8 @@ +@@ -20,8 +20,8 @@ if ( !$@ ) { $inc = Alien::Tidyp->config('INC'); } else { @@ -23,6 +32,8 @@ Forwarded: not-needed $libs = $vars[2]; if ( !$libs ) { +diff --git a/Tidy.xs b/Tidy.xs +index 630b550..a8bbe82 100644 --- a/Tidy.xs +++ b/Tidy.xs @@ -2,7 +2,7 @@ -- 2.8.1
>From f2652c498511e9de6ce6b06eb0445cfa532ce20e Mon Sep 17 00:00:00 2001 From: Simon McVittie <s...@debian.org> Date: Fri, 22 Jul 2016 09:49:52 +0100 Subject: [PATCH 3/4] Add patches to make the tests pass with tidy-html5 providing libtidy Closes: #829409 --- debian/changelog | 8 ++ debian/control | 2 +- ...tidy-html5-s-differently-formatted-summar.patch | 24 ++++ debian/patches/series | 6 + .../patches/t-allow-tidy-html5-as-generator.patch | 47 ++++++++ ...ert-that-tidy-will-add-a-HTML-3.2-doctype.patch | 52 +++++++++ ...-don-t-assert-that-the-DOCTYPE-is-preserv.patch | 36 ++++++ ...-HTML5-DOCTYPE-to-get-warnings-about-unes.patch | 128 +++++++++++++++++++++ ...s.t-adjust-expected-result-for-tidy-html5.patch | 58 ++++++++++ 9 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch create mode 100644 debian/patches/t-allow-tidy-html5-as-generator.patch create mode 100644 debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch create mode 100644 debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch create mode 100644 debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch create mode 100644 debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch diff --git a/debian/changelog b/debian/changelog index 2c0a999..2101180 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,6 +16,14 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium upstream. 
* d/patches: put all patches in the git style allowed by DEP-3, so they can be manipulated with gbp-pq + * d/p/lib-ignore-tidy-html5-s-differently-formatted-summar.patch, + d/p/t-allow-tidy-html5-as-generator.patch, + d/p/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch, + d/p/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch, + d/p/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch, + d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch: + add patches to make the tests pass with tidy-html5 providing libtidy + (Closes: #829409) -- gregor herrmann <gre...@debian.org> Thu, 27 Feb 2014 22:36:29 +0100 diff --git a/debian/control b/debian/control index 0ca02f8..072fde5 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Section: perl Priority: optional Build-Depends: debhelper (>= 9.20120312), help2man, - libtidy-dev, + libtidy-dev (>= 1:5.2.0), libtest-pod-perl, libtest-pod-coverage-perl, perl (>= 5.13.11) | libtest-simple-perl (>= 0.98), diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch new file mode 100644 index 0000000..04801b2 --- /dev/null +++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch @@ -0,0 +1,24 @@ +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 09:19:08 +0100 +Subject: lib: ignore tidy-html5's differently-formatted summary line + +Signed-off-by: Simon McVittie <s...@debian.org> +--- + lib/HTML/Tidy.pm | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm +index a172ea2..aa16c79 100644 +--- a/lib/HTML/Tidy.pm ++++ b/lib/HTML/Tidy.pm +@@ -265,6 +265,10 @@ sub _parse_errors { + # Summary line we don't want + + } ++ elsif ( $line =~ /^Tidy found \d+ warnings? and \d+ errors?!/ ) { ++ # Summary line we don't want ++ ++ } + elsif ( $line eq 'No warnings or errors were found.' 
) { + # Summary line we don't want + diff --git a/debian/patches/series b/debian/patches/series index 4adbeaa..0fa4da0 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1,9 @@ remove-tidy_version.patch tidy-not-tidyp.patch +lib-ignore-tidy-html5-s-differently-formatted-summar.patch +t-allow-tidy-html5-as-generator.patch +t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch +t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch +t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch +t-venus.t-adjust-expected-result-for-tidy-html5.patch fix-error-message-in-webtidy diff --git a/debian/patches/t-allow-tidy-html5-as-generator.patch b/debian/patches/t-allow-tidy-html5-as-generator.patch new file mode 100644 index 0000000..c3d6f30 --- /dev/null +++ b/debian/patches/t-allow-tidy-html5-as-generator.patch @@ -0,0 +1,47 @@ +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 09:21:39 +0100 +Subject: t: allow tidy-html5 as generator + +Signed-off-by: Simon McVittie <s...@debian.org> +--- + t/roundtrip.t | 2 +- + t/unicode.t | 6 ++---- + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/t/roundtrip.t b/t/roundtrip.t +index e37cb88..2590795 100644 +--- a/t/roundtrip.t ++++ b/t/roundtrip.t +@@ -25,7 +25,7 @@ my @messages = $tidy->messages( $clean ); + + is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} ); + +-$clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; ++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/; + + my $expected = do { local $/ = undef; <DATA> }; + is( $clean, $expected, 'Cleaned up properly' ); +diff --git a/t/unicode.t b/t/unicode.t +index 2f45384..9ca6370 100644 +--- a/t/unicode.t ++++ b/t/unicode.t +@@ -30,8 +30,7 @@ ok(utf8::is_utf8($reference), 'reference is utf8'); + my $clean = $tidy->clean( $html ); + ok(utf8::is_utf8($clean), 'cleaned output is also unicode'); + +-$clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/; +-$clean =~ s/"(HTML 
Tidy|tidyp).+w3\.org"/"Tidy"/; ++$clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/; + is($clean, $reference, q{Cleanup didn't break anything}); + + my @messages = $tidy->messages; +@@ -49,8 +48,7 @@ subtest 'Try send bytes to clean method.' => sub { + ok(!utf8::is_utf8($html), 'html is row bytes'); + my $clean = $tidy->clean( $html ); + ok(utf8::is_utf8($clean), 'but cleaned output is string'); +- $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/; +- $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; ++ $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]*)"/"Tidy"/; + is($clean, $reference, q{Cleanup didn't break anything}); + }; + diff --git a/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch new file mode 100644 index 0000000..abbaffc --- /dev/null +++ b/debian/patches/t-don-t-assert-that-tidy-will-add-a-HTML-3.2-doctype.patch @@ -0,0 +1,52 @@ +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 09:32:09 +0100 +Subject: t: don't assert that tidy will add a HTML 3.2 doctype + +tidy-html5 adds the HTML5 doctype, <!DOCTYPE html>. 
+ +Signed-off-by: Simon McVittie <s...@debian.org> +--- + t/roundtrip.t | 3 ++- + t/wordwrap.t | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/t/roundtrip.t b/t/roundtrip.t +index 2590795..68be07f 100644 +--- a/t/roundtrip.t ++++ b/t/roundtrip.t +@@ -26,12 +26,13 @@ my @messages = $tidy->messages( $clean ); + is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} ); + + $clean =~ s/"((HTML Tidy|tidyp).+w3\.org|HTML Tidy for HTML5[^"]+)"/"Tidy"/; ++$clean =~ s/<!DOCTYPE html[^>]*>/<!DOCTYPE html>/; + + my $expected = do { local $/ = undef; <DATA> }; + is( $clean, $expected, 'Cleaned up properly' ); + + __DATA__ +-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++<!DOCTYPE html> + <html> + <head> + <meta name="generator" content="Tidy"> +diff --git a/t/wordwrap.t b/t/wordwrap.t +index 717d895..3f4daaf 100644 +--- a/t/wordwrap.t ++++ b/t/wordwrap.t +@@ -11,7 +11,7 @@ my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp}; + + my $expected=<<'EOD'; + <!DOCTYPE +-html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++html> + <html> + <head> + <title> +@@ -32,6 +32,7 @@ my $cfg = 't/wordwrap.cfg'; + my $tidy = HTML::Tidy->new( {config_file => $cfg} ); + + my $result = $tidy->clean( $input ); ++$result =~ s/<!DOCTYPE ?\nhtml[^>]*>/<!DOCTYPE \nhtml>/; + my @result = split(/\n/, $result); + is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected'); + diff --git a/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch new file mode 100644 index 0000000..48dda49 --- /dev/null +++ b/debian/patches/t-unicode.t-don-t-assert-that-the-DOCTYPE-is-preserv.patch @@ -0,0 +1,36 @@ +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 09:29:39 +0100 +Subject: t/unicode.t: don't assert that the DOCTYPE is preserved + +tidy-html5 currently doesn't preserve user-supplied DOCTYPEs +in output: 
<https://github.com/htacg/tidy-html5/issues/435> + +Signed-off-by: Simon McVittie <s...@debian.org> +--- + t/unicode.html | 2 +- + t/unicode.t | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/t/unicode.html b/t/unicode.html +index a90f83f..c8d1804 100644 +--- a/t/unicode.html ++++ b/t/unicode.html +@@ -1,4 +1,4 @@ +-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++<!DOCTYPE html> + <html> + <head> + <title>日本語のホムページ</title> +diff --git a/t/unicode.t b/t/unicode.t +index 9ca6370..679b48a 100644 +--- a/t/unicode.t ++++ b/t/unicode.t +@@ -53,7 +53,7 @@ subtest 'Try send bytes to clean method.' => sub { + }; + + __DATA__ +-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> ++<!DOCTYPE html> + <html> + <head> + <meta name="generator" content="Tidy"> diff --git a/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch new file mode 100644 index 0000000..e010fc9 --- /dev/null +++ b/debian/patches/t-use-a-pre-HTML5-DOCTYPE-to-get-warnings-about-unes.patch @@ -0,0 +1,128 @@ +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 09:27:26 +0100 +Subject: t: use a pre-HTML5 DOCTYPE to get warnings about unescaped ampersand + +HTML5 defines an ampersand followed by whitespace to be unambiguously +an ampersand, matching what browsers have always done in practice. +As a result, tidy-html5 does not warn about them when the doctype +is either HTML5 or missing (lack of a DOCTYPE is treated as HTML5, +on the basis that HTML5 is a closer match for what browsers actually +do than any previous standard). Discussion here: +<https://github.com/htacg/tidy-html5/issues/207> + +Adding the DOCTYPE throws off some of the line numbering, which needs +adjusting. 
+ +t/ignore-text.t also seems to rely on the missing DOCTYPE provoking a +warning, which is obviously not going to happen now that we've +added one, to be able to verify that case-insensitive ignoring +can work. Add a new error so we can ignore that instead. + +Signed-off-by: Simon McVittie <s...@debian.org> +--- + t/ignore-text.t | 8 +++++--- + t/ignore.t | 10 +++++----- + t/levels.t | 10 +++++----- + 3 files changed, 15 insertions(+), 13 deletions(-) + +diff --git a/t/ignore-text.t b/t/ignore-text.t +index 9695a5a..a95e0e0 100644 +--- a/t/ignore-text.t ++++ b/t/ignore-text.t +@@ -10,8 +10,8 @@ use HTML::Tidy; + my $html = do { local $/; <DATA> }; + + my @expected_messages = split /\n/, q{ +-DATA (24:XX) Warning: unescaped & which should be written as & +-DATA (24:XX) Warning: unescaped & which should be written as & ++DATA (26:XX) Warning: unescaped & which should be written as & ++DATA (26:XX) Warning: unescaped & which should be written as & + }; + + chomp @expected_messages; +@@ -22,7 +22,7 @@ IGNORE_BOGOTAG: { + isa_ok( $tidy, 'HTML::Tidy' ); + + $tidy->ignore( text => qr/bogotag/ ); +- $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] ); ++ $tidy->ignore( text => [ qr/UNESCAPED/, qr/case-insensitive/i ] ); + # The qr/UNESCAPED/ should not ignore anything because there's no /i + my $rc = $tidy->parse( 'DATA', $html ); + ok( $rc, 'Parsed OK' ); +@@ -44,6 +44,7 @@ sub munge_returned { + } + } + __DATA__ ++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> + <HTML> + <HEAD> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1"> +@@ -67,6 +68,7 @@ DIV.TOC P { + </HEAD> + <BODY BGCOLOR="white"> + <BOGOTAG> ++<CASE-INSENSITIVE> + <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT> + <DIV CLASS="TOC"> + <h2>Perl, Programming & Writing</h2> +diff --git a/t/ignore.t b/t/ignore.t +index 3991733..c0a1317 100644 +--- a/t/ignore.t ++++ b/t/ignore.t +@@ -10,16 +10,15 @@ use 
HTML::Tidy; + my $html = do { local $/ = undef; <DATA> }; + + my @expected_warnings = split /\n/, q{ +-- (1:1) Warning: missing <!DOCTYPE> declaration +-- (23:1) Warning: discarding unexpected <bogotag> +-- (24:XX) Warning: unescaped & which should be written as & +-- (24:XX) Warning: unescaped & which should be written as & ++- (24:1) Warning: discarding unexpected <bogotag> ++- (25:XX) Warning: unescaped & which should be written as & ++- (25:XX) Warning: unescaped & which should be written as & + }; + chomp @expected_warnings; + shift @expected_warnings; # First one's blank + + my @expected_errors = split /\n/, q{ +-- (23:1) Error: <bogotag> is not recognized! ++- (24:1) Error: <bogotag> is not recognized! + }; + chomp @expected_errors; + shift @expected_errors; # First one's blank +@@ -71,6 +70,7 @@ sub munge_returned { + } + } + __DATA__ ++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> + <HTML> + <HEAD> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1"> +diff --git a/t/levels.t b/t/levels.t +index 01aeb3b..2ee3162 100644 +--- a/t/levels.t ++++ b/t/levels.t +@@ -13,11 +13,10 @@ my $rc = $tidy->parse( '-', <DATA> ); + ok( $rc, 'Parsed OK' ); + + my @expected = split /\n/, q{ +-- (1:1) Warning: missing <!DOCTYPE> declaration +-- (23:1) Error: <bogotag> is not recognized! +-- (23:1) Warning: discarding unexpected <bogotag> +-- (24:XX) Warning: unescaped & which should be written as & +-- (24:XX) Warning: unescaped & which should be written as & ++- (24:1) Error: <bogotag> is not recognized! 
++- (24:1) Warning: discarding unexpected <bogotag> ++- (25:XX) Warning: unescaped & which should be written as & ++- (25:XX) Warning: unescaped & which should be written as & + }; + chomp @expected; + shift @expected; # First one's blank +@@ -41,6 +40,7 @@ sub munge_returned { + } + + __DATA__ ++<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> + <HTML> + <HEAD> + <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1"> diff --git a/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch new file mode 100644 index 0000000..004b09f --- /dev/null +++ b/debian/patches/t-venus.t-adjust-expected-result-for-tidy-html5.patch @@ -0,0 +1,58 @@ +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 09:34:53 +0100 +Subject: t/venus.t: adjust expected result for tidy-html5 + +tidy-html5 allows arbitrary block content inside <address>, whereas +traditional tidy only allowed inline content. +Reference: <https://github.com/htacg/tidy-html5/issues/55> + +This change will break with traditional tidy, and it isn't clear +to me how to remain compatible with both. 
+ +Signed-off-by: Simon McVittie <s...@debian.org> +--- + t/venus.t | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +diff --git a/t/venus.t b/t/venus.t +index 41ee597..bd94d4b 100755 +--- a/t/venus.t ++++ b/t/venus.t +@@ -72,18 +72,24 @@ __DATA__ + <a href="../../General/Credits.html">Credits</a> | + <a href="../../General/Feedback.html">Feedback</a> |</h4> + </center> +- <center> +- <p> +- <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" /> +- </p> +- </center> +- <div align="center"></div> +- <center> +- <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address> +- <address> </address> +- <address>Authors: Twin Groves Museums in the Classroom Team,</address> +- <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address> +- </center> +- <center>Created: 27 June 1998- Updated: 6 October 2003</center> ++ <address> ++ <center> ++ <p> ++ <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" /> ++ </p> ++ </center> ++ <div align="center"></div> ++ <address> ++ <center> ++ <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address> ++ <address> </address> ++ <address>Authors: Twin Groves Museums in the Classroom Team,</address> ++ <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address> ++ </center> ++ </address> ++ <address> ++ <center>Created: 27 June 1998- Updated: 6 October 2003</center> ++ </address> ++ </address> + </body> + </html> -- 2.8.1
>From 357626ff2e00753b2980f86e3884eed3b092c7f4 Mon Sep 17 00:00:00 2001 From: Simon McVittie <s...@debian.org> Date: Fri, 22 Jul 2016 10:38:10 +0100 Subject: [PATCH 4/4] d/p/tidy-not-tidyp.patch: alter to support either tidyp or tidy-html5 This reinstates the (now somewhat misleadingly named) _tidyp_version function by using tidy-html5's tidyLibraryVersion(). This is hopefully more palatable to upstream. d/p/remove-tidy_version.patch: drop, no longer needed. --- debian/changelog | 5 + debian/patches/fix-error-message-in-webtidy | 2 +- ...tidy-html5-s-differently-formatted-summar.patch | 4 +- debian/patches/remove-tidy_version.patch | 106 ------------------- debian/patches/series | 1 - debian/patches/tidy-not-tidyp.patch | 115 +++++++++++++++------ 6 files changed, 94 insertions(+), 139 deletions(-) delete mode 100644 debian/patches/remove-tidy_version.patch diff --git a/debian/changelog b/debian/changelog index 2101180..3eb2bbe 100644 --- a/debian/changelog +++ b/debian/changelog @@ -24,6 +24,11 @@ libhtml-tidy-perl (1.56-2) UNRELEASED; urgency=medium d/p/t-venus.t-adjust-expected-result-for-tidy-html5.patch: add patches to make the tests pass with tidy-html5 providing libtidy (Closes: #829409) + * d/p/tidy-not-tidyp.patch: alter to support either tidyp or + tidy-html5, reinstating the (now somewhat misleadingly named) + _tidyp_version function by using tidy-html5's tidyLibraryVersion(). + This is hopefully more palatable to upstream. 
+ - d/p/remove-tidy_version.patch: drop -- gregor herrmann <gre...@debian.org> Thu, 27 Feb 2014 22:36:29 +0100 diff --git a/debian/patches/fix-error-message-in-webtidy b/debian/patches/fix-error-message-in-webtidy index 0efb8d3..1f808ce 100644 --- a/debian/patches/fix-error-message-in-webtidy +++ b/debian/patches/fix-error-message-in-webtidy @@ -8,7 +8,7 @@ Forwarded: not-needed 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/webtidy b/bin/webtidy -index be57814..4a92423 100755 +index dc1a979..5338d09 100755 --- a/bin/webtidy +++ b/bin/webtidy @@ -29,7 +29,7 @@ for my $url ( @ARGV ) { diff --git a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch index 04801b2..31c6042 100644 --- a/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch +++ b/debian/patches/lib-ignore-tidy-html5-s-differently-formatted-summar.patch @@ -8,10 +8,10 @@ Signed-off-by: Simon McVittie <s...@debian.org> 1 file changed, 4 insertions(+) diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm -index a172ea2..aa16c79 100644 +index e19a33d..e527f94 100644 --- a/lib/HTML/Tidy.pm +++ b/lib/HTML/Tidy.pm -@@ -265,6 +265,10 @@ sub _parse_errors { +@@ -266,6 +266,10 @@ sub _parse_errors { # Summary line we don't want } diff --git a/debian/patches/remove-tidy_version.patch b/debian/patches/remove-tidy_version.patch deleted file mode 100644 index 7b553f2..0000000 --- a/debian/patches/remove-tidy_version.patch +++ /dev/null @@ -1,106 +0,0 @@ -From: gregor herrmann <gre...@debian.org> -Date: Sat, 20 Feb 2010 09:50:30 -0500 -Subject: remove tidyVersion - -It is a special call to Andy Lester's modified version of libtidy. Also -remove the corresponding call from Perl, the documentation, and the tests. 
- -Reviewed-by: Jonathan Yu <jaw...@cpan.org> -Origin: vendor -Forwarded: not-needed ---- - Tidy.xs | 11 ----------- - bin/webtidy | 2 +- - lib/HTML/Tidy.pm | 17 ----------------- - t/00-load.t | 2 +- - t/version.t | 5 +++++ - 5 files changed, 7 insertions(+), 30 deletions(-) - -diff --git a/Tidy.xs b/Tidy.xs -index 2238b0b..630b550 100644 ---- a/Tidy.xs -+++ b/Tidy.xs -@@ -189,14 +189,3 @@ _tidy_clean(input, configfile, tidy_options) - if ( rc < 0 ) { - XSRETURN_UNDEF; - } -- -- --SV* --_tidyp_version() -- PREINIT: -- const char* version; -- CODE: -- version = tidyVersion(); -- RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */ -- OUTPUT: -- RETVAL -diff --git a/bin/webtidy b/bin/webtidy -index dc1a979..be57814 100755 ---- a/bin/webtidy -+++ b/bin/webtidy -@@ -19,7 +19,7 @@ GetOptions( - ) or $help = 1; - - if ( !@ARGV || $help ) { -- print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n"; -+ print "webtidy v$HTML::Tidy::VERSION \n"; - print <DATA>; - exit 1; - } -diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm -index 32a5005..a172ea2 100644 ---- a/lib/HTML/Tidy.pm -+++ b/lib/HTML/Tidy.pm -@@ -341,23 +341,6 @@ sub _is_keeper { - return 1; - } - --=head2 tidyp_version() -- --=head2 libtidyp_version() -- --Returns the version of the underling tidyp library. -- --=cut -- --# backcompat --sub libtidyp_version { return shift->tidyp_version } -- --sub tidyp_version { -- my $version_str = _tidyp_version(); -- -- return $version_str; --} -- - require XSLoader; - XSLoader::load('HTML::Tidy', $VERSION); - -diff --git a/t/00-load.t b/t/00-load.t -index b40452d..d10902e 100644 ---- a/t/00-load.t -+++ b/t/00-load.t -@@ -8,5 +8,5 @@ use Test::More tests => 1; - use HTML::Tidy; - use HTML::Tidy::Message; - --diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $]; tidyp " . 
HTML::Tidy->tidyp_version() ); -+diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, Perl $];" ); - pass( 'Modules loaded' ); -diff --git a/t/version.t b/t/version.t -index f225e3e..b05a7da 100644 ---- a/t/version.t -+++ b/t/version.t -@@ -7,7 +7,12 @@ use Test::More tests => 4; - - use HTML::Tidy; - -+SKIP: { -+ skip 'libtidy_version has been removed in Debian', 4; -+ - for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) { - like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' ); - cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' ); - } -+ -+} diff --git a/debian/patches/series b/debian/patches/series index 0fa4da0..51603c6 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,4 +1,3 @@ -remove-tidy_version.patch tidy-not-tidyp.patch lib-ignore-tidy-html5-s-differently-formatted-summar.patch t-allow-tidy-html5-as-generator.patch diff --git a/debian/patches/tidy-not-tidyp.patch b/debian/patches/tidy-not-tidyp.patch index 9875169..554c6a0 100644 --- a/debian/patches/tidy-not-tidyp.patch +++ b/debian/patches/tidy-not-tidyp.patch @@ -1,47 +1,104 @@ -From: Florian Schlichting <f...@debian.org> -Date: Fri, 4 Oct 2013 23:39:39 +0200 -Subject: look for and use tidy.h in /usr/include/tidy (from libtidy-dev) +From: Simon McVittie <s...@debian.org> +Date: Fri, 22 Jul 2016 10:33:50 +0100 +Subject: Look for tidy if tidyp is not found -Forwarded: not-needed +tidy-html5 is an actively-maintained, HTML5-supporting variant of +the tidy library from which tidyp was forked. + +Based on Debian-specific patches by Florian Schlichting and +gregor herrmann, which unconditionally switched from tidyp to tidy. 
--- - Makefile.PL | 6 +++--- - Tidy.xs | 2 +- - 2 files changed, 4 insertions(+), 4 deletions(-) + Makefile.PL | 6 ++++++ + Tidy.xs | 15 +++++++++++++-- + lib/HTML/Tidy.pm | 7 +++++++ + t/version.t | 2 +- + 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Makefile.PL b/Makefile.PL -index 1bca2ef..6ad1b7f 100644 +index 1bca2ef..e3f64a0 100644 --- a/Makefile.PL +++ b/Makefile.PL -@@ -10,7 +10,7 @@ use ExtUtils::Liblist; - use Config; - - my $libs = '-ltidyp'; --my $inc = "-I. -I/usr/include/tidyp -I/usr/local/include/tidyp -I$Config{usrinc}/tidyp"; -+my $inc = "-I. -I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy"; - - eval { require Alien::Tidyp; }; - -@@ -20,8 +20,8 @@ if ( !$@ ) { - $inc = Alien::Tidyp->config('INC'); - } - else { -- print "Alien::Tidyp not found. Looking for for tidyp on your system.\n"; -- my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidyp', 0, 1 ); -+ print "Alien::Tidyp not found. Looking for for tidy on your system.\n"; -+ my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidy', 0, 1 ); +@@ -25,6 +25,12 @@ else { $libs = $vars[2]; if ( !$libs ) { ++ @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidy', 0, 1 ); ++ $libs = $vars[2]; ++ $inc = "-I. 
-I/usr/include/tidy -I/usr/local/include/tidy -I$Config{usrinc}/tidy -DWITH_TIDY"; ++ } ++ ++ if ( !$libs ) { + $libs = '-ltidyp'; + print <<'EOF'; + diff --git a/Tidy.xs b/Tidy.xs -index 630b550..a8bbe82 100644 +index 2238b0b..0b86116 100644 --- a/Tidy.xs +++ b/Tidy.xs -@@ -2,7 +2,7 @@ +@@ -2,8 +2,14 @@ #include "perl.h" #include "XSUB.h" -#include <tidyp.h> -+#include <tidy.h> - #include <buffio.h> +-#include <buffio.h> ++#ifdef WITH_TIDY ++# include <tidy.h> ++# include <tidybuffio.h> ++#else ++# include <tidyp.h> ++# include <buffio.h> ++#endif ++ #include <stdio.h> #include <errno.h> + +@@ -196,7 +202,12 @@ _tidyp_version() + PREINIT: + const char* version; + CODE: ++#ifdef WITH_TIDY ++ /* tidy-html5 is required */ ++ version = tidyLibraryVersion(); ++#else + version = tidyVersion(); ++#endif + RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */ + OUTPUT: + RETVAL +diff --git a/lib/HTML/Tidy.pm b/lib/HTML/Tidy.pm +index 32a5005..e19a33d 100644 +--- a/lib/HTML/Tidy.pm ++++ b/lib/HTML/Tidy.pm +@@ -4,6 +4,7 @@ use 5.008; + use strict; + use warnings; + use Carp (); ++use version 0.77 (); + + use HTML::Tidy::Message; + +@@ -355,6 +356,12 @@ sub libtidyp_version { return shift->tidyp_version } + sub tidyp_version { + my $version_str = _tidyp_version(); + ++ # Convert tidy-html5 versions to the 5.002001 form so they work ++ # with naive numeric comparison ++ if ($version_str !~ m/^0\./) { ++ $version_str = version->parse("v$version_str")->numify; ++ } ++ + return $version_str; + } + +diff --git a/t/version.t b/t/version.t +index f225e3e..c2528cd 100644 +--- a/t/version.t ++++ b/t/version.t +@@ -8,6 +8,6 @@ use Test::More tests => 4; + use HTML::Tidy; + + for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) { +- like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' ); ++ like( $version_string, qr/^\d\.\d+$/, 'Valid version string' ); + cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which 
is the one I maintain' ); + } -- 2.8.1