The following commit has been merged in the master branch: commit c8b97181e1b78508c9264a559368b9dca6540f77 Author: Niels Thykier <ni...@thykier.net> Date: Sat Apr 13 11:00:23 2013 +0200
c/files: Check for files in non-UTF-8 encoding Signed-off-by: Niels Thykier <ni...@thykier.net> diff --git a/checks/files b/checks/files index 5ba18c8..c77e07b 100644 --- a/checks/files +++ b/checks/files @@ -26,7 +26,7 @@ use File::Basename; use Lintian::Data; use Lintian::Tags qw(tag); -use Lintian::Util qw(fail open_gz); +use Lintian::Util qw(fail is_string_utf8_encoded open_gz); my $FONT_PACKAGES = Lintian::Data->new ('files/fonts', qr/\s++/); my $TRIPLETS = Lintian::Data->new ('files/triplets', qr/\s++/); @@ -242,7 +242,6 @@ if (!$is_dummy) { # Read package contents... foreach my $file ($info->sorted_index) { - next if $file eq ''; my $index_info = $info->index ($file); my $owner = $index_info->owner . '/' . $index_info->group; my $operm = $index_info->operm; @@ -250,6 +249,10 @@ foreach my $file ($info->sorted_index) { $arch_dep_files = 1 if $file !~ m,^usr/share/,o && $file ne 'usr/'; + if (!is_string_utf8_encoded($file)) { + tag 'file-name-is-not-valid-UTF-8', $file; + } + if ($index_info->is_hardlink) { my $link_target_dir = $link; $link_target_dir =~ s,[^/]*$,,; diff --git a/checks/files.desc b/checks/files.desc index 0e47660..12d571a 100644 --- a/checks/files.desc +++ b/checks/files.desc @@ -1304,3 +1304,13 @@ Tag: dir-or-file-in-build-tree Severity: serious Certainty: possible Info: Your package install file in our build tree. + +Tag: file-name-is-not-valid-UTF-8 +Severity: normal +Certainty: certain +Ref: #701081 +Info: The file name does not appear to be valid UTF-8. This may become + a requirement in future Policy versions. + . + Note that Lintian may be unable to display the filename accurately. + Unprintable characters may have been replaced. 
diff --git a/debian/changelog b/debian/changelog index 8205368..c72abfa 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,6 +11,7 @@ lintian (2.5.12) UNRELEASED; urgency=low - dir-or-file-in-build-tree - dir-or-file-in-etc-opt - dir-or-file-in-home + - file-name-is-not-valid-UTF-8 - font-adobe-copyrighted-fragment-no-credit - font-package-not-multi-arch-foreign - illegal-runtime-test-name @@ -94,6 +95,9 @@ lintian (2.5.12) UNRELEASED; urgency=low spotting it. (Closes: #699452) + [NT] Add patch from Bastien Roucariès to check for another adobe font license issues. (Closes: #705175) + + [NT] Test for use of file names that contain invalid + UTF-8 byte sequences. Thanks to Helmut Grohne for the + suggestion. (Closes: #704446) * checks/init.d: + [NT] Fix regression where Lintian would not properly match init.d passed to update-rc.d. Thanks to Michael Meskes for diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm index df7c093..9c9fd4f 100644 --- a/lib/Lintian/Util.pm +++ b/lib/Lintian/Util.pm @@ -57,6 +57,7 @@ BEGIN { get_file_checksum slurp_entire_file file_is_encoded_in_non_utf8 + is_string_utf8_encoded fail strip lstrip @@ -712,6 +713,29 @@ sub get_file_checksum { return $digest->hexdigest; } +=item is_string_utf8_encoded(STRING) + +Returns a truth value if STRING can be decoded as valid UTF-8. + +=cut + +sub is_string_utf8_encoded { + my ($str) = @_; + if ($str =~ m,\e[-!"\$%()*+./],) { + # ISO-2022 + return 0; + } + eval { + Encode::decode('UTF-8', $str, Encode::FB_CROAK); + }; + if ($@) { + # fail + return 0; + } + # pass + return 1; +} + =item file_is_encoded_in_non_utf8 (...) 
Undocumented @@ -726,15 +750,7 @@ sub file_is_encoded_in_non_utf8 { or fail("failure while checking encoding of $file for $type package $pkg"); my $line = 0; while (<$fd>) { - if (m,\e[-!"\$%()*+./],) { - # ISO-2022 - $line = $.; - last; - } - eval { - $_ = Encode::decode('UTF-8', $_, Encode::FB_CROAK); - }; - if ($@) { + if (!is_string_utf8_encoded($_)) { $line = $.; last; } diff --git a/t/tests/files-general/debian/debian/rules b/t/tests/files-general/debian/debian/rules index 1806e1d..e5e7c6e 100755 --- a/t/tests/files-general/debian/debian/rules +++ b/t/tests/files-general/debian/debian/rules @@ -13,6 +13,10 @@ override_dh_install: touch $(tmp)/usr/share/foo/'*' touch $(tmp)/usr/share/foo/'ws ' touch $(tmp)/usr/share/foo/.nfs-fake-tmpfile + # If the following line gets messed up, it can be + # restored with something like: + # sed -i 's/@FILE@/bokm\xe5l/' + touch $(tmp)/usr/share/foo/bokm�l touch $(tmp)/var/catman/do override_dh_fixperms: diff --git a/t/tests/files-general/desc b/t/tests/files-general/desc index a39c520..51b701a 100644 --- a/t/tests/files-general/desc +++ b/t/tests/files-general/desc @@ -14,6 +14,7 @@ Test-For: executable-manpage file-in-unusual-dir file-name-ends-in-whitespace + file-name-is-not-valid-UTF-8 global-data-in-games-directory icon-size-and-directory-name-mismatch lengthy-symlink diff --git a/t/tests/files-general/tags b/t/tests/files-general/tags index df6f91f..ecdf64c 100644 --- a/t/tests/files-general/tags +++ b/t/tests/files-general/tags @@ -22,6 +22,7 @@ W: files-general: executable-not-elf-or-script usr/share/man/man5/foo.5.gz W: files-general: file-in-unusual-dir new-top-level-dir/file-in-new-top-level-dir W: files-general: file-in-unusual-dir var/catman/do W: files-general: file-name-ends-in-whitespace usr/share/foo/ws +W: files-general: file-name-is-not-valid-UTF-8 usr/share/foo/bokm?l W: files-general: icon-size-and-directory-name-mismatch usr/share/apps/lintian/icons/hicolor/22x22/lintian-16x16.png 16x16 W: files-general: 
icon-size-and-directory-name-mismatch usr/share/apps/lintian/icons/hicolor/64x64/lintian-64x64.png 22x22 W: files-general: icon-size-and-directory-name-mismatch usr/share/icons/hicolor/22x22/apps/lintian-16x16.png 16x16 diff --git a/testset/tags.filenames b/testset/tags.filenames index 0a45f1c..7fe8258 100644 --- a/testset/tags.filenames +++ b/testset/tags.filenames @@ -99,6 +99,8 @@ W: filenames: file-in-unusual-dir files/svk-commitsEr9P.tmp W: filenames: file-in-unusual-dir files/svn-commit.tmp W: filenames: file-in-usr-lib-sgml usr/lib/sgml/package W: filenames: file-name-ends-in-whitespace files/'\ +W: filenames: file-name-is-not-valid-UTF-8 usr/share/doc/filenames/bokm?l +W: filenames: file-name-is-not-valid-UTF-8 usr/share/doc/filenames/bokm\?l W: filenames: gz-file-not-gzip usr/share/filenames/prototype.js.gz W: filenames: macos-ds-store-file-in-package usr/share/doc/filenames/.DS_Store W: filenames: macos-resource-fork-file-in-package usr/share/doc/filenames/._NEWS.Debian -- Debian package checker -- To UNSUBSCRIBE, email to debian-lint-maint-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1uqwlq-0000tp...@vasks.debian.org