[SCM] Debian package checker branch, master, updated. 2.5.11-247-gfed3588

2013-04-13 Thread Niels Thykier
The following commit has been merged in the master branch:
commit fed3588cea6f8ae3366ee4a19d25824801cf0e5a
Author: Niels Thykier ni...@thykier.net
Date:   Sat Apr 13 11:01:31 2013 +0200

t: Add missing tag in Test-For field

Signed-off-by: Niels Thykier ni...@thykier.net

diff --git a/t/tests/files-adobe-font/desc b/t/tests/files-adobe-font/desc
index adb0c13..08f339b 100644
--- a/t/tests/files-adobe-font/desc
+++ b/t/tests/files-adobe-font/desc
@@ -2,4 +2,6 @@ Testname: files-adobe-font
 Sequence: 6000
 Version: 1.0
 Description: Test for adobe font license issues
-Test-For: font-adobe-copyrighted-fragment
+Test-For:
+ font-adobe-copyrighted-fragment
+ font-adobe-copyrighted-fragment-no-credit

-- 
Debian package checker


-- 
To UNSUBSCRIBE, email to debian-lint-maint-requ...@lists.debian.org
with a subject of unsubscribe. Trouble? Contact listmas...@lists.debian.org
Archive: http://lists.debian.org/e1uqwlr-ta...@vasks.debian.org



[SCM] Debian package checker branch, master, updated. 2.5.11-247-gfed3588

2013-04-13 Thread Niels Thykier
The following commit has been merged in the master branch:
commit c8b97181e1b78508c9264a559368b9dca6540f77
Author: Niels Thykier ni...@thykier.net
Date:   Sat Apr 13 11:00:23 2013 +0200

c/files: Check for files in non-UTF-8 encoding

Signed-off-by: Niels Thykier ni...@thykier.net

diff --git a/checks/files b/checks/files
index 5ba18c8..c77e07b 100644
--- a/checks/files
+++ b/checks/files
@@ -26,7 +26,7 @@ use File::Basename;
 
 use Lintian::Data;
 use Lintian::Tags qw(tag);
-use Lintian::Util qw(fail open_gz);
+use Lintian::Util qw(fail is_string_utf8_encoded open_gz);
 
 my $FONT_PACKAGES = Lintian::Data->new ('files/fonts', qr/\s++/);
 my $TRIPLETS = Lintian::Data->new ('files/triplets', qr/\s++/);
@@ -242,7 +242,6 @@ if (!$is_dummy) {
 
 # Read package contents...
 foreach my $file ($info->sorted_index) {
-next if $file eq '';
 my $index_info = $info->index ($file);
 my $owner = $index_info->owner . '/' . $index_info->group;
 my $operm = $index_info->operm;
@@ -250,6 +249,10 @@ foreach my $file ($info->sorted_index) {
 
 $arch_dep_files = 1 if $file !~ m,^usr/share/,o && $file ne 'usr/';
 
+if (!is_string_utf8_encoded($file)) {
+tag 'file-name-is-not-valid-UTF-8', $file;
+}
+
 if ($index_info->is_hardlink) {
 my $link_target_dir = $link;
 $link_target_dir =~ s,[^/]*$,,;
diff --git a/checks/files.desc b/checks/files.desc
index 0e47660..12d571a 100644
--- a/checks/files.desc
+++ b/checks/files.desc
@@ -1304,3 +1304,13 @@ Tag: dir-or-file-in-build-tree
 Severity: serious
 Certainty: possible
 Info: Your package install file in our build tree.
+
+Tag: file-name-is-not-valid-UTF-8
+Severity: normal
+Certainty: certain
+Ref: #701081
+Info: The file name does not appear to be valid UTF-8.  This may become
+ a requirement in future Policy versions.
+ .
+ Note that Lintian may be unable to display the filename accurately.
+ Unprintable characters may have been replaced.
diff --git a/debian/changelog b/debian/changelog
index 8205368..c72abfa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -11,6 +11,7 @@ lintian (2.5.12) UNRELEASED; urgency=low
   - dir-or-file-in-build-tree
   - dir-or-file-in-etc-opt
   - dir-or-file-in-home
+  - file-name-is-not-valid-UTF-8
   - font-adobe-copyrighted-fragment-no-credit
   - font-package-not-multi-arch-foreign
   - illegal-runtime-test-name
@@ -94,6 +95,9 @@ lintian (2.5.12) UNRELEASED; urgency=low
   spotting it.  (Closes: #699452)
 + [NT] Add patch from Bastien Roucariès to check for another
   adobe font license issues.  (Closes: #705175)
++ [NT] Test for use of file names that contain invalid
+  UTF-8 byte sequences.  Thanks to Helmut Grohne for the
+  suggestion.  (Closes: #704446)
   * checks/init.d:
 + [NT] Fix regression where Lintian would not properly match
   init.d passed to update-rc.d.  Thanks to Michael Meskes for
diff --git a/lib/Lintian/Util.pm b/lib/Lintian/Util.pm
index df7c093..9c9fd4f 100644
--- a/lib/Lintian/Util.pm
+++ b/lib/Lintian/Util.pm
@@ -57,6 +57,7 @@ BEGIN {
  get_file_checksum
  slurp_entire_file
  file_is_encoded_in_non_utf8
+ is_string_utf8_encoded
  fail
  strip
  lstrip
@@ -712,6 +713,29 @@ sub get_file_checksum {
 return $digest->hexdigest;
 }
 
+=item is_string_utf8_encoded(STRING)
+
+Returns a truth value if STRING can be decoded as valid UTF-8.
+
+=cut
+
+sub is_string_utf8_encoded {
+my ($str) = @_;
+if ($str =~ m,\e[-!\$%()*+./],) {
+# ISO-2022
+return 0;
+}
+eval {
+Encode::decode('UTF-8', $str, Encode::FB_CROAK);
+};
+if ($@) {
+# fail
+return 0;
+}
+# pass
+return 1;
+}
+
 =item file_is_encoded_in_non_utf8 (...)
 
 Undocumented
@@ -726,15 +750,7 @@ sub file_is_encoded_in_non_utf8 {
 or fail("failure while checking encoding of $file for $type package 
$pkg");
 my $line = 0;
 while (<$fd>) {
-if (m,\e[-!\$%()*+./],) {
-# ISO-2022
-$line = $.;
-last;
-}
-eval {
-$_ = Encode::decode('UTF-8', $_, Encode::FB_CROAK);
-};
-if ($@) {
+if (!is_string_utf8_encoded($_)) {
 $line = $.;
 last;
 }
diff --git a/t/tests/files-general/debian/debian/rules 
b/t/tests/files-general/debian/debian/rules
index 1806e1d..e5e7c6e 100755
--- a/t/tests/files-general/debian/debian/rules
+++ b/t/tests/files-general/debian/debian/rules
@@ -13,6 +13,10 @@ override_dh_install:
touch $(tmp)/usr/share/foo/'*'
touch $(tmp)/usr/share/foo/'ws '
touch $(tmp)/usr/share/foo/.nfs-fake-tmpfile
+   # If the following line gets messed up, it can be
+   # restored with something like:
+   #   sed -i 's/@FILE@/bokm\xe5l/'
+   touch $(tmp)/usr/share/foo/bokm�l
touch