Package: release.debian.org Severity: normal User: release.debian....@packages.debian.org Usertags: unblock
Please unblock package pg-checksums It fixes #928232 which is a rather serious problem for a program that is supposed to find data corruption. unblock pg-checksums/0.8-3 -- System Information: Debian Release: 8.10 APT prefers oldstable APT policy: (500, 'oldstable') Architecture: i386 (i686) Kernel: Linux 3.16.0-4-686-pae (SMP w/1 CPU core) Locale: LANG=de_DE.UTF-8, LC_CTYPE=de_DE.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash Init: systemd (via /run/systemd/system)
diff -Nru pg-checksums-0.8/debian/changelog pg-checksums-0.8/debian/changelog --- pg-checksums-0.8/debian/changelog 2019-03-09 16:14:40.000000000 +0100 +++ pg-checksums-0.8/debian/changelog 2019-04-30 14:55:21.000000000 +0200 @@ -1,3 +1,16 @@ +pg-checksums (0.8-3) unstable; urgency=medium + + [ Christoph Berg ] + * Remove myself from uploaders. + + [ Michael Banck ] + * debian/patches/zero_random_pageheader.patch: New patch, fixes false + negatives for all-zero or random pageheaders which were not detected as + checksum failures previously. Adapted from upstream commit 043f003f + (Closes: #928232). + + -- Michael Banck <michael.ba...@credativ.de> Tue, 30 Apr 2019 14:55:21 +0200 + pg-checksums (0.8-2) unstable; urgency=medium * debian/patches/fsync_pgdata.patch: New patch, backports the fsync_pgdata() diff -Nru pg-checksums-0.8/debian/control pg-checksums-0.8/debian/control --- pg-checksums-0.8/debian/control 2019-02-26 21:39:57.000000000 +0100 +++ pg-checksums-0.8/debian/control 2019-04-30 14:44:14.000000000 +0200 @@ -3,7 +3,6 @@ Priority: optional Maintainer: Debian PostgreSQL Maintainers <team+postgre...@tracker.debian.org> Uploaders: Michael Banck <michael.ba...@credativ.de>, - Christoph Berg <m...@debian.org>, Build-Depends: debhelper (>= 9), postgresql-server-dev-all (>= 153~), diff -Nru pg-checksums-0.8/debian/control.in pg-checksums-0.8/debian/control.in --- pg-checksums-0.8/debian/control.in 2018-10-12 14:04:31.000000000 +0200 +++ pg-checksums-0.8/debian/control.in 2019-04-15 23:18:34.000000000 +0200 @@ -3,7 +3,6 @@ Priority: optional Maintainer: Debian PostgreSQL Maintainers <team+postgre...@tracker.debian.org> Uploaders: Michael Banck <michael.ba...@credativ.de>, - Christoph Berg <m...@debian.org>, Build-Depends: debhelper (>= 9), postgresql-server-dev-all (>= 153~), diff -Nru pg-checksums-0.8/debian/patches/series pg-checksums-0.8/debian/patches/series --- pg-checksums-0.8/debian/patches/series 2019-02-26 21:19:49.000000000 +0100 +++ pg-checksums-0.8/debian/patches/series 2019-04-23 17:14:59.000000000 +0200 @@ -1,2 +1,3 @@ postgresnode.patch fsync_pgdata.patch +zero_random_pageheader.patch diff -Nru pg-checksums-0.8/debian/patches/zero_random_pageheader.patch pg-checksums-0.8/debian/patches/zero_random_pageheader.patch --- pg-checksums-0.8/debian/patches/zero_random_pageheader.patch 1970-01-01 01:00:00.000000000 +0100 +++ pg-checksums-0.8/debian/patches/zero_random_pageheader.patch 2019-04-30 13:40:05.000000000 +0200 @@ -0,0 +1,355 @@ +commit 043f003fc1fc1cdf8a370971edf6e9323034b15d +Author: Michael Banck <mba...@debian.org> +Date: Tue Apr 23 16:03:16 2019 +0200 + + Fix checksum failure detection for all-zero or random pageheaders. + + Previously, a zeroed-out pageheader would lead to that page getting skipped as + being new, even if other parts of the page have (corrupted) data in it. Add an + additional check that the whole page shall be zero and report a checksum + failure if that is not the case. + + Also, random data in the pageheader would very likely make the page's LSN so + large that it would be considered newer than the checkpoint LSN. As we are not + connected to the server, we cannot inquire the current insert record pointer to + check against or lock the page, so we (i) advance the checkpoint LSN to the + current pg_control value and (ii) demand that the upper 32 bits of the LSN are + identical between the checkpoint LSN and the page's LSN in order to skip the + page rather than report a checksum failure. + + Additional TAP tests are added for both cases as well. + +Index: pg-checksums/pg_checksums.c +=================================================================== +--- pg-checksums.orig/pg_checksums.c ++++ pg-checksums/pg_checksums.c +@@ -77,6 +77,8 @@ static bool deactivate = false; + static bool show_progress = false; + static bool online = false; + ++static char *DataDir = NULL; ++ + static const char *progname; + + /* +@@ -181,6 +183,28 @@ isRelFileName(const char *fn) + return true; + } + ++ static void ++update_checkpoint_lsn(void) ++{ ++ bool crc_ok; ++ ++#if PG_VERSION_NUM >= 100000 ++ ControlFile = get_controlfile(DataDir, progname, &crc_ok); ++ if (!crc_ok) ++ { ++ fprintf(stderr, _("%s: pg_control CRC value is incorrect\n"), progname); ++ exit(1); ++ } ++#elif PG_VERSION_NUM >= 90600 ++ ControlFile = get_controlfile(DataDir, progname); ++#else ++ ControlFile = getControlFile(DataDir); ++#endif ++ ++ /* Update checkpointLSN with the current value */ ++ checkpointLSN = ControlFile->checkPoint; ++} ++ + static void + toggle_progress_report(int signo, + siginfo_t * siginfo, +@@ -252,6 +276,8 @@ scan_file(const char *fn, BlockNumber se + int f; + BlockNumber blockno; + bool block_retry = false; ++ bool all_zeroes; ++ size_t *pagebytes; + + f = open(fn, O_RDWR | PG_BINARY, 0); + if (f < 0) +@@ -273,6 +299,7 @@ scan_file(const char *fn, BlockNumber se + { + uint16 csum; + int r = read(f, buf.data, BLCKSZ); ++ int i; + + if (debug && block_retry) + fprintf(stderr, _("%s: retrying block %d in file \"%s\"\n"), +@@ -321,18 +348,38 @@ scan_file(const char *fn, BlockNumber se + continue; + } + ++ blocks++; ++ + /* New pages have no checksum yet */ + if (PageIsNew(header)) + { +- if (debug && block_retry) +- fprintf(stderr, _("%s: block %d in file \"%s\" is new, ignoring\n"), +- progname, blockno, fn); +- skippedblocks++; ++ /* Check for an all-zeroes page */ ++ all_zeroes = true; ++ pagebytes = (size_t *) buf.data; ++ for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++) ++ { ++ if (pagebytes[i] != 0) ++ { ++ all_zeroes = false; ++ break; ++ } ++ } ++ if (!all_zeroes) ++ { ++ fprintf(stderr, _("%s: checksum verification failed in file \"%s\", block %u: pd_upper is zero but block is not all-zero\n"), ++ progname, fn, blockno); ++ badblocks++; ++ } ++ else ++ { ++ if (debug && block_retry) ++ fprintf(stderr, _("%s: block %d in file \"%s\" is new, ignoring\n"), ++ progname, blockno, fn); ++ skippedblocks++; ++ } + continue; + } + +- blocks++; +- + csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE); + current_size += r; + +@@ -367,23 +414,34 @@ scan_file(const char *fn, BlockNumber se + + /* Reset loop to validate the block again */ + blockno--; ++ blocks--; + current_size -= r; + ++ /* ++ * Update the checkpoint LSN now. If we get a failure on ++ * re-read, we would need to do this anyway, and doing it ++ * now lowers the probability that we see the same torn ++ * page on re-read. ++ */ ++ update_checkpoint_lsn(); ++ + continue; + } + + /* + * The checksum verification failed on retry as well. Check if + * the page has been modified since the checkpoint and skip it +- * in this case. ++ * in this case. As a sanity check, demand that the upper 32 ++ * bits of the LSN are identical in order to skip as a guard ++ * against a corrupted LSN in the pageheader. + */ +- if (PageGetLSN(buf.data) > checkpointLSN) ++ if ((PageGetLSN(buf.data) > checkpointLSN) && ++ (PageGetLSN(buf.data) >> 32 == checkpointLSN >> 32)) + { + if (debug) + fprintf(stderr, _("%s: block %d in file \"%s\" with LSN %X/%X is newer than checkpoint LSN %X/%X, ignoring\n"), + progname, blockno, fn, (uint32) (PageGetLSN(buf.data) >> 32), (uint32) PageGetLSN(buf.data), (uint32) (checkpointLSN >> 32), (uint32) checkpointLSN); + block_retry = false; +- blocks--; + skippedblocks++; + continue; + } +@@ -897,7 +955,6 @@ main(int argc, char *argv[]) + {NULL, 0, NULL, 0} + }; + +- char *DataDir = NULL; + int c; + int option_index; + #if PG_VERSION_NUM >= 100000 +Index: pg-checksums/t/001_checksums.pl +=================================================================== +--- pg-checksums.orig/t/001_checksums.pl ++++ pg-checksums/t/001_checksums.pl +@@ -6,7 +6,7 @@ use Cwd; + use Config; + use PostgresNode; + use TestLib; +-use Test::More tests => 49; ++use Test::More tests => 97; + + program_help_ok('pg_checksums'); + program_version_ok('pg_checksums'); +@@ -14,8 +14,8 @@ program_options_handling_ok('pg_checksum + + my $tempdir = TestLib::tempdir; + +-# Initialize node +-my $node = get_new_node('main'); ++# Initialize node with checksums disabled. ++my $node = get_new_node('node_checksum'); + $node->init; + + $node->start; +@@ -80,70 +80,112 @@ $node->stop; + $node->command_ok(['pg_checksums', '-a', '-D', $pgdata], + 'pg_checksums are again activated in offline cluster'); + +-#exit 0; + $node->start; + + $node->command_ok(['pg_checksums', '-c', '-D', $pgdata], + 'pg_checksums can be verified in online cluster'); + +-# create table to corrupt and get their relfilenode +-create_corruption($node, 'corrupt1', 'pg_default'); +- +-$node->command_checks_all([ 'pg_checksums', '-c', '-D', $pgdata], +- 1, +- [qr/Bad checksums: 1/s], +- [qr/checksum verification failed/s], +- 'pg_checksums reports checksum mismatch' +-); +- +-# drop corrupt table again and make sure there is no more corruption +-$node->safe_psql('postgres', 'DROP TABLE corrupt1;'); +-$node->command_ok(['pg_checksums', '-c', '-D', $pgdata], +- 'pg_checksums can be verified in online cluster: '.getcwd()); +- +- +-# create table to corrupt in a non-default tablespace and get their relfilenode +-my $tablespace_dir = getcwd()."/tmp_check/ts_corrupt_dir"; ++# Set page header and block size ++my $pageheader_size = 24; ++my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); ++ ++# Check corruption of table on default tablespace. ++check_relation_corruption($node, 'corrupt1', 'pg_default', $pageheader_size, "\0\0\0\0\0\0\0\0\0", "on tablespace pg_default"); ++ ++# Create tablespace to check corruptions in a non-default tablespace. ++my $basedir = $node->basedir; ++my $tablespace_dir = "$basedir/ts_corrupt_dir"; + mkdir ($tablespace_dir); +-$node->safe_psql('postgres', "CREATE TABLESPACE ts_corrupt LOCATION '".$tablespace_dir."';"); +-create_corruption($node, 'corrupt2', 'ts_corrupt'); +- +-$node->command_checks_all([ 'pg_checksums', '-c', '-D', $pgdata], +- 1, +- [qr/Bad checksums: 1/s], +- [qr/checksum verification failed/s], +- 'pg_checksums reports checksum mismatch on non-default tablespace' +-); +- +-# drop corrupt table again and make sure there is no more corruption +-$node->safe_psql('postgres', 'DROP TABLE corrupt2;'); +-$node->command_ok(['pg_checksums', '-c', '-D', $pgdata], +- 'pg_checksums can be verified in online cluster'); +- +-# Utility routine to create a table with corrupted checksums. +-# It stops the node (if running), and starts it again. +-sub create_corruption ++$tablespace_dir = TestLib::real_dir($tablespace_dir); ++$node->safe_psql('postgres', ++ "CREATE TABLESPACE ts_corrupt LOCATION '$tablespace_dir';"); ++check_relation_corruption($node, 'corrupt2', 'ts_corrupt', $pageheader_size, "\0\0\0\0\0\0\0\0\0", "on tablespace ts_corrupt"); ++ ++# Check corruption in the pageheader with random data in it ++my $random_data = join '', map { ("a".."z")[rand 26] } 1 .. $pageheader_size; ++check_relation_corruption($node, 'corrupt1', 'pg_default', 0, $random_data, "with random data in pageheader"); ++ ++# Check corruption when the pageheader has been zeroed-out completely ++my $zero_data = "\0"x$pageheader_size; ++check_relation_corruption($node, 'corrupt1', 'pg_default', 0, $zero_data, "with zeroed-out pageheader"); ++ ++# Utility routine to create and check a table with corrupted checksums ++# on a wanted tablespace. Note that this stops and starts the node ++# multiple times to perform the checks, leaving the node started ++# at the end. ++sub check_relation_corruption + { + my $node = shift; + my $table = shift; + my $tablespace = shift; +- +- my $query = "SELECT a INTO ".$table." FROM generate_series(1,10000) AS a; ALTER TABLE ".$table." SET (autovacuum_enabled=false), SET TABLESPACE ".$tablespace."; SELECT pg_relation_filepath('".$table."')"; +- my $file_name = $node->safe_psql('postgres', $query); +- +- # set page header and block sizes +- my $pageheader_size = 24; +- my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); ++ my $offset = shift; ++ my $corrupted_data = shift; ++ my $description = shift; ++ my $pgdata = $node->data_dir; ++ ++ $node->safe_psql('postgres', ++ "SELECT a INTO $table FROM generate_series(1,10000) AS a; ++ ALTER TABLE $table SET (autovacuum_enabled=false);"); ++ ++ $node->safe_psql('postgres', ++ "ALTER TABLE ".$table." SET TABLESPACE ".$tablespace.";"); ++ ++ my $file_corrupted = $node->safe_psql('postgres', ++ "SELECT pg_relation_filepath('$table');"); ++ my $relfilenode_corrupted = $node->safe_psql('postgres', ++ "SELECT relfilenode FROM pg_class WHERE relname = '$table';"); + + $node->stop; + +- open my $file, '+<', "$pgdata/$file_name"; +- seek($file, $pageheader_size, 0); +- syswrite($file, '\0\0\0\0\0\0\0\0\0'); ++ # Checksums are correct for single relfilenode as the table is not ++ # corrupted yet. ++ command_ok(['pg_checksums', '-c', '-D', $pgdata, '-r', ++ $relfilenode_corrupted], ++ "succeeds for single relfilenode $description with offline cluster"); ++ ++ # Time to create some corruption ++ open my $file, '+<', "$pgdata/$file_corrupted"; ++ seek($file, $offset, 0); ++ syswrite($file, $corrupted_data); + close $file; + ++ # Checksum checks on single relfilenode fail ++ $node->command_checks_all([ 'pg_checksums', '-c', '-D', $pgdata, ++ '-r', $relfilenode_corrupted], ++ 1, ++ [qr/Bad checksums:.*1/], ++ [qr/checksum verification failed/], ++ "fails with corrupted data $description"); ++ ++ # Global checksum checks fail as well ++ $node->command_checks_all([ 'pg_checksums', '-c', '-D', $pgdata], ++ 1, ++ [qr/Bad checksums:.*1/], ++ [qr/checksum verification failed/], ++ "fails with corrupted data for single relfilenode on tablespace $tablespace"); ++ ++ # Now check online as well + $node->start; + ++ # Checksum checks on single relfilenode fail ++ $node->command_checks_all([ 'pg_checksums', '-c', '-D', $pgdata, ++ '-r', $relfilenode_corrupted], ++ 1, ++ [qr/Bad checksums:.*1/], ++ [qr/checksum verification failed/], ++ "fails with corrupted data $description"); ++ ++ # Global checksum checks fail as well ++ $node->command_checks_all([ 'pg_checksums', '-c', '-D', $pgdata], ++ 1, ++ [qr/Bad checksums:.*1/], ++ [qr/checksum verification failed/], ++ "fails with corrupted data for single relfilenode on tablespace $tablespace"); ++ ++ # Drop corrupted table again and make sure there is no more corruption. ++ $node->safe_psql('postgres', "DROP TABLE $table;"); ++ $node->command_ok(['pg_checksums', '-c', '-D', $pgdata], ++ "succeeds again after table drop on tablespace $tablespace"); + return; + } + diff -Nru pg-checksums-0.8/debian/source/lintian-overrides pg-checksums-0.8/debian/source/lintian-overrides --- pg-checksums-0.8/debian/source/lintian-overrides 2018-10-12 14:04:31.000000000 +0200 +++ pg-checksums-0.8/debian/source/lintian-overrides 1970-01-01 01:00:00.000000000 +0100 @@ -1,3 +0,0 @@ -# don't bug people uploading from @work -source: changelog-should-mention-nmu -source: source-nmu-has-incorrect-version-number