From 6930c51691e9f612c8167d567543f735fc193eb7 Mon Sep 17 00:00:00 2001
From: Hubert Zhang <hzhang@pivotal.io>
Date: Mon, 17 Feb 2020 09:16:44 +0000
Subject: [PATCH] Print physical file path when verify checksum failed

Move the verify checksum logic into md.c to avoid every smgrread()
needs to separately verify checksums.
This also enables printing physical file path when verify checksum
failed, since file segmenting is also handled in md.c.
---
 src/backend/catalog/storage.c       | 14 +++++--------
 src/backend/storage/buffer/bufmgr.c | 25 ++++++----------------
 src/backend/storage/page/bufpage.c  | 41 +++++++++++++++++++++++++++++++++++--
 src/backend/storage/smgr/md.c       | 17 +++++++++++++++
 src/common/relpath.c                | 24 ++++++++++++++++++++++
 src/include/common/relpath.h        | 21 +++++++++++++++++++
 src/include/storage/bufpage.h       |  7 ++++---
 src/include/storage/sync.h          |  1 +
 8 files changed, 117 insertions(+), 33 deletions(-)

diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index fddfbf1d8c..f845e696ce 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -367,17 +367,13 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 		/* If we got a cancel signal during the copy of the data, quit */
 		CHECK_FOR_INTERRUPTS();
 
+		/*
+		 * Disable zero damaged page in checksum veirfy
+		 */
+		SetZeroDamagedPageInChecksum(false);
+		
 		smgrread(src, forkNum, blkno, buf.data);
 
-		if (!PageIsVerified(page, blkno))
-			ereport(ERROR,
-					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("invalid page in block %u of relation %s",
-							blkno,
-							relpathbackend(src->smgr_rnode.node,
-										   src->smgr_rnode.backend,
-										   forkNum))));
-
 		/*
 		 * WAL-log the copied page. Unfortunately we don't know what kind of a
 		 * page this is, so we have to log the full page including any unused
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 5880054245..6d84a9950a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -895,6 +895,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			if (track_io_timing)
 				INSTR_TIME_SET_CURRENT(io_start);
 
+			/*
+			 * Set whether zero damaged page in checksum veirfy
+			 */
+			if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
+				SetZeroDamagedPageInChecksum(true);
+
 			smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
 
 			if (track_io_timing)
@@ -905,25 +911,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 				INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
 			}
 
-			/* check for garbage data */
-			if (!PageIsVerified((Page) bufBlock, blockNum))
-			{
-				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
-				{
-					ereport(WARNING,
-							(errcode(ERRCODE_DATA_CORRUPTED),
-							 errmsg("invalid page in block %u of relation %s; zeroing out page",
-									blockNum,
-									relpath(smgr->smgr_rnode, forkNum))));
-					MemSet((char *) bufBlock, 0, BLCKSZ);
-				}
-				else
-					ereport(ERROR,
-							(errcode(ERRCODE_DATA_CORRUPTED),
-							 errmsg("invalid page in block %u of relation %s",
-									blockNum,
-									relpath(smgr->smgr_rnode, forkNum))));
-			}
 		}
 	}
 
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 4ea6ea7a3d..e1abfea82a 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -26,6 +26,8 @@
 /* GUC variable */
 bool		ignore_checksum_failure = false;
 
+/* whether to zero damaged page when checksum verify failed */
+static bool zero_damaged_page_in_checksum = false;
 
 /* ----------------------------------------------------------------
  *						Page support functions
@@ -61,13 +63,17 @@ PageInit(Page page, Size pageSize, Size specialSize)
 
 
 /*
- * PageIsVerified
+ * VerifyPage
  *		Check that the page header and checksum (if any) appear valid.
  *
  * This is called when a page has just been read in from disk.  The idea is
  * to cheaply detect trashed pages before we go nuts following bogus line
  * pointers, testing invalid transaction identifiers, etc.
  *
+ * FileTag contains the segment file information which the page belongs to.
+ * Note that for large heap table, it may across many physical files. Better
+ * to print the exact file path when checksum fails.
+ *
  * It turns out to be necessary to allow zeroed pages here too.  Even though
  * this routine is *not* called when deliberately adding a page to a relation,
  * there are scenarios in which a zeroed page might be found in a table.
@@ -79,7 +85,7 @@ PageInit(Page page, Size pageSize, Size specialSize)
  * will clean up such a page and make it usable.
  */
 bool
-PageIsVerified(Page page, BlockNumber blkno)
+VerifyPage(const FileTag *ftag, Page page, BlockNumber blkno)
 {
 	PageHeader	p = (PageHeader) page;
 	size_t	   *pagebytes;
@@ -151,6 +157,28 @@ PageIsVerified(Page page, BlockNumber blkno)
 			return true;
 	}
 
+	/*
+	 * Throw out ERROR or WARNING based on whether zero_damaged_pages_in_checksum is set
+	 *
+	 * Note that using GUC zero_damaged_pages is not suffient here, since we should also
+	 * zero damaged page when ReadBufferMode is RBM_ZERO_ON_ERROR.
+	 */
+	if (zero_damaged_page_in_checksum)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("invalid page in block %u of relation file %s; zeroing out page",
+						blkno,
+						relfilepathbackend(ftag->rnode, ftag->backend, ftag->forknum, ftag->segno))));
+		MemSet((char *) page, 0, BLCKSZ);
+	}
+	else
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("invalid page in block %u of relation file %s",
+						blkno,
+						relfilepathbackend(ftag->rnode, ftag->backend, ftag->forknum, ftag->segno))));
+
 	return false;
 }
 
@@ -1196,3 +1224,12 @@ PageSetChecksumInplace(Page page, BlockNumber blkno)
 
 	((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
 }
+
+/*
+ * Zero damage page when vefirying checksum failed
+ */
+void
+SetZeroDamagedPageInChecksum(bool isZeroDamagedPage)
+{
+	zero_damaged_page_in_checksum = isZeroDamagedPage;
+}
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index c5b771c531..2a4b8e5b42 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -94,6 +94,17 @@ static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 	memset(&(a), 0, sizeof(FileTag)), \
 	(a).handler = SYNC_HANDLER_MD, \
 	(a).rnode = (xx_rnode), \
+	(a).backend = InvalidBackendId, \
+	(a).forknum = (xx_forknum), \
+	(a).segno = (xx_segno) \
+)
+
+#define INIT_MD_FILETAG_WITH_BACKEND(a,xx_rnode,xx_backend,xx_forknum,xx_segno) \
+( \
+	memset(&(a), 0, sizeof(FileTag)), \
+	(a).handler = SYNC_HANDLER_MD, \
+	(a).rnode = (xx_rnode), \
+	(a).backend = (xx_backend), \
 	(a).forknum = (xx_forknum), \
 	(a).segno = (xx_segno) \
 )
@@ -604,6 +615,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	FileTag		tag;
 
 	TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum,
 										reln->smgr_rnode.node.spcNode,
@@ -653,6 +665,11 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 							blocknum, FilePathName(v->mdfd_vfd),
 							nbytes, BLCKSZ)));
 	}
+
+	/* verify the read page content satisfy page checksum */
+	INIT_MD_FILETAG_WITH_BACKEND(tag, reln->smgr_rnode.node, reln->smgr_rnode.backend, forknum, v->mdfd_segno);
+
+	VerifyPage(&tag, buffer, blocknum);
 }
 
 /*
diff --git a/src/common/relpath.c b/src/common/relpath.c
index ad733d1363..eae1d25fe4 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -208,3 +208,27 @@ GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode,
 	}
 	return path;
 }
+
+/*
+ * GetRelationFilePath - construct path to a relation's physical file
+ * given its block number.
+ */
+char *
+GetRelationFilePath(Oid dbNode, Oid spcNode, Oid relNode,
+					int backendId, ForkNumber forkNumber, uint32 segno)
+{
+	char       *path;
+	char       *fullpath;
+
+	path = GetRelationPath(dbNode, spcNode, relNode, backendId, forkNumber);
+
+	if (segno > 0)
+	{
+		fullpath = psprintf("%s.%u", path, segno);
+		pfree(path);
+	}
+	else
+		fullpath = path;
+
+	return fullpath;
+}
diff --git a/src/include/common/relpath.h b/src/include/common/relpath.h
index 869cabcc0d..5db63c5197 100644
--- a/src/include/common/relpath.h
+++ b/src/include/common/relpath.h
@@ -13,6 +13,8 @@
 #ifndef RELPATH_H
 #define RELPATH_H
 
+#include "storage/block.h"
+
 /*
  *	'pgrminclude ignore' needed here because CppAsString2() does not throw
  *	an error if the symbol is not defined.
@@ -68,6 +70,8 @@ extern char *GetDatabasePath(Oid dbNode, Oid spcNode);
 
 extern char *GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode,
 							 int backendId, ForkNumber forkNumber);
+extern char *GetRelationFilePath(Oid dbNode, Oid spcNode, Oid relNode,
+								 int backendId, ForkNumber forkNumber, BlockNumber blkno);
 
 /*
  * Wrapper macros for GetRelationPath.  Beware of multiple
@@ -87,4 +91,21 @@ extern char *GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode,
 #define relpath(rnode, forknum) \
 	relpathbackend((rnode).node, (rnode).backend, forknum)
 
+
+/*
+ * File path of a relation given the block number.
+ * Note that a file can contain at most RELSEG_SIZE number
+ * of blocks. We supply relfilepath macro to display the
+ * physical file name for a relation given the block number.
+ */
+
+/* First argument is a RelFileNode */
+#define relfilepathbackend(rnode, backend, forknum, segno) \
+	GetRelationFilePath((rnode).dbNode, (rnode).spcNode, (rnode).relNode, \
+						backend, forknum, segno)
+
+/* First argument is a RelFileNodeBackend */
+#define relfilepath(rnode, forknum, segno) \
+	relfilepathbackend((rnode).node, (rnode).backend, forknum, segno)
+
 #endif							/* RELPATH_H */
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 3f88683a05..b88e15b76e 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -18,6 +18,7 @@
 #include "storage/block.h"
 #include "storage/item.h"
 #include "storage/off.h"
+#include "storage/sync.h"		/* For FileTag */
 
 /*
  * A postgres disk page is an abstraction layered on top of a postgres
@@ -419,7 +420,7 @@ do { \
 						((is_heap) ? PAI_IS_HEAP : 0))
 
 /*
- * Check that BLCKSZ is a multiple of sizeof(size_t).  In PageIsVerified(),
+ * Check that BLCKSZ is a multiple of sizeof(size_t).  In VerifyPage(),
  * it is much faster to check if a page is full of zeroes using the native
  * word size.  Note that this assertion is kept within a header to make
  * sure that StaticAssertDecl() works across various combinations of
@@ -429,7 +430,7 @@ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
 				 "BLCKSZ has to be a multiple of sizeof(size_t)");
 
 extern void PageInit(Page page, Size pageSize, Size specialSize);
-extern bool PageIsVerified(Page page, BlockNumber blkno);
+extern bool VerifyPage(const FileTag *ftag, Page page, BlockNumber blkno);
 extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
 										OffsetNumber offsetNumber, int flags);
 extern Page PageGetTempPage(Page page);
@@ -448,5 +449,5 @@ extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
 									Item newtup, Size newsize);
 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
-
+extern void SetZeroDamagedPageInChecksum(bool isZeroDamagedPage);
 #endif							/* BUFPAGE_H */
diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h
index e16ab8e711..2c34579eec 100644
--- a/src/include/storage/sync.h
+++ b/src/include/storage/sync.h
@@ -47,6 +47,7 @@ typedef struct FileTag
 	int16		handler;		/* SyncRequestHandler value, saving space */
 	int16		forknum;		/* ForkNumber, saving space */
 	RelFileNode rnode;
+	BackendId	backend;
 	uint32		segno;
 } FileTag;
 
-- 
2.16.6

