On Fri, Dec 2, 2022 at 12:50 PM Michael Paquier <mich...@paquier.xyz> wrote:
>
> On Thu, Nov 17, 2022 at 11:53:23AM +0530, Bharath Rupireddy wrote:
> > Please do, if you feel so. Thanks for your review.
>
> I don't really mind the addition of the LSN when operating on a given
> record where we are reading a location, like in the five error paths
> for the header validation or the three ones in ReadRecord()

Thanks for reviewing.

> Now this one looks confusing:
> +       XLogSegNoOffsetToRecPtr(openLogSegNo, startoffset,
> +                               wal_segment_size, lsn);
>         ereport(PANIC,
>                 (errcode_for_file_access(),
>                  errmsg("could not write to log file %s "
> -                       "at offset %u, length %zu: %m",
> -                       xlogfname, startoffset, nleft)));
> +                       "at offset %u, LSN %X/%X, length %zu: %m",
> +                       xlogfname, startoffset,
> +                       LSN_FORMAT_ARGS(lsn), nleft)));
>
> This does not always refer to an exact LSN of a record as we may be in
> the middle of a write, so I would leave it as-is.
>
> Similarly the addition of wre_lsn would be confusing?  The offset
> looks kind of enough to me when referring to the middle of a page in
> WALReadRaiseError().

Yes, I removed those changes. Even if someone sees an offset of a
record within a WAL file elsewhere, they have the new utility function
(0002) pg_walfile_offset_lsn().

I'm attaching the v4 patch set for further review.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From 452f6adc4617230f8843a9339331170cb6f85723 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Sat, 3 Dec 2022 03:13:57 +0000
Subject: [PATCH v4] Emit record LSN in WAL read and validate error messages

The error messages reported during any failures in WAL record read
or validation currently contains offset within the WAL file which
is a bit hard to decode it to WAL LSN while debugging. Reporting
the WAL record LSN at which the WAL read or validation failure
occurrs saves time and makes life easier here.

Author: Bharath Rupireddy
Reviewed-by: Maxim Orlov, Michael Paquier
Reviewed-by: Kyotaro Horiguchi
Discussion: https://www.postgresql.org/message-id/CALj2ACWV=FCddsxcGbVOA=cvPyMr75YCFbSQT6g4KDj=gcJK4g@mail.gmail.com
---
 src/backend/access/transam/xlogreader.c   | 15 ++++++++++-----
 src/backend/access/transam/xlogrecovery.c | 13 ++++++++-----
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 93f667b254..a38a80e049 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1226,9 +1226,10 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 		report_invalid_record(state,
-							  "invalid magic number %04X in WAL segment %s, offset %u",
+							  "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u",
 							  hdr->xlp_magic,
 							  fname,
+							  LSN_FORMAT_ARGS(recptr),
 							  offset);
 		return false;
 	}
@@ -1240,9 +1241,10 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 		report_invalid_record(state,
-							  "invalid info bits %04X in WAL segment %s, offset %u",
+							  "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
 							  hdr->xlp_info,
 							  fname,
+							  LSN_FORMAT_ARGS(recptr),
 							  offset);
 		return false;
 	}
@@ -1281,9 +1283,10 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 
 		/* hmm, first page of file doesn't have a long header? */
 		report_invalid_record(state,
-							  "invalid info bits %04X in WAL segment %s, offset %u",
+							  "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
 							  hdr->xlp_info,
 							  fname,
+							  LSN_FORMAT_ARGS(recptr),
 							  offset);
 		return false;
 	}
@@ -1300,9 +1303,10 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 		report_invalid_record(state,
-							  "unexpected pageaddr %X/%X in WAL segment %s, offset %u",
+							  "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u",
 							  LSN_FORMAT_ARGS(hdr->xlp_pageaddr),
 							  fname,
+							  LSN_FORMAT_ARGS(recptr),
 							  offset);
 		return false;
 	}
@@ -1325,10 +1329,11 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 			XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 			report_invalid_record(state,
-								  "out-of-sequence timeline ID %u (after %u) in WAL segment %s, offset %u",
+								  "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u",
 								  hdr->xlp_tli,
 								  state->latestPageTLI,
 								  fname,
+								  LSN_FORMAT_ARGS(recptr),
 								  offset);
 			return false;
 		}
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 41ffc57da9..97b882564f 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -3075,9 +3075,10 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode,
 			XLogFileName(fname, xlogreader->seg.ws_tli, segno,
 						 wal_segment_size);
 			ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
-					(errmsg("unexpected timeline ID %u in WAL segment %s, offset %u",
+					(errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%X, offset %u",
 							xlogreader->latestPageTLI,
 							fname,
+							LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
 							offset)));
 			record = NULL;
 		}
@@ -3280,14 +3281,16 @@ retry:
 			errno = save_errno;
 			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 					(errcode_for_file_access(),
-					 errmsg("could not read from WAL segment %s, offset %u: %m",
-							fname, readOff)));
+					 errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: %m",
+							fname, LSN_FORMAT_ARGS(targetPagePtr),
+							readOff)));
 		}
 		else
 			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("could not read from WAL segment %s, offset %u: read %d of %zu",
-							fname, readOff, r, (Size) XLOG_BLCKSZ)));
+					 errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: read %d of %zu",
+							fname, LSN_FORMAT_ARGS(targetPagePtr),
+							readOff, r, (Size) XLOG_BLCKSZ)));
 		goto next_record_is_invalid;
 	}
 	pgstat_report_wait_end();
-- 
2.34.1

From a76f7187cc54d099abc5de86f2aadb77c72c2f21 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Sat, 3 Dec 2022 03:17:34 +0000
Subject: [PATCH v4] Add new a function to convert offset to LSN

Introduce a new utility function pg_walfile_offset_lsn() that
converts a given offset within the WAL file to a valid WAL LSN.
This aids developers in debugging.

Authors: Nathan Bossart, Bharath Rupireddy
Reviewed-by: Maxim Orlov, Michael Paquier
Reviewed-by: Alvaro Herrera
Discussion: https://www.postgresql.org/message-id/CALj2ACWV=FCddsxcGbVOA=cvPyMr75YCFbSQT6g4KDj=gcJK4g@mail.gmail.com
---
 doc/src/sgml/func.sgml                       | 16 +++++
 src/backend/access/transam/xlogfuncs.c       | 62 ++++++++++++++++++++
 src/include/access/xlog_internal.h           |  8 +++
 src/include/catalog/pg_proc.dat              |  6 ++
 src/test/regress/expected/misc_functions.out | 26 ++++++++
 src/test/regress/sql/misc_functions.sql      | 12 ++++
 6 files changed, 130 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2052d3c844..cb99fc64d7 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -25894,6 +25894,22 @@ LOG:  Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_walfile_offset_lsn</primary>
+        </indexterm>
+        <function>pg_walfile_offset_lsn</function> ( <parameter>file_name</parameter> <type>text</type>, <parameter>file_offset</parameter> <type>integer</type> )
+        <returnvalue>record</returnvalue>
+        ( <parameter>lsn</parameter> <type>pg_lsn</type>,
+        <parameter>timeline_id</parameter> <type>integer</type> )
+       </para>
+       <para>
+        Computes write-ahead log location and timline ID from a given WAL file
+        name and byte offset within that file.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 487d5d9cac..ed2690b49a 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -339,6 +339,68 @@ pg_last_wal_replay_lsn(PG_FUNCTION_ARGS)
 	PG_RETURN_LSN(recptr);
 }
 
+/*
+ * Compute an LSN and timline ID given a WAL file name and decimal byte offset.
+ */
+Datum
+pg_walfile_offset_lsn(PG_FUNCTION_ARGS)
+{
+	char	   *fname = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	int			offset = PG_GETARG_INT32(1);
+	TimeLineID	tli;
+	XLogSegNo	segno;
+	XLogRecPtr	lsn;
+	uint32		log;
+	uint32		seg;
+	Datum		values[2] = {0};
+	bool		isnull[2] = {0};
+	TupleDesc	resultTupleDesc;
+	HeapTuple	resultHeapTuple;
+	Datum		result;
+
+	if (!IsXLogFileName(fname))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid WAL file name \"%s\"", fname)));
+
+	XLogIdFromFileName(fname, &tli, &segno, &log, &seg, wal_segment_size);
+
+	if (seg >= XLogSegmentsPerXLogId(wal_segment_size) ||
+		(log == 0 && seg == 0) ||
+		segno == 0 ||
+		tli == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid WAL file name \"%s\"", fname)));
+
+	if (offset < 0 || offset >= wal_segment_size)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("offset must not be negative or greater than or equal to WAL segment size")));
+
+	/*
+	 * Construct a tuple descriptor for the result row.  This must match this
+	 * function's pg_proc entry!
+	 */
+	resultTupleDesc = CreateTemplateTupleDesc(2);
+	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 1, "lsn",
+					   PG_LSNOID, -1, 0);
+	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "timeline_id",
+					   INT4OID, -1, 0);
+
+	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
+
+	XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, lsn);
+
+	values[0] = LSNGetDatum(lsn);
+	values[1] = UInt32GetDatum(tli);
+
+	resultHeapTuple = heap_form_tuple(resultTupleDesc, values, isnull);
+	result = HeapTupleGetDatum(resultHeapTuple);
+
+	PG_RETURN_DATUM(result);
+}
+
 /*
  * Compute an xlog file name and decimal byte offset given a WAL location,
  * such as is returned by pg_backup_stop() or pg_switch_wal().
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index e5fc66966b..4405036c3e 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -206,6 +206,14 @@ XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wa
 	*logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg;
 }
 
+static inline void
+XLogIdFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo,
+				   uint32 *log, uint32 *seg, int wal_segsz_bytes)
+{
+	sscanf(fname, "%08X%08X%08X", tli, log, seg);
+	*logSegNo = (uint64) (*log) * XLogSegmentsPerXLogId(wal_segsz_bytes) + *seg;
+}
+
 static inline void
 XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
 {
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f9301b2627..f32b77d174 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6362,6 +6362,12 @@
   proargtypes => 'pg_lsn', proallargtypes => '{pg_lsn,text,int4}',
   proargmodes => '{i,o,o}', proargnames => '{lsn,file_name,file_offset}',
   prosrc => 'pg_walfile_name_offset' },
+{ oid => '8205',
+  descr => 'wal location and timeline ID given a wal filename and byte offset',
+  proname => 'pg_walfile_offset_lsn', prorettype => 'record',
+  proargtypes => 'text int4', proallargtypes => '{text,int4,pg_lsn,int4}',
+  proargmodes => '{i,i,o,o}', proargnames => '{file_name,file_offset,lsn,timeline_id}',
+  prosrc => 'pg_walfile_offset_lsn' },
 { oid => '2851', descr => 'wal filename, given a wal location',
   proname => 'pg_walfile_name', prorettype => 'text', proargtypes => 'pg_lsn',
   prosrc => 'pg_walfile_name' },
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 88bb696ded..1b2ab4f25b 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -619,3 +619,29 @@ SELECT count(*) > 0 AS ok FROM pg_control_system();
  t
 (1 row)
 
+-- pg_walfile_name_offset
+SELECT * FROM pg_walfile_name_offset('1/F');
+        file_name         | file_offset 
+--------------------------+-------------
+ 000000010000000100000000 |          15
+(1 row)
+
+-- pg_walfile_offset_lsn
+SELECT * FROM pg_walfile_offset_lsn('invalid', 15);
+ERROR:  invalid WAL file name "invalid"
+SELECT * FROM pg_walfile_offset_lsn('0000000100000000FFFFFFFF', 15);
+ERROR:  invalid WAL file name "0000000100000000FFFFFFFF"
+SELECT * FROM pg_walfile_offset_lsn('000000010000000000000000', 15);
+ERROR:  invalid WAL file name "000000010000000000000000"
+SELECT * FROM pg_walfile_offset_lsn('000000000000000100000000', 15);
+ERROR:  invalid WAL file name "000000000000000100000000"
+SELECT * FROM pg_walfile_offset_lsn('000000010000000100000000', -1);
+ERROR:  offset must not be negative or greater than or equal to WAL segment size
+SELECT * FROM pg_walfile_offset_lsn('000000010000000100000000', 2000000000);
+ERROR:  offset must not be negative or greater than or equal to WAL segment size
+SELECT * FROM pg_walfile_offset_lsn('000000010000000100000000', 15);
+ lsn | timeline_id 
+-----+-------------
+ 1/F |           1
+(1 row)
+
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index b07e9e8dbb..9cb59cecf7 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -229,3 +229,15 @@ SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
 SELECT count(*) > 0 AS ok FROM pg_control_init();
 SELECT count(*) > 0 AS ok FROM pg_control_recovery();
 SELECT count(*) > 0 AS ok FROM pg_control_system();
+
+-- pg_walfile_name_offset
+SELECT * FROM pg_walfile_name_offset('1/F');
+
+-- pg_walfile_offset_lsn
+SELECT * FROM pg_walfile_offset_lsn('invalid', 15);
+SELECT * FROM pg_walfile_offset_lsn('0000000100000000FFFFFFFF', 15);
+SELECT * FROM pg_walfile_offset_lsn('000000010000000000000000', 15);
+SELECT * FROM pg_walfile_offset_lsn('000000000000000100000000', 15);
+SELECT * FROM pg_walfile_offset_lsn('000000010000000100000000', -1);
+SELECT * FROM pg_walfile_offset_lsn('000000010000000100000000', 2000000000);
+SELECT * FROM pg_walfile_offset_lsn('000000010000000100000000', 15);
-- 
2.34.1

Reply via email to