From 5e7f7dec0b9c3ecf86f2c7fb490e21165fe5a01f Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Tue, 18 Apr 2023 10:53:02 -0400
Subject: [PATCH v2] Add and use symbolic constants for tar header offsets and
 file types.

Because symbolic constants in a header file are better than magic
constants embedded in the code.

Patch by me, reviewed by Tom Lane.
---
 src/bin/pg_basebackup/bbstreamer_tar.c | 22 +++++++--------
 src/bin/pg_basebackup/walmethods.c     |  7 +++--
 src/bin/pg_dump/pg_backup_tar.c        | 16 +++++------
 src/include/pgtar.h                    | 39 ++++++++++++++++++++++++++
 src/port/tar.c                         | 38 ++++++++++++-------------
 5 files changed, 80 insertions(+), 42 deletions(-)

diff --git a/src/bin/pg_basebackup/bbstreamer_tar.c b/src/bin/pg_basebackup/bbstreamer_tar.c
index 03d7fd3375..d7b438d0f5 100644
--- a/src/bin/pg_basebackup/bbstreamer_tar.c
+++ b/src/bin/pg_basebackup/bbstreamer_tar.c
@@ -286,22 +286,20 @@ bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
 
 	/*
 	 * Parse key fields out of the header.
-	 *
-	 * FIXME: It's terrible that we use hard-coded values here instead of some
-	 * more principled approach. It's been like this for a long time, but we
-	 * ought to do better.
 	 */
-	strlcpy(member->pathname, &buffer[0], MAXPGPATH);
+	strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
 	if (member->pathname[0] == '\0')
 		pg_fatal("tar member has empty name");
-	member->size = read_tar_number(&buffer[124], 12);
-	member->mode = read_tar_number(&buffer[100], 8);
-	member->uid = read_tar_number(&buffer[108], 8);
-	member->gid = read_tar_number(&buffer[116], 8);
-	member->is_directory = (buffer[156] == '5');
-	member->is_link = (buffer[156] == '2');
+	member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
+	member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
+	member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
+	member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
+	member->is_directory =
+		(buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
+	member->is_link =
+		(buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
 	if (member->is_link)
-		strlcpy(member->linktarget, &buffer[157], 100);
+		strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
 
 	/* Compute number of padding bytes. */
 	mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index 6d14b988cb..a974ffced4 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -1131,7 +1131,7 @@ tar_close(Walfile *f, WalCloseMethod method)
 	 * possibly also renaming the file. We overwrite the entire current header
 	 * when done, including the checksum.
 	 */
-	print_tar_number(&(tf->header[124]), 12, filesize);
+	print_tar_number(&(tf->header[TAR_OFFSET_SIZE]), 12, filesize);
 
 	if (method == CLOSE_NORMAL)
 
@@ -1139,9 +1139,10 @@ tar_close(Walfile *f, WalCloseMethod method)
 		 * We overwrite it with what it was before if we have no tempname,
 		 * since we're going to write the buffer anyway.
 		 */
-		strlcpy(&(tf->header[0]), tf->base.pathname, 100);
+		strlcpy(&(tf->header[TAR_OFFSET_NAME]), tf->base.pathname, 100);
 
-	print_tar_number(&(tf->header[148]), 8, tarChecksum(((TarMethodFile *) f)->header));
+	print_tar_number(&(tf->header[TAR_OFFSET_CHECKSUM]), 8,
+					 tarChecksum(((TarMethodFile *) f)->header));
 	if (lseek(tar_data->fd, tf->ofs_start, SEEK_SET) != ((TarMethodFile *) f)->ofs_start)
 	{
 		f->wwmethod->lasterrno = errno;
diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c
index babd23b4eb..59870acd17 100644
--- a/src/bin/pg_dump/pg_backup_tar.c
+++ b/src/bin/pg_dump/pg_backup_tar.c
@@ -975,20 +975,20 @@ isValidTarHeader(char *header)
 	int			sum;
 	int			chk = tarChecksum(header);
 
-	sum = read_tar_number(&header[148], 8);
+	sum = read_tar_number(&header[TAR_OFFSET_CHECKSUM], 8);
 
 	if (sum != chk)
 		return false;
 
 	/* POSIX tar format */
-	if (memcmp(&header[257], "ustar\0", 6) == 0 &&
-		memcmp(&header[263], "00", 2) == 0)
+	if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar\0", 6) == 0 &&
+		memcmp(&header[TAR_OFFSET_VERSION], "00", 2) == 0)
 		return true;
 	/* GNU tar format */
-	if (memcmp(&header[257], "ustar  \0", 8) == 0)
+	if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar  \0", 8) == 0)
 		return true;
 	/* not-quite-POSIX format written by pre-9.3 pg_dump */
-	if (memcmp(&header[257], "ustar00\0", 8) == 0)
+	if (memcmp(&header[TAR_OFFSET_MAGIC], "ustar00\0", 8) == 0)
 		return true;
 
 	return false;
@@ -1151,7 +1151,7 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
 
 		/* Calc checksum */
 		chk = tarChecksum(h);
-		sum = read_tar_number(&h[148], 8);
+		sum = read_tar_number(&h[TAR_OFFSET_CHECKSUM], 8);
 
 		/*
 		 * If the checksum failed, see if it is a null block. If so, silently
@@ -1175,9 +1175,9 @@ _tarGetHeader(ArchiveHandle *AH, TAR_MEMBER *th)
 	}
 
 	/* Name field is 100 bytes, might not be null-terminated */
-	strlcpy(tag, &h[0], 100 + 1);
+	strlcpy(tag, &h[TAR_OFFSET_NAME], 100 + 1);
 
-	len = read_tar_number(&h[124], 12);
+	len = read_tar_number(&h[TAR_OFFSET_SIZE], 12);
 
 	pg_log_debug("TOC Entry %s at %llu (length %llu, checksum %d)",
 				 tag, (unsigned long long) hPos, (unsigned long long) len, sum);
diff --git a/src/include/pgtar.h b/src/include/pgtar.h
index 661f9d7c59..8abfb9c19c 100644
--- a/src/include/pgtar.h
+++ b/src/include/pgtar.h
@@ -23,6 +23,45 @@ enum tarError
 	TAR_SYMLINK_TOO_LONG
 };
 
+/*
+ * Offsets of fields within a 512-byte tar header.
+ *
+ * "tar number" values should be generated using print_tar_number() and can be
+ * read using read_tar_number(). Fields that contain strings are generally
+ * both filled and read using strlcpy().
+ *
+ * The value for the checksum field can be computed using tarChecksum().
+ *
+ * Some fields are not used by PostgreSQL; see tarCreateHeader().
+ */
+enum tarHeaderOffset
+{
+	TAR_OFFSET_NAME = 0,		/* 100 byte string */
+	TAR_OFFSET_MODE = 100,		/* 8 byte tar number, excludes S_IFMT */
+	TAR_OFFSET_UID = 108,		/* 8 byte tar number */
+	TAR_OFFSET_GID = 116,		/* 8 byte tar number */
+	TAR_OFFSET_SIZE = 124,		/* 8 byte tar number */
+	TAR_OFFSET_MTIME = 136,		/* 12 byte tar number */
+	TAR_OFFSET_CHECKSUM = 148,	/* 8 byte tar number */
+	TAR_OFFSET_TYPEFLAG = 156,	/* 1 byte file type, see TAR_FILETYPE_* */
+	TAR_OFFSET_LINKNAME = 157,	/* 100 byte string */
+	TAR_OFFSET_MAGIC = 257,		/* "ustar" with terminating zero byte */
+	TAR_OFFSET_VERSION = 263,	/* "00" */
+	TAR_OFFSET_UNAME = 265,		/* 32 byte string */
+	TAR_OFFSET_GNAME = 297,		/* 32 byte string */
+	TAR_OFFSET_DEVMAJOR = 329,	/* 8 byte tar number */
+	TAR_OFFSET_DEVMINOR = 337,	/* 8 byte tar number */
+	TAR_OFFSET_PREFIX = 345		/* 155 byte string */
+	/* last 12 bytes of the 512-byte block are unassigned */
+};
+
+enum tarFileType
+{
+	TAR_FILETYPE_PLAIN = '0',
+	TAR_FILETYPE_SYMLINK = '2',
+	TAR_FILETYPE_DIRECTORY = '5'
+};
+
 extern enum tarError tarCreateHeader(char *h, const char *filename,
 									 const char *linktarget, pgoff_t size,
 									 mode_t mode, uid_t uid, gid_t gid,
diff --git a/src/port/tar.c b/src/port/tar.c
index 4afe9f2533..592b4fb7b0 100644
--- a/src/port/tar.c
+++ b/src/port/tar.c
@@ -120,10 +120,10 @@ tarCreateHeader(char *h, const char *filename, const char *linktarget,
 	if (linktarget && strlen(linktarget) > 99)
 		return TAR_SYMLINK_TOO_LONG;
 
-	memset(h, 0, 512);			/* assume tar header size */
+	memset(h, 0, TAR_BLOCK_SIZE);
 
 	/* Name 100 */
-	strlcpy(&h[0], filename, 100);
+	strlcpy(&h[TAR_OFFSET_NAME], filename, 100);
 	if (linktarget != NULL || S_ISDIR(mode))
 	{
 		/*
@@ -139,68 +139,68 @@ tarCreateHeader(char *h, const char *filename, const char *linktarget,
 	}
 
 	/* Mode 8 - this doesn't include the file type bits (S_IFMT)  */
-	print_tar_number(&h[100], 8, (mode & 07777));
+	print_tar_number(&h[TAR_OFFSET_MODE], 8, (mode & 07777));
 
 	/* User ID 8 */
-	print_tar_number(&h[108], 8, uid);
+	print_tar_number(&h[TAR_OFFSET_UID], 8, uid);
 
 	/* Group 8 */
-	print_tar_number(&h[116], 8, gid);
+	print_tar_number(&h[TAR_OFFSET_GID], 8, gid);
 
 	/* File size 12 */
 	if (linktarget != NULL || S_ISDIR(mode))
 		/* Symbolic link or directory has size zero */
-		print_tar_number(&h[124], 12, 0);
+		print_tar_number(&h[TAR_OFFSET_SIZE], 12, 0);
 	else
-		print_tar_number(&h[124], 12, size);
+		print_tar_number(&h[TAR_OFFSET_SIZE], 12, size);
 
 	/* Mod Time 12 */
-	print_tar_number(&h[136], 12, mtime);
+	print_tar_number(&h[TAR_OFFSET_MTIME], 12, mtime);
 
 	/* Checksum 8 cannot be calculated until we've filled all other fields */
 
 	if (linktarget != NULL)
 	{
 		/* Type - Symbolic link */
-		h[156] = '2';
+		h[TAR_OFFSET_TYPEFLAG] = TAR_FILETYPE_SYMLINK;
 		/* Link Name 100 */
-		strlcpy(&h[157], linktarget, 100);
+		strlcpy(&h[TAR_OFFSET_LINKNAME], linktarget, 100);
 	}
 	else if (S_ISDIR(mode))
 	{
 		/* Type - directory */
-		h[156] = '5';
+		h[TAR_OFFSET_TYPEFLAG] = TAR_FILETYPE_DIRECTORY;
 	}
 	else
 	{
 		/* Type - regular file */
-		h[156] = '0';
+		h[TAR_OFFSET_TYPEFLAG] = TAR_FILETYPE_PLAIN;
 	}
 
 	/* Magic 6 */
-	strcpy(&h[257], "ustar");
+	strcpy(&h[TAR_OFFSET_MAGIC], "ustar");
 
 	/* Version 2 */
-	memcpy(&h[263], "00", 2);
+	memcpy(&h[TAR_OFFSET_VERSION], "00", 2);
 
 	/* User 32 */
 	/* XXX: Do we need to care about setting correct username? */
-	strlcpy(&h[265], "postgres", 32);
+	strlcpy(&h[TAR_OFFSET_UNAME], "postgres", 32);
 
 	/* Group 32 */
 	/* XXX: Do we need to care about setting correct group name? */
-	strlcpy(&h[297], "postgres", 32);
+	strlcpy(&h[TAR_OFFSET_GNAME], "postgres", 32);
 
 	/* Major Dev 8 */
-	print_tar_number(&h[329], 8, 0);
+	print_tar_number(&h[TAR_OFFSET_DEVMAJOR], 8, 0);
 
 	/* Minor Dev 8 */
-	print_tar_number(&h[337], 8, 0);
+	print_tar_number(&h[TAR_OFFSET_DEVMINOR], 8, 0);
 
 	/* Prefix 155 - not used, leave as nulls */
 
 	/* Finally, compute and insert the checksum */
-	print_tar_number(&h[148], 8, tarChecksum(h));
+	print_tar_number(&h[TAR_OFFSET_CHECKSUM], 8, tarChecksum(h));
 
 	return TAR_OK;
 }
-- 
2.37.1 (Apple Git-137.1)

