On Tue, Mar 8, 2016 at 1:20 PM, Kyotaro HORIGUCHI
<horiguchi.kyot...@lab.ntt.co.jp> wrote:
> Hello, thank you for updating this tool.
>
> At Mon, 7 Mar 2016 14:03:08 -0500, Robert Haas <robertmh...@gmail.com> wrote 
> in <CA+Tgmob+NjfYE3b3BHBmAC=3tvtbqszgz1roj63yramrgrq...@mail.gmail.com>
>> On Mon, Mar 7, 2016 at 12:41 PM, Masahiko Sawada <sawada.m...@gmail.com> 
>> wrote:
>> > Attached latest version optimisation patch.
>> > I'm still consider regarding pg_upgrade regression test code, so I
>> > will submit that patch later.
>>
> I was thinking more about this today and I think that we don't
> actually need the PD_ALL_FROZEN page-level bit for anything.  It's
> enough that the bit is present in the visibility map.  The only point
> of PD_ALL_VISIBLE is that it tells us that we need to clear the
> visibility map bit, but that bit is enough to tell us to clear both
> visibility map bits.  So I propose the attached cleanup patch.

Thank you for updating tool and proposing it.
I agree with you, and the patch you attached looks good to me except
for Horiguchi-san's comment.

Regarding pg_visibility module, I'd like to share some bugs and
propose to add a relation type condition to each functions.
Including it, I've attached remaining  2 patches; one is removing page
conversion code from pg_upgarde, and another is supporting pg_upgrade
for frozen bit.

Please have a look at them.

Regards,

--
Masahiko Sawada
diff --git a/contrib/pg_visibility/pg_visibility--1.0.sql b/contrib/pg_visibility/pg_visibility--1.0.sql
index 9616e1f..da511e5 100644
--- a/contrib/pg_visibility/pg_visibility--1.0.sql
+++ b/contrib/pg_visibility/pg_visibility--1.0.sql
@@ -12,7 +12,7 @@ AS 'MODULE_PATHNAME', 'pg_visibility_map'
 LANGUAGE C STRICT;
 
 -- Show visibility map and page-level visibility information.
-CREATE FUNCTION pg_visibility(regclass, blkno, bigint,
+CREATE FUNCTION pg_visibility(regclass, blkno bigint,
 							  all_visible OUT boolean,
 							  all_frozen OUT boolean,
 							  pd_all_visible OUT boolean)
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index d4336ce..2993bcb 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -14,6 +14,7 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
+#include "utils/rel.h"
 
 PG_MODULE_MAGIC;
 
@@ -55,6 +56,14 @@ pg_visibility_map(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid block number")));
 
+	/* Check for relation type */
+	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+		  rel->rd_rel->relkind == RELKIND_MATVIEW))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table or materialized view",
+						RelationGetRelationName(rel))));
+
 	tupdesc = pg_visibility_tupdesc(false, false);
 	MemSet(nulls, 0, sizeof(nulls));
 
@@ -94,6 +103,14 @@ pg_visibility(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid block number")));
 
+	/* Check for relation type */
+	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+		  rel->rd_rel->relkind == RELKIND_MATVIEW))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table or materialized view",
+						RelationGetRelationName(rel))));
+
 	tupdesc = pg_visibility_tupdesc(false, true);
 	MemSet(nulls, 0, sizeof(nulls));
 
@@ -147,9 +164,10 @@ pg_visibility_map_rel(PG_FUNCTION_ARGS)
 		HeapTuple	tuple;
 
 		MemSet(nulls, 0, sizeof(nulls));
-		values[0] = Int64GetDatum(info->next++);
+		values[0] = Int64GetDatum(info->next);
 		values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
 		values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
+		info->next++;
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
@@ -190,10 +208,11 @@ pg_visibility_rel(PG_FUNCTION_ARGS)
 		HeapTuple	tuple;
 
 		MemSet(nulls, 0, sizeof(nulls));
-		values[0] = Int64GetDatum(info->next++);
+		values[0] = Int64GetDatum(info->next);
 		values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
 		values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
 		values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
+		info->next++;
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
@@ -223,6 +242,14 @@ pg_visibility_map_summary(PG_FUNCTION_ARGS)
 	rel = relation_open(relid, AccessShareLock);
 	nblocks = RelationGetNumberOfBlocks(rel);
 
+	/* Check for relation type */
+	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+		  rel->rd_rel->relkind == RELKIND_MATVIEW))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table or materialized view",
+						RelationGetRelationName(rel))));
+
 	for (blkno = 0; blkno < nblocks; ++blkno)
 	{
 		int32		mapbits;
@@ -296,6 +323,15 @@ collect_visibility_data(Oid relid, bool include_pd)
 	BufferAccessStrategy	bstrategy = GetAccessStrategy(BAS_BULKREAD);
 
 	rel = relation_open(relid, AccessShareLock);
+
+	/* Check for relation type */
+	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+		  rel->rd_rel->relkind == RELKIND_MATVIEW))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table or materialized view",
+						RelationGetRelationName(rel))));
+
 	nblocks = RelationGetNumberOfBlocks(rel);
 	info = palloc0(offsetof(vbits, bits) + nblocks);
 	info->next = 0;
commit 8ab96722459fc929d5b2d447ffda18fe1107abc0
Author: Masahiko Sawada <sawada.mshk@gmail.com>
Date:   Wed Mar 2 11:09:41 2016 -0400

    Initial.

diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index d9c8145..0c882d9 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -8,7 +8,7 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = check.o controldata.o dump.o exec.o file.o function.o info.o \
-       option.o page.o parallel.o pg_upgrade.o relfilenode.o server.o \
+       option.o parallel.o pg_upgrade.o relfilenode.o server.o \
        tablespace.o util.o version.o $(WIN32RES)
 
 override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index e0cb675..f932094 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -80,8 +80,6 @@ check_and_dump_old_cluster(bool live_check)
 	if (!live_check)
 		start_postmaster(&old_cluster, true);
 
-	get_pg_database_relfilenode(&old_cluster);
-
 	/* Extract a list of databases and tables from the old cluster */
 	get_db_and_rel_infos(&old_cluster);
 
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 9357ad8..115d506 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -25,15 +25,11 @@ static int	win32_pghardlink(const char *src, const char *dst);
 /*
  * copyAndUpdateFile()
  *
- *	Copies a relation file from src to dst.  If pageConverter is non-NULL, this function
- *	uses that pageConverter to do a page-by-page conversion.
+ *	Copies a relation file from src to dst.
  */
 const char *
-copyAndUpdateFile(pageCnvCtx *pageConverter,
-				  const char *src, const char *dst, bool force)
+copyAndUpdateFile(const char *src, const char *dst, bool force)
 {
-	if (pageConverter == NULL)
-	{
 #ifndef WIN32
 		if (copy_file(src, dst, force) == -1)
 #else
@@ -42,65 +38,6 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
 			return getErrorText();
 		else
 			return NULL;
-	}
-	else
-	{
-		/*
-		 * We have a pageConverter object - that implies that the
-		 * PageLayoutVersion differs between the two clusters so we have to
-		 * perform a page-by-page conversion.
-		 *
-		 * If the pageConverter can convert the entire file at once, invoke
-		 * that plugin function, otherwise, read each page in the relation
-		 * file and call the convertPage plugin function.
-		 */
-
-#ifdef PAGE_CONVERSION
-		if (pageConverter->convertFile)
-			return pageConverter->convertFile(pageConverter->pluginData,
-											  dst, src);
-		else
-#endif
-		{
-			int			src_fd;
-			int			dstfd;
-			char		buf[BLCKSZ];
-			ssize_t		bytesRead;
-			const char *msg = NULL;
-
-			if ((src_fd = open(src, O_RDONLY, 0)) < 0)
-				return "could not open source file";
-
-			if ((dstfd = open(dst, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
-			{
-				close(src_fd);
-				return "could not create destination file";
-			}
-
-			while ((bytesRead = read(src_fd, buf, BLCKSZ)) == BLCKSZ)
-			{
-#ifdef PAGE_CONVERSION
-				if ((msg = pageConverter->convertPage(pageConverter->pluginData, buf, buf)) != NULL)
-					break;
-#endif
-				if (write(dstfd, buf, BLCKSZ) != BLCKSZ)
-				{
-					msg = "could not write new page to destination";
-					break;
-				}
-			}
-
-			close(src_fd);
-			close(dstfd);
-
-			if (msg)
-				return msg;
-			else if (bytesRead != 0)
-				return "found partial page in source file";
-			else
-				return NULL;
-		}
-	}
 }
 
 
@@ -114,12 +51,8 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
  * instead of copying the data from the old cluster to the new cluster.
  */
 const char *
-linkAndUpdateFile(pageCnvCtx *pageConverter,
-				  const char *src, const char *dst)
+linkAndUpdateFile(const char *src, const char *dst)
 {
-	if (pageConverter != NULL)
-		return "Cannot in-place update this cluster, page-by-page conversion is required";
-
 	if (pg_link_file(src, dst) == -1)
 		return getErrorText();
 	else
diff --git a/src/bin/pg_upgrade/page.c b/src/bin/pg_upgrade/page.c
deleted file mode 100644
index e5686e5..0000000
--- a/src/bin/pg_upgrade/page.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *	page.c
- *
- *	per-page conversion operations
- *
- *	Copyright (c) 2010-2016, PostgreSQL Global Development Group
- *	src/bin/pg_upgrade/page.c
- */
-
-#include "postgres_fe.h"
-
-#include "pg_upgrade.h"
-
-#include "storage/bufpage.h"
-
-
-#ifdef PAGE_CONVERSION
-
-
-static void getPageVersion(
-			   uint16 *version, const char *pathName);
-static pageCnvCtx *loadConverterPlugin(
-					uint16 newPageVersion, uint16 oldPageVersion);
-
-
-/*
- * setupPageConverter()
- *
- *	This function determines the PageLayoutVersion of the old cluster and
- *	the PageLayoutVersion of the new cluster.  If the versions differ, this
- *	function loads a converter plugin and returns a pointer to a pageCnvCtx
- *	object (in *result) that knows how to convert pages from the old format
- *	to the new format.  If the versions are identical, this function just
- *	returns a NULL pageCnvCtx pointer to indicate that page-by-page conversion
- *	is not required.
- */
-pageCnvCtx *
-setupPageConverter(void)
-{
-	uint16		oldPageVersion;
-	uint16		newPageVersion;
-	pageCnvCtx *converter;
-	const char *msg;
-	char		dstName[MAXPGPATH];
-	char		srcName[MAXPGPATH];
-
-	snprintf(dstName, sizeof(dstName), "%s/global/%u", new_cluster.pgdata,
-			 new_cluster.pg_database_oid);
-	snprintf(srcName, sizeof(srcName), "%s/global/%u", old_cluster.pgdata,
-			 old_cluster.pg_database_oid);
-
-	getPageVersion(&oldPageVersion, srcName);
-	getPageVersion(&newPageVersion, dstName);
-
-	/*
-	 * If the old cluster and new cluster use the same page layouts, then we
-	 * don't need a page converter.
-	 */
-	if (newPageVersion != oldPageVersion)
-	{
-		/*
-		 * The clusters use differing page layouts, see if we can find a
-		 * plugin that knows how to convert from the old page layout to the
-		 * new page layout.
-		 */
-
-		if ((converter = loadConverterPlugin(newPageVersion, oldPageVersion)) == NULL)
-			pg_fatal("could not find plugin to convert from old page layout to new page layout\n");
-
-		return converter;
-	}
-	else
-		return NULL;
-}
-
-
-/*
- * getPageVersion()
- *
- *	Retrieves the PageLayoutVersion for the given relation.
- *
- *	Returns NULL on success (and stores the PageLayoutVersion at *version),
- *	if an error occurs, this function returns an error message (in the form
- *	of a null-terminated string).
- */
-static void
-getPageVersion(uint16 *version, const char *pathName)
-{
-	int			relfd;
-	PageHeaderData page;
-	ssize_t		bytesRead;
-
-	if ((relfd = open(pathName, O_RDONLY, 0)) < 0)
-		pg_fatal("could not open relation %s\n", pathName);
-
-	if ((bytesRead = read(relfd, &page, sizeof(page))) != sizeof(page))
-		pg_fatal("could not read page header of %s\n", pathName);
-
-	*version = PageGetPageLayoutVersion(&page);
-
-	close(relfd);
-
-	return;
-}
-
-
-/*
- * loadConverterPlugin()
- *
- *	This function loads a page-converter plugin library and grabs a
- *	pointer to each of the (interesting) functions provided by that
- *	plugin.  The name of the plugin library is derived from the given
- *	newPageVersion and oldPageVersion.  If a plugin is found, this
- *	function returns a pointer to a pageCnvCtx object (which will contain
- *	a collection of plugin function pointers). If the required plugin
- *	is not found, this function returns NULL.
- */
-static pageCnvCtx *
-loadConverterPlugin(uint16 newPageVersion, uint16 oldPageVersion)
-{
-	char		pluginName[MAXPGPATH];
-	void	   *plugin;
-
-	/*
-	 * Try to find a plugin that can convert pages of oldPageVersion into
-	 * pages of newPageVersion.  For example, if we oldPageVersion = 3 and
-	 * newPageVersion is 4, we search for a plugin named:
-	 * plugins/convertLayout_3_to_4.dll
-	 */
-
-	/*
-	 * FIXME: we are searching for plugins relative to the current directory,
-	 * we should really search relative to our own executable instead.
-	 */
-	snprintf(pluginName, sizeof(pluginName), "./plugins/convertLayout_%d_to_%d%s",
-			 oldPageVersion, newPageVersion, DLSUFFIX);
-
-	if ((plugin = pg_dlopen(pluginName)) == NULL)
-		return NULL;
-	else
-	{
-		pageCnvCtx *result = (pageCnvCtx *) pg_malloc(sizeof(*result));
-
-		result->old.PageVersion = oldPageVersion;
-		result->new.PageVersion = newPageVersion;
-
-		result->startup = (pluginStartup) pg_dlsym(plugin, "init");
-		result->convertFile = (pluginConvertFile) pg_dlsym(plugin, "convertFile");
-		result->convertPage = (pluginConvertPage) pg_dlsym(plugin, "convertPage");
-		result->shutdown = (pluginShutdown) pg_dlsym(plugin, "fini");
-		result->pluginData = NULL;
-
-		/*
-		 * If the plugin has exported an initializer, go ahead and invoke it.
-		 */
-		if (result->startup)
-			result->startup(MIGRATOR_API_VERSION, &result->pluginVersion,
-						newPageVersion, oldPageVersion, &result->pluginData);
-
-		return result;
-	}
-}
-
-#endif
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 984c395..4f5361a 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -260,8 +260,6 @@ prepare_new_cluster(void)
 			  new_cluster.bindir, cluster_conn_opts(&new_cluster),
 			  log_opts.verbose ? "--verbose" : "");
 	check_ok();
-
-	get_pg_database_relfilenode(&new_cluster);
 }
 
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index bc733c4..900b2a7 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -269,7 +269,6 @@ typedef struct
 	uint32		major_version;	/* PG_VERSION of cluster */
 	char		major_version_str[64];	/* string PG_VERSION of cluster */
 	uint32		bin_version;	/* version returned from pg_ctl */
-	Oid			pg_database_oid;	/* OID of pg_database relation */
 	const char *tablespace_suffix;		/* directory specification */
 } ClusterInfo;
 
@@ -364,40 +363,8 @@ bool		pid_lock_file_exists(const char *datadir);
 
 /* file.c */
 
-#ifdef PAGE_CONVERSION
-typedef const char *(*pluginStartup) (uint16 migratorVersion,
-								uint16 *pluginVersion, uint16 newPageVersion,
-								   uint16 oldPageVersion, void **pluginData);
-typedef const char *(*pluginConvertFile) (void *pluginData,
-								   const char *dstName, const char *srcName);
-typedef const char *(*pluginConvertPage) (void *pluginData,
-								   const char *dstPage, const char *srcPage);
-typedef const char *(*pluginShutdown) (void *pluginData);
-
-typedef struct
-{
-	uint16		oldPageVersion; /* Page layout version of the old cluster		*/
-	uint16		newPageVersion; /* Page layout version of the new cluster		*/
-	uint16		pluginVersion;	/* API version of converter plugin */
-	void	   *pluginData;		/* Plugin data (set by plugin) */
-	pluginStartup startup;		/* Pointer to plugin's startup function */
-	pluginConvertFile convertFile;		/* Pointer to plugin's file converter
-										 * function */
-	pluginConvertPage convertPage;		/* Pointer to plugin's page converter
-										 * function */
-	pluginShutdown shutdown;	/* Pointer to plugin's shutdown function */
-} pageCnvCtx;
-
-const pageCnvCtx *setupPageConverter(void);
-#else
-/* dummy */
-typedef void *pageCnvCtx;
-#endif
-
-const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
-				  const char *dst, bool force);
-const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
-				  const char *dst);
+const char *copyAndUpdateFile(const char *src, const char *dst, bool force);
+const char *linkAndUpdateFile(const char *src, const char *dst);
 
 void		check_hard_link(void);
 FILE	   *fopen_priv(const char *path, const char *mode);
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index c059c5b..fcaad79 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -15,10 +15,8 @@
 #include "access/transam.h"
 
 
-static void transfer_single_new_db(pageCnvCtx *pageConverter,
-					   FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
-				 const char *suffix);
+static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
+static void transfer_relfile(FileNameMap *map, const char *suffix);
 
 
 /*
@@ -92,7 +90,6 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
 				   *new_db = NULL;
 		FileNameMap *mappings;
 		int			n_maps;
-		pageCnvCtx *pageConverter = NULL;
 
 		/*
 		 * Advance past any databases that exist in the new cluster but not in
@@ -116,11 +113,7 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
 		{
 			print_maps(mappings, n_maps, new_db->db_name);
 
-#ifdef PAGE_CONVERSION
-			pageConverter = setupPageConverter();
-#endif
-			transfer_single_new_db(pageConverter, mappings, n_maps,
-								   old_tablespace);
+			transfer_single_new_db(mappings, n_maps, old_tablespace);
 		}
 		/* We allocate something even for n_maps == 0 */
 		pg_free(mappings);
@@ -129,45 +122,13 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
 	return;
 }
 
-
-/*
- * get_pg_database_relfilenode()
- *
- *	Retrieves the relfilenode for a few system-catalog tables.  We need these
- *	relfilenodes later in the upgrade process.
- */
-void
-get_pg_database_relfilenode(ClusterInfo *cluster)
-{
-	PGconn	   *conn = connectToServer(cluster, "template1");
-	PGresult   *res;
-	int			i_relfile;
-
-	res = executeQueryOrDie(conn,
-							"SELECT c.relname, c.relfilenode "
-							"FROM	pg_catalog.pg_class c, "
-							"		pg_catalog.pg_namespace n "
-							"WHERE	c.relnamespace = n.oid AND "
-							"		n.nspname = 'pg_catalog' AND "
-							"		c.relname = 'pg_database' "
-							"ORDER BY c.relname");
-
-	i_relfile = PQfnumber(res, "relfilenode");
-	cluster->pg_database_oid = atooid(PQgetvalue(res, 0, i_relfile));
-
-	PQclear(res);
-	PQfinish(conn);
-}
-
-
 /*
  * transfer_single_new_db()
  *
  * create links for mappings stored in "maps" array.
  */
 static void
-transfer_single_new_db(pageCnvCtx *pageConverter,
-					   FileNameMap *maps, int size, char *old_tablespace)
+transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 {
 	int			mapnum;
 	bool		vm_crashsafe_match = true;
@@ -186,7 +147,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
 			/* transfer primary file */
-			transfer_relfile(pageConverter, &maps[mapnum], "");
+			transfer_relfile(&maps[mapnum], "");
 
 			/* fsm/vm files added in PG 8.4 */
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -194,9 +155,9 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 				/*
 				 * Copy/link any fsm and vm files, if they exist
 				 */
-				transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
+				transfer_relfile(&maps[mapnum], "_fsm");
 				if (vm_crashsafe_match)
-					transfer_relfile(pageConverter, &maps[mapnum], "_vm");
+					transfer_relfile(&maps[mapnum], "_vm");
 			}
 		}
 	}
@@ -209,8 +170,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
  * Copy or link file from old cluster to new one.
  */
 static void
-transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
-				 const char *type_suffix)
+transfer_relfile(FileNameMap *map, const char *type_suffix)
 {
 	const char *msg;
 	char		old_file[MAXPGPATH];
@@ -268,15 +228,11 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 		/* Copying files might take some time, so give feedback. */
 		pg_log(PG_STATUS, "%s", old_file);
 
-		if ((user_opts.transfer_mode == TRANSFER_MODE_LINK) && (pageConverter != NULL))
-			pg_fatal("This upgrade requires page-by-page conversion, "
-					 "you must use copy mode instead of link mode.\n");
-
 		if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
 		{
 			pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
 
-			if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
+			if ((msg = copyAndUpdateFile(old_file, new_file, true)) != NULL)
 				pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
 						 map->nspname, map->relname, old_file, new_file, msg);
 		}
@@ -284,7 +240,7 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 		{
 			pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
 
-			if ((msg = linkAndUpdateFile(pageConverter, old_file, new_file)) != NULL)
+			if ((msg = linkAndUpdateFile(old_file, new_file)) != NULL)
 				pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
 						 map->nspname, map->relname, old_file, new_file, msg);
 		}
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 115d506..9adee01 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -9,7 +9,11 @@
 
 #include "postgres_fe.h"
 
+#include "access/visibilitymap.h"
 #include "pg_upgrade.h"
+#include "storage/bufpage.h"
+#include "storage/checksum.h"
+#include "storage/checksum_impl.h"
 
 #include <fcntl.h>
 
@@ -21,6 +25,25 @@ static int	copy_file(const char *fromfile, const char *tofile, bool force);
 static int	win32_pghardlink(const char *src, const char *dst);
 #endif
 
+/* table for fast rewriting vm file in order to add all-frozen information */
+static const uint16 rewrite_vm_table[256] = {
+	0,     1,     4,     5,     16,    17,    20,    21,    64,    65,    68,    69,    80,    81,    84,    85,
+	256,   257,   260,   261,   272,   273,   276,   277,   320,   321,   324,   325,   336,   337,   340,   341,
+	1024,  1025,  1028,  1029,  1040,  1041,  1044,  1045,  1088,  1089,  1092,  1093,  1104,  1105,  1108,  1109,
+	1280,  1281,  1284,  1285,  1296,  1297,  1300,  1301,  1344,  1345,  1348,  1349,  1360,  1361,  1364,  1365,
+	4096,  4097,  4100,  4101,  4112,  4113,  4116,  4117,  4160,  4161,  4164,  4165,  4176,  4177,  4180,  4181,
+	4352,  4353,  4356,  4357,  4368,  4369,  4372,  4373,  4416,  4417,  4420,  4421,  4432,  4433,  4436,  4437,
+	5120,  5121,  5124,  5125,  5136,  5137,  5140,  5141,  5184,  5185,  5188,  5189,  5200,  5201,  5204,  5205,
+	5376,  5377,  5380,  5381,  5392,  5393,  5396,  5397,  5440,  5441,  5444,  5445,  5456,  5457,  5460,  5461,
+	16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469,
+	16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725,
+	17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493,
+	17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749,
+	20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565,
+	20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757, 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821,
+	21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589,
+	21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845
+};
 
 /*
  * copyAndUpdateFile()
@@ -138,6 +161,95 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
 #endif
 
 
+/*
+ * rewriteVisibilityMap()
+ *
+ * Copies a visibility map file while adding all-frozen bit(0) into each bit.
+ */
+const char *
+rewriteVisibilityMap(const char *fromfile, const char *tofile, bool force)
+{
+	int			src_fd = 0;
+	int			dst_fd = 0;
+	char		buffer[BLCKSZ];
+	ssize_t 	bytesRead;
+	int			rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
+	BlockNumber	blkno = 0;
+
+	/* Reset errno */
+	errno = 0;
+
+	if ((fromfile == NULL) || (tofile == NULL))
+		return getErrorText();
+
+	if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
+		goto err;
+
+	if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
+		goto err;
+
+	/* Perform data rewriting per page */
+	while ((bytesRead = read(src_fd, buffer, BLCKSZ)) == BLCKSZ)
+	{
+		char	*cur, *end, *blkend;
+		PageHeaderData	pageheader;
+		uint16	vm_bits;
+
+		/* Save the page header data */
+		memcpy(&pageheader, buffer, SizeOfPageHeaderData);
+
+		cur = buffer + SizeOfPageHeaderData;
+		end = buffer + SizeOfPageHeaderData + rewriteVmBytesPerPage;
+		blkend = buffer + bytesRead;
+
+		while (blkend >= end)
+		{
+			char	vmbuf[BLCKSZ];
+			char	*vmtmp = vmbuf;
+
+			/* Copy page header in advance */
+			memcpy(vmbuf, &pageheader, SizeOfPageHeaderData);
+
+			vmtmp += SizeOfPageHeaderData;
+
+			/* Rewrite visibility map bit one by one */
+			while (end > cur)
+			{
+				/* Write rewritten bit from table and its string representation */
+				vm_bits = rewrite_vm_table[(uint8) *cur];
+				memcpy(vmtmp, &vm_bits, BITS_PER_HEAPBLOCK);
+
+				cur++;
+				vmtmp += BITS_PER_HEAPBLOCK;
+			}
+
+			/* Set new checksum for a visibility map page, If enabled */
+			if (old_cluster.controldata.data_checksum_version != 0 &&
+				new_cluster.controldata.data_checksum_version != 0)
+				((PageHeader) vmbuf)->pd_checksum = pg_checksum_page(vmbuf, blkno);
+
+			if (write(dst_fd, vmbuf, BLCKSZ) != BLCKSZ)
+			{
+					if (errno == 0)
+						errno = ENOSPC;
+					goto err;
+			}
+
+			end += rewriteVmBytesPerPage;
+			blkno++;
+		}
+	}
+
+err:
+	if (src_fd != 0)
+		close(src_fd);
+
+	if (dst_fd != 0)
+		close(dst_fd);
+
+	return (errno == 0) ? NULL : getErrorText();
+}
+
 void
 check_hard_link(void)
 {
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 900b2a7..ecd9ab3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -110,6 +110,10 @@ extern char *output_files[];
 #define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
 
 /*
+ * The format of visibility map is changed with this 9.6 commit,
+ */
+#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201602181
+/*
  * pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
  * ("Improve concurrency of foreign key locking") which also updated catalog
  * version to this value.  pg_upgrade behavior depends on whether old and new
@@ -269,6 +273,7 @@ typedef struct
 	uint32		major_version;	/* PG_VERSION of cluster */
 	char		major_version_str[64];	/* string PG_VERSION of cluster */
 	uint32		bin_version;	/* version returned from pg_ctl */
+	Oid			pg_database_oid;	/* OID of pg_database relation */
 	const char *tablespace_suffix;		/* directory specification */
 } ClusterInfo;
 
@@ -365,6 +370,8 @@ bool		pid_lock_file_exists(const char *datadir);
 
 const char *copyAndUpdateFile(const char *src, const char *dst, bool force);
 const char *linkAndUpdateFile(const char *src, const char *dst);
+const char *rewriteVisibilityMap(const char *fromfile, const char *tofile,
+										bool force);
 
 void		check_hard_link(void);
 FILE	   *fopen_priv(const char *path, const char *mode);
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index fcaad79..ee88c15 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -16,7 +16,7 @@
 
 
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *suffix);
+static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_need_rewrite);
 
 
 /*
@@ -132,6 +132,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 {
 	int			mapnum;
 	bool		vm_crashsafe_match = true;
+	bool		vm_need_rewrite = false;
 
 	/*
 	 * Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -141,13 +142,20 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 		new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
 		vm_crashsafe_match = false;
 
+	/*
+	 * Do we need to rewrite visibilitymap?
+	 */
+	if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER &&
+		new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
+		vm_need_rewrite = true;
+
 	for (mapnum = 0; mapnum < size; mapnum++)
 	{
 		if (old_tablespace == NULL ||
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
 			/* transfer primary file */
-			transfer_relfile(&maps[mapnum], "");
+			transfer_relfile(&maps[mapnum], "", vm_need_rewrite);
 
 			/* fsm/vm files added in PG 8.4 */
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -155,9 +163,9 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 				/*
 				 * Copy/link any fsm and vm files, if they exist
 				 */
-				transfer_relfile(&maps[mapnum], "_fsm");
+				transfer_relfile(&maps[mapnum], "_fsm",  vm_need_rewrite);
 				if (vm_crashsafe_match)
-					transfer_relfile(&maps[mapnum], "_vm");
+					transfer_relfile(&maps[mapnum], "_vm",  vm_need_rewrite);
 			}
 		}
 	}
@@ -168,9 +176,11 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
  * transfer_relfile()
  *
  * Copy or link file from old cluster to new one.
+ * if vm_need_rewrite is true, visibility map is rewritten to be added frozen bit
+ * even link mode.
  */
 static void
-transfer_relfile(FileNameMap *map, const char *type_suffix)
+transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_need_rewrite)
 {
 	const char *msg;
 	char		old_file[MAXPGPATH];
@@ -232,7 +242,13 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
 		{
 			pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
 
-			if ((msg = copyAndUpdateFile(old_file, new_file, true)) != NULL)
+			/* Rewrite visibility map */
+			if (vm_need_rewrite && (strcmp(type_suffix, "_vm") == 0))
+				msg = rewriteVisibilityMap(old_file, new_file, true);
+			else
+				msg = copyAndUpdateFile(old_file, new_file, true);
+
+			if (msg)
 				pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
 						 map->nspname, map->relname, old_file, new_file, msg);
 		}
@@ -240,7 +256,13 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
 		{
 			pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
 
-			if ((msg = linkAndUpdateFile(old_file, new_file)) != NULL)
+			/* Rewrite visibility map even link mode */
+			if (vm_need_rewrite && (strcmp(type_suffix, "_vm") == 0))
+				msg = rewriteVisibilityMap(old_file, new_file, true);
+			else
+				msg = linkAndUpdateFile(old_file, new_file);
+
+			if (msg)
 				pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
 						 map->nspname, map->relname, old_file, new_file, msg);
 		}
diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh
index ba79fb3..cd9b17e 100644
--- a/src/bin/pg_upgrade/test.sh
+++ b/src/bin/pg_upgrade/test.sh
@@ -174,6 +174,11 @@ if "$MAKE" -C "$oldsrc" installcheck; then
 		mv "$temp_root"/dump1.sql "$temp_root"/dump1.sql.orig
 		sed "s;$oldsrc;$newsrc;g" "$temp_root"/dump1.sql.orig >"$temp_root"/dump1.sql
 	fi
+
+	vm_sql="SELECT c.relname, c.relallvisible FROM pg_class as c, pg_namespace as n WHERE c.relnamespace = n.oid AND n.nspname NOT IN ('information_schema', 'pg_toast', 'pg_catalog') ORDER BY c.relname;"
+	# Test for rewriting visibility map
+	vacuumdb -d regression || visibilitymap_vacuum1_status=$?
+	psql -d regression -c "$vm_sql" > "$temp_root"/vm_test1.txt || visibilitymap_test1_status=$?
 else
 	make_installcheck_status=$?
 fi
@@ -188,6 +193,14 @@ if [ -n "$pg_dumpall1_status" ]; then
 	echo "pg_dumpall of pre-upgrade database cluster failed"
 	exit 1
 fi
+if [ -n "$visibilitymap_vacuum1_status" ];then
+	echo "VACUUM of pre-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+if [ -n "$visibilitymap_test1_status" ];then
+	echo "SELECT of pre-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
 
 PGDATA=$BASE_PGDATA
 
@@ -203,6 +216,8 @@ case $testhost in
 esac
 
 pg_dumpall -f "$temp_root"/dump2.sql || pg_dumpall2_status=$?
+vacuumdb -d regression || visibilitymap_vacuum2_status=$?
+psql -d regression -c "$vm_sql" > "$temp_root"/vm_test2.txt || visibilitymap_test2_status=$?
 pg_ctl -m fast stop
 
 # no need to echo commands anymore
@@ -214,11 +229,26 @@ if [ -n "$pg_dumpall2_status" ]; then
 	exit 1
 fi
 
+if [ -n "$visibilitymap_vacuum2_status" ];then
+	echo "VACUUM of post-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+
+if [ -n "$visibilitymap_test2_status" ];then
+	echo "SELECT of post-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+
 case $testhost in
 	MINGW*)	cmd /c delete_old_cluster.bat ;;
 	*)	    sh ./delete_old_cluster.sh ;;
 esac
 
+if ! diff "$temp_root"/vm_test1.txt "$temp_root"/vm_test2.txt >/dev/null; then
+	echo "Visibility map rewriting test failed"
+	exit 1
+fi
+
 if diff "$temp_root"/dump1.sql "$temp_root"/dump2.sql >/dev/null; then
 	echo PASSED
 	exit 0
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to