On Tue, Mar 8, 2016 at 1:20 PM, Kyotaro HORIGUCHI
<[email protected]> wrote:
> Hello, thank you for updating this tool.
>
> At Mon, 7 Mar 2016 14:03:08 -0500, Robert Haas <[email protected]> wrote
> in <CA+Tgmob+NjfYE3b3BHBmAC=3tvtbqszgz1roj63yramrgrq...@mail.gmail.com>
>> On Mon, Mar 7, 2016 at 12:41 PM, Masahiko Sawada <[email protected]>
>> wrote:
>> > Attached latest version optimisation patch.
>> > I'm still consider regarding pg_upgrade regression test code, so I
>> > will submit that patch later.
>>
> I was thinking more about this today and I think that we don't
> actually need the PD_ALL_FROZEN page-level bit for anything. It's
> enough that the bit is present in the visibility map. The only point
> of PD_ALL_VISIBLE is that it tells us that we need to clear the
> visibility map bit, but that bit is enough to tell us to clear both
> visibility map bits. So I propose the attached cleanup patch.
Thank you for updating tool and proposing it.
I agree with you, and the patch you attached looks good to me except
for Horiguchi-san's comment.
Regarding pg_visibility module, I'd like to share some bugs and
propose to add a relation type condition to each functions.
Including it, I've attached remaining 2 patches; one is removing page
conversion code from pg_upgarde, and another is supporting pg_upgrade
for frozen bit.
Please have a look at them.
Regards,
--
Masahiko Sawada
diff --git a/contrib/pg_visibility/pg_visibility--1.0.sql b/contrib/pg_visibility/pg_visibility--1.0.sql
index 9616e1f..da511e5 100644
--- a/contrib/pg_visibility/pg_visibility--1.0.sql
+++ b/contrib/pg_visibility/pg_visibility--1.0.sql
@@ -12,7 +12,7 @@ AS 'MODULE_PATHNAME', 'pg_visibility_map'
LANGUAGE C STRICT;
-- Show visibility map and page-level visibility information.
-CREATE FUNCTION pg_visibility(regclass, blkno, bigint,
+CREATE FUNCTION pg_visibility(regclass, blkno bigint,
all_visible OUT boolean,
all_frozen OUT boolean,
pd_all_visible OUT boolean)
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index d4336ce..2993bcb 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -14,6 +14,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "utils/rel.h"
PG_MODULE_MAGIC;
@@ -55,6 +56,14 @@ pg_visibility_map(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid block number")));
+ /* Check for relation type */
+ if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table or materialized view",
+ RelationGetRelationName(rel))));
+
tupdesc = pg_visibility_tupdesc(false, false);
MemSet(nulls, 0, sizeof(nulls));
@@ -94,6 +103,14 @@ pg_visibility(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid block number")));
+ /* Check for relation type */
+ if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table or materialized view",
+ RelationGetRelationName(rel))));
+
tupdesc = pg_visibility_tupdesc(false, true);
MemSet(nulls, 0, sizeof(nulls));
@@ -147,9 +164,10 @@ pg_visibility_map_rel(PG_FUNCTION_ARGS)
HeapTuple tuple;
MemSet(nulls, 0, sizeof(nulls));
- values[0] = Int64GetDatum(info->next++);
+ values[0] = Int64GetDatum(info->next);
values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
+ info->next++;
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
@@ -190,10 +208,11 @@ pg_visibility_rel(PG_FUNCTION_ARGS)
HeapTuple tuple;
MemSet(nulls, 0, sizeof(nulls));
- values[0] = Int64GetDatum(info->next++);
+ values[0] = Int64GetDatum(info->next);
values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
+ info->next++;
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
@@ -223,6 +242,14 @@ pg_visibility_map_summary(PG_FUNCTION_ARGS)
rel = relation_open(relid, AccessShareLock);
nblocks = RelationGetNumberOfBlocks(rel);
+ /* Check for relation type */
+ if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table or materialized view",
+ RelationGetRelationName(rel))));
+
for (blkno = 0; blkno < nblocks; ++blkno)
{
int32 mapbits;
@@ -296,6 +323,15 @@ collect_visibility_data(Oid relid, bool include_pd)
BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
rel = relation_open(relid, AccessShareLock);
+
+ /* Check for relation type */
+ if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table or materialized view",
+ RelationGetRelationName(rel))));
+
nblocks = RelationGetNumberOfBlocks(rel);
info = palloc0(offsetof(vbits, bits) + nblocks);
info->next = 0;
commit 8ab96722459fc929d5b2d447ffda18fe1107abc0
Author: Masahiko Sawada <[email protected]>
Date: Wed Mar 2 11:09:41 2016 -0400
Initial.
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index d9c8145..0c882d9 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -8,7 +8,7 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = check.o controldata.o dump.o exec.o file.o function.o info.o \
- option.o page.o parallel.o pg_upgrade.o relfilenode.o server.o \
+ option.o parallel.o pg_upgrade.o relfilenode.o server.o \
tablespace.o util.o version.o $(WIN32RES)
override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS)
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index e0cb675..f932094 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -80,8 +80,6 @@ check_and_dump_old_cluster(bool live_check)
if (!live_check)
start_postmaster(&old_cluster, true);
- get_pg_database_relfilenode(&old_cluster);
-
/* Extract a list of databases and tables from the old cluster */
get_db_and_rel_infos(&old_cluster);
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 9357ad8..115d506 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -25,15 +25,11 @@ static int win32_pghardlink(const char *src, const char *dst);
/*
* copyAndUpdateFile()
*
- * Copies a relation file from src to dst. If pageConverter is non-NULL, this function
- * uses that pageConverter to do a page-by-page conversion.
+ * Copies a relation file from src to dst.
*/
const char *
-copyAndUpdateFile(pageCnvCtx *pageConverter,
- const char *src, const char *dst, bool force)
+copyAndUpdateFile(const char *src, const char *dst, bool force)
{
- if (pageConverter == NULL)
- {
#ifndef WIN32
if (copy_file(src, dst, force) == -1)
#else
@@ -42,65 +38,6 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
return getErrorText();
else
return NULL;
- }
- else
- {
- /*
- * We have a pageConverter object - that implies that the
- * PageLayoutVersion differs between the two clusters so we have to
- * perform a page-by-page conversion.
- *
- * If the pageConverter can convert the entire file at once, invoke
- * that plugin function, otherwise, read each page in the relation
- * file and call the convertPage plugin function.
- */
-
-#ifdef PAGE_CONVERSION
- if (pageConverter->convertFile)
- return pageConverter->convertFile(pageConverter->pluginData,
- dst, src);
- else
-#endif
- {
- int src_fd;
- int dstfd;
- char buf[BLCKSZ];
- ssize_t bytesRead;
- const char *msg = NULL;
-
- if ((src_fd = open(src, O_RDONLY, 0)) < 0)
- return "could not open source file";
-
- if ((dstfd = open(dst, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
- {
- close(src_fd);
- return "could not create destination file";
- }
-
- while ((bytesRead = read(src_fd, buf, BLCKSZ)) == BLCKSZ)
- {
-#ifdef PAGE_CONVERSION
- if ((msg = pageConverter->convertPage(pageConverter->pluginData, buf, buf)) != NULL)
- break;
-#endif
- if (write(dstfd, buf, BLCKSZ) != BLCKSZ)
- {
- msg = "could not write new page to destination";
- break;
- }
- }
-
- close(src_fd);
- close(dstfd);
-
- if (msg)
- return msg;
- else if (bytesRead != 0)
- return "found partial page in source file";
- else
- return NULL;
- }
- }
}
@@ -114,12 +51,8 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
* instead of copying the data from the old cluster to the new cluster.
*/
const char *
-linkAndUpdateFile(pageCnvCtx *pageConverter,
- const char *src, const char *dst)
+linkAndUpdateFile(const char *src, const char *dst)
{
- if (pageConverter != NULL)
- return "Cannot in-place update this cluster, page-by-page conversion is required";
-
if (pg_link_file(src, dst) == -1)
return getErrorText();
else
diff --git a/src/bin/pg_upgrade/page.c b/src/bin/pg_upgrade/page.c
deleted file mode 100644
index e5686e5..0000000
--- a/src/bin/pg_upgrade/page.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * page.c
- *
- * per-page conversion operations
- *
- * Copyright (c) 2010-2016, PostgreSQL Global Development Group
- * src/bin/pg_upgrade/page.c
- */
-
-#include "postgres_fe.h"
-
-#include "pg_upgrade.h"
-
-#include "storage/bufpage.h"
-
-
-#ifdef PAGE_CONVERSION
-
-
-static void getPageVersion(
- uint16 *version, const char *pathName);
-static pageCnvCtx *loadConverterPlugin(
- uint16 newPageVersion, uint16 oldPageVersion);
-
-
-/*
- * setupPageConverter()
- *
- * This function determines the PageLayoutVersion of the old cluster and
- * the PageLayoutVersion of the new cluster. If the versions differ, this
- * function loads a converter plugin and returns a pointer to a pageCnvCtx
- * object (in *result) that knows how to convert pages from the old format
- * to the new format. If the versions are identical, this function just
- * returns a NULL pageCnvCtx pointer to indicate that page-by-page conversion
- * is not required.
- */
-pageCnvCtx *
-setupPageConverter(void)
-{
- uint16 oldPageVersion;
- uint16 newPageVersion;
- pageCnvCtx *converter;
- const char *msg;
- char dstName[MAXPGPATH];
- char srcName[MAXPGPATH];
-
- snprintf(dstName, sizeof(dstName), "%s/global/%u", new_cluster.pgdata,
- new_cluster.pg_database_oid);
- snprintf(srcName, sizeof(srcName), "%s/global/%u", old_cluster.pgdata,
- old_cluster.pg_database_oid);
-
- getPageVersion(&oldPageVersion, srcName);
- getPageVersion(&newPageVersion, dstName);
-
- /*
- * If the old cluster and new cluster use the same page layouts, then we
- * don't need a page converter.
- */
- if (newPageVersion != oldPageVersion)
- {
- /*
- * The clusters use differing page layouts, see if we can find a
- * plugin that knows how to convert from the old page layout to the
- * new page layout.
- */
-
- if ((converter = loadConverterPlugin(newPageVersion, oldPageVersion)) == NULL)
- pg_fatal("could not find plugin to convert from old page layout to new page layout\n");
-
- return converter;
- }
- else
- return NULL;
-}
-
-
-/*
- * getPageVersion()
- *
- * Retrieves the PageLayoutVersion for the given relation.
- *
- * Returns NULL on success (and stores the PageLayoutVersion at *version),
- * if an error occurs, this function returns an error message (in the form
- * of a null-terminated string).
- */
-static void
-getPageVersion(uint16 *version, const char *pathName)
-{
- int relfd;
- PageHeaderData page;
- ssize_t bytesRead;
-
- if ((relfd = open(pathName, O_RDONLY, 0)) < 0)
- pg_fatal("could not open relation %s\n", pathName);
-
- if ((bytesRead = read(relfd, &page, sizeof(page))) != sizeof(page))
- pg_fatal("could not read page header of %s\n", pathName);
-
- *version = PageGetPageLayoutVersion(&page);
-
- close(relfd);
-
- return;
-}
-
-
-/*
- * loadConverterPlugin()
- *
- * This function loads a page-converter plugin library and grabs a
- * pointer to each of the (interesting) functions provided by that
- * plugin. The name of the plugin library is derived from the given
- * newPageVersion and oldPageVersion. If a plugin is found, this
- * function returns a pointer to a pageCnvCtx object (which will contain
- * a collection of plugin function pointers). If the required plugin
- * is not found, this function returns NULL.
- */
-static pageCnvCtx *
-loadConverterPlugin(uint16 newPageVersion, uint16 oldPageVersion)
-{
- char pluginName[MAXPGPATH];
- void *plugin;
-
- /*
- * Try to find a plugin that can convert pages of oldPageVersion into
- * pages of newPageVersion. For example, if we oldPageVersion = 3 and
- * newPageVersion is 4, we search for a plugin named:
- * plugins/convertLayout_3_to_4.dll
- */
-
- /*
- * FIXME: we are searching for plugins relative to the current directory,
- * we should really search relative to our own executable instead.
- */
- snprintf(pluginName, sizeof(pluginName), "./plugins/convertLayout_%d_to_%d%s",
- oldPageVersion, newPageVersion, DLSUFFIX);
-
- if ((plugin = pg_dlopen(pluginName)) == NULL)
- return NULL;
- else
- {
- pageCnvCtx *result = (pageCnvCtx *) pg_malloc(sizeof(*result));
-
- result->old.PageVersion = oldPageVersion;
- result->new.PageVersion = newPageVersion;
-
- result->startup = (pluginStartup) pg_dlsym(plugin, "init");
- result->convertFile = (pluginConvertFile) pg_dlsym(plugin, "convertFile");
- result->convertPage = (pluginConvertPage) pg_dlsym(plugin, "convertPage");
- result->shutdown = (pluginShutdown) pg_dlsym(plugin, "fini");
- result->pluginData = NULL;
-
- /*
- * If the plugin has exported an initializer, go ahead and invoke it.
- */
- if (result->startup)
- result->startup(MIGRATOR_API_VERSION, &result->pluginVersion,
- newPageVersion, oldPageVersion, &result->pluginData);
-
- return result;
- }
-}
-
-#endif
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 984c395..4f5361a 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -260,8 +260,6 @@ prepare_new_cluster(void)
new_cluster.bindir, cluster_conn_opts(&new_cluster),
log_opts.verbose ? "--verbose" : "");
check_ok();
-
- get_pg_database_relfilenode(&new_cluster);
}
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index bc733c4..900b2a7 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -269,7 +269,6 @@ typedef struct
uint32 major_version; /* PG_VERSION of cluster */
char major_version_str[64]; /* string PG_VERSION of cluster */
uint32 bin_version; /* version returned from pg_ctl */
- Oid pg_database_oid; /* OID of pg_database relation */
const char *tablespace_suffix; /* directory specification */
} ClusterInfo;
@@ -364,40 +363,8 @@ bool pid_lock_file_exists(const char *datadir);
/* file.c */
-#ifdef PAGE_CONVERSION
-typedef const char *(*pluginStartup) (uint16 migratorVersion,
- uint16 *pluginVersion, uint16 newPageVersion,
- uint16 oldPageVersion, void **pluginData);
-typedef const char *(*pluginConvertFile) (void *pluginData,
- const char *dstName, const char *srcName);
-typedef const char *(*pluginConvertPage) (void *pluginData,
- const char *dstPage, const char *srcPage);
-typedef const char *(*pluginShutdown) (void *pluginData);
-
-typedef struct
-{
- uint16 oldPageVersion; /* Page layout version of the old cluster */
- uint16 newPageVersion; /* Page layout version of the new cluster */
- uint16 pluginVersion; /* API version of converter plugin */
- void *pluginData; /* Plugin data (set by plugin) */
- pluginStartup startup; /* Pointer to plugin's startup function */
- pluginConvertFile convertFile; /* Pointer to plugin's file converter
- * function */
- pluginConvertPage convertPage; /* Pointer to plugin's page converter
- * function */
- pluginShutdown shutdown; /* Pointer to plugin's shutdown function */
-} pageCnvCtx;
-
-const pageCnvCtx *setupPageConverter(void);
-#else
-/* dummy */
-typedef void *pageCnvCtx;
-#endif
-
-const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
- const char *dst, bool force);
-const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
- const char *dst);
+const char *copyAndUpdateFile(const char *src, const char *dst, bool force);
+const char *linkAndUpdateFile(const char *src, const char *dst);
void check_hard_link(void);
FILE *fopen_priv(const char *path, const char *mode);
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index c059c5b..fcaad79 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -15,10 +15,8 @@
#include "access/transam.h"
-static void transfer_single_new_db(pageCnvCtx *pageConverter,
- FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
- const char *suffix);
+static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
+static void transfer_relfile(FileNameMap *map, const char *suffix);
/*
@@ -92,7 +90,6 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
*new_db = NULL;
FileNameMap *mappings;
int n_maps;
- pageCnvCtx *pageConverter = NULL;
/*
* Advance past any databases that exist in the new cluster but not in
@@ -116,11 +113,7 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
{
print_maps(mappings, n_maps, new_db->db_name);
-#ifdef PAGE_CONVERSION
- pageConverter = setupPageConverter();
-#endif
- transfer_single_new_db(pageConverter, mappings, n_maps,
- old_tablespace);
+ transfer_single_new_db(mappings, n_maps, old_tablespace);
}
/* We allocate something even for n_maps == 0 */
pg_free(mappings);
@@ -129,45 +122,13 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
return;
}
-
-/*
- * get_pg_database_relfilenode()
- *
- * Retrieves the relfilenode for a few system-catalog tables. We need these
- * relfilenodes later in the upgrade process.
- */
-void
-get_pg_database_relfilenode(ClusterInfo *cluster)
-{
- PGconn *conn = connectToServer(cluster, "template1");
- PGresult *res;
- int i_relfile;
-
- res = executeQueryOrDie(conn,
- "SELECT c.relname, c.relfilenode "
- "FROM pg_catalog.pg_class c, "
- " pg_catalog.pg_namespace n "
- "WHERE c.relnamespace = n.oid AND "
- " n.nspname = 'pg_catalog' AND "
- " c.relname = 'pg_database' "
- "ORDER BY c.relname");
-
- i_relfile = PQfnumber(res, "relfilenode");
- cluster->pg_database_oid = atooid(PQgetvalue(res, 0, i_relfile));
-
- PQclear(res);
- PQfinish(conn);
-}
-
-
/*
* transfer_single_new_db()
*
* create links for mappings stored in "maps" array.
*/
static void
-transfer_single_new_db(pageCnvCtx *pageConverter,
- FileNameMap *maps, int size, char *old_tablespace)
+transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
{
int mapnum;
bool vm_crashsafe_match = true;
@@ -186,7 +147,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
/* transfer primary file */
- transfer_relfile(pageConverter, &maps[mapnum], "");
+ transfer_relfile(&maps[mapnum], "");
/* fsm/vm files added in PG 8.4 */
if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -194,9 +155,9 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
/*
* Copy/link any fsm and vm files, if they exist
*/
- transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
+ transfer_relfile(&maps[mapnum], "_fsm");
if (vm_crashsafe_match)
- transfer_relfile(pageConverter, &maps[mapnum], "_vm");
+ transfer_relfile(&maps[mapnum], "_vm");
}
}
}
@@ -209,8 +170,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
* Copy or link file from old cluster to new one.
*/
static void
-transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
- const char *type_suffix)
+transfer_relfile(FileNameMap *map, const char *type_suffix)
{
const char *msg;
char old_file[MAXPGPATH];
@@ -268,15 +228,11 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
/* Copying files might take some time, so give feedback. */
pg_log(PG_STATUS, "%s", old_file);
- if ((user_opts.transfer_mode == TRANSFER_MODE_LINK) && (pageConverter != NULL))
- pg_fatal("This upgrade requires page-by-page conversion, "
- "you must use copy mode instead of link mode.\n");
-
if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
{
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
+ if ((msg = copyAndUpdateFile(old_file, new_file, true)) != NULL)
pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}
@@ -284,7 +240,7 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
{
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = linkAndUpdateFile(pageConverter, old_file, new_file)) != NULL)
+ if ((msg = linkAndUpdateFile(old_file, new_file)) != NULL)
pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 115d506..9adee01 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -9,7 +9,11 @@
#include "postgres_fe.h"
+#include "access/visibilitymap.h"
#include "pg_upgrade.h"
+#include "storage/bufpage.h"
+#include "storage/checksum.h"
+#include "storage/checksum_impl.h"
#include <fcntl.h>
@@ -21,6 +25,25 @@ static int copy_file(const char *fromfile, const char *tofile, bool force);
static int win32_pghardlink(const char *src, const char *dst);
#endif
+/* table for fast rewriting vm file in order to add all-frozen information */
+static const uint16 rewrite_vm_table[256] = {
+ 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85,
+ 256, 257, 260, 261, 272, 273, 276, 277, 320, 321, 324, 325, 336, 337, 340, 341,
+ 1024, 1025, 1028, 1029, 1040, 1041, 1044, 1045, 1088, 1089, 1092, 1093, 1104, 1105, 1108, 1109,
+ 1280, 1281, 1284, 1285, 1296, 1297, 1300, 1301, 1344, 1345, 1348, 1349, 1360, 1361, 1364, 1365,
+ 4096, 4097, 4100, 4101, 4112, 4113, 4116, 4117, 4160, 4161, 4164, 4165, 4176, 4177, 4180, 4181,
+ 4352, 4353, 4356, 4357, 4368, 4369, 4372, 4373, 4416, 4417, 4420, 4421, 4432, 4433, 4436, 4437,
+ 5120, 5121, 5124, 5125, 5136, 5137, 5140, 5141, 5184, 5185, 5188, 5189, 5200, 5201, 5204, 5205,
+ 5376, 5377, 5380, 5381, 5392, 5393, 5396, 5397, 5440, 5441, 5444, 5445, 5456, 5457, 5460, 5461,
+ 16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469,
+ 16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725,
+ 17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493,
+ 17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749,
+ 20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565,
+ 20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757, 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821,
+ 21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589,
+ 21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845
+};
/*
* copyAndUpdateFile()
@@ -138,6 +161,95 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
#endif
+/*
+ * rewriteVisibilityMap()
+ *
+ * Copies a visibility map file while adding all-frozen bit(0) into each bit.
+ */
+const char *
+rewriteVisibilityMap(const char *fromfile, const char *tofile, bool force)
+{
+ int src_fd = 0;
+ int dst_fd = 0;
+ char buffer[BLCKSZ];
+ ssize_t bytesRead;
+ int rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
+ BlockNumber blkno = 0;
+
+ /* Reset errno */
+ errno = 0;
+
+ if ((fromfile == NULL) || (tofile == NULL))
+ return getErrorText();
+
+ if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
+ goto err;
+
+ if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
+ goto err;
+
+ /* Perform data rewriting per page */
+ while ((bytesRead = read(src_fd, buffer, BLCKSZ)) == BLCKSZ)
+ {
+ char *cur, *end, *blkend;
+ PageHeaderData pageheader;
+ uint16 vm_bits;
+
+ /* Save the page header data */
+ memcpy(&pageheader, buffer, SizeOfPageHeaderData);
+
+ cur = buffer + SizeOfPageHeaderData;
+ end = buffer + SizeOfPageHeaderData + rewriteVmBytesPerPage;
+ blkend = buffer + bytesRead;
+
+ while (blkend >= end)
+ {
+ char vmbuf[BLCKSZ];
+ char *vmtmp = vmbuf;
+
+ /* Copy page header in advance */
+ memcpy(vmbuf, &pageheader, SizeOfPageHeaderData);
+
+ vmtmp += SizeOfPageHeaderData;
+
+ /* Rewrite visibility map bit one by one */
+ while (end > cur)
+ {
+ /* Write rewritten bit from table and its string representation */
+ vm_bits = rewrite_vm_table[(uint8) *cur];
+ memcpy(vmtmp, &vm_bits, BITS_PER_HEAPBLOCK);
+
+ cur++;
+ vmtmp += BITS_PER_HEAPBLOCK;
+ }
+
+ /* Set new checksum for a visibility map page, If enabled */
+ if (old_cluster.controldata.data_checksum_version != 0 &&
+ new_cluster.controldata.data_checksum_version != 0)
+ ((PageHeader) vmbuf)->pd_checksum = pg_checksum_page(vmbuf, blkno);
+
+ if (write(dst_fd, vmbuf, BLCKSZ) != BLCKSZ)
+ {
+ if (errno == 0)
+ errno = ENOSPC;
+ goto err;
+ }
+
+ end += rewriteVmBytesPerPage;
+ blkno++;
+ }
+ }
+
+err:
+ if (src_fd != 0)
+ close(src_fd);
+
+ if (dst_fd != 0)
+ close(dst_fd);
+
+ return (errno == 0) ? NULL : getErrorText();
+}
+
void
check_hard_link(void)
{
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 900b2a7..ecd9ab3 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -110,6 +110,10 @@ extern char *output_files[];
#define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
/*
+ * The format of visibility map is changed with this 9.6 commit,
+ */
+#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201602181
+/*
* pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
* ("Improve concurrency of foreign key locking") which also updated catalog
* version to this value. pg_upgrade behavior depends on whether old and new
@@ -269,6 +273,7 @@ typedef struct
uint32 major_version; /* PG_VERSION of cluster */
char major_version_str[64]; /* string PG_VERSION of cluster */
uint32 bin_version; /* version returned from pg_ctl */
+ Oid pg_database_oid; /* OID of pg_database relation */
const char *tablespace_suffix; /* directory specification */
} ClusterInfo;
@@ -365,6 +370,8 @@ bool pid_lock_file_exists(const char *datadir);
const char *copyAndUpdateFile(const char *src, const char *dst, bool force);
const char *linkAndUpdateFile(const char *src, const char *dst);
+const char *rewriteVisibilityMap(const char *fromfile, const char *tofile,
+ bool force);
void check_hard_link(void);
FILE *fopen_priv(const char *path, const char *mode);
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index fcaad79..ee88c15 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -16,7 +16,7 @@
static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *suffix);
+static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_need_rewrite);
/*
@@ -132,6 +132,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
{
int mapnum;
bool vm_crashsafe_match = true;
+ bool vm_need_rewrite = false;
/*
* Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -141,13 +142,20 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
vm_crashsafe_match = false;
+ /*
+ * Do we need to rewrite visibilitymap?
+ */
+ if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER &&
+ new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
+ vm_need_rewrite = true;
+
for (mapnum = 0; mapnum < size; mapnum++)
{
if (old_tablespace == NULL ||
strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
/* transfer primary file */
- transfer_relfile(&maps[mapnum], "");
+ transfer_relfile(&maps[mapnum], "", vm_need_rewrite);
/* fsm/vm files added in PG 8.4 */
if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -155,9 +163,9 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
/*
* Copy/link any fsm and vm files, if they exist
*/
- transfer_relfile(&maps[mapnum], "_fsm");
+ transfer_relfile(&maps[mapnum], "_fsm", vm_need_rewrite);
if (vm_crashsafe_match)
- transfer_relfile(&maps[mapnum], "_vm");
+ transfer_relfile(&maps[mapnum], "_vm", vm_need_rewrite);
}
}
}
@@ -168,9 +176,11 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
* transfer_relfile()
*
* Copy or link file from old cluster to new one.
+ * if vm_need_rewrite is true, visibility map is rewritten to be added frozen bit
+ * even link mode.
*/
static void
-transfer_relfile(FileNameMap *map, const char *type_suffix)
+transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_need_rewrite)
{
const char *msg;
char old_file[MAXPGPATH];
@@ -232,7 +242,13 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
{
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = copyAndUpdateFile(old_file, new_file, true)) != NULL)
+ /* Rewrite visibility map */
+ if (vm_need_rewrite && (strcmp(type_suffix, "_vm") == 0))
+ msg = rewriteVisibilityMap(old_file, new_file, true);
+ else
+ msg = copyAndUpdateFile(old_file, new_file, true);
+
+ if (msg)
pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}
@@ -240,7 +256,13 @@ transfer_relfile(FileNameMap *map, const char *type_suffix)
{
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = linkAndUpdateFile(old_file, new_file)) != NULL)
+ /* Rewrite visibility map even link mode */
+ if (vm_need_rewrite && (strcmp(type_suffix, "_vm") == 0))
+ msg = rewriteVisibilityMap(old_file, new_file, true);
+ else
+ msg = linkAndUpdateFile(old_file, new_file);
+
+ if (msg)
pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}
diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh
index ba79fb3..cd9b17e 100644
--- a/src/bin/pg_upgrade/test.sh
+++ b/src/bin/pg_upgrade/test.sh
@@ -174,6 +174,11 @@ if "$MAKE" -C "$oldsrc" installcheck; then
mv "$temp_root"/dump1.sql "$temp_root"/dump1.sql.orig
sed "s;$oldsrc;$newsrc;g" "$temp_root"/dump1.sql.orig >"$temp_root"/dump1.sql
fi
+
+ vm_sql="SELECT c.relname, c.relallvisible FROM pg_class as c, pg_namespace as n WHERE c.relnamespace = n.oid AND n.nspname NOT IN ('information_schema', 'pg_toast', 'pg_catalog') ORDER BY c.relname;"
+ # Test for rewriting visibility map
+ vacuumdb -d regression || visibilitymap_vacuum1_status=$?
+ psql -d regression -c "$vm_sql" > "$temp_root"/vm_test1.txt || visibilitymap_test1_status=$?
else
make_installcheck_status=$?
fi
@@ -188,6 +193,14 @@ if [ -n "$pg_dumpall1_status" ]; then
echo "pg_dumpall of pre-upgrade database cluster failed"
exit 1
fi
+if [ -n "$visibilitymap_vacuum1_status" ];then
+ echo "VACUUM of pre-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
+if [ -n "$visibilitymap_test1_status" ];then
+ echo "SELECT of pre-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
PGDATA=$BASE_PGDATA
@@ -203,6 +216,8 @@ case $testhost in
esac
pg_dumpall -f "$temp_root"/dump2.sql || pg_dumpall2_status=$?
+vacuumdb -d regression || visibilitymap_vacuum2_status=$?
+psql -d regression -c "$vm_sql" > "$temp_root"/vm_test2.txt || visibilitymap_test2_status=$?
pg_ctl -m fast stop
# no need to echo commands anymore
@@ -214,11 +229,26 @@ if [ -n "$pg_dumpall2_status" ]; then
exit 1
fi
+if [ -n "$visibilitymap_vacuum2_status" ];then
+ echo "VACUUM of post-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
+
+if [ -n "$visibilitymap_test2_status" ];then
+ echo "SELECT of post-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
+
case $testhost in
MINGW*) cmd /c delete_old_cluster.bat ;;
*) sh ./delete_old_cluster.sh ;;
esac
+if ! diff "$temp_root"/vm_test1.txt "$temp_root"/vm_test2.txt >/dev/null; then
+ echo "Visibility map rewriting test failed"
+ exit 1
+fi
+
if diff "$temp_root"/dump1.sql "$temp_root"/dump2.sql >/dev/null; then
echo PASSED
exit 0
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers