On Tue, Mar 8, 2016 at 1:20 PM, Kyotaro HORIGUCHI <horiguchi.kyot...@lab.ntt.co.jp> wrote: > Hello, thank you for updating this tool. > > At Mon, 7 Mar 2016 14:03:08 -0500, Robert Haas <robertmh...@gmail.com> wrote > in <CA+Tgmob+NjfYE3b3BHBmAC=3tvtbqszgz1roj63yramrgrq...@mail.gmail.com> >> On Mon, Mar 7, 2016 at 12:41 PM, Masahiko Sawada <sawada.m...@gmail.com> >> wrote: >> > Attached latest version optimisation patch. >> > I'm still consider regarding pg_upgrade regression test code, so I >> > will submit that patch later. >> > I was thinking more about this today and I think that we don't > actually need the PD_ALL_FROZEN page-level bit for anything. It's > enough that the bit is present in the visibility map. The only point > of PD_ALL_VISIBLE is that it tells us that we need to clear the > visibility map bit, but that bit is enough to tell us to clear both > visibility map bits. So I propose the attached cleanup patch.
Thank you for updating tool and proposing it. I agree with you, and the patch you attached looks good to me except for Horiguchi-san's comment. Regarding pg_visibility module, I'd like to share some bugs and propose to add a relation type condition to each functions. Including it, I've attached remaining 2 patches; one is removing page conversion code from pg_upgarde, and another is supporting pg_upgrade for frozen bit. Please have a look at them. Regards, -- Masahiko Sawada
diff --git a/contrib/pg_visibility/pg_visibility--1.0.sql b/contrib/pg_visibility/pg_visibility--1.0.sql index 9616e1f..da511e5 100644 --- a/contrib/pg_visibility/pg_visibility--1.0.sql +++ b/contrib/pg_visibility/pg_visibility--1.0.sql @@ -12,7 +12,7 @@ AS 'MODULE_PATHNAME', 'pg_visibility_map' LANGUAGE C STRICT; -- Show visibility map and page-level visibility information. -CREATE FUNCTION pg_visibility(regclass, blkno, bigint, +CREATE FUNCTION pg_visibility(regclass, blkno bigint, all_visible OUT boolean, all_frozen OUT boolean, pd_all_visible OUT boolean) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index d4336ce..2993bcb 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -14,6 +14,7 @@ #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "utils/rel.h" PG_MODULE_MAGIC; @@ -55,6 +56,14 @@ pg_visibility_map(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid block number"))); + /* Check for relation type */ + if (!(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + tupdesc = pg_visibility_tupdesc(false, false); MemSet(nulls, 0, sizeof(nulls)); @@ -94,6 +103,14 @@ pg_visibility(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid block number"))); + /* Check for relation type */ + if (!(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + tupdesc = pg_visibility_tupdesc(false, true); MemSet(nulls, 0, sizeof(nulls)); @@ -147,9 +164,10 @@ pg_visibility_map_rel(PG_FUNCTION_ARGS) HeapTuple tuple; MemSet(nulls, 0, sizeof(nulls)); - values[0] = Int64GetDatum(info->next++); + values[0] = Int64GetDatum(info->next); values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0); values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0); + info->next++; tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); @@ -190,10 +208,11 @@ pg_visibility_rel(PG_FUNCTION_ARGS) HeapTuple tuple; MemSet(nulls, 0, sizeof(nulls)); - values[0] = Int64GetDatum(info->next++); + values[0] = Int64GetDatum(info->next); values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0); values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0); values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0); + info->next++; tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); @@ -223,6 +242,14 @@ pg_visibility_map_summary(PG_FUNCTION_ARGS) rel = relation_open(relid, AccessShareLock); nblocks = RelationGetNumberOfBlocks(rel); + /* Check for relation type */ + if (!(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + for (blkno = 0; blkno < nblocks; ++blkno) { int32 mapbits; @@ -296,6 +323,15 @@ collect_visibility_data(Oid relid, bool include_pd) BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); rel = relation_open(relid, AccessShareLock); + + /* Check for relation type */ + if (!(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + nblocks = RelationGetNumberOfBlocks(rel); info = palloc0(offsetof(vbits, bits) + nblocks); info->next = 0;
commit 8ab96722459fc929d5b2d447ffda18fe1107abc0 Author: Masahiko Sawada <sawada.mshk@gmail.com> Date: Wed Mar 2 11:09:41 2016 -0400 Initial. diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index d9c8145..0c882d9 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -8,7 +8,7 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = check.o controldata.o dump.o exec.o file.o function.o info.o \ - option.o page.o parallel.o pg_upgrade.o relfilenode.o server.o \ + option.o parallel.o pg_upgrade.o relfilenode.o server.o \ tablespace.o util.o version.o $(WIN32RES) override CPPFLAGS := -DDLSUFFIX=\"$(DLSUFFIX)\" -I$(srcdir) -I$(libpq_srcdir) $(CPPFLAGS) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index e0cb675..f932094 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -80,8 +80,6 @@ check_and_dump_old_cluster(bool live_check) if (!live_check) start_postmaster(&old_cluster, true); - get_pg_database_relfilenode(&old_cluster); - /* Extract a list of databases and tables from the old cluster */ get_db_and_rel_infos(&old_cluster); diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 9357ad8..115d506 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -25,15 +25,11 @@ static int win32_pghardlink(const char *src, const char *dst); /* * copyAndUpdateFile() * - * Copies a relation file from src to dst. If pageConverter is non-NULL, this function - * uses that pageConverter to do a page-by-page conversion. + * Copies a relation file from src to dst. */ const char * -copyAndUpdateFile(pageCnvCtx *pageConverter, - const char *src, const char *dst, bool force) +copyAndUpdateFile(const char *src, const char *dst, bool force) { - if (pageConverter == NULL) - { #ifndef WIN32 if (copy_file(src, dst, force) == -1) #else @@ -42,65 +38,6 @@ copyAndUpdateFile(pageCnvCtx *pageConverter, return getErrorText(); else return NULL; - } - else - { - /* - * We have a pageConverter object - that implies that the - * PageLayoutVersion differs between the two clusters so we have to - * perform a page-by-page conversion. - * - * If the pageConverter can convert the entire file at once, invoke - * that plugin function, otherwise, read each page in the relation - * file and call the convertPage plugin function. - */ - -#ifdef PAGE_CONVERSION - if (pageConverter->convertFile) - return pageConverter->convertFile(pageConverter->pluginData, - dst, src); - else -#endif - { - int src_fd; - int dstfd; - char buf[BLCKSZ]; - ssize_t bytesRead; - const char *msg = NULL; - - if ((src_fd = open(src, O_RDONLY, 0)) < 0) - return "could not open source file"; - - if ((dstfd = open(dst, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0) - { - close(src_fd); - return "could not create destination file"; - } - - while ((bytesRead = read(src_fd, buf, BLCKSZ)) == BLCKSZ) - { -#ifdef PAGE_CONVERSION - if ((msg = pageConverter->convertPage(pageConverter->pluginData, buf, buf)) != NULL) - break; -#endif - if (write(dstfd, buf, BLCKSZ) != BLCKSZ) - { - msg = "could not write new page to destination"; - break; - } - } - - close(src_fd); - close(dstfd); - - if (msg) - return msg; - else if (bytesRead != 0) - return "found partial page in source file"; - else - return NULL; - } - } } @@ -114,12 +51,8 @@ copyAndUpdateFile(pageCnvCtx *pageConverter, * instead of copying the data from the old cluster to the new cluster. */ const char * -linkAndUpdateFile(pageCnvCtx *pageConverter, - const char *src, const char *dst) +linkAndUpdateFile(const char *src, const char *dst) { - if (pageConverter != NULL) - return "Cannot in-place update this cluster, page-by-page conversion is required"; - if (pg_link_file(src, dst) == -1) return getErrorText(); else diff --git a/src/bin/pg_upgrade/page.c b/src/bin/pg_upgrade/page.c deleted file mode 100644 index e5686e5..0000000 --- a/src/bin/pg_upgrade/page.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * page.c - * - * per-page conversion operations - * - * Copyright (c) 2010-2016, PostgreSQL Global Development Group - * src/bin/pg_upgrade/page.c - */ - -#include "postgres_fe.h" - -#include "pg_upgrade.h" - -#include "storage/bufpage.h" - - -#ifdef PAGE_CONVERSION - - -static void getPageVersion( - uint16 *version, const char *pathName); -static pageCnvCtx *loadConverterPlugin( - uint16 newPageVersion, uint16 oldPageVersion); - - -/* - * setupPageConverter() - * - * This function determines the PageLayoutVersion of the old cluster and - * the PageLayoutVersion of the new cluster. If the versions differ, this - * function loads a converter plugin and returns a pointer to a pageCnvCtx - * object (in *result) that knows how to convert pages from the old format - * to the new format. If the versions are identical, this function just - * returns a NULL pageCnvCtx pointer to indicate that page-by-page conversion - * is not required. - */ -pageCnvCtx * -setupPageConverter(void) -{ - uint16 oldPageVersion; - uint16 newPageVersion; - pageCnvCtx *converter; - const char *msg; - char dstName[MAXPGPATH]; - char srcName[MAXPGPATH]; - - snprintf(dstName, sizeof(dstName), "%s/global/%u", new_cluster.pgdata, - new_cluster.pg_database_oid); - snprintf(srcName, sizeof(srcName), "%s/global/%u", old_cluster.pgdata, - old_cluster.pg_database_oid); - - getPageVersion(&oldPageVersion, srcName); - getPageVersion(&newPageVersion, dstName); - - /* - * If the old cluster and new cluster use the same page layouts, then we - * don't need a page converter. - */ - if (newPageVersion != oldPageVersion) - { - /* - * The clusters use differing page layouts, see if we can find a - * plugin that knows how to convert from the old page layout to the - * new page layout. - */ - - if ((converter = loadConverterPlugin(newPageVersion, oldPageVersion)) == NULL) - pg_fatal("could not find plugin to convert from old page layout to new page layout\n"); - - return converter; - } - else - return NULL; -} - - -/* - * getPageVersion() - * - * Retrieves the PageLayoutVersion for the given relation. - * - * Returns NULL on success (and stores the PageLayoutVersion at *version), - * if an error occurs, this function returns an error message (in the form - * of a null-terminated string). - */ -static void -getPageVersion(uint16 *version, const char *pathName) -{ - int relfd; - PageHeaderData page; - ssize_t bytesRead; - - if ((relfd = open(pathName, O_RDONLY, 0)) < 0) - pg_fatal("could not open relation %s\n", pathName); - - if ((bytesRead = read(relfd, &page, sizeof(page))) != sizeof(page)) - pg_fatal("could not read page header of %s\n", pathName); - - *version = PageGetPageLayoutVersion(&page); - - close(relfd); - - return; -} - - -/* - * loadConverterPlugin() - * - * This function loads a page-converter plugin library and grabs a - * pointer to each of the (interesting) functions provided by that - * plugin. The name of the plugin library is derived from the given - * newPageVersion and oldPageVersion. If a plugin is found, this - * function returns a pointer to a pageCnvCtx object (which will contain - * a collection of plugin function pointers). If the required plugin - * is not found, this function returns NULL. - */ -static pageCnvCtx * -loadConverterPlugin(uint16 newPageVersion, uint16 oldPageVersion) -{ - char pluginName[MAXPGPATH]; - void *plugin; - - /* - * Try to find a plugin that can convert pages of oldPageVersion into - * pages of newPageVersion. For example, if we oldPageVersion = 3 and - * newPageVersion is 4, we search for a plugin named: - * plugins/convertLayout_3_to_4.dll - */ - - /* - * FIXME: we are searching for plugins relative to the current directory, - * we should really search relative to our own executable instead. - */ - snprintf(pluginName, sizeof(pluginName), "./plugins/convertLayout_%d_to_%d%s", - oldPageVersion, newPageVersion, DLSUFFIX); - - if ((plugin = pg_dlopen(pluginName)) == NULL) - return NULL; - else - { - pageCnvCtx *result = (pageCnvCtx *) pg_malloc(sizeof(*result)); - - result->old.PageVersion = oldPageVersion; - result->new.PageVersion = newPageVersion; - - result->startup = (pluginStartup) pg_dlsym(plugin, "init"); - result->convertFile = (pluginConvertFile) pg_dlsym(plugin, "convertFile"); - result->convertPage = (pluginConvertPage) pg_dlsym(plugin, "convertPage"); - result->shutdown = (pluginShutdown) pg_dlsym(plugin, "fini"); - result->pluginData = NULL; - - /* - * If the plugin has exported an initializer, go ahead and invoke it. - */ - if (result->startup) - result->startup(MIGRATOR_API_VERSION, &result->pluginVersion, - newPageVersion, oldPageVersion, &result->pluginData); - - return result; - } -} - -#endif diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 984c395..4f5361a 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -260,8 +260,6 @@ prepare_new_cluster(void) new_cluster.bindir, cluster_conn_opts(&new_cluster), log_opts.verbose ? "--verbose" : ""); check_ok(); - - get_pg_database_relfilenode(&new_cluster); } diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index bc733c4..900b2a7 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -269,7 +269,6 @@ typedef struct uint32 major_version; /* PG_VERSION of cluster */ char major_version_str[64]; /* string PG_VERSION of cluster */ uint32 bin_version; /* version returned from pg_ctl */ - Oid pg_database_oid; /* OID of pg_database relation */ const char *tablespace_suffix; /* directory specification */ } ClusterInfo; @@ -364,40 +363,8 @@ bool pid_lock_file_exists(const char *datadir); /* file.c */ -#ifdef PAGE_CONVERSION -typedef const char *(*pluginStartup) (uint16 migratorVersion, - uint16 *pluginVersion, uint16 newPageVersion, - uint16 oldPageVersion, void **pluginData); -typedef const char *(*pluginConvertFile) (void *pluginData, - const char *dstName, const char *srcName); -typedef const char *(*pluginConvertPage) (void *pluginData, - const char *dstPage, const char *srcPage); -typedef const char *(*pluginShutdown) (void *pluginData); - -typedef struct -{ - uint16 oldPageVersion; /* Page layout version of the old cluster */ - uint16 newPageVersion; /* Page layout version of the new cluster */ - uint16 pluginVersion; /* API version of converter plugin */ - void *pluginData; /* Plugin data (set by plugin) */ - pluginStartup startup; /* Pointer to plugin's startup function */ - pluginConvertFile convertFile; /* Pointer to plugin's file converter - * function */ - pluginConvertPage convertPage; /* Pointer to plugin's page converter - * function */ - pluginShutdown shutdown; /* Pointer to plugin's shutdown function */ -} pageCnvCtx; - -const pageCnvCtx *setupPageConverter(void); -#else -/* dummy */ -typedef void *pageCnvCtx; -#endif - -const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src, - const char *dst, bool force); -const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src, - const char *dst); +const char *copyAndUpdateFile(const char *src, const char *dst, bool force); +const char *linkAndUpdateFile(const char *src, const char *dst); void check_hard_link(void); FILE *fopen_priv(const char *path, const char *mode); diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c index c059c5b..fcaad79 100644 --- a/src/bin/pg_upgrade/relfilenode.c +++ b/src/bin/pg_upgrade/relfilenode.c @@ -15,10 +15,8 @@ #include "access/transam.h" -static void transfer_single_new_db(pageCnvCtx *pageConverter, - FileNameMap *maps, int size, char *old_tablespace); -static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map, - const char *suffix); +static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); +static void transfer_relfile(FileNameMap *map, const char *suffix); /* @@ -92,7 +90,6 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, *new_db = NULL; FileNameMap *mappings; int n_maps; - pageCnvCtx *pageConverter = NULL; /* * Advance past any databases that exist in the new cluster but not in @@ -116,11 +113,7 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, { print_maps(mappings, n_maps, new_db->db_name); -#ifdef PAGE_CONVERSION - pageConverter = setupPageConverter(); -#endif - transfer_single_new_db(pageConverter, mappings, n_maps, - old_tablespace); + transfer_single_new_db(mappings, n_maps, old_tablespace); } /* We allocate something even for n_maps == 0 */ pg_free(mappings); @@ -129,45 +122,13 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, return; } - -/* - * get_pg_database_relfilenode() - * - * Retrieves the relfilenode for a few system-catalog tables. We need these - * relfilenodes later in the upgrade process. - */ -void -get_pg_database_relfilenode(ClusterInfo *cluster) -{ - PGconn *conn = connectToServer(cluster, "template1"); - PGresult *res; - int i_relfile; - - res = executeQueryOrDie(conn, - "SELECT c.relname, c.relfilenode " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n " - "WHERE c.relnamespace = n.oid AND " - " n.nspname = 'pg_catalog' AND " - " c.relname = 'pg_database' " - "ORDER BY c.relname"); - - i_relfile = PQfnumber(res, "relfilenode"); - cluster->pg_database_oid = atooid(PQgetvalue(res, 0, i_relfile)); - - PQclear(res); - PQfinish(conn); -} - - /* * transfer_single_new_db() * * create links for mappings stored in "maps" array. */ static void -transfer_single_new_db(pageCnvCtx *pageConverter, - FileNameMap *maps, int size, char *old_tablespace) +transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) { int mapnum; bool vm_crashsafe_match = true; @@ -186,7 +147,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter, strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0) { /* transfer primary file */ - transfer_relfile(pageConverter, &maps[mapnum], ""); + transfer_relfile(&maps[mapnum], ""); /* fsm/vm files added in PG 8.4 */ if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804) @@ -194,9 +155,9 @@ transfer_single_new_db(pageCnvCtx *pageConverter, /* * Copy/link any fsm and vm files, if they exist */ - transfer_relfile(pageConverter, &maps[mapnum], "_fsm"); + transfer_relfile(&maps[mapnum], "_fsm"); if (vm_crashsafe_match) - transfer_relfile(pageConverter, &maps[mapnum], "_vm"); + transfer_relfile(&maps[mapnum], "_vm"); } } } @@ -209,8 +170,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter, * Copy or link file from old cluster to new one. */ static void -transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map, - const char *type_suffix) +transfer_relfile(FileNameMap *map, const char *type_suffix) { const char *msg; char old_file[MAXPGPATH]; @@ -268,15 +228,11 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map, /* Copying files might take some time, so give feedback. */ pg_log(PG_STATUS, "%s", old_file); - if ((user_opts.transfer_mode == TRANSFER_MODE_LINK) && (pageConverter != NULL)) - pg_fatal("This upgrade requires page-by-page conversion, " - "you must use copy mode instead of link mode.\n"); - if (user_opts.transfer_mode == TRANSFER_MODE_COPY) { pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file); - if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL) + if ((msg = copyAndUpdateFile(old_file, new_file, true)) != NULL) pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", map->nspname, map->relname, old_file, new_file, msg); } @@ -284,7 +240,7 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map, { pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file); - if ((msg = linkAndUpdateFile(pageConverter, old_file, new_file)) != NULL) + if ((msg = linkAndUpdateFile(old_file, new_file)) != NULL) pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", map->nspname, map->relname, old_file, new_file, msg); }
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 115d506..9adee01 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -9,7 +9,11 @@ #include "postgres_fe.h" +#include "access/visibilitymap.h" #include "pg_upgrade.h" +#include "storage/bufpage.h" +#include "storage/checksum.h" +#include "storage/checksum_impl.h" #include <fcntl.h> @@ -21,6 +25,25 @@ static int copy_file(const char *fromfile, const char *tofile, bool force); static int win32_pghardlink(const char *src, const char *dst); #endif +/* table for fast rewriting vm file in order to add all-frozen information */ +static const uint16 rewrite_vm_table[256] = { + 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, + 256, 257, 260, 261, 272, 273, 276, 277, 320, 321, 324, 325, 336, 337, 340, 341, + 1024, 1025, 1028, 1029, 1040, 1041, 1044, 1045, 1088, 1089, 1092, 1093, 1104, 1105, 1108, 1109, + 1280, 1281, 1284, 1285, 1296, 1297, 1300, 1301, 1344, 1345, 1348, 1349, 1360, 1361, 1364, 1365, + 4096, 4097, 4100, 4101, 4112, 4113, 4116, 4117, 4160, 4161, 4164, 4165, 4176, 4177, 4180, 4181, + 4352, 4353, 4356, 4357, 4368, 4369, 4372, 4373, 4416, 4417, 4420, 4421, 4432, 4433, 4436, 4437, + 5120, 5121, 5124, 5125, 5136, 5137, 5140, 5141, 5184, 5185, 5188, 5189, 5200, 5201, 5204, 5205, + 5376, 5377, 5380, 5381, 5392, 5393, 5396, 5397, 5440, 5441, 5444, 5445, 5456, 5457, 5460, 5461, + 16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469, + 16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725, + 17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493, + 17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749, + 20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565, + 20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757, 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821, + 21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589, + 21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845 +}; /* * copyAndUpdateFile() @@ -138,6 +161,95 @@ copy_file(const char *srcfile, const char *dstfile, bool force) #endif +/* + * rewriteVisibilityMap() + * + * Copies a visibility map file while adding all-frozen bit(0) into each bit. + */ +const char * +rewriteVisibilityMap(const char *fromfile, const char *tofile, bool force) +{ + int src_fd = 0; + int dst_fd = 0; + char buffer[BLCKSZ]; + ssize_t bytesRead; + int rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2; + BlockNumber blkno = 0; + + /* Reset errno */ + errno = 0; + + if ((fromfile == NULL) || (tofile == NULL)) + return getErrorText(); + + if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0) + goto err; + + if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0) + goto err; + + /* Perform data rewriting per page */ + while ((bytesRead = read(src_fd, buffer, BLCKSZ)) == BLCKSZ) + { + char *cur, *end, *blkend; + PageHeaderData pageheader; + uint16 vm_bits; + + /* Save the page header data */ + memcpy(&pageheader, buffer, SizeOfPageHeaderData); + + cur = buffer + SizeOfPageHeaderData; + end = buffer + SizeOfPageHeaderData + rewriteVmBytesPerPage; + blkend = buffer + bytesRead; + + while (blkend >= end) + { + char vmbuf[BLCKSZ]; + char *vmtmp = vmbuf; + + /* Copy page header in advance */ + memcpy(vmbuf, &pageheader, SizeOfPageHeaderData); + + vmtmp += SizeOfPageHeaderData; + + /* Rewrite visibility map bit one by one */ + while (end > cur) + { + /* Write rewritten bit from table and its string representation */ + vm_bits = rewrite_vm_table[(uint8) *cur]; + memcpy(vmtmp, &vm_bits, BITS_PER_HEAPBLOCK); + + cur++; + vmtmp += BITS_PER_HEAPBLOCK; + } + + /* Set new checksum for a visibility map page, If enabled */ + if (old_cluster.controldata.data_checksum_version != 0 && + new_cluster.controldata.data_checksum_version != 0) + ((PageHeader) vmbuf)->pd_checksum = pg_checksum_page(vmbuf, blkno); + + if (write(dst_fd, vmbuf, BLCKSZ) != BLCKSZ) + { + if (errno == 0) + errno = ENOSPC; + goto err; + } + + end += rewriteVmBytesPerPage; + blkno++; + } + } + +err: + if (src_fd != 0) + close(src_fd); + + if (dst_fd != 0) + close(dst_fd); + + return (errno == 0) ? NULL : getErrorText(); +} + void check_hard_link(void) { diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 900b2a7..ecd9ab3 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -110,6 +110,10 @@ extern char *output_files[]; #define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031 /* + * The format of visibility map is changed with this 9.6 commit, + */ +#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201602181 +/* * pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85, * ("Improve concurrency of foreign key locking") which also updated catalog * version to this value. pg_upgrade behavior depends on whether old and new @@ -269,6 +273,7 @@ typedef struct uint32 major_version; /* PG_VERSION of cluster */ char major_version_str[64]; /* string PG_VERSION of cluster */ uint32 bin_version; /* version returned from pg_ctl */ + Oid pg_database_oid; /* OID of pg_database relation */ const char *tablespace_suffix; /* directory specification */ } ClusterInfo; @@ -365,6 +370,8 @@ bool pid_lock_file_exists(const char *datadir); const char *copyAndUpdateFile(const char *src, const char *dst, bool force); const char *linkAndUpdateFile(const char *src, const char *dst); +const char *rewriteVisibilityMap(const char *fromfile, const char *tofile, + bool force); void check_hard_link(void); FILE *fopen_priv(const char *path, const char *mode); diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c index fcaad79..ee88c15 100644 --- a/src/bin/pg_upgrade/relfilenode.c +++ b/src/bin/pg_upgrade/relfilenode.c @@ -16,7 +16,7 @@ static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); -static void transfer_relfile(FileNameMap *map, const char *suffix); +static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_need_rewrite); /* @@ -132,6 +132,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) { int mapnum; bool vm_crashsafe_match = true; + bool vm_need_rewrite = false; /* * Do the old and new cluster disagree on the crash-safetiness of the vm @@ -141,13 +142,20 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER) vm_crashsafe_match = false; + /* + * Do we need to rewrite visibilitymap? + */ + if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER && + new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER) + vm_need_rewrite = true; + for (mapnum = 0; mapnum < size; mapnum++) { if (old_tablespace == NULL || strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0) { /* transfer primary file */ - transfer_relfile(&maps[mapnum], ""); + transfer_relfile(&maps[mapnum], "", vm_need_rewrite); /* fsm/vm files added in PG 8.4 */ if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804) @@ -155,9 +163,9 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) /* * Copy/link any fsm and vm files, if they exist */ - transfer_relfile(&maps[mapnum], "_fsm"); + transfer_relfile(&maps[mapnum], "_fsm", vm_need_rewrite); if (vm_crashsafe_match) - transfer_relfile(&maps[mapnum], "_vm"); + transfer_relfile(&maps[mapnum], "_vm", vm_need_rewrite); } } } @@ -168,9 +176,11 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) * transfer_relfile() * * Copy or link file from old cluster to new one. + * if vm_need_rewrite is true, visibility map is rewritten to be added frozen bit + * even link mode. */ static void -transfer_relfile(FileNameMap *map, const char *type_suffix) +transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_need_rewrite) { const char *msg; char old_file[MAXPGPATH]; @@ -232,7 +242,13 @@ transfer_relfile(FileNameMap *map, const char *type_suffix) { pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file); - if ((msg = copyAndUpdateFile(old_file, new_file, true)) != NULL) + /* Rewrite visibility map */ + if (vm_need_rewrite && (strcmp(type_suffix, "_vm") == 0)) + msg = rewriteVisibilityMap(old_file, new_file, true); + else + msg = copyAndUpdateFile(old_file, new_file, true); + + if (msg) pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", map->nspname, map->relname, old_file, new_file, msg); } @@ -240,7 +256,13 @@ transfer_relfile(FileNameMap *map, const char *type_suffix) { pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n", old_file, new_file); - if ((msg = linkAndUpdateFile(old_file, new_file)) != NULL) + /* Rewrite visibility map even link mode */ + if (vm_need_rewrite && (strcmp(type_suffix, "_vm") == 0)) + msg = rewriteVisibilityMap(old_file, new_file, true); + else + msg = linkAndUpdateFile(old_file, new_file); + + if (msg) pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n", map->nspname, map->relname, old_file, new_file, msg); } diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh index ba79fb3..cd9b17e 100644 --- a/src/bin/pg_upgrade/test.sh +++ b/src/bin/pg_upgrade/test.sh @@ -174,6 +174,11 @@ if "$MAKE" -C "$oldsrc" installcheck; then mv "$temp_root"/dump1.sql "$temp_root"/dump1.sql.orig sed "s;$oldsrc;$newsrc;g" "$temp_root"/dump1.sql.orig >"$temp_root"/dump1.sql fi + + vm_sql="SELECT c.relname, c.relallvisible FROM pg_class as c, pg_namespace as n WHERE c.relnamespace = n.oid AND n.nspname NOT IN ('information_schema', 'pg_toast', 'pg_catalog') ORDER BY c.relname;" + # Test for rewriting visibility map + vacuumdb -d regression || visibilitymap_vacuum1_status=$? + psql -d regression -c "$vm_sql" > "$temp_root"/vm_test1.txt || visibilitymap_test1_status=$? else make_installcheck_status=$? fi @@ -188,6 +193,14 @@ if [ -n "$pg_dumpall1_status" ]; then echo "pg_dumpall of pre-upgrade database cluster failed" exit 1 fi +if [ -n "$visibilitymap_vacuum1_status" ];then + echo "VACUUM of pre-upgrade database cluster for visibility map test failed" + exit 1 +fi +if [ -n "$visibilitymap_test1_status" ];then + echo "SELECT of pre-upgrade database cluster for visibility map test failed" + exit 1 +fi PGDATA=$BASE_PGDATA @@ -203,6 +216,8 @@ case $testhost in esac pg_dumpall -f "$temp_root"/dump2.sql || pg_dumpall2_status=$? +vacuumdb -d regression || visibilitymap_vacuum2_status=$? +psql -d regression -c "$vm_sql" > "$temp_root"/vm_test2.txt || visibilitymap_test2_status=$? pg_ctl -m fast stop # no need to echo commands anymore @@ -214,11 +229,26 @@ if [ -n "$pg_dumpall2_status" ]; then exit 1 fi +if [ -n "$visibilitymap_vacuum2_status" ];then + echo "VACUUM of post-upgrade database cluster for visibility map test failed" + exit 1 +fi + +if [ -n "$visibilitymap_test2_status" ];then + echo "SELECT of post-upgrade database cluster for visibility map test failed" + exit 1 +fi + case $testhost in MINGW*) cmd /c delete_old_cluster.bat ;; *) sh ./delete_old_cluster.sh ;; esac +if ! diff "$temp_root"/vm_test1.txt "$temp_root"/vm_test2.txt >/dev/null; then + echo "Visibility map rewriting test failed" + exit 1 +fi + if diff "$temp_root"/dump1.sql "$temp_root"/dump2.sql >/dev/null; then echo PASSED exit 0
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers