Justin Pryzby писал 2022-06-28 21:33:
Hi,
On Thu, Feb 10, 2022 at 06:07:08PM +0300, Alexander Pyhalov wrote:
I've rebased patches and tried to fix issues I've seen. I've fixed
reference
after table_close() in the first patch (can be seen while building
with
CPPFLAGS='-DRELCACHE_FORCE_RELEASE').
Rebased patches on the current master.
They still require proper review.
--
Best regards,
Alexander Pyhalov,
Postgres Professional
From 5c11849ceb2a1feb0e44dbdf30cc27de0282a659 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyha...@postgrespro.ru>
Date: Mon, 28 Feb 2022 10:50:58 +0300
Subject: [PATCH 5/5] Mark intermediate partitioned indexes as valid
---
src/backend/commands/indexcmds.c | 33 ++++++++++-
src/test/regress/expected/indexing.out | 80 +++++++++++++++++++++++++-
src/test/regress/sql/indexing.sql | 8 +++
3 files changed, 118 insertions(+), 3 deletions(-)
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d09f0390413..d3ced6265b6 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -3139,6 +3139,7 @@ static void
ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
{
List *partitions = NIL;
+ List *inhpartindexes = NIL;
char relkind = get_rel_relkind(relid);
char *relname = get_rel_name(relid);
char *relnamespace = get_namespace_name(get_rel_namespace(relid));
@@ -3193,6 +3194,17 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
char partkind = get_rel_relkind(partoid);
MemoryContext old_context;
+ /* Create a list of invalid inherited partitioned indexes */
+ if (partkind == RELKIND_PARTITIONED_INDEX)
+ {
+ if (partoid == relid || get_index_isvalid(partoid))
+ continue;
+
+ old_context = MemoryContextSwitchTo(reindex_context);
+ inhpartindexes = lappend_oid(inhpartindexes, partoid);
+ MemoryContextSwitchTo(old_context);
+ }
+
/*
* This discards partitioned tables, partitioned indexes and foreign
* tables.
@@ -3237,9 +3249,28 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
Oid tableoid = IndexGetRelation(relid, false);
List *child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
- /* Both lists include their parent relation as well as any intermediate partitioned rels */
+ /*
+ * Both lists include their parent relation as well as any
+ * intermediate partitioned rels
+ */
if (list_length(inhoids) == list_length(child_tables))
+ {
index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+
+ /* Mark any intermediate partitioned index as valid */
+ foreach(lc, inhpartindexes)
+ {
+ Oid partoid = lfirst_oid(lc);
+
+ Assert(get_rel_relkind(partoid) == RELKIND_PARTITIONED_INDEX);
+ Assert(!get_index_isvalid(partoid));
+
+ /* Can't mark an index valid without marking it ready */
+ index_set_state_flags(partoid, INDEX_CREATE_SET_READY);
+ CommandCounterIncrement();
+ index_set_state_flags(partoid, INDEX_CREATE_SET_VALID);
+ }
+ }
}
/*
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index a4ccae50de3..b4f1aea6fca 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -57,6 +57,8 @@ create table idxpart11 partition of idxpart1 for values from (0) to (10) partiti
create table idxpart111 partition of idxpart11 default partition by range(a);
create table idxpart1111 partition of idxpart111 default partition by range(a);
create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
insert into idxpart2 values(10),(10); -- not unique
create index concurrently on idxpart (a); -- partitioned
create index concurrently on idxpart1 (a); -- partitioned and partition
@@ -76,7 +78,7 @@ Partition key: RANGE (a)
Indexes:
"idxpart_a_idx" btree (a)
"idxpart_a_idx1" UNIQUE, btree (a) INVALID
-Number of partitions: 2 (Use \d+ to list them.)
+Number of partitions: 3 (Use \d+ to list them.)
\d idxpart1
Partitioned table "public.idxpart1"
@@ -88,11 +90,59 @@ Number of partitions: 2 (Use \d+ to list them.)
Partition of: idxpart FOR VALUES FROM (0) TO (10)
Partition key: RANGE (a)
Indexes:
- "idxpart1_a_idx" btree (a) INVALID
+ "idxpart1_a_idx" btree (a)
"idxpart1_a_idx1" btree (a)
"idxpart1_a_idx2" UNIQUE, btree (a) INVALID
Number of partitions: 1 (Use \d+ to list them.)
+\d idxpart11
+ Partitioned table "public.idxpart11"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart1 FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+ "idxpart11_a_idx" btree (a)
+ "idxpart11_a_idx1" btree (a)
+ "idxpart11_a_idx2" btree (a)
+ "idxpart11_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart111
+ Partitioned table "public.idxpart111"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart11 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+ "idxpart111_a_idx" btree (a)
+ "idxpart111_a_idx1" btree (a)
+ "idxpart111_a_idx2" btree (a)
+ "idxpart111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart1111
+ Partitioned table "public.idxpart1111"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart111 DEFAULT
+Partition key: RANGE (a)
+Indexes:
+ "idxpart1111_a_idx" btree (a)
+ "idxpart1111_a_idx1" btree (a)
+ "idxpart1111_a_idx2" btree (a)
+ "idxpart1111_a_idx3" UNIQUE, btree (a) INVALID
+Number of partitions: 0
+
\d idxpart2
Table "public.idxpart2"
Column | Type | Collation | Nullable | Default
@@ -107,6 +157,32 @@ Indexes:
"idxpart2_a_idx2" UNIQUE, btree (a) INVALID
"idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+\d idxpart3
+ Partitioned table "public.idxpart3"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart FOR VALUES FROM (30) TO (40)
+Partition key: RANGE (a)
+Indexes:
+ "idxpart3_a_idx" btree (a)
+ "idxpart3_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart31
+ Table "public.idxpart31"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart3 DEFAULT
+Indexes:
+ "idxpart31_a_idx" btree (a)
+ "idxpart31_a_idx1" UNIQUE, btree (a) INVALID
+
drop table idxpart;
-- Verify bugfix with query on indexed partitioned table with no partitions
-- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 3d4b6e9bc95..06673c15199 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -36,6 +36,9 @@ create table idxpart11 partition of idxpart1 for values from (0) to (10) partiti
create table idxpart111 partition of idxpart11 default partition by range(a);
create table idxpart1111 partition of idxpart111 default partition by range(a);
create table idxpart2 partition of idxpart for values from (10) to (20);
+create table idxpart3 partition of idxpart for values from (30) to (40) partition by range(a);
+create table idxpart31 partition of idxpart3 default;
+
insert into idxpart2 values(10),(10); -- not unique
create index concurrently on idxpart (a); -- partitioned
create index concurrently on idxpart1 (a); -- partitioned and partition
@@ -44,7 +47,12 @@ create index concurrently on idxpart2 (a); -- leaf
create unique index concurrently on idxpart (a); -- partitioned, unique failure
\d idxpart
\d idxpart1
+\d idxpart11
+\d idxpart111
+\d idxpart1111
\d idxpart2
+\d idxpart3
+\d idxpart31
drop table idxpart;
-- Verify bugfix with query on indexed partitioned table with no partitions
--
2.34.1
From 71838e0146e5150013c48818710d899e69786dc0 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pry...@telsasoft.com>
Date: Mon, 7 Feb 2022 10:39:59 +0300
Subject: [PATCH 4/5] ReindexPartitions() to set indisvalid
0004-ReindexPartitions-to-set-indisvalid.patch from
https://www.postgresql.org/message-id/20210226182019.gu20...@telsasoft.com
---
src/backend/commands/indexcmds.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b231a05b8c9..d09f0390413 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1716,8 +1716,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
* CIC needs to mark a partitioned index as VALID, which itself
* requires setting READY, which is unset for CIC (even though
* it's meaningless for an index without storage).
- * This must be done only while holding a lock which precludes adding
- * partitions.
*/
CommandCounterIncrement();
index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
@@ -1729,9 +1727,6 @@ reindex_invalid_child_indexes(Oid indexRelationId)
* this commits and then starts a new transaction immediately.
*/
ReindexPartitions(indexRelationId, ¶ms, true);
-
- CommandCounterIncrement();
- index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
}
/*
@@ -3229,6 +3224,24 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
*/
ReindexMultipleInternal(partitions, params, relid, npart);
+ /*
+ * If indexes exist on all of the partitioned table's children, and we
+ * just reindexed them, then we know they're valid, and so can mark the
+ * parent index as valid.
+ * This handles the case of CREATE INDEX CONCURRENTLY.
+ * See also: validatePartitionedIndex().
+ */
+ if (get_rel_relkind(relid) == RELKIND_PARTITIONED_INDEX
+ && !get_index_isvalid(relid))
+ {
+ Oid tableoid = IndexGetRelation(relid, false);
+ List *child_tables = find_all_inheritors(tableoid, ShareLock, NULL);
+
+ /* Both lists include their parent relation as well as any intermediate partitioned rels */
+ if (list_length(inhoids) == list_length(child_tables))
+ index_set_state_flags(relid, INDEX_CREATE_SET_VALID);
+ }
+
/*
* Clean up working storage --- note we must do this after
* StartTransactionCommand, else we might be trying to delete the active
--
2.34.1
From f5b8afe1aafb78c53a527d379b978ce9f1fe06d6 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyha...@postgrespro.ru>
Date: Tue, 8 Feb 2022 21:15:05 +0300
Subject: [PATCH 3/5] Try to fix create index progress report
---
src/backend/commands/indexcmds.c | 67 ++++++++++++++++++++++++++------
src/include/catalog/index.h | 1 +
2 files changed, 57 insertions(+), 11 deletions(-)
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 149235780b9..b231a05b8c9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -102,11 +102,14 @@ static void reindex_error_callback(void *args);
static void ReindexPartitions(Oid relid, ReindexParams *params,
bool isTopLevel);
static void ReindexMultipleInternal(List *relids,
- ReindexParams *params);
+ ReindexParams *params,
+ Oid parent,
+ int npart);
static bool ReindexRelationConcurrently(Oid relationOid,
ReindexParams *params);
static void update_relispartition(Oid relationId, bool newval);
static inline void set_indexsafe_procflags(void);
+static void report_create_partition_index_done(Oid parent, int npart);
/*
* callback argument type for RangeVarCallbackForReindexIndex()
@@ -1220,6 +1223,7 @@ DefineIndex(Oid relationId,
Oid *opfamOids;
char *relname;
+
pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
nparts);
@@ -1705,7 +1709,7 @@ static void
reindex_invalid_child_indexes(Oid indexRelationId)
{
ReindexParams params = {
- .options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
+ .options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID | REINDEXOPT_REPORT_CREATE_PART
};
/*
@@ -1718,6 +1722,8 @@ reindex_invalid_child_indexes(Oid indexRelationId)
CommandCounterIncrement();
index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN);
+
/*
* Process each partition listed in a separate transaction. Note that
* this commits and then starts a new transaction immediately.
@@ -3105,7 +3111,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
* Process each relation listed in a separate transaction. Note that this
* commits and then starts a new transaction immediately.
*/
- ReindexMultipleInternal(relids, params);
+ ReindexMultipleInternal(relids, params, InvalidOid, 0);
MemoryContextDelete(private_context);
}
@@ -3141,6 +3147,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
char relkind = get_rel_relkind(relid);
char *relname = get_rel_name(relid);
char *relnamespace = get_namespace_name(get_rel_namespace(relid));
+ int npart = 1;
MemoryContext reindex_context;
List *inhoids;
ListCell *lc;
@@ -3201,7 +3208,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
/* Skip valid indexes, if requested */
if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
get_index_isvalid(partoid))
+ {
+ if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+ report_create_partition_index_done(relid, npart++);
continue;
+ }
Assert(partkind == RELKIND_INDEX ||
partkind == RELKIND_RELATION);
@@ -3216,7 +3227,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
* Process each partition listed in a separate transaction. Note that
* this commits and then starts a new transaction immediately.
*/
- ReindexMultipleInternal(partitions, params);
+ ReindexMultipleInternal(partitions, params, relid, npart);
/*
* Clean up working storage --- note we must do this after
@@ -3234,7 +3245,7 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
* and starts a new transaction when finished.
*/
static void
-ReindexMultipleInternal(List *relids, ReindexParams *params)
+ReindexMultipleInternal(List *relids, ReindexParams *params, Oid parent, int npart)
{
ListCell *l;
@@ -3328,6 +3339,9 @@ ReindexMultipleInternal(List *relids, ReindexParams *params)
}
CommitTransactionCommand();
+
+ if (params->options & REINDEXOPT_REPORT_CREATE_PART)
+ report_create_partition_index_done(parent, npart++);
}
StartTransactionCommand();
@@ -3723,7 +3737,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
elog(ERROR, "cannot reindex a temporary table concurrently");
- pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+ /* Don't overwrite CREATE INDEX command */
+ if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
idx->tableId);
progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
@@ -3883,9 +3899,11 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
/*
* Update progress for the index to build, with the correct parent
- * table involved.
+ * table involved. Don't overwrite CREATE INDEX command.
*/
- pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+ if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
+
progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
progress_vals[2] = newidx->indexId;
@@ -3947,10 +3965,12 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
/*
* Update progress for the index to build, with the correct parent
- * table involved.
+ * table involved. Don't overwrite CREATE INDEX command.
*/
- pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
+ if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+ pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
newidx->tableId);
+
progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
progress_vals[2] = newidx->indexId;
@@ -4185,7 +4205,9 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
MemoryContextDelete(private_context);
- pgstat_progress_end_command();
+ /* Don't overwrite CREATE INDEX command. */
+ if (!(params->options & REINDEXOPT_REPORT_CREATE_PART))
+ pgstat_progress_end_command();
return true;
}
@@ -4321,6 +4343,29 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
}
}
+/*
+ * Update pgstat progress report to indicate that create index on
+ * partition was finished.
+ */
+static void
+report_create_partition_index_done(Oid index, int npart)
+{
+ const int progress_cols[] = {
+ PROGRESS_CREATEIDX_COMMAND,
+ PROGRESS_CREATEIDX_INDEX_OID,
+ PROGRESS_CREATEIDX_PHASE,
+ PROGRESS_CREATEIDX_PARTITIONS_DONE
+ };
+ const int64 progress_vals[] = {
+ PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY,
+ index,
+ PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
+ npart
+ };
+
+ pgstat_progress_update_multi_param(4, progress_cols, progress_vals);
+}
+
/*
* Subroutine of IndexSetParentIndex to update the relispartition flag of the
* given index to the given value.
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index c31b66ad0b9..b5b0a71e7d4 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -43,6 +43,7 @@ typedef struct ReindexParams
#define REINDEXOPT_MISSING_OK 0x04 /* skip missing relations */
#define REINDEXOPT_CONCURRENTLY 0x08 /* concurrent mode */
#define REINDEXOPT_SKIPVALID 0x10 /* skip valid indexes */
+#define REINDEXOPT_REPORT_CREATE_PART 0x20 /* report that index was created for partition */
/* state info for validate_index bulkdelete callback */
typedef struct ValidateIndexState
--
2.34.1
From 4f16e729a835dc8123a8ce593e6ae47542b82c83 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pry...@telsasoft.com>
Date: Mon, 7 Feb 2022 10:31:40 +0300
Subject: [PATCH 2/5] Add SKIPVALID flag for more integration
Combined
0002-f-progress-reporting.patch and
0003-WIP-Add-SKIPVALID-flag-for-more-integration.patch from
https://www.postgresql.org/message-id/20210226182019.gu20...@telsasoft.com
---
src/backend/commands/indexcmds.c | 57 ++++++++++----------------------
src/include/catalog/index.h | 1 +
2 files changed, 19 insertions(+), 39 deletions(-)
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 784f6d9eb87..149235780b9 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1697,59 +1697,33 @@ DefineIndex(Oid relationId,
return address;
}
-/* Reindex invalid child indexes created earlier */
+/*
+ * Reindex invalid child indexes created earlier thereby validating
+ * the parent index.
+ */
static void
reindex_invalid_child_indexes(Oid indexRelationId)
{
- ListCell *lc;
- int npart = 0;
ReindexParams params = {
- .options = REINDEXOPT_CONCURRENTLY
+ .options = REINDEXOPT_CONCURRENTLY | REINDEXOPT_SKIPVALID
};
- MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
- ALLOCSET_DEFAULT_SIZES);
- MemoryContext oldcontext;
- List *childs = find_inheritance_children(indexRelationId, ShareLock);
- List *partitions = NIL;
-
- PreventInTransactionBlock(true, "REINDEX INDEX");
-
- foreach (lc, childs)
- {
- Oid partoid = lfirst_oid(lc);
-
- /* XXX: need to retrofit progress reporting into it */
- // pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
- // npart++);
-
- if (get_index_isvalid(partoid) ||
- !RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
- continue;
-
- /* Save partition OID */
- oldcontext = MemoryContextSwitchTo(ind_context);
- partitions = lappend_oid(partitions, partoid);
- MemoryContextSwitchTo(oldcontext);
- }
-
- /*
- * Process each partition listed in a separate transaction. Note that
- * this commits and then starts a new transaction immediately.
- * XXX: since this is done in 2*N transactions, it could just as well
- * call ReindexRelationConcurrently directly
- */
- ReindexMultipleInternal(partitions, ¶ms);
-
/*
* CIC needs to mark a partitioned index as VALID, which itself
* requires setting READY, which is unset for CIC (even though
* it's meaningless for an index without storage).
* This must be done only while holding a lock which precludes adding
* partitions.
- * See also: validatePartitionedIndex().
*/
+ CommandCounterIncrement();
index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+
+ /*
+ * Process each partition listed in a separate transaction. Note that
+ * this commits and then starts a new transaction immediately.
+ */
+ ReindexPartitions(indexRelationId, ¶ms, true);
+
CommandCounterIncrement();
index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
}
@@ -3224,6 +3198,11 @@ ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
if (!RELKIND_HAS_STORAGE(partkind))
continue;
+ /* Skip valid indexes, if requested */
+ if ((params->options & REINDEXOPT_SKIPVALID) != 0 &&
+ get_index_isvalid(partoid))
+ continue;
+
Assert(partkind == RELKIND_INDEX ||
partkind == RELKIND_RELATION);
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index a1d6e3b645f..c31b66ad0b9 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -42,6 +42,7 @@ typedef struct ReindexParams
#define REINDEXOPT_REPORT_PROGRESS 0x02 /* report pgstat progress */
#define REINDEXOPT_MISSING_OK 0x04 /* skip missing relations */
#define REINDEXOPT_CONCURRENTLY 0x08 /* concurrent mode */
+#define REINDEXOPT_SKIPVALID 0x10 /* skip valid indexes */
/* state info for validate_index bulkdelete callback */
typedef struct ValidateIndexState
--
2.34.1
From f4933625541b030fabcb15d1426f496598949898 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pry...@telsasoft.com>
Date: Mon, 7 Feb 2022 10:28:42 +0300
Subject: [PATCH 1/5] Allow CREATE INDEX CONCURRENTLY on partitioned table
0001-Allow-CREATE-INDEX-CONCURRENTLY-on-partitioned-table.patch from
https://www.postgresql.org/message-id/20210226182019.gu20...@telsasoft.com
Fixes:
- rel was used after table_close();
- it seems childidxs shouldn't live in ind_context;
- updated doc.
---
doc/src/sgml/ref/create_index.sgml | 14 +--
src/backend/commands/indexcmds.c | 151 ++++++++++++++++++-------
src/test/regress/expected/indexing.out | 60 +++++++++-
src/test/regress/sql/indexing.sql | 18 ++-
4 files changed, 186 insertions(+), 57 deletions(-)
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 9ffcdc629e6..2040b27b685 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -642,7 +642,10 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
<para>
If a problem arises while scanning the table, such as a deadlock or a
uniqueness violation in a unique index, the <command>CREATE INDEX</command>
- command will fail but leave behind an <quote>invalid</quote> index. This index
+ command will fail but leave behind an <quote>invalid</quote> index.
+ If this happens while creating index concurrently on a partitioned
+ table, the command can also leave behind <quote>valid</quote> or
+ <quote>invalid</quote> indexes on table partitions. The invalid index
will be ignored for querying purposes because it might be incomplete;
however it will still consume update overhead. The <application>psql</application>
<command>\d</command> command will report such an index as <literal>INVALID</literal>:
@@ -689,15 +692,6 @@ Indexes:
cannot.
</para>
- <para>
- Concurrent builds for indexes on partitioned tables are currently not
- supported. However, you may concurrently build the index on each
- partition individually and then finally create the partitioned index
- non-concurrently in order to reduce the time where writes to the
- partitioned table will be locked out. In this case, building the
- partitioned index is a metadata only operation.
- </para>
-
</refsect2>
</refsect1>
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 99f5ab83c32..784f6d9eb87 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -68,6 +68,7 @@
/* non-export function prototypes */
+static void reindex_invalid_child_indexes(Oid indexRelationId);
static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
static void CheckPredicate(Expr *predicate);
static void ComputeIndexAttrs(IndexInfo *indexInfo,
@@ -695,17 +696,6 @@ DefineIndex(Oid relationId,
partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
if (partitioned)
{
- /*
- * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
- * the error is thrown also for temporary tables. Seems better to be
- * consistent, even though we could do it on temporary table because
- * we're not actually doing it concurrently.
- */
- if (stmt->concurrent)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot create index on partitioned table \"%s\" concurrently",
- RelationGetRelationName(rel))));
if (stmt->excludeOpNames)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1145,6 +1135,11 @@ DefineIndex(Oid relationId,
if (pd->nparts != 0)
flags |= INDEX_CREATE_INVALID;
}
+ else if (concurrent && OidIsValid(parentIndexId))
+ {
+ /* If concurrent, initially build index partitions as "invalid" */
+ flags |= INDEX_CREATE_INVALID;
+ }
if (stmt->deferrable)
constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1210,18 +1205,30 @@ DefineIndex(Oid relationId,
partdesc = RelationGetPartitionDesc(rel, true);
if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
{
+ /*
+ * Need to close the relation before recursing into children, so
+ * copy needed data into a longlived context.
+ */
+
+ MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+ ALLOCSET_DEFAULT_SIZES);
+ MemoryContext oldcontext = MemoryContextSwitchTo(ind_context);
int nparts = partdesc->nparts;
Oid *part_oids = palloc(sizeof(Oid) * nparts);
bool invalidate_parent = false;
TupleDesc parentDesc;
Oid *opfamOids;
+ char *relname;
pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
nparts);
memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+ parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+ relname = pstrdup(RelationGetRelationName(rel));
+ table_close(rel, NoLock);
+ MemoryContextSwitchTo(oldcontext);
- parentDesc = RelationGetDescr(rel);
opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
for (i = 0; i < numberOfKeyAttributes; i++)
opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1265,9 +1272,9 @@ DefineIndex(Oid relationId,
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot create unique index on partitioned table \"%s\"",
- RelationGetRelationName(rel)),
+ relname),
errdetail("Table \"%s\" contains partitions that are foreign tables.",
- RelationGetRelationName(rel))));
+ relname)));
AtEOXact_GUC(false, child_save_nestlevel);
SetUserIdAndSecContext(child_save_userid,
@@ -1277,9 +1284,12 @@ DefineIndex(Oid relationId,
}
childidxs = RelationGetIndexList(childrel);
+
+ oldcontext = MemoryContextSwitchTo(ind_context);
attmap =
build_attrmap_by_name(RelationGetDescr(childrel),
parentDesc);
+ MemoryContextSwitchTo(oldcontext);
foreach(cell, childidxs)
{
@@ -1353,10 +1363,14 @@ DefineIndex(Oid relationId,
*/
if (!found)
{
- IndexStmt *childStmt = copyObject(stmt);
+ IndexStmt *childStmt;
bool found_whole_row;
ListCell *lc;
+ oldcontext = MemoryContextSwitchTo(ind_context);
+ childStmt = copyObject(stmt);
+ MemoryContextSwitchTo(oldcontext);
+
/*
* We can't use the same index name for the child index,
* so clear idxname to let the recursive invocation choose
@@ -1417,12 +1431,21 @@ DefineIndex(Oid relationId,
skip_build, quiet);
SetUserIdAndSecContext(child_save_userid,
child_save_sec_context);
+ if (concurrent)
+ {
+ PopActiveSnapshot();
+ PushActiveSnapshot(GetTransactionSnapshot());
+ invalidate_parent = true;
+ }
}
- pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
- i + 1);
+ /* For concurrent build, this is a catalog-only stage */
+ if (!concurrent)
+ pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+ i + 1);
free_attrmap(attmap);
}
+ pfree(relname);
/*
* The pg_index row we inserted for this index was marked
@@ -1430,24 +1453,9 @@ DefineIndex(Oid relationId,
* invalid, this is incorrect, so update our row to invalid too.
*/
if (invalidate_parent)
- {
- Relation pg_index = table_open(IndexRelationId, RowExclusiveLock);
- HeapTuple tup,
- newtup;
-
- tup = SearchSysCache1(INDEXRELID,
- ObjectIdGetDatum(indexRelationId));
- if (!HeapTupleIsValid(tup))
- elog(ERROR, "cache lookup failed for index %u",
- indexRelationId);
- newtup = heap_copytuple(tup);
- ((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
- CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
- ReleaseSysCache(tup);
- table_close(pg_index, RowExclusiveLock);
- heap_freetuple(newtup);
- }
- }
+ index_set_state_flags(indexRelationId, INDEX_DROP_CLEAR_VALID);
+ } else
+ table_close(rel, NoLock);
/*
* Indexes on partitioned tables are not themselves built, so we're
@@ -1455,21 +1463,28 @@ DefineIndex(Oid relationId,
*/
AtEOXact_GUC(false, root_save_nestlevel);
SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
- table_close(rel, NoLock);
if (!OidIsValid(parentIndexId))
+ {
+ if (concurrent)
+ reindex_invalid_child_indexes(indexRelationId);
+
pgstat_progress_end_command();
+ }
+
return address;
}
AtEOXact_GUC(false, root_save_nestlevel);
SetUserIdAndSecContext(root_save_userid, root_save_sec_context);
- if (!concurrent)
+ if (!concurrent || OidIsValid(parentIndexId))
{
- /* Close the heap and we're done, in the non-concurrent case */
- table_close(rel, NoLock);
+ /*
+ * We're done if this is the top-level index,
+ * or the catalog-only phase of a partition built concurrently
+ */
- /* If this is the top-level index, we're done. */
+ table_close(rel, NoLock);
if (!OidIsValid(parentIndexId))
pgstat_progress_end_command();
@@ -1682,6 +1697,62 @@ DefineIndex(Oid relationId,
return address;
}
+/* Reindex invalid child indexes created earlier */
+static void
+reindex_invalid_child_indexes(Oid indexRelationId)
+{
+ ListCell *lc;
+ int npart = 0;
+ ReindexParams params = {
+ .options = REINDEXOPT_CONCURRENTLY
+ };
+
+ MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+ ALLOCSET_DEFAULT_SIZES);
+ MemoryContext oldcontext;
+ List *childs = find_inheritance_children(indexRelationId, ShareLock);
+ List *partitions = NIL;
+
+ PreventInTransactionBlock(true, "REINDEX INDEX");
+
+ foreach (lc, childs)
+ {
+ Oid partoid = lfirst_oid(lc);
+
+ /* XXX: need to retrofit progress reporting into it */
+ // pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+ // npart++);
+
+ if (get_index_isvalid(partoid) ||
+ !RELKIND_HAS_STORAGE(get_rel_relkind(partoid)))
+ continue;
+
+ /* Save partition OID */
+ oldcontext = MemoryContextSwitchTo(ind_context);
+ partitions = lappend_oid(partitions, partoid);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /*
+ * Process each partition listed in a separate transaction. Note that
+ * this commits and then starts a new transaction immediately.
+ * XXX: since this is done in 2*N transactions, it could just as well
+ * call ReindexRelationConcurrently directly
+ */
+ ReindexMultipleInternal(partitions, ¶ms);
+
+ /*
+ * CIC needs to mark a partitioned index as VALID, which itself
+ * requires setting READY, which is unset for CIC (even though
+ * it's meaningless for an index without storage).
+ * This must be done only while holding a lock which precludes adding
+ * partitions.
+ * See also: validatePartitionedIndex().
+ */
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+ CommandCounterIncrement();
+ index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
+}
/*
* CheckMutability
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 193f7801912..a4ccae50de3 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass
(8 rows)
drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR: cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR: could not create unique index "idxpart2_a_idx2_ccnew"
+DETAIL: Key (a)=(10) is duplicated.
+\d idxpart
+ Partitioned table "public.idxpart"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition key: RANGE (a)
+Indexes:
+ "idxpart_a_idx" btree (a)
+ "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+ Partitioned table "public.idxpart1"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+ "idxpart1_a_idx" btree (a) INVALID
+ "idxpart1_a_idx1" btree (a)
+ "idxpart1_a_idx2" UNIQUE, btree (a) INVALID
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+ Table "public.idxpart2"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+ b | integer | | |
+ c | text | | |
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+ "idxpart2_a_idx" btree (a)
+ "idxpart2_a_idx1" btree (a)
+ "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+ "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID
+
drop table idxpart;
-- Verify bugfix with query on indexed partitioned table with no partitions
-- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 42f398b67c2..3d4b6e9bc95 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass
where relname like 'idxpart%' order by relname;
drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart111 partition of idxpart11 default partition by range(a);
+create table idxpart1111 partition of idxpart111 default partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
drop table idxpart;
-- Verify bugfix with query on indexed partitioned table with no partitions
--
2.34.1