hi.
attached patch trying to speedup ALTER TABLE ADD CHECK CONSTRAINT.
demo:
drop table if exists t7;
create table t7 (a int not null, b int, c int);
insert into t7 select g, g+1, g %100 from generate_series(1,1_000_000) g;
create index on t7(a,b);
alter table t7 add check (a > 0);
patch: Time: 4.281 ms
master: Time: 491.689 ms
----------------------------
implementation:
step1. during execute command `ALTER TABLE ADD CHECK CONSTRAINT`
in AddRelationNewConstraints->StoreRelCheck
after StoreRelCheck add a CommandCounterIncrement();
so previously added CHECK pg_constraint can be seen by us.
we need to use pg_get_constraintdef to retrieve the CHECK constraint definition.
step2. check if this relation has any suitable index (not expression
index, not predicate index)
--whole patch hinges on SPI query can use indexscan to quickly
retrieve certain information.
step3: construct and execute these three SPI query:
("set enable_seqscan to off")
(SELECT 1 FROM the_table WHERE NOT (check_constraint_def)
AND check_constraint_def IS NOT NULL LIMIT 1)
("reset enable_seqscan")
the SPI query will be faster, if the qual(check_constraint_def) can be
utilized by indexscan.
if SPI_processed == 0 means we toggle this check constraint as
"already_validated" (bool) is true.
we stored already_validated in CookedConstraint. later pass it to
AlteredTableInfo->constraints.
then phrase 3 within ATRewriteTable, we can do
```
if (constraint->already_validated)
needscan = false;
```
----------------------------
concern:
only certain kinds of check constraint expressions can be used for
this optimization.
i do all the CHECK constraint expressions filter in checkconstraint_expr_walker.
use contain_volatile_functions to filter out volatile expressions,
add (check_constraint_def IS NOT NULL) in the above SPI query, i think
null handling is correct?
ALTER TABLE ADD CHECK CONSTRAINT is using ACCESS EXCLUSIVE lock.
but i need to think more about concurrently related issues.
idea come from this thread:
https://postgr.es/m/canwftzk2mz7js_56v+zclxzyh1utbzx4ueg03p7cee86k2r...@mail.gmail.com
From 5a4039d058d34467696fd6e6238fdb1132d9af14 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Sat, 30 Nov 2024 19:20:22 +0800
Subject: [PATCH v1 1/1] speedup alter table add check constraint
dicussion: https://postgr.es/m/
---
src/backend/catalog/heap.c | 218 ++++++++++++++++++++++
src/backend/catalog/index.c | 74 ++++++++
src/backend/catalog/pg_constraint.c | 1 +
src/backend/commands/tablecmds.c | 9 +-
src/include/catalog/heap.h | 1 +
src/include/catalog/index.h | 1 +
src/test/regress/expected/constraints.out | 20 ++
src/test/regress/sql/constraints.sql | 22 +++
8 files changed, 345 insertions(+), 1 deletion(-)
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 003af4bf21..5eb3450208 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -57,6 +57,7 @@
#include "commands/tablecmds.h"
#include "commands/typecmds.h"
#include "common/int.h"
+#include "executor/spi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
@@ -115,6 +116,17 @@ static Node *cookConstraint(ParseState *pstate,
Node *raw_constraint,
char *relname);
+typedef struct checkexpr_context
+{
+ Bitmapset *bms_indexattno;
+ bool cannot_be_indexed;
+} checkexpr_context;
+static bool
+checkconstraint_expr_walker(Node *node, checkexpr_context *context);
+static bool
+index_validate_check_constraint(Oid constrOid, Relation rel,
+ Node *expr,
+ const char *constrname);
/* ----------------------------------------------------------------
* XXX UGLY HARD CODED BADNESS FOLLOWS XXX
@@ -2409,6 +2421,7 @@ AddRelationNewConstraints(Relation rel,
cooked->is_local = is_local;
cooked->inhcount = is_local ? 0 : 1;
cooked->is_no_inherit = false;
+ cooked->already_validated = false;
cookedConstraints = lappend(cookedConstraints, cooked);
}
@@ -2527,6 +2540,8 @@ AddRelationNewConstraints(Relation rel,
StoreRelCheck(rel, ccname, expr, cdef->initially_valid, is_local,
is_local ? 0 : 1, cdef->is_no_inherit, is_internal);
+ /* we need this for index_validate_check_constraint*/
+ CommandCounterIncrement();
numchecks++;
cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint));
@@ -2539,6 +2554,13 @@ AddRelationNewConstraints(Relation rel,
cooked->is_local = is_local;
cooked->inhcount = is_local ? 0 : 1;
cooked->is_no_inherit = cdef->is_no_inherit;
+ /* internal, readd check constraint cannot be prevalidated */
+ if (is_internal)
+ cooked->already_validated = false;
+ else
+ cooked->already_validated =
+ index_validate_check_constraint(constrOid, rel, expr, ccname);
+
cookedConstraints = lappend(cookedConstraints, cooked);
}
else if (cdef->contype == CONSTR_NOTNULL)
@@ -2609,6 +2631,7 @@ AddRelationNewConstraints(Relation rel,
nncooked->is_local = is_local;
nncooked->inhcount = inhcount;
nncooked->is_no_inherit = cdef->is_no_inherit;
+ nncooked->already_validated = false;
cookedConstraints = lappend(cookedConstraints, nncooked);
}
@@ -2626,6 +2649,201 @@ AddRelationNewConstraints(Relation rel,
return cookedConstraints;
}
+static bool
+index_validate_check_constraint(Oid constrOid, Relation rel, Node *expr, const char *constrname)
+{
+ StringInfoData querybuf;
+ BMS_Comparison cmp;
+ text *constrdef_text;
+ Bitmapset *idx_attnums = NULL;
+ char *constrdef = NULL;
+ char *constrdef_value = NULL;
+ bool check_constraint_ok = false;
+
+ checkexpr_context context = {0};
+ context.bms_indexattno = NULL;
+ context.cannot_be_indexed = false;
+
+ /*
+ * we use CHECK constraint definition for constructing SQL query, later we
+ * using SPI to execute it.
+ */
+ constrdef_text = DatumGetTextPP(DirectFunctionCall2(pg_get_constraintdef,
+ ObjectIdGetDatum(constrOid),
+ BoolGetDatum(false)));
+ constrdef = text_to_cstring(constrdef_text);
+
+ if (constrdef == NULL)
+ return false;
+
+ /* CHECK constraint is a plain Var node, var type should only be bool */
+ if (IsA(expr, Var))
+ {
+ Var *var = (Var *) expr;
+ if (var->vartype == BOOLOID)
+ return true;
+ else
+ return false;
+ }
+
+ idx_attnums = relation_get_indexattnums(rel);
+ /* no appropriate index on this relation, so gave up */
+ if (idx_attnums == NULL)
+ return false;
+
+ checkconstraint_expr_walker(expr, &context);
+ if (context.cannot_be_indexed || context.bms_indexattno == NULL)
+ return false;
+
+ /* all the referenced vars in the expr should have index on it */
+ cmp = bms_subset_compare(context.bms_indexattno, idx_attnums);
+ if (cmp == BMS_SUBSET2 || cmp == BMS_DIFFERENT)
+ return false;
+
+ /*
+ * set enable_seqscan to off to force optimizer using index scan.
+ * we already checked that check constraint expr varnode is not-null
+ * and have appropriate index on it.
+ */
+ SPI_connect();
+ if (SPI_execute("set enable_seqscan to off", false, 0) != SPI_OK_UTILITY)
+ elog(ERROR, "SPI_exec failed for query: \"%s\"", "set enable_seqscan to off");
+
+ initStringInfo(&querybuf);
+ constrdef_value = pstrdup(constrdef + 5); /*check constraint first word is CHECK, that's why offset 5*/
+ appendStringInfo(&querybuf, "SELECT 1 FROM %s WHERE (NOT %s) AND %s IS NOT NULL LIMIT 1",
+ RelationGetRelationName(rel),
+ constrdef_value,
+ constrdef_value);
+
+ if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT)
+ elog(ERROR, "SPI_exec failed: %s", querybuf.data);
+
+ check_constraint_ok = (SPI_processed == 0);
+
+ /*we need reset enable_seqscan GUC */
+ if (SPI_execute("reset enable_seqscan;", false, 0) != SPI_OK_UTILITY)
+ elog(ERROR, "SPI_exec failed for query: \"%s\"", "reset enable_seqscan");
+ SPI_finish();
+
+ return check_constraint_ok;
+}
+
+static bool
+checkconstraint_expr_walker(Node *node, checkexpr_context *context)
+{
+ Node *leftop = NULL;
+ Node *rightop = NULL;
+
+ if (node == NULL)
+ return false;
+
+ /* volatile check expression will influence the result of to be executed SPI
+ * query. so we forbiden it.
+ */
+ if (contain_volatile_functions(node))
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ /* wholerow expression cannot be used for indexscan */
+ if (var->varattno == 0)
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+ context->bms_indexattno = bms_add_member(context->bms_indexattno,
+ var->varattno);
+ }
+
+ if (IsA(node, OpExpr))
+ {
+ bool leftop_have_var;
+ bool rightop_have_var;
+ OpExpr *opexpr = (OpExpr *) node;
+
+ if (opexpr->opresulttype != BOOLOID
+ || !OidIsValid(get_negator(opexpr->opno)))
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+
+ /*
+ * Check the expression form: (Var node operator constant) or (constant
+ * operator Var).
+ */
+ leftop = (Node *) linitial(opexpr->args);
+ rightop = (Node *) lsecond(opexpr->args);
+
+ if (IsA(leftop, RelabelType))
+ leftop = (Node *) ((RelabelType *) leftop)->arg;
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ /*
+ * cannot both side contain Var node. current index scan cannot handle
+ * qual like `where col1 < colb`. we also skip case where bothside don't
+ * have var clause.
+ */
+ leftop_have_var = contain_var_clause(leftop);
+ rightop_have_var = contain_var_clause(rightop);
+ if ((leftop_have_var && rightop_have_var) ||
+ (!leftop_have_var && !rightop_have_var))
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+
+ if (IsA(leftop, Const) && !OidIsValid(get_commutator(opexpr->opno)))
+ {
+ /* commutator doesn't exist, we can't reverse the order */
+ context->cannot_be_indexed = true;
+ return false;
+ }
+
+ /* maybe this is not necessary? skip row/composite type first.*/
+ if (type_is_rowtype(exprType(leftop)) ||
+ type_is_rowtype(exprType(rightop)))
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+ }
+
+ if (IsA(node, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+
+ leftop = (Node *) linitial(saop->args);
+ if (leftop && IsA(leftop, RelabelType))
+ leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+ if (!IsA(leftop, Var))
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+
+ rightop = (Node *) lsecond(saop->args);
+ if (rightop && IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ if (!IsA(rightop, Const))
+ {
+ context->cannot_be_indexed = true;
+ return false;
+ }
+ }
+
+ return expression_tree_walker(node, checkconstraint_expr_walker,
+ (void *) context);
+}
/*
* Check for a pre-existing check constraint that conflicts with a proposed
* new one, and either adjust its conislocal/coninhcount settings or throw
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index f9bb721c5f..89c8873389 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -4257,3 +4257,77 @@ RestoreReindexState(const void *reindexstate)
/* Note the worker has its own transaction nesting level */
reindexingNestLevel = GetCurrentTransactionNestLevel();
}
+
+/*
+ * relation_get_indexattnums
+ * Returns a Bitmapsets with member of attribute number (ref:
+ * pg_attribute.attnum) that have suitable index on it.
+*/
+Bitmapset *
+relation_get_indexattnums(Relation rel)
+{
+ Bitmapset *attnums = NULL;
+
+ Relation pg_index;
+ HeapTuple indexTuple;
+ SysScanDesc scan;
+ ScanKeyData skey;
+
+ /* Scan pg_index for unique index of the target rel */
+ pg_index = table_open(IndexRelationId, AccessShareLock);
+
+ ScanKeyInit(&skey,
+ Anum_pg_index_indrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+ scan = systable_beginscan(pg_index, IndexIndrelidIndexId, true,
+ NULL, 1, &skey);
+
+ while (HeapTupleIsValid(indexTuple = systable_getnext(scan)))
+ {
+ Datum adatum;
+ bool isNull;
+ Datum *keys;
+ int nelems;
+
+ Form_pg_index index = (Form_pg_index) GETSTRUCT(indexTuple);
+ if (!index->indislive)
+ continue;
+
+ /* Skip invalid, exclusion index or deferred index */
+ if (!index->indisvalid || index->indisexclusion || !index->indimmediate)
+ continue;
+
+ /* Skip expression index or predicate index */
+ if (!heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) ||
+ !heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL))
+ continue;
+
+ /* Extract the pg_index->indkey int2vector */
+ adatum = heap_getattr(indexTuple, Anum_pg_index_indkey,
+ RelationGetDescr(pg_index), &isNull);
+ if (isNull)
+ elog(ERROR, "null int2vector for index %u", index->indexrelid);
+
+ deconstruct_array_builtin(DatumGetArrayTypeP(adatum), INT2OID,
+ &keys,
+ NULL,
+ &nelems);
+
+ if (nelems != index->indnatts)
+ elog(ERROR, "corrupted int2vector for index %u", index->indexrelid);
+
+ Assert(nelems >= index->indnkeyatts);
+
+ for (int i = 0; i < index->indnkeyatts; i++)
+ {
+ int attno = DatumGetInt16(keys[i]);
+
+ attnums = bms_add_member(attnums, attno);
+ }
+ }
+ systable_endscan(scan);
+
+ table_close(pg_index, AccessShareLock);
+ return attnums;
+}
\ No newline at end of file
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 9c05a98d28..27f164ad75 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -826,6 +826,7 @@ RelationGetNotNullConstraints(Oid relid, bool cooked, bool include_noinh)
cooked->is_local = true;
cooked->inhcount = 0;
cooked->is_no_inherit = conForm->connoinherit;
+ cooked->already_validated = false;
notnulls = lappend(notnulls, cooked);
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 1af2e2bffb..8608c57356 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -218,6 +218,7 @@ typedef struct NewConstraint
Oid refrelid; /* PK rel, if FOREIGN */
Oid refindid; /* OID of PK's index, if FOREIGN */
bool conwithperiod; /* Whether the new FOREIGN KEY uses PERIOD */
+ bool already_validated; /* already validated for check constraint */
Oid conid; /* OID of pg_constraint entry, if FOREIGN */
Node *qual; /* Check expr or CONSTR_FOREIGN Constraint */
ExprState *qualstate; /* Execution state for CHECK expr */
@@ -977,6 +978,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
cooked->is_local = true; /* not used for defaults */
cooked->inhcount = 0; /* ditto */
cooked->is_no_inherit = false;
+ cooked->already_validated = false;
cookedDefaults = lappend(cookedDefaults, cooked);
attr->atthasdef = true;
}
@@ -6093,6 +6095,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
{
case CONSTR_CHECK:
needscan = true;
+ if(con->already_validated)
+ needscan = false;
con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate);
break;
case CONSTR_FOREIGN:
@@ -9597,7 +9601,7 @@ ATAddCheckNNConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
newcon->name = ccon->name;
newcon->contype = ccon->contype;
newcon->qual = ccon->expr;
-
+ newcon->already_validated = ccon->already_validated;
tab->constraints = lappend(tab->constraints, newcon);
}
@@ -10718,6 +10722,7 @@ addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel,
newcon->refindid = indexOid;
newcon->conid = parentConstr;
newcon->conwithperiod = fkconstraint->fk_with_period;
+ newcon->already_validated = false;
newcon->qual = (Node *) fkconstraint;
tab->constraints = lappend(tab->constraints, newcon);
@@ -12027,6 +12032,7 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
newcon->refindid = con->conindid;
newcon->conid = con->oid;
newcon->qual = (Node *) fkconstraint;
+ newcon->already_validated = false;
/* Find or create work queue entry for this table */
tab = ATGetQueueEntry(wqueue, rel);
@@ -12095,6 +12101,7 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
newcon->refrelid = InvalidOid;
newcon->refindid = InvalidOid;
newcon->conid = con->oid;
+ newcon->already_validated = false;
val = SysCacheGetAttrNotNull(CONSTROID, tuple,
Anum_pg_constraint_conbin);
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 8c278f202b..766ff00e6f 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -45,6 +45,7 @@ typedef struct CookedConstraint
int16 inhcount; /* number of times constraint is inherited */
bool is_no_inherit; /* constraint has local def and cannot be
* inherited */
+ bool already_validated; /* already validated for check constraint */
} CookedConstraint;
extern Relation heap_create(const char *relname,
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 2dea96f47c..a4386a6f52 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -175,6 +175,7 @@ extern void RestoreReindexState(const void *reindexstate);
extern void IndexSetParentIndex(Relation partitionIdx, Oid parentOid);
+extern Bitmapset *relation_get_indexattnums(Relation rel);
/*
* itemptr_encode - Encode ItemPointer as int64/int8
diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out
index 71200c90ed..5504bd7edd 100644
--- a/src/test/regress/expected/constraints.out
+++ b/src/test/regress/expected/constraints.out
@@ -112,6 +112,26 @@ SELECT * from CHECK2_TBL;
7 | check ok | 7
(2 rows)
+create table t1(a int not null, b int not null, c bool not null, d int[] not null);
+create index t1_idx_a on t1(a);
+create index t1_idx_b on t1(b);
+create index t1_idx_c on t1(c);
+create index t1_idx_d on t1(d);
+--empty table, can be optimized
+alter table t1 add constraint nc check(a>0);
+insert into t1 values(1,2, true, '{1,2}');
+insert into t1 values(1,2, true, '{1,3,4}');
+insert into t1 values(11,2, true, '{1,3,5}');
+alter table t1 add check(a>0);
+alter table t1 add check(a <> 12);
+alter table t1 add check(a>0 and a < 12);
+alter table t1 add check((a>0 and a < 12) or (a < 13 or a > -1) or b = 2);
+alter table t1 add check((a>0 and a < 12) or (d = '{13,12}'));
+alter table t1 add check((a = any('{1,11}')));
+alter table t1 add check((b = all('{2}')));
+--cannot optimzied for now.
+alter table t1 add check((d[1] = 1));
+drop table t1;
--
-- Check constraints on INSERT
--
diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql
index e607eb1fdd..87e237f50d 100644
--- a/src/test/regress/sql/constraints.sql
+++ b/src/test/regress/sql/constraints.sql
@@ -82,6 +82,28 @@ INSERT INTO CHECK2_TBL VALUES (7, 'check ok', 7);
SELECT * from CHECK2_TBL;
+create table t1(a int not null, b int not null, c bool not null, d int[] not null);
+create index t1_idx_a on t1(a);
+create index t1_idx_b on t1(b);
+create index t1_idx_c on t1(c);
+create index t1_idx_d on t1(d);
+--empty table, can be optimized
+alter table t1 add constraint nc check(a>0);
+insert into t1 values(1,2, true, '{1,2}');
+insert into t1 values(1,2, true, '{1,3,4}');
+insert into t1 values(11,2, true, '{1,3,5}');
+
+alter table t1 add check(a>0);
+alter table t1 add check(a <> 12);
+alter table t1 add check(a>0 and a < 12);
+alter table t1 add check((a>0 and a < 12) or (a < 13 or a > -1) or b = 2);
+alter table t1 add check((a>0 and a < 12) or (d = '{13,12}'));
+alter table t1 add check((a = any('{1,11}')));
+alter table t1 add check((b = all('{2}')));
+--cannot optimzied for now.
+alter table t1 add check((d[1] = 1));
+drop table t1;
+
--
-- Check constraints on INSERT
--
--
2.34.1