Per an off-list report from Olaf Gawenda (thanks Olaf), it seems that the
range partition's constraint is sometimes incorrect, at least in the case
of multi-column range partitioning. See below:
create table p (a int, b int) partition by range (a, b);
create table p1 partition of p for values from (1, 1) to (10 ,10);
create table p2 partition of p for values from (11, 1) to (20, 10);
Perhaps unusual, but it's still a valid definition. Tuple-routing puts
rows where they belong correctly.
-- ok
insert into p values (10, 9);
select tableoid::regclass, * from p;
tableoid | a | b
----------+----+---
p1 | 10 | 9
(1 row)
-- but see this
select tableoid::regclass, * from p where a = 10;
tableoid | a | b
----------+---+---
(0 rows)
explain select tableoid::regclass, * from p where a = 10;
QUERY PLAN
-------------------------------------------
Result (cost=0.00..0.00 rows=0 width=12)
One-Time Filter: false
(2 rows)
-- or this
insert into p1 values (10, 9);
ERROR: new row for relation "p1" violates partition constraint
DETAIL: Failing row contains (10, 9).
This is because of the constraint being generated is not correct in this
case. p1's constraint is currently:
a >= 1 and a < 10
where it should really be the following:
(a > 1 OR (a = 1 AND b >= 1))
AND
(a < 10 OR (a = 10 AND b < 10))
Attached patch rewrites get_qual_for_range() for the same, along with some
code rearrangement for reuse. I also added some new tests to insert.sql
and inherit.sql, but wondered (maybe, too late now) whether there should
really be a declarative_partition.sql for these, moving in some of the old
tests too.
Adding to the open items list.
Thanks,
Amit
PS: due to vacation, I won't be able to reply promptly until Monday 05/08.
>From a252cd21c2342b0d293e0618e3eab4f998f746aa Mon Sep 17 00:00:00 2001
From: amit <[email protected]>
Date: Tue, 2 May 2017 11:03:54 +0900
Subject: [PATCH] Emit "correct" range partition constraint expression
Currently emitted expression does not always describe the constraint
correctly, especially in the case of multi-column range key.
Code surrounding get_qual_for_*() has been rearranged a little to
move common code to a couple of new functions.
Original issue reported by Olaf Gawenda ([email protected])
---
src/backend/catalog/partition.c | 616 ++++++++++++++++++++++------------
src/include/nodes/pg_list.h | 10 +
src/test/regress/expected/inherit.out | 90 +++++
src/test/regress/expected/insert.out | 59 ++++
src/test/regress/sql/inherit.sql | 18 +
src/test/regress/sql/insert.sql | 43 +++
6 files changed, 615 insertions(+), 221 deletions(-)
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index e0d2665a91..22c55726f0 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -118,10 +118,19 @@ static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
void *arg);
-static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
-static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
static Oid get_partition_operator(PartitionKey key, int col,
StrategyNumber strategy, bool *need_relabel);
+static Expr* make_partition_op_expr(PartitionKey key, int keynum,
+ uint16 strategy, Expr *arg1, Expr *arg2);
+static void get_range_key_properties(PartitionKey key, int keynum,
+ PartitionRangeDatum *ldatum,
+ PartitionRangeDatum *udatum,
+ ListCell **partexprs_item,
+ Expr **keyCol,
+ Const **lower_val, Const **upper_val,
+ NullTest **nulltest);
+static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
+static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
static List *generate_partition_qual(Relation rel);
static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
@@ -1146,6 +1155,119 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
/* Module-local functions */
/*
+ * get_partition_operator
+ *
+ * Return oid of the operator of given strategy for a given partition key
+ * column.
+ */
+static Oid
+get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
+ bool *need_relabel)
+{
+ Oid operoid;
+
+ /*
+ * First check if there exists an operator of the given strategy, with
+ * this column's type as both its lefttype and righttype, in the
+ * partitioning operator family specified for the column.
+ */
+ operoid = get_opfamily_member(key->partopfamily[col],
+ key->parttypid[col],
+ key->parttypid[col],
+ strategy);
+
+ /*
+ * If one doesn't exist, we must resort to using an operator in the same
+ * opreator family but with the operator class declared input type. It is
+ * OK to do so, because the column's type is known to be binary-coercible
+ * with the operator class input type (otherwise, the operator class in
+ * question would not have been accepted as the partitioning operator
+ * class). We must however inform the caller to wrap the non-Const
+ * expression with a RelabelType node to denote the implicit coercion. It
+ * ensures that the resulting expression structurally matches similarly
+ * processed expressions within the optimizer.
+ */
+ if (!OidIsValid(operoid))
+ {
+ operoid = get_opfamily_member(key->partopfamily[col],
+ key->partopcintype[col],
+ key->partopcintype[col],
+ strategy);
+ *need_relabel = true;
+ }
+ else
+ *need_relabel = false;
+
+ if (!OidIsValid(operoid))
+ elog(ERROR, "could not find operator for partitioning");
+
+ return operoid;
+}
+
+/*
+ * make_partition_op_expr
+ * Returns an Expr for the given partition key column with arg1 and
+ * arg2 as its leftop and rightop, respectively
+ */
+static Expr*
+make_partition_op_expr(PartitionKey key, int keynum,
+ uint16 strategy, Expr *arg1, Expr *arg2)
+{
+ Oid operoid;
+ bool need_relabel = false;
+ Expr *result;
+
+ /* Get the correct btree operator for this partitioning column */
+ operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
+
+ /*
+ * Chosen operator may be such that the non-Const operand needs to
+ * be coerced, so apply the same; see the comment in
+ * get_partition_operator().
+ */
+ if (!IsA(arg1, Const) &&
+ (need_relabel ||
+ key->partcollation[keynum] != key->parttypcoll[keynum]))
+ arg1 = (Expr *) makeRelabelType(arg1,
+ key->partopcintype[keynum],
+ -1,
+ key->partcollation[keynum],
+ COERCE_EXPLICIT_CAST);
+
+ /* Generate the actual expression */
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ {
+ ScalarArrayOpExpr *saopexpr;
+
+ /* Build leftop = ANY (rightop) */
+ saopexpr = makeNode(ScalarArrayOpExpr);
+ saopexpr->opno = operoid;
+ saopexpr->opfuncid = get_opcode(operoid);
+ saopexpr->useOr = true;
+ saopexpr->inputcollid = key->partcollation[0];
+ saopexpr->args = list_make2(arg1, arg2);
+ saopexpr->location = -1;
+
+ result = (Expr *) saopexpr;
+ break;
+ }
+
+ case PARTITION_STRATEGY_RANGE:
+ result = make_opclause(operoid,
+ BOOLOID,
+ false,
+ arg1, arg2,
+ InvalidOid,
+ key->partcollation[keynum]);
+ break;
+ }
+
+ return result;
+}
+
+/*
* get_qual_for_list
*
* Returns a list of expressions to use as a list partition's constraint.
@@ -1155,14 +1277,12 @@ get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
{
List *result;
ArrayExpr *arr;
- ScalarArrayOpExpr *opexpr;
+ Expr *opexpr;
ListCell *cell,
*prev,
*next;
Expr *keyCol;
- Oid operoid;
- bool need_relabel,
- list_has_null = false;
+ bool list_has_null = false;
NullTest *nulltest1 = NULL,
*nulltest2 = NULL;
@@ -1233,24 +1353,9 @@ get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
arr->multidims = false;
arr->location = -1;
- /* Get the correct btree equality operator */
- operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
- &need_relabel);
- if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
- keyCol = (Expr *) makeRelabelType(keyCol,
- key->partopcintype[0],
- -1,
- key->partcollation[0],
- COERCE_EXPLICIT_CAST);
-
- /* Build leftop = ANY (rightop) */
- opexpr = makeNode(ScalarArrayOpExpr);
- opexpr->opno = operoid;
- opexpr->opfuncid = get_opcode(operoid);
- opexpr->useOr = true;
- opexpr->inputcollid = key->partcollation[0];
- opexpr->args = list_make2(keyCol, arr);
- opexpr->location = -1;
+ /* Generate the main expression, i.e., keyCol = ANY (arr) */
+ opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
+ keyCol, (Expr *) arr);
if (nulltest1)
result = list_make2(nulltest1, opexpr);
@@ -1268,9 +1373,95 @@ get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
}
/*
+ * get_range_key_properties
+ * Returns range partition key information for a given column
+ *
+ * On return, *lower_val and *upper_val contain either a valid Const
+ * expression or NULL. *nulltest contains a NullTest expression, or
+ * NULL.
+ */
+static void
+get_range_key_properties(PartitionKey key, int keynum,
+ PartitionRangeDatum *ldatum,
+ PartitionRangeDatum *udatum,
+ ListCell **partexprs_item,
+ Expr **keyCol,
+ Const **lower_val, Const **upper_val,
+ NullTest **nulltest)
+{
+ /* Partition key expression for this column */
+ if (key->partattrs[keynum] != 0)
+ {
+ *keyCol = (Expr *) makeVar(1,
+ key->partattrs[keynum],
+ key->parttypid[keynum],
+ key->parttypmod[keynum],
+ key->parttypcoll[keynum],
+ 0);
+ }
+ else
+ {
+ *keyCol = copyObject(lfirst(*partexprs_item));
+ *partexprs_item = lnext(*partexprs_item);
+ }
+
+ /*
+ * A range-partitioned table does not allow partition keys to be null.
+ * For simple columns, their NOT NULL constraint suffices for the
+ * enforcement of non-nullability. But for the expression keys, which
+ * are still nullable, we must emit a IS NOT NULL expression.
+ */
+ if (!IsA(*keyCol, Var))
+ {
+ *nulltest = makeNode(NullTest);
+ (*nulltest)->arg = *keyCol;
+ (*nulltest)->nulltesttype = IS_NOT_NULL;
+ (*nulltest)->argisrow = false;
+ (*nulltest)->location = -1;
+ }
+ else
+ *nulltest = NULL;
+
+ if (!ldatum->infinite)
+ *lower_val = (Const *) ldatum->value;
+ else
+ *lower_val = NULL;
+
+ if (!udatum->infinite)
+ *upper_val = (Const *) udatum->value;
+ else
+ *upper_val = NULL;
+}
+
+/*
* get_qual_for_range
*
- * Get a list of OpExpr's to use as a range partition's constraint.
+ * Get a list of expressions to use as a range partition's constraint.
+ * Expressions in the list are implicitly ANDed.
+ *
+ * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
+ * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
+ * generate an expression tree of the following form:
+ *
+ * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
+ * AND
+ * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
+ *
+ * If b were an expression key instead of a simple column, we also append
+ * (b IS NOT NULL) to the AND's argument list.
+ *
+ * It is often the case that a prefix of lower and upper bound tuples contains
+ * the same values, for example, (al = au), in which case, we will emit an
+ * expression tree of the following form:
+ *
+ * (a = al)
+ * AND
+ * (b > bl OR (b = bl AND c >= cl))
+ * AND
+ * (b < bu) OR (b = bu AND c < cu))
+ *
+ * In most common cases with only one partition column, say a, the following
+ * expression tree will be generated: a >= al AND a < au
*/
static List *
get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
@@ -1278,239 +1469,222 @@ get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
List *result = NIL;
ListCell *cell1,
*cell2,
- *partexprs_item;
- int i;
+ *partexprs_item,
+ partexprs_item_saved;
+ int i,
+ j;
+ PartitionRangeDatum *ldatum,
+ *udatum;
+ Expr *keyCol;
+ Const *lower_val,
+ *upper_val;
+ NullTest *nulltest;
+ List *lower_or_arms,
+ *upper_or_arms;
+ int maxnum_or_arms,
+ num_or_arms;
+ ListCell *lower_or_start_datum,
+ *upper_or_start_datum;
+
+ lower_or_start_datum = list_head(spec->lowerdatums);
+ upper_or_start_datum = list_head(spec->upperdatums);
+ maxnum_or_arms = key->partnatts;
/*
- * Iterate over columns of the key, emitting an OpExpr for each using the
- * corresponding lower and upper datums as constant operands.
+ * Iterate over the key columns and check if the corresponding lower and
+ * upper datums are equal using the btree equality operator for the
+ * column's type. If equal, we emit single keyCol = common_value
+ * expression. Starting from the first column for which the corresponding
+ * lower and upper bound datums are not equal, we generate OR expressions
+ * as shown in the function's header comment.
*/
i = 0;
partexprs_item = list_head(key->partexprs);
forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
{
- PartitionRangeDatum *ldatum = lfirst(cell1),
- *udatum = lfirst(cell2);
- Expr *keyCol;
- Const *lower_val = NULL,
- *upper_val = NULL;
EState *estate;
MemoryContext oldcxt;
Expr *test_expr;
ExprState *test_exprstate;
Datum test_result;
bool isNull;
- bool need_relabel = false;
- Oid operoid;
- NullTest *nulltest;
- /* Left operand */
- if (key->partattrs[i] != 0)
- {
- keyCol = (Expr *) makeVar(1,
- key->partattrs[i],
- key->parttypid[i],
- key->parttypmod[i],
- key->parttypcoll[i],
- 0);
- }
- else
- {
- keyCol = copyObject(lfirst(partexprs_item));
- partexprs_item = lnext(partexprs_item);
- }
+ ldatum = lfirst(cell1);
+ udatum = lfirst(cell2);
/*
- * Emit a IS NOT NULL expression for non-Var keys, because whereas
- * simple attributes are covered by NOT NULL constraints, expression
- * keys are still nullable which is not acceptable in case of range
- * partitioning.
+ * Since get_range_key_properties() modifies partexprs_item, and we
+ * might need to start over from the previous expression in the
+ * later part of this functiom, save away the current value.
*/
- if (!IsA(keyCol, Var))
- {
- nulltest = makeNode(NullTest);
- nulltest->arg = keyCol;
- nulltest->nulltesttype = IS_NOT_NULL;
- nulltest->argisrow = false;
- nulltest->location = -1;
+ if (partexprs_item)
+ partexprs_item_saved = *partexprs_item;
+ get_range_key_properties(key, i, ldatum, udatum,
+ &partexprs_item,
+ &keyCol,
+ &lower_val, &upper_val,
+ &nulltest);
+
+ if (nulltest)
result = lappend(result, nulltest);
- }
/*
- * Stop at this column if either of lower or upper datum is infinite,
- * but do emit an OpExpr for the non-infinite datum.
+ * If either or both of lower_val and upper_val is NULL, they are
+ * unequal.
*/
- if (!ldatum->infinite)
- lower_val = (Const *) ldatum->value;
- if (!udatum->infinite)
- upper_val = (Const *) udatum->value;
+ if (!lower_val || !upper_val)
+ break;
+
+ /* Create the test expression */
+ estate = CreateExecutorState();
+ oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
+ test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
+ (Expr *) lower_val,
+ (Expr *) upper_val);
+ fix_opfuncids((Node *) test_expr);
+ test_exprstate = ExecInitExpr(test_expr, NULL);
+ test_result = ExecEvalExprSwitchContext(test_exprstate,
+ GetPerTupleExprContext(estate),
+ &isNull);
+ MemoryContextSwitchTo(oldcxt);
+ FreeExecutorState(estate);
+
+ /* If not equal, go generate the OR expressions */
+ if (!DatumGetBool(test_result))
+ break;
+
+ /* Equal, so generate keyCol = lower_val expression */
/*
- * If lower_val and upper_val are both finite and happen to be equal,
- * emit only (keyCol = lower_val) for this column, because all rows in
- * this partition could only ever contain this value (ie, lower_val)
- * in the current partitioning column. We must consider further
- * columns because the above condition does not fully constrain the
- * rows of this partition.
+ * This can never be true for the last key column, but it's
+ * better to make sure. This is because such a range
+ * partition would never be allowed to be defined (it would
+ * have an empty range otherwise).
*/
- if (lower_val && upper_val)
- {
- /* Get the correct btree equality operator for the test */
- operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
- &need_relabel);
-
- /* Create the test expression */
- estate = CreateExecutorState();
- oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
- test_expr = make_opclause(operoid,
- BOOLOID,
- false,
- (Expr *) lower_val,
- (Expr *) upper_val,
- InvalidOid,
- key->partcollation[i]);
- fix_opfuncids((Node *) test_expr);
- test_exprstate = ExecInitExpr(test_expr, NULL);
- test_result = ExecEvalExprSwitchContext(test_exprstate,
- GetPerTupleExprContext(estate),
- &isNull);
- MemoryContextSwitchTo(oldcxt);
- FreeExecutorState(estate);
+ if (i == key->partnatts - 1)
+ elog(ERROR, "invalid range bound specification");
+
+ result = lappend(result,
+ make_partition_op_expr(key, i, BTEqualStrategyNumber,
+ keyCol, (Expr *) lower_val));
+
+ i++;
+ }
+
+ /* First pair of lower_val and upper_val that are not equal. */
+ lower_or_start_datum = cell1;
+ upper_or_start_datum = cell2;
- if (DatumGetBool(test_result))
+ /* OR will have as many arms as there are key columns left. */
+ maxnum_or_arms = key->partnatts - i;
+
+ num_or_arms = 0;
+ lower_or_arms = upper_or_arms = NIL;
+ while (num_or_arms < maxnum_or_arms)
+ {
+ List *lower_or_arm_args = NIL,
+ *upper_or_arm_args = NIL;
+
+ j = i;
+ partexprs_item = &partexprs_item_saved;
+ for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
+ {
+ ldatum = lfirst(cell1);
+ udatum = lfirst(cell2);
+ get_range_key_properties(key, j, ldatum, udatum,
+ &partexprs_item,
+ &keyCol,
+ &lower_val, &upper_val,
+ &nulltest);
+
+ if (nulltest)
+ result = lappend(result, nulltest);
+
+ if (lower_val)
{
- /* This can never be, but it's better to make sure */
- if (i == key->partnatts - 1)
- elog(ERROR, "invalid range bound specification");
-
- if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
- keyCol = (Expr *) makeRelabelType(keyCol,
- key->partopcintype[i],
- -1,
- key->partcollation[i],
- COERCE_EXPLICIT_CAST);
- result = lappend(result,
- make_opclause(operoid,
- BOOLOID,
- false,
- keyCol,
- (Expr *) lower_val,
- InvalidOid,
- key->partcollation[i]));
-
- /* Go over to consider the next column. */
- i++;
- continue;
+ uint16 strategy;
+
+ /*
+ * For the non-last columns of this arm, use the equality
+ * operator.
+ */
+ if (j - i < num_or_arms)
+ strategy = BTEqualStrategyNumber;
+ else
+ /* Consider that the lower bound is inclusive */
+ strategy = (j == key->partnatts - 1)
+ ? BTGreaterEqualStrategyNumber
+ : BTGreaterStrategyNumber;
+
+ lower_or_arm_args = lappend(lower_or_arm_args,
+ make_partition_op_expr(key, j,
+ strategy,
+ keyCol,
+ (Expr *) lower_val));
}
- }
- /*
- * We can say here that lower_val != upper_val. Emit expressions
- * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
- */
- if (lower_val)
- {
- operoid = get_partition_operator(key, i,
- BTGreaterEqualStrategyNumber,
- &need_relabel);
-
- if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
- keyCol = (Expr *) makeRelabelType(keyCol,
- key->partopcintype[i],
- -1,
- key->partcollation[i],
- COERCE_EXPLICIT_CAST);
- result = lappend(result,
- make_opclause(operoid,
- BOOLOID,
- false,
- keyCol,
- (Expr *) lower_val,
- InvalidOid,
- key->partcollation[i]));
- }
+ if (upper_val)
+ {
+ uint16 strategy;
- if (upper_val)
- {
- operoid = get_partition_operator(key, i,
- BTLessStrategyNumber,
- &need_relabel);
-
- if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
- keyCol = (Expr *) makeRelabelType(keyCol,
- key->partopcintype[i],
- -1,
- key->partcollation[i],
- COERCE_EXPLICIT_CAST);
-
- result = lappend(result,
- make_opclause(operoid,
- BOOLOID,
- false,
- keyCol,
- (Expr *) upper_val,
- InvalidOid,
- key->partcollation[i]));
- }
+ /*
+ * For the non-last columns of this arm, use the equality
+ * operator.
+ */
+ if (j - i < num_or_arms)
+ strategy = BTEqualStrategyNumber;
+ else
+ strategy = BTLessStrategyNumber;
- /*
- * We can stop at this column, because we would not have checked the
- * next column when routing a given row into this partition.
- */
- break;
- }
+ upper_or_arm_args = lappend(upper_or_arm_args,
+ make_partition_op_expr(key, j,
+ strategy,
+ keyCol,
+ (Expr *) upper_val));
- return result;
-}
+ }
-/*
- * get_partition_operator
- *
- * Return oid of the operator of given strategy for a given partition key
- * column.
- */
-static Oid
-get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
- bool *need_relabel)
-{
- Oid operoid;
+ /* Did we generate enough of OR's arguments? */
+ ++j;
+ if (j - i > num_or_arms)
+ break;
+ }
- /*
- * First check if there exists an operator of the given strategy, with
- * this column's type as both its lefttype and righttype, in the
- * partitioning operator family specified for the column.
- */
- operoid = get_opfamily_member(key->partopfamily[col],
- key->parttypid[col],
- key->parttypid[col],
- strategy);
+ /* One arm of the OR expression for each of lower and upper bounds */
+ if (lower_or_arm_args != NIL)
+ lower_or_arms = lappend(lower_or_arms,
+ list_length(lower_or_arm_args) > 1
+ ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
+ : linitial(lower_or_arm_args));
+
+ if (upper_or_arm_args != NIL)
+ upper_or_arms = lappend(upper_or_arms,
+ list_length(upper_or_arm_args) > 1
+ ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
+ : linitial(upper_or_arm_args));
+ ++num_or_arms;
+ }
/*
- * If one doesn't exist, we must resort to using an operator in the same
- * opreator family but with the operator class declared input type. It is
- * OK to do so, because the column's type is known to be binary-coercible
- * with the operator class input type (otherwise, the operator class in
- * question would not have been accepted as the partitioning operator
- * class). We must however inform the caller to wrap the non-Const
- * expression with a RelabelType node to denote the implicit coercion. It
- * ensures that the resulting expression structurally matches similarly
- * processed expressions within the optimizer.
+ * Generate the OR expressions for each of lower and upper bounds (if
+ * required), and append to the list of implicitly ANDed list of
+ * expressions.
+ *
+ * At least one of lower and upper bound constraint should be present.
*/
- if (!OidIsValid(operoid))
- {
- operoid = get_opfamily_member(key->partopfamily[col],
- key->partopcintype[col],
- key->partopcintype[col],
- strategy);
- *need_relabel = true;
- }
- else
- *need_relabel = false;
-
- if (!OidIsValid(operoid))
- elog(ERROR, "could not find operator for partitioning");
+ Assert(lower_or_arms != NIL || upper_or_arms != NIL);
+ if (lower_or_arms != NIL)
+ result = lappend(result,
+ list_length(lower_or_arms) > 1
+ ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
+ : linitial(lower_or_arms));
+ if (upper_or_arms != NIL)
+ result = lappend(result,
+ list_length(upper_or_arms) > 1
+ ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
+ : linitial(upper_or_arms));
- return operoid;
+ return result;
}
/*
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 9df7fb30d3..011054f745 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -183,6 +183,16 @@ list_length(const List *l)
(cell1) = lnext(cell1), (cell2) = lnext(cell2))
/*
+ * for_both_cell -
+ * a convenience macro which loops through two lists starting from the
+ * specified cells of each
+ */
+#define for_both_cell(cell1, initcell1, cell2, initcell2) \
+ for ((cell1) = (initcell1), (cell2) = (initcell2); \
+ (cell1) != NULL && (cell2) != NULL; \
+ (cell1) = lnext(cell1), (cell2) = lnext(cell2))
+
+/*
* forthree -
* the same for three lists
*/
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 6163ed8117..af7090ba0d 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1828,3 +1828,93 @@ explain (costs off) select * from range_list_parted where a >= 30;
drop table list_parted;
drop table range_list_parted;
+-- check that constraint exclusion is able to cope with the partition
+-- constraint emitted for multi-column range partitioned tables
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, 1, 1);
+create table mcrparted1 partition of mcrparted for values from (1, 1, 1) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 5, 10) to (10, 10, 10);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (20, 10, 10) to (20, 20, 20);
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (unbounded, unbounded, unbounded);
+explain (costs off) select * from mcrparted where a = 0; -- scans mcrparted0
+ QUERY PLAN
+------------------------------
+ Append
+ -> Seq Scan on mcrparted0
+ Filter: (a = 0)
+(3 rows)
+
+explain (costs off) select * from mcrparted where a = 10 and abs(b) < 5; -- scans mcrparted1
+ QUERY PLAN
+---------------------------------------------
+ Append
+ -> Seq Scan on mcrparted1
+ Filter: ((a = 10) AND (abs(b) < 5))
+(3 rows)
+
+explain (costs off) select * from mcrparted where a = 10 and abs(b) = 5; -- scans mcrparted1, mcrparted2
+ QUERY PLAN
+---------------------------------------------
+ Append
+ -> Seq Scan on mcrparted1
+ Filter: ((a = 10) AND (abs(b) = 5))
+ -> Seq Scan on mcrparted2
+ Filter: ((a = 10) AND (abs(b) = 5))
+(5 rows)
+
+explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all partitions
+ QUERY PLAN
+------------------------------
+ Append
+ -> Seq Scan on mcrparted0
+ Filter: (abs(b) = 5)
+ -> Seq Scan on mcrparted1
+ Filter: (abs(b) = 5)
+ -> Seq Scan on mcrparted2
+ Filter: (abs(b) = 5)
+ -> Seq Scan on mcrparted3
+ Filter: (abs(b) = 5)
+ -> Seq Scan on mcrparted5
+ Filter: (abs(b) = 5)
+(11 rows)
+
+explain (costs off) select * from mcrparted where a > -1; -- scans all partitions
+ QUERY PLAN
+-------------------------------------
+ Append
+ -> Seq Scan on mcrparted0
+ Filter: (a > '-1'::integer)
+ -> Seq Scan on mcrparted1
+ Filter: (a > '-1'::integer)
+ -> Seq Scan on mcrparted2
+ Filter: (a > '-1'::integer)
+ -> Seq Scan on mcrparted3
+ Filter: (a > '-1'::integer)
+ -> Seq Scan on mcrparted4
+ Filter: (a > '-1'::integer)
+ -> Seq Scan on mcrparted5
+ Filter: (a > '-1'::integer)
+(13 rows)
+
+explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10; -- scans mcrparted4
+ QUERY PLAN
+-----------------------------------------------------------
+ Append
+ -> Seq Scan on mcrparted4
+ Filter: ((c > 10) AND (a = 20) AND (abs(b) = 10))
+(3 rows)
+
+explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5
+ QUERY PLAN
+-----------------------------------------
+ Append
+ -> Seq Scan on mcrparted3
+ Filter: ((c > 20) AND (a = 20))
+ -> Seq Scan on mcrparted4
+ Filter: ((c > 20) AND (a = 20))
+ -> Seq Scan on mcrparted5
+ Filter: ((c > 20) AND (a = 20))
+(7 rows)
+
+drop table mcrparted;
diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out
index 6f34b1c640..c68b31415d 100644
--- a/src/test/regress/expected/insert.out
+++ b/src/test/regress/expected/insert.out
@@ -435,3 +435,62 @@ revoke all on key_desc from someone_else;
revoke all on key_desc_1 from someone_else;
drop role someone_else;
drop table key_desc, key_desc_1;
+-- check multi-column range partitioning expression enforces the same
+-- constraint as what tuple-routing would determine it to be
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, 1, 1);
+create table mcrparted1 partition of mcrparted for values from (1, 1, 1) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 5, 10) to (10, 10, 10);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (20, 10, 10) to (20, 20, 20);
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (unbounded, unbounded, unbounded);
+-- routed to mcrparted0
+insert into mcrparted values (0, 1, 1);
+insert into mcrparted0 values (0, 1, 1);
+-- routed to mcparted1
+insert into mcrparted values (9, 1000, 1);
+insert into mcrparted1 values (9, 1000, 1);
+insert into mcrparted values (10, 5, 1);
+insert into mcrparted1 values (10, 5, 1);
+-- routed to mcparted2
+insert into mcrparted values (10, 6, 1000);
+insert into mcrparted2 values (10, 6, 1000);
+insert into mcrparted values (10, 10, 9);
+insert into mcrparted2 values (10, 10, 9);
+-- no partition exists, nor does mcrparted2 accept it
+insert into mcrparted values (10, 10, 1000);
+ERROR: no partition of relation "mcrparted" found for row
+DETAIL: Partition key of the failing row contains (a, abs(b), c) = (10, 10, 1000).
+insert into mcrparted2 values (10, 10, 1000);
+ERROR: new row for relation "mcrparted2" violates partition constraint
+DETAIL: Failing row contains (10, 10, 1000).
+-- nor does mcrparted3
+insert into mcrparted3 values (10, 10, 1000);
+ERROR: new row for relation "mcrparted3" violates partition constraint
+DETAIL: Failing row contains (10, 10, 1000).
+-- routed to mcrparted5
+insert into mcrparted values (20, 20, 20);
+insert into mcrparted5 values (20, 20, 20);
+insert into mcrparted4 values (20, 20, 20); -- error
+ERROR: new row for relation "mcrparted4" violates partition constraint
+DETAIL: Failing row contains (20, 20, 20).
+-- check rows
+select tableoid::regclass::text, * from mcrparted order by 1;
+ tableoid | a | b | c
+------------+----+------+------
+ mcrparted0 | 0 | 1 | 1
+ mcrparted0 | 0 | 1 | 1
+ mcrparted1 | 9 | 1000 | 1
+ mcrparted1 | 9 | 1000 | 1
+ mcrparted1 | 10 | 5 | 1
+ mcrparted1 | 10 | 5 | 1
+ mcrparted2 | 10 | 6 | 1000
+ mcrparted2 | 10 | 6 | 1000
+ mcrparted2 | 10 | 10 | 9
+ mcrparted2 | 10 | 10 | 9
+ mcrparted5 | 20 | 20 | 20
+ mcrparted5 | 20 | 20 | 20
+(12 rows)
+
+-- cleanup
+drop table mcrparted;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index d43b75c4a7..7f34f43ec0 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -643,3 +643,21 @@ explain (costs off) select * from range_list_parted where a >= 30;
drop table list_parted;
drop table range_list_parted;
+
+-- check that constraint exclusion is able to cope with the partition
+-- constraint emitted for multi-column range partitioned tables
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, 1, 1);
+create table mcrparted1 partition of mcrparted for values from (1, 1, 1) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 5, 10) to (10, 10, 10);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (20, 10, 10) to (20, 20, 20);
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (unbounded, unbounded, unbounded);
+explain (costs off) select * from mcrparted where a = 0; -- scans mcrparted0
+explain (costs off) select * from mcrparted where a = 10 and abs(b) < 5; -- scans mcrparted1
+explain (costs off) select * from mcrparted where a = 10 and abs(b) = 5; -- scans mcrparted1, mcrparted2
+explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all partitions
+explain (costs off) select * from mcrparted where a > -1; -- scans all partitions
+explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10; -- scans mcrparted4
+explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5
+drop table mcrparted;
diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql
index 020854c960..88509f9cce 100644
--- a/src/test/regress/sql/insert.sql
+++ b/src/test/regress/sql/insert.sql
@@ -289,3 +289,46 @@ revoke all on key_desc from someone_else;
revoke all on key_desc_1 from someone_else;
drop role someone_else;
drop table key_desc, key_desc_1;
+
+-- check multi-column range partitioning expression enforces the same
+-- constraint as what tuple-routing would determine it to be
+create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c);
+create table mcrparted0 partition of mcrparted for values from (unbounded, unbounded, unbounded) to (1, 1, 1);
+create table mcrparted1 partition of mcrparted for values from (1, 1, 1) to (10, 5, 10);
+create table mcrparted2 partition of mcrparted for values from (10, 5, 10) to (10, 10, 10);
+create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10);
+create table mcrparted4 partition of mcrparted for values from (20, 10, 10) to (20, 20, 20);
+create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (unbounded, unbounded, unbounded);
+
+-- routed to mcrparted0
+insert into mcrparted values (0, 1, 1);
+insert into mcrparted0 values (0, 1, 1);
+
+-- routed to mcparted1
+insert into mcrparted values (9, 1000, 1);
+insert into mcrparted1 values (9, 1000, 1);
+insert into mcrparted values (10, 5, 1);
+insert into mcrparted1 values (10, 5, 1);
+
+-- routed to mcparted2
+insert into mcrparted values (10, 6, 1000);
+insert into mcrparted2 values (10, 6, 1000);
+insert into mcrparted values (10, 10, 9);
+insert into mcrparted2 values (10, 10, 9);
+
+-- no partition exists, nor does mcrparted2 accept it
+insert into mcrparted values (10, 10, 1000);
+insert into mcrparted2 values (10, 10, 1000);
+-- nor does mcrparted3
+insert into mcrparted3 values (10, 10, 1000);
+
+-- routed to mcrparted5
+insert into mcrparted values (20, 20, 20);
+insert into mcrparted5 values (20, 20, 20);
+insert into mcrparted4 values (20, 20, 20); -- error
+
+-- check rows
+select tableoid::regclass::text, * from mcrparted order by 1;
+
+-- cleanup
+drop table mcrparted;
--
2.11.0
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers