I missed the case where the column collation and the partition key collation are the same and nondeterministic. That should be fine for partitionwise join, so v2 is attached.
have_partkey_equi_join and match_expr_to_partition_keys do not perform any collation-related checks. The proposed v2 change disallows partitionwise join when the column collation is nondeterministic *and* differs from the partition key's collation. The attached partition_wise_join_collation.sql is the test script; you may use it to compare against the behavior on master.
partition_wise_join_collation.sql
Description: application/sql
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index d7266e4cdb..428751b05f 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -74,7 +74,7 @@ static bool have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *rel1, RelOptInfo *rel2,
JoinType jointype, List *restrictlist);
static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
- bool strict_op);
+ bool strict_op, bool *coll_incompatiable);
static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
JoinType jointype);
@@ -2104,6 +2104,7 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
Expr *expr1;
Expr *expr2;
bool strict_op;
+ bool coll_incompatiable = false;
int ipk1;
int ipk2;
@@ -2167,10 +2168,11 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
* Only clauses referencing the partition keys are useful for
* partitionwise join.
*/
- ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
+ ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op, &coll_incompatiable);
if (ipk1 < 0)
continue;
- ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
+
+ ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op, &coll_incompatiable);
if (ipk2 < 0)
continue;
@@ -2181,6 +2183,15 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
if (ipk1 != ipk2)
continue;
+ /*
+ * We generally assume the partition key's and the expression's collations
+ * are fine for partitionwise join.  The forbidden case is when the column
+ * collation is nondeterministic and differs from the partition key's
+ * collation.  See also match_expr_to_partition_keys.
+ */
+ if (coll_incompatiable)
+ return false;
+
/* Ignore clause if we already proved these keys equal. */
if (pk_known_equal[ipk1])
continue;
@@ -2296,9 +2307,15 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
* strict_op must be true if the expression will be compared with the
* partition key using a strict operator. This allows us to consider
* nullable as well as nonnullable partition keys.
+ * If exprCollation(expr) is nondeterministic and not equal to the partition
+ * key's collation, values that compare equal but differ in representation can
+ * live in different partitions; *coll_incompatiable is then set to true and
+ * partitionwise join cannot be used.  It is fine for the expression's collation
+ * to match the partition key's, even if that collation is nondeterministic.
+ *
*/
static int
-match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
+match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op, bool *coll_incompatiable)
{
int cnt;
@@ -2315,11 +2332,22 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
{
ListCell *lc;
+ Oid partcoll = rel->part_scheme->partcollation[cnt];
+
/* We can always match to the non-nullable partition keys. */
foreach(lc, rel->partexprs[cnt])
{
if (equal(lfirst(lc), expr))
+ {
+ Oid colloid = exprCollation((Node *) expr);
+
+ if ((partcoll != colloid) &&
+ OidIsValid(colloid) &&
+ !get_collation_isdeterministic(colloid))
+ *coll_incompatiable = true;
+
return cnt;
+ }
}
if (!strict_op)
@@ -2335,7 +2363,15 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
foreach(lc, rel->nullable_partexprs[cnt])
{
if (equal(lfirst(lc), expr))
+ {
+ Oid colloid = exprCollation((Node *) expr);
+
+ if ((partcoll != colloid) &&
+ OidIsValid(colloid) &&
+ !get_collation_isdeterministic(colloid))
+ *coll_incompatiable = true;
return cnt;
+ }
}
}
