I wrote:
> Andres Freund <[email protected]> writes:
>> On 2018-12-14 14:25:30 -0500, Tom Lane wrote:
>>> Now, it's certainly true that nameeq() doesn't need a collation spec
>>> today, any more than texteq() does, because both types legislate that
>>> equality is bitwise. But if we leave ExecBuildGroupingEqual like this,
>>> we're mandating that no type anywhere, ever, can have a
>>> collation-dependent notion of equality. Is that really a restriction
>>> we're comfortable with? citext is sort of the poster child here,
>>> because it already wishes it could have collation-dependent equality.
>> Don't we rely on that in other places too?
> Possibly; the regression tests probably don't stress type "name" too hard,
> so my Assert might well not be finding some other code paths that do
> likewise.
To investigate this a bit more, I added a similar Assert in texteq(),
and also grepped for places that were using FunctionCall2 to call
equality functions. The attached patch shows what I had to change
to get check-world to pass. It's possible that there are one or two
more places I missed --- two of these changes were found by grepping
but were *not* exposed by regression testing. But this ought to be
a pretty good indication of the scope of the problem.
There are a couple of places touched here that know they are invoking
texteq specifically, so we wouldn't really need to change them to have
a working feature. In all I found six places that we'd need to deal with
if we want to support collation-dependent equality.
A possible medium-term compromise is to pass DEFAULT_COLLATION_OID,
as I've done here, so that at least a collation-examining equality
function won't fall over. A variant of that is to pass C_COLLATION_OID,
which presumably would be faster to execute in many cases; it would
also have the advantage of being database-independent, which might
be a good thing in execReplication.c for instance.
Or we could just decide that collation-dependent equality is a bridge
too far for today. I'm not hugely excited about pursuing it myself,
I just wanted to get a handle on how badly broken it is.
regards, tom lane
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index d9087ca..fa00a36 100644
*** a/src/backend/executor/execExpr.c
--- b/src/backend/executor/execExpr.c
***************
*** 32,37 ****
--- 32,38 ----
#include "access/nbtree.h"
#include "catalog/objectaccess.h"
+ #include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "executor/execExpr.h"
#include "executor/nodeSubplan.h"
*************** ExecBuildGroupingEqual(TupleDesc ldesc,
*** 3393,3399 ****
fmgr_info(foid, finfo);
fmgr_info_set_expr(NULL, finfo);
InitFunctionCallInfoData(*fcinfo, finfo, 2,
! InvalidOid, NULL, NULL);
/* left arg */
scratch.opcode = EEOP_INNER_VAR;
--- 3394,3400 ----
fmgr_info(foid, finfo);
fmgr_info_set_expr(NULL, finfo);
InitFunctionCallInfoData(*fcinfo, finfo, 2,
! DEFAULT_COLLATION_OID, NULL, NULL);
/* left arg */
scratch.opcode = EEOP_INNER_VAR;
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 5bd3bbc..cd40071 100644
*** a/src/backend/executor/execReplication.c
--- b/src/backend/executor/execReplication.c
***************
*** 17,22 ****
--- 17,23 ----
#include "access/relscan.h"
#include "access/transam.h"
#include "access/xact.h"
+ #include "catalog/pg_collation.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
*************** tuple_equals_slot(TupleDesc desc, HeapTu
*** 265,273 ****
errmsg("could not identify an equality operator for type %s",
format_type_be(att->atttypid))));
! if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
! values[attrnum],
! slot->tts_values[attrnum])))
return false;
}
--- 266,275 ----
errmsg("could not identify an equality operator for type %s",
format_type_be(att->atttypid))));
! if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo,
! DEFAULT_COLLATION_OID,
! values[attrnum],
! slot->tts_values[attrnum])))
return false;
}
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index daf56cd..50e0f58 100644
*** a/src/backend/executor/nodeAgg.c
--- b/src/backend/executor/nodeAgg.c
***************
*** 219,224 ****
--- 219,225 ----
#include "access/htup_details.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_aggregate.h"
+ #include "catalog/pg_collation.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
*************** process_ordered_aggregate_single(AggStat
*** 748,762 ****
/*
* If DISTINCT mode, and not distinct from prior, skip it.
*
! * Note: we assume equality functions don't care about collation.
*/
if (isDistinct &&
haveOldVal &&
((oldIsNull && *isNull) ||
(!oldIsNull && !*isNull &&
oldAbbrevVal == newAbbrevVal &&
! DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
! oldVal, *newVal)))))
{
/* equal to prior, so forget this one */
if (!pertrans->inputtypeByVal && !*isNull)
--- 749,765 ----
/*
* If DISTINCT mode, and not distinct from prior, skip it.
*
! * Note: most equality functions don't care about collation. For
! * those that do, just select DEFAULT_COLLATION_OID.
*/
if (isDistinct &&
haveOldVal &&
((oldIsNull && *isNull) ||
(!oldIsNull && !*isNull &&
oldAbbrevVal == newAbbrevVal &&
! DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
! DEFAULT_COLLATION_OID,
! oldVal, *newVal)))))
{
/* equal to prior, so forget this one */
if (!pertrans->inputtypeByVal && !*isNull)
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 84a1a91..b136650 100644
*** a/src/backend/executor/nodeSubplan.c
--- b/src/backend/executor/nodeSubplan.c
***************
*** 30,35 ****
--- 30,36 ----
#include <math.h>
#include "access/htup_details.h"
+ #include "catalog/pg_collation.h"
#include "executor/executor.h"
#include "executor/nodeSubplan.h"
#include "nodes/makefuncs.h"
*************** execTuplesUnequal(TupleTableSlot *slot1,
*** 671,678 ****
/* Apply the type-specific equality function */
! if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
! attr1, attr2)))
{
result = true; /* they are unequal */
break;
--- 672,680 ----
/* Apply the type-specific equality function */
! if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i],
! DEFAULT_COLLATION_OID,
! attr1, attr2)))
{
result = true; /* they are unequal */
break;
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 1b21da8..5fed920 100644
*** a/src/backend/utils/adt/orderedsetaggs.c
--- b/src/backend/utils/adt/orderedsetaggs.c
***************
*** 17,22 ****
--- 17,23 ----
#include <math.h>
#include "catalog/pg_aggregate.h"
+ #include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
*************** mode_final(PG_FUNCTION_ARGS)
*** 1084,1090 ****
last_abbrev_val = abbrev_val;
}
else if (abbrev_val == last_abbrev_val &&
! DatumGetBool(FunctionCall2(equalfn, val, last_val)))
{
/* value equal to previous value, count it */
if (last_val_is_mode)
--- 1085,1092 ----
last_abbrev_val = abbrev_val;
}
else if (abbrev_val == last_abbrev_val &&
! DatumGetBool(FunctionCall2Coll(equalfn, DEFAULT_COLLATION_OID,
! val, last_val)))
{
/* value equal to previous value, count it */
if (last_val_is_mode)
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index cdda860..a77d610 100644
*** a/src/backend/utils/adt/ri_triggers.c
--- b/src/backend/utils/adt/ri_triggers.c
*************** ri_AttributesEqual(Oid eq_opr, Oid typei
*** 2975,2985 ****
}
/*
! * Apply the comparison operator. We assume it doesn't care about
! * collations.
*/
! return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo,
! oldvalue, newvalue));
}
/* ----------
--- 2975,2985 ----
}
/*
! * Apply the comparison operator.
*/
! return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo,
! DEFAULT_COLLATION_OID,
! oldvalue, newvalue));
}
/* ----------
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0fd3b15..c3c89cf 100644
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
*************** texteq(PG_FUNCTION_ARGS)
*** 1646,1651 ****
--- 1646,1653 ----
Size len1,
len2;
+ Assert(PG_GET_COLLATION() != 0);
+
/*
* Since we only care about equality or not-equality, we can avoid all the
* expense of strcoll() here, and just do bitwise comparison. In fact, we
*************** split_text(PG_FUNCTION_ARGS)
*** 4281,4289 ****
static bool
text_isequal(text *txt1, text *txt2)
{
! return DatumGetBool(DirectFunctionCall2(texteq,
! PointerGetDatum(txt1),
! PointerGetDatum(txt2)));
}
/*
--- 4283,4292 ----
static bool
text_isequal(text *txt1, text *txt2)
{
! return DatumGetBool(DirectFunctionCall2Coll(texteq,
! DEFAULT_COLLATION_OID,
! PointerGetDatum(txt1),
! PointerGetDatum(txt2)));
}
/*
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index b31fd5a..2ac6dd6 100644
*** a/src/backend/utils/cache/catcache.c
--- b/src/backend/utils/cache/catcache.c
***************
*** 22,27 ****
--- 22,28 ----
#include "access/tuptoaster.h"
#include "access/valid.h"
#include "access/xact.h"
+ #include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
*************** int4hashfast(Datum datum)
*** 181,187 ****
static bool
texteqfast(Datum a, Datum b)
{
! return DatumGetBool(DirectFunctionCall2(texteq, a, b));
}
static uint32
--- 182,188 ----
static bool
texteqfast(Datum a, Datum b)
{
! return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b));
}
static uint32
*************** CatalogCacheInitializeCache(CatCache *ca
*** 1014,1021 ****
/* Fill in sk_strategy as well --- always standard equality */
cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber;
cache->cc_skey[i].sk_subtype = InvalidOid;
! /* Currently, there are no catcaches on collation-aware data types */
! cache->cc_skey[i].sk_collation = InvalidOid;
CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p",
cache->cc_relname,
--- 1015,1022 ----
/* Fill in sk_strategy as well --- always standard equality */
cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber;
cache->cc_skey[i].sk_subtype = InvalidOid;
! /* If a catcache key requires a collation, it must be C collation */
! cache->cc_skey[i].sk_collation = C_COLLATION_OID;
CACHE4_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p",
cache->cc_relname,