Right now Postgres determines whether update operation touch index or
not based only on set of the affected columns.
But in case of functional indexes such policy quite frequently leads to
unnecessary index updates.
For example, functional index are widely use for indexing JSON data:
info->>'name'.
JSON data may contain multiple attributes and only few of them may be
affected by update.
Moreover, index is used to build for immutable attributes (like "id",
"isbn", "name",...).
Functions like (info->>'name') are named "surjective" ni mathematics.
I have strong feeling that most of functional indexes are based on
surjective functions.
For such indexes current Postgresql index update policy is very
inefficient. It cause disabling of hot updates
and so leads to significant degrade of performance.
Without this patch Postgres is slower than Mongo on YCSB benchmark with
(50% update,50 % select) workload.
And after applying this patch Postgres beats Mongo at all workloads.
My proposal is to check value of function for functional indexes instead
of just comparing set of effected attributes.
Obviously, for some complex functions it may have negative effect on
update speed.
This is why I have added "surjective" option to index. By default it is
switched on for all functional indexes (based on my assumption
that most functions used in functional indexes are surjective). But it
is possible to explicitly disable it and make decision weather index
needs to be updated or not only based on set of effected attributes.
--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 6d1f22f..37fc407 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -130,6 +130,15 @@ static relopt_bool boolRelOpts[] =
},
{
{
+ "surjective",
+ "Reevaluate functional index expression on update to check if its values is changed",
+ RELOPT_KIND_INDEX,
+ AccessExclusiveLock
+ },
+ true
+ },
+ {
+ {
"security_barrier",
"View acts as a row security barrier",
RELOPT_KIND_VIEW,
@@ -1301,7 +1310,7 @@ fillRelOptions(void *rdopts, Size basesize,
break;
}
}
- if (validate && !found)
+ if (validate && !found && options[i].gen->kinds != RELOPT_KIND_INDEX)
elog(ERROR, "reloption \"%s\" not found in parse table",
options[i].gen->name);
}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e890e08..3525e3c 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -56,6 +56,7 @@
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
+#include "catalog/index.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
@@ -73,7 +74,9 @@
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
-
+#include "utils/memutils.h"
+#include "nodes/execnodes.h"
+#include "executor/executor.h"
/* GUC variable */
bool synchronize_seqscans = true;
@@ -4199,6 +4202,7 @@ l2:
if (use_hot_update)
{
+ elog(DEBUG1, "Use hot update");
/* Mark the old tuple as HOT-updated */
HeapTupleSetHotUpdated(&oldtup);
/* And mark the new tuple as heap-only */
@@ -4436,6 +4440,73 @@ HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols,
attnum - FirstLowInvalidHeapAttributeNumber);
}
+ if (hot_result && relation->rd_surjective)
+ {
+ ListCell *l;
+ List *indexoidlist = RelationGetIndexList(relation);
+ EState *estate = CreateExecutorState();
+ ExprContext *econtext = GetPerTupleExprContext(estate);
+ TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(relation));
+ Datum old_values[INDEX_MAX_KEYS];
+ bool old_isnull[INDEX_MAX_KEYS];
+ Datum new_values[INDEX_MAX_KEYS];
+ bool new_isnull[INDEX_MAX_KEYS];
+
+ econtext->ecxt_scantuple = slot;
+
+ foreach(l, indexoidlist)
+ {
+ Oid indexOid = lfirst_oid(l);
+ Relation indexDesc = index_open(indexOid, AccessShareLock);
+ IndexInfo *indexInfo = BuildIndexInfo(indexDesc);
+ int i;
+
+ if (indexInfo->ii_Expressions && indexInfo->ii_Surjective)
+ {
+ ResetExprContext(econtext);
+ ExecStoreTuple(oldtup, slot, InvalidBuffer, false);
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ old_values,
+ old_isnull);
+
+ ExecStoreTuple(newtup, slot, InvalidBuffer, false);
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ new_values,
+ new_isnull);
+
+ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+ {
+ if (old_isnull[i] != new_isnull[i])
+ {
+ hot_result = false;
+ break;
+ }
+ else if (!old_isnull[i])
+ {
+ Form_pg_attribute att = RelationGetDescr(indexDesc)->attrs[i];
+ if (!datumIsEqual(old_values[i], new_values[i], att->attbyval, att->attlen))
+ {
+ hot_result = false;
+ break;
+ }
+
+ }
+ }
+ }
+ index_close(indexDesc, AccessShareLock);
+ }
+ ExecDropSingleTupleTableSlot(slot);
+ FreeExecutorState(estate);
+ }
+
+ *satisfies_hot = hot_result;
+ *satisfies_key = key_result;
+ *satisfies_id = id_result;
+
return modified;
}
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 2328b92..91217fa 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -26,6 +26,7 @@
#include "access/amapi.h"
#include "access/multixact.h"
#include "access/relscan.h"
+#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
@@ -86,6 +87,13 @@ typedef struct
tups_inserted;
} v_i_state;
+/* options common to all indexes */
+typedef struct
+{
+ int32 vl_len_;
+ bool surjective;
+} index_options;
+
/* non-export function prototypes */
static bool relationHasPrimaryKey(Relation rel);
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
@@ -1676,6 +1684,40 @@ BuildIndexInfo(Relation index)
ii->ii_ExclusionStrats = NULL;
}
+ if (ii->ii_Expressions)
+ {
+ HeapTuple tuple;
+ Datum reloptions;
+ bool isnull;
+
+ ii->ii_Surjective = true; /* by default functional index is considered as surjective */
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(RelationGetRelid(index)));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", RelationGetRelid(index));
+
+ reloptions = SysCacheGetAttr(RELOID, tuple,
+ Anum_pg_class_reloptions, &isnull);
+ if (!isnull)
+ {
+ static const relopt_parse_elt tab[] = {
+ {"surjective", RELOPT_TYPE_BOOL, offsetof(index_options, surjective)}
+ };
+ int numoptions;
+ relopt_value *options = parseRelOptions(reloptions, false,
+ RELOPT_KIND_INDEX,
+ &numoptions);
+ if (numoptions != 0)
+ {
+ index_options optstruct;
+ fillRelOptions((void *)&optstruct, sizeof(bool), options, numoptions, false, tab, lengthof(tab));
+ ii->ii_Surjective = optstruct.surjective;
+ pfree(options);
+ }
+ }
+ ReleaseSysCache(tuple);
+ }
+
/* other info */
ii->ii_Unique = indexStruct->indisunique;
ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index c2e8361..d3b75f6 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -4854,6 +4854,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
List *newindexoidlist;
Oid relpkindex;
Oid relreplindex;
+ bool surjective = false;
ListCell *l;
MemoryContext oldcxt;
@@ -4963,9 +4964,15 @@ restart:
}
}
- /* Collect all attributes used in expressions, too */
- pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
-
+ if (indexInfo->ii_Expressions && indexInfo->ii_Surjective)
+ {
+ surjective = true;
+ }
+ else
+ {
+ /* Collect all attributes used in expressions, too */
+ pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
+ }
/* Collect all attributes in the index predicate, too */
pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
@@ -5010,6 +5017,8 @@ restart:
bms_free(relation->rd_idattr);
relation->rd_idattr = NULL;
+ relation->rd_surjective = surjective;
+
/*
* Now save copies of the bitmaps in the relcache entry. We intentionally
* set rd_indexattr last, because that's the one that signals validity of
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 2abd087..f404888 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1647,11 +1647,11 @@ psql_completion(const char *text, int start, int end)
COMPLETE_WITH_CONST("(");
/* ALTER INDEX <foo> SET|RESET ( */
else if (Matches5("ALTER", "INDEX", MatchAny, "RESET", "("))
- COMPLETE_WITH_LIST3("fillfactor", "fastupdate",
- "gin_pending_list_limit");
+ COMPLETE_WITH_LIST4("fillfactor", "fastupdate",
+ "gin_pending_list_limit", "surjective");
else if (Matches5("ALTER", "INDEX", MatchAny, "SET", "("))
- COMPLETE_WITH_LIST3("fillfactor =", "fastupdate =",
- "gin_pending_list_limit =");
+ COMPLETE_WITH_LIST4("fillfactor =", "fastupdate =",
+ "gin_pending_list_limit =", "surjective = ");
/* ALTER LANGUAGE <name> */
else if (Matches3("ALTER", "LANGUAGE", MatchAny))
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index 91b2cd7..1fba0f0 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -51,6 +51,7 @@ typedef enum relopt_kind
RELOPT_KIND_PARTITIONED = (1 << 11),
/* if you add a new kind, make sure you update "last_default" too */
RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PARTITIONED,
+ RELOPT_KIND_INDEX = RELOPT_KIND_BTREE|RELOPT_KIND_HASH|RELOPT_KIND_GIN|RELOPT_KIND_SPGIST,
/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
RELOPT_KIND_MAX = (1 << 30)
} relopt_kind;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index d33392f..f67c9f0 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -147,6 +147,7 @@ typedef struct IndexInfo
bool ii_ReadyForInserts;
bool ii_Concurrent;
bool ii_BrokenHotChain;
+ bool ii_Surjective;
void *ii_AmCache;
MemoryContext ii_Context;
} IndexInfo;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 8476896..147ba89 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -125,6 +125,8 @@ typedef struct RelationData
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */
+ bool rd_surjective; /* relation contains functional index with surjective function */
+
MemoryContext rd_partkeycxt; /* private memory cxt for the below */
struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
MemoryContext rd_pdcxt; /* private context for partdesc */
diff --git a/src/test/regress/expected/func_index.out b/src/test/regress/expected/func_index.out
new file mode 100644
index 0000000..c57a46a
--- /dev/null
+++ b/src/test/regress/expected/func_index.out
@@ -0,0 +1,29 @@
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=false);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=true);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+DEBUG: Use hot update
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+DEBUG: Use hot update
+set client_min_messages=notice;
+drop table keyvalue;
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name'));
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+DEBUG: Use hot update
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+DEBUG: Use hot update
+set client_min_messages=notice;
+drop table keyvalue;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 1f8f098..06fd9aa 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -79,7 +79,7 @@ ignore: random
# ----------
# Another group of parallel tests
# ----------
-test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts delete
+test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index func_index update namespace prepared_xacts delete
# ----------
# Another group of parallel tests
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 04206c3..4f8b460 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -98,6 +98,7 @@ test: portals
test: arrays
test: btree_index
test: hash_index
+test: func_index
test: update
test: delete
test: namespace
diff --git a/src/test/regress/sql/func_index.sql b/src/test/regress/sql/func_index.sql
new file mode 100644
index 0000000..4923382
--- /dev/null
+++ b/src/test/regress/sql/func_index.sql
@@ -0,0 +1,29 @@
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=false);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=true);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name'));
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+
+
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers