Attached is v4, which fixes a couple of relatively minor bugs. There are still things to tackle before this is committable, but code review of the new executor node would be welcome.
The big remaining item is still fitting the PK data into a TID's 6 bytes. I've been looking at reworking the btree code to allow for an arbitrary size; it doesn't look impossible, although it's going to be rather invasive. Also, vacuuming: my answer continues to be that the killtuple interface should be good enough, but it's also possible to vacuum the index separately from vacuuming the table if you do a full scan and check the PK tuples for each indirect tuple. This patch implements killtuple: when a scan finds an indirect tuple that returns nothing from the heap, it marks that tuple as LP_DEAD. Later, when the page is about to be split, those tuples are removed. I also have a note in the code about not inserting an indirect tuple when an identical one already exists. This is a correctness issue: we return duplicated heap rows in certain cases. -- Álvaro Herrera https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 40f201b..4b21216 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -1037,6 +1037,8 @@ amrestrpos (IndexScanDesc scan); for the same tuple values as were used in the original insertion. </para> </listitem> + + <listitem><para>XXX describe UNIQUE_CHECK_INSERT_SINGLETON here </para></listitem> </itemizedlist> </para> diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 1b45a4c..9f899c7 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -92,6 +92,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = brinbuild; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index f07eedc..1bc91d2 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -49,6 +49,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = ginbuild; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index b8aa9bc..4ec34d5 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -69,6 +69,7 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = true; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = gistbuild; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 1fa087a..a2cf278 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -66,6 +66,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; 
amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = INT4OID; amroutine->ambuild = hashbuild; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 19edbdf..a6e859c 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3411,6 +3411,8 @@ simple_heap_delete(Relation relation, ItemPointer tid) * crosscheck - if not InvalidSnapshot, also check old tuple against this * wait - true if should wait for any conflicting update to commit/abort * hufd - output parameter, filled in failure cases (see below) + * unchanged_ind_cols - output parameter; bits set for unmodified columns + * that are indexed by indirect indexes * lockmode - output parameter, filled with lock mode acquired on tuple * * Normal, successful return value is HeapTupleMayBeUpdated, which @@ -3433,13 +3435,15 @@ simple_heap_delete(Relation relation, ItemPointer tid) HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, - HeapUpdateFailureData *hufd, LockTupleMode *lockmode) + HeapUpdateFailureData *hufd, Bitmapset **unchanged_ind_cols, + LockTupleMode *lockmode) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); Bitmapset *hot_attrs; Bitmapset *key_attrs; Bitmapset *id_attrs; + Bitmapset *indirect_attrs; Bitmapset *interesting_attrs; Bitmapset *modified_attrs; ItemId lp; @@ -3501,14 +3505,16 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, * Note that we get copies of each bitmap, so we need not worry about * relcache flush happening midway through. 
*/ - hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_ALL); + hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_HOT); key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY); id_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY); + indirect_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_INDIRECT_INDEXES); interesting_attrs = bms_add_members(NULL, hot_attrs); interesting_attrs = bms_add_members(interesting_attrs, key_attrs); interesting_attrs = bms_add_members(interesting_attrs, id_attrs); - + interesting_attrs = bms_add_members(interesting_attrs, indirect_attrs); block = ItemPointerGetBlockNumber(otid); buffer = ReadBuffer(relation, block); @@ -3560,6 +3566,19 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, &oldtup, newtup); /* + * From the set of columns interesting to indirect indexes (that is, the + * columns indexed by indirect indexes themselves as well as columns in + * the primary key), subtract those that were modified by the update, to + * obtain the list of interesting columns not modified. Note that we + * use the "key" columns (which includes all unique indexes, not just the + * primary key), so it may contain more columns than absolutely necessary. + */ + if (unchanged_ind_cols) + *unchanged_ind_cols = bms_difference(bms_union(key_attrs, + indirect_attrs), + modified_attrs); + + /* * If we're not updating any "key" column, we can grab a weaker lock type. * This allows for more concurrency when we are running simultaneously * with foreign key checks. 
@@ -3809,6 +3828,7 @@ l2: bms_free(hot_attrs); bms_free(key_attrs); bms_free(id_attrs); + bms_free(indirect_attrs); bms_free(modified_attrs); bms_free(interesting_attrs); return result; @@ -4280,6 +4300,7 @@ l2: bms_free(hot_attrs); bms_free(key_attrs); bms_free(id_attrs); + bms_free(indirect_attrs); bms_free(modified_attrs); bms_free(interesting_attrs); @@ -4411,7 +4432,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) result = heap_update(relation, otid, tup, GetCurrentCommandId(true), InvalidSnapshot, true /* wait for commit */ , - &hufd, &lockmode); + &hufd, NULL, &lockmode); switch (result) { case HeapTupleSelfUpdated: diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index fc4702c..07bf463 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -1614,6 +1614,7 @@ toast_save_datum(Relation rel, Datum value, /* Only index relations marked as ready can be updated */ if (IndexIsReady(toastidxs[i]->rd_index)) index_insert(toastidxs[i], t_values, t_isnull, + NULL, &(toasttup->t_self), toastrel, toastidxs[i]->rd_index->indisunique ? 
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 65c941d..ef0c5bc 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -413,7 +413,7 @@ systable_getnext(SysScanDesc sysscan) if (sysscan->irel) { - htup = index_getnext(sysscan->iscan, ForwardScanDirection); + htup = index_getnext(sysscan->iscan, ForwardScanDirection, NULL); /* * We currently don't need to support lossy index operators for any @@ -593,7 +593,7 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) HeapTuple htup; Assert(sysscan->irel); - htup = index_getnext(sysscan->iscan, direction); + htup = index_getnext(sysscan->iscan, direction, NULL); /* See notes in systable_getnext */ if (htup && sysscan->iscan->xs_recheck) elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 54b71cb..202f64a 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -189,10 +189,13 @@ bool index_insert(Relation indexRelation, Datum *values, bool *isnull, + Datum *pkeyValues, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique) { + ItemPointerData iptr; + RELATION_CHECKS; CHECK_REL_PROCEDURE(aminsert); @@ -201,8 +204,19 @@ index_insert(Relation indexRelation, (HeapTuple) NULL, InvalidBuffer); + /* + * Indirect indexes use a fake item pointer constructed from the primary + * key values; regular indexes store the actual heap item pointer. 
+ */ + if (!indexRelation->rd_index->indisindirect) + ItemPointerCopy(heap_t_ctid, &iptr); + else + FAKE_CTID_FROM_PKVALUES(&iptr, + indexRelation->rd_index->indnatts, + pkeyValues); + return indexRelation->rd_amroutine->aminsert(indexRelation, values, isnull, - heap_t_ctid, heapRelation, + &iptr, heapRelation, checkUnique); } @@ -444,6 +458,9 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) * although when using an MVCC snapshot it should be impossible for more than * one such tuple to exist.) * + * If anyfound is not NULL, *anyfound is set to TRUE if there are live tuples + * matching the scan keys, even if they are not visible to the scan's snapshot. + * * On success, the buffer containing the heap tup is pinned (the pin will be * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan * call). @@ -454,7 +471,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) * ---------------- */ HeapTuple -index_fetch_heap(IndexScanDesc scan) +index_fetch_heap(IndexScanDesc scan, bool *anyfound) { ItemPointer tid = &scan->xs_ctup.t_self; bool all_dead = false; @@ -487,6 +504,10 @@ index_fetch_heap(IndexScanDesc scan) !scan->xs_continue_hot); LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + /* Let caller know there are live tuples, even if we can't see them */ + if (!all_dead && anyfound) + *anyfound = true; + if (got_heap_tuple) { /* @@ -524,13 +545,16 @@ index_fetch_heap(IndexScanDesc scan) * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan * call). * + * If anyfound is not NULL, *anyfound is set to TRUE if there are live tuples + * matching the scan keys, even if they are not visible to the scan snapshot. + * * Note: caller must check scan->xs_recheck, and perform rechecking of the * scan keys if required. We do not do that here because we don't have * enough information to do it efficiently in the general case. 
* ---------------- */ HeapTuple -index_getnext(IndexScanDesc scan, ScanDirection direction) +index_getnext(IndexScanDesc scan, ScanDirection direction, bool *anyfound) { HeapTuple heapTuple; ItemPointer tid; @@ -562,7 +586,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) * If we don't find anything, loop around and grab the next TID from * the index. */ - heapTuple = index_fetch_heap(scan); + heapTuple = index_fetch_heap(scan, anyfound); if (heapTuple != NULL) return heapTuple; } @@ -570,6 +594,56 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) return NULL; /* failure exit */ } +/* + * Return the primary key values from the next tuple in a indirect-index scan + * + * pkvals is an output array which must have been allocated by caller. + */ +void +index_getnext_pkey(IndexScanDesc scan, ScanDirection direction, Datum *pkvals, + bool *isDone) +{ + bool found; + Datum pkval; + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgettuple); + + /* + * The AM's amgettuple proc finds the next index entry matching the scan + * keys, and puts the primary key value into scan->xs_ctup.t_self, from + * which we extract it into the pkvals array. It should also set + * scan->xs_recheck and scan->xs_itup, though we pay no attention to those + * fields here. + */ + found = scan->indexRelation->rd_amroutine->amgettuple(scan, direction); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + + if (!found) + { + if (BufferIsValid(scan->xs_cbuf)) + { + ReleaseBuffer(scan->xs_cbuf); + scan->xs_cbuf = InvalidBuffer; + } + *isDone = true; + return; + } + *isDone = false; + + /* + * XXX this assumes that Datum is 8 bytes. Which it is in my machine .. 
but + * not elsewhere + */ + /* XXX see FAKE_CTID_FROM_PKVALUES */ + pkval = (((Datum) scan->xs_ctup.t_self.ip_posid) >> 1) | + (((Datum) scan->xs_ctup.t_self.ip_blkid.bi_hi) << 15) | + (((Datum) scan->xs_ctup.t_self.ip_blkid.bi_lo >> 1) << 32); + pkvals[0] = pkval; +} + /* ---------------- * index_getbitmap - get all tuples at once from an index scan * diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index ef69290..eb4beef 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -92,7 +92,9 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel); * By here, itup is filled in, including the TID. * * If checkUnique is UNIQUE_CHECK_NO or UNIQUE_CHECK_PARTIAL, this - * will allow duplicates. Otherwise (UNIQUE_CHECK_YES or + * will allow duplicates. If it's UNIQUE_CHECK_INSERT_SINGLETON, the value + * will only be inserted if there isn't already a tuple with that value. + * Otherwise (UNIQUE_CHECK_YES or * UNIQUE_CHECK_EXISTING) it will throw error for a duplicate. * For UNIQUE_CHECK_EXISTING we merely run the duplicate check, and * don't actually insert. @@ -100,8 +102,8 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel); * The result value is only significant for UNIQUE_CHECK_PARTIAL: * it must be TRUE if the entry is known unique, else FALSE. * (In the current implementation we'll also return TRUE after a - * successful UNIQUE_CHECK_YES or UNIQUE_CHECK_EXISTING call, but - * that's just a coding artifact.) + * successful UNIQUE_CHECK_YES, UNIQUE_CHECK_EXISTING or + * UNIQUE_CHECK_INSERT_SINGLETON call, but that's just a coding artifact.) */ bool _bt_doinsert(Relation rel, IndexTuple itup, @@ -138,6 +140,21 @@ top: true, stack, BT_WRITE, NULL); /* + * In insert-singleton mode, we must return without doing anything if the + * value we're inserting already exists. 
+ */ +#if 0 + if (checkUnique == UNIQUE_CHECK_INSERT_SINGLETON) + { + offset = _bt_binsrch( .. ); + if (offset is valid and contains a tuple matching the scankey) + return true; + /* otherwise fall through to insert */ + } +#endif + + + /* * If we're not allowing duplicates, make sure the key isn't already in * the index. * @@ -158,7 +175,8 @@ top: * let the tuple in and return false for possibly non-unique, or true for * definitely unique. */ - if (checkUnique != UNIQUE_CHECK_NO) + if (checkUnique != UNIQUE_CHECK_NO && + checkUnique != UNIQUE_CHECK_INSERT_SINGLETON) { TransactionId xwait; uint32 speculativeToken; @@ -167,6 +185,10 @@ top: xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey, checkUnique, &is_unique, &speculativeToken); + if (checkUnique == UNIQUE_CHECK_INSERT_SINGLETON && + TransactionIdIsValid(xwait)) + return true; + if (TransactionIdIsValid(xwait)) { /* Have to wait for the other guy ... */ diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index a264b92..29c6ae6 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -37,6 +37,7 @@ typedef struct { bool isUnique; bool haveDead; + bool isIndirect; Relation heapRel; BTSpool *spool; @@ -45,6 +46,8 @@ typedef struct * put into spool2 instead of spool in order to avoid uniqueness check. 
*/ BTSpool *spool2; + int16 pkNumKeys; + AttrNumber pkAttnums[INDEX_MAX_KEYS]; double indtuples; } BTBuildState; @@ -98,6 +101,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = true; amroutine->ampredlocks = true; + amroutine->amcanindirect = true; amroutine->amkeytype = InvalidOid; amroutine->ambuild = btbuild; @@ -136,6 +140,25 @@ btbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.heapRel = heap; buildstate.spool = NULL; buildstate.spool2 = NULL; + buildstate.isIndirect = indexInfo->ii_IsIndirect; + if (indexInfo->ii_IsIndirect) + { + Oid pkOid; + Relation pkRel; + int i; + + pkOid = RelationGetPrimaryKey(heap); + pkRel = index_open(pkOid, AccessShareLock); + + buildstate.pkNumKeys = pkRel->rd_index->indnatts; + for (i = 0; i < buildstate.pkNumKeys; i++) + buildstate.pkAttnums[i] = pkRel->rd_index->indkey.values[i]; + index_close(pkRel, AccessShareLock); + } + else + { + buildstate.pkNumKeys = 0; + } buildstate.indtuples = 0; #ifdef BTREE_BUILD_STATS @@ -213,18 +236,42 @@ btbuildCallback(Relation index, void *state) { BTBuildState *buildstate = (BTBuildState *) state; + ItemPointerData iptr; + + if (buildstate->isIndirect) + { + Datum pkValues[INDEX_MAX_KEYS]; + int i; + bool isnull; + + /* + * XXX WAG: this is very slow in the general case, but OK if PK column + * is first. 
+ */ + for (i = 0; i < buildstate->pkNumKeys; i++) + { + pkValues[i] = heap_getattr(htup, + buildstate->pkAttnums[i], + RelationGetDescr(buildstate->heapRel), + &isnull); + Assert(!isnull); + } + FAKE_CTID_FROM_PKVALUES(&iptr, buildstate->pkNumKeys, pkValues); + } + else + ItemPointerCopy(&htup->t_self, &iptr); /* * insert the index tuple into the appropriate spool file for subsequent * processing */ if (tupleIsAlive || buildstate->spool2 == NULL) - _bt_spool(buildstate->spool, &htup->t_self, values, isnull); + _bt_spool(buildstate->spool, &iptr, values, isnull); else { /* dead tuples are put into spool2 */ buildstate->haveDead = true; - _bt_spool(buildstate->spool2, &htup->t_self, values, isnull); + _bt_spool(buildstate->spool2, &iptr, values, isnull); } buildstate->indtuples += 1; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index d570ae5..9378919 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -48,6 +48,7 @@ spghandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = spgbuild; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 08b0989..c2748b2 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -623,6 +623,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid); values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs); values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique); + values[Anum_pg_index_indisindirect - 1] = BoolGetDatum(indexInfo->ii_IsIndirect); values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion); values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate); @@ -769,6 +770,26 @@ 
index_create(Relation heapRelation, errmsg("concurrent index creation on system catalog tables is not supported"))); /* + * indirect indexes are forbidden on system catalogs, and they obviously cannot + * be primary keys either. + */ + if (indexInfo->ii_IsIndirect && IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("indirect index creation on system catalog tables is not supported"))); + if (indexInfo->ii_IsIndirect && isprimary) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("primary key indexes cannot be indirect"))); + if (indexInfo->ii_IsIndirect && !OidIsValid(RelationGetPrimaryKey(heapRelation))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("table \"%s\" does not have a primary key", + RelationGetRelationName(heapRelation)), + errtable(heapRelation))); + /* XXX other restrictions needed? */ + + /* * This case is currently not supported, but there's no way to ask for it * in the grammar anyway, so it can't happen. 
*/ @@ -1011,6 +1032,14 @@ index_create(Relation heapRelation, Assert(!initdeferred); } + /* Store dependency on primary key index, if needed */ + if (indexInfo->ii_IsIndirect) + { + ObjectAddressSet(referenced, RelationRelationId, + RelationGetPrimaryKey(heapRelation)); + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + /* Store dependency on collations */ /* The default collation is pinned, so don't bother recording it */ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) @@ -1681,6 +1710,7 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; + ii->ii_IsIndirect = indexStruct->indisindirect; ii->ii_ReadyForInserts = IndexIsReady(indexStruct); /* assume not doing speculative insertion for now */ ii->ii_UniqueOps = NULL; @@ -3161,6 +3191,7 @@ validate_index_heapscan(Relation heapRelation, index_insert(indexRelation, values, isnull, + NULL, /* FIXME need to PK values here */ &rootTuple, heapRelation, indexInfo->ii_Unique ? diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index b9fe102..f00f446 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -136,6 +136,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) index_insert(relationDescs[i], /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ + NULL, /* catalogs never had indirect indexes */ &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, relationDescs[i]->rd_index->indisunique ? 
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 2131226..21b63bb 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -946,7 +946,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (indexScan != NULL) { - tuple = index_getnext(indexScan, ForwardScanDirection); + tuple = index_getnext(indexScan, ForwardScanDirection, NULL); if (tuple == NULL) break; @@ -1531,14 +1531,14 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, reindex_relation(OIDOldHeap, reindex_flags, 0); /* - * If the relation being rebuild is pg_class, swap_relation_files() + * If the relation being rebuilt is pg_class, swap_relation_files() * couldn't update pg_class's own pg_class entry (check comments in * swap_relation_files()), thus relfrozenxid was not updated. That's * annoying because a potential reason for doing a VACUUM FULL is a * imminent or actual anti-wraparound shutdown. So, now that we can - * access the new relation using it's indices, update relfrozenxid. + * access the new relation using its indices, update relfrozenxid. * pg_class doesn't have a toast relation, so we don't need to update the - * corresponding toast relation. Not that there's little point moving all + * corresponding toast relation. Note that there's little point moving all * relfrozenxid updates here since swap_relation_files() needs to write to * pg_class for non-mapped relations anyway. */ diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 26f9114..5f1a7b7 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -164,7 +164,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * correct even if t_self is now dead, because that is the TID the * index will know about. 
*/ - index_insert(indexRel, values, isnull, &(new_row->t_self), + index_insert(indexRel, values, isnull, NULL, &(new_row->t_self), trigdata->tg_relation, UNIQUE_CHECK_EXISTING); } else diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index aa25a23..c5d185a 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2666,9 +2666,10 @@ CopyFrom(CopyState cstate) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate, - false, + false, false, NULL, - NIL); + NIL, + NULL); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, @@ -2820,7 +2821,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false); recheckIndexes = ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self), - estate, false, NULL, NIL); + estate, false, false, NULL, NIL, NULL); ExecARInsertTriggers(estate, resultRelInfo, bufferedTuples[i], recheckIndexes); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 0a669d9..853f155 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -884,6 +884,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_IndexScan: pname = sname = "Index Scan"; break; + case T_IndirectIndexScan: + pname = sname = "Indirect Index Scan"; + break; case T_IndexOnlyScan: pname = sname = "Index Only Scan"; break; @@ -1099,6 +1102,15 @@ ExplainNode(PlanState *planstate, List *ancestors, ExplainScanTarget((Scan *) indexscan, es); } break; + case T_IndirectIndexScan: + { + IndirectIndexScan *indexscan = (IndirectIndexScan *) plan; + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; case T_IndexOnlyScan: { IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; @@ -1301,6 +1313,19 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed 
by Filter", 1, planstate, es); break; + case T_IndirectIndexScan: + show_scan_qual(((IndirectIndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + if (((IndirectIndexScan *) plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndirectIndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; case T_IndexOnlyScan: show_scan_qual(((IndexOnlyScan *) plan)->indexqual, "Index Cond", planstate, ancestors, es); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index eeb2b1f..4a7f79b 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -522,6 +522,11 @@ DefineIndex(Oid relationId, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("access method \"%s\" does not support exclusion constraints", accessMethodName))); + if (stmt->isindirect && !amRoutine->amcanindirect) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support indirect indexes", + accessMethodName))); amcanorder = amRoutine->amcanorder; amoptions = amRoutine->amoptions; @@ -557,6 +562,7 @@ DefineIndex(Oid relationId, indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; indexInfo->ii_Unique = stmt->unique; + indexInfo->ii_IsIndirect = stmt->isindirect; /* In a concurrent build, mark it not-ready-for-inserts */ indexInfo->ii_ReadyForInserts = !stmt->concurrent; indexInfo->ii_Concurrent = stmt->concurrent; diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index b5fb325..e3686b6 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -1592,6 +1592,15 @@ lazy_vacuum_index(Relation indrel, ivinfo.num_heap_tuples = 
vacrelstats->old_rel_tuples; ivinfo.strategy = vac_strategy; + /* Scanning indirect indexes for vacuuming is pointless; skip it. */ + if (indrel->rd_index->indisindirect) + { + ereport(elevel, + (errmsg("skipped indirect index \"%s\" during VACUUM", + RelationGetRelationName(indrel)))); + return; + } + /* Do bulk deletion */ *stats = index_bulk_delete(&ivinfo, *stats, lazy_tid_reaped, (void *) vacrelstats); diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index 51edd4c..799b4b6 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -19,6 +19,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeCustom.o nodeGather.o \ nodeHash.o nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ + nodeIndirectIndexscan.o \ nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 2587ef7..bcae700 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -32,6 +32,7 @@ #include "executor/nodeHashjoin.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" +#include "executor/nodeIndirectIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" #include "executor/nodeMaterial.h" @@ -170,6 +171,10 @@ ExecReScan(PlanState *node) ExecReScanIndexScan((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecReScanIndirectIndexScan((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: ExecReScanIndexOnlyScan((IndexOnlyScanState *) node); break; @@ -300,6 +305,10 @@ ExecMarkPos(PlanState *node) ExecIndexMarkPos((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecIndirectIndexMarkPos((IndirectIndexScanState *) node); + 
break; + case T_IndexOnlyScanState: ExecIndexOnlyMarkPos((IndexOnlyScanState *) node); break; @@ -349,6 +358,9 @@ ExecRestrPos(PlanState *node) ExecIndexRestrPos((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecIndirectIndexRestrPos((IndirectIndexScanState *) node); + case T_IndexOnlyScanState: ExecIndexOnlyRestrPos((IndexOnlyScanState *) node); break; @@ -393,6 +405,7 @@ ExecSupportsMarkRestore(Path *pathnode) switch (pathnode->pathtype) { case T_IndexScan: + case T_IndirectIndexScan: case T_IndexOnlyScan: case T_Material: case T_Sort: @@ -491,6 +504,11 @@ ExecSupportsBackwardScan(Plan *node) return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); + case T_IndirectIndexScan: + /* FIXME this is expected to work, but is untested */ + return IndexSupportsBackwardScan(((IndirectIndexScan *) node)->indexid) && + TargetListSupportsBackwardScan(node->targetlist); + case T_IndexOnlyScan: return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); @@ -537,8 +555,8 @@ TargetListSupportsBackwardScan(List *targetlist) } /* - * An IndexScan or IndexOnlyScan node supports backward scan only if the - * index's AM does. + * An IndexScan, IndirectIndexScan or IndexOnlyScan node supports backward scan + * only if the index's AM does. 
*/ static bool IndexSupportsBackwardScan(Oid indexid) diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 009c1b7..02c7d17 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -206,6 +206,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative) if (speculative && ii->ii_Unique) BuildSpeculativeIndexInfo(indexDesc, ii); + if (ii->ii_IsIndirect) + resultRelInfo->ri_hasIndirectIndexes = true; + + /* Remember which of these indexes is the table's primary key */ + if (indexDesc->rd_index->indisprimary) + resultRelInfo->ri_PrimaryKeyIndex = i; + relationDescs[i] = indexDesc; indexInfoArray[i] = ii; i++; @@ -259,21 +266,23 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * the same is done for non-deferred constraints, but report * if conflict was speculative or deferred conflict to caller) * + * If 'isHOTupdate' is TRUE, the new tuple comes from a HOT update; + * only add entries to indirect indexes in that case, and only if + * the indexed values changed. + * * If 'arbiterIndexes' is nonempty, noDupErr applies only to * those indexes. NIL means noDupErr applies to all indexes. - * - * CAUTION: this must not be called for a HOT update. - * We can't defend against that here for lack of info. - * Should we change the API to make it safer? * ---------------------------------------------------------------- */ List * ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, EState *estate, + bool isHOTupdate, bool noDupErr, bool *specConflict, - List *arbiterIndexes) + List *arbiterIndexes, + Bitmapset *unchangedAttrs) { List *result = NIL; ResultRelInfo *resultRelInfo; @@ -285,6 +294,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; + Datum pkeyValues[INDEX_MAX_KEYS]; /* * Get information from the result relation info structure. 
@@ -324,6 +334,20 @@ ExecInsertIndexTuples(TupleTableSlot *slot, if (!indexInfo->ii_ReadyForInserts) continue; + /* + * If this is the primary key, form the values array if there are any + * indirect indexes. We will need it even if this is a HOT update. + * This coding assumes that the primary key appears before indirect + * indexes in the index list. + */ + if (resultRelInfo->ri_hasIndirectIndexes && + (i == resultRelInfo->ri_PrimaryKeyIndex)) + FormIndexDatum(indexInfo, slot, estate, pkeyValues, isnull); + + /* If the index is not indirect, then HOT updates mustn't insert anything */ + if (isHOTupdate && !indexInfo->ii_IsIndirect) + continue; + /* Check for partial index */ if (indexInfo->ii_Predicate != NIL) { @@ -348,6 +372,34 @@ ExecInsertIndexTuples(TupleTableSlot *slot, } /* + * For indirect indexes, verify whether the indexed attributes have + * changed; if they have not, skip the insertion. + * + * When unchangedAttrs is NULL, we cannot skip the insertion (it only + * becomes set for updates.) + */ + if (indexInfo->ii_IsIndirect && unchangedAttrs) + { + int j; + bool may_skip_insertion = true; + + for (j = 0; j < indexInfo->ii_NumIndexAttrs; j++) + { + /* + * FIXME this assumes indirect indexes are only on columns, not + * expressions. + */ + if (bms_is_member(indexInfo->ii_KeyAttrNumbers[j], unchangedAttrs)) + continue; + may_skip_insertion = false; + } + + /* may skip insertion if no indexed attribute changed value */ + if (may_skip_insertion) + continue; + } + + /* * FormIndexDatum fills in its values and isnull parameters with the * appropriate values for the column(s) of the index. 
*/ @@ -389,6 +441,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot, index_insert(indexRelation, /* index relation */ values, /* array of index Datums */ isnull, /* null flags */ + pkeyValues, /* values of primary key */ tupleid, /* tid of heap tuple */ heapRelation, /* heap relation */ checkUnique); /* type of uniqueness check to do */ @@ -726,7 +779,7 @@ retry: index_rescan(index_scan, scankeys, index_natts, NULL, 0); while ((tup = index_getnext(index_scan, - ForwardScanDirection)) != NULL) + ForwardScanDirection, NULL)) != NULL) { TransactionId xwait; ItemPointerData ctid_wait; diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index bca34a5..370cdc0 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1252,6 +1252,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_FdwRoutine = NULL; resultRelInfo->ri_FdwState = NULL; resultRelInfo->ri_usesFdwDirectModify = false; + resultRelInfo->ri_hasIndirectIndexes = false; resultRelInfo->ri_ConstraintExprs = NULL; resultRelInfo->ri_junkFilter = NULL; resultRelInfo->ri_projectReturning = NULL; diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 554244f..9df9160 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -93,6 +93,7 @@ #include "executor/nodeHashjoin.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" +#include "executor/nodeIndirectIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" #include "executor/nodeMaterial.h" @@ -203,6 +204,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_IndirectIndexScan: + result = (PlanState *) ExecInitIndirectIndexScan((IndirectIndexScan *) node, + estate, eflags); + break; + case T_IndexOnlyScan: result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node, estate, eflags); @@ -427,6 +433,10 @@ ExecProcNode(PlanState 
*node) result = ExecIndexScan((IndexScanState *) node); break; + case T_IndirectIndexScanState: + result = ExecIndirectIndexScan((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: result = ExecIndexOnlyScan((IndexOnlyScanState *) node); break; @@ -677,6 +687,10 @@ ExecEndNode(PlanState *node) ExecEndIndexScan((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecEndIndirectIndexScan((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: ExecEndIndexOnlyScan((IndexOnlyScanState *) node); break; diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 4f6f91c..c06ff83 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -122,7 +122,7 @@ IndexOnlyNext(IndexOnlyScanState *node) * Rats, we have to visit the heap to check visibility. */ node->ioss_HeapFetches++; - tuple = index_fetch_heap(scandesc); + tuple = index_fetch_heap(scandesc, NULL); if (tuple == NULL) continue; /* no visible tuple, try next index entry */ diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 3143bd9..df115f8 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -102,7 +102,7 @@ IndexNext(IndexScanState *node) /* * ok, now that we have what we need, fetch the next tuple. */ - while ((tuple = index_getnext(scandesc, direction)) != NULL) + while ((tuple = index_getnext(scandesc, direction, NULL)) != NULL) { /* * Store the scanned tuple in the scan tuple slot of the scan state. @@ -212,7 +212,7 @@ IndexNextWithReorder(IndexScanState *node) * Fetch next tuple from the index. 
*/ next_indextuple: - tuple = index_getnext(scandesc, ForwardScanDirection); + tuple = index_getnext(scandesc, ForwardScanDirection, NULL); if (!tuple) { /* diff --git a/src/backend/executor/nodeIndirectIndexscan.c b/src/backend/executor/nodeIndirectIndexscan.c new file mode 100644 index 0000000..456dcc5 --- /dev/null +++ b/src/backend/executor/nodeIndirectIndexscan.c @@ -0,0 +1,536 @@ +/*------------------------------------------------------------------------- + * + * nodeIndirectIndexscan.c + * Routines to support indirect index scans + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeIndirectIndexscan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecIndirectIndexScan scans an index + * IndirectIndexNext retrieve next tuple + * ExecInitIndirectIndexScan creates and initializes state info. + * ExecReScanIndirectIndexScan rescans the indexed relation. + * ExecEndIndirectIndexScan releases all storage. + * ExecIndirectIndexMarkPos marks scan position. + * ExecIndirectIndexRestrPos restores scan position. + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/visibilitymap.h" +#include "executor/execdebug.h" +#include "executor/nodeIndirectIndexscan.h" +#include "executor/nodeIndexscan.h" +#include "storage/bufmgr.h" +#include "storage/predicate.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +/* ---------------------------------------------------------------- + * IndirectIndexNext + * + * Retrieve a tuple from the IndirectIndexScan node's index. 
+ * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndirectIndexNext(IndirectIndexScanState *node) +{ + EState *estate; + ExprContext *econtext; + ScanDirection direction; + TupleTableSlot *slot; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((IndirectIndexScan *) node->ss.ps.plan)->indexorderdir)) + { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + /*---------- + * OK, now that we have what we need, fetch the next tuple. + * + * if we don't have a current indirect tuple: + * obtain tuple from indirect index into slot + * if no tuple is obtained, return "we're done" + * apply rescan on PK, set tuple as scankeys + * obtain tuple from PK + * if no tuple is obtained: + * clear the indirect slot + * start over + * return tuple + *---------- + */ + for (;;) + { + HeapTuple pktuple; + + /* + * If we haven't read a tuple from the indirect index, do so now and + * install it as scan keys into the primary key scan. + */ + if (!node->iiss_HaveIndirectTuple) + { + ScanKeyData pkkeys[INDEX_MAX_KEYS]; + Datum pkvalues[INDEX_MAX_KEYS]; + int nkeys = node->iiss_PKRelationDesc->rd_index->indnatts; + bool isDone; + int i; + + /* + * Obtain one tuple from the indirect index. If there are no more + * tuples in the indirect index, then the scan is done. + */ + index_getnext_pkey(node->iiss_IndScanDesc, direction, pkvalues, &isDone); + if (isDone) + return ExecClearTuple(slot); /* no more tuples */ + + /* + * We now have a tuple, but we don't know if it's worthy of being + * kept around. Initialize flag to have it killed until we know + * better. 
+ */ + node->iiss_HaveIndirectTuple = true; + node->iiss_IndirectTupleIsLive = false; + + /* Install the primary key scan keys, and get it started */ + for (i = 0; i < nkeys; i++) + { + ScanKeyInit(&pkkeys[i], + i + 1, + BTEqualStrategyNumber, + node->iiss_EqualFuncs[i], + pkvalues[i]); + } + + index_rescan(node->iiss_PKScanDesc, pkkeys, nkeys, + NULL, 0); + } + + /* + * By now, we have a valid tuple from the indirect index and need to + * read (more?) tuples from the PK to return. + */ + pktuple = index_getnext(node->iiss_PKScanDesc, ForwardScanDirection, + &node->iiss_IndirectTupleIsLive); + + if (pktuple == NULL) + { + /* + * If no tuples are obtained from the PK for the current tuple, + * start over making sure that the indirect scan will advance. + */ + node->iiss_HaveIndirectTuple = false; + + /* + * If the primary key scan says that all tuples are dead, then the + * indirect index tuple doesn't point to anything live anymore, so + * have it removed. + */ + if (!node->iiss_IndirectTupleIsLive) + node->iiss_IndScanDesc->kill_prior_tuple = true; + + continue; + } + + /* + * Store the scanned tuple in the scan tuple slot of the scan state. + * Note: we pass 'false' because tuples returned by amgetnext are + * pointers onto disk pages and must not be pfree()'d. + */ + ExecStoreTuple(pktuple, slot, node->iiss_PKScanDesc->xs_cbuf, false); + + /* + * Recheck the index quals. For indirect indexes, this is not + * optional. + */ + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + if (!ExecQual(node->indexqualorig, econtext, false)) + { + /* + * If the primary key scan says that all tuples are dead, then the + * indirect index tuple doesn't point to anything live anymore, so + * have it removed. 
+ */ + if (!node->iiss_IndirectTupleIsLive) + node->iiss_IndScanDesc->kill_prior_tuple = true; + + continue; + } + + return slot; + } + + return ExecClearTuple(slot); +} + +/* + * IndirectIndexRecheck -- access method routine to recheck a tuple in + * EvalPlanQual + */ +static bool +IndirectIndexRecheck(IndirectIndexScanState *node, TupleTableSlot *slot) +{ + elog(ERROR, "EvalPlanQual recheck is not supported in indirect index scans"); + return false; /* keep compiler quiet */ +} + +TupleTableSlot * +ExecIndirectIndexScan(IndirectIndexScanState *node) +{ + /* + * If we have runtime keys and they've not already been set up, do it now. + */ + if (node->iiss_NumRuntimeKeys != 0 && !node->iiss_RuntimeKeysReady) + ExecReScan((PlanState *) node); + + Assert(node->iiss_NumOrderByKeys == 0); /* FIXME and test */ + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndirectIndexNext, + (ExecScanRecheckMtd) IndirectIndexRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanIndirectIndexScan(node) + * + * Recalculates the values of any scan keys whose value depends on + * information known at runtime, then rescans the indexed relation. + * + * Updating the scan key was formerly done separately in + * ExecUpdateIndexScanKeys. Integrating it into ReScan makes + * rescans of indices and relations/general streams more uniform. + * ---------------------------------------------------------------- + */ +void +ExecReScanIndirectIndexScan(IndirectIndexScanState *node) +{ + /* + * If we are doing runtime key calculations (ie, any of the index key + * values weren't simple Consts), compute the new key values. But first, + * reset the context so we don't leak memory as each outer tuple is + * scanned. Note this assumes that we will recalculate *all* runtime keys + * on each call. 
+ */ + if (node->iiss_NumRuntimeKeys != 0) + { + ExprContext *econtext = node->iiss_RuntimeContext; + + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, + node->iiss_RuntimeKeys, + node->iiss_NumRuntimeKeys); + } + node->iiss_RuntimeKeysReady = true; + + /* XXX We don't support reordering of results */ + + /* reset index scan */ + Assert(node->iiss_NumOrderByKeys == 0); /* FIXME and test */ + index_rescan(node->iiss_IndScanDesc, + node->iiss_ScanKeys, node->iiss_NumScanKeys, + node->iiss_OrderByKeys, node->iiss_NumOrderByKeys); + + ExecScanReScan(&node->ss); +} + +/* ---------------------------------------------------------------- + * ExecEndIndirectIndexScan + * ---------------------------------------------------------------- + */ +void +ExecEndIndirectIndexScan(IndirectIndexScanState *node) +{ + Relation indexRelationDesc; + Relation pkRelationDesc; + IndexScanDesc indexScanDesc; + IndexScanDesc pkScanDesc; + Relation relation; + + /* + * extract information from the node + */ + indexRelationDesc = node->iiss_IndRelationDesc; + pkRelationDesc = node->iiss_PKRelationDesc; + indexScanDesc = node->iiss_IndScanDesc; + pkScanDesc = node->iiss_PKScanDesc; + relation = node->ss.ss_currentRelation; + + /* clear out tuple table slots */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* close the index relations (no-op if we didn't open them) */ + if (indexScanDesc) + index_endscan(indexScanDesc); + if (pkScanDesc) + index_endscan(pkScanDesc); + if (indexRelationDesc) + index_close(indexRelationDesc, NoLock); + if (pkRelationDesc) + index_close(pkRelationDesc, NoLock); + + /* close the heap relation */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecIndirectIndexMarkPos + * ---------------------------------------------------------------- + */ +void +ExecIndirectIndexMarkPos(IndirectIndexScanState *node) +{ + 
index_markpos(node->iiss_IndScanDesc); + node->iiss_HaveIndirectTuple = false; /* necessary? */ +} + +/* ---------------------------------------------------------------- + * ExecIndirectIndexRestrPos + * ---------------------------------------------------------------- + */ +void +ExecIndirectIndexRestrPos(IndirectIndexScanState *node) +{ + index_restrpos(node->iiss_IndScanDesc); + node->iiss_HaveIndirectTuple = false; /* necessary? */ +} + +/* ---------------------------------------------------------------- + * ExecInitIndirectIndexScan + * + * Initializes the index scan's state information, creates + * scan keys, and opens the base and index relations. + * + * Note: indirect index scans have 3 sets of state information + * because we have to keep track of the base relation, the + * indirect index relation itself, and the primary key index + * relation. + * ---------------------------------------------------------------- + */ +IndirectIndexScanState * +ExecInitIndirectIndexScan(IndirectIndexScan *node, EState *estate, int eflags) +{ + IndirectIndexScanState *indexstate; + Relation currentRelation; + bool relistarget; + int i; + + /* + * create state structure + */ + indexstate = makeNode(IndirectIndexScanState); + indexstate->ss.ps.plan = (Plan *) node; + indexstate->ss.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &indexstate->ss.ps); + + indexstate->ss.ps.ps_TupFromTlist = false; + + /* + * initialize child expressions + */ + indexstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->scan.plan.targetlist, + (PlanState *) indexstate); + indexstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) indexstate); + indexstate->indexqualorig = (List *) + ExecInitExpr((Expr *) node->indexqualorig, + (PlanState *) indexstate); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &indexstate->ss.ps); + 
ExecInitScanTupleSlot(estate, &indexstate->ss); + + /* + * open the base relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); + + indexstate->ss.ss_currentRelation = currentRelation; + indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * get the scan type from the relation descriptor. + */ + ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation)); + + /* + * Initialize result tuple type and projection info. + */ + ExecAssignResultTypeFromTL(&indexstate->ss.ps); + ExecAssignScanProjectionInfo(&indexstate->ss); + + /* + * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop + * here. This allows an index-advisor plugin to EXPLAIN a plan containing + * references to nonexistent indexes. + */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) + return indexstate; + + /* + * Open the index relations. + * + * If the parent table is one of the target relations of the query, then + * InitPlan already opened and write-locked the indexes, so we can avoid + * taking another lock here. Otherwise we need normal reader's locks. + */ + relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); + indexstate->iiss_IndRelationDesc = + index_open(node->indexid, relistarget ? NoLock : AccessShareLock); + indexstate->iiss_PKRelationDesc = + index_open(node->pkindexid, relistarget ? 
NoLock : AccessShareLock); + + /* + * Initialize index-specific scan state + */ + indexstate->iiss_HaveIndirectTuple = false; + indexstate->iiss_IndirectTupleIsLive = false; + indexstate->iiss_RuntimeKeysReady = false; + indexstate->iiss_RuntimeKeys = NULL; + indexstate->iiss_NumRuntimeKeys = 0; + + /* + * build the index scan keys from the index qualification + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->iiss_IndRelationDesc, + node->indexqual, + false, + &indexstate->iiss_ScanKeys, + &indexstate->iiss_NumScanKeys, + &indexstate->iiss_RuntimeKeys, + &indexstate->iiss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + Assert(indexstate->iiss_NumOrderByKeys == 0); /* FIXME and test */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->iiss_IndRelationDesc, + node->indexorderby, + true, + &indexstate->iiss_OrderByKeys, + &indexstate->iiss_NumOrderByKeys, + &indexstate->iiss_RuntimeKeys, + &indexstate->iiss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * For the PK scan, initialize the equality func OIDs of each column's + * datatype. 
+ */ + indexstate->iiss_EqualFuncs = palloc(sizeof(Oid) * + indexstate->iiss_PKRelationDesc->rd_index->indnatts); + for (i = 0; i < indexstate->iiss_PKRelationDesc->rd_index->indnatts; i++) + { + AttrNumber attnum; + Oid opfamily; + Oid datatype; + Oid eq_op; + Oid eq_proc; + + attnum = indexstate->iiss_PKRelationDesc->rd_index->indkey.values[i]; + datatype = currentRelation->rd_att->attrs[attnum - 1]->atttypid; + opfamily = indexstate->iiss_PKRelationDesc->rd_opfamily[i]; + eq_op = get_opfamily_member(opfamily, datatype, datatype, + BTEqualStrategyNumber); + if (!OidIsValid(eq_op)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, datatype, datatype, opfamily); + eq_proc = get_opcode(eq_op); + if (!OidIsValid(eq_proc)) + elog(ERROR, "missing oprcode for operator %u", eq_op); + indexstate->iiss_EqualFuncs[i] = eq_proc; + } + + /* XXX initialize sort support? See nodeIndexscan.c */ + + /* + * If we have runtime keys, we need an ExprContext to evaluate them. The + * node's standard context won't do because we want to reset that context + * for every tuple. So, build another context just like the other one... + * -tgl 7/11/00 + */ + if (indexstate->iiss_NumRuntimeKeys != 0) + { + ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; + + ExecAssignExprContext(estate, &indexstate->ss.ps); + indexstate->iiss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; + indexstate->ss.ps.ps_ExprContext = stdecontext; + } + else + { + indexstate->iiss_RuntimeContext = NULL; + } + + /* + * Initialize scan descriptors. + * + * The snapshot to the indirect index doesn't matter, because we only use + * it to obtain primary key values; the visibility is only checked + * through the primary key index tuple anyway. 
+ */ + indexstate->iiss_IndScanDesc = + index_beginscan(currentRelation, + indexstate->iiss_IndRelationDesc, + SnapshotAny, + indexstate->iiss_NumScanKeys, + indexstate->iiss_NumOrderByKeys); + + indexstate->iiss_PKScanDesc = + index_beginscan(currentRelation, + indexstate->iiss_PKRelationDesc, + estate->es_snapshot, + indexstate->iiss_PKRelationDesc->rd_index->indnatts, + 0); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (indexstate->iiss_NumRuntimeKeys == 0) + index_rescan(indexstate->iiss_IndScanDesc, + indexstate->iiss_ScanKeys, + indexstate->iiss_NumScanKeys, + indexstate->iiss_OrderByKeys, + indexstate->iiss_NumOrderByKeys); + + /* + * all done. + */ + return indexstate; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 0d85b15..49aa270 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -512,8 +512,10 @@ ExecInsert(ModifyTableState *mtstate, /* insert index entries for tuple */ recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, true, &specConflict, - arbiterIndexes); + estate, false, true, + &specConflict, + arbiterIndexes, + NULL); /* adjust the tuple's state accordingly */ if (!specConflict) @@ -558,8 +560,9 @@ ExecInsert(ModifyTableState *mtstate, /* insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, - arbiterIndexes); + estate, false, false, + NULL, arbiterIndexes, + NULL); } } @@ -969,6 +972,7 @@ ExecUpdate(ItemPointer tupleid, else { LockTupleMode lockmode; + Bitmapset *unchangedAttrs = NULL; /* * Constraints might reference the tableoid column, so initialize @@ -1012,7 +1016,9 @@ lreplace:; estate->es_output_cid, estate->es_crosscheck_snapshot, true /* wait for commit */ , - &hufd, &lockmode); + &hufd, + &unchangedAttrs, + &lockmode); switch (result) { case 
HeapTupleSelfUpdated: @@ -1097,12 +1103,13 @@ lreplace:; * * Note: heap_update returns the tid (location) of the new tuple in * the t_self field. - * - * If it's a HOT update, we mustn't insert new index entries. */ - if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple)) + if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, NIL); + estate, + HeapTupleIsHeapOnly(tuple), + false, NULL, NIL, + unchangedAttrs); } if (canSetTag) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6955298..30942f6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -437,6 +437,34 @@ _copyIndexScan(const IndexScan *from) } /* + * _copyIndirectIndexScan + */ +static IndirectIndexScan * +_copyIndirectIndexScan(const IndirectIndexScan *from) +{ + IndirectIndexScan *newnode = makeNode(IndirectIndexScan); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(indexid); + COPY_SCALAR_FIELD(pkindexid); + COPY_NODE_FIELD(indexqual); + COPY_NODE_FIELD(indexqualorig); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indexorderbyorig); + COPY_NODE_FIELD(indexorderbyops); + COPY_SCALAR_FIELD(indexorderdir); + + return newnode; +} + +/* * _copyIndexOnlyScan */ static IndexOnlyScan * @@ -3158,6 +3186,7 @@ _copyIndexStmt(const IndexStmt *from) COPY_SCALAR_FIELD(indexOid); COPY_SCALAR_FIELD(oldNode); COPY_SCALAR_FIELD(unique); + COPY_SCALAR_FIELD(isindirect); COPY_SCALAR_FIELD(primary); COPY_SCALAR_FIELD(isconstraint); COPY_SCALAR_FIELD(deferrable); @@ -4430,6 +4459,9 @@ copyObject(const void *from) case T_IndexScan: retval = _copyIndexScan(from); break; + case T_IndirectIndexScan: + retval = _copyIndirectIndexScan(from); + break; case T_IndexOnlyScan: retval = _copyIndexOnlyScan(from); break; diff --git a/src/backend/nodes/equalfuncs.c 
b/src/backend/nodes/equalfuncs.c index 548a2aa..efcd04c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1273,6 +1273,7 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b) COMPARE_SCALAR_FIELD(indexOid); COMPARE_SCALAR_FIELD(oldNode); COMPARE_SCALAR_FIELD(unique); + COMPARE_SCALAR_FIELD(isindirect); COMPARE_SCALAR_FIELD(primary); COMPARE_SCALAR_FIELD(isconstraint); COMPARE_SCALAR_FIELD(deferrable); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9fe9873..7c39e13 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -489,6 +489,23 @@ _outIndexScan(StringInfo str, const IndexScan *node) } static void +_outIndirectIndexScan(StringInfo str, const IndirectIndexScan *node) +{ + WRITE_NODE_TYPE("INDIRECTINDEXSCAN"); + + _outScanInfo(str, (const Scan *) node); + + WRITE_OID_FIELD(indexid); + WRITE_OID_FIELD(pkindexid); + WRITE_NODE_FIELD(indexqual); + WRITE_NODE_FIELD(indexqualorig); + WRITE_NODE_FIELD(indexorderby); + WRITE_NODE_FIELD(indexorderbyorig); + WRITE_NODE_FIELD(indexorderbyops); + WRITE_ENUM_FIELD(indexorderdir, ScanDirection); +} + +static void _outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node) { WRITE_NODE_TYPE("INDEXONLYSCAN"); @@ -2451,6 +2468,7 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_OID_FIELD(indexOid); WRITE_OID_FIELD(oldNode); WRITE_BOOL_FIELD(unique); + WRITE_BOOL_FIELD(isindirect); WRITE_BOOL_FIELD(primary); WRITE_BOOL_FIELD(isconstraint); WRITE_BOOL_FIELD(deferrable); @@ -3389,6 +3407,9 @@ outNode(StringInfo str, const void *obj) case T_IndexScan: _outIndexScan(str, obj); break; + case T_IndirectIndexScan: + _outIndirectIndexScan(str, obj); + break; case T_IndexOnlyScan: _outIndexOnlyScan(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 63f6336..29a3100 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1671,6 +1671,28 @@ 
_readIndexScan(void) } /* + * _readIndirectIndexScan + */ +static IndirectIndexScan * +_readIndirectIndexScan(void) +{ + READ_LOCALS(IndirectIndexScan); + + ReadCommonScan(&local_node->scan); + + READ_OID_FIELD(indexid); + READ_OID_FIELD(pkindexid); + READ_NODE_FIELD(indexqual); + READ_NODE_FIELD(indexqualorig); + READ_NODE_FIELD(indexorderby); + READ_NODE_FIELD(indexorderbyorig); + READ_NODE_FIELD(indexorderbyops); + READ_ENUM_FIELD(indexorderdir, ScanDirection); + + READ_DONE(); +} + +/* * _readIndexOnlyScan */ static IndexOnlyScan * @@ -2465,6 +2487,8 @@ parseNodeString(void) return_value = _readSampleScan(); else if (MATCH("INDEXSCAN", 9)) return_value = _readIndexScan(); + else if (MATCH("INDIRECTINDEXSCAN", 17)) + return_value = _readIndirectIndexScan(); else if (MATCH("INDEXONLYSCAN", 13)) return_value = _readIndexOnlyScan(); else if (MATCH("BITMAPINDEXSCAN", 15)) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 2952bfb..5ff3ebe 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -776,9 +776,10 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, * and those we mustn't submit to add_path here.) * * Also, pick out the ones that are usable as bitmap scans. For that, we - * must discard indexes that don't support bitmap scans, and we also are - * only interested in paths that have some selectivity; we should discard - * anything that was generated solely for ordering purposes. + * must discard indexes that don't support bitmap scans as well as + * indirect indexes, and we also are only interested in paths that have + * some selectivity; we should discard anything that was generated solely + * for ordering purposes. 
*/ foreach(lc, indexpaths) { @@ -788,6 +789,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, add_path(rel, (Path *) ipath); if (index->amhasgetbitmap && + !index->indirect && (ipath->path.pathkeys == NIL || ipath->indexselectivity < 1.0)) *bitindexpaths = lappend(*bitindexpaths, ipath); @@ -800,6 +802,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, */ if (skip_nonnative_saop) { + Assert(!index->indirect); /* what to do here? */ indexpaths = build_index_paths(root, rel, index, clauses, false, @@ -1119,6 +1122,10 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, if (!index->amhasgetbitmap) continue; + /* Ignore indirect indexes too */ + if (index->indirect) + continue; + /* * Ignore partial indexes that do not match the query. If a partial * index is marked predOK then we know it's OK. Otherwise, we have to @@ -1798,6 +1805,10 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index) if (!enable_indexonlyscan) return false; + /* Can't use indirect indexes for index-only scans */ + if (index->indirect) + return false; + /* * Check that all needed attributes of the relation are available from the * index. 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ad49674..52ec437 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -166,6 +166,17 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, List *indexorderby, List *indexorderbyorig, List *indexorderbyops, ScanDirection indexscandir); +static IndirectIndexScan *make_indirectindexscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + Oid pkindexid, + List *indexqual, + List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, + List *indexorderbyops, + ScanDirection indexscandir); static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexorderby, @@ -2494,6 +2505,8 @@ create_indexscan_plan(PlannerInfo *root, /* Finally ready to build the plan node */ if (indexonly) + { + Assert(!best_path->indexinfo->indirect); /* maybe someday */ scan_plan = (Scan *) make_indexonlyscan(tlist, qpqual, baserelid, @@ -2502,6 +2515,23 @@ create_indexscan_plan(PlannerInfo *root, fixed_indexorderbys, best_path->indexinfo->indextlist, best_path->indexscandir); + } + else if (best_path->indexinfo->indirect) + { + Oid pkindexoid = best_path->indexinfo->pkindex->indexoid; + + scan_plan = (Scan *) make_indirectindexscan(tlist, + qpqual, + baserelid, + indexoid, + pkindexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + indexorderbyops, + best_path->indexscandir); + } else scan_plan = (Scan *) make_indexscan(tlist, qpqual, @@ -4720,6 +4750,39 @@ make_indexscan(List *qptlist, return node; } +static IndirectIndexScan * +make_indirectindexscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + Oid pkindexid, + List *indexqual, + List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, + List *indexorderbyops, + ScanDirection indexscandir) +{ + 
IndirectIndexScan *node = makeNode(IndirectIndexScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->pkindexid = pkindexid; + node->indexqual = indexqual; + node->indexqualorig = indexqualorig; + node->indexorderby = indexorderby; + node->indexorderbyorig = indexorderbyorig; + node->indexorderbyops = indexorderbyops; + node->indexorderdir = indexscandir; + + return node; +} + static IndexOnlyScan * make_indexonlyscan(List *qptlist, List *qpqual, diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 2fe1c8c..8401c14 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -490,6 +490,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->indexorderbyorig, rtoffset); } break; + case T_IndirectIndexScan: + { + IndirectIndexScan *splan = (IndirectIndexScan *) plan; + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->indexqual = + fix_scan_list(root, splan->indexqual, rtoffset); + splan->indexqualorig = + fix_scan_list(root, splan->indexqualorig, rtoffset); + splan->indexorderby = + fix_scan_list(root, splan->indexorderby, rtoffset); + splan->indexorderbyorig = + fix_scan_list(root, splan->indexorderbyorig, rtoffset); + } + break; case T_IndexOnlyScan: { IndexOnlyScan *splan = (IndexOnlyScan *) plan; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3171743..aa5190c 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2327,6 +2327,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_IndirectIndexScan: + 
finalize_primnode((Node *) ((IndirectIndexScan *) plan)->indexqual, + &context); + finalize_primnode((Node *) ((IndirectIndexScan *) plan)->indexorderby, + &context); + break; + case T_IndexOnlyScan: finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual, &context); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 72272d9..fa78e33 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -152,6 +152,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, List *indexoidlist; ListCell *l; LOCKMODE lmode; + IndexOptInfo *pkinfo = NULL; indexoidlist = RelationGetIndexList(relation); @@ -234,6 +235,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } info->relam = indexRelation->rd_rel->relam; + info->indirect = index->indisindirect; + if (info->indirect) + { + Assert(pkinfo != NULL); + info->pkindex = pkinfo; + } /* We copy just the fields we need, not all of rd_amroutine */ amroutine = indexRelation->rd_amroutine; @@ -354,6 +361,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->immediate = index->indimmediate; info->hypothetical = false; + /* remember primary key for indirect indexes */ + if (index->indisprimary) + pkinfo = info; + /* * Estimate the index size. 
If it's not a partial index, we lock * the number-of-tuples estimate to equal the parent table; if it diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 08cf5b7..c39c4a4 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -418,7 +418,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <node> overlay_placing substr_from substr_for %type <boolean> opt_instead -%type <boolean> opt_unique opt_concurrently opt_verbose opt_full +%type <boolean> opt_unique opt_indirect opt_concurrently opt_verbose opt_full %type <boolean> opt_freeze opt_default opt_recheck %type <defelt> opt_binary opt_oids copy_delimiter @@ -615,7 +615,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P - INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P + INCLUDING INCREMENT INDEX INDEXES INDIRECT + INHERIT INHERITS INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -7017,25 +7018,26 @@ defacl_privilege_target: * willing to make TABLESPACE a fully reserved word. 
*****************************************************************************/ -IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name +IndexStmt: CREATE opt_unique opt_indirect INDEX opt_concurrently opt_index_name ON qualified_name access_method_clause '(' index_params ')' opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; - n->concurrent = $4; - n->idxname = $5; - n->relation = $7; - n->accessMethod = $8; - n->indexParams = $10; - n->options = $12; - n->tableSpace = $13; - n->whereClause = $14; + n->concurrent = $5; + n->idxname = $6; + n->relation = $8; + n->accessMethod = $9; + n->indexParams = $11; + n->options = $13; + n->tableSpace = $14; + n->whereClause = $15; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; n->oldNode = InvalidOid; n->primary = false; + n->isindirect = $3; n->isconstraint = false; n->deferrable = false; n->initdeferred = false; @@ -7043,25 +7045,26 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name n->if_not_exists = false; $$ = (Node *)n; } - | CREATE opt_unique INDEX opt_concurrently IF_P NOT EXISTS index_name + | CREATE opt_unique opt_indirect INDEX opt_concurrently IF_P NOT EXISTS index_name ON qualified_name access_method_clause '(' index_params ')' opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; - n->concurrent = $4; - n->idxname = $8; - n->relation = $10; - n->accessMethod = $11; - n->indexParams = $13; - n->options = $15; - n->tableSpace = $16; - n->whereClause = $17; + n->concurrent = $5; + n->idxname = $9; + n->relation = $11; + n->accessMethod = $12; + n->indexParams = $14; + n->options = $16; + n->tableSpace = $17; + n->whereClause = $18; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; n->oldNode = InvalidOid; n->primary = false; + n->isindirect = $3; n->isconstraint = false; n->deferrable = false; n->initdeferred = false; @@ -7076,6 +7079,11 @@ 
opt_unique: | /*EMPTY*/ { $$ = FALSE; } ; +opt_indirect: + INDIRECT { $$ = TRUE; } + | /*EMPTY*/ { $$ = FALSE; } + ; + opt_concurrently: CONCURRENTLY { $$ = TRUE; } | /*EMPTY*/ { $$ = FALSE; } @@ -14107,6 +14115,7 @@ unreserved_keyword: | INCREMENT | INDEX | INDEXES + | INDIRECT | INHERIT | INHERITS | INLINE_P diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 4e2ba19..14b0de7 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1248,7 +1248,8 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, if (!attrsOnly) { if (!isConstraint) - appendStringInfo(&buf, "CREATE %sINDEX %s ON %s USING %s (", + appendStringInfo(&buf, "CREATE %s%sINDEX %s ON %s USING %s (", + idxrec->indisindirect ? "INDIRECT " : "", idxrec->indisunique ? "UNIQUE " : "", quote_identifier(NameStr(idxrelrec->relname)), generate_relation_name(indrelid, NIL), diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 4973396..5df2bd7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5029,6 +5029,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, if (index->relam != BTREE_AM_OID) continue; + /* Ignore indirect indexes */ + if (index->indirect) + continue; + /* * Ignore partial indexes --- we only want stats that cover the entire * relation. 
@@ -5149,7 +5153,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* Fetch first tuple in sortop's direction */ if ((tup = index_getnext(index_scan, - indexscandir)) != NULL) + indexscandir, NULL)) != NULL) { /* Extract the index column values from the heap tuple */ ExecStoreTuple(tup, slot, InvalidBuffer, false); @@ -5181,7 +5185,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* Fetch first tuple in reverse direction */ if ((tup = index_getnext(index_scan, - -indexscandir)) != NULL) + -indexscandir, NULL)) != NULL) { /* Extract the index column values from the heap tuple */ ExecStoreTuple(tup, slot, InvalidBuffer, false); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 2a68359..1e2ff56 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -271,7 +271,7 @@ static TupleDesc GetPgIndexDescriptor(void); static void AttrDefaultFetch(Relation relation); static void CheckConstraintFetch(Relation relation); static int CheckConstraintCmp(const void *a, const void *b); -static List *insert_ordered_oid(List *list, Oid datum); +static List *insert_ordered_oid(List *list, Oid datum, bool must_be_first); static void InitIndexAmRoutine(Relation relation); static void IndexSupportInitialize(oidvector *indclass, RegProcedure *indexSupport, @@ -2335,8 +2335,10 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) list_free_deep(relation->rd_fkeylist); list_free(relation->rd_indexlist); bms_free(relation->rd_indexattr); + bms_free(relation->rd_hotattr); bms_free(relation->rd_keyattr); bms_free(relation->rd_idattr); + bms_free(relation->rd_indirectattr); if (relation->rd_options) pfree(relation->rd_options); if (relation->rd_indextuple) @@ -4288,6 +4290,47 @@ RelationGetFKeyList(Relation relation) } /* + * Return the relation's primary key OID. + * + * Surely this can be made better ... 
+ */ +Oid +RelationGetPrimaryKey(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + Oid pkid = InvalidOid; + + /* Currently we just scan pg_index every time this is called */ + ScanKeyInit(&skey, + Anum_pg_index_indrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = heap_open(IndexRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true, + NULL, 1, &skey); + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + { + Form_pg_index index = (Form_pg_index) GETSTRUCT(htup); + + if (!IndexIsLive(index)) + continue; + if (!index->indisprimary) + continue; + pkid = index->indexrelid; + break; + } + + systable_endscan(indscan); + heap_close(indrel, AccessShareLock); + + return pkid; +} + +/* * RelationGetIndexList -- get a list of OIDs of indexes on this relation * * The index list is created only if someone requests it. We scan pg_index @@ -4301,7 +4344,8 @@ RelationGetFKeyList(Relation relation) * Such indexes are expected to be dropped momentarily, and should not be * touched at all by any caller of this function. * - * The returned list is guaranteed to be sorted in order by OID. This is + * The returned list is guaranteed to be sorted in order by OID, except that + * the primary key is always in front. This is * needed by the executor, since for index types that we obtain exclusive * locks on when updating the index, all backends must lock the indexes in * the same order or we will get deadlocks (see ExecOpenIndices()). 
Any @@ -4377,7 +4421,8 @@ RelationGetIndexList(Relation relation) continue; /* Add index's OID to result list in the proper order */ - result = insert_ordered_oid(result, index->indexrelid); + result = insert_ordered_oid(result, index->indexrelid, + index->indisprimary); /* * indclass cannot be referenced directly through the C struct, @@ -4450,12 +4495,12 @@ RelationGetIndexList(Relation relation) * indexes... */ static List * -insert_ordered_oid(List *list, Oid datum) +insert_ordered_oid(List *list, Oid datum, bool must_be_first) { ListCell *prev; /* Does the datum belong at the front? */ - if (list == NIL || datum < linitial_oid(list)) + if (list == NIL || datum < linitial_oid(list) || must_be_first) return lcons_oid(datum, list); /* No, so find the entry it belongs after */ prev = list_head(list); @@ -4492,7 +4537,7 @@ insert_ordered_oid(List *list, Oid datum) * to ensure that a correct rd_indexattr set has been cached before first * calling RelationSetIndexList; else a subsequent inquiry might cause a * wrong rd_indexattr set to get computed and cached. Likewise, we do not - * touch rd_keyattr or rd_idattr. + * touch rd_hotattr, rd_keyattr, rd_indirectattr or rd_idattr. */ void RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex) @@ -4703,9 +4748,7 @@ RelationGetIndexPredicate(Relation relation) * simple index keys, but attributes used in expressions and partial-index * predicates.) * - * Depending on attrKind, a bitmap covering the attnums for all index columns, - * for all potential foreign key columns, or for all columns in the configured - * replica identity index is returned. + * A bitmap covering attnums is returned, depending on attrKind. * * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that * we can include system attributes (e.g., OID) in the bitmap representation. 
@@ -4721,8 +4764,10 @@ Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) { Bitmapset *indexattrs; /* indexed columns */ + Bitmapset *hotattrs; /* HOT-index-relevant columns */ Bitmapset *uindexattrs; /* columns in unique indexes */ Bitmapset *idindexattrs; /* columns in the replica identity */ + Bitmapset *indirectattrs; /* columns in indirect indexes */ List *indexoidlist; Oid relreplindex; ListCell *l; @@ -4737,8 +4782,12 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return bms_copy(relation->rd_indexattr); case INDEX_ATTR_BITMAP_KEY: return bms_copy(relation->rd_keyattr); + case INDEX_ATTR_BITMAP_HOT: + return bms_copy(relation->rd_hotattr); case INDEX_ATTR_BITMAP_IDENTITY_KEY: return bms_copy(relation->rd_idattr); + case INDEX_ATTR_BITMAP_INDIRECT_INDEXES: + return bms_copy(relation->rd_indirectattr); default: elog(ERROR, "unknown attrKind %u", attrKind); } @@ -4767,7 +4816,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relreplindex = relation->rd_replidindex; /* - * For each index, add referenced attributes to indexattrs. + * For each index, add referenced attributes to the attribute bitmaps. * * Note: we consider all indexes returned by RelationGetIndexList, even if * they are not indisready or indisvalid. This is important because an @@ -4777,8 +4826,10 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) * won't be returned at all by RelationGetIndexList. 
*/ indexattrs = NULL; + hotattrs = NULL; uindexattrs = NULL; idindexattrs = NULL; + indirectattrs = NULL; foreach(l, indexoidlist) { Oid indexOid = lfirst_oid(l); @@ -4787,6 +4838,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) int i; bool isKey; /* candidate key */ bool isIDKey; /* replica identity index */ + bool isIndirect; /* an indirect index */ + Bitmapset *exprattrs = NULL; indexDesc = index_open(indexOid, AccessShareLock); @@ -4801,6 +4854,9 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) /* Is this index the configured (or default) replica identity? */ isIDKey = (indexOid == relreplindex); + /* Is this an indirect index? */ + isIndirect = indexInfo->ii_IsIndirect; + /* Collect simple attribute references */ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) { @@ -4809,7 +4865,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) if (attrnum != 0) { indexattrs = bms_add_member(indexattrs, - attrnum - FirstLowInvalidHeapAttributeNumber); + attrnum - FirstLowInvalidHeapAttributeNumber); if (isKey) uindexattrs = bms_add_member(uindexattrs, @@ -4818,14 +4874,31 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) if (isIDKey) idindexattrs = bms_add_member(idindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); + + if (isIndirect) + indirectattrs = bms_add_member(indirectattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + else + hotattrs = bms_add_member(hotattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); } } /* Collect all attributes used in expressions, too */ - pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs); + pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &exprattrs); + indexattrs = bms_add_members(indexattrs, exprattrs); + /* these don't affect KEY (unique) indexes */ + hotattrs = bms_add_members(hotattrs, exprattrs); + indirectattrs = bms_add_members(indirectattrs, exprattrs); + 
bms_free(exprattrs); /* Collect all attributes in the index predicate, too */ - pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); + exprattrs = NULL; + pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &exprattrs); + indexattrs = bms_add_members(indexattrs, exprattrs); + hotattrs = bms_add_members(hotattrs, exprattrs); + /* these don't affect KEY or indirect indexes */ + bms_free(exprattrs); index_close(indexDesc, AccessShareLock); } @@ -4835,10 +4908,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) /* Don't leak the old values of these bitmaps, if any */ bms_free(relation->rd_indexattr); relation->rd_indexattr = NULL; + bms_free(relation->rd_hotattr); + relation->rd_hotattr = NULL; bms_free(relation->rd_keyattr); relation->rd_keyattr = NULL; bms_free(relation->rd_idattr); relation->rd_idattr = NULL; + bms_free(relation->rd_indirectattr); + relation->rd_indirectattr = NULL; /* * Now save copies of the bitmaps in the relcache entry. We intentionally @@ -4851,6 +4928,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relation->rd_keyattr = bms_copy(uindexattrs); relation->rd_idattr = bms_copy(idindexattrs); relation->rd_indexattr = bms_copy(indexattrs); + relation->rd_hotattr = bms_copy(hotattrs); + relation->rd_indirectattr = bms_copy(indirectattrs); MemoryContextSwitchTo(oldcxt); /* We return our original working copy for caller to play with */ @@ -4858,10 +4937,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) { case INDEX_ATTR_BITMAP_ALL: return indexattrs; + case INDEX_ATTR_BITMAP_HOT: + return hotattrs; case INDEX_ATTR_BITMAP_KEY: return uindexattrs; case INDEX_ATTR_BITMAP_IDENTITY_KEY: return idindexattrs; + case INDEX_ATTR_BITMAP_INDIRECT_INDEXES: + return indirectattrs; default: elog(ERROR, "unknown attrKind %u", attrKind); return NULL; @@ -5409,8 +5492,10 @@ load_relcache_init_file(bool shared) rel->rd_oidindex = InvalidOid; rel->rd_replidindex = 
InvalidOid; rel->rd_indexattr = NULL; + rel->rd_hotattr = NULL; rel->rd_keyattr = NULL; rel->rd_idattr = NULL; + rel->rd_indirectattr = NULL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 1036cca..9f28c7d 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -175,6 +175,8 @@ typedef struct IndexAmRoutine bool amclusterable; /* does AM handle predicate locks? */ bool ampredlocks; + /* does AM support indirect indexes? */ + bool amcanindirect; /* type of data stored in index, or InvalidOid if variable */ Oid amkeytype; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 81907d5..ff240f3 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -102,13 +102,17 @@ typedef struct SysScanDescData *SysScanDesc; * call is made with UNIQUE_CHECK_EXISTING. The tuple is already in the * index in this case, so it should not be inserted again. Rather, just * check for conflicting live tuples (possibly blocking). + * + * UNIQUE_CHECK_INSERT_SINGLETON only inserts if there isn't already an + * index tuple. This is supported for indirect indexes. 
*/ typedef enum IndexUniqueCheck { UNIQUE_CHECK_NO, /* Don't do any uniqueness checking */ UNIQUE_CHECK_YES, /* Enforce uniqueness at insertion time */ UNIQUE_CHECK_PARTIAL, /* Test uniqueness, but no error */ - UNIQUE_CHECK_EXISTING /* Check if existing tuple is unique */ + UNIQUE_CHECK_EXISTING, /* Check if existing tuple is unique */ + UNIQUE_CHECK_INSERT_SINGLETON /* Only insert if value doesn't exist */ } IndexUniqueCheck; @@ -127,6 +131,7 @@ extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, Datum *values, bool *isnull, + Datum *pkeyValues, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); @@ -146,8 +151,12 @@ extern void index_markpos(IndexScanDesc scan); extern void index_restrpos(IndexScanDesc scan); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); -extern HeapTuple index_fetch_heap(IndexScanDesc scan); -extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction); +extern void index_getnext_pkey(IndexScanDesc scan, ScanDirection direction, + Datum *pkvals, bool *isDone); +extern HeapTuple index_fetch_heap(IndexScanDesc scan, bool *anyfound); + +extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction, + bool *anyfound); extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap); extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 0d12bbb..99af368 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -16,6 +16,7 @@ #include "access/sdir.h" #include "access/skey.h" +#include "nodes/bitmapset.h" #include "nodes/lockoptions.h" #include "nodes/primnodes.h" #include "storage/bufpage.h" @@ -160,7 +161,8 @@ extern void heap_abort_speculative(Relation relation, HeapTuple tuple); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, 
Snapshot crosscheck, bool wait, - HeapUpdateFailureData *hufd, LockTupleMode *lockmode); + HeapUpdateFailureData *hufd, Bitmapset **unchanged_attrs, + LockTupleMode *lockmode); extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_update, diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index 8fb1f6d..9b4a886 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -501,7 +501,7 @@ do { \ #define HeapTupleHeaderIsHeapOnly(tup) \ ( \ - (tup)->t_infomask2 & HEAP_ONLY_TUPLE \ + ((tup)->t_infomask2 & HEAP_ONLY_TUPLE) != 0 \ ) #define HeapTupleHeaderSetHeapOnly(tup) \ diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 37e6ef3..0a70e08 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -37,6 +37,21 @@ typedef enum INDEX_DROP_SET_DEAD } IndexStateFlagsAction; +static inline void +FAKE_CTID_FROM_PKVALUES(ItemPointer iptr, int16 pkNumKeys, Datum *pkvalues) +{ + /* We should support more than one column in the PK */ + Assert(pkNumKeys == 1); + + /* + * Because of some overzealous checks, we can't have an all-zeroes offnum + * nor blknum, so we always set their lowest-order bit to 1; make sure + * to ignore that bit when reading back the value from the TID.
+ */ + iptr->ip_posid = ((pkvalues[0] & 0x7fff) << 1) | 1; + iptr->ip_blkid.bi_hi = (pkvalues[0] >> 15) & 0xffff; + iptr->ip_blkid.bi_lo = ((pkvalues[0] >> 31) & 0xffff) | 1; +} extern void index_check_primary_key(Relation heapRel, IndexInfo *indexInfo, diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index ee97c5d..144d204 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -34,6 +34,7 @@ CATALOG(pg_index,2610) BKI_WITHOUT_OIDS BKI_SCHEMA_MACRO Oid indrelid; /* OID of the relation it indexes */ int16 indnatts; /* number of columns in index */ bool indisunique; /* is this a unique index? */ + bool indisindirect; /* is this an indirect index? */ bool indisprimary; /* is this index for primary key? */ bool indisexclusion; /* is this index for exclusion constraint? */ bool indimmediate; /* is uniqueness enforced immediately? */ @@ -70,26 +71,27 @@ typedef FormData_pg_index *Form_pg_index; * compiler constants for pg_index * ---------------- */ -#define Natts_pg_index 19 +#define Natts_pg_index 20 #define Anum_pg_index_indexrelid 1 #define Anum_pg_index_indrelid 2 #define Anum_pg_index_indnatts 3 #define Anum_pg_index_indisunique 4 -#define Anum_pg_index_indisprimary 5 -#define Anum_pg_index_indisexclusion 6 -#define Anum_pg_index_indimmediate 7 -#define Anum_pg_index_indisclustered 8 -#define Anum_pg_index_indisvalid 9 -#define Anum_pg_index_indcheckxmin 10 -#define Anum_pg_index_indisready 11 -#define Anum_pg_index_indislive 12 -#define Anum_pg_index_indisreplident 13 -#define Anum_pg_index_indkey 14 -#define Anum_pg_index_indcollation 15 -#define Anum_pg_index_indclass 16 -#define Anum_pg_index_indoption 17 -#define Anum_pg_index_indexprs 18 -#define Anum_pg_index_indpred 19 +#define Anum_pg_index_indisindirect 5 +#define Anum_pg_index_indisprimary 6 +#define Anum_pg_index_indisexclusion 7 +#define Anum_pg_index_indimmediate 8 +#define Anum_pg_index_indisclustered 9 +#define Anum_pg_index_indisvalid 10 
+#define Anum_pg_index_indcheckxmin 11 +#define Anum_pg_index_indisready 12 +#define Anum_pg_index_indislive 13 +#define Anum_pg_index_indisreplident 14 +#define Anum_pg_index_indkey 15 +#define Anum_pg_index_indcollation 16 +#define Anum_pg_index_indclass 17 +#define Anum_pg_index_indoption 18 +#define Anum_pg_index_indexprs 19 +#define Anum_pg_index_indpred 20 /* * Index AMs that support ordered scans must support these two indoption diff --git a/src/include/executor/execdebug.h b/src/include/executor/execdebug.h index 950a0bc..e662a69 100644 --- a/src/include/executor/execdebug.h +++ b/src/include/executor/execdebug.h @@ -17,6 +17,7 @@ #ifndef EXECDEBUG_H #define EXECDEBUG_H +#include "access/printtup.h" #include "executor/executor.h" #include "nodes/print.h" diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index b74fa5e..14a2740 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -377,8 +377,9 @@ extern void UnregisterExprContextCallback(ExprContext *econtext, extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate, bool noDupErr, bool *specConflict, - List *arbiterIndexes); + EState *estate, bool isHOTupdate, bool noDupErr, + bool *specConflict, List *arbiterIndexes, + Bitmapset *unchangedAttrs); extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate, ItemPointer conflictTid, List *arbiterIndexes); extern void check_exclusion_constraint(Relation heap, Relation index, diff --git a/src/include/executor/nodeIndirectIndexscan.h b/src/include/executor/nodeIndirectIndexscan.h new file mode 100644 index 0000000..46eea32 --- /dev/null +++ b/src/include/executor/nodeIndirectIndexscan.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * nodeIndirectIndexscan.h + * 
+ * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeIndirectIndexscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEINDIRECTINDEXSCAN_H +#define NODEINDIRECTINDEXSCAN_H + +#include "nodes/execnodes.h" + +extern IndirectIndexScanState *ExecInitIndirectIndexScan(IndirectIndexScan *node, EState *estate, int eflags); +extern TupleTableSlot *ExecIndirectIndexScan(IndirectIndexScanState *node); +extern void ExecEndIndirectIndexScan(IndirectIndexScanState *node); +extern void ExecIndirectIndexMarkPos(IndirectIndexScanState *node); +extern void ExecIndirectIndexRestrPos(IndirectIndexScanState *node); +extern void ExecReScanIndirectIndexScan(IndirectIndexScanState *node); + +#endif /* NODEINDIRECTINDEXSCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d43ec56..09cc997 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -49,6 +49,7 @@ * UniqueProcs * UniqueStrats * Unique is it a unique index? + * IsIndirect is it an indirect index? * ReadyForInserts is it valid for inserts? * Concurrent are we doing a concurrent index build? * BrokenHotChain did we detect any broken HOT chains? 
@@ -73,6 +74,7 @@ typedef struct IndexInfo Oid *ii_UniqueProcs; /* array with one entry per column */ uint16 *ii_UniqueStrats; /* array with one entry per column */ bool ii_Unique; + bool ii_IsIndirect; bool ii_ReadyForInserts; bool ii_Concurrent; bool ii_BrokenHotChain; @@ -307,6 +309,7 @@ typedef struct JunkFilter * NumIndices # of indices existing on result relation * IndexRelationDescs array of relation descriptors for indices * IndexRelationInfo array of key/attr info for indices + * PrimaryKeyIndex array index for the primary key * TrigDesc triggers to be fired, if any * TrigFunctions cached lookup info for trigger functions * TrigWhenExprs array of trigger WHEN expr states @@ -314,6 +317,7 @@ typedef struct JunkFilter * FdwRoutine FDW callback functions, if foreign table * FdwState available to save private state of FDW * usesFdwDirectModify true when modifying foreign table directly + * hasIndirectIndexes true when there are any indirect indexes * WithCheckOptions list of WithCheckOption's to be checked * WithCheckOptionExprs list of WithCheckOption expr states * ConstraintExprs array of constraint-checking expr states @@ -333,6 +337,7 @@ typedef struct ResultRelInfo int ri_NumIndices; RelationPtr ri_IndexRelationDescs; IndexInfo **ri_IndexRelationInfo; + Index ri_PrimaryKeyIndex; TriggerDesc *ri_TrigDesc; FmgrInfo *ri_TrigFunctions; List **ri_TrigWhenExprs; @@ -340,6 +345,7 @@ typedef struct ResultRelInfo struct FdwRoutine *ri_FdwRoutine; void *ri_FdwState; bool ri_usesFdwDirectModify; + bool ri_hasIndirectIndexes; List *ri_WithCheckOptions; List *ri_WithCheckOptionExprs; List **ri_ConstraintExprs; @@ -1382,6 +1388,47 @@ typedef struct IndexScanState } IndexScanState; /* ---------------- + * IndirectIndexScanState information + * + * indexqualorig execution state for indexqualorig expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * IndRelationDesc indirect index relation descriptor + * IndScanDesc indirect index 
scan descriptor + * PKRelationDesc primary key index relation descriptor + * PKScanDesc primary key index scan descriptor + * HaveIndirectTuple a tuple has been read from the index + * IndirectTupleIsLive has the read tuple been proved live? + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * EqualFuncs OIDs of equality functions (one per PK column? XXX document) + * ---------------- + */ +typedef struct IndirectIndexScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *indexqualorig; + ScanKey iiss_ScanKeys; + int iiss_NumScanKeys; + Relation iiss_IndRelationDesc; + IndexScanDesc iiss_IndScanDesc; + Relation iiss_PKRelationDesc; + IndexScanDesc iiss_PKScanDesc; + bool iiss_HaveIndirectTuple; + bool iiss_IndirectTupleIsLive; + ScanKey iiss_OrderByKeys; + int iiss_NumOrderByKeys; + IndexRuntimeKeyInfo *iiss_RuntimeKeys; + int iiss_NumRuntimeKeys; + bool iiss_RuntimeKeysReady; + Oid *iiss_EqualFuncs; + ExprContext *iiss_RuntimeContext; +} IndirectIndexScanState; + +/* ---------------- + * IndexOnlyScanState information + * + * indexqual execution state for indexqual expressions diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 201f248..46c2ff0 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -53,6 +53,7 @@ typedef enum NodeTag T_SeqScan, T_SampleScan, T_IndexScan, + T_IndirectIndexScan, T_IndexOnlyScan, T_BitmapIndexScan, T_BitmapHeapScan, @@ -101,6 +102,7 @@ typedef enum NodeTag T_SeqScanState, T_SampleScanState, T_IndexScanState, + T_IndirectIndexScanState, T_IndexOnlyScanState, T_BitmapIndexScanState, T_BitmapHeapScanState, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 9d8ef77..d4e676d 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ 
-2558,6 +2558,7 @@ typedef struct IndexStmt Oid indexOid; /* OID of an existing index, if any */ Oid oldNode; /* relfilenode of existing storage, if any */ bool unique; /* is index unique? */ + bool isindirect; /* is index indirect? */ bool primary; /* is index a primary key? */ bool isconstraint; /* is it for a pkey/unique constraint? */ bool deferrable; /* is the constraint DEFERRABLE? */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e2fbc7d..c1e6a95 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -353,6 +353,27 @@ typedef struct IndexScan } IndexScan; /* ---------------- + * indirect index scan node + * + * IndirectIndexScan is very similar to IndexScan, but it specifies a + * scan of an indirect index. In addition to the fields in IndexScan, + * it has the OID of the primary key that this index references. + * ---------------- + */ +typedef struct IndirectIndexScan +{ + Scan scan; + Oid indexid; /* OID of indirect index */ + Oid pkindexid; /* OID of the primary key it references */ + List *indexqual; /* list of index quals */ + List *indexqualorig; /* the same in original form */ + List *indexorderby; /* list of index ORDER BY exprs */ + List *indexorderbyorig; /* the same in original form */ + List *indexorderbyops; /* OIDs of sort ops for ORDER BY exprs */ + ScanDirection indexorderdir; /* forward or backward or don't care */ +} IndirectIndexScan; + +/* ---------------- * index-only scan node * * IndexOnlyScan is very similar to IndexScan, but it specifies an diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 3a1255a..82cf1d6 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -581,6 +581,7 @@ typedef struct IndexOptInfo Oid indexoid; /* OID of the index relation */ Oid reltablespace; /* tablespace of index (not table) */ RelOptInfo *rel; /* back-link to index's table */ + struct IndexOptInfo *pkindex; /* link to referenced index, if 
indirect */ /* index-size statistics (from pg_class and elsewhere) */ BlockNumber pages; /* number of disk pages in index */ @@ -613,6 +614,7 @@ typedef struct IndexOptInfo bool predOK; /* true if index predicate matches query */ bool unique; /* true if a unique index */ bool immediate; /* is uniqueness enforced immediately? */ + bool indirect; /* true if index is indirect */ bool hypothetical; /* true if index doesn't really exist */ /* Remaining fields are copied from the index AM's API struct: */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 581ff6e..dbcc892 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -196,6 +196,7 @@ PG_KEYWORD("including", INCLUDING, UNRESERVED_KEYWORD) PG_KEYWORD("increment", INCREMENT, UNRESERVED_KEYWORD) PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD) PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD) +PG_KEYWORD("indirect", INDIRECT, UNRESERVED_KEYWORD) PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD) PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD) PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD) diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index cd7ea1d..b046f12 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -135,9 +135,11 @@ typedef struct RelationData Oid rd_replidindex; /* OID of replica identity index, if any */ /* data managed by RelationGetIndexAttrBitmap: */ - Bitmapset *rd_indexattr; /* identifies columns used in indexes */ + Bitmapset *rd_indexattr; /* identifies columns used in any index */ + Bitmapset *rd_hotattr; /* columns used in HOT-relevant indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ Bitmapset *rd_idattr; /* included in replica identity index */ + Bitmapset *rd_indirectattr; /* cols part of any indirect index */ /* * rd_options is set whenever rd_rel is loaded into the relcache entry. 
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 6ea7dd2..0e5df4c 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,6 +39,7 @@ extern void RelationClose(Relation relation); */ extern List *RelationGetFKeyList(Relation relation); extern List *RelationGetIndexList(Relation relation); +extern Oid RelationGetPrimaryKey(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); extern List *RelationGetIndexExpressions(Relation relation); @@ -48,7 +49,9 @@ typedef enum IndexAttrBitmapKind { INDEX_ATTR_BITMAP_ALL, INDEX_ATTR_BITMAP_KEY, - INDEX_ATTR_BITMAP_IDENTITY_KEY + INDEX_ATTR_BITMAP_HOT, + INDEX_ATTR_BITMAP_IDENTITY_KEY, + INDEX_ATTR_BITMAP_INDIRECT_INDEXES } IndexAttrBitmapKind; extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers