I mostly agree with your patch -- nice work -- but I have a few remarks
on it:
1)
bt_target_page_check():
if (!P_RIGHTMOST(topaque) &&
!_bt_check_natts(state->rel, state->target, P_HIKEY))
This is not very obvious: it looks as if we don't need to check nattrs on the
rightmost page. Okay, I remember that there is no hikey at all on the rightmost
page, but at least a comment should be added. bt_target_page_check() already
implicitly takes 'is the page rightmost or not?' into account by using
P_FIRSTDATAKEY, so it may be better to move the rightmost check into
_bt_check_natts(), refactoring its if-logic along these lines:
if (offset > maxoff)
return true; //nothing to check, also covers empty rightmost page
if (P_ISLEAF) {
if (offnum >= P_FIRSTDATAKEY)
...
else /* if (offnum == P_HIKEY) */
...
}
else // !P_ISLEAF
{
if (offnum == P_FIRSTDATAKEY)
...
else if (offnum > P_FIRSTDATAKEY)
...
else /* if (offnum == P_HIKEY) */
...
}
I see that only three nattrs values are possible: 0, nkey and nkey+nincluded,
but collapsing the if-clauses into three branches makes the code harder to
read. Let the compiler optimize that. Sorry for the late notice, but this only
caught my attention when I noticed the (!P_RIGHTMOST && !_bt_check_natts)
condition.
2)
Style note:
ItemPointerSetInvalid(&trunctuple.t_tid);
+ BTreeTupSetNAtts(&trunctuple, 0);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
It's better to have a blank line between BTreeTupSetNAtts() and the if clause.
3) Naming of BTreeTupGetNAtts/BTreeTupSetNAtts - several lines above we use the
full word Tuple in the downlink macros, but here we use just Tup. It seems
better to use Tuple in both cases. Or Tup, but then still in both cases.
4) BTreeTupSetNAtts - it seems better to add a check that nattrs fits within
the BT_N_KEYS_OFFSET_MASK mask, and the macro should not touch the
BT_RESERVED_OFFSET_MASK bits; currently it overwrites those bits.
The attached patch is rebased to current head and contains a comment
improvement in index_truncate_tuple() - you save some memory by calling
TupleDescCopy(), but didn't explain why a plain pfree() is enough to free all
of the allocated memory.
Peter Geoghegan wrote:
On Tue, Apr 17, 2018 at 3:12 AM, Alexander Korotkov
<a.korot...@postgrespro.ru> wrote:
Hmm, what do you think about making BTreeTupGetNAtts() take tupledesc
argument, not relation? It anyway doesn't need number of key attributes,
only total number of attributes. Then _bt_isequal() would be able to use
BTreeTupGetNAtts().
That would make the BTreeTupGetNAtts() assertions quite a bit more
verbose, since there is usually no existing tuple descriptor variable,
but there is almost always a "rel" variable. The coverage within
_bt_isequal() does not seem important, because we only use it with the
page high key in rare cases, where _bt_moveright() will already have
tested the highkey.
I think it's completely OK to fix broken things when you've to touch
them. Probably, Teodor would decide to make that by separate commit.
So, it's up to him.
You're right to say that this old negative infinity tuple code within
_bt_insertonpg() is broken code, and not just dead code. The code
doesn't just confuse things (e.g. see recent commit 2a67d644). It also
seems like it could actually be harmful. This is code that could only
ever corrupt your database.
I'm fine if Teodor wants to commit that change separately, of course.
--
Teodor Sigaev E-mail: teo...@sigaev.ru
WWW: http://www.sigaev.ru/
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index be0206d58e..7efd7ac47b 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -698,6 +698,9 @@ nextpage:
* "real" data item on the page to the right (if such a first item is
* available).
*
+ * - That tuples report that they have the expected number of attributes.
+ * INCLUDE index pivot tuples should not contain non-key attributes.
+ *
* Furthermore, when state passed shows ShareLock held, and target page is
* internal page, function also checks:
*
@@ -722,43 +725,32 @@ bt_target_page_check(BtreeCheckState *state)
elog(DEBUG2, "verifying %u items on %s block %u", max,
P_ISLEAF(topaque) ? "leaf" : "internal", state->targetblock);
-
- /* Check the number of attributes in high key if any */
- if (!P_RIGHTMOST(topaque))
+ /* Check the number of attributes in high key */
+ if (!P_RIGHTMOST(topaque) &&
+ !_bt_check_natts(state->rel, state->target, P_HIKEY))
{
- if (!_bt_check_natts(state->rel, state->target, P_HIKEY))
- {
- ItemId itemid;
- IndexTuple itup;
- char *itid,
- *htid;
+ ItemId itemid;
+ IndexTuple itup;
- itemid = PageGetItemId(state->target, P_HIKEY);
- itup = (IndexTuple) PageGetItem(state->target, itemid);
- itid = psprintf("(%u,%u)", state->targetblock, P_HIKEY);
- htid = psprintf("(%u,%u)",
- ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
- ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
+ itemid = PageGetItemId(state->target, P_HIKEY);
+ itup = (IndexTuple) PageGetItem(state->target, itemid);
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("wrong number of index tuple attributes for index \"%s\"",
- RelationGetRelationName(state->rel)),
- errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.",
- itid,
- BTreeTupGetNAtts(itup, state->rel),
- P_ISLEAF(topaque) ? "heap" : "index",
- htid,
- (uint32) (state->targetlsn >> 32),
- (uint32) state->targetlsn)));
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("wrong number of high key index tuple attributes in index \"%s\"",
+ RelationGetRelationName(state->rel)),
+ errdetail_internal("Index block=%u natts=%u block type=%s page lsn=%X/%X.",
+ state->targetblock,
+ BTreeTupGetNAtts(itup, state->rel),
+ P_ISLEAF(topaque) ? "heap" : "index",
+ (uint32) (state->targetlsn >> 32),
+ (uint32) state->targetlsn)));
}
-
/*
* Loop over page items, starting from first non-highkey item, not high
- * key (if any). Also, immediately skip "negative infinity" real item (if
- * any).
+ * key (if any). Most tests are not performed for the "negative infinity"
+ * real item (if any).
*/
for (offset = P_FIRSTDATAKEY(topaque);
offset <= max;
@@ -791,7 +783,7 @@ bt_target_page_check(BtreeCheckState *state)
tupsize, ItemIdGetLength(itemid),
(uint32) (state->targetlsn >> 32),
(uint32) state->targetlsn),
- errhint("This could be a torn page problem")));
+ errhint("This could be a torn page problem.")));
/* Check the number of index tuple attributes */
if (!_bt_check_natts(state->rel, state->target, offset))
@@ -806,7 +798,7 @@ bt_target_page_check(BtreeCheckState *state)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("wrong number of index tuple attributes for index \"%s\"",
+ errmsg("wrong number of index tuple attributes in index \"%s\"",
RelationGetRelationName(state->rel)),
errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.",
itid,
@@ -818,8 +810,8 @@ bt_target_page_check(BtreeCheckState *state)
}
/*
- * Don't try to generate scankey using "negative infinity" garbage
- * data on internal pages
+ * Don't try to generate scankey using "negative infinity" item on
+ * internal pages. They are always truncated to zero attributes.
*/
if (offset_is_negative_infinity(topaque, offset))
continue;
@@ -1430,9 +1422,9 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset)
* infinity item is either first or second line item, or there is none
* within page.
*
- * "Negative infinity" tuple is a special corner case of pivot tuples,
- * it has zero attributes while rest of pivot tuples have nkeyatts number
- * of attributes.
+ * Negative infinity items are a special case among pivot tuples. They
+ * always have zero attributes, while all other pivot tuples always have
+ * nkeyatts attributes.
*
* Right-most pages don't have a high key, but could be said to
* conceptually have a "positive infinity" high key. Thus, there is a
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index 9b3e0a2e6e..2ae18fdb10 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -19,7 +19,6 @@
#include "access/heapam.h"
#include "access/itup.h"
#include "access/tuptoaster.h"
-#include "utils/rel.h"
/* ----------------------------------------------------------------
@@ -32,6 +31,9 @@
*
* This shouldn't leak any memory; otherwise, callers such as
* tuplesort_putindextuplevalues() will be very unhappy.
+ *
+ * This shouldn't perform external table access provided caller
+ * does not pass values that are stored EXTERNAL.
* ----------------
*/
IndexTuple
@@ -448,30 +450,49 @@ CopyIndexTuple(IndexTuple source)
}
/*
- * Truncate tailing attributes from given index tuple leaving it with
- * new_indnatts number of attributes.
+ * Create a palloc'd copy of an index tuple, leaving only the first
+ * leavenatts attributes remaining.
+ *
+ * Truncation is guaranteed to result in an index tuple that is no
+ * larger than the original. It is safe to use the IndexTuple with
+ * the original tuple descriptor, but caller must avoid actually
+ * accessing truncated attributes from returned tuple! In practice
+ * this means that index_getattr() must be called with special care,
+ * and that the truncated tuple should only ever be accessed by code
+ * under caller's direct control.
+ *
+ * It's safe to call this function with a buffer lock held, since it
+ * never performs external table access. If it ever became possible
+ * for index tuples to contain EXTERNAL TOAST values, then this would
+ * have to be revisited.
*/
IndexTuple
-index_truncate_tuple(TupleDesc tupleDescriptor, IndexTuple olditup,
- int new_indnatts)
+index_truncate_tuple(TupleDesc sourceDescriptor, IndexTuple source,
+ int leavenatts)
{
- TupleDesc itupdesc = CreateTupleDescCopyConstr(tupleDescriptor);
+ TupleDesc truncdesc;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
- IndexTuple newitup;
+ IndexTuple truncated;
- Assert(tupleDescriptor->natts <= INDEX_MAX_KEYS);
- Assert(new_indnatts > 0);
- Assert(new_indnatts < tupleDescriptor->natts);
+ Assert(leavenatts < sourceDescriptor->natts);
- index_deform_tuple(olditup, tupleDescriptor, values, isnull);
+ /* Create temporary descriptor to scribble on */
+ truncdesc = palloc(TupleDescSize(sourceDescriptor));
+ TupleDescCopy(truncdesc, sourceDescriptor);
+ truncdesc->natts = leavenatts;
- /* form new tuple that will contain only key attributes */
- itupdesc->natts = new_indnatts;
- newitup = index_form_tuple(itupdesc, values, isnull);
- newitup->t_tid = olditup->t_tid;
+ /* Deform, form copy of tuple with fewer attributes */
+ index_deform_tuple(source, truncdesc, values, isnull);
+ truncated = index_form_tuple(truncdesc, values, isnull);
+ truncated->t_tid = source->t_tid;
+ Assert(IndexTupleSize(truncated) <= IndexTupleSize(source));
+
+ /*
+ *Cannot leak memory here, TupleDescCopy() doesn't allocate any
+ * inner structure, so, plain pfree() should clean all allocated memory
+ */
+ pfree(truncdesc);
- FreeTupleDesc(itupdesc);
- Assert(IndexTupleSize(newitup) <= IndexTupleSize(olditup));
- return newitup;
+ return truncated;
}
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 10509cfe8a..a5409a9c49 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -84,7 +84,7 @@ static void _bt_checksplitloc(FindSplitData *state,
int dataitemstoleft, Size firstoldonrightsz);
static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup,
OffsetNumber itup_off);
-static bool _bt_isequal(Relation idxrel, Page page, OffsetNumber offnum,
+static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
int keysz, ScanKey scankey);
static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
@@ -343,6 +343,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
IndexUniqueCheck checkUnique, bool *is_unique,
uint32 *speculativeToken)
{
+ TupleDesc itupdesc = RelationGetDescr(rel);
int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
SnapshotData SnapshotDirty;
OffsetNumber maxoff;
@@ -402,7 +403,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
* in real comparison, but only for ordering/finding items on
* pages. - vadim 03/24/97
*/
- if (!_bt_isequal(rel, page, offset, indnkeyatts, itup_scankey))
+ if (!_bt_isequal(itupdesc, page, offset, indnkeyatts, itup_scankey))
break; /* we're past all the equal tuples */
/* okay, we gotta fetch the heap tuple ... */
@@ -566,7 +567,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
/* If scankey == hikey we gotta check the next page too */
if (P_RIGHTMOST(opaque))
break;
- if (!_bt_isequal(rel, page, P_HIKEY,
+ if (!_bt_isequal(itupdesc, page, P_HIKEY,
indnkeyatts, itup_scankey))
break;
/* Advance to next non-dead page --- there must be one */
@@ -849,6 +850,13 @@ _bt_insertonpg(Relation rel,
/* child buffer must be given iff inserting on an internal page */
Assert(P_ISLEAF(lpageop) == !BufferIsValid(cbuf));
+ /* tuple must have appropriate number of attributes */
+ Assert(!P_ISLEAF(lpageop) ||
+ BTreeTupGetNAtts(itup, rel) ==
+ IndexRelationGetNumberOfAttributes(rel));
+ Assert(P_ISLEAF(lpageop) ||
+ BTreeTupGetNAtts(itup, rel) ==
+ IndexRelationGetNumberOfKeyAttributes(rel));
/* The caller should've finished any incomplete splits already. */
if (P_INCOMPLETE_SPLIT(lpageop))
@@ -956,6 +964,18 @@ _bt_insertonpg(Relation rel,
}
}
+ /*
+ * Every internal page should have exactly one negative infinity item
+ * at all times. Only _bt_split() and _bt_newroot() should add items
+ * that become negative infinity items through truncation, since
+ * they're the only routines that allocate new internal pages. Do not
+ * allow a retail insertion of a new item at the negative infinity
+ * offset.
+ */
+ if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
+ elog(ERROR, "cannot insert second negative infinity item in block %u of index \"%s\"",
+ itup_blkno, RelationGetRelationName(rel));
+
/* Do the update. No ereport(ERROR) until changes are logged */
START_CRIT_SECTION();
@@ -1002,7 +1022,6 @@ _bt_insertonpg(Relation rel,
xl_btree_metadata xlmeta;
uint8 xlinfo;
XLogRecPtr recptr;
- IndexTupleData trunctuple;
xlrec.offnum = itup_off;
@@ -1038,17 +1057,8 @@ _bt_insertonpg(Relation rel,
xlinfo = XLOG_BTREE_INSERT_META;
}
- /* Read comments in _bt_pgaddtup */
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
- if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
- {
- trunctuple = *itup;
- trunctuple.t_info = sizeof(IndexTupleData);
- XLogRegisterBufData(0, (char *) &trunctuple,
- sizeof(IndexTupleData));
- }
- else
- XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));
+ XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));
recptr = XLogInsert(RM_BTREE_ID, xlinfo);
@@ -1203,6 +1213,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
itemid = PageGetItemId(origpage, P_HIKEY);
itemsz = ItemIdGetLength(itemid);
item = (IndexTuple) PageGetItem(origpage, itemid);
+ Assert(BTreeTupGetNAtts(item, rel) == indnkeyatts);
if (PageAddItem(rightpage, (Item) item, itemsz, rightoff,
false, false) == InvalidOffsetNumber)
{
@@ -1235,20 +1246,25 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
}
/*
- * We must truncate included attributes of the "high key" item, before
- * insert it onto the leaf page. It's the only point in insertion
- * process, where we perform truncation. All other functions work with
- * this high key and do not change it.
+ * Truncate non-key (INCLUDE) attributes of the high key item before
+ * inserting it on the left page. This only needs to happen at the leaf
+ * level, since in general all pivot tuple values originate from leaf
+ * level high keys. This isn't just about avoiding unnecessary work,
+ * though; truncating unneeded key attributes (more aggressive suffix
+ * truncation) can only be performed at the leaf level anyway. This is
+ * because a pivot tuple in a grandparent page must guide a search not
+ * only to the correct parent page, but also to the correct leaf page.
*/
if (indnatts != indnkeyatts && isleaf)
{
- lefthikey = _bt_truncate_tuple(rel, item);
+ lefthikey = _bt_nonkey_truncate(rel, item);
itemsz = IndexTupleSize(lefthikey);
itemsz = MAXALIGN(itemsz);
}
else
lefthikey = item;
+ Assert(BTreeTupGetNAtts(lefthikey, rel) == indnkeyatts);
if (PageAddItem(leftpage, (Item) lefthikey, itemsz, leftoff,
false, false) == InvalidOffsetNumber)
{
@@ -1258,6 +1274,9 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
origpagenumber, RelationGetRelationName(rel));
}
leftoff = OffsetNumberNext(leftoff);
+ /* be tidy */
+ if (lefthikey != item)
+ pfree(lefthikey);
/*
* Now transfer all the data items to the appropriate page.
@@ -2180,6 +2199,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
* Note: we *must* insert the two items in item-number order, for the
* benefit of _bt_restore_page().
*/
+ Assert(BTreeTupGetNAtts(left_item, rel) == 0);
if (PageAddItem(rootpage, (Item) left_item, left_item_sz, P_HIKEY,
false, false) == InvalidOffsetNumber)
elog(PANIC, "failed to add leftkey to new root page"
@@ -2189,6 +2209,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
/*
* insert the right page pointer into the new root page.
*/
+ Assert(BTreeTupGetNAtts(right_item, rel) ==
+ IndexRelationGetNumberOfKeyAttributes(rel));
if (PageAddItem(rootpage, (Item) right_item, right_item_sz, P_FIRSTKEY,
false, false) == InvalidOffsetNumber)
elog(PANIC, "failed to add rightkey to new root page"
@@ -2303,10 +2325,9 @@ _bt_pgaddtup(Page page,
* Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too.
*/
static bool
-_bt_isequal(Relation idxrel, Page page, OffsetNumber offnum,
+_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
int keysz, ScanKey scankey)
{
- TupleDesc itupdesc = RelationGetDescr(idxrel);
IndexTuple itup;
int i;
@@ -2316,16 +2337,11 @@ _bt_isequal(Relation idxrel, Page page, OffsetNumber offnum,
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
/*
- * Index tuple shouldn't be truncated. Despite we technically could
- * compare truncated tuple as well, this function should be only called
- * for regular non-truncated leaf tuples and P_HIKEY tuple on
- * rightmost leaf page.
+ * It's okay that we might perform a comparison against a truncated page
+ * high key when caller needs to determine if _bt_check_unique scan must
+ * continue on to the next page. Caller never asks us to compare non-key
+ * attributes within an INCLUDE index.
*/
- Assert((P_RIGHTMOST((BTPageOpaque) PageGetSpecialPointer(page)) ||
- offnum != P_HIKEY)
- ? BTreeTupGetNAtts(itup, idxrel) == itupdesc->natts
- : true);
-
for (i = 1; i <= keysz; i++)
{
AttrNumber attno;
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index ba68925912..f356be5153 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1605,6 +1605,7 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
ItemPointerSetBlockNumber(&trunctuple.t_tid, target);
else
ItemPointerSetInvalid(&trunctuple.t_tid);
+ BTreeTupSetNAtts(&trunctuple, 0);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
false, false) == InvalidOffsetNumber)
elog(ERROR, "could not add dummy high key to half-dead page");
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 4c6fdcdd8a..0bcfa10b86 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -154,7 +154,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
* We need to save the location of the index entry we chose in the
* parent page on a stack. In case we split the tree, we'll use the
* stack to work back up to the parent page. We also save the actual
- * downlink (TID) to uniquely identify the index entry, in case it
+ * downlink (block) to uniquely identify the index entry, in case it
* moves right while we're working lower in the tree. See the paper
* by Lehman and Yao for how this is detected and handled. (We use the
* child link to disambiguate duplicate keys in the index -- Lehman
@@ -436,14 +436,7 @@ _bt_compare(Relation rel,
IndexTuple itup;
int i;
- /*
- * Check tuple has correct number of attributes.
- */
- if (unlikely(!_bt_check_natts(rel, page, offnum)))
- ereport(ERROR,
- (errcode(ERRCODE_INTERNAL_ERROR),
- errmsg("tuple has wrong number of attributes in index \"%s\"",
- RelationGetRelationName(rel))));
+ Assert(_bt_check_natts(rel, page, offnum));
/*
* Force result ">" if target item is first data item on an internal page
@@ -1968,51 +1961,3 @@ _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
so->numKilled = 0; /* just paranoia */
so->markItemIndex = -1; /* ditto */
}
-
-/*
- * Check if index tuple have appropriate number of attributes.
- */
-bool
-_bt_check_natts(Relation index, Page page, OffsetNumber offnum)
-{
- int16 natts = IndexRelationGetNumberOfAttributes(index);
- int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
- ItemId itemid;
- IndexTuple itup;
- BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-
- /*
- * Assert that mask allocated for number of keys in index tuple can fit
- * maximum number of index keys.
- */
- StaticAssertStmt(BT_N_KEYS_OFFSET_MASK >= INDEX_MAX_KEYS,
- "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS");
-
- itemid = PageGetItemId(page, offnum);
- itup = (IndexTuple) PageGetItem(page, itemid);
-
- if (P_ISLEAF(opaque) && offnum >= P_FIRSTDATAKEY(opaque))
- {
- /*
- * Regular leaf tuples have as every index attributes
- */
- return (BTreeTupGetNAtts(itup, index) == natts);
- }
- else if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
- {
- /*
- * Leftmost tuples on non-leaf pages have no attributes, or haven't
- * INDEX_ALT_TID_MASK set in pg_upgraded indexes.
- */
- return (BTreeTupGetNAtts(itup, index) == 0 ||
- ((itup->t_info & INDEX_ALT_TID_MASK) == 0));
- }
- else
- {
- /*
- * Pivot tuples stored in non-leaf pages and hikeys of leaf pages
- * contain only key attributes
- */
- return (BTreeTupGetNAtts(itup, index) == nkeyatts);
- }
-}
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index feba5e1c8f..671669bf3a 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -790,7 +790,9 @@ _bt_sortaddtup(Page page,
* placeholder for the pointer to the "high key" item; when we have
* filled up the page, we will set linp0 to point to itemN and clear
* linpN. On the other hand, if we find this is the last (rightmost)
- * page, we leave the items alone and slide the linp array over.
+ * page, we leave the items alone and slide the linp array over. If
+ * the high key is to be truncated, offset 1 is deleted, and we insert
+ * the truncated high key at offset 1.
*
* 'last' pointer indicates the last offset added to the page.
*----------
@@ -803,7 +805,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
OffsetNumber last_off;
Size pgspc;
Size itupsz;
- BTPageOpaque pageop;
int indnatts = IndexRelationGetNumberOfAttributes(wstate->index);
int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(wstate->index);
@@ -860,7 +861,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
ItemId ii;
ItemId hii;
IndexTuple oitup;
- IndexTuple keytup;
BTPageOpaque opageop = (BTPageOpaque) PageGetSpecialPointer(opage);
/* Create new page of same level */
@@ -891,25 +891,38 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
if (indnkeyatts != indnatts && P_ISLEAF(opageop))
{
+ IndexTuple truncated;
+ Size truncsz;
+
/*
- * We truncate included attributes of high key here. Subsequent
- * insertions assume that hikey is already truncated, and so they
- * need not worry about it, when copying the high key into the
- * parent page as a downlink.
+ * Truncate any non-key attributes from high key on leaf level
+ * (i.e. truncate on leaf level if we're building an INCLUDE
+ * index). This is only done at the leaf level because
+ * downlinks in internal pages are either negative infinity
+ * items, or get their contents from copying from one level
+ * down. See also: _bt_split().
+ *
+ * Since the truncated tuple is probably smaller than the
+ * original, it cannot just be copied in place (besides, we want
+ * to actually save space on the leaf page). We delete the
+ * original high key, and add our own truncated high key at the
+ * same offset.
*
- * The code above have just rearranged item pointers, but it
- * didn't save any space. In order to save the space on page we
- * have to truly shift index tuples on the page. But that's not
- * so bad for performance, because we operating pd_upper and don't
- * have to shift much of tuples memory. Shift of ItemId's is
- * rather cheap, because they are small.
+ * Note that the page layout won't be changed very much. oitup
+ * is already located at the physical beginning of tuple space,
+ * so we only shift the line pointer array back and forth, and
+ * overwrite the latter portion of the space occupied by the
+ * original tuple. This is fairly cheap.
*/
- keytup = _bt_truncate_tuple(wstate->index, oitup);
-
- /* delete "wrong" high key, insert keytup as P_HIKEY. */
+ truncated = _bt_nonkey_truncate(wstate->index, oitup);
+ truncsz = IndexTupleSize(truncated);
PageIndexTupleDelete(opage, P_HIKEY);
+ _bt_sortaddtup(opage, truncsz, truncated, P_HIKEY);
+ pfree(truncated);
- _bt_sortaddtup(opage, IndexTupleSize(keytup), keytup, P_HIKEY);
+ /* oitup should continue to point to the page's high key */
+ hii = PageGetItemId(opage, P_HIKEY);
+ oitup = (IndexTuple) PageGetItem(opage, hii);
}
/*
@@ -920,7 +933,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
if (state->btps_next == NULL)
state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
- Assert(state->btps_minkey != NULL);
+ Assert(BTreeTupGetNAtts(state->btps_minkey, wstate->index) ==
+ IndexRelationGetNumberOfKeyAttributes(wstate->index));
BTreeInnerTupleSetDownLink(state->btps_minkey, oblkno);
_bt_buildadd(wstate, state->btps_next, state->btps_minkey);
pfree(state->btps_minkey);
@@ -928,11 +942,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
/*
* Save a copy of the minimum key for the new page. We have to copy
* it off the old page, not the new one, in case we are not at leaf
- * level. Despite oitup is already initialized, it's important to get
- * high key from the page, since we could have replaced it with
- * truncated copy. See comment above.
+ * level.
*/
- oitup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, P_HIKEY));
state->btps_minkey = CopyIndexTuple(oitup);
/*
@@ -959,8 +970,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
last_off = P_FIRSTKEY;
}
- pageop = (BTPageOpaque) PageGetSpecialPointer(npage);
-
/*
* If the new item is the first for its page, stash a copy for later. Note
* this will only happen for the first item on a level; on later pages,
@@ -969,14 +978,18 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
*/
if (last_off == P_HIKEY)
{
+ BTPageOpaque npageop;
+
Assert(state->btps_minkey == NULL);
+ npageop = (BTPageOpaque) PageGetSpecialPointer(npage);
+
/*
* Truncate included attributes of the tuple that we're going to
* insert into the parent page as a downlink
*/
- if (indnkeyatts != indnatts && P_ISLEAF(pageop))
- state->btps_minkey = _bt_truncate_tuple(wstate->index, itup);
+ if (indnkeyatts != indnatts && P_ISLEAF(npageop))
+ state->btps_minkey = _bt_nonkey_truncate(wstate->index, itup);
else
state->btps_minkey = CopyIndexTuple(itup);
}
@@ -1030,7 +1043,8 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
}
else
{
- Assert(s->btps_minkey != NULL);
+ Assert(BTreeTupGetNAtts(s->btps_minkey, wstate->index) ==
+ IndexRelationGetNumberOfKeyAttributes(wstate->index));
BTreeInnerTupleSetDownLink(s->btps_minkey, blkno);
_bt_buildadd(wstate, s->btps_next, s->btps_minkey);
pfree(s->btps_minkey);
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 76ffa6b0d4..263c35876a 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -73,14 +73,14 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
indoption = rel->rd_indoption;
- Assert(indnkeyatts != 0);
+ Assert(indnkeyatts > 0);
Assert(indnkeyatts <= indnatts);
Assert(BTreeTupGetNAtts(itup, rel) == indnatts ||
BTreeTupGetNAtts(itup, rel) == indnkeyatts);
/*
- * We'll execute search using ScanKey constructed on key columns. Non key
- * (included) columns must be omitted.
+ * We'll execute search using scan key constructed on key columns. Non-key
+ * (INCLUDE index) columns are always omitted from scan keys.
*/
skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData));
@@ -1427,6 +1427,7 @@ _bt_checkkeys(IndexScanDesc scan,
bool isNull;
Datum test;
+ Assert(key->sk_attno <= BTreeTupGetNAtts(tuple, scan->indexRelation));
/* row-comparison keys need special processing */
if (key->sk_flags & SK_ROW_HEADER)
{
@@ -2082,29 +2083,113 @@ btproperty(Oid index_oid, int attno,
}
/*
- * _bt_truncate_tuple() -- remove non-key (INCLUDE) attributes from index
- * tuple.
+ * _bt_nonkey_truncate() -- create tuple without non-key suffix attributes.
*
- * Transforms an ordinal B-tree leaf index tuple into pivot tuple to be used
- * as hikey or non-leaf page tuple with downlink. Note that t_tid offset
- * will be overwritten in order to represent number of present tuple
- * attributes.
+ * Returns truncated index tuple allocated in caller's memory context, with key
+ * attributes copied from caller's itup argument. Currently, suffix truncation
+ * is only performed to create pivot tuples in INCLUDE indexes, but some day it
+ * could be generalized to remove suffix attributes after the first
+ * distinguishing key attribute.
+ *
+ * Truncated tuple is guaranteed to be no larger than the original, which is
+ * important for staying under the 1/3 of a page restriction on tuple size.
+ *
+ * Note that returned tuple's t_tid offset will hold the number of attributes
+ * present, so the original item pointer offset is not represented. Caller
+ * should only change truncated tuple's downlink.
*/
IndexTuple
-_bt_truncate_tuple(Relation idxrel, IndexTuple olditup)
+_bt_nonkey_truncate(Relation rel, IndexTuple itup)
+{
+ int nkeyattrs = IndexRelationGetNumberOfKeyAttributes(rel);
+ IndexTuple truncated;
+
+ /*
+ * We should only ever truncate leaf index tuples, which must have both key
+ * and non-key attributes. It's never okay to truncate a second time.
+ */
+ Assert(BTreeTupGetNAtts(itup, rel) ==
+ IndexRelationGetNumberOfAttributes(rel));
+
+ truncated = index_truncate_tuple(RelationGetDescr(rel), itup, nkeyattrs);
+ BTreeTupSetNAtts(truncated, nkeyattrs);
+
+ return truncated;
+}
+
+/*
+ * _bt_check_natts() -- Verify tuple has expected number of attributes.
+ *
+ * Returns a value indicating whether the expected number of attributes was
+ * found for a particular offset on the page. This can be used as a
+ * general-purpose sanity check.
+ *
+ * Testing a tuple directly with BTreeTupGetNAtts() should generally be
+ * preferred to calling here. That's usually more convenient, and is always
+ * more explicit. Call here instead when offnum's tuple may be a negative
+ * infinity tuple that uses the pre-v11 on-disk representation, or when a
+ * low-context check is appropriate.
+ */
+bool
+_bt_check_natts(Relation rel, Page page, OffsetNumber offnum)
{
- IndexTuple newitup;
- int nkeyattrs = IndexRelationGetNumberOfKeyAttributes(idxrel);
+ int16 natts = IndexRelationGetNumberOfAttributes(rel);
+ int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
+ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ IndexTuple itup;
+
+ /*
+ * We cannot reliably test a deleted or half-deleted page, since they have
+ * dummy high keys
+ */
+ if (P_IGNORE(opaque))
+ return true;
/*
- * We're assuming to truncate only regular leaf index tuples which have
- * both key and non-key attributes.
+ * The mask allocated for the number of keys in an index tuple must be able
+ * to fit the maximum possible number of index attributes
*/
- Assert(BTreeTupGetNAtts(olditup, idxrel) == IndexRelationGetNumberOfAttributes(idxrel));
+ StaticAssertStmt(BT_N_KEYS_OFFSET_MASK >= INDEX_MAX_KEYS,
+ "BT_N_KEYS_OFFSET_MASK can't fit INDEX_MAX_KEYS");
- newitup = index_truncate_tuple(RelationGetDescr(idxrel),
- olditup, nkeyattrs);
- BTreeTupSetNAtts(newitup, nkeyattrs);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
- return newitup;
+ if (P_ISLEAF(opaque) && offnum >= P_FIRSTDATAKEY(opaque))
+ {
+ /*
+ * Leaf tuples that are not the page high key (non-pivot tuples) should
+ * never be truncated
+ */
+ return BTreeTupGetNAtts(itup, rel) == natts;
+ }
+ else if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
+ {
+ /*
+ * The first tuple on any internal page (possibly the first after its
+ * high key) is its negative infinity tuple. Negative infinity tuples
+ * are always truncated to zero attributes. They are a particular
+ * kind of pivot tuple.
+ *
+ * The number of attributes won't be explicitly represented if the
+ * negative infinity tuple was generated during a page split that
+ * occurred with a version of Postgres before v11. There must be a
+ * problem when there is an explicit representation that is non-zero,
+ * or when there is no explicit representation and the tuple is
+ * evidently not a pre-pg_upgrade tuple.
+ *
+ * Prior to v11, downlinks always had P_HIKEY as their offset. Use
+ * that to decide if the tuple is a pre-v11 tuple.
+ */
+ return BTreeTupGetNAtts(itup, rel) == 0 ||
+ ((itup->t_info & INDEX_ALT_TID_MASK) == 0 &&
+ ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
+ }
+ else
+ {
+ /*
+ * All other pivot tuples should contain all key attributes (and no
+ * non-key attributes)
+ */
+ return BTreeTupGetNAtts(itup, rel) == nkeyatts;
+ }
}
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 0986ef07cf..20ee9019eb 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -248,17 +248,16 @@ btree_xlog_split(bool onleft, bool lhighkey, XLogReaderState *record)
_bt_restore_page(rpage, datapos, datalen);
- /* Non-leaf page should always have its high key logged. */
- Assert(isleaf || lhighkey);
-
/*
* When the high key isn't present is the wal record, then we assume it to
- * be equal to the first key on the right page.
+ * be equal to the first key on the right page. It must be from the leaf
+ * level.
*/
if (!lhighkey)
{
ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
+ Assert(isleaf);
left_hikey = (IndexTuple) PageGetItem(rpage, hiItemId);
left_hikeysz = ItemIdGetLength(hiItemId);
}
@@ -620,7 +619,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
* heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
* Note that we are not looking at tuple data here, just headers.
*/
- hoffnum = ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid));
+ hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid));
hitemid = PageGetItemId(hpage, hoffnum);
/*
@@ -805,6 +804,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
else
ItemPointerSetInvalid(&trunctuple.t_tid);
+ BTreeTupSetNAtts(&trunctuple, 0);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
false, false) == InvalidOffsetNumber)
elog(ERROR, "could not add dummy high key to half-dead page");
@@ -915,6 +915,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
else
ItemPointerSetInvalid(&trunctuple.t_tid);
+ BTreeTupSetNAtts(&trunctuple, 0);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
false, false) == InvalidOffsetNumber)
elog(ERROR, "could not add dummy high key to half-dead page");
diff --git a/src/include/access/itup.h b/src/include/access/itup.h
index 555434ca80..bd3a702380 100644
--- a/src/include/access/itup.h
+++ b/src/include/access/itup.h
@@ -155,7 +155,7 @@ extern Datum nocache_index_getattr(IndexTuple tup, int attnum,
extern void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor,
Datum *values, bool *isnull);
extern IndexTuple CopyIndexTuple(IndexTuple source);
-extern IndexTuple index_truncate_tuple(TupleDesc tupleDescriptor,
- IndexTuple olditup, int new_indnatts);
+extern IndexTuple index_truncate_tuple(TupleDesc sourceDescriptor,
+ IndexTuple source, int leavenatts);
#endif /* ITUP_H */
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 36619b220f..d8f1f2e3f5 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -186,59 +186,47 @@ typedef struct BTMetaPageData
#define P_FIRSTKEY ((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
-
/*
- * B-tree index with INCLUDE clause has non-key (included) attributes, which
- * are used solely in index-only scans. Those non-key attributes are present
- * in leaf index tuples which point to corresponding heap tuples. However,
- * tree also contains "pivot" tuples. Pivot tuples are used for navigation
- * during tree traversal. Pivot tuples include tuples on non-leaf pages and
- * high key tuples. Such, tuples don't need to included attributes, because
- * they have no use during tree traversal. This is why we truncate them in
- * order to save some space. Therefore, B-tree index with INCLUDE clause
- * contain tuples with variable number of attributes.
- *
- * In order to keep on-disk compatibility with upcoming suffix truncation of
- * pivot tuples, we store number of attributes present inside tuple itself.
- * Thankfully, offset number is always unused in pivot tuple. So, we use free
- * bit of index tuple flags as sign that offset have alternative meaning: it
- * stores number of keys present in index tuple (12 bit is far enough for that).
- * And we have 4 bits reserved for future usage.
+ * INCLUDE B-Tree indexes have non-key attributes. These are extra
+ * attributes that may be returned by index-only scans, but do not influence
+ * the order of items in the index (formally, non-key attributes are not
+ * considered to be part of the key space). Non-key attributes are only
+ * present in leaf index tuples whose item pointers actually point to heap
+ * tuples. All other types of index tuples (collectively, "pivot" tuples)
+ * only have key attributes, since pivot tuples only ever need to represent
+ * how the key space is separated. In general, any B-Tree index that has
+ * more than one level (i.e. any index that does not just consist of a
+ * metapage and a single leaf root page) must have some number of pivot
+ * tuples, since pivot tuples are used for traversing the tree.
*
- * Right now INDEX_ALT_TID_MASK is set only on truncation of non-key
- * attributes of included indexes. But potentially every pivot index tuple
- * might have INDEX_ALT_TID_MASK set. Then this tuple should have number of
- * attributes correctly set in BT_N_KEYS_OFFSET_MASK, and in future it might
- * use some bits of BT_RESERVED_OFFSET_MASK.
+ * We store the number of attributes present inside pivot tuples by abusing
+ * their item pointer offset field, since pivot tuples never need to store a
+ * real offset (downlinks only need to store a block number). The offset
+ * field only stores the number of attributes when the INDEX_ALT_TID_MASK
+ * bit is set (we never assume that pivot tuples must explicitly store the
+ * number of attributes, and currently do not bother storing the number of
+ * attributes unless indnkeyatts actually differs from indnatts).
+ * INDEX_ALT_TID_MASK is only used for pivot tuples at present, though it's
+ * possible that it will be used within non-pivot tuples in the future. Do
+ * not assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot
+ * tuple.
*
- * Non-pivot tuples might also use bit of BT_RESERVED_OFFSET_MASK. Despite
- * they store heap tuple offset, higher bits of offset are always free.
+ * The 12 least significant offset bits are used to represent the number of
+ * attributes in INDEX_ALT_TID_MASK tuples, leaving 4 bits that are reserved
+ * for future use (BT_RESERVED_OFFSET_MASK bits).
*/
-#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT /* flag indicating t_tid
- * offset has an
- * alternative meaning */
-#define BT_RESERVED_OFFSET_MASK 0xF000 /* mask of bits in t_tid offset
- * reserved for future usage */
-#define BT_N_KEYS_OFFSET_MASK 0x0FFF /* mask of bits in t_tid offset
- * holding number of attributes
- * actually present in index tuple */
-
-/* Acess to downlink block number */
+#define INDEX_ALT_TID_MASK INDEX_AM_RESERVED_BIT
+#define BT_RESERVED_OFFSET_MASK 0xF000
+#define BT_N_KEYS_OFFSET_MASK 0x0FFF
+
+/* Get/set downlink block number */
#define BTreeInnerTupleGetDownLink(itup) \
ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
-
#define BTreeInnerTupleSetDownLink(itup, blkno) \
ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))
-/* Set number of attributes to B-tree index tuple overriding t_tid offset */
-#define BTreeTupSetNAtts(itup, n) \
- do { \
- (itup)->t_info |= INDEX_ALT_TID_MASK; \
- ItemPointerSetOffsetNumber(&(itup)->t_tid, n); \
- } while(0)
-
-/* Get number of attributes in B-tree index tuple */
-#define BTreeTupGetNAtts(itup, index) \
+/* Get/set number of attributes within B-tree index tuple */
+#define BTreeTupGetNAtts(itup, rel) \
( \
(itup)->t_info & INDEX_ALT_TID_MASK ? \
( \
@@ -246,8 +234,13 @@ typedef struct BTMetaPageData
ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \
) \
: \
- IndexRelationGetNumberOfAttributes(index) \
+ IndexRelationGetNumberOfAttributes(rel) \
)
+#define BTreeTupSetNAtts(itup, n) \
+ do { /* mask n so it cannot spill into the BT_RESERVED_OFFSET_MASK bits */ \
+ (itup)->t_info |= INDEX_ALT_TID_MASK; /* offset now holds natts */ \
+ ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \
+ } while(0)
/*
* Operator strategy numbers for B-tree have been moved to access/stratnum.h,
@@ -561,7 +554,6 @@ extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
Snapshot snapshot);
-extern bool _bt_check_natts(Relation index, Page page, OffsetNumber offnum);
/*
* prototypes for functions in nbtutils.c
@@ -590,7 +582,8 @@ extern bytea *btoptions(Datum reloptions, bool validate);
extern bool btproperty(Oid index_oid, int attno,
IndexAMProperty prop, const char *propname,
bool *res, bool *isnull);
-extern IndexTuple _bt_truncate_tuple(Relation idxrel, IndexTuple olditup);
+extern IndexTuple _bt_nonkey_truncate(Relation rel, IndexTuple itup);
+extern bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum);
/*
* prototypes for functions in nbtvalidate.c
diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out
index 1d253ee77d..b7d1812ec5 100644
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@@ -1,81 +1,78 @@
/*
* 1.test CREATE INDEX
+ *
+ * Deliberately avoid dropping objects in this section, to get some pg_dump
+ * coverage.
*/
-- Regular index with included columns
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c3,c4);
+CREATE TABLE tbl_include_reg (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_reg SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c3,c4);
-- must fail because of intersection of key and included columns
-CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c1,c3);
+CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c1,c3);
ERROR: included columns must not intersect with key columns
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
- pg_get_indexdef
---------------------------------------------------------------------------
- CREATE INDEX tbl_idx ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4)
+WHERE i.indrelid = 'tbl_include_reg'::regclass ORDER BY c.relname;
+ pg_get_indexdef
+--------------------------------------------------------------------------------------------------
+ CREATE INDEX tbl_include_reg_idx ON public.tbl_include_reg USING btree (c1, c2) INCLUDE (c3, c4)
(1 row)
-DROP TABLE tbl;
-- Unique index and unique constraint
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4);
-ALTER TABLE tbl add UNIQUE USING INDEX tbl_idx_unique;
-ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4);
+CREATE TABLE tbl_include_unique1 (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_unique1 SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE UNIQUE INDEX tbl_include_unique1_idx_unique ON tbl_include_unique1 using btree (c1, c2) INCLUDE (c3, c4);
+ALTER TABLE tbl_include_unique1 add UNIQUE USING INDEX tbl_include_unique1_idx_unique;
+ALTER TABLE tbl_include_unique1 add UNIQUE (c1, c2) INCLUDE (c3, c4);
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
- pg_get_indexdef
----------------------------------------------------------------------------------------------
- CREATE UNIQUE INDEX tbl_c1_c2_c3_c4_key ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4)
- CREATE UNIQUE INDEX tbl_idx_unique ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4)
+WHERE i.indrelid = 'tbl_include_unique1'::regclass ORDER BY c.relname;
+ pg_get_indexdef
+-----------------------------------------------------------------------------------------------------------------------------
+ CREATE UNIQUE INDEX tbl_include_unique1_c1_c2_c3_c4_key ON public.tbl_include_unique1 USING btree (c1, c2) INCLUDE (c3, c4)
+ CREATE UNIQUE INDEX tbl_include_unique1_idx_unique ON public.tbl_include_unique1 USING btree (c1, c2) INCLUDE (c3, c4)
(2 rows)
-DROP TABLE tbl;
-- Unique index and unique constraint. Both must fail.
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4);
-ERROR: could not create unique index "tbl_idx_unique"
+CREATE TABLE tbl_include_unique2 (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_unique2 SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE UNIQUE INDEX tbl_include_unique2_idx_unique ON tbl_include_unique2 using btree (c1, c2) INCLUDE (c3, c4);
+ERROR: could not create unique index "tbl_include_unique2_idx_unique"
DETAIL: Key (c1, c2)=(1, 2) is duplicated.
-ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4);
-ERROR: could not create unique index "tbl_c1_c2_c3_c4_key"
+ALTER TABLE tbl_include_unique2 add UNIQUE (c1, c2) INCLUDE (c3, c4);
+ERROR: could not create unique index "tbl_include_unique2_c1_c2_c3_c4_key"
DETAIL: Key (c1, c2)=(1, 2) is duplicated.
-DROP TABLE tbl;
-- PK constraint
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
+CREATE TABLE tbl_include_pk (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_pk SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+ALTER TABLE tbl_include_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
- pg_get_indexdef
-----------------------------------------------------------------------------------
- CREATE UNIQUE INDEX tbl_pkey ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4)
+WHERE i.indrelid = 'tbl_include_pk'::regclass ORDER BY c.relname;
+ pg_get_indexdef
+--------------------------------------------------------------------------------------------------------
+ CREATE UNIQUE INDEX tbl_include_pk_pkey ON public.tbl_include_pk USING btree (c1, c2) INCLUDE (c3, c4)
(1 row)
-DROP TABLE tbl;
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4);
-ALTER TABLE tbl add PRIMARY KEY USING INDEX tbl_idx_unique;
+CREATE TABLE tbl_include_box (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_box SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE UNIQUE INDEX tbl_include_box_idx_unique ON tbl_include_box using btree (c1, c2) INCLUDE (c3, c4);
+ALTER TABLE tbl_include_box add PRIMARY KEY USING INDEX tbl_include_box_idx_unique;
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
- pg_get_indexdef
-----------------------------------------------------------------------------------------
- CREATE UNIQUE INDEX tbl_idx_unique ON public.tbl USING btree (c1, c2) INCLUDE (c3, c4)
+WHERE i.indrelid = 'tbl_include_box'::regclass ORDER BY c.relname;
+ pg_get_indexdef
+----------------------------------------------------------------------------------------------------------------
+ CREATE UNIQUE INDEX tbl_include_box_idx_unique ON public.tbl_include_box USING btree (c1, c2) INCLUDE (c3, c4)
(1 row)
-DROP TABLE tbl;
-- PK constraint. Must fail.
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
-ERROR: could not create unique index "tbl_pkey"
+CREATE TABLE tbl_include_box_pk (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_box_pk SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+ALTER TABLE tbl_include_box_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
+ERROR: could not create unique index "tbl_include_box_pk_pkey"
DETAIL: Key (c1, c2)=(1, 2) is duplicated.
-DROP TABLE tbl;
/*
* 2. Test CREATE TABLE with constraint
*/
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index ac0fb539e9..8afb1f2f7e 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -185,6 +185,12 @@ sql_sizing|f
sql_sizing_profiles|f
stud_emp|f
student|f
+tbl_include_box|t
+tbl_include_box_pk|f
+tbl_include_pk|t
+tbl_include_reg|t
+tbl_include_unique1|t
+tbl_include_unique2|f
tenk1|t
tenk2|t
test_range_excl|t
diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql
index caedc9866d..f83b2c64ac 100644
--- a/src/test/regress/sql/index_including.sql
+++ b/src/test/regress/sql/index_including.sql
@@ -1,59 +1,56 @@
/*
* 1.test CREATE INDEX
+ *
+ * Deliberately avoid dropping objects in this section, to get some pg_dump
+ * coverage.
*/
-- Regular index with included columns
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c3,c4);
+CREATE TABLE tbl_include_reg (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_reg SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c3,c4);
-- must fail because of intersection of key and included columns
-CREATE INDEX tbl_idx ON tbl using btree (c1, c2) INCLUDE (c1,c3);
+CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c1,c3);
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
-DROP TABLE tbl;
+WHERE i.indrelid = 'tbl_include_reg'::regclass ORDER BY c.relname;
-- Unique index and unique constraint
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4);
-ALTER TABLE tbl add UNIQUE USING INDEX tbl_idx_unique;
-ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4);
+CREATE TABLE tbl_include_unique1 (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_unique1 SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE UNIQUE INDEX tbl_include_unique1_idx_unique ON tbl_include_unique1 using btree (c1, c2) INCLUDE (c3, c4);
+ALTER TABLE tbl_include_unique1 add UNIQUE USING INDEX tbl_include_unique1_idx_unique;
+ALTER TABLE tbl_include_unique1 add UNIQUE (c1, c2) INCLUDE (c3, c4);
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
-DROP TABLE tbl;
+WHERE i.indrelid = 'tbl_include_unique1'::regclass ORDER BY c.relname;
-- Unique index and unique constraint. Both must fail.
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4);
-ALTER TABLE tbl add UNIQUE (c1, c2) INCLUDE (c3, c4);
-DROP TABLE tbl;
+CREATE TABLE tbl_include_unique2 (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_unique2 SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE UNIQUE INDEX tbl_include_unique2_idx_unique ON tbl_include_unique2 using btree (c1, c2) INCLUDE (c3, c4);
+ALTER TABLE tbl_include_unique2 add UNIQUE (c1, c2) INCLUDE (c3, c4);
-- PK constraint
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
+CREATE TABLE tbl_include_pk (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_pk SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+ALTER TABLE tbl_include_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
-DROP TABLE tbl;
+WHERE i.indrelid = 'tbl_include_pk'::regclass ORDER BY c.relname;
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE UNIQUE INDEX tbl_idx_unique ON tbl using btree (c1, c2) INCLUDE (c3, c4);
-ALTER TABLE tbl add PRIMARY KEY USING INDEX tbl_idx_unique;
+CREATE TABLE tbl_include_box (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_box SELECT 1, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+CREATE UNIQUE INDEX tbl_include_box_idx_unique ON tbl_include_box using btree (c1, c2) INCLUDE (c3, c4);
+ALTER TABLE tbl_include_box add PRIMARY KEY USING INDEX tbl_include_box_idx_unique;
SELECT pg_get_indexdef(i.indexrelid)
FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
-WHERE i.indrelid = 'tbl'::regclass ORDER BY c.relname;
-DROP TABLE tbl;
+WHERE i.indrelid = 'tbl_include_box'::regclass ORDER BY c.relname;
-- PK constraint. Must fail.
-CREATE TABLE tbl (c1 int, c2 int, c3 int, c4 box);
-INSERT INTO tbl SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-ALTER TABLE tbl add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
-DROP TABLE tbl;
+CREATE TABLE tbl_include_box_pk (c1 int, c2 int, c3 int, c4 box);
+INSERT INTO tbl_include_box_pk SELECT 1, 2, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
+ALTER TABLE tbl_include_box_pk add PRIMARY KEY (c1, c2) INCLUDE (c3, c4);
/*