There's minor change against the previous one(
http://archives.postgresql.org/pgsql-hackers/2008-07/msg01183.php ).
* merge branch master(Aug 16) into the patch
* clean code and make some comment
Performance result is here
http://wiki.postgresql.org/wiki/Gsoc08-hashindex
It seems hash index is a little better on index creation and selection.
But maybe it's in the range of noise, I'm not sure.
I'd like to try it with a bigger dataset (e.g. table with 10GB) but there is
not enough space in my computer.
Anyone interest can make a test on a bigger data set.
--
Best Regards,
Xiao Meng
DKERC, Harbin Institute of Technology, China
Gtalk: [EMAIL PROTECTED]
MSN: [EMAIL PROTECTED]
http://xiaomeng.yo2.cn
*** a/src/backend/access/hash/hash.c
--- b/src/backend/access/hash/hash.c
***************
*** 129,135 **** hashbuildCallback(Relation index,
IndexTuple itup;
/* form an index tuple and point it at the heap tuple */
! itup = index_form_tuple(RelationGetDescr(index), values, isnull);
itup->t_tid = htup->t_self;
/* Hash indexes don't index nulls, see notes in hashinsert */
--- 129,135 ----
IndexTuple itup;
/* form an index tuple and point it at the heap tuple */
! itup = _hash_form_tuple(index, values,isnull);
itup->t_tid = htup->t_self;
/* Hash indexes don't index nulls, see notes in hashinsert */
***************
*** 153,160 **** hashbuildCallback(Relation index,
/*
* hashinsert() -- insert an index tuple into a hash table.
*
! * Hash on the index tuple's key, find the appropriate location
! * for the new tuple, and put it there.
*/
Datum
hashinsert(PG_FUNCTION_ARGS)
--- 153,160 ----
/*
* hashinsert() -- insert an index tuple into a hash table.
*
! * Hash on the heap tuple's key, form an index tuple with hash code.
! * Find the appropriate location for the new tuple, and put it there.
*/
Datum
hashinsert(PG_FUNCTION_ARGS)
***************
*** 171,177 **** hashinsert(PG_FUNCTION_ARGS)
IndexTuple itup;
/* generate an index tuple */
! itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
itup->t_tid = *ht_ctid;
/*
--- 171,177 ----
IndexTuple itup;
/* generate an index tuple */
! itup = _hash_form_tuple(rel, values, isnull);
itup->t_tid = *ht_ctid;
/*
***************
*** 211,218 **** hashgettuple(PG_FUNCTION_ARGS)
OffsetNumber offnum;
bool res;
! /* Hash indexes are never lossy (at the moment anyway) */
! scan->xs_recheck = false;
/*
* We hold pin but not lock on current buffer while outside the hash AM.
--- 211,218 ----
OffsetNumber offnum;
bool res;
! /* Hash indexes maybe lossy since we store hash code only */
! scan->xs_recheck = true;
/*
* We hold pin but not lock on current buffer while outside the hash AM.
*** a/src/backend/access/hash/hashinsert.c
--- b/src/backend/access/hash/hashinsert.c
***************
*** 44,60 **** _hash_doinsert(Relation rel, IndexTuple itup)
uint32 hashkey;
Bucket bucket;
Datum datum;
- bool isnull;
/*
! * Compute the hash key for the item. We do this first so as not to need
! * to hold any locks while running the hash function.
*/
if (rel->rd_rel->relnatts != 1)
elog(ERROR, "hash indexes support only one index key");
! datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull);
! Assert(!isnull);
! hashkey = _hash_datum2hashkey(rel, datum);
/* compute item size too */
itemsz = IndexTupleDSize(*itup);
--- 44,58 ----
uint32 hashkey;
Bucket bucket;
Datum datum;
/*
! * Get the hash key for the item.
*/
if (rel->rd_rel->relnatts != 1)
elog(ERROR, "hash indexes support only one index key");
!
! datum = _hash_get_datum(itup);
! hashkey = DatumGetUInt32(datum);
/* compute item size too */
itemsz = IndexTupleDSize(*itup);
***************
*** 197,207 **** _hash_pgaddtup(Relation rel,
{
OffsetNumber itup_off;
Page page;
_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
page = BufferGetPage(buf);
! itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
== InvalidOffsetNumber)
elog(ERROR, "failed to add index item to \"%s\"",
--- 195,210 ----
{
OffsetNumber itup_off;
Page page;
+ Datum datum;
+ uint32 hashkey;
_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
page = BufferGetPage(buf);
! datum = _hash_get_datum(itup);
! hashkey = DatumGetUInt32(datum);
! itup_off = _hash_binsearch(page, hashkey);
!
if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
== InvalidOffsetNumber)
elog(ERROR, "failed to add index item to \"%s\"",
*** a/src/backend/access/hash/hashpage.c
--- b/src/backend/access/hash/hashpage.c
***************
*** 348,358 **** _hash_metapinit(Relation rel, double num_tuples)
* Determine the target fill factor (in tuples per bucket) for this index.
* The idea is to make the fill factor correspond to pages about as full
* as the user-settable fillfactor parameter says. We can compute it
! * exactly if the index datatype is fixed-width, but for var-width there's
! * some guessing involved.
*/
! data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
! RelationGetDescr(rel)->attrs[0]->atttypmod);
item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
sizeof(ItemIdData); /* include the line pointer */
ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
--- 348,356 ----
* Determine the target fill factor (in tuples per bucket) for this index.
* The idea is to make the fill factor correspond to pages about as full
* as the user-settable fillfactor parameter says. We can compute it
! * exactly since the index datatype (i.e. hash code of uint32 ) is fixed-width.
*/
! data_width = 4;
item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
sizeof(ItemIdData); /* include the line pointer */
ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
***************
*** 785,791 **** _hash_splitbucket(Relation rel,
OffsetNumber omaxoffnum;
Page opage;
Page npage;
! TupleDesc itupdesc = RelationGetDescr(rel);
/*
* It should be okay to simultaneously write-lock pages from each bucket,
--- 783,789 ----
OffsetNumber omaxoffnum;
Page opage;
Page npage;
!
/*
* It should be okay to simultaneously write-lock pages from each bucket,
***************
*** 848,861 **** _hash_splitbucket(Relation rel,
/*
* Re-hash the tuple to determine which bucket it now belongs in.
*
! * It is annoying to call the hash function while holding locks, but
! * releasing and relocking the page for each tuple is unappealing too.
*/
itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
! datum = index_getattr(itup, 1, itupdesc, &null);
! Assert(!null);
!
! bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
maxbucket, highmask, lowmask);
if (bucket == nbucket)
--- 846,856 ----
/*
* Re-hash the tuple to determine which bucket it now belongs in.
*
! * We needn't call hash function since we store hash code in the index tuple
*/
itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
! datum = _hash_get_datum(itup);
! bucket = _hash_hashkey2bucket(DatumGetUInt32(datum),
maxbucket, highmask, lowmask);
if (bucket == nbucket)
*** a/src/backend/access/hash/hashsearch.c
--- b/src/backend/access/hash/hashsearch.c
***************
*** 178,183 **** _hash_first(IndexScanDesc scan, ScanDirection dir)
--- 178,184 ----
hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
cur->sk_subtype);
+ so->hashso_sk_hash = hashkey;
/*
* Acquire shared split lock so we can compute the target bucket safely
* (see README).
***************
*** 289,370 **** _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
* continue to step through tuples until: 1) we get to the end of the
* bucket chain or 2) we find a valid tuple.
*/
! do
! {
! switch (dir)
{
! case ForwardScanDirection:
! if (offnum != InvalidOffsetNumber)
! offnum = OffsetNumberNext(offnum); /* move forward */
! else
! offnum = FirstOffsetNumber; /* new page */
!
! while (offnum > maxoff)
! {
! /*
! * either this page is empty (maxoff ==
! * InvalidOffsetNumber) or we ran off the end.
! */
! _hash_readnext(rel, &buf, &page, &opaque);
! if (BufferIsValid(buf))
! {
! maxoff = PageGetMaxOffsetNumber(page);
! offnum = FirstOffsetNumber;
! }
! else
! {
! /* end of bucket */
! maxoff = offnum = InvalidOffsetNumber;
! break; /* exit while */
! }
! }
! break;
!
! case BackwardScanDirection:
! if (offnum != InvalidOffsetNumber)
! offnum = OffsetNumberPrev(offnum); /* move back */
! else
! offnum = maxoff; /* new page */
!
! while (offnum < FirstOffsetNumber)
! {
! /*
! * either this page is empty (offnum ==
! * InvalidOffsetNumber) or we ran off the end.
! */
! _hash_readprev(rel, &buf, &page, &opaque);
! if (BufferIsValid(buf))
! maxoff = offnum = PageGetMaxOffsetNumber(page);
! else
! {
! /* end of bucket */
! maxoff = offnum = InvalidOffsetNumber;
! break; /* exit while */
! }
! }
! break;
!
! default:
! /* NoMovementScanDirection */
! /* this should not be reached */
! break;
}
! /* we ran off the end of the world without finding a match */
! if (offnum == InvalidOffsetNumber)
{
! *bufP = so->hashso_curbuf = InvalidBuffer;
! ItemPointerSetInvalid(current);
! return false;
}
!
! /* get ready to check this tuple */
! itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
! } while (!_hash_checkqual(scan, itup));
!
! /* if we made it to here, we've found a valid tuple */
! blkno = BufferGetBlockNumber(buf);
! *bufP = so->hashso_curbuf = buf;
! ItemPointerSet(current, blkno, offnum);
! return true;
}
--- 290,340 ----
* continue to step through tuples until: 1) we get to the end of the
* bucket chain or 2) we find a valid tuple.
*/
! for (;;)
! {
! if (offnum == InvalidOffsetNumber)
! {
! /*
! * This is the first time we're scanning this particular
! * page of the bucket, so jump to the right spot via
! * binary search.
! */
! offnum = _hash_binsearch(page, so->hashso_sk_hash);
! }
! else
{
! /* Advance to the next tuple */
! offnum = OffsetNumberNext(offnum);
}
! if (offnum <= maxoff) itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
! if (offnum <= maxoff && _hash_checkqual(scan, itup))
{
! /* Found a matching tuple */
! *bufP = so->hashso_curbuf = buf;
! ItemPointerSet(current, BufferGetBlockNumber(buf), offnum);
! return true;
}
! else
! {
! /* No more matches on this page, so go on to next page */
! if (ScanDirectionIsForward(dir))
! _hash_readnext(rel, &buf, &page, &opaque);
! else
! _hash_readprev(rel, &buf, &page, &opaque);
!
! if (BufferIsValid(buf))
! {
! maxoff = PageGetMaxOffsetNumber(page);
! offnum = InvalidOffsetNumber;
! }
! else
! {
! /* Ran out of pages, so we're done */
! *bufP = so->hashso_curbuf = InvalidBuffer;
! ItemPointerSetInvalid(current);
! return false;
! }
! }
! }
}
*** a/src/backend/access/hash/hashutil.c
--- b/src/backend/access/hash/hashutil.c
***************
*** 20,53 ****
#include "executor/execdebug.h"
#include "storage/bufmgr.h"
#include "utils/lsyscache.h"
/*
* _hash_checkqual -- does the index tuple satisfy the scan conditions?
*/
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
- TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
ScanKey key = scan->keyData;
int scanKeySize = scan->numberOfKeys;
IncrIndexProcessed();
while (scanKeySize > 0)
{
- Datum datum;
- bool isNull;
Datum test;
!
! datum = index_getattr(itup,
! key->sk_attno,
! tupdesc,
! &isNull);
/* assume sk_func is strict */
- if (isNull)
- return false;
if (key->sk_flags & SK_ISNULL)
return false;
--- 20,59 ----
#include "executor/execdebug.h"
#include "storage/bufmgr.h"
#include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ #include "catalog/pg_type.h"
/*
+ * hardcoded tuple descriptors. see include/access/hash.h
+ */
+ static FormData_pg_attribute Desc_hash[1] = {Schema_Hash};
+ /*
* _hash_checkqual -- does the index tuple satisfy the scan conditions?
*/
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
ScanKey key = scan->keyData;
int scanKeySize = scan->numberOfKeys;
+ Datum datum;
+ HashScanOpaque so = scan->opaque;
IncrIndexProcessed();
+
+ datum = _hash_get_datum(itup);
+ if( so->hashso_sk_hash != DatumGetUInt32(datum) )
+ return false;
+ key++;
+ scanKeySize--;
while (scanKeySize > 0)
{
Datum test;
!
! datum = _hash_get_datum(itup);
/* assume sk_func is strict */
if (key->sk_flags & SK_ISNULL)
return false;
***************
*** 221,223 **** hashoptions(PG_FUNCTION_ARGS)
--- 227,326 ----
PG_RETURN_BYTEA_P(result);
PG_RETURN_NULL();
}
+
+ /*
+ * _get_hash_desc - get the hash index tuple descriptor
+ *
+ * The hash index tuple descriptor is a hard-coded tuple descriptor with only int32 attribute.
+ */
+ TupleDesc _get_hash_desc()
+ {
+ static TupleDesc hashdesc = NULL;
+
+ /* Already done? */
+ if (hashdesc == NULL){
+ /*
+ * It's the same with BuildHardcodedDescriptor() in relcache.c to build
+ * a hard-coded tuple descriptor.
+ */
+ MemoryContext oldcxt;
+
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+
+ hashdesc = CreateTemplateTupleDesc(1, false);
+ hashdesc->tdtypeid = INT4OID;
+ hashdesc->tdtypmod = -1;
+ memcpy(hashdesc->attrs[0], &Desc_hash[0], ATTRIBUTE_TUPLE_SIZE);
+ hashdesc->attrs[0]->attcacheoff = 0;
+
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ return hashdesc;
+
+ }
+
+ /*
+ * _hash_get_datum - get the hash index tuple's first column datum (i.e. hash code)
+ */
+ Datum _hash_get_datum(IndexTuple itup)
+ {
+ return fetch_att( (char *) (itup) + IndexInfoFindDataOffset((itup)->t_info) ,
+ true,
+ 4);
+
+ }
+ /*
+ * _hash_form_tuple - form a tuple with hash code only
+ */
+ IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull)
+ {
+ TupleDesc hashdesc;
+ IndexTuple itup;
+ uint32 hashkey;
+
+ hashdesc = _get_hash_desc();
+ hashkey = _hash_datum2hashkey(rel, values[0]);
+ values[0] = UInt32GetDatum(hashkey);
+ itup = index_form_tuple(hashdesc, values, isnull);
+ return itup;
+ }
+
+ /*
+ * _hash_binsearch - Return the offset number in the page where the specified hash value
+ * should be located.
+ *
+ * The return value might exceed the page's max offset
+ * if the hash value is greater than any hash in the page.
+ */
+ OffsetNumber
+ _hash_binsearch(Page page, uint32 hash_value)
+ {
+ OffsetNumber upper;
+ OffsetNumber lower;
+
+ upper = PageGetMaxOffsetNumber(page) + 1;
+ lower = FirstOffsetNumber;
+
+ while (upper > lower)
+ {
+ IndexTuple pos;
+ OffsetNumber off;
+ uint32 hashkey;
+ Datum datum;
+ bool isNull;
+
+ off = (upper + lower) / 2;
+ Assert(OffsetNumberIsValid(off));
+
+ pos = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+ datum = _hash_get_datum(pos);
+ hashkey = DatumGetUInt32(datum);
+ if (hashkey < hash_value)
+ lower = off + 1;
+ else
+ upper = off;
+ }
+
+ return upper;
+ }
*** a/src/backend/utils/sort/tuplesort.c
--- b/src/backend/utils/sort/tuplesort.c
***************
*** 456,465 **** static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
--- 456,468 ----
static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state);
static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
+ static void copytup_index_hash(Tuplesortstate *state, SortTuple *stup, void *tup);
static void writetup_index(Tuplesortstate *state, int tapenum,
SortTuple *stup);
static void readtup_index(Tuplesortstate *state, SortTuple *stup,
int tapenum, unsigned int len);
+ static void readtup_index_hash(Tuplesortstate *state, SortTuple *stup,
+ int tapenum, unsigned int len);
static void reversedirection_index_btree(Tuplesortstate *state);
static void reversedirection_index_hash(Tuplesortstate *state);
static int comparetup_datum(const SortTuple *a, const SortTuple *b,
***************
*** 682,690 **** tuplesort_begin_index_hash(Relation indexRel,
state->nKeys = 1; /* Only one sort column, the hash code */
state->comparetup = comparetup_index_hash;
! state->copytup = copytup_index;
state->writetup = writetup_index;
! state->readtup = readtup_index;
state->reversedirection = reversedirection_index_hash;
state->indexRel = indexRel;
--- 685,693 ----
state->nKeys = 1; /* Only one sort column, the hash code */
state->comparetup = comparetup_index_hash;
! state->copytup = copytup_index_hash;
state->writetup = writetup_index;
! state->readtup = readtup_index_hash;
state->reversedirection = reversedirection_index_hash;
state->indexRel = indexRel;
***************
*** 2822,2830 **** comparetup_index_hash(const SortTuple *a, const SortTuple *b,
Tuplesortstate *state)
{
/*
! * It's slightly annoying to redo the hash function each time, although
! * most hash functions ought to be cheap. Is it worth having a variant
! * tuple storage format so we can store the hash code?
*/
uint32 hash1;
uint32 hash2;
--- 2825,2831 ----
Tuplesortstate *state)
{
/*
! * we needn't redo hash function each time since we've stored it.
*/
uint32 hash1;
uint32 hash2;
***************
*** 2836,2845 **** comparetup_index_hash(const SortTuple *a, const SortTuple *b,
/* Compute hash codes and mask off bits we don't want to sort by */
Assert(!a->isnull1);
! hash1 = DatumGetUInt32(FunctionCall1(state->hash_proc, a->datum1))
& state->hash_mask;
Assert(!b->isnull1);
! hash2 = DatumGetUInt32(FunctionCall1(state->hash_proc, b->datum1))
& state->hash_mask;
if (hash1 > hash2)
--- 2837,2846 ----
/* Compute hash codes and mask off bits we don't want to sort by */
Assert(!a->isnull1);
! hash1 = DatumGetUInt32(a->datum1)
& state->hash_mask;
Assert(!b->isnull1);
! hash2 = DatumGetUInt32(b->datum1)
& state->hash_mask;
if (hash1 > hash2)
***************
*** 2894,2899 **** copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
--- 2895,2916 ----
}
static void
+ copytup_index_hash(Tuplesortstate *state, SortTuple *stup, void *tup)
+ {
+ IndexTuple tuple = (IndexTuple) tup;
+ unsigned int tuplen = IndexTupleSize(tuple);
+ IndexTuple newtuple;
+
+ /* copy the tuple into sort storage */
+ newtuple = (IndexTuple) palloc(tuplen);
+ memcpy(newtuple, tuple, tuplen);
+ USEMEM(state, GetMemoryChunkSpace(newtuple));
+ stup->tuple = (void *) newtuple;
+ /* set up first-column key value(i.e. hash code) */
+ stup->datum1 = _hash_get_datum(newtuple);
+ stup->isnull1 = false;
+ }
+ static void
writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup)
{
IndexTuple tuple = (IndexTuple) stup->tuple;
***************
*** 2936,2941 **** readtup_index(Tuplesortstate *state, SortTuple *stup,
--- 2953,2979 ----
}
static void
+ readtup_index_hash(Tuplesortstate *state, SortTuple *stup,
+ int tapenum, unsigned int len)
+ {
+ unsigned int tuplen = len - sizeof(unsigned int);
+ IndexTuple tuple = (IndexTuple) palloc(tuplen);
+
+ USEMEM(state, GetMemoryChunkSpace(tuple));
+ if (LogicalTapeRead(state->tapeset, tapenum, (void *) tuple,
+ tuplen) != tuplen)
+ elog(ERROR, "unexpected end of data");
+ if (state->randomAccess) /* need trailing length word? */
+ if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tuplen,
+ sizeof(tuplen)) != sizeof(tuplen))
+ elog(ERROR, "unexpected end of data");
+ stup->tuple = (void *) tuple;
+ /* set up first-column key value(i.e. hash code) */
+ stup->datum1 = _hash_get_datum(tuple);
+ stup->isnull1 = false;
+ }
+
+ static void
reversedirection_index_btree(Tuplesortstate *state)
{
ScanKey scanKey = state->indexScanKey;
*** a/src/include/access/hash.h
--- b/src/include/access/hash.h
***************
*** 100,105 **** typedef struct HashScanOpaqueData
--- 100,107 ----
/* Current and marked position of the scan */
ItemPointerData hashso_curpos;
ItemPointerData hashso_mrkpos;
+ /* Hash value of the scan key */
+ uint32 hashso_sk_hash;
} HashScanOpaqueData;
typedef HashScanOpaqueData *HashScanOpaque;
***************
*** 227,233 **** typedef HashMetaPageData *HashMetaPage;
*/
#define HASHPROC 1
!
/* public routines */
extern Datum hashbuild(PG_FUNCTION_ARGS);
--- 229,239 ----
*/
#define HASHPROC 1
! /*
! * hard-coded hash desc
! */
! #define Schema_Hash \
! { 0, {"hashcode"}, 23, -1, 4, 1, 0, -1, -1, true, 'p', 'i', false, false, false, true, 0 }
/* public routines */
extern Datum hashbuild(PG_FUNCTION_ARGS);
***************
*** 330,335 **** extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
--- 336,345 ----
uint32 highmask, uint32 lowmask);
extern uint32 _hash_log2(uint32 num);
extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
+ extern TupleDesc _get_hash_desc();
+ extern Datum _hash_get_datum(IndexTuple itup);
+ extern IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull);
+ OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
/* hash.c */
extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
--
Sent via pgsql-patches mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-patches