Hi, hackers.
I'm working on my GSoC project to improve the hash index.
A few hours ago I started a thread with some questions about the implementation:
http://archives.postgresql.org/pgsql-hackers/2008-07/msg00721.php

Some of the advice there reminded me of a design decision I still need to make.
Currently I store a hash index tuple as a regular index tuple.  An
alternative design is to change the layout of the hash index tuple
completely (using a different structure).
Here is my design.
We convert the hash code into a Datum using UInt32GetDatum() and store it
in a regular index tuple.
When we check the tuple, we get the hash key back using DatumGetUInt32().
We also need a TupleDesc with a uint32 attribute, but I don't know how to
create one, so for now I create a TupleDesc with an int32 attribute instead.
The hash items should be kept in sorted order to support binary search later. I
haven't implemented that yet because I'd like to finish a first working
version ASAP.

I've posted a patch below. It doesn't pass the regression tests yet; I'm
just posting it here to show my design.
Any comments are welcome.

diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 6a5c000..fbbd7dc 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -129,7 +129,11 @@ hashbuildCallback(Relation index,
IndexTuple itup;

/* form an index tuple and point it at the heap tuple */
+#ifdef HASHVALUE_ONLY
+    itup = _hash_form_tuple(index, values,isnull);
+#else
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
+#endif
itup->t_tid = htup->t_self;

/* Hash indexes don't index nulls, see notes in hashinsert */
@@ -171,7 +175,12 @@ hashinsert(PG_FUNCTION_ARGS)
IndexTuple itup;

/* generate an index tuple */
+#ifdef HASHVALUE_ONLY
+    itup = _hash_form_tuple(rel,values,isnull);
+#else
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+#endif
+
itup->t_tid = *ht_ctid;

/*
@@ -212,7 +221,11 @@ hashgettuple(PG_FUNCTION_ARGS)
bool res;

/* Hash indexes are never lossy (at the moment anyway) */
- scan->xs_recheck = false;
+#ifdef HASHVALUE_ONLY
+ scan->xs_recheck = true;
+#else
+ scan->xs_recheck = false;
+#endif

/*
* We hold pin but not lock on current buffer while outside the hash AM.
diff --git a/src/backend/access/hash/hashpage.c
b/src/backend/access/hash/hashpage.c
index b0b5874..068337e 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -785,7 +785,7 @@ _hash_splitbucket(Relation rel,
OffsetNumber omaxoffnum;
Page opage;
Page npage;
- TupleDesc itupdesc = RelationGetDescr(rel);
+ TupleDesc itupdesc = _create_hash_desc();

/*
* It should be okay to simultaneously write-lock pages from each bucket,
@@ -854,9 +854,13 @@ _hash_splitbucket(Relation rel,
itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
datum = index_getattr(itup, 1, itupdesc, &null);
Assert(!null);
-
+#ifdef HASHVALUE_ONLY
+ bucket = _hash_hashkey2bucket(DatumGetUInt32(datum),
+  maxbucket, highmask, lowmask);
+#else
bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
 maxbucket, highmask, lowmask);
+#endif

if (bucket == nbucket)
{
diff --git a/src/backend/access/hash/hashsearch.c
b/src/backend/access/hash/hashsearch.c
index 258526b..5e7668d 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -177,7 +177,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
else
hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
  cur->sk_subtype);
-
+    so->hashso_sk_hash = hashkey;
/*
* Acquire shared split lock so we can compute the target bucket safely
* (see README).
diff --git a/src/backend/access/hash/hashutil.c
b/src/backend/access/hash/hashutil.c
index 41e2eef..4be814e 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -20,6 +20,8 @@
#include "executor/execdebug.h"
#include "storage/bufmgr.h"
#include "utils/lsyscache.h"
+#include "utils/typcache.h"
+#include "catalog/pg_type.h"


/*
@@ -28,16 +30,29 @@
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
- TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
+ TupleDesc tupdesc = _create_hash_desc();
ScanKey key = scan->keyData;
int scanKeySize = scan->numberOfKeys;
+    Datum datum;
+    bool isNull;
+    HashScanOpaque      so = scan->opaque;

IncrIndexProcessed();

+#ifdef HASHVALUE_ONLY
+    datum = index_getattr(itup,
+                          key->sk_attno,
+                          tupdesc,
+                          &isNull);
+    if( so->hashso_sk_hash != DatumGetInt32(datum) )
+        return false;
+
+    key++;
+    scanKeySize--;
+#endif
+
while (scanKeySize > 0)
{
- Datum datum;
- bool isNull;
Datum test;

datum = index_getattr(itup,
@@ -50,7 +65,7 @@ _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
return false;
if (key->sk_flags & SK_ISNULL)
return false;
-
+
test = FunctionCall2(&key->sk_func, datum, key->sk_argument);

if (!DatumGetBool(test))
@@ -222,3 +237,54 @@ hashoptions(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(result);
PG_RETURN_NULL();
}
+
+/*
+ * _create_hash_desc - build the tuple descriptor for hash-code-only tuples
+ *
+ * Returns a newly built one-column descriptor whose single attribute,
+ * "hashcode", is declared as int4.  int4 is used, not uint32, because there
+ * is no pg_type entry with a UINT4OID; the value stored is still the 32-bit
+ * hash code, so readers convert it back to an unsigned hash key.
+ *
+ * Each call builds a new descriptor; callers that invoke this repeatedly
+ * should free the result (FreeTupleDesc) once they are done with it.
+ */
+TupleDesc
+_create_hash_desc(void)
+{
+	TupleDesc	tupdesc = CreateTemplateTupleDesc(1, false);
+
+	TupleDescInitEntry(tupdesc, 1, "hashcode", INT4OID, -1, 0);
+	return tupdesc;
+}
+
+/*
+ * _hash_form_tuple - form an index tuple that holds only the hash code
+ *
+ * rel is the hash index; values[0]/isnull[0] describe the indexed column.
+ * The key is hashed with _hash_datum2hashkey() and the hash code is stored
+ * as the tuple's only attribute, using the descriptor from
+ * _create_hash_desc().
+ *
+ * The caller's values[] array is left untouched.  A NULL key is not hashed;
+ * the tuple is formed with a null attribute so that callers which reject
+ * NULLs after forming the tuple keep working.
+ *
+ * Returns the new index tuple; the caller must still set t_tid.
+ */
+IndexTuple
+_hash_form_tuple(Relation rel, Datum *values, bool *isnull)
+{
+	TupleDesc	hashdesc = _create_hash_desc();
+	Datum		hashdatum = (Datum) 0;
+	IndexTuple	itup;
+
+	/* never pass a NULL key to the hash function */
+	if (!isnull[0])
+		hashdatum = Int32GetDatum(_hash_datum2hashkey(rel, values[0]));
+
+	/* index_form_tuple copies the datum, so the descriptor can go away */
+	itup = index_form_tuple(hashdesc, &hashdatum, isnull);
+	FreeTupleDesc(hashdesc);
+	return itup;
+}
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index ab0824d..0cf5ad8 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -100,6 +100,8 @@ typedef struct HashScanOpaqueData
/* Current and marked position of the scan */
ItemPointerData hashso_curpos;
ItemPointerData hashso_mrkpos;
+    /* Hash value of the scan key */
+    int32      hashso_sk_hash;
} HashScanOpaqueData;

typedef HashScanOpaqueData *HashScanOpaque;
@@ -227,6 +229,10 @@ typedef HashMetaPageData *HashMetaPage;
 */
#define HASHPROC 1

+/*
+ * store hash value only in the bucket
+ */
+#define HASHVALUE_ONLY

/* public routines */

@@ -330,6 +336,8 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey,
uint32 maxbucket,
uint32 highmask, uint32 lowmask);
extern uint32 _hash_log2(uint32 num);
extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
+extern TupleDesc _create_hash_desc();
+extern IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull);

/* hash.c */
extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);

--
Best Regards,
Xiao Meng

DKERC, Harbin Institute of Technology, China
Gtalk: [EMAIL PROTECTED]
MSN: [EMAIL PROTECTED]
http://xiaomeng.yo2.cn
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 6a5c000..fbbd7dc 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -129,7 +129,11 @@ hashbuildCallback(Relation index,
 	IndexTuple	itup;
 
 	/* form an index tuple and point it at the heap tuple */
+#ifdef HASHVALUE_ONLY
+    itup = _hash_form_tuple(index, values,isnull);
+#else
 	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
+#endif
 	itup->t_tid = htup->t_self;
 
 	/* Hash indexes don't index nulls, see notes in hashinsert */
@@ -171,7 +175,12 @@ hashinsert(PG_FUNCTION_ARGS)
 	IndexTuple	itup;
 
 	/* generate an index tuple */
+#ifdef HASHVALUE_ONLY
+    itup = _hash_form_tuple(rel,values,isnull);
+#else
 	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+#endif
+
 	itup->t_tid = *ht_ctid;
 
 	/*
@@ -212,7 +221,11 @@ hashgettuple(PG_FUNCTION_ARGS)
 	bool		res;
 
 	/* Hash indexes are never lossy (at the moment anyway) */
-	scan->xs_recheck = false;
+#ifdef HASHVALUE_ONLY
+	scan->xs_recheck = true;
+#else
+ 	scan->xs_recheck = false;
+#endif
 
 	/*
 	 * We hold pin but not lock on current buffer while outside the hash AM.
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index b0b5874..068337e 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -785,7 +785,7 @@ _hash_splitbucket(Relation rel,
 	OffsetNumber omaxoffnum;
 	Page		opage;
 	Page		npage;
-	TupleDesc	itupdesc = RelationGetDescr(rel);
+	TupleDesc	itupdesc = _create_hash_desc();
 
 	/*
 	 * It should be okay to simultaneously write-lock pages from each bucket,
@@ -854,9 +854,13 @@ _hash_splitbucket(Relation rel,
 		itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
 		datum = index_getattr(itup, 1, itupdesc, &null);
 		Assert(!null);
-
+#ifdef HASHVALUE_ONLY
+		bucket = _hash_hashkey2bucket(DatumGetUInt32(datum),
+									  maxbucket, highmask, lowmask);
+#else
 		bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
 									  maxbucket, highmask, lowmask);
+#endif
 
 		if (bucket == nbucket)
 		{
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 258526b..5e7668d 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -177,7 +177,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 	else
 		hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
 										   cur->sk_subtype);
-
+    so->hashso_sk_hash = hashkey;
 	/*
 	 * Acquire shared split lock so we can compute the target bucket safely
 	 * (see README).
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index 41e2eef..4be814e 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -20,6 +20,8 @@
 #include "executor/execdebug.h"
 #include "storage/bufmgr.h"
 #include "utils/lsyscache.h"
+#include "utils/typcache.h"
+#include "catalog/pg_type.h"
 
 
 /*
@@ -28,16 +30,29 @@
 bool
 _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
 {
-	TupleDesc	tupdesc = RelationGetDescr(scan->indexRelation);
+	TupleDesc	tupdesc = _create_hash_desc();
 	ScanKey		key = scan->keyData;
 	int			scanKeySize = scan->numberOfKeys;
+    Datum		datum;
+    bool		isNull;
+    HashScanOpaque      so = scan->opaque;
 
 	IncrIndexProcessed();
 
+#ifdef HASHVALUE_ONLY
+    datum = index_getattr(itup,
+                          key->sk_attno, 
+                          tupdesc, 
+                          &isNull);
+    if( so->hashso_sk_hash != DatumGetInt32(datum) )
+        return false;
+
+    key++;
+    scanKeySize--;
+#endif
+
 	while (scanKeySize > 0)
 	{
-		Datum		datum;
-		bool		isNull;
 		Datum		test;
 
 		datum = index_getattr(itup,
@@ -50,7 +65,7 @@ _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
 			return false;
 		if (key->sk_flags & SK_ISNULL)
 			return false;
-
+        
 		test = FunctionCall2(&key->sk_func, datum, key->sk_argument);
 
 		if (!DatumGetBool(test))
@@ -222,3 +237,54 @@ hashoptions(PG_FUNCTION_ARGS)
 		PG_RETURN_BYTEA_P(result);
 	PG_RETURN_NULL();
 }
+
+/*
+ * _create_hash_desc - build the tuple descriptor for hash-code-only tuples
+ *
+ * Returns a newly built one-column descriptor whose single attribute,
+ * "hashcode", is declared as int4.  int4 is used, not uint32, because there
+ * is no pg_type entry with a UINT4OID; the value stored is still the 32-bit
+ * hash code, so readers convert it back to an unsigned hash key.
+ *
+ * Each call builds a new descriptor; callers that invoke this repeatedly
+ * should free the result (FreeTupleDesc) once they are done with it.
+ */
+TupleDesc
+_create_hash_desc(void)
+{
+	TupleDesc	tupdesc = CreateTemplateTupleDesc(1, false);
+
+	TupleDescInitEntry(tupdesc, 1, "hashcode", INT4OID, -1, 0);
+	return tupdesc;
+}
+
+/*
+ * _hash_form_tuple - form an index tuple that holds only the hash code
+ *
+ * rel is the hash index; values[0]/isnull[0] describe the indexed column.
+ * The key is hashed with _hash_datum2hashkey() and the hash code is stored
+ * as the tuple's only attribute, using the descriptor from
+ * _create_hash_desc().
+ *
+ * The caller's values[] array is left untouched.  A NULL key is not hashed;
+ * the tuple is formed with a null attribute so that callers which reject
+ * NULLs after forming the tuple keep working.
+ *
+ * Returns the new index tuple; the caller must still set t_tid.
+ */
+IndexTuple
+_hash_form_tuple(Relation rel, Datum *values, bool *isnull)
+{
+	TupleDesc	hashdesc = _create_hash_desc();
+	Datum		hashdatum = (Datum) 0;
+	IndexTuple	itup;
+
+	/* never pass a NULL key to the hash function */
+	if (!isnull[0])
+		hashdatum = Int32GetDatum(_hash_datum2hashkey(rel, values[0]));
+
+	/* index_form_tuple copies the datum, so the descriptor can go away */
+	itup = index_form_tuple(hashdesc, &hashdatum, isnull);
+	FreeTupleDesc(hashdesc);
+	return itup;
+}
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index ab0824d..0cf5ad8 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -100,6 +100,8 @@ typedef struct HashScanOpaqueData
 	/* Current and marked position of the scan */
 	ItemPointerData hashso_curpos;
 	ItemPointerData hashso_mrkpos;
+    /* Hash value of the scan key */
+    int32      hashso_sk_hash;
 } HashScanOpaqueData;
 
 typedef HashScanOpaqueData *HashScanOpaque;
@@ -227,6 +229,10 @@ typedef HashMetaPageData *HashMetaPage;
  */
 #define HASHPROC		1
 
+/*
+ * store hash value only in the bucket
+ */
+#define HASHVALUE_ONLY
 
 /* public routines */
 
@@ -330,6 +336,8 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
 					 uint32 highmask, uint32 lowmask);
 extern uint32 _hash_log2(uint32 num);
 extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
+extern TupleDesc _create_hash_desc();
+extern IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull);
 
 /* hash.c */
 extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to