[HACKERS] [PATCH] cleanup hashindex for pg_migrator hashindex compat mode (for 8.4)

Zdenek Kotala Fri, 22 May 2009 13:23:48 -0700

The attached patch cleans up the hash index headers to allow compiling
hasham for the 8.3 version. It helps to improve pg_migrator by giving it
the capability to migrate a database with hash indexes without reindexing.


I discussed this patch a year ago with Alvaro when we tried to clean up
the include-bloat problem. It should also reduce the number of includes.

The main point is that the hash functions for datatypes are now in the
related datatype files in the utils/adt directory. hash_any() and
hash_uint32() are now in utils/hashfunc.c.

It would be nice to have this in 8.4 because it would allow testing the
index migration functionality.

                Thanks Zdenek

diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/access/hash/hashfunc.c pgsql_indexcompat/src/backend/access/hash/hashfunc.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/access/hash/hashfunc.c	2009-05-22 15:56:34.409314434 -0400
--- pgsql_indexcompat/src/backend/access/hash/hashfunc.c	1969-12-31 19:00:00.000000000 -0500
***************
*** 1,528 ****
- /*-------------------------------------------------------------------------
-  *
-  * hashfunc.c
-  *	  Support functions for hash access method.
-  *
-  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
-  * Portions Copyright (c) 1994, Regents of the University of California
-  *
-  *
-  * IDENTIFICATION
-  *	  $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.57 2009/01/01 17:23:35 momjian Exp $
-  *
-  * NOTES
-  *	  These functions are stored in pg_amproc.	For each operator class
-  *	  defined for hash indexes, they compute the hash value of the argument.
-  *
-  *	  Additional hash functions appear in /utils/adt/ files for various
-  *	  specialized datatypes.
-  *
-  *	  It is expected that every bit of a hash function's 32-bit result is
-  *	  as random as every other; failure to ensure this is likely to lead
-  *	  to poor performance of hash joins, for example.  In most cases a hash
-  *	  function should use hash_any() or its variant hash_uint32().
-  *-------------------------------------------------------------------------
-  */
- 
- #include "postgres.h"
- 
- #include "access/hash.h"
- 
- 
- /* Note: this is used for both "char" and boolean datatypes */
- Datum
- hashchar(PG_FUNCTION_ARGS)
- {
- 	return hash_uint32((int32) PG_GETARG_CHAR(0));
- }
- 
- Datum
- hashint2(PG_FUNCTION_ARGS)
- {
- 	return hash_uint32((int32) PG_GETARG_INT16(0));
- }
- 
- Datum
- hashint4(PG_FUNCTION_ARGS)
- {
- 	return hash_uint32(PG_GETARG_INT32(0));
- }
- 
- Datum
- hashint8(PG_FUNCTION_ARGS)
- {
- 	/*
- 	 * The idea here is to produce a hash value compatible with the values
- 	 * produced by hashint4 and hashint2 for logically equal inputs; this is
- 	 * necessary to support cross-type hash joins across these input types.
- 	 * Since all three types are signed, we can xor the high half of the int8
- 	 * value if the sign is positive, or the complement of the high half when
- 	 * the sign is negative.
- 	 */
- #ifndef INT64_IS_BUSTED
- 	int64		val = PG_GETARG_INT64(0);
- 	uint32		lohalf = (uint32) val;
- 	uint32		hihalf = (uint32) (val >> 32);
- 
- 	lohalf ^= (val >= 0) ? hihalf : ~hihalf;
- 
- 	return hash_uint32(lohalf);
- #else
- 	/* here if we can't count on "x >> 32" to work sanely */
- 	return hash_uint32((int32) PG_GETARG_INT64(0));
- #endif
- }
- 
- Datum
- hashoid(PG_FUNCTION_ARGS)
- {
- 	return hash_uint32((uint32) PG_GETARG_OID(0));
- }
- 
- Datum
- hashenum(PG_FUNCTION_ARGS)
- {
- 	return hash_uint32((uint32) PG_GETARG_OID(0));
- }
- 
- Datum
- hashfloat4(PG_FUNCTION_ARGS)
- {
- 	float4		key = PG_GETARG_FLOAT4(0);
- 	float8		key8;
- 
- 	/*
- 	 * On IEEE-float machines, minus zero and zero have different bit patterns
- 	 * but should compare as equal.  We must ensure that they have the same
- 	 * hash value, which is most reliably done this way:
- 	 */
- 	if (key == (float4) 0)
- 		PG_RETURN_UINT32(0);
- 
- 	/*
- 	 * To support cross-type hashing of float8 and float4, we want to return
- 	 * the same hash value hashfloat8 would produce for an equal float8 value.
- 	 * So, widen the value to float8 and hash that.  (We must do this rather
- 	 * than have hashfloat8 try to narrow its value to float4; that could fail
- 	 * on overflow.)
- 	 */
- 	key8 = key;
- 
- 	return hash_any((unsigned char *) &key8, sizeof(key8));
- }
- 
- Datum
- hashfloat8(PG_FUNCTION_ARGS)
- {
- 	float8		key = PG_GETARG_FLOAT8(0);
- 
- 	/*
- 	 * On IEEE-float machines, minus zero and zero have different bit patterns
- 	 * but should compare as equal.  We must ensure that they have the same
- 	 * hash value, which is most reliably done this way:
- 	 */
- 	if (key == (float8) 0)
- 		PG_RETURN_UINT32(0);
- 
- 	return hash_any((unsigned char *) &key, sizeof(key));
- }
- 
- Datum
- hashoidvector(PG_FUNCTION_ARGS)
- {
- 	oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
- 
- 	return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
- }
- 
- Datum
- hashint2vector(PG_FUNCTION_ARGS)
- {
- 	int2vector *key = (int2vector *) PG_GETARG_POINTER(0);
- 
- 	return hash_any((unsigned char *) key->values, key->dim1 * sizeof(int2));
- }
- 
- Datum
- hashname(PG_FUNCTION_ARGS)
- {
- 	char	   *key = NameStr(*PG_GETARG_NAME(0));
- 	int			keylen = strlen(key);
- 
- 	Assert(keylen < NAMEDATALEN);		/* else it's not truncated correctly */
- 
- 	return hash_any((unsigned char *) key, keylen);
- }
- 
- Datum
- hashtext(PG_FUNCTION_ARGS)
- {
- 	text	   *key = PG_GETARG_TEXT_PP(0);
- 	Datum		result;
- 
- 	/*
- 	 * Note: this is currently identical in behavior to hashvarlena, but keep
- 	 * it as a separate function in case we someday want to do something
- 	 * different in non-C locales.	(See also hashbpchar, if so.)
- 	 */
- 	result = hash_any((unsigned char *) VARDATA_ANY(key),
- 					  VARSIZE_ANY_EXHDR(key));
- 
- 	/* Avoid leaking memory for toasted inputs */
- 	PG_FREE_IF_COPY(key, 0);
- 
- 	return result;
- }
- 
- /*
-  * hashvarlena() can be used for any varlena datatype in which there are
-  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
-  */
- Datum
- hashvarlena(PG_FUNCTION_ARGS)
- {
- 	struct varlena *key = PG_GETARG_VARLENA_PP(0);
- 	Datum		result;
- 
- 	result = hash_any((unsigned char *) VARDATA_ANY(key),
- 					  VARSIZE_ANY_EXHDR(key));
- 
- 	/* Avoid leaking memory for toasted inputs */
- 	PG_FREE_IF_COPY(key, 0);
- 
- 	return result;
- }
- 
- /*
-  * This hash function was written by Bob Jenkins
-  * ([email protected]), and superficially adapted
-  * for PostgreSQL by Neil Conway. For more information on this
-  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
-  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
-  *
-  * In the current code, we have adopted Bob's 2006 update of his hash
-  * function to fetch the data a word at a time when it is suitably aligned.
-  * This makes for a useful speedup, at the cost of having to maintain
-  * four code paths (aligned vs unaligned, and little-endian vs big-endian).
-  * It also uses two separate mixing functions mix() and final(), instead
-  * of a slower multi-purpose function.
-  */
- 
- /* Get a bit mask of the bits set in non-uint32 aligned addresses */
- #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
- 
- /* Rotate a uint32 value left by k bits - note multiple evaluation! */
- #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
- 
- /*----------
-  * mix -- mix 3 32-bit values reversibly.
-  *
-  * This is reversible, so any information in (a,b,c) before mix() is
-  * still in (a,b,c) after mix().
-  *
-  * If four pairs of (a,b,c) inputs are run through mix(), or through
-  * mix() in reverse, there are at least 32 bits of the output that
-  * are sometimes the same for one pair and different for another pair.
-  * This was tested for:
-  * * pairs that differed by one bit, by two bits, in any combination
-  *   of top bits of (a,b,c), or in any combination of bottom bits of
-  *   (a,b,c).
-  * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  *   is commonly produced by subtraction) look like a single 1-bit
-  *   difference.
-  * * the base values were pseudorandom, all zero but one bit set, or
-  *   all zero plus a counter that starts at zero.
-  * 
-  * This does not achieve avalanche.  There are input bits of (a,b,c)
-  * that fail to affect some output bits of (a,b,c), especially of a.  The
-  * most thoroughly mixed value is c, but it doesn't really even achieve
-  * avalanche in c. 
-  * 
-  * This allows some parallelism.  Read-after-writes are good at doubling
-  * the number of bits affected, so the goal of mixing pulls in the opposite
-  * direction from the goal of parallelism.  I did what I could.  Rotates
-  * seem to cost as much as shifts on every machine I could lay my hands on,
-  * and rotates are much kinder to the top and bottom bits, so I used rotates.
-  *----------
-  */
- #define mix(a,b,c) \
- { \
-   a -= c;  a ^= rot(c, 4);  c += b; \
-   b -= a;  b ^= rot(a, 6);  a += c; \
-   c -= b;  c ^= rot(b, 8);  b += a; \
-   a -= c;  a ^= rot(c,16);  c += b; \
-   b -= a;  b ^= rot(a,19);  a += c; \
-   c -= b;  c ^= rot(b, 4);  b += a; \
- }
- 
- /*----------
-  * final -- final mixing of 3 32-bit values (a,b,c) into c
-  *
-  * Pairs of (a,b,c) values differing in only a few bits will usually
-  * produce values of c that look totally different.  This was tested for
-  * * pairs that differed by one bit, by two bits, in any combination
-  *   of top bits of (a,b,c), or in any combination of bottom bits of
-  *   (a,b,c).
-  * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  *   is commonly produced by subtraction) look like a single 1-bit
-  *   difference.
-  * * the base values were pseudorandom, all zero but one bit set, or
-  *   all zero plus a counter that starts at zero.
-  *     
-  * The use of separate functions for mix() and final() allow for a
-  * substantial performance increase since final() does not need to
-  * do well in reverse, but is does need to affect all output bits.
-  * mix(), on the other hand, does not need to affect all output
-  * bits (affecting 32 bits is enough).  The original hash function had
-  * a single mixing operation that had to satisfy both sets of requirements
-  * and was slower as a result.
-  *----------
-  */
- #define final(a,b,c) \
- { \
-   c ^= b; c -= rot(b,14); \
-   a ^= c; a -= rot(c,11); \
-   b ^= a; b -= rot(a,25); \
-   c ^= b; c -= rot(b,16); \
-   a ^= c; a -= rot(c, 4); \
-   b ^= a; b -= rot(a,14); \
-   c ^= b; c -= rot(b,24); \
- }
- 
- /*
-  * hash_any() -- hash a variable-length key into a 32-bit value
-  *		k		: the key (the unaligned variable-length array of bytes)
-  *		len		: the length of the key, counting by bytes
-  *
-  * Returns a uint32 value.	Every bit of the key affects every bit of
-  * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
-  * About 6*len+35 instructions. The best hash table sizes are powers
-  * of 2.  There is no need to do mod a prime (mod is sooo slow!).
-  * If you need less than 32 bits, use a bitmask.
-  *
-  * Note: we could easily change this function to return a 64-bit hash value
-  * by using the final values of both b and c.  b is perhaps a little less
-  * well mixed than c, however.
-  */
- Datum
- hash_any(register const unsigned char *k, register int keylen)
- {
- 	register uint32 a,
- 				b,
- 				c,
- 				len;
- 
- 	/* Set up the internal state */
- 	len = keylen;
- 	a = b = c = 0x9e3779b9 + len + 3923095;
- 
- 	/* If the source pointer is word-aligned, we use word-wide fetches */
- 	if (((long) k & UINT32_ALIGN_MASK) == 0)
- 	{
- 		/* Code path for aligned source data */
- 		register const uint32 *ka = (const uint32 *) k;
- 
- 		/* handle most of the key */
- 		while (len >= 12)
- 		{
- 			a += ka[0];
- 			b += ka[1];
- 			c += ka[2];
- 			mix(a, b, c);
- 			ka += 3;
- 			len -= 12;
- 		}
- 
- 		/* handle the last 11 bytes */
- 		k = (const unsigned char *) ka;
- #ifdef WORDS_BIGENDIAN
- 		switch (len)
- 		{
- 			case 11:
- 				c += ((uint32) k[10] << 8);
- 				/* fall through */
- 			case 10:
- 				c += ((uint32) k[9] << 16);
- 				/* fall through */
- 			case 9:
- 				c += ((uint32) k[8] << 24);
- 				/* the lowest byte of c is reserved for the length */
- 				/* fall through */
- 			case 8:
- 				b += ka[1];
- 				a += ka[0];
- 				break;
- 			case 7:
- 				b += ((uint32) k[6] << 8);
- 				/* fall through */
- 			case 6:
- 				b += ((uint32) k[5] << 16);
- 				/* fall through */
- 			case 5:
- 				b += ((uint32) k[4] << 24);
- 				/* fall through */
- 			case 4:
- 				a += ka[0];
- 				break;
- 			case 3:
- 				a += ((uint32) k[2] << 8);
- 				/* fall through */
- 			case 2:
- 				a += ((uint32) k[1] << 16);
- 				/* fall through */
- 			case 1:
- 				a += ((uint32) k[0] << 24);
- 			/* case 0: nothing left to add */
- 		}
- #else /* !WORDS_BIGENDIAN */
- 		switch (len)
- 		{
- 			case 11:
- 				c += ((uint32) k[10] << 24);
- 				/* fall through */
- 			case 10:
- 				c += ((uint32) k[9] << 16);
- 				/* fall through */
- 			case 9:
- 				c += ((uint32) k[8] << 8);
- 				/* the lowest byte of c is reserved for the length */
- 				/* fall through */
- 			case 8:
- 				b += ka[1];
- 				a += ka[0];
- 				break;
- 			case 7:
- 				b += ((uint32) k[6] << 16);
- 				/* fall through */
- 			case 6:
- 				b += ((uint32) k[5] << 8);
- 				/* fall through */
- 			case 5:
- 				b += k[4];
- 				/* fall through */
- 			case 4:
- 				a += ka[0];
- 				break;
- 			case 3:
- 				a += ((uint32) k[2] << 16);
- 				/* fall through */
- 			case 2:
- 				a += ((uint32) k[1] << 8);
- 				/* fall through */
- 			case 1:
- 				a += k[0];
- 			/* case 0: nothing left to add */
- 		}
- #endif /* WORDS_BIGENDIAN */
- 	}
- 	else
- 	{
- 		/* Code path for non-aligned source data */
- 
- 		/* handle most of the key */
- 		while (len >= 12)
- 		{
- #ifdef WORDS_BIGENDIAN
- 			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
- 			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
- 			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
- #else /* !WORDS_BIGENDIAN */
- 			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
- 			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
- 			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
- #endif /* WORDS_BIGENDIAN */
- 			mix(a, b, c);
- 			k += 12;
- 			len -= 12;
- 		}
- 
- 		/* handle the last 11 bytes */
- #ifdef WORDS_BIGENDIAN
- 		switch (len)			/* all the case statements fall through */
- 		{
- 			case 11:
- 				c += ((uint32) k[10] << 8);
- 			case 10:
- 				c += ((uint32) k[9] << 16);
- 			case 9:
- 				c += ((uint32) k[8] << 24);
- 				/* the lowest byte of c is reserved for the length */
- 			case 8:
- 				b += k[7];
- 			case 7:
- 				b += ((uint32) k[6] << 8);
- 			case 6:
- 				b += ((uint32) k[5] << 16);
- 			case 5:
- 				b += ((uint32) k[4] << 24);
- 			case 4:
- 				a += k[3];
- 			case 3:
- 				a += ((uint32) k[2] << 8);
- 			case 2:
- 				a += ((uint32) k[1] << 16);
- 			case 1:
- 				a += ((uint32) k[0] << 24);
- 			/* case 0: nothing left to add */
- 		}
- #else /* !WORDS_BIGENDIAN */
- 		switch (len)			/* all the case statements fall through */
- 		{
- 			case 11:
- 				c += ((uint32) k[10] << 24);
- 			case 10:
- 				c += ((uint32) k[9] << 16);
- 			case 9:
- 				c += ((uint32) k[8] << 8);
- 				/* the lowest byte of c is reserved for the length */
- 			case 8:
- 				b += ((uint32) k[7] << 24);
- 			case 7:
- 				b += ((uint32) k[6] << 16);
- 			case 6:
- 				b += ((uint32) k[5] << 8);
- 			case 5:
- 				b += k[4];
- 			case 4:
- 				a += ((uint32) k[3] << 24);
- 			case 3:
- 				a += ((uint32) k[2] << 16);
- 			case 2:
- 				a += ((uint32) k[1] << 8);
- 			case 1:
- 				a += k[0];
- 			/* case 0: nothing left to add */
- 		}
- #endif /* WORDS_BIGENDIAN */
- 	}
- 
- 	final(a, b, c);
- 
- 	/* report the result */
- 	return UInt32GetDatum(c);
- }
- 
- /*
-  * hash_uint32() -- hash a 32-bit value
-  *
-  * This has the same result as
-  *		hash_any(&k, sizeof(uint32))
-  * but is faster and doesn't force the caller to store k into memory.
-  */
- Datum
- hash_uint32(uint32 k)
- {
- 	register uint32 a,
- 				b,
- 				c;
- 
- 	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
- 	a += k;
- 
- 	final(a, b, c);
- 
- 	/* report the result */
- 	return UInt32GetDatum(c);
- }
--- 0 ----
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/access/hash/Makefile pgsql_indexcompat/src/backend/access/hash/Makefile
*** pgsql_indexcompat.5d4d60e3a557/src/backend/access/hash/Makefile	2009-05-22 15:56:34.353808065 -0400
--- pgsql_indexcompat/src/backend/access/hash/Makefile	2009-05-22 15:56:34.409876088 -0400
***************
*** 12,18 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = hash.o hashfunc.o hashinsert.o hashovfl.o hashpage.o hashscan.o \
         hashsearch.o hashsort.o hashutil.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 12,18 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = hash.o hashinsert.o hashovfl.o hashpage.o hashscan.o \
         hashsearch.o hashsort.o hashutil.o
  
  include $(top_srcdir)/src/backend/common.mk
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/nodes/bitmapset.c pgsql_indexcompat/src/backend/nodes/bitmapset.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/nodes/bitmapset.c	2009-05-22 15:56:34.355138078 -0400
--- pgsql_indexcompat/src/backend/nodes/bitmapset.c	2009-05-22 15:56:34.410159011 -0400
***************
*** 21,27 ****
  #include "postgres.h"
  
  #include "nodes/bitmapset.h"
! #include "access/hash.h"
  
  
  #define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
--- 21,27 ----
  #include "postgres.h"
  
  #include "nodes/bitmapset.h"
! #include "utils/hashfunc.h"
  
  
  #define WORDNUM(x)	((x) / BITS_PER_BITMAPWORD)
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/tsearch/ts_typanalyze.c pgsql_indexcompat/src/backend/tsearch/ts_typanalyze.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/tsearch/ts_typanalyze.c	2009-05-22 15:56:34.355928139 -0400
--- pgsql_indexcompat/src/backend/tsearch/ts_typanalyze.c	2009-05-22 15:56:34.410422878 -0400
***************
*** 13,24 ****
   */
  #include "postgres.h"
  
- #include "access/hash.h"
  #include "catalog/pg_operator.h"
  #include "commands/vacuum.h"
  #include "tsearch/ts_type.h"
  #include "utils/builtins.h"
  #include "utils/hsearch.h"
  
  
  /* A hash key for lexemes */
--- 13,24 ----
   */
  #include "postgres.h"
  
  #include "catalog/pg_operator.h"
  #include "commands/vacuum.h"
  #include "tsearch/ts_type.h"
  #include "utils/builtins.h"
  #include "utils/hsearch.h"
+ #include "utils/hashfunc.h"
  
  
  /* A hash key for lexemes */
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/date.c pgsql_indexcompat/src/backend/utils/adt/date.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/date.c	2009-05-22 15:56:34.360806325 -0400
--- pgsql_indexcompat/src/backend/utils/adt/date.c	2009-05-22 15:56:34.411013646 -0400
***************
*** 20,32 ****
  #include <float.h>
  #include <time.h>
  
- #include "access/hash.h"
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
  #include "parser/scansup.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/date.h"
  #include "utils/nabstime.h"
  
  /*
--- 20,32 ----
  #include <float.h>
  #include <time.h>
  
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
  #include "parser/scansup.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/date.h"
+ #include "utils/hashfunc.h"
  #include "utils/nabstime.h"
  
  /*
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/enum.c pgsql_indexcompat/src/backend/utils/adt/enum.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/enum.c	2009-05-22 15:56:34.361377758 -0400
--- pgsql_indexcompat/src/backend/utils/adt/enum.c	2009-05-22 15:56:34.411175195 -0400
***************
*** 17,22 ****
--- 17,23 ----
  #include "fmgr.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/lsyscache.h"
  #include "utils/syscache.h"
  #include "libpq/pqformat.h"
***************
*** 433,435 ****
--- 434,442 ----
  		return 1;
  	return 0;
  }
+ 
+ Datum
+ hashenum(PG_FUNCTION_ARGS)
+ {
+ 	return hash_uint32((uint32) PG_GETARG_OID(0));
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/float.c pgsql_indexcompat/src/backend/utils/adt/float.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/float.c	2009-05-22 15:56:34.367021659 -0400
--- pgsql_indexcompat/src/backend/utils/adt/float.c	2009-05-22 15:56:34.411406466 -0400
***************
*** 23,28 ****
--- 23,29 ----
  #include "libpq/pqformat.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  
  
  #ifndef M_PI
***************
*** 2745,2750 ****
--- 2746,2793 ----
  	PG_RETURN_INT32(result);
  }
  
+ Datum
+ hashfloat4(PG_FUNCTION_ARGS)
+ {
+ 	float4		key = PG_GETARG_FLOAT4(0);
+ 	float8		key8;
+ 
+ 	/*
+ 	 * On IEEE-float machines, minus zero and zero have different bit patterns
+ 	 * but should compare as equal.  We must ensure that they have the same
+ 	 * hash value, which is most reliably done this way:
+ 	 */
+ 	if (key == (float4) 0)
+ 		PG_RETURN_UINT32(0);
+ 
+ 	/*
+ 	 * To support cross-type hashing of float8 and float4, we want to return
+ 	 * the same hash value hashfloat8 would produce for an equal float8 value.
+ 	 * So, widen the value to float8 and hash that.  (We must do this rather
+ 	 * than have hashfloat8 try to narrow its value to float4; that could fail
+ 	 * on overflow.)
+ 	 */
+ 	key8 = key;
+ 
+ 	return hash_any((unsigned char *) &key8, sizeof(key8));
+ }
+ 
+ Datum
+ hashfloat8(PG_FUNCTION_ARGS)
+ {
+ 	float8		key = PG_GETARG_FLOAT8(0);
+ 
+ 	/*
+ 	 * On IEEE-float machines, minus zero and zero have different bit patterns
+ 	 * but should compare as equal.  We must ensure that they have the same
+ 	 * hash value, which is most reliably done this way:
+ 	 */
+ 	if (key == (float8) 0)
+ 		PG_RETURN_UINT32(0);
+ 
+ 	return hash_any((unsigned char *) &key, sizeof(key));
+ }
+ 
  /* ========== PRIVATE ROUTINES ========== */
  
  #ifndef HAVE_CBRT
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/char.c pgsql_indexcompat/src/backend/utils/adt/char.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/char.c	2009-05-22 15:56:34.356602890 -0400
--- pgsql_indexcompat/src/backend/utils/adt/char.c	2009-05-22 15:56:34.410785720 -0400
***************
*** 19,24 ****
--- 19,25 ----
  
  #include "libpq/pqformat.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  
  /*****************************************************************************
   *	 USER I/O ROUTINES														 *
***************
*** 211,213 ****
--- 212,221 ----
  
  	PG_RETURN_TEXT_P(result);
  }
+ 
+ /* Note: this is used for both "char" and boolean datatypes */
+ Datum
+ hashchar(PG_FUNCTION_ARGS)
+ {
+ 	return hash_uint32((int32) PG_GETARG_CHAR(0));
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/int.c pgsql_indexcompat/src/backend/utils/adt/int.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/int.c	2009-05-22 15:56:34.369838628 -0400
--- pgsql_indexcompat/src/backend/utils/adt/int.c	2009-05-22 15:56:34.411593455 -0400
***************
*** 36,41 ****
--- 36,42 ----
  #include "libpq/pqformat.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  
  
  #define SAMESIGN(a,b)	(((a) < 0) == ((b) < 0))
***************
*** 1353,1355 ****
--- 1354,1376 ----
  		/* do when there is no more left */
  		SRF_RETURN_DONE(funcctx);
  }
+ 
+ Datum
+ hashint2(PG_FUNCTION_ARGS)
+ {
+ 	return hash_uint32((int32) PG_GETARG_INT16(0));
+ }
+ 
+ Datum
+ hashint4(PG_FUNCTION_ARGS)
+ {
+ 	return hash_uint32(PG_GETARG_INT32(0));
+ }
+ 
+ Datum
+ hashint2vector(PG_FUNCTION_ARGS)
+ {
+ 	int2vector *key = (int2vector *) PG_GETARG_POINTER(0);
+ 
+ 	return hash_any((unsigned char *) key->values, key->dim1 * sizeof(int2));
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/int8.c pgsql_indexcompat/src/backend/utils/adt/int8.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/int8.c	2009-05-22 15:56:34.372307503 -0400
--- pgsql_indexcompat/src/backend/utils/adt/int8.c	2009-05-22 15:56:34.411779391 -0400
***************
*** 21,26 ****
--- 21,27 ----
  #include "libpq/pqformat.h"
  #include "nodes/nodes.h"
  #include "utils/int8.h"
+ #include "utils/hashfunc.h"
  
  
  #define MAXINT8LEN		25
***************
*** 1401,1403 ****
--- 1402,1429 ----
  		/* do when there is no more left */
  		SRF_RETURN_DONE(funcctx);
  }
+ 
+ Datum
+ hashint8(PG_FUNCTION_ARGS)
+ {
+ 	/*
+ 	 * The idea here is to produce a hash value compatible with the values
+ 	 * produced by hashint4 and hashint2 for logically equal inputs; this is
+ 	 * necessary to support cross-type hash joins across these input types.
+ 	 * Since all three types are signed, we can xor the high half of the int8
+ 	 * value if the sign is positive, or the complement of the high half when
+ 	 * the sign is negative.
+ 	 */
+ #ifndef INT64_IS_BUSTED
+ 	int64		val = PG_GETARG_INT64(0);
+ 	uint32		lohalf = (uint32) val;
+ 	uint32		hihalf = (uint32) (val >> 32);
+ 
+ 	lohalf ^= (val >= 0) ? hihalf : ~hihalf;
+ 
+ 	return hash_uint32(lohalf);
+ #else
+ 	/* here if we can't count on "x >> 32" to work sanely */
+ 	return hash_uint32((int32) PG_GETARG_INT64(0));
+ #endif
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/mac.c pgsql_indexcompat/src/backend/utils/adt/mac.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/mac.c	2009-05-22 15:56:34.372677805 -0400
--- pgsql_indexcompat/src/backend/utils/adt/mac.c	2009-05-22 15:56:34.411931673 -0400
***************
*** 6,14 ****
  
  #include "postgres.h"
  
- #include "access/hash.h"
  #include "libpq/pqformat.h"
  #include "utils/builtins.h"
  #include "utils/inet.h"
  
  
--- 6,14 ----
  
  #include "postgres.h"
  
  #include "libpq/pqformat.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/inet.h"
  
  
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/name.c pgsql_indexcompat/src/backend/utils/adt/name.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/name.c	2009-05-22 15:56:34.373430941 -0400
--- pgsql_indexcompat/src/backend/utils/adt/name.c	2009-05-22 15:56:34.412080342 -0400
***************
*** 27,32 ****
--- 27,33 ----
  #include "miscadmin.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/lsyscache.h"
  
  
***************
*** 319,321 ****
--- 320,333 ----
  
  	PG_RETURN_POINTER(array);
  }
+ 
+ Datum
+ hashname(PG_FUNCTION_ARGS)
+ {
+ 	char	   *key = NameStr(*PG_GETARG_NAME(0));
+ 	int			keylen = strlen(key);
+ 
+ 	Assert(keylen < NAMEDATALEN);		/* else it's not truncated correctly */
+ 
+ 	return hash_any((unsigned char *) key, keylen);
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/network.c pgsql_indexcompat/src/backend/utils/adt/network.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/network.c	2009-05-22 15:56:34.376147427 -0400
--- pgsql_indexcompat/src/backend/utils/adt/network.c	2009-05-22 15:56:34.412266819 -0400
***************
*** 12,24 ****
  #include <netinet/in.h>
  #include <arpa/inet.h>
  
- #include "access/hash.h"
  #include "catalog/pg_type.h"
  #include "libpq/ip.h"
  #include "libpq/libpq-be.h"
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
  #include "utils/builtins.h"
  #include "utils/inet.h"
  
  
--- 12,24 ----
  #include <netinet/in.h>
  #include <arpa/inet.h>
  
  #include "catalog/pg_type.h"
  #include "libpq/ip.h"
  #include "libpq/libpq-be.h"
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/inet.h"
  
  
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/numeric.c pgsql_indexcompat/src/backend/utils/adt/numeric.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/numeric.c	2009-05-22 15:56:34.382545862 -0400
--- pgsql_indexcompat/src/backend/utils/adt/numeric.c	2009-05-22 15:56:34.412630511 -0400
***************
*** 26,37 ****
  #include <limits.h>
  #include <math.h>
  
- #include "access/hash.h"
  #include "catalog/pg_type.h"
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/int8.h"
  #include "utils/numeric.h"
  
--- 26,37 ----
  #include <limits.h>
  #include <math.h>
  
  #include "catalog/pg_type.h"
  #include "libpq/pqformat.h"
  #include "miscadmin.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/int8.h"
  #include "utils/numeric.h"
  
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/oid.c pgsql_indexcompat/src/backend/utils/adt/oid.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/oid.c	2009-05-22 15:56:34.384087154 -0400
--- pgsql_indexcompat/src/backend/utils/adt/oid.c	2009-05-22 15:56:34.412785642 -0400
***************
*** 21,26 ****
--- 21,27 ----
  #include "libpq/pqformat.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  
  
  #define OidVectorSize(n)	(offsetof(oidvector, values) + (n) * sizeof(Oid))
***************
*** 419,421 ****
--- 420,436 ----
  
  	PG_RETURN_BOOL(cmp > 0);
  }
+ 
+ Datum
+ hashoid(PG_FUNCTION_ARGS)
+ {
+ 	return hash_uint32((uint32) PG_GETARG_OID(0));
+ }
+ 
+ Datum
+ hashoidvector(PG_FUNCTION_ARGS)
+ {
+ 	oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
+ 
+ 	return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/timestamp.c pgsql_indexcompat/src/backend/utils/adt/timestamp.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/timestamp.c	2009-05-22 15:56:34.391980267 -0400
--- pgsql_indexcompat/src/backend/utils/adt/timestamp.c	2009-05-22 15:56:34.413104630 -0400
***************
*** 21,27 ****
  #include <limits.h>
  #include <sys/time.h>
  
- #include "access/hash.h"
  #include "access/xact.h"
  #include "catalog/pg_type.h"
  #include "funcapi.h"
--- 21,26 ----
***************
*** 31,36 ****
--- 30,36 ----
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "utils/datetime.h"
+ #include "utils/hashfunc.h"
  
  /*
   * gcc's -ffast-math switch breaks routines that expect exact results from
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/uuid.c pgsql_indexcompat/src/backend/utils/adt/uuid.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/uuid.c	2009-05-22 15:56:34.392585968 -0400
--- pgsql_indexcompat/src/backend/utils/adt/uuid.c	2009-05-22 15:56:34.413254701 -0400
***************
*** 13,21 ****
  
  #include "postgres.h"
  
- #include "access/hash.h"
  #include "libpq/pqformat.h"
  #include "utils/builtins.h"
  #include "utils/uuid.h"
  
  /* uuid size in bytes */
--- 13,21 ----
  
  #include "postgres.h"
  
  #include "libpq/pqformat.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/uuid.h"
  
  /* uuid size in bytes */
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/varchar.c pgsql_indexcompat/src/backend/utils/adt/varchar.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/varchar.c	2009-05-22 15:56:34.394025160 -0400
--- pgsql_indexcompat/src/backend/utils/adt/varchar.c	2009-05-22 15:56:34.413430641 -0400
***************
*** 15,25 ****
  #include "postgres.h"
  
  
- #include "access/hash.h"
  #include "access/tuptoaster.h"
  #include "libpq/pqformat.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "mb/pg_wchar.h"
  
  
--- 15,25 ----
  #include "postgres.h"
  
  
  #include "access/tuptoaster.h"
  #include "libpq/pqformat.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "mb/pg_wchar.h"
  
  
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/varlena.c pgsql_indexcompat/src/backend/utils/adt/varlena.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/adt/varlena.c	2009-05-22 15:56:34.400341684 -0400
--- pgsql_indexcompat/src/backend/utils/adt/varlena.c	2009-05-22 15:56:34.413685740 -0400
***************
*** 24,29 ****
--- 24,30 ----
  #include "parser/scansup.h"
  #include "regex/regex.h"
  #include "utils/builtins.h"
+ #include "utils/hashfunc.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
  
***************
*** 3102,3104 ****
--- 3103,3144 ----
  
  	PG_RETURN_INT32(result);
  }
+ 
+ Datum
+ hashtext(PG_FUNCTION_ARGS)
+ {
+ 	text	   *txt = PG_GETARG_TEXT_PP(0);
+ 	Datum		hashval;
+ 
+ 	/*
+ 	 * Same computation as hashvarlena today.  It stays a distinct entry
+ 	 * point so that text hashing could diverge later (for example in
+ 	 * non-C locales); hashbpchar would need the same treatment.
+ 	 */
+ 	hashval = hash_any((unsigned char *) VARDATA_ANY(txt),
+ 					   VARSIZE_ANY_EXHDR(txt));
+ 
+ 	/* Release the copy, if fetching the argument had to make one */
+ 	PG_FREE_IF_COPY(txt, 0);
+ 
+ 	return hashval;
+ }
+ 
+ /*
+  * hashvarlena() hashes the data bytes of a varlena value.  It is only
+  * suitable for types where values that compare equal are always bitwise equal.
+  */
+ Datum
+ hashvarlena(PG_FUNCTION_ARGS)
+ {
+ 	struct varlena *val = PG_GETARG_VARLENA_PP(0);
+ 	Datum		hashval;
+ 
+ 	hashval = hash_any((unsigned char *) VARDATA_ANY(val),
+ 					   VARSIZE_ANY_EXHDR(val));
+ 
+ 	/* Release the copy, if fetching the argument had to make one */
+ 	PG_FREE_IF_COPY(val, 0);
+ 
+ 	return hashval;
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/hash/hashfn.c pgsql_indexcompat/src/backend/utils/hash/hashfn.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/hash/hashfn.c	2009-05-22 15:56:34.401341014 -0400
--- pgsql_indexcompat/src/backend/utils/hash/hashfn.c	2009-05-22 15:56:34.414083214 -0400
***************
*** 21,28 ****
   */
  #include "postgres.h"
  
- #include "access/hash.h"
  #include "nodes/bitmapset.h"
  
  
  /*
--- 21,28 ----
   */
  #include "postgres.h"
  
  #include "nodes/bitmapset.h"
+ #include "utils/hashfunc.h"
  
  
  /*
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/hash/hashfunc.c pgsql_indexcompat/src/backend/utils/hash/hashfunc.c
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/hash/hashfunc.c	1969-12-31 19:00:00.000000000 -0500
--- pgsql_indexcompat/src/backend/utils/hash/hashfunc.c	2009-05-22 15:56:34.414915382 -0400
***************
*** 0 ****
--- 1,357 ----
+ /*-------------------------------------------------------------------------
+  *
+  * hashfunc.c
+  *	  Core hash functions hash_any() and hash_uint32(), shared by the datatype-specific hash support functions.
+  *
+  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *	  $PostgreSQL: pgsql/src/backend/utils/hash/hashfunc.c,v 1.57 2009/01/01 17:23:35 momjian Exp $
+  *
+  * NOTES
+  *	  It is expected that every bit of a hash function's 32-bit result is
+  *	  as random as every other; failure to ensure this is likely to lead
+  *	  to poor performance of hash joins, for example.  In most cases a hash
+  *	  function should use hash_any() or its variant hash_uint32().
+  *-------------------------------------------------------------------------
+  */
+ 
+ #include "postgres.h"
+ 
+ #include "utils/hashfunc.h"
+ 
+ /*
+  * This hash function was written by Bob Jenkins
+  * ([email protected]), and superficially adapted
+  * for PostgreSQL by Neil Conway. For more information on this
+  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
+  *
+  * In the current code, we have adopted Bob's 2006 update of his hash
+  * function to fetch the data a word at a time when it is suitably aligned.
+  * This makes for a useful speedup, at the cost of having to maintain
+  * four code paths (aligned vs unaligned, and little-endian vs big-endian).
+  * It also uses two separate mixing functions mix() and final(), instead
+  * of a slower multi-purpose function.
+  */
+ 
+ /* Low-order address bits; these are all zero exactly when an address is uint32-aligned */
+ #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
+ 
+ /* Rotate a uint32 value left by k bits (requires 0 < k < 32) - note multiple evaluation! */
+ #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+ 
+ /*----------
+  * mix -- mix 3 32-bit values reversibly; a, b and c are updated in place.
+  *
+  * This is reversible, so any information in (a,b,c) before mix() is
+  * still in (a,b,c) after mix().
+  *
+  * If four pairs of (a,b,c) inputs are run through mix(), or through
+  * mix() in reverse, there are at least 32 bits of the output that
+  * are sometimes the same for one pair and different for another pair.
+  * This was tested for:
+  * * pairs that differed by one bit, by two bits, in any combination
+  *   of top bits of (a,b,c), or in any combination of bottom bits of
+  *   (a,b,c).
+  * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+  *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+  *   is commonly produced by subtraction) look like a single 1-bit
+  *   difference.
+  * * the base values were pseudorandom, all zero but one bit set, or
+  *   all zero plus a counter that starts at zero.
+  *
+  * This does not achieve avalanche.  There are input bits of (a,b,c)
+  * that fail to affect some output bits of (a,b,c), especially of a.  The
+  * most thoroughly mixed value is c, but it doesn't really even achieve
+  * avalanche in c.
+  *
+  * This allows some parallelism.  Read-after-writes are good at doubling
+  * the number of bits affected, so the goal of mixing pulls in the opposite
+  * direction from the goal of parallelism.  I did what I could.  Rotates
+  * seem to cost as much as shifts on every machine I could lay my hands on,
+  * and rotates are much kinder to the top and bottom bits, so I used rotates.
+  *----------
+  */
+ #define mix(a,b,c) \
+ do { \
+   a -= c;  a ^= rot(c, 4);  c += b; \
+   b -= a;  b ^= rot(a, 6);  a += c; \
+   c -= b;  c ^= rot(b, 8);  b += a; \
+   a -= c;  a ^= rot(c,16);  c += b; \
+   b -= a;  b ^= rot(a,19);  a += c; \
+   c -= b;  c ^= rot(b, 4);  b += a; \
+ } while (0)
+ 
+ /*----------
+  * final -- final mixing of 3 32-bit values (a,b,c) into c
+  *
+  * Pairs of (a,b,c) values differing in only a few bits will usually
+  * produce values of c that look totally different.  This was tested for
+  * * pairs that differed by one bit, by two bits, in any combination
+  *   of top bits of (a,b,c), or in any combination of bottom bits of
+  *   (a,b,c).
+  * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+  *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+  *   is commonly produced by subtraction) look like a single 1-bit
+  *   difference.
+  * * the base values were pseudorandom, all zero but one bit set, or
+  *   all zero plus a counter that starts at zero.
+  *
+  * The use of separate functions for mix() and final() allows for a
+  * substantial performance increase since final() does not need to
+  * do well in reverse, but it does need to affect all output bits.
+  * mix(), on the other hand, does not need to affect all output
+  * bits (affecting 32 bits is enough).  The original hash function had
+  * a single mixing operation that had to satisfy both sets of requirements
+  * and was slower as a result.
+  *----------
+  */
+ #define final(a,b,c) \
+ do { \
+   c ^= b; c -= rot(b,14); \
+   a ^= c; a -= rot(c,11); \
+   b ^= a; b -= rot(a,25); \
+   c ^= b; c -= rot(b,16); \
+   a ^= c; a -= rot(c, 4); \
+   b ^= a; b -= rot(a,14); \
+   c ^= b; c -= rot(b,24); \
+ } while (0)
+ 
+ /*
+  * hash_any() -- hash a variable-length key into a 32-bit value
+  *		k		: the key (the unaligned variable-length array of bytes)
+  *		keylen	: the length of the key, counting by bytes
+  *
+  * Returns a uint32 value as a Datum.  Every bit of the key affects every bit
+  * of the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+  * About 6*len+35 instructions. The best hash table sizes are powers
+  * of 2.  There is no need to do mod a prime (mod is sooo slow!).
+  * If you need less than 32 bits, use a bitmask.
+  *
+  * Note: we could easily change this function to return a 64-bit hash value
+  * by using the final values of both b and c.  b is perhaps a little less
+  * well mixed than c, however.
+  */
+ Datum
+ hash_any(register const unsigned char *k, register int keylen)
+ {
+ 	register uint32 a,
+ 				b,
+ 				c,
+ 				len;
+ 
+ 	/* Set up the internal state; the key length is folded into the seed */
+ 	len = keylen;
+ 	a = b = c = 0x9e3779b9 + len + 3923095;
+ 
+ 	/* If the source pointer is word-aligned, we use word-wide fetches */
+ 	if (((long) k & UINT32_ALIGN_MASK) == 0)
+ 	{
+ 		/* Code path for aligned source data */
+ 		register const uint32 *ka = (const uint32 *) k;
+ 
+ 		/* handle most of the key, 12 bytes (three words) per round */
+ 		while (len >= 12)
+ 		{
+ 			a += ka[0];
+ 			b += ka[1];
+ 			c += ka[2];
+ 			mix(a, b, c);
+ 			ka += 3;
+ 			len -= 12;
+ 		}
+ 
+ 		/* handle the remaining 0..11 bytes; k is re-pointed at the tail */
+ 		k = (const unsigned char *) ka;
+ #ifdef WORDS_BIGENDIAN
+ 		switch (len)
+ 		{
+ 			case 11:
+ 				c += ((uint32) k[10] << 8);
+ 				/* fall through */
+ 			case 10:
+ 				c += ((uint32) k[9] << 16);
+ 				/* fall through */
+ 			case 9:
+ 				c += ((uint32) k[8] << 24);
+ 				/* the lowest byte of c is reserved for the length */
+ 				/* fall through */
+ 			case 8:
+ 				b += ka[1];
+ 				a += ka[0];
+ 				break;
+ 			case 7:
+ 				b += ((uint32) k[6] << 8);
+ 				/* fall through */
+ 			case 6:
+ 				b += ((uint32) k[5] << 16);
+ 				/* fall through */
+ 			case 5:
+ 				b += ((uint32) k[4] << 24);
+ 				/* fall through */
+ 			case 4:
+ 				a += ka[0];
+ 				break;
+ 			case 3:
+ 				a += ((uint32) k[2] << 8);
+ 				/* fall through */
+ 			case 2:
+ 				a += ((uint32) k[1] << 16);
+ 				/* fall through */
+ 			case 1:
+ 				a += ((uint32) k[0] << 24);
+ 			/* case 0: nothing left to add */
+ 		}
+ #else /* !WORDS_BIGENDIAN */
+ 		switch (len)
+ 		{
+ 			case 11:
+ 				c += ((uint32) k[10] << 24);
+ 				/* fall through */
+ 			case 10:
+ 				c += ((uint32) k[9] << 16);
+ 				/* fall through */
+ 			case 9:
+ 				c += ((uint32) k[8] << 8);
+ 				/* the lowest byte of c is reserved for the length */
+ 				/* fall through */
+ 			case 8:
+ 				b += ka[1];
+ 				a += ka[0];
+ 				break;
+ 			case 7:
+ 				b += ((uint32) k[6] << 16);
+ 				/* fall through */
+ 			case 6:
+ 				b += ((uint32) k[5] << 8);
+ 				/* fall through */
+ 			case 5:
+ 				b += k[4];
+ 				/* fall through */
+ 			case 4:
+ 				a += ka[0];
+ 				break;
+ 			case 3:
+ 				a += ((uint32) k[2] << 16);
+ 				/* fall through */
+ 			case 2:
+ 				a += ((uint32) k[1] << 8);
+ 				/* fall through */
+ 			case 1:
+ 				a += k[0];
+ 			/* case 0: nothing left to add */
+ 		}
+ #endif /* WORDS_BIGENDIAN */
+ 	}
+ 	else
+ 	{
+ 		/* Code path for non-aligned source data */
+ 
+ 		/* handle most of the key, assembling each word byte by byte */
+ 		while (len >= 12)
+ 		{
+ #ifdef WORDS_BIGENDIAN
+ 			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+ 			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+ 			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+ #else /* !WORDS_BIGENDIAN */
+ 			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+ 			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+ 			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+ #endif /* WORDS_BIGENDIAN */
+ 			mix(a, b, c);
+ 			k += 12;
+ 			len -= 12;
+ 		}
+ 
+ 		/* handle the remaining 0..11 bytes */
+ #ifdef WORDS_BIGENDIAN
+ 		switch (len)			/* all the case statements fall through */
+ 		{
+ 			case 11:
+ 				c += ((uint32) k[10] << 8);
+ 			case 10:
+ 				c += ((uint32) k[9] << 16);
+ 			case 9:
+ 				c += ((uint32) k[8] << 24);
+ 				/* the lowest byte of c is reserved for the length */
+ 			case 8:
+ 				b += k[7];
+ 			case 7:
+ 				b += ((uint32) k[6] << 8);
+ 			case 6:
+ 				b += ((uint32) k[5] << 16);
+ 			case 5:
+ 				b += ((uint32) k[4] << 24);
+ 			case 4:
+ 				a += k[3];
+ 			case 3:
+ 				a += ((uint32) k[2] << 8);
+ 			case 2:
+ 				a += ((uint32) k[1] << 16);
+ 			case 1:
+ 				a += ((uint32) k[0] << 24);
+ 			/* case 0: nothing left to add */
+ 		}
+ #else /* !WORDS_BIGENDIAN */
+ 		switch (len)			/* all the case statements fall through */
+ 		{
+ 			case 11:
+ 				c += ((uint32) k[10] << 24);
+ 			case 10:
+ 				c += ((uint32) k[9] << 16);
+ 			case 9:
+ 				c += ((uint32) k[8] << 8);
+ 				/* the lowest byte of c is reserved for the length */
+ 			case 8:
+ 				b += ((uint32) k[7] << 24);
+ 			case 7:
+ 				b += ((uint32) k[6] << 16);
+ 			case 6:
+ 				b += ((uint32) k[5] << 8);
+ 			case 5:
+ 				b += k[4];
+ 			case 4:
+ 				a += ((uint32) k[3] << 24);
+ 			case 3:
+ 				a += ((uint32) k[2] << 16);
+ 			case 2:
+ 				a += ((uint32) k[1] << 8);
+ 			case 1:
+ 				a += k[0];
+ 			/* case 0: nothing left to add */
+ 		}
+ #endif /* WORDS_BIGENDIAN */
+ 	}
+ 
+ 	final(a, b, c);
+ 
+ 	/* report the result; final() leaves the hash in c */
+ 	return UInt32GetDatum(c);
+ }
+ 
+ /*
+  * hash_uint32() -- hash a single 32-bit value
+  *
+  * Produces the same result as
+  *		hash_any(&k, sizeof(uint32))
+  * without requiring the caller to have k stored in memory, and more cheaply.
+  */
+ Datum
+ hash_uint32(uint32 k)
+ {
+ 	/* same initial state as hash_any() sets up for a 4-byte key */
+ 	uint32		seed = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
+ 	uint32		x = seed + k;
+ 	uint32		y = seed;
+ 	uint32		z = seed;
+ 
+ 	/* only the first word receives key data, so no mix() round is needed */
+ 	final(x, y, z);
+ 
+ 	/* the hash is the third word after final() */
+ 	return UInt32GetDatum(z);
+ }
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/backend/utils/hash/Makefile pgsql_indexcompat/src/backend/utils/hash/Makefile
*** pgsql_indexcompat.5d4d60e3a557/src/backend/utils/hash/Makefile	2009-05-22 15:56:34.400748291 -0400
--- pgsql_indexcompat/src/backend/utils/hash/Makefile	2009-05-22 15:56:34.413939001 -0400
***************
*** 12,17 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = dynahash.o hashfn.o pg_crc.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 12,17 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = hashfunc.o dynahash.o hashfn.o pg_crc.o
  
  include $(top_srcdir)/src/backend/common.mk
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/include/access/hash.h pgsql_indexcompat/src/include/access/hash.h
*** pgsql_indexcompat.5d4d60e3a557/src/include/access/hash.h	2009-05-22 15:56:34.403100419 -0400
--- pgsql_indexcompat/src/include/access/hash.h	2009-05-22 15:56:34.414443320 -0400
***************
*** 251,281 ****
  extern Datum hashvacuumcleanup(PG_FUNCTION_ARGS);
  extern Datum hashoptions(PG_FUNCTION_ARGS);
  
- /*
-  * Datatype-specific hash functions in hashfunc.c.
-  *
-  * These support both hash indexes and hash joins.
-  *
-  * NOTE: some of these are also used by catcache operations, without
-  * any direct connection to hash indexes.  Also, the common hash_any
-  * routine is also used by dynahash tables.
-  */
- extern Datum hashchar(PG_FUNCTION_ARGS);
- extern Datum hashint2(PG_FUNCTION_ARGS);
- extern Datum hashint4(PG_FUNCTION_ARGS);
- extern Datum hashint8(PG_FUNCTION_ARGS);
- extern Datum hashoid(PG_FUNCTION_ARGS);
- extern Datum hashenum(PG_FUNCTION_ARGS);
- extern Datum hashfloat4(PG_FUNCTION_ARGS);
- extern Datum hashfloat8(PG_FUNCTION_ARGS);
- extern Datum hashoidvector(PG_FUNCTION_ARGS);
- extern Datum hashint2vector(PG_FUNCTION_ARGS);
- extern Datum hashname(PG_FUNCTION_ARGS);
- extern Datum hashtext(PG_FUNCTION_ARGS);
- extern Datum hashvarlena(PG_FUNCTION_ARGS);
- extern Datum hash_any(register const unsigned char *k, register int keylen);
- extern Datum hash_uint32(uint32 k);
- 
  /* private routines */
  
  /* hashinsert.c */
--- 251,256 ----
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/include/utils/builtins.h pgsql_indexcompat/src/include/utils/builtins.h
*** pgsql_indexcompat.5d4d60e3a557/src/include/utils/builtins.h	2009-05-22 15:56:34.408004271 -0400
--- pgsql_indexcompat/src/include/utils/builtins.h	2009-05-22 15:56:34.414756725 -0400
***************
*** 127,132 ****
--- 127,133 ----
  extern Datum i4tochar(PG_FUNCTION_ARGS);
  extern Datum text_char(PG_FUNCTION_ARGS);
  extern Datum char_text(PG_FUNCTION_ARGS);
+ extern Datum hashchar(PG_FUNCTION_ARGS);
  
  /* domains.c */
  extern Datum domain_in(PG_FUNCTION_ARGS);
***************
*** 150,155 ****
--- 151,157 ----
  extern Datum enum_last(PG_FUNCTION_ARGS);
  extern Datum enum_range_bounds(PG_FUNCTION_ARGS);
  extern Datum enum_range_all(PG_FUNCTION_ARGS);
+ extern Datum hashenum(PG_FUNCTION_ARGS);
  
  /* int.c */
  extern Datum int2in(PG_FUNCTION_ARGS);
***************
*** 238,243 ****
--- 240,249 ----
  extern Datum generate_series_int4(PG_FUNCTION_ARGS);
  extern Datum generate_series_step_int4(PG_FUNCTION_ARGS);
  extern int2vector *buildint2vector(const int2 *int2s, int n);
+ extern Datum hashint2(PG_FUNCTION_ARGS);
+ extern Datum hashint4(PG_FUNCTION_ARGS);
+ extern Datum hashint8(PG_FUNCTION_ARGS);
+ extern Datum hashint2vector(PG_FUNCTION_ARGS);
  
  /* name.c */
  extern Datum namein(PG_FUNCTION_ARGS);
***************
*** 257,262 ****
--- 263,269 ----
  extern Datum session_user(PG_FUNCTION_ARGS);
  extern Datum current_schema(PG_FUNCTION_ARGS);
  extern Datum current_schemas(PG_FUNCTION_ARGS);
+ extern Datum hashname(PG_FUNCTION_ARGS);
  
  /* numutils.c */
  extern int32 pg_atoi(char *s, int size, int c);
***************
*** 411,416 ****
--- 418,425 ----
  extern Datum float84gt(PG_FUNCTION_ARGS);
  extern Datum float84ge(PG_FUNCTION_ARGS);
  extern Datum width_bucket_float8(PG_FUNCTION_ARGS);
+ extern Datum hashfloat4(PG_FUNCTION_ARGS);
+ extern Datum hashfloat8(PG_FUNCTION_ARGS);
  
  /* dbsize.c */
  extern Datum pg_tablespace_size_oid(PG_FUNCTION_ARGS);
***************
*** 461,466 ****
--- 470,477 ----
  extern Datum oidvectorle(PG_FUNCTION_ARGS);
  extern Datum oidvectorge(PG_FUNCTION_ARGS);
  extern Datum oidvectorgt(PG_FUNCTION_ARGS);
+ extern Datum hashoid(PG_FUNCTION_ARGS);
+ extern Datum hashoidvector(PG_FUNCTION_ARGS);
  extern oidvector *buildoidvector(const Oid *oids, int n);
  
  /* pseudotypes.c */
***************
*** 698,703 ****
--- 709,716 ----
  extern Datum to_hex64(PG_FUNCTION_ARGS);
  extern Datum md5_text(PG_FUNCTION_ARGS);
  extern Datum md5_bytea(PG_FUNCTION_ARGS);
+ extern Datum hashtext(PG_FUNCTION_ARGS);
+ extern Datum hashvarlena(PG_FUNCTION_ARGS);
  
  extern Datum unknownin(PG_FUNCTION_ARGS);
  extern Datum unknownout(PG_FUNCTION_ARGS);
diff -Nrc pgsql_indexcompat.5d4d60e3a557/src/include/utils/hashfunc.h pgsql_indexcompat/src/include/utils/hashfunc.h
*** pgsql_indexcompat.5d4d60e3a557/src/include/utils/hashfunc.h	1969-12-31 19:00:00.000000000 -0500
--- pgsql_indexcompat/src/include/utils/hashfunc.h	2009-05-22 15:56:34.415054363 -0400
***************
*** 0 ****
--- 1,20 ----
+ /*-------------------------------------------------------------------------
+  *
+  * hashfunc.h
+  *	  header file for hash functions
+  *
+  *
+  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef HASHFUNC_H
+ #define HASHFUNC_H
+ 
+ extern Datum hash_any(register const unsigned char *k, register int keylen);
+ extern Datum hash_uint32(uint32 k);
+ 
+ #endif   /* HASHFUNC_H */

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] [PATCH] cleanup hashindex for pg_migrator hashindex compat mode (for 8.4)

Reply via email to