> We would really appreciate help on finalizing this patch, especially in
> regard to style issues. Thank you for all the help.
Here is a cleaned-up version. I fixed a number of whitespace issues,
improved a few comments, and rearranged one set of nested if-else
statements (hopefully without breaking anything in the process).
Josh / eggyknap -
Can you rerun your performance tests with this version of the patch?
...Robert
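
For anyone reviewing who hasn't followed the earlier Histojoin threads: the
heart of the patch is a small open-addressing hash table keyed by the hash
values of the probe relation's MCVs, and a build tuple that lands in one of
those buckets is kept in memory for the first batch. Below is a rough
standalone sketch of just that lookup logic (not part of the patch; the
names and sizes are made up for illustration):

/*
 * Standalone sketch of the IM (skew) bucket lookup -- not part of the
 * patch.  NBUCKETS and the helper names are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define NBUCKETS        16      /* must be a power of two */
#define INVALID_BUCKET  (-1)

static uint32_t bucket_hash[NBUCKETS];
static int      bucket_used[NBUCKETS];

/* Remember an MCV hash value; one bucket per distinct hash value. */
static void
add_mcv_hash(uint32_t hashvalue)
{
    int     bucket = hashvalue & (NBUCKETS - 1);

    /* linear probing: walk forward until we hit our value or a hole */
    while (bucket_used[bucket] && bucket_hash[bucket] != hashvalue)
        bucket = (bucket + 1) & (NBUCKETS - 1);

    if (!bucket_used[bucket])
    {
        bucket_used[bucket] = 1;
        bucket_hash[bucket] = hashvalue;
    }
}

/* Return the bucket index for hashvalue, or INVALID_BUCKET on a miss. */
static int
lookup_bucket(uint32_t hashvalue)
{
    int     bucket = hashvalue & (NBUCKETS - 1);

    while (bucket_used[bucket] && bucket_hash[bucket] != hashvalue)
        bucket = (bucket + 1) & (NBUCKETS - 1);

    return bucket_used[bucket] ? bucket : INVALID_BUCKET;
}

int
main(void)
{
    add_mcv_hash(0x1234ABCD);
    add_mcv_hash(0x0000000D);   /* collides with the value above mod 16 */

    printf("%d\n", lookup_bucket(0x1234ABCD));  /* hit */
    printf("%d\n", lookup_bucket(0x0000000D));  /* hit, one probe later */
    printf("%d\n", lookup_bucket(0xDEADBEEF));  /* miss -> -1 */
    return 0;
}

A probe either finds its own hash value, walks past any collisions, or
falls into a hole and reports a miss. That is essentially what
ExecHashGetIMBucket does; the real code additionally tracks per-bucket
tuple lists, memory accounting, and the freeze order, and it keeps the
table several times larger than the number of MCVs so probing always
terminates at a hole.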
*** a/src/backend/executor/nodeHash.c
--- b/src/backend/executor/nodeHash.c
***************
*** 53,58 **** ExecHash(HashState *node)
--- 53,222 ----
return NULL;
}
+ /*
+ * ----------------------------------------------------------------
+ * ExecHashGetIMBucket
+ *
+ * Returns the index of the in-memory bucket for this
+ * hashvalue, or IM_INVALID_BUCKET if the hashvalue is not
+ * associated with any unfrozen bucket (or if skew
+ * optimization is not being used).
+ *
+ * It is possible for a tuple whose join attribute value is not
+ * an MCV to hash to an in-memory bucket because of the limited
+ * number of distinct hash values, but that is unlikely, and
+ * everything continues to work even if it happens: we merely
+ * cache some less useful tuples in memory, and the join result
+ * is still correct.
+ *
+ * hashtable->imBucket is an open-addressing hashtable of
+ * in-memory buckets (HashJoinIMBucket).
+ * ----------------------------------------------------------------
+ */
+ int
+ ExecHashGetIMBucket(HashJoinTable hashtable, uint32 hashvalue)
+ {
+ int bucket;
+
+ if (!hashtable->enableSkewOptimization)
+ return IM_INVALID_BUCKET;
+
+ /* Compute hashvalue mod nIMBuckets (a power of two, hence the bitmask). */
+ bucket = hashvalue & (hashtable->nIMBuckets - 1);
+
+ /*
+ * While we have neither hit a hole nor found the bucket with a
+ * matching hashvalue, we have collided; advance to the next
+ * bucket location (this is an open-addressing hashtable).
+ */
+ while (hashtable->imBucket[bucket] != NULL
+ && hashtable->imBucket[bucket]->hashvalue != hashvalue)
+ bucket = (bucket + 1) & (hashtable->nIMBuckets - 1);
+
+ /*
+ * If we stopped on a bucket with a matching hashvalue, return its
+ * index.
+ */
+ if (hashtable->imBucket[bucket] != NULL
+ && hashtable->imBucket[bucket]->hashvalue == hashvalue)
+ return bucket;
+
+ /*
+ * Otherwise we ran into an empty slot or a frozen bucket, so tuples
+ * with this hashvalue do not get the in-memory treatment.
+ */
+ return IM_INVALID_BUCKET;
+ }
+
+ /*
+ * ----------------------------------------------------------------
+ * ExecHashFreezeNextIMBucket
+ *
+ * Freeze the tuples of the next in-memory bucket by pushing
+ * them into the main hashtable. Buckets are frozen in order
+ * so that the best tuples are kept in memory the longest.
+ * ----------------------------------------------------------------
+ */
+ static bool
+ ExecHashFreezeNextIMBucket(HashJoinTable hashtable)
+ {
+ int bucketToFreeze;
+ int bucketno;
+ int batchno;
+ uint32 hashvalue;
+ HashJoinTuple hashTuple;
+ HashJoinTuple nextHashTuple;
+ HashJoinIMBucket *bucket;
+ MinimalTuple mintuple;
+
+ /* Calculate the imBucket index of the bucket to freeze. */
+ bucketToFreeze = hashtable->imBucketFreezeOrder
+ [hashtable->nUsedIMBuckets - 1 - hashtable->nIMBucketsFrozen];
+
+ /* Grab a pointer to the actual IM bucket. */
+ bucket = hashtable->imBucket[bucketToFreeze];
+ hashvalue = bucket->hashvalue;
+
+ /*
+ * Grab a pointer to the first tuple in the soon-to-be-frozen IM bucket.
+ */
+ hashTuple = bucket->tuples;
+
+ /*
+ * Calculate which bucket and batch the tuples belong to in the main
+ * non-IM hashtable.
+ */
+ ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno, &batchno);
+
+ /* until we have read all tuples from this bucket */
+ while (hashTuple != NULL)
+ {
+ /*
+ * This code closely parallels ExecHashTableInsert; we cannot call it
+ * directly because it expects a TupleTableSlot, and here we already
+ * have HashJoinTuples.
+ */
+ mintuple = HJTUPLE_MINTUPLE(hashTuple);
+
+ /* Decide whether to put the tuple in the hash table or a temp file. */
+ if (batchno == hashtable->curbatch)
+ {
+ /* Put the tuple in the main hash table. */
+ nextHashTuple = hashTuple->next;
+ hashTuple->next = hashtable->buckets[bucketno];
+ hashtable->buckets[bucketno] = hashTuple;
+ hashTuple = nextHashTuple;
+ hashtable->spaceUsedIM -= HJTUPLE_OVERHEAD + mintuple->t_len;
+ }
+ else
+ {
+ /* Put the tuple into a temp file, to be processed in a later batch. */
+ Assert(batchno > hashtable->curbatch);
+ ExecHashJoinSaveTuple(mintuple, hashvalue,
+ &hashtable->innerBatchFile[batchno]);
+ /*
+ * Account for the freed memory now; this must happen before we
+ * pfree the hashTuple, or we lose access to the tuple's size.
+ */
+ hashtable->spaceUsed -= HJTUPLE_OVERHEAD + mintuple->t_len;
+ hashtable->spaceUsedIM -= HJTUPLE_OVERHEAD + mintuple->t_len;
+ nextHashTuple = hashTuple->next;
+ pfree(hashTuple);
+ hashTuple = nextHashTuple;
+ }
+ }
+
+ /*
+ * Free the bucket struct, which is no longer needed. The rest of the
+ * code treats a frozen in-memory bucket the same as one that never
+ * existed: setting the pointer to NULL makes it behave as if this
+ * bucket had never been created at all.
+ */
+ pfree(bucket);
+ hashtable->imBucket[bucketToFreeze] = NULL;
+ hashtable->spaceUsed -= IM_BUCKET_OVERHEAD;
+ hashtable->spaceUsedIM -= IM_BUCKET_OVERHEAD;
+ hashtable->nIMBucketsFrozen++;
+
+ /*
+ * If all buckets have now been frozen, turn off skew-aware
+ * partitioning and free the supporting arrays, as they would just
+ * waste space from here on.
+ */
+ if (hashtable->nUsedIMBuckets == hashtable->nIMBucketsFrozen)
+ {
+ hashtable->enableSkewOptimization = false;
+ pfree(hashtable->imBucket);
+ pfree(hashtable->imBucketFreezeOrder);
+ hashtable->spaceUsed -= hashtable->spaceUsedIM;
+ hashtable->spaceUsedIM = 0;
+ }
+
+ return true;
+ }
+
/* ----------------------------------------------------------------
* MultiExecHash
*
***************
*** 69,74 **** MultiExecHash(HashState *node)
--- 233,240 ----
TupleTableSlot *slot;
ExprContext *econtext;
uint32 hashvalue;
+ MinimalTuple mintuple;
+ int bucketNumber;
/* must provide our own instrumentation support */
if (node->ps.instrument)
***************
*** 99,106 **** MultiExecHash(HashState *node)
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
&hashvalue))
{
! ExecHashTableInsert(hashtable, slot, hashvalue);
! hashtable->totalTuples += 1;
}
}
--- 265,306 ----
if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
&hashvalue))
{
! bucketNumber = ExecHashGetIMBucket(hashtable, hashvalue);
!
! /* tuples not destined for an in-memory bucket are handled normally */
! if (bucketNumber == IM_INVALID_BUCKET)
! ExecHashTableInsert(hashtable, slot, hashvalue);
! else
! {
! HashJoinTuple hashTuple;
! int hashTupleSize;
!
! /* build a HashJoinTuple from the slot's minimal tuple */
! mintuple = ExecFetchSlotMinimalTuple(slot);
! hashTupleSize = HJTUPLE_OVERHEAD + mintuple->t_len;
! hashTuple = (HashJoinTuple)
! MemoryContextAlloc(hashtable->batchCxt, hashTupleSize);
! hashTuple->hashvalue = hashvalue;
! memcpy(HJTUPLE_MINTUPLE(hashTuple), mintuple, mintuple->t_len);
!
! /* Push the HashJoinTuple onto the front of the IM bucket. */
! hashTuple->next = hashtable->imBucket[bucketNumber]->tuples;
! hashtable->imBucket[bucketNumber]->tuples = hashTuple;
!
! /*
! * We have taken up more memory, so freeze IM buckets until we are
! * back under spaceAllowedIM.
! */
! hashtable->spaceUsed += hashTupleSize;
! hashtable->spaceUsedIM += hashTupleSize;
! while (hashtable->spaceUsedIM > hashtable->spaceAllowedIM
! && ExecHashFreezeNextIMBucket(hashtable))
! ;
! /* Also make sure we have not exceeded spaceAllowed. */
! if (hashtable->spaceUsed > hashtable->spaceAllowed)
! ExecHashIncreaseNumBatches(hashtable);
! }
! hashtable->totalTuples++;
}
}
***************
*** 269,274 **** ExecHashTableCreate(Hash *node, List *hashOperators)
--- 469,483 ----
hashtable->outerBatchFile = NULL;
hashtable->spaceUsed = 0;
hashtable->spaceAllowed = work_mem * 1024L;
+ /* Initialize skew optimization related hashtable variables. */
+ hashtable->spaceUsedIM = 0;
+ hashtable->spaceAllowedIM
+ = hashtable->spaceAllowed * IM_WORK_MEM_PERCENT / 100;
+ hashtable->enableSkewOptimization = false;
+ hashtable->nUsedIMBuckets = 0;
+ hashtable->nIMBuckets = 0;
+ hashtable->imBucket = NULL;
+ hashtable->nIMBucketsFrozen = 0;
/*
* Get info about the hash functions to be used for each hash key. Also
***************
*** 815,825 **** ExecScanHashBucket(HashJoinState *hjstate,
/*
* hj_CurTuple is NULL to start scanning a new bucket, or the address of
* the last tuple returned from the current bucket.
*/
! if (hashTuple == NULL)
! hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
! else
hashTuple = hashTuple->next;
while (hashTuple != NULL)
{
--- 1024,1040 ----
/*
* hj_CurTuple is NULL to start scanning a new bucket, or the address of
* the last tuple returned from the current bucket.
+ *
+ * If the outer tuple hashed to an IM bucket, scan that bucket;
+ * otherwise scan the standard hashtable bucket.
*/
! if (hashTuple != NULL)
hashTuple = hashTuple->next;
+ else if (hjstate->hj_OuterTupleIMBucketNo != IM_INVALID_BUCKET)
+ hashTuple = hashtable->imBucket[hjstate->hj_OuterTupleIMBucketNo]
+ ->tuples;
+ else
+ hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
while (hashTuple != NULL)
{
*** a/src/backend/executor/nodeHashjoin.c
--- b/src/backend/executor/nodeHashjoin.c
***************
*** 20,25 ****
--- 20,29 ----
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "utils/memutils.h"
+ #include "utils/syscache.h"
+ #include "utils/lsyscache.h"
+ #include "parser/parsetree.h"
+ #include "catalog/pg_statistic.h"
/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
***************
*** 34,39 **** static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
--- 38,227 ----
TupleTableSlot *tupleSlot);
static int ExecHashJoinNewBatch(HashJoinState *hjstate);
+ /*
+ * ----------------------------------------------------------------
+ * ExecHashJoinDetectSkew
+ *
+ * If MCV statistics can be found for the join attribute of
+ * this hash join, create a hash table of in-memory buckets.
+ * Each bucket corresponds to one MCV hashvalue and is filled
+ * with inner-relation tuples whose join attribute hashes to
+ * that value. If a join attribute value is an MCV of the
+ * outer (probe) relation, inner (build) relation tuples with
+ * that value are more likely to find join partners, so it
+ * pays to keep them in memory while joining the first batch.
+ * ----------------------------------------------------------------
+ */
+ static void
+ ExecHashJoinDetectSkew(EState *estate, HashJoinState *hjstate, int tupwidth)
+ {
+ HeapTuple statsTuple;
+ FuncExprState *clause;
+ ExprState *argstate;
+ Var *variable;
+ HashJoinTable hashtable;
+ Datum *values;
+ int nvalues;
+ float4 *numbers;
+ int nnumbers;
+ Oid relid;
+ Oid atttype;
+ int i;
+ int mcvsToUse;
+
+ /* Only use statistics if there is a single join attribute. */
+ if (list_length(hjstate->hashclauses) != 1)
+ return; /* Histojoin is not defined for more than one join key */
+
+ hashtable = hjstate->hj_HashTable;
+
+ /*
+ * Estimate the number of IM buckets that will fit in
+ * the memory allowed for IM buckets.
+ *
+ * hashtable->imBucket holds up to 8 times as many HashJoinIMBucket
+ * pointers as there are MCV hashvalues. A uint16 index in
+ * hashtable->imBucketFreezeOrder is created for each IM bucket, and
+ * one HashJoinIMBucket struct is created for each unique MCV
+ * hashvalue, i.e. at most one struct per MCV.
+ *
+ * We also assume each IM bucket ends up holding a single build tuple
+ * once the build relation has been partitioned. That estimate could be
+ * high if tuples are filtered out before this join, but then the extra
+ * memory is simply used by the regular hashjoin batch. It could be low
+ * for a many-to-many join, but then IM buckets are frozen to free
+ * memory as needed while the build relation is being partitioned.
+ */
+ mcvsToUse = hashtable->spaceAllowedIM / (
+ /* size of a hash tuple */
+ HJTUPLE_OVERHEAD + MAXALIGN(sizeof(MinimalTupleData))
+ + MAXALIGN(tupwidth)
+ /* max size of hashtable pointers per MCV */
+ + (8 * sizeof(HashJoinIMBucket*))
+ + sizeof(uint16) /* size of imBucketFreezeOrder entry */
+ + IM_BUCKET_OVERHEAD /* size of IM bucket struct */
+ );
+ if (mcvsToUse == 0)
+ return; /* No point in considering this any further. */
+
+ /*
+ * Determine the relation id and attribute id of the single join
+ * attribute of the probe relation.
+ */
+ clause = (FuncExprState *) linitial(hjstate->hashclauses);
+ argstate = (ExprState *) linitial(clause->args);
+
+ /*
+ * Do not try to exploit stats if the join key is an expression
+ * rather than a simple column reference.
+ */
+ if (!IsA(argstate->expr, Var))
+ return;
+
+ variable = (Var *) argstate->expr;
+ relid = getrelid(variable->varnoold, estate->es_range_table);
+ atttype = variable->vartype;
+
+ statsTuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid),
+ Int16GetDatum(variable->varoattno), 0, 0);
+ if (!HeapTupleIsValid(statsTuple))
+ return;
+
+ /* Look for MCV statistics for the attribute. */
+ if (get_attstatsslot(statsTuple, atttype, variable->vartypmod,
+ STATISTIC_KIND_MCV, InvalidOid, &values, &nvalues,
+ &numbers, &nnumbers))
+ {
+ FmgrInfo *hashfunctions;
+ int nbuckets = 2;
+
+ /*
+ * The IM bucket array (imBucket) is an open-addressing hashtable
+ * whose size is a power of 2 larger than the number of MCVs used.
+ */
+ if (mcvsToUse > nvalues)
+ mcvsToUse = nvalues;
+ while (nbuckets <= mcvsToUse)
+ nbuckets <<= 1;
+ /* use two more bits just to help avoid collisions */
+ nbuckets <<= 2;
+ hashtable->nIMBuckets = nbuckets;
+ hashtable->enableSkewOptimization = true;
+
+ /*
+ * Allocate the bucket arrays in the hashtable's batch context:
+ * they are only needed during the first batch, and they are freed
+ * automatically when that context is reset at the end of the batch.
+ */
+ hashtable->imBucket =
+ MemoryContextAllocZero(hashtable->batchCxt,
+ nbuckets * sizeof(HashJoinIMBucket*));
+ hashtable->imBucketFreezeOrder =
+ MemoryContextAllocZero(hashtable->batchCxt,
+ mcvsToUse * sizeof(uint16));
+ /* Count the overhead of the IM pointers immediately. */
+ hashtable->spaceUsed += nbuckets * sizeof(HashJoinIMBucket*)
+ + mcvsToUse * sizeof(uint16);
+ hashtable->spaceUsedIM += nbuckets * sizeof(HashJoinIMBucket*)
+ + mcvsToUse * sizeof(uint16);
+
+ /*
+ * Grab the hash functions as we will be generating the hashvalues
+ * in this section.
+ */
+ hashfunctions = hashtable->outer_hashfunctions;
+
+ /* Create the buckets */
+ for (i = 0; i < mcvsToUse; i++)
+ {
+ uint32 hashvalue = DatumGetUInt32(
+ FunctionCall1(&hashfunctions[0], values[i]));
+ int bucket = hashvalue & (nbuckets - 1);
+
+ /*
+ * While we have neither hit a hole nor found a bucket with the
+ * same hashvalue, we have collided; advance to the next bucket
+ * location (remember this is an open-addressing hashtable).
+ */
+ while (hashtable->imBucket[bucket] != NULL
+ && hashtable->imBucket[bucket]->hashvalue != hashvalue)
+ bucket = (bucket + 1) & (nbuckets - 1);
+
+ /*
+ * Leave the bucket alone if it already holds this hashvalue: we
+ * want only one bucket per hashvalue, so it is fine for two MCVs
+ * to hash to the same value.
+ */
+ if (hashtable->imBucket[bucket] == NULL)
+ {
+ /*
+ * As above, allocate the bucket struct in the batch context so
+ * that it is freed automatically once the first batch is done.
+ */
+ hashtable->imBucket[bucket]
+ = MemoryContextAllocZero(hashtable->batchCxt,
+ sizeof(HashJoinIMBucket));
+ hashtable->imBucket[bucket]->hashvalue = hashvalue;
+ hashtable->imBucketFreezeOrder[hashtable->nUsedIMBuckets]
+ = bucket;
+ hashtable->nUsedIMBuckets++;
+ /* Count the overhead of the IM bucket struct */
+ hashtable->spaceUsed += IM_BUCKET_OVERHEAD;
+ hashtable->spaceUsedIM += IM_BUCKET_OVERHEAD;
+ }
+ }
+
+ free_attstatsslot(atttype, values, nvalues, numbers, nnumbers);
+ }
+
+ ReleaseSysCache(statsTuple);
+ }
/* ----------------------------------------------------------------
* ExecHashJoin
***************
*** 147,152 **** ExecHashJoin(HashJoinState *node)
--- 335,345 ----
node->hj_HashOperators);
node->hj_HashTable = hashtable;
+ /* Use skew optimization only when there is more than one batch. */
+ if (hashtable->nbatch > 1)
+ ExecHashJoinDetectSkew(estate, node,
+ (outerPlan((Hash *) hashNode->ps.plan))->plan_width);
+
/*
* execute the Hash node, to build the hash table
*/
***************
*** 205,216 **** ExecHashJoin(HashJoinState *node)
ExecHashGetBucketAndBatch(hashtable, hashvalue,
&node->hj_CurBucketNo, &batchno);
node->hj_CurTuple = NULL;
/*
* Now we've got an outer tuple and the corresponding hash bucket,
! * but this tuple may not belong to the current batch.
*/
! if (batchno != hashtable->curbatch)
{
/*
* Need to postpone this outer tuple to a later batch. Save it
--- 398,415 ----
ExecHashGetBucketAndBatch(hashtable, hashvalue,
&node->hj_CurBucketNo, &batchno);
node->hj_CurTuple = NULL;
+
+ /* Does the outer tuple match an IM bucket? */
+ node->hj_OuterTupleIMBucketNo =
+ ExecHashGetIMBucket(hashtable, hashvalue);
/*
* Now we've got an outer tuple and the corresponding hash bucket,
! * but it might not belong to the current batch. If it matches an
! * in-memory bucket, though, we can join it immediately regardless
! * of its batch number.
*/
! if (node->hj_OuterTupleIMBucketNo == IM_INVALID_BUCKET
! && batchno != hashtable->curbatch)
{
/*
* Need to postpone this outer tuple to a later batch. Save it
***************
*** 641,647 **** start_over:
nbatch = hashtable->nbatch;
curbatch = hashtable->curbatch;
! if (curbatch > 0)
{
/*
* We no longer need the previous outer batch file; close it right
--- 840,866 ----
nbatch = hashtable->nbatch;
curbatch = hashtable->curbatch;
! /* if we just finished the first batch */
! if (curbatch == 0)
! {
! /*
! * Reset the skew optimization state. IM buckets are no longer used
! * from this point on, because they are only needed while joining
! * the first batch.
! *
! * In particular, ExecHashGetIMBucket must return IM_INVALID_BUCKET
! * quickly for all subsequent calls.
! *
! * The IM buckets only occupy memory if this is a multi-batch join,
! * in which case ExecHashTableReset is about to be called and will
! * free everything used by the IM buckets and their tuples when it
! * deletes hashtable->batchCxt. For a single-batch join, imBucket and
! * imBucketFreezeOrder are already NULL.
! */
! hashtable->enableSkewOptimization = false;
! hashtable->spaceUsedIM = 0;
! }
! else if (curbatch > 0)
{
/*
* We no longer need the previous outer batch file; close it right
*** a/src/include/executor/hashjoin.h
--- b/src/include/executor/hashjoin.h
***************
*** 72,77 **** typedef struct HashJoinTupleData
--- 72,96 ----
#define HJTUPLE_MINTUPLE(hjtup) \
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
+ /*
+ * Stores a hashvalue and linked list of tuples that share that hashvalue.
+ *
+ * When processing MCVs to detect skew in the probe relation of a hash join,
+ * the hashvalue is computed and stored before any tuples have been read
+ * (see ExecHashJoinDetectSkew).
+ *
+ * Build tuples that hash to the same hashvalue are placed in the bucket while
+ * reading the build relation.
+ */
+ typedef struct HashJoinIMBucket
+ {
+ uint32 hashvalue;
+ HashJoinTuple tuples;
+ } HashJoinIMBucket;
+
+ #define IM_INVALID_BUCKET (-1)
+ #define IM_WORK_MEM_PERCENT 2
+ #define IM_BUCKET_OVERHEAD MAXALIGN(sizeof(HashJoinIMBucket))
typedef struct HashJoinTableData
{
***************
*** 113,121 **** typedef struct HashJoinTableData
--- 132,161 ----
Size spaceUsed; /* memory space currently used by tuples */
Size spaceAllowed; /* upper limit for space used */
+ /* memory space currently used by IM buckets and tuples */
+ Size spaceUsedIM;
+ /* upper limit for space used by IM buckets and tuples */
+ Size spaceAllowedIM;
MemoryContext hashCxt; /* context for whole-hash-join storage */
MemoryContext batchCxt; /* context for this-batch-only storage */
+
+ /* are we using the in-memory (skew) bucket optimization for this join? */
+ bool enableSkewOptimization;
+ HashJoinIMBucket **imBucket; /* hashtable of IM buckets */
+ /*
+ * array of imBucket indexes of the created IM buckets, stored in
+ * the reverse of the order in which they will be frozen to disk
+ */
+ uint16 *imBucketFreezeOrder;
+ int nIMBuckets; /* # of buckets in the IM buckets hashtable */
+ /*
+ * # of used buckets in the IM buckets hashtable and length of
+ * imBucketFreezeOrder array
+ */
+ int nUsedIMBuckets;
+ /* # of IM buckets that have already been frozen to disk */
+ int nIMBucketsFrozen;
} HashJoinTableData;
#endif /* HASHJOIN_H */
*** a/src/include/executor/nodeHash.h
--- b/src/include/executor/nodeHash.h
***************
*** 45,48 **** extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
--- 45,50 ----
int *numbuckets,
int *numbatches);
+ extern int ExecHashGetIMBucket(HashJoinTable hashtable, uint32 hashvalue);
+
#endif /* NODEHASH_H */
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 1381,1386 **** typedef struct MergeJoinState
--- 1381,1387 ----
* hj_NeedNewOuter true if need new outer tuple on next call
* hj_MatchedOuter true if found a join match for current outer
* hj_OuterNotEmpty true if outer relation known not empty
+ * hj_OuterTupleIMBucketNo IM bucket# for the current outer tuple
* ----------------
*/
***************
*** 1406,1411 **** typedef struct HashJoinState
--- 1407,1413 ----
bool hj_NeedNewOuter;
bool hj_MatchedOuter;
bool hj_OuterNotEmpty;
+ int hj_OuterTupleIMBucketNo;
} HashJoinState;
*** a/src/include/nodes/primnodes.h
--- b/src/include/nodes/primnodes.h
***************
*** 121,128 **** typedef struct Expr
* subplans; for example, in a join node varno becomes INNER or OUTER and
* varattno becomes the index of the proper element of that subplan's target
* list. But varnoold/varoattno continue to hold the original values.
! * The code doesn't really need varnoold/varoattno, but they are very useful
! * for debugging and interpreting completed plans, so we keep them around.
*/
#define INNER 65000
#define OUTER 65001
--- 121,132 ----
* subplans; for example, in a join node varno becomes INNER or OUTER and
* varattno becomes the index of the proper element of that subplan's target
* list. But varnoold/varoattno continue to hold the original values.
! *
! * For the most part, the code doesn't really need varnoold/varoattno, but
! * they are very useful for debugging and interpreting completed plans, so we
! * keep them around. As of PostgreSQL 8.4, these values are also used by
! * ExecHashJoinDetectSkew to fetch MCV statistics when performing multi-batch
! * hash joins.
*/
#define INNER 65000
#define OUTER 65001
***************
*** 142,148 **** typedef struct Var
Index varlevelsup; /* for subquery variables referencing outer
* relations; 0 in a normal var, >0 means N
* levels up */
! Index varnoold; /* original value of varno, for debugging */
AttrNumber varoattno; /* original value of varattno */
int location; /* token location, or -1 if unknown */
} Var;
--- 146,152 ----
Index varlevelsup; /* for subquery variables referencing outer
* relations; 0 in a normal var, >0 means N
* levels up */
! Index varnoold; /* original value of varno */
AttrNumber varoattno; /* original value of varattno */
int location; /* token location, or -1 if unknown */
} Var;
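
P.S. A back-of-the-envelope check on the sizing estimate in
ExecHashJoinDetectSkew, in case it is useful to reviewers. The constants
below are rough guesses for a 64-bit build (not the real macro values),
just to get a feel for how many MCVs fit under an IM_WORK_MEM_PERCENT of 2:

/*
 * Rough cut at the mcvsToUse arithmetic -- the sizes are assumptions,
 * not the real HJTUPLE_OVERHEAD / MAXALIGN / IM_BUCKET_OVERHEAD values.
 */
#include <stdio.h>

int
main(void)
{
    long    work_mem_bytes = 1024L * 1024;              /* work_mem = 1MB */
    long    space_allowed_im = work_mem_bytes * 2 / 100;    /* 2 percent */

    int     tuple_overhead = 32;    /* tuple header overhead, roughly */
    int     tupwidth = 104;         /* maxaligned 100-byte row */
    int     bucket_ptrs = 8 * 8;    /* up to 8 bucket pointers per MCV */
    int     freeze_entry = 2;       /* one uint16 freeze-order slot */
    int     bucket_struct = 16;     /* bucket struct, maxaligned */

    long    per_mcv = tuple_overhead + tupwidth + bucket_ptrs +
                      freeze_entry + bucket_struct;

    printf("per-MCV charge ~ %ld bytes, mcvsToUse ~ %ld\n",
           per_mcv, space_allowed_im / per_mcv);
    return 0;
}

With work_mem = 1MB and a roughly 100-byte tuple this works out to
something near 100 MCVs, so the whole MCV list should usually fit under
the default statistics target; the cap mainly matters for very wide rows
or a raised statistics target.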