From 0808af61a2db37ea46d2cabef944bca48e1bc443 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 10 Jun 2019 10:54:42 -0700
Subject: [PATCH v3] hashloop fallback

The first part "chunks" the inner batch file into arbitrary partitions
of work_mem size.

This chunks the inner file so that each chunk's offset falls on a tuple
boundary.

Note that this makes it impossible to increase nbatches while loading
batches after the initial hashtable creation.

In preparation for this chunking, separate advancing the batch from
loading the batch. Advance the batch only if the page offset has been
reset to 0, then load that part of the batch.

The second part implements outer tuple batch rewinding per chunk of the
inner batch.

This would be a simple rewind and replay of the outer side for each
chunk of inner if it weren't for LOJ (left outer join), because we need
to wait to emit NULL-extended tuples for LOJ until after all chunks of
inner have been processed.

To do this without incurring additional memory pressure, use a temporary
BufFile to capture the match status of each outer side tuple. Use one
bit per tuple to represent the match status. Since the outer side tuples
are encountered in a deterministic order for parallel-oblivious
hashjoin, synchronizing the outer tuples' match status file with the
outer tuples in the batch file, to decide which ones to emit
NULL-extended, is easy and can be done with a simple counter.

For the non-hashloop-fallback scenario (including batch 0), this file is
not created and unmatched outer tuples should be emitted as they are
encountered.

OuterTupleMatchStatuses are kept in a file as a bitmap instead of in
memory.
---
 src/backend/executor/nodeHashjoin.c       | 445 ++++++++--
 src/backend/storage/file/buffile.c        |  25 +
 src/include/nodes/execnodes.h             |  13 +
 src/include/storage/buffile.h             |   3 +
 src/test/regress/expected/adaptive_hj.out | 960 ++++++++++++++++++++++
 src/test/regress/parallel_schedule        |   2 +-
 src/test/regress/serial_schedule          |   1 +
 src/test/regress/sql/adaptive_hj.sql      |  64 ++
 8 files changed, 1442 insertions(+), 71 deletions(-)
 create mode 100644 src/test/regress/expected/adaptive_hj.out
 create mode 100644 src/test/regress/sql/adaptive_hj.sql

diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 8484a287e7..73cc6685e9 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -124,9 +124,11 @@
 #define HJ_BUILD_HASHTABLE		1
 #define HJ_NEED_NEW_OUTER		2
 #define HJ_SCAN_BUCKET			3
-#define HJ_FILL_OUTER_TUPLE		4
-#define HJ_FILL_INNER_TUPLES	5
-#define HJ_NEED_NEW_BATCH		6
+#define HJ_FILL_INNER_TUPLES    4
+#define HJ_NEED_NEW_BATCH		5
+#define HJ_NEED_NEW_INNER_CHUNK 6
+#define HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER_INIT 7
+#define HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER 8
 
 /* Returns true if doing null-fill on outer relation */
 #define HJ_FILL_OUTER(hjstate)	((hjstate)->hj_NullInnerTupleSlot != NULL)
@@ -143,10 +145,16 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 												 BufFile *file,
 												 uint32 *hashvalue,
 												 TupleTableSlot *tupleSlot);
-static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
+
+static bool ExecHashJoinAdvanceBatch(HashJoinState *hjstate);
+static bool ExecHashJoinLoadInnerBatch(HashJoinState *hjstate);
 static bool ExecParallelHashJoinNewBatch(HashJoinState *hjstate);
 static void ExecParallelHashJoinPartitionOuter(HashJoinState *node);
 
+static BufFile *rewindOuterBatch(BufFile *bufFile);
+static TupleTableSlot *emitUnmatchedOuterTuple(ExprState *otherqual,
+											   ExprContext *econtext,
+											   HashJoinState *hjstate);
 
 /* ----------------------------------------------------------------
  *		ExecHashJoinImpl
@@ -176,6 +184,8 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 	int			batchno;
 	ParallelHashJoinState *parallel_state;
 
+	BufFile    *outerFileForAdaptiveRead;
+
 	/*
 	 * get information from HashJoin node
 	 */
@@ -198,6 +208,8 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 	 */
 	for (;;)
 	{
+		bool outerTupleMatchesExhausted = false;
+
 		/*
 		 * It's possible to iterate this loop many times before returning a
 		 * tuple, in some pathological cases such as needing to move much of
@@ -210,6 +222,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 		{
 			case HJ_BUILD_HASHTABLE:
 
+				elog(DEBUG1, "HJ_BUILD_HASHTABLE");
 				/*
 				 * First time through: build hash table for inner relation.
 				 */
@@ -344,6 +357,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 			case HJ_NEED_NEW_OUTER:
 
+				elog(DEBUG1, "HJ_NEED_NEW_OUTER");
 				/*
 				 * We don't have an outer tuple, try to get the next one
 				 */
@@ -357,20 +371,34 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 				if (TupIsNull(outerTupleSlot))
 				{
-					/* end of batch, or maybe whole join */
+					/*
+					 * end of batch, or maybe whole join.
+					 * for hashloop fallback, all we know is outer batch is
+					 * exhausted. inner could have more chunks
+					 */
 					if (HJ_FILL_INNER(node))
 					{
 						/* set up to scan for unmatched inner tuples */
 						ExecPrepHashTableForUnmatched(node);
 						node->hj_JoinState = HJ_FILL_INNER_TUPLES;
+						break;
 					}
-					else
-						node->hj_JoinState = HJ_NEED_NEW_BATCH;
-					continue;
+					node->hj_JoinState = HJ_NEED_NEW_INNER_CHUNK;
+					break;
 				}
-
+				/*
+				 * for the hashloop fallback case,
+				 * only initialize hj_MatchedOuter to false during the first chunk.
+				 * otherwise, we will be resetting hj_MatchedOuter to false for
+				 * an outer tuple that has already matched an inner tuple.
+				 * also, hj_MatchedOuter should be set to false for batch 0.
+				 * there are no chunks for batch 0, and node->hj_InnerFirstChunk isn't
+				 * set to true until HJ_NEED_NEW_BATCH,
+				 * so need to handle batch 0 explicitly
+				 */
+				if (node->hashloop_fallback == false || node->hj_InnerFirstChunk || hashtable->curbatch == 0)
+					node->hj_MatchedOuter = false;
 				econtext->ecxt_outertuple = outerTupleSlot;
-				node->hj_MatchedOuter = false;
 
 				/*
 				 * Find the corresponding bucket for this tuple in the main
@@ -410,6 +438,57 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 					continue;
 				}
 
+				if (hashtable->outerBatchFile == NULL)
+				{
+					node->hj_JoinState = HJ_SCAN_BUCKET;
+					break;
+				}
+
+				BufFile *outerFile = hashtable->outerBatchFile[batchno];
+				if (outerFile == NULL)
+				{
+					node->hj_JoinState = HJ_SCAN_BUCKET;
+					break;
+				}
+
+				if (node->hashloop_fallback == true)
+				{
+					/* first tuple of new batch */
+					if (node->hj_OuterMatchStatusesFile == NULL)
+					{
+						node->hj_OuterTupleCount = 0;
+						node->hj_OuterMatchStatusesFile = BufFileCreateTemp(false);
+					}
+
+					/* for fallback case, always increment tuple count */
+					node->hj_OuterTupleCount++;
+
+					/* Use the next byte on every 8th tuple */
+					if ((node->hj_OuterTupleCount - 1) % 8 == 0)
+					{
+						/*
+						 * first chunk of new batch, so write and initialize
+						 * enough bytes in the outer tuple match status file to
+						 * capture all tuples' match statuses
+						 */
+						if (node->hj_InnerFirstChunk)
+						{
+							node->hj_OuterCurrentByte = 0;
+							BufFileWrite(node->hj_OuterMatchStatusesFile, &node->hj_OuterCurrentByte, 1);
+						}
+						/* otherwise, just read the next byte */
+						else
+							BufFileRead(node->hj_OuterMatchStatusesFile, &node->hj_OuterCurrentByte, 1);
+					}
+
+					elog(DEBUG1,
+						 "in HJ_NEED_NEW_OUTER. batchno %i. val %i. read  byte %hhu. cur tup %li.",
+						 batchno,
+						 DatumGetInt32(outerTupleSlot->tts_values[0]),
+						 node->hj_OuterCurrentByte,
+						 node->hj_OuterTupleCount);
+				}
+
 				/* OK, let's scan the bucket for matches */
 				node->hj_JoinState = HJ_SCAN_BUCKET;
 
@@ -417,28 +496,32 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 			case HJ_SCAN_BUCKET:
 
+				elog(DEBUG1, "HJ_SCAN_BUCKET");
 				/*
 				 * Scan the selected hash bucket for matches to current outer
 				 */
 				if (parallel)
-				{
-					if (!ExecParallelScanHashBucket(node, econtext))
-					{
-						/* out of matches; check for possible outer-join fill */
-						node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
-						continue;
-					}
-				}
+					outerTupleMatchesExhausted = !ExecParallelScanHashBucket(node, econtext);
 				else
+					outerTupleMatchesExhausted = !ExecScanHashBucket(node, econtext);
+
+				if (outerTupleMatchesExhausted)
 				{
-					if (!ExecScanHashBucket(node, econtext))
+					/*
+					 * The current outer tuple has run out of matches, so check
+					 * whether to emit a dummy outer-join tuple.  Whether we emit
+					 * one or not, the next state is NEED_NEW_OUTER.
+					 */
+					node->hj_JoinState = HJ_NEED_NEW_OUTER;
+
+					if (node->hj_HashTable->curbatch == 0 || node->hashloop_fallback == false)
 					{
-						/* out of matches; check for possible outer-join fill */
-						node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
-						continue;
+						TupleTableSlot *slot = emitUnmatchedOuterTuple(otherqual, econtext, node);
+						if (slot != NULL)
+							return slot;
 					}
+					continue;
 				}
-
 				/*
 				 * We've got a match, but still need to test non-hashed quals.
 				 * ExecScanHashBucket already set up all the state needed to
@@ -471,42 +554,44 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 					if (node->js.single_match)
 						node->hj_JoinState = HJ_NEED_NEW_OUTER;
 
-					if (otherqual == NULL || ExecQual(otherqual, econtext))
-						return ExecProject(node->js.ps.ps_ProjInfo);
-					else
-						InstrCountFiltered2(node, 1);
-				}
-				else
-					InstrCountFiltered1(node, 1);
-				break;
+					/*
+					 * Set the match bit for this outer tuple in the match
+					 * status file
+					 */
+					if (node->hj_OuterMatchStatusesFile != NULL)
+					{
+						Assert(node->hashloop_fallback == true);
+						int byte_to_set = (node->hj_OuterTupleCount - 1) / 8;
+						int bit_to_set_in_byte = (node->hj_OuterTupleCount - 1) % 8;
 
-			case HJ_FILL_OUTER_TUPLE:
+						if (BufFileSeek(node->hj_OuterMatchStatusesFile, 0, byte_to_set, SEEK_SET) != 0)
+							elog(DEBUG1, "at beginning of file");
 
-				/*
-				 * The current outer tuple has run out of matches, so check
-				 * whether to emit a dummy outer-join tuple.  Whether we emit
-				 * one or not, the next state is NEED_NEW_OUTER.
-				 */
-				node->hj_JoinState = HJ_NEED_NEW_OUTER;
+						node->hj_OuterCurrentByte = node->hj_OuterCurrentByte | (1 << bit_to_set_in_byte);
 
-				if (!node->hj_MatchedOuter &&
-					HJ_FILL_OUTER(node))
-				{
-					/*
-					 * Generate a fake join tuple with nulls for the inner
-					 * tuple, and return it if it passes the non-join quals.
-					 */
-					econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+						elog(DEBUG1,
+								"in HJ_SCAN_BUCKET.    batchno %i. val %i. write byte %hhu. cur tup %li. bitnum %i. bytenum %i.",
+								node->hj_HashTable->curbatch,
+								DatumGetInt32(econtext->ecxt_outertuple->tts_values[0]),
+								node->hj_OuterCurrentByte,
+								node->hj_OuterTupleCount,
+								bit_to_set_in_byte,
+								byte_to_set);
 
+						BufFileWrite(node->hj_OuterMatchStatusesFile, &node->hj_OuterCurrentByte, 1);
+					}
 					if (otherqual == NULL || ExecQual(otherqual, econtext))
 						return ExecProject(node->js.ps.ps_ProjInfo);
 					else
 						InstrCountFiltered2(node, 1);
 				}
+				else
+					InstrCountFiltered1(node, 1);
 				break;
 
 			case HJ_FILL_INNER_TUPLES:
 
+				elog(DEBUG1, "HJ_FILL_INNER_TUPLES");
 				/*
 				 * We have finished a batch, but we are doing right/full join,
 				 * so any unmatched inner tuples in the hashtable have to be
@@ -515,7 +600,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 				if (!ExecScanHashTableForUnmatched(node, econtext))
 				{
 					/* no more unmatched tuples */
-					node->hj_JoinState = HJ_NEED_NEW_BATCH;
+					node->hj_JoinState = HJ_NEED_NEW_INNER_CHUNK;
 					continue;
 				}
 
@@ -533,6 +618,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 			case HJ_NEED_NEW_BATCH:
 
+				elog(DEBUG1, "HJ_NEED_NEW_BATCH");
 				/*
 				 * Try to advance to next batch.  Done if there are no more.
 				 */
@@ -543,12 +629,156 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 				}
 				else
 				{
-					if (!ExecHashJoinNewBatch(node))
-						return NULL;	/* end of parallel-oblivious join */
+					/*
+					 * for batches after batch 0 for which hashloop_fallback is
+					 * true, if inner is exhausted, need to consider emitting
+					 * unmatched tuples we should never get here when
+					 * hashloop_fallback is false but hj_InnerExhausted is true,
+					 * however, it felt more clear to check for
+					 * hashloop_fallback explicitly
+					 */
+					if (node->hashloop_fallback == true && HJ_FILL_OUTER(node) && node->hj_InnerExhausted == true)
+					{
+						/*
+						 * For hashloop fallback, outer tuples are not emitted
+						 * until directly before advancing the batch (after all
+						 * inner chunks have been processed).
+						 * node->hashloop_fallback should be true because it is
+						 * not reset to false until advancing the batches
+						 */
+						node->hj_InnerExhausted = false;
+						node->hj_JoinState = HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER_INIT;
+						break;
+					}
+
+					if (!ExecHashJoinAdvanceBatch(node))
+						return NULL;    /* end of parallel-oblivious join */
+
+					if (rewindOuterBatch(node->hj_HashTable->outerBatchFile[node->hj_HashTable->curbatch]) != NULL)
+						ExecHashJoinLoadInnerBatch(node); /* TODO: should I ever load inner when outer file is not present? */
 				}
 				node->hj_JoinState = HJ_NEED_NEW_OUTER;
 				break;
 
+			case HJ_NEED_NEW_INNER_CHUNK:
+
+				elog(DEBUG1, "HJ_NEED_NEW_INNER_CHUNK");
+
+				/*
+				 * there were never chunks because this is the normal case (not
+				 * hashloop fallback) or this is batch 0. batch 0 cannot have
+				 * chunks. hashloop_fallback should always be false when
+				 * curbatch is 0 here. proceed to HJ_NEED_NEW_BATCH to either
+				 * advance to the next batch or complete the join
+				 */
+				if (node->hj_HashTable->curbatch == 0)
+				{
+					Assert(node->hashloop_fallback == false);
+					if(node->hj_InnerPageOffset != 0L)
+						elog(NOTICE, "hj_InnerPageOffset is not reset to 0 on batch 0");
+				}
+
+				if (node->hashloop_fallback == false)
+				{
+					node->hj_JoinState = HJ_NEED_NEW_BATCH;
+					break;
+				}
+
+				/*
+				 * it is the hashloop fallback case and there are no more chunks
+				 * inner is exhausted, so we must advance the batches
+				 */
+				if (node->hj_InnerPageOffset == 0L)
+				{
+					node->hj_InnerExhausted = true;
+					node->hj_JoinState = HJ_NEED_NEW_BATCH;
+					break;
+				}
+
+				/*
+				 * This is the hashloop fallback case and we have more chunks in
+				 * inner. curbatch > 0. Rewind outer batch file (if present) so
+				 * that we can start reading it. Rewind outer match statuses
+				 * file if present so that we can set match bits as needed Reset
+				 * the tuple count and load the next chunk of inner. Then
+				 * proceed to get a new outer tuple from our rewound outer batch
+				 * file
+				 */
+				node->hj_JoinState = HJ_NEED_NEW_OUTER;
+
+				if (rewindOuterBatch(node->hj_HashTable->outerBatchFile[node->hj_HashTable->curbatch]) == NULL)
+					break; /* TODO: Is breaking here the right thing to do when outer file is not present? */
+				rewindOuterBatch(node->hj_OuterMatchStatusesFile);
+				node->hj_OuterTupleCount = 0;
+				ExecHashJoinLoadInnerBatch(node);
+				break;
+
+			case HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER_INIT:
+
+				elog(DEBUG1, "HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER_INIT");
+
+				node->hj_OuterTupleCount = 0;
+				rewindOuterBatch(node->hj_OuterMatchStatusesFile);
+
+				/* TODO: is it okay to use the hashtable to get the outer batch file here? */
+				outerFileForAdaptiveRead = hashtable->outerBatchFile[hashtable->curbatch];
+				if (outerFileForAdaptiveRead == NULL) /* TODO: could this happen */
+				{
+					node->hj_JoinState = HJ_NEED_NEW_BATCH;
+					break;
+				}
+				rewindOuterBatch(outerFileForAdaptiveRead);
+
+				node->hj_JoinState = HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER;
+				/* fall through */
+
+			case HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER:
+
+				elog(DEBUG1, "HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER");
+
+				outerFileForAdaptiveRead = hashtable->outerBatchFile[hashtable->curbatch];
+
+				while (true)
+				{
+					uint32 unmatchedOuterHashvalue;
+					TupleTableSlot *temp = ExecHashJoinGetSavedTuple(node, outerFileForAdaptiveRead, &unmatchedOuterHashvalue, node->hj_OuterTupleSlot);
+					node->hj_OuterTupleCount++;
+
+					if (temp == NULL)
+					{
+						node->hj_JoinState = HJ_NEED_NEW_BATCH;
+						break;
+					}
+
+					unsigned char bit = (node->hj_OuterTupleCount - 1) % 8;
+
+					/* need to read the next byte */
+					if (bit == 0)
+						BufFileRead(node->hj_OuterMatchStatusesFile, &node->hj_OuterCurrentByte, 1);
+
+					elog(DEBUG1, "in HJ_ADAPTIVE_EMIT_UNMATCHED_OUTER. batchno %i. val %i. num %li. bitnum %hhu. current byte %hhu.",
+						 node->hj_HashTable->curbatch,
+						 DatumGetInt32(temp->tts_values[0]),
+						 node->hj_OuterTupleCount,
+						 bit,
+						 node->hj_OuterCurrentByte);
+
+					/* if the match bit is set for this tuple, continue */
+					if ((node->hj_OuterCurrentByte >> bit) & 1)
+						continue;
+					/*
+					 * if it is not a match
+					 * emit it NULL-extended
+					 */
+					econtext->ecxt_outertuple = temp;
+					econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+					return ExecProject(node->js.ps.ps_ProjInfo);
+				}
+
+				/* came here from HJ_NEED_NEW_BATCH, so go back there */
+				node->hj_JoinState = HJ_NEED_NEW_BATCH;
+				break;
+
 			default:
 				elog(ERROR, "unrecognized hashjoin state: %d",
 					 (int) node->hj_JoinState);
@@ -628,6 +858,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	hjstate->js.ps.ExecProcNode = ExecHashJoin;
 	hjstate->js.jointype = node->join.jointype;
 
+	hjstate->hashloop_fallback = false;
+	hjstate->hj_InnerPageOffset = 0L;
+	hjstate->hj_InnerFirstChunk = false;
+	hjstate->hj_OuterCurrentByte = 0;
+
+	hjstate->hj_OuterMatchStatusesFile = NULL;
+	hjstate->hj_OuterTupleCount  = 0;
+	hjstate->hj_InnerExhausted = false;
 	/*
 	 * Miscellaneous initialization
 	 *
@@ -805,6 +1043,40 @@ ExecEndHashJoin(HashJoinState *node)
 	ExecEndNode(innerPlanState(node));
 }
 
+/* Rewind bufFile to offset 0 (ERROR on seek failure); NULL in, NULL out. */
+static BufFile *
+rewindOuterBatch(BufFile *bufFile)
+{
+	if (bufFile == NULL)
+		return NULL;
+	if (BufFileSeek(bufFile, 0, 0L, SEEK_SET) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not rewind hash-join temporary file: %m")));
+	return bufFile;
+}
+
+/*
+ * Null-extend the current outer tuple when it is unmatched and the join
+ * fills the outer side; returns the projected slot, or NULL if none.
+ */
+static TupleTableSlot *
+emitUnmatchedOuterTuple(ExprState *otherqual, ExprContext *econtext, HashJoinState *hjstate)
+{
+	if (hjstate->hj_MatchedOuter || !HJ_FILL_OUTER(hjstate))
+		return NULL;
+
+	/* Fake join tuple with nulls for the inner side; must pass otherqual. */
+	econtext->ecxt_innertuple = hjstate->hj_NullInnerTupleSlot;
+	if (otherqual != NULL && !ExecQual(otherqual, econtext))
+	{
+		InstrCountFiltered2(hjstate, 1);
+		return NULL;
+	}
+
+	return ExecProject(hjstate->js.ps.ps_ProjInfo);
+}
+
 /*
  * ExecHashJoinOuterGetTuple
  *
@@ -951,20 +1223,17 @@ ExecParallelHashJoinOuterGetTuple(PlanState *outerNode,
 }
 
 /*
- * ExecHashJoinNewBatch
+ * ExecHashJoinAdvanceBatch
  *		switch to a new hashjoin batch
  *
  * Returns true if successful, false if there are no more batches.
  */
 static bool
-ExecHashJoinNewBatch(HashJoinState *hjstate)
+ExecHashJoinAdvanceBatch(HashJoinState *hjstate)
 {
 	HashJoinTable hashtable = hjstate->hj_HashTable;
 	int			nbatch;
 	int			curbatch;
-	BufFile    *innerFile;
-	TupleTableSlot *slot;
-	uint32		hashvalue;
 
 	nbatch = hashtable->nbatch;
 	curbatch = hashtable->curbatch;
@@ -1039,10 +1308,35 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
 		curbatch++;
 	}
 
+	hjstate->hj_InnerPageOffset = 0L;
+	hjstate->hj_InnerFirstChunk = true;
+	hjstate->hashloop_fallback = false; /* new batch, so start it off false */
+	if (hjstate->hj_OuterMatchStatusesFile != NULL)
+		BufFileClose(hjstate->hj_OuterMatchStatusesFile);
+	hjstate->hj_OuterMatchStatusesFile = NULL;
 	if (curbatch >= nbatch)
 		return false;			/* no more batches */
 
 	hashtable->curbatch = curbatch;
+	return true;
+}
+
+/*
+ * Returns true if there are more chunks left, false otherwise
+ */
+static bool ExecHashJoinLoadInnerBatch(HashJoinState *hjstate)
+{
+	HashJoinTable hashtable = hjstate->hj_HashTable;
+	int curbatch = hashtable->curbatch;
+	BufFile    *innerFile;
+	TupleTableSlot *slot;
+	uint32		hashvalue;
+
+	off_t tup_start_offset;
+	off_t chunk_start_offset;
+	off_t tup_end_offset;
+	int64 current_saved_size;
+	int current_fileno;
 
 	/*
 	 * Reload the hash table with the new inner batch (which could be empty)
@@ -1051,45 +1345,56 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
 
 	innerFile = hashtable->innerBatchFile[curbatch];
 
+	/* Reset this even if the innerfile is not null */
+	hjstate->hj_InnerFirstChunk = hjstate->hj_InnerPageOffset == 0L;
+
 	if (innerFile != NULL)
 	{
-		if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
+		/* TODO: should fileno always be 0? */
+		if (BufFileSeek(innerFile, 0, hjstate->hj_InnerPageOffset, SEEK_SET))
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not rewind hash-join temporary file: %m")));
 
+		chunk_start_offset = hjstate->hj_InnerPageOffset;
+		tup_end_offset = hjstate->hj_InnerPageOffset;
 		while ((slot = ExecHashJoinGetSavedTuple(hjstate,
 												 innerFile,
 												 &hashvalue,
 												 hjstate->hj_HashTupleSlot)))
 		{
+			/* next tuple's start is last tuple's end */
+			tup_start_offset = tup_end_offset;
+			/* after we got the tuple, figure out what the offset is */
+			BufFileTell(innerFile, &current_fileno, &tup_end_offset);
+			current_saved_size = tup_end_offset - chunk_start_offset;
+			if (current_saved_size > work_mem)
+			{
+				hjstate->hj_InnerPageOffset = tup_start_offset;
+				hjstate->hashloop_fallback = true;
+				return true;
+			}
+			hjstate->hj_InnerPageOffset = tup_end_offset;
 			/*
-			 * NOTE: some tuples may be sent to future batches.  Also, it is
-			 * possible for hashtable->nbatch to be increased here!
+			 * NOTE: some tuples may be sent to future batches.
+			 * With current hashloop patch, however, it is not possible
+			 * for hashtable->nbatch to be increased here
 			 */
 			ExecHashTableInsert(hashtable, slot, hashvalue);
 		}
 
+		/* this is the end of the file */
+		hjstate->hj_InnerPageOffset = 0L;
+
 		/*
-		 * after we build the hash table, the inner batch file is no longer
+		 * after we processed all chunks, the inner batch file is no longer
 		 * needed
 		 */
 		BufFileClose(innerFile);
 		hashtable->innerBatchFile[curbatch] = NULL;
 	}
 
-	/*
-	 * Rewind outer batch file (if present), so that we can start reading it.
-	 */
-	if (hashtable->outerBatchFile[curbatch] != NULL)
-	{
-		if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not rewind hash-join temporary file: %m")));
-	}
-
-	return true;
+	return false;
 }
 
 /*
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index b40e6f3fde..ed5d663b17 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -203,6 +203,9 @@ BufFileCreateTemp(bool interXact)
 	file = makeBufFile(pfile);
 	file->isInterXact = interXact;
 
+	if (file->files[0] == 0)
+		elog(NOTICE, "file is 0");
+
 	return file;
 }
 
@@ -737,6 +740,18 @@ BufFileTell(BufFile *file, int *fileno, off_t *offset)
 	*offset = file->curOffset + file->pos;
 }
 
+int
+BufFileTellPos(BufFile *file)
+{
+	return file->pos;			/* the part BufFileTell adds to curOffset */
+}
+
+off_t
+BufFileTellOffset(BufFile *file)
+{
+	return file->curOffset;		/* BufFileTell's offset without file->pos */
+}
+
 /*
  * BufFileSeekBlock --- block-oriented seek
  *
@@ -801,6 +816,16 @@ BufFileSize(BufFile *file)
 		lastFileSize;
 }
 
+/*
+ * BufFileBytesUsed --- FileSize() of the last file in file->files, or 0
+ * when FileSize() returns a negative value.
+ */
+int64
+BufFileBytesUsed(BufFile *file)
+{
+	int64		lastFileSize = FileSize(file->files[file->numFiles - 1]);
+	return (lastFileSize >= 0) ? lastFileSize : 0;
+}
+
 /*
  * Append the contents of source file (managed within shared fileset) to
  * end of target file (managed within same shared fileset).
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 98bdcbcef5..efac63ca2e 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -14,6 +14,7 @@
 #ifndef EXECNODES_H
 #define EXECNODES_H
 
+#include <storage/buffile.h>
 #include "access/tupconvert.h"
 #include "executor/instrument.h"
 #include "lib/pairingheap.h"
@@ -1899,6 +1900,18 @@ typedef struct HashJoinState
 	int			hj_JoinState;
 	bool		hj_MatchedOuter;
 	bool		hj_OuterNotEmpty;
+
+	/* hashloop fallback */
+	bool hashloop_fallback;
+	/* hashloop fallback inner side */
+	bool hj_InnerFirstChunk;
+	bool hj_InnerExhausted;
+	off_t hj_InnerPageOffset;
+
+	/* hashloop fallback outer side */
+	unsigned char hj_OuterCurrentByte;
+	BufFile *hj_OuterMatchStatusesFile;
+	int64 hj_OuterTupleCount;
 } HashJoinState;
 
 
diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h
index 1fba404fe2..74ee0f292d 100644
--- a/src/include/storage/buffile.h
+++ b/src/include/storage/buffile.h
@@ -42,8 +42,11 @@ extern size_t BufFileRead(BufFile *file, void *ptr, size_t size);
 extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size);
 extern int	BufFileSeek(BufFile *file, int fileno, off_t offset, int whence);
 extern void BufFileTell(BufFile *file, int *fileno, off_t *offset);
+extern int BufFileTellPos(BufFile *file);
+extern off_t BufFileTellOffset(BufFile *file);
 extern int	BufFileSeekBlock(BufFile *file, long blknum);
 extern int64 BufFileSize(BufFile *file);
+extern int64 BufFileBytesUsed(BufFile *file);
 extern long BufFileAppend(BufFile *target, BufFile *source);
 
 extern BufFile *BufFileCreateShared(SharedFileSet *fileset, const char *name);
diff --git a/src/test/regress/expected/adaptive_hj.out b/src/test/regress/expected/adaptive_hj.out
new file mode 100644
index 0000000000..7a33316bfe
--- /dev/null
+++ b/src/test/regress/expected/adaptive_hj.out
@@ -0,0 +1,960 @@
+drop table if exists t1;
+NOTICE:  table "t1" does not exist, skipping
+drop table if exists t2;
+NOTICE:  table "t2" does not exist, skipping
+create table t1(a int);
+create table t2(b int);
+insert into t1 values(1),(2);
+insert into t2 values(2),(3),(11);
+insert into t1 select i from generate_series(1,10)i;
+insert into t2 select i from generate_series(2,10)i;
+insert into t1 select 2 from generate_series(1,5)i;
+insert into t2 select 2 from generate_series(2,7)i;
+set work_mem=64;
+set enable_mergejoin to off;
+select * from t1 left outer join t2 on a = b order by a;
+ a  | b  
+----+----
+  1 |   
+  1 |   
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  3 |  3
+  3 |  3
+  4 |  4
+  5 |  5
+  6 |  6
+  7 |  7
+  8 |  8
+  9 |  9
+ 10 | 10
+(67 rows)
+
+select count(*) from t1 left outer join t2 on a = b;
+ count 
+-------
+    67
+(1 row)
+
+select * from t1, t2 where a = b order by b;
+ a  | b  
+----+----
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  3 |  3
+  3 |  3
+  4 |  4
+  5 |  5
+  6 |  6
+  7 |  7
+  8 |  8
+  9 |  9
+ 10 | 10
+(65 rows)
+
+select count(*) from t1, t2 where a = b;
+ count 
+-------
+    65
+(1 row)
+
+select * from t1 right outer join t2 on a = b order by b;
+ a  | b  
+----+----
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  3 |  3
+  3 |  3
+  4 |  4
+  5 |  5
+  6 |  6
+  7 |  7
+  8 |  8
+  9 |  9
+ 10 | 10
+    | 11
+(66 rows)
+
+select count(*) from t1 right outer join t2 on a = b;
+ count 
+-------
+    66
+(1 row)
+
+select * from t1 full outer join t2 on a = b order by b;
+ a  | b  
+----+----
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  2 |  2
+  3 |  3
+  3 |  3
+  4 |  4
+  5 |  5
+  6 |  6
+  7 |  7
+  8 |  8
+  9 |  9
+ 10 | 10
+    | 11
+  1 |   
+  1 |   
+(68 rows)
+
+select count(*) from t1 full outer join t2 on a = b;
+ count 
+-------
+    68
+(1 row)
+
+truncate table t1;
+insert into t1 values (1),(2),(2),(3);
+truncate table t2;
+insert into t2 values(2),(2),(3),(3),(4);
+select * from t1 left outer join t2 on a = b order by a;
+ a | b 
+---+---
+ 1 |  
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+(7 rows)
+
+select count(*) from t1 left outer join t2 on a = b;
+ count 
+-------
+     7
+(1 row)
+
+select * from t1, t2 where a = b order by b;
+ a | b 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+(6 rows)
+
+select count(*) from t1, t2 where a = b;
+ count 
+-------
+     6
+(1 row)
+
+select * from t1 right outer join t2 on a = b order by b;
+ a | b 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+   | 4
+(7 rows)
+
+select count(*) from t1 right outer join t2 on a = b;
+ count 
+-------
+     7
+(1 row)
+
+select * from t1 full outer join t2 on a = b order by b;
+ a | b 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+   | 4
+ 1 |  
+(8 rows)
+
+select count(*) from t1 full outer join t2 on a = b;
+ count 
+-------
+     8
+(1 row)
+
+truncate table t1;
+insert into t1 values(1),(1);
+insert into t1 select 2 from generate_series(1,7)i;
+insert into t1 select i from generate_series(3,10)i;
+truncate table t2;
+insert into t2 select 2 from generate_series(1,7)i;
+insert into t2 values(3),(3);
+insert into t2 select i from generate_series(5,9)i;
+select * from t1 left outer join t2 on a = b order by a;
+ a  | b 
+----+---
+  1 |  
+  1 |  
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  3 | 3
+  3 | 3
+  4 |  
+  5 | 5
+  6 | 6
+  7 | 7
+  8 | 8
+  9 | 9
+ 10 |  
+(60 rows)
+
+select count(*) from t1 left outer join t2 on a = b;
+ count 
+-------
+    60
+(1 row)
+
+select * from t1, t2 where a = b order by b;
+ a | b 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(56 rows)
+
+select count(*) from t1, t2 where a = b;
+ count 
+-------
+    56
+(1 row)
+
+select * from t1 right outer join t2 on a = b order by b;
+ a | b 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(56 rows)
+
+select count(*) from t1 right outer join t2 on a = b;
+ count 
+-------
+    56
+(1 row)
+
+select * from t1 full outer join t2 on a = b order by b;
+ a  | b 
+----+---
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  2 | 2
+  3 | 3
+  3 | 3
+  5 | 5
+  6 | 6
+  7 | 7
+  8 | 8
+  9 | 9
+ 10 |  
+  4 |  
+  1 |  
+  1 |  
+(60 rows)
+
+select count(*) from t1 full outer join t2 on a = b;
+ count 
+-------
+    60
+(1 row)
+
+select * from t2 left outer join t1 on a = b order by a;
+ b | a 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(56 rows)
+
+select count(*) from t2 left outer join t1 on a = b;
+ count 
+-------
+    56
+(1 row)
+
+select * from t2, t1 where a = b order by b;
+ b | a 
+---+---
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 2 | 2
+ 3 | 3
+ 3 | 3
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(56 rows)
+
+select count(*) from t2, t1 where a = b;
+ count 
+-------
+    56
+(1 row)
+
+select * from t2 right outer join t1 on a = b order by b;
+ b | a  
+---+----
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 3 |  3
+ 3 |  3
+ 5 |  5
+ 6 |  6
+ 7 |  7
+ 8 |  8
+ 9 |  9
+   | 10
+   |  4
+   |  1
+   |  1
+(60 rows)
+
+select count(*) from t2 right outer join t1 on a = b;
+ count 
+-------
+    60
+(1 row)
+
+select * from t2 full outer join t1 on a = b order by b;
+ b | a  
+---+----
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 2 |  2
+ 3 |  3
+ 3 |  3
+ 5 |  5
+ 6 |  6
+ 7 |  7
+ 8 |  8
+ 9 |  9
+   | 10
+   |  4
+   |  1
+   |  1
+(60 rows)
+
+select count(*) from t2 full outer join t1 on a = b;
+ count 
+-------
+    60
+(1 row)
+
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 8fb55f045e..7492c2c45b 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -78,7 +78,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
 # ----------
 # Another group of parallel tests
 # ----------
-test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tidscan
+test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tidscan adaptive_hj
 
 # rules cannot run concurrently with any test that creates
 # a view or rule in the public schema
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index a39ca1012a..17099bf604 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -91,6 +91,7 @@ test: subselect
 test: union
 test: case
 test: join
+test: adaptive_hj
 test: aggregates
 test: transactions
 ignore: random
diff --git a/src/test/regress/sql/adaptive_hj.sql b/src/test/regress/sql/adaptive_hj.sql
new file mode 100644
index 0000000000..7e74aac603
--- /dev/null
+++ b/src/test/regress/sql/adaptive_hj.sql
@@ -0,0 +1,64 @@
+drop table if exists t1;
+drop table if exists t2;
+create table t1(a int);
+create table t2(b int);
+
+insert into t1 values(1),(2);
+insert into t2 values(2),(3),(11);
+insert into t1 select i from generate_series(1,10)i;
+insert into t2 select i from generate_series(2,10)i;
+insert into t1 select 2 from generate_series(1,5)i;
+insert into t2 select 2 from generate_series(2,7)i;
+
+set work_mem=64;
+set enable_mergejoin to off;
+
+select * from t1 left outer join t2 on a = b order by a;
+select count(*) from t1 left outer join t2 on a = b;
+select * from t1, t2 where a = b order by b;
+select count(*) from t1, t2 where a = b;
+select * from t1 right outer join t2 on a = b order by b;
+select count(*) from t1 right outer join t2 on a = b;
+select * from t1 full outer join t2 on a = b order by b;
+select count(*) from t1 full outer join t2 on a = b;
+
+truncate table t1;
+insert into t1 values (1),(2),(2),(3);
+truncate table t2;
+insert into t2 values(2),(2),(3),(3),(4);
+
+select * from t1 left outer join t2 on a = b order by a;
+select count(*) from t1 left outer join t2 on a = b;
+select * from t1, t2 where a = b order by b;
+select count(*) from t1, t2 where a = b;
+select * from t1 right outer join t2 on a = b order by b;
+select count(*) from t1 right outer join t2 on a = b;
+select * from t1 full outer join t2 on a = b order by b;
+select count(*) from t1 full outer join t2 on a = b;
+
+truncate table t1;
+insert into t1 values(1),(1);
+insert into t1 select 2 from generate_series(1,7)i;
+insert into t1 select i from generate_series(3,10)i;
+truncate table t2;
+insert into t2 select 2 from generate_series(1,7)i;
+insert into t2 values(3),(3);
+insert into t2 select i from generate_series(5,9)i;
+
+select * from t1 left outer join t2 on a = b order by a;
+select count(*) from t1 left outer join t2 on a = b;
+select * from t1, t2 where a = b order by b;
+select count(*) from t1, t2 where a = b;
+select * from t1 right outer join t2 on a = b order by b;
+select count(*) from t1 right outer join t2 on a = b;
+select * from t1 full outer join t2 on a = b order by b;
+select count(*) from t1 full outer join t2 on a = b;
+
+select * from t2 left outer join t1 on a = b order by a;
+select count(*) from t2 left outer join t1 on a = b;
+select * from t2, t1 where a = b order by b;
+select count(*) from t2, t1 where a = b;
+select * from t2 right outer join t1 on a = b order by b;
+select count(*) from t2 right outer join t1 on a = b;
+select * from t2 full outer join t1 on a = b order by b;
+select count(*) from t2 full outer join t1 on a = b;
-- 
2.22.0

