Good news, everyone!

I discovered that NUM_LOCK_PARTITIONS is a bottleneck for the last
patch. Long story short, NUM_LOCK_PARTITIONS = (1 << 7) gives the best
performance (PN and AN below stand for GLOBAL_FREE_LIST_PARTITIONS_NUM
and GLOBAL_FREE_LIST_ALLOC_NUM from the patch):

PN = 16, AN =  8, NUM_LOCK_PARTITIONS = (1 << 7): 4782.9
PN = 16, AN =  4, NUM_LOCK_PARTITIONS = (1 << 7): 4089.9
PN = 16, AN =  2, NUM_LOCK_PARTITIONS = (1 << 7): 2822.5

PN =  8, AN =  8, NUM_LOCK_PARTITIONS = (1 << 7): 4391.7
PN =  8, AN =  4, NUM_LOCK_PARTITIONS = (1 << 7): 3665.0
PN =  8, AN =  2, NUM_LOCK_PARTITIONS = (1 << 7): 2205.7

PN =  4, AN =  8, NUM_LOCK_PARTITIONS = (1 << 7): 4031.9
PN =  4, AN =  4, NUM_LOCK_PARTITIONS = (1 << 7): 3378.2
PN =  4, AN =  2, NUM_LOCK_PARTITIONS = (1 << 7): 2142.4

I also tried to optimize the global_free_list_* procedures as planned.
The optimized version is asymptotically faster, but in practice it
works about 5% slower because of the additional bookkeeping it has to
perform. The patch is attached to this message in case you would like
to examine the code.
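
To summarize the idea for anyone not reading the whole diff: free
elements now travel between a partition's local freeList and a sharded
global free list in whole batches. The batch header (tail pointer and
item count) lives in the entry area of the batch's first element, so
moving a batch is O(1) pointer work under the spinlock, which is why
this version is asymptotically faster; the extra tail-tracking
pointers that free_list_put() has to maintain are the additional
operations that cost the 5%. A condensed sketch of the patch's
global_free_list_acquire_from_partition (assertions and the scan over
global shards dropped):

static bool
global_free_list_acquire_from_partition(HTAB *hashp, int pidx, int gidx)
{
	HASHHDR    *hctl = hashp->hctl;
	HASHELEMENT *head;
	FREELISTBATCH *batchp;

	SpinLockAcquire(&hctl->mutex[gidx]);
	head = hctl->globalFreeList[gidx];
	if (head == NULL)
	{
		SpinLockRelease(&hctl->mutex[gidx]);
		return false;
	}
	/* batch header is stored in the first element's entry area */
	batchp = (FREELISTBATCH *) ELEMENTKEY(head);
	/* unlink the whole batch with a single pointer assignment */
	hctl->globalFreeList[gidx] = batchp->tail->link;
	SpinLockRelease(&hctl->mutex[gidx]);

	batchp->tail->link = NULL;
	/* caller has ensured the local freelist is empty */
	hctl->freeList[pidx] = head;
	hctl->freeListTail[pidx] = batchp->tail;
	hctl->totalAllocated[pidx] += batchp->nitems;
	return true;
}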

Here is a funny thing: the benchmark results I shared on 22.12.2015
are wrong, because I forgot to run `make clean` after changing
lwlock.h (autotools doesn't rebuild the project properly after .h
files change). Here are the correct results:

60-core server,
pgbench -j 64 -c 64 -f pgbench.sql -T 50 -P 1 my_database

NUM_LOCK_  |  master  | no locks |  lwlock  | spinlock | spinlock 
PARTITIONS | (99ccb2) |          |          |          |  array   
-----------|----------|----------|----------|----------|----------
 1 << 4    |  660.4   |  2296.3  |  1441.8  |  454.4   |  1597.8  
-----------|----------|----------|----------|----------|----------
 1 << 5    |  536.6   |  3006.5  |  1632.3  |  381.8   |  1896.8  
-----------|----------|----------|----------|----------|----------
 1 << 6    |  469.9   |  4083.5  |  1693.4  |  427.7   |  2271.5  
-----------|----------|----------|----------|----------|----------
 1 << 7    |  430.8   |  4653.0  |  2016.3  |  361.1   |  3277.1  

As you can see, the "spinlock array" version works quite well. It is
almost 5 times faster than the current dynahash implementation and
only 30% slower than the "no locks" version. It also guarantees that
we will use all available memory.
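
For anyone who hasn't followed the thread: the gist of the "spinlock
array" approach is to replace dynahash's single HASHHDR mutex (and its
single freeList/nentries pair) with one per lock partition, so that
backends touching different partitions no longer serialize on a single
spinlock. A minimal sketch of the idea, not the exact patch:

/*
 * Sketch only: one spinlock, entry counter and freelist per lock
 * partition, instead of a single mutex/freeList for the whole table.
 */
struct HASHHDR
{
	slock_t		mutex[NUM_LOCK_PARTITIONS];
	long		nentries[NUM_LOCK_PARTITIONS];
	HASHELEMENT *freeList[NUM_LOCK_PARTITIONS];
	/* ... remaining fields unchanged ... */
};

/* Simplified get_hash_entry() for a partitioned table. */
static HASHBUCKET
get_hash_entry(HTAB *hashp, uint32 hashvalue)
{
	HASHHDR    *hctl = hashp->hctl;
	int			idx = LockHashPartition(hashvalue);
	HASHBUCKET	newElement;

	SpinLockAcquire(&hctl->mutex[idx]);
	newElement = hctl->freeList[idx];
	if (newElement != NULL)
	{
		hctl->freeList[idx] = newElement->link;
		hctl->nentries[idx]++;
	}
	SpinLockRelease(&hctl->mutex[idx]);

	/* on NULL the real patch allocates more elements or borrows them */
	return newElement;
}

The interesting part the sketch omits is what happens when one
partition's freelist is empty while others are not; handling that case
without failing is what the "use all available memory" guarantee above
refers to.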

I experimented with various improvements to "spinlock array" as well.
E.g. I tried to borrow elements in batches instead of one at a time,
but it didn't yield any performance improvement.

In case you believe that 5 times faster is not good enough, we can
also use sharded-global-free-list.patch with appropriate AN and PN
values. Still, this patch doesn't guarantee that all available memory
will be used, since free elements can stay cached in other partitions'
local freelists (the "no locks" patch doesn't guarantee it either).

Considering that the benchmark above covers not all possible cases but
one very specific one, that the "spinlock array" patch gives much
better guarantees than the "no locks" patch, and that lock-free
algorithms are pretty complicated and error-prone, I suggest we call
the "spinlock array" solution good enough, at least until someone
complains again that dynahash is a bottleneck. Naturally, I will first
clean up the code and write more comments, then re-check once again
that there is no performance degradation on the usual pgbench, etc.
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 78f15f0..91fcc05 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -265,7 +265,7 @@ InitShmemIndex(void)
  */
 HTAB *
 ShmemInitHash(const char *name, /* table string name for shmem index */
-			  long init_size,	/* initial table size */
+			  long init_size,	/* initial table size */ // AALEKSEEV: is ignored, refactor!
 			  long max_size,	/* max size of the table */
 			  HASHCTL *infoP,	/* info about key and bucket size */
 			  int hash_flags)	/* info about infoP */
@@ -299,7 +299,7 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
 	/* Pass location of hashtable header to hash_create */
 	infoP->hctl = (HASHHDR *) location;
 
-	return hash_create(name, init_size, infoP, hash_flags);
+	return hash_create(name, max_size, infoP, hash_flags);
 }
 
 /*
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index eacffc4..07a46db 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -87,6 +87,8 @@
 #include "access/xact.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
+#include "storage/lock.h"
+#include "storage/lwlock.h"
 #include "utils/dynahash.h"
 #include "utils/memutils.h"
 
@@ -118,6 +120,13 @@ typedef HASHELEMENT *HASHBUCKET;
 /* A hash segment is an array of bucket headers */
 typedef HASHBUCKET *HASHSEGMENT;
 
+// AALEKSEEV: TODO: comment, should be power of two
+#define GLOBAL_FREE_LIST_PARTITIONS_NUM 4
+#define GLOBAL_FREE_LIST_PARTITIONS_MASK (GLOBAL_FREE_LIST_PARTITIONS_NUM-1)
+
+// AALEKSEEV: TODO: comment
+#define GLOBAL_FREE_LIST_ALLOC_NUM 8
+
 /*
  * Header structure for a hash table --- contains all changeable info
  *
@@ -129,11 +138,24 @@ typedef HASHBUCKET *HASHSEGMENT;
 struct HASHHDR
 {
 	/* In a partitioned table, take this lock to touch nentries or freeList */
-	slock_t		mutex;			/* unused if not partitioned table */
+	slock_t		mutex[GLOBAL_FREE_LIST_PARTITIONS_NUM];			/* unused if not partitioned table */
+	HASHELEMENT * globalFreeList[GLOBAL_FREE_LIST_PARTITIONS_NUM];
+
+	// AALEKSEEV: fix comments
 
 	/* These fields change during entry addition/deletion */
-	long		nentries;		/* number of entries in hash table */
-	HASHELEMENT *freeList;		/* linked list of free elements */
+	/* number of entries in hash table */
+	long		nentries[NUM_LOCK_PARTITIONS];
+
+	// AALEKSEEV: TODO: comment
+	long		totalAllocated[NUM_LOCK_PARTITIONS];
+
+	/* linked list of free elements */
+	HASHELEMENT *freeList[NUM_LOCK_PARTITIONS];
+
+	// AALEKSEEV: TODO: comment
+	HASHELEMENT *freeListTail[NUM_LOCK_PARTITIONS];
+	HASHELEMENT *freeListWouldBeTail[NUM_LOCK_PARTITIONS];
 
 	/* These fields can change, but not in a partitioned table */
 	/* Also, dsize can't change in a shared table, even if unpartitioned */
@@ -166,6 +188,9 @@ struct HASHHDR
 
 #define IS_PARTITIONED(hctl)  ((hctl)->num_partitions != 0)
 
+// AALEKSEEV: add comment
+#define PARTITION_IDX(hctl, hashcode) (IS_PARTITIONED(hctl) ? LockHashPartition(hashcode) : 0)
+
 /*
  * Top control structure for a hashtable --- in a shared table, each backend
  * has its own copy (OK since no fields change at runtime)
@@ -192,6 +217,28 @@ struct HTAB
 	int			sshift;			/* segment shift = log2(ssize) */
 };
 
+typedef struct
+{
+	HASHELEMENT * tail;
+	uint32 nitems;
+#ifdef USE_ASSERT_CHECKING
+	uint32 magic;
+#endif
+} FREELISTBATCH;
+
+#ifdef USE_ASSERT_CHECKING
+
+#define FREE_LIST_BATCH_MAGIC 0x1234ABCD
+#define FREE_LIST_BATCH_SET_MAGIC(batchp) ((batchp)->magic = FREE_LIST_BATCH_MAGIC)
+#define FREE_LIST_BATCH_CHECK_MAGIC(batchp) (AssertMacro((batchp)->magic == FREE_LIST_BATCH_MAGIC))
+
+#else
+
+#define FREE_LIST_BATCH_SET_MAGIC(batchp) ((void)true)
+#define FREE_LIST_BATCH_CHECK_MAGIC(batchp) ((void)true)
+
+#endif
+
 /*
  * Key (also entry) part of a HASHELEMENT
  */
@@ -219,10 +266,10 @@ static long hash_accesses,
  */
 static void *DynaHashAlloc(Size size);
 static HASHSEGMENT seg_alloc(HTAB *hashp);
-static bool element_alloc(HTAB *hashp, int nelem);
+static bool element_alloc(HTAB *hashp, int nelem, int glob_part_idx);
 static bool dir_realloc(HTAB *hashp);
 static bool expand_table(HTAB *hashp);
-static HASHBUCKET get_hash_entry(HTAB *hashp);
+static HASHBUCKET get_hash_entry(HTAB *hashp, int partition_idx);
 static void hdefault(HTAB *hashp);
 static int	choose_nelem_alloc(Size entrysize);
 static bool init_htab(HTAB *hashp, long nelem);
@@ -260,6 +307,197 @@ string_compare(const char *key1, const char *key2, Size keysize)
 	return strncmp(key1, key2, keysize - 1);
 }
 
+// AALEKSEEV TODO comment
+static void
+global_free_list_init(HTAB* hashp, int glob_partition_idx)
+{
+	FREELISTBATCH* batchp;
+	HASHHDR    *hctl = hashp->hctl;
+	HASHELEMENT* batchHead = hctl->globalFreeList[glob_partition_idx];
+	HASHELEMENT* currElement = batchHead;
+	int elementsInBatch = 0;
+
+	Assert(currElement != NULL);
+
+	for(;;)
+	{
+		elementsInBatch++;
+
+		if(currElement->link == NULL)
+			break;
+
+		if(elementsInBatch == GLOBAL_FREE_LIST_ALLOC_NUM)
+		{
+			batchp = (FREELISTBATCH*)ELEMENTKEY(batchHead);
+			batchp->tail = currElement;
+			batchp->nitems = elementsInBatch;
+			FREE_LIST_BATCH_SET_MAGIC(batchp);
+
+			batchHead = currElement->link;
+			elementsInBatch = 0;
+		}
+
+		currElement = currElement->link;
+	}
+
+	batchp = (FREELISTBATCH*)ELEMENTKEY(batchHead);
+	batchp->tail = currElement;
+	batchp->nitems = elementsInBatch;
+	FREE_LIST_BATCH_SET_MAGIC(batchp);
+}
+
+// AALEKSEEV: acquire elements - WE KNOW WE NEED THEM.
+// return true on success, false on failure
+static bool
+global_free_list_acquire_from_partition(HTAB* hashp, int partition_idx, int glob_partition_idx)
+{
+	FREELISTBATCH* batchp;
+	HASHELEMENT *firstElement;
+	HASHHDR    *hctl = hashp->hctl;
+
+	Assert(IS_PARTITIONED(hctl));
+	Assert(hctl->freeList[partition_idx] == NULL);
+	Assert(hctl->nentries[partition_idx] == hctl->totalAllocated[partition_idx]);
+
+	SpinLockAcquire(&hctl->mutex[glob_partition_idx]);
+	firstElement = hctl->globalFreeList[glob_partition_idx];
+
+	if(!firstElement)
+	{
+		SpinLockRelease(&hctl->mutex[glob_partition_idx]);
+		return false;
+	}
+
+	batchp = (FREELISTBATCH*)ELEMENTKEY(firstElement);
+	FREE_LIST_BATCH_CHECK_MAGIC(batchp);
+
+	hctl->globalFreeList[glob_partition_idx] = batchp->tail->link;
+	SpinLockRelease(&hctl->mutex[glob_partition_idx]);
+
+	batchp->tail->link = NULL;
+	hctl->freeList[partition_idx] = firstElement;
+	hctl->freeListTail[partition_idx] = batchp->tail;
+	hctl->freeListWouldBeTail[partition_idx] = NULL;
+	hctl->totalAllocated[partition_idx] += batchp->nitems;
+
+	return true;
+}
+
+static bool
+global_free_list_acquire(HTAB* hashp, int partition_idx)
+{
+	int idx,
+		start = partition_idx & GLOBAL_FREE_LIST_PARTITIONS_MASK;
+
+	idx = start;
+	do {
+		if(global_free_list_acquire_from_partition(hashp, partition_idx, idx))
+			return true;
+		idx = (idx + 1) & GLOBAL_FREE_LIST_PARTITIONS_MASK;
+	} while(idx != start);
+
+	return false;
+}
+
+// AALEKSEEV: release elements IF POSSIBLE.
+static void
+global_free_list_try_release(HTAB* hashp, int partition_idx)
+{
+#ifdef USE_ASSERT_CHECKING
+	int nreleased;
+#endif
+	FREELISTBATCH* batchp;
+	HASHELEMENT *releasedFirst,
+				*releasedLast;
+	int global_free_list_part = partition_idx & GLOBAL_FREE_LIST_PARTITIONS_MASK;
+
+	HASHHDR    *hctl = hashp->hctl;
+
+	if(!IS_PARTITIONED(hctl))
+		return;
+
+	/* keep at least GLOBAL_FREE_LIST_ALLOC_NUM items in each freeList */
+	if(hctl->totalAllocated[partition_idx] <= GLOBAL_FREE_LIST_ALLOC_NUM)
+		return;
+
+	if(hctl->totalAllocated[partition_idx] - hctl->nentries[partition_idx] <= GLOBAL_FREE_LIST_ALLOC_NUM)
+		return;
+
+	Assert(hctl->freeList[partition_idx] != NULL);
+	Assert(hctl->freeListWouldBeTail[partition_idx] != NULL);
+	Assert(hctl->freeListWouldBeTail[partition_idx]->link != NULL);
+	Assert(hctl->freeListTail[partition_idx] != NULL);
+
+	releasedFirst = hctl->freeListWouldBeTail[partition_idx]->link;
+	releasedLast = hctl->freeListTail[partition_idx];
+
+#ifdef USE_ASSERT_CHECKING
+	releasedLast = releasedFirst;
+	nreleased = 1;
+	while(nreleased < GLOBAL_FREE_LIST_ALLOC_NUM)
+	{
+		Assert(releasedLast->link != NULL);
+
+		releasedLast = releasedLast->link;
+		nreleased++;
+	}
+	Assert(releasedLast == hctl->freeListTail[partition_idx]);
+#endif
+	
+	hctl->totalAllocated[partition_idx] -= GLOBAL_FREE_LIST_ALLOC_NUM;
+	hctl->freeListTail[partition_idx] = hctl->freeListWouldBeTail[partition_idx];
+	hctl->freeListTail[partition_idx]->link = NULL;
+	hctl->freeListWouldBeTail[partition_idx] = NULL;
+
+	batchp = (FREELISTBATCH*)ELEMENTKEY(releasedFirst);
+	batchp->tail = releasedLast;
+	batchp->nitems = GLOBAL_FREE_LIST_ALLOC_NUM;
+	FREE_LIST_BATCH_SET_MAGIC(batchp);
+
+	SpinLockAcquire(&hctl->mutex[global_free_list_part]);
+	releasedLast->link = hctl->globalFreeList[global_free_list_part];
+	hctl->globalFreeList[global_free_list_part] = releasedFirst;
+	SpinLockRelease(&hctl->mutex[global_free_list_part]);
+}
+
+/* remove entry from freelist, bump nentries */
+static inline HASHELEMENT*
+free_list_get(HTAB* hashp, int partition_idx)
+{
+	HASHHDR    *hctl = hashp->hctl;	
+	HASHELEMENT* element = hctl->freeList[partition_idx];
+
+	Assert(element != NULL);
+
+	hctl->freeList[partition_idx] = element->link;
+	hctl->nentries[partition_idx]++;
+
+#ifdef USE_ASSERT_CHECKING
+	if(hctl->freeListWouldBeTail[partition_idx] == element)
+		hctl->freeListWouldBeTail[partition_idx] = NULL;
+	if(element->link == NULL)
+		hctl->freeListTail[partition_idx] = NULL;
+#endif
+
+	return element;
+}
+
+/* add the record to the freelist for this table.  */
+static inline void
+free_list_put(HTAB* hashp, HASHELEMENT* element, int partition_idx)
+{
+	HASHHDR    *hctl = hashp->hctl;
+
+	if(hctl->freeList[partition_idx] == NULL)
+		hctl->freeListTail[partition_idx] = element;
+
+	element->link = hctl->freeList[partition_idx];
+	hctl->freeList[partition_idx] = element;
+	hctl->nentries[partition_idx]--;
+
+	if(hctl->totalAllocated[partition_idx] - hctl->nentries[partition_idx] == (GLOBAL_FREE_LIST_ALLOC_NUM+1))
+		hctl->freeListWouldBeTail[partition_idx] = element;
+}
 
 /************************** CREATE ROUTINES **********************/
 
@@ -282,6 +520,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 {
 	HTAB	   *hashp;
 	HASHHDR    *hctl;
+	int 		i, nelem_alloc;
 
 	/*
 	 * For shared hash tables, we have a local hash header (HTAB struct) that
@@ -408,7 +647,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 		if (!hashp->hctl)
 			ereport(ERROR,
 					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of memory")));
+					 errmsg("out of memory (3)"))); // AALEKSEEV: fix string
 	}
 
 	hashp->frozen = false;
@@ -422,6 +661,9 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 		/* Doesn't make sense to partition a local hash table */
 		Assert(flags & HASH_SHARED_MEM);
 
+		// AALEKSEEV: comment. For storing links in globalFreeList
+		Assert(info->entrysize >= sizeof(FREELISTBATCH));
+
 		/*
 		 * The number of partitions had better be a power of 2. Also, it must
 		 * be less than INT_MAX (see init_htab()), so call the int version of
@@ -482,10 +724,32 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 	if ((flags & HASH_SHARED_MEM) ||
 		nelem < hctl->nelem_alloc)
 	{
-		if (!element_alloc(hashp, (int) nelem))
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of memory")));
+		if(IS_PARTITIONED(hashp->hctl))
+		{
+			nelem_alloc = nelem / GLOBAL_FREE_LIST_PARTITIONS_NUM;
+			if(nelem_alloc == 0)
+				nelem_alloc = 1;
+
+			Assert(nelem_alloc > 0);
+			for(i = 0; i < GLOBAL_FREE_LIST_PARTITIONS_NUM; i++)
+			{
+				if (!element_alloc(hashp, nelem_alloc, i))
+					ereport(ERROR,
+							(errcode(ERRCODE_OUT_OF_MEMORY),
+							 errmsg("out of memory (1.1)"))); // AALEKSEEV: fix string
+				
+				global_free_list_init(hashp, i);
+				global_free_list_acquire(hashp, i);
+			}				
+		}
+		else
+		{
+			if (!element_alloc(hashp, nelem, 0))
+				ereport(ERROR,
+						(errcode(ERRCODE_OUT_OF_MEMORY),
+						 errmsg("out of memory (1.2)"))); // AALEKSEEV: fix string
+
+		}
 	}
 
 	if (flags & HASH_FIXED_SIZE)
@@ -503,8 +767,9 @@ hdefault(HTAB *hashp)
 
 	MemSet(hctl, 0, sizeof(HASHHDR));
 
-	hctl->nentries = 0;
-	hctl->freeList = NULL;
+	// AALEKSEEV: redundant!
+	// hctl->nentries = 0;
+	// hctl->freeList = NULL;
 
 	hctl->dsize = DEF_DIRSIZE;
 	hctl->nsegs = 0;
@@ -572,12 +837,14 @@ init_htab(HTAB *hashp, long nelem)
 	HASHSEGMENT *segp;
 	int			nbuckets;
 	int			nsegs;
+	int i;
 
 	/*
 	 * initialize mutex if it's a partitioned table
 	 */
 	if (IS_PARTITIONED(hctl))
-		SpinLockInit(&hctl->mutex);
+		for(i = 0; i < GLOBAL_FREE_LIST_PARTITIONS_NUM; i++)
+			SpinLockInit(&(hctl->mutex[i]));
 
 	/*
 	 * Divide number of elements by the fill factor to determine a desired
@@ -648,7 +915,8 @@ init_htab(HTAB *hashp, long nelem)
 			"HIGH MASK       ", hctl->high_mask,
 			"LOW  MASK       ", hctl->low_mask,
 			"NSEGS           ", hctl->nsegs,
-			"NENTRIES        ", hctl->nentries);
+			// AALEKSEEV: fix this
+			"NENTRIES        ", hctl->nentries[0]);
 #endif
 	return true;
 }
@@ -769,7 +1037,8 @@ hash_stats(const char *where, HTAB *hashp)
 			where, hashp->hctl->accesses, hashp->hctl->collisions);
 
 	fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
-			hashp->hctl->nentries, (long) hashp->hctl->keysize,
+		// AALEKSEEV: fix this
+			hashp->hctl->nentries[0], (long) hashp->hctl->keysize,
 			hashp->hctl->max_bucket, hashp->hctl->nsegs);
 	fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
 			where, hash_accesses, hash_collisions);
@@ -863,6 +1132,7 @@ hash_search_with_hash_value(HTAB *hashp,
 	HASHBUCKET	currBucket;
 	HASHBUCKET *prevBucketPtr;
 	HashCompareFunc match;
+	int partition_idx = PARTITION_IDX(hctl, hashvalue);
 
 #if HASH_STATISTICS
 	hash_accesses++;
@@ -885,7 +1155,7 @@ hash_search_with_hash_value(HTAB *hashp,
 		 * order of these tests is to try to check cheaper conditions first.
 		 */
 		if (!IS_PARTITIONED(hctl) && !hashp->frozen &&
-			hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
+			hctl->nentries[0] / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
 			!has_seq_scans(hashp))
 			(void) expand_table(hashp);
 	}
@@ -942,21 +1212,21 @@ hash_search_with_hash_value(HTAB *hashp,
 			if (currBucket != NULL)
 			{
 				/* if partitioned, must lock to touch nentries and freeList */
-				if (IS_PARTITIONED(hctl))
-					SpinLockAcquire(&hctl->mutex);
-
-				Assert(hctl->nentries > 0);
-				hctl->nentries--;
+				// AALEKSEEV: remove this
+				// if (IS_PARTITIONED(hctl))
+					// SpinLockAcquire(&hctl->mutex);
 
+				Assert(hctl->nentries[partition_idx] > 0);
+				
 				/* remove record from hash bucket's chain. */
 				*prevBucketPtr = currBucket->link;
 
-				/* add the record to the freelist for this table.  */
-				currBucket->link = hctl->freeList;
-				hctl->freeList = currBucket;
+				free_list_put(hashp, currBucket, partition_idx);
+				global_free_list_try_release(hashp, partition_idx);
 
-				if (IS_PARTITIONED(hctl))
-					SpinLockRelease(&hctl->mutex);
+				// AALEKSEEV: remove this
+				// if (IS_PARTITIONED(hctl))
+					// SpinLockRelease(&hctl->mutex);
 
 				/*
 				 * better hope the caller is synchronizing access to this
@@ -982,7 +1252,7 @@ hash_search_with_hash_value(HTAB *hashp,
 				elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
 					 hashp->tabname);
 
-			currBucket = get_hash_entry(hashp);
+			currBucket = get_hash_entry(hashp, partition_idx);
 			if (currBucket == NULL)
 			{
 				/* out of memory */
@@ -996,7 +1266,7 @@ hash_search_with_hash_value(HTAB *hashp,
 				else
 					ereport(ERROR,
 							(errcode(ERRCODE_OUT_OF_MEMORY),
-							 errmsg("out of memory")));
+							 errmsg("out of memory (2)"))); // AALEKSEEV: fix string
 			}
 
 			/* link into hashbucket chain */
@@ -1175,41 +1445,34 @@ hash_update_hash_key(HTAB *hashp,
  * create a new entry if possible
  */
 static HASHBUCKET
-get_hash_entry(HTAB *hashp)
+get_hash_entry(HTAB *hashp, int partition_idx)
 {
 	HASHHDR *hctl = hashp->hctl;
-	HASHBUCKET	newElement;
+	bool alloc_result;
 
 	for (;;)
 	{
 		/* if partitioned, must lock to touch nentries and freeList */
-		if (IS_PARTITIONED(hctl))
-			SpinLockAcquire(&hctl->mutex);
+		// if (IS_PARTITIONED(hctl))
+			// SpinLockAcquire(&hctl->mutex);
 
 		/* try to get an entry from the freelist */
-		newElement = hctl->freeList;
-		if (newElement != NULL)
+		if (hctl->freeList[partition_idx] != NULL)
 			break;
 
 		/* no free elements.  allocate another chunk of buckets */
-		if (IS_PARTITIONED(hctl))
-			SpinLockRelease(&hctl->mutex);
+		if(IS_PARTITIONED(hctl))
+			alloc_result = global_free_list_acquire(hashp, partition_idx);
+		else
+			alloc_result = element_alloc(hashp, hctl->nelem_alloc, 0);
 
-		if (!element_alloc(hashp, hctl->nelem_alloc))
-		{
-			/* out of memory */
-			return NULL;
-		}
+		if(!alloc_result)
+			return NULL; /* out of memory */
 	}
+	// if (IS_PARTITIONED(hctl))
+		// SpinLockRelease(&hctl->mutex);
 
-	/* remove entry from freelist, bump nentries */
-	hctl->freeList = newElement->link;
-	hctl->nentries++;
-
-	if (IS_PARTITIONED(hctl))
-		SpinLockRelease(&hctl->mutex);
-
-	return newElement;
+	return free_list_get(hashp, partition_idx);
 }
 
 /*
@@ -1218,11 +1481,21 @@ get_hash_entry(HTAB *hashp)
 long
 hash_get_num_entries(HTAB *hashp)
 {
+	int i;
+	long sum = hashp->hctl->nentries[0];
+	
 	/*
 	 * We currently don't bother with the mutex; it's only sensible to call
 	 * this function if you've got lock on all partitions of the table.
 	 */
-	return hashp->hctl->nentries;
+
+	if(!IS_PARTITIONED(hashp->hctl))
+		return sum;
+
+	for(i = 1; i < NUM_LOCK_PARTITIONS; i++)
+		sum += hashp->hctl->nentries[i];
+
+	return sum;
 }
 
 /*
@@ -1530,7 +1803,7 @@ seg_alloc(HTAB *hashp)
  * allocate some new elements and link them into the free list
  */
 static bool
-element_alloc(HTAB *hashp, int nelem)
+element_alloc(HTAB *hashp, int nelem, int glob_part_idx)
 {
 	HASHHDR *hctl = hashp->hctl;
 	Size		elementSize;
@@ -1562,15 +1835,23 @@ element_alloc(HTAB *hashp, int nelem)
 	}
 
 	/* if partitioned, must lock to touch freeList */
-	if (IS_PARTITIONED(hctl))
-		SpinLockAcquire(&hctl->mutex);
+	// if (IS_PARTITIONED(hctl))
+		// SpinLockAcquire(&hctl->mutex);
 
 	/* freelist could be nonempty if two backends did this concurrently */
-	firstElement->link = hctl->freeList;
-	hctl->freeList = prevElement;
+	if(IS_PARTITIONED(hctl))
+	{
+		firstElement->link = hctl->globalFreeList[glob_part_idx];
+		hctl->globalFreeList[glob_part_idx] = prevElement;
+	}
+	else
+	{
+		firstElement->link = hctl->freeList[0];
+		hctl->freeList[0] = prevElement;
+	}
 
-	if (IS_PARTITIONED(hctl))
-		SpinLockRelease(&hctl->mutex);
+	// if (IS_PARTITIONED(hctl))
+		// SpinLockRelease(&hctl->mutex);
 
 	return true;
 }
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index ff34529..1a5405d 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -128,13 +128,14 @@ extern char *MainLWLockNames[];
  * having this file include lock.h or bufmgr.h would be backwards.
  */
 
-/* Number of partitions of the shared buffer mapping hashtable */
-#define NUM_BUFFER_PARTITIONS  128
-
 /* Number of partitions the shared lock tables are divided into */
-#define LOG2_NUM_LOCK_PARTITIONS  4
+#define LOG2_NUM_LOCK_PARTITIONS  7 // 4 // AALEKSEEV: KEEP THIS! it's a bottleneck for new dynahash.c
 #define NUM_LOCK_PARTITIONS  (1 << LOG2_NUM_LOCK_PARTITIONS)
 
+ /* Number of partitions of the shared buffer mapping hashtable */
+ // AALEKSEEV: refactor
+#define NUM_BUFFER_PARTITIONS NUM_LOCK_PARTITIONS
+
 /* Number of partitions the shared predicate lock tables are divided into */
 #define LOG2_NUM_PREDICATELOCK_PARTITIONS  4
 #define NUM_PREDICATELOCK_PARTITIONS  (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)