From 439ec9295568bfa35933c1595f799e4fc3ad0030 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Fri, 26 Jan 2024 11:20:23 +0900
Subject: [PATCH v1 2/4] Add functions for updating keys and removing nodes to
 binaryheap.

Previously, binaryheap didn't support key updates and removing nodes
in an efficient way. For example, in order to remove a node from the
binaryheap, the caller has to pass the node's position within the
array that the binaryheap internally has. This operation can be done
in O(log n) but searching for the key's position is O(n).

This commit adds a hash table to binaryheap to track of positions of
each nodes in the binaryheap. That way, by using newly added
functions, binaryheap_update_up, binaryheap_update_down and
binaryheap_remove_node_ptr, both updating a key and removing a
node can be done in O(1) in an average and O(log n) in worst
case. This is known as the indexed priority queue.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch-through:
---
 src/common/binaryheap.c      | 134 ++++++++++++++++++++++++++++++++---
 src/include/lib/binaryheap.h |  28 ++++++++
 2 files changed, 153 insertions(+), 9 deletions(-)

diff --git a/src/common/binaryheap.c b/src/common/binaryheap.c
index bc43aca093..9122654e15 100644
--- a/src/common/binaryheap.c
+++ b/src/common/binaryheap.c
@@ -22,8 +22,28 @@
 #ifdef FRONTEND
 #include "common/logging.h"
 #endif
+#include "common/hashfn.h"
 #include "lib/binaryheap.h"
 
+/*
+ * Define parameters for hash table code generation. The interface is *also*"
+ * declared in binaryheaph.h (to generate the types, which are externally
+ * visible).
+ */
+#define SH_PREFIX bh_nodeidx
+#define SH_ELEMENT_TYPE bh_nodeidx_entry
+#define SH_KEY_TYPE bh_node_type
+#define SH_KEY key
+#define SH_HASH_KEY(tb, key) \
+	hash_bytes((const unsigned char *) &key, sizeof(bh_node_type))
+#define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(bh_node_type)) == 0)
+#define SH_SCOPE extern
+#ifdef FRONTEND
+#define SH_RAW_ALLOCATOR pg_malloc0
+#endif
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
 static void sift_down(binaryheap *heap, int node_off);
 static void sift_up(binaryheap *heap, int node_off);
 
@@ -49,6 +69,12 @@ binaryheap_allocate(int capacity, binaryheap_comparator compare, void *arg)
 	heap->bh_has_heap_property = true;
 	heap->bh_nodes = (bh_node_type *) palloc(sizeof(bh_node_type) * capacity);
 
+#ifdef FRONTEND
+	heap->bh_nodeidx = bh_nodeidx_create(capacity, NULL);
+#else
+	heap->bh_nodeidx = bh_nodeidx_create(CurrentMemoryContext, capacity, NULL);
+#endif
+
 	return heap;
 }
 
@@ -63,6 +89,7 @@ binaryheap_reset(binaryheap *heap)
 {
 	heap->bh_size = 0;
 	heap->bh_has_heap_property = true;
+	bh_nodeidx_reset(heap->bh_nodeidx);
 }
 
 /*
@@ -73,6 +100,7 @@ binaryheap_reset(binaryheap *heap)
 void
 binaryheap_free(binaryheap *heap)
 {
+	bh_nodeidx_destroy(heap->bh_nodeidx);
 	pfree(heap);
 }
 
@@ -117,6 +145,40 @@ bh_enlarge_node_array(binaryheap *heap)
 							  sizeof(bh_node_type) * heap->bh_space);
 }
 
+/*
+ * Set the given node at the 'idx' and updates its position accordingly.
+ */
+static void
+bh_set_node(binaryheap *heap, bh_node_type d, int idx)
+{
+	bh_nodeidx_entry *ent;
+	bool	found;
+
+	/* Set the node to the nodes array */
+	heap->bh_nodes[idx] = d;
+
+	/* Remember its index in the nodes array */
+	ent = bh_nodeidx_insert(heap->bh_nodeidx, d, &found);
+	ent->idx = idx;
+}
+
+/*
+ * Replace the node at 'idx' with the given node 'replaced_by'. Also
+ * update their positions accordingly.
+ */
+static void
+bh_replace_node(binaryheap *heap, int idx, bh_node_type replaced_by)
+{
+	bh_node_type	node = heap->bh_nodes[idx];
+
+	/* Remove overwritten node's index */
+	(void) bh_nodeidx_delete(heap->bh_nodeidx, node);
+
+	/* Replace it with the given new node */
+	if (idx < heap->bh_size)
+		bh_set_node(heap, replaced_by, idx);
+}
+
 /*
  * binaryheap_add_unordered
  *
@@ -131,7 +193,7 @@ binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
 	bh_enlarge_node_array(heap);
 
 	heap->bh_has_heap_property = false;
-	heap->bh_nodes[heap->bh_size] = d;
+	bh_set_node(heap, d, heap->bh_size);
 	heap->bh_size++;
 }
 
@@ -162,7 +224,7 @@ binaryheap_add(binaryheap *heap, bh_node_type d)
 {
 	bh_enlarge_node_array(heap);
 
-	heap->bh_nodes[heap->bh_size] = d;
+	bh_set_node(heap, d, heap->bh_size);
 	heap->bh_size++;
 	sift_up(heap, heap->bh_size - 1);
 }
@@ -203,6 +265,7 @@ binaryheap_remove_first(binaryheap *heap)
 	if (heap->bh_size == 1)
 	{
 		heap->bh_size--;
+		bh_nodeidx_delete(heap->bh_nodeidx, result);
 		return result;
 	}
 
@@ -210,7 +273,7 @@ binaryheap_remove_first(binaryheap *heap)
 	 * Remove the last node, placing it in the vacated root entry, and sift
 	 * the new root node down to its correct position.
 	 */
-	heap->bh_nodes[0] = heap->bh_nodes[--heap->bh_size];
+	bh_replace_node(heap, 0, heap->bh_nodes[--heap->bh_size]);
 	sift_down(heap, 0);
 
 	return result;
@@ -236,7 +299,7 @@ binaryheap_remove_node(binaryheap *heap, int n)
 						   heap->bh_arg);
 
 	/* remove the last node, placing it in the vacated entry */
-	heap->bh_nodes[n] = heap->bh_nodes[heap->bh_size];
+	bh_replace_node(heap, n, heap->bh_nodes[heap->bh_size]);
 
 	/* sift as needed to preserve the heap property */
 	if (cmp > 0)
@@ -245,6 +308,59 @@ binaryheap_remove_node(binaryheap *heap, int n)
 		sift_down(heap, n);
 }
 
+/*
+ * binaryheap_remove_node_ptr
+ *
+ * Similar to binaryheap_remove_node() but removes the given node. The caller
+ * must ensure that the given node is in the heap. O(log n) worst case.
+ */
+void
+binaryheap_remove_node_ptr(binaryheap *heap, bh_node_type d)
+{
+	bh_nodeidx_entry *ent;
+
+	ent = bh_nodeidx_lookup(heap->bh_nodeidx, d);
+	Assert(ent);
+
+	binaryheap_remove_node(heap, ent->idx);
+}
+
+/*
+ * binaryheap_update_up
+ *
+ * Sift the given node up after the node's key is updated. The caller must
+ * ensure that the given node is in the heap. O(log n) worst case.
+ */
+void
+binaryheap_update_up(binaryheap *heap, bh_node_type d)
+{
+	bh_nodeidx_entry *ent;
+
+	ent = bh_nodeidx_lookup(heap->bh_nodeidx, d);
+	Assert(ent);
+	Assert(ent->idx >= 0 && ent->idx < heap->bh_size);
+
+	sift_up(heap, ent->idx);
+}
+
+/*
+ * binaryheap_update_down
+ *
+ * Sift the given node down after the node's key is updated. The caller must
+ * ensure that the given node is in the heap. O(log n) worst case.
+ */
+void
+binaryheap_update_down(binaryheap *heap, bh_node_type d)
+{
+	bh_nodeidx_entry *ent;
+
+	ent = bh_nodeidx_lookup(heap->bh_nodeidx, d);
+	Assert(ent);
+	Assert(ent->idx >= 0 && ent->idx < heap->bh_size);
+
+	sift_down(heap, ent->idx);
+}
+
 /*
  * binaryheap_replace_first
  *
@@ -257,7 +373,7 @@ binaryheap_replace_first(binaryheap *heap, bh_node_type d)
 {
 	Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property);
 
-	heap->bh_nodes[0] = d;
+	bh_replace_node(heap, 0, d);
 
 	if (heap->bh_size > 1)
 		sift_down(heap, 0);
@@ -299,11 +415,11 @@ sift_up(binaryheap *heap, int node_off)
 		 * Otherwise, swap the parent value with the hole, and go on to check
 		 * the node's new parent.
 		 */
-		heap->bh_nodes[node_off] = parent_val;
+		bh_set_node(heap, parent_val, node_off);
 		node_off = parent_off;
 	}
 	/* Re-fill the hole */
-	heap->bh_nodes[node_off] = node_val;
+	bh_set_node(heap, node_val, node_off);
 }
 
 /*
@@ -358,9 +474,9 @@ sift_down(binaryheap *heap, int node_off)
 		 * Otherwise, swap the hole with the child that violates the heap
 		 * property; then go on to check its children.
 		 */
-		heap->bh_nodes[node_off] = heap->bh_nodes[swap_off];
+		bh_set_node(heap, heap->bh_nodes[swap_off], node_off);
 		node_off = swap_off;
 	}
 	/* Re-fill the hole */
-	heap->bh_nodes[node_off] = node_val;
+	bh_set_node(heap, node_val, node_off);
 }
diff --git a/src/include/lib/binaryheap.h b/src/include/lib/binaryheap.h
index 1439f20803..8847abf863 100644
--- a/src/include/lib/binaryheap.h
+++ b/src/include/lib/binaryheap.h
@@ -11,6 +11,8 @@
 #ifndef BINARYHEAP_H
 #define BINARYHEAP_H
 
+#include "utils/palloc.h"
+
 /*
  * We provide a Datum-based API for backend code and a void *-based API for
  * frontend code (since the Datum definitions are not available to frontend
@@ -29,6 +31,28 @@ typedef Datum bh_node_type;
  */
 typedef int (*binaryheap_comparator) (bh_node_type a, bh_node_type b, void *arg);
 
+/*
+ * Struct for A hash table element to store the node's index in the bh_nodes
+ * array.
+ */
+typedef struct bh_nodeidx_entry
+{
+	bh_node_type	key;
+	char			status;
+	int				idx;
+} bh_nodeidx_entry;
+
+/* define parameters necessary to generate the hash table interface */
+#define SH_PREFIX bh_nodeidx
+#define SH_ELEMENT_TYPE bh_nodeidx_entry
+#define SH_KEY_TYPE bh_node_type
+#define SH_SCOPE extern
+#ifdef FRONTEND
+#define SH_RAW_ALLOCATOR pg_malloc0
+#endif
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
 /*
  * binaryheap
  *
@@ -47,6 +71,7 @@ typedef struct binaryheap
 	binaryheap_comparator bh_compare;
 	void	   *bh_arg;
 	bh_node_type *bh_nodes;
+	bh_nodeidx_hash	*bh_nodeidx;
 } binaryheap;
 
 extern binaryheap *binaryheap_allocate(int capacity,
@@ -60,7 +85,10 @@ extern void binaryheap_add(binaryheap *heap, bh_node_type d);
 extern bh_node_type binaryheap_first(binaryheap *heap);
 extern bh_node_type binaryheap_remove_first(binaryheap *heap);
 extern void binaryheap_remove_node(binaryheap *heap, int n);
+extern void binaryheap_remove_node_ptr(binaryheap *heap, bh_node_type d);
 extern void binaryheap_replace_first(binaryheap *heap, bh_node_type d);
+extern void binaryheap_update_up(binaryheap *heap, bh_node_type d);
+extern void binaryheap_update_down(binaryheap *heap, bh_node_type d);
 
 #define binaryheap_empty(h)			((h)->bh_size == 0)
 #define binaryheap_size(h)			((h)->bh_size)
-- 
2.39.3

