On Wed, Jan 10, 2024 at 9:05 AM Masahiko Sawada <sawada.m...@gmail.com> wrote:
>
> I've done in 0010 patch in v51 patch set.  Whereas RT_NODE_4 and
> RT_NODE_16 structs declaration needs RT_FANOUT_4_HI and
> RT_FANOUT_16_HI respectively, RT_FANOUT_16_LO and RT_FANOUT_48 need
> RT_NODE_16 and RT_NODE_48 structs declaration. So fanout declarations
> are now spread before and after RT_NODE_XXX struct declaration. It's a
> bit less readable, but I'm not sure of a better way.

They were before and after the *_BASE types, so it's not really worse,
I think. I did notice that RT_SLOT_IDX_LIMIT has been considered
special for a very long time, before we even had size classes, so it's
the same thing but even more far away. I have an idea to introduce
*_MAX macros, allowing to turn RT_SLOT_IDX_LIMIT into
RT_FANOUT_48_MAX, so that everything is in the same spot, and to make
this area more consistent. I also noticed that I'd been assuming that
RT_FANOUT_16_HI fits easily into a DSA size class, but that's only
true on 64-bit, and in any case we don't want to assume it. I've
attached an addendum .txt to demo this idea.
diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h
index bde6916184..ffb0b58826 100644
--- a/src/include/lib/radixtree.h
+++ b/src/include/lib/radixtree.h
@@ -287,19 +287,6 @@ RT_SCOPE void RT_STATS(RT_RADIX_TREE * tree);
 /* Tree level the radix tree uses */
 #define RT_MAX_LEVEL   ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN)
 
-/*
- * Number of bits necessary for isset array in node48.
- * Since bitmapword can be 64 bits, the only values that make sense
- * here are 64 and 128.
- * WIP: The paper uses at most 64 for this node kind. "isset" happens to fit
- * inside a single bitmapword on most platforms, so it's a good starting
- * point. We can make it higher if we need to.
- */
-#define RT_SLOT_IDX_LIMIT (RT_NODE_MAX_SLOTS / 4)
-
-/* Invalid index used in node48 */
-#define RT_INVALID_SLOT_IDX    0xFF
-
 /* Get a chunk from the key */
 #define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & 
RT_CHUNK_MASK))
 
@@ -426,20 +413,29 @@ typedef union RT_NODE_PTR
 }                      RT_NODE_PTR;
 
 /*
- * fanout values for each size class.
- *
- * RT_FANOUT_4_HI and RT_FANOUT_16_HI are declared here as they are
+ * Symbols for maximum possible fanout are declared first as they are
  * required to declare each node kind. The declarations of other fanout
  * values are followed as they need the struct sizes of each node kind.
- *
- * TODO: consider 5 with subclass 1 or 2.
  */
 
 /* max possible key chunks without struct padding */
-#define RT_FANOUT_4_HI (8 - sizeof(RT_NODE))
+#define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE))
 
 /* equal to two 128-bit SIMD registers, regardless of availability */
-#define RT_FANOUT_16_HI        32
+#define RT_FANOUT_16_MAX       32
+
+/*
+ * This also determines the number of bits necessary for the isset array,
+ * so we need to be mindful of the size of bitmapword.
+ * Since bitmapword can be 64 bits, the only values that make sense
+ * here are 64 and 128.
+ * WIP: The paper uses at most 64 for this node kind. "isset" happens to fit
+ * inside a single bitmapword on most platforms, so it's a good starting
+ * point. We can make it higher if we need to.
+ */
+#define RT_FANOUT_48_MAX (RT_NODE_MAX_SLOTS / 4)
+
+#define RT_FANOUT_256   RT_NODE_MAX_SLOTS
 
 /*
  * Node structs, one for each "kind"
@@ -448,7 +444,7 @@ typedef struct RT_NODE_4
 {
        RT_NODE         base;
 
-       uint8           chunks[RT_FANOUT_4_HI];
+       uint8           chunks[RT_FANOUT_4_MAX];
 
        /* number of children depends on size class */
        RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
@@ -458,7 +454,7 @@ typedef struct RT_NODE_16
 {
        RT_NODE          base;
 
-       uint8           chunks[RT_FANOUT_16_HI];
+       uint8           chunks[RT_FANOUT_16_MAX];
 
        /* number of children depends on size class */
        RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
@@ -476,8 +472,11 @@ typedef struct RT_NODE_48
        /* The index of slots for each fanout */
        uint8           slot_idxs[RT_NODE_MAX_SLOTS];
 
+/* Invalid index */
+#define RT_INVALID_SLOT_IDX    0xFF
+
        /* bitmap to track which slots are in use */
-       bitmapword      isset[RT_BM_IDX(RT_SLOT_IDX_LIMIT)];
+       bitmapword      isset[RT_BM_IDX(RT_FANOUT_48_MAX)];
 
        /* number of children depends on size class */
        RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER];
@@ -493,28 +492,29 @@ typedef struct RT_NODE_256
 {
        RT_NODE          base;
 
-       /*
-        * Zero is a valid value for embedded values, so we use a bitmap to 
track
-        * which slots are in use.
-        */
-       bitmapword      isset[RT_BM_IDX(RT_NODE_MAX_SLOTS)];
+       /* bitmap to track which slots are in use */
+       bitmapword      isset[RT_BM_IDX(RT_FANOUT_256)];
 
        /* Slots for 256 children */
-       RT_PTR_ALLOC children[RT_NODE_MAX_SLOTS];
+       RT_PTR_ALLOC children[RT_FANOUT_256];
 }                      RT_NODE_256;
 
-#define RT_FANOUT_4            4
-StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_HI, "watch struct padding");
-
 #if defined(RT_SHMEM)
-/* make sure the node16 subclass and node48 fit neatly into a DSA size class */
+/*
+ * Make sure the all nodes (except for node256) fit neatly into a DSA size 
class.
+ * We assume the RT_FANOUT_4 is in the range where DSA size classes
+ * increment by 8 (as of PG17 up to 64 bytes), so we just hard
+ * code that one.
+ */
 
 #if SIZEOF_DSA_POINTER < 8
 #define RT_FANOUT_16_LO        ((96 - offsetof(RT_NODE_16, children)) / 
sizeof(RT_PTR_ALLOC))
-#define RT_FANOUT_48   ((512 - offsetof(RT_NODE_48, children)) / 
sizeof(RT_PTR_ALLOC))
+#define RT_FANOUT_16_HI        Min(RT_FANOUT_16_MAX, (160 - 
offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
+#define RT_FANOUT_48   Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, 
children)) / sizeof(RT_PTR_ALLOC))
 #else
 #define RT_FANOUT_16_LO        ((160 - offsetof(RT_NODE_16, children)) / 
sizeof(RT_PTR_ALLOC))
-#define RT_FANOUT_48   ((768 - offsetof(RT_NODE_48, children)) / 
sizeof(RT_PTR_ALLOC))
+#define RT_FANOUT_16_HI        Min(RT_FANOUT_16_MAX, (320 - 
offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC))
+#define RT_FANOUT_48   Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, 
children)) / sizeof(RT_PTR_ALLOC))
 #endif                                                 /* SIZEOF_DSA_POINTER < 
8 */
 
 #else                                                  /* ! RT_SHMEM */
@@ -522,14 +522,17 @@ StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_HI, "watch 
struct padding");
 /* doesn't really matter, but may as well use the namesake */
 #define RT_FANOUT_16_LO        16
 /* use maximum possible */
-#define RT_FANOUT_48   RT_SLOT_IDX_LIMIT
+#define RT_FANOUT_16_HI RT_FANOUT_16_MAX
+#define RT_FANOUT_48   RT_FANOUT_48_MAX
 
 #endif                                                 /* RT_SHMEM */
 
-#define RT_FANOUT_256  256
+/* TODO: consider 5 with subclass 1 or 2. */
+#define RT_FANOUT_4            4
 
+StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding");
 StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than 
HI");
-StaticAssertDecl(RT_FANOUT_48 <= RT_SLOT_IDX_LIMIT, "more slots than isset 
bits");
+StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset 
bits");
 
 /*
  * Node size classes
@@ -1332,7 +1335,7 @@ RT_ADD_CHILD_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * ref, 
RT_NODE_PTR node,
                                        inverse;
 
                /* get the first word with at least one bit not set */
-               for (int i = 0; i < RT_BM_IDX(RT_SLOT_IDX_LIMIT); i++)
+               for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++)
                {
                        w = n48->isset[i];
                        if (w < ~((bitmapword) 0))
@@ -2771,7 +2774,7 @@ RT_DUMP_NODE(RT_NODE * node)
                                }
 
                                fprintf(stderr, "isset-bitmap: ");
-                               for (int i = 0; i < (RT_SLOT_IDX_LIMIT / 
BITS_PER_BYTE); i++)
+                               for (int i = 0; i < (RT_FANOUT_48_MAX / 
BITS_PER_BYTE); i++)
                                {
                                        fprintf(stderr, "%s%x", sep, ((uint8 *) 
n48->isset)[i]);
                                        sep = " ";
@@ -2802,7 +2805,7 @@ RT_DUMP_NODE(RT_NODE * node)
                                char *sep = "";
 
                                fprintf(stderr, "isset-bitmap: ");
-                               for (int i = 0; i < (RT_SLOT_IDX_LIMIT / 
BITS_PER_BYTE); i++)
+                               for (int i = 0; i < (RT_FANOUT_256 / 
BITS_PER_BYTE); i++)
                                {
                                        fprintf(stderr, "%s%x", sep, ((uint8 *) 
n256->isset)[i]);
                                        sep = " ";
@@ -2860,7 +2863,6 @@ RT_DUMP_NODE(RT_NODE * node)
 #undef RT_NODE_MUST_GROW
 #undef RT_NODE_KIND_COUNT
 #undef RT_SIZE_CLASS_COUNT
-#undef RT_SLOT_IDX_LIMIT
 #undef RT_INVALID_SLOT_IDX
 #undef RT_SLAB_BLOCK_SIZE
 #undef RT_RADIX_TREE_MAGIC

Reply via email to