Implement a latched RB-tree in order to get RCU-style lockless lookups.
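
The latch technique (see raw_write_seqcount_latch()) keeps two copies of
the tree: modifications are applied to both copies in turn, guarded by a
sequence count, so a lockless lookup can always observe at least one
consistent copy.

As a minimal usage sketch (my_node, my_less, my_comp, my_insert, my_find
and my_root are illustrative names, not part of this patch), a user embeds
struct latch_tree_nodes in its object and provides the two order operators:

	struct my_node {
		u64			key;
		struct latch_tree_nodes	nodes;
	};

	static bool my_less(struct latch_tree_node *a, struct latch_tree_node *b)
	{
		return ((struct my_node *)a->priv)->key <
		       ((struct my_node *)b->priv)->key;
	}

	static int my_comp(void *key, struct latch_tree_node *n)
	{
		u64 k = *(u64 *)key;
		struct my_node *mn = n->priv;

		if (k < mn->key)
			return -1;
		if (k > mn->key)
			return 1;
		return 0;
	}

	static const struct latch_tree_ops my_ops = {
		.less = my_less,
		.comp = my_comp,
	};

	static struct latch_tree_root my_root;

	/* writers must be serialized externally, e.g. by a spinlock */
	static void my_insert(struct my_node *mn)
	{
		latch_tree_insert(&mn->nodes, &my_root, mn, &my_ops);
	}

	/* lookups can run locklessly; callers must hold rcu_read_lock() and
	 * the result is only stable within the RCU read-side critical
	 * section */
	static struct my_node *my_find(u64 key)
	{
		struct latch_tree_node *ltn;

		ltn = latch_tree_find(&key, &my_root, &my_ops);
		return ltn ? ltn->priv : NULL;
	}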

Cc: Michel Lespinasse <wal...@google.com>
Cc: Andrea Arcangeli <aarca...@redhat.com>
Cc: David Woodhouse <david.woodho...@intel.com>
Cc: Rik van Riel <r...@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoy...@efficios.com>
Cc: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>
Cc: Oleg Nesterov <o...@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 include/linux/rbtree_latch.h |  169 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)

--- /dev/null
+++ b/include/linux/rbtree_latch.h
@@ -0,0 +1,169 @@
+/*
+ * Latched RB-trees
+ *
+ * Copyright (C) 2015 Intel Corp., Peter Zijlstra <pet...@infradead.org>
+ */
+
+#ifndef RB_TREE_LATCH_H
+#define RB_TREE_LATCH_H
+
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+
+/*
+ * Since RB-trees have non-atomic modifications, they're not suited for
+ * RCU/lockless queries.
+ *
+ * Employ the latch technique -- see raw_write_seqcount_latch() -- to implement
+ * a latched RB-tree which does allow this by virtue of always having (at
+ * least) one stable copy of the tree.
+ *
+ * However, while we have the guarantee that there is at all times one stable
+ * copy, this does not guarantee an iteration will not observe modifications.
+ * What might have been a stable copy at the start of the iteration need not
+ * remain so for the duration of the iteration.
+ *
+ * Therefore, this does require a lockless RB-tree iteration to be non-fatal in
+ * all circumstances; see the comment in lib/rbtree.c.
+ */
+
+struct latch_tree_node {
+       void            *priv;
+       struct rb_node  node;
+};
+
+struct latch_tree_nodes {
+       struct latch_tree_node node[2];
+};
+
+struct latch_tree_root {
+       seqcount_t      seq;
+       struct rb_root  tree[2];
+};
+
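+/*
+ * latch_tree_ops - operators to define the tree order
+ *
+ * @less: used for insertion; provides the (partial) order between two elements
+ * @comp: used for lookups; provides the order between the search key and an
+ *        element
+ */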
+struct latch_tree_ops {
+       bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b);
+       int  (*comp)(void *key,                 struct latch_tree_node *b);
+};
+
+static __always_inline void
+__lt_insert(struct latch_tree_node *ltn, struct rb_root *root,
+           bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b))
+{
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct latch_tree_node *ltp;
+
+       while (*link) {
+               parent = *link;
+               ltp = container_of(parent, struct latch_tree_node, node);
+
+               if (less(ltn, ltp))
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       rb_link_node_rcu(&ltn->node, parent, link);
+       rb_insert_color(&ltn->node, root);
+}
+
+static __always_inline void
+__lt_erase(struct latch_tree_node *ltn, struct rb_root *root)
+{
+       rb_erase(&ltn->node, root);
+}
+
+static __always_inline struct latch_tree_node *
+__lt_find(void *key, struct rb_root *root,
+         int (*comp)(void *key, struct latch_tree_node *ltn))
+{
+       struct rb_node *n = rcu_dereference_raw(root->rb_node);
+       struct latch_tree_node *ltn;
+       int c;
+
+       while (n) {
+               ltn = container_of(n, struct latch_tree_node, node);
+               c = comp(key, ltn);
+
+               if (c < 0)
+                       n = rcu_dereference_raw(n->rb_left);
+               else if (c > 0)
+                       n = rcu_dereference_raw(n->rb_right);
+               else
+                       return ltn;
+       }
+
+       return NULL;
+}
+
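+/*
+ * latch_tree_insert() - insert @nodes into the trees @root
+ *
+ * Updates the trees in an ordered fashion such that a lockless lookup will
+ * always observe (at least) one complete tree; see raw_write_seqcount_latch().
+ *
+ * All modifications (insert, erase) require the caller to serialize.
+ */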
+static __always_inline void
+latch_tree_insert(struct latch_tree_nodes *nodes,
+                 struct latch_tree_root *root,
+                 void *priv,
+                 const struct latch_tree_ops *ops)
+{
+       nodes->node[0].priv = nodes->node[1].priv = priv;
+
+       raw_write_seqcount_latch(&root->seq);
+       __lt_insert(&nodes->node[0], &root->tree[0], ops->less);
+       raw_write_seqcount_latch(&root->seq);
+       __lt_insert(&nodes->node[1], &root->tree[1], ops->less);
+}
+
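+/*
+ * latch_tree_erase() - removes @nodes from the trees @root
+ *
+ * Like insert, requires the caller to serialize modifications. The nodes
+ * must not be re-used or freed until at least one RCU grace period has
+ * passed, since lockless lookups might still observe them.
+ */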
+static __always_inline void
+latch_tree_erase(struct latch_tree_nodes *nodes,
+                struct latch_tree_root *root,
+                const struct latch_tree_ops *ops)
+{
+       raw_write_seqcount_latch(&root->seq);
+       __lt_erase(&nodes->node[0], &root->tree[0]);
+       raw_write_seqcount_latch(&root->seq);
+       __lt_erase(&nodes->node[1], &root->tree[1]);
+}
+
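+/*
+ * latch_tree_find() - find the node matching @key in the trees @root
+ *
+ * Safe to call under RCU; retries the lookup when it races with a
+ * modification. Returns a pointer to the matching node, or NULL when no
+ * node matches @key.
+ */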
+static __always_inline struct latch_tree_node *
+latch_tree_find(void *key, struct latch_tree_root *root,
+               const struct latch_tree_ops *ops)
+{
+       struct latch_tree_node *node;
+       unsigned int seq;
+
+       do {
+               seq = raw_read_seqcount(&root->seq);
+               node = __lt_find(key, &root->tree[seq & 1], ops->comp);
+       } while (read_seqcount_retry(&root->seq, seq));
+
+       return node;
+}
+
+#endif /* RB_TREE_LATCH_H */

