Instead of a linear search over common_list to check whether
a path is common, use a trie.  The trie search operates on
path prefixes, and handles excludes.

Signed-off-by: David Turner <dtur...@twopensource.com>
---

Probably overkill, but maybe we could later use it for making exclude
or sparse-checkout matching faster (or maybe we have to go all the way
to McNaughton-Yamada for that to be truly worthwhile).

---
 path.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 201 insertions(+), 14 deletions(-)

diff --git a/path.c b/path.c
index 236f797..21a4ce7 100644
--- a/path.c
+++ b/path.c
@@ -121,25 +121,212 @@ struct common_dir common_list[] = {
        { NULL, 0, 0, 0 }
 };
 
-static void update_common_dir(struct strbuf *buf, int git_dir_len)
+/*
+ * A compressed trie.  A trie node consists of zero or more characters that
+ * are common to all elements with this prefix, optionally followed by some
+ * children.  If value is not NULL, the trie node is a terminal node.
+ *
+ * For example, consider the following set of strings:
+ * abc
+ * def
+ * definite
+ * definition
+ *
+ * The trie would look look like:
+ * root: len = 0, value = (something), children a and d non-NULL.
+ *    a: len = 2, contents = bc
+ *    d: len = 2, contents = ef, children i non-NULL, value = (something)
+ *       i: len = 3, contents = nit, children e and i non-NULL, value = NULL
+ *           e: len = 0, children all NULL, value = (something)
+ *           i: len = 2, contents = on, children all NULL, value = (something)
+ */
+struct trie {
+       struct trie *children[256];
+       int len;
+       char *contents;
+       void *value;
+};
+
+static struct trie *make_trie_node(const char *key, void *value)
 {
-       char *base = buf->buf + git_dir_len;
-       const struct common_dir *p;
+       struct trie *new_node = xcalloc(1, sizeof(*new_node));
+       new_node->len = strlen(key);
+       if (new_node->len) {
+               new_node->contents = xmalloc(new_node->len);
+               memcpy(new_node->contents, key, new_node->len);
+       }
+       new_node->value = value;
+       return new_node;
+}
 
-       if (is_dir_file(base, "logs", "HEAD") ||
-           is_dir_file(base, "info", "sparse-checkout"))
-               return; /* keep this in $GIT_DIR */
-       for (p = common_list; p->dirname; p++) {
-               const char *path = p->dirname;
-               if (p->is_dir && dir_prefix(base, path)) {
-                       replace_dir(buf, git_dir_len, get_git_common_dir());
-                       return;
+/*
+ * Add a key/value pair to a trie.  The key is assumed to be \0-terminated.
+ * If there was an existing value for this key, return it.
+ */
+static void *add_to_trie(struct trie *root, const char *key, void *value)
+{
+       struct trie *child;
+       void *old;
+       int i;
+
+       if (!*key) {
+               /* we have reached the end of the key */
+               old = root->value;
+               root->value = value;
+               return old;
+       }
+
+       for (i = 0; i < root->len; ++i) {
+               if (root->contents[i] == key[i])
+                       continue;
+
+               /*
+                * Split this node: child will contain this node's
+                * existing children.
+                */
+               child = malloc(sizeof(*child));
+               memcpy(child->children, root->children, sizeof(root->children));
+
+               child->len = root->len - i - 1;
+               if (child->len) {
+                       child->contents = strndup(root->contents + i + 1,
+                                                  child->len);
                }
-               if (!p->is_dir && !strcmp(base, path)) {
-                       replace_dir(buf, git_dir_len, get_git_common_dir());
-                       return;
+               child->value = root->value;
+               root->value = NULL;
+               root->len = i;
+
+               memset(root->children, 0, sizeof(root->children));
+               root->children[(unsigned char)root->contents[i]] = child;
+
+               /* This is the newly-added child. */
+               root->children[(unsigned char)key[i]] =
+                       make_trie_node(key + i + 1, value);
+               return NULL;
+       }
+
+       /* We have matched the entire compressed section */
+       if (key[i]) {
+               child = root->children[(unsigned char)key[root->len]];
+               if (child) {
+                       return add_to_trie(child, key + root->len + 1, value);
+               } else {
+                       child = make_trie_node(key + root->len + 1, value);
+                       root->children[(unsigned char)key[root->len]] = child;
+                       return NULL;
                }
        }
+
+       old = root->value;
+       root->value = value;
+       return old;
+}
+
+typedef int (*match_fn)(const char *unmatched, void *data, void *baton);
+
+/*
+ * Search a trie for some key.  Find the longest /-or-\0-terminated
+ * prefix of the key for which the trie contains a value.  Call fn
+ * with the unmatched portion of the key and the found value, and
+ * return its return value.  If there is no such prefix, return -1.
+ *
+ * For example, consider the trie containing only [refs,
+ * refs/worktree] (both with values).
+ *
+ * | key             | unmatched  | val from node | return value |
+ * |-----------------|------------|---------------|--------------|
+ * | a               | not called | n/a           | -1           |
+ * | refs            | \0         | refs          | as per fn    |
+ * | refs/           | /          | refs          | as per fn    |
+ * | refs/w          | /w         | refs          | as per fn    |
+ * | refs/worktree   | \0         | refs/worktree | as per fn    |
+ * | refs/worktree/  | /          | refs/worktree | as per fn    |
+ * | refs/worktree/a | /a         | refs/worktree | as per fn    |
+ * |-----------------|------------|---------------|--------------|
+ *
+ */
+static int trie_find(struct trie *root, const char *key, match_fn fn,
+                    void *baton)
+{
+       int i;
+       int result;
+       struct trie *child;
+
+       if (!*key) {
+               /* we have reached the end of the key */
+               if (root->value && !root->len)
+                       return fn(key, root->value, baton);
+               else
+                       return -1;
+       }
+
+       for (i = 0; i < root->len; ++i) {
+               if (root->contents[i] != key[i])
+                       return -1;
+       }
+
+       /* Matched the entire compressed section */
+       key += i;
+       if (!*key)
+               /* End of key */
+               return fn(key, root->value, baton);
+
+       child = root->children[(unsigned char)*key];
+       if (child)
+               result = trie_find(child, key + 1, fn, baton);
+       else
+               result = -1;
+
+       if (result >= 0 || (*key != '/' && *key != 0))
+               return result;
+       if (root->value)
+               return fn(key, root->value, baton);
+       else
+               return -1;
+}
+
+static struct trie common_trie;
+static int common_trie_done_setup;
+
+static void init_common_trie(void)
+{
+       struct common_dir *p;
+
+       if (common_trie_done_setup)
+               return;
+
+       for (p = common_list; p->dirname; p++)
+               add_to_trie(&common_trie, p->dirname, p);
+
+       common_trie_done_setup = 1;
+}
+
+/*
+ * Helper function for update_common_dir: returns 1 if the dir
+ * prefix is common.
+ */
+static int check_common(const char *unmatched, void *value, void *baton)
+{
+       struct common_dir *dir = value;
+
+       if (!dir)
+               return 0;
+
+       if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/'))
+               return !dir->exclude;
+
+       if (!dir->is_dir && unmatched[0] == 0)
+               return !dir->exclude;
+
+       return 0;
+}
+
+static void update_common_dir(struct strbuf *buf, int git_dir_len)
+{
+       char *base = buf->buf + git_dir_len;
+       init_common_trie();
+       if (trie_find(&common_trie, base, check_common, NULL) > 0)
+               replace_dir(buf, git_dir_len, get_git_common_dir());
 }
 
 void report_linked_checkout_garbage(void)
-- 
2.0.4.315.gad8727a-twtrsrc

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to