Hi all,
as an experiment and to encourage some discussion I prepared an
alternate implementation of TrieNode which uses a std::map instead of
an array to store a node's children.
The expected result is a worst case performance degradation on insert
and delete from O(N) to O(N log R) where N is the length of the
c-string being looked up, and R is the size of the alphabet (as R =
256, we're talking about 8x worse).
The expected benefit is a noticeable reduction in terms of memory use,
especially for sparse key-spaces; it'd be useful e.g. in some lookup
cases.
Comments?
--
Francesco
=== modified file 'lib/libTrie/Trie.cc'
--- lib/libTrie/Trie.cc 2014-06-03 10:49:08 +0000
+++ lib/libTrie/Trie.cc 2014-06-03 14:16:21 +0000
@@ -48,7 +48,7 @@
return head->add(aString, theLength, privatedata, transform);
}
- head = new TrieNode;
+ head = new node_type;
return head->add(aString, theLength, privatedata, transform);
}
=== modified file 'lib/libTrie/Trie.h'
--- lib/libTrie/Trie.h 2014-06-03 10:49:08 +0000
+++ lib/libTrie/Trie.h 2014-06-03 14:20:17 +0000
@@ -58,7 +58,8 @@
bool add(char const *, size_t, void *);
private:
- TrieNode *head;
+ typedef MapTrieNode node_type;
+ node_type *head;
/* transfor each 8 bits in the element */
TrieCharTransform *transform;
=== modified file 'lib/libTrie/TrieNode.cc'
--- lib/libTrie/TrieNode.cc 2014-06-03 11:59:56 +0000
+++ lib/libTrie/TrieNode.cc 2014-06-03 13:09:24 +0000
@@ -24,13 +24,13 @@
#include <unistd.h>
#endif
-TrieNode::TrieNode() : _privateData(NULL)
+ArrayTrieNode::ArrayTrieNode() : _privateData(NULL)
{
for (int i = 0; i < 256; ++i)
internal[i] = NULL;
}
-TrieNode::~TrieNode()
+ArrayTrieNode::~ArrayTrieNode()
{
for (int i = 0; i < 256; ++i)
delete internal[i];
@@ -38,7 +38,7 @@
/* as for find */
bool
-TrieNode::add(char const *aString, size_t theLength, void *privatedata, TrieCharTransform *transform)
+ArrayTrieNode::add(char const *aString, size_t theLength, void *privatedata, TrieCharTransform *transform)
{
/* We trust that privatedata and existant keys have already been checked */
@@ -46,7 +46,7 @@
int index = transform ? (*transform)(*aString): *aString;
if (!internal[index])
- internal[index] = new TrieNode;
+ internal[index] = new ArrayTrieNode;
return internal[index]->add(aString + 1, theLength - 1, privatedata, transform);
} else {
=== modified file 'lib/libTrie/TrieNode.h'
--- lib/libTrie/TrieNode.h 2014-06-03 10:49:08 +0000
+++ lib/libTrie/TrieNode.h 2014-06-03 14:12:01 +0000
@@ -29,14 +29,14 @@
* i.e. M-ary internal node sizes etc
*/
-class TrieNode
+class ArrayTrieNode
{
public:
- TrieNode();
- ~TrieNode();
- TrieNode(TrieNode const &);
- TrieNode &operator= (TrieNode const &);
+ ArrayTrieNode();
+ ~ArrayTrieNode();
+ ArrayTrieNode(ArrayTrieNode const &);
+ ArrayTrieNode &operator= (ArrayTrieNode const &);
/* Find a string.
* If found, return the private data.
@@ -58,7 +58,7 @@
* internal[0] is the terminal node for
* a string and may not be used
*/
- TrieNode * internal[256];
+ ArrayTrieNode * internal[256];
/* If a string ends here, non NULL */
void *_privateData;
@@ -66,7 +66,7 @@
/* recursive. TODO? make iterative */
void *
-TrieNode::find (char const *aString, size_t theLength, TrieCharTransform *transform, bool const prefix) const
+ArrayTrieNode::find (char const *aString, size_t theLength, TrieCharTransform *transform, bool const prefix) const
{
if (theLength) {
int index = -1;
@@ -92,4 +92,79 @@
return _privateData;
}
}
+
+#include <map>
+class MapTrieNode {
+public:
+ MapTrieNode() : _privateData(NULL) {}
+ inline ~MapTrieNode();
+ MapTrieNode(MapTrieNode const &);
+ MapTrieNode &operator=(MapTrieNode const &);
+
+ inline void *find(char const *, size_t, TrieCharTransform *, bool const prefix) const;
+ inline bool add (char const *, size_t, void *, TrieCharTransform *);
+
+private:
+ typedef std::map<unsigned char, MapTrieNode *> internalmap;
+ internalmap internal;
+ void *_privateData;
+};
+
+MapTrieNode::~MapTrieNode()
+{
+ using std::map;
+ for (internalmap::iterator i =internal.begin(); i != internal.end(); ++i)
+ if (i->second)
+ delete i->second;
+}
+
+bool
+MapTrieNode::add(char const *aString, size_t theLength, void *privatedata, TrieCharTransform *transform)
+{
+ if (theLength) {
+ int index = transform ? (*transform)(*aString): *aString;
+ const internalmap::iterator i = internal.find(index);
+
+ if (i == internal.end())
+ internal[index] = new MapTrieNode;
+
+ return internal[index]->add(aString + 1, theLength - 1, privatedata, transform);
+ } else {
+ /* terminal node */
+
+ if (_privateData)
+ return false;
+
+ _privateData = privatedata;
+
+ return true;
+ }
+}
+
+void *
+MapTrieNode::find (char const *aString, size_t theLength, TrieCharTransform *transform, bool const prefix) const
+{
+ if (theLength) {
+ unsigned char pos = transform ? (*transform) (*aString) : *aString;
+
+ internalmap::const_iterator i = internal.find(pos);
+ if (i != internal.end()) {
+ void *result;
+ result = i->second->find(aString + 1, theLength - 1, transform, prefix);
+
+ if (result)
+ return result;
+ }
+
+ if (prefix)
+ return _privateData;
+
+ return NULL;
+ } else {
+ /* terminal node */
+ return _privateData;
+ }
+
+}
+
#endif /* LIBTRIE_TRIENODE_H */