Here's the code, but I fail see to the point if the goal is compare
IntrusiveHashMap to some theoretical hash map rather than the actual
alternative hash maps that have been proposed. I thought your specific
recommendation was to use std::unordered_map instead, so that's what I
tested against. Also, wasn't your original objection to having to write and
maintain our own hash containers? But now we're going to write our own
"better designed" ones? I think this is the point at which I give up, where
I have to beat unspecified alternate implementations and meet unspecified
performance goals.
1. TEST_CASE("IntrusiveHashMapPerf", "[IntrusiveHashMap][performance]")
2. {
3. // Force these so I can easily change the set of tests.
4. auto start = std::chrono::high_resolution_clock::now();
5. auto delta = std::chrono::high_resolution_clock::now() - start;
6. constexpr int N_LOOPS = 1000;
7. constexpr int N = 1009; // prime > N_LOOPS
8.
9. std::vector<const char *> strings;
10.
11. std::uniform_int_distribution<short> char_gen {'a', 'z'};
12. std::uniform_int_distribution<short> length_gen { 20, 40 };
13. std::minstd_rand randu;
14.
15. Map ihm;
16. std::unordered_map<std::string, Thing> um;
17.
18. strings.reserve(N);
19. for ( int i = 0 ; i < N ; ++i ) {
20. auto len = length_gen(randu);
21. char *s = static_cast<char *>(malloc(len + 1));
22. for (decltype(len) j = 0; j < len; ++j) {
23. s[j] = char_gen(randu);
24. }
25. s[len] = 0;
26. strings.push_back(s);
27. }
28.
29. // Fill the IntrusiveHashMap
30. start = std::chrono::high_resolution_clock::now();
31. for (int i = 0; i < N ; ++i) {
32. auto thing = new Thing{strings[i], i};
33. ihm.insert(thing);
34. }
35. delta = std::chrono::high_resolution_clock::now() - start;
36. std::cout << "IHM populate " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
37. << "ms" << std::endl;
38.
39. // Do some lookups
40. start = std::chrono::high_resolution_clock::now();
41. for ( int i = 0 ; i < N_LOOPS ; ++i ) {
42. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) {
43. ihm.find(strings[idx]);
44. }
45. }
46. delta = std::chrono::high_resolution_clock::now() - start;
47. std::cout << "IHM find " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
48. << "ms" << std::endl;
49.
50. // Fill the std::unordered_map
51. start = std::chrono::high_resolution_clock::now();
52. for (int i = 0; i < N ; ++i) {
53. um.emplace(strings[i], Thing{strings[i], i});
54. }
55. delta = std::chrono::high_resolution_clock::now() - start;
56. std::cout << "UM populate " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
57. << "ms" << std::endl;
58.
59. // Do some lookups
60. start = std::chrono::high_resolution_clock::now();
61. for ( int i = 0 ; i < N_LOOPS ; ++i ) {
62. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) {
63. um.find(strings[idx]);
64. }
65. }
66. delta = std::chrono::high_resolution_clock::now() - start;
67. std::cout << "UM find " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
68. << "ms" << std::endl;
69.
70. // Test double indexing
71.
72. std::vector<Thing2> things;
73. things.reserve(N);
74. for ( int i = 0 ; i < N ; ++i ) {
75. things.emplace_back(strings[i], i);
76. }
77.
78. std::unordered_map<std::string, Thing2*> um_by_name;
79. std::unordered_map<int, Thing2*> um_by_n;
80.
81. IntrusiveHashMap<Thing2::LinkByName> ihm_by_name;
82. IntrusiveHashMap<Thing2::LinkByN> ihm_by_n;
83.
84. // Fill the IntrusiveHashMap
85. start = std::chrono::high_resolution_clock::now();
86. for (int i = 0; i < N ; ++i) {
87. ihm_by_name.insert(&things[i]);
88. ihm_by_n.insert(&things[i]);
89. }
90. delta = std::chrono::high_resolution_clock::now() - start;
91. std::cout << "IHM2 populate " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
92. << "ms" << std::endl;
93.
94. // Do some lookups
95. start = std::chrono::high_resolution_clock::now();
96. for ( int i = 0 ; i < N_LOOPS ; ++i ) {
97. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) {
98. Thing2 * thing = ihm_by_n.find(idx);
99. ihm_by_name.iterator_for(thing);
100. }
101. }
102. delta = std::chrono::high_resolution_clock::now() - start;
103. std::cout << "IHM2 find " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
104. << "ms" << std::endl;
105.
106. // Fill the std::unordered_map
107. start = std::chrono::high_resolution_clock::now();
108. for (int i = 0; i < N ; ++i) {
109. um_by_n.emplace(things[i]._n, &things[i]);
110. um_by_name.emplace(things[i]._payload, &things[i]);
111. }
112. delta = std::chrono::high_resolution_clock::now() - start;
113. std::cout << "UM2 populate " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
114. << "ms" << std::endl;
115.
116. // Do some lookups
117. start = std::chrono::high_resolution_clock::now();
118. for ( int i = 0 ; i < N_LOOPS ; ++i ) {
119. for (int j = 0, idx = i; j < N; ++j, idx = (idx + i) % N) {
120. um_by_name.find(strings[idx]);
121. um_by_n.find(idx);
122. }
123. }
124. delta = std::chrono::high_resolution_clock::now() - start;
125. std::cout << "UM2 find " << delta.count() << "ns or " <<
std::chrono::duration_cast<std::chrono::milliseconds>(delta).count()
126. << "ms" << std::endl;
127.
128. };
<https://apaste.info/>
I think you'll find, though, that even the best designed hash map must
still compute a hash to do a lookup and that is going to take more
computation than IntrusiveHashMap::iterator_for. If you mean it would
slower than that but still fast enough to not be a performance issue,
perhaps you could elucidate what your threshold for "not a performance hit"
is.
On Fri, Aug 24, 2018 at 12:46 PM Bryan Call <[email protected]> wrote:
> I stated that a better designed hash table (not a chained hash table)
> would have much better performance and that a double find would not be a
> performance hit. The benchmark was done with two chained hash tables,
> which are known not to have great performance.
>
> Can you post the source code of the benchmark?
>
>