Reviewers: dak, Message: On 2020/01/24 13:29:34, dak wrote: > Not sure about the speedup (we might have small sizes often enough that the > constant factor becomes relevant), but the code is quite a net win with regard > to obviously doing what it should do. It probably takes a whole lot more > allocation calls, but at least the impact on memory is just temporary. > > LGTM
I was hoping the standard hashmap would use probing, but looks like it's chaining If the allocation cost becomes problematic, we can use another hashmap instead. Description: Simplify and speed up uniquify Previously we sorted the array twice. Instead, we use a hash set. This makes the procedure O(N) rather than O(N log N). Please review this at https://codereview.appspot.com/583390043/ Affected files (+12, -42 lines): M lily/grob.cc Index: lily/grob.cc diff --git a/lily/grob.cc b/lily/grob.cc index e1582583c095f9a25dcdfd19f41708115e0c7734..13f0edf4f21789694a85d5855c0a99b0f17d2259 100644 --- a/lily/grob.cc +++ b/lily/grob.cc @@ -21,6 +21,7 @@ #include <cstring> #include <set> +#include <unordered_set> #include "align-interface.hh" #include "axis-group-interface.hh" @@ -963,50 +964,19 @@ Grob::check_cross_staff (Grob *commony) return false; } -static -bool -indirect_less (Grob **a, Grob **b) -{ - // Use original order as tie breaker. That gives us a stable sort - // at the lower price tag of an unstable one, and we want a stable - // sort in order to reliably retain the first instance of a grob - // pointer. - return *a < *b || (*a == *b && a < b); -} - -static -bool -indirect_eq (Grob **a, Grob **b) -{ - return *a == *b; -} - -static -bool -direct_less (Grob **a, Grob **b) -{ - return a < b; -} - -// uniquify uniquifies on the memory addresses of the Grobs, but then -// uses the original order. This makes results independent from the -// memory allocation of Grobs. - void uniquify (vector <Grob *> & grobs) { - vector <Grob **> vec (grobs.size ()); + std::unordered_set<Grob*> seen(grobs.size()); + int j = 0; for (vsize i = 0; i < grobs.size (); i++) - vec[i] = &grobs[i]; - vector_sort (vec, indirect_less); - vec.erase (unique (vec.begin (), vec.end (), indirect_eq), vec.end ()); - vector_sort (vec, direct_less); - - // Since the output is a sorted copy of the input with some elements - // removed, we can fill in the vector in-place if we do it starting - // from the front. - for (vsize i = 0; i < vec.size (); i++) - grobs[i] = *vec[i]; - grobs.erase (grobs.begin () + vec.size (), grobs.end ()); - return; + { + if (seen.find(grobs[i]) == seen.end()) + { + seen.insert(grobs[i]); + grobs[j++] = grobs[i]; + } + } + + grobs.resize(j); }