[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA added a comment. ping https://reviews.llvm.org/D36423 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA updated this revision to Diff 111998. DIVYA added a comment. - added test qsort_worst_uint32 in algorithm.bench.cpp https://reviews.llvm.org/D36423 Files: benchmarks/GenerateInput.hpp benchmarks/algorithms.bench.cpp include/algorithm Index: include/algorithm === --- include/algorithm +++ include/algorithm @@ -642,6 +642,7 @@ #include // needed to provide swap_ranges. #include #include +#include #include #if defined(__IBMCPP__) @@ -3994,7 +3995,14 @@ template void -__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, +_Compare); + +// Using introsort algorithm for sorting +template +void +__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit) { // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -4029,6 +4037,12 @@ _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); return; } +if (__depth_limit == 0) +{ +__partial_sort<_Compare>(__first,__last,__last,__comp); +return; +} + // __len > 5 _RandomAccessIterator __m = __first; _RandomAccessIterator __lm1 = __last; @@ -4172,19 +4186,33 @@ // sort smaller range with recursive call and larger with tail recursion elimination if (__i - __first < __last - __i) { -_VSTD::__sort<_Compare>(__first, __i, __comp); -// _VSTD::__sort<_Compare>(__i+1, __last, __comp); +_VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); +// _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); __first = ++__i; } else { -_VSTD::__sort<_Compare>(__i+1, __last, __comp); -// _VSTD::__sort<_Compare>(__first, __i, __comp); +_VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); +// _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); __last = __i; } +--__depth_limit; } } +template +void +__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +{ + + // Threshold(or depth limit) for introsort is taken to be 2*log2(size) + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; + const difference_type __dp = __last - __first; + difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp); + __depth_limit *= 2; + __intro_sort<_Compare>(__first, __last, __comp, __depth_limit); +} + // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare template inline _LIBCPP_INLINE_VISIBILITY Index: benchmarks/algorithms.bench.cpp === --- benchmarks/algorithms.bench.cpp +++ benchmarks/algorithms.bench.cpp @@ -5,7 +5,7 @@ #include "benchmark/benchmark_api.h" #include "GenerateInput.hpp" -constexpr std::size_t TestNumInputs = 1024; +constexpr std::size_t TestNumInputs = 1024*64; template void BM_Sort(benchmark::State& st, GenInputs gen) { @@ -58,5 +58,8 @@ BENCHMARK_CAPTURE(BM_Sort, single_element_strings, getDuplicateStringInputs)->Arg(TestNumInputs); +BENCHMARK_CAPTURE(BM_Sort, qsort_worst_uint32, +getQSortKiller)->Arg(TestNumInputs); + BENCHMARK_MAIN() Index: benchmarks/GenerateInput.hpp === --- benchmarks/GenerateInput.hpp +++ benchmarks/GenerateInput.hpp @@ -138,5 +138,40 @@ return cinputs; } +template +inline std::vector make_killer(size_t N) { +std::vector inputs; +uint32_t candidate = 0; +uint32_t num_solid = 0; +uint32_t gas = N - 1; + +std::vector tmp(N); +inputs.resize(N); + +for (T i = 0; i < N; ++i) { +tmp[i] = i; +inputs[i] = gas; +} + +std::sort(tmp.begin(), tmp.end(), [&](T x, T y) { +if (inputs[x] == gas && inputs[y] == gas) { +if (x == candidate) inputs[x] = num_solid++; +else inputs[y] = num_solid++; +} + +if (inputs[x] == gas) candidate = x; +else if (inputs[y] == gas) candidate = y; + +return inputs[x] < inputs[y]; +}); +return inputs; +} + + +template +inline std::vector getQSortKiller(size_t N){ +std::vector inputs = make_killer(N); +return inputs; +} #endif // BENCHMARK_GENERATE_INPUT_HPP ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-c
[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA added a comment. Added benchmark from Aditya's repo into the libcxx benchmark code base https://reviews.llvm.org/D36423 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA updated this revision to Diff 111027. https://reviews.llvm.org/D36423 Files: benchmarks/algorithms.bench.cpp include/algorithm include/rng_utils.h include/test_configs.h include/test_utils.h Index: include/test_utils.h === --- /dev/null +++ include/test_utils.h @@ -0,0 +1,270 @@ +#ifndef TEST_UTILS_H +#define TEST_UTILS_H + +#include "rng_utils.h" +#include +#include + +// TODO: Add more aggregates. +struct aggregate { + int first; + int second; + int third; + int fourth; + aggregate() : first(0), second(0), third(0), fourth(0) + {} + // This is a hacky constructor for ::find on associative containers to work. + aggregate(int i) +: first(i), second(i), third(i), fourth(i) + {} + aggregate(int i, int j, int k, int l) +: first(i), second(j), third(k), fourth(l) + {} + + aggregate& operator++() { +++first; +++second; +++third; +++fourth; +return *this; + } + aggregate operator++(int) { +aggregate N(*this); +++(*this); +return N; + } + + bool operator<(const aggregate& i) const { +return first < i.first; + } + + bool operator>(const aggregate& i) const { +return i < *this; + } + + bool operator==(const aggregate& i) const { +return first == i.first; + } + + bool operator!=(const aggregate& i) const { +return !(*this == i); + } +}; + +// Hasher for aggregate data type. +namespace std { + template <> + struct hash + { +std::size_t operator()(const aggregate& k) const +{ + using std::hash; + // Hash and combine using bit-shift. + return ((hash()(k.first) + ^ (hash()(k.second) << 1)) >> 1) + ^ (hash()(k.third) << 1) + ^ (hash()(k.fourth) << 1); +} + }; +} + +template +struct remove_const { typedef T type; }; + +// value_type of a std::map is std::pair +template +struct remove_const> { typedef std::pair type; }; + +template +T get_rand(random_device &r, int max) { + return r.get_rand(T(0), T(max)); +} + +template<> +std::pair get_rand>(random_device &r, int max) { + return std::make_pair(r.get_rand(0, max), r.get_rand(0, max)); +} + +template<> +aggregate get_rand(random_device &r, int max) { + return aggregate(r.get_rand(0, max)); +} + +template<> +std::pair +get_rand>(random_device &r, int max) { + return std::make_pair(r.get_rand(0, max), r.get_rand(0, max)); +} + +template +T increment(T &i) { + return ++i; +} + +// value_type of a std::map is std::pair +template<> +std::pair increment>(std::pair &i) { + return std::make_pair(++i.first, i.second); +} + +template<> +std::pair +increment>(std::pair &i) { + return std::make_pair(++i.first, i.second); +} + +template +T init() { + return T(0); +} + +template<> +std::pair init>() { + return std::make_pair(0, 0); +} + +template<> +std::pair init>() { + return std::make_pair(0, 0); +} + +template class Container, class value_type> +void fill_random(Container> &v, +int max = RAND_MAX) { + random_device r; + for (auto &e : v) +e = get_rand(r, max); +} + +template +void fill_random(T begin, T end, int max = RAND_MAX) { + typedef typename std::iterator_traits::value_type value_type; + random_device r; + for (auto it = begin; it != end; ++it) +*it = get_rand(r, max); +} + +// It can work with char* or std::string. +template +void fill_random_chars(T begin, T end, bool upper) { + char max = upper ? 'Z' : 'z'; + char min = upper ? 'A' : 'a'; + auto it = begin; + typedef typename std::iterator_traits::value_type value_type; + random_device r; + for (; it != end -1; ++it) { +*it = get_rand(r, max) * (max - min) + min; +assert(*it >= min); +assert(*it <= max); + } + *it = '\0'; +} + +// TODO: Create a template class such that all the +// APIs of STL containers can be exercised in a concise way. +// for example insert, push_back, pop_back, push_front, pop_front, advance + +// TODO: Benchmark memory allocated on heap vs. stack. +template +void fill_seq(T begin, T end) { + typedef typename std::iterator_traits::value_type value_type; + //random_device r; + value_type j = init();// = get_rand(r, RAND_MAX); + for (auto it = begin; it != end; ++it, increment(j)) +*it = j; +} + +template class Container, class value_type> +void fill_seq(Container> &v) { + fill_seq(std::begin(v), std::end(v)); +} + +// Size of vector \p v to be constructed is \p size +template +void make_killer(int size, std::vector& v) { + int candidate = 0; + int num_solid = 0; + int gas = size - 1; + + std::vector tmp(size); + v.resize(size); + + for (T i = 0; i < size; ++i) { +tmp[i] = i; +v[i] = gas; + } + + std::sort(tmp.begin(), tmp.end(), [&](T x, T y) { + if (v[x] == gas && v[y] == gas) { +if (x == candidate) v[x] = num_solid++; +else v[y] = num_solid++; + } + + if (v[x] == gas) candidate = x; + else if (v[y] == gas) candidate = y; + + return v[x] < v[y
[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA added a comment. Link to algorithm.bench.cpp benchmark https://github.com/hiraditya/std-benchmark/blob/master/cxx/algorithm.bench.cpp Comment at: include/algorithm:4208 + + // Threshold(or depth limit) for introsort is taken to be 2*log2(size) + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; bcraig wrote: > This comment says basically the same thing as the code. The comment would be > more useful if it said why 2*log2(size) is used. We tested the code with depth limit from log2(size) to 4*log2(size).It was giving good performance around 2*log2(size).So the depth limit was fixed a this value. https://reviews.llvm.org/D36423 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA added a comment. benchmarks/algorithms.bench.cpp Results With old code (in ns) BM_sort_std_common>/16384 : 730752 BM_sort_std_common>/32768 : 1.58E+06 BM_sort_std_ascending>/16384:17160.5 BM_sort_std_ascending>/32768 : 35350.1 BM_sort_std_descending>/16384 : 35809 BM_sort_std_descending>/32768 : 72133 BM_sort_std_list_with_vector>/16384 : 124250 BM_sort_std_list_with_vector>/32768 : 247705 BM_sort_std_worst_quick>/16384 : 1.03E+07 BM_sort_std_worst_quick>/32768: 4.04E+07 With new code (in ns) BM_sort_std_common>/16384 : 720510 BM_sort_std_common>/32768: 1.55E+06 BM_sort_std_ascending>/16384: 17164.9 BM_sort_std_ascending>/32768: 34726.7 BM_sort_std_descending>/16384 : 35671 BM_sort_std_descending>/32768 : 72100.7 BM_sort_std_list_with_vector>/16384 : 125816 BM_sort_std_list_with_vector>/32768 : 247450 BM_sort_std_worst_quick>/16384 : 987016 BM_sort_std_worst_quick>/32768: 2.14E+06 https://reviews.llvm.org/D36423 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36423: [libc++] Introsort based sorting function
DIVYA created this revision. The sorting algorithm currently employed in libc+ library uses quicksort with tail recursion elimination, as a result of which the worst case complexity turns out to be O(N^2). This patch reduces the worst case time complexity, by employing Introsort algorithm. Introsort is a sorting technique, which begins with quicksort and when the recursion depth (or depth limit) goes beyond a threshold value, then it switches to Heapsort .As a result the worst case complexity reduces to O(NlogN) Worked in collaboration with Aditya Kumar. https://reviews.llvm.org/D36423 Files: include/algorithm Index: include/algorithm === --- include/algorithm +++ include/algorithm @@ -642,6 +642,7 @@ #include // needed to provide swap_ranges. #include #include +#include #include #if defined(__IBMCPP__) @@ -3994,7 +3995,14 @@ template void -__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, +_Compare); + +// Using introsort algorithm for sorting +template +void +__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit) { // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -4029,6 +4037,12 @@ _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); return; } +if (__depth_limit == 0) +{ +__partial_sort<_Compare>(__first,__last,__last,__comp); +return; +} + // __len > 5 _RandomAccessIterator __m = __first; _RandomAccessIterator __lm1 = __last; @@ -4172,19 +4186,33 @@ // sort smaller range with recursive call and larger with tail recursion elimination if (__i - __first < __last - __i) { -_VSTD::__sort<_Compare>(__first, __i, __comp); -// _VSTD::__sort<_Compare>(__i+1, __last, __comp); +_VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); +// _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); __first = ++__i; } else { -_VSTD::__sort<_Compare>(__i+1, __last, __comp); -// _VSTD::__sort<_Compare>(__first, __i, __comp); +_VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit); +// _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit); __last = __i; } +--__depth_limit; } } +template +void +__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +{ + + // Threshold(or depth limit) for introsort is taken to be 2*log2(size) + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; + const difference_type __dp = __last - __first; + difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp); + __depth_limit *=2; + __intro_sort<_Compare>(__first, __last, __comp, __depth_limit); +} + // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare template inline _LIBCPP_INLINE_VISIBILITY Index: include/algorithm === --- include/algorithm +++ include/algorithm @@ -642,6 +642,7 @@ #include // needed to provide swap_ranges. #include #include +#include #include #if defined(__IBMCPP__) @@ -3994,7 +3995,14 @@ template void -__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) +__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, +_Compare); + +// Using introsort algorithm for sorting +template +void +__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit) { // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; @@ -4029,6 +4037,12 @@ _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); return; } +if (__depth_limit == 0) +{ +__partial_sort<_Compare>(__first,__last,__last,__comp); +return; +} + // __len > 5 _RandomAccessIterator __m = __first; _RandomAccessIterator __lm1 = __last; @@ -4172,19 +4186,33 @@ // sort smaller range with recursive call and larger with tail recursion elimination if (__i - __first < __last - __i) { -_VSTD::__sort<_Compa