[PATCH] D44661: [libcxx] optimize reduce(), hmin(), hmax() by reordering the operations.

2018-03-25 Thread Zhihao Yuan via Phabricator via cfe-commits
lichray added inline comments.



Comment at: libcxx/include/experimental/simd:2196
+typename _SimdType::value_type>::type
+__hmax(const _SimdType& __v) {
   auto __acc = __v[0];

These conditions are too long. Consider tag dispatch instead:
```
__hmax_impl(const _SimdType& __v, true_type, ...);
__hmax_impl(const _SimdType& __v, false_type, true_type);
__hmax_impl(const _SimdType& __v, false_type, false_type);
```


https://reviews.llvm.org/D44661



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44661: [libcxx] optimize reduce(), hmin(), hmax() by reordering the operations.

2018-03-25 Thread Zhihao Yuan via Phabricator via cfe-commits
lichray added inline comments.



Comment at: libcxx/include/experimental/simd:2096
+template <class _Tp, class _Abi>
+std::array::size() / 2, _Abi>>, 2>
+__deinterleave(const simd<_Tp, _Abi>& __v) {

Too long. Consider using a traits class?


https://reviews.llvm.org/D44661



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44661: [libcxx] optimize reduce(), hmin(), hmax() by reordering the operations.

2018-03-19 Thread Tim Shen via Phabricator via cfe-commits
timshen created this revision.
timshen added a reviewer: mclow.lists.
Herald added subscribers: christof, sanjoy.
Herald added a reviewer: EricWF.

Also change std::plus<_Tp> to std::plus<>/__simd_plus_op, so that the
optimization can transparently use the simd<> operator overloads.


https://reviews.llvm.org/D44661

Files:
  libcxx/include/experimental/simd
  libcxx/test/std/experimental/simd/simd.horizontal/hmax.pass.cpp
  libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp
  libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp

Index: libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp
===
--- libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp
+++ libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp
@@ -38,52 +38,64 @@
 #include 
 #include 
 
+#include "test_macros.h"
+
 using namespace std::experimental::parallelism_v2;
 
 inline int factorial(int n) { return n == 1 ? 1 : n * factorial(n - 1); }
 
+template <class SimdType>
 void test_reduce_simd() {
-  int n = (int)native_simd::size();
-  assert(reduce(native_simd([](int i) { return i; })) == n * (n - 1) / 2);
-  assert(reduce(native_simd([](int i) { return i; }), std::plus()) ==
+  int n = (int)SimdType::size();
+  assert(reduce(SimdType([](int i) { return i; })) == n * (n - 1) / 2);
+
+#if TEST_STD_VER >= 14
+  assert(reduce(SimdType([](int i) { return i; }), std::plus<>()) ==
  n * (n - 1) / 2);
-  assert(reduce(native_simd([](int i) { return i + 1; }),
-std::multiplies()) == factorial(n));
+  assert(reduce(SimdType([](int i) { return i + 1; }), std::multiplies<>()) ==
+ factorial(n));
+#endif
 }
 
 void test_reduce_mask() {
   {
 fixed_size_simd a([](int i) { return i; });
-assert(reduce(where(a < 2, a), 0, std::plus()) == 0 + 1);
-assert(reduce(where(a >= 2, a), 1, std::multiplies()) == 2 * 3);
 assert(reduce(where(a >= 2, a)) == 2 + 3);
-assert(reduce(where(a >= 2, a), std::plus()) == 2 + 3);
-assert(reduce(where(a >= 2, a), std::multiplies()) == 2 * 3);
-assert(reduce(where(a >= 2, a), std::bit_and()) == (2 & 3));
-assert(reduce(where(a >= 2, a), std::bit_or()) == (2 | 3));
-assert(reduce(where(a >= 2, a), std::bit_xor()) == (2 ^ 3));
+#if TEST_STD_VER >= 14
+assert(reduce(where(a < 2, a), 0, std::plus<>()) == 0 + 1);
+assert(reduce(where(a >= 2, a), 1, std::multiplies<>()) == 2 * 3);
+assert(reduce(where(a >= 2, a), std::plus<>()) == 2 + 3);
+assert(reduce(where(a >= 2, a), std::multiplies<>()) == 2 * 3);
+assert(reduce(where(a >= 2, a), std::bit_and<>()) == (2 & 3));
+assert(reduce(where(a >= 2, a), std::bit_or<>()) == (2 | 3));
+assert(reduce(where(a >= 2, a), std::bit_xor<>()) == (2 ^ 3));
+#endif
   }
   {
 fixed_size_simd_mask a;
 a[0] = false;
 a[1] = true;
 a[2] = true;
 a[3] = false;
 assert(reduce(where(fixed_size_simd_mask(true), a)) == true);
+#if TEST_STD_VER >= 14
 assert(reduce(where(fixed_size_simd_mask(true), a),
-  std::plus()) == true);
+  std::plus<>()) == true);
 assert(reduce(where(fixed_size_simd_mask(true), a),
-  std::multiplies()) == false);
+  std::multiplies<>()) == false);
 assert(reduce(where(fixed_size_simd_mask(true), a),
-  std::bit_and()) == false);
+  std::bit_and<>()) == false);
 assert(reduce(where(fixed_size_simd_mask(true), a),
-  std::bit_or()) == true);
+  std::bit_or<>()) == true);
 assert(reduce(where(fixed_size_simd_mask(true), a),
-  std::bit_xor()) == false);
+  std::bit_xor<>()) == false);
+#endif
   }
 }
 
 int main() {
-  test_reduce_simd();
+  test_reduce_simd();
+  test_reduce_simd>();
+  test_reduce_simd>();
   test_reduce_mask();
 }
Index: libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp
===
--- libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp
+++ libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp
@@ -20,22 +20,47 @@
 
 using namespace std::experimental::parallelism_v2;
 
-void test_hmin_simd() {
+template <class SimdType>
+void test_hmin_simd_power_of_2() {
   {
 int a[] = {2, 5, -4, 6};
-assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4);
+assert(hmin(SimdType(a, element_aligned_tag())) == -4);
   }
   {
 int a[] = {6, 2, 5, -4};
-assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4);
+assert(hmin(SimdType(a, element_aligned_tag())) == -4);
   }
   {
 int a[] = {-4, 6, 2, 5};
-assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4);
+assert(hmin(SimdType(a, element_aligned_tag())) == -4);
   }