================
@@ -58,6 +58,21 @@ constexpr float dot2add_impl(half2 a, half2 b, float c) {
 #endif
 }
 
+template <typename T, int N>
+constexpr enable_if_t<!is_same<double, T>::value, T>
+mul_vec_impl(vector<T, N> x, vector<T, N> y) {
+  return dot(x, y);
+}
+
+// Double vectors do not have a dot intrinsic, so expand manually.
+template <typename T, int N>
+enable_if_t<is_same<double, T>::value, T> mul_vec_impl(vector<T, N> x,
+                                                       vector<T, N> y) {
+  T sum = x[0] * y[0];
+  [unroll] for (int i = 1; i < N; ++i) sum += x[i] * y[i];
----------------
Icohedron wrote:

I pushed a commit (04e744f4c17a4d08feaecb3a7b6eb78f32903121) that performs the 
optimizations I described: the lowering of the vector-vector case is now 
handled by codegen for the mul builtin.

https://github.com/llvm/llvm-project/pull/184882
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to