Hi Jeff,
This is how we handle it in Crypto++ [0]. I would not worry about
__APPLE_ALTIVEC__. __ALTIVEC__ picks up the use case.
In an implementation file that needs the definitions:
// could use -maltivec, -mcpu=power7, -mcpu=power8, etc
#if defined(__ALTIVEC__) || defined(_ARCH_PWR7)
# include "ppc_simd.h"
#endif
And then at the head of ppc_simd.h: [1]
#if defined(__ALTIVEC__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
#endif
And then in the same ppc_simd.h file:
#if defined(__ALTIVEC__) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief Vector of 8-bit elements
typedef __vector unsigned char uint8x16_p;
/// \brief Vector of 16-bit elements
typedef __vector unsigned short uint16x8_p;
/// \brief Vector of 32-bit elements
typedef __vector unsigned int uint32x4_p;
#if defined(__VSX__) || defined(_ARCH_PWR8) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief Vector of 64-bit elements
typedef __vector unsigned long long uint64x2_p;
#endif // VSX or ARCH_PWR8
#endif // __ALTIVEC__ or Doxygen
Now use the typedefs like uint8x16_p, uint16x8_p, uint32x4_p, and
uint64x2_p.
[0] https://cryptopp.com/
[1] https://github.com/weidai11/cryptopp/blob/master/ppc_simd.h
Jeff

Hi Jeff,
Thank you for taking the time to review and for the pointer to
ppc_simd.h — really helpful reference.
One thing I confirmed: vec_altivec.hpp only ever gets included when
__ALTIVEC__ is already true (that check happens earlier, in vec.hpp).
So adding the same guard inside vec_altivec.hpp doesn't change anything
right now — it's already guaranteed true.
As you suggested, though - and I agree it is the right approach - I've
kept the guard anyway so that the header stays safe on its own if
someone includes it directly in the future without going through
vec.hpp first. Thanks for pointing that out.
I have updated my patch to gate the #include <altivec.h> and the #undef
of vector/pixel/bool behind #if defined(__ALTIVEC__), rather than
relying on __APPLE_ALTIVEC__.
Rebuilt and tested on my ppc64el machine.
Updated patches attached. Will go ahead and create the Salsa Merge
Request with this version.
Thanks again,
Trupti
Index: supercollider-sc3-plugins-3.14.0+ds/external_libraries/nova-simd/vec/vec_altivec.hpp
===================================================================
--- supercollider-sc3-plugins-3.14.0+ds.orig/external_libraries/nova-simd/vec/vec_altivec.hpp
+++ supercollider-sc3-plugins-3.14.0+ds/external_libraries/nova-simd/vec/vec_altivec.hpp
@@ -20,8 +20,12 @@
#ifndef VEC_ALTIVEC_HPP
#define VEC_ALTIVEC_HPP
-#include <altivec.h>
-#undef bool
+#if defined(__ALTIVEC__)
+# include <altivec.h>
+# undef bool
+# undef vector
+# undef pixel
+#endif
#include "../detail/vec_math.hpp"
#include "vec_int_altivec.hpp"
@@ -39,13 +43,13 @@ namespace nova
template <>
struct vec<float>:
- vec_base<float, vector float, 4>
+ vec_base<float, __vector float, 4>
{
- typedef vector float internal_vector_type;
+ typedef __vector float internal_vector_type;
typedef float float_type;
private:
- typedef vec_base<float, vector float, 4> base;
+ typedef vec_base<float, __vector float, 4> base;
static internal_vector_type set_vector(float f0, float f1, float f2, float f3)
{
@@ -231,7 +235,7 @@ private:
// adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html
// Get the reciprocal estimate
- vector float estimate = vec_re(arg);
+ __vector float estimate = vec_re(arg);
// One round of Newton-Raphson refinement
return vec_madd(vec_nmsub(estimate, arg, gen_one()), estimate, estimate);
@@ -284,7 +288,7 @@ public:
friend vec fast_reciprocal(const vec & arg)
{
- vector float estimate = vec_re(arg);
+ __vector float estimate = vec_re(arg.data_);
return estimate;
}
@@ -316,8 +320,8 @@ public:
vec operator op(vec const & rhs) const \
{ \
const internal_vector_type one = gen_one(); \
- vector unsigned int mask = (vector unsigned int)opcode(data_, rhs.data_); \
- return (internal_vector_type)vec_and(mask, (vector unsigned int)one); \
+ __vector unsigned int mask = (__vector unsigned int)opcode(data_, rhs.data_); \
+ return (internal_vector_type)vec_and(mask, (__vector unsigned int)one); \
}
#define vec_cmple_(a, b) vec_cmpge(b, a)
@@ -367,7 +371,7 @@ public:
friend inline vec select(vec lhs, vec rhs, vec bitmask)
{
- return vec_sel(lhs.data_, rhs.data_, (vector unsigned int)bitmask.data_);
+ return vec_sel(lhs.data_, rhs.data_, (__vector unsigned int)bitmask.data_);
}
/* @} */
@@ -395,14 +399,14 @@ private:
// adapted from http://developer.apple.com/hardwaredrivers/ve/algorithms.html
//Get the square root reciprocal estimate
- vector float zero = gen_zero();
- vector float oneHalf = gen_05();
- vector float one = gen_one();
- vector float estimate = vec_rsqrte(arg);
+ __vector float zero = gen_zero();
+ __vector float oneHalf = gen_05();
+ __vector float one = gen_one();
+ __vector float estimate = vec_rsqrte(arg);
//One round of Newton-Raphson refinement
- vector float estimateSquared = vec_madd(estimate, estimate, zero);
- vector float halfEstimate = vec_madd(estimate, oneHalf, zero);
+ __vector float estimateSquared = vec_madd(estimate, estimate, zero);
+ __vector float halfEstimate = vec_madd(estimate, oneHalf, zero);
return vec_madd(vec_nmsub(arg, estimateSquared, one), halfEstimate, estimate);
}
Index: supercollider-sc3-plugins-3.14.0+ds/external_libraries/nova-simd/vec/vec_int_altivec.hpp
===================================================================
--- supercollider-sc3-plugins-3.14.0+ds.orig/external_libraries/nova-simd/vec/vec_int_altivec.hpp
+++ supercollider-sc3-plugins-3.14.0+ds/external_libraries/nova-simd/vec/vec_int_altivec.hpp
@@ -20,22 +20,28 @@
#ifndef VEC_INT_ALTIVEC_HPP
#define VEC_INT_ALTIVEC_HPP
-#include <altivec.h>
+#if defined(__ALTIVEC__)
+# include <altivec.h>
+# undef bool
+# undef vector
+# undef pixel
+#endif
+
namespace nova {
namespace detail {
struct int_vec_altivec
{
- typedef vector float fvec;
- typedef vector unsigned int ivec;
+ typedef __vector float fvec;
+ typedef __vector unsigned int ivec;
ivec data_;
private:
static ivec set_vector(int i)
{
#ifdef __GNUC__
- return (ivec){i, i, i, i};
+ return (ivec){(unsigned int)i,(unsigned int)i, (unsigned int)i, (unsigned int)i};
#else
#error compiler not supported
#endif
@@ -58,7 +64,7 @@ public:
data_(arg.data_)
{}
- int_vec_altivec(vector signed int arg):
+ int_vec_altivec(__vector signed int arg):
data_((ivec)arg)
{}