[dpdk-dev] [PATCH 1/3] eal: introduce rte_vect_* abstractions
On Wed, Dec 02, 2015 at 02:43:34PM +0100, Jan Viktorin wrote: > On Mon, 30 Nov 2015 22:54:11 +0530 > Jerin Jacob wrote: > > > introduce rte_vect_* abstractions to remove SSE/AVX specific > > code in the common code(i.e the test applications) > > > > The patch does not provide any functional change for IA, the goal is to > > Does IA mean Intel Architecture? Yes. > > > have infrastructure to reuse the common vector-based test code across > > all the architectures. > > > > Signed-off-by: Jerin Jacob > > --- > > lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 - > > lib/librte_eal/common/include/arch/x86/rte_vect.h | 8 > > 2 files changed, 24 insertions(+), 1 deletion(-) > > > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h > > b/lib/librte_eal/common/include/arch/arm/rte_vect.h > > index 21cdb4d..d300951 100644 > > --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h > > +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h > > @@ -33,13 +33,14 @@ > > #ifndef _RTE_VECT_ARM_H_ > > #define _RTE_VECT_ARM_H_ > > > > -#include "arm_neon.h" > > +#include <arm_neon.h> > > > > #ifdef __cplusplus > > extern "C" { > > #endif > > > > typedef int32x4_t xmm_t; > > +typedef int32x4_t __m128i; > > As Jianbo pointed out recently, the __m128i type should be refactored in > a general rte_vect API too. If we do something like > > #if SSE > typedef __m128i rte_128i; > #elif NEON > typedef int32x4_t rte_128i; > #endif > > does it make somebody angry? I am afraid that it will influence a lot of > code. However, from the ABI point of view, it is OK, isn't it? 
> > > > > #define XMM_SIZE (sizeof(xmm_t)) > > #define XMM_MASK (XMM_SIZE - 1) > > @@ -53,6 +54,20 @@ typedef union rte_xmm { > > double pd[XMM_SIZE / sizeof(double)]; > > } __attribute__((aligned(16))) rte_xmm_t; > > > > +/* rte_vect_* abstraction implementation using NEON */ > > + > > +/* loads the __m128i value from address p(does not need to be 16-byte > > aligned)*/ > > +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p) > > + > > +/* sets the 4 signed 32-bit integer values and returns the __m128i > > variable */ > > +static inline __m128i __attribute__((always_inline)) > > +rte_vect_set_epi32(int i3, int i2, int i1, int i0) > > +{ > > + int32_t data[4] = {i0, i1, i2, i3}; > > + > > + return vld1q_s32(data); > > +} > > + > > #ifdef __cplusplus > > } > > #endif > > diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h > > b/lib/librte_eal/common/include/arch/x86/rte_vect.h > > index b698797..91c6523 100644 > > --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h > > +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h > > @@ -125,6 +125,14 @@ typedef union rte_ymm { > > }) > > #endif /* (defined(__ICC) && __ICC < 1210) */ > > > > +/* rte_vect_* abstraction implementation using SSE */ > > + > > +/* loads the __m128i value from address p(does not need to be 16-byte > > aligned)*/ > > +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p) > > + > > +/* sets the 4 signed 32-bit integer values and returns the __m128i > > variable */ > > +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0) > > + > > #ifdef __cplusplus > > } > > #endif > > I like this approach. It is a question whether to inherit names from > SSE. However, why to reinvent the wheel... > > We probably need other people to give their ideas about such > generalization of the API. Yes, I would like to get feedback from other people. 
The rte_vect_* abstraction is only for the common code (i.e. test code), which is typically used to call the SIMD DPDK APIs across the architectures. > > I think, there should be an autotest of the rte_vect API. Is it > possible to create one? Yes > > Regards > Jan > > -- >Jan Viktorin E-mail: Viktorin at RehiveTech.com >System Architect Web:www.RehiveTech.com >RehiveTech >Brno, Czech Republic
[dpdk-dev] [PATCH 1/3] eal: introduce rte_vect_* abstractions
On Mon, 30 Nov 2015 22:54:11 +0530 Jerin Jacob wrote: > introduce rte_vect_* abstractions to remove SSE/AVX specific > code in the common code(i.e the test applications) > > The patch does not provide any functional change for IA, the goal is to Does IA mean Intel Architecture? > have infrastructure to reuse the common vector-based test code across > all the architectures. > > Signed-off-by: Jerin Jacob > --- > lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 - > lib/librte_eal/common/include/arch/x86/rte_vect.h | 8 > 2 files changed, 24 insertions(+), 1 deletion(-) > > diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h > b/lib/librte_eal/common/include/arch/arm/rte_vect.h > index 21cdb4d..d300951 100644 > --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h > +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h > @@ -33,13 +33,14 @@ > #ifndef _RTE_VECT_ARM_H_ > #define _RTE_VECT_ARM_H_ > > -#include "arm_neon.h" > +#include <arm_neon.h> > > #ifdef __cplusplus > extern "C" { > #endif > > typedef int32x4_t xmm_t; > +typedef int32x4_t __m128i; As Jianbo pointed out recently, the __m128i type should be refactored in a general rte_vect API too. If we do something like #if SSE typedef __m128i rte_128i; #elif NEON typedef int32x4_t rte_128i; #endif does it make somebody angry? I am afraid that it will influence a lot of code. However, from the ABI point of view, it is OK, isn't it? 
> > #define XMM_SIZE (sizeof(xmm_t)) > #define XMM_MASK (XMM_SIZE - 1) > @@ -53,6 +54,20 @@ typedef union rte_xmm { > double pd[XMM_SIZE / sizeof(double)]; > } __attribute__((aligned(16))) rte_xmm_t; > > +/* rte_vect_* abstraction implementation using NEON */ > + > +/* loads the __m128i value from address p(does not need to be 16-byte > aligned)*/ > +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p) > + > +/* sets the 4 signed 32-bit integer values and returns the __m128i variable > */ > +static inline __m128i __attribute__((always_inline)) > +rte_vect_set_epi32(int i3, int i2, int i1, int i0) > +{ > + int32_t data[4] = {i0, i1, i2, i3}; > + > + return vld1q_s32(data); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h > b/lib/librte_eal/common/include/arch/x86/rte_vect.h > index b698797..91c6523 100644 > --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h > +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h > @@ -125,6 +125,14 @@ typedef union rte_ymm { > }) > #endif /* (defined(__ICC) && __ICC < 1210) */ > > +/* rte_vect_* abstraction implementation using SSE */ > + > +/* loads the __m128i value from address p(does not need to be 16-byte > aligned)*/ > +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p) > + > +/* sets the 4 signed 32-bit integer values and returns the __m128i variable > */ > +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0) > + > #ifdef __cplusplus > } > #endif I like this approach. It is a question whether to inherit names from SSE. However, why to reinvent the wheel... We probably need other people to give their ideas about such generalization of the API. I think, there should be an autotest of the rte_vect API. Is it possible to create one? Regards Jan -- Jan Viktorin E-mail: Viktorin at RehiveTech.com System Architect Web:www.RehiveTech.com RehiveTech Brno, Czech Republic
[dpdk-dev] [PATCH 1/3] eal: introduce rte_vect_* abstractions
introduce rte_vect_* abstractions to remove SSE/AVX specific code in the common code(i.e the test applications) The patch does not provide any functional change for IA, the goal is to have infrastructure to reuse the common vector-based test code across all the architectures. Signed-off-by: Jerin Jacob --- lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 - lib/librte_eal/common/include/arch/x86/rte_vect.h | 8 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index 21cdb4d..d300951 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -33,13 +33,14 @@ #ifndef _RTE_VECT_ARM_H_ #define _RTE_VECT_ARM_H_ -#include "arm_neon.h" +#include <arm_neon.h> #ifdef __cplusplus extern "C" { #endif typedef int32x4_t xmm_t; +typedef int32x4_t __m128i; #define XMM_SIZE (sizeof(xmm_t)) #define XMM_MASK (XMM_SIZE - 1) @@ -53,6 +54,20 @@ typedef union rte_xmm { double pd[XMM_SIZE / sizeof(double)]; } __attribute__((aligned(16))) rte_xmm_t; +/* rte_vect_* abstraction implementation using NEON */ + +/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/ +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p) + +/* sets the 4 signed 32-bit integer values and returns the __m128i variable */ +static inline __m128i __attribute__((always_inline)) +rte_vect_set_epi32(int i3, int i2, int i1, int i0) +{ + int32_t data[4] = {i0, i1, i2, i3}; + + return vld1q_s32(data); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h index b698797..91c6523 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -125,6 +125,14 @@ typedef union rte_ymm { }) #endif /* (defined(__ICC) && __ICC < 1210) */ +/* rte_vect_* abstraction implementation using 
SSE */ + +/* loads the __m128i value from address p(does not need to be 16-byte aligned)*/ +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p) + +/* sets the 4 signed 32-bit integer values and returns the __m128i variable */ +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0) + #ifdef __cplusplus } #endif -- 2.1.0