[dpdk-dev] [PATCH 1/3] eal: introduce rte_vect_* abstractions

2015-12-02 Thread Jerin Jacob
On Wed, Dec 02, 2015 at 02:43:34PM +0100, Jan Viktorin wrote:
> On Mon, 30 Nov 2015 22:54:11 +0530
> Jerin Jacob  wrote:
> 
> > introduce rte_vect_* abstractions to remove SSE/AVX specific
> > code in the common code(i.e the test applications)
> > 
> > The patch does not provide any functional change for IA, the goal is to
> 
> Does IA mean Intel Architecture?

Yes.

> 
> > have infrastructure to reuse the common vector-based test code across
> > all the architectures.
> > 
> > Signed-off-by: Jerin Jacob 
> > ---
> >  lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 -
> >  lib/librte_eal/common/include/arch/x86/rte_vect.h |  8 
> >  2 files changed, 24 insertions(+), 1 deletion(-)
> > 
> > diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h 
> > b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> > index 21cdb4d..d300951 100644
> > --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
> > +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> > @@ -33,13 +33,14 @@
> >  #ifndef _RTE_VECT_ARM_H_
> >  #define _RTE_VECT_ARM_H_
> >  
> > -#include "arm_neon.h"
> > +#include 
> >  
> >  #ifdef __cplusplus
> >  extern "C" {
> >  #endif
> >  
> >  typedef int32x4_t xmm_t;
> > +typedef int32x4_t __m128i;
> 
> As Jianbo pointed out recently, the __m128i type should be refactored in
> a general rte_vect API too. If we do something like
> 
> #if SSE
> typedef __m128i rte_128i;
> #elif NEON
> typedef int32x4_y rte_128i;
> #endif
> 
> does it make somebody angry? I am afraid that it will influence a lot of
> code. However, from the ABI point of view, it is OK, isn't it?
> 
> >  
> >  #defineXMM_SIZE(sizeof(xmm_t))
> >  #defineXMM_MASK(XMM_SIZE - 1)
> > @@ -53,6 +54,20 @@ typedef union rte_xmm {
> > double   pd[XMM_SIZE / sizeof(double)];
> >  } __attribute__((aligned(16))) rte_xmm_t;
> >  
> > +/* rte_vect_* abstraction implementation using NEON */
> > +
> > +/* loads the __m128i value from address p(does not need to be 16-byte 
> > aligned)*/
> > +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p)
> > +
> > +/* sets the 4 signed 32-bit integer values and returns the __m128i 
> > variable */
> > +static inline __m128i  __attribute__((always_inline))
> > +rte_vect_set_epi32(int i3, int i2, int i1, int i0)
> > +{
> > +   int32_t data[4] = {i0, i1, i2, i3};
> > +
> > +   return vld1q_s32(data);
> > +}
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> > diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h 
> > b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> > index b698797..91c6523 100644
> > --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
> > +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> > @@ -125,6 +125,14 @@ typedef union rte_ymm {
> >  })
> >  #endif /* (defined(__ICC) && __ICC < 1210) */
> >  
> > +/* rte_vect_* abstraction implementation using SSE */
> > +
> > +/* loads the __m128i value from address p(does not need to be 16-byte 
> > aligned)*/
> > +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p)
> > +
> > +/* sets the 4 signed 32-bit integer values and returns the __m128i 
> > variable */
> > +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0)
> > +
> >  #ifdef __cplusplus
> >  }
> >  #endif
> 
> I like this approach. It is a question whether to inherit names from
> SSE. However, why to reinvent the wheel...
> 
> We probably need other people to give their ideas about such
> generalization of the API.

Yes, I would like get the feedback from other people.

ret_vect_* abstraction only for the common code (i.e test code) which
typically used to call the SIMD DPDK API's across the architecture.

> 
> I think, there should be an autotest of the rte_vect API. Is it
> possible to create one?

Yes

> 
> Regards
> Jan
> 
> -- 
>Jan Viktorin  E-mail: Viktorin at RehiveTech.com
>System Architect  Web:www.RehiveTech.com
>RehiveTech
>Brno, Czech Republic


[dpdk-dev] [PATCH 1/3] eal: introduce rte_vect_* abstractions

2015-12-02 Thread Jan Viktorin
On Mon, 30 Nov 2015 22:54:11 +0530
Jerin Jacob  wrote:

> introduce rte_vect_* abstractions to remove SSE/AVX specific
> code in the common code(i.e the test applications)
> 
> The patch does not provide any functional change for IA, the goal is to

Does IA mean Intel Architecture?

> have infrastructure to reuse the common vector-based test code across
> all the architectures.
> 
> Signed-off-by: Jerin Jacob 
> ---
>  lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 -
>  lib/librte_eal/common/include/arch/x86/rte_vect.h |  8 
>  2 files changed, 24 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h 
> b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> index 21cdb4d..d300951 100644
> --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
> +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> @@ -33,13 +33,14 @@
>  #ifndef _RTE_VECT_ARM_H_
>  #define _RTE_VECT_ARM_H_
>  
> -#include "arm_neon.h"
> +#include 
>  
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>  
>  typedef int32x4_t xmm_t;
> +typedef int32x4_t __m128i;

As Jianbo pointed out recently, the __m128i type should be refactored in
a general rte_vect API too. If we do something like

#if SSE
typedef __m128i rte_128i;
#elif NEON
typedef int32x4_y rte_128i;
#endif

does it make somebody angry? I am afraid that it will influence a lot of
code. However, from the ABI point of view, it is OK, isn't it?

>  
>  #define  XMM_SIZE(sizeof(xmm_t))
>  #define  XMM_MASK(XMM_SIZE - 1)
> @@ -53,6 +54,20 @@ typedef union rte_xmm {
>   double   pd[XMM_SIZE / sizeof(double)];
>  } __attribute__((aligned(16))) rte_xmm_t;
>  
> +/* rte_vect_* abstraction implementation using NEON */
> +
> +/* loads the __m128i value from address p(does not need to be 16-byte 
> aligned)*/
> +#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p)
> +
> +/* sets the 4 signed 32-bit integer values and returns the __m128i variable 
> */
> +static inline __m128i  __attribute__((always_inline))
> +rte_vect_set_epi32(int i3, int i2, int i1, int i0)
> +{
> + int32_t data[4] = {i0, i1, i2, i3};
> +
> + return vld1q_s32(data);
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h 
> b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> index b698797..91c6523 100644
> --- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
> +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
> @@ -125,6 +125,14 @@ typedef union rte_ymm {
>  })
>  #endif /* (defined(__ICC) && __ICC < 1210) */
>  
> +/* rte_vect_* abstraction implementation using SSE */
> +
> +/* loads the __m128i value from address p(does not need to be 16-byte 
> aligned)*/
> +#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p)
> +
> +/* sets the 4 signed 32-bit integer values and returns the __m128i variable 
> */
> +#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0)
> +
>  #ifdef __cplusplus
>  }
>  #endif

I like this approach. It is a question whether to inherit names from
SSE. However, why to reinvent the wheel...

We probably need other people to give their ideas about such
generalization of the API.

I think, there should be an autotest of the rte_vect API. Is it
possible to create one?

Regards
Jan

-- 
   Jan Viktorin  E-mail: Viktorin at RehiveTech.com
   System Architect  Web:www.RehiveTech.com
   RehiveTech
   Brno, Czech Republic


[dpdk-dev] [PATCH 1/3] eal: introduce rte_vect_* abstractions

2015-11-30 Thread Jerin Jacob
introduce rte_vect_* abstractions to remove SSE/AVX specific
code in the common code(i.e the test applications)

The patch does not provide any functional change for IA, the goal is to
have infrastructure to reuse the common vector-based test code across
all the architectures.

Signed-off-by: Jerin Jacob 
---
 lib/librte_eal/common/include/arch/arm/rte_vect.h | 17 -
 lib/librte_eal/common/include/arch/x86/rte_vect.h |  8 
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h 
b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 21cdb4d..d300951 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -33,13 +33,14 @@
 #ifndef _RTE_VECT_ARM_H_
 #define _RTE_VECT_ARM_H_

-#include "arm_neon.h"
+#include 

 #ifdef __cplusplus
 extern "C" {
 #endif

 typedef int32x4_t xmm_t;
+typedef int32x4_t __m128i;

 #defineXMM_SIZE(sizeof(xmm_t))
 #defineXMM_MASK(XMM_SIZE - 1)
@@ -53,6 +54,20 @@ typedef union rte_xmm {
double   pd[XMM_SIZE / sizeof(double)];
 } __attribute__((aligned(16))) rte_xmm_t;

+/* rte_vect_* abstraction implementation using NEON */
+
+/* loads the __m128i value from address p(does not need to be 16-byte 
aligned)*/
+#define rte_vect_loadu_sil128(p) vld1q_s32((const int32_t *)p)
+
+/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
+static inline __m128i  __attribute__((always_inline))
+rte_vect_set_epi32(int i3, int i2, int i1, int i0)
+{
+   int32_t data[4] = {i0, i1, i2, i3};
+
+   return vld1q_s32(data);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h 
b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index b698797..91c6523 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -125,6 +125,14 @@ typedef union rte_ymm {
 })
 #endif /* (defined(__ICC) && __ICC < 1210) */

+/* rte_vect_* abstraction implementation using SSE */
+
+/* loads the __m128i value from address p(does not need to be 16-byte 
aligned)*/
+#define rte_vect_loadu_sil128(p) _mm_loadu_si128(p)
+
+/* sets the 4 signed 32-bit integer values and returns the __m128i variable */
+#define rte_vect_set_epi32(i3, i2, i1, i0) _mm_set_epi32(i3, i2, i1, i0)
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.1.0