Re: [Mesa-dev] [PATCH 03/14] swr: [rasterizer] add support for building avx512 version

2016-06-20 Thread Rowley, Timothy O
Since there isn’t much code difference at this point, I was holding off on this 
until we had made changes to how the different architecture swr builds are 
built/linked to minimize build time and disk space.

-Tim

On Jun 20, 2016, at 9:27 AM, Chuck Atkins wrote:

Doesn't this also need corresponding compiler flags in 
configure.ac to populate SWR_AVX512_CXXFLAGS?

- Chuck

On Fri, Jun 17, 2016 at 3:25 PM, Tim Rowley wrote:
Currently, most code paths between AVX2 and AVX512 are identical
(see changes to knobs.h).
---
 src/gallium/drivers/swr/rasterizer/common/simdintrin.h  |  4 ++--
 src/gallium/drivers/swr/rasterizer/core/format_types.h  |  8 ++++----
 src/gallium/drivers/swr/rasterizer/core/knobs.h         | 15 ++++++++++-----
 src/gallium/drivers/swr/rasterizer/memory/Convert.h |  4 ++--
 src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp |  4 ++--
 5 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h 
b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index 5ec1f71..cc29b5d 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
 INLINE
 UINT pdep_u32(UINT a, UINT mask)
 {
-#if KNOB_ARCH==KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
 return _pdep_u32(a, mask);
 #else
 UINT result = 0;
@@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
 INLINE
 UINT pext_u32(UINT a, UINT mask)
 {
-#if KNOB_ARCH==KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
 return _pext_u32(a, mask);
 #else
 UINT result = 0;
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h 
b/src/gallium/drivers/swr/rasterizer/core/format_types.h
index afb6337..6612c83 100644
--- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
+++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h
@@ -98,7 +98,7 @@ struct PackTraits<8, false>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -161,7 +161,7 @@ struct PackTraits<8, true>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -223,7 +223,7 @@ struct PackTraits<16, false>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -285,7 +285,7 @@ struct PackTraits<16, true>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h 
b/src/gallium/drivers/swr/rasterizer/core/knobs.h
index 55a22a6..2629276 100644
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -52,11 +52,16 @@
 #define KNOB_SIMD_WIDTH 8
 #define KNOB_SIMD_BYTES 32
 #elif (KNOB_ARCH == KNOB_ARCH_AVX512)
-#define KNOB_ARCH_ISA AVX512F
-#define KNOB_ARCH_STR "AVX512"
-#define KNOB_SIMD_WIDTH 16
-#define KNOB_SIMD_BYTES 64
-#error "AVX512 not yet supported"
+#define KNOB_ARCH_ISA AVX2
+#define KNOB_ARCH_STR "AVX2"
+#define KNOB_SIMD_WIDTH 8
+#define KNOB_SIMD_BYTES 32
+// Disable AVX512 for now...
+//#define KNOB_ARCH_ISA AVX512F
+//#define KNOB_ARCH_STR "AVX512"
+//#define KNOB_SIMD_WIDTH 16
+//#define KNOB_SIMD_BYTES 64
+//#error "AVX512 not yet supported"
 #else
 #error "Unknown architecture"
 #endif
diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h 
b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
index 42b973c..b790d35 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
@@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
 // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
 // @todo 16bit 

Re: [Mesa-dev] [PATCH 03/14] swr: [rasterizer] add support for building avx512 version

2016-06-20 Thread Chuck Atkins
Doesn't this also need corresponding compiler flags in configure.ac to
populate SWR_AVX512_CXXFLAGS?

- Chuck

On Fri, Jun 17, 2016 at 3:25 PM, Tim Rowley wrote:

> Currently, most code paths between AVX2 and AVX512 are identical
> (see changes to knobs.h).
> ---
>  src/gallium/drivers/swr/rasterizer/common/simdintrin.h  |  4 ++--
>  src/gallium/drivers/swr/rasterizer/core/format_types.h  |  8 ++++----
>  src/gallium/drivers/swr/rasterizer/core/knobs.h         | 15 ++++++++++-----
>  src/gallium/drivers/swr/rasterizer/memory/Convert.h |  4 ++--
>  src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp |  4 ++--
>  5 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> index 5ec1f71..cc29b5d 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> @@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
>  INLINE
>  UINT pdep_u32(UINT a, UINT mask)
>  {
> -#if KNOB_ARCH==KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
>  return _pdep_u32(a, mask);
>  #else
>  UINT result = 0;
> @@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
>  INLINE
>  UINT pext_u32(UINT a, UINT mask)
>  {
> -#if KNOB_ARCH==KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
>  return _pext_u32(a, mask);
>  #else
>  UINT result = 0;
> diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h
> b/src/gallium/drivers/swr/rasterizer/core/format_types.h
> index afb6337..6612c83 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h
> @@ -98,7 +98,7 @@ struct PackTraits<8, false>
>  __m256i result = _mm256_castsi128_si256(resLo);
>  result = _mm256_insertf128_si256(result, resHi, 1);
>  return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
>  return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
>  #endif
>  #else
> @@ -161,7 +161,7 @@ struct PackTraits<8, true>
>  __m256i result = _mm256_castsi128_si256(resLo);
>  result = _mm256_insertf128_si256(result, resHi, 1);
>  return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
>  return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
>  #endif
>  #else
> @@ -223,7 +223,7 @@ struct PackTraits<16, false>
>  __m256i result = _mm256_castsi128_si256(resLo);
>  result = _mm256_insertf128_si256(result, resHi, 1);
>  return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
>  return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
>  #endif
>  #else
> @@ -285,7 +285,7 @@ struct PackTraits<16, true>
>  __m256i result = _mm256_castsi128_si256(resLo);
>  result = _mm256_insertf128_si256(result, resHi, 1);
>  return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
>  return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
>  #endif
>  #else
> diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h
> b/src/gallium/drivers/swr/rasterizer/core/knobs.h
> index 55a22a6..2629276 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
> @@ -52,11 +52,16 @@
>  #define KNOB_SIMD_WIDTH 8
>  #define KNOB_SIMD_BYTES 32
>  #elif (KNOB_ARCH == KNOB_ARCH_AVX512)
> -#define KNOB_ARCH_ISA AVX512F
> -#define KNOB_ARCH_STR "AVX512"
> -#define KNOB_SIMD_WIDTH 16
> -#define KNOB_SIMD_BYTES 64
> -#error "AVX512 not yet supported"
> +#define KNOB_ARCH_ISA AVX2
> +#define KNOB_ARCH_STR "AVX2"
> +#define KNOB_SIMD_WIDTH 8
> +#define KNOB_SIMD_BYTES 32
> +// Disable AVX512 for now...
> +//#define KNOB_ARCH_ISA AVX512F
> +//#define KNOB_ARCH_STR "AVX512"
> +//#define KNOB_SIMD_WIDTH 16
> +//#define KNOB_SIMD_BYTES 64
> +//#error "AVX512 not yet supported"
>  #else
>  #error "Unknown architecture"
>  #endif
> diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> index 42b973c..b790d35 100644
> --- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> +++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> @@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
>  // Convert from 32-bit float to 16-bit float using
> _mm_cvtps_ph
>  // @todo 16bit float instruction support is orthogonal to
> avx support.  need to
>  // add check for F16C support instead.
> -#if KNOB_ARCH == KNOB_ARCH_AVX2
> 

[Mesa-dev] [PATCH 03/14] swr: [rasterizer] add support for building avx512 version

2016-06-17 Thread Tim Rowley
Currently, most code paths between AVX2 and AVX512 are identical
(see changes to knobs.h).
---
 src/gallium/drivers/swr/rasterizer/common/simdintrin.h  |  4 ++--
 src/gallium/drivers/swr/rasterizer/core/format_types.h  |  8 ++++----
 src/gallium/drivers/swr/rasterizer/core/knobs.h         | 15 ++++++++++-----
 src/gallium/drivers/swr/rasterizer/memory/Convert.h |  4 ++--
 src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp |  4 ++--
 5 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h 
b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index 5ec1f71..cc29b5d 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
 INLINE
 UINT pdep_u32(UINT a, UINT mask)
 {
-#if KNOB_ARCH==KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
 return _pdep_u32(a, mask);
 #else
 UINT result = 0;
@@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
 INLINE
 UINT pext_u32(UINT a, UINT mask)
 {
-#if KNOB_ARCH==KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
 return _pext_u32(a, mask);
 #else
 UINT result = 0;
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h 
b/src/gallium/drivers/swr/rasterizer/core/format_types.h
index afb6337..6612c83 100644
--- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
+++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h
@@ -98,7 +98,7 @@ struct PackTraits<8, false>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -161,7 +161,7 @@ struct PackTraits<8, true>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -223,7 +223,7 @@ struct PackTraits<16, false>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -285,7 +285,7 @@ struct PackTraits<16, true>
 __m256i result = _mm256_castsi128_si256(resLo);
 result = _mm256_insertf128_si256(result, resHi, 1);
 return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
 return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h 
b/src/gallium/drivers/swr/rasterizer/core/knobs.h
index 55a22a6..2629276 100644
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -52,11 +52,16 @@
 #define KNOB_SIMD_WIDTH 8
 #define KNOB_SIMD_BYTES 32
 #elif (KNOB_ARCH == KNOB_ARCH_AVX512)
-#define KNOB_ARCH_ISA AVX512F
-#define KNOB_ARCH_STR "AVX512"
-#define KNOB_SIMD_WIDTH 16
-#define KNOB_SIMD_BYTES 64
-#error "AVX512 not yet supported"
+#define KNOB_ARCH_ISA AVX2
+#define KNOB_ARCH_STR "AVX2"
+#define KNOB_SIMD_WIDTH 8
+#define KNOB_SIMD_BYTES 32
+// Disable AVX512 for now...
+//#define KNOB_ARCH_ISA AVX512F
+//#define KNOB_ARCH_STR "AVX512"
+//#define KNOB_SIMD_WIDTH 16
+//#define KNOB_SIMD_BYTES 64
+//#error "AVX512 not yet supported"
 #else
 #error "Unknown architecture"
 #endif
diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h 
b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
index 42b973c..b790d35 100644
--- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
@@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
 // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
 // @todo 16bit float instruction support is orthogonal to avx support.  need to
 // add check for F16C support instead.
-#if KNOB_ARCH == KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
 __m128 src128 = _mm_set1_ps(src);
 __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
 UINT value = _mm_extract_epi16(srci128, 0);
@@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat(
 float dst;
 if (FormatTraits::GetBPC(comp) == 16)
 {
-#if KNOB_ARCH == KNOB_ARCH_AVX2
+#if KNOB_ARCH >=