Re: [PATCH i386 11/8] [AVX512] [2/2] Add missing packed PF gathers/scatters.

2014-01-27 Thread Uros Bizjak
On Mon, Jan 27, 2014 at 11:09 AM, Kirill Yukhin  wrote:

>> > (define_expand "avx512pf_scatterpfdf"
>> >   [(unspec
>> >  [(match_operand: 0 "register_or_constm1_operand")
>> >   (mem:DF
>> >(match_par_dup 5
>> >  [(match_operand 2 "vsib_address_operand")
>> >   (match_operand:VI4_256_8_512 1 "register_operand")
>> >   (match_operand:SI 3 "const1248_operand")]))
>> >   (match_operand:SI 4 "const_0_to_1_operand")]
>> >  UNSPEC_SCATTER_PREFETCH)]
>> >
>> > We have this correspondence between, say, main and index modes:
>> >   SF -> (V16SI, V8DI)
>> >   DF -> (V8SI , V8DI)
>>
>> It looks to me that you should use V16SF and V8DF instead of SF and DF
>> modes here.
> I didn't find existing attributes with necessary mapping, so I invented new.
>
>> Other than this, the patch looks OK to me. Please wait a day if Jakub
>> has any remark here.
>
> Patch in the bottom and I'll check it in this evening (MS time) if no 
> objections.
> (will update ChangeLog adding new mode attributes)
>
> --
> Thanks, K
>
>  gcc/config/i386/avx512pfintrin.h   | 113 +++--
>  gcc/config/i386/i386-builtin-types.def |   2 +
>  gcc/config/i386/i386.c |  37 -
>  gcc/config/i386/sse.md | 176 +++--
>  gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
>  .../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c |  15 ++
>  .../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c |  15 ++
>  .../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c |  15 ++
>  .../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c |  15 ++
>  .../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c|  17 ++
>  .../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c|  17 ++
>  .../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c|  17 ++
>  .../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c|  17 ++
>  gcc/testsuite/gcc.target/i386/sse-14.c |   4 +
>  gcc/testsuite/gcc.target/i386/sse-22.c |   5 +
>  gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
>  16 files changed, 442 insertions(+), 31 deletions(-)

> -(define_expand "avx512pf_gatherpf<mode>"
> +;; Packed float variants
> +(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
> + [(V8DI "V8SF") (V16SI "V16SF")])
> +(define_mode_attr GATHER_SCATTER_DF_MEM_MODE
> + [(V8DI "V8DF") (V8SI "V8DF")])

You actually don't need this attribute, since it always declares V8DF.
Just use V8DF mode in the patterns instead.

(no need to repost the patch due to this trivial removal).

Uros.


Re: [PATCH i386 11/8] [AVX512] [2/2] Add missing packed PF gathers/scatters.

2014-01-27 Thread Kirill Yukhin
Hello,
On 23 Jan 14:22, Uros Bizjak wrote:
> > (define_expand "avx512pf_scatterpfdf"
> >   [(unspec
> >  [(match_operand: 0 "register_or_constm1_operand")
> >   (mem:DF
> >(match_par_dup 5
> >  [(match_operand 2 "vsib_address_operand")
> >   (match_operand:VI4_256_8_512 1 "register_operand")
> >   (match_operand:SI 3 "const1248_operand")]))
> >   (match_operand:SI 4 "const_0_to_1_operand")]
> >  UNSPEC_SCATTER_PREFETCH)]
> >
> > We have this correspondence between, say, main and index modes:
> >   SF -> (V16SI, V8DI)
> >   DF -> (V8SI , V8DI)
> 
> It looks to me that you should use V16SF and V8DF instead of SF and DF
> modes here.
I didn't find existing attributes with the necessary mapping, so I invented new ones.

> Other than this, the patch looks OK to me. Please wait a day if Jakub
> has any remark here.

The patch is at the bottom, and I'll check it in this evening (MS time) if
there are no objections.
(will update ChangeLog adding new mode attributes)

--
Thanks, K

 gcc/config/i386/avx512pfintrin.h   | 113 +++--
 gcc/config/i386/i386-builtin-types.def |   2 +
 gcc/config/i386/i386.c |  37 -
 gcc/config/i386/sse.md | 176 +++--
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 .../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c|  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c|  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c|  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c|  17 ++
 gcc/testsuite/gcc.target/i386/sse-14.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-22.c |   5 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 16 files changed, 442 insertions(+), 31 deletions(-)

diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
index b8c0110..bc7598e 100644
--- a/gcc/config/i386/avx512pfintrin.h
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -48,74 +48,157 @@ typedef unsigned short __mmask16;
 #ifdef __OPTIMIZE__
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
+  void *addr, int scale, int hint)
+{
+  __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
-  int const *addr, int scale, int hint)
+  void *addr, int scale, int hint)
+{
+  __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
+  void *addr, int scale, int hint)
 {
-  __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint);
+  __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
+ scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
-  int const *addr, int scale, int hint)
+  void *addr, int scale, int hint)
 {
-  __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint);
+  __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
+ scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale,
+_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,
   int hint)
 {
-  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, addr, scale,
-  hint);
+  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index, 
+  (long long const *)addr, scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale,
+  int hint)
+{
+  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const *) addr,
+  scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline_

Re: [PATCH i386 11/8] [AVX512] [2/2] Add missing packed PF gathers/scatters.

2014-01-23 Thread Uros Bizjak
On Tue, Jan 21, 2014 at 7:52 PM, Kirill Yukhin  wrote:
> Hello,
> This is non-trivial part of the patch.
>
>> On 15 Jan 20:53, Uros Bizjak wrote:
>> On Tue, Jan 14, 2014 at 7:13 AM, Kirill Yukhin  
>> wrote:
>> Did you try to add DF/SF mode to the unspec? I am not familiar with
>> this insn, but shouldn't the mode of mem access be somehow similar to
>> the avx512f_scattersi access?
> avx512f_scattersi is different in its appearence.
> It has explicit type of destination which discriminates SF/DF modes. 
> Prefetches
> has no such.
>
>> Also, you can use double macroization with MODEF iterator for SF and DFmode.
> I think I cannot. Because DF/SF types of the insn incurs different vidx 
> iterators.
> E.g.:
> Currently we have for SF:
> (define_expand "avx512pf_scatterpfsf"
>   [(unspec
>  [(match_operand: 0 "register_or_constm1_operand")
>   (mem:SF
> (match_par_dup 5
>   [(match_operand 2 "vsib_address_operand")
>(match_operand:VI48_512 1 "register_operand")
>(match_operand:SI 3 "const1248_operand")]))
>   (match_operand:SI 4 "const_0_to_1_operand")]
>  UNSPEC_SCATTER_PREFETCH)]
>
> and for DF:
> (define_expand "avx512pf_scatterpfdf"
>   [(unspec
>  [(match_operand: 0 "register_or_constm1_operand")
>   (mem:DF
>(match_par_dup 5
>  [(match_operand 2 "vsib_address_operand")
>   (match_operand:VI4_256_8_512 1 "register_operand")
>   (match_operand:SI 3 "const1248_operand")]))
>   (match_operand:SI 4 "const_0_to_1_operand")]
>  UNSPEC_SCATTER_PREFETCH)]
>
> We have this correspondence between, say, main and index modes:
>   SF -> (V16SI, V8DI)
>   DF -> (V8SI , V8DI)

It looks to me that you should use V16SF and V8DF instead of SF and DF
modes here.

Other than this, the patch looks OK to me. Please wait a day if Jakub
has any remark here.

Thanks,
Uros.


Re: [PATCH i386 11/8] [AVX512] [2/2] Add missing packed PF gathers/scatters.

2014-01-21 Thread Kirill Yukhin
Hello,
This is non-trivial part of the patch.

> On 15 Jan 20:53, Uros Bizjak wrote:
> On Tue, Jan 14, 2014 at 7:13 AM, Kirill Yukhin  
> wrote:
> Did you try to add DF/SF mode to the unspec? I am not familiar with
> this insn, but shouldn't the mode of mem access be somehow similar to
> the avx512f_scattersi access?
avx512f_scattersi is different in its appearance.
It has an explicit destination type which discriminates SF/DF modes. Prefetches
have no such thing.
 
> Also, you can use double macroization with MODEF iterator for SF and DFmode.
I think I cannot, because the DF/SF types of the insn incur different vidx
iterators.
E.g.:
Currently we have for SF:
(define_expand "avx512pf_scatterpf<mode>sf"
  [(unspec
 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
  (mem:SF
(match_par_dup 5
  [(match_operand 2 "vsib_address_operand")
   (match_operand:VI48_512 1 "register_operand")
   (match_operand:SI 3 "const1248_operand")]))
  (match_operand:SI 4 "const_0_to_1_operand")]
 UNSPEC_SCATTER_PREFETCH)]

and for DF:
(define_expand "avx512pf_scatterpf<mode>df"
  [(unspec
 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
  (mem:DF
   (match_par_dup 5
 [(match_operand 2 "vsib_address_operand")
  (match_operand:VI4_256_8_512 1 "register_operand")
  (match_operand:SI 3 "const1248_operand")]))
  (match_operand:SI 4 "const_0_to_1_operand")]
 UNSPEC_SCATTER_PREFETCH)]

We have this correspondence between, say, main and index modes:
  SF -> (V16SI, V8DI)
  DF -> (V8SI , V8DI)


I think we should hear from Jakub about the sse.md changes and expansion.

Bootstrapped, avx512* and sse-* tests pass.

Comments?

--
Thanks, K

---
 gcc/config/i386/avx512pfintrin.h   | 113 --
 gcc/config/i386/i386-builtin-types.def |   2 +
 gcc/config/i386/i386.c |  37 -
 gcc/config/i386/sse.md | 171 +++--
 gcc/testsuite/gcc.target/i386/avx-1.c  |   4 +
 .../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c |  15 ++
 .../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c|  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c|  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c|  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c|  17 ++
 gcc/testsuite/gcc.target/i386/sse-14.c |   4 +
 gcc/testsuite/gcc.target/i386/sse-22.c |   5 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   4 +
 16 files changed, 437 insertions(+), 31 deletions(-)

diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
index b8c0110..bc7598e 100644
--- a/gcc/config/i386/avx512pfintrin.h
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -48,74 +48,157 @@ typedef unsigned short __mmask16;
 #ifdef __OPTIMIZE__
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
+  void *addr, int scale, int hint)
+{
+  __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
-  int const *addr, int scale, int hint)
+  void *addr, int scale, int hint)
+{
+  __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
+  void *addr, int scale, int hint)
 {
-  __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint);
+  __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
+ scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
-  int const *addr, int scale, int hint)
+  void *addr, int scale, int hint)
 {
-  __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint);
+  __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
+ scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale,
+_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,