Add V8QImode and V4QImode vector shift patterns that call into ix86_expand_vecop_qihi_partial. Generate special sequences for constant count operands.
The patch regresses g++.dg/pr91838.C. As explained in PR91838, the test returns different results depending on whether a V8QImode shift pattern is present in the target *.md files. With the named expander present, the tree optimizers produce:

  V f (V x)
  {
    V _2;

    <bb 2> [local count: 1073741824]:
    _2 = x_1(D) >> 8;
    return _2;
  }

and without the named expander:

  V f (V x)
  {
    <bb 2> [local count: 1073741824]:
    return { 0, 0, 0, 0, 0, 0, 0, 0 };
  }

The RTL part just expands from there.

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_expand_vecop_qihi_partial): Call ix86_expand_vec_shift_qihi_constant for shifts with constant count operand.
	* config/i386/i386.cc (ix86_shift_rotate_cost): Handle V4QImode and V8QImode.
	* config/i386/mmx.md (<insn>v8qi3): New insn pattern.
	(<insn>v4qi3): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vect-shiftv4qi.c: New test.
	* gcc.target/i386/vect-shiftv8qi.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 50d9d34ebcb..ff3d382f1b4 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -23294,6 +23294,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2) else qop2 = op2; + qdest = gen_reg_rtx (V16QImode); + + if (CONST_INT_P (op2) + && (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT) + && ix86_expand_vec_shift_qihi_constant (code, qdest, qop1, qop2)) + { + emit_move_insn (dest, gen_lowpart (qimode, qdest)); + return; + } + switch (code) { case MULT: @@ -23358,8 +23368,6 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2) bool ok; int i; - qdest = gen_reg_rtx (V16QImode); - /* Merge the data back into the right place. */ d.target = qdest; d.op0 = qres; diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 38125ce284a..2710c6dfc56 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -20580,6 +20580,37 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, switch (mode) { + case V4QImode: + case V8QImode: + if (TARGET_AVX2) + /* Use vpbroadcast. */ + extra = cost->sse_op; + else + extra = cost->sse_load[2]; + + if (constant_op1) + { + if (code == ASHIFTRT) + { + count = 4; + extra *= 2; + } + else + count = 2; + } + else if (TARGET_AVX512BW && TARGET_AVX512VL) + { + count = 3; + return ix86_vec_cost (mode, cost->sse_op * count); + } + else if (TARGET_SSE4_1) + count = 4; + else if (code == ASHIFTRT) + count = 5; + else + count = 4; + return ix86_vec_cost (mode, cost->sse_op * count) + extra; + case V16QImode: if (TARGET_XOP) { @@ -20600,7 +20631,12 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, } /* FALLTHRU */ case V32QImode: - extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3]; + if (TARGET_AVX2) + /* Use vpbroadcast. */ + extra = cost->sse_op; + else + extra = (mode == V16QImode) ? 
cost->sse_load[2] : cost->sse_load[3]; + if (constant_op1) { if (code == ASHIFTRT) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 45773673049..a37bbbb811f 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2680,6 +2680,28 @@ (const_string "0"))) (set_attr "mode" "TI")]) +(define_expand "<insn>v8qi3" + [(set (match_operand:V8QI 0 "register_operand") + (any_shift:V8QI (match_operand:V8QI 1 "register_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_MMX_WITH_SSE" +{ + ix86_expand_vecop_qihi_partial (<CODE>, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "<insn>v4qi3" + [(set (match_operand:V4QI 0 "register_operand") + (any_shift:V4QI (match_operand:V4QI 1 "register_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_SSE2" +{ + ix86_expand_vecop_qihi_partial (<CODE>, operands[0], + operands[1], operands[2]); + DONE; +}) + (define_insn_and_split "<insn>v2qi3" [(set (match_operand:V2QI 0 "register_operand" "=Q") (any_shift:V2QI diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c new file mode 100644 index 00000000000..c06dfb87bd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +#define N 4 + +typedef unsigned char __vu __attribute__ ((__vector_size__ (N))); +typedef signed char __vi __attribute__ ((__vector_size__ (N))); + +__vu sll (__vu a, int n) +{ + return a << n; +} + +__vu sll_c (__vu a) +{ + return a << 5; +} + +/* { dg-final { scan-assembler-times "psllw" 2 } } */ + +__vu srl (__vu a, int n) +{ + return a >> n; +} + +__vu srl_c (__vu a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psrlw" 2 } } */ + +__vi sra (__vi a, int n) +{ + return a >> n; +} + +__vi sra_c (__vi a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psraw" 2 } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c new file mode 100644 index 00000000000..f5e8925aa25 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c @@ -0,0 +1,43 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +#define N 8 + +typedef unsigned char __vu __attribute__ ((__vector_size__ (N))); +typedef signed char __vi __attribute__ ((__vector_size__ (N))); + +__vu sll (__vu a, int n) +{ + return a << n; +} + +__vu sll_c (__vu a) +{ + return a << 5; +} + +/* { dg-final { scan-assembler-times "psllw" 2 } } */ + +__vu srl (__vu a, int n) +{ + return a >> n; +} + +__vu srl_c (__vu a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psrlw" 2 } } */ + +__vi sra (__vi a, int n) +{ + return a >> n; +} + +__vi sra_c (__vi a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psraw" 2 } } */