With the attachment this time :-) Regards VP
On Tue, Aug 20, 2013 at 04:01:59PM +0100, Vidya Praveen wrote: > Hello, > > This patch supports SISD shift instructions SHL/USHR/SSHR/SSHL/USHL for > SImode and DImode. This patch also refactors the integer shift pattern > "<optab><mode>3_insn". The pattern for rotate is moved out as ror<mode>3_insn. > > Shift patterns (aarch64_{lshr|ashl|ashr}_sisd_or_int_{si|di}3) support > both SIMD registers and general purpose registers with the shift quantity > either as variable or literal. Since there are no SISD instructions for > right shifts by a register amount, the instructions SSHL and USHL are used with the shift operand > negated using NEG in order to reverse the direction. This is done by > insisting on splitting (after reload) into neg and an UNSPEC_SISD_USHL or > UNSPEC_SISD_SSHL or UNSPEC_USHL_2S or UNSPEC_SSHL_2S pattern. Since there > are no SISD variants of shift instructions available for SImode, the SIMD > variants of the corresponding instructions are used with 2S size by taking > one lane alone into consideration and ignoring the other. > > This patch also introduces a predicate aarch64_simd_register to help in > splitting patterns. Tests for both newly introduced instructions as well > as for the integer instructions are included. > > Tested and no new regressions. > > OK for trunk? > > Regards > VP > > --- > > gcc/ChangeLog > > 2013-08-20 Vidya Praveen <vidyaprav...@arm.com> > > * config/aarch64/aarch64.md (unspec): Add UNSPEC_SISD_SSHL, > UNSPEC_SISD_USHL, UNSPEC_USHL_2S, UNSPEC_SSHL_2S, UNSPEC_SISD_NEG. > (<optab><mode>3_insn): Remove. > (aarch64_ashl_sisd_or_int_<mode>3): New Pattern. > (aarch64_lshr_sisd_or_int_<mode>3): Likewise. > (aarch64_ashr_sisd_or_int_<mode>3): Likewise. > (define_split for aarch64_lshr_sisd_or_int_di3): Likewise. > (define_split for aarch64_lshr_sisd_or_int_si3): Likewise. > (define_split for aarch64_ashr_sisd_or_int_di3): Likewise. > (define_split for aarch64_ashr_sisd_or_int_si3): Likewise. > (aarch64_sisd_ushl, aarch64_sisd_sshl): Likewise. 
> (aarch64_ushl_2s, aarch64_sshl_2s, aarch64_sisd_neg_qi): Likewise. > (ror<mode>3_insn): Likewise. > * config/aarch64/predicates.md (aarch64_simd_register): New. > > gcc/testsuite/ChangeLog > > 2013-08-20 Vidya Praveen <vidyaprav...@arm.com> > > * gcc.target/aarch64/scalar_shift_1.c: New. > >
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 5312a79..07349c6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -88,11 +88,16 @@ UNSPEC_NOP UNSPEC_PRLG_STK UNSPEC_RBIT + UNSPEC_SISD_NEG + UNSPEC_SISD_SSHL + UNSPEC_SISD_USHL + UNSPEC_SSHL_2S UNSPEC_ST2 UNSPEC_ST3 UNSPEC_ST4 UNSPEC_TLS UNSPEC_TLSDESC + UNSPEC_USHL_2S UNSPEC_VSTRUCTDUMMY ]) @@ -3183,13 +3188,182 @@ } ) -(define_insn "*<optab><mode>3_insn" +;; Logical left shift using SISD or Integer instruction +(define_insn "*aarch64_ashl_sisd_or_int_<mode>3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashift:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] + "" + "@ + shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2 + ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas> + lsl\t%<w>0, %<w>1, %<w>2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" "<MODE>,<MODE>,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift") + (set_attr "mode" "*,*,<MODE>")] +) + +;; Logical right shift using SISD or Integer instruction +(define_insn "*aarch64_lshr_sisd_or_int_<mode>3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (lshiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] + "" + "@ + ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 + # + lsr\t%<w>0, %<w>1, %<w>2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" "<MODE>,<MODE>,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift") + (set_attr "mode" "*,*,<MODE>")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (lshiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && 
reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (lshiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] + "" +) + +;; Arithmetic right shift using SISD or Integer instruction +(define_insn "*aarch64_ashr_sisd_or_int_<mode>3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,rUs<cmode>")))] + "" + "@ + sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 + # + asr\t%<w>0, %<w>1, %<w>2" + [(set_attr "simd" "yes,yes,no") + (set_attr "simd_type" "simd_shift_imm,simd_shift,*") + (set_attr "simd_mode" "<MODE>,<MODE>,*") + (set_attr "v8type" "*,*,shift") + (set_attr "type" "*,*,shift") + (set_attr "mode" "*,*,<MODE>")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (ashiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (ashiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] + "" +) + +(define_insn "*aarch64_sisd_ushl" + [(set 
(match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_USHL))] + "TARGET_SIMD" + "ushl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_ushl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_USHL_2S))] + "TARGET_SIMD" + "ushl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sisd_sshl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_SSHL))] + "TARGET_SIMD" + "sshl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sshl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SSHL_2S))] + "TARGET_SIMD" + "sshl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_shift") + (set_attr "simd_mode" "DI")] +) + +(define_insn "*aarch64_sisd_neg_qi" + [(set (match_operand:QI 0 "register_operand" "=w") + (unspec:QI [(match_operand:QI 1 "register_operand" "w")] + UNSPEC_SISD_NEG))] + "TARGET_SIMD" + "neg\t%d0, %d1" + [(set_attr "simd" "yes") + (set_attr "simd_type" "simd_negabs") + (set_attr "simd_mode" "QI")] +) + +;; Rotate right +(define_insn "*ror<mode>3_insn" [(set (match_operand:GPI 0 "register_operand" "=r") - (SHIFT:GPI - (match_operand:GPI 1 "register_operand" "r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))] + (rotatert:GPI + (match_operand:GPI 1 "register_operand" "r") + 
(match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))] "" - "<shift>\\t%<w>0, %<w>1, %<w>2" + "ror\\t%<w>0, %<w>1, %<w>2" [(set_attr "v8type" "shift") (set_attr "type" "shift") (set_attr "mode" "<MODE>")] diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 3e2b6b3..dbc9082 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -26,6 +26,11 @@ && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) ) +(define_predicate "aarch64_simd_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) + (define_predicate "aarch64_reg_or_zero" (and (match_code "reg,subreg,const_int") (ior (match_operand 0 "register_operand") diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c b/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c new file mode 100644 index 0000000..7cb17f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c @@ -0,0 +1,263 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-inline -save-temps" } */ + +extern void abort (); + +#define force_simd_di(v) asm volatile ("mov %d0, %1.d[0]" :"=w" (v) :"w" (v) :) +#define force_simd_si(v) asm volatile ("mov %s0, %1.s[0]" :"=w" (v) :"w" (v) :) + +typedef unsigned long long int UInt64x1; +typedef long long int Int64x1; +typedef unsigned int UInt32x1; +typedef int Int32x1; + +UInt64x1 +test_lshift_left_sisd_di (UInt64x1 b, UInt64x1 c) +{ + UInt64x1 a; + + force_simd_di (b); + force_simd_di (c); + a = b << 8; + a = a << c; + force_simd_di (a); + return a; +} +/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 8" } } */ +/* { dg-final { scan-assembler "ushl\td\[0-9\]+,\ d\[0-9\]+,\ d\[0-9\]+" } } */ + +UInt32x1 +test_lshift_left_sisd_si (UInt32x1 b, UInt32x1 c) +{ + UInt32x1 a; + + force_simd_si (b); + force_simd_si (c); + a = b << 4; + a = a << c; + force_simd_si (a); + return a; +} +/* { dg-final { scan-assembler 
"shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 4" } } */ +/* "ushl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" (counted later) */ + +UInt64x1 +test_lshift_right_sisd_di (UInt64x1 b, UInt64x1 c) +{ + UInt64x1 a; + + force_simd_di (b); + force_simd_di (c); + a = b >> 8; + a = a >> c; + force_simd_di (a); + return a; +} +/* { dg-final { scan-assembler "ushr\td\[0-9\]+,\ d\[0-9\]+,\ 8" } } */ +/* "neg\td\[0-9\]+,\ d\[0-9\]+" (counted later) */ +/* { dg-final { scan-assembler "ushl\td\[0-9\]+,\ d\[0-9\]+,\ d\[0-9\]+" } } */ + +UInt64x1 +test_lshift_right_sisd_si (UInt32x1 b, UInt32x1 c) +{ + UInt32x1 a; + + force_simd_si (b); + force_simd_si (c); + a = b >> 4; + a = a >> c; + force_simd_si (a); + return a; +} +/* { dg-final { scan-assembler "ushr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 4" } } */ +/* "neg\td\[0-9\]+,\ d\[0-9\]+" (counted later) */ +/* { dg-final { scan-assembler-times "ushl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 2 } } */ + +Int64x1 +test_ashift_right_sisd_di (Int64x1 b, Int64x1 c) +{ + Int64x1 a; + + force_simd_di (b); + force_simd_di (c); + a = b >> 8; + a = a >> c; + force_simd_di (a); + return a; +} +/* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 8" } } */ +/* "neg\td\[0-9\]+,\ d\[0-9\]+" (counted later) */ +/* { dg-final { scan-assembler "sshl\td\[0-9\]+,\ d\[0-9\]+,\ d\[0-9\]+" } } */ + +Int32x1 +test_ashift_right_sisd_si (Int32x1 b, Int32x1 c) +{ + Int32x1 a; + + force_simd_si (b); + force_simd_si (c); + a = b >> 4; + a = a >> c; + force_simd_si (a); + return a; +} +/* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 4" } } */ +/* { dg-final { scan-assembler-times "neg\td\[0-9\]+,\ d\[0-9\]+" 4 } } */ +/* { dg-final { scan-assembler "sshl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */ + + +/* The following are to make sure if the integer instructions lsl/lsr/asr are + generated in non-vector scenarios */ + +UInt64x1 +test_lshift_left_int_di (UInt64x1 b, UInt64x1 c) +{ + UInt64x1 a; + + a = b << 8; + a = a << c; 
+ return a; +} +/* { dg-final { scan-assembler "lsl\tx\[0-9\]+,\ x\[0-9\]+,\ 8" } } */ +/* { dg-final { scan-assembler "lsl\tx\[0-9\]+,\ x\[0-9\]+,\ x\[0-9\]+" } } */ + +UInt32x1 +test_lshift_left_int_si (UInt32x1 b, UInt32x1 c) +{ + UInt32x1 a; + + a = b << 4; + a = a << c; + return a; +} +/* { dg-final { scan-assembler "lsl\tw\[0-9\]+,\ w\[0-9\]+,\ 4" } } */ +/* { dg-final { scan-assembler "lsl\tw\[0-9\]+,\ w\[0-9\]+,\ w\[0-9\]+" } } */ + +UInt64x1 +test_lshift_right_int_di (UInt64x1 b, UInt64x1 c) +{ + UInt64x1 a; + + a = b >> 8; + a = a >> c; + return a; +} +/* { dg-final { scan-assembler "lsr\tx\[0-9\]+,\ x\[0-9\]+,\ 8" } } */ +/* { dg-final { scan-assembler "lsr\tx\[0-9\]+,\ x\[0-9\]+,\ x\[0-9\]+" } } */ + +UInt32x1 +test_lshift_right_int_si (UInt32x1 b, UInt32x1 c) +{ + UInt32x1 a; + + a = b >> 4; + a = a >> c; + return a; +} +/* { dg-final { scan-assembler "lsr\tw\[0-9\]+,\ w\[0-9\]+,\ 4" } } */ +/* { dg-final { scan-assembler "lsr\tw\[0-9\]+,\ w\[0-9\]+,\ w\[0-9\]+" } } */ + +Int64x1 +test_ashift_right_int_di (Int64x1 b, Int64x1 c) +{ + Int64x1 a; + + a = b >> 8; + a = a >> c; + return a; +} +/* { dg-final { scan-assembler "asr\tx\[0-9\]+,\ x\[0-9\]+,\ 8" } } */ +/* { dg-final { scan-assembler "asr\tx\[0-9\]+,\ x\[0-9\]+,\ x\[0-9\]+" } } */ + +Int32x1 +test_ashift_right_int_si (Int32x1 b, Int32x1 c) +{ + Int32x1 a; + + a = b >> 4; + a = a >> c; + return a; +} +/* { dg-final { scan-assembler "asr\tw\[0-9\]+,\ w\[0-9\]+,\ 4" } } */ +/* { dg-final { scan-assembler "asr\tw\[0-9\]+,\ w\[0-9\]+,\ w\[0-9\]+" } } */ + +Int64x1 +test_corners_sisd_di (Int64x1 b) +{ + force_simd_di (b); + b = b >> 63; + b = b >> 0; + b += b >> 65; /* { dg-warning "right shift count >= width of type" } */ + force_simd_di (b); + + return b; +} +/* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */ +/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */ + +Int32x1 +test_corners_sisd_si (Int32x1 b) +{ + force_simd_si (b); + b = b >> 31; + b = b >> 0; + 
b += b >> 33; /* { dg-warning "right shift count >= width of type" } */ + force_simd_si (b); + + return b; +} +/* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */ +/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */ + + + +#define CHECK(var,val) \ +do \ + { \ + if (var != val) \ + abort(); \ + } \ +while(0) + +UInt64x1 x = 0xC01dDeadBeefFaceull; +UInt32x1 y = 0xDeadBeef; + +int +main () +{ + x = test_lshift_left_sisd_di (x, 8); + CHECK (x, 0xdeadbeefface0000ull); + x = test_lshift_right_int_di (x, 8); + CHECK (x, 0x0000deadbeeffaceull); + x = test_lshift_right_sisd_di (x, 8); + CHECK (x, 0x00000000deadbeefull); + x = test_lshift_left_int_di (x, 8); + CHECK (x, 0x0000deadbeef0000ull); + x = ~x; + x = test_ashift_right_int_di (x, 8); + CHECK (x, 0xffffffff21524110ull); + x = test_ashift_right_sisd_di (x, 8); + CHECK (x, 0xffffffffffff2152ull); + x = test_corners_sisd_di (x); + CHECK (x, 0xfffffffffffffffeull); + + y = test_lshift_left_sisd_si (y, 4); + CHECK (y, 0xadbeef00); + y = test_lshift_right_int_si (y, 4); + CHECK (y, 0x00adbeef); + y = test_lshift_right_sisd_si (y, 4); + CHECK (y, 0x0000adbe); + y = test_lshift_left_int_si (y, 4); + CHECK (y, 0x00adbe00); + y = ~y; + y = test_ashift_right_int_si (y, 4); + CHECK (y, 0xffff5241); + y = test_ashift_right_sisd_si (y, 4); + CHECK (y, 0xffffff52); + y = test_corners_sisd_si (y); + CHECK (y, 0xfffffffe); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */