On 24/10/16 15:28, Wilco Dijkstra wrote:
> With -fpu=neon DI mode shifts are expanded after reload. DI mode registers
> can
> either fully or partially overlap. However the shift expansion code can only
> deal
> with the full overlap case, and generates incorrect code for partial overlaps.
> The fix is to add new variants that support either full overlap or no overlap.
>
> Bootstrap & regress on arm-linux-gnueabihf OK.
>
> This will need backporting to all active branches.
>
> ChangeLog:
> 2016-10-20 Wilco Dijkstra
>
> gcc/
> PR target/78041
> * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and " r i" variants.
> Remove partial overlap check for shift by 1.
> (ashldi3_neon): Likewise.
> testsuite/
> * gcc.target/arm/pr78041.c: New test.
>
OK.
R.
> --
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index
> 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099
> 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -1143,12 +1143,12 @@
> )
>
> (define_insn_and_split "ashldi3_neon"
> - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?,?r,
> ?w,w")
> - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,
> 0w,w")
> -(match_operand:SI 2 "general_operand""rUm, i, r,
> i,rUm,i")))
> - (clobber (match_scratch:SI 3 "= X,
> X,?, X, X,X"))
> - (clobber (match_scratch:SI 4 "= X,
> X,?, X, X,X"))
> - (clobber (match_scratch:DI 5 "=, X,
> X, X, ,X"))
> + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?,?r,?,
> ?w,w")
> + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,
> 0w,w")
> +(match_operand:SI 2 "general_operand""rUm, i, r, i,
> i,rUm,i")))
> + (clobber (match_scratch:SI 3 "= X,
> X,?, X, X, X,X"))
> + (clobber (match_scratch:SI 4 "= X,
> X,?, X, X, X,X"))
> + (clobber (match_scratch:DI 5 "=, X,
> X, X, X, ,X"))
> (clobber (reg:CC_C CC_REGNUM))]
>"TARGET_NEON"
>"#"
> @@ -1180,9 +1180,11 @@
>}
> else
>{
> - if (operands[2] == CONST1_RTX (SImode)
> - && (!reg_overlap_mentioned_p (operands[0], operands[1])
> - || REGNO (operands[0]) == REGNO (operands[1])))
> + /* The shift expanders support either full overlap or no overlap. */
> + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> + || REGNO (operands[0]) == REGNO (operands[1]));
> +
> + if (operands[2] == CONST1_RTX (SImode))
> /* This clobbers CC. */
> emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
> else
> @@ -1191,8 +1193,8 @@
>}
> DONE;
>}"
> - [(set_attr "arch"
> "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> - (set_attr "opt" "*,*,speed,speed,*,*")
> + [(set_attr "arch"
> "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> + (set_attr "opt" "*,*,speed,speed,speed,*,*")
> (set_attr "type" "multiple")]
> )
>
> @@ -1241,12 +1243,12 @@
> ;; ashrdi3_neon
> ;; lshrdi3_neon
> (define_insn_and_split "di3_neon"
> - [(set (match_operand:DI 0 "s_register_operand" "= w,
> w,?,?r,?w,?w")
> - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w,
> w")
> - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r,
> i")))
> - (clobber (match_scratch:SI 3 "=2r, X,
> , X,2r, X"))
> - (clobber (match_scratch:SI 4 "= X, X,
> , X, X, X"))
> - (clobber (match_scratch:DI 5 "=, X,
> X, X,, X"))
> + [(set (match_operand:DI 0 "s_register_operand" "= w,
> w,?,?r,?,?w,?w")
> + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,
> r,0w, w")
> + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i,
> i, r, i")))
> + (clobber (match_scratch:SI 3 "=2r, X,
> , X, X,2r, X"))
> + (clobber (match_scratch:SI 4 "= X, X,
> , X, X, X, X"))
> + (clobber (match_scratch:DI 5 "=, X,
> X, X, X,, X"))
> (clobber (reg:CC CC_REGNUM))]
>"TARGET_NEON"
>"#"
> @@ -1282,9 +1284,11 @@
>}
> else
>{
> - if (operands[2] == CONST1_RTX (SImode)
> - && (!reg_overlap_mentioned_p (operands[0], operands[1])
> - || REGNO (operands[0]) == REGNO (operands[1])))
> + /* The shift expanders support either full overlap or no overlap. */
> + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
>