Re: [PATCH][ARM] Avoid partial overlaps in DImode shifts

2016-10-25 Thread Richard Earnshaw (lists)
On 24/10/16 15:28, Wilco Dijkstra wrote:
> With -fpu=neon DI mode shifts are expanded after reload.  DI mode registers 
> can 
> either fully or partially overlap.  However the shift expansion code can only 
> deal
> with the full overlap case, and generates incorrect code for partial overlaps.
> The fix is to add new variants that support either full overlap or no overlap.
> 
> Bootstrap & regress on arm-linux-gnueabihf OK.
> 
> This will need backporting to all active branches.
> 
> ChangeLog:
> 2016-10-20  Wilco Dijkstra  
> 
> gcc/
>   PR target/78041
>   * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and " r i" variants.
>   Remove partial overlap check for shift by 1.
>   (ashldi3_neon): Likewise.
> testsuite/
>   * gcc.target/arm/pr78041.c: New test.
> 

OK.

R.

> --
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 
> 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099
>  100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -1143,12 +1143,12 @@
>  )
>  
>  (define_insn_and_split "ashldi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "= w, w,?,?r, 
> ?w,w")
> - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 
> 0w,w")
> -(match_operand:SI 2 "general_operand""rUm, i,  r, 
> i,rUm,i")))
> -   (clobber (match_scratch:SI 3  "= X, 
> X,?, X,  X,X"))
> -   (clobber (match_scratch:SI 4  "= X, 
> X,?, X,  X,X"))
> -   (clobber (match_scratch:DI 5  "=, X,  
> X, X, ,X"))
> +  [(set (match_operand:DI 0 "s_register_operand" "= w, w,?,?r,?, 
> ?w,w")
> + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 
> 0w,w")
> +(match_operand:SI 2 "general_operand""rUm, i,  r, i,  
> i,rUm,i")))
> +   (clobber (match_scratch:SI 3  "= X, 
> X,?, X,  X,  X,X"))
> +   (clobber (match_scratch:SI 4  "= X, 
> X,?, X,  X,  X,X"))
> +   (clobber (match_scratch:DI 5  "=, X,  
> X, X,  X, ,X"))
> (clobber (reg:CC_C CC_REGNUM))]
>"TARGET_NEON"
>"#"
> @@ -1180,9 +1180,11 @@
>}
>  else
>{
> - if (operands[2] == CONST1_RTX (SImode)
> - && (!reg_overlap_mentioned_p (operands[0], operands[1])
> - || REGNO (operands[0]) == REGNO (operands[1])))
> + /* The shift expanders support either full overlap or no overlap.  */
> + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> + || REGNO (operands[0]) == REGNO (operands[1]));
> +
> + if (operands[2] == CONST1_RTX (SImode))
> /* This clobbers CC.  */
> emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
>   else
> @@ -1191,8 +1193,8 @@
>}
>  DONE;
>}"
> -  [(set_attr "arch" 
> "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> -   (set_attr "opt" "*,*,speed,speed,*,*")
> +  [(set_attr "arch" 
> "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> +   (set_attr "opt" "*,*,speed,speed,speed,*,*")
> (set_attr "type" "multiple")]
>  )
>  
> @@ -1241,12 +1243,12 @@
>  ;; ashrdi3_neon
>  ;; lshrdi3_neon
>  (define_insn_and_split "di3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand"  "= w, 
> w,?,?r,?w,?w")
> - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, 
> w")
> - (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, 
> i")))
> -   (clobber (match_scratch:SI 3   "=2r, X, 
> , X,2r, X"))
> -   (clobber (match_scratch:SI 4   "= X, X, 
> , X, X, X"))
> -   (clobber (match_scratch:DI 5   "=, X,  
> X, X,, X"))
> +  [(set (match_operand:DI 0 "s_register_operand"  "= w, 
> w,?,?r,?,?w,?w")
> + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  
> r,0w, w")
> + (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  
> i, r, i")))
> +   (clobber (match_scratch:SI 3   "=2r, X, 
> , X,  X,2r, X"))
> +   (clobber (match_scratch:SI 4   "= X, X, 
> , X,  X, X, X"))
> +   (clobber (match_scratch:DI 5   "=, X,  
> X, X, X,, X"))
> (clobber (reg:CC CC_REGNUM))]
>"TARGET_NEON"
>"#"
> @@ -1282,9 +1284,11 @@
>}
>  else
>{
> - if (operands[2] == CONST1_RTX (SImode)
> - && (!reg_overlap_mentioned_p (operands[0], operands[1])
> - || REGNO (operands[0]) == REGNO (operands[1])))
> + /* The shift expanders support either full overlap or no overlap.  */
> + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> 

[PATCH][ARM] Avoid partial overlaps in DImode shifts

2016-10-24 Thread Wilco Dijkstra
With -fpu=neon DI mode shifts are expanded after reload.  DI mode registers can 
either fully or partially overlap.  However the shift expansion code can only 
deal
with the full overlap case, and generates incorrect code for partial overlaps.
The fix is to add new variants that support either full overlap or no overlap.

Bootstrap & regress on arm-linux-gnueabihf OK.

This will need backporting to all active branches.

ChangeLog:
2016-10-20  Wilco Dijkstra  

gcc/
PR target/78041
* config/arm/neon.md (ashldi3_neon): Add "r 0 i" and " r i" variants.
Remove partial overlap check for shift by 1.
(ashldi3_neon): Likewise.
testsuite/
* gcc.target/arm/pr78041.c: New test.

--
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 
05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099
 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1143,12 +1143,12 @@
 )
 
 (define_insn_and_split "ashldi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand"   "= w, w,?,?r, 
?w,w")
-   (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 
0w,w")
-  (match_operand:SI 2 "general_operand""rUm, i,  r, 
i,rUm,i")))
-   (clobber (match_scratch:SI 3"= X, 
X,?, X,  X,X"))
-   (clobber (match_scratch:SI 4"= X, 
X,?, X,  X,X"))
-   (clobber (match_scratch:DI 5"=, X,  
X, X, ,X"))
+  [(set (match_operand:DI 0 "s_register_operand"   "= w, w,?,?r,?, 
?w,w")
+   (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 
0w,w")
+  (match_operand:SI 2 "general_operand""rUm, i,  r, i,  
i,rUm,i")))
+   (clobber (match_scratch:SI 3"= X, 
X,?, X,  X,  X,X"))
+   (clobber (match_scratch:SI 4"= X, 
X,?, X,  X,  X,X"))
+   (clobber (match_scratch:DI 5"=, X,  
X, X,  X, ,X"))
(clobber (reg:CC_C CC_REGNUM))]
   "TARGET_NEON"
   "#"
@@ -1180,9 +1180,11 @@
   }
 else
   {
-   if (operands[2] == CONST1_RTX (SImode)
-   && (!reg_overlap_mentioned_p (operands[0], operands[1])
-   || REGNO (operands[0]) == REGNO (operands[1])))
+   /* The shift expanders support either full overlap or no overlap.  */
+   gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+   || REGNO (operands[0]) == REGNO (operands[1]));
+
+   if (operands[2] == CONST1_RTX (SImode))
  /* This clobbers CC.  */
  emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
else
@@ -1191,8 +1193,8 @@
   }
 DONE;
   }"
-  [(set_attr "arch" 
"neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
-   (set_attr "opt" "*,*,speed,speed,*,*")
+  [(set_attr "arch" 
"neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
+   (set_attr "opt" "*,*,speed,speed,speed,*,*")
(set_attr "type" "multiple")]
 )
 
@@ -1241,12 +1243,12 @@
 ;; ashrdi3_neon
 ;; lshrdi3_neon
 (define_insn_and_split "di3_neon"
-  [(set (match_operand:DI 0 "s_register_operand""= w, 
w,?,?r,?w,?w")
-   (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, 
w")
-   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, 
i")))
-   (clobber (match_scratch:SI 3 "=2r, X, 
, X,2r, X"))
-   (clobber (match_scratch:SI 4 "= X, X, 
, X, X, X"))
-   (clobber (match_scratch:DI 5 "=, X,  
X, X,, X"))
+  [(set (match_operand:DI 0 "s_register_operand""= w, 
w,?,?r,?,?w,?w")
+   (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  
r,0w, w")
+   (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  
i, r, i")))
+   (clobber (match_scratch:SI 3 "=2r, X, 
, X,  X,2r, X"))
+   (clobber (match_scratch:SI 4 "= X, X, 
, X,  X, X, X"))
+   (clobber (match_scratch:DI 5 "=, X,  
X, X, X,, X"))
(clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
   "#"
@@ -1282,9 +1284,11 @@
   }
 else
   {
-   if (operands[2] == CONST1_RTX (SImode)
-   && (!reg_overlap_mentioned_p (operands[0], operands[1])
-   || REGNO (operands[0]) == REGNO (operands[1])))
+   /* The shift expanders support either full overlap or no overlap.  */
+   gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
+   || REGNO (operands[0]) == REGNO (operands[1]));
+
+   if (operands[2] == CONST1_RTX (SImode))
  /* This clobbers CC.  */
  emit_insn (gen_arm_di3_1bit (operands[0], operands[1]));