Re: [ARM Patch 1/3]PR53189: optimizations of 64bit logic operation with constant

2012-08-03 Thread Ramana Radhakrishnan
On 18 July 2012 11:12, Carrot Wei car...@google.com wrote:
 On Wed, Jul 18, 2012 at 5:39 PM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 On 18 July 2012 09:20, Carrot Wei car...@google.com wrote:
 On Tue, Jul 17, 2012 at 9:47 PM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 Carrot,

 Sorry about the delayed response.

 On 3 July 2012 12:28, Carrot Wei car...@google.com wrote:
 On Thu, Jun 28, 2012 at 12:14 AM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 On 28 May 2012 11:08, Carrot Wei car...@google.com wrote:
 Hi

 This is the second part of the patches that deals with 64bit and. It 
 directly
 extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
 constant operands.


 Comments about const_di_ok_for_op still apply from my review of your add 
 patch.

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 Finally this should target PR target/53189.


 Hi Ramana

 Thanks for the review. Following is the updated patch according to
 your comments.

 You've missed answering this part of my review :)

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 It has been handled by the const_ok_for_dimode_op and the output part
 of corresponding SI mode insn. Let's take the IOR case as an example.


 I noticed that - If I wasn't clear enough, the question was more
 towards generating a subreg move at expand time rather than a split
 and handling while outputting asm if you see what I mean.

 I see your point now. I don't know how much better if we handle it
 earlier. Even if I generates subreg move for non-neon code at expand
 time, the latter output handling is still necessary for neon insns.

Just a quick note to let you know that I had a look this week and I'd
rather have the splitters deal with the idioms properly rather than
change the SImode patterns to deal with mov's . In theory with the
proper idiom type splitting dead loads can disappear and a number of
your tests that do and with the upper 32 bits as 0 don't really need a
load from memory and so on. I'd rather have it split before reload in
that form if possible. The neon case is something I don't yet have an
answer to but I'm gonna take a look.



regards,
ramana


Re: [ARM Patch 1/3]PR53189: optimizations of 64bit logic operation with constant

2012-07-18 Thread Carrot Wei
On Tue, Jul 17, 2012 at 9:47 PM, Ramana Radhakrishnan
ramana.radhakrish...@linaro.org wrote:
 Carrot,

 Sorry about the delayed response.

 On 3 July 2012 12:28, Carrot Wei car...@google.com wrote:
 On Thu, Jun 28, 2012 at 12:14 AM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 On 28 May 2012 11:08, Carrot Wei car...@google.com wrote:
 Hi

 This is the second part of the patches that deals with 64bit and. It 
 directly
 extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
 constant operands.


 Comments about const_di_ok_for_op still apply from my review of your add 
 patch.

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 Finally this should target PR target/53189.


 Hi Ramana

 Thanks for the review. Following is the updated patch according to
 your comments.

 You've missed answering this part of my review :)

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

It has been handled by the const_ok_for_dimode_op and the output part
of corresponding SI mode insn. Let's take the IOR case as an example.

In the const_ok_for_dimode_op patch

--- arm.c   (revision 189278)
+++ arm.c   (working copy)
@@ -2524,6 +2524,16 @@
 case PLUS:
   return arm_not_operand (hi, SImode)  arm_add_operand (lo, SImode);

+case IOR:
+  if ((const_ok_for_arm (hi_val) || (hi_val == 0x))
+  (const_ok_for_arm (lo_val) || (lo_val == 0x)))
+   return 1;
+  if (TARGET_THUMB2
+  (const_ok_for_arm (lo_val) || const_ok_for_arm (~lo_val))
+  (const_ok_for_arm (hi_val) || const_ok_for_arm (~hi_val)))
+   return 1;
+  return 0;
+
 default:
   return 0;
 }

The 0x is not valid arm mode immediate, but ior 0X
results in all 1's, so it is also allowed in an iordi3 insn. And the
patch in iorsi3_insn pattern explicitly check the all 0's and all 1's
cases, and output either a simple register mov instruction or
instruction mov all 1's to the destination.

@@ -2902,10 +2915,29 @@
(ior:SI (match_operand:SI 1 s_register_operand %r,r,r)
(match_operand:SI 2 reg_or_int_operand rI,K,?n)))]
   TARGET_32BIT
-  @
-   orr%?\\t%0, %1, %2
-   orn%?\\t%0, %1, #%B2
-   #
+  *
+  {
+if (CONST_INT_P (operands[2]))
+  {
+   HOST_WIDE_INT i = INTVAL (operands[2])  0x;
+   if (i == 0x)
+ return \mvn%?\\t%0, #0\;
+   if (i == 0)
+ {
+   if (!rtx_equal_p (operands[0], operands[1]))
+ return \mov%?\\t%0, %1\;
+   else
+ return \\;
+ }
+  }
+
+switch (which_alternative)
+  {
+  case 0: return \orr%?\\t%0, %1, %2\;
+  case 1: return \orn%?\\t%0, %1, #%B2\;
+  case 2: return \#\;
+  }
+  }
   TARGET_32BIT
 GET_CODE (operands[2]) == CONST_INT
 !(const_ok_for_arm (INTVAL (operands[2]))


 Is there any reason why we don't split such cases earlier into the
 constituent moves and the associated ands earlier than reload in the
 non-Neon case?

I referenced pattern arm_adddi3 which is split after reload_completed.
And the pattern arm_subdi3 is even not split. I guess they keep the
original constant longer may benefit some optimizations involving
constants. But it may also lose flexibility in other cases.

  In addition, it would be good to have some tests for Thumb2 that deal
 with the replicated constants for Thumb2 . Can you have a look at
 creating some tests similar to the thumb2*replicated*.c tests in
 gcc.target/arm but for 64 bit constants ?


The new test cases involving thumb2 replicated constants are added as following.

thanks
Carrot



2012-07-18  Wei Guozhi  car...@google.com

PR target/53189
* gcc.target/arm/pr53189-10.c: New testcase.
* gcc.target/arm/pr53189-11.c: New testcase.
* gcc.target/arm/pr53189-12.c: New testcase.



Index: pr53189-10.c
===
--- pr53189-10.c(revision 0)
+++ pr53189-10.c(revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options -mthumb -O2 }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler-not mov } } */
+/* { dg-final { scan-assembler and.*#-16843010 } } */
+
+void t0p(long long * p)
+{
+  *p = 0x9fefefefe;
+}


Index: pr53189-11.c
===
--- pr53189-11.c(revision 0)
+++ pr53189-11.c   

Re: [ARM Patch 1/3]PR53189: optimizations of 64bit logic operation with constant

2012-07-18 Thread Ramana Radhakrishnan
On 18 July 2012 09:20, Carrot Wei car...@google.com wrote:
 On Tue, Jul 17, 2012 at 9:47 PM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 Carrot,

 Sorry about the delayed response.

 On 3 July 2012 12:28, Carrot Wei car...@google.com wrote:
 On Thu, Jun 28, 2012 at 12:14 AM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 On 28 May 2012 11:08, Carrot Wei car...@google.com wrote:
 Hi

 This is the second part of the patches that deals with 64bit and. It 
 directly
 extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
 constant operands.


 Comments about const_di_ok_for_op still apply from my review of your add 
 patch.

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 Finally this should target PR target/53189.


 Hi Ramana

 Thanks for the review. Following is the updated patch according to
 your comments.

 You've missed answering this part of my review :)

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 It has been handled by the const_ok_for_dimode_op and the output part
 of corresponding SI mode insn. Let's take the IOR case as an example.


I noticed that - If I wasn't clear enough, the question was more
towards generating a subreg move at expand time rather than a split
and handling while outputting asm if you see what I mean.

regards,
Ramana


Re: [ARM Patch 1/3]PR53189: optimizations of 64bit logic operation with constant

2012-07-18 Thread Carrot Wei
On Wed, Jul 18, 2012 at 5:39 PM, Ramana Radhakrishnan
ramana.radhakrish...@linaro.org wrote:
 On 18 July 2012 09:20, Carrot Wei car...@google.com wrote:
 On Tue, Jul 17, 2012 at 9:47 PM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 Carrot,

 Sorry about the delayed response.

 On 3 July 2012 12:28, Carrot Wei car...@google.com wrote:
 On Thu, Jun 28, 2012 at 12:14 AM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 On 28 May 2012 11:08, Carrot Wei car...@google.com wrote:
 Hi

 This is the second part of the patches that deals with 64bit and. It 
 directly
 extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
 constant operands.


 Comments about const_di_ok_for_op still apply from my review of your add 
 patch.

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 Finally this should target PR target/53189.


 Hi Ramana

 Thanks for the review. Following is the updated patch according to
 your comments.

 You've missed answering this part of my review :)

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 It has been handled by the const_ok_for_dimode_op and the output part
 of corresponding SI mode insn. Let's take the IOR case as an example.


 I noticed that - If I wasn't clear enough, the question was more
 towards generating a subreg move at expand time rather than a split
 and handling while outputting asm if you see what I mean.

I see your point now. I don't know how much better if we handle it
earlier. Even if I generates subreg move for non-neon code at expand
time, the latter output handling is still necessary for neon insns. Do
you think it deserves the extra expand handling?

thanks
Carrot


Re: [ARM Patch 1/3]PR53189: optimizations of 64bit logic operation with constant

2012-07-17 Thread Ramana Radhakrishnan
Carrot,

Sorry about the delayed response.

On 3 July 2012 12:28, Carrot Wei car...@google.com wrote:
 On Thu, Jun 28, 2012 at 12:14 AM, Ramana Radhakrishnan
 ramana.radhakrish...@linaro.org wrote:
 On 28 May 2012 11:08, Carrot Wei car...@google.com wrote:
 Hi

 This is the second part of the patches that deals with 64bit and. It 
 directly
 extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
 constant operands.


 Comments about const_di_ok_for_op still apply from my review of your add 
 patch.

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 Finally this should target PR target/53189.


 Hi Ramana

 Thanks for the review. Following is the updated patch according to
 your comments.

You've missed answering this part of my review :)

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

Is there any reason why we don't split such cases earlier into the
constituent moves and the associated ands earlier than reload in the
non-Neon case?

 In addition, it would be good to have some tests for Thumb2 that deal
with the replicated constants for Thumb2 . Can you have a look at
creating some tests similar to the thumb2*replicated*.c tests in
gcc.target/arm but for 64 bit constants ?


regards,
Ramana


Re: [ARM Patch 1/3]PR53189: optimizations of 64bit logic operation with constant

2012-07-03 Thread Carrot Wei
On Thu, Jun 28, 2012 at 12:14 AM, Ramana Radhakrishnan
ramana.radhakrish...@linaro.org wrote:
 On 28 May 2012 11:08, Carrot Wei car...@google.com wrote:
 Hi

 This is the second part of the patches that deals with 64bit and. It directly
 extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
 constant operands.


 Comments about const_di_ok_for_op still apply from my review of your add 
 patch.

 However I don't see why and /ior / xor with constants that have either
 the low or high parts set can't be expanded directly into ands of
 subregs with moves of zero's or the original value especially if you
 aren't looking at doing 64 bit operations in neon .With Neon being
 used for 64 bit arithmetic it gets more interesting.

 Finally this should target PR target/53189.


Hi Ramana

Thanks for the review. Following is the updated patch according to
your comments.

Tested on arm qemu with all arm/thumb neon/non-neon mode combination
without regression.

thanks
Carrot


2012-07-03  Wei Guozhi  car...@google.com

PR target/53189
* gcc.target/arm/pr53189-1.c: New testcase.
* gcc.target/arm/pr53189-2.c: New testcase.
* gcc.target/arm/pr53189-3.c: New testcase.


2012-07-03  Wei Guozhi  car...@google.com

PR target/53189
* config/arm/arm.c (const_ok_for_dimode_op): Handle AND op.
* config/arm/constraints.md (De): New constraint.
* config/arm/predicates.md (arm_anddi_operand): New predicate.
(arm_immediate_anddi_operand): Likewise.
(anddi_operand): Likewise.
* config/arm/arm.md (arm_andsi3_insn): Optimization for special
constants.
(anddi3): Extend it to handle 64bit constants.
(anddi3_insn): Likewise.
* config/arm/neon.md (anddi3_neon): Likewise.



Index: testsuite/gcc.target/arm/pr53189-2.c
===
--- testsuite/gcc.target/arm/pr53189-2.c(revision 0)
+++ testsuite/gcc.target/arm/pr53189-2.c(revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options -O2 }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-times mov 1 } } */
+/* { dg-final { scan-assembler-times and 1 } } */
+
+void t0p(long long * p)
+{
+  *p = 0x1;
+}
Index: testsuite/gcc.target/arm/pr53189-3.c
===
--- testsuite/gcc.target/arm/pr53189-3.c(revision 0)
+++ testsuite/gcc.target/arm/pr53189-3.c(revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options -O2 }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not mov } } */
+/* { dg-final { scan-assembler-times and 1 } } */
+
+void t0p(long long * p)
+{
+  *p = 0x1;
+}
Index: testsuite/gcc.target/arm/pr53189-1.c
===
--- testsuite/gcc.target/arm/pr53189-1.c(revision 0)
+++ testsuite/gcc.target/arm/pr53189-1.c(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options -O2 }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not mov } } */
+
+void t0p(long long * p)
+{
+  *p = 0x10002;
+}
Index: config/arm/arm.c
===
--- config/arm/arm.c(revision 189107)
+++ config/arm/arm.c(working copy)
@@ -2524,6 +2524,10 @@
 case PLUS:
   return arm_not_operand (hi, SImode)  arm_add_operand (lo, SImode);

+case AND:
+  return ((const_ok_for_arm (lo_val) || const_ok_for_arm (~lo_val))
+  (const_ok_for_arm (hi_val) || const_ok_for_arm (~hi_val)));
+
 default:
   return 0;
 }
Index: config/arm/neon.md
===
--- config/arm/neon.md  (revision 189107)
+++ config/arm/neon.md  (working copy)
@@ -776,9 +776,9 @@
 )

 (define_insn anddi3_neon
-  [(set (match_operand:DI 0 s_register_operand =w,w,?r,?r,?w,?w)
-(and:DI (match_operand:DI 1 s_register_operand %w,0,0,r,w,0)
-   (match_operand:DI 2 neon_inv_logic_op2 w,DL,r,r,w,DL)))]
+  [(set (match_operand:DI 0 s_register_operand =w,w,?r,?r,?w,?w,?r,?r)
+(and:DI (match_operand:DI 1 s_register_operand %w,0,0,r,w,0,0,r)
+   (match_operand:DI 2 anddi_operand w,DL,r,r,w,DL,De,De)))]
   TARGET_NEON
 {
   switch (which_alternative)
@@ -790,12 +790,14 @@
 DImode, 1, VALID_NEON_QREG_MODE (DImode));
 case 2: return #;
 case 3: return #;
+case 6: return #;
+case 7: return #;
 default: gcc_unreachable ();
 }
 }
-  [(set_attr neon_type neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1)
-   (set_attr length *,*,8,8,*,*)
-   (set_attr arch nota8,nota8,*,*,onlya8,onlya8)]
+  [(set_attr neon_type neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1,*,*)
+   (set_attr length *,*,8,8,*,*,8,8)
+   (set_attr arch nota8,nota8,*,*,onlya8,onlya8,*,*)]
 )

 (define_insn ornmode3_neon
Index: config/arm/constraints.md