Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
> 
> That could indeed work.

I tried to implement that suggestion but, interestingly enough, I cannot
really test it, since I was unable to find even a single case where the
SUBREG case in apply_distributive_law makes any difference whatsoever
in the generated code.

As a test case, I used the whole of libstdc++.so on the following set
of platforms:
  - i686-pc-linux
  - s390x-ibm-linux
  - powerpc-ibm-linux
  - arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
  - current mainline
  - current mainline plus the first patch below
  - current mainline plus both patches below

All three resulting object files were identical for every platform.

Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?

Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).

Thoughts?

Thanks,
Ulrich

Patch A: Remove SUBREG case in apply_distributive_law

Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -9238,37 +9269,6 @@
       /* This is also a multiply, so it distributes over everything.  */
       break;
 
-    case SUBREG:
-      /* Non-paradoxical SUBREGs distributes over all operations,
-        provided the inner modes and byte offsets are the same, this
-        is an extraction of a low-order part, we don't convert an fp
-        operation to int or vice versa, this is not a vector mode,
-        and we would not be converting a single-word operation into a
-        multi-word operation.  The latter test is not required, but
-        it prevents generating unneeded multi-word operations.  Some
-        of the previous tests are redundant given the latter test,
-        but are retained because they are required for correctness.
-
-        We produce the result slightly differently in this case.  */
-
-      if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
-         || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
-         || ! subreg_lowpart_p (lhs)
-         || (GET_MODE_CLASS (GET_MODE (lhs))
-             != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
-         || paradoxical_subreg_p (lhs)
-         || VECTOR_MODE_P (GET_MODE (lhs))
-         || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
-         /* Result might need to be truncated.  Don't change mode if
-            explicit truncation is needed.  */
-         || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
-                                            GET_MODE (SUBREG_REG (lhs))))
-       return x;
-
-      tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
-                                SUBREG_REG (lhs), SUBREG_REG (rhs));
-      return gen_lowpart (GET_MODE (x), tem);
-
     default:
       return x;
     }


Patch B: Re-implement SUBREG case specifically in simplify_set


Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -6299,6 +6299,7 @@
   rtx dest = SET_DEST (x);
   enum machine_mode mode
     = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+  rtx src_subreg;
   rtx other_insn;
   rtx *cc_use;
 
@@ -6496,6 +6497,10 @@
      and X being a REG or (subreg (reg)), we may be able to convert this to
      (set (subreg:m2 x) (op)).
 
+     Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+     we may be able to first distribute the subreg over op, and then apply
+     the above transformation.
+
      We can always do this if M1 is narrower than M2 because that means that
      we only care about the low bits of the result.
 
@@ -6504,30 +6509,56 @@
      be undefined.  On machine where it is defined, this transformation is safe
      as long as M1 and M2 have the same number of words.  */
 
+  src_subreg = NULL_RTX;
   if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
-      && !OBJECT_P (SUBREG_REG (src))
+      && !OBJECT_P (SUBREG_REG (src)))
+    src_subreg = SUBREG_REG (src);
+  else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+          || GET_CODE (src) == AND
+          || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+    {
+      rtx lhs = XEXP (src, 0);
+      rtx rhs = XEXP (src, 1);
+
+      /* Distribute non-paradoxical lowpart SUBREGs over the operands
+        of SRC (not of the SET X) if the inner modes agree.  */
+      if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+         && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+         && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+         && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+         /* This is safe in general only for integral modes.  */
+         && INTEGRAL_MODE_P (GET_MODE (lhs))
+         && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+         /* Result might need to be truncated.  Don't change mode if
+            explicit truncation is needed.  */
+         && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+                                           GET_MODE (SUBREG_REG (lhs))))
+       src_subreg = simplify_gen_binary (GET_CODE (src),
+                                         GET_MODE (SUBREG_REG (lhs)),
+                                         SUBREG_REG (lhs), SUBREG_REG (rhs));
+    }
+
+  if (src_subreg
       && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
           / UNITS_PER_WORD)
-         == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
-              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+         == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
 #ifndef WORD_REGISTER_OPERATIONS
       && (GET_MODE_SIZE (GET_MODE (src))
-       < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+         < GET_MODE_SIZE (GET_MODE (src_subreg)))
 #endif
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
            && REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
-                                        GET_MODE (SUBREG_REG (src)),
+                                        GET_MODE (src_subreg),
                                         GET_MODE (src)))
 #endif
       && (REG_P (dest)
          || (GET_CODE (dest) == SUBREG
              && REG_P (SUBREG_REG (dest)))))
     {
-      SUBST (SET_DEST (x),
-            gen_lowpart (GET_MODE (SUBREG_REG (src)),
-                                     dest));
-      SUBST (SET_SRC (x), SUBREG_REG (src));
+      SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+      SUBST (SET_SRC (x), src_subreg);
 
       src = SET_SRC (x), dest = SET_DEST (x);
     }


-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com


Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
> 
> That could indeed work.

I tried to implement that suggestion but, interestingly enough, I cannot
really test it, since I was unable to find even a single case where the
SUBREG case in apply_distributive_law makes any difference whatsoever
in the generated code.

As a test case, I used the whole of libstdc++.so on the following set
of platforms:
  - i686-pc-linux
  - s390x-ibm-linux
  - powerpc-ibm-linux
  - arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
  - current mainline
  - current mainline plus the first patch below
  - current mainline plus both patches below

All three resulting object files were identical for every platform.

Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?

Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).

Thoughts?

Thanks,
Ulrich

Patch A: Remove SUBREG case in apply_distributive_law

Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -9238,37 +9269,6 @@
       /* This is also a multiply, so it distributes over everything.  */
       break;
 
-    case SUBREG:
-      /* Non-paradoxical SUBREGs distributes over all operations,
-        provided the inner modes and byte offsets are the same, this
-        is an extraction of a low-order part, we don't convert an fp
-        operation to int or vice versa, this is not a vector mode,
-        and we would not be converting a single-word operation into a
-        multi-word operation.  The latter test is not required, but
-        it prevents generating unneeded multi-word operations.  Some
-        of the previous tests are redundant given the latter test,
-        but are retained because they are required for correctness.
-
-        We produce the result slightly differently in this case.  */
-
-      if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
-         || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
-         || ! subreg_lowpart_p (lhs)
-         || (GET_MODE_CLASS (GET_MODE (lhs))
-             != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
-         || paradoxical_subreg_p (lhs)
-         || VECTOR_MODE_P (GET_MODE (lhs))
-         || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
-         /* Result might need to be truncated.  Don't change mode if
-            explicit truncation is needed.  */
-         || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
-                                            GET_MODE (SUBREG_REG (lhs))))
-       return x;
-
-      tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
-                                SUBREG_REG (lhs), SUBREG_REG (rhs));
-      return gen_lowpart (GET_MODE (x), tem);
-
     default:
       return x;
     }


Patch B: Re-implement SUBREG case specifically in simplify_set


Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -6299,6 +6299,7 @@
   rtx dest = SET_DEST (x);
   enum machine_mode mode
     = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+  rtx src_subreg;
   rtx other_insn;
   rtx *cc_use;
 
@@ -6496,6 +6497,10 @@
      and X being a REG or (subreg (reg)), we may be able to convert this to
      (set (subreg:m2 x) (op)).
 
+     Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+     we may be able to first distribute the subreg over op, and then apply
+     the above transformation.
+
      We can always do this if M1 is narrower than M2 because that means that
      we only care about the low bits of the result.
 
@@ -6504,30 +6509,56 @@
      be undefined.  On machine where it is defined, this transformation is safe
      as long as M1 and M2 have the same number of words.  */
 
+  src_subreg = NULL_RTX;
   if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
-      && !OBJECT_P (SUBREG_REG (src))
+      && !OBJECT_P (SUBREG_REG (src)))
+    src_subreg = SUBREG_REG (src);
+  else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+          || GET_CODE (src) == AND
+          || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+    {
+      rtx lhs = XEXP (src, 0);
+      rtx rhs = XEXP (src, 1);
+
+      /* Distribute non-paradoxical lowpart SUBREGs over the operands
+        of SRC (not of the SET X) if the inner modes agree.  */
+      if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+         && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+         && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+         && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+         /* This is safe in general only for integral modes.  */
+         && INTEGRAL_MODE_P (GET_MODE (lhs))
+         && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+         /* Result might need to be truncated.  Don't change mode if
+            explicit truncation is needed.  */
+         && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+                                           GET_MODE (SUBREG_REG (lhs))))
+       src_subreg = simplify_gen_binary (GET_CODE (src),
+                                         GET_MODE (SUBREG_REG (lhs)),
+                                         SUBREG_REG (lhs), SUBREG_REG (rhs));
+    }
+
+  if (src_subreg
       && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
           / UNITS_PER_WORD)
-         == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
-              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+         == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
 #ifndef WORD_REGISTER_OPERATIONS
       && (GET_MODE_SIZE (GET_MODE (src))
-       < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+         < GET_MODE_SIZE (GET_MODE (src_subreg)))
 #endif
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
            && REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
-                                        GET_MODE (SUBREG_REG (src)),
+                                        GET_MODE (src_subreg),
                                         GET_MODE (src)))
 #endif
       && (REG_P (dest)
          || (GET_CODE (dest) == SUBREG
              && REG_P (SUBREG_REG (dest)))))
     {
-      SUBST (SET_DEST (x),
-            gen_lowpart (GET_MODE (SUBREG_REG (src)),
-                                     dest));
-      SUBST (SET_SRC (x), SUBREG_REG (src));
+      SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+      SUBST (SET_SRC (x), src_subreg);
 
       src = SET_SRC (x), dest = SET_DEST (x);
     }


-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com


Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
> 
> That could indeed work.

I tried to implement that suggestion but, interestingly enough, I cannot
really test it, since I was unable to find even a single case where the
SUBREG case in apply_distributive_law makes any difference whatsoever
in the generated code.

As a test case, I used the whole of libstdc++.so on the following set
of platforms:
  - i686-pc-linux
  - s390x-ibm-linux
  - powerpc-ibm-linux
  - arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
  - current mainline
  - current mainline plus the first patch below
  - current mainline plus both patches below

All three resulting object files were identical for every platform.

Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?

Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).

Thoughts?

Thanks,
Ulrich

Patch A: Remove SUBREG case in apply_distributive_law

Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -9238,37 +9269,6 @@
       /* This is also a multiply, so it distributes over everything.  */
       break;
 
-    case SUBREG:
-      /* Non-paradoxical SUBREGs distributes over all operations,
-        provided the inner modes and byte offsets are the same, this
-        is an extraction of a low-order part, we don't convert an fp
-        operation to int or vice versa, this is not a vector mode,
-        and we would not be converting a single-word operation into a
-        multi-word operation.  The latter test is not required, but
-        it prevents generating unneeded multi-word operations.  Some
-        of the previous tests are redundant given the latter test,
-        but are retained because they are required for correctness.
-
-        We produce the result slightly differently in this case.  */
-
-      if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
-         || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
-         || ! subreg_lowpart_p (lhs)
-         || (GET_MODE_CLASS (GET_MODE (lhs))
-             != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
-         || paradoxical_subreg_p (lhs)
-         || VECTOR_MODE_P (GET_MODE (lhs))
-         || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
-         /* Result might need to be truncated.  Don't change mode if
-            explicit truncation is needed.  */
-         || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
-                                            GET_MODE (SUBREG_REG (lhs))))
-       return x;
-
-      tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
-                                SUBREG_REG (lhs), SUBREG_REG (rhs));
-      return gen_lowpart (GET_MODE (x), tem);
-
     default:
       return x;
     }


Patch B: Re-implement SUBREG case specifically in simplify_set


Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -6299,6 +6299,7 @@
   rtx dest = SET_DEST (x);
   enum machine_mode mode
     = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+  rtx src_subreg;
   rtx other_insn;
   rtx *cc_use;
 
@@ -6496,6 +6497,10 @@
      and X being a REG or (subreg (reg)), we may be able to convert this to
      (set (subreg:m2 x) (op)).
 
+     Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+     we may be able to first distribute the subreg over op, and then apply
+     the above transformation.
+
      We can always do this if M1 is narrower than M2 because that means that
      we only care about the low bits of the result.
 
@@ -6504,30 +6509,56 @@
      be undefined.  On machine where it is defined, this transformation is safe
      as long as M1 and M2 have the same number of words.  */
 
+  src_subreg = NULL_RTX;
   if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
-      && !OBJECT_P (SUBREG_REG (src))
+      && !OBJECT_P (SUBREG_REG (src)))
+    src_subreg = SUBREG_REG (src);
+  else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+          || GET_CODE (src) == AND
+          || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+    {
+      rtx lhs = XEXP (src, 0);
+      rtx rhs = XEXP (src, 1);
+
+      /* Distribute non-paradoxical lowpart SUBREGs over the operands
+        of SRC (not of the SET X) if the inner modes agree.  */
+      if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+         && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+         && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+         && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+         /* This is safe in general only for integral modes.  */
+         && INTEGRAL_MODE_P (GET_MODE (lhs))
+         && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+         /* Result might need to be truncated.  Don't change mode if
+            explicit truncation is needed.  */
+         && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+                                           GET_MODE (SUBREG_REG (lhs))))
+       src_subreg = simplify_gen_binary (GET_CODE (src),
+                                         GET_MODE (SUBREG_REG (lhs)),
+                                         SUBREG_REG (lhs), SUBREG_REG (rhs));
+    }
+
+  if (src_subreg
       && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
           / UNITS_PER_WORD)
-         == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
-              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+         == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
 #ifndef WORD_REGISTER_OPERATIONS
       && (GET_MODE_SIZE (GET_MODE (src))
-       < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+         < GET_MODE_SIZE (GET_MODE (src_subreg)))
 #endif
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
            && REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
-                                        GET_MODE (SUBREG_REG (src)),
+                                        GET_MODE (src_subreg),
                                         GET_MODE (src)))
 #endif
       && (REG_P (dest)
          || (GET_CODE (dest) == SUBREG
              && REG_P (SUBREG_REG (dest)))))
     {
-      SUBST (SET_DEST (x),
-            gen_lowpart (GET_MODE (SUBREG_REG (src)),
-                                     dest));
-      SUBST (SET_SRC (x), SUBREG_REG (src));
+      SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+      SUBST (SET_SRC (x), src_subreg);
 
       src = SET_SRC (x), dest = SET_DEST (x);
     }


-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com


Richard Kenner wrote:
> > Maybe the best solution would be to remove the SUBREG case from the generic
> > apply_distributive_law subroutine, and instead add a special check for the
> > distributed subreg case right at the above place in simplify_set; i.e. to
> > perform the inverse distribution only if it is already guaranteed that we
> > will also be able to move the subreg to the LHS ...
> 
> That could indeed work.

I tried to implement that suggestion but, interestingly enough, I cannot
really test it, since I was unable to find even a single case where the
SUBREG case in apply_distributive_law makes any difference whatsoever
in the generated code.

As a test case, I used the whole of libstdc++.so on the following set
of platforms:
  - i686-pc-linux
  - s390x-ibm-linux
  - powerpc-ibm-linux
  - arm-linux-gnueabi
and built the compiler and libstdc++.so for each of:
  - current mainline
  - current mainline plus the first patch below
  - current mainline plus both patches below

All three resulting object files were identical for every platform.

Do you have any further suggestion of how to find a testcase (some
particular source code and/or architecture)?

Given the current set of results, since I do not have any way to verify
whether my simplify_set changes would actually trigger correctly, I'd
rather propose to just remove the SUBREG case in apply_distributive_law
(i.e. only apply the first patch below).

Thoughts?

Thanks,
Ulrich

Patch A: Remove SUBREG case in apply_distributive_law

Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -9238,37 +9269,6 @@
       /* This is also a multiply, so it distributes over everything.  */
       break;
 
-    case SUBREG:
-      /* Non-paradoxical SUBREGs distributes over all operations,
-        provided the inner modes and byte offsets are the same, this
-        is an extraction of a low-order part, we don't convert an fp
-        operation to int or vice versa, this is not a vector mode,
-        and we would not be converting a single-word operation into a
-        multi-word operation.  The latter test is not required, but
-        it prevents generating unneeded multi-word operations.  Some
-        of the previous tests are redundant given the latter test,
-        but are retained because they are required for correctness.
-
-        We produce the result slightly differently in this case.  */
-
-      if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs))
-         || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs)
-         || ! subreg_lowpart_p (lhs)
-         || (GET_MODE_CLASS (GET_MODE (lhs))
-             != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs))))
-         || paradoxical_subreg_p (lhs)
-         || VECTOR_MODE_P (GET_MODE (lhs))
-         || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD
-         /* Result might need to be truncated.  Don't change mode if
-            explicit truncation is needed.  */
-         || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x),
-                                            GET_MODE (SUBREG_REG (lhs))))
-       return x;
-
-      tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)),
-                                SUBREG_REG (lhs), SUBREG_REG (rhs));
-      return gen_lowpart (GET_MODE (x), tem);
-
     default:
       return x;
     }


Patch B: Re-implement SUBREG case specifically in simplify_set


Index: gcc/combine.c
===================================================================
--- gcc/combine.c       (revision 183240)
+++ gcc/combine.c       (working copy)
@@ -6299,6 +6299,7 @@
   rtx dest = SET_DEST (x);
   enum machine_mode mode
     = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest);
+  rtx src_subreg;
   rtx other_insn;
   rtx *cc_use;
 
@@ -6496,6 +6497,10 @@
      and X being a REG or (subreg (reg)), we may be able to convert this to
      (set (subreg:m2 x) (op)).
 
+     Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))),
+     we may be able to first distribute the subreg over op, and then apply
+     the above transformation.
+
      We can always do this if M1 is narrower than M2 because that means that
      we only care about the low bits of the result.
 
@@ -6504,30 +6509,56 @@
      be undefined.  On machine where it is defined, this transformation is safe
      as long as M1 and M2 have the same number of words.  */
 
+  src_subreg = NULL_RTX;
   if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src)
-      && !OBJECT_P (SUBREG_REG (src))
+      && !OBJECT_P (SUBREG_REG (src)))
+    src_subreg = SUBREG_REG (src);
+  else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR
+          || GET_CODE (src) == AND
+          || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+    {
+      rtx lhs = XEXP (src, 0);
+      rtx rhs = XEXP (src, 1);
+
+      /* Distribute non-paradoxical lowpart SUBREGs over the operands
+        of SRC (not of the SET X) if the inner modes agree.  */
+      if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG
+         && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs))
+         && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs)
+         && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs)
+         /* This is safe in general only for integral modes.  */
+         && INTEGRAL_MODE_P (GET_MODE (lhs))
+         && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs)))
+         /* Result might need to be truncated.  Don't change mode if
+            explicit truncation is needed.  */
+         && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src),
+                                           GET_MODE (SUBREG_REG (lhs))))
+       src_subreg = simplify_gen_binary (GET_CODE (src),
+                                         GET_MODE (SUBREG_REG (lhs)),
+                                         SUBREG_REG (lhs), SUBREG_REG (rhs));
+    }
+
+  if (src_subreg
       && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1))
           / UNITS_PER_WORD)
-         == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
-              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
+         == ((GET_MODE_SIZE (GET_MODE (src_subreg)))
+              + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
 #ifndef WORD_REGISTER_OPERATIONS
       && (GET_MODE_SIZE (GET_MODE (src))
-       < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))
+         < GET_MODE_SIZE (GET_MODE (src_subreg)))
 #endif
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER
            && REG_CANNOT_CHANGE_MODE_P (REGNO (dest),
-                                        GET_MODE (SUBREG_REG (src)),
+                                        GET_MODE (src_subreg),
                                         GET_MODE (src)))
 #endif
       && (REG_P (dest)
          || (GET_CODE (dest) == SUBREG
              && REG_P (SUBREG_REG (dest)))))
     {
-      SUBST (SET_DEST (x),
-            gen_lowpart (GET_MODE (SUBREG_REG (src)),
-                                     dest));
-      SUBST (SET_SRC (x), SUBREG_REG (src));
+      SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest));
+      SUBST (SET_SRC (x), src_subreg);
 
       src = SET_SRC (x), dest = SET_DEST (x);
     }


-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com

Reply via email to