Richard Kenner wrote: > > Maybe the best solution would be to remove the SUBREG case from the generic > > apply_distributive_law subroutine, and instead add a special check for the > > distributed subreg case right at the above place in simplify_set; i.e. to > > perform the inverse distribution only if it is already guaranteed that we > > will also be able to move the subreg to the LHS ... > > That could indeed work.
I tried to implement that suggestion, but interestingly enough I cannot really test it, since I was unable to find a single case where the SUBREG case in apply_distributive_law actually makes any difference whatsoever in the generated code. As a test case I used the whole of libstdc++.so on the following set of platforms: - i686-pc-linux - s390x-ibm-linux - powerpc-ibm-linux - arm-linux-gnueabi and built the compiler and libstdc++.so for each of: - current mainline - current mainline plus the first patch below - current mainline plus both patches below On every platform, all three resulting object files were identical. Do you have any further suggestions on how to find a test case (some particular source code and/or architecture)? Given the current set of results, and since I have no way to verify whether my simplify_set changes would actually trigger correctly, I'd rather propose to just remove the SUBREG case in apply_distributive_law (i.e. only apply the first patch below). Thoughts? Thanks, Ulrich Patch A: Remove SUBREG case in apply_distributive_law Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -9238,37 +9269,6 @@ /* This is also a multiply, so it distributes over everything. */ break; - case SUBREG: - /* Non-paradoxical SUBREGs distributes over all operations, - provided the inner modes and byte offsets are the same, this - is an extraction of a low-order part, we don't convert an fp - operation to int or vice versa, this is not a vector mode, - and we would not be converting a single-word operation into a - multi-word operation. The latter test is not required, but - it prevents generating unneeded multi-word operations. Some - of the previous tests are redundant given the latter test, - but are retained because they are required for correctness. - - We produce the result slightly differently in this case. 
*/ - - if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs)) - || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs) - || ! subreg_lowpart_p (lhs) - || (GET_MODE_CLASS (GET_MODE (lhs)) - != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs)))) - || paradoxical_subreg_p (lhs) - || VECTOR_MODE_P (GET_MODE (lhs)) - || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD - /* Result might need to be truncated. Don't change mode if - explicit truncation is needed. */ - || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x), - GET_MODE (SUBREG_REG (lhs)))) - return x; - - tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)), - SUBREG_REG (lhs), SUBREG_REG (rhs)); - return gen_lowpart (GET_MODE (x), tem); - default: return x; } Patch B: Re-implement SUBREG case specifically in simplify_set Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -6299,6 +6299,7 @@ rtx dest = SET_DEST (x); enum machine_mode mode = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest); + rtx src_subreg; rtx other_insn; rtx *cc_use; @@ -6496,6 +6497,10 @@ and X being a REG or (subreg (reg)), we may be able to convert this to (set (subreg:m2 x) (op)). + Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))), + we may be able to first distribute the subreg over op, and then apply + the above transformation. + We can always do this if M1 is narrower than M2 because that means that we only care about the low bits of the result. @@ -6504,30 +6509,56 @@ be undefined. On machine where it is defined, this transformation is safe as long as M1 and M2 have the same number of words. 
*/ + src_subreg = NULL_RTX; if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src) - && !OBJECT_P (SUBREG_REG (src)) + && !OBJECT_P (SUBREG_REG (src))) + src_subreg = SUBREG_REG (src); + else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR + || GET_CODE (src) == AND + || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) + { + rtx lhs = XEXP (x, 0); + rtx rhs = XEXP (x, 1); + + /* We can distribute non-paradoxical lowpart SUBREGs if the + inner modes agree. */ + if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG + && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs)) + && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs) + && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs) + /* This is safe in general only for integral modes. */ + && INTEGRAL_MODE_P (GET_MODE (lhs)) + && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs))) + /* Result might need to be truncated. Don't change mode if + explicit truncation is needed. */ + && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src), + GET_MODE (SUBREG_REG (lhs)))) + src_subreg = simplify_gen_binary (GET_CODE (src), + GET_MODE (SUBREG_REG (lhs)), + SUBREG_REG (lhs), SUBREG_REG (rhs)); + } + + if (src_subreg && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)) + == ((GET_MODE_SIZE (GET_MODE (src_subreg))) + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) #ifndef WORD_REGISTER_OPERATIONS && (GET_MODE_SIZE (GET_MODE (src)) - < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))) + < GET_MODE_SIZE (GET_MODE (src_subreg))) #endif #ifdef CANNOT_CHANGE_MODE_CLASS && ! 
(REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER && REG_CANNOT_CHANGE_MODE_P (REGNO (dest), - GET_MODE (SUBREG_REG (src)), + GET_MODE (src_subreg), GET_MODE (src))) #endif && (REG_P (dest) || (GET_CODE (dest) == SUBREG && REG_P (SUBREG_REG (dest))))) { - SUBST (SET_DEST (x), - gen_lowpart (GET_MODE (SUBREG_REG (src)), - dest)); - SUBST (SET_SRC (x), SUBREG_REG (src)); + SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest)); + SUBST (SET_SRC (x), src_subreg); src = SET_SRC (x), dest = SET_DEST (x); } -- Dr. Ulrich Weigand GNU Toolchain for Linux on System z and Cell BE ulrich.weig...@de.ibm.com Richard Kenner wrote: > > Maybe the best solution would be to remove the SUBREG case from the generic > > apply_distributive_law subroutine, and instead add a special check for the > > distributed subreg case right at the above place in simplify_set; i.e. to > > perform the inverse distribution only if it is already guaranteed that we > > will also be able to move the subreg to the LHS ... > > That could indeed work. I tried to implement that suggestion, but interestingly enough I cannot really test it, since I was unable to find a single case where the SUBREG case in apply_distributive_law actually makes any difference whatsoever in the generated code. As a test case I used the whole of libstdc++.so on the following set of platforms: - i686-pc-linux - s390x-ibm-linux - powerpc-ibm-linux - arm-linux-gnueabi and built the compiler and libstdc++.so for each of: - current mainline - current mainline plus the first patch below - current mainline plus both patches below On every platform, all three resulting object files were identical. Do you have any further suggestions on how to find a test case (some particular source code and/or architecture)? Given the current set of results, and since I have no way to verify whether my simplify_set changes would actually trigger correctly, I'd rather propose to just remove the SUBREG case in apply_distributive_law (i.e. 
only apply the first patch below). Thoughts? Thanks, Ulrich Patch A: Remove SUBREG case in apply_distributive_law Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -9238,37 +9269,6 @@ /* This is also a multiply, so it distributes over everything. */ break; - case SUBREG: - /* Non-paradoxical SUBREGs distributes over all operations, - provided the inner modes and byte offsets are the same, this - is an extraction of a low-order part, we don't convert an fp - operation to int or vice versa, this is not a vector mode, - and we would not be converting a single-word operation into a - multi-word operation. The latter test is not required, but - it prevents generating unneeded multi-word operations. Some - of the previous tests are redundant given the latter test, - but are retained because they are required for correctness. - - We produce the result slightly differently in this case. */ - - if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs)) - || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs) - || ! subreg_lowpart_p (lhs) - || (GET_MODE_CLASS (GET_MODE (lhs)) - != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs)))) - || paradoxical_subreg_p (lhs) - || VECTOR_MODE_P (GET_MODE (lhs)) - || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD - /* Result might need to be truncated. Don't change mode if - explicit truncation is needed. 
*/ - || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x), - GET_MODE (SUBREG_REG (lhs)))) - return x; - - tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)), - SUBREG_REG (lhs), SUBREG_REG (rhs)); - return gen_lowpart (GET_MODE (x), tem); - default: return x; } Patch B: Re-implement SUBREG case specifically in simplify_set Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -6299,6 +6299,7 @@ rtx dest = SET_DEST (x); enum machine_mode mode = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest); + rtx src_subreg; rtx other_insn; rtx *cc_use; @@ -6496,6 +6497,10 @@ and X being a REG or (subreg (reg)), we may be able to convert this to (set (subreg:m2 x) (op)). + Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))), + we may be able to first distribute the subreg over op, and then apply + the above transformation. + We can always do this if M1 is narrower than M2 because that means that we only care about the low bits of the result. @@ -6504,30 +6509,56 @@ be undefined. On machine where it is defined, this transformation is safe as long as M1 and M2 have the same number of words. */ + src_subreg = NULL_RTX; if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src) - && !OBJECT_P (SUBREG_REG (src)) + && !OBJECT_P (SUBREG_REG (src))) + src_subreg = SUBREG_REG (src); + else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR + || GET_CODE (src) == AND + || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) + { + rtx lhs = XEXP (x, 0); + rtx rhs = XEXP (x, 1); + + /* We can distribute non-paradoxical lowpart SUBREGs if the + inner modes agree. */ + if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG + && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs)) + && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs) + && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs) + /* This is safe in general only for integral modes. 
*/ + && INTEGRAL_MODE_P (GET_MODE (lhs)) + && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs))) + /* Result might need to be truncated. Don't change mode if + explicit truncation is needed. */ + && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src), + GET_MODE (SUBREG_REG (lhs)))) + src_subreg = simplify_gen_binary (GET_CODE (src), + GET_MODE (SUBREG_REG (lhs)), + SUBREG_REG (lhs), SUBREG_REG (rhs)); + } + + if (src_subreg && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)) + == ((GET_MODE_SIZE (GET_MODE (src_subreg))) + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) #ifndef WORD_REGISTER_OPERATIONS && (GET_MODE_SIZE (GET_MODE (src)) - < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))) + < GET_MODE_SIZE (GET_MODE (src_subreg))) #endif #ifdef CANNOT_CHANGE_MODE_CLASS && ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER && REG_CANNOT_CHANGE_MODE_P (REGNO (dest), - GET_MODE (SUBREG_REG (src)), + GET_MODE (src_subreg), GET_MODE (src))) #endif && (REG_P (dest) || (GET_CODE (dest) == SUBREG && REG_P (SUBREG_REG (dest))))) { - SUBST (SET_DEST (x), - gen_lowpart (GET_MODE (SUBREG_REG (src)), - dest)); - SUBST (SET_SRC (x), SUBREG_REG (src)); + SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest)); + SUBST (SET_SRC (x), src_subreg); src = SET_SRC (x), dest = SET_DEST (x); } -- Dr. Ulrich Weigand GNU Toolchain for Linux on System z and Cell BE ulrich.weig...@de.ibm.com Richard Kenner wrote: > > Maybe the best solution would be to remove the SUBREG case from the generic > > apply_distributive_law subroutine, and instead add a special check for the > > distributed subreg case right at the above place in simplify_set; i.e. to > > perform the inverse distribution only if it is already guaranteed that we > > will also be able to move the subreg to the LHS ... > > That could indeed work. 
I tried to implement that suggestion, but interestingly enough I cannot really test it, since I was unable to find a single case where the SUBREG case in apply_distributive_law actually makes any difference whatsoever in the generated code. As a test case I used the whole of libstdc++.so on the following set of platforms: - i686-pc-linux - s390x-ibm-linux - powerpc-ibm-linux - arm-linux-gnueabi and built the compiler and libstdc++.so for each of: - current mainline - current mainline plus the first patch below - current mainline plus both patches below On every platform, all three resulting object files were identical. Do you have any further suggestions on how to find a test case (some particular source code and/or architecture)? Given the current set of results, and since I have no way to verify whether my simplify_set changes would actually trigger correctly, I'd rather propose to just remove the SUBREG case in apply_distributive_law (i.e. only apply the first patch below). Thoughts? Thanks, Ulrich Patch A: Remove SUBREG case in apply_distributive_law Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -9238,37 +9269,6 @@ /* This is also a multiply, so it distributes over everything. */ break; - case SUBREG: - /* Non-paradoxical SUBREGs distributes over all operations, - provided the inner modes and byte offsets are the same, this - is an extraction of a low-order part, we don't convert an fp - operation to int or vice versa, this is not a vector mode, - and we would not be converting a single-word operation into a - multi-word operation. The latter test is not required, but - it prevents generating unneeded multi-word operations. Some - of the previous tests are redundant given the latter test, - but are retained because they are required for correctness. - - We produce the result slightly differently in this case. 
*/ - - if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs)) - || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs) - || ! subreg_lowpart_p (lhs) - || (GET_MODE_CLASS (GET_MODE (lhs)) - != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs)))) - || paradoxical_subreg_p (lhs) - || VECTOR_MODE_P (GET_MODE (lhs)) - || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD - /* Result might need to be truncated. Don't change mode if - explicit truncation is needed. */ - || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x), - GET_MODE (SUBREG_REG (lhs)))) - return x; - - tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)), - SUBREG_REG (lhs), SUBREG_REG (rhs)); - return gen_lowpart (GET_MODE (x), tem); - default: return x; } Patch B: Re-implement SUBREG case specifically in simplify_set Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -6299,6 +6299,7 @@ rtx dest = SET_DEST (x); enum machine_mode mode = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest); + rtx src_subreg; rtx other_insn; rtx *cc_use; @@ -6496,6 +6497,10 @@ and X being a REG or (subreg (reg)), we may be able to convert this to (set (subreg:m2 x) (op)). + Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))), + we may be able to first distribute the subreg over op, and then apply + the above transformation. + We can always do this if M1 is narrower than M2 because that means that we only care about the low bits of the result. @@ -6504,30 +6509,56 @@ be undefined. On machine where it is defined, this transformation is safe as long as M1 and M2 have the same number of words. 
*/ + src_subreg = NULL_RTX; if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src) - && !OBJECT_P (SUBREG_REG (src)) + && !OBJECT_P (SUBREG_REG (src))) + src_subreg = SUBREG_REG (src); + else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR + || GET_CODE (src) == AND + || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) + { + rtx lhs = XEXP (x, 0); + rtx rhs = XEXP (x, 1); + + /* We can distribute non-paradoxical lowpart SUBREGs if the + inner modes agree. */ + if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG + && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs)) + && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs) + && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs) + /* This is safe in general only for integral modes. */ + && INTEGRAL_MODE_P (GET_MODE (lhs)) + && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs))) + /* Result might need to be truncated. Don't change mode if + explicit truncation is needed. */ + && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src), + GET_MODE (SUBREG_REG (lhs)))) + src_subreg = simplify_gen_binary (GET_CODE (src), + GET_MODE (SUBREG_REG (lhs)), + SUBREG_REG (lhs), SUBREG_REG (rhs)); + } + + if (src_subreg && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)) + == ((GET_MODE_SIZE (GET_MODE (src_subreg))) + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) #ifndef WORD_REGISTER_OPERATIONS && (GET_MODE_SIZE (GET_MODE (src)) - < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))) + < GET_MODE_SIZE (GET_MODE (src_subreg))) #endif #ifdef CANNOT_CHANGE_MODE_CLASS && ! 
(REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER && REG_CANNOT_CHANGE_MODE_P (REGNO (dest), - GET_MODE (SUBREG_REG (src)), + GET_MODE (src_subreg), GET_MODE (src))) #endif && (REG_P (dest) || (GET_CODE (dest) == SUBREG && REG_P (SUBREG_REG (dest))))) { - SUBST (SET_DEST (x), - gen_lowpart (GET_MODE (SUBREG_REG (src)), - dest)); - SUBST (SET_SRC (x), SUBREG_REG (src)); + SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest)); + SUBST (SET_SRC (x), src_subreg); src = SET_SRC (x), dest = SET_DEST (x); } -- Dr. Ulrich Weigand GNU Toolchain for Linux on System z and Cell BE ulrich.weig...@de.ibm.com Richard Kenner wrote: > > Maybe the best solution would be to remove the SUBREG case from the generic > > apply_distributive_law subroutine, and instead add a special check for the > > distributed subreg case right at the above place in simplify_set; i.e. to > > perform the inverse distribution only if it is already guaranteed that we > > will also be able to move the subreg to the LHS ... > > That could indeed work. I tried to implement that suggestion, but interestingly enough I cannot really test it, since I was unable to find a single case where the SUBREG case in apply_distributive_law actually makes any difference whatsoever in the generated code. As a test case I used the whole of libstdc++.so on the following set of platforms: - i686-pc-linux - s390x-ibm-linux - powerpc-ibm-linux - arm-linux-gnueabi and built the compiler and libstdc++.so for each of: - current mainline - current mainline plus the first patch below - current mainline plus both patches below On every platform, all three resulting object files were identical. Do you have any further suggestions on how to find a test case (some particular source code and/or architecture)? Given the current set of results, and since I have no way to verify whether my simplify_set changes would actually trigger correctly, I'd rather propose to just remove the SUBREG case in apply_distributive_law (i.e. 
only apply the first patch below). Thoughts? Thanks, Ulrich Patch A: Remove SUBREG case in apply_distributive_law Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -9238,37 +9269,6 @@ /* This is also a multiply, so it distributes over everything. */ break; - case SUBREG: - /* Non-paradoxical SUBREGs distributes over all operations, - provided the inner modes and byte offsets are the same, this - is an extraction of a low-order part, we don't convert an fp - operation to int or vice versa, this is not a vector mode, - and we would not be converting a single-word operation into a - multi-word operation. The latter test is not required, but - it prevents generating unneeded multi-word operations. Some - of the previous tests are redundant given the latter test, - but are retained because they are required for correctness. - - We produce the result slightly differently in this case. */ - - if (GET_MODE (SUBREG_REG (lhs)) != GET_MODE (SUBREG_REG (rhs)) - || SUBREG_BYTE (lhs) != SUBREG_BYTE (rhs) - || ! subreg_lowpart_p (lhs) - || (GET_MODE_CLASS (GET_MODE (lhs)) - != GET_MODE_CLASS (GET_MODE (SUBREG_REG (lhs)))) - || paradoxical_subreg_p (lhs) - || VECTOR_MODE_P (GET_MODE (lhs)) - || GET_MODE_SIZE (GET_MODE (SUBREG_REG (lhs))) > UNITS_PER_WORD - /* Result might need to be truncated. Don't change mode if - explicit truncation is needed. 
*/ - || !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (x), - GET_MODE (SUBREG_REG (lhs)))) - return x; - - tem = simplify_gen_binary (code, GET_MODE (SUBREG_REG (lhs)), - SUBREG_REG (lhs), SUBREG_REG (rhs)); - return gen_lowpart (GET_MODE (x), tem); - default: return x; } Patch B: Re-implement SUBREG case specifically in simplify_set Index: gcc/combine.c =================================================================== --- gcc/combine.c (revision 183240) +++ gcc/combine.c (working copy) @@ -6299,6 +6299,7 @@ rtx dest = SET_DEST (x); enum machine_mode mode = GET_MODE (src) != VOIDmode ? GET_MODE (src) : GET_MODE (dest); + rtx src_subreg; rtx other_insn; rtx *cc_use; @@ -6496,6 +6497,10 @@ and X being a REG or (subreg (reg)), we may be able to convert this to (set (subreg:m2 x) (op)). + Similarly, if we have (set x (op:m1 (subreg:m2 ...) (subreg:m2 ...))), + we may be able to first distribute the subreg over op, and then apply + the above transformation. + We can always do this if M1 is narrower than M2 because that means that we only care about the low bits of the result. @@ -6504,30 +6509,56 @@ be undefined. On machine where it is defined, this transformation is safe as long as M1 and M2 have the same number of words. */ + src_subreg = NULL_RTX; if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src) - && !OBJECT_P (SUBREG_REG (src)) + && !OBJECT_P (SUBREG_REG (src))) + src_subreg = SUBREG_REG (src); + else if (GET_CODE (src) == IOR || GET_CODE (src) == XOR + || GET_CODE (src) == AND + || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) + { + rtx lhs = XEXP (x, 0); + rtx rhs = XEXP (x, 1); + + /* We can distribute non-paradoxical lowpart SUBREGs if the + inner modes agree. */ + if (GET_CODE (lhs) == SUBREG && GET_CODE (rhs) == SUBREG + && GET_MODE (SUBREG_REG (lhs)) == GET_MODE (SUBREG_REG (rhs)) + && subreg_lowpart_p (lhs) && !paradoxical_subreg_p (lhs) + && subreg_lowpart_p (rhs) && !paradoxical_subreg_p (rhs) + /* This is safe in general only for integral modes. 
*/ + && INTEGRAL_MODE_P (GET_MODE (lhs)) + && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (lhs))) + /* Result might need to be truncated. Don't change mode if + explicit truncation is needed. */ + && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (src), + GET_MODE (SUBREG_REG (lhs)))) + src_subreg = simplify_gen_binary (GET_CODE (src), + GET_MODE (SUBREG_REG (lhs)), + SUBREG_REG (lhs), SUBREG_REG (rhs)); + } + + if (src_subreg && (((GET_MODE_SIZE (GET_MODE (src)) + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) - == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))) - + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)) + == ((GET_MODE_SIZE (GET_MODE (src_subreg))) + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) #ifndef WORD_REGISTER_OPERATIONS && (GET_MODE_SIZE (GET_MODE (src)) - < GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))) + < GET_MODE_SIZE (GET_MODE (src_subreg))) #endif #ifdef CANNOT_CHANGE_MODE_CLASS && ! (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER && REG_CANNOT_CHANGE_MODE_P (REGNO (dest), - GET_MODE (SUBREG_REG (src)), + GET_MODE (src_subreg), GET_MODE (src))) #endif && (REG_P (dest) || (GET_CODE (dest) == SUBREG && REG_P (SUBREG_REG (dest))))) { - SUBST (SET_DEST (x), - gen_lowpart (GET_MODE (SUBREG_REG (src)), - dest)); - SUBST (SET_SRC (x), SUBREG_REG (src)); + SUBST (SET_DEST (x), gen_lowpart (GET_MODE (src_subreg), dest)); + SUBST (SET_SRC (x), src_subreg); src = SET_SRC (x), dest = SET_DEST (x); } -- Dr. Ulrich Weigand GNU Toolchain for Linux on System z and Cell BE ulrich.weig...@de.ibm.com