The PR is about missed simplifications for __builtin_bswap. IIUC Andrew has
patches for them at the Tree level, but I think having basic simplifications
at the RTL level for BSWAP is also worthwhile, hence the attached patch.
Tested on x86_64-suse-linux. Comments?
2013-05-23 Eric Botcazou <ebotca...@adacore.com>
PR opt/55177
* simplify-rtx.c (simplify_unary_operation_1) <NOT>: Deal with BSWAP.
(simplify_byte_swapping_operation): New.
(simplify_binary_operation_1): Call it for AND, IOR and XOR.
(simplify_relational_operation_1): Deal with BSWAP.
2013-05-23 Eric Botcazou <ebotca...@adacore.com>
* gcc.dg/builtin-bswap-6.c: New test.
* gcc.dg/builtin-bswap-7.c: Likewise.
* gcc.dg/builtin-bswap-8.c: Likewise.
* gcc.dg/builtin-bswap-9.c: Likewise.
--
Eric Botcazou
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c (revision 199091)
+++ simplify-rtx.c (working copy)
@@ -858,7 +858,6 @@ simplify_unary_operation_1 (enum rtx_cod
/* (not (ashiftrt foo C)) where C is the number of bits in FOO
minus 1 is (ge foo (const_int 0)) if STORE_FLAG_VALUE is -1,
so we can perform the above simplification. */
-
if (STORE_FLAG_VALUE == -1
&& GET_CODE (op) == ASHIFTRT
&& GET_CODE (XEXP (op, 1))
@@ -890,7 +889,6 @@ simplify_unary_operation_1 (enum rtx_cod
with negating logical insns (and-not, nand, etc.). If result has
only one NOT, put it first, since that is how the patterns are
coded. */
-
if (GET_CODE (op) == IOR || GET_CODE (op) == AND)
{
rtx in1 = XEXP (op, 0), in2 = XEXP (op, 1);
@@ -913,6 +911,13 @@ simplify_unary_operation_1 (enum rtx_cod
return gen_rtx_fmt_ee (GET_CODE (op) == IOR ? AND : IOR,
mode, in1, in2);
}
+
+ /* (not (bswap x)) -> (bswap (not x)). */
+ if (GET_CODE (op) == BSWAP)
+ {
+ rtx x = simplify_gen_unary (NOT, mode, XEXP (op, 0), mode);
+ return simplify_gen_unary (BSWAP, mode, x, mode);
+ }
break;
case NEG:
@@ -2050,6 +2055,36 @@ simplify_const_unary_operation (enum rtx
return NULL_RTX;
}
+/* Subroutine of simplify_binary_operation to simplify a binary operation
+ CODE that can commute with byte swapping, with result mode MODE and
+ operating on OP0 and OP1. CODE is currently one of AND, IOR or XOR.
+ Return zero if no simplification or canonicalization is possible. */
+
+static rtx
+simplify_byte_swapping_operation (enum rtx_code code, enum machine_mode mode,
+ rtx op0, rtx op1)
+{
+ rtx tem;
+
+ /* (op (bswap x) C1) -> (bswap (op x C2)) with C2 swapped. */
+ if (GET_CODE (op0) == BSWAP
+ && (CONST_INT_P (op1) || CONST_DOUBLE_AS_INT_P (op1)))
+ {
+ tem = simplify_gen_binary (code, mode, XEXP (op0, 0),
+ simplify_gen_unary (BSWAP, mode, op1, mode));
+ return simplify_gen_unary (BSWAP, mode, tem, mode);
+ }
+
+ /* (op (bswap x) (bswap y)) -> (bswap (op x y)). */
+ if (GET_CODE (op0) == BSWAP && GET_CODE (op1) == BSWAP)
+ {
+ tem = simplify_gen_binary (code, mode, XEXP (op0, 0), XEXP (op1, 0));
+ return simplify_gen_unary (BSWAP, mode, tem, mode);
+ }
+
+ return NULL_RTX;
+}
+
/* Subroutine of simplify_binary_operation to simplify a commutative,
associative binary operation CODE with result mode MODE, operating
on OP0 and OP1. CODE is currently one of PLUS, MULT, AND, IOR, XOR,
@@ -2791,6 +2826,10 @@ simplify_binary_operation_1 (enum rtx_co
XEXP (op0, 1));
}
+ tem = simplify_byte_swapping_operation (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
tem = simplify_associative_operation (code, mode, op0, op1);
if (tem)
return tem;
@@ -2934,6 +2973,10 @@ simplify_binary_operation_1 (enum rtx_co
&& (reversed = reversed_comparison (op0, mode)))
return reversed;
+ tem = simplify_byte_swapping_operation (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
tem = simplify_associative_operation (code, mode, op0, op1);
if (tem)
return tem;
@@ -3116,6 +3159,10 @@ simplify_binary_operation_1 (enum rtx_co
&& op1 == XEXP (XEXP (op0, 0), 0))
return simplify_gen_binary (AND, mode, op1, XEXP (op0, 1));
+ tem = simplify_byte_swapping_operation (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
tem = simplify_associative_operation (code, mode, op0, op1);
if (tem)
return tem;
@@ -4764,6 +4811,21 @@ simplify_relational_operation_1 (enum rt
simplify_gen_binary (XOR, cmp_mode,
XEXP (op0, 1), op1));
+ /* (eq/ne (bswap x) C1) simplifies to (eq/ne x C2) with C2 swapped. */
+ if ((code == EQ || code == NE)
+ && GET_CODE (op0) == BSWAP
+ && (CONST_INT_P (op1) || CONST_DOUBLE_AS_INT_P (op1)))
+ return simplify_gen_relational (code, mode, cmp_mode, XEXP (op0, 0),
+ simplify_gen_unary (BSWAP, cmp_mode,
+ op1, cmp_mode));
+
+ /* (eq/ne (bswap x) (bswap y)) simplifies to (eq/ne x y). */
+ if ((code == EQ || code == NE)
+ && GET_CODE (op0) == BSWAP
+ && GET_CODE (op1) == BSWAP)
+ return simplify_gen_relational (code, mode, cmp_mode,
+ XEXP (op0, 0), XEXP (op1, 0));
+
if (op0code == POPCOUNT && op1 == const0_rtx)
switch (code)
{
/* { dg-do compile { target arm*-*-* alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */
/* { dg-require-effective-target stdint_types } */
/* { dg-options "-O -fdump-rtl-combine" } */
#include <stdint.h>
#define BS(X) __builtin_bswap32(X)
/* Return 1 if the 32-bit byte swap of A equals 0xA0000, otherwise 0.  */
int foo1 (uint32_t a)
{
  return __builtin_bswap32 (a) == 0xA0000;
}
/* Return 1 if the 32-bit byte swap of A differs from 0xA0000, otherwise 0.  */
int foo2 (uint32_t a)
{
  return __builtin_bswap32 (a) != 0xA0000;
}
/* Return 1 if the 32-bit byte swaps of A and B are equal, otherwise 0.  */
int foo3 (uint32_t a, uint32_t b)
{
  const uint32_t lhs = __builtin_bswap32 (a);
  const uint32_t rhs = __builtin_bswap32 (b);

  return lhs == rhs ? 1 : 0;
}
/* Return 1 if the 32-bit byte swaps of A and B differ, otherwise 0.  */
int foo4 (uint32_t a, uint32_t b)
{
  const uint32_t lhs = __builtin_bswap32 (a);
  const uint32_t rhs = __builtin_bswap32 (b);

  return lhs != rhs ? 1 : 0;
}
/* { dg-final { scan-rtl-dump-not "bswapsi" "combine" } } */
/* { dg-final { cleanup-rtl-dump "combine" } } */
/* { dg-do compile { target arm*-*-* alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */
/* { dg-require-effective-target stdint_types } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O -fdump-rtl-combine" } */
#include <stdint.h>
#define BS(X) __builtin_bswap64(X)
/* Return 1 if the 64-bit byte swap of A equals 0xA00000000, otherwise 0.  */
int foo1 (uint64_t a)
{
  return __builtin_bswap64 (a) == 0xA00000000;
}
/* Return 1 if the 64-bit byte swap of A differs from 0xA00000000,
   otherwise 0.  */
int foo2 (uint64_t a)
{
  return __builtin_bswap64 (a) != 0xA00000000;
}
/* Return 1 if the 64-bit byte swaps of A and B are equal, otherwise 0.  */
int foo3 (uint64_t a, uint64_t b)
{
  const uint64_t lhs = __builtin_bswap64 (a);
  const uint64_t rhs = __builtin_bswap64 (b);

  return lhs == rhs ? 1 : 0;
}
/* Return 1 if the 64-bit byte swaps of A and B differ, otherwise 0.  */
int foo4 (uint64_t a, uint64_t b)
{
  const uint64_t lhs = __builtin_bswap64 (a);
  const uint64_t rhs = __builtin_bswap64 (b);

  return lhs != rhs ? 1 : 0;
}
/* { dg-final { scan-rtl-dump-not "bswapdi" "combine" } } */
/* { dg-final { cleanup-rtl-dump "combine" } } */
/* { dg-do compile { target arm*-*-* alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */
/* { dg-require-effective-target stdint_types } */
/* { dg-options "-O -fdump-rtl-combine" } */
#include <stdint.h>
#define BS(X) __builtin_bswap32(X)
/* Byte-swap A, complement the result, then byte-swap it back.  */
uint32_t foo1 (uint32_t a)
{
  const uint32_t swapped = __builtin_bswap32 (a);

  return __builtin_bswap32 (~swapped);
}
/* Byte-swap A, AND the result with 0xA0000, then byte-swap it back.  */
uint32_t foo2 (uint32_t a)
{
  const uint32_t masked = __builtin_bswap32 (a) & 0xA0000;

  return __builtin_bswap32 (masked);
}
/* Byte-swap A, OR the result with 0xA0000, then byte-swap it back.  */
uint32_t foo3 (uint32_t a)
{
  const uint32_t merged = __builtin_bswap32 (a) | 0xA0000;

  return __builtin_bswap32 (merged);
}
/* Byte-swap A, XOR the result with 0xA0000, then byte-swap it back.  */
uint32_t foo4 (uint32_t a)
{
  const uint32_t toggled = __builtin_bswap32 (a) ^ 0xA0000;

  return __builtin_bswap32 (toggled);
}
/* Byte-swap A and B, AND the two results, then byte-swap the outcome.  */
uint32_t foo5 (uint32_t a, uint32_t b)
{
  const uint32_t lhs = __builtin_bswap32 (a);
  const uint32_t rhs = __builtin_bswap32 (b);

  return __builtin_bswap32 (lhs & rhs);
}
/* Byte-swap A and B, OR the two results, then byte-swap the outcome.  */
uint32_t foo6 (uint32_t a, uint32_t b)
{
  const uint32_t lhs = __builtin_bswap32 (a);
  const uint32_t rhs = __builtin_bswap32 (b);

  return __builtin_bswap32 (lhs | rhs);
}
/* Byte-swap A and B, XOR the two results, then byte-swap the outcome.  */
uint32_t foo7 (uint32_t a, uint32_t b)
{
  const uint32_t lhs = __builtin_bswap32 (a);
  const uint32_t rhs = __builtin_bswap32 (b);

  return __builtin_bswap32 (lhs ^ rhs);
}
/* { dg-final { scan-rtl-dump-not "bswapsi" "combine" } } */
/* { dg-final { cleanup-rtl-dump "combine" } } */
/* { dg-do compile { target arm*-*-* alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */
/* { dg-require-effective-target stdint_types } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O -fdump-rtl-combine" } */
#include <stdint.h>
#define BS(X) __builtin_bswap64(X)
/* Byte-swap A, complement the result, then byte-swap it back.  */
uint64_t foo1 (uint64_t a)
{
  const uint64_t swapped = __builtin_bswap64 (a);

  return __builtin_bswap64 (~swapped);
}
/* Byte-swap A, AND the result with 0xA00000000, then byte-swap it back.  */
uint64_t foo2 (uint64_t a)
{
  const uint64_t masked = __builtin_bswap64 (a) & 0xA00000000;

  return __builtin_bswap64 (masked);
}
/* Byte-swap A, OR the result with 0xA00000000, then byte-swap it back.  */
uint64_t foo3 (uint64_t a)
{
  const uint64_t merged = __builtin_bswap64 (a) | 0xA00000000;

  return __builtin_bswap64 (merged);
}
/* Byte-swap A, XOR the result with 0xA00000000, then byte-swap it back.  */
uint64_t foo4 (uint64_t a)
{
  const uint64_t toggled = __builtin_bswap64 (a) ^ 0xA00000000;

  return __builtin_bswap64 (toggled);
}
/* Byte-swap A and B, AND the two results, then byte-swap the outcome.  */
uint64_t foo5 (uint64_t a, uint64_t b)
{
  const uint64_t lhs = __builtin_bswap64 (a);
  const uint64_t rhs = __builtin_bswap64 (b);

  return __builtin_bswap64 (lhs & rhs);
}
/* Byte-swap A and B, OR the two results, then byte-swap the outcome.  */
uint64_t foo6 (uint64_t a, uint64_t b)
{
  const uint64_t lhs = __builtin_bswap64 (a);
  const uint64_t rhs = __builtin_bswap64 (b);

  return __builtin_bswap64 (lhs | rhs);
}
/* Byte-swap A and B, XOR the two results, then byte-swap the outcome.  */
uint64_t foo7 (uint64_t a, uint64_t b)
{
  const uint64_t lhs = __builtin_bswap64 (a);
  const uint64_t rhs = __builtin_bswap64 (b);

  return __builtin_bswap64 (lhs ^ rhs);
}
/* { dg-final { scan-rtl-dump-not "bswapdi" "combine" } } */
/* { dg-final { cleanup-rtl-dump "combine" } } */