Hi Richard,

When detecting a 16-bit hand-crafted byte swap, the bswap pass replaces the 
sequence of statements by a rotation left of 8 bits of the same type as the 
source. For source whose type size is bigger than 16 bits this is wrong. For 
instance,

int swap(int x)
{
return (unsigned short) ((unsigned short) x << 8 | (unsigned short) x >> 8);
}

is optimized incorrectly. This patch select the type of the rotation to be 16 
bits in size and with the same sign as the source, doing a conversation if 
necessary before doing the rotation.

ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2015-01-22  Thomas Preud'homme  <thomas.preudho...@arm.com>

    PR tree-optimization/64718
    * tree-ssa-math-opts.c (bswap_replace): Make bswap_type be a short
    type of same sign as src and convert src to that type if necessary for
    all bswap sizes. Fix rotation right notation in nearby comment.


*** gcc/testsuite/ChangeLog ***

2015-01-22  Thomas Preud'homme  <thomas.preudho...@arm.com>

    PR tree-optimization/64718
    * gcc.c-torture/execute/pr64718.c: New test.


diff --git a/gcc/testsuite/gcc.c-torture/execute/pr64718.c 
b/gcc/testsuite/gcc.c-torture/execute/pr64718.c
new file mode 100644
index 0000000..58773e0
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr64718.c
@@ -0,0 +1,18 @@
+static int __attribute__ ((noinline, noclone))
+swap (int x)
+{
+  return (unsigned short) ((unsigned short) x << 8 | (unsigned short) x >> 8);
+}
+
+static int a = 0x1234;
+
+int
+main (void)
+{
+  int b = 0x1234;
+  if (swap (a) != 0x3412)
+    __builtin_abort ();
+  if (swap (b) != 0x3412)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 479f49f..0ff8040 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2355,30 +2355,32 @@ bswap_replace (gimple cur_stmt, gimple src_stmt, tree 
fndecl, tree bswap_type,
 
   tmp = src;
 
+  if (bswap && n->range == 16)
+    bswap_type = TYPE_UNSIGNED (TREE_TYPE (src)) ? short_unsigned_type_node
+                                                : short_integer_type_node;
+
+  /* Convert the src expression if necessary.  */
+  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+    {
+      gimple convert_stmt;
+
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
+      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+    }
+
   /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
      are considered as rotation of 2N bit values by N bits is generally not
-     equivalent to a bswap.  Consider for instance 0x01020304 >> 16 which gives
-     0x03040102 while a bswap for that value is 0x04030201.  */
+     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
+     gives 0x03040102 while a bswap for that value is 0x04030201.  */
   if (bswap && n->range == 16)
     {
       tree count = build_int_cst (NULL, BITS_PER_UNIT);
-      bswap_type = TREE_TYPE (src);
-      src = fold_build2 (LROTATE_EXPR, bswap_type, src, count);
+      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
       bswap_stmt = gimple_build_assign (NULL, src);
     }
   else
-    {
-      /* Convert the src expression if necessary.  */
-      if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
-       {
-         gimple convert_stmt;
-         tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
-         convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
-         gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
-       }
-
-      bswap_stmt = gimple_build_call (fndecl, 1, tmp);
-    }
+    bswap_stmt = gimple_build_call (fndecl, 1, tmp);
 
   tmp = tgt;
 
@@ -2386,6 +2388,7 @@ bswap_replace (gimple cur_stmt, gimple src_stmt, tree 
fndecl, tree bswap_type,
   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
     {
       gimple convert_stmt;
+
       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);


Testing done:

 *  Bootstrapped on x86_64 and no regression were found when running the 
testsuite.
  * Equally, an arm-none-eabi GCC cross-compiler was build and no regression 
were found when running the testsuite on QEMU emulating a Cortex-M3

Is this ok for trunk?

Best regards,

Thomas



Reply via email to