http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59379

--- Comment #19 from H.J. Lu <hjl.tools at gmail dot com> ---
(In reply to Uroš Bizjak from comment #18)
> I have checked that this patch with the testcase from Comment #9, using "-O
> -march=corei7 -mtune=slm" compile options. The resulting binary worked OK.

Yes, the resulting GCC works correctly.  However, we generate an
extra

(set (reg:DI) (zero_extend:DI (reg:SI)))

It is because we generate

(set (reg:SI) (reg:SI))
(set (reg:DI) (zero_extend:DI (reg:SI)))

The REE pass doesn't know that

(set (reg:SI) (reg:SI))

has an implicit ZERO_EXTEND.  Here is a testcase:

---foo.c---
/* Thread-local flag word defined outside this testcase; the function
   below ORs 0x00000020 into it.  */
extern __thread unsigned int __bid_IDEC_glbflags;
typedef unsigned long long UINT64;
/* A pair of UINT64 words with 16-byte alignment.  */
typedef __attribute__ ((aligned(16))) struct
{
  UINT64 w[2];
} UINT128;
/* Forward declaration of the function defined in this testcase.  */
extern UINT64 __bid64_from_uint64 (UINT64);
/* External rounding helpers.  Only the prototypes are visible here;
   their definitions are not part of the testcase.
   NOTE(review): judging by the parameter names, the pointer arguments
   are outputs (rounded value, exponent adjustment, rounding-status
   flags) -- confirm against the helper definitions.  */
extern void __bid_round64_2_18 (int q,
                int x,
                UINT64 C,
                UINT64 * ptr_Cstar,
                int *delta_exp,
                int *ptr_is_midpoint_lt_even,
                int *ptr_is_midpoint_gt_even,
                int *ptr_is_inexact_lt_midpoint,
                int *ptr_is_inexact_gt_midpoint);
extern void __bid_round128_19_38 (int q,
                  int x,
                  UINT128 C,
                  UINT128 * ptr_Cstar,
                  int *delta_exp,
                  int *ptr_is_midpoint_lt_even,
                  int *ptr_is_midpoint_gt_even,
                  int *ptr_is_inexact_lt_midpoint,
                  int *ptr_is_inexact_gt_midpoint);
/* Testcase for the redundant zero-extension discussed in this message.
   Packs the unsigned 64-bit integer X into a 64-bit result word.
   Values up to 10^16 - 1 are encoded directly; larger values are first
   passed to one of the external rounding helpers, and bit 0x20 of
   __bid_IDEC_glbflags is set if any of the helper's four status
   outputs is nonzero.
   NOTE(review): the names suggest the result is a BID-encoded
   decimal64 value -- confirm against the library this was reduced
   from.  The code is kept token-for-token as posted because the
   generated RTL depends on its exact form.  */
UINT64
__bid64_from_uint64 (UINT64 x)
{
  UINT64 res;
  UINT128 x128, res128;
  unsigned int q, ind;
  int incr_exp = 0;
  int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  /* 0x002386F26FC0ffff == 10^16 - 1: no helper call is needed.  */
  if (x <= 0x002386F26FC0ffffull) {
    /* 0x0020000000000000 == 2^53.  */
    if (x < 0x0020000000000000ull) {
      /* 0x31c0000000000000 == (UINT64) 398 << 53.  */
      res = 0x31c0000000000000ull | x;
    } else {
      /* 0x6c70000000000000 == 0x6000000000000000 | ((UINT64) 398 << 51);
         only the low 51 bits of x are kept.  */
      res = 0x6c70000000000000ull | (x & 0x0007ffffffffffffull);
    }
  }
  else
    {
      /* The thresholds are 10^17, 10^18 and 10^19, so q is the number
         of decimal digits in x (17..20) and ind == q - 16.  */
      if (x < 0x16345785d8a0000ull) {
    q = 17;
    ind = 1;
      } else if (x < 0xde0b6b3a7640000ull) {
    q = 18;
    ind = 2;
      } else if (x < 0x8ac7230489e80000ull) {
    q = 19;
    ind = 3;
      } else {
    q = 20;
    ind = 4;
      }
      /* q <= 19 uses the 64-bit helper; only q == 20 takes the 128-bit
         path below.  */
      if (q <= 19) {
    __bid_round64_2_18 (
                q, ind, x, &res, &incr_exp,
                &is_midpoint_lt_even, &is_midpoint_gt_even,
                &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      }
      else {
    /* Widen x into a UINT128 with w[1] cleared; only w[0] of the
       helper's result is used.  */
    x128.w[1] = 0x0;
    x128.w[0] = x;
    __bid_round128_19_38 (q, ind, x128, &res128, &incr_exp,
                  &is_midpoint_lt_even, &is_midpoint_gt_even,
                  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    res = res128.w[0];
      }
      if (incr_exp)
    ind++;
      if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
      is_midpoint_lt_even || is_midpoint_gt_even)
    *&__bid_IDEC_glbflags |= 0x00000020;
      /* (UINT64) ind widens the 32-bit unsigned ind to 64 bits.
         NOTE(review): this appears to be the source of the
         (zero_extend:DI (reg:SI)) insns discussed in this message; the
         assembly diff shows the removed movl just before
         "addq $398".  */
      if (res < 0x0020000000000000ull) {
    res = (((UINT64) ind + 398) << 53) | res;
      } else
    {
      res = 0x6000000000000000ull | (((UINT64) ind + 398) << 51) |
        (res & 0x0007ffffffffffffull);
    }
    }
  return(res);;
}
-----------

Compiling with -fPIC -O2, the differences between your patch and mine
are

--- bad.s    2014-01-19 06:10:28.006570325 -0800
+++ foo.s    2014-01-19 06:11:46.117754696 -0800
@@ -84,19 +84,18 @@ __bid64_from_uint64:
     movabsq    $9007199254740991, %rax
     cmpq    %rax, %rbx
     jbe    .L23
-    movl    %ebp, %edx
     leaq    88(%rsp), %rsp
     .cfi_remember_state
     .cfi_def_cfa_offset 24
     movabsq    $2251799813685247, %rax
-    movl    %edx, %edx
+    movl    %ebp, %edx
     andq    %rbx, %rax
-    movabsq    $6917529027641081856, %rcx
     popq    %rbx
     .cfi_def_cfa_offset 16
+    movabsq    $6917529027641081856, %rcx
     addq    $398, %rdx
-    orq    %rcx, %rax
     salq    $51, %rdx
+    orq    %rcx, %rax
     popq    %rbp
     .cfi_def_cfa_offset 8
     orq    %rdx, %rax
@@ -154,7 +153,6 @@ __bid64_from_uint64:
     leaq    88(%rsp), %rsp
     .cfi_remember_state
     .cfi_def_cfa_offset 24
-    movl    %eax, %eax
     addq    $398, %rax
     salq    $53, %rax
     orq    %rbx, %rax

My patch removes 2 extra

(set (reg:DI) (zero_extend:DI (reg:SI)))

Reply via email to