Re: [PATCH, RTX]: Additional fix for PR 57003

2014-10-08 Thread Jeff Law

On 10/03/14 11:54, Uros Bizjak wrote:

Hello!

My r215428 change exposed another PR 57003 problem on x86_64. When
compiling gcc.target/i386/pr57003.c we refer to the clobbered %rdi
register after the call to memcpy:

--- pr57003.s   2014-10-03 15:08:24.0 +0200
+++ pr57003_.s  2014-10-03 15:08:19.0 +0200
@@ -78,7 +78,7 @@
    leaq    -20(%rbx), %rdx
    movq    %rax, %rdi
    call    memcpy
-   movq    %rdi, c(%rip)
+   movq    %rax, c(%rip)
  .L8:
 movaps  (%rsp), %xmm6
 movaps  16(%rsp), %xmm7
@@ -321,5 +321,5 @@
 .byte   0xb
 .align 8
  .LEFDE7:
-   .ident  "GCC: (GNU) 5.0.0 20141002 (experimental) [trunk revision 215797]"
+   .ident  "GCC: (GNU) 4.9.2 20141001 (prerelease) [gcc-4_9-branch revision 215749]"
    .section        .note.GNU-stack,"",@progbits

The runtime failure happens only on CentOS 5 (and not on Fedora 20),
which supports the findings in Comment #17 of the PR.

The difference is that we now emit the memcpy call for the
MS_ABI->ELF_ABI cross-ABI case as:

#(call_insn:TI 24 23 27 3 (set (reg:DI 0 ax)
#(call (mem:QI (symbol_ref:DI ("memcpy") [flags 0x41]
) [0 memcpy S1 A8])
#(const_int 0 [0]))) pr57003.c:32 661 {*call_value}
# (expr_list:REG_DEAD (reg:DI 5 di)
#(expr_list:REG_DEAD (reg:DI 4 si)
#(expr_list:REG_DEAD (reg:DI 1 dx)
#(expr_list:REG_UNUSED (reg:DI 0 ax)
#(expr_list:REG_RETURNED (reg/v/f:DI 2 cx [orig:87 e ] [87])
#(expr_list:REG_CALL_DECL (symbol_ref:DI
("memcpy") [flags 0x41] )
#(expr_list:REG_EH_REGION (const_int 0 [0])
#(nil
#(expr_list (clobber (reg:TI 52 xmm15))
#(expr_list (clobber (reg:TI 51 xmm14))
#(expr_list (clobber (reg:TI 50 xmm13))
#(expr_list (clobber (reg:TI 49 xmm12))
#(expr_list (clobber (reg:TI 48 xmm11))
#(expr_list (clobber (reg:TI 47 xmm10))
#(expr_list (clobber (reg:TI 46 xmm9))
#(expr_list (clobber (reg:TI 45 xmm8))
#(expr_list (clobber (reg:TI 28 xmm7))
#(expr_list (clobber (reg:TI 27 xmm6))
#(expr_list (clobber (reg:DI 5 di))
#(expr_list (clobber
(reg:DI 4 si))
#(expr_list:DI
(set (reg:DI 0 ax)
#(reg:DI 5 di))
#(expr_list:DI
(use (reg:DI 5 di))
#
(expr_list:DI (use (reg:DI 4 si))
#
(expr_list:DI (use (reg:DI 1 dx))
#
(nil))

which is an alternate, but equivalent, form of what was generated previously:

#(call_insn:TI 24 23 27 3 (parallel [
#(set (reg:DI 0 ax)
#(call (mem:QI (symbol_ref:DI ("memcpy") [flags 0x41]
) [0 memcpy S1 A8])
#(const_int 0 [0])))
#(unspec [
#(const_int 0 [0])
#] UNSPEC_MS_TO_SYSV_CALL)
#(clobber (reg:DI 4 si))
#(clobber (reg:DI 5 di))
#(clobber (reg:TI 27 xmm6))
#(clobber (reg:TI 28 xmm7))
#(clobber (reg:TI 45 xmm8))
#(clobber (reg:TI 46 xmm9))
#(clobber (reg:TI 47 xmm10))
#(clobber (reg:TI 48 xmm11))
#(clobber (reg:TI 49 xmm12))
#(clobber (reg:TI 50 xmm13))
#(clobber (reg:TI 51 xmm14))
#(clobber (reg:TI 52 xmm15))
#]) pr57003.c:32 652 {*call_value_rex64_ms_sysv}
# (expr_list:REG_DEAD (reg:DI 5 di)
#(expr_list:REG_DEAD (reg:DI 4 si)
#(expr_list:REG_DEAD (reg:DI 1 dx)
#(expr_list:REG_RETURNED (reg/v/f:DI 2 cx [orig:87 e ] [87])
#(expr_list:REG_EH_REGION (const_int 0 [0])
#(nil))
#(expr_list:DI (set (reg:DI 0 ax)
#(reg:DI 5 di))
#(expr_list:DI (use (reg:DI 5 di))
#(expr_list:DI (use (reg:DI 4 si))
#(expr_list:DI (use (reg:DI 1 dx))
#(nil))

It looks like Jakub's patch, proposed in Comment #21, doesn't cover the
alternative form, so it doesn't record clobbers properly.

Attached patch fixes this omission.

2014-10-03  Uros Bizjak  

 PR rtl-optimization/57003
 * regcprop.c (copyprop_hardreg_forward_1): If ksvd.ignore_set_reg,
 also check CALL_INSN_FUNCTION_USAGE for clobbers again after
 killing regs_invalidated_by_call.

Tested on x86_64-linux-gnu {,-m32}.

OK for mainline and release branches?

OK.  Sorry for the delay, I missed this completely.

jeff



[PATCH, RTX]: Additional fix for PR 57003

2014-10-03 Thread Uros Bizjak
Hello!

My r215428 change exposed another PR 57003 problem on x86_64. When
compiling gcc.target/i386/pr57003.c we refer to the clobbered %rdi
register after the call to memcpy:

--- pr57003.s   2014-10-03 15:08:24.0 +0200
+++ pr57003_.s  2014-10-03 15:08:19.0 +0200
@@ -78,7 +78,7 @@
    leaq    -20(%rbx), %rdx
    movq    %rax, %rdi
    call    memcpy
-   movq    %rdi, c(%rip)
+   movq    %rax, c(%rip)
 .L8:
movaps  (%rsp), %xmm6
movaps  16(%rsp), %xmm7
@@ -321,5 +321,5 @@
.byte   0xb
.align 8
 .LEFDE7:
-   .ident  "GCC: (GNU) 5.0.0 20141002 (experimental) [trunk revision 215797]"
+   .ident  "GCC: (GNU) 4.9.2 20141001 (prerelease) [gcc-4_9-branch revision 215749]"
    .section        .note.GNU-stack,"",@progbits

The runtime failure happens only on CentOS 5 (and not on Fedora 20),
which supports the findings in Comment #17 of the PR.

The difference is that we now emit the memcpy call for the
MS_ABI->ELF_ABI cross-ABI case as:

#(call_insn:TI 24 23 27 3 (set (reg:DI 0 ax)
#(call (mem:QI (symbol_ref:DI ("memcpy") [flags 0x41]
) [0 memcpy S1 A8])
#(const_int 0 [0]))) pr57003.c:32 661 {*call_value}
# (expr_list:REG_DEAD (reg:DI 5 di)
#(expr_list:REG_DEAD (reg:DI 4 si)
#(expr_list:REG_DEAD (reg:DI 1 dx)
#(expr_list:REG_UNUSED (reg:DI 0 ax)
#(expr_list:REG_RETURNED (reg/v/f:DI 2 cx [orig:87 e ] [87])
#(expr_list:REG_CALL_DECL (symbol_ref:DI
("memcpy") [flags 0x41] )
#(expr_list:REG_EH_REGION (const_int 0 [0])
#(nil
#(expr_list (clobber (reg:TI 52 xmm15))
#(expr_list (clobber (reg:TI 51 xmm14))
#(expr_list (clobber (reg:TI 50 xmm13))
#(expr_list (clobber (reg:TI 49 xmm12))
#(expr_list (clobber (reg:TI 48 xmm11))
#(expr_list (clobber (reg:TI 47 xmm10))
#(expr_list (clobber (reg:TI 46 xmm9))
#(expr_list (clobber (reg:TI 45 xmm8))
#(expr_list (clobber (reg:TI 28 xmm7))
#(expr_list (clobber (reg:TI 27 xmm6))
#(expr_list (clobber (reg:DI 5 di))
#(expr_list (clobber
(reg:DI 4 si))
#(expr_list:DI
(set (reg:DI 0 ax)
#(reg:DI 5 di))
#(expr_list:DI
(use (reg:DI 5 di))
#
(expr_list:DI (use (reg:DI 4 si))
#
(expr_list:DI (use (reg:DI 1 dx))
#
(nil))

which is an alternate, but equivalent, form of what was generated previously:

#(call_insn:TI 24 23 27 3 (parallel [
#(set (reg:DI 0 ax)
#(call (mem:QI (symbol_ref:DI ("memcpy") [flags 0x41]
) [0 memcpy S1 A8])
#(const_int 0 [0])))
#(unspec [
#(const_int 0 [0])
#] UNSPEC_MS_TO_SYSV_CALL)
#(clobber (reg:DI 4 si))
#(clobber (reg:DI 5 di))
#(clobber (reg:TI 27 xmm6))
#(clobber (reg:TI 28 xmm7))
#(clobber (reg:TI 45 xmm8))
#(clobber (reg:TI 46 xmm9))
#(clobber (reg:TI 47 xmm10))
#(clobber (reg:TI 48 xmm11))
#(clobber (reg:TI 49 xmm12))
#(clobber (reg:TI 50 xmm13))
#(clobber (reg:TI 51 xmm14))
#(clobber (reg:TI 52 xmm15))
#]) pr57003.c:32 652 {*call_value_rex64_ms_sysv}
# (expr_list:REG_DEAD (reg:DI 5 di)
#(expr_list:REG_DEAD (reg:DI 4 si)
#(expr_list:REG_DEAD (reg:DI 1 dx)
#(expr_list:REG_RETURNED (reg/v/f:DI 2 cx [orig:87 e ] [87])
#(expr_list:REG_EH_REGION (const_int 0 [0])
#(nil))
#(expr_list:DI (set (reg:DI 0 ax)
#(reg:DI 5 di))
#(expr_list:DI (use (reg:DI 5 di))
#(expr_list:DI (use (reg:DI 4 si))
#(expr_list:DI (use (reg:DI 1 dx))
#(nil))

It looks like Jakub's patch, proposed in Comment #21, doesn't cover the
alternative form, so it doesn't record clobbers properly.

Attached patch fixes this omission.

2014-10-03  Uros Bizjak  

PR rtl-optimization/57003
* regcprop.c (copyprop_hardreg_forward_1): If ksvd.ignore_set_reg,
also check CALL_INSN_FUNCTION_USAGE for clobbers again after
killing regs_invalidated_by_call.

Tested on x86_64-linux-gnu {,-m32}.

OK for mainline and release branches?

Uros.
Index: regcprop.c
===
--- regcprop.c  (revision 215861)
+++ regcprop.c  (working copy)
@@ -1029,7 +1029,17 @@ copyprop_hardreg_forward_1 (basic_block bb, struct
 but instead among CLOBB