[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-03-16 Thread fredrik dot huss at home dot se

--- Additional Comments From fredrik dot huss at home dot se  2005-03-16 
17:25 ---
I see similar problems with real by complex multiplications in C++. 
The following test program, 
 
#include <complex> 
 
std::complex<double> a, b; 
double c; 
 
void f() 
{ 
 a = b * c; 
} 
 
when compiled with g++ -O3 -march=pentium4 -mfpmath=sse -S -c test.C, gives 
this result (from test.s): 
 
_Z1fv: 
.LFB1857: 
 pushl %ebp 
.LCFI0: 
 movl %esp, %ebp 
.LCFI1: 
 movsd b+8, %xmm3 
 movsd b, %xmm2 
 movsd c, %xmm4 
 pxor %xmm5, %xmm5 
 movapd %xmm2, %xmm0 
 mulsd %xmm5, %xmm0 
 movapd %xmm4, %xmm1 
 mulsd %xmm3, %xmm1 
 addsd %xmm1, %xmm0 
 movsd %xmm0, a+8 
 mulsd %xmm4, %xmm2 
 mulsd %xmm5, %xmm3 
 subsd %xmm3, %xmm2 
 movsd %xmm2, a 
 popl %ebp 
 ret 
 
I.e., the real value c is still converted to a complex value and a full 
multiplication is done. 
 
I'm using the following version of gcc: 
 
Using built-in specs. 
Target: i686-pc-linux-gnu 
Configured with: ../gcc-4.0-20050312/configure --prefix=/home/fredrik/gcc 
--enable-languages=c,c++ 
Thread model: posix 
gcc version 4.0.0 20050312 (prerelease) 
 

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-03-01 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-03-01 
15:43 ---
(In reply to comment #14)
 (In reply to comment #13)
  (In reply to comment #11)
 I get the same as I got above with the following version on x86:
 GNU C version 4.0.0 20050225 (experimental) (i686-pc-linux-gnu)
 compiled by GNU C version 4.0.0 20050225 (experimental).
 GGC heuristics: --param ggc-min-expand=30 --param ggc-min-heapsize=4096

 And no local patches which could cause this.

I can only assume that this has regressed, that this is a
little-endian problem (why it should be so is beyond me, though),
that your specific vibes make this go away or that mine make it
appear :-)

I have just done the following:

- Downloaded the 4.1 20050227 snapshot onto a ia-64 Linux box
- untarred it
$ mkdir gcc-bin
$ cd gcc-bin/
$ ../gcc-4.1-20050227/configure --prefix=$HOME --enable-languages=c,f95
$ make -j2 bootstrap
$ make install

Then, I get:
$ gcc -v
Using built-in specs.
Target: ia64-unknown-linux-gnu
Configured with: ../gcc-4.1-20050227/configure --prefix=/home/zfkts
--enable-languages=c,f95
Thread model: posix
gcc version 4.1.0 20050227 (experimental)
$ cat c-div.c
#include <math.h>
#include <complex.h>

int main()
{
float a;
complex float b,c;
foo(a,b);
c = b/a;
return creal(c) + cimag(c) < 0;
}

$ gcc -O3 -fdump-tree-optimized  -S  c-div.c
$ cat c-div.c.t65.optimized

;; Function main (main)

Analyzing Edge Insertions.
main ()
{
  float SR.12;
  float SR.11;
  float SR.10;
  float SR.9;
  float c$imag;
  float c$real;
  float SR.6;
  float SR.5;
  float SR.4;
  float SR.3;
  float D.2255;
  float D.2254;
  float D.2253;
  float D.2252;
  float D.2251;
  float D.2250;
  float D.2249;
  float D.2248;
  float D.2247;
  float D.2246;
  float D.2245;
  float D.2244;
  float D.2243;
  float D.2242;
  float D.2241;
  float D.2240;
  float D.2239;
  float D.2238;
  float D.2237;
  float D.2236;
  float D.2233;
  float D.2232;
  float D.2231;
  float D.2230;
  float D.2229;
  float D.2228;
  complex float c;
  complex float b;
  float a;
  double D.2225;
  double D.2224;
  float D.2223;
  double D.2222;
  float D.2221;
  complex float c.2;
  complex float c.1;
  int D.2218;
  complex float D.2217;
  complex float D.2216;
  float a.0;

<bb 0>:
  foo (a, b);
  SR.4 = a;
  D.2228 = REALPART_EXPR <b>;
  D.2229 = IMAGPART_EXPR <b>;
  if (ABS_EXPR <SR.4> < 0.0) goto <L1>; else goto <L2>;

<L1>:;
  D.2238 = SR.4 / 0.0;
  D.2240 = SR.4 * D.2238 + 0.0;
  c$real = (D.2229 + D.2228 * D.2238) / D.2240;
  c$imag = (D.2229 * D.2238 - D.2228) / D.2240;
  goto <bb 3>;

<L2>:;
  D.2247 = 0.0 / SR.4;
  D.2249 = SR.4 + D.2247 * 0.0;
  c$real = (D.2228 + D.2229 * D.2247) / D.2249;
  c$imag = (D.2229 - D.2228 * D.2247) / D.2249;

<bb 3>:
  return (double) c$real + (double) c$imag < 0.0;

}


Anything more I can do to test this?

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-03-01 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-03-01 
17:13 ---
(In reply to comment #15)
> I can only assume that this has regressed, that this is a
> little-endian problem (why it should be so is beyond me, though),
> that your specific vibes make this go away or that mine make it
> appear :-)
No because I tested on ppc-darwin also and got the same thing, -O3 -std=c99 
enables 
flag_complex_method=2 IIRC.  Also -ffast-math produces slightly different 
result but if you look at the 
final asm output you will see that the branch was removed, as I said before the 
branch is false always 
but that is a different bug which is already filed.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-03-01 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-03-01 
21:07 ---
Andrew,

I'm sorry if I'm not making myself clear here.

The problem that I see is that, on ia64-unknown-linux-gnu and on
i386-pc-linux-gnu, with clean trees, I see code like

L2:;
  D.2390 = 0.0 / SR.22;
  D.2392 = SR.22 + D.2390 * 0.0;
  c$real = (D.2371 + D.2372 * D.2390) / D.2392;
  c$imag = (D.2372 - D.2371 * D.2390) / D.2392;

in *.t65.optimized for the simple test case with -O1 and -O3. As you
have stated, this is independent of PR 20139.

I just rechecked this with the 4.0.0 20050226 (prerelease) snapshot.
You have posted different results, which I cannot reproduce.

Something has to be the cause of this difference, but I have no
real idea what it could be.

Is the *.t65.optimized that I am looking at the correct file?

Is there any patch in your tree that may be the cause of these
different results after all?

What version are you using, exactly?  How can I download the exact
version from cvs?

Can this be caused by header files?  I think that this is highly
unlikely, this is why I didn't include the preprocessed source, but
I can do so, of course.

Is there anything else that I can do to clear this up?

Thomas

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-03-01 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-03-01 
21:14 ---
(In reply to comment #17)
> Andrew,
>
> I'm sorry if I'm not making myself clear here.
>
> The problem that I see is that, on ia64-unknown-linux-gnu and on
> i386-pc-linux-gnu, with clean trees, I see code like
>
> <L2>:;
>   D.2390 = 0.0 / SR.22;
>   D.2392 = SR.22 + D.2390 * 0.0;
>   c$real = (D.2371 + D.2372 * D.2390) / D.2392;
>   c$imag = (D.2372 - D.2371 * D.2390) / D.2392;
>
> in *.t65.optimized for the simple test case with -O1 and -O3. As you
> have stated, this is independent of PR 20139.

Yes, that code is correct, as 0.0/SR.22 is not 0.0 if SR.22 is NaN,
and 0.0 * D.2390 is not 0.0 if D.2390 is NaN.

Try -ffast-math or -fno-trapping-math or -ffinite-math-only.

> I just rechecked this with the 4.0.0 20050226 (prerelease) snapshot.
> You have posted different results, which I cannot reproduce.

Yes I posted results with -ffast-math and other options as you described in 
comment #8:
 $ gcc -ffast-math -O3 -fdump-tree-optimized -fno-cx-limited-range -S  c-div.c 

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-03-01 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-03-01 
21:26 ---
(In reply to comment #18)

  L2:;
D.2390 = 0.0 / SR.22;
D.2392 = SR.22 + D.2390 * 0.0;
c$real = (D.2371 + D.2372 * D.2390) / D.2392;
c$imag = (D.2372 - D.2371 * D.2390) / D.2392;
  
  in *.t65.optimized for the simple test case with -O1 and -O3. As you
  have stated, this is independent of PR 20139.
 
 Yes that code is correct. as 0.0/SR.22 is not 0.0 if SR.22 is NAN.
 and 0.0 * D.2390 is not 0.0 if D.2390 is NAN.

Ok, then I have misunderstood you - you were referring to the
results with -ffast-math.

However, there still is a missed optimization here.

If SR.22 is NaN, then the proposed simplification

c$real = D.2371 / SR.22;
c$imag = D.2372 / SR.22

would still yield NaN for c$real and c$imag, which is correct.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-28 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-02-28 
20:55 ---
Comment #7 shows that there is still something to be done
for (br+I*bi)/a (with real br, bi, a).  This could be
simplified to br/a + I*bi/a, which isn't happening.

Thomas

-- 
   What|Removed |Added

 Status|RESOLVED|REOPENED
 Resolution|FIXED   |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-28 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-02-28 
20:55 ---
What I meant was comment#8 *sigh*

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-28 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-02-28 
21:36 ---
For me I get:
  D.1542 = COMPLEX_EXPR <REALPART_EXPR <b> / SR.4, IMAGPART_EXPR <b> / SR.4>;
  D.1541 = D.1542;
  D.1500 = D.1541;
  return (double) REALPART_EXPR <D.1500> + (double) IMAGPART_EXPR <D.1500> < 
0.0;

The only problem with this is we don't do SRA or complex propagation, so we 
don't have just the real and imag parts separated. 

Are you sure that you are doing the correct thing?

Without -fno-cx-limited-range I get:
  SR.4 = a;
  D.1529 = __builtin_powf (SR.4, 2.0e+0);
  return (double) (SR.4 * REALPART_EXPR <b> / D.1529) + (double) (SR.4 * 
IMAGPART_EXPR <b> / 
D.1529) < 0.0;

Now without -std=c99 -fno-cx-limited-range and -ffast-math, I get what you got; 
there is one thing which can be optimized out at the tree level, but that 
would be PR 20139.
Now -ffast-math I get:
  SR.4 = a;
  D.1612 = __builtin_powf (SR.4, 2.0e+0);
  return (double) (SR.4 * REALPART_EXPR <b> / D.1612) + (double) (SR.4 * 
IMAGPART_EXPR <b> / 
D.1612) < 0.0;

these all look fine to me.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-28 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-02-28 
21:38 ---
My investigation says this is fixed, so closing as such.

-- 
   What|Removed |Added

 Status|REOPENED|RESOLVED
 Resolution||FIXED
   Target Milestone|--- |4.0.0


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-28 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-02-28 
23:02 ---
(In reply to comment #13)
 (In reply to comment #11)
I get the same as I got above with the following version on x86:
GNU C version 4.0.0 20050225 (experimental) (i686-pc-linux-gnu)
compiled by GNU C version 4.0.0 20050225 (experimental).
GGC heuristics: --param ggc-min-expand=30 --param ggc-min-heapsize=4096

And no local patches which could cause this.

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-27 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-02-27 
12:52 ---
Is this really fixed?

Look at this:
$ cat c-div.c
#include <math.h>
#include <complex.h>

int main()
{
float a;
complex float b,c;
foo(a,b);
c = b/a;
return creal(c) + cimag(c) < 0;
}
$ gcc -ffast-math -O3 -fdump-tree-optimized -fno-cx-limited-range -S  c-div.c 
$ tail -20 c-div.c.t65.optimized
  if (ABS_EXPR <SR.26> < 0.0) goto <L1>; else goto <L2>;

<L1>:;
  D.3021 = SR.26 *  Inf;
  D.3022 = SR.26 * D.3021;
  c$real = (D.3012 + D.3011 * D.3021) / D.3022;
  c$imag = (D.3012 * D.3021 - D.3011) / D.3022;
  goto <bb 3>;

<L2>:;
  D.3030 = 0.0 / SR.26;
  c$real = (D.3011 + D.3012 * D.3030) / SR.26;
  c$imag = (D.3012 - D.3011 * D.3030) / SR.26;

<bb 3>:
  return (double) c$real + (double) c$imag < 0.0;

}

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-24 Thread cvs-commit at gcc dot gnu dot org

--- Additional Comments From cvs-commit at gcc dot gnu dot org  2005-02-24 
20:00 ---
Subject: Bug 19953

CVSROOT:/cvs/gcc
Module name:gcc
Changes by: [EMAIL PROTECTED]   2005-02-24 20:00:09

Modified files:
gcc: ChangeLog builtins.c fold-const.c tree.h 
Added files:
gcc/testsuite/gcc.dg/tree-ssa: complex-1.c complex-2.c 

Log message:
PR middle-end/19953
* builtins.c (fold_builtin_complex_mul, fold_builtin_complex_div): New.
(fold_builtin_1): Call them.
* fold-const.c (fold_complex_mult_parts): Split out from ...
(fold_complex_mult): ... here.  Fix typo in both imaginary case.
(fold_complex_div_parts, fold_complex_div): New.
(fold): Use them.
* tree.h (fold_complex_mult_parts, fold_complex_div_parts): Declare.

Patches:
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/ChangeLog.diff?cvsroot=gcc&r1=2.7581&r2=2.7582
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/builtins.c.diff?cvsroot=gcc&r1=1.425&r2=1.426
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/fold-const.c.diff?cvsroot=gcc&r1=1.516&r2=1.517
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/tree.h.diff?cvsroot=gcc&r1=1.692&r2=1.693
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/testsuite/gcc.dg/tree-ssa/complex-1.c.diff?cvsroot=gcc&r1=NONE&r2=1.1
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/testsuite/gcc.dg/tree-ssa/complex-2.c.diff?cvsroot=gcc&r1=NONE&r2=1.1



-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-24 Thread rth at gcc dot gnu dot org

--- Additional Comments From rth at gcc dot gnu dot org  2005-02-24 20:04 
---
Fixed.

-- 
   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution||FIXED


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-18 Thread rth at gcc dot gnu dot org


-- 
   What|Removed |Added

 AssignedTo|unassigned at gcc dot gnu   |rth at gcc dot gnu dot org
   |dot org |
 Status|NEW |ASSIGNED
   Last reconfirmed|2005-02-14 15:55:36 |2005-02-19 02:41:20
   date||


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-15 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-02-15 
10:29 ---

 And in fact this only can happen with -funsafe-math-optimizations (or maybe
with -fno-trapping-
 math because a+0.0 can trap.

Hmm...

if b is complex and has the value (0., signalling NaN) and a is
real with the value 1.0, should a+b trap?  I don't think so, but I'm
open to discussion on that point.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation (-ffast-math)

2005-02-14 Thread pinskia at gcc dot gnu dot org

--- Additional Comments From pinskia at gcc dot gnu dot org  2005-02-15 
06:15 ---
(In reply to comment #3)
 For addition, this is a regression against 3.3.5:
Actually it is not and here is why:
fadds   .LC0

.LC0 is actually zero.

And in fact this only can happen with -funsafe-math-optimizations (or maybe 
with -fno-trapping-math) because a+0.0 can trap.

-- 
   What|Removed |Added

Summary|[4.0 regression] Special-   |Special-case real + complex
   |case real + complex |arithmetic operation (-
   |arithmetic operation|ffast-math)


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953


[Bug middle-end/19953] Special-case real + complex arithmetic operation

2005-02-14 Thread Thomas dot Koenig at online dot de

--- Additional Comments From Thomas dot Koenig at online dot de  2005-02-14 
20:06 ---
Same thing for complex division, where the performance
penalty is probably also pretty severe:


$ cat c-div.c
#include <math.h>
#include <complex.h>

int main()
{
float a;
complex float b,c;
foo(a,b);
c = b/a;
return creal(c) + cimag(c) < 0;
}
$ gcc -fdump-tree-all-all -O3 -S c-div.c
$ tail -20 c-div.c.t65.optimized
  complex floatD.25 c.20D.2363;
  complex floatD.25 c.19D.2362;
  intD.0 D.2361;
  complex floatD.25 D.2360;
  complex floatD.25 D.2359;
  floatD.21 a.18D.2358;

  # BLOCK 0
  # PRED: ENTRY [100.0%]  (fallthru,exec)
  #   aD.2354_25 = V_MAY_DEF <aD.2354_1>;
  #   bD.2355_26 = V_MAY_DEF <bD.2355_5>;
  foo (aD.2354, bD.2355);
  #   D.2360_11 = V_MUST_DEF <D.2360_10>;
  D.2360 = __divsc3 (REALPART_EXPR <bD.2355>, IMAGPART_EXPR <bD.2355>, 
aD.2354,0.0);
  return (doubleD.22) REALPART_EXPR <D.2360> + (doubleD.22) IMAGPART_EXPR
<D.2360> < 0.0;
  # SUCC: EXIT [100.0%]

}

Addition has the same problem. Here, a floating point register is
carefully zeroed in order to add something to it:

$ cat c-add.c
#include <math.h>
#include <complex.h>

int main()
{
float a;
complex float b,c;
foo(a,b);
c = b+a;
return creal(c) + cimag(c) < 0;
}
$ gcc -fdump-tree-all-all -O3 -S c-add.c
$ tail -20 c-add.c.t65.optimized
  doubleD.22 D.2365;
  floatD.21 D.2364;
  complex floatD.25 c.20D.2363;
  complex floatD.25 c.19D.2362;
  intD.0 D.2361;
  complex floatD.25 D.2360;
  complex floatD.25 D.2359;
  floatD.21 a.18D.2358;

  # BLOCK 0
  # PRED: ENTRY [100.0%]  (fallthru,exec)
  #   aD.2354_27 = V_MAY_DEF <aD.2354_1>;
  #   bD.2355_28 = V_MAY_DEF <bD.2355_7>;
  foo (aD.2354, bD.2355);
  return (doubleD.22) (aD.2354 + REALPART_EXPR <bD.2355>) + (doubleD.22)
(IMAGPART_EXPR <bD.2355> + 0.0) < 0.0;
  # SUCC: EXIT [100.0%]

}


$ tail -20 c-add.s
leal    -4(%ebp), %eax
movl    %eax, (%esp)
call    foo
flds    -12(%ebp)
fldz
fadds   -8(%ebp)
fxch    %st(1)
fadds   -4(%ebp)
leave
faddp   %st, %st(1)
fldz
fucompp
fnstsw  %ax
testb   $69, %ah
sete    %al
movzbl  %al, %eax
ret
.size   main, .-main
.ident  "GCC: (GNU) 4.0.0 20050212 (experimental)"
.section        .note.GNU-stack,"",@progbits

If somebody tackles this, it would also be nice if purely
imaginary numbers were also special-cased.

-- 
   What|Removed |Added

Summary|Special-case real*complex   |Special-case real + complex
   |multiplication for  |arithmetic operation
   |flag_complex_method=2   |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19953