Hi --

I'm seeing register allocation problems and code size increases
with gcc-4.6.2 (and gcc-head) compared with the older gcc-4.1.2.
In both cases the code is compiled with -O3.

One of my test cases has a long series of nested if statements,
each with the same comparison and a similar computation.

        if (n<max_no){
          n+=*(cp-*p++);
          if (n<max_no){
            n+=*(cp-*p);
              if (n<max_no){
                . . .          ~20 levels of nesting
                   <more computations with 'cp' and 'p'>
                . . . }}}

gcc-4.6.2 generates many blocks like the following:
        lwi     r28,r1,68       -- load into dead reg
        lwi     r31,r1,140      -- load p from stack
        lbui    r28,r31,0
        rsubk   r31,r28,r19
        lbui    r31,r31,0
        addk    r29,r29,r31
        swi     r31,r1,308
        lwi     r31,r1,428      -- load of max_no from stack
        cmp     r28,r31,r29     -- n in r29
        bgeid   r28,$L46

gcc-4.1.2 generates the following:
        lbui    r3,r26,3
        rsubk   r3,r3,r19
        lbui    r3,r3,0
        addk    r30,r30,r3
        swi     r3,r1,80
        cmp     r18,r9,r30      -- max_no in r9, n in r30
        bgei    r18,$L6

gcc-4.6.2 (and gcc-head) load max_no from the stack in each block.
There are also extra loads into r28 (whose loaded value is never used)
and r31 at the start of each block.  Only r28, r29, and r31 are used.

I'm having a hard time telling what is happening or why.  The
IRA dump has this line:
   Ignoring reg 772, has equiv memory
where pseudo 772 is loaded with max_no early in the function.

The reload dump has
Reloads for insn # 254
Reload 0: reload_in (SI) = (reg/v:SI 722 [ max_no ])
        GR_REGS, RELOAD_FOR_INPUT (opnum = 1)
        reload_in_reg: (reg/v:SI 722 [ max_no ])
        reload_reg_rtx: (reg:SI 31 r31)
and similar for each of the other insns using 722.

This is followed by
  Spilling for insn 254.
  Using reg 31 for reload 0
for each insn using pseudo 722.

Any idea what is going on?

--
Michael Eager    ea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077
#if 0
mb-gcc -O3 -mhard-float -fdump-rtl-all -c s.c -save-temps
#endif
/* Unsigned byte; the type of the in/bp buffers and of the pointers into them. */
typedef unsigned char uchar;

/* CORNER_LIST is a fixed-size array of 15000 six-int records. */
typedef struct {
  int x, y, info, dx, dy, I;
} CORNER_LIST[15000];
/*
 * susan_corners -- test-case body: per-pixel neighbourhood sum with a long
 * chain of nested "n < max_no" early-outs.
 *
 * Parameters (pre-ANSI declaration list):
 *   in           byte buffer indexed as in[row*x_size + col]
 *   r            int buffer indexed the same way; written with max_no-n
 *   bp           base pointer; lookups are read at *(bp + in[centre] - in[nbr])
 *   max_no       threshold compared against the running sum n
 *   corner_list  array of records; only corner_list[n].info is written in
 *                the code that is currently enabled
 *   x_size       row stride and column bound
 *   y_size       row bound
 *
 * For every (i,j) with 5 <= i < y_size-5 and 5 <= j < x_size-5 the loop body
 * starts n at 100 and adds *(cp - *p) for 36 neighbours of (i,j): rows
 * i-3..i+3 with widths 3,5,7,6,7,5,3 (the centre pixel itself is skipped).
 * The first 18 additions are unconditional; each of the remaining 18 is
 * guarded by its own "if (n<max_no)".
 *
 * The function has no declared return type (implicit int) and no return
 * statement.
 *
 * NOTE(review): cgx and cgy are declared below but never assigned anywhere in
 * this function, yet they are written through at cgx[...] / cgy[...].
 * NOTE(review): n is read by the final corner_list[n].info=7 with whatever
 * value the last loop iteration left in it (the "n=0" reset sits inside the
 * disabled #if 0 region); if the loops never execute, n is never assigned.
 * NOTE(review): abs() is called with no declaration visible in this block.
 */
susan_corners(in,r,bp,max_no,corner_list,x_size,y_size)
  uchar *in, *bp;
  int *r, max_no, x_size, y_size;
  CORNER_LIST corner_list;
{
int n,x,y,sq,xx,yy,
      i,j,*cgx,*cgy;
float divide;
uchar c,*p,*cp;

  for (i=5;i<y_size-5;i++)
    for (j=5;j<x_size-5;j++) 
      {
        n=100;
        /* p starts at (row i-3, col j-1); cp is bp offset by the centre byte. */
        p=in + (i-3)*x_size + j - 1;
        cp=bp + in[i*x_size+j];

        /* row i-3: cols j-1..j+1 */
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p);
        p+=x_size-3;

        /* row i-2: cols j-2..j+2 */
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p);
        p+=x_size-5;

        /* row i-1: cols j-3..j+3 */
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p);
        p+=x_size-6;

        /* row i: cols j-3..j-1 */
        n+=*(cp-*p++);
        n+=*(cp-*p++);
        n+=*(cp-*p);
      /* From here on every further addition is guarded by n<max_no. */
      if (n<max_no){
        /* p+=2 steps over the centre pixel to col j+1. */
        p+=2;
        n+=*(cp-*p++);
#if 1
        if (n<max_no){
          n+=*(cp-*p++);
          if (n<max_no){
            n+=*(cp-*p);
              if (n<max_no){
                /* row i+1: cols j-3..j+3 */
                p+=x_size-6;
        	n+=*(cp-*p++);
      		if (n<max_no){
                  n+=*(cp-*p++);
                  if (n<max_no){
                    n+=*(cp-*p++);
                    if (n<max_no){
                      n+=*(cp-*p++);
                      if (n<max_no){
                        n+=*(cp-*p++);
                        if (n<max_no){
                          n+=*(cp-*p++);
                          if (n<max_no){
                            n+=*(cp-*p);
                            if (n<max_no){
        		      /* row i+2: cols j-2..j+2 */
        		      p+=x_size-5;
        		      n+=*(cp-*p++);
      			      if (n<max_no){
        			n+=*(cp-*p++);
      				if (n<max_no){
        			  n+=*(cp-*p++);
      			 	  if (n<max_no){
        			    n+=*(cp-*p++);
      			            if (n<max_no){
        			      n+=*(cp-*p);
      				      if (n<max_no){
        			        /* row i+3: cols j-1..j+1 */
        			        p+=x_size-3;
        				n+=*(cp-*p++);
#endif
      					if (n<max_no){
        			 	  n+=*(cp-*p++);
      				          if (n<max_no){
        				    n+=*(cp-*p);

        				    if (n<max_no)
        				      {
            					/*
            					 * Second pass over the same 36 neighbours: x accumulates
            					 * each lookup weighted by its column offset from j, y
            					 * weighted by its row offset from i.
            					 */
            					x=0;y=0;
            					p=in + (i-3)*x_size + j - 1;

            					c=*(cp-*p++);x-=c;y-=3*c;
            					c=*(cp-*p++);y-=3*c;
            					c=*(cp-*p);x+=c;y-=3*c;
            					p+=x_size-3;

            					c=*(cp-*p++);x-=2*c;y-=2*c;
            					c=*(cp-*p++);x-=c;y-=2*c;
            					c=*(cp-*p++);y-=2*c;
            					c=*(cp-*p++);x+=c;y-=2*c;
            					c=*(cp-*p);x+=2*c;y-=2*c;
            					p+=x_size-5;

            					c=*(cp-*p++);x-=3*c;y-=c;
            					c=*(cp-*p++);x-=2*c;y-=c;
            					c=*(cp-*p++);x-=c;y-=c;
            					c=*(cp-*p++);y-=c;
            					c=*(cp-*p++);x+=c;y-=c;
            					c=*(cp-*p++);x+=2*c;y-=c;
            					c=*(cp-*p);x+=3*c;y-=c;
            					p+=x_size-6;

            					c=*(cp-*p++);x-=3*c;
            					c=*(cp-*p++);x-=2*c;
            					c=*(cp-*p);x-=c;
            					p+=2;
            					c=*(cp-*p++);x+=c;
            					c=*(cp-*p++);x+=2*c;
            					c=*(cp-*p);x+=3*c;
            					p+=x_size-6;

            					c=*(cp-*p++);x-=3*c;y+=c;
            					c=*(cp-*p++);x-=2*c;y+=c;
            					c=*(cp-*p++);x-=c;y+=c;
            					c=*(cp-*p++);y+=c;
            					c=*(cp-*p++);x+=c;y+=c;
            					c=*(cp-*p++);x+=2*c;y+=c;
            					c=*(cp-*p);x+=3*c;y+=c;
            					p+=x_size-5;

            					c=*(cp-*p++);x-=2*c;y+=2*c;
            					c=*(cp-*p++);x-=c;y+=2*c;
            					c=*(cp-*p++);y+=2*c;
            					c=*(cp-*p++);x+=c;y+=2*c;
            					c=*(cp-*p);x+=2*c;y+=2*c;
            					p+=x_size-3;

            					c=*(cp-*p++);x-=c;y+=3*c;
            					c=*(cp-*p++);y+=3*c;
            					c=*(cp-*p);x+=c;y+=3*c;

            					xx=x*x;
            					yy=y*y;
            					sq=xx+yy;
            					if ( sq > ((n*n)/2) )
            				 	  {
#if 1
              					    /*
              					     * sq is reused: first as the sign of the dominant
              					     * component (abs(v)/v), then as the sum of three lookups
              					     * taken 1, 2 and 3 steps along that axis, with the other
              					     * coordinate set to divide*step rounded half away from zero.
              					     */
              					    if(yy<xx) 
						      {
                					divide=(float)y/(float)abs(x);
                					sq=abs(x)/x;
                					sq=*(cp-in[(i+( (divide) < 0 ? ((int)(divide-0.5)) : ((int)(divide+0.5)) ))*x_size+j+sq]) +
                   					*(cp-in[(i+( (2*divide) < 0 ? ((int)(2*divide-0.5)) : ((int)(2*divide+0.5)) ))*x_size+j+2*sq]) +
                   					*(cp-in[(i+( (3*divide) < 0 ? ((int)(3*divide-0.5)) : ((int)(3*divide+0.5)) ))*x_size+j+3*sq]);
						      }
              				  	    else 
						      {
                					divide=(float)x/(float)abs(y);
                					sq=abs(y)/y;
                					sq=*(cp-in[(i+sq)*x_size+j+( (divide) < 0 ? ((int)(divide-0.5)) : ((int)(divide+0.5)) )]) +
                   					*(cp-in[(i+2*sq)*x_size+j+( (2*divide) < 0 ? ((int)(2*divide-0.5)) : ((int)(2*divide+0.5)) )]) +
                   					*(cp-in[(i+3*sq)*x_size+j+( (3*divide) < 0 ? ((int)(3*divide-0.5)) : ((int)(3*divide+0.5)) )]);
						      }

              					    if(sq>290)
						      {
                					/* NOTE(review): cgx/cgy have not been assigned at this point. */
                					r[i*x_size+j] = max_no-n;
                					cgx[i*x_size+j] = (51*x)/n;
                					cgy[i*x_size+j] = (51*y)/n;
						      }
#endif
            					  }
 					      }
					  }
					}
#if 1
				      }
				    }
				  }
				}
			      }
			    }
			  }
			}
		      }
		    }
		  }
		}
	      }
	    }
	  }
#endif
	}
      }

/*
 * Disabled second pass: would reset n and append to corner_list every (i,j)
 * whose r value is positive and compares greater (or greater-or-equal) than
 * the other r entries in the surrounding 7x7 window.
 */
#if 0

  n=0;
  for (i=5;i<y_size-5;i++)
    for (j=5;j<x_size-5;j++) {
       x = r[i*x_size+j];
       if (x>0) {
          if (
                (x>r[(i-3)*x_size+j-3]) &&
                (x>r[(i-3)*x_size+j-2]) &&
                (x>r[(i-3)*x_size+j-1]) &&
                (x>r[(i-3)*x_size+j ]) &&
                (x>r[(i-3)*x_size+j+1]) &&
                (x>r[(i-3)*x_size+j+2]) &&
                (x>r[(i-3)*x_size+j+3]) &&

                (x>r[(i-2)*x_size+j-3]) &&
                (x>r[(i-2)*x_size+j-2]) &&
                (x>r[(i-2)*x_size+j-1]) &&
                (x>r[(i-2)*x_size+j ]) &&
                (x>r[(i-2)*x_size+j+1]) &&
                (x>r[(i-2)*x_size+j+2]) &&
                (x>r[(i-2)*x_size+j+3]) &&

                (x>r[(i-1)*x_size+j-3]) &&
                (x>r[(i-1)*x_size+j-2]) &&
                (x>r[(i-1)*x_size+j-1]) &&
                (x>r[(i-1)*x_size+j ]) &&
                (x>r[(i-1)*x_size+j+1]) &&
                (x>r[(i-1)*x_size+j+2]) &&
                (x>r[(i-1)*x_size+j+3]) &&

                (x>r[(i)*x_size+j-3]) &&
                (x>r[(i)*x_size+j-2]) &&
                (x>r[(i)*x_size+j-1]) &&
                (x>=r[(i)*x_size+j+1]) &&
                (x>=r[(i)*x_size+j+2]) &&
                (x>=r[(i)*x_size+j+3]) &&

                (x>=r[(i+1)*x_size+j-3]) &&
                (x>=r[(i+1)*x_size+j-2]) &&
                (x>=r[(i+1)*x_size+j-1]) &&
                (x>=r[(i+1)*x_size+j ]) &&
                (x>=r[(i+1)*x_size+j+1]) &&
                (x>=r[(i+1)*x_size+j+2]) &&
                (x>=r[(i+1)*x_size+j+3]) &&

                (x>=r[(i+2)*x_size+j-3]) &&
                (x>=r[(i+2)*x_size+j-2]) &&
                (x>=r[(i+2)*x_size+j-1]) &&
                (x>=r[(i+2)*x_size+j ]) &&
                (x>=r[(i+2)*x_size+j+1]) &&
                (x>=r[(i+2)*x_size+j+2]) &&
                (x>=r[(i+2)*x_size+j+3]) &&

                (x>=r[(i+3)*x_size+j-3]) &&
                (x>=r[(i+3)*x_size+j-2]) &&
                (x>=r[(i+3)*x_size+j-1]) &&
                (x>=r[(i+3)*x_size+j ]) &&
                (x>=r[(i+3)*x_size+j+1]) &&
                (x>=r[(i+3)*x_size+j+2]) &&
                (x>=r[(i+3)*x_size+j+3]) 
          )
	{
	  corner_list[n].info=0;
	  corner_list[n].x=j;
	  corner_list[n].y=i;
	  corner_list[n].dx=cgx[i*x_size+j];
	  corner_list[n].dy=cgy[i*x_size+j];
	  corner_list[n].I=in[i*x_size+j];
	  n++;
	  if(n==15000){
            }
        }
   }
}
#endif
/* With the block above disabled, n here is the last neighbourhood sum (see header note). */
corner_list[n].info=7;

}

Reply via email to