Hi -- I'm seeing register allocation problems and code size increases with gcc-4.6.2 (and gcc-head) compared with an older release (gcc-4.1.2). In both cases the code is compiled with -O3.
One test case that I have has a long series of nested ifs, each with the same comparison and a similar computation:

    if (n<max_no){
      n+=*(cp-*p++);
      if (n<max_no){
        n+=*(cp-*p);
        if (n<max_no){
          . . . ~20 levels of nesting
          <more computations with 'cp' and 'p'>
          . . .
    }}}

Gcc-4.6.2 generates many blocks like the following:

    lwi   r28,r1,68     -- load into dead reg
    lwi   r31,r1,140    -- load p from stack
    lbui  r28,r31,0
    rsubk r31,r28,r19
    lbui  r31,r31,0
    addk  r29,r29,r31
    swi   r31,r1,308
    lwi   r31,r1,428    -- load of max_no from stack
    cmp   r28,r31,r29   -- n in r29
    bgeid r28,$L46

gcc-4.1.2 generates the following:

    lbui  r3,r26,3
    rsubk r3,r3,r19
    lbui  r3,r3,0
    addk  r30,r30,r3
    swi   r3,r1,80
    cmp   r18,r9,r30    -- max_no in r9, n in r30
    bgei  r18,$L6

gcc-4.6.2 (and gcc-head) load max_no from the stack in each block. There are also extra loads into r28 (which is not used) and r31 at the start of each block. Only r28, r29, and r31 are used.

I'm having a hard time telling what is happening or why. The IRA dump has this line:

    Ignoring reg 772, has equiv memory

where pseudo 772 is loaded with max_no early in the function.

The reload dump has

    Reloads for insn # 254
    Reload 0: reload_in (SI) = (reg/v:SI 722 [ max_no ])
        GR_REGS, RELOAD_FOR_INPUT (opnum = 1)
        reload_in_reg: (reg/v:SI 722 [ max_no ])
        reload_reg_rtx: (reg:SI 31 r31)

and similar for each of the other insns using 722. This is followed by

    Spilling for insn 254.
    Using reg 31 for reload 0

for each insn using pseudo 722.

Any idea what is going on?

--
Michael Eager ea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306 650-325-8077
/*
 * Compile command for this test case.  It is wrapped in #if 0 so the
 * preprocessor skips it.
 */
#if 0
mb-gcc -O3 -mhard-float -fdump-rtl-all -c s.c -save-temps
#endif
/* Byte-sized unsigned value used for the image and lookup-table pointers. */
typedef unsigned char uchar;
/* Fixed-size array of 15000 records with six int fields each. */
typedef struct {int x,y,info, dx, dy, I;} CORNER_LIST[15000];
/*
 * susan_corners -- compiler test case: for every interior position (i,j)
 * of an x_size by y_size byte image, accumulate table look-ups over a
 * neighbourhood and, if the running sum stays below max_no, compute a
 * weighted (x,y) sum and store results through r, cgx and cgy.
 *
 * Parameters (old-style identifier-list definition, implicit int return):
 *   in          - byte image, indexed as in[row*x_size + col]
 *   r           - int output array, indexed as r[row*x_size + col]
 *   bp          - base of a byte look-up table; entries are read at
 *                 (bp + in[i*x_size+j]) - in[neighbour]
 *   max_no      - threshold that the running sum n is compared against
 *   corner_list - record array; one entry's info field is written at the end
 *   x_size      - row length used for all index arithmetic
 *   y_size      - number of rows; bounds the outer loop
 *
 * NOTE(review): the function has no return statement and no declared
 * return type.
 * NOTE(review): abs() is called below, but no #include or prototype for it
 * is visible in this chunk.
 * NOTE(review): cgx and cgy are declared but never assigned in the visible
 * code before they are written through -- confirm whether an allocation
 * was removed when the test case was reduced.
 */
susan_corners(in,r,bp,max_no,corner_list,x_size,y_size)
uchar *in, *bp;
int *r, max_no, x_size, y_size;
CORNER_LIST corner_list;
{
  int n,x,y,sq,xx,yy, i,j,*cgx,*cgy;
  float divide;
  uchar c,*p,*cp;
  /* Visit every position that is at least 5 rows/columns from each edge. */
  for (i=5;i<y_size-5;i++)
    for (j=5;j<x_size-5;j++) {
      /* n starts at 100 and only ever has table entries added to it. */
      n=100;
      /* p walks the neighbourhood, starting at row i-3, column j-1. */
      p=in + (i-3)*x_size + j - 1;
      /* cp is the table position selected by the centre byte in[i][j]. */
      cp=bp + in[i*x_size+j];
      /* Row i-3: columns j-1 .. j+1 (the last read does not advance p). */
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p);
      /* Step from (i-3, j+1) to (i-2, j-2). */
      p+=x_size-3;
      /* Row i-2: columns j-2 .. j+2. */
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p);
      /* Step from (i-2, j+2) to (i-1, j-3). */
      p+=x_size-5;
      /* Row i-1: columns j-3 .. j+3. */
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p);
      /* Step from (i-1, j+3) to (i, j-3). */
      p+=x_size-6;
      /* Row i, left half: columns j-3 .. j-1. */
      n+=*(cp-*p++);
      n+=*(cp-*p++);
      n+=*(cp-*p);
      /*
       * From here on n is re-tested against max_no after every single
       * addition, giving one nesting level per remaining neighbour.
       */
      if (n<max_no){
        /* Skip the centre: move from column j-1 to column j+1 of row i. */
        p+=2;
        n+=*(cp-*p++);
/*
 * Toggle for 15 of the nested levels.  This #if opens 15 braces; the
 * matching "#if 1 } } ... #endif" further down closes them, so both
 * directives must be switched together.
 */
#if 1
        /* Row i, columns j+2 and j+3. */
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p);
        /* Row i+1: columns j-3 .. j+3. */
        if (n<max_no){ p+=x_size-6; n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p);
        /* Row i+2: columns j-2 .. j+2. */
        if (n<max_no){ p+=x_size-5; n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p);
        /* Row i+3: first column (j-1) when this section is enabled. */
        if (n<max_no){ p+=x_size-3; n+=*(cp-*p++);
#endif
        /* Last two neighbours, always compiled. */
        if (n<max_no){ n+=*(cp-*p++);
        if (n<max_no){ n+=*(cp-*p);
        if (n<max_no) {
          /*
           * Second pass over the same neighbourhood: each table entry c is
           * added to x weighted by its column offset from j and to y
           * weighted by its row offset from i.
           */
          x=0;y=0;
          p=in + (i-3)*x_size + j - 1;
          /* Row i-3 (y weight -3). */
          c=*(cp-*p++);x-=c;y-=3*c;
          c=*(cp-*p++);y-=3*c;
          c=*(cp-*p);x+=c;y-=3*c;
          p+=x_size-3;
          /* Row i-2 (y weight -2). */
          c=*(cp-*p++);x-=2*c;y-=2*c;
          c=*(cp-*p++);x-=c;y-=2*c;
          c=*(cp-*p++);y-=2*c;
          c=*(cp-*p++);x+=c;y-=2*c;
          c=*(cp-*p);x+=2*c;y-=2*c;
          p+=x_size-5;
          /* Row i-1 (y weight -1). */
          c=*(cp-*p++);x-=3*c;y-=c;
          c=*(cp-*p++);x-=2*c;y-=c;
          c=*(cp-*p++);x-=c;y-=c;
          c=*(cp-*p++);y-=c;
          c=*(cp-*p++);x+=c;y-=c;
          c=*(cp-*p++);x+=2*c;y-=c;
          c=*(cp-*p);x+=3*c;y-=c;
          p+=x_size-6;
          /* Row i (y weight 0), skipping the centre column. */
          c=*(cp-*p++);x-=3*c;
          c=*(cp-*p++);x-=2*c;
          c=*(cp-*p);x-=c;
          p+=2;
          c=*(cp-*p++);x+=c;
          c=*(cp-*p++);x+=2*c;
          c=*(cp-*p);x+=3*c;
          p+=x_size-6;
          /* Row i+1 (y weight +1). */
          c=*(cp-*p++);x-=3*c;y+=c;
          c=*(cp-*p++);x-=2*c;y+=c;
          c=*(cp-*p++);x-=c;y+=c;
          c=*(cp-*p++);y+=c;
          c=*(cp-*p++);x+=c;y+=c;
          c=*(cp-*p++);x+=2*c;y+=c;
          c=*(cp-*p);x+=3*c;y+=c;
          p+=x_size-5;
          /* Row i+2 (y weight +2). */
          c=*(cp-*p++);x-=2*c;y+=2*c;
          c=*(cp-*p++);x-=c;y+=2*c;
          c=*(cp-*p++);y+=2*c;
          c=*(cp-*p++);x+=c;y+=2*c;
          c=*(cp-*p);x+=2*c;y+=2*c;
          p+=x_size-3;
          /* Row i+3 (y weight +3). */
          c=*(cp-*p++);x-=c;y+=3*c;
          c=*(cp-*p++);y+=3*c;
          c=*(cp-*p);x+=c;y+=3*c;
          xx=x*x;
          yy=y*y;
          sq=xx+yy;
          /* Continue only when x*x + y*y exceeds half of n*n. */
          if ( sq > ((n*n)/2) ) {
/* Toggle for the directional check and the stores below. */
#if 1
            if(yy<xx) {
              /*
               * |x| dominates: step 1, 2 and 3 columns in the direction of
               * x's sign (sq becomes +1 or -1) and divide rows per column,
               * with each row offset rounded to the nearest int, away
               * from zero.
               */
              divide=(float)y/(float)abs(x);
              sq=abs(x)/x;
              sq=*(cp-in[(i+( (divide) < 0 ? ((int)(divide-0.5)) : ((int)(divide+0.5)) ))*x_size+j+sq]) +
                 *(cp-in[(i+( (2*divide) < 0 ? ((int)(2*divide-0.5)) : ((int)(2*divide+0.5)) ))*x_size+j+2*sq]) +
                 *(cp-in[(i+( (3*divide) < 0 ? ((int)(3*divide-0.5)) : ((int)(3*divide+0.5)) ))*x_size+j+3*sq]);
            } else {
              /*
               * Otherwise: same scheme with rows and columns swapped --
               * step 1, 2 and 3 rows by the sign of y, with a rounded
               * column offset of divide per row.
               */
              divide=(float)x/(float)abs(y);
              sq=abs(y)/y;
              sq=*(cp-in[(i+sq)*x_size+j+( (divide) < 0 ? ((int)(divide-0.5)) : ((int)(divide+0.5)) )]) +
                 *(cp-in[(i+2*sq)*x_size+j+( (2*divide) < 0 ? ((int)(2*divide-0.5)) : ((int)(2*divide+0.5)) )]) +
                 *(cp-in[(i+3*sq)*x_size+j+( (3*divide) < 0 ? ((int)(3*divide-0.5)) : ((int)(3*divide+0.5)) )]);
            }
            /* sq now holds the sum of the three table entries read above. */
            if(sq>290) {
              r[i*x_size+j] = max_no-n;
              /* NOTE(review): cgx/cgy have no visible assignment; see header. */
              cgx[i*x_size+j] = (51*x)/n;
              cgy[i*x_size+j] = (51*y)/n;
            }
#endif
          /* Closes the sq test and the three always-compiled nested ifs. */
          } } } }
/* Closes the 15 braces opened inside the first "#if 1" section. */
#if 1
        } } } } } } } } } } } } } } }
#endif
      /* Closes the outermost "if (n<max_no)" and the j-loop body. */
      } }
/*
 * Disabled second stage.  If enabled it would reset n to 0 and, for each
 * interior position with r > 0, compare that value against the other 48
 * entries of the surrounding 7x7 window of r (strictly greater than the
 * entries that come before it in row-major order, greater-or-equal to
 * those that come after), recording matches in corner_list[n++].
 */
#if 0
  n=0;
  for (i=5;i<y_size-5;i++)
    for (j=5;j<x_size-5;j++) {
      x = r[i*x_size+j];
      if (x>0) {
        if (
            (x>r[(i-3)*x_size+j-3]) &&
            (x>r[(i-3)*x_size+j-2]) &&
            (x>r[(i-3)*x_size+j-1]) &&
            (x>r[(i-3)*x_size+j ]) &&
            (x>r[(i-3)*x_size+j+1]) &&
            (x>r[(i-3)*x_size+j+2]) &&
            (x>r[(i-3)*x_size+j+3]) &&
            (x>r[(i-2)*x_size+j-3]) &&
            (x>r[(i-2)*x_size+j-2]) &&
            (x>r[(i-2)*x_size+j-1]) &&
            (x>r[(i-2)*x_size+j ]) &&
            (x>r[(i-2)*x_size+j+1]) &&
            (x>r[(i-2)*x_size+j+2]) &&
            (x>r[(i-2)*x_size+j+3]) &&
            (x>r[(i-1)*x_size+j-3]) &&
            (x>r[(i-1)*x_size+j-2]) &&
            (x>r[(i-1)*x_size+j-1]) &&
            (x>r[(i-1)*x_size+j ]) &&
            (x>r[(i-1)*x_size+j+1]) &&
            (x>r[(i-1)*x_size+j+2]) &&
            (x>r[(i-1)*x_size+j+3]) &&
            (x>r[(i)*x_size+j-3]) &&
            (x>r[(i)*x_size+j-2]) &&
            (x>r[(i)*x_size+j-1]) &&
            (x>=r[(i)*x_size+j+1]) &&
            (x>=r[(i)*x_size+j+2]) &&
            (x>=r[(i)*x_size+j+3]) &&
            (x>=r[(i+1)*x_size+j-3]) &&
            (x>=r[(i+1)*x_size+j-2]) &&
            (x>=r[(i+1)*x_size+j-1]) &&
            (x>=r[(i+1)*x_size+j ]) &&
            (x>=r[(i+1)*x_size+j+1]) &&
            (x>=r[(i+1)*x_size+j+2]) &&
            (x>=r[(i+1)*x_size+j+3]) &&
            (x>=r[(i+2)*x_size+j-3]) &&
            (x>=r[(i+2)*x_size+j-2]) &&
            (x>=r[(i+2)*x_size+j-1]) &&
            (x>=r[(i+2)*x_size+j ]) &&
            (x>=r[(i+2)*x_size+j+1]) &&
            (x>=r[(i+2)*x_size+j+2]) &&
            (x>=r[(i+2)*x_size+j+3]) &&
            (x>=r[(i+3)*x_size+j-3]) &&
            (x>=r[(i+3)*x_size+j-2]) &&
            (x>=r[(i+3)*x_size+j-1]) &&
            (x>=r[(i+3)*x_size+j ]) &&
            (x>=r[(i+3)*x_size+j+1]) &&
            (x>=r[(i+3)*x_size+j+2]) &&
            (x>=r[(i+3)*x_size+j+3]) )
        {
          corner_list[n].info=0;
          corner_list[n].x=j;
          corner_list[n].y=i;
          corner_list[n].dx=cgx[i*x_size+j];
          corner_list[n].dy=cgy[i*x_size+j];
          corner_list[n].I=in[i*x_size+j];
          n++;
          if(n==15000){ }
        }
      }
    }
#endif
  /*
   * NOTE(review): with the block above disabled, n is not reset to 0 here;
   * it holds whatever the last loop iteration left in it (and is
   * unassigned if the loops never ran).
   */
  corner_list[n].info=7;
}