------- Comment #2 from jv244 at cam dot ac dot uk  2007-03-05 11:47 -------
(In reply to comment #1)
> We don't unroll non-innermost loops at the moment.  I don't know if sccp can
> be taught to handle this case (and if it's worth it).

Such small loops are quite typical of some quantum chemistry integral
routines.
I'm currently experimenting with rewriting the kernel mentioned in PR 31021.
If I do this unrolling by hand, I get quite a speedup on the full kernel:

hand unrolled:
# best time    5.260329
loops:
# best time    6.616413

which is quite impressive because these loops take at most 30% of the kernel's
total time.

The actual code in question is:

             coef(:,:)=0.0_wp
             lxy=0 ; lx=0
             DO lxa=0,1
             DO lxb=0,1
              lx = lx + 1
              g1=0.0_wp
              g2=0.0_wp
              g1k=0.0_wp
              g2k=0.0_wp
              DO lya=0,1-lxa
              DO lyb=0,1-lxb
                    lxy=lxy+1
                    g1=g1+pyx(1,lxy)*dpy(lyb,lya,jg)
                    g2=g2+pyx(1,lxy)*dpy(lyb,lya,jg2)
                    g1k=g1k+pyx(2,lxy)*dpy(lyb,lya,jg)
                    g2k=g2k+pyx(2,lxy)*dpy(lyb,lya,jg2)
              ENDDO
              ENDDO
              DO icoef=1,3
                 coef(icoef,1)=coef(icoef,1)+alpha(icoef,lx)*g1
                 coef(icoef,2)=coef(icoef,2)+alpha(icoef,lx)*g2
                 coef(icoef,3)=coef(icoef,3)+alpha(icoef,lx)*g1k
                 coef(icoef,4)=coef(icoef,4)+alpha(icoef,lx)*g2k
              ENDDO
             ENDDO
             ENDDO

and the hand-unrolling just explicitly expands all loops into the loop-free
version of exactly the same statements:

             coef(:,:)=0.0_wp
              g1=0.0_wp
              g2=0.0_wp
              g1k=0.0_wp
              g2k=0.0_wp
                    g1=g1+pyx(1,1)*dpy(0,0,jg)
                    g2=g2+pyx(1,1)*dpy(0,0,jg2)
                    g1k=g1k+pyx(2,1)*dpy(0,0,jg)
                    g2k=g2k+pyx(2,1)*dpy(0,0,jg2)
                    g1=g1+pyx(1,2)*dpy(1,0,jg)
                    g2=g2+pyx(1,2)*dpy(1,0,jg2)
                    g1k=g1k+pyx(2,2)*dpy(1,0,jg)
                    g2k=g2k+pyx(2,2)*dpy(1,0,jg2)
                    g1=g1+pyx(1,3)*dpy(0,1,jg)
                    g2=g2+pyx(1,3)*dpy(0,1,jg2)
                    g1k=g1k+pyx(2,3)*dpy(0,1,jg)
                    g2k=g2k+pyx(2,3)*dpy(0,1,jg2)
                    g1=g1+pyx(1,4)*dpy(1,1,jg)
                    g2=g2+pyx(1,4)*dpy(1,1,jg2)
                    g1k=g1k+pyx(2,4)*dpy(1,1,jg)
                    g2k=g2k+pyx(2,4)*dpy(1,1,jg2)
                 coef(01,01)=coef(01,01)+alpha(1,1)*g1
                 coef(01,02)=coef(01,02)+alpha(1,1)*g2
                 coef(01,03)=coef(01,03)+alpha(1,1)*g1k
                 coef(01,04)=coef(01,04)+alpha(1,1)*g2k
                 coef(02,01)=coef(02,01)+alpha(2,1)*g1
                 coef(02,02)=coef(02,02)+alpha(2,1)*g2
                 coef(02,03)=coef(02,03)+alpha(2,1)*g1k
                 coef(02,04)=coef(02,04)+alpha(2,1)*g2k
                 coef(03,01)=coef(03,01)+alpha(3,1)*g1
                 coef(03,02)=coef(03,02)+alpha(3,1)*g2
                 coef(03,03)=coef(03,03)+alpha(3,1)*g1k
                 coef(03,04)=coef(03,04)+alpha(3,1)*g2k
              g1=0.0_wp
              g2=0.0_wp
              g1k=0.0_wp
              g2k=0.0_wp
                    g1=g1+pyx(1,5)*dpy(0,0,jg)
                    g2=g2+pyx(1,5)*dpy(0,0,jg2)
                    g1k=g1k+pyx(2,5)*dpy(0,0,jg)
                    g2k=g2k+pyx(2,5)*dpy(0,0,jg2)
                    g1=g1+pyx(1,6)*dpy(0,1,jg)
                    g2=g2+pyx(1,6)*dpy(0,1,jg2)
                    g1k=g1k+pyx(2,6)*dpy(0,1,jg)
                    g2k=g2k+pyx(2,6)*dpy(0,1,jg2)
                 coef(01,01)=coef(01,01)+alpha(1,2)*g1
                 coef(01,02)=coef(01,02)+alpha(1,2)*g2
                 coef(01,03)=coef(01,03)+alpha(1,2)*g1k
                 coef(01,04)=coef(01,04)+alpha(1,2)*g2k
                 coef(02,01)=coef(02,01)+alpha(2,2)*g1
                 coef(02,02)=coef(02,02)+alpha(2,2)*g2
                 coef(02,03)=coef(02,03)+alpha(2,2)*g1k
                 coef(02,04)=coef(02,04)+alpha(2,2)*g2k
                 coef(03,01)=coef(03,01)+alpha(3,2)*g1
                 coef(03,02)=coef(03,02)+alpha(3,2)*g2
                 coef(03,03)=coef(03,03)+alpha(3,2)*g1k
                 coef(03,04)=coef(03,04)+alpha(3,2)*g2k
              g1=0.0_wp
              g2=0.0_wp
              g1k=0.0_wp
              g2k=0.0_wp
                    g1=g1+pyx(1,7)*dpy(0,0,jg)
                    g2=g2+pyx(1,7)*dpy(0,0,jg2)
                    g1k=g1k+pyx(2,7)*dpy(0,0,jg)
                    g2k=g2k+pyx(2,7)*dpy(0,0,jg2)
                    g1=g1+pyx(1,8)*dpy(1,0,jg)
                    g2=g2+pyx(1,8)*dpy(1,0,jg2)
                    g1k=g1k+pyx(2,8)*dpy(1,0,jg)
                    g2k=g2k+pyx(2,8)*dpy(1,0,jg2)
                 coef(01,01)=coef(01,01)+alpha(1,3)*g1
                 coef(01,02)=coef(01,02)+alpha(1,3)*g2
                 coef(01,03)=coef(01,03)+alpha(1,3)*g1k
                 coef(01,04)=coef(01,04)+alpha(1,3)*g2k
                 coef(02,01)=coef(02,01)+alpha(2,3)*g1
                 coef(02,02)=coef(02,02)+alpha(2,3)*g2
                 coef(02,03)=coef(02,03)+alpha(2,3)*g1k
                 coef(02,04)=coef(02,04)+alpha(2,3)*g2k
                 coef(03,01)=coef(03,01)+alpha(3,3)*g1
                 coef(03,02)=coef(03,02)+alpha(3,3)*g2
                 coef(03,03)=coef(03,03)+alpha(3,3)*g1k
                 coef(03,04)=coef(03,04)+alpha(3,3)*g2k
              g1=0.0_wp
              g2=0.0_wp
              g1k=0.0_wp
              g2k=0.0_wp
                    g1=g1+pyx(1,9)*dpy(0,0,jg)
                    g2=g2+pyx(1,9)*dpy(0,0,jg2)
                    g1k=g1k+pyx(2,9)*dpy(0,0,jg)
                    g2k=g2k+pyx(2,9)*dpy(0,0,jg2)
                 coef(01,01)=coef(01,01)+alpha(1,4)*g1
                 coef(01,02)=coef(01,02)+alpha(1,4)*g2
                 coef(01,03)=coef(01,03)+alpha(1,4)*g1k
                 coef(01,04)=coef(01,04)+alpha(1,4)*g2k
                 coef(02,01)=coef(02,01)+alpha(2,4)*g1
                 coef(02,02)=coef(02,02)+alpha(2,4)*g2
                 coef(02,03)=coef(02,03)+alpha(2,4)*g1k
                 coef(02,04)=coef(02,04)+alpha(2,4)*g2k
                 coef(03,01)=coef(03,01)+alpha(3,4)*g1
                 coef(03,02)=coef(03,02)+alpha(3,4)*g2
                 coef(03,03)=coef(03,03)+alpha(3,4)*g1k
                 coef(03,04)=coef(03,04)+alpha(3,4)*g2k


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31040

Reply via email to