Recently I noticed that clearing ar.ssd/ar.csd right before srlz.d
causes significant stalling in the syscall path.  The patch below
fixes that by moving the register writes after srlz.d.  On a Madison,
this drops break-based getpid() from 241 to 226 cycles (-15 cycles).

        --david

Signed-off-by: David Mosberger-Tang <[EMAIL PROTECTED]>

===== arch/ia64/kernel/entry.S 1.80 vs edited =====
--- 1.80/arch/ia64/kernel/entry.S       2005-01-28 16:37:04 -08:00
+++ edited/arch/ia64/kernel/entry.S     2005-03-08 13:07:35 -08:00
@@ -728,12 +728,8 @@
        mov f8=f0               // clear f8
        ;;
        ld8 r30=[r2],16         // M0|1 load cr.ifs
-       mov.m ar.ssd=r0         // M2 clear ar.ssd
-       cmp.eq p9,p0=r0,r0      // set p9 to indicate that we should restore cr.ifs
-       ;;
        ld8 r25=[r3],16         // M0|1 load ar.unat
-       mov.m ar.csd=r0         // M2 clear ar.csd
-       mov r22=r0              // clear r22
+       cmp.eq p9,p0=r0,r0      // set p9 to indicate that we should restore cr.ifs
        ;;
        ld8 r26=[r2],PT(B0)-PT(AR_PFS)  // M0|1 load ar.pfs
 (pKStk)        mov r22=psr             // M2 read PSR now that interrupts are disabled
@@ -756,11 +752,15 @@
        mov f7=f0               // clear f7
        ;;
        ld8.fill r12=[r2]       // restore r12 (sp)
+       mov.m ar.ssd=r0         // M2 clear ar.ssd
+       mov r22=r0              // clear r22
+
        ld8.fill r15=[r3]       // restore r15
+(pUStk) st1 [r14]=r17
        addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
        ;;
 (pUStk)        ld4 r3=[r3]             // r3 = cpu_data->phys_stacked_size_p8
-(pUStk) st1 [r14]=r17
+       mov.m ar.csd=r0         // M2 clear ar.csd
        mov b6=r18              // I0  restore b6
        ;;
        mov r14=r0              // clear r14
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to