Hi,

I was trying to skip FreeBSD’s DELAY() on X86_64, very much like we do on ARM for Linux (or FreeBSD for that matter). I started to implement things and found a strange behaviour:

From my src/arch/x86/utility.cc

/**
 * Return from the current function without executing its body.
 *
 * Reads the return address from the memory location RSP points at, adds 8
 * to RSP, and installs the address that was read as the new PC of the
 * given thread context.
 *
 * NOTE(review): with --cpu-type=detailed and caches, the value read here
 * has been observed to be the return address of the *previous* call rather
 * than the one just pushed by the calling instruction. CopyOut(tc, &npc,
 * sp, sizeof(Addr)) was tried as an alternative read and showed the same
 * result.
 *
 * @param tc Thread context whose RSP and PC are updated.
 */
void
skipFunction(ThreadContext *tc)
{
    // Take a copy of the current PC state first; only its target changes.
    PCState returnPC = tc->pcState();

    const Addr stackPtr = tc->readIntReg(INTREG_RSP);
    DPRINTF(XXXBZ, "XXX-BZ sp %#x\n", stackPtr);

    // Fetch the return address stored at the top of the stack through the
    // thread context's virtual-address proxy.
    Addr returnAddr;
    tc->getVirtProxy().readBlob(stackPtr,
                                reinterpret_cast<uint8_t *>(&returnAddr),
                                sizeof(returnAddr));
    DPRINTF(XXXBZ, "XXX-BZ npc %#x\n", returnAddr);
    returnPC.set(returnAddr);

    // Drop the return-address slot from the stack before redirecting the PC.
    tc->setIntReg(INTREG_RSP, stackPtr + 8);
    tc->pcState(returnPC);
}


As you can see I tried two ways to read the return address off the stack, and neither (on the first try) returns the current one (after the memory write) but the previous one. As a result, the part of the calling function since the last ret is run twice; on the 2nd iteration the memory location on the stack returns the proper (former) return address and we continue. I would have expected the correct value to be visible given the instruction was committed and logged with the DPRINTF.

That’s not the behaviour I expected. Is there anything I am doing wrong or is this a (caching) bug? Can anyone enlighten me?

My command line (including private options):

command line: ./build/X86/gem5.opt -r -e -d m5out-amd64-1 --stdout-file=fbsd301452-detailed-00117.log --stderr-file=fbsd301452-detailed-00117.err --debug-flags=Exec,XXXBZ configs/example/fs.py --mem-size=1024MB --os-type=FreeBSD --virtblk --loader-config-file=loader-amd64.conf --cpu-type=detailed --disk-image=disk-amd64-r301452.img --kernel=kernel-amd64-r301452 --command-line=-hvs --caches --l2cache --l3cache --simple-trace-en

Bjoern



222604924000: system.cpu T0 : @_vprintf+255    : ret
222604924000: system.cpu T0 : @_vprintf+255.0 : RET_NEAR : ld t1, SS:[rsp] : MemRead : D=0xffffffff803e4d23 A=0xffffffff80974b98 222604924500: system.cpu T0 : @_vprintf+255.1 : RET_NEAR : addi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974ba0 222604924500: system.cpu T0 : @_vprintf+255.2 : RET_NEAR : wripi , t1, 0 : IntAlu : 222604933000: system.cpu T0 : @printf+83 : cmp DS:[0xffffffff8095c638], 0 222604933000: system.cpu T0 : @printf+83.0 : CMP_M_I : limm t2, 0 : IntAlu : D=0x0000000000000000 222604933000: system.cpu T0 : @printf+83.1 : CMP_M_I : ld t1, DS:[0xffffffff8095c638] : MemRead : D=0x0000000000000000 A=0xffffffff8095c638 222604933000: system.cpu T0 : @printf+83.2 : CMP_M_I : sub t0, t1, t2 : IntAlu : D=0x0000000000000000
222604933000: system.cpu T0 : @printf+92    : jnz       0xb
222604933000: system.cpu T0 : @printf+92.0 : JNZ_I : rdip t1, %ctrl153, : IntAlu : D=0xffffffff803e4d2e 222604933000: system.cpu T0 : @printf+92.1 : JNZ_I : limm t2, 0xb : IntAlu : D=0x000000000000000b 222604933000: system.cpu T0 : @printf+92.2 : JNZ_I : wrip , t1, t2 : IntAlu : 222604933000: system.cpu T0 : @printf+94 : mov DS:[0xffffffff8095cab8], 0x1 222604933000: system.cpu T0 : @printf+94.0 : MOV_M_I : limm t1d, 0x1 : IntAlu : D=0x0000000000000001 222604933000: system.cpu T0 : @printf+94.1 : MOV_M_I : st t1d, DS:[0xffffffff8095cab8] : MemWrite : D=0x0000000000000001 A=0xffffffff8095cab8
222604933500: system.cpu T0 : @printf+105    : add      rax, 0x50
222604933500: system.cpu T0 : @printf+105.0 : ADD_R_I : limm t1, 0x50 : IntAlu : D=0x0000000000000050 222604933500: system.cpu T0 : @printf+105.1 : ADD_R_I : add rsp, rsp, t1 : IntAlu : D=0x0000000000000000
222604933500: system.cpu T0 : @printf+109    : pop      rbp
222604933500: system.cpu T0 : @printf+109.0 : POP_R : ld t1, SS:[rsp] : MemRead : D=0xffffffff80974c60 A=0xffffffff80974bf0 222604933500: system.cpu T0 : @printf+109.1 : POP_R : addi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8 222604933500: system.cpu T0 : @printf+109.2 : POP_R : mov rbp, rbp, t1 : IntAlu : D=0xffffffff80974c60
222604933500: system.cpu T0 : @printf+110    : ret
222604933500: system.cpu T0 : @printf+110.0 : RET_NEAR : ld t1, SS:[rsp] : MemRead : D=0xffffffff80611d2e A=0xffffffff80974bf8 222604933500: system.cpu T0 : @printf+110.1 : RET_NEAR : addi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974c00 222604933500: system.cpu T0 : @printf+110.2 : RET_NEAR : wripi , t1, 0 : IntAlu :
222604944000: system.cpu T0 : @init_TSC+894    : rdtsc
222604944000: system.cpu T0 : @init_TSC+894.0 : RDTSC : rdtsc t1d, %ctrl26, : IntAlu : D=0x000000001a895d29 222604944000: system.cpu T0 : @init_TSC+894.1 : RDTSC : mov eax, eax, t1d : IntAlu : D=0x000000001a895d29 222604944000: system.cpu T0 : @init_TSC+894.2 : RDTSC : srli t1, t1, 0x20 : IntAlu : D=0x0000000000000000 222604944000: system.cpu T0 : @init_TSC+894.3 : RDTSC : mov edx, edx, t1d : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+896    : mov    ebx, edx
222604944000: system.cpu T0 : @init_TSC+896.0 : MOV_R_R : mov ebx, ebx, edx : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+898    : mov    eax, eax
222604944000: system.cpu T0 : @init_TSC+898.0 : MOV_R_R : mov eax, eax, eax : IntAlu : D=0x000000001a895d29
222604944000: system.cpu T0 : @init_TSC+900    : sal    rbx, 0x20
222604944000: system.cpu T0 : @init_TSC+900.0 : SAL_R_I : slli rbx, rbx, 0x20 : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+904    : or     rbx, rax
222604944000: system.cpu T0 : @init_TSC+904.0 : OR_R_R : or rbx, rbx, rax : IntAlu : D=0x0000000000000000
222604944500: system.cpu T0 : @init_TSC+907    : mov    edi, 0xf4240
222604944500: system.cpu T0 : @init_TSC+907.0 : MOV_R_I : limm edi, 0xf4240 : IntAlu : D=0x00000000000f4240
222604945500: system.cpu T0 : @init_TSC+912    : call   0x69b
222604945500: system.cpu T0 : @init_TSC+912.0 : CALL_NEAR_I : limm t1, 0x69b : IntAlu : D=0x000000000000069b 222604945500: system.cpu T0 : @init_TSC+912.1 : CALL_NEAR_I : rdip t7, %ctrl153, : IntAlu : D=0xffffffff80611d45 222604945500: system.cpu T0 : @init_TSC+912.2 : CALL_NEAR_I : st t7, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80611d45 A=0xffffffff80974bf8 222604945500: system.cpu T0 : @init_TSC+912.3 : CALL_NEAR_I : subi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8 222604945500: system.cpu T0 : @init_TSC+912.4 : CALL_NEAR_I : wrip , t7, t1 : IntAlu :
222604953000: global: XXX-BZ reg 0 0x7 = 0xf4240
222604953000: global: XXX-BZ sp 0xffffffff80974bf8
222604953000: global: XXX-BZ npc 0xffffffff80611d2e

// still previous not the current just written return address on the stack
   // running the same bits of the function again since the last ret

1222604955000: system.cpu T0 : @init_TSC+894    : rdtsc
1222604955000: system.cpu T0 : @init_TSC+894.0 : RDTSC : rdtsc t1d, %ctrl26, : IntAlu : D=0x0000000091bef13d 1222604955000: system.cpu T0 : @init_TSC+894.1 : RDTSC : mov eax, eax, t1d : IntAlu : D=0x0000000091bef13d 1222604955000: system.cpu T0 : @init_TSC+894.2 : RDTSC : srli t1, t1, 0x20 : IntAlu : D=0x0000000000000000 1222604955000: system.cpu T0 : @init_TSC+894.3 : RDTSC : mov edx, edx, t1d : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+896    : mov   ebx, edx
1222604955000: system.cpu T0 : @init_TSC+896.0 : MOV_R_R : mov ebx, ebx, edx : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+898    : mov   eax, eax
1222604955000: system.cpu T0 : @init_TSC+898.0 : MOV_R_R : mov eax, eax, eax : IntAlu : D=0x0000000091bef13d
1222604955000: system.cpu T0 : @init_TSC+900    : sal   rbx, 0x20
1222604955000: system.cpu T0 : @init_TSC+900.0 : SAL_R_I : slli rbx, rbx, 0x20 : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+904    : or    rbx, rax
1222604955000: system.cpu T0 : @init_TSC+904.0 : OR_R_R : or rbx, rbx, rax : IntAlu : D=0x0000000000000000
1222604955500: system.cpu T0 : @init_TSC+907    : mov   edi, 0xf4240
1222604955500: system.cpu T0 : @init_TSC+907.0 : MOV_R_I : limm edi, 0xf4240 : IntAlu : D=0x00000000000f4240
1222604956500: system.cpu T0 : @init_TSC+912    : call  0x69b
1222604956500: system.cpu T0 : @init_TSC+912.0 : CALL_NEAR_I : limm t1, 0x69b : IntAlu : D=0x000000000000069b 1222604956500: system.cpu T0 : @init_TSC+912.1 : CALL_NEAR_I : rdip t7, %ctrl153, : IntAlu : D=0xffffffff80611d45 1222604956500: system.cpu T0 : @init_TSC+912.2 : CALL_NEAR_I : st t7, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80611d45 A=0xffffffff80974bf8 1222604956500: system.cpu T0 : @init_TSC+912.3 : CALL_NEAR_I : subi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8 1222604956500: system.cpu T0 : @init_TSC+912.4 : CALL_NEAR_I : wrip , t7, t1 : IntAlu :
1222604962000: global: XXX-BZ reg 0 0x7 = 0xf4240
1222604962000: global: XXX-BZ sp 0xffffffff80974bf8
1222604962000: global: XXX-BZ npc 0xffffffff80611d45

// this time we have the correct value there but executed the code twice obviously not what we expected …
    // but at least we continue but something clearly went wrong above.

2222604964000: system.cpu T0 : @init_TSC+917    : rdtsc
2222604964000: system.cpu T0 : @init_TSC+917.0 : RDTSC : rdtsc t1d, %ctrl26, : IntAlu : D=0x0000000108f4854d 2222604964000: system.cpu T0 : @init_TSC+917.1 : RDTSC : mov eax, eax, t1d : IntAlu : D=0x0000000008f4854d 2222604964000: system.cpu T0 : @init_TSC+917.2 : RDTSC : srli t1, t1, 0x20 : IntAlu : D=0x0000000000000001 2222604964000: system.cpu T0 : @init_TSC+917.3 : RDTSC : mov edx, edx, t1d : IntAlu : D=0x0000000000000001
2222604964000: system.cpu T0 : @init_TSC+919    : mov   esi, edx
2222604964000: system.cpu T0 : @init_TSC+919.0 : MOV_R_R : mov esi, esi, edx : IntAlu : D=0x0000000000000001
2222604964000: system.cpu T0 : @init_TSC+921    : mov   eax, eax
2222604964000: system.cpu T0 : @init_TSC+921.0 : MOV_R_R : mov eax, eax, eax : IntAlu : D=0x0000000008f4854d
2222604964000: system.cpu T0 : @init_TSC+923    : sal   rsi, 0x20
2222604964000: system.cpu T0 : @init_TSC+923.0 : SAL_R_I : slli rsi, rsi, 0x20 : IntAlu : D=0x0000000000000000
2222604964000: system.cpu T0 : @init_TSC+927    : or    rsi, rax
2222604964000: system.cpu T0 : @init_TSC+927.0 : OR_R_R : or rsi, rsi, rax : IntAlu : D=0x0000000000000000
2222604964500: system.cpu T0 : @init_TSC+930    : sub   rsi, rbx
2222604964500: system.cpu T0 : @init_TSC+930.0 : SUB_R_R : sub rsi, rsi, rbx : IntAlu : D=0x0000000000000000 2222604964500: system.cpu T0 : @init_TSC+933 : mov DS:[0xffffffff8096ed88], rsi 2222604964500: system.cpu T0 : @init_TSC+933.0 : MOV_M_R : st rsi, DS:[0xffffffff8096ed88] : MemWrite : D=0x0000000077359410 A=0xffffffff8096ed88 2222604964500: system.cpu T0 : @init_TSC+941 : cmp DS:[0xffffffff808f213c], 0 2222604964500: system.cpu T0 : @init_TSC+941.0 : CMP_M_I : limm t2d, 0 : IntAlu : D=0x0000000000000000 2222604964500: system.cpu T0 : @init_TSC+941.1 : CMP_M_I : ld t1d, DS:[0xffffffff808f213c] : MemRead : D=0x0000000000000001 A=0xffffffff808f213c 2222604964500: system.cpu T0 : @init_TSC+941.2 : CMP_M_I : sub t0d, t1d, t2d : IntAlu : D=0x0000000000000000
2222604964500: system.cpu T0 : @init_TSC+949    : jz    0xd8
2222604964500: system.cpu T0 : @init_TSC+949.0 : JZ_I : rdip t1, %ctrl153, : IntAlu : D=0xffffffff80611d6b 2222604964500: system.cpu T0 : @init_TSC+949.1 : JZ_I : limm t2, 0xd8 : IntAlu : D=0x00000000000000d8 2222604964500: system.cpu T0 : @init_TSC+949.2 : JZ_I : wrip , t1, t2 : IntAlu : 2222604965000: system.cpu T0 : @init_TSC+955 : mov rdi, 0xffffffff80685376 2222604965000: system.cpu T0 : @init_TSC+955.0 : MOV_R_I : limm rdi, 0xffffffff80685376 : IntAlu : D=0xffffffff80685376
2222604965000: system.cpu T0 : @init_TSC+962    : xor   eax, eax
2222604965000: system.cpu T0 : @init_TSC+962.0 : XOR_R_R : xor eax, eax, eax : IntAlu : D=0x0000000000000000 2222604965000: system.cpu T0 : @init_TSC+964 : call 0xffffffffffdd2f57 2222604965000: system.cpu T0 : @init_TSC+964.0 : CALL_NEAR_I : limm t1, 0xffffffffffdd2f57 : IntAlu : D=0xffffffffffdd2f57 2222604965000: system.cpu T0 : @init_TSC+964.1 : CALL_NEAR_I : rdip t7, %ctrl153, : IntAlu : D=0xffffffff80611d79 2222604965000: system.cpu T0 : @init_TSC+964.2 : CALL_NEAR_I : st t7, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80611d79 A=0xffffffff80974bf8 2222604965000: system.cpu T0 : @init_TSC+964.3 : CALL_NEAR_I : subi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8 2222604965000: system.cpu T0 : @init_TSC+964.4 : CALL_NEAR_I : wrip , t7, t1 : IntAlu :
2222604971000: system.cpu T0 : @printf    : push        rbp
2222604971000: system.cpu T0 : @printf.0 : PUSH_R : st rbp, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80974c60 A=0xffffffff80974bf0 2222604971000: system.cpu T0 : @printf.1 : PUSH_R : subi rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf0
2222604971000: system.cpu T0 : @printf+1    : mov       rbp, rsp
2222604971000: system.cpu T0 : @printf+1.0 : MOV_R_R : mov rbp, rbp, rsp : IntAlu : D=0xffffffff80974bf0
2222604971000: system.cpu T0 : @printf+4    : sub       rax, 0x50
2222604971000: system.cpu T0 : @printf+4.0 : SUB_R_I : limm t1, 0x50 : IntAlu : D=0x0000000000000050 2222604971000: system.cpu T0 : @printf+4.1 : SUB_R_I : sub rsp, rsp, t1 : IntAlu : D=0x0000000000000000
2222604971000: system.cpu T0 : @printf+8    : mov       rax, rdi
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to