Hi,
I was trying to skip FreeBSD’s DELAY() on X86_64 very much like we do
on ARM for Linux (or FreeBSD for that matter) and started to implement
things and found a strange behaviour:
From my src/arch/x86/utility.cc
/**
 * Emulate a near return for the given thread context: read the
 * Addr-sized value at the address held in RSP, make it the new PC, and
 * advance RSP by 8 to pop that stack slot.
 */
void
skipFunction(ThreadContext *tc)
{
    PCState returnPC = tc->pcState();

    // RSP points at the slot the return address is read from.
    const Addr stackTop = tc->readIntReg(INTREG_RSP);
    DPRINTF(XXXBZ, "XXX-BZ sp %#x\n", stackTop);

    Addr retAddr;
    // XXX For some reason the memory write is not visible yet *sigh*
    //CopyOut(tc, &retAddr, stackTop, sizeof(Addr));
    tc->getVirtProxy().readBlob(stackTop, (uint8_t *)&retAddr, sizeof(Addr));
    DPRINTF(XXXBZ, "XXX-BZ npc %#x\n", retAddr);

    returnPC.set(retAddr);

    // Pop the slot we just read before redirecting the PC.
    tc->setIntReg(INTREG_RSP, stackTop + 8);
    tc->pcState(returnPC);
}
As you can see, I tried two ways to read the return address off the
stack, and neither (on the first try) returns the current one (after the
memory write) but the previous one. This causes the part of the preceding
function since the last ret to be run twice; on the 2nd iteration the
memory location on the stack returns the proper (former) return address
and we continue. I would have expected the correct value to be visible,
given the instruction was committed and logged with the DPRINTF.
That’s not the behaviour I expected. Is there anything I am doing
wrong, or is this a (caching) bug? Can anyone enlighten me?
My command line (including private options):
command line: ./build/X86/gem5.opt -r -e -d m5out-amd64-1
--stdout-file=fbsd301452-detailed-00117.log
--stderr-file=fbsd301452-detailed-00117.err --debug-flags=Exec,XXXBZ
configs/example/fs.py --mem-size=1024MB --os-type=FreeBSD --virtblk
--loader-config-file=loader-amd64.conf --cpu-type=detailed
--disk-image=disk-amd64-r301452.img --kernel=kernel-amd64-r301452
--command-line=-hvs --caches --l2cache --l3cache --simple-trace-en
Bjoern
222604924000: system.cpu T0 : @_vprintf+255 : ret
222604924000: system.cpu T0 : @_vprintf+255.0 : RET_NEAR : ld t1,
SS:[rsp] : MemRead : D=0xffffffff803e4d23 A=0xffffffff80974b98
222604924500: system.cpu T0 : @_vprintf+255.1 : RET_NEAR : addi
rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974ba0
222604924500: system.cpu T0 : @_vprintf+255.2 : RET_NEAR : wripi ,
t1, 0 : IntAlu :
222604933000: system.cpu T0 : @printf+83 : cmp
DS:[0xffffffff8095c638], 0
222604933000: system.cpu T0 : @printf+83.0 : CMP_M_I : limm t2, 0
: IntAlu : D=0x0000000000000000
222604933000: system.cpu T0 : @printf+83.1 : CMP_M_I : ld t1,
DS:[0xffffffff8095c638] : MemRead : D=0x0000000000000000
A=0xffffffff8095c638
222604933000: system.cpu T0 : @printf+83.2 : CMP_M_I : sub t0, t1,
t2 : IntAlu : D=0x0000000000000000
222604933000: system.cpu T0 : @printf+92 : jnz 0xb
222604933000: system.cpu T0 : @printf+92.0 : JNZ_I : rdip t1,
%ctrl153, : IntAlu : D=0xffffffff803e4d2e
222604933000: system.cpu T0 : @printf+92.1 : JNZ_I : limm t2, 0xb
: IntAlu : D=0x000000000000000b
222604933000: system.cpu T0 : @printf+92.2 : JNZ_I : wrip , t1, t2
: IntAlu :
222604933000: system.cpu T0 : @printf+94 : mov
DS:[0xffffffff8095cab8], 0x1
222604933000: system.cpu T0 : @printf+94.0 : MOV_M_I : limm t1d,
0x1 : IntAlu : D=0x0000000000000001
222604933000: system.cpu T0 : @printf+94.1 : MOV_M_I : st t1d,
DS:[0xffffffff8095cab8] : MemWrite : D=0x0000000000000001
A=0xffffffff8095cab8
222604933500: system.cpu T0 : @printf+105 : add rax, 0x50
222604933500: system.cpu T0 : @printf+105.0 : ADD_R_I : limm t1,
0x50 : IntAlu : D=0x0000000000000050
222604933500: system.cpu T0 : @printf+105.1 : ADD_R_I : add rsp,
rsp, t1 : IntAlu : D=0x0000000000000000
222604933500: system.cpu T0 : @printf+109 : pop rbp
222604933500: system.cpu T0 : @printf+109.0 : POP_R : ld t1,
SS:[rsp] : MemRead : D=0xffffffff80974c60 A=0xffffffff80974bf0
222604933500: system.cpu T0 : @printf+109.1 : POP_R : addi rsp,
rsp, 0x8 : IntAlu : D=0xffffffff80974bf8
222604933500: system.cpu T0 : @printf+109.2 : POP_R : mov rbp, rbp,
t1 : IntAlu : D=0xffffffff80974c60
222604933500: system.cpu T0 : @printf+110 : ret
222604933500: system.cpu T0 : @printf+110.0 : RET_NEAR : ld t1,
SS:[rsp] : MemRead : D=0xffffffff80611d2e A=0xffffffff80974bf8
222604933500: system.cpu T0 : @printf+110.1 : RET_NEAR : addi rsp,
rsp, 0x8 : IntAlu : D=0xffffffff80974c00
222604933500: system.cpu T0 : @printf+110.2 : RET_NEAR : wripi ,
t1, 0 : IntAlu :
222604944000: system.cpu T0 : @init_TSC+894 : rdtsc
222604944000: system.cpu T0 : @init_TSC+894.0 : RDTSC : rdtsc t1d,
%ctrl26, : IntAlu : D=0x000000001a895d29
222604944000: system.cpu T0 : @init_TSC+894.1 : RDTSC : mov eax,
eax, t1d : IntAlu : D=0x000000001a895d29
222604944000: system.cpu T0 : @init_TSC+894.2 : RDTSC : srli t1,
t1, 0x20 : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+894.3 : RDTSC : mov edx,
edx, t1d : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+896 : mov ebx, edx
222604944000: system.cpu T0 : @init_TSC+896.0 : MOV_R_R : mov ebx,
ebx, edx : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+898 : mov eax, eax
222604944000: system.cpu T0 : @init_TSC+898.0 : MOV_R_R : mov eax,
eax, eax : IntAlu : D=0x000000001a895d29
222604944000: system.cpu T0 : @init_TSC+900 : sal rbx, 0x20
222604944000: system.cpu T0 : @init_TSC+900.0 : SAL_R_I : slli rbx,
rbx, 0x20 : IntAlu : D=0x0000000000000000
222604944000: system.cpu T0 : @init_TSC+904 : or rbx, rax
222604944000: system.cpu T0 : @init_TSC+904.0 : OR_R_R : or rbx,
rbx, rax : IntAlu : D=0x0000000000000000
222604944500: system.cpu T0 : @init_TSC+907 : mov edi, 0xf4240
222604944500: system.cpu T0 : @init_TSC+907.0 : MOV_R_I : limm edi,
0xf4240 : IntAlu : D=0x00000000000f4240
222604945500: system.cpu T0 : @init_TSC+912 : call 0x69b
222604945500: system.cpu T0 : @init_TSC+912.0 : CALL_NEAR_I : limm
t1, 0x69b : IntAlu : D=0x000000000000069b
222604945500: system.cpu T0 : @init_TSC+912.1 : CALL_NEAR_I : rdip
t7, %ctrl153, : IntAlu : D=0xffffffff80611d45
222604945500: system.cpu T0 : @init_TSC+912.2 : CALL_NEAR_I : st
t7, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80611d45
A=0xffffffff80974bf8
222604945500: system.cpu T0 : @init_TSC+912.3 : CALL_NEAR_I : subi
rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8
222604945500: system.cpu T0 : @init_TSC+912.4 : CALL_NEAR_I : wrip
, t7, t1 : IntAlu :
222604953000: global: XXX-BZ reg 0 0x7 = 0xf4240
222604953000: global: XXX-BZ sp 0xffffffff80974bf8
222604953000: global: XXX-BZ npc 0xffffffff80611d2e
// this is still the previous return address, not the one just written
to the stack
// so we run the same part of the function since the last ret again
1222604955000: system.cpu T0 : @init_TSC+894 : rdtsc
1222604955000: system.cpu T0 : @init_TSC+894.0 : RDTSC : rdtsc t1d,
%ctrl26, : IntAlu : D=0x0000000091bef13d
1222604955000: system.cpu T0 : @init_TSC+894.1 : RDTSC : mov eax,
eax, t1d : IntAlu : D=0x0000000091bef13d
1222604955000: system.cpu T0 : @init_TSC+894.2 : RDTSC : srli t1,
t1, 0x20 : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+894.3 : RDTSC : mov edx,
edx, t1d : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+896 : mov ebx, edx
1222604955000: system.cpu T0 : @init_TSC+896.0 : MOV_R_R : mov ebx,
ebx, edx : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+898 : mov eax, eax
1222604955000: system.cpu T0 : @init_TSC+898.0 : MOV_R_R : mov eax,
eax, eax : IntAlu : D=0x0000000091bef13d
1222604955000: system.cpu T0 : @init_TSC+900 : sal rbx, 0x20
1222604955000: system.cpu T0 : @init_TSC+900.0 : SAL_R_I : slli
rbx, rbx, 0x20 : IntAlu : D=0x0000000000000000
1222604955000: system.cpu T0 : @init_TSC+904 : or rbx, rax
1222604955000: system.cpu T0 : @init_TSC+904.0 : OR_R_R : or rbx,
rbx, rax : IntAlu : D=0x0000000000000000
1222604955500: system.cpu T0 : @init_TSC+907 : mov edi, 0xf4240
1222604955500: system.cpu T0 : @init_TSC+907.0 : MOV_R_I : limm
edi, 0xf4240 : IntAlu : D=0x00000000000f4240
1222604956500: system.cpu T0 : @init_TSC+912 : call 0x69b
1222604956500: system.cpu T0 : @init_TSC+912.0 : CALL_NEAR_I : limm
t1, 0x69b : IntAlu : D=0x000000000000069b
1222604956500: system.cpu T0 : @init_TSC+912.1 : CALL_NEAR_I : rdip
t7, %ctrl153, : IntAlu : D=0xffffffff80611d45
1222604956500: system.cpu T0 : @init_TSC+912.2 : CALL_NEAR_I : st
t7, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80611d45
A=0xffffffff80974bf8
1222604956500: system.cpu T0 : @init_TSC+912.3 : CALL_NEAR_I : subi
rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8
1222604956500: system.cpu T0 : @init_TSC+912.4 : CALL_NEAR_I : wrip
, t7, t1 : IntAlu :
1222604962000: global: XXX-BZ reg 0 0x7 = 0xf4240
1222604962000: global: XXX-BZ sp 0xffffffff80974bf8
1222604962000: global: XXX-BZ npc 0xffffffff80611d45
// this time we have the correct value there, but we executed the code
twice, which is obviously not what we expected …
// at least we continue, but something clearly went wrong above.
2222604964000: system.cpu T0 : @init_TSC+917 : rdtsc
2222604964000: system.cpu T0 : @init_TSC+917.0 : RDTSC : rdtsc t1d,
%ctrl26, : IntAlu : D=0x0000000108f4854d
2222604964000: system.cpu T0 : @init_TSC+917.1 : RDTSC : mov eax,
eax, t1d : IntAlu : D=0x0000000008f4854d
2222604964000: system.cpu T0 : @init_TSC+917.2 : RDTSC : srli t1,
t1, 0x20 : IntAlu : D=0x0000000000000001
2222604964000: system.cpu T0 : @init_TSC+917.3 : RDTSC : mov edx,
edx, t1d : IntAlu : D=0x0000000000000001
2222604964000: system.cpu T0 : @init_TSC+919 : mov esi, edx
2222604964000: system.cpu T0 : @init_TSC+919.0 : MOV_R_R : mov esi,
esi, edx : IntAlu : D=0x0000000000000001
2222604964000: system.cpu T0 : @init_TSC+921 : mov eax, eax
2222604964000: system.cpu T0 : @init_TSC+921.0 : MOV_R_R : mov eax,
eax, eax : IntAlu : D=0x0000000008f4854d
2222604964000: system.cpu T0 : @init_TSC+923 : sal rsi, 0x20
2222604964000: system.cpu T0 : @init_TSC+923.0 : SAL_R_I : slli
rsi, rsi, 0x20 : IntAlu : D=0x0000000000000000
2222604964000: system.cpu T0 : @init_TSC+927 : or rsi, rax
2222604964000: system.cpu T0 : @init_TSC+927.0 : OR_R_R : or rsi,
rsi, rax : IntAlu : D=0x0000000000000000
2222604964500: system.cpu T0 : @init_TSC+930 : sub rsi, rbx
2222604964500: system.cpu T0 : @init_TSC+930.0 : SUB_R_R : sub rsi,
rsi, rbx : IntAlu : D=0x0000000000000000
2222604964500: system.cpu T0 : @init_TSC+933 : mov
DS:[0xffffffff8096ed88], rsi
2222604964500: system.cpu T0 : @init_TSC+933.0 : MOV_M_R : st rsi,
DS:[0xffffffff8096ed88] : MemWrite : D=0x0000000077359410
A=0xffffffff8096ed88
2222604964500: system.cpu T0 : @init_TSC+941 : cmp
DS:[0xffffffff808f213c], 0
2222604964500: system.cpu T0 : @init_TSC+941.0 : CMP_M_I : limm
t2d, 0 : IntAlu : D=0x0000000000000000
2222604964500: system.cpu T0 : @init_TSC+941.1 : CMP_M_I : ld t1d,
DS:[0xffffffff808f213c] : MemRead : D=0x0000000000000001
A=0xffffffff808f213c
2222604964500: system.cpu T0 : @init_TSC+941.2 : CMP_M_I : sub t0d,
t1d, t2d : IntAlu : D=0x0000000000000000
2222604964500: system.cpu T0 : @init_TSC+949 : jz 0xd8
2222604964500: system.cpu T0 : @init_TSC+949.0 : JZ_I : rdip t1,
%ctrl153, : IntAlu : D=0xffffffff80611d6b
2222604964500: system.cpu T0 : @init_TSC+949.1 : JZ_I : limm t2,
0xd8 : IntAlu : D=0x00000000000000d8
2222604964500: system.cpu T0 : @init_TSC+949.2 : JZ_I : wrip , t1,
t2 : IntAlu :
2222604965000: system.cpu T0 : @init_TSC+955 : mov rdi,
0xffffffff80685376
2222604965000: system.cpu T0 : @init_TSC+955.0 : MOV_R_I : limm
rdi, 0xffffffff80685376 : IntAlu : D=0xffffffff80685376
2222604965000: system.cpu T0 : @init_TSC+962 : xor eax, eax
2222604965000: system.cpu T0 : @init_TSC+962.0 : XOR_R_R : xor eax,
eax, eax : IntAlu : D=0x0000000000000000
2222604965000: system.cpu T0 : @init_TSC+964 : call
0xffffffffffdd2f57
2222604965000: system.cpu T0 : @init_TSC+964.0 : CALL_NEAR_I : limm
t1, 0xffffffffffdd2f57 : IntAlu : D=0xffffffffffdd2f57
2222604965000: system.cpu T0 : @init_TSC+964.1 : CALL_NEAR_I : rdip
t7, %ctrl153, : IntAlu : D=0xffffffff80611d79
2222604965000: system.cpu T0 : @init_TSC+964.2 : CALL_NEAR_I : st
t7, SS:[rsp + 0xfffffffffffffff8] : MemWrite : D=0xffffffff80611d79
A=0xffffffff80974bf8
2222604965000: system.cpu T0 : @init_TSC+964.3 : CALL_NEAR_I : subi
rsp, rsp, 0x8 : IntAlu : D=0xffffffff80974bf8
2222604965000: system.cpu T0 : @init_TSC+964.4 : CALL_NEAR_I : wrip
, t7, t1 : IntAlu :
2222604971000: system.cpu T0 : @printf : push rbp
2222604971000: system.cpu T0 : @printf.0 : PUSH_R : st rbp, SS:[rsp
+ 0xfffffffffffffff8] : MemWrite : D=0xffffffff80974c60
A=0xffffffff80974bf0
2222604971000: system.cpu T0 : @printf.1 : PUSH_R : subi rsp, rsp,
0x8 : IntAlu : D=0xffffffff80974bf0
2222604971000: system.cpu T0 : @printf+1 : mov rbp, rsp
2222604971000: system.cpu T0 : @printf+1.0 : MOV_R_R : mov rbp,
rbp, rsp : IntAlu : D=0xffffffff80974bf0
2222604971000: system.cpu T0 : @printf+4 : sub rax, 0x50
2222604971000: system.cpu T0 : @printf+4.0 : SUB_R_I : limm t1,
0x50 : IntAlu : D=0x0000000000000050
2222604971000: system.cpu T0 : @printf+4.1 : SUB_R_I : sub rsp,
rsp, t1 : IntAlu : D=0x0000000000000000
2222604971000: system.cpu T0 : @printf+8 : mov rax, rdi
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev