Add a new TLB flag to force all the accesses made to a page to follow the slow-path.
When we remove a TLB entry marked as EXCL, we unset the corresponding
exclusive bit in the bitmap.

Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
---
 cputlb.c                |  38 +++++++++++++++-
 include/exec/cpu-all.h  |   8 ++++
 include/exec/cpu-defs.h |   1 +
 include/qom/cpu.h       |  14 ++++++
 softmmu_template.h      | 114 ++++++++++++++++++++++++++++++++++++++----------
 5 files changed, 152 insertions(+), 23 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index bf1d50a..7ee0c89 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -394,6 +394,16 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     env->tlb_v_table[mmu_idx][vidx] = *te;
     env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
 
+    if (unlikely(!(te->addr_write & TLB_MMIO) && (te->addr_write & TLB_EXCL))) {
+        /* We are removing an exclusive entry, set the page to dirty. This
+         * is not necessary if the vCPU has performed both SC and LL. */
+        hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK)
+                         + (te->addr_write & TARGET_PAGE_MASK);
+        if (!cpu->ll_sc_context) {
+            cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
+        }
+    }
+
     /* refill the tlb */
     env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
     env->iotlb[mmu_idx][index].attrs = attrs;
@@ -419,7 +429,15 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
                                                    + xlat)) {
             te->addr_write = address | TLB_NOTDIRTY;
         } else {
-            te->addr_write = address;
+            if (!(address & TLB_MMIO) &&
+                cpu_physical_memory_atleast_one_excl(section->mr->ram_addr
+                                                     + xlat)) {
+                /* There is at least one vCPU that has flagged the address as
+                 * exclusive. */
+                te->addr_write = address | TLB_EXCL;
+            } else {
+                te->addr_write = address;
+            }
         }
     } else {
         te->addr_write = -1;
@@ -471,6 +489,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
     return qemu_ram_addr_from_host_nofail(p);
 }
 
+/* For every vCPU compare the exclusive address and reset it in case of a
+ * match. Since only one vCPU is running at once, no lock has to be held to
+ * guard this operation. */
+static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        if (cpu->excl_protected_range.begin != EXCLUSIVE_RESET_ADDR &&
+            ranges_overlap(cpu->excl_protected_range.begin,
+                           cpu->excl_protected_range.end -
+                           cpu->excl_protected_range.begin,
+                           addr, size)) {
+            cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+        }
+    }
+}
+
 #define MMUSUFFIX _mmu
 
 #define SHIFT 0
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 83b1781..f8d8feb 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -277,6 +277,14 @@ CPUArchState *cpu_copy(CPUArchState *env);
 #define TLB_NOTDIRTY    (1 << 4)
 /* Set if TLB entry is an IO callback.  */
 #define TLB_MMIO        (1 << 5)
+/* Set if TLB entry references a page that requires exclusive access.  */
+#define TLB_EXCL        (1 << 6)
+
+/* Do not allow a TARGET_PAGE_MASK which covers one or more bits defined
+ * above. */
+#if TLB_EXCL >= TARGET_PAGE_SIZE
+#error TARGET_PAGE_MASK covering the low bits of the TLB virtual address
+#endif
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 5093be2..b34d7ae 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -27,6 +27,7 @@
 #include <inttypes.h>
 #include "qemu/osdep.h"
 #include "qemu/queue.h"
+#include "qemu/range.h"
 #include "tcg-target.h"
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 51a1323..c6bb6b6 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -29,6 +29,7 @@
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/typedefs.h"
+#include "qemu/range.h"
 
 typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
                                      void *opaque);
@@ -210,6 +211,9 @@ struct kvm_run;
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
 
+/* Atomic insn translation TLB support. */
+#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
+
 /**
  * CPUState:
  * @cpu_index: CPU index (informative).
@@ -329,6 +333,16 @@ struct CPUState {
      */
     bool throttle_thread_scheduled;
 
+    /* Used by the atomic insn translation backend. */
+    int ll_sc_context;
+    /* vCPU current exclusive addresses range.
+     * The address is set to EXCLUSIVE_RESET_ADDR if the vCPU is not
+     * in the middle of a LL/SC. */
+    struct Range excl_protected_range;
+    /* Used to carry the SC result but also to flag a normal (legacy)
+     * store access made by a stcond (see softmmu_template.h). */
+    int excl_succeeded;
+
     /* Note that this is accessed at the start of every TB via a negative
        offset from AREG0.  Leave this field at the end so as to make the
        (absolute value) offset as small as possible.  This reduces code
diff --git a/softmmu_template.h b/softmmu_template.h
index 6803890..24d29b7 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -395,19 +395,54 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
 
-    /* Handle an IO access.  */
+    /* Handle an IO access or exclusive access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
-        if ((addr & (DATA_SIZE - 1)) != 0) {
-            goto do_unaligned_access;
+        CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+
+        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+            CPUState *cpu = ENV_GET_CPU(env);
+            /* The slow-path has been forced since we are writing to
+             * exclusive-protected memory. */
+            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+            /* The function lookup_and_reset_cpus_ll_addr could have reset the
+             * exclusive address. Fail the SC in this case.
+             * N.B.: Here excl_succeeded == 0 means that helper_le_st_name has
+             * not been called from softmmu_llsc_template.h. */
+            if (cpu->excl_succeeded) {
+                if (cpu->excl_protected_range.begin != hw_addr) {
+                    /* The vCPU is SC-ing to an unprotected address. */
+                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+                    cpu->excl_succeeded = 0;
+
+                    return;
+                }
+
+                cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
+            }
+
+            haddr = addr + env->tlb_table[mmu_idx][index].addend;
+        #if DATA_SIZE == 1
+            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+        #else
+            glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
+        #endif
+
+            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
+
+            return;
+        } else {
+            if ((addr & (DATA_SIZE - 1)) != 0) {
+                goto do_unaligned_access;
+            }
+            iotlbentry = &env->iotlb[mmu_idx][index];
+
+            /* ??? Note that the io helpers always read data in the target
+               byte ordering.  We should push the LE/BE request down into io.  */
+            val = TGT_LE(val);
+            glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
+            return;
         }
-        iotlbentry = &env->iotlb[mmu_idx][index];
-
-        /* ??? Note that the io helpers always read data in the target
-           byte ordering.  We should push the LE/BE request down into io.  */
-        val = TGT_LE(val);
-        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
-        return;
     }
 
     /* Handle slow unaligned access (it spans two pages or IO).  */
@@ -475,19 +510,54 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
 
-    /* Handle an IO access.  */
+    /* Handle an IO access or exclusive access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
-        if ((addr & (DATA_SIZE - 1)) != 0) {
-            goto do_unaligned_access;
+        CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+
+        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+            CPUState *cpu = ENV_GET_CPU(env);
+            /* The slow-path has been forced since we are writing to
+             * exclusive-protected memory. */
+            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+            /* The function lookup_and_reset_cpus_ll_addr could have reset the
+             * exclusive address. Fail the SC in this case.
+             * N.B.: Here excl_succeeded == 0 means that helper_be_st_name has
+             * not been called from softmmu_llsc_template.h. */
+            if (cpu->excl_succeeded) {
+                if (cpu->excl_protected_range.begin != hw_addr) {
+                    /* The vCPU is SC-ing to an unprotected address. */
+                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
+                    cpu->excl_succeeded = 0;
+
+                    return;
+                }
+
+                cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
+            }
+
+            haddr = addr + env->tlb_table[mmu_idx][index].addend;
+        #if DATA_SIZE == 1
+            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+        #else
+            glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
+        #endif
+
+            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
+
+            return;
+        } else {
+            if ((addr & (DATA_SIZE - 1)) != 0) {
+                goto do_unaligned_access;
+            }
+            iotlbentry = &env->iotlb[mmu_idx][index];
+
+            /* ??? Note that the io helpers always read data in the target
+               byte ordering.  We should push the LE/BE request down into io.  */
+            val = TGT_BE(val);
+            glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
+            return;
        }
-        iotlbentry = &env->iotlb[mmu_idx][index];
-
-        /* ??? Note that the io helpers always read data in the target
-           byte ordering.  We should push the LE/BE request down into io.  */
-        val = TGT_BE(val);
-        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
-        return;
     }
 
     /* Handle slow unaligned access (it spans two pages or IO).  */
-- 
2.6.4
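Note (not part of the patch): a minimal, standalone sketch of why keeping
TLB_EXCL in the low bits of addr_write is enough to divert every store to the
slow path. The page-size and flag values below are illustrative assumptions,
not the per-target definitions from cpu-defs.h/cpu-all.h. The store helpers
take the fast path only when no bits below TARGET_PAGE_MASK are set in the
cached addr_write, so OR-ing TLB_EXCL into the entry makes the
'tlb_addr & ~TARGET_PAGE_MASK' test in helper_le/be_st_name true for every
write to that page.

/* Standalone illustration; TARGET_PAGE_BITS and TLB_EXCL are assumed values. */
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12
#define TARGET_PAGE_SIZE (1u << TARGET_PAGE_BITS)
#define TARGET_PAGE_MASK (~(uint64_t)(TARGET_PAGE_SIZE - 1))
#define TLB_EXCL         (1 << 6)

int main(void)
{
    uint64_t vaddr = 0x4000a8;                       /* guest virtual address */
    uint64_t addr_write = vaddr & TARGET_PAGE_MASK;  /* clean TLB entry */

    /* No low flag bits set: the store helper's slow-path test is false. */
    printf("plain entry forces slow path:    %d\n",
           (addr_write & ~TARGET_PAGE_MASK) != 0);

    /* Entry for an exclusive-protected page: TLB_EXCL lives below the page
     * mask, so 'tlb_addr & ~TARGET_PAGE_MASK' becomes non-zero and every
     * write is forced through the slow path. */
    addr_write |= TLB_EXCL;
    printf("TLB_EXCL entry forces slow path: %d\n",
           (addr_write & ~TARGET_PAGE_MASK) != 0);

    return 0;
}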
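Also not part of the patch: a self-contained model of the protection-range
logic that the modified store helpers and lookup_and_reset_cpus_ll_addr()
implement together. ProtectedRange, reset_if_conflicting() and sc_succeeds()
are simplified stand-ins (assumptions) for CPUState.excl_protected_range, the
CPU_FOREACH loop and the SC check in helper_*_st_name: a store that overlaps a
vCPU's LL'd range resets that range to EXCLUSIVE_RESET_ADDR, so the vCPU's
later SC, which requires the range to still begin at the SC'd address, fails.

#include <stdint.h>
#include <stdio.h>

#define EXCLUSIVE_RESET_ADDR UINT64_MAX

typedef struct {
    uint64_t begin;
    uint64_t end;       /* one past the last protected byte */
} ProtectedRange;

/* Same semantics as qemu/range.h's ranges_overlap(), simplified. */
static int ranges_overlap(uint64_t a, uint64_t alen, uint64_t b, uint64_t blen)
{
    return a < b + blen && b < a + alen;
}

/* What a store by any vCPU does to another vCPU's protected range. */
static void reset_if_conflicting(ProtectedRange *r, uint64_t addr, uint64_t size)
{
    if (r->begin != EXCLUSIVE_RESET_ADDR &&
        ranges_overlap(r->begin, r->end - r->begin, addr, size)) {
        r->begin = EXCLUSIVE_RESET_ADDR;
    }
}

/* What the SC path checks before committing its store. */
static int sc_succeeds(const ProtectedRange *r, uint64_t addr)
{
    return r->begin != EXCLUSIVE_RESET_ADDR && r->begin == addr;
}

int main(void)
{
    ProtectedRange r = { 0x1000, 0x1008 };   /* set up by a LL at 0x1000 */

    reset_if_conflicting(&r, 0x2000, 4);     /* unrelated store elsewhere */
    printf("SC after unrelated store:   %d\n", sc_succeeds(&r, 0x1000)); /* 1 */

    reset_if_conflicting(&r, 0x1004, 4);     /* store inside the LL'd range */
    printf("SC after conflicting store: %d\n", sc_succeeds(&r, 0x1000)); /* 0 */

    return 0;
}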