Alvise Rigo <a.r...@virtualopensystems.com> writes:

> Add a new TLB flag to force all the accesses made to a page to follow
> the slow-path.
>
> In the case we remove a TLB entry marked as EXCL, we unset the
> corresponding exclusive bit in the bitmap.
>
> Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
> Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
> Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
> ---
>  cputlb.c                |  38 +++++++++++++++-
>  include/exec/cpu-all.h  |   8 ++++
>  include/exec/cpu-defs.h |   1 +
>  include/qom/cpu.h       |  14 ++++++
>  softmmu_template.h      | 114 ++++++++++++++++++++++++++++++++++++++----------
>  5 files changed, 152 insertions(+), 23 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index bf1d50a..7ee0c89 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -394,6 +394,16 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>      env->tlb_v_table[mmu_idx][vidx] = *te;
>      env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
>
> +    if (unlikely(!(te->addr_write & TLB_MMIO) && (te->addr_write & TLB_EXCL))) {

Why do we care about TLB_MMIO flags here? Does it actually happen? Would
bad things happen if we enforced exclusivity for an MMIO write? Do the
other flags matter?

There should be a comment as to why MMIO is mentioned here, I think.
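
Something like this would do, assuming the reason is that the exclusive
bitmap only tracks RAM-backed pages (my guess, not verified):

  /* MMIO accesses go through the io_write path and are not tracked in
   * the exclusive bitmap, so only RAM-backed TLB entries are checked. */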

> +        /* We are removing an exclusive entry, set the page to dirty. This
> +         * is not necessary if the vCPU has performed both SC and LL. */
> +        hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) +
> +                                          (te->addr_write & TARGET_PAGE_MASK);
> +        if (!cpu->ll_sc_context) {
> +            cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
> +        }
> +    }
> +
>      /* refill the tlb */
>      env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
>      env->iotlb[mmu_idx][index].attrs = attrs;
> @@ -419,7 +429,15 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>                                                     + xlat)) {
>              te->addr_write = address | TLB_NOTDIRTY;
>          } else {
> -            te->addr_write = address;
> +            if (!(address & TLB_MMIO) &&
> +                cpu_physical_memory_atleast_one_excl(section->mr->ram_addr
> +                                                           + xlat)) {
> +                /* There is at least one vCPU that has flagged the address as
> +                 * exclusive. */
> +                te->addr_write = address | TLB_EXCL;
> +            } else {
> +                te->addr_write = address;
> +            }
>          }
>      } else {
>          te->addr_write = -1;
> @@ -471,6 +489,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
>      return qemu_ram_addr_from_host_nofail(p);
>  }
>
> +/* For every vCPU compare the exclusive address and reset it in case of a
> + * match. Since only one vCPU is running at once, no lock has to be held to
> + * guard this operation. */
> +static inline void lookup_and_reset_cpus_ll_addr(hwaddr addr, hwaddr size)
> +{
> +    CPUState *cpu;
> +
> +    CPU_FOREACH(cpu) {
> +        if (cpu->excl_protected_range.begin != EXCLUSIVE_RESET_ADDR &&
> +            ranges_overlap(cpu->excl_protected_range.begin,
> +                           cpu->excl_protected_range.end -
> +                           cpu->excl_protected_range.begin,
> +                           addr, size)) {
> +            cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +        }
> +    }
> +}
> +
>  #define MMUSUFFIX _mmu
>
>  #define SHIFT 0
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 83b1781..f8d8feb 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -277,6 +277,14 @@ CPUArchState *cpu_copy(CPUArchState *env);
>  #define TLB_NOTDIRTY    (1 << 4)
>  /* Set if TLB entry is an IO callback.  */
>  #define TLB_MMIO        (1 << 5)
> +/* Set if TLB entry references a page that requires exclusive access.  */
> +#define TLB_EXCL        (1 << 6)
> +
> +/* Do not allow a TARGET_PAGE_MASK which covers one or more bits defined
> + * above. */
> +#if TLB_EXCL >= TARGET_PAGE_SIZE
> +#error TARGET_PAGE_MASK covering the low bits of the TLB virtual address
> +#endif
>
>  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
>  void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
> diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
> index 5093be2..b34d7ae 100644
> --- a/include/exec/cpu-defs.h
> +++ b/include/exec/cpu-defs.h
> @@ -27,6 +27,7 @@
>  #include <inttypes.h>
>  #include "qemu/osdep.h"
>  #include "qemu/queue.h"
> +#include "qemu/range.h"
>  #include "tcg-target.h"
>  #ifndef CONFIG_USER_ONLY
>  #include "exec/hwaddr.h"
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 51a1323..c6bb6b6 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -29,6 +29,7 @@
>  #include "qemu/queue.h"
>  #include "qemu/thread.h"
>  #include "qemu/typedefs.h"
> +#include "qemu/range.h"
>
>  typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
>                                       void *opaque);
> @@ -210,6 +211,9 @@ struct kvm_run;
>  #define TB_JMP_CACHE_BITS 12
>  #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
>
> +/* Atomic insn translation TLB support. */
> +#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
> +
>  /**
>   * CPUState:
>   * @cpu_index: CPU index (informative).
> @@ -329,6 +333,16 @@ struct CPUState {
>       */
>      bool throttle_thread_scheduled;
>
> +    /* Used by the atomic insn translation backend. */
> +    int ll_sc_context;
> +    /* vCPU current exclusive addresses range.
> +     * The address is set to EXCLUSIVE_RESET_ADDR if the vCPU is not
> +     * in the middle of a LL/SC. */
> +    struct Range excl_protected_range;
> +    /* Used to carry the SC result but also to flag a normal (legacy)
> +     * store access made by a stcond (see softmmu_template.h). */
> +    int excl_succeeded;

It might be clearer if excl_succeeded was defined as a bool?
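
i.e. something like this (a sketch, re-using your own comment text):

  /* Carries the SC result but also flags that the store came from
   * softmmu_llsc_template.h rather than a normal (legacy) store. */
  bool excl_succeeded;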

>      /* Note that this is accessed at the start of every TB via a negative
>         offset from AREG0.  Leave this field at the end so as to make the
>         (absolute value) offset as small as possible.  This reduces code
> diff --git a/softmmu_template.h b/softmmu_template.h
> index 6803890..24d29b7 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -395,19 +395,54 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>          tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
>      }
>
> -    /* Handle an IO access.  */
> +    /* Handle an IO access or exclusive access.  */
>      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> -        CPUIOTLBEntry *iotlbentry;
> -        if ((addr & (DATA_SIZE - 1)) != 0) {
> -            goto do_unaligned_access;
> +        CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> +
> +        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
> +            CPUState *cpu = ENV_GET_CPU(env);
> +            /* The slow-path has been forced since we are writing to
> +             * exclusive-protected memory. */
> +            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> +
> +            /* The function lookup_and_reset_cpus_ll_addr could have reset the
> +             * exclusive address. Fail the SC in this case.
> +             * N.B.: Here excl_succeeded == 0 means that helper_le_st_name has
> +             * not been called by a softmmu_llsc_template.h. */

Could this be better worded (along with bool-ising) as:

"excl_succeeded is set by helper_le_st_name (softmmu_llsc_template)."

But having said that, grepping for helper_le_st_name I see it's defined
in softmmu_template.h, so now the comment has confused me.

It also might be worth mentioning the subtlety that exclusive addresses
are based on the real hwaddr (hence the iotlb lookup?).
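
Perhaps something like this (assuming I've followed the flow correctly):

  /* excl_succeeded is only set (by softmmu_llsc_template.h) when we got
   * here via a store-conditional helper; in that case check that the
   * protected range still covers this address.  Note the range is
   * tracked on the physical (hw) address, hence the iotlb lookup above. */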

> +            if (cpu->excl_succeeded) {
> +                if (cpu->excl_protected_range.begin != hw_addr) {
> +                    /* The vCPU is SC-ing to an unprotected address. */
> +                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +                    cpu->excl_succeeded = 0;

cpu->excl_succeeded = false;

> +
> +                    return;
> +                }
> +
> +                cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
> +            }
> +
> +            haddr = addr + env->tlb_table[mmu_idx][index].addend;
> +        #if DATA_SIZE == 1
> +            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
> +        #else
> +            glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
> +        #endif

Why the special casing for byte access? Isn't this something the glue +
SUFFIX magic is meant to sort out?
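
For reference, if I'm reading the glue magic right, the two arms expand
to something like:

  stb_p((uint8_t *)haddr, val);     /* DATA_SIZE == 1 */
  stw_le_p((uint8_t *)haddr, val);  /* DATA_SIZE == 2, for example */

so I guess the split is only there because there is no byte-sized _le_p
variant? If so, a one-line comment saying so would help.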

> +
> +            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
> +
> +            return;
> +        } else {
> +            if ((addr & (DATA_SIZE - 1)) != 0) {
> +                goto do_unaligned_access;
> +            }
> +            iotlbentry = &env->iotlb[mmu_idx][index];

Are we re-loading the TLB entry here?

> +
> +            /* ??? Note that the io helpers always read data in the target
> +               byte ordering.  We should push the LE/BE request down into io.  */
> +            val = TGT_LE(val);
> +            glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);

What happens if the software does an exclusive operation on an io
address?

> +            return;
>          }
> -        iotlbentry = &env->iotlb[mmu_idx][index];
> -
> -        /* ??? Note that the io helpers always read data in the target
> -           byte ordering.  We should push the LE/BE request down into io.  */
> -        val = TGT_LE(val);
> -        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
> -        return;
>      }
>
>      /* Handle slow unaligned access (it spans two pages or IO).  */
> @@ -475,19 +510,54 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>          tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
>      }
>
> -    /* Handle an IO access.  */
> +    /* Handle an IO access or exclusive access.  */

Hmm, there looks to be a massive amount of duplication here (not your
fault, it was like that when you got here ;-), but maybe it can be
re-factored away somehow?
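
For instance (a very rough sketch, names invented, and assuming
excl_succeeded becomes a bool), the TLB_EXCL handling could live in a
shared helper that both the LE and BE versions call before doing the
actual store:

  /* Hypothetical common helper for the exclusive slow-path.  Returns
   * true if the store should go ahead, false if the SC has failed. */
  static inline bool smmu_excl_check_store(CPUState *cpu, hwaddr hw_addr)
  {
      if (cpu->excl_succeeded) {
          if (cpu->excl_protected_range.begin != hw_addr) {
              /* SC-ing to an unprotected address: fail the SC. */
              cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
              cpu->excl_succeeded = false;
              return false;
          }
          cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
      }
      return true;
  }

leaving only the actual LE/BE store and the final
lookup_and_reset_cpus_ll_addr() call in the per-endian templates.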

>      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
> -        CPUIOTLBEntry *iotlbentry;
> -        if ((addr & (DATA_SIZE - 1)) != 0) {
> -            goto do_unaligned_access;
> +        CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
> +
> +        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
> +            CPUState *cpu = ENV_GET_CPU(env);
> +            /* The slow-path has been forced since we are writing to
> +             * exclusive-protected memory. */
> +            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> +
> +            /* The function lookup_and_reset_cpus_ll_addr could have reset the
> +             * exclusive address. Fail the SC in this case.
> +             * N.B.: Here excl_succeeded == 0 means that helper_le_st_name has
> +             * not been called by a softmmu_llsc_template.h. */
> +            if (cpu->excl_succeeded) {
> +                if (cpu->excl_protected_range.begin != hw_addr) {
> +                    /* The vCPU is SC-ing to an unprotected address. */
> +                    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
> +                    cpu->excl_succeeded = 0;
> +
> +                    return;
> +                }
> +
> +                cpu_physical_memory_unset_excl(hw_addr, cpu->cpu_index);
> +            }
> +
> +            haddr = addr + env->tlb_table[mmu_idx][index].addend;
> +        #if DATA_SIZE == 1
> +            glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
> +        #else
> +            glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
> +        #endif
> +
> +            lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
> +
> +            return;
> +        } else {
> +            if ((addr & (DATA_SIZE - 1)) != 0) {
> +                goto do_unaligned_access;
> +            }
> +            iotlbentry = &env->iotlb[mmu_idx][index];
> +
> +            /* ??? Note that the io helpers always read data in the target
> +               byte ordering.  We should push the LE/BE request down into io.  */
> +            val = TGT_BE(val);
> +            glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
> +            return;
>          }
> -        iotlbentry = &env->iotlb[mmu_idx][index];
> -
> -        /* ??? Note that the io helpers always read data in the target
> -           byte ordering.  We should push the LE/BE request down into io.  */
> -        val = TGT_BE(val);
> -        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
> -        return;
>      }
>
>      /* Handle slow unaligned access (it spans two pages or IO).  */


--
Alex Bennée
