Re: [Qemu-devel] [PATCH v3] implementing victim TLB for QEMU system emulated TLB

Peter Maydell Sat, 01 Feb 2014 14:16:48 -0800

On 28 January 2014 17:31, Xin Tong <trent.t...@gmail.com> wrote:
> This patch adds a victim TLB to the QEMU system mode TLB.
>
> QEMU system mode page table walks are expensive. Taken by running QEMU
> qemu-system-x86_64 system mode on Intel PIN , a TLB miss and walking a
> 4-level page tables in guest Linux OS takes ~450 X86 instructions on
> average.


My review below is largely limited to style issues; I'm assuming
rth will do the substantive review.

> Signed-off-by: Xin Tong <trent.t...@gmail.com>
>
> ---
>  cputlb.c                        | 50 ++++++++++++++++++++++++++++++++++++-
>  include/exec/cpu-defs.h         | 12 ++++++---
>  include/exec/exec-all.h         |  2 ++
>  include/exec/softmmu_template.h | 55 
> ++++++++++++++++++++++++++++++++++++++---
>  4 files changed, 111 insertions(+), 8 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index b533f3f..caee78e 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -34,6 +34,22 @@
>  /* statistics */
>  int tlb_flush_count;
>
> +/* swap the 2 given TLB entries as well as their corresponding IOTLB */
> +inline void swap_tlb(CPUTLBEntry *te, CPUTLBEntry *se, hwaddr *iote,
> +                     hwaddr *iose)
> +{
> +   hwaddr iotmp;
> +   CPUTLBEntry tmp;
> +   /* swap tlb */
> +   tmp = *te;
> +   *te = *se;
> +   *se = tmp;
> +   /* swap iotlb */
> +   iotmp = *iote;
> +   *iote = *iose;
> +   *iose = iotmp;
> +}
> +
>  /* NOTE:
>   * If flush_global is true (the usual case), flush all tlb entries.
>   * If flush_global is false, flush (at least) all tlb entries not
> @@ -58,8 +74,10 @@ void tlb_flush(CPUArchState *env, int flush_global)
>      cpu->current_tb = NULL;
>
>      memset(env->tlb_table, -1, sizeof(env->tlb_table));
> +    memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
>      memset(env->tb_jmp_cache, 0, sizeof(env->tb_jmp_cache));
>
> +    env->vtlb_index = 0;
>      env->tlb_flush_addr = -1;
>      env->tlb_flush_mask = 0;
>      tlb_flush_count++;
> @@ -106,6 +124,14 @@ void tlb_flush_page(CPUArchState *env, target_ulong addr)
>          tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
>      }
>
> +    /* check whether there are entries that need to be flushed in the vtlb */
> +    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> +        unsigned int k;

Just plain "int" is fine.

> +        for (k = 0; k < CPU_VTLB_SIZE; k++) {
> +            tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
> +        }
> +    }
> +
>      tb_flush_jmp_cache(env, addr);
>  }
>
> @@ -170,6 +196,11 @@ void cpu_tlb_reset_dirty_all(ram_addr_t start1, 
> ram_addr_t length)
>                  tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
>                                        start1, length);
>              }
> +
> +            for (i = 0; i < CPU_VTLB_SIZE; i++) {
> +                tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
> +                                      start1, length);
> +            }
>          }
>      }
>  }
> @@ -193,6 +224,13 @@ void tlb_set_dirty(CPUArchState *env, target_ulong vaddr)
>      for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
>          tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
>      }
> +
> +    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
> +        unsigned int k;
> +        for (k = 0; k < CPU_VTLB_SIZE; k++) {
> +            tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
> +        }
> +    }
>  }
>
>  /* Our TLB does not support large pages, so remember the area covered by
> @@ -264,8 +302,18 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>                                              prot, &address);
>
>      index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
> -    env->iotlb[mmu_idx][index] = iotlb - vaddr;
>      te = &env->tlb_table[mmu_idx][index];
> +
> +    /* do not discard the translation in te, evict it into a victim tlb */
> +    unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
> +    env->tlb_v_table[mmu_idx][vidx].addr_read  = te->addr_read;
> +    env->tlb_v_table[mmu_idx][vidx].addr_write = te->addr_write;
> +    env->tlb_v_table[mmu_idx][vidx].addr_code  = te->addr_code;
> +    env->tlb_v_table[mmu_idx][vidx].addend     = te->addend;

You're still writing structure assignments out longhand. These four
lines should all be replaced with
    env->tlb_v_table[mmu_idx][vidx] = *te;

> +    env->iotlb_v[mmu_idx][vidx]                = env->iotlb[mmu_idx][index];
> +
> +    /* refill the tlb */
> +    env->iotlb[mmu_idx][index] = iotlb - vaddr;
>      te->addend = addend - vaddr;
>      if (prot & PAGE_READ) {
>          te->addr_read = address;
> diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
> index 01cd8c7..2631d6b 100644
> --- a/include/exec/cpu-defs.h
> +++ b/include/exec/cpu-defs.h
> @@ -74,6 +74,8 @@ typedef uint64_t target_ulong;
>  #if !defined(CONFIG_USER_ONLY)
>  #define CPU_TLB_BITS 8
>  #define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
> +/* use a fully associative victim tlb */
> +#define CPU_VTLB_SIZE 8
>
>  #if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
>  #define CPU_TLB_ENTRY_BITS 4
> @@ -103,12 +105,16 @@ typedef struct CPUTLBEntry {
>
>  QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
>
> +/* The meaning of the MMU modes is defined in the target code. */

Why this addition? It's duplicating a comment that already exists below.

>  #define CPU_COMMON_TLB \
>      /* The meaning of the MMU modes is defined in the target code. */   \
> -    CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
> -    hwaddr iotlb[NB_MMU_MODES][CPU_TLB_SIZE];               \
> +    CPUTLBEntry  tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                 \
> +    CPUTLBEntry  tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE];              \
> +    hwaddr       iotlb[NB_MMU_MODES][CPU_TLB_SIZE];                     \
> +    hwaddr       iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE];                  \

Don't try to line up the field names like this -- it just means more
code churn, because next time somebody has to add a line here
with a typename longer than "CPUTLBEntry" they need to change
every line in the macro. Single space is fine. (Lining up the '\' on
the right is OK.)

>      target_ulong tlb_flush_addr;                                        \
> -    target_ulong tlb_flush_mask;
> +    target_ulong tlb_flush_mask;                                        \
> +    target_ulong vtlb_index;                                            \
>
>  #else
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index ea90b64..7e88b08 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -102,6 +102,8 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>                    hwaddr paddr, int prot,
>                    int mmu_idx, target_ulong size);
>  void tb_invalidate_phys_addr(hwaddr addr);
> +/* swap the 2 given tlb entries as well as their iotlb */
> +void swap_tlb(CPUTLBEntry *te, CPUTLBEntry *se, hwaddr *iote, hwaddr *iose);
>  #else
>  static inline void tlb_flush_page(CPUArchState *env, target_ulong addr)
>  {
> diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
> index c6a5440..38d18de 100644
> --- a/include/exec/softmmu_template.h
> +++ b/include/exec/softmmu_template.h
> @@ -112,6 +112,21 @@
>  # define helper_te_st_name  helper_le_st_name
>  #endif
>
> +/* macro to check the victim tlb */
> +#define HELPER_CHECK_VICTIM_TLB(ACCESS_TYPE)                       \
> +do {                                                               \
> +    for (vtlb_idx = CPU_VTLB_SIZE; vtlb_idx >= 0; --vtlb_idx) {    \
> +        if (ACCESS_TYPE == (addr & TARGET_PAGE_MASK)) {            \
> +            /* found entry in victim tlb */                        \
> +            swap_tlb(&env->tlb_table[mmu_idx][index],              \
> +                     &env->tlb_v_table[mmu_idx][vtlb_idx],         \
> +                     &env->iotlb[mmu_idx][index],                  \
> +                     &env->iotlb_v[mmu_idx][vtlb_idx]);            \

This is the only place swap_tlb gets used, so probably better to
ditch that as a separate function and just inline it here. (Then
everywhere that uses softmmu_template.h gets the inlined
version, rather than cputlb.c getting to inline and the rest not.)

> +            break;                                                 \
> +        }                                                          \
> +    }                                                              \
> +} while (0);

I think we could make this macro use a bit less ugly.
(1) just call it VICTIM_TLB_HIT; 'helper' has a particular
meaning in QEMU (function called from generated code), and
besides it makes the calling code read a bit less naturally.
(2) rather than having it take as an argument
"env->tlb_v_table[mmu_idx][vtlb_idx].ADDR_READ", just
take the "ADDR_READ" part. This makes the callers simpler
and avoids giving the impression that the macro is going to
simply evaluate its argument once (ie that it is function-like).
(3) Make it a gcc statement-expression, so it can return a
value. Then you can (a) make the scope of vtlb_idx be only
inside teh macro, and avoid forcing the caller to define it and
(b) have the usage pattern be
    if (!VICTIM_TLB_HIT(ADDR_READ)) {
        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
    }

(which means you don't need that "miss victim tlb" comment
any more, because it's obvious from the macro name what
is happening.)

> +
>  static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
>                                                hwaddr physaddr,
>                                                target_ulong addr,
> @@ -141,6 +156,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, 
> target_ulong addr, int mmu_idx,
>      target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
>      uintptr_t haddr;
>      DATA_TYPE res;
> +    int vtlb_idx;
>
>      /* Adjust the given return address.  */
>      retaddr -= GETPC_ADJ;
> @@ -153,7 +169,14 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, 
> target_ulong addr, int mmu_idx,
>              do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, 
> retaddr);
>          }
>  #endif
> -        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
> +        /* we are about to do a page table walk. our last hope is the
> +         * victim tlb. try to refill from the victim tlb before walking the
> +         * page table. */

You might as well put this comment in your helper macro; there's
no need to repeat it in every callsite.

> +        
> HELPER_CHECK_VICTIM_TLB(env->tlb_v_table[mmu_idx][vtlb_idx].ADDR_READ);
> +        /* miss victim tlb */
> +        if (vtlb_idx < 0) {
> +            tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
> +        }
>          tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
>      }

thanks
-- PMM

Re: [Qemu-devel] [PATCH v3] implementing victim TLB for QEMU system emulated TLB

Reply via email to