On Sat, 27 Feb 2021 10:41:57 +0000
Alexandru Elisei <alexandru.eli...@arm.com> wrote:

> Compute the dcache line size when doing dcache maintenance instead of using
> a global variable computed in setup(), which allows us to do dcache
> maintenance at any point in the boot process. This will be useful for
> running as an EFI app and it also aligns our implementation to that of the
> Linux kernel.

Can you add that this changes the semantic of dcache_by_line_op to use
the size instead of the end address?

> 
> For consistency, the arm code has been similarly modified.
> 
> Signed-off-by: Alexandru Elisei <alexandru.eli...@arm.com>
> ---
>  lib/arm/asm/assembler.h   | 44 ++++++++++++++++++++++++++++++++
>  lib/arm/asm/processor.h   |  7 ------
>  lib/arm64/asm/assembler.h | 53 +++++++++++++++++++++++++++++++++++++++
>  lib/arm64/asm/processor.h |  7 ------
>  lib/arm/setup.c           |  7 ------
>  arm/cstart.S              | 18 +++----------
>  arm/cstart64.S            | 16 ++----------
>  7 files changed, 102 insertions(+), 50 deletions(-)
>  create mode 100644 lib/arm/asm/assembler.h
>  create mode 100644 lib/arm64/asm/assembler.h
> 
> diff --git a/lib/arm/asm/assembler.h b/lib/arm/asm/assembler.h
> new file mode 100644
> index 000000000000..6b932df86204
> --- /dev/null
> +++ b/lib/arm/asm/assembler.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Based on several files from Linux version v5.10: 
> arch/arm/mm/proc-macros.S,
> + * arch/arm/mm/proc-v7.S.
> + */
> +
> +/*
> + * dcache_line_size - get the minimum D-cache line size from the CTR register
> + * on ARMv7.
> + */
> +     .macro  dcache_line_size, reg, tmp
> +     mrc     p15, 0, \tmp, c0, c0, 1         // read ctr
> +     lsr     \tmp, \tmp, #16
> +     and     \tmp, \tmp, #0xf                // cache line size encoding
> +     mov     \reg, #4                        // bytes per word
> +     mov     \reg, \reg, lsl \tmp            // actual cache line size
> +     .endm
> +
> +/*
> + * Macro to perform a data cache maintenance for the interval
> + * [addr, addr + size).
> + *
> + *   op:             operation to execute
> + *   domain:         domain used in the dsb instruction
> + *   addr:           starting virtual address of the region
> + *   size:           size of the region
> + *   Corrupts:       addr, size, tmp1, tmp2
> + */
> +     .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2
> +     dcache_line_size \tmp1, \tmp2
> +     add     \size, \addr, \size
> +     sub     \tmp2, \tmp1, #1
> +     bic     \addr, \addr, \tmp2

Just a nit, but since my brain was in assembly land: We could skip tmp2,
by adding back #1 to tmp1 after the bic.
Same for the arm64 code.

> +9998:
> +     .ifc    \op, dccimvac
> +     mcr     p15, 0, \addr, c7, c14, 1
> +     .else
> +     .err
> +     .endif
> +     add     \addr, \addr, \tmp1
> +     cmp     \addr, \size
> +     blo     9998b
> +     dsb     \domain
> +     .endm
> diff --git a/lib/arm/asm/processor.h b/lib/arm/asm/processor.h
> index 273366d1fe1c..3c36eac903f0 100644
> --- a/lib/arm/asm/processor.h
> +++ b/lib/arm/asm/processor.h
> @@ -9,11 +9,6 @@
>  #include <asm/sysreg.h>
>  #include <asm/barrier.h>

Do we want the same protection against inclusion from C here as in the
arm64 version?

> -#define CTR_DMINLINE_SHIFT   16
> -#define CTR_DMINLINE_MASK    (0xf << 16)
> -#define CTR_DMINLINE(x)      \
> -     (((x) & CTR_DMINLINE_MASK) >> CTR_DMINLINE_SHIFT)
> -
>  enum vector {
>       EXCPTN_RST,
>       EXCPTN_UND,
> @@ -89,6 +84,4 @@ static inline u32 get_ctr(void)
>       return read_sysreg(CTR);
>  }
>  
> -extern unsigned long dcache_line_size;
> -
>  #endif /* _ASMARM_PROCESSOR_H_ */
> diff --git a/lib/arm64/asm/assembler.h b/lib/arm64/asm/assembler.h
> new file mode 100644
> index 000000000000..f801c0c43d02
> --- /dev/null
> +++ b/lib/arm64/asm/assembler.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Based on the file arch/arm64/include/asm/assembler.h from Linux v5.10, 
> which
> + * in turn is based on arch/arm/include/asm/assembler.h and
> + * arch/arm/mm/proc-macros.S
> + *
> + * Copyright (C) 1996-2000 Russell King
> + * Copyright (C) 2012 ARM Ltd.
> + */
> +#ifndef __ASSEMBLY__
> +#error "Only include this from assembly code"
> +#endif
> +
> +#ifndef __ASM_ASSEMBLER_H
> +#define __ASM_ASSEMBLER_H
> +
> +/*
> + * raw_dcache_line_size - get the minimum D-cache line size on this CPU
> + * from the CTR register.
> + */
> +     .macro  raw_dcache_line_size, reg, tmp
> +     mrs     \tmp, ctr_el0                   // read CTR
> +     ubfm    \tmp, \tmp, #16, #19            // cache line size encoding

this encoding of ubfm is supposed to be written as:
        ubfx \tmp, \tmp, #16, #4
This is also what objdump makes of the above.

The rest looks good, I convinced myself that the assembly algorithms are
correct.

Cheers,
Andre


> +     mov     \reg, #4                        // bytes per word
> +     lsl     \reg, \reg, \tmp                // actual cache line size
> +     .endm
> +
> +/*
> + * Macro to perform a data cache maintenance for the interval
> + * [addr, addr + size). Use the raw value for the dcache line size because
> + * kvm-unit-tests has no concept of scheduling.
> + *
> + *   op:             operation passed to dc instruction
> + *   domain:         domain used in dsb instruction
> + *   addr:           starting virtual address of the region
> + *   size:           size of the region
> + *   Corrupts:       addr, size, tmp1, tmp2
> + */
> +
> +     .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2
> +     raw_dcache_line_size \tmp1, \tmp2
> +     add     \size, \addr, \size
> +     sub     \tmp2, \tmp1, #1
> +     bic     \addr, \addr, \tmp2
> +9998:
> +     dc      \op, \addr
> +     add     \addr, \addr, \tmp1
> +     cmp     \addr, \size
> +     b.lo    9998b
> +     dsb     \domain
> +     .endm
> +
> +#endif       /* __ASM_ASSEMBLER_H */
> diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
> index 771b2d1e0c94..cdc2463e1981 100644
> --- a/lib/arm64/asm/processor.h
> +++ b/lib/arm64/asm/processor.h
> @@ -16,11 +16,6 @@
>  #define SCTLR_EL1_A  (1 << 1)
>  #define SCTLR_EL1_M  (1 << 0)
>  
> -#define CTR_DMINLINE_SHIFT   16
> -#define CTR_DMINLINE_MASK    (0xf << 16)
> -#define CTR_DMINLINE(x)      \
> -     (((x) & CTR_DMINLINE_MASK) >> CTR_DMINLINE_SHIFT)
> -
>  #ifndef __ASSEMBLY__
>  #include <asm/ptrace.h>
>  #include <asm/esr.h>
> @@ -115,8 +110,6 @@ static inline u64 get_ctr(void)
>       return read_sysreg(ctr_el0);
>  }
>  
> -extern unsigned long dcache_line_size;
> -
>  static inline unsigned long get_id_aa64mmfr0_el1(void)
>  {
>       return read_sysreg(id_aa64mmfr0_el1);
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 066524f8bf61..751ba980000a 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -42,8 +42,6 @@ static struct mem_region 
> __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1];
>  struct mem_region *mem_regions = __initial_mem_regions;
>  phys_addr_t __phys_offset, __phys_end;
>  
> -unsigned long dcache_line_size;
> -
>  int mpidr_to_cpu(uint64_t mpidr)
>  {
>       int i;
> @@ -72,11 +70,6 @@ static void cpu_init(void)
>       ret = dt_for_each_cpu_node(cpu_set, NULL);
>       assert(ret == 0);
>       set_cpu_online(0, true);
> -     /*
> -      * DminLine is log2 of the number of words in the smallest cache line; a
> -      * word is 4 bytes.
> -      */
> -     dcache_line_size = 1 << (CTR_DMINLINE(get_ctr()) + 2);
>  }
>  
>  unsigned int mem_region_get_flags(phys_addr_t paddr)
> diff --git a/arm/cstart.S b/arm/cstart.S
> index ef936ae2f874..954748b00f64 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -7,6 +7,7 @@
>   */
>  #define __ASSEMBLY__
>  #include <auxinfo.h>
> +#include <asm/assembler.h>
>  #include <asm/thread_info.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/pgtable-hwdef.h>
> @@ -197,20 +198,6 @@ asm_mmu_enable:
>  
>       mov     pc, lr
>  
> -.macro dcache_clean_inval domain, start, end, tmp1, tmp2
> -     ldr     \tmp1, =dcache_line_size
> -     ldr     \tmp1, [\tmp1]
> -     sub     \tmp2, \tmp1, #1
> -     bic     \start, \start, \tmp2
> -9998:
> -     /* DCCIMVAC */
> -     mcr     p15, 0, \start, c7, c14, 1
> -     add     \start, \start, \tmp1
> -     cmp     \start, \end
> -     blo     9998b
> -     dsb     \domain
> -.endm
> -
>  .globl asm_mmu_disable
>  asm_mmu_disable:
>       /* SCTLR */
> @@ -223,7 +210,8 @@ asm_mmu_disable:
>       ldr     r0, [r0]
>       ldr     r1, =__phys_end
>       ldr     r1, [r1]
> -     dcache_clean_inval sy, r0, r1, r2, r3
> +     sub     r1, r1, r0
> +     dcache_by_line_op dccimvac, sy, r0, r1, r2, r3
>       isb
>  
>       mov     pc, lr
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index fc1930bcdb53..046bd3914098 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -8,6 +8,7 @@
>  #define __ASSEMBLY__
>  #include <auxinfo.h>
>  #include <asm/asm-offsets.h>
> +#include <asm/assembler.h>
>  #include <asm/ptrace.h>
>  #include <asm/processor.h>
>  #include <asm/page.h>
> @@ -204,20 +205,6 @@ asm_mmu_enable:
>  
>       ret
>  
> -/* Taken with small changes from arch/arm64/incluse/asm/assembler.h */
> -.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2
> -     adrp    \tmp1, dcache_line_size
> -     ldr     \tmp1, [\tmp1, :lo12:dcache_line_size]
> -     sub     \tmp2, \tmp1, #1
> -     bic     \start, \start, \tmp2
> -9998:
> -     dc      \op , \start
> -     add     \start, \start, \tmp1
> -     cmp     \start, \end
> -     b.lo    9998b
> -     dsb     \domain
> -.endm
> -
>  .globl asm_mmu_disable
>  asm_mmu_disable:
>       mrs     x0, sctlr_el1
> @@ -230,6 +217,7 @@ asm_mmu_disable:
>       ldr     x0, [x0, :lo12:__phys_offset]
>       adrp    x1, __phys_end
>       ldr     x1, [x1, :lo12:__phys_end]
> +     sub     x1, x1, x0
>       dcache_by_line_op civac, sy, x0, x1, x2, x3
>       isb
>  

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to