Re: [Qemu-devel] [PULL 12/51] target-arm: A64: Implement DC ZVA

2014-10-08 Thread Christopher Covington
Hi Peter,

On 04/17/2014 06:33 AM, Peter Maydell wrote:
 Implement the DC ZVA instruction, which clears a block of memory.
 The fast path obtains a pointer to the underlying RAM via the TCG TLB
 data structure so we can do a direct memset(), with fallback to a
 simple byte-store loop in the slow path.

 diff --git a/target-arm/helper.c b/target-arm/helper.c
 index 62f7fd3..2ffc588 100644
 --- a/target-arm/helper.c
 +++ b/target-arm/helper.c

 +static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
 +{
 +ARMCPU *cpu = arm_env_get_cpu(env);
 +int dzp_bit = 1 << 4;
 +
 +/* DZP indicates whether DC ZVA access is allowed */
 +if (aa64_zva_access(env, NULL) != CP_ACCESS_OK) {

I believe this logic for the Data Zero Prohibited field is inverted, causing
eglibc to use STP rather than DC ZVA for __memset.

 +dzp_bit = 0;
 +}
 +return cpu->dcz_blocksize | dzp_bit;
 +}

Thanks,
Christopher

-- 
Employee of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by the Linux Foundation.



[Qemu-devel] [PULL 12/51] target-arm: A64: Implement DC ZVA

2014-04-17 Thread Peter Maydell
Implement the DC ZVA instruction, which clears a block of memory.
The fast path obtains a pointer to the underlying RAM via the TCG TLB
data structure so we can do a direct memset(), with fallback to a
simple byte-store loop in the slow path.

Signed-off-by: Peter Maydell <peter.mayd...@linaro.org>
Reviewed-by: Richard Henderson <r...@twiddle.net>
Acked-by: Peter Crosthwaite <peter.crosthwa...@xilinx.com>
---
 include/exec/softmmu_exec.h |  52 +++
 target-arm/cpu-qom.h|   2 +
 target-arm/cpu.h|   3 +-
 target-arm/cpu64.c  |   1 +
 target-arm/helper.c | 122 ++--
 target-arm/helper.h |   1 +
 target-arm/translate-a64.c  |   5 ++
 7 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/include/exec/softmmu_exec.h b/include/exec/softmmu_exec.h
index 6fde154..470db20 100644
--- a/include/exec/softmmu_exec.h
+++ b/include/exec/softmmu_exec.h
@@ -162,3 +162,55 @@
 #define stw(p, v) stw_data(p, v)
 #define stl(p, v) stl_data(p, v)
 #define stq(p, v) stq_data(p, v)
+
+/**
+ * tlb_vaddr_to_host:
+ * @env: CPUArchState
+ * @addr: guest virtual address to look up
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @mmu_idx: MMU index to use for lookup
+ *
+ * Look up the specified guest virtual index in the TCG softmmu TLB.
+ * If the TLB contains a host virtual address suitable for direct RAM
+ * access, then return it. Otherwise (TLB miss, TLB entry is for an
+ * I/O access, etc) return NULL.
+ *
+ * This is the equivalent of the initial fast-path code used by
+ * TCG backends for guest load and store accesses.
+ */
+static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
+  int access_type, int mmu_idx)
+{
+int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
+target_ulong tlb_addr;
+uintptr_t haddr;
+
+switch (access_type) {
+case 0:
+tlb_addr = tlbentry->addr_read;
+break;
+case 1:
+tlb_addr = tlbentry->addr_write;
+break;
+case 2:
+tlb_addr = tlbentry->addr_code;
+break;
+default:
+g_assert_not_reached();
+}
+
+if ((addr & TARGET_PAGE_MASK)
+!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+/* TLB entry is for a different page */
+return NULL;
+}
+
+if (tlb_addr & ~TARGET_PAGE_MASK) {
+/* IO access */
+return NULL;
+}
+
+haddr = addr + env->tlb_table[mmu_idx][index].addend;
+return (void *)haddr;
+}
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 00234e1..41caa6c 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -150,6 +150,8 @@ typedef struct ARMCPU {
 uint32_t reset_cbar;
 uint32_t reset_auxcr;
 bool reset_hivecs;
+/* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
+uint32_t dcz_blocksize;
 } ARMCPU;
 
 #define TYPE_AARCH64_CPU "aarch64-cpu"
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index ff56519..a00ff73 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -758,7 +758,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
 #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
+#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8426bf1..fccecc2 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -46,6 +46,7 @@ static void aarch64_any_initfn(Object *obj)
 set_feature(&cpu->env, ARM_FEATURE_V7MP);
 set_feature(&cpu->env, ARM_FEATURE_AARCH64);
 cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
+cpu->dcz_blocksize = 7; /*  512 bytes */
 }
 #endif
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 62f7fd3..2ffc588 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -10,6 +10,8 @@
 #include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
+#include "exec/softmmu_exec.h"
+
 static inline int get_phys_addr(CPUARMState *env, target_ulong address,
 int access_type, int is_user,
 hwaddr *phys_ptr, int *prot,
@@ -1745,6 +1747,29 @@ static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
 tlb_flush(CPU(cpu), asid == 0);
 }
 
+static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+/* We don't implement EL2, so the only control on DC ZVA is the
+ * bit in the SCTLR which can prohibit access for EL0.
+ */
+if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
+return CP_ACCESS_TRAP;
+}
+