[PATCH v3 3/5] KVM: selftests: Move GUEST_ASSERT_EQ to utils header

2021-05-12 Thread Ricardo Koller
Move GUEST_ASSERT_EQ to a common header, kvm_util.h, for other
architectures and tests to use. Also modify __GUEST_ASSERT so it can be
reused to implement GUEST_ASSERT_EQ.

Signed-off-by: Ricardo Koller 
---
 .../testing/selftests/kvm/include/kvm_util.h  | 22 ++-
 .../selftests/kvm/x86_64/tsc_msrs_test.c  |  9 
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h 
b/tools/testing/selftests/kvm/include/kvm_util.h
index 7880929ea548..fb2b8964f2ca 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -366,26 +366,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, 
struct ucall *uc);
ucall(UCALL_SYNC, 6, "hello", stage, arg1, 
arg2, arg3, arg4)
 #define GUEST_SYNC(stage)  ucall(UCALL_SYNC, 2, "hello", stage)
 #define GUEST_DONE()   ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do {  \
-   if (!(_condition))  \
-   ucall(UCALL_ABORT, 2 + _nargs,  \
-   "Failed guest assert: " \
-   #_condition, __LINE__, _args);  \
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do {\
+   if (!(_condition))  \
+   ucall(UCALL_ABORT, 2 + _nargs,  \
+   "Failed guest assert: " \
+   _condstr, __LINE__, _args); \
 } while (0)
 
 #define GUEST_ASSERT(_condition) \
-   __GUEST_ASSERT((_condition), 0, 0)
+   __GUEST_ASSERT(_condition, #_condition, 0, 0)
 
 #define GUEST_ASSERT_1(_condition, arg1) \
-   __GUEST_ASSERT((_condition), 1, (arg1))
+   __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
 
 #define GUEST_ASSERT_2(_condition, arg1, arg2) \
-   __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+   __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
 
 #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
-   __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+   __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
 
 #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
-   __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+   __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), 
(arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
 
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c 
b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index e357d8e222d4..5a6a662f2e59 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -18,15 +18,6 @@
 #define rounded_rdmsr(x)   ROUND(rdmsr(x))
 #define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vm, 0, x))
 
-#define GUEST_ASSERT_EQ(a, b) do { \
-   __typeof(a) _a = (a);   \
-   __typeof(b) _b = (b);   \
-   if (_a != _b)   \
-ucall(UCALL_ABORT, 4,  \
-"Failed guest assert: "\
-#a " == " #b, __LINE__, _a, _b);   \
-  } while(0)
-
 static void guest_code(void)
 {
u64 val = 0;
-- 
2.31.1.607.g51e8a6a459-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 5/5] KVM: selftests: Add aarch64/debug-exceptions test

2021-05-12 Thread Ricardo Koller
Covers fundamental tests for debug exceptions. The guest installs and
handle its debug exceptions itself, without KVM_SET_GUEST_DEBUG.

Reviewed-by: Andrew Jones 
Signed-off-by: Ricardo Koller 
---
 tools/testing/selftests/kvm/.gitignore|   1 +
 tools/testing/selftests/kvm/Makefile  |   1 +
 .../selftests/kvm/aarch64/debug-exceptions.c  | 250 ++
 .../selftests/kvm/include/aarch64/processor.h |  22 +-
 4 files changed, 268 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/aarch64/debug-exceptions.c

diff --git a/tools/testing/selftests/kvm/.gitignore 
b/tools/testing/selftests/kvm/.gitignore
index e65d5572aefc..f09ed908422b 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/get-reg-list-sve
 /aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/Makefile 
b/tools/testing/selftests/kvm/Makefile
index 618c5903f478..2f92442c0cc9 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -73,6 +73,7 @@ TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c 
b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644
index ..51c42ac24dca
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+
+#define VCPU_ID 0
+
+#define MDSCR_KDE  (1 << 13)
+#define MDSCR_MDE  (1 << 15)
+#define MDSCR_SS   (1 << 0)
+
+#define DBGBCR_LEN8(0xff << 5)
+#define DBGBCR_EXEC(0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E   (0x1 << 0)
+
+#define DBGWCR_LEN8(0xff << 5)
+#define DBGWCR_RD  (0x1 << 3)
+#define DBGWCR_WR  (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E   (0x1 << 0)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS(1 << 21)
+
+extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define  PC(v)  ((uint64_t)&(v))
+
+static void reset_debug_state(void)
+{
+   asm volatile("msr daifset, #8");
+
+   write_sysreg(osdlr_el1, 0);
+   write_sysreg(oslar_el1, 0);
+   isb();
+
+   write_sysreg(mdscr_el1, 0);
+   /* This test only uses the first bp and wp slot. */
+   write_sysreg(dbgbvr0_el1, 0);
+   write_sysreg(dbgbcr0_el1, 0);
+   write_sysreg(dbgwcr0_el1, 0);
+   write_sysreg(dbgwvr0_el1, 0);
+   isb();
+}
+
+static void install_wp(uint64_t addr)
+{
+   uint32_t wcr;
+   uint32_t mdscr;
+
+   wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+   write_sysreg(dbgwcr0_el1, wcr);
+   write_sysreg(dbgwvr0_el1, addr);
+   isb();
+
+   asm volatile("msr daifclr, #8");
+
+   mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+   write_sysreg(mdscr_el1, mdscr);
+   isb();
+}
+
+static void install_hw_bp(uint64_t addr)
+{
+   uint32_t bcr;
+   uint32_t mdscr;
+
+   bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+   write_sysreg(dbgbcr0_el1, bcr);
+   write_sysreg(dbgbvr0_el1, addr);
+   isb();
+
+   asm volatile("msr daifclr, #8");
+
+   mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+   write_sysreg(mdscr_el1, mdscr);
+   isb();
+}
+
+static void install_ss(void)
+{
+   uint32_t mdscr;
+
+   asm volatile("msr daifclr, #8");
+
+   mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+   write_sysreg(mdscr_el1, mdscr);
+   isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(void)
+{
+   GUEST_SYNC(0);
+
+   /* Software-breakpoint */
+   asm volatile("sw_bp: brk #0");
+   GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+   GUEST_SYNC(1);
+
+   /* Hardware-breakpoint */
+   reset_debug_state();
+   install_hw_bp(PC(hw_bp));
+   asm volatile("hw_bp: nop");
+   GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+   GUEST_SYNC(2);
+
+   /* Hardware-breakpoint + svc */
+   reset_debug_state();
+   install_hw_bp(PC(bp_svc));
+   asm volatile("bp_svc: svc #0");
+   GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+   GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+   GUEST_SYNC(3);
+
+   /* Hardware-breakpoint + software-breakpoint */
+   reset_debug_state();
+  

[PATCH v3 1/5] KVM: selftests: Rename vm_handle_exception

2021-05-12 Thread Ricardo Koller
Rename the vm_handle_exception function to a name that indicates more
clearly that it installs something: vm_install_vector_handler.

Reviewed-by: Andrew Jones 
Reviewed-by: Eric Auger 
Suggested-by: Marc Zyngier 
Suggested-by: Andrew Jones 
Signed-off-by: Ricardo Koller 
---
 tools/testing/selftests/kvm/include/x86_64/processor.h| 2 +-
 tools/testing/selftests/kvm/lib/x86_64/processor.c| 4 ++--
 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c  | 2 +-
 .../selftests/kvm/x86_64/userspace_msr_exit_test.c| 8 
 tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c   | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h 
b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0b30b4e15c38..12889d3e8948 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -391,7 +391,7 @@ struct ex_regs {
 
 void vm_init_descriptor_tables(struct kvm_vm *vm);
 void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
+void vm_install_vector_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c 
b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a8906e60a108..e156061263a6 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1250,8 +1250,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, 
uint32_t vcpuid)
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(_handlers)) = 
vm->handlers;
 }
 
-void vm_handle_exception(struct kvm_vm *vm, int vector,
-void (*handler)(struct ex_regs *))
+void vm_install_vector_handler(struct kvm_vm *vm, int vector,
+  void (*handler)(struct ex_regs *))
 {
vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c 
b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 732b244d6956..5ae5f748723a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -227,7 +227,7 @@ int main(void)
 
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
-   vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+   vm_install_vector_handler(vm, GP_VECTOR, guest_gp_handler);
 
enter_guest(vm);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c 
b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index 72c0d0797522..20c373e2d329 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) {
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
 
-   vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+   vm_install_vector_handler(vm, GP_VECTOR, guest_gp_handler);
 
/* Process guest code userspace exits. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
@@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) {
run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
 
-   vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+   vm_install_vector_handler(vm, UD_VECTOR, guest_ud_handler);
run_guest(vm);
-   vm_handle_exception(vm, UD_VECTOR, NULL);
+   vm_install_vector_handler(vm, UD_VECTOR, NULL);
 
if (process_ucall(vm) != UCALL_DONE) {
-   vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+   vm_install_vector_handler(vm, GP_VECTOR, guest_fep_gp_handler);
 
/* Process emulated rdmsr and wrmsr instructions. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c 
b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 2f964cdc273c..ded70ff465d5 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -462,7 +462,7 @@ int main(int argc, char *argv[])
 
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
-   vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+   vm_install_vector_handler(vm, IPI_VECTOR, guest_ipi_handler);
 
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
 
-- 
2.31.1.607.g51e8a6a459-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Ricardo Koller
Add the infrastructure needed to enable exception handling in aarch64
selftests. The exception handling defaults to an unhandled-exception
handler which aborts the test, just like x86. These handlers can be
overridden by calling vm_install_vector_handler(vector) or
vm_install_exception_handler(vector, ec). The unhandled exception
reporting from the guest is done using the ucall type introduced in a
previous commit, UCALL_UNHANDLED.

The exception handling code is heavily inspired on kvm-unit-tests.

Signed-off-by: Ricardo Koller 
---
 tools/testing/selftests/kvm/Makefile  |   2 +-
 .../selftests/kvm/include/aarch64/processor.h |  63 +
 .../selftests/kvm/lib/aarch64/handlers.S  | 124 +
 .../selftests/kvm/lib/aarch64/processor.c | 131 ++
 4 files changed, 319 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S

diff --git a/tools/testing/selftests/kvm/Makefile 
b/tools/testing/selftests/kvm/Makefile
index 4e548d7ab0ab..618c5903f478 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -35,7 +35,7 @@ endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c 
lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c 
lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c 
lib/aarch64/handlers.S
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c 
lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h 
b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b7fa0c8551db..bc81cd62254f 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,6 +8,7 @@
 #define SELFTEST_KVM_PROCESSOR_H
 
 #include "kvm_util.h"
+#include 
 
 
 #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
@@ -18,6 +19,7 @@
 #define MAIR_EL1   3, 0, 10, 2, 0
 #define TTBR0_EL1  3, 0,  2, 0, 0
 #define SCTLR_EL1  3, 0,  1, 0, 0
+#define VBAR_EL1   3, 0, 12, 0, 0
 
 /*
  * Default MAIR
@@ -56,4 +58,65 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, 
struct kvm_vcpu_init *ini
 void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
  struct kvm_vcpu_init *init, void *guest_code);
 
+struct ex_regs {
+   u64 regs[31];
+   u64 sp;
+   u64 pc;
+   u64 pstate;
+};
+
+#define VECTOR_NUM 16
+
+enum {
+   VECTOR_SYNC_CURRENT_SP0,
+   VECTOR_IRQ_CURRENT_SP0,
+   VECTOR_FIQ_CURRENT_SP0,
+   VECTOR_ERROR_CURRENT_SP0,
+
+   VECTOR_SYNC_CURRENT,
+   VECTOR_IRQ_CURRENT,
+   VECTOR_FIQ_CURRENT,
+   VECTOR_ERROR_CURRENT,
+
+   VECTOR_SYNC_LOWER_64,
+   VECTOR_IRQ_LOWER_64,
+   VECTOR_FIQ_LOWER_64,
+   VECTOR_ERROR_LOWER_64,
+
+   VECTOR_SYNC_LOWER_32,
+   VECTOR_IRQ_LOWER_32,
+   VECTOR_FIQ_LOWER_32,
+   VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+  (v) == VECTOR_SYNC_CURRENT || \
+  (v) == VECTOR_SYNC_LOWER_64|| \
+  (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM 64
+#define ESR_EC_SHIFT   26
+#define ESR_EC_MASK(ESR_EC_NUM - 1)
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+   int vector, int ec, handler_fn handler);
+void vm_install_vector_handler(struct kvm_vm *vm,
+   int vector, handler_fn handler);
+
+#define write_sysreg(reg, val)   \
+({   \
+   u64 __val = (u64)(val);   \
+   asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val));   \
+})
+
+#define read_sysreg(reg) \
+({ u64 val;  \
+   asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
+   val;  \
+})
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S 
b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index ..49bf8827c6ab
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+   add sp, sp, #-16 * 17
+

[PATCH v3 2/5] KVM: selftests: Introduce UCALL_UNHANDLED for unhandled vector reporting

2021-05-12 Thread Ricardo Koller
x86, the only arch implementing exception handling, reports unhandled
vectors using port IO at a specific port number. This replicates what
ucall already does.

Introduce a new ucall type, UCALL_UNHANDLED, for guests to report
unhandled exceptions. Then replace the x86 unhandled vector exception
reporting to use it instead of port IO.  This new ucall type will be
used in the next commits by arm64 to report unhandled vectors as well.

Tested: Forcing a page fault in the ./x86_64/xapic_ipi_test
halter_guest_code() shows this:

$ ./x86_64/xapic_ipi_test
...
  Unexpected vectored event in guest (vector:0xe)

Reviewed-by: Eric Auger 
Reviewed-by: Andrew Jones 
Signed-off-by: Ricardo Koller 
---
 tools/testing/selftests/kvm/include/kvm_util.h |  1 +
 .../selftests/kvm/include/x86_64/processor.h   |  2 --
 .../selftests/kvm/lib/x86_64/processor.c   | 18 +++---
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h 
b/tools/testing/selftests/kvm/include/kvm_util.h
index bea4644d645d..7880929ea548 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -347,6 +347,7 @@ enum {
UCALL_SYNC,
UCALL_ABORT,
UCALL_DONE,
+   UCALL_UNHANDLED,
 };
 
 #define UCALL_MAX_ARGS 6
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h 
b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 12889d3e8948..ff4da2f95b13 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -53,8 +53,6 @@
 #define CPUID_PKU  (1ul << 3)
 #define CPUID_LA57 (1ul << 16)
 
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
-
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
u64 rax;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c 
b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index e156061263a6..814bb695d803 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1207,7 +1207,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, 
unsigned long addr,
 
 void kvm_exit_unexpected_vector(uint32_t value)
 {
-   outl(UNEXPECTED_VECTOR_PORT, value);
+   ucall(UCALL_UNHANDLED, 1, value);
 }
 
 void route_exception(struct ex_regs *regs)
@@ -1260,16 +1260,12 @@ void vm_install_vector_handler(struct kvm_vm *vm, int 
vector,
 
 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 {
-   if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
-   && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
-   && vcpu_state(vm, vcpuid)->io.size == 4) {
-   /* Grab pointer to io data */
-   uint32_t *data = (void *)vcpu_state(vm, vcpuid)
-   + vcpu_state(vm, vcpuid)->io.data_offset;
-
-   TEST_ASSERT(false,
-   "Unexpected vectored event in guest (vector:0x%x)",
-   *data);
+   struct ucall uc;
+
+   if (get_ucall(vm, vcpuid, ) == UCALL_UNHANDLED) {
+   uint64_t vector = uc.args[0];
+   TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
+ vector);
}
 }
 
-- 
2.31.1.607.g51e8a6a459-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 0/5] KVM: selftests: arm64 exception handling and debug test

2021-05-12 Thread Ricardo Koller
Hi,

These patches add a debug exception test in aarch64 KVM selftests while
also adding basic exception handling support.

The structure of the exception handling is based on its x86 counterpart.
Tests use the same calls to initialize exception handling and both
architectures allow tests to override the handler for a particular
vector, or (vector, ec) for synchronous exceptions in the arm64 case.

The debug test is similar to x86_64/debug_regs, except that the x86 one
controls the debugging from outside the VM. This proposed arm64 test
controls and handles debug exceptions from the inside.

Thanks,
Ricardo

v2 -> v3:

Addressed comments from Andrew and Marc (thanks again). Also, many thanks for
the reviews and tests from Eric and Zenghui.
- add missing ISBs after writing into debug registers.
- not store/restore of sp_el0 on exceptions.
- add default handlers for Error and FIQ.
- change multiple TEST_ASSERT(false, ...) to TEST_FAIL.
- use Andrew's suggestion regarding __GUEST_ASSERT modifications
  in order to easier implement GUEST_ASSERT_EQ (Thanks Andrew).

v1 -> v2:

Addressed comments from Andrew and Marc (thank you very much):
- rename vm_handle_exception in all tests.
- introduce UCALL_UNHANDLED in x86 first.
- move GUEST_ASSERT_EQ to common utils header.
- handle sync and other exceptions separately: use two tables (like
  kvm-unit-tests).
- add two separate functions for installing sync versus other exceptions
- changes in handlers.S: use the same layout as user_pt_regs, treat the
  EL1t vectors as invalid, refactor the vector table creation to not use
  manual numbering, add comments, remove LR from the stored registers.
- changes in debug-exceptions.c: remove unused headers, use the common
  GUEST_ASSERT_EQ, use vcpu_run instead of _vcpu_run.
- changes in processor.h: write_sysreg with support for xzr, replace EL1
  with current in macro names, define ESR_EC_MASK as ESR_EC_NUM-1.

Ricardo Koller (5):
  KVM: selftests: Rename vm_handle_exception
  KVM: selftests: Introduce UCALL_UNHANDLED for unhandled vector
reporting
  KVM: selftests: Move GUEST_ASSERT_EQ to utils header
  KVM: selftests: Add exception handling support for aarch64
  KVM: selftests: Add aarch64/debug-exceptions test

 tools/testing/selftests/kvm/.gitignore|   1 +
 tools/testing/selftests/kvm/Makefile  |   3 +-
 .../selftests/kvm/aarch64/debug-exceptions.c  | 250 ++
 .../selftests/kvm/include/aarch64/processor.h |  83 +-
 .../testing/selftests/kvm/include/kvm_util.h  |  23 +-
 .../selftests/kvm/include/x86_64/processor.h  |   4 +-
 .../selftests/kvm/lib/aarch64/handlers.S  | 124 +
 .../selftests/kvm/lib/aarch64/processor.c | 131 +
 .../selftests/kvm/lib/x86_64/processor.c  |  22 +-
 .../selftests/kvm/x86_64/kvm_pv_test.c|   2 +-
 .../selftests/kvm/x86_64/tsc_msrs_test.c  |   9 -
 .../kvm/x86_64/userspace_msr_exit_test.c  |   8 +-
 .../selftests/kvm/x86_64/xapic_ipi_test.c |   2 +-
 13 files changed, 615 insertions(+), 47 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/aarch64/debug-exceptions.c
 create mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S

-- 
2.31.1.607.g51e8a6a459-goog

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Ricardo Koller
On Wed, May 12, 2021 at 05:18:42PM +0100, Marc Zyngier wrote:
> On 2021-05-12 17:03, Ricardo Koller wrote:
> > On Wed, May 12, 2021 at 02:43:28PM +0100, Marc Zyngier wrote:
> > > On 2021-05-12 13:59, Zenghui Yu wrote:
> > > > Hi Eric,
> > > >
> > > > On 2021/5/6 20:30, Auger Eric wrote:
> > > > > running the test on 5.12 I get
> > > > >
> > > > >  Test Assertion Failure 
> > > > >   aarch64/debug-exceptions.c:232: false
> > > > >   pid=6477 tid=6477 errno=4 - Interrupted system call
> > > > >  10x0040147b: main at debug-exceptions.c:230
> > > > >  20x03ff8aa60de3: ?? ??:0
> > > > >  30x00401517: _start at :?
> > > > >   Failed guest assert: hw_bp_addr == PC(hw_bp) at
> > > > > aarch64/debug-exceptions.c:105
> > > > >   values: 0, 0x401794
> > > >
> > > > FYI I can also reproduce it on my VHE box. And Drew's suggestion [*]
> > > > seemed to work for me. Is the ISB a requirement of architecture?
> > > 
> > > Very much so. Given that there is no context synchronisation (such as
> > > ERET or an interrupt) in this code, the CPU is perfectly allowed to
> > > delay the system register effect as long as it can.
> > > 
> > > M.
> > > --
> > > Jazz is not dead. It just smells funny...
> > 
> > Thank you very much Eric, Zenghui, Marc, and Andrew (for the ISB
> > suggestion)!
> > 
> > As per Zenghui test, will send a V3 that includes the missing ISBs.
> > Hopefully that will fix the issue for Eric as well. It's very
> > interesting that the CPU seems to _always_ reorder those instructions.
> 
> I suspect that because hitting the debug registers can be a costly
> operation (it mobilises a lot of resources in the CPU), there is
> a strong incentive to let it slide until there is an actual mandate
> to commit the resource.
> 
> It also means that SW can issue a bunch of these without too much
> overhead, and only pay the cost *once*.
> 
> Your N1 CPU seems to be less aggressive on this. Implement choice,
> I'd say (it probably is more aggressive than TX2 on other things).
> Also, QEMU will almost always hide these problems, due to the nature
> of TCG.
> 
> Thanks,
> 
>  M.
> -- 
> Jazz is not dead. It just smells funny...

Thank you, this is very informative.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Ricardo Koller
On Wed, May 12, 2021 at 10:52:09AM +0200, Auger Eric wrote:
> Hi,
> 
> On 5/12/21 10:33 AM, Marc Zyngier wrote:
> > On 2021-05-12 09:19, Auger Eric wrote:
> >> Hi Ricardo,
> >>
> >> On 5/12/21 9:27 AM, Ricardo Koller wrote:
> >>> On Fri, May 07, 2021 at 04:08:07PM +0200, Auger Eric wrote:
>  Hi Ricardo,
> 
>  On 5/6/21 9:14 PM, Ricardo Koller wrote:
> > On Thu, May 06, 2021 at 02:30:17PM +0200, Auger Eric wrote:
> >> Hi Ricardo,
> >>
> >
> > Hi Eric,
> >
> > Thank you very much for the test.
> >
> >> On 5/3/21 9:12 PM, Ricardo Koller wrote:
> >>> On Mon, May 03, 2021 at 11:32:39AM +0100, Marc Zyngier wrote:
>  On Sat, 01 May 2021 00:24:06 +0100,
>  Ricardo Koller  wrote:
> >
> > Add the infrastructure needed to enable exception handling in
> > aarch64
> > selftests. The exception handling defaults to an
> > unhandled-exception
> > handler which aborts the test, just like x86. These handlers
> > can be
> > overridden by calling vm_install_vector_handler(vector) or
> > vm_install_exception_handler(vector, ec). The unhandled exception
> > reporting from the guest is done using the ucall type
> > introduced in a
> > previous commit, UCALL_UNHANDLED.
> >
> > The exception handling code is heavily inspired on kvm-unit-tests.
> >>
> >> running the test on 5.12 I get
> >>
> >>>
> >>> Hi Eric,
> >>>
> >>> I'm able to reproduce the failure you are seeing on 5.6, specifically
> >>> with kernels older than this commit:
> >>>
> >>>   4942dc6638b0 KVM: arm64: Write arch.mdcr_el2 changes since last
> >>> vcpu_load on VHE
> >>>
> >>> but not yet on v5.12. Could you share the commit of the kernel you are
> >>> testing, please?
> >>
> >> my host is a 5.12 kernel (8404c9fbc84b)
> > 
> > Time to compare notes then. What HW are you using? Running VHE or not?
> VHE yes. Cavium Sabre system.
> 

On my side. VHE (v5.12) on both QEMU (5.2.0 cpu=max) and Ampere Altra.

> Thanks
> 
> Eric
> > 
> > Thanks,
> > 
> >     M.
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Ricardo Koller
On Wed, May 12, 2021 at 02:43:28PM +0100, Marc Zyngier wrote:
> On 2021-05-12 13:59, Zenghui Yu wrote:
> > Hi Eric,
> > 
> > On 2021/5/6 20:30, Auger Eric wrote:
> > > running the test on 5.12 I get
> > > 
> > >  Test Assertion Failure 
> > >   aarch64/debug-exceptions.c:232: false
> > >   pid=6477 tid=6477 errno=4 - Interrupted system call
> > >  10x0040147b: main at debug-exceptions.c:230
> > >  20x03ff8aa60de3: ?? ??:0
> > >  30x00401517: _start at :?
> > >   Failed guest assert: hw_bp_addr == PC(hw_bp) at
> > > aarch64/debug-exceptions.c:105
> > >   values: 0, 0x401794
> > 
> > FYI I can also reproduce it on my VHE box. And Drew's suggestion [*]
> > seemed to work for me. Is the ISB a requirement of architecture?
> 
> Very much so. Given that there is no context synchronisation (such as
> ERET or an interrupt) in this code, the CPU is perfectly allowed to
> delay the system register effect as long as it can.
> 
> M.
> -- 
> Jazz is not dead. It just smells funny...

Thank you very much Eric, Zenghui, Marc, and Andrew (for the ISB
suggestion)!

As per Zenghui test, will send a V3 that includes the missing ISBs.
Hopefully that will fix the issue for Eric as well. It's very
interesting that the CPU seems to _always_ reorder those instructions.

Thanks!
Ricardo
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 5.4 000/244] 5.4.119-rc1 review

2021-05-12 Thread Sasha Levin

On Wed, May 12, 2021 at 06:02:37PM +0100, Marc Zyngier wrote:

On Wed, 12 May 2021 18:00:16 +0100,
Alexandru Elisei  wrote:

I made this change to get it to build:

$ git diff
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dd03d5e01a94..32564b017ba0 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -335,6 +335,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu,
int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 static inline void kvm_arm_init_debug(void) {}
+static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}

which matches the stub for kvm_arm_init_debug(). I can spin a patch
out of it and send it for 5.4 and 4.19. Marc, what do you think?


Ideally, we'd drop the patch in its current form from 5.4 and 4.19,
and send an updated patch with this hunk to fix the 32bit build.


Yes please. I've dropped it from 5.4 and 4.19 and we can queue up a
fixed backport for the next round of releases.

--
Thanks,
Sasha
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 5.4 000/244] 5.4.119-rc1 review

2021-05-12 Thread Greg Kroah-Hartman
On Wed, May 12, 2021 at 06:02:37PM +0100, Marc Zyngier wrote:
> On Wed, 12 May 2021 18:00:16 +0100,
> Alexandru Elisei  wrote:
> > 
> > Hi Naresh,
> > 
> > Thank you for the report!
> > 
> > On 5/12/21 5:47 PM, Naresh Kamboju wrote:
> > > On Wed, 12 May 2021 at 20:22, Greg Kroah-Hartman
> > >  wrote:
> > >> This is the start of the stable review cycle for the 5.4.119 release.
> > >> There are 244 patches in this series, all will be posted as a response
> > >> to this one.  If anyone has any issues with these being applied, please
> > >> let me know.
> > >>
> > >> Responses should be made by Fri, 14 May 2021 14:47:09 +.
> > >> Anything received after that time might be too late.
> > >>
> > >> The whole patch series can be found in one patch at:
> > >> 
> > >> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.119-rc1.gz
> > >> or in the git tree and branch at:
> > >> 
> > >> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> > >> linux-5.4.y
> > >> and the diffstat can be found below.
> > >>
> > >> thanks,
> > >>
> > >> greg k-h
> > > Build regression detected.
> > >
> > >> Alexandru Elisei 
> > >> KVM: arm64: Initialize VCPU mdcr_el2 before loading it
> > > stable rc 5.4 arm axm55xx_defconfig builds failed due to these
> > > warnings / errors.
> > >   - arm (axm55xx_defconfig) with gcc-8,9 and 10 failed
> > >
> > > arch/arm/kvm/../../../virt/kvm/arm/arm.c: In function 
> > > 'kvm_vcpu_first_run_init':
> > > arch/arm/kvm/../../../virt/kvm/arm/arm.c:582:2: error: implicit
> > > declaration of function 'kvm_arm_vcpu_init_debug'; did you mean
> > > 'kvm_arm_init_debug'? [-Werror=implicit-function-declaration]
> > >   kvm_arm_vcpu_init_debug(vcpu);
> > >   ^~~
> > >   kvm_arm_init_debug
> > > cc1: some warnings being treated as errors
> > 
> > This is my fault, in Linux v5.4 KVM for arm is still around, and
> > there's no prototype for the function when compiling for arm. I
> > suspect that's also the case for v4.19.
> > 
> > I made this change to get it to build:
> > 
> > $ git diff
> > diff --git a/arch/arm/include/asm/kvm_host.h 
> > b/arch/arm/include/asm/kvm_host.h
> > index dd03d5e01a94..32564b017ba0 100644
> > --- a/arch/arm/include/asm/kvm_host.h
> > +++ b/arch/arm/include/asm/kvm_host.h
> > @@ -335,6 +335,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu 
> > *vcpu,
> > int cpu) {}
> >  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> >  
> >  static inline void kvm_arm_init_debug(void) {}
> > +static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {}
> >  static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
> >  static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
> >  static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
> > 
> > which matches the stub for kvm_arm_init_debug(). I can spin a patch
> > out of it and send it for 5.4 and 4.19. Marc, what do you think?
> 
> Ideally, we'd drop the patch in its current form from 5.4 and 4.19,
> and send an updated patch with this hunk to fix the 32bit build.
> 

That would be great.  Do you want to do it for this set of releases
(i.e. today/tomorrow) or have me just drop this now and someone will
send me an updated version later when they get a chance?

thanks,

greg k-h
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v11 2/6] arm64: kvm: Introduce MTE VM feature

2021-05-12 Thread Catalin Marinas
On Wed, May 12, 2021 at 04:46:48PM +0100, Steven Price wrote:
> On 10/05/2021 19:35, Catalin Marinas wrote:
> > On Fri, May 07, 2021 at 07:25:39PM +0100, Catalin Marinas wrote:
> > > On Thu, May 06, 2021 at 05:15:25PM +0100, Steven Price wrote:
> > > > On 04/05/2021 18:40, Catalin Marinas wrote:
> > > > > On Thu, Apr 29, 2021 at 05:06:41PM +0100, Steven Price wrote:
> > > > > > Given the changes to set_pte_at() which means that tags are 
> > > > > > restored from
> > > > > > swap even if !PROT_MTE, the only race I can see remaining is the 
> > > > > > creation of
> > > > > > new PROT_MTE mappings. As you mention an attempt to change mappings 
> > > > > > in the
> > > > > > VMM memory space should involve a mmu notifier call which I think 
> > > > > > serialises
> > > > > > this. So the remaining issue is doing this in a separate address 
> > > > > > space.
> > > > > > 
> > > > > > So I guess the potential problem is:
> > > > > > 
> > > > > >* allocate memory MAP_SHARED but !PROT_MTE
> > > > > >* fork()
> > > > > >* VM causes a fault in parent address space
> > > > > >* child does a mprotect(PROT_MTE)
> > > > > > 
> > > > > > With the last two potentially racing. Sadly I can't see a good way 
> > > > > > of
> > > > > > handling that.
[...]
> > Options:
> > 
> > 1. Change the mte_sync_tags() code path to set the flag after clearing
> > and avoid reading stale tags. We document that mprotect() on
> > MAP_SHARED may lead to tag loss. Maybe we can intercept this in the
> > arch code and return an error.
> 
> This is the best option I've come up with so far - but it's not a good
> one! We can replace the set_bit() with a test_and_set_bit() to catch the
> race after it has occurred - but I'm not sure what we can do about it
> then (we've already wiped the data). Returning an error doesn't seem
> particularly useful at that point, a message in dmesg is about the best
> I can come up with.

What I meant about intercepting is on something like
arch_validate_flags() to prevent VM_SHARED and VM_MTE together but only
for mprotect(), not mmap(). However, arch_validate_flags() is currently
called on both mmap() and mprotect() paths.

We can't do much in set_pte_at() to prevent the race with only a single
bit.

> > 2. Figure out some other locking in the core code. However, if
> > mprotect() in one process can race with a handle_pte_fault() in
> > another, on the same shared mapping, it's not trivial.
> > filemap_map_pages() would take the page lock before calling
> > do_set_pte(), so mprotect() would need the same page lock.
> 
> I can't see how this is going to work without harming the performance of
> non-MTE work. Ultimately we're trying to add some sort of locking for
> two (mostly) unrelated processes doing page table operations, which will
> hurt scalability.

Another option is to have an arch callback to force re-faulting on the
pte. That means we don't populate it back after the invalidation in the
change_protection() path. We could do this only if the new pte is tagged
and the page doesn't have PG_mte_tagged. The faulting path takes the
page lock IIUC.

Well, at least for stage 1, I haven't thought much about stage 2.

> > 3. Use another PG_arch_3 bit as a lock to spin on in the arch code (i.e.
> > set it around the other PG_arch_* bit setting).
> 
> This is certainly tempting, although sadly the existing
> wait_on_page_bit() is sleeping - so this would either be a literal spin,
> or we'd need to implement a new non-sleeping wait mechanism.

Yeah, it would have to be a custom spinning mechanism, something like:

/* lock the page */
while (test_and_set_bit(PG_arch_3, >flags))
smp_cond_load_relaxed(>flags, !(VAL & PG_arch_3));
...
/* unlock the page */
clear_bit(PG_arch_3, >flags);

> 4. Sledgehammer locking in mte_sync_page_tags(), add a spinlock only for
> the MTE case where we have to sync tags (see below). What the actual
> performance impact of this is I've no idea. It could certainly be bad
> if there are a lot of pages with MTE enabled, which sadly is exactly
> the case if KVM is used with MTE :(
> 
> --->8
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 0d320c060ebe..389ad40256f6 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -25,23 +25,33 @@
>  u64 gcr_kernel_excl __ro_after_init;
>  static bool report_fault_once = true;
> +static spinlock_t tag_sync_lock;
>  static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool 
> check_swap,
>  bool pte_is_tagged)
>  {
>   pte_t old_pte = READ_ONCE(*ptep);
> + if (!is_swap_pte(old_pte) && !pte_is_tagged)
> + return;
> +
> + spin_lock_irqsave(_sync_lock, flags);
> +
> + /* Recheck with the lock held */
> + if (test_bit(PG_mte_tagged, >flags))
> + goto out;

Can we skip the lock if the page already has the PG_mte_tagged set?

Re: [PATCH 5.4 000/244] 5.4.119-rc1 review

2021-05-12 Thread Guenter Roeck
On Wed, May 12, 2021 at 06:00:16PM +0100, Alexandru Elisei wrote:
> Hi Naresh,
> 
> Thank you for the report!
> 
> On 5/12/21 5:47 PM, Naresh Kamboju wrote:
> > On Wed, 12 May 2021 at 20:22, Greg Kroah-Hartman
> >  wrote:
> >> This is the start of the stable review cycle for the 5.4.119 release.
> >> There are 244 patches in this series, all will be posted as a response
> >> to this one.  If anyone has any issues with these being applied, please
> >> let me know.
> >>
> >> Responses should be made by Fri, 14 May 2021 14:47:09 +.
> >> Anything received after that time might be too late.
> >>
> >> The whole patch series can be found in one patch at:
> >> 
> >> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.119-rc1.gz
> >> or in the git tree and branch at:
> >> 
> >> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> >> linux-5.4.y
> >> and the diffstat can be found below.
> >>
> >> thanks,
> >>
> >> greg k-h
> > Build regression detected.
> >
> >> Alexandru Elisei 
> >> KVM: arm64: Initialize VCPU mdcr_el2 before loading it
> > stable rc 5.4 arm axm55xx_defconfig builds failed due to these
> > warnings / errors.
> >   - arm (axm55xx_defconfig) with gcc-8,9 and 10 failed
> >
> > arch/arm/kvm/../../../virt/kvm/arm/arm.c: In function 
> > 'kvm_vcpu_first_run_init':
> > arch/arm/kvm/../../../virt/kvm/arm/arm.c:582:2: error: implicit
> > declaration of function 'kvm_arm_vcpu_init_debug'; did you mean
> > 'kvm_arm_init_debug'? [-Werror=implicit-function-declaration]
> >   kvm_arm_vcpu_init_debug(vcpu);
> >   ^~~
> >   kvm_arm_init_debug
> > cc1: some warnings being treated as errors
> 
> This is my fault, in Linux v5.4 KVM for arm is still around, and there's no
> prototype for the function when compiling for arm. I suspect that's also the 
> case
> for v4.19.
> 

Correct.

Guenter

> I made this change to get it to build:
> 
> $ git diff
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index dd03d5e01a94..32564b017ba0 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -335,6 +335,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu 
> *vcpu,
> int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>  
>  static inline void kvm_arm_init_debug(void) {}
> +static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
> 
> which matches the stub for kvm_arm_init_debug(). I can spin a patch out of it 
> and
> send it for 5.4 and 4.19. Marc, what do you think?
> 
> Thanks,
> 
> Alex
> 
> >
> >
> > steps to reproduce:
> > 
> > #!/bin/sh
> >
> > # TuxMake is a command line tool and Python library that provides
> > # portable and repeatable Linux kernel builds across a variety of
> > # architectures, toolchains, kernel configurations, and make targets.
> > #
> > # TuxMake supports the concept of runtimes.
> > # See https://docs.tuxmake.org/runtimes/, for that to work it requires
> > # that you install podman or docker on your system.
> > #
> > # To install tuxmake on your system globally:
> > # sudo pip3 install -U tuxmake
> > #
> > # See https://docs.tuxmake.org/ for complete documentation.
> >
> >
> > tuxmake --runtime podman --target-arch arm --toolchain gcc-8 --kconfig
> > axm55xx_defconfig
> >
> > ref:
> > https://builds.tuxbuild.com/1sRT0HOyHnZ8N5ktJmaEcMIQZL0/
> >
> >
> > --
> > Linaro LKFT
> > https://lkft.linaro.org
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 5.4 000/244] 5.4.119-rc1 review

2021-05-12 Thread Marc Zyngier
On Wed, 12 May 2021 18:00:16 +0100,
Alexandru Elisei  wrote:
> 
> Hi Naresh,
> 
> Thank you for the report!
> 
> On 5/12/21 5:47 PM, Naresh Kamboju wrote:
> > On Wed, 12 May 2021 at 20:22, Greg Kroah-Hartman
> >  wrote:
> >> This is the start of the stable review cycle for the 5.4.119 release.
> >> There are 244 patches in this series, all will be posted as a response
> >> to this one.  If anyone has any issues with these being applied, please
> >> let me know.
> >>
> >> Responses should be made by Fri, 14 May 2021 14:47:09 +.
> >> Anything received after that time might be too late.
> >>
> >> The whole patch series can be found in one patch at:
> >> 
> >> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.119-rc1.gz
> >> or in the git tree and branch at:
> >> 
> >> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> >> linux-5.4.y
> >> and the diffstat can be found below.
> >>
> >> thanks,
> >>
> >> greg k-h
> > Build regression detected.
> >
> >> Alexandru Elisei 
> >> KVM: arm64: Initialize VCPU mdcr_el2 before loading it
> > stable rc 5.4 arm axm55xx_defconfig builds failed due to these
> > warnings / errors.
> >   - arm (axm55xx_defconfig) with gcc-8,9 and 10 failed
> >
> > arch/arm/kvm/../../../virt/kvm/arm/arm.c: In function 
> > 'kvm_vcpu_first_run_init':
> > arch/arm/kvm/../../../virt/kvm/arm/arm.c:582:2: error: implicit
> > declaration of function 'kvm_arm_vcpu_init_debug'; did you mean
> > 'kvm_arm_init_debug'? [-Werror=implicit-function-declaration]
> >   kvm_arm_vcpu_init_debug(vcpu);
> >   ^~~
> >   kvm_arm_init_debug
> > cc1: some warnings being treated as errors
> 
> This is my fault, in Linux v5.4 KVM for arm is still around, and
> there's no prototype for the function when compiling for arm. I
> suspect that's also the case for v4.19.
> 
> I made this change to get it to build:
> 
> $ git diff
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index dd03d5e01a94..32564b017ba0 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -335,6 +335,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu 
> *vcpu,
> int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>  
>  static inline void kvm_arm_init_debug(void) {}
> +static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
> 
> which matches the stub for kvm_arm_init_debug(). I can spin a patch
> out of it and send it for 5.4 and 4.19. Marc, what do you think?

Ideally, we'd drop the patch in its current form from 5.4 and 4.19,
and send an updated patch with this hunk to fix the 32bit build.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 5.4 000/244] 5.4.119-rc1 review

2021-05-12 Thread Alexandru Elisei
Hi Naresh,

Thank you for the report!

On 5/12/21 5:47 PM, Naresh Kamboju wrote:
> On Wed, 12 May 2021 at 20:22, Greg Kroah-Hartman
>  wrote:
>> This is the start of the stable review cycle for the 5.4.119 release.
>> There are 244 patches in this series, all will be posted as a response
>> to this one.  If anyone has any issues with these being applied, please
>> let me know.
>>
>> Responses should be made by Fri, 14 May 2021 14:47:09 +.
>> Anything received after that time might be too late.
>>
>> The whole patch series can be found in one patch at:
>> 
>> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.119-rc1.gz
>> or in the git tree and branch at:
>> 
>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
>> linux-5.4.y
>> and the diffstat can be found below.
>>
>> thanks,
>>
>> greg k-h
> Build regression detected.
>
>> Alexandru Elisei 
>> KVM: arm64: Initialize VCPU mdcr_el2 before loading it
> stable rc 5.4 arm axm55xx_defconfig builds failed due to these
> warnings / errors.
>   - arm (axm55xx_defconfig) with gcc-8,9 and 10 failed
>
> arch/arm/kvm/../../../virt/kvm/arm/arm.c: In function 
> 'kvm_vcpu_first_run_init':
> arch/arm/kvm/../../../virt/kvm/arm/arm.c:582:2: error: implicit
> declaration of function 'kvm_arm_vcpu_init_debug'; did you mean
> 'kvm_arm_init_debug'? [-Werror=implicit-function-declaration]
>   kvm_arm_vcpu_init_debug(vcpu);
>   ^~~
>   kvm_arm_init_debug
> cc1: some warnings being treated as errors

This is my fault, in Linux v5.4 KVM for arm is still around, and there's no
prototype for the function when compiling for arm. I suspect that's also the 
case
for v4.19.

I made this change to get it to build:

$ git diff
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dd03d5e01a94..32564b017ba0 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -335,6 +335,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu,
int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 static inline void kvm_arm_init_debug(void) {}
+static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}

which matches the stub for kvm_arm_init_debug(). I can spin a patch out of it 
and
send it for 5.4 and 4.19. Marc, what do you think?

Thanks,

Alex

>
>
> steps to reproduce:
> 
> #!/bin/sh
>
> # TuxMake is a command line tool and Python library that provides
> # portable and repeatable Linux kernel builds across a variety of
> # architectures, toolchains, kernel configurations, and make targets.
> #
> # TuxMake supports the concept of runtimes.
> # See https://docs.tuxmake.org/runtimes/, for that to work it requires
> # that you install podman or docker on your system.
> #
> # To install tuxmake on your system globally:
> # sudo pip3 install -U tuxmake
> #
> # See https://docs.tuxmake.org/ for complete documentation.
>
>
> tuxmake --runtime podman --target-arch arm --toolchain gcc-8 --kconfig
> axm55xx_defconfig
>
> ref:
> https://builds.tuxbuild.com/1sRT0HOyHnZ8N5ktJmaEcMIQZL0/
>
>
> --
> Linaro LKFT
> https://lkft.linaro.org
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 5.4 000/244] 5.4.119-rc1 review

2021-05-12 Thread Naresh Kamboju
On Wed, 12 May 2021 at 20:22, Greg Kroah-Hartman
 wrote:
>
> This is the start of the stable review cycle for the 5.4.119 release.
> There are 244 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Fri, 14 May 2021 14:47:09 +.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.119-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-5.4.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Build regression detected.

> Alexandru Elisei 
> KVM: arm64: Initialize VCPU mdcr_el2 before loading it

stable rc 5.4 arm axm55xx_defconfig builds failed due to these
warnings / errors.
  - arm (axm55xx_defconfig) with gcc-8,9 and 10 failed

arch/arm/kvm/../../../virt/kvm/arm/arm.c: In function 'kvm_vcpu_first_run_init':
arch/arm/kvm/../../../virt/kvm/arm/arm.c:582:2: error: implicit
declaration of function 'kvm_arm_vcpu_init_debug'; did you mean
'kvm_arm_init_debug'? [-Werror=implicit-function-declaration]
  kvm_arm_vcpu_init_debug(vcpu);
  ^~~
  kvm_arm_init_debug
cc1: some warnings being treated as errors


steps to reproduce:

#!/bin/sh

# TuxMake is a command line tool and Python library that provides
# portable and repeatable Linux kernel builds across a variety of
# architectures, toolchains, kernel configurations, and make targets.
#
# TuxMake supports the concept of runtimes.
# See https://docs.tuxmake.org/runtimes/, for that to work it requires
# that you install podman or docker on your system.
#
# To install tuxmake on your system globally:
# sudo pip3 install -U tuxmake
#
# See https://docs.tuxmake.org/ for complete documentation.


tuxmake --runtime podman --target-arch arm --toolchain gcc-8 --kconfig
axm55xx_defconfig

ref:
https://builds.tuxbuild.com/1sRT0HOyHnZ8N5ktJmaEcMIQZL0/


--
Linaro LKFT
https://lkft.linaro.org
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 0/9] KVM: arm64: Initial host support for the Apple M1

2021-05-12 Thread Marc Zyngier
Hi Alex,

On Wed, 12 May 2021 17:22:43 +0100,
Alexandru Elisei  wrote:
> 
> Hi Marc,
> 
> On 5/10/21 2:48 PM, Marc Zyngier wrote:
> > This is a new version of the series previously posted at [2], reworking
> > the vGIC and timer code to cope with the M1 braindead^Wamusing nature.
> >
> > Hardly any change this time around, mostly rebased on top of upstream
> > now that the dependencies have made it in.
> >
> > Tested with multiple concurrent VMs running from an initramfs.
> >
> > * From v2:
> >   - Rebased on 5.13-rc1
> >   - Fixed a couple of nits in the GIC registration code
> >
> > * From v1 [1]:
> >   - Rebased on Hector's v4 posting[0]
> >   - Dropped a couple of patches that have been merged in the above series
> >   - Fixed irq_ack callback on the timer path
> >
> > [0] https://lore.kernel.org/r/20210402090542.131194-1-mar...@marcan.st
> > [1] https://lore.kernel.org/r/20210316174617.173033-1-...@kernel.org
> > [2] https://lore.kernel.org/r/20210403112931.1043452-1-...@kernel.org
> 
> This looks interesting and I want to take a look. For now, I can
> only review the series, but maybe at some point I'll take the leap
> and try to run Linux on my Macbook Air.

It is a bit involved at the moment, and I haven't tried on a laptop
(the nice thing about the Mini is that you can bury it under a pile of
other machines and still make use of it).

> Can I find something resembling a specification for the Apple
> interrupt controller, or the only available documentation is in the
> Linux driver and patches on the mailing list?

The Asahi wiki has a bunch of RE goodies, but you really don't need to
know much about the HW to follow what this series does. Actually, you
instead need to understand what the GIC guarantees to the guest,
because this is all about the GIC emulation on a non-GIC interrupt
controller.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 0/9] KVM: arm64: Initial host support for the Apple M1

2021-05-12 Thread Alexandru Elisei
Hi Marc,

On 5/10/21 2:48 PM, Marc Zyngier wrote:
> This is a new version of the series previously posted at [2], reworking
> the vGIC and timer code to cope with the M1 braindead^Wamusing nature.
>
> Hardly any change this time around, mostly rebased on top of upstream
> now that the dependencies have made it in.
>
> Tested with multiple concurrent VMs running from an initramfs.
>
> * From v2:
>   - Rebased on 5.13-rc1
>   - Fixed a couple of nits in the GIC registration code
>
> * From v1 [1]:
>   - Rebased on Hector's v4 posting[0]
>   - Dropped a couple of patches that have been merged in the above series
>   - Fixed irq_ack callback on the timer path
>
> [0] https://lore.kernel.org/r/20210402090542.131194-1-mar...@marcan.st
> [1] https://lore.kernel.org/r/20210316174617.173033-1-...@kernel.org
> [2] https://lore.kernel.org/r/20210403112931.1043452-1-...@kernel.org

This looks interesting and I want to take a look. For now, I can only review the
series, but maybe at some point I'll take the leap and try to run Linux on my
Macbook Air.

Can I find something resembling a specification for the Apple interrupt
controller, or the only available documentation is in the Linux driver and 
patches
on the mailing list?

Thanks,

Alex

>
> Marc Zyngier (9):
>   irqchip/gic: Split vGIC probing information from the GIC code
>   KVM: arm64: Handle physical FIQ as an IRQ while running a guest
>   KVM: arm64: vgic: Be tolerant to the lack of maintenance interrupt
>   KVM: arm64: vgic: Let an interrupt controller advertise lack of HW
> deactivation
>   KVM: arm64: vgic: move irq->get_input_level into an ops structure
>   KVM: arm64: vgic: Implement SW-driven deactivation
>   KVM: arm64: timer: Refactor IRQ configuration
>   KVM: arm64: timer: Add support for SW-based deactivation
>   irqchip/apple-aic: Advertise some level of vGICv3 compatibility
>
>  arch/arm64/kvm/arch_timer.c| 161 +
>  arch/arm64/kvm/hyp/hyp-entry.S |   6 +-
>  arch/arm64/kvm/vgic/vgic-init.c|  34 +-
>  arch/arm64/kvm/vgic/vgic-v2.c  |  19 ++-
>  arch/arm64/kvm/vgic/vgic-v3.c  |  19 ++-
>  arch/arm64/kvm/vgic/vgic.c |  14 +--
>  drivers/irqchip/irq-apple-aic.c|   8 ++
>  drivers/irqchip/irq-gic-common.c   |  13 --
>  drivers/irqchip/irq-gic-common.h   |   2 -
>  drivers/irqchip/irq-gic-v3.c   |   6 +-
>  drivers/irqchip/irq-gic.c  |   6 +-
>  include/kvm/arm_vgic.h |  41 +--
>  include/linux/irqchip/arm-gic-common.h |  25 +---
>  include/linux/irqchip/arm-vgic-info.h  |  43 +++
>  14 files changed, 291 insertions(+), 106 deletions(-)
>  create mode 100644 include/linux/irqchip/arm-vgic-info.h
>
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Marc Zyngier

On 2021-05-12 17:03, Ricardo Koller wrote:

On Wed, May 12, 2021 at 02:43:28PM +0100, Marc Zyngier wrote:

On 2021-05-12 13:59, Zenghui Yu wrote:
> Hi Eric,
>
> On 2021/5/6 20:30, Auger Eric wrote:
> > running the test on 5.12 I get
> >
> >  Test Assertion Failure 
> >   aarch64/debug-exceptions.c:232: false
> >   pid=6477 tid=6477 errno=4 - Interrupted system call
> >  10x0040147b: main at debug-exceptions.c:230
> >  20x03ff8aa60de3: ?? ??:0
> >  30x00401517: _start at :?
> >   Failed guest assert: hw_bp_addr == PC(hw_bp) at
> > aarch64/debug-exceptions.c:105
> >   values: 0, 0x401794
>
> FYI I can also reproduce it on my VHE box. And Drew's suggestion [*]
> seemed to work for me. Is the ISB a requirement of architecture?

Very much so. Given that there is no context synchronisation (such as
ERET or an interrupt) in this code, the CPU is perfectly allowed to
delay the system register effect as long as it can.

M.
--
Jazz is not dead. It just smells funny...


Thank you very much Eric, Zenghui, Marc, and Andrew (for the ISB
suggestion)!

As per Zenghui test, will send a V3 that includes the missing ISBs.
Hopefully that will fix the issue for Eric as well. It's very
interesting that the CPU seems to _always_ reorder those instructions.


I suspect that because hitting the debug registers can be a costly
operation (it mobilises a lot of resources in the CPU), there is
a strong incentive to let it slide until there is an actual mandate
to commit the resource.

It also means that SW can issue a bunch of these without too much
overhead, and only pay the cost *once*.

Your N1 CPU seems to be less aggressive on this. Implement choice,
I'd say (it probably is more aggressive than TX2 on other things).
Also, QEMU will almost always hide these problems, due to the nature
of TCG.

Thanks,

 M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v11 2/6] arm64: kvm: Introduce MTE VM feature

2021-05-12 Thread Steven Price

On 10/05/2021 19:35, Catalin Marinas wrote:

On Fri, May 07, 2021 at 07:25:39PM +0100, Catalin Marinas wrote:

On Thu, May 06, 2021 at 05:15:25PM +0100, Steven Price wrote:

On 04/05/2021 18:40, Catalin Marinas wrote:

On Thu, Apr 29, 2021 at 05:06:41PM +0100, Steven Price wrote:

Given the changes to set_pte_at() which means that tags are restored from
swap even if !PROT_MTE, the only race I can see remaining is the creation of
new PROT_MTE mappings. As you mention an attempt to change mappings in the
VMM memory space should involve a mmu notifier call which I think serialises
this. So the remaining issue is doing this in a separate address space.

So I guess the potential problem is:

   * allocate memory MAP_SHARED but !PROT_MTE
   * fork()
   * VM causes a fault in parent address space
   * child does a mprotect(PROT_MTE)

With the last two potentially racing. Sadly I can't see a good way of
handling that.


Ah, the mmap lock doesn't help as they are different processes
(mprotect() acquires it as a writer).

I wonder whether this is racy even in the absence of KVM. If both parent
and child do an mprotect(PROT_MTE), one of them may be reading stale
tags for a brief period.

Maybe we should revisit whether shared MTE pages are of any use, though
it's an ABI change (not bad if no-one is relying on this). However...

[...]

Thinking about this, we have a similar problem with the PG_dcache_clean
and two processes doing mprotect(PROT_EXEC). One of them could see the
flag set and skip the I-cache maintenance while the other executes
stale instructions. change_pte_range() could acquire the page lock if
the page is VM_SHARED (my preferred core mm fix). It doesn't immediately
solve the MTE/KVM case but we could at least take the page lock via
user_mem_abort().

[...]

This is the real issue I see - the race in PROT_MTE case is either a data
leak (syncing after setting the bit) or data loss (syncing before setting
the bit).

[...]

But without serialising through a spinlock (in mte_sync_tags()) I haven't
been able to come up with any way of closing the race. But with the change
to set_pte_at() to call mte_sync_tags() even if the PTE isn't PROT_MTE that
is likely to seriously hurt performance.


Yeah. We could add another page flag as a lock though I think it should
be the core code that prevents the race.

If we are to do it in the arch code, maybe easier with a custom
ptep_modify_prot_start/end() where we check if it's VM_SHARED and
VM_MTE, take a (big) lock.


I think in the general case we don't even need VM_SHARED. For example,
we have two processes mapping a file, read-only. An mprotect() call in
both processes will race on the page->flags via the corresponding
set_pte_at(). I think an mprotect() with a page fault in different
processes can also race.

The PROT_EXEC case can be easily fixed, as you said already. The
PROT_MTE with MAP_PRIVATE I think can be made safe by a similar
approach: test flag, clear tags, set flag. A subsequent write would
trigger a CoW, so different page anyway.

Anyway, I don't think ptep_modify_prot_start/end would buy us much, it
probably makes the code even harder to read.


In the core code, something like below (well, a partial hack, not tested
and it doesn't handle huge pages but just to give an idea):

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 94188df1ee55..6ba96ff141a6 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -114,6 +113,10 @@ static unsigned long change_pte_range(struct 
vm_area_struct *vma, pmd_t *pmd,
}
  
  			oldpte = ptep_modify_prot_start(vma, addr, pte);

+   if (vma->vm_flags & VM_SHARED) {
+   page = vm_normal_page(vma, addr, oldpte);
+   lock_page(page);
+   }
ptent = pte_modify(oldpte, newprot);
if (preserve_write)
ptent = pte_mk_savedwrite(ptent);
@@ -138,6 +141,8 @@ static unsigned long change_pte_range(struct vm_area_struct 
*vma, pmd_t *pmd,
ptent = pte_mkwrite(ptent);
}
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
+   if (page)
+   unlock_page(page);
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);


That's bogus: lock_page() might sleep but this whole code sequence is
under the ptl spinlock. There are some lock_page_* variants but that
would involve either a busy loop on this path or some bailing out,
waiting for a release.

Options:

1. Change the mte_sync_tags() code path to set the flag after clearing
and avoid reading stale tags. We document that mprotect() on
MAP_SHARED may lead to tag loss. Maybe we can intercept this in the
arch code and return an error.


This is the best option I've 

Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Marc Zyngier

On 2021-05-12 13:59, Zenghui Yu wrote:

Hi Eric,

On 2021/5/6 20:30, Auger Eric wrote:

running the test on 5.12 I get

 Test Assertion Failure 
  aarch64/debug-exceptions.c:232: false
  pid=6477 tid=6477 errno=4 - Interrupted system call
 1  0x0040147b: main at debug-exceptions.c:230
 2  0x03ff8aa60de3: ?? ??:0
 3  0x00401517: _start at :?
  Failed guest assert: hw_bp_addr == PC(hw_bp) at
aarch64/debug-exceptions.c:105
values: 0, 0x401794


FYI I can also reproduce it on my VHE box. And Drew's suggestion [*]
seemed to work for me. Is the ISB a requirement of architecture?


Very much so. Given that there is no context synchronisation (such as
ERET or an interrupt) in this code, the CPU is perfectly allowed to
delay the system register effect as long as it can.

M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Zenghui Yu

Hi Eric,

On 2021/5/6 20:30, Auger Eric wrote:

running the test on 5.12 I get

 Test Assertion Failure 
  aarch64/debug-exceptions.c:232: false
  pid=6477 tid=6477 errno=4 - Interrupted system call
 1  0x0040147b: main at debug-exceptions.c:230
 2  0x03ff8aa60de3: ?? ??:0
 3  0x00401517: _start at :?
  Failed guest assert: hw_bp_addr == PC(hw_bp) at
aarch64/debug-exceptions.c:105
values: 0, 0x401794


FYI I can also reproduce it on my VHE box. And Drew's suggestion [*]
seemed to work for me. Is the ISB a requirement of architecture?

[*] https://lore.kernel.org/kvm/20210503124925.wxdcyzharpyze...@gator.home/


Thanks,
Zenghui
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 0/6] KVM: arm64: Improve efficiency of stage2 page table

2021-05-12 Thread wangyanan (Y)

A really gentle ping ...

Sincerely,
Yanan


On 2021/4/15 19:50, Yanan Wang wrote:

Hi,

This series makes some efficiency improvement of guest stage-2 page
table code, and there are some test results to quantify the benefit.
The code has been re-arranged based on the latest kvmarm/next tree.

Descriptions:
We currently uniformly permorm CMOs of D-cache and I-cache in function
user_mem_abort before calling the fault handlers. If we get concurrent
guest faults(e.g. translation faults, permission faults) or some really
unnecessary guest faults caused by BBM, CMOs for the first vcpu are
necessary while the others later are not.

By moving CMOs to the fault handlers, we can easily identify conditions
where they are really needed and avoid the unnecessary ones. As it's a
time consuming process to perform CMOs especially when flushing a block
range, so this solution reduces much load of kvm and improve efficiency
of the stage-2 page table code.

In this series, patch #1, #3, #4 make preparation for place movement
of CMOs (adapt to the latest stage-2 page table framework). And patch
#2, #5 move CMOs of D-cache and I-cache to the fault handlers. Patch
#6 introduces a new way to distinguish cases of memcache allocations.

The following are results in v3 to represent the benefit introduced
by movement of CMOs, and they were tested by [1] (kvm/selftest) that
I have posted recently.
[1] https://lore.kernel.org/lkml/20210302125751.19080-1-wangyana...@huawei.com/

When there are muitiple vcpus concurrently accessing the same memory
region, we can test the execution time of KVM creating new mappings,
updating the permissions of old mappings from RO to RW, and the time
of re-creating the blocks after they have been split.

hardware platform: HiSilicon Kunpeng920 Server
host kernel: Linux mainline v5.12-rc2

cmdline: ./kvm_page_table_test -m 4 -s anonymous -b 1G -v 80
(80 vcpus, 1G memory, page mappings(normal 4K))
KVM_CREATE_MAPPINGS: before 104.35s -> after  90.42s  +13.35%
KVM_UPDATE_MAPPINGS: before  78.64s -> after  75.45s  + 4.06%

cmdline: ./kvm_page_table_test -m 4 -s anonymous_thp -b 20G -v 40
(40 vcpus, 20G memory, block mappings(THP 2M))
KVM_CREATE_MAPPINGS: before  15.66s -> after   6.92s  +55.80%
KVM_UPDATE_MAPPINGS: before 178.80s -> after 123.35s  +31.00%
KVM_REBUILD_BLOCKS:  before 187.34s -> after 131.76s  +30.65%

cmdline: ./kvm_page_table_test -m 4 -s anonymous_hugetlb_1gb -b 20G -v 40
(40 vcpus, 20G memory, block mappings(HUGETLB 1G))
KVM_CREATE_MAPPINGS: before 104.54s -> after   3.70s  +96.46%
KVM_UPDATE_MAPPINGS: before 174.20s -> after 115.94s  +33.44%
KVM_REBUILD_BLOCKS:  before 103.95s -> after   2.96s  +97.15%

---

Changelogs:
v4->v5:
- rebased on the latest kvmarm/tree to adapt to the new stage-2 page-table code
- v4: 
https://lore.kernel.org/lkml/20210409033652.28316-1-wangyana...@huawei.com/

v3->v4:
- perform D-cache flush if we are not mapping device memory
- rebased on top of mainline v5.12-rc6
- v3: https://lore.kernel.org/lkml/20210326031654.3716-1-wangyana...@huawei.com/

v2->v3:
- drop patch #3 in v2
- retest v3 based on v5.12-rc2
- v2: 
https://lore.kernel.org/lkml/20210310094319.18760-1-wangyana...@huawei.com/

v1->v2:
- rebased on top of mainline v5.12-rc2
- also move CMOs of I-cache to the fault handlers
- retest v2 based on v5.12-rc2
- v1: 
https://lore.kernel.org/lkml/20210208112250.163568-1-wangyana...@huawei.com/

---

Yanan Wang (6):
   KVM: arm64: Introduce KVM_PGTABLE_S2_GUEST stage-2 flag
   KVM: arm64: Move D-cache flush to the fault handlers
   KVM: arm64: Add mm_ops member for structure stage2_attr_data
   KVM: arm64: Provide invalidate_icache_range at non-VHE EL2
   KVM: arm64: Move I-cache flush to the fault handlers
   KVM: arm64: Distinguish cases of memcache allocations completely

  arch/arm64/include/asm/kvm_mmu.h | 31 -
  arch/arm64/include/asm/kvm_pgtable.h | 38 ++--
  arch/arm64/kvm/hyp/nvhe/cache.S  | 11 +
  arch/arm64/kvm/hyp/pgtable.c | 65 +++-
  arch/arm64/kvm/mmu.c | 51 --
  5 files changed, 107 insertions(+), 89 deletions(-)


___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Ricardo Koller
On Fri, May 07, 2021 at 04:08:07PM +0200, Auger Eric wrote:
> Hi Ricardo,
> 
> On 5/6/21 9:14 PM, Ricardo Koller wrote:
> > On Thu, May 06, 2021 at 02:30:17PM +0200, Auger Eric wrote:
> >> Hi Ricardo,
> >>
> > 
> > Hi Eric,
> > 
> > Thank you very much for the test.
> > 
> >> On 5/3/21 9:12 PM, Ricardo Koller wrote:
> >>> On Mon, May 03, 2021 at 11:32:39AM +0100, Marc Zyngier wrote:
>  On Sat, 01 May 2021 00:24:06 +0100,
>  Ricardo Koller  wrote:
> >
> > Add the infrastructure needed to enable exception handling in aarch64
> > selftests. The exception handling defaults to an unhandled-exception
> > handler which aborts the test, just like x86. These handlers can be
> > overridden by calling vm_install_vector_handler(vector) or
> > vm_install_exception_handler(vector, ec). The unhandled exception
> > reporting from the guest is done using the ucall type introduced in a
> > previous commit, UCALL_UNHANDLED.
> >
> > The exception handling code is heavily inspired on kvm-unit-tests.
> >>
> >> running the test on 5.12 I get
> >>

Hi Eric,

I'm able to reproduce the failure you are seeing on 5.6, specifically
with kernels older than this commit:

  4942dc6638b0 KVM: arm64: Write arch.mdcr_el2 changes since last vcpu_load on 
VHE

but not yet on v5.12. Could you share the commit of the kernel you are
testing, please?

Thanks!
Ricardo

> >>  Test Assertion Failure 
> >>   aarch64/debug-exceptions.c:232: false
> >>   pid=6477 tid=6477 errno=4 - Interrupted system call
> >>  1 0x0040147b: main at debug-exceptions.c:230
> >>  2 0x03ff8aa60de3: ?? ??:0
> >>  3 0x00401517: _start at :?
> >>   Failed guest assert: hw_bp_addr == PC(hw_bp) at
> >> aarch64/debug-exceptions.c:105
> >>values: 0, 0x401794
> >>
> >>
> >> I guess it is not an expected result. Any known bug waiting on the list?
> >>
> > 
> > Not expected. That should work, or at least abort early because there is
> > no HW breakpoints support.
> > 
> > I'm trying to reproduce the failure; can you help me with some
> > questions, please?
> sure, please find the answers below.
> > 
> > - does your setup have support for hardware breakpoints? Can you try a
> >   'dmesg | grep break'? I'm looking for something like 'hw-breakpoint:
> >   found ...'. If there is no such line it's very likely that the check
> >   for "debug_ver >= 6" is not enough and the test should check for
> >   "num_breakpoints > 0".
> [   25.640418] hw-breakpoint: found 6 breakpoint and 4 watchpoint registers.
> > - does it fail consistently (every single attempt)?
> yes it does.
> 
> I will try to find some time to investigate too
> 
> Thanks
> 
> Eric
> > 
> > Thanks!
> > Ricardo
> > 
> >>
> >> Thanks
> >>
> >> Eric
> > 
> > 
> >
> > Signed-off-by: Ricardo Koller 
> > ---
> >  tools/testing/selftests/kvm/Makefile  |   2 +-
> >  .../selftests/kvm/include/aarch64/processor.h |  78 +++
> >  .../selftests/kvm/lib/aarch64/handlers.S  | 130 ++
> >  .../selftests/kvm/lib/aarch64/processor.c | 124 +
> >  4 files changed, 333 insertions(+), 1 deletion(-)
> >  create mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S
> >
> > diff --git a/tools/testing/selftests/kvm/Makefile 
> > b/tools/testing/selftests/kvm/Makefile
> > index 4e548d7ab0ab..618c5903f478 100644
> > --- a/tools/testing/selftests/kvm/Makefile
> > +++ b/tools/testing/selftests/kvm/Makefile
> > @@ -35,7 +35,7 @@ endif
> >  
> >  LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c 
> > lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
> >  LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c 
> > lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
> > -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
> > +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c 
> > lib/aarch64/handlers.S
> >  LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c 
> > lib/s390x/diag318_test_handler.c
> >  
> >  TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
> > diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h 
> > b/tools/testing/selftests/kvm/include/aarch64/processor.h
> > index b7fa0c8551db..40aae31b4afc 100644
> > --- a/tools/testing/selftests/kvm/include/aarch64/processor.h
> > +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
> > @@ -8,6 +8,7 @@
> >  #define SELFTEST_KVM_PROCESSOR_H
> >  
> >  #include "kvm_util.h"
> > +#include 
> >  
> >  
> >  #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
> > @@ -18,6 +19,7 @@
> >  #define MAIR_EL1   3, 0, 10, 2, 0
> >  #define TTBR0_EL1  3, 0,  2, 0, 0
> >  #define SCTLR_EL1  3, 0,  1, 0, 0
> > +#define VBAR_EL1   3, 0, 12, 0, 0
> >  
> >  

Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Auger Eric
Hi,

On 5/12/21 10:33 AM, Marc Zyngier wrote:
> On 2021-05-12 09:19, Auger Eric wrote:
>> Hi Ricardo,
>>
>> On 5/12/21 9:27 AM, Ricardo Koller wrote:
>>> On Fri, May 07, 2021 at 04:08:07PM +0200, Auger Eric wrote:
 Hi Ricardo,

 On 5/6/21 9:14 PM, Ricardo Koller wrote:
> On Thu, May 06, 2021 at 02:30:17PM +0200, Auger Eric wrote:
>> Hi Ricardo,
>>
>
> Hi Eric,
>
> Thank you very much for the test.
>
>> On 5/3/21 9:12 PM, Ricardo Koller wrote:
>>> On Mon, May 03, 2021 at 11:32:39AM +0100, Marc Zyngier wrote:
 On Sat, 01 May 2021 00:24:06 +0100,
 Ricardo Koller  wrote:
>
> Add the infrastructure needed to enable exception handling in
> aarch64
> selftests. The exception handling defaults to an
> unhandled-exception
> handler which aborts the test, just like x86. These handlers
> can be
> overridden by calling vm_install_vector_handler(vector) or
> vm_install_exception_handler(vector, ec). The unhandled exception
> reporting from the guest is done using the ucall type
> introduced in a
> previous commit, UCALL_UNHANDLED.
>
> The exception handling code is heavily inspired on kvm-unit-tests.
>>
>> running the test on 5.12 I get
>>
>>>
>>> Hi Eric,
>>>
>>> I'm able to reproduce the failure you are seeing on 5.6, specifically
>>> with kernels older than this commit:
>>>
>>>   4942dc6638b0 KVM: arm64: Write arch.mdcr_el2 changes since last
>>> vcpu_load on VHE
>>>
>>> but not yet on v5.12. Could you share the commit of the kernel you are
>>> testing, please?
>>
>> my host is a 5.12 kernel (8404c9fbc84b)
> 
> Time to compare notes then. What HW are you using? Running VHE or not?
VHE yes. Cavium Sabre system.

Thanks

Eric
> 
> Thanks,
> 
>     M.

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Marc Zyngier

On 2021-05-12 09:19, Auger Eric wrote:

Hi Ricardo,

On 5/12/21 9:27 AM, Ricardo Koller wrote:

On Fri, May 07, 2021 at 04:08:07PM +0200, Auger Eric wrote:

Hi Ricardo,

On 5/6/21 9:14 PM, Ricardo Koller wrote:

On Thu, May 06, 2021 at 02:30:17PM +0200, Auger Eric wrote:

Hi Ricardo,



Hi Eric,

Thank you very much for the test.


On 5/3/21 9:12 PM, Ricardo Koller wrote:

On Mon, May 03, 2021 at 11:32:39AM +0100, Marc Zyngier wrote:

On Sat, 01 May 2021 00:24:06 +0100,
Ricardo Koller  wrote:


Add the infrastructure needed to enable exception handling in 
aarch64
selftests. The exception handling defaults to an 
unhandled-exception
handler which aborts the test, just like x86. These handlers can 
be

overridden by calling vm_install_vector_handler(vector) or
vm_install_exception_handler(vector, ec). The unhandled 
exception
reporting from the guest is done using the ucall type introduced 
in a

previous commit, UCALL_UNHANDLED.

The exception handling code is heavily inspired on 
kvm-unit-tests.


running the test on 5.12 I get



Hi Eric,

I'm able to reproduce the failure you are seeing on 5.6, specifically
with kernels older than this commit:

  4942dc6638b0 KVM: arm64: Write arch.mdcr_el2 changes since last 
vcpu_load on VHE


but not yet on v5.12. Could you share the commit of the kernel you are
testing, please?


my host is a 5.12 kernel (8404c9fbc84b)


Time to compare notes then. What HW are you using? Running VHE or not?

Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-05-12 Thread Mike Rapoport
On Wed, May 12, 2021 at 09:59:33AM +0200, Ard Biesheuvel wrote:
> On Wed, 12 May 2021 at 09:34, Mike Rapoport  wrote:
> >
> > On Wed, May 12, 2021 at 09:00:02AM +0200, Ard Biesheuvel wrote:
> > > On Tue, 11 May 2021 at 12:05, Mike Rapoport  wrote:
> > > >
> > > > From: Mike Rapoport 
> > > >
> > > > Hi,
> > > >
> > > > These patches aim to remove CONFIG_HOLES_IN_ZONE and essentially 
> > > > hardwire
> > > > pfn_valid_within() to 1.
> > > >
> > > > The idea is to mark NOMAP pages as reserved in the memory map and 
> > > > restore
> > > > the intended semantics of pfn_valid() to designate availability of 
> > > > struct
> > > > page for a pfn.
> > > >
> > > > With this the core mm will be able to cope with the fact that it cannot 
> > > > use
> > > > NOMAP pages and the holes created by NOMAP ranges within MAX_ORDER 
> > > > blocks
> > > > will be treated correctly even without the need for pfn_valid_within.
> > > >
> > > > The patches are boot tested on qemu-system-aarch64.
> > > >
> > >
> > > Did you use EFI boot when testing this? The memory map is much more
> > > fragmented in that case, so this would be a good data point.
> >
> > Right, something like this:
> >
> 
> Yes, although it is not always that bad.
> 
> > [0.00] Early memory node ranges
> > [0.00]   node   0: [mem 0x4000-0xbfff]
> > [0.00]   node   0: [mem 0xc000-0x]
> 
> This is allocated below 4 GB by the firmware, for reasons that are
> only valid on x86 (where some of the early boot chain is IA32 only)
> 
> > [0.00]   node   0: [mem 0x0001-0x0004386f]
> > [0.00]   node   0: [mem 0x00043870-0x00043899]
> > [0.00]   node   0: [mem 0x0004389a-0x0004389b]
> > [0.00]   node   0: [mem 0x0004389c-0x000438b5]
> > [0.00]   node   0: [mem 0x000438b6-0x00043be3]
> > [0.00]   node   0: [mem 0x00043be4-0x00043bec]
> > [0.00]   node   0: [mem 0x00043bed-0x00043bed]
> > [0.00]   node   0: [mem 0x00043bee-0x00043bff]
> > [0.00]   node   0: [mem 0x00043c00-0x00043fff]
> >
> > This is a pity really, because I don't see a fundamental reason for those
> > tiny holes all over the place.
> >
> 
> There is a config option in the firmware build that allows these
> regions to be preallocated using larger windows, which greatly reduces
> the fragmentation.
> > I know that EFI/ACPI mandates "IO style" memory access for those regions,
> > but I fail to get why...
> >
> 
> Not sure what you mean by 'IO style memory access'.
 
Well, my understanding is that the memory reserved by the firmware cannot
be mapped in the linear map because it might require different caching
modes (e.g like IO) and arm64 cannot tolerate aliased mappings with
different caching.
But what evades me is *why* these areas cannot be accessed as normal RAM.
 
> > > > I beleive it would be best to route these via mmotm tree.
> > > >
> > > > v4:
> > > > * rebase on v5.13-rc1
> > > >
> > > > v3: Link: 
> > > > https://lore.kernel.org/lkml/20210422061902.21614-1-r...@kernel.org
> > > > * Fix minor issues found by Anshuman
> > > > * Freshen up the declaration of pfn_valid() to make it consistent with
> > > >   pfn_is_map_memory()
> > > > * Add more Acked-by and Reviewed-by tags, thanks Anshuman and David
> > > >
> > > > v2: Link: 
> > > > https://lore.kernel.org/lkml/20210421065108.1987-1-r...@kernel.org
> > > > * Add check for PFN overflow in pfn_is_map_memory()
> > > > * Add Acked-by and Reviewed-by tags, thanks David.
> > > >
> > > > v1: Link: 
> > > > https://lore.kernel.org/lkml/20210420090925.7457-1-r...@kernel.org
> > > > * Add comment about the semantics of pfn_valid() as Anshuman suggested
> > > > * Extend comments about MEMBLOCK_NOMAP, per Anshuman
> > > > * Use pfn_is_map_memory() name for the exported wrapper for
> > > >   memblock_is_map_memory(). It is still local to arch/arm64 in the end
> > > >   because of header dependency issues.
> > > >
> > > > rfc: Link: 
> > > > https://lore.kernel.org/lkml/20210407172607.8812-1-r...@kernel.org
> > > >
> > > > Mike Rapoport (4):
> > > >   include/linux/mmzone.h: add documentation for pfn_valid()
> > > >   memblock: update initialization of reserved pages
> > > >   arm64: decouple check whether pfn is in linear map from pfn_valid()
> > > >   arm64: drop pfn_valid_within() and simplify pfn_valid()
> > > >
> > > >  arch/arm64/Kconfig  |  3 ---
> > > >  arch/arm64/include/asm/memory.h |  2 +-
> > > >  arch/arm64/include/asm/page.h   |  3 ++-
> > > >  arch/arm64/kvm/mmu.c|  2 +-
> > > >  arch/arm64/mm/init.c| 14 +-
> > > >  arch/arm64/mm/ioremap.c |  4 ++--
> > > >  arch/arm64/mm/mmu.c |  2 +-
> > > >  include/linux/memblock.h|  4 +++-
> > > >  include/linux/mmzone.h  | 11 

Re: arm32: panic in move_freepages (Was [PATCH v2 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid())

2021-05-12 Thread Mike Rapoport
On Wed, May 12, 2021 at 11:08:14AM +0800, Kefeng Wang wrote:
> 
> On 2021/5/11 16:48, Mike Rapoport wrote:
> > On Mon, May 10, 2021 at 11:10:20AM +0800, Kefeng Wang wrote:
> > > 
> > > > > The memory is not continuous, see MEMBLOCK:
> > > > >memory size = 0x4c0f reserved size = 0x027ef058
> > > > >memory.cnt  = 0xa
> > > > >memory[0x0][0x80a0-0x855f], 0x04c0 bytes flags: 0x0
> > > > >memory[0x1][0x86a0-0x87df], 0x0140 bytes flags: 0x0
> > > > >memory[0x2][0x8bd0-0x8c4f], 0x0080 bytes flags: 0x0
> > > > >memory[0x3][0x8e30-0x8ecf], 0x00a0 bytes flags: 0x0
> > > > >memory[0x4][0x90d0-0xbfff], 0x2f30 bytes flags: 0x0
> > > > >memory[0x5][0xcc00-0xdc9f], 0x10a0 bytes flags: 0x0
> > > > >memory[0x6][0xde70-0xde9f], 0x0030 bytes flags: 0x0
> > > > > ...
> > > > > 
> > > > > The pfn_range [0xde600,0xde700] => addr_range [0xde60,0xde70]
> > > > > is not available memory, and we won't create memmap , so with or 
> > > > > without
> > > > > your patch, we can't see the range in free_memmap(), right?
> > > > 
> > > > This is not available memory and we won't see the reange in 
> > > > free_memmap(),
> > > > but we still should create memmap for it and that's what my patch tried 
> > > > to
> > > > do.
> > > > 
> > > > There are a lot of places in core mm that operate on pageblocks and
> > > > free_unused_memmap() should make sure that any pageblock has a valid 
> > > > memory
> > > > map.
> > > > 
> > > > Currently, that's not the case when SPARSEMEM=y and my patch tried to 
> > > > fix
> > > > it.
> > > > 
> > > > Can you please send log with my patch applied and with the printing of
> > > > ranges that are freed in free_unused_memmap() you've used in previous
> > > > mails?
> > 
> > > with your patch[1] and debug print in free_memmap,
> > > > free_memmap, start_pfn = 85800,  8580 end_pfn = 86800, 8680
> > > > free_memmap, start_pfn = 8c800,  8c80 end_pfn = 8e000, 8e00
> > > > free_memmap, start_pfn = 8f000,  8f00 end_pfn = 9, 9000
> > > > free_memmap, start_pfn = dcc00,  dcc0 end_pfn = de400, de40
> > > > free_memmap, start_pfn = dec00,  dec0 end_pfn = e, e000
> > > > free_memmap, start_pfn = e0c00,  e0c0 end_pfn = e4000, e400
> > > > free_memmap, start_pfn = f7000,  f700 end_pfn = f8000, f800
> > 
> > It seems that freeing of the memory map is suboptimal still because that
> > code was not designed for memory layout that has more holes than Swiss
> > cheese.
> > 
> > Still, the range [0xde600,0xde700] is not freed and there should be struct
> > pages for this range.
> > 
> > Can you add
> > 
> > dump_page(pfn_to_page(0xde600), "");
> > 
> > say, in the end of memblock_free_all()?
> > 
> The range [0xde600,0xde700] is not memory, so it won't create struct page
> for it when sparse_init?

sparse_init() indeed does not create memory map for unpopulated memory, but
it has pretty coarse granularity, i.e. 64M in your configuration. A hole
should be at least 64M in order to skip allocation of the memory map for
it.

For example, your memory layout has a hole of 192M at pfn 0xc and this
hole won't have the memory map.

However the hole 0xdca00 - 0xde70 will still have a memory map in the
section  that covers 0xdc000 - 0xe.

I've tried outline this in a sketch below, hope it helps.

Memory:
  c  cc000  dca00
--+  +--+ ++
 memory bank  |<- hole ->| memory bank  | | mb |
--+  +--+ ++
de700  dea00

Memory map:

bb4000c  cc000   dd8000dc000
+++- ... -+  ++- ... -++-+
| memmap | memmap | ...   |<- hole ->| memmap |  ...  | memmap | memmap  |
+++- ... -+  ++- ... -++-+


> After apply patch[1], the dump_page log,
> 
> page:ef3cc000 is uninitialized and poisoned
> raw:        
> page dumped because:

This means that there is a memory map entry, and it got poisoned during the
initialization and never got reinitialized to sensible values, which would
be PageReserved() in this case.

I believe this was fixed by commit 0740a50b9baa ("mm/page_alloc.c: refactor
initialization of struct page for holes in memory layout") in the mainline
tree.

Can you backport it to your 5.10 tree and check if it helps?
 
-- 
Sincerely yours,
Mike.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/5] KVM: selftests: Add exception handling support for aarch64

2021-05-12 Thread Auger Eric
Hi Ricardo,

On 5/12/21 9:27 AM, Ricardo Koller wrote:
> On Fri, May 07, 2021 at 04:08:07PM +0200, Auger Eric wrote:
>> Hi Ricardo,
>>
>> On 5/6/21 9:14 PM, Ricardo Koller wrote:
>>> On Thu, May 06, 2021 at 02:30:17PM +0200, Auger Eric wrote:
 Hi Ricardo,

>>>
>>> Hi Eric,
>>>
>>> Thank you very much for the test.
>>>
 On 5/3/21 9:12 PM, Ricardo Koller wrote:
> On Mon, May 03, 2021 at 11:32:39AM +0100, Marc Zyngier wrote:
>> On Sat, 01 May 2021 00:24:06 +0100,
>> Ricardo Koller  wrote:
>>>
>>> Add the infrastructure needed to enable exception handling in aarch64
>>> selftests. The exception handling defaults to an unhandled-exception
>>> handler which aborts the test, just like x86. These handlers can be
>>> overridden by calling vm_install_vector_handler(vector) or
>>> vm_install_exception_handler(vector, ec). The unhandled exception
>>> reporting from the guest is done using the ucall type introduced in a
>>> previous commit, UCALL_UNHANDLED.
>>>
>>> The exception handling code is heavily inspired on kvm-unit-tests.

 running the test on 5.12 I get

> 
> Hi Eric,
> 
> I'm able to reproduce the failure you are seeing on 5.6, specifically
> with kernels older than this commit:
> 
>   4942dc6638b0 KVM: arm64: Write arch.mdcr_el2 changes since last vcpu_load 
> on VHE
> 
> but not yet on v5.12. Could you share the commit of the kernel you are
> testing, please?

my host is a 5.12 kernel (8404c9fbc84b)

Thanks

Eric
> 
> Thanks!
> Ricardo
> 
  Test Assertion Failure 
   aarch64/debug-exceptions.c:232: false
   pid=6477 tid=6477 errno=4 - Interrupted system call
  1 0x0040147b: main at debug-exceptions.c:230
  2 0x03ff8aa60de3: ?? ??:0
  3 0x00401517: _start at :?
   Failed guest assert: hw_bp_addr == PC(hw_bp) at
 aarch64/debug-exceptions.c:105
values: 0, 0x401794


 I guess it is not an expected result. Any known bug waiting on the list?

>>>
>>> Not expected. That should work, or at least abort early because there is
>>> no HW breakpoints support.
>>>
>>> I'm trying to reproduce the failure; can you help me with some
>>> questions, please?
>> sure, please find the answers below.
>>>
>>> - does your setup have support for hardware breakpoints? Can you try a
>>>   'dmesg | grep break'? I'm looking for something like 'hw-breakpoint:
>>>   found ...'. If there is no such line it's very likely that the check
>>>   for "debug_ver >= 6" is not enough and the test should check for
>>>   "num_breakpoints > 0".
>> [   25.640418] hw-breakpoint: found 6 breakpoint and 4 watchpoint registers.
>>> - does it fail consistently (every single attempt)?
>> yes it does.
>>
>> I will try to find some time to investigate too
>>
>> Thanks
>>
>> Eric
>>>
>>> Thanks!
>>> Ricardo
>>>

 Thanks

 Eric
>>>
>>>
>>>
>>> Signed-off-by: Ricardo Koller 
>>> ---
>>>  tools/testing/selftests/kvm/Makefile  |   2 +-
>>>  .../selftests/kvm/include/aarch64/processor.h |  78 +++
>>>  .../selftests/kvm/lib/aarch64/handlers.S  | 130 ++
>>>  .../selftests/kvm/lib/aarch64/processor.c | 124 +
>>>  4 files changed, 333 insertions(+), 1 deletion(-)
>>>  create mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S
>>>
>>> diff --git a/tools/testing/selftests/kvm/Makefile 
>>> b/tools/testing/selftests/kvm/Makefile
>>> index 4e548d7ab0ab..618c5903f478 100644
>>> --- a/tools/testing/selftests/kvm/Makefile
>>> +++ b/tools/testing/selftests/kvm/Makefile
>>> @@ -35,7 +35,7 @@ endif
>>>  
>>>  LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c 
>>> lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
>>>  LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c 
>>> lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
>>> -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
>>> +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c 
>>> lib/aarch64/handlers.S
>>>  LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c 
>>> lib/s390x/diag318_test_handler.c
>>>  
>>>  TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
>>> diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h 
>>> b/tools/testing/selftests/kvm/include/aarch64/processor.h
>>> index b7fa0c8551db..40aae31b4afc 100644
>>> --- a/tools/testing/selftests/kvm/include/aarch64/processor.h
>>> +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
>>> @@ -8,6 +8,7 @@
>>>  #define SELFTEST_KVM_PROCESSOR_H
>>>  
>>>  #include "kvm_util.h"
>>> +#include 
>>>  
>>>  
>>>  #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>>> @@ -18,6 +19,7 @@
>>>  #define MAIR_EL1   3, 0, 10, 2, 0
>>>  #define 

Re: [PATCH v4 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-05-12 Thread Ard Biesheuvel
On Wed, 12 May 2021 at 09:34, Mike Rapoport  wrote:
>
> On Wed, May 12, 2021 at 09:00:02AM +0200, Ard Biesheuvel wrote:
> > On Tue, 11 May 2021 at 12:05, Mike Rapoport  wrote:
> > >
> > > From: Mike Rapoport 
> > >
> > > Hi,
> > >
> > > These patches aim to remove CONFIG_HOLES_IN_ZONE and essentially hardwire
> > > pfn_valid_within() to 1.
> > >
> > > The idea is to mark NOMAP pages as reserved in the memory map and restore
> > > the intended semantics of pfn_valid() to designate availability of struct
> > > page for a pfn.
> > >
> > > With this the core mm will be able to cope with the fact that it cannot 
> > > use
> > > NOMAP pages and the holes created by NOMAP ranges within MAX_ORDER blocks
> > > will be treated correctly even without the need for pfn_valid_within.
> > >
> > > The patches are boot tested on qemu-system-aarch64.
> > >
> >
> > Did you use EFI boot when testing this? The memory map is much more
> > fragmented in that case, so this would be a good data point.
>
> Right, something like this:
>

Yes, although it is not always that bad.

> [0.00] Early memory node ranges
> [0.00]   node   0: [mem 0x4000-0xbfff]
> [0.00]   node   0: [mem 0xc000-0x]

This is allocated below 4 GB by the firmware, for reasons that are
only valid on x86 (where some of the early boot chain is IA32 only)

> [0.00]   node   0: [mem 0x0001-0x0004386f]
> [0.00]   node   0: [mem 0x00043870-0x00043899]
> [0.00]   node   0: [mem 0x0004389a-0x0004389b]
> [0.00]   node   0: [mem 0x0004389c-0x000438b5]
> [0.00]   node   0: [mem 0x000438b6-0x00043be3]
> [0.00]   node   0: [mem 0x00043be4-0x00043bec]
> [0.00]   node   0: [mem 0x00043bed-0x00043bed]
> [0.00]   node   0: [mem 0x00043bee-0x00043bff]
> [0.00]   node   0: [mem 0x00043c00-0x00043fff]
>
> This is a pity really, because I don't see a fundamental reason for those
> tiny holes all over the place.
>

There is a config option in the firmware build that allows these
regions to be preallocated using larger windows, which greatly reduces
the fragmentation.
> I know that EFI/ACPI mandates "IO style" memory access for those regions,
> but I fail to get why...
>

Not sure what you mean by 'IO style memory access'.



> > > I beleive it would be best to route these via mmotm tree.
> > >
> > > v4:
> > > * rebase on v5.13-rc1
> > >
> > > v3: Link: 
> > > https://lore.kernel.org/lkml/20210422061902.21614-1-r...@kernel.org
> > > * Fix minor issues found by Anshuman
> > > * Freshen up the declaration of pfn_valid() to make it consistent with
> > >   pfn_is_map_memory()
> > > * Add more Acked-by and Reviewed-by tags, thanks Anshuman and David
> > >
> > > v2: Link: 
> > > https://lore.kernel.org/lkml/20210421065108.1987-1-r...@kernel.org
> > > * Add check for PFN overflow in pfn_is_map_memory()
> > > * Add Acked-by and Reviewed-by tags, thanks David.
> > >
> > > v1: Link: 
> > > https://lore.kernel.org/lkml/20210420090925.7457-1-r...@kernel.org
> > > * Add comment about the semantics of pfn_valid() as Anshuman suggested
> > > * Extend comments about MEMBLOCK_NOMAP, per Anshuman
> > > * Use pfn_is_map_memory() name for the exported wrapper for
> > >   memblock_is_map_memory(). It is still local to arch/arm64 in the end
> > >   because of header dependency issues.
> > >
> > > rfc: Link: 
> > > https://lore.kernel.org/lkml/20210407172607.8812-1-r...@kernel.org
> > >
> > > Mike Rapoport (4):
> > >   include/linux/mmzone.h: add documentation for pfn_valid()
> > >   memblock: update initialization of reserved pages
> > >   arm64: decouple check whether pfn is in linear map from pfn_valid()
> > >   arm64: drop pfn_valid_within() and simplify pfn_valid()
> > >
> > >  arch/arm64/Kconfig  |  3 ---
> > >  arch/arm64/include/asm/memory.h |  2 +-
> > >  arch/arm64/include/asm/page.h   |  3 ++-
> > >  arch/arm64/kvm/mmu.c|  2 +-
> > >  arch/arm64/mm/init.c| 14 +-
> > >  arch/arm64/mm/ioremap.c |  4 ++--
> > >  arch/arm64/mm/mmu.c |  2 +-
> > >  include/linux/memblock.h|  4 +++-
> > >  include/linux/mmzone.h  | 11 +++
> > >  mm/memblock.c   | 28 ++--
> > >  10 files changed, 60 insertions(+), 13 deletions(-)
> > >
> > >
> > > base-commit: 6efb943b8616ec53a5e444193dccf1af9ad627b5
> > > --
> > > 2.28.0
> > >
>
> --
> Sincerely yours,
> Mike.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-05-12 Thread Mike Rapoport
On Wed, May 12, 2021 at 09:00:02AM +0200, Ard Biesheuvel wrote:
> On Tue, 11 May 2021 at 12:05, Mike Rapoport  wrote:
> >
> > From: Mike Rapoport 
> >
> > Hi,
> >
> > These patches aim to remove CONFIG_HOLES_IN_ZONE and essentially hardwire
> > pfn_valid_within() to 1.
> >
> > The idea is to mark NOMAP pages as reserved in the memory map and restore
> > the intended semantics of pfn_valid() to designate availability of struct
> > page for a pfn.
> >
> > With this the core mm will be able to cope with the fact that it cannot use
> > NOMAP pages and the holes created by NOMAP ranges within MAX_ORDER blocks
> > will be treated correctly even without the need for pfn_valid_within.
> >
> > The patches are boot tested on qemu-system-aarch64.
> >
> 
> Did you use EFI boot when testing this? The memory map is much more
> fragmented in that case, so this would be a good data point.

Right, something like this:

[0.00] Early memory node ranges 
[0.00]   node   0: [mem 0x4000-0xbfff]  
[0.00]   node   0: [mem 0xc000-0x]  
[0.00]   node   0: [mem 0x0001-0x0004386f]  
[0.00]   node   0: [mem 0x00043870-0x00043899]  
[0.00]   node   0: [mem 0x0004389a-0x0004389b]  
[0.00]   node   0: [mem 0x0004389c-0x000438b5]  
[0.00]   node   0: [mem 0x000438b6-0x00043be3]  
[0.00]   node   0: [mem 0x00043be4-0x00043bec]  
[0.00]   node   0: [mem 0x00043bed-0x00043bed]  
[0.00]   node   0: [mem 0x00043bee-0x00043bff]  
[0.00]   node   0: [mem 0x00043c00-0x00043fff]  

This is a pity really, because I don't see a fundamental reason for those
tiny holes all over the place. 

I know that EFI/ACPI mandates "IO style" memory access for those regions,
but I fail to get why...
 
> > I beleive it would be best to route these via mmotm tree.
> >
> > v4:
> > * rebase on v5.13-rc1
> >
> > v3: Link: 
> > https://lore.kernel.org/lkml/20210422061902.21614-1-r...@kernel.org
> > * Fix minor issues found by Anshuman
> > * Freshen up the declaration of pfn_valid() to make it consistent with
> >   pfn_is_map_memory()
> > * Add more Acked-by and Reviewed-by tags, thanks Anshuman and David
> >
> > v2: Link: https://lore.kernel.org/lkml/20210421065108.1987-1-r...@kernel.org
> > * Add check for PFN overflow in pfn_is_map_memory()
> > * Add Acked-by and Reviewed-by tags, thanks David.
> >
> > v1: Link: https://lore.kernel.org/lkml/20210420090925.7457-1-r...@kernel.org
> > * Add comment about the semantics of pfn_valid() as Anshuman suggested
> > * Extend comments about MEMBLOCK_NOMAP, per Anshuman
> > * Use pfn_is_map_memory() name for the exported wrapper for
> >   memblock_is_map_memory(). It is still local to arch/arm64 in the end
> >   because of header dependency issues.
> >
> > rfc: Link: 
> > https://lore.kernel.org/lkml/20210407172607.8812-1-r...@kernel.org
> >
> > Mike Rapoport (4):
> >   include/linux/mmzone.h: add documentation for pfn_valid()
> >   memblock: update initialization of reserved pages
> >   arm64: decouple check whether pfn is in linear map from pfn_valid()
> >   arm64: drop pfn_valid_within() and simplify pfn_valid()
> >
> >  arch/arm64/Kconfig  |  3 ---
> >  arch/arm64/include/asm/memory.h |  2 +-
> >  arch/arm64/include/asm/page.h   |  3 ++-
> >  arch/arm64/kvm/mmu.c|  2 +-
> >  arch/arm64/mm/init.c| 14 +-
> >  arch/arm64/mm/ioremap.c |  4 ++--
> >  arch/arm64/mm/mmu.c |  2 +-
> >  include/linux/memblock.h|  4 +++-
> >  include/linux/mmzone.h  | 11 +++
> >  mm/memblock.c   | 28 ++--
> >  10 files changed, 60 insertions(+), 13 deletions(-)
> >
> >
> > base-commit: 6efb943b8616ec53a5e444193dccf1af9ad627b5
> > --
> > 2.28.0
> >

-- 
Sincerely yours,
Mike.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-05-12 Thread Kefeng Wang




On 2021/5/11 18:05, Mike Rapoport wrote:

From: Mike Rapoport 

Hi,

These patches aim to remove CONFIG_HOLES_IN_ZONE and essentially hardwire
pfn_valid_within() to 1.

The idea is to mark NOMAP pages as reserved in the memory map and restore
the intended semantics of pfn_valid() to designate availability of struct
page for a pfn.

With this the core mm will be able to cope with the fact that it cannot use
NOMAP pages and the holes created by NOMAP ranges within MAX_ORDER blocks
will be treated correctly even without the need for pfn_valid_within.

The patches are boot tested on qemu-system-aarch64.

I beleive it would be best to route these via mmotm tree.


Reviewed-by: Kefeng Wang 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: arm32: panic in move_freepages (Was [PATCH v2 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid())

2021-05-12 Thread Kefeng Wang




On 2021/5/11 16:48, Mike Rapoport wrote:

On Mon, May 10, 2021 at 11:10:20AM +0800, Kefeng Wang wrote:



The memory is not continuous, see MEMBLOCK:
   memory size = 0x4c0f reserved size = 0x027ef058
   memory.cnt  = 0xa
   memory[0x0][0x80a0-0x855f], 0x04c0 bytes flags: 0x0
   memory[0x1][0x86a0-0x87df], 0x0140 bytes flags: 0x0
   memory[0x2][0x8bd0-0x8c4f], 0x0080 bytes flags: 0x0
   memory[0x3][0x8e30-0x8ecf], 0x00a0 bytes flags: 0x0
   memory[0x4][0x90d0-0xbfff], 0x2f30 bytes flags: 0x0
   memory[0x5][0xcc00-0xdc9f], 0x10a0 bytes flags: 0x0
   memory[0x6][0xde70-0xde9f], 0x0030 bytes flags: 0x0
...

The pfn_range [0xde600,0xde700] => addr_range [0xde60,0xde70]
is not available memory, and we won't create memmap , so with or without
your patch, we can't see the range in free_memmap(), right?


This is not available memory and we won't see the reange in free_memmap(),
but we still should create memmap for it and that's what my patch tried to
do.

There are a lot of places in core mm that operate on pageblocks and
free_unused_memmap() should make sure that any pageblock has a valid memory
map.

Currently, that's not the case when SPARSEMEM=y and my patch tried to fix
it.

Can you please send log with my patch applied and with the printing of
ranges that are freed in free_unused_memmap() you've used in previous
mails?



with your patch[1] and debug print in free_memmap,
> free_memmap, start_pfn = 85800,  8580 end_pfn = 86800, 8680
> free_memmap, start_pfn = 8c800,  8c80 end_pfn = 8e000, 8e00
> free_memmap, start_pfn = 8f000,  8f00 end_pfn = 9, 9000
> free_memmap, start_pfn = dcc00,  dcc0 end_pfn = de400, de40
> free_memmap, start_pfn = dec00,  dec0 end_pfn = e, e000
> free_memmap, start_pfn = e0c00,  e0c0 end_pfn = e4000, e400
> free_memmap, start_pfn = f7000,  f700 end_pfn = f8000, f800


It seems that freeing of the memory map is suboptimal still because that
code was not designed for memory layout that has more holes than Swiss
cheese.

Still, the range [0xde600,0xde700] is not freed and there should be struct
pages for this range.

Can you add

dump_page(pfn_to_page(0xde600), "");

say, in the end of memblock_free_all()?
  

The range [0xde600,0xde700] is not memory, so it won't create struct 
page for it when sparse_init?


After apply patch[1], the dump_page log,

page:ef3cc000 is uninitialized and poisoned
raw:        
page dumped because:


[1] 
https://lore.kernel.org/linux-mm/20210512031057.13580-3-wangkefeng.w...@huawei.com/T/#u

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 4/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-05-12 Thread Andrew Morton
On Tue, 11 May 2021 13:05:50 +0300 Mike Rapoport  wrote:

> From: Mike Rapoport 
> 
> The arm64's version of pfn_valid() differs from the generic because of two
> reasons:
> 
> * Parts of the memory map are freed during boot. This makes it necessary to
>   verify that there is actual physical memory that corresponds to a pfn
>   which is done by querying memblock.
> 
> * There are NOMAP memory regions. These regions are not mapped in the
>   linear map and until the previous commit the struct pages representing
>   these areas had default values.
> 
> As the consequence of absence of the special treatment of NOMAP regions in
> the memory map it was necessary to use memblock_is_map_memory() in
> pfn_valid() and to have pfn_valid_within() aliased to pfn_valid() so that
> generic mm functionality would not treat a NOMAP page as a normal page.
> 
> Since the NOMAP regions are now marked as PageReserved(), pfn walkers and
> the rest of core mm will treat them as unusable memory and thus
> pfn_valid_within() is no longer required at all and can be disabled by
> removing CONFIG_HOLES_IN_ZONE on arm64.
> 
> pfn_valid() can be slightly simplified by replacing
> memblock_is_map_memory() with memblock_is_memory().
> 
> ...
>
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1052,9 +1052,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
>   def_bool y
>   depends on NUMA
>  
> -config HOLES_IN_ZONE
> - def_bool y
> -
>  source "kernel/Kconfig.hz"
>  
>  config ARCH_SPARSEMEM_ENABLE

https://lkml.kernel.org/r/20210417075946.181402-1-wangkefeng.w...@huawei.com
already did this, so I simply dropped that hunk?  And I don't think the
changelog needs updating for this?


> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 798f74f501d5..fb07218da2c0 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -251,7 +251,7 @@ int pfn_valid(unsigned long pfn)
>   if (!early_section(ms))
>   return pfn_section_valid(ms, pfn);
>  
> - return memblock_is_map_memory(addr);
> + return memblock_is_memory(addr);
>  }
>  EXPORT_SYMBOL(pfn_valid);

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-05-12 Thread Ard Biesheuvel
On Tue, 11 May 2021 at 12:05, Mike Rapoport  wrote:
>
> From: Mike Rapoport 
>
> Hi,
>
> These patches aim to remove CONFIG_HOLES_IN_ZONE and essentially hardwire
> pfn_valid_within() to 1.
>
> The idea is to mark NOMAP pages as reserved in the memory map and restore
> the intended semantics of pfn_valid() to designate availability of struct
> page for a pfn.
>
> With this the core mm will be able to cope with the fact that it cannot use
> NOMAP pages and the holes created by NOMAP ranges within MAX_ORDER blocks
> will be treated correctly even without the need for pfn_valid_within.
>
> The patches are boot tested on qemu-system-aarch64.
>

Did you use EFI boot when testing this? The memory map is much more
fragmented in that case, so this would be a good data point.


> I beleive it would be best to route these via mmotm tree.
>
> v4:
> * rebase on v5.13-rc1
>
> v3: Link: https://lore.kernel.org/lkml/20210422061902.21614-1-r...@kernel.org
> * Fix minor issues found by Anshuman
> * Freshen up the declaration of pfn_valid() to make it consistent with
>   pfn_is_map_memory()
> * Add more Acked-by and Reviewed-by tags, thanks Anshuman and David
>
> v2: Link: https://lore.kernel.org/lkml/20210421065108.1987-1-r...@kernel.org
> * Add check for PFN overflow in pfn_is_map_memory()
> * Add Acked-by and Reviewed-by tags, thanks David.
>
> v1: Link: https://lore.kernel.org/lkml/20210420090925.7457-1-r...@kernel.org
> * Add comment about the semantics of pfn_valid() as Anshuman suggested
> * Extend comments about MEMBLOCK_NOMAP, per Anshuman
> * Use pfn_is_map_memory() name for the exported wrapper for
>   memblock_is_map_memory(). It is still local to arch/arm64 in the end
>   because of header dependency issues.
>
> rfc: Link: https://lore.kernel.org/lkml/20210407172607.8812-1-r...@kernel.org
>
> Mike Rapoport (4):
>   include/linux/mmzone.h: add documentation for pfn_valid()
>   memblock: update initialization of reserved pages
>   arm64: decouple check whether pfn is in linear map from pfn_valid()
>   arm64: drop pfn_valid_within() and simplify pfn_valid()
>
>  arch/arm64/Kconfig  |  3 ---
>  arch/arm64/include/asm/memory.h |  2 +-
>  arch/arm64/include/asm/page.h   |  3 ++-
>  arch/arm64/kvm/mmu.c|  2 +-
>  arch/arm64/mm/init.c| 14 +-
>  arch/arm64/mm/ioremap.c |  4 ++--
>  arch/arm64/mm/mmu.c |  2 +-
>  include/linux/memblock.h|  4 +++-
>  include/linux/mmzone.h  | 11 +++
>  mm/memblock.c   | 28 ++--
>  10 files changed, 60 insertions(+), 13 deletions(-)
>
>
> base-commit: 6efb943b8616ec53a5e444193dccf1af9ad627b5
> --
> 2.28.0
>
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm