On Sat, Apr 26, 2025, Peter Zijlstra wrote:
> On Wed, Apr 16, 2025 at 10:38:59AM +0200, Peter Zijlstra wrote:
>
> > Yeah, I finally got there. I'll go cook up something else.
>
> Sean, Paolo, can I once again ask how best to test this fastop crud?
Apply the below, build KVM selftests, enable forced emulation in KVM, and then
run fastops_test. It's well past time we had a selftest for this. It won't
detect bugs that are specific to 32-bit kernels, e.g. commit b63f20a778c8
("x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386"), since KVM
selftests are 64-bit only, but for what you're doing, it should suffice.
For 32-bit kernels, it requires a 32-bit QEMU and KVM-Unit-Tests (or maybe even
a full blown 32-bit guest image; I forget how much coverage KUT provides).
Regardless, I don't see any reason to put you through that pain, I can do that
sanity testing.
I'll post a proper patch for the new selftest after testing on AMD. The test
relies on hardware providing deterministic behavior for undefined output (RFLAGS
and GPRs); I don't know if that holds true on AMD.
To enable forced emulation, set /sys/module/kvm/parameters/force_emulation_prefix
to '1' (for the purposes of this test, the value doesn't matter). The param is
writable at runtime, so it doesn't matter if kvm.ko is built-in or a module.
---
From: Sean Christopherson <[email protected]>
Date: Mon, 28 Apr 2025 08:55:44 -0700
Subject: [PATCH] KVM: selftests: Add a test for x86's fastops emulation
Signed-off-by: Sean Christopherson <[email protected]>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/x86/fastops_test.c | 165 ++++++++++++++++++
2 files changed, 166 insertions(+)
create mode 100644 tools/testing/selftests/kvm/x86/fastops_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f62b0a5aba35..411c3d5eb5b1 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..c3799edb5d0c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define guest_execute_fastop_1(FEP, insn, type_t, __val, __flags)
\
+do {
\
+ __asm__ __volatile__("bt $0, %[val]\n\t"
\
+ FEP insn " %[val]\n\t"
\
+ "pushfq\n\t"
\
+ "pop %[flags]\n\t"
\
+ : [val]"+r"(__val), [flags]"=r"(__flags)
\
+ : : "cc", "memory");
\
+} while (0)
+
+#define guest_test_fastop_1(insn, type_t, __val)
\
+do {
\
+ type_t val = __val, ex_val = __val, input = __val;
\
+ uint64_t flags, ex_flags;
\
+
\
+ guest_execute_fastop_1("", insn, type_t, ex_val, ex_flags);
\
+ guest_execute_fastop_1(KVM_FEP, insn, type_t, val, flags);
\
+
\
+ __GUEST_ASSERT(val == ex_val,
\
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx",
\
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val);
\
+ __GUEST_ASSERT(flags == ex_flags,
\
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx",
\
+ ex_flags, insn, (uint64_t)input, flags);
\
+} while (0)
+
+#define guest_execute_fastop_2(FEP, insn, type_t, __input, __output, __flags)
\
+do {
\
+ __asm__ __volatile__("bt $0, %[output]\n\t"
\
+ FEP insn " %[input], %[output]\n\t"
\
+ "pushfq\n\t"
\
+ "pop %[flags]\n\t"
\
+ : [output]"+r"(__output), [flags]"=r"(__flags)
\
+ : [input]"r"(__input) : "cc", "memory");
\
+} while (0)
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2)
\
+do {
\
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output =
__val2; \
+ uint64_t flags, ex_flags;
\
+
\
+ guest_execute_fastop_2("", insn, type_t, input, ex_output, ex_flags);
\
+ guest_execute_fastop_2(KVM_FEP, insn, type_t, input, output, flags);
\
+
\
+ __GUEST_ASSERT(output == ex_output,
\
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx",
\
+ (uint64_t)ex_output, insn, (uint64_t)input,
\
+ (uint64_t)input2, (uint64_t)output);
\
+ __GUEST_ASSERT(flags == ex_flags,
\
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx",
\
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2,
flags); \
+} while (0)
+
+#define guest_execute_fastop_cl(FEP, insn, type_t, __shift, __output, __flags)
\
+do {
\
+ __asm__ __volatile__("bt $0, %[output]\n\t"
\
+ FEP insn " %%cl, %[output]\n\t"
\
+ "pushfq\n\t"
\
+ "pop %[flags]\n\t"
\
+ : [output]"+r"(__output), [flags]"=r"(__flags)
\
+ : "c"(__shift) : "cc", "memory");
\
+} while (0)
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2)
\
+do {
\
+ type_t output = __val2, ex_output = __val2, input = __val2;
\
+ uint8_t shift = __val1;
\
+ uint64_t flags, ex_flags;
\
+
\
+ guest_execute_fastop_cl("", insn, type_t, shift, ex_output, ex_flags);
\
+ guest_execute_fastop_cl(KVM_FEP, insn, type_t, shift, output, flags);
\
+
\
+ __GUEST_ASSERT(output == ex_output,
\
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx",
\
+ (uint64_t)ex_output, insn, shift, (uint64_t)input,
\
+ (uint64_t)output);
\
+ __GUEST_ASSERT(flags == ex_flags,
\
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx",
\
+ ex_flags, insn, shift, (uint64_t)input, flags);
\
+} while (0)
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix)
\
+do {
\
+ int i, j;
\
+
\
+ for (i = 0; i < ARRAY_SIZE(vals); i++) {
\
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]);
\
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]);
\
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]);
\
+ guest_test_fastop_1("not" suffix, type_t, vals[i]);
\
+
\
+ for (j = 0; j < ARRAY_SIZE(vals); j++) {
\
+ guest_test_fastop_2("add" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i],
vals[j]); \
+
\
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i],
vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i],
vals[j]); \
+ }
\
+ }
\
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
base-commit: 661b7ddb2d10258b53106d7c39c309806b00a99c
--