On 2020-11-19 16:25, David Brazdil wrote:
KVM nVHE code runs under a different VA mapping than the kernel, hence
so far it relied only on PC-relative addressing to avoid accidentally
using a relocated kernel VA from a constant pool (see hyp_symbol_addr).

So as to reduce the possibility of a programmer error, fixup the
relocated addresses instead. Let the kernel relocate them to kernel VA
first, but then iterate over them again, filter those that point to hyp
code/data and convert the kernel VA to hyp VA.

This is done after kvm_compute_layout and before apply_alternatives.

Signed-off-by: David Brazdil <dbraz...@google.com>
---
 arch/arm64/include/asm/kvm_mmu.h |  1 +
 arch/arm64/kernel/smp.c          |  4 +-
 arch/arm64/kvm/va_layout.c       | 76 ++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 5168a0c516ae..e5226f7e4732 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -105,6 +105,7 @@ alternative_cb_end
 void kvm_update_va_mask(struct alt_instr *alt,
                        __le32 *origptr, __le32 *updptr, int nr_inst);
 void kvm_compute_layout(void);
+void kvm_fixup_hyp_relocations(void);

 static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..30241afc2c93 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
                           "CPU: CPUs started in inconsistent modes");
        else
                pr_info("CPU: All CPU(s) started at EL1\n");
-       if (IS_ENABLED(CONFIG_KVM))
+       if (IS_ENABLED(CONFIG_KVM)) {
                kvm_compute_layout();
+               kvm_fixup_hyp_relocations();
+       }
 }

 void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index d8cc51bd60bf..b80fab974896 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -10,6 +10,7 @@
 #include <asm/alternative.h>
 #include <asm/debug-monitors.h>
 #include <asm/insn.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/memory.h>

@@ -82,6 +83,81 @@ __init void kvm_compute_layout(void)
        init_hyp_physvirt_offset();
 }

+#define __load_elf_u64(s)                                      \
+       ({                                                      \
+               extern u64 s;                                   \
+               u64 val;                                        \
+                                                               \
+               asm ("ldr %0, =%1" : "=r"(val) : "S"(&s));    \
+               val;                                            \
+       })

I'm not sure I get the rationale behind the naming here. None of this
has much to do with ELF; it seems to just load a value from a
constant pool.

+
+static bool __is_within_bounds(u64 addr, char *start, char *end)
+{
+       return start <= (char*)addr && (char*)addr < end;
+}
+
+static bool __is_in_hyp_section(u64 addr)
+{
+       return __is_within_bounds(addr, __hyp_text_start, __hyp_text_end) ||
+              __is_within_bounds(addr, __hyp_rodata_start, __hyp_rodata_end) ||
+              __is_within_bounds(addr,
+                                 CHOOSE_NVHE_SYM(__per_cpu_start),
+                                 CHOOSE_NVHE_SYM(__per_cpu_end));
+}
+
+static void __fixup_hyp_rel(u64 addr)
+{
+       u64 *ptr, kern_va, hyp_va;
+
+       /* Adjust the relocation address taken from ELF for KASLR. */
+       addr += kaslr_offset();
+
+       /* Skip addresses not in any of the hyp sections. */
+       if (!__is_in_hyp_section(addr))
+               return;
+
+       /* Get the LM alias of the relocation address. */
+       ptr = (u64*)kvm_ksym_ref((void*)addr);

Why the casts? We should be perfectly fine without them.

nit: we really need to change the name of this helper, it doesn't have
anything to do with symbols anymore. And actually, lm_alias() *is* the
right thing to use here (we don't relocate anything on VHE).

+
+       /*
+        * Read the value at the relocation address. It has already been
+        * relocated to the actual kernel kimg VA.
+        */
+       kern_va = (u64)kvm_ksym_ref((void*)*ptr);

Same comment.

+
+       /* Convert to hyp VA. */
+       hyp_va = __early_kern_hyp_va(kern_va);
+
+       /* Store hyp VA at the relocation address. */
+       *ptr = __early_kern_hyp_va(kern_va);
+}
+
+static void __fixup_hyp_rela(void)
+{
+       Elf64_Rela *rel;
+       size_t i, n;
+
+       rel = (Elf64_Rela*)(kimage_vaddr + __load_elf_u64(__rela_offset));
+       n = __load_elf_u64(__rela_size) / sizeof(*rel);
+
+       for (i = 0; i < n; ++i)
+               __fixup_hyp_rel(rel[i].r_offset);
+}
+
+/*
+ * The kernel relocated pointers to kernel VA. Iterate over relocations in
+ * the hypervisor ELF sections and convert them to hyp VA. This avoids the
+ * need to only use PC-relative addressing in hyp.
+ */
+__init void kvm_fixup_hyp_relocations(void)
+{
+       if (!IS_ENABLED(CONFIG_RELOCATABLE) || has_vhe())

What do we do if CONFIG_RELOCATABLE is not selected? As far as I can tell,
bad things will happen...

I'm also worried that, at this stage, the kernel is broken until you
remove the other bits involved in offsetting pointers at runtime.

Thanks,

        M.
--
Jazz is not dead. It just smells funny...

Reply via email to