On 19.06.25 21:42, Mathias Krause wrote:
> KVM has a weird behaviour when a guest executes VMCALL on an AMD system
> or VMMCALL on an Intel CPU. Both naturally generate an invalid opcode
> exception (#UD) as they are just the wrong instruction for the CPU
> given. But instead of forwarding the exception to the guest, KVM tries
> to patch the guest instruction to match the host's actual hypercall
> instruction. That is doomed to fail as read-only code is rather the
> standard these days. But, instead of letting go the patching attempt and
> falling back to #UD injection, KVM injects the page fault instead.
> 
> That's wrong on multiple levels. Not only isn't that a valid exception
> to be generated by these instructions, confusing attempts to handle
> them. It also destroys guest state by doing so, namely the value of CR2.
> 
> Sean attempted to fix that in KVM[1] but the patch was never applied.
> 
> Later, Oliver added a quirk bit in [2] so the behaviour can, at least,
> conceptually be disabled. Paolo even called out to add this very
> functionality to disable the quirk in QEMU[3]. So lets just do it.
> 
> A new property 'hypercall-patching=on|off' is added, for the very
> unlikely case that there are setups that really need the patching.
> However, these would be vulnerable to memory corruption attacks freely
> overwriting code as they please. So, my guess is, there are exactly 0
> systems out there requiring this quirk.
> 
> [1] https://lore.kernel.org/kvm/20211210222903.3417968-1-sea...@google.com/
> [2] https://lore.kernel.org/kvm/20220316005538.2282772-2-oup...@google.com/
> [3] 
> https://lore.kernel.org/kvm/80e1f1d2-2d79-22b7-6665-c00e4fe9c...@redhat.com/
> 
> Cc: Oliver Upton <oliver.up...@linux.dev>
> Cc: Sean Christopherson <sea...@google.com>
> Cc: Paolo Bonzini <pbonz...@redhat.com>
> Signed-off-by: Mathias Krause <mini...@grsecurity.net>
> ---
>  include/system/kvm_int.h |  1 +
>  qemu-options.hx          | 10 ++++++++++
>  target/i386/kvm/kvm.c    | 38 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 49 insertions(+)
> 
> diff --git a/include/system/kvm_int.h b/include/system/kvm_int.h
> index 756a3c0a250e..fd7129824429 100644
> --- a/include/system/kvm_int.h
> +++ b/include/system/kvm_int.h
> @@ -159,6 +159,7 @@ struct KVMState
>      uint64_t kvm_eager_split_size;  /* Eager Page Splitting chunk size */
>      struct KVMDirtyRingReaper reaper;
>      struct KVMMsrEnergy msr_energy;
> +    bool hypercall_patching_enabled;
>      NotifyVmexitOption notify_vmexit;
>      uint32_t notify_window;
>      uint32_t xen_version;
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 1f862b19a676..c2e232649c19 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -231,6 +231,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel,
>      "                dirty-ring-size=n (KVM dirty ring GFN count, default 
> 0)\n"
>      "                eager-split-size=n (KVM Eager Page Split chunk size, 
> default 0, disabled. ARM only)\n"
>      "                
> notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM 
> exit and set notify window, x86 only)\n"
> +    "                hypercall-patching=on|off (enable KVM's VMCALL/VMMCALL 
> hypercall patching quirk, x86 only)\n"
>      "                thread=single|multi (enable multi-threaded TCG)\n"
>      "                device=path (KVM device path, default /dev/kvm)\n", 
> QEMU_ARCH_ALL)
>  SRST
> @@ -313,6 +314,15 @@ SRST
>          open up for a specified of time (i.e. notify-window).
>          Default: notify-vmexit=run,notify-window=0.
>  
> +    ``hypercall-patching=on|off``
> +        KVM tries to recover from the wrong hypercall instruction being used 
> by
> +        a guest by attempting to rewrite it to the one supported natively by
> +        the host CPU (VMCALL on Intel, VMMCALL for AMD systems). However, 
> this
> +        patching may fail if the guest memory is write protected, leading to 
> a
> +        page fault getting propagated to the guest instead of an illegal
> +        instruction exception. As this may confuse guests, it gets disabled 
> by
> +        default (x86 only).
> +
>      ``device=path``
>          Sets the path to the KVM device node. Defaults to ``/dev/kvm``. This
>          option can be used to pass the KVM device to use via a file 
> descriptor
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 56a6b9b6381a..6f5f3b95e553 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -3224,6 +3224,19 @@ static int kvm_vm_enable_energy_msrs(KVMState *s)
>      return 0;
>  }
>  
> +static int kvm_vm_disable_hypercall_patching(KVMState *s)
> +{
> +    int valid_quirks = kvm_vm_check_extension(s, KVM_CAP_DISABLE_QUIRKS2);
> +
> +    if (valid_quirks & KVM_X86_QUIRK_FIX_HYPERCALL_INSN) {
> +        return kvm_vm_enable_cap(s, KVM_CAP_DISABLE_QUIRKS2, 0,
> +                                 KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
> +    }
> +
> +    warn_report("kvm: disabling hypercall patching not supported");
> +    return 0;
> +}
> +
>  int kvm_arch_init(MachineState *ms, KVMState *s)
>  {
>      int ret;
> @@ -3363,6 +3376,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>          }
>      }
>  
> +    if (s->hypercall_patching_enabled == false) {
> +        if (kvm_vm_disable_hypercall_patching(s)) {
> +            warn_report("kvm: failed to disable hypercall patching quirk");
> +        }
> +    }
> +
>      return 0;
>  }
>  
> @@ -6456,6 +6475,19 @@ void kvm_request_xsave_components(X86CPU *cpu, 
> uint64_t mask)
>      }
>  }
>  
> +static bool kvm_arch_get_hypercall_patching(Object *obj, Error **errp)
> +{
> +    KVMState *s = KVM_STATE(obj);
> +    return s->hypercall_patching_enabled;
> +}
> +
> +static void kvm_arch_set_hypercall_patching(Object *obj, bool value,
> +                                            Error **errp)
> +{
> +    KVMState *s = KVM_STATE(obj);
> +    s->hypercall_patching_enabled = value;
> +}
> +
>  static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp)
>  {
>      KVMState *s = KVM_STATE(obj);
> @@ -6589,6 +6621,12 @@ static void kvm_arch_set_xen_evtchn_max_pirq(Object 
> *obj, Visitor *v,
>  
>  void kvm_arch_accel_class_init(ObjectClass *oc)
>  {
> +    object_class_property_add_bool(oc, "hypercall-patching",
> +                                   kvm_arch_get_hypercall_patching,
> +                                   kvm_arch_set_hypercall_patching);
> +    object_class_property_set_description(oc, "hypercall-patching",
> +                                          "Enable hypercall patching quirk");
> +
>      object_class_property_add_enum(oc, "notify-vmexit", "NotifyVMexitOption",
>                                     &NotifyVmexitOption_lookup,
>                                     kvm_arch_get_notify_vmexit,

Ping! Paolo, can we get this weird and unexpected behaviour of KVM get
disabled by default, please?

Thanks,
Mathias

Reply via email to