[RFC 03/11] KVM: Allow VM lifecycle management without userspace

2017-08-25 Thread Florent Revest
The current codebase of KVM makes many assumptions regarding the origin of
the virtual machine being executed or configured. Indeed, the KVM API
implementation has been written with userspace usage in mind and lots of
userspace-specific code is used (namely preempt_notifiers, eventfd, mmu
notifiers, current->mm...)

The aim of this patch is to make the KVM API (create_vm, create_vcpu etc)
usable from a kernel context. A simple trick is used to distinguish
userspace VMs (coming from QEMU or LKVM...) from internal VMs (coming
from other subsystems, for example for sandboxing purposes):
  - When a VM is created from an ioctl, kvm->mm is set to current->mm
  - When a VM is created from the kernel, kvm->mm must be set to NULL

This ensures that no userspace program can create internal VMs and makes it
easy to check whether a given VM is attached to a process or is internal.

This patch simply wraps the userspace-specific pieces of code in
kvm_main.c in conditions checking whether kvm->mm is set, and modifies
the prototype of kvm_create_vm() to accept a NULL mm.

Signed-off-by: Florent Revest 
---
 virt/kvm/kvm_main.c | 64 ++---
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 15252d7..2e7af1a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -154,7 +154,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
if (mutex_lock_killable(>mutex))
return -EINTR;
cpu = get_cpu();
-   preempt_notifier_register(>preempt_notifier);
+   if (vcpu->kvm->mm)
+   preempt_notifier_register(>preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
return 0;
@@ -165,7 +166,8 @@ void vcpu_put(struct kvm_vcpu *vcpu)
 {
preempt_disable();
kvm_arch_vcpu_put(vcpu);
-   preempt_notifier_unregister(>preempt_notifier);
+   if (vcpu->kvm->mm)
+   preempt_notifier_unregister(>preempt_notifier);
preempt_enable();
mutex_unlock(>mutex);
 }
@@ -640,7 +642,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
 }

-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(unsigned long type, struct mm_struct *mm)
 {
int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
@@ -649,9 +651,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
return ERR_PTR(-ENOMEM);

spin_lock_init(>mmu_lock);
-   mmgrab(current->mm);
-   kvm->mm = current->mm;
-   kvm_eventfd_init(kvm);
+   kvm->mm = mm;
+   if (mm) {
+   mmgrab(current->mm);
+   kvm_eventfd_init(kvm);
+   }
mutex_init(>lock);
mutex_init(>irq_lock);
mutex_init(>slots_lock);
@@ -697,15 +701,18 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err;
}

-   r = kvm_init_mmu_notifier(kvm);
-   if (r)
-   goto out_err;
+   if (mm) {
+   r = kvm_init_mmu_notifier(kvm);
+   if (r)
+   goto out_err;
+   }

spin_lock(_lock);
list_add(>vm_list, _list);
spin_unlock(_lock);

-   preempt_notifier_inc();
+   if (mm)
+   preempt_notifier_inc();

return kvm;

@@ -721,7 +728,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
kvm_arch_free_vm(kvm);
-   mmdrop(current->mm);
+   if (mm)
+   mmdrop(mm);
return ERR_PTR(r);
 }

@@ -772,9 +780,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
cleanup_srcu_struct(>irq_srcu);
cleanup_srcu_struct(>srcu);
kvm_arch_free_vm(kvm);
-   preempt_notifier_dec();
+   if (mm)
+   preempt_notifier_dec();
hardware_disable_all();
-   mmdrop(mm);
+   if (mm)
+   mmdrop(mm);
 }

 void kvm_get_kvm(struct kvm *kvm)
@@ -1269,6 +1279,9 @@ unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t 
gfn)
if (kvm_is_error_hva(addr))
return PAGE_SIZE;

+   if (!kvm->mm)
+   return PAGE_SIZE;
+
down_read(>mm->mmap_sem);
vma = find_vma(current->mm, addr);
if (!vma)
@@ -2486,9 +2499,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 
id)
if (r)
goto vcpu_destroy;

-   r = kvm_create_vcpu_debugfs(vcpu);
-   if (r)
-   goto vcpu_destroy;
+   if (kvm->mm) {
+   r = kvm_create_vcpu_debugfs(vcpu);
+   if (r)
+   goto vcpu_destroy;
+   }

mutex_lock(>lock);
if (kvm_get_vcpu_by_id(kvm, id)) {
@@ -2499,11 +2514,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, 
u32 id)

[RFC 03/11] KVM: Allow VM lifecycle management without userspace

2017-08-25 Thread Florent Revest
The current codebase of KVM makes many assumptions regarding the origin of
the virtual machine being executed or configured. Indeed, the KVM API
implementation has been written with userspace usage in mind and lots of
userspace-specific code is used (namely preempt_notifiers, eventfd, mmu
notifiers, current->mm...)

The aim of this patch is to make the KVM API (create_vm, create_vcpu etc)
usable from a kernel context. A simple trick is used to distinguish
userspace VMs (coming from QEMU or LKVM...) from internal VMs (coming
from other subsystems, for example for sandboxing purposes):
  - When a VM is created from an ioctl, kvm->mm is set to current->mm
  - When a VM is created from the kernel, kvm->mm must be set to NULL

This ensures that no userspace program can create internal VMs and makes it
easy to check whether a given VM is attached to a process or is internal.

This patch simply wraps the userspace-specific pieces of code in
kvm_main.c in conditions checking whether kvm->mm is set, and modifies
the prototype of kvm_create_vm() to accept a NULL mm.

Signed-off-by: Florent Revest 
---
 virt/kvm/kvm_main.c | 64 ++---
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 15252d7..2e7af1a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -154,7 +154,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
if (mutex_lock_killable(>mutex))
return -EINTR;
cpu = get_cpu();
-   preempt_notifier_register(>preempt_notifier);
+   if (vcpu->kvm->mm)
+   preempt_notifier_register(>preempt_notifier);
kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
return 0;
@@ -165,7 +166,8 @@ void vcpu_put(struct kvm_vcpu *vcpu)
 {
preempt_disable();
kvm_arch_vcpu_put(vcpu);
-   preempt_notifier_unregister(>preempt_notifier);
+   if (vcpu->kvm->mm)
+   preempt_notifier_unregister(>preempt_notifier);
preempt_enable();
mutex_unlock(>mutex);
 }
@@ -640,7 +642,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
 }

-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(unsigned long type, struct mm_struct *mm)
 {
int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
@@ -649,9 +651,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
return ERR_PTR(-ENOMEM);

spin_lock_init(>mmu_lock);
-   mmgrab(current->mm);
-   kvm->mm = current->mm;
-   kvm_eventfd_init(kvm);
+   kvm->mm = mm;
+   if (mm) {
+   mmgrab(current->mm);
+   kvm_eventfd_init(kvm);
+   }
mutex_init(>lock);
mutex_init(>irq_lock);
mutex_init(>slots_lock);
@@ -697,15 +701,18 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err;
}

-   r = kvm_init_mmu_notifier(kvm);
-   if (r)
-   goto out_err;
+   if (mm) {
+   r = kvm_init_mmu_notifier(kvm);
+   if (r)
+   goto out_err;
+   }

spin_lock(_lock);
list_add(>vm_list, _list);
spin_unlock(_lock);

-   preempt_notifier_inc();
+   if (mm)
+   preempt_notifier_inc();

return kvm;

@@ -721,7 +728,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
kvm_arch_free_vm(kvm);
-   mmdrop(current->mm);
+   if (mm)
+   mmdrop(mm);
return ERR_PTR(r);
 }

@@ -772,9 +780,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
cleanup_srcu_struct(>irq_srcu);
cleanup_srcu_struct(>srcu);
kvm_arch_free_vm(kvm);
-   preempt_notifier_dec();
+   if (mm)
+   preempt_notifier_dec();
hardware_disable_all();
-   mmdrop(mm);
+   if (mm)
+   mmdrop(mm);
 }

 void kvm_get_kvm(struct kvm *kvm)
@@ -1269,6 +1279,9 @@ unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t 
gfn)
if (kvm_is_error_hva(addr))
return PAGE_SIZE;

+   if (!kvm->mm)
+   return PAGE_SIZE;
+
down_read(>mm->mmap_sem);
vma = find_vma(current->mm, addr);
if (!vma)
@@ -2486,9 +2499,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 
id)
if (r)
goto vcpu_destroy;

-   r = kvm_create_vcpu_debugfs(vcpu);
-   if (r)
-   goto vcpu_destroy;
+   if (kvm->mm) {
+   r = kvm_create_vcpu_debugfs(vcpu);
+   if (r)
+   goto vcpu_destroy;
+   }

mutex_lock(>lock);
if (kvm_get_vcpu_by_id(kvm, id)) {
@@ -2499,11 +2514,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, 
u32 id)
BUG_ON(kvm->vcpus[atomic_read(>online_vcpus)]);