On Tuesday 06 May 2008 03:06:23 Kay, Allen M wrote:
> Kvm kernel changes.
>
> Signed-off-by: Allen M Kay <[EMAIL PROTECTED]>
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,183 @@
> +
> +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
> +
> +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev
> *dev);
> +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu);
> +void iommu_free_domain(struct dmar_domain *domain);
> +int domain_init(struct dmar_domain *domain, int guest_width);
> +int domain_context_mapping(struct dmar_domain *d,
> + struct pci_dev *pdev);
> +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
> + u64 hpa, size_t size, int prot);
> +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8
> devfn);
> +struct dmar_domain * find_domain(struct pci_dev *pdev);
Please move these declarations to a .h file, and give them appropriate prefixes:
domain_context_mapping is a confusing name, and since it's an Intel iommu-only
thing, use something like
intel_iommu_domain_context_mapping
> +int kvm_iommu_map_pages(struct kvm *kvm,
> + gfn_t base_gfn, unsigned long npages)
> +{
> + unsigned long gpa;
> + struct page *page;
> + hpa_t hpa;
> + int j, write;
> + struct vm_area_struct *vma;
> +
> + if (!kvm->arch.domain)
> + return 1;
> +
> + gpa = base_gfn << PAGE_SHIFT;
> + page = gfn_to_page(kvm, base_gfn);
> + hpa = page_to_phys(page);
> +
> + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n", gpa);
> + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %llx\n", hpa);
> + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
> + npages*PAGE_SIZE);
> +
> + for (j = 0; j < npages; j++) {
> + gpa += PAGE_SIZE;
> + page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
> + hpa = page_to_phys(page);
> + domain_page_mapping(kvm->arch.domain, gpa, hpa,
> PAGE_SIZE,
> + DMA_PTE_READ | DMA_PTE_WRITE);
> + vma = find_vma(current->mm, gpa);
> + if (!vma)
> + return 1;
*
> + write = (vma->vm_flags & VM_WRITE) != 0;
> + get_user_pages(current, current->mm, gpa,
> + PAGE_SIZE, write, 0, NULL, NULL);
You should put_page each of the user pages when freeing or exiting (in
unmap_guest); otherwise a reference is held on every page and a lot of memory
is leaked.
Holding the references also rules out any form of guest swapping, and you should
put_page so that a balloon driver in the guest can actually free pages back to
the host.
> + }
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages);
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> + int i, status;
> + for (i = 0; i < kvm->nmemslots; i++) {
> + status = kvm_iommu_map_pages(kvm,
> kvm->memslots[i].base_gfn,
> + kvm->memslots[i].npages);
> + if (status)
> + return status;
*
> + }
> + return 0;
> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> + struct kvm_pci_passthrough_dev *pci_pt_dev)
> +{
> + struct dmar_drhd_unit *drhd;
> + struct dmar_domain *domain;
> + struct intel_iommu *iommu;
> + struct pci_dev *pdev = NULL;
> +
> + printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n",
> + pci_pt_dev->host.busnr,
> + PCI_SLOT(pci_pt_dev->host.devfn),
> + PCI_FUNC(pci_pt_dev->host.devfn));
> +
> + for_each_pci_dev(pdev) {
> + if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
> + (pdev->devfn == pci_pt_dev->host.devfn))
> + goto found;
> + }
You can use pci_get_device instead of going through the list yourself.
> + goto not_found;
> +found:
> + pci_pt_dev->pdev = pdev;
> +
> + drhd = dmar_find_matched_drhd_unit(pdev);
> + if (!drhd) {
> + printk(KERN_ERR "kvm_iommu_map_guest: drhd == NULL\n");
> + goto not_found;
> + }
> +
> + printk(KERN_DEBUG "kvm_iommu_map_guest: reg_base_addr = %llx\n",
> + drhd->reg_base_addr);
> +
> + iommu = drhd->iommu;
> + if (!iommu) {
> + printk(KERN_ERR "kvm_iommu_map_guest: iommu == NULL\n");
> + goto not_found;
> + }
> + domain = iommu_alloc_domain(iommu);
> + if (!domain) {
> + printk(KERN_ERR "kvm_iommu_map_guest: domain ==
> NULL\n");
> + goto not_found;
> + }
> + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
> + printk(KERN_ERR "kvm_iommu_map_guest: domain_init()
> failed\n");
> + goto not_found;
Memory allocated in iommu_alloc_domain is leaked in this case
> + }
> + kvm->arch.domain = domain;
> + kvm_iommu_map_memslots(kvm);
*: You don't check the return value of kvm_iommu_map_memslots() (or the calls marked * above) for failure.
> + domain_context_mapping(kvm->arch.domain, pdev);
> + return 0;
> +not_found:
> + return 1;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest);
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> + struct dmar_domain *domain;
> + struct kvm_pci_pt_dev_list *entry;
> + struct pci_dev *pdev;
> +
> + list_for_each_entry(entry, &kvm->arch.domain->devices, list) {
> + printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n",
> + entry->pt_dev.host.busnr,
> + PCI_SLOT(entry->pt_dev.host.devfn),
> + PCI_FUNC(entry->pt_dev.host.devfn));
> +
> + pdev = entry->pt_dev.pdev;
> +
> + if (pdev == NULL) {
> + printk("kvm_iommu_unmap_guest: pdev == NULL\n");
> + return 1;
> + }
> +
> + /* detach kvm dmar domain */
> + detach_domain_for_dev(kvm->arch.domain,
> + pdev->bus->number, pdev->devfn);
> +
> + /* now restore back linux iommu domain */
> + domain = find_domain(pdev);
> + if (domain)
> + domain_context_mapping(domain, pdev);
> + else
> + printk(KERN_DEBUG
> + "kvm_iommu_unmap_guest: domain ==
> NULL\n");
> + }
> + /* unmap guest memory in vt-d page table */
> + iommu_free_domain(kvm->arch.domain);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_unmap_guest);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a97d2e2..a877db2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -257,6 +257,7 @@ static void kvm_free_pci_passthrough(struct kvm
> *kvm)
>
> list_del(&pci_pt_dev->list);
> }
> + kvm->arch.domain = NULL;
> }
>
> unsigned long segment_base(u16 selector)
> @@ -1846,6 +1847,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
> if (copy_from_user(&pci_pt_dev, argp, sizeof
> pci_pt_dev))
> goto out;
>
> + r = kvm_iommu_map_guest(kvm, &pci_pt_dev);
> + if (r)
> + goto out;
> +
> r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev);
> if (r)
> goto out;
If the ioctl fails, you don't "unmap" the guest (and you also leak memory). I
suggest calling kvm_iommu_map_guest() after the ioctl, since the ioctl also
checks whether a similar device has already been added and populates the
structure. If you do that, call kvm_free_pci_passthrough() when the iommu
mapping fails.
In general, it looks like you're leaking memory and failing to unmap or free
the domain in all of the error paths.
> @@ -4088,6 +4093,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
>
> void kvm_arch_destroy_vm(struct kvm *kvm)
> {
> + if (kvm->arch.domain)
> + kvm_iommu_unmap_guest(kvm);
This condition can be checked inside kvm_iommu_unmap_guest() itself, so that
it can be called unconditionally here.
> kvm_free_pci_passthrough(kvm);
> kvm_free_pit(kvm);
> kfree(kvm->arch.vpic);
> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 4662d49..70248cb 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -19,6 +19,8 @@
> #include <linux/kvm_types.h>
>
> #include <asm/desc.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
>
> #define KVM_MAX_VCPUS 16
> #define KVM_MEMORY_SLOTS 32
> @@ -318,6 +320,7 @@ struct kvm_arch{
> */
> struct list_head active_mmu_pages;
> struct list_head pci_pt_dev_head;
> + struct dmar_domain *domain;
Use a descriptive name, like intel_iommu_domain.
> struct kvm_pic *vpic;
> struct kvm_ioapic *vioapic;
> struct kvm_pit *vpit;
> diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
> index 5f93b78..6202ed1 100644
> --- a/include/asm-x86/kvm_para.h
> +++ b/include/asm-x86/kvm_para.h
> @@ -170,5 +170,6 @@ struct kvm_pci_pt_info {
> struct kvm_pci_passthrough_dev {
> struct kvm_pci_pt_info guest;
> struct kvm_pci_pt_info host;
> + struct pci_dev *pdev; /* kernel device pointer for host dev
> */
> };
> #endif
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 4e16682..bcfcf78 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -276,6 +276,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
> int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
> void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
>
> +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
> + unsigned long npages);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> + struct kvm_pci_passthrough_dev *pci_pt_dev);
> +int kvm_iommu_unmap_guest(struct kvm *kvm);
> +
Why can't these be put in asm-x86/kvm_host.h?
> static inline void kvm_guest_enter(void)
> {
> account_system_vtime(current);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d3cb4cc..e46614a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -309,6 +309,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
> new.npages = npages;
> new.flags = mem->flags;
>
> + /* map the pages in iommu page table */
(if one exists)
> + kvm_iommu_map_pages(kvm, base_gfn, npages);
> +
You should do this once all the memory is set up properly (which is just
before returning 0)
> /* Disallow changing a memory slot's size. */
> r = -EINVAL;
> if (npages && old.npa
Amit
-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference
Don't miss this year's exciting event. There's still time to save $100.
Use priority code J8TL2D2.
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel