On Mon, Jun 09, 2008 at 05:43:15PM -0700, Kay, Allen M wrote:
> vt-d specific files in KVM for constructing vt-d page tables and
> programming vt-d context entries.

Hi Allen,

Some comments below; patches will follow.
 
> Signed-off-by: Allen M. Kay <[EMAIL PROTECTED]>

>
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..634802c
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,197 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Author: Allen M. Kay <[EMAIL PROTECTED]>
> + * Author: Weidong Han <[EMAIL PROTECTED]>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +#include "vtd.h"
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +     gfn_t base_gfn, unsigned long npages)
> +{
> +     gfn_t gfn = base_gfn;
> +     pfn_t pfn;
> +     struct page *page;
> +     int i, rc;
> +
> +     if (!kvm->arch.domain)
> +             return -EFAULT;
> +
> +     printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n",
> +             gfn << PAGE_SHIFT);
> +     printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %lx\n",
> +             gfn_to_pfn(kvm, base_gfn) << PAGE_SHIFT);
> +     printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n",
> +             npages*PAGE_SIZE);
> +
> +     for (i = 0; i < npages; i++) {
> +             pfn = gfn_to_pfn(kvm, gfn);
> +             if (pfn_valid(pfn)) {

Checking against pfn_valid() isn't enough to differentiate between RAM
and MMIO areas. I think the consensus was that we also need to check
PageReserved(), i.e.,

if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) ...
 
> +                     rc = kvm_intel_iommu_page_mapping(kvm->arch.domain,
> +                             gfn << PAGE_SHIFT, pfn << PAGE_SHIFT,
> +                             PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
> +                     if (rc) {
> +                             page = gfn_to_page(kvm, gfn);
> +                             put_page(page);

If we fail to map some of the domain's memory, shouldn't we bail out
of giving it pass-through access at all?
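
If we do bail out, we also need to drop the page references we've
already taken; a sketch (kvm_iommu_put_pages() is defined further
down in the file, so it would need a forward declaration):

		rc = kvm_intel_iommu_page_mapping(kvm->arch.domain,
			gfn << PAGE_SHIFT, pfn << PAGE_SHIFT,
			PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE);
		if (rc) {
			/* drop this page's reference ... */
			put_page(pfn_to_page(pfn));
			/* ... and those of the pages we already mapped */
			kvm_iommu_put_pages(kvm, base_gfn, i);
			return rc;
		}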

> +                     }
> +             } else {
> +                     printk(KERN_DEBUG "kvm_iommu_map_page: "
> +                             "invalid pfn=%lx\n", pfn);
> +                     return 0;

I think we should BUG_ON() (or at least WARN_ON()) if we hit a slot
that has both RAM and an MMIO region. 
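
E.g., something like:

		} else {
			/* a memslot should be all RAM or all MMIO */
			WARN_ON(1);
			return -EFAULT;
		}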

> +             }
> +             gfn++;
> +     }
> +     return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages);
> +
> +static int kvm_iommu_map_memslots(struct kvm *kvm)
> +{
> +     int i, rc;
> +     for (i = 0; i < kvm->nmemslots; i++) {
> +             rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
> +                             kvm->memslots[i].npages);
> +             if (rc)
> +                     return rc;
> +     }
> +     return 0;
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +     struct kvm_pci_passthrough_dev *pci_pt_dev)
> +{
> +     struct pci_dev *pdev = NULL;
> +
> +     printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n",
> +             pci_pt_dev->host.busnr,
> +             PCI_SLOT(pci_pt_dev->host.devfn),
> +             PCI_FUNC(pci_pt_dev->host.devfn));
> +
> +     for_each_pci_dev(pdev) {
> +             if ((pdev->bus->number == pci_pt_dev->host.busnr) &&
> +                     (pdev->devfn == pci_pt_dev->host.devfn))
> +                     goto found;

We can stick the `found' stanza in a separate function and call it
here, which gets rid of one goto.
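
One possible shape is a small lookup helper (name made up) that also
covers the second occurrence further down:

	static struct pci_dev *kvm_find_host_pdev(
		struct kvm_pci_passthrough_dev *pt_dev)
	{
		struct pci_dev *pdev = NULL;

		/*
		 * for_each_pci_dev() takes a reference on each device it
		 * visits; returning from inside the loop keeps the
		 * reference on the match, which is what the caller wants.
		 */
		for_each_pci_dev(pdev) {
			if (pdev->bus->number == pt_dev->host.busnr &&
			    pdev->devfn == pt_dev->host.devfn)
				return pdev;
		}
		return NULL;
	}

with `pdev = kvm_find_host_pdev(pci_pt_dev);' plus a NULL check
replacing the loop and the goto here.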

> +     }
> +     if (kvm->arch.domain) {
> +             kvm_intel_iommu_domain_exit(kvm->arch.domain);
> +             kvm->arch.domain = NULL;
> +     }
> +     return -ENODEV;
> +found:
> +     kvm->arch.domain = kvm_intel_iommu_domain_alloc(pdev);
> +     if (kvm->arch.domain == NULL)
> +             printk(KERN_WARNING "kvm_iommu_map_guest: domain == NULL\n");
> +     else
> +             printk(KERN_INFO "kvm_iommu_map_guest: domain = %p\n",
> +                     kvm->arch.domain);
> +     if (kvm_iommu_map_memslots(kvm)) {

We shouldn't call map_memslots if domain == NULL.
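
I.e., fail hard instead of just warning; something like (the errno is
a guess):

	kvm->arch.domain = kvm_intel_iommu_domain_alloc(pdev);
	if (!kvm->arch.domain) {
		printk(KERN_WARNING
			"kvm_iommu_map_guest: domain alloc failed\n");
		return -ENOMEM;
	}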

> +             kvm_iommu_unmap_memslots(kvm);
> +             return -EFAULT;
> +     }
> +     kvm_intel_iommu_context_mapping(kvm->arch.domain, pdev);
> +     return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest);
> +
> +static int kvm_iommu_put_pages(struct kvm *kvm,
> +     gfn_t base_gfn, unsigned long npages)
> +{
> +     gfn_t gfn = base_gfn;
> +     struct page *page;
> +     int i;
> +
> +     if (!kvm->arch.domain)
> +             return -EFAULT;
> +
> +     printk(KERN_DEBUG "kvm_iommu_put_pages: gpa = %lx\n",
> +             gfn << PAGE_SHIFT);
> +     printk(KERN_DEBUG "kvm_iommu_put_pages: hpa = %lx\n",
> +             gfn_to_pfn(kvm, gfn) << PAGE_SHIFT);
> +     printk(KERN_DEBUG "kvm_iommu_put_pages: size = %lx\n",
> +             npages*PAGE_SIZE);
> +
> +     for (i = 0; i < npages; i++) {
> +             page = gfn_to_page(kvm, gfn);
> +             put_page(page);
> +             gfn++;
> +     }
> +     return 0;
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +     int i, rc;
> +     for (i = 0; i < kvm->nmemslots; i++) {
> +             rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +                             kvm->memslots[i].npages);
> +             if (rc)
> +                     return rc;
> +     }
> +     return 0;
> +}
> +
> +int kvm_iommu_unmap_guest(struct kvm *kvm)
> +{
> +     struct dmar_domain *domain;
> +     struct kvm_pci_pt_dev_list *entry;
> +     struct pci_dev *pdev = NULL;
> +
> +     if (!kvm->arch.domain)
> +             return 0;
> +
> +     list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) {
> +             printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n",
> +                     entry->pt_dev.host.busnr,
> +                     PCI_SLOT(entry->pt_dev.host.devfn),
> +                     PCI_FUNC(entry->pt_dev.host.devfn));
> +
> +             for_each_pci_dev(pdev) {
> +                     if ((pdev->bus->number == entry->pt_dev.host.busnr) &&
> +                             (pdev->devfn == entry->pt_dev.host.devfn))
> +                             goto found;
> +             }
> +             return -ENODEV;
> +found:

Same comment as above.
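
With the kvm_find_host_pdev() helper sketched above, the inner loop
and the goto collapse to:

		pdev = kvm_find_host_pdev(&entry->pt_dev);
		if (!pdev)
			return -ENODEV;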

Cheers,
Muli