Add an alternate set of domain ops for type IOMMU_DOMAIN_KVM.  This type
is intended for use when KVM is managing the IOMMU domain on behalf of a
VM.  Mapping can only be performed once both a KVM and a guest IOTA
(address translation anchor) have been registered with the domain.

The map operation is expected to be received in response to an
04 intercept of a guest RPCIT instruction, and will perform a
synchronization operation between the host DMA and guest DMA tables
over the range specified.

Signed-off-by: Matthew Rosato <mjros...@linux.ibm.com>
---
 arch/s390/include/asm/kvm_pci.h |   6 +
 arch/s390/include/asm/pci_dma.h |   3 +
 drivers/iommu/Kconfig           |   8 +
 drivers/iommu/Makefile          |   1 +
 drivers/iommu/s390-iommu.c      |  49 ++--
 drivers/iommu/s390-iommu.h      |  53 ++++
 drivers/iommu/s390-kvm-iommu.c  | 469 ++++++++++++++++++++++++++++++++
 7 files changed, 562 insertions(+), 27 deletions(-)
 create mode 100644 drivers/iommu/s390-iommu.h
 create mode 100644 drivers/iommu/s390-kvm-iommu.c

diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ae8669105f72..ebc0da5d9ac1 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -11,6 +11,7 @@
 #define ASM_KVM_PCI_H
 
 #include <linux/types.h>
+#include <linux/iommu.h>
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
@@ -19,9 +20,14 @@
 struct kvm_zdev {
        struct zpci_dev *zdev;
        struct kvm *kvm;
+       struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
 };
 
 int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
 void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
 
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota);
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev);
+
 #endif /* ASM_KVM_PCI_H */
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..38004e0a4383 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -50,6 +50,9 @@ enum zpci_ioat_dtype {
 #define ZPCI_TABLE_ALIGN               ZPCI_TABLE_SIZE
 #define ZPCI_TABLE_ENTRY_SIZE          (sizeof(unsigned long))
 #define ZPCI_TABLE_ENTRIES             (ZPCI_TABLE_SIZE / 
ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_TABLE_PAGES               (ZPCI_TABLE_SIZE >> PAGE_SHIFT)
+#define ZPCI_TABLE_ENTRIES_PAGES       (ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES)
+#define ZPCI_TABLE_ENTRIES_PER_PAGE    (ZPCI_TABLE_ENTRIES / ZPCI_TABLE_PAGES)
 
 #define ZPCI_TABLE_BITS                        11
 #define ZPCI_PT_BITS                   8
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..9637f73925ec 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -411,6 +411,14 @@ config S390_AP_IOMMU
          Enables bits of IOMMU API required by VFIO. The iommu_ops
          is not implemented as it is not necessary for VFIO.
 
+config S390_KVM_IOMMU
+       bool "S390 KVM IOMMU Support"
+       depends on S390_IOMMU && KVM || COMPILE_TEST
+       select IOMMU_API
+       help
+         Extends the S390 IOMMU API to support a domain owned and managed by
+         KVM. This allows KVM, rather than userspace, to manage nested
+         mappings.
+
 config MTK_IOMMU
        tristate "MediaTek IOMMU Support"
        depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0..5476e978d7f5 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+obj-$(CONFIG_S390_KVM_IOMMU) += s390-kvm-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 73a85c599dc2..0ead37f6e232 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -11,6 +11,7 @@
 #include <linux/iommu-helper.h>
 #include <linux/sizes.h>
 #include <asm/pci_dma.h>
+#include "s390-iommu.h"
 
 /*
  * Physically contiguous memory regions can be mapped with 4 KiB alignment,
@@ -21,24 +22,6 @@
 
 static const struct iommu_ops s390_iommu_ops;
 
-struct s390_domain {
-       struct iommu_domain     domain;
-       struct list_head        devices;
-       unsigned long           *dma_table;
-       spinlock_t              dma_table_lock;
-       spinlock_t              list_lock;
-};
-
-struct s390_domain_device {
-       struct list_head        list;
-       struct zpci_dev         *zdev;
-};
-
-static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
-{
-       return container_of(dom, struct s390_domain, domain);
-}
-
 static bool s390_iommu_capable(enum iommu_cap cap)
 {
        switch (cap) {
@@ -55,7 +38,12 @@ static struct iommu_domain *s390_domain_alloc(unsigned 
domain_type)
 {
        struct s390_domain *s390_domain;
 
-       if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+       if (domain_type != IOMMU_DOMAIN_UNMANAGED &&
+           domain_type != IOMMU_DOMAIN_KVM)
+               return NULL;
+
+       if (domain_type == IOMMU_DOMAIN_KVM &&
+           !IS_ENABLED(CONFIG_S390_KVM_IOMMU))
                return NULL;
 
        s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
@@ -68,23 +56,30 @@ static struct iommu_domain *s390_domain_alloc(unsigned 
domain_type)
                return NULL;
        }
 
+       /* If KVM-managed, swap in alternate ops now */
+       if (IS_ENABLED(CONFIG_S390_KVM_IOMMU) &&
+           domain_type == IOMMU_DOMAIN_KVM)
+               s390_domain->domain.ops = &s390_kvm_domain_ops;
+
        spin_lock_init(&s390_domain->dma_table_lock);
        spin_lock_init(&s390_domain->list_lock);
+       mutex_init(&s390_domain->kvm_dom.ioat_lock);
        INIT_LIST_HEAD(&s390_domain->devices);
 
        return &s390_domain->domain;
 }
 
-static void s390_domain_free(struct iommu_domain *domain)
+void s390_domain_free(struct iommu_domain *domain)
 {
        struct s390_domain *s390_domain = to_s390_domain(domain);
 
        dma_cleanup_tables(s390_domain->dma_table);
+       mutex_destroy(&s390_domain->kvm_dom.ioat_lock);
        kfree(s390_domain);
 }
 
-static int s390_iommu_attach_device(struct iommu_domain *domain,
-                                   struct device *dev)
+int s390_iommu_attach_device(struct iommu_domain *domain,
+                            struct device *dev)
 {
        struct s390_domain *s390_domain = to_s390_domain(domain);
        struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -143,8 +138,8 @@ static int s390_iommu_attach_device(struct iommu_domain 
*domain,
        return rc;
 }
 
-static void s390_iommu_detach_device(struct iommu_domain *domain,
-                                    struct device *dev)
+void s390_iommu_detach_device(struct iommu_domain *domain,
+                             struct device *dev)
 {
        struct s390_domain *s390_domain = to_s390_domain(domain);
        struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -200,7 +195,7 @@ static void s390_iommu_release_device(struct device *dev)
        if (zdev && zdev->s390_domain) {
                domain = iommu_get_domain_for_dev(dev);
                if (domain)
-                       s390_iommu_detach_device(domain, dev);
+                       domain->ops->detach_dev(domain, dev);
        }
 }
 
@@ -282,8 +277,8 @@ static int s390_iommu_map(struct iommu_domain *domain, 
unsigned long iova,
        return rc;
 }
 
-static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
-                                          dma_addr_t iova)
+phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+                                   dma_addr_t iova)
 {
        struct s390_domain *s390_domain = to_s390_domain(domain);
        unsigned long *sto, *pto, *rto, flags;
diff --git a/drivers/iommu/s390-iommu.h b/drivers/iommu/s390-iommu.h
new file mode 100644
index 000000000000..21c8243a36b1
--- /dev/null
+++ b/drivers/iommu/s390-iommu.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Matthew Rosato <mjros...@linux.ibm.com>
+ */
+
+#ifndef _S390_IOMMU_H
+#define _S390_IOMMU_H
+
+#include <linux/iommu.h>
+#include <linux/kvm_host.h>
+
+extern const struct iommu_domain_ops s390_kvm_domain_ops;
+
+struct s390_kvm_domain {       /* state used to shadow guest DMA tables */
+       struct kvm              *kvm;   /* set by zpci_iommu_attach_kvm() */
+       unsigned long           *head[ZPCI_TABLE_PAGES]; /* pinned pages at guest IOTA */
+       unsigned long           **seg;  /* pinned guest segment table pages */
+       unsigned long           ***pt;  /* pt[rtx][sx]: pinned guest page table */
+       struct page *(*pin)(struct kvm *kvm, gfn_t gfn); /* gfn_to_page via symbol_get */
+       void (*unpin)(kvm_pfn_t pfn);   /* kvm_release_pfn_dirty via symbol_get */
+       struct mutex            ioat_lock; /* held while shadowing/assigning/clearing */
+       bool                    map_enabled; /* true once a guest IOTA is assigned */
+};
+
+struct s390_domain {   /* s390 wrapper around a generic iommu_domain */
+       struct iommu_domain     domain; /* embedded; see to_s390_domain() */
+       struct list_head        devices; /* list of struct s390_domain_device */
+       unsigned long           *dma_table; /* host DMA table root */
+       spinlock_t              dma_table_lock; /* presumably guards dma_table - confirm */
+       spinlock_t              list_lock; /* taken around walks of 'devices' */
+       struct s390_kvm_domain  kvm_dom; /* used by IOMMU_DOMAIN_KVM domains */
+};
+
+struct s390_domain_device {    /* one device linked into an s390_domain */
+       struct list_head        list;   /* node in s390_domain.devices */
+       struct zpci_dev         *zdev;  /* the zPCI device */
+};
+
+/* Map an embedded iommu_domain back to the s390_domain that contains it. */
+static inline struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+       struct s390_domain *outer;
+
+       outer = container_of(dom, struct s390_domain, domain);
+       return outer;
+}
+
+void s390_domain_free(struct iommu_domain *domain);
+int s390_iommu_attach_device(struct iommu_domain *domain, struct device *dev);
+void s390_iommu_detach_device(struct iommu_domain *domain, struct device *dev);
+phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+                                   dma_addr_t iova);
+
+#endif /* _S390_IOMMU_H */
diff --git a/drivers/iommu/s390-kvm-iommu.c b/drivers/iommu/s390-kvm-iommu.c
new file mode 100644
index 000000000000..d24e6904d5f8
--- /dev/null
+++ b/drivers/iommu/s390-kvm-iommu.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU API domain ops for s390 PCI devices using KVM passthrough
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Matthew Rosato <mjros...@linux.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/sizes.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_pci.h>
+#include <asm/pci_dma.h>
+#include "s390-iommu.h"
+
+const struct iommu_domain_ops s390_kvm_domain_ops;
+
+/*
+ * Synchronize one host (shadow) DMA page-table entry with its guest
+ * counterpart.
+ *
+ * @kvm_dom: KVM state of the domain; supplies the kvm and pin/unpin hooks
+ * @entry:   host page-table entry to update
+ * @gentry:  guest page-table entry to mirror
+ *
+ * A valid guest entry has its target page pinned and the resulting host
+ * address installed in @entry.  An invalid guest entry causes a valid host
+ * entry to be invalidated and its page unpinned.
+ *
+ * Returns 1 if a previously valid host entry was replaced or invalidated,
+ * 0 if the entry was newly validated or left unchanged, or -EIO if the
+ * guest page could not be pinned.
+ */
+static int dma_shadow_cpu_trans(struct s390_kvm_domain *kvm_dom,
+                               unsigned long *entry, unsigned long *gentry)
+{
+       phys_addr_t gaddr = 0;
+       struct page *page;
+       kvm_pfn_t pfn;
+       gpa_t addr;
+       int rc = 0;
+       int idx;        /* SRCU cookie: srcu_read_lock() returns an int */
+
+       if (pt_entry_isvalid(*gentry)) {
+               /* pin and validate */
+               addr = *gentry & ZPCI_PTE_ADDR_MASK;
+               idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+               page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+               srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+               if (is_error_page(page))
+                       return -EIO;
+               gaddr = page_to_phys(page) + (addr & ~PAGE_MASK);
+       }
+
+       if (pt_entry_isvalid(*entry)) {
+               /* Either we are invalidating, replacing or no-op */
+               if (gaddr != 0) {
+                       if ((*entry & ZPCI_PTE_ADDR_MASK) == gaddr) {
+                               /*
+                                * Duplicate: the page was already pinned for
+                                * this entry, so drop the extra pin taken
+                                * above.
+                                */
+                               kvm_dom->unpin(*entry >> PAGE_SHIFT);
+                       } else {
+                               /*
+                                * Replace: remember the old pfn before the
+                                * entry is rewritten, release it afterwards.
+                                */
+                               pfn = (*entry >> PAGE_SHIFT);
+                               invalidate_pt_entry(entry);
+                               set_pt_pfaa(entry, gaddr);
+                               validate_pt_entry(entry);
+                               kvm_dom->unpin(pfn);
+                               rc = 1;
+                       }
+               } else {
+                       /* Invalidate */
+                       pfn = (*entry >> PAGE_SHIFT);
+                       invalidate_pt_entry(entry);
+                       kvm_dom->unpin(pfn);
+                       rc = 1;
+               }
+       } else if (gaddr != 0) {
+               /* New Entry */
+               set_pt_pfaa(entry, gaddr);
+               validate_pt_entry(entry);
+       }
+
+       return rc;
+}
+
+/*
+ * Walk the guest DMA tables for @dma_addr and return a pointer to the guest
+ * page-table entry, pinning guest segment/page tables on first use.
+ *
+ * Pinned table pages are cached in kvm_dom->seg[] and kvm_dom->pt[][] so
+ * later walks can reuse them.
+ *
+ * Returns NULL if a guest region/segment entry is invalid, a guest page
+ * cannot be pinned, or the pt pointer array cannot be allocated.
+ */
+static unsigned long *dma_walk_guest_cpu_trans(struct s390_kvm_domain *kvm_dom,
+                                              dma_addr_t dma_addr)
+{
+       unsigned long *rto, *sto, *pto;
+       unsigned int rtx, rts, sx, px;
+       struct page *page;
+       gpa_t addr;
+       int i, idx;
+
+       /* Pin guest segment table if needed */
+       rtx = calc_rtx(dma_addr);
+       rto = kvm_dom->head[(rtx / ZPCI_TABLE_ENTRIES_PER_PAGE)];
+       rts = rtx * ZPCI_TABLE_PAGES;
+       if (!kvm_dom->seg[rts]) {
+               if (!reg_entry_isvalid(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
+                       return NULL;
+               sto = get_rt_sto(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
+               addr = ((u64)sto & ZPCI_RTE_ADDR_MASK);
+               idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+               for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+                       page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+                       if (is_error_page(page))
+                               break;
+                       kvm_dom->seg[rts + i] = (page_to_virt(page) +
+                                                (addr & ~PAGE_MASK));
+                       addr += PAGE_SIZE;
+               }
+               srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+
+               if (i < ZPCI_TABLE_PAGES) {
+                       /*
+                        * Partial failure: release the pages pinned so far
+                        * and clear their slots.  Otherwise seg[rts] would
+                        * look populated on the next walk while later slots
+                        * are still NULL.
+                        */
+                       while (i-- > 0) {
+                               kvm_dom->unpin((u64)kvm_dom->seg[rts + i] >>
+                                              PAGE_SHIFT);
+                               kvm_dom->seg[rts + i] = NULL;
+                       }
+                       return NULL;
+               }
+       }
+
+       /* Allocate pin pointers for another segment table if needed */
+       if (!kvm_dom->pt[rtx]) {
+               kvm_dom->pt[rtx] = kcalloc(ZPCI_TABLE_ENTRIES,
+                                          (sizeof(unsigned long *)),
+                                          GFP_KERNEL);
+               if (!kvm_dom->pt[rtx])
+                       return NULL;
+       }
+       /* Pin guest page table if needed */
+       sx = calc_sx(dma_addr);
+       sto = kvm_dom->seg[(rts + (sx / ZPCI_TABLE_ENTRIES_PER_PAGE))];
+       if (!kvm_dom->pt[rtx][sx]) {
+               if (!reg_entry_isvalid(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
+                       return NULL;
+               pto = get_st_pto(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
+               if (!pto)
+                       return NULL;
+               addr = ((u64)pto & ZPCI_STE_ADDR_MASK);
+               idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+               page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+               srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+               if (is_error_page(page))
+                       return NULL;
+               kvm_dom->pt[rtx][sx] = page_to_virt(page) + (addr & ~PAGE_MASK);
+       }
+       pto = kvm_dom->pt[rtx][sx];
+
+       /* Return guest PTE */
+       px = calc_px(dma_addr);
+       return &pto[px];
+}
+
+/*
+ * Shadow @nr_pages guest DMA translations, starting at @dma_addr, into the
+ * host DMA table of @s390_domain.
+ *
+ * @mapped_pages is reset to 0 and counts the pages processed; on an early
+ * error return it holds the number of pages handled before the failure.
+ *
+ * Returns the number of host entries that were replaced or invalidated
+ * (>= 0), -ENOMEM if the host table walk fails, or -EIO if a guest page
+ * could not be pinned.
+ */
+static int dma_table_shadow(struct s390_domain *s390_domain,
+                           dma_addr_t dma_addr, size_t nr_pages,
+                           size_t *mapped_pages)
+{
+       struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+       unsigned long *entry, *gentry;
+       int rc = 0, rc2;
+
+       /*
+        * Advance dma_addr in the loop header so that it also moves forward
+        * when an address without a guest entry is skipped via 'continue'.
+        */
+       for (*mapped_pages = 0; *mapped_pages < nr_pages;
+            (*mapped_pages)++, dma_addr += PAGE_SIZE) {
+               gentry = dma_walk_guest_cpu_trans(kvm_dom, dma_addr);
+               if (!gentry)
+                       continue;
+               entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+
+               if (!entry)
+                       return -ENOMEM;
+
+               rc2 = dma_shadow_cpu_trans(kvm_dom, entry, gentry);
+               if (rc2 < 0)
+                       return -EIO;
+
+               rc += rc2;
+       }
+
+       return rc;
+}
+
+/*
+ * Synchronize host and guest DMA tables over a range and, if any existing
+ * host entry changed, refresh the translations of every device in the
+ * domain.
+ *
+ * @s390_domain: domain whose tables are synchronized
+ * @dma_addr:    start of the range
+ * @nr_pages:    number of pages in the range
+ * @mapped:      optional; receives the number of bytes processed
+ *
+ * Returns 0 on success, -EINVAL if mapping is not enabled or a refresh
+ * failed with status 0, -EIO if a refresh failed with a non-zero status,
+ * or a negative error from dma_table_shadow().
+ */
+static int s390_kvm_iommu_update_trans(struct s390_domain *s390_domain,
+                                      dma_addr_t dma_addr, size_t nr_pages,
+                                      size_t *mapped)
+{
+       struct s390_domain_device *domain_device;
+       unsigned long irq_flags;
+       size_t mapped_pages = 0;
+       int rc;
+       u8 status;
+
+       mutex_lock(&s390_domain->kvm_dom.ioat_lock);
+
+       /* Re-check under the lock; the callers test this flag unlocked */
+       if (!s390_domain->kvm_dom.map_enabled) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+       rc = dma_table_shadow(s390_domain, dma_addr, nr_pages, &mapped_pages);
+
+       /* If error or no new mappings, leave immediately without refresh */
+       if (rc <= 0)
+               goto exit;
+
+       /* Do not leak the positive change count if the device list is empty */
+       rc = 0;
+
+       spin_lock_irqsave(&s390_domain->list_lock, irq_flags);
+       list_for_each_entry(domain_device, &s390_domain->devices, list) {
+               rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+                                       dma_addr, nr_pages * PAGE_SIZE,
+                                       &status);
+               if (rc) {
+                       rc = status ? -EIO : -EINVAL;
+                       /* Stop so a later success cannot hide this failure */
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&s390_domain->list_lock, irq_flags);
+
+exit:
+       if (mapped)
+               *mapped = mapped_pages << PAGE_SHIFT;
+
+       mutex_unlock(&s390_domain->kvm_dom.ioat_lock);
+       return rc;
+}
+
+/*
+ * .map callback for KVM-managed domains: synchronize the host DMA table
+ * with the guest DMA table over [iova, iova + size).
+ *
+ * @paddr and @gfp are not used by this function.  Returns -EINVAL if
+ * neither IOMMU_READ nor IOMMU_WRITE is requested, if no guest IOTA is
+ * registered, or if @size is 0; otherwise returns the result of
+ * s390_kvm_iommu_update_trans().
+ */
+static int s390_kvm_iommu_map(struct iommu_domain *domain, unsigned long iova,
+                             phys_addr_t paddr, size_t size, int prot,
+                             gfp_t gfp)
+{
+       struct s390_domain *s390_dom = to_s390_domain(domain);
+       size_t npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+       if ((prot & (IOMMU_READ | IOMMU_WRITE)) == 0)
+               return -EINVAL;
+
+       /* Can only perform mapping when a guest IOTA is registered */
+       if (!s390_dom->kvm_dom.map_enabled)
+               return -EINVAL;
+
+       if (npages == 0)
+               return -EINVAL;
+
+       return s390_kvm_iommu_update_trans(s390_dom, iova, npages, NULL);
+}
+
+/*
+ * .map_pages callback for KVM-managed domains: synchronize the host DMA
+ * table with the guest DMA table over @pgcount pages of size @pgsize,
+ * starting at @iova.
+ *
+ * @paddr and @gfp are not used by this function.  @mapped is passed to
+ * s390_kvm_iommu_update_trans(), which stores the processed byte count in
+ * it.  Returns -EINVAL if neither IOMMU_READ nor IOMMU_WRITE is requested,
+ * if no guest IOTA is registered, or if the range covers no pages.
+ */
+static int s390_kvm_iommu_map_pages(struct iommu_domain *domain,
+                                   unsigned long iova, phys_addr_t paddr,
+                                   size_t pgsize, size_t pgcount, int prot,
+                                   gfp_t gfp, size_t *mapped)
+{
+       struct s390_domain *s390_dom = to_s390_domain(domain);
+       size_t npages = pgcount * (pgsize / PAGE_SIZE);
+
+       if ((prot & (IOMMU_READ | IOMMU_WRITE)) == 0)
+               return -EINVAL;
+
+       /* Can only perform mapping when a guest IOTA is registered */
+       if (!s390_dom->kvm_dom.map_enabled)
+               return -EINVAL;
+
+       if (npages == 0)
+               return -EINVAL;
+
+       return s390_kvm_iommu_update_trans(s390_dom, iova, npages, mapped);
+}
+
+/*
+ * Unpin the guest page table cached at kvm_dom->pt[st][pt], if any, and
+ * clear the slot.
+ */
+static void free_pt_entry(struct s390_kvm_domain *kvm_dom, int st, int pt)
+{
+       unsigned long *pto = kvm_dom->pt[st][pt];
+
+       if (!pto)
+               return;
+
+       /*
+        * unpin() takes a pfn, not an address: convert the same way the
+        * head[] pages are released elsewhere in this file.
+        */
+       kvm_dom->unpin((u64)pto >> PAGE_SHIFT);
+       kvm_dom->pt[st][pt] = NULL;
+}
+
+/*
+ * Release everything cached for one guest segment table.
+ *
+ * @entry is the index of the first of the ZPCI_TABLE_PAGES consecutive
+ * seg[] slots that back the table.  Each pinned page is unpinned, then all
+ * guest page tables pinned beneath it are released and the pt pointer array
+ * is freed.
+ */
+static void free_seg_entry(struct s390_kvm_domain *kvm_dom, int entry)
+{
+       int i, st;
+
+       for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+               if (kvm_dom->seg[entry + i]) {
+                       /* unpin() takes a pfn, not an address */
+                       kvm_dom->unpin((u64)kvm_dom->seg[entry + i] >>
+                                      PAGE_SHIFT);
+                       kvm_dom->seg[entry + i] = NULL;
+               }
+       }
+
+       /*
+        * The pt pointer array is allocated separately from the segment pin
+        * and may be absent (e.g. its allocation failed); never index
+        * through a NULL array.
+        */
+       st = entry / ZPCI_TABLE_PAGES;
+       if (!kvm_dom->pt[st])
+               return;
+
+       for (i = 0; i < ZPCI_TABLE_ENTRIES; i++)
+               free_pt_entry(kvm_dom, st, i);
+       kfree(kvm_dom->pt[st]);
+       kvm_dom->pt[st] = NULL;
+}
+
+/*
+ * Tear down all shadow state of a KVM-managed domain: invalidate and unpin
+ * every valid host entry in the aperture, unpin the guest table pages and
+ * free the bookkeeping arrays.  Mapping is disabled afterwards.
+ *
+ * Returns 0 on success or -EINVAL if no KVM is associated or no guest IOTA
+ * is currently registered.
+ */
+static int s390_kvm_clear_ioat_tables(struct s390_domain *s390_domain)
+{
+       struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+       unsigned long *entry;
+       dma_addr_t dma_addr;
+       kvm_pfn_t pfn;
+       int i, rc = 0;
+
+       mutex_lock(&kvm_dom->ioat_lock);
+
+       /* Test the state under the lock so teardown cannot run twice */
+       if (!kvm_dom->kvm || !kvm_dom->map_enabled) {
+               rc = -EINVAL;
+               goto out_unlock;
+       }
+
+       /* Invalidate and unpin remaining guest pages */
+       for (dma_addr = s390_domain->domain.geometry.aperture_start;
+            dma_addr < s390_domain->domain.geometry.aperture_end;
+            dma_addr += PAGE_SIZE) {
+               entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+               if (entry && pt_entry_isvalid(*entry)) {
+                       pfn = (*entry >> PAGE_SHIFT);
+                       invalidate_pt_entry(entry);
+                       kvm_dom->unpin(pfn);
+               }
+       }
+
+       /* Unpin all shadow tables */
+       for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+               kvm_dom->unpin((u64)kvm_dom->head[i] >> PAGE_SHIFT);
+               kvm_dom->head[i] = NULL;
+       }
+
+       for (i = 0; i < ZPCI_TABLE_ENTRIES_PAGES; i += ZPCI_TABLE_PAGES)
+               free_seg_entry(kvm_dom, i);
+
+       /* Clear the pointers so nothing can reach the freed arrays */
+       kfree(kvm_dom->seg);
+       kvm_dom->seg = NULL;
+       kfree(kvm_dom->pt);
+       kvm_dom->pt = NULL;
+
+       /* Flip the flag before dropping the lock */
+       kvm_dom->map_enabled = false;
+
+out_unlock:
+       mutex_unlock(&kvm_dom->ioat_lock);
+       return rc;
+}
+
+/*
+ * .free callback for KVM-managed domains: drop any shadow state, release
+ * the KVM symbol references taken at attach time, then free the domain.
+ */
+static void s390_kvm_domain_free(struct iommu_domain *domain)
+{
+       struct s390_domain *s390_dom = to_s390_domain(domain);
+       struct s390_kvm_domain *kvm_dom = &s390_dom->kvm_dom;
+
+       /* Return value ignored: -EINVAL only means nothing was registered */
+       s390_kvm_clear_ioat_tables(s390_dom);
+
+       /* The symbol references are only held once a KVM has been attached */
+       if (kvm_dom->kvm != NULL) {
+               symbol_put(gfn_to_page);
+               symbol_put(kvm_release_pfn_dirty);
+       }
+
+       s390_domain_free(domain);
+}
+
+/*
+ * Associate @kvm with the KVM-managed IOMMU domain that @zdev belongs to.
+ *
+ * Every device in the domain must already be associated with @kvm.  On
+ * success each device's kvm_zdev records the domain, and the pin/unpin
+ * hooks are resolved via symbol_get().
+ *
+ * Returns 0 on success, or -EINVAL if @zdev has no domain, the domain is
+ * not of type IOMMU_DOMAIN_KVM, a KVM is already attached, a device in the
+ * domain belongs to a different (or no) KVM, or a KVM symbol cannot be
+ * resolved.
+ */
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+{
+       struct s390_domain *s390_domain = zdev->s390_domain;
+       struct s390_domain_device *domain_device;
+       struct iommu_domain *domain;
+       struct kvm_zdev *kzdev;
+       unsigned long flags;
+       int rc = 0;
+
+       /* The device may not be attached to any domain */
+       if (!s390_domain)
+               return -EINVAL;
+
+       domain = &s390_domain->domain;
+       if (domain->type != IOMMU_DOMAIN_KVM)
+               return -EINVAL;
+
+       if (s390_domain->kvm_dom.kvm)
+               return -EINVAL;
+
+       spin_lock_irqsave(&s390_domain->list_lock, flags);
+       /* First pass: validate every device before touching any of them */
+       list_for_each_entry(domain_device, &s390_domain->devices, list) {
+               kzdev = domain_device->zdev->kzdev;
+               if (!kzdev || kzdev->kvm != kvm) {
+                       rc = -EINVAL;
+                       break;
+               }
+       }
+       /* Second pass: all devices match, record the domain on each */
+       if (!rc) {
+               list_for_each_entry(domain_device, &s390_domain->devices,
+                                   list)
+                       domain_device->zdev->kzdev->dom = domain;
+       }
+       spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+       if (rc)
+               return rc;
+
+       s390_domain->kvm_dom.pin = symbol_get(gfn_to_page);
+       if (!s390_domain->kvm_dom.pin)
+               return -EINVAL;
+
+       s390_domain->kvm_dom.unpin = symbol_get(kvm_release_pfn_dirty);
+       if (!s390_domain->kvm_dom.unpin) {
+               symbol_put(gfn_to_page);
+               return -EINVAL;
+       }
+
+       s390_domain->kvm_dom.kvm = kvm;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_attach_kvm);
+
+/*
+ * Register a guest IOTA with the KVM-managed domain of @zdev and enable
+ * mapping.
+ *
+ * The ZPCI_TABLE_PAGES guest pages starting at the address in @iota are
+ * pinned into kvm_dom->head[], and the seg/pt bookkeeping arrays are
+ * allocated.
+ *
+ * Returns 0 on success, -EINVAL if @zdev has no domain, no KVM is attached,
+ * an IOTA is already registered or @iota lacks ZPCI_IOTA_RTTO_FLAG, -EIO if
+ * a guest page cannot be pinned, or -ENOMEM on allocation failure.
+ */
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota)
+{
+       struct s390_domain *s390_domain = zdev->s390_domain;
+       gpa_t gpa = (gpa_t)(iota & ZPCI_RTE_ADDR_MASK);
+       struct s390_kvm_domain *kvm_dom;
+       struct page *page;
+       struct kvm *kvm;
+       int pinned = 0;         /* number of head[] pages pinned so far */
+       int idx, rc = 0;
+       void *iaddr;
+
+       if (!s390_domain)
+               return -EINVAL;
+       kvm_dom = &s390_domain->kvm_dom;
+
+       /* Ensure supported type specified */
+       if ((iota & ZPCI_IOTA_RTTO_FLAG) != ZPCI_IOTA_RTTO_FLAG)
+               return -EINVAL;
+
+       mutex_lock(&kvm_dom->ioat_lock);
+
+       /* Ensure KVM associated and IOTA not already registered */
+       if (!kvm_dom->kvm || kvm_dom->map_enabled) {
+               rc = -EINVAL;
+               goto unlock;
+       }
+
+       kvm = kvm_dom->kvm;
+       idx = srcu_read_lock(&kvm->srcu);
+       for (pinned = 0; pinned < ZPCI_TABLE_PAGES; pinned++) {
+               page = kvm_dom->pin(kvm, gpa_to_gfn(gpa));
+               if (is_error_page(page)) {
+                       rc = -EIO;
+                       break;
+               }
+               iaddr = page_to_virt(page) + (gpa & ~PAGE_MASK);
+               kvm_dom->head[pinned] = (unsigned long *)iaddr;
+               gpa += PAGE_SIZE;
+       }
+       srcu_read_unlock(&kvm->srcu, idx);
+       if (rc)
+               goto unpin;
+
+       kvm_dom->seg = kcalloc(ZPCI_TABLE_ENTRIES_PAGES,
+                              sizeof(unsigned long *), GFP_KERNEL);
+       if (!kvm_dom->seg) {
+               rc = -ENOMEM;
+               goto unpin;
+       }
+       kvm_dom->pt = kcalloc(ZPCI_TABLE_ENTRIES, sizeof(unsigned long **),
+                             GFP_KERNEL);
+       if (!kvm_dom->pt) {
+               rc = -ENOMEM;
+               goto free_seg;
+       }
+
+       /* Enable mapping before the lock is dropped */
+       kvm_dom->map_enabled = true;
+       goto unlock;
+
+free_seg:
+       kfree(kvm_dom->seg);
+       kvm_dom->seg = NULL;
+unpin:
+       /* Release only the pages that were actually pinned */
+       while (pinned-- > 0) {
+               kvm_dom->unpin((u64)kvm_dom->head[pinned] >> PAGE_SHIFT);
+               kvm_dom->head[pinned] = NULL;
+       }
+unlock:
+       mutex_unlock(&kvm_dom->ioat_lock);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_kvm_assign_iota);
+
+/*
+ * Unregister the guest IOTA from the KVM-managed domain of @zdev, releasing
+ * all shadow state.
+ *
+ * Returns -EINVAL if @zdev has no domain; otherwise returns the result of
+ * s390_kvm_clear_ioat_tables().
+ */
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev)
+{
+       struct s390_domain *s390_domain = zdev->s390_domain;
+
+       /* The callee dereferences the domain, so reject a detached device */
+       if (!s390_domain)
+               return -EINVAL;
+
+       return s390_kvm_clear_ioat_tables(s390_domain);
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_kvm_remove_iota);
+
+const struct iommu_domain_ops s390_kvm_domain_ops = {
+       .attach_dev     = s390_iommu_attach_device, /* declared in s390-iommu.h */
+       .detach_dev     = s390_iommu_detach_device, /* declared in s390-iommu.h */
+       /*
+        * All iommu mapping and unmapping operations are handled via the map
+        * ops.  A map over a given range will synchronize the host and guest
+        * DMA tables, performing the necessary mappings / unmappings to
+        * synchronize the table states.
+        * Partial mapping failures do not require a rewind, the guest will
+        * receive an indication that will trigger a global refresh of the
+        * tables.
+        * For that reason the unmap callbacks below are left unset.
+        */
+       .map            = s390_kvm_iommu_map,
+       .map_pages      = s390_kvm_iommu_map_pages,
+       .unmap          = NULL,
+       .unmap_pages    = NULL,
+       .iova_to_phys   = s390_iommu_iova_to_phys, /* declared in s390-iommu.h */
+       .free           = s390_kvm_domain_free, /* clears shadow state, then frees */
+};
-- 
2.27.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to