On Fri, 2 Nov 2007 19:12:27 +0200
Muli Ben-Yehuda <[EMAIL PROTECTED]> wrote:

> On Sat, Nov 03, 2007 at 02:05:39AM +0900, FUJITA Tomonori wrote:
> > This patchset converts the PPC64 IOMMU to use the iova code for
> > free area management.
> >
> > The IOMMUs ignore low-level drivers' restrictions: the maximum
> > segment size and the segment boundary. I fixed the former:
> >
> > http://thread.gmane.org/gmane.linux.scsi/35602
> >
> > The latter makes the free area management complicated. I'd like to
> > convert the IOMMUs to use the iova code (which intel-iommu
> > introduced) for free area management, and enable iova to handle the
> > segment boundary restrictions, rather than fixing every IOMMU's
> > free area management.
>
> In general it sounds like a great idea, but have you looked at what
> impact this has on the performance of the IO path?
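As background on what iova would have to handle here: a low-level
driver's segment boundary restriction means a mapping must not cross a
(boundary_mask + 1)-aligned window (e.g. a 64KB boundary for ISA-era
hardware). The check itself is tiny; a minimal sketch, with a made-up
helper name:

static inline int crosses_segment_boundary(dma_addr_t addr, size_t size,
					   unsigned long boundary_mask)
{
	/* the first and the last byte must fall in the same window */
	return (addr & ~(dma_addr_t)boundary_mask) !=
	       ((addr + size - 1) & ~(dma_addr_t)boundary_mask);
}

The hard part is not the check but making each free area allocator skip
or retry when a candidate range crosses a window, which is why doing it
once in the iova code is attractive.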
I converted swiotlb to use iova and compared it with the original
algorithm (which is better than the simple bitmap scheme that most of
the IOMMUs use, I think).

I used the 'swiotlb=force' boot option and ran netperf with -m 128 -D
(which leads to tons of dma_map_single calls).
The original code produced 281.8 Mb/s; the iova version produced 77.2
Mb/s.

Would it be better to generalize the swiotlb algorithm (at least for
small I/O areas)? Or my patch might have bugs.
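For reference, the original swiotlb strategy is basically a next-fit
scan over the slot array, resuming at the index after the previous
allocation, all under a single spinlock. A self-contained toy version
(all names made up) just to show the shape:

#include <string.h>

#define NSLABS	1024			/* toy pool size */

static unsigned char slot_used[NSLABS];	/* 1 = slot taken */
static unsigned int last_index;		/* where the next scan resumes */

/* next-fit: find 'nslots' contiguous free slots, or return -1 */
static int next_fit_alloc(unsigned int nslots)
{
	unsigned int index = last_index, count = 0, i;

	for (i = 0; i < NSLABS; i++, index = (index + 1) % NSLABS) {
		if (index == 0)		/* a run must not wrap the array */
			count = 0;
		if (slot_used[index]) {
			count = 0;
			continue;
		}
		if (++count == nslots) {
			unsigned int start = index - nslots + 1;

			memset(slot_used + start, 1, nslots);
			last_index = (index + 1) % NSLABS;
			return start;
		}
	}
	return -1;			/* exhausted or too fragmented */
}

For a stream of small, short-lived mappings this is nearly O(1) per
call, while alloc_iova() pays for an rbtree walk plus a kmem_cache
allocation on every mapping, which presumably accounts for most of the
gap above.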
Here's the patch to convert swiotlb to use iova against my iova
patchset:
http://marc.info/?l=linux-kernel&m=119402340801254&w=2
diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64
index c10d3f0..8735822 100644
--- a/arch/x86/Kconfig.x86_64
+++ b/arch/x86/Kconfig.x86_64
@@ -524,6 +524,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
+ select IOVA
help
Support for software bounce buffers used on x86-64 systems
which don't have a hardware IOMMU (e.g. the current generation
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index aa805b1..8507402 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -325,6 +325,9 @@ static int __init pci_iommu_init(void)
gart_iommu_init();
#endif
+#ifdef CONFIG_SWIOTLB
+ swiotlb_alloc();
+#endif
no_iommu_init();
return 0;
}
diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index f9c5895..f00d20c 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -40,6 +40,7 @@ extern void swiotlb_free_coherent (struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle);
extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
extern void swiotlb_init(void);
+extern void swiotlb_alloc(void);
extern int swiotlb_force;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1a8050a..54ecb87 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -24,6 +24,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>
+#include <linux/iova.h>
#include <asm/io.h>
#include <asm/dma.h>
@@ -103,10 +104,7 @@ static unsigned int io_tlb_index;
*/
static unsigned char **io_tlb_orig_addr;
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
+static struct iova_domain swiotlb_iovad;
static int __init
setup_io_tlb_npages(char *str)
@@ -272,6 +270,19 @@ cleanup1:
return -ENOMEM;
}
+static struct kmem_cache *iova_cachep;
+
+void __init
+swiotlb_alloc(void)
+{
+ if (!swiotlb)
+ return;
+
+ iova_cachep = KMEM_CACHE(iova, 0);
+ init_iova_domain(&swiotlb_iovad, DMA_32BIT_MASK >> IO_TLB_SHIFT,
+		   iova_cachep);
+}
+
static int
address_needs_mapping(struct device *hwdev, dma_addr_t addr)
{
@@ -288,70 +299,20 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr)
static void *
map_single(struct device *hwdev, char *buffer, size_t size, int dir)
{
- unsigned long flags;
char *dma_addr;
- unsigned int nslots, stride, index, wrap;
+ unsigned int nslots, index;
int i;
+ struct iova *iova;
- /*
- * For mappings greater than a page, we limit the stride (and
- * hence alignment) to a page size.
- */
nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- if (size > PAGE_SIZE)
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = 1;
-
BUG_ON(!nslots);
- /*
- * Find suitable number of IO TLB entries size that will fit this
- * request and allocate a buffer from that IO TLB pool.
- */
- spin_lock_irqsave(&io_tlb_lock, flags);
- {
- wrap = index = ALIGN(io_tlb_index, stride);
-
- if (index >= io_tlb_nslabs)
- wrap = index = 0;
-
- do {
- /*
- * If we find a slot that indicates we have 'nslots'
- * number of contiguous
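The net effect of that last hunk is that the whole scanning loop goes
away; the allocation presumably reduces to something like this sketch,
using the alloc_iova() interface from the iova patchset (a sketch of
the idea, not the literal patch text):

	/* sketch: nslots IO TLB slots come from the iova allocator now */
	iova = alloc_iova(&swiotlb_iovad, nslots, io_tlb_nslabs - 1, 1);
	if (!iova)
		return NULL;

	index = iova->pfn_lo;
	dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

and unmap_single would then pair this with find_iova()/__free_iova()
instead of taking io_tlb_lock.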