Re: [RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-12-09 Thread Nishanth Aravamudan
On 09.12.2010 [15:17:06 +1100], Benjamin Herrenschmidt wrote:
 On Tue, 2010-10-26 at 20:35 -0700, Nishanth Aravamudan wrote:
 
 No much comments... I'm amazed how complex he firmware folks managed to
 make this ... 
 
   static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned 
  long action, void *node)
   {
  int err = NOTIFY_OK;
  struct device_node *np = node;
  struct pci_dn *pci = PCI_DN(np);
  +   struct direct_window *window;
   
  switch (action) {
  case PSERIES_RECONFIG_REMOVE:
  if (pci  pci-iommu_table)
  iommu_free_table(pci-iommu_table, np-full_name);
  +
  +   spin_lock(direct_window_list_lock);
  +   list_for_each_entry(window, direct_window_list, list) {
  +   if (window-device == np) {
  +   list_del(window-list);
  +   break;
  +   }
  +   }
  +   spin_unlock(direct_window_list_lock);
 
 Should you also kfree the window ?

Yeah, looks like I should. I have a few other questions due to testing,
but I'll reply to my original e-mail with those.

Thanks for the review!
Nish

-- 
Nishanth Aravamudan n...@us.ibm.com
IBM Linux Technology Center
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-12-09 Thread Nishanth Aravamudan
On 26.10.2010 [20:35:17 -0700], Nishanth Aravamudan wrote:
 If firmware allows us to map all of a partition's memory for DMA on a
 particular bridge, create a 1:1 mapping of that memory. Add hooks for
 dealing with hotplug events. Dyanmic DMA windows can use larger than the
 default page size, and we use the largest one possible.
 
 Not-yet-signed-off-by: Nishanth Aravamudan n...@us.ibm.com
 
 ---
 
 I've tested this briefly on a machine with suitable firmware/hardware.
 Things seem to work well, but I want to do more exhaustive I/O testing
 before asking for upstream merging. I would really appreciate any
 feedback on the updated approach.
 
 Specific questions:
 
 Ben, did I hook into the dma_set_mask() platform callback as you
 expected? Anything I can do better or which perhaps might lead to
 gotchas later?
 
 I've added a disable_ddw option, but perhaps it would be better to
 just disable the feature if iommu=force?

So for the final version, I probably should document this option in
kernel-parameters.txt w/ the patch, right?

snip

 +static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
 + unsigned long num_pfn, const void *arg)
 +{
 + const struct dynamic_dma_window_prop *maprange = arg;
 + int rc;
 + u64 tce_size, num_tce, dma_offset, next;
 + u32 tce_shift;
 + long limit;
 +
 + tce_shift = be32_to_cpu(maprange-tce_shift);
 + tce_size = 1ULL  tce_shift;
 + next = start_pfn  PAGE_SHIFT;
 + num_tce = num_pfn  PAGE_SHIFT;
 +
 + /* round back to the beginning of the tce page size */
 + num_tce += next  (tce_size - 1);
 + next = ~(tce_size - 1);
 +
 + /* covert to number of tces */
 + num_tce |= tce_size - 1;
 + num_tce = tce_shift;
 +
 + do {
 + /*
 +  * Set up the page with TCE data, looping through and setting
 +  * the values.
 +  */
 + limit = min_t(long, num_tce, 512);
 + dma_offset = next + be64_to_cpu(maprange-dma_base);
 +
 + rc = plpar_tce_stuff(be64_to_cpu(maprange-liobn),
 + (u64)dma_offset,
 +  0, limit);
 + num_tce -= limit;
 + } while (num_tce  0  !rc);
 +
 + return rc;
 +}

There is a bit of a typo here, the liobn is a 32-bit value. I've fixed
this is up locally and will update it when I send out the final version
of this patch.

I'm finding that on dlpar remove of adapters running in slots supporting
64-bit DMA, that the plpar_tce_stuff is failing. Can you think of a
reason why? It looks basically the same as the put_indirect below...

 +static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
 + unsigned long num_pfn, const void *arg)
 +{
 + const struct dynamic_dma_window_prop *maprange = arg;
 + u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
 + u32 tce_shift;
 + u64 rc = 0;
 + long l, limit;
 +
 + local_irq_disable();/* to protect tcep and the page behind it */
 + tcep = __get_cpu_var(tce_page);
 +
 + if (!tcep) {
 + tcep = (u64 *)__get_free_page(GFP_ATOMIC);
 + if (!tcep) {
 + local_irq_enable();
 + return -ENOMEM;
 + }
 + __get_cpu_var(tce_page) = tcep;
 + }
 +
 + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
 +
 + liobn = (u64)be32_to_cpu(maprange-liobn);
 + tce_shift = be32_to_cpu(maprange-tce_shift);
 + tce_size = 1ULL  tce_shift;
 + next = start_pfn  PAGE_SHIFT;
 + num_tce = num_pfn  PAGE_SHIFT;
 +
 + /* round back to the beginning of the tce page size */
 + num_tce += next  (tce_size - 1);
 + next = ~(tce_size - 1);
 +
 + /* covert to number of tces */
 + num_tce |= tce_size - 1;
 + num_tce = tce_shift;
 +
 + /* We can map max one pageful of TCEs at a time */
 + do {
 + /*
 +  * Set up the page with TCE data, looping through and setting
 +  * the values.
 +  */
 + limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
 + dma_offset = next + be64_to_cpu(maprange-dma_base);
 +
 + for (l = 0; l  limit; l++) {
 + tcep[l] = proto_tce | next;
 + next += tce_size;
 + }
 +
 + rc = plpar_tce_put_indirect(liobn,
 + (u64)dma_offset,
 + (u64)virt_to_abs(tcep),
 + limit);
 +
 + num_tce -= limit;
 + } while (num_tce  0  !rc);
 +printk(plpar_tce_put_indirect for offset 0x%llx and tcep[0] 
 0x%llx returned %llu\n,
 +(u64)dma_offset, tcep[0], rc);
 +

I'll cleanup the debugging on the final version too.

snip

 +static void remove_ddw(struct 

Re: [RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-12-08 Thread Benjamin Herrenschmidt
On Tue, 2010-10-26 at 20:35 -0700, Nishanth Aravamudan wrote:

No much comments... I'm amazed how complex he firmware folks managed to
make this ... 

  static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long 
 action, void *node)
  {
   int err = NOTIFY_OK;
   struct device_node *np = node;
   struct pci_dn *pci = PCI_DN(np);
 + struct direct_window *window;
  
   switch (action) {
   case PSERIES_RECONFIG_REMOVE:
   if (pci  pci-iommu_table)
   iommu_free_table(pci-iommu_table, np-full_name);
 +
 + spin_lock(direct_window_list_lock);
 + list_for_each_entry(window, direct_window_list, list) {
 + if (window-device == np) {
 + list_del(window-list);
 + break;
 + }
 + }
 + spin_unlock(direct_window_list_lock);

Should you also kfree the window ?


Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-10-26 Thread Nishanth Aravamudan
If firmware allows us to map all of a partition's memory for DMA on a
particular bridge, create a 1:1 mapping of that memory. Add hooks for
dealing with hotplug events. Dyanmic DMA windows can use larger than the
default page size, and we use the largest one possible.

Not-yet-signed-off-by: Nishanth Aravamudan n...@us.ibm.com

---

I've tested this briefly on a machine with suitable firmware/hardware.
Things seem to work well, but I want to do more exhaustive I/O testing
before asking for upstream merging. I would really appreciate any
feedback on the updated approach.

Specific questions:

Ben, did I hook into the dma_set_mask() platform callback as you
expected? Anything I can do better or which perhaps might lead to
gotchas later?

I've added a disable_ddw option, but perhaps it would be better to
just disable the feature if iommu=force?

---
 arch/powerpc/platforms/pseries/iommu.c |  577 +++-
 1 files changed, 575 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c 
b/arch/powerpc/platforms/pseries/iommu.c
index 45c6865..8090b6b 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -33,6 +33,7 @@
 #include linux/pci.h
 #include linux/dma-mapping.h
 #include linux/crash_dump.h
+#include linux/memory.h
 #include asm/io.h
 #include asm/prom.h
 #include asm/rtas.h
@@ -45,6 +46,7 @@
 #include asm/tce.h
 #include asm/ppc-pci.h
 #include asm/udbg.h
+#include asm/mmzone.h
 
 #include plpar_wrappers.h
 
@@ -270,6 +272,139 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table 
*tbl, long tcenum)
return tce_ret;
 }
 
+/* this is compatable with cells for the device tree property */
+struct dynamic_dma_window_prop {
+   __be32  liobn;  /* tce table number */
+   __be64  dma_base;   /* address hi,lo */
+   __be32  tce_shift;  /* ilog2(tce_page_size) */
+   __be32  window_shift;   /* ilog2(tce_window_size) */
+};
+
+struct direct_window {
+   struct device_node *device;
+   const struct dynamic_dma_window_prop *prop;
+   struct list_head list;
+};
+static LIST_HEAD(direct_window_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(direct_window_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(direct_window_init_mutex);
+#define DIRECT64_PROPNAME linux,direct64-ddr-window-info
+
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+   unsigned long num_pfn, const void *arg)
+{
+   const struct dynamic_dma_window_prop *maprange = arg;
+   int rc;
+   u64 tce_size, num_tce, dma_offset, next;
+   u32 tce_shift;
+   long limit;
+
+   tce_shift = be32_to_cpu(maprange-tce_shift);
+   tce_size = 1ULL  tce_shift;
+   next = start_pfn  PAGE_SHIFT;
+   num_tce = num_pfn  PAGE_SHIFT;
+
+   /* round back to the beginning of the tce page size */
+   num_tce += next  (tce_size - 1);
+   next = ~(tce_size - 1);
+
+   /* covert to number of tces */
+   num_tce |= tce_size - 1;
+   num_tce = tce_shift;
+
+   do {
+   /*
+* Set up the page with TCE data, looping through and setting
+* the values.
+*/
+   limit = min_t(long, num_tce, 512);
+   dma_offset = next + be64_to_cpu(maprange-dma_base);
+
+   rc = plpar_tce_stuff(be64_to_cpu(maprange-liobn),
+   (u64)dma_offset,
+0, limit);
+   num_tce -= limit;
+   } while (num_tce  0  !rc);
+
+   return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+   unsigned long num_pfn, const void *arg)
+{
+   const struct dynamic_dma_window_prop *maprange = arg;
+   u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+   u32 tce_shift;
+   u64 rc = 0;
+   long l, limit;
+
+   local_irq_disable();/* to protect tcep and the page behind it */
+   tcep = __get_cpu_var(tce_page);
+
+   if (!tcep) {
+   tcep = (u64 *)__get_free_page(GFP_ATOMIC);
+   if (!tcep) {
+   local_irq_enable();
+   return -ENOMEM;
+   }
+   __get_cpu_var(tce_page) = tcep;
+   }
+
+   proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+   liobn = (u64)be32_to_cpu(maprange-liobn);
+   tce_shift = be32_to_cpu(maprange-tce_shift);
+   tce_size = 1ULL  tce_shift;
+   next = start_pfn  PAGE_SHIFT;
+   num_tce = num_pfn  PAGE_SHIFT;
+
+   /* round back to the beginning of the tce page size */
+   num_tce += next  (tce_size - 1);
+   next = ~(tce_size - 1);
+
+   /* covert to number of tces */
+   num_tce |= tce_size - 1;
+   num_tce =