Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE

2016-08-16 Thread Laura Abbott

On 08/10/2016 04:22 PM, Russell King - ARM Linux wrote:

On Mon, Aug 08, 2016 at 10:49:34AM -0700, Laura Abbott wrote:

+/*
+ * Make an area consistent for devices.
+ * Note: Drivers should NOT use this function directly, as it will break
+ * platforms with CONFIG_DMABOUNCE.
+ * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
+ */
+void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
+   size_t size, enum dma_data_direction dir)
+{
+   phys_addr_t paddr;
+
+   dma_cache_maint_page(page, off, size, dir, dmac_map_area);
+
+   paddr = page_to_phys(page) + off;
+   if (dir == DMA_FROM_DEVICE) {
+   outer_inv_range(paddr, paddr + size);
+   } else {
+   outer_clean_range(paddr, paddr + size);
+   }
+   /* FIXME: non-speculating: flush on bidirectional mappings? */
+}
+
+void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
+   size_t size, enum dma_data_direction dir)
+{
+   phys_addr_t paddr = page_to_phys(page) + off;
+
+   /* FIXME: non-speculating: not required */
+   /* in any case, don't bother invalidating if DMA to device */
+   if (dir != DMA_TO_DEVICE) {
+   outer_inv_range(paddr, paddr + size);
+
+   dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+   }
+
+   /*
+* Mark the D-cache clean for these pages to avoid extra flushing.
+*/
+   if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
+   unsigned long pfn;
+   size_t left = size;
+
+   pfn = page_to_pfn(page) + off / PAGE_SIZE;
+   off %= PAGE_SIZE;
+   if (off) {
+   pfn++;
+   left -= PAGE_SIZE - off;
+   }
+   while (left >= PAGE_SIZE) {
+   page = pfn_to_page(pfn++);
+   set_bit(PG_dcache_clean, &page->flags);
+   left -= PAGE_SIZE;
+   }
+   }
+}


I _really_ don't want these exposed in any shape or form to driver
code.  I've seen too many hacks out there where people have gone
under the cover of the APIs they should be using, and headed straight
for the low-level functionality - adding function prototypes to get
at stuff they have no business doing.  Moving this here is just
asking for it to be abused.


+
+void kernel_force_cache_clean(struct page *page, size_t size)
+{
+   __dma_page_cpu_to_dev(page, 0, size, DMA_BIDIRECTIONAL);
+}
+
+void kernel_force_cache_invalidate(struct page *page, size_t size)
+{
+   __dma_page_dev_to_cpu(page, 0, size, DMA_BIDIRECTIONAL);
+}


Nothing in our implementation of these DMA operations guarantees
that those mean "clean" and "invalidate".  The DMA operations are
there so that CPUs can implement whatever they need at the map and
unmap times - and I've been very careful not to specify which cache
operations are involved.

For example, on older CPUs, __dma_page_dev_to_cpu() is almost
always a no-op.

If you want something that does something specific, then we need
something designed to do something specific.  Please don't re-use
what you think will fit.



I see what you are saying. What I really wanted was to re-use some of
the code that dma_cache_maint_page was doing for highmem handling
but it looks like I picked the wrong layer to make common. I'll give
this some thought.

Thanks,
Laura
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE

2016-08-10 Thread Russell King - ARM Linux
On Mon, Aug 08, 2016 at 10:49:34AM -0700, Laura Abbott wrote:
> +/*
> + * Make an area consistent for devices.
> + * Note: Drivers should NOT use this function directly, as it will break
> + * platforms with CONFIG_DMABOUNCE.
> + * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
> + */
> +void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
> + size_t size, enum dma_data_direction dir)
> +{
> + phys_addr_t paddr;
> +
> + dma_cache_maint_page(page, off, size, dir, dmac_map_area);
> +
> + paddr = page_to_phys(page) + off;
> + if (dir == DMA_FROM_DEVICE) {
> + outer_inv_range(paddr, paddr + size);
> + } else {
> + outer_clean_range(paddr, paddr + size);
> + }
> + /* FIXME: non-speculating: flush on bidirectional mappings? */
> +}
> +
> +void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
> + size_t size, enum dma_data_direction dir)
> +{
> + phys_addr_t paddr = page_to_phys(page) + off;
> +
> + /* FIXME: non-speculating: not required */
> + /* in any case, don't bother invalidating if DMA to device */
> + if (dir != DMA_TO_DEVICE) {
> + outer_inv_range(paddr, paddr + size);
> +
> + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
> + }
> +
> + /*
> +  * Mark the D-cache clean for these pages to avoid extra flushing.
> +  */
> + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
> + unsigned long pfn;
> + size_t left = size;
> +
> + pfn = page_to_pfn(page) + off / PAGE_SIZE;
> + off %= PAGE_SIZE;
> + if (off) {
> + pfn++;
> + left -= PAGE_SIZE - off;
> + }
> + while (left >= PAGE_SIZE) {
> + page = pfn_to_page(pfn++);
> + set_bit(PG_dcache_clean, &page->flags);
> + left -= PAGE_SIZE;
> + }
> + }
> +}

I _really_ don't want these exposed in any shape or form to driver
code.  I've seen too many hacks out there where people have gone
under the cover of the APIs they should be using, and headed straight
for the low-level functionality - adding function prototypes to get
at stuff they have no business doing.  Moving this here is just
asking for it to be abused.

> +
> +void kernel_force_cache_clean(struct page *page, size_t size)
> +{
> + __dma_page_cpu_to_dev(page, 0, size, DMA_BIDIRECTIONAL);
> +}
> +
> +void kernel_force_cache_invalidate(struct page *page, size_t size)
> +{
> + __dma_page_dev_to_cpu(page, 0, size, DMA_BIDIRECTIONAL);
> +}

Nothing in our implementation of these DMA operations guarantees
that those mean "clean" and "invalidate".  The DMA operations are
there so that CPUs can implement whatever they need at the map and
unmap times - and I've been very careful not to specify which cache
operations are involved.

For example, on older CPUs, __dma_page_dev_to_cpu() is almost
always a no-op.

If you want something that does something specific, then we need
something designed to do something specific.  Please don't re-use
what you think will fit.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE

2016-08-09 Thread Florian Fainelli
On 08/09/2016 05:13 PM, Laura Abbott wrote:
> On 08/09/2016 02:56 PM, Florian Fainelli wrote:
>> On 08/08/2016 10:49 AM, Laura Abbott wrote:
>>> arm may need the kernel_force_cache APIs to guarantee data consistency.
>>> Implement versions of these APIs based on the DMA APIs.
>>>
>>> Signed-off-by: Laura Abbott 
>>> ---
>>>  arch/arm/include/asm/cacheflush.h |   4 ++
>>>  arch/arm/mm/dma-mapping.c | 119
>>> --
>>>  arch/arm/mm/flush.c   | 115
>>> 
>>
>> Why is the code moved between dma-mapping.c and flush.c? It was not
>> obvious while looking at these patches why this is needed.
>>
> 
> I wanted to use the cache flushing routines from dma-mapping.c and
> it seemed better to pull them out vs. trying to put more generic
> cache flushing code in dma-mapping.c. flush.c seemed like an
> appropriate place although I forgot about the dependency on CONFIG_MMU.
> It can certainly remain in dma-mapping.c if deemed appropriate.

My concern is that this is an area of the kernel where you might be
looking for stable backports, so avoiding churn in there is desirable
and if the new cache APIs become accepted and standard, since they are
building directly on top of the DMA-API, keeping them in dma-mapping.c
seems consistent.

My 2 cents.
-- 
Florian
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE

2016-08-09 Thread Laura Abbott

On 08/09/2016 02:56 PM, Florian Fainelli wrote:

On 08/08/2016 10:49 AM, Laura Abbott wrote:

arm may need the kernel_force_cache APIs to guarantee data consistency.
Implement versions of these APIs based on the DMA APIs.

Signed-off-by: Laura Abbott 
---
 arch/arm/include/asm/cacheflush.h |   4 ++
 arch/arm/mm/dma-mapping.c | 119 --
 arch/arm/mm/flush.c   | 115 


Why is the code moved between dma-mapping.c and flush.c? It was not
obvious while looking at these patches why this is needed.



I wanted to use the cache flushing routines from dma-mapping.c and
it seemed better to pull them out vs. trying to put more generic
cache flushing code in dma-mapping.c. flush.c seemed like an
appropriate place although I forgot about the dependency on CONFIG_MMU.
It can certainly remain in dma-mapping.c if deemed appropriate.

Thanks,
Laura
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE

2016-08-09 Thread Florian Fainelli
On 08/08/2016 10:49 AM, Laura Abbott wrote:
> arm may need the kernel_force_cache APIs to guarantee data consistency.
> Implement versions of these APIs based on the DMA APIs.
> 
> Signed-off-by: Laura Abbott 
> ---
>  arch/arm/include/asm/cacheflush.h |   4 ++
>  arch/arm/mm/dma-mapping.c | 119 
> --
>  arch/arm/mm/flush.c   | 115 

Why is the code moved between dma-mapping.c and flush.c? It was not
obvious while looking at these patches why this is needed.
-- 
Florian
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE

2016-08-08 Thread Laura Abbott
arm may need the kernel_force_cache APIs to guarantee data consistency.
Implement versions of these APIs based on the DMA APIs.

Signed-off-by: Laura Abbott 
---
 arch/arm/include/asm/cacheflush.h |   4 ++
 arch/arm/mm/dma-mapping.c | 119 --
 arch/arm/mm/flush.c   | 115 
 arch/arm/mm/mm.h  |   8 +++
 4 files changed, 127 insertions(+), 119 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h 
b/arch/arm/include/asm/cacheflush.h
index 9156fc3..78eb011 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -518,4 +518,8 @@ static inline void secure_flush_area(const void *addr, 
size_t size)
outer_flush_range(phys, phys + size);
 }
 
+#define ARCH_HAS_FORCE_CACHE 1
+void kernel_force_cache_clean(struct page *page, size_t size);
+void kernel_force_cache_invalidate(struct page *page, size_t size);
+
 #endif
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c6834c0..8c9296d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -95,23 +95,6 @@ static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
return found;
 }
 
-/*
- * The DMA API is built upon the notion of "buffer ownership".  A buffer
- * is either exclusively owned by the CPU (and therefore may be accessed
- * by it) or exclusively owned by the DMA device.  These helper functions
- * represent the transitions between these two ownership states.
- *
- * Note, however, that on later ARMs, this notion does not work due to
- * speculative prefetches.  We model our approach on the assumption that
- * the CPU does do speculative prefetches, which means we clean caches
- * before transfers and delay cache invalidation until transfer completion.
- *
- */
-static void __dma_page_cpu_to_dev(struct page *, unsigned long,
-   size_t, enum dma_data_direction);
-static void __dma_page_dev_to_cpu(struct page *, unsigned long,
-   size_t, enum dma_data_direction);
-
 /**
  * arm_dma_map_page - map a portion of a page for streaming DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -945,108 +928,6 @@ int arm_dma_get_sgtable(struct device *dev, struct 
sg_table *sgt,
return 0;
 }
 
-static void dma_cache_maint_page(struct page *page, unsigned long offset,
-   size_t size, enum dma_data_direction dir,
-   void (*op)(const void *, size_t, int))
-{
-   unsigned long pfn;
-   size_t left = size;
-
-   pfn = page_to_pfn(page) + offset / PAGE_SIZE;
-   offset %= PAGE_SIZE;
-
-   /*
-* A single sg entry may refer to multiple physically contiguous
-* pages.  But we still need to process highmem pages individually.
-* If highmem is not configured then the bulk of this loop gets
-* optimized out.
-*/
-   do {
-   size_t len = left;
-   void *vaddr;
-
-   page = pfn_to_page(pfn);
-
-   if (PageHighMem(page)) {
-   if (len + offset > PAGE_SIZE)
-   len = PAGE_SIZE - offset;
-
-   if (cache_is_vipt_nonaliasing()) {
-   vaddr = kmap_atomic(page);
-   op(vaddr + offset, len, dir);
-   kunmap_atomic(vaddr);
-   } else {
-   vaddr = kmap_high_get(page);
-   if (vaddr) {
-   op(vaddr + offset, len, dir);
-   kunmap_high(page);
-   }
-   }
-   } else {
-   vaddr = page_address(page) + offset;
-   op(vaddr, len, dir);
-   }
-   offset = 0;
-   pfn++;
-   left -= len;
-   } while (left);
-}
-
-/*
- * Make an area consistent for devices.
- * Note: Drivers should NOT use this function directly, as it will break
- * platforms with CONFIG_DMABOUNCE.
- * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
- */
-static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
-   size_t size, enum dma_data_direction dir)
-{
-   phys_addr_t paddr;
-
-   dma_cache_maint_page(page, off, size, dir, dmac_map_area);
-
-   paddr = page_to_phys(page) + off;
-   if (dir == DMA_FROM_DEVICE) {
-   outer_inv_range(paddr, paddr + size);
-   } else {
-   outer_clean_range(paddr, paddr + size);
-   }
-   /* FIXME: non-speculating: flush on bidirectional mappings? */
-}
-
-static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
-   size_t size, enum dma_data_direction dir)
-{
-   phys_addr_t paddr = page_to_phys(page) + off;
-
-   /* FIXME: non-speculating: