Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE
On 08/10/2016 04:22 PM, Russell King - ARM Linux wrote: On Mon, Aug 08, 2016 at 10:49:34AM -0700, Laura Abbott wrote: +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +void __dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr; + + dma_cache_maint_page(page, off, size, dir, dmac_map_area); + + paddr = page_to_phys(page) + off; + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ +} + +void __dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = page_to_phys(page) + off; + + /* FIXME: non-speculating: not required */ + /* in any case, don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { + outer_inv_range(paddr, paddr + size); + + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + } + + /* +* Mark the D-cache clean for these pages to avoid extra flushing. +*/ + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } +} I _really_ don't want these exposed in any shape or form to driver code. I've seen too many hacks out there where people have gone under the cover of the APIs they should be using, and headed straight for the low-level functionality - adding function prototypes to get at stuff they have no business doing. Moving this here is just asking for it to be abused. 
+ +void kernel_force_cache_clean(struct page *page, size_t size) +{ + __dma_page_cpu_to_dev(page, 0, size, DMA_BIDIRECTIONAL); +} + +void kernel_force_cache_invalidate(struct page *page, size_t size) +{ + __dma_page_dev_to_cpu(page, 0, size, DMA_BIDIRECTIONAL); +} Nothing in our implementation of these DMA operations guarantees that those mean "clean" and "invalidate". The DMA operations are there so that CPUs can implement whatever they need at the map and unmap times - and I've been very careful not to specify which cache operations are involved. For example, on older CPUs, __dma_page_dev_to_cpu() is almost always a no-op. If you want something that does something specific, then we need something designed to do something specific. Please don't re-use what you think will fit. I see what you are saying. What I really wanted was to re-use some of the code that dma_cache_maint_page was doing for highmem handling but it looks like I picked the wrong layer to make common. I'll give this some thought. Thanks, Laura ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE
On Mon, Aug 08, 2016 at 10:49:34AM -0700, Laura Abbott wrote: > +/* > + * Make an area consistent for devices. > + * Note: Drivers should NOT use this function directly, as it will break > + * platforms with CONFIG_DMABOUNCE. > + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) > + */ > +void __dma_page_cpu_to_dev(struct page *page, unsigned long off, > + size_t size, enum dma_data_direction dir) > +{ > + phys_addr_t paddr; > + > + dma_cache_maint_page(page, off, size, dir, dmac_map_area); > + > + paddr = page_to_phys(page) + off; > + if (dir == DMA_FROM_DEVICE) { > + outer_inv_range(paddr, paddr + size); > + } else { > + outer_clean_range(paddr, paddr + size); > + } > + /* FIXME: non-speculating: flush on bidirectional mappings? */ > +} > + > +void __dma_page_dev_to_cpu(struct page *page, unsigned long off, > + size_t size, enum dma_data_direction dir) > +{ > + phys_addr_t paddr = page_to_phys(page) + off; > + > + /* FIXME: non-speculating: not required */ > + /* in any case, don't bother invalidating if DMA to device */ > + if (dir != DMA_TO_DEVICE) { > + outer_inv_range(paddr, paddr + size); > + > + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); > + } > + > + /* > + * Mark the D-cache clean for these pages to avoid extra flushing. > + */ > + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { > + unsigned long pfn; > + size_t left = size; > + > + pfn = page_to_pfn(page) + off / PAGE_SIZE; > + off %= PAGE_SIZE; > + if (off) { > + pfn++; > + left -= PAGE_SIZE - off; > + } > + while (left >= PAGE_SIZE) { > + page = pfn_to_page(pfn++); > + set_bit(PG_dcache_clean, &page->flags); > + left -= PAGE_SIZE; > + } > + } > +} I _really_ don't want these exposed in any shape or form to driver code. I've seen too many hacks out there where people have gone under the cover of the APIs they should be using, and headed straight for the low-level functionality - adding function prototypes to get at stuff they have no business doing. 
Moving this here is just asking for it to be abused. > + > +void kernel_force_cache_clean(struct page *page, size_t size) > +{ > + __dma_page_cpu_to_dev(page, 0, size, DMA_BIDIRECTIONAL); > +} > + > +void kernel_force_cache_invalidate(struct page *page, size_t size) > +{ > + __dma_page_dev_to_cpu(page, 0, size, DMA_BIDIRECTIONAL); > +} Nothing in our implementation of these DMA operations guarantees that those mean "clean" and "invalidate". The DMA operations are there so that CPUs can implement whatever they need at the map and unmap times - and I've been very careful not to specify which cache operations are involved. For example, on older CPUs, __dma_page_dev_to_cpu() is almost always a no-op. If you want something that does something specific, then we need something designed to do something specific. Please don't re-use what you think will fit. -- RMK's Patch system: http://www.armlinux.org.uk/developer/patches/ FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up according to speedtest.net. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE
On 08/09/2016 05:13 PM, Laura Abbott wrote: > On 08/09/2016 02:56 PM, Florian Fainelli wrote: >> On 08/08/2016 10:49 AM, Laura Abbott wrote: >>> arm may need the kernel_force_cache APIs to guarantee data consistency. >>> Implement versions of these APIs based on the DMA APIs. >>> >>> Signed-off-by: Laura Abbott>>> --- >>> arch/arm/include/asm/cacheflush.h | 4 ++ >>> arch/arm/mm/dma-mapping.c | 119 >>> -- >>> arch/arm/mm/flush.c | 115 >>> >> >> Why is the code moved between dma-mapping.c and flush.c? It was not >> obvious while looking at these patches why this is needed. >> > > I wanted to use the cache flushing routines from dma-mapping.c and > it seemed better to pull them out vs. trying to put more generic > cache flushing code in dma-mapping.c. flush.c seemed like an > appropriate place although I forgot about the dependency on CONFIG_MMU. > It can certainly remain in dma-mapping.c if deemed appropriate. My concern is that this is an area of the kernel where you might be looking for stable backports, so avoiding churn in there is desirable and if the new cache APIs become accepted and standard, since they are building directly on top of the DMA-API, keeping them in dma-mapping.c seems consistent. My 2 cents. -- Florian ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE
On 08/09/2016 02:56 PM, Florian Fainelli wrote: On 08/08/2016 10:49 AM, Laura Abbott wrote: arm may need the kernel_force_cache APIs to guarantee data consistency. Implement versions of these APIs based on the DMA APIs. Signed-off-by: Laura Abbott--- arch/arm/include/asm/cacheflush.h | 4 ++ arch/arm/mm/dma-mapping.c | 119 -- arch/arm/mm/flush.c | 115 Why is the code moved between dma-mapping.c and flush.c? It was not obvious while looking at these patches why this is needed. I wanted to use the cache flushing routines from dma-mapping.c and it seemed better to pull them out vs. trying to put more generic cache flushing code in dma-mapping.c. flush.c seemed like an appropriate place although I forgot about the dependency on CONFIG_MMU. It can certainly remain in dma-mapping.c if deemed appropriate. Thanks, Laura ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE
On 08/08/2016 10:49 AM, Laura Abbott wrote: > arm may need the kernel_force_cache APIs to guarantee data consistency. > Implement versions of these APIs based on the DMA APIs. > > Signed-off-by: Laura Abbott> --- > arch/arm/include/asm/cacheflush.h | 4 ++ > arch/arm/mm/dma-mapping.c | 119 > -- > arch/arm/mm/flush.c | 115 Why is the code moved between dma-mapping.c and flush.c? It was not obvious while looking at these patches why this is needed. -- Florian ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[RFCv2][PATCH 2/5] arm: Implement ARCH_HAS_FORCE_CACHE
arm may need the kernel_force_cache APIs to guarantee data consistency. Implement versions of these APIs based on the DMA APIs. Signed-off-by: Laura Abbott--- arch/arm/include/asm/cacheflush.h | 4 ++ arch/arm/mm/dma-mapping.c | 119 -- arch/arm/mm/flush.c | 115 arch/arm/mm/mm.h | 8 +++ 4 files changed, 127 insertions(+), 119 deletions(-) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 9156fc3..78eb011 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -518,4 +518,8 @@ static inline void secure_flush_area(const void *addr, size_t size) outer_flush_range(phys, phys + size); } +#define ARCH_HAS_FORCE_CACHE 1 +void kernel_force_cache_clean(struct page *page, size_t size); +void kernel_force_cache_invalidate(struct page *page, size_t size); + #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c6834c0..8c9296d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -95,23 +95,6 @@ static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) return found; } -/* - * The DMA API is built upon the notion of "buffer ownership". A buffer - * is either exclusively owned by the CPU (and therefore may be accessed - * by it) or exclusively owned by the DMA device. These helper functions - * represent the transitions between these two ownership states. - * - * Note, however, that on later ARMs, this notion does not work due to - * speculative prefetches. We model our approach on the assumption that - * the CPU does do speculative prefetches, which means we clean caches - * before transfers and delay cache invalidation until transfer completion. 
- * - */ -static void __dma_page_cpu_to_dev(struct page *, unsigned long, - size_t, enum dma_data_direction); -static void __dma_page_dev_to_cpu(struct page *, unsigned long, - size_t, enum dma_data_direction); - /** * arm_dma_map_page - map a portion of a page for streaming DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -945,108 +928,6 @@ int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, return 0; } -static void dma_cache_maint_page(struct page *page, unsigned long offset, - size_t size, enum dma_data_direction dir, - void (*op)(const void *, size_t, int)) -{ - unsigned long pfn; - size_t left = size; - - pfn = page_to_pfn(page) + offset / PAGE_SIZE; - offset %= PAGE_SIZE; - - /* -* A single sg entry may refer to multiple physically contiguous -* pages. But we still need to process highmem pages individually. -* If highmem is not configured then the bulk of this loop gets -* optimized out. -*/ - do { - size_t len = left; - void *vaddr; - - page = pfn_to_page(pfn); - - if (PageHighMem(page)) { - if (len + offset > PAGE_SIZE) - len = PAGE_SIZE - offset; - - if (cache_is_vipt_nonaliasing()) { - vaddr = kmap_atomic(page); - op(vaddr + offset, len, dir); - kunmap_atomic(vaddr); - } else { - vaddr = kmap_high_get(page); - if (vaddr) { - op(vaddr + offset, len, dir); - kunmap_high(page); - } - } - } else { - vaddr = page_address(page) + offset; - op(vaddr, len, dir); - } - offset = 0; - pfn++; - left -= len; - } while (left); -} - -/* - * Make an area consistent for devices. - * Note: Drivers should NOT use this function directly, as it will break - * platforms with CONFIG_DMABOUNCE. 
- * Use the driver DMA support - see dma-mapping.h (dma_sync_*) - */ -static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr; - - dma_cache_maint_page(page, off, size, dir, dmac_map_area); - - paddr = page_to_phys(page) + off; - if (dir == DMA_FROM_DEVICE) { - outer_inv_range(paddr, paddr + size); - } else { - outer_clean_range(paddr, paddr + size); - } - /* FIXME: non-speculating: flush on bidirectional mappings? */ -} - -static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = page_to_phys(page) + off; - - /* FIXME: non-speculating: