On Fri, May 05, 2017 at 03:19:07PM +0200, Mark Kettenis wrote:
> To support sdhc(4) on the Firefly-RK3399, I actually need a working
> bus_dmamap_sync(9) implementation that does proper cache flushes. So
> far we've gotten away with not having one, because the USB stack tends
> to set the BUS_DMA_COHERENT flag which maps memory into uncached
> address space such that the cache flushes can be skipped.
>
> Diff below follows the approach taken by armv7. I've left out the
> optimization where BUS_DMASYNC_PREREAD invalidates complete cache
> lines instead of writing them back. I'm far from certain that this
> optimization actually matters on your typical ARMv8 CPU.
>
> ok?

ok patrick@ > > > Index: bus_dma.c > =================================================================== > RCS file: /cvs/src/sys/arch/arm64/arm64/bus_dma.c,v > retrieving revision 1.6 > diff -u -p -r1.6 bus_dma.c > --- bus_dma.c 22 Feb 2017 22:55:27 -0000 1.6 > +++ bus_dma.c 5 May 2017 13:06:50 -0000 > @@ -63,9 +63,9 @@ > > #include <uvm/uvm_extern.h> > > -#include <machine/cpu.h> > - > #include <machine/bus.h> > +#include <machine/cpu.h> > +#include <machine/cpufunc.h> > > /* > * Common function for DMA map creation. May be called by bus-specific > @@ -351,6 +351,33 @@ _dmamap_unload(bus_dma_tag_t t, bus_dmam > map->dm_mapsize = 0; > } > > +static void > +_dmamap_sync_segment(vaddr_t va, vsize_t len, int ops) > +{ > + switch (ops) { > + case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE: > + case BUS_DMASYNC_PREREAD: > + cpu_dcache_wbinv_range(va, len); > + break; > + > + case BUS_DMASYNC_PREWRITE: > + cpu_dcache_wb_range(va, len); > + break; > + > + /* > + * Cortex CPUs can do speculative loads so we need to clean the cache > + * after a DMA read to deal with any speculatively loaded cache lines. > + * Since these can't be dirty, we can just invalidate them and don't > + * have to worry about having to write back their contents. > + */ > + case BUS_DMASYNC_POSTREAD: > + case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE: > + membar_sync(); > + cpu_dcache_inv_range(va, len); > + break; > + } > +} > + > /* > * Common function for DMA map synchronization. May be called > * by bus-specific DMA map synchronization functions. 
> @@ -376,12 +403,10 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap > curseg = 0; > > while (size && nsegs) { > - paddr_t paddr; > vaddr_t vaddr; > bus_size_t ssize; > > ssize = map->dm_segs[curseg].ds_len; > - paddr = map->dm_segs[curseg]._ds_paddr; > vaddr = map->dm_segs[curseg]._ds_vaddr; > > if (addr != 0) { > @@ -390,7 +415,6 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap > ssize = 0; > } else { > vaddr += addr; > - paddr += addr; > ssize -= addr; > addr = 0; > } > @@ -399,21 +423,7 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap > ssize = size; > > if (ssize != 0) { > - /* > - * If only PREWRITE is requested, writeback. > - * PREWRITE with PREREAD writebacks > - * and invalidates (if noncoherent) *all* cache levels. > - * Otherwise, just invalidate (if noncoherent). > - */ > - if (op & BUS_DMASYNC_PREWRITE) { > - if (op & BUS_DMASYNC_PREREAD) > - ; // XXX MUST ADD CACHEFLUSHING > - else > - ; // XXX MUST ADD CACHEFLUSHING > - } else > - if (op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTREAD)) { > - ; // XXX MUST ADD CACHEFLUSHING > - } > + _dmamap_sync_segment(vaddr, ssize, op); > size -= ssize; > } > curseg++; >