Re: [PATCH] usb: cdns3: Coherent memory allocation optimization
Hi Peter, On 09/03/21 11:09 am, Sanket Parmar wrote: > Hi Peter, > >> On 21-03-05 17:01:11, Sanket Parmar wrote: >>> Allocation of DMA coherent memory in atomic context using >>> dma_alloc_coherent() might fail on some platform. To fix it, >>> Replaced dma_alloc_coherent() with dma_pool API to allocate a >>> smaller chunk of DMA coherent memory for TRB rings. >>> >>> Also in cdns3_prepare_aligned_request_buf(), replaced >>> dma_alloc_coherent() with kmalloc and dma_map API to allocate >>> aligned request buffer of dynamic length. >>> >> >> You do two changes in one commit, would you please split this one as >> two patches? >> >>> Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") >> >> "commit" is not needed >> >>> Reported by: Aswath Govindraju >> >> Reported-by: >> > I have split the change into two patches. > New patch series has been posted already. > >>> Signed-off-by: Sanket Parmar >>> --- >>> drivers/usb/cdns3/cdns3-gadget.c | 115 ++ >> --- >>> drivers/usb/cdns3/cdns3-gadget.h |3 + >>> 2 files changed, 71 insertions(+), 47 deletions(-) >>> >>> diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3- >> gadget.c >>> index 582bfec..5fd6993 100644 >>> --- a/drivers/usb/cdns3/cdns3-gadget.c >>> +++ b/drivers/usb/cdns3/cdns3-gadget.c >>> @@ -59,6 +59,7 @@ >>> #include >>> #include >>> #include >>> +#include >>> #include >>> >>> #include "core.h" >>> @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct >> cdns3_endpoint *priv_ep, >>> return priv_ep->trb_pool_dma + offset; >>> } >>> >>> -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep) >>> -{ >>> - switch (priv_ep->type) { >>> - case USB_ENDPOINT_XFER_ISOC: >>> - return TRB_ISO_RING_SIZE; >>> - case USB_ENDPOINT_XFER_CONTROL: >>> - return TRB_CTRL_RING_SIZE; >>> - default: >>> - if (priv_ep->use_streams) >>> - return TRB_STREAM_RING_SIZE; >>> - else >>> - return TRB_RING_SIZE; >>> - } >>> -} >>> - >>> static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep) >>> { >>> struct cdns3_device *priv_dev = priv_ep->cdns3_dev; >>> >>> if (priv_ep->trb_pool) { >>> - dma_free_coherent(priv_dev->sysdev, >>> - cdns3_ring_size(priv_ep), >>> - priv_ep->trb_pool, priv_ep->trb_pool_dma); >>> + dma_pool_free(priv_dev->eps_dma_pool, >>> + priv_ep->trb_pool, priv_ep->trb_pool_dma); >>> priv_ep->trb_pool = NULL; >>> } >>> } >>> @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct >> cdns3_endpoint *priv_ep) >>> int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep) >>> { >>> struct cdns3_device *priv_dev = priv_ep->cdns3_dev; >>> - int ring_size = cdns3_ring_size(priv_ep); >>> + int ring_size = TRB_RING_SIZE; >> >> You will use the same size for TRB ring region for control/bulk/isoc >> endpoint. >> > As single DMA pool is used to allocate the ring buffer, different TRB ring > size is not possible for different EP. Hence, TRB_RING_SIZE(600 * 12 B) which > fits > for all type of EPs is used. > >>> int num_trbs = ring_size / TRB_SIZE; >>> struct cdns3_trb *link_trb; >>> >>> @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct >> cdns3_endpoint *priv_ep) >>> cdns3_free_trb_pool(priv_ep); >>> >>> if (!priv_ep->trb_pool) { >>> - priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev, >>> - ring_size, >>> - _ep->trb_pool_dma, >>> - GFP_DMA32 | >> GFP_ATOMIC); >>> + priv_ep->trb_pool = dma_pool_alloc(priv_dev- >>> eps_dma_pool, >>> + GFP_DMA32 | GFP_ATOMIC, >>> + _ep->trb_pool_dma); >> >> dma_pool_alloc also allocates the whole thunk of TRB region. See the >> kernel-doc for dma_pool_create. >> >> * Given one of these pools, dma_pool_alloc() >> * may be used to allocate memory. Such memory will all have "consistent" > > Yes, dma_pool_alloc allocates the whole chunk of TRB region for enabled EPs > from the > pool. Currently the block size of the DMA pool is 7.2KB. So dma_pool_alloc > allocates at least > 7.2KB for TRB region per enabled EP. > >>> + >>> if (!priv_ep->trb_pool) >>> return -ENOMEM; >>> >>> @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct >> cdns3_endpoint *priv_ep, >>> usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request, >>> priv_ep->dir); >>> >>> - if ((priv_req->flags & REQUEST_UNALIGNED) && >>> - priv_ep->dir == USB_DIR_OUT && !request->status) >>> - memcpy(request->buf, priv_req->aligned_buf->buf, >>> -
RE: [PATCH] usb: cdns3: Coherent memory allocation optimization
Hi Peter, > On 21-03-05 17:01:11, Sanket Parmar wrote: > > Allocation of DMA coherent memory in atomic context using > > dma_alloc_coherent() might fail on some platform. To fix it, > > Replaced dma_alloc_coherent() with dma_pool API to allocate a > > smaller chunk of DMA coherent memory for TRB rings. > > > > Also in cdns3_prepare_aligned_request_buf(), replaced > > dma_alloc_coherent() with kmalloc and dma_map API to allocate > > aligned request buffer of dynamic length. > > > > You do two changes in one commit, would you please split this one as > two patches? > > > Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") > > "commit" is not needed > > > Reported by: Aswath Govindraju > > Reported-by: > I have split the change into two patches. New patch series has been posted already. > > Signed-off-by: Sanket Parmar > > --- > > drivers/usb/cdns3/cdns3-gadget.c | 115 ++ > --- > > drivers/usb/cdns3/cdns3-gadget.h |3 + > > 2 files changed, 71 insertions(+), 47 deletions(-) > > > > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3- > gadget.c > > index 582bfec..5fd6993 100644 > > --- a/drivers/usb/cdns3/cdns3-gadget.c > > +++ b/drivers/usb/cdns3/cdns3-gadget.c > > @@ -59,6 +59,7 @@ > > #include > > #include > > #include > > +#include > > #include > > > > #include "core.h" > > @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct > cdns3_endpoint *priv_ep, > > return priv_ep->trb_pool_dma + offset; > > } > > > > -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep) > > -{ > > - switch (priv_ep->type) { > > - case USB_ENDPOINT_XFER_ISOC: > > - return TRB_ISO_RING_SIZE; > > - case USB_ENDPOINT_XFER_CONTROL: > > - return TRB_CTRL_RING_SIZE; > > - default: > > - if (priv_ep->use_streams) > > - return TRB_STREAM_RING_SIZE; > > - else > > - return TRB_RING_SIZE; > > - } > > -} > > - > > static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep) > > { > > struct cdns3_device *priv_dev = priv_ep->cdns3_dev; > > > > if (priv_ep->trb_pool) { > > - dma_free_coherent(priv_dev->sysdev, > > - cdns3_ring_size(priv_ep), > > - priv_ep->trb_pool, priv_ep->trb_pool_dma); > > + dma_pool_free(priv_dev->eps_dma_pool, > > + priv_ep->trb_pool, priv_ep->trb_pool_dma); > > priv_ep->trb_pool = NULL; > > } > > } > > @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct > cdns3_endpoint *priv_ep) > > int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep) > > { > > struct cdns3_device *priv_dev = priv_ep->cdns3_dev; > > - int ring_size = cdns3_ring_size(priv_ep); > > + int ring_size = TRB_RING_SIZE; > > You will use the same size for TRB ring region for control/bulk/isoc > endpoint. > As single DMA pool is used to allocate the ring buffer, different TRB ring size is not possible for different EP. Hence, TRB_RING_SIZE(600 * 12 B) which fits for all type of EPs is used. > > int num_trbs = ring_size / TRB_SIZE; > > struct cdns3_trb *link_trb; > > > > @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct > cdns3_endpoint *priv_ep) > > cdns3_free_trb_pool(priv_ep); > > > > if (!priv_ep->trb_pool) { > > - priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev, > > - ring_size, > > - _ep->trb_pool_dma, > > - GFP_DMA32 | > GFP_ATOMIC); > > + priv_ep->trb_pool = dma_pool_alloc(priv_dev- > >eps_dma_pool, > > + GFP_DMA32 | GFP_ATOMIC, > > + _ep->trb_pool_dma); > > dma_pool_alloc also allocates the whole thunk of TRB region. See the > kernel-doc for dma_pool_create. > > * Given one of these pools, dma_pool_alloc() > * may be used to allocate memory. Such memory will all have "consistent" Yes, dma_pool_alloc allocates the whole chunk of TRB region for enabled EPs from the pool. Currently the block size of the DMA pool is 7.2KB. So dma_pool_alloc allocates at least 7.2KB for TRB region per enabled EP. > > + > > if (!priv_ep->trb_pool) > > return -ENOMEM; > > > > @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct > cdns3_endpoint *priv_ep, > > usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request, > > priv_ep->dir); > > > > - if ((priv_req->flags & REQUEST_UNALIGNED) && > > - priv_ep->dir == USB_DIR_OUT && !request->status) > > - memcpy(request->buf, priv_req->aligned_buf->buf, > > - request->length); > > + if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req- > >aligned_buf) { > > +
Re: [PATCH] usb: cdns3: Coherent memory allocation optimization
On 21-03-05 17:01:11, Sanket Parmar wrote: > Allocation of DMA coherent memory in atomic context using > dma_alloc_coherent() might fail on some platform. To fix it, > Replaced dma_alloc_coherent() with dma_pool API to allocate a > smaller chunk of DMA coherent memory for TRB rings. > > Also in cdns3_prepare_aligned_request_buf(), replaced > dma_alloc_coherent() with kmalloc and dma_map API to allocate > aligned request buffer of dynamic length. > You do two changes in one commit, would you please split this one as two patches? > Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") "commit" is not needed > Reported by: Aswath Govindraju Reported-by: > Signed-off-by: Sanket Parmar > --- > drivers/usb/cdns3/cdns3-gadget.c | 115 ++--- > drivers/usb/cdns3/cdns3-gadget.h |3 + > 2 files changed, 71 insertions(+), 47 deletions(-) > > diff --git a/drivers/usb/cdns3/cdns3-gadget.c > b/drivers/usb/cdns3/cdns3-gadget.c > index 582bfec..5fd6993 100644 > --- a/drivers/usb/cdns3/cdns3-gadget.c > +++ b/drivers/usb/cdns3/cdns3-gadget.c > @@ -59,6 +59,7 @@ > #include > #include > #include > +#include > #include > > #include "core.h" > @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct cdns3_endpoint > *priv_ep, > return priv_ep->trb_pool_dma + offset; > } > > -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep) > -{ > - switch (priv_ep->type) { > - case USB_ENDPOINT_XFER_ISOC: > - return TRB_ISO_RING_SIZE; > - case USB_ENDPOINT_XFER_CONTROL: > - return TRB_CTRL_RING_SIZE; > - default: > - if (priv_ep->use_streams) > - return TRB_STREAM_RING_SIZE; > - else > - return TRB_RING_SIZE; > - } > -} > - > static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep) > { > struct cdns3_device *priv_dev = priv_ep->cdns3_dev; > > if (priv_ep->trb_pool) { > - dma_free_coherent(priv_dev->sysdev, > - cdns3_ring_size(priv_ep), > - priv_ep->trb_pool, priv_ep->trb_pool_dma); > + dma_pool_free(priv_dev->eps_dma_pool, > + priv_ep->trb_pool, priv_ep->trb_pool_dma); > priv_ep->trb_pool = NULL; > } > } > @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct cdns3_endpoint > *priv_ep) > int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep) > { > struct cdns3_device *priv_dev = priv_ep->cdns3_dev; > - int ring_size = cdns3_ring_size(priv_ep); > + int ring_size = TRB_RING_SIZE; You will use the same size for TRB ring region for control/bulk/isoc endpoint. > int num_trbs = ring_size / TRB_SIZE; > struct cdns3_trb *link_trb; > > @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct cdns3_endpoint > *priv_ep) > cdns3_free_trb_pool(priv_ep); > > if (!priv_ep->trb_pool) { > - priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev, > -ring_size, > -_ep->trb_pool_dma, > -GFP_DMA32 | GFP_ATOMIC); > + priv_ep->trb_pool = dma_pool_alloc(priv_dev->eps_dma_pool, > + GFP_DMA32 | GFP_ATOMIC, > + _ep->trb_pool_dma); dma_pool_alloc also allocates the whole thunk of TRB region. See the kernel-doc for dma_pool_create. * Given one of these pools, dma_pool_alloc() * may be used to allocate memory. Such memory will all have "consistent" > + > if (!priv_ep->trb_pool) > return -ENOMEM; > > @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct cdns3_endpoint > *priv_ep, > usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request, > priv_ep->dir); > > - if ((priv_req->flags & REQUEST_UNALIGNED) && > - priv_ep->dir == USB_DIR_OUT && !request->status) > - memcpy(request->buf, priv_req->aligned_buf->buf, > -request->length); > + if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) { > + struct cdns3_aligned_buf *buf; > + > + buf = priv_req->aligned_buf; > + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size, > + buf->dir); > + priv_req->flags &= ~REQUEST_UNALIGNED; > + > + if (priv_ep->dir == USB_DIR_OUT && !request->status) { > + memcpy(request->buf, priv_req->aligned_buf->buf, > +request->length); > + } > + > + trace_cdns3_free_aligned_request(priv_req); > + priv_req->aligned_buf->in_use = 0; > + queue_work(system_freezable_wq, > +