Re: [PATCH] usb: cdns3: Coherent memory allocation optimization

2021-03-09 Thread Aswath Govindraju
Hi Peter,

On 09/03/21 11:09 am, Sanket Parmar wrote:
> Hi Peter,
> 
>> On 21-03-05 17:01:11, Sanket Parmar wrote:
>>> Allocation of DMA coherent memory in atomic context using
>>> dma_alloc_coherent() might fail on some platform. To fix it,
>>> Replaced dma_alloc_coherent() with dma_pool API to allocate a
>>> smaller chunk of DMA coherent memory for TRB rings.
>>>
>>> Also in cdns3_prepare_aligned_request_buf(), replaced
>>> dma_alloc_coherent() with kmalloc and dma_map API to allocate
>>> aligned request buffer of dynamic length.
>>>
>>
>> You do two changes in one commit, would you please split this one as
>> two patches?
>>
>>> Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
>>
>> "commit" is not needed
>>
>>> Reported by: Aswath Govindraju 
>>
>> Reported-by:
>>
> I have split the change into two patches.
> New patch series has been posted already.
>  
>>> Signed-off-by: Sanket Parmar 
>>> ---
>>>  drivers/usb/cdns3/cdns3-gadget.c |  115 ++
>> ---
>>>  drivers/usb/cdns3/cdns3-gadget.h |3 +
>>>  2 files changed, 71 insertions(+), 47 deletions(-)
>>>
>>> diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-
>> gadget.c
>>> index 582bfec..5fd6993 100644
>>> --- a/drivers/usb/cdns3/cdns3-gadget.c
>>> +++ b/drivers/usb/cdns3/cdns3-gadget.c
>>> @@ -59,6 +59,7 @@
>>>  #include 
>>>  #include 
>>>  #include 
>>> +#include 
>>>  #include 
>>>
>>>  #include "core.h"
>>> @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct
>> cdns3_endpoint *priv_ep,
>>> return priv_ep->trb_pool_dma + offset;
>>>  }
>>>
>>> -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep)
>>> -{
>>> -   switch (priv_ep->type) {
>>> -   case USB_ENDPOINT_XFER_ISOC:
>>> -   return TRB_ISO_RING_SIZE;
>>> -   case USB_ENDPOINT_XFER_CONTROL:
>>> -   return TRB_CTRL_RING_SIZE;
>>> -   default:
>>> -   if (priv_ep->use_streams)
>>> -   return TRB_STREAM_RING_SIZE;
>>> -   else
>>> -   return TRB_RING_SIZE;
>>> -   }
>>> -}
>>> -
>>>  static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
>>>  {
>>> struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
>>>
>>> if (priv_ep->trb_pool) {
>>> -   dma_free_coherent(priv_dev->sysdev,
>>> - cdns3_ring_size(priv_ep),
>>> - priv_ep->trb_pool, priv_ep->trb_pool_dma);
>>> +   dma_pool_free(priv_dev->eps_dma_pool,
>>> +   priv_ep->trb_pool, priv_ep->trb_pool_dma);
>>> priv_ep->trb_pool = NULL;
>>> }
>>>  }
>>> @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct
>> cdns3_endpoint *priv_ep)
>>>  int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
>>>  {
>>> struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
>>> -   int ring_size = cdns3_ring_size(priv_ep);
>>> +   int ring_size = TRB_RING_SIZE;
>>
>> You will use the same size for TRB ring region for control/bulk/isoc
>> endpoint.
>>
> As single DMA pool is used to allocate the ring buffer, different TRB ring
> size is not possible for different EP. Hence, TRB_RING_SIZE(600 * 12 B) which 
> fits
> for all type of EPs is used.
> 
>>> int num_trbs = ring_size / TRB_SIZE;
>>> struct cdns3_trb *link_trb;
>>>
>>> @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct
>> cdns3_endpoint *priv_ep)
>>> cdns3_free_trb_pool(priv_ep);
>>>
>>> if (!priv_ep->trb_pool) {
>>> -   priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev,
>>> -  ring_size,
>>> -  _ep->trb_pool_dma,
>>> -  GFP_DMA32 |
>> GFP_ATOMIC);
>>> +   priv_ep->trb_pool = dma_pool_alloc(priv_dev-
>>> eps_dma_pool,
>>> +   GFP_DMA32 | GFP_ATOMIC,
>>> +   _ep->trb_pool_dma);
>>
>> dma_pool_alloc also allocates the whole thunk of TRB region. See the
>> kernel-doc for dma_pool_create.
>>
>>  * Given one of these pools, dma_pool_alloc()
>>  * may be used to allocate memory.  Such memory will all have "consistent"
> 
> Yes,  dma_pool_alloc allocates the whole chunk of TRB region for enabled EPs 
> from the 
> pool. Currently the block size of the DMA pool is 7.2KB. So dma_pool_alloc 
> allocates at least
> 7.2KB for TRB region per enabled EP.
> 
>>> +
>>> if (!priv_ep->trb_pool)
>>> return -ENOMEM;
>>>
>>> @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct
>> cdns3_endpoint *priv_ep,
>>> usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
>>> priv_ep->dir);
>>>
>>> -   if ((priv_req->flags & REQUEST_UNALIGNED) &&
>>> -   priv_ep->dir == USB_DIR_OUT && !request->status)
>>> -   memcpy(request->buf, priv_req->aligned_buf->buf,
>>> -  

RE: [PATCH] usb: cdns3: Coherent memory allocation optimization

2021-03-08 Thread Sanket Parmar
Hi Peter,

> On 21-03-05 17:01:11, Sanket Parmar wrote:
> > Allocation of DMA coherent memory in atomic context using
> > dma_alloc_coherent() might fail on some platform. To fix it,
> > Replaced dma_alloc_coherent() with dma_pool API to allocate a
> > smaller chunk of DMA coherent memory for TRB rings.
> >
> > Also in cdns3_prepare_aligned_request_buf(), replaced
> > dma_alloc_coherent() with kmalloc and dma_map API to allocate
> > aligned request buffer of dynamic length.
> >
> 
> You do two changes in one commit, would you please split this one as
> two patches?
> 
> > Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
> 
> "commit" is not needed
> 
> > Reported by: Aswath Govindraju 
> 
> Reported-by:
>
I have split the change into two patches.
New patch series has been posted already.
 
> > Signed-off-by: Sanket Parmar 
> > ---
> >  drivers/usb/cdns3/cdns3-gadget.c |  115 ++
> ---
> >  drivers/usb/cdns3/cdns3-gadget.h |3 +
> >  2 files changed, 71 insertions(+), 47 deletions(-)
> >
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-
> gadget.c
> > index 582bfec..5fd6993 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.c
> > +++ b/drivers/usb/cdns3/cdns3-gadget.c
> > @@ -59,6 +59,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >
> >  #include "core.h"
> > @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct
> cdns3_endpoint *priv_ep,
> > return priv_ep->trb_pool_dma + offset;
> >  }
> >
> > -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep)
> > -{
> > -   switch (priv_ep->type) {
> > -   case USB_ENDPOINT_XFER_ISOC:
> > -   return TRB_ISO_RING_SIZE;
> > -   case USB_ENDPOINT_XFER_CONTROL:
> > -   return TRB_CTRL_RING_SIZE;
> > -   default:
> > -   if (priv_ep->use_streams)
> > -   return TRB_STREAM_RING_SIZE;
> > -   else
> > -   return TRB_RING_SIZE;
> > -   }
> > -}
> > -
> >  static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
> >  {
> > struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> >
> > if (priv_ep->trb_pool) {
> > -   dma_free_coherent(priv_dev->sysdev,
> > - cdns3_ring_size(priv_ep),
> > - priv_ep->trb_pool, priv_ep->trb_pool_dma);
> > +   dma_pool_free(priv_dev->eps_dma_pool,
> > +   priv_ep->trb_pool, priv_ep->trb_pool_dma);
> > priv_ep->trb_pool = NULL;
> > }
> >  }
> > @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct
> cdns3_endpoint *priv_ep)
> >  int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
> >  {
> > struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> > -   int ring_size = cdns3_ring_size(priv_ep);
> > +   int ring_size = TRB_RING_SIZE;
> 
> You will use the same size for TRB ring region for control/bulk/isoc
> endpoint.
> 
As single DMA pool is used to allocate the ring buffer, different TRB ring
size is not possible for different EP. Hence, TRB_RING_SIZE(600 * 12 B) which 
fits
for all type of EPs is used.

> > int num_trbs = ring_size / TRB_SIZE;
> > struct cdns3_trb *link_trb;
> >
> > @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct
> cdns3_endpoint *priv_ep)
> > cdns3_free_trb_pool(priv_ep);
> >
> > if (!priv_ep->trb_pool) {
> > -   priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev,
> > -  ring_size,
> > -  _ep->trb_pool_dma,
> > -  GFP_DMA32 |
> GFP_ATOMIC);
> > +   priv_ep->trb_pool = dma_pool_alloc(priv_dev-
> >eps_dma_pool,
> > +   GFP_DMA32 | GFP_ATOMIC,
> > +   _ep->trb_pool_dma);
> 
> dma_pool_alloc also allocates the whole thunk of TRB region. See the
> kernel-doc for dma_pool_create.
> 
>  * Given one of these pools, dma_pool_alloc()
>  * may be used to allocate memory.  Such memory will all have "consistent"

Yes,  dma_pool_alloc allocates the whole chunk of TRB region for enabled EPs 
from the 
pool. Currently the block size of the DMA pool is 7.2KB. So dma_pool_alloc 
allocates at least
7.2KB for TRB region per enabled EP.

> > +
> > if (!priv_ep->trb_pool)
> > return -ENOMEM;
> >
> > @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct
> cdns3_endpoint *priv_ep,
> > usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
> > priv_ep->dir);
> >
> > -   if ((priv_req->flags & REQUEST_UNALIGNED) &&
> > -   priv_ep->dir == USB_DIR_OUT && !request->status)
> > -   memcpy(request->buf, priv_req->aligned_buf->buf,
> > -  request->length);
> > +   if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req-
> >aligned_buf) {
> > +   

Re: [PATCH] usb: cdns3: Coherent memory allocation optimization

2021-03-05 Thread Peter Chen
On 21-03-05 17:01:11, Sanket Parmar wrote:
> Allocation of DMA coherent memory in atomic context using
> dma_alloc_coherent() might fail on some platform. To fix it,
> Replaced dma_alloc_coherent() with dma_pool API to allocate a
> smaller chunk of DMA coherent memory for TRB rings.
> 
> Also in cdns3_prepare_aligned_request_buf(), replaced
> dma_alloc_coherent() with kmalloc and dma_map API to allocate
> aligned request buffer of dynamic length.
> 

You do two changes in one commit, would you please split this one as
two patches?

> Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")

"commit" is not needed

> Reported by: Aswath Govindraju 

Reported-by:

> Signed-off-by: Sanket Parmar 
> ---
>  drivers/usb/cdns3/cdns3-gadget.c |  115 ++---
>  drivers/usb/cdns3/cdns3-gadget.h |3 +
>  2 files changed, 71 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/usb/cdns3/cdns3-gadget.c 
> b/drivers/usb/cdns3/cdns3-gadget.c
> index 582bfec..5fd6993 100644
> --- a/drivers/usb/cdns3/cdns3-gadget.c
> +++ b/drivers/usb/cdns3/cdns3-gadget.c
> @@ -59,6 +59,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "core.h"
> @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct cdns3_endpoint 
> *priv_ep,
>   return priv_ep->trb_pool_dma + offset;
>  }
>  
> -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep)
> -{
> - switch (priv_ep->type) {
> - case USB_ENDPOINT_XFER_ISOC:
> - return TRB_ISO_RING_SIZE;
> - case USB_ENDPOINT_XFER_CONTROL:
> - return TRB_CTRL_RING_SIZE;
> - default:
> - if (priv_ep->use_streams)
> - return TRB_STREAM_RING_SIZE;
> - else
> - return TRB_RING_SIZE;
> - }
> -}
> -
>  static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
>  {
>   struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
>  
>   if (priv_ep->trb_pool) {
> - dma_free_coherent(priv_dev->sysdev,
> -   cdns3_ring_size(priv_ep),
> -   priv_ep->trb_pool, priv_ep->trb_pool_dma);
> + dma_pool_free(priv_dev->eps_dma_pool,
> + priv_ep->trb_pool, priv_ep->trb_pool_dma);
>   priv_ep->trb_pool = NULL;
>   }
>  }
> @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct cdns3_endpoint 
> *priv_ep)
>  int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
>  {
>   struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> - int ring_size = cdns3_ring_size(priv_ep);
> + int ring_size = TRB_RING_SIZE;

You will use the same size for TRB ring region for control/bulk/isoc
endpoint.

>   int num_trbs = ring_size / TRB_SIZE;
>   struct cdns3_trb *link_trb;
>  
> @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct cdns3_endpoint 
> *priv_ep)
>   cdns3_free_trb_pool(priv_ep);
>  
>   if (!priv_ep->trb_pool) {
> - priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev,
> -ring_size,
> -_ep->trb_pool_dma,
> -GFP_DMA32 | GFP_ATOMIC);
> + priv_ep->trb_pool = dma_pool_alloc(priv_dev->eps_dma_pool,
> + GFP_DMA32 | GFP_ATOMIC,
> + _ep->trb_pool_dma);

dma_pool_alloc also allocates the whole thunk of TRB region. See the
kernel-doc for dma_pool_create.

 * Given one of these pools, dma_pool_alloc()
 * may be used to allocate memory.  Such memory will all have "consistent"
> +
>   if (!priv_ep->trb_pool)
>   return -ENOMEM;
>  
> @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct cdns3_endpoint 
> *priv_ep,
>   usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
>   priv_ep->dir);
>  
> - if ((priv_req->flags & REQUEST_UNALIGNED) &&
> - priv_ep->dir == USB_DIR_OUT && !request->status)
> - memcpy(request->buf, priv_req->aligned_buf->buf,
> -request->length);
> + if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) {
> + struct cdns3_aligned_buf *buf;
> +
> + buf = priv_req->aligned_buf;
> + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> + buf->dir);
> + priv_req->flags &= ~REQUEST_UNALIGNED;
> +
> + if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> + memcpy(request->buf, priv_req->aligned_buf->buf,
> +request->length);
> + }
> +
> + trace_cdns3_free_aligned_request(priv_req);
> + priv_req->aligned_buf->in_use = 0;
> + queue_work(system_freezable_wq,
> +