Re: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-15 Thread Peter Chen
On 21-03-15 15:51:04, Sanket Parmar wrote:
> > > +
> > >   priv_req->flags |= REQUEST_UNALIGNED;
> > >   trace_cdns3_prepare_aligned_request(priv_req);
> > >
> > > @@ -3088,11 +3113,11 @@ static void cdns3_gadget_exit(struct cdns
> > *cdns)
> > >   struct cdns3_aligned_buf *buf;
> > >
> > >   buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> > > - dma_free_coherent(priv_dev->sysdev, buf->size,
> > > -   buf->buf,
> > > -   buf->dma);
> > > + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > > + buf->dir);
> > 
> > It only needs to DMA unmap after DMA has completed, this buf will not be
> > used, otherwise, the kfree below will cause issue.
> 
> This part is not clear. The aligned DMA buffer is allocated and mapped in
> cdns3_prepare_aligned_request_buf() and put into aligned_buf_list. While
> unloading the gadget, we need to undo the same if aligned_buf_list is not
> empty. Am I missing something here?

My point is that this unmap operation is unnecessary: the aligned buf has no
user at this point, and the code calls kfree on it right afterwards. You could
also keep it, as it does no harm.

> 
> Also, I will post v2 of this patch which uses dma_*_noncoherent APIs 
> suggested by Christoph Hellwig.

-- 

Thanks,
Peter Chen



RE: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-15 Thread Sanket Parmar
> 
> On 21-03-09 06:19:40, Sanket Parmar wrote:
> > dma_alloc_coherent() might fail on the platform with a small DMA region.
> >
> > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > allocate aligned request buffer of dynamic length.
> >
> > Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
> 
> The comment with the 1st patch, it is not a bug-fix.

I will remove this. 

> 
> > Reported-by: Aswath Govindraju 
> > Signed-off-by: Sanket Parmar 
> > ---
> >  drivers/usb/cdns3/cdns3-gadget.c |   73 +--
> --
> >  drivers/usb/cdns3/cdns3-gadget.h |2 +
> >  2 files changed, 51 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-
> gadget.c
> > index 5f51215..b4955ce 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.c
> > +++ b/drivers/usb/cdns3/cdns3-gadget.c
> > @@ -818,10 +818,26 @@ void cdns3_gadget_giveback(struct
> cdns3_endpoint *priv_ep,
> > usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
> > priv_ep->dir);
> >
> > -   if ((priv_req->flags & REQUEST_UNALIGNED) &&
> > -   priv_ep->dir == USB_DIR_OUT && !request->status)
> > -   memcpy(request->buf, priv_req->aligned_buf->buf,
> > -  request->length);
> > +   if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req-
> >aligned_buf) {
> > +   struct cdns3_aligned_buf *buf;
> > +
> > +   buf = priv_req->aligned_buf;
> > +   dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > +   buf->dir);
> > +   priv_req->flags &= ~REQUEST_UNALIGNED;
> > +
> > +   if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> > +   memcpy(request->buf, priv_req->aligned_buf->buf,
> > +  request->length);
> > +   }
> > +
> > +   trace_cdns3_free_aligned_request(priv_req);
> > +   priv_req->aligned_buf->in_use = 0;
> > +   queue_work(system_freezable_wq,
> > +  &priv_dev->aligned_buf_wq);
> > +   priv_req->aligned_buf = NULL;
> > +
> > +   }
> >
> > priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
> > /* All TRBs have finished, clear the counter */
> > @@ -883,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct
> work_struct *work)
> >  * interrupts.
> >  */
> > spin_unlock_irqrestore(&priv_dev->lock, flags);
> > -   dma_free_coherent(priv_dev->sysdev, buf->size,
> > - buf->buf, buf->dma);
> > +   kfree(buf->buf);
> > kfree(buf);
> > spin_lock_irqsave(&priv_dev->lock, flags);
> > }
> > @@ -910,27 +925,16 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> > if (!buf)
> > return -ENOMEM;
> >
> > -   buf->size = priv_req->request.length;
> > +   buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc)
> ?
> > +   usb_ep_align(&(priv_ep->endpoint),
> priv_req->request.length)
> > +   : priv_req->request.length;
> >
> > -   buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> > - buf->size,
> > - &buf->dma,
> > - GFP_ATOMIC);
> > +   buf->buf = kmalloc(buf->size, GFP_ATOMIC);
> > if (!buf->buf) {
> > kfree(buf);
> > return -ENOMEM;
> > }
> >
> > -   if (priv_req->aligned_buf) {
> > -   trace_cdns3_free_aligned_request(priv_req);
> > -   priv_req->aligned_buf->in_use = 0;
> > -   queue_work(system_freezable_wq,
> > -  &priv_dev->aligned_buf_wq);
> > -   }
> > -
> > -   buf->in_use = 1;
> > -   priv_req->aligned_buf = buf;
> > -
> > list_add_tail(&buf->list,
> >   &priv_dev->aligned_buf_list);
> > }
> > @@ -940,6 +944,27 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> >priv_req->request.length);
> > }
> >
> > +   if (priv_req->aligned_buf) {
> > +   trace_cdns3_free_aligned_request(priv_req);
> > +   priv_req->aligned_buf->in_use = 0;
> > +   queue_work(system_freezable_wq,
> > +  &priv_dev->aligned_buf_wq);
> 
> @Pawel, do you remember when this condition is met?
> 
> > +   }
> > +
> > +   buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > +   buf->in_use = 1;
> > +   priv_req->aligned_buf = buf;
> > +
> > +   buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf-
> >size,
> > +

Re: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-13 Thread Peter Chen
On 21-03-09 06:19:40, Sanket Parmar wrote:
> dma_alloc_coherent() might fail on the platform with a small DMA region.
> 
> To avoid such failure in cdns3_prepare_aligned_request_buf(),
> dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> allocate aligned request buffer of dynamic length.
> 
> Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")

Same comment as for the 1st patch: this is not a bug fix.

> Reported-by: Aswath Govindraju 
> Signed-off-by: Sanket Parmar 
> ---
>  drivers/usb/cdns3/cdns3-gadget.c |   73 +
>  drivers/usb/cdns3/cdns3-gadget.h |2 +
>  2 files changed, 51 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/usb/cdns3/cdns3-gadget.c 
> b/drivers/usb/cdns3/cdns3-gadget.c
> index 5f51215..b4955ce 100644
> --- a/drivers/usb/cdns3/cdns3-gadget.c
> +++ b/drivers/usb/cdns3/cdns3-gadget.c
> @@ -818,10 +818,26 @@ void cdns3_gadget_giveback(struct cdns3_endpoint 
> *priv_ep,
>   usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
>   priv_ep->dir);
>  
> - if ((priv_req->flags & REQUEST_UNALIGNED) &&
> - priv_ep->dir == USB_DIR_OUT && !request->status)
> - memcpy(request->buf, priv_req->aligned_buf->buf,
> -request->length);
> + if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) {
> + struct cdns3_aligned_buf *buf;
> +
> + buf = priv_req->aligned_buf;
> + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> + buf->dir);
> + priv_req->flags &= ~REQUEST_UNALIGNED;
> +
> + if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> + memcpy(request->buf, priv_req->aligned_buf->buf,
> +request->length);
> + }
> +
> + trace_cdns3_free_aligned_request(priv_req);
> + priv_req->aligned_buf->in_use = 0;
> + queue_work(system_freezable_wq,
> +&priv_dev->aligned_buf_wq);
> + priv_req->aligned_buf = NULL;
> +
> + }
>  
>   priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
>   /* All TRBs have finished, clear the counter */
> @@ -883,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct 
> work_struct *work)
>* interrupts.
>*/
>   spin_unlock_irqrestore(&priv_dev->lock, flags);
> - dma_free_coherent(priv_dev->sysdev, buf->size,
> -   buf->buf, buf->dma);
> + kfree(buf->buf);
>   kfree(buf);
>   spin_lock_irqsave(&priv_dev->lock, flags);
>   }
> @@ -910,27 +925,16 @@ static int cdns3_prepare_aligned_request_buf(struct 
> cdns3_request *priv_req)
>   if (!buf)
>   return -ENOMEM;
>  
> - buf->size = priv_req->request.length;
> + buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
> + usb_ep_align(&(priv_ep->endpoint), 
> priv_req->request.length)
> + : priv_req->request.length;
>  
> - buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> -   buf->size,
> -   &buf->dma,
> -   GFP_ATOMIC);
> + buf->buf = kmalloc(buf->size, GFP_ATOMIC);
>   if (!buf->buf) {
>   kfree(buf);
>   return -ENOMEM;
>   }
>  
> - if (priv_req->aligned_buf) {
> - trace_cdns3_free_aligned_request(priv_req);
> - priv_req->aligned_buf->in_use = 0;
> - queue_work(system_freezable_wq,
> -&priv_dev->aligned_buf_wq);
> - }
> -
> - buf->in_use = 1;
> - priv_req->aligned_buf = buf;
> -
>   list_add_tail(&buf->list,
> &priv_dev->aligned_buf_list);
>   }
> @@ -940,6 +944,27 @@ static int cdns3_prepare_aligned_request_buf(struct 
> cdns3_request *priv_req)
>  priv_req->request.length);
>   }
>  
> + if (priv_req->aligned_buf) {
> + trace_cdns3_free_aligned_request(priv_req);
> + priv_req->aligned_buf->in_use = 0;
> + queue_work(system_freezable_wq,
> +&priv_dev->aligned_buf_wq);

@Pawel, do you remember when this condition is met?

> + }
> +
> + buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> + buf->in_use = 1;
> + priv_req->aligned_buf = buf;
> +
> + buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
> + buf->dir);
> +
> + if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {

RE: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-09 Thread Sanket Parmar
> On Tue, Mar 09, 2021 at 10:18:43AM +0000, Sanket Parmar wrote:
> > > On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> > > > dma_alloc_coherent() might fail on the platform with a small DMA
> region.
> > > >
> > > > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > > > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > > > allocate aligned request buffer of dynamic length.
> > >
> > > dma_alloc_noncoherent is the proper API instead of using kmalloc, which
> > > can lead to unaddressable memory that might require bounce buffering.
> >
> > cdns3 device required DMA coherent buffer to perform operations. So
> > dma_alloc_noncoherent will not help here.
> >
> > Also all gadget classes(except g_ether) use kmalloc to allocated request
> buffer,
> > and device driver uses usb_gadget_map_request_by_dev to map the
> request
> > buffer. Similar approach is used to allocate aligned buffer.
> 
> If you can use kmalloc and dma_map_single you can use
> dma_alloc_noncoherent per definition.

Okay. I was not aware of it. I will test it. 
Thank you for your feedback.

--
Sanket


Re: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-09 Thread Christoph Hellwig
On Tue, Mar 09, 2021 at 10:18:43AM +0000, Sanket Parmar wrote:
> > On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> > > dma_alloc_coherent() might fail on the platform with a small DMA region.
> > >
> > > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > > allocate aligned request buffer of dynamic length.
> > 
> > dma_alloc_noncoherent is the proper API instead of using kmalloc, which
> > can lead to unaddressable memory that might require bounce buffering.
> 
> cdns3 device required DMA coherent buffer to perform operations. So 
> dma_alloc_noncoherent will not help here.
> 
> Also all gadget classes(except g_ether) use kmalloc to allocated request 
> buffer,
> and device driver uses usb_gadget_map_request_by_dev to map the request
> buffer. Similar approach is used to allocate aligned buffer. 

If you can use kmalloc and dma_map_single, you can by definition also use
dma_alloc_noncoherent.


RE: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-09 Thread Sanket Parmar
> On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> > dma_alloc_coherent() might fail on the platform with a small DMA region.
> >
> > To avoid such failure in cdns3_prepare_aligned_request_buf(),
> > dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> > allocate aligned request buffer of dynamic length.
> 
> dma_alloc_noncoherent is the proper API instead of using kmalloc, which
> can lead to unaddressable memory that might require bounce buffering.

The cdns3 device requires a DMA-coherent buffer to perform operations, so
dma_alloc_noncoherent will not help here.

Also, all gadget classes (except g_ether) use kmalloc to allocate the request
buffer, and the device driver uses usb_gadget_map_request_by_dev to map it.
A similar approach is used here to allocate the aligned buffer.

Thanks,
Sanket


Re: [PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-09 Thread Christoph Hellwig
On Tue, Mar 09, 2021 at 06:19:40AM +0100, Sanket Parmar wrote:
> dma_alloc_coherent() might fail on the platform with a small DMA region.
> 
> To avoid such failure in cdns3_prepare_aligned_request_buf(),
> dma_alloc_coherent() is replaced with kmalloc and dma_map API to
> allocate aligned request buffer of dynamic length.

dma_alloc_noncoherent is the proper API instead of using kmalloc, which
can lead to unaddressable memory that might require bounce buffering.


[PATCH 2/2] usb: cdns3: Optimize DMA request buffer allocation

2021-03-08 Thread Sanket Parmar
dma_alloc_coherent() might fail on platforms with a small DMA region.

To avoid such a failure in cdns3_prepare_aligned_request_buf(),
dma_alloc_coherent() is replaced with kmalloc and the dma_map API to
allocate an aligned request buffer of dynamic length.

Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Reported-by: Aswath Govindraju 
Signed-off-by: Sanket Parmar 
---
 drivers/usb/cdns3/cdns3-gadget.c |   73 +
 drivers/usb/cdns3/cdns3-gadget.h |2 +
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index 5f51215..b4955ce 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -818,10 +818,26 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep,
usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
priv_ep->dir);
 
-   if ((priv_req->flags & REQUEST_UNALIGNED) &&
-   priv_ep->dir == USB_DIR_OUT && !request->status)
-   memcpy(request->buf, priv_req->aligned_buf->buf,
-  request->length);
+   if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) {
+   struct cdns3_aligned_buf *buf;
+
+   buf = priv_req->aligned_buf;
+   dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
+   buf->dir);
+   priv_req->flags &= ~REQUEST_UNALIGNED;
+
+   if (priv_ep->dir == USB_DIR_OUT && !request->status) {
+   memcpy(request->buf, priv_req->aligned_buf->buf,
+  request->length);
+   }
+
+   trace_cdns3_free_aligned_request(priv_req);
+   priv_req->aligned_buf->in_use = 0;
+   queue_work(system_freezable_wq,
+  &priv_dev->aligned_buf_wq);
+   priv_req->aligned_buf = NULL;
+
+   }
 
priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
/* All TRBs have finished, clear the counter */
@@ -883,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct 
work_struct *work)
 * interrupts.
 */
spin_unlock_irqrestore(&priv_dev->lock, flags);
-   dma_free_coherent(priv_dev->sysdev, buf->size,
- buf->buf, buf->dma);
+   kfree(buf->buf);
kfree(buf);
spin_lock_irqsave(&priv_dev->lock, flags);
}
@@ -910,27 +925,16 @@ static int cdns3_prepare_aligned_request_buf(struct 
cdns3_request *priv_req)
if (!buf)
return -ENOMEM;
 
-   buf->size = priv_req->request.length;
+   buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
+   usb_ep_align(&(priv_ep->endpoint), 
priv_req->request.length)
+   : priv_req->request.length;
 
-   buf->buf = dma_alloc_coherent(priv_dev->sysdev,
- buf->size,
- &buf->dma,
- GFP_ATOMIC);
+   buf->buf = kmalloc(buf->size, GFP_ATOMIC);
if (!buf->buf) {
kfree(buf);
return -ENOMEM;
}
 
-   if (priv_req->aligned_buf) {
-   trace_cdns3_free_aligned_request(priv_req);
-   priv_req->aligned_buf->in_use = 0;
-   queue_work(system_freezable_wq,
-  &priv_dev->aligned_buf_wq);
-   }
-
-   buf->in_use = 1;
-   priv_req->aligned_buf = buf;
-
list_add_tail(&buf->list,
  &priv_dev->aligned_buf_list);
}
@@ -940,6 +944,27 @@ static int cdns3_prepare_aligned_request_buf(struct 
cdns3_request *priv_req)
   priv_req->request.length);
}
 
+   if (priv_req->aligned_buf) {
+   trace_cdns3_free_aligned_request(priv_req);
+   priv_req->aligned_buf->in_use = 0;
+   queue_work(system_freezable_wq,
+  &priv_dev->aligned_buf_wq);
+   }
+
+   buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+   buf->in_use = 1;
+   priv_req->aligned_buf = buf;
+
+   buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
+   buf->dir);
+
+   if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
+   dev_err(priv_dev->dev, "Failed to map buffer\n");
+   kfree(buf->buf);
+   kfree(buf);
+   return -EFAULT;
+   }
+
priv_req->flags |= REQUEST_UNALIGNED;
trace_cdns3_pre