Dear Marek Vasut,

On Fri, Jul 27, 2012 at 02:54:07 PM, Marek Vasut wrote:
> > This patch takes advantage of the hardware EHCI qTD queuing
> > mechanism to
> > avoid software overhead and to make transfers as fast as possible.
> > 
> > The only drawback is a call to memalign. However, this is fast
> > compared to
> > the transfer timings, and the heap size to allocate is small, e.g.
> > a
> > little bit more than 100 kB for a transfer length of 65535 packets
> > of 512
> > bytes.
> > 
> > Tested on i.MX25 and i.MX35. In my test conditions, the speedup was
> > about
> > 15x using page-aligned buffers, which is really appreciable when
> > accessing
> > large files.
> > 
> > Signed-off-by: Benoît Thébaudeau <benoit.thebaud...@advansee.com>
> > Cc: Marek Vasut <ma...@denx.de>
> > Cc: Ilya Yanok <ilya.ya...@cogentembedded.com>
> > Cc: Stefan Herbrechtsmeier <ste...@herbrechtsmeier.net>
> > ---
> >  .../drivers/usb/host/ehci-hcd.c                    |   94
> > ++++++++++++++------ 1 file changed, 65 insertions(+), 29
> > deletions(-)
> > 
> > diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c
> > u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c index
> > 5b3b906..b5645fa
> > 100644
> > --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c
> > +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c
> > @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev,
> > unsigned long
> > pipe, void *buffer, int length, struct devrequest *req)
> >  {
> >     ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
> > -   ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
> > +   struct qTD *qtd;
> > +   int qtd_count = 0;
> >     int qtd_counter = 0;
> > 
> >     volatile struct qTD *vtd;
> > @@ -229,8 +230,25 @@ ehci_submit_async(struct usb_device *dev,
> > unsigned
> > long pipe, void *buffer, le16_to_cpu(req->value),
> > le16_to_cpu(req->value),
> >                   le16_to_cpu(req->index));
> > 
> > +   if (req != NULL)                        /* SETUP + ACK */
> > +           qtd_count += 1 + 1;
> > +   if (length > 0 || req == NULL) {        /* buffer */
> > +           if ((uint32_t)buffer & 4095)            /* page-unaligned */
> > +                   qtd_count += (((uint32_t)buffer & 4095) + length +
> > +                                   (QT_BUFFER_CNT - 1) * 4096 - 1) /
> > +                                           ((QT_BUFFER_CNT - 1) * 4096);
> 
> Ok, maybe you can please elaborate on this crazy calculation in here?
> Or somehow
> clarify it? Also, won't the macros in include/common.h help in a way?
> (like
> ROUND() etc).

I have already posted a v2 for this specific patch (only 2/5) that does exactly
that. Please review only the latest versions of patches.

> I don't really graps what you're trying to calculate here, so maybe
> even a
> comment would help.

I'll do that.

> > +           else                                    /* page-aligned */
> > +                   qtd_count += (length + QT_BUFFER_CNT * 4096 - 1) /
> > +                                   (QT_BUFFER_CNT * 4096);
> 
> Same here, also please avoid using those 4096 and such constants ...
> maybe
> #define them in ehci.h ?

Yes. It was already everywhere, so I went on the same way. Do you think using
PAGE_SIZE from <linux/compat.h> would be fine since these 4096 are nothing more
than page sizes?

> > +   }
> > +   qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD));
> 
> So your code can alloc more than 5 qTDs ? How does it chain them
> then? Into more
> QHs ?

It's done in exactly the same way as for the original 3 qTDs, only with more
qTDs, but still with 5 qt_buffers per qTD.

> > +   if (qtd == NULL) {
> > +           printf("unable to allocate TDs\n");
> > +           return -1;
> > +   }
> > +
> >     memset(qh, 0, sizeof(struct QH));
> > -   memset(qtd, 0, 3 * sizeof(*qtd));
> > +   memset(qtd, 0, qtd_count * sizeof(*qtd));
> > 
> >     toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe),
> >     usb_pipeout(pipe));
> > 
> > @@ -291,31 +309,46 @@ ehci_submit_async(struct usb_device *dev,
> > unsigned
> > long pipe, void *buffer, }
> > 
> >     if (length > 0 || req == NULL) {
> > -           /*
> > -            * Setup request qTD (3.5 in ehci-r10.pdf)
> > -            *
> > -            *   qt_next ................ 03-00 H
> > -            *   qt_altnext ............. 07-04 H
> > -            *   qt_token ............... 0B-08 H
> > -            *
> > -            *   [ buffer, buffer_hi ] loaded with "buffer".
> > -            */
> > -           qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
> > -           qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
> > -           token = (toggle << 31) |
> > -               (length << 16) |
> > -               ((req == NULL ? 1 : 0) << 15) |
> > -               (0 << 12) |
> > -               (3 << 10) |
> > -               ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
> > -           qtd[qtd_counter].qt_token = cpu_to_hc32(token);
> > -           if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) {
> > -                   printf("unable construct DATA td\n");
> > -                   goto fail;
> > -           }
> > -           /* Update previous qTD! */
> > -           *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
> > -           tdp = &qtd[qtd_counter++].qt_next;
> > +           uint8_t *buf_ptr = buffer;
> > +           int left_length = length;
> > +
> > +           do {
> > +                   int xfr_bytes = min(left_length,
> > +                                       (QT_BUFFER_CNT * 4096 -
> > +                                        ((uint32_t)buf_ptr & 4095)) &
> > +                                       ~4095);
> 
> Magic formula yet again ... comment would again be welcome please.

OK.

> > +                   /*
> > +                    * Setup request qTD (3.5 in ehci-r10.pdf)
> > +                    *
> > +                    *   qt_next ................ 03-00 H
> > +                    *   qt_altnext ............. 07-04 H
> > +                    *   qt_token ............... 0B-08 H
> > +                    *
> > +                    *   [ buffer, buffer_hi ] loaded with "buffer".
> > +                    */
> > +                   qtd[qtd_counter].qt_next =
> > +                                   cpu_to_hc32(QT_NEXT_TERMINATE);
> > +                   qtd[qtd_counter].qt_altnext =
> > +                                   cpu_to_hc32(QT_NEXT_TERMINATE);
> > +                   token = (toggle << 31) |
> > +                       (xfr_bytes << 16) |
> > +                       ((req == NULL ? 1 : 0) << 15) |
> > +                       (0 << 12) |
> > +                       (3 << 10) |
> > +                       ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
> 
> If you could fix all this magic afterwards (not in these patches),
> that'd be
> great.

Do you only mean #defining all those values?

> > +                   qtd[qtd_counter].qt_token = cpu_to_hc32(token);
> > +                   if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr,
> > +                                           xfr_bytes) != 0) {
> > +                           printf("unable construct DATA td\n");
> > +                           goto fail;
> > +                   }
> > +                   /* Update previous qTD! */
> > +                   *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
> > +                   tdp = &qtd[qtd_counter++].qt_next;
> > +                   buf_ptr += xfr_bytes;
> > +                   left_length -= xfr_bytes;
> > +           } while (left_length > 0);
> >     }
> > 
> >     if (req != NULL) {
> > @@ -346,7 +379,8 @@ ehci_submit_async(struct usb_device *dev,
> > unsigned long
> > pipe, void *buffer, flush_dcache_range((uint32_t)qh_list,
> >             ALIGN_END_ADDR(struct QH, qh_list, 1));
> >     flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh,
> >     1));
> > -   flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd,
> > 3));
> > +   flush_dcache_range((uint32_t)qtd,
> > +                      ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
> > 
> >     /* Set async. queue head pointer. */
> >     ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list);
> > @@ -377,7 +411,7 @@ ehci_submit_async(struct usb_device *dev,
> > unsigned long
> > pipe, void *buffer, invalidate_dcache_range((uint32_t)qh,
> >                     ALIGN_END_ADDR(struct QH, qh, 1));
> >             invalidate_dcache_range((uint32_t)qtd,
> > -                   ALIGN_END_ADDR(struct qTD, qtd, 3));
> > +                   ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
> > 
> >             token = hc32_to_cpu(vtd->qt_token);
> >             if (!(token & 0x80))
> > @@ -450,9 +484,11 @@ ehci_submit_async(struct usb_device *dev,
> > unsigned
> > long pipe, void *buffer, ehci_readl(&hcor->or_portsc[1]));
> >     }
> > 
> > +   free(qtd);
> >     return (dev->status != USB_ST_NOT_PROC) ? 0 : -1;
> > 
> >  fail:
> > +   free(qtd);
> >     return -1;
> >  }
> 

Best regards,
Benoît Thébaudeau
_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot

Reply via email to