Dear Marek Vasut, On Fri, Jul 27, 2012 at 02:54:07 PM, Marek Vasut wrote: > > This patch takes advantage of the hardware EHCI qTD queuing > > mechanism to > > avoid software overhead and to make transfers as fast as possible. > > > > The only drawback is a call to memalign. However, this is fast > > compared to > > the transfer timings, and the heap size to allocate is small, e.g. > > a > > little bit more than 100 kB for a transfer length of 65535 packets > > of 512 > > bytes. > > > > Tested on i.MX25 and i.MX35. In my test conditions, the speedup was > > about > > 15x using page-aligned buffers, which is really appreciable when > > accessing > > large files. > > > > Signed-off-by: Benoît Thébaudeau <benoit.thebaud...@advansee.com> > > Cc: Marek Vasut <ma...@denx.de> > > Cc: Ilya Yanok <ilya.ya...@cogentembedded.com> > > Cc: Stefan Herbrechtsmeier <ste...@herbrechtsmeier.net> > > --- > > .../drivers/usb/host/ehci-hcd.c | 94 > > ++++++++++++++------ 1 file changed, 65 insertions(+), 29 > > deletions(-) > > > > diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c > > u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c index > > 5b3b906..b5645fa > > 100644 > > --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c > > +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c > > @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, > > unsigned long > > pipe, void *buffer, int length, struct devrequest *req) > > { > > ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN); > > - ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN); > > + struct qTD *qtd; > > + int qtd_count = 0; > > int qtd_counter = 0; > > > > volatile struct qTD *vtd; > > @@ -229,8 +230,25 @@ ehci_submit_async(struct usb_device *dev, > > unsigned > > long pipe, void *buffer, le16_to_cpu(req->value), > > le16_to_cpu(req->value), > > le16_to_cpu(req->index)); > > > > + if (req != NULL) /* SETUP + ACK */ > > + qtd_count += 1 + 1; > > + if (length > 0 || req == NULL) { /* buffer */ > > + if ((uint32_t)buffer & 4095) /* page-unaligned */ > > + qtd_count += (((uint32_t)buffer & 4095) + length + > > + (QT_BUFFER_CNT - 1) * 4096 - 1) / > > + ((QT_BUFFER_CNT - 1) * 4096); > > Ok, maybe you can please elaborate on this crazy calculation in here? > Or somehow > clarify it? Also, won't the macros in include/common.h help in a way? > (like > ROUND() etc).
I have already posted a v2 for this specific patch (only 2/5) that does exactly that. Please review only the latest versions of patches. > I don't really graps what you're trying to calculate here, so maybe > even a > comment would help. I'll do that. > > + else /* page-aligned */ > > + qtd_count += (length + QT_BUFFER_CNT * 4096 - 1) / > > + (QT_BUFFER_CNT * 4096); > > Same here, also please avoid using those 4096 and such constants ... > maybe > #define them in ehci.h ? Yes. It was already everywhere, so I went on the same way. Do you think using PAGE_SIZE from <linux/compat.h> would be fine since these 4096 are nothing more than page sizes? > > + } > > + qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD)); > > So your code can alloc more than 5 qTDs ? How does it chain them > then? Into more > QHs ? It's done in exactly the same way as for the original 3 qTDs, only with more qTDs, but still with 5 qt_buffers per qTD. > > + if (qtd == NULL) { > > + printf("unable to allocate TDs\n"); > > + return -1; > > + } > > + > > memset(qh, 0, sizeof(struct QH)); > > - memset(qtd, 0, 3 * sizeof(*qtd)); > > + memset(qtd, 0, qtd_count * sizeof(*qtd)); > > > > toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), > > usb_pipeout(pipe)); > > > > @@ -291,31 +309,46 @@ ehci_submit_async(struct usb_device *dev, > > unsigned > > long pipe, void *buffer, } > > > > if (length > 0 || req == NULL) { > > - /* > > - * Setup request qTD (3.5 in ehci-r10.pdf) > > - * > > - * qt_next ................ 03-00 H > > - * qt_altnext ............. 07-04 H > > - * qt_token ............... 0B-08 H > > - * > > - * [ buffer, buffer_hi ] loaded with "buffer". > > - */ > > - qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); > > - qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); > > - token = (toggle << 31) | > > - (length << 16) | > > - ((req == NULL ? 1 : 0) << 15) | > > - (0 << 12) | > > - (3 << 10) | > > - ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); > > - qtd[qtd_counter].qt_token = cpu_to_hc32(token); > > - if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) { > > - printf("unable construct DATA td\n"); > > - goto fail; > > - } > > - /* Update previous qTD! */ > > - *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); > > - tdp = &qtd[qtd_counter++].qt_next; > > + uint8_t *buf_ptr = buffer; > > + int left_length = length; > > + > > + do { > > + int xfr_bytes = min(left_length, > > + (QT_BUFFER_CNT * 4096 - > > + ((uint32_t)buf_ptr & 4095)) & > > + ~4095); > > Magic formula yet again ... comment would again be welcome please. OK. > > + /* > > + * Setup request qTD (3.5 in ehci-r10.pdf) > > + * > > + * qt_next ................ 03-00 H > > + * qt_altnext ............. 07-04 H > > + * qt_token ............... 0B-08 H > > + * > > + * [ buffer, buffer_hi ] loaded with "buffer". > > + */ > > + qtd[qtd_counter].qt_next = > > + cpu_to_hc32(QT_NEXT_TERMINATE); > > + qtd[qtd_counter].qt_altnext = > > + cpu_to_hc32(QT_NEXT_TERMINATE); > > + token = (toggle << 31) | > > + (xfr_bytes << 16) | > > + ((req == NULL ? 1 : 0) << 15) | > > + (0 << 12) | > > + (3 << 10) | > > + ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); > > If you could fix all this magic afterwards (not in these patches), > that'd be > great. Do you only mean #defining all those values? > > + qtd[qtd_counter].qt_token = cpu_to_hc32(token); > > + if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr, > > + xfr_bytes) != 0) { > > + printf("unable construct DATA td\n"); > > + goto fail; > > + } > > + /* Update previous qTD! */ > > + *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); > > + tdp = &qtd[qtd_counter++].qt_next; > > + buf_ptr += xfr_bytes; > > + left_length -= xfr_bytes; > > + } while (left_length > 0); > > } > > > > if (req != NULL) { > > @@ -346,7 +379,8 @@ ehci_submit_async(struct usb_device *dev, > > unsigned long > > pipe, void *buffer, flush_dcache_range((uint32_t)qh_list, > > ALIGN_END_ADDR(struct QH, qh_list, 1)); > > flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, > > 1)); > > - flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, > > 3)); > > + flush_dcache_range((uint32_t)qtd, > > + ALIGN_END_ADDR(struct qTD, qtd, qtd_count)); > > > > /* Set async. queue head pointer. */ > > ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list); > > @@ -377,7 +411,7 @@ ehci_submit_async(struct usb_device *dev, > > unsigned long > > pipe, void *buffer, invalidate_dcache_range((uint32_t)qh, > > ALIGN_END_ADDR(struct QH, qh, 1)); > > invalidate_dcache_range((uint32_t)qtd, > > - ALIGN_END_ADDR(struct qTD, qtd, 3)); > > + ALIGN_END_ADDR(struct qTD, qtd, qtd_count)); > > > > token = hc32_to_cpu(vtd->qt_token); > > if (!(token & 0x80)) > > @@ -450,9 +484,11 @@ ehci_submit_async(struct usb_device *dev, > > unsigned > > long pipe, void *buffer, ehci_readl(&hcor->or_portsc[1])); > > } > > > > + free(qtd); > > return (dev->status != USB_ST_NOT_PROC) ? 0 : -1; > > > > fail: > > + free(qtd); > > return -1; > > } > Best regards, Benoît Thébaudeau _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot