> ensure that transmit and receive buffers are cache-line aligned > invalidate cache after each packet received > flush cache before transmitting > > Signed-off-by: Eric Nelson <eric.nel...@boundarydevices.com> > --- > drivers/net/fec_mxc.c | 248 > ++++++++++++++++++++++++++++++++++++------------- drivers/net/fec_mxc.h | > 19 +---- > 2 files changed, 184 insertions(+), 83 deletions(-) > > diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c > index 1fdd071..f72304b 100644 > --- a/drivers/net/fec_mxc.c > +++ b/drivers/net/fec_mxc.c > @@ -50,6 +50,33 @@ DECLARE_GLOBAL_DATA_PTR; > #define CONFIG_FEC_MXC_SWAP_PACKET > #endif > > +#ifndef CONFIG_FEC_DESC_ALIGNMENT > +#define CONFIG_FEC_DESC_ALIGNMENT ARCH_DMA_MINALIGN > +#endif > + > +#ifndef CONFIG_FEC_DATA_ALIGNMENT > +#define CONFIG_FEC_DATA_ALIGNMENT ARCH_DMA_MINALIGN > +#endif > + > +/* Check various alignment issues at compile time */ > +#if ((CONFIG_FEC_DESC_ALIGNMENT < 16) || (CONFIG_FEC_DESC_ALIGNMENT % 16 > != 0)) +#error "CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!" > +#endif > + > +#if ((CONFIG_FEC_DATA_ALIGNMENT < 16) || (CONFIG_FEC_DATA_ALIGNMENT % 16 > != 0)) +#error "CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!" > +#endif > + > +#if ((PKTALIGN < CONFIG_FEC_DATA_ALIGNMENT) || \ > + (PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0)) > +#error "PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!" > +#endif > + > +#if ((PKTSIZE_ALIGN < CONFIG_FEC_DATA_ALIGNMENT) || \ > + (PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0)) > +#error "PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!" 
> +#endif > + > #undef DEBUG > > struct nbuf { > @@ -259,43 +286,47 @@ static int fec_tx_task_disable(struct fec_priv *fec) > * Initialize receive task's buffer descriptors > * @param[in] fec all we know about the device yet > * @param[in] count receive buffer count to be allocated > - * @param[in] size size of each receive buffer > + * @param[in] dsize desired size of each receive buffer > * @return 0 on success > * > * For this task we need additional memory for the data buffers. And each > * data buffer requires some alignment. Thy must be aligned to a specific > - * boundary each (DB_DATA_ALIGNMENT). > + * boundary each. > */ > -static int fec_rbd_init(struct fec_priv *fec, int count, int size) > +static int fec_rbd_init(struct fec_priv *fec, int count, int dsize) > { > - int ix; > - uint32_t p = 0; > - > - /* reserve data memory and consider alignment */ > - if (fec->rdb_ptr == NULL) > - fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT); > - p = (uint32_t)fec->rdb_ptr; > - if (!p) { > - puts("fec_mxc: not enough malloc memory\n"); > - return -ENOMEM; > - } > - memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT); > - p += DB_DATA_ALIGNMENT-1; > - p &= ~(DB_DATA_ALIGNMENT-1); > - > - for (ix = 0; ix < count; ix++) { > - writel(p, &fec->rbd_base[ix].data_pointer); > - p += size; > - writew(FEC_RBD_EMPTY, &fec->rbd_base[ix].status); > - writew(0, &fec->rbd_base[ix].data_length); > - } > + uint32_t size; > + int i; > + > /* > - * mark the last RBD to close the ring > + * Allocate memory for the buffers. 
This allocation respects the > + * alignment > */ > - writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[ix - 1].status); > + size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT); > + for (i = 0; i < count; i++) { > + if (0 == fec->rbd_base[i].data_pointer) { > + uint8_t *data = memalign(CONFIG_FEC_DATA_ALIGNMENT, size); > + if (!data) { > + printf("%s: error allocating rxbuf %d\n", __func__, i); > + goto err; > + } > + fec->rbd_base[i].data_pointer = (uint32_t)data; > + } // needs allocation
Wrong comment > + fec->rbd_base[i].status = FEC_RBD_EMPTY; > + fec->rbd_base[i].data_length = 0; > + } > + > + /* Mark the last RBD to close the ring. */ > + fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP; > fec->rbd_index = 0; > > return 0; > + > +err: > + for (; i >= 0; i--) > + free((uint8_t *)fec->rbd_base[i].data_pointer); > + > + return -ENOMEM; > } > > /** > @@ -312,8 +343,8 @@ static int fec_rbd_init(struct fec_priv *fec, int > count, int size) */ > static void fec_tbd_init(struct fec_priv *fec) > { > - writew(0x0000, &fec->tbd_base[0].status); > - writew(FEC_TBD_WRAP, &fec->tbd_base[1].status); > + fec->tbd_base[0].status = 0; > + fec->tbd_base[1].status = FEC_TBD_WRAP; > fec->tbd_index = 0; > } > > @@ -387,12 +418,25 @@ static int fec_open(struct eth_device *edev) > { > struct fec_priv *fec = (struct fec_priv *)edev->priv; > int speed; > + uint32_t addr, size; > + int i; > > debug("fec_open: fec_open(dev)\n"); > /* full-duplex, heartbeat disabled */ > writel(1 << 2, &fec->eth->x_cntrl); > fec->rbd_index = 0; > > + /* Invalidate all descriptors */ > + for (i = 0; i < FEC_RBD_NUM - 1; i++) > + fec_rbd_clean(0, &fec->rbd_base[i]); > + fec_rbd_clean(1, &fec->rbd_base[i]); > + > + /* Flush the descriptors into RAM */ > + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), > + CONFIG_FEC_DATA_ALIGNMENT); > + addr = (uint32_t)&fec->rbd_base[0]; addr = (uint32_t)fec->rbd_base; would likely work too? 
> + flush_dcache_range(addr, addr + size); > + > #ifdef FEC_QUIRK_ENET_MAC > /* Enable ENET HW endian SWAP */ > writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP, > @@ -478,38 +522,44 @@ static int fec_open(struct eth_device *edev) > > static int fec_init(struct eth_device *dev, bd_t* bd) > { > - uint32_t base; > struct fec_priv *fec = (struct fec_priv *)dev->priv; > uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop; > uint32_t rcntrl; > - int i; > + uint32_t size; > + int i, ret; > > /* Initialize MAC address */ > fec_set_hwaddr(dev); > > /* > - * reserve memory for both buffer descriptor chains at once > - * Datasheet forces the startaddress of each chain is 16 byte > - * aligned > + * Allocate transmit descriptors, there are two in total. This > + * allocation respects cache alignment. > */ > - if (fec->base_ptr == NULL) > - fec->base_ptr = malloc((2 + FEC_RBD_NUM) * > - sizeof(struct fec_bd) + DB_ALIGNMENT); > - base = (uint32_t)fec->base_ptr; > - if (!base) { > - puts("fec_mxc: not enough malloc memory\n"); > - return -ENOMEM; > + if (!fec->tbd_base) { > + size = roundup(2 * sizeof(struct fec_bd), > + CONFIG_FEC_DATA_ALIGNMENT); > + fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size); > + if (!fec->tbd_base) { > + ret = -ENOMEM; > + goto err1; > + } > + memset(fec->tbd_base, 0, size); > } > - memset((void *)base, 0, (2 + FEC_RBD_NUM) * > - sizeof(struct fec_bd) + DB_ALIGNMENT); > - base += (DB_ALIGNMENT-1); > - base &= ~(DB_ALIGNMENT-1); > - > - fec->rbd_base = (struct fec_bd *)base; > - > - base += FEC_RBD_NUM * sizeof(struct fec_bd); > > - fec->tbd_base = (struct fec_bd *)base; > + /* > + * Allocate receive descriptors. This allocation respects cache > + * alignment. 
> + */ > + if (!fec->rbd_base) { > + size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd), > + CONFIG_FEC_DATA_ALIGNMENT); > + fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size); > + if (!fec->rbd_base) { > + ret = -ENOMEM; > + goto err2; > + } > + memset(fec->rbd_base, 0, size); > + } > > /* > * Set interrupt mask register > @@ -570,9 +620,8 @@ static int fec_init(struct eth_device *dev, bd_t* bd) > * Initialize RxBD/TxBD rings > */ > if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) { > - free(fec->base_ptr); > - fec->base_ptr = NULL; > - return -ENOMEM; > + ret = -ENOMEM; > + goto err3; > } > fec_tbd_init(fec); > > @@ -583,6 +632,13 @@ static int fec_init(struct eth_device *dev, bd_t* bd) > #endif > fec_open(dev); > return 0; > + > +err3: > + free(fec->rbd_base); > +err2: > + free(fec->tbd_base); > +err1: > + return ret; > } > > /** > @@ -631,9 +687,11 @@ static void fec_halt(struct eth_device *dev) > * @param[in] length Data count in bytes > * @return 0 on success > */ > -static int fec_send(struct eth_device *dev, volatile void* packet, int > length) +static int fec_send(struct eth_device *dev, volatile void > *packet, int length) { > unsigned int status; > + uint32_t size; > + uint32_t addr; > > /* > * This routine transmits one frame. This routine only accepts > @@ -650,15 +708,21 @@ static int fec_send(struct eth_device *dev, volatile > void* packet, int length) } > > /* > - * Setup the transmit buffer > - * Note: We are always using the first buffer for transmission, > - * the second will be empty and only used to stop the DMA engine > + * Setup the transmit buffer. We are always using the first buffer for > + * transmission, the second will be empty and only used to stop the DMA > + * engine. We also flush the packet to RAM here to avoid cache trouble. 
> */ > #ifdef CONFIG_FEC_MXC_SWAP_PACKET > swap_packet((uint32_t *)packet, length); > #endif > - writew(length, &fec->tbd_base[fec->tbd_index].data_length); > - writel((uint32_t)packet, &fec->tbd_base[fec->tbd_index].data_pointer); > + > + addr = (uint32_t)packet; > + size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT); > + flush_dcache_range(addr, addr + size); What if size of the packet isn't aligned on cacheline boundary? Won't it choke here? > + > + fec->tbd_base[fec->tbd_index].data_length = length; > + fec->tbd_base[fec->tbd_index].data_pointer = addr; > + > /* > * update BD's status now > * This block: > @@ -667,9 +731,18 @@ static int fec_send(struct eth_device *dev, volatile > void* packet, int length) * - might be the last BD in the list, so the > address counter should * wrap (-> keep the WRAP flag) > */ > - status = readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_WRAP; > status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY; > - writew(status, &fec->tbd_base[fec->tbd_index].status); > + fec->tbd_base[fec->tbd_index].status = status; > + > + /* > + * Flush data cache. This code flushes both TX descriptors to RAM. > + * After this code, the descriptors will be safely in RAM > + * and we can start DMA. > + */ > + size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT); > + addr = (uint32_t)fec->tbd_base; > + flush_dcache_range(addr, addr + size); Same concern here and everywhere else ... I believe aligning it like this is quite unsafe :-( > > /* > * Enable SmartDMA transmit task > @@ -677,11 +750,16 @@ static int fec_send(struct eth_device *dev, volatile > void* packet, int length) fec_tx_task_enable(fec); > > /* > - * wait until frame is sent . > + * Wait until frame is sent. On each turn of the wait cycle, we must > + * invalidate data cache to see what's really in RAM. Also, we need > + * a barrier here. 
> */ > + invalidate_dcache_range(addr, addr + size); > while (readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_READY) { > udelay(1); > + invalidate_dcache_range(addr, addr + size); > } > + > debug("fec_send: status 0x%x index %d\n", > readw(&fec->tbd_base[fec->tbd_index].status), > fec->tbd_index); > @@ -707,6 +785,8 @@ static int fec_recv(struct eth_device *dev) > int frame_length, len = 0; > struct nbuf *frame; > uint16_t bd_status; > + uint32_t addr, size; > + int i; > uchar buff[FEC_MAX_PKT_SIZE]; > > /* > @@ -737,8 +817,23 @@ static int fec_recv(struct eth_device *dev) > } > > /* > - * ensure reading the right buffer status > + * Read the buffer status. Before the status can be read, the data cache > + * must be invalidated, because the data in RAM might have been changed > + * by DMA. The descriptors are properly aligned to cachelines so there's > + * no need to worry they'd overlap. > + * > + * WARNING: By invalidating the descriptor here, we also invalidate > + * the descriptors surrounding this one. Therefore we can NOT change the > + * contents of this descriptor nor the surrounding ones. The problem is > + * that in order to mark the descriptor as processed, we need to change > + * the descriptor. The solution is to mark the whole cache line when all > + * descriptors in the cache line are processed. 
> */ > + addr = (uint32_t)rbd; > + addr &= ~(CONFIG_FEC_DATA_ALIGNMENT - 1); > + size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT); > + invalidate_dcache_range(addr, addr + size); > + > bd_status = readw(&rbd->status); > debug("fec_recv: status 0x%x\n", bd_status); > > @@ -751,6 +846,13 @@ static int fec_recv(struct eth_device *dev) > frame = (struct nbuf *)readl(&rbd->data_pointer); > frame_length = readw(&rbd->data_length) - 4; > /* > + * Invalidate data cache over the buffer > + */ > + addr = (uint32_t)frame; > + size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT); > + invalidate_dcache_range(addr, addr + size); DTTO here, frame length might not be aligned properly, or will it be? Network stack must be properly analyzed here. > + > + /* > * Fill the buffer and pass it to upper layers > */ > #ifdef CONFIG_FEC_MXC_SWAP_PACKET > @@ -765,11 +867,27 @@ static int fec_recv(struct eth_device *dev) > (ulong)rbd->data_pointer, > bd_status); > } > + > /* > - * free the current buffer, restart the engine > - * and move forward to the next buffer > + * Free the current buffer, restart the engine and move forward > + * to the next buffer. Here we check if the whole cacheline of > + * descriptors was already processed and if so, we mark it free > + * as whole. > */ > - fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd); > + size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1; > + if ((fec->rbd_index & size) == size) { > + i = fec->rbd_index - size; > + addr = (uint32_t)&fec->rbd_base[i]; > + for (; i <= fec->rbd_index + size; i++) { > + if (i == (FEC_RBD_NUM - 1)) > + fec_rbd_clean(1, &fec->rbd_base[i]); > + else > + fec_rbd_clean(0, &fec->rbd_base[i]); > + } > + flush_dcache_range(addr, > + addr + CONFIG_FEC_DATA_ALIGNMENT); > + } > + This was the worst part. 
I don't quite remember how and why I did those alignment decisions (well it's obvious here, you flush rxdesc once whole cacheline is done), but some of the pieces are far less obvious now that I read the code. > fec_rx_task_enable(fec); > fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM; > } > diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h > index 2eb7803..852b2e0 100644 > --- a/drivers/net/fec_mxc.h > +++ b/drivers/net/fec_mxc.h > @@ -234,22 +234,6 @@ struct ethernet_regs { > #endif > > /** > - * @brief Descriptor buffer alignment > - * > - * i.MX27 requires a 16 byte alignment (but for the first element only) > - */ > -#define DB_ALIGNMENT 16 > - > -/** > - * @brief Data buffer alignment > - * > - * i.MX27 requires a four byte alignment for transmit and 16 bits > - * alignment for receive so take 16 > - * Note: Valid for member data_pointer in struct buffer_descriptor > - */ > -#define DB_DATA_ALIGNMENT 16 > - > -/** > * @brief Receive & Transmit Buffer Descriptor definitions > * > * Note: The first BD must be aligned (see DB_ALIGNMENT) > @@ -282,8 +266,7 @@ struct fec_priv { > struct fec_bd *tbd_base; /* TBD ring */ > int tbd_index; /* next transmit BD to write */ > bd_t *bd; > - void *rdb_ptr; > - void *base_ptr; > + uint8_t *tdb_ptr; > int dev_id; > int phy_id; > struct mii_dev *bus; _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot