>       ensure that transmit and receive buffers are cache-line aligned
>         invalidate cache after each packet received
>         flush cache before transmitting
> 
> Signed-off-by: Eric Nelson <eric.nel...@boundarydevices.com>
> ---
>  drivers/net/fec_mxc.c |  248
> ++++++++++++++++++++++++++++++++++++------------- drivers/net/fec_mxc.h | 
>  19 +----
>  2 files changed, 184 insertions(+), 83 deletions(-)
> 
> diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c
> index 1fdd071..f72304b 100644
> --- a/drivers/net/fec_mxc.c
> +++ b/drivers/net/fec_mxc.c
> @@ -50,6 +50,33 @@ DECLARE_GLOBAL_DATA_PTR;
>  #define      CONFIG_FEC_MXC_SWAP_PACKET
>  #endif
> 
> +#ifndef      CONFIG_FEC_DESC_ALIGNMENT
> +#define      CONFIG_FEC_DESC_ALIGNMENT       ARCH_DMA_MINALIGN
> +#endif
> +
> +#ifndef      CONFIG_FEC_DATA_ALIGNMENT
> +#define      CONFIG_FEC_DATA_ALIGNMENT       ARCH_DMA_MINALIGN
> +#endif
> +
> +/* Check various alignment issues at compile time */
> +#if ((CONFIG_FEC_DESC_ALIGNMENT < 16) || (CONFIG_FEC_DESC_ALIGNMENT % 16
> != 0)) +#error        "CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!"
> +#endif
> +
> +#if ((CONFIG_FEC_DATA_ALIGNMENT < 16) || (CONFIG_FEC_DATA_ALIGNMENT % 16
> != 0)) +#error        "CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!"
> +#endif
> +
> +#if ((PKTALIGN < CONFIG_FEC_DATA_ALIGNMENT) || \
> +     (PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
> +#error       "PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
> +#endif
> +
> +#if ((PKTSIZE_ALIGN < CONFIG_FEC_DATA_ALIGNMENT) || \
> +     (PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
> +#error       "PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
> +#endif
> +
>  #undef DEBUG
> 
>  struct nbuf {
> @@ -259,43 +286,47 @@ static int fec_tx_task_disable(struct fec_priv *fec)
>   * Initialize receive task's buffer descriptors
>   * @param[in] fec all we know about the device yet
>   * @param[in] count receive buffer count to be allocated
> - * @param[in] size size of each receive buffer
> + * @param[in] dsize desired size of each receive buffer
>   * @return 0 on success
>   *
>   * For this task we need additional memory for the data buffers. And each
>   * data buffer requires some alignment. Thy must be aligned to a specific
> - * boundary each (DB_DATA_ALIGNMENT).
> + * boundary each.
>   */
> -static int fec_rbd_init(struct fec_priv *fec, int count, int size)
> +static int fec_rbd_init(struct fec_priv *fec, int count, int dsize)
>  {
> -     int ix;
> -     uint32_t p = 0;
> -
> -     /* reserve data memory and consider alignment */
> -     if (fec->rdb_ptr == NULL)
> -             fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT);
> -     p = (uint32_t)fec->rdb_ptr;
> -     if (!p) {
> -             puts("fec_mxc: not enough malloc memory\n");
> -             return -ENOMEM;
> -     }
> -     memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT);
> -     p += DB_DATA_ALIGNMENT-1;
> -     p &= ~(DB_DATA_ALIGNMENT-1);
> -
> -     for (ix = 0; ix < count; ix++) {
> -             writel(p, &fec->rbd_base[ix].data_pointer);
> -             p += size;
> -             writew(FEC_RBD_EMPTY, &fec->rbd_base[ix].status);
> -             writew(0, &fec->rbd_base[ix].data_length);
> -     }
> +     uint32_t size;
> +     int i;
> +
>       /*
> -      * mark the last RBD to close the ring
> +      * Allocate memory for the buffers. This allocation respects the
> +      * alignment
>        */
> -     writew(FEC_RBD_WRAP | FEC_RBD_EMPTY, &fec->rbd_base[ix - 1].status);
> +     size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT);
> +     for (i = 0; i < count; i++) {
> +             if (0 == fec->rbd_base[i].data_pointer) {
> +                     uint8_t *data = memalign(CONFIG_FEC_DATA_ALIGNMENT, 
size);
> +                     if (!data) {
> +                             printf("%s: error allocating rxbuf %d\n", 
__func__, i);
> +                             goto err;
> +                     }
> +                     fec->rbd_base[i].data_pointer = (uint32_t)data;
> +             } // needs allocation

Wrong comment

> +             fec->rbd_base[i].status = FEC_RBD_EMPTY;
> +             fec->rbd_base[i].data_length = 0;
> +     }
> +
> +     /* Mark the last RBD to close the ring. */
> +     fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP;
>       fec->rbd_index = 0;
> 
>       return 0;
> +
> +err:
> +     for (; i >= 0; i--)
> +             free((uint8_t *)fec->rbd_base[i].data_pointer);
> +
> +     return -ENOMEM;
>  }
> 
>  /**
> @@ -312,8 +343,8 @@ static int fec_rbd_init(struct fec_priv *fec, int
> count, int size) */
>  static void fec_tbd_init(struct fec_priv *fec)
>  {
> -     writew(0x0000, &fec->tbd_base[0].status);
> -     writew(FEC_TBD_WRAP, &fec->tbd_base[1].status);
> +     fec->tbd_base[0].status = 0;
> +     fec->tbd_base[1].status = FEC_TBD_WRAP;
>       fec->tbd_index = 0;
>  }
> 
> @@ -387,12 +418,25 @@ static int fec_open(struct eth_device *edev)
>  {
>       struct fec_priv *fec = (struct fec_priv *)edev->priv;
>       int speed;
> +     uint32_t addr, size;
> +     int i;
> 
>       debug("fec_open: fec_open(dev)\n");
>       /* full-duplex, heartbeat disabled */
>       writel(1 << 2, &fec->eth->x_cntrl);
>       fec->rbd_index = 0;
> 
> +     /* Invalidate all descriptors */
> +     for (i = 0; i < FEC_RBD_NUM - 1; i++)
> +             fec_rbd_clean(0, &fec->rbd_base[i]);
> +     fec_rbd_clean(1, &fec->rbd_base[i]);
> +
> +     /* Flush the descriptors into RAM */
> +     size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
> +                     CONFIG_FEC_DATA_ALIGNMENT);
> +     addr = (uint32_t)&fec->rbd_base[0];

addr = (uint32_t)fec->rbd_base; would likely work too?

> +     flush_dcache_range(addr, addr + size);
> +
>  #ifdef FEC_QUIRK_ENET_MAC
>       /* Enable ENET HW endian SWAP */
>       writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP,
> @@ -478,38 +522,44 @@ static int fec_open(struct eth_device *edev)
> 
>  static int fec_init(struct eth_device *dev, bd_t* bd)
>  {
> -     uint32_t base;
>       struct fec_priv *fec = (struct fec_priv *)dev->priv;
>       uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop;
>       uint32_t rcntrl;
> -     int i;
> +     uint32_t size;
> +     int i, ret;
> 
>       /* Initialize MAC address */
>       fec_set_hwaddr(dev);
> 
>       /*
> -      * reserve memory for both buffer descriptor chains at once
> -      * Datasheet forces the startaddress of each chain is 16 byte
> -      * aligned
> +      * Allocate transmit descriptors, there are two in total. This
> +      * allocation respects cache alignment.
>        */
> -     if (fec->base_ptr == NULL)
> -             fec->base_ptr = malloc((2 + FEC_RBD_NUM) *
> -                             sizeof(struct fec_bd) + DB_ALIGNMENT);
> -     base = (uint32_t)fec->base_ptr;
> -     if (!base) {
> -             puts("fec_mxc: not enough malloc memory\n");
> -             return -ENOMEM;
> +     if (!fec->tbd_base) {
> +             size = roundup(2 * sizeof(struct fec_bd),
> +                             CONFIG_FEC_DATA_ALIGNMENT);
> +             fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
> +             if (!fec->tbd_base) {
> +                     ret = -ENOMEM;
> +                     goto err1;
> +             }
> +             memset(fec->tbd_base, 0, size);
>       }
> -     memset((void *)base, 0, (2 + FEC_RBD_NUM) *
> -                     sizeof(struct fec_bd) + DB_ALIGNMENT);
> -     base += (DB_ALIGNMENT-1);
> -     base &= ~(DB_ALIGNMENT-1);
> -
> -     fec->rbd_base = (struct fec_bd *)base;
> -
> -     base += FEC_RBD_NUM * sizeof(struct fec_bd);
> 
> -     fec->tbd_base = (struct fec_bd *)base;
> +     /*
> +      * Allocate receive descriptors. This allocation respects cache
> +      * alignment.
> +      */
> +     if (!fec->rbd_base) {
> +             size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
> +                             CONFIG_FEC_DATA_ALIGNMENT);
> +             fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
> +             if (!fec->rbd_base) {
> +                     ret = -ENOMEM;
> +                     goto err2;
> +             }
> +             memset(fec->rbd_base, 0, size);
> +     }
> 
>       /*
>        * Set interrupt mask register
> @@ -570,9 +620,8 @@ static int fec_init(struct eth_device *dev, bd_t* bd)
>        * Initialize RxBD/TxBD rings
>        */
>       if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE) < 0) {
> -             free(fec->base_ptr);
> -             fec->base_ptr = NULL;
> -             return -ENOMEM;
> +             ret = -ENOMEM;
> +             goto err3;
>       }
>       fec_tbd_init(fec);
> 
> @@ -583,6 +632,13 @@ static int fec_init(struct eth_device *dev, bd_t* bd)
>  #endif
>       fec_open(dev);
>       return 0;
> +
> +err3:
> +     free(fec->rbd_base);
> +err2:
> +     free(fec->tbd_base);
> +err1:
> +     return ret;
>  }
> 
>  /**
> @@ -631,9 +687,11 @@ static void fec_halt(struct eth_device *dev)
>   * @param[in] length Data count in bytes
>   * @return 0 on success
>   */
> -static int fec_send(struct eth_device *dev, volatile void* packet, int
> length) +static int fec_send(struct eth_device *dev, volatile void
> *packet, int length) {
>       unsigned int status;
> +     uint32_t size;
> +     uint32_t addr;
> 
>       /*
>        * This routine transmits one frame.  This routine only accepts
> @@ -650,15 +708,21 @@ static int fec_send(struct eth_device *dev, volatile
> void* packet, int length) }
> 
>       /*
> -      * Setup the transmit buffer
> -      * Note: We are always using the first buffer for transmission,
> -      * the second will be empty and only used to stop the DMA engine
> +      * Setup the transmit buffer. We are always using the first buffer for
> +      * transmission, the second will be empty and only used to stop the DMA
> +      * engine. We also flush the packet to RAM here to avoid cache trouble.
>        */
>  #ifdef       CONFIG_FEC_MXC_SWAP_PACKET
>       swap_packet((uint32_t *)packet, length);
>  #endif
> -     writew(length, &fec->tbd_base[fec->tbd_index].data_length);
> -     writel((uint32_t)packet, &fec->tbd_base[fec->tbd_index].data_pointer);
> +
> +     addr = (uint32_t)packet;
> +     size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT);
> +     flush_dcache_range(addr, addr + size);

What if size of the packet isn't aligned on cacheline boundary? Won't it choke 
here?

> +
> +     fec->tbd_base[fec->tbd_index].data_length = length;
> +     fec->tbd_base[fec->tbd_index].data_pointer = addr;
> +
>       /*
>        * update BD's status now
>        * This block:
> @@ -667,9 +731,18 @@ static int fec_send(struct eth_device *dev, volatile
> void* packet, int length) * - might be the last BD in the list, so the
> address counter should *   wrap (-> keep the WRAP flag)
>        */
> -     status = readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_WRAP;
> +     status = fec->tbd_base[fec->tbd_index].status & FEC_TBD_WRAP;
>       status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY;
> -     writew(status, &fec->tbd_base[fec->tbd_index].status);
> +     fec->tbd_base[fec->tbd_index].status = status;
> +
> +     /*
> +      * Flush data cache. This code flushes both TX descriptors to RAM.
> +      * After this code this code, the descritors will be safely in RAM
> +      * and we can start DMA.
> +      */
> +     size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
> +     addr = (uint32_t)fec->tbd_base;
> +     flush_dcache_range(addr, addr + size);

Same concern here and everywhere else ... I believe aligning it like this is 
quite unsafe :-(
> 
>       /*
>        * Enable SmartDMA transmit task
> @@ -677,11 +750,16 @@ static int fec_send(struct eth_device *dev, volatile
> void* packet, int length) fec_tx_task_enable(fec);
> 
>       /*
> -      * wait until frame is sent .
> +      * Wait until frame is sent. On each turn of the wait cycle, we must
> +      * invalidate data cache to see what's really in RAM. Also, we need
> +      * barrier here.
>        */
> +     invalidate_dcache_range(addr, addr + size);
>       while (readw(&fec->tbd_base[fec->tbd_index].status) & FEC_TBD_READY) {
>               udelay(1);
> +             invalidate_dcache_range(addr, addr + size);
>       }
> +
>       debug("fec_send: status 0x%x index %d\n",
>                       readw(&fec->tbd_base[fec->tbd_index].status),
>                       fec->tbd_index);
> @@ -707,6 +785,8 @@ static int fec_recv(struct eth_device *dev)
>       int frame_length, len = 0;
>       struct nbuf *frame;
>       uint16_t bd_status;
> +     uint32_t addr, size;
> +     int i;
>       uchar buff[FEC_MAX_PKT_SIZE];
> 
>       /*
> @@ -737,8 +817,23 @@ static int fec_recv(struct eth_device *dev)
>       }
> 
>       /*
> -      * ensure reading the right buffer status
> +      * Read the buffer status. Before the status can be read, the data cache
> +      * must be invalidated, because the data in RAM might have been changed
> +      * by DMA. The descriptors are properly aligned to cachelines so there's
> +      * no need to worry they'd overlap.
> +      *
> +      * WARNING: By invalidating the descriptor here, we also invalidate
> +      * the descriptors surrounding this one. Therefore we can NOT change the
> +      * contents of this descriptor nor the surrounding ones. The problem is
> +      * that in order to mark the descriptor as processed, we need to change
> +      * the descriptor. The solution is to mark the whole cache line when all
> +      * descriptors in the cache line are processed.
>        */
> +     addr = (uint32_t)rbd;
> +     addr &= ~(CONFIG_FEC_DATA_ALIGNMENT - 1);
> +     size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
> +     invalidate_dcache_range(addr, addr + size);
> +
>       bd_status = readw(&rbd->status);
>       debug("fec_recv: status 0x%x\n", bd_status);
> 
> @@ -751,6 +846,13 @@ static int fec_recv(struct eth_device *dev)
>                       frame = (struct nbuf *)readl(&rbd->data_pointer);
>                       frame_length = readw(&rbd->data_length) - 4;
>                       /*
> +                      * Invalidate data cache over the buffer
> +                      */
> +                     addr = (uint32_t)frame;
> +                     size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT);
> +                     invalidate_dcache_range(addr, addr + size);

DTTO here, frame length might not be aligned properly, or will it be? Network 
stack must be properly analyzed here.

> +
> +                     /*
>                        *  Fill the buffer and pass it to upper layers
>                        */
>  #ifdef       CONFIG_FEC_MXC_SWAP_PACKET
> @@ -765,11 +867,27 @@ static int fec_recv(struct eth_device *dev)
>                                               (ulong)rbd->data_pointer,
>                                               bd_status);
>               }
> +
>               /*
> -              * free the current buffer, restart the engine
> -              * and move forward to the next buffer
> +              * Free the current buffer, restart the engine and move forward
> +              * to the next buffer. Here we check if the whole cacheline of
> +              * descriptors was already processed and if so, we mark it free
> +              * as whole.
>                */
> -             fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd);
> +             size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1;
> +             if ((fec->rbd_index & size) == size) {
> +                     i = fec->rbd_index - size;
> +                     addr = (uint32_t)&fec->rbd_base[i];
> +                     for (; i <= fec->rbd_index + size; i++) {
> +                             if (i == (FEC_RBD_NUM - 1))
> +                                     fec_rbd_clean(1, &fec->rbd_base[i]);
> +                             else
> +                                     fec_rbd_clean(0, &fec->rbd_base[i]);
> +                     }
> +                     flush_dcache_range(addr,
> +                             addr + CONFIG_FEC_DATA_ALIGNMENT);
> +             }
> +

This was the worst part. I don't quite remember how and why I did those 
alignment decisions (well it's obvious here, you flush rxdesc once whole 
cacheline is done), but some of the pieces are far less obvious now that I read 
the code.

>               fec_rx_task_enable(fec);
>               fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM;
>       }
> diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h
> index 2eb7803..852b2e0 100644
> --- a/drivers/net/fec_mxc.h
> +++ b/drivers/net/fec_mxc.h
> @@ -234,22 +234,6 @@ struct ethernet_regs {
>  #endif
> 
>  /**
> - * @brief Descriptor buffer alignment
> - *
> - * i.MX27 requires a 16 byte alignment (but for the first element only)
> - */
> -#define DB_ALIGNMENT         16
> -
> -/**
> - * @brief Data buffer alignment
> - *
> - * i.MX27 requires a four byte alignment for transmit and 16 bits
> - * alignment for receive so take 16
> - * Note: Valid for member data_pointer in struct buffer_descriptor
> - */
> -#define DB_DATA_ALIGNMENT    16
> -
> -/**
>   * @brief Receive & Transmit Buffer Descriptor definitions
>   *
>   * Note: The first BD must be aligned (see DB_ALIGNMENT)
> @@ -282,8 +266,7 @@ struct fec_priv {
>       struct fec_bd *tbd_base;        /* TBD ring */
>       int tbd_index;                  /* next transmit BD to write */
>       bd_t *bd;
> -     void *rdb_ptr;
> -     void *base_ptr;
> +     uint8_t *tdb_ptr;
>       int dev_id;
>       int phy_id;
>       struct mii_dev *bus;
_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot

Reply via email to