ensure that transmit and receive buffers are cache-line aligned
invalidate cache after each packet received
flush cache before transmitting
Signed-off-by: Eric Nelson<eric.nel...@boundarydevices.com>
---
drivers/net/fec_mxc.c | 248 ++++++++++++++++++++++++++++++++++++-------------
drivers/net/fec_mxc.h | 19 +----
2 files changed, 184 insertions(+), 83 deletions(-)
diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c
index 1fdd071..f72304b 100644
--- a/drivers/net/fec_mxc.c
+++ b/drivers/net/fec_mxc.c
@@ -50,6 +50,33 @@ DECLARE_GLOBAL_DATA_PTR;
#define CONFIG_FEC_MXC_SWAP_PACKET
#endif
+#ifndef CONFIG_FEC_DESC_ALIGNMENT
+#define CONFIG_FEC_DESC_ALIGNMENT ARCH_DMA_MINALIGN
+#endif
+
+#ifndef CONFIG_FEC_DATA_ALIGNMENT
+#define CONFIG_FEC_DATA_ALIGNMENT ARCH_DMA_MINALIGN
+#endif
+
+/* Check various alignment issues at compile time */
+#if ((CONFIG_FEC_DESC_ALIGNMENT< 16) || (CONFIG_FEC_DESC_ALIGNMENT % 16 != 0))
+#error "CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!"
+#endif
+
+#if ((CONFIG_FEC_DATA_ALIGNMENT< 16) || (CONFIG_FEC_DATA_ALIGNMENT % 16 != 0))
+#error "CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!"
+#endif
+
+#if ((PKTALIGN< CONFIG_FEC_DATA_ALIGNMENT) || \
+ (PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
+#error "PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
+#endif
+
+#if ((PKTSIZE_ALIGN< CONFIG_FEC_DATA_ALIGNMENT) || \
+ (PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
+#error "PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
+#endif
+
#undef DEBUG
struct nbuf {
@@ -259,43 +286,47 @@ static int fec_tx_task_disable(struct fec_priv *fec)
* Initialize receive task's buffer descriptors
* @param[in] fec all we know about the device yet
* @param[in] count receive buffer count to be allocated
- * @param[in] size size of each receive buffer
+ * @param[in] dsize desired size of each receive buffer
* @return 0 on success
*
* For this task we need additional memory for the data buffers. And each
* data buffer requires some alignment. Thy must be aligned to a specific
- * boundary each (DB_DATA_ALIGNMENT).
+ * boundary each.
*/
-static int fec_rbd_init(struct fec_priv *fec, int count, int size)
+static int fec_rbd_init(struct fec_priv *fec, int count, int dsize)
{
- int ix;
- uint32_t p = 0;
-
- /* reserve data memory and consider alignment */
- if (fec->rdb_ptr == NULL)
- fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT);
- p = (uint32_t)fec->rdb_ptr;
- if (!p) {
- puts("fec_mxc: not enough malloc memory\n");
- return -ENOMEM;
- }
- memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT);
- p += DB_DATA_ALIGNMENT-1;
- p&= ~(DB_DATA_ALIGNMENT-1);
-
- for (ix = 0; ix< count; ix++) {
- writel(p,&fec->rbd_base[ix].data_pointer);
- p += size;
- writew(FEC_RBD_EMPTY,&fec->rbd_base[ix].status);
- writew(0,&fec->rbd_base[ix].data_length);
- }
+ uint32_t size;
+ int i;
+
/*
- * mark the last RBD to close the ring
+ * Allocate memory for the buffers. This allocation respects the
+ * alignment
*/
- writew(FEC_RBD_WRAP | FEC_RBD_EMPTY,&fec->rbd_base[ix - 1].status);
+ size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT);
+ for (i = 0; i< count; i++) {
+ if (0 == fec->rbd_base[i].data_pointer) {
+ uint8_t *data = memalign(CONFIG_FEC_DATA_ALIGNMENT,
size);
+ if (!data) {
+ printf("%s: error allocating rxbuf %d\n",
__func__, i);
+ goto err;
+ }
+ fec->rbd_base[i].data_pointer = (uint32_t)data;
+ } // needs allocation
+ fec->rbd_base[i].status = FEC_RBD_EMPTY;
+ fec->rbd_base[i].data_length = 0;
+ }
+
+ /* Mark the last RBD to close the ring. */
+ fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP;
fec->rbd_index = 0;
return 0;
+
+err:
+ for (; i>= 0; i--)
+ free((uint8_t *)fec->rbd_base[i].data_pointer);
+
+ return -ENOMEM;
}
/**
@@ -312,8 +343,8 @@ static int fec_rbd_init(struct fec_priv *fec, int count,
int size)
*/
static void fec_tbd_init(struct fec_priv *fec)
{
- writew(0x0000,&fec->tbd_base[0].status);
- writew(FEC_TBD_WRAP,&fec->tbd_base[1].status);
+ fec->tbd_base[0].status = 0;
+ fec->tbd_base[1].status = FEC_TBD_WRAP;
fec->tbd_index = 0;
}
@@ -387,12 +418,25 @@ static int fec_open(struct eth_device *edev)
{
struct fec_priv *fec = (struct fec_priv *)edev->priv;
int speed;
+ uint32_t addr, size;
+ int i;
debug("fec_open: fec_open(dev)\n");
/* full-duplex, heartbeat disabled */
writel(1<< 2,&fec->eth->x_cntrl);
fec->rbd_index = 0;
+ /* Invalidate all descriptors */
+ for (i = 0; i< FEC_RBD_NUM - 1; i++)
+ fec_rbd_clean(0,&fec->rbd_base[i]);
+ fec_rbd_clean(1,&fec->rbd_base[i]);
+
+ /* Flush the descriptors into RAM */
+ size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+ CONFIG_FEC_DATA_ALIGNMENT);
+ addr = (uint32_t)&fec->rbd_base[0];
+ flush_dcache_range(addr, addr + size);
+
#ifdef FEC_QUIRK_ENET_MAC
/* Enable ENET HW endian SWAP */
writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP,
@@ -478,38 +522,44 @@ static int fec_open(struct eth_device *edev)
static int fec_init(struct eth_device *dev, bd_t* bd)
{
- uint32_t base;
struct fec_priv *fec = (struct fec_priv *)dev->priv;
uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop;
uint32_t rcntrl;
- int i;
+ uint32_t size;
+ int i, ret;
/* Initialize MAC address */
fec_set_hwaddr(dev);
/*
- * reserve memory for both buffer descriptor chains at once
- * Datasheet forces the startaddress of each chain is 16 byte
- * aligned
+ * Allocate transmit descriptors, there are two in total. This
+ * allocation respects cache alignment.
*/
- if (fec->base_ptr == NULL)
- fec->base_ptr = malloc((2 + FEC_RBD_NUM) *
- sizeof(struct fec_bd) + DB_ALIGNMENT);
- base = (uint32_t)fec->base_ptr;
- if (!base) {
- puts("fec_mxc: not enough malloc memory\n");
- return -ENOMEM;
+ if (!fec->tbd_base) {
+ size = roundup(2 * sizeof(struct fec_bd),
+ CONFIG_FEC_DATA_ALIGNMENT);
+ fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
+ if (!fec->tbd_base) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ memset(fec->tbd_base, 0, size);
}
- memset((void *)base, 0, (2 + FEC_RBD_NUM) *
- sizeof(struct fec_bd) + DB_ALIGNMENT);
- base += (DB_ALIGNMENT-1);
- base&= ~(DB_ALIGNMENT-1);
-
- fec->rbd_base = (struct fec_bd *)base;
-
- base += FEC_RBD_NUM * sizeof(struct fec_bd);
- fec->tbd_base = (struct fec_bd *)base;
+ /*
+ * Allocate receive descriptors. This allocation respects cache
+ * alignment.
+ */
+ if (!fec->rbd_base) {
+ size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
+ CONFIG_FEC_DATA_ALIGNMENT);
+ fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
+ if (!fec->rbd_base) {
+ ret = -ENOMEM;
+ goto err2;
+ }
+ memset(fec->rbd_base, 0, size);
+ }
/*
* Set interrupt mask register
@@ -570,9 +620,8 @@ static int fec_init(struct eth_device *dev, bd_t* bd)
* Initialize RxBD/TxBD rings
*/
if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE)< 0) {
- free(fec->base_ptr);
- fec->base_ptr = NULL;
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err3;
}
fec_tbd_init(fec);
@@ -583,6 +632,13 @@ static int fec_init(struct eth_device *dev, bd_t* bd)
#endif
fec_open(dev);
return 0;
+
+err3:
+ free(fec->rbd_base);
+err2:
+ free(fec->tbd_base);
+err1:
+ return ret;
}
/**
@@ -631,9 +687,11 @@ static void fec_halt(struct eth_device *dev)
* @param[in] length Data count in bytes
* @return 0 on success
*/
-static int fec_send(struct eth_device *dev, volatile void* packet, int length)
+static int fec_send(struct eth_device *dev, volatile void *packet, int length)
{
unsigned int status;
+ uint32_t size;
+ uint32_t addr;
/*
* This routine transmits one frame. This routine only accepts
@@ -650,15 +708,21 @@ static int fec_send(struct eth_device *dev, volatile
void* packet, int length)
}
/*
- * Setup the transmit buffer
- * Note: We are always using the first buffer for transmission,
- * the second will be empty and only used to stop the DMA engine
+ * Setup the transmit buffer. We are always using the first buffer for
+ * transmission, the second will be empty and only used to stop the DMA
+ * engine. We also flush the packet to RAM here to avoid cache trouble.
*/
#ifdef CONFIG_FEC_MXC_SWAP_PACKET
swap_packet((uint32_t *)packet, length);
#endif
- writew(length,&fec->tbd_base[fec->tbd_index].data_length);
- writel((uint32_t)packet,&fec->tbd_base[fec->tbd_index].data_pointer);
+
+ addr = (uint32_t)packet;
+ size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT);
+ flush_dcache_range(addr, addr + size);
+
+ fec->tbd_base[fec->tbd_index].data_length = length;
+ fec->tbd_base[fec->tbd_index].data_pointer = addr;
+
/*
* update BD's status now
* This block:
@@ -667,9 +731,18 @@ static int fec_send(struct eth_device *dev, volatile void*
packet, int length)
* - might be the last BD in the list, so the address counter should
* wrap (-> keep the WRAP flag)
*/
- status = readw(&fec->tbd_base[fec->tbd_index].status)& FEC_TBD_WRAP;
+ status = fec->tbd_base[fec->tbd_index].status& FEC_TBD_WRAP;
status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY;
- writew(status,&fec->tbd_base[fec->tbd_index].status);
+ fec->tbd_base[fec->tbd_index].status = status;
+
+ /*
+ * Flush data cache. This code flushes both TX descriptors to RAM.
+ * After this code this code, the descritors will be safely in RAM
+ * and we can start DMA.
+ */
+ size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
+ addr = (uint32_t)fec->tbd_base;
+ flush_dcache_range(addr, addr + size);
/*
* Enable SmartDMA transmit task
@@ -677,11 +750,16 @@ static int fec_send(struct eth_device *dev, volatile
void* packet, int length)
fec_tx_task_enable(fec);
/*
- * wait until frame is sent .
+ * Wait until frame is sent. On each turn of the wait cycle, we must
+ * invalidate data cache to see what's really in RAM. Also, we need
+ * barrier here.
*/
+ invalidate_dcache_range(addr, addr + size);
while (readw(&fec->tbd_base[fec->tbd_index].status)& FEC_TBD_READY) {
udelay(1);
+ invalidate_dcache_range(addr, addr + size);
}
+
debug("fec_send: status 0x%x index %d\n",
readw(&fec->tbd_base[fec->tbd_index].status),
fec->tbd_index);
@@ -707,6 +785,8 @@ static int fec_recv(struct eth_device *dev)
int frame_length, len = 0;
struct nbuf *frame;
uint16_t bd_status;
+ uint32_t addr, size;
+ int i;
uchar buff[FEC_MAX_PKT_SIZE];
/*
@@ -737,8 +817,23 @@ static int fec_recv(struct eth_device *dev)
}
/*
- * ensure reading the right buffer status
+ * Read the buffer status. Before the status can be read, the data cache
+ * must be invalidated, because the data in RAM might have been changed
+ * by DMA. The descriptors are properly aligned to cachelines so there's
+ * no need to worry they'd overlap.
+ *
+ * WARNING: By invalidating the descriptor here, we also invalidate
+ * the descriptors surrounding this one. Therefore we can NOT change the
+ * contents of this descriptor nor the surrounding ones. The problem is
+ * that in order to mark the descriptor as processed, we need to change
+ * the descriptor. The solution is to mark the whole cache line when all
+ * descriptors in the cache line are processed.
*/
+ addr = (uint32_t)rbd;
+ addr&= ~(CONFIG_FEC_DATA_ALIGNMENT - 1);
+ size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
+ invalidate_dcache_range(addr, addr + size);
+
bd_status = readw(&rbd->status);
debug("fec_recv: status 0x%x\n", bd_status);
@@ -751,6 +846,13 @@ static int fec_recv(struct eth_device *dev)
frame = (struct nbuf *)readl(&rbd->data_pointer);
frame_length = readw(&rbd->data_length) - 4;
/*
+ * Invalidate data cache over the buffer
+ */
+ addr = (uint32_t)frame;
+ size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT);
+ invalidate_dcache_range(addr, addr + size);
+
+ /*
* Fill the buffer and pass it to upper layers
*/
#ifdef CONFIG_FEC_MXC_SWAP_PACKET
@@ -765,11 +867,27 @@ static int fec_recv(struct eth_device *dev)
(ulong)rbd->data_pointer,
bd_status);
}
+
/*
- * free the current buffer, restart the engine
- * and move forward to the next buffer
+ * Free the current buffer, restart the engine and move forward
+ * to the next buffer. Here we check if the whole cacheline of
+ * descriptors was already processed and if so, we mark it free
+ * as whole.
*/
- fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd);
+ size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1;
+ if ((fec->rbd_index& size) == size) {
+ i = fec->rbd_index - size;
+ addr = (uint32_t)&fec->rbd_base[i];
+ for (; i<= fec->rbd_index + size; i++) {
+ if (i == (FEC_RBD_NUM - 1))
+ fec_rbd_clean(1,&fec->rbd_base[i]);
+ else
+ fec_rbd_clean(0,&fec->rbd_base[i]);
+ }
+ flush_dcache_range(addr,
+ addr + CONFIG_FEC_DATA_ALIGNMENT);
+ }
+
fec_rx_task_enable(fec);
fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM;
}
diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h
index 2eb7803..852b2e0 100644
--- a/drivers/net/fec_mxc.h
+++ b/drivers/net/fec_mxc.h
@@ -234,22 +234,6 @@ struct ethernet_regs {
#endif
/**
- * @brief Descriptor buffer alignment
- *
- * i.MX27 requires a 16 byte alignment (but for the first element only)
- */
-#define DB_ALIGNMENT 16
-
-/**
- * @brief Data buffer alignment
- *
- * i.MX27 requires a four byte alignment for transmit and 16 bits
- * alignment for receive so take 16
- * Note: Valid for member data_pointer in struct buffer_descriptor
- */
-#define DB_DATA_ALIGNMENT 16
-
-/**
* @brief Receive& Transmit Buffer Descriptor definitions
*
* Note: The first BD must be aligned (see DB_ALIGNMENT)
@@ -282,8 +266,7 @@ struct fec_priv {
struct fec_bd *tbd_base; /* TBD ring */
int tbd_index; /* next transmit BD to write */
bd_t *bd;
- void *rdb_ptr;
- void *base_ptr;
+ uint8_t *tdb_ptr;
int dev_id;
int phy_id;
struct mii_dev *bus;