[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-12 Thread Pantelis Antoniou

Joakim Tjernlund wrote:

>>>hmm, something is broken here. I can apply my patch to my local copy of
>>>8xx_io/enet.c, BK version 1.24 just fine. Whats your version of enet.c?
>>>
>>>  Jocke
>>>
>>>
>>>
>>>
>>>
>>>
>>My version is the head of the BK tree, updated today.
>>
>>
>
>What version is that?
>
> Jocke
>
>
>
>
>
>
>
2.4.21-pre4

Regards

Pantelis


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Pantelis Antoniou

Joakim Tjernlund wrote:

>>>Hi Pantelis
>>>
>>>I don't follow you here. Didn't my patch apply cleanly against 
>>>linuxppc_2_4_devel?
>>>I generated my patch against linuxppc_2_4_devel(I think).
>>>
>>>   Jocke
>>>
>>Nope.
>>
>>Fails at hunks #7 and #8.
>>Don't worry, it's trivial stuff.
>>
>>Here is enet.c.rej, if you want to check it.
>>
>
>hmm, something is broken here. I can apply my patch to my local copy of
>8xx_io/enet.c, BK version 1.24 just fine. Whats your version of enet.c?
>
>   Jocke
>
>
>
>
My version is the head of the BK tree, updated today.

Regards

Pantelis


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

> >hmm, something is broken here. I can apply my patch to my local copy of
> >8xx_io/enet.c, BK version 1.24 just fine. Whats your version of enet.c?
> >
> >   Jocke
> >
> >
> >
> >
> My version is the head of the BK tree, updated today.

What version is that?

 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Pantelis Antoniou
Joakim Tjernlund wrote:

>>Hi guys,
>>
>>I have created a patch that applies cleanly to the head of
>>linuxppc_2_4_devel
>>and it works great.
>>
>>Keep up the good work!
>>
>>Pantelis
>>
>>
>
>Hi Pantelis
>
>I don't follow you here. Didn't my patch apply cleanly against 
>linuxppc_2_4_devel?
>I generated my patch against linuxppc_2_4_devel(I think).
>
>Jocke
>
>
>
>
>
Nope.

Fails at hunks #7 and #8.
Don't worry, it's trivial stuff.

Here is enet.c.rej, if you want to check it.

Regards

-- next part --
An embedded and charset-unspecified text was scrubbed...
Name: enet.c.rej
Url: 
http://ozlabs.org/pipermail/linuxppc-embedded/attachments/20030210/0bc35bce/attachment.txt
 


[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

> >Hi Pantelis
> >
> >I don't follow you here. Didn't my patch apply cleanly against 
> >linuxppc_2_4_devel?
> >I generated my patch against linuxppc_2_4_devel(I think).
> >
> >Jocke
> Nope.
>
> Fails at hunks #7 and #8.
> Don't worry, it's trivial stuff.
>
> Here is enet.c.rej, if you want to check it.

hmm, something is broken here. I can apply my patch to my local copy of
8xx_io/enet.c, BK version 1.24 just fine. Whats your version of enet.c?

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

> Hi there,
>
> has someone actually ported Jocke's patch to 8xx FEC !?
>
> Any benchmarks?
>
> Thanks,
>
> Steven

Actually I had a copy at work so here it goes. Not tested(not even compiled)

 Jocke

--- arch/ppc/8xx_io/fec.c   Fri Nov  1 14:44:05 2002
+++ arch/ppc/8xx_io/new_fec.c   Sun Feb  9 14:24:45 2003
@@ -83,26 +83,34 @@
 } phy_info_t;
 #endif /* CONFIG_USE_MDIO */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small packets
+ * at a small performance hit. Note performance testing needed */
+/* #define COPY_SMALL_FRAMES 1  */
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for internal 
use */
+#endif
+
+
 /* The number of Tx and Rx buffers.  These are allocated from the page
  * pool.  The code may assume these are power of two, so it is best
  * to keep them that size.
  * We don't need to allocate pages for the transmitter.  We just use
  * the skbuffer directly.
  */
+
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define FEC_ENET_RX_PAGES  16
-#define FEC_ENET_RX_FRSIZE 2048
-#define FEC_ENET_RX_FRPPG  (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
-#define TX_RING_SIZE   16  /* Must be power of two */
-#define TX_RING_MOD_MASK   15  /*   for this to work */
+  #define RX_RING_SIZE 32
+  #define TX_RING_SIZE 16  /* Must be power of two for this to 
work */
 #else
-#define FEC_ENET_RX_PAGES  4
-#define FEC_ENET_RX_FRSIZE 2048
-#define FEC_ENET_RX_FRPPG  (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
-#define TX_RING_SIZE   8   /* Must be power of two */
-#define TX_RING_MOD_MASK   7   /*   for this to work */
+  #define RX_RING_SIZE 8
+  #define TX_RING_SIZE 8   /* Must be power of two for this to 
work */
+#endif
+#define TX_RING_MOD_MASK   (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1552 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* Interrupt events/masks.
@@ -573,7 +581,7 @@

/* Check for errors. */
if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
-  BD_ENET_RX_CR | BD_ENET_RX_OV)) {
+  BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)) {
fep->stats.rx_errors++;
if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) {
/* Frame too long or too short. */
@@ -585,41 +593,53 @@
fep->stats.rx_crc_errors++;
if (bdp->cbd_sc & BD_ENET_RX_OV)/* FIFO overrun */
fep->stats.rx_crc_errors++;
-   }
-
-   /* Report late collisions as a frame error.
-* On this error, the BD is closed, but we don't know what we
-* have in the buffer.  So, just drop this frame on the floor.
-*/
-   if (bdp->cbd_sc & BD_ENET_RX_CL) {
-   fep->stats.rx_errors++;
-   fep->stats.rx_frame_errors++;
-   goto rx_processing_done;
-   }
-
-   /* Process the incoming frame.
-*/
-   fep->stats.rx_packets++;
-   pkt_len = bdp->cbd_datlen;
-   fep->stats.rx_bytes += pkt_len;
-   data = fep->rx_vaddr[bdp - fep->rx_bd_base];
-
-   /* This does 16 byte alignment, exactly what we need.
-* The packet length includes FCS, but we don't want to
-* include that when passing upstream as it messes up
-* bridging applications.
-*/
-   skb = dev_alloc_skb(pkt_len-4);
-
-   if (skb == NULL) {
-   printk("%s: Memory squeeze, dropping packet.\n", dev->name);
-   fep->stats.rx_dropped++;
+   if (bdp->cbd_sc & BD_ENET_RX_CL)/* Late collision */
+   fep->stats.rx_frame_errors++;   /* Report as a frame 
error. */
} else {
-   skb->dev = dev;
-   skb_put(skb,pkt_len-4); /* Make room */
-   eth_copy_and_sum(skb, data, pkt_len-4, 0);
-   skb->protocol=eth_type_trans(skb,dev);
-   netif_rx(skb);
+
+   /* Process the incoming frame.
+*/
+   fep->stats.rx_packets++;
+   pkt_len = bdp->cbd_datlen;
+   fep->stats.rx_bytes += pkt_len;
+   pkt_len -= 4; /* The packet length includes FCS, but we don't 
want to
+  * include that when passing upstream as it 
messes up
+  * bridging applications. Is this still true  
*/
+#ifdef COPY_SMALL_FRAMES
+   /* Allocate the next buffer now so we are sure to have one when 
needed
+* This does 16 byte alignment, exactly what we need(L1_CACHE 
aligned). */
+   if(pkt_len < RX_COPYBREAK)
+ 

[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

> Hi guys,
>
> I have created a patch that applies cleanly to the head of
> linuxppc_2_4_devel
> and it works great.
>
> Keep up the good work!
>
> Pantelis

Hi Pantelis

I don't follow you here. Didn't my patch apply cleanly against 
linuxppc_2_4_devel?
I generated my patch against linuxppc_2_4_devel(I think).

Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

> has someone actually ported Jocke's patch to 8xx FEC !?

I just did a port, but the patch is on my home computer. I can send
it later ...

It's not tested at all since I don't have the FEC connected on my boards.


 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Pantelis Antoniou
Joakim Tjernlund wrote:

>>Hi,
>>
>>I am on an 862. Anyway I can't find another definition of dma_cache_inv() but 
>>the NO OP in asm-ppc/io.h. Could you give me e hint
>>where it is defined in your kernel?
>>
>
>It's in asm-ppc/io.h (2.4.20) and there are 2 definitions of dma_cache_inv() 
>which depends on
>CONFIG_NOT_COHERENT_CACHE(should be defined for 8xx). What kernel version are 
>you running?
>
> Jocke
>
>>Thanks, Stephan
>>
>>
>>>-Original Message-
>>>From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
>>>Sent: Montag, 3. Februar 2003 18:23
>>>To: Stephan Linke
>>>Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
>>>
>>>
>>>
>>>>Hi Jocke,
>>>>
>>>>in your latest patch you are using dma_cache_inv() instead of 
>>>>invalidate_dcache_range().
>>>>The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and it's a 
>>>>"do{}while(0)".
>>>>Are you shure that this was your intention? It seames to me like you could 
>>>>remove that call as well.
>>>>
>>>I guess you are on 8260? On 8260 there is no need for 
>>>invalidate_dcache_range() since
>>>it's the CPM is cache coherent. On 8xx it is not cache coherent. I switched
>>>to dma_cache_inv() because it's a no op on 8260 and a 
>>>invalidate_dcache_range() on 8xx so
>>>it would be easy to adapt the patch to both CPU's.
>>>
>>> Jocke
>>>
>>>
>>>
>>
>
>
>
>
>
>
Hi guys,

I have created a patch that applies cleanly to the head of
linuxppc_2_4_devel
and it works great.

Keep up the good work!

Pantelis


-- next part --
An embedded and charset-unspecified text was scrubbed...
Name: joakim-enet-8xx-panto.patch
Url: 
http://ozlabs.org/pipermail/linuxppc-embedded/attachments/20030210/547be440/attachment.txt
 


[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Steven Scholz

Hi there,

has someone actually ported Jocke's patch to 8xx FEC !?

Any benchmarks?

Thanks,

Steven


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-04 Thread Stephan Linke

Hi Jocke,

I am running a 2.4.18 kernel. Maybe that's the problem?
But let me guess: keeping the invalidate_dcache_range() would be fine as well 
for our 8xx board?

Stephan

> -Original Message-
> From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
> Sent: Dienstag, 4. Februar 2003 10:59
> To: Stephan Linke
> Cc: Linuxppc-Embedded
> Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
>
>
> >
> > Hi,
> >
> > I am on an 862. Anyway I can't find another definition of dma_cache_inv() 
> > but the NO OP in asm-ppc/io.h. Could you give
> me e hint
> > where it is defined in your kernel?
>
> It's in asm-ppc/io.h (2.4.20) and there are 2 definitions of dma_cache_inv() 
> which depends on
> CONFIG_NOT_COHERENT_CACHE(should be defined for 8xx). What kernel version are 
> you running?
>
>  Jocke
> >
> > Thanks, Stephan
> >
> > > -Original Message-
> > > From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
> > > Sent: Montag, 3. Februar 2003 18:23
> > > To: Stephan Linke
> > > Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
> > >
> > >
> > > > Hi Jocke,
> > > >
> > > > in your latest patch you are using dma_cache_inv() instead of 
> > > > invalidate_dcache_range().
> > > > The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and 
> > > > it's a "do{}while (0)".
> > > > Are you shure that this was your intention? It seames to me like you 
> > > > could remove that call as well.
> > >
> > > I guess you are on 8260? On 8260 there is no need for 
> > > invalidate_dcache_range() since
> > > it's the CPM is cache coherent. On 8xx it is not cache coherent. I 
> > > switched
> > > to dma_cache_inv() because it's a no op on 8260 and a 
> > > invalidate_dcache_range() on 8xx so
> > > it would be easy to adapt the patch to both CPU's.
> > >
> > >  Jocke
> > >
> > >
> >
> >
>
>

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-04 Thread Joakim Tjernlund

>
> Hi,
>
> I am on an 862. Anyway I can't find another definition of dma_cache_inv() but 
> the NO OP in asm-ppc/io.h. Could you give me e hint
> where it is defined in your kernel?

It's in asm-ppc/io.h (2.4.20) and there are 2 definitions of dma_cache_inv() 
which depends on
CONFIG_NOT_COHERENT_CACHE(should be defined for 8xx). What kernel version are 
you running?

 Jocke
>
> Thanks, Stephan
>
> > -Original Message-
> > From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
> > Sent: Montag, 3. Februar 2003 18:23
> > To: Stephan Linke
> > Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
> >
> >
> > > Hi Jocke,
> > >
> > > in your latest patch you are using dma_cache_inv() instead of 
> > > invalidate_dcache_range().
> > > The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and it's 
> > > a "do{}while(0)".
> > > Are you shure that this was your intention? It seames to me like you 
> > > could remove that call as well.
> >
> > I guess you are on 8260? On 8260 there is no need for 
> > invalidate_dcache_range() since
> > it's the CPM is cache coherent. On 8xx it is not cache coherent. I switched
> > to dma_cache_inv() because it's a no op on 8260 and a 
> > invalidate_dcache_range() on 8xx so
> > it would be easy to adapt the patch to both CPU's.
> >
> >  Jocke
> >
> >
>
>


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-04 Thread Stephan Linke

Hi,

I am on an 862. Anyway I can't find another definition of dma_cache_inv() but 
the NO OP in asm-ppc/io.h. Could you give me e hint
where it is defined in your kernel?

Thanks, Stephan

> -Original Message-
> From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
> Sent: Montag, 3. Februar 2003 18:23
> To: Stephan Linke
> Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
>
>
> > Hi Jocke,
> >
> > in your latest patch you are using dma_cache_inv() instead of 
> > invalidate_dcache_range().
> > The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and it's a 
> > "do{}while(0)".
> > Are you shure that this was your intention? It seames to me like you could 
> > remove that call as well.
>
> I guess you are on 8260? On 8260 there is no need for 
> invalidate_dcache_range() since
> it's the CPM is cache coherent. On 8xx it is not cache coherent. I switched
> to dma_cache_inv() because it's a no op on 8260 and a 
> invalidate_dcache_range() on 8xx so
> it would be easy to adapt the patch to both CPU's.
>
>  Jocke
>
>


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-03 Thread Pantelis Antoniou

Joakim Tjernlund wrote:

>>>This is the second version of my patch that removes the expensive memcpy of
>>>received
>>>ethernet frames in interrupt context.
>>>
>>>I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
>>>size 1500 when
>>>applied to 8260 FEC(needs to be applied manually). But min packet size
>>>decreased with 10 %.
>>>This version should fix the 10% decrease case.
>>>
>>>This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
>>>8260/fcc_enet.c with little effort.
>>>
>>>Better fix a bug in set_multicast_list(), move the dmi list forward when
>>>walking it(dmi = dmi->next;)
>>>
>>>New stuff:
>>>   - Configrable: copy small packets or pass them directly, see
>>>COPY_SMALL_FRAMES in code.
>>>   - Collision reporting fix form Thomas Lange.
>>>   - Don't pass receive frames which has error upwards.
>>>   - Report RX_OV errors as fifo errors, not crc errors.
>>>
>>>Please test and report any problems and performace improvements.
>>>
>>>
>>Hi
>>
>>This is the third version of my optimized enet.c patch.
>>Changes since version 2:
>>  1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
>> it was invalidated after the packet was received and that could lead to 
>> buffer
>> corruption in some cases.
>>
>>  2) use dma_cache_inv() instead of invalidate_dcache_range() since that will 
>> work
>> for both 8xx and 82xx.
>>
>>  3) decrease the allocated buffer length.
>>
>>  4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some 
>> memory.
>>
>>  5) probably some white space changes got in too.
>>
>>Any chance to see it the devel tree?
>>
>>More than 3 months has passed since version 2 and the only problem reported 
>>was
>>1) and the fix has been know since mid November.
>>
>>Dan, you said you would integrate this patch(or some version of it) in 
>>November. I
>>think I have waited long enough now. Please do ASAP.
>>
>> Jocke
>>
>>
>[SNIP]
>
>Zero feedback so far. Is there zero interest to improve the mpc code in 
>linuxppc?
>Anyhow, I give up. Don't have the energy to resend over and over again.
>
>   Jocke
>
>
>
>
>
>
Please don't give up.

I'll try it and you'll have my feedback within the week.

Anyhow, what is the deal with 8xx/82xx?
At least the maintainers should ack the patch, and if it is not suitable
for inclusion they should state for what reason.

We don't want to discourage people sending patches.
Right?

Pantelis


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-03 Thread Joakim Tjernlund

> > This is the second version of my patch that removes the expensive memcpy of
> > received
> > ethernet frames in interrupt context.
> >
> > I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> > size 1500 when
> > applied to 8260 FEC(needs to be applied manually). But min packet size
> > decreased with 10 %.
> > This version should fix the 10% decrease case.
> >
> > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > 8260/fcc_enet.c with little effort.
> >
> > Better fix a bug in set_multicast_list(), move the dmi list forward when
> > walking it(dmi = dmi->next;)
> >
> > New stuff:
> >- Configrable: copy small packets or pass them directly, see
> > COPY_SMALL_FRAMES in code.
> >- Collision reporting fix form Thomas Lange.
> >- Don't pass receive frames which has error upwards.
> >- Report RX_OV errors as fifo errors, not crc errors.
> >
> > Please test and report any problems and performace improvements.
>
> Hi
>
> This is the third version of my optimized enet.c patch.
> Changes since version 2:
>   1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
>  it was invalidated after the packet was received and that could lead to 
> buffer
>  corruption in some cases.
>
>   2) use dma_cache_inv() instead of invalidate_dcache_range() since that will 
> work
>  for both 8xx and 82xx.
>
>   3) decrease the allocated buffer length.
>
>   4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some 
> memory.
>
>   5) probably some white space changes got in too.
>
> Any chance to see it the devel tree?
>
> More than 3 months has passed since version 2 and the only problem reported 
> was
> 1) and the fix has been know since mid November.
>
> Dan, you said you would integrate this patch(or some version of it) in 
> November. I
> think I have waited long enough now. Please do ASAP.
>
>  Jocke
[SNIP]

Zero feedback so far. Is there zero interest to improve the mpc code in 
linuxppc?
Anyhow, I give up. Don't have the energy to resend over and over again.

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-01-28 Thread Joakim Tjernlund

> This is the second version of my patch that removes the expensive memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
> walking it(dmi = dmi->next;)
>
> New stuff:
>- Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>- Collision reporting fix form Thomas Lange.
>- Don't pass receive frames which has error upwards.
>- Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.

Hi

This is the third version of my optimized enet.c patch.
Changes since version 2:
  1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
 it was invalidated after the packet was received and that could lead to 
buffer
 corruption in some cases.

  2) use dma_cache_inv() instead of invalidate_dcache_range() since that will 
work
 for both 8xx and 82xx.

  3) decrease the allocated buffer length.

  4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some 
memory.

  5) probably some white space changes got in too.

Any chance to see it the devel tree?

More than 3 months has passed since version 2 and the only problem reported was
1) and the fix has been know since mid November.

Dan, you said you would integrate this patch(or some version of it) in 
November. I
think I have waited long enough now. Please do ASAP.

 Jocke

--- arch/ppc/8xx_io/enet.c  Fri Nov  1 14:44:05 2002
+++ arch/ppc/8xx_io/new_enet.c  Sat Jan 25 19:57:50 2003
@@ -34,7 +34,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -86,6 +85,14 @@
  * All functions are directly controlled using I/O pins.  See .
  */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small packets
+ * at a small performance hit. Note performance testing needed */
+/* #define COPY_SMALL_FRAMES 1  */
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for internal 
use */
+#endif
+
 /* The transmitter timeout
  */
 #define TX_TIMEOUT (2*HZ)
@@ -97,19 +104,17 @@
  * the skbuffer directly.
  */
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES  32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   64  /* Must be power of two */
-#define TX_RING_MOD_MASK   63  /*   for this to work */
+  #define RX_RING_SIZE 64
+  #define TX_RING_SIZE 64  /* Must be power of two for this to 
work */
 #else
-#define CPM_ENET_RX_PAGES  4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   8   /* Must be power of two */
-#define TX_RING_MOD_MASK   7   /*   for this to work */
+  #define RX_RING_SIZE 8
+  #define TX_RING_SIZE 8   /* Must be power of two for this to 
work */
+#endif
+#define TX_RING_MOD_MASK   (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1552 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
/* Virtual addresses for the receive buffers because we can't
 * do a __va() on them anymore.
 */
-   unsigned char *rx_vaddr[RX_RING_SIZE];
+   void*rx_vaddr[RX_RING_SIZE];
struct  net_device_stats stats;
uinttx_full;
spinlock_t lock;
@@ -370,11 +375,11 @@

cep->stats.tx_packets++;

-   /* Deferred means some collisions occurred during transmit,
-* but we eventually sent the packet OK.
-*/
-   if (bdp->cbd_sc & BD_ENET_TX_DEF)
-   cep->stats.collisions++;
+   /* Check retry counter, i.e. collision counter */
+   if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
+   /* Note that counter cannot go higher than 15 */
+   cep->stats.collisions+=(bdp->cbd_sc & 
BD_ENET_TX_RCMASK)>>2;
+   }

/* Free the sk buffer associated with this last transmit.
*/
@@ -449,6 +454,7 @@
struct  scc_enet_private *cep;
volatile cbd_t  *bdp;
struct  sk_buff *skb;
+   s

[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-15 Thread Hans Feldt

On 11/13/02 11:12 PM, Dan Malek wrote:

> This isn't something new that hasn't been tried before.  The problem
> in the past with non-coherent processors, incoming DMA, and skbufs is
> the buffers would share cache lines with other data which would get
> corrupted as the result of the invalidate for the DMA.  Typically,
> data that was corrupted were flags and control information for the IP

The problem I was describing related to this patch was due to the L1
cache replacement algorithm. The L1 cache flushes a line to main memory
when it's full with some LRU algo. Thus if you have no snooping
(860/405), this can interfere with the DMA into the same piece of memory.

Invalidating the whole buffer before giving it to DMA solves this and
the cost of doing it on the whole buffer is not much. You have already
done the big optimisation in removing the memcpy and changing flush to
invalidate.

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Joakim Tjernlund

> Joakim Tjernlund wrote:
>
> > OK, anyone against? Dan?
>
> I'm currently looking at the patches and I'll be integrating something
> that hopefully works :-)
Please tell me if there is something in that patch you don't like(besides the
moving the invalidate call).

>
> This isn't something new that hasn't been tried before.  The problem
> in the past with non-coherent processors, incoming DMA, and skbufs is
> the buffers would share cache lines with other data which would get
> corrupted as the result of the invalidate for the DMA.  Typically,
> data that was corrupted were flags and control information for the IP
> stack, and under "normal" use you wouldn't notice this.  However,
> forwarding/bridging applications would fail to work properly and you
> would sometimes see packet retransmits that weren't necessary.
>
> The "trick" is to ensure you allocate a larger than necessary sk buffer
> and then align the start and end such that they consume entire cache
> lines.  There has been sufficient discussion about this that I hope
> the sk buffer mechanism will allow this alignment now, as it didn't
> work well in the past.  This is what I want to check out when I
> apply and test the patches.

Tell me about it, I got severely bitten by a non cache aligned invalidate call
in the i2c-algo-8xx.c driver :-(

I too checked carefully that the buffer returned from 
__dev_alloc_skb()/dev_alloc_skb()
cache aligned, turns out that it kmalloc's a buffer and reserves 16 bytes in the
beginning so it's safe.

>
> This isn't necessary on the 8260 family due to cache snooping, but it
> is required on the 8xx.
>
> Of course, a packet checksum still needs to be performed, and if it
> is done as part of the data copy (and if the IP stack doesn't do it
> again), it would seem that this implementation rather than DMA would
> be more efficient.

Are referring to eth_copy_and_sum()? That function has never done
a csum, just a plain memcpy(). The IP stack has always done
it's own csum(just as well since it would be doing this in IRQ context),
unless you set ip_summed(I think).

Perhaps a backwards memcpy() would be more efficient? That way
the  IP header get copied last and will be in cache longer. I believe memmove()
will do that.

Some drivers also try cache align the IP header. I tried that to but
eth_type_trans() could not handle this.

Finally, why does passing the Ethernet CRC upwards mess-up bridging 
applications?

Jocke
>
> Thanks.
>
>
> -- Dan
>

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Joakim Tjernlund

> - Original Message -
> From: Joakim Tjernlund 
>
> > You may be right, perhaps one must invalidate the whole buffer before 
> > giving it
> > to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
> > was freed and the dcache must write back data to free up space and the 
> > buffer,
> > which now is owned by the CPM, get written to.
>
> I beleive this could happen. Since IP does not perform checksumming
> but relies on the link (don't know this really) in that matter, I guess
> the application could get wrong data...

OK, anyone against? Dan?

>
> > I have not seen any corrupted packets and you are the first to report
> > any problems.
>
> Did you run any data integrity tests?
No, normal use, lots of pings etc.

>
> > What modifications have you done?
>
> I haven't used your driver patch. I used the __idea__ of delaying the
> invalidate to the point where you know how much has been received. This was
> in an RTOS ATM driver for a 405. An integrity test showed that some few
> packets was wrong. Data was changed by means of cache lines.

Did you notice any difference in performance when moving the invalidate call?

Any other problems with the patch?

Dan, would you like a new patch or will you fix this?

 Jocke

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Hans Feldt

- Original Message -
From: Joakim Tjernlund <[EMAIL PROTECTED]>

> You may be right, perhaps one must invalidate the whole buffer before giving 
> it
> to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
> was freed and the dcache must write back data to free up space and the buffer,
> which now is owned by the CPM, get written to.

I beleive this could happen. Since IP does not perform checksumming
but relies on the link (don't know this really) in that matter, I guess
the application could get wrong data...

> I have not seen any corrupted packets and you are the first to report
> any problems.

Did you run any data integrity tests?

> What modifications have you done?

I haven't used your driver patch. I used the __idea__ of delaying the
invalidate to the point where you know how much has been received. This was
in an RTOS ATM driver for a 405. An integrity test showed that some few
packets was wrong. Data was changed by means of cache lines.

> What CPU?
> Post the driver please.

Irrelevant, non-linux

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Joakim Tjernlund

> On 10/24/02 04:23 PM, Joakim Tjernlund wrote:
> > Hi
> >
> > This is the second version of my patch that removes the expensive memcpy of
> > received
> > ethernet frames in interrupt context.
>
> Isn't it so that this patch works because you have snooping? Without
> snooping the driver would fail because of cache line replacement which
> could trash received data.

I don't have snooping(mpc860).

>
> The buffer invalidate in this case is unneeded. But if you wanna
> optimise you should invalidate the whole buffer before giving it to DMA
> thus getting rid of the flush implied by memory coherence.

In my first version I did invalidate the whole buffer BEFORE I gave it
to the CPM/DMA. I moved it because I thougth it was better to invalidate
after I received it, since the packet len is known at that time
so I don't have to invalidate the whole buffer, just the received number of 
bytes.

>
> Tried this out in a driver for a processor without snooping, now and
> then received packets got corrupted, cache line wise...

You may be right, perhaps one must invalidate the whole buffer before giving it
to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
was freed and the dcache must write back data to free up space and the buffer,
which now is owned by the CPM, get written to.

I have not seen any corrupted packets and you are the first to report
any problems. What modifications have you done?

What CPU?
Post the driver please.

 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Dan Malek

Joakim Tjernlund wrote:

> OK, anyone against? Dan?

I'm currently looking at the patches and I'll be integrating something
that hopefully works :-)

This isn't something new that hasn't been tried before.  The problem
in the past with non-coherent processors, incoming DMA, and skbufs is
the buffers would share cache lines with other data which would get
corrupted as the result of the invalidate for the DMA.  Typically,
data that was corrupted were flags and control information for the IP
stack, and under "normal" use you wouldn't notice this.  However,
forwarding/bridging applications would fail to work properly and you
would sometimes see packet retransmits that weren't necessary.

The "trick" is to ensure you allocate a larger than necessary sk buffer
and then align the start and end such that they consume entire cache
lines.  There has been sufficient discussion about this that I hope
the sk buffer mechanism will allow this alignment now, as it didn't
work well in the past.  This is what I want to check out when I
apply and test the patches.

This isn't necessary on the 8260 family due to cache snooping, but it
is required on the 8xx.

Of course, a packet checksum still needs to be performed, and if it
is done as part of the data copy (and if the IP stack doesn't do it
again), it would seem that this implementation rather than DMA would
be more efficient.

Thanks.


-- Dan


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Hans Feldt

On 10/24/02 04:23 PM, Joakim Tjernlund wrote:
> Hi
>
> This is the second version of my patch that removes the expensive memcpy of
> received
> ethernet frames in interrupt context.

Isn't it so that this patch works because you have snooping? Without
snooping the driver would fail because of cache line replacement which
could trash received data.

The buffer invalidate in this case is unneeded. But if you wanna
optimise you should invalidate the whole buffer before giving it to DMA
thus getting rid of the flush implied by memory coherence.

Tried this out in a driver for a processor without snooping, now and
then received packets got corrupted, cache line wise...

Just trying to understand how things really work, please correct me if I
am wrong.

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-01 Thread Joakim Tjernlund

Hi Tom

Will you apply this patch?  No problems reported so far.

 Jocke

> >
> > On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:
> >
> > > This is the second version of my patch that removes the expensive memcpy 
> > > of
> > > received
> > > ethernet frames in interrupt context.
> > >
> > > I have 1 report(from Ricardo Scop) of 20% increase in packets/second, 
> > > packet
> > > size 1500 when
> > > applied to 8260 FEC(needs to be applied manually). But min packet size
> > > decreased with 10 %.
> > > This version should fix the 10% decrease case.
> > >
> > > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > > 8260/fcc_enet.c with little effort.
> > >
> > > Better fix a bug in set_multicast_list(), move the dmi list forward when
> > > walking it(dmi = dmi->next;)
> > >
> > > New stuff:
> > >- Configrable: copy small packets or pass them directly, see
> > > COPY_SMALL_FRAMES in code.
> > >- Collision reporting fix form Thomas Lange.
> > >- Don't pass receive frames which has error upwards.
> > >- Report RX_OV errors as fifo errors, not crc errors.
> > >
> > > Please test and report any problems and performace improvements.
> >
> > All of this sounds good, but can you please break this one patch up into
> > seperate logical patches?
>
> I rather not.
>
> The logical pices in this patch are:
>  1) remove the expensive memcpy.
>- This is the main part of the patch.
>
>  2) dmi->next fix, which is a one liner.
> Don't pass receive frames which has error upwards.
>- Trival stuff.
>
>  3) Collision reporting fix form Thomas Lange.
> Report RX_OV errors as fifo errors, not crc errors.
>- These are just minor accounting fixes.
>
> So I don't think it's worth the effort. Let's just wait a few days and see
> if it was success or not.
>
> Folks, please report you findings to the list.
>
> Jocke
>


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-28 Thread Joakim Tjernlund

Hi Stephan

> Hi Jocke,
>
> I just had a look at the code. So far it is running fine. on my board. :)
> Following question:
> In scc-enet_rx you use invalidate_dcache_range() for the skb data area.
> Why don't you do the same in scc_enet_init() for the first set of skb's?
> Isn't it required too at this place.

Thanks for trying this!

I did this in my first version, but in my second version I moved the
invalidate_dcache_range() to AFTER the ethernet frame had been received.

invalidate_dcache_range() can either be called when you add the buffer to the
HW(BD) or after you have received a packet but before you try to use it. Doing 
it
after has one advantage, you know the packet len and therefore you only need
to invalidate "packet len" bytes.

In my tree I also removed the "sync" instruction in  invalidate_dcache_range()
because I don't think it's needed. My contact at Motorola thinks so too.
I have been running without the sync instruction for 2-3 weeks and
has yet to see any problems. Would be intresting to hear if others
can try this out and if it gives any performance improvement.

Jocke
>
> Thanks, Stephan
>
> > -Original Message-
> > From: owner-linuxppc-embedded at lists.linuxppc.org
> > [mailto:owner-linuxppc-embedded at lists.linuxppc.org]On Behalf Of Joakim
> > Tjernlund
> > Sent: Donnerstag, 24. Oktober 2002 16:24
> > To: linuxppc-embedded at lists.linuxppc.org
> > Cc: scop at digitel.com.br; thomas at corelatus.com
> > Subject: [PATCH] arch/ppc/8xx_io/enet.c, version 2
> >
> >
> >
> > Hi
> >
> > This is the second version of my patch that removes the expensive
> > memcpy of
> > received
> > ethernet frames in interrupt context.
> >
> > I have 1 report(from Ricardo Scop) of 20% increase in
> > packets/second, packet
> > size 1500 when
> > applied to 8260 FEC(needs to be applied manually). But min packet size
> > decreased with 10 %.
> > This version should fix the 10% decrease case.
> >
> > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > 8260/fcc_enet.c with little effort.
> >
> > Better fix a bug in set_multicast_list(), move the dmi list forward when
> > walking it(dmi = dmi->next;)
> >
> > New stuff:
> >- Configrable: copy small packets or pass them directly, see
> > COPY_SMALL_FRAMES in code.
> >- Collision reporting fix form Thomas Lange.
> >- Don't pass receive frames which has error upwards.
> >- Report RX_OV errors as fifo errors, not crc errors.
> >
> > Please test and report any problems and performace improvements.
> >
> > Jocke
> >


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-28 Thread Stephan Linke

Hi Jocke,

I just had a look at the code. So far it is running fine. on my board. :)
Following question:
In scc-enet_rx you use invalidate_dcache_range() for the skb data area.
Why don't you do the same in scc_enet_init() for the first set of skb's?
Isn't it required too at this place.

Thanks, Stephan

> -Original Message-
> From: owner-linuxppc-embedded at lists.linuxppc.org
> [mailto:owner-linuxppc-embedded at lists.linuxppc.org]On Behalf Of Joakim
> Tjernlund
> Sent: Donnerstag, 24. Oktober 2002 16:24
> To: linuxppc-embedded at lists.linuxppc.org
> Cc: scop at digitel.com.br; thomas at corelatus.com
> Subject: [PATCH] arch/ppc/8xx_io/enet.c, version 2
>
>
>
> Hi
>
> This is the second version of my patch that removes the expensive
> memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in
> packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
> walking it(dmi = dmi->next;)
>
> New stuff:
>- Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>- Collision reporting fix form Thomas Lange.
>- Don't pass receive frames which has error upwards.
>- Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.
>
> Jocke
>
> --- arch/ppc/8xx_io/enet.c.orgMon Oct 21 14:35:59 2002
> +++ arch/ppc/8xx_io/enet.cThu Oct 24 15:48:25 2002
> @@ -34,7 +34,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -86,6 +85,14 @@
>   * All functions are directly controlled using I/O pins.  See
> .
>   */
>
> +/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
> packets
> + * at a small performance hit. Note performance testing needed */
> +#define COPY_SMALL_FRAMES 1
> +
> +#ifdef COPY_SMALL_FRAMES
> +  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
> internal use */
> +#endif
> +
>  /* The transmitter timeout
>   */
>  #define TX_TIMEOUT   (2*HZ)
> @@ -97,19 +104,17 @@
>   * the skbuffer directly.
>   */
>  #ifdef CONFIG_ENET_BIG_BUFFERS
> -#define CPM_ENET_RX_PAGES32
> -#define CPM_ENET_RX_FRSIZE   2048
> -#define CPM_ENET_RX_FRPPG(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
> -#define RX_RING_SIZE (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
> -#define TX_RING_SIZE 64  /* Must be power of two */
> -#define TX_RING_MOD_MASK 63  /*   for this to work */
> +  #define RX_RING_SIZE   64
> +  #define TX_RING_SIZE   64  /* Must be power of
> two for this to work */
>  #else
> -#define CPM_ENET_RX_PAGES4
> -#define CPM_ENET_RX_FRSIZE   2048
> -#define CPM_ENET_RX_FRPPG(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
> -#define RX_RING_SIZE (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
> -#define TX_RING_SIZE 8   /* Must be power of two */
> -#define TX_RING_MOD_MASK 7   /*   for this to work */
> +  #define RX_RING_SIZE   8
> +  #define TX_RING_SIZE   8   /* Must be power of
> two for this to work */
> +#endif
> +#define TX_RING_MOD_MASK (TX_RING_SIZE-1)
> +
> +#define CPM_ENET_RX_FRSIZE   1600 /* must be a multiple of cache line */
> +#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
> +#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
>  #endif
>
>  /* The CPM stores dest/src/type, data, and checksum for receive packets.
> @@ -143,7 +148,7 @@
>   /* Virtual addresses for the receive buffers because we can't
>* do a __va() on them anymore.
>*/
> - unsigned char *rx_vaddr[RX_RING_SIZE];
> + void*rx_vaddr[RX_RING_SIZE];
>   struct  net_device_stats stats;
>   uinttx_full;
>   spinlock_t lock;
> @@ -370,11 +375,11 @@
>
>   cep->stats.tx_packets++;
>
> - /* Deferred means some collisions occurred during transmit,
> -  * but we eventually sent the packet OK.
> -  */
> - if (bdp->cbd_sc & BD_ENET_TX_DEF)
> - cep->stats.collisions++;
> + /* Check retry counter, i.e. collision counter */
> + if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
> + /* Note th

[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-24 Thread Joakim Tjernlund

>
>
> On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:
>
> > This is the second version of my patch that removes the expensive memcpy of
> > received
> > ethernet frames in interrupt context.
> >
> > I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> > size 1500 when
> > applied to 8260 FEC(needs to be applied manually). But min packet size
> > decreased with 10 %.
> > This version should fix the 10% decrease case.
> >
> > This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> > 8260/fcc_enet.c with little effort.
> >
> > Better fix a bug in set_multicast_list(), move the dmi list forward when
> > walking it(dmi = dmi->next;)
> >
> > New stuff:
> >- Configrable: copy small packets or pass them directly, see
> > COPY_SMALL_FRAMES in code.
> >- Collision reporting fix form Thomas Lange.
> >- Don't pass receive frames which has error upwards.
> >- Report RX_OV errors as fifo errors, not crc errors.
> >
> > Please test and report any problems and performace improvements.
>
> All of this sounds good, but can you please break this one patch up into
> seperate logical patches?

I rather not.

The logical pices in this patch are:
 1) remove the expensive memcpy.
   - This is the main part of the patch.

 2) dmi->next fix, which is a one liner.
Don't pass receive frames which has error upwards.
   - Trival stuff.

 3) Collision reporting fix form Thomas Lange.
Report RX_OV errors as fifo errors, not crc errors.
   - These are just minor accounting fixes.

So I don't think it's worth the effort. Let's just wait a few days and see
if it was success or not.

Folks, please report you findings to the list.

Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-24 Thread Joakim Tjernlund

Hi

This is the second version of my patch that removes the expensive memcpy of
received
ethernet frames in interrupt context.

I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
size 1500 when
applied to 8260 FEC(needs to be applied manually). But min packet size
decreased with 10 %.
This version should fix the 10% decrease case.

This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
8260/fcc_enet.c with little effort.

Better fix a bug in set_multicast_list(), move the dmi list forward when
walking it(dmi = dmi->next;)

New stuff:
   - Configrable: copy small packets or pass them directly, see
COPY_SMALL_FRAMES in code.
   - Collision reporting fix form Thomas Lange.
   - Don't pass receive frames which has error upwards.
   - Report RX_OV errors as fifo errors, not crc errors.

Please test and report any problems and performace improvements.

Jocke

--- arch/ppc/8xx_io/enet.c.org  Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c  Thu Oct 24 15:48:25 2002
@@ -34,7 +34,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -86,6 +85,14 @@
  * All functions are directly controlled using I/O pins.  See
.
  */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
packets
+ * at a small performance hit. Note performance testing needed */
+#define COPY_SMALL_FRAMES 1
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
internal use */
+#endif
+
 /* The transmitter timeout
  */
 #define TX_TIMEOUT (2*HZ)
@@ -97,19 +104,17 @@
  * the skbuffer directly.
  */
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES  32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   64  /* Must be power of two */
-#define TX_RING_MOD_MASK   63  /*   for this to work */
+  #define RX_RING_SIZE 64
+  #define TX_RING_SIZE 64  /* Must be power of two for this to 
work */
 #else
-#define CPM_ENET_RX_PAGES  4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   8   /* Must be power of two */
-#define TX_RING_MOD_MASK   7   /*   for this to work */
+  #define RX_RING_SIZE 8
+  #define TX_RING_SIZE 8   /* Must be power of two for this to 
work */
+#endif
+#define TX_RING_MOD_MASK   (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1600 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
/* Virtual addresses for the receive buffers because we can't
 * do a __va() on them anymore.
 */
-   unsigned char *rx_vaddr[RX_RING_SIZE];
+   void*rx_vaddr[RX_RING_SIZE];
struct  net_device_stats stats;
uinttx_full;
spinlock_t lock;
@@ -370,11 +375,11 @@

cep->stats.tx_packets++;

-   /* Deferred means some collisions occurred during transmit,
-* but we eventually sent the packet OK.
-*/
-   if (bdp->cbd_sc & BD_ENET_TX_DEF)
-   cep->stats.collisions++;
+   /* Check retry counter, i.e. collision counter */
+   if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
+   /* Note that counter cannot go higher than 15 */
+   cep->stats.collisions+=(bdp->cbd_sc & 
BD_ENET_TX_RCMASK)>>2;
+   }

/* Free the sk buffer associated with this last transmit.
*/
@@ -449,6 +454,7 @@
struct  scc_enet_private *cep;
volatile cbd_t  *bdp;
struct  sk_buff *skb;
+   struct  sk_buff *skb_tmp;
ushort  pkt_len;

cep = (struct scc_enet_private *)dev->priv;
@@ -461,7 +467,8 @@
 for (;;) {
if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
break;
-
+
+#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
 #ifndef final_version
/* Since we have allocated space to hold a complete frame, both
 * the first and last indicators should be set.
@@ -470,51 +477,62 @@
(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
printk("CPM ENET: rcv is not first+last\n");
 #endif
-
-   /* Frame too long or too short.
-   */
-   if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
-   cep->stats.rx_length_errors++;
-   if (bdp->cbd_sc & BD_ENET_RX_NO)/* Frame alignment */
-   cep->stats.rx_frame_errors++;
-   if (bdp

[PATCH] arch/ppc/8xx_io/enet.c

2002-10-24 Thread Joakim Tjernlund

>
> Ricardo Scop wrote:
> >
> > On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
> > > On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
> > > > Hi
> > > >
> > > > Here is a patch to drop the expensive memcpy of received ethernet frames
> > > > in interrupt context. I have not done any bench marking, but mounting a
> > > > NFS rootfs feels faster.
> > > >
> > > > I am using a heavily modified enet.c in my system, but I think I got the
> > > > patch correct.
> > > >
> > > > Also fixed a bug in set_multicast_list(), move the dmi list forward when
> > > > walking it(dmi = dmi->next;)
> > > >
> > > > Comments? Anyone care to do some benchmarking?
> > >
> > > No comments so far, no one interested in this?
> > I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
> > ethernet driver and it worked fine  I had a 20% increase in routing
> > throughput with the  patch installed!
> > ...
>
> Did I understand that correctly that this patch would work with mith MPC8xx 
> FEC
> as well!?!?

Yes, but you have to adapt it a little.

 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-24 Thread Steven Scholz

Ricardo Scop wrote:
>
> On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
> > On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
> > > Hi
> > >
> > > Here is a patch to drop the expensive memcpy of received ethernet frames
> > > in interrupt context. I have not done any bench marking, but mounting a
> > > NFS rootfs feels faster.
> > >
> > > I am using a heavily modified enet.c in my system, but I think I got the
> > > patch correct.
> > >
> > > Also fixed a bug in set_multicast_list(), move the dmi list forward when
> > > walking it(dmi = dmi->next;)
> > >
> > > Comments? Anyone care to do some benchmarking?
> >
> > No comments so far, no one interested in this?
> I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
> ethernet driver and it worked fine  I had a 20% increase in routing
> throughput with the  patch installed!
> ...

Did I understand that correctly that this patch would work with mith MPC8xx FEC
as well!?!?

Cheers,

Steven

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-24 Thread Joakim Tjernlund

> > >
> > > Comments? Anyone care to do some benchmarking?
> >
> > No comments so far, no one interested in this?
> I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
> ethernet driver and it worked fine  I had a 20% increase in routing
> throughput with the  patch installed!

Cool!

>
> The bug fix in set_multicast_list worked too. But, since there's a continue
> clause inside the for command in this routine, I would suggest that the
> (dmi=dmi->next) command go inside the for increment clause, instead of the
> place you've put it; that is:
>   for (...;...; i++, dmi = dmi->next) {...}

Yes thats better, thanks.
> >
> > A question, why do enet.c pass bad frames to netif_rx? Only late collisions
> > are disregarded, the rest is passed on.
> IMHO, this is another bug...
me too.

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-24 Thread Tom Rini

On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:

> This is the second version of my patch that removes the expensive memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
> walking it(dmi = dmi->next;)
>
> New stuff:
>- Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>- Collision reporting fix form Thomas Lange.
>- Don't pass receive frames which has error upwards.
>- Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.

All of this sounds good, but can you please break this one patch up into
seperate logical patches?

--
Tom Rini (TR1265)
http://gate.crashing.org/~trini/

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-23 Thread Ricardo Scop

On Wednesday 23 October 2002 13:32, Ricardo Scop wrote:
> On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
[snip]

> > No comments so far, no one interested in this?
>
> I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
> ethernet driver and it worked fine  I had a 20% increase in routing
> throughput with the  patch installed!
>
I forgot to mention the bad news. The increase mentioned above was obtained
with large packets (1500 bytes) traversing our board. With very small packets
(64 bytes), we  experimented a 10% DEcrease in PPS (packets per second).
We're still testing, but I guess the overall performance will be better with
the patch installed.

-Scop

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-23 Thread Ricardo Scop

On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
> On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
> > Hi
> >
> > Here is a patch to drop the expensive memcpy of received ethernet frames
> > in interrupt context. I have not done any bench marking, but mounting a
> > NFS rootfs feels faster.
> >
> > I am using a heavily modified enet.c in my system, but I think I got the
> > patch correct.
> >
> > Also fixed a bug in set_multicast_list(), move the dmi list forward when
> > walking it(dmi = dmi->next;)
> >
> > Comments? Anyone care to do some benchmarking?
>
> No comments so far, no one interested in this?
I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
ethernet driver and it worked fine  I had a 20% increase in routing
throughput with the  patch installed!

The bug fix in set_multicast_list worked too. But, since there's a continue
clause inside the for command in this routine, I would suggest that the
(dmi=dmi->next) command go inside the for increment clause, instead of the
place you've put it; that is:
for (...;...; i++, dmi = dmi->next) {...}
>
> A question, why do enet.c pass bad frames to netif_rx? Only late collisions
> are disregarded, the rest is passed on.
IMHO, this is another bug...

>

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-23 Thread Joakim Tjernlund

On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
> Hi
>
> Here is a patch to drop the expensive memcpy of received ethernet frames in
> interrupt context. I have not done any bench marking, but mounting a NFS
> rootfs feels faster.
>
> I am using a heavily modified enet.c in my system, but I think I got the
> patch correct.
>
> Also fixed a bug in set_multicast_list(), move the dmi list forward when
> walking it(dmi = dmi->next;)
>
> Comments? Anyone care to do some benchmarking?

No comments so far, no one interested in this?

A question, why do enet.c pass bad frames to netif_rx? Only late collisions
are disregarded, the rest is passed on.

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-21 Thread Joakim Tjernlund

Hi

Here is a patch to drop the expensive memcpy of received ethernet frames in
interrupt context. I have not done any bench marking, but mounting a NFS rootfs 
feels faster.

I am using a heavily modified enet.c in my system, but I think I got the patch 
correct.

Also fixed a bug in set_multicast_list(), move the dmi list forward when 
walking it(dmi = dmi->next;)

Comments? Anyone care to do some benchmarking?

Jocke

 --- arch/ppc/8xx_io/enet.c.org Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c  Mon Oct 21 15:06:04 2002
@@ -96,18 +96,17 @@
  * We don't need to allocate pages for the transmitter.  We just use
  * the skbuffer directly.
  */
+#define CPM_ENET_RX_FRSIZE 1600   /* MUST be a multiple of cache line! */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
+#endif
+
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES  32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
+#define RX_RING_SIZE   64
 #define TX_RING_SIZE   64  /* Must be power of two */
 #define TX_RING_MOD_MASK   63  /*   for this to work */
 #else
-#define CPM_ENET_RX_PAGES  4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
+#define RX_RING_SIZE   8
 #define TX_RING_SIZE   8   /* Must be power of two */
 #define TX_RING_MOD_MASK   7   /*   for this to work */
 #endif
@@ -143,7 +142,7 @@
/* Virtual addresses for the receive buffers because we can't
 * do a __va() on them anymore.
 */
-   unsigned char *rx_vaddr[RX_RING_SIZE];
+   void*rx_vaddr[RX_RING_SIZE];
struct  net_device_stats stats;
uinttx_full;
spinlock_t lock;
@@ -449,6 +448,7 @@
struct  scc_enet_private *cep;
volatile cbd_t  *bdp;
struct  sk_buff *skb;
+   struct  sk_buff *skb_tmp;
ushort  pkt_len;

cep = (struct scc_enet_private *)dev->priv;
@@ -497,25 +497,26 @@
pkt_len = bdp->cbd_datlen;
cep->stats.rx_bytes += pkt_len;

-   /* This does 16 byte alignment, much more than we need.
+   /* This does 16 byte alignment, exactly what we need.
 * The packet length includes FCS, but we don't want to
 * include that when passing upstream as it messes up
 * bridging applications.
 */
-   skb = dev_alloc_skb(pkt_len-4);
-
-   if (skb == NULL) {
+   skb_tmp = dev_alloc_skb(CPM_ENET_RX_FRSIZE);
+   if (skb_tmp == NULL) {
printk("%s: Memory squeeze, dropping packet.\n", 
dev->name);
cep->stats.rx_dropped++;
-   }
-   else {
+   } else {
+   skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
skb->dev = dev;
skb_put(skb,pkt_len-4); /* Make room */
-   eth_copy_and_sum(skb,
-   cep->rx_vaddr[bdp - cep->rx_bd_base],
-   pkt_len-4, 0);
skb->protocol=eth_type_trans(skb,dev);
netif_rx(skb);
+
+   invalidate_dcache_range((unsigned long) skb_tmp->data,
+   (unsigned long) skb_tmp->data + 
CPM_ENET_RX_FRSIZE);
+   bdp->cbd_bufaddr = __pa(skb_tmp->data);
+   cep->rx_vaddr[bdp - cep->rx_bd_base] = skb_tmp;
}
}

@@ -631,6 +632,7 @@
/* this delay is necessary here -- Cort */
udelay(10);
while (cpmp->cp_cpcr & CPM_CR_FLG);
+   dmi = dmi->next;
}
}
}
@@ -647,8 +649,7 @@
struct net_device *dev;
struct scc_enet_private *cep;
int i, j, k;
-   unsigned char   *eap, *ba;
-   dma_addr_t  mem_addr;
+   unsigned char   *eap;
bd_t*bd;
volatilecbd_t   *bdp;
volatilecpm8xx_t*cp;
@@ -839,22 +840,15 @@

bdp = cep->rx_bd_base;
k = 0;
-   for (i=0; icbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
-   bdp->cbd_bufaddr = mem_addr;
-   cep->rx_vaddr[k++] = ba;
-   mem_addr += CPM_ENET_RX_FRSIZE;
-   ba += CPM_ENET_RX_FRSIZE;
-   bdp++;
-   }
+   /* Initialize the BDs. */
+   for (j=0; j < RX_RING_SIZE; j++) {
+   struct  sk_buff * skb