[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Pantelis Antoniou
Joakim Tjernlund wrote:

Hi,

I am on an 862. Anyway I can't find another definition of dma_cache_inv() but 
the NO OP in asm-ppc/io.h. Could you give me e hint
where it is defined in your kernel?


It's in asm-ppc/io.h (2.4.20) and there are 2 definitions of dma_cache_inv() 
which depends on
CONFIG_NOT_COHERENT_CACHE(should be defined for 8xx). What kernel version are 
you running?

 Jocke

Thanks, Stephan


-Original Message-
From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
Sent: Montag, 3. Februar 2003 18:23
To: Stephan Linke
Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3



Hi Jocke,

in your latest patch you are using dma_cache_inv() instead of 
invalidate_dcache_range().
The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and it's a 
do{}while(0).
Are you shure that this was your intention? It seames to me like you could 
remove that call as well.

I guess you are on 8260? On 8260 there is no need for 
invalidate_dcache_range() since
it's the CPM is cache coherent. On 8xx it is not cache coherent. I switched
to dma_cache_inv() because it's a no op on 8260 and a 
invalidate_dcache_range() on 8xx so
it would be easy to adapt the patch to both CPU's.

 Jocke










Hi guys,

I have created a patch that applies cleanly to the head of
linuxppc_2_4_devel
and it works great.

Keep up the good work!

Pantelis


-- next part --
An embedded and charset-unspecified text was scrubbed...
Name: joakim-enet-8xx-panto.patch
Url: 
http://ozlabs.org/pipermail/linuxppc-embedded/attachments/20030210/547be440/attachment.txt
 


[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Steven Scholz

Hi there,

has someone actually ported Jocke's patch to 8xx FEC !?

Any benchmarks?

Thanks,

Steven


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

 has someone actually ported Jocke's patch to 8xx FEC !?

I just did a port, but the patch is on my home computer. I can send
it later ...

It's not tested at all since I don't have the FEC connected on my boards.


 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

 Hi guys,

 I have created a patch that applies cleanly to the head of
 linuxppc_2_4_devel
 and it works great.

 Keep up the good work!

 Pantelis

Hi Pantelis

I don't follow you here. Didn't my patch apply cleanly against 
linuxppc_2_4_devel?
I generated my patch against linuxppc_2_4_devel(I think).

Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

 Hi there,

 has someone actually ported Jocke's patch to 8xx FEC !?

 Any benchmarks?

 Thanks,

 Steven

Actually I had a copy at work so here it goes. Not tested(not even compiled)

 Jocke

--- arch/ppc/8xx_io/fec.c   Fri Nov  1 14:44:05 2002
+++ arch/ppc/8xx_io/new_fec.c   Sun Feb  9 14:24:45 2003
@@ -83,26 +83,34 @@
 } phy_info_t;
 #endif /* CONFIG_USE_MDIO */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small packets
+ * at a small performance hit. Note performance testing needed */
+/* #define COPY_SMALL_FRAMES 1  */
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for internal 
use */
+#endif
+
+
 /* The number of Tx and Rx buffers.  These are allocated from the page
  * pool.  The code may assume these are power of two, so it is best
  * to keep them that size.
  * We don't need to allocate pages for the transmitter.  We just use
  * the skbuffer directly.
  */
+
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define FEC_ENET_RX_PAGES  16
-#define FEC_ENET_RX_FRSIZE 2048
-#define FEC_ENET_RX_FRPPG  (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
-#define TX_RING_SIZE   16  /* Must be power of two */
-#define TX_RING_MOD_MASK   15  /*   for this to work */
+  #define RX_RING_SIZE 32
+  #define TX_RING_SIZE 16  /* Must be power of two for this to 
work */
 #else
-#define FEC_ENET_RX_PAGES  4
-#define FEC_ENET_RX_FRSIZE 2048
-#define FEC_ENET_RX_FRPPG  (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
-#define TX_RING_SIZE   8   /* Must be power of two */
-#define TX_RING_MOD_MASK   7   /*   for this to work */
+  #define RX_RING_SIZE 8
+  #define TX_RING_SIZE 8   /* Must be power of two for this to 
work */
+#endif
+#define TX_RING_MOD_MASK   (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1552 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* Interrupt events/masks.
@@ -573,7 +581,7 @@

/* Check for errors. */
if (bdp-cbd_sc  (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
-  BD_ENET_RX_CR | BD_ENET_RX_OV)) {
+  BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)) {
fep-stats.rx_errors++;
if (bdp-cbd_sc  (BD_ENET_RX_LG | BD_ENET_RX_SH)) {
/* Frame too long or too short. */
@@ -585,41 +593,53 @@
fep-stats.rx_crc_errors++;
if (bdp-cbd_sc  BD_ENET_RX_OV)/* FIFO overrun */
fep-stats.rx_crc_errors++;
-   }
-
-   /* Report late collisions as a frame error.
-* On this error, the BD is closed, but we don't know what we
-* have in the buffer.  So, just drop this frame on the floor.
-*/
-   if (bdp-cbd_sc  BD_ENET_RX_CL) {
-   fep-stats.rx_errors++;
-   fep-stats.rx_frame_errors++;
-   goto rx_processing_done;
-   }
-
-   /* Process the incoming frame.
-*/
-   fep-stats.rx_packets++;
-   pkt_len = bdp-cbd_datlen;
-   fep-stats.rx_bytes += pkt_len;
-   data = fep-rx_vaddr[bdp - fep-rx_bd_base];
-
-   /* This does 16 byte alignment, exactly what we need.
-* The packet length includes FCS, but we don't want to
-* include that when passing upstream as it messes up
-* bridging applications.
-*/
-   skb = dev_alloc_skb(pkt_len-4);
-
-   if (skb == NULL) {
-   printk(%s: Memory squeeze, dropping packet.\n, dev-name);
-   fep-stats.rx_dropped++;
+   if (bdp-cbd_sc  BD_ENET_RX_CL)/* Late collision */
+   fep-stats.rx_frame_errors++;   /* Report as a frame 
error. */
} else {
-   skb-dev = dev;
-   skb_put(skb,pkt_len-4); /* Make room */
-   eth_copy_and_sum(skb, data, pkt_len-4, 0);
-   skb-protocol=eth_type_trans(skb,dev);
-   netif_rx(skb);
+
+   /* Process the incoming frame.
+*/
+   fep-stats.rx_packets++;
+   pkt_len = bdp-cbd_datlen;
+   fep-stats.rx_bytes += pkt_len;
+   pkt_len -= 4; /* The packet length includes FCS, but we don't 
want to
+  * include that when passing upstream as it 
messes up
+  * bridging applications. Is this still true  
*/
+#ifdef COPY_SMALL_FRAMES
+   /* Allocate the next buffer now so we are sure to have one when 
needed
+* This does 16 byte alignment, exactly what we need(L1_CACHE 
aligned). */
+   if(pkt_len  RX_COPYBREAK)
+   skb_tmp = 

[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Pantelis Antoniou
Joakim Tjernlund wrote:

Hi guys,

I have created a patch that applies cleanly to the head of
linuxppc_2_4_devel
and it works great.

Keep up the good work!

Pantelis



Hi Pantelis

I don't follow you here. Didn't my patch apply cleanly against 
linuxppc_2_4_devel?
I generated my patch against linuxppc_2_4_devel(I think).

Jocke





Nope.

Fails at hunks #7 and #8.
Don't worry, it's trivial stuff.

Here is enet.c.rej, if you want to check it.

Regards

-- next part --
An embedded and charset-unspecified text was scrubbed...
Name: enet.c.rej
Url: 
http://ozlabs.org/pipermail/linuxppc-embedded/attachments/20030210/0bc35bce/attachment.txt
 


[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Pantelis Antoniou

Joakim Tjernlund wrote:

Hi Pantelis

I don't follow you here. Didn't my patch apply cleanly against 
linuxppc_2_4_devel?
I generated my patch against linuxppc_2_4_devel(I think).

   Jocke

Nope.

Fails at hunks #7 and #8.
Don't worry, it's trivial stuff.

Here is enet.c.rej, if you want to check it.


hmm, something is broken here. I can apply my patch to my local copy of
8xx_io/enet.c, BK version 1.24 just fine. Whats your version of enet.c?

   Jocke




My version is the head of the BK tree, updated today.

Regards

Pantelis


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-10 Thread Joakim Tjernlund

 Hi Pantelis
 
 I don't follow you here. Didn't my patch apply cleanly against 
 linuxppc_2_4_devel?
 I generated my patch against linuxppc_2_4_devel(I think).
 
 Jocke
 Nope.

 Fails at hunks #7 and #8.
 Don't worry, it's trivial stuff.

 Here is enet.c.rej, if you want to check it.

hmm, something is broken here. I can apply my patch to my local copy of
8xx_io/enet.c, BK version 1.24 just fine. Whats your version of enet.c?

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-04 Thread Stephan Linke

Hi,

I am on an 862. Anyway I can't find another definition of dma_cache_inv() but 
the NO OP in asm-ppc/io.h. Could you give me e hint
where it is defined in your kernel?

Thanks, Stephan

 -Original Message-
 From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
 Sent: Montag, 3. Februar 2003 18:23
 To: Stephan Linke
 Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3


  Hi Jocke,
 
  in your latest patch you are using dma_cache_inv() instead of 
  invalidate_dcache_range().
  The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and it's a 
  do{}while(0).
  Are you shure that this was your intention? It seames to me like you could 
  remove that call as well.

 I guess you are on 8260? On 8260 there is no need for 
 invalidate_dcache_range() since
 it's the CPM is cache coherent. On 8xx it is not cache coherent. I switched
 to dma_cache_inv() because it's a no op on 8260 and a 
 invalidate_dcache_range() on 8xx so
 it would be easy to adapt the patch to both CPU's.

  Jocke




** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-04 Thread Joakim Tjernlund


 Hi,

 I am on an 862. Anyway I can't find another definition of dma_cache_inv() but 
 the NO OP in asm-ppc/io.h. Could you give me e hint
 where it is defined in your kernel?

It's in asm-ppc/io.h (2.4.20) and there are 2 definitions of dma_cache_inv() 
which depends on
CONFIG_NOT_COHERENT_CACHE(should be defined for 8xx). What kernel version are 
you running?

 Jocke

 Thanks, Stephan

  -Original Message-
  From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
  Sent: Montag, 3. Februar 2003 18:23
  To: Stephan Linke
  Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
 
 
   Hi Jocke,
  
   in your latest patch you are using dma_cache_inv() instead of 
   invalidate_dcache_range().
   The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and it's 
   a do{}while(0).
   Are you shure that this was your intention? It seames to me like you 
   could remove that call as well.
 
  I guess you are on 8260? On 8260 there is no need for 
  invalidate_dcache_range() since
  it's the CPM is cache coherent. On 8xx it is not cache coherent. I switched
  to dma_cache_inv() because it's a no op on 8260 and a 
  invalidate_dcache_range() on 8xx so
  it would be easy to adapt the patch to both CPU's.
 
   Jocke
 
 




** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-04 Thread Stephan Linke

Hi Jocke,

I am running a 2.4.18 kernel. Maybe that's the problem?
But let me guess: keeping the invalidate_dcache_range() would be fine as well 
for our 8xx board?

Stephan

 -Original Message-
 From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
 Sent: Dienstag, 4. Februar 2003 10:59
 To: Stephan Linke
 Cc: Linuxppc-Embedded
 Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3


 
  Hi,
 
  I am on an 862. Anyway I can't find another definition of dma_cache_inv() 
  but the NO OP in asm-ppc/io.h. Could you give
 me e hint
  where it is defined in your kernel?

 It's in asm-ppc/io.h (2.4.20) and there are 2 definitions of dma_cache_inv() 
 which depends on
 CONFIG_NOT_COHERENT_CACHE(should be defined for 8xx). What kernel version are 
 you running?

  Jocke
 
  Thanks, Stephan
 
   -Original Message-
   From: Joakim Tjernlund [mailto:joakim.tjernlund at lumentis.se]
   Sent: Montag, 3. Februar 2003 18:23
   To: Stephan Linke
   Subject: RE: [PATCH] arch/ppc/8xx_io/enet.c, version 3
  
  
Hi Jocke,
   
in your latest patch you are using dma_cache_inv() instead of 
invalidate_dcache_range().
The only dma_cache_inv() I can find is in include/asm-ppc/io.h. and 
it's a do{}while (0).
Are you shure that this was your intention? It seames to me like you 
could remove that call as well.
  
   I guess you are on 8260? On 8260 there is no need for 
   invalidate_dcache_range() since
   it's the CPM is cache coherent. On 8xx it is not cache coherent. I 
   switched
   to dma_cache_inv() because it's a no op on 8260 and a 
   invalidate_dcache_range() on 8xx so
   it would be easy to adapt the patch to both CPU's.
  
Jocke
  
  
 
 



** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-03 Thread Joakim Tjernlund

  This is the second version of my patch that removes the expensive memcpy of
  received
  ethernet frames in interrupt context.
 
  I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
  size 1500 when
  applied to 8260 FEC(needs to be applied manually). But min packet size
  decreased with 10 %.
  This version should fix the 10% decrease case.
 
  This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
  8260/fcc_enet.c with little effort.
 
  Better fix a bug in set_multicast_list(), move the dmi list forward when
  walking it(dmi = dmi-next;)
 
  New stuff:
 - Configrable: copy small packets or pass them directly, see
  COPY_SMALL_FRAMES in code.
 - Collision reporting fix form Thomas Lange.
 - Don't pass receive frames which has error upwards.
 - Report RX_OV errors as fifo errors, not crc errors.
 
  Please test and report any problems and performace improvements.

 Hi

 This is the third version of my optimized enet.c patch.
 Changes since version 2:
   1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
  it was invalidated after the packet was received and that could lead to 
 buffer
  corruption in some cases.

   2) use dma_cache_inv() instead of invalidate_dcache_range() since that will 
 work
  for both 8xx and 82xx.

   3) decrease the allocated buffer length.

   4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some 
 memory.

   5) probably some white space changes got in too.

 Any chance to see it the devel tree?

 More than 3 months has passed since version 2 and the only problem reported 
 was
 1) and the fix has been know since mid November.

 Dan, you said you would integrate this patch(or some version of it) in 
 November. I
 think I have waited long enough now. Please do ASAP.

  Jocke
[SNIP]

Zero feedback so far. Is there zero interest to improve the mpc code in 
linuxppc?
Anyhow, I give up. Don't have the energy to resend over and over again.

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-02-03 Thread Pantelis Antoniou

Joakim Tjernlund wrote:

This is the second version of my patch that removes the expensive memcpy of
received
ethernet frames in interrupt context.

I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
size 1500 when
applied to 8260 FEC(needs to be applied manually). But min packet size
decreased with 10 %.
This version should fix the 10% decrease case.

This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
8260/fcc_enet.c with little effort.

Better fix a bug in set_multicast_list(), move the dmi list forward when
walking it(dmi = dmi-next;)

New stuff:
   - Configrable: copy small packets or pass them directly, see
COPY_SMALL_FRAMES in code.
   - Collision reporting fix form Thomas Lange.
   - Don't pass receive frames which has error upwards.
   - Report RX_OV errors as fifo errors, not crc errors.

Please test and report any problems and performace improvements.


Hi

This is the third version of my optimized enet.c patch.
Changes since version 2:
  1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
 it was invalidated after the packet was received and that could lead to 
 buffer
 corruption in some cases.

  2) use dma_cache_inv() instead of invalidate_dcache_range() since that will 
 work
 for both 8xx and 82xx.

  3) decrease the allocated buffer length.

  4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some 
 memory.

  5) probably some white space changes got in too.

Any chance to see it the devel tree?

More than 3 months has passed since version 2 and the only problem reported 
was
1) and the fix has been know since mid November.

Dan, you said you would integrate this patch(or some version of it) in 
November. I
think I have waited long enough now. Please do ASAP.

 Jocke


[SNIP]

Zero feedback so far. Is there zero interest to improve the mpc code in 
linuxppc?
Anyhow, I give up. Don't have the energy to resend over and over again.

   Jocke






Please don't give up.

I'll try it and you'll have my feedback within the week.

Anyhow, what is the deal with 8xx/82xx?
At least the maintainers should ack the patch, and if it is not suitable
for inclusion they should state for what reason.

We don't want to discourage people sending patches.
Right?

Pantelis


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 3

2003-01-28 Thread Joakim Tjernlund

 This is the second version of my patch that removes the expensive memcpy of
 received
 ethernet frames in interrupt context.

 I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
 size 1500 when
 applied to 8260 FEC(needs to be applied manually). But min packet size
 decreased with 10 %.
 This version should fix the 10% decrease case.

 This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
 8260/fcc_enet.c with little effort.

 Better fix a bug in set_multicast_list(), move the dmi list forward when
 walking it(dmi = dmi-next;)

 New stuff:
- Configrable: copy small packets or pass them directly, see
 COPY_SMALL_FRAMES in code.
- Collision reporting fix form Thomas Lange.
- Don't pass receive frames which has error upwards.
- Report RX_OV errors as fifo errors, not crc errors.

 Please test and report any problems and performace improvements.

Hi

This is the third version of my optimized enet.c patch.
Changes since version 2:
  1) invalidate the whole buffer BEFORE it is given to he CPM. Previously
 it was invalidated after the packet was received and that could lead to 
buffer
 corruption in some cases.

  2) use dma_cache_inv() instead of invalidate_dcache_range() since that will 
work
 for both 8xx and 82xx.

  3) decrease the allocated buffer length.

  4) disabled COPY_SMALL_FRAME. Define it somewhere if you want to save some 
memory.

  5) probably some white space changes got in too.

Any chance to see it the devel tree?

More than 3 months has passed since version 2 and the only problem reported was
1) and the fix has been know since mid November.

Dan, you said you would integrate this patch(or some version of it) in 
November. I
think I have waited long enough now. Please do ASAP.

 Jocke

--- arch/ppc/8xx_io/enet.c  Fri Nov  1 14:44:05 2002
+++ arch/ppc/8xx_io/new_enet.c  Sat Jan 25 19:57:50 2003
@@ -34,7 +34,6 @@
 #include linux/ioport.h
 #include linux/slab.h
 #include linux/interrupt.h
-#include linux/pci.h
 #include linux/init.h
 #include linux/delay.h
 #include linux/netdevice.h
@@ -86,6 +85,14 @@
  * All functions are directly controlled using I/O pins.  See asm/commproc.h.
  */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small packets
+ * at a small performance hit. Note performance testing needed */
+/* #define COPY_SMALL_FRAMES 1  */
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for internal 
use */
+#endif
+
 /* The transmitter timeout
  */
 #define TX_TIMEOUT (2*HZ)
@@ -97,19 +104,17 @@
  * the skbuffer directly.
  */
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES  32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   64  /* Must be power of two */
-#define TX_RING_MOD_MASK   63  /*   for this to work */
+  #define RX_RING_SIZE 64
+  #define TX_RING_SIZE 64  /* Must be power of two for this to 
work */
 #else
-#define CPM_ENET_RX_PAGES  4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   8   /* Must be power of two */
-#define TX_RING_MOD_MASK   7   /*   for this to work */
+  #define RX_RING_SIZE 8
+  #define TX_RING_SIZE 8   /* Must be power of two for this to 
work */
+#endif
+#define TX_RING_MOD_MASK   (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1552 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
/* Virtual addresses for the receive buffers because we can't
 * do a __va() on them anymore.
 */
-   unsigned char *rx_vaddr[RX_RING_SIZE];
+   void*rx_vaddr[RX_RING_SIZE];
struct  net_device_stats stats;
uinttx_full;
spinlock_t lock;
@@ -370,11 +375,11 @@

cep-stats.tx_packets++;

-   /* Deferred means some collisions occurred during transmit,
-* but we eventually sent the packet OK.
-*/
-   if (bdp-cbd_sc  BD_ENET_TX_DEF)
-   cep-stats.collisions++;
+   /* Check retry counter, i.e. collision counter */
+   if (bdp-cbd_sc  BD_ENET_TX_RCMASK){
+   /* Note that counter cannot go higher than 15 */
+   cep-stats.collisions+=(bdp-cbd_sc  
BD_ENET_TX_RCMASK)2;
+   }

/* Free the sk buffer associated with this last transmit.
*/
@@ -449,6 +454,7 @@
struct  scc_enet_private 

[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-15 Thread Hans Feldt

On 11/13/02 11:12 PM, Dan Malek wrote:

 This isn't something new that hasn't been tried before.  The problem
 in the past with non-coherent processors, incoming DMA, and skbufs is
 the buffers would share cache lines with other data which would get
 corrupted as the result of the invalidate for the DMA.  Typically,
 data that was corrupted were flags and control information for the IP

The problem I was describing related to this patch was due to the L1
cache replacement algorithm. The L1 cache flushes a line to main memory
when it's full with some LRU algo. Thus if you have no snooping
(860/405), this can interfere with the DMA into the same piece of memory.

Invalidating the whole buffer before giving it to DMA solves this and
the cost of doing it on the whole buffer is not much. You have already
done the big optimisation in removing the memcpy and changing flush to
invalidate.

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Joakim Tjernlund

 On 10/24/02 04:23 PM, Joakim Tjernlund wrote:
  Hi
 
  This is the second version of my patch that removes the expensive memcpy of
  received
  ethernet frames in interrupt context.

 Isn't it so that this patch works because you have snooping? Without
 snooping the driver would fail because of cache line replacement which
 could trash received data.

I don't have snooping(mpc860).


 The buffer invalidate in this case is unneeded. But if you wanna
 optimise you should invalidate the whole buffer before giving it to DMA
 thus getting rid of the flush implied by memory coherence.

In my first version I did invalidate the whole buffer BEFORE I gave it
to the CPM/DMA. I moved it because I thougth it was better to invalidate
after I received it, since the packet len is known at that time
so I don't have to invalidate the whole buffer, just the received number of 
bytes.


 Tried this out in a driver for a processor without snooping, now and
 then received packets got corrupted, cache line wise...

You may be right, perhaps one must invalidate the whole buffer before giving it
to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
was freed and the dcache must write back data to free up space and the buffer,
which now is owned by the CPM, get written to.

I have not seen any corrupted packets and you are the first to report
any problems. What modifications have you done?

What CPU?
Post the driver please.

 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Hans Feldt

- Original Message -
From: Joakim Tjernlund [EMAIL PROTECTED]

 You may be right, perhaps one must invalidate the whole buffer before giving 
 it
 to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
 was freed and the dcache must write back data to free up space and the buffer,
 which now is owned by the CPM, get written to.

I beleive this could happen. Since IP does not perform checksumming
but relies on the link (don't know this really) in that matter, I guess
the application could get wrong data...

 I have not seen any corrupted packets and you are the first to report
 any problems.

Did you run any data integrity tests?

 What modifications have you done?

I haven't used your driver patch. I used the __idea__ of delaying the
invalidate to the point where you know how much has been received. This was
in an RTOS ATM driver for a 405. An integrity test showed that some few
packets was wrong. Data was changed by means of cache lines.

 What CPU?
 Post the driver please.

Irrelevant, non-linux

Cheers,
Hans


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Joakim Tjernlund

 - Original Message -
 From: Joakim Tjernlund joakim.tjernlund at lumentis.se

  You may be right, perhaps one must invalidate the whole buffer before 
  giving it
  to the CPM/DMA. Suppose you reuse a buffer which has been modified before it
  was freed and the dcache must write back data to free up space and the 
  buffer,
  which now is owned by the CPM, get written to.

 I beleive this could happen. Since IP does not perform checksumming
 but relies on the link (don't know this really) in that matter, I guess
 the application could get wrong data...

OK, anyone against? Dan?


  I have not seen any corrupted packets and you are the first to report
  any problems.

 Did you run any data integrity tests?
No, normal use, lots of pings etc.


  What modifications have you done?

 I haven't used your driver patch. I used the __idea__ of delaying the
 invalidate to the point where you know how much has been received. This was
 in an RTOS ATM driver for a 405. An integrity test showed that some few
 packets was wrong. Data was changed by means of cache lines.

Did you notice any difference in performance when moving the invalidate call?

Any other problems with the patch?

Dan, would you like a new patch or will you fix this?

 Jocke

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Dan Malek

Joakim Tjernlund wrote:

 OK, anyone against? Dan?

I'm currently looking at the patches and I'll be integrating something
that hopefully works :-)

This isn't something new that hasn't been tried before.  The problem
in the past with non-coherent processors, incoming DMA, and skbufs is
the buffers would share cache lines with other data which would get
corrupted as the result of the invalidate for the DMA.  Typically,
data that was corrupted were flags and control information for the IP
stack, and under normal use you wouldn't notice this.  However,
forwarding/bridging applications would fail to work properly and you
would sometimes see packet retransmits that weren't necessary.

The trick is to ensure you allocate a larger than necessary sk buffer
and then align the start and end such that they consume entire cache
lines.  There has been sufficient discussion about this that I hope
the sk buffer mechanism will allow this alignment now, as it didn't
work well in the past.  This is what I want to check out when I
apply and test the patches.

This isn't necessary on the 8260 family due to cache snooping, but it
is required on the 8xx.

Of course, a packet checksum still needs to be performed, and if it
is done as part of the data copy (and if the IP stack doesn't do it
again), it would seem that this implementation rather than DMA would
be more efficient.

Thanks.


-- Dan


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-13 Thread Joakim Tjernlund

 Joakim Tjernlund wrote:

  OK, anyone against? Dan?

 I'm currently looking at the patches and I'll be integrating something
 that hopefully works :-)
Please tell me if there is something in that patch you don't like(besides the
moving the invalidate call).


 This isn't something new that hasn't been tried before.  The problem
 in the past with non-coherent processors, incoming DMA, and skbufs is
 the buffers would share cache lines with other data which would get
 corrupted as the result of the invalidate for the DMA.  Typically,
 data that was corrupted were flags and control information for the IP
 stack, and under normal use you wouldn't notice this.  However,
 forwarding/bridging applications would fail to work properly and you
 would sometimes see packet retransmits that weren't necessary.

 The trick is to ensure you allocate a larger than necessary sk buffer
 and then align the start and end such that they consume entire cache
 lines.  There has been sufficient discussion about this that I hope
 the sk buffer mechanism will allow this alignment now, as it didn't
 work well in the past.  This is what I want to check out when I
 apply and test the patches.

Tell me about it, I got severely bitten by a non cache aligned invalidate call
in the i2c-algo-8xx.c driver :-(

I too checked carefully that the buffer returned from 
__dev_alloc_skb()/dev_alloc_skb()
cache aligned, turns out that it kmalloc's a buffer and reserves 16 bytes in the
beginning so it's safe.


 This isn't necessary on the 8260 family due to cache snooping, but it
 is required on the 8xx.

 Of course, a packet checksum still needs to be performed, and if it
 is done as part of the data copy (and if the IP stack doesn't do it
 again), it would seem that this implementation rather than DMA would
 be more efficient.

Are referring to eth_copy_and_sum()? That function has never done
a csum, just a plain memcpy(). The IP stack has always done
it's own csum(just as well since it would be doing this in IRQ context),
unless you set ip_summed(I think).

Perhaps a backwards memcpy() would be more efficient? That way
the  IP header get copied last and will be in cache longer. I believe memmove()
will do that.

Some drivers also try cache align the IP header. I tried that to but
eth_type_trans() could not handle this.

Finally, why does passing the Ethernet CRC upwards mess-up bridging 
applications?

Jocke

 Thanks.


 -- Dan


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-11-01 Thread Joakim Tjernlund

Hi Tom

Will you apply this patch?  No problems reported so far.

 Jocke

 
  On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:
 
   This is the second version of my patch that removes the expensive memcpy 
   of
   received
   ethernet frames in interrupt context.
  
   I have 1 report(from Ricardo Scop) of 20% increase in packets/second, 
   packet
   size 1500 when
   applied to 8260 FEC(needs to be applied manually). But min packet size
   decreased with 10 %.
   This version should fix the 10% decrease case.
  
   This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
   8260/fcc_enet.c with little effort.
  
   Better fix a bug in set_multicast_list(), move the dmi list forward when
   walking it(dmi = dmi-next;)
  
   New stuff:
  - Configrable: copy small packets or pass them directly, see
   COPY_SMALL_FRAMES in code.
  - Collision reporting fix form Thomas Lange.
  - Don't pass receive frames which has error upwards.
  - Report RX_OV errors as fifo errors, not crc errors.
  
   Please test and report any problems and performace improvements.
 
  All of this sounds good, but can you please break this one patch up into
  seperate logical patches?

 I rather not.

 The logical pices in this patch are:
  1) remove the expensive memcpy.
- This is the main part of the patch.

  2) dmi-next fix, which is a one liner.
 Don't pass receive frames which has error upwards.
- Trival stuff.

  3) Collision reporting fix form Thomas Lange.
 Report RX_OV errors as fifo errors, not crc errors.
- These are just minor accounting fixes.

 So I don't think it's worth the effort. Let's just wait a few days and see
 if it was success or not.

 Folks, please report you findings to the list.

 Jocke



** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-28 Thread Joakim Tjernlund

Hi Stephan

 Hi Jocke,

 I just had a look at the code. So far it is running fine. on my board. :)
 Following question:
 In scc-enet_rx you use invalidate_dcache_range() for the skb data area.
 Why don't you do the same in scc_enet_init() for the first set of skb's?
 Isn't it required too at this place.

Thanks for trying this!

I did this in my first version, but in my second version I moved the
invalidate_dcache_range() to AFTER the ethernet frame had been received.

invalidate_dcache_range() can either be called when you add the buffer to the
HW(BD) or after you have received a packet but before you try to use it. Doing 
it
after has one advantage, you know the packet len and therefore you only need
to invalidate packet len bytes.

In my tree I also removed the sync instruction in  invalidate_dcache_range()
because I don't think it's needed. My contact at Motorola thinks so too.
I have been running without the sync instruction for 2-3 weeks and
has yet to see any problems. Would be intresting to hear if others
can try this out and if it gives any performance improvement.

Jocke

 Thanks, Stephan

  -Original Message-
  From: owner-linuxppc-embedded at lists.linuxppc.org
  [mailto:owner-linuxppc-embedded at lists.linuxppc.org]On Behalf Of Joakim
  Tjernlund
  Sent: Donnerstag, 24. Oktober 2002 16:24
  To: linuxppc-embedded at lists.linuxppc.org
  Cc: scop at digitel.com.br; thomas at corelatus.com
  Subject: [PATCH] arch/ppc/8xx_io/enet.c, version 2
 
 
 
  Hi
 
  This is the second version of my patch that removes the expensive
  memcpy of
  received
  ethernet frames in interrupt context.
 
  I have 1 report(from Ricardo Scop) of 20% increase in
  packets/second, packet
  size 1500 when
  applied to 8260 FEC(needs to be applied manually). But min packet size
  decreased with 10 %.
  This version should fix the 10% decrease case.
 
  This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
  8260/fcc_enet.c with little effort.
 
  Better fix a bug in set_multicast_list(), move the dmi list forward when
  walking it(dmi = dmi-next;)
 
  New stuff:
 - Configrable: copy small packets or pass them directly, see
  COPY_SMALL_FRAMES in code.
 - Collision reporting fix form Thomas Lange.
 - Don't pass receive frames which has error upwards.
 - Report RX_OV errors as fifo errors, not crc errors.
 
  Please test and report any problems and performace improvements.
 
  Jocke
 


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-24 Thread Joakim Tjernlund

  
   Comments? Anyone care to do some benchmarking?
 
  No comments so far, no one interested in this?
 I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
 ethernet driver and it worked fine  I had a 20% increase in routing
 throughput with the  patch installed!

Cool!


 The bug fix in set_multicast_list worked too. But, since there's a continue
 clause inside the for command in this routine, I would suggest that the
 (dmi=dmi-next) command go inside the for increment clause, instead of the
 place you've put it; that is:
   for (...;...; i++, dmi = dmi-next) {...}

Yes thats better, thanks.
 
  A question, why do enet.c pass bad frames to netif_rx? Only late collisions
  are disregarded, the rest is passed on.
 IMHO, this is another bug...
me too.

   Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-24 Thread Steven Scholz

Ricardo Scop wrote:

 On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
  On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
   Hi
  
   Here is a patch to drop the expensive memcpy of received ethernet frames
   in interrupt context. I have not done any bench marking, but mounting a
   NFS rootfs feels faster.
  
   I am using a heavily modified enet.c in my system, but I think I got the
   patch correct.
  
   Also fixed a bug in set_multicast_list(), move the dmi list forward when
   walking it(dmi = dmi-next;)
  
   Comments? Anyone care to do some benchmarking?
 
  No comments so far, no one interested in this?
 I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
 ethernet driver and it worked fine  I had a 20% increase in routing
 throughput with the  patch installed!
 ...

Did I understand that correctly that this patch would work with mith MPC8xx FEC
as well!?!?

Cheers,

Steven

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-24 Thread Joakim Tjernlund


 Ricardo Scop wrote:
 
  On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
   On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
Hi
   
Here is a patch to drop the expensive memcpy of received ethernet frames
in interrupt context. I have not done any bench marking, but mounting a
NFS rootfs feels faster.
   
I am using a heavily modified enet.c in my system, but I think I got the
patch correct.
   
Also fixed a bug in set_multicast_list(), move the dmi list forward when
walking it(dmi = dmi-next;)
   
Comments? Anyone care to do some benchmarking?
  
   No comments so far, no one interested in this?
  I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
  ethernet driver and it worked fine  I had a 20% increase in routing
  throughput with the  patch installed!
  ...

 Did I understand that correctly that this patch would work with mith MPC8xx 
 FEC
 as well!?!?

Yes, but you have to adapt it a little.

 Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-24 Thread Joakim Tjernlund

Hi

This is the second version of my patch that removes the expensive memcpy of
received
ethernet frames in interrupt context.

I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
size 1500 when
applied to 8260 FEC(needs to be applied manually). But min packet size
decreased with 10 %.
This version should fix the 10% decrease case.

This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
8260/fcc_enet.c with little effort.

Better fix a bug in set_multicast_list(), move the dmi list forward when
walking it(dmi = dmi-next;)

New stuff:
   - Configrable: copy small packets or pass them directly, see
COPY_SMALL_FRAMES in code.
   - Collision reporting fix form Thomas Lange.
   - Don't pass receive frames which has error upwards.
   - Report RX_OV errors as fifo errors, not crc errors.

Please test and report any problems and performace improvements.

Jocke

--- arch/ppc/8xx_io/enet.c.org  Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c  Thu Oct 24 15:48:25 2002
@@ -34,7 +34,6 @@
 #include linux/ioport.h
 #include linux/slab.h
 #include linux/interrupt.h
-#include linux/pci.h
 #include linux/init.h
 #include linux/delay.h
 #include linux/netdevice.h
@@ -86,6 +85,14 @@
  * All functions are directly controlled using I/O pins.  See
asm/commproc.h.
  */

+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
packets
+ * at a small performance hit. Note performance testing needed */
+#define COPY_SMALL_FRAMES 1
+
+#ifdef COPY_SMALL_FRAMES
+  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
internal use */
+#endif
+
 /* The transmitter timeout
  */
 #define TX_TIMEOUT (2*HZ)
@@ -97,19 +104,17 @@
  * the skbuffer directly.
  */
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES  32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   64  /* Must be power of two */
-#define TX_RING_MOD_MASK   63  /*   for this to work */
+  #define RX_RING_SIZE 64
+  #define TX_RING_SIZE 64  /* Must be power of two for this to 
work */
 #else
-#define CPM_ENET_RX_PAGES  4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE   8   /* Must be power of two */
-#define TX_RING_MOD_MASK   7   /*   for this to work */
+  #define RX_RING_SIZE 8
+  #define TX_RING_SIZE 8   /* Must be power of two for this to 
work */
+#endif
+#define TX_RING_MOD_MASK   (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1600 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
 #endif

 /* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
/* Virtual addresses for the receive buffers because we can't
 * do a __va() on them anymore.
 */
-   unsigned char *rx_vaddr[RX_RING_SIZE];
+   void*rx_vaddr[RX_RING_SIZE];
struct  net_device_stats stats;
uinttx_full;
spinlock_t lock;
@@ -370,11 +375,11 @@

cep-stats.tx_packets++;

-   /* Deferred means some collisions occurred during transmit,
-* but we eventually sent the packet OK.
-*/
-   if (bdp-cbd_sc  BD_ENET_TX_DEF)
-   cep-stats.collisions++;
+   /* Check retry counter, i.e. collision counter */
+   if (bdp-cbd_sc  BD_ENET_TX_RCMASK){
+   /* Note that counter cannot go higher than 15 */
+   cep-stats.collisions+=(bdp-cbd_sc  
BD_ENET_TX_RCMASK)2;
+   }

/* Free the sk buffer associated with this last transmit.
*/
@@ -449,6 +454,7 @@
struct  scc_enet_private *cep;
volatile cbd_t  *bdp;
struct  sk_buff *skb;
+   struct  sk_buff *skb_tmp;
ushort  pkt_len;

cep = (struct scc_enet_private *)dev-priv;
@@ -461,7 +467,8 @@
 for (;;) {
if (bdp-cbd_sc  BD_ENET_RX_EMPTY)
break;
-
+
+#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
 #ifndef final_version
/* Since we have allocated space to hold a complete frame, both
 * the first and last indicators should be set.
@@ -470,51 +477,62 @@
(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
printk(CPM ENET: rcv is not first+last\n);
 #endif
-
-   /* Frame too long or too short.
-   */
-   if (bdp-cbd_sc  (BD_ENET_RX_LG | BD_ENET_RX_SH))
-   cep-stats.rx_length_errors++;
-   if (bdp-cbd_sc  BD_ENET_RX_NO)   

[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-24 Thread Tom Rini

On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:

 This is the second version of my patch that removes the expensive memcpy of
 received
 ethernet frames in interrupt context.

 I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
 size 1500 when
 applied to 8260 FEC(needs to be applied manually). But min packet size
 decreased with 10 %.
 This version should fix the 10% decrease case.

 This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
 8260/fcc_enet.c with little effort.

 Better fix a bug in set_multicast_list(), move the dmi list forward when
 walking it(dmi = dmi-next;)

 New stuff:
- Configrable: copy small packets or pass them directly, see
 COPY_SMALL_FRAMES in code.
- Collision reporting fix form Thomas Lange.
- Don't pass receive frames which has error upwards.
- Report RX_OV errors as fifo errors, not crc errors.

 Please test and report any problems and performace improvements.

All of this sounds good, but can you please break this one patch up into
seperate logical patches?

--
Tom Rini (TR1265)
http://gate.crashing.org/~trini/

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c, version 2

2002-10-24 Thread Joakim Tjernlund



 On Thu, Oct 24, 2002 at 04:23:31PM +0200, Joakim Tjernlund wrote:

  This is the second version of my patch that removes the expensive memcpy of
  received
  ethernet frames in interrupt context.
 
  I have 1 report(from Ricardo Scop) of 20% increase in packets/second, packet
  size 1500 when
  applied to 8260 FEC(needs to be applied manually). But min packet size
  decreased with 10 %.
  This version should fix the 10% decrease case.
 
  This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
  8260/fcc_enet.c with little effort.
 
  Better fix a bug in set_multicast_list(), move the dmi list forward when
  walking it(dmi = dmi-next;)
 
  New stuff:
 - Configrable: copy small packets or pass them directly, see
  COPY_SMALL_FRAMES in code.
 - Collision reporting fix form Thomas Lange.
 - Don't pass receive frames which has error upwards.
 - Report RX_OV errors as fifo errors, not crc errors.
 
  Please test and report any problems and performace improvements.

 All of this sounds good, but can you please break this one patch up into
 seperate logical patches?

I rather not.

The logical pices in this patch are:
 1) remove the expensive memcpy.
   - This is the main part of the patch.

 2) dmi-next fix, which is a one liner.
Don't pass receive frames which has error upwards.
   - Trival stuff.

 3) Collision reporting fix form Thomas Lange.
Report RX_OV errors as fifo errors, not crc errors.
   - These are just minor accounting fixes.

So I don't think it's worth the effort. Let's just wait a few days and see
if it was success or not.

Folks, please report you findings to the list.

Jocke


** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-23 Thread Joakim Tjernlund

On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
 Hi

 Here is a patch to drop the expensive memcpy of received ethernet frames in
 interrupt context. I have not done any bench marking, but mounting a NFS
 rootfs feels faster.

 I am using a heavily modified enet.c in my system, but I think I got the
 patch correct.

 Also fixed a bug in set_multicast_list(), move the dmi list forward when
 walking it(dmi = dmi-next;)

 Comments? Anyone care to do some benchmarking?

No comments so far, no one interested in this?

A question, why do enet.c pass bad frames to netif_rx? Only late collisions
are disregarded, the rest is passed on.

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-23 Thread Ricardo Scop

On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
 On Monday 21 October 2002 15:13, Joakim Tjernlund wrote:
  Hi
 
  Here is a patch to drop the expensive memcpy of received ethernet frames
  in interrupt context. I have not done any bench marking, but mounting a
  NFS rootfs feels faster.
 
  I am using a heavily modified enet.c in my system, but I think I got the
  patch correct.
 
  Also fixed a bug in set_multicast_list(), move the dmi list forward when
  walking it(dmi = dmi-next;)
 
  Comments? Anyone care to do some benchmarking?

 No comments so far, no one interested in this?
I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
ethernet driver and it worked fine  I had a 20% increase in routing
throughput with the  patch installed!

The bug fix in set_multicast_list worked too. But, since there's a continue
clause inside the for command in this routine, I would suggest that the
(dmi=dmi-next) command go inside the for increment clause, instead of the
place you've put it; that is:
for (...;...; i++, dmi = dmi-next) {...}

 A question, why do enet.c pass bad frames to netif_rx? Only late collisions
 are disregarded, the rest is passed on.
IMHO, this is another bug...



** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-23 Thread Ricardo Scop

On Wednesday 23 October 2002 13:32, Ricardo Scop wrote:
 On Wednesday 23 October 2002 05:51, Joakim Tjernlund wrote:
[snip]

  No comments so far, no one interested in this?

 I'm interested! Indeed, I adapted and tested your patch in a 8260 FCC fast
 ethernet driver and it worked fine  I had a 20% increase in routing
 throughput with the  patch installed!

I forgot to mention the bad news. The increase mentioned above was obtained
with large packets (1500 bytes) traversing our board. With very small packets
(64 bytes), we  experimented a 10% DEcrease in PPS (packets per second).
We're still testing, but I guess the overall performance will be better with
the patch installed.

-Scop

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/





[PATCH] arch/ppc/8xx_io/enet.c

2002-10-21 Thread Joakim Tjernlund

Hi

Here is a patch to drop the expensive memcpy of received ethernet frames in
interrupt context. I have not done any bench marking, but mounting a NFS rootfs 
feels faster.

I am using a heavily modified enet.c in my system, but I think I got the patch 
correct.

Also fixed a bug in set_multicast_list(), move the dmi list forward when 
walking it(dmi = dmi-next;)

Comments? Anyone care to do some benchmarking?

Jocke

 --- arch/ppc/8xx_io/enet.c.org Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c  Mon Oct 21 15:06:04 2002
@@ -96,18 +96,17 @@
  * We don't need to allocate pages for the transmitter.  We just use
  * the skbuffer directly.
  */
+#define CPM_ENET_RX_FRSIZE 1600   /* MUST be a multiple of cache line! */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+#error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
+#endif
+
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES  32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
+#define RX_RING_SIZE   64
 #define TX_RING_SIZE   64  /* Must be power of two */
 #define TX_RING_MOD_MASK   63  /*   for this to work */
 #else
-#define CPM_ENET_RX_PAGES  4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG  (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE   (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
+#define RX_RING_SIZE   8
 #define TX_RING_SIZE   8   /* Must be power of two */
 #define TX_RING_MOD_MASK   7   /*   for this to work */
 #endif
@@ -143,7 +142,7 @@
/* Virtual addresses for the receive buffers because we can't
 * do a __va() on them anymore.
 */
-   unsigned char *rx_vaddr[RX_RING_SIZE];
+   void*rx_vaddr[RX_RING_SIZE];
struct  net_device_stats stats;
uinttx_full;
spinlock_t lock;
@@ -449,6 +448,7 @@
struct  scc_enet_private *cep;
volatile cbd_t  *bdp;
struct  sk_buff *skb;
+   struct  sk_buff *skb_tmp;
ushort  pkt_len;

cep = (struct scc_enet_private *)dev-priv;
@@ -497,25 +497,26 @@
pkt_len = bdp-cbd_datlen;
cep-stats.rx_bytes += pkt_len;

-   /* This does 16 byte alignment, much more than we need.
+   /* This does 16 byte alignment, exactly what we need.
 * The packet length includes FCS, but we don't want to
 * include that when passing upstream as it messes up
 * bridging applications.
 */
-   skb = dev_alloc_skb(pkt_len-4);
-
-   if (skb == NULL) {
+   skb_tmp = dev_alloc_skb(CPM_ENET_RX_FRSIZE);
+   if (skb_tmp == NULL) {
printk(%s: Memory squeeze, dropping packet.\n, 
dev-name);
cep-stats.rx_dropped++;
-   }
-   else {
+   } else {
+   skb = cep-rx_vaddr[bdp - cep-rx_bd_base];
skb-dev = dev;
skb_put(skb,pkt_len-4); /* Make room */
-   eth_copy_and_sum(skb,
-   cep-rx_vaddr[bdp - cep-rx_bd_base],
-   pkt_len-4, 0);
skb-protocol=eth_type_trans(skb,dev);
netif_rx(skb);
+
+   invalidate_dcache_range((unsigned long) skb_tmp-data,
+   (unsigned long) skb_tmp-data + 
CPM_ENET_RX_FRSIZE);
+   bdp-cbd_bufaddr = __pa(skb_tmp-data);
+   cep-rx_vaddr[bdp - cep-rx_bd_base] = skb_tmp;
}
}

@@ -631,6 +632,7 @@
/* this delay is necessary here -- Cort */
udelay(10);
while (cpmp-cp_cpcr  CPM_CR_FLG);
+   dmi = dmi-next;
}
}
}
@@ -647,8 +649,7 @@
struct net_device *dev;
struct scc_enet_private *cep;
int i, j, k;
-   unsigned char   *eap, *ba;
-   dma_addr_t  mem_addr;
+   unsigned char   *eap;
bd_t*bd;
volatilecbd_t   *bdp;
volatilecpm8xx_t*cp;
@@ -839,22 +840,15 @@

bdp = cep-rx_bd_base;
k = 0;
-   for (i=0; iCPM_ENET_RX_PAGES; i++) {
-
-   /* Allocate a page.
-   */
-   ba = (unsigned char *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, 
mem_addr);
-
-   /* Initialize the BD for every fragment in the page.
-   */
-   for (j=0; jCPM_ENET_RX_FRPPG; j++) {
-   bdp-cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
-   bdp-cbd_bufaddr =