date:20070725

These patches deal with issues brought up by Gavin McCullagh
about reactions of Cubic and HTCP to hostile receivers that return
bogus timestamp options. If the receiver crafts a timestamp that is
larger than the original, then some of the congestion control algorithms
may become unfair.

The solution in these patches is to only use local values to measure
RTT for congestion control.  The timestamp is still used as described
in the RFCs to measure the RTT used for the retransmit timer.

Thank you to Sangtae Ha for testing these; see:
  http://netsrv.csc.ncsu.edu/net-2.6.22/stephen_lowres/
He also found some pre-existing problems with TCP-LP that might
be related to NAPI on the receiver.

This should go into 2.6.23. But not into the stable kernel
since the risk of causing regression is greater than the possible
risk exposure.

-- 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/3] TCP: htcp - use measured rtt

Change HTCP to use measured RTT rather than smooth RTT.
Srtt is computed using the TCP receive timestamp
options, so it is vulnerable to hostile receivers. To avoid any problems
this might cause, use the measured RTT instead.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

--- a/net/ipv4/tcp_htcp.c   2007-07-19 08:26:40.0 +0100
+++ b/net/ipv4/tcp_htcp.c   2007-07-19 08:28:07.0 +0100
@@ -76,12 +76,11 @@ static u32 htcp_cwnd_undo(struct sock *s
return max(tp-snd_cwnd, (tp-snd_ssthresh  7) / ca-beta);
 }
 
-static inline void measure_rtt(struct sock *sk)
+static inline void measure_rtt(struct sock *sk, u32 srtt)
 {
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
-   u32 srtt = tp-srtt  3;
 
/* keep track of minimum RTT seen so far, minRTT is zero at first */
if (ca-minRTT  srtt || !ca-minRTT)
@@ -108,6 +107,9 @@ static void measure_achieved_throughput(
if (icsk-icsk_ca_state == TCP_CA_Open)
ca-pkts_acked = pkts_acked;
 
+   if (rtt  0)
+   measure_rtt(sk, usecs_to_jiffies(rtt));
+
if (!use_bandwidth_switch)
return;
 
@@ -237,8 +239,6 @@ static void htcp_cong_avoid(struct sock 
if (tp-snd_cwnd = tp-snd_ssthresh)
tcp_slow_start(tp);
else {
-   measure_rtt(sk);
-
/* In dangerous area, increase slowly.
 * In theory this is tp-snd_cwnd += alpha / tp-snd_cwnd
 */

-- 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/3] TCP: congestion control API pass RTT in microseconds

This patch changes the API for the callback that is done after an ACK is
received. It solves a couple of issues:

  * Some congestion controls want a higher resolution value of RTT
(controlled by the TCP_CONG_RTT_STAMP flag). These don't really want a ktime,
but all compute an RTT in microseconds.

  * Other congestion control methods could use RTT at jiffies resolution.

To keep API consistent the units should be the same for both cases, just the
resolution should change. 

A value of -1 is used to indicate that no valid timestamp is available.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

--- a/include/net/tcp.h 2007-07-23 10:29:42.0 +0100
+++ b/include/net/tcp.h 2007-07-23 10:33:50.0 +0100
@@ -660,7 +660,7 @@ struct tcp_congestion_ops {
/* new value of cwnd after loss (optional) */
u32  (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
-   void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last);
+   void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
/* get info for inet_diag (optional) */
void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
 
--- a/net/ipv4/tcp_input.c  2007-07-23 10:29:42.0 +0100
+++ b/net/ipv4/tcp_input.c  2007-07-23 10:33:50.0 +0100
@@ -2490,12 +2490,23 @@ static int tcp_clean_rtx_queue(struct so
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk);
 
-   /* Is the ACK triggering packet unambiguous? */
-   if (acked  FLAG_RETRANS_DATA_ACKED)
-   last_ackt = net_invalid_timestamp();
+   if (ca_ops-pkts_acked) {
+   s32 rtt_us = -1;
 
-   if (ca_ops-pkts_acked)
-   ca_ops-pkts_acked(sk, pkts_acked, last_ackt);
+   /* Is the ACK triggering packet unambiguous? */
+   if (!(acked  FLAG_RETRANS_DATA_ACKED)) {
+   /* High resolution needed and available? */
+   if (ca_ops-flags  TCP_CONG_RTT_STAMP 
+   !ktime_equal(last_ackt,
+net_invalid_timestamp()))
+   rtt_us = 
ktime_us_delta(ktime_get_real(),
+   last_ackt);
+   else if (seq_rtt  0)
+   rtt_us = jiffies_to_usecs(seq_rtt);
+   }
+
+   ca_ops-pkts_acked(sk, pkts_acked, rtt_us);
+   }
}
 
 #if FASTRETRANS_DEBUG  0
--- a/net/ipv4/tcp_bic.c2007-07-23 10:29:42.0 +0100
+++ b/net/ipv4/tcp_bic.c2007-07-23 10:33:50.0 +0100
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
+static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt)
 {
const struct inet_connection_sock *icsk = inet_csk(sk);
 
--- a/net/ipv4/tcp_cubic.c  2007-07-23 10:29:42.0 +0100
+++ b/net/ipv4/tcp_cubic.c  2007-07-23 10:33:50.0 +0100
@@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
+static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
const struct inet_connection_sock *icsk = inet_csk(sk);
 
--- a/net/ipv4/tcp_htcp.c   2007-07-23 10:29:42.0 +0100
+++ b/net/ipv4/tcp_htcp.c   2007-07-23 10:33:50.0 +0100
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct so
}
 }
 
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, 
ktime_t last)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 
rtt)
 {
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
--- a/net/ipv4/tcp_illinois.c   2007-07-23 10:29:42.0 +0100
+++ b/net/ipv4/tcp_illinois.c   2007-07-23 10:33:50.0 +0100
@@ -83,18 +83,16 @@ static void tcp_illinois_init(struct soc
 }
 
 /* Measure RTT for each ack. */
-static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt)
 {
struct illinois *ca = inet_csk_ca(sk);
-   u32 rtt;
 
ca-acked = pkts_acked;
 
-   if (ktime_equal(last, net_invalid_timestamp()))
+   /* dup ack, no rtt sample */
+   if (rtt  0)
return;
 
-   rtt = ktime_to_us(net_timedelta(last));
-
/* ignore bogus values, this prevents wraparound in alpha math */
if (rtt  RTT_MAX)

[PATCH 2/3] TCP: cubic - eliminate use of receive time stamp

Remove use of received timestamp option value from RTT calculation in Cubic.
A hostile receiver may be returning a larger timestamp option than the original
value. This would cause the sender to believe the malevolent receiver had
a larger RTT and because Cubic tries to provide some RTT friendliness, the
sender would then favor the liar.

Instead, use the jiffies-resolution RTT value already computed and
passed back after ack.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

--- a/net/ipv4/tcp_cubic.c  2007-07-23 10:33:50.0 +0100
+++ b/net/ipv4/tcp_cubic.c  2007-07-23 10:35:26.0 +0100
@@ -246,38 +246,12 @@ static inline void bictcp_update(struct 
ca-cnt = 1;
 }
 
-
-/* Keep track of minimum rtt */
-static inline void measure_delay(struct sock *sk)
-{
-   const struct tcp_sock *tp = tcp_sk(sk);
-   struct bictcp *ca = inet_csk_ca(sk);
-   u32 delay;
-
-   /* No time stamp */
-   if (!(tp-rx_opt.saw_tstamp  tp-rx_opt.rcv_tsecr) ||
-/* Discard delay samples right after fast recovery */
-   (s32)(tcp_time_stamp - ca-epoch_start)  HZ)
-   return;
-
-   delay = (tcp_time_stamp - tp-rx_opt.rcv_tsecr)3;
-   if (delay == 0)
-   delay = 1;
-
-   /* first time call or link delay decreases */
-   if (ca-delay_min == 0 || ca-delay_min  delay)
-   ca-delay_min = delay;
-}
-
 static void bictcp_cong_avoid(struct sock *sk, u32 ack,
  u32 in_flight, int data_acked)
 {
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
 
-   if (data_acked)
-   measure_delay(sk);
-
if (!tcp_is_cwnd_limited(sk, in_flight))
return;
 
@@ -337,14 +311,30 @@ static void bictcp_state(struct sock *sk
 static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 {
const struct inet_connection_sock *icsk = inet_csk(sk);
+   struct bictcp *ca = inet_csk_ca(sk);
+   u32 delay;
 
if (cnt  0  icsk-icsk_ca_state == TCP_CA_Open) {
-   struct bictcp *ca = inet_csk_ca(sk);
cnt -= ca-delayed_ack  ACK_RATIO_SHIFT;
ca-delayed_ack += cnt;
}
-}
 
+   /* Some calls are for duplicates without timetamps */
+   if (rtt_us  0)
+   return;
+
+   /* Discard delay samples right after fast recovery */
+   if ((s32)(tcp_time_stamp - ca-epoch_start)  HZ)
+   return;
+
+   delay = usecs_to_jiffies(rtt_us)  3;
+   if (delay == 0)
+   delay = 1;
+
+   /* first time call or link delay decreases */
+   if (ca-delay_min == 0 || ca-delay_min  delay)
+   ca-delay_min = delay;
+}
 
 static struct tcp_congestion_ops cubictcp = {
.init   = bictcp_init,

-- 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[NET] IOC3: Switch hw checksumming to ethtool configurable.

2007-07-25 Thread Ralf Baechle

Signed-off-by: Ralf Baechle [EMAIL PROTECTED]

---

I've previously sent out this patch a long time ago.  At that time I was
told NETIF_F_IP_CSUM wouldn't make any sense without NETIF_F_SG.  IOC3's
S/G abilities are very limited; it can do up to three segments of which
the first one is up to 104 bytes and part of the packet's TX ring entry,
the second and 3rd ones can be anywhere in the 64-bit PCI address space
but may not cross a 16kB page boundary.  So setting NETIF_F_SG isn't
really an option unless the IOC3 was going to linearize any packet it
can't cope with itself.

So the big question, does NETIF_F_IP_CSUM without NETIF_F_SG make sense?

 drivers/net/Kconfig|   20 
 drivers/net/ioc3-eth.c |   48 
 2 files changed, 32 insertions(+), 36 deletions(-)

Index: linux-2.6/drivers/net/Kconfig
===
--- linux-2.6.orig/drivers/net/Kconfig
+++ linux-2.6/drivers/net/Kconfig
@@ -480,26 +480,6 @@ config SGI_IOC3_ETH
  the Ethernet-HOWTO, available from
  http://www.tldp.org/docs.html#howto.
 
-config SGI_IOC3_ETH_HW_RX_CSUM
-   bool Receive hardware checksums
-   depends on SGI_IOC3_ETH  INET
-   default y
-   help
- The SGI IOC3 network adapter supports TCP and UDP checksums in
- hardware to offload processing of these checksums from the CPU.  At
- the moment only acceleration of IPv4 is supported.  This option
- enables offloading for checksums on receive.  If unsure, say Y.
-
-config SGI_IOC3_ETH_HW_TX_CSUM
-   bool Transmit hardware checksums
-   depends on SGI_IOC3_ETH  INET
-   default y
-   help
- The SGI IOC3 network adapter supports TCP and UDP checksums in
- hardware to offload processing of these checksums from the CPU.  At
- the moment only acceleration of IPv4 is supported.  This option
- enables offloading for checksums on transmit.  If unsure, say Y.
-
 config MIPS_SIM_NET
tristate MIPS simulator Network device
depends on MIPS_SIM
Index: linux-2.6/drivers/net/ioc3-eth.c
===
--- linux-2.6.orig/drivers/net/ioc3-eth.c
+++ linux-2.6/drivers/net/ioc3-eth.c
@@ -5,7 +5,7 @@
  *
  * Driver for SGI's IOC3 based Ethernet cards as found in the PCI card.
  *
- * Copyright (C) 1999, 2000, 2001, 2003 Ralf Baechle
+ * Copyright (C) 1999, 2000, 01, 03, 06 Ralf Baechle
  * Copyright (C) 1995, 1999, 2000, 2001 by Silicon Graphics, Inc.
  *
  * References:
@@ -61,12 +61,7 @@
 #include asm/pgtable.h
 #include asm/uaccess.h
 #include asm/sn/types.h
-#include asm/sn/sn0/addrs.h
-#include asm/sn/sn0/hubni.h
-#include asm/sn/sn0/hubio.h
-#include asm/sn/klconfig.h
 #include asm/sn/ioc3.h
-#include asm/sn/sn0/ip27.h
 #include asm/pci/bridge.h
 
 /*
@@ -94,6 +89,9 @@ struct ioc3_private {
u32 emcr, ehar_h, ehar_l;
spinlock_t ioc3_lock;
struct mii_if_info mii;
+   unsigned long flags;
+#define IOC3_FLAG_RX_CHECKSUMS 1
+
struct pci_dev *pdev;
 
/* Members used by autonegotiation  */
@@ -520,8 +518,6 @@ static struct net_device_stats *ioc3_get
return ip-stats;
 }
 
-#ifdef CONFIG_SGI_IOC3_ETH_HW_RX_CSUM
-
 static void ioc3_tcpudp_checksum(struct sk_buff *skb, uint32_t hwsum, int len)
 {
struct ethhdr *eh = eth_hdr(skb);
@@ -589,7 +585,6 @@ static void ioc3_tcpudp_checksum(struct 
if (csum == 0x)
skb-ip_summed = CHECKSUM_UNNECESSARY;
 }
-#endif /* CONFIG_SGI_IOC3_ETH_HW_RX_CSUM */
 
 static inline void ioc3_rx(struct ioc3_private *ip)
 {
@@ -624,9 +619,9 @@ static inline void ioc3_rx(struct ioc3_p
goto next;
}
 
-#ifdef CONFIG_SGI_IOC3_ETH_HW_RX_CSUM
-   ioc3_tcpudp_checksum(skb, w0  ERXBUF_IPCKSUM_MASK,len);
-#endif
+   if (likely(ip-flags  IOC3_FLAG_RX_CHECKSUMS))
+   ioc3_tcpudp_checksum(skb,
+   w0  ERXBUF_IPCKSUM_MASK, len);
 
netif_rx(skb);
 
@@ -1298,9 +1293,7 @@ static int ioc3_probe(struct pci_dev *pd
dev-set_multicast_list = ioc3_set_multicast_list;
dev-set_mac_address= ioc3_set_mac_address;
dev-ethtool_ops= ioc3_ethtool_ops;
-#ifdef CONFIG_SGI_IOC3_ETH_HW_TX_CSUM
dev-features   = NETIF_F_IP_CSUM;
-#endif
 
sw_physid1 = ioc3_mdio_read(dev, ip-mii.phy_id, MII_PHYSID1);
sw_physid2 = ioc3_mdio_read(dev, ip-mii.phy_id, MII_PHYSID2);
@@ -1390,7 +1383,6 @@ static int ioc3_start_xmit(struct sk_buf
uint32_t w0 = 0;
int produce;
 
-#ifdef CONFIG_SGI_IOC3_ETH_HW_TX_CSUM
/*
 * IOC3 has a fairly simple minded checksumming hardware which simply
 * adds up the 1's complement checksum for the entire packet and
@@ -1438,7 +1430,6 @@ static

Re: modpost warning question

2007-07-25 Thread Sam Ravnborg

On Wed, Jul 25, 2007 at 06:08:03PM +0800, chengong wrote:
 On Wed, 2007-07-25 at 09:27 +0200, Sam Ravnborg wrote:
  On Wed, Jul 25, 2007 at 02:14:12AM -0500, Kumar Gala wrote:
   I'm seeing the following warning:
   
   WARNING: vmlinux.o(.init.text+0x1acdc): Section mismatch: reference to
   .exit.text:gfar_mdio_exit (between 'gfar_init' and 'gfar_mdio_init')
   
   I don't understand why its not ok to access .exit.text from .init.text
  
  Several architectures discards .exit.text in the final linker
  script (arch/$(ARCH)/kernel/vmlinux.lds.S
  
  So any references to .exit.text will when a module is build-in result
  in a linker error because ld will flag it as an error when we reference
  a symbol in a discarded section.
 But why? Just make kernel size smaller?
Yes - that's the whole goal of init/exit sections.

  
  For the popular architectures (i386,x86_64) we discard .exit.text at
  runtime so here we do not see the error from ld (sadly).
 From which version? On my machine I have seen the same problem when
 building i386 target with the version 2.6.21.
modpost has started to warn about it. I assume you did not see link errors.

Sam
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4

2007-07-25 Thread jamal

On Wed, 2007-25-07 at 01:31 -0700, David Miller wrote:
 We're getting there, slowly...
 
 1) netif_napi_init() is added, the workqueue/requeue stuff
as discussed is not needed so you won't see that here
..

 Another thing that's really apparent now is all the wacky
 napi-weight values various drivers use.  Just grep for
 netif_napi_init() in the patch or a patched tree to see what
 I mean.  So much of it doesn't make any sense and I'm tempted
 to just remove the argument and make everyone use 32 or 64
 or something like that :-)  Or, default to some value across
 the board, and let drivers override that on a case by case
 basis with a BIG FAT COMMENT above the override describing
 why the different value is being used and precisely what
 tests were performed to validate that different value.

Sounds reasonable.
32-64 for Gige seemed to work well as i recall. 10/100 was around 16.
But that shouldnt matter i think: because the poll from the core is
based on Varghese's DRR, it probably will be fine if you just gave all
the same value and the deficit part will kick in when needed. Some
testing may be required but theoretically i don't see a problem.
 
BTW: The current kernel code has a bug - where if a driver
forgot to set its weight it would be indeterminate, so even for this
reason it will be a good idea to enforce a default.

Sorry, havent been following the thread - thanks for CCing me, will try
to catch up at some point.

cheers,
jamal



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-mm patch] one e1000 driver should be enough for everyone

2007-07-25 Thread Adrian Bunk

On Wed, Jul 25, 2007 at 04:03:04AM -0700, Andrew Morton wrote:
...
 Changes since 2.6.22-rc6-mm1:
...
  git-e1000new.patch
...
  git trees
...

Both e1000 drivers compiled into the kernel resulted in the following 
compile error:

--  snip  --

...
  LD  drivers/net/built-in.o
drivers/net/e1000/built-in.o: In function `e1000_read_mac_addr':
(.text+0xb9f2): multiple definition of `e1000_read_mac_addr'
drivers/net/e1000new/built-in.o:(.text+0x821a): first defined here
drivers/net/e1000/built-in.o: In function `e1000_phy_setup_autoneg':
(.text+0x8799): multiple definition of `e1000_phy_setup_autoneg'
drivers/net/e1000new/built-in.o:(.text+0xa9bd): first defined here
...
make[3]: *** [drivers/net/built-in.o] Error 1

--  snip  --

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]

---

BTW:
Unless I'm misunderstanding anything, the new driver should support a 
superset of what the old driver supported.
Therefore, it would be good if the final merge into Linus' tree will
do an
  rm -r drivers/net/e1000
  mv drivers/net/e1000new drivers/net/e1000

--- linux-2.6.23-rc1-mm1/drivers/net/Kconfig.old2007-07-25 
15:06:13.0 +0200
+++ linux-2.6.23-rc1-mm1/drivers/net/Kconfig2007-07-25 15:09:59.0 
+0200
@@ -2036,7 +2036,7 @@
 
 config E1000
tristate Intel(R) PRO/1000 Gigabit Ethernet support
-   depends on PCI
+   depends on PCI  E1000NEW=n
---help---
  This driver supports Intel(R) PRO/1000 gigabit ethernet family of
  adapters.  For more information on how to identify your adapter, go 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: 2.6.20-2.6.21 - networking dies after random time

2007-07-25 Thread Jarek Poplawski

On Wed, Jul 25, 2007 at 02:19:31AM +0200, Thomas Gleixner wrote:
...
 Looking into the IO_APIC code, the resend via send_IPI_self() happens
 unconditionally. So the resend is done for level and edge interrupts.
 This makes the problem more mysterious.
 
 The code in question lib8390.c does
 
   disable_irq();
   fiddle_with_the_network_card_hardware()
   enable_irq();
...
 
 No idea how this affects the network card, as the code there must be
 able to handle interrupts, which are not originated from the card due to
 interrupt sharing.

I think, in this last yesterday's patch Ingo could be right, yet!
The comment at the beginning points out this is done like that because
of chip's slowness. And problems with timing are mysterious.

On the other hand author of this code didn't use spin_lock_irqsave
for some reason, probably after testing this option too. So, I hope
this is the right path, but alas, I'm not sure this patch has to
prove this 100%.

Anyway, in my opinion this situation where interrupts could/have_to
be used for such strange things should confirm the need of more
options for handling irqs individually.

Thanks,
Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [-mm patch] one e1000 driver should be enough for everyone


Adrian Bunk wrote:

BTW:
Unless I'm misunderstanding anything, the new driver should support a 
superset of what the old driver supported.

Therefore, it would be good if the final merge into Linus' tree will
do an
  rm -r drivers/net/e1000
  mv drivers/net/e1000new drivers/net/e1000


Based on the most recent discussion, e1000new (or whatever it will be 
called) should support only the newer PCI-Express chips, while e1000 
will retain support for the older chips.


Over the long term this will allow e1000new to grow without affecting 
support for the older, stable chips.


So, e1000 is not going away.

Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/1] netxen: Load firmware during probe, dma watchdog fix.


Dhananjay Phadke wrote:

Jeff,

You committed old patch, which I had asked to ignore for two newer patches.

[PATCH 1/1] netxen: Load firmware during probe, dma watchdog fix.

is wrong patch that went in, instead please commit:

[PATCH 1/2] netxen: IMEZ multiport card 2nd port issue, dma watchdog fix
[PATCH 2/2] netxen: Fix interrupt handling for multiport adapters


Two responses:

1) I never received the two patches you mention.  It helps (though not 
required) to CC me, in addition to sending patches to netdev.


2) Kernel history is fixed in stone, once committed upstream.  Thus, you 
must regenerate your patches based on the fact that old-patch is now 
upstream.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: 2.6.20-2.6.21 - networking dies after random time

2007-07-25 Thread Alan Cox

  The code in question lib8390.c does
  
  disable_irq();
  fiddle_with_the_network_card_hardware()
  enable_irq();
 ...
  
  No idea how this affects the network card, as the code there must be
  able to handle interrupts, which are not originated from the card due to
  interrupt sharing.
 
 I think, in this last yesterday's patch Ingo could be right, yet!
 The comment at the beginnig points this is done like that because
 of chip's slowness. And problems with timing are mysterious.
 
 On the other hand author of this code didn't use spin_lock_irqsave
 for some reason, probably after testing this option too. So, I hope
 this is the right path, but alas, I'm not sure this patch has to
 prove this 100%.

The author (me) didn't use spin_lock_irqsave because the slowness of the
card means that approach caused horrible problems like losing serial data
at 38400 baud on some chips. Remember many 8390 nics on PCI were ISA
chips with FPGA front ends.

 Anyway, in my opinion this situation where interrupts could/have_to
 be used for such strange things should confirm the need of more
 options for handling irqs individually.

Ok the logic behind the 8390 is very simple:

Things to know
- IRQ delivery is asynchronous to the PCI bus
- Blocking the local CPU IRQ via spin locks was too slow
- The chip has register windows needing locking work

So the path was once (I say once as people appear to have changed it
in the mean time and it now looks rather bogus if the changes to use
disable_irq_nosync_irqsave are disabling the local IRQ)


Take the page lock
Mask the IRQ on chip
Disable the IRQ (but not mask locally- someone seems to have
broken this with the lock validator stuff)
[This must be _nosync as the page lock may otherwise
deadlock us]
Drop the page lock and turn IRQs back on

At this point an existing IRQ may still be running but we can't
get a new one

Take the lock (so we know the IRQ has terminated) but don't mask
the IRQs on the processor
Set irqlock [for debug]

Transmit (slow as )

re-enable the IRQ


We have to use disable_irq because otherwise you will get delayed
interrupts on the APIC bus deadlocking the transmit path.

Quite hairy but the chip simply wasn't designed for SMP and you can't
even ACK an interrupt without risking corrupting other parallel
activities on the chip.

Alan
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [-mm patch] one e1000 driver should be enough for everyone

2007-07-25 Thread Adrian Bunk

On Wed, Jul 25, 2007 at 09:48:55AM -0400, Jeff Garzik wrote:
 Adrian Bunk wrote:
 BTW:
 Unless I'm misunderstanding anything, the new driver should support a 
 superset of what the old driver supported.
 Therefore, it would be good if the final merge into Linus' tree will
 do an
   rm -r drivers/net/e1000
   mv drivers/net/e1000new drivers/net/e1000

 Based on the most recent discussion, e1000new (or whatever it will be 
 called) should support only the newer PCI-Express chips, while e1000 will 
 retain support for the older chips.

I found the discussion, and Christoph's e1000e sounds like the best name 
(new doesn't say whether it's a new driver for old hardware or a 
driver for new hardware).

 Over the long term this will allow e1000new to grow without affecting 
 support for the older, stable chips.

 So, e1000 is not going away.

No problem for me, but this obviously implies that global code in the 
new driver has to be renamed.

And please ensure that they will always support distinct PCI IDs, or 
there will be the following common pattern if both drivers support
a card:
- user tries driver A
- driver A doesn't work (although it should have worked)
- user tries driver B
- driver B works
- a later kernel removes support for this card from driver B
- user tries driver A
- driver A still doesn't work
- user writes bug report

Users should report bugs early instead of bouncing between different 
drivers.

   Jeff

cu
Adrian

-- 

   Is there not promise of rain? Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   Only a promise, Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RESEND 2/2] netxen: Fix interrupt handling for multiport adapters

2007-07-25 Thread dhananjay

This patch fixes masking of interrupts on multiport adapters. Also disables
interrupts upon ifdown interface. The wrong mask could result in interrupt
flood after interface is down.

Signed-off-by: Dhananjay Phadke [EMAIL PROTECTED]

Index: netdev-2.6/drivers/net/netxen/netxen_nic_main.c
===
--- netdev-2.6.orig/drivers/net/netxen/netxen_nic_main.c
+++ netdev-2.6/drivers/net/netxen/netxen_nic_main.c
@@ -930,6 +930,8 @@ static int netxen_nic_close(struct net_d
netif_carrier_off(netdev);
netif_stop_queue(netdev);
 
+   netxen_nic_disable_int(adapter);
+
cmd_buff = adapter-cmd_buf_arr;
for (i = 0; i  adapter-max_tx_desc_count; i++) {
buffrag = cmd_buff-frag_array;
@@ -1243,28 +1245,12 @@ static int
 netxen_handle_int(struct netxen_adapter *adapter, struct net_device *netdev)
 {
u32 ret = 0;
-   u32 our_int = 0;
 
DPRINTK(INFO, Entered handle ISR\n);
adapter-stats.ints++;
 
-   if (!(adapter-flags  NETXEN_NIC_MSI_ENABLED)) {
-   our_int = readl(NETXEN_CRB_NORMALIZE(adapter, CRB_INT_VECTOR));
-   /* not our interrupt */
-   if ((our_int  (0x80  adapter-portnum)) == 0)
-   return ret;
-   }
-
netxen_nic_disable_int(adapter);
 
-   if (adapter-intr_scheme == INTR_SCHEME_PERPORT) {
-   /* claim interrupt */
-   if (!(adapter-flags  NETXEN_NIC_MSI_ENABLED)) {
-   writel(our_int  ~((u32)(0x80  adapter-portnum)),
-   NETXEN_CRB_NORMALIZE(adapter, CRB_INT_VECTOR));
-   }
-   }
-
if (netxen_nic_rx_has_work(adapter) || netxen_nic_tx_has_work(adapter)) 
{
if (netif_rx_schedule_prep(netdev)) {
/*
@@ -1298,6 +1284,7 @@ irqreturn_t netxen_intr(int irq, void *d
 {
struct netxen_adapter *adapter;
struct net_device *netdev;
+   u32 our_int = 0;
 
if (unlikely(!irq)) {
return IRQ_NONE;/* Not our interrupt */
@@ -1305,7 +1292,22 @@ irqreturn_t netxen_intr(int irq, void *d
 
adapter = (struct netxen_adapter *)data;
netdev  = adapter-netdev;
-   /* process our status queue (for all 4 ports) */
+
+   if (!(adapter-flags  NETXEN_NIC_MSI_ENABLED)) {
+   our_int = readl(NETXEN_CRB_NORMALIZE(adapter, CRB_INT_VECTOR));
+   /* not our interrupt */
+   if ((our_int  (0x80  adapter-portnum)) == 0)
+   return IRQ_NONE;
+   }
+
+   if (adapter-intr_scheme == INTR_SCHEME_PERPORT) {
+   /* claim interrupt */
+   if (!(adapter-flags  NETXEN_NIC_MSI_ENABLED)) {
+   writel(our_int  ~((u32)(0x80  adapter-portnum)),
+   NETXEN_CRB_NORMALIZE(adapter, CRB_INT_VECTOR));
+   }
+   }
+
if (netif_running(netdev))
netxen_handle_int(adapter, netdev);
 

-- 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RESEND 0/2] netxen: bug fixes for IMEZ adapters on pblades

2007-07-25 Thread dhananjay

Resending the earlier patches, since the old patch got committed. 

 drivers/net/netxen/netxen_nic_main.c |   40 +
 1 files changed, 21 insertions(+), 19 deletions(-)

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RESEND 1/2] netxen: re-init station address after h/w init

2007-07-25 Thread dhananjay

This is a workaround for firmware bug with 2nd port of multiport adapter,
where MAC address is reset. Driver just needs to overwrite it with the
value read from PROM.

Signed-off-by: Dhananjay Phadke [EMAIL PROTECTED]

Index: netdev-2.6/drivers/net/netxen/netxen_nic_main.c
===
--- netdev-2.6.orig/drivers/net/netxen/netxen_nic_main.c
+++ netdev-2.6/drivers/net/netxen/netxen_nic_main.c
@@ -895,8 +895,6 @@ static int netxen_nic_open(struct net_de
 
/* Done here again so that even if phantom sw overwrote it,
 * we set it */
-   if (adapter-macaddr_set)
-   adapter-macaddr_set(adapter, netdev-dev_addr);
if (adapter-init_port
 adapter-init_port(adapter, adapter-portnum) != 0) {
del_timer_sync(adapter-watchdog_timer);
@@ -904,6 +902,8 @@ static int netxen_nic_open(struct net_de
netxen_nic_driver_name, adapter-portnum);
return -EIO;
}
+   if (adapter-macaddr_set)
+   adapter-macaddr_set(adapter, netdev-dev_addr);
 
netxen_nic_set_link_parameters(adapter);
 

-- 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH][v2] Netfilter Kconfig: Expose IPv4/6 connection tracking options by selecting NF_CONNTRACK_ENABLED

2007-07-25 Thread Al Boldi


Make NF_CONNTRACK_IPV4 and NF_CONNTRACK_IPV6 select NF_CONNTRACK_ENABLED.

This exposes IPv4/6 connection tracking options for easier Kconfig setup.

Signed-off-by: Al Boldi [EMAIL PROTECTED]
Cc: Patrick McHardy [EMAIL PROTECTED]
Cc: David Miller [EMAIL PROTECTED]
Cc: Sam Ravnborg [EMAIL PROTECTED]
Cc: Andrew Morton [EMAIL PROTECTED]
---
--- a/net/netfilter/Kconfig 2007-07-09 06:38:52.0 +0300
+++ b/net/netfilter/Kconfig 2007-07-25 17:37:16.0 +0300
@@ -28,6 +28,7 @@ config NETFILTER_NETLINK_LOG
 # Rename this to NF_CONNTRACK in a 2.6.25
 config NF_CONNTRACK_ENABLED
tristate Netfilter connection tracking support
+   select NF_CONNTRACK
help
  Connection tracking keeps a record of what packets have passed
  through your machine, in order to figure out how they are related
--- a/net/ipv4/netfilter/Kconfig2007-07-09 06:38:50.0 +0300
+++ b/net/ipv4/netfilter/Kconfig2007-07-25 17:37:39.0 +0300
@@ -7,7 +7,7 @@ menu IP: Netfilter Configuration
 
 config NF_CONNTRACK_IPV4
tristate IPv4 connection tracking support (required for NAT)
-   depends on NF_CONNTRACK
+   select NF_CONNTRACK_ENABLED
---help---
  Connection tracking keeps a record of what packets have passed
  through your machine, in order to figure out how they are related
--- a/net/ipv6/netfilter/Kconfig2007-07-09 06:38:51.0 +0300
+++ b/net/ipv6/netfilter/Kconfig2007-07-25 17:37:57.0 +0300
@@ -7,7 +7,8 @@ menu IPv6: Netfilter Configuration (EXP
 
 config NF_CONNTRACK_IPV6
tristate IPv6 connection tracking support (EXPERIMENTAL)
-   depends on INET  IPV6  EXPERIMENTAL  NF_CONNTRACK
+   depends on INET  IPV6  EXPERIMENTAL
+   select NF_CONNTRACK_ENABLED
---help---
  Connection tracking keeps a record of what packets have passed
  through your machine, in order to figure out how they are related

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [-mm patch] one e1000 driver should be enough for everyone


Adrian Bunk wrote:
I found the discussion, and Christoph's e1000e sounds like the best name 
(new doesn't say whether it's a new driver for old hardware or a 
driver for new hardware).


Yeah, I think e1000new is a lame name.

e1000e is good, or even e1001e if we wanted even more symmetry :)


No problem for me, but this obviously implies that global code in the 
new driver has to be renamed.


Yes.  A global namespace is a global namespace.


And please ensure that they will always support distinct PCI IDs, or 
there will be the following common pattern if both drivers support

a card:


IIRC I think Auke said there is some minor PCI ID overlap that must be 
addressed in the transition.  Disappointing and it raises transition 
issues, but that's the way the split falls out naturally AFAICS.


Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [-mm patch] one e1000 driver should be enough for everyone

2007-07-25 Thread Kok, Auke


Jeff Garzik wrote:

Adrian Bunk wrote:
I found the discussion, and Christoph's e1000e sounds like the best name 
(new doesn't say whether it's a new driver for old hardware or a 
driver for new hardware).


Yeah, I think e1000new is a lame name.


Moreover, Andrew should probably just drop this driver from -mm for now.


e1000e is good, or even e1001e if we wanted even more symmetry :)


I'm working on e1000e right now...

No problem for me, but this obviously implies that global code in the 
new driver has to be renamed.


Yes.  A global namespace is a global namespace.


yes, these are some of the kinks I still need to address. Although minor, it's 
going to take me some time to get it to the first step before I want to submit 
it (patience :))


And please ensure that they will always support distinct PCI IDs, or 
there will be the following common pattern if both drivers support

a card:


IIRC I think Auke said there is some minor PCI ID overlap that must be 
addressed in the transition.  Disappointing and it raises transition 
issues, but that's the way the split falls out naturally AFAICS.


I'll submit it with only ich9 id's at first, but it will be able to drive (sysfs 
bind) to some other devices too. This allows me to keep an eye out on the future 
structure that I want to give it without removing too much code that I might 
then later have to add back.


Auke
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RESEND 1/2] netxen: re-init station address after h/w init

Since we had a problem before, just wanted to let you know I received 
these two patches.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [-mm patch] one e1000 driver should be enough for everyone


Kok, Auke wrote:

I'm working on e1000e right now...


Cool :)


I'll submit it with only ich9 id's at first, but it will be able to 
drive (sysfs bind) to some other devices too. This allows me to keep an 
eye out on the future structure that I want to give it without removing 
too much code that I might then later have to add back.


Sounds good to me...

Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Cbe-oss-dev] [PATCH 0/10] ps3: fixes for ps3_gelic driver

2007-07-25 Thread Joel Schopp

Since gelic and spidernet are 95% identical it would be a good idea to cc the 
spidernet maintainer Linas on these patches (ccd on this reply).


Masakazu Mokuno wrote:

This patchset contains fixes and updates incorporating the comments from Jeff
Garzik and Stephen Hemminger.
Please accept the following patches for the ps3_gelic driver.  Thanks.

 [1] ps3: fix wrong calculation of rx descriptor address
 [2] ps3: some minor cleanups
 [3] ps3: tx descriptor handling cleanup
 [4] ps3: removed defines no longer used
 [5] ps3: removed conditional ethtool support
 [6] ps3: use net_device_stats of net_device structure
 [7] ps3: use ethX as the name of irq
 [8] ps3: removed calling netif_poll_enable() in open()
 [9] ps3: fix rare issue that reenabling rx DMA fails
[10] ps3: reduce allocation size of rx skb buffers


--
Masakazu MOKUNO

___
cbe-oss-dev mailing list
[EMAIL PROTECTED]
https://ozlabs.org/mailman/listinfo/cbe-oss-dev



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: Tc filtering: broken 802_3 classifier?

2007-07-25 Thread Waskiewicz Jr, Peter P


 The protocol match is on skb->protocol, so in case of 
 ethernet it's on the ethernet protocol, which is ETH_P_IP or 
 ip for IPv4.

I see that in the code, but the reason I started worrying was when I
added the 802_3 classifier on 8 flows, it would shove all traffic into
flowid 1:1, no matter if it matched or not.

I'll keep investigating and see if I can narrow down what I'm seeing.

Thanks Patrick,
-PJ
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Tc filtering: broken 802_3 classifier?

Waskiewicz Jr, Peter P wrote:
The protocol match is on skb->protocol, so in case of 
ethernet it's on the ethernet protocol, which is ETH_P_IP or 
ip for IPv4.
 
 
 I see that in the code, but the reason I started worrying was when I
 added the 802_3 classifier on 8 flows, it would shove all traffic into
 flowid 1:1, no matter if it matched or not.
 
 I'll keep investigating and see if I can narrow down what I'm seeing.


I'm not sure what you're expecting. skb->protocol is usually not set
to ETH_P_802_3, which is why the filter is not matching.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: Tc filtering: broken 802_3 classifier?

2007-07-25 Thread Waskiewicz Jr, Peter P

 Waskiewicz Jr, Peter P wrote:
 The protocol match is on skb->protocol, so in case of 
 ethernet it's on 
 the ethernet protocol, which is ETH_P_IP or ip for IPv4.
  
  
  I see that in the code, but the reason I started worrying was when I
  added the 802_3 classifier on 8 flows, it would shove all 
 traffic into
  flowid 1:1, no matter if it matched or not.
  
  I'll keep investigating and see if I can narrow down what 
 I'm seeing.
 
 
 I'm not sure what you're expecting. skb->protocol is usually not set
 to ETH_P_802_3, which is why the filter is not matching.

I understand that.  I had two issues, which you cleared up one by
reminding me that the protocol matches on skb->protocol before it tries
to run the ->classify() routine.  The other issue I am seeing is with 8
bands, an 802_3 filter is affecting classification of IP traffic.  For
example, I have an 802_3 filter to look for dst MAC address, but an ssh
packet, which without any filters should go into flowid 1:3 on my
system, is getting pushed into flowid 1:1.  I remove the 802_3 filter,
and ssh traffic starts going back into 1:3.  No other filters on the
system.  That's the main issue I'm seeing, so I'll keep investigating to
see what's going on.

-PJ
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] [POWERPC] Add support of platforms without PHY to gianfar driver

2007-07-25 Thread Vitaly Bordug


Gianfar driver is now able to work without real phy subnode,
that is necessary to cope with fixed-link situation, when
SoC is connected to the Ethernet interface or embedded switch 
without any PHY. In this case, fixed-speed property will
describe such a situation for gianfar driver.

The property is in the form <duplexity speed>

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]

---

 arch/powerpc/sysdev/fsl_soc.c |   39 +++
 drivers/net/gianfar.c |   17 ++---
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index cad1757..6864534 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -255,29 +255,36 @@ static int __init gfar_of_init(void)
FSL_GIANFAR_DEV_HAS_EXTENDED_HASH;
 
ph = of_get_property(np, phy-handle, NULL);
-   phy = of_find_node_by_phandle(*ph);
+   if (ph == NULL) {
+   unsigned int *bus_id;
 
-   if (phy == NULL) {
-   ret = -ENODEV;
-   goto unreg;
-   }
+   bus_id = of_get_property(np, fixed_speed,NULL);
+   gfar_data.bus_id = (bus_id[0]16) | bus_id[1];
+   } else {
+   phy = of_find_node_by_phandle(*ph);
 
-   mdio = of_get_parent(phy);
+   if (phy == NULL) {
+   ret = -ENODEV;
+   goto unreg;
+   }
+
+   mdio = of_get_parent(phy);
+
+   id = of_get_property(phy, reg, NULL);
+   ret = of_address_to_resource(mdio, 0, res);
+   if (ret) {
+   of_node_put(phy);
+   of_node_put(mdio);
+   goto unreg;
+   }
+
+   gfar_data.phy_id = *id;
+   gfar_data.bus_id = res.start;
 
-   id = of_get_property(phy, reg, NULL);
-   ret = of_address_to_resource(mdio, 0, res);
-   if (ret) {
of_node_put(phy);
of_node_put(mdio);
-   goto unreg;
}
 
-   gfar_data.phy_id = *id;
-   gfar_data.bus_id = res.start;
-
-   of_node_put(phy);
-   of_node_put(mdio);
-
ret =
platform_device_add_data(gfar_dev, gfar_data,
 sizeof(struct
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 1b854bf..cf08ced 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -93,6 +93,7 @@
 #include linux/crc32.h
 #include linux/mii.h
 #include linux/phy.h
+#include linux/phy_fixed.h
 
 #include gianfar.h
 #include gianfar_mii.h
@@ -445,11 +446,21 @@ static int init_phy(struct net_device *dev)
priv-oldspeed = 0;
priv-oldduplex = -1;
 
-   snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv-einfo-bus_id, 
priv-einfo-phy_id);
-
interface = gfar_get_interface(dev);
 
-   phydev = phy_connect(dev, phy_id, adjust_link, 0, interface);
+   if (priv-einfo-phy_id) {
+   snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv-einfo-bus_id, 
priv-einfo-phy_id);
+   phydev = phy_connect(dev, phy_id, adjust_link, 0, interface);
+   } else {
+   struct fixed_info *phyinfo;
+   int phy_addr = (priv-einfo-bus_id  16);
+   
+   phyinfo = fixed_mdio_get_phydev(phy_addr-1);
+   phydev = phyinfo-phydev;
+   snprintf(phydev-dev.bus_id, BUS_ID_SIZE, PHY_ID_FMT,
+   (priv-einfo-bus_id  0x) , phy_addr);
+   
memset(phyinfo-regs,0xff,sizeof(phyinfo-regs[0])*phyinfo-regs_num);
+   }
 
if (IS_ERR(phydev)) {
printk(KERN_ERR %s: Could not attach to PHY\n, dev-name);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/2] [POWERPC] Remove dummy network phy from MPC8313E-RDB

2007-07-25 Thread Vitaly Bordug


Cleaned up inexistent network phy from the target dts, added
necessary property to gianfar node there.

Signed-off-by: Vitaly Bordug [EMAIL PROTECTED]

---

 arch/powerpc/boot/dts/mpc8313erdb.dts |8 +---
 1 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts 
b/arch/powerpc/boot/dts/mpc8313erdb.dts
index a1533cc..b602a8b 100644
--- a/arch/powerpc/boot/dts/mpc8313erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8313erdb.dts
@@ -98,12 +98,6 @@
reg = 24520 20;
#address-cells = 1;
#size-cells = 0;
-   phy1: [EMAIL PROTECTED] {
-   interrupt-parent =  ipic ;
-   interrupts = 13 8;
-   reg = 1;
-   device_type = ethernet-phy;
-   };
phy4: [EMAIL PROTECTED] {
interrupt-parent =  ipic ;
interrupts = 14 8;
@@ -120,7 +114,7 @@
local-mac-address = [ 00 00 00 00 00 00 ];
interrupts = 25 8 24 8 23 8;
interrupt-parent =  ipic ;
-   phy-handle =  phy1 ;
+   fixed_speed = 1 1000;
};
 
[EMAIL PROTECTED] {

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC 0/1] lro: Generic Large Receive Offload for TCP traffic

2007-07-25 Thread Andrew Gallatin


Hi,

I've ported myri10ge to use the new LRO interface.  I have attached a
preliminary patch to myri10ge.  I'm very pleased to note that the
performance is on-par with my own LRO used by our out-of-tree driver.
(except when using mixed MTUS, see performance data below).

As I expected, actually porting our driver to use the LRO interface
gave me a far better understanding of the interface, and allowed for
better feedback.  I have attached a patch to the LRO code which
addresses some of the issues I mention below.

Please find below a performance summary, as well as my comments
on the LRO code, and patches to myri10ge and inet_lro. Both patches
are Signed-off-by: Andrew J. Gallatin [EMAIL PROTECTED]


Cheers,

Drew

===
Performance:
===

Here is a performance summary taken on my very low-end 2.0GHz AMD
Athlon(tm) 64 X2 Dual Core Processor 3800+ running 2.6.23-rc1 and
receiving a netperf TCP_SENDFILE test from an identical sender (which
was running 2.6.22 and our 1.3.1 out of tree driver).  The netserver
process was bound to a different core than the interrupt handler.  The
data reported is from the median of 5 60 second netperf tests.  The
following settings were in /etc/sysctl.conf on both machines:

net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
net.core.netdev_max_backlog = 2500
net.ipv4.tcp_timestamps = 0


RX Performance for Sender MTU=1500, Receiver MTU=1500 expressed as
x Gb/s, y %CPU receiver utilization:

rxbuf(1) 1.3.1(2)  inet_lro   no LRO
----   ---
4K pg8.9 78%   8.8 77%  3.7 89%
8K pg9.2 77%   9.1 77%  3.7 89%
16Kpg9.4 73%   9.4 73%  3.8 89%
32Kpg9.4 72%   9.4 72%  3.9 89%
skb  N/A N/A   5.5 90%  4.1 92%

RX Performance for Sender MTU=1500, Receiver MTU=9000 expressed as
x Gb/s, y %CPU receiver utilization:

rxbuf(1) 1.3.1(2)  inet_lro   no LRO
----   ---
4K pg8.9 78%   7.3 79%  3.7 89%
8K pg9.2 77%   7.6 79%  3.7 89%
16Kpg9.4 73%   8.0 78%  3.8 89%
32Kpg9.4 72%   8.2 79%  3.9 89%
skb  N/A N/A   4.9 92%  4.1 92%

RX Performance for Sender MTU=9000, Receiver MTU=9000 expressed as
x Gb/s, y %CPU receiver utilization:

rxbuf(1) 1.3.1(2)  inet_lro   no LRO
----   ---
4K pg9.9 63%   9.6 66%  8.3 71%
8K pg9.9 60%   9.9 63%  8.4 72%
16Kpg9.9 55%   9.9 55%  8.7 70%
32Kpg9.9 53%   9.9 53%  8.9 67%
skb  N/A N/A   9.9 68%  8.7 72%

(1) xK pg means the driver was configured to adjust the receive page
size using MYRI10GE_ALLOC_ORDER.  skb means an internal variant
of our driver which receives into skbs rather than pages was used.

(2) 1.3.1 is our latest out of tree driver which uses the myri10ge
specific frags-based LRO code previously submitted and rejected.

===
Code review / comments:
===

1) Checksum information for CHECKSUM_COMPLETE drivers.

Our NIC passes partial checksums to our driver.  In the current code,
it seems impossible for page based CHECKSUM_COMPLETE drivers to behave
correctly in the case of rejected frames.  Eg, there is no way
to pass the partial checksum to the LRO module so that it gets
set in the skb header and passed up the stack.

Further, there seems to be no (easy) way to use CHECKSUM_COMPLETE
on an aggregated packet at LRO flush time.  By the time a packet
is aggregated, the partial checksum from the first segment is
out of date.

I think it would make sense to require that a CHECKSUM_COMPLETE style
driver verify the checksum in its get_frag_header / get_skb_header
callback.  This allows the LRO code to unconditionally set
CHECKSUM_UNNECESSARY.

The attached patch modifies the code to do this.


2) Non-accelerated VLAN tags

Our firmware currently does not do vlan tag insertion
and removal.  This causes a problem in __lro_proc_segment()
where the tcp and ip headers are setup to point into the
newly created skb.  A frame containing an unstripped vlan
tag will cause the headers to be garbage.

The attached patch modifies __lro_proc_segment() to offset
those pointers by VLAN_HLEN when required.

3) Padded frames.

I may be missing something, but I don't see where you
either strip padding from frames or reject padded frames.
(see the pskb_trim_rcsum() in net/ipv4/ip_input.c:ip_rcv())

I did not add such a feature as I was confused about the intended
use of len/true_size.

Also, trimming is a pain when dealing with pure frags (without a
containing skb).  We have code in our out-of-kernel driver to deal
with it which you are welcome to use.


4) LRO_MIN_PG_HLEN (== 80)

This confuses me.  Can you please explain what you're trying to do?
Because of this, I kept getting crashes in the skb_pull() done by
eth_type_trans() because I was passing segments which were 60 bytes
and the skb-data_len of the skb constructed by lro_gen_skb()

2.6.23-rc1-mm1: net/ipv4/fib_trie.c compile error

2007-07-25 Thread Adrian Bunk

On Wed, Jul 25, 2007 at 04:03:04AM -0700, Andrew Morton wrote:
...
 Changes since 2.6.22-rc6-mm1:
...
 +immunize-rcu_dereference-against-crazy-compiler-writers.patch
...
  Misc new patches
...

This patch causes the following compile error:

--  snip  --

...
  CC  net/ipv4/fib_trie.o
/home/bunk/linux/kernel-2.6/linux-2.6.23-rc1-mm1/net/ipv4/fib_trie.c: In 
function ‘trie_rebalance’:
/home/bunk/linux/kernel-2.6/linux-2.6.23-rc1-mm1/net/ipv4/fib_trie.c:969: 
error: lvalue required as unary ‘&’ operand
/home/bunk/linux/kernel-2.6/linux-2.6.23-rc1-mm1/net/ipv4/fib_trie.c:971: 
error: lvalue required as unary ‘&’ operand
/home/bunk/linux/kernel-2.6/linux-2.6.23-rc1-mm1/net/ipv4/fib_trie.c:977: 
error: lvalue required as unary ‘&’ operand
/home/bunk/linux/kernel-2.6/linux-2.6.23-rc1-mm1/net/ipv4/fib_trie.c:980: 
error: lvalue required as unary ‘&’ operand
...
make[3]: *** [net/ipv4/fib_trie.o] Error 1

--  snip  --


cu
Adrian

-- 

   Is there not promise of rain? Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   Only a promise, Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: net/9p/mux.c: use-after-free

2007-07-25 Thread Latchesar Ionkov


Yep, it's a leak.

Thanks,
   Lucho

On 7/25/07, Eric Van Hensbergen [EMAIL PROTECTED] wrote:

On 7/22/07, Adrian Bunk [EMAIL PROTECTED] wrote:
 The Coverity checker spotted the following use-after-free
 in net/9p/mux.c:

 --  snip  --

 ...
 struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize,
 unsigned char *extended)
 {
 ...
 if (!m-tagpool) {
 kfree(m);
 return ERR_PTR(PTR_ERR(m-tagpool));
 }
 ...

 --  snip  --


I've got a fix for this one:
if (!m-tagpool) {
mtmp = ERR_PTR(PTR_ERR(m-tagpool));
kfree(m);
return mtmp;
}

but I was wondering about one of the other returns further down the function:

...
memset(m-poll_waddr, 0, sizeof(m-poll_waddr));
m-poll_task = NULL;
n = p9_mux_poll_start(m);
if (n)
return ERR_PTR(n);

n = trans-poll(trans, m-pt);
...

lucho: doesn't that constitute a leak?  Shouldn't we be doing:

if (n) {
kfree(m);
return ERR_PTR(n);
}

 -eric


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [-mm patch] one e1000 driver should be enough for everyone

2007-07-25 Thread Andrew Morton

On Wed, 25 Jul 2007 08:21:10 -0700
Kok, Auke [EMAIL PROTECTED] wrote:

 Moreover, Andrew should probably just drop this driver from -mm for now.

gone..
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] [POWERPC] Add support of platforms without PHY to gianfar driver


I'll let paulus and linuxppc merge this one (or not)...

Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Proposed interface for per-packet mesh-ttl

2007-07-25 Thread Dan Williams

On Tue, 2007-07-03 at 12:29 -0700, Javier Cardona wrote:
 David Woodhouse suggested that this list is a more appropriate forum
 for my message...

Attached is Javier's proposed patch for this.  Please flame away.

Dan

---
Resent per Dan's request.

Support for using setsockopt() to change the mesh-ttl on a network flow, i.e.

snip
sock = socket (PF_INET, SOCK_STREAM, 0);
setsockopt(sock, SOL_IP, MESH_SO_SET_TTL, ttl, optlen);
ttl = 0;
getsockopt(sock, SOL_IP, MESH_SO_GET_TTL, ttl, optlen);
/snip

Signed-off-by: Javier Cardona [EMAIL PROTECTED]
---
 drivers/net/wireless/Kconfig  |7 +
 drivers/net/wireless/libertas/Makefile|1 +
 drivers/net/wireless/libertas/decl.h  |3 +
 drivers/net/wireless/libertas/hostcmd.h   |6 +
 drivers/net/wireless/libertas/mesh_opts.c |  174 +
 drivers/net/wireless/libertas/mesh_opts.h |5 +
 drivers/net/wireless/libertas/tx.c|   46 +++-
 include/linux/in.h|3 +
 8 files changed, 241 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 1146f3d..f4123c3 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -294,6 +294,13 @@ config LIBERTAS_USB
---help---
  A driver for Marvell Libertas 8388 USB devices.
 
+config LIBERTAS_MESH_OPTS
+   tristate Mesh Configuration Options for Libertas USB 802.11b/g cards
+   depends on LIBERTAS_USB  NETFILTER
+   ---help---
+ This module enables the configuration of mesh parameters on a
+ per-socket basis, via setsockopt() calls. 
+
 config LIBERTAS_DEBUG
bool Enable full debugging output in the Libertas module.
depends on LIBERTAS
diff --git a/drivers/net/wireless/libertas/Makefile 
b/drivers/net/wireless/libertas/Makefile
index 71c5a25..a31d4f7 100644
--- a/drivers/net/wireless/libertas/Makefile
+++ b/drivers/net/wireless/libertas/Makefile
@@ -18,3 +18,4 @@ usb8xxx-objs += if_usb.o
 
 obj-$(CONFIG_LIBERTAS) += libertas.o
 obj-$(CONFIG_LIBERTAS_USB) += usb8xxx.o
+obj-m += mesh_opts.o
diff --git a/drivers/net/wireless/libertas/decl.h 
b/drivers/net/wireless/libertas/decl.h
index 4d553da..2cbc137 100644
--- a/drivers/net/wireless/libertas/decl.h
+++ b/drivers/net/wireless/libertas/decl.h
@@ -14,6 +14,7 @@
 struct wlan_private;
 struct sk_buff;
 struct net_device;
+struct mesh_options;
 
 extern char *libertas_fw_name;
 
@@ -86,6 +87,8 @@ int libertas_activate_card(wlan_private *priv, char *fw_name);
 int libertas_remove_card(wlan_private *priv);
 int libertas_add_mesh(wlan_private *priv, struct device *dev);
 void libertas_remove_mesh(wlan_private *priv);
+int libertas_register_mesh_opts(struct mesh_options *);
+int libertas_unregister_mesh_opts(struct mesh_options *);
 
 
 #endif /* _WLAN_DECL_H_ */
diff --git a/drivers/net/wireless/libertas/hostcmd.h 
b/drivers/net/wireless/libertas/hostcmd.h
index 0f67cba..bc86ed0 100644
--- a/drivers/net/wireless/libertas/hostcmd.h
+++ b/drivers/net/wireless/libertas/hostcmd.h
@@ -34,6 +34,12 @@ struct txpd {
u8 reserved1;
 };
 
+struct txpd_mesh {
+   __le16 reserved;
+   /* mesh ttl */
+   u8 ttl;
+} __attribute__ ((packed));
+
 /* RxPD Descriptor */
 struct rxpd {
/* Current Rx packet status */
diff --git a/drivers/net/wireless/libertas/mesh_opts.c 
b/drivers/net/wireless/libertas/mesh_opts.c
new file mode 100644
index 000..118eaed
--- /dev/null
+++ b/drivers/net/wireless/libertas/mesh_opts.c
@@ -0,0 +1,174 @@
+/*
+ * mesh_opts
+ *
+ * Author: Javier Cardona [EMAIL PROTECTED]
+ * Copyright: Marvell Semiconductors Inc., 2007
+ *
+ * Apply mesh-layer specific configuration to network flows.  Currently this
+ * only supports the mesh TTL parameter.
+ *
+ * Users call setsockopt on sockets to configure mesh parameters.  This module
+ * maintains a list of sockets (mesh_sks) that have different mesh parameters
+ * than the per-interface defaults.  The driver will modify the mesh
+ * configuration for each outgoing frame that belongs to one of the sockets in
+ * the mesh_sks list.
+ */
+
+#include linux/module.h
+#include linux/list.h
+#include linux/net.h
+#include linux/in.h
+#include linux/netfilter.h
+#include linux/netfilter_ipv4.h
+#include linux/netfilter_ipv6.h
+#include linux/spinlock.h
+#include net/sock.h
+
+#include asm/uaccess.h
+
+#include mesh_opts.h
+
+#define MESH_SO_BASE_CTL   MESH_SO_SET_TTL
+
+static struct list_head mesh_sks = LIST_HEAD_INIT(mesh_sks);
+static DEFINE_RWLOCK(mesh_sks_lock);
+
+struct mesh_sock {
+   struct list_head list;
+
+   struct sock *sk;
+   unsigned char ttl;
+   void (*orig_sk_destruct) (struct sock *sk);
+};
+
+static struct mesh_sock * lookup_socket(struct sock *sk)
+{
+   struct mesh_sock *mesh_sk;
+   struct mesh_sock *found_sk = NULL;
+
+   read_lock(mesh_sks_lock);

Re: atl1 driver corrupting memory?

2007-07-25 Thread Chris Snook


Chuck Ebbert wrote:

I have a report of random errors when using the atl1 driver
with kernel 2.6.22.1. Could that be a problem fixed by the
recent changes to DMA setup in 2.6.23-rc?


I hope so.  As far as we can tell the driver and the NIC itself are doing the 
right thing, and the pci layer or chipset is screwing up the 64-bit DMA.  This 
only manifests when physical memory addresses cross the 4 GB boundary, and as 
far as I'm aware atl1 is only used on desktop boards, so we don't have a lot of 
testers.  If someone wants to buy me and Jay more RAM so we can test it 
ourselves, I guess we wouldn't object :)


I favor disabling 64-bit DMA in atl1 until Atheros can track this down in the 
lab.  If we don't get confirmation that this bug is fixed by the DMA changes, I 
think we should revert to 32-bit DMA for 2.6.23.  Limiting ourselves to 32-bit 
DMA on desktop systems is a lot less bad than allowing arbitrary memory corruption.


-- Chris
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Tc filtering: broken 802_3 classifier?

Waskiewicz Jr, Peter P wrote:
In case of prio, if your manually installed filters don't 
match, it will fall back to the skb->priority based 
classification, which is based on tos and is probably 
responsible for what you're seeing. Feel free to investigate, 
but you could save us all some time by simply posting what 
you're doing, what you're expecting and what is actually 
happening, there's probably a good explanation.
 
 
 I thought I did that before, but I probably wasn't clear.  I'll try
 again (and if I'm still not clear, please pop me in the head).  I am
 aware that skb->priority is used if no filter matches, and that is
 derived from tos (and gets set in ipsockglue).
 
 This is my setup.  8 bands with prio, with a priomap that is nice and
 simple:
 
 # tc qdisc add dev eth0 root handle 1: prio bands 8 priomap 0 0 1 1 2 2
 3 3 4 4 5 5 6 6 7 7
 
 With this configuration, ICMP will default to flowid 1:1 (band 0), and
 ssh will default to flowid 1:4 (band 3) based on TOS.  I add this filter
 (802_3) and all traffic starts flowing into flowid 1:1 (including ssh),
 even though it should never match:
 
 # tc filter add dev eth0 protocol 802_3 parent 1: prio 2 u32 match u32
 0x0800 0x at 12 flowid 1:6
 
 As soon as I remove the filter:
 
 # tc filter del dev eth0 protocol 802_3 prio 2
 
 ssh flows back into flowid 1:4.  No filters of protocol ip were added,
 only the 802.3 filter.
 
 I hope this is more clear as to what I'm seeing.


It is .. now let me think about the good explanation, it doesn't
make sense at first :)

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: Tc filtering: broken 802_3 classifier?

2007-07-25 Thread Waskiewicz Jr, Peter P

 In case of prio, if your manually installed filters don't 
 match, it will fall back to the skb->priority based 
 classification, which is based on tos and is probably 
 responsible for what you're seeing. Feel free to investigate, 
 but you could save us all some time by simply posting what 
 you're doing, what you're expecting and what is actually 
 happening, there's probably a good explanation.

I thought I did that before, but I probably wasn't clear.  I'll try
again (and if I'm still not clear, please pop me in the head).  I am
aware that skb->priority is used if no filter matches, and that is
derived from tos (and gets set in ipsockglue).

This is my setup.  8 bands with prio, with a priomap that is nice and
simple:

# tc qdisc add dev eth0 root handle 1: prio bands 8 priomap 0 0 1 1 2 2
3 3 4 4 5 5 6 6 7 7

With this configuration, ICMP will default to flowid 1:1 (band 0), and
ssh will default to flowid 1:4 (band 3) based on TOS.  I add this filter
(802_3) and all traffic starts flowing into flowid 1:1 (including ssh),
even though it should never match:

# tc filter add dev eth0 protocol 802_3 parent 1: prio 2 u32 match u32
0x0800 0x at 12 flowid 1:6

As soon as I remove the filter:

# tc filter del dev eth0 protocol 802_3 prio 2

ssh flows back into flowid 1:4.  No filters of protocol ip were added,
only the 802.3 filter.

I hope this is more clear as to what I'm seeing.

Thanks,
-PJ
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: atl1 driver corrupting memory?

2007-07-25 Thread Chris Snook

Chuck Ebbert wrote:

On 07/25/2007 05:22 PM, Chris Snook wrote:

Chuck Ebbert wrote:

I have a report of random errors when using the atl1 driver
with kernel 2.6.22.1. Could that be a problem fixed by the
recent changes to DMA setup in 2.6.23-rc?

I hope so. As far as we can tell the driver and the NIC itself are
doing the right thing, and the pci layer or chipset is screwing up the
64-bit DMA. This only manifests when physical memory addresses cross
the 4 GB boundary, and as far as I'm aware atl1 is only used on desktop
boards, so we don't have a lot of testers. If someone wants to buy me
and Jay more RAM so we can test it ourselves, I guess we wouldn't object :)

Our reporter has 8GB of memory in an x86_64 machine.

https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=249511

I favor disabling 64-bit DMA in atl1 until Atheros can track this down
in the lab. If we don't get confirmation that this bug is fixed by the
DMA changes, I think we should revert to 32-bit DMA for 2.6.23.
Limiting ourselves to 32-bit DMA on desktop systems is a lot less bad

than allowing arbitrary memory corruption.

This is what was committed.

http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=3f516c00d416bd39aab6cfb348b68919e295fe23
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ef76e3e2505db01f7d4b537854f4a177220c26c8

Oh, I thought you were referring to a problem reproduced *after* those changes,
to be fixed by some generic DMA setup patch. Has anyone reproduced the problem
after those changes?

CCing atl1-devel to see if we can get some more testing...

-- Chris
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: [RFC 0/1] lro: Generic Large Receive Offload for TCP traffic

From: Andrew Gallatin [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 13:17:54 -0400

 I've ported myri10ge to use the new LRO interface.  I have attached a
 preliminary patch to myri10ge.  I'm very pleased to note that the
 performance is on-par with my own LRO used by our out-of-tree driver.
 (except when using mixed MTUS, see performance data below).

Thanks for posting this port and feedback on the generic LRO
code.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH]: 2nd revision of make xfrm_audit_log more generic

From: Joy Latten [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 14:21:43 -0500

 This is 2nd revision of patch to modify xfrm_audit_log() such
 that it can accommodate auditing other ipsec events
 besides add/delete of an SA or SPD entry.

 2nd revision includes new define for all IPsec
 events in audit.h and introduces op= entry
 in logfile as well as add a hyphen in description
 for report parsing. 

 This is a small change to accommodate updating
 ipsec protocol to RFCs 4301, 4302 and 4303 which
 require auditing some ipsec events if auditing
 is available. Please let me know if ok.

 Signed-off-by: Joy Latten [EMAIL PROTECTED]

I like very much how the implementation of xfrm_audit_log() got
simplified.

But _TEN_ function call arguments, good grief!

That's at least twice as many as most cpus can pass in registers.

Let's try an alternative where you have specialized
xfrm_audit_log_foo() routines that take a user policy pointer, or
whatever the main object is.

If internally this just unpacks the needed bits and calls some
do_xfrm_audit_log() thing inside of the auditing code that takes lots
of arguments, that's fine, but let's not expand all of that argument
setup code in the main IPSEC code paths.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Tc filtering: broken 802_3 classifier?

From: Patrick McHardy [EMAIL PROTECTED]
Date: Thu, 26 Jul 2007 01:58:54 +0200

 This patch should fix it, but other qdiscs might need similar
 fixes I believe. I'll look into that tomorrow.

Thanks for figuring this out Patrick, let me know when you
have a final version to apply.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2.6.22 1/3]S2io: Mask spurious interrupts

- Mask single and double bit ETQ ecc errors to inhibit spurious interrupts.

(Resending; Removed HTML sections in patch)

Signed-off-by: Santosh Rastapur [EMAIL PROTECTED]
---
diff -Nurp 2.0.24.1/drivers/net/s2io-regs.h 2.0.24.1P1/drivers/net/s2io-regs.h
--- 2.0.24.1/drivers/net/s2io-regs.h2007-07-20 16:13:29.0 -0700
+++ 2.0.24.1P1/drivers/net/s2io-regs.h  2007-07-20 16:18:27.0 -0700
@@ -747,10 +747,9 @@ struct XENA_dev_config {
 #define MC_ERR_REG_MIRI_CRI_ERR_1  BIT(23)
 #define MC_ERR_REG_SM_ERR  BIT(31)
 #define MC_ERR_REG_ECC_ALL_SNG(BIT(2) | BIT(3) | BIT(4) | BIT(5) |\
-   BIT(6) | BIT(7) | BIT(17) | BIT(19))
+   BIT(17) | BIT(19))
 #define MC_ERR_REG_ECC_ALL_DBL(BIT(10) | BIT(11) | BIT(12) |\
-   BIT(13) | BIT(14) | BIT(15) |\
-   BIT(18) | BIT(20))
+   BIT(13) | BIT(18) | BIT(20))
u64 mc_err_mask;
u64 mc_err_alarm;
 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] IPv6: ipv6_addr_type() doesn't know about RFC4193 addresses

2007-07-25 Thread Dave Johnson

David Miller writes:
 Contrarily, there may be ipv6_addr_type() call sites that really
 do want to reject rfc4193 addresses.

A quick look through the callers and only these functions should be
affected, they check either RESERVED or UNICAST from ipv6_addr_type():

net/ipv6/addrconf.c:ipv6_dev_get_saddr()
net/ipv6/exthdrs.c: ipv6_dest_hao()   
net/ipv6/ip6_tunnel.c:  ip6_tnl_set_cap()
net/ipv6/netfilter/ip6t_REJECT.c:   send_reset()
net/ipv6/route.c:   ip6_route_add()
net/ipv6/route.c:   ip6_pkt_drop()
net/sctp/ipv6.c:sctp_v6_available()
net/sctp/ipv6.c:sctp_v6_addr_valid()

-- 
Dave Johnson
Starent Networks

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] IPv6: ipv6_addr_type() doesn't know about RFC4193 addresses

2007-07-25 Thread Dave Johnson


ipv6_addr_type() doesn't check for 'Unique Local IPv6 Unicast
Addresses' (RFC4193) and returns IPV6_ADDR_RESERVED for that range.

SCTP uses this function and will fail bind() and connect() calls that
use RFC4193 addresses, SCTP will also ignore inbound connections from
RFC4193 addresses if listening on IPV6_ADDR_ANY.

There may be other users of ipv6_addr_type() that could also have
problems.

Signed-off-by: Dave Johnson [EMAIL PROTECTED]
Cc: Srinivas Akkipeddi [EMAIL PROTECTED]

= net/ipv6/addrconf_core.c 1.2 vs edited =
--- 1.2/net/ipv6/addrconf_core.c2007-02-26 14:42:57 -05:00
+++ edited/net/ipv6/addrconf_core.c 2007-07-25 15:21:41 -04:00
@@ -50,6 +50,9 @@
if ((st  htonl(0xFFC0)) == htonl(0xFEC0))
return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST |
IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL));   
/* addr-select 3.1 */
+   if ((st  htonl(0xFE00)) == htonl(0xFC00))
+   return (IPV6_ADDR_UNICAST |
+   IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL));  
/* RFC 4193 */
 
if ((addr-s6_addr32[0] | addr-s6_addr32[1]) == 0) {
if (addr-s6_addr32[2] == 0) {

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4

From: Jeff Garzik [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 21:55:08 -0400

 I don't see any logic to your request, only added overhead for no reason.

There may be some flawed logic in what Stephen stated, but
the change really is needed.

It must be atomic to execute the:

enable_interrupts();
netif_rx_complete();

sequence wrt. the same code path in the interrupt handler.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2.6.22 1/3]S2io: Mask spurious interrupts

- Mask single and double bit ETQ ecc errors to inhibit spurious interrupts.

(Resending; Removed HTML sections in the patch)

Signed-off-by: Santosh Rastapur [EMAIL PROTECTED]
---
diff -Nurp 2.0.24.1/drivers/net/s2io-regs.h 2.0.24.1P1/drivers/net/s2io-regs.h
--- 2.0.24.1/drivers/net/s2io-regs.h2007-07-20 16:13:29.0 -0700
+++ 2.0.24.1P1/drivers/net/s2io-regs.h  2007-07-20 16:18:27.0 -0700
@@ -747,10 +747,9 @@ struct XENA_dev_config {
 #define MC_ERR_REG_MIRI_CRI_ERR_1  BIT(23)
 #define MC_ERR_REG_SM_ERR  BIT(31)
 #define MC_ERR_REG_ECC_ALL_SNG(BIT(2) | BIT(3) | BIT(4) | BIT(5) |\
-   BIT(6) | BIT(7) | BIT(17) | BIT(19))
+   BIT(17) | BIT(19))
 #define MC_ERR_REG_ECC_ALL_DBL(BIT(10) | BIT(11) | BIT(12) |\
-   BIT(13) | BIT(14) | BIT(15) |\
-   BIT(18) | BIT(20))
+   BIT(13) | BIT(18) | BIT(20))
u64 mc_err_mask;
u64 mc_err_alarm;
 



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][v2] Netfilter Kconfig: Expose IPv4/6 connection tracking options by selecting NF_CONNTRACK_ENABLED

[Removed a few CCs]

Al Boldi wrote:
 Make NF_CONNTRACK_IPV4 and NF_CONNTRACK_IPV6 select NF_CONNTRACK_ENABLED.


One thought that occurred to me after the last of many false bugreports
that were actually caused by failure to configure the new options
properly. Most people know they want NF_CONNTRACK (and it's selected by
default with old configs), what they're missing is that they now also
need to select IPv4 connection tracking. So what would really make sense
is to make NF_CONNTRACK_IPV4 default to m (and really *everyone*
using conntrack wants this). But with your proposed change this would
default to selecting NF_CONNTRACK by default, which I'm not so sure
is a good idea. So I'm leaning towards just using m as default for
IPv4 conntrack to save people trouble and myself some bugreports, but
I also like your simplification ...

Maybe we can do something to have the NF_CONNTRACK_ENABLED option select
NF_CONNTRACK_IPV4 (which really is what we actually want) and combine
that with automatic selection of NF_CONNTRACK? I believe the only case
with negative impact would be people that currently use only IPv6
connection tracking, which is most likely nobody.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4


Stephen Hemminger wrote:

The usage of NAPI on 8139cp and 8139too seems dodgy;
these drivers expect this to work:

local_irq_save(flags);
cpw16_f(IntrMask, cp_intr_mask);
__netif_rx_complete(dev);
local_irq_restore(flags);

It works on SMP only because if poll races with IRQ, 
the IRQ is not masked or cleared so the IRQ will get restarted.


Expect?  This _does_ work on both UP and SMP.



Better would be to change it to:
spin_lock_irqsave(cp-lock, flags);
cpw16_f(IntrMask, cp_intr_mask);
__netif_rx_complete(dev);
spin_unlock_irqrestore(cp-lock, flags);

Which actually is same code on UP.


I'm missing your point?

You claim local_irq_save() works only on SMP, then suggest a solution 
that you agree produces the same result?


I don't see any logic to your request, only added overhead for no reason.

Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: net/9p/mux.c: use-after-free

2007-07-25 Thread Eric Van Hensbergen


On 7/25/07, Latchesar Ionkov [EMAIL PROTECTED] wrote:

Yep, it's a leak.



Okay, I'll roll that into the patch as well.

  -eric
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][v2] Netfilter Kconfig: Expose IPv4/6 connection tracking options by selecting NF_CONNTRACK_ENABLED

2007-07-25 Thread Yasuyuki KOZAKAI

From: Patrick McHardy [EMAIL PROTECTED]
Date: Thu, 26 Jul 2007 02:46:05 +0200

 [Removed a few CCs]

 Al Boldi wrote:
  Make NF_CONNTRACK_IPV4 and NF_CONNTRACK_IPV6 select NF_CONNTRACK_ENABLED.

 One thought that occurred to me after the last of many false bugreports
 that were actually caused by failure to configure the new options
 properly. Most people know they want NF_CONNTRACK (and it's selected by
 default with old configs), what they're missing is that they now also
 need to select IPv4 connection tracking. So what would really make sense
 is to make NF_CONNTRACK_IPV4 default to m (and really *everyone*
 using conntrack wants this). But with your proposed change this would
 default to selecting NF_CONNTRACK by default, which I'm not so sure
 is a good idea. So I'm leaning towards just using m as default for
 IPv4 conntrack to save people trouble and myself some bugreports, but
 I also like your simplification ...

 Maybe we can do something to have the NF_CONNTRACK_ENABLED option select
 NF_CONNTRACK_IPV4 (which really is what we actually want) and combine
 that with automatic selection of NF_CONNTRACK? I believe the only case
 with negative impact would be people that currently use only IPv6
 connection tracking, which is most likely nobody.

I agree. I've not heard trouble with NF_CONNTRACK_IPV6. I think that is
because it is purely new feature.

BTW, is it too late to restore IP_NF_CONNTRACK in stable and current tree
for a while?

-- Yasuyuki Kozakai
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4

David Miller wrote:

From: Jeff Garzik [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 21:55:08 -0400

I don't see any logic to your request, only added overhead for no reason.

There may be some flawed logic in what Stephen stated, but
the change really is needed.

It must be atomic to execute the:

enable_interrupts();
netif_rx_complete();

sequence wrt. the same code path in the interrupt handler.

Sure.  And how did the existing code fail to achieve that?

Jeff

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4

From: Jeff Garzik [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 22:00:31 -0400

 David Miller wrote:
  From: Jeff Garzik [EMAIL PROTECTED]
  Date: Wed, 25 Jul 2007 21:55:08 -0400

  I don't see any logic to your request, only added overhead for no reason.

  There may be some flawed logic in what Stephen stated, but
  the change really is needed.

  It must be atomic to execute the:

  enable_interrupts();
  netif_rx_complete();

  sequence wrt. the same code path in the interrupt handler.

 Sure.  And how did the existing code fail to achieve that?

The interrupt handler can run on another cpu in between those two
statements, running the NAPI test-and-do-something operations in
parallel with the netif_rx_complete() which causes problems as Rusty
and I discussed yesterday.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Tc filtering: broken 802_3 classifier?

Patrick McHardy wrote:
 Waskiewicz Jr, Peter P wrote:
 
This is my setup.  8 bands with prio, with a priomap that is nice and
simple:

# tc qdisc add dev eth0 root handle 1: prio bands 8 priomap 0 0 1 1 2 2
3 3 4 4 5 5 6 6 7 7

With this configuration, ICMP will default to flowid 1:1 (band 0), and
ssh will default to flowid 1:4 (band 3) based on TOS.  I add this filter
(802_3) and all traffic starts flowing into flowid 1:1 (including ssh),
even though it should never match:

# tc filter add dev eth0 protocol 802_3 parent 1: prio 2 u32 match u32
0x0800 0x at 12 flowid 1:6

As soon as I remove the filter:

# tc filter del dev eth0 protocol 802_3 prio 2

ssh flows back into flowid 1:4.  No filters of protocol ip were added,
only the 802.3 filter.

I hope this is more clear as to what I'm seeing.

 
 It is .. now let me think about the good explanation, it doesn't
 make sense at first :)


First of all - good catch :) This really is a bug, and one that
has existed for quite some time. What's happening is that
tc_classify returns -1 because no filter matches, but this
is not caught in the switch statement and the !q->filter_list
condition is false. So band is set to the uninitialized value
of res.classid, and the band >= q->bands check catches this
as invalid and uses 0. The sad thing is that this is one of
the typical constructs gcc falsely warns about for primitive
types, in this case it doesn't care. Anyway, what should
really be happening in this case is that skb->priority is
used, as without any filters.

This patch should fix it, but other qdiscs might need similar
fixes I believe. I'll look into that tomorrow.

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2d8c084..f37dd8c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -38,22 +38,21 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int 
*qerr)
struct prio_sched_data *q = qdisc_priv(sch);
u32 band = skb-priority;
struct tcf_result res;
+   int err;
 
*qerr = NET_XMIT_BYPASS;
if (TC_H_MAJ(skb-priority) != sch-handle) {
+   err = tc_classify(skb, q-filter_list, res);
 #ifdef CONFIG_NET_CLS_ACT
-   switch (tc_classify(skb, q-filter_list, res)) {
+   switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
*qerr = NET_XMIT_SUCCESS;
case TC_ACT_SHOT:
return NULL;
}
-
-   if (!q-filter_list ) {
-#else
-   if (!q-filter_list || tc_classify(skb, q-filter_list, res)) {
 #endif
+   if (!q-filter_list || err  0) {
if (TC_H_MAJ(band))
band = 0;
band = q-prio2band[bandTC_PRIO_MAX];

Re: [PATCH][v2] Netfilter Kconfig: Expose IPv4/6 connection tracking options by selecting NF_CONNTRACK_ENABLED

2007-07-25 Thread Al Boldi

Patrick McHardy wrote:
 Al Boldi wrote:
  Make NF_CONNTRACK_IPV4 and NF_CONNTRACK_IPV6 select
  NF_CONNTRACK_ENABLED.

 One thought that occurred to me after the last of many false bugreports
 that were actually caused by failure to configure the new options
 properly. Most people know they want NF_CONNTRACK (and it's selected by
 default with old configs), what they're missing is that they now also
 need to select IPv4 connection tracking. So what would really make sense
 is to make NF_CONNTRACK_IPV4 default to m (and really *everyone*
 using conntrack wants this). But with your proposed change this would
 default to selecting NF_CONNTRACK by default, which I'm not so sure
 is a good idea.

Making NF_CONNTRACK_IPV4 default to m would select NF_CONNTRACK to m if 
it hasn't been selected by the user to be y, which seems reasonable.

 So I'm leaning towards just using m as default for
 IPv4 conntrack to save people trouble and myself some bugreports, but
 I also like your simplification ...

I was also planning to submit another patch to make all netfilter 
childoptions options default to their parent, i.e: NF_CONNTRACK_FTP would 
default NF_CONNTRACK.  This could be one big Kconfig time-saver.

 Maybe we can do something to have the NF_CONNTRACK_ENABLED option select
 NF_CONNTRACK_IPV4 (which really is what we actually want) and combine
 that with automatic selection of NF_CONNTRACK? I believe the only case
 with negative impact would be people that currently use only IPv6
 connection tracking, which is most likely nobody.

I think that wouldn't be advisable, as this would add an unnecessary 
dependency.  But of course,  it's your call...


Thanks!

--
Al

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [DRIVER SUBMISSION] DRBD wants to go mainline

2007-07-25 Thread Kyle Moffett


On Jul 25, 2007, at 22:03:37, [EMAIL PROTECTED] wrote:

On Wed, 25 Jul 2007, Satyam Sharma wrote:

On 7/25/07, Lars Ellenberg [EMAIL PROTECTED] wrote:

On Wed, Jul 25, 2007 at 04:41:53AM +0530, Satyam Sharma wrote:

[...]
But where does the send come into the picture over here -- a  
send won't block forever, so I don't foresee any issues  
whatsoever w.r.t.  kthreads conversion for that. [ BTW I hope  
you're *not* using any signals-based interface for your kernel  
thread _at all_. Kthreads disallow (ignore) all signals by  
default, as they should, and you really shouldn't need to write  
any logic to handle or   do-certain-things-on-seeing a signal  
in a well designed kernel thread. ] and the sending latency is  
crucial to performance, while the recv will not timeout for the  
next few seconds.  Again, I don't see what sending latency has  
to do with a kernel_thread to kthread conversion. Or with  
signals, for that matter. Anyway, as Kyle Moffett mentioned  
elsewhere, you could probably look at other examples (say  
cifs_demultiplexer_thread() in fs/cifs/connect.c).


the basic problem, and what we use signals for, is:  it is  
waiting in recv, waiting for the peer to say something.  but I  
want it to stop recv, and go send something right now.


That's ... weird. Most (all?) communication between any two  
parties would follow a protocol where someone recv's stuff, does  
something with it, and sends it back ... what would you send  
right now if you didn't receive anything?


because even though you didn't receive anything you now have  
something important to send.


remember that both sides can be sitting in receive mode. this puts  
them both in a position to respond to the other if the other has  
something to say.


Why not just have 2 threads, one for sending and one for  
receiving.  When your receiving thread gets data it takes  
appropriate locks and processes it, then releases the locks and goes  
back to waiting for packets.  Your sending thread would take  
appropriate locks, generate data to send, release locks, and transmit  
packets.  You don't have to interrupt the receive thread to send  
packets, so where's the latency problem, exactly?


If I were writing that in userspace I would have:

(A) The pool of IO-generating threads (IE: What would ordinarily be  
userspace)

(B) One or a small number of data-reception threads.
(C) One or a small number of data-transmission threads.

When you get packets to process in your network-reception thread(s),  
you queue appropriate disk IOs and any appropriate responses with  
your transmission thread(s).  You can basically just sit in a loop on  
tcp_recvmsg=demultiplex=do-stuff.  When your IO-generators actually  
make stuff to send you queue such data for disk IO, then packetize it  
and hand it off to your data-transmission threads.


If you made all your sockets and inter-thread pipes nonblocking then  
in userspace you would just epoll_wait() on the sockets and pipes and  
be easily able to react to any IO from anywhere.


In kernel space there are similar nonblocking interfaces, although it  
would probably be easier just to use a couple threads.


Cheers,
Kyle Moffett

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: atl1 driver corrupting memory?

2007-07-25 Thread Chuck Ebbert

On 07/25/2007 05:22 PM, Chris Snook wrote:
Chuck Ebbert wrote:
I have a report of random errors when using the atl1 driver
with kernel 2.6.22.1. Could that be a problem fixed by the
recent changes to DMA setup in 2.6.23-rc?

Our reporter has 8GB of memory in an x86_64 machine.

https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=249511

This is what was committed.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: atl1 driver corrupting memory?

2007-07-25 Thread Jay Cliburn

On Wed, 25 Jul 2007 17:31:02 -0400
Chuck Ebbert [EMAIL PROTECTED] wrote:

 This is what was committed.
 
 http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=3f516c00d416bd39aab6cfb348b68919e295fe23
 http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ef76e3e2505db01f7d4b537854f4a177220c26c8

I'm doubtful these patches will fix the highmem corruption problem
we've seen in the L1.  I actually extracted the changes in the
referenced commits from the vendor's current out-of-tree driver, and
unfortunately he was able to duplicate the problem in his lab using
that driver.

As a workaround, Chuck, your reporter can boot with mem=3900 until the
problem is resolved.

I go on record with Chris:  we should apply the patch at
http://lkml.org/lkml/2007/6/25/293 until we get to the bottom of it.
The patch is in Jeff's queue, but I think he suspects a driver bug and
so far hasn't chosen to apply the patch.

Jeff, can we ask you to please reconsider?

Jay
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 4/4 -rev1] Initialize and fill IPv6 route age

The age field of the ipv6 route structures is initialized with the current 
timeval at the time of route
creation. When the route dump is called the route age value stored in the 
structure is subtracted from the
present timeval and the difference is passed on as the route age. The dumpflg 
clarity is added as per
suggestion.

Signed-off-by: Varun Chandramohan [EMAIL PROTECTED]
---
 include/net/ip6_fib.h   |1 +
 include/net/ip6_route.h |3 +++
 net/ipv6/addrconf.c |5 +
 net/ipv6/route.c|   23 +++
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c48ea87..e30a1cf 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -98,6 +98,7 @@ struct rt6_info

u32 rt6i_flags;
u32 rt6i_metric;
+   time_t  rt6i_age;
atomic_trt6i_ref;
struct fib6_table   *rt6i_table;
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 5456fdd..fc9716c 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -36,6 +36,9 @@ struct route_info {
 #define RT6_LOOKUP_F_REACHABLE 0x2
 #define RT6_LOOKUP_F_HAS_SADDR 0x4
 
+#define RT6_SET_ROUTE_INFO 0x0
+#define RT6_GET_ROUTE_INFO 0x1
+
 extern struct rt6_info ip6_null_entry;
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5a5f8bd..715c766 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4187,6 +4187,7 @@ EXPORT_SYMBOL(unregister_inet6addr_notif
 
 int __init addrconf_init(void)
 {
+   struct timeval tv;
int err = 0;
 
/* The addrconf netdev notifier requires that loopback_dev
@@ -4214,10 +4215,14 @@ int __init addrconf_init(void)
if (err)
return err;
 
+   do_gettimeofday(tv);
ip6_null_entry.rt6i_idev = in6_dev_get(loopback_dev);
+   ip6_null_entry.rt6i_age = timeval_to_sec(tv);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
ip6_prohibit_entry.rt6i_idev = in6_dev_get(loopback_dev);
+   ip6_prohibit_entry.rt6i_age = timeval_to_sec(tv);
ip6_blk_hole_entry.rt6i_idev = in6_dev_get(loopback_dev);
+   ip6_blk_hole_entry.rt6i_age = timeval_to_sec(tv);
 #endif
 
register_netdevice_notifier(ipv6_dev_notf);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fe8d983..686566f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -600,7 +600,14 @@ static int __ip6_ins_rt(struct rt6_info
 {
int err;
struct fib6_table *table;
+   struct timeval tv;
 
+   do_gettimeofday(tv);
+   /* Update the timeval for new routes
+* We add it here to make it common irrespective
+* of how the new route is added.
+*/
+   rt-rt6i_age = timeval_to_sec(tv);
table = rt-rt6i_table;
write_lock_bh(table-tb6_lock);
err = fib6_add(table-tb6_root, rt, info);
@@ -2111,6 +2118,7 @@ static inline size_t rt6_nlmsg_size(void
   + nla_total_size(4) /* RTA_IIF */
   + nla_total_size(4) /* RTA_OIF */
   + nla_total_size(4) /* RTA_PRIORITY */
+  + nla_total_size(4) /*RTA_AGE*/
   + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
   + nla_total_size(sizeof(struct rta_cacheinfo));
 }
@@ -2118,10 +2126,11 @@ static inline size_t rt6_nlmsg_size(void
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 struct in6_addr *dst, struct in6_addr *src,
 int iif, int type, u32 pid, u32 seq,
-int prefix, unsigned int flags)
+int prefix, unsigned int flags, int dumpflg)
 {
struct rtmsg *rtm;
struct nlmsghdr *nlh;
+   struct timeval tv;
long expires;
u32 table;
 
@@ -2185,6 +2194,12 @@ static int rt6_fill_node(struct sk_buff
if (ipv6_get_saddr(rt-u.dst, dst, saddr_buf) == 0)
NLA_PUT(skb, RTA_PREFSRC, 16, saddr_buf);
}
+   
+   do_gettimeofday(tv);
+   if (dumpflg == RT6_GET_ROUTE_INFO)
+   NLA_PUT_U32(skb, RTA_AGE, timeval_to_sec(tv) - rt-rt6i_age);
+   else
+   NLA_PUT_U32(skb, RTA_AGE, rt-rt6i_age);
 
if (rtnetlink_put_metrics(skb, rt-u.dst.metrics)  0)
goto nla_put_failure;
@@ -,7 +2237,7 @@ int rt6_dump_route(struct rt6_info *rt,
 
return rt6_fill_node(arg-skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 NETLINK_CB(arg-cb-skb).pid, arg-cb-nlh-nlmsg_seq,
-prefix, NLM_F_MULTI);
+prefix, NLM_F_MULTI, RT6_GET_ROUTE_INFO);
 }
 
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, 
void *arg)
@@ -2287,7 +2302,7 @@ static int inet6_rtm_getroute(struct sk_
 
err = rt6_fill_node(skb, rt, fl.fl6_dst,

[PATCH 0/4 -rev1] Age Entry For IPv4 IPv6 Route Table

Hi,
   This is the rev 1 of the patch set I sent out earlier. A few changes 
suggested were added. 

Original Message:
   According to the RFC 4292 (IP Forwarding Table MIB) there is a need for an 
age entry for all the routes in the routing table. The entry in the RFC is 
inetCidrRouteAge and oid is inetCidrRouteAge.1.10.
Many SNMP applications require this age entry. So I am adding the age field in 
the routing table for ipv4 and ipv6 and providing the interface for this value 
via netlink.

Signed-off-by: Varun Chandramohan [EMAIL PROTECTED]
---

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4

From: David Miller [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 01:31:54 -0700 (PDT)

 Besides that the only major issue is netpoll and I have some
 ideas on how to handle that, which I'll try to implement
 tonight and tomorrow.

Ok, here is how I'm trying to crack this nut.

The way I'll solve this is to just change the problem space
and therefore simplify things enough that it's actually sane
and implementable.

None of the receive packet handling of netpoll is even needed,
so we rip all of it out.  Every use was out of tree, and now
subsumed by another facility.  Basically that was kdump, which
is now done via kexec.

As a result there is no more messing around with fake NAPI polls and
all that other crap, instead ->poll_controller() merely has to try to
process TX queue acks to free up space and wake up the transmit
queue(s) of the device.

The downside is that TX ack processing has to run with the
netif_tx_lock() for this to work out.  Basically, netpoll carefully
grabs the TX lock first by disabling interrupts and using
netif_tx_trylock(), if successful it does dev->poll_controller() and
then tries to transmit the packet.

Non-NAPI driver should be able to continue functioning as-is,
they just run their interrupt handler which doesn't schedule
polls and will do the TX queue acking.

NAPI drivers do need to change their ->poll_controller() to do TX
queue acking directly.

I wanted to do this in a way that didn't require touching non-NAPI
drivers as this change is getting big enough as it is and the
change is incomplete if it makes netconsole totally broken in the
process.

Anyways, here are snippets of patch from my current tree that should
show generally what is going on, with bnx2 and tg3 as two converted
NAPI ->poll_controller() examples.  I'm about to boot this up and play
with netconsole to test out that it works properly.

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index a729da0..5187aaf 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -428,7 +428,7 @@ bnx2_netif_stop(struct bnx2 *bp)
 {
bnx2_disable_int_sync(bp);
if (netif_running(bp-dev)) {
-   netif_poll_disable(bp-dev);
+   napi_disable(bp-napi);
netif_tx_disable(bp-dev);
bp-dev-trans_start = jiffies; /* prevent tx timeout */
}
@@ -440,7 +440,7 @@ bnx2_netif_start(struct bnx2 *bp)
if (atomic_dec_and_test(bp-intr_sem)) {
if (netif_running(bp-dev)) {
netif_wake_queue(bp-dev);
-   netif_poll_enable(bp-dev);
+   napi_enable(bp-napi);
bnx2_enable_int(bp);
}
}
@@ -2268,7 +2268,7 @@ bnx2_phy_int(struct bnx2 *bp)
 }

 static void
-bnx2_tx_int(struct bnx2 *bp)
+__bnx2_tx_int(struct bnx2 *bp)
 {
struct status_block *sblk = bp-status_blk;
u16 hw_cons, sw_cons, sw_ring_cons;
@@ -2347,11 +2347,9 @@ bnx2_tx_int(struct bnx2 *bp)

if (unlikely(netif_queue_stopped(bp-dev)) 
 (bnx2_tx_avail(bp)  bp-tx_wake_thresh)) {
-   netif_tx_lock(bp-dev);
if ((netif_queue_stopped(bp-dev)) 
(bnx2_tx_avail(bp)  bp-tx_wake_thresh))
netif_wake_queue(bp-dev);
-   netif_tx_unlock(bp-dev);
}
 }

@@ -2551,7 +2549,7 @@ bnx2_msi(int irq, void *dev_instance)
if (unlikely(atomic_read(bp-intr_sem) != 0))
return IRQ_HANDLED;

-   netif_rx_schedule(dev);
+   netif_rx_schedule(dev, bp-napi);

return IRQ_HANDLED;
 }
@@ -2568,7 +2566,7 @@ bnx2_msi_1shot(int irq, void *dev_instance)
if (unlikely(atomic_read(bp-intr_sem) != 0))
return IRQ_HANDLED;

-   netif_rx_schedule(dev);
+   netif_rx_schedule(dev, bp-napi);

return IRQ_HANDLED;
 }
@@ -2604,9 +2602,9 @@ bnx2_interrupt(int irq, void *dev_instance)
if (unlikely(atomic_read(bp-intr_sem) != 0))
return IRQ_HANDLED;

-   if (netif_rx_schedule_prep(dev)) {
+   if (netif_rx_schedule_prep(dev, bp-napi)) {
bp-last_status_idx = sblk-status_idx;
-   __netif_rx_schedule(dev);
+   __netif_rx_schedule(dev, bp-napi);
}

return IRQ_HANDLED;
@@ -2632,12 +2630,14 @@ bnx2_has_work(struct bnx2 *bp)
 }

 static int
-bnx2_poll(struct net_device *dev, int *budget)
+bnx2_poll(struct napi_struct *napi, int budget)
 {
-   struct bnx2 *bp = netdev_priv(dev);
+   struct bnx2 *bp = container_of(napi, struct bnx2, napi);
+   struct net_device *dev = bp-dev;
struct status_block *sblk = bp-status_blk;
u32 status_attn_bits = sblk-status_attn_bits;
u32 status_attn_bits_ack = sblk-status_attn_bits_ack;
+   int work_done = 0;

if ((status_attn_bits  STATUS_ATTN_EVENTS) !=
(status_attn_bits_ack  STATUS_ATTN_EVENTS)) {
@@ -2652,26 +2652,20 @@ bnx2_poll(struct net_device *dev, int

Re: [PATCH] IPv6: ipv6_addr_type() doesn't know about RFC4193 addresses

From: Dave Johnson [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 19:49:09 -0400

 ipv6_addr_type() doesn't check for 'Unique Local IPv6 Unicast
 Addresses' (RFC4193) and returns IPV6_ADDR_RESERVED for that range.

 SCTP uses this function and will fail bind() and connect() calls that
 use RFC4193 addresses, SCTP will also ignore inbound connections from
 RFC4193 addresses if listening on IPV6_ADDR_ANY.

 There may be other users of ipv6_addr_type() that could also have
 problems.

Contrarily, there may be ipv6_addr_type() call sites that really
do want to reject rfc4193 addresses.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2.6.22 2/3]S2io: Fix crash when resetting adapter

- Removed the call to pci_set_power_state to reset the adapter as it was 
resulting
  in system crash on some platforms.

(Resending; Removed HTML sections in the patch)

Signed-off-by: Santosh Rastapur [EMAIL PROTECTED]
---
diff -Nurp 2.0.24.1P1/drivers/net/s2io.c 2.0.24.1P2/drivers/net/s2io.c
--- 2.0.24.1P1/drivers/net/s2io.c   2007-07-20 16:16:01.0 -0700
+++ 2.0.24.1P2/drivers/net/s2io.c   2007-07-20 16:26:13.0 -0700
@@ -3382,23 +3382,8 @@ static void s2io_reset(struct s2io_nic *
/* Back up  the PCI-X CMD reg, dont want to lose MMRBC, OST settings */
pci_read_config_word(sp-pdev, PCIX_COMMAND_REGISTER, (pci_cmd));
 
-   if (sp-device_type == XFRAME_II_DEVICE) {
-   int ret;
-   ret = pci_set_power_state(sp-pdev, 3);
-   if (!ret)
-   ret = pci_set_power_state(sp-pdev, 0);
-   else {
-   DBG_PRINT(ERR_DBG,%s PME based SW_Reset failed!\n,
-   __FUNCTION__);
-   goto old_way;
-   }
-   msleep(20);
-   goto new_way;
-   }
-old_way:
val64 = SW_RESET_ALL;
writeq(val64, bar0-sw_reset);
-new_way:
if (strstr(sp-product_name, CX4)) {
msleep(750);
}



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Tc filtering: broken 802_3 classifier?

Waskiewicz Jr, Peter P wrote:
Waskiewicz Jr, Peter P wrote:

I'm not sure what you're expecting. skb->protocol is usually not set
to ETH_P_802_3, which is why the filter is not matching.
 
 
 I understand that.  I had two issues, which you cleared up one by
 reminding me that the protocol matches on skb->protocol before it tries
 to run the ->classify() routine.  The other issue I am seeing is with 8
 bands, an 802_3 filter is affecting classification of IP traffic.  For
 example, I have an 802_3 filter to look for dst MAC address, but an ssh
 packet, which without any filters should go into flowid 1:3 on my
 system, is getting pushed into flowid 1:1.  I remove the 802_3 filter,
 and ssh traffic starts going back into 1:3.  No other filters on the
 system.  That's the main issue I'm seeing, so I'll keep investigating to
 see what's going on.


In case of prio, if your manually installed filters don't match, it will
fall back to the skb->priority based classification, which is based
on tos and is probably responsible for what you're seeing. Feel free to
investigate, but you could save us all some time by simply posting what
you're doing, what you're expecting and what is actually happening,
there's probably a good explanation.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/4 -rev1] Add new timeval_to_sec function

A new function for converting timeval to time_t is added in time.h. It's a
common function used in different places. The timeout is now rounded up as
per the suggestion.

Signed-off-by: Varun Chandramohan [EMAIL PROTECTED]
---
 include/linux/time.h |   11 +++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/include/linux/time.h b/include/linux/time.h
index dda9be6..908329a 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -147,6 +147,17 @@ static inline s64 timeval_to_ns(const st
 }
 
 /**
+ * timeval_to_sec - Convert timeval to seconds
+ * @tv: pointer to the timeval variable to be converted
+ *
+ * Returns the seconds representation of timeval parameter.
+ */
+static inline time_t timeval_to_sec(const struct timeval *tv)
+{
+   return (tv-tv_sec + (tv-tv_usec + 99)/100);
+}
+
+/**
  * ns_to_timespec - Convert nanoseconds to timespec
  * @nsec:  the nanoseconds value to be converted
  *
-- 
1.4.3.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4 -rev1] New attribute RTA_AGE

A new attribute, RTA_AGE, is added so that the age value can be exported to
user level using netlink.

Signed-off-by: Varun Chandramohan [EMAIL PROTECTED]
---
 include/linux/rtnetlink.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6127858..884f507 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -263,6 +263,7 @@ enum rtattr_type_t
RTA_SESSION,
RTA_MP_ALGO, /* no longer used */
RTA_TABLE,
+   RTA_AGE,
__RTA_MAX
 };
 
-- 
1.4.3.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/4 -rev1] Initialize and populate age field

The age field is filled with the current time at the time of creation of the
route. When the routes are dumped, the age value stored in the route structure
is subtracted from the current time value, and the difference is the age
expressed in seconds.

Signed-off-by: Varun Chandramohan [EMAIL PROTECTED]
---
 net/ipv4/fib_hash.c  |3 +++
 net/ipv4/fib_lookup.h|3 ++-
 net/ipv4/fib_semantics.c |   16 +---
 net/ipv4/fib_trie.c  |1 +
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 07e843a..faa7364 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -448,6 +448,7 @@ static int fn_hash_insert(struct fib_tab
fa-fa_info = fi;
fa-fa_type = cfg-fc_type;
fa-fa_scope = cfg-fc_scope;
+   fa-fa_age = 0;
state = fa-fa_state;
fa-fa_state = ~FA_S_ACCESSED;
fib_hash_genid++;
@@ -507,6 +508,7 @@ static int fn_hash_insert(struct fib_tab
new_fa-fa_type = cfg-fc_type;
new_fa-fa_scope = cfg-fc_scope;
new_fa-fa_state = 0;
+   new_fa-fa_age = 0;
 
/*
 * Insert new entry to the list.
@@ -697,6 +699,7 @@ fn_hash_dump_bucket(struct sk_buff *skb,
  f-fn_key,
  fz-fz_order,
  fa-fa_tos,
+ fa-fa_age,
  fa-fa_info,
  NLM_F_MULTI)  0) {
cb-args[4] = i;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index eef9eec..c9145b5 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -13,6 +13,7 @@ struct fib_alias {
u8  fa_type;
u8  fa_scope;
u8  fa_state;
+   time_t  fa_age;
 };
 
 #define FA_S_ACCESSED  0x01
@@ -27,7 +28,7 @@ extern struct fib_info *fib_create_info(
 extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 u32 tb_id, u8 type, u8 scope, __be32 dst,
-int dst_len, u8 tos, struct fib_info *fi,
+int dst_len, u8 tos, time_t *age, struct fib_info *fi,
 unsigned int);
 extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
  int dst_len, u32 tb_id, struct nl_info *info,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c434119..1822d92 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -278,7 +278,8 @@ static inline size_t fib_nlmsg_size(stru
 + nla_total_size(4) /* RTA_TABLE */
 + nla_total_size(4) /* RTA_DST */
 + nla_total_size(4) /* RTA_PRIORITY */
-+ nla_total_size(4); /* RTA_PREFSRC */
++ nla_total_size(4) /* RTA_PREFSRC */
++ nla_total_size(4); /*RTA_AGE*/
 
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
@@ -313,7 +314,7 @@ void rtmsg_fib(int event, __be32 key, st
 
err = fib_dump_info(skb, info-pid, seq, event, tb_id,
fa-fa_type, fa-fa_scope, key, dst_len,
-   fa-fa_tos, fa-fa_info, nlm_flags);
+   fa-fa_tos, fa-fa_age, fa-fa_info, nlm_flags);
if (err  0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -940,11 +941,12 @@ __be32 __fib_res_prefsrc(struct fib_resu
 }
 
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+ u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 
tos, time_t *age,
  struct fib_info *fi, unsigned int flags)
 {
struct nlmsghdr *nlh;
struct rtmsg *rtm;
+   struct timeval tv;
 
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
if (nlh == NULL)
@@ -985,6 +987,14 @@ int fib_dump_info(struct sk_buff *skb, u
NLA_PUT_U32(skb, RTA_FLOW, fi-fib_nh[0].nh_tclassid);
 #endif
}
+
+   do_gettimeofday(tv);
+   if (!*age) {
+   *age = timeval_to_sec(tv);
+   NLA_PUT_U32(skb, RTA_AGE, *age);
+   } else {
+   NLA_PUT_U32(skb, RTA_AGE, timeval_to_sec(tv) - *age);
+   }
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi-fib_nhs  1) {
struct rtnexthop *rtnh;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 30e332a..be2d1d6

Re: TCP MD5 and Scatter Gather offloading.

2007-07-25 Thread Chuck Ebbert

On 07/25/2007 01:12 PM, Siddharth Taneja wrote:

[cc: netdev]

 Hello,
 
 I am using a vanilla 2.6.22.1 kernel and I see the same kind of
 problem as had been mentioned some time back on this list
 
 http://lkml.org/lkml/2007/5/22/45
 
 The issue is essentially that with the MD5 option enabled for the
 specific TCP connection, the SYN and SYN-ACKS are passed fine and the
 connection establishes fine, but the other end (a cisco router)
 complains about incorrect MD5 signatures on any other message that is
 sent after this.
 
 Setting the scatter-gather offloading option on the NIC seems to
 correct this problem. Recently I had seen a checkin (as a response to
 the problem mentioned in the above link) where the TSO option was
 turned off to make MD5 work (my kernel has that fix). Is a similar
 solution needed here too?
 
 This is the information about my system:
 uname -a
 Linux stdalone 2.6.22.1 #1 SMP Mon Jul 23 20:15:21 PDT 2007 i686 i686
 i386 GNU/Linux
 
 ethtool -i eth0
 driver: e1000
 version: 7.3.20-k2
 firmware-version: N/A
 bus-info: :01:0a.0
 
 ethtool -k eth0
 Offload parameters for eth0:
 rx-checksumming: on
 tx-checksumming: on
 scatter-gather: off
 tcp segmentation offload: off
 
 Thanks for your help.
 
 Siddharth
 
 PS: I would like to be CC'ed on the reply to this email. Thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Bugme-new] [Bug 8808] New: Large file transfer causes kernel panic showing b44_poll

2007-07-25 Thread Andrew Morton

On Wed, 25 Jul 2007 04:29:33 -0700 (PDT)
[EMAIL PROTECTED] wrote:

 http://bugzilla.kernel.org/show_bug.cgi?id=8808
 
Summary: Large file transfer causes kernel panic showing b44_poll
Product: Drivers
Version: 2.5
  KernelVersion: 2.6.22.1
   Platform: All
 OS/Version: Linux
   Tree: Mainline
 Status: NEW
   Severity: blocking
   Priority: P1
  Component: Network
 AssignedTo: [EMAIL PROTECTED]
 ReportedBy: [EMAIL PROTECTED]
 
 
 Most recent kernel where this bug did not occur: /
 Distribution: ttylinux 
 Hardware Environment: Dell Inspiron 1300
 Problem Description: 
 
 A large file transfer (6.5GB) (tried http with wget and plain netcat) causes a
 kernel panic after more than several GB have been transferred. However, kernel
 panic does not occur consistently. That is, it has occurred after 1.2GB, 
 1.8GB,
 2.3GB and even 3.4GB transferred. Transfer never finished though. 
 
 Call trace: 
 [c027d02b] b44_poll+0x220/0x459
 [c027d032] b44_poll+0x220/0x459
 [c031b59e] net_rx_action+0x52/0x110
 [c01109e4] __do_softirq+0x35/0x75
 [c0110a46] do_softirq+0x22/0x26
 [c010400f] do_IRQ+0x55/0x6a
 [c0102997] common_interrupt+0x23/0x28
 [c01e3e43] acpi_processor_idle+0x1dd/0x35e
 [c0100b51] cpu_idle+0x3c/0x51
 [c04a6a2c] start_kernel+0x24a/0x252
 [c04a63d0] unknown_bootoption+0x0/0x205
 
 Code: lots of numbers
 EIP: [c0314cb8] skb_over_panic+0x59/0x5d SS:ESP 0068:c04a5ed8
 Kernel panic - not syncing: Fatal exception in interrupt
 
 Steps to reproduce:
 Network boot ttylinux (standard filesys with dhcp enabled) with custom kernel
 (see below for config) compiled as bzImage on Dell Inspiron 1300 using
 /dev/ram0 as root filesystem. Mount hdd ext3 and attempt to download 6.5GB
 image file from local server. Both netcat and wget attempts fail as described
 above.
 
 Kernel config:
 
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.22.1
 # Wed Jul 25 14:53:11 2007
 #
 CONFIG_X86_32=y
 CONFIG_GENERIC_TIME=y
 CONFIG_CLOCKSOURCE_WATCHDOG=y
 CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
 CONFIG_MMU=y
 CONFIG_ZONE_DMA=y
 CONFIG_QUICKLIST=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_BUG=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_DMI=y
 CONFIG_DEFCONFIG_LIST=/lib/modules/$UNAME_RELEASE/.config
 
 #
 # Code maturity level options
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
 #
 CONFIG_LOCALVERSION=
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 # CONFIG_IPC_NS is not set
 CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_SLAB=y
 # CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 
 #
 # Loadable module support
 #
 # CONFIG_MODULES is not set
 
 #
 # Block layer
 #
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_LSF is not set
 
 #
 # IO Schedulers
 #
 CONFIG_IOSCHED_NOOP=y
 # CONFIG_IOSCHED_AS is not set
 CONFIG_IOSCHED_DEADLINE=y
 # CONFIG_IOSCHED_CFQ is not set
 # CONFIG_DEFAULT_AS is not set
 CONFIG_DEFAULT_DEADLINE=y
 # CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED=deadline
 
 #
 # Processor type and features
 #
 # CONFIG_TICK_ONESHOT is not set
 # CONFIG_NO_HZ is not set
 # CONFIG_HIGH_RES_TIMERS is not set
 # CONFIG_SMP is not set
 CONFIG_X86_PC=y
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_NUMAQ is not set
 # CONFIG_X86_SUMMIT is not set
 # CONFIG_X86_BIGSMP is not set
 # CONFIG_X86_VISWS is not set
 # CONFIG_X86_GENERICARCH is not set
 # CONFIG_X86_ES7000 is not set
 # CONFIG_PARAVIRT is not set
 CONFIG_M386=y
 # CONFIG_M486 is not set
 # CONFIG_M586 is not set
 # CONFIG_M586TSC is not set
 # CONFIG_M586MMX is not set
 # CONFIG_M686 is not set
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMIII is not set
 # CONFIG_MPENTIUMM is not set
 # CONFIG_MCORE2 is not set
 # CONFIG_MPENTIUM4 is not set
 # CONFIG_MK6 is not set
 #

atl1 driver corrupting memory?

2007-07-25 Thread Chuck Ebbert

I have a report of random errors when using the atl1 driver
with kernel 2.6.22.1. Could that be a problem fixed by the
recent changes to DMA setup in 2.6.23-rc?


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] gfar: Fix modpost warning

2007-07-25 Thread Kumar Gala

Fix the following modpost warning:

WARNING: vmlinux.o(.init.text+0x1aa6c): Section mismatch: reference to 
.exit.text:gfar_mdio_exit (between 'gfar_init' and 'gfar_mdio_init')

Signed-off-by: Kumar Gala [EMAIL PROTECTED]
---
 drivers/net/gianfar_mii.c |2 +-
 drivers/net/gianfar_mii.h |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c
index ac3596f..100bf41 100644
--- a/drivers/net/gianfar_mii.c
+++ b/drivers/net/gianfar_mii.c
@@ -245,7 +245,7 @@ int __init gfar_mdio_init(void)
return driver_register(gianfar_mdio_driver);
 }

-void __exit gfar_mdio_exit(void)
+void gfar_mdio_exit(void)
 {
driver_unregister(gianfar_mdio_driver);
 }
diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h
index 5d34004..b373091 100644
--- a/drivers/net/gianfar_mii.h
+++ b/drivers/net/gianfar_mii.h
@@ -42,5 +42,5 @@ struct gfar_mii {
 int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum);
 int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
 int __init gfar_mdio_init(void);
-void __exit gfar_mdio_exit(void);
+void gfar_mdio_exit(void);
 #endif /* GIANFAR_PHY_H */
-- 
1.5.2.2

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC]: napi_struct V4

From: Stephen Hemminger [EMAIL PROTECTED]
Date: Wed, 25 Jul 2007 09:56:54 +0100

 The usage of NAPI on 8139cp and 8139too seems dodgy;
 these drivers expect this to work:

   local_irq_save(flags);
   cpw16_f(IntrMask, cp_intr_mask);
   __netif_rx_complete(dev);
   local_irq_restore(flags);

 It works on SMP only because if poll races with IRQ, 
 the IRQ is not masked or cleared so the IRQ will get restarted.

 Better would be to change it to:
   spin_lock_irqsave(cp-lock, flags);
   cpw16_f(IntrMask, cp_intr_mask);
   __netif_rx_complete(dev);
   spin_unlock_irqrestore(cp-lock, flags);

 Which actually is same code on UP.

I've made these fixes to my tree, thanks Stephen.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: net/9p/mux.c: use-after-free

2007-07-25 Thread Eric Van Hensbergen


On 7/22/07, Adrian Bunk [EMAIL PROTECTED] wrote:

The Coverity checker spotted the following use-after-free
in net/9p/mux.c:

--  snip  --

...
struct p9_conn *p9_conn_create(struct p9_transport *trans, int msize,
unsigned char *extended)
{
...
if (!m-tagpool) {
kfree(m);
return ERR_PTR(PTR_ERR(m-tagpool));
}
...

--  snip  --



I've got a fix for this one:
   if (!m-tagpool) {
   mtmp = ERR_PTR(PTR_ERR(m-tagpool));
   kfree(m);
   return mtmp;
   }

but I was wondering about one of the other returns further down the function:

...
   memset(m-poll_waddr, 0, sizeof(m-poll_waddr));
   m-poll_task = NULL;
   n = p9_mux_poll_start(m);
   if (n)
   return ERR_PTR(n);

   n = trans-poll(trans, m-pt);
...

lucho: doesn't that constitute a leak?  Shouldn't we be doing:

   if (n) {
   kfree(m);
   return ERR_PTR(n);
   }

-eric
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH]: 2nd revision of make xfrm_audit_log more generic

2007-07-25 Thread Joy Latten

This is the 2nd revision of the patch to modify xfrm_audit_log() such
that it can accommodate auditing other IPsec events
besides the add/delete of an SA or SPD entry.

The 2nd revision includes a new define for all IPsec
events in audit.h, introduces an op= entry
in the logfile, and adds a hyphen in the description
for report parsing.

This is a small change to accommodate updating the
IPsec protocol to RFCs 4301, 4302 and 4303, which
require auditing some IPsec events if auditing
is available. Please let me know if this is OK.

Regards,
Joy

Signed-off-by: Joy Latten [EMAIL PROTECTED]

diff -urpN linux-2.6.22/include/linux/audit.h 
linux-2.6.22.patch/include/linux/audit.h
--- linux-2.6.22/include/linux/audit.h  2007-07-23 14:35:28.0 -0500
+++ linux-2.6.22.patch/include/linux/audit.h2007-07-23 14:38:51.0 
-0500
@@ -112,6 +112,7 @@
 #define AUDIT_MAC_IPSEC_DELSA  1412/* Delete a XFRM state */
 #define AUDIT_MAC_IPSEC_ADDSPD 1413/* Add a XFRM policy */
 #define AUDIT_MAC_IPSEC_DELSPD 1414/* Delete a XFRM policy */
+#define AUDIT_MAC_IPSEC_EVENT  1415/* Audit IPSec events */
 
 #define AUDIT_FIRST_KERN_ANOM_MSG   1700
 #define AUDIT_LAST_KERN_ANOM_MSG1799
diff -urpN linux-2.6.22/include/net/xfrm.h linux-2.6.22.patch/include/net/xfrm.h
--- linux-2.6.22/include/net/xfrm.h 2007-07-23 14:35:28.0 -0500
+++ linux-2.6.22.patch/include/net/xfrm.h   2007-07-23 14:38:51.0 
-0500
@@ -427,9 +427,11 @@ struct xfrm_audit
 
 #ifdef CONFIG_AUDITSYSCALL
 extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result,
-   struct xfrm_policy *xp, struct xfrm_state *x);
+  u16 family, xfrm_address_t saddr, 
+  xfrm_address_t daddr, __be32 spi, __be32 flowid, 
+  struct xfrm_sec_ctx *sctx, char *buf);
 #else
-#define xfrm_audit_log(a,s,t,r,p,x) do { ; } while (0)
+#define xfrm_audit_log(a,i,t,r,f,s,d,p,l,c,b) do { ; } while (0)
 #endif /* CONFIG_AUDITSYSCALL */
 
 static inline void xfrm_pol_hold(struct xfrm_policy *policy)
diff -urpN linux-2.6.22/net/key/af_key.c linux-2.6.22.patch/net/key/af_key.c
--- linux-2.6.22/net/key/af_key.c   2007-07-08 18:32:17.0 -0500
+++ linux-2.6.22.patch/net/key/af_key.c 2007-07-24 11:50:35.0 -0500
@@ -1459,7 +1459,9 @@ static int pfkey_add(struct sock *sk, st
err = xfrm_state_update(x);
 
xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x);
+  AUDIT_MAC_IPSEC_EVENT, err ? 0 : 1, 
+  x-props.family, x-props.saddr, x-id.daddr, 
+  x-id.spi, 0, x-security, SAD-add);
 
if (err  0) {
x-km.state = XFRM_STATE_DEAD;
@@ -1513,7 +1515,10 @@ static int pfkey_delete(struct sock *sk,
km_state_notify(x, c);
 out:
xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+  AUDIT_MAC_IPSEC_EVENT, err ? 0 : 1, x-props.family,
+  x-props.saddr, x-id.daddr, x-id.spi, 0,
+  x-security, SAD-delete);
+
xfrm_state_put(x);
 
return err;
@@ -2266,7 +2271,9 @@ static int pfkey_spdadd(struct sock *sk,
 hdr-sadb_msg_type != SADB_X_SPDUPDATE);
 
xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL);
+  AUDIT_MAC_IPSEC_EVENT, err ? 0 : 1, 
+  xp-selector.family, xp-selector.saddr,
+  xp-selector.daddr, 0, 0, xp-security, SPD-add);
 
if (err)
goto out;
@@ -2350,7 +2357,9 @@ static int pfkey_spddelete(struct sock *
return -ENOENT;
 
xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
+  AUDIT_MAC_IPSEC_EVENT, err ? 0 : 1,
+  xp-selector.family, xp-selector.saddr,
+  xp-selector.daddr, 0, 0, xp-security, SPD-delete);
 
if (err)
goto out;
@@ -2611,7 +2620,10 @@ static int pfkey_spdget(struct sock *sk,
 
if (delete) {
xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL);
+  AUDIT_MAC_IPSEC_EVENT, err ? 0 : 1, 
+  xp-selector.family, xp-selector.saddr,
+  xp-selector.daddr, 0, 0, xp-security,
+  SPD-delete);
 
if (err)
goto out;
diff -urpN linux-2.6.22/net/xfrm/xfrm_policy.c 
linux-2.6.22.patch/net/xfrm/xfrm_policy.c
--- linux-2.6.22/net/xfrm/xfrm_policy.c 2007-07-23 14:35:29.0 -0500
+++

[PATCH 2.6.22 3/3]S2io: Increment received packet count correctly