[PATCH net-next] hv_netvsc: report stop_queue and wake_queue

2017-09-29 Thread Simon Xiao
Report the numbers of events for stop_queue and wake_queue in
ethtool stats.

Example:
ethtool -S eth0
NIC statistics:
...
stop_queue: 7
wake_queue: 7
...

Signed-off-by: Simon Xiao <six...@microsoft.com>
Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h |  2 ++
 drivers/net/hyperv/netvsc.c | 12 ++--
 drivers/net/hyperv/netvsc_drv.c |  2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 5176be7..6f550e1 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -686,6 +686,8 @@ struct netvsc_ethtool_stats {
unsigned long tx_busy;
unsigned long tx_send_full;
unsigned long rx_comp_busy;
+   unsigned long stop_queue;
+   unsigned long wake_queue;
 };
 
 struct netvsc_vf_pcpu_stats {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index b0d323e..6e51949 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -609,6 +609,7 @@ static void netvsc_send_tx_complete(struct netvsc_device 
*net_device,
 {
struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
struct net_device *ndev = hv_get_drvdata(device);
+   struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = device->channel;
u16 q_idx = 0;
int queue_sends;
@@ -643,8 +644,10 @@ static void netvsc_send_tx_complete(struct netvsc_device 
*net_device,
 
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
(hv_ringbuf_avail_percent(>outbound) > 
RING_AVAIL_PERCENT_HIWATER ||
-queue_sends < 1))
+queue_sends < 1)) {
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
+   ndev_ctx->eth_stats.wake_queue++;
+   }
 }
 
 static void netvsc_send_completion(struct netvsc_device *net_device,
@@ -749,6 +752,7 @@ static inline int netvsc_send_pkt(
_device->chan_table[packet->q_idx];
struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
+   struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
@@ -789,12 +793,16 @@ static inline int netvsc_send_pkt(
if (ret == 0) {
atomic_inc_return(>queue_sends);
 
-   if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
+   if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
netif_tx_stop_queue(txq);
+   ndev_ctx->eth_stats.stop_queue++;
+   }
} else if (ret == -EAGAIN) {
netif_tx_stop_queue(txq);
+   ndev_ctx->eth_stats.stop_queue++;
if (atomic_read(>queue_sends) < 1) {
netif_tx_wake_queue(txq);
+   ndev_ctx->eth_stats.wake_queue++;
ret = -ENOSPC;
}
} else {
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index e9d54c9..f300ae6 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1126,6 +1126,8 @@ static const struct {
{ "tx_busy",  offsetof(struct netvsc_ethtool_stats, tx_busy) },
{ "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
{ "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
+   { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
+   { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
 }, vf_stats[] = {
{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
{ "vf_rx_bytes",   offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
-- 
2.7.4



[PATCH net-next,1/1] tools: hv: ignore a NIC if it has been configured

2017-07-14 Thread Simon Xiao
Let bondvf.sh ignore this NIC if it has been configured, to prevent
user configuration from being overwritten unexpectedly.

Signed-off-by: Simon Xiao <six...@microsoft.com>
---
 tools/hv/bondvf.sh | 27 +--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
index 89b2506..80f1028 100755
--- a/tools/hv/bondvf.sh
+++ b/tools/hv/bondvf.sh
@@ -211,6 +211,30 @@ function create_bond {
 
echo $'\nBond name:' $bondname
 
+   if [ $distro == ubuntu ]
+   then
+   local mainfn=$cfgdir/interfaces
+   local s="^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+${bondname}"
+
+   grep -E "$s" $mainfn
+   if [ $? -eq 0 ]
+   then
+   echo "WARNING: ${bondname} has been configured already"
+   return
+   fi
+   elif [ $distro == redhat ] || [ $distro == suse ]
+   then
+   local fn=$cfgdir/ifcfg-$bondname
+   if [ -f $fn ]
+   then
+   echo "WARNING: ${bondname} has been configured already"
+   return
+   fi
+   else
+   echo "Unsupported Distro: ${distro}"
+   return
+   fi
+
echo configuring $primary
create_eth_cfg_pri_$distro $primary $bondname
 
@@ -219,8 +243,6 @@ function create_bond {
 
echo creating: $bondname with primary slave: $primary
create_bond_cfg_$distro $bondname $primary $secondary
-
-   let bondcnt=bondcnt+1
 }
 
 for (( i=0; i < $eth_cnt-1; i++ ))
@@ -228,5 +250,6 @@ do
 if [ -n "${list_match[$i]}" ]
 then
create_bond ${list_eth[$i]} ${list_match[$i]}
+   let bondcnt=bondcnt+1
 fi
 done
-- 
2.7.4



[PATCH net-next, 2/2] tools: hv: set hotplug for VF on Suse

2017-05-31 Thread Simon Xiao
On HyperV, the VF interface can be offered by a host at any time.
Mark the VF interface as hotplug, to make sure it will be brought up
automatically when it is registered.

Signed-off-by: Simon Xiao <six...@microsoft.com>
Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
---
 tools/hv/bondvf.sh | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
index 8abd46e..89b2506 100755
--- a/tools/hv/bondvf.sh
+++ b/tools/hv/bondvf.sh
@@ -170,7 +170,11 @@ function create_eth_cfg_suse {
 }
 
 function create_eth_cfg_pri_suse {
-   create_eth_cfg_suse $1
+   local fn=$cfgdir/ifcfg-$1
+
+   rm -f $fn
+   echo BOOTPROTO=none >>$fn
+   echo STARTMODE=hotplug >>$fn
 }
 
 function create_bond_cfg_suse {
-- 
2.7.4



[PATCH net-next, 1/2] tools: hv: set allow-hotplug for VF on Ubuntu

2017-05-31 Thread Simon Xiao
On HyperV, the VF interface can be offered by a host at any time.
Mark the VF interface as hotplug, to make sure it will be brought up
automatically when it is registered.

Signed-off-by: Simon Xiao <six...@microsoft.com>
Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
---
 tools/hv/bondvf.sh | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
index 112deba..8abd46e 100755
--- a/tools/hv/bondvf.sh
+++ b/tools/hv/bondvf.sh
@@ -134,7 +134,6 @@ function create_eth_cfg_ubuntu {
local fn=$cfgdir/interfaces
 
del_eth_cfg_ubuntu $1
-
echo $'\n'auto $1 >>$fn
echo iface $1 inet manual >>$fn
echo bond-master $2 >>$fn
@@ -143,7 +142,10 @@ function create_eth_cfg_ubuntu {
 function create_eth_cfg_pri_ubuntu {
local fn=$cfgdir/interfaces
 
-   create_eth_cfg_ubuntu $1 $2
+   del_eth_cfg_ubuntu $1
+   echo $'\n'allow-hotplug $1 >>$fn
+   echo iface $1 inet manual >>$fn
+   echo bond-master $2 >>$fn
echo bond-primary $1 >>$fn
 }
 
-- 
2.7.4



RE: [PATCH 1/1] hv_netvsc: fix a netvsc stats typo

2017-02-08 Thread Simon Xiao
Please ignore this patch. I will resubmit it to net-next.

> -Original Message-
> From: Simon Xiao [mailto:six...@microsoft.com]
> Sent: Tuesday, February 7, 2017 10:03 AM
> To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; Stephen Hemminger
> <sthem...@microsoft.com>; de...@linuxdriverproject.org;
> netdev@vger.kernel.org; linux-ker...@vger.kernel.org
> Cc: Simon Xiao <six...@microsoft.com>
> Subject: [PATCH 1/1] hv_netvsc: fix a netvsc stats typo
> 
> [This sender failed our fraud detection checks and may not be who they
> appear to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]
> 
> Now, return the correct tx_errors stats in netvsc.
> 
> Signed-off-by: Simon Xiao <six...@microsoft.com>
> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
> ---
>  drivers/net/hyperv/netvsc_drv.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/hyperv/netvsc_drv.c
> b/drivers/net/hyperv/netvsc_drv.c
> index 72b0c1f..725ac19 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -920,7 +920,7 @@ static void netvsc_get_stats64(struct net_device *net,
> }
> 
> t->tx_dropped   = net->stats.tx_dropped;
> -   t->tx_errors= net->stats.tx_dropped;
> +   t->tx_errors= net->stats.tx_errors;
> 
> t->rx_dropped   = net->stats.rx_dropped;
> t->rx_errors= net->stats.rx_errors;
> --
> 2.7.4



[PATCH net-next] hv_netvsc: add ethtool support for set and get of settings

2016-02-25 Thread Simon Xiao
This patch allows the user to set and retrieve speed and duplex of the
hv_netvsc device via ethtool.

Example:
$ ethtool eth0
Settings for eth0:
...
Speed: Unknown!
Duplex: Unknown! (255)
...
$ ethtool -s eth0 speed 1000 duplex full
$ ethtool eth0
Settings for eth0:
...
Speed: 1000Mb/s
Duplex: Full
...

This is based on patches by Roopa Prabhu and Nikolay Aleksandrov.

Signed-off-by: Simon Xiao <six...@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h |  4 +++
 drivers/net/hyperv/netvsc_drv.c | 56 +
 2 files changed, 60 insertions(+)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index fcb92c0..b4c6878 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -658,6 +658,10 @@ struct net_device_context {
 
struct netvsc_stats __percpu *tx_stats;
struct netvsc_stats __percpu *rx_stats;
+
+   /* Ethtool settings */
+   u8 duplex;
+   u32 speed;
 };
 
 /* Per netvsc device */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 202e2b1..e703b9a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -799,6 +799,58 @@ static int netvsc_set_channels(struct net_device *net,
goto do_set;
 }
 
+static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd)
+{
+   struct ethtool_cmd diff1 = *cmd;
+   struct ethtool_cmd diff2 = {};
+
+   ethtool_cmd_speed_set(, 0);
+   diff1.duplex = 0;
+   /* advertising and cmd are usually set */
+   diff1.advertising = 0;
+   diff1.cmd = 0;
+   /* We set port to PORT_OTHER */
+   diff2.port = PORT_OTHER;
+
+   return !memcmp(, , sizeof(diff1));
+}
+
+static void netvsc_init_settings(struct net_device *dev)
+{
+   struct net_device_context *ndc = netdev_priv(dev);
+
+   ndc->speed = SPEED_UNKNOWN;
+   ndc->duplex = DUPLEX_UNKNOWN;
+}
+
+static int netvsc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+   struct net_device_context *ndc = netdev_priv(dev);
+
+   ethtool_cmd_speed_set(cmd, ndc->speed);
+   cmd->duplex = ndc->duplex;
+   cmd->port = PORT_OTHER;
+
+   return 0;
+}
+
+static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+   struct net_device_context *ndc = netdev_priv(dev);
+   u32 speed;
+
+   speed = ethtool_cmd_speed(cmd);
+   if (!ethtool_validate_speed(speed) ||
+   !ethtool_validate_duplex(cmd->duplex) ||
+   !netvsc_validate_ethtool_ss_cmd(cmd))
+   return -EINVAL;
+
+   ndc->speed = speed;
+   ndc->duplex = cmd->duplex;
+
+   return 0;
+}
+
 static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 {
struct net_device_context *ndevctx = netdev_priv(ndev);
@@ -923,6 +975,8 @@ static const struct ethtool_ops ethtool_ops = {
.get_channels   = netvsc_get_channels,
.set_channels   = netvsc_set_channels,
.get_ts_info= ethtool_op_get_ts_info,
+   .get_settings   = netvsc_get_settings,
+   .set_settings   = netvsc_set_settings,
 };
 
 static const struct net_device_ops device_ops = {
@@ -1112,6 +1166,8 @@ static int netvsc_probe(struct hv_device *dev,
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
 
+   netvsc_init_settings(net);
+
ret = register_netdev(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
-- 
2.5.0



[PATCH net-next] hv_netvsc: add software transmit timestamp support

2016-02-17 Thread Simon Xiao
Enable skb_tx_timestamp in hyperv netvsc.

Signed-off-by: Simon Xiao <six...@microsoft.com>
Reviewed-by: K. Y. Srinivasan <k...@microsoft.com>
Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c72e5b8..202e2b1 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -550,6 +550,8 @@ do_send:
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
   skb, packet, );
 
+   /* timestamp packet in software */
+   skb_tx_timestamp(skb);
ret = netvsc_send(net_device_ctx->device_ctx, packet,
  rndis_msg, , skb);
 
@@ -920,6 +922,7 @@ static const struct ethtool_ops ethtool_ops = {
.get_link   = ethtool_op_get_link,
.get_channels   = netvsc_get_channels,
.set_channels   = netvsc_set_channels,
+   .get_ts_info= ethtool_op_get_ts_info,
 };
 
 static const struct net_device_ops device_ops = {
-- 
2.5.0



[PATCH V2 net-next] hv_netvsc: cleanup netdev feature flags for netvsc

2016-02-04 Thread Simon Xiao
1. Adding NETIF_F_TSO6 feature flag;
2. Adding NETIF_F_HW_CSUM. NETIF_F_IPV6_CSUM and NETIF_F_IP_CSUM are 
being deprecated;
3. Cleanup the coding style of flag assignment by using macro.

Signed-off-by: Simon Xiao <six...@microsoft.com>
Reviewed-by: K. Y. Srinivasan <k...@microsoft.com>
Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1d3a665..c72e5b8 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -43,6 +43,11 @@
 
 #define RING_SIZE_MIN 64
 #define LINKCHANGE_INT (2 * HZ)
+#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \
+NETIF_F_SG | \
+NETIF_F_TSO | \
+NETIF_F_TSO6 | \
+NETIF_F_HW_CSUM)
 static int ring_size = 128;
 module_param(ring_size, int, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
@@ -1081,10 +1086,8 @@ static int netvsc_probe(struct hv_device *dev,
 
net->netdev_ops = _ops;
 
-   net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
-   NETIF_F_TSO;
-   net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
-   NETIF_F_IP_CSUM | NETIF_F_TSO;
+   net->hw_features = NETVSC_HW_FEATURES;
+   net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
 
net->ethtool_ops = _ops;
SET_NETDEV_DEV(net, >device);
-- 
2.5.0



[PATCH net-next] hv_netvsc: Add feature flags NETIF_F_IPV6_CSUM and NETIF_F_TSO6 for netvsc

2016-02-03 Thread Simon Xiao
1. Adding NETIF_F_IPV6_CSUM and NETIF_F_TSO6 feature flags which are
supported by Hyper-V platform.
2. Cleanup the coding style of flag assignment by using macro.

Signed-off-by: Simon Xiao <six...@microsoft.com>
Reviewed-by: K. Y. Srinivasan <k...@microsoft.com>
Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
---
 drivers/net/hyperv/netvsc_drv.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1d3a665..0cde741 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -43,6 +43,12 @@
 
 #define RING_SIZE_MIN 64
 #define LINKCHANGE_INT (2 * HZ)
+#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \
+NETIF_F_SG | \
+NETIF_F_TSO | \
+NETIF_F_TSO6 | \
+NETIF_F_IP_CSUM | \
+NETIF_F_IPV6_CSUM)
 static int ring_size = 128;
 module_param(ring_size, int, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
@@ -1081,10 +1087,8 @@ static int netvsc_probe(struct hv_device *dev,
 
net->netdev_ops = _ops;
 
-   net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
-   NETIF_F_TSO;
-   net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
-   NETIF_F_IP_CSUM | NETIF_F_TSO;
+   net->hw_features = NETVSC_HW_FEATURES;
+   net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
 
net->ethtool_ops = _ops;
SET_NETDEV_DEV(net, >device);
-- 
2.5.0



RE: linux-next network throughput performance regression

2015-11-09 Thread Simon Xiao
Thanks, Eric, for providing the data. I am looping in Tom (as I am looking into his 
recent patches) and Olaf (from Suse).

So, if I understand correctly, you are running netperf with a single TCP 
connection, and you got ~26Gbps initially and ~30Gbps after tuning 
tx-usecs and tx-frames.

Do you have a baseline in your environment for the best/max/peak throughput?
Again, in my environment (SLES bare metal), if we use the SLES12 default kernel as a 
baseline, we see a significant performance drop (10% ~ 50%) on the latest 
linux-next kernel. 
I will certainly try the same test on net-next soon and post the results 
here later.

Thanks,
Simon


> -Original Message-
> From: Eric Dumazet [mailto:eric.duma...@gmail.com]
> Sent: Saturday, November 7, 2015 11:50 AM
> To: David Ahern <d...@cumulusnetworks.com>
> Cc: Simon Xiao <six...@microsoft.com>; de...@linuxdriverproject.org;
> netdev@vger.kernel.org; linux-ker...@vger.kernel.org; David Miller
> <da...@davemloft.net>; KY Srinivasan <k...@microsoft.com>; Haiyang
> Zhang <haiya...@microsoft.com>
> Subject: Re: linux-next network throughput performance regression
> 
> On Sat, 2015-11-07 at 11:35 -0800, Eric Dumazet wrote:
> > On Fri, 2015-11-06 at 14:30 -0700, David Ahern wrote:
> > > On 11/6/15 2:18 PM, Simon Xiao wrote:
> > > > The .config file used to build linux-next kernel is attached to this 
> > > > mail.
> > >
> > > Thanks.
> > >
> > > Failed to notice this on the first response; my brain filled in. Why
> > > linux-next tree? Can you try net-next which is more relevant for
> > > this mailing list, post the top commit id and config file used?
> >
> > Throughput on a single TCP flow for a 40G NIC can be tricky to tune.
> >
> > Make sure IRQ are properly setup/balanced, as I know that IRQ names
> > were changed recently and your scripts might have not noticed...
> >
> > Also "ethtool -c eth0" might show very different interrupt coalescing
> > params ?
> >
> > I too have a Mellanox 40Gb in my lab and saw no difference in
> > performance with recent kernels.
> >
> > Of course, a simple "perf record -a -g sleep 4 ; perf report" might
> > point to some obvious issue. Like unexpected segmentation in case of
> > forwarding...
> >
> >
> 
> I did a test with current net tree on both sender and receiver
> 
> lpaa23:~# ./netperf -H 10.246.7.152
> MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 10.246.7.152 () port 0 AF_INET
> Recv   SendSend
> Socket Socket  Message  Elapsed
> Size   SizeSize Time Throughput
> bytes  bytes   bytessecs.10^6bits/sec
> 
>  87380  16384  1638410.0026864.98
> lpaa23:~# ethtool -c eth1
> Coalesce parameters for eth1:
> Adaptive RX: on  TX: off
> stats-block-usecs: 0
> sample-interval: 0
> pkt-rate-low: 40
> pkt-rate-high: 45
> 
> rx-usecs: 16
> rx-frames: 44
> rx-usecs-irq: 0
> rx-frames-irq: 0
> 
> tx-usecs: 16
> tx-frames: 16
> tx-usecs-irq: 0
> tx-frames-irq: 256
> 
> rx-usecs-low: 0
> rx-frame-low: 0
> tx-usecs-low: 0
> tx-frame-low: 0
> 
> rx-usecs-high: 128
> rx-frame-high: 0
> tx-usecs-high: 0
> tx-frame-high: 0
> 
> lpaa23:~# ethtool -C eth1 tx-usecs 4 tx-frames 4


> lpaa23:~# ./netperf -H
> 10.246.7.152 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0
> AF_INET to
> 10.246.7.152 () port 0 AF_INET
> Recv   SendSend
> Socket Socket  Message  Elapsed
> Size   SizeSize Time Throughput
> bytes  bytes   bytessecs.10^6bits/sec
> 
>  87380  16384  1638410.0030206.27
> 



linux-next network throughput performance regression

2015-11-06 Thread Simon Xiao
I compared the network throughput performance on SLES12 bare metal servers 
between the SLES12 default kernel and the latest linux-next (2015-11-05) kernel. Based 
on the test results, I suspect there is a network regression in 
linux-next over the 40G Ethernet network:
a) iperf3 reports 50% performance drop with single TCP stream on latest 
linux-next;
b) iperf3 reports 10% ~ 30% performance drop with 2 to 128 TCP streams on 
latest linux-next;
Another throughput benchmarking tool (ntttcp-for-linux) test result is also 
listed at the end of the email for reference.


Server configuration:
--
Two servers (one client and one server, cross linked by 40G Ethernet), which 
have:
a) CPU: Intel(R) Xeon(R) CPU E5-2667 v3 @ 3.20GHz, 2 sockets, 16 CPUs, cache 
size : 20480 KB
b) Memory: 64 GB
c) Ethernet controller: Mellanox Technologies MT27520 Family [ConnectX-3 Pro], 
40G Ethernet, default driver


Test with iperf3:
--
iperf3: https://github.com/esnet/iperf

a) SLES12 default kernel, network throughput tested by iperf3:
Test Connections1   2   4   8   16  32  64  
128 
Throughput (G bps)  36.737.337.637.737.737.737.7
25.7

b) SLES12 + Linux-Next 20151105, network throughput tested by iperf3:
Test Connections1   2   4   8   16  32  64  
128
Throughput (G bps)  18.232.234.632.827.632.027.0
21.3
Percentage dropped  -50%-14%-8% -13%-27%-15%-28%
-17%


Test with ntttcp-for-linux:
--
ntttcp-for-linux: https://github.com/Microsoft/ntttcp-for-linux 

a) SLES12 default kernel, network throughput tested by ntttcp-for-linux:
Test Connections1   2   4   8   16  32  64  
128 256 512
Throughput (Gbps)   36.19   37.29   37.67   37.68   37.737.72   37.74   
37.76   37.81   37.9

b) SLES12 + Linux-Next 20151105, network throughput tested by ntttcp-for-linux:
Test Connections1   2   4   8   16  32  64  
128 256 512 
Throughput (Gbps)   28.12   34.01   37.636.53   32.94   33.07   33.63   
33.44   33.83   34.42   
Percentage dropped  -22%-9% 0%  -3% -13%-12%-11%
-11%-11%-9%
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH net-next,v2,1/1] hv_netvsc: introduce netif-msg into netvsc module

2015-04-24 Thread Simon Xiao

 -Original Message-
 From: Joe Perches [mailto:j...@perches.com]
 Sent: Friday, April 24, 2015 1:29 PM
 To: Simon Xiao
 Cc: KY Srinivasan; Haiyang Zhang; de...@linuxdriverproject.org;
 netdev@vger.kernel.org; linux-ker...@vger.kernel.org
 Subject: Re: [PATCH net-next,v2,1/1] hv_netvsc: introduce netif-msg into
 netvsc module
 
 On Fri, 2015-04-24 at 11:34 -0700, six...@microsoft.com wrote:
  From: Simon Xiao six...@microsoft.com
 
  1. Introduce netif-msg to netvsc to control debug logging output and
  keep msg_enable in netvsc_device_context so that it is kept
  persistently.
  2. Only call dump_rndis_message() when NETIF_MSG_RX_ERR or above is
  specified in netvsc module debug param.
  In non-debug mode, in current code, dump_rndis_message() will not
 dump
  anything but it still initialize some local variables and process the
  switch logic which is unnecessary, especially in high network
  throughput situation.
 
 []
 
  diff --git a/drivers/net/hyperv/netvsc_drv.c
  b/drivers/net/hyperv/netvsc_drv.c
 []
  @@ -888,6 +891,11 @@ static int netvsc_probe(struct hv_device *dev,
 
  net_device_ctx = netdev_priv(net);
  net_device_ctx-device_ctx = dev;
  +   net_device_ctx-msg_enable = netif_msg_init(debug, default_msg);
  +   if (netif_msg_probe(net_device_ctx))
  +   netdev_dbg(net, netvsc msg_enable: %d,
  +  net_device_ctx-msg_enable);
 
 Please use newlines to terminate formats.
 
 It helps prevent log content interleaving when multiple processes are
 emitting output at the same time.
 
 This could be shortened to use netif_level like:
 
   netif_dbg(net_device_ctx, probe, net, netvsc_msg_enable: %d\n,
 net_device_ctx-msg_enable);
 

Thanks, Joe. I would like to leave this to my next patch, as there are some 
other places in netvsc (rndis_filter.c) 
that have the same usage. I would like to fix them in one patch to make them 
consistent.

Thanks,
Simon 


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/1] v2: Driver: hv: netvsc: call dump_rndis_message() only in netvsc debug mode

2015-04-21 Thread Simon Xiao
Sorry - this patch should have been sent to net-next, so please ignore it. 

Thanks,
Simon

-Original Message-
From: six...@microsoft.com [mailto:six...@microsoft.com] 
Sent: Tuesday, April 21, 2015 2:44 PM
To: KY Srinivasan; Haiyang Zhang; netdev@vger.kernel.org; 
linux-ker...@vger.kernel.org
Cc: Simon Xiao
Subject: [PATCH 1/1] v2: Driver: hv: netvsc: call dump_rndis_message() only in 
netvsc debug mode

From: Simon Xiao six...@microsoft.com

Signed-off-by: Simon Xiao six...@microsoft.com
---
 drivers/net/hyperv/hyperv_net.h   | 3 +++
 drivers/net/hyperv/netvsc_drv.c   | 8 
 drivers/net/hyperv/rndis_filter.c | 3 ++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h 
index a10b316..c9be35e 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -28,6 +28,9 @@
 #include linux/hyperv.h
 #include linux/rndis.h
 
+/* flag for netvsc debug mode */
+extern int debug_mode;
+
 /* RSS related */
 #define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203  /* query only */  
#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204  /* query and set */ diff 
--git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 
a3a9d38..7c41864 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -52,6 +52,10 @@ static int ring_size = 128;  module_param(ring_size, int, 
S_IRUGO);  MODULE_PARM_DESC(ring_size, Ring buffer size (# of pages));
 
+int debug_mode = 0;
+module_param(debug_mode, int, S_IRUGO); MODULE_PARM_DESC(debug_mode, 
+debug mode: zero(0) for non-debug mode; non-zero for debug mode);
+
 static void do_set_multicast(struct work_struct *w)  {
struct net_device_context *ndevctx =
@@ -999,6 +1003,10 @@ static int __init netvsc_drv_init(void)
pr_info(Increased ring_size to %d (min allowed)\n,
ring_size);
}
+
+   if (debug_mode != 0)
+   pr_info(Run netvsc in debug mode);
+
return vmbus_driver_register(netvsc_drv);
 }
 
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c
index 0d92efe..a3f43f6 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -429,7 +429,8 @@ int rndis_filter_receive(struct hv_device *dev,
 
rndis_msg = pkt-data;
 
-   dump_rndis_message(dev, rndis_msg);
+   if (debug_mode != 0)
+   dump_rndis_message(dev, rndis_msg);
 
switch (rndis_msg-ndis_msg_type) {
case RNDIS_MSG_PACKET:
--
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH net-next,1/1] hv_netvsc: call dump_rndis_message() only in netvsc debug mode

2015-04-21 Thread Simon Xiao

 -Original Message-
 From: David Miller [mailto:da...@davemloft.net]
 Sent: Tuesday, April 21, 2015 2:49 PM
 To: Simon Xiao
 Cc: KY Srinivasan; Haiyang Zhang; de...@linuxdriverproject.org;
 netdev@vger.kernel.org; linux-ker...@vger.kernel.org
 Subject: Re: [PATCH net-next,1/1] hv_netvsc: call dump_rndis_message() only in
 netvsc debug mode
 
 From: six...@microsoft.com
 Date: Tue, 21 Apr 2015 15:58:05 -0700
 
  From: Simon Xiao six...@microsoft.com
 
  Signed-off-by: Simon Xiao six...@microsoft.com
  Reviewed-by: K. Y. Srinivasan k...@microsoft.com
  Reviewed-by: Haiyang Zhang haiya...@microsoft.com
 
 I just gave you feedback on this patch in response to your original 
 submission,
 do not ignore it.

Thanks for your feedback, David.

In the current netvsc driver, for each packet received, it will call 
dump_rndis_message() 
to try to dump the rndis packet information via netdev_dbg(). 
In non-debug mode, dump_rndis_message() will not dump anything, 
but it still initializes some local variables and processes the switch logic in 
the function 
dump_rndis_message(), which is unnecessary, especially in high network 
throughput situations.

My change is to have a run-time config flag to control the execution of 
dump_rndis_message() 
and avoid the above unnecessary cost in non-debug mode.
In the default case, it will be non-debug mode,
and rndis_filter_receive() will not call dump_rndis_message(), 
which saves the above extra cost for each packet received.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html