Author: sephe
Date: Fri Mar  4 06:52:11 2016
New Revision: 296379
URL: https://svnweb.freebsd.org/changeset/base/296379

Log:
  hyperv/hn: Add multiple channel support, a.k.a. vRSS
  
  Each channel contains one RX ring and one TX ring.  And we
  try to distribute the channels to different CPUs evenly.
  
  Note: Currently we don't have enough information to extract
  the RSS type and RSS hash value from the received packets.
  
  This greatly improves the TX/RX performance for 8 virtual CPU
  Hyper-V over 10Ge: it can max out 10Ge for TCP when multiple
  RX/TX rings are enabled.
  
  This almost doubles the TX/RX performance for locally connected
  Hyper-Vs: was 6Gbps w/ 128 TCP streams, now 11Gbps w/ multiple
  RX/TX rings enabled.
  
  It is not enabled by default; it will be switched on after more
  tests.
  
  Collaborated with:    Hongjiang Zhang <honzhan microsoft com>
  MFC after:    2 weeks
  Sponsored by: Microsoft OSTC

Modified:
  head/sys/dev/hyperv/include/hyperv.h
  head/sys/dev/hyperv/netvsc/hv_net_vsc.c
  head/sys/dev/hyperv/netvsc/hv_net_vsc.h
  head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
  head/sys/dev/hyperv/netvsc/hv_rndis.h
  head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
  head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
  head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c

Modified: head/sys/dev/hyperv/include/hyperv.h
==============================================================================
--- head/sys/dev/hyperv/include/hyperv.h        Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/include/hyperv.h        Fri Mar  4 06:52:11 2016        (r296379)
@@ -911,6 +911,8 @@ int         hv_vmbus_channel_teardown_gpdal(
 
 struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
 
+void           vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu);
+
 /**
  * @brief Get physical address from virtual
  */

Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.c     Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c     Fri Mar  4 06:52:11 2016        (r296379)
@@ -57,7 +57,7 @@ MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper
 /*
  * Forward declarations
  */
-static void hv_nv_on_channel_callback(void *context);
+static void hv_nv_on_channel_callback(void *xchan);
 static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
 static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
@@ -662,6 +662,34 @@ hv_nv_disconnect_from_vsp(netvsc_dev *ne
 }
 
 /*
+ * Callback handler for subchannel offer
+ * @param xchan new subchannel
+ */
+static void
+hv_nv_subchan_callback(void *xchan)
+{
+       struct hv_vmbus_channel *chan = xchan;
+       netvsc_dev *net_dev;
+       uint16_t chn_index = chan->offer_msg.offer.sub_channel_index;
+       struct hv_device *device = chan->device;
+       hn_softc_t *sc = device_get_softc(device->device);
+       int ret;
+
+       net_dev = sc->net_dev;
+
+       if (chn_index >= net_dev->num_channel) {
+               /* Would this ever happen? */
+               return;
+       }
+       netvsc_subchan_callback(sc, chan);
+
+       chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
+       ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE,
+           NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0,
+           hv_nv_on_channel_callback, chan);
+}
+
+/*
  * Net VSC on device add
  * 
  * Callback when the device belonging to this driver is added
@@ -693,6 +721,7 @@ hv_nv_on_device_add(struct hv_device *de
                free(chan->hv_chan_rdbuf, M_NETVSC);
                goto cleanup;
        }
+       chan->sc_creation_callback = hv_nv_subchan_callback;
 
        /*
         * Connect with the NetVsp
@@ -770,7 +799,9 @@ hv_nv_on_send_completion(netvsc_dev *net
                || nvsp_msg_pkt->hdr.msg_type
                        == nvsp_msg_1_type_send_rx_buf_complete
                || nvsp_msg_pkt->hdr.msg_type
-                       == nvsp_msg_1_type_send_send_buf_complete) {
+                       == nvsp_msg_1_type_send_send_buf_complete
+               || nvsp_msg_pkt->hdr.msg_type
+                       == nvsp_msg5_type_subchannel) {
                /* Copy the response back */
                memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
                    sizeof(nvsp_msg));
@@ -964,6 +995,46 @@ retry_send_cmplt:
 }
 
 /*
+ * Net VSC receiving vRSS send table from VSP
+ */
+static void
+hv_nv_send_table(struct hv_device *device, hv_vm_packet_descriptor *pkt)
+{
+       netvsc_dev *net_dev;
+       nvsp_msg *nvsp_msg_pkt;
+       int i;
+       uint32_t count, *table;
+
+       net_dev = hv_nv_get_inbound_net_device(device);
+       if (!net_dev)
+               return;
+
+       nvsp_msg_pkt =
+           (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
+
+       if (nvsp_msg_pkt->hdr.msg_type !=
+           nvsp_msg5_type_send_indirection_table) {
+               printf("Netvsc: !Warning! receive msg type not "
+                       "send_indirection_table. type = %d\n",
+                       nvsp_msg_pkt->hdr.msg_type);
+               return;
+       }
+
+       count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count;
+       if (count != VRSS_SEND_TABLE_SIZE) {
+               printf("Netvsc: Received wrong send table size: %u\n", count);
+               return;
+       }
+
+       table = (uint32_t *)
+           ((unsigned long)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table +
+            nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset);
+
+       for (i = 0; i < count; i++)
+               net_dev->vrss_send_table[i] = table[i];
+}
+
+/*
  * Net VSC on channel callback
  */
 static void
@@ -999,6 +1070,9 @@ hv_nv_on_channel_callback(void *xchan)
                                case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
                                        hv_nv_on_receive(net_dev, device, chan, desc);
                                        break;
+                               case HV_VMBUS_PACKET_TYPE_DATA_IN_BAND:
+                                       hv_nv_send_table(device, desc);
+                                       break;
                                default:
                                        device_printf(dev,
                                            "hv_cb recv unknow type %d "

Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.h     Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h     Fri Mar  4 06:52:11 2016        (r296379)
@@ -86,6 +86,92 @@ MALLOC_DECLARE(M_NETVSC);
  */
 #define NVSP_MAX_PACKETS_PER_RECEIVE            375
 
+/* vRSS stuff */
+#define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES      0x88
+#define RNDIS_OBJECT_TYPE_RSS_PARAMETERS        0x89
+
+#define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2     2
+#define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2       2
+
+struct rndis_obj_header {
+        uint8_t type;
+        uint8_t rev;
+        uint16_t size;
+} __packed;
+
+/* rndis_recv_scale_cap/cap_flag */
+#define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS      0x01000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR            0x02000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC            0x04000000
+#define RNDIS_RSS_CAPS_USING_MSI_X                      0x08000000
+#define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS           0x10000000
+#define RNDIS_RSS_CAPS_SUPPORTS_MSI_X                   0x20000000
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4               0x00000100
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6               0x00000200
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX            0x00000400
+
+/* RNDIS_RECEIVE_SCALE_CAPABILITIES */
+struct rndis_recv_scale_cap {
+        struct rndis_obj_header hdr;
+        uint32_t cap_flag;
+        uint32_t num_int_msg;
+        uint32_t num_recv_que;
+        uint16_t num_indirect_tabent;
+} __packed;
+
+/* rndis_recv_scale_param flags */
+#define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED         0x0001
+#define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED        0x0002
+#define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED           0x0004
+#define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED         0x0008
+#define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS                0x0010
+
+/* Hash info bits */
+#define RNDIS_HASH_FUNC_TOEPLITZ                0x00000001
+#define RNDIS_HASH_IPV4                         0x00000100
+#define RNDIS_HASH_TCP_IPV4                     0x00000200
+#define RNDIS_HASH_IPV6                         0x00000400
+#define RNDIS_HASH_IPV6_EX                      0x00000800
+#define RNDIS_HASH_TCP_IPV6                     0x00001000
+#define RNDIS_HASH_TCP_IPV6_EX                  0x00002000
+
+#define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
+
+#define ITAB_NUM                                        128
+#define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+
+/* RNDIS_RECEIVE_SCALE_PARAMETERS */
+typedef struct rndis_recv_scale_param_ {
+        struct rndis_obj_header hdr;
+
+        /* Qualifies the rest of the information */
+        uint16_t flag;
+
+        /* The base CPU number to do receive processing. not used */
+        uint16_t base_cpu_number;
+
+        /* This describes the hash function and type being enabled */
+        uint32_t hashinfo;
+
+        /* The size of indirection table array */
+        uint16_t indirect_tabsize;
+
+        /* The offset of the indirection table from the beginning of this
+         * structure
+         */
+        uint32_t indirect_taboffset;
+
+        /* The size of the hash secret key */
+        uint16_t hashkey_size;
+
+        /* The offset of the secret key from the beginning of this structure */
+        uint32_t hashkey_offset;
+
+        uint32_t processor_masks_offset;
+        uint32_t num_processor_masks;
+        uint32_t processor_masks_entry_size;
+} rndis_recv_scale_param;
 
 typedef enum nvsp_msg_type_ {
        nvsp_msg_type_none                      = 0,
@@ -146,6 +232,27 @@ typedef enum nvsp_msg_type_ {
 
        nvsp_msg_2_type_alloc_chimney_handle,
        nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+       nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+       /*
+        * Version 4 Messages
+        */
+       nvsp_msg4_type_send_vf_association,
+       nvsp_msg4_type_switch_data_path,
+       nvsp_msg4_type_uplink_connect_state_deprecated,
+
+       nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated,
+
+       /*
+        * Version 5 Messages
+        */
+       nvsp_msg5_type_oid_query_ex,
+       nvsp_msg5_type_oid_query_ex_comp,
+       nvsp_msg5_type_subchannel,
+       nvsp_msg5_type_send_indirection_table,
+
+       nvsp_msg5_max = nvsp_msg5_type_send_indirection_table,
 } nvsp_msg_type;
 
 typedef enum nvsp_status_ {
@@ -793,6 +900,39 @@ typedef struct nvsp_2_msg_send_vmq_rndis
        uint32_t                                status;
 } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;
 
+/*
+ * Version 5 messages
+ */
+enum nvsp_subchannel_operation {
+        NVSP_SUBCHANNEL_NONE = 0,
+        NVSP_SUBCHANNE_ALLOCATE,
+        NVSP_SUBCHANNE_MAX
+};
+
+typedef struct nvsp_5_subchannel_request_
+{
+        uint32_t                                op;
+        uint32_t                                num_subchannels;
+} __packed nvsp_5_subchannel_request;
+
+typedef struct nvsp_5_subchannel_complete_
+{
+        uint32_t                                status;
+        /* Actual number of subchannels allocated */
+        uint32_t                                num_subchannels;
+} __packed nvsp_5_subchannel_complete;
+
+typedef struct nvsp_5_send_indirect_table_
+{
+        /* The number of entries in the send indirection table */
+        uint32_t                                count;
+        /*
+         * The offset of the send indirection table from top of
+         * this struct. The send indirection table tells which channel
+         * to put the send traffic on. Each entry is a channel number.
+         */
+        uint32_t                                offset;
+} __packed nvsp_5_send_indirect_table;
 
 typedef union nvsp_1_msg_uber_ {
        nvsp_1_msg_send_ndis_version            send_ndis_vers;
@@ -838,11 +978,18 @@ typedef union nvsp_2_msg_uber_ {
        nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
 } __packed nvsp_2_msg_uber;
 
+typedef union nvsp_5_msg_uber_
+{
+        nvsp_5_subchannel_request               subchannel_request;
+        nvsp_5_subchannel_complete              subchn_complete;
+        nvsp_5_send_indirect_table              send_table;
+} __packed nvsp_5_msg_uber;
 
 typedef union nvsp_all_msgs_ {
        nvsp_msg_init_uber                      init_msgs;
        nvsp_1_msg_uber                         vers_1_msgs;
        nvsp_2_msg_uber                         vers_2_msgs;
+       nvsp_5_msg_uber                         vers_5_msgs;
 } __packed nvsp_all_msgs;
 
 /*
@@ -883,6 +1030,7 @@ typedef struct nvsp_msg_ {
 #define NETVSC_MAX_CONFIGURABLE_MTU            (9 * 1024)
 
 #define NETVSC_PACKET_SIZE                     PAGE_SIZE
+#define VRSS_SEND_TABLE_SIZE                   16
 
 /*
  * Data types
@@ -923,6 +1071,10 @@ typedef struct netvsc_dev_ {
        hv_bool_uint8_t                         destroy;
        /* Negotiated NVSP version */
        uint32_t                                nvsp_version;
+
+       uint32_t                                num_channel;
+
+       uint32_t                                vrss_send_table[VRSS_SEND_TABLE_SIZE];
 } netvsc_dev;
 
 
@@ -1010,6 +1162,10 @@ struct hn_rx_ring {
        u_long          hn_csum_trusted;
        u_long          hn_lro_tried;
        u_long          hn_small_pkts;
+       u_long          hn_pkts;
+
+       /* Rarely used stuffs */
+       struct sysctl_oid *hn_rx_sysctl_tree;
 } __aligned(CACHE_LINE_SIZE);
 
 #define HN_TRUST_HCSUM_IP      0x0001
@@ -1084,9 +1240,12 @@ typedef struct hn_softc {
        int             hn_tx_ring_cnt;
        int             hn_tx_ring_inuse;
        struct hn_tx_ring *hn_tx_ring;
+
+       int             hn_cpu;
        int             hn_tx_chimney_max;
        struct taskqueue *hn_tx_taskq;
        struct sysctl_oid *hn_tx_sysctl_tree;
+       struct sysctl_oid *hn_rx_sysctl_tree;
 } hn_softc_t;
 
 /*

Modified: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c  Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c  Fri Mar  4 06:52:11 2016        (r296379)
@@ -287,6 +287,8 @@ static int hn_single_tx_ring = 1;
 SYSCTL_INT(_hw_hn, OID_AUTO, single_tx_ring, CTLFLAG_RDTUN,
     &hn_single_tx_ring, 0, "Use one TX ring");
 
+static u_int hn_cpu_index;
+
 /*
  * Forward declarations
  */
@@ -438,6 +440,7 @@ netvsc_attach(device_t dev)
        ring_cnt = hn_ring_cnt;
        if (ring_cnt <= 0 || ring_cnt >= mp_ncpus)
                ring_cnt = mp_ncpus;
+       sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
 
        tx_ring_cnt = ring_cnt;
        if (hn_single_tx_ring || hn_use_if_start) {
@@ -461,6 +464,7 @@ netvsc_attach(device_t dev)
        chan->hv_chan_rxr = &sc->hn_rx_ring[0];
        chan->hv_chan_txr = &sc->hn_tx_ring[0];
        sc->hn_tx_ring[0].hn_chan = chan;
+       vmbus_channel_cpu_set(chan, sc->hn_cpu);
 
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_dunit = unit;
@@ -502,10 +506,17 @@ netvsc_attach(device_t dev)
        error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt);
        if (error)
                goto failed;
+       KASSERT(sc->net_dev->num_channel <= ring_cnt,
+           ("invalid channel count %u, should be less than %d",
+            sc->net_dev->num_channel, ring_cnt));
 
-       /* TODO: vRSS */
-       sc->hn_tx_ring_inuse = 1;
-       sc->hn_rx_ring_inuse = 1;
+       /*
+        * Set # of TX/RX rings that could be used according to
+        * the # of channels that host offered.
+        */
+       if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel)
+               sc->hn_tx_ring_inuse = sc->net_dev->num_channel;
+       sc->hn_rx_ring_inuse = sc->net_dev->num_channel;
        device_printf(dev, "%d TX ring, %d RX ring\n",
            sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
 
@@ -1337,6 +1348,7 @@ skip:
         */
 
        if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+       rxr->hn_pkts++;
 
        if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
 #if defined(INET) || defined(INET6)
@@ -2074,6 +2086,13 @@ hn_create_rx_data(struct hn_softc *sc, i
 #endif
 #endif /* INET || INET6 */
 
+       ctx = device_get_sysctl_ctx(dev);
+       child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+       /* Create dev.hn.UNIT.rx sysctl tree */
+       sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
+           CTLFLAG_RD, 0, "");
+
        for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
                struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
 
@@ -2101,10 +2120,27 @@ hn_create_rx_data(struct hn_softc *sc, i
                rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
 #endif
 #endif /* INET || INET6 */
-       }
 
-       ctx = device_get_sysctl_ctx(dev);
-       child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+               if (sc->hn_rx_sysctl_tree != NULL) {
+                       char name[16];
+
+                       /*
+                        * Create per RX ring sysctl tree:
+                        * dev.hn.UNIT.rx.RINGID
+                        */
+                       snprintf(name, sizeof(name), "%d", i);
+                       rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
+                           SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
+                           OID_AUTO, name, CTLFLAG_RD, 0, "");
+
+                       if (rxr->hn_rx_sysctl_tree != NULL) {
+                               SYSCTL_ADD_ULONG(ctx,
+                                   SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+                                   OID_AUTO, "packets", CTLFLAG_RW,
+                                   &rxr->hn_pkts, "# of packets received");
+                       }
+               }
+       }
 
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
            CTLTYPE_U64 | CTLFLAG_RW, sc,
@@ -2724,6 +2760,32 @@ hn_xmit_txeof_taskfunc(void *xtxr, int p
        mtx_unlock(&txr->hn_tx_lock);
 }
 
+void
+netvsc_subchan_callback(struct hn_softc *sc, struct hv_vmbus_channel *chan)
+{
+       int idx;
+
+       KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan),
+           ("subchannel callback on primary channel"));
+
+       idx = chan->offer_msg.offer.sub_channel_index;
+       KASSERT(idx > 0 && idx < sc->hn_rx_ring_inuse,
+           ("invalid channel index %d, should > 0 && < %d",
+            idx, sc->hn_rx_ring_inuse));
+       vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
+
+       chan->hv_chan_rxr = &sc->hn_rx_ring[idx];
+       if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n",
+           idx, chan->offer_msg.child_rel_id);
+
+       if (idx < sc->hn_tx_ring_inuse) {
+               chan->hv_chan_txr = &sc->hn_tx_ring[idx];
+               sc->hn_tx_ring[idx].hn_chan = chan;
+               if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n",
+                   idx, chan->offer_msg.child_rel_id);
+       }
+}
+
 static void
 hn_tx_taskq_create(void *arg __unused)
 {

Modified: head/sys/dev/hyperv/netvsc/hv_rndis.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis.h       Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/netvsc/hv_rndis.h       Fri Mar  4 06:52:11 2016        (r296379)
@@ -167,6 +167,14 @@
 #define RNDIS_OID_GEN_MACHINE_NAME                      0x0001021A
 #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER            0x0001021B
 
+/*
+ * For receive side scale
+ */
+/* Query only */
+#define RNDIS_OID_GEN_RSS_CAPABILITIES                 0x00010203
+/* Query and set */
+#define RNDIS_OID_GEN_RSS_PARAMETERS                   0x00010204
+
 #define RNDIS_OID_GEN_XMIT_OK                           0x00020101
 #define RNDIS_OID_GEN_RCV_OK                            0x00020102
 #define RNDIS_OID_GEN_XMIT_ERROR                        0x00020103
@@ -1060,6 +1068,8 @@ struct hv_vmbus_channel;
 int netvsc_recv(struct hv_vmbus_channel *chan,
     netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info);
 void netvsc_channel_rollup(struct hv_vmbus_channel *chan);
+void netvsc_subchan_callback(struct hn_softc *sc,
+    struct hv_vmbus_channel *chan);
 
 void* hv_set_rppi_data(rndis_msg *rndis_mesg,
     uint32_t rppi_size,

Modified: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c        Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c        Fri Mar  4 06:52:11 2016        (r296379)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/pmap.h>
 
 #include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
 #include "hv_net_vsc.h"
 #include "hv_rndis.h"
 #include "hv_rndis_filter.h"
@@ -223,6 +224,8 @@ hv_rf_send_request(rndis_device *device,
 {
        int ret;
        netvsc_packet *packet;
+       netvsc_dev      *net_dev = device->net_dev;
+       int send_buf_section_idx;
 
        /* Set up the packet to send it */
        packet = &request->pkt;
@@ -237,6 +240,20 @@ hv_rf_send_request(rndis_device *device,
        packet->page_buffers[0].offset =
            (unsigned long)&request->request_msg & (PAGE_SIZE - 1);
 
+       if (packet->page_buffers[0].offset +
+               packet->page_buffers[0].length > PAGE_SIZE) {
+               packet->page_buf_count = 2;
+               packet->page_buffers[0].length =
+                       PAGE_SIZE - packet->page_buffers[0].offset;
+               packet->page_buffers[1].pfn =
+                       hv_get_phys_addr((char*)&request->request_msg +
+                               packet->page_buffers[0].length) >> PAGE_SHIFT;
+               packet->page_buffers[1].offset = 0;
+               packet->page_buffers[1].length =
+                       request->request_msg.msg_len -
+                               packet->page_buffers[0].length;
+       }
+
        packet->compl.send.send_completion_context = request; /* packet */
        if (message_type != REMOTE_NDIS_HALT_MSG) {
                packet->compl.send.on_send_completion =
@@ -246,10 +263,25 @@ hv_rf_send_request(rndis_device *device,
                    hv_rf_on_send_request_halt_completion;
        }
        packet->compl.send.send_completion_tid = (unsigned long)device;
-       packet->send_buf_section_idx =
-           NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
+       if (packet->tot_data_buf_len < net_dev->send_section_size) {
+               send_buf_section_idx = hv_nv_get_next_send_section(net_dev);
+               if (send_buf_section_idx !=
+                       NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
+                       char *dest = ((char *)net_dev->send_buf +
+                               send_buf_section_idx * net_dev->send_section_size);
+
+                       memcpy(dest, &request->request_msg, request->request_msg.msg_len);
+                       packet->send_buf_section_idx = send_buf_section_idx;
+                       packet->send_buf_section_size = packet->tot_data_buf_len;
+                       packet->page_buf_count = 0;
+                       goto sendit;
+               }
+               /* Failed to allocate chimney send buffer; move on */
+       }
+       packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
        packet->send_buf_section_size = 0;
 
+sendit:
        ret = hv_nv_on_send(device->net_dev->dev->channel, packet);
 
        return (ret);
@@ -527,6 +559,19 @@ hv_rf_query_device(rndis_device *device,
        query->info_buffer_length = 0;
        query->device_vc_handle = 0;
 
+       if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) {
+               struct rndis_recv_scale_cap *cap;
+
+               request->request_msg.msg_len += 
+                       sizeof(struct rndis_recv_scale_cap);
+               query->info_buffer_length = sizeof(struct rndis_recv_scale_cap);
+               cap = (struct rndis_recv_scale_cap *)((unsigned long)query + 
+                                               query->info_buffer_offset);
+               cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+               cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+               cap->hdr.size = sizeof(struct rndis_recv_scale_cap);
+       }
+
        ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
        if (ret != 0) {
                /* Fixme:  printf added */
@@ -581,6 +626,114 @@ hv_rf_query_device_link_status(rndis_dev
            RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size));
 }
 
+static uint8_t netvsc_hash_key[HASH_KEYLEN] = {
+       0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+       0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+       0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+       0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+       0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+/*
+ * RNDIS set vRSS parameters
+ */
+static int
+hv_rf_set_rss_param(rndis_device *device, int num_queue)
+{
+       rndis_request *request;
+       rndis_set_request *set;
+       rndis_set_complete *set_complete;
+       rndis_recv_scale_param *rssp;
+       uint32_t extlen = sizeof(rndis_recv_scale_param) +
+           (4 * ITAB_NUM) + HASH_KEYLEN;
+       uint32_t *itab, status;
+       uint8_t *keyp;
+       int i, ret;
+
+
+       request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
+           RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen);
+       if (request == NULL) {
+               if (bootverbose)
+                       printf("Netvsc: No memory to set vRSS parameters.\n");
+               ret = -1;
+               goto cleanup;
+       }
+
+       set = &request->request_msg.msg.set_request;
+       set->oid = RNDIS_OID_GEN_RSS_PARAMETERS;
+       set->info_buffer_length = extlen;
+       set->info_buffer_offset = sizeof(rndis_set_request);
+       set->device_vc_handle = 0;
+
+       /* Fill out the rssp parameter structure */
+       rssp = (rndis_recv_scale_param *)(set + 1);
+       rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS;
+       rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+       rssp->hdr.size = sizeof(rndis_recv_scale_param);
+       rssp->flag = 0;
+       rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 |
+           RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6;
+       rssp->indirect_tabsize = 4 * ITAB_NUM;
+       rssp->indirect_taboffset = sizeof(rndis_recv_scale_param);
+       rssp->hashkey_size = HASH_KEYLEN;
+       rssp->hashkey_offset = rssp->indirect_taboffset +
+           rssp->indirect_tabsize;
+
+       /* Set indirection table entries */
+       itab = (uint32_t *)(rssp + 1);
+       for (i = 0; i < ITAB_NUM; i++)
+               itab[i] = i % num_queue;
+
+       /* Set hash key values */
+       keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset);
+       for (i = 0; i < HASH_KEYLEN; i++)
+               keyp[i] = netvsc_hash_key[i];
+
+       ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
+       if (ret != 0) {
+               goto cleanup;
+       }
+
+       /*
+        * Wait for the response from the host.  Another thread will signal
+        * us when the response has arrived.  In the failure case,
+        * sema_timedwait() returns a non-zero status after waiting 5 seconds.
+        */
+       ret = sema_timedwait(&request->wait_sema, 5 * hz);
+       if (ret == 0) {
+               /* Response received, check status */
+               set_complete = &request->response_msg.msg.set_complete;
+               status = set_complete->status;
+               if (status != RNDIS_STATUS_SUCCESS) {
+                       /* Bad response status, return error */
+                       if (bootverbose)
+                               printf("Netvsc: Failed to set vRSS "
+                                   "parameters.\n");
+                       ret = -2;
+               } else {
+                       if (bootverbose)
+                               printf("Netvsc: Successfully set vRSS "
+                                   "parameters.\n");
+               }
+       } else {
+               /*
+                * We cannot deallocate the request since we may still
+                * receive a send completion for it.
+                */
+               printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n",
+                   request->request_msg.msg.init_request.request_id, ret);
+               goto exit;
+       }
+
+cleanup:
+       if (request != NULL) {
+               hv_put_rndis_request(device, request);
+       }
+exit:
+       return (ret);
+}
+
 /*
  * RNDIS filter set packet filter
  * Sends an rndis request with the new filter, then waits for a response
@@ -816,12 +969,15 @@ hv_rf_close_device(rndis_device *device)
  */
 int
 hv_rf_on_device_add(struct hv_device *device, void *additl_info,
-    int nchan __unused)
+    int nchan)
 {
        int ret;
        netvsc_dev *net_dev;
        rndis_device *rndis_dev;
+       nvsp_msg *init_pkt;
        rndis_offload_params offloads;
+       struct rndis_recv_scale_cap rsscaps;
+       uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap);
        netvsc_device_info *dev_info = (netvsc_device_info *)additl_info;
        device_t dev = device->device;
 
@@ -887,6 +1043,67 @@ hv_rf_on_device_add(struct hv_device *de
        
        dev_info->link_state = rndis_dev->link_status;
 
+       net_dev->num_channel = 1;
+       if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1)
+               return (0);
+
+       memset(&rsscaps, 0, rsscaps_size);
+       ret = hv_rf_query_device(rndis_dev,
+                       RNDIS_OID_GEN_RSS_CAPABILITIES,
+                       &rsscaps, &rsscaps_size);
+       if ((ret != 0) || (rsscaps.num_recv_que < 2)) {
+               device_printf(dev, "hv_rf_query_device failed or "
+                       "rsscaps.num_recv_que < 2 \n");
+               goto out;
+       }
+       device_printf(dev, "channel, offered %u, requested %d\n",
+           rsscaps.num_recv_que, nchan);
+       if (nchan > rsscaps.num_recv_que)
+               nchan = rsscaps.num_recv_que;
+       net_dev->num_channel = nchan;
+
+       if (net_dev->num_channel == 1) {
+               device_printf(dev, "net_dev->num_channel == 1 under VRSS\n");
+               goto out;
+       }
+       
+       /* request host to create sub channels */
+       init_pkt = &net_dev->channel_init_packet;
+       memset(init_pkt, 0, sizeof(nvsp_msg));
+
+       init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel;
+       init_pkt->msgs.vers_5_msgs.subchannel_request.op =
+           NVSP_SUBCHANNE_ALLOCATE;
+       init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels =
+           net_dev->num_channel - 1;
+
+       ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+           sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
+           HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+           HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+       if (ret != 0) {
+               device_printf(dev, "Fail to allocate subchannel\n");
+               goto out;
+       }
+
+       sema_wait(&net_dev->channel_init_sema);
+
+       if (init_pkt->msgs.vers_5_msgs.subchn_complete.status !=
+           nvsp_status_success) {
+               ret = ENODEV;
+               device_printf(dev, "sub channel complete error\n");
+               goto out;
+       }
+
+       net_dev->num_channel = 1 +
+           init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels;
+
+       ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel);
+
+out:
+       if (ret)
+               net_dev->num_channel = 1;
+
        return (ret);
 }
 

Modified: head/sys/dev/hyperv/netvsc/hv_rndis_filter.h
==============================================================================
--- head/sys/dev/hyperv/netvsc/hv_rndis_filter.h        Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.h        Fri Mar  4 06:52:11 2016        (r296379)
@@ -63,17 +63,32 @@ typedef struct rndis_request_ {
        struct sema                     wait_sema;      
 
        /*
-        * Fixme:  We assumed a fixed size response here.  If we do ever
-        * need to handle a bigger response, we can either define a max
-        * response message or add a response buffer variable above this field
+        * The max response size is sizeof(rndis_msg) + PAGE_SIZE.
+        *
+        * XXX
+        * This is ugly and should be cleaned up once we busdma-fy
+        * RNDIS request bits.
         */
        rndis_msg                       response_msg;
+       uint8_t                         buf_resp[PAGE_SIZE];
 
        /* Simplify allocation by having a netvsc packet inline */
        netvsc_packet                   pkt;
        hv_vmbus_page_buffer            buffer;
-       /* Fixme:  We assumed a fixed size request here. */
+
+       /*
+        * The max request size is sizeof(rndis_msg) + PAGE_SIZE.
+        *
+        * NOTE:
+        * This is required for the large request like RSS settings.
+        *
+        * XXX
+        * This is ugly and should be cleaned up once we busdma-fy
+        * RNDIS request bits.
+        */
        rndis_msg                       request_msg;
+       uint8_t                         buf_req[PAGE_SIZE];
+
        /* Fixme:  Poor man's semaphore. */
        uint32_t                        halt_complete_flag;
 } rndis_request;

Modified: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
==============================================================================
--- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c Fri Mar  4 05:36:53 2016        (r296378)
+++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c Fri Mar  4 06:52:11 2016        (r296379)
@@ -274,7 +274,7 @@ vmbus_channel_process_offer(hv_vmbus_cha
        }
 }
 
-static void
+void
 vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
 {
        KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to