From: Jan Scheurich <jan.scheur...@ericsson.com>

 Today dpif-netdev considers PMD threads on a non-local NUMA node for
 automatic assignment of a port's rxqs only if there are no local,
 non-isolated PMDs.

 On typical servers with all physical ports on one NUMA node, this often
 leaves the PMDs on the other NUMA node under-utilized, wasting CPU
 resources. The alternative, manually pinning the rxqs to PMDs on remote
 NUMA nodes, also has drawbacks as it limits OVS' ability to
 auto-load-balance the rxqs.

 This patch introduces a new interface configuration option to allow
 ports to be automatically polled by PMDs on any NUMA node:

 ovs-vsctl set interface <Name> other_config:cross-numa-polling=true

 If this option is not present or set to false, legacy behaviour applies.
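
 For example (a sketch; "dpdk0" stands in for an actual physical port):

     ovs-vsctl set interface dpdk0 other_config:cross-numa-polling=true
     ovs-appctl dpif-netdev/pmd-rxq-show

 The second command shows which rx queues are polled by the PMD threads
 on each NUMA node.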

Signed-off-by: Jan Scheurich <jan.scheur...@ericsson.com>
Signed-off-by: Anurag Agarwal <anura...@gmail.com>
---
 Documentation/topics/dpdk/pmd.rst | 24 ++++++++++++++++++++++--
 lib/dpif-netdev.c                 | 31 +++++++++++++++++++++++--------
 tests/pmd.at                      | 30 ++++++++++++++++++++++++++++++
 vswitchd/vswitch.xml              | 20 ++++++++++++++++++++
 4 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index b259cc8b3..f6c9671d7 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -132,8 +132,28 @@ or can be triggered by using::
 
 Port/Rx Queue assignment to PMD threads by manual pinning
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Rx queues may be manually pinned to cores. This will change the default Rx
-queue assignment to PMD threads::
+
+Normally, Rx queues are assigned to PMD threads automatically.  By default
+OVS only assigns Rx queues to PMD threads executing on the port's local
+NUMA node, in order to avoid the extra latency of accessing packet buffers
+across the NUMA boundary.  Typically this overhead is higher for vhostuser
+ports than for physical ports due to the packet copy that is done for
+every received packet.
+
+On NUMA servers with physical ports on only one NUMA node, this NUMA-local
+polling policy can leave the PMD threads on the remote NUMA node
+under-utilized.  In such cases it may be beneficial for overall OVS
+performance to use the spare capacity and allow a physical port's rxqs to
+be polled by PMDs across NUMA nodes, despite the overhead involved.
+The policy can be set per port with the following configuration option::
+
+    $ ovs-vsctl set Interface <iface> \
+        other_config:cross-numa-polling=true|false
+
+The default value is false.
+
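+The resulting assignment of rx queues to PMD threads across NUMA nodes
+can be inspected with::
+
+    $ ovs-appctl dpif-netdev/pmd-rxq-show
+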
+Rx queues may also be manually pinned to cores. This will change the default
+Rx queue assignment to PMD threads::
 
     $ ovs-vsctl set Interface <iface> \
         other_config:pmd-rxq-affinity=<rxq-affinity-list>
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 720818e30..4fda8d7a0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -466,6 +466,7 @@ struct dp_netdev_port {
     char *type;                 /* Port type as requested by user. */
     char *rxq_affinity_list;    /* Requested affinity of rx queues. */
     enum txq_req_mode txq_requested_mode;
+    bool cross_numa_polling;    /* If true, rxqs may be polled cross-NUMA. */
 };
 
 static bool dp_netdev_flow_ref(struct dp_netdev_flow *);
@@ -5018,6 +5019,7 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
     bool emc_enabled = smap_get_bool(cfg, "emc-enable", true);
     const char *tx_steering_mode = smap_get(cfg, "tx-steering");
     enum txq_req_mode txq_mode;
+    bool cross_numa_polling = smap_get_bool(cfg, "cross-numa-polling", false);
 
     ovs_rwlock_wrlock(&dp->port_rwlock);
     error = get_port_by_number(dp, port_no, &port);
@@ -5086,6 +5088,14 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
         dp_netdev_request_reconfigure(dp);
     }
 
+    if (cross_numa_polling != port->cross_numa_polling) {
+        port->cross_numa_polling = cross_numa_polling;
+        VLOG_INFO("%s: cross-numa-polling has been %s.",
+                  netdev_get_name(port->netdev),
+                  cross_numa_polling ? "enabled" : "disabled");
+        dp_netdev_request_reconfigure(dp);
+    }
+
 unlock:
     ovs_rwlock_unlock(&dp->port_rwlock);
     return error;
@@ -5885,7 +5895,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
 {
     struct dp_netdev_port *port;
     struct dp_netdev_rxq **rxqs = NULL;
-    struct sched_numa *last_cross_numa;
+    struct sched_numa *next_numa = NULL;
     unsigned n_rxqs = 0;
     bool start_logged = false;
     size_t n_numa;
@@ -5969,7 +5979,7 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
         qsort(rxqs, n_rxqs, sizeof *rxqs, compare_rxq_cycles);
     }
 
-    last_cross_numa = NULL;
+    next_numa = NULL;
     n_numa = sched_numa_list_count(numa_list);
     for (unsigned i = 0; i < n_rxqs; i++) {
         struct dp_netdev_rxq *rxq = rxqs[i];
@@ -5989,20 +5999,25 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
         proc_cycles = dp_netdev_rxq_get_cycles(rxq, RXQ_CYCLES_PROC_HIST);
         /* Select the numa that should be used for this rxq. */
         numa_id = netdev_get_numa_id(rxq->port->netdev);
-        numa = sched_numa_list_lookup(numa_list, numa_id);
+
+        if (rxq->port->cross_numa_polling) {
+            /* Allow polling by a pmd on any numa. */
+            numa = NULL;
+        } else {
+            /* Restrict polling to pmds on the local numa. */
+            numa = sched_numa_list_lookup(numa_list, numa_id);
+        }
 
         /* Check if numa has no PMDs or no non-isolated PMDs. */
         if (!numa || !sched_numa_noniso_pmd_count(numa)) {
             /* Unable to use this numa to find a PMD. */
             numa = NULL;
             /* Find any numa with available PMDs. */
             for (int j = 0; j < n_numa; j++) {
-                numa = sched_numa_list_next(numa_list, last_cross_numa);
-                if (sched_numa_noniso_pmd_count(numa)) {
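+                /* Continue the round-robin over numas from where the last
+                 * cross-numa rxq left off, so that cross-numa rxqs are
+                 * spread evenly over the available numas. */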
+                next_numa = sched_numa_list_next(numa_list, next_numa);
+                if (sched_numa_noniso_pmd_count(next_numa)) {
+                    numa = next_numa;
                     break;
                 }
-                last_cross_numa = numa;
-                numa = NULL;
             }
         }
 
diff --git a/tests/pmd.at b/tests/pmd.at
index a2f9d34a2..81a9a0eca 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -541,6 +541,36 @@ icmp,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([PMD - Enable cross numa polling])
+OVS_VSWITCHD_START(
+  [add-port br0 p1 -- set Interface p1 type=dummy-pmd ofport_request=1 options:n_rxq=4 -- \
+   set Open_vSwitch . other_config:pmd-cpu-mask=3
+], [], [], [--dummy-numa 0,1])
+
+AT_CHECK([ovs-ofctl add-flow br0 action=controller])
+
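+dnl Check that by default all rxqs are polled by PMDs on numa 0 only.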
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show | cut -f 3 -d ' ' | sort | uniq], [0], [dnl
+0
+])
+
+dnl Enable cross numa polling and check numa ids
+AT_CHECK([ovs-vsctl set Interface p1 other_config:cross-numa-polling=true])
+
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show | cut -f 3 -d ' ' | sort | uniq], [0], [dnl
+0
+1
+])
+
+dnl Disable cross numa polling and check numa ids
+AT_CHECK([ovs-vsctl set Interface p1 other_config:cross-numa-polling=false])
+
+AT_CHECK([ovs-appctl dpif-netdev/pmd-rxq-show | parse_pmd_rxq_show | cut -f 3 -d ' ' | sort | uniq], [0], [dnl
+0
+])
+
+OVS_VSWITCHD_STOP(["/|WARN|/d"])
+AT_CLEANUP
+
 AT_SETUP([PMD - change numa node])
 OVS_VSWITCHD_START(
  [add-port br0 p1 -- set Interface p1 type=dummy-pmd ofport_request=1 options:n_rxq=2 -- \
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 0c6632617..4d7339885 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -3284,6 +3284,26 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
         </p>
       </column>
 
+      <column name="other_config" key="cross-numa-polling"
+                                  type='{"type": "boolean"}'>
+        <p>
+          Specifies whether the Rx queues of the port may be automatically
+          assigned to PMD threads on any NUMA node or only to PMD threads
+          on the port's local NUMA node.
+        </p>
+        <p>
+          Polling a physical port from a PMD thread on a non-local NUMA
+          node incurs a performance penalty due to accessing packet data
+          across the NUMA boundary.  This option can still increase overall
+          performance if it allows better utilization of those non-local
+          PMD threads.  It is most useful together with the automatic
+          load-balancing of Rx queues (see other_config:pmd-auto-lb in
+          table Open_vSwitch).
+        </p>
+        <p>
+          Defaults to false.
+        </p>
+      </column>
+
       <column name="options" key="xdp-mode"
               type='{"type": "string",
                      "enum": ["set", ["best-effort", "native-with-zerocopy",
-- 
2.25.1
