Ingress scheduling configuration is given effect by way of Flow Director
filters. A small subset of the ingress scheduling possible is
implemented in this patch.

Signed-off-by: Billy O'Mahony <billy.o.mah...@intel.com>
---
 include/openvswitch/ofp-parse.h |   3 +
 lib/dpif-netdev.c               |   1 +
 lib/netdev-dpdk.c               | 167 ++++++++++++++++++++++++++++++++++++++--
 vswitchd/bridge.c               |   2 +
 4 files changed, 166 insertions(+), 7 deletions(-)

diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-parse.h
index fc5784e..08d6086 100644
--- a/include/openvswitch/ofp-parse.h
+++ b/include/openvswitch/ofp-parse.h
@@ -37,6 +37,9 @@ struct ofputil_table_mod;
 struct ofputil_bundle_msg;
 struct ofputil_tlv_table_mod;
 struct simap;
+struct tun_table;
+struct flow_wildcards;
+struct ofputil_port_map;
 enum ofputil_protocol;
 
 char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 47a9fa0..d35566f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -44,6 +44,7 @@
 #include "dp-packet.h"
 #include "dpif.h"
 #include "dpif-provider.h"
+#include "netdev-provider.h"
 #include "dummy.h"
 #include "fat-rwlock.h"
 #include "flow.h"
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e74c50f..e393abf 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -33,6 +33,8 @@
 #include <rte_meter.h>
 #include <rte_virtio_net.h>
 
+#include <openvswitch/ofp-parse.h>
+#include <openvswitch/ofp-util.h>
 #include "dirs.h"
 #include "dp-packet.h"
 #include "dpdk.h"
@@ -169,6 +171,10 @@ static const struct rte_eth_conf port_conf = {
     .txmode = {
         .mq_mode = ETH_MQ_TX_NONE,
     },
+    .fdir_conf = {
+        .mode = RTE_FDIR_MODE_PERFECT,
+    },
+
 };
 
 enum { DPDK_RING_SIZE = 256 };
@@ -330,6 +336,11 @@ enum dpdk_hw_ol_features {
     NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
 };
 
+union ingress_filter {
+    struct rte_eth_ethertype_filter ethertype;
+    struct rte_eth_fdir_filter fdir;
+};
+
 struct netdev_dpdk {
     struct netdev up;
     dpdk_port_t port_id;
@@ -369,8 +380,11 @@ struct netdev_dpdk {
     /* If true, device was attached by rte_eth_dev_attach(). */
     bool attached;
 
-    /* Ingress Scheduling config */
+    /* Ingress Scheduling config & state. */
     char *ingress_sched_str;
+    bool ingress_sched_changed;
+    enum rte_filter_type ingress_filter_type;
+    union ingress_filter ingress_filter;
 
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
@@ -653,6 +667,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
n_rxq, int n_txq)
     int i;
     struct rte_eth_conf conf = port_conf;
 
+    /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when exactly
+     * two rxqs are defined. Otherwise MQ will not work as expected. */
+    if (dev->ingress_sched_str && n_rxq == 2) {
+        conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+    }
+    else {
+        conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+    }
+
     if (dev->mtu > ETHER_MTU) {
         conf.rxmode.jumbo_frame = 1;
         conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
@@ -730,6 +753,121 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) 
OVS_REQUIRES(dev->mutex)
     }
 }
 
+static void
+dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)
+{
+    if (!dev->ingress_sched_str) {
+        return;
+    }
+
+    if (n_rxq != 2) {
+        VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \
+                 "Requires n_rxq==2.", dev->up.name);
+    }
+
+    int priority_q_id = n_rxq-1;
+    char *key, *val, *str, *iter;
+
+    ovs_be32 ip_src, ip_dst;
+    ip_src = ip_dst = 0;
+
+    uint16_t eth_type, port_src, port_dst;
+    eth_type = port_src = port_dst = 0;
+    uint8_t ip_proto = 0;
+    int diag = 0;
+
+    /* delete any existing filter */
+    if (dev->ingress_filter_type == RTE_ETH_FILTER_FDIR) {
+        diag = rte_eth_dev_filter_ctrl(dev->port_id, RTE_ETH_FILTER_FDIR,
+            RTE_ETH_FILTER_DELETE, &dev->ingress_filter.fdir);
+    } else if (dev->ingress_filter_type == RTE_ETH_FILTER_ETHERTYPE) {
+        diag = rte_eth_dev_filter_ctrl(dev->port_id, RTE_ETH_FILTER_ETHERTYPE,
+            RTE_ETH_FILTER_DELETE, &dev->ingress_filter.ethertype);
+    }
+
+    char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */
+    /* Parse the configuration into local vars */
+    iter = str = xstrdup(dev->ingress_sched_str);
+    while (ofputil_parse_key_value (&iter, &key, &val)) {
+        if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {
+            mallocd_str = str_to_ip(val, &ip_src);
+        } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) {
+            mallocd_str = str_to_ip(val, &ip_dst);
+        } else if (strcmp(key, "dl_type") == 0 ||
+                   strcmp(key, "eth_type") == 0) {
+            mallocd_str = str_to_u16(val, "eth_type/dl_type", &eth_type);
+        } else if (strcmp(key, "tcp_src") == 0 ||
+                  strcmp(key, "tp_src") == 0 ||
+                  strcmp(key, "udp_src") == 0) {
+            mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);
+        } else if (strcmp(key, "tcp_dst") == 0 ||
+                   strcmp(key, "tp_dst") == 0 ||
+                   strcmp(key, "udp_dst") == 0) {
+            mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);
+        } else if (strcmp(key, "ip") == 0) {
+            eth_type = 0x0800;
+        } else if (strcmp(key, "udp") == 0) {
+            eth_type = 0x0800;
+            ip_proto = 17;
+        } else if (strcmp(key, "tcp") == 0) {
+            eth_type = 0x0800;
+            ip_proto = 6;
+        } else {
+            VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'", \
+                      key);
+        }
+        if (mallocd_str) {
+            VLOG_ERR ("%s", mallocd_str);
+            free(mallocd_str);
+            mallocd_str = NULL;
+        }
+    }
+    free (str);
+
+    /* Set the filters */
+    if (eth_type && ip_src && ip_dst && port_src && port_dst && ip_proto) {
+        struct rte_eth_fdir_filter entry = { 0 };
+        entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+        entry.input.flow.udp4_flow.ip.src_ip = ip_src;
+        entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;
+        entry.input.flow.udp4_flow.src_port = htons(port_src);
+        entry.input.flow.udp4_flow.dst_port = htons(port_dst);
+        entry.action.rx_queue = priority_q_id;
+        entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
+        entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;
+        diag = rte_eth_dev_filter_ctrl(dev->port_id,
+                RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);
+        dev->ingress_filter_type = RTE_ETH_FILTER_FDIR;
+        dev->ingress_filter.fdir = entry;
+    }
+    else if (eth_type && !ip_src && !ip_dst && !port_src
+             && !port_dst && !ip_proto) {
+        struct rte_eth_ethertype_filter entry = {0};
+        memset (&entry, 0, sizeof entry);
+        entry.ether_type = eth_type;
+        entry.flags = 0;
+        entry.queue = priority_q_id;
+        diag = rte_eth_dev_filter_ctrl(dev->port_id,
+                RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);
+        dev->ingress_filter.ethertype = entry;
+        dev->ingress_filter_type = RTE_ETH_FILTER_ETHERTYPE;
+    }
+    else {
+        VLOG_ERR("Unsupported ingress scheduling match-field combination.");
+        dev->ingress_filter_type = RTE_ETH_FILTER_NONE;
+        return;
+    }
+
+    if (diag) {
+        dev->ingress_filter_type = RTE_ETH_FILTER_NONE;
+        VLOG_ERR("Failed to add ingress scheduling filter.");
+    }
+    else {
+        /* Mark the appropriate q as prioritized */
+        dev->up.priority_rxq = priority_q_id;
+    }
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev)
     OVS_REQUIRES(dev->mutex)
@@ -764,6 +902,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
         return -diag;
     }
 
+    dpdk_apply_ingress_scheduling(dev, n_rxq);
+
     diag = rte_eth_dev_start(dev->port_id);
     if (diag) {
         VLOG_ERR("Interface %s start error: %s", dev->up.name,
@@ -870,6 +1010,9 @@ common_construct(struct netdev *netdev, dpdk_port_t 
port_no,
     dev->requested_rxq_size = NIC_PORT_DEFAULT_RXQ_SIZE;
     dev->requested_txq_size = NIC_PORT_DEFAULT_TXQ_SIZE;
 
+    dev->ingress_sched_str = NULL;
+    dev->ingress_sched_changed = false;
+    dev->ingress_filter_type = RTE_ETH_FILTER_NONE;
     /* Initialize the flow control to NULL */
     memset(&dev->fc_conf, 0, sizeof dev->fc_conf);
 
@@ -1950,11 +2093,20 @@ netdev_dpdk_set_ingress_sched(struct netdev *netdev,
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    free(dev->ingress_sched_str);
-    if (ingress_sched_str) {
-        dev->ingress_sched_str = xstrdup(ingress_sched_str);
+    if ((ingress_sched_str && dev->ingress_sched_str &&
+        strcmp(ingress_sched_str, dev->ingress_sched_str) == 0) ||
+        (!ingress_sched_str && !dev->ingress_sched_str)) {
+        /* no-op; new cfg == old cfg or else both are NULL */
+        return 0;
+    } else {
+        /* free the old, copy in the new */
+        free(dev->ingress_sched_str);
+        if (ingress_sched_str) {
+            dev->ingress_sched_str = xstrdup(ingress_sched_str);
+        }
+        dev->ingress_sched_changed = true;
+        netdev_request_reconfigure(netdev);
     }
-
     return 0;
 }
 
@@ -3112,12 +3264,13 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
         && dev->mtu == dev->requested_mtu
         && dev->rxq_size == dev->requested_rxq_size
         && dev->txq_size == dev->requested_txq_size
-        && dev->socket_id == dev->requested_socket_id) {
+        && dev->socket_id == dev->requested_socket_id
+        && !dev->ingress_sched_changed) {
         /* Reconfiguration is unnecessary */
-
         goto out;
     }
 
+    dev->ingress_sched_changed = false;
     rte_eth_dev_stop(dev->port_id);
 
     if (dev->mtu != dev->requested_mtu
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 9113195..2c5dfd3 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -831,6 +831,8 @@ bridge_delete_or_reconfigure_ports(struct bridge *br)
         }
 
         iface_set_netdev_mtu(iface->cfg, iface->netdev);
+        netdev_set_ingress_sched(iface->netdev,
+            smap_get(&iface->cfg->other_config, "ingress_sched"));
 
         /* If the requested OpenFlow port for 'iface' changed, and it's not
          * already the correct port, then we might want to temporarily delete
-- 
2.7.4

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to