Ingress scheduling configuration is applied by means of Flow Director filters. This patch implements only a small subset of the possible ingress scheduling configurations.
Signed-off-by: Billy O'Mahony <billy.o.mah...@intel.com> --- include/openvswitch/ofp-parse.h | 3 + lib/dpif-netdev.c | 1 + lib/netdev-dpdk.c | 167 ++++++++++++++++++++++++++++++++++++++-- vswitchd/bridge.c | 2 + 4 files changed, 166 insertions(+), 7 deletions(-) diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-parse.h index fc5784e..08d6086 100644 --- a/include/openvswitch/ofp-parse.h +++ b/include/openvswitch/ofp-parse.h @@ -37,6 +37,9 @@ struct ofputil_table_mod; struct ofputil_bundle_msg; struct ofputil_tlv_table_mod; struct simap; +struct tun_table; +struct flow_wildcards; +struct ofputil_port_map; enum ofputil_protocol; char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_, diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 47a9fa0..d35566f 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -44,6 +44,7 @@ #include "dp-packet.h" #include "dpif.h" #include "dpif-provider.h" +#include "netdev-provider.h" #include "dummy.h" #include "fat-rwlock.h" #include "flow.h" diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index e74c50f..e393abf 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -33,6 +33,8 @@ #include <rte_meter.h> #include <rte_virtio_net.h> +#include <openvswitch/ofp-parse.h> +#include <openvswitch/ofp-util.h> #include "dirs.h" #include "dp-packet.h" #include "dpdk.h" @@ -169,6 +171,10 @@ static const struct rte_eth_conf port_conf = { .txmode = { .mq_mode = ETH_MQ_TX_NONE, }, + .fdir_conf = { + .mode = RTE_FDIR_MODE_PERFECT, + }, + }; enum { DPDK_RING_SIZE = 256 }; @@ -330,6 +336,11 @@ enum dpdk_hw_ol_features { NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0, }; +union ingress_filter { + struct rte_eth_ethertype_filter ethertype; + struct rte_eth_fdir_filter fdir; +}; + struct netdev_dpdk { struct netdev up; dpdk_port_t port_id; @@ -369,8 +380,11 @@ struct netdev_dpdk { /* If true, device was attached by rte_eth_dev_attach(). 
*/ bool attached; - /* Ingress Scheduling config */ + /* Ingress Scheduling config & state. */ char *ingress_sched_str; + bool ingress_sched_changed; + enum rte_filter_type ingress_filter_type; + union ingress_filter ingress_filter; /* In dpdk_list. */ struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); @@ -653,6 +667,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq) int i; struct rte_eth_conf conf = port_conf; + /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when exactly + * two rxqs are defined. Otherwise MQ will not work as expected. */ + if (dev->ingress_sched_str && n_rxq == 2) { + conf.rxmode.mq_mode = ETH_MQ_RX_NONE; + } + else { + conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + } + if (dev->mtu > ETHER_MTU) { conf.rxmode.jumbo_frame = 1; conf.rxmode.max_rx_pkt_len = dev->max_packet_len; @@ -730,6 +753,121 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex) } } +static void +dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq) +{ + if (!dev->ingress_sched_str) { + return; + } + + if (n_rxq != 2) { + VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \ + "Requires n_rxq==2.", dev->up.name); + } + + int priority_q_id = n_rxq-1; + char *key, *val, *str, *iter; + + ovs_be32 ip_src, ip_dst; + ip_src = ip_dst = 0; + + uint16_t eth_type, port_src, port_dst; + eth_type = port_src = port_dst = 0; + uint8_t ip_proto = 0; + int diag = 0; + + /* delete any existing filter */ + if (dev->ingress_filter_type == RTE_ETH_FILTER_FDIR) { + diag = rte_eth_dev_filter_ctrl(dev->port_id, RTE_ETH_FILTER_FDIR, + RTE_ETH_FILTER_DELETE, &dev->ingress_filter.fdir); + } else if (dev->ingress_filter_type == RTE_ETH_FILTER_ETHERTYPE) { + diag = rte_eth_dev_filter_ctrl(dev->port_id, RTE_ETH_FILTER_ETHERTYPE, + RTE_ETH_FILTER_DELETE, &dev->ingress_filter.ethertype); + } + + char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */ + /* Parse the configuration into local vars */ + iter = str = 
xstrdup(dev->ingress_sched_str); + while (ofputil_parse_key_value (&iter, &key, &val)) { + if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) { + mallocd_str = str_to_ip(val, &ip_src); + } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) { + mallocd_str = str_to_ip(val, &ip_dst); + } else if (strcmp(key, "dl_type") == 0 || + strcmp(key, "eth_type") == 0) { + mallocd_str = str_to_u16(val, "eth_type/dl_type", &eth_type); + } else if (strcmp(key, "tcp_src") == 0 || + strcmp(key, "tp_src") == 0 || + strcmp(key, "udp_src") == 0) { + mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src); + } else if (strcmp(key, "tcp_dst") == 0 || + strcmp(key, "tp_dst") == 0 || + strcmp(key, "udp_dst") == 0) { + mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst); + } else if (strcmp(key, "ip") == 0) { + eth_type = 0x0800; + } else if (strcmp(key, "udp") == 0) { + eth_type = 0x0800; + ip_proto = 17; + } else if (strcmp(key, "tcp") == 0) { + eth_type = 0x0800; + ip_proto = 6; + } else { + VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'", \ + key); + } + if (mallocd_str) { + VLOG_ERR ("%s", mallocd_str); + free(mallocd_str); + mallocd_str = NULL; + } + } + free (str); + + /* Set the filters */ + if (eth_type && ip_src && ip_dst && port_src && port_dst && ip_proto) { + struct rte_eth_fdir_filter entry = { 0 }; + entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP; + entry.input.flow.udp4_flow.ip.src_ip = ip_src; + entry.input.flow.udp4_flow.ip.dst_ip = ip_dst; + entry.input.flow.udp4_flow.src_port = htons(port_src); + entry.input.flow.udp4_flow.dst_port = htons(port_dst); + entry.action.rx_queue = priority_q_id; + entry.action.behavior = RTE_ETH_FDIR_ACCEPT; + entry.action.report_status = RTE_ETH_FDIR_REPORT_ID; + diag = rte_eth_dev_filter_ctrl(dev->port_id, + RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry); + dev->ingress_filter_type = RTE_ETH_FILTER_FDIR; + dev->ingress_filter.fdir = entry; + } + else if (eth_type && !ip_src && !ip_dst && 
!port_src + && !port_dst && !ip_proto) { + struct rte_eth_ethertype_filter entry = {0}; + memset (&entry, 0, sizeof entry); + entry.ether_type = eth_type; + entry.flags = 0; + entry.queue = priority_q_id; + diag = rte_eth_dev_filter_ctrl(dev->port_id, + RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry); + dev->ingress_filter.ethertype = entry; + dev->ingress_filter_type = RTE_ETH_FILTER_ETHERTYPE; + } + else { + VLOG_ERR("Unsupported ingress scheduling match-field combination."); + dev->ingress_filter_type = RTE_ETH_FILTER_NONE; + return; + } + + if (diag) { + dev->ingress_filter_type = RTE_ETH_FILTER_NONE; + VLOG_ERR("Failed to add ingress scheduling filter."); + } + else { + /* Mark the appropriate q as prioritized */ + dev->up.priority_rxq = priority_q_id; + } +} + static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex) @@ -764,6 +902,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) return -diag; } + dpdk_apply_ingress_scheduling(dev, n_rxq); + diag = rte_eth_dev_start(dev->port_id); if (diag) { VLOG_ERR("Interface %s start error: %s", dev->up.name, @@ -870,6 +1010,9 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, dev->requested_rxq_size = NIC_PORT_DEFAULT_RXQ_SIZE; dev->requested_txq_size = NIC_PORT_DEFAULT_TXQ_SIZE; + dev->ingress_sched_str = NULL; + dev->ingress_sched_changed = false; + dev->ingress_filter_type = RTE_ETH_FILTER_NONE; /* Initialize the flow control to NULL */ memset(&dev->fc_conf, 0, sizeof dev->fc_conf); @@ -1950,11 +2093,20 @@ netdev_dpdk_set_ingress_sched(struct netdev *netdev, { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - free(dev->ingress_sched_str); - if (ingress_sched_str) { - dev->ingress_sched_str = xstrdup(ingress_sched_str); + if ((ingress_sched_str && dev->ingress_sched_str && + strcmp(ingress_sched_str, dev->ingress_sched_str) == 0) || + (!ingress_sched_str && !dev->ingress_sched_str)) { + /* no-op; new cfg == old cfg or else both are NULL */ + return 0; + } else { + /* free the 
old, copy in the new */ + free(dev->ingress_sched_str); + if (ingress_sched_str) { + dev->ingress_sched_str = xstrdup(ingress_sched_str); + } + dev->ingress_sched_changed = true; + netdev_request_reconfigure(netdev); } - return 0; } @@ -3112,12 +3264,13 @@ netdev_dpdk_reconfigure(struct netdev *netdev) && dev->mtu == dev->requested_mtu && dev->rxq_size == dev->requested_rxq_size && dev->txq_size == dev->requested_txq_size - && dev->socket_id == dev->requested_socket_id) { + && dev->socket_id == dev->requested_socket_id + && !dev->ingress_sched_changed) { /* Reconfiguration is unnecessary */ - goto out; } + dev->ingress_sched_changed = false; rte_eth_dev_stop(dev->port_id); if (dev->mtu != dev->requested_mtu diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 9113195..2c5dfd3 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -831,6 +831,8 @@ bridge_delete_or_reconfigure_ports(struct bridge *br) } iface_set_netdev_mtu(iface->cfg, iface->netdev); + netdev_set_ingress_sched(iface->netdev, + smap_get(&iface->cfg->other_config, "ingress_sched")); /* If the requested OpenFlow port for 'iface' changed, and it's not * already the correct port, then we might want to temporarily delete -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev