Mellanox cards have a different XSK design. They require users to create dedicated queues for XSK. Unlike Intel NICs, which load the XDP program on all queues, Mellanox NICs load the XDP program only on a subset of their queues.
When OVS uses AF_XDP with mlx5, the driver doesn't replace the existing RX and TX queues in the channel with XSK RX and XSK TX queues; instead, it creates an additional pair of queues for XSK in that channel. To distinguish regular and XSK queues, mlx5 uses different ranges of qids. That is, if the card has 24 queues, queues 0..11 correspond to regular queues, and queues 12..23 are XSK queues. In this case, we should attach the netdev-afxdp with 'start-qid=12'. I tested using a Mellanox ConnectX-6 Dx, by setting 'start-qid=1', and: $ ethtool -L enp2s0f0np0 combined 1 # queue 0 is for non-XDP traffic, queue 1 is for XSK $ ethtool -N enp2s0f0np0 flow-type udp4 action 1 Note: we additionally need to add a flow-redirect rule to queue 1. Tested-at: https://github.com/williamtu/ovs-travis/actions/runs/535141041 Signed-off-by: William Tu <u9012...@gmail.com> --- Documentation/intro/install/afxdp.rst | 2 ++ lib/netdev-afxdp.c | 23 ++++++++++++++++++----- lib/netdev-linux-private.h | 1 + vswitchd/vswitch.xml | 8 ++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index f2643e0d41a1..eac298c52575 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -204,6 +204,8 @@ more details): * ``use-need-wakeup``: default ``true`` if libbpf supports it, otherwise ``false``. +* ``start-qid``: default ``0``. + For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device, configure these options: ``pmd-cpu-mask``, ``pmd-rxq-affinity``, and ``n_rxq``:: diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c index 482400d8d135..36f6a323b1bc 100644 --- a/lib/netdev-afxdp.c +++ b/lib/netdev-afxdp.c @@ -458,12 +458,13 @@ xsk_configure_queue(struct netdev_linux *dev, int ifindex, int queue_id, VLOG_DBG("%s: configuring queue: %d, mode: %s, use-need-wakeup: %s.", netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name, dev->use_need_wakeup ? 
"true" : "false"); - xsk_info = xsk_configure(ifindex, queue_id, mode, dev->use_need_wakeup, - report_socket_failures); + xsk_info = xsk_configure(ifindex, dev->startqid + queue_id, mode, + dev->use_need_wakeup, report_socket_failures); if (!xsk_info) { VLOG(report_socket_failures ? VLL_ERR : VLL_DBG, - "%s: Failed to create AF_XDP socket on queue %d in %s mode.", - netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name); + "%s: Failed to create AF_XDP socket on queue %d+%d in %s mode.", + netdev_get_name(&dev->up), dev->startqid, queue_id, + xdp_modes[mode].name); dev->xsks[queue_id] = NULL; return -1; } @@ -604,6 +605,7 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args, enum afxdp_mode xdp_mode; bool need_wakeup; int new_n_rxq; + int new_startqid; ovs_mutex_lock(&dev->mutex); new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1); @@ -637,12 +639,18 @@ netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args, } #endif + /* TODO: need to check + * new_startqid + new_n_rxq > total dev's queues. */ + new_startqid = smap_get_int(args, "start-qid", 0); + if (dev->requested_n_rxq != new_n_rxq || dev->requested_xdp_mode != xdp_mode - || dev->requested_need_wakeup != need_wakeup) { + || dev->requested_need_wakeup != need_wakeup + || dev->requested_startqid != new_startqid) { dev->requested_n_rxq = new_n_rxq; dev->requested_xdp_mode = xdp_mode; dev->requested_need_wakeup = need_wakeup; + dev->requested_startqid = new_startqid; netdev_request_reconfigure(netdev); } ovs_mutex_unlock(&dev->mutex); @@ -661,6 +669,7 @@ netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args) xdp_modes[dev->xdp_mode_in_use].name); smap_add_format(args, "use-need-wakeup", "%s", dev->use_need_wakeup ? 
"true" : "false"); + smap_add_format(args, "start-qid", "%d", dev->startqid); ovs_mutex_unlock(&dev->mutex); return 0; } @@ -696,6 +705,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev) if (netdev->n_rxq == dev->requested_n_rxq && dev->xdp_mode == dev->requested_xdp_mode && dev->use_need_wakeup == dev->requested_need_wakeup + && dev->startqid == dev->requested_startqid && dev->xsks) { goto out; } @@ -713,6 +723,7 @@ netdev_afxdp_reconfigure(struct netdev *netdev) VLOG_ERR("setrlimit(RLIMIT_MEMLOCK) failed: %s", ovs_strerror(errno)); } dev->use_need_wakeup = dev->requested_need_wakeup; + dev->startqid = dev->requested_startqid; err = xsk_configure_all(netdev); if (err) { @@ -1177,12 +1188,14 @@ netdev_afxdp_construct(struct netdev *netdev) /* Queues should not be used before the first reconfiguration. Clearing. */ netdev->n_rxq = 0; netdev->n_txq = 0; + dev->startqid = 0; dev->xdp_mode = OVS_AF_XDP_MODE_UNSPEC; dev->xdp_mode_in_use = OVS_AF_XDP_MODE_UNSPEC; dev->requested_n_rxq = NR_QUEUE; dev->requested_xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT; dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT; + dev->requested_startqid = 0; dev->xsks = NULL; dev->tx_locks = NULL; diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h index c7c515f70700..242ce1659614 100644 --- a/lib/netdev-linux-private.h +++ b/lib/netdev-linux-private.h @@ -109,6 +109,7 @@ struct netdev_linux { /* AF_XDP information. */ struct xsk_socket_info **xsks; int requested_n_rxq; + int startqid, requested_startqid; enum afxdp_mode xdp_mode; /* Configured AF_XDP mode. */ enum afxdp_mode requested_xdp_mode; /* Requested AF_XDP mode. 
*/ diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index a2ad84edefa9..47e14ba67c3a 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3290,6 +3290,14 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ </p> </column> + <column name="options" key="start-qid" + type='{"type": "integer", "minInteger": 0, "maxInteger": 32}'> + <p> + Specifies the starting XDP socket's queue id. + Defaults to 0. + </p> + </column> + <column name="options" key="vhost-server-path" type='{"type": "string"}'> <p> -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev