Tunnel devices have 0 txqs and don't support netdev_send().  While
netdev_send() simply returns EOPNOTSUPP, the XPS logic is still executed
on output, and it might be confused by a device with no txqs.
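To make the hazard concrete, here is a minimal stand-alone sketch (not code
from the tree; pick_txq() is a hypothetical stand-in for XPS-style queue
selection, which maps a flow hash onto one of a device's txqs):

    #include <stdio.h>

    /* Hypothetical stand-in for XPS-style tx queue selection. */
    static int
    pick_txq(unsigned int hash, int n_txq)
    {
        /* Undefined behavior when n_txq == 0, which is exactly what a
         * tunnel device reports.  The fast path must never reach queue
         * selection for a port without txqs. */
        return hash % n_txq;
    }

    int
    main(void)
    {
        printf("qid=%d\n", pick_txq(0x5ad1, 4));  /* OK: device with 4 txqs. */
        /* pick_txq(0x5ad1, 0) would divide by zero. */
        return 0;
    }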
It seems better to have different structures in the fast path for ports
that support netdev_{push,pop}_header (tunnel devices) and ports that
support netdev_send().  With this we can also remove a branch in
netdev_send().

This is also necessary for a future commit, which starts DPDK devices
without txqs.

Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
---
 lib/dpif-netdev.c | 73 +++++++++++++++++++++++++++++++++++++++----------------
 lib/netdev.c      | 35 ++++++++++++++------------
 lib/netdev.h      |  1 +
 3 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f600cab00..004b28dc8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -422,7 +422,8 @@ struct rxq_poll {
     struct ovs_list node;
 };
 
-/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
+/* Contained by struct dp_netdev_pmd_thread's 'send_port_cache',
+ * 'tnl_port_cache' or 'tx_ports'. */
 struct tx_port {
     struct dp_netdev_port *port;
     int qid;
@@ -504,11 +505,18 @@ struct dp_netdev_pmd_thread {
      * read by the pmd thread. */
     struct hmap tx_ports OVS_GUARDED;
 
-    /* Map of 'tx_port' used in the fast path. This is a thread-local copy of
-     * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
-     * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
-     * Every other instance will only be accessed by its own pmd thread. */
-    struct hmap port_cache;
+    /* These are thread-local copies of 'tx_ports'.  One contains only tunnel
+     * ports (that support push_tunnel/pop_tunnel), the other contains ports
+     * with at least one txq (that support send).  A port can be in both.
+     *
+     * There are two separate maps to make sure that we don't try to execute
+     * OUTPUT on a device which has 0 txqs or PUSH/POP on a non-tunnel device.
+     *
+     * The instances for cpu core NON_PMD_CORE_ID can be accessed by multiple
+     * threads, and thus need to be protected by 'non_pmd_mutex'.  Every
+     * other instance will only be accessed by its own pmd thread. */
+    struct hmap tnl_port_cache;
+    struct hmap send_port_cache;
 
     /* Only a pmd thread can write on its own 'cycles' and 'stats'.
      * The main thread keeps 'stats_zero' and 'cycles_zero' as base
@@ -3058,7 +3066,10 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
     /* Free all used tx queue ids. */
     dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
 
-    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->port_cache) {
+    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
+        free(tx_port_cached);
+    }
+    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->send_port_cache) {
         free(tx_port_cached);
     }
 }
@@ -3072,12 +3083,21 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
     struct tx_port *tx_port, *tx_port_cached;
 
     pmd_free_cached_ports(pmd);
-    hmap_shrink(&pmd->port_cache);
+    hmap_shrink(&pmd->send_port_cache);
+    hmap_shrink(&pmd->tnl_port_cache);
 
     HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
-        tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
-        hmap_insert(&pmd->port_cache, &tx_port_cached->node,
-                    hash_port_no(tx_port_cached->port->port_no));
+        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
+            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
+            hmap_insert(&pmd->tnl_port_cache, &tx_port_cached->node,
+                        hash_port_no(tx_port_cached->port->port_no));
+        }
+
+        if (netdev_n_txq(tx_port->port->netdev)) {
+            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
+            hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
+                        hash_port_no(tx_port_cached->port->port_no));
+        }
     }
 }
 
@@ -3312,7 +3332,8 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
     ovs_list_init(&pmd->poll_list);
     hmap_init(&pmd->tx_ports);
-    hmap_init(&pmd->port_cache);
+    hmap_init(&pmd->tnl_port_cache);
+    hmap_init(&pmd->send_port_cache);
     /* init the 'flow_cache' since there is no
      * actual thread created for NON_PMD_CORE_ID. */
     if (core_id == NON_PMD_CORE_ID) {
@@ -3328,7 +3349,8 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     struct dpcls *cls;
 
     dp_netdev_pmd_flow_flush(pmd);
-    hmap_destroy(&pmd->port_cache);
+    hmap_destroy(&pmd->send_port_cache);
+    hmap_destroy(&pmd->tnl_port_cache);
     hmap_destroy(&pmd->tx_ports);
     /* All flows (including their dpcls_rules) have been deleted already */
     CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
@@ -3595,7 +3617,9 @@ static void
 dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
                              struct dp_netdev_port *port)
 {
-    struct tx_port *tx = xzalloc(sizeof *tx);
+    struct tx_port *tx;
+
+    tx = xzalloc(sizeof *tx);
 
     tx->port = port;
     tx->qid = -1;
@@ -4283,7 +4307,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
     struct dp_netdev_port *port;
     long long interval;
 
-    HMAP_FOR_EACH (tx, node, &pmd->port_cache) {
+    HMAP_FOR_EACH (tx, node, &pmd->send_port_cache) {
         if (!tx->port->dynamic_txqs) {
             continue;
         }
@@ -4347,10 +4371,17 @@ dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
 }
 
 static struct tx_port *
-pmd_tx_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
-                         odp_port_t port_no)
+pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+                          odp_port_t port_no)
+{
+    return tx_port_lookup(&pmd->tnl_port_cache, port_no);
+}
+
+static struct tx_port *
+pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+                           odp_port_t port_no)
 {
-    return tx_port_lookup(&pmd->port_cache, port_no);
+    return tx_port_lookup(&pmd->send_port_cache, port_no);
 }
 
 static int
@@ -4364,7 +4395,7 @@ push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
 
     data = nl_attr_get(attr);
 
-    tun_port = pmd_tx_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
+    tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
     if (!tun_port) {
         err = -EINVAL;
         goto error;
@@ -4416,7 +4447,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
-        p = pmd_tx_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
+        p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
         if (OVS_LIKELY(p)) {
             int tx_qid;
             bool dynamic_txqs;
@@ -4463,7 +4494,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         struct dp_packet_batch *orig_packets_ = packets_;
         odp_port_t portno = nl_attr_get_odp_port(a);
 
-        p = pmd_tx_port_cache_lookup(pmd, portno);
+        p = pmd_tnl_port_cache_lookup(pmd, portno);
         if (p) {
             struct dp_packet_batch tnl_pkt;
             int i;
diff --git a/lib/netdev.c b/lib/netdev.c
index f7a1001f2..100af0359 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -117,6 +117,13 @@ netdev_is_pmd(const struct netdev *netdev)
     return netdev->netdev_class->is_pmd;
 }
 
+bool
+netdev_has_tunnel_push_pop(const struct netdev *netdev)
+{
+    return netdev->netdev_class->push_header
+           && netdev->netdev_class->pop_header;
+}
+
 static void
 netdev_initialize(void)
     OVS_EXCLUDED(netdev_mutex)
@@ -686,6 +693,9 @@ netdev_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
  * if a partial packet was transmitted or if a packet is too big or too small
  * to transmit on the device.
  *
+ * The caller must make sure that 'netdev' supports sending by checking
+ * that 'netdev_n_txq(netdev)' returns >= 1.
+ *
  * If the function returns a non-zero value, some of the packets might have
  * been sent anyway.
  *
@@ -710,11 +720,6 @@ int
 netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
             bool may_steal, bool concurrent_txq)
 {
-    if (!netdev->netdev_class->send) {
-        dp_packet_delete_batch(batch, may_steal);
-        return EOPNOTSUPP;
-    }
-
     int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
                                            concurrent_txq);
     if (!error) {
@@ -726,18 +731,17 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
     return error;
 }
 
+/* Pop tunnel header, build tunnel metadata and resize 'batch->packets'
+ * for further processing.
+ *
+ * The caller must make sure that 'netdev' supports this operation by
+ * checking that netdev_has_tunnel_push_pop() returns true. */
 void
 netdev_pop_header(struct netdev *netdev, struct dp_packet_batch *batch)
 {
     int i, n_cnt = 0;
     struct dp_packet **buffers = batch->packets;
 
-    if (!netdev->netdev_class->pop_header) {
-        dp_packet_delete_batch(batch, true);
-        batch->count = 0;
-        return;
-    }
-
     for (i = 0; i < batch->count; i++) {
         buffers[i] = netdev->netdev_class->pop_header(buffers[i]);
         if (buffers[i]) {
@@ -775,6 +779,11 @@ int netdev_build_header(const struct netdev *netdev,
     return EOPNOTSUPP;
 }
 
+/* Push tunnel header (reading from tunnel metadata) and resize
+ * 'batch->packets' for further processing.
+ *
+ * The caller must make sure that 'netdev' supports this operation by
+ * checking that netdev_has_tunnel_push_pop() returns true. */
 int
 netdev_push_header(const struct netdev *netdev,
                    struct dp_packet_batch *batch,
@@ -782,10 +791,6 @@ netdev_push_header(const struct netdev *netdev,
 {
     int i;
 
-    if (!netdev->netdev_class->push_header) {
-        return -EINVAL;
-    }
-
     for (i = 0; i < batch->count; i++) {
         netdev->netdev_class->push_header(batch->packets[i], data);
         pkt_metadata_init(&batch->packets[i]->md, u32_to_odp(data->out_port));
diff --git a/lib/netdev.h b/lib/netdev.h
index a667fe35f..bef9cddb5 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -109,6 +109,7 @@ bool netdev_is_reserved_name(const char *name);
 int netdev_n_txq(const struct netdev *netdev);
 int netdev_n_rxq(const struct netdev *netdev);
 bool netdev_is_pmd(const struct netdev *netdev);
+bool netdev_has_tunnel_push_pop(const struct netdev *netdev);
 
 /* Open and close. */
 int netdev_open(const char *name, const char *type, struct netdev **netdevp);
-- 
2.11.0
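As a closing note for reviewers, a hedged sketch of the caller contract this
patch establishes.  classify_port() is a hypothetical helper, not part of the
patch; it assumes only the two accessors used above and builds against
lib/netdev.h in-tree:

    #include <stdbool.h>

    #include "netdev.h"

    /* Hypothetical helper: decide which per-thread caches a port belongs
     * in.  A port can be in both caches, or in neither (a non-tunnel
     * device with 0 txqs). */
    static void
    classify_port(const struct netdev *netdev, bool *tnl, bool *send)
    {
        /* netdev_push_header()/netdev_pop_header() are safe to call. */
        *tnl = netdev_has_tunnel_push_pop(netdev);
        /* netdev_send() is safe to call with a qid in [0, n_txq). */
        *send = netdev_n_txq(netdev) >= 1;
    }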