Hi. One comment about the patch names. It's not a rule, but it's better to make the <summary> part of the subject a complete sentence, i.e., start with a capital letter and end with a period.
Same rule is applicable to the comments in the code, but this is documented in coding-style. More comments inline. Best regards, Ilya Maximets. On 16.04.2019 12:45, David Marchand wrote: > We currently poll all available queues based on the max queue count > exchanged with the vhost peer and rely on the vhost library in DPDK to > check the vring status beneath. > This can lead to some overhead when we have a lot of unused queues. > > To enhance the situation, we can skip the disabled queues. > On rxq notifications, we make use of the netdev's change_seq number so > that the pmd thread main loop can cache the queue state periodically. > > $ ovs-appctl dpif-netdev/pmd-rxq-show > pmd thread numa_id 0 core_id 1: > isolated : true > port: dpdk0 queue-id: 0 pmd usage: 0 % > pmd thread numa_id 0 core_id 2: > isolated : true > port: vhost1 queue-id: 0 pmd usage: 0 % > port: vhost3 queue-id: 0 pmd usage: 0 % > pmd thread numa_id 0 core_id 15: > isolated : true > port: dpdk1 queue-id: 0 pmd usage: 0 % > pmd thread numa_id 0 core_id 16: > isolated : true > port: vhost0 queue-id: 0 pmd usage: 0 % > port: vhost2 queue-id: 0 pmd usage: 0 % > > $ while true; do > ovs-appctl dpif-netdev/pmd-rxq-show |awk ' > /port: / { > tot++; > if ($NF == "disabled") { > dis++; > } > } > END { > print "total: " tot ", enabled: " (tot - dis) > }' > sleep 1 > done > > total: 6, enabled: 2 > total: 6, enabled: 2 > ... > > # Started vm, virtio devices are bound to kernel driver which enables > # F_MQ + all queue pairs > total: 6, enabled: 2 > total: 66, enabled: 66 > ... > > # Unbound vhost0 and vhost1 from the kernel driver > total: 66, enabled: 66 > total: 66, enabled: 34 > ... > > # Configured kernel bound devices to use only 1 queue pair > total: 66, enabled: 34 > total: 66, enabled: 19 > total: 66, enabled: 4 > ... > > # While rebooting the vm > total: 66, enabled: 4 > total: 66, enabled: 2 > ... > total: 66, enabled: 66 > ... 
> > # After shutting down the vm > total: 66, enabled: 66 > total: 66, enabled: 2 > > Signed-off-by: David Marchand <david.march...@redhat.com> > --- > > Changes since v1: > - only indicate disabled queues in dpif-netdev/pmd-rxq-show output > - Ilya comments > - no need for a struct as we only need a boolean per rxq > - "rx_q" is generic, while we only care for this in vhost case, > renamed as "vhost_rxq_enabled", > - add missing rte_free on allocation error, > - vhost_rxq_enabled is freed in vhost destruct only, > - rxq0 is enabled at the virtio device activation to accomodate > legacy implementations which would not report per queue states > later, > - do not mix boolean with integer, > - do not use bit operand on boolean, > > --- > lib/dpif-netdev.c | 27 ++++++++++++++++++++++++ > lib/netdev-dpdk.c | 58 > +++++++++++++++++++++++++++++++++++++++------------ > lib/netdev-provider.h | 5 +++++ > lib/netdev.c | 10 +++++++++ > lib/netdev.h | 1 + > 5 files changed, 88 insertions(+), 13 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 4d6d0c3..5bfa6ad 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -591,6 +591,8 @@ struct polled_queue { > struct dp_netdev_rxq *rxq; > odp_port_t port_no; > bool emc_enabled; > + bool enabled; What do you think about renaming to 'rxq_enabled'? It seems more clear. Having both 'emc_enabled' and just 'enabled' is a bit confusing. > + uint64_t change_seq; > }; > > /* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. 
*/ > @@ -1171,6 +1173,9 @@ pmd_info_show_rxq(struct ds *reply, struct > dp_netdev_pmd_thread *pmd) > } else { > ds_put_format(reply, "%s", "NOT AVAIL"); > } > + if (!netdev_rxq_enabled(list[i].rxq->rx)) { > + ds_put_cstr(reply, " polling: disabled"); > + } > ds_put_cstr(reply, "\n"); > } > ovs_mutex_unlock(&pmd->port_mutex); > @@ -5198,6 +5203,11 @@ dpif_netdev_run(struct dpif *dpif) > } > > for (i = 0; i < port->n_rxq; i++) { > + > + if (!netdev_rxq_enabled(port->rxqs[i].rx)) { > + continue; > + } > + > if (dp_netdev_process_rxq_port(non_pmd, > &port->rxqs[i], > port->port_no)) { > @@ -5371,6 +5381,9 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread > *pmd, > poll_list[i].rxq = poll->rxq; > poll_list[i].port_no = poll->rxq->port->port_no; > poll_list[i].emc_enabled = poll->rxq->port->emc_enabled; > + poll_list[i].enabled = netdev_rxq_enabled(poll->rxq->rx); > + poll_list[i].change_seq = > + netdev_get_change_seq(poll->rxq->port->netdev); > i++; > } > > @@ -5436,6 +5449,10 @@ reload: > > for (i = 0; i < poll_cnt; i++) { > > + if (!poll_list[i].enabled) { > + continue; > + } > + > if (poll_list[i].emc_enabled) { > atomic_read_relaxed(&pmd->dp->emc_insert_min, > &pmd->ctx.emc_insert_min); > @@ -5472,6 +5489,16 @@ reload: > if (reload) { > break; > } > + > + for (i = 0; i < poll_cnt; i++) { > + uint64_t current_seq = > + > netdev_get_change_seq(poll_list[i].rxq->port->netdev); > + if (poll_list[i].change_seq != current_seq) { > + poll_list[i].change_seq = current_seq; > + poll_list[i].enabled = > + netdev_rxq_enabled(poll_list[i].rxq->rx); > + } > + } > } > pmd_perf_end_iteration(s, rx_packets, tx_packets, > pmd_perf_metrics_enabled(pmd)); > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 47153dc..9ba8e67 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -424,6 +424,9 @@ struct netdev_dpdk { > OVSRCU_TYPE(struct ingress_policer *) ingress_policer; > uint32_t policer_rate; > uint32_t policer_burst; > + > + /* Array of vhost rxq states, 
see vring_state_changed */ Should end with a period. > + bool *vhost_rxq_enabled; > ); > > PADDED_MEMBERS(CACHE_LINE_SIZE, > @@ -1235,8 +1238,14 @@ vhost_common_construct(struct netdev *netdev) > int socket_id = rte_lcore_to_socket_id(rte_get_master_lcore()); > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > + dev->vhost_rxq_enabled = dpdk_rte_mzalloc(OVS_VHOST_MAX_QUEUE_NUM * > + sizeof > *dev->vhost_rxq_enabled); > + if (!dev->vhost_rxq_enabled) { > + return ENOMEM; > + } > dev->tx_q = netdev_dpdk_alloc_txq(OVS_VHOST_MAX_QUEUE_NUM); > if (!dev->tx_q) { > + rte_free(dev->vhost_rxq_enabled); > return ENOMEM; > } > > @@ -1448,6 +1457,7 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev) > dev->vhost_id = NULL; > > common_destruct(dev); > + rte_free(dev->vhost_rxq_enabled); Logically, 'common_destruct' should go after class-specific things. > > ovs_mutex_unlock(&dpdk_mutex); > > @@ -2200,6 +2210,14 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > return 0; > } > > +static bool > +netdev_dpdk_vhost_rxq_enabled(struct netdev_rxq *rxq) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); > + > + return dev->vhost_rxq_enabled[rxq->queue_id]; > +} > + > static int > netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch, > int *qfill) > @@ -3563,6 +3581,8 @@ destroy_device(int vid) > ovs_mutex_lock(&dev->mutex); > dev->vhost_reconfigured = false; > ovsrcu_index_set(&dev->vid, -1); > + memset(dev->vhost_rxq_enabled, 0, > + OVS_VHOST_MAX_QUEUE_NUM * sizeof *dev->vhost_rxq_enabled); We need to clear only first 'dev->up.n_rxq' queues. 
> netdev_dpdk_txq_map_clear(dev); > > netdev_change_seq_changed(&dev->up); > @@ -3597,24 +3617,30 @@ vring_state_changed(int vid, uint16_t queue_id, int > enable) > struct netdev_dpdk *dev; > bool exists = false; > int qid = queue_id / VIRTIO_QNUM; > + bool is_rx = (queue_id % VIRTIO_QNUM) == VIRTIO_TXQ; > char ifname[IF_NAME_SZ]; > > rte_vhost_get_ifname(vid, ifname, sizeof ifname); > > - if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) { > - return 0; > - } > - > ovs_mutex_lock(&dpdk_mutex); > LIST_FOR_EACH (dev, list_node, &dpdk_list) { > ovs_mutex_lock(&dev->mutex); > if (nullable_string_is_equal(ifname, dev->vhost_id)) { > - if (enable) { > - dev->tx_q[qid].map = qid; > + if (is_rx) { > + bool enabled = dev->vhost_rxq_enabled[qid]; This is also confusing to have 'enable' and 'enabled' in a same scope. What do you think about renaming 'enabled' --> 'old_state'? > + > + dev->vhost_rxq_enabled[qid] = enable != 0; > + if (enabled != dev->vhost_rxq_enabled[qid]) { > + netdev_change_seq_changed(&dev->up); > + } > } else { > - dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED; > + if (enable) { > + dev->tx_q[qid].map = qid; > + } else { > + dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED; > + } > + netdev_dpdk_remap_txqs(dev); > } > - netdev_dpdk_remap_txqs(dev); > exists = true; > ovs_mutex_unlock(&dev->mutex); > break; > @@ -3624,9 +3650,9 @@ vring_state_changed(int vid, uint16_t queue_id, int > enable) > ovs_mutex_unlock(&dpdk_mutex); > > if (exists) { > - VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' " > - "changed to \'%s\'", queue_id, qid, ifname, > - (enable == 1) ? "enabled" : "disabled"); > + VLOG_INFO("State of queue %d ( %s_qid %d ) of vhost device '%s' " > + "changed to \'%s\'", queue_id, is_rx == true ? "rx" : "tx", > + qid, ifname, (enable == 1) ? 
"enabled" : "disabled"); > } else { > VLOG_INFO("vHost Device '%s' not found", ifname); > return -1; > @@ -4085,6 +4112,10 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev) > dev->up.n_rxq = dev->requested_n_rxq; > int err; > > + /* Always keep RX queue 0 enabled for implementations that won't > + * report vring states. */ > + dev->vhost_rxq_enabled[0] = true; > + > /* Enable TX queue 0 by default if it wasn't disabled. */ > if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) { > dev->tx_q[0].map = 0; > @@ -4297,7 +4327,8 @@ static const struct netdev_class dpdk_vhost_class = { > .get_stats = netdev_dpdk_vhost_get_stats, > .get_status = netdev_dpdk_vhost_user_get_status, > .reconfigure = netdev_dpdk_vhost_reconfigure, > - .rxq_recv = netdev_dpdk_vhost_rxq_recv > + .rxq_recv = netdev_dpdk_vhost_rxq_recv, > + .rxq_enabled = netdev_dpdk_vhost_rxq_enabled, > }; > > static const struct netdev_class dpdk_vhost_client_class = { > @@ -4311,7 +4342,8 @@ static const struct netdev_class > dpdk_vhost_client_class = { > .get_stats = netdev_dpdk_vhost_get_stats, > .get_status = netdev_dpdk_vhost_user_get_status, > .reconfigure = netdev_dpdk_vhost_client_reconfigure, > - .rxq_recv = netdev_dpdk_vhost_rxq_recv > + .rxq_recv = netdev_dpdk_vhost_rxq_recv, > + .rxq_enabled = netdev_dpdk_vhost_rxq_enabled, > }; > > void > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index fb0c27e..5faae0d 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -789,6 +789,11 @@ struct netdev_class { > void (*rxq_destruct)(struct netdev_rxq *); > void (*rxq_dealloc)(struct netdev_rxq *); > > + /* A netdev can report if a queue won't get traffic and should be > excluded > + * from polling (no callback implicitely means that the queue is > enabled). > + */ I'm suggesting following variant of this comment to be similar to other function descriptions here: /* Retrieves the current state of rx queue. 
'False' means that queue won't * get traffic in a short term and could be not polled. * * This function may be set to null if it would always return 'True' * anyhow. */ > + bool (*rxq_enabled)(struct netdev_rxq *); > + > /* Attempts to receive a batch of packets from 'rx'. In 'batch', the > * caller supplies 'packets' as the pointer to the beginning of an array > * of NETDEV_MAX_BURST pointers to dp_packet. If successful, the > diff --git a/lib/netdev.c b/lib/netdev.c > index 7d7ecf6..03a1b24 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -682,6 +682,16 @@ netdev_rxq_close(struct netdev_rxq *rx) > } > } > > +bool netdev_rxq_enabled(struct netdev_rxq *rx) > +{ > + bool enabled = true; > + > + if (rx->netdev->netdev_class->rxq_enabled) { > + enabled = rx->netdev->netdev_class->rxq_enabled(rx); > + } > + return enabled; > +} > + > /* Attempts to receive a batch of packets from 'rx'. 'batch' should point to > * the beginning of an array of NETDEV_MAX_BURST pointers to dp_packet. If > * successful, this function stores pointers to up to NETDEV_MAX_BURST > diff --git a/lib/netdev.h b/lib/netdev.h > index d94817f..bfcdf39 100644 > --- a/lib/netdev.h > +++ b/lib/netdev.h > @@ -183,6 +183,7 @@ enum netdev_pt_mode netdev_get_pt_mode(const struct > netdev *); > /* Packet reception. */ > int netdev_rxq_open(struct netdev *, struct netdev_rxq **, int id); > void netdev_rxq_close(struct netdev_rxq *); > +bool netdev_rxq_enabled(struct netdev_rxq *); > > const char *netdev_rxq_get_name(const struct netdev_rxq *); > int netdev_rxq_get_queue_id(const struct netdev_rxq *); > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev