Hi team,

I have addressed all the comments. Please review.

Regards,
Dheeraj

-----Original Message-----
From: Ilya Maximets <i.maxim...@ovn.org> 
Sent: 22 December 2022 02:41
To: Dheeraj Kumar <dheera...@acldigital.com>; ovs-dev@openvswitch.org
Cc: i.maxim...@ovn.org; Kevin Traynor <ktray...@redhat.com>
Subject: Re: [ovs-dev] [PATCH] dpif-netdev: Optimize flushing of output packet 
buffers

On 11/18/22 11:40, Dheeraj Kumar via dev wrote:
> Problem Statement:
> Before OVS 2.12, the OVS-DPDK datapath transmitted processed rx packet
> batches directly to the intended tx queues. In OVS 2.12, each PMD stores
> the processed packets in an intermediate buffer per output port and 
> flushes these output buffers in a separate step. This buffering was 
> introduced to allow better batching of packets for transmit.
> 
> The current implementation of the function that flushes the output
> buffers performs a full scan over all output ports, even if only a
> single packet was buffered for a single output port. In systems with
> hundreds of ports this can take a long time and degrade OVS-DPDK
> performance significantly.
> 
> Solution:
> Maintain a list of output ports with buffered packets for each PMD 
> thread and only iterate over that list when flushing output buffers.
> 
> Signed-off-by: Dheeraj Kumar <dheera...@acldigital.com>

Hi, Dheeraj.  Thanks for the follow-up and sorry for the delay.

Please add the version number to the subject prefix when sending new
versions of patches, e.g. '[PATCH v2]'.

I didn't test it, but the patch looks good to me in general; see some
minor comments inline.

Best regards, Ilya Maximets.

> ---
>  lib/dpif-netdev-private-thread.h |  7 ++++---
>  lib/dpif-netdev.c                | 23 +++++++++++------------
>  2 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
> index 4472b199d..2775e1a2b 100644
> --- a/lib/dpif-netdev-private-thread.h
> +++ b/lib/dpif-netdev-private-thread.h
> @@ -185,9 +185,6 @@ struct dp_netdev_pmd_thread {
>       * than 'cmap_count(dp->poll_threads)'. */
>      uint32_t static_tx_qid;
>  
> -    /* Number of filled output batches. */
> -    int n_output_batches;
> -
>      struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
>      /* List of rx queues to poll. */
>      struct hmap poll_list OVS_GUARDED;
> @@ -213,6 +210,10 @@ struct dp_netdev_pmd_thread {
>      struct hmap tnl_port_cache;
>      struct hmap send_port_cache;
>  
> +    /* Keep track of the ports with buffered output packets in
> +     * send_port_cache. */
> +    struct ovs_list pending_tx_ports;
> +
>      /* Keep track of detailed PMD performance statistics. */
>      struct pmd_perf_stats perf_stats;
>  
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index a45b46014..af0e0cc5d 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -500,6 +500,7 @@ struct tx_port {
>      int qid;
>      long long last_used;
>      struct hmap_node node;
> +    struct ovs_list pending_tx;     /* Only used in send_port_cache. */

The comment seems incorrect.  It should be 'pending_tx_ports'
instead of 'send_port_cache'.  It's also better to specify the structure that 
will reference this node and add a 'node'
to the name to highlight that it's not a standalone list.  E.g.:

    struct ovs_list pending_tx_node;  /* In dp_netdev_pmd_thread's
                                       * 'pending_tx_ports'. */

>      long long flush_time;
>      struct dp_packet_batch output_pkts;
>      struct dp_packet_batch *txq_pkts; /* Only for hash mode. */
> @@ -5241,8 +5242,9 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
>      atomic_read_relaxed(&pmd->dp->tx_flush_interval, &tx_flush_interval);
>      p->flush_time = pmd->ctx.now + tx_flush_interval;
>  
> -    ovs_assert(pmd->n_output_batches > 0);
> -    pmd->n_output_batches--;
> +    /* Remove send port from pending port list */
> +    ovs_assert(!ovs_list_is_empty(&p->pending_tx));
> +    ovs_list_remove(&p->pending_tx);

We should re-init the node here.
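
E.g., something along these lines (an untested sketch, also assuming the
'pending_tx_node' field name suggested above):

    /* Remove the port from the pending list and re-initialize the node,
     * so that ovs_list_is_empty() can later be used to check whether the
     * port is already queued for flushing. */
    ovs_assert(!ovs_list_is_empty(&p->pending_tx_node));
    ovs_list_remove(&p->pending_tx_node);
    ovs_list_init(&p->pending_tx_node);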

>  
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS, output_cnt);
>      pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
> @@ -5264,16 +5266,11 @@ static int
>  dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
>                                     bool force)
>  {
> -    struct tx_port *p;
> +    struct tx_port *p, *next;

SAFE iterators don't need a 'next' pointer these days, so you should be
able to just drop it.
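
I.e., something like this should be enough (untested, and using the
'pending_tx_node' name from the comment above):

    struct tx_port *p;
    int output_cnt = 0;

    /* The short form of the SAFE iterator tracks the next node internally,
     * so no extra 'next' variable is needed. */
    LIST_FOR_EACH_SAFE (p, pending_tx_node, &pmd->pending_tx_ports) {
        if (force || pmd->ctx.now >= p->flush_time) {
            output_cnt += dp_netdev_pmd_flush_output_on_port(pmd, p);
        }
    }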

>      int output_cnt = 0;
>  
> -    if (!pmd->n_output_batches) {
> -        return 0;
> -    }
> -
> -    HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
> -        if (!dp_packet_batch_is_empty(&p->output_pkts)
> -            && (force || pmd->ctx.now >= p->flush_time)) {
> +    LIST_FOR_EACH_SAFE (p, next, pending_tx, &pmd->pending_tx_ports) {
> +        if (force || pmd->ctx.now >= p->flush_time) {
>              output_cnt += dp_netdev_pmd_flush_output_on_port(pmd, p);
>          }
>      }
> @@ -6803,6 +6800,7 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>                                            n_txq * sizeof *tx_port->txq_pkts);
>                  tx_port_cached->txq_pkts = txq_pkts_cached;
>              }
> +            ovs_list_init(&tx_port_cached->pending_tx);
>              hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
>                          hash_port_no(tx_port_cached->port->port_no));
>          }
> @@ -7447,7 +7445,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread 
> *pmd, struct dp_netdev *dp,
>      pmd->core_id = core_id;
>      pmd->numa_id = numa_id;
>      pmd->need_reload = false;
> -    pmd->n_output_batches = 0;
>  
>      ovs_refcount_init(&pmd->ref_cnt);
>      atomic_init(&pmd->exit, false);
> @@ -7474,6 +7471,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>      hmap_init(&pmd->tnl_port_cache);
>      hmap_init(&pmd->send_port_cache);
>      cmap_init(&pmd->tx_bonds);
> +    ovs_list_init(&pmd->pending_tx_ports);
>  
>      /* Initialize DPIF function pointer to the default configured version. */
>      atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default());
> @@ -7498,6 +7496,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
>      struct dpcls *cls;
>  
>      dp_netdev_pmd_flow_flush(pmd);
> +    ovs_list_poison(&pmd->pending_tx_ports);

Not sure if it makes sense to poison the list here.

>      hmap_destroy(&pmd->send_port_cache);
>      hmap_destroy(&pmd->tnl_port_cache);
>      hmap_destroy(&pmd->tx_ports);
> @@ -8714,7 +8713,7 @@ dp_execute_output_action(struct dp_netdev_pmd_thread *pmd,
>          dp_netdev_pmd_flush_output_on_port(pmd, p);
>      }
>      if (dp_packet_batch_is_empty(&p->output_pkts)) {

We didn't have a direct way to check this before, so we used the number of
buffered packets as an indicator that this port was already counted.  Now we
can, and probably should, just check whether the list node is empty.  This
will work if we re-initialize the node after the flush.
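
E.g. (untested sketch; this assumes the node is re-initialized after the
flush and renamed to 'pending_tx_node' as suggested above):

    if (ovs_list_is_empty(&p->pending_tx_node)) {
        /* The port is not yet queued for flushing, so add it to the
         * PMD's pending list. */
        ovs_list_push_front(&pmd->pending_tx_ports, &p->pending_tx_node);
    }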

> -        pmd->n_output_batches++;
> +        ovs_list_push_front(&pmd->pending_tx_ports, &p->pending_tx);
>      }
>  
>      struct dp_packet *packet;

