> In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> QoS checking may drop some of them.
> Move the QoS checking in front of copying data to mbuf, it helps to reduce
> useless copy.

Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but 
have some comments below I'd like to see addressed/tested before signing off.

> 
> Signed-off-by: Zhenyu Gao <sysugaozhe...@gmail.com>
> ---
>  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------
> -----
>  1 file changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
>       * For all QoS implementations it should always be non-null.
>       */
>      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> -                   int pkt_cnt);
> +                   int pkt_cnt, bool may_steal);
>  };
> 
>  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> *meter,
> 
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> -                        struct rte_mbuf **pkts, int pkt_cnt)
> +                        struct rte_mbuf **pkts, int pkt_cnt,
> +                        bool may_steal)
>  {
>      int i = 0;
>      int cnt = 0;
> @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>              }
>              cnt++;
>          } else {
> -            rte_pktmbuf_free(pkt);
> +            if (may_steal) {
> +                rte_pktmbuf_free(pkt);
> +            }
>          }
>      }
> 
> @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
> 
>  static int
>  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> **pkts,
> -                    int pkt_cnt)
> +                    int pkt_cnt, bool may_steal)
>  {
>      int cnt = 0;
> 
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> +                                  pkt_cnt, may_steal);
>      rte_spinlock_unlock(&policer->policer_lock);
> 
>      return cnt;
> @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> +                                    nb_rx, true);
>          dropped -= nb_rx;
>      }
> 
> @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet_batch *batch)
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> +                                    nb_rx, true);
>          dropped -= nb_rx;
>      }
> 
> @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet_batch *batch)
> 
>  static inline int
>  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> -                    int cnt)
> +                    int cnt, bool may_steal)
>  {
>      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> >qos_conf);
> 
>      if (qos_conf) {
>          rte_spinlock_lock(&qos_conf->lock);
> -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
>          rte_spinlock_unlock(&qos_conf->lock);
>      }
> 
> @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
> 
>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
>      /* Check has QoS has been configured for the netdev */
> -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
>      dropped = total_pkts - cnt;
> 
>      do {
> @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)  #endif
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> +    int txcnt_eth = batch->count;
>      int dropped = 0;
>      int newcnt = 0;
>      int i;
> 
> +    if (dev->type != DPDK_DEV_VHOST) {
> +        /* Check if QoS has been configured for this netdev. */
> +        txcnt_eth = netdev_dpdk_qos_run(dev,
> +                                        (struct rte_mbuf **)batch-
> >packets,
> +                                        txcnt_eth, false);

So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its 
triggered by the following conditions

    if (OVS_UNLIKELY(!may_steal ||
                     batch->packets[0]->source != DPBUF_DPDK)

What will happen in above if the source of the packet is not DPBUF_DPDK?

For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is 
called later on.

This has specific DPDK calls that expect an mbuf source for the packet. 

Previously QoS took place after the packet had been copied so this was not an 
issue but now we have DPDK rte functions being called on a non mbuf source 
packet. I'm not sure of the behavior that could occur in this case. Could an 
incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 
'netdev_dpdk_policer_pkt_handle()'?

It could be the case then that non DPDK specific call maybe required to attain 
the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling 
the meter itself.

Have you tested this use case?

Ian

> +        if (txcnt_eth == 0) {
> +            dropped = batch->count;
> +            goto out;
> +        }
> +    }
> +
>      dp_packet_batch_apply_cutlen(batch);
> 
>      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch)
>          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> 
>          newcnt++;
> +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> +            dropped += batch->count - i - 1;
> +            break;
> +        }
>      }
> 
>      if (dev->type == DPDK_DEV_VHOST) {
>          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
>                                   newcnt);
>      } else {
> -        unsigned int qos_pkts = newcnt;
> -
> -        /* Check if QoS has been configured for this netdev. */
> -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> -
> -        dropped += qos_pkts - newcnt;
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
>      }
> 
> +out:
>      if (OVS_UNLIKELY(dropped)) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          dp_packet_batch_apply_cutlen(batch);
> 
>          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
>          dropped = batch->count - cnt;
> 
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> *conf,  }
> 
>  static int
> -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt)
> +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt,
> +                   bool may_steal)
>  {
>      int cnt = 0;
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> 
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> +                                  pkt_cnt, may_steal);
> 
>      return cnt;
>  }
> --
> 1.8.3.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to