RE: [PATCH v6 1/9] eal: annotate spinlock, rwlock and seqlock
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:45 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com; Burakov, Anatoly ; > mattias.ronnblom ; David Christensen > ; Richardson, Bruce ; > Konstantin Ananyev > Subject: [PATCH v6 1/9] eal: annotate spinlock, rwlock and seqlock > > clang offers some thread safety checks, statically verifying that locks > are taken and released in the code. > To use those checks, the full code leading to taking or releasing locks > must be annotated with some attributes. > > Wrap those attributes into our own set of macros. > > rwlock, seqlock and the "normal" spinlock are instrumented. > > Those checks might be of interest out of DPDK, but it requires that the > including application locks are annotated. > On the other hand, applications out there might have been using > those same checks. > To be on the safe side, keep this instrumentation under a > RTE_ANNOTATE_LOCKS internal build flag. > > A component may en/disable this check by setting > annotate_locks = true/false in its meson.build. > > Note: > Doxygen preprocessor does not understand trailing function attributes > (this can be observed with the rte_seqlock.h header). > One would think that expanding the annotation macros to a noop in > rte_lock_annotations.h would be enough (since RTE_ANNOTATE_LOCKS is not > set during doxygen processing)). Unfortunately, the use of > EXPAND_ONLY_PREDEF defeats this. > > Removing EXPAND_ONLY_PREDEF entirely is not an option as it would expand > all other DPDK macros. > > The chosen solution is to expand the annotation macros explicitly to a > noop in PREDEFINED. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > -- > 2.39.1 Acked-by: Chenbo Xia
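For readers new to these checks, here is a minimal, self-contained sketch of how the wrapped attributes are meant to be used. The macro names (__rte_guarded_by, __rte_exclusive_locks_required) are the ones from rte_lock_annotations.h used later in this series; the counter variable and functions are invented for illustration. The check only runs with clang, and only for components that set annotate_locks = true in their meson.build.

#include <stdint.h>
#include <rte_spinlock.h>
#include <rte_lock_annotations.h>

static rte_spinlock_t counter_lock = RTE_SPINLOCK_INITIALIZER;
/* clang reports any access to this variable done without holding counter_lock. */
static uint64_t counter __rte_guarded_by(&counter_lock);

/* Callers must already hold counter_lock; clang verifies every call site. */
static void
counter_add(uint64_t n)
    __rte_exclusive_locks_required(&counter_lock)
{
    counter += n;
}

static void
counter_update(uint64_t n)
{
    rte_spinlock_lock(&counter_lock);   /* annotated as acquiring the lock */
    counter_add(n);                     /* OK: lock held */
    rte_spinlock_unlock(&counter_lock); /* annotated as releasing the lock */
}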
RE: [PATCH v6 2/9] vhost: simplify need reply handling
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:45 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 2/9] vhost: simplify need reply handling > > Dedicate send_vhost_slave_message() helper to the case when no reply is > needed. > > Add a send_vhost_slave_message_process_reply() helper for the opposite. > This new helper merges both send_vhost_slave_message() and the code > previously in process_slave_message_reply(). > The slave_req_lock lock is then only handled in this helper which will > make lock checks trivial. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > lib/vhost/vhost_user.c | 119 ++--- > 1 file changed, 51 insertions(+), 68 deletions(-) > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c > index 8f33d5f4d9..60ec1bf5f6 100644 > --- a/lib/vhost/vhost_user.c > +++ b/lib/vhost/vhost_user.c > @@ -2878,18 +2878,46 @@ send_vhost_reply(struct virtio_net *dev, int > sockfd, struct vhu_msg_context *ctx > } > > static int > -send_vhost_slave_message(struct virtio_net *dev, > - struct vhu_msg_context *ctx) > +send_vhost_slave_message(struct virtio_net *dev, struct vhu_msg_context > *ctx) > +{ > + return send_vhost_message(dev, dev->slave_req_fd, ctx); > +} > + > +static int > +send_vhost_slave_message_process_reply(struct virtio_net *dev, struct > vhu_msg_context *ctx) > { > + struct vhu_msg_context msg_reply; > int ret; > > - if (ctx->msg.flags & VHOST_USER_NEED_REPLY) > - rte_spinlock_lock(&dev->slave_req_lock); > + rte_spinlock_lock(&dev->slave_req_lock); > + ret = send_vhost_slave_message(dev, ctx); > + if (ret < 0) { > + VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to send config > change (%d)\n", ret); > + goto out; > + } > > - ret = send_vhost_message(dev, dev->slave_req_fd, ctx); > - if (ret < 0 && (ctx->msg.flags & VHOST_USER_NEED_REPLY)) > - rte_spinlock_unlock(&dev->slave_req_lock); > + ret = read_vhost_message(dev, dev->slave_req_fd, &msg_reply); > + if (ret <= 0) { > + if (ret < 0) > + VHOST_LOG_CONFIG(dev->ifname, ERR, > + "vhost read slave message reply failed\n"); > + else > + VHOST_LOG_CONFIG(dev->ifname, INFO, "vhost peer > closed\n"); > + ret = -1; > + goto out; > + } > + > + if (msg_reply.msg.request.slave != ctx->msg.request.slave) { > + VHOST_LOG_CONFIG(dev->ifname, ERR, > + "received unexpected msg type (%u), expected %u\n", > + msg_reply.msg.request.slave, ctx->msg.request.slave); > + ret = -1; > + goto out; > + } > > + ret = msg_reply.msg.payload.u64 ? 
-1 : 0; > +out: > + rte_spinlock_unlock(&dev->slave_req_lock); > return ret; > } > > @@ -3213,42 +3241,6 @@ vhost_user_msg_handler(int vid, int fd) > return ret; > } > > -static int process_slave_message_reply(struct virtio_net *dev, > -const struct vhu_msg_context *ctx) > -{ > - struct vhu_msg_context msg_reply; > - int ret; > - > - if ((ctx->msg.flags & VHOST_USER_NEED_REPLY) == 0) > - return 0; > - > - ret = read_vhost_message(dev, dev->slave_req_fd, &msg_reply); > - if (ret <= 0) { > - if (ret < 0) > - VHOST_LOG_CONFIG(dev->ifname, ERR, > - "vhost read slave message reply failed\n"); > - else > - VHOST_LOG_CONFIG(dev->ifname, INFO, "vhost peer > closed\n"); > - ret = -1; > - goto out; > - } > - > - ret = 0; > - if (msg_reply.msg.request.slave != ctx->msg.request.slave) { > - VHOST_LOG_CONFIG(dev->ifname, ERR, > - "received unexpected msg type (%u), expected %u\n", > - msg_reply.msg.request.slave, ctx->msg.request.slave); > - ret = -1; > - goto out; > - } > - > - ret = msg_reply.msg.payload.u64 ? -1 : 0; > - > -out: > - rte_spinlock_unlock(&dev->slave_req_lock); > - return ret; > -} > - > int > vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm) > { > @@ -3277,10 +3269,9 @@ vhost_user_iotlb_miss(struct virtio_net *dev, > uint64_t iova, uint8_t perm) > return 0; > } > > -static int > -vhost_user_slave_config_change(struct virtio_net *dev, bool need_reply) > +int > +rte_vhost_slave_config_change(int vid, bool need_reply) > { > - int ret; > struct vhu_msg_context ctx = { > .msg = { > .request.slave = VHOST_USER_SLAVE_CONFIG_CHANGE_MSG, > @@ -3288,29
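To make the new split of responsibilities concrete, an illustrative caller-side sketch (the wrapper name and its existence are invented; only the two helpers come from the patch): callers that do not need a reply keep using send_vhost_slave_message(), while callers that set VHOST_USER_NEED_REPLY go through the new helper, which owns slave_req_lock for the whole request/reply exchange.

/* Hypothetical dispatch wrapper, for illustration only. */
static int
slave_send_request(struct virtio_net *dev, struct vhu_msg_context *ctx, bool need_reply)
{
    if (!need_reply)
        return send_vhost_slave_message(dev, ctx);

    ctx->msg.flags |= VHOST_USER_NEED_REPLY;
    /* Takes and releases dev->slave_req_lock internally, sends the request
     * and validates the reply. */
    return send_vhost_slave_message_process_reply(dev, ctx);
}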
RE: [PATCH v6 3/9] vhost: terminate when access lock is not taken
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:45 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 3/9] vhost: terminate when access lock is not taken > > Be a bit more strict when a programmatic error is detected with regards to > the access_lock not being taken. > Mark the new helper with __rte_assert_exclusive_lock so that clang > understands where locks are expected to be taken. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > lib/vhost/vhost.c | 18 +++--- > lib/vhost/vhost.h | 10 ++ > lib/vhost/virtio_net.c | 6 +- > 3 files changed, 14 insertions(+), 20 deletions(-) > > diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c > index 19c7b92c32..8cd727ca2f 100644 > --- a/lib/vhost/vhost.c > +++ b/lib/vhost/vhost.c > @@ -1781,11 +1781,7 @@ rte_vhost_async_channel_register_thread_unsafe(int > vid, uint16_t queue_id) > if (unlikely(vq == NULL || !dev->async_copy)) > return -1; > > - if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { > - VHOST_LOG_CONFIG(dev->ifname, ERR, "%s() called without access > lock taken.\n", > - __func__); > - return -1; > - } > + vq_assert_lock(dev, vq); > > return async_channel_register(dev, vq); > } > @@ -1847,11 +1843,7 @@ > rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t > queue_id) > if (vq == NULL) > return -1; > > - if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { > - VHOST_LOG_CONFIG(dev->ifname, ERR, "%s() called without access > lock taken.\n", > - __func__); > - return -1; > - } > + vq_assert_lock(dev, vq); > > if (!vq->async) > return 0; > @@ -1994,11 +1986,7 @@ rte_vhost_async_get_inflight_thread_unsafe(int vid, > uint16_t queue_id) > if (vq == NULL) > return ret; > > - if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { > - VHOST_LOG_CONFIG(dev->ifname, ERR, "%s() called without access > lock taken.\n", > - __func__); > - return -1; > - } > + vq_assert_lock(dev, vq); > > if (!vq->async) > return ret; > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h > index 1f913803f6..82fe9b5fda 100644 > --- a/lib/vhost/vhost.h > +++ b/lib/vhost/vhost.h > @@ -513,6 +513,16 @@ struct virtio_net { > struct rte_vhost_user_extern_ops extern_ops; > } __rte_cache_aligned; > > +static inline void > +vq_assert_lock__(struct virtio_net *dev, struct vhost_virtqueue *vq, > const char *func) > + __rte_assert_exclusive_lock(&vq->access_lock) > +{ > + if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) > + rte_panic("VHOST_CONFIG: (%s) %s() called without access lock > taken.\n", > + dev->ifname, func); > +} > +#define vq_assert_lock(dev, vq) vq_assert_lock__(dev, vq, __func__) > + > static __rte_always_inline bool > vq_is_packed(struct virtio_net *dev) > { > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c > index aac7aa9d01..cc9675ebe5 100644 > --- a/lib/vhost/virtio_net.c > +++ b/lib/vhost/virtio_net.c > @@ -2367,11 +2367,7 @@ rte_vhost_clear_queue_thread_unsafe(int vid, > uint16_t queue_id, > > vq = dev->virtqueue[queue_id]; > > - if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) { > - VHOST_LOG_DATA(dev->ifname, ERR, "%s() called without access > lock taken.\n", > - __func__); > - return -1; > - } > + vq_assert_lock(dev, vq); > > if (unlikely(!vq->async)) { > VHOST_LOG_DATA(dev->ifname, ERR, > -- > 2.39.1 Reviewed-by: Chenbo Xia
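The value of __rte_assert_exclusive_lock is that, once the helper has run, clang considers vq->access_lock held for the remainder of the calling function, so later calls to functions annotated with __rte_exclusive_locks_required(&vq->access_lock) pass the analysis even though the *_thread_unsafe API never takes the lock itself. A made-up sketch (both functions below are illustrative, not part of the patch):

/* Helper requiring the virtqueue lock; clang checks all call sites. */
static void
do_work_locked(struct vhost_virtqueue *vq)
    __rte_exclusive_locks_required(&vq->access_lock)
{
    /* ... touch state protected by vq->access_lock ... */
}

static int
example_thread_unsafe_api(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
    /* Panics if the caller did not take vq->access_lock, and tells clang
     * that the lock can be assumed held from this point on. */
    vq_assert_lock(dev, vq);

    do_work_locked(vq); /* accepted: lock considered held */
    return 0;
}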
RE: [PATCH v6] ethdev: add flow rule group description
Hi Rongwei, > -Original Message- > From: Rongwei Liu > Sent: Thursday, 9 February 2023 9:33 > > Add more sentences to describe the group concepts > and define group 0 as root group for traffic to search a > hit rule. > > Signed-off-by: Rongwei Liu > --- > lib/ethdev/rte_flow.h | 12 +++- > 1 file changed, 11 insertions(+), 1 deletion(-) > > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h > index b60987db4b..321cf06fbc 100644 > --- a/lib/ethdev/rte_flow.h > +++ b/lib/ethdev/rte_flow.h > @@ -86,7 +86,17 @@ extern "C" { > * but may be valid in a few cases. > */ > struct rte_flow_attr { > - uint32_t group; /**< Priority group. */ > + /** > + * A group is a superset of multiple rules. > + * The default group is 0 and is processed for all packets. > + * Rules in other groups are processed only if the group is chained > + * by a jump action from a previously matched rule. > + * It means the group hierarchy is made by the flow rules, > + * and the group 0 is the hierarchy root. > + * Note there is no automatic dead loop protection. > + * @see rte_flow_action_jump > + */ > + uint32_t group; > uint32_t priority; /**< Rule priority level within group. */ > /** >* The rule in question applies to ingress traffic (non-"transfer"). > -- > 2.27.0 Acked-by: Ori Kam Best, Ori
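As a usage illustration of the documented behaviour (group 0 as the root of the hierarchy, other groups only reachable through a jump action), here is a hedged sketch creating two rules: one in group 0 that only jumps to group 1, and the real match in group 1. The pattern and the queue action are arbitrary examples, and error handling is trimmed.

#include <rte_flow.h>

static int
setup_group_hierarchy(uint16_t port_id)
{
    struct rte_flow_error err;
    struct rte_flow_attr root = { .group = 0, .ingress = 1 };
    struct rte_flow_attr grp1 = { .group = 1, .ingress = 1 };
    struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
    struct rte_flow_action_jump jump = { .group = 1 };
    struct rte_flow_action to_grp1[] = {
        { .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };
    struct rte_flow_action_queue queue = { .index = 0 };
    struct rte_flow_action do_queue[] = {
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };

    /* Group 0 is evaluated for all packets; it only chains to group 1. */
    if (rte_flow_create(port_id, &root, pattern, to_grp1, &err) == NULL)
        return -1;
    /* Group 1 is reached only through the jump above. */
    if (rte_flow_create(port_id, &grp1, pattern, do_queue, &err) == NULL)
        return -1;
    return 0;
}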
RE: [PATCH v6 4/9] vhost: annotate virtqueue access lock
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:45 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 4/9] vhost: annotate virtqueue access lock > > vhost_user_lock/unlock_all_queue_pairs must be waived since clang > annotations can't express taking a runtime number of locks. > > vhost_queue_stats_update() requirement can be expressed with a required > tag. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > Changes since RFC v3: > - removed annotations needed for vhost async which went to the next > patch, > > --- > lib/vhost/vhost_user.c | 2 ++ > lib/vhost/virtio_net.c | 4 +--- > 2 files changed, 3 insertions(+), 3 deletions(-) > > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c > index 60ec1bf5f6..70d221b9f6 100644 > --- a/lib/vhost/vhost_user.c > +++ b/lib/vhost/vhost_user.c > @@ -2965,6 +2965,7 @@ vhost_user_check_and_alloc_queue_pair(struct > virtio_net *dev, > > static void > vhost_user_lock_all_queue_pairs(struct virtio_net *dev) > + __rte_no_thread_safety_analysis > { > unsigned int i = 0; > unsigned int vq_num = 0; > @@ -2982,6 +2983,7 @@ vhost_user_lock_all_queue_pairs(struct virtio_net > *dev) > > static void > vhost_user_unlock_all_queue_pairs(struct virtio_net *dev) > + __rte_no_thread_safety_analysis > { > unsigned int i = 0; > unsigned int vq_num = 0; > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c > index cc9675ebe5..f2ab6dba15 100644 > --- a/lib/vhost/virtio_net.c > +++ b/lib/vhost/virtio_net.c > @@ -52,12 +52,10 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, > uint32_t nr_vring) > return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; > } > > -/* > - * This function must be called with virtqueue's access_lock taken. > - */ > static inline void > vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue > *vq, > struct rte_mbuf **pkts, uint16_t count) > + __rte_exclusive_locks_required(&vq->access_lock) > { > struct virtqueue_stats *stats = &vq->stats; > int i; > -- > 2.39.1 Reviewed-by: Chenbo Xia
RE: [PATCH v6 5/9] vhost: annotate async accesses
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:45 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 5/9] vhost: annotate async accesses > > vq->async is initialised and must be accessed under vq->access_lock. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > Changes since v5: > - rebased after packed support was added to async code, > > Changes since RFC v3: > - rebased, > - fixed annotations vq->access_lock -> &vq->access_lock, > - reworked free_vq, > > --- > lib/vhost/vhost.c | 4 > lib/vhost/vhost.h | 2 +- > lib/vhost/vhost_user.c | 10 +++--- > lib/vhost/virtio_net.c | 41 + > 4 files changed, 53 insertions(+), 4 deletions(-) > > diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c > index 8cd727ca2f..8bccdd8584 100644 > --- a/lib/vhost/vhost.c > +++ b/lib/vhost/vhost.c > @@ -369,6 +369,7 @@ cleanup_device(struct virtio_net *dev, int destroy) > > static void > vhost_free_async_mem(struct vhost_virtqueue *vq) > + __rte_exclusive_locks_required(&vq->access_lock) > { > if (!vq->async) > return; > @@ -393,7 +394,9 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue > *vq) > else > rte_free(vq->shadow_used_split); > > + rte_spinlock_lock(&vq->access_lock); > vhost_free_async_mem(vq); > + rte_spinlock_unlock(&vq->access_lock); > rte_free(vq->batch_copy_elems); > vhost_user_iotlb_destroy(vq); > rte_free(vq->log_cache); > @@ -1669,6 +1672,7 @@ rte_vhost_extern_callback_register(int vid, > > static __rte_always_inline int > async_channel_register(struct virtio_net *dev, struct vhost_virtqueue *vq) > + __rte_exclusive_locks_required(&vq->access_lock) > { > struct vhost_async *async; > int node = vq->numa_node; > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h > index 82fe9b5fda..c05313cf37 100644 > --- a/lib/vhost/vhost.h > +++ b/lib/vhost/vhost.h > @@ -326,7 +326,7 @@ struct vhost_virtqueue { > struct rte_vhost_resubmit_info *resubmit_inflight; > uint64_tglobal_counter; > > - struct vhost_async *async; > + struct vhost_async *async __rte_guarded_var; > > int notif_enable; > #define VIRTIO_UNINITIALIZED_NOTIF (-1) > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c > index 70d221b9f6..8c1d60b76b 100644 > --- a/lib/vhost/vhost_user.c > +++ b/lib/vhost/vhost_user.c > @@ -2168,6 +2168,7 @@ vhost_user_set_vring_enable(struct virtio_net **pdev, > int main_fd __rte_unused) > { > struct virtio_net *dev = *pdev; > + struct vhost_virtqueue *vq; > bool enable = !!ctx->msg.payload.state.num; > int index = (int)ctx->msg.payload.state.index; > > @@ -2175,15 +2176,18 @@ vhost_user_set_vring_enable(struct virtio_net > **pdev, > "set queue enable: %d to qp idx: %d\n", > enable, index); > > - if (enable && dev->virtqueue[index]->async) { > - if (dev->virtqueue[index]->async->pkts_inflight_n) { > + vq = dev->virtqueue[index]; > + /* vhost_user_lock_all_queue_pairs locked all qps */ > + vq_assert_lock(dev, vq); > + if (enable && vq->async) { > + if (vq->async->pkts_inflight_n) { > VHOST_LOG_CONFIG(dev->ifname, ERR, > "failed to enable vring. 
Inflight packets must > be > completed first\n"); > return RTE_VHOST_MSG_RESULT_ERR; > } > } > > - dev->virtqueue[index]->enabled = enable; > + vq->enabled = enable; > > return RTE_VHOST_MSG_RESULT_OK; > } > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c > index f2ab6dba15..6672caac49 100644 > --- a/lib/vhost/virtio_net.c > +++ b/lib/vhost/virtio_net.c > @@ -102,6 +102,7 @@ static __rte_always_inline int64_t > vhost_async_dma_transfer_one(struct virtio_net *dev, struct > vhost_virtqueue *vq, > int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx, > struct vhost_iov_iter *pkt) > + __rte_exclusive_locks_required(&vq->access_lock) > { > struct async_dma_vchan_info *dma_info = > &dma_copy_track[dma_id].vchans[vchan_id]; > uint16_t ring_mask = dma_info->ring_mask; > @@ -151,6 +152,7 @@ static __rte_always_inline uint16_t > vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue > *vq, > int16_t dma_id, uint16_t vchan_id, uint16_t head_idx, > struct vhost_iov_iter *pkts, uint16_t nr_pkts) > + __rte_exclusive_locks_required(&vq->access_lock) > { > struct async_dma_vchan_info *dma_info = > &dma_copy_track[dma_id].vchans[vchan_id]; > int64_t ret, nr_copies = 0; > @@
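For context, a short made-up sketch of what __rte_guarded_var buys on vq->async: dereferencing the annotated pointer outside a locked region becomes a compile-time warning once annotate_locks is enabled (in practice the protecting lock is vq->access_lock, as patch 3/9's vq_assert_lock also conveys).

static uint32_t
example_read_inflight(struct vhost_virtqueue *vq)
{
    uint32_t n = 0;

    rte_spinlock_lock(&vq->access_lock);
    if (vq->async != NULL)                  /* OK: lock held */
        n = vq->async->pkts_inflight_n;
    rte_spinlock_unlock(&vq->access_lock);
    /* Reading vq->async here, after the unlock, would be flagged. */
    return n;
}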
RE: [PATCH v6 6/9] vhost: always take IOTLB lock
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:45 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 6/9] vhost: always take IOTLB lock > > clang does not support conditionally held locks when statically analysing > taken locks with thread safety checks. > Always take iotlb locks regardless of VIRTIO_F_IOMMU_PLATFORM feature. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > lib/vhost/vhost.c | 8 +++- > lib/vhost/virtio_net.c | 24 > 2 files changed, 11 insertions(+), 21 deletions(-) > > diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c > index 8bccdd8584..1e0c30791e 100644 > --- a/lib/vhost/vhost.c > +++ b/lib/vhost/vhost.c > @@ -563,10 +563,9 @@ vring_translate(struct virtio_net *dev, struct > vhost_virtqueue *vq) > } > > void > -vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq) > +vring_invalidate(struct virtio_net *dev __rte_unused, struct > vhost_virtqueue *vq) > { > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_wr_lock(vq); > + vhost_user_iotlb_wr_lock(vq); > > vq->access_ok = false; > vq->desc = NULL; > @@ -574,8 +573,7 @@ vring_invalidate(struct virtio_net *dev, struct > vhost_virtqueue *vq) > vq->used = NULL; > vq->log_guest_addr = 0; > > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_wr_unlock(vq); > + vhost_user_iotlb_wr_unlock(vq); > } > > static void > diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c > index 6672caac49..49fc46e127 100644 > --- a/lib/vhost/virtio_net.c > +++ b/lib/vhost/virtio_net.c > @@ -1688,8 +1688,7 @@ virtio_dev_rx(struct virtio_net *dev, struct > vhost_virtqueue *vq, > if (unlikely(!vq->enabled)) > goto out_access_unlock; > > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_lock(vq); > + vhost_user_iotlb_rd_lock(vq); > > if (unlikely(!vq->access_ok)) > if (unlikely(vring_translate(dev, vq) < 0)) > @@ -1707,8 +1706,7 @@ virtio_dev_rx(struct virtio_net *dev, struct > vhost_virtqueue *vq, > vhost_queue_stats_update(dev, vq, pkts, nb_tx); > > out: > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_unlock(vq); > + vhost_user_iotlb_rd_unlock(vq); > > out_access_unlock: > rte_spinlock_unlock(&vq->access_lock); > @@ -2499,8 +2497,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, > struct vhost_virtqueue *vq, > if (unlikely(!vq->enabled || !vq->async)) > goto out_access_unlock; > > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_lock(vq); > + vhost_user_iotlb_rd_lock(vq); > > if (unlikely(!vq->access_ok)) > if (unlikely(vring_translate(dev, vq) < 0)) > @@ -2520,8 +2517,7 @@ virtio_dev_rx_async_submit(struct virtio_net *dev, > struct vhost_virtqueue *vq, > vq->stats.inflight_submitted += nb_tx; > > out: > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_unlock(vq); > + vhost_user_iotlb_rd_unlock(vq); > > out_access_unlock: > rte_spinlock_unlock(&vq->access_lock); > @@ -3543,8 +3539,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, > goto out_access_unlock; > } > > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_lock(vq); > + vhost_user_iotlb_rd_lock(vq); > > if (unlikely(!vq->access_ok)) > if (unlikely(vring_translate(dev, vq) < 0)) { > @@ -3603,8 +3598,7 @@ 
rte_vhost_dequeue_burst(int vid, uint16_t queue_id, > vhost_queue_stats_update(dev, vq, pkts, count); > > out: > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_unlock(vq); > + vhost_user_iotlb_rd_unlock(vq); > > out_access_unlock: > rte_spinlock_unlock(&vq->access_lock); > @@ -4150,8 +4144,7 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t > queue_id, > goto out_access_unlock; > } > > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_lock(vq); > + vhost_user_iotlb_rd_lock(vq); > > if (unlikely(vq->access_ok == 0)) > if (unlikely(vring_translate(dev, vq) < 0)) { > @@ -4215,8 +4208,7 @@ rte_vhost_async_try_dequeue_burst(int vid, uint16_t > queue_id, > vhost_queue_stats_update(dev, vq, pkts, count); > > out: > - if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) > - vhost_user_iotlb_rd_unlock(vq); > + vhost_user_iotlb_rd_unlock(vq); > > out_access_unlock: > rte_spinlo
RE: [PATCH v6 7/9] vhost: annotate IOTLB lock
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:46 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 7/9] vhost: annotate IOTLB lock > > The starting point for this is __vhost_iova_to_vva() which requires the > lock to be taken. Annotate all the code leading to a call to it. > > vdpa and vhost_crypto code are annotated but they end up not taking > a IOTLB lock and have been marked with a FIXME at the top level. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > 2.39.1 Reviewed-by: Chenbo Xia
RE: [PATCH v6 8/9] vhost: annotate vDPA device list accesses
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:46 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 8/9] vhost: annotate vDPA device list accesses > > Access to vdpa_device_list must be protected with vdpa_device_list_lock > spinlock. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > Changes since RFC v3: > - rebased, > > --- > lib/vhost/vdpa.c | 19 ++- > 1 file changed, 10 insertions(+), 9 deletions(-) > > diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c > index a430a66970..6284ea2ed1 100644 > --- a/lib/vhost/vdpa.c > +++ b/lib/vhost/vdpa.c > @@ -23,21 +23,22 @@ > /** Double linked list of vDPA devices. */ > TAILQ_HEAD(vdpa_device_list, rte_vdpa_device); > > -static struct vdpa_device_list vdpa_device_list = > - TAILQ_HEAD_INITIALIZER(vdpa_device_list); > +static struct vdpa_device_list vdpa_device_list__ = > + TAILQ_HEAD_INITIALIZER(vdpa_device_list__); > static rte_spinlock_t vdpa_device_list_lock = RTE_SPINLOCK_INITIALIZER; > +static struct vdpa_device_list * const vdpa_device_list > + __rte_guarded_by(&vdpa_device_list_lock) = &vdpa_device_list__; > > - > -/* Unsafe, needs to be called with vdpa_device_list_lock held */ > static struct rte_vdpa_device * > __vdpa_find_device_by_name(const char *name) > + __rte_exclusive_locks_required(&vdpa_device_list_lock) > { > struct rte_vdpa_device *dev, *ret = NULL; > > if (name == NULL) > return NULL; > > - TAILQ_FOREACH(dev, &vdpa_device_list, next) { > + TAILQ_FOREACH(dev, vdpa_device_list, next) { > if (!strncmp(dev->device->name, name, RTE_DEV_NAME_MAX_LEN)) { > ret = dev; > break; > @@ -116,7 +117,7 @@ rte_vdpa_register_device(struct rte_device *rte_dev, > dev->type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; > } > > - TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next); > + TAILQ_INSERT_TAIL(vdpa_device_list, dev, next); > out_unlock: > rte_spinlock_unlock(&vdpa_device_list_lock); > > @@ -130,11 +131,11 @@ rte_vdpa_unregister_device(struct rte_vdpa_device > *dev) > int ret = -1; > > rte_spinlock_lock(&vdpa_device_list_lock); > - RTE_TAILQ_FOREACH_SAFE(cur_dev, &vdpa_device_list, next, tmp_dev) { > + RTE_TAILQ_FOREACH_SAFE(cur_dev, vdpa_device_list, next, tmp_dev) { > if (dev != cur_dev) > continue; > > - TAILQ_REMOVE(&vdpa_device_list, dev, next); > + TAILQ_REMOVE(vdpa_device_list, dev, next); > rte_free(dev); > ret = 0; > break; > @@ -336,7 +337,7 @@ vdpa_find_device(const struct rte_vdpa_device *start, > rte_vdpa_cmp_t cmp, > > rte_spinlock_lock(&vdpa_device_list_lock); > if (start == NULL) > - dev = TAILQ_FIRST(&vdpa_device_list); > + dev = TAILQ_FIRST(vdpa_device_list); > else > dev = TAILQ_NEXT(start, next); > > -- > 2.39.1 Reviewed-by: Chenbo Xia
RE: [PATCH v2] eal: introduce atomics abstraction
> From: Tyler Retzlaff [mailto:roret...@linux.microsoft.com] > Sent: Wednesday, 8 February 2023 22.44 > > Introduce atomics abstraction that permits optional use of standard C11 > atomics when meson is provided the new enable_stdatomics=true option. > > Signed-off-by: Tyler Retzlaff > --- Looks good. A few minor suggestions about implementation only. With or without suggested modifications, Acked-by: Morten Brørup > config/meson.build | 11 > lib/eal/arm/include/rte_atomic_32.h| 6 ++- > lib/eal/arm/include/rte_atomic_64.h| 6 ++- > lib/eal/include/generic/rte_atomic.h | 96 > +- > lib/eal/loongarch/include/rte_atomic.h | 6 ++- > lib/eal/ppc/include/rte_atomic.h | 6 ++- > lib/eal/riscv/include/rte_atomic.h | 6 ++- > lib/eal/x86/include/rte_atomic.h | 8 ++- > meson_options.txt | 2 + > 9 files changed, 139 insertions(+), 8 deletions(-) > > diff --git a/config/meson.build b/config/meson.build > index 26f3168..25dd628 100644 > --- a/config/meson.build > +++ b/config/meson.build > @@ -255,6 +255,17 @@ endif > # add -include rte_config to cflags > add_project_arguments('-include', 'rte_config.h', language: 'c') > > +stdc_atomics_enabled = get_option('enable_stdatomics') > +dpdk_conf.set('RTE_STDC_ATOMICS', stdc_atomics_enabled) > + > +if stdc_atomics_enabled > +if cc.get_id() == 'gcc' or cc.get_id() == 'clang' > +add_project_arguments('-std=gnu11', language: 'c') > +else > +add_project_arguments('-std=c11', language: 'c') > +endif > +endif > + > # enable extra warnings and disable any unwanted warnings > # -Wall is added by default at warning level 1, and -Wextra > # at warning level 2 (DPDK default) > diff --git a/lib/eal/arm/include/rte_atomic_32.h > b/lib/eal/arm/include/rte_atomic_32.h > index c00ab78..7088a12 100644 > --- a/lib/eal/arm/include/rte_atomic_32.h > +++ b/lib/eal/arm/include/rte_atomic_32.h > @@ -34,9 +34,13 @@ > #define rte_io_rmb() rte_rmb() > > static __rte_always_inline void > -rte_atomic_thread_fence(int memorder) > +rte_atomic_thread_fence(rte_memory_order memorder) > { > +#ifdef RTE_STDC_ATOMICS > + atomic_thread_fence(memorder); > +#else > __atomic_thread_fence(memorder); > +#endif > } > > #ifdef __cplusplus > diff --git a/lib/eal/arm/include/rte_atomic_64.h > b/lib/eal/arm/include/rte_atomic_64.h > index 6047911..7f02c57 100644 > --- a/lib/eal/arm/include/rte_atomic_64.h > +++ b/lib/eal/arm/include/rte_atomic_64.h > @@ -38,9 +38,13 @@ > #define rte_io_rmb() rte_rmb() > > static __rte_always_inline void > -rte_atomic_thread_fence(int memorder) > +rte_atomic_thread_fence(rte_memory_order memorder) > { > +#ifdef RTE_STDC_ATOMICS > + atomic_thread_fence(memorder); > +#else > __atomic_thread_fence(memorder); > +#endif > } > > /* 128 bit atomic operations - > */ > diff --git a/lib/eal/include/generic/rte_atomic.h > b/lib/eal/include/generic/rte_atomic.h > index f5c49a9..392d928 100644 > --- a/lib/eal/include/generic/rte_atomic.h > +++ b/lib/eal/include/generic/rte_atomic.h > @@ -110,6 +110,100 @@ > > #endif /* __DOXYGEN__ */ > > +#ifdef RTE_STDC_ATOMICS > + > +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L || > defined(__STDC_NO_ATOMICS__) > +#error compiler does not support C11 standard atomics > +#else > +#include > +#endif > + > +#define __rte_atomic _Atomic > + > +typedef int rte_memory_order; I would prefer enum for rte_memory_order: typedef enum { rte_memory_order_relaxed = memory_order_relaxed, rte_memory_order_consume = memory_order_consume, rte_memory_order_acquire = memory_order_acquire, rte_memory_order_release = memory_order_release, 
rte_memory_order_acq_rel = memory_order_acq_rel, rte_memory_order_seq_cst = memory_order_seq_cst } rte_memory_order; > + > +#define rte_memory_order_relaxed memory_order_relaxed > +#define rte_memory_order_consume memory_order_consume > +#define rte_memory_order_acquire memory_order_acquire > +#define rte_memory_order_release memory_order_release > +#define rte_memory_order_acq_rel memory_order_acq_rel > +#define rte_memory_order_seq_cst memory_order_seq_cst > + > +#define rte_atomic_store_explicit(obj, desired, order) \ > + atomic_store_explicit(obj, desired, order) > + > +#define rte_atomic_load_explicit(obj, order) \ > + atomic_load_explicit(obj, order) > + > +#define rte_atomic_exchange_explicit(obj, desired, order) \ > + atomic_exchange_explicit(obj, desired, order) > + > +#define rte_atomic_compare_exchange_strong_explicit(obj, expected, > desired, success, fail) \ > + atomic_compare_exchange_strong_explicit(obj, expected, desired, > success, fail) > + > +#define rte_atomic_compare_exchange_weak_explicit(obj, expected, > desired, success, fail) \ > + atomic_compare_exchange_weak_explicit(obj, expected, desired, > success, fail) > + > +#define rte_atomic_fetch_add
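To see how the abstraction reads at a call site, a small sketch using the proposed names (the counter and both functions are invented; rte_atomic_fetch_add_explicit is assumed from the truncated hunk above, and __rte_atomic is assumed to expand to nothing when enable_stdatomics is off). The same source then builds either against <stdatomic.h> or against the __atomic builtins:

#include <rte_atomic.h>

static uint64_t __rte_atomic pkt_count;

static inline void
count_packet(void)
{
    rte_atomic_fetch_add_explicit(&pkt_count, 1, rte_memory_order_relaxed);
}

static inline uint64_t
read_packet_count(void)
{
    return rte_atomic_load_explicit(&pkt_count, rte_memory_order_acquire);
}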
RE: [PATCH v6 9/9] vhost: enable lock check
> -Original Message- > From: David Marchand > Sent: Tuesday, February 7, 2023 6:46 PM > To: dev@dpdk.org > Cc: maxime.coque...@redhat.com; step...@networkplumber.org; Xia, Chenbo > ; Hu, Jiayu ; Wang, YuanX > ; Ding, Xuan ; > m...@smartsharesystems.com > Subject: [PATCH v6 9/9] vhost: enable lock check > > Now that all locks in this library are annotated, we can enable the > check. > > Signed-off-by: David Marchand > Acked-by: Morten Brørup > Reviewed-by: Maxime Coquelin > --- > lib/vhost/meson.build | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/lib/vhost/meson.build b/lib/vhost/meson.build > index bc7272053b..197a51d936 100644 > --- a/lib/vhost/meson.build > +++ b/lib/vhost/meson.build > @@ -17,6 +17,8 @@ elif (toolchain == 'icc' and > cc.version().version_compare('>=16.0.0')) > endif > dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', > cc.has_header('linux/userfaultfd.h')) > cflags += '-fno-strict-aliasing' > + > +annotate_locks = true > sources = files( > 'fd_man.c', > 'iotlb.c', > -- > 2.39.1 Reviewed-by: Chenbo Xia After going through the whole series, I agree it does need much effort to add all these annotations.. But I also believe it will bring more benefits :) Thanks for the work, David!
Re: [PATCH v6 0/9] Lock annotations
Hello, On Thu, Feb 9, 2023 at 8:59 AM Xia, Chenbo wrote: > > Subject: [PATCH v6 0/9] Lock annotations > > > > vhost internals involves multiple locks to protect data access by > > multiple threads. > > > > This series uses clang thread safety checks [1] to catch issues during > > compilation: EAL spinlock, seqlock and rwlock are annotated and vhost > > code is instrumented so that clang can statically check correctness. > > > > Those annotations are quite heavy to maintain because the full path of > > code must be annotated (as can be seen in the vhost datapath code), > > but I think it is worth using. > > > > This has been tested against the whole tree and some fixes are already > > flying on the mailing list (see [2] for a list). > > > > If this first series is merged, I will prepare a followup series for EAL > > and other libraries. > > > > > > 1: https://clang.llvm.org/docs/ThreadSafetyAnalysis.html > > 2: > > https://patchwork.dpdk.org/bundle/dmarchand/lock_fixes/?state=*&archive=bo > > th > > > > -- > > David Marchand > > > > Changes since v5: > > - rebased after lib/vhost updates (patches 5 and 7), > > > > Changes since v4: > > - masked annotations from Doxygen as it seems confused with some > > constructs, > > - fixed typos, > > [snip] > > > > > > David Marchand (9): > > eal: annotate spinlock, rwlock and seqlock > > vhost: simplify need reply handling > > vhost: terminate when access lock is not taken > > vhost: annotate virtqueue access lock > > vhost: annotate async accesses > > vhost: always take IOTLB lock > > vhost: annotate IOTLB lock > > vhost: annotate vDPA device list accesses > > vhost: enable lock check > > > > doc/api/doxy-api.conf.in | 11 ++ > > .../prog_guide/env_abstraction_layer.rst | 24 > > doc/guides/rel_notes/release_23_03.rst| 5 + > > drivers/meson.build | 5 + > > lib/eal/include/generic/rte_rwlock.h | 27 +++- > > lib/eal/include/generic/rte_spinlock.h| 31 +++-- > > lib/eal/include/meson.build | 1 + > > lib/eal/include/rte_lock_annotations.h| 73 ++ > > lib/eal/include/rte_seqlock.h | 2 + > > lib/eal/ppc/include/rte_spinlock.h| 3 + > > lib/eal/x86/include/rte_rwlock.h | 4 + > > lib/eal/x86/include/rte_spinlock.h| 9 ++ > > lib/meson.build | 5 + > > lib/vhost/iotlb.h | 4 + > > lib/vhost/meson.build | 2 + > > lib/vhost/vdpa.c | 20 +-- > > lib/vhost/vhost.c | 38 ++--- > > lib/vhost/vhost.h | 34 - > > lib/vhost/vhost_crypto.c | 8 ++ > > lib/vhost/vhost_user.c| 131 -- > > lib/vhost/virtio_net.c| 118 > > 21 files changed, 405 insertions(+), 150 deletions(-) > > create mode 100644 lib/eal/include/rte_lock_annotations.h > > > > -- > > 2.39.1 > > Seems one compilation error reported? Not sure it's related or not. We discovered recently that Intel CI filters out doc/ updates in patches (?!). https://inbox.dpdk.org/dev/20220328121758.26632-1-david.march...@redhat.com/T/#mb42fa6342204dd01c923339ec0b1587bc0b5ac0a So yes, it is "related" to the series, but you can ignore Intel CI report because the reported issue is fixed since the v4 revision. Btw, thanks for the review Chenbo! -- David Marchand
RE: [PATCH v6 0/9] Lock annotations
> -Original Message- > From: David Marchand > Sent: Thursday, February 9, 2023 4:08 PM > To: Xia, Chenbo > Cc: dev@dpdk.org; maxime.coque...@redhat.com; step...@networkplumber.org; > Hu, Jiayu ; Wang, YuanX ; Ding, > Xuan ; m...@smartsharesystems.com > Subject: Re: [PATCH v6 0/9] Lock annotations > > Hello, > > On Thu, Feb 9, 2023 at 8:59 AM Xia, Chenbo wrote: > > > Subject: [PATCH v6 0/9] Lock annotations > > > > > > vhost internals involves multiple locks to protect data access by > > > multiple threads. > > > > > > This series uses clang thread safety checks [1] to catch issues during > > > compilation: EAL spinlock, seqlock and rwlock are annotated and vhost > > > code is instrumented so that clang can statically check correctness. > > > > > > Those annotations are quite heavy to maintain because the full path of > > > code must be annotated (as can be seen in the vhost datapath code), > > > but I think it is worth using. > > > > > > This has been tested against the whole tree and some fixes are already > > > flying on the mailing list (see [2] for a list). > > > > > > If this first series is merged, I will prepare a followup series for > EAL > > > and other libraries. > > > > > > > > > 1: https://clang.llvm.org/docs/ThreadSafetyAnalysis.html > > > 2: > > > > https://patchwork.dpdk.org/bundle/dmarchand/lock_fixes/?state=*&archive=bo > > > th > > > > > > -- > > > David Marchand > > > > > > Changes since v5: > > > - rebased after lib/vhost updates (patches 5 and 7), > > > > > > Changes since v4: > > > - masked annotations from Doxygen as it seems confused with some > > > constructs, > > > - fixed typos, > > > > > [snip] > > > > > > > > > > David Marchand (9): > > > eal: annotate spinlock, rwlock and seqlock > > > vhost: simplify need reply handling > > > vhost: terminate when access lock is not taken > > > vhost: annotate virtqueue access lock > > > vhost: annotate async accesses > > > vhost: always take IOTLB lock > > > vhost: annotate IOTLB lock > > > vhost: annotate vDPA device list accesses > > > vhost: enable lock check > > > > > > doc/api/doxy-api.conf.in | 11 ++ > > > .../prog_guide/env_abstraction_layer.rst | 24 > > > doc/guides/rel_notes/release_23_03.rst| 5 + > > > drivers/meson.build | 5 + > > > lib/eal/include/generic/rte_rwlock.h | 27 +++- > > > lib/eal/include/generic/rte_spinlock.h| 31 +++-- > > > lib/eal/include/meson.build | 1 + > > > lib/eal/include/rte_lock_annotations.h| 73 ++ > > > lib/eal/include/rte_seqlock.h | 2 + > > > lib/eal/ppc/include/rte_spinlock.h| 3 + > > > lib/eal/x86/include/rte_rwlock.h | 4 + > > > lib/eal/x86/include/rte_spinlock.h| 9 ++ > > > lib/meson.build | 5 + > > > lib/vhost/iotlb.h | 4 + > > > lib/vhost/meson.build | 2 + > > > lib/vhost/vdpa.c | 20 +-- > > > lib/vhost/vhost.c | 38 ++--- > > > lib/vhost/vhost.h | 34 - > > > lib/vhost/vhost_crypto.c | 8 ++ > > > lib/vhost/vhost_user.c| 131 - > - > > > lib/vhost/virtio_net.c| 118 > > > 21 files changed, 405 insertions(+), 150 deletions(-) > > > create mode 100644 lib/eal/include/rte_lock_annotations.h > > > > > > -- > > > 2.39.1 > > > > Seems one compilation error reported? Not sure it's related or not. > > We discovered recently that Intel CI filters out doc/ updates in patches > (?!). Oh, I remember you reported some CI issue to us but didn't realize this is the one. 
Good to know it's resolved :) Thanks, Chenbo > https://inbox.dpdk.org/dev/20220328121758.26632-1- > david.march...@redhat.com/T/#mb42fa6342204dd01c923339ec0b1587bc0b5ac0a > > So yes, it is "related" to the series, but you can ignore Intel CI > report because the reported issue is fixed since the v4 revision. > > > Btw, thanks for the review Chenbo! > > > -- > David Marchand
Re: [PATCH v9 1/5] eal: add lcore info in telemetry
On Thu, Feb 9, 2023 at 3:19 AM lihuisong (C) wrote: > >>> + if (info->lcore_id != lcore_id) > >> Suggest: info->lcore_id != lcore_id -> lcore_id != info->lcore_id > >> Here, info->lcore_id is a target and lcore_id is the variable to be > >> judged, right? > > Yeah that looks better. I didn't pay too much attention since this > > principle is not well respected in the current code base. > That's not a very good reason. > It's similar to "ret != 0" and "p != NULL" in DPDK coding style. I'll squash this suggestion when applying. -- David Marchand
Re: [PATCH V8] ethdev: fix one address occupies two entries in MAC addrs
在 2023/2/4 10:57, lihuisong (C) 写道: 在 2023/2/3 20:58, Ferruh Yigit 写道: On 2/3/2023 1:56 AM, lihuisong (C) wrote: 在 2023/2/3 5:10, Thomas Monjalon 写道: 02/02/2023 19:09, Ferruh Yigit: On 2/2/2023 12:36 PM, Huisong Li wrote: The dev->data->mac_addrs[0] will be changed to a new MAC address when applications modify the default MAC address by .mac_addr_set(). However, if the new default one has been added as a non-default MAC address by .mac_addr_add(), the .mac_addr_set() doesn't remove it from the mac_addrs list. As a result, one MAC address occupies two entries in the list. Like: add(MAC1) add(MAC2) add(MAC3) add(MAC4) set_default(MAC3) default=MAC3, the rest of the list=MAC1, MAC2, MAC3, MAC4 Note: MAC3 occupies two entries. In addition, some PMDs, such as i40e, ice, hns3 and so on, do remove the old default MAC when set default MAC. If user continues to do set_default(MAC5), and the mac_addrs list is default=MAC5, filters=(MAC1, MAC2, MAC3, MAC4). At this moment, user can still see MAC3 from the list, but packets with MAC3 aren't actually received by the PMD. So need to ensure that the new default address is removed from the rest of the list if the address was already in the list. Same comment from past seems already valid, I am not looking to the set for a while, sorry if this is already discussed and decided, if not, I am referring to the side effect that setting MAC addresses cause to remove MAC addresses, think following case: add(MAC1) -> MAC1 add(MAC2) -> MAC1, MAC2 add(MAC3) -> MAC1, MAC2, MAC3 add(MAC4) -> MAC1, MAC2, MAC3, MAC4 set(MAC3) -> MAC3, MAC2, MAC4 set(MAC4) -> MAC4, MAC2 set(MAC2) -> MAC2 I am not exactly clear what is the intention with set(), That's the problem, nobody is clear with the current behavior. The doc says "Set the default MAC address." and nothing else. Indeed. But we can see the following information. From the ethdev layer, this set() API always replaces the old default address (index 0) without adding the old one. From the PMD layer, set() interface of some PMDs, such as i40e, ice, hns3 and so on (as far as I know), also do remove the hardware entry of the old default address. If we define behavior clearly, I think we can adapt PMD implementation according it, unless there is HW limitation. Right. I think this is another point (issue 2/) to be discussed. Namely, whether the old default address should be removed when set new default one. If we want to explicitly unify the behavior of all PMDs in ethdev layer as described above, there may be no problem if do the following: 1) In the ethdev layer, remove the old default address if the old one is exist. 2) For PMD i40e, ice and hns3, remvoe the code of deleting the old default address before adding the new one. For other PMDs, we probably don't need to do anything because they have supported remove_addr() API. (Without explicitly removing the old default address, I don't know if their hardware or firmware removes the old one when set a new address. But, we explicitly remove the old one in ethdev layer now, I'm not sure if this has an effect on these PMDs.) if there is single MAC I guess intention is to replace it with new one, but if there are multiple MACs and one of them are already in the list intention may be just to change the default MAC. The assumption in this patch is that "Set" means "Replace", not "Swap". So this patch takes the approach 1/ Replace and keep Unique. 
If above assumption is correct, what about following: set(MAC) { if only_default_mac_exist replace_default_mac if MAC exists in list swap MAC and list[0] else replace_default_mac } This approach 2/ is a mix of Swap and Replace. The old default MAC destiny depends on whether we have added the new MAC as "secondary" before setting as new default. This swap prevents removing MAC side affect, does it make sense? Another approach would be 3/ to do an "Always Swap" even if the new MAC didn't exist before, you keep the old default MAC as a secondary MAC. And the current approach 0/ is to Replace default MAC address without touching the secondary addresses at all. So we have 4 choices. We could vote, roll a dice, or find a strong argument? According to the implement of set() in ethdev and PMD layer, it always use "Replace", not "Swap". If we use "Swap" now, the behavior of this API will be changed. I'm not sure if the application can accept this change or has other effects. This patch is also changing behavior, because of implied remove address, same concern is valid with this patch. Indeed, it changes the behavior. But this patch only resolves the problem (issue 1/) that the entries of the MAC address list possibly are not uniques. Fixing it may be little impact on the application. As I checked again current implementation may have one more problem (this from reading code, I did not test this): add(MAC1) -> MAC1 add(MAC2) -> MAC1, MAC2 set(MAC2) -> MA
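For clarity, approach 2/ (mix of Swap and Replace, i.e. "replace and keep unique") could look roughly like the sketch below. The helper name and signature are invented; addrs[0] is the default address and n is the size of the list. This is only an illustration of the proposal being discussed, not the actual ethdev implementation.

#include <rte_ether.h>

static void
set_default_mac_swap_or_replace(struct rte_ether_addr *addrs, unsigned int n,
        const struct rte_ether_addr *new_mac)
{
    unsigned int i;

    for (i = 1; i < n; i++) {
        if (rte_is_same_ether_addr(&addrs[i], new_mac)) {
            /* Already present as a secondary address: move the old
             * default into that slot so no address is lost and no
             * duplicate entry remains. */
            addrs[i] = addrs[0];
            break;
        }
    }
    /* In all cases the new address becomes the default. */
    addrs[0] = *new_mac;
}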
RE: [PATCH] eal: introduce atomics abstraction
> From: Honnappa Nagarahalli [mailto:honnappa.nagaraha...@arm.com] > Sent: Thursday, 9 February 2023 01.17 > > > > > > > > > > > > > > > > > > > > > > > For environments where stdatomics are not supported, we > could > > > > have a > > > > > > stdatomic.h in DPDK implementing the same APIs (we have to > > > > > > support > > > > only > > > > > > _explicit APIs). This allows the code to use stdatomics APIs > and > > > > when we move > > > > > > to minimum supported standard C11, we just need to get rid of > > > > > > the > > > > file in DPDK > > > > > > repo. > > > > > > > > > > > > my concern with this is that if we provide a stdatomic.h or > > > > introduce names > > > > > > from stdatomic.h it's a violation of the C standard. > > > > > > > > > > > > references: > > > > > > * ISO/IEC 9899:2011 sections 7.1.2, 7.1.3. > > > > > > * GNU libc manual > > > > > > > https://www.gnu.org/software/libc/manual/html_node/Reserved- > > > > > > Names.html > > > > > > > > > > > > in effect the header, the names and in some instances > namespaces > > > > introduced > > > > > > are reserved by the implementation. there are several reasons > in > > > > the GNU libc > > > > > Wouldn't this apply only after the particular APIs were > introduced? > > > > i.e. it should not apply if the compiler does not support > stdatomics. > > > > > > > > yeah, i agree they're being a bit wishy washy in the wording, but > > > > i'm not convinced glibc folks are documenting this as permissive > > > > guidance against. > > > > > > > > > > > > > > > manual that explain the justification for these reservations > and > > > > > > if > > > > if we think > > > > > > about ODR and ABI compatibility we can conceive of others. > > > > > > > > > > > > i'll also remark that the inter-mingling of names from the > POSIX > > > > standard > > > > > > implicitly exposed as a part of the EAL public API has been > > > > problematic for > > > > > > portability. > > > > > These should be exposed as EAL APIs only when compiled with a > > > > compiler that does not support stdatomics. > > > > > > > > you don't necessarily compile dpdk, the application or its other > > > > dynamically linked dependencies with the same compiler at the > same > > > > time. > > > > i.e. basically the model of any dpdk-dev package on any linux > > > > distribution. > > > > > > > > if dpdk is built without real stdatomic types but the application > > > > has to interoperate with a different kit or library that does > they > > > > would be forced to dance around dpdk with their own version of a > > > > shim to hide our faked up stdatomics. > > > > > > > > > > So basically, if we want a binary DPDK distribution to be > compatible with a > > separate application build environment, they both have to implement > atomics > > the same way, i.e. agree on the ABI for atomics. > > > > > > Summing up, this leaves us with only two realistic options: > > > > > > 1. Go all in on C11 stdatomics, also requiring the application > build > > environment to support C11 stdatomics. > > > 2. Provide our own DPDK atomics library. > > > > > > (As mentioned by Tyler, the third option - using C11 stdatomics > inside > > > DPDK, and requiring a build environment without C11 stdatomics to > > > implement a shim - is not realistic!) > > > > > > I strongly want atomics to be available for use across inline and > compiled > > code; i.e. it must be possible for both compiled DPDK functions and > inline > > functions to perform atomic transactions on the same atomic variable. 
> > > > i consider it a mandatory requirement. i don't see practically how we > could > > withdraw existing use and even if we had clean way i don't see why we > would > > want to. so this item is defintely settled if you were concerned. > I think I agree here. > > > > > > > > > So either we upgrade the DPDK build requirements to support C11 > (including > > the optional stdatomics), or we provide our own DPDK atomics. > > > > i think the issue of requiring a toolchain conformant to a specific > standard is a > > separate matter because any adoption of C11 standard atomics is a > potential > > abi break from the current use of intrinsics. > I am not sure why you are calling it as ABI break. Referring to [1], I > just see wrappers around intrinsics (though [2] does not use the > intrinsics). > > [1] https://github.com/gcc- > mirror/gcc/blob/master/gcc/ginclude/stdatomic.h > [2] https://github.com/llvm- > mirror/clang/blob/master/lib/Headers/stdatomic.h Good input, Honnappa. This means that the ABI break is purely academic, and there is no ABI breakage in reality. Since the underlying implementation is the same, it is perfectly OK to mix C11 and intrinsic atomics, even when the DPDK and the application are built in different environments (with and without C11 atomics, or vice versa). This eliminates my only remaining practical concern about this approach. > > > > > the abstraction (whatever namespace it resides) allows the existing >
[PATCH v4 1/3] ethdev: skip congestion management configuration
Introduce a new flow action to skip congestion management configuration. This feature helps to skip the congestion management processing based on per flow or the packet color identified by the rte_flow meter object. For example, if one Rx queue is configured with RED congestion management and the application wants to bypass the RED congestion processing for all GREEN color packets, this can be expressed through the RTE_FLOW_ACTION_TYPE_SKIP_CMAN flow action. Signed-off-by: Rakesh Kudurumalla --- v4: updated commit message doc/guides/prog_guide/rte_flow.rst | 22 ++ lib/ethdev/rte_flow.h | 11 +++ 2 files changed, 33 insertions(+) diff --git a/doc/guides/prog_guide/rte_flow.rst b/doc/guides/prog_guide/rte_flow.rst index 3e6242803d..0737b877da 100644 --- a/doc/guides/prog_guide/rte_flow.rst +++ b/doc/guides/prog_guide/rte_flow.rst @@ -1840,6 +1840,28 @@ Drop packets. | no properties | +---+ + +Action: ``SKIP_CMAN`` +^^^ + +Skip congestion management on received packets + +- Using ``rte_eth_cman_config_set()``, application can configure ethdev Rx + queue's congestion mechanism.Once applied packets congestion configuration + is bypassed on that particular ethdev Rx queue for all packets directed + to that receive queue + +.. _table_rte_flow_action_skip_cman: + +.. table:: SKIP_CMAN + + +---+ + | Field | + +===+ + | no properties | + +---+ + + Action: ``COUNT`` ^ diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index b60987db4b..f4eb4232d4 100644 --- a/lib/ethdev/rte_flow.h +++ b/lib/ethdev/rte_flow.h @@ -2203,6 +2203,17 @@ enum rte_flow_action_type { */ RTE_FLOW_ACTION_TYPE_DROP, + /** +* Skip congestion management configuration +* +* Using rte_eth_cman_config_set() API the application +* can configure ethdev Rx queue's congestion mechanism. +* Introducing RTE_FLOW_ACTION_TYPE_SKIP_CMAN flow action to skip the +* congestion configuration applied to the given ethdev Rx queue. +* +*/ + RTE_FLOW_ACTION_TYPE_SKIP_CMAN, + /** * Enables counters for this flow rule. * -- 2.25.1
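A hedged usage sketch of the new action follows. The meter color item is used purely as an example of restricting the bypass to GREEN packets, mirroring the commit message; whether a given PMD accepts this exact combination is up to the driver, and the Rx queue is assumed to have been configured beforehand with rte_eth_cman_config_set().

#include <rte_flow.h>
#include <rte_meter.h>

static struct rte_flow *
skip_cman_for_green(uint16_t port_id, struct rte_flow_error *err)
{
    struct rte_flow_attr attr = { .ingress = 1 };
    struct rte_flow_item_meter_color green = { .color = RTE_COLOR_GREEN };
    struct rte_flow_item pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_METER_COLOR, .spec = &green },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
    struct rte_flow_action actions[] = {
        /* The action carries no configuration structure. */
        { .type = RTE_FLOW_ACTION_TYPE_SKIP_CMAN },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };

    return rte_flow_create(port_id, &attr, pattern, actions, err);
}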
[PATCH v4 2/3] app/testpmd: add skip cman support for testpmd
Added support for the testpmd application to accept the skip_cman action when configuring a policy action. Signed-off-by: Rakesh Kudurumalla --- app/test-pmd/cmdline_flow.c | 9 + 1 file changed, 9 insertions(+) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 88108498e0..fc003e0096 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -472,6 +472,7 @@ enum index { ACTION_END, ACTION_VOID, ACTION_PASSTHRU, + ACTION_SKIP_CMAN, ACTION_JUMP, ACTION_JUMP_GROUP, ACTION_MARK, @@ -1825,6 +1826,7 @@ static const enum index next_action[] = { ACTION_END, ACTION_VOID, ACTION_PASSTHRU, + ACTION_SKIP_CMAN, ACTION_JUMP, ACTION_MARK, ACTION_FLAG, @@ -5159,6 +5161,13 @@ static const struct token token_list[] = { .next = NEXT(NEXT_ENTRY(ACTION_NEXT)), .call = parse_vc, }, + [ACTION_SKIP_CMAN] = { + .name = "skip_cman", + .help = "bypass cman on received packets", + .priv = PRIV_ACTION(SKIP_CMAN, 0), + .next = NEXT(NEXT_ENTRY(ACTION_NEXT)), + .call = parse_vc, + }, [ACTION_JUMP] = { .name = "jump", .help = "redirect traffic to a given group", -- 2.25.1
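For reference, a possible testpmd invocation of the new token (hedged example; the pattern is arbitrary and the queue action is only there to give the rule a fate action):

testpmd> flow create 0 ingress pattern eth / end actions skip_cman / queue index 0 / end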
[PATCH v4 3/3] net/cnxk: skip red drop for ingress policer
Dropping of packets is based on action configured to meter.If both skip_red and drop actions are configured then tail dropping in invoked else if only drop action is configured then RED drop is invoked.This action is supported only when RED is configured using rte_eth_cman_config_set() Signed-off-by: Rakesh Kudurumalla --- doc/guides/nics/features/cnxk.ini | 1 + drivers/net/cnxk/cnxk_ethdev.h | 1 + drivers/net/cnxk/cnxk_ethdev_mtr.c | 50 ++ 3 files changed, 52 insertions(+) diff --git a/doc/guides/nics/features/cnxk.ini b/doc/guides/nics/features/cnxk.ini index f81628da77..753da67c5a 100644 --- a/doc/guides/nics/features/cnxk.ini +++ b/doc/guides/nics/features/cnxk.ini @@ -80,6 +80,7 @@ vxlan_gpe= Y [rte_flow actions] count= Y +skip_cman = Y drop = Y flag = Y mark = Y diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h index f0eab4244c..ba35873124 100644 --- a/drivers/net/cnxk/cnxk_ethdev.h +++ b/drivers/net/cnxk/cnxk_ethdev.h @@ -168,6 +168,7 @@ struct policy_actions { uint16_t queue; uint32_t mtr_id; struct action_rss *rss_desc; + bool skip_red; }; }; diff --git a/drivers/net/cnxk/cnxk_ethdev_mtr.c b/drivers/net/cnxk/cnxk_ethdev_mtr.c index dcfa4223d5..27a6e4ef3d 100644 --- a/drivers/net/cnxk/cnxk_ethdev_mtr.c +++ b/drivers/net/cnxk/cnxk_ethdev_mtr.c @@ -358,6 +358,9 @@ cnxk_nix_mtr_policy_validate(struct rte_eth_dev *dev, if (action->type == RTE_FLOW_ACTION_TYPE_VOID) supported[i] = true; + if (action->type == RTE_FLOW_ACTION_TYPE_SKIP_CMAN) + supported[i] = true; + if (!supported[i]) return update_mtr_err(i, error, true); } @@ -397,6 +400,10 @@ cnxk_fill_policy_actions(struct cnxk_mtr_policy_node *fmp, fmp->actions[i].action_fate = action->type; } + + if (action->type == + RTE_FLOW_ACTION_TYPE_SKIP_CMAN) + fmp->actions[i].skip_red = true; } } } @@ -1306,6 +1313,45 @@ nix_mtr_config_map(struct cnxk_meter_node *mtr, struct roc_nix_bpf_cfg *cfg) cfg->action[ROC_NIX_BPF_COLOR_RED] = ROC_NIX_BPF_ACTION_DROP; } +static void +nix_mtr_config_red(struct cnxk_meter_node *mtr, struct roc_nix_rq *rq, + struct roc_nix_bpf_cfg *cfg) +{ + struct cnxk_mtr_policy_node *policy = mtr->policy; + + if ((rq->red_pass && rq->red_pass >= rq->red_drop) || + (rq->spb_red_pass && rq->spb_red_pass >= rq->spb_red_drop) || + (rq->xqe_red_pass && rq->xqe_red_pass >= rq->xqe_red_drop)) { + if (policy->actions[RTE_COLOR_GREEN].action_fate == + RTE_FLOW_ACTION_TYPE_DROP) { + if (policy->actions[RTE_COLOR_GREEN].skip_red) + cfg->action[ROC_NIX_BPF_COLOR_GREEN] = + ROC_NIX_BPF_ACTION_DROP; + else + cfg->action[ROC_NIX_BPF_COLOR_GREEN] = + ROC_NIX_BPF_ACTION_RED; + } + if (policy->actions[RTE_COLOR_YELLOW].action_fate == + RTE_FLOW_ACTION_TYPE_DROP) { + if (policy->actions[RTE_COLOR_YELLOW].skip_red) + cfg->action[ROC_NIX_BPF_COLOR_YELLOW] = + ROC_NIX_BPF_ACTION_DROP; + else + cfg->action[ROC_NIX_BPF_COLOR_YELLOW] = + ROC_NIX_BPF_ACTION_RED; + } + if (policy->actions[RTE_COLOR_RED].action_fate == + RTE_FLOW_ACTION_TYPE_DROP) { + if (policy->actions[RTE_COLOR_RED].skip_red) + cfg->action[ROC_NIX_BPF_COLOR_RED] = + ROC_NIX_BPF_ACTION_DROP; + else + cfg->action[ROC_NIX_BPF_COLOR_RED] = + ROC_NIX_BPF_ACTION_RED; + } + } +} + static void nix_precolor_table_map(struct cnxk_meter_node *mtr, struct roc_nix_bpf_precolor *tbl, @@ -1483,6 +1529,10 @@ nix_mtr_configure(struct rte_eth_dev *eth_dev, uint32_t id) if (!mtr[i]->is_used) { memse
Re: [PATCH v9 1/5] eal: add lcore info in telemetry
On Thu, Feb 9, 2023 at 9:31 AM David Marchand wrote: > > On Thu, Feb 9, 2023 at 3:19 AM lihuisong (C) wrote: > > >>> + if (info->lcore_id != lcore_id) > > >> Suggest: info->lcore_id != lcore_id -> lcore_id != info->lcore_id > > >> Here, info->lcore_id is a target and lcore_id is the variable to be > > >> judged, right? > > > Yeah that looks better. I didn't pay too much attention since this > > > principle is not well respected in the current code base. > > That's not a very good reason. > > It's similar to "ret != 0" and "p != NULL" in DPDK coding style. > > I'll squash this suggestion when applying. Hum, well, I have some other comments later in this series, so Robin will fix this himself. -- David Marchand
Re: [RESEND PATCH v9 4/5] app/testpmd: report lcore usage
On Wed, Feb 8, 2023 at 9:49 AM Robin Jarry wrote: > > The --record-core-cycles option already accounts for busy cycles. One > turn of packet_fwd_t is considered "busy" if there was at least one > received or transmitted packet. > > Rename core_cycles to busy_cycles in struct fwd_stream to make it more > explicit. Add total_cycles to struct fwd_lcore. Add cycles accounting in > noisy_vnf where it was missing. > > When --record-core-cycles is specified, register a callback with > rte_lcore_register_usage_cb() and update total_cycles every turn of > lcore loop based on a starting tsc value. > > In the callback, resolve the proper struct fwd_lcore based on lcore_id > and return the lcore total_cycles and the sum of busy_cycles of all its > fwd_streams. > > This makes the cycles counters available in rte_lcore_dump() and the > lcore telemetry API: > > testpmd> dump_lcores > lcore 3, socket 0, role RTE, cpuset 3 > lcore 4, socket 0, role RTE, cpuset 4, busy cycles 1228584096/9239923140 > lcore 5, socket 0, role RTE, cpuset 5, busy cycles 1255661768/9218141538 > > --> /eal/lcore/info,4 > { >"/eal/lcore/info": { > "lcore_id": 4, > "socket": 0, > "role": "RTE", > "cpuset": [ >4 > ], > "busy_cycles": 10623340318, > "total_cycles": 55331167354 >} > } > > Signed-off-by: Robin Jarry > Acked-by: Morten Brørup > Acked-by: Konstantin Ananyev > Reviewed-by: Kevin Laatz > --- > > Notes: > v8 -> v9: Fixed accounting of total cycles > > app/test-pmd/noisy_vnf.c | 8 +++- > app/test-pmd/testpmd.c | 42 > app/test-pmd/testpmd.h | 25 +++- > 3 files changed, 61 insertions(+), 14 deletions(-) > > diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c > index c65ec6f06a5c..ce5a3e5e6987 100644 > --- a/app/test-pmd/noisy_vnf.c > +++ b/app/test-pmd/noisy_vnf.c > @@ -144,6 +144,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) > struct noisy_config *ncf = noisy_cfg[fs->rx_port]; > struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; > struct rte_mbuf *tmp_pkts[MAX_PKT_BURST]; > + uint64_t start_tsc = 0; > uint16_t nb_deqd = 0; > uint16_t nb_rx = 0; > uint16_t nb_tx = 0; > @@ -153,6 +154,8 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) > bool needs_flush = false; > uint64_t now; > > + get_start_cycles(&start_tsc); > + > nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, > pkts_burst, nb_pkt_per_burst); > inc_rx_burst_stats(fs, nb_rx); > @@ -169,7 +172,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) > inc_tx_burst_stats(fs, nb_tx); > fs->tx_packets += nb_tx; > fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx); > - return; > + goto end; > } > > fifo_free = rte_ring_free_count(ncf->f); > @@ -219,6 +222,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) > fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent); > ncf->prev_time = rte_get_timer_cycles(); > } > +end: > + if (nb_rx > 0 || nb_tx > 0) > + get_end_cycles(fs, start_tsc); > } > > #define NOISY_STRSIZE 256 > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c > index e366f81a0f46..eeb96aefa80b 100644 > --- a/app/test-pmd/testpmd.c > +++ b/app/test-pmd/testpmd.c > @@ -2053,7 +2053,7 @@ fwd_stats_display(void) > fs->rx_bad_outer_ip_csum; > > if (record_core_cycles) > - fwd_cycles += fs->core_cycles; > + fwd_cycles += fs->busy_cycles; > } > for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { > pt_id = fwd_ports_ids[i]; > @@ -2145,7 +2145,7 @@ fwd_stats_display(void) > else > total_pkts = total_recv; > > - printf("\n CPU cycles/packet=%.2F (total cycles=" > + printf("\n CPU cycles/packet=%.2F (busy cycles=" >"%"PRIu64" / total %s packets=%"PRIu64") at > %"PRIu64 
>" MHz Clock\n", >(double) fwd_cycles / total_pkts, > @@ -2184,8 +2184,10 @@ fwd_stats_reset(void) > > memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats)); > memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats)); > - fs->core_cycles = 0; > + fs->busy_cycles = 0; > } > + for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) > + fwd_lcores[i]->total_cycles = 0; This instrumentation accuracy may not be that important in testpmd (because testpmd is just a test/validation tool). However, resetting total_cycles is setting a bad example for people who may look at this code. It does not comply with the EAL API. T
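For context, the contract testpmd implements here is the usage callback added earlier in the series; a hedged sketch of what an application registers (struct and function names as proposed by the series, the per-lcore counters below are hypothetical application state):

    #include <stdint.h>
    #include <rte_lcore.h>

    /* Hypothetical per-lcore counters maintained by the application. */
    struct app_lcore_stats {
        uint64_t total_cycles;
        uint64_t busy_cycles;
    };
    extern struct app_lcore_stats app_stats[RTE_MAX_LCORE];

    /* Report-only callback: the values are expected to keep increasing
     * from lcore start, so nothing here (or elsewhere) should reset them. */
    static int
    app_lcore_usage_cb(unsigned int lcore_id, struct rte_lcore_usage *usage)
    {
        if (lcore_id >= RTE_MAX_LCORE)
            return -1; /* lcore not handled by this application */

        usage->total_cycles = app_stats[lcore_id].total_cycles;
        usage->busy_cycles = app_stats[lcore_id].busy_cycles;
        return 0;
    }

    /* Registered once at startup:
     * rte_lcore_register_usage_cb(app_lcore_usage_cb); */

This report-only expectation is exactly why resetting total_cycles in fwd_stats_reset() is being questioned above.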
Re: [RESEND PATCH v9 0/5] lcore telemetry improvements
Hello Robin, On Wed, Feb 8, 2023 at 9:45 AM Robin Jarry wrote: > > This is a follow up on previous work by Kevin Laatz: > > http://patches.dpdk.org/project/dpdk/list/?series=24658&state=* > > This series is aimed at allowing DPDK applications to expose their CPU > usage stats in the DPDK telemetry under /eal/lcore/info. This is a much > more basic and naive approach which leaves the cpu cycles accounting > completely up to the application. > > For reference, I have implemented a draft patch in OvS to use > rte_lcore_register_usage_cb() and report the already available busy > cycles information. > > https://github.com/rjarry/ovs/commit/643e672fe388e348ea7ccbbda6f5a87a066fd919 > > v9: > > - Fixed changelog & version.map order. > - Updated with 64-bit integer telemetry functions. > - Refined docstrings (added notice about resetting the callback). > - Fixed accounting of total cycles in testpmd. > > v8: > > - Made /eal/lcore/info lcore_id argument parsing more robust. > > Robin Jarry (5): > eal: add lcore info in telemetry > eal: report applications lcore usage > app/testpmd: add dump command for lcores > app/testpmd: report lcore usage > eal: add lcore usage telemetry endpoint > > app/test-pmd/cmdline.c | 3 + > app/test-pmd/noisy_vnf.c| 8 +- > app/test-pmd/testpmd.c | 42 +++- > app/test-pmd/testpmd.h | 25 ++- > doc/guides/rel_notes/release_23_03.rst | 8 + > doc/guides/testpmd_app_ug/testpmd_funcs.rst | 7 + > lib/eal/common/eal_common_lcore.c | 219 ++-- > lib/eal/include/rte_lcore.h | 48 + > lib/eal/version.map | 1 + > 9 files changed, 329 insertions(+), 32 deletions(-) Thanks for this work. The EAL parts look ready to me, but I still have some concerns on the implementation in testpmd (see comments on patch 4). -- David Marchand
Re: [PATCH v2 16/21] net/virtio-user: allocate shadow control queue
On 2/7/23 19:06, Eugenio Perez Martin wrote: On Tue, Feb 7, 2023 at 4:18 PM Maxime Coquelin wrote: If the backends supports control virtqueue, allocate a shadow control virtqueue, and implement the notify callback that writes into the kickfd. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Even with the nitpick below, Acked-by: Eugenio Pérez --- .../net/virtio/virtio_user/virtio_user_dev.c | 47 ++- .../net/virtio/virtio_user/virtio_user_dev.h | 5 ++ drivers/net/virtio/virtio_user_ethdev.c | 6 +++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index a3584e7735..16a0e07413 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -146,8 +146,9 @@ virtio_user_dev_set_features(struct virtio_user_dev *dev) /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */ features &= ~(1ull << VIRTIO_NET_F_MAC); - /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */ - features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Strip VIRTIO_NET_F_CTRL_VQ if the devices does not really support control VQ */ + if (!dev->hw_cvq) + features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); features &= ~(1ull << VIRTIO_NET_F_STATUS); ret = dev->ops->set_features(dev, features); if (ret < 0) @@ -911,6 +912,48 @@ virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) } } +static void +virtio_user_control_queue_notify(struct virtqueue *vq, void *cookie) +{ + struct virtio_user_dev *dev = cookie; + uint64_t buf = 1; + + if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) + PMD_DRV_LOG(ERR, "failed to kick backend: %s", + strerror(errno)); +} + +int +virtio_user_dev_create_shadow_cvq(struct virtio_user_dev *dev, struct virtqueue *vq) +{ + char name[VIRTQUEUE_MAX_NAME_SZ]; + struct virtqueue *scvq; + + snprintf(name, sizeof(name), "port%d_shadow_cvq", vq->hw->port_id); + scvq = virtqueue_alloc(&dev->hw, vq->vq_queue_index, vq->vq_nentries, + VTNET_CQ, SOCKET_ID_ANY, name); + if (!scvq) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc shadow control vq\n", dev->path); + return -ENOMEM; + } + + scvq->cq.notify_queue = &virtio_user_control_queue_notify; + scvq->cq.notify_cookie = dev; + dev->scvq = scvq; + + return 0; +} + +void +virtio_user_dev_destroy_shadow_cvq(struct virtio_user_dev *dev) +{ + if (!dev->scvq) + return; + + virtqueue_free(dev->scvq); + dev->scvq = NULL; +} + int virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status) { diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index 3c5453eac0..e0db4faf3f 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -58,6 +58,9 @@ struct virtio_user_dev { pthread_mutex_t mutex; boolstarted; + boolhw_cvq; + struct virtqueue*scvq; + void *backend_data; }; @@ -74,6 +77,8 @@ void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); void virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx); uint8_t virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs); +int virtio_user_dev_create_shadow_cvq(struct virtio_user_dev *dev, struct virtqueue *vq); +void virtio_user_dev_destroy_shadow_cvq(struct virtio_user_dev *dev); int virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status); int virtio_user_dev_update_status(struct virtio_user_dev *dev); int 
virtio_user_dev_update_link_state(struct virtio_user_dev *dev); diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 6c3e875793..626bd95b62 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -232,6 +232,9 @@ virtio_user_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) else virtio_user_setup_queue_split(vq, dev); + if (dev->hw_cvq && hw->cvq && (virtnet_cq_to_vq(hw->cvq) == vq)) + return virtio_user_dev_create_shadow_cvq(dev, vq); + return 0; } @@ -251,6 +254,9 @@ virtio_user_del_queue(struct virtio_hw *hw, struct virtqueue *vq) close(dev->callfds[vq->vq_queue_index]); close(dev->kickfds[vq->vq_queue_index]); + + if (hw->cvq && (virtnet_cq_to_vq(hw->cvq) == vq) && dev->scvq) Not sure if intended, but check for dev->scvq is already in virtio_user_dev_des
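As background for the notify callback discussed above: the kick path relies on plain eventfd semantics, where the driver side writes an 8-byte counter and the backend side reads it back to drain the notification. A minimal, self-contained illustration, independent of the virtio-user code:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    static void
    kick_example(void)
    {
        int kickfd = eventfd(0, EFD_NONBLOCK);
        uint64_t buf = 1, kicks;

        if (kickfd < 0)
            return;

        /* Driver side: what the shadow control queue notify callback does. */
        if (write(kickfd, &buf, sizeof(buf)) < 0)
            perror("kick");

        /* Backend side: drain the counter before scanning the ring. */
        if (read(kickfd, &kicks, sizeof(kicks)) == (ssize_t)sizeof(kicks))
            printf("%" PRIu64 " pending kick(s)\n", kicks);

        close(kickfd);
    }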
Re: [PATCH] mem: fix displaying heap ID failed for heap info command
On Thu, Feb 9, 2023 at 4:24 AM fengchengwen wrote: > > Acked-by: Chengwen Feng > > And, how about adding an error log in telemetry valid_name, so that problems can > be identified as early as possible. > > On 2023/2/9 11:03, Huisong Li wrote: > > The telemetry lib has added an allowed character set for dictionary names, > > See commit > > 2537fb0c5f34 ("telemetry: limit characters allowed in dictionary names") > > > > The space is not in this set, which causes the heap ID in /eal/heap_info to not be displayed. Additionally, 'heap' is also misspelled. So use I agree this typo is ugly, but I wonder if some telemetry users started relying on it... > > 'Heap_id' to replace 'Head id'. > > > > Fixes: e6732d0d6e26 ("mem: add telemetry infos") IIUC, the commit that broke displaying "Head id" is actually the one that limited the set of chars. > > Cc: sta...@dpdk.org > > > > Signed-off-by: Huisong Li -- David Marchand
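For reference, the failure mode is purely a naming one: since the cited telemetry commit, dictionary entries whose name contains a character outside the allowed set are rejected, so an entry added as "Heap id N" never shows up while "Heap_id_N" does. A rough approximation of the rule (the authoritative check is valid_name() in lib/telemetry):

    #include <ctype.h>
    #include <stdbool.h>

    /* Approximation of the telemetry valid_name() rule: only alphanumerics,
     * '_' and '/' are accepted, so "Heap id 0" is rejected while
     * "Heap_id_0" passes. */
    static bool
    dict_name_is_valid(const char *name)
    {
        for (; *name != '\0'; name++) {
            if (isalnum((unsigned char)*name) || *name == '_' || *name == '/')
                continue;
            return false;
        }
        return true;
    }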
Re: [PATCH v2 18/21] net/virtio-user: add new callback to enable control queue
On 2/7/23 19:10, Eugenio Perez Martin wrote: On Tue, Feb 7, 2023 at 4:18 PM Maxime Coquelin wrote: This patch introduces a new callback that is to be called when the backend supports control virtqueue. Implementation for Vhost-vDPA backend is added in this patch. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Small nitpick but as previous one it is ok to leave as it is now. Acked-by: Eugenio Pérez --- drivers/net/virtio/virtio_user/vhost.h | 1 + drivers/net/virtio/virtio_user/vhost_vdpa.c | 15 +++ drivers/net/virtio/virtio_user/virtio_user_dev.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index dfbf6be033..f817cab77a 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -82,6 +82,7 @@ struct virtio_user_backend_ops { int (*get_config)(struct virtio_user_dev *dev, uint8_t *data, uint32_t off, uint32_t len); int (*set_config)(struct virtio_user_dev *dev, const uint8_t *data, uint32_t off, uint32_t len); + int (*cvq_enable)(struct virtio_user_dev *dev, int enable); int (*enable_qp)(struct virtio_user_dev *dev, uint16_t pair_idx, int enable); int (*dma_map)(struct virtio_user_dev *dev, void *addr, uint64_t iova, size_t len); int (*dma_unmap)(struct virtio_user_dev *dev, void *addr, uint64_t iova, size_t len); diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c b/drivers/net/virtio/virtio_user/vhost_vdpa.c index a0897f8dd1..3fd13d9fac 100644 --- a/drivers/net/virtio/virtio_user/vhost_vdpa.c +++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c @@ -564,6 +564,20 @@ vhost_vdpa_destroy(struct virtio_user_dev *dev) return 0; } +static int +vhost_vdpa_cvq_enable(struct virtio_user_dev *dev, int enable) +{ + struct vhost_vring_state state = { + .index = dev->max_queue_pairs * 2, + .num = enable, + }; + + if (vhost_vdpa_set_vring_enable(dev, &state)) + return -1; + + return 0; Any reason for not to "return vhost_vdpa_set_vring_enable(dev, &state));"? No reason, I guess I didn't refactor when modifying the code. Will simplify it in next revision. Thanks, Maxime Thanks! +} + static int vhost_vdpa_enable_queue_pair(struct virtio_user_dev *dev, uint16_t pair_idx, @@ -629,6 +643,7 @@ struct virtio_user_backend_ops virtio_ops_vdpa = { .set_status = vhost_vdpa_set_status, .get_config = vhost_vdpa_get_config, .set_config = vhost_vdpa_set_config, + .cvq_enable = vhost_vdpa_cvq_enable, .enable_qp = vhost_vdpa_enable_queue_pair, .dma_map = vhost_vdpa_dma_map_batch, .dma_unmap = vhost_vdpa_dma_unmap_batch, diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 1a5386a3f6..b0d603ee12 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -767,6 +767,9 @@ virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) for (i = q_pairs; i < dev->max_queue_pairs; ++i) ret |= dev->ops->enable_qp(dev, i, 0); + if (dev->scvq) + ret |= dev->ops->cvq_enable(dev, 1); + dev->queue_pairs = q_pairs; return ret; -- 2.39.1
Re: [PATCH v6 0/3] eal: deprecate last use of pthread_t in public API
On Wed, Feb 8, 2023 at 10:26 PM Tyler Retzlaff wrote: > > Announce deprecation of rte_ctrl_thread_create API which is the final > remaining stable API exposing pthread_t. > > Provide an equivalent replacement API rte_thread_create_control that > uses the new rte_thread_t type. > > Provide a unit test for the new rte_thread_create_control API > (test code provided by David Marchand.) > > Add missing doxygen index for thread / rte_thread.h. > > Notice! > > To limit compatibility regression risk and ease removal of the > existing rte_ctrl_thread_create in the future duplicate most of the > existing implementation rather than try to have it accommodate both > public API contracts. > > The duplication, the union introduced to support it along with remaining > internal pthread_xxx calls will be removed when rte_ctrl_thread_create is > finally removed. > > The old unit test for rte_ctrl_thread_create is kept in place to guarantee > correct behavior while deprecated and will be removed when > rte_ctrl_thread_create is finally removed. > > Series-acked-by: Morten Brørup > Reviewed-by: Mattias Rönnblom > > v6: > * fix missing comma in doxygen index > * combine deprecation notice for rte_ctrl_thread_create into notice > of removal of rte_thread_setname. > * remove test_lcore.c test and add test provided by David Marchand > to test_threads.c. > * rename the function to start with rte_thread to be consistent > with the rest of functions in the rte_thread.h APIs. Thank you Tyler. The series lgtm. The deprecation notice is missing one ack, I'll still wait a bit. If we don't get it soon, I'll apply the first two patches for rc1. -- David Marchand
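For application writers tracking this deprecation, the migration is mostly mechanical; a hedged sketch of the replacement call, assuming the parameter list proposed in this series (thread handle, name, optional attributes, function, argument) — check rte_thread.h of the release actually used:

    #include <stdint.h>
    #include <rte_thread.h>

    static uint32_t
    ctrl_thread_main(void *arg)
    {
        (void)arg;
        /* control-plane work, scheduled on the EAL control CPU set */
        return 0;
    }

    static int
    spawn_control_thread(void)
    {
        rte_thread_t tid;

        /* Replacement for rte_ctrl_thread_create(); no pthread_t exposed. */
        return rte_thread_create_control(&tid, "app-ctrl", NULL,
                ctrl_thread_main, NULL);
    }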
Re: [PATCH v6 3/3] doc: announce deprecation of thread ctrl create function
On Wed, Feb 08, 2023 at 01:26:35PM -0800, Tyler Retzlaff wrote: > Notify deprecation of rte_ctrl_thread_create API, it will be removed as > it exposes platform-specific thread details. > > Signed-off-by: Tyler Retzlaff > Acked-by: Morten Brørup > Acked-by: David Marchand > Reviewed-by: Mattias Rönnblom > --- Acked-by: Bruce Richardson
RE: [PATCH v2 01/10] net/ngbe: fix Rx buffer size in configure register
On Wednesday, February 8, 2023 6:28 PM, Ferruh Yigit wrote: > On 2/2/2023 9:21 AM, Jiawen Wu wrote: > > When buffer size is less than 1K, round down makes it 0, which is an > > error value. > > > > Fixes: 62fc35e63d0e ("net/ngbe: support Rx queue start/stop") > > Cc: sta...@dpdk.org > > > > Signed-off-by: Jiawen Wu > > --- > > drivers/net/ngbe/ngbe_rxtx.c | 5 - > > 1 file changed, 4 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/net/ngbe/ngbe_rxtx.c > > b/drivers/net/ngbe/ngbe_rxtx.c index 9fd24fa444..9a646cb6a7 100644 > > --- a/drivers/net/ngbe/ngbe_rxtx.c > > +++ b/drivers/net/ngbe/ngbe_rxtx.c > > @@ -2944,7 +2944,10 @@ ngbe_dev_rx_init(struct rte_eth_dev *dev) > > */ > > buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - > > RTE_PKTMBUF_HEADROOM); > > - buf_size = ROUND_DOWN(buf_size, 0x1 << 10); > > + if (buf_size < 1024) > > + buf_size = ROUND_UP(buf_size, 0x1 << 10); > > Back to original problem statement in previous version, can't this cause HW to > receive packets exceeding the buffer size? > > If HW accepts buffer size in multiple of 1K, does this mean any buffer size > less than > 1K is an error condition for this HW? > After rechecking the code, the minimum buffer size is limited to 1K by the txgbe/ngbe [1]. I think v1 patch for txgbe is enough. [1] static int txgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct txgbe_hw *hw = TXGBE_DEV_HW(dev); dev_info->min_rx_bufsize = 1024; > > + else > > + buf_size = ROUND_DOWN(buf_size, 0x1 << 10); > > srrctl |= NGBE_RXCFG_PKTLEN(buf_size); > > > > wr32(hw, NGBE_RXCFG(rxq->reg_idx), srrctl); > >
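To make the arithmetic concrete: with the default 128-byte RTE_PKTMBUF_HEADROOM, a 1 KB mempool data room leaves buf_size = 1024 - 128 = 896, and rounding that down to a 1 KB multiple yields 0, the invalid register value being fixed; a 2 KB data room gives 1920, which rounds down to 1024 and is fine. Illustrative power-of-two helpers (the driver has its own ROUND_UP/ROUND_DOWN definitions in its base code):

    /* Generic power-of-two rounding helpers, for illustration only. */
    #define EX_ROUND_DOWN(x, align) ((x) & ~((align) - 1))
    #define EX_ROUND_UP(x, align)   (((x) + (align) - 1) & ~((align) - 1))

    /* 1 KB data room: 1024 - 128 = 896  -> EX_ROUND_DOWN(896, 1024)  == 0    (invalid) */
    /* 2 KB data room: 2048 - 128 = 1920 -> EX_ROUND_DOWN(1920, 1024) == 1024 (fine)    */
    /* The v2 fix rounds small sizes up:    EX_ROUND_UP(896, 1024)    == 1024           */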
Re: [PATCH v2] eal: introduce atomics abstraction
On Wed, Feb 08, 2023 at 01:43:38PM -0800, Tyler Retzlaff wrote: > Introduce atomics abstraction that permits optional use of standard C11 > atomics when meson is provided the new enable_stdatomics=true option. > > Signed-off-by: Tyler Retzlaff > --- > config/meson.build | 11 > lib/eal/arm/include/rte_atomic_32.h| 6 ++- > lib/eal/arm/include/rte_atomic_64.h| 6 ++- > lib/eal/include/generic/rte_atomic.h | 96 > +- > lib/eal/loongarch/include/rte_atomic.h | 6 ++- > lib/eal/ppc/include/rte_atomic.h | 6 ++- > lib/eal/riscv/include/rte_atomic.h | 6 ++- > lib/eal/x86/include/rte_atomic.h | 8 ++- > meson_options.txt | 2 + > 9 files changed, 139 insertions(+), 8 deletions(-) > > diff --git a/config/meson.build b/config/meson.build > index 26f3168..25dd628 100644 > --- a/config/meson.build > +++ b/config/meson.build > @@ -255,6 +255,17 @@ endif > # add -include rte_config to cflags > add_project_arguments('-include', 'rte_config.h', language: 'c') > > +stdc_atomics_enabled = get_option('enable_stdatomics') > +dpdk_conf.set('RTE_STDC_ATOMICS', stdc_atomics_enabled) > + > +if stdc_atomics_enabled > +if cc.get_id() == 'gcc' or cc.get_id() == 'clang' > +add_project_arguments('-std=gnu11', language: 'c') Is there a reason for using gnu11 on gcc and clang, rather than limiting ourselves to proper c11 support? /Bruce
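For readers not following the build-system details, the option toggles between two spellings of the same operation; an illustration of what the wrapped macros have to unify (not the literal macro bodies from the patch):

    #include <stdatomic.h>
    #include <stdint.h>

    /* C11 flavour, what enable_stdatomics=true is meant to select: */
    static _Atomic uint32_t counter_c11;

    static inline uint32_t
    inc_c11(void)
    {
        return atomic_fetch_add_explicit(&counter_c11, 1, memory_order_relaxed);
    }

    /* GCC/clang builtin flavour, the historical default: */
    static uint32_t counter_builtin;

    static inline uint32_t
    inc_builtin(void)
    {
        return __atomic_fetch_add(&counter_builtin, 1, __ATOMIC_RELAXED);
    }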
Re: [PATCH 1/2] eal: fix mmap fail regarded as success
On Mon, Feb 6, 2023 at 11:59 AM Chengwen Feng wrote: > > The map_shared_memory() function should treat mmap MAP_FAILED as NULL > because callers compare it with NULL to determine whether the map is > failed. > > Fixes: 764bf26873b9 ("add FreeBSD support") > Fixes: cb97d93e9d3b ("mem: share hugepage info primary and secondary") > Cc: sta...@dpdk.org > > Signed-off-by: Chengwen Feng Reviewed-by: David Marchand -- David Marchand
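The bug pattern being fixed is worth spelling out: mmap() reports failure with MAP_FAILED ((void *)-1), never NULL, so a caller comparing the result against NULL treats a failed mapping as success. A minimal corrected helper in the spirit of map_shared_memory():

    #include <stddef.h>
    #include <sys/mman.h>

    static void *
    map_shared_memory_example(int fd, size_t len)
    {
        void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

        /* mmap() never returns NULL on failure; MAP_FAILED is the error value. */
        if (addr == MAP_FAILED)
            return NULL; /* callers of this helper compare against NULL */
        return addr;
    }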
Re: [PATCH 0/2] fix mmap retcode check fail
On Mon, Feb 6, 2023 at 11:59 AM Chengwen Feng wrote: > > This patchset contains two patch which fix mmap retcode check fail. > > Chengwen Feng (2): > eal: fix mmap fail regarded as success > raw/ifpga/base: fix mmap retcode check fail > > drivers/raw/ifpga/base/opae_hw_api.c | 4 ++-- > lib/eal/freebsd/eal_hugepage_info.c | 2 +- > lib/eal/linux/eal_hugepage_info.c| 2 +- > 3 files changed, 4 insertions(+), 4 deletions(-) Series applied, thanks. -- David Marchand
Re: [PATCH v1 13/13] test/bbdev: remove iteration count check
On 2/8/23 21:38, Vargas, Hernan wrote: Hi Maxime, We would like to keep the same signature for validate_dec_op because there are functions such as latency_test_dec that have vector_mask on their signatures and they pass it to validate_dec_op. Let me know if you'd like to discuss more. I think this is not a valid reason, just simplify latency_test_dec too. Thanks, Maxime Thanks, Hernan -Original Message- From: Maxime Coquelin Sent: Tuesday, January 31, 2023 6:36 AM To: Vargas, Hernan ; dev@dpdk.org; gak...@marvell.com; Rix, Tom Cc: Chautru, Nicolas ; Zhang, Qi Z Subject: Re: [PATCH v1 13/13] test/bbdev: remove iteration count check On 1/17/23 17:50, Hernan Vargas wrote: To make the test compatible with devices that do not support early termination, the iteration count assert can be removed. Signed-off-by: Hernan Vargas --- app/test-bbdev/test_bbdev_perf.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 81bf2c8b60..c68d79cf29 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -2290,6 +2290,7 @@ static int validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, struct rte_bbdev_dec_op *ref_op, const int vector_mask) { + RTE_SET_USED(vector_mask); Why not just remove vector_mask if it isn't of any use instead of hiding the warning? unsigned int i; int ret; struct op_data_entries *hard_data_orig = @@ -2299,17 +2300,12 @@ validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n, struct rte_bbdev_op_turbo_dec *ops_td; struct rte_bbdev_op_data *hard_output; struct rte_bbdev_op_data *soft_output; - struct rte_bbdev_op_turbo_dec *ref_td = &ref_op->turbo_dec; for (i = 0; i < n; ++i) { ops_td = &ops[i]->turbo_dec; hard_output = &ops_td->hard_output; soft_output = &ops_td->soft_output; - if (vector_mask & TEST_BBDEV_VF_EXPECTED_ITER_COUNT) - TEST_ASSERT(ops_td->iter_count <= ref_td->iter_count, - "Returned iter_count (%d) > expected iter_count (%d)", - ops_td->iter_count, ref_td->iter_count); ret = check_dec_status_and_ordering(ops[i], i, ref_op->status); TEST_ASSERT_SUCCESS(ret, "Checking status and ordering for decoder failed"); Maxime
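To illustrate the suggested simplification (a sketch only, not the actual follow-up patch):

    /* Suggested shape after the cleanup: the mask argument is dropped from
     * validate_dec_op() and from callers such as latency_test_dec(), instead
     * of being silenced with RTE_SET_USED(). */
    static int
    validate_dec_op(struct rte_bbdev_dec_op **ops, const uint16_t n,
            struct rte_bbdev_dec_op *ref_op);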
Re: [PATCH v5 2/3] graph: pcap capture for graph nodes
On Fri, Feb 3, 2023 at 9:19 AM Amit Prakash Shukla wrote: > diff --git a/lib/graph/rte_graph.h b/lib/graph/rte_graph.h > index b32c4bc217..c9a77297fc 100644 > --- a/lib/graph/rte_graph.h > +++ b/lib/graph/rte_graph.h > @@ -35,6 +35,7 @@ extern "C" { > > #define RTE_GRAPH_NAMESIZE 64 /**< Max length of graph name. */ > #define RTE_NODE_NAMESIZE 64 /**< Max length of node name. */ > +#define RTE_GRAPH_PCAP_FILE_SZ 64 /**< Max length of pcap file name. */ > #define RTE_GRAPH_OFF_INVALID UINT32_MAX /**< Invalid graph offset. */ > #define RTE_NODE_ID_INVALID UINT32_MAX /**< Invalid node id. */ > #define RTE_EDGE_ID_INVALID UINT16_MAX /**< Invalid edge id. */ > @@ -164,6 +165,10 @@ struct rte_graph_param { > uint16_t nb_node_patterns; /**< Number of node patterns. */ > const char **node_patterns; > /**< Array of node patterns based on shell pattern. */ > + > + bool pcap_enable; /**< Pcap enable. */ > + uint64_t num_pkt_to_capture; /**< Number of packets to capture. */ > + char *pcap_filename; /**< Filename in which packets to be captured.*/ > }; Repeating in this thread what I commented on a patch fixing compilation for the unit test. Extending this structure requires updating the graph unit test. Please squash this fix in this series. Thanks. -- David Marchand
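For application writers, the new members quoted above are filled at graph creation time; a hedged usage sketch based only on the struct fields shown in this patch (values are examples):

    #include <stdbool.h>
    #include <rte_graph.h>

    static rte_graph_t
    create_graph_with_capture(const char **patterns, uint16_t nb_patterns)
    {
        struct rte_graph_param prm = {
            .nb_node_patterns = nb_patterns,
            .node_patterns = patterns,
            .pcap_enable = true,                  /* per-node packet capture */
            .num_pkt_to_capture = 10000,          /* stop after this many packets */
            .pcap_filename = "/tmp/graph_capture.pcapng",
        };

        return rte_graph_create("worker_graph", &prm);
    }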
Re: [PATCH v5 1/3] pcapng: comment option support for epb
Stephen, Reshma, On Fri, Feb 3, 2023 at 9:19 AM Amit Prakash Shukla wrote: > > This change enhances rte_pcapng_copy to have comment in enhanced > packet block. > > Signed-off-by: Amit Prakash Shukla Is this change ok for you? Thanks. -- David Marchand
RE: [PATCH 2/2] test/mempool: add zero-copy API's
> From: Kamalakshitha Aligeri [mailto:kamalakshitha.alig...@arm.com] > Sent: Thursday, 9 February 2023 07.25 > > Added mempool test cases with zero-copy get and put API's > > Signed-off-by: Kamalakshitha Aligeri > Reviewed-by: Ruifeng Wang > Reviewed-by: Feifei Wang > --- > Link: > https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1- > m...@smartsharesystems.com/ > Good choice of test method - testing the same, but also with the zero-copy methods. Acked-by: Morten Brørup
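For readers unfamiliar with the zero-copy cache API exercised by these tests: the idea is to obtain a pointer into the mempool cache's object table and write object pointers directly, instead of going through rte_mempool_put_bulk(). A hedged sketch, with function name and return convention taken from the linked patch (experimental API, double-check the merged header):

    #include <string.h>
    #include <rte_mempool.h>

    /* Zero-copy put: reserve n slots in the lcore cache and copy the object
     * pointers straight into them. */
    static int
    zc_put_example(struct rte_mempool *mp, struct rte_mempool_cache *cache,
            void * const objs[], unsigned int n)
    {
        void **slot = rte_mempool_cache_zc_put_bulk(cache, mp, n);

        if (slot == NULL)
            return -1; /* request too large for the cache: fall back to the copying API */
        memcpy(slot, objs, n * sizeof(objs[0]));
        return 0;
    }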
Re: [PATCH v2 0/8] Enable building more libraries on Windows
Hello Stephen, On Tue, Feb 7, 2023 at 11:13 PM Dmitry Kozlyuk wrote: > > 2023-02-06 16:19 (UTC-0800), Stephen Hemminger: > > While diagnosing some Windows cross build errors, > > I noticed that lots of important DPDK libraries are not > > being built on Windows. > > > > Stephen Hemminger (8): > > net/null: build null PMD on Windows > > net/ring: build on Windows > > lpm: enable on Windows > > reorder: build on Windows > > ip_frag: enable build on Windows > > rib: enable on Windows > > fib: enable on Windows > > pcapng: windows compatibility > > > > v2 - fix unnecessary PATH_MAX in net/ring driver > > Looks like #include is still needed in rte_eth_ring.h > to avoid the fatal warning about `strdup()` with clang (MS CRT, actually). We need a new revision. Thanks. -- David Marchand
[PATCH 00/21] Add control queue & MQ support to Virtio-user vDPA
This series introduces control queue support for the Vhost-vDPA backend. This is a requirement to support multiqueue, but it will also be useful for other features, like RSS for example. Since the Virtio-user layer of the Virtio PMD must handle some control messages, like the number of queue pairs to be used by the device, a shadow control queue is created at the Virtio-user layer. Control messages from the regular Virtio control queue are still dequeued and handled if needed by the Virtio-user layer, and are then forwarded to the shadow control queue so that the physical vDPA device can handle them. This model is similar to the one adopted by the QEMU project. In order to avoid code duplication, virtqueue allocation and control queue message sending have been factored out of the Virtio layer to be reusable by the Virtio-user layer. Finally, in order to support vDPA hardware which may support a large number of queues, the last patch removes the 8 queue pairs limitation by dynamically allocating vring metadata. The series has been tested with Nvidia Cx-6 DX NIC with up to 16 queue pairs: # echo 0 > /sys/bus/pci/devices/\:3b\:00.0/sriov_numvfs # echo 0 > /sys/bus/pci/devices/\:3b\:00.1/sriov_numvfs # modprobe vhost_vdpa # modprobe mlx5_vdpa # echo 1 > /sys/bus/pci/devices/\:3b\:00.0/sriov_numvfs # echo :3b:00.2 >/sys/bus/pci/drivers/mlx5_core/unbind # devlink dev eswitch set pci/:3b:00.0 mode switchdev # echo :3b:00.2 >/sys/bus/pci/drivers/mlx5_core/bind # vdpa dev add name vdpa0 mgmtdev pci/:3b:00.2 mac 00:11:22:33:44:03 max_vqp 16 # ulimit -l unlimited # dpdk-testpmd -l 0,2,4,6 --socket-mem 1024,0 --vdev 'virtio_user0,path=/dev/vhost-vdpa-0' --no-pci -n 3 -- --nb-cores=3 -i --rxq=16 --txq=16 Changes in v3: == - Trivial code simplifications (Eugenio) Changes in v2: == - Fix double spaces (Chenbo) - Get rid of unneeded gotos (Stephen) - Only allocate packed ring metadata if supported (Chenbo) - Rebased on top of main Maxime Coquelin (21): net/virtio: move CVQ code into a dedicated file net/virtio: introduce notify callback for control queue net/virtio: virtqueue headers alloc refactoring net/virtio: remove port ID info from Rx queue net/virtio: remove unused fields in Tx queue struct net/virtio: remove unused queue ID field in Rx queue net/virtio: remove unused Port ID in control queue net/virtio: move vring memzone to virtqueue struct net/virtio: refactor indirect desc headers init net/virtio: alloc Rx SW ring only if vectorized path net/virtio: extract virtqueue init from virtio queue init net/virtio-user: fix device starting failure handling net/virtio-user: simplify queues setup net/virtio-user: use proper type for number of queue pairs net/virtio-user: get max number of queue pairs from device net/virtio-user: allocate shadow control queue net/virtio-user: send shadow virtqueue info to the backend net/virtio-user: add new callback to enable control queue net/virtio-user: forward control messages to shadow queue net/virtio-user: advertize control VQ support with vDPA net/virtio-user: remove max queues limitation drivers/net/virtio/meson.build| 1 + drivers/net/virtio/virtio.h | 6 - drivers/net/virtio/virtio_cvq.c | 229 + drivers/net/virtio/virtio_cvq.h | 127 + drivers/net/virtio/virtio_ethdev.c| 472 +- drivers/net/virtio/virtio_rxtx.c | 47 +- drivers/net/virtio/virtio_rxtx.h | 31 +- drivers/net/virtio/virtio_rxtx_packed.c | 3 +- drivers/net/virtio/virtio_rxtx_simple.c | 3 +- drivers/net/virtio/virtio_rxtx_simple.h | 7 +- .../net/virtio/virtio_rxtx_simple_altivec.c | 4 +- drivers/net/virtio/virtio_rxtx_simple_neon.c |
4 +- drivers/net/virtio/virtio_rxtx_simple_sse.c | 4 +- drivers/net/virtio/virtio_user/vhost.h| 1 + drivers/net/virtio/virtio_user/vhost_vdpa.c | 16 +- .../net/virtio/virtio_user/virtio_user_dev.c | 305 +-- .../net/virtio/virtio_user/virtio_user_dev.h | 30 +- drivers/net/virtio/virtio_user_ethdev.c | 49 +- drivers/net/virtio/virtqueue.c| 346 - drivers/net/virtio/virtqueue.h| 127 + 20 files changed, 1066 insertions(+), 746 deletions(-) create mode 100644 drivers/net/virtio/virtio_cvq.c create mode 100644 drivers/net/virtio/virtio_cvq.h -- 2.39.1
[PATCH 01/21] net/virtio: move CVQ code into a dedicated file
This patch moves Virtio control queue code into a dedicated file, as preliminary rework to support shadow control queue in Virtio-user. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/meson.build | 1 + drivers/net/virtio/virtio_cvq.c| 230 + drivers/net/virtio/virtio_cvq.h| 126 drivers/net/virtio/virtio_ethdev.c | 218 +-- drivers/net/virtio/virtio_rxtx.h | 9 -- drivers/net/virtio/virtqueue.h | 105 + 6 files changed, 359 insertions(+), 330 deletions(-) create mode 100644 drivers/net/virtio/virtio_cvq.c create mode 100644 drivers/net/virtio/virtio_cvq.h diff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build index d78b8278c6..0ffd77024e 100644 --- a/drivers/net/virtio/meson.build +++ b/drivers/net/virtio/meson.build @@ -9,6 +9,7 @@ endif sources += files( 'virtio.c', +'virtio_cvq.c', 'virtio_ethdev.c', 'virtio_pci_ethdev.c', 'virtio_pci.c', diff --git a/drivers/net/virtio/virtio_cvq.c b/drivers/net/virtio/virtio_cvq.c new file mode 100644 index 00..de4299a2a7 --- /dev/null +++ b/drivers/net/virtio/virtio_cvq.c @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2016 Intel Corporation + * Copyright(c) 2022 Red Hat Inc, + */ + +#include + +#include +#include +#include + +#include "virtio_cvq.h" +#include "virtqueue.h" + +static struct virtio_pmd_ctrl * +virtio_send_command_packed(struct virtnet_ctl *cvq, + struct virtio_pmd_ctrl *ctrl, + int *dlen, int pkt_num) +{ + struct virtqueue *vq = virtnet_cq_to_vq(cvq); + int head; + struct vring_packed_desc *desc = vq->vq_packed.ring.desc; + struct virtio_pmd_ctrl *result; + uint16_t flags; + int sum = 0; + int nb_descs = 0; + int k; + + /* +* Format is enforced in qemu code: +* One TX packet for header; +* At least one TX packet per argument; +* One RX packet for ACK. 
+*/ + head = vq->vq_avail_idx; + flags = vq->vq_packed.cached_flags; + desc[head].addr = cvq->virtio_net_hdr_mem; + desc[head].len = sizeof(struct virtio_net_ctrl_hdr); + vq->vq_free_cnt--; + nb_descs++; + if (++vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED; + } + + for (k = 0; k < pkt_num; k++) { + desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem + + sizeof(struct virtio_net_ctrl_hdr) + + sizeof(ctrl->status) + sizeof(uint8_t) * sum; + desc[vq->vq_avail_idx].len = dlen[k]; + desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT | + vq->vq_packed.cached_flags; + sum += dlen[k]; + vq->vq_free_cnt--; + nb_descs++; + if (++vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->vq_packed.cached_flags ^= + VRING_PACKED_DESC_F_AVAIL_USED; + } + } + + desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem + + sizeof(struct virtio_net_ctrl_hdr); + desc[vq->vq_avail_idx].len = sizeof(ctrl->status); + desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | + vq->vq_packed.cached_flags; + vq->vq_free_cnt--; + nb_descs++; + if (++vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED; + } + + virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags, + vq->hw->weak_barriers); + + virtio_wmb(vq->hw->weak_barriers); + virtqueue_notify(vq); + + /* wait for used desc in virtqueue +* desc_is_used has a load-acquire or rte_io_rmb inside +*/ + while (!desc_is_used(&desc[head], vq)) + usleep(100); + + /* now get used descriptors */ + vq->vq_free_cnt += nb_descs; + vq->vq_used_cons_idx += nb_descs; + if (vq->vq_used_cons_idx >= vq->vq_nentries) { + vq->vq_used_cons_idx -= vq->vq_nentries; + vq->vq_packed.used_wrap_counter ^= 1; + } + + PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n" + "vq->vq_avail_idx=%d\n" + "vq->vq_used_cons_idx=%d\n" + "vq->vq_packed.cached_flags=0x%x\n" + "vq->vq_packed.used_wrap_counter=%d", + vq->vq_free_cnt, + vq->vq_avail_idx, + vq->vq_used_cons_idx, + vq-
[PATCH 02/21] net/virtio: introduce notify callback for control queue
This patch introduces a notification callback for the control virtqueue as preliminary work to add shadow control virtqueue support. This new callback is required so that the shadow control queue implemented in Virtio-user does not call the notification op implemented for the driver layer. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_cvq.c| 4 ++-- drivers/net/virtio/virtio_cvq.h| 4 drivers/net/virtio/virtio_ethdev.c | 7 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_cvq.c b/drivers/net/virtio/virtio_cvq.c index de4299a2a7..cd25614df8 100644 --- a/drivers/net/virtio/virtio_cvq.c +++ b/drivers/net/virtio/virtio_cvq.c @@ -76,7 +76,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq, vq->hw->weak_barriers); virtio_wmb(vq->hw->weak_barriers); - virtqueue_notify(vq); + cvq->notify_queue(vq, cvq->notify_cookie); /* wait for used desc in virtqueue * desc_is_used has a load-acquire or rte_io_rmb inside */ @@ -155,7 +155,7 @@ virtio_send_command_split(struct virtnet_ctl *cvq, PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index); - virtqueue_notify(vq); + cvq->notify_queue(vq, cvq->notify_cookie); while (virtqueue_nused(vq) == 0) usleep(100); diff --git a/drivers/net/virtio/virtio_cvq.h b/drivers/net/virtio/virtio_cvq.h index 139e813ffb..224dc81422 100644 --- a/drivers/net/virtio/virtio_cvq.h +++ b/drivers/net/virtio/virtio_cvq.h @@ -7,6 +7,8 @@ #include +struct virtqueue; + /** * Control the RX mode, ie. promiscuous, allmulti, etc... * All commands require an "out" sg entry containing a 1 byte @@ -110,6 +112,8 @@ struct virtnet_ctl { uint16_t port_id; /**< Device port identifier. */ const struct rte_memzone *mz; /**< mem zone to populate CTL ring. */ rte_spinlock_t lock; /**< spinlock for control queue. */ + void (*notify_queue)(struct virtqueue *vq, void *cookie); /**< notify ops. */ + void *notify_cookie; /**< cookie for notify ops */ }; #define VIRTIO_MAX_CTRL_DATA 2048 diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index d3aa420c89..422c597c2b 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -253,6 +253,12 @@ virtio_init_vring(struct virtqueue *vq) virtqueue_disable_intr(vq); } +static void +virtio_control_queue_notify(struct virtqueue *vq, __rte_unused void *cookie) +{ + virtqueue_notify(vq); +} + static int virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) { @@ -421,6 +427,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size()); hw->cvq = cvq; + vq->cq.notify_queue = &virtio_control_queue_notify; } if (hw->use_va) -- 2.39.1
[PATCH 03/21] net/virtio: virtqueue headers alloc refactoring
This patch refactors virtqueue initialization by moving its headers allocation and deallocation in dedicated function. While at it, it renames the memzone metadata and address pointers in the virtnet_tx and virtnet_ctl structures to remove redundant virtio_net_ prefix. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_cvq.c| 19 ++-- drivers/net/virtio/virtio_cvq.h| 9 +- drivers/net/virtio/virtio_ethdev.c | 149 ++--- drivers/net/virtio/virtio_rxtx.c | 12 +-- drivers/net/virtio/virtio_rxtx.h | 12 +-- drivers/net/virtio/virtqueue.c | 8 +- drivers/net/virtio/virtqueue.h | 13 +-- 7 files changed, 126 insertions(+), 96 deletions(-) diff --git a/drivers/net/virtio/virtio_cvq.c b/drivers/net/virtio/virtio_cvq.c index cd25614df8..5e457f5fd0 100644 --- a/drivers/net/virtio/virtio_cvq.c +++ b/drivers/net/virtio/virtio_cvq.c @@ -34,7 +34,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq, */ head = vq->vq_avail_idx; flags = vq->vq_packed.cached_flags; - desc[head].addr = cvq->virtio_net_hdr_mem; + desc[head].addr = cvq->hdr_mem; desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; nb_descs++; @@ -44,7 +44,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq, } for (k = 0; k < pkt_num; k++) { - desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem + desc[vq->vq_avail_idx].addr = cvq->hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t) * sum; desc[vq->vq_avail_idx].len = dlen[k]; @@ -60,7 +60,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq, } } - desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem + desc[vq->vq_avail_idx].addr = cvq->hdr_mem + sizeof(struct virtio_net_ctrl_hdr); desc[vq->vq_avail_idx].len = sizeof(ctrl->status); desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE | @@ -103,7 +103,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq, vq->vq_packed.cached_flags, vq->vq_packed.used_wrap_counter); - result = cvq->virtio_net_hdr_mz->addr; + result = cvq->hdr_mz->addr; return result; } @@ -126,14 +126,14 @@ virtio_send_command_split(struct virtnet_ctl *cvq, * One RX packet for ACK. 
*/ vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT; - vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem; + vq->vq_split.ring.desc[head].addr = cvq->hdr_mem; vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; i = vq->vq_split.ring.desc[head].next; for (k = 0; k < pkt_num; k++) { vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT; - vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem + vq->vq_split.ring.desc[i].addr = cvq->hdr_mem + sizeof(struct virtio_net_ctrl_hdr) + sizeof(ctrl->status) + sizeof(uint8_t) * sum; vq->vq_split.ring.desc[i].len = dlen[k]; @@ -143,7 +143,7 @@ virtio_send_command_split(struct virtnet_ctl *cvq, } vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE; - vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem + vq->vq_split.ring.desc[i].addr = cvq->hdr_mem + sizeof(struct virtio_net_ctrl_hdr); vq->vq_split.ring.desc[i].len = sizeof(ctrl->status); vq->vq_free_cnt--; @@ -186,7 +186,7 @@ virtio_send_command_split(struct virtnet_ctl *cvq, PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", vq->vq_free_cnt, vq->vq_desc_head_idx); - result = cvq->virtio_net_hdr_mz->addr; + result = cvq->hdr_mz->addr; return result; } @@ -216,8 +216,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, int * return -1; } - memcpy(cvq->virtio_net_hdr_mz->addr, ctrl, - sizeof(struct virtio_pmd_ctrl)); + memcpy(cvq->hdr_mz->addr, ctrl, sizeof(struct virtio_pmd_ctrl)); if (virtio_with_packed_queue(vq->hw)) result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num); diff --git a/drivers/net/virtio/virtio_cvq.h b/drivers/net/virtio/virtio_cvq.h index 224dc81422..226561e6b8 100644 --- a/drivers/net/virtio/virtio_cvq.h +++ b/drivers/net/virtio/virtio_cvq.h @@ -106,11 +106,10 @@ struct virtio_net_ctrl_hdr { typedef uint8_t virtio_net_ctrl_ack; struct virtnet_ctl { - /**< memzone to populate hdr. */ - const struct rte_memzone *virtio_net_hdr_mz; - rte_iova_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ - uint16_t port_id; /**< Dev
[PATCH 05/21] net/virtio: remove unused fields in Tx queue struct
The port and queue IDs are not used in virtnet_tx struct, this patch removes them. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_ethdev.c | 1 - drivers/net/virtio/virtio_rxtx.c | 1 - drivers/net/virtio/virtio_rxtx.h | 3 --- 3 files changed, 5 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 1c10c16ca7..56395f79c3 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -466,7 +466,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) rxvq->fake_mbuf = fake_mbuf; } else if (queue_type == VTNET_TQ) { txvq = &vq->txq; - txvq->port_id = dev->data->port_id; txvq->mz = mz; } else if (queue_type == VTNET_CQ) { cvq = &vq->cq; diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 45c04aa3f8..304403d46c 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -831,7 +831,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); txvq = &vq->txq; - txvq->queue_id = queue_idx; tx_free_thresh = tx_conf->tx_free_thresh; if (tx_free_thresh == 0) diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h index 97de9eb0a3..9bbcf32f66 100644 --- a/drivers/net/virtio/virtio_rxtx.h +++ b/drivers/net/virtio/virtio_rxtx.h @@ -35,9 +35,6 @@ struct virtnet_tx { const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */ rte_iova_t hdr_mem; /**< hdr for each xmit packet */ - uint16_tqueue_id; /**< DPDK queue index. */ - uint16_tport_id; /**< Device port identifier. */ - struct virtnet_stats stats; /* Statistics */ const struct rte_memzone *mz;/**< mem zone to populate TX ring. */ -- 2.39.1
[PATCH 04/21] net/virtio: remove port ID info from Rx queue
The port ID information is duplicated in several places. This patch removes it from the virtnet_rx struct as it can be found in virtio_hw struct. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_ethdev.c | 1 - drivers/net/virtio/virtio_rxtx.c| 25 ++--- drivers/net/virtio/virtio_rxtx.h| 1 - drivers/net/virtio/virtio_rxtx_packed.c | 3 +-- drivers/net/virtio/virtio_rxtx_simple.c | 3 ++- drivers/net/virtio/virtio_rxtx_simple.h | 5 +++-- 6 files changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 057388cfaf..1c10c16ca7 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -462,7 +462,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) vq->sw_ring = sw_ring; rxvq = &vq->rxq; - rxvq->port_id = dev->data->port_id; rxvq->mz = mz; rxvq->fake_mbuf = fake_mbuf; } else if (queue_type == VTNET_TQ) { diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index bd95e8ceb5..45c04aa3f8 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -1024,7 +1024,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) continue; } - rxm->port = rxvq->port_id; + rxm->port = hw->port_id; rxm->data_off = RTE_PKTMBUF_HEADROOM; rxm->ol_flags = 0; rxm->vlan_tci = 0; @@ -1066,8 +1066,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } nb_enqueued += free_cnt; } else { - struct rte_eth_dev *dev = - &rte_eth_devices[rxvq->port_id]; + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; dev->data->rx_mbuf_alloc_failed += free_cnt; } } @@ -1127,7 +1126,7 @@ virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, continue; } - rxm->port = rxvq->port_id; + rxm->port = hw->port_id; rxm->data_off = RTE_PKTMBUF_HEADROOM; rxm->ol_flags = 0; rxm->vlan_tci = 0; @@ -1169,8 +1168,7 @@ virtio_recv_pkts_packed(void *rx_queue, struct rte_mbuf **rx_pkts, } nb_enqueued += free_cnt; } else { - struct rte_eth_dev *dev = - &rte_eth_devices[rxvq->port_id]; + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; dev->data->rx_mbuf_alloc_failed += free_cnt; } } @@ -1258,7 +1256,7 @@ virtio_recv_pkts_inorder(void *rx_queue, rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); - rxm->port = rxvq->port_id; + rxm->port = hw->port_id; rx_pkts[nb_rx] = rxm; prev = rxm; @@ -1352,8 +1350,7 @@ virtio_recv_pkts_inorder(void *rx_queue, } nb_enqueued += free_cnt; } else { - struct rte_eth_dev *dev = - &rte_eth_devices[rxvq->port_id]; + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; dev->data->rx_mbuf_alloc_failed += free_cnt; } } @@ -1437,7 +1434,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); - rxm->port = rxvq->port_id; + rxm->port = hw->port_id; rx_pkts[nb_rx] = rxm; prev = rxm; @@ -1530,8 +1527,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, } nb_enqueued += free_cnt; } else { - struct rte_eth_dev *dev = - &rte_eth_devices[rxvq->port_id]; + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; dev->data->rx_mbuf_alloc_failed += free_cnt; } } @@ -1610,7 +1606,7 @@ virtio_recv_mergeable_pkts_packed(void *rx_queue, rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); - rxm->port = rxvq->port_id; + rxm->port = hw->port_id; rx_pkts[nb_rx] = rxm; prev = rxm; @@ -1699,8 +1695,7 
@@ virtio_recv_mergeable_p
[PATCH 06/21] net/virtio: remove unused queue ID field in Rx queue
This patch removes the queue ID field in virtnet_rx struct. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_rxtx.c | 1 - drivers/net/virtio/virtio_rxtx.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 304403d46c..4f69b97f41 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -703,7 +703,6 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); rxvq = &vq->rxq; - rxvq->queue_id = queue_idx; rxvq->mpool = mp; dev->data->rx_queues[queue_idx] = rxvq; diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h index 9bbcf32f66..a5fe3ea95c 100644 --- a/drivers/net/virtio/virtio_rxtx.h +++ b/drivers/net/virtio/virtio_rxtx.h @@ -23,8 +23,6 @@ struct virtnet_rx { uint64_t mbuf_initializer; /**< value to init mbufs. */ struct rte_mempool *mpool; /**< mempool for mbuf allocation */ - uint16_t queue_id; /**< DPDK queue index. */ - /* Statistics */ struct virtnet_stats stats; -- 2.39.1
[PATCH 07/21] net/virtio: remove unused Port ID in control queue
This patch removes the unused port ID information from virtnet_ctl struct. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_cvq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/virtio/virtio_cvq.h b/drivers/net/virtio/virtio_cvq.h index 226561e6b8..0ff326b063 100644 --- a/drivers/net/virtio/virtio_cvq.h +++ b/drivers/net/virtio/virtio_cvq.h @@ -108,7 +108,6 @@ typedef uint8_t virtio_net_ctrl_ack; struct virtnet_ctl { const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */ rte_iova_t hdr_mem; /**< hdr for each xmit packet */ - uint16_t port_id; /**< Device port identifier. */ const struct rte_memzone *mz; /**< mem zone to populate CTL ring. */ rte_spinlock_t lock; /**< spinlock for control queue. */ void (*notify_queue)(struct virtqueue *vq, void *cookie); /**< notify ops. */ -- 2.39.1
[PATCH 08/21] net/virtio: move vring memzone to virtqueue struct
Whatever its type (Rx, Tx or Ctl), all the virtqueue require a memzone for the vrings. This patch moves its pointer to the virtqueue struct, simplifying the code. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_cvq.h| 1 - drivers/net/virtio/virtio_ethdev.c | 11 ++- drivers/net/virtio/virtio_rxtx.h | 4 drivers/net/virtio/virtqueue.c | 6 ++ drivers/net/virtio/virtqueue.h | 1 + 5 files changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/net/virtio/virtio_cvq.h b/drivers/net/virtio/virtio_cvq.h index 0ff326b063..70739ae04b 100644 --- a/drivers/net/virtio/virtio_cvq.h +++ b/drivers/net/virtio/virtio_cvq.h @@ -108,7 +108,6 @@ typedef uint8_t virtio_net_ctrl_ack; struct virtnet_ctl { const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */ rte_iova_t hdr_mem; /**< hdr for each xmit packet */ - const struct rte_memzone *mz; /**< mem zone to populate CTL ring. */ rte_spinlock_t lock; /**< spinlock for control queue. */ void (*notify_queue)(struct virtqueue *vq, void *cookie); /**< notify ops. */ void *notify_cookie; /**< cookie for notify ops */ diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 56395f79c3..4f6d777951 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -423,6 +423,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) memset(mz->addr, 0, mz->len); + vq->mz = mz; if (hw->use_va) vq->vq_ring_mem = (uintptr_t)mz->addr; else @@ -462,14 +463,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) vq->sw_ring = sw_ring; rxvq = &vq->rxq; - rxvq->mz = mz; rxvq->fake_mbuf = fake_mbuf; } else if (queue_type == VTNET_TQ) { txvq = &vq->txq; - txvq->mz = mz; } else if (queue_type == VTNET_CQ) { cvq = &vq->cq; - cvq->mz = mz; hw->cvq = cvq; vq->cq.notify_queue = &virtio_control_queue_notify; } @@ -550,15 +548,10 @@ virtio_free_queues(struct virtio_hw *hw) if (queue_type == VTNET_RQ) { rte_free(vq->rxq.fake_mbuf); rte_free(vq->sw_ring); - rte_memzone_free(vq->rxq.mz); - } else if (queue_type == VTNET_TQ) { - rte_memzone_free(vq->txq.mz); - } else { - rte_memzone_free(vq->cq.mz); } virtio_free_queue_headers(vq); - + rte_memzone_free(vq->mz); rte_free(vq); hw->vqs[i] = NULL; } diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h index a5fe3ea95c..57af630110 100644 --- a/drivers/net/virtio/virtio_rxtx.h +++ b/drivers/net/virtio/virtio_rxtx.h @@ -25,8 +25,6 @@ struct virtnet_rx { /* Statistics */ struct virtnet_stats stats; - - const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ }; struct virtnet_tx { @@ -34,8 +32,6 @@ struct virtnet_tx { rte_iova_t hdr_mem; /**< hdr for each xmit packet */ struct virtnet_stats stats; /* Statistics */ - - const struct rte_memzone *mz;/**< mem zone to populate TX ring. 
*/ }; int virtio_rxq_vec_setup(struct virtnet_rx *rxvq); diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c index 3b174a5923..41e3529546 100644 --- a/drivers/net/virtio/virtqueue.c +++ b/drivers/net/virtio/virtqueue.c @@ -148,7 +148,6 @@ virtqueue_rxvq_reset_packed(struct virtqueue *vq) { int size = vq->vq_nentries; struct vq_desc_extra *dxp; - struct virtnet_rx *rxvq; uint16_t desc_idx; vq->vq_used_cons_idx = 0; @@ -162,8 +161,7 @@ virtqueue_rxvq_reset_packed(struct virtqueue *vq) vq->vq_packed.event_flags_shadow = 0; vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE; - rxvq = &vq->rxq; - memset(rxvq->mz->addr, 0, rxvq->mz->len); + memset(vq->mz->addr, 0, vq->mz->len); for (desc_idx = 0; desc_idx < vq->vq_nentries; desc_idx++) { dxp = &vq->vq_descx[desc_idx]; @@ -201,7 +199,7 @@ virtqueue_txvq_reset_packed(struct virtqueue *vq) txvq = &vq->txq; txr = txvq->hdr_mz->addr; - memset(txvq->mz->addr, 0, txvq->mz->len); + memset(vq->mz->addr, 0, vq->mz->len); memset(txvq->hdr_mz->addr, 0, txvq->hdr_mz->len); for (desc_idx = 0; desc_idx < vq->vq_nentries; desc_idx++) { diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index f5058f362c..8b7bfae643 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -201,6 +201,
[PATCH 09/21] net/virtio: refactor indirect desc headers init
This patch refactors the indirect descriptors headers initialization in a dedicated function, and makes it used by both queue init and reset functions. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_ethdev.c | 30 + drivers/net/virtio/virtqueue.c | 68 ++ drivers/net/virtio/virtqueue.h | 2 + 3 files changed, 54 insertions(+), 46 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 4f6d777951..f839a24d12 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -347,7 +347,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) unsigned int vq_size, size; struct virtio_hw *hw = dev->data->dev_private; struct virtnet_rx *rxvq = NULL; - struct virtnet_tx *txvq = NULL; struct virtnet_ctl *cvq = NULL; struct virtqueue *vq; void *sw_ring = NULL; @@ -465,7 +464,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) rxvq = &vq->rxq; rxvq->fake_mbuf = fake_mbuf; } else if (queue_type == VTNET_TQ) { - txvq = &vq->txq; + virtqueue_txq_indirect_headers_init(vq); } else if (queue_type == VTNET_CQ) { cvq = &vq->cq; hw->cvq = cvq; @@ -477,33 +476,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) else vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova); - if (queue_type == VTNET_TQ) { - struct virtio_tx_region *txr; - unsigned int i; - - txr = txvq->hdr_mz->addr; - for (i = 0; i < vq_size; i++) { - /* first indirect descriptor is always the tx header */ - if (!virtio_with_packed_queue(hw)) { - struct vring_desc *start_dp = txr[i].tx_indir; - vring_desc_init_split(start_dp, - RTE_DIM(txr[i].tx_indir)); - start_dp->addr = txvq->hdr_mem + i * sizeof(*txr) - + offsetof(struct virtio_tx_region, tx_hdr); - start_dp->len = hw->vtnet_hdr_size; - start_dp->flags = VRING_DESC_F_NEXT; - } else { - struct vring_packed_desc *start_dp = - txr[i].tx_packed_indir; - vring_desc_init_indirect_packed(start_dp, - RTE_DIM(txr[i].tx_packed_indir)); - start_dp->addr = txvq->hdr_mem + i * sizeof(*txr) - + offsetof(struct virtio_tx_region, tx_hdr); - start_dp->len = hw->vtnet_hdr_size; - } - } - } - if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) { PMD_INIT_LOG(ERR, "setup_queue failed"); ret = -EINVAL; diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c index 41e3529546..fb651a4ca3 100644 --- a/drivers/net/virtio/virtqueue.c +++ b/drivers/net/virtio/virtqueue.c @@ -143,6 +143,54 @@ virtqueue_rxvq_flush(struct virtqueue *vq) virtqueue_rxvq_flush_split(vq); } +static void +virtqueue_txq_indirect_header_init_packed(struct virtqueue *vq, uint32_t idx) +{ + struct virtio_tx_region *txr; + struct vring_packed_desc *desc; + rte_iova_t hdr_mem; + + txr = vq->txq.hdr_mz->addr; + hdr_mem = vq->txq.hdr_mem; + desc = txr[idx].tx_packed_indir; + + vring_desc_init_indirect_packed(desc, RTE_DIM(txr[idx].tx_packed_indir)); + desc->addr = hdr_mem + idx * sizeof(*txr) + offsetof(struct virtio_tx_region, tx_hdr); + desc->len = vq->hw->vtnet_hdr_size; +} + +static void +virtqueue_txq_indirect_header_init_split(struct virtqueue *vq, uint32_t idx) +{ + struct virtio_tx_region *txr; + struct vring_desc *desc; + rte_iova_t hdr_mem; + + txr = vq->txq.hdr_mz->addr; + hdr_mem = vq->txq.hdr_mem; + desc = txr[idx].tx_indir; + + vring_desc_init_split(desc, RTE_DIM(txr[idx].tx_indir)); + desc->addr = hdr_mem + idx * sizeof(*txr) + offsetof(struct virtio_tx_region, tx_hdr); + desc->len = vq->hw->vtnet_hdr_size; + desc->flags = VRING_DESC_F_NEXT; +} + +void 
+virtqueue_txq_indirect_headers_init(struct virtqueue *vq) +{ + uint32_t i; + + if (!virtio_with_feature(vq->hw, VIRTIO_RING_F_INDIRECT_DESC)) + return; + + for (i = 0; i < vq->vq_nentries; i++) + if (virtio_with_packed_queue(vq->hw)) + virtqueue_txq_indirect_header_init_packed(vq, i); + else +
[PATCH 10/21] net/virtio: alloc Rx SW ring only if vectorized path
This patch only allocates the SW ring when vectorized datapath is used. It also moves the SW ring and fake mbuf in the virtnet_rx struct since this is Rx-only. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_ethdev.c| 88 --- drivers/net/virtio/virtio_rxtx.c | 8 +- drivers/net/virtio/virtio_rxtx.h | 4 +- drivers/net/virtio/virtio_rxtx_simple.h | 2 +- .../net/virtio/virtio_rxtx_simple_altivec.c | 4 +- drivers/net/virtio/virtio_rxtx_simple_neon.c | 4 +- drivers/net/virtio/virtio_rxtx_simple_sse.c | 4 +- drivers/net/virtio/virtqueue.c| 6 +- drivers/net/virtio/virtqueue.h| 1 - 9 files changed, 72 insertions(+), 49 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index f839a24d12..14c5dc9059 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -339,6 +339,47 @@ virtio_free_queue_headers(struct virtqueue *vq) *hdr_mem = 0; } +static int +virtio_rxq_sw_ring_alloc(struct virtqueue *vq, int numa_node) +{ + void *sw_ring; + struct rte_mbuf *mbuf; + size_t size; + + /* SW ring is only used with vectorized datapath */ + if (!vq->hw->use_vec_rx) + return 0; + + size = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq->vq_nentries) * sizeof(vq->rxq.sw_ring[0]); + + sw_ring = rte_zmalloc_socket("sw_ring", size, RTE_CACHE_LINE_SIZE, numa_node); + if (!sw_ring) { + PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); + return -ENOMEM; + } + + mbuf = rte_zmalloc_socket("sw_ring", sizeof(*mbuf), RTE_CACHE_LINE_SIZE, numa_node); + if (!mbuf) { + PMD_INIT_LOG(ERR, "can not allocate fake mbuf"); + rte_free(sw_ring); + return -ENOMEM; + } + + vq->rxq.sw_ring = sw_ring; + vq->rxq.fake_mbuf = mbuf; + + return 0; +} + +static void +virtio_rxq_sw_ring_free(struct virtqueue *vq) +{ + rte_free(vq->rxq.fake_mbuf); + vq->rxq.fake_mbuf = NULL; + rte_free(vq->rxq.sw_ring); + vq->rxq.sw_ring = NULL; +} + static int virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) { @@ -346,14 +387,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) const struct rte_memzone *mz = NULL; unsigned int vq_size, size; struct virtio_hw *hw = dev->data->dev_private; - struct virtnet_rx *rxvq = NULL; struct virtnet_ctl *cvq = NULL; struct virtqueue *vq; - void *sw_ring = NULL; int queue_type = virtio_get_queue_type(hw, queue_idx); int ret; int numa_node = dev->device->numa_node; - struct rte_mbuf *fake_mbuf = NULL; PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d", queue_idx, numa_node); @@ -441,28 +479,9 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) } if (queue_type == VTNET_RQ) { - size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * - sizeof(vq->sw_ring[0]); - - sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, - RTE_CACHE_LINE_SIZE, numa_node); - if (!sw_ring) { - PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); - ret = -ENOMEM; + ret = virtio_rxq_sw_ring_alloc(vq, numa_node); + if (ret) goto free_hdr_mz; - } - - fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf), - RTE_CACHE_LINE_SIZE, numa_node); - if (!fake_mbuf) { - PMD_INIT_LOG(ERR, "can not allocate fake mbuf"); - ret = -ENOMEM; - goto free_sw_ring; - } - - vq->sw_ring = sw_ring; - rxvq = &vq->rxq; - rxvq->fake_mbuf = fake_mbuf; } else if (queue_type == VTNET_TQ) { virtqueue_txq_indirect_headers_init(vq); } else if (queue_type == VTNET_CQ) { @@ -486,9 +505,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) clean_vq: hw->cvq = NULL; - rte_free(fake_mbuf); -free_sw_ring: - rte_free(sw_ring); + if 
(queue_type == VTNET_RQ) + virtio_rxq_sw_ring_free(vq); free_hdr_mz: virtio_free_queue_headers(vq); free_mz: @@ -519,7 +537,7 @@ virtio_free_queues(struct virtio_hw *hw) queue_type = virtio_get_queue_type(hw, i); if (queue_type == VTNET_RQ) { rte_free(vq->rxq.fake_mbuf); - rte_free(vq->sw_ring); + rte_free(vq->rxq.sw_ring); } virtio_free_queue_headers(vq); @@ -
[PATCH 11/21] net/virtio: extract virtqueue init from virtio queue init
This patch extracts the virtqueue initialization out of the Virtio ethdev queue initialization, as preliminary work to provide a way for Virtio-user to allocate its shadow control virtqueue. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_ethdev.c | 261 ++-- drivers/net/virtio/virtqueue.c | 266 + drivers/net/virtio/virtqueue.h | 5 + 3 files changed, 282 insertions(+), 250 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 14c5dc9059..0103d95920 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -221,173 +221,18 @@ virtio_get_nr_vq(struct virtio_hw *hw) return nr_vq; } -static void -virtio_init_vring(struct virtqueue *vq) -{ - int size = vq->vq_nentries; - uint8_t *ring_mem = vq->vq_ring_virt_mem; - - PMD_INIT_FUNC_TRACE(); - - memset(ring_mem, 0, vq->vq_ring_size); - - vq->vq_used_cons_idx = 0; - vq->vq_desc_head_idx = 0; - vq->vq_avail_idx = 0; - vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); - vq->vq_free_cnt = vq->vq_nentries; - memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); - if (virtio_with_packed_queue(vq->hw)) { - vring_init_packed(&vq->vq_packed.ring, ring_mem, - VIRTIO_VRING_ALIGN, size); - vring_desc_init_packed(vq, size); - } else { - struct vring *vr = &vq->vq_split.ring; - - vring_init_split(vr, ring_mem, VIRTIO_VRING_ALIGN, size); - vring_desc_init_split(vr->desc, size); - } - /* -* Disable device(host) interrupting guest -*/ - virtqueue_disable_intr(vq); -} - static void virtio_control_queue_notify(struct virtqueue *vq, __rte_unused void *cookie) { virtqueue_notify(vq); } -static int -virtio_alloc_queue_headers(struct virtqueue *vq, int numa_node, const char *name) -{ - char hdr_name[VIRTQUEUE_MAX_NAME_SZ]; - const struct rte_memzone **hdr_mz; - rte_iova_t *hdr_mem; - ssize_t size; - int queue_type; - - queue_type = virtio_get_queue_type(vq->hw, vq->vq_queue_index); - switch (queue_type) { - case VTNET_TQ: - /* -* For each xmit packet, allocate a virtio_net_hdr -* and indirect ring elements -*/ - size = vq->vq_nentries * sizeof(struct virtio_tx_region); - hdr_mz = &vq->txq.hdr_mz; - hdr_mem = &vq->txq.hdr_mem; - break; - case VTNET_CQ: - /* Allocate a page for control vq command, data and status */ - size = rte_mem_page_size(); - hdr_mz = &vq->cq.hdr_mz; - hdr_mem = &vq->cq.hdr_mem; - break; - case VTNET_RQ: - /* fallthrough */ - default: - return 0; - } - - snprintf(hdr_name, sizeof(hdr_name), "%s_hdr", name); - *hdr_mz = rte_memzone_reserve_aligned(hdr_name, size, numa_node, - RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE); - if (*hdr_mz == NULL) { - if (rte_errno == EEXIST) - *hdr_mz = rte_memzone_lookup(hdr_name); - if (*hdr_mz == NULL) - return -ENOMEM; - } - - memset((*hdr_mz)->addr, 0, size); - - if (vq->hw->use_va) - *hdr_mem = (uintptr_t)(*hdr_mz)->addr; - else - *hdr_mem = (uintptr_t)(*hdr_mz)->iova; - - return 0; -} - -static void -virtio_free_queue_headers(struct virtqueue *vq) -{ - const struct rte_memzone **hdr_mz; - rte_iova_t *hdr_mem; - int queue_type; - - queue_type = virtio_get_queue_type(vq->hw, vq->vq_queue_index); - switch (queue_type) { - case VTNET_TQ: - hdr_mz = &vq->txq.hdr_mz; - hdr_mem = &vq->txq.hdr_mem; - break; - case VTNET_CQ: - hdr_mz = &vq->cq.hdr_mz; - hdr_mem = &vq->cq.hdr_mem; - break; - case VTNET_RQ: - /* fallthrough */ - default: - return; - } - - rte_memzone_free(*hdr_mz); - *hdr_mz = NULL; - *hdr_mem = 0; -} - -static int -virtio_rxq_sw_ring_alloc(struct virtqueue *vq, 
int numa_node) -{ - void *sw_ring; - struct rte_mbuf *mbuf; - size_t size; - - /* SW ring is only used with vectorized datapath */ - if (!vq->hw->use_vec_rx) - return 0; - - size = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq->vq_nentries) * sizeof(vq->rxq.sw_ring[0]); - - sw_ring = rte_zmalloc_socket("sw_ring", size, RTE_CACHE_LINE_SIZE, numa_node); - if (!sw_ring) { - PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); -
[PATCH 12/21] net/virtio-user: fix device starting failure handling
If the device fails to start, read the status from the device and return early. Fixes: 57912824615f ("net/virtio-user: support vhost status setting") Cc: sta...@dpdk.org Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_user_ethdev.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index d32abec327..78b1ed9ace 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -90,10 +90,15 @@ virtio_user_set_status(struct virtio_hw *hw, uint8_t status) if (status & VIRTIO_CONFIG_STATUS_FEATURES_OK && ~old_status & VIRTIO_CONFIG_STATUS_FEATURES_OK) virtio_user_dev_set_features(dev); - if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) - virtio_user_start_device(dev); - else if (status == VIRTIO_CONFIG_STATUS_RESET) + + if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) { + if (virtio_user_start_device(dev)) { + virtio_user_dev_update_status(dev); + return; + } + } else if (status == VIRTIO_CONFIG_STATUS_RESET) { virtio_user_reset(hw); + } virtio_user_dev_set_status(dev, status); } -- 2.39.1
[PATCH 13/21] net/virtio-user: simplify queues setup
The only reason two loops were needed to iterate over queues at setup time was to be able to print whether it was a Tx or Rx queue. This patch changes queues iteration to a single loop. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 16 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 19599aa3f6..873c6aa036 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -118,19 +118,11 @@ static int virtio_user_queue_setup(struct virtio_user_dev *dev, int (*fn)(struct virtio_user_dev *, uint32_t)) { - uint32_t i, queue_sel; + uint32_t i; - for (i = 0; i < dev->max_queue_pairs; ++i) { - queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX; - if (fn(dev, queue_sel) < 0) { - PMD_DRV_LOG(ERR, "(%s) setup rx vq %u failed", dev->path, i); - return -1; - } - } - for (i = 0; i < dev->max_queue_pairs; ++i) { - queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX; - if (fn(dev, queue_sel) < 0) { - PMD_DRV_LOG(INFO, "(%s) setup tx vq %u failed", dev->path, i); + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + if (fn(dev, i) < 0) { + PMD_DRV_LOG(ERR, "(%s) setup VQ %u failed", dev->path, i); return -1; } } -- 2.39.1
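A note on why the single loop is equivalent: virtio-net numbers its data virtqueues so that queue pair i owns receive queue 2*i and transmit queue 2*i + 1 (VTNET_SQ_RQ_QUEUE_IDX is 0 and VTNET_SQ_TQ_QUEUE_IDX is 1 in the driver), so iterating over [0, 2 * max_queue_pairs) visits exactly the queues the two former loops visited. A minimal standalone sketch of that mapping, for illustration only (the helper below is not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Illustration only: queue pair i maps to RX virtqueue 2*i and TX
 * virtqueue 2*i + 1, so one flat loop over [0, 2 * max_queue_pairs)
 * covers every data virtqueue exactly once. */
static void
print_vq_layout(uint16_t max_queue_pairs)
{
	uint32_t vq;

	for (vq = 0; vq < (uint32_t)max_queue_pairs * 2; vq++)
		printf("vq %u is the %s queue of pair %u\n",
		       vq, (vq & 1) ? "TX" : "RX", vq / 2);
}

int
main(void)
{
	print_vq_layout(2); /* vq 0: RX0, vq 1: TX0, vq 2: RX1, vq 3: TX1 */
	return 0;
}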
[PATCH 14/21] net/virtio-user: use proper type for number of queue pairs
The number of queue pairs is specified as a 16 bits unsigned int in the Virtio specification. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio_user/virtio_user_dev.c | 2 +- drivers/net/virtio/virtio_user/virtio_user_dev.h | 6 +++--- drivers/net/virtio/virtio_user_ethdev.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 873c6aa036..809c9ef442 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -553,7 +553,7 @@ virtio_user_dev_setup(struct virtio_user_dev *dev) 1ULL << VIRTIO_F_RING_PACKED) int -virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, +virtio_user_dev_init(struct virtio_user_dev *dev, char *path, uint16_t queues, int cq, int queue_size, const char *mac, char **ifname, int server, int mrg_rxbuf, int in_order, int packed_vq, enum virtio_user_backend_type backend_type) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index 819f6463ba..3c5453eac0 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -32,8 +32,8 @@ struct virtio_user_dev { int callfds[VIRTIO_MAX_VIRTQUEUES]; int kickfds[VIRTIO_MAX_VIRTQUEUES]; int mac_specified; - uint32_tmax_queue_pairs; - uint32_tqueue_pairs; + uint16_tmax_queue_pairs; + uint16_tqueue_pairs; uint32_tqueue_size; uint64_tfeatures; /* the negotiated features with driver, * and will be sync with device @@ -64,7 +64,7 @@ struct virtio_user_dev { int virtio_user_dev_set_features(struct virtio_user_dev *dev); int virtio_user_start_device(struct virtio_user_dev *dev); int virtio_user_stop_device(struct virtio_user_dev *dev); -int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, +int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, uint16_t queues, int cq, int queue_size, const char *mac, char **ifname, int server, int mrg_rxbuf, int in_order, int packed_vq, diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 78b1ed9ace..6ad5896378 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -655,7 +655,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *vdev) dev = eth_dev->data->dev_private; hw = &dev->hw; - if (virtio_user_dev_init(dev, path, queues, cq, + if (virtio_user_dev_init(dev, path, (uint16_t)queues, cq, queue_size, mac_addr, &ifname, server_mode, mrg_rxbuf, in_order, packed_vq, backend_type) < 0) { PMD_INIT_LOG(ERR, "virtio_user_dev_init fails"); -- 2.39.1
[PATCH 15/21] net/virtio-user: get max number of queue pairs from device
When supported by the backend (only vDPA for now), this patch gets the maximum number of queue pairs supported by the device by querying it in its config space. This is required for adding backend control queue support, as its index equals the maximum number of queues supported by the device, as described by the Virtio specification. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Acked-by: Eugenio Pérez --- .../net/virtio/virtio_user/virtio_user_dev.c | 93 ++- drivers/net/virtio/virtio_user_ethdev.c | 7 -- 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 809c9ef442..a3584e7735 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -253,6 +253,50 @@ int virtio_user_stop_device(struct virtio_user_dev *dev) return -1; } +static int +virtio_user_dev_init_max_queue_pairs(struct virtio_user_dev *dev, uint32_t user_max_qp) +{ + int ret; + + if (!(dev->device_features & (1ULL << VIRTIO_NET_F_MQ))) { + dev->max_queue_pairs = 1; + return 0; + } + + if (!dev->ops->get_config) { + dev->max_queue_pairs = user_max_qp; + return 0; + } + + ret = dev->ops->get_config(dev, (uint8_t *)&dev->max_queue_pairs, + offsetof(struct virtio_net_config, max_virtqueue_pairs), + sizeof(uint16_t)); + if (ret) { + /* +* We need to know the max queue pair from the device so that +* the control queue gets the right index. +*/ + dev->max_queue_pairs = 1; + PMD_DRV_LOG(ERR, "(%s) Failed to get max queue pairs from device", dev->path); + + return ret; + } + + if (dev->max_queue_pairs > VIRTIO_MAX_VIRTQUEUE_PAIRS) { + /* +* If the device supports control queue, the control queue +* index is max_virtqueue_pairs * 2. Disable MQ if it happens.
+*/ + PMD_DRV_LOG(ERR, "(%s) Device advertises too many queues (%u, max supported %u)", + dev->path, dev->max_queue_pairs, VIRTIO_MAX_VIRTQUEUE_PAIRS); + dev->max_queue_pairs = 1; + + return -1; + } + + return 0; +} + int virtio_user_dev_set_mac(struct virtio_user_dev *dev) { @@ -511,24 +555,7 @@ virtio_user_dev_setup(struct virtio_user_dev *dev) return -1; } - if (virtio_user_dev_init_notify(dev) < 0) { - PMD_INIT_LOG(ERR, "(%s) Failed to init notifiers", dev->path); - goto destroy; - } - - if (virtio_user_fill_intr_handle(dev) < 0) { - PMD_INIT_LOG(ERR, "(%s) Failed to init interrupt handler", dev->path); - goto uninit; - } - return 0; - -uninit: - virtio_user_dev_uninit_notify(dev); -destroy: - dev->ops->destroy(dev); - - return -1; } /* Use below macro to filter features from vhost backend */ @@ -570,7 +597,6 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, uint16_t queues, } dev->started = 0; - dev->max_queue_pairs = queues; dev->queue_pairs = 1; /* mq disabled by default */ dev->queue_size = queue_size; dev->is_server = server; @@ -591,23 +617,39 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, uint16_t queues, if (dev->ops->set_owner(dev) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to set backend owner", dev->path); - return -1; + goto destroy; } if (dev->ops->get_backend_features(&backend_features) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to get backend features", dev->path); - return -1; + goto destroy; } dev->unsupported_features = ~(VIRTIO_USER_SUPPORTED_FEATURES | backend_features); if (dev->ops->get_features(dev, &dev->device_features) < 0) { PMD_INIT_LOG(ERR, "(%s) Failed to get device features", dev->path); - return -1; + goto destroy; } virtio_user_dev_init_mac(dev, mac); + if (virtio_user_dev_init_max_queue_pairs(dev, queues)) + dev->unsupported_features |= (1ull << VIRTIO_NET_F_MQ); + + if (dev->max_queue_pairs > 1) + cq = 1; + + if (virtio_user_dev_init_notify(dev) < 0) { + PMD_INIT_LOG(ERR, "(%s) Failed to init notifiers", dev->path); + goto destroy; + } + + if (virtio_user_fill_intr_handle(dev) < 0) { + PMD_INIT_LOG(ERR, "(%s) Failed to init interrupt handler", dev->path); + goto notify_uninit; + } + if (!mrg_rx
[PATCH 16/21] net/virtio-user: allocate shadow control queue
If the backends supports control virtqueue, allocate a shadow control virtqueue, and implement the notify callback that writes into the kickfd. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Acked-by: Eugenio Pérez --- .../net/virtio/virtio_user/virtio_user_dev.c | 47 ++- .../net/virtio/virtio_user/virtio_user_dev.h | 5 ++ drivers/net/virtio/virtio_user_ethdev.c | 6 +++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index a3584e7735..16a0e07413 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -146,8 +146,9 @@ virtio_user_dev_set_features(struct virtio_user_dev *dev) /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */ features &= ~(1ull << VIRTIO_NET_F_MAC); - /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */ - features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Strip VIRTIO_NET_F_CTRL_VQ if the devices does not really support control VQ */ + if (!dev->hw_cvq) + features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); features &= ~(1ull << VIRTIO_NET_F_STATUS); ret = dev->ops->set_features(dev, features); if (ret < 0) @@ -911,6 +912,48 @@ virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) } } +static void +virtio_user_control_queue_notify(struct virtqueue *vq, void *cookie) +{ + struct virtio_user_dev *dev = cookie; + uint64_t buf = 1; + + if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) + PMD_DRV_LOG(ERR, "failed to kick backend: %s", + strerror(errno)); +} + +int +virtio_user_dev_create_shadow_cvq(struct virtio_user_dev *dev, struct virtqueue *vq) +{ + char name[VIRTQUEUE_MAX_NAME_SZ]; + struct virtqueue *scvq; + + snprintf(name, sizeof(name), "port%d_shadow_cvq", vq->hw->port_id); + scvq = virtqueue_alloc(&dev->hw, vq->vq_queue_index, vq->vq_nentries, + VTNET_CQ, SOCKET_ID_ANY, name); + if (!scvq) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc shadow control vq\n", dev->path); + return -ENOMEM; + } + + scvq->cq.notify_queue = &virtio_user_control_queue_notify; + scvq->cq.notify_cookie = dev; + dev->scvq = scvq; + + return 0; +} + +void +virtio_user_dev_destroy_shadow_cvq(struct virtio_user_dev *dev) +{ + if (!dev->scvq) + return; + + virtqueue_free(dev->scvq); + dev->scvq = NULL; +} + int virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status) { diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index 3c5453eac0..e0db4faf3f 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -58,6 +58,9 @@ struct virtio_user_dev { pthread_mutex_t mutex; boolstarted; + boolhw_cvq; + struct virtqueue*scvq; + void *backend_data; }; @@ -74,6 +77,8 @@ void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); void virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx); uint8_t virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs); +int virtio_user_dev_create_shadow_cvq(struct virtio_user_dev *dev, struct virtqueue *vq); +void virtio_user_dev_destroy_shadow_cvq(struct virtio_user_dev *dev); int virtio_user_dev_set_status(struct virtio_user_dev *dev, uint8_t status); int virtio_user_dev_update_status(struct virtio_user_dev *dev); int virtio_user_dev_update_link_state(struct virtio_user_dev *dev); diff --git a/drivers/net/virtio/virtio_user_ethdev.c 
b/drivers/net/virtio/virtio_user_ethdev.c index 6c3e875793..0e9968ca89 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -232,6 +232,9 @@ virtio_user_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) else virtio_user_setup_queue_split(vq, dev); + if (dev->hw_cvq && hw->cvq && (virtnet_cq_to_vq(hw->cvq) == vq)) + return virtio_user_dev_create_shadow_cvq(dev, vq); + return 0; } @@ -251,6 +254,9 @@ virtio_user_del_queue(struct virtio_hw *hw, struct virtqueue *vq) close(dev->callfds[vq->vq_queue_index]); close(dev->kickfds[vq->vq_queue_index]); + + if (hw->cvq && (virtnet_cq_to_vq(hw->cvq) == vq)) + virtio_user_dev_destroy_shadow_cvq(dev); } static void -- 2.39.1
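The notify callback added above is the usual vhost eventfd kick: the frontend writes an 8-byte counter increment to the kickfd and the backend wakes up when it reads the counter back. A self-contained Linux sketch of that mechanism, independent of any DPDK structure, for readers unfamiliar with eventfd(2):

#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/eventfd.h>

int
main(void)
{
	uint64_t kick = 1, counter = 0;
	int kickfd = eventfd(0, 0);

	if (kickfd < 0) {
		fprintf(stderr, "eventfd: %s\n", strerror(errno));
		return 1;
	}

	/* Frontend side (what the notify callback does): kick the backend. */
	if (write(kickfd, &kick, sizeof(kick)) < 0)
		fprintf(stderr, "failed to kick backend: %s\n", strerror(errno));

	/* Backend side: reading the eventfd drains and returns the counter. */
	if (read(kickfd, &counter, sizeof(counter)) == (ssize_t)sizeof(counter))
		printf("backend woken up, counter=%" PRIu64 "\n", counter);

	close(kickfd);
	return 0;
}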
[PATCH 17/21] net/virtio-user: send shadow virtqueue info to the backend
This patch adds sending the shadow control queue info to the backend. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Acked-by: Eugenio Pérez --- .../net/virtio/virtio_user/virtio_user_dev.c | 28 --- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 16a0e07413..1a5386a3f6 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -66,6 +66,18 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) .flags = 0, /* disable log */ }; + if (queue_sel == dev->max_queue_pairs * 2) { + if (!dev->scvq) { + PMD_INIT_LOG(ERR, "(%s) Shadow control queue expected but missing", + dev->path); + goto err; + } + + /* Use shadow control queue information */ + vring = &dev->scvq->vq_split.ring; + pq_vring = &dev->scvq->vq_packed.ring; + } + if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) { addr.desc_user_addr = (uint64_t)(uintptr_t)pq_vring->desc; @@ -118,9 +130,13 @@ static int virtio_user_queue_setup(struct virtio_user_dev *dev, int (*fn)(struct virtio_user_dev *, uint32_t)) { - uint32_t i; + uint32_t i, nr_vq; - for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + nr_vq = dev->max_queue_pairs * 2; + if (dev->hw_cvq) + nr_vq++; + + for (i = 0; i < nr_vq; i++) { if (fn(dev, i) < 0) { PMD_DRV_LOG(ERR, "(%s) setup VQ %u failed", dev->path, i); return -1; @@ -381,11 +397,15 @@ virtio_user_dev_init_mac(struct virtio_user_dev *dev, const char *mac) static int virtio_user_dev_init_notify(struct virtio_user_dev *dev) { - uint32_t i, j; + uint32_t i, j, nr_vq; int callfd; int kickfd; - for (i = 0; i < dev->max_queue_pairs * 2; i++) { + nr_vq = dev->max_queue_pairs * 2; + if (dev->hw_cvq) + nr_vq++; + + for (i = 0; i < nr_vq; i++) { /* May use invalid flag, but some backend uses kickfd and * callfd as criteria to judge if dev is alive. so finally we * use real event_fd. -- 2.39.1
[PATCH 18/21] net/virtio-user: add new callback to enable control queue
This patch introduces a new callback that is to be called when the backend supports control virtqueue. Implementation for Vhost-vDPA backend is added in this patch. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Acked-by: Eugenio Pérez --- drivers/net/virtio/virtio_user/vhost.h | 1 + drivers/net/virtio/virtio_user/vhost_vdpa.c | 12 drivers/net/virtio/virtio_user/virtio_user_dev.c | 3 +++ 3 files changed, 16 insertions(+) diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index dfbf6be033..f817cab77a 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -82,6 +82,7 @@ struct virtio_user_backend_ops { int (*get_config)(struct virtio_user_dev *dev, uint8_t *data, uint32_t off, uint32_t len); int (*set_config)(struct virtio_user_dev *dev, const uint8_t *data, uint32_t off, uint32_t len); + int (*cvq_enable)(struct virtio_user_dev *dev, int enable); int (*enable_qp)(struct virtio_user_dev *dev, uint16_t pair_idx, int enable); int (*dma_map)(struct virtio_user_dev *dev, void *addr, uint64_t iova, size_t len); int (*dma_unmap)(struct virtio_user_dev *dev, void *addr, uint64_t iova, size_t len); diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c b/drivers/net/virtio/virtio_user/vhost_vdpa.c index a0897f8dd1..ebf6011769 100644 --- a/drivers/net/virtio/virtio_user/vhost_vdpa.c +++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c @@ -564,6 +564,17 @@ vhost_vdpa_destroy(struct virtio_user_dev *dev) return 0; } +static int +vhost_vdpa_cvq_enable(struct virtio_user_dev *dev, int enable) +{ + struct vhost_vring_state state = { + .index = dev->max_queue_pairs * 2, + .num = enable, + }; + + return vhost_vdpa_set_vring_enable(dev, &state); +} + static int vhost_vdpa_enable_queue_pair(struct virtio_user_dev *dev, uint16_t pair_idx, @@ -629,6 +640,7 @@ struct virtio_user_backend_ops virtio_ops_vdpa = { .set_status = vhost_vdpa_set_status, .get_config = vhost_vdpa_get_config, .set_config = vhost_vdpa_set_config, + .cvq_enable = vhost_vdpa_cvq_enable, .enable_qp = vhost_vdpa_enable_queue_pair, .dma_map = vhost_vdpa_dma_map_batch, .dma_unmap = vhost_vdpa_dma_unmap_batch, diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 1a5386a3f6..b0d603ee12 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -767,6 +767,9 @@ virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) for (i = q_pairs; i < dev->max_queue_pairs; ++i) ret |= dev->ops->enable_qp(dev, i, 0); + if (dev->scvq) + ret |= dev->ops->cvq_enable(dev, 1); + dev->queue_pairs = q_pairs; return ret; -- 2.39.1
[PATCH 19/21] net/virtio-user: forward control messages to shadow queue
This patch implements control messages forwarding from the regular control queue to the shadow control queue. Signed-off-by: Maxime Coquelin Acked-by: Eugenio Pérez Reviewed-by: Chenbo Xia --- .../net/virtio/virtio_user/virtio_user_dev.c | 35 +++ .../net/virtio/virtio_user/virtio_user_dev.h | 3 -- drivers/net/virtio/virtio_user_ethdev.c | 6 ++-- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index b0d603ee12..04d780189e 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -750,7 +750,7 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev) dev->ops->destroy(dev); } -uint8_t +static uint8_t virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) { uint16_t i; @@ -775,14 +775,17 @@ virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) return ret; } +#define CVQ_MAX_DATA_DESCS 32 + static uint32_t -virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, +virtio_user_handle_ctrl_msg_split(struct virtio_user_dev *dev, struct vring *vring, uint16_t idx_hdr) { struct virtio_net_ctrl_hdr *hdr; virtio_net_ctrl_ack status = ~0; uint16_t i, idx_data, idx_status; uint32_t n_descs = 0; + int dlen[CVQ_MAX_DATA_DESCS], nb_dlen = 0; /* locate desc for header, data, and status */ idx_data = vring->desc[idx_hdr].next; @@ -790,6 +793,7 @@ virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, i = idx_data; while (vring->desc[i].flags == VRING_DESC_F_NEXT) { + dlen[nb_dlen++] = vring->desc[i].len; i = vring->desc[i].next; n_descs++; } @@ -811,6 +815,10 @@ virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, status = 0; } + if (!status && dev->scvq) + status = virtio_send_command(&dev->scvq->cq, + (struct virtio_pmd_ctrl *)hdr, dlen, nb_dlen); + /* Update status */ *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status; @@ -836,6 +844,7 @@ virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev, uint16_t idx_data, idx_status; /* initialize to one, header is first */ uint32_t n_descs = 1; + int dlen[CVQ_MAX_DATA_DESCS], nb_dlen = 0; /* locate desc for header, data, and status */ idx_data = idx_hdr + 1; @@ -846,6 +855,7 @@ virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev, idx_status = idx_data; while (vring->desc[idx_status].flags & VRING_DESC_F_NEXT) { + dlen[nb_dlen++] = vring->desc[idx_status].len; idx_status++; if (idx_status >= dev->queue_size) idx_status -= dev->queue_size; @@ -866,6 +876,10 @@ virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev, status = 0; } + if (!status && dev->scvq) + status = virtio_send_command(&dev->scvq->cq, + (struct virtio_pmd_ctrl *)hdr, dlen, nb_dlen); + /* Update status */ *(virtio_net_ctrl_ack *)(uintptr_t) vring->desc[idx_status].addr = status; @@ -877,7 +891,7 @@ virtio_user_handle_ctrl_msg_packed(struct virtio_user_dev *dev, return n_descs; } -void +static void virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx) { struct virtio_user_queue *vq = &dev->packed_queues[queue_idx]; @@ -909,8 +923,8 @@ virtio_user_handle_cq_packed(struct virtio_user_dev *dev, uint16_t queue_idx) } } -void -virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) +static void +virtio_user_handle_cq_split(struct virtio_user_dev *dev, uint16_t queue_idx) { uint16_t avail_idx, desc_idx; struct vring_used_elem *uep; @@ -924,7 +938,7 @@ 
virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) & (vring->num - 1); desc_idx = vring->avail->ring[avail_idx]; - n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx); + n_descs = virtio_user_handle_ctrl_msg_split(dev, vring, desc_idx); /* Update used ring */ uep = &vring->used->ring[avail_idx]; @@ -935,6 +949,15 @@ virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) } } +void +virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) +{ + if (virtio_with_packed_queue(&dev->hw)) + virtio_user_handle_cq_packed(dev, queue_idx); + else + virtio_user_handle_cq_split(dev, queue_idx); +} + static
[PATCH 21/21] net/virtio-user: remove max queues limitation
This patch removes the limitation of 8 queue pairs by dynamically allocating vring metadata once we know the maximum number of queue pairs supported by the backend. This is especially useful for Vhost-vDPA with physical devices, where the maximum queues supported may be much more than 8 pairs. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia --- drivers/net/virtio/virtio.h | 6 - .../net/virtio/virtio_user/virtio_user_dev.c | 143 ++ .../net/virtio/virtio_user/virtio_user_dev.h | 16 +- drivers/net/virtio/virtio_user_ethdev.c | 17 +-- 4 files changed, 125 insertions(+), 57 deletions(-) diff --git a/drivers/net/virtio/virtio.h b/drivers/net/virtio/virtio.h index 5c8f71a44d..04a897bf51 100644 --- a/drivers/net/virtio/virtio.h +++ b/drivers/net/virtio/virtio.h @@ -124,12 +124,6 @@ VIRTIO_NET_HASH_TYPE_UDP_EX) -/* - * Maximum number of virtqueues per device. - */ -#define VIRTIO_MAX_VIRTQUEUE_PAIRS 8 -#define VIRTIO_MAX_VIRTQUEUES (VIRTIO_MAX_VIRTQUEUE_PAIRS * 2 + 1) - /* VirtIO device IDs. */ #define VIRTIO_ID_NETWORK 0x01 #define VIRTIO_ID_BLOCK0x02 diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 04d780189e..cf58b63029 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "vhost.h" #include "virtio_user_dev.h" @@ -58,8 +59,8 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) int ret; struct vhost_vring_file file; struct vhost_vring_state state; - struct vring *vring = &dev->vrings[queue_sel]; - struct vring_packed *pq_vring = &dev->packed_vrings[queue_sel]; + struct vring *vring = &dev->vrings.split[queue_sel]; + struct vring_packed *pq_vring = &dev->vrings.packed[queue_sel]; struct vhost_vring_addr addr = { .index = queue_sel, .log_guest_addr = 0, @@ -299,18 +300,6 @@ virtio_user_dev_init_max_queue_pairs(struct virtio_user_dev *dev, uint32_t user_ return ret; } - if (dev->max_queue_pairs > VIRTIO_MAX_VIRTQUEUE_PAIRS) { - /* -* If the device supports control queue, the control queue -* index is max_virtqueue_pairs * 2. Disable MQ if it happens. 
-*/ - PMD_DRV_LOG(ERR, "(%s) Device advertises too many queues (%u, max supported %u)", - dev->path, dev->max_queue_pairs, VIRTIO_MAX_VIRTQUEUE_PAIRS); - dev->max_queue_pairs = 1; - - return -1; - } - return 0; } @@ -579,6 +568,93 @@ virtio_user_dev_setup(struct virtio_user_dev *dev) return 0; } +static int +virtio_user_alloc_vrings(struct virtio_user_dev *dev) +{ + int i, size, nr_vrings; + bool packed_ring = !!(dev->device_features & (1ull << VIRTIO_F_RING_PACKED)); + + nr_vrings = dev->max_queue_pairs * 2; + if (dev->hw_cvq) + nr_vrings++; + + dev->callfds = rte_zmalloc("virtio_user_dev", nr_vrings * sizeof(*dev->callfds), 0); + if (!dev->callfds) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc callfds", dev->path); + return -1; + } + + dev->kickfds = rte_zmalloc("virtio_user_dev", nr_vrings * sizeof(*dev->kickfds), 0); + if (!dev->kickfds) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc kickfds", dev->path); + goto free_callfds; + } + + for (i = 0; i < nr_vrings; i++) { + dev->callfds[i] = -1; + dev->kickfds[i] = -1; + } + + if (packed_ring) + size = sizeof(*dev->vrings.packed); + else + size = sizeof(*dev->vrings.split); + dev->vrings.ptr = rte_zmalloc("virtio_user_dev", nr_vrings * size, 0); + if (!dev->vrings.ptr) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc vrings metadata", dev->path); + goto free_kickfds; + } + + if (packed_ring) { + dev->packed_queues = rte_zmalloc("virtio_user_dev", + nr_vrings * sizeof(*dev->packed_queues), 0); + if (!dev->packed_queues) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc packed queues metadata", + dev->path); + goto free_vrings; + } + } + + dev->qp_enabled = rte_zmalloc("virtio_user_dev", + dev->max_queue_pairs * sizeof(*dev->qp_enabled), 0); + if (!dev->qp_enabled) { + PMD_INIT_LOG(ERR, "(%s) Failed to alloc QP enable states", dev->path); + goto free_packed_queues; + } + + return 0; + +free_packed_queues: + rte_free(dev->packed_queues); + dev->p
[PATCH 20/21] net/virtio-user: advertize control VQ support with vDPA
This patch advertizes control virtqueue support by the vDPA backend if it supports VIRTIO_NET_F_CTRL_VQ. Signed-off-by: Maxime Coquelin Reviewed-by: Chenbo Xia Acked-by: Eugenio Pérez --- drivers/net/virtio/virtio_user/vhost_vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c b/drivers/net/virtio/virtio_user/vhost_vdpa.c index ebf6011769..2c36b26224 100644 --- a/drivers/net/virtio/virtio_user/vhost_vdpa.c +++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c @@ -135,8 +135,8 @@ vhost_vdpa_get_features(struct virtio_user_dev *dev, uint64_t *features) return -1; } - /* Multiqueue not supported for now */ - *features &= ~(1ULL << VIRTIO_NET_F_MQ); + if (*features & 1ULL << VIRTIO_NET_F_CTRL_VQ) + dev->hw_cvq = true; /* Negotiated vDPA backend features */ ret = vhost_vdpa_get_protocol_features(dev, &data->protocol_features); -- 2.39.1
Re: [PATCH] telemetry: fix docstring of RTE_TEL_INT_VAL
On Tue, Feb 7, 2023 at 9:46 AM Robin Jarry wrote: > > The doc string was left to its previous definition. Make it explicit > that RTE_TEL_INT_VAL is a 64-bit signed integer. > > Fixes: 8e639c7c50cc ("telemetry: use 64-bit signed values in API") > > Signed-off-by: Robin Jarry Applied, thanks. -- David Marchand
Re: [PATCH 00/21] Add control queue & MQ support to Virtio-user vDPA
Oops, forgot to set subject prefix to v3. On 2/9/23 10:16, Maxime Coquelin wrote: This series introduces control queue support for the Vhost-vDPA backend. This is a requirement to support multiqueue, but it can also be useful for other features such as RSS. Since the Virtio-user layer of the Virtio PMD must handle some control messages, like the number of queue pairs to be used by the device, a shadow control queue is created at the Virtio-user layer. Control messages from the regular Virtio control queue are still dequeued and handled if needed by the Virtio-user layer, and are then forwarded to the shadow control queue so that the physical vDPA device can handle them. This model is similar to the one adopted by the QEMU project. In order to avoid code duplication, virtqueue allocation and control queue message sending have been factored out of the Virtio layer to be reusable by the Virtio-user layer. Finally, in order to support vDPA hardware which may support a large number of queues, the last patch removes the 8 queue pairs limitation by dynamically allocating vring metadata. The series has been tested with an Nvidia Cx-6 DX NIC with up to 16 queue pairs: # echo 0 > /sys/bus/pci/devices/0000\:3b\:00.0/sriov_numvfs # echo 0 > /sys/bus/pci/devices/0000\:3b\:00.1/sriov_numvfs # modprobe vhost_vdpa # modprobe mlx5_vdpa # echo 1 > /sys/bus/pci/devices/0000\:3b\:00.0/sriov_numvfs # echo 0000:3b:00.2 >/sys/bus/pci/drivers/mlx5_core/unbind # devlink dev eswitch set pci/0000:3b:00.0 mode switchdev # echo 0000:3b:00.2 >/sys/bus/pci/drivers/mlx5_core/bind # vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 mac 00:11:22:33:44:03 max_vqp 16 # ulimit -l unlimited # dpdk-testpmd -l 0,2,4,6 --socket-mem 1024,0 --vdev 'virtio_user0,path=/dev/vhost-vdpa-0' --no-pci -n 3 -- --nb-cores=3 -i --rxq=16 --txq=16 Changes in v3: == - Trivial code simplifications (Eugenio) Changes in v2: == - Fix double spaces (Chenbo) - Get rid of unneeded gotos (Stephen) - Only allocate packed ring metadata if supported (Chenbo) - Rebased on top of main Maxime Coquelin (21): net/virtio: move CVQ code into a dedicated file net/virtio: introduce notify callback for control queue net/virtio: virtqueue headers alloc refactoring net/virtio: remove port ID info from Rx queue net/virtio: remove unused fields in Tx queue struct net/virtio: remove unused queue ID field in Rx queue net/virtio: remove unused Port ID in control queue net/virtio: move vring memzone to virtqueue struct net/virtio: refactor indirect desc headers init net/virtio: alloc Rx SW ring only if vectorized path net/virtio: extract virtqueue init from virtio queue init net/virtio-user: fix device starting failure handling net/virtio-user: simplify queues setup net/virtio-user: use proper type for number of queue pairs net/virtio-user: get max number of queue pairs from device net/virtio-user: allocate shadow control queue net/virtio-user: send shadow virtqueue info to the backend net/virtio-user: add new callback to enable control queue net/virtio-user: forward control messages to shadow queue net/virtio-user: advertize control VQ support with vDPA net/virtio-user: remove max queues limitation drivers/net/virtio/meson.build| 1 + drivers/net/virtio/virtio.h | 6 - drivers/net/virtio/virtio_cvq.c | 229 + drivers/net/virtio/virtio_cvq.h | 127 + drivers/net/virtio/virtio_ethdev.c| 472 +- drivers/net/virtio/virtio_rxtx.c | 47 +- drivers/net/virtio/virtio_rxtx.h | 31 +- drivers/net/virtio/virtio_rxtx_packed.c | 3 +- drivers/net/virtio/virtio_rxtx_simple.c | 3 +- drivers/net/virtio/virtio_rxtx_simple.h | 7 +-
.../net/virtio/virtio_rxtx_simple_altivec.c | 4 +- drivers/net/virtio/virtio_rxtx_simple_neon.c | 4 +- drivers/net/virtio/virtio_rxtx_simple_sse.c | 4 +- drivers/net/virtio/virtio_user/vhost.h| 1 + drivers/net/virtio/virtio_user/vhost_vdpa.c | 16 +- .../net/virtio/virtio_user/virtio_user_dev.c | 305 +-- .../net/virtio/virtio_user/virtio_user_dev.h | 30 +- drivers/net/virtio/virtio_user_ethdev.c | 49 +- drivers/net/virtio/virtqueue.c| 346 - drivers/net/virtio/virtqueue.h| 127 + 20 files changed, 1066 insertions(+), 746 deletions(-) create mode 100644 drivers/net/virtio/virtio_cvq.c create mode 100644 drivers/net/virtio/virtio_cvq.h
RE: [EXT] Re: [PATCH v5 2/3] graph: pcap capture for graph nodes
Hi David, Sorry for the delayed reply. I will send the next version of the patch in some time, after squashing the fix. Thanks, Amit Shukla > -Original Message- > From: David Marchand > Sent: Thursday, February 9, 2023 2:43 PM > To: Amit Prakash Shukla > Cc: Jerin Jacob Kollanukkaran ; Kiran Kumar > Kokkilagadda ; Nithin Kumar Dabilpuram > ; Anatoly Burakov > ; dev@dpdk.org > Subject: [EXT] Re: [PATCH v5 2/3] graph: pcap capture for graph nodes > > External Email > > -- > On Fri, Feb 3, 2023 at 9:19 AM Amit Prakash Shukla > wrote: > > diff --git a/lib/graph/rte_graph.h b/lib/graph/rte_graph.h index > > b32c4bc217..c9a77297fc 100644 > > --- a/lib/graph/rte_graph.h > > +++ b/lib/graph/rte_graph.h > > @@ -35,6 +35,7 @@ extern "C" { > > > > #define RTE_GRAPH_NAMESIZE 64 /**< Max length of graph name. */ > > #define RTE_NODE_NAMESIZE 64 /**< Max length of node name. */ > > +#define RTE_GRAPH_PCAP_FILE_SZ 64 /**< Max length of pcap file > name. > > +*/ > > #define RTE_GRAPH_OFF_INVALID UINT32_MAX /**< Invalid graph > offset. */ > > #define RTE_NODE_ID_INVALID UINT32_MAX /**< Invalid node id. */ > > #define RTE_EDGE_ID_INVALID UINT16_MAX /**< Invalid edge id. */ > > @@ -164,6 +165,10 @@ struct rte_graph_param { > > uint16_t nb_node_patterns; /**< Number of node patterns. */ > > const char **node_patterns; > > /**< Array of node patterns based on shell pattern. */ > > + > > + bool pcap_enable; /**< Pcap enable. */ > > + uint64_t num_pkt_to_capture; /**< Number of packets to capture. */ > > + char *pcap_filename; /**< Filename in which packets to be > > + captured.*/ > > }; > > Repeating in this thread what I commented on a patch fixing compilation for > the unit test. > > Extending this structure requires updating the graph unit test. > Please squash this fix in this series. > > > Thanks. > > -- > David Marchand
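For context, the fields under discussion are the ones the quoted hunk adds to struct rte_graph_param. Once the series is merged, an application would opt into capture roughly as sketched below; the node patterns, packet count and file name are made-up example values, and rte_graph_create() is the existing public API:

#include <stdbool.h>

#include <rte_common.h>
#include <rte_graph.h>
#include <rte_memory.h>

/* Sketch: create a graph with per-node pcap capture enabled, using the
 * pcap_enable/num_pkt_to_capture/pcap_filename fields added by the patch
 * quoted above (not yet part of a released DPDK at the time of writing). */
static rte_graph_t
create_capture_graph(void)
{
	static const char *patterns[] = { "ip4*", "ethdev*" }; /* example patterns */
	struct rte_graph_param prm = {
		.socket_id = SOCKET_ID_ANY,
		.nb_node_patterns = RTE_DIM(patterns),
		.node_patterns = patterns,
		.pcap_enable = true,
		.num_pkt_to_capture = 1024,
		.pcap_filename = (char *)"/tmp/graph_capture.pcap",
	};

	return rte_graph_create("worker_graph", &prm);
}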
RE: [PATCH 1/2] net/i40e: replace put function
> From: Kamalakshitha Aligeri [mailto:kamalakshitha.alig...@arm.com] > Sent: Thursday, 9 February 2023 07.25 > > Integrated zero-copy put API in mempool cache in i40e PMD. > On Ampere Altra server, l3fwd single core's performance improves by 5% > with the new API > > Signed-off-by: Kamalakshitha Aligeri > Reviewed-by: Ruifeng Wang > Reviewed-by: Feifei Wang > --- > Link: > https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1- > m...@smartsharesystems.com/ > > .mailmap| 1 + > drivers/net/i40e/i40e_rxtx_vec_common.h | 34 - > 2 files changed, 28 insertions(+), 7 deletions(-) > > diff --git a/.mailmap b/.mailmap > index 75884b6fe2..05a42edbcf 100644 > --- a/.mailmap > +++ b/.mailmap > @@ -670,6 +670,7 @@ Kai Ji > Kaiwen Deng > Kalesh AP > Kamalakannan R > +Kamalakshitha Aligeri > Kamil Bednarczyk > Kamil Chalupnik > Kamil Rytarowski > diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h > b/drivers/net/i40e/i40e_rxtx_vec_common.h > index fe1a6ec75e..80d4a159e6 100644 > --- a/drivers/net/i40e/i40e_rxtx_vec_common.h > +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h > @@ -95,17 +95,37 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq) > > n = txq->tx_rs_thresh; > > - /* first buffer to free from S/W ring is at index > - * tx_next_dd - (tx_rs_thresh-1) > - */ > + /* first buffer to free from S/W ring is at index > + * tx_next_dd - (tx_rs_thresh-1) > + */ > txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; > > if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) { > - for (i = 0; i < n; i++) { > - free[i] = txep[i].mbuf; > - /* no need to reset txep[i].mbuf in vector path */ > + struct rte_mempool *mp = txep[0].mbuf->pool; > + struct rte_mempool_cache *cache = > rte_mempool_default_cache(mp, rte_lcore_id()); > + > + if (!cache || n > RTE_MEMPOOL_CACHE_MAX_SIZE) { If the mempool has a cache, do not compare n to RTE_MEMPOOL_CACHE_MAX_SIZE. Instead, call rte_mempool_cache_zc_put_bulk() to determine if n is acceptable for zero-copy. It looks like this patch behaves incorrectly if the cache is configured to be smaller than RTE_MEMPOOL_CACHE_MAX_SIZE. Let's say the cache size is 8, which will make the flush threshold 12. If n is 32, your code will not enter this branch, but proceed to call rte_mempool_cache_zc_put_bulk(), which will return NULL, and then you will goto done. Obviously, if there is no cache, fall back to the standard rte_mempool_put_bulk(). > + for (i = 0; i < n ; i++) > + free[i] = txep[i].mbuf; > + if (!cache) { > + rte_mempool_generic_put(mp, (void **)free, n, > cache); > + goto done; > + } > + if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) { > + rte_mempool_ops_enqueue_bulk(mp, (void **)free, > n); > + goto done; > + } > + } > + void **cache_objs; > + > + cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n); > + if (cache_objs) { > + for (i = 0; i < n; i++) { > + cache_objs[i] = txep->mbuf; > + /* no need to reset txep[i].mbuf in vector path > */ > + txep++; > + } > } > - rte_mempool_put_bulk(free[0]->pool, (void **)free, n); > goto done; > } > > -- > 2.25.1 >
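Put differently, the review suggests letting rte_mempool_cache_zc_put_bulk() (the experimental API this thread is about) decide whether n fits in the cache, and only falling back when it returns NULL or when there is no cache at all. A standalone sketch of that control flow, not the exact i40e code, assuming all n mbufs come from the same mempool as RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE guarantees:

#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Return n mbufs (all from the same mempool) to their pool, preferring the
 * zero-copy cache path and falling back to the regular bulk put when the
 * cache is absent or n does not fit. */
static void
tx_free_bufs_zc(struct rte_mbuf **mbufs, unsigned int n)
{
	struct rte_mempool *mp = mbufs[0]->pool;
	struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id());
	void **cache_objs;
	unsigned int i;

	if (cache != NULL) {
		/* The API itself checks whether n fits below the flush threshold. */
		cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
		if (cache_objs != NULL) {
			for (i = 0; i < n; i++)
				cache_objs[i] = mbufs[i];
			return;
		}
	}

	/* No cache, or n too large for zero-copy: standard bulk put. */
	rte_mempool_put_bulk(mp, (void **)mbufs, n);
}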
[PATCH v5 00/21] add support for cpfl PMD in DPDK
This patchset introduces the cpfl (Control Plane Function Library) PMD for the Intel(R) IPU E2100's Configure Physical Function (Device ID: 0x1453). The cpfl PMD inherits all the features from the idpf PMD, which follows an ongoing standard data plane function spec https://www.oasis-open.org/committees/tc_home.php?wg_abbrev=idpf Besides, it will also support more device-specific hardware offloading features from DPDK's control path (e.g. hairpin, rte_flow ...), which is different from the idpf PMD, and that's why we need a new cpfl PMD. This patchset mainly focuses on the idpf PMD's equivalent features. To avoid duplicated code, the patchset depends on the below patchsets, which move the common part from net/idpf into common/idpf as a shared library. v2 changes: - rebase to the new baseline. - Fix RSS LUT config issue. v3 changes: - rebase to the new baseline. v4 changes: - Resend v3. No code changed. v5 changes: - rebase to the new baseline. - optimize some code - give "not supported" tips when the user wants to configure RSS hash type - if stats reset fails at initialization time, don't roll back, just print ERROR info Mingxia Liu (21): net/cpfl: support device initialization net/cpfl: add Tx queue setup net/cpfl: add Rx queue setup net/cpfl: support device start and stop net/cpfl: support queue start net/cpfl: support queue stop net/cpfl: support queue release net/cpfl: support MTU configuration net/cpfl: support basic Rx data path net/cpfl: support basic Tx data path net/cpfl: support write back based on ITR expire net/cpfl: support RSS net/cpfl: support Rx offloading net/cpfl: support Tx offloading net/cpfl: add AVX512 data path for single queue model net/cpfl: support timestamp offload net/cpfl: add AVX512 data path for split queue model net/cpfl: add HW statistics net/cpfl: add RSS set/get ops net/cpfl: support scalar scatter Rx datapath for single queue model net/cpfl: add xstats ops MAINTAINERS |9 + doc/guides/nics/cpfl.rst| 88 ++ doc/guides/nics/features/cpfl.ini | 17 + doc/guides/rel_notes/release_23_03.rst |6 + drivers/net/cpfl/cpfl_ethdev.c | 1453 +++ drivers/net/cpfl/cpfl_ethdev.h | 95 ++ drivers/net/cpfl/cpfl_logs.h| 32 + drivers/net/cpfl/cpfl_rxtx.c| 952 +++ drivers/net/cpfl/cpfl_rxtx.h| 44 + drivers/net/cpfl/cpfl_rxtx_vec_common.h | 116 ++ drivers/net/cpfl/meson.build| 38 + drivers/net/meson.build |1 + 12 files changed, 2851 insertions(+) create mode 100644 doc/guides/nics/cpfl.rst create mode 100644 doc/guides/nics/features/cpfl.ini create mode 100644 drivers/net/cpfl/cpfl_ethdev.c create mode 100644 drivers/net/cpfl/cpfl_ethdev.h create mode 100644 drivers/net/cpfl/cpfl_logs.h create mode 100644 drivers/net/cpfl/cpfl_rxtx.c create mode 100644 drivers/net/cpfl/cpfl_rxtx.h create mode 100644 drivers/net/cpfl/cpfl_rxtx_vec_common.h create mode 100644 drivers/net/cpfl/meson.build -- 2.25.1
[PATCH v5 01/21] net/cpfl: support device initialization
Support device init and add the following dev ops: - dev_configure - dev_close - dev_infos_get - link_update - dev_supported_ptypes_get Signed-off-by: Mingxia Liu --- MAINTAINERS| 9 + doc/guides/nics/cpfl.rst | 66 +++ doc/guides/nics/features/cpfl.ini | 12 + doc/guides/rel_notes/release_23_03.rst | 6 + drivers/net/cpfl/cpfl_ethdev.c | 768 + drivers/net/cpfl/cpfl_ethdev.h | 78 +++ drivers/net/cpfl/cpfl_logs.h | 32 ++ drivers/net/cpfl/cpfl_rxtx.c | 244 drivers/net/cpfl/cpfl_rxtx.h | 25 + drivers/net/cpfl/meson.build | 14 + drivers/net/meson.build| 1 + 11 files changed, 1255 insertions(+) create mode 100644 doc/guides/nics/cpfl.rst create mode 100644 doc/guides/nics/features/cpfl.ini create mode 100644 drivers/net/cpfl/cpfl_ethdev.c create mode 100644 drivers/net/cpfl/cpfl_ethdev.h create mode 100644 drivers/net/cpfl/cpfl_logs.h create mode 100644 drivers/net/cpfl/cpfl_rxtx.c create mode 100644 drivers/net/cpfl/cpfl_rxtx.h create mode 100644 drivers/net/cpfl/meson.build diff --git a/MAINTAINERS b/MAINTAINERS index 9a0f416d2e..cf044c478b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -783,6 +783,15 @@ F: drivers/common/idpf/ F: doc/guides/nics/idpf.rst F: doc/guides/nics/features/idpf.ini +Intel cpfl +M: Qi Zhang +M: Jingjing Wu +M: Beilei Xing +T: git://dpdk.org/next/dpdk-next-net-intel +F: drivers/net/cpfl/ +F: doc/guides/nics/cpfl.rst +F: doc/guides/nics/features/cpfl.ini + Intel igc M: Junfeng Guo M: Simei Su diff --git a/doc/guides/nics/cpfl.rst b/doc/guides/nics/cpfl.rst new file mode 100644 index 00..7c5aff0789 --- /dev/null +++ b/doc/guides/nics/cpfl.rst @@ -0,0 +1,66 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2022 Intel Corporation. + +.. include:: + +CPFL Poll Mode Driver += + +The [*EXPERIMENTAL*] cpfl PMD (**librte_net_cpfl**) provides poll mode driver support +for Intel\ |reg| Infrastructure Processing Unit (Intel\ |reg| IPU) E2100. + + +Linux Prerequisites +--- + +Follow the DPDK :doc:`../linux_gsg/index` to setup the basic DPDK environment. + +To get better performance on Intel platforms, +please follow the :doc:`../linux_gsg/nic_perf_intel_platform`. + + +Pre-Installation Configuration +-- + +Runtime Config Options +~~ + +- ``vport`` (default ``0``) + + The PMD supports creation of multiple vports for one PCI device, + each vport corresponds to a single ethdev. + The user can specify the vports with specific ID to be created, for example:: + +-a ca:00.0,vport=[0,2,3] + + Then the PMD will create 3 vports (ethdevs) for device ``ca:00.0``. + + If the parameter is not provided, the vport 0 will be created by default. + +- ``rx_single`` (default ``0``) + + There are two queue modes supported by Intel\ |reg| IPU Ethernet E2100 Series, + single queue mode and split queue mode for Rx queue. + User can choose Rx queue mode, example:: + +-a ca:00.0,rx_single=1 + + Then the PMD will configure Rx queue with single queue mode. + Otherwise, split queue mode is chosen by default. + +- ``tx_single`` (default ``0``) + + There are two queue modes supported by Intel\ |reg| IPU Ethernet E2100 Series, + single queue mode and split queue mode for Tx queue. + User can choose Tx queue mode, example:: + +-a ca:00.0,tx_single=1 + + Then the PMD will configure Tx queue with single queue mode. + Otherwise, split queue mode is chosen by default. + + +Driver compilation and testing +-- + +Refer to the document :doc:`build_and_test` for details. 
\ No newline at end of file diff --git a/doc/guides/nics/features/cpfl.ini b/doc/guides/nics/features/cpfl.ini new file mode 100644 index 00..a2d1ca9e15 --- /dev/null +++ b/doc/guides/nics/features/cpfl.ini @@ -0,0 +1,12 @@ +; +; Supported features of the 'cpfl' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +; A feature with "P" indicates only be supported when non-vector path +; is selected. +; +[Features] +Linux= Y +x86-32 = Y +x86-64 = Y diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 07914170a7..b0b23d1a44 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -88,6 +88,12 @@ New Features * Added timesync API support. * Added packet pacing(launch time offloading) support. +* **Added Intel cpfl driver.** + + Added the new ``cpfl`` net driver + for Intel\ |reg| Infrastructure Processing Unit (Intel\ |reg| IPU) E2100. + See the :doc:`../nics/cpfl` NIC guide for more details on this new driver. + Removed Items - diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c n
[PATCH v5 02/21] net/cpfl: add Tx queue setup
Add support for tx_queue_setup ops. In the single queue model, the same descriptor queue is used by SW to post buffer descriptors to HW and by HW to post completed descriptors to SW. In the split queue model, "RX buffer queues" are used to pass descriptor buffers from SW to HW while Rx queues are used only to pass the descriptor completions, that is, descriptors that point to completed buffers, from HW to SW. This is contrary to the single queue model in which Rx queues are used for both purposes. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 13 + drivers/net/cpfl/cpfl_rxtx.c | 8 drivers/net/cpfl/meson.build | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index e10c6346ba..abb9f8d617 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -12,6 +12,7 @@ #include #include "cpfl_ethdev.h" +#include "cpfl_rxtx.h" #define CPFL_TX_SINGLE_Q "tx_single" #define CPFL_RX_SINGLE_Q "rx_single" @@ -96,6 +97,17 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_mtu = vport->max_mtu; dev_info->min_mtu = RTE_ETHER_MIN_MTU; + dev_info->default_txconf = (struct rte_eth_txconf) { + .tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH, + .tx_rs_thresh = CPFL_DEFAULT_TX_RS_THRESH, + }; + + dev_info->tx_desc_lim = (struct rte_eth_desc_lim) { + .nb_max = CPFL_MAX_RING_DESC, + .nb_min = CPFL_MIN_RING_DESC, + .nb_align = CPFL_ALIGN_RING_DESC, + }; + return 0; } @@ -513,6 +525,7 @@ cpfl_adapter_ext_init(struct rte_pci_device *pci_dev, struct cpfl_adapter_ext *a static const struct eth_dev_ops cpfl_eth_dev_ops = { .dev_configure = cpfl_dev_configure, .dev_close = cpfl_dev_close, + .tx_queue_setup = cpfl_tx_queue_setup, .dev_infos_get = cpfl_dev_info_get, .link_update= cpfl_dev_link_update, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index 53ba2770de..e0f8484b19 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -130,7 +130,7 @@ cpfl_tx_complq_setup(struct rte_eth_dev *dev, struct idpf_tx_queue *txq, cq->tx_ring_phys_addr = mz->iova; cq->compl_ring = mz->addr; cq->mz = mz; - reset_split_tx_complq(cq); + idpf_qc_split_tx_complq_reset(cq); txq->complq = cq; @@ -164,7 +164,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, tx_conf->tx_rs_thresh : CPFL_DEFAULT_TX_RS_THRESH); tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh > 0) ? tx_conf->tx_free_thresh : CPFL_DEFAULT_TX_FREE_THRESH); - if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0) + if (idpf_qc_tx_thresh_check(nb_desc, tx_rs_thresh, tx_free_thresh) != 0) return -EINVAL; /* Allocate the TX queue data structure. */ @@ -215,10 +215,10 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (!is_splitq) { txq->tx_ring = mz->addr; - reset_single_tx_queue(txq); + idpf_qc_single_tx_queue_reset(txq); } else { txq->desc_ring = mz->addr; - reset_split_tx_descq(txq); + idpf_qc_split_tx_descq_reset(txq); /* Setup tx completion queue if split model */ ret = cpfl_tx_complq_setup(dev, txq, queue_idx, diff --git a/drivers/net/cpfl/meson.build b/drivers/net/cpfl/meson.build index c721732b50..1894423689 100644 --- a/drivers/net/cpfl/meson.build +++ b/drivers/net/cpfl/meson.build @@ -11,4 +11,5 @@ deps += ['common_idpf'] sources = files( 'cpfl_ethdev.c', +'cpfl_rxtx.c', ) \ No newline at end of file -- 2.25.1
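On the application side, the defaults and descriptor limits advertised here are consumed through the usual ethdev calls; passing a NULL txconf makes the PMD apply its default_txconf. A short sketch against the public ethdev API (the port id, queue id and ring size are made-up example values; the Rx queue setup added in the next patch is used the same way):

#include <rte_ethdev.h>

/* Set up Tx queue 0 on the given port with 512 descriptors, letting the
 * PMD apply its advertised default_txconf (tx_free_thresh/tx_rs_thresh). */
static int
setup_one_txq(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;
	uint16_t nb_txd = 512;
	int ret;

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret != 0)
		return ret;

	/* Stay within the descriptor limits reported by the PMD. */
	if (nb_txd > dev_info.tx_desc_lim.nb_max)
		nb_txd = dev_info.tx_desc_lim.nb_max;

	return rte_eth_tx_queue_setup(port_id, 0, nb_txd,
			rte_eth_dev_socket_id(port_id),
			NULL /* NULL txconf: use the PMD defaults */);
}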
[PATCH v5 03/21] net/cpfl: add Rx queue setup
Add support for rx_queue_setup ops. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 11 ++ drivers/net/cpfl/cpfl_rxtx.c | 232 + drivers/net/cpfl/cpfl_rxtx.h | 6 + 3 files changed, 249 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index abb9f8d617..fb530c7690 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -102,12 +102,22 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .tx_rs_thresh = CPFL_DEFAULT_TX_RS_THRESH, }; + dev_info->default_rxconf = (struct rte_eth_rxconf) { + .rx_free_thresh = CPFL_DEFAULT_RX_FREE_THRESH, + }; + dev_info->tx_desc_lim = (struct rte_eth_desc_lim) { .nb_max = CPFL_MAX_RING_DESC, .nb_min = CPFL_MIN_RING_DESC, .nb_align = CPFL_ALIGN_RING_DESC, }; + dev_info->rx_desc_lim = (struct rte_eth_desc_lim) { + .nb_max = CPFL_MAX_RING_DESC, + .nb_min = CPFL_MIN_RING_DESC, + .nb_align = CPFL_ALIGN_RING_DESC, + }; + return 0; } @@ -525,6 +535,7 @@ cpfl_adapter_ext_init(struct rte_pci_device *pci_dev, struct cpfl_adapter_ext *a static const struct eth_dev_ops cpfl_eth_dev_ops = { .dev_configure = cpfl_dev_configure, .dev_close = cpfl_dev_close, + .rx_queue_setup = cpfl_rx_queue_setup, .tx_queue_setup = cpfl_tx_queue_setup, .dev_infos_get = cpfl_dev_info_get, .link_update= cpfl_dev_link_update, diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index e0f8484b19..4083e8c3b6 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -9,6 +9,25 @@ #include "cpfl_ethdev.h" #include "cpfl_rxtx.h" +static uint64_t +cpfl_rx_offload_convert(uint64_t offload) +{ + uint64_t ol = 0; + + if ((offload & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) != 0) + ol |= IDPF_RX_OFFLOAD_IPV4_CKSUM; + if ((offload & RTE_ETH_RX_OFFLOAD_UDP_CKSUM) != 0) + ol |= IDPF_RX_OFFLOAD_UDP_CKSUM; + if ((offload & RTE_ETH_RX_OFFLOAD_TCP_CKSUM) != 0) + ol |= IDPF_RX_OFFLOAD_TCP_CKSUM; + if ((offload & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM) != 0) + ol |= IDPF_RX_OFFLOAD_OUTER_IPV4_CKSUM; + if ((offload & RTE_ETH_RX_OFFLOAD_TIMESTAMP) != 0) + ol |= IDPF_RX_OFFLOAD_TIMESTAMP; + + return ol; +} + static uint64_t cpfl_tx_offload_convert(uint64_t offload) { @@ -94,6 +113,219 @@ cpfl_dma_zone_release(const struct rte_memzone *mz) rte_memzone_free(mz); } +static int +cpfl_rx_split_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *rxq, +uint16_t queue_idx, uint16_t rx_free_thresh, +uint16_t nb_desc, unsigned int socket_id, +struct rte_mempool *mp, uint8_t bufq_id) +{ + struct idpf_vport *vport = dev->data->dev_private; + struct idpf_adapter *adapter = vport->adapter; + struct idpf_hw *hw = &adapter->hw; + const struct rte_memzone *mz; + struct idpf_rx_queue *bufq; + uint16_t len; + int ret; + + bufq = rte_zmalloc_socket("cpfl bufq", + sizeof(struct idpf_rx_queue), + RTE_CACHE_LINE_SIZE, + socket_id); + if (bufq == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate memory for rx buffer queue."); + ret = -ENOMEM; + goto err_bufq1_alloc; + } + + bufq->mp = mp; + bufq->nb_rx_desc = nb_desc; + bufq->rx_free_thresh = rx_free_thresh; + bufq->queue_id = vport->chunks_info.rx_buf_start_qid + queue_idx; + bufq->port_id = dev->data->port_id; + bufq->rx_hdr_len = 0; + bufq->adapter = adapter; + + len = rte_pktmbuf_data_room_size(bufq->mp) - RTE_PKTMBUF_HEADROOM; + bufq->rx_buf_len = len; + + /* Allocate a little more to support bulk allocate. 
*/ + len = nb_desc + IDPF_RX_MAX_BURST; + + mz = cpfl_dma_zone_reserve(dev, queue_idx, len, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER, + socket_id, true); + if (mz == NULL) { + ret = -ENOMEM; + goto err_mz_reserve; + } + + bufq->rx_ring_phys_addr = mz->iova; + bufq->rx_ring = mz->addr; + bufq->mz = mz; + + bufq->sw_ring = + rte_zmalloc_socket("cpfl rx bufq sw ring", + sizeof(struct rte_mbuf *) * len, + RTE_CACHE_LINE_SIZE, + socket_id); + if (bufq->sw_ring == NULL) { +
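A matching Rx-side sketch, again with illustrative pool and ring sizes; passing a NULL rxconf lets the PMD apply the default_rxconf advertised above.

#include <errno.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Illustrative helper: one Rx queue backed by a fresh mbuf pool. */
static int
setup_one_rxq(uint16_t port_id)
{
	struct rte_mempool *mp;

	mp = rte_pktmbuf_pool_create("cpfl_rx_pool", 4096, 256, 0,
				     RTE_MBUF_DEFAULT_BUF_SIZE,
				     rte_eth_dev_socket_id(port_id));
	if (mp == NULL)
		return -ENOMEM;

	return rte_eth_rx_queue_setup(port_id, 0, 1024 /* within rx_desc_lim */,
				      rte_eth_dev_socket_id(port_id), NULL, mp);
}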
[PATCH v5 04/21] net/cpfl: support device start and stop
Add dev ops dev_start, dev_stop and link_update. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 35 ++ 1 file changed, 35 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index fb530c7690..423a8dcdcd 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -184,12 +184,45 @@ cpfl_dev_configure(struct rte_eth_dev *dev) return 0; } +static int +cpfl_dev_start(struct rte_eth_dev *dev) +{ + struct idpf_vport *vport = dev->data->dev_private; + int ret; + + ret = idpf_vc_vport_ena_dis(vport, true); + if (ret != 0) { + PMD_DRV_LOG(ERR, "Failed to enable vport"); + return ret; + } + + vport->stopped = 0; + + return 0; +} + +static int +cpfl_dev_stop(struct rte_eth_dev *dev) +{ + struct idpf_vport *vport = dev->data->dev_private; + + if (vport->stopped == 1) + return 0; + + idpf_vc_vport_ena_dis(vport, false); + + vport->stopped = 1; + + return 0; +} + static int cpfl_dev_close(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; struct cpfl_adapter_ext *adapter = CPFL_ADAPTER_TO_EXT(vport->adapter); + cpfl_dev_stop(dev); idpf_vport_deinit(vport); adapter->cur_vports &= ~RTE_BIT32(vport->devarg_id); @@ -538,6 +571,8 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .rx_queue_setup = cpfl_rx_queue_setup, .tx_queue_setup = cpfl_tx_queue_setup, .dev_infos_get = cpfl_dev_info_get, + .dev_start = cpfl_dev_start, + .dev_stop = cpfl_dev_stop, .link_update= cpfl_dev_link_update, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, }; -- 2.25.1
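The new start/stop ops slot into the usual ethdev lifecycle; a trimmed sketch with error handling omitted and illustrative values:

#include <rte_ethdev.h>

static void
run_port(uint16_t port_id)
{
	struct rte_eth_conf conf = { 0 };

	rte_eth_dev_configure(port_id, 1, 1, &conf);
	/* ... rte_eth_rx_queue_setup() / rte_eth_tx_queue_setup() ... */
	rte_eth_dev_start(port_id);	/* cpfl_dev_start(): enable vport */

	/* ... forward traffic ... */

	rte_eth_dev_stop(port_id);	/* cpfl_dev_stop(): disable vport */
	rte_eth_dev_close(port_id);	/* cpfl_dev_close(): stop + deinit vport */
}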
[PATCH v5 05/21] net/cpfl: support queue start
Add support for these device ops: - rx_queue_start - tx_queue_start Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 41 ++ drivers/net/cpfl/cpfl_rxtx.c | 138 + drivers/net/cpfl/cpfl_rxtx.h | 4 + 3 files changed, 183 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 423a8dcdcd..60339c836d 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -184,12 +184,51 @@ cpfl_dev_configure(struct rte_eth_dev *dev) return 0; } +static int +cpfl_start_queues(struct rte_eth_dev *dev) +{ + struct idpf_rx_queue *rxq; + struct idpf_tx_queue *txq; + int err = 0; + int i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + if (txq == NULL || txq->tx_deferred_start) + continue; + err = cpfl_tx_queue_start(dev, i); + if (err != 0) { + PMD_DRV_LOG(ERR, "Fail to start Tx queue %u", i); + return err; + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (rxq == NULL || rxq->rx_deferred_start) + continue; + err = cpfl_rx_queue_start(dev, i); + if (err != 0) { + PMD_DRV_LOG(ERR, "Fail to start Rx queue %u", i); + return err; + } + } + + return err; +} + static int cpfl_dev_start(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; int ret; + ret = cpfl_start_queues(dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, "Failed to start queues"); + return ret; + } + ret = idpf_vc_vport_ena_dis(vport, true); if (ret != 0) { PMD_DRV_LOG(ERR, "Failed to enable vport"); @@ -574,6 +613,8 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .dev_start = cpfl_dev_start, .dev_stop = cpfl_dev_stop, .link_update= cpfl_dev_link_update, + .rx_queue_start = cpfl_rx_queue_start, + .tx_queue_start = cpfl_tx_queue_start, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, }; diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index 4083e8c3b6..2813e83a67 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -474,3 +474,141 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, err_txq_alloc: return ret; } + +int +cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct idpf_rx_queue *rxq; + int err; + + if (rx_queue_id >= dev->data->nb_rx_queues) + return -EINVAL; + + rxq = dev->data->rx_queues[rx_queue_id]; + + if (rxq == NULL || !rxq->q_set) { + PMD_DRV_LOG(ERR, "RX queue %u not available or setup", + rx_queue_id); + return -EINVAL; + } + + if (rxq->bufq1 == NULL) { + /* Single queue */ + err = idpf_qc_single_rxq_mbufs_alloc(rxq); + if (err != 0) { + PMD_DRV_LOG(ERR, "Failed to allocate RX queue mbuf"); + return err; + } + + rte_wmb(); + + /* Init the RX tail register. */ + IDPF_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1); + } else { + /* Split queue */ + err = idpf_qc_split_rxq_mbufs_alloc(rxq->bufq1); + if (err != 0) { + PMD_DRV_LOG(ERR, "Failed to allocate RX buffer queue mbuf"); + return err; + } + err = idpf_qc_split_rxq_mbufs_alloc(rxq->bufq2); + if (err != 0) { + PMD_DRV_LOG(ERR, "Failed to allocate RX buffer queue mbuf"); + return err; + } + + rte_wmb(); + + /* Init the RX tail register. 
*/ + IDPF_PCI_REG_WRITE(rxq->bufq1->qrx_tail, rxq->bufq1->rx_tail); + IDPF_PCI_REG_WRITE(rxq->bufq2->qrx_tail, rxq->bufq2->rx_tail); + } + + return err; +} + +int +cpfl_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct idpf_vport *vport = dev->data->dev_private; + struct idpf_rx_queue *rxq = + dev->data->rx_queues[rx_queue_id]; + int err = 0; + + err = idpf_vc_rxq_config(vport, rxq); + if (err != 0) { + PMD_DRV_LOG(ERR, "Fail to configure Rx queue %u", rx_queue_id); + return err; + } + + err = cpfl_rx_queue_init(dev, rx_
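A short sketch of the deferred-start flow that cpfl_start_queues() honours; the queue id and ring size are illustrative.

#include <rte_ethdev.h>

static int
start_rxq_later(uint16_t port_id, struct rte_mempool *mp)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rxconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	rxconf = dev_info.default_rxconf;
	rxconf.rx_deferred_start = 1;	/* skipped by cpfl_start_queues() */

	rte_eth_rx_queue_setup(port_id, 1, 1024,
			       rte_eth_dev_socket_id(port_id), &rxconf, mp);
	rte_eth_dev_start(port_id);	/* queue 1 stays stopped */

	return rte_eth_dev_rx_queue_start(port_id, 1);	/* -> cpfl_rx_queue_start() */
}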
[PATCH v5 06/21] net/cpfl: support queue stop
Add support for these device ops: - rx_queue_stop - tx_queue_stop Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 10 +++- drivers/net/cpfl/cpfl_rxtx.c | 87 ++ drivers/net/cpfl/cpfl_rxtx.h | 3 ++ 3 files changed, 99 insertions(+), 1 deletion(-) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 60339c836d..8ce7329b78 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -232,12 +232,16 @@ cpfl_dev_start(struct rte_eth_dev *dev) ret = idpf_vc_vport_ena_dis(vport, true); if (ret != 0) { PMD_DRV_LOG(ERR, "Failed to enable vport"); - return ret; + goto err_vport; } vport->stopped = 0; return 0; + +err_vport: + cpfl_stop_queues(dev); + return ret; } static int @@ -250,6 +254,8 @@ cpfl_dev_stop(struct rte_eth_dev *dev) idpf_vc_vport_ena_dis(vport, false); + cpfl_stop_queues(dev); + vport->stopped = 1; return 0; @@ -615,6 +621,8 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .link_update= cpfl_dev_link_update, .rx_queue_start = cpfl_rx_queue_start, .tx_queue_start = cpfl_tx_queue_start, + .rx_queue_stop = cpfl_rx_queue_stop, + .tx_queue_stop = cpfl_tx_queue_stop, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, }; diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index 2813e83a67..ab5383a635 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -612,3 +612,90 @@ cpfl_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id) return err; } + +int +cpfl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) +{ + struct idpf_vport *vport = dev->data->dev_private; + struct idpf_rx_queue *rxq; + int err; + + if (rx_queue_id >= dev->data->nb_rx_queues) + return -EINVAL; + + err = idpf_vc_queue_switch(vport, rx_queue_id, true, false); + if (err != 0) { + PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off", + rx_queue_id); + return err; + } + + rxq = dev->data->rx_queues[rx_queue_id]; + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) { + rxq->ops->release_mbufs(rxq); + idpf_qc_single_rx_queue_reset(rxq); + } else { + rxq->bufq1->ops->release_mbufs(rxq->bufq1); + rxq->bufq2->ops->release_mbufs(rxq->bufq2); + idpf_qc_split_rx_queue_reset(rxq); + } + dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; + + return 0; +} + +int +cpfl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) +{ + struct idpf_vport *vport = dev->data->dev_private; + struct idpf_tx_queue *txq; + int err; + + if (tx_queue_id >= dev->data->nb_tx_queues) + return -EINVAL; + + err = idpf_vc_queue_switch(vport, tx_queue_id, false, false); + if (err != 0) { + PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off", + tx_queue_id); + return err; + } + + txq = dev->data->tx_queues[tx_queue_id]; + txq->ops->release_mbufs(txq); + if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE) { + idpf_qc_single_tx_queue_reset(txq); + } else { + idpf_qc_split_tx_descq_reset(txq); + idpf_qc_split_tx_complq_reset(txq->complq); + } + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; + + return 0; +} + +void +cpfl_stop_queues(struct rte_eth_dev *dev) +{ + struct idpf_rx_queue *rxq; + struct idpf_tx_queue *txq; + int i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + if (rxq == NULL) + continue; + + if (cpfl_rx_queue_stop(dev, i) != 0) + PMD_DRV_LOG(WARNING, "Fail to stop Rx queue %d", i); + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + if (txq == NULL) + continue; + + if 
(cpfl_tx_queue_stop(dev, i) != 0) + PMD_DRV_LOG(WARNING, "Fail to stop Tx queue %d", i); + } +} diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h index 716b2fefa4..e9b810deaa 100644 --- a/drivers/net/cpfl/cpfl_rxtx.h +++ b/drivers/net/cpfl/cpfl_rxtx.h @@ -32,4 +32,7 @@ int cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id); int cpfl_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id); int cpfl_tx_queue_init(struct rte_eth_dev *dev, uint16
[PATCH v5 07/21] net/cpfl: support queue release
Add support for queue operations: - rx_queue_release - tx_queue_release Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 2 ++ drivers/net/cpfl/cpfl_rxtx.c | 35 ++ drivers/net/cpfl/cpfl_rxtx.h | 2 ++ 3 files changed, 39 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 8ce7329b78..f59ad56db2 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -623,6 +623,8 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .tx_queue_start = cpfl_tx_queue_start, .rx_queue_stop = cpfl_rx_queue_stop, .tx_queue_stop = cpfl_tx_queue_stop, + .rx_queue_release = cpfl_dev_rx_queue_release, + .tx_queue_release = cpfl_dev_tx_queue_release, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, }; diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index ab5383a635..aa0f6bd792 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -49,6 +49,14 @@ cpfl_tx_offload_convert(uint64_t offload) return ol; } +static const struct idpf_rxq_ops def_rxq_ops = { + .release_mbufs = idpf_qc_rxq_mbufs_release, +}; + +static const struct idpf_txq_ops def_txq_ops = { + .release_mbufs = idpf_qc_txq_mbufs_release, +}; + static const struct rte_memzone * cpfl_dma_zone_reserve(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t len, uint16_t queue_type, @@ -177,6 +185,7 @@ cpfl_rx_split_bufq_setup(struct rte_eth_dev *dev, struct idpf_rx_queue *rxq, idpf_qc_split_rx_bufq_reset(bufq); bufq->qrx_tail = hw->hw_addr + (vport->chunks_info.rx_buf_qtail_start + queue_idx * vport->chunks_info.rx_buf_qtail_spacing); + bufq->ops = &def_rxq_ops; bufq->q_set = true; if (bufq_id == 1) { @@ -235,6 +244,12 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (idpf_qc_rx_thresh_check(nb_desc, rx_free_thresh) != 0) return -EINVAL; + /* Free memory if needed */ + if (dev->data->rx_queues[queue_idx] != NULL) { + idpf_qc_rx_queue_release(dev->data->rx_queues[queue_idx]); + dev->data->rx_queues[queue_idx] = NULL; + } + /* Setup Rx queue */ rxq = rte_zmalloc_socket("cpfl rxq", sizeof(struct idpf_rx_queue), @@ -287,6 +302,7 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, idpf_qc_single_rx_queue_reset(rxq); rxq->qrx_tail = hw->hw_addr + (vport->chunks_info.rx_qtail_start + queue_idx * vport->chunks_info.rx_qtail_spacing); + rxq->ops = &def_rxq_ops; } else { idpf_qc_split_rx_descq_reset(rxq); @@ -399,6 +415,12 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (idpf_qc_tx_thresh_check(nb_desc, tx_rs_thresh, tx_free_thresh) != 0) return -EINVAL; + /* Free memory if needed. */ + if (dev->data->tx_queues[queue_idx] != NULL) { + idpf_qc_tx_queue_release(dev->data->tx_queues[queue_idx]); + dev->data->tx_queues[queue_idx] = NULL; + } + /* Allocate the TX queue data structure. 
*/ txq = rte_zmalloc_socket("cpfl txq", sizeof(struct idpf_tx_queue), @@ -461,6 +483,7 @@ cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start + queue_idx * vport->chunks_info.tx_qtail_spacing); + txq->ops = &def_txq_ops; txq->q_set = true; dev->data->tx_queues[queue_idx] = txq; @@ -674,6 +697,18 @@ cpfl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) return 0; } +void +cpfl_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) +{ + idpf_qc_rx_queue_release(dev->data->rx_queues[qid]); +} + +void +cpfl_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) +{ + idpf_qc_tx_queue_release(dev->data->tx_queues[qid]); +} + void cpfl_stop_queues(struct rte_eth_dev *dev) { diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h index e9b810deaa..f5882401dc 100644 --- a/drivers/net/cpfl/cpfl_rxtx.h +++ b/drivers/net/cpfl/cpfl_rxtx.h @@ -35,4 +35,6 @@ int cpfl_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id); void cpfl_stop_queues(struct rte_eth_dev *dev); int cpfl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id); int cpfl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id); +void cpfl_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); +void cpfl_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); #endif /* _CPFL_RXTX_H_ */ -- 2.25.1
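With the release ops in place, re-running setup on an already configured queue index first frees the old queue (see the "Free memory if needed" hunks above). A sketch, assuming the port id and mempool from the earlier examples:

#include <rte_ethdev.h>

static int
resize_rxq0(uint16_t port_id, struct rte_mempool *mp)
{
	int ret;

	ret = rte_eth_dev_stop(port_id);
	if (ret != 0)
		return ret;
	/* the old queue 0 is released inside setup before the new one is built */
	ret = rte_eth_rx_queue_setup(port_id, 0, 2048 /* new ring size */,
				     rte_eth_dev_socket_id(port_id), NULL, mp);
	if (ret != 0)
		return ret;
	return rte_eth_dev_start(port_id);
}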
[PATCH v5 08/21] net/cpfl: support MTU configuration
Add dev ops mtu_set. Signed-off-by: Mingxia Liu --- doc/guides/nics/features/cpfl.ini | 1 + drivers/net/cpfl/cpfl_ethdev.c| 27 +++ 2 files changed, 28 insertions(+) diff --git a/doc/guides/nics/features/cpfl.ini b/doc/guides/nics/features/cpfl.ini index a2d1ca9e15..470ba81579 100644 --- a/doc/guides/nics/features/cpfl.ini +++ b/doc/guides/nics/features/cpfl.ini @@ -7,6 +7,7 @@ ; is selected. ; [Features] +MTU update = Y Linux= Y x86-32 = Y x86-64 = Y diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index f59ad56db2..19b5234ef4 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -121,6 +121,27 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) return 0; } +static int +cpfl_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct idpf_vport *vport = dev->data->dev_private; + + /* mtu setting is forbidden if port is start */ + if (dev->data->dev_started) { + PMD_DRV_LOG(ERR, "port must be stopped before configuration"); + return -EBUSY; + } + + if (mtu > vport->max_mtu) { + PMD_DRV_LOG(ERR, "MTU should be less than %d", vport->max_mtu); + return -EINVAL; + } + + vport->max_pkt_len = mtu + CPFL_ETH_OVERHEAD; + + return 0; +} + static const uint32_t * cpfl_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) { @@ -142,6 +163,7 @@ cpfl_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) static int cpfl_dev_configure(struct rte_eth_dev *dev) { + struct idpf_vport *vport = dev->data->dev_private; struct rte_eth_conf *conf = &dev->data->dev_conf; if (conf->link_speeds & RTE_ETH_LINK_SPEED_FIXED) { @@ -181,6 +203,10 @@ cpfl_dev_configure(struct rte_eth_dev *dev) return -ENOTSUP; } + vport->max_pkt_len = + (dev->data->mtu == 0) ? CPFL_DEFAULT_MTU : dev->data->mtu + + CPFL_ETH_OVERHEAD; + return 0; } @@ -625,6 +651,7 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .tx_queue_stop = cpfl_tx_queue_stop, .rx_queue_release = cpfl_dev_rx_queue_release, .tx_queue_release = cpfl_dev_tx_queue_release, + .mtu_set= cpfl_dev_mtu_set, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, }; -- 2.25.1
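Usage sketch for the new op; the port must be stopped first, and 1500 is just an example value below vport->max_mtu.

#include <rte_ethdev.h>

static int
update_mtu(uint16_t port_id, uint16_t mtu)
{
	int ret;

	rte_eth_dev_stop(port_id);	/* mtu_set returns -EBUSY on a started port */
	ret = rte_eth_dev_set_mtu(port_id, mtu);	/* e.g. 1500 */
	if (ret != 0)
		return ret;
	return rte_eth_dev_start(port_id);
}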
[PATCH v5 09/21] net/cpfl: support basic Rx data path
Add basic Rx support in split queue mode and single queue mode. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 2 ++ drivers/net/cpfl/cpfl_rxtx.c | 18 ++ drivers/net/cpfl/cpfl_rxtx.h | 1 + 3 files changed, 21 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 19b5234ef4..cdbe0eede2 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -255,6 +255,8 @@ cpfl_dev_start(struct rte_eth_dev *dev) return ret; } + cpfl_set_rx_function(dev); + ret = idpf_vc_vport_ena_dis(vport, true); if (ret != 0) { PMD_DRV_LOG(ERR, "Failed to enable vport"); diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index aa0f6bd792..d583079fb6 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -734,3 +734,21 @@ cpfl_stop_queues(struct rte_eth_dev *dev) PMD_DRV_LOG(WARNING, "Fail to stop Tx queue %d", i); } } + +void +cpfl_set_rx_function(struct rte_eth_dev *dev) +{ + struct idpf_vport *vport = dev->data->dev_private; + + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + PMD_DRV_LOG(NOTICE, + "Using Split Scalar Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; + } else { + PMD_DRV_LOG(NOTICE, + "Using Single Scalar Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts; + } +} diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h index f5882401dc..a5dd388e1f 100644 --- a/drivers/net/cpfl/cpfl_rxtx.h +++ b/drivers/net/cpfl/cpfl_rxtx.h @@ -37,4 +37,5 @@ int cpfl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id); int cpfl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id); void cpfl_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); void cpfl_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); +void cpfl_set_rx_function(struct rte_eth_dev *dev); #endif /* _CPFL_RXTX_H_ */ -- 2.25.1
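A minimal poll loop over the new Rx path; the queue id and burst size of 32 are illustrative.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
poll_rx(uint16_t port_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t nb, i;

	nb = rte_eth_rx_burst(port_id, 0, pkts, 32);
	for (i = 0; i < nb; i++) {
		/* ... process pkts[i] ... */
		rte_pktmbuf_free(pkts[i]);
	}
}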
[PATCH v5 10/21] net/cpfl: support basic Tx data path
Add basic Tx support in split queue mode and single queue mode. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 3 +++ drivers/net/cpfl/cpfl_rxtx.c | 20 drivers/net/cpfl/cpfl_rxtx.h | 1 + 3 files changed, 24 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index cdbe0eede2..b24fae8f3f 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -97,6 +97,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_mtu = vport->max_mtu; dev_info->min_mtu = RTE_ETHER_MIN_MTU; + dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS; + dev_info->default_txconf = (struct rte_eth_txconf) { .tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH, .tx_rs_thresh = CPFL_DEFAULT_TX_RS_THRESH, @@ -256,6 +258,7 @@ cpfl_dev_start(struct rte_eth_dev *dev) } cpfl_set_rx_function(dev); + cpfl_set_tx_function(dev); ret = idpf_vc_vport_ena_dis(vport, true); if (ret != 0) { diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index d583079fb6..9c59b74c90 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -752,3 +752,23 @@ cpfl_set_rx_function(struct rte_eth_dev *dev) dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts; } } + +void +cpfl_set_tx_function(struct rte_eth_dev *dev) +{ + struct idpf_vport *vport = dev->data->dev_private; + + if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + PMD_DRV_LOG(NOTICE, + "Using Split Scalar Tx (port %d).", + dev->data->port_id); + dev->tx_pkt_burst = idpf_dp_splitq_xmit_pkts; + dev->tx_pkt_prepare = idpf_dp_prep_pkts; + } else { + PMD_DRV_LOG(NOTICE, + "Using Single Scalar Tx (port %d).", + dev->data->port_id); + dev->tx_pkt_burst = idpf_dp_singleq_xmit_pkts; + dev->tx_pkt_prepare = idpf_dp_prep_pkts; + } +} diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h index a5dd388e1f..5f8144e55f 100644 --- a/drivers/net/cpfl/cpfl_rxtx.h +++ b/drivers/net/cpfl/cpfl_rxtx.h @@ -38,4 +38,5 @@ int cpfl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id); void cpfl_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); void cpfl_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid); void cpfl_set_rx_function(struct rte_eth_dev *dev); +void cpfl_set_tx_function(struct rte_eth_dev *dev); #endif /* _CPFL_RXTX_H_ */ -- 2.25.1
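Because tx_pkt_prepare is wired to idpf_dp_prep_pkts, applications that request Tx offloads should run rte_eth_tx_prepare() before the burst call. A sketch with an illustrative queue id:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
send_burst(uint16_t port_id, struct rte_mbuf **pkts, uint16_t nb)
{
	uint16_t nb_ok, nb_tx;

	nb_ok = rte_eth_tx_prepare(port_id, 0, pkts, nb);
	nb_tx = rte_eth_tx_burst(port_id, 0, pkts, nb_ok);
	while (nb_tx < nb)	/* free rejected or untransmitted mbufs */
		rte_pktmbuf_free(pkts[nb_tx++]);
}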
[PATCH v5 11/21] net/cpfl: support write back based on ITR expire
Enable write back on ITR expire, then packets can be received one by Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 45 +- drivers/net/cpfl/cpfl_ethdev.h | 2 ++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index b24fae8f3f..c02e6c8e58 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -212,6 +212,15 @@ cpfl_dev_configure(struct rte_eth_dev *dev) return 0; } +static int +cpfl_config_rx_queues_irqs(struct rte_eth_dev *dev) +{ + struct idpf_vport *vport = dev->data->dev_private; + uint16_t nb_rx_queues = dev->data->nb_rx_queues; + + return idpf_vport_irq_map_config(vport, nb_rx_queues); +} + static int cpfl_start_queues(struct rte_eth_dev *dev) { @@ -249,12 +258,37 @@ static int cpfl_dev_start(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; + struct idpf_adapter *base = vport->adapter; + struct cpfl_adapter_ext *adapter = CPFL_ADAPTER_TO_EXT(base); + uint16_t num_allocated_vectors = base->caps.num_allocated_vectors; + uint16_t req_vecs_num; int ret; + req_vecs_num = CPFL_DFLT_Q_VEC_NUM; + if (req_vecs_num + adapter->used_vecs_num > num_allocated_vectors) { + PMD_DRV_LOG(ERR, "The accumulated request vectors' number should be less than %d", + num_allocated_vectors); + ret = -EINVAL; + goto err_vec; + } + + ret = idpf_vc_vectors_alloc(vport, req_vecs_num); + if (ret != 0) { + PMD_DRV_LOG(ERR, "Failed to allocate interrupt vectors"); + goto err_vec; + } + adapter->used_vecs_num += req_vecs_num; + + ret = cpfl_config_rx_queues_irqs(dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, "Failed to configure irqs"); + goto err_irq; + } + ret = cpfl_start_queues(dev); if (ret != 0) { PMD_DRV_LOG(ERR, "Failed to start queues"); - return ret; + goto err_startq; } cpfl_set_rx_function(dev); @@ -272,6 +306,11 @@ cpfl_dev_start(struct rte_eth_dev *dev) err_vport: cpfl_stop_queues(dev); +err_startq: + idpf_vport_irq_unmap_config(vport, dev->data->nb_rx_queues); +err_irq: + idpf_vc_vectors_dealloc(vport); +err_vec: return ret; } @@ -287,6 +326,10 @@ cpfl_dev_stop(struct rte_eth_dev *dev) cpfl_stop_queues(dev); + idpf_vport_irq_unmap_config(vport, dev->data->nb_rx_queues); + + idpf_vc_vectors_dealloc(vport); + vport->stopped = 1; return 0; diff --git a/drivers/net/cpfl/cpfl_ethdev.h b/drivers/net/cpfl/cpfl_ethdev.h index 9ca39b4558..cd7f560d19 100644 --- a/drivers/net/cpfl/cpfl_ethdev.h +++ b/drivers/net/cpfl/cpfl_ethdev.h @@ -24,6 +24,8 @@ #define CPFL_INVALID_VPORT_IDX 0x +#define CPFL_DFLT_Q_VEC_NUM1 + #define CPFL_MIN_BUF_SIZE 1024 #define CPFL_MAX_FRAME_SIZE9728 #define CPFL_DEFAULT_MTU RTE_ETHER_MTU -- 2.25.1
[PATCH v5 12/21] net/cpfl: support RSS
Add RSS support. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 51 ++ drivers/net/cpfl/cpfl_ethdev.h | 15 ++ 2 files changed, 66 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index c02e6c8e58..cf5a968cad 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -97,6 +97,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_mtu = vport->max_mtu; dev_info->min_mtu = RTE_ETHER_MIN_MTU; + dev_info->flow_type_rss_offloads = CPFL_RSS_OFFLOAD_ALL; + dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS; dev_info->default_txconf = (struct rte_eth_txconf) { @@ -162,11 +164,49 @@ cpfl_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) return ptypes; } +static int +cpfl_init_rss(struct idpf_vport *vport) +{ + struct rte_eth_rss_conf *rss_conf; + struct rte_eth_dev_data *dev_data; + uint16_t i, nb_q; + int ret = 0; + + dev_data = vport->dev_data; + rss_conf = &dev_data->dev_conf.rx_adv_conf.rss_conf; + nb_q = dev_data->nb_rx_queues; + + if (rss_conf->rss_key == NULL) { + for (i = 0; i < vport->rss_key_size; i++) + vport->rss_key[i] = (uint8_t)rte_rand(); + } else if (rss_conf->rss_key_len != vport->rss_key_size) { + PMD_INIT_LOG(ERR, "Invalid RSS key length in RSS configuration, should be %d", +vport->rss_key_size); + return -EINVAL; + } else { + rte_memcpy(vport->rss_key, rss_conf->rss_key, + vport->rss_key_size); + } + + for (i = 0; i < vport->rss_lut_size; i++) + vport->rss_lut[i] = i % nb_q; + + vport->rss_hf = IDPF_DEFAULT_RSS_HASH_EXPANDED; + + ret = idpf_vport_rss_config(vport); + if (ret != 0) + PMD_INIT_LOG(ERR, "Failed to configure RSS"); + + return ret; +} + static int cpfl_dev_configure(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; struct rte_eth_conf *conf = &dev->data->dev_conf; + struct idpf_adapter *adapter = vport->adapter; + int ret; if (conf->link_speeds & RTE_ETH_LINK_SPEED_FIXED) { PMD_INIT_LOG(ERR, "Setting link speed is not supported"); @@ -205,6 +245,17 @@ cpfl_dev_configure(struct rte_eth_dev *dev) return -ENOTSUP; } + if (adapter->caps.rss_caps != 0 && dev->data->nb_rx_queues != 0) { + ret = cpfl_init_rss(vport); + if (ret != 0) { + PMD_INIT_LOG(ERR, "Failed to init rss"); + return ret; + } + } else { + PMD_INIT_LOG(ERR, "RSS is not supported."); + return -1; + } + vport->max_pkt_len = (dev->data->mtu == 0) ? CPFL_DEFAULT_MTU : dev->data->mtu + CPFL_ETH_OVERHEAD; diff --git a/drivers/net/cpfl/cpfl_ethdev.h b/drivers/net/cpfl/cpfl_ethdev.h index cd7f560d19..e00dff4bf0 100644 --- a/drivers/net/cpfl/cpfl_ethdev.h +++ b/drivers/net/cpfl/cpfl_ethdev.h @@ -36,6 +36,21 @@ #define CPFL_ETH_OVERHEAD \ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + CPFL_VLAN_TAG_SIZE * 2) +#define CPFL_RSS_OFFLOAD_ALL ( \ + RTE_ETH_RSS_IPV4| \ + RTE_ETH_RSS_FRAG_IPV4 | \ + RTE_ETH_RSS_NONFRAG_IPV4_TCP| \ + RTE_ETH_RSS_NONFRAG_IPV4_UDP| \ + RTE_ETH_RSS_NONFRAG_IPV4_SCTP | \ + RTE_ETH_RSS_NONFRAG_IPV4_OTHER | \ + RTE_ETH_RSS_IPV6| \ + RTE_ETH_RSS_FRAG_IPV6 | \ + RTE_ETH_RSS_NONFRAG_IPV6_TCP| \ + RTE_ETH_RSS_NONFRAG_IPV6_UDP| \ + RTE_ETH_RSS_NONFRAG_IPV6_SCTP | \ + RTE_ETH_RSS_NONFRAG_IPV6_OTHER | \ + RTE_ETH_RSS_L2_PAYLOAD) + #define CPFL_ADAPTER_NAME_LEN (PCI_PRI_STR_SIZE + 1) #define CPFL_ALARM_INTERVAL5 /* us */ -- 2.25.1
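A configure-time sketch of the knobs cpfl_init_rss() consumes; with a NULL key the PMD generates a random one and spreads the LUT round-robin over the configured Rx queues (4 queues here is illustrative).

#include <rte_ethdev.h>

static int
configure_with_rss(uint16_t port_id)
{
	struct rte_eth_conf conf = { 0 };

	conf.rx_adv_conf.rss_conf.rss_key = NULL;	/* let the PMD pick a key */
	conf.rx_adv_conf.rss_conf.rss_key_len = 0;

	return rte_eth_dev_configure(port_id, 4 /* Rx queues */, 4 /* Tx queues */, &conf);
}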
[PATCH v5 13/21] net/cpfl: support Rx offloading
Add Rx offloading support: - support CHKSUM and RSS offload for split queue model - support CHKSUM offload for single queue model Signed-off-by: Mingxia Liu --- doc/guides/nics/features/cpfl.ini | 2 ++ drivers/net/cpfl/cpfl_ethdev.c| 6 ++ 2 files changed, 8 insertions(+) diff --git a/doc/guides/nics/features/cpfl.ini b/doc/guides/nics/features/cpfl.ini index 470ba81579..ee5948f444 100644 --- a/doc/guides/nics/features/cpfl.ini +++ b/doc/guides/nics/features/cpfl.ini @@ -8,6 +8,8 @@ ; [Features] MTU update = Y +L3 checksum offload = P +L4 checksum offload = P Linux= Y x86-32 = Y x86-64 = Y diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index cf5a968cad..3c0145303e 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -99,6 +99,12 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->flow_type_rss_offloads = CPFL_RSS_OFFLOAD_ALL; + dev_info->rx_offload_capa = + RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | + RTE_ETH_RX_OFFLOAD_UDP_CKSUM| + RTE_ETH_RX_OFFLOAD_TCP_CKSUM| + RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM; + dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS; dev_info->default_txconf = (struct rte_eth_txconf) { -- 2.25.1
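Application-side sketch: request the advertised checksum offloads at configure time and read the per-packet verdict from the mbuf; the helper names are made up for illustration.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* at configure time */
static void
enable_rx_csum(struct rte_eth_conf *conf)
{
	conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
				 RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
				 RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
}

/* per received mbuf */
static int
ip_csum_is_bad(const struct rte_mbuf *m)
{
	return (m->ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_BAD;
}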
[PATCH v5 14/21] net/cpfl: support Tx offloading
Add Tx offloading support: - support TSO for single queue model and split queue model. Signed-off-by: Mingxia Liu --- doc/guides/nics/features/cpfl.ini | 1 + drivers/net/cpfl/cpfl_ethdev.c| 8 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/features/cpfl.ini b/doc/guides/nics/features/cpfl.ini index ee5948f444..f4e45c7c68 100644 --- a/doc/guides/nics/features/cpfl.ini +++ b/doc/guides/nics/features/cpfl.ini @@ -8,6 +8,7 @@ ; [Features] MTU update = Y +TSO = P L3 checksum offload = P L4 checksum offload = P Linux= Y diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 3c0145303e..a0bdfb5ca4 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -105,7 +105,13 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) RTE_ETH_RX_OFFLOAD_TCP_CKSUM| RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM; - dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS; + dev_info->tx_offload_capa = + RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | + RTE_ETH_TX_OFFLOAD_UDP_CKSUM| + RTE_ETH_TX_OFFLOAD_TCP_CKSUM| + RTE_ETH_TX_OFFLOAD_SCTP_CKSUM | + RTE_ETH_TX_OFFLOAD_TCP_TSO | + RTE_ETH_TX_OFFLOAD_MULTI_SEGS; dev_info->default_txconf = (struct rte_eth_txconf) { .tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH, -- 2.25.1
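Per-packet TSO request sketch using generic mbuf fields; RTE_ETH_TX_OFFLOAD_TCP_TSO must also be enabled in txmode.offloads, and the header lengths and segment size below are illustrative.

#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

static void
request_tso(struct rte_mbuf *m)
{
	m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM;
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = sizeof(struct rte_ipv4_hdr);
	m->l4_len = sizeof(struct rte_tcp_hdr);
	m->tso_segsz = 1460;	/* example MSS */
}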
[PATCH v5 15/21] net/cpfl: add AVX512 data path for single queue model
Add support of AVX512 vector data path for single queue model. Signed-off-by: Wenjun Wu Signed-off-by: Mingxia Liu --- doc/guides/nics/cpfl.rst| 24 +- drivers/net/cpfl/cpfl_ethdev.c | 3 +- drivers/net/cpfl/cpfl_rxtx.c| 94 ++ drivers/net/cpfl/cpfl_rxtx_vec_common.h | 100 drivers/net/cpfl/meson.build| 25 +- 5 files changed, 243 insertions(+), 3 deletions(-) create mode 100644 drivers/net/cpfl/cpfl_rxtx_vec_common.h diff --git a/doc/guides/nics/cpfl.rst b/doc/guides/nics/cpfl.rst index 7c5aff0789..f0018b41df 100644 --- a/doc/guides/nics/cpfl.rst +++ b/doc/guides/nics/cpfl.rst @@ -63,4 +63,26 @@ Runtime Config Options Driver compilation and testing -- -Refer to the document :doc:`build_and_test` for details. \ No newline at end of file +Refer to the document :doc:`build_and_test` for details. + +Features + + +Vector PMD +~~ + +Vector path for Rx and Tx path are selected automatically. +The paths are chosen based on 2 conditions: + +- ``CPU`` + + On the x86 platform, the driver checks if the CPU supports AVX512. + If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth`` + is set to 512, AVX512 paths will be chosen. + +- ``Offload features`` + + The supported HW offload features are described in the document cpfl.ini, + A value "P" means the offload feature is not supported by vector path. + If any not supported features are used, cpfl vector PMD is disabled + and the scalar paths are chosen. diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index a0bdfb5ca4..9d921b4355 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -111,7 +111,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) RTE_ETH_TX_OFFLOAD_TCP_CKSUM| RTE_ETH_TX_OFFLOAD_SCTP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO | - RTE_ETH_TX_OFFLOAD_MULTI_SEGS; + RTE_ETH_TX_OFFLOAD_MULTI_SEGS | + RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; dev_info->default_txconf = (struct rte_eth_txconf) { .tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH, diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index 9c59b74c90..f1119b27e1 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -8,6 +8,7 @@ #include "cpfl_ethdev.h" #include "cpfl_rxtx.h" +#include "cpfl_rxtx_vec_common.h" static uint64_t cpfl_rx_offload_convert(uint64_t offload) @@ -735,11 +736,61 @@ cpfl_stop_queues(struct rte_eth_dev *dev) } } + void cpfl_set_rx_function(struct rte_eth_dev *dev) { struct idpf_vport *vport = dev->data->dev_private; +#ifdef RTE_ARCH_X86 + struct idpf_rx_queue *rxq; + int i; + + if (cpfl_rx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH && + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { + vport->rx_vec_allowed = true; + if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) +#ifdef CC_AVX512_SUPPORT + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) + vport->rx_use_avx512 = true; +#else + PMD_DRV_LOG(NOTICE, + "AVX512 is not supported in build env"); +#endif /* CC_AVX512_SUPPORT */ + } else { + vport->rx_vec_allowed = false; + } +#endif /* RTE_ARCH_X86 */ + +#ifdef RTE_ARCH_X86 + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + PMD_DRV_LOG(NOTICE, + "Using Split Scalar Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; + } else { + if (vport->rx_vec_allowed) { + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + (void)idpf_qc_singleq_rx_vec_setup(rxq); + } +#ifdef CC_AVX512_SUPPORT 
+ if (vport->rx_use_avx512) { + PMD_DRV_LOG(NOTICE, + "Using Single AVX512 Vector Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_singleq_recv_pkts_avx512; + return; + } +#endif /* CC_AVX512_SUPPORT */ + } + PMD_DRV_LOG(NOTICE, + "Using Single Scalar Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_singleq_recv
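Besides the CPU flags, the vector path depends on the EAL max SIMD bitwidth (for example the --force-max-simd-bitwidth=512 EAL option mentioned in the doc update above); an application can check the effective setting like this:

#include <stdbool.h>
#include <rte_vect.h>

static bool
avx512_path_possible(void)
{
	return rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512;
}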
[PATCH v5 16/21] net/cpfl: support timestamp offload
Add support for timestamp offload. Signed-off-by: Mingxia Liu --- doc/guides/nics/features/cpfl.ini | 1 + drivers/net/cpfl/cpfl_ethdev.c| 3 ++- drivers/net/cpfl/cpfl_rxtx.c | 7 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/features/cpfl.ini b/doc/guides/nics/features/cpfl.ini index f4e45c7c68..c1209df3e5 100644 --- a/doc/guides/nics/features/cpfl.ini +++ b/doc/guides/nics/features/cpfl.ini @@ -11,6 +11,7 @@ MTU update = Y TSO = P L3 checksum offload = P L4 checksum offload = P +Timestamp offload= P Linux= Y x86-32 = Y x86-64 = Y diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 9d921b4355..5393b32922 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -103,7 +103,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM| RTE_ETH_RX_OFFLOAD_TCP_CKSUM| - RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM; + RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | + RTE_ETH_RX_OFFLOAD_TIMESTAMP; dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index f1119b27e1..c81e830c6a 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -516,6 +516,13 @@ cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id) return -EINVAL; } + err = idpf_qc_ts_mbuf_register(rxq); + if (err != 0) { + PMD_DRV_LOG(ERR, "fail to register timestamp mbuf %u", + rx_queue_id); + return -EIO; + } + if (rxq->bufq1 == NULL) { /* Single queue */ err = idpf_qc_single_rxq_mbufs_alloc(rxq); -- 2.25.1
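Sketch of retrieving the timestamp on the application side, assuming it is delivered through the standard dynamic mbuf field and flag once RTE_ETH_RX_OFFLOAD_TIMESTAMP is enabled; the helper names are illustrative.

#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

static int ts_off = -1;
static uint64_t ts_flag;

/* call once at init, after enabling RTE_ETH_RX_OFFLOAD_TIMESTAMP */
static int
ts_init(void)
{
	return rte_mbuf_dyn_rx_timestamp_register(&ts_off, &ts_flag);
}

/* per received mbuf: returns 0 when no timestamp was attached */
static rte_mbuf_timestamp_t
ts_read(const struct rte_mbuf *m)
{
	if ((m->ol_flags & ts_flag) == 0)
		return 0;
	return *RTE_MBUF_DYNFIELD(m, ts_off, const rte_mbuf_timestamp_t *);
}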
[PATCH v5 17/21] net/cpfl: add AVX512 data path for split queue model
Add support of AVX512 data path for split queue model. Signed-off-by: Wenjun Wu Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_rxtx.c| 56 +++-- drivers/net/cpfl/cpfl_rxtx_vec_common.h | 20 - 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index c81e830c6a..d55ce9696d 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -759,7 +759,8 @@ cpfl_set_rx_function(struct rte_eth_dev *dev) if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) #ifdef CC_AVX512_SUPPORT if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && - rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ)) vport->rx_use_avx512 = true; #else PMD_DRV_LOG(NOTICE, @@ -772,6 +773,21 @@ cpfl_set_rx_function(struct rte_eth_dev *dev) #ifdef RTE_ARCH_X86 if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (vport->rx_vec_allowed) { + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + (void)idpf_qc_splitq_rx_vec_setup(rxq); + } +#ifdef CC_AVX512_SUPPORT + if (vport->rx_use_avx512) { + PMD_DRV_LOG(NOTICE, + "Using Split AVX512 Vector Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts_avx512; + return; + } +#endif /* CC_AVX512_SUPPORT */ + } PMD_DRV_LOG(NOTICE, "Using Split Scalar Rx (port %d).", dev->data->port_id); @@ -827,9 +843,17 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) vport->tx_vec_allowed = true; if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) #ifdef CC_AVX512_SUPPORT + { if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) vport->tx_use_avx512 = true; + if (vport->tx_use_avx512) { + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txq = dev->data->tx_queues[i]; + idpf_qc_tx_vec_avx512_setup(txq); + } + } + } #else PMD_DRV_LOG(NOTICE, "AVX512 is not supported in build env"); @@ -839,14 +863,26 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) } #endif /* RTE_ARCH_X86 */ +#ifdef RTE_ARCH_X86 if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (vport->tx_vec_allowed) { +#ifdef CC_AVX512_SUPPORT + if (vport->tx_use_avx512) { + PMD_DRV_LOG(NOTICE, + "Using Split AVX512 Vector Tx (port %d).", + dev->data->port_id); + dev->tx_pkt_burst = idpf_dp_splitq_xmit_pkts_avx512; + dev->tx_pkt_prepare = idpf_dp_prep_pkts; + return; + } +#endif /* CC_AVX512_SUPPORT */ + } PMD_DRV_LOG(NOTICE, "Using Split Scalar Tx (port %d).", dev->data->port_id); dev->tx_pkt_burst = idpf_dp_splitq_xmit_pkts; dev->tx_pkt_prepare = idpf_dp_prep_pkts; } else { -#ifdef RTE_ARCH_X86 if (vport->tx_vec_allowed) { #ifdef CC_AVX512_SUPPORT if (vport->tx_use_avx512) { @@ -865,11 +901,25 @@ cpfl_set_tx_function(struct rte_eth_dev *dev) } #endif /* CC_AVX512_SUPPORT */ } -#endif /* RTE_ARCH_X86 */ PMD_DRV_LOG(NOTICE, "Using Single Scalar Tx (port %d).", dev->data->port_id); dev->tx_pkt_burst = idpf_dp_singleq_xmit_pkts; dev->tx_pkt_prepare = idpf_dp_prep_pkts; } +#else + if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + PMD_DRV_LOG(NOTICE, + "Using Split Scalar Tx (port %d).", + dev->data->port_id); + dev->tx_pkt_burst = idpf_dp_split
[PATCH v5 18/21] net/cpfl: add HW statistics
This patch add hardware packets/bytes statistics. Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 86 ++ 1 file changed, 86 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index 5393b32922..c6ae8039fb 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -178,6 +178,87 @@ cpfl_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) return ptypes; } +static uint64_t +cpfl_get_mbuf_alloc_failed_stats(struct rte_eth_dev *dev) +{ + uint64_t mbuf_alloc_failed = 0; + struct idpf_rx_queue *rxq; + int i = 0; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + mbuf_alloc_failed += __atomic_load_n(&rxq->rx_stats.mbuf_alloc_failed, +__ATOMIC_RELAXED); + } + + return mbuf_alloc_failed; +} + +static int +cpfl_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct idpf_vport *vport = + (struct idpf_vport *)dev->data->dev_private; + struct virtchnl2_vport_stats *pstats = NULL; + int ret; + + ret = idpf_vc_stats_query(vport, &pstats); + if (ret == 0) { + uint8_t crc_stats_len = (dev->data->dev_conf.rxmode.offloads & +RTE_ETH_RX_OFFLOAD_KEEP_CRC) ? 0 : +RTE_ETHER_CRC_LEN; + + idpf_vport_stats_update(&vport->eth_stats_offset, pstats); + stats->ipackets = pstats->rx_unicast + pstats->rx_multicast + + pstats->rx_broadcast - pstats->rx_discards; + stats->opackets = pstats->tx_broadcast + pstats->tx_multicast + + pstats->tx_unicast; + stats->imissed = pstats->rx_discards; + stats->oerrors = pstats->tx_errors + pstats->tx_discards; + stats->ibytes = pstats->rx_bytes; + stats->ibytes -= stats->ipackets * crc_stats_len; + stats->obytes = pstats->tx_bytes; + + dev->data->rx_mbuf_alloc_failed = cpfl_get_mbuf_alloc_failed_stats(dev); + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + } else { + PMD_DRV_LOG(ERR, "Get statistics failed"); + } + return ret; +} + +static void +cpfl_reset_mbuf_alloc_failed_stats(struct rte_eth_dev *dev) +{ + struct idpf_rx_queue *rxq; + int i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxq = dev->data->rx_queues[i]; + __atomic_store_n(&rxq->rx_stats.mbuf_alloc_failed, 0, __ATOMIC_RELAXED); + } +} + +static int +cpfl_dev_stats_reset(struct rte_eth_dev *dev) +{ + struct idpf_vport *vport = + (struct idpf_vport *)dev->data->dev_private; + struct virtchnl2_vport_stats *pstats = NULL; + int ret; + + ret = idpf_vc_stats_query(vport, &pstats); + if (ret != 0) + return ret; + + /* set stats offset base on current values */ + vport->eth_stats_offset = *pstats; + + cpfl_reset_mbuf_alloc_failed_stats(dev); + + return 0; +} + static int cpfl_init_rss(struct idpf_vport *vport) { @@ -365,6 +446,9 @@ cpfl_dev_start(struct rte_eth_dev *dev) goto err_vport; } + if (cpfl_dev_stats_reset(dev)) + PMD_DRV_LOG(ERR, "Failed to reset stats"); + vport->stopped = 0; return 0; @@ -766,6 +850,8 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .tx_queue_release = cpfl_dev_tx_queue_release, .mtu_set= cpfl_dev_mtu_set, .dev_supported_ptypes_get = cpfl_dev_supported_ptypes_get, + .stats_get = cpfl_dev_stats_get, + .stats_reset= cpfl_dev_stats_reset, }; static uint16_t -- 2.25.1
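Application-side sketch polling the new basic stats ops:

#include <inttypes.h>
#include <stdio.h>
#include <rte_ethdev.h>

static void
dump_stats(uint16_t port_id)
{
	struct rte_eth_stats st;

	if (rte_eth_stats_get(port_id, &st) == 0)
		printf("rx=%" PRIu64 " tx=%" PRIu64 " imissed=%" PRIu64 " rx_nombuf=%" PRIu64 "\n",
		       st.ipackets, st.opackets, st.imissed, st.rx_nombuf);
	rte_eth_stats_reset(port_id);	/* re-baseline the offsets */
}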
[PATCH v5 19/21] net/cpfl: add RSS set/get ops
Add support for these device ops: - rss_reta_update - rss_reta_query - rss_hash_update - rss_hash_conf_get Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 268 + 1 file changed, 268 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index c6ae8039fb..c657f9c7cc 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -30,6 +30,56 @@ static const char * const cpfl_valid_args[] = { NULL }; +static const uint64_t cpfl_map_hena_rss[] = { + [IDPF_HASH_NONF_UNICAST_IPV4_UDP] = + RTE_ETH_RSS_NONFRAG_IPV4_UDP, + [IDPF_HASH_NONF_MULTICAST_IPV4_UDP] = + RTE_ETH_RSS_NONFRAG_IPV4_UDP, + [IDPF_HASH_NONF_IPV4_UDP] = + RTE_ETH_RSS_NONFRAG_IPV4_UDP, + [IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK] = + RTE_ETH_RSS_NONFRAG_IPV4_TCP, + [IDPF_HASH_NONF_IPV4_TCP] = + RTE_ETH_RSS_NONFRAG_IPV4_TCP, + [IDPF_HASH_NONF_IPV4_SCTP] = + RTE_ETH_RSS_NONFRAG_IPV4_SCTP, + [IDPF_HASH_NONF_IPV4_OTHER] = + RTE_ETH_RSS_NONFRAG_IPV4_OTHER, + [IDPF_HASH_FRAG_IPV4] = RTE_ETH_RSS_FRAG_IPV4, + + /* IPv6 */ + [IDPF_HASH_NONF_UNICAST_IPV6_UDP] = + RTE_ETH_RSS_NONFRAG_IPV6_UDP, + [IDPF_HASH_NONF_MULTICAST_IPV6_UDP] = + RTE_ETH_RSS_NONFRAG_IPV6_UDP, + [IDPF_HASH_NONF_IPV6_UDP] = + RTE_ETH_RSS_NONFRAG_IPV6_UDP, + [IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK] = + RTE_ETH_RSS_NONFRAG_IPV6_TCP, + [IDPF_HASH_NONF_IPV6_TCP] = + RTE_ETH_RSS_NONFRAG_IPV6_TCP, + [IDPF_HASH_NONF_IPV6_SCTP] = + RTE_ETH_RSS_NONFRAG_IPV6_SCTP, + [IDPF_HASH_NONF_IPV6_OTHER] = + RTE_ETH_RSS_NONFRAG_IPV6_OTHER, + [IDPF_HASH_FRAG_IPV6] = RTE_ETH_RSS_FRAG_IPV6, + + /* L2 Payload */ + [IDPF_HASH_L2_PAYLOAD] = RTE_ETH_RSS_L2_PAYLOAD +}; + +static const uint64_t cpfl_ipv4_rss = RTE_ETH_RSS_NONFRAG_IPV4_UDP | + RTE_ETH_RSS_NONFRAG_IPV4_TCP | + RTE_ETH_RSS_NONFRAG_IPV4_SCTP | + RTE_ETH_RSS_NONFRAG_IPV4_OTHER | + RTE_ETH_RSS_FRAG_IPV4; + +static const uint64_t cpfl_ipv6_rss = RTE_ETH_RSS_NONFRAG_IPV6_UDP | + RTE_ETH_RSS_NONFRAG_IPV6_TCP | + RTE_ETH_RSS_NONFRAG_IPV6_SCTP | + RTE_ETH_RSS_NONFRAG_IPV6_OTHER | + RTE_ETH_RSS_FRAG_IPV6; + static int cpfl_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) @@ -97,6 +147,9 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_mtu = vport->max_mtu; dev_info->min_mtu = RTE_ETHER_MIN_MTU; + dev_info->hash_key_size = vport->rss_key_size; + dev_info->reta_size = vport->rss_lut_size; + dev_info->flow_type_rss_offloads = CPFL_RSS_OFFLOAD_ALL; dev_info->rx_offload_capa = @@ -259,6 +312,36 @@ cpfl_dev_stats_reset(struct rte_eth_dev *dev) return 0; } +static int cpfl_config_rss_hf(struct idpf_vport *vport, uint64_t rss_hf) +{ + uint64_t hena = 0; + uint16_t i; + + /** +* RTE_ETH_RSS_IPV4 and RTE_ETH_RSS_IPV6 can be considered as 2 +* generalizations of all other IPv4 and IPv6 RSS types. +*/ + if (rss_hf & RTE_ETH_RSS_IPV4) + rss_hf |= cpfl_ipv4_rss; + + if (rss_hf & RTE_ETH_RSS_IPV6) + rss_hf |= cpfl_ipv6_rss; + + for (i = 0; i < RTE_DIM(cpfl_map_hena_rss); i++) { + if (cpfl_map_hena_rss[i] & rss_hf) + hena |= BIT_ULL(i); + } + + /** +* At present, cp doesn't process the virtual channel msg of rss_hf configuration, +* tips are given below. 
+*/ + if (hena != vport->rss_hf) + PMD_DRV_LOG(WARNING, "Updating RSS Hash Function is not supported at present."); + + return 0; +} + static int cpfl_init_rss(struct idpf_vport *vport) { @@ -295,6 +378,187 @@ cpfl_init_rss(struct idpf_vport *vport) return ret; } +static int +cpfl_rss_reta_update(struct rte_eth_dev *dev, +struct rte_eth_rss_reta_entry64 *reta_conf, +uint16_t reta_size) +{ + struct idpf_vport *vport = dev->data->dev_private; + struct idpf_adapter *adapter = vport->adapter; + uint16_t idx, shift; + int ret = 0; + uint16_t i; + + if (adapter->caps.rss_caps == 0 || dev->data->nb_rx_queues == 0) { + PMD_DRV_LOG(DEBUG, "RSS is not supported"); + return -ENOTSUP; + } + + if (reta_size != vport->rss_lut_size) { + PMD_DRV_LOG(ERR, "T
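A sketch of driving the new RETA ops, steering the whole table to queue 0; the table is sized from dev_info.reta_size and 512 entries is only an assumed upper bound.

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <rte_ethdev.h>

static int
reta_to_queue0(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rss_reta_entry64 reta[512 / RTE_ETH_RETA_GROUP_SIZE];
	uint16_t i;

	rte_eth_dev_info_get(port_id, &dev_info);
	if (dev_info.reta_size > 512)
		return -EINVAL;

	memset(reta, 0, sizeof(reta));
	for (i = 0; i < dev_info.reta_size; i++) {
		reta[i / RTE_ETH_RETA_GROUP_SIZE].mask |= UINT64_C(1) << (i % RTE_ETH_RETA_GROUP_SIZE);
		reta[i / RTE_ETH_RETA_GROUP_SIZE].reta[i % RTE_ETH_RETA_GROUP_SIZE] = 0;
	}
	return rte_eth_dev_rss_reta_update(port_id, reta, dev_info.reta_size);
}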
[PATCH v5 20/21] net/cpfl: support scalar scatter Rx datapath for single queue model
This patch add single q recv scatter Rx function. Signed-off-by: Wenjun Wu Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 3 ++- drivers/net/cpfl/cpfl_rxtx.c | 27 +++ drivers/net/cpfl/cpfl_rxtx.h | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index c657f9c7cc..a97d9b4494 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -157,7 +157,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) RTE_ETH_RX_OFFLOAD_UDP_CKSUM| RTE_ETH_RX_OFFLOAD_TCP_CKSUM| RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | - RTE_ETH_RX_OFFLOAD_TIMESTAMP; + RTE_ETH_RX_OFFLOAD_TIMESTAMP| + RTE_ETH_RX_OFFLOAD_SCATTER; dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c index d55ce9696d..f4b76e0f90 100644 --- a/drivers/net/cpfl/cpfl_rxtx.c +++ b/drivers/net/cpfl/cpfl_rxtx.c @@ -503,6 +503,8 @@ int cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id) { struct idpf_rx_queue *rxq; + uint16_t max_pkt_len; + uint32_t frame_size; int err; if (rx_queue_id >= dev->data->nb_rx_queues) @@ -516,6 +518,17 @@ cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id) return -EINVAL; } + frame_size = dev->data->mtu + CPFL_ETH_OVERHEAD; + + max_pkt_len = + RTE_MIN((uint32_t)CPFL_SUPPORT_CHAIN_NUM * rxq->rx_buf_len, + frame_size); + + rxq->max_pkt_len = max_pkt_len; + if ((dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) || + frame_size > rxq->rx_buf_len) + dev->data->scattered_rx = 1; + err = idpf_qc_ts_mbuf_register(rxq); if (err != 0) { PMD_DRV_LOG(ERR, "fail to register timestamp mbuf %u", @@ -808,6 +821,13 @@ cpfl_set_rx_function(struct rte_eth_dev *dev) } #endif /* CC_AVX512_SUPPORT */ } + if (dev->data->scattered_rx) { + PMD_DRV_LOG(NOTICE, + "Using Single Scalar Scatterd Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_singleq_recv_scatter_pkts; + return; + } PMD_DRV_LOG(NOTICE, "Using Single Scalar Rx (port %d).", dev->data->port_id); @@ -820,6 +840,13 @@ cpfl_set_rx_function(struct rte_eth_dev *dev) dev->data->port_id); dev->rx_pkt_burst = idpf_dp_splitq_recv_pkts; } else { + if (dev->data->scattered_rx) { + PMD_DRV_LOG(NOTICE, + "Using Single Scalar Scatterd Rx (port %d).", + dev->data->port_id); + dev->rx_pkt_burst = idpf_dp_singleq_recv_scatter_pkts; + return; + } PMD_DRV_LOG(NOTICE, "Using Single Scalar Rx (port %d).", dev->data->port_id); diff --git a/drivers/net/cpfl/cpfl_rxtx.h b/drivers/net/cpfl/cpfl_rxtx.h index 5f8144e55f..fb267d38c8 100644 --- a/drivers/net/cpfl/cpfl_rxtx.h +++ b/drivers/net/cpfl/cpfl_rxtx.h @@ -21,6 +21,8 @@ #define CPFL_DEFAULT_TX_RS_THRESH 32 #define CPFL_DEFAULT_TX_FREE_THRESH32 +#define CPFL_SUPPORT_CHAIN_NUM 5 + int cpfl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); -- 2.25.1
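Scattered Rx sketch: with a data room smaller than the MTU and the SCATTER offload requested, frames arrive as chained mbufs; the pool parameters are illustrative.

#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

static struct rte_mempool *
make_small_pool(struct rte_eth_conf *conf)
{
	conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_SCATTER;
	return rte_pktmbuf_pool_create("small_pool", 4096, 256, 0,
				       1024 + RTE_PKTMBUF_HEADROOM,
				       rte_socket_id());
}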
[PATCH v5 21/21] net/cpfl: add xstats ops
Add support for these device ops: - dev_xstats_get - dev_xstats_get_names - dev_xstats_reset Signed-off-by: Mingxia Liu --- drivers/net/cpfl/cpfl_ethdev.c | 80 ++ 1 file changed, 80 insertions(+) diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c index a97d9b4494..1f6c9aa248 100644 --- a/drivers/net/cpfl/cpfl_ethdev.c +++ b/drivers/net/cpfl/cpfl_ethdev.c @@ -80,6 +80,30 @@ static const uint64_t cpfl_ipv6_rss = RTE_ETH_RSS_NONFRAG_IPV6_UDP | RTE_ETH_RSS_NONFRAG_IPV6_OTHER | RTE_ETH_RSS_FRAG_IPV6; +struct rte_cpfl_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +static const struct rte_cpfl_xstats_name_off rte_cpfl_stats_strings[] = { + {"rx_bytes", offsetof(struct virtchnl2_vport_stats, rx_bytes)}, + {"rx_unicast_packets", offsetof(struct virtchnl2_vport_stats, rx_unicast)}, + {"rx_multicast_packets", offsetof(struct virtchnl2_vport_stats, rx_multicast)}, + {"rx_broadcast_packets", offsetof(struct virtchnl2_vport_stats, rx_broadcast)}, + {"rx_dropped_packets", offsetof(struct virtchnl2_vport_stats, rx_discards)}, + {"rx_errors", offsetof(struct virtchnl2_vport_stats, rx_errors)}, + {"rx_unknown_protocol_packets", offsetof(struct virtchnl2_vport_stats, +rx_unknown_protocol)}, + {"tx_bytes", offsetof(struct virtchnl2_vport_stats, tx_bytes)}, + {"tx_unicast_packets", offsetof(struct virtchnl2_vport_stats, tx_unicast)}, + {"tx_multicast_packets", offsetof(struct virtchnl2_vport_stats, tx_multicast)}, + {"tx_broadcast_packets", offsetof(struct virtchnl2_vport_stats, tx_broadcast)}, + {"tx_dropped_packets", offsetof(struct virtchnl2_vport_stats, tx_discards)}, + {"tx_error_packets", offsetof(struct virtchnl2_vport_stats, tx_errors)}}; + +#define CPFL_NB_XSTATS (sizeof(rte_cpfl_stats_strings) / \ + sizeof(rte_cpfl_stats_strings[0])) + static int cpfl_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) @@ -313,6 +337,59 @@ cpfl_dev_stats_reset(struct rte_eth_dev *dev) return 0; } +static int cpfl_dev_xstats_reset(struct rte_eth_dev *dev) +{ + cpfl_dev_stats_reset(dev); + return 0; +} + +static int cpfl_dev_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, unsigned int n) +{ + struct idpf_vport *vport = + (struct idpf_vport *)dev->data->dev_private; + struct virtchnl2_vport_stats *pstats = NULL; + unsigned int i; + int ret; + + if (n < CPFL_NB_XSTATS) + return CPFL_NB_XSTATS; + + if (!xstats) + return 0; + + ret = idpf_vc_stats_query(vport, &pstats); + if (ret) { + PMD_DRV_LOG(ERR, "Get statistics failed"); + return 0; + } + + idpf_vport_stats_update(&vport->eth_stats_offset, pstats); + + /* loop over xstats array and values from pstats */ + for (i = 0; i < CPFL_NB_XSTATS; i++) { + xstats[i].id = i; + xstats[i].value = *(uint64_t *)(((char *)pstats) + + rte_cpfl_stats_strings[i].offset); + } + return CPFL_NB_XSTATS; +} + +static int cpfl_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, +struct rte_eth_xstat_name *xstats_names, +__rte_unused unsigned int limit) +{ + unsigned int i; + + if (xstats_names) + for (i = 0; i < CPFL_NB_XSTATS; i++) { + snprintf(xstats_names[i].name, +sizeof(xstats_names[i].name), +"%s", rte_cpfl_stats_strings[i].name); + } + return CPFL_NB_XSTATS; +} + static int cpfl_config_rss_hf(struct idpf_vport *vport, uint64_t rss_hf) { uint64_t hena = 0; @@ -1121,6 +1198,9 @@ static const struct eth_dev_ops cpfl_eth_dev_ops = { .reta_query = cpfl_rss_reta_query, .rss_hash_update= cpfl_rss_hash_update, .rss_hash_conf_get = cpfl_rss_hash_conf_get, + .xstats_get = 
cpfl_dev_xstats_get, + .xstats_get_names = cpfl_dev_xstats_get_names, + .xstats_reset = cpfl_dev_xstats_reset, }; static uint16_t -- 2.25.1
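Generic xstats dump sketch; the ids returned by xstats_get index the name array.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
dump_xstats(uint16_t port_id)
{
	int i, n = rte_eth_xstats_get_names(port_id, NULL, 0);
	struct rte_eth_xstat_name *names;
	struct rte_eth_xstat *vals;

	if (n <= 0)
		return;
	names = calloc(n, sizeof(*names));
	vals = calloc(n, sizeof(*vals));
	if (names != NULL && vals != NULL &&
	    rte_eth_xstats_get_names(port_id, names, n) == n &&
	    rte_eth_xstats_get(port_id, vals, n) == n) {
		for (i = 0; i < n; i++)
			printf("%s: %" PRIu64 "\n", names[vals[i].id].name, vals[i].value);
	}
	free(names);
	free(vals);
}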
[PATCH v10 0/5] lcore telemetry improvements
This is a follow up on previous work by Kevin Laatz: http://patches.dpdk.org/project/dpdk/list/?series=24658&state=* This series is aimed at allowing DPDK applications to expose their CPU usage stats in the DPDK telemetry under /eal/lcore/info. This is a much more basic and naive approach which leaves the cpu cycles accounting completely up to the application. For reference, I have implemented a draft patch in OvS to use rte_lcore_register_usage_cb() and report the already available busy cycles information. https://github.com/rjarry/ovs/commit/643e672fe388e348ea7ccbbda6f5a87a066fd919 v10: - Code style fix - Fixed reset of total_cycles while lcore is running v9: - Fixed changelog & version.map order. - Updated with 64-bit integer telemetry functions. - Refined docstrings (added notice about resetting the callback). - Fixed accounting of total cycles in testpmd. Robin Jarry (5): eal: add lcore info in telemetry eal: report applications lcore usage app/testpmd: add dump command for lcores app/testpmd: report lcore usage eal: add lcore usage telemetry endpoint app/test-pmd/cmdline.c | 3 + app/test-pmd/noisy_vnf.c| 8 +- app/test-pmd/testpmd.c | 44 +++- app/test-pmd/testpmd.h | 25 ++- doc/guides/rel_notes/release_23_03.rst | 8 + doc/guides/testpmd_app_ug/testpmd_funcs.rst | 7 + lib/eal/common/eal_common_lcore.c | 222 ++-- lib/eal/include/rte_lcore.h | 48 + lib/eal/version.map | 1 + 9 files changed, 335 insertions(+), 31 deletions(-) -- 2.39.1
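The usage callback prototype is introduced by patch 2 of this series, which is not quoted here; assuming the rte_lcore_usage structure it adds carries total_cycles and busy_cycles fields, an application-side sketch could look like the following (the counter arrays and helper are hypothetical):

#include <rte_lcore.h>

/* hypothetical per-lcore counters maintained by the application's poll loop */
static uint64_t busy_cycles[RTE_MAX_LCORE];
static uint64_t total_cycles[RTE_MAX_LCORE];

static int
lcore_usage_cb(unsigned int lcore_id, struct rte_lcore_usage *usage)
{
	usage->total_cycles = total_cycles[lcore_id];
	usage->busy_cycles = busy_cycles[lcore_id];
	return 0;
}

/* called once at init, after rte_eal_init() */
static void
register_usage_reporting(void)
{
	rte_lcore_register_usage_cb(lcore_usage_cb);
}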
[PATCH v10 1/5] eal: add lcore info in telemetry
Report the same information than rte_lcore_dump() in the telemetry API into /eal/lcore/list and /eal/lcore/info,ID. Example: --> /eal/lcore/info,3 { "/eal/lcore/info": { "lcore_id": 3, "socket": 0, "role": "RTE", "cpuset": [ 3 ] } } Signed-off-by: Robin Jarry Acked-by: Morten Brørup Reviewed-by: Kevin Laatz --- Notes: v9 -> v10: s/info->lcore_id != lcore_id/lcore_id != info->lcore_id/ lib/eal/common/eal_common_lcore.c | 126 +- 1 file changed, 108 insertions(+), 18 deletions(-) diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c index 06c594b0224f..d45a40831393 100644 --- a/lib/eal/common/eal_common_lcore.c +++ b/lib/eal/common/eal_common_lcore.c @@ -10,6 +10,9 @@ #include #include #include +#ifndef RTE_EXEC_ENV_WINDOWS +#include +#endif #include "eal_private.h" #include "eal_thread.h" @@ -419,35 +422,35 @@ rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg) return ret; } +static const char * +lcore_role_str(enum rte_lcore_role_t role) +{ + switch (role) { + case ROLE_RTE: + return "RTE"; + case ROLE_SERVICE: + return "SERVICE"; + case ROLE_NON_EAL: + return "NON_EAL"; + default: + return "UNKNOWN"; + } +} + static int lcore_dump_cb(unsigned int lcore_id, void *arg) { struct rte_config *cfg = rte_eal_get_configuration(); char cpuset[RTE_CPU_AFFINITY_STR_LEN]; - const char *role; FILE *f = arg; int ret; - switch (cfg->lcore_role[lcore_id]) { - case ROLE_RTE: - role = "RTE"; - break; - case ROLE_SERVICE: - role = "SERVICE"; - break; - case ROLE_NON_EAL: - role = "NON_EAL"; - break; - default: - role = "UNKNOWN"; - break; - } - ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset, sizeof(cpuset)); fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id, - rte_lcore_to_socket_id(lcore_id), role, cpuset, - ret == 0 ? "" : "..."); + rte_lcore_to_socket_id(lcore_id), + lcore_role_str(cfg->lcore_role[lcore_id]), + cpuset, ret == 0 ? 
"" : "..."); return 0; } @@ -456,3 +459,90 @@ rte_lcore_dump(FILE *f) { rte_lcore_iterate(lcore_dump_cb, f); } + +#ifndef RTE_EXEC_ENV_WINDOWS +static int +lcore_telemetry_id_cb(unsigned int lcore_id, void *arg) +{ + struct rte_tel_data *d = arg; + return rte_tel_data_add_array_int(d, lcore_id); +} + +static int +handle_lcore_list(const char *cmd __rte_unused, + const char *params __rte_unused, + struct rte_tel_data *d) +{ + int ret; + + ret = rte_tel_data_start_array(d, RTE_TEL_INT_VAL); + if (ret == 0) + ret = rte_lcore_iterate(lcore_telemetry_id_cb, d); + + return ret; +} + +struct lcore_telemetry_info { + unsigned int lcore_id; + struct rte_tel_data *d; +}; + +static int +lcore_telemetry_info_cb(unsigned int lcore_id, void *arg) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + struct lcore_telemetry_info *info = arg; + struct rte_tel_data *cpuset; + unsigned int cpu; + + if (lcore_id != info->lcore_id) + return 0; + + rte_tel_data_start_dict(info->d); + rte_tel_data_add_dict_int(info->d, "lcore_id", lcore_id); + rte_tel_data_add_dict_int(info->d, "socket", rte_lcore_to_socket_id(lcore_id)); + rte_tel_data_add_dict_string(info->d, "role", lcore_role_str(cfg->lcore_role[lcore_id])); + cpuset = rte_tel_data_alloc(); + if (cpuset == NULL) + return -ENOMEM; + rte_tel_data_start_array(cpuset, RTE_TEL_INT_VAL); + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (CPU_ISSET(cpu, &lcore_config[lcore_id].cpuset)) + rte_tel_data_add_array_int(cpuset, cpu); + } + rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0); + + return 0; +} + +static int +handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_tel_data *d) +{ + struct lcore_telemetry_info info = { .d = d }; + unsigned long lcore_id; + char *endptr; + + if (params == NULL) + return -EINVAL; + errno = 0; + lcore_id = strtoul(params, &endptr, 10); + if (errno) + return -errno; + if (*params == '\0' || *endptr != '\0' || lcore_id >= RTE_MAX_LCORE) + return -EINVAL; + + info.lcore_id = lcore_id; + + return rte_lcore_iterate(lcore_telemetry_info_cb, &info); +} + +RTE_INIT(lcore_telemetry) +{ + rte_telemetry_register_cmd( + "/eal/lcore/list", handle_lcore_list, +
[PATCH v10 2/5] eal: report applications lcore usage
Allow applications to register a callback that will be invoked in rte_lcore_dump() and when requesting lcore info in the telemetry API. The callback is expected to return the number of TSC cycles that have passed since application start and the number of these cycles that were spent doing busy work. Signed-off-by: Robin Jarry Acked-by: Morten Brørup Reviewed-by: Kevin Laatz --- Notes: v9 -> v10: no change doc/guides/rel_notes/release_23_03.rst | 7 lib/eal/common/eal_common_lcore.c | 40 +++-- lib/eal/include/rte_lcore.h| 48 ++ lib/eal/version.map| 1 + 4 files changed, 94 insertions(+), 2 deletions(-) diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 1fa101c420cd..17d38d5ea264 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -55,6 +55,13 @@ New Features Also, make sure to start the actual text at the margin. === +* **Added support for reporting lcore usage in applications.** + + * The ``/eal/lcore/list`` and ``/eal/lcore/info`` telemetry endpoints have +been added to provide information similar to ``rte_lcore_dump()``. + * Applications can register a callback at startup via +``rte_lcore_register_usage_cb()`` to provide lcore usage information. + * **Updated AMD axgbe driver.** * Added multi-process support. diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c index d45a40831393..8fcdebd87692 100644 --- a/lib/eal/common/eal_common_lcore.c +++ b/lib/eal/common/eal_common_lcore.c @@ -2,6 +2,7 @@ * Copyright(c) 2010-2014 Intel Corporation */ +#include #include #include @@ -437,20 +438,45 @@ lcore_role_str(enum rte_lcore_role_t role) } } +static rte_lcore_usage_cb lcore_usage_cb; + +void +rte_lcore_register_usage_cb(rte_lcore_usage_cb cb) +{ + lcore_usage_cb = cb; +} + static int lcore_dump_cb(unsigned int lcore_id, void *arg) { struct rte_config *cfg = rte_eal_get_configuration(); char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + struct rte_lcore_usage usage; + rte_lcore_usage_cb usage_cb; + char *usage_str = NULL; FILE *f = arg; int ret; + /* The callback may not set all the fields in the structure, so clear it here. */ + memset(&usage, 0, sizeof(usage)); + /* Guard against concurrent modification of lcore_usage_cb. */ + usage_cb = lcore_usage_cb; + if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) { + if (asprintf(&usage_str, ", busy cycles %"PRIu64"/%"PRIu64, + usage.busy_cycles, usage.total_cycles) < 0) { + return -ENOMEM; + } + } ret = eal_thread_dump_affinity(&lcore_config[lcore_id].cpuset, cpuset, sizeof(cpuset)); - fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s\n", lcore_id, + fprintf(f, "lcore %u, socket %u, role %s, cpuset %s%s%s\n", lcore_id, rte_lcore_to_socket_id(lcore_id), lcore_role_str(cfg->lcore_role[lcore_id]), - cpuset, ret == 0 ? "" : "..."); + cpuset, ret == 0 ? "" : "...", + usage_str != NULL ? usage_str : ""); + + free(usage_str); + return 0; } @@ -492,7 +518,9 @@ lcore_telemetry_info_cb(unsigned int lcore_id, void *arg) { struct rte_config *cfg = rte_eal_get_configuration(); struct lcore_telemetry_info *info = arg; + struct rte_lcore_usage usage; struct rte_tel_data *cpuset; + rte_lcore_usage_cb usage_cb; unsigned int cpu; if (lcore_id != info->lcore_id) @@ -511,6 +539,14 @@ lcore_telemetry_info_cb(unsigned int lcore_id, void *arg) rte_tel_data_add_array_int(cpuset, cpu); } rte_tel_data_add_dict_container(info->d, "cpuset", cpuset, 0); + /* The callback may not set all the fields in the structure, so clear it here. 
*/ + memset(&usage, 0, sizeof(usage)); + /* Guard against concurrent modification of lcore_usage_cb. */ + usage_cb = lcore_usage_cb; + if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) { + rte_tel_data_add_dict_uint(info->d, "total_cycles", usage.total_cycles); + rte_tel_data_add_dict_uint(info->d, "busy_cycles", usage.busy_cycles); + } return 0; } diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h index 9c7865052100..30f83f4d578c 100644 --- a/lib/eal/include/rte_lcore.h +++ b/lib/eal/include/rte_lcore.h @@ -328,6 +328,54 @@ typedef int (*rte_lcore_iterate_cb)(unsigned int lcore_id, void *arg); int rte_lcore_iterate(rte_lcore_iterate_cb cb, void *arg); +/** + * lcore usage statistics. + */ +struct rte_lcore_usage { + /** +* Th
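On the application side only one callback has to be provided. A minimal sketch, assuming the application already accumulates per-lcore cycle counters in its main loop (the arrays below are hypothetical stand-ins for whatever bookkeeping the application uses):

#include <stdint.h>
#include <rte_lcore.h>

/* Hypothetical per-lcore counters maintained by the application's datapath. */
static uint64_t app_total_cycles[RTE_MAX_LCORE];
static uint64_t app_busy_cycles[RTE_MAX_LCORE];

static int
app_lcore_usage(unsigned int lcore_id, struct rte_lcore_usage *usage)
{
    /* Returning non-zero tells EAL this lcore is not accounted for. */
    if (app_total_cycles[lcore_id] == 0)
        return -1;

    usage->total_cycles = app_total_cycles[lcore_id];
    usage->busy_cycles = app_busy_cycles[lcore_id];
    return 0;
}

static void
app_register_usage(void)
{
    /* Register once at startup; EAL reads the counters on demand. */
    rte_lcore_register_usage_cb(app_lcore_usage);
}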
[PATCH v10 5/5] eal: add lcore usage telemetry endpoint
Allow fetching CPU cycles usage for all lcores with a single request. This endpoint is intended for repeated and frequent invocations by external monitoring systems and therefore returns condensed data. It consists of a single dictionary with three keys: "lcore_ids", "total_cycles" and "busy_cycles" that are mapped to three arrays of integer values. Each array has the same number of values, one per lcore, in the same order. Example: --> /eal/lcore/usage { "/eal/lcore/usage": { "lcore_ids": [ 4, 5 ], "total_cycles": [ 23846845590, 23900558914 ], "busy_cycles": [ 21043446682, 21448837316 ] } } Signed-off-by: Robin Jarry Reviewed-by: Kevin Laatz --- Notes: v9 -> v10: no change doc/guides/rel_notes/release_23_03.rst | 5 ++- lib/eal/common/eal_common_lcore.c | 60 ++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 17d38d5ea264..4f2878846829 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -57,8 +57,9 @@ New Features * **Added support for reporting lcore usage in applications.** - * The ``/eal/lcore/list`` and ``/eal/lcore/info`` telemetry endpoints have -been added to provide information similar to ``rte_lcore_dump()``. + * The ``/eal/lcore/list``, ``/eal/lcore/usage`` and ``/eal/lcore/info`` +telemetry endpoints have been added to provide information similar to +``rte_lcore_dump()``. * Applications can register a callback at startup via ``rte_lcore_register_usage_cb()`` to provide lcore usage information. diff --git a/lib/eal/common/eal_common_lcore.c b/lib/eal/common/eal_common_lcore.c index 8fcdebd87692..837acbe4142a 100644 --- a/lib/eal/common/eal_common_lcore.c +++ b/lib/eal/common/eal_common_lcore.c @@ -572,6 +572,63 @@ handle_lcore_info(const char *cmd __rte_unused, const char *params, struct rte_t return rte_lcore_iterate(lcore_telemetry_info_cb, &info); } +struct lcore_telemetry_usage { + struct rte_tel_data *lcore_ids; + struct rte_tel_data *total_cycles; + struct rte_tel_data *busy_cycles; +}; + +static int +lcore_telemetry_usage_cb(unsigned int lcore_id, void *arg) +{ + struct lcore_telemetry_usage *u = arg; + struct rte_lcore_usage usage; + rte_lcore_usage_cb usage_cb; + + /* The callback may not set all the fields in the structure, so clear it here. */ + memset(&usage, 0, sizeof(usage)); + /* Guard against concurrent modification of lcore_usage_cb. 
*/ + usage_cb = lcore_usage_cb; + if (usage_cb != NULL && usage_cb(lcore_id, &usage) == 0) { + rte_tel_data_add_array_uint(u->lcore_ids, lcore_id); + rte_tel_data_add_array_uint(u->total_cycles, usage.total_cycles); + rte_tel_data_add_array_uint(u->busy_cycles, usage.busy_cycles); + } + + return 0; +} + +static int +handle_lcore_usage(const char *cmd __rte_unused, + const char *params __rte_unused, + struct rte_tel_data *d) +{ + struct lcore_telemetry_usage usage; + struct rte_tel_data *lcore_ids = rte_tel_data_alloc(); + struct rte_tel_data *total_cycles = rte_tel_data_alloc(); + struct rte_tel_data *busy_cycles = rte_tel_data_alloc(); + + if (!lcore_ids || !total_cycles || !busy_cycles) { + rte_tel_data_free(lcore_ids); + rte_tel_data_free(total_cycles); + rte_tel_data_free(busy_cycles); + return -ENOMEM; + } + + rte_tel_data_start_dict(d); + rte_tel_data_start_array(lcore_ids, RTE_TEL_UINT_VAL); + rte_tel_data_start_array(total_cycles, RTE_TEL_UINT_VAL); + rte_tel_data_start_array(busy_cycles, RTE_TEL_UINT_VAL); + rte_tel_data_add_dict_container(d, "lcore_ids", lcore_ids, 0); + rte_tel_data_add_dict_container(d, "total_cycles", total_cycles, 0); + rte_tel_data_add_dict_container(d, "busy_cycles", busy_cycles, 0); + usage.lcore_ids = lcore_ids; + usage.total_cycles = total_cycles; + usage.busy_cycles = busy_cycles; + + return rte_lcore_iterate(lcore_telemetry_usage_cb, &usage); +} + RTE_INIT(lcore_telemetry) { rte_telemetry_register_cmd( @@ -580,5 +637,8 @@ RTE_INIT(lcore_telemetry) rte_telemetry_register_cmd( "/eal/lcore/info", handle_lcore_info, "Returns lcore info. Parameters: int lcore_id"); + rte_telemetry_register_cmd( + "/eal/lcore/usage", handle_lcore_usage, + "Returns lcore cycles usage. Takes no parameters"); } #endif /* !RTE_EXEC_ENV_WINDOWS */ -- 2.39.1
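Since the endpoint returns parallel arrays of monotonically growing counters, an external monitor typically polls it periodically and derives a load figure from the deltas between two samples. A small sketch of that arithmetic (the values are assumed to have already been parsed from the JSON reply):

#include <stdint.h>

/* Busy percentage of one lcore between two /eal/lcore/usage samples. */
static double
lcore_busy_percent(uint64_t total_prev, uint64_t busy_prev,
        uint64_t total_now, uint64_t busy_now)
{
    uint64_t total_delta = total_now - total_prev;
    uint64_t busy_delta = busy_now - busy_prev;

    if (total_delta == 0)
        return 0.0;

    return 100.0 * (double)busy_delta / (double)total_delta;
}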
[PATCH v10 3/5] app/testpmd: add dump command for lcores
Add a simple command that calls rte_lcore_dump(). Signed-off-by: Robin Jarry Acked-by: Morten Brørup Acked-by: Konstantin Ananyev Reviewed-by: Kevin Laatz --- Notes: v9 -> v10: no change app/test-pmd/cmdline.c | 3 +++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 7 +++ 2 files changed, 10 insertions(+) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index cb8c174020b0..bb7ff2b44989 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -8357,6 +8357,8 @@ static void cmd_dump_parsed(void *parsed_result, rte_mempool_list_dump(stdout); else if (!strcmp(res->dump, "dump_devargs")) rte_devargs_dump(stdout); + else if (!strcmp(res->dump, "dump_lcores")) + rte_lcore_dump(stdout); else if (!strcmp(res->dump, "dump_log_types")) rte_log_dump(stdout); } @@ -8370,6 +8372,7 @@ static cmdline_parse_token_string_t cmd_dump_dump = "dump_ring#" "dump_mempool#" "dump_devargs#" + "dump_lcores#" "dump_log_types"); static cmdline_parse_inst_t cmd_dump = { diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst index 79a1fa9cb73d..9ceb21dfbbdf 100644 --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst @@ -591,6 +591,13 @@ Dumps the user device list:: testpmd> dump_devargs +dump lcores +~~~ + +Dumps the logical cores list:: + + testpmd> dump_lcores + dump log types ~~ -- 2.39.1
[PATCH v10 4/5] app/testpmd: report lcore usage
The --record-core-cycles option already accounts for busy cycles. One turn of packet_fwd_t is considered "busy" if there was at least one received or transmitted packet. Rename core_cycles to busy_cycles in struct fwd_stream to make it more explicit. Add total_cycles to struct fwd_lcore. Add cycles accounting in noisy_vnf where it was missing. When --record-core-cycles is specified, register a callback with rte_lcore_register_usage_cb() and update total_cycles every turn of lcore loop based on a starting tsc value. In the callback, resolve the proper struct fwd_lcore based on lcore_id and return the lcore total_cycles and the sum of busy_cycles of all its fwd_streams. This makes the cycles counters available in rte_lcore_dump() and the lcore telemetry API: testpmd> dump_lcores lcore 3, socket 0, role RTE, cpuset 3 lcore 4, socket 0, role RTE, cpuset 4, busy cycles 1228584096/9239923140 lcore 5, socket 0, role RTE, cpuset 5, busy cycles 1255661768/9218141538 --> /eal/lcore/info,4 { "/eal/lcore/info": { "lcore_id": 4, "socket": 0, "role": "RTE", "cpuset": [ 4 ], "busy_cycles": 10623340318, "total_cycles": 55331167354 } } Signed-off-by: Robin Jarry Acked-by: Morten Brørup Acked-by: Konstantin Ananyev Reviewed-by: Kevin Laatz --- Notes: v9 -> v10: Fixed reset of total_cycles without stopping app/test-pmd/noisy_vnf.c | 8 +++- app/test-pmd/testpmd.c | 44 +--- app/test-pmd/testpmd.h | 25 +++ 3 files changed, 64 insertions(+), 13 deletions(-) diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c index c65ec6f06a5c..ce5a3e5e6987 100644 --- a/app/test-pmd/noisy_vnf.c +++ b/app/test-pmd/noisy_vnf.c @@ -144,6 +144,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) struct noisy_config *ncf = noisy_cfg[fs->rx_port]; struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; struct rte_mbuf *tmp_pkts[MAX_PKT_BURST]; + uint64_t start_tsc = 0; uint16_t nb_deqd = 0; uint16_t nb_rx = 0; uint16_t nb_tx = 0; @@ -153,6 +154,8 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) bool needs_flush = false; uint64_t now; + get_start_cycles(&start_tsc); + nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst, nb_pkt_per_burst); inc_rx_burst_stats(fs, nb_rx); @@ -169,7 +172,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) inc_tx_burst_stats(fs, nb_tx); fs->tx_packets += nb_tx; fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx); - return; + goto end; } fifo_free = rte_ring_free_count(ncf->f); @@ -219,6 +222,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs) fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent); ncf->prev_time = rte_get_timer_cycles(); } +end: + if (nb_rx > 0 || nb_tx > 0) + get_end_cycles(fs, start_tsc); } #define NOISY_STRSIZE 256 diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index e366f81a0f46..d02f96df7570 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -2053,7 +2053,7 @@ fwd_stats_display(void) fs->rx_bad_outer_ip_csum; if (record_core_cycles) - fwd_cycles += fs->core_cycles; + fwd_cycles += fs->busy_cycles; } for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { pt_id = fwd_ports_ids[i]; @@ -2145,7 +2145,7 @@ fwd_stats_display(void) else total_pkts = total_recv; - printf("\n CPU cycles/packet=%.2F (total cycles=" + printf("\n CPU cycles/packet=%.2F (busy cycles=" "%"PRIu64" / total %s packets=%"PRIu64") at %"PRIu64 " MHz Clock\n", (double) fwd_cycles / total_pkts, @@ -2184,8 +2184,10 @@ fwd_stats_reset(void) memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats)); memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats)); - fs->core_cycles = 0; + fs->busy_cycles = 
0; } + for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) + fwd_lcores[i]->total_cycles = 0; } static void @@ -2248,6 +2250,7 @@ static void run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd) { struct fwd_stream **fsm; + uint64_t prev_tsc; streamid_t nb_fs; streamid_t sm_id; #ifdef RTE_LIB_BITRATESTATS @@ -2262,6 +2265,7 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd) #endif fsm = &fwd_streams[fc->stream_idx]; nb_fs = fc->stream_nb; + prev_tsc = rte_rdtsc(); do { for (sm_id = 0; sm_id < nb
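The accounting pattern introduced here can be summarized independently of testpmd: the per-lcore total grows on every turn of the loop, while busy cycles are only added for turns that actually moved packets. A simplified, hedged sketch (run_one_turn() is a hypothetical stand-in for the packet_fwd_t callback):

#include <stdbool.h>
#include <stdint.h>
#include <rte_cycles.h>

/* Returns true when the turn received or transmitted at least one packet. */
extern bool run_one_turn(void);

static uint64_t total_cycles;
static uint64_t busy_cycles;

static void
forwarding_loop(volatile const bool *stop)
{
    uint64_t prev_tsc = rte_rdtsc();

    while (!*stop) {
        uint64_t start = rte_rdtsc();
        bool did_work = run_one_turn();
        uint64_t now = rte_rdtsc();

        if (did_work)
            busy_cycles += now - start;
        /* The total advances every turn, busy or not. */
        total_cycles += now - prev_tsc;
        prev_tsc = now;
    }
}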
[PATCH v6 1/4] pcapng: comment option support for epb
This change enhances rte_pcapng_copy to have comment in enhanced packet block. Signed-off-by: Amit Prakash Shukla --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix app/test/test_pcapng.c | 4 ++-- doc/guides/rel_notes/release_23_03.rst | 2 ++ lib/pcapng/rte_pcapng.c| 10 +- lib/pcapng/rte_pcapng.h| 4 +++- lib/pdump/rte_pdump.c | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index edba46d1fe..b8429a02f1 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -146,7 +146,7 @@ test_write_packets(void) struct rte_mbuf *mc; mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, - rte_get_tsc_cycles(), 0); + rte_get_tsc_cycles(), 0, NULL); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -262,7 +262,7 @@ test_write_over_limit_iov_max(void) struct rte_mbuf *mc; mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, - rte_get_tsc_cycles(), 0); + rte_get_tsc_cycles(), 0, NULL); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 1fa101c420..bb435dde32 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -116,6 +116,8 @@ API Changes Also, make sure to start the actual text at the margin. === +* Experimental function ``rte_pcapng_copy`` was updated to support comment + section in enhanced packet block in pcapng library. ABI Changes --- diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index ea004939e6..65c8c77fa4 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -466,7 +466,8 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, const struct rte_mbuf *md, struct rte_mempool *mp, uint32_t length, uint64_t cycles, - enum rte_pcapng_direction direction) + enum rte_pcapng_direction direction, + const char *comment) { struct pcapng_enhance_packet_block *epb; uint32_t orig_len, data_len, padding, flags; @@ -527,6 +528,9 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (rss_hash) optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t)); + if (comment) + optlen += pcapng_optlen(strlen(comment)); + /* reserve trailing options and block length */ opt = (struct pcapng_option *) rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); @@ -564,6 +568,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, &hash_opt, sizeof(hash_opt)); } + if (comment) + opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment, + strlen(comment)); + /* Note: END_OPT necessary here. Wireshark doesn't do it. */ /* Add PCAPNG packet header */ diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index 86b7996e29..4afdec22ef 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -125,6 +125,8 @@ enum rte_pcapng_direction { * The timestamp in TSC cycles. * @param direction * The direction of the packer: receive, transmit or unknown. + * @param comment + * Packet comment. 
* * @return * - The pointer to the new mbuf formatted for pcapng_write @@ -136,7 +138,7 @@ struct rte_mbuf * rte_pcapng_copy(uint16_t port_id, uint32_t queue, const struct rte_mbuf *m, struct rte_mempool *mp, uint32_t length, uint64_t timestamp, - enum rte_pcapng_direction direction); + enum rte_pcapng_direction direction, const char *comment); /** diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c index a81544cb57..9bc4bab4f2 100644 --- a/lib/pdump/rte_pdump.c +++ b/lib/pdump/rte_pdump.c @@ -122,7 +122,7 @@ pdump_copy(uint16_t port_id, uint16_t queue, if (cbs->ver == V2) p = rte_pcapng_copy(port_id, queue, pkts[i], mp, cbs->snaplen, - ts, direction); + ts, direction, NULL); else
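With the extra parameter, callers can attach a free-form string to every enhanced packet block (passing NULL keeps the old behaviour, as the pdump change above shows). A hedged usage sketch, assuming the capture mempool and writer are already set up elsewhere and that the node name passed in is application data:

#include <stdio.h>
#include <rte_cycles.h>
#include <rte_mbuf.h>
#include <rte_pcapng.h>

/* Sketch: copy a packet for capture and tag it with a comment. */
static struct rte_mbuf *
capture_with_comment(uint16_t port_id, uint16_t queue,
        const struct rte_mbuf *pkt, struct rte_mempool *mp,
        const char *node_name)
{
    char comment[64];

    snprintf(comment, sizeof(comment), "node: %s", node_name);

    return rte_pcapng_copy(port_id, queue, pkt, mp, pkt->pkt_len,
            rte_get_tsc_cycles(), RTE_PCAPNG_DIRECTION_UNKNOWN,
            comment);
}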
[PATCH v6 2/4] graph: pcap capture for graph nodes
Implementation adds support to capture packets at each node with packet metadata and node name. Signed-off-by: Amit Prakash Shukla --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix doc/guides/rel_notes/release_23_03.rst | 7 + lib/graph/graph.c | 17 +- lib/graph/graph_pcap.c | 216 + lib/graph/graph_pcap_private.h | 116 + lib/graph/graph_populate.c | 12 +- lib/graph/graph_private.h | 5 + lib/graph/meson.build | 3 +- lib/graph/rte_graph.h | 5 + lib/graph/rte_graph_worker.h | 9 ++ 9 files changed, 387 insertions(+), 3 deletions(-) create mode 100644 lib/graph/graph_pcap.c create mode 100644 lib/graph/graph_pcap_private.h diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index bb435dde32..328dfd3009 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -87,6 +87,10 @@ New Features ``rte_event_dev_config::nb_single_link_event_port_queues`` parameter required for eth_rx, eth_tx, crypto and timer eventdev adapters. +* **Added pcap trace support in graph library.** + + * Added support to capture packets at each graph node with packet metadata and +node name. Removed Items - @@ -119,6 +123,9 @@ API Changes * Experimental function ``rte_pcapng_copy`` was updated to support comment section in enhanced packet block in pcapng library. +* Experimental structures ``struct rte_graph_param``, ``struct rte_graph`` and + ``struct graph`` were updated to support pcap trace in graph library. + ABI Changes --- diff --git a/lib/graph/graph.c b/lib/graph/graph.c index 3a617cc369..a839a2803b 100644 --- a/lib/graph/graph.c +++ b/lib/graph/graph.c @@ -15,6 +15,7 @@ #include #include "graph_private.h" +#include "graph_pcap_private.h" static struct graph_head graph_list = STAILQ_HEAD_INITIALIZER(graph_list); static rte_spinlock_t graph_lock = RTE_SPINLOCK_INITIALIZER; @@ -228,7 +229,12 @@ graph_mem_fixup_node_ctx(struct rte_graph *graph) node_db = node_from_name(name); if (node_db == NULL) SET_ERR_JMP(ENOLINK, fail, "Node %s not found", name); - node->process = node_db->process; + + if (graph->pcap_enable) { + node->process = graph_pcap_dispatch; + node->original_process = node_db->process; + } else + node->process = node_db->process; } return graph; @@ -242,6 +248,9 @@ graph_mem_fixup_secondary(struct rte_graph *graph) if (graph == NULL || rte_eal_process_type() == RTE_PROC_PRIMARY) return graph; + if (graph_pcap_file_open(graph->pcap_filename) || graph_pcap_mp_init()) + graph_pcap_exit(graph); + return graph_mem_fixup_node_ctx(graph); } @@ -323,11 +332,17 @@ rte_graph_create(const char *name, struct rte_graph_param *prm) if (graph_has_isolated_node(graph)) goto graph_cleanup; + /* Initialize pcap config. 
*/ + graph_pcap_enable(prm->pcap_enable); + /* Initialize graph object */ graph->socket = prm->socket_id; graph->src_node_count = src_node_count; graph->node_count = graph_nodes_count(graph); graph->id = graph_id; + graph->num_pkt_to_capture = prm->num_pkt_to_capture; + if (prm->pcap_filename) + rte_strscpy(graph->pcap_filename, prm->pcap_filename, RTE_GRAPH_PCAP_FILE_SZ); /* Allocate the Graph fast path memory and populate the data */ if (graph_fp_mem_create(graph)) diff --git a/lib/graph/graph_pcap.c b/lib/graph/graph_pcap.c new file mode 100644 index 00..9cbd1b8fdb --- /dev/null +++ b/lib/graph/graph_pcap.c @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2023 Marvell International Ltd. + */ + +#include +#include +#include +#include + +#include +#include + +#include "rte_graph_worker.h" + +#include "graph_pcap_private.h" + +#define GRAPH_PCAP_BUF_SZ 128 +#define GRAPH_PCAP_NUM_PACKETS 1024 +#define GRAPH_PCAP_PKT_POOL"graph_pcap_pkt_pool" +#define GRAPH_PCAP_FILE_NAME "dpdk_graph_pcap_capture_XX.pcapng" + +/* For multi-process, packets are captured in separate files. */ +static rte_pcapng_t *pcapng_fd; +static bool pcap_enable; +struct rte_mempool *pkt_mp; + +void +graph_pcap_enable(bool val) +{ + pcap_enable = val; +} + +int +graph_pcap_is_enable(void) +{ + return pcap_enable; +} + +void +graph_pcap_exit(struct rte_graph *graph) +{ + if (rte
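From the application's perspective the new capture knobs are plain fields of struct rte_graph_param, filled in before rte_graph_create(). A hedged sketch (the node patterns, graph name and file path below are placeholders, not part of this patch):

#include <rte_common.h>
#include <rte_graph.h>
#include <rte_lcore.h>

/* Sketch: create a graph with per-node pcap capture enabled. */
static rte_graph_t
create_graph_with_capture(void)
{
    static const char *patterns[] = {"ip4*", "ethdev*", "pkt_drop"};
    struct rte_graph_param prm = {0};

    prm.socket_id = rte_socket_id();
    prm.nb_node_patterns = RTE_DIM(patterns);
    prm.node_patterns = patterns;

    /* Fields added by this series. */
    prm.pcap_enable = 1;
    prm.num_pkt_to_capture = 1024;
    prm.pcap_filename = "/tmp/graph_capture.pcapng";

    return rte_graph_create("worker0", &prm);
}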
[PATCH v6 3/4] examples/l3fwd-graph: changes to configure pcap capture
Added support to configure pcap capture. Signed-off-by: Amit Prakash Shukla --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix doc/guides/sample_app_ug/l3_forward_graph.rst | 23 +++ examples/l3fwd-graph/main.c | 62 ++- 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/doc/guides/sample_app_ug/l3_forward_graph.rst b/doc/guides/sample_app_ug/l3_forward_graph.rst index 0a3e0d44ec..585ac8c898 100644 --- a/doc/guides/sample_app_ug/l3_forward_graph.rst +++ b/doc/guides/sample_app_ug/l3_forward_graph.rst @@ -51,6 +51,9 @@ The application has a number of command line options similar to l3fwd:: [--max-pkt-len PKTLEN] [--no-numa] [--per-port-pool] + [--pcap-enable] + [--pcap-num-cap] + [--pcap-file-name] Where, @@ -69,6 +72,12 @@ Where, * ``--per-port-pool:`` Optional, set to use independent buffer pools per port. Without this option, single buffer pool is used for all ports. +* ``--pcap-enable:`` Optional, Enables packet capture in pcap format on each node with mbuf and node metadata. + +* ``--pcap-num-cap:`` Optional, Number of packets to be captured per core. + +* ``--pcap-file-name:`` Optional, Pcap filename to capture packets in. + For example, consider a dual processor socket platform with 8 physical cores, where cores 0-7 and 16-23 appear on socket 0, while cores 8-15 and 24-31 appear on socket 1. @@ -99,6 +108,20 @@ In this command: | | | | | +--+---+---+-+ +To enable pcap trace on each graph, use following command: + +.. code-block:: console + +.//examples/dpdk-l3fwd-graph -l 1,2 -n 4 -- -p 0x3 --config="(0,0,1),(1,0,2)" --pcap-enable --pcap-num-cap= --pcap-file-name "" + +In this command: + +* The --pcap-enable option enables pcap trace on graph nodes. + +* The --pcap-num-cap option enables user to configure number packets to be captured per graph. Default 1024 packets per graph are captured. + +* The --pcap-file-name option enables user to give filename in which packets are to be captured. + Refer to the *DPDK Getting Started Guide* for general information on running applications and the Environment Abstraction Layer (EAL) options. 
diff --git a/examples/l3fwd-graph/main.c b/examples/l3fwd-graph/main.c index 6dcb6ee92b..5feeab4f0f 100644 --- a/examples/l3fwd-graph/main.c +++ b/examples/l3fwd-graph/main.c @@ -76,6 +76,12 @@ xmm_t val_eth[RTE_MAX_ETHPORTS]; /* Mask of enabled ports */ static uint32_t enabled_port_mask; +/* Pcap trace */ +static char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; +static uint64_t packet_to_capture; +static int pcap_trace_enable; + + struct lcore_rx_queue { uint16_t port_id; uint8_t queue_id; @@ -261,7 +267,8 @@ print_usage(const char *prgname) " [--eth-dest=X,MM:MM:MM:MM:MM:MM]" " [--max-pkt-len PKTLEN]" " [--no-numa]" - " [--per-port-pool]\n\n" + " [--per-port-pool]" + " [--num-pkt-cap]\n\n" " -p PORTMASK: Hexadecimal bitmask of ports to configure\n" " -P : Enable promiscuous mode\n" @@ -270,10 +277,30 @@ print_usage(const char *prgname) "port X\n" " --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n" " --no-numa: Disable numa awareness\n" - " --per-port-pool: Use separate buffer pool per port\n\n", + " --per-port-pool: Use separate buffer pool per port\n" + " --pcap-enable: Enables pcap capture\n" + " --pcap-num-cap NUMPKT: Number of packets to capture\n" + " --pcap-file-name NAME: Pcap file name\n\n", prgname); } +static uint64_t +parse_num_pkt_cap(const char *num_pkt_cap) +{ + uint64_t num_pkt; + char *end = NULL; + + /* Parse decimal string */ + num_pkt = strtoull(num_pkt_cap, &end, 10); + if ((num_pkt_cap[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + if (num_pkt == 0) + return 0; + + return num_pkt; +} + static int parse_max_pkt_len(const char *pktlen) { @@ -404,6 +431,9 @@ static const char short_options[] = "p:" /* portmask */ #define CMD_LINE_OPT_NO_NUMA "no-numa" #define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len" #define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool" +#define CMD_LINE_OPT_PCAP_ENABLE "pcap-enable" +#d
[PATCH v6 4/4] test/graph: initialize graph param variable
Initializing rte_graph_param variable with 0 to avoid any garbage value in structure elements which are not populated as part of this function. Signed-off-by: Amit Prakash Shukla Acked-by: Jerin Jacob --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix app/test/test_graph_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test/test_graph_perf.c b/app/test/test_graph_perf.c index 1d065438a6..c5b463f700 100644 --- a/app/test/test_graph_perf.c +++ b/app/test/test_graph_perf.c @@ -324,7 +324,7 @@ graph_init(const char *gname, uint8_t nb_srcs, uint8_t nb_sinks, char nname[RTE_NODE_NAMESIZE / 2]; struct test_node_data *node_data; char *ename[nodes_per_stage]; - struct rte_graph_param gconf; + struct rte_graph_param gconf = {0}; const struct rte_memzone *mz; uint8_t total_percent = 0; rte_node_t *src_nodes; -- 2.25.1
RE: [PATCH v6 1/4] pcapng: comment option support for epb
Please ignore this version. I will resend the patch. > -Original Message- > From: Amit Prakash Shukla > Sent: Thursday, February 9, 2023 3:26 PM > To: Reshma Pattan ; Stephen Hemminger > > Cc: dev@dpdk.org; Jerin Jacob Kollanukkaran ; > david.march...@redhat.com; Amit Prakash Shukla > > Subject: [PATCH v6 1/4] pcapng: comment option support for epb > > This change enhances rte_pcapng_copy to have comment in enhanced > packet block. > > Signed-off-by: Amit Prakash Shukla > --- > v2: > - Fixed code style issue > - Fixed CI compilation issue on github-robot > > v3: > - Code review suggestion from Stephen > - Fixed potential memory leak > > v4: > - Code review suggestion from Jerin > > v5: > - Code review suggestion from Jerin > > v6: > - Squashing test graph param initialize fix > > app/test/test_pcapng.c | 4 ++-- > doc/guides/rel_notes/release_23_03.rst | 2 ++ > lib/pcapng/rte_pcapng.c| 10 +- > lib/pcapng/rte_pcapng.h| 4 +++- > lib/pdump/rte_pdump.c | 2 +- > 5 files changed, 17 insertions(+), 5 deletions(-) > > diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index > edba46d1fe..b8429a02f1 100644 > --- a/app/test/test_pcapng.c > +++ b/app/test/test_pcapng.c > @@ -146,7 +146,7 @@ test_write_packets(void) > struct rte_mbuf *mc; > > mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, > - rte_get_tsc_cycles(), 0); > + rte_get_tsc_cycles(), 0, NULL); > if (mc == NULL) { > fprintf(stderr, "Cannot copy packet\n"); > return -1; > @@ -262,7 +262,7 @@ test_write_over_limit_iov_max(void) > struct rte_mbuf *mc; > > mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, > - rte_get_tsc_cycles(), 0); > + rte_get_tsc_cycles(), 0, NULL); > if (mc == NULL) { > fprintf(stderr, "Cannot copy packet\n"); > return -1; > diff --git a/doc/guides/rel_notes/release_23_03.rst > b/doc/guides/rel_notes/release_23_03.rst > index 1fa101c420..bb435dde32 100644 > --- a/doc/guides/rel_notes/release_23_03.rst > +++ b/doc/guides/rel_notes/release_23_03.rst > @@ -116,6 +116,8 @@ API Changes > Also, make sure to start the actual text at the margin. > === > > +* Experimental function ``rte_pcapng_copy`` was updated to support > +comment > + section in enhanced packet block in pcapng library. > > ABI Changes > --- > diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index > ea004939e6..65c8c77fa4 100644 > --- a/lib/pcapng/rte_pcapng.c > +++ b/lib/pcapng/rte_pcapng.c > @@ -466,7 +466,8 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, > const struct rte_mbuf *md, > struct rte_mempool *mp, > uint32_t length, uint64_t cycles, > - enum rte_pcapng_direction direction) > + enum rte_pcapng_direction direction, > + const char *comment) > { > struct pcapng_enhance_packet_block *epb; > uint32_t orig_len, data_len, padding, flags; @@ -527,6 +528,9 @@ > rte_pcapng_copy(uint16_t port_id, uint32_t queue, > if (rss_hash) > optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t)); > > + if (comment) > + optlen += pcapng_optlen(strlen(comment)); > + > /* reserve trailing options and block length */ > opt = (struct pcapng_option *) > rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); @@ - > 564,6 +568,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, > &hash_opt, sizeof(hash_opt)); > } > > + if (comment) > + opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, > comment, > + strlen(comment)); > + > /* Note: END_OPT necessary here. Wireshark doesn't do it. 
*/ > > /* Add PCAPNG packet header */ > diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index > 86b7996e29..4afdec22ef 100644 > --- a/lib/pcapng/rte_pcapng.h > +++ b/lib/pcapng/rte_pcapng.h > @@ -125,6 +125,8 @@ enum rte_pcapng_direction { > * The timestamp in TSC cycles. > * @param direction > * The direction of the packer: receive, transmit or unknown. > + * @param comment > + * Packet comment. > * > * @return > * - The pointer to the new mbuf formatted for pcapng_write > @@ -136,7 +138,7 @@ struct rte_mbuf * > rte_pcapng_copy(uint16_t port_id, uint32_t queue, > const struct rte_mbuf *m, struct rte_mempool *mp, > uint32_t length, uint64_t timestamp, > - enum rte_pcapng_direction direction); > + enum rte_pcapng_direction direction, const char > *comment); > > > /** > diff --g
[PATCH v7 1/3] pcapng: comment option support for epb
This change enhances rte_pcapng_copy to have comment in enhanced packet block. Signed-off-by: Amit Prakash Shukla --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix v7: - Resending the patch app/test/test_pcapng.c | 4 ++-- doc/guides/rel_notes/release_23_03.rst | 2 ++ lib/pcapng/rte_pcapng.c| 10 +- lib/pcapng/rte_pcapng.h| 4 +++- lib/pdump/rte_pdump.c | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index edba46d1fe..b8429a02f1 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -146,7 +146,7 @@ test_write_packets(void) struct rte_mbuf *mc; mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, - rte_get_tsc_cycles(), 0); + rte_get_tsc_cycles(), 0, NULL); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; @@ -262,7 +262,7 @@ test_write_over_limit_iov_max(void) struct rte_mbuf *mc; mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, - rte_get_tsc_cycles(), 0); + rte_get_tsc_cycles(), 0, NULL); if (mc == NULL) { fprintf(stderr, "Cannot copy packet\n"); return -1; diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index 1fa101c420..bb435dde32 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -116,6 +116,8 @@ API Changes Also, make sure to start the actual text at the margin. === +* Experimental function ``rte_pcapng_copy`` was updated to support comment + section in enhanced packet block in pcapng library. ABI Changes --- diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index ea004939e6..65c8c77fa4 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -466,7 +466,8 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, const struct rte_mbuf *md, struct rte_mempool *mp, uint32_t length, uint64_t cycles, - enum rte_pcapng_direction direction) + enum rte_pcapng_direction direction, + const char *comment) { struct pcapng_enhance_packet_block *epb; uint32_t orig_len, data_len, padding, flags; @@ -527,6 +528,9 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, if (rss_hash) optlen += pcapng_optlen(sizeof(uint8_t) + sizeof(uint32_t)); + if (comment) + optlen += pcapng_optlen(strlen(comment)); + /* reserve trailing options and block length */ opt = (struct pcapng_option *) rte_pktmbuf_append(mc, optlen + sizeof(uint32_t)); @@ -564,6 +568,10 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, &hash_opt, sizeof(hash_opt)); } + if (comment) + opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, comment, + strlen(comment)); + /* Note: END_OPT necessary here. Wireshark doesn't do it. */ /* Add PCAPNG packet header */ diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h index 86b7996e29..4afdec22ef 100644 --- a/lib/pcapng/rte_pcapng.h +++ b/lib/pcapng/rte_pcapng.h @@ -125,6 +125,8 @@ enum rte_pcapng_direction { * The timestamp in TSC cycles. * @param direction * The direction of the packer: receive, transmit or unknown. + * @param comment + * Packet comment. 
* * @return * - The pointer to the new mbuf formatted for pcapng_write @@ -136,7 +138,7 @@ struct rte_mbuf * rte_pcapng_copy(uint16_t port_id, uint32_t queue, const struct rte_mbuf *m, struct rte_mempool *mp, uint32_t length, uint64_t timestamp, - enum rte_pcapng_direction direction); + enum rte_pcapng_direction direction, const char *comment); /** diff --git a/lib/pdump/rte_pdump.c b/lib/pdump/rte_pdump.c index a81544cb57..9bc4bab4f2 100644 --- a/lib/pdump/rte_pdump.c +++ b/lib/pdump/rte_pdump.c @@ -122,7 +122,7 @@ pdump_copy(uint16_t port_id, uint16_t queue, if (cbs->ver == V2) p = rte_pcapng_copy(port_id, queue, pkts[i], mp, cbs->snaplen, - ts, direction); + ts, direction, NULL);
[PATCH v7 2/3] graph: pcap capture for graph nodes
Implementation adds support to capture packets at each node with packet metadata and node name. Signed-off-by: Amit Prakash Shukla --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix v7: - Resending the patch app/test/test_graph_perf.c | 2 +- doc/guides/rel_notes/release_23_03.rst | 7 + lib/graph/graph.c | 17 +- lib/graph/graph_pcap.c | 216 + lib/graph/graph_pcap_private.h | 116 + lib/graph/graph_populate.c | 12 +- lib/graph/graph_private.h | 5 + lib/graph/meson.build | 3 +- lib/graph/rte_graph.h | 5 + lib/graph/rte_graph_worker.h | 9 ++ 10 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 lib/graph/graph_pcap.c create mode 100644 lib/graph/graph_pcap_private.h diff --git a/app/test/test_graph_perf.c b/app/test/test_graph_perf.c index 1d065438a6..c5b463f700 100644 --- a/app/test/test_graph_perf.c +++ b/app/test/test_graph_perf.c @@ -324,7 +324,7 @@ graph_init(const char *gname, uint8_t nb_srcs, uint8_t nb_sinks, char nname[RTE_NODE_NAMESIZE / 2]; struct test_node_data *node_data; char *ename[nodes_per_stage]; - struct rte_graph_param gconf; + struct rte_graph_param gconf = {0}; const struct rte_memzone *mz; uint8_t total_percent = 0; rte_node_t *src_nodes; diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index bb435dde32..328dfd3009 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -87,6 +87,10 @@ New Features ``rte_event_dev_config::nb_single_link_event_port_queues`` parameter required for eth_rx, eth_tx, crypto and timer eventdev adapters. +* **Added pcap trace support in graph library.** + + * Added support to capture packets at each graph node with packet metadata and +node name. Removed Items - @@ -119,6 +123,9 @@ API Changes * Experimental function ``rte_pcapng_copy`` was updated to support comment section in enhanced packet block in pcapng library. +* Experimental structures ``struct rte_graph_param``, ``struct rte_graph`` and + ``struct graph`` were updated to support pcap trace in graph library. + ABI Changes --- diff --git a/lib/graph/graph.c b/lib/graph/graph.c index 3a617cc369..a839a2803b 100644 --- a/lib/graph/graph.c +++ b/lib/graph/graph.c @@ -15,6 +15,7 @@ #include #include "graph_private.h" +#include "graph_pcap_private.h" static struct graph_head graph_list = STAILQ_HEAD_INITIALIZER(graph_list); static rte_spinlock_t graph_lock = RTE_SPINLOCK_INITIALIZER; @@ -228,7 +229,12 @@ graph_mem_fixup_node_ctx(struct rte_graph *graph) node_db = node_from_name(name); if (node_db == NULL) SET_ERR_JMP(ENOLINK, fail, "Node %s not found", name); - node->process = node_db->process; + + if (graph->pcap_enable) { + node->process = graph_pcap_dispatch; + node->original_process = node_db->process; + } else + node->process = node_db->process; } return graph; @@ -242,6 +248,9 @@ graph_mem_fixup_secondary(struct rte_graph *graph) if (graph == NULL || rte_eal_process_type() == RTE_PROC_PRIMARY) return graph; + if (graph_pcap_file_open(graph->pcap_filename) || graph_pcap_mp_init()) + graph_pcap_exit(graph); + return graph_mem_fixup_node_ctx(graph); } @@ -323,11 +332,17 @@ rte_graph_create(const char *name, struct rte_graph_param *prm) if (graph_has_isolated_node(graph)) goto graph_cleanup; + /* Initialize pcap config. 
*/ + graph_pcap_enable(prm->pcap_enable); + /* Initialize graph object */ graph->socket = prm->socket_id; graph->src_node_count = src_node_count; graph->node_count = graph_nodes_count(graph); graph->id = graph_id; + graph->num_pkt_to_capture = prm->num_pkt_to_capture; + if (prm->pcap_filename) + rte_strscpy(graph->pcap_filename, prm->pcap_filename, RTE_GRAPH_PCAP_FILE_SZ); /* Allocate the Graph fast path memory and populate the data */ if (graph_fp_mem_create(graph)) diff --git a/lib/graph/graph_pcap.c b/lib/graph/graph_pcap.c new file mode 100644 index 00..9cbd1b8fdb --- /dev/null +++ b/lib/graph/graph_pcap.c @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2023 Marvell International Ltd. + */ + +#include +#include +#include +#include + +#include
[PATCH v7 3/3] examples/l3fwd-graph: changes to configure pcap capture
Added support to configure pcap capture. Signed-off-by: Amit Prakash Shukla --- v2: - Fixed code style issue - Fixed CI compilation issue on github-robot v3: - Code review suggestion from Stephen - Fixed potential memory leak v4: - Code review suggestion from Jerin v5: - Code review suggestion from Jerin v6: - Squashing test graph param initialize fix v7: - Resending the patch doc/guides/sample_app_ug/l3_forward_graph.rst | 23 +++ examples/l3fwd-graph/main.c | 62 ++- 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/doc/guides/sample_app_ug/l3_forward_graph.rst b/doc/guides/sample_app_ug/l3_forward_graph.rst index 0a3e0d44ec..585ac8c898 100644 --- a/doc/guides/sample_app_ug/l3_forward_graph.rst +++ b/doc/guides/sample_app_ug/l3_forward_graph.rst @@ -51,6 +51,9 @@ The application has a number of command line options similar to l3fwd:: [--max-pkt-len PKTLEN] [--no-numa] [--per-port-pool] + [--pcap-enable] + [--pcap-num-cap] + [--pcap-file-name] Where, @@ -69,6 +72,12 @@ Where, * ``--per-port-pool:`` Optional, set to use independent buffer pools per port. Without this option, single buffer pool is used for all ports. +* ``--pcap-enable:`` Optional, Enables packet capture in pcap format on each node with mbuf and node metadata. + +* ``--pcap-num-cap:`` Optional, Number of packets to be captured per core. + +* ``--pcap-file-name:`` Optional, Pcap filename to capture packets in. + For example, consider a dual processor socket platform with 8 physical cores, where cores 0-7 and 16-23 appear on socket 0, while cores 8-15 and 24-31 appear on socket 1. @@ -99,6 +108,20 @@ In this command: | | | | | +--+---+---+-+ +To enable pcap trace on each graph, use following command: + +.. code-block:: console + +.//examples/dpdk-l3fwd-graph -l 1,2 -n 4 -- -p 0x3 --config="(0,0,1),(1,0,2)" --pcap-enable --pcap-num-cap= --pcap-file-name "" + +In this command: + +* The --pcap-enable option enables pcap trace on graph nodes. + +* The --pcap-num-cap option enables user to configure number packets to be captured per graph. Default 1024 packets per graph are captured. + +* The --pcap-file-name option enables user to give filename in which packets are to be captured. + Refer to the *DPDK Getting Started Guide* for general information on running applications and the Environment Abstraction Layer (EAL) options. 
diff --git a/examples/l3fwd-graph/main.c b/examples/l3fwd-graph/main.c index 6dcb6ee92b..5feeab4f0f 100644 --- a/examples/l3fwd-graph/main.c +++ b/examples/l3fwd-graph/main.c @@ -76,6 +76,12 @@ xmm_t val_eth[RTE_MAX_ETHPORTS]; /* Mask of enabled ports */ static uint32_t enabled_port_mask; +/* Pcap trace */ +static char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; +static uint64_t packet_to_capture; +static int pcap_trace_enable; + + struct lcore_rx_queue { uint16_t port_id; uint8_t queue_id; @@ -261,7 +267,8 @@ print_usage(const char *prgname) " [--eth-dest=X,MM:MM:MM:MM:MM:MM]" " [--max-pkt-len PKTLEN]" " [--no-numa]" - " [--per-port-pool]\n\n" + " [--per-port-pool]" + " [--num-pkt-cap]\n\n" " -p PORTMASK: Hexadecimal bitmask of ports to configure\n" " -P : Enable promiscuous mode\n" @@ -270,10 +277,30 @@ print_usage(const char *prgname) "port X\n" " --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n" " --no-numa: Disable numa awareness\n" - " --per-port-pool: Use separate buffer pool per port\n\n", + " --per-port-pool: Use separate buffer pool per port\n" + " --pcap-enable: Enables pcap capture\n" + " --pcap-num-cap NUMPKT: Number of packets to capture\n" + " --pcap-file-name NAME: Pcap file name\n\n", prgname); } +static uint64_t +parse_num_pkt_cap(const char *num_pkt_cap) +{ + uint64_t num_pkt; + char *end = NULL; + + /* Parse decimal string */ + num_pkt = strtoull(num_pkt_cap, &end, 10); + if ((num_pkt_cap[0] == '\0') || (end == NULL) || (*end != '\0')) + return 0; + + if (num_pkt == 0) + return 0; + + return num_pkt; +} + static int parse_max_pkt_len(const char *pktlen) { @@ -404,6 +431,9 @@ static const char short_options[] = "p:" /* portmask */ #define CMD_LINE_OPT_NO_NUMA "no-numa" #define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len" #define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool" +#define CMD_LINE_OPT_PC
RE: [PATCH 1/2] net/i40e: replace put function
Hi, Morten > -Original Message- > From: Morten Brørup > Sent: Thursday, February 9, 2023 5:34 PM > To: Kamalakshitha Aligeri ; > yuying.zh...@intel.com; beilei.x...@intel.com; olivier.m...@6wind.com; > andrew.rybche...@oktetlabs.ru; bruce.richard...@intel.com; > konstantin.anan...@huawei.com; Honnappa Nagarahalli > > Cc: dev@dpdk.org; nd ; Ruifeng Wang > ; Feifei Wang > Subject: RE: [PATCH 1/2] net/i40e: replace put function > > > From: Kamalakshitha Aligeri [mailto:kamalakshitha.alig...@arm.com] > > Sent: Thursday, 9 February 2023 07.25 > > > > Integrated zero-copy put API in mempool cache in i40e PMD. > > On Ampere Altra server, l3fwd single core's performance improves by 5% > > with the new API > > > > Signed-off-by: Kamalakshitha Aligeri > > Reviewed-by: Ruifeng Wang > > Reviewed-by: Feifei Wang > > --- > > Link: > > https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1- > > m...@smartsharesystems.com/ > > > > .mailmap| 1 + > > drivers/net/i40e/i40e_rxtx_vec_common.h | 34 > > - > > 2 files changed, 28 insertions(+), 7 deletions(-) > > > > diff --git a/.mailmap b/.mailmap > > index 75884b6fe2..05a42edbcf 100644 > > --- a/.mailmap > > +++ b/.mailmap > > @@ -670,6 +670,7 @@ Kai Ji Kaiwen Deng > > Kalesh AP > > > > Kamalakannan R > > +Kamalakshitha Aligeri > > Kamil Bednarczyk Kamil Chalupnik > > Kamil Rytarowski > > > > diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h > > b/drivers/net/i40e/i40e_rxtx_vec_common.h > > index fe1a6ec75e..80d4a159e6 100644 > > --- a/drivers/net/i40e/i40e_rxtx_vec_common.h > > +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h > > @@ -95,17 +95,37 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq) > > > > n = txq->tx_rs_thresh; > > > > -/* first buffer to free from S/W ring is at index > > - * tx_next_dd - (tx_rs_thresh-1) > > - */ > > + /* first buffer to free from S/W ring is at index > > +* tx_next_dd - (tx_rs_thresh-1) > > +*/ > > txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; > > > > if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) { > > - for (i = 0; i < n; i++) { > > - free[i] = txep[i].mbuf; > > - /* no need to reset txep[i].mbuf in vector path */ > > + struct rte_mempool *mp = txep[0].mbuf->pool; > > + struct rte_mempool_cache *cache = > > rte_mempool_default_cache(mp, rte_lcore_id()); > > + > > + if (!cache || n > RTE_MEMPOOL_CACHE_MAX_SIZE) { > > If the mempool has a cache, do not compare n to > RTE_MEMPOOL_CACHE_MAX_SIZE. Instead, call > rte_mempool_cache_zc_put_bulk() to determine if n is acceptable for zero- > copy. > > It looks like this patch behaves incorrectly if the cache is configured to be > smaller than RTE_MEMPOOL_CACHE_MAX_SIZE. Let's say the cache size is 8, > which will make the flush threshold 12. If n is 32, your code will not enter this > branch, but proceed to call rte_mempool_cache_zc_put_bulk(), which will > return NULL, and then you will goto done. > > Obviously, if there is no cache, fall back to the standard > rte_mempool_put_bulk(). Agree with this. I think we are ignoring the case where (cache -> flushthresh < n < RTE_MEMPOOL_CACHE_MAX_SIZE). Our goal is that if (!cache || n > cache -> flushthresh), we can put the buffers into the mempool directly. Thus maybe we can change it as follows: struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id()); if (!cache || n > cache -> flushthresh) { for (i = 0; i < n ; i++) free[i] = txep[i].mbuf; if (!cache) { rte_mempool_generic_put; goto done; } else if { rte_mempool_ops_enqueue_bulk; goto done; } } Can we change it like this?
> > > + for (i = 0; i < n ; i++) > > + free[i] = txep[i].mbuf; > > + if (!cache) { > > + rte_mempool_generic_put(mp, (void > **)free, n, > > cache); > > + goto done; > > + } > > + if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) { > > + rte_mempool_ops_enqueue_bulk(mp, (void > **)free, > > n); > > + goto done; > > + } > > + } > > + void **cache_objs; > > + > > + cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, > n); > > + if (cache_objs) { > > + for (i = 0; i < n; i++) { > > + cache_objs[i] = txep->mbuf; > > + /* no need to reset txep[i].mbuf in vector > path > > */ > > + txep++; > > + } > > } > > - rte_mempool_put_bulk(free[0]->pool, (void **)free, n); > > goto done; > > } > > > > -- > > 2.25.1 > >
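For readability, the restructuring discussed in the reply above could look roughly like the following. This is only a sketch of the idea under discussion, not the final driver code: it assumes the experimental rte_mempool_cache_zc_put_bulk() from the linked mempool series, and takes a plain mbuf array instead of the driver's sw_ring entries:

#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Sketch of the fast-free path being discussed: bypass the cache when it is
 * absent or when n exceeds its flush threshold, otherwise try the zero-copy put.
 */
static inline void
tx_fast_free(struct rte_mbuf **mbufs, unsigned int n)
{
    struct rte_mempool *mp = mbufs[0]->pool;
    struct rte_mempool_cache *cache =
        rte_mempool_default_cache(mp, rte_lcore_id());
    void **cache_objs;
    unsigned int i;

    if (cache == NULL || n > cache->flushthresh) {
        if (cache == NULL)
            rte_mempool_generic_put(mp, (void **)mbufs, n, NULL);
        else
            rte_mempool_ops_enqueue_bulk(mp, (void **)mbufs, n);
        return;
    }

    /* Zero-copy: reserve room in the cache and write the mbufs directly. */
    cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
    if (cache_objs != NULL) {
        for (i = 0; i < n; i++)
            cache_objs[i] = mbufs[i];
        return;
    }

    /* Fallback if the zero-copy reservation was refused. */
    rte_mempool_put_bulk(mp, (void **)mbufs, n);
}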
RE: [EXT] Re: [PATCH v9 1/4] lib: add generic support for reading PMU events
Hi David, Thanks for review. Comments inline. >-Original Message- >From: David Marchand >Sent: Monday, February 6, 2023 12:03 PM >To: Tomasz Duszynski >Cc: dev@dpdk.org; Thomas Monjalon ; >roret...@linux.microsoft.com; >ruifeng.w...@arm.com; bruce.richard...@intel.com; Jerin Jacob Kollanukkaran >; >mattias.ronnb...@ericsson.com; m...@smartsharesystems.com; zhou...@loongson.cn >Subject: [EXT] Re: [PATCH v9 1/4] lib: add generic support for reading PMU >events > >External Email > >-- >Hello, > >On Thu, Feb 2, 2023 at 1:50 PM Tomasz Duszynski wrote: >> >> Add support for programming PMU counters and reading their values in >> runtime bypassing kernel completely. >> >> This is especially useful in cases where CPU cores are isolated >> (nohz_full) i.e run dedicated tasks. In such cases one cannot use >> standard perf utility without sacrificing latency and performance. > >For my understanding, what OS capability/permission are required to use this >library? > On x86 it sufficient for self-monitoring to have kernel built with perf events enabled and /proc/sys/kernel/perf_event_paranoid knob should be set to 2, which should be a default value anyway, unless changed by some scripts. On ARM64 you need to additionally set /proc/sys/kernel/perf_user_access to bypass kernel when accessing hw counters. > >> >> Signed-off-by: Tomasz Duszynski >> Acked-by: Morten Brørup >> --- >> MAINTAINERS| 5 + >> app/test/meson.build | 1 + >> app/test/test_pmu.c| 55 +++ >> doc/api/doxy-api-index.md | 3 +- >> doc/api/doxy-api.conf.in | 1 + >> doc/guides/prog_guide/profile_app.rst | 8 + >> doc/guides/rel_notes/release_23_03.rst | 9 + >> lib/meson.build| 1 + >> lib/pmu/meson.build| 13 + >> lib/pmu/pmu_private.h | 29 ++ >> lib/pmu/rte_pmu.c | 464 + >> lib/pmu/rte_pmu.h | 205 +++ >> lib/pmu/version.map| 20 ++ >> 13 files changed, 813 insertions(+), 1 deletion(-) create mode >> 100644 app/test/test_pmu.c create mode 100644 lib/pmu/meson.build >> create mode 100644 lib/pmu/pmu_private.h create mode 100644 >> lib/pmu/rte_pmu.c create mode 100644 lib/pmu/rte_pmu.h create mode >> 100644 lib/pmu/version.map >> >> diff --git a/MAINTAINERS b/MAINTAINERS index 9a0f416d2e..9f13eafd95 >> 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -1697,6 +1697,11 @@ M: Nithin Dabilpuram >> M: Pavan Nikhilesh >> F: lib/node/ >> >> +PMU - EXPERIMENTAL >> +M: Tomasz Duszynski >> +F: lib/pmu/ >> +F: app/test/test_pmu* >> + >> >> Test Applications >> - >> diff --git a/app/test/meson.build b/app/test/meson.build index >> f34d19e3c3..7b6b69dcf1 100644 >> --- a/app/test/meson.build >> +++ b/app/test/meson.build >> @@ -111,6 +111,7 @@ test_sources = files( >> 'test_reciprocal_division_perf.c', >> 'test_red.c', >> 'test_pie.c', >> +'test_pmu.c', >> 'test_reorder.c', >> 'test_rib.c', >> 'test_rib6.c', > >This code adds a new test. >This test should be added to an existing testsuite, like fast-tests etc... > > >> diff --git a/app/test/test_pmu.c b/app/test/test_pmu.c new file mode >> 100644 index 00..a9bfb1a427 >> --- /dev/null >> +++ b/app/test/test_pmu.c >> @@ -0,0 +1,55 @@ >> +/* SPDX-License-Identifier: BSD-3-Clause >> + * Copyright(C) 2023 Marvell International Ltd. 
>> + */ >> + >> +#include "test.h" >> + >> +#ifndef RTE_EXEC_ENV_LINUX >> + >> +static int >> +test_pmu(void) >> +{ >> + printf("pmu_autotest only supported on Linux, skipping test\n"); >> + return TEST_SKIPPED; >> +} >> + >> +#else >> + >> +#include >> + >> +static int >> +test_pmu_read(void) >> +{ >> + int tries = 10, event = -1; >> + uint64_t val = 0; >> + >> + if (rte_pmu_init() < 0) >> + return TEST_FAILED; >> + >> + while (tries--) >> + val += rte_pmu_read(event); >> + >> + rte_pmu_fini(); >> + >> + return val ? TEST_SUCCESS : TEST_FAILED; } >> + >> +static struct unit_test_suite pmu_tests = { >> + .suite_name = "pmu autotest", >> + .setup = NULL, >> + .teardown = NULL, >> + .unit_test_cases = { >> + TEST_CASE(test_pmu_read), >> + TEST_CASES_END() >> + } >> +}; >> + >> +static int >> +test_pmu(void) >> +{ >> + return unit_test_suite_runner(&pmu_tests); >> +} >> + >> +#endif /* RTE_EXEC_ENV_LINUX */ >> + >> +REGISTER_TEST_COMMAND(pmu_autotest, test_pmu); >> diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md >> index de488c7abf..7f1938f92f 100644 >> --- a/doc/api/doxy-api-index.md >> +++ b/doc/api/doxy-api-index.md >> @@ -222,7 +222,8 @@ The public API headers are grouped by t
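For context, a heavily hedged sketch of the intended self-monitoring flow. rte_pmu_init(), rte_pmu_read() and rte_pmu_fini() appear in the test code quoted above; rte_pmu_add_event() and the "cpu_cycles" event name are assumptions based on the rest of the proposed series and are not shown in this excerpt:

#include <inttypes.h>
#include <stdio.h>
#include <rte_pmu.h>

/* Assumed API: rte_pmu_add_event() returns an index usable with rte_pmu_read(). */
static void
pmu_measure(void)
{
    uint64_t before, after;
    int ev;

    if (rte_pmu_init() < 0)
        return;

    ev = rte_pmu_add_event("cpu_cycles");
    if (ev < 0)
        goto out;

    before = rte_pmu_read(ev);
    /* ... code under measurement ... */
    after = rte_pmu_read(ev);

    printf("event delta: %" PRIu64 "\n", after - before);
out:
    rte_pmu_fini();
}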