On Tue, Feb 21, 2023 at 2:54 PM <pbhagavat...@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavat...@marvell.com> > > CN10K supports stashing the scheduled WQE carried by `rte_event` to the > core's L2 Dcache. The number of cache lines to be stashed and the offset > are configurable per HWGRP, i.e. event queue. > > By default, stashing is enabled on queues which have been connected to > Rx adapter. Stashing improves performance by up to 6% based on the > workload. Both MBUF and NIX_RX_WQE_HDR + NIX_RX_PARSE_S are stashed. > > Add device arguments to support configuring WQE stashing. > The dictionary format for device arguments is as follows: > `[Qx|stash_offset|stash_length]`; here the stash offset can be a > negative integer. > > For stashing mbuf on queue 0 and mbuf + headroom on queue 1 > -a 0002:1e:00.0,stash="[0|-1|1][1|-1|2]" > > Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
Applied to dpdk-next-net-eventdev/for-main. Thanks > --- > doc/guides/eventdevs/cnxk.rst | 14 ++++++ > drivers/common/cnxk/roc_api.h | 3 ++ > drivers/common/cnxk/roc_features.h | 13 ++++++ > drivers/common/cnxk/roc_mbox.h | 10 +++++ > drivers/common/cnxk/roc_model.h | 6 +++ > drivers/common/cnxk/roc_sso.c | 49 ++++++++++++++++++++- > drivers/common/cnxk/roc_sso.h | 11 ++++- > drivers/common/cnxk/version.map | 1 + > drivers/event/cnxk/cn10k_eventdev.c | 30 +++++++++++++ > drivers/event/cnxk/cn10k_eventdev.h | 3 ++ > drivers/event/cnxk/cnxk_eventdev.c | 66 ++++++++++++++++++++++++++--- > drivers/event/cnxk/cnxk_eventdev.h | 11 ++++- > 12 files changed, 207 insertions(+), 10 deletions(-) > create mode 100644 drivers/common/cnxk/roc_features.h > > diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst > index a91c947c10..c3c0bfd7b5 100644 > --- a/doc/guides/eventdevs/cnxk.rst > +++ b/doc/guides/eventdevs/cnxk.rst > @@ -102,6 +102,20 @@ Runtime Config Options > > -a 0002:0e:00.0,qos=[1-50-50] > > +- ``CN10K WQE stashing support`` > + > + CN10K supports stashing the scheduled WQE carried by `rte_event` to the > + cores L2 Dcache. The number of cache lines to be stashed and the offset > + is configurable per HWGRP i.e. event queue. The dictionary format is as > + follows `[Qx|stash_offset|stash_length]` here the stash offset can be > + a negative integer. > + By default stashing is not enabled. 
> + > + For example:: > + > + For stashing mbuf on queue 0 and mbuf + headroom on queue 1 > + -a 0002:0e:00.0,stash="[0|-1|1][1|-1|2]" > + > - ``Force Rx Back pressure`` > > Force Rx back pressure when same mempool is used across ethernet device > diff --git a/drivers/common/cnxk/roc_api.h b/drivers/common/cnxk/roc_api.h > index 9d7f5417c2..993a2f7a68 100644 > --- a/drivers/common/cnxk/roc_api.h > +++ b/drivers/common/cnxk/roc_api.h > @@ -47,6 +47,9 @@ > /* HW Errata */ > #include "roc_errata.h" > > +/* HW Features */ > +#include "roc_features.h" > + > /* Mbox */ > #include "roc_mbox.h" > > diff --git a/drivers/common/cnxk/roc_features.h > b/drivers/common/cnxk/roc_features.h > new file mode 100644 > index 0000000000..f3948af45e > --- /dev/null > +++ b/drivers/common/cnxk/roc_features.h > @@ -0,0 +1,13 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2022 Marvell. > + */ > +#ifndef _ROC_FEATURES_H_ > +#define _ROC_FEATURES_H_ > + > +static inline bool > +roc_feature_sso_has_stash(void) > +{ > + return (roc_model_is_cn103xx() | roc_model_is_cn10ka_b0()) ? 
true : > false; > +} > + > +#endif > diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h > index a6091a98c1..af3c10b0b0 100644 > --- a/drivers/common/cnxk/roc_mbox.h > +++ b/drivers/common/cnxk/roc_mbox.h > @@ -127,6 +127,8 @@ struct mbox_msghdr { > M(SSO_CONFIG_LSW, 0x612, ssow_config_lsw, ssow_config_lsw, msg_rsp) > \ > M(SSO_HWS_CHNG_MSHIP, 0x613, ssow_chng_mship, ssow_chng_mship, > \ > msg_rsp) > \ > + M(SSO_GRP_STASH_CONFIG, 0x614, sso_grp_stash_config, > \ > + sso_grp_stash_cfg, msg_rsp) > \ > /* TIM mbox IDs (range 0x800 - 0x9FF) */ > \ > M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req, > \ > tim_lf_alloc_rsp) > \ > @@ -1374,6 +1376,14 @@ struct sso_grp_qos_cfg { > uint16_t __io iaq_thr; > }; > > +struct sso_grp_stash_cfg { > + struct mbox_msghdr hdr; > + uint16_t __io grp; > + uint8_t __io ena; > + uint8_t __io offset : 4; > + uint8_t __io num_linesm1 : 4; > +}; > + > struct sso_grp_stats { > struct mbox_msghdr hdr; > uint16_t __io grp; > diff --git a/drivers/common/cnxk/roc_model.h b/drivers/common/cnxk/roc_model.h > index 081c03915c..ba0fe577c0 100644 > --- a/drivers/common/cnxk/roc_model.h > +++ b/drivers/common/cnxk/roc_model.h > @@ -244,6 +244,12 @@ roc_model_is_cn10kb(void) > return roc_model->flag & ROC_MODEL_CN103xx; > } > > +static inline uint64_t > +roc_model_is_cn103xx(void) > +{ > + return roc_model->flag & ROC_MODEL_CN103xx; > +} > + > static inline bool > roc_env_is_hw(void) > { > diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c > index 9e466f09ef..4a6a5080f7 100644 > --- a/drivers/common/cnxk/roc_sso.c > +++ b/drivers/common/cnxk/roc_sso.c > @@ -414,8 +414,7 @@ roc_sso_hwgrp_hws_link_status(struct roc_sso *roc_sso, > uint8_t hws, > } > > int > -roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_qos > *qos, > - uint8_t nb_qos) > +roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_qos > *qos, uint16_t nb_qos) > { > struct sso *sso = 
roc_sso_to_sso_priv(roc_sso); > struct dev *dev = &sso->dev; > @@ -423,6 +422,9 @@ roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct > roc_sso_hwgrp_qos *qos, > struct mbox *mbox; > int i, rc; > > + if (!nb_qos) > + return 0; > + > mbox = mbox_get(dev->mbox); > for (i = 0; i < nb_qos; i++) { > uint8_t iaq_prcnt = qos[i].iaq_prcnt; > @@ -748,6 +750,49 @@ sso_update_msix_vec_count(struct roc_sso *roc_sso, > uint16_t sso_vec_cnt) > return rc; > } > > +int > +roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso, struct > roc_sso_hwgrp_stash *stash, > + uint16_t nb_stash) > +{ > + struct sso *sso = roc_sso_to_sso_priv(roc_sso); > + struct sso_grp_stash_cfg *req; > + struct dev *dev = &sso->dev; > + struct mbox *mbox; > + int i, rc; > + > + if (!nb_stash) > + return 0; > + > + mbox = mbox_get(dev->mbox); > + for (i = 0; i < nb_stash; i++) { > + req = mbox_alloc_msg_sso_grp_stash_config(mbox); > + if (req == NULL) { > + rc = mbox_process(mbox); > + if (rc) { > + rc = -EIO; > + goto fail; > + } > + > + req = mbox_alloc_msg_sso_grp_stash_config(mbox); > + if (req == NULL) { > + rc = -ENOSPC; > + goto fail; > + } > + } > + req->ena = true; > + req->grp = stash[i].hwgrp; > + req->offset = stash[i].stash_offset; > + req->num_linesm1 = stash[i].stash_count - 1; > + } > + > + rc = mbox_process(mbox); > + if (rc) > + rc = -EIO; > +fail: > + mbox_put(mbox); > + return rc; > +} > + > int > roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, uint16_t > nb_hwgrp, uint16_t nb_tim_lfs) > { > diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h > index 4699908c25..e67797b046 100644 > --- a/drivers/common/cnxk/roc_sso.h > +++ b/drivers/common/cnxk/roc_sso.h > @@ -14,6 +14,12 @@ struct roc_sso_hwgrp_qos { > uint8_t taq_prcnt; > }; > > +struct roc_sso_hwgrp_stash { > + uint16_t hwgrp; > + uint8_t stash_offset; > + uint8_t stash_count; > +}; > + > struct roc_sso_hws_stats { > uint64_t arbitration; > }; > @@ -66,7 +72,7 @@ int __roc_api 
roc_sso_rsrc_init(struct roc_sso *roc_sso, > uint8_t nb_hws, uint16_ > void __roc_api roc_sso_rsrc_fini(struct roc_sso *roc_sso); > int __roc_api roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, > struct roc_sso_hwgrp_qos *qos, > - uint8_t nb_qos); > + uint16_t nb_qos); > int __roc_api roc_sso_hwgrp_alloc_xaq(struct roc_sso *roc_sso, > uint32_t npa_aura_id, uint16_t hwgrps); > int __roc_api roc_sso_hwgrp_release_xaq(struct roc_sso *roc_sso, > @@ -88,6 +94,9 @@ int __roc_api roc_sso_hwgrp_init_xaq_aura(struct roc_sso > *roc_sso, > uint32_t nb_xae); > int __roc_api roc_sso_hwgrp_free_xaq_aura(struct roc_sso *roc_sso, > uint16_t nb_hwgrp); > +int __roc_api roc_sso_hwgrp_stash_config(struct roc_sso *roc_sso, > + struct roc_sso_hwgrp_stash *stash, > + uint16_t nb_stash); > > /* Debug */ > void __roc_api roc_sso_dump(struct roc_sso *roc_sso, uint8_t nb_hws, > diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map > index 5d2b75fb5a..381bbb0b6c 100644 > --- a/drivers/common/cnxk/version.map > +++ b/drivers/common/cnxk/version.map > @@ -379,6 +379,7 @@ INTERNAL { > roc_sso_hwgrp_qos_config; > roc_sso_hwgrp_release_xaq; > roc_sso_hwgrp_set_priority; > + roc_sso_hwgrp_stash_config; > roc_sso_hwgrp_stats_get; > roc_sso_hws_base_get; > roc_sso_hws_link; > diff --git a/drivers/event/cnxk/cn10k_eventdev.c > b/drivers/event/cnxk/cn10k_eventdev.c > index 8e74edff55..a9d2e5463f 100644 > --- a/drivers/event/cnxk/cn10k_eventdev.c > +++ b/drivers/event/cnxk/cn10k_eventdev.c > @@ -786,6 +786,24 @@ cn10k_sso_port_unlink(struct rte_eventdev *event_dev, > void *port, > return (int)nb_unlinks; > } > > +static void > +cn10k_sso_configure_queue_stash(struct rte_eventdev *event_dev) > +{ > + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); > + struct roc_sso_hwgrp_stash stash[dev->stash_cnt]; > + int i, rc; > + > + plt_sso_dbg(); > + for (i = 0; i < dev->stash_cnt; i++) { > + stash[i].hwgrp = dev->stash_parse_data[i].queue; > + stash[i].stash_offset = 
dev->stash_parse_data[i].stash_offset; > + stash[i].stash_count = dev->stash_parse_data[i].stash_length; > + } > + rc = roc_sso_hwgrp_stash_config(&dev->sso, stash, dev->stash_cnt); > + if (rc < 0) > + plt_warn("failed to configure HWGRP WQE stashing rc = %d", > rc); > +} > + > static int > cn10k_sso_start(struct rte_eventdev *event_dev) > { > @@ -795,6 +813,7 @@ cn10k_sso_start(struct rte_eventdev *event_dev) > if (rc < 0) > return rc; > > + cn10k_sso_configure_queue_stash(event_dev); > rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset, > cn10k_sso_hws_flush_events); > if (rc < 0) > @@ -866,6 +885,8 @@ cn10k_sso_rx_adapter_queue_add( > int32_t rx_queue_id, > const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) > { > + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); > + struct roc_sso_hwgrp_stash stash; > struct cn10k_eth_rxq *rxq; > uint64_t meta_aura; > void *lookup_mem; > @@ -884,6 +905,14 @@ cn10k_sso_rx_adapter_queue_add( > meta_aura = rxq->meta_aura; > cn10k_sso_set_priv_mem(event_dev, lookup_mem, meta_aura); > cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); > + if (roc_feature_sso_has_stash()) { > + stash.hwgrp = queue_conf->ev.queue_id; > + stash.stash_offset = CN10K_SSO_DEFAULT_STASH_OFFSET; > + stash.stash_count = CN10K_SSO_DEFAULT_STASH_LENGTH; > + rc = roc_sso_hwgrp_stash_config(&dev->sso, &stash, 1); > + if (rc < 0) > + plt_warn("failed to configure HWGRP WQE stashing rc = > %d", rc); > + } > > return 0; > } > @@ -1226,6 +1255,7 @@ RTE_PMD_REGISTER_PARAM_STRING(event_cn10k, > CNXK_SSO_XAE_CNT "=<int>" > CNXK_SSO_GGRP_QOS "=<string>" > CNXK_SSO_FORCE_BP "=1" > CN10K_SSO_GW_MODE "=<int>" > + CN10K_SSO_STASH "=<string>" > CNXK_TIM_DISABLE_NPA "=1" > CNXK_TIM_CHNK_SLOTS "=<int>" > CNXK_TIM_RINGS_LMT "=<int>" > diff --git a/drivers/event/cnxk/cn10k_eventdev.h > b/drivers/event/cnxk/cn10k_eventdev.h > index 5fb6f0a6f2..aaa01d1ec1 100644 > --- a/drivers/event/cnxk/cn10k_eventdev.h > +++ 
b/drivers/event/cnxk/cn10k_eventdev.h > @@ -5,6 +5,9 @@ > #ifndef __CN10K_EVENTDEV_H__ > #define __CN10K_EVENTDEV_H__ > > +#define CN10K_SSO_DEFAULT_STASH_OFFSET -1 > +#define CN10K_SSO_DEFAULT_STASH_LENGTH 2 > + > struct cn10k_sso_hws { > uint64_t base; > uint64_t gw_rdata; > diff --git a/drivers/event/cnxk/cnxk_eventdev.c > b/drivers/event/cnxk/cnxk_eventdev.c > index 11bcd792ba..5f1fa1e96d 100644 > --- a/drivers/event/cnxk/cnxk_eventdev.c > +++ b/drivers/event/cnxk/cnxk_eventdev.c > @@ -459,6 +459,8 @@ cnxk_sso_close(struct rte_eventdev *event_dev, > cnxk_sso_unlink_t unlink_fn) > return 0; > } > > +typedef void (*param_parse_t)(char *value, void *opaque); > + > static void > parse_queue_param(char *value, void *opaque) > { > @@ -496,7 +498,44 @@ parse_queue_param(char *value, void *opaque) > } > > static void > -parse_qos_list(const char *value, void *opaque) > +parse_stash_param(char *value, void *opaque) > +{ > + struct cnxk_sso_stash queue_stash = {0}; > + struct cnxk_sso_evdev *dev = opaque; > + struct cnxk_sso_stash *old_ptr; > + char *tok = strtok(value, "|"); > + uint16_t *val; > + > + if (!strlen(value)) > + return; > + > + val = (uint16_t *)&queue_stash; > + while (tok != NULL) { > + *val = atoi(tok); > + tok = strtok(NULL, "|"); > + val++; > + } > + > + if (val != (&queue_stash.stash_length + 1)) { > + plt_err("Invalid QoS parameter expected > [Qx|stash_offset|stash_length]"); > + return; > + } > + > + dev->stash_cnt++; > + old_ptr = dev->stash_parse_data; > + dev->stash_parse_data = > + rte_realloc(dev->stash_parse_data, > + sizeof(struct cnxk_sso_stash) * dev->stash_cnt, > 0); > + if (dev->stash_parse_data == NULL) { > + dev->stash_parse_data = old_ptr; > + dev->stash_cnt--; > + return; > + } > + dev->stash_parse_data[dev->stash_cnt - 1] = queue_stash; > +} > + > +static void > +parse_list(const char *value, void *opaque, param_parse_t fn) > { > char *s = strdup(value); > char *start = NULL; > @@ -511,7 +550,7 @@ parse_qos_list(const char *value, 
void *opaque) > > if (start && start < end) { > *end = 0; > - parse_queue_param(start + 1, opaque); > + fn(start + 1, opaque); > s = end; > start = end; > } > @@ -522,14 +561,27 @@ parse_qos_list(const char *value, void *opaque) > } > > static int > -parse_sso_kvargs_dict(const char *key, const char *value, void *opaque) > +parse_sso_kvargs_qos_dict(const char *key, const char *value, void *opaque) > { > RTE_SET_USED(key); > > /* Dict format [Qx-TAQ-IAQ][Qz-TAQ-IAQ] use '-' cause ',' isn't > allowed. > * Everything is expressed in percentages, 0 represents default. > */ > - parse_qos_list(value, opaque); > + parse_list(value, opaque, parse_queue_param); > + > + return 0; > +} > + > +static int > +parse_sso_kvargs_stash_dict(const char *key, const char *value, void *opaque) > +{ > + RTE_SET_USED(key); > + > + /* Dict format [Qx|<stash_offset>|<stash_length>] use '|' cause ',' > + * isn't allowed. > + */ > + parse_list(value, opaque, parse_stash_param); > > return 0; > } > @@ -548,14 +600,16 @@ cnxk_sso_parse_devargs(struct cnxk_sso_evdev *dev, > struct rte_devargs *devargs) > > rte_kvargs_process(kvlist, CNXK_SSO_XAE_CNT, &parse_kvargs_value, > &dev->xae_cnt); > - rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, &parse_sso_kvargs_dict, > - dev); > + rte_kvargs_process(kvlist, CNXK_SSO_GGRP_QOS, > + &parse_sso_kvargs_qos_dict, dev); > rte_kvargs_process(kvlist, CNXK_SSO_FORCE_BP, &parse_kvargs_flag, > &dev->force_ena_bp); > rte_kvargs_process(kvlist, CN9K_SSO_SINGLE_WS, &parse_kvargs_flag, > &single_ws); > rte_kvargs_process(kvlist, CN10K_SSO_GW_MODE, &parse_kvargs_flag, > &dev->gw_mode); > + rte_kvargs_process(kvlist, CN10K_SSO_STASH, > + &parse_sso_kvargs_stash_dict, dev); > dev->dual_ws = !single_ws; > rte_kvargs_free(kvlist); > } > diff --git a/drivers/event/cnxk/cnxk_eventdev.h > b/drivers/event/cnxk/cnxk_eventdev.h > index d93fdcda25..c7cbd722ab 100644 > --- a/drivers/event/cnxk/cnxk_eventdev.h > +++ b/drivers/event/cnxk/cnxk_eventdev.h > @@ -31,6 +31,7 @@ > 
#define CNXK_SSO_FORCE_BP "force_rx_bp" > #define CN9K_SSO_SINGLE_WS "single_ws" > #define CN10K_SSO_GW_MODE "gw_mode" > +#define CN10K_SSO_STASH "stash" > > #define NSEC2USEC(__ns) ((__ns) / 1E3) > #define USEC2NSEC(__us) ((__us)*1E3) > @@ -70,6 +71,12 @@ struct cnxk_sso_qos { > uint16_t iaq_prcnt; > }; > > +struct cnxk_sso_stash { > + uint16_t queue; > + uint16_t stash_offset; > + uint16_t stash_length; > +}; > + > struct cnxk_sso_evdev { > struct roc_sso sso; > uint8_t max_event_queues; > @@ -103,13 +110,15 @@ struct cnxk_sso_evdev { > struct cnxk_timesync_info *tstamp[RTE_MAX_ETHPORTS]; > /* Dev args */ > uint32_t xae_cnt; > - uint8_t qos_queue_cnt; > + uint16_t qos_queue_cnt; > struct cnxk_sso_qos *qos_parse_data; > uint8_t force_ena_bp; > /* CN9K */ > uint8_t dual_ws; > /* CN10K */ > uint8_t gw_mode; > + uint16_t stash_cnt; > + struct cnxk_sso_stash *stash_parse_data; > /* Crypto adapter */ > uint8_t is_ca_internal_port; > } __rte_cache_aligned; > -- > 2.25.1 >