Hi Akhil, 

> -----Original Message-----
> From: Akhil Goyal <gak...@marvell.com>
> Sent: Monday, May 30, 2022 12:40 AM
> To: Chautru, Nicolas <nicolas.chau...@intel.com>; dev@dpdk.org;
> t...@redhat.com; maxime.coque...@redhat.com
> Cc: tho...@monjalon.net; Kinsella, Ray <ray.kinse...@intel.com>;
> Richardson, Bruce <bruce.richard...@intel.com>;
> hemant.agra...@nxp.com; Vargas, Hernan <hernan.var...@intel.com>;
> david.march...@redhat.com
> Subject: RE: [EXT] [PATCH v6 3/5] baseband/acc100: introduce PMD for
> ACC101
> 
> >
> >
> >  Enable Virtual Functions
> > @@ -167,14 +172,14 @@ queues, priorities, load balance, bandwidth and
> > other settings necessary for the  device to perform FEC functions.
> >
> >  This configuration needs to be executed at least once after reboot or
> > PCI FLR and can -be achieved by using the function
> > ``acc100_configure()``, which sets up the -parameters defined in
> > ``acc100_conf`` structure.
> > +be achieved by using the functions ``acc100_configure()`` or
> > ``acc101_configure()``,
> 
> I believe the user-facing API is common now, right? So why have two
> references?

Thanks, I will amend the documentation now. 

> 
> > +which sets up the parameters defined in the compatible
> > +``acc100_conf``
> > structure.
> >
> >  Test Application
> >  ----------------
> >
> >  BBDEV provides a test application, ``test-bbdev.py`` and range of
> > test data for testing -the functionality of ACC100 5G/4G FEC encode
> > and decode, depending on the device's
> > +the functionality of the device 5G/4G FEC encode and decode,
> > +depending on
> > the device's
> >  capabilities. The test application is located under app->test-bbdev
> > folder and has the  following options:
> >
> > @@ -212,7 +217,7 @@ Test Vectors
> >
> >  In addition to the simple LDPC decoder and LDPC encoder tests, bbdev
> > also provides  a range of additional tests under the test_vectors
> > folder, which may be useful.
> > The results
> > -of these tests will depend on the ACC100 5G/4G FEC capabilities which
> > may cause some
> > +of these tests will depend on the device 5G/4G FEC capabilities which
> > +may
> > cause some
> >  testcases to be skipped, but no failure should be reported.
> >
> >
> > @@ -233,3 +238,11 @@ Specifically for the BBDEV ACC100 PMD, the
> > command below can be used:
> >
> >    ./pf_bb_config ACC100 -c acc100/acc100_config_vf_5g.cfg
> >    ./test-bbdev.py -e="-c 0xff0 -a${VF_PCI_ADDR}" -c validation -n 64
> > -b 32 -l 1 -v ./ldpc_dec_default.data
> > +
> > +Specifically for the BBDEV ACC101 PMD, the command below can be used:
> > +
> > +.. code-block:: console
> > +
> > +  ./pf_bb_config ACC101 -c acc101/acc101_config_2vf_4g5g.cfg
> > + ./test-bbdev.py -e="-c 0xff0 -a${VF_PCI_ADDR}" -c validation -n 64
> > + -b 32 -l 1 -
> > v ./ldpc_dec_default.data
> > +
> > diff --git a/doc/guides/bbdevs/features/acc101.ini
> > b/doc/guides/bbdevs/features/acc101.ini
> > new file mode 100644
> > index 0000000..0e2c21a
> > --- /dev/null
> > +++ b/doc/guides/bbdevs/features/acc101.ini
> > @@ -0,0 +1,13 @@
> > +;
> > +; Supported features of the 'acc101' bbdev driver.
> > +;
> > +; Refer to default.ini for the full list of available PMD features.
> > +;
> > +[Features]
> > +Turbo Decoder (4G)     = Y
> > +Turbo Encoder (4G)     = Y
> > +LDPC Decoder (5G)      = Y
> > +LDPC Encoder (5G)      = Y
> > +LLR/HARQ Compression   = Y
> > +External DDR Access    = Y
> > +HW Accelerated         = Y
> > diff --git a/doc/guides/rel_notes/release_22_07.rst
> > b/doc/guides/rel_notes/release_22_07.rst
> > index e49cace..1803947 100644
> > --- a/doc/guides/rel_notes/release_22_07.rst
> > +++ b/doc/guides/rel_notes/release_22_07.rst
> > @@ -104,6 +104,9 @@ New Features
> >    * ``RTE_EVENT_QUEUE_ATTR_WEIGHT``
> >    * ``RTE_EVENT_QUEUE_ATTR_AFFINITY``
> >
> > +* **Added Intel ACC101 baseband PMD.**
> > +
> > +  * Added a new baseband PMD for Intel ACC101 device.
> >
> >  Removed Items
> >  -------------
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index 3fdf17d..6a2123b 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -22,6 +22,7 @@
> >  #include <rte_bbdev.h>
> >  #include <rte_bbdev_pmd.h>
> >  #include "rte_acc100_pmd.h"
> > +#include "rte_acc101_pmd.h"
> >
> >  #ifdef RTE_LIBRTE_BBDEV_DEBUG
> >  RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG); @@ -1133,7
> +1134,10
> > @@
> >  /* ACC100 PCI PF address map */
> >  static struct rte_pci_id pci_id_acc100_pf_map[] = {
> >     {
> > -           RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID,
> > RTE_ACC100_PF_DEVICE_ID)
> > +           RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID,
> > RTE_ACC100_PF_DEVICE_ID),
> > +   },
> > +   {
> > +           RTE_PCI_DEVICE(RTE_ACC101_VENDOR_ID,
> > RTE_ACC101_PF_DEVICE_ID),
> >     },
> >     {.device_id = 0},
> >  };
> > @@ -1141,7 +1145,10 @@
> >  /* ACC100 PCI VF address map */
> >  static struct rte_pci_id pci_id_acc100_vf_map[] = {
> >     {
> > -           RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID,
> > RTE_ACC100_VF_DEVICE_ID)
> > +           RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID,
> > RTE_ACC100_VF_DEVICE_ID),
> > +   },
> > +   {
> > +           RTE_PCI_DEVICE(RTE_ACC101_VENDOR_ID,
> > RTE_ACC101_VF_DEVICE_ID),
> >     },
> >     {.device_id = 0},
> >  };
> > @@ -1290,7 +1297,7 @@
> >
> >  /* Fill in a frame control word for LDPC decoding. */  static inline
> > void -acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct
> > acc100_fcw_ld *fcw,
> > +acc100_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld
> > +*fcw,
> >             union acc100_harq_layout_data *harq_layout)  {
> >     uint16_t harq_out_length, harq_in_length, ncb_p, k0_p,
> > parity_offset; @@ -1414,6 +1421,128 @@
> >     }
> >  }
> >
> > +/* Convert offset to harq index for harq_layout structure */ static
> > +inline uint32_t hq_index(uint32_t offset) {
> > +   return (offset >> ACC100_HARQ_OFFSET_SHIFT) &
> > ACC100_HARQ_OFFSET_MASK;
> > +}
> > +
> > +/* Fill in a frame control word for LDPC decoding for ACC101 */
> > +static inline void acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op,
> > +struct acc100_fcw_ld *fcw,
> > +           union acc100_harq_layout_data *harq_layout) {
> > +   uint16_t harq_out_length, harq_in_length, ncb_p, k0_p,
> parity_offset;
> > +   uint32_t harq_index;
> > +   uint32_t l;
> > +
> > +   fcw->qm = op->ldpc_dec.q_m;
> > +   fcw->nfiller = op->ldpc_dec.n_filler;
> > +   fcw->BG = (op->ldpc_dec.basegraph - 1);
> > +   fcw->Zc = op->ldpc_dec.z_c;
> > +   fcw->ncb = op->ldpc_dec.n_cb;
> > +   fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
> > +                   op->ldpc_dec.rv_index);
> > +   if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
> > +           fcw->rm_e = op->ldpc_dec.cb_params.e;
> > +   else
> > +           fcw->rm_e = (op->ldpc_dec.tb_params.r <
> > +                           op->ldpc_dec.tb_params.cab) ?
> > +                                           op->ldpc_dec.tb_params.ea :
> > +                                           op->ldpc_dec.tb_params.eb;
> > +
> > +   if (unlikely(check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE) &&
> > +                   (op->ldpc_dec.harq_combined_input.length == 0))) {
> > +           rte_bbdev_log(WARNING, "Null HARQ input size provided");
> > +           /* Disable HARQ input in that case to carry forward */
> > +           op->ldpc_dec.op_flags ^=
> > RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
> > +   }
> > +
> > +   fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
> > +   fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
> > +   fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
> > +   fcw->bypass_dec = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_DECODE_BYPASS);
> > +   fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
> > +   if (op->ldpc_dec.q_m == 1) {
> > +           fcw->bypass_intlv = 1;
> > +           fcw->qm = 2;
> > +   }
> > +   fcw->hcin_decomp_mode = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
> > +   fcw->hcout_comp_mode = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
> > +   fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_LLR_COMPRESSION);
> > +   harq_index = hq_index(op-
> >ldpc_dec.harq_combined_output.offset);
> > +   if (fcw->hcin_en > 0) {
> > +           harq_in_length = op-
> >ldpc_dec.harq_combined_input.length;
> > +           if (fcw->hcin_decomp_mode > 0)
> > +                   harq_in_length = harq_in_length * 8 / 6;
> > +           harq_in_length = RTE_MIN(harq_in_length, op-
> >ldpc_dec.n_cb
> > +                           - op->ldpc_dec.n_filler);
> > +           /* Alignment on next 64B - Already enforced from HC output
> */
> > +           harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
> > +           fcw->hcin_size0 = harq_in_length;
> > +           fcw->hcin_offset = 0;
> > +           fcw->hcin_size1 = 0;
> > +   } else {
> > +           fcw->hcin_size0 = 0;
> > +           fcw->hcin_offset = 0;
> > +           fcw->hcin_size1 = 0;
> > +   }
> > +
> > +   fcw->itmax = op->ldpc_dec.iter_max;
> > +   fcw->itstop = check_bit(op->ldpc_dec.op_flags,
> > +                   RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
> > +   fcw->synd_precoder = fcw->itstop;
> > +   /*
> > +    * These are all implicitly set
> > +    * fcw->synd_post = 0;
> > +    * fcw->so_en = 0;
> > +    * fcw->so_bypass_rm = 0;
> > +    * fcw->so_bypass_intlv = 0;
> > +    * fcw->dec_convllr = 0;
> > +    * fcw->hcout_convllr = 0;
> > +    * fcw->hcout_size1 = 0;
> > +    * fcw->so_it = 0;
> > +    * fcw->hcout_offset = 0;
> > +    * fcw->negstop_th = 0;
> > +    * fcw->negstop_it = 0;
> > +    * fcw->negstop_en = 0;
> > +    * fcw->gain_i = 1;
> > +    * fcw->gain_h = 1;
> > +    */
> > +   if (fcw->hcout_en > 0) {
> > +           parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
> > +                   * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
> > +           k0_p = (fcw->k0 > parity_offset) ?
> > +                           fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
> > +           ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
> > +           l = RTE_MIN(k0_p + fcw->rm_e, INT16_MAX);
> > +           harq_out_length = (uint16_t) fcw->hcin_size0;
> > +           harq_out_length = RTE_MAX(harq_out_length, l);
> > +           /* Cannot exceed the pruned Ncb circular buffer */
> > +           harq_out_length = RTE_MIN(harq_out_length, ncb_p);
> > +           /* Alignment on next 64B */
> > +           harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
> > +           fcw->hcout_size0 = harq_out_length;
> > +           fcw->hcout_size1 = 0;
> > +           fcw->hcout_offset = 0;
> > +           harq_layout[harq_index].offset = fcw->hcout_offset;
> > +           harq_layout[harq_index].size0 = fcw->hcout_size0;
> > +   } else {
> > +           fcw->hcout_size0 = 0;
> > +           fcw->hcout_size1 = 0;
> > +           fcw->hcout_offset = 0;
> > +   }
> > +}
> > +
> >  /**
> >   * Fills descriptor with data pointers of one block type.
> >   *
> > @@ -2966,7 +3095,7 @@
> >             struct acc100_fcw_ld *fcw;
> >             uint32_t seg_total_left;
> >             fcw = &desc->req.fcw_ld;
> > -           acc100_fcw_ld_fill(op, fcw, harq_layout);
> > +           q->d->fcw_ld_fill(op, fcw, harq_layout);
> >
> >             /* Special handling when overusing mbuf */
> >             if (fcw->rm_e < ACC100_MAX_E_MBUF)
> > @@ -3033,7 +3162,7 @@
> >     desc = q->ring_addr + desc_idx;
> >     uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> >     union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> > -   acc100_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout);
> > +   q->d->fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout);
> >
> >     input = op->ldpc_dec.input.data;
> >     h_output_head = h_output = op->ldpc_dec.hard_output.data; @@ -
> 4145,9
> > +4274,19 @@
> >     dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
> >     dev->dequeue_ldpc_dec_ops = acc100_dequeue_ldpc_dec;
> >
> > +   /* Device variant specific handling */
> > +   if ((pci_dev->id.device_id == RTE_ACC100_PF_DEVICE_ID) ||
> > +                   (pci_dev->id.device_id ==
> RTE_ACC100_VF_DEVICE_ID))
> > {
> > +           ((struct acc100_device *) dev->data->dev_private)-
> > >device_variant = ACC100_VARIANT;
> > +           ((struct acc100_device *) dev->data->dev_private)-
> >fcw_ld_fill
> > = acc100_fcw_ld_fill;
> > +   } else {
> > +           ((struct acc100_device *) dev->data->dev_private)-
> > >device_variant = ACC101_VARIANT;
> > +           ((struct acc100_device *) dev->data->dev_private)-
> >fcw_ld_fill
> > = acc101_fcw_ld_fill;
> > +   }
> > +
> >     ((struct acc100_device *) dev->data->dev_private)->pf_device =
> > -                   !strcmp(drv->driver.name,
> > -                                   RTE_STR(ACC100PF_DRIVER_NAME));
> > +                   !strcmp(drv->driver.name,
> > RTE_STR(ACC100PF_DRIVER_NAME));
> > +
> >     ((struct acc100_device *) dev->data->dev_private)->mmio_base =
> >                     pci_dev->mem_resource[0].addr;
> >
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.h
> > b/drivers/baseband/acc100/rte_acc100_pmd.h
> > index 8fea322..39d5f22 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.h
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.h
> > @@ -22,6 +22,9 @@
> >  #define rte_bbdev_log_debug(fmt, ...)  #endif
> >
> > +#define ACC100_VARIANT 0
> > +#define ACC101_VARIANT 1
> 
> Since you are using the PCI device ID to identify the device, do we still
> need the above defines?

The implementation of is_acc100() is based on checking that variant value:
        (q->d->device_variant == ACC100_VARIANT);

> > +
> >  /* ACC100 PF and VF driver names */
> >  #define ACC100PF_DRIVER_NAME           intel_acc100_pf
> >  #define ACC100VF_DRIVER_NAME           intel_acc100_vf
> > @@ -67,6 +70,8 @@
> >  #define ACC100_HARQ_LAYOUT             (64*1024*1024)
> >  /* Assume offset for HARQ in memory */
> >  #define ACC100_HARQ_OFFSET             (32*1024)
> > +#define ACC100_HARQ_OFFSET_SHIFT       15
> > +#define ACC100_HARQ_OFFSET_MASK        0x7ffffff
> >  /* Mask used to calculate an index in an Info Ring array (not a byte 
> > offset)
> */
> >  #define ACC100_INFO_RING_MASK
> (ACC100_INFO_RING_NUM_ENTRIES-
> > 1)
> >  /* Number of Virtual Functions ACC100 supports */ @@ -574,6 +579,10
> > @@ struct __rte_cache_aligned acc100_queue {
> >     struct acc100_device *d;
> >  };
> >
> > +typedef void (*acc10x_fcw_ld_fill_fun_t)(struct rte_bbdev_dec_op *op,
> > +           struct acc100_fcw_ld *fcw,
> > +           union acc100_harq_layout_data *harq_layout);
> > +
> >  /* Private data structure for each ACC100 device */  struct
> > acc100_device {
> >     void *mmio_base;  /**< Base address of MMIO registers (BAR0) */
> @@
> > -605,6 +614,8 @@ struct acc100_device {
> >     uint16_t q_assigned_bit_map[ACC100_NUM_QGRPS];
> >     bool pf_device; /**< True if this is a PF ACC100 device */
> >     bool configured; /**< True if this ACC100 device is configured */
> > +   uint16_t device_variant;  /**< Device variant */
> > +   acc10x_fcw_ld_fill_fun_t fcw_ld_fill;  /**< 5GUL FCW generation
> > function */
> >  };
> >
> >  /**
> > diff --git a/drivers/baseband/acc100/rte_acc101_pmd.h
> > b/drivers/baseband/acc100/rte_acc101_pmd.h
> > new file mode 100644
> > index 0000000..8f1f4ab
> > --- /dev/null
> > +++ b/drivers/baseband/acc100/rte_acc101_pmd.h
> 
> This file is internal, shouldn't we drop the rte?

Historically, the file naming has lacked consistency; still, I will update
the new file. Thanks.

> 
> 
> > @@ -0,0 +1,55 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation  */
> > +
> > +/* ACC101 PCI vendor & device IDs */
> > +#define RTE_ACC101_VENDOR_ID           (0x8086)
> > +#define RTE_ACC101_PF_DEVICE_ID        (0x57c4)
> > +#define RTE_ACC101_VF_DEVICE_ID        (0x57c5)
> > +
> > +/* Define as 1 to use only a single FEC engine */ #ifndef
> > +RTE_ACC101_SINGLE_FEC #define RTE_ACC101_SINGLE_FEC 0 #endif
> 
> Also the above defines should drop RTE if they are internal symbols.

This is historical as well, but I will update the current code first in an
additional commit.
Thanks

> 
> > +
> > +/* Number of Virtual Functions ACC101 supports */
> > +#define ACC101_NUM_VFS                  16
> > +#define ACC101_NUM_QGRPS                8
> > +#define ACC101_NUM_AQS                  16
> > +/* All ACC101 Registers alignment are 32bits = 4B */
> > +#define ACC101_BYTES_IN_WORD                 4
> > +
> > +#define ACC101_TMPL_PRI_0      0x03020100
> > +#define ACC101_TMPL_PRI_1      0x07060504
> > +#define ACC101_TMPL_PRI_2      0x0b0a0908
> > +#define ACC101_TMPL_PRI_3      0x0f0e0d0c
> > +#define ACC101_WORDS_IN_ARAM_SIZE (128 * 1024 / 4)
> > +
> > +#define ACC101_NUM_TMPL       32
> > +/* Mapping of signals for the available engines */
> > +#define ACC101_SIG_UL_5G      0
> > +#define ACC101_SIG_UL_5G_LAST 8
> > +#define ACC101_SIG_DL_5G      13
> > +#define ACC101_SIG_DL_5G_LAST 15
> > +#define ACC101_SIG_UL_4G      16
> > +#define ACC101_SIG_UL_4G_LAST 19
> > +#define ACC101_SIG_DL_4G      27
> > +#define ACC101_SIG_DL_4G_LAST 31
> > +#define ACC101_NUM_ACCS       5
> > +#define ACC101_PF_VAL         2
> > +
> > +/* ACC101 Configuration */
> > +#define ACC101_CFG_DMA_ERROR    0x3D7
> > +#define ACC101_CFG_AXI_CACHE    0x11
> > +#define ACC101_CFG_QMGR_HI_P    0x0F0F
> > +#define ACC101_CFG_PCI_AXI      0xC003
> > +#define ACC101_CFG_PCI_BRIDGE   0x40006033
> > +#define ACC101_ENGINE_OFFSET    0x1000
> > +#define ACC101_LONG_WAIT        1000
> > +#define ACC101_GPEX_AXIMAP_NUM  17
> > +#define ACC101_CLOCK_GATING_EN  0x30000
> > +#define ACC101_DMA_INBOUND      0x104
> > +/* DDR Size per VF - 512MB by default
> > + * Can be increased up to 4 GB with single PF/VF  */
> > +#define ACC101_HARQ_DDR         (512 * 1)
> > --
> > 1.8.3.1

Reply via email to