Optimization of Turbo Software driver:
- resource-hungry piece of code removed or optimized
- validation of decoder/encoder parameters put under debug flug

Signed-off-by: KamilX Chalupnik <kamilx.chalup...@intel.com>

v2:
- logging macros fixed

---
 drivers/baseband/turbo_sw/bbdev_turbo_software.c | 170 ++++++++++++++---------
 lib/librte_bbdev/rte_bbdev_op.h                  |  18 ++-
 2 files changed, 122 insertions(+), 66 deletions(-)

diff --git a/drivers/baseband/turbo_sw/bbdev_turbo_software.c 
b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
index 2a65d46..8c41ed5 100644
--- a/drivers/baseband/turbo_sw/bbdev_turbo_software.c
+++ b/drivers/baseband/turbo_sw/bbdev_turbo_software.c
@@ -33,12 +33,6 @@ static int bbdev_turbo_sw_logtype;
        rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
                ##__VA_ARGS__)
 
-/* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
-#define C_SUBBLOCK (32)
-#define MAX_TB_SIZE (391656)
-#define MAX_CB_SIZE (6144)
-#define MAX_KW (18528)
-
 /* private data structure */
 struct bbdev_private {
        unsigned int max_nb_queues;  /**< Max number of queues */
@@ -91,7 +85,7 @@ compute_idx(uint16_t k)
 {
        int32_t result = 0;
 
-       if (k < 40 || k > MAX_CB_SIZE)
+       if (k < RTE_BBDEV_MIN_CB_SIZE || k > RTE_BBDEV_MAX_CB_SIZE)
                return -1;
 
        if (k > 2048) {
@@ -235,7 +229,8 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->enc_out = rte_zmalloc_socket(name,
-                       ((MAX_TB_SIZE >> 3) + 3) * sizeof(*q->enc_out) * 3,
+                       ((RTE_BBDEV_MAX_TB_SIZE >> 3) + 3) *
+                       sizeof(*q->enc_out) * 3,
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->enc_out == NULL) {
                rte_bbdev_log(ERR,
@@ -254,7 +249,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->enc_in = rte_zmalloc_socket(name,
-                       (MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
+                       (RTE_BBDEV_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in),
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->enc_in == NULL) {
                rte_bbdev_log(ERR,
@@ -272,7 +267,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->ag = rte_zmalloc_socket(name,
-                       MAX_CB_SIZE * 10 * sizeof(*q->ag),
+                       RTE_BBDEV_MAX_CB_SIZE * 10 * sizeof(*q->ag),
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->ag == NULL) {
                rte_bbdev_log(ERR,
@@ -309,7 +304,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->deint_input = rte_zmalloc_socket(name,
-                       MAX_KW * sizeof(*q->deint_input),
+                       RTE_BBDEV_MAX_KW * sizeof(*q->deint_input),
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->deint_input == NULL) {
                rte_bbdev_log(ERR,
@@ -328,7 +323,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->deint_output = rte_zmalloc_socket(NULL,
-                       MAX_KW * sizeof(*q->deint_output),
+                       RTE_BBDEV_MAX_KW * sizeof(*q->deint_output),
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->deint_output == NULL) {
                rte_bbdev_log(ERR,
@@ -347,7 +342,7 @@ q_setup(struct rte_bbdev *dev, uint16_t q_id,
                return -ENAMETOOLONG;
        }
        q->adapter_output = rte_zmalloc_socket(NULL,
-                       MAX_CB_SIZE * 6 * sizeof(*q->adapter_output),
+                       RTE_BBDEV_MAX_CB_SIZE * 6 * sizeof(*q->adapter_output),
                        RTE_CACHE_LINE_SIZE, queue_conf->socket);
        if (q->adapter_output == NULL) {
                rte_bbdev_log(ERR,
@@ -396,6 +391,7 @@ static const struct rte_bbdev_ops pmd_ops = {
        .queue_release = q_release
 };
 
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
 /* Checks if the encoder input buffer is correct.
  * Returns 0 if it's valid, -1 otherwise.
  */
@@ -415,15 +411,17 @@ is_enc_input_valid(const uint16_t k, const int32_t k_idx,
                return -1;
        }
 
-       if (k > MAX_CB_SIZE) {
+       if (k > RTE_BBDEV_MAX_CB_SIZE) {
                rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
-                               k, MAX_CB_SIZE);
+                               k, RTE_BBDEV_MAX_CB_SIZE);
                return -1;
        }
 
        return 0;
 }
+#endif
 
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
 /* Checks if the decoder input buffer is correct.
  * Returns 0 if it's valid, -1 otherwise.
  */
@@ -442,26 +440,32 @@ is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t 
in_length)
                return -1;
        }
 
-       if (kw > MAX_KW) {
+       if (kw > RTE_BBDEV_MAX_KW) {
                rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d",
-                               kw, MAX_KW);
+                               kw, RTE_BBDEV_MAX_KW);
                return -1;
        }
 
        return 0;
 }
+#endif
 
 static inline void
 process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op,
-               uint8_t cb_idx, uint8_t c, uint16_t k, uint16_t ncb,
+               uint8_t r, uint8_t c, uint16_t k, uint16_t ncb,
                uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out,
                uint16_t in_offset, uint16_t out_offset, uint16_t total_left,
                struct rte_bbdev_stats *q_stats)
 {
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
        int ret;
+#else
+       RTE_SET_USED(total_left);
+#endif
        int16_t k_idx;
        uint16_t m;
        uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out;
+       uint64_t first_3_bytes = 0;
        struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc;
        struct bblib_crc_request crc_req;
        struct bblib_crc_response crc_resp;
@@ -481,73 +485,116 @@ process_enc_cb(struct turbo_sw_queue *q, struct 
rte_bbdev_enc_op *op,
        /* CRC24A (for TB) */
        if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) &&
                (enc->code_block_mode == 1)) {
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
                ret = is_enc_input_valid(k - 24, k_idx, total_left);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
                }
-               /* copy the input to the temporary buffer to be able to extend
-                * it by 3 CRC bytes
-                */
-               rte_memcpy(q->enc_in, in, (k - 24) >> 3);
+#endif
+
                crc_req.data = in;
                crc_req.len = (k - 24) >> 3;
-               crc_resp.data = q->enc_in;
+               /* Check if there is a room for CRC bits if not use
+                * the temporary buffer.
+                */
+               if (rte_pktmbuf_append(m_in, 3) == NULL) {
+                       rte_memcpy(q->enc_in, in, (k - 24) >> 3);
+                       in = q->enc_in;
+               } else {
+                       /* Store 3 first bytes of next CB as they will be
+                        * overwritten by CRC bytes. If it is the last CB then
+                        * there is no point to store 3 next bytes and this
+                        * if..else branch will be omitted.
+                        */
+                       first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
+               }
 
+               crc_resp.data = in;
 #ifdef RTE_TEST_BBDEV
                start_time = rte_rdtsc_precise();
 #endif
-
                bblib_lte_crc24a_gen(&crc_req, &crc_resp);
-
 #ifdef RTE_TEST_BBDEV
                q_stats->turbo_perf_time += rte_rdtsc_precise() - start_time;
 #endif
-
-               in = q->enc_in;
        } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) {
                /* CRC24B */
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
                ret = is_enc_input_valid(k - 24, k_idx, total_left);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
                }
-               /* copy the input to the temporary buffer to be able to extend
-                * it by 3 CRC bytes
-                */
-               rte_memcpy(q->enc_in, in, (k - 24) >> 3);
+#endif
+
                crc_req.data = in;
                crc_req.len = (k - 24) >> 3;
-               crc_resp.data = q->enc_in;
+               /* Check if there is a room for CRC bits if this is the last
+                * CB in TB. If not use temporary buffer.
+                */
+               if ((c - r == 1) && (rte_pktmbuf_append(m_in, 3) == NULL)) {
+                       rte_memcpy(q->enc_in, in, (k - 24) >> 3);
+                       in = q->enc_in;
+               } else if (c - r > 1) {
+                       /* Store 3 first bytes of next CB as they will be
+                        * overwritten by CRC bytes. If it is the last CB then
+                        * there is no point to store 3 next bytes and this
+                        * if..else branch will be omitted.
+                        */
+                       first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]);
+               }
 
+               crc_resp.data = in;
 #ifdef RTE_TEST_BBDEV
                start_time = rte_rdtsc_precise();
 #endif
-
                bblib_lte_crc24b_gen(&crc_req, &crc_resp);
-
 #ifdef RTE_TEST_BBDEV
                q_stats->turbo_perf_time += rte_rdtsc_precise() - start_time;
 #endif
-
-               in = q->enc_in;
-       } else {
+       }
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+       else {
                ret = is_enc_input_valid(k, k_idx, total_left);
                if (ret != 0) {
                        op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                        return;
                }
        }
+#endif
 
        /* Turbo encoder */
 
        /* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
         * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
         * So dst_data's length should be 3*(k/8) + 3 bytes.
+        * In Rate-matching bypass case outputs pointers passed to encoder
+        * (out0, out1 and out2) can directly point to addresses of output from
+        * turbo_enc entity.
         */
-       out0 = q->enc_out;
-       out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
-       out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
+       if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
+               out0 = q->enc_out;
+               out1 = RTE_PTR_ADD(out0, (k >> 3) + 1);
+               out2 = RTE_PTR_ADD(out1, (k >> 3) + 1);
+       } else {
+               out0 = (uint8_t *)rte_pktmbuf_append(m_out, (k >> 3) * 3 + 2);
+               if (out0 == NULL) {
+                       op->status |= 1 << RTE_BBDEV_DATA_ERROR;
+                       rte_bbdev_log(ERR,
+                                       "Too little space in output mbuf");
+                       return;
+               }
+               enc->output.length += (k >> 3) * 3 + 2;
+               /* rte_bbdev_op_data.offset can be different than the
+                * offset of the appended bytes
+                */
+               out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
+               out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
+                               out_offset + (k >> 3) + 1);
+               out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
+                               out_offset + 2 * ((k >> 3) + 1));
+       }
 
        turbo_req.case_id = k_idx;
        turbo_req.input_win = in;
@@ -570,6 +617,10 @@ process_enc_cb(struct turbo_sw_queue *q, struct 
rte_bbdev_enc_op *op,
        q_stats->turbo_perf_time += rte_rdtsc_precise() - start_time;
 #endif
 
+       /* Restore 3 first bytes of next CB if they were overwritten by CRC*/
+       if (first_3_bytes != 0)
+               *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes;
+
        /* Rate-matching */
        if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) {
                /* get output data starting address */
@@ -586,7 +637,7 @@ process_enc_cb(struct turbo_sw_queue *q, struct 
rte_bbdev_enc_op *op,
                rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset);
 
                /* index of current code block */
-               rm_req.r = cb_idx;
+               rm_req.r = r;
                /* total number of code block */
                rm_req.C = c;
                /* For DL - 1, UL - 0 */
@@ -656,23 +707,6 @@ process_enc_cb(struct turbo_sw_queue *q, struct 
rte_bbdev_enc_op *op,
                        tmp_out++;
                }
                *tmp_out = 0;
-
-               /* copy shifted output to turbo_enc entity */
-               out0 = (uint8_t *)rte_pktmbuf_append(m_out,
-                               (k >> 3) * 3 + 2);
-               if (out0 == NULL) {
-                       op->status |= 1 << RTE_BBDEV_DATA_ERROR;
-                       rte_bbdev_log(ERR,
-                                       "Too little space in output mbuf");
-                       return;
-               }
-               enc->output.length += (k >> 3) * 3 + 2;
-               /* rte_bbdev_op_data.offset can be different than the
-                * offset of the appended bytes
-                */
-               out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *,
-                               out_offset);
-               rte_memcpy(out0, q->enc_out, (k >> 3) * 3 + 2);
        }
 }
 
@@ -693,9 +727,9 @@ enqueue_enc_one_op(struct turbo_sw_queue *q, struct 
rte_bbdev_enc_op *op,
        /* Clear op status */
        op->status = 0;
 
-       if (total_left > MAX_TB_SIZE >> 3) {
+       if (total_left > RTE_BBDEV_MAX_TB_SIZE >> 3) {
                rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d",
-                               total_left, MAX_TB_SIZE);
+                               total_left, RTE_BBDEV_MAX_TB_SIZE);
                op->status = 1 << RTE_BBDEV_DATA_ERROR;
                return;
        }
@@ -784,11 +818,11 @@ remove_nulls_from_circular_buf(const uint8_t *in, uint8_t 
*out, uint16_t k,
        const uint32_t d = k + 4;
        const uint32_t kw = (ncb / 3);
        const uint32_t nd = kw - d;
-       const uint32_t r_subblock = kw / C_SUBBLOCK;
+       const uint32_t r_subblock = kw / RTE_BBDEV_C_SUBBLOCK;
        /* Inter-column permutation pattern */
-       const uint32_t P[C_SUBBLOCK] = {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10,
-                       26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19,
-                       11, 27, 7, 23, 15, 31};
+       const uint32_t P[RTE_BBDEV_C_SUBBLOCK] = {0, 16, 8, 24, 4, 20, 12, 28,
+                       2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13,
+                       29, 3, 19, 11, 27, 7, 23, 15, 31};
        in_idx = 0;
        out_idx = 0;
 
@@ -869,7 +903,11 @@ process_dec_cb(struct turbo_sw_queue *q, struct 
rte_bbdev_dec_op *op,
                struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset,
                bool check_crc_24b, uint16_t total_left)
 {
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
        int ret;
+#else
+       RTE_SET_USED(total_left);
+#endif
        int32_t k_idx;
        int32_t iter_cnt;
        uint8_t *in, *out, *adapter_input;
@@ -882,11 +920,13 @@ process_dec_cb(struct turbo_sw_queue *q, struct 
rte_bbdev_dec_op *op,
 
        k_idx = compute_idx(k);
 
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
        ret = is_dec_input_valid(k_idx, kw, total_left);
        if (ret != 0) {
                op->status |= 1 << RTE_BBDEV_DATA_ERROR;
                return;
        }
+#endif
 
        in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset);
        ncb = kw;
@@ -1005,7 +1045,7 @@ enqueue_dec_one_op(struct turbo_sw_queue *q, struct 
rte_bbdev_dec_op *op)
                 * where D is the size of each output from turbo encoder block
                 * (k + 4).
                 */
-               kw = RTE_ALIGN_CEIL(k + 4, C_SUBBLOCK) * 3;
+               kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_C_SUBBLOCK) * 3;
 
                process_dec_cb(q, op, c, k, kw, m_in, m_out, in_offset,
                                out_offset, check_bit(dec->op_flags,
diff --git a/lib/librte_bbdev/rte_bbdev_op.h b/lib/librte_bbdev/rte_bbdev_op.h
index 9a80c64..1a80588 100644
--- a/lib/librte_bbdev/rte_bbdev_op.h
+++ b/lib/librte_bbdev/rte_bbdev_op.h
@@ -25,7 +25,23 @@ extern "C" {
 #include <rte_memory.h>
 #include <rte_mempool.h>
 
-#define RTE_BBDEV_MAX_CODE_BLOCKS 64
+/* Number of columns in sub-block interleaver (36.212, section 5.1.4.1.1) */
+#define RTE_BBDEV_C_SUBBLOCK (32)
+/* Maximum size of Transport Block (36.213, Table, Table 7.1.7.2.5-1) */
+#define RTE_BBDEV_MAX_TB_SIZE (391656)
+/* Maximum size of Code Block (36.212, Table 5.1.3-3) */
+#define RTE_BBDEV_MAX_CB_SIZE (6144)
+/* Minimum size of Code Block (36.212, Table 5.1.3-3) */
+#define RTE_BBDEV_MIN_CB_SIZE (40)
+/* Maximum size of circular buffer */
+#define RTE_BBDEV_MAX_KW (18528)
+/*
+ * Maximum number of Code Blocks in Transport Block. It is calculated based on
+ * maximum size of one Code Block and one Transport Block (considering CRC24A
+ * and CRC24B):
+ * (391656 + 24) / (6144 - 24) = 64
+ */
+#define RTE_BBDEV_MAX_CODE_BLOCKS (64)
 
 /** Flags for turbo decoder operation and capability structure */
 enum rte_bbdev_op_td_flag_bitmasks {
-- 
2.5.5

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

Reply via email to