On 2021-09-01 18:32, Bruce Richardson wrote:
Add a series of tests to inject bad copy operations into a dmadev to
test the error handling and reporting capabilities. Various combinations
of errors in various positions in a burst are tested, as are errors in
bursts with fence flag set, and multiple errors in a single burst.

Signed-off-by: Bruce Richardson <bruce.richard...@intel.com>
---
  app/test/test_dmadev.c | 427 +++++++++++++++++++++++++++++++++++++++++
  1 file changed, 427 insertions(+)

diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 7a808a9cba..5d7b6ddd87 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -302,6 +302,414 @@ test_enqueue_copies(int dev_id, uint16_t vchan)
                        || do_multi_copies(dev_id, vchan, 0, 0, 1);
  }
+/* Failure handling test cases - global macros and variables for those tests*/
+#define COMP_BURST_SZ  16
+#define OPT_FENCE(idx) ((fence && idx == 8) ? RTE_DMA_OP_FLAG_FENCE : 0)
+
+static int
+test_failure_in_full_burst(int dev_id, uint16_t vchan, bool fence,
+               struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int 
fail_idx)
+{
+       /* Test single full batch statuses with failures */
+       enum rte_dma_status_code status[COMP_BURST_SZ];
+       struct rte_dmadev_stats baseline, stats;
+       uint16_t invalid_addr_id = 0;
+       uint16_t idx;
+       uint16_t count, status_count;
+       unsigned int i;
+       bool error = 0;

Since 'error' is a bool, please initialise it with 'false' rather than '0':
bool error = false;

+       int err_count = 0;
+
+       rte_dmadev_stats_get(dev_id, vchan, &baseline); /* get a baseline set 
of stats */
+       for (i = 0; i < COMP_BURST_SZ; i++) {
+               int id = rte_dmadev_copy(dev_id, vchan,
+                               (i == fail_idx ? 0 : (srcs[i]->buf_iova + 
srcs[i]->data_off)),
+                               dsts[i]->buf_iova + dsts[i]->data_off,
+                               COPY_LEN, OPT_FENCE(i));
+               if (id < 0) {
+                       PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", 
i);
+                       return -1;
+               }
+               if (i == fail_idx)
+                       invalid_addr_id = id;
+       }
+       rte_dmadev_submit(dev_id, vchan);
+       rte_dmadev_stats_get(dev_id, vchan, &stats);
+       if (stats.submitted != baseline.submitted + COMP_BURST_SZ) {
+               PRINT_ERR("Submitted stats value not as expected, %"PRIu64" not 
%"PRIu64"\n",
+                               stats.submitted, baseline.submitted + 
COMP_BURST_SZ);
+               return -1;
+       }
+
+       await_hw(dev_id, vchan);
+
+       count = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, 
&error);
+       if (count != fail_idx) {
+               PRINT_ERR("Error with rte_dmadev_completed for failure test. Got 
returned %u not %u.\n",
+                               count, fail_idx);
+               rte_dmadev_dump(dev_id, stdout);
+               return -1;
+       }
+       if (error == false) {
For a bool this check can simply be written as: if (!error)
+               PRINT_ERR("Error, missing expected failed copy, %u. has_error is not 
set\n",
+                               fail_idx);
+               return -1;
+       }
+       if (idx != invalid_addr_id - 1) {
+               PRINT_ERR("Error, missing expected failed copy, %u. Got last idx %u, 
not %u\n",
+                               fail_idx, idx, invalid_addr_id - 1);
+               return -1;
+       }
+
+       /* all checks ok, now verify calling completed() again always returns 0 
*/
+       for (i = 0; i < 10; i++) {
+               if (rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, 
&error) != 0
+                               || error == false || idx != (invalid_addr_id - 
1)) {
+                       PRINT_ERR("Error with follow-up completed calls for fail idx 
%u\n",
+                                       fail_idx);
+                       return -1;
+               }
+       }
+
+       status_count = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ,
+                       &idx, status);
+       /* some HW may stop on error and be restarted after getting error 
status for single value
+        * To handle this case, if we get just one error back, wait for more 
completions and get
+        * status for rest of the burst
+        */
+       if (status_count == 1) {
+               await_hw(dev_id, vchan);
+               status_count += rte_dmadev_completed_status(dev_id, vchan, 
COMP_BURST_SZ - 1,
+                                       &idx, &status[1]);
+       }
+       /* check that at this point we have all status values */
+       if (status_count != COMP_BURST_SZ - count) {
+               PRINT_ERR("Error with completed_status calls for fail idx %u. Got %u 
not %u\n",
+                               fail_idx, status_count, COMP_BURST_SZ - count);
+               return -1;
+       }
+       /* now verify just one failure followed by multiple successful or 
skipped entries */
+       if (status[0] == RTE_DMA_STATUS_SUCCESSFUL) {
+               PRINT_ERR("Error with status returned for fail idx %u. First status 
was not failure\n",
+                               fail_idx);
+               return -1;
+       }
+       for (i = 1; i < status_count; i++) {
+               /* after a failure in a burst, depending on ordering/fencing,
+                * operations may be successful or skipped because of previous 
error.
+                */
+               if (status[i] != RTE_DMA_STATUS_SUCCESSFUL
+                               && status[i] != RTE_DMA_STATUS_NOT_ATTEMPTED) {
+                       PRINT_ERR("Error with status calls for fail idx %u. Status 
for job %u (of %u) is not successful\n",
+                                       fail_idx, count + i, COMP_BURST_SZ);
+                       return -1;
+               }
+       }
+
+       /* check the completed + errors stats are as expected */
+       rte_dmadev_stats_get(dev_id, vchan, &stats);
+       if (stats.completed != baseline.completed + COMP_BURST_SZ) {
+               PRINT_ERR("Completed stats value not as expected, %"PRIu64" not 
%"PRIu64"\n",
+                               stats.completed, baseline.completed + 
COMP_BURST_SZ);
+               return -1;
+       }
+       for (i = 0; i < status_count; i++)
+               err_count += (status[i] != RTE_DMA_STATUS_SUCCESSFUL);
+       if (stats.errors != baseline.errors + err_count) {
+               PRINT_ERR("'Errors' stats value not as expected, %"PRIu64" not 
%"PRIu64"\n",
+                               stats.errors, baseline.errors + err_count);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+test_individual_status_query_with_failure(int dev_id, uint16_t vchan, bool 
fence,
+               struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int 
fail_idx)
+{
+       /* Test gathering batch statuses one at a time */
+       enum rte_dma_status_code status[COMP_BURST_SZ];
+       uint16_t invalid_addr_id = 0;
+       uint16_t idx;
+       uint16_t count = 0, status_count = 0;
+       unsigned int j;
+       bool error = false;
+
+       for (j = 0; j < COMP_BURST_SZ; j++) {
+               int id = rte_dmadev_copy(dev_id, vchan,
+                               (j == fail_idx ? 0 : (srcs[j]->buf_iova + 
srcs[j]->data_off)),
+                               dsts[j]->buf_iova + dsts[j]->data_off,
+                               COPY_LEN, OPT_FENCE(j));
+               if (id < 0) {
+                       PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", 
j);
+                       return -1;
+               }
+               if (j == fail_idx)
+                       invalid_addr_id = id;
+       }
+       rte_dmadev_submit(dev_id, vchan);
+       await_hw(dev_id, vchan);
+
+       /* use regular "completed" until we hit error */
+       while (!error) {
+               uint16_t n = rte_dmadev_completed(dev_id, vchan, 1, &idx, 
&error);
+               count += n;
+               if (n > 1 || count >= COMP_BURST_SZ) {
+                       PRINT_ERR("Error - too many completions got\n");
+                       return -1;
+               }
+               if (n == 0 && !error) {
+                       PRINT_ERR("Error, unexpectedly got zero completions after %u 
completed\n",
+                                       count);
+                       return -1;
+               }
+       }
+       if (idx != invalid_addr_id - 1) {
+               PRINT_ERR("Error, last successful index not as expected, got %u, 
expected %u\n",
+                               idx, invalid_addr_id - 1);
+               return -1;
+       }
+
+       /* use completed_status until we hit end of burst */
+       while (count + status_count < COMP_BURST_SZ) {
+               uint16_t n = rte_dmadev_completed_status(dev_id, vchan, 1, &idx,
+                               &status[status_count]);
+               await_hw(dev_id, vchan); /* allow delay to ensure jobs are 
completed */
+               status_count += n;
+               if (n != 1) {
+                       PRINT_ERR("Error: unexpected number of completions received, 
%u, not 1\n",
+                                       n);
+                       return -1;
+               }
+       }
+
+       /* check for single failure */
+       if (status[0] == RTE_DMA_STATUS_SUCCESSFUL) {
+               PRINT_ERR("Error, unexpected successful DMA transaction\n");
+               return -1;
+       }
+       for (j = 1; j < status_count; j++) {
+               if (status[j] != RTE_DMA_STATUS_SUCCESSFUL
+                               && status[j] != RTE_DMA_STATUS_NOT_ATTEMPTED) {
+                       PRINT_ERR("Error, unexpected DMA error reported\n");
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int
+test_single_item_status_query_with_failure(int dev_id, uint16_t vchan,
+               struct rte_mbuf **srcs, struct rte_mbuf **dsts, unsigned int 
fail_idx)
+{
+       /* When error occurs just collect a single error using 
"completed_status()"
+        * before going back to completed() calls
+        */
+       enum rte_dma_status_code status;
+       uint16_t invalid_addr_id = 0;
+       uint16_t idx;
+       uint16_t count, status_count, count2;
+       unsigned int j;
+       bool error = 0;

Same here: initialise the bool with 'false' instead of '0' (bool error = false;).

+
+       for (j = 0; j < COMP_BURST_SZ; j++) {
+               int id = rte_dmadev_copy(dev_id, vchan,
+                               (j == fail_idx ? 0 : (srcs[j]->buf_iova + 
srcs[j]->data_off)),
+                               dsts[j]->buf_iova + dsts[j]->data_off,
+                               COPY_LEN, 0);
+               if (id < 0) {
+                       PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", 
j);
+                       return -1;
+               }
+               if (j == fail_idx)
+                       invalid_addr_id = id;
+       }
+       rte_dmadev_submit(dev_id, vchan);
+       await_hw(dev_id, vchan);
+
+       /* get up to the error point */
+       count = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, 
&error);
+       if (count != fail_idx) {
+               PRINT_ERR("Error with rte_dmadev_completed for failure test. Got 
returned %u not %u.\n",
+                               count, fail_idx);
+               rte_dmadev_dump(dev_id, stdout);
+               return -1;
+       }
+       if (error == false) {

And here: this check can be written as if (!error).

+               PRINT_ERR("Error, missing expected failed copy, %u. has_error is not 
set\n",
+                               fail_idx);
+               return -1;
+       }
+       if (idx != invalid_addr_id - 1) {
+               PRINT_ERR("Error, missing expected failed copy, %u. Got last idx %u, 
not %u\n",
+                               fail_idx, idx, invalid_addr_id - 1);
+               return -1;
+       }
+
+       /* get the error code */
+       status_count = rte_dmadev_completed_status(dev_id, vchan, 1, &idx, 
&status);
+       if (status_count != 1) {
+               PRINT_ERR("Error with completed_status calls for fail idx %u. Got %u 
not %u\n",
+                               fail_idx, status_count, COMP_BURST_SZ - count);
+               return -1;
+       }
+       if (status == RTE_DMA_STATUS_SUCCESSFUL) {
+               PRINT_ERR("Error with status returned for fail idx %u. First status 
was not failure\n",
+                               fail_idx);
+               return -1;
+       }
+       /* delay in case time needed after err handled to complete other jobs */
+       await_hw(dev_id, vchan);
+
+       /* get the rest of the completions without status */
+       count2 = rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, &idx, 
&error);
+       if (error == true) {

Likewise, no need to compare a bool against 'true'; this can be: if (error)

+               PRINT_ERR("Error, got further errors post completed_status() call, 
for failure case %u.\n",
+                               fail_idx);
+               return -1;
+       }
+       if (count + status_count + count2 != COMP_BURST_SZ) {
+               PRINT_ERR("Error, incorrect number of completions received, got %u 
not %u\n",
+                               count + status_count + count2, COMP_BURST_SZ);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+test_multi_failure(int dev_id, uint16_t vchan, struct rte_mbuf **srcs, struct 
rte_mbuf **dsts,
+               const unsigned int *fail, size_t num_fail)
+{
+       /* test having multiple errors in one go */
+       enum rte_dma_status_code status[COMP_BURST_SZ];
+       unsigned int i, j;
+       uint16_t count, err_count = 0;
+       bool error = 0;

Use 'false' here as well (bool error = false;).

+
+       /* enqueue and gather completions in one go */
+       for (j = 0; j < COMP_BURST_SZ; j++) {
+               uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+               /* set up for failure if the current index is anywhere in the 
fails array */
+               for (i = 0; i < num_fail; i++)
+                       if (j == fail[i])
+                               src = 0;
+
+               int id = rte_dmadev_copy(dev_id, vchan,
+                               src, dsts[j]->buf_iova + dsts[j]->data_off,
+                               COPY_LEN, 0);
+               if (id < 0) {
+                       PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", 
j);
+                       return -1;
+               }
+       }
+       rte_dmadev_submit(dev_id, vchan);
+       await_hw(dev_id, vchan);
+
+       count = rte_dmadev_completed_status(dev_id, vchan, COMP_BURST_SZ, NULL, 
status);
+       while (count < COMP_BURST_SZ) {
+               await_hw(dev_id, vchan);
+
+               uint16_t ret = rte_dmadev_completed_status(dev_id, vchan, 
COMP_BURST_SZ - count,
+                               NULL, &status[count]);
+               if (ret == 0) {
+                       PRINT_ERR("Error getting all completions for jobs. Got %u of 
%u\n",
+                                       count, COMP_BURST_SZ);
+                       return -1;
+               }
+               count += ret;
+       }
+       for (i = 0; i < count; i++) {
+               if (status[i] != RTE_DMA_STATUS_SUCCESSFUL)
+                       err_count++;
+       }

Could the {} around the body of this for loop be removed?

+       if (err_count != num_fail) {
+               PRINT_ERR("Error: Invalid number of failed completions returned, %u; 
expected %zu\n",
+                       err_count, num_fail);
+               return -1;
+       }
+
+       /* enqueue and gather completions in bursts, but getting errors one at 
a time */
+       for (j = 0; j < COMP_BURST_SZ; j++) {
+               uintptr_t src = srcs[j]->buf_iova + srcs[j]->data_off;
+               /* set up for failure if the current index is anywhere in the 
fails array */
+               for (i = 0; i < num_fail; i++)
+                       if (j == fail[i])
+                               src = 0;
+
+               int id = rte_dmadev_copy(dev_id, vchan,
+                               src, dsts[j]->buf_iova + dsts[j]->data_off,
+                               COPY_LEN, 0);
+               if (id < 0) {
+                       PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", 
j);
+                       return -1;
+               }
+       }
+       rte_dmadev_submit(dev_id, vchan);
+       await_hw(dev_id, vchan);
+
+       count = 0;
+       err_count = 0;
+       while (count + err_count < COMP_BURST_SZ) {
+               count += rte_dmadev_completed(dev_id, vchan, COMP_BURST_SZ, NULL, 
&error);
+               if (error) {
+                       uint16_t ret = rte_dmadev_completed_status(dev_id, 
vchan, 1,
+                                       NULL, status);
+                       if (ret != 1) {
+                               PRINT_ERR("Error getting error-status for 
completions\n");
+                               return -1;
+                       }
+                       err_count += ret;
+                       await_hw(dev_id, vchan);
+               }
+       }
+       if (err_count != num_fail) {
+               PRINT_ERR("Error: Incorrect number of failed completions received, 
got %u not %zu\n",
+                               err_count, num_fail);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+test_completion_status(int dev_id, uint16_t vchan, bool fence)
+{
+       const unsigned int fail[] = {0, 7, 14, 15};
+       struct rte_mbuf *srcs[COMP_BURST_SZ], *dsts[COMP_BURST_SZ];
+       unsigned int i;
+
+       for (i = 0; i < COMP_BURST_SZ; i++) {
+               srcs[i] = rte_pktmbuf_alloc(pool);
+               dsts[i] = rte_pktmbuf_alloc(pool);
+       }
+
+       for (i = 0; i < RTE_DIM(fail); i++) {
+               if (test_failure_in_full_burst(dev_id, vchan, fence, srcs, dsts, 
fail[i]) < 0)
+                       return -1;
+
+               if (test_individual_status_query_with_failure(dev_id, vchan, 
fence,
+                               srcs, dsts, fail[i]) < 0)
+                       return -1;
+
+               /* test is run the same fenced, or unfenced, but no harm in 
running it twice */
+               if (test_single_item_status_query_with_failure(dev_id, vchan,
+                               srcs, dsts, fail[i]) < 0)
+                       return -1;
+       }
+
+       if (test_multi_failure(dev_id, vchan, srcs, dsts, fail, RTE_DIM(fail)) 
< 0)
+               return -1;
+
+       for (i = 0; i < COMP_BURST_SZ; i++) {
+               rte_pktmbuf_free(srcs[i]);
+               rte_pktmbuf_free(dsts[i]);
+       }
+       return 0;
+}
+
  static int
  test_dmadev_instance(uint16_t dev_id)
  {
@@ -386,6 +794,25 @@ test_dmadev_instance(uint16_t dev_id)
        if (check_stats(&stats, true) < 0)
                goto err;
+ /* to test error handling we can provide null pointers for source or dest in copies. This
+        * requires VA mode in DPDK, since NULL(0) is a valid physical address.
+        */
+       if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+               rte_dmadev_stats_reset(dev_id, vchan);
+               printf("DMA Dev: %u, Running Completion Handling Tests (errors 
expected)\n",
+                               dev_id);
+               if (test_completion_status(dev_id, vchan, false) != 0) /* 
without fences */
+                       goto err;
+               if (test_completion_status(dev_id, vchan, true) != 0) /* with 
fences */
+                       goto err;
+               rte_dmadev_stats_get(dev_id, 0, &stats);
+               printf("Ops submitted: %"PRIu64"\t", stats.submitted);
+               printf("Ops completed: %"PRIu64"\t", stats.completed);
+               printf("Errors: %"PRIu64"\n", stats.errors);
+               if (check_stats(&stats, false) < 0) /* don't check stats.errors 
this time */
+                       goto err;
+       }
+
        rte_mempool_free(pool);
        rte_dmadev_stop(dev_id);
        rte_dmadev_stats_reset(dev_id, vchan);

Reply via email to