[PATCH v3 1/2] event/sw: fix ordering corruption with op release
This commit changes the logic in the scheduler to always reset reorder-buffer (and QID/FID) entries when writing them. This avoids stale ROB/QID/FID data re-use, which previously caused ordering issues. Before this commit, release events left the history-list in an inconsistent state, and future events with op type of forward could be incorrectly reordered. There was a partial fix previously committed which is now being resolved for all cases in a more general way, hence the two fixlines here. Fixes: 2e516d18dc01 ("event/sw: fix events mis-identified as needing reorder") Fixes: 617995dfc5b2 ("event/sw: add scheduling logic") Cc: sta...@dpdk.org Suggested-by: Bruce Richardson Signed-off-by: Harry van Haaren Acked-by: Bruce Richardson --- v3: - Fixup whitespace and line wrapping suggestions (Bruce) - Add Fixes lines (Bruce) - Cc stable, as this is a functionality bugfix - Including Ack from v2, as no significant code changes v2: - Rework fix to simpler suggestion (Bruce) - Respin patchset to "apply order" (Bruce) --- drivers/event/sw/sw_evdev_scheduler.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index de6ed21643..cc652815e4 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -90,8 +90,10 @@ sw_schedule_atomic_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, sw->cq_ring_space[cq]--; int head = (p->hist_head++ & (SW_PORT_HIST_LIST-1)); - p->hist_list[head].fid = flow_id; - p->hist_list[head].qid = qid_id; + p->hist_list[head] = (struct sw_hist_list_entry) { + .qid = qid_id, + .fid = flow_id, + }; p->stats.tx_pkts++; qid->stats.tx_pkts++; @@ -162,8 +164,10 @@ sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, qid->stats.tx_pkts++; const int head = (p->hist_head & (SW_PORT_HIST_LIST-1)); - p->hist_list[head].fid = SW_HASH_FLOWID(qe->flow_id); - p->hist_list[head].qid = qid_id; + 
p->hist_list[head] = (struct sw_hist_list_entry) { + .qid = qid_id, + .fid = SW_HASH_FLOWID(qe->flow_id), + }; if (keep_order) rob_ring_dequeue(qid->reorder_buffer_freelist, @@ -419,7 +423,6 @@ __pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder) struct reorder_buffer_entry *rob_entry = hist_entry->rob_entry; - hist_entry->rob_entry = NULL; /* Although fragmentation not currently * supported by eventdev API, we support it * here. Open: How do we alert the user that -- 2.34.1
[PATCH v3 2/2] event/sw: add selftest for ordered history list
This commit adds a unit test for an issue identified where ordered history-list entries are not correctly cleared when the returned event is of op RELEASE type. The result of the history-list bug is that a future event which re-uses that history-list slot, but has an op type of FORWARD will incorrectly be reordered. The existing unit-tests did not cover the RELEASE of an ORDERED queue, and then stress-test the history-list by iterating HIST_LIST times afterwards. Signed-off-by: Harry van Haaren Acked-by: Bruce Richardson --- v3: - Including Ack from v2 --- drivers/event/sw/sw_evdev_selftest.c | 132 +++ 1 file changed, 132 insertions(+) diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index 3aa8d76ca8..59afa260c6 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -2959,6 +2959,132 @@ dev_stop_flush(struct test *t) /* test to check we can properly flush events */ return -1; } +static int +ordered_atomic_hist_completion(struct test *t) +{ + const int rx_enq = 0; + int err; + + /* Create instance with 1 atomic QID going to 3 ports + 1 prod port */ + if (init(t, 2, 2) < 0 || + create_ports(t, 2) < 0 || + create_ordered_qids(t, 1) < 0 || + create_atomic_qids(t, 1) < 0) + return -1; + + /* Helpers to identify queues */ + const uint8_t qid_ordered = t->qid[0]; + const uint8_t qid_atomic = t->qid[1]; + + /* CQ mapping to QID */ + if (rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_port_link(evdev, t->port[1], &t->qid[1], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* Enqueue 1x ordered event, to be RELEASE-ed by the worker +* CPU, which may cause hist-list corruption (by not comleting) +*/ + struct rte_event ord_ev = { + .op = 
RTE_EVENT_OP_NEW, + .queue_id = qid_ordered, + .event_type = RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ord_ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + + /* call the scheduler. This schedules the above event as a single +* event in an ORDERED queue, to the worker. +*/ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Dequeue ORDERED event 0 from port 1, so that we can then drop */ + struct rte_event ev; + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%d: failed to dequeue\n", __LINE__); + return -1; + } + + /* drop the ORDERED event. Here the history list should be completed, +* but might not be if the hist-list bug exists. Call scheduler to make +* it act on the RELEASE that was enqueued. +*/ + rte_event_enqueue_burst(evdev, t->port[1], &release_ev, 1); + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Enqueue 1x atomic event, to then FORWARD to trigger atomic hist-list +* completion. If the bug exists, the ORDERED entry may be completed in +* error (aka, using the ORDERED-ROB for the ATOMIC event). This is the +* main focus of this unit test. +*/ + { + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = qid_atomic, + .event_type = RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .flow_id = 123, + }; + + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Deq ATM event, then forward it for more than HIST_LIST_SIZE times, +* to re-use the history list entry that may be corrupted previously. +*/ + for (int i = 0; i < SW_PORT_HIST_LIST + 2; i++) { + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%
[PATCH 2/2] event/sw: fix ordering corruption with op release
This commit changes the logic in the scheduler to always reset reorder-buffer entries in the QE_FLAG_COMPLETE path, and not just the QE_FLAG_VALID path. A release event is a COMPLETE but not VALID (no new event). As a result, release events previously left the history-list in an inconsistent state, and future events with op type of forward could be incorrectly reordered. Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev_scheduler.c | 45 --- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index 8bc21944f5..9ee6698525 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -360,10 +360,15 @@ __pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder) while (port->pp_buf_count) { const struct rte_event *qe = &port->pp_buf[port->pp_buf_start]; - struct sw_hist_list_entry *hist_entry = NULL; uint8_t flags = qe->op; const uint16_t eop = !(flags & QE_FLAG_NOT_EOP); - int needs_reorder = 0; + + /* rob_entry being NULL or a value is used as the distinction +* between reordering being required (mark ROB as ready) or +* just an Atomic completion. +*/ + struct reorder_buffer_entry *rob_ptr = NULL; + /* if no-reordering, having PARTIAL == NEW */ if (!allow_reorder && !eop) flags = QE_FLAG_VALID; @@ -386,6 +391,7 @@ __pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder) const uint32_t hist_tail = port->hist_tail & (SW_PORT_HIST_LIST - 1); + struct sw_hist_list_entry *hist_entry; hist_entry = &port->hist_list[hist_tail]; const uint32_t hist_qid = hist_entry->qid; const uint32_t hist_fid = hist_entry->fid; @@ -396,17 +402,24 @@ __pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder) if (fid->pcount == 0) fid->cq = -1; + /* Assign current hist-list entry to the rob_entry, to +* allow VALID code below re-use it for checks. 
+*/ + rob_ptr = hist_entry->rob_entry; + + /* Clear the rob entry in this COMPLETE flag phase, as +* RELEASE events must clear hist-list, but MIGHT NOT +* contain a VALID flag too. +*/ + hist_entry->rob_entry = NULL; + if (allow_reorder) { - /* set reorder ready if an ordered QID */ - uintptr_t rob_ptr = - (uintptr_t)hist_entry->rob_entry; const uintptr_t valid = (rob_ptr != 0); - needs_reorder = valid; - rob_ptr |= - ((valid - 1) & (uintptr_t)&dummy_rob); + uintptr_t tmp = (uintptr_t)rob_ptr; + tmp |= ((valid - 1) & (uintptr_t)&dummy_rob); struct reorder_buffer_entry *tmp_rob_ptr = - (struct reorder_buffer_entry *)rob_ptr; - tmp_rob_ptr->ready = eop * needs_reorder; + (struct reorder_buffer_entry *)tmp; + tmp_rob_ptr->ready = eop * valid; } port->inflights -= eop; @@ -415,22 +428,18 @@ __pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder) if (flags & QE_FLAG_VALID) { port->stats.rx_pkts++; - if (allow_reorder && needs_reorder) { - struct reorder_buffer_entry *rob_entry = - hist_entry->rob_entry; - - hist_entry->rob_entry = NULL; + if (allow_reorder && rob_ptr) { /* Although fragmentation not currently * supported by eventdev API, we support it * here. Open: How do we alert the user that * they've exceeded max frags? */ - int num_frag = rob_entry->num_fragments; +
[PATCH 1/2] event/sw: add selftest for ordered history list
This commit adds a unit test for an issue identified where ordered history-list entries are not correctly cleared when the returned event is of op RELEASE type. The result of the history-list bug is that a future event which re-uses that history-list slot, but has an op type of FORWARD will incorrectly be reordered. The existing unit-tests did not cover the RELEASE of an ORDERED queue, and then stress-test the history-list by iterating HIST_LIST times afterwards. Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev_selftest.c | 132 +++ 1 file changed, 132 insertions(+) diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index 3aa8d76ca8..59afa260c6 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -2959,6 +2959,132 @@ dev_stop_flush(struct test *t) /* test to check we can properly flush events */ return -1; } +static int +ordered_atomic_hist_completion(struct test *t) +{ + const int rx_enq = 0; + int err; + + /* Create instance with 1 atomic QID going to 3 ports + 1 prod port */ + if (init(t, 2, 2) < 0 || + create_ports(t, 2) < 0 || + create_ordered_qids(t, 1) < 0 || + create_atomic_qids(t, 1) < 0) + return -1; + + /* Helpers to identify queues */ + const uint8_t qid_ordered = t->qid[0]; + const uint8_t qid_atomic = t->qid[1]; + + /* CQ mapping to QID */ + if (rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_port_link(evdev, t->port[1], &t->qid[1], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* Enqueue 1x ordered event, to be RELEASE-ed by the worker +* CPU, which may cause hist-list corruption (by not comleting) +*/ + struct rte_event ord_ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = qid_ordered, + .event_type = 
RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ord_ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + + /* call the scheduler. This schedules the above event as a single +* event in an ORDERED queue, to the worker. +*/ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Dequeue ORDERED event 0 from port 1, so that we can then drop */ + struct rte_event ev; + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%d: failed to dequeue\n", __LINE__); + return -1; + } + + /* drop the ORDERED event. Here the history list should be completed, +* but might not be if the hist-list bug exists. Call scheduler to make +* it act on the RELEASE that was enqueued. +*/ + rte_event_enqueue_burst(evdev, t->port[1], &release_ev, 1); + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Enqueue 1x atomic event, to then FORWARD to trigger atomic hist-list +* completion. If the bug exists, the ORDERED entry may be completed in +* error (aka, using the ORDERED-ROB for the ATOMIC event). This is the +* main focus of this unit test. +*/ + { + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = qid_atomic, + .event_type = RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .flow_id = 123, + }; + + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Deq ATM event, then forward it for more than HIST_LIST_SIZE times, +* to re-use the history list entry that may be corrupted previously. +*/ + for (int i = 0; i < SW_PORT_HIST_LIST + 2; i++) { + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%d: failed to dequeue, did corrupt ORD hist " + "lis
[PATCH v2 1/2] event/sw: fix ordering corruption with op release
This commit changes the logic in the scheduler to always reset reorder-buffer (and QID/FID) entries when writing them. This avoids stale ROB/QID/FID data re-use, which previously caused ordering issues. Before this commit, release events left the history-list in an inconsistent state, and future events with op type of forward could be incorrectly reordered. Suggested-by: Bruce Richardson Signed-off-by: Harry van Haaren --- v2: - Rework fix to simpler suggestion (Bruce) - Respin patchset to "apply order" (Bruce) --- drivers/event/sw/sw_evdev_scheduler.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index de6ed21643..21c360770e 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -90,8 +90,10 @@ sw_schedule_atomic_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, sw->cq_ring_space[cq]--; int head = (p->hist_head++ & (SW_PORT_HIST_LIST-1)); - p->hist_list[head].fid = flow_id; - p->hist_list[head].qid = qid_id; + p->hist_list[head] = (struct sw_hist_list_entry) { + .qid = qid_id, + .fid = flow_id, + }; p->stats.tx_pkts++; qid->stats.tx_pkts++; @@ -162,8 +164,13 @@ sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, qid->stats.tx_pkts++; const int head = (p->hist_head & (SW_PORT_HIST_LIST-1)); - p->hist_list[head].fid = SW_HASH_FLOWID(qe->flow_id); - p->hist_list[head].qid = qid_id; + const uint32_t fid = SW_HASH_FLOWID(qe->flow_id); + p->hist_list[head] = (struct sw_hist_list_entry) { + .qid = qid_id, + .fid = fid, + }; + + if (keep_order) rob_ring_dequeue(qid->reorder_buffer_freelist, -- 2.34.1
[PATCH v2 2/2] event/sw: add selftest for ordered history list
This commit adds a unit test for an issue identified where ordered history-list entries are not correctly cleared when the returned event is of op RELEASE type. The result of the history-list bug is that a future event which re-uses that history-list slot, but has an op type of FORWARD will incorrectly be reordered. The existing unit-tests did not cover the RELEASE of an ORDERED queue, and then stress-test the history-list by iterating HIST_LIST times afterwards. Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev_selftest.c | 132 +++ 1 file changed, 132 insertions(+) diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index 3aa8d76ca8..59afa260c6 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -2959,6 +2959,132 @@ dev_stop_flush(struct test *t) /* test to check we can properly flush events */ return -1; } +static int +ordered_atomic_hist_completion(struct test *t) +{ + const int rx_enq = 0; + int err; + + /* Create instance with 1 atomic QID going to 3 ports + 1 prod port */ + if (init(t, 2, 2) < 0 || + create_ports(t, 2) < 0 || + create_ordered_qids(t, 1) < 0 || + create_atomic_qids(t, 1) < 0) + return -1; + + /* Helpers to identify queues */ + const uint8_t qid_ordered = t->qid[0]; + const uint8_t qid_atomic = t->qid[1]; + + /* CQ mapping to QID */ + if (rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_port_link(evdev, t->port[1], &t->qid[1], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* Enqueue 1x ordered event, to be RELEASE-ed by the worker +* CPU, which may cause hist-list corruption (by not comleting) +*/ + struct rte_event ord_ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = qid_ordered, + .event_type = 
RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ord_ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + + /* call the scheduler. This schedules the above event as a single +* event in an ORDERED queue, to the worker. +*/ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Dequeue ORDERED event 0 from port 1, so that we can then drop */ + struct rte_event ev; + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%d: failed to dequeue\n", __LINE__); + return -1; + } + + /* drop the ORDERED event. Here the history list should be completed, +* but might not be if the hist-list bug exists. Call scheduler to make +* it act on the RELEASE that was enqueued. +*/ + rte_event_enqueue_burst(evdev, t->port[1], &release_ev, 1); + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Enqueue 1x atomic event, to then FORWARD to trigger atomic hist-list +* completion. If the bug exists, the ORDERED entry may be completed in +* error (aka, using the ORDERED-ROB for the ATOMIC event). This is the +* main focus of this unit test. +*/ + { + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = qid_atomic, + .event_type = RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .flow_id = 123, + }; + + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Deq ATM event, then forward it for more than HIST_LIST_SIZE times, +* to re-use the history list entry that may be corrupted previously. +*/ + for (int i = 0; i < SW_PORT_HIST_LIST + 2; i++) { + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%d: failed to dequeue, did corrupt ORD hist " + "lis
[PATCH] service: debug and fix lingering may_be_active() result
This commit fixes an issue where calling rte_service_lcore_stop() would result in a service's "active on lcore" status becoming stale. The stale status would result in rte_service_may_be_active() always returning "1", indicating that the service is not certainly stopped. This is fixed by ensuring the "active on lcore" status of each service is set to 0 when an lcore is stopped. Fixes: e30dd31847d2 ("service: add mechanism for quiescing") Fixes: 8929de043eb4 ("service: retrieve lcore active state") Reported-by: S. V. Naga Harish Signed-off-by: Harry van Haaren --- lib/eal/common/rte_service.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c index ef31b1f63c..d2b7275ac0 100644 --- a/lib/eal/common/rte_service.c +++ b/lib/eal/common/rte_service.c @@ -758,7 +758,9 @@ rte_service_lcore_stop(uint32_t lcore) return -EALREADY; uint32_t i; - uint64_t service_mask = lcore_states[lcore].service_mask; + struct core_state *cs = &lcore_states[lcore]; + uint64_t service_mask = cs->service_mask; + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { int32_t enabled = service_mask & (UINT64_C(1) << i); int32_t service_running = rte_service_runstate_get(i); @@ -766,6 +768,11 @@ rte_service_lcore_stop(uint32_t lcore) __atomic_load_n(&rte_services[i].num_mapped_cores, __ATOMIC_RELAXED)); + /* Switch off this core for all services, to ensure that future +* calls to may_be_active() know this core is switched off. +*/ + cs->service_active_on_lcore[i] = 0; + /* if the core is mapped, and the service is running, and this * is the only core that is mapped, the service would cease to * run if this core stopped, so fail instead. -- 2.32.0
[PATCH 1/2] test/service: add perf measurements for with stats mode
This commit improves the performance reporting of the service cores polling loop to show both with and without statistics collection modes. Collecting cycle statistics is costly, due to calls to rte_rdtsc() per service iteration. Reported-by: Mattias Rönnblom Suggested-by: Honnappa Nagarahalli Suggested-by: Morten Brørup Signed-off-by: Harry van Haaren --- This is split out as a separate patch from the fix to allow measuring the before/after of the service stats atomic fixup. --- app/test/test_service_cores.c | 36 --- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index ced6ed0081..7415b6b686 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -777,6 +777,22 @@ service_run_on_app_core_func(void *arg) return rte_service_run_iter_on_app_lcore(*delay_service_id, 1); } +static float +service_app_lcore_perf_measure(uint32_t id) +{ + /* Performance test: call in a loop, and measure tsc() */ + const uint32_t perf_iters = (1 << 12); + uint64_t start = rte_rdtsc(); + uint32_t i; + for (i = 0; i < perf_iters; i++) { + int err = service_run_on_app_core_func(&id); + TEST_ASSERT_EQUAL(0, err, "perf test: returned run failure"); + } + uint64_t end = rte_rdtsc(); + + return (end - start)/(float)perf_iters; +} + static int service_app_lcore_poll_impl(const int mt_safe) { @@ -828,17 +844,15 @@ service_app_lcore_poll_impl(const int mt_safe) "MT Unsafe: App core1 didn't return -EBUSY"); } - /* Performance test: call in a loop, and measure tsc() */ - const uint32_t perf_iters = (1 << 12); - uint64_t start = rte_rdtsc(); - uint32_t i; - for (i = 0; i < perf_iters; i++) { - int err = service_run_on_app_core_func(&id); - TEST_ASSERT_EQUAL(0, err, "perf test: returned run failure"); - } - uint64_t end = rte_rdtsc(); - printf("perf test for %s: %0.1f cycles per call\n", mt_safe ? 
- "MT Safe" : "MT Unsafe", (end - start)/(float)perf_iters); + /* Measure performance of no-stats and with-stats. */ + float cyc_no_stats = service_app_lcore_perf_measure(id); + + TEST_ASSERT_EQUAL(0, rte_service_set_stats_enable(id, 1), + "failed to enable stats for service."); + float cyc_with_stats = service_app_lcore_perf_measure(id); + + printf("perf test for %s, no stats: %0.1f, with stats %0.1f cycles/call\n", + mt_safe ? "MT Safe" : "MT Unsafe", cyc_no_stats, cyc_with_stats); unregister_all(); return TEST_SUCCESS; -- 2.32.0
[PATCH 2/2] service: fix potential stats race-condition on MT services
This commit fixes a potential racy add that could occur if multiple service-lcores were executing the same MT-safe service at the same time, with service statistics collection enabled. Because multiple threads can run and execute the service, the stats values can have multiple writer threads, resulting in the requirement of using atomic addition for correctness. Note that when an MT unsafe service is executed, a spinlock is held, so the stats increments are protected. This fact is used to avoid executing atomic add instructions when not required. This patch causes a 1.25x increase in cycle-cost for polling an MT safe service when statistics are enabled. No change was seen for MT unsafe services, or when statistics are disabled. Reported-by: Mattias Rönnblom Suggested-by: Honnappa Nagarahalli Suggested-by: Morten Brørup Signed-off-by: Harry van Haaren --- --- lib/eal/common/rte_service.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c index ef31b1f63c..f045e74ef3 100644 --- a/lib/eal/common/rte_service.c +++ b/lib/eal/common/rte_service.c @@ -363,9 +363,15 @@ service_runner_do_callback(struct rte_service_spec_impl *s, uint64_t start = rte_rdtsc(); s->spec.callback(userdata); uint64_t end = rte_rdtsc(); - s->cycles_spent += end - start; + uint64_t cycles = end - start; cs->calls_per_service[service_idx]++; - s->calls++; + if (service_mt_safe(s)) { + __atomic_fetch_add(&s->cycles_spent, cycles, __ATOMIC_RELAXED); + __atomic_fetch_add(&s->calls, 1, __ATOMIC_RELAXED); + } else { + s->cycles_spent += cycles; + s->calls++; + } } else s->spec.callback(userdata); } -- 2.32.0
[PATCH v2 1/2] test/service: add perf measurements for with stats mode
This commit improves the performance reporting of the service cores polling loop to show both with and without statistics collection modes. Collecting cycle statistics is costly, due to calls to rte_rdtsc() per service iteration. Reported-by: Mattias Rönnblom Suggested-by: Honnappa Nagarahalli Suggested-by: Morten Brørup Signed-off-by: Harry van Haaren --- This is split out as a separate patch from the fix to allow measuring the before/after of the service stats atomic fixup. --- app/test/test_service_cores.c | 36 --- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index ced6ed0081..7415b6b686 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -777,6 +777,22 @@ service_run_on_app_core_func(void *arg) return rte_service_run_iter_on_app_lcore(*delay_service_id, 1); } +static float +service_app_lcore_perf_measure(uint32_t id) +{ + /* Performance test: call in a loop, and measure tsc() */ + const uint32_t perf_iters = (1 << 12); + uint64_t start = rte_rdtsc(); + uint32_t i; + for (i = 0; i < perf_iters; i++) { + int err = service_run_on_app_core_func(&id); + TEST_ASSERT_EQUAL(0, err, "perf test: returned run failure"); + } + uint64_t end = rte_rdtsc(); + + return (end - start)/(float)perf_iters; +} + static int service_app_lcore_poll_impl(const int mt_safe) { @@ -828,17 +844,15 @@ service_app_lcore_poll_impl(const int mt_safe) "MT Unsafe: App core1 didn't return -EBUSY"); } - /* Performance test: call in a loop, and measure tsc() */ - const uint32_t perf_iters = (1 << 12); - uint64_t start = rte_rdtsc(); - uint32_t i; - for (i = 0; i < perf_iters; i++) { - int err = service_run_on_app_core_func(&id); - TEST_ASSERT_EQUAL(0, err, "perf test: returned run failure"); - } - uint64_t end = rte_rdtsc(); - printf("perf test for %s: %0.1f cycles per call\n", mt_safe ? 
- "MT Safe" : "MT Unsafe", (end - start)/(float)perf_iters); + /* Measure performance of no-stats and with-stats. */ + float cyc_no_stats = service_app_lcore_perf_measure(id); + + TEST_ASSERT_EQUAL(0, rte_service_set_stats_enable(id, 1), + "failed to enable stats for service."); + float cyc_with_stats = service_app_lcore_perf_measure(id); + + printf("perf test for %s, no stats: %0.1f, with stats %0.1f cycles/call\n", + mt_safe ? "MT Safe" : "MT Unsafe", cyc_no_stats, cyc_with_stats); unregister_all(); return TEST_SUCCESS; -- 2.32.0
[PATCH v2 2/2] service: fix potential stats race-condition on MT services
This commit fixes a potential racy add that could occur if multiple service-lcores were executing the same MT-safe service at the same time, with service statistics collection enabled. Because multiple threads can run and execute the service, the stats values can have multiple writer threads, resulting in the requirement of using atomic addition for correctness. Note that when an MT unsafe service is executed, a spinlock is held, so the stats increments are protected. This fact is used to avoid executing atomic add instructions when not required. Regular reads and increments are used, and only the store is specified as atomic, reducing perf impact on e.g. x86 arch. This patch causes a 1.25x increase in cycle-cost for polling an MT safe service when statistics are enabled. No change was seen for MT unsafe services, or when statistics are disabled. Reported-by: Mattias Rönnblom Suggested-by: Honnappa Nagarahalli Suggested-by: Morten Brørup Suggested-by: Bruce Richardson Signed-off-by: Harry van Haaren --- v2 (Thanks Honnappa, Morten, Bruce & Mattias for discussion): - Improved handling of stat stores to ensure they're atomic by using __atomic_store_n() with regular loads/increments. - Added BUILD_BUG_ON alignment checks for the uint64_t stats variables, tested with __rte_packed to ensure build breaks if not aligned naturally. --- lib/eal/common/rte_service.c | 23 +-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c index d2b7275ac0..90d12032f0 100644 --- a/lib/eal/common/rte_service.c +++ b/lib/eal/common/rte_service.c @@ -54,6 +54,9 @@ struct rte_service_spec_impl { uint64_t cycles_spent; } __rte_cache_aligned; +/* Mask used to ensure uint64_t 8 byte vars are naturally aligned. 
*/ +#define RTE_SERVICE_STAT_ALIGN_MASK (8 - 1) + /* the internal values of a service core */ struct core_state { /* map of services IDs are run on this core */ @@ -359,13 +362,29 @@ service_runner_do_callback(struct rte_service_spec_impl *s, { void *userdata = s->spec.callback_userdata; + /* Ensure the atomically stored variables are naturally aligned, +* as required for regular loads to be atomic. +*/ + RTE_BUILD_BUG_ON((offsetof(struct rte_service_spec_impl, calls) + & RTE_SERVICE_STAT_ALIGN_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_service_spec_impl, cycles_spent) + & RTE_SERVICE_STAT_ALIGN_MASK) != 0); + if (service_stats_enabled(s)) { uint64_t start = rte_rdtsc(); s->spec.callback(userdata); uint64_t end = rte_rdtsc(); - s->cycles_spent += end - start; + uint64_t cycles = end - start; cs->calls_per_service[service_idx]++; - s->calls++; + if (service_mt_safe(s)) { + __atomic_fetch_add(&s->cycles_spent, cycles, __ATOMIC_RELAXED); + __atomic_fetch_add(&s->calls, 1, __ATOMIC_RELAXED); + } else { + uint64_t cycles_new = s->cycles_spent + cycles; + uint64_t calls_new = s->calls++; + __atomic_store_n(&s->cycles_spent, cycles_new, __ATOMIC_RELAXED); + __atomic_store_n(&s->calls, calls_new, __ATOMIC_RELAXED); + } } else s->spec.callback(userdata); } -- 2.32.0
[PATCH v3 1/2] test/service: add perf measurements for with stats mode
This commit improves the performance reporting of the service cores polling loop to show both with and without statistics collection modes. Collecting cycle statistics is costly, due to calls to rte_rdtsc() per service iteration. Reported-by: Mattias Rönnblom Suggested-by: Honnappa Nagarahalli Suggested-by: Morten Brørup Signed-off-by: Harry van Haaren --- This is split out as a separate patch from the fix to allow measuring the before/after of the service stats atomic fixup. --- app/test/test_service_cores.c | 36 --- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index ced6ed0081..7415b6b686 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -777,6 +777,22 @@ service_run_on_app_core_func(void *arg) return rte_service_run_iter_on_app_lcore(*delay_service_id, 1); } +static float +service_app_lcore_perf_measure(uint32_t id) +{ + /* Performance test: call in a loop, and measure tsc() */ + const uint32_t perf_iters = (1 << 12); + uint64_t start = rte_rdtsc(); + uint32_t i; + for (i = 0; i < perf_iters; i++) { + int err = service_run_on_app_core_func(&id); + TEST_ASSERT_EQUAL(0, err, "perf test: returned run failure"); + } + uint64_t end = rte_rdtsc(); + + return (end - start)/(float)perf_iters; +} + static int service_app_lcore_poll_impl(const int mt_safe) { @@ -828,17 +844,15 @@ service_app_lcore_poll_impl(const int mt_safe) "MT Unsafe: App core1 didn't return -EBUSY"); } - /* Performance test: call in a loop, and measure tsc() */ - const uint32_t perf_iters = (1 << 12); - uint64_t start = rte_rdtsc(); - uint32_t i; - for (i = 0; i < perf_iters; i++) { - int err = service_run_on_app_core_func(&id); - TEST_ASSERT_EQUAL(0, err, "perf test: returned run failure"); - } - uint64_t end = rte_rdtsc(); - printf("perf test for %s: %0.1f cycles per call\n", mt_safe ? 
- "MT Safe" : "MT Unsafe", (end - start)/(float)perf_iters); + /* Measure performance of no-stats and with-stats. */ + float cyc_no_stats = service_app_lcore_perf_measure(id); + + TEST_ASSERT_EQUAL(0, rte_service_set_stats_enable(id, 1), + "failed to enable stats for service."); + float cyc_with_stats = service_app_lcore_perf_measure(id); + + printf("perf test for %s, no stats: %0.1f, with stats %0.1f cycles/call\n", + mt_safe ? "MT Safe" : "MT Unsafe", cyc_no_stats, cyc_with_stats); unregister_all(); return TEST_SUCCESS; -- 2.32.0
[PATCH v3 2/2] service: fix potential stats race-condition on MT services
This commit fixes a potential racy add that could occur if multiple service-lcores were executing the same MT-safe service at the same time, with service statistics collection enabled. Because multiple threads can run and execute the service, the stats values can have multiple writer threads, resulting in the requirement of using atomic addition for correctness. Note that when a MT unsafe service is executed, a spinlock is held, so the stats increments are protected. This fact is used to avoid executing atomic add instructions when not required. Regular reads and increments are used, and only the store is specified as atomic, reducing perf impact on e.g. x86 arch. This patch causes a 1.25x increase in cycle-cost for polling a MT safe service when statistics are enabled. No change was seen for MT unsafe services, or when statistics are disabled. Reported-by: Mattias Rönnblom Suggested-by: Honnappa Nagarahalli Suggested-by: Morten Brørup Suggested-by: Bruce Richardson Signed-off-by: Harry van Haaren --- v3: - Fix 32-bit build, by forcing natural alignment of uint64_t in the struct that contains it, using __rte_aligned(8) macro. - Note: I'm seeing a checkpatch "avoid externs in .c files" warning, but it doesn't make sense to me, so perhaps it's a false positive? v2 (Thanks Honnappa, Morten, Bruce & Mattias for discussion): - Improved handling of stat stores to ensure they're atomic by using __atomic_store_n() with regular loads/increments. - Added BUILD_BUG_ON alignment checks for the uint64_t stats variables, tested with __rte_packed to ensure build breaks. --- lib/eal/common/rte_service.c | 31 +++ 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c index d2b7275ac0..94cb056196 100644 --- a/lib/eal/common/rte_service.c +++ b/lib/eal/common/rte_service.c @@ -50,10 +50,17 @@ struct rte_service_spec_impl { * on currently. 
*/ uint32_t num_mapped_cores; - uint64_t calls; - uint64_t cycles_spent; + + /* 32-bit builds won't naturally align a uint64_t, so force alignment, +* allowing regular reads to be atomic. +*/ + uint64_t calls __rte_aligned(8); + uint64_t cycles_spent __rte_aligned(8); } __rte_cache_aligned; +/* Mask used to ensure uint64_t 8 byte vars are naturally aligned. */ +#define RTE_SERVICE_STAT_ALIGN_MASK (8 - 1) + /* the internal values of a service core */ struct core_state { /* map of services IDs are run on this core */ @@ -359,13 +366,29 @@ service_runner_do_callback(struct rte_service_spec_impl *s, { void *userdata = s->spec.callback_userdata; + /* Ensure the atomically stored variables are naturally aligned, +* as required for regular loads to be atomic. +*/ + RTE_BUILD_BUG_ON((offsetof(struct rte_service_spec_impl, calls) + & RTE_SERVICE_STAT_ALIGN_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_service_spec_impl, cycles_spent) + & RTE_SERVICE_STAT_ALIGN_MASK) != 0); + if (service_stats_enabled(s)) { uint64_t start = rte_rdtsc(); s->spec.callback(userdata); uint64_t end = rte_rdtsc(); - s->cycles_spent += end - start; + uint64_t cycles = end - start; cs->calls_per_service[service_idx]++; - s->calls++; + if (service_mt_safe(s)) { + __atomic_fetch_add(&s->cycles_spent, cycles, __ATOMIC_RELAXED); + __atomic_fetch_add(&s->calls, 1, __ATOMIC_RELAXED); + } else { + uint64_t cycles_new = s->cycles_spent + cycles; + uint64_t calls_new = s->calls++; + __atomic_store_n(&s->cycles_spent, cycles_new, __ATOMIC_RELAXED); + __atomic_store_n(&s->calls, calls_new, __ATOMIC_RELAXED); + } } else s->spec.callback(userdata); } -- 2.32.0
[dpdk-dev] [PATCH] app/test-eventdev: fix terminal colour after control-c exit
Before this commit, a Control^C exit of the test-eventdev application would print the worker packet percentages, and leave the terminal with a green colour despite the colour reset being issued after the newline. By moving the colour reset command before the \n the issue is fixed. Fixes: 6b1a14a83a06 ("app/eventdev: add packet distribution logs") Signed-off-by: Harry van Haaren --- Given this is an aesthetic-only fix, I feel it's not worth backporting. Cc: pbhagavat...@marvell.com> --- app/test-eventdev/test_perf_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index e0d9f05ecd..a1b8dd72ee 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -19,7 +19,7 @@ perf_test_result(struct evt_test *test, struct evt_options *opt) total += t->worker[i].processed_pkts; for (i = 0; i < t->nb_workers; i++) printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:" - CLGRN" %3.2f\n"CLNRM, i, + CLGRN" %3.2f"CLNRM"\n", i, t->worker[i].processed_pkts, (((double)t->worker[i].processed_pkts)/total) * 100); -- 2.30.2
[dpdk-dev] [PATCH v2 1/4] eventdev: add usage hints to port configure API
This commit introduces 3 flags to the port configuration flags. These flags allow the application to indicate what type of work is expected to be performed by an eventdev port. The three new flags are - RTE_EVENT_PORT_CFG_HINT_PRODUCER (mostly RTE_EVENT_OP_NEW events) - RTE_EVENT_PORT_CFG_HINT_CONSUMER (mostly RTE_EVENT_OP_RELEASE events) - RTE_EVENT_PORT_CFG_HINT_WORKER (mostly RTE_EVENT_OP_FORWARD events) These flags are only hints, and the PMDs must operate under the assumption that any port can enqueue an event with any type of op. Signed-off-by: Harry van Haaren --- v2: - Add note about PMD assumptions to all hints (Jerin) - Fixup title to eventdev: (Jerin) - Improve wording/readability of worker hint description. --- lib/eventdev/rte_eventdev.h | 32 1 file changed, 32 insertions(+) diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h index a9c496fb62..c20d0b0c86 100644 --- a/lib/eventdev/rte_eventdev.h +++ b/lib/eventdev/rte_eventdev.h @@ -709,6 +709,38 @@ rte_event_queue_attr_get(uint8_t dev_id, uint8_t queue_id, uint32_t attr_id, * * @see rte_event_port_setup(), rte_event_port_link() */ +#define RTE_EVENT_PORT_CFG_HINT_PRODUCER (1ULL << 2) +/**< Hint that this event port will primarily enqueue events to the system. + * A PMD can optimize its internal workings by assuming that this port is + * primarily going to enqueue NEW events. + * + * Note that this flag is only a hint, so PMDs must operate under the + * assumption that any port can enqueue an event with any type of op. + * + * @see rte_event_port_setup() + */ +#define RTE_EVENT_PORT_CFG_HINT_CONSUMER (1ULL << 3) +/**< Hint that this event port will primarily dequeue events from the system. + * A PMD can optimize its internal workings by assuming that this port is + * primarily going to consume events, and not enqueue FORWARD or RELEASE + * events. 
+ * + * Note that this flag is only a hint, so PMDs must operate under the + * assumption that any port can enqueue an event with any type of op. + * + * @see rte_event_port_setup() + */ +#define RTE_EVENT_PORT_CFG_HINT_WORKER (1ULL << 4) +/**< Hint that this event port will primarily pass existing events through. + * A PMD can optimize its internal workings by assuming that this port is + * primarily going to FORWARD events, and not enqueue NEW or RELEASE events + * often. + * + * Note that this flag is only a hint, so PMDs must operate under the + * assumption that any port can enqueue an event with any type of op. + * + * @see rte_event_port_setup() + */ /** Event port configuration structure */ struct rte_event_port_conf { -- 2.30.2
[dpdk-dev] [PATCH v2 2/4] examples/eventdev_pipeline: use port config hints
This commit adds the per-port hints added to the eventdev API, indicating which eventdev ports will be used for producing, forwarding, or consuming events from the system. Signed-off-by: Harry van Haaren --- examples/eventdev_pipeline/pipeline_worker_generic.c | 2 ++ examples/eventdev_pipeline/pipeline_worker_tx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/examples/eventdev_pipeline/pipeline_worker_generic.c b/examples/eventdev_pipeline/pipeline_worker_generic.c index 5ed0dc73ec..873ba13dfc 100644 --- a/examples/eventdev_pipeline/pipeline_worker_generic.c +++ b/examples/eventdev_pipeline/pipeline_worker_generic.c @@ -139,6 +139,7 @@ setup_eventdev_generic(struct worker_data *worker_data) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_WORKER, }; struct rte_event_queue_conf wkr_q_conf = { .schedule_type = cdata.queue_type, @@ -416,6 +417,7 @@ init_adapters(uint16_t nb_ports) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_PRODUCER, }; if (adptr_p_conf.new_event_threshold > dev_info.max_num_events) diff --git a/examples/eventdev_pipeline/pipeline_worker_tx.c b/examples/eventdev_pipeline/pipeline_worker_tx.c index ab8c6d6a0d..d760e2f6da 100644 --- a/examples/eventdev_pipeline/pipeline_worker_tx.c +++ b/examples/eventdev_pipeline/pipeline_worker_tx.c @@ -446,6 +446,7 @@ setup_eventdev_worker_tx_enq(struct worker_data *worker_data) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_WORKER, }; struct rte_event_queue_conf wkr_q_conf = { .schedule_type = cdata.queue_type, @@ -744,6 +745,7 @@ init_adapters(uint16_t nb_ports) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_PRODUCER, }; init_ports(nb_ports); -- 2.30.2
[dpdk-dev] [PATCH v2 3/4] test-eventdev: add event port hints for perf mode
This commit adds producer, worker and consumer port hints for the test-eventdev application performance tests. Signed-off-by: Harry van Haaren --- app/test-eventdev/test_perf_common.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index cc100650c2..766ea22a27 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -480,7 +480,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, w->processed_pkts = 0; w->latency = 0; - ret = rte_event_port_setup(opt->dev_id, port, port_conf); + struct rte_event_port_conf conf = *port_conf; + conf.event_port_cfg |= RTE_EVENT_PORT_CFG_HINT_WORKER; + + ret = rte_event_port_setup(opt->dev_id, port, &conf); if (ret) { evt_err("failed to setup port %d", port); return ret; @@ -500,7 +503,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, p->t = t; } - ret = perf_event_rx_adapter_setup(opt, stride, *port_conf); + struct rte_event_port_conf conf = *port_conf; + conf.event_port_cfg |= RTE_EVENT_PORT_CFG_HINT_PRODUCER; + + ret = perf_event_rx_adapter_setup(opt, stride, conf); if (ret) return ret; } else if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) { @@ -525,8 +531,12 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, p->queue_id = prod * stride; p->t = t; - ret = rte_event_port_setup(opt->dev_id, port, - port_conf); + struct rte_event_port_conf conf = *port_conf; + conf.event_port_cfg |= + RTE_EVENT_PORT_CFG_HINT_PRODUCER | + RTE_EVENT_PORT_CFG_HINT_CONSUMER; + + ret = rte_event_port_setup(opt->dev_id, port, &conf); if (ret) { evt_err("failed to setup port %d", port); return ret; -- 2.30.2
[dpdk-dev] [PATCH v2 4/4] event/dlb2: optimize credit allocations using port hint flags
From: "Pathak, Pravin" This commit implements the changes required for using suggested port type hint feature. Each port uses different credit quanta based on port type specified using port configuration flags. Each port has separate quanta defined in dlb2_priv.h Producer and consumer ports will need larger quanta value to reduce number of credit calls they make. Workers can use small quanta as they mostly work out of locally cached credits and don't request/return credits often. Signed-off-by: Pathak, Pravin --- drivers/event/dlb2/dlb2.c | 61 ++--- drivers/event/dlb2/dlb2_priv.h | 12 +-- drivers/event/dlb2/pf/dlb2_pf.c | 1 + 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c index 252bbd8d5e..f77f00c0bc 100644 --- a/drivers/event/dlb2/dlb2.c +++ b/drivers/event/dlb2/dlb2.c @@ -355,6 +355,26 @@ set_sw_credit_quanta(const char *key __rte_unused, return 0; } +static int +set_hw_credit_quanta(const char *key __rte_unused, + const char *value, + void *opaque) +{ + int *hw_credit_quanta = opaque; + int ret; + + if (value == NULL || opaque == NULL) { + DLB2_LOG_ERR("NULL pointer\n"); + return -EINVAL; + } + + ret = dlb2_string_to_int(hw_credit_quanta, value); + if (ret < 0) + return ret; + + return 0; +} + static int set_default_depth_thresh(const char *key __rte_unused, const char *value, @@ -769,7 +789,7 @@ dlb2_eventdev_configure(const struct rte_eventdev *dev) if (rsrcs->num_ldb_queues) rsrcs->num_ldb_credits = config->nb_events_limit; if (rsrcs->num_dir_ports) - rsrcs->num_dir_credits = config->nb_events_limit / 4; + rsrcs->num_dir_credits = config->nb_events_limit / 2; if (dlb2->num_dir_credits_override != -1) rsrcs->num_dir_credits = dlb2->num_dir_credits_override; } @@ -1693,6 +1713,7 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev, struct dlb2_eventdev *dlb2; struct dlb2_eventdev_port *ev_port; int ret; + uint32_t hw_credit_quanta, sw_credit_quanta; if (dev == NULL || port_conf == NULL) { 
DLB2_LOG_ERR("Null parameter\n"); @@ -1753,9 +1774,25 @@ dlb2_eventdev_port_setup(struct rte_eventdev *dev, RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL); ev_port->outstanding_releases = 0; ev_port->inflight_credits = 0; - ev_port->credit_update_quanta = dlb2->sw_credit_quanta; ev_port->dlb2 = dlb2; /* reverse link */ + /* Default for worker ports */ + sw_credit_quanta = dlb2->sw_credit_quanta; + hw_credit_quanta = dlb2->hw_credit_quanta; + + if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) { + /* Producer type ports. Mostly enqueue */ + sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT; + hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ; + } + if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) { + /* Consumer type ports. Mostly dequeue */ + sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT; + hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ; + } + ev_port->credit_update_quanta = sw_credit_quanta; + ev_port->qm_port.hw_credit_quanta = hw_credit_quanta; + /* Tear down pre-existing port->queue links */ if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]); @@ -2378,7 +2415,8 @@ dlb2_port_credits_get(struct dlb2_port *qm_port, enum dlb2_hw_queue_types type) { uint32_t credits = *qm_port->credit_pool[type]; - uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ; + /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */ + uint32_t batch_size = qm_port->hw_credit_quanta; if (unlikely(credits < batch_size)) batch_size = credits; @@ -3112,7 +3150,7 @@ dlb2_event_release(struct dlb2_eventdev *dlb2, static inline void dlb2_port_credits_inc(struct dlb2_port *qm_port, int num) { - uint32_t batch_size = DLB2_SW_CREDIT_BATCH_SZ; + uint32_t batch_size = qm_port->hw_credit_quanta; /* increment port credits, and return to pool if exceeds threshold */ if (!qm_port->is_directed) { @@ -4446,6 +4484,7 @@ dlb2_primary_eventdev_probe(struct rte_eventdev *dev, dlb2->qm_instance.cos_id = dlb2_args->cos_id; 
dlb2->poll_interval = dlb2_args->poll_interval; dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta; + dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta; dlb2->default_depth_thresh = dlb2_args->default_depth_thresh; dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabl
[PATCH 1/2] docs/bpf: fix formatting and link
Small improvements to the documentation based on Sphinx HTML doc output. Signed-off-by: Harry van Haaren --- Cc: konstantin.v.anan...@yandex.ru --- doc/guides/prog_guide/bpf_lib.rst | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/guides/prog_guide/bpf_lib.rst b/doc/guides/prog_guide/bpf_lib.rst index 1feb7734a3..1cf2d59429 100644 --- a/doc/guides/prog_guide/bpf_lib.rst +++ b/doc/guides/prog_guide/bpf_lib.rst @@ -10,7 +10,7 @@ user-space dpdk application. It supports basic set of features from eBPF spec. Please refer to the -`eBPF spec <https://www.kernel.org/doc/Documentation/networking/filter.txt>` +`eBPF spec <https://www.kernel.org/doc/Documentation/networking/filter.txt>`_ for more information. Also it introduces basic framework to load/unload BPF-based filters on eth devices (right now only via SW RX/TX callbacks). @@ -48,9 +48,9 @@ For example, ``(BPF_IND | BPF_W | BPF_LD)`` means: .. code-block:: c uint32_t tmp; -R0 = rte_pktmbuf_read((const struct rte_mbuf *)R6, src_reg + imm32, - sizeof(tmp), &tmp); -if (R0 == NULL) return FAILED; +R0 = rte_pktmbuf_read((const struct rte_mbuf *)R6, src_reg + imm32, sizeof(tmp), &tmp); +if (R0 == NULL) +return FAILED; R0 = ntohl(*(uint32_t *)R0); and ``R1-R5`` were scratched. -- 2.32.0
[PATCH 2/2] docs: improve ordering and remove old header titles
Move the "source_org" page to after overview, where it fits better to explain the source-code layout of DPDK, before getting into details of specific libraries such as EAL. Also removes the older titles from the 3 documents which still had them. Signed-off-by: Harry van Haaren --- doc/guides/prog_guide/index.rst | 2 +- doc/guides/prog_guide/overview.rst| 2 -- doc/guides/prog_guide/perf_opt_guidelines.rst | 2 -- doc/guides/prog_guide/source_org.rst | 2 -- 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst index 6c7942b845..8564883018 100644 --- a/doc/guides/prog_guide/index.rst +++ b/doc/guides/prog_guide/index.rst @@ -10,6 +10,7 @@ Programmer's Guide intro overview +source_org env_abstraction_layer service_cores trace_lib @@ -67,7 +68,6 @@ Programmer's Guide bpf_lib ipsec_lib graph_lib -source_org build-sdk-meson meson_ut build_app diff --git a/doc/guides/prog_guide/overview.rst b/doc/guides/prog_guide/overview.rst index 537a568f1e..a3b6603320 100644 --- a/doc/guides/prog_guide/overview.rst +++ b/doc/guides/prog_guide/overview.rst @@ -1,8 +1,6 @@ .. SPDX-License-Identifier: BSD-3-Clause Copyright(c) 2010-2014 Intel Corporation. -**Part 1: Architecture Overview** - Overview diff --git a/doc/guides/prog_guide/perf_opt_guidelines.rst b/doc/guides/prog_guide/perf_opt_guidelines.rst index 88f92909c1..360009f2e4 100644 --- a/doc/guides/prog_guide/perf_opt_guidelines.rst +++ b/doc/guides/prog_guide/perf_opt_guidelines.rst @@ -3,8 +3,6 @@ .. _Performance_Optimization: -**Part 3: Performance Optimization** - Performance Optimization Guidelines === diff --git a/doc/guides/prog_guide/source_org.rst b/doc/guides/prog_guide/source_org.rst index 5e4333460a..3d5f7f3bdd 100644 --- a/doc/guides/prog_guide/source_org.rst +++ b/doc/guides/prog_guide/source_org.rst @@ -1,8 +1,6 @@ .. SPDX-License-Identifier: BSD-3-Clause Copyright(c) 2010-2014 Intel Corporation. 
-**Part 2: Development Environment** - Source Organization === -- 2.32.0
[dpdk-dev] [PATCH] eventdev: remove return value comment in pmd api
The PMD info get API has a void return type. Remove the @return 0 Success doxygen comment as it doesn't make sense here. Reported-by: Fredrik A Lindgren Signed-off-by: Harry van Haaren --- lib/librte_eventdev/eventdev_pmd.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/librte_eventdev/eventdev_pmd.h b/lib/librte_eventdev/eventdev_pmd.h index 9e83993efa..7eb9a77393 100644 --- a/lib/librte_eventdev/eventdev_pmd.h +++ b/lib/librte_eventdev/eventdev_pmd.h @@ -158,9 +158,6 @@ rte_event_pmd_is_valid_dev(uint8_t dev_id) * Event device pointer * @param dev_info * Event device information structure - * - * @return - * Returns 0 on success */ typedef void (*eventdev_info_get_t)(struct rte_eventdev *dev, struct rte_event_dev_info *dev_info); -- 2.25.1
[dpdk-dev] [PATCH v3 1/3] service: add component useful work attribute
This commit adds a new attribute which allows the service to indicate if the previous iteration of work was "useful". Useful work here implies forward progress was made. Exposing this information via an attribute to the application allows tracking of CPU cycles as being useful or not-useful, and a CPU load estimate can be deduced from that information. Signed-off-by: Harry van Haaren --- lib/librte_eal/common/rte_service.c | 19 +++ lib/librte_eal/include/rte_service.h | 5 + .../include/rte_service_component.h | 13 + lib/librte_eal/version.map| 3 +++ 4 files changed, 40 insertions(+) diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index bd8fb72e78..859fc3 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -58,6 +58,7 @@ struct rte_service_spec_impl { uint32_t num_mapped_cores; uint64_t calls; uint64_t cycles_spent; + uint8_t useful_work_last_iter; } __rte_cache_aligned; /* the internal values of a service core */ @@ -294,6 +295,21 @@ rte_service_component_unregister(uint32_t id) return 0; } +int32_t +rte_service_component_attr_set(uint32_t id, uint32_t attr, uint64_t value) +{ + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + switch (attr) { + case RTE_SERVICE_ATTR_USEFUL_WORK_LAST_ITER: + s->useful_work_last_iter = value; + return 0; + default: + return -EINVAL; + }; +} + int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate) { @@ -799,6 +815,9 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint64_t *attr_value) return -EINVAL; switch (attr_id) { + case RTE_SERVICE_ATTR_USEFUL_WORK_LAST_ITER: + *attr_value = s->useful_work_last_iter; + return 0; case RTE_SERVICE_ATTR_CYCLES: *attr_value = s->cycles_spent; return 0; diff --git a/lib/librte_eal/include/rte_service.h b/lib/librte_eal/include/rte_service.h index ca9950d091..d50b5c8d7a 100644 --- a/lib/librte_eal/include/rte_service.h +++ 
b/lib/librte_eal/include/rte_service.h @@ -390,6 +390,11 @@ int32_t rte_service_dump(FILE *f, uint32_t id); */ #define RTE_SERVICE_ATTR_CALL_COUNT 1 +/** + * Returns if the last iteration of the service resulted in useful work done. + */ +#define RTE_SERVICE_ATTR_USEFUL_WORK_LAST_ITER 2 + /** * Get an attribute from a service. * diff --git a/lib/librte_eal/include/rte_service_component.h b/lib/librte_eal/include/rte_service_component.h index 9e66ee7e29..534f41f531 100644 --- a/lib/librte_eal/include/rte_service_component.h +++ b/lib/librte_eal/include/rte_service_component.h @@ -87,6 +87,19 @@ int32_t rte_service_component_register(const struct rte_service_spec *spec, */ int32_t rte_service_component_unregister(uint32_t id); +/** + * Set an attribute for this service. + * + * Note this API is to be called by the service implementation, to make the + * statistic available via the usual attr_get() service APIs. + * + * @retval 0 Success + * @retval -EINVAL Invalid service id or attribute provided + */ +__rte_experimental +int32_t rte_service_component_attr_set(uint32_t id, uint32_t attr, + uint64_t value); + /** * Private function to allow EAL to initialized default mappings. * diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map index fce90a112f..e60eaa3dd9 100644 --- a/lib/librte_eal/version.map +++ b/lib/librte_eal/version.map @@ -412,6 +412,9 @@ EXPERIMENTAL { rte_thread_tls_key_delete; rte_thread_tls_value_get; rte_thread_tls_value_set; + + # added in 21.05 + rte_service_component_attr_set; }; INTERNAL { -- 2.25.1
[dpdk-dev] [PATCH v3 3/3] event/sw: add xstat for work done in last iteration
Today it is difficult to know which Eventdev ports received work from the scheduling core. Sometimes it is useful to know where work has been scheduled. This patch implements an xstat for the SW PMD, which provides a bitmask of ports that were scheduled to. If the SW PMD instance has more than 64 ports, always report that a port got an event. Signed-off-by: Harry van Haaren --- Note most of the changes here are unit-test changes to add a statistic to the PMD. The actual "useful code" is a mere handful of lines in a lot of noise.. could split into 2 patches? --- drivers/event/sw/sw_evdev.h | 1 + drivers/event/sw/sw_evdev_scheduler.c | 12 drivers/event/sw/sw_evdev_selftest.c | 27 ++- drivers/event/sw/sw_evdev_xstats.c| 6 +- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 5ab6465c83..5dfa4508b3 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -259,6 +259,7 @@ struct sw_evdev { uint64_t sched_no_iq_enqueues; uint64_t sched_no_cq_enqueues; uint64_t sched_cq_qid_called; + uint64_t sched_last_iter_bitmask; uint8_t started; uint32_t credit_update_quanta; diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index c78f687446..3ee1188be0 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -566,6 +566,8 @@ sw_event_schedule(struct rte_eventdev *dev) rte_service_component_attr_set(sw->service_id, RTE_SERVICE_ATTR_USEFUL_WORK_LAST_ITER, work_done); + uint64_t cqs_scheds_last_iter = 0; + /* push all the internal buffered QEs in port->cq_ring to the * worker cores: aka, do the ring transfers batched. 
*/ @@ -585,6 +587,7 @@ sw_event_schedule(struct rte_eventdev *dev) &sw->cq_ring_space[i]); port->cq_buf_count = 0; no_enq = 0; + cqs_scheds_last_iter |= (1ULL << i); } else { sw->cq_ring_space[i] = rte_event_ring_free_count(worker) - @@ -604,4 +607,13 @@ sw_event_schedule(struct rte_eventdev *dev) sw->sched_min_burst = sw->sched_min_burst_size; } + /* Provide stats on what eventdev ports were scheduled to this +* iteration. If more than 64 ports are active, always report that +* all Eventdev ports have been scheduled events. +*/ + if (likely(sw->port_count < 64)) { + sw->sched_last_iter_bitmask = cqs_scheds_last_iter; + } else { + sw->sched_last_iter_bitmask = UINT64_MAX; + } } diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index e4bfb3a0f1..7dd35cb22e 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -873,15 +873,15 @@ xstats_tests(struct test *t) int ret = rte_event_dev_xstats_names_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, xstats_names, ids, XSTATS_MAX); - if (ret != 6) { - printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + if (ret != 7) { + printf("%d: expected 7 stats, got return %d\n", __LINE__, ret); return -1; } ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, ret); - if (ret != 6) { - printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + if (ret != 7) { + printf("%d: expected 7 stats, got return %d\n", __LINE__, ret); return -1; } @@ -959,7 +959,7 @@ xstats_tests(struct test *t) ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, num_stats); - static const uint64_t expected[] = {3, 3, 0, 1, 0, 0}; + static const uint64_t expected[] = {3, 3, 0, 1, 0, 0, 4}; for (i = 0; (signed int)i < ret; i++) { if (expected[i] != values[i]) { printf( @@ -975,7 +975,7 @@ xstats_tests(struct test *t) 0, NULL, 0); /* ensure reset statistics are zero-ed */ - static const uint64_t expected_zero[] = 
{0, 0, 0, 0, 0, 0}; + static const uint64_t expected_zero[] = {0, 0, 0, 0, 0, 0, 0}; ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTAT
[dpdk-dev] [PATCH v3 2/3] event/sw: add useful work done attribute
This commit exposes if useful work is done to the service instance. The normal service_attr_get() API can be used to retrieve the value of the attribute. Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev_scheduler.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index f747b3c6d4..c78f687446 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -5,6 +5,9 @@ #include #include #include + +#include + #include "sw_evdev.h" #include "iq_chunk.h" #include "event_ring.h" @@ -559,6 +562,10 @@ sw_event_schedule(struct rte_eventdev *dev) sw->sched_no_iq_enqueues += (in_pkts_total == 0); sw->sched_no_cq_enqueues += (out_pkts_total == 0); + uint64_t work_done = (in_pkts_total + out_pkts_total) != 0; + rte_service_component_attr_set(sw->service_id, + RTE_SERVICE_ATTR_USEFUL_WORK_LAST_ITER, work_done); + /* push all the internal buffered QEs in port->cq_ring to the * worker cores: aka, do the ring transfers batched. */ -- 2.25.1
[dpdk-dev] [PATCH v4] event/sw: add xstats to expose progress details
Today it is difficult to know if the SW Eventdev PMD is making forward progress when it runs an iteration of its service. This commit adds two xstats to give better visibility to the application. The new xstats provide an application with which Eventdev ports received work in the last iteration of scheduling, as well as whether forward progress was made by the scheduler. This patch implements an xstat for the SW PMD that exposes a bitmask of ports that were scheduled to. In the unlikely case that the SW PMD instance has 64 or more ports, return UINT64_MAX. Signed-off-by: Harry van Haaren --- v3: - Simplify all metrics to Event SW PMD v2: - Fixup printf() %ld to PRIu64 Note most of the changes here are unit-test changes to add a statistic to the PMD. The actual "useful code" is a mere handful of lines in a lot of noise. --- drivers/event/sw/sw_evdev.h | 2 ++ drivers/event/sw/sw_evdev_scheduler.c | 15 ++ drivers/event/sw/sw_evdev_selftest.c | 28 ++- drivers/event/sw/sw_evdev_xstats.c| 9 - 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 5ab6465c83..33645bd1df 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -259,6 +259,8 @@ struct sw_evdev { uint64_t sched_no_iq_enqueues; uint64_t sched_no_cq_enqueues; uint64_t sched_cq_qid_called; + uint64_t sched_last_iter_bitmask; + uint8_t sched_progress_last_iter; uint8_t started; uint32_t credit_update_quanta; diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index f747b3c6d4..d3a6bd5cda 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -559,6 +559,11 @@ sw_event_schedule(struct rte_eventdev *dev) sw->sched_no_iq_enqueues += (in_pkts_total == 0); sw->sched_no_cq_enqueues += (out_pkts_total == 0); + uint64_t work_done = (in_pkts_total + out_pkts_total) != 0; + sw->sched_progress_last_iter = work_done; + + uint64_t cqs_scheds_last_iter = 0; + 
/* push all the internal buffered QEs in port->cq_ring to the * worker cores: aka, do the ring transfers batched. */ @@ -578,6 +583,7 @@ sw_event_schedule(struct rte_eventdev *dev) &sw->cq_ring_space[i]); port->cq_buf_count = 0; no_enq = 0; + cqs_scheds_last_iter |= (1ULL << i); } else { sw->cq_ring_space[i] = rte_event_ring_free_count(worker) - @@ -597,4 +603,13 @@ sw_event_schedule(struct rte_eventdev *dev) sw->sched_min_burst = sw->sched_min_burst_size; } + /* Provide stats on what eventdev ports were scheduled to this +* iteration. If more than 64 ports are active, always report that +* all Eventdev ports have been scheduled events. +*/ + if (likely(sw->port_count < 64)) { + sw->sched_last_iter_bitmask = cqs_scheds_last_iter; + } else { + sw->sched_last_iter_bitmask = UINT64_MAX; + } } diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index e4bfb3a0f1..d53e903129 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -873,15 +873,15 @@ xstats_tests(struct test *t) int ret = rte_event_dev_xstats_names_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, xstats_names, ids, XSTATS_MAX); - if (ret != 6) { - printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + if (ret != 8) { + printf("%d: expected 8 stats, got return %d\n", __LINE__, ret); return -1; } ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, ret); - if (ret != 6) { - printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + if (ret != 8) { + printf("%d: expected 8 stats, got return %d\n", __LINE__, ret); return -1; } @@ -959,7 +959,7 @@ xstats_tests(struct test *t) ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, num_stats); - static const uint64_t expected[] = {3, 3, 0, 1, 0, 0}; + static const uint64_t expected[] = {3, 3, 0, 1, 0, 0, 4, 1}; for (i = 0; (signed int)i < ret; i++) { if (expec
[PATCH] service: split tests to perf and autotest to avoid spurious CI failures
On some CI runs, some service-cores tests spuriously fail as the service lcore thread is not actually scheduled by the OS in the given amount of time. Increasing timeouts has not resolved the issue in the CI, so the solution in this patch is to move them to a separate perf test suite. Signed-off-by: Harry van Haaren --- See DPDK ML discussion in this thread: http://mails.dpdk.org/archives/dev/2023-February/263523.html --- app/test/meson.build | 1 + app/test/test_service_cores.c | 32 +++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/app/test/meson.build b/app/test/meson.build index f34d19e3c3..2db5ccf4ff 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -287,6 +287,7 @@ perf_test_names = [ 'pie_perf', 'distributor_perf_autotest', 'pmd_perf_autotest', +'service_perf_autotest', 'stack_perf_autotest', 'stack_lf_perf_autotest', 'rand_perf_autotest', diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index 637fcd7cf9..06653dfdef 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -1022,17 +1022,12 @@ static struct unit_test_suite service_tests = { TEST_CASE_ST(dummy_register, NULL, service_name), TEST_CASE_ST(dummy_register, NULL, service_get_by_name), TEST_CASE_ST(dummy_register, NULL, service_dump), - TEST_CASE_ST(dummy_register, NULL, service_attr_get), - TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get), TEST_CASE_ST(dummy_register, NULL, service_probe_capability), TEST_CASE_ST(dummy_register, NULL, service_start_stop), TEST_CASE_ST(dummy_register, NULL, service_lcore_add_del), - TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop), TEST_CASE_ST(dummy_register, NULL, service_lcore_en_dis_able), TEST_CASE_ST(dummy_register, NULL, service_mt_unsafe_poll), TEST_CASE_ST(dummy_register, NULL, service_mt_safe_poll), - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe), - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe), 
TEST_CASE_ST(dummy_register, NULL, service_may_be_active), TEST_CASE_ST(dummy_register, NULL, service_active_two_cores), TEST_CASES_END() /**< NULL terminate unit test array */ @@ -1046,3 +1041,30 @@ test_service_common(void) } REGISTER_TEST_COMMAND(service_autotest, test_service_common); + + +/* The tests below have been split from the auto-test suite, as the + * when they are run in a cloud CI environment they can give false-positive + * errors, due to the service-cores not being scheduled by the OS. + */ +static struct unit_test_suite service_perf_tests = { + .suite_name = "service core test suite", + .setup = testsuite_setup, + .teardown = testsuite_teardown, + .unit_test_cases = { + TEST_CASE_ST(dummy_register, NULL, service_attr_get), + TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get), + TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop), + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe), + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe), + TEST_CASES_END() /**< NULL terminate unit test array */ + } +}; + +static int +test_service_perf(void) +{ + return unit_test_suite_runner(&service_perf_tests); +} + +REGISTER_TEST_COMMAND(service_perf_autotest, test_service_perf); -- 2.34.1
[PATCH v2] service: split tests to perf and autotest to avoid spurious CI failures
On some CI runs, some service-cores tests spuriously fail as the service lcore thread is not actually scheduled by the OS in the given amount of time. Increasing timeouts has not resolved the issue in the CI, so the solution in this patch is to move them to a separate perf test suite. Signed-off-by: Harry van Haaren --- v2: - Add "performance" to suite name (David) - Remove comment above perf-test suite (David) See DPDK ML discussion in this thread: http://mails.dpdk.org/archives/dev/2023-February/263523.html --- app/test/meson.build | 1 + app/test/test_service_cores.c | 27 ++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/app/test/meson.build b/app/test/meson.build index f34d19e3c3..2db5ccf4ff 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -287,6 +287,7 @@ perf_test_names = [ 'pie_perf', 'distributor_perf_autotest', 'pmd_perf_autotest', +'service_perf_autotest', 'stack_perf_autotest', 'stack_lf_perf_autotest', 'rand_perf_autotest', diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index 637fcd7cf9..c8b6a27c69 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -1022,17 +1022,12 @@ static struct unit_test_suite service_tests = { TEST_CASE_ST(dummy_register, NULL, service_name), TEST_CASE_ST(dummy_register, NULL, service_get_by_name), TEST_CASE_ST(dummy_register, NULL, service_dump), - TEST_CASE_ST(dummy_register, NULL, service_attr_get), - TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get), TEST_CASE_ST(dummy_register, NULL, service_probe_capability), TEST_CASE_ST(dummy_register, NULL, service_start_stop), TEST_CASE_ST(dummy_register, NULL, service_lcore_add_del), - TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop), TEST_CASE_ST(dummy_register, NULL, service_lcore_en_dis_able), TEST_CASE_ST(dummy_register, NULL, service_mt_unsafe_poll), TEST_CASE_ST(dummy_register, NULL, service_mt_safe_poll), - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe), - 
TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe), TEST_CASE_ST(dummy_register, NULL, service_may_be_active), TEST_CASE_ST(dummy_register, NULL, service_active_two_cores), TEST_CASES_END() /**< NULL terminate unit test array */ @@ -1046,3 +1041,25 @@ test_service_common(void) } REGISTER_TEST_COMMAND(service_autotest, test_service_common); + +static struct unit_test_suite service_perf_tests = { + .suite_name = "service core performance test suite", + .setup = testsuite_setup, + .teardown = testsuite_teardown, + .unit_test_cases = { + TEST_CASE_ST(dummy_register, NULL, service_attr_get), + TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get), + TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop), + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe), + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe), + TEST_CASES_END() /**< NULL terminate unit test array */ + } +}; + +static int +test_service_perf(void) +{ + return unit_test_suite_runner(&service_perf_tests); +} + +REGISTER_TEST_COMMAND(service_perf_autotest, test_service_perf); -- 2.34.1
[PATCH] test/service: fix spurious failures by extending timeout
This commit extends the timeout for service_may_be_active() from 100ms to 1000ms. Local testing on an idle and loaded system (compiling DPDK with all cores) always completes after 1 ms. The same timeout waiting code was duplicated in two tests, and is now refactored to a standalone function avoiding duplication. Reported-by: David Marchand Suggested-by: Mattias Ronnblom Signed-off-by: Harry van Haaren --- app/test/test_service_cores.c | 43 --- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index 359b6dcd8b..5260e078c4 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -921,12 +921,26 @@ service_lcore_start_stop(void) return unregister_all(); } +static int +service_ensure_stopped_with_timeout(uint32_t sid) +{ + /* give the service time to stop running */ + int32_t timeout_ms = 1000; + int i; + for (i = 0; i < timeout_ms; i++) { + if (!rte_service_may_be_active(sid)) + break; + rte_delay_ms(SERVICE_DELAY); + } + + return rte_service_may_be_active(sid); +} + /* stop a service and wait for it to become inactive */ static int service_may_be_active(void) { const uint32_t sid = 0; - int i; /* expected failure cases */ TEST_ASSERT_EQUAL(-EINVAL, rte_service_may_be_active(1), @@ -946,19 +960,11 @@ service_may_be_active(void) TEST_ASSERT_EQUAL(1, service_lcore_running_check(), "Service core expected to poll service but it didn't"); - /* stop the service */ + /* stop the service, and wait for not-active with timeout */ TEST_ASSERT_EQUAL(0, rte_service_runstate_set(sid, 0), "Error: Service stop returned non-zero"); - - /* give the service 100ms to stop running */ - for (i = 0; i < 100; i++) { - if (!rte_service_may_be_active(sid)) - break; - rte_delay_ms(SERVICE_DELAY); - } - - TEST_ASSERT_EQUAL(0, rte_service_may_be_active(sid), - "Error: Service not stopped after 100ms"); + TEST_ASSERT_EQUAL(0, service_ensure_stopped_with_timeout(sid), + "Error: Service not stopped 
after timeout period."); return unregister_all(); } @@ -972,7 +978,6 @@ service_active_two_cores(void) return TEST_SKIPPED; const uint32_t sid = 0; - int i; uint32_t lcore = rte_get_next_lcore(/* start core */ -1, /* skip main */ 1, @@ -1002,16 +1007,8 @@ service_active_two_cores(void) /* stop the service */ TEST_ASSERT_EQUAL(0, rte_service_runstate_set(sid, 0), "Error: Service stop returned non-zero"); - - /* give the service 100ms to stop running */ - for (i = 0; i < 100; i++) { - if (!rte_service_may_be_active(sid)) - break; - rte_delay_ms(SERVICE_DELAY); - } - - TEST_ASSERT_EQUAL(0, rte_service_may_be_active(sid), - "Error: Service not stopped after 100ms"); + TEST_ASSERT_EQUAL(0, service_ensure_stopped_with_timeout(sid), + "Error: Service not stopped after timeout period."); return unregister_all(); } -- 2.34.1
[PATCH v2] test/service: fix spurious failures by extending timeout
This commit extends the timeout for service_may_be_active() from 100ms to 1000ms. Local testing on an idle and loaded system (compiling DPDK with all cores) always completes after 1 ms. The wait time for a service-lcore to finish is also extended from 100ms to 1000ms. The same timeout waiting code was duplicated in two tests, and is now refactored to a standalone function avoiding duplication. Reported-by: David Marchand Suggested-by: Mattias Ronnblom Signed-off-by: Harry van Haaren --- Apologies for the quick respin noise; only the first diff-section is added, no changes to the rest of the patch. v2: - v1 addressed only testcase 15 issue, v2 also addresses test case 5, which has a service-lcore wait code-path. --- app/test/test_service_cores.c | 47 --- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index 359b6dcd8b..4b147bd64c 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -123,14 +123,14 @@ unregister_all(void) return TEST_SUCCESS; } -/* Wait until service lcore not active, or for 100x SERVICE_DELAY */ +/* Wait until service lcore not active, or for N times SERVICE_DELAY */ static void wait_slcore_inactive(uint32_t slcore_id) { int i; for (i = 0; rte_service_lcore_may_be_active(slcore_id) == 1 && - i < 100; i++) + i < 1000; i++) rte_delay_ms(SERVICE_DELAY); } @@ -921,12 +921,26 @@ service_lcore_start_stop(void) return unregister_all(); } +static int +service_ensure_stopped_with_timeout(uint32_t sid) +{ + /* give the service time to stop running */ + int32_t timeout_ms = 1000; + int i; + for (i = 0; i < timeout_ms; i++) { + if (!rte_service_may_be_active(sid)) + break; + rte_delay_ms(SERVICE_DELAY); + } + + return rte_service_may_be_active(sid); +} + /* stop a service and wait for it to become inactive */ static int service_may_be_active(void) { const uint32_t sid = 0; - int i; /* expected failure cases */ TEST_ASSERT_EQUAL(-EINVAL, 
rte_service_may_be_active(1), @@ -946,19 +960,11 @@ service_may_be_active(void) TEST_ASSERT_EQUAL(1, service_lcore_running_check(), "Service core expected to poll service but it didn't"); - /* stop the service */ + /* stop the service, and wait for not-active with timeout */ TEST_ASSERT_EQUAL(0, rte_service_runstate_set(sid, 0), "Error: Service stop returned non-zero"); - - /* give the service 100ms to stop running */ - for (i = 0; i < 100; i++) { - if (!rte_service_may_be_active(sid)) - break; - rte_delay_ms(SERVICE_DELAY); - } - - TEST_ASSERT_EQUAL(0, rte_service_may_be_active(sid), - "Error: Service not stopped after 100ms"); + TEST_ASSERT_EQUAL(0, service_ensure_stopped_with_timeout(sid), + "Error: Service not stopped after timeout period."); return unregister_all(); } @@ -972,7 +978,6 @@ service_active_two_cores(void) return TEST_SKIPPED; const uint32_t sid = 0; - int i; uint32_t lcore = rte_get_next_lcore(/* start core */ -1, /* skip main */ 1, @@ -1002,16 +1007,8 @@ service_active_two_cores(void) /* stop the service */ TEST_ASSERT_EQUAL(0, rte_service_runstate_set(sid, 0), "Error: Service stop returned non-zero"); - - /* give the service 100ms to stop running */ - for (i = 0; i < 100; i++) { - if (!rte_service_may_be_active(sid)) - break; - rte_delay_ms(SERVICE_DELAY); - } - - TEST_ASSERT_EQUAL(0, rte_service_may_be_active(sid), - "Error: Service not stopped after 100ms"); + TEST_ASSERT_EQUAL(0, service_ensure_stopped_with_timeout(sid), + "Error: Service not stopped after timeout period."); return unregister_all(); } -- 2.34.1
[PATCH v3] test/service: fix spurious failures by extending timeout
This commit extends the timeout for service_may_be_active() from 100ms to 1000ms. Local testing on an idle and loaded system (compiling DPDK with all cores) always completes after 1 ms. The wait time for a service-lcore to finish is also extended from 100ms to 1000ms. The same timeout waiting code was duplicated in two tests, and is now refactored to a standalone function avoiding duplication. Reported-by: David Marchand Suggested-by: Mattias Ronnblom Signed-off-by: Harry van Haaren --- v3: - Use #define for timeout, and delay(1) (Mattias) - Rework slcore-wait to use TIMEOUT_MS as well. v2: - v1 addressed only testcase 15 issue, v2 also addresses test case 5, which has a service-lcore wait code-path. --- app/test/test_service_cores.c | 49 --- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c index 359b6dcd8b..637fcd7cf9 100644 --- a/app/test/test_service_cores.c +++ b/app/test/test_service_cores.c @@ -22,6 +22,7 @@ static uint64_t service_tick; static uint32_t service_remote_launch_flag; #define SERVICE_DELAY 1 +#define TIMEOUT_MS 1000 #define DUMMY_SERVICE_NAME "dummy_service" #define MT_SAFE_SERVICE_NAME "mt_safe_service" @@ -123,15 +124,15 @@ unregister_all(void) return TEST_SUCCESS; } -/* Wait until service lcore not active, or for 100x SERVICE_DELAY */ +/* Wait until service lcore not active, or for TIMEOUT_MS */ static void wait_slcore_inactive(uint32_t slcore_id) { int i; for (i = 0; rte_service_lcore_may_be_active(slcore_id) == 1 && - i < 100; i++) - rte_delay_ms(SERVICE_DELAY); + i < TIMEOUT_MS; i++) + rte_delay_ms(1); } /* register a single dummy service */ @@ -921,12 +922,25 @@ service_lcore_start_stop(void) return unregister_all(); } +static int +service_ensure_stopped_with_timeout(uint32_t sid) +{ + /* give the service time to stop running */ + int i; + for (i = 0; i < TIMEOUT_MS; i++) { + if (!rte_service_may_be_active(sid)) + break; + rte_delay_ms(1); + } + + return 
rte_service_may_be_active(sid); +} + /* stop a service and wait for it to become inactive */ static int service_may_be_active(void) { const uint32_t sid = 0; - int i; /* expected failure cases */ TEST_ASSERT_EQUAL(-EINVAL, rte_service_may_be_active(1), @@ -946,19 +960,11 @@ service_may_be_active(void) TEST_ASSERT_EQUAL(1, service_lcore_running_check(), "Service core expected to poll service but it didn't"); - /* stop the service */ + /* stop the service, and wait for not-active with timeout */ TEST_ASSERT_EQUAL(0, rte_service_runstate_set(sid, 0), "Error: Service stop returned non-zero"); - - /* give the service 100ms to stop running */ - for (i = 0; i < 100; i++) { - if (!rte_service_may_be_active(sid)) - break; - rte_delay_ms(SERVICE_DELAY); - } - - TEST_ASSERT_EQUAL(0, rte_service_may_be_active(sid), - "Error: Service not stopped after 100ms"); + TEST_ASSERT_EQUAL(0, service_ensure_stopped_with_timeout(sid), + "Error: Service not stopped after timeout period."); return unregister_all(); } @@ -972,7 +978,6 @@ service_active_two_cores(void) return TEST_SKIPPED; const uint32_t sid = 0; - int i; uint32_t lcore = rte_get_next_lcore(/* start core */ -1, /* skip main */ 1, @@ -1002,16 +1007,8 @@ service_active_two_cores(void) /* stop the service */ TEST_ASSERT_EQUAL(0, rte_service_runstate_set(sid, 0), "Error: Service stop returned non-zero"); - - /* give the service 100ms to stop running */ - for (i = 0; i < 100; i++) { - if (!rte_service_may_be_active(sid)) - break; - rte_delay_ms(SERVICE_DELAY); - } - - TEST_ASSERT_EQUAL(0, rte_service_may_be_active(sid), - "Error: Service not stopped after 100ms"); + TEST_ASSERT_EQUAL(0, service_ensure_stopped_with_timeout(sid), + "Error: Service not stopped after timeout period."); return unregister_all(); } -- 2.34.1
[PATCH] event/sw: fix selftest xstats reset API usage
The eventdev xstats reset API takes an ID of "uint32_t", while the rest of the xstats APIs require an "unsigned int". On some platforms these might not be the same bitwidth, however this was assumed in the code. Fix by providing a uint32_t to the xstats_reset() function. Fixes: e21df4b062b5 ("test/eventdev: add SW xstats tests") Reported-by: Morten Brørup Signed-off-by: Harry van Haaren --- Note this is a fix for a potential build issue in 64-bit BE systems. --- drivers/event/sw/sw_evdev_selftest.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index ed7ae6a685..f8496bc44e 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -1525,10 +1525,11 @@ xstats_id_reset_tests(struct test *t) dev_names[i], dev_expected[i], val); goto fail; } - /* reset to zero */ + /* reset to zero: note API requires uint32_t not unsigned int */ + uint32_t reset_id = id; int reset_ret = rte_event_dev_xstats_reset(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, - &id, + &reset_id, 1); if (reset_ret) { printf("%d: failed to reset successfully\n", __LINE__); @@ -1647,10 +1648,11 @@ xstats_id_reset_tests(struct test *t) port_expected[i], id); failed = 1; } - /* reset to zero */ + /* reset to zero: note API requires uint32_t not unsigned int */ + uint32_t reset_id = id; int reset_ret = rte_event_dev_xstats_reset(evdev, RTE_EVENT_DEV_XSTATS_PORT, PORT, - &id, + &reset_id, 1); if (reset_ret) { printf("%d: failed to reset successfully\n", __LINE__); @@ -1762,10 +1764,11 @@ xstats_id_reset_tests(struct test *t) queue_names[i], queue_expected[i], val); failed = 1; } - /* reset to zero */ + /* reset to zero: note API requires uint32_t not unsigned int */ + uint32_t reset_id = id; int reset_ret = rte_event_dev_xstats_reset(evdev, RTE_EVENT_DEV_XSTATS_QUEUE, - queue, &id, 1); + queue, &reset_id, 1); if (reset_ret) { printf("%d: failed to reset successfully\n", __LINE__); 
failed = 1; -- 2.34.1
[dpdk-dev] [PATCH 0/2] eventdev: add port usage hints
These 2 patches are a suggestion to add a hint to the struct rte_event_port_conf.event_port_cfg. The usage of these hints is to allow an application to identify/communicate to the PMD what ports will primarily serve what purpose. E.g., some ports are "mainly producers" in that they are usually polling Ethdev RXQs (or other event sources) and enqueuing the resulting events to the eventdev instance. Similarly there are usages for "worker" (mainly forwards events) and "consumer" (mainly consumes events without re-enq). Note that these flags are *hints* only, and *functionally* any combination of (NEW/FWD/RELEASE) is still allowed by any port. The reason to add these is to allow a PMD to allocate internal resources more efficiently. Note that this implementation does not change the ABI, as it gives a purpose to existing bits in an existing field. Regards, -Harry Harry van Haaren (2): lib/eventdev: add usage hints to port configure API examples/eventdev_pipeline: use port config hints .../pipeline_worker_generic.c | 2 ++ .../eventdev_pipeline/pipeline_worker_tx.c| 2 ++ lib/eventdev/rte_eventdev.h | 23 +++ 3 files changed, 27 insertions(+) -- 2.30.2
[dpdk-dev] [PATCH 1/2] lib/eventdev: add usage hints to port configure API
This commit introduces 3 flags to the port configuration flags. These flags allow the application to indicate what type of work is expected to be performed by an eventdev port. The three new flags are - RTE_EVENT_PORT_CFG_HINT_PRODUCER (mostly RTE_EVENT_OP_NEW events) - RTE_EVENT_PORT_CFG_HINT_CONSUMER (mostly RTE_EVENT_OP_RELEASE events) - RTE_EVENT_PORT_CFG_HINT_WORKER (mostly RTE_EVENT_OP_FORWARD events) These flags are only hints, and the PMDs must operate under the assumption that any port can enqueue an event with any type of op. Signed-off-by: Harry van Haaren --- lib/eventdev/rte_eventdev.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h index a9c496fb62..159b580938 100644 --- a/lib/eventdev/rte_eventdev.h +++ b/lib/eventdev/rte_eventdev.h @@ -709,6 +709,29 @@ rte_event_queue_attr_get(uint8_t dev_id, uint8_t queue_id, uint32_t attr_id, * * @see rte_event_port_setup(), rte_event_port_link() */ +#define RTE_EVENT_PORT_CFG_HINT_PRODUCER (1ULL << 2) +/**< Hint that this event port will primarily enqueue events to the system. + * A PMD can optimize its internal workings by assuming that this port is + * primarily going to enqueue NEW events. Note that this flag is only a hint. + * + * @see rte_event_port_setup() + */ +#define RTE_EVENT_PORT_CFG_HINT_CONSUMER (1ULL << 3) +/**< Hint that this event port will primarily dequeue events from the system. + * A PMD can optimize its internal workings by assuming that this port is + * primarily going to consume events, and not enqueue FORWARD or RELEASE events. + * Note that this flag is only a hint. + * + * @see rte_event_port_setup() + */ +#define RTE_EVENT_PORT_CFG_HINT_WORKER (1ULL << 4) +/**< Hint that this event port will primarily forward events to the system. + * A PMD can optimize its internal workings by assuming that this port is + * primarily going to FORWARD events, and not enqueue NEW or RELEASE events + * often. Note that this flag is only a hint. 
+ * + * @see rte_event_port_setup() + */ /** Event port configuration structure */ struct rte_event_port_conf { -- 2.30.2
[dpdk-dev] [PATCH 2/2] examples/eventdev_pipeline: use port config hints
This commit adds the per-port hints added to the eventdev API, indicating which eventdev ports will be used for producing, forwarding, or consuming events from the system. Signed-off-by: Harry van Haaren --- examples/eventdev_pipeline/pipeline_worker_generic.c | 2 ++ examples/eventdev_pipeline/pipeline_worker_tx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/examples/eventdev_pipeline/pipeline_worker_generic.c b/examples/eventdev_pipeline/pipeline_worker_generic.c index f70ab0cc9e..8f43d1f71e 100644 --- a/examples/eventdev_pipeline/pipeline_worker_generic.c +++ b/examples/eventdev_pipeline/pipeline_worker_generic.c @@ -139,6 +139,7 @@ setup_eventdev_generic(struct worker_data *worker_data) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_WORKER, }; struct rte_event_queue_conf wkr_q_conf = { .schedule_type = cdata.queue_type, @@ -419,6 +420,7 @@ init_adapters(uint16_t nb_ports) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_PRODUCER, }; if (adptr_p_conf.new_event_threshold > dev_info.max_num_events) diff --git a/examples/eventdev_pipeline/pipeline_worker_tx.c b/examples/eventdev_pipeline/pipeline_worker_tx.c index ca6cd200ca..ad17451a04 100644 --- a/examples/eventdev_pipeline/pipeline_worker_tx.c +++ b/examples/eventdev_pipeline/pipeline_worker_tx.c @@ -446,6 +446,7 @@ setup_eventdev_worker_tx_enq(struct worker_data *worker_data) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_WORKER, }; struct rte_event_queue_conf wkr_q_conf = { .schedule_type = cdata.queue_type, @@ -747,6 +748,7 @@ init_adapters(uint16_t nb_ports) .dequeue_depth = cdata.worker_cq_depth, .enqueue_depth = 64, .new_event_threshold = 4096, + .event_port_cfg = RTE_EVENT_PORT_CFG_HINT_PRODUCER, }; init_ports(nb_ports); -- 2.30.2
[dpdk-dev] [PATCH v5] event/sw: add xstats to expose progress details
Today it is difficult to know if the SW Eventdev PMD is making forward progress when it runs an iteration of its service. This commit adds two xstats to give better visibility to the application. The new xstats provide an application with which Eventdev ports received work in the last iteration of scheduling, as well as whether forward progress was made by the scheduler. This patch implements an xstat for the SW PMD that exposes a bitmask of ports that were scheduled to. In the unlikely case that the SW PMD instance has 64 or more ports, return UINT64_MAX. Signed-off-by: Harry van Haaren --- Thanks for review Jerin! v5: - Fixup if/else {} brackets by refactor and removing (Jerin/checkpatch) - Fixup recieved typo in commit message (Jerin/checkpatch) v4: - Simplify all metrics to Event SW PMD --- drivers/event/sw/sw_evdev.h | 2 ++ drivers/event/sw/sw_evdev_scheduler.c | 13 + drivers/event/sw/sw_evdev_selftest.c | 28 ++- drivers/event/sw/sw_evdev_xstats.c| 9 - 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 5ab6465c83..33645bd1df 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -259,6 +259,8 @@ struct sw_evdev { uint64_t sched_no_iq_enqueues; uint64_t sched_no_cq_enqueues; uint64_t sched_cq_qid_called; + uint64_t sched_last_iter_bitmask; + uint8_t sched_progress_last_iter; uint8_t started; uint32_t credit_update_quanta; diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c index f747b3c6d4..809a54d731 100644 --- a/drivers/event/sw/sw_evdev_scheduler.c +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -559,6 +559,11 @@ sw_event_schedule(struct rte_eventdev *dev) sw->sched_no_iq_enqueues += (in_pkts_total == 0); sw->sched_no_cq_enqueues += (out_pkts_total == 0); + uint64_t work_done = (in_pkts_total + out_pkts_total) != 0; + sw->sched_progress_last_iter = work_done; + + uint64_t cqs_scheds_last_iter = 0; + /* push all the internal 
buffered QEs in port->cq_ring to the * worker cores: aka, do the ring transfers batched. */ @@ -578,6 +583,7 @@ sw_event_schedule(struct rte_eventdev *dev) &sw->cq_ring_space[i]); port->cq_buf_count = 0; no_enq = 0; + cqs_scheds_last_iter |= (1ULL << i); } else { sw->cq_ring_space[i] = rte_event_ring_free_count(worker) - @@ -597,4 +603,11 @@ sw_event_schedule(struct rte_eventdev *dev) sw->sched_min_burst = sw->sched_min_burst_size; } + /* Provide stats on what eventdev ports were scheduled to this +* iteration. If more than 64 ports are active, always report that +* all Eventdev ports have been scheduled events. +*/ + sw->sched_last_iter_bitmask = cqs_scheds_last_iter; + if (unlikely(sw->port_count >= 64)) + sw->sched_last_iter_bitmask = UINT64_MAX; } diff --git a/drivers/event/sw/sw_evdev_selftest.c b/drivers/event/sw/sw_evdev_selftest.c index e4bfb3a0f1..d53e903129 100644 --- a/drivers/event/sw/sw_evdev_selftest.c +++ b/drivers/event/sw/sw_evdev_selftest.c @@ -873,15 +873,15 @@ xstats_tests(struct test *t) int ret = rte_event_dev_xstats_names_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, xstats_names, ids, XSTATS_MAX); - if (ret != 6) { - printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + if (ret != 8) { + printf("%d: expected 8 stats, got return %d\n", __LINE__, ret); return -1; } ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, ret); - if (ret != 6) { - printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + if (ret != 8) { + printf("%d: expected 8 stats, got return %d\n", __LINE__, ret); return -1; } @@ -959,7 +959,7 @@ xstats_tests(struct test *t) ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, num_stats); - static const uint64_t expected[] = {3, 3, 0, 1, 0, 0}; + static const uint64_t expected[] = {3, 3, 0, 1, 0, 0, 4, 1}; for (i = 0; (signed int)i < ret; i++) { if (expected[i] != values[i]) { printf( @@ -975,7 +975,7 @@ xstats_tests(struct test *t)
[dpdk-dev] [PATCH 00/15] next-eventdev: event/sw Software Eventdev
The following patchset adds software eventdev implementation to the next-eventdev tree, and applies to current git HEAD. This implementation is based on the previous software eventdev RFC patchset[1], updated to integrate with the latest rte_eventdev.h API. The first two patches make changes to the eventdev API, then the software implementation is added, and finally tests are added for the sw eventdev implementation. This patchset contains the work of multiple developers, please see signoffs on each patch. Signed-off-by: Harry van Haaren [1] http://dpdk.org/ml/archives/dev/2016-November/050285.html Bruce Richardson (15): eventdev: remove unneeded dependencies eventdev: add APIs for extended stats event/sw: add new software-only eventdev driver event/sw: add function to return device capabilities event/sw: add configure function event/sw: add fns to return default port/queue config event/sw: add support for event queues event/sw: add support for event ports event/sw: add support for linking queues to ports event/sw: add worker core functions event/sw: add scheduling logic event/sw: add start, stop and close functions event/sw: add dump function for easier debugging event/sw: add xstats support app/test: add unit tests for SW eventdev driver app/test/Makefile |5 +- app/test/test_sw_eventdev.c | 2031 + config/common_base|5 + drivers/event/Makefile|1 + drivers/event/sw/Makefile | 70 + drivers/event/sw/event_ring.h | 179 +++ drivers/event/sw/iq_ring.h| 176 +++ drivers/event/sw/rte_pmd_evdev_sw_version.map |3 + drivers/event/sw/sw_evdev.c | 709 + drivers/event/sw/sw_evdev.h | 287 drivers/event/sw/sw_evdev_scheduler.c | 586 +++ drivers/event/sw/sw_evdev_worker.c| 169 ++ drivers/event/sw/sw_evdev_xstats.c| 401 + lib/librte_eventdev/Makefile |1 - lib/librte_eventdev/rte_eventdev.c| 64 + lib/librte_eventdev/rte_eventdev.h| 80 +- lib/librte_eventdev/rte_eventdev_pmd.h| 58 + lib/librte_eventdev/rte_eventdev_version.map |3 + mk/rte.app.mk |1 + 19 files changed, 4825 
insertions(+), 4 deletions(-) create mode 100644 app/test/test_sw_eventdev.c create mode 100644 drivers/event/sw/Makefile create mode 100644 drivers/event/sw/event_ring.h create mode 100644 drivers/event/sw/iq_ring.h create mode 100644 drivers/event/sw/rte_pmd_evdev_sw_version.map create mode 100644 drivers/event/sw/sw_evdev.c create mode 100644 drivers/event/sw/sw_evdev.h create mode 100644 drivers/event/sw/sw_evdev_scheduler.c create mode 100644 drivers/event/sw/sw_evdev_worker.c create mode 100644 drivers/event/sw/sw_evdev_xstats.c -- 2.7.4
[dpdk-dev] [PATCH 02/15] eventdev: add APIs for extended stats
From: Bruce Richardson Add in APIs for extended stats so that eventdev implementations can report out information on their internal state. The APIs are based on, but not identical to, the equivalent ethdev functions. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- lib/librte_eventdev/rte_eventdev.c | 64 lib/librte_eventdev/rte_eventdev.h | 75 lib/librte_eventdev/rte_eventdev_pmd.h | 58 + lib/librte_eventdev/rte_eventdev_version.map | 3 ++ 4 files changed, 200 insertions(+) diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index c8f3e94..ca11d54 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -920,6 +920,70 @@ rte_event_dev_dump(uint8_t dev_id, FILE *f) } +static int +get_xstats_count(uint8_t dev_id) +{ + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + if (dev->dev_ops->get_xstat_names != NULL) + return (*dev->dev_ops->get_xstat_names)(dev, NULL, 0); + return 0; +} + +int +rte_event_dev_get_xstat_names(uint8_t dev_id, + struct rte_event_dev_xstat_name *xstats_names, + unsigned int size) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + const int cnt_expected_entries = get_xstats_count(dev_id); + if (xstats_names == NULL || cnt_expected_entries < 0 || + (int)size < cnt_expected_entries) + return cnt_expected_entries; + + /* dev_id checked in get_xstats_count() */ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + if (dev->dev_ops->get_xstat_names != NULL) + return (*dev->dev_ops->get_xstat_names)(dev, + xstats_names, size); + + return 0; +} + +/* retrieve eventdev extended statistics */ +int +rte_event_dev_get_xstats(uint8_t dev_id, const unsigned int ids[], + uint64_t values[], unsigned int n) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + /* implemented by the driver */ + if (dev->dev_ops->get_xstats != NULL) + return (*dev->dev_ops->get_xstats)(dev, ids, values, n); 
+ return 0; +} + +uint64_t +rte_event_dev_get_xstat_by_name(uint8_t dev_id, const char *name, + unsigned int *id) +{ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + unsigned int temp = -1; + + if (id != NULL) + *id = (unsigned int)-1; + else + id = &temp; /* ensure driver never gets a NULL value */ + + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, 0); + + /* implemented by driver */ + if (dev->dev_ops->get_xstat_by_name != NULL) + return (*dev->dev_ops->get_xstat_by_name)(dev, name, id); + return 0; +} + int rte_event_dev_start(uint8_t dev_id) { diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index c2f9310..681cbfa 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1401,6 +1401,81 @@ rte_event_port_links_get(uint8_t dev_id, uint8_t port_id, int rte_event_dev_dump(uint8_t dev_id, FILE *f); +/** Maximum name length for extended statistics counters */ +#define RTE_EVENT_DEV_XSTAT_NAME_SIZE 64 + +/** + * A name-key lookup element for extended statistics. + * + * This structure is used to map between names and ID numbers + * for extended ethdev statistics. + */ +struct rte_event_dev_xstat_name { + char name[RTE_EVENT_DEV_XSTAT_NAME_SIZE]; +}; + +/** + * Retrieve names of extended statistics of an event device. + * + * @param dev_id + * The identifier of the event device. + * @param xstat_names + * Block of memory to insert names into. Must be at least size in capacity. + * If set to NULL, function returns required capacity. + * @param size + * Capacity of xstat_names (number of names). + * @return + * - positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. 
+ * - negative value on error (invalid port id) + */ +int +rte_event_dev_get_xstat_names(uint8_t dev_id, + struct rte_event_dev_xstat_name *xstat_names, + unsigned int size); + +/** + * Retrieve extended statistics of an event device. + * + * @param dev_id + * The identifier of the device. + * @param ids + * The id numbers of the stats to get. The ids can be got from the stat + * position in the stat list from rte_eve
[dpdk-dev] [PATCH 01/15] eventdev: remove unneeded dependencies
From: Bruce Richardson Since eventdev uses event structures rather than working directly on mbufs, there are no actual dependencies on the mbuf library. The inclusion of an mbuf pointer element inside the event itself does not require the inclusion of the mbuf header file. Similarly the pci header is not needed, but following their removal, rte_memory.h is needed for the definition of the __rte_cache_aligned macro. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- lib/librte_eventdev/Makefile | 1 - lib/librte_eventdev/rte_eventdev.h | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile index dac0663..396e5ec 100644 --- a/lib/librte_eventdev/Makefile +++ b/lib/librte_eventdev/Makefile @@ -52,6 +52,5 @@ EXPORT_MAP := rte_eventdev_version.map # library dependencies DEPDIRS-y += lib/librte_eal -DEPDIRS-y += lib/librte_mbuf include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index e1bd05f..c2f9310 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -244,8 +244,9 @@ extern "C" { #endif #include -#include -#include +#include + +struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */ /* Event device capability bitmap flags */ #define RTE_EVENT_DEV_CAP_QUEUE_QOS (1ULL << 0) -- 2.7.4
[dpdk-dev] [PATCH 03/15] event/sw: add new software-only eventdev driver
From: Bruce Richardson This adds the minimal changes to allow a SW eventdev implementation to be compiled, linked and created at run time. The eventdev does nothing, but can be created via vdev on commandline, e.g. sudo ./x86_64-native-linuxapp-gcc/app/test --vdev=event_sw0 ... PMD: Creating eventdev sw device event_sw0, numa_node=0, sched_quanta=128 RTE>> Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- config/common_base| 5 + drivers/event/Makefile| 1 + drivers/event/sw/Makefile | 67 drivers/event/sw/rte_pmd_evdev_sw_version.map | 3 + drivers/event/sw/sw_evdev.c | 148 ++ drivers/event/sw/sw_evdev.h | 57 ++ mk/rte.app.mk | 1 + 7 files changed, 282 insertions(+) create mode 100644 drivers/event/sw/Makefile create mode 100644 drivers/event/sw/rte_pmd_evdev_sw_version.map create mode 100644 drivers/event/sw/sw_evdev.c create mode 100644 drivers/event/sw/sw_evdev.h diff --git a/config/common_base b/config/common_base index 00af811..6f91172 100644 --- a/config/common_base +++ b/config/common_base @@ -434,6 +434,11 @@ CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV=y CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG=n # +# Compile PMD for software event device +# +CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV=y + +# # Compile librte_ring # CONFIG_RTE_LIBRTE_RING=y diff --git a/drivers/event/Makefile b/drivers/event/Makefile index 678279f..353441c 100644 --- a/drivers/event/Makefile +++ b/drivers/event/Makefile @@ -32,5 +32,6 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV) += skeleton +DIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile new file mode 100644 index 000..c891eb5 --- /dev/null +++ b/drivers/event/sw/Makefile @@ -0,0 +1,67 @@ +# BSD LICENSE +# +# Copyright(c) 2016-2017 Intel Corporation. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + + +# library name +LIB = librte_pmd_sw_event.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +# for older GCC versions, allow us to initialize an event using +# designated initializers. 
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +ifeq ($(shell test $(GCC_VERSION) -le 50 && echo 1), 1) +CFLAGS += -Wno-missing-field-initializers +endif +endif + +# library version +LIBABIVER := 1 + +# versioning export map +EXPORT_MAP := rte_pmd_evdev_sw_version.map + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c + +# export include files +SYMLINK-y-include += + +# library dependencies +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_eventdev +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_kvargs +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_ring + +include $(RTE_SDK)/mk/rte.lib.mk + diff --git a/drivers/event/sw/rte_pmd_evdev_sw_version.map b/drivers/event/sw/rte_pmd_evdev_sw_version.map new file mode 100644 index 000..1f84b68 --- /dev/null +++ b/drivers/event/sw/rte_pmd_evdev_sw_version.map @@ -0,0 +1,3 @@ +DPDK_17.02 { + local: *; +}; diff --git a/drivers/event/sw/sw_evdev.c b/drivers/even
[dpdk-dev] [PATCH 04/15] event/sw: add function to return device capabilities
From: Bruce Richardson Add in the info_get function to return details on the queues, flow, prioritization capabilities, etc. that this device has. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 23 +++ drivers/event/sw/sw_evdev.h | 11 +++ 2 files changed, 34 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 29762b8..d75bf50 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -45,6 +45,28 @@ int sched_quanta = 128; +static void +sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) +{ + RTE_SET_USED(dev); + + static const struct rte_event_dev_info evdev_sw_info = { + .driver_name = PMD_NAME, + .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV, + .max_event_queue_flows = SW_QID_NUM_FIDS, + .max_event_queue_priority_levels = SW_Q_PRIORITY_MAX, + .max_event_priority_levels = SW_IQS_MAX, + .max_event_ports = SW_PORTS_MAX, + .max_event_port_dequeue_depth = MAX_SW_CONS_Q_DEPTH, + .max_event_port_enqueue_depth = MAX_SW_PROD_Q_DEPTH, + .max_num_events = SW_INFLIGHT_EVENTS_TOTAL, + .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_EVENT_QOS), + }; + + *info = evdev_sw_info; +} + static int assign_numa_node(const char *key __rte_unused, const char *value, void *opaque) { @@ -69,6 +91,7 @@ static int sw_probe(const char *name, const char *params) { static const struct rte_eventdev_ops evdev_sw_ops = { + .dev_infos_get = sw_info_get, }; static const char *const args[] = { NUMA_NODE_ARG, SCHED_QUANTA_ARG, NULL }; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 1400417..b6d99fd 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -36,6 +36,17 @@ #include #include +#define PMD_NAME "event_sw" + +#define SW_QID_NUM_FIDS 16384 +#define SW_IQS_MAX 4 +#define SW_Q_PRIORITY_MAX 255 +#define SW_PORTS_MAX 64 +#define MAX_SW_CONS_Q_DEPTH 128 +#define SW_INFLIGHT_EVENTS_TOTAL 4096 +/* 
allow for lots of over-provisioning */ +#define MAX_SW_PROD_Q_DEPTH 4096 + struct sw_evdev { struct rte_eventdev_data *data; }; -- 2.7.4
[dpdk-dev] [PATCH 06/15] event/sw: add fns to return default port/queue config
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 1bdcc05..8cc8e06 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -45,6 +45,35 @@ int sched_quanta = 128; +static void +sw_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, +struct rte_event_queue_conf *conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + static const struct rte_event_queue_conf default_conf = { + .nb_atomic_flows = 4096, + .nb_atomic_order_sequences = 1, + .event_queue_cfg = RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + + *conf = default_conf; +} + +static void +sw_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, +struct rte_event_port_conf *port_conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = 1024; + port_conf->dequeue_depth = 16; + port_conf->enqueue_depth = 16; +} + static int sw_dev_configure(const struct rte_eventdev *dev) { @@ -109,6 +138,9 @@ sw_probe(const char *name, const char *params) static const struct rte_eventdev_ops evdev_sw_ops = { .dev_configure = sw_dev_configure, .dev_infos_get = sw_info_get, + + .queue_def_conf = sw_queue_def_conf, + .port_def_conf = sw_port_def_conf, }; static const char *const args[] = { NUMA_NODE_ARG, SCHED_QUANTA_ARG, NULL }; -- 2.7.4
[dpdk-dev] [PATCH 08/15] event/sw: add support for event ports
From: Bruce Richardson Add in the data-structures for the ports used by workers to send packets to/from the scheduler. Also add in the functions to create/destroy those ports. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/event_ring.h | 179 ++ drivers/event/sw/sw_evdev.c | 71 + drivers/event/sw/sw_evdev.h | 76 ++ 3 files changed, 326 insertions(+) create mode 100644 drivers/event/sw/event_ring.h diff --git a/drivers/event/sw/event_ring.h b/drivers/event/sw/event_ring.h new file mode 100644 index 000..67aa72e --- /dev/null +++ b/drivers/event/sw/event_ring.h @@ -0,0 +1,179 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Generic ring structure for passing events from one core to another. + * + * Used by the software scheduler for the producer and consumer rings for + * each port, i.e. for passing events from worker cores to scheduler and + * vice-versa. Designed for single-producer, single-consumer use with two + * cores working on each ring. + */ + +#ifndef _EVENT_RING_ +#define _EVENT_RING_ + +#include + +#include +#include +#include + +#define QE_RING_NAMESIZE 32 + +struct qe_ring { + char name[QE_RING_NAMESIZE] __rte_cache_aligned; + uint32_t ring_size; /* size of memory block allocated to the ring */ + uint32_t mask; /* mask for read/write values == ring_size -1 */ + uint32_t size; /* actual usable space in the ring */ + volatile uint32_t write_idx __rte_cache_aligned; + volatile uint32_t read_idx __rte_cache_aligned; + + struct rte_event ring[0] __rte_cache_aligned; +}; + +#ifndef force_inline +#define force_inline inline __attribute__((always_inline)) +#endif + +static inline struct qe_ring * +qe_ring_create(const char *name, unsigned int size, unsigned int socket_id) +{ + struct qe_ring *retval; + const uint32_t ring_size = rte_align32pow2(size + 1); + size_t memsize = sizeof(*retval) + + (ring_size * sizeof(retval->ring[0])); + + retval = rte_zmalloc_socket(NULL, memsize, 0, socket_id); + if (retval == NULL) + goto end; + + snprintf(retval->name, sizeof(retval->name), "EVDEV_RG_%s", name); + retval->ring_size = ring_size; + 
retval->mask = ring_size - 1; + retval->size = size; +end: + return retval; +} + +static inline void +qe_ring_destroy(struct qe_ring *r) +{ + rte_free(r); +} + +static force_inline unsigned int +qe_ring_count(const struct qe_ring *r) +{ + return r->write_idx - r->read_idx; +} + +static force_inline unsigned int +qe_ring_free_count(const struct qe_ring *r) +{ + return r->size - qe_ring_count(r); +} + +static force_inline unsigned int +qe_ring_enqueue_burst(struct qe_ring *r, const struct rte_event *qes, + unsigned int nb_qes, uint16_t *free_count) +{ + const uint32_t size = r->size; + const uint32_t mask = r->mask; + const uint32_t read = r->read_idx; + uint32_t write = r->write_idx; + const uint32_t space = read + size - write; + uint32_t i; + + if (space < nb_qes) + nb_qes = space; + + for (i = 0; i < nb
[dpdk-dev] [PATCH 09/15] event/sw: add support for linking queues to ports
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 67 + 1 file changed, 67 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 51f4ffd..6c2593e 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -51,6 +51,71 @@ static void sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info); static int +sw_port_link(void *port, const uint8_t queues[], const uint8_t priorities[], + uint16_t num) +{ + struct sw_port *p = (void *)port; + struct sw_evdev *sw = p->sw; + int i; + + RTE_SET_USED(priorities); + for (i = 0; i < num; i++) { + struct sw_qid *q = &sw->qids[queues[i]]; + + /* check for qid map overflow */ + if (q->cq_num_mapped_cqs >= RTE_DIM(q->cq_map)) + break; + + if (p->is_directed && p->num_qids_mapped > 0) + break; + + if (q->type == RTE_SCHED_TYPE_DIRECT) { + /* check directed qids only map to one port */ + if (p->num_qids_mapped > 0) + break; + /* check port only takes a directed flow */ + if (num > 1) + break; + + p->is_directed = 1; + p->num_qids_mapped = 1; + } else if (q->type == RTE_SCHED_TYPE_ORDERED) { + p->num_ordered_qids++; + p->num_qids_mapped++; + } else if (q->type == RTE_SCHED_TYPE_ATOMIC) { + p->num_qids_mapped++; + } + + q->cq_map[q->cq_num_mapped_cqs] = p->id; + rte_smp_wmb(); + q->cq_num_mapped_cqs++; + } + return i; +} + +static int +sw_port_unlink(void *port, uint8_t queues[], uint16_t nb_unlinks) +{ + struct sw_port *p = (void *)port; + struct sw_evdev *sw = p->sw; + unsigned int i, j; + + int unlinked = 0; + for (i = 0; i < nb_unlinks; i++) { + struct sw_qid *q = &sw->qids[queues[i]]; + for (j = 0; j < q->cq_num_mapped_cqs; j++) + if (q->cq_map[j] == p->id) { + q->cq_map[j] = q->cq_map[q->cq_num_mapped_cqs - 1]; + rte_smp_wmb(); + q->cq_num_mapped_cqs--; + unlinked++; + continue; + } + } + return unlinked; +} + +static int sw_port_setup(struct rte_eventdev *dev, uint8_t port_id, const struct 
rte_event_port_conf *conf) { @@ -368,6 +433,8 @@ sw_probe(const char *name, const char *params) .port_def_conf = sw_port_def_conf, .port_setup = sw_port_setup, .port_release = sw_port_release, + .port_link = sw_port_link, + .port_unlink = sw_port_unlink, }; static const char *const args[] = { NUMA_NODE_ARG, SCHED_QUANTA_ARG, NULL }; -- 2.7.4
[dpdk-dev] [PATCH 07/15] event/sw: add support for event queues
From: Bruce Richardson Add in the data structures for the event queues, and the eventdev functions to create and destroy those queues. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/iq_ring.h | 176 drivers/event/sw/sw_evdev.c | 156 +++ drivers/event/sw/sw_evdev.h | 75 +++ 3 files changed, 407 insertions(+) create mode 100644 drivers/event/sw/iq_ring.h diff --git a/drivers/event/sw/iq_ring.h b/drivers/event/sw/iq_ring.h new file mode 100644 index 000..5994e70 --- /dev/null +++ b/drivers/event/sw/iq_ring.h @@ -0,0 +1,176 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Ring structure definitions used for the internal ring buffers of the + * SW eventdev implementation. These are designed for single-core use only. + */ +#ifndef _IQ_RING_ +#define _IQ_RING_ + +#include + +#include +#include +#include +#include + +#define IQ_RING_NAMESIZE 12 +#define QID_IQ_DEPTH 512 +#define QID_IQ_MASK (uint16_t)(QID_IQ_DEPTH - 1) + +struct iq_ring { + char name[IQ_RING_NAMESIZE] __rte_cache_aligned; + uint16_t write_idx; + uint16_t read_idx; + + struct rte_event ring[QID_IQ_DEPTH]; +}; + +#ifndef force_inline +#define force_inline inline __attribute__((always_inline)) +#endif + +static inline struct iq_ring * +iq_ring_create(const char *name, unsigned int socket_id) +{ + struct iq_ring *retval; + + retval = rte_malloc_socket(NULL, sizeof(*retval), 0, socket_id); + if (retval == NULL) + goto end; + + snprintf(retval->name, sizeof(retval->name), "%s", name); + retval->write_idx = retval->read_idx = 0; +end: + return retval; +} + +static inline void +iq_ring_destroy(struct iq_ring *r) +{ + rte_free(r); +} + +static force_inline uint16_t +iq_ring_count(const struct iq_ring *r) +{ + return r->write_idx - r->read_idx; +} + +static force_inline uint16_t +iq_ring_free_count(const struct iq_ring *r) +{ + return QID_IQ_MASK - iq_ring_count(r); +} + +static force_inline uint16_t +iq_ring_enqueue_burst(struct iq_ring *r, struct rte_event *qes, uint16_t nb_qes) +{ + const uint16_t read = r->read_idx; + uint16_t 
write = r->write_idx; + const uint16_t space = read + QID_IQ_MASK - write; + uint16_t i; + + if (space < nb_qes) + nb_qes = space; + + for (i = 0; i < nb_qes; i++, write++) + r->ring[write & QID_IQ_MASK] = qes[i]; + + r->write_idx = write; + + return nb_qes; +} + +static force_inline uint16_t +iq_ring_dequeue_burst(struct iq_ring *r, struct rte_event *qes, uint16_t nb_qes) +{ + uint16_t read = r->read_idx; + const uint16_t write = r->write_idx; + const uint16_t items = write - read; + uint16_t i; + + for (i = 0; i < nb_qes; i++, read++) + qes[i] = r->ring[read & QID_IQ_MASK]; + + if (items < nb_qes) + nb_qes = items; + + r->read_idx += nb_qes; + + return nb_qes; +} + +/* assumes there is space, from a previous dequeue_burst */ +static force_inline uint16_t +iq_ring_put_back(struct iq_ring *r, struct rte_event *qes, uint16
[dpdk-dev] [PATCH 05/15] event/sw: add configure function
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 17 + drivers/event/sw/sw_evdev.h | 13 + 2 files changed, 30 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index d75bf50..1bdcc05 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -45,6 +45,22 @@ int sched_quanta = 128; +static int +sw_dev_configure(const struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + const struct rte_eventdev_data *data = dev->data; + const struct rte_event_dev_config *conf = &data->dev_conf; + + sw->qid_count = conf->nb_event_queues; + sw->port_count = conf->nb_event_ports; + sw->nb_events_limit = conf->nb_events_limit; + uint32_t quanta_pool = conf->nb_events_limit / SW_INFLIGHT_QUANTA_SIZE; + rte_atomic32_set(&sw->inflight_quanta, quanta_pool); + + return 0; +} + static void sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) { @@ -91,6 +107,7 @@ static int sw_probe(const char *name, const char *params) { static const struct rte_eventdev_ops evdev_sw_ops = { + .dev_configure = sw_dev_configure, .dev_infos_get = sw_info_get, }; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index b6d99fd..514b0b1 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -35,6 +35,7 @@ #include #include +#include #define PMD_NAME "event_sw" @@ -46,9 +47,21 @@ #define SW_INFLIGHT_EVENTS_TOTAL 4096 /* allow for lots of over-provisioning */ #define MAX_SW_PROD_Q_DEPTH 4096 +#define SW_INFLIGHT_QUANTA_SIZE 32 struct sw_evdev { struct rte_eventdev_data *data; + + uint32_t port_count; + uint32_t qid_count; + + rte_atomic32_t inflight_quanta __rte_cache_aligned; + /* +* max events in this instance. Cached here for performance. +* (also available in data->conf.nb_events_limit) +*/ + uint32_t nb_events_limit; + }; static inline struct sw_evdev * -- 2.7.4
[dpdk-dev] [PATCH 12/15] event/sw: add start, stop and close functions
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 74 + 1 file changed, 74 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 021f3ab..318f1d7 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -401,6 +401,77 @@ sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) } static int +sw_start(struct rte_eventdev *dev) +{ + unsigned int i, j; + struct sw_evdev *sw = sw_pmd_priv(dev); + /* check all ports are set up */ + for (i = 0; i < sw->port_count; i++) + if (sw->ports[i].rx_worker_ring == NULL) { + printf("%s %d: port %d not configured\n", + __func__, __LINE__, i); + return -1; + } + + /* check all queues are configured and mapped to ports*/ + for (i = 0; i < sw->qid_count; i++) + if (sw->qids[i].iq[0] == NULL || + sw->qids[i].cq_num_mapped_cqs == 0) { + printf("%s %d: queue %d not configured\n", + __func__, __LINE__, i); + return -1; + } + + /* build up our prioritized array of qids */ + /* We don't use qsort here, as if all/multiple entries have the same +* priority, the result is non-deterministic. From "man 3 qsort": +* "If two members compare as equal, their order in the sorted +* array is undefined." 
+*/ + uint32_t qidx = 0; + for (j = 0; j <= RTE_EVENT_DEV_PRIORITY_LOWEST; j++) { + for (i = 0; i < sw->qid_count; i++) { + if (sw->qids[i].priority == j) { + sw->qids_prioritized[qidx] = &sw->qids[i]; + qidx++; + } + } + } + sw->started = 1; + return 0; +} + +static void +sw_stop(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + sw->started = 0; +} + +static int +sw_close(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + uint32_t i; + + for (i = 0; i < sw->qid_count; i++) + sw_queue_release(dev, i); + sw->qid_count = 0; + + for (i = 0; i < sw->port_count; i++) + sw_port_release(&sw->ports[i]); + sw->port_count = 0; + + memset(&sw->stats, 0, sizeof(sw->stats)); + sw->sched_called = 0; + sw->sched_no_iq_enqueues = 0; + sw->sched_no_cq_enqueues = 0; + sw->sched_cq_qid_called = 0; + + return 0; +} + +static int assign_numa_node(const char *key __rte_unused, const char *value, void *opaque) { int *socket_id = opaque; @@ -426,6 +497,9 @@ sw_probe(const char *name, const char *params) static const struct rte_eventdev_ops evdev_sw_ops = { .dev_configure = sw_dev_configure, .dev_infos_get = sw_info_get, + .dev_close = sw_close, + .dev_start = sw_start, + .dev_stop = sw_stop, .queue_def_conf = sw_queue_def_conf, .queue_setup = sw_queue_setup, -- 2.7.4
[dpdk-dev] [PATCH 13/15] event/sw: add dump function for easier debugging
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 109 1 file changed, 109 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 318f1d7..ea34b99 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -400,6 +400,114 @@ sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) *info = evdev_sw_info; } +static void +sw_dump(struct rte_eventdev *dev, FILE *f) +{ + const struct sw_evdev *sw = sw_pmd_priv(dev); + + static const char * const q_type_strings[] = { + "Ordered", "Atomic", "Parallel", "Directed" + }; + uint32_t i; + fprintf(f, "EventDev %s: ports %d, qids %d\n", "todo-fix-name", + sw->port_count, sw->qid_count); + + fprintf(f, "\trx %"PRIu64"\n\tdrop %"PRIu64"\n\ttx %"PRIu64"\n", + sw->stats.rx_pkts, sw->stats.rx_dropped, sw->stats.tx_pkts); + fprintf(f, "\tsched calls: %"PRIu64"\n", sw->sched_called); + fprintf(f, "\tsched cq/qid call: %"PRIu64"\n", sw->sched_cq_qid_called); + fprintf(f, "\tsched no IQ enq: %"PRIu64"\n", sw->sched_no_iq_enqueues); + fprintf(f, "\tsched no CQ enq: %"PRIu64"\n", sw->sched_no_cq_enqueues); + uint32_t quanta = *((const uint32_t *)(&sw->inflight_quanta)); + uint32_t credits = sw->nb_events_limit - (quanta * SW_INFLIGHT_QUANTA_SIZE); + fprintf(f, "\tinflight sw credits: %d\n", credits); + +#define COL_RED "\x1b[31m" +#define COL_RESET "\x1b[0m" + + for (i = 0; i < sw->port_count; i++) { + int max, j; + const struct sw_port *p = &sw->ports[i]; + fprintf(f, " Port %d %s\n", i, + p->is_directed ? 
" (SingleCons)" : ""); + fprintf(f, "\trx %"PRIu64"\tdrop %"PRIu64"\ttx %"PRIu64"\tinflight %d\n", + sw->ports[i].stats.rx_pkts, + sw->ports[i].stats.rx_dropped, + sw->ports[i].stats.tx_pkts, sw->ports[i].inflights); + + fprintf(f, "\tAvg cycles PP: %"PRIu64"\tCredits: %u\n", + sw->ports[i].avg_pkt_ticks, sw->ports[i].inflight_credits); + fprintf(f, "\tReceive burst distribution:\n"); + float zp_percent = p->zero_polls * 100.0 / p->total_polls; + fprintf(f, zp_percent < 10 ? "\t\t0:%.02f%% " : "\t\t0:%.0f%% ", + zp_percent); + for (max = (int)RTE_DIM(p->poll_buckets); max --> 0;) + if (p->poll_buckets[max] != 0) + break; + for (j = 0; j <= max; j++) + if (p->poll_buckets[j] != 0) + printf("%u-%u:%.02f%% ", + ((j << SW_DEQ_STAT_BUCKET_SHIFT) + 1), + ((j+1) << SW_DEQ_STAT_BUCKET_SHIFT), + p->poll_buckets[j] * 100.0 / p->total_polls); + printf("\n"); + + uint64_t rx_used = qe_ring_count(p->rx_worker_ring); + uint64_t rx_free = qe_ring_free_count(p->rx_worker_ring); + const char *rxcol = (rx_free == 0) ? COL_RED : COL_RESET; + fprintf(f, "\t%srx ring used: %4"PRIu64"\tfree: %4"PRIu64 COL_RESET"\n", + rxcol, rx_used, rx_free); + + uint64_t tx_used = qe_ring_count(p->cq_worker_ring); + uint64_t tx_free = qe_ring_free_count(p->cq_worker_ring); + const char *txcol = (tx_free == 0) ? COL_RED : COL_RESET; + fprintf(f, "\t%scq ring used: %4"PRIu64"\tfree: %4"PRIu64 COL_RESET"\n", + txcol, tx_used, tx_free); + } + + for (i = 0; i < sw->qid_count; i++) { + const struct sw_qid *qid = &sw->qids[i]; + int affinities_per_port[SW_PORTS_MAX] = {0}; + uint32_t inflights = 0; + + fprintf(f, " Queue %d (%s)\n", i, q_type_strings[qid->type]); + fprintf(f, "\trx %"PRIu64"\tdrop %"PRIu64"\ttx %"PRIu64"\n", +
[dpdk-dev] [PATCH 11/15] event/sw: add scheduling logic
From: Bruce Richardson Add in the scheduling function which takes the events from the producer queues and buffers them before scheduling them to consumer queues. The scheduling logic includes support for atomic, reordered, and parallel scheduling of flows. Signed-off-by: Bruce Richardson Signed-off-by: Gage Eads Signed-off-by: Harry van Haaren --- drivers/event/sw/Makefile | 1 + drivers/event/sw/sw_evdev.c | 1 + drivers/event/sw/sw_evdev.h | 12 + drivers/event/sw/sw_evdev_scheduler.c | 586 ++ 4 files changed, 600 insertions(+) create mode 100644 drivers/event/sw/sw_evdev_scheduler.c diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index 7c23b73..e96c457 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -54,6 +54,7 @@ EXPORT_MAP := rte_pmd_evdev_sw_version.map # library source files SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_scheduler.c # export include files SYMLINK-y-include += diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index fbf7598..021f3ab 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -489,6 +489,7 @@ sw_probe(const char *name, const char *params) dev->enqueue_burst = sw_event_enqueue_burst; dev->dequeue = sw_event_dequeue; dev->dequeue_burst = sw_event_dequeue_burst; + dev->schedule = sw_event_schedule; sw = dev->data->dev_private; sw->data = dev->data; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 97bad17..a0f3668 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -238,6 +238,17 @@ struct sw_evdev { /* Cache how many packets are in each cq */ uint16_t cq_ring_space[SW_PORTS_MAX] __rte_cache_aligned; + /* Array of pointers to load-balanced QIDs sorted by priority level */ + struct sw_qid *qids_prioritized[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + /* Stats */ + struct sw_point_stats 
stats __rte_cache_aligned; + uint64_t sched_called; + uint64_t sched_no_iq_enqueues; + uint64_t sched_no_cq_enqueues; + uint64_t sched_cq_qid_called; + + uint8_t started; }; static inline struct sw_evdev * @@ -261,5 +272,6 @@ uint16_t sw_event_enqueue_burst(void *port, const struct rte_event ev[], uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, uint64_t wait); +void sw_event_schedule(struct rte_eventdev *dev); #endif /* _SW_EVDEV_H_ */ diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c new file mode 100644 index 000..76829f3 --- /dev/null +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -0,0 +1,586 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sw_evdev.h" +#include "iq_ring.h" +#include "event_ring.h" + +#define SW_IQS_MASK (SW_IQS_MAX-1) + +/* Retrieve the highest priority IQ or -1 if no pkts available. Doing the + * CLZ twice is fast
[dpdk-dev] [PATCH 10/15] event/sw: add worker core functions
From: Bruce Richardson Add the event enqueue, dequeue and release functions to the eventdev. These also include tracking of stats for observability of the load on the scheduler. Internally in the enqueue function, the various types of enqueue operation (forwarding an existing event, sending a new event, dropping a previous event) are converted to a series of flags which will be used by the scheduler code to perform the needed actions for that event. Signed-off-by: Bruce Richardson Signed-off-by: Gage Eads Signed-off-by: Harry van Haaren --- drivers/event/sw/Makefile | 1 + drivers/event/sw/sw_evdev.c| 4 + drivers/event/sw/sw_evdev.h| 33 drivers/event/sw/sw_evdev_worker.c | 169 + 4 files changed, 207 insertions(+) create mode 100644 drivers/event/sw/sw_evdev_worker.c diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index c891eb5..7c23b73 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -53,6 +53,7 @@ EXPORT_MAP := rte_pmd_evdev_sw_version.map # library source files SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c # export include files SYMLINK-y-include += diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 6c2593e..fbf7598 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -485,6 +485,10 @@ sw_probe(const char *name, const char *params) return -EFAULT; } dev->dev_ops = &evdev_sw_ops; + dev->enqueue = sw_event_enqueue; + dev->enqueue_burst = sw_event_enqueue_burst; + dev->dequeue = sw_event_dequeue; + dev->dequeue_burst = sw_event_dequeue_burst; sw = dev->data->dev_private; sw->data = dev->data; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 383ea9c..97bad17 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -52,10 +52,35 @@ #define SW_DEQ_STAT_BUCKET_SHIFT 2 /* report dequeue burst sizes in buckets */ #define SCHED_DEQUEUE_BURST_SIZE 32 /* how 
many packets pulled from port by sched */ #define SW_PORT_HIST_LIST (MAX_SW_PROD_Q_DEPTH) /* size of our history list */ +#define NUM_SAMPLES 64 /* how many data points to use for average stats */ /* have a new scheduling type for 1:1 queue to port links */ #define RTE_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) +enum { + QE_FLAG_VALID_SHIFT = 0, + QE_FLAG_COMPLETE_SHIFT, + QE_FLAG_NOT_EOP_SHIFT, + _QE_FLAG_COUNT +}; + +#define QE_FLAG_VALID (1 << QE_FLAG_VALID_SHIFT) /* set for NEW, FWD, FRAG */ +#define QE_FLAG_COMPLETE (1 << QE_FLAG_COMPLETE_SHIFT) /* set for FWD, DROP */ +#define QE_FLAG_NOT_EOP (1 << QE_FLAG_NOT_EOP_SHIFT) /* set for FRAG only */ + +static const uint8_t sw_qe_flag_map[] = { + QE_FLAG_VALID /* NEW Event */, + QE_FLAG_VALID | QE_FLAG_COMPLETE /* FWD Event */, + QE_FLAG_COMPLETE /* RELEASE Event */, + + /* Values which can be used for future support for partial + * events, i.e. where one event comes back to the scheduler + * as multiple which need to be tracked together + */ + QE_FLAG_VALID | QE_FLAG_COMPLETE | QE_FLAG_NOT_EOP, +}; + + #ifdef RTE_LIBRTE_PMD_EVDEV_SW_DEBUG #define SW_LOG_INFO(fmt, args...) \ RTE_LOG(INFO, PMD, "[%s] %s() line %u: " fmt "\n", \ @@ -229,4 +254,12 @@ sw_pmd_priv_const(const struct rte_eventdev *eventdev) extern int sched_quanta; +uint16_t sw_event_enqueue(void *port, const struct rte_event *ev); +uint16_t sw_event_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t num); + +uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); +uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, + uint64_t wait); + #endif /* _SW_EVDEV_H_ */ diff --git a/drivers/event/sw/sw_evdev_worker.c b/drivers/event/sw/sw_evdev_worker.c new file mode 100644 index 000..3cfaa6f --- /dev/null +++ b/drivers/event/sw/sw_evdev_worker.c @@ -0,0 +1,169 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Inte
[dpdk-dev] [PATCH 14/15] event/sw: add xstats support
From: Bruce Richardson Add support for xstats to report out on the state of the eventdev. Useful for debugging and for unit tests, as well as observability at runtime and performance tuning of apps to work well with the scheduler. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/Makefile | 1 + drivers/event/sw/sw_evdev.c| 7 + drivers/event/sw/sw_evdev.h| 10 + drivers/event/sw/sw_evdev_xstats.c | 401 + 4 files changed, 419 insertions(+) create mode 100644 drivers/event/sw/sw_evdev_xstats.c diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index e96c457..890d2af 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -55,6 +55,7 @@ EXPORT_MAP := rte_pmd_evdev_sw_version.map SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_scheduler.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_xstats.c # export include files SYMLINK-y-include += diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index ea34b99..958e798 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -545,6 +545,8 @@ sw_start(struct rte_eventdev *dev) } } } + if (sw_xstats_init(sw) < 0) + return -1; sw->started = 1; return 0; } @@ -553,6 +555,7 @@ static void sw_stop(struct rte_eventdev *dev) { struct sw_evdev *sw = sw_pmd_priv(dev); + sw_xstats_uninit(sw); sw->started = 0; } @@ -618,6 +621,10 @@ sw_probe(const char *name, const char *params) .port_release = sw_port_release, .port_link = sw_port_link, .port_unlink = sw_port_unlink, + + .get_xstat_names = sw_get_xstat_names, + .get_xstats = sw_get_xstats, + .get_xstat_by_name = sw_get_xstat_by_name, }; static const char *const args[] = { NUMA_NODE_ARG, SCHED_QUANTA_ARG, NULL }; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index a0f3668..d82e61a 100644 --- a/drivers/event/sw/sw_evdev.h +++ 
b/drivers/event/sw/sw_evdev.h @@ -221,6 +221,8 @@ struct sw_evdev { uint32_t port_count; uint32_t qid_count; + uint32_t xstats_count; + struct sw_xstats_entry *xstats; /* Contains all ports - load balanced and directed */ struct sw_port ports[SW_PORTS_MAX] __rte_cache_aligned; @@ -273,5 +275,13 @@ uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, uint64_t wait); void sw_event_schedule(struct rte_eventdev *dev); +int sw_xstats_init(struct sw_evdev *dev); +int sw_xstats_uninit(struct sw_evdev *dev); +int sw_get_xstat_names(const struct rte_eventdev *dev, + struct rte_event_dev_xstat_name *xstats_names, unsigned int size); +int sw_get_xstats(const struct rte_eventdev *dev, const unsigned int ids[], + uint64_t values[], unsigned int n); +uint64_t sw_get_xstat_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id); #endif /* _SW_EVDEV_H_ */ diff --git a/drivers/event/sw/sw_evdev_xstats.c b/drivers/event/sw/sw_evdev_xstats.c new file mode 100644 index 000..f879303 --- /dev/null +++ b/drivers/event/sw/sw_evdev_xstats.c @@ -0,0 +1,401 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT
[dpdk-dev] [PATCH 15/15] app/test: add unit tests for SW eventdev driver
From: Bruce Richardson Since the sw driver is a standalone lookaside device that has no HW requirements, we can provide a set of unit tests that test its functionality across the different queue types and with different input scenarios. Signed-off-by: Bruce Richardson Signed-off-by: David Hunt Signed-off-by: Harry van Haaren --- app/test/Makefile |5 +- app/test/test_sw_eventdev.c | 2031 +++ 2 files changed, 2035 insertions(+), 1 deletion(-) create mode 100644 app/test/test_sw_eventdev.c diff --git a/app/test/Makefile b/app/test/Makefile index e28c079..1770c09 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -197,7 +197,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_blockcipher.c SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_perf.c SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev.c -SRCS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += test_eventdev.c +ifeq ($(CONFIG_RTE_LIBRTE_EVENTDEV),y) +SRCS-y += test_eventdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += test_sw_eventdev.c +endif SRCS-$(CONFIG_RTE_LIBRTE_KVARGS) += test_kvargs.c diff --git a/app/test/test_sw_eventdev.c b/app/test/test_sw_eventdev.c new file mode 100644 index 000..13a8218 --- /dev/null +++ b/app/test/test_sw_eventdev.c @@ -0,0 +1,2031 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "test.h" + +#define MAX_PORTS 16 +#define MAX_QIDS 16 +#define NUM_PACKETS (1<<18) + +static int evdev; + +struct test { + struct rte_mempool *mbuf_pool; + uint8_t port[MAX_PORTS]; + uint8_t qid[MAX_QIDS]; + int nb_qids; +}; + +static struct rte_event release_ev = {.op = RTE_EVENT_OP_RELEASE }; + +static inline struct rte_mbuf * +rte_gen_arp(int portid, struct rte_mempool *mp) +{ + /* + * len = 14 + 46 + * ARP, Request who-has 10.0.0.1 tell 10.0.0.2, length 46 + */ + static const uint8_t arp_request[] = { + /*0x:*/ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xec, 0xa8, + 0x6b, 0xfd, 0x02, 0x29, 0x08, 0x06, 0x00, 0x01, + /*0x0010:*/ 0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0xec, 0xa8, + 0x6b, 0xfd, 0x02, 0x29, 0x0a, 0x00, 0x00, 0x01, + /*0x0020:*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /*0x0030:*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + }; + struct rte_mbuf *m; + int pkt_len = sizeof(arp_request) - 1; + + m = rte_pktmbuf_alloc(mp); + if (!m) + return 0; + + memcpy((void *)((uintptr_t)m->buf_addr + m->data_off), + arp_request, pkt_len); + rte_pktmbuf_pkt_len(m) = pkt_len; + rte_pktmbuf_data_len(m) = pkt_len; + + RTE_SET_USED(portid); + + return m; +} + +/* initialization and qm config */ +static inline int +init(struct test *t, int nb_queues, int nb_ports) +{ + struct rte_event_dev_config config = { +
[dpdk-dev] [PATCH v2 00/15] next-eventdev: event/sw software eventdev
The following patchset adds a software eventdev implementation to the next-eventdev tree. This implementation is based on the previous software eventdev v1 patchset, with the review comments now addressed: 1) xstats API return values changed to be consistent 2) xstats API [out] added to the appropriate values 3) xstats API now uses xxx_get() for consistency 4) patch names fixed for check-log.sh 5) checkpatch issues resolved where it makes sense to (there are certain places where fixing a checkpatch warning makes the code less readable; these checkpatch warnings will still show up - I see no alternative). In addition, the following improvements have been made to the patchset: 1) Added a test that runs automatically with make test 2) Reworked the sw implementation's event credit scheme. The first two patches make changes to the eventdev API, then the software implementation is added, and finally tests are added for the sw eventdev implementation. This patchset contains the work of multiple developers; please see the signoffs on each patch. 
Signed-off-by: Harry van Haaren Bruce Richardson (15): eventdev: remove unneeded dependencies eventdev: add APIs for extended stats event/sw: add new software-only eventdev driver event/sw: add device capabilities function event/sw: add configure function event/sw: add fns to return default port/queue config event/sw: add support for event queues event/sw: add support for event ports event/sw: add support for linking queues to ports event/sw: add worker core functions event/sw: add scheduling logic event/sw: add start stop and close functions event/sw: add dump function for easier debugging event/sw: add xstats support app/test: add unit tests for SW eventdev driver app/test/Makefile |5 +- app/test/autotest_data.py | 26 + app/test/test_sw_eventdev.c | 2071 + config/common_base|5 + drivers/event/Makefile|1 + drivers/event/sw/Makefile | 69 + drivers/event/sw/event_ring.h | 179 +++ drivers/event/sw/iq_ring.h| 176 +++ drivers/event/sw/rte_pmd_evdev_sw_version.map |3 + drivers/event/sw/sw_evdev.c | 754 + drivers/event/sw/sw_evdev.h | 291 drivers/event/sw/sw_evdev_scheduler.c | 602 +++ drivers/event/sw/sw_evdev_worker.c| 186 +++ drivers/event/sw/sw_evdev_xstats.c| 404 + lib/librte_eventdev/Makefile |1 - lib/librte_eventdev/rte_eventdev.c| 63 + lib/librte_eventdev/rte_eventdev.h| 85 +- lib/librte_eventdev/rte_eventdev_pmd.h| 60 + lib/librte_eventdev/rte_eventdev_version.map |3 + mk/rte.app.mk |1 + 20 files changed, 4981 insertions(+), 4 deletions(-) create mode 100644 app/test/test_sw_eventdev.c create mode 100644 drivers/event/sw/Makefile create mode 100644 drivers/event/sw/event_ring.h create mode 100644 drivers/event/sw/iq_ring.h create mode 100644 drivers/event/sw/rte_pmd_evdev_sw_version.map create mode 100644 drivers/event/sw/sw_evdev.c create mode 100644 drivers/event/sw/sw_evdev.h create mode 100644 drivers/event/sw/sw_evdev_scheduler.c create mode 100644 drivers/event/sw/sw_evdev_worker.c create mode 100644 drivers/event/sw/sw_evdev_xstats.c -- 2.7.4
[dpdk-dev] [PATCH v2 03/15] event/sw: add new software-only eventdev driver
From: Bruce Richardson This adds the minimal changes to allow a SW eventdev implementation to be compiled, linked and created at run time. The eventdev does nothing, but can be created via vdev on commandline, e.g. sudo ./x86_64-native-linuxapp-gcc/app/test --vdev=event_sw0 ... PMD: Creating eventdev sw device event_sw0, numa_node=0, sched_quanta=128 RTE>> Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- config/common_base| 5 + drivers/event/Makefile| 1 + drivers/event/sw/Makefile | 66 ++ drivers/event/sw/rte_pmd_evdev_sw_version.map | 3 + drivers/event/sw/sw_evdev.c | 178 ++ drivers/event/sw/sw_evdev.h | 63 + mk/rte.app.mk | 1 + 7 files changed, 317 insertions(+) create mode 100644 drivers/event/sw/Makefile create mode 100644 drivers/event/sw/rte_pmd_evdev_sw_version.map create mode 100644 drivers/event/sw/sw_evdev.c create mode 100644 drivers/event/sw/sw_evdev.h diff --git a/config/common_base b/config/common_base index 00af811..6f91172 100644 --- a/config/common_base +++ b/config/common_base @@ -434,6 +434,11 @@ CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV=y CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG=n # +# Compile PMD for software event device +# +CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV=y + +# # Compile librte_ring # CONFIG_RTE_LIBRTE_RING=y diff --git a/drivers/event/Makefile b/drivers/event/Makefile index 678279f..353441c 100644 --- a/drivers/event/Makefile +++ b/drivers/event/Makefile @@ -32,5 +32,6 @@ include $(RTE_SDK)/mk/rte.vars.mk DIRS-$(CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV) += skeleton +DIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile new file mode 100644 index 000..d6836e3 --- /dev/null +++ b/drivers/event/sw/Makefile @@ -0,0 +1,66 @@ +# BSD LICENSE +# +# Copyright(c) 2016-2017 Intel Corporation. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + + +# library name +LIB = librte_pmd_sw_event.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +# for older GCC versions, allow us to initialize an event using +# designated initializers. 
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +ifeq ($(shell test $(GCC_VERSION) -le 50 && echo 1), 1) +CFLAGS += -Wno-missing-field-initializers +endif +endif + +# library version +LIBABIVER := 1 + +# versioning export map +EXPORT_MAP := rte_pmd_evdev_sw_version.map + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c + +# export include files +SYMLINK-y-include += + +# library dependencies +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_eventdev +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_kvargs +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += lib/librte_ring + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/event/sw/rte_pmd_evdev_sw_version.map b/drivers/event/sw/rte_pmd_evdev_sw_version.map new file mode 100644 index 000..1f84b68 --- /dev/null +++ b/drivers/event/sw/rte_pmd_evdev_sw_version.map @@ -0,0 +1,3 @@ +DPDK_17.02 { + local: *; +}; diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/
[dpdk-dev] [PATCH v2 01/15] eventdev: remove unneeded dependencies
From: Bruce Richardson Since eventdev uses event structures rather than working directly on mbufs, there is no actual dependency on the mbuf library. The inclusion of an mbuf pointer element inside the event itself does not require the inclusion of the mbuf header file. Similarly, the pci header is not needed; however, once both headers are removed, rte_memory.h must be included for the definition of the __rte_cache_aligned macro. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- lib/librte_eventdev/Makefile | 1 - lib/librte_eventdev/rte_eventdev.h | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile index dac0663..396e5ec 100644 --- a/lib/librte_eventdev/Makefile +++ b/lib/librte_eventdev/Makefile @@ -52,6 +52,5 @@ EXPORT_MAP := rte_eventdev_version.map # library dependencies DEPDIRS-y += lib/librte_eal -DEPDIRS-y += lib/librte_mbuf include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index e1bd05f..c2f9310 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -244,8 +244,9 @@ extern "C" { #endif #include -#include -#include +#include + +struct rte_mbuf; /* we just use mbuf pointers; no need to include rte_mbuf.h */ /* Event device capability bitmap flags */ #define RTE_EVENT_DEV_CAP_QUEUE_QOS (1ULL << 0) -- 2.7.4
[dpdk-dev] [PATCH v2 04/15] event/sw: add device capabilities function
From: Bruce Richardson Add in the info_get function to return details on the queues, flow, prioritization capabilities, etc. that this device has. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 23 +++ drivers/event/sw/sw_evdev.h | 10 ++ 2 files changed, 33 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index d60f00f..4dca4cf 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -44,6 +44,28 @@ #define SCHED_QUANTA_ARG "sched_quanta" #define CREDIT_QUANTA_ARG "credit_quanta" +static void +sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) +{ + RTE_SET_USED(dev); + + static const struct rte_event_dev_info evdev_sw_info = { + .driver_name = PMD_NAME, + .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV, + .max_event_queue_flows = SW_QID_NUM_FIDS, + .max_event_queue_priority_levels = SW_Q_PRIORITY_MAX, + .max_event_priority_levels = SW_IQS_MAX, + .max_event_ports = SW_PORTS_MAX, + .max_event_port_dequeue_depth = MAX_SW_CONS_Q_DEPTH, + .max_event_port_enqueue_depth = MAX_SW_PROD_Q_DEPTH, + .max_num_events = SW_INFLIGHT_EVENTS_TOTAL, + .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_EVENT_QOS), + }; + + *info = evdev_sw_info; +} + static int assign_numa_node(const char *key __rte_unused, const char *value, void *opaque) { @@ -78,6 +100,7 @@ static int sw_probe(const char *name, const char *params) { static const struct rte_eventdev_ops evdev_sw_ops = { + .dev_infos_get = sw_info_get, }; static const char *const args[] = { diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 9494659..6e3cb36 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -36,8 +36,18 @@ #include #include +#define PMD_NAME "event_sw" + #define SW_DEFAULT_CREDIT_QUANTA 32 #define SW_DEFAULT_SCHED_QUANTA 128 +#define SW_QID_NUM_FIDS 16384 +#define SW_IQS_MAX 4 +#define SW_Q_PRIORITY_MAX 255 +#define 
SW_PORTS_MAX 64 +#define MAX_SW_CONS_Q_DEPTH 128 +#define SW_INFLIGHT_EVENTS_TOTAL 4096 +/* allow for lots of over-provisioning */ +#define MAX_SW_PROD_Q_DEPTH 4096 struct sw_evdev { struct rte_eventdev_data *data; -- 2.7.4
[dpdk-dev] [PATCH v2 02/15] eventdev: add APIs for extended stats
From: Bruce Richardson Add in APIs for extended stats so that eventdev implementations can report out information on their internal state. The APIs are based on, but not identical to, the equivalent ethdev functions. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- lib/librte_eventdev/rte_eventdev.c | 63 ++ lib/librte_eventdev/rte_eventdev.h | 80 lib/librte_eventdev/rte_eventdev_pmd.h | 60 + lib/librte_eventdev/rte_eventdev_version.map | 3 ++ 4 files changed, 206 insertions(+) diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index c8f3e94..95572f4 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -920,6 +920,69 @@ rte_event_dev_dump(uint8_t dev_id, FILE *f) } +static int +get_xstats_count(uint8_t dev_id) +{ + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + if (dev->dev_ops->get_xstat_names != NULL) + return (*dev->dev_ops->get_xstat_names)(dev, NULL, 0); + return 0; +} + +int +rte_event_dev_xstats_names_get(uint8_t dev_id, + struct rte_event_dev_xstat_name *xstats_names, + unsigned int size) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + const int cnt_expected_entries = get_xstats_count(dev_id); + if (xstats_names == NULL || cnt_expected_entries < 0 || + (int)size < cnt_expected_entries) + return cnt_expected_entries; + + /* dev_id checked above */ + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + if (dev->dev_ops->get_xstat_names != NULL) + return (*dev->dev_ops->get_xstat_names)(dev, + xstats_names, size); + + return -ENOTSUP; +} + +/* retrieve eventdev extended statistics */ +int +rte_event_dev_xstats_get(uint8_t dev_id, const unsigned int ids[], + uint64_t values[], unsigned int n) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + + /* implemented by the driver */ + if (dev->dev_ops->get_xstats != NULL) + return (*dev->dev_ops->get_xstats)(dev, ids, values, n); + 
return -ENOTSUP; +} + +uint64_t +rte_event_dev_xstats_by_name_get(uint8_t dev_id, const char *name, + unsigned int *id) +{ + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, 0); + const struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + unsigned int temp = -1; + + if (id != NULL) + *id = (unsigned int)-1; + else + id = &temp; /* ensure driver never gets a NULL value */ + + /* implemented by driver */ + if (dev->dev_ops->get_xstat_by_name != NULL) + return (*dev->dev_ops->get_xstat_by_name)(dev, name, id); + return -ENOTSUP; +} + int rte_event_dev_start(uint8_t dev_id) { diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index c2f9310..66fc35a 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1401,6 +1401,86 @@ rte_event_port_links_get(uint8_t dev_id, uint8_t port_id, int rte_event_dev_dump(uint8_t dev_id, FILE *f); +/** Maximum name length for extended statistics counters */ +#define RTE_EVENT_DEV_XSTAT_NAME_SIZE 64 + +/** + * A name-key lookup element for extended statistics. + * + * This structure is used to map between names and ID numbers + * for extended eventdev statistics. + */ +struct rte_event_dev_xstat_name { + char name[RTE_EVENT_DEV_XSTAT_NAME_SIZE]; +}; + +/** + * Retrieve names of extended statistics of an event device. + * + * @param dev_id + * The identifier of the event device. + * @param[out] xstat_names + * Block of memory to insert names into. Must be at least size in capacity. + * If set to NULL, function returns required capacity. + * @param size + * Capacity of xstat_names (number of names). + * @return + * - positive value lower than or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. 
+ * - negative value on error. -EINVAL for invalid dev id, -ENOTSUP if the + * device doesn't support this function. + */ +int +rte_event_dev_xstats_names_get(uint8_t dev_id, + struct rte_event_dev_xstat_name *xstat_names, + unsigned int size); + +/** + * Retrieve extended statistics of an event device. + * + * @param dev_id + * The identifier of the device. + * @param
[dpdk-dev] [PATCH v2 05/15] event/sw: add configure function
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 15 +++ drivers/event/sw/sw_evdev.h | 11 +++ 2 files changed, 26 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 4dca4cf..b657eb4 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -44,6 +44,20 @@ #define SCHED_QUANTA_ARG "sched_quanta" #define CREDIT_QUANTA_ARG "credit_quanta" +static int +sw_dev_configure(const struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + const struct rte_eventdev_data *data = dev->data; + const struct rte_event_dev_config *conf = &data->dev_conf; + + sw->qid_count = conf->nb_event_queues; + sw->port_count = conf->nb_event_ports; + sw->nb_events_limit = conf->nb_events_limit; + + return 0; +} + static void sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) { @@ -100,6 +114,7 @@ static int sw_probe(const char *name, const char *params) { static const struct rte_eventdev_ops evdev_sw_ops = { + .dev_configure = sw_dev_configure, .dev_infos_get = sw_info_get, }; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 6e3cb36..65f00e4 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -35,6 +35,7 @@ #include #include +#include #define PMD_NAME "event_sw" @@ -52,7 +53,17 @@ struct sw_evdev { struct rte_eventdev_data *data; + uint32_t port_count; + uint32_t qid_count; + + /* +* max events in this instance. Cached here for performance. +* (also available in data->dev_conf.nb_events_limit) +*/ + uint32_t nb_events_limit; + int32_t sched_quanta; + uint32_t credit_update_quanta; }; -- 2.7.4
[dpdk-dev] [PATCH v2 07/15] event/sw: add support for event queues
From: Bruce Richardson Add in the data structures for the event queues, and the eventdev functions to create and destroy those queues. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/iq_ring.h | 176 drivers/event/sw/sw_evdev.c | 158 +++ drivers/event/sw/sw_evdev.h | 75 +++ 3 files changed, 409 insertions(+) create mode 100644 drivers/event/sw/iq_ring.h diff --git a/drivers/event/sw/iq_ring.h b/drivers/event/sw/iq_ring.h new file mode 100644 index 000..d480d15 --- /dev/null +++ b/drivers/event/sw/iq_ring.h @@ -0,0 +1,176 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Ring structure definitions used for the internal ring buffers of the + * SW eventdev implementation. These are designed for single-core use only. + */ +#ifndef _IQ_RING_ +#define _IQ_RING_ + +#include + +#include +#include +#include +#include + +#define IQ_RING_NAMESIZE 12 +#define QID_IQ_DEPTH 512 +#define QID_IQ_MASK (uint16_t)(QID_IQ_DEPTH - 1) + +struct iq_ring { + char name[IQ_RING_NAMESIZE] __rte_cache_aligned; + uint16_t write_idx; + uint16_t read_idx; + + struct rte_event ring[QID_IQ_DEPTH]; +}; + +#ifndef force_inline +#define force_inline inline __attribute__((always_inline)) +#endif + +static inline struct iq_ring * +iq_ring_create(const char *name, unsigned int socket_id) +{ + struct iq_ring *retval; + + retval = rte_malloc_socket(NULL, sizeof(*retval), 0, socket_id); + if (retval == NULL) + goto end; + + snprintf(retval->name, sizeof(retval->name), "%s", name); + retval->write_idx = retval->read_idx = 0; +end: + return retval; +} + +static inline void +iq_ring_destroy(struct iq_ring *r) +{ + rte_free(r); +} + +static force_inline uint16_t +iq_ring_count(const struct iq_ring *r) +{ + return r->write_idx - r->read_idx; +} + +static force_inline uint16_t +iq_ring_free_count(const struct iq_ring *r) +{ + return QID_IQ_MASK - iq_ring_count(r); +} + +static force_inline uint16_t +iq_ring_enqueue_burst(struct iq_ring *r, struct rte_event *qes, uint16_t nb_qes) +{ + const uint16_t read = r->read_idx; + uint16_t 
write = r->write_idx; + const uint16_t space = read + QID_IQ_MASK - write; + uint16_t i; + + if (space < nb_qes) + nb_qes = space; + + for (i = 0; i < nb_qes; i++, write++) + r->ring[write & QID_IQ_MASK] = qes[i]; + + r->write_idx = write; + + return nb_qes; +} + +static force_inline uint16_t +iq_ring_dequeue_burst(struct iq_ring *r, struct rte_event *qes, uint16_t nb_qes) +{ + uint16_t read = r->read_idx; + const uint16_t write = r->write_idx; + const uint16_t items = write - read; + uint16_t i; + + for (i = 0; i < nb_qes; i++, read++) + qes[i] = r->ring[read & QID_IQ_MASK]; + + if (items < nb_qes) + nb_qes = items; + + r->read_idx += nb_qes; + + return nb_qes; +} + +/* assumes there is space, from a previous dequeue_burst */ +static force_inline uint16_t +iq_ring_put_back(struct iq_ring *r, struct rte_event *qes, uint16
[dpdk-dev] [PATCH v2 08/15] event/sw: add support for event ports
From: Bruce Richardson Add in the data-structures for the ports used by workers to send packets to/from the scheduler. Also add in the functions to create/destroy those ports. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/event_ring.h | 179 ++ drivers/event/sw/sw_evdev.c | 75 ++ drivers/event/sw/sw_evdev.h | 76 ++ 3 files changed, 330 insertions(+) create mode 100644 drivers/event/sw/event_ring.h diff --git a/drivers/event/sw/event_ring.h b/drivers/event/sw/event_ring.h new file mode 100644 index 000..67aa72e --- /dev/null +++ b/drivers/event/sw/event_ring.h @@ -0,0 +1,179 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Generic ring structure for passing events from one core to another. + * + * Used by the software scheduler for the producer and consumer rings for + * each port, i.e. for passing events from worker cores to scheduler and + * vice-versa. Designed for single-producer, single-consumer use with two + * cores working on each ring. + */ + +#ifndef _EVENT_RING_ +#define _EVENT_RING_ + +#include + +#include +#include +#include + +#define QE_RING_NAMESIZE 32 + +struct qe_ring { + char name[QE_RING_NAMESIZE] __rte_cache_aligned; + uint32_t ring_size; /* size of memory block allocated to the ring */ + uint32_t mask; /* mask for read/write values == ring_size -1 */ + uint32_t size; /* actual usable space in the ring */ + volatile uint32_t write_idx __rte_cache_aligned; + volatile uint32_t read_idx __rte_cache_aligned; + + struct rte_event ring[0] __rte_cache_aligned; +}; + +#ifndef force_inline +#define force_inline inline __attribute__((always_inline)) +#endif + +static inline struct qe_ring * +qe_ring_create(const char *name, unsigned int size, unsigned int socket_id) +{ + struct qe_ring *retval; + const uint32_t ring_size = rte_align32pow2(size + 1); + size_t memsize = sizeof(*retval) + + (ring_size * sizeof(retval->ring[0])); + + retval = rte_zmalloc_socket(NULL, memsize, 0, socket_id); + if (retval == NULL) + goto end; + + snprintf(retval->name, sizeof(retval->name), "EVDEV_RG_%s", name); + retval->ring_size = ring_size; + 
retval->mask = ring_size - 1; + retval->size = size; +end: + return retval; +} + +static inline void +qe_ring_destroy(struct qe_ring *r) +{ + rte_free(r); +} + +static force_inline unsigned int +qe_ring_count(const struct qe_ring *r) +{ + return r->write_idx - r->read_idx; +} + +static force_inline unsigned int +qe_ring_free_count(const struct qe_ring *r) +{ + return r->size - qe_ring_count(r); +} + +static force_inline unsigned int +qe_ring_enqueue_burst(struct qe_ring *r, const struct rte_event *qes, + unsigned int nb_qes, uint16_t *free_count) +{ + const uint32_t size = r->size; + const uint32_t mask = r->mask; + const uint32_t read = r->read_idx; + uint32_t write = r->write_idx; + const uint32_t space = read + size - write; + uint32_t i; + + if (space < nb_qes) + nb_qes = space; + + for (i = 0; i < nb
[dpdk-dev] [PATCH v2 06/15] event/sw: add fns to return default port/queue config
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index b657eb4..ceca865 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -44,6 +44,35 @@ #define SCHED_QUANTA_ARG "sched_quanta" #define CREDIT_QUANTA_ARG "credit_quanta" +static void +sw_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, +struct rte_event_queue_conf *conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + static const struct rte_event_queue_conf default_conf = { + .nb_atomic_flows = 4096, + .nb_atomic_order_sequences = 1, + .event_queue_cfg = RTE_EVENT_QUEUE_CFG_ATOMIC_ONLY, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + + *conf = default_conf; +} + +static void +sw_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, +struct rte_event_port_conf *port_conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = 1024; + port_conf->dequeue_depth = 16; + port_conf->enqueue_depth = 16; +} + static int sw_dev_configure(const struct rte_eventdev *dev) { @@ -116,6 +145,9 @@ sw_probe(const char *name, const char *params) static const struct rte_eventdev_ops evdev_sw_ops = { .dev_configure = sw_dev_configure, .dev_infos_get = sw_info_get, + + .queue_def_conf = sw_queue_def_conf, + .port_def_conf = sw_port_def_conf, }; static const char *const args[] = { -- 2.7.4
[dpdk-dev] [PATCH v2 09/15] event/sw: add support for linking queues to ports
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 68 + 1 file changed, 68 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 0b26fcb..693a833 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -50,6 +50,72 @@ static void sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info); static int +sw_port_link(void *port, const uint8_t queues[], const uint8_t priorities[], + uint16_t num) +{ + struct sw_port *p = (void *)port; + struct sw_evdev *sw = p->sw; + int i; + + RTE_SET_USED(priorities); + for (i = 0; i < num; i++) { + struct sw_qid *q = &sw->qids[queues[i]]; + + /* check for qid map overflow */ + if (q->cq_num_mapped_cqs >= RTE_DIM(q->cq_map)) + break; + + if (p->is_directed && p->num_qids_mapped > 0) + break; + + if (q->type == RTE_SCHED_TYPE_DIRECT) { + /* check directed qids only map to one port */ + if (p->num_qids_mapped > 0) + break; + /* check port only takes a directed flow */ + if (num > 1) + break; + + p->is_directed = 1; + p->num_qids_mapped = 1; + } else if (q->type == RTE_SCHED_TYPE_ORDERED) { + p->num_ordered_qids++; + p->num_qids_mapped++; + } else if (q->type == RTE_SCHED_TYPE_ATOMIC) { + p->num_qids_mapped++; + } + + q->cq_map[q->cq_num_mapped_cqs] = p->id; + rte_smp_wmb(); + q->cq_num_mapped_cqs++; + } + return i; +} + +static int +sw_port_unlink(void *port, uint8_t queues[], uint16_t nb_unlinks) +{ + struct sw_port *p = (void *)port; + struct sw_evdev *sw = p->sw; + unsigned int i, j; + + int unlinked = 0; + for (i = 0; i < nb_unlinks; i++) { + struct sw_qid *q = &sw->qids[queues[i]]; + for (j = 0; j < q->cq_num_mapped_cqs; j++) + if (q->cq_map[j] == p->id) { + q->cq_map[j] = + q->cq_map[q->cq_num_mapped_cqs - 1]; + rte_smp_wmb(); + q->cq_num_mapped_cqs--; + unlinked++; + continue; + } + } + return unlinked; +} + +static int sw_port_setup(struct rte_eventdev *dev, uint8_t port_id, const struct 
rte_event_port_conf *conf) { @@ -381,6 +447,8 @@ sw_probe(const char *name, const char *params) .port_def_conf = sw_port_def_conf, .port_setup = sw_port_setup, .port_release = sw_port_release, + .port_link = sw_port_link, + .port_unlink = sw_port_unlink, }; static const char *const args[] = { -- 2.7.4
[dpdk-dev] [PATCH v2 12/15] event/sw: add start stop and close functions
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 74 + 1 file changed, 74 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 17b5e49..e352a5c 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -406,6 +406,77 @@ sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) } static int +sw_start(struct rte_eventdev *dev) +{ + unsigned int i, j; + struct sw_evdev *sw = sw_pmd_priv(dev); + /* check all ports are set up */ + for (i = 0; i < sw->port_count; i++) + if (sw->ports[i].rx_worker_ring == NULL) { + printf("%s %d: port %d not configured\n", + __func__, __LINE__, i); + return -1; + } + + /* check all queues are configured and mapped to ports*/ + for (i = 0; i < sw->qid_count; i++) + if (sw->qids[i].iq[0] == NULL || + sw->qids[i].cq_num_mapped_cqs == 0) { + printf("%s %d: queue %d not configured\n", + __func__, __LINE__, i); + return -1; + } + + /* build up our prioritized array of qids */ + /* We don't use qsort here, as if all/multiple entries have the same +* priority, the result is non-deterministic. From "man 3 qsort": +* "If two members compare as equal, their order in the sorted +* array is undefined." 
+*/ + uint32_t qidx = 0; + for (j = 0; j <= RTE_EVENT_DEV_PRIORITY_LOWEST; j++) { + for (i = 0; i < sw->qid_count; i++) { + if (sw->qids[i].priority == j) { + sw->qids_prioritized[qidx] = &sw->qids[i]; + qidx++; + } + } + } + sw->started = 1; + return 0; +} + +static void +sw_stop(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + sw->started = 0; +} + +static int +sw_close(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + uint32_t i; + + for (i = 0; i < sw->qid_count; i++) + sw_queue_release(dev, i); + sw->qid_count = 0; + + for (i = 0; i < sw->port_count; i++) + sw_port_release(&sw->ports[i]); + sw->port_count = 0; + + memset(&sw->stats, 0, sizeof(sw->stats)); + sw->sched_called = 0; + sw->sched_no_iq_enqueues = 0; + sw->sched_no_cq_enqueues = 0; + sw->sched_cq_qid_called = 0; + + return 0; +} + +static int assign_numa_node(const char *key __rte_unused, const char *value, void *opaque) { int *socket_id = opaque; @@ -441,6 +512,9 @@ sw_probe(const char *name, const char *params) static const struct rte_eventdev_ops evdev_sw_ops = { .dev_configure = sw_dev_configure, .dev_infos_get = sw_info_get, + .dev_close = sw_close, + .dev_start = sw_start, + .dev_stop = sw_stop, .queue_def_conf = sw_queue_def_conf, .queue_setup = sw_queue_setup, -- 2.7.4
[dpdk-dev] [PATCH v2 11/15] event/sw: add scheduling logic
From: Bruce Richardson Add in the scheduling function which takes the events from the producer queues and buffers them before scheduling them to consumer queues. The scheduling logic includes support for atomic, reordered, and parallel scheduling of flows. Signed-off-by: Bruce Richardson Signed-off-by: Gage Eads Signed-off-by: Harry van Haaren --- drivers/event/sw/Makefile | 1 + drivers/event/sw/sw_evdev.c | 1 + drivers/event/sw/sw_evdev.h | 11 + drivers/event/sw/sw_evdev_scheduler.c | 602 ++ 4 files changed, 615 insertions(+) create mode 100644 drivers/event/sw/sw_evdev_scheduler.c diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index b6ecd91..a7f5b3d 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -54,6 +54,7 @@ EXPORT_MAP := rte_pmd_evdev_sw_version.map # library source files SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_scheduler.c # export include files SYMLINK-y-include += diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index b719f65..17b5e49 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -521,6 +521,7 @@ sw_probe(const char *name, const char *params) dev->enqueue_burst = sw_event_enqueue_burst; dev->dequeue = sw_event_dequeue; dev->dequeue_burst = sw_event_dequeue_burst; + dev->schedule = sw_event_schedule; sw = dev->data->dev_private; sw->data = dev->data; diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index ea39bb2..63b0979 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -240,8 +240,18 @@ struct sw_evdev { /* Cache how many packets are in each cq */ uint16_t cq_ring_space[SW_PORTS_MAX] __rte_cache_aligned; + /* Array of pointers to load-balanced QIDs sorted by priority level */ + struct sw_qid *qids_prioritized[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + /* Stats */ + struct sw_point_stats 
stats __rte_cache_aligned; + uint64_t sched_called; int32_t sched_quanta; + uint64_t sched_no_iq_enqueues; + uint64_t sched_no_cq_enqueues; + uint64_t sched_cq_qid_called; + uint8_t started; uint32_t credit_update_quanta; }; @@ -266,5 +276,6 @@ uint16_t sw_event_enqueue_burst(void *port, const struct rte_event ev[], uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, uint64_t wait); +void sw_event_schedule(struct rte_eventdev *dev); #endif /* _SW_EVDEV_H_ */ diff --git a/drivers/event/sw/sw_evdev_scheduler.c b/drivers/event/sw/sw_evdev_scheduler.c new file mode 100644 index 000..3b30efe --- /dev/null +++ b/drivers/event/sw/sw_evdev_scheduler.c @@ -0,0 +1,602 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "sw_evdev.h" +#include "iq_ring.h" +#include "event_ring.h" + +#define SW_IQS_MASK (SW_IQS_MAX-1) + +/* Retrieve the highest priority IQ or -1 if no pkts availab
[dpdk-dev] [PATCH v2 14/15] event/sw: add xstats support
From: Bruce Richardson Add support for xstats to report out on the state of the eventdev. Useful for debugging and for unit tests, as well as observability at runtime and performance tuning of apps to work well with the scheduler. Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/Makefile | 1 + drivers/event/sw/sw_evdev.c| 7 + drivers/event/sw/sw_evdev.h| 10 + drivers/event/sw/sw_evdev_xstats.c | 404 + 4 files changed, 422 insertions(+) create mode 100644 drivers/event/sw/sw_evdev_xstats.c diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index a7f5b3d..eb0dc4c 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -55,6 +55,7 @@ EXPORT_MAP := rte_pmd_evdev_sw_version.map SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_scheduler.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_xstats.c # export include files SYMLINK-y-include += diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 6096aa4..2fb9cc4 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -559,6 +559,8 @@ sw_start(struct rte_eventdev *dev) } } } + if (sw_xstats_init(sw) < 0) + return -1; sw->started = 1; return 0; } @@ -567,6 +569,7 @@ static void sw_stop(struct rte_eventdev *dev) { struct sw_evdev *sw = sw_pmd_priv(dev); + sw_xstats_uninit(sw); sw->started = 0; } @@ -642,6 +645,10 @@ sw_probe(const char *name, const char *params) .port_release = sw_port_release, .port_link = sw_port_link, .port_unlink = sw_port_unlink, + + .get_xstat_names = sw_get_xstat_names, + .get_xstats = sw_get_xstats, + .get_xstat_by_name = sw_get_xstat_by_name, }; static const char *const args[] = { diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 63b0979..93bc8b5 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -222,6 
+222,8 @@ struct sw_evdev { uint32_t port_count; uint32_t qid_count; + uint32_t xstats_count; + struct sw_xstats_entry *xstats; /* Contains all ports - load balanced and directed */ struct sw_port ports[SW_PORTS_MAX] __rte_cache_aligned; @@ -277,5 +279,13 @@ uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, uint64_t wait); void sw_event_schedule(struct rte_eventdev *dev); +int sw_xstats_init(struct sw_evdev *dev); +int sw_xstats_uninit(struct sw_evdev *dev); +int sw_get_xstat_names(const struct rte_eventdev *dev, + struct rte_event_dev_xstat_name *xstats_names, unsigned int size); +int sw_get_xstats(const struct rte_eventdev *dev, const unsigned int ids[], + uint64_t values[], unsigned int n); +uint64_t sw_get_xstat_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id); #endif /* _SW_EVDEV_H_ */ diff --git a/drivers/event/sw/sw_evdev_xstats.c b/drivers/event/sw/sw_evdev_xstats.c new file mode 100644 index 000..569eba2 --- /dev/null +++ b/drivers/event/sw/sw_evdev_xstats.c @@ -0,0 +1,404 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
[dpdk-dev] [PATCH v2 10/15] event/sw: add worker core functions
From: Bruce Richardson add the event enqueue, dequeue and release functions to the eventdev. These also include tracking of stats for observability in the load of the scheduler. Internally in the enqueue function, the various types of enqueue operations, to forward an existing event, to send a new event, to drop a previous event, are converted to a series of flags which will be used by the scheduler code to perform the needed actions for that event. Signed-off-by: Bruce Richardson Signed-off-by: Gage Eads Signed-off-by: Harry van Haaren --- drivers/event/sw/Makefile | 1 + drivers/event/sw/sw_evdev.c| 5 + drivers/event/sw/sw_evdev.h| 35 +++ drivers/event/sw/sw_evdev_worker.c | 186 + 4 files changed, 227 insertions(+) create mode 100644 drivers/event/sw/sw_evdev_worker.c diff --git a/drivers/event/sw/Makefile b/drivers/event/sw/Makefile index d6836e3..b6ecd91 100644 --- a/drivers/event/sw/Makefile +++ b/drivers/event/sw/Makefile @@ -53,6 +53,7 @@ EXPORT_MAP := rte_pmd_evdev_sw_version.map # library source files SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c # export include files SYMLINK-y-include += diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 693a833..b719f65 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -378,6 +378,7 @@ sw_dev_configure(const struct rte_eventdev *dev) sw->qid_count = conf->nb_event_queues; sw->port_count = conf->nb_event_ports; sw->nb_events_limit = conf->nb_events_limit; + rte_atomic32_set(&sw->inflights, 0); return 0; } @@ -516,6 +517,10 @@ sw_probe(const char *name, const char *params) return -EFAULT; } dev->dev_ops = &evdev_sw_ops; + dev->enqueue = sw_event_enqueue; + dev->enqueue_burst = sw_event_enqueue_burst; + dev->dequeue = sw_event_dequeue; + dev->dequeue_burst = sw_event_dequeue_burst; sw = dev->data->dev_private; sw->data = dev->data; diff --git a/drivers/event/sw/sw_evdev.h 
b/drivers/event/sw/sw_evdev.h index 0bae511..ea39bb2 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -57,10 +57,35 @@ #define SCHED_DEQUEUE_BURST_SIZE 32 #define SW_PORT_HIST_LIST (MAX_SW_PROD_Q_DEPTH) /* size of our history list */ +#define NUM_SAMPLES 64 /* how many data points to use for average stats */ /* have a new scheduling type for 1:1 queue to port links */ #define RTE_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) +enum { + QE_FLAG_VALID_SHIFT = 0, + QE_FLAG_COMPLETE_SHIFT, + QE_FLAG_NOT_EOP_SHIFT, + _QE_FLAG_COUNT +}; + +#define QE_FLAG_VALID (1 << QE_FLAG_VALID_SHIFT) /* for NEW, FWD, FRAG */ +#define QE_FLAG_COMPLETE (1 << QE_FLAG_COMPLETE_SHIFT) /* set for FWD, DROP */ +#define QE_FLAG_NOT_EOP (1 << QE_FLAG_NOT_EOP_SHIFT) /* set for FRAG only */ + +static const uint8_t sw_qe_flag_map[] = { + QE_FLAG_VALID /* NEW Event */, + QE_FLAG_VALID | QE_FLAG_COMPLETE /* FWD Event */, + QE_FLAG_COMPLETE /* RELEASE Event */, + + /* Values which can be used for future support for partial +* events, i.e. where one event comes back to the scheduler +* as multiple which need to be tracked together +*/ + QE_FLAG_VALID | QE_FLAG_COMPLETE | QE_FLAG_NOT_EOP, +}; + + #ifdef RTE_LIBRTE_PMD_EVDEV_SW_DEBUG #define SW_LOG_INFO(fmt, args...) \ RTE_LOG(INFO, PMD, "[%s] %s() line %u: " fmt "\n", \ @@ -201,6 +226,8 @@ struct sw_evdev { /* Contains all ports - load balanced and directed */ struct sw_port ports[SW_PORTS_MAX] __rte_cache_aligned; + rte_atomic32_t inflights __rte_cache_aligned; + /* * max events in this instance. Cached here for performance. 
* (also available in data->dev_conf.nb_events_limit) @@ -232,4 +259,12 @@ sw_pmd_priv_const(const struct rte_eventdev *eventdev) extern int sched_quanta; +uint16_t sw_event_enqueue(void *port, const struct rte_event *ev); +uint16_t sw_event_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t num); + +uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); +uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, + uint64_t wait); + #endif /* _SW_EVDEV_H_ */ diff --git a/drivers/event/sw/sw_evdev_worker.c b/drivers/event/sw/sw_evdev_worker.c new file mode 100644 index 000..5c6b3ab --- /dev/null +++ b/drivers/event/sw/sw_evdev_worker.c @@ -0,0 +1,186 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in sourc
[dpdk-dev] [PATCH v2 15/15] app/test: add unit tests for SW eventdev driver
From: Bruce Richardson Since the sw driver is a standalone lookaside device that has no HW requirements, we can provide a set of unit tests that test its functionality across the different queue types and with different input scenarios. This also adds the tests to be automatically run by autotest.py Signed-off-by: Bruce Richardson Signed-off-by: David Hunt Signed-off-by: Harry van Haaren --- app/test/Makefile |5 +- app/test/autotest_data.py | 26 + app/test/test_sw_eventdev.c | 2071 +++ 3 files changed, 2101 insertions(+), 1 deletion(-) create mode 100644 app/test/test_sw_eventdev.c diff --git a/app/test/Makefile b/app/test/Makefile index e28c079..1770c09 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -197,7 +197,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_blockcipher.c SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_perf.c SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev.c -SRCS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += test_eventdev.c +ifeq ($(CONFIG_RTE_LIBRTE_EVENTDEV),y) +SRCS-y += test_eventdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += test_sw_eventdev.c +endif SRCS-$(CONFIG_RTE_LIBRTE_KVARGS) += test_kvargs.c diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py index 0cd598b..165ed6c 100644 --- a/app/test/autotest_data.py +++ b/app/test/autotest_data.py @@ -346,6 +346,32 @@ def per_sockets(num): non_parallel_test_group_list = [ { +"Prefix":"eventdev", +"Memory":"512", +"Tests": +[ +{ +"Name":"Eventdev common autotest", +"Command": "eventdev_common_autotest", +"Func":default_autotest, +"Report": None, +}, +] +}, +{ +"Prefix":"eventdev_sw", +"Memory":"512", +"Tests": +[ +{ +"Name":"Eventdev sw autotest", +"Command": "eventdev_sw_autotest", +"Func":default_autotest, +"Report": None, +}, +] +}, +{ "Prefix":"kni", "Memory":"512", "Tests": diff --git a/app/test/test_sw_eventdev.c b/app/test/test_sw_eventdev.c new file mode 100644 index 000..6322f36 --- /dev/null +++ b/app/test/test_sw_eventdev.c @@ -0,0 +1,2071 @@ 
+/*- + * BSD LICENSE + * + * Copyright(c) 2016-2017 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "test.h" + +#define MAX_PORTS 16 +#define MAX_QIDS 16 +#define NUM_PACKETS (1<<18) + +static int evdev; + +struct test { + struct rte_mempool *mbuf_pool; + uint8_t port[MAX_PORTS]; + uint8_t qid[MAX_QIDS]; + int nb_qids; +}; + +static struct rte_event release_ev = {.op = RTE_EVENT_OP_RELEASE }; + +static inline struct
[dpdk-dev] [PATCH v2 13/15] event/sw: add dump function for easier debugging
From: Bruce Richardson Signed-off-by: Bruce Richardson Signed-off-by: Harry van Haaren --- drivers/event/sw/sw_evdev.c | 118 1 file changed, 118 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index e352a5c..6096aa4 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -405,6 +405,123 @@ sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) *info = evdev_sw_info; } +static void +sw_dump(struct rte_eventdev *dev, FILE *f) +{ + const struct sw_evdev *sw = sw_pmd_priv(dev); + + static const char * const q_type_strings[] = { + "Ordered", "Atomic", "Parallel", "Directed" + }; + uint32_t i; + fprintf(f, "EventDev %s: ports %d, qids %d\n", "todo-fix-name", + sw->port_count, sw->qid_count); + + fprintf(f, "\trx %"PRIu64"\n\tdrop %"PRIu64"\n\ttx %"PRIu64"\n", + sw->stats.rx_pkts, sw->stats.rx_dropped, sw->stats.tx_pkts); + fprintf(f, "\tsched calls: %"PRIu64"\n", sw->sched_called); + fprintf(f, "\tsched cq/qid call: %"PRIu64"\n", sw->sched_cq_qid_called); + fprintf(f, "\tsched no IQ enq: %"PRIu64"\n", sw->sched_no_iq_enqueues); + fprintf(f, "\tsched no CQ enq: %"PRIu64"\n", sw->sched_no_cq_enqueues); + uint32_t inflights = rte_atomic32_read(&sw->inflights); + uint32_t credits = sw->nb_events_limit - inflights; + fprintf(f, "\tinflight %d, credits: %d\n", inflights, credits); + +#define COL_RED "\x1b[31m" +#define COL_RESET "\x1b[0m" + + for (i = 0; i < sw->port_count; i++) { + int max, j; + const struct sw_port *p = &sw->ports[i]; + fprintf(f, " Port %d %s\n", i, + p->is_directed ? 
" (SingleCons)" : ""); + fprintf(f, "\trx %"PRIu64"\tdrop %"PRIu64"\ttx %"PRIu64 + "\tinflight %d\n", sw->ports[i].stats.rx_pkts, + sw->ports[i].stats.rx_dropped, + sw->ports[i].stats.tx_pkts, sw->ports[i].inflights); + + fprintf(f, "\tAvg cycles PP: %"PRIu64"\tCredits: %u\n", + sw->ports[i].avg_pkt_ticks, + sw->ports[i].inflight_credits); + fprintf(f, "\tReceive burst distribution:\n"); + float zp_percent = p->zero_polls * 100.0 / p->total_polls; + fprintf(f, zp_percent < 10 ? "\t\t0:%.02f%% " : "\t\t0:%.0f%% ", + zp_percent); + for (max = (int)RTE_DIM(p->poll_buckets); max-- > 0;) + if (p->poll_buckets[max] != 0) + break; + for (j = 0; j <= max; j++) { + if (p->poll_buckets[j] != 0) { + float poll_pc = p->poll_buckets[j] * 100.0 / + p->total_polls; + printf("%u-%u:%.02f%% ", + ((j << SW_DEQ_STAT_BUCKET_SHIFT) + 1), + ((j+1) << SW_DEQ_STAT_BUCKET_SHIFT), + poll_pc); + } + } + printf("\n"); + + uint64_t rx_used = qe_ring_count(p->rx_worker_ring); + uint64_t rx_free = qe_ring_free_count(p->rx_worker_ring); + const char *rxcol = (rx_free == 0) ? COL_RED : COL_RESET; + fprintf(f, "\t%srx ring used: %4"PRIu64"\tfree: %4" + PRIu64 COL_RESET"\n", rxcol, rx_used, rx_free); + + uint64_t tx_used = qe_ring_count(p->cq_worker_ring); + uint64_t tx_free = qe_ring_free_count(p->cq_worker_ring); + const char *txcol = (tx_free == 0) ? COL_RED : COL_RESET; + fprintf(f, "\t%scq ring used: %4"PRIu64"\tfree: %4"PRIu64 + COL_RESET"\n", txcol, tx_used, tx_free); + } + + for (i = 0; i < sw->qid_count; i++) { + const struct sw_qid *qid = &sw->qids[i]; + int affinities_per_port[SW_PORTS_MAX] = {0}; + uint32_t inflights = 0; + + fprintf(f, " Queue %d (%s)\n", i, q_type_strings[qid->type]); + fprintf(f, "\trx %"PRIu64"
[dpdk-dev] [PATCH] doc/contributing: add ack review descriptions
This commit details what is meant by the various email tags that the DPDK community use regularly. The descriptions state what each tag means, drawing from the kernel's understanding[1], and the discussion on the DPDK mailing list[2]. Signed-off-by: Harry van Haaren [1] https://www.kernel.org/doc/html/latest/process/submitting-patches.html#when-to-use-acked-by-and-cc [2] http://dpdk.org/ml/archives/dev/2017-January/thread.html#56300 --- doc/guides/contributing/patches.rst | 69 - 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/doc/guides/contributing/patches.rst b/doc/guides/contributing/patches.rst index a6b2753..2b47ab3 100644 --- a/doc/guides/contributing/patches.rst +++ b/doc/guides/contributing/patches.rst @@ -225,13 +225,9 @@ Here are some guidelines for the body of a commit message: * Use correct capitalization, punctuation and spelling. -In addition to the ``Signed-off-by:`` name the commit messages can also have one or more of the following: - -* ``Reported-by:`` The reporter of the issue. -* ``Tested-by:`` The tester of the change. -* ``Reviewed-by:`` The reviewer of the change. -* ``Suggested-by:`` The person who suggested the change. -* ``Acked-by:`` When a previous version of the patch was acked and the ack is still relevant. +In addition to the ``Signed-off-by:`` name the commit messages can also have +tags for who reported, suggested, tested etc, the patch being posted. Please +refer to section `Tested, Acked and Reviewed by`_. Creating Patches @@ -427,9 +423,64 @@ The options ``--annotate`` and ``confirm = always`` are recommended for checking The Review Process -- -The more work you put into the previous steps the easier it will be to get a patch accepted. +Patches are reviewed by the community, relying on the experience and +collaboration of the community to double-check each others work. +There are a number of ways to say you have double-checked a patch on the +mailing list. 
+ +Tested, Acked and Reviewed by +~ + +To state that you have interacted with a patch on the mailing list, +one replies to the email with a tag. The commonly used tags are: + + * Reviewed-by: + * Acked-by: + * Tested-by: + * Reported-by: + * Suggested-by: + +All of these tags should be used as follows: + +.. code-block:: console + +tag-here: Name Surname + +Each of these tags has a specific meaning. In general, the DPDK community +follows the kernel usage of the tags. A short summary of the meanings +of each tag is given here for reference: + +.. _statement: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#reviewer-s-statement-of-oversight + +``Reviewed-by:`` is a strong statement_ that the patch is an +appropriate modification without any remaining serious technical issues. +Reviewers known to understand the subject area and to perform thorough reviews +will normally increase the likelihood of your patch getting merged. + +``Acked-by:`` is a record that the person named was not directly involved in +the preparation of a patch but wishes to signify and record their +acceptance and approval of it. + +``Tested-by:`` indicates that the patch has been successfully tested (in some +environment) by the person named. + +``Reported-by:`` gives credit to people who find bugs and report them and it +hopefully inspires them to help us again in the future. Please note that if the +bug was reported in private, then ask for permission first before using the +Reported-by tag. + +``Suggested-by:`` indicates that the patch idea is suggested by the person named +and ensures credit to the person for the idea. Please note that this tag should +not be added without the reporter’s permission, especially if the idea was not +posted in a public forum. + + + +Steps to getting your patch merged +~~ -The general cycle for patch review and acceptance is: +The more work you put into the previous steps the easier it will be to get a +patch accepted. 
The general cycle for patch review and acceptance is: #. Submit the patch. -- 2.7.4
[dpdk-dev] [PATCH v3 0/7] service cores: cover letter
This patchset introduces service cores to DPDK. A service core is an lcore that performs functions to abstract away details of differences in environment of the application. An example is using the eventdev API, where either a software or hardware PMD performs scheduling. In the case of the software PMD an lcore is required to perform scheduling, which means application logic would have to be aware of the PMD running under the API. To abstract away the differences in HW / SW PMDs, service cores can run the SW PMD service without application logic specifying the exact cores to use. Note that eventdev is only one API that benefits; timers, interrupt handling, statistics and monitoring, and a range of other infrastructure that requires a slice of CPU time may all benefit from service cores. The application is not obliged to manually use the service cores API, however if an application wishes to use the service cores API for fine grained control over how the services are run, this is possible. Deciding between a performance threading-profile and scaled-down profile can be achieved by advanced usage of service cores and setting the lcore mappings. Finally, the last patch introduces how a PMD can register a service to run a function. This is then available (along with any other registered services) to be run by the service cores. Regards, -Harry v3: - Added docs - Added release notes - Updated maintainers file - Compile checks with devtools/test-build.sh - Validated patches apply to latest dpdk/master - Based on discussion, rte_service_iterate() is *not* included, but could be added at a later date if use-cases require it. - Future work includes enabling the eventdev_pipeline sample app, but there is still some churn there to enable both HW/SW PMDs seamlessly. Once sample app is enabled a service core walk-through with that sample app can be added to the docs, to provide a tutorial on service-core usage. 
Harry van Haaren (7): service cores: header and implementation service cores: EAL init changes service cores: coremask parsing service cores: add unit tests service cores: enable event/sw with service maintainers: claim service cores doc: add service cores to doc and release notes MAINTAINERS| 6 + doc/api/doxy-api-index.md | 1 + doc/guides/eventdevs/sw.rst| 4 +- doc/guides/prog_guide/index.rst| 1 + doc/guides/prog_guide/service_cores.rst| 81 +++ doc/guides/rel_notes/release_17_08.rst | 8 + drivers/event/sw/sw_evdev.c| 32 + drivers/event/sw/sw_evdev.h| 3 + lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/eal.c| 22 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 28 + lib/librte_eal/common/Makefile | 1 + lib/librte_eal/common/eal_common_lcore.c | 1 + lib/librte_eal/common/eal_common_options.c | 77 +++ lib/librte_eal/common/include/rte_eal.h| 4 + lib/librte_eal/common/include/rte_lcore.h | 3 +- lib/librte_eal/common/include/rte_service.h| 298 + .../common/include/rte_service_private.h | 118 lib/librte_eal/common/rte_service.c| 671 + lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/eal.c | 23 + lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map| 29 + test/test/Makefile | 2 + test/test/test_service_cores.c | 496 +++ 25 files changed, 1917 insertions(+), 3 deletions(-) create mode 100644 doc/guides/prog_guide/service_cores.rst create mode 100644 lib/librte_eal/common/include/rte_service.h create mode 100644 lib/librte_eal/common/include/rte_service_private.h create mode 100644 lib/librte_eal/common/rte_service.c create mode 100644 test/test/test_service_cores.c -- 2.7.4
[dpdk-dev] [PATCH v3 2/7] service cores: EAL init changes
This commit shows the changes required in rte_eal_init() to transparently launch the service threads. The threads are launched into the service worker functions here because after rte_eal_init() the application is not guaranteed to call any other DPDK API. As the registration of services happens at initialization time, the services that require CPU time are already available when we reach the end of rte_eal_init(). Signed-off-by: Harry van Haaren --- v2 comments: - Include BSD implementation (Jerin) - Move details of core-tracking into rte_service_lcore_add(Jerin) - Given there are changes other than those suggested, not using Ack --- lib/librte_eal/bsdapp/eal/eal.c | 22 ++ lib/librte_eal/linuxapp/eal/eal.c | 23 +++ 2 files changed, 45 insertions(+) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 05f0c1f..4f7dcb3 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -653,6 +653,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services first so vdevs can register during bus_probe. +* Ignore return value of already initialized, this means EAL parameter +* -s was used to set a service-core mask. 
+*/ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -660,6 +671,17 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default services configuration */ + uint32_t service_cores[RTE_MAX_LCORE]; + int count = rte_service_lcore_list(service_cores, RTE_MAX_LCORE); + for (i = 0; i < count; i++) + rte_service_lcore_start(service_cores[i]); + ret = rte_service_set_default_mapping(); + if (ret) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 7c78f2d..d63dd87 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -78,6 +78,7 @@ #include #include #include +#include #include "eal_private.h" #include "eal_thread.h" @@ -932,6 +933,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services first so vdevs can register during bus_probe. +* Ignore return value of already initialized, this means EAL parameter +* -s was used to set a service-core mask. 
+*/ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -939,6 +951,17 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default services configuration */ + uint32_t service_cores[RTE_MAX_LCORE]; + int count = rte_service_lcore_list(service_cores, RTE_MAX_LCORE); + for (i = 0; i < count; i++) + rte_service_lcore_start(service_cores[i]); + ret = rte_service_set_default_mapping(); + if (ret) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; -- 2.7.4
[dpdk-dev] [PATCH v3 1/7] service cores: header and implementation
Add header files, update .map files with new service functions, and add the service header to the doxygen for building. This service header API allows DPDK to use services as a concept of something that requires CPU cycles. An example is a PMD that runs in software to schedule events, where a hardware version exists that does not require a CPU. The code presented here is based on an initial RFC: http://dpdk.org/ml/archives/dev/2017-May/065207.html This was then reworked, and RFC v2 with the changes posted: http://dpdk.org/ml/archives/dev/2017-June/067194.html This is the fourth iteration of the service core concept, with 2 RFCs and this being v2 of the implementation. Signed-off-by: Harry van Haaren --- v2: Thanks Jerin for review - below a list your suggested changes; - Doxygen rename to "service cores" for consistency - use lcore instead of core for function names - Fix about 10 typos / seplling msitakse ;) - Dix doxygen /** comments for functions - Doxygen @param[out] improvements - int8_t for socket_id to ordinary int - Rename MACROS for readability - Align structs to cache lines - Allocate fastpath-used data from hugepages - Added/fixed memory barriers for multi-core scheduling - Add const to variables, and hoist above loop - Optimize cmpset atomic if MT_SAFE or only one core mapped - Statistics collection only when requested - Add error check for array pointer - Remove panic() calls from library - Fix TODO notes from previous patchset There are also some other changes; - Checkpatch issues fixed - .map file updates - Add rte_service_get_by_name() function --- doc/api/doxy-api-index.md | 1 + lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 28 + lib/librte_eal/common/Makefile | 1 + lib/librte_eal/common/eal_common_lcore.c | 1 + lib/librte_eal/common/include/rte_eal.h| 4 + lib/librte_eal/common/include/rte_lcore.h | 3 +- lib/librte_eal/common/include/rte_service.h| 298 + .../common/include/rte_service_private.h | 118 
lib/librte_eal/common/rte_service.c| 671 + lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map| 29 + 13 files changed, 1163 insertions(+), 2 deletions(-) create mode 100644 lib/librte_eal/common/include/rte_service.h create mode 100644 lib/librte_eal/common/include/rte_service_private.h create mode 100644 lib/librte_eal/common/rte_service.c diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index f5f1f19..1284402 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -158,6 +158,7 @@ There are many libraries, so their headers may be grouped by topics: [common] (@ref rte_common.h), [ABI compat] (@ref rte_compat.h), [keepalive] (@ref rte_keepalive.h), + [service cores] (@ref rte_service.h), [device metrics] (@ref rte_metrics.h), [bitrate statistics] (@ref rte_bitrate.h), [latency statistics] (@ref rte_latencystats.h), diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index a0f9950..05517a2 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 2e48a73..5493a13 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -193,3 +193,31 @@ DPDK_17.05 { vfio_get_group_no; } DPDK_17.02; + +DPDK_17.08 { + global: + + rte_service_disable_on_lcore; + rte_service_dump; + rte_service_enable_on_lcore; + rte_service_get_by_id; + rte_service_get_by_name; + rte_service_get_count; + 
rte_service_get_enabled_on_lcore; + rte_service_is_running; + rte_service_lcore_add; + rte_service_lcore_count; + rte_service_lcore_del; + rte_service_lcore_list; + rte_service_lcore_reset_all; + rte_service_lcore_start; + rte_service_lcore_stop; + rte_service_probe_capability; + rte_service_register; + rte_service_reset; + rte_service_set_stats_enable; + rte_service_start; + rte_service_stop; + rte_service_unregister; + +} DPDK_17.05; diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index a5bd108..2a93397 100644
[dpdk-dev] [PATCH v3 3/7] service cores: coremask parsing
Add logic for parsing a coremask from EAL, which allows the application to be unaware of the cores being taken from its coremask. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v2: - Remove printf() (Jerin) - Remove commented code (Jerin) - simplified core tracking, no requirement on #include rte_service in EAL parsing anymore. --- lib/librte_eal/common/eal_common_options.c | 77 ++ 1 file changed, 77 insertions(+) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index f470195..cee200c 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -61,6 +61,7 @@ const char eal_short_options[] = "b:" /* pci-blacklist */ "c:" /* coremask */ + "s:" /* service coremask */ "d:" /* driver */ "h" /* help */ "l:" /* corelist */ @@ -267,6 +268,73 @@ static int xdigit2val(unsigned char c) } static int +eal_parse_service_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned int count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . +* Remove 0x/0X if exists. 
+*/ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; + j++, idx++) { + if ((1 << j) & val) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (master_lcore_parsed && + cfg->master_lcore == lcore) + continue; + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, + "lcore %u unavailable\n", idx); + return -1; + } + lcore_config[idx].core_role = ROLE_SERVICE; + count++; + } + } + } + + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + + for (; idx < RTE_MAX_LCORE; idx++) + lcore_config[idx].core_index = -1; + + if (count == 0) + return -1; + + cfg->service_lcore_count = count; + return 0; +} + +static int eal_parse_coremask(const char *coremask) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -409,6 +477,8 @@ eal_parse_master_lcore(const char *arg) if (cfg->master_lcore >= RTE_MAX_LCORE) return -1; master_lcore_parsed = 1; + /* ensure master core is not used as service core */ + lcore_config[cfg->master_lcore].core_role = ROLE_RTE; return 0; } @@ -826,6 +896,13 @@ eal_parse_common_option(int opt, const char *optarg, } core_parsed = 1; break; + /* service coremask */ + case 's': + if (eal_parse_service_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service coremask\n"); + return -1; + } + break; /* size of memory */ case 'm': conf->memory = atoi(optarg); -- 2.7.4
[dpdk-dev] [PATCH v3 5/7] service cores: enable event/sw with service
This commit shows how easy it is to enable a specific DPDK component with a service callback, in order to get CPU cycles for it. The beauty of this method is that the service is unaware of how much CPU time it is getting - the application can decide how to split and slice cores and map them to the registered services. Signed-off-by: Harry van Haaren --- v2: - Remove #include (Jerin) - Remove development prints (Jerin) - Track service name in PMD - Print warning if service does not have an lcore mapped (Jerin) --- drivers/event/sw/sw_evdev.c | 32 drivers/event/sw/sw_evdev.h | 3 +++ 2 files changed, 35 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index fe2a61e..baab376 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "sw_evdev.h" #include "iq_ring.h" @@ -597,6 +598,13 @@ sw_start(struct rte_eventdev *dev) { unsigned int i, j; struct sw_evdev *sw = sw_pmd_priv(dev); + + /* check a service core is mapped to this service */ + struct rte_service_spec *s = rte_service_get_by_name(sw->service_name); + if (!rte_service_is_running(s)) + SW_LOG_ERR("Warning: No Service core enabled on service %s\n", + s->name); + /* check all ports are set up */ for (i = 0; i < sw->port_count; i++) if (sw->ports[i].rx_worker_ring == NULL) { @@ -699,6 +707,14 @@ set_credit_quanta(const char *key __rte_unused, const char *value, void *opaque) return 0; } + +static int32_t sw_sched_service_func(void *args) +{ + struct rte_eventdev *dev = args; + sw_event_schedule(dev); + return 0; +} + static int sw_probe(struct rte_vdev_device *vdev) { @@ -810,6 +826,22 @@ sw_probe(struct rte_vdev_device *vdev) sw->credit_update_quanta = credit_quanta; sw->sched_quanta = sched_quanta; + /* register service with EAL */ + struct rte_service_spec service; + memset(&service, 0, sizeof(struct rte_service_spec)); + snprintf(service.name, sizeof(service.name), "%s_service", name); + 
snprintf(sw->service_name, sizeof(sw->service_name), "%s_service", + name); + service.socket_id = socket_id; + service.callback = sw_sched_service_func; + service.callback_userdata = (void *)dev; + + int32_t ret = rte_service_register(&service); + if (ret) { + SW_LOG_ERR("service register() failed"); + return -ENOEXEC; + } + return 0; } diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 0d7f94f..3e83823 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -59,6 +59,7 @@ #define EVENTDEV_NAME_SW_PMD event_sw #define SW_PMD_NAME RTE_STR(event_sw) +#define SW_PMD_NAME_MAX 64 #define SW_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) @@ -276,6 +277,8 @@ struct sw_evdev { /* store num stats and offset of the stats for each queue */ uint16_t xstats_count_per_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; uint16_t xstats_offset_for_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + char service_name[SW_PMD_NAME_MAX]; }; static inline struct sw_evdev * -- 2.7.4
[dpdk-dev] [PATCH v3 4/7] service cores: add unit tests
Add a bunch of unit tests, to ensure that the service core functions are operating as expected. As part of these tests a dummy service is registered which allows identifying if a service callback has been invoked by using the CPU tick counter. This allows identifying if functions to start and stop service lcores are actually having effect. Signed-off-by: Harry van Haaren --- v2 changes; - Rename variable to slcore_id (Jerin) - Rename function to unregister_all() (Jerin) - Fix typos (Jerin) - Add unit test for get_by_name() - Add unit tests (all suggestions by Jerin) -- get_name() -- Verify probe_capability API -- Verify MT_SAFE capability (see code for details) -- Verify rte_service_dump() API --- test/test/Makefile | 2 + test/test/test_service_cores.c | 496 + 2 files changed, 498 insertions(+) create mode 100644 test/test/test_service_cores.c diff --git a/test/test/Makefile b/test/test/Makefile index ee240be..61e296b 100644 --- a/test/test/Makefile +++ b/test/test/Makefile @@ -151,6 +151,8 @@ SRCS-y += test_interrupts.c SRCS-y += test_version.c SRCS-y += test_func_reentrancy.c +SRCS-y += test_service_cores.c + SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_num.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_etheraddr.c diff --git a/test/test/test_service_cores.c b/test/test/test_service_cores.c new file mode 100644 index 000..f2f3a93 --- /dev/null +++ b/test/test/test_service_cores.c @@ -0,0 +1,496 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test.h" + +/* used as the service core ID */ +static uint32_t slcore_id; +/* used as timestamp to detect if a service core is running */ +static uint64_t service_tick; + +#define SERVICE_DELAY 1 + +#define DUMMY_SERVICE_NAME "dummy_service" +#define MT_SAFE_SERVICE_NAME "mt_safe_service" + +static int +testsuite_setup(void) +{ + /* assuming lcore 1 is available for service-core testing */ + slcore_id = 1; + return TEST_SUCCESS; +} + +static void +testsuite_teardown(void) +{ + /* release service cores? 
*/ +} + +static int32_t dummy_cb(void *args) +{ + RTE_SET_USED(args); + service_tick++; + rte_delay_ms(SERVICE_DELAY); + return 0; +} + + +static int32_t dummy_mt_safe_cb(void *args) +{ + /* Atomic checks to ensure MT safe services allow > 1 thread to +* concurrently run the callback. The concept is as follows; +* 1) if lock is available, take the lock then delay +* 2) if first lock is taken, and a thread arrives in the CB, we know +*that 2 threads are running the callback at the same time: MT safe +*/ + uint32_t *test_params = args; + uint32_t *atomic_lock = &test_params[0]; + uint32_t *pass_test = &test_params[1]; + int lock_taken = rte_atomic32_cmpset(atomic_lock, 0, 1); + if (lock_taken) { + /* delay with the lock held */ + rte_delay_ms(250); + rte_atomic32_clear((rte_atomic32_t *)atomic_lock); + } else { + /* 2nd thread will fail to take lock, so set pass flag */ +
[dpdk-dev] [PATCH v3 7/7] doc: add service cores to doc and release notes
Add a section describing the fundamental concepts behind service cores. Where service cores originate from, and how to enable services. The release notes for 17.08 are updated, with an introductory paragraph on the service cores concept. Finally the Eventdev SW PMD documentation is amended to reflect that it can be run as a service. Signed-off-by: Harry van Haaren --- I would like to enable the service-cores in the eventdev_pipeline sample app, to showcase the power of the service-core abstraction. There is some remaining work TODO, in order to genericise the sample app for both HW and SW PMDs, and during that rework the service-cores can be added too. The sample app will make a good showcase for docs, and make it much easier to understand. --- doc/guides/eventdevs/sw.rst | 4 +- doc/guides/prog_guide/index.rst | 1 + doc/guides/prog_guide/service_cores.rst | 81 + doc/guides/rel_notes/release_17_08.rst | 8 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 doc/guides/prog_guide/service_cores.rst diff --git a/doc/guides/eventdevs/sw.rst b/doc/guides/eventdevs/sw.rst index fb63c84..a3e6624 100644 --- a/doc/guides/eventdevs/sw.rst +++ b/doc/guides/eventdevs/sw.rst @@ -32,7 +32,9 @@ Software Eventdev Poll Mode Driver The software eventdev is an implementation of the eventdev API, that provides a wide range of the eventdev features. The eventdev relies on a CPU core to -perform event scheduling. +perform event scheduling. This PMD can use the service core library to run the +scheduling function, allowing an application to utilize the power of service +cores to multiplex other work on the same core if required. 
Features diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst index ef5a02a..231622a 100644 --- a/doc/guides/prog_guide/index.rst +++ b/doc/guides/prog_guide/index.rst @@ -38,6 +38,7 @@ Programmer's Guide intro overview env_abstraction_layer +service_cores ring_lib mempool_lib mbuf_lib diff --git a/doc/guides/prog_guide/service_cores.rst b/doc/guides/prog_guide/service_cores.rst new file mode 100644 index 000..3a029ba --- /dev/null +++ b/doc/guides/prog_guide/service_cores.rst @@ -0,0 +1,81 @@ +.. BSD LICENSE +Copyright(c) 2017 Intel Corporation. All rights reserved. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +* Neither the name of Intel Corporation nor the names of its +contributors may be used to endorse or promote products derived +from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Service Cores += + +DPDK has a concept known as service cores, which enables a dynamic way of +performing work on DPDK lcores. Service core support is built into the EAL, and +an API is provided to optionally allow applications to control how the service +cores are used at runtime. + +The service cores concept is built up out of services (components of DPDK that +require CPU cycles to operate) and service cores (DPDK lcores, tasked with +running services). The power of the service core concept is that the mapping +between service cores and services can be configured to abstract away the +difference between platforms and environments. + +For example, the Eventdev has hardware and software PMDs. Of these the software +PMD requires an lcore to perform the scheduling operations, while the hardware +PMD does not. With service cores, the application would not directly notice +that the scheduling is done in software. + +For detailed information about the service core API, please refer to the docs. + +Service Core
[dpdk-dev] [PATCH v3 6/7] maintainers: claim service cores
Sign-up to be the maintainer of public header files and implementation of the service-cores infrastructure. Signed-off-by: Harry van Haaren --- MAINTAINERS | 6 ++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 00351ff..2e5081c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -134,6 +134,12 @@ F: test/test/test_mp_secondary.c F: examples/multi_process/ F: doc/guides/sample_app_ug/multi_process.rst +Service Cores +M: Harry van Haaren +F: lib/librte_eal/common/include/rte_service.h +F: lib/librte_eal/common/include/rte_service_private.h +F: lib/librte_eal/common/rte_keepalive.c + ARM v7 M: Jan Viktorin M: Jianbo Liu -- 2.7.4
[dpdk-dev] [PATCH] event/sw: allow multiple calls to port setup
This commit allows port_setup() to be called multiple times, as is required by the API to re-configure an already initialized port. Signed-off-by: Harry van Haaren --- This patch can be squashed with 5/5 of Bruce's event rings patchset: http://dpdk.org/dev/patchwork/patch/26110/ drivers/event/sw/sw_evdev.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 8588003..5b33bfd 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -161,8 +161,16 @@ sw_port_setup(struct rte_eventdev *dev, uint8_t port_id, p->id = port_id; p->sw = sw; + /* check to see if rings exists - port_setup() can be called multiple +* times legally (assuming device is stopped). If ring exists, free it +* to so it gets re-created with the correct size +*/ snprintf(buf, sizeof(buf), "sw%d_p%u_%s", dev->data->dev_id, port_id, "rx_worker_ring"); + struct rte_event_ring *existing_ring = rte_event_ring_lookup(buf); + if (existing_ring) + rte_event_ring_free(existing_ring); + p->rx_worker_ring = rte_event_ring_create(buf, MAX_SW_PROD_Q_DEPTH, dev->data->socket_id, RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ); @@ -174,8 +182,13 @@ sw_port_setup(struct rte_eventdev *dev, uint8_t port_id, p->inflight_max = conf->new_event_threshold; + /* check if ring exists, same as rx_worker above */ snprintf(buf, sizeof(buf), "sw%d_p%u, %s", dev->data->dev_id, port_id, "cq_worker_ring"); + existing_ring = rte_event_ring_lookup(buf); + if (existing_ring) + rte_event_ring_free(existing_ring); + p->cq_worker_ring = rte_event_ring_create(buf, conf->dequeue_depth, dev->data->socket_id, RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ); -- 2.7.4
[dpdk-dev] [PATCH v4 0/7] service cores: cover letter
This patchset introduces service cores to DPDK. A service core is an lcore that performs functions to abstract away details of differences in environment of the application. An example is using the eventdev API, where either a software or hardware PMD performs scheduling. In the case of the software PMD an lcore is required to perform scheduling, which means application logic would have to be aware of the PMD running under the API. To abstract away the differences in HW / SW PMDs, service cores can run the SW PMD service without application logic specifying the exact cores to use. Note that eventdev is only one API that benefits; timers, interrupts handling, statistics and monitoring, and a range of other infrastructure that requires a slice of CPU time may all benefit from service cores. The application is not obliged to manually use the service cores API, however if an application wishes to use the service cores API for fine grained control over how the services are run, this is possible. Deciding between a performance threading-profile and scaled-down profile can be achieved by advanced usage of service cores and setting the lcore mappings. Patch 5/7 shows how a PMD can register a service to run a function. This is then available (along with any other registered services) to be run by the service cores. Patches 6/7 and 7/7 add documentation, and claim maintainership. 
Regards, -Harry v4: - Range of fixes as suggested by Jerin - Improved unit tests, ensuring ex-service cores become available to app - Added functions to EXPERIMENTAL tag in .map files (Thomas) - Added @warning experimental notes to Doxygen API documentation (Thomas) - Various smaller fixes / cleanups - See commit notes for details v3: - Added docs - Added release notes - Updated maintainers file - Compile checks with devtools/test-build.sh - Validated patches apply to latest dpdk/master - Based on discussion, rte_service_iterate() is *not* included, but could be added at a later date if use-cases require it. - Future work includes enabling the eventdev_pipeline sample app, but there is still some churn there to enable both HW/SW PMDs seamlessly. Once sample app is enabled a service core walk-through with that sample app can be added to the docs, to provide a tutorial on service-core usage. Harry van Haaren (7): service cores: header and implementation service cores: EAL init changes service cores: coremask parsing service cores: add unit tests event/sw: enable SW PMD with service capability doc: add service cores to doc and release notes maintainers: claim service cores MAINTAINERS| 7 + doc/api/doxy-api-index.md | 1 + doc/guides/eventdevs/sw.rst| 4 +- doc/guides/prog_guide/index.rst| 1 + doc/guides/prog_guide/service_cores.rst| 81 +++ doc/guides/rel_notes/release_17_08.rst | 8 + drivers/event/sw/sw_evdev.c| 32 + drivers/event/sw/sw_evdev.h| 3 + lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/eal.c| 23 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 22 + lib/librte_eal/common/Makefile | 1 + lib/librte_eal/common/eal_common_lcore.c | 1 + lib/librte_eal/common/eal_common_options.c | 90 ++- lib/librte_eal/common/include/rte_eal.h| 4 + lib/librte_eal/common/include/rte_lcore.h | 3 +- lib/librte_eal/common/include/rte_service.h| 383 .../common/include/rte_service_private.h | 140 + lib/librte_eal/common/rte_service.c| 687 + 
lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/eal.c | 23 + lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map| 22 + test/test/Makefile | 2 + test/test/test_service_cores.c | 538 25 files changed, 2083 insertions(+), 4 deletions(-) create mode 100644 doc/guides/prog_guide/service_cores.rst create mode 100644 lib/librte_eal/common/include/rte_service.h create mode 100644 lib/librte_eal/common/include/rte_service_private.h create mode 100644 lib/librte_eal/common/rte_service.c create mode 100644 test/test/test_service_cores.c -- 2.7.4
[dpdk-dev] [PATCH v4 1/7] service cores: header and implementation
Add header files, update .map files with new service functions, and add the service header to the doxygen for building. This service header API allows DPDK to use services as a concept of something that requires CPU cycles. An example is a PMD that runs in software to schedule events, where a hardware version exists that does not require a CPU. The code presented here is based on an initial RFC: http://dpdk.org/ml/archives/dev/2017-May/065207.html This was then reworked, and RFC v2 with the changes posted: http://dpdk.org/ml/archives/dev/2017-June/067194.html This is the fourth iteration of the service core concept, with 2 RFCs and this being v2 of the implementation. Signed-off-by: Harry van Haaren --- v4: - Fixed (unsigned) checkpatch error - Fixed misleading-indentation/if { } brackets (checkpatch/Jerin) - Fixed set function argument to be "enable" instead of "enabled" (Jerin) - Improve doxygen comment for size of array in rte_service_core_list (Jerin) - Fixed typos (Jerin) - Optimized atomic clear after running service (Jerin) - Added smp_rmb() at end of loop to re-load runstate / mapping (Jerin) - Fix issue with lcore role not being adhered to (Jerin) - Add experimental warnings for all service core functions (Thomas) - Moved service core functions into EXPERIMENTAL section of .map (Thomas) - Improve documentation of rte_service_lcore_reset_all() (Harry) v3: - None. 
v2: Thanks Jerin for review - below is a list of your suggested changes; - Doxygen rename to "service cores" for consistency - use lcore instead of core for function names - Fix about 10 typos / seplling msitakse ;) - Fix doxygen /** comments for functions - Doxygen @param[out] improvements - int8_t for socket_id to ordinary int - Rename MACROS for readability - Align structs to cache lines - Allocate fastpath-used data from hugepages - Added/fixed memory barriers for multi-core scheduling - Add const to variables, and hoist above loop - Optimize cmpset atomic if MT_SAFE or only one core mapped - Statistics collection only when requested - Add error check for array pointer - Remove panic() calls from library - Fix TODO notes from previous patchset There are also some other changes; - Checkpatch issues fixed - .map file updates - Add rte_service_get_by_name() function --- doc/api/doxy-api-index.md | 1 + lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 22 + lib/librte_eal/common/Makefile | 1 + lib/librte_eal/common/eal_common_lcore.c | 1 + lib/librte_eal/common/include/rte_eal.h| 4 + lib/librte_eal/common/include/rte_lcore.h | 3 +- lib/librte_eal/common/include/rte_service.h| 383 .../common/include/rte_service_private.h | 140 + lib/librte_eal/common/rte_service.c| 687 + lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map| 22 + 13 files changed, 1273 insertions(+), 2 deletions(-) create mode 100644 lib/librte_eal/common/include/rte_service.h create mode 100644 lib/librte_eal/common/include/rte_service_private.h create mode 100644 lib/librte_eal/common/rte_service.c diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index 3b83288..e2abdf4 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -159,6 +159,7 @@ There are many libraries, so their headers may be grouped by topics: [common] (@ref rte_common.h), [ABI 
compat] (@ref rte_compat.h), [keepalive] (@ref rte_keepalive.h), + [service cores] (@ref rte_service.h), [device metrics] (@ref rte_metrics.h), [bitrate statistics] (@ref rte_bitrate.h), [latency statistics] (@ref rte_latencystats.h), diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index a0f9950..05517a2 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 0295ea9..130b2c0 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -208,5 +208,27 @@ EXPERIMENTAL { rte_eal_hotplug_add; rte_eal_hotplug_remove; + rte_service_disable_on_lcore; + rte_service_dump; + rte_service_enable_on_lcore; + rte_service_get_by_id; +
[dpdk-dev] [PATCH v4 2/7] service cores: EAL init changes
This commit shows the changes required in rte_eal_init() to transparently launch the service threads. The threads are launched into the service worker functions here because after rte_eal_init() the application is not guaranteed to call any other DPDK API. As the registration of services happens at initialization time, the services that require CPU time are already available when we reach the end of rte_eal_init(). Signed-off-by: Harry van Haaren --- v4: - Added #include for service cores in BSD eal.c v2 comments: - Include BSD implementation (Jerin) - Move details of core-tracking into rte_service_lcore_add(Jerin) - Given there are changes other than those suggested, not using Ack --- lib/librte_eal/bsdapp/eal/eal.c | 23 +++ lib/librte_eal/linuxapp/eal/eal.c | 23 +++ 2 files changed, 46 insertions(+) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 05f0c1f..09e3301 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include "eal_private.h" @@ -653,6 +654,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services first so vdevs can register during bus_probe. +* Ignore return value of already initialized, this means EAL parameter +* -s was used to set a service-core mask. 
+*/ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -660,6 +672,17 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default services configuration */ + uint32_t service_cores[RTE_MAX_LCORE]; + int count = rte_service_lcore_list(service_cores, RTE_MAX_LCORE); + for (i = 0; i < count; i++) + rte_service_lcore_start(service_cores[i]); + ret = rte_service_set_default_mapping(); + if (ret) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 7c78f2d..d63dd87 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -78,6 +78,7 @@ #include #include #include +#include #include "eal_private.h" #include "eal_thread.h" @@ -932,6 +933,17 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services first so vdevs can register during bus_probe. +* Ignore return value of already initialized, this means EAL parameter +* -s was used to set a service-core mask. 
+*/ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -939,6 +951,17 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default services configuration */ + uint32_t service_cores[RTE_MAX_LCORE]; + int count = rte_service_lcore_list(service_cores, RTE_MAX_LCORE); + for (i = 0; i < count; i++) + rte_service_lcore_start(service_cores[i]); + ret = rte_service_set_default_mapping(); + if (ret) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; -- 2.7.4
[dpdk-dev] [PATCH v4 3/7] service cores: coremask parsing
Add logic for parsing a coremask from EAL, which allows the application to be unaware of the cores being taken from its coremask. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v4: - Add --help print output (Jerin) - Fixed coremask parsing to ensure master core is ROLE_RTE (Jerin) - Improve coremask parsing error handling, master lcore and service coremask overlap is failed, informing user that the core cannot be used for the purpose A as it is already in use for B (A or B being service/master) v2: - Remove printf() (Jerin) - Remove commented code (Jerin) - simplified core tracking, no requirement on #include rte_service in EAL parsing anymore. wip: fix master lcore handling in EAL --- lib/librte_eal/common/eal_common_options.c | 90 +- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index f470195..2881884 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -61,6 +61,7 @@ const char eal_short_options[] = "b:" /* pci-blacklist */ "c:" /* coremask */ + "s:" /* service coremask */ "d:" /* driver */ "h" /* help */ "l:" /* corelist */ @@ -267,6 +268,76 @@ static int xdigit2val(unsigned char c) } static int +eal_parse_service_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned int count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . +* Remove 0x/0X if exists. 
+*/ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; + j++, idx++) { + if ((1 << j) & val) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (master_lcore_parsed && + cfg->master_lcore == lcore) { + RTE_LOG(ERR, EAL, + "Error: lcore %u is master lcore, cannot use as service core\n", idx); + return -1; + } + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, + "lcore %u unavailable\n", idx); + return -1; + } + lcore_config[idx].core_role = ROLE_SERVICE; + count++; + } + } + } + + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + + for (; idx < RTE_MAX_LCORE; idx++) + lcore_config[idx].core_index = -1; + + if (count == 0) + return -1; + + cfg->service_lcore_count = count; + return 0; +} + +static int eal_parse_coremask(const char *coremask) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -409,6 +480,13 @@ eal_parse_master_lcore(const char *arg) if (cfg->master_lcore >= RTE_MAX_LCORE) return -1; master_lcore_parsed = 1; + + /* ensure master core is not used as service core */ + if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) { + RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n"); + return -1; + } + return 0; } @@ -826,6 +904,13 @@ eal_parse_common_option(int opt, const char *optarg, } core_parsed = 1; break; + /* service coremask */ + case 's': + if (eal_parse_service_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service coremask\n"); +
[dpdk-dev] [PATCH v4 5/7] event/sw: enable SW PMD with service capability
This commit shows how easy it is to enable a specific DPDK component with a service callback, in order to get CPU cycles for it. The beauty of this method is that the service is unaware of how much CPU time it is getting - the application can decide how to split and slice cores and map them to the registered services. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v4: - Include Acked by v2: - Remove #include (Jerin) - Remove development prints (Jerin) - Track service name in PMD - Print warning if service does not have an lcore mapped (Jerin) --- drivers/event/sw/sw_evdev.c | 32 drivers/event/sw/sw_evdev.h | 3 +++ 2 files changed, 35 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index fe2a61e..baab376 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "sw_evdev.h" #include "iq_ring.h" @@ -597,6 +598,13 @@ sw_start(struct rte_eventdev *dev) { unsigned int i, j; struct sw_evdev *sw = sw_pmd_priv(dev); + + /* check a service core is mapped to this service */ + struct rte_service_spec *s = rte_service_get_by_name(sw->service_name); + if (!rte_service_is_running(s)) + SW_LOG_ERR("Warning: No Service core enabled on service %s\n", + s->name); + /* check all ports are set up */ for (i = 0; i < sw->port_count; i++) if (sw->ports[i].rx_worker_ring == NULL) { @@ -699,6 +707,14 @@ set_credit_quanta(const char *key __rte_unused, const char *value, void *opaque) return 0; } + +static int32_t sw_sched_service_func(void *args) +{ + struct rte_eventdev *dev = args; + sw_event_schedule(dev); + return 0; +} + static int sw_probe(struct rte_vdev_device *vdev) { @@ -810,6 +826,22 @@ sw_probe(struct rte_vdev_device *vdev) sw->credit_update_quanta = credit_quanta; sw->sched_quanta = sched_quanta; + /* register service with EAL */ + struct rte_service_spec service; + memset(&service, 0, sizeof(struct rte_service_spec)); + snprintf(service.name, 
sizeof(service.name), "%s_service", name); + snprintf(sw->service_name, sizeof(sw->service_name), "%s_service", + name); + service.socket_id = socket_id; + service.callback = sw_sched_service_func; + service.callback_userdata = (void *)dev; + + int32_t ret = rte_service_register(&service); + if (ret) { + SW_LOG_ERR("service register() failed"); + return -ENOEXEC; + } + return 0; } diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 0d7f94f..3e83823 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -59,6 +59,7 @@ #define EVENTDEV_NAME_SW_PMD event_sw #define SW_PMD_NAME RTE_STR(event_sw) +#define SW_PMD_NAME_MAX 64 #define SW_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) @@ -276,6 +277,8 @@ struct sw_evdev { /* store num stats and offset of the stats for each queue */ uint16_t xstats_count_per_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; uint16_t xstats_offset_for_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + char service_name[SW_PMD_NAME_MAX]; }; static inline struct sw_evdev * -- 2.7.4
[dpdk-dev] [PATCH v4 4/7] service cores: add unit tests
Add a bunch of unit tests, to ensure that the service core functions are operating as expected. As part of these tests a dummy service is registered which allows identifying if a service callback has been invoked by using the CPU tick counter. This allows identifying if functions to start and stop service lcores are actually having effect. Signed-off-by: Harry van Haaren --- v4: - Remove static slcore_id, make service-core choice dynamic (Jerin) - Use lcore_get_next() to acquire service cores (Jerin) - Improve unit test to ensure app can remote_launch() on a core that previously served as a service-core (verifies ROLE_RTE is set) (Jerin) v2 changes; - Rename variable to slcore_id (Jerin) - Rename function to unregister_all() (Jerin) - Fix typos (Jerin) - Add unit test for get_by_name() - Add unit tests (all suggestions by Jerin) -- get_name() -- Verify probe_capability API -- Verify MT_SAFE capability (see code for details) -- Verify rte_service_dump() API --- test/test/Makefile | 2 + test/test/test_service_cores.c | 538 + 2 files changed, 540 insertions(+) create mode 100644 test/test/test_service_cores.c diff --git a/test/test/Makefile b/test/test/Makefile index ee240be..61e296b 100644 --- a/test/test/Makefile +++ b/test/test/Makefile @@ -151,6 +151,8 @@ SRCS-y += test_interrupts.c SRCS-y += test_version.c SRCS-y += test_func_reentrancy.c +SRCS-y += test_service_cores.c + SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_num.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_etheraddr.c diff --git a/test/test/test_service_cores.c b/test/test/test_service_cores.c new file mode 100644 index 000..452ea31 --- /dev/null +++ b/test/test/test_service_cores.c @@ -0,0 +1,538 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test.h" + +/* used as the service core ID */ +static uint32_t slcore_id; +/* used as timestamp to detect if a service core is running */ +static uint64_t service_tick; +/* used as a flag to check if a function was run */ +static uint32_t service_remote_launch_flag; + +#define SERVICE_DELAY 1 + +#define DUMMY_SERVICE_NAME "dummy_service" +#define MT_SAFE_SERVICE_NAME "mt_safe_service" + +static int +testsuite_setup(void) +{ + slcore_id = rte_get_next_lcore(/* start core */ -1, + /* skip master */ 1, + /* wrap */ 0); + + return TEST_SUCCESS; +} + +static void +testsuite_teardown(void) +{ + /* release service cores? */ +} + +static int32_t dummy_cb(void *args) +{ + RTE_SET_USED(args); + service_tick++; + rte_delay_ms(SERVICE_DELAY); + return 0; +} + + +static int32_t dummy_mt_safe_cb(void *args) +{ + /* Atomic checks to ensure MT safe services allow > 1 thread to +* concurrently run the callback. The concept is as follows; +* 1) if lock is available, take the lock then delay +* 2) if first lock is taken, and a thread arrives in the CB, we know +*that 2 threads are running the callback at the same time: MT safe +*/ + uint32_t *test_p
[dpdk-dev] [PATCH v4 7/7] maintainers: claim service cores
Sign-up to be the maintainer of public header files and implementation of the service-cores infrastructure. Signed-off-by: Harry van Haaren --- v4: - fix rte_service.c filename - Include service_cores.rst file --- MAINTAINERS | 7 +++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b4424ea..1cf6bf6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -134,6 +134,13 @@ F: test/test/test_mp_secondary.c F: examples/multi_process/ F: doc/guides/sample_app_ug/multi_process.rst +Service Cores +M: Harry van Haaren +F: doc/guides/prog_guide/service_cores.rst +F: lib/librte_eal/common/include/rte_service.h +F: lib/librte_eal/common/include/rte_service_private.h +F: lib/librte_eal/common/rte_service.c + ARM v7 M: Jan Viktorin M: Jianbo Liu -- 2.7.4
[dpdk-dev] [PATCH v4 6/7] doc: add service cores to doc and release notes
Add a section describing the fundamental concepts behind service cores. Where service cores originate from, and how to enable services. The release notes for 17.08 are updated, with an introductory paragraph on the service cores concept. Finally the Eventdev SW PMD documentation is amended to reflect that it can be run as a service. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v4: - Add ack from ML I would like to enable the service-cores in the eventdev_pipeline sample app, to showcase the power of the service-core abstraction. There is some remaining work TODO, in order to genericise the sample app for both HW and SW PMDs, and during that rework the service-cores can be added too. The sample app will make a good showcase for docs, and make it much easier to understand. --- doc/guides/eventdevs/sw.rst | 4 +- doc/guides/prog_guide/index.rst | 1 + doc/guides/prog_guide/service_cores.rst | 81 + doc/guides/rel_notes/release_17_08.rst | 8 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 doc/guides/prog_guide/service_cores.rst diff --git a/doc/guides/eventdevs/sw.rst b/doc/guides/eventdevs/sw.rst index fb63c84..a3e6624 100644 --- a/doc/guides/eventdevs/sw.rst +++ b/doc/guides/eventdevs/sw.rst @@ -32,7 +32,9 @@ Software Eventdev Poll Mode Driver The software eventdev is an implementation of the eventdev API, that provides a wide range of the eventdev features. The eventdev relies on a CPU core to -perform event scheduling. +perform event scheduling. This PMD can use the service core library to run the +scheduling function, allowing an application to utilize the power of service +cores to multiplex other work on the same core if required. 
Features diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst index ef5a02a..231622a 100644 --- a/doc/guides/prog_guide/index.rst +++ b/doc/guides/prog_guide/index.rst @@ -38,6 +38,7 @@ Programmer's Guide intro overview env_abstraction_layer +service_cores ring_lib mempool_lib mbuf_lib diff --git a/doc/guides/prog_guide/service_cores.rst b/doc/guides/prog_guide/service_cores.rst new file mode 100644 index 000..3a029ba --- /dev/null +++ b/doc/guides/prog_guide/service_cores.rst @@ -0,0 +1,81 @@ +.. BSD LICENSE +Copyright(c) 2017 Intel Corporation. All rights reserved. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +* Neither the name of Intel Corporation nor the names of its +contributors may be used to endorse or promote products derived +from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Service Cores += + +DPDK has a concept known as service cores, which enables a dynamic way of +performing work on DPDK lcores. Service core support is built into the EAL, and +an API is provided to optionally allow applications to control how the service +cores are used at runtime. + +The service cores concept is built up out of services (components of DPDK that +require CPU cycles to operate) and service cores (DPDK lcores, tasked with +running services). The power of the service core concept is that the mapping +between service cores and services can be configured to abstract away the +difference between platforms and environments. + +For example, the Eventdev has hardware and software PMDs. Of these the software +PMD requires an lcore to perform the scheduling operations, while the hardware +PMD does not. With service cores, the application would not directly notice +that the scheduling is done in software. + +For detailed information about the service core API,
[dpdk-dev] [PATCH v5 0/7] service cores: cover letter
This patchset introduces service cores to DPDK. A service core is an lcore that performs functions to abstract away details of differences in environment of the application. An example is using the eventdev API, where either a software or hardware PMD performs scheduling. In the case of the software PMD an lcore is required to perform scheduling, which means application logic would have to be aware of the PMD running under the API. To abstract away the differences in HW / SW PMDs, service cores can run the SW PMD service without application logic specifying the exact cores to use. Note that eventdev is only one API that benefits; timers, interrupts handling, statistics and monitoring, and a range of other infrastructure that requires a slice of CPU time may all benefit from service cores. The application is not obliged to manually use the service cores API, however if an application wishes to use the service cores API for fine grained control over how the services are run, this is possible. Deciding between a performance threading-profile and scaled-down profile can be achieved by advanced usage of service cores and setting the lcore mappings. Patch 5/7 shows how a PMD can register a service to run a function. This is then available (along with any other registered services) to be run by the service cores. Patches 6/7 and 7/7 add documentation, and claim maintainership. 
Regards, -Harry v5: Jerin: - Fix documentation warnings - Rename variables to better names - Enable statistics per-service - Improve atomic operation flag checks - Reworked function to rte_service_start_with_defaults() - Added memory barriers to lcore_add() and lcore_del() - Simplified EAL code, reducing duplication and making it more maintainable Jerin/Thomas: - Rename component header to rte_service_component.h v4: - Range of fixes as suggested by Jerin - Improved unit tests, ensuring ex-service cores become available to app - Added functions to EXPERIMENTAL tag in .map files (Thomas) - Added @warning experimental notes to Doxygen API documentation (Thomas) - Various smaller fixes / cleanups - See commit notes for details v3: - Added docs - Added release notes - Updated maintainers file - Compile checks with devtools/test-build.sh - Validated patches apply to latest dpdk/master - Based on discussion, rte_service_iterate() is *not* included, but could be added at a later date if use-cases require it. - Future work includes enabling the eventdev_pipeline sample app, but there is still some churn there to enable both HW/SW PMDs seamlessly. Once sample app is enabled a service core walk-through with that sample app can be added to the docs, to provide a tutorial on service-core usage. 
Harry van Haaren (7): service cores: header and implementation service cores: EAL init changes service cores: coremask parsing service cores: add unit tests event/sw: enable SW PMD with service capability doc: add service cores to doc and release notes maintainers: claim service cores MAINTAINERS| 7 + doc/api/doxy-api-index.md | 1 + doc/guides/eventdevs/sw.rst| 4 +- doc/guides/prog_guide/index.rst| 1 + doc/guides/prog_guide/service_cores.rst| 81 +++ doc/guides/rel_notes/release_17_08.rst | 8 + drivers/event/sw/sw_evdev.c| 32 + drivers/event/sw/sw_evdev.h| 3 + lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/eal.c| 18 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 23 + lib/librte_eal/common/Makefile | 1 + lib/librte_eal/common/eal_common_lcore.c | 1 + lib/librte_eal/common/eal_common_options.c | 91 ++- lib/librte_eal/common/include/rte_eal.h| 4 + lib/librte_eal/common/include/rte_lcore.h | 3 +- lib/librte_eal/common/include/rte_service.h| 387 +++ .../common/include/rte_service_component.h | 144 + lib/librte_eal/common/rte_service.c| 704 + lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/eal.c | 18 + lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map| 23 + test/test/Makefile | 2 + test/test/test_service_cores.c | 599 ++ 25 files changed, 2162 insertions(+), 4 deletions(-) create mode 100644 doc/guides/prog_guide/service_cores.rst create mode 100644 lib/librte_eal/common/include/rte_service.h create mode 100644 lib/librte_eal/common/include/rte_service_component.h create mode 100644 lib/librte_eal/common/rte_service.c create mode 100644 test/test/test_service_cores.c -- 2.7.4
[dpdk-dev] [PATCH v5 1/7] service cores: header and implementation
Add header files, update .map files with new service functions, and add the service header to the doxygen for building. This service header API allows DPDK to use services as a concept of something that requires CPU cycles. An example is a PMD that runs in software to schedule events, where a hardware version exists that does not require a CPU. Signed-off-by: Harry van Haaren --- v5: - Improved service_set_stats_enable() to operate per service (Jerin) - Fixed un-documented doxygen parameter (Jerin) - Renamed cores_state to lcore_states (Jerin) - Optimized atomic operations and flags (Jerin) - Removed info about RFCs etc from commit message (Jerin) - Add lcore_count check to default setup function and return early (Jerin) - Add memory barriers to lcore_add() and lcore_del() (Jerin) - Rename start function to rte_service_start_with_defaults() (Jerin) - Rename header to rte_service_component.h (Jerin/Thomas) v4: - Fixed (unsigned) checkpatch error - Fixed misleading-indentation/if { } brackets (checkpatch/Jerin) - Fixed set function argument to be "enable" instead of "enabled" (Jerin) - Improve doxygen comment for size of array in rte_service_core_list (Jerin) - Fixed typos (Jerin) - Optimized atomic clear after running service (Jerin) - Added smp_rmb() at end of loop to re-load runstate / mapping (Jerin) - Fix issue with lcore role not being adhered to (Jerin) - Add experimental warnings for all service core functions (Thomas) - Moved service core functions into EXPERIMENTAL section of .map (Thomas) - Improve documentation of rte_service_lcore_reset_all() (Harry) v3: - None. 
v2: Thanks Jerin for review - below a list your suggested changes; - Doxygen rename to "service cores" for consistency - use lcore instead of core for function names - Fix about 10 typos / seplling msitakse ;) - Dix doxygen /** comments for functions - Doxygen @param[out] improvements - int8_t for socket_id to ordinary int - Rename MACROS for readability - Align structs to cache lines - Allocate fastpath-used data from hugepages - Added/fixed memory barriers for multi-core scheduling - Add const to variables, and hoist above loop - Optimize cmpset atomic if MT_SAFE or only one core mapped - Statistics collection only when requested - Add error check for array pointer - Remove panic() calls from library - Fix TODO notes from previous patchset There are also some other changes; - Checkpatch issues fixed - .map file updates - Add rte_service_get_by_name() function --- doc/api/doxy-api-index.md | 1 + lib/librte_eal/bsdapp/eal/Makefile | 1 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 23 + lib/librte_eal/common/Makefile | 1 + lib/librte_eal/common/eal_common_lcore.c | 1 + lib/librte_eal/common/include/rte_eal.h| 4 + lib/librte_eal/common/include/rte_lcore.h | 3 +- lib/librte_eal/common/include/rte_service.h| 387 +++ .../common/include/rte_service_component.h | 144 + lib/librte_eal/common/rte_service.c| 704 + lib/librte_eal/linuxapp/eal/Makefile | 1 + lib/librte_eal/linuxapp/eal/eal_thread.c | 9 +- lib/librte_eal/linuxapp/eal/rte_eal_version.map| 23 + 13 files changed, 1300 insertions(+), 2 deletions(-) create mode 100644 lib/librte_eal/common/include/rte_service.h create mode 100644 lib/librte_eal/common/include/rte_service_component.h create mode 100644 lib/librte_eal/common/rte_service.c diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index 67594e1..e99e114 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -160,6 +160,7 @@ There are many libraries, so their headers may be grouped by topics: [common] (@ref rte_common.h), 
[ABI compat] (@ref rte_compat.h), [keepalive] (@ref rte_keepalive.h), + [service cores] (@ref rte_service.h), [device metrics] (@ref rte_metrics.h), [bitrate statistics] (@ref rte_bitrate.h), [latency statistics] (@ref rte_latencystats.h), diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile index a0f9950..05517a2 100644 --- a/lib/librte_eal/bsdapp/eal/Makefile +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c # from arch dir SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map index 381f895..480ad23 100644 --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -209,5 +209,28 @@ EXPERIMENTAL {
[dpdk-dev] [PATCH v5 2/7] service cores: EAL init changes
This commit shows the changes required in rte_eal_init() to transparently launch the service threads. The threads are launched into the service worker functions here because after rte_eal_init() the application is not guaranteed to call any other DPDK API. As the registration of services happens at initialization time, the services that require CPU time are already available when we reach the end of rte_eal_init(). Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v5: - Simplify EAL code, calling rte_service_start_with_defaults() (Jerin) - Added Ack from ML v4: - Added #include for service cores in BSD eal.c v2 comments: - Include BSD implementation (Jerin) - Move details of core-tracking into rte_service_lcore_add(Jerin) - Given there are changes other than those suggested, not using Ack --- lib/librte_eal/bsdapp/eal/eal.c | 18 ++ lib/librte_eal/linuxapp/eal/eal.c | 18 ++ 2 files changed, 36 insertions(+) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 05f0c1f..3e7a3a8 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include "eal_private.h" @@ -653,6 +654,14 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services so vdevs register service during bus_probe. */ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -660,6 +669,15 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default service/lcore mappings and start running. Ignore +* -ENOTSUP, as it indicates no service coremask passed to EAL. 
+*/ + ret = rte_service_start_with_defaults(); + if (ret < 0 && ret != -ENOTSUP) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 7c78f2d..2914646 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -78,6 +78,7 @@ #include #include #include +#include #include "eal_private.h" #include "eal_thread.h" @@ -932,6 +933,14 @@ rte_eal_init(int argc, char **argv) rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); rte_eal_mp_wait_lcore(); + /* initialize services so vdevs register service during bus_probe. */ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + /* Probe all the buses and devices/drivers on them */ if (rte_bus_probe()) { rte_eal_init_alert("Cannot probe devices\n"); @@ -939,6 +948,15 @@ rte_eal_init(int argc, char **argv) return -1; } + /* initialize default service/lcore mappings and start running. Ignore +* -ENOTSUP, as it indicates no service coremask passed to EAL. +*/ + ret = rte_service_start_with_defaults(); + if (ret < 0 && ret != -ENOTSUP) { + rte_errno = ENOEXEC; + return -1; + } + rte_eal_mcfg_complete(); return fctret; -- 2.7.4
[dpdk-dev] [PATCH v5 3/7] service cores: coremask parsing
Add logic for parsing a coremask from EAL, which allows the application to be unaware of the cores being taken from its coremask. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v4: - Add --help print output (Jerin) - Fixed coremask parsing to ensure master core is ROLE_RTE (Jerin) - Improve coremask parsing error handling, master lcore and service coremask overlap is failed, informing user that the core cannot be used for the purpose A as it is already in use for B (A or B being service/master) v2: - Remove printf() (Jerin) - Remove commented code (Jerin) - simplified core tracking, no requirement on #include rte_service in EAL parsing anymore. wip: fix master lcore handling in EAL --- lib/librte_eal/common/eal_common_options.c | 91 +- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 075b0ea..00265d6 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -61,6 +61,7 @@ const char eal_short_options[] = "b:" /* pci-blacklist */ "c:" /* coremask */ + "s:" /* service coremask */ "d:" /* driver */ "h" /* help */ "l:" /* corelist */ @@ -267,6 +268,77 @@ static int xdigit2val(unsigned char c) } static int +eal_parse_service_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned int count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . +* Remove 0x/0X if exists. 
+*/ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; + j++, idx++) { + if ((1 << j) & val) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (master_lcore_parsed && + cfg->master_lcore == lcore) { + RTE_LOG(ERR, EAL, + "Error: lcore %u is master lcore, cannot use as service core\n", + idx); + return -1; + } + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, + "lcore %u unavailable\n", idx); + return -1; + } + lcore_config[idx].core_role = ROLE_SERVICE; + count++; + } + } + } + + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + + for (; idx < RTE_MAX_LCORE; idx++) + lcore_config[idx].core_index = -1; + + if (count == 0) + return -1; + + cfg->service_lcore_count = count; + return 0; +} + +static int eal_parse_coremask(const char *coremask) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -409,6 +481,13 @@ eal_parse_master_lcore(const char *arg) if (cfg->master_lcore >= RTE_MAX_LCORE) return -1; master_lcore_parsed = 1; + + /* ensure master core is not used as service core */ + if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) { + RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n"); + return -1; + } + return 0; } @@ -826,6 +905,13 @@ eal_parse_common_option(int opt, const char *optarg, } core_parsed = 1; break; + /* service coremask */ + case 's': + if (eal_parse_service_coremask(optarg) < 0) { + RTE_LO
[dpdk-dev] [PATCH v5 5/7] event/sw: enable SW PMD with service capability
This commit shows how easy it is to enable a specific DPDK component with a service callback, in order to get CPU cycles for it. The beauty of this method is that the service is unaware of how much CPU time it is getting - the application can decide how to split and slice cores and map them to the registered services. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v4: - Include Acked by v2: - Remove #include (Jerin) - Remove development prints (Jerin) - Track service name in PMD - Print warning if service does not have an lcore mapped (Jerin) --- drivers/event/sw/sw_evdev.c | 32 drivers/event/sw/sw_evdev.h | 3 +++ 2 files changed, 35 insertions(+) diff --git a/drivers/event/sw/sw_evdev.c b/drivers/event/sw/sw_evdev.c index 5b33bfd..9c534b7 100644 --- a/drivers/event/sw/sw_evdev.c +++ b/drivers/event/sw/sw_evdev.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "sw_evdev.h" #include "iq_ring.h" @@ -614,6 +615,13 @@ sw_start(struct rte_eventdev *dev) { unsigned int i, j; struct sw_evdev *sw = sw_pmd_priv(dev); + + /* check a service core is mapped to this service */ + struct rte_service_spec *s = rte_service_get_by_name(sw->service_name); + if (!rte_service_is_running(s)) + SW_LOG_ERR("Warning: No Service core enabled on service %s\n", + s->name); + /* check all ports are set up */ for (i = 0; i < sw->port_count; i++) if (sw->ports[i].rx_worker_ring == NULL) { @@ -716,6 +724,14 @@ set_credit_quanta(const char *key __rte_unused, const char *value, void *opaque) return 0; } + +static int32_t sw_sched_service_func(void *args) +{ + struct rte_eventdev *dev = args; + sw_event_schedule(dev); + return 0; +} + static int sw_probe(struct rte_vdev_device *vdev) { @@ -829,6 +845,22 @@ sw_probe(struct rte_vdev_device *vdev) sw->credit_update_quanta = credit_quanta; sw->sched_quanta = sched_quanta; + /* register service with EAL */ + struct rte_service_spec service; + memset(&service, 0, sizeof(struct rte_service_spec)); + snprintf(service.name, 
sizeof(service.name), "%s_service", name); + snprintf(sw->service_name, sizeof(sw->service_name), "%s_service", + name); + service.socket_id = socket_id; + service.callback = sw_sched_service_func; + service.callback_userdata = (void *)dev; + + int32_t ret = rte_service_register(&service); + if (ret) { + SW_LOG_ERR("service register() failed"); + return -ENOEXEC; + } + return 0; } diff --git a/drivers/event/sw/sw_evdev.h b/drivers/event/sw/sw_evdev.h index 6ef03ce..71de3c1 100644 --- a/drivers/event/sw/sw_evdev.h +++ b/drivers/event/sw/sw_evdev.h @@ -59,6 +59,7 @@ #define EVENTDEV_NAME_SW_PMD event_sw #define SW_PMD_NAME RTE_STR(event_sw) +#define SW_PMD_NAME_MAX 64 #define SW_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) @@ -276,6 +277,8 @@ struct sw_evdev { /* store num stats and offset of the stats for each queue */ uint16_t xstats_count_per_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; uint16_t xstats_offset_for_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + char service_name[SW_PMD_NAME_MAX]; }; static inline struct sw_evdev * -- 2.7.4
[dpdk-dev] [PATCH v5 4/7] service cores: add unit tests
Add a bunch of unit tests, to ensure that the service core functions are operating as expected. As part of these tests a dummy service is registered which allows identifying if a service callback has been invoked by using the CPU tick counter. This allows identifying if functions to start and stop service lcores are actually having effect. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v5: - Improve unit test for dump() to enable and disable stats (Jerin) - Add new unit test for MT unsafe service callback (Harry) - Added Ack from ML v4: - Remove static slcore_id, make service-core choice dynamic (Jerin) - Use lcore_get_next() to acquire service cores (Jerin) - Improve unit test to ensure app can remote_launch() on a core that previously served as a service-core (verifies ROLE_RTE is set) (Jerin) v2 changes; - Rename variable to slcore_id (Jerin) - Rename function to unregister_all() (Jerin) - Fix typos (Jerin) - Add unit test for get_by_name() - Add unit tests (all suggestions by Jerin) -- get_name() -- Verify probe_capability API -- Verify MT_SAFE capability (see code for details) -- Verify rte_service_dump() API --- test/test/Makefile | 2 + test/test/test_service_cores.c | 599 + 2 files changed, 601 insertions(+) create mode 100644 test/test/test_service_cores.c diff --git a/test/test/Makefile b/test/test/Makefile index e797c20..42d9a49 100644 --- a/test/test/Makefile +++ b/test/test/Makefile @@ -151,6 +151,8 @@ SRCS-y += test_interrupts.c SRCS-y += test_version.c SRCS-y += test_func_reentrancy.c +SRCS-y += test_service_cores.c + SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_num.c SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_etheraddr.c diff --git a/test/test/test_service_cores.c b/test/test/test_service_cores.c new file mode 100644 index 000..88fac8f --- /dev/null +++ b/test/test/test_service_cores.c @@ -0,0 +1,599 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test.h" + +/* used as the service core ID */ +static uint32_t slcore_id; +/* used as timestamp to detect if a service core is running */ +static uint64_t service_tick; +/* used as a flag to check if a function was run */ +static uint32_t service_remote_launch_flag; + +#define SERVICE_DELAY 1 + +#define DUMMY_SERVICE_NAME "dummy_service" +#define MT_SAFE_SERVICE_NAME "mt_safe_service" + +static int +testsuite_setup(void) +{ + slcore_id = rte_get_next_lcore(/* start core */ -1, + /* skip master */ 1, + /* wrap */ 0); + + return TEST_SUCCESS; +} + +static void +testsuite_teardown(void) +{ + /* release service cores? */ +} + +static int32_t dummy_cb(void *args) +{ + RTE_SET_USED(args); + service_tick++; + rte_delay_ms(SERVICE_DELAY); + return 0; +} + +static int32_t dummy_mt_unsafe_cb(void *args) +{ + /* before running test, the initialization has set pass_test to 1. +* If the cmpset in service-cores is working correctly, the code here +* should never fail to take the lock. If the lock *is* taken, fail the
[dpdk-dev] [PATCH v5 6/7] doc: add service cores to doc and release notes
Add a section describing the fundamental concepts behind service cores. Where service cores originate from, and how to enable services. The release notes for 17.08 are updated, with an introductory paragraph on the service cores concept. Finally the Eventdev SW PMD documentation is amended to reflect that it can be run as a service. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v4: - Add ack from ML I would like to enable the service-cores in the eventdev_pipeline sample app, to showcase the power of the service-core abstraction. There is some remaining work TODO, in order to genericise the sample app for both HW and SW PMDs, and during that rework the service-cores can be added too. The sample app will make a good showcase for docs, and make it much easier to understand. --- doc/guides/eventdevs/sw.rst | 4 +- doc/guides/prog_guide/index.rst | 1 + doc/guides/prog_guide/service_cores.rst | 81 + doc/guides/rel_notes/release_17_08.rst | 8 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 doc/guides/prog_guide/service_cores.rst diff --git a/doc/guides/eventdevs/sw.rst b/doc/guides/eventdevs/sw.rst index fb63c84..a3e6624 100644 --- a/doc/guides/eventdevs/sw.rst +++ b/doc/guides/eventdevs/sw.rst @@ -32,7 +32,9 @@ Software Eventdev Poll Mode Driver The software eventdev is an implementation of the eventdev API, that provides a wide range of the eventdev features. The eventdev relies on a CPU core to -perform event scheduling. +perform event scheduling. This PMD can use the service core library to run the +scheduling function, allowing an application to utilize the power of service +cores to multiplex other work on the same core if required. 
Features diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst index 7578395..5548aba 100644 --- a/doc/guides/prog_guide/index.rst +++ b/doc/guides/prog_guide/index.rst @@ -38,6 +38,7 @@ Programmer's Guide intro overview env_abstraction_layer +service_cores ring_lib mempool_lib mbuf_lib diff --git a/doc/guides/prog_guide/service_cores.rst b/doc/guides/prog_guide/service_cores.rst new file mode 100644 index 000..3a029ba --- /dev/null +++ b/doc/guides/prog_guide/service_cores.rst @@ -0,0 +1,81 @@ +.. BSD LICENSE +Copyright(c) 2017 Intel Corporation. All rights reserved. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +* Neither the name of Intel Corporation nor the names of its +contributors may be used to endorse or promote products derived +from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Service Cores += + +DPDK has a concept known as service cores, which enables a dynamic way of +performing work on DPDK lcores. Service core support is built into the EAL, and +an API is provided to optionally allow applications to control how the service +cores are used at runtime. + +The service cores concept is built up out of services (components of DPDK that +require CPU cycles to operate) and service cores (DPDK lcores, tasked with +running services). The power of the service core concept is that the mapping +between service cores and services can be configured to abstract away the +difference between platforms and environments. + +For example, the Eventdev has hardware and software PMDs. Of these the software +PMD requires an lcore to perform the scheduling operations, while the hardware +PMD does not. With service cores, the application would not directly notice +that the scheduling is done in software. + +For detailed information about the service core API,
[dpdk-dev] [PATCH v5 7/7] maintainers: claim service cores
Sign-up to be the maintainer of public header files and implementation of the service-cores infrastructure. Signed-off-by: Harry van Haaren Acked-by: Jerin Jacob --- v5: - Mark with EXPERIMENTAL - Update filename to rte_service_component.h - Added Ack from ML v4: - fix rte_service.c filename - Include service_cores.rst file --- MAINTAINERS | 7 +++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 804ac04..b7df49b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -134,6 +134,13 @@ F: test/test/test_mp_secondary.c F: examples/multi_process/ F: doc/guides/sample_app_ug/multi_process.rst +Service Cores - EXPERIMENTAL +M: Harry van Haaren +F: doc/guides/prog_guide/service_cores.rst +F: lib/librte_eal/common/include/rte_service.h +F: lib/librte_eal/common/include/rte_service_component.h +F: lib/librte_eal/common/rte_service.c + ARM v7 M: Jan Viktorin M: Jianbo Liu -- 2.7.4
[dpdk-dev] [PATCH] doc: add author on cc to git fixline alias
With this commit, the correct method to use git fixline to indicate a fix of a previous commit has changed. The new rules require the author of the original code that is being fixed to be on CC. The logic behind this improvement is that if there is a genuine bug, one of the ideal people to review is the author of the original code being fixed. Adding them on Cc makes them aware of the patch, so that it is not accidentally overlooked while reading the mailing list. Given that the original author (now on Cc:) might not actually review, there is no value in keeping the Cc: in git commit history. If the original author performs a review, their Reviewed-by: or Acked-by: is stored in git history (same as now). Signed-off-by: Harry van Haaren --- This fixline suggestion was proposed here: http://dpdk.org/ml/archives/dev/2017-July/071107.html Based on Thomas' feedback, improved CC: to Cc, and added note about removing the Cc: from git history. http://dpdk.org/ml/archives/dev/2017-July/071119.html --- doc/guides/contributing/patches.rst | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/guides/contributing/patches.rst b/doc/guides/contributing/patches.rst index 7926c96..27e218b 100644 --- a/doc/guides/contributing/patches.rst +++ b/doc/guides/contributing/patches.rst @@ -207,18 +207,21 @@ Here are some guidelines for the body of a commit message: * The text of the commit message should be wrapped at 72 characters. -* When fixing a regression, it is a good idea to reference the id of the commit which introduced the bug. - You can generate the required text using the following git alias:: +* When fixing a regression, it is required to reference the id of the commit + which introduced the bug, and put the original author of that commit on CC. 
+ You can generate the required lines using the following git alias, which prints + the commit SHA and the author of the original code:: - git config alias.fixline "log -1 --abbrev=12 --format='Fixes: %h (\"%s\")'" + git config alias.fixline "log -1 --abbrev=12 --format='Fixes: %h (\"%s\")%nCc: %ae'" - The ``Fixes:`` line can then be added to the commit message:: + The output of ``git fixline `` must then be added to the commit message:: - doc: fix vhost sample parameter + doc: fix some parameter description - Update the docs to reflect removed dev-index. + Update the docs, fixing description of some parameter. - Fixes: 17b8320a3e11 ("vhost: remove index parameter") + Fixes: abcdefgh1234 ("doc: add some parameter") + Cc: aut...@example.com Signed-off-by: Alex Smith -- 2.7.4
[dpdk-dev] [PATCH] service: add corelist to EAL arguments
This commit allows the -S (capital 'S') to be used to indicate a corelist for Services. This is a "nice to have" patch, and does not modify any of the service core functionality. Suggested-by: Jerin Jacob Suggested-by: Thomas Monjalon Signed-off-by: Harry van Haaren --- lib/librte_eal/common/eal_common_options.c | 74 ++ 1 file changed, 74 insertions(+) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 00265d6..696a627 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -65,6 +65,7 @@ eal_short_options[] = "d:" /* driver */ "h" /* help */ "l:" /* corelist */ + "S:" /* service corelist */ "m:" /* memory size */ "n:" /* memory channels */ "r:" /* memory ranks */ @@ -402,6 +403,72 @@ eal_parse_coremask(const char *coremask) } static int +eal_parse_service_corelist(const char *corelist) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, idx = 0; + unsigned count = 0; + char *end = NULL; + int min, max; + + if (corelist == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*corelist)) + corelist++; + i = strlen(corelist); + while ((i > 0) && isblank(corelist[i - 1])) + i--; + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cfg->lcore_role[idx] != ROLE_SERVICE) { + /* handle master lcore already parsed */ + uint32_t lcore = idx; + if (cfg->master_lcore == lcore && + master_lcore_parsed) { + RTE_LOG(ERR, EAL, + "Error: lcore %u is master lcore, cannot use as service core\n", + idx); + return -1; + } + 
lcore_config[idx].core_role = + ROLE_SERVICE; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + + return 0; +} + +static int eal_parse_corelist(const char *corelist) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -912,6 +979,13 @@ eal_parse_common_option(int opt, const char *optarg, return -1; } break; + /* service corelist */ + case 'S': + if (eal_parse_service_corelist(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid service core list\n"); + return -1; + } + break; /* size of memory */ case 'm': conf->memory = atoi(optarg); -- 2.7.4
[dpdk-dev] [PATCH] event: fix memory realloc check in port config
This commit fixes the check to use the just reallocated links_map variable, instead of stale dev->data->links_map. Later the new variable is written to the dev->data->links_map, so the stale-ness is only temporary. Coverity issue: 143456 Fixes: 4f0804bbdfb9 ("eventdev: implement the northbound APIs") Cc: jerin.ja...@caviumnetworks.com Signed-off-by: Harry van Haaren --- lib/librte_eventdev/rte_eventdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index ca2900c..bbb3805 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -298,7 +298,7 @@ rte_event_dev_port_config(struct rte_eventdev *dev, uint8_t nb_ports) sizeof(dev->data->links_map[0]) * nb_ports * RTE_EVENT_MAX_QUEUES_PER_DEV, RTE_CACHE_LINE_SIZE); - if (dev->data->links_map == NULL) { + if (links_map == NULL) { dev->data->nb_ports = 0; RTE_EDEV_LOG_ERR("failed to realloc mem for port_map," "nb_ports %u", nb_ports); -- 2.7.4
[dpdk-dev] [PATCH] eventdev: add dev id checks to config functions
This commit adds checks to verify the device ID is valid to the following functions. Given that they are non-datapath, these checks are always performed. This commit also updates the event/octeontx test-cases to have the correct signed-ness; as the API has changed, this change is required in order to compile. Suggested-by: Jesse Bruni Signed-off-by: Harry van Haaren --- lib/librte_eventdev/rte_eventdev.c | 36 +--- lib/librte_eventdev/rte_eventdev.h | 36 ++-- test/test/test_eventdev_octeontx.c | 32 +++- 3 files changed, 62 insertions(+), 42 deletions(-) diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c index bbb3805..e71f20c 100644 --- a/lib/librte_eventdev/rte_eventdev.c +++ b/lib/librte_eventdev/rte_eventdev.c @@ -609,21 +609,26 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id, return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf); } -uint8_t +int16_t rte_event_queue_count(uint8_t dev_id) { struct rte_eventdev *dev; + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_eventdevs[dev_id]; return dev->data->nb_queues; } -uint8_t +int16_t rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id) { - struct rte_eventdev *dev; + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + if (!is_valid_queue(dev, queue_id)) { + RTE_EDEV_LOG_ERR("Invalid queue_id=%" PRIu8, queue_id); + return -EINVAL; + } - dev = &rte_eventdevs[dev_id]; if (dev->data->event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_QOS) return dev->data->queues_prio[queue_id]; else @@ -743,28 +748,37 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, return 0; } -uint8_t +int16_t rte_event_port_dequeue_depth(uint8_t dev_id, uint8_t port_id) { - struct rte_eventdev *dev; + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } - 
dev = &rte_eventdevs[dev_id]; return dev->data->ports_dequeue_depth[port_id]; } -uint8_t +int16_t rte_event_port_enqueue_depth(uint8_t dev_id, uint8_t port_id) { - struct rte_eventdev *dev; + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); + struct rte_eventdev *dev = &rte_eventdevs[dev_id]; + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return -EINVAL; + } - dev = &rte_eventdevs[dev_id]; return dev->data->ports_enqueue_depth[port_id]; } -uint8_t +int16_t rte_event_port_count(uint8_t dev_id) { struct rte_eventdev *dev; + RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL); dev = &rte_eventdevs[dev_id]; return dev->data->nb_ports; diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index 128bc52..204ff82 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -611,10 +611,10 @@ rte_event_queue_setup(uint8_t dev_id, uint8_t queue_id, * * @param dev_id * Event device identifier. - * @return - * - The number of configured event queues + * @retval Positive The number of configured event queues + * @retval -EINVAL Invalid device id */ -uint8_t +int16_t rte_event_queue_count(uint8_t dev_id); /** @@ -624,13 +624,13 @@ rte_event_queue_count(uint8_t dev_id); * Event device identifier. * @param queue_id * Event queue identifier. 
- * @return - * - If the device has RTE_EVENT_DEV_CAP_QUEUE_QOS capability then the - *configured priority of the event queue in - *[RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST] range - *else the value RTE_EVENT_DEV_PRIORITY_NORMAL + * @retval Positive If the device has RTE_EVENT_DEV_CAP_QUEUE_QOS capability + * then the configured priority of the event queue in + * [RTE_EVENT_DEV_PRIORITY_HIGHEST, RTE_EVENT_DEV_PRIORITY_LOWEST] + * range else the value RTE_EVENT_DEV_PRIORITY_NORMAL + * @retval -EINVAL Invalid device id or queue id */ -uint8_t +int16_t rte_event_queue_priority(uint8_t dev_id, uint8_t queue_id); /* Event port specific APIs */ @@ -722,12 +722,12 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, * Event device identifier. * @param port_id * Event port identifier. - * @return - * - The number of configured dequeue queue depth + * @retval Positive The dequeue queue depth + * @retval -EINVAL Invalid device ID or port ID * * @see rt
[dpdk-dev] [PATCH] service: fix shifts to operate on 64 bit integers
This commit fixes shifts to an integer (1 << shift) which is assumed to be a 32-bit integer. In this case, the shift is variable and expected to be valid for 64-bit integers. Given that the expectation to work with 64 bits exists, we must ensure that the (1 << shift) one in that formula is actually a uint64_t. Simply defining a const uint64_t and using it ensures the compiler is aware of the intention. The issue would only manifest if there were more than 31 services registered. Fixes: 21698354c832 ("service: introduce service cores concept") Signed-off-by: Harry van Haaren --- lib/librte_eal/common/rte_service.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index e82b9ad..8c1cffa 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -285,8 +285,9 @@ rte_service_unregister(struct rte_service_spec *spec) s->internal_flags &= ~(SERVICE_F_REGISTERED); + const uint64_t one = 1; for (i = 0; i < RTE_MAX_LCORE; i++) - lcore_states[i].service_mask &= ~(1 << service_id); + lcore_states[i].service_mask &= ~(one << service_id); memset(&rte_services[service_id], 0, sizeof(struct rte_service_spec_impl)); @@ -319,6 +320,7 @@ rte_service_runner_func(void *arg) { RTE_SET_USED(arg); uint32_t i; + const uint64_t one = 1; const int lcore = rte_lcore_id(); struct core_state *cs = &lcore_states[lcore]; @@ -327,7 +329,7 @@ rte_service_runner_func(void *arg) for (i = 0; i < rte_service_count; i++) { struct rte_service_spec_impl *s = &rte_services[i]; if (s->runstate != RUNSTATE_RUNNING || - !(service_mask & (1 << i))) + !(service_mask & (one << i))) continue; /* check do we need cmpset, if MT safe or <= 1 core @@ -448,6 +450,7 @@ service_update(struct rte_service_spec *service, uint32_t lcore, { uint32_t i; int32_t sid = -1; + const uint64_t one = 1; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { if ((struct rte_service_spec *)&rte_services[i] == 
service && @@ -465,16 +468,16 @@ service_update(struct rte_service_spec *service, uint32_t lcore, if (set) { if (*set) { - lcore_states[lcore].service_mask |= (1 << sid); + lcore_states[lcore].service_mask |= (one << sid); rte_services[sid].num_mapped_cores++; } else { - lcore_states[lcore].service_mask &= ~(1 << sid); + lcore_states[lcore].service_mask &= ~(one << sid); rte_services[sid].num_mapped_cores--; } } if (enabled) - *enabled = (lcore_states[lcore].service_mask & (1 << sid)); + *enabled = (lcore_states[lcore].service_mask & (one << sid)); rte_smp_wmb(); @@ -599,6 +602,7 @@ rte_service_lcore_start(uint32_t lcore) int32_t rte_service_lcore_stop(uint32_t lcore) { + const uint64_t one = 1; if (lcore >= RTE_MAX_LCORE) return -EINVAL; @@ -607,7 +611,7 @@ rte_service_lcore_stop(uint32_t lcore) uint32_t i; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { - int32_t enabled = lcore_states[i].service_mask & (1 << i); + int32_t enabled = lcore_states[i].service_mask & (one << i); int32_t service_running = rte_services[i].runstate != RUNSTATE_STOPPED; int32_t only_core = rte_services[i].num_mapped_cores == 1; -- 2.7.4
[dpdk-dev] [PATCH v2] service: fix shifts to operate on 64 bit integers
This commit fixes shifts to an integer (1 << shift) which is assumed to be a 32-bit integer. In this case, the shift is variable and expected to be valid for 64-bit integers. Given that the expectation to work with 64 bits exists, we must ensure that the (1 << shift) one in that formula is actually a uint64_t. The UINT64_C() macro portably adds the correct suffix to a constant, informing the compiler that the value is to be assigned 64 bits. The issue would only manifest when there were more than 31 services registered. Fixes: 21698354c832 ("service: introduce service cores concept") Signed-off-by: Harry van Haaren --- v2: - Use UINT64_C() instead of const uint64_t method (Gaetan) - Refactored to keep checkpatch happy with line lengths --- lib/librte_eal/common/rte_service.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index e82b9ad..7efb76d 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -286,7 +286,7 @@ rte_service_unregister(struct rte_service_spec *spec) s->internal_flags &= ~(SERVICE_F_REGISTERED); for (i = 0; i < RTE_MAX_LCORE; i++) - lcore_states[i].service_mask &= ~(1 << service_id); + lcore_states[i].service_mask &= ~(UINT64_C(1) << service_id); memset(&rte_services[service_id], 0, sizeof(struct rte_service_spec_impl)); @@ -327,7 +327,7 @@ rte_service_runner_func(void *arg) for (i = 0; i < rte_service_count; i++) { struct rte_service_spec_impl *s = &rte_services[i]; if (s->runstate != RUNSTATE_RUNNING || - !(service_mask & (1 << i))) + !(service_mask & (UINT64_C(1) << i))) continue; /* check do we need cmpset, if MT safe or <= 1 core @@ -463,18 +463,19 @@ service_update(struct rte_service_spec *service, uint32_t lcore, if (!lcore_states[lcore].is_service_core) return -EINVAL; + uint64_t sid_mask = UINT64_C(1) << sid; if (set) { if (*set) { - lcore_states[lcore].service_mask |= (1 << sid); + 
lcore_states[lcore].service_mask |= sid_mask; rte_services[sid].num_mapped_cores++; } else { - lcore_states[lcore].service_mask &= ~(1 << sid); + lcore_states[lcore].service_mask &= ~(sid_mask); rte_services[sid].num_mapped_cores--; } } if (enabled) - *enabled = (lcore_states[lcore].service_mask & (1 << sid)); + *enabled = (lcore_states[lcore].service_mask & (sid_mask)); rte_smp_wmb(); @@ -607,7 +608,8 @@ rte_service_lcore_stop(uint32_t lcore) uint32_t i; for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { - int32_t enabled = lcore_states[i].service_mask & (1 << i); + int32_t enabled = + lcore_states[i].service_mask & (UINT64_C(1) << i); int32_t service_running = rte_services[i].runstate != RUNSTATE_STOPPED; int32_t only_core = rte_services[i].num_mapped_cores == 1; -- 2.7.4
[dpdk-dev] [PATCH] dev: update git fixline alias
The git fixline was updated in the dpdk.org repository to add the original author of the line being "fixed", using Cc: This patch updates the fixline here in the website. See patch for details: http://dpdk.org/dev/patchwork/patch/26860/ Signed-off-by: Harry van Haaren --- dev.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev.html b/dev.html index c0e64d0..34f24cf 100644 --- a/dev.html +++ b/dev.html @@ -110,7 +110,7 @@ smtpserverport = 465 smtpencryption = ssl [alias] - fixline = log -1 --abbrev=12 --format='Fixes: %h (\"%s\")' + fixline = log -1 --abbrev=12 --format='Fixes: %h (\"%s\")%nCc: %ae' Contribute by testing or reviewing patches Patches are applied in the git repository when it becomes clear that -- 2.7.4