Re: [ovs-dev] [PATCH 2/4] dpif-netdev: Least-loaded scheduling algorithm for rxqs

2021-06-29 Thread 0-day Robot
Bleep bloop.  Greetings, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered an error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Unexpected sign-offs from developers who are not authors or co-authors
or committers: Jan Scheurich, Rudra Surya Bhaskara Rao
Lines checked: 465, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/4] dpif-netdev: Least-loaded scheduling algorithm for rxqs

2021-06-29 Thread anurag2k
From: Anurag Agarwal 

The current algorithm for balancing unpinned rxqs over non-isolated pmds
assigns the rxqs in descending load order to pmds in a "zig-zag" fashion
(e.g. A,B,C,C,B,A,A,B,...). This simple heuristic relies on the pmds
being initially "empty" and produces optimal results only if the rxq loads
are not too disparate; a load pattern such as 100,10,10,10,10,10,10 already
defeats it.
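
For illustration only, here is a minimal, self-contained sketch of that
zig-zag walk. The function and variable names are hypothetical and do not
correspond to the actual dpif-netdev code; it merely reproduces the index
pattern described above.

#include <stdbool.h>
#include <stdio.h>

/* Return the current pmd index and advance the cursor so that repeated
 * calls walk up through the pmds and then back down, repeating the pmd
 * at each turning point. */
static int
zigzag_next_index(int *cur, bool *inc, int n_pmds)
{
    int idx = *cur;

    if (*inc) {
        if (*cur == n_pmds - 1) {
            *inc = false;               /* Bounce off the last pmd. */
        } else {
            (*cur)++;
        }
    } else {
        if (*cur == 0) {
            *inc = true;                /* Bounce off the first pmd. */
        } else {
            (*cur)--;
        }
    }
    return idx;
}

int
main(void)
{
    int cur = 0;
    bool inc = true;

    /* With 3 pmds this prints 0 1 2 2 1 0 0 1, i.e. A,B,C,C,B,A,A,B. */
    for (int i = 0; i < 8; i++) {
        printf("%d ", zigzag_next_index(&cur, &inc, 3));
    }
    putchar('\n');
    return 0;
}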

The first prerequisite will no longer be fulfilled when the isolation
of pmds with pinned rxqs is made optional in a subsequent patch.

To prepare for this change we introduce a least-loaded scheduling
algorithm. During rxq scheduling, we keep track of the number of assigned
rxqs and their processing cycles per non-isolated pmd and maintain the
array of pmds per numa node sorted according to their load. We still
assign the rxqs in descending load order, but always assign to the least
loaded pmd. This deals with the case of pmds with a priori load due to
pinned rxqs and, as an additional benefit, handles disparate rxq loads
better.
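
The following is a minimal sketch of this least-loaded assignment over
plain arrays. The array names and sizes are illustrative assumptions; the
actual patch works on struct dp_netdev_pmd_thread and struct
dp_netdev_rxq and keeps the pmd array sorted instead of scanning it.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Sort helper: descending order of processing cycles. */
static int
cmp_desc_u64(const void *a, const void *b)
{
    uint64_t ca = *(const uint64_t *) a;
    uint64_t cb = *(const uint64_t *) b;

    return (ca < cb) - (ca > cb);
}

int
main(void)
{
    uint64_t rxq_cycles[] = { 100, 10, 10, 10, 10, 10, 10 };
    /* Per-pmd load; may start non-zero when pinned rxqs are counted. */
    uint64_t pmd_load[3] = { 0, 0, 0 };
    size_t n_rxqs = sizeof rxq_cycles / sizeof rxq_cycles[0];
    size_t n_pmds = sizeof pmd_load / sizeof pmd_load[0];

    /* Assign the rxqs in descending load order... */
    qsort(rxq_cycles, n_rxqs, sizeof rxq_cycles[0], cmp_desc_u64);

    for (size_t i = 0; i < n_rxqs; i++) {
        /* ...each one to the currently least-loaded pmd. */
        size_t min = 0;

        for (size_t p = 1; p < n_pmds; p++) {
            if (pmd_load[p] < pmd_load[min]) {
                min = p;
            }
        }
        pmd_load[min] += rxq_cycles[i];
        printf("rxq with %"PRIu64" cycles -> pmd %zu\n", rxq_cycles[i], min);
    }
    return 0;
}

With these example loads the sketch ends up with per-pmd loads of 100, 30
and 30. Because each assignment picks the current minimum, pmds that
already carry load (e.g. from pinned rxqs) are naturally skipped until the
other pmds catch up.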

If rxq processing cycles are not used for rxq scheduling, the estimated
pmd load is based on the number of assigned rxqs. In this case, the
least-loaded scheduling algorithm effectively results in a round-robin
assignment of rxqs to pmds, so the legacy code for round-robin
assignment can be removed.
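
As a hypothetical illustration of that degenerate case (again not the
actual OVS code): with all cycle counts at zero, "least loaded" reduces to
"fewest assigned rxqs", so the pmds are simply visited in turn.

#include <stdio.h>

int
main(void)
{
    int n_rxq[3] = { 0, 0, 0 };         /* Stand-in for per-pmd ll_n_rxq. */

    for (int r = 0; r < 7; r++) {
        int min = 0;

        for (int p = 1; p < 3; p++) {
            if (n_rxq[p] < n_rxq[min]) {
                min = p;
            }
        }
        n_rxq[min]++;
        printf("rxq %d -> pmd %d\n", r, min);   /* pmds 0,1,2,0,1,2,0. */
    }
    return 0;
}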

Signed-off-by: Anurag Agarwal 
Signed-off-by: Jan Scheurich 
Signed-off-by: Rudra Surya Bhaskara Rao 
---
 lib/dpif-netdev.c | 267 +-
 tests/pmd.at  |   4 +-
 2 files changed, 166 insertions(+), 105 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index fd192db..1c458b2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -800,6 +800,10 @@ struct dp_netdev_pmd_thread {
 
 /* Next time when PMD should try RCU quiescing. */
 long long next_rcu_quiesce;
+
+/* Number and cycles of assigned rxqs during rxq scheduling. */
+int ll_n_rxq;
+uint64_t ll_cycles;
 };
 
 /* Interface to netdev-based datapath. */
@@ -4905,6 +4909,62 @@ port_reconfigure(struct dp_netdev_port *port)
 return 0;
 }
 
+/* Sort PMDs in ascending order of load. If processing cycles are not used for
+ * rxq scheduling, the ll_cycles are zero. In that case the count of
+ * assigned rxqs is taken as PMD load, resulting in round-robin scheduling. */
+static int
+compare_pmd_load(const void *a, const void *b)
+{
+struct dp_netdev_pmd_thread *pmda;
+struct dp_netdev_pmd_thread *pmdb;
+
+pmda = *(struct dp_netdev_pmd_thread **) a;
+pmdb = *(struct dp_netdev_pmd_thread **) b;
+
+if (pmda->ll_cycles != pmdb->ll_cycles) {
+return (pmda->ll_cycles > pmdb->ll_cycles) ? 1 : -1;
+} else if (pmda->ll_n_rxq != pmdb->ll_n_rxq) {
+return (pmda->ll_n_rxq > pmdb->ll_n_rxq) ? 1 : -1;
+} else {
+/* Cycles and rxqs are the same so tiebreak on core ID.
+ * Tiebreaking (as opposed to return 0) ensures consistent
+ * sort results across multiple OS's. */
+return (pmda->core_id > pmdb->core_id) ? 1 : -1;
+}
+}
+
+/* Sort Rx Queues in descending order of processing cycles they
+ * are consuming. */
+static int
+compare_rxq_cycles(const void *a, const void *b)
+{
+struct dp_netdev_rxq *qa;
+struct dp_netdev_rxq *qb;
+uint64_t cycles_qa, cycles_qb;
+
+qa = *(struct dp_netdev_rxq **) a;
+qb = *(struct dp_netdev_rxq **) b;
+
+cycles_qa = dp_netdev_rxq_get_cycles(qa, RXQ_CYCLES_PROC_HIST);
+cycles_qb = dp_netdev_rxq_get_cycles(qb, RXQ_CYCLES_PROC_HIST);
+
+if (cycles_qa != cycles_qb) {
+return (cycles_qa < cycles_qb) ? 1 : -1;
+} else {
+/* Cycles are the same so tiebreak on port/queue id.
+ * Tiebreaking (as opposed to return 0) ensures consistent
+ * sort results across multiple OS's. */
+uint32_t port_qa = odp_to_u32(qa->port->port_no);
+uint32_t port_qb = odp_to_u32(qb->port->port_no);
+if (port_qa != port_qb) {
+return (port_qa > port_qb) ? 1 : -1;
+} else {
+return (netdev_rxq_get_queue_id(qa->rx)
+- netdev_rxq_get_queue_id(qb->rx));
+}
+}
+}
+
 struct rr_numa_list {
 struct hmap numas;  /* Contains 'struct rr_numa' */
 };
@@ -4917,9 +4977,6 @@ struct rr_numa {
 /* Non isolated pmds on numa node 'numa_id' */
 struct dp_netdev_pmd_thread **pmds;
 int n_pmds;
-
-int cur_index;
-bool idx_inc;
 };
 
 static struct rr_numa *
@@ -4976,49 +5033,12 @@ rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
 numa->n_pmds++;
 numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
 numa->pmds[numa->n_pmds - 1] = pmd;
-/* At least one pmd so initialise curr_idx and idx_inc. */
-numa->cur_index = 0;
-numa->idx_inc = true;
 }
-}
-
-/*
- * Returns the next pmd from the numa node.
- *
- * If 'updown' is 'true' it will alternate between selecting the next pmd in
- *