From: Stephen Hemminger <shemm...@brocade.com>

The old code was doing a floating-point divide for each rte_dequeue(),
which is very expensive. Change to using fixed-point scaled math instead.
This improved performance from 5 Gbit/sec to 10 Gbit/sec.

Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
 lib/librte_sched/rte_sched.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 55fbc14..3023457 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -102,6 +102,9 @@

 #define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX

+/* For cycles_per_byte calculation */
+#define RTE_SCHED_TIME_SHIFT                 20
+
 struct rte_sched_subport {
        /* Token bucket (TB) */
        uint64_t tb_time; /* time of last update */
@@ -239,7 +242,7 @@ struct rte_sched_port {
        uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cyles 
*/
        uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
        uint64_t time;                /* Current NIC TX time measured in bytes 
*/
-       double cycles_per_byte;       /* CPU cycles per byte */
+       uint32_t cycles_per_byte;       /* CPU cycles per byte (scaled) */

        /* Scheduling loop detection */
        uint32_t pipe_loop;
@@ -657,7 +660,9 @@ rte_sched_port_config(struct rte_sched_port_params *params)
        port->time_cpu_cycles = rte_get_tsc_cycles();
        port->time_cpu_bytes = 0;
        port->time = 0;
-       port->cycles_per_byte = ((double) rte_get_tsc_hz()) / ((double) 
params->rate);
+
+       port->cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
+               / params->rate;

        /* Scheduling loop detection */
        port->pipe_loop = RTE_SCHED_PIPE_INVALID;
@@ -2156,11 +2161,12 @@ rte_sched_port_time_resync(struct rte_sched_port *port)
 {
        uint64_t cycles = rte_get_tsc_cycles();
        uint64_t cycles_diff = cycles - port->time_cpu_cycles;
-       double bytes_diff = ((double) cycles_diff) / port->cycles_per_byte;
+       uint64_t bytes_diff = (cycles_diff << RTE_SCHED_TIME_SHIFT)
+               / port->cycles_per_byte;

        /* Advance port time */
        port->time_cpu_cycles = cycles;
-       port->time_cpu_bytes += (uint64_t) bytes_diff;
+       port->time_cpu_bytes += bytes_diff;
        if (port->time < port->time_cpu_bytes) {
                port->time = port->time_cpu_bytes;
        }
-- 
2.1.4

Reply via email to