get_cycles() is the historical access to a fine grained time source, but it
is a suboptimal choice for two reasons:

   - get_cycles() is not guaranteed to be supported and functional on all
     systems/platforms. If not supported or not functional it returns 0,
     which makes benchmarking moot.

   - get_cycles() returns the raw counter value of whatever the
     architecture platform provides. The original x86 Time Stamp Counter
     (TSC) was despite its name tied to the actual CPU core frequency.
     That's no longer the case. So the counter value is only meaningful
     when the CPU operates at the same frequency as the TSC or the value is
     adjusted to the actual CPU frequency. Other architectures and
     platforms provide similar disjunct counters via get_cycles(), so the
     result is operations per BOGO-cycles, which is not really meaningful.

Use ktime_get() instead which provides nanosecond timestamps with the
granularity of the underlying hardware counter, which is no different from
the variety of get_cycles() implementations.

This provides at least understandable metrics, i.e. operations/nanoseconds,
and is available on all platforms. As with get_cycles() the result might
have to be put into relation with the CPU operating frequency, but that's
not any different.

This is part of a larger effort to remove get_cycles() usage from
non-architecture code.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Uladzislau Rezki <[email protected]>
Cc: [email protected]
---
 lib/interval_tree_test.c |   16 ++++++++--------
 lib/rbtree_test.c        |   46 +++++++++++++++++++++++-----------------------
 lib/test_vmalloc.c       |   10 +++++-----
 3 files changed, 36 insertions(+), 36 deletions(-)

--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -65,13 +65,13 @@ static void init(void)
 static int basic_check(void)
 {
        int i, j;
-       cycles_t time1, time2, time;
+       ktime_t time1, time2, time;
 
        printk(KERN_ALERT "interval tree insert/remove");
 
        init();
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++) {
                for (j = 0; j < nnodes; j++)
@@ -80,11 +80,11 @@ static int basic_check(void)
                        interval_tree_remove(nodes + j, &root);
        }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk(" -> %llu cycles\n", (unsigned long long)time);
+       printk(" -> %llu nsecs\n", (unsigned long long)time);
 
        return 0;
 }
@@ -93,7 +93,7 @@ static int search_check(void)
 {
        int i, j;
        unsigned long results;
-       cycles_t time1, time2, time;
+       ktime_t time1, time2, time;
 
        printk(KERN_ALERT "interval tree search");
 
@@ -102,7 +102,7 @@ static int search_check(void)
        for (j = 0; j < nnodes; j++)
                interval_tree_insert(nodes + j, &root);
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        results = 0;
        for (i = 0; i < search_loops; i++)
@@ -113,12 +113,12 @@ static int search_check(void)
                        results += search(&root, start, last);
                }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, search_loops);
        results = div_u64(results, search_loops);
-       printk(" -> %llu cycles (%lu results)\n",
+       printk(" -> %llu nsecs (%lu results)\n",
               (unsigned long long)time, results);
 
        for (j = 0; j < nnodes; j++)
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -243,14 +243,14 @@ static void check_augmented(int nr_nodes
 static int basic_check(void)
 {
        int i, j;
-       cycles_t time1, time2, time;
+       ktime_t time1, time2, time;
        struct rb_node *node;
 
        printk(KERN_ALERT "rbtree testing");
 
        init();
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++) {
                for (j = 0; j < nnodes; j++)
@@ -259,14 +259,14 @@ static int basic_check(void)
                        erase(nodes + j, &root);
        }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk(" -> test 1 (latency of nnodes insert+delete): %llu cycles\n",
+       printk(" -> test 1 (latency of nnodes insert+delete): %llu nsecs\n",
               (unsigned long long)time);
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++) {
                for (j = 0; j < nnodes; j++)
@@ -275,52 +275,52 @@ static int basic_check(void)
                        erase_cached(nodes + j, &root);
        }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk(" -> test 2 (latency of nnodes cached insert+delete): %llu cycles\n",
+       printk(" -> test 2 (latency of nnodes cached insert+delete): %llu nsecs\n",
               (unsigned long long)time);
 
        for (i = 0; i < nnodes; i++)
                insert(nodes + i, &root);
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++) {
                for (node = rb_first(&root.rb_root); node; node = rb_next(node))
                        ;
        }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk(" -> test 3 (latency of inorder traversal): %llu cycles\n",
+       printk(" -> test 3 (latency of inorder traversal): %llu nsecs\n",
               (unsigned long long)time);
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++)
                node = rb_first(&root.rb_root);
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
        printk(" -> test 4 (latency to fetch first node)\n");
-       printk("        non-cached: %llu cycles\n", (unsigned long long)time);
+       printk("        non-cached: %llu nsecs\n", (unsigned long long)time);
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++)
                node = rb_first_cached(&root);
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk("        cached: %llu cycles\n", (unsigned long long)time);
+       printk("        cached: %llu nsecs\n", (unsigned long long)time);
 
        for (i = 0; i < nnodes; i++)
                erase(nodes + i, &root);
@@ -345,13 +345,13 @@ static int basic_check(void)
 static int augmented_check(void)
 {
        int i, j;
-       cycles_t time1, time2, time;
+       ktime_t time1, time2, time;
 
        printk(KERN_ALERT "augmented rbtree testing");
 
        init();
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++) {
                for (j = 0; j < nnodes; j++)
@@ -360,13 +360,13 @@ static int augmented_check(void)
                        erase_augmented(nodes + j, &root);
        }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk(" -> test 1 (latency of nnodes insert+delete): %llu cycles\n", (unsigned long long)time);
+       printk(" -> test 1 (latency of nnodes insert+delete): %llu nsecs\n", (unsigned long long)time);
 
-       time1 = get_cycles();
+       time1 = ktime_get();
 
        for (i = 0; i < perf_loops; i++) {
                for (j = 0; j < nnodes; j++)
@@ -375,11 +375,11 @@ static int augmented_check(void)
                        erase_augmented_cached(nodes + j, &root);
        }
 
-       time2 = get_cycles();
+       time2 = ktime_get();
        time = time2 - time1;
 
        time = div_u64(time, perf_loops);
-       printk(" -> test 2 (latency of nnodes cached insert+delete): %llu cycles\n", (unsigned long long)time);
+       printk(" -> test 2 (latency of nnodes cached insert+delete): %llu nsecs\n", (unsigned long long)time);
 
        for (i = 0; i < check_loops; i++) {
                init();
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -454,8 +454,8 @@ static struct test_driver {
        struct task_struct *task;
        struct test_case_data data[ARRAY_SIZE(test_case_array)];
 
-       unsigned long start;
-       unsigned long stop;
+       ktime_t start;
+       ktime_t stop;
 } *tdriver;
 
 static void shuffle_array(int *arr, int n)
@@ -490,7 +490,7 @@ static int test_func(void *private)
         */
        synchronize_srcu(&prepare_for_test_srcu);
 
-       t->start = get_cycles();
+       t->start = ktime_get();
        for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
                index = random_array[i];
 
@@ -519,7 +519,7 @@ static int test_func(void *private)
 
                t->data[index].time = delta;
        }
-       t->stop = get_cycles();
+       t->stop = ktime_get();
        test_report_one_done();
 
        /*
@@ -619,7 +619,7 @@ static void do_concurrent_test(void)
                                t->data[j].time);
                }
 
-               pr_info("All test took worker%d=%lu cycles\n",
+               pr_info("All test took worker%d=%lld nsecs\n",
                        i, t->stop - t->start);
        }
 


Reply via email to