A package wide event can return a valid read even if it has not run in a
specific cpu, this does not fit well with the assumption that run == 0
is equivalent to a <not counted>.

To fix the problem, this patch defines special error values for val,
run and ena (~0ULL), and use them to signal read errors, allowing run == 0
to be a valid value for package events. A new value, (NA), is output on
read error and when event has not been enabled (timed enabled == 0).

Finally, this patch revamps calculation of deltas and scaling for snapshot
events, removing the calculation of deltas for time running and enabled in
snapshot event, as should be.

Reviewed-by: Stephane Eranian <eran...@google.com>
Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 tools/perf/builtin-stat.c | 36 +++++++++++++++++++++++-----------
 tools/perf/util/counts.h  | 19 ++++++++++++++++++
 tools/perf/util/evsel.c   | 49 ++++++++++++++++++++++++++++++++++++-----------
 tools/perf/util/evsel.h   |  8 ++++++--
 tools/perf/util/stat.c    | 35 +++++++++++----------------------
 5 files changed, 99 insertions(+), 48 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c3c4b49..79043a3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -311,10 +311,8 @@ static int read_counter(struct perf_evsel *counter)
 
                        count = perf_counts(counter->counts, cpu, thread);
                        if (perf_evsel__read(counter, cpu, thread, count)) {
-                               counter->counts->scaled = -1;
-                               perf_counts(counter->counts, cpu, thread)->ena 
= 0;
-                               perf_counts(counter->counts, cpu, thread)->run 
= 0;
-                               return -1;
+                               /* do not write stat for failed reads. */
+                               continue;
                        }
 
                        if (STAT_RECORD) {
@@ -725,12 +723,16 @@ static int run_perf_stat(int argc, const char **argv)
 
 static void print_running(u64 run, u64 ena)
 {
+       bool is_na = run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || !ena;
+
        if (csv_output) {
-               fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
-                                       csv_sep,
-                                       run,
-                                       csv_sep,
-                                       ena ? 100.0 * run / ena : 100.0);
+               if (is_na)
+                       fprintf(stat_config.output, "%sNA%sNA", csv_sep, 
csv_sep);
+               else
+                       fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
+                               csv_sep, run, csv_sep, 100.0 * run / ena);
+       } else if (is_na) {
+               fprintf(stat_config.output, "  (NA)");
        } else if (run != ena) {
                fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
        }
@@ -1103,7 +1105,7 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
                if (counter->cgrp)
                        os.nfields++;
        }
-       if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+       if (run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || 
counter->counts->scaled == -1) {
                if (metric_only) {
                        pm(&os, NULL, "", "", 0);
                        return;
@@ -1209,12 +1211,17 @@ static void print_aggr(char *prefix)
                id = aggr_map->map[s];
                first = true;
                evlist__for_each_entry(evsel_list, counter) {
+                       bool all_nan = true;
                        val = ena = run = 0;
                        nr = 0;
                        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); 
cpu++) {
                                s2 = aggr_get_id(perf_evsel__cpus(counter), 
cpu);
                                if (s2 != id)
                                        continue;
+                               /* skip NA reads. */
+                               if 
(perf_counts_values__is_na(perf_counts(counter->counts, cpu, 0)))
+                                       continue;
+                               all_nan = false;
                                val += perf_counts(counter->counts, cpu, 
0)->val;
                                ena += perf_counts(counter->counts, cpu, 
0)->ena;
                                run += perf_counts(counter->counts, cpu, 
0)->run;
@@ -1228,6 +1235,10 @@ static void print_aggr(char *prefix)
                                fprintf(output, "%s", prefix);
 
                        uval = val * counter->scale;
+                       if (all_nan) {
+                               run = PERF_COUNTS_NA;
+                               ena = PERF_COUNTS_NA;
+                       }
                        printout(id, nr, counter, uval, prefix, run, ena, 1.0);
                        if (!metric_only)
                                fputc('\n', output);
@@ -1306,7 +1317,10 @@ static void print_counter(struct perf_evsel *counter, 
char *prefix)
                if (prefix)
                        fprintf(output, "%s", prefix);
 
-               uval = val * counter->scale;
+               if (val != PERF_COUNTS_NA)
+                       uval = val * counter->scale;
+               else
+                       uval = NAN;
                printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
 
                fputc('\n', output);
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 34d8baa..b65e97a 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -3,6 +3,9 @@
 
 #include "xyarray.h"
 
+/* Not Available (NA) value. Any operation with a NA equals a NA. */
+#define PERF_COUNTS_NA ((u64)~0ULL)
+
 struct perf_counts_values {
        union {
                struct {
@@ -14,6 +17,22 @@ struct perf_counts_values {
        };
 };
 
+static inline void
+perf_counts_values__make_na(struct perf_counts_values *values)
+{
+       values->val = PERF_COUNTS_NA;
+       values->ena = PERF_COUNTS_NA;
+       values->run = PERF_COUNTS_NA;
+}
+
+static inline bool
+perf_counts_values__is_na(struct perf_counts_values *values)
+{
+       return values->val == PERF_COUNTS_NA ||
+              values->ena == PERF_COUNTS_NA ||
+              values->run == PERF_COUNTS_NA;
+}
+
 struct perf_counts {
        s8                        scaled;
        struct perf_counts_values aggr;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d54efb5..fa0ba96 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1180,6 +1180,9 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, 
int cpu, int thread,
        if (!evsel->prev_raw_counts)
                return;
 
+       if (perf_counts_values__is_na(count))
+               return;
+
        if (cpu == -1) {
                tmp = evsel->prev_raw_counts->aggr;
                evsel->prev_raw_counts->aggr = *count;
@@ -1188,26 +1191,43 @@ void perf_evsel__compute_deltas(struct perf_evsel 
*evsel, int cpu, int thread,
                *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
        }
 
-       count->val = count->val - tmp.val;
+       /* Snapshot events do not calculate deltas for count values. */
+       if (!evsel->snapshot)
+               count->val = count->val - tmp.val;
        count->ena = count->ena - tmp.ena;
        count->run = count->run - tmp.run;
 }
 
 void perf_counts_values__scale(struct perf_counts_values *count,
-                              bool scale, s8 *pscaled)
+                              bool scale, bool per_pkg, bool snapshot, s8 
*pscaled)
 {
        s8 scaled = 0;
 
+       if (perf_counts_values__is_na(count)) {
+               if (pscaled)
+                       *pscaled = -1;
+               return;
+       }
+
        if (scale) {
-               if (count->run == 0) {
+               /*
+                * per-pkg events can have run == 0 in a CPU and still be
+                * valid.
+                */
+               if (count->run == 0 && !per_pkg) {
                        scaled = -1;
                        count->val = 0;
                } else if (count->run < count->ena) {
                        scaled = 1;
-                       count->val = (u64)((double) count->val * count->ena / 
count->run + 0.5);
+                       /* Snapshot events do not scale counts values. */
+                       if (!snapshot && count->run)
+                               count->val = (u64)((double) count->val * 
count->ena /
+                                            count->run + 0.5);
                }
-       } else
-               count->ena = count->run = 0;
+
+       } else {
+               count->run = count->ena;
+       }
 
        if (pscaled)
                *pscaled = scaled;
@@ -1221,8 +1241,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, 
int thread,
        if (FD(evsel, cpu, thread) < 0)
                return -EINVAL;
 
-       if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0)
+       if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) {
+               perf_counts_values__make_na(count);
                return -errno;
+       }
 
        return 0;
 }
@@ -1230,6 +1252,7 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, 
int thread,
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
                              int cpu, int thread, bool scale)
 {
+       int ret = 0;
        struct perf_counts_values count;
        size_t nv = scale ? 3 : 1;
 
@@ -1239,13 +1262,17 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
        if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, 
thread + 1) < 0)
                return -ENOMEM;
 
-       if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
-               return -errno;
+       if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) {
+               perf_counts_values__make_na(&count);
+               ret = -errno;
+               goto exit;
+       }
 
        perf_evsel__compute_deltas(evsel, cpu, thread, &count);
-       perf_counts_values__scale(&count, scale, NULL);
+       perf_counts_values__scale(&count, scale, evsel->per_pkg, 
evsel->snapshot, NULL);
+exit:
        *perf_counts(evsel->counts, cpu, thread) = count;
-       return 0;
+       return ret;
 }
 
 static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b1503b0..facb6494 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -80,6 +80,10 @@ struct perf_evsel_config_term {
  * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID 
or
  *          PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
  *          is used there is an id sample appended to non-sample events
+ * @snapshot: an event that whose raw value cannot be extrapolated based on
+ *         the ratio of running/enabled time.
+ * @per_pkg: an event that runs package wide. All cores in same package will
+ *         read the same value, even if running time == 0.
  * @priv:   And what is in its containing unnamed union are tool specific
  */
 struct perf_evsel {
@@ -150,8 +154,8 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel 
*evsel)
        return perf_evsel__cpus(evsel)->nr;
 }
 
-void perf_counts_values__scale(struct perf_counts_values *count,
-                              bool scale, s8 *pscaled);
+void perf_counts_values__scale(struct perf_counts_values *count, bool scale,
+                              bool per_pkg, bool snapshot, s8 *pscaled);
 
 void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
                                struct perf_counts_values *count);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 39345c2d..514b953 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -202,7 +202,7 @@ static void zero_per_pkg(struct perf_evsel *counter)
 }
 
 static int check_per_pkg(struct perf_evsel *counter,
-                        struct perf_counts_values *vals, int cpu, bool *skip)
+                        int cpu, bool *skip)
 {
        unsigned long *mask = counter->per_pkg_mask;
        struct cpu_map *cpus = perf_evsel__cpus(counter);
@@ -224,17 +224,6 @@ static int check_per_pkg(struct perf_evsel *counter,
                counter->per_pkg_mask = mask;
        }
 
-       /*
-        * we do not consider an event that has not run as a good
-        * instance to mark a package as used (skip=1). Otherwise
-        * we may run into a situation where the first CPU in a package
-        * is not running anything, yet the second is, and this function
-        * would mark the package as used after the first CPU and would
-        * not read the values from the second CPU.
-        */
-       if (!(vals->run && vals->ena))
-               return 0;
-
        s = cpu_map__get_socket(cpus, cpu, NULL);
        if (s < 0)
                return -1;
@@ -249,30 +238,27 @@ process_counter_values(struct perf_stat_config *config, 
struct perf_evsel *evsel
                       struct perf_counts_values *count)
 {
        struct perf_counts_values *aggr = &evsel->counts->aggr;
-       static struct perf_counts_values zero;
        bool skip = false;
 
-       if (check_per_pkg(evsel, count, cpu, &skip)) {
+       if (check_per_pkg(evsel, cpu, &skip)) {
                pr_err("failed to read per-pkg counter\n");
                return -1;
        }
 
-       if (skip)
-               count = &zero;
-
        switch (config->aggr_mode) {
        case AGGR_THREAD:
        case AGGR_CORE:
        case AGGR_SOCKET:
        case AGGR_NONE:
-               if (!evsel->snapshot)
-                       perf_evsel__compute_deltas(evsel, cpu, thread, count);
-               perf_counts_values__scale(count, config->scale, NULL);
+               perf_evsel__compute_deltas(evsel, cpu, thread, count);
+               perf_counts_values__scale(count, config->scale,
+                                         evsel->per_pkg, evsel->snapshot, 
NULL);
                if (config->aggr_mode == AGGR_NONE)
                        perf_stat__update_shadow_stats(evsel, count->values, 
cpu);
                break;
        case AGGR_GLOBAL:
-               aggr->val += count->val;
+               if (!skip)
+                       aggr->val += count->val;
                if (config->scale) {
                        aggr->ena += count->ena;
                        aggr->run += count->run;
@@ -337,9 +323,10 @@ int perf_stat_process_counter(struct perf_stat_config 
*config,
        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;
 
-       if (!counter->snapshot)
-               perf_evsel__compute_deltas(counter, -1, -1, aggr);
-       perf_counts_values__scale(aggr, config->scale, 
&counter->counts->scaled);
+       perf_evsel__compute_deltas(counter, -1, -1, aggr);
+       perf_counts_values__scale(aggr, config->scale,
+                                 counter->per_pkg, counter->snapshot,
+                                 &counter->counts->scaled);
 
        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);
-- 
2.8.0.rc3.226.g39d4020

Reply via email to