I agree that this solution is much simpler, but I don't like this race for
coverage_mutex.
Why is it all done this way?
I mean, why should we run coverage_clear() every second? What if we change
the type of the thread_local counter_##COUNTER from unsigned int to
unsigned long long and call coverage_clear() in coverage_read()? (This may
be possible with my patch applied.) Also, we could collect stats for all
threads separately.
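
Roughly what I have in mind, as a minimal standalone sketch (the names and
helpers below are illustrative, not the existing OVS coverage API; I also
used C11 relaxed atomics here just so the cross-thread reads are well
defined, which is not part of my patch): each thread bumps its own 64-bit
counter with no locking, and the mutex is taken only when somebody actually
asks for the statistics, so there is no periodic clearing to race on:

/* Minimal sketch, assuming C11 and pthreads; illustrative names only. */
#include <pthread.h>
#include <stdatomic.h>

#define MAX_THREADS 64

/* Each thread owns one 64-bit counter.  Relaxed atomics keep the
 * fast-path increment cheap while making cross-thread reads defined. */
static _Thread_local atomic_ullong my_count;
static atomic_ullong *thread_counts[MAX_THREADS];
static int n_threads;
static pthread_mutex_t counts_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Called once by every new thread (e.g. at the top of pmd_thread_main()).
 * A real implementation would also unregister on thread exit. */
static void
register_thread(void)
{
    pthread_mutex_lock(&counts_mutex);
    thread_counts[n_threads++] = &my_count;
    pthread_mutex_unlock(&counts_mutex);
}

/* Fast path: no lock, just a relaxed increment of this thread's counter. */
static void
count_event(void)
{
    atomic_fetch_add_explicit(&my_count, 1, memory_order_relaxed);
}

/* Slow path, run only when statistics are requested (the equivalent of
 * calling coverage_clear() from coverage_read()): sum every registered
 * per-thread counter.  With 64-bit counters there is no wraparound to
 * worry about, so nothing has to be cleared every second. */
static unsigned long long
read_total(void)
{
    unsigned long long total = 0;
    int i;

    pthread_mutex_lock(&counts_mutex);
    for (i = 0; i < n_threads; i++) {
        total += atomic_load_explicit(thread_counts[i], memory_order_relaxed);
    }
    pthread_mutex_unlock(&counts_mutex);
    return total;
}

The open question with this sketch is how the reader learns about counters
of threads that have already exited; that is what the register/unregister
part of my patch tries to handle.
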
On 12.08.2015 08:01, Alex Wang wrote:
> Just trying to share an alternative: could we still do the coverage clear
> inside 'pmd_thread_main()'? However, instead of contending for the
> 'coverage_mutex', use 'ovs_mutex_trylock()'. Since the pmd thread runs in
> an infinite loop, it can constantly try grabbing the lock and will
> eventually acquire it.
>
> Something like this:
>
> diff --git a/lib/coverage.c b/lib/coverage.c
> index 6121956..9f23f99 100644
> --- a/lib/coverage.c
> +++ b/lib/coverage.c
> @@ -272,6 +272,37 @@ coverage_clear(void)
> }
> }
>
> +/* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to
> + * synchronize per-thread counters with global counters. Exits immediately
> + * without updating the 'thread_time' if it cannot acquire the 'coverage_mutex'.
> + * This is to avoid lock contention for some performance-critical threads.
> + */
> +void
> +coverage_try_clear(void)
> +{
> +    long long int now, *thread_time;
> +
> +    now = time_msec();
> +    thread_time = coverage_clear_time_get();
> +
> +    /* Initialize the coverage_clear_time. */
> +    if (*thread_time == LLONG_MIN) {
> +        *thread_time = now + COVERAGE_CLEAR_INTERVAL;
> +    }
> +
> +    if (now >= *thread_time) {
> +        if (!ovs_mutex_trylock(&coverage_mutex)) {
> +            size_t i;
> +            for (i = 0; i < n_coverage_counters; i++) {
> +                struct coverage_counter *c = coverage_counters[i];
> +                c->total += c->count();
> +            }
> +            ovs_mutex_unlock(&coverage_mutex);
> +            *thread_time = now + COVERAGE_CLEAR_INTERVAL;
> +        }
> +    }
> +}
> +
> /* Runs approximately every COVERAGE_RUN_INTERVAL amount of time to update the
>  * coverage counters' 'min' and 'hr' array. 'min' array is for cumulating
>  * per second counts into per minute count. 'hr' array is for cumulating per
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c144352..96884ec 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2695,6 +2695,7 @@ reload:
>
>             lc = 0;
>
> +            coverage_try_clear();
>             emc_cache_slow_sweep(&pmd->flow_cache);
>             ovsrcu_quiesce();
>
>
>
> On Tue, Aug 11, 2015 at 6:24 PM, Alex Wang <[email protected]> wrote:
>
> Hi Ilya,
>
> Thx a lot for writing up this fix,
>
> The reason that you did not see the pmd related coverage counter stats
> (in "ovs-appctl coverage/show") is that we do not call coverage_clear()
> anywhere in pmd_thread_main(). This is because coverage_clear()
> requires holding the global mutex in the coverage module, which can
> be super expensive for dpdk packet processing.
>
> I have to admit that your fix is more complicated than I expected.
> Especially, changing the global (extern) struct 'counter_##COUNTER' to
> per-thread and expanding the array size by the number of threads can
> also be expensive for the main thread...
>
> Do you think there could be a simpler way to achieve this? I'd like to
> think about other possible solutions more,
>
> Thanks,
> Alex Wang,
>
>
> On Mon, Aug 10, 2015 at 7:45 AM, Ilya Maximets <[email protected]> wrote:
>
> Currently, coverage_counter_register() is called only from
> constructor functions at program initialization time by the
> main thread. Coverage counter values are static and
> thread-local.
>
> This means that all COVERAGE_INC() calls from pmd threads
> increment thread-local variables of those threads that can't
> be accessed by anyone else, and they are not used in the
> final coverage accounting.
>
> Fix that by adding the ability to add/remove coverage counters
> at runtime for newly created threads and by counting coverage
> using counters from all threads.
>
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
> lib/coverage.c    | 83 +++++++++++++++++++++++++++++++++++++++++++++----------
> lib/coverage.h    | 27 ++++++++++++------
> lib/dpif-netdev.c |  4 ++-
> 3 files changed, 91 insertions(+), 23 deletions(-)
>
> diff --git a/lib/coverage.c b/lib/coverage.c
> index 6121956..2ae9e7a 100644
> --- a/lib/coverage.c
> +++ b/lib/coverage.c
> @@ -30,14 +30,22 @@ VLOG_DEFINE_THIS_MODULE(coverage);
>
> /* The coverage counters. */
> static struct coverage_counter **coverage_counters = NULL;
> -static size_t n_coverage_counters = 0;
> static size_t allocated_coverage_counters = 0;
>
> +static void (**coverage_initializers)(void) = NULL;
> +static size_t n_coverage_initializers = 0;
> +static size_t allocated_coverage_initializers = 0;
> +
> static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER;
>
> DEFINE_STATIC_PER_THREAD_DATA(long long int, coverage_clear_time, LLONG_MIN);
> static long long int coverage_run_time = LLONG_MIN;
>
> +volatile unsigned int *coverage_val[2048] = {NULL};
> +volatile size_t n_coverage_counters = 0;
> +
> +DEFINE_STATIC_PER_THREAD_DATA(unsigned int, coverage_thread_start, 0);
> +
> /* Index counter used to compute the moving average array's index. */
> static unsigned int idx_count = 0;
>
> @@ -54,7 +62,44 @@ coverage_counter_register(struct coverage_counter* counter)
>                                        &allocated_coverage_counters,
>                                        sizeof(struct coverage_counter*));
>     }
> -    coverage_counters[n_coverage_counters++] = counter;
> +    coverage_counters[n_coverage_counters] = counter;
> +    coverage_val[n_coverage_counters++] = NULL;
> +}
> +
> +void
> +coverage_initializer_register(void (*f)(void))
> +{
> +    if (n_coverage_initializers >= allocated_coverage_initializers) {
> +        coverage_initializers = x2nrealloc(coverage_initializers,
> +                                           &allocated_coverage_initializers,
> +                                           sizeof *coverage_initializers);
> +    }
> +    coverage_initializers[n_coverage_initializers++] = f;
> +}
> +
> +void
> +coverage_register_new_thread(void)
> +{
> +    int i;
> +    ovs_mutex_lock(&coverage_mutex);
> +    *coverage_thread_start_get() = n_coverage_counters;
> +    for (i = 0; i < n_coverage_initializers; i++)
> +        coverage_initializers[i]();
> +    ovs_mutex_unlock(&coverage_mutex);
> +}
> +
> +void
> +coverage_unregister_thread(void)
> +{
> +    int i;
> +    ovs_mutex_lock(&coverage_mutex);
> +    for (i = *coverage_thread_start_get() + n_coverage_initializers;
> +         i < n_coverage_counters; i++) {
> +        coverage_counters[i - n_coverage_initializers] = coverage_counters[i];
> +        coverage_val[i - n_coverage_initializers] = coverage_val[i];
> +    }
> +    n_coverage_counters -= n_coverage_initializers;
> +    ovs_mutex_unlock(&coverage_mutex);
> }
>
> static void
> @@ -102,13 +147,12 @@ coverage_hash(void)
>     uint32_t hash = 0;
>     int n_groups, i;
>
> +    ovs_mutex_lock(&coverage_mutex);
>     /* Sort coverage counters into groups with equal totals. */
>     c = xmalloc(n_coverage_counters * sizeof *c);
> -    ovs_mutex_lock(&coverage_mutex);
>     for (i = 0; i < n_coverage_counters; i++) {
>         c[i] = coverage_counters[i];
>     }
> -    ovs_mutex_unlock(&coverage_mutex);
>     qsort(c, n_coverage_counters, sizeof *c, compare_coverage_counters);
>
>     /* Hash the names in each group along with the rank. */
> @@ -130,6 +174,7 @@ coverage_hash(void)
>         i = j;
>     }
>
> +    ovs_mutex_unlock(&coverage_mutex);
>     free(c);
>
>     return hash_int(n_groups, hash);
> @@ -200,9 +245,11 @@ coverage_read(struct svec *lines)
> {
>     struct coverage_counter **c = coverage_counters;
>     unsigned long long int *totals;
> +    unsigned long long int *total_sec, *total_min, *total_hr;
>     size_t n_never_hit;
>     uint32_t hash;
>     size_t i;
> +    int initial_n = n_coverage_initializers;
>
>     hash = coverage_hash();
>
> @@ -213,14 +260,20 @@ coverage_read(struct svec *lines)
>                               "hash=%08"PRIx32":",
>                               COVERAGE_RUN_INTERVAL/1000, hash));
>
> -    totals = xmalloc(n_coverage_counters * sizeof *totals);
>     ovs_mutex_lock(&coverage_mutex);
> +    totals = xcalloc(n_coverage_counters, sizeof *totals);
> +    total_sec = xcalloc(n_coverage_counters, sizeof *total_sec);
> +    total_min = xcalloc(n_coverage_counters, sizeof *total_min);
> +    total_hr = xcalloc(n_coverage_counters, sizeof *total_hr);
> +
>     for (i = 0; i < n_coverage_counters; i++) {
> -        totals[i] = c[i]->total;
> +        totals[i % initial_n] += c[i]->total;
> +        total_sec[i % initial_n] += c[i]->min[(idx_count - 1) % MIN_AVG_LEN];
> +        total_min[i % initial_n] += coverage_array_sum(c[i]->min, MIN_AVG_LEN);
> +        total_hr[i % initial_n] += coverage_array_sum(c[i]->hr, HR_AVG_LEN);
>     }
> -    ovs_mutex_unlock(&coverage_mutex);
>
> -    for (i = 0; i < n_coverage_counters; i++) {
> +    for (i = 0; i < initial_n; i++) {
>         if (totals[i]) {
>             /* Shows the averaged per-second rates for the last
>              * COVERAGE_RUN_INTERVAL interval, the last minute and
>              * the last hour. */
>             svec_add_nocopy(lines,
>                 xasprintf("%-24s %5.1f/sec %9.3f/sec "
>                           "%13.4f/sec total: %llu",
>                           c[i]->name,
> -                          (c[i]->min[(idx_count - 1) % MIN_AVG_LEN]
> +                          (total_sec[i]
>                            * 1000.0 / COVERAGE_RUN_INTERVAL),
> -                          coverage_array_sum(c[i]->min, MIN_AVG_LEN) / 60.0,
> -                          coverage_array_sum(c[i]->hr, HR_AVG_LEN) / 3600.0,
> +                          total_min[i] / 60.0,
> +                          total_hr[i] / 3600.0,
>                           totals[i]));
>         } else {
>             n_never_hit++;
>         }
>     }
> +    ovs_mutex_unlock(&coverage_mutex);
>
>     svec_add_nocopy(lines, xasprintf("%"PRIuSIZE" events never hit",
>                                      n_never_hit));
>     free(totals);
> +    free(total_sec);
> +    free(total_min);
> +    free(total_hr);
>
> /* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to
> @@ -265,7 +322,7 @@ coverage_clear(void)
>         ovs_mutex_lock(&coverage_mutex);
>         for (i = 0; i < n_coverage_counters; i++) {
>             struct coverage_counter *c = coverage_counters[i];
> -            c->total += c->count();
> +            c->total += c->count(i);
>         }
>         ovs_mutex_unlock(&coverage_mutex);
>         *thread_time = now + COVERAGE_CLEAR_INTERVAL;
> @@ -340,10 +397,8 @@ coverage_array_sum(const unsigned int *arr, const unsigned int len)
>     unsigned int sum = 0;
>     size_t i;
>
> -    ovs_mutex_lock(&coverage_mutex);
>     for (i = 0; i < len; i++) {
>         sum += arr[i];
>     }
> -    ovs_mutex_unlock(&coverage_mutex);
>     return sum;
> }
> diff --git a/lib/coverage.h b/lib/coverage.h
> index 832c433..1f5ae17 100644
> --- a/lib/coverage.h
> +++ b/lib/coverage.h
> @@ -45,9 +45,9 @@ BUILD_ASSERT_DECL(COVERAGE_RUN_INTERVAL % COVERAGE_CLEAR_INTERVAL == 0);
>
> /* A coverage counter. */
> struct coverage_counter {
> -    const char *const name;            /* Textual name. */
> -    unsigned int (*const count)(void); /* Gets, zeros this thread's count. */
> -    unsigned long long int total;      /* Total count. */
> +    const char *const name;            /* Textual name. */
> +    unsigned int (*const count)(unsigned int); /* Gets, zeros this thread's count. */
> +    unsigned long long int total;      /* Total count. */
>     unsigned long long int last_total;
>     /* The moving average arrays. */
>     unsigned int min[MIN_AVG_LEN];
> @@ -55,15 +55,20 @@ struct coverage_counter {
> };
>
> void coverage_counter_register(struct coverage_counter*);
> +void coverage_initializer_register(void (*f)(void));
> +void coverage_register_new_thread(void);
> +void coverage_unregister_thread(void);
>
> +extern volatile unsigned int *coverage_val[2048];
> +extern volatile size_t n_coverage_counters;
> /* Defines COUNTER. There must be exactly one such definition at file scope
>  * within a program. */
> #define COVERAGE_DEFINE(COUNTER)                                        \
>         DEFINE_STATIC_PER_THREAD_DATA(unsigned int,                    \
>                                       counter_##COUNTER, 0);           \
> -        static unsigned int COUNTER##_count(void)                      \
> +        static unsigned int COUNTER##_count(unsigned int i)            \
>         {                                                               \
> -            unsigned int *countp = counter_##COUNTER##_get();          \
> +            volatile unsigned int *countp = coverage_val[i];           \
>             unsigned int count = *countp;                               \
>             *countp = 0;                                                \
>             return count;                                               \
> @@ -72,11 +77,17 @@ void coverage_counter_register(struct coverage_counter*);
>         {                                                               \
>             *counter_##COUNTER##_get() += n;                            \
>         }                                                               \
> -        extern struct coverage_counter counter_##COUNTER;              \
> -        struct coverage_counter counter_##COUNTER                      \
> +        extern thread_local struct coverage_counter counter_##COUNTER; \
> +        thread_local struct coverage_counter counter_##COUNTER         \
>             = { #COUNTER, COUNTER##_count, 0, 0, {0}, {0} };            \
> -        OVS_CONSTRUCTOR(COUNTER##_init) {                              \
> +        static void COUNTER##_initializer(void) {                      \
>             coverage_counter_register(&counter_##COUNTER);             \
> +            coverage_val[n_coverage_counters - 1] =                    \
> +                                      counter_##COUNTER##_get();       \
> +        }                                                              \
> +        OVS_CONSTRUCTOR(COUNTER##_init) {                              \
> +            coverage_initializer_register(&COUNTER##_initializer);    \
> +            COUNTER##_initializer();                                   \
>         }
>
> /* Adds 1 to COUNTER. */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c144352..a92dd6a 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -34,6 +34,7 @@
> #include "bitmap.h"
> #include "cmap.h"
> #include "csum.h"
> +#include "coverage.h"
> #include "dp-packet.h"
> #include "dpif.h"
> #include "dpif-provider.h"
> @@ -2666,7 +2667,7 @@ pmd_thread_main(void *f_)
>
>     poll_cnt = 0;
>     poll_list = NULL;
> -
> +    coverage_register_new_thread();
>     /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
>     ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
>     pmd_thread_setaffinity_cpu(pmd->core_id);
> @@ -2717,6 +2718,7 @@ reload:
>     }
>
>     dp_netdev_pmd_reload_done(pmd);
> +    coverage_unregister_thread();
>
>     free(poll_list);
>     return NULL;
> --
> 2.1.4
>
>
>