Just trying share an alternative, could we still do the coverage clear
inside 'pmd_thread_main()'. However,
instead of contending for the 'coverage_mutex', use 'ovs_mutex_trylock()'.
Since pmd thread runs in infinite
loop, it can constantly try grabbing the lock and will eventually acquire
it.
Something like this:
diff --git a/lib/coverage.c b/lib/coverage.c
index 6121956..9f23f99 100644
--- a/lib/coverage.c
+++ b/lib/coverage.c
@@ -272,6 +272,37 @@ coverage_clear(void)
}
}
+/* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to
+ * synchronize per-thread counters with global counters. Exits immediately
+ * without updating the 'thread_time' if cannot acquire the
'coverage_mutex'.
+ * This is to avoid lock contention for some performance-critical threads.
+ */
+void
+coverage_try_clear(void)
+{
+ long long int now, *thread_time;
+
+ now = time_msec();
+ thread_time = coverage_clear_time_get();
+
+ /* Initialize the coverage_clear_time. */
+ if (*thread_time == LLONG_MIN) {
+ *thread_time = now + COVERAGE_CLEAR_INTERVAL;
+ }
+
+ if (now >= *thread_time) {
+ if (ovs_mutex_trylock(&coverage_mutex)) {
+ size_t i;
+ for (i = 0; i < n_coverage_counters; i++) {
+ struct coverage_counter *c = coverage_counters[i];
+ c->total += c->count();
+ }
+ ovs_mutex_unlock(&coverage_mutex);
+ *thread_time = now + COVERAGE_CLEAR_INTERVAL;
+ }
+ }
+}
+
/* Runs approximately every COVERAGE_RUN_INTERVAL amount of time to update
the
* coverage counters' 'min' and 'hr' array. 'min' array is for cumulating
* per second counts into per minute count. 'hr' array is for cumulating
per
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c144352..96884ec 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2695,6 +2695,7 @@ reload:
lc = 0;
+ coverage_try_clear();
emc_cache_slow_sweep(&pmd->flow_cache);
ovsrcu_quiesce();
On Tue, Aug 11, 2015 at 6:24 PM, Alex Wang <[email protected]> wrote:
> Hi IIya,
>
> Thx a lot for writing up this fix,
>
> The reason that you did not see the pmd related coverage counter stats
> (in "ovs-appctl coverage/show") is that we do not call coverage_clear()
> anywhere in pmd_thread_main(). This is in that the coverage_clear()
> requires holding the global mutex in the coverage module which can
> be super expensive for dpdk packet processing.
>
> Want to admit that your fix is more complicated than I expect. Especially,
> changing the global (extern) struct 'counter_##COUNTER' to per-thread
> and expanding the array size by number of threads times can also be
> expensive for main thread...
>
> Do you think there could be a simpler way to achieve this? I'd like to
> think
> about other possible solutions more,
>
> Thanks,
> Alex Wang,
>
>
> On Mon, Aug 10, 2015 at 7:45 AM, Ilya Maximets <[email protected]>
> wrote:
>
>> Currently coverage_counter_register() is called only from
>> constructor functions at program initialization time by
>> main thread. Coverage counter values are static and
>> thread local.
>>
>> This means, that all COVERAGE_INC() calls from pmd threads
>> leads to increment of thread local variables of that threads
>> that can't be accessed by anyone else. And they are not used
>> in final coverage accounting.
>>
>> Fix that by adding ability to add/remove coverage counters
>> in runtime for newly created threads and counting coverage
>> using counters from all threads.
>>
>> Signed-off-by: Ilya Maximets <[email protected]>
>> ---
>> lib/coverage.c | 83
>> +++++++++++++++++++++++++++++++++++++++++++++----------
>> lib/coverage.h | 27 ++++++++++++------
>> lib/dpif-netdev.c | 4 ++-
>> 3 files changed, 91 insertions(+), 23 deletions(-)
>>
>> diff --git a/lib/coverage.c b/lib/coverage.c
>> index 6121956..2ae9e7a 100644
>> --- a/lib/coverage.c
>> +++ b/lib/coverage.c
>> @@ -30,14 +30,22 @@ VLOG_DEFINE_THIS_MODULE(coverage);
>>
>> /* The coverage counters. */
>> static struct coverage_counter **coverage_counters = NULL;
>> -static size_t n_coverage_counters = 0;
>> static size_t allocated_coverage_counters = 0;
>>
>> +static void (**coverage_initializers)(void) = NULL;
>> +static size_t n_coverage_initializers = 0;
>> +static size_t allocated_coverage_initializers = 0;
>> +
>> static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER;
>>
>> DEFINE_STATIC_PER_THREAD_DATA(long long int, coverage_clear_time,
>> LLONG_MIN);
>> static long long int coverage_run_time = LLONG_MIN;
>>
>> +volatile unsigned int *coverage_val[2048] = {NULL};
>> +volatile size_t n_coverage_counters = 0;
>> +
>> +DEFINE_STATIC_PER_THREAD_DATA(unsigned int, coverage_thread_start, 0);
>> +
>> /* Index counter used to compute the moving average array's index. */
>> static unsigned int idx_count = 0;
>>
>> @@ -54,7 +62,44 @@ coverage_counter_register(struct coverage_counter*
>> counter)
>> &allocated_coverage_counters,
>> sizeof(struct coverage_counter*));
>> }
>> - coverage_counters[n_coverage_counters++] = counter;
>> + coverage_counters[n_coverage_counters] = counter;
>> + coverage_val[n_coverage_counters++] = NULL;
>> +}
>> +
>> +void
>> +coverage_initializer_register(void (*f)(void))
>> +{
>> + if (n_coverage_initializers >= allocated_coverage_initializers) {
>> + coverage_initializers = x2nrealloc(coverage_initializers,
>> + &allocated_coverage_initializers,
>> + sizeof *coverage_initializers);
>> + }
>> + coverage_initializers[n_coverage_initializers++] = f;
>> +}
>> +
>> +void
>> +coverage_register_new_thread(void)
>> +{
>> + int i;
>> + ovs_mutex_lock(&coverage_mutex);
>> + *coverage_thread_start_get() = n_coverage_counters;
>> + for (i = 0; i < n_coverage_initializers; i++)
>> + coverage_initializers[i]();
>> + ovs_mutex_unlock(&coverage_mutex);
>> +}
>> +
>> +void
>> +coverage_unregister_thread(void)
>> +{
>> + int i;
>> + ovs_mutex_lock(&coverage_mutex);
>> + for (i = *coverage_thread_start_get() + n_coverage_initializers;
>> + i < n_coverage_counters; i++) {
>> + coverage_counters[i - n_coverage_initializers] =
>> coverage_counters[i];
>> + coverage_val[i - n_coverage_initializers] = coverage_val[i];
>> + }
>> + n_coverage_counters -= n_coverage_initializers;
>> + ovs_mutex_unlock(&coverage_mutex);
>> }
>>
>> static void
>> @@ -102,13 +147,12 @@ coverage_hash(void)
>> uint32_t hash = 0;
>> int n_groups, i;
>>
>> + ovs_mutex_lock(&coverage_mutex);
>> /* Sort coverage counters into groups with equal totals. */
>> c = xmalloc(n_coverage_counters * sizeof *c);
>> - ovs_mutex_lock(&coverage_mutex);
>> for (i = 0; i < n_coverage_counters; i++) {
>> c[i] = coverage_counters[i];
>> }
>> - ovs_mutex_unlock(&coverage_mutex);
>> qsort(c, n_coverage_counters, sizeof *c, compare_coverage_counters);
>>
>> /* Hash the names in each group along with the rank. */
>> @@ -130,6 +174,7 @@ coverage_hash(void)
>> i = j;
>> }
>>
>> + ovs_mutex_unlock(&coverage_mutex);
>> free(c);
>>
>> return hash_int(n_groups, hash);
>> @@ -200,9 +245,11 @@ coverage_read(struct svec *lines)
>> {
>> struct coverage_counter **c = coverage_counters;
>> unsigned long long int *totals;
>> + unsigned long long int *total_sec, *total_min, *total_hr;
>> size_t n_never_hit;
>> uint32_t hash;
>> size_t i;
>> + int initial_n = n_coverage_initializers;
>>
>> hash = coverage_hash();
>>
>> @@ -213,14 +260,20 @@ coverage_read(struct svec *lines)
>> "hash=%08"PRIx32":",
>> COVERAGE_RUN_INTERVAL/1000, hash));
>>
>> - totals = xmalloc(n_coverage_counters * sizeof *totals);
>> ovs_mutex_lock(&coverage_mutex);
>> + totals = xcalloc(n_coverage_counters, sizeof *totals);
>> + total_sec = xcalloc(n_coverage_counters, sizeof *total_sec);
>> + total_min = xcalloc(n_coverage_counters, sizeof *total_min);
>> + total_hr = xcalloc(n_coverage_counters, sizeof *total_hr);
>> +
>> for (i = 0; i < n_coverage_counters; i++) {
>> - totals[i] = c[i]->total;
>> + totals[i % initial_n] += c[i]->total;
>> + total_sec[i % initial_n] += c[i]->min[(idx_count - 1) %
>> MIN_AVG_LEN];
>> + total_min[i % initial_n] += coverage_array_sum(c[i]->min,
>> MIN_AVG_LEN);
>> + total_hr[i % initial_n] += coverage_array_sum(c[i]->hr,
>> HR_AVG_LEN);
>> }
>> - ovs_mutex_unlock(&coverage_mutex);
>>
>> - for (i = 0; i < n_coverage_counters; i++) {
>> + for (i = 0; i < initial_n; i++) {
>> if (totals[i]) {
>> /* Shows the averaged per-second rates for the last
>> * COVERAGE_RUN_INTERVAL interval, the last minute and
>> @@ -229,18 +282,22 @@ coverage_read(struct svec *lines)
>> xasprintf("%-24s %5.1f/sec %9.3f/sec "
>> "%13.4f/sec total: %llu",
>> c[i]->name,
>> - (c[i]->min[(idx_count - 1) % MIN_AVG_LEN]
>> + (total_sec[i]
>> * 1000.0 / COVERAGE_RUN_INTERVAL),
>> - coverage_array_sum(c[i]->min, MIN_AVG_LEN) /
>> 60.0,
>> - coverage_array_sum(c[i]->hr, HR_AVG_LEN) /
>> 3600.0,
>> + total_min[i] / 60.0,
>> + total_hr[i] / 3600.0,
>> totals[i]));
>> } else {
>> n_never_hit++;
>> }
>> }
>> + ovs_mutex_unlock(&coverage_mutex);
>>
>> svec_add_nocopy(lines, xasprintf("%"PRIuSIZE" events never hit",
>> n_never_hit));
>> free(totals);
>> + free(total_sec);
>> + free(total_min);
>> + free(total_hr);
>> }
>>
>> /* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to
>> @@ -265,7 +322,7 @@ coverage_clear(void)
>> ovs_mutex_lock(&coverage_mutex);
>> for (i = 0; i < n_coverage_counters; i++) {
>> struct coverage_counter *c = coverage_counters[i];
>> - c->total += c->count();
>> + c->total += c->count(i);
>> }
>> ovs_mutex_unlock(&coverage_mutex);
>> *thread_time = now + COVERAGE_CLEAR_INTERVAL;
>> @@ -340,10 +397,8 @@ coverage_array_sum(const unsigned int *arr, const
>> unsigned int len)
>> unsigned int sum = 0;
>> size_t i;
>>
>> - ovs_mutex_lock(&coverage_mutex);
>> for (i = 0; i < len; i++) {
>> sum += arr[i];
>> }
>> - ovs_mutex_unlock(&coverage_mutex);
>> return sum;
>> }
>> diff --git a/lib/coverage.h b/lib/coverage.h
>> index 832c433..1f5ae17 100644
>> --- a/lib/coverage.h
>> +++ b/lib/coverage.h
>> @@ -45,9 +45,9 @@ BUILD_ASSERT_DECL(COVERAGE_RUN_INTERVAL %
>> COVERAGE_CLEAR_INTERVAL == 0);
>>
>> /* A coverage counter. */
>> struct coverage_counter {
>> - const char *const name; /* Textual name. */
>> - unsigned int (*const count)(void); /* Gets, zeros this thread's
>> count. */
>> - unsigned long long int total; /* Total count. */
>> + const char *const name; /* Textual name. */
>> + unsigned int (*const count)(unsigned int); /* Gets, zeros this
>> thread's count. */
>> + unsigned long long int total; /* Total count. */
>> unsigned long long int last_total;
>> /* The moving average arrays. */
>> unsigned int min[MIN_AVG_LEN];
>> @@ -55,15 +55,20 @@ struct coverage_counter {
>> };
>>
>> void coverage_counter_register(struct coverage_counter*);
>> +void coverage_initializer_register(void (*f)(void));
>> +void coverage_register_new_thread(void);
>> +void coverage_unregister_thread(void);
>>
>> +extern volatile unsigned int *coverage_val[2048];
>> +extern volatile size_t n_coverage_counters;
>> /* Defines COUNTER. There must be exactly one such definition at file
>> scope
>> * within a program. */
>> #define COVERAGE_DEFINE(COUNTER) \
>> DEFINE_STATIC_PER_THREAD_DATA(unsigned int, \
>> counter_##COUNTER, 0); \
>> - static unsigned int COUNTER##_count(void) \
>> + static unsigned int COUNTER##_count(unsigned int i) \
>> { \
>> - unsigned int *countp = counter_##COUNTER##_get(); \
>> + volatile unsigned int *countp = coverage_val[i]; \
>> unsigned int count = *countp; \
>> *countp = 0; \
>> return count; \
>> @@ -72,11 +77,17 @@ void coverage_counter_register(struct
>> coverage_counter*);
>> { \
>> *counter_##COUNTER##_get() += n; \
>> } \
>> - extern struct coverage_counter counter_##COUNTER; \
>> - struct coverage_counter counter_##COUNTER \
>> + extern thread_local struct coverage_counter counter_##COUNTER; \
>> + thread_local struct coverage_counter counter_##COUNTER \
>> = { #COUNTER, COUNTER##_count, 0, 0, {0}, {0} }; \
>> - OVS_CONSTRUCTOR(COUNTER##_init) { \
>> + static void COUNTER##_initializer(void) { \
>> coverage_counter_register(&counter_##COUNTER); \
>> + coverage_val[n_coverage_counters - 1] = \
>> + counter_##COUNTER##_get(); \
>> + } \
>> + OVS_CONSTRUCTOR(COUNTER##_init) { \
>> + coverage_initializer_register(&COUNTER##_initializer); \
>> + COUNTER##_initializer(); \
>> }
>
> /* Adds 1 to COUNTER. */
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index c144352..a92dd6a 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -34,6 +34,7 @@
>> #include "bitmap.h"
>> #include "cmap.h"
>> #include "csum.h"
>> +#include "coverage.h"
>> #include "dp-packet.h"
>> #include "dpif.h"
>> #include "dpif-provider.h"
>> @@ -2666,7 +2667,7 @@ pmd_thread_main(void *f_)
>>
>> poll_cnt = 0;
>> poll_list = NULL;
>> -
>> + coverage_register_new_thread();
>> /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
>> ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
>> pmd_thread_setaffinity_cpu(pmd->core_id);
>> @@ -2717,6 +2718,7 @@ reload:
>> }
>>
>> dp_netdev_pmd_reload_done(pmd);
>> + coverage_unregister_thread();
>>
>> free(poll_list);
>> return NULL;
>> --
>> 2.1.4
>>
>>
>
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev