On 10/17/2018 05:50 PM, Peter Zijlstra wrote:
> On Wed, Oct 17, 2018 at 04:41:55PM +0200, Daniel Borkmann wrote:
>> @@ -73,7 +73,8 @@ static inline u64 perf_mmap__read_head(struct perf_mmap 
>> *mm)
>>  {
>>      struct perf_event_mmap_page *pc = mm->base;
>>      u64 head = READ_ONCE(pc->data_head);
>> -    rmb();
>> +
>> +    smp_rmb();
>>      return head;
>>  }
>>  
>> @@ -84,7 +85,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap 
>> *md, u64 tail)
>>      /*
>>       * ensure all reads are done before we write the tail out.
>>       */
>> -    mb();
>> +    smp_mb();
>>      pc->data_tail = tail;
> 
> Ideally that would be a WRITE_ONCE() to avoid store tearing.

Right, agree.

> Alternatively, I think we can use smp_store_release() here, all we care
> about is that the prior loads stay prior.
> 
> Similarly, I suppose, we could use smp_load_acquire() for the data_head
> load above.

Wouldn't this then also allow the kernel side to use smp_store_release()
when it updates the head? We'd then pretty much arrive at the model
described in Documentation/core-api/circular-buffers.rst.

Meaning, a rough pseudo-code diff would look as follows:

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 5d3cf40..3d96275 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -84,8 +84,9 @@ static void perf_output_put_handle(struct perf_output_handle 
*handle)
         *
         * See perf_output_begin().
         */
-       smp_wmb(); /* B, matches C */
-       rb->user_page->data_head = head;
+
+       /* B, matches C */
+       smp_store_release(&rb->user_page->data_head, head);

        /*
         * Now check if we missed an update -- rely on previous implied

Plus, user space side of perf (assuming we have the barriers imported):

diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 05a6d47..66e1304 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -72,20 +72,15 @@ void perf_mmap__consume(struct perf_mmap *map);
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
        struct perf_event_mmap_page *pc = mm->base;
-       u64 head = READ_ONCE(pc->data_head);
-       rmb();
-       return head;
+
+       return smp_load_acquire(&pc->data_head);
 }

 static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
 {
        struct perf_event_mmap_page *pc = md->base;

-       /*
-        * ensure all reads are done before we write the tail out.
-        */
-       mb();
-       pc->data_tail = tail;
+       smp_store_release(&pc->data_tail, tail);
 }

 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);

Reply via email to