On Tue, Jul 09, 2013 at 03:05:41PM +0800, Yan, Zheng wrote:

> Thank you for your help. I ran the same test; the results for the regular
> case are much better. But it still has about 1% overhead, probably because
> we enlarged the ring_buffer structure, making it less cache-friendly.
> 
>       origin    with the patch
> AVG    1000      1013
> STDEV  13.4      15.0

And this is the !overwrite case, right? I don't suppose you fixed the logic
error Namhyung Kim pointed out? That should affect the overwrite case, I
suppose, since without the fix it won't switch to perf_event_output_overwrite().

tip/master:

struct ring_buffer {
        atomic_t                   refcount;             /*     0     4 */

        /* XXX 4 bytes hole, try to pack */

        struct callback_head       callback_head;        /*     8    16 */
        int                        nr_pages;             /*    24     4 */
        int                        overwrite;            /*    28     4 */
        atomic_t                   poll;                 /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        local_t                    head;                 /*    40     8 */
        local_t                    nest;                 /*    48     8 */
        local_t                    events;               /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        local_t                    wakeup;               /*    64     8 */
        local_t                    lost;                 /*    72     8 */
        long int                   watermark;            /*    80     8 */
        spinlock_t                 event_lock;           /*    88    56 */
        /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
        struct list_head           event_list;           /*   144    16 */
        atomic_t                   mmap_count;           /*   160     4 */

        /* XXX 4 bytes hole, try to pack */

        long unsigned int          mmap_locked;          /*   168     8 */
        struct user_struct *       mmap_user;            /*   176     8 */
        struct perf_event_mmap_page * user_page;         /*   184     8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        void *                     data_pages[0];        /*   192     0 */

        /* size: 192, cachelines: 3, members: 18 */
        /* sum members: 180, holes: 3, sum holes: 12 */
};

tip/master + patch:

struct ring_buffer {
        atomic_t                   refcount;             /*     0     4 */

        /* XXX 4 bytes hole, try to pack */

        struct callback_head       callback_head;        /*     8    16 */
        int                        nr_pages;             /*    24     4 */
        int                        overwrite;            /*    28     4 */
        atomic_t                   poll;                 /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        local_t                    tail;                 /*    40     8 */
        local_t                    next_tail;            /*    48     8 */
        local_t                    head;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        local_t                    nest;                 /*    64     8 */
        local_t                    events;               /*    72     8 */
        local_t                    wakeup;               /*    80     8 */
        local_t                    lost;                 /*    88     8 */
        long int                   watermark;            /*    96     8 */
        spinlock_t                 event_lock;           /*   104    56 */
        /* --- cacheline 2 boundary (128 bytes) was 32 bytes ago --- */
        struct list_head           event_list;           /*   160    16 */
        atomic_t                   mmap_count;           /*   176     4 */

        /* XXX 4 bytes hole, try to pack */

        long unsigned int          mmap_locked;          /*   184     8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        struct user_struct *       mmap_user;            /*   192     8 */
        struct perf_event_mmap_page * user_page;         /*   200     8 */
        void *                     data_pages[0];        /*   208     0 */

        /* size: 208, cachelines: 4, members: 20 */
        /* sum members: 196, holes: 3, sum holes: 12 */
        /* last cacheline: 16 bytes */
};

tip/master + patch^2:

struct ring_buffer {
        atomic_t                   refcount;             /*     0     4 */
        atomic_t                   mmap_count;           /*     4     4 */
        union {
                int                overwrite;            /*           4 */
                struct callback_head callback_head;      /*          16 */
        };                                               /*     8    16 */
        int                        nr_pages;             /*    24     4 */
        atomic_t                   poll;                 /*    28     4 */
        local_t                    tail;                 /*    32     8 */
        local_t                    next_tail;            /*    40     8 */
        local_t                    head;                 /*    48     8 */
        local_t                    nest;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        local_t                    events;               /*    64     8 */
        local_t                    wakeup;               /*    72     8 */
        local_t                    lost;                 /*    80     8 */
        long int                   watermark;            /*    88     8 */
        spinlock_t                 event_lock;           /*    96    56 */
        /* --- cacheline 2 boundary (128 bytes) was 24 bytes ago --- */
        struct list_head           event_list;           /*   152    16 */
        long unsigned int          mmap_locked;          /*   168     8 */
        struct user_struct *       mmap_user;            /*   176     8 */
        struct perf_event_mmap_page * user_page;         /*   184     8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        void *                     data_pages[0];        /*   192     0 */

        /* size: 192, cachelines: 3, members: 19 */
};


---
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4641,7 +4641,7 @@ static void perf_event_output_overwrite(
 static void
 perf_event_set_overflow(struct perf_event *event, struct ring_buffer *rb)
 {
-       if (event->overflow_handler != perf_event_output ||
+       if (event->overflow_handler != perf_event_output &&
            event->overflow_handler != perf_event_output_overwrite)
                return;
 
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -10,13 +10,16 @@
 
 struct ring_buffer {
        atomic_t                        refcount;
-       struct rcu_head                 rcu_head;
+       atomic_t                        mmap_count;
+       union {
+               int                     overwrite;      /* can overwrite itself */
+               struct rcu_head         rcu_head;
+       };
 #ifdef CONFIG_PERF_USE_VMALLOC
        struct work_struct              work;
        int                             page_order;     /* allocation order  */
 #endif
        int                             nr_pages;       /* nr of data pages  */
-       int                             overwrite;      /* can overwrite itself */
 
        atomic_t                        poll;           /* POLL_ for wakeups */
 
@@ -33,7 +36,6 @@ struct ring_buffer {
        spinlock_t                      event_lock;
        struct list_head                event_list;
 
-       atomic_t                        mmap_count;
        unsigned long                   mmap_locked;
        struct user_struct              *mmap_user;
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to