So the atomic operations used for block I/O accounting can be removed
completely, and it is OK to sum the percpu counters in part_in_flight()
because that function runs at most once per tick.

Signed-off-by: Ming Lei <tom.leim...@gmail.com>
---
 block/blk-core.c          |  1 +
 block/partition-generic.c |  5 +++--
 drivers/md/dm.c           | 10 ++++++----
 include/linux/genhd.h     | 24 ++++++++++++++++++------
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f180a6d..0001d4c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1344,6 +1344,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
        if (now == part->stamp)
                return;
 
+       /* at most one percpu addition per one tick */
        inflight = part_in_flight(part);
        if (inflight) {
                __part_stat_add(cpu, part, time_in_queue,
diff --git a/block/partition-generic.c b/block/partition-generic.c
index e771113..0a553e7 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -140,8 +140,9 @@ ssize_t part_inflight_show(struct device *dev,
 {
        struct hd_struct *p = dev_to_part(dev);
 
-       return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
-               atomic_read(&p->in_flight[1]));
+       return sprintf(buf, "%8u %8u\n",
+                       part_stat_read(p, in_flight[0]),
+                       part_stat_read(p, in_flight[1]));
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index de70377..1b6d8be 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -651,9 +651,9 @@ static void start_io_acct(struct dm_io *io)
 
        cpu = part_stat_lock();
        part_round_stats(cpu, &dm_disk(md)->part0);
+       part_stat_set(cpu, &dm_disk(md)->part0, in_flight[rw],
+                       atomic_inc_return(&md->pending[rw]));
        part_stat_unlock();
-       atomic_set(&dm_disk(md)->part0.in_flight[rw],
-               atomic_inc_return(&md->pending[rw]));
 
        if (unlikely(dm_stats_used(&md->stats)))
                dm_stats_account_io(&md->stats, bio->bi_rw, 
bio->bi_iter.bi_sector,
@@ -665,7 +665,7 @@ static void end_io_acct(struct dm_io *io)
        struct mapped_device *md = io->md;
        struct bio *bio = io->bio;
        unsigned long duration = jiffies - io->start_time;
-       int pending;
+       int pending, cpu;
        int rw = bio_data_dir(bio);
 
        generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
@@ -679,7 +679,9 @@ static void end_io_acct(struct dm_io *io)
         * a flush.
         */
        pending = atomic_dec_return(&md->pending[rw]);
-       atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
+       cpu = part_stat_lock();
+       part_stat_set(cpu, &dm_disk(md)->part0, in_flight[rw], pending);
+       part_stat_unlock();
        pending += atomic_read(&md->pending[rw^0x1]);
 
        /* nudge anyone waiting on suspend queue */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 612ae80..abe5567 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -86,6 +86,7 @@ struct disk_stats {
        unsigned long ticks[2];
        unsigned long io_ticks;
        unsigned long time_in_queue;
+       unsigned int  in_flight[2];
 };
 
 #define PARTITION_META_INFO_VOLNAMELTH 64
@@ -119,7 +120,6 @@ struct hd_struct {
        int make_it_fail;
 #endif
        unsigned long stamp;
-       atomic_t in_flight[2];
 #ifdef CONFIG_SMP
        struct disk_stats __percpu *dkstats;
 #else
@@ -320,6 +320,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
        res;                                                            \
 })
 
+#define part_stat_set(cpu, part, field, seted)                 \
+       (per_cpu_ptr((part)->dkstats, (cpu))->field = (seted))
+
 static inline void part_stat_set_all(struct hd_struct *part, int value)
 {
        int i;
@@ -351,6 +354,9 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #define part_stat_read(part, field)    ((part)->dkstats.field)
 
+#define part_stat_set(cpu, part, field, seted)                 \
+       ((part)->dkstats.field = (seted))
+
 static inline void part_stat_set_all(struct hd_struct *part, int value)
 {
        memset(&part->dkstats, value, sizeof(struct disk_stats));
@@ -383,21 +389,27 @@ static inline void free_part_stats(struct hd_struct *part)
 
 static inline void part_inc_in_flight(int cpu, struct hd_struct *part, int rw)
 {
-       atomic_inc(&part->in_flight[rw]);
+       part_stat_inc(cpu, part, in_flight[rw]);
        if (part->partno)
-               atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
+               part_stat_inc(cpu, &part_to_disk(part)->part0, in_flight[rw]);
 }
 
 static inline void part_dec_in_flight(int cpu, struct hd_struct *part, int rw)
 {
-       atomic_dec(&part->in_flight[rw]);
+       part_stat_dec(cpu, part, in_flight[rw]);
        if (part->partno)
-               atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
+               part_stat_dec(cpu, &part_to_disk(part)->part0, in_flight[rw]);
 }
 
 static inline int part_in_flight(struct hd_struct *part)
 {
-       return atomic_read(&part->in_flight[0]) + 
atomic_read(&part->in_flight[1]);
+       int res = 0;
+       unsigned int cpu;
+       for_each_possible_cpu(cpu) {
+               res += per_cpu_ptr((part)->dkstats, cpu)->in_flight[0];
+               res += per_cpu_ptr((part)->dkstats, cpu)->in_flight[1];
+       }
+       return res;
 }
 
 static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to