From: Konstantin Khlebnikov <khlebni...@yandex-team.ru>

Parameters:
period   - interval between refills (100ms should be fine)
quota    - events refill per period
deadline - interval to utilize unused past quota (1s by default)
latency  - maximum injected delay (10s by default)

Quota accumulates into a shared 'budget' and is spread across CPUs in per-cpu precharges.

Signed-off-by: Konstantin Khlebnikov <khlebni...@yandex-team.ru>
---
 include/linux/percpu_ratelimit.h |   45 ++++++++++
 lib/Makefile                     |    1 
 lib/percpu_ratelimit.c           |  168 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 214 insertions(+)
 create mode 100644 include/linux/percpu_ratelimit.h
 create mode 100644 lib/percpu_ratelimit.c

diff --git a/include/linux/percpu_ratelimit.h b/include/linux/percpu_ratelimit.h
new file mode 100644
index 0000000..42c45d4
--- /dev/null
+++ b/include/linux/percpu_ratelimit.h
@@ -0,0 +1,45 @@
+#ifndef _LINUX_PERCPU_RATELIMIT_H
+#define _LINUX_PERCPU_RATELIMIT_H
+
+#include <linux/hrtimer.h>
+
+/*
+ * Token-bucket style rate limiter: a shared budget refilled every 'period',
+ * with small per-cpu precharges so the common charge path avoids the lock.
+ */
+struct percpu_ratelimit {
+       struct hrtimer  timer;          /* fires when throttling ends */
+       ktime_t         target;         /* time of next refill */
+       ktime_t         deadline;       /* interval to utilize past budget */
+       ktime_t         latency;        /* maximum injected delay */
+       ktime_t         period;         /* interval between refills */
+       u64             quota;          /* events refill per period */
+       u64             budget;         /* amount of available events */
+       u64             total;          /* consumed and pre-charged events */
+       raw_spinlock_t  lock;           /* protect the state */
+       u32             cpu_batch;      /* events in per-cpu precharge */
+       u32 __percpu    *cpu_budget;    /* per-cpu precharge */
+};
+
+/* True while charging is throttled: the unblock timer is still queued. */
+static inline bool percpu_ratelimit_blocked(struct percpu_ratelimit *rl)
+{
+       return hrtimer_active(&rl->timer);
+}
+
+/* Absolute (CLOCK_MONOTONIC) time of the next refill, i.e. when charging unblocks. */
+static inline ktime_t percpu_ratelimit_target(struct percpu_ratelimit *rl)
+{
+       return rl->target;
+}
+
+/*
+ * Sleep until the refill target passes, with one period of slack to let
+ * the scheduler coalesce wakeups.  NOTE(review): schedule_hrtimeout_range()
+ * expects the caller to have set the task state -- confirm call sites do.
+ */
+static inline int percpu_ratelimit_wait(struct percpu_ratelimit *rl)
+{
+       ktime_t target = rl->target;
+
+       return schedule_hrtimeout_range(&target, ktime_to_ns(rl->period),
+                                       HRTIMER_MODE_ABS);
+}
+
+int percpu_ratelimit_init(struct percpu_ratelimit *rl, gfp_t gfp);
+void percpu_ratelimit_destroy(struct percpu_ratelimit *rl);
+void percpu_ratelimit_setup(struct percpu_ratelimit *rl, u64 quota, u64 period);
+u64 percpu_ratelimit_quota(struct percpu_ratelimit *rl, u64 period);
+bool percpu_ratelimit_charge(struct percpu_ratelimit *rl, u64 events);
+u64 percpu_ratelimit_sum(struct percpu_ratelimit *rl);
+
+#endif /* _LINUX_PERCPU_RATELIMIT_H */
diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b..b20ab47 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,6 +21,7 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y  += kobject.o klist.o
 obj-y  += lockref.o
+obj-y  += percpu_ratelimit.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
         bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/percpu_ratelimit.c b/lib/percpu_ratelimit.c
new file mode 100644
index 0000000..8254683
--- /dev/null
+++ b/lib/percpu_ratelimit.c
@@ -0,0 +1,168 @@
+#include <linux/percpu_ratelimit.h>
+
+/*
+ * Apply a new period/quota.  Caller holds rl->lock (or has exclusive
+ * access during init).  The budget is reset to one full quota and the
+ * delta is folded into 'total' so percpu_ratelimit_sum() stays consistent
+ * across reconfiguration.
+ */
+static void __percpu_ratelimit_setup(struct percpu_ratelimit *rl,
+                                    u64 period, u64 quota)
+{
+       rl->period = ns_to_ktime(period);
+       rl->quota = quota;
+       rl->total += quota - rl->budget;
+       rl->budget = quota;
+       /* per-cpu precharge: half of a fair per-cpu share, rounded up */
+       if (do_div(quota, num_possible_cpus() * 2))
+               quota++;
+       rl->cpu_batch = min_t(u64, UINT_MAX, quota);
+       rl->target = ktime_get();
+}
+
+/*
+ * hrtimer callback: if concurrent charges pushed the refill target into
+ * the future while the timer was queued, re-arm it; otherwise let it
+ * expire so percpu_ratelimit_blocked() becomes false.
+ */
+static enum hrtimer_restart ratelimit_unblock(struct hrtimer *t)
+{
+       struct percpu_ratelimit *rl = container_of(t, struct percpu_ratelimit, timer);
+       enum hrtimer_restart ret = HRTIMER_NORESTART;
+       ktime_t now = t->base->get_time();
+
+       raw_spin_lock(&rl->lock);
+       if (ktime_after(rl->target, now)) {
+               /* one period of slack, matching percpu_ratelimit_wait() */
+               hrtimer_set_expires_range(t, rl->target, rl->period);
+               ret = HRTIMER_RESTART;
+       }
+       raw_spin_unlock(&rl->lock);
+
+       return ret;
+}
+
+/*
+ * Initialize @rl in the unlimited state (ULLONG_MAX events per second),
+ * with default deadline (1s) and latency cap (10s).
+ * Returns 0, or -ENOMEM if the per-cpu precharge counters cannot be allocated.
+ */
+int percpu_ratelimit_init(struct percpu_ratelimit *rl, gfp_t gfp)
+{
+       memset(rl, 0, sizeof(*rl));
+       rl->cpu_budget = alloc_percpu_gfp(typeof(*rl->cpu_budget), gfp);
+       if (!rl->cpu_budget)
+               return -ENOMEM;
+       raw_spin_lock_init(&rl->lock);
+       hrtimer_init(&rl->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       rl->timer.function = ratelimit_unblock;
+       rl->deadline = ns_to_ktime(NSEC_PER_SEC);
+       /* NSEC_PER_SEC is long: 'NSEC_PER_SEC * 10' overflows on 32-bit */
+       rl->latency  = ns_to_ktime(10ULL * NSEC_PER_SEC);
+       __percpu_ratelimit_setup(rl, NSEC_PER_SEC, ULLONG_MAX);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_init);
+EXPORT_SYMBOL_GPL(percpu_ratelimit_init);
+
+/*
+ * Tear down @rl.  No concurrent users may remain.
+ */
+void percpu_ratelimit_destroy(struct percpu_ratelimit *rl)
+{
+       /* stop the timer before releasing any state it could observe */
+       hrtimer_cancel(&rl->timer);
+       free_percpu(rl->cpu_budget);
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_destroy);
+
+/* IPI callback: discard this cpu's precharge after reconfiguration. */
+static void percpu_ratelimit_drain(void *info)
+{
+       struct percpu_ratelimit *rl = info;
+
+       __this_cpu_write(*rl->cpu_budget, 0);
+}
+
+/*
+ * Reconfigure the limit to @quota events per @period nanoseconds.
+ * quota == 0 or period == 0 selects the unlimited state.  Periods longer
+ * than 100ms are rescaled to 100ms granularity when at least ~20 events
+ * fit per 100ms, keeping injected delays short.  All per-cpu precharge is
+ * drained and any pending unblock timer is cancelled.
+ * NOTE(review): 'quota * NSEC_PER_SEC / 10' can overflow u64 for extreme
+ * quotas -- confirm callers keep quota bounded.
+ */
+void percpu_ratelimit_setup(struct percpu_ratelimit *rl, u64 quota, u64 period)
+{
+       unsigned long flags;
+
+       if (!quota || !period) {
+               quota = ULLONG_MAX;
+               period = NSEC_PER_SEC;
+       } else if (period > NSEC_PER_SEC / 10) {
+               u64 quant = div_u64(quota * NSEC_PER_SEC / 10, period);
+
+               if (quant > 20) {
+                       quota = quant;
+                       period = NSEC_PER_SEC / 10;
+               }
+       }
+
+       raw_spin_lock_irqsave(&rl->lock, flags);
+       __percpu_ratelimit_setup(rl, period, quota);
+       raw_spin_unlock_irqrestore(&rl->lock, flags);
+       on_each_cpu(percpu_ratelimit_drain, rl, 1);
+       hrtimer_cancel(&rl->timer);
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_setup);
+
+/*
+ * Convert the configured rate into events per @period nanoseconds.
+ * Returns 0 when the limit is disabled (quota == ULLONG_MAX).
+ * NOTE(review): 'rl->quota * period' can overflow u64 for large
+ * quota/period combinations -- confirm callers' ranges.
+ */
+u64 percpu_ratelimit_quota(struct percpu_ratelimit *rl, u64 period)
+{
+       unsigned long flags;
+       u64 quota;
+
+       raw_spin_lock_irqsave(&rl->lock, flags);
+       if (rl->quota == ULLONG_MAX)
+               quota = 0;
+       else
+               quota = div64_u64(rl->quota * period, ktime_to_ns(rl->period));
+       raw_spin_unlock_irqrestore(&rl->lock, flags);
+
+       return quota;
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_quota);
+
+/*
+ * Charges events, returns true if ratelimit is blocked and caller should sleep.
+ */
+bool percpu_ratelimit_charge(struct percpu_ratelimit *rl, u64 events)
+{
+       unsigned long flags;
+       u64 budget, delta;
+       ktime_t now, deadline;
+
+       preempt_disable();
+       /* Fast path: consume from this cpu's precharge without the lock. */
+       budget = __this_cpu_read(*rl->cpu_budget);
+       if (likely(budget >= events)) {
+               __this_cpu_sub(*rl->cpu_budget, events);
+       } else {
+               now = ktime_get();
+               raw_spin_lock_irqsave(&rl->lock, flags);
+               /* Forget refill credit older than 'deadline' ago. */
+               deadline = ktime_sub(now, rl->deadline);
+               if (ktime_after(deadline, rl->target))
+                       rl->target = deadline;
+               /* Pull the shared budget on top of the local remainder. */
+               budget += rl->budget;
+               if (budget >= events + rl->cpu_batch) {
+                       budget -= events;
+               } else {
+                       /* Pre-charge whole future periods (round up)... */
+                       delta = events + rl->cpu_batch - budget;
+                       if (do_div(delta, rl->quota))
+                               delta++;
+                       rl->target = ktime_add_ns(rl->target,
+                                       ktime_to_ns(rl->period) * delta);
+                       /* ...but never inject more than 'latency' of delay. */
+                       deadline = ktime_add(now, rl->latency);
+                       if (ktime_after(rl->target, deadline))
+                               rl->target = deadline;
+                       delta *= rl->quota;
+                       rl->total += delta;
+                       budget += delta - events;
+               }
+               /* Refill this cpu's precharge; remainder stays shared. */
+               rl->budget = budget - rl->cpu_batch;
+               __this_cpu_write(*rl->cpu_budget, rl->cpu_batch);
+               if (!hrtimer_active(&rl->timer) && ktime_after(rl->target, now))
+                       hrtimer_start_range_ns(&rl->timer, rl->target,
+                                       ktime_to_ns(rl->period),
+                                       HRTIMER_MODE_ABS);
+               raw_spin_unlock_irqrestore(&rl->lock, flags);
+       }
+       preempt_enable();
+
+       return percpu_ratelimit_blocked(rl);
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_charge);
+
+/*
+ * Returns count of consumed events: total charged minus budget still
+ * available in the shared pool and the per-cpu precharges.
+ */
+u64 percpu_ratelimit_sum(struct percpu_ratelimit *rl)
+{
+       unsigned long flags;
+       int cpu;
+       s64 ret;
+
+       raw_spin_lock_irqsave(&rl->lock, flags);
+       ret = rl->total - rl->budget;
+       /*
+        * Precharge survives cpu offlining; walking only online cpus would
+        * report events parked on an offline cpu as consumed.
+        */
+       for_each_possible_cpu(cpu)
+               ret -= per_cpu(*rl->cpu_budget, cpu);
+       raw_spin_unlock_irqrestore(&rl->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(percpu_ratelimit_sum);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to