Re: init_timer_deferrable conversion

Eric Dumazet Mon, 17 Dec 2007 06:30:45 -0800

On Mon, 17 Dec 2007 09:55:04 +0100
Eric Dumazet <[EMAIL PROTECTED]> wrote:


> On Sun, 16 Dec 2007 22:00:23 -0500 (EST)
> Parag Warudkar <[EMAIL PROTECTED]> wrote:
> 
> > In my quest to get the wake-ups from idle per second down to bare minimum, 
> > I noticed 3 places in the kernel that could benefit from 
> > using init_timer_deferrable() instead of init_timer() -
> > 
> > a) drivers/net/sky2.c - watchdog_timer. This was showing up high on 
> > Powertop's list of things that cause routine wakeups from idle. After 
> > converting to init_timer_deferrable() the wakeups went down and this one 
> > no longer shows up in powertop's list. 25% reduction.
> > 
> > b) kernel/time/clocksource.c - watchdog_timer - same story as sky2.c
> > 
> > c) net/core/neighbour.c - gc_timer - Most benefit from deferrable timer.
> 
> neigh_periodic_timer() is actually doing almost nothing per round, since it
> looks only one slot of hash table. We could probably convert it to a
> workqueue and scan whole table at once.
> 

Parag, could you please try this patch ?

[NET] ARP : Convert neigh garbage collection from softirq to workqueue

Current neigh_periodic_timer() function is fired by timer IRQ, and
scans one hash bucket out of a potentially big number.

As we are supposed to scan whole hash table in 15 seconds, this means
neigh_periodic_timer() can be fired very often. (depending on the number
of concurrent hash entries we stored in this table)

Converting this to a workqueue permits scaning whole table, minimizing icache
pollution, and firing this work every 15 seconds, independantly of hash table
size.

Signed-off-by: Eric Dumazet <[EMAIL PROTECTED]>

 include/net/neighbour.h |    4 -
 net/core/neighbour.c    |   89 ++++++++++++++++++--------------------
 2 files changed, 45 insertions(+), 48 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a4f2618..fdb9251 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -24,6 +24,7 @@
 
 #include <linux/err.h>
 #include <linux/sysctl.h>
+#include <linux/workqueue.h>
 #include <net/rtnetlink.h>
 
 #define NUD_IN_TIMER   (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE)
@@ -155,7 +156,7 @@ struct neigh_table
        int                     gc_thresh2;
        int                     gc_thresh3;
        unsigned long           last_flush;
-       struct timer_list       gc_timer;
+       struct delayed_work     gc_work;
        struct timer_list       proxy_timer;
        struct sk_buff_head     proxy_queue;
        atomic_t                entries;
@@ -166,7 +167,6 @@ struct neigh_table
        struct neighbour        **hash_buckets;
        unsigned int            hash_mask;
        __u32                   hash_rnd;
-       unsigned int            hash_chain_gc;
        struct pneigh_entry     **phash_buckets;
 #ifdef CONFIG_PROC_FS
        struct proc_dir_entry   *pde;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4b6dd1e..495ab19 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -637,75 +637,74 @@ static void neigh_connect(struct neighbour *neigh)
                hh->hh_output = neigh->ops->hh_output;
 }
 
-static void neigh_periodic_timer(unsigned long arg)
+static void neigh_periodic_work(struct work_struct *work)
 {
-       struct neigh_table *tbl = (struct neigh_table *)arg;
+       struct neigh_table *tbl = container_of(work, struct neigh_table, 
gc_work.work);
        struct neighbour *n, **np;
-       unsigned long expire, now = jiffies;
+       unsigned int i;
 
        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
-       write_lock(&tbl->lock);
+       write_lock_bh(&tbl->lock);
 
        /*
         *      periodically recompute ReachableTime from random function
         */
 
-       if (time_after(now, tbl->last_rand + 300 * HZ)) {
+       if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
-               tbl->last_rand = now;
+               tbl->last_rand = jiffies;
                for (p = &tbl->parms; p; p = p->next)
                        p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);
        }
 
-       np = &tbl->hash_buckets[tbl->hash_chain_gc];
-       tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
+       for (i = 0 ; i <= tbl->hash_mask; i++) {
+               np = &tbl->hash_buckets[i];
 
-       while ((n = *np) != NULL) {
-               unsigned int state;
+               while ((n = *np) != NULL) {
+                       unsigned int state;
 
-               write_lock(&n->lock);
+                       write_lock(&n->lock);
 
-               state = n->nud_state;
-               if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
-                       write_unlock(&n->lock);
-                       goto next_elt;
-               }
+                       state = n->nud_state;
+                       if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+                               write_unlock(&n->lock);
+                               goto next_elt;
+                       }
 
-               if (time_before(n->used, n->confirmed))
-                       n->used = n->confirmed;
+                       if (time_before(n->used, n->confirmed))
+                               n->used = n->confirmed;
 
-               if (atomic_read(&n->refcnt) == 1 &&
-                   (state == NUD_FAILED ||
-                    time_after(now, n->used + n->parms->gc_staletime))) {
-                       *np = n->next;
-                       n->dead = 1;
+                       if (atomic_read(&n->refcnt) == 1 &&
+                           (state == NUD_FAILED ||
+                            time_after(jiffies, n->used + 
n->parms->gc_staletime))) {
+                               *np = n->next;
+                               n->dead = 1;
+                               write_unlock(&n->lock);
+                               neigh_cleanup_and_release(n);
+                               continue;
+                       }
                        write_unlock(&n->lock);
-                       neigh_cleanup_and_release(n);
-                       continue;
-               }
-               write_unlock(&n->lock);
 
 next_elt:
-               np = &n->next;
+                       np = &n->next;
+               }
+               /*
+                * It's fine to release lock here, even if hash table
+                * grows while we are preempted.
+                */
+               write_unlock_bh(&tbl->lock);
+               cond_resched();
+               write_lock_bh(&tbl->lock);
        }
-
        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
         * base_reachable_time.
         */
-       expire = tbl->parms.base_reachable_time >> 1;
-       expire /= (tbl->hash_mask + 1);
-       if (!expire)
-               expire = 1;
-
-       if (expire>HZ)
-               mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
-       else
-               mod_timer(&tbl->gc_timer, now + expire);
-
-       write_unlock(&tbl->lock);
+       schedule_delayed_work(&tbl->gc_work,
+                             tbl->parms.base_reachable_time >> 1);
+       write_unlock_bh(&tbl->lock);
 }
 
 static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -1370,10 +1369,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
        get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
 
        rwlock_init(&tbl->lock);
-       setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
-       tbl->gc_timer.expires  = now + 1;
-       add_timer(&tbl->gc_timer);
-
+       INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
+       schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);
@@ -1408,7 +1405,8 @@ int neigh_table_clear(struct neigh_table *tbl)
        struct neigh_table **tp;
 
        /* It is not clean... Fix it to unload IPv6 module safely */
-       del_timer_sync(&tbl->gc_timer);
+       cancel_delayed_work(&tbl->gc_work);
+       flush_scheduled_work();
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
@@ -1678,7 +1676,6 @@ static int neightbl_fill_info(struct sk_buff *skb, struct 
neigh_table *tbl,
                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
                        .ndtc_hash_rnd          = tbl->hash_rnd,
                        .ndtc_hash_mask         = tbl->hash_mask,
-                       .ndtc_hash_chain_gc     = tbl->hash_chain_gc,
                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
                };
 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: init_timer_deferrable conversion

Reply via email to