The size of struct cpumask depends on CONFIG_NR_CPUS. Some configs set
CONFIG_NR_CPUS very high, e.g. 5120, in which case struct cpumask takes
640 bytes. With thousands of flows, the per-flow cpu_used_mask consumes
a lot of memory, so remove it.
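
For illustration only (not part of the patch), a stand-alone sketch of
the arithmetic; the struct below just mirrors how a NR_CPUS-bit bitmap
is sized and is not the kernel definition:

    #include <stdio.h>

    /* Hypothetical userspace model of struct cpumask: a bitmap of
     * NR_CPUS bits stored in unsigned longs.
     */
    #define NR_CPUS          5120
    #define BITS_PER_LONG    (8 * sizeof(unsigned long))
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    struct cpumask_sketch {
            unsigned long bits[BITS_TO_LONGS(NR_CPUS)];
    };

    int main(void)
    {
            unsigned long per_flow = sizeof(struct cpumask_sketch);
            unsigned long flows = 10000;    /* "thousands of flows" */

            /* 5120 bits / 8 = 640 bytes per flow */
            printf("per flow: %lu bytes\n", per_flow);
            /* 640 * 10000 = 6400000 bytes, ~6 MB just for the masks */
            printf("for %lu flows: %lu bytes\n", flows, per_flow * flows);
            return 0;
    }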

cpu_used_mask served two purposes (see the sketch after this list):
1: it let the code assume the first CPU is cpu0, which may not be
   true; use cpumask_first(cpu_possible_mask) instead.
2: it reduced the iteration when getting/clearing statistics; but
   those are not hot paths, so for_each_possible_cpu is good enough.
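
A minimal kernel-style sketch of the two replacements (illustrative
only; the real changes are in the diff below):

    struct flow_stats *stats;
    int first_cpu, cpu;

    /* 1: the pre-allocated stats node lives at the first possible
     *    CPU's slot, which is not necessarily index 0.
     */
    first_cpu = cpumask_first(cpu_possible_mask);
    stats = rcu_dereference(flow->stats[first_cpu]);

    /* 2: get/clear simply walk every possible CPU; these are not
     *    hot paths, so the extra iterations are acceptable.
     */
    for_each_possible_cpu(cpu) {
            struct flow_stats *s = rcu_dereference_ovsl(flow->stats[cpu]);

            if (s) {
                    /* accumulate into ovs_stats, or reset s */
            }
    }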

Signed-off-by: Zhang Yu <zhangy...@baidu.com>
Signed-off-by: Li RongQing <lirongq...@baidu.com>
---
 net/openvswitch/flow.c       | 11 +++++------
 net/openvswitch/flow.h       |  5 ++---
 net/openvswitch/flow_table.c | 11 +++++------
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 56b8e7167790..ad580bec00fb 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -85,7 +85,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
                if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
                        flow->stats_last_writer = cpu;
        } else {
-               stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+               int cpu1 = cpumask_first(cpu_possible_mask);
+
+               stats = rcu_dereference(flow->stats[cpu1]); /* Pre-allocated. */
                spin_lock(&stats->lock);
 
                /* If the current CPU is the only writer on the
@@ -118,7 +120,6 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
                                        rcu_assign_pointer(flow->stats[cpu],
                                                           new_stats);
-                                       cpumask_set_cpu(cpu, &flow->cpu_used_mask);
                                        goto unlock;
                                }
                        }
@@ -145,8 +146,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
        *tcp_flags = 0;
        memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-       /* We open code this to make sure cpu 0 is always considered */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+       for_each_possible_cpu(cpu) {
                struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
                if (stats) {
@@ -169,8 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
 {
        int cpu;
 
-       /* We open code this to make sure cpu 0 is always considered */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+       for_each_possible_cpu(cpu) {
                struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
                if (stats) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index c670dd24b8b7..d0ea5d6ced3e 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -223,17 +223,16 @@ struct sw_flow {
                u32 hash;
        } flow_table, ufid_table;
        int stats_last_writer;          /* CPU id of the last writer on
-                                        * 'stats[0]'.
+                                        * 'stats[first cpu id]'.
                                         */
        struct sw_flow_key key;
        struct sw_flow_id id;
-       struct cpumask cpu_used_mask;
        struct sw_flow_mask *mask;
        struct sw_flow_actions __rcu *sf_acts;
        struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
                                           * is allocated at flow creation time,
                                           * the rest are allocated on demand
-                                          * while holding the 'stats[0].lock'.
+                                          * while holding the 'stats[first cpu id].lock'.
                                           */
 };
 
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 80ea2a71852e..e4dbd65c308a 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -80,6 +80,7 @@ struct sw_flow *ovs_flow_alloc(void)
 {
        struct sw_flow *flow;
        struct flow_stats *stats;
+       int cpu = cpumask_first(cpu_possible_mask);
 
        flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
        if (!flow)
@@ -90,15 +91,13 @@ struct sw_flow *ovs_flow_alloc(void)
        /* Initialize the default stat node. */
        stats = kmem_cache_alloc_node(flow_stats_cache,
                                      GFP_KERNEL | __GFP_ZERO,
-                                     node_online(0) ? 0 : NUMA_NO_NODE);
+                                     cpu_to_node(cpu));
        if (!stats)
                goto err;
 
        spin_lock_init(&stats->lock);
 
-       RCU_INIT_POINTER(flow->stats[0], stats);
-
-       cpumask_set_cpu(0, &flow->cpu_used_mask);
+       RCU_INIT_POINTER(flow->stats[cpu], stats);
 
        return flow;
 err:
@@ -142,11 +141,11 @@ static void flow_free(struct sw_flow *flow)
                kfree(flow->id.unmasked_key);
        if (flow->sf_acts)
                ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
-       /* We open code this to make sure cpu 0 is always considered */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
+       for_each_possible_cpu(cpu) {
                if (flow->stats[cpu])
                        kmem_cache_free(flow_stats_cache,
                                        (struct flow_stats __force *)flow->stats[cpu]);
+       }
        kmem_cache_free(flow_cache, flow);
 }
 
-- 
2.16.2
