The current code tracks the assigned CPUs within a NUMA node in the context of the primary channel. So, if we have a VM with a single NUMA node with 8 VCPUs, we may end up unevenly distributing the channel load. Fix the issue by tracking affiliations globally.
Signed-off-by: K. Y. Srinivasan <k...@microsoft.com> --- drivers/hv/channel_mgmt.c | 11 ++++++----- drivers/hv/hv.c | 9 +++++++++ drivers/hv/hyperv_vmbus.h | 5 +++++ include/linux/hyperv.h | 1 - 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 4506a66..f52373a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -391,6 +391,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; + struct cpumask *alloced_mask; for (i = IDE; i < MAX_PERF_CHN; i++) { if (!memcmp(type_guid->b, hp_devs[i].guid, @@ -408,7 +409,6 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui * channel, bind it to cpu 0. */ channel->numa_node = 0; - cpumask_set_cpu(0, &channel->alloced_cpus_in_node); channel->target_cpu = 0; channel->target_vp = hv_context.vp_index[0]; return; @@ -433,21 +433,22 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui channel->numa_node = next_node; primary = channel; } + alloced_mask = &hv_context.hv_numa_map[primary->numa_node]; - if (cpumask_weight(&primary->alloced_cpus_in_node) == + if (cpumask_weight(alloced_mask) == cpumask_weight(cpumask_of_node(primary->numa_node))) { /* * We have cycled through all the CPUs in the node; * reset the alloced map. */ - cpumask_clear(&primary->alloced_cpus_in_node); + cpumask_clear(alloced_mask); } - cpumask_xor(&available_mask, &primary->alloced_cpus_in_node, + cpumask_xor(&available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); cur_cpu = cpumask_next(-1, &available_mask); - cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node); + cpumask_set_cpu(cur_cpu, alloced_mask); channel->target_cpu = cur_cpu; channel->target_vp = hv_context.vp_index[cur_cpu]; diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 41d8072..fd93cfd 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -332,6 +332,13 @@ int hv_synic_alloc(void) size_t ced_size = sizeof(struct clock_event_device); int cpu; + hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids, + GFP_ATOMIC); + if (hv_context.hv_numa_map == NULL) { + pr_err("Unable to allocate NUMA map\n"); + goto err; + } + for_each_online_cpu(cpu) { hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC); if (hv_context.event_dpc[cpu] == NULL) { @@ -345,6 +352,7 @@ int hv_synic_alloc(void) pr_err("Unable to allocate clock event device\n"); goto err; } + hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu); hv_context.synic_message_page[cpu] = @@ -393,6 +401,7 @@ void hv_synic_free(void) { int cpu; + kfree(hv_context.hv_numa_map); for_each_online_cpu(cpu) hv_synic_free_cpu(cpu); } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index cddc0c9..7bdef66 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -551,6 +551,11 @@ struct hv_context { * Support PV clockevent device. */ struct clock_event_device *clk_evt[NR_CPUS]; + /* + * To manage allocations in a NUMA node. + * Array indexed by numa node ID. + */ + struct cpumask *hv_numa_map; }; extern struct hv_context hv_context; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5..5a3df5a 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -699,7 +699,6 @@ struct vmbus_channel { /* * State to manage the CPU affiliation of channels. */ - struct cpumask alloced_cpus_in_node; int numa_node; /* * Support for sub-channels. For high performance devices, -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/