From: Lan Tianyu <tianyu....@microsoft.com> On the bare metal, enabling X2APIC mode requires interrupt remapping function which helps to deliver irq to cpu with 32-bit APIC ID. Hyper-V doesn't provide interrupt remapping function so far and Hyper-V MSI protocol already supports to deliver interrupt to the CPU whose virtual processor index is more than 255. IO-APIC interrupt still has 8-bit APIC ID limitation.
This patch is to add Hyper-V stub IOMMU driver in order to enable X2APIC mode successfully in Hyper-V Linux guest. The driver returns X2APIC interrupt remapping capability when X2APIC mode is available. Otherwise, it creates a Hyper-V irq domain to limit IO-APIC interrupts' affinity and make sure cpus assigned with IO-APIC interrupt have 8-bit APIC ID. Define 24 IO-APIC remapping entries because Hyper-V only expose one single IO-APIC and one IO-APIC has 24 pins according IO-APIC spec( https://pdos.csail.mit.edu/6.828/2016/readings/ia32/ioapic.pdf). Reviewed-by: Michael Kelley <mikel...@microsoft.com> Signed-off-by: Lan Tianyu <tianyu....@microsoft.com> --- Change since v4: - Fix the loop of scan cpu's APIC id Change since v3: - Make Hyper-V IOMMU as Hyper-V default driver - Fix hypervisor_is_type() input parameter - Check possible cpu numbers during scan 0~255 cpu's apic id. Change since v2: - Improve comment about why save IO-APIC entry in the irq chip data. - Some code improvement. - Improve statement in the IOMMU Kconfig. Change since v1: - Remove unused pr_fmt - Make ioapic_ir_domain as static variable - Remove unused variables cfg and entry in the hyperv_irq_remapping_alloc() - Fix comments --- drivers/iommu/Kconfig | 9 ++ drivers/iommu/Makefile | 1 + drivers/iommu/hyperv-iommu.c | 194 ++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/irq_remapping.c | 3 + drivers/iommu/irq_remapping.h | 1 + 5 files changed, 208 insertions(+) create mode 100644 drivers/iommu/hyperv-iommu.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 45d7021..6f07f3b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -437,4 +437,13 @@ config QCOM_IOMMU help Support for IOMMU on certain Qualcomm SoCs. +config HYPERV_IOMMU + bool "Hyper-V x2APIC IRQ Handling" + depends on HYPERV + select IOMMU_API + default HYPERV + help + Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux + guests to run with x2APIC mode enabled. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index a158a68..8c71a15 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o +obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c new file mode 100644 index 0000000..eb358e9 --- /dev/null +++ b/drivers/iommu/hyperv-iommu.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V stub IOMMU driver. + * + * Copyright (C) 2019, Microsoft, Inc. + * + * Author : Lan Tianyu <tianyu....@microsoft.com> + */ + +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/iommu.h> +#include <linux/module.h> + +#include <asm/hw_irq.h> +#include <asm/io_apic.h> +#include <asm/irq_remapping.h> +#include <asm/hypervisor.h> + +#include "irq_remapping.h" + +#ifdef CONFIG_IRQ_REMAP + +/* + * According 82093AA IO-APIC spec , IO APIC has a 24-entry Interrupt + * Redirection Table. Hyper-V exposes one single IO-APIC and so define + * 24 IO APIC remmapping entries. + */ +#define IOAPIC_REMAPPING_ENTRY 24 + +static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE }; +static struct irq_domain *ioapic_ir_domain; + +static int hyperv_ir_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + struct IO_APIC_route_entry *entry; + int ret; + + /* Return error If new irq affinity is out of ioapic_max_cpumask. */ + if (!cpumask_subset(mask, &ioapic_max_cpumask)) + return -EINVAL; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + entry = data->chip_data; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + send_cleanup_vector(cfg); + + return 0; +} + +static struct irq_chip hyperv_ir_chip = { + .name = "HYPERV-IR", + .irq_ack = apic_ack_irq, + .irq_set_affinity = hyperv_ir_set_affinity, +}; + +static int hyperv_irq_remapping_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + struct irq_alloc_info *info = arg; + struct irq_data *irq_data; + struct irq_desc *desc; + int ret = 0; + + if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) + return -EINVAL; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) { + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return -EINVAL; + } + + irq_data->chip = &hyperv_ir_chip; + + /* + * If there is interrupt remapping function of IOMMU, setting irq + * affinity only needs to change IRTE of IOMMU. But Hyper-V doesn't + * support interrupt remapping function, setting irq affinity of IO-APIC + * interrupts still needs to change IO-APIC registers. But ioapic_ + * configure_entry() will ignore value of cfg->vector and cfg-> + * dest_apicid when IO-APIC's parent irq domain is not the vector + * domain.(See ioapic_configure_entry()) In order to setting vector + * and dest_apicid to IO-APIC register, IO-APIC entry pointer is saved + * in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_ + * affinity() set vector and dest_apicid directly into IO-APIC entry. + */ + irq_data->chip_data = info->ioapic_entry; + + /* + * Hypver-V IO APIC irq affinity should be in the scope of + * ioapic_max_cpumask because no irq remapping support. + */ + desc = irq_data_to_desc(irq_data); + cpumask_copy(desc->irq_common_data.affinity, &ioapic_max_cpumask); + + return 0; +} + +static void hyperv_irq_remapping_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} + +static int hyperv_irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool reserve) +{ + struct irq_cfg *cfg = irqd_cfg(irq_data); + struct IO_APIC_route_entry *entry = irq_data->chip_data; + + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + + return 0; +} + +static struct irq_domain_ops hyperv_ir_domain_ops = { + .alloc = hyperv_irq_remapping_alloc, + .free = hyperv_irq_remapping_free, + .activate = hyperv_irq_remapping_activate, +}; + +static int __init hyperv_prepare_irq_remapping(void) +{ + struct fwnode_handle *fn; + int i; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || + !x2apic_supported()) + return -ENODEV; + + fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); + if (!fn) + return -ENOMEM; + + ioapic_ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), + 0, IOAPIC_REMAPPING_ENTRY, fn, + &hyperv_ir_domain_ops, NULL); + + irq_domain_free_fwnode(fn); + + /* + * Hyper-V doesn't provide irq remapping function for + * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. + * Cpu's APIC ID is read from ACPI MADT table and APIC IDs + * in the MADT table on Hyper-v are sorted monotonic increasingly. + * APIC ID reflects cpu topology. There maybe some APIC ID + * gaps when cpu number in a socket is not power of two. Prepare + * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu + * into ioapic_max_cpumask if its APIC ID is less than 256. + */ + for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--) + if (cpu_physical_id(i) < 256) + cpumask_set_cpu(i, &ioapic_max_cpumask); + + return 0; +} + +static int __init hyperv_enable_irq_remapping(void) +{ + return IRQ_REMAP_X2APIC_MODE; +} + +static struct irq_domain *hyperv_get_ir_irq_domain(struct irq_alloc_info *info) +{ + if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) + return ioapic_ir_domain; + else + return NULL; +} + +struct irq_remap_ops hyperv_irq_remap_ops = { + .prepare = hyperv_prepare_irq_remapping, + .enable = hyperv_enable_irq_remapping, + .get_ir_irq_domain = hyperv_get_ir_irq_domain, +}; + +#endif diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index b94ebd4..81cf290 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -103,6 +103,9 @@ int __init irq_remapping_prepare(void) else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) remap_ops = &amd_iommu_irq_ops; + else if (IS_ENABLED(CONFIG_HYPERV_IOMMU) && + hyperv_irq_remap_ops.prepare() == 0) + remap_ops = &hyperv_irq_remap_ops; else return -ENOSYS; diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 0afef6e..f8609e9 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -64,6 +64,7 @@ struct irq_remap_ops { extern struct irq_remap_ops intel_irq_remap_ops; extern struct irq_remap_ops amd_iommu_irq_ops; +extern struct irq_remap_ops hyperv_irq_remap_ops; #else /* CONFIG_IRQ_REMAP */ -- 2.7.4