[RFC 10/10] irqchip: Make versatile fpga irq driver a generic chip
This is an RFC patch to convert the versatile FPGA irq controller driver to use generic irq chip. It builds on the series that extends the generic chip code to allow a linear irq domain to contain one or more generic irq chips so that each interrupt controller doesn't need to hand code the generic chip setup. I've written this as a proof of concept to see if the new generic irq code does what it needs to. I had to extend it slightly to properly handle the valid mask used by the versatile FPGA driver. Tested on QEMU, but not on real hardware. Signed-off-by: Grant Likely Cc: Russell King Cc: Linus Walleij Cc: Thomas Gleixner --- drivers/irqchip/Kconfig | 1 + drivers/irqchip/irq-versatile-fpga.c | 104 +-- 2 files changed, 39 insertions(+), 66 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 4a33351..8765502 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -14,6 +14,7 @@ config ARM_VIC bool select IRQ_DOMAIN select MULTI_IRQ_HANDLER + select GENERIC_IRQ_CHIP config ARM_VIC_NR int diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 47a52ab..8c7097b 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -34,37 +34,18 @@ * @used_irqs: number of active IRQs on this controller */ struct fpga_irq_data { - void __iomem *base; - struct irq_chip chip; - u32 valid; struct irq_domain *domain; - u8 used_irqs; }; /* we cannot allocate memory when the controllers are initially registered */ static struct fpga_irq_data fpga_irq_devices[CONFIG_VERSATILE_FPGA_IRQ_NR]; static int fpga_irq_id; -static void fpga_irq_mask(struct irq_data *d) -{ - struct fpga_irq_data *f = irq_data_get_irq_chip_data(d); - u32 mask = 1 << d->hwirq; - - writel(mask, f->base + IRQ_ENABLE_CLEAR); -} - -static void fpga_irq_unmask(struct irq_data *d) -{ - struct fpga_irq_data *f = irq_data_get_irq_chip_data(d); - u32 mask = 1 << d->hwirq; - - writel(mask, f->base + 
IRQ_ENABLE_SET); -} - static void fpga_irq_handle(unsigned int irq, struct irq_desc *desc) { - struct fpga_irq_data *f = irq_desc_get_handler_data(desc); - u32 status = readl(f->base + IRQ_STATUS); + struct irq_domain *domain = irq_desc_get_handler_data(desc); + struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0); + u32 status = readl(gc->reg_base + IRQ_STATUS); if (status == 0) { do_bad_IRQ(irq, desc); @@ -74,7 +55,7 @@ static void fpga_irq_handle(unsigned int irq, struct irq_desc *desc) do { irq = ffs(status) - 1; status &= ~(1 << irq); - generic_handle_irq(irq_find_mapping(f->domain, irq)); + generic_handle_irq(irq_find_mapping(domain, irq)); } while (status); } @@ -85,11 +66,12 @@ static void fpga_irq_handle(unsigned int irq, struct irq_desc *desc) */ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs) { + struct irq_chip_generic *gc = irq_get_domain_generic_chip(f->domain, 0); int handled = 0; int irq; u32 status; - while ((status = readl(f->base + IRQ_STATUS))) { + while ((status = readl(gc->reg_base + IRQ_STATUS))) { irq = ffs(status) - 1; handle_IRQ(irq_find_mapping(f->domain, irq), regs); handled = 1; @@ -112,63 +94,53 @@ asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs) } while (handled); } -static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq) -{ - struct fpga_irq_data *f = d->host_data; - - /* Skip invalid IRQs, only register handlers for the real ones */ - if (!(f->valid & BIT(hwirq))) - return -EPERM; - irq_set_chip_data(irq, f); - irq_set_chip_and_handler(irq, &f->chip, - handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - return 0; -} - -static struct irq_domain_ops fpga_irqdomain_ops = { - .map = fpga_irqdomain_map, - .xlate = irq_domain_xlate_onetwocell, -}; - void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, int parent_irq, u32 valid, struct device_node *node) { + struct irq_chip_generic *gc; 
struct fpga_irq_data *f; - int i; + int ret; if (fpga_irq_id >= ARRAY_SIZE(fpga_irq_devices)) { pr_err("%s: too few FPGA IRQ controllers, increase CONFIG_VERSATILE_FPGA_IRQ_NR\n", __func__); return; } f = &fpga_irq_devices[fpga_irq_id]; - f->base = base; - f->chip.name = name; - f->chip.irq_ack = fpga_irq_mask; - f->chip.irq_mask = fpga_
[RFC 09/10] irqdomain: remove irq_domain_generate_simple()
Nobody calls it; remove the function Signed-off-by: Grant Likely --- include/linux/irqdomain.h | 8 kernel/irq/irqdomain.c| 15 --- 2 files changed, 23 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f9e8e06..fe7c57d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -205,14 +205,6 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); -#if defined(CONFIG_OF_IRQ) -extern void irq_domain_generate_simple(const struct of_device_id *match, - u64 phys_base, unsigned int irq_start); -#else /* CONFIG_OF_IRQ */ -static inline void irq_domain_generate_simple(const struct of_device_id *match, - u64 phys_base, unsigned int irq_start) { } -#endif /* !CONFIG_OF_IRQ */ - #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } #endif /* !CONFIG_IRQ_DOMAIN */ diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 80e9249..e47b356 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -741,18 +741,3 @@ const struct irq_domain_ops irq_domain_simple_ops = { .xlate = irq_domain_xlate_onetwocell, }; EXPORT_SYMBOL_GPL(irq_domain_simple_ops); - -#ifdef CONFIG_OF_IRQ -void irq_domain_generate_simple(const struct of_device_id *match, - u64 phys_base, unsigned int irq_start) -{ - struct device_node *node; - pr_debug("looking for phys_base=%llx, irq_start=%i\n", - (unsigned long long) phys_base, (int) irq_start); - node = of_find_matching_node_by_address(NULL, match, phys_base); - if (node) - irq_domain_add_legacy(node, 32, irq_start, 0, - &irq_domain_simple_ops, NULL); -} -EXPORT_SYMBOL_GPL(irq_domain_generate_simple); -#endif -- 1.8.1.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC 08/10] irqdomain: Refactor irq_domain_associate_many()
Originally, irq_domain_associate_many() was designed to unwind the mapped irqs on a failure of any individual association. However, that proved to be a problem with certain IRQ controllers. Some of them only support a subset of irqs, and will fail when attempting to map a reserved IRQ. In those cases we want to map as many IRQs as possible, so instead it is better for irq_domain_associate_many() to make a best-effort attempt to map irqs, but not fail if any or all of them don't succeed. If a caller really cares about how many irqs got associated, then it should instead go back and check that all of the irqs it cares about were mapped. The original design open-coded the individual association code into the body of irq_domain_associate_many(), but now that associations no longer need to be unwound, it is simpler to split out irq_domain_associate() to contain the bulk of the logic, and to make irq_domain_associate_many() a simple loop wrapper. This patch also adds a new error check to the associate path to make sure it isn't called for an irq larger than the controller can handle, and adds locking so that the irq_domain_mutex is held while setting up a new association. Signed-off-by: Grant Likely --- include/linux/irqdomain.h | 22 +++--- kernel/irq/irqdomain.c| 185 +++--- 2 files changed, 101 insertions(+), 106 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index fd4b26f..f9e8e06 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -103,6 +103,7 @@ struct irq_domain { struct irq_domain_chip_generic *gc; /* reverse map data. 
The linear map gets appended to the irq_domain */ + irq_hw_number_t hwirq_max; unsigned int revmap_direct_max_irq; unsigned int revmap_size; struct radix_tree_root revmap_tree; @@ -110,8 +111,8 @@ struct irq_domain { }; #ifdef CONFIG_IRQ_DOMAIN -struct irq_domain *__irq_domain_add(struct device_node *of_node, - int size, int direct_max, +struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, + irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_add_simple(struct device_node *of_node, @@ -140,14 +141,14 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, size, 0, ops, host_data); + return __irq_domain_add(of_node, size, size, 0, ops, host_data); } static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, 0, max_irq, ops, host_data); + return __irq_domain_add(of_node, 0, max_irq, max_irq, ops, host_data); } static inline struct irq_domain *irq_domain_add_legacy_isa( struct device_node *of_node, @@ -166,14 +167,11 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node extern void irq_domain_remove(struct irq_domain *host); -extern int irq_domain_associate_many(struct irq_domain *domain, -unsigned int irq_base, -irq_hw_number_t hwirq_base, int count); -static inline int irq_domain_associate(struct irq_domain *domain, unsigned int irq, - irq_hw_number_t hwirq) -{ - return irq_domain_associate_many(domain, irq, hwirq, 1); -} +extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq); +extern void irq_domain_associate_many(struct irq_domain *domain, + unsigned int irq_base, + irq_hw_number_t hwirq_base, int count); extern unsigned int 
irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 280b804..80e9249 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -35,8 +35,8 @@ static struct irq_domain *irq_default_domain; * register allocated irq_domain with irq_domain_register(). Returns pointer * to IRQ domain, or NULL on failure. */ -struct irq_domain *__irq_domain_add(struct device_node *of_node, - int size, int direct_max, +struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, +
[RFC 06/10] irqdomain: Clean up aftermath of irq_domain refactoring
After refactoring the irqdomain code, there are a number of API functions that are merely empty wrappers around core code. Drop those wrappers out of the C file and replace them with static inlines in the header. Signed-off-by: Grant Likely --- arch/powerpc/platforms/cell/beat_interrupt.c | 2 +- arch/powerpc/platforms/powermac/smp.c| 2 +- include/linux/irqdomain.h| 31 +-- kernel/irq/irqdomain.c | 127 --- 4 files changed, 62 insertions(+), 100 deletions(-) diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 8c6dc42..9e5dfbc 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void) ppc_md.get_irq = beatic_get_irq; /* Allocate an irq host */ - beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); + beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL); BUG_ON(beatic_host == NULL); irq_set_default_host(beatic_host); } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bdb738a..f921067 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void) { int rc = -ENOMEM; - psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL); + psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL); if (psurge_host) psurge_secondary_virq = irq_create_direct_mapping(psurge_host); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 51ef84a..fd4b26f 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -110,6 +110,10 @@ struct irq_domain { }; #ifdef CONFIG_IRQ_DOMAIN +struct irq_domain *__irq_domain_add(struct device_node *of_node, + int size, int direct_max, + const struct irq_domain_ops *ops, + void *host_data); struct irq_domain *irq_domain_add_simple(struct device_node *of_node, 
unsigned int size, unsigned int first_irq, @@ -121,17 +125,30 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -struct irq_domain *irq_domain_add_linear(struct device_node *of_node, +extern struct irq_domain *irq_find_host(struct device_node *node); +extern void irq_set_default_host(struct irq_domain *host); + +/** + * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. + * @of_node: pointer to interrupt controller's device tree node. + * @size: Number of interrupts in the domain. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + */ +static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, unsigned int size, const struct irq_domain_ops *ops, -void *host_data); -struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, +void *host_data) +{ + return __irq_domain_add(of_node, size, 0, ops, host_data); +} +static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, -void *host_data); -extern struct irq_domain *irq_find_host(struct device_node *node); -extern void irq_set_default_host(struct irq_domain *host); - +void *host_data) +{ + return __irq_domain_add(of_node, 0, max_irq, ops, host_data); +} static inline struct irq_domain *irq_domain_add_legacy_isa( struct device_node *of_node, const struct irq_domain_ops *ops, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index c38be78..e0db59e 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -23,8 +23,11 @@ static DEFINE_MUTEX(revmap_trees_mutex); static struct irq_domain *irq_default_domain; /** - * irq_domain_alloc() - Allocate a new irq_domain data structure + * __irq_domain_add() - Allocate a new irq_domain data structure * @of_node: optional device-tree node of the interrupt controller + * @size: Size of linear 
map; 0 for radix mapping only + * @direct_max: Maximum value of direct maps; Use ~0 for no li
[RFC 07/10] irqdomain: Beef up debugfs output
This patch increases the amount of output produced by the irq_domain_mapping debugfs file by first listing all of the registered irq domains at the beginning of the output, and then by including all mapped IRQs in the output, not just the active ones. It is very useful when debugging irqdomain issues to be able to see the entire list of mapped irqs, not just the ones that happen to be connected to devices. Signed-off-by: Grant Likely --- kernel/irq/irqdomain.c | 35 ++- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index e0db59e..280b804 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -596,12 +596,29 @@ static int virq_debug_show(struct seq_file *m, void *private) { unsigned long flags; struct irq_desc *desc; - void *data; + struct irq_domain *domain; + struct radix_tree_iter iter; + void *data, **slot; int i; - seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", + seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", + "name", "mapped", "linear-max", "direct-max", "devtree-node"); + mutex_lock(&irq_domain_mutex); + list_for_each_entry(domain, &irq_domain_list, link) { + int count = 0; + radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) + count++; + seq_printf(m, "%c%-16s %6u %10u %10u %s\n", + domain == irq_default_domain ? '*' : ' ', domain->name, + domain->revmap_size + count, domain->revmap_size, + domain->revmap_direct_max_irq, + domain->of_node ? 
of_node_full_name(domain->of_node) : ""); + } + mutex_unlock(&irq_domain_mutex); + + seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq", "chip name", (int)(2 * sizeof(void *) + 2), "chip data", - "domain name"); + "active", "type", "domain"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -609,12 +626,15 @@ static int virq_debug_show(struct seq_file *m, void *private) continue; raw_spin_lock_irqsave(&desc->lock, flags); + domain = desc->irq_data.domain; - if (desc->action && desc->action->handler) { + if (domain) { struct irq_chip *chip; + int hwirq = desc->irq_data.hwirq; + bool direct; seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); + seq_printf(m, "0x%05x ", hwirq); chip = irq_desc_get_chip(desc); seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); @@ -622,6 +642,11 @@ static int virq_debug_show(struct seq_file *m, void *private) data = irq_desc_get_chip_data(desc); seq_printf(m, data ? "0x%p " : " %p ", data); + seq_printf(m, " %c", (desc->action && desc->action->handler) ? '*' : ' '); + direct = (i == hwirq) && (i < domain->revmap_direct_max_irq); + seq_printf(m, "%6s%-8s ", + (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", + direct ? "(DIRECT)" : ""); seq_printf(m, "%s\n", desc->irq_data.domain->name); } -- 1.8.1.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC 05/10] irqdomain: Eliminate revmap type
The NOMAP irq_domain type is only used by a handful of interrupt controllers and it unnecessarily complicates the code by adding special cases for how to look up mappings, and by using different revmap functions for each type, each of which needs to validate that the correct type is passed to it before performing the reverse map. Eliminating the revmap_type and making a single reverse mapping function simplifies the code. It also shouldn't be any slower than having separate revmap functions because the type of the revmap needed to be checked anyway. The linear and tree revmap types were already merged in a previous patch. This patch rolls the NOMAP or direct mapping behaviour into the same domain code, making it possible for an irq domain to do any mapping type (linear, tree or direct), with the mapping being transparent to the interrupt controller driver. With this change, direct mappings will get stored in the linear or tree mapping for consistency. Reverse mapping from the hwirq to virq will go through the normal lookup process. However, any controller using a direct mapping can take advantage of knowing that hwirq==virq for any mapped interrupts and skip doing a revmap lookup when handling IRQs. Signed-off-by: Grant Likely --- include/linux/irqdomain.h | 48 + kernel/irq/generic-chip.c | 5 + kernel/irq/irqdomain.c| 55 +++ 3 files changed, 43 insertions(+), 65 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 1cbb741..51ef84a 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -73,50 +73,42 @@ struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object * @link: Element in global irq_domain list. - * @revmap_type: Method used for reverse mapping hwirq numbers to linux irq. This - * will be one of the IRQ_DOMAIN_MAP_* values. + * @name: Name of interrupt domain * @ops: pointer to irq_domain methods * @host_data: private data pointer for use by owner. 
Not touched by irq_domain * core code. - * @irq_base: Start of irq_desc range assigned to the irq_domain. The creator - *of the irq_domain is responsible for allocating the array of - *irq_desc structures. - * @nr_irq: Number of irqs managed by the irq domain - * @hwirq_base: Starting number for hwirqs managed by the irq domain - * @of_node: (optional) Pointer to device tree nodes associated with the - * irq_domain. Used when decoding device tree interrupt specifiers. + * + * Optional elements + * @of_node: Pointer to device tree nodes associated with the irq_domain. Used + * when decoding device tree interrupt specifiers. + * @gc: Pointer to a list of generic chips. There is a helper function for + * setting up one or more generic chips for interrupt controllers + * drivers using the generic chip library which uses this pointer. + * + * Revmap data, used internally by irq_domain + * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that + * support direct mapping + * @revmap_size: Size of the linear map table @linear_revmap[] + * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map + * @linear_revmap: Linear table of hwirq->virq reverse mappings */ struct irq_domain { struct list_head link; const char *name; - - /* type of reverse mapping_technique */ - unsigned int revmap_type; - struct { - struct { - unsigned int size; - } linear; - struct { - unsigned int max_irq; - } nomap; - struct radix_tree_root tree; - } revmap_data; const struct irq_domain_ops *ops; void *host_data; - irq_hw_number_t inval_irq; - /* Optional device node pointer */ + /* Optional data */ struct device_node *of_node; - /* Optional pointer to generic interrupt chips */ struct irq_domain_chip_generic *gc; - /* Linear reverse map */ + /* reverse map data. 
The linear map gets appended to the irq_domain */ + unsigned int revmap_direct_max_irq; + unsigned int revmap_size; + struct radix_tree_root revmap_tree; unsigned int linear_revmap[]; }; -#define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ -#define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ - #ifdef CONFIG_IRQ_DOMAIN struct irq_domain *irq_domain_add_simple(struct device_node *of_node, unsigned int size, diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index ca98cc5..4b01106 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -270,10 +270,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, if (d->gc)
[RFC 04/10] irqdomain: merge linear and tree reverse mappings.
From: Grant Likely Keeping them separate makes irq_domain more complex and adds a lot of code (as proven by the diffstat). Merging them simplifies the whole scheme. This change makes it so both the tree and linear methods can be used by the same irq_domain instance. If the hwirq is less than the ->linear_size, then the linear map is used to reverse map the hwirq. Otherwise the radix tree is used. The test for which map to use is no more expensive than the existing code, so the performance of the fast path is preserved. It also means that complex interrupt controllers can use both the linear map and a tree in the same domain. This may be useful for an interrupt controller with a base set of core irqs and a large number of GPIOs which might be used as irqs. The linear map could cover the core irqs, and the tree used for the gpios. v2: Drop reorganization of revmap data Signed-off-by: Grant Likely Cc: Paul Mundt Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Rob Herring --- include/linux/irqdomain.h | 18 kernel/irq/irqdomain.c| 107 +- 2 files changed, 39 insertions(+), 86 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e5e513c..1cbb741 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -75,7 +75,6 @@ struct irq_domain_chip_generic; * @link: Element in global irq_domain list. * @revmap_type: Method used for reverse mapping hwirq numbers to linux irq. This * will be one of the IRQ_DOMAIN_MAP_* values. - * @revmap_data: Revmap method specific data. * @ops: pointer to irq_domain methods * @host_data: private data pointer for use by owner. Not touched by irq_domain * core code. 
@@ -93,10 +92,9 @@ struct irq_domain { /* type of reverse mapping_technique */ unsigned int revmap_type; - union { + struct { struct { unsigned int size; - unsigned int *revmap; } linear; struct { unsigned int max_irq; @@ -111,11 +109,13 @@ struct irq_domain { struct device_node *of_node; /* Optional pointer to generic interrupt chips */ struct irq_domain_chip_generic *gc; + + /* Linear reverse map */ + unsigned int linear_revmap[]; }; #define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ #define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ -#define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */ #ifdef CONFIG_IRQ_DOMAIN struct irq_domain *irq_domain_add_simple(struct device_node *of_node, @@ -137,10 +137,6 @@ struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data); -struct irq_domain *irq_domain_add_tree(struct device_node *of_node, -const struct irq_domain_ops *ops, -void *host_data); - extern struct irq_domain *irq_find_host(struct device_node *node); extern void irq_set_default_host(struct irq_domain *host); @@ -152,6 +148,12 @@ static inline struct irq_domain *irq_domain_add_legacy_isa( return irq_domain_add_legacy(of_node, NUM_ISA_INTERRUPTS, 0, 0, ops, host_data); } +static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, +const struct irq_domain_ops *ops, +void *host_data) +{ + return irq_domain_add_linear(of_node, 0, ops, host_data); +} extern void irq_domain_remove(struct irq_domain *host); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index b1b5e67..5a1d8ec 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -34,22 +34,24 @@ static struct irq_domain *irq_default_domain; * to IRQ domain, or NULL on failure. 
*/ static struct irq_domain *irq_domain_alloc(struct device_node *of_node, - unsigned int revmap_type, + unsigned int revmap_type, int size, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain; - domain = kzalloc_node(sizeof(*domain), GFP_KERNEL, - of_node_to_nid(of_node)); + domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), + GFP_KERNEL, of_node_to_nid(of_node)); if (WARN_ON(!domain)) return NULL; /* Fill structure */ + INIT_RADIX_TREE(&domain->revmap_data.tree, GFP
[RFC 03/10] irqdomain: Add a name field
This patch adds a name field to the irq_domain structure to help mere mortals understand the mappings between irq domains and virqs. It also converts a number of places that have open-coded some kind of fudging an irqdomain name to use the new field. This means a more consistent display of names in irq domain log messages and debugfs output. Signed-off-by: Grant Likely --- include/linux/irqdomain.h | 1 + kernel/irq/generic-chip.c | 1 + kernel/irq/irqdomain.c| 19 ++- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 6f06241..e5e513c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -89,6 +89,7 @@ struct irq_domain_chip_generic; */ struct irq_domain { struct list_head link; + const char *name; /* type of reverse mapping_technique */ unsigned int revmap_type; diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 95575d8..ca98cc5 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -305,6 +305,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, /* Calc pointer to the next generic chip */ tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); } + d->name = name; return 0; } EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 1ac8cf4..b1b5e67 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -410,12 +410,15 @@ int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, */ if (ret != -EPERM) { pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", - of_node_full_name(domain->of_node), hwirq, virq, ret); + domain->name, hwirq, virq, ret); } irq_data->domain = NULL; irq_data->hwirq = 0; continue; } + /* If not already assigned, give the domain the chip's name */ + if (!domain->name && irq_data->chip) + domain->name = irq_data->chip->name; } switch (domain->revmap_type) { @@ -708,8 +711,6 @@ 
static int virq_debug_show(struct seq_file *m, void *private) { unsigned long flags; struct irq_desc *desc; - const char *p; - static const char none[] = "none"; void *data; int i; @@ -731,20 +732,12 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "0x%05lx ", desc->irq_data.hwirq); chip = irq_desc_get_chip(desc); - if (chip && chip->name) - p = chip->name; - else - p = none; - seq_printf(m, "%-15s ", p); + seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); data = irq_desc_get_chip_data(desc); seq_printf(m, data ? "0x%p " : " %p ", data); - if (desc->irq_data.domain) - p = of_node_full_name(desc->irq_data.domain->of_node); - else - p = none; - seq_printf(m, "%s\n", p); + seq_printf(m, "%s\n", desc->irq_data.domain->name); } raw_spin_unlock_irqrestore(&desc->lock, flags); -- 1.8.1.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC 02/10] irqdomain: Replace LEGACY mapping with LINEAR
The LEGACY mapping unnecessarily complicates the irqdomain code and can easily be implemented with a linear mapping. By ripping it out and replacing it with the LINEAR mapping the object size of irqdomain.c shrinks by about 330 bytes (ARMv7) which offsets the additional allocation required by the linear map. It also makes it possible for current LEGACY map users to pre-allocate irq_descs for a subset of the hwirqs and dynamically allocate the rest as needed. Signed-off-by: Grant Likely Cc: Paul Mundt Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Rob Herring --- include/linux/irqdomain.h | 7 kernel/irq/irqdomain.c| 84 --- 2 files changed, 6 insertions(+), 85 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ba2c708..6f06241 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -95,11 +95,6 @@ struct irq_domain { union { struct { unsigned int size; - unsigned int first_irq; - irq_hw_number_t first_hwirq; - } legacy; - struct { - unsigned int size; unsigned int *revmap; } linear; struct { @@ -117,8 +112,6 @@ struct irq_domain { struct irq_domain_chip_generic *gc; }; -#define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs. -* ie. legacy 8259, gets irqs 1..15 */ #define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */ #define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */ #define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */ diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 61d6d3c..1ac8cf4 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -82,13 +82,6 @@ void irq_domain_remove(struct irq_domain *domain) mutex_lock(&irq_domain_mutex); switch (domain->revmap_type) { - case IRQ_DOMAIN_MAP_LEGACY: - /* -* Legacy domains don't manage their own irq_desc -* allocations, we expect the caller to handle irq_desc -* freeing on their own. 
-*/ - break; case IRQ_DOMAIN_MAP_TREE: /* * radix_tree_delete() takes care of destroying the root @@ -122,17 +115,6 @@ void irq_domain_remove(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_domain_remove); -static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, -irq_hw_number_t hwirq) -{ - irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq; - int size = domain->revmap_data.legacy.size; - - if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size)) - return 0; - return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq; -} - /** * irq_domain_add_simple() - Allocate and register a simple irq_domain. * @of_node: pointer to interrupt controller's device tree node. @@ -213,57 +195,17 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, void *host_data) { struct irq_domain *domain; - unsigned int i; - domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data); + pr_debug("Setting up legacy domain virq[%i:%i] ==> hwirq[%i:%i]\n", +first_irq, first_irq + size - 1, +(int)first_hwirq, (int)first_hwirq + size -1); + + domain = irq_domain_add_linear(of_node, first_hwirq + size, ops, host_data); if (!domain) return NULL; - domain->revmap_data.legacy.first_irq = first_irq; - domain->revmap_data.legacy.first_hwirq = first_hwirq; - domain->revmap_data.legacy.size = size; - - mutex_lock(&irq_domain_mutex); - /* Verify that all the irqs are available */ - for (i = 0; i < size; i++) { - int irq = first_irq + i; - struct irq_data *irq_data = irq_get_irq_data(irq); - - if (WARN_ON(!irq_data || irq_data->domain)) { - mutex_unlock(&irq_domain_mutex); - irq_domain_free(domain); - return NULL; - } - } + WARN_ON(irq_domain_associate_many(domain, first_irq, first_hwirq, size)); - /* Claim all of the irqs before registering a legacy domain */ - for (i = 0; i < size; i++) { - struct irq_data *irq_data = irq_get_irq_data(first_irq + i); - irq_data->hwirq = first_hwirq + i; - irq_data->domain = domain; - } - 
mutex_unlock(&irq_domain_mutex); - - for (i = 0; i < size; i++) { - int irq = first_irq + i; - int hwirq = first_hwirq + i; - - /* IRQ0 gets ignored
[RFC 01/10] irqdomain: Relax failure path on setting up mappings
Commit 98aa468e, "irqdomain: Support for static IRQ mapping and association" introduced an API for directly associating blocks of hwirqs to linux irqs. However, if any irq in that block failed to map (say if the mapping functions returns an error because the irq is already mapped) then the whole thing will fail and roll back. This is probably too aggressive since there are valid reasons why a mapping may fail. ie. Firmware may have a particular IRQ marked as unusable. This patch drops the error path out of irq_domain_associate(). If a mapping fails, then it is simply skipped. There is no reason to fail the entire allocation. v2: Still output an information message on failed mappings and make sure attempted mapping gets cleared out of the irq_data structure. Signed-off-by: Grant Likely Cc: Paul Mundt Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner --- kernel/irq/irqdomain.c | 16 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 20b677d..61d6d3c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -464,23 +464,15 @@ int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, /* * If map() returns -EPERM, this interrupt is protected * by the firmware or some other service and shall not -* be mapped. -* -* Since on some platforms we blindly try to map everything -* we end up with a log full of backtraces. -* -* So instead, we silently fail on -EPERM, it is the -* responsibility of the PIC driver to display a relevant -* message if needed. +* be mapped. Don't bother telling the user about it. 
*/ if (ret != -EPERM) { - pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n", - virq, hwirq, ret); - WARN_ON(1); + pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", + of_node_full_name(domain->of_node), hwirq, virq, ret); } irq_data->domain = NULL; irq_data->hwirq = 0; - goto err_unmap; + continue; } } -- 1.8.1.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
PowerPC assembler question
Hi all, I'm trying to fix a problem in the PowerPC backend of the Glasgow Haskell Compiler (GHC) and have a problem with the following instruction form: lwz 30, .label - (1b)(31) Reading the documentation I could find, I have figured out that this loads a 16 bit value into register 30. How it calculates that 16 bit value has got me somewhat flummoxed. Anybody care to explain? Cheers, Erik -- -- Erik de Castro Lopo http://www.mega-nerd.com/ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC 00/10] Refactor irqdomain
I've done a bunch of refactoring work on the irq_domain infrastructure. Some of these patches I've posted before, and some are brand new. The goal of this is to greatly simplify how irq_domains work. With this series, instead of there being multiple different types of irq domains, each with different mapping rules, there is now only one type of irq_domain that contains both kinds of map; the linear map for irqs below a certain value, and the radix tree for large & sparse irq controllers. As you can see from the following diffstat, the result is a fair bit less code. It should make it easier to understand irqdomains too. arch/powerpc/platforms/cell/beat_interrupt.c | 2 +- arch/powerpc/platforms/powermac/smp.c| 2 +- drivers/irqchip/Kconfig | 1 + drivers/irqchip/irq-versatile-fpga.c | 104 -- include/linux/irqdomain.h| 123 ++- kernel/irq/generic-chip.c| 6 +- kernel/irq/irqdomain.c | 555 -- 7 files changed, 282 insertions(+), 511 deletions(-) I've pushed this series out to my git server at the following branch: git://git.secretlab.ca/git/linux irqdomain/next It depends on the tip tree's irq/for-arm branch and also Linus' mainline (they need to be merged). The branch above includes both. I've tested this on ARM qemu models, but not much else. I'll test on real hardware before pushing out, but I would appreciate anybody doing additional testing, particularly on PowerPC and other non-ARM platforms. Cheers, g. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V3 1/2] powerpc, perf: Ignore separate BHRB privilege state filter request
Completely ignore BHRB privilege state filter request as we are already configuring that with privilege state filtering attribute for the accompanying PMU event. This would help achieve cleaner user space interaction for BHRB. This patch fixes a situation like this Before patch:- ./perf record -j any -e branch-misses:k ls Error: The sys_perf_event_open() syscall returned with 95 (Operation not supported) for event (branch-misses:k). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? Here 'perf record' actually copies over ':k' filter request into BHRB privilege state filter config and our previous check in kernel would fail that. After patch:- - ./perf record -j any -e branch-misses:k ls perf perf.data perf.data.old test-mmap-ring [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~102 samples)] Signed-off-by: Anshuman Khandual --- arch/powerpc/perf/power8-pmu.c | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f7d1c4f..371c6e7 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -523,18 +523,13 @@ static int power8_generic_events[] = { static u64 power8_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; - u64 br_privilege = branch_sample_type & ONLY_PLM; - /* BHRB and regular PMU events share the same prvillege state + /* BHRB and regular PMU events share the same privilege state * filter configuration. BHRB is always recorded along with a -* regular PMU event. So privilege state filter criteria for BHRB -* and the companion PMU events has to be the same. As a default -* "perf record" tool sets all privillege bits ON when no filter -* criteria is provided in the command line. So as along as all -* privillege bits are ON or they are OFF, we are good to go. +* regular PMU event. 
As the privilege state filter is handled +* in the basic PMC configuration of the accompanying regular +* PMU event, we ignore any separate BHRB specific request. */ - if ((br_privilege != 7) && (br_privilege != 0)) - return -1; /* No branch filter requested */ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V3 2/2] powerpc, perf: BHRB filter configuration should follow the task
When the task moves around the system, the corresponding cpuhw per-cpu structure should be populated with the BHRB filter request value so that PMU could be configured appropriately with that during the next call into power_pmu_enable(). Signed-off-by: Anshuman Khandual --- arch/powerpc/perf/core-book3s.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 426180b..48c68a8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1122,8 +1122,11 @@ nocheck: ret = 0; out: - if (has_branch_stack(event)) + if (has_branch_stack(event)) { power_pmu_bhrb_enable(event); + cpuhw->bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + } perf_pmu_enable(event->pmu); local_irq_restore(flags); -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V3 0/2] Improvement and fixes for BHRB
(1) The first patch fixes a situation like this Before patch:- ./perf record -j any -e branch-misses:k ls Error: The sys_perf_event_open() syscall returned with 95 (Operation not supported) for event (branch-misses:k). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? Here 'perf record' actually copies over ':k' filter request into BHRB privilege state filter config and our previous check in kernel would fail that. After patch:- - /perf record -j any -e branch-misses:k ls perf perf.data perf.data.old test-mmap-ring [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~102 samples) ] (2) The second patch fixes context migration for BHRB filter configuration Changes in V2 - (1) Updated the comment section of the code (2) Removed the informational print (3) Updated the commit section of the first patch Changes in V3 - (1) Fixed the compilation warning problem after removing the unused variable Anshuman Khandual (2): powerpc, perf: Ignore separate BHRB privilege state filter request powerpc, perf: BHRB filter configuration should follow the task arch/powerpc/perf/core-book3s.c | 5 - arch/powerpc/perf/power8-pmu.c | 13 - 2 files changed, 8 insertions(+), 10 deletions(-) -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V3 1/2] powerpc, perf: Ignore separate BHRB privilege state filter request
Completely ignore BHRB privilege state filter request as we are already configuring that with privilege state filtering attribute for the accompanying PMU event. This would help achieve cleaner user space interaction for BHRB. This patch fixes a situation like this Before patch:- ./perf record -j any -e branch-misses:k ls Error: The sys_perf_event_open() syscall returned with 95 (Operation not supported) for event (branch-misses:k). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? Here 'perf record' actually copies over ':k' filter request into BHRB privilege state filter config and our previous check in kernel would fail that. After patch:- - ./perf record -j any -e branch-misses:k ls perf perf.data perf.data.old test-mmap-ring [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~102 samples)] Signed-off-by: Anshuman Khandual --- arch/powerpc/perf/power8-pmu.c | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f7d1c4f..371c6e7 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -523,18 +523,13 @@ static int power8_generic_events[] = { static u64 power8_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; - u64 br_privilege = branch_sample_type & ONLY_PLM; - /* BHRB and regular PMU events share the same prvillege state + /* BHRB and regular PMU events share the same privilege state * filter configuration. BHRB is always recorded along with a -* regular PMU event. So privilege state filter criteria for BHRB -* and the companion PMU events has to be the same. As a default -* "perf record" tool sets all privillege bits ON when no filter -* criteria is provided in the command line. So as along as all -* privillege bits are ON or they are OFF, we are good to go. +* regular PMU event. 
As the privilege state filter is handled +* in the basic PMC configuration of the accompanying regular +* PMU event, we ignore any separate BHRB specific request. */ - if ((br_privilege != 7) && (br_privilege != 0)) - return -1; /* No branch filter requested */ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V3 2/2] powerpc, perf: BHRB filter configuration should follow the task
When the task moves around the system, the corresponding cpuhw per-cpu structure should be populated with the BHRB filter request value so that PMU could be configured appropriately with that during the next call into power_pmu_enable(). Signed-off-by: Anshuman Khandual --- arch/powerpc/perf/core-book3s.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 426180b..48c68a8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1122,8 +1122,11 @@ nocheck: ret = 0; out: - if (has_branch_stack(event)) + if (has_branch_stack(event)) { power_pmu_bhrb_enable(event); + cpuhw->bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + } perf_pmu_enable(event->pmu); local_irq_restore(flags); -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH V3 0/2] Improvement and fixes for BHRB
(1) The first patch fixes a situation like this Before patch:- ./perf record -j any -e branch-misses:k ls Error: The sys_perf_event_open() syscall returned with 95 (Operation not supported) for event (branch-misses:k). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? Here 'perf record' actually copies over ':k' filter request into BHRB privilege state filter config and our previous check in kernel would fail that. After patch:- - /perf record -j any -e branch-misses:k ls perf perf.data perf.data.old test-mmap-ring [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~102 samples) ] (2) The second patch fixes context migration for BHRB filter configuration Changes in V2 - (1) Updated the comment section of the code (2) Removed the informational print (3) Updated the commit section of the first patch Changes in V3 - (1) Fixed the compilation warning problem after removing the unused variable Anshuman Khandual (2): powerpc, perf: Ignore separate BHRB privilege state filter request powerpc, perf: BHRB filter configuration should follow the task arch/powerpc/perf/core-book3s.c | 5 - arch/powerpc/perf/power8-pmu.c | 13 - 2 files changed, 8 insertions(+), 10 deletions(-) -- 1.7.11.7 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Sun, Jun 9, 2013 at 9:20 PM, Jeremy Kerr wrote: > > So, we now use the original date header (if present) in the mbox views: > > $ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date > Date: Fri, 7 Jun 2013 15:42:54 +1000 > > ... for all your data-mining needs. Goodie, and I see that it even works with old patches. Thanks. Linus ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
Hi Linus, > No. The date from the email was > > Date: Fri, 7 Jun 2013 15:42:54 +1000 > > and we want *that* date. Ah, gotchya. So, we now use the original date header (if present) in the mbox views: $ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date Date: Fri, 7 Jun 2013 15:42:54 +1000 ... for all your data-mining needs. Cheers, Jeremy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Partial revert of "Context switch more PMU related SPRs"
On 06/06/2013 09:33 AM, Michael Ellerman wrote: > In commit 59affcd I added context switching of more PMU SPRs, because > they are potentially exposed to userspace on Power8. However despite me > being a smart arse in the commit message it's actually not correct. In > particular it interacts badly with a global perf record. Could you please explain how it would interact badly with a global perf record? If user space tries to mess around with the MMCR* registers itself, it would definitely interfere with the kernel's own PMU config going on during perf record. But that's how it works. So when we are actually dealing with MMCR* registers directly, we should not invoke a "perf record" session which can potentially run on the same PMU. Regards Anshuman ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Sun, Jun 9, 2013 at 8:20 PM, Linus Torvalds wrote: > > .. the rationale for this is that the work pattern of people is > actually interesting information. You can do things like this: > > git log --pretty=%aD --author=Torvalds Final side note: for me, and other git users that apply other people's patches, it's probably better to use git log --pretty=%cD --committer=Torvalds instead. Interestingly, that shows a different pattern than my "authorship" statistics, which are mainly pull requests. It turns out I commit patches much more in the afternoon. The reason is probably simple: in the mornings, I have pull requests waiting from overnight, so a fair number of pull requests where I am author at 9-11. But my biggest source of patches tends to be Andrew Morton, who sends the patches in the afternoon, so suddenly the commit counts skew towards being between 3pm-8pm when you take all my commits into account. Doing git log --since=6.months --pretty=%aD --grep=Signed.*Andrew.Morton backs that up: most of the commits that have sign-offs by Andrew are sent in the afternoon. I just find details like that really interesting, where you can actually mine for the work patterns of people. Linus ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Sun, Jun 9, 2013 at 8:06 PM, Linus Torvalds wrote: > > And it does matter. .. the rationale for this is that the work pattern of people is actually interesting information. You can do things like this: git log --pretty=%aD --author=Torvalds to see what my work pattern is, and I think that's *interesting*. Gathering statistics like whether people are generally doing 9-5 Mon-Fri is actually interesting data. You can do things like this: git log --since=6.months --pretty=%aD --author=Torvalds | cut -c1-3 | sort | uniq -c | sort -n and see (for example) that I do slow down on weekends. Same goes for things like what time of day ends up being most productive. You can do the statistics for me, and see that I tend to do the bulk of my pulls in the mornings (peak between 9-11) and that I'm not a night-owl (*big* drop-off after 8PM - that's what kids do to you). You can see a few really early-morning cases, but I suspect they were when I was jetlagged. So the date data is actually meaningful data. It's not just random noise. And to do these kinds of things, you absolutely have to have local-time with proper timezone information. Anything that screws that up is *broken*. git gets this right, unlike a lot of other broken SCM's. Git gets it right for a reason. Yeah, yeah, when people forward other peoples patches they often drop the date field, and the date of the patch ends up being the time that the last version of the patch got sent rather than anything else, so many of the statistics aren't valid. But a _tool_ that actively corrupts the date and time of a patch is just broken. Linus ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Sun, Jun 9, 2013 at 7:44 PM, Jeremy Kerr wrote: > > We keep all patch dates in UTC, but were generating the Date header > incorrectly. Now fixed: No, not fixed. Keeping patch dates in UTC *corrupts* the date. I'll ask people to stop using patchworks if it cannot keep track of emailed dates. The date very much is a local time WITH A TIMEZONE. And it does matter. > $ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date > Date: Fri, 07 Jun 2013 05:42:54 - No. The date from the email was Date: Fri, 7 Jun 2013 15:42:54 +1000 and we want *that* date. Not some random date that patchwork makes up that has no relevance. I know you have that date, because it shows up when asking for the headers in patchwork. Just use the right one, don't make up incorrect ones. Linus ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
Hi Linus, > Is Jeremy the patchwork maintainer? Yep, that's me. > and it turns out that apparently 'patchwork' is just making up random > times, because when you download the email as an mbox, it will turn > this into that corrupt and incorrect > > Date: Thu, 06 Jun 2013 19:42:54 - > > thing which is apparently how you got the wrong timestamp to begin with. We keep all patch dates in UTC, but were generating the Date header incorrectly. Now fixed: $ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date Date: Fri, 07 Jun 2013 05:42:54 - Commit is at: http://git.ozlabs.org/?p=patchwork;a=commitdiff;h=e7353352 Cheers, Jeremy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: PowerPC assembler question
On Mon, 2013-06-10 at 09:01 +1000, Erik de Castro Lopo wrote: > Hi all, > > I'm trying to fix a problem in the PowerPC backend of the Glasgow > Haskell Compiler (GHC) and have a problem with the following > instruction form: > > lwz 30, .label - (1b)(31) > > Reading the documentation I could find, I have figured out that this > loads a 16 bit value into register 30. How it calculates that 16 bit > value has got me somewhat flumoxed. > > Anybody care to explain? No, this loads a 32-bit value (16-bit would be lhz). Note: It's more readable if you use the register names, ie: lwz %r30, .label - (1b)(%r31) The form of lwz is lwz dest_reg, offset(address_reg) So it will load a 32-bit value from memory at the address contained in r31 offset by ".label - 1b" which is itself the difference between two labels, "label", and the first "1:" label before the instruction (gcc supports numeric labels that can be referenced with the suffix "b" for backward and "f" for forward which are handy for small displacements) So for example if 1: was the base of the structure and .label a field in the structure, it would load the 32-bit value of that field for the structure instance starting at %r31. In this case, this looks more like some kind of position-independent code though. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
PowerPC assembler question
Hi all, I'm trying to fix a problem in the PowerPC backend of the Glasgow Haskell Compiler (GHC) and have a problem with the following instruction form: lwz 30, .label - (1b)(31) Reading the documentation I could find, I have figured out that this loads a 16 bit value into register 30. How it calculates that 16 bit value has got me somewhat flummoxed. Anybody care to explain? Cheers, Erik -- -- Erik de Castro Lopo http://www.mega-nerd.com/ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[git pull] Please pull powerpc.git merge branch
Hi Linus ! Here's the previous pull request with a couple of commits removed, this is purely regressions (though not all recent ones) or stable material. Cheers, Ben. The following changes since commit 1612e111e4e565422242727efb59499cce8738e4: Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2013-06-06 18:09:05 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge for you to fetch changes up to b11ae95100f7061b39a15e5c1ecbf862464ac4b4: powerpc: Partial revert of "Context switch more PMU related SPRs" (2013-06-10 08:36:35 +1000) Gavin Shan (1): powerpc/eeh: Don't check RTAS token to get PE addr Kevin Hao (1): powerpc/pci: Check the bus address instead of resource address in pcibios_fixup_resources Michael Ellerman (2): powerpc/perf: Fix deadlock caused by calling printk() in PMU exception powerpc: Partial revert of "Context switch more PMU related SPRs" Michael Neuling (4): powerpc/power8: Fix oprofile and perf powerpc/pseries: Simplify denormalization handler powerpc/power8: Update denormalization handler powerpc/hw_breakpoints: Add DABRX cpu feature to fix 32-bit regression arch/powerpc/include/asm/cputable.h | 17 +++-- arch/powerpc/kernel/cputable.c |8 +-- arch/powerpc/kernel/entry_64.S | 28 arch/powerpc/kernel/exceptions-64s.S | 90 -- arch/powerpc/kernel/pci-common.c |4 +- arch/powerpc/kernel/process.c|3 +- arch/powerpc/perf/core-book3s.c |2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 12 ++-- 8 files changed, 51 insertions(+), 113 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Sun, 2013-06-09 at 14:33 -0700, Linus Torvalds wrote: > On Sun, Jun 9, 2013 at 12:56 AM, Benjamin Herrenschmidt > wrote: > > > > Here are a few more powerpc changes for 3.10. I've merged your > > tree in at some point (which I generally avoid) in order to get > > the compat network fixes as soon as possible. > > > > Mostly regressions, and stuff I judged could/should still go in at > > this stage. > > Not pulled, because your hamster smells of eldeberries. > > This is not just bugfixes. In fact, as far as I can tell, this > *introduces* bugs, with that "get_user()" in the exception path that > can apparently happen with irqs disabled and will thus potentially > result in new warnings that just make things unreadable. Ah right, brown paper bag for Anton and I :( I shouldn't have put that one in, it was a last minute bad decision after spending time tracking another stupid sigill in userspace (this time FSL CPUs not implementing some optional instructions that Fedora compiler seems configured to generate nowadays). > I'm f*cking tired of people having problems understanding "we're past > rc5". If it's not something you would call stable material, you > shouldn't send it to me. I've taken out that commit and the rename of the PMU interrupt (which while trivial probably wasn't important enough). Everything else is regressions/stable material. That does mean I rebased, but normally nobody bases on that merge branch so it should be fine. I'll send a new pull request. Cheers, Ben. > Linus > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
[ Is Jeremy the patchwork maintainer? If not, can people forward this to the real maintainer? ] On Sun, Jun 9, 2013 at 2:33 PM, Linus Torvalds wrote: > > This is not just bugfixes. In fact, as far as I can tell, this > *introduces* bugs, with that "get_user()" in the exception path that > can apparently happen with irqs disabled and will thus potentially > result in new warnings that just make things unreadable. Looking at that particular commit, I also notice that the commit itself is buggered in other ways too. It says: Date: Thu Jun 6 19:42:54 2013 + which surprised me due to the odd timezone, and it turns out it is pure and utter crap. Google finds the patch in patchwork, and "Show headers" there shows the expected timezone Date: Fri, 7 Jun 2013 15:42:54 +1000 and it turns out that apparently 'patchwork' is just making up random times, because when you download the email as an mbox, it will turn this into that corrupt and incorrect Date: Thu, 06 Jun 2013 19:42:54 - thing which is apparently how you got the wrong timestamp to begin with. That odd time doesn't even make any sense that I can see, because those two times have absolutely nothing in common afaik. Unless timezones work differently down under. It looks like some west-coast local time, but then it says "-" which is code for "I have no f*cking clue what I'm doing". Just to make things extra exciting, patchwork actually shows yet *another* date string when you just look at the patch in the web interface: Date June 7, 2013, 5:42 a.m. and that actually seems to be the *correct* UTC version of that original email date. I have no idea what that "Thu, 06 Jun 2013 19:42:54 -" date is, and where it came from. But it is utter shite. Can somebody please make sure that patchwork doesn't destroy timezone/date information? I'm assuming this has been going on forever, and I just noticed because I looked at that particular commit for other reasons, and went "Is Anton in Europe now?". 
Linus ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Sun, Jun 9, 2013 at 12:56 AM, Benjamin Herrenschmidt wrote: > > Here are a few more powerpc changes for 3.10. I've merged your > tree in at some point (which I generally avoid) in order to get > the compat network fixes as soon as possible. > > Mostly regressions, and stuff I judged could/should still go in at > this stage. Not pulled, because your hamster smells of elderberries. This is not just bugfixes. In fact, as far as I can tell, this *introduces* bugs, with that "get_user()" in the exception path that can apparently happen with irqs disabled and will thus potentially result in new warnings that just make things unreadable. I'm f*cking tired of people having problems understanding "we're past rc5". If it's not something you would call stable material, you shouldn't send it to me. Linus ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space
Sorry, Please ignore this patch. This is replaced. Replace by: http://patchwork.ozlabs.org/patch/250033/ - dongsheng > -Original Message- > From: Wang Dongsheng-B40534 > Sent: Sunday, June 09, 2013 1:23 PM > To: b...@kernel.crashing.org; johan...@sipsolutions.net; an...@enomsg.org > Cc: Wood Scott-B07421; ga...@kernel.crashing.org; linuxppc- > d...@lists.ozlabs.org; Wang Dongsheng-B40534 > Subject: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access > to kernel space > > If PID is used in the TLB, after hibernation resume, the user > threads will access to kernel space. > > We must restore PID register, because TLB will use PID. The > hibernation suspend flow is trapped from user space to kernel > space, the PID register is user thread pid. > > The hibernation resume is begin in kernel start flow, the PID > alway 0. After the kernel thread back to user thread, there is > not have context switch and the pid can not update, because the > kernel thread is trapped form user space. So if we did't restore > PID the user space of thread will be addressing in the kernel > space. > > There are two ways to restore PID: > 1/ In swsusp_arch_suspend/swsusp_arch_resume, save/resotre PID register. > 2/ Form restore_processor_state to restore. this function will >do context switch. >switch_mmu_context(current->active_mm, current->active_mm) > > PPC32 Using the second method. For consistency reason, PPC64 using > the same way. 
> > Signed-off-by: Wang Dongsheng > --- > arch/powerpc/kernel/swsusp.c | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c > index eae33e1..1930e44 100644 > --- a/arch/powerpc/kernel/swsusp.c > +++ b/arch/powerpc/kernel/swsusp.c > @@ -32,7 +32,5 @@ void save_processor_state(void) > > void restore_processor_state(void) > { > -#ifdef CONFIG_PPC32 > switch_mmu_context(current->active_mm, current->active_mm); > -#endif > } > -- > 1.8.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH v2 1/2] powerpc: add Book E support to 64-bit hibernation
Sorry, Please ignore this patch. This is replaced. Replace by: http://patchwork.ozlabs.org/patch/250032/ - dongsheng > -Original Message- > From: Wang Dongsheng-B40534 > Sent: Sunday, June 09, 2013 1:21 PM > To: johan...@sipsolutions.net; an...@enomsg.org; Wood Scott-B07421 > Cc: ga...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org; Wang > Dongsheng-B40534 > Subject: [PATCH v2 1/2] powerpc: add Book E support to 64-bit hibernation > > Update the 64-bit hibernation code to support Book E CPUs. > Some registers and instructions are not defined for Book3e > (SDR reg, tlbia instruction). > > SDR: Storage Description Register. Book3S and Book3E have different > address translation mode, we do not need HTABORG & HTABSIZE to > translate virtual address to real address. > > More registers are saved in BookE-64bit.(TCR, SPRGx, ...) > > Signed-off-by: Wang Dongsheng > --- > v2: > * Add: _tlbil_all > * > * The boot core get a virtual address, when the boot process, > * the virtual address corresponds to a physical address. After > * hibernation resume memory snapshots, The corresponding > * relationship between the virtual memory and physical memory > * might change again. We need to get a new page table. So we > * need to invalidate TLB after resume pages. > * > * Invalidations TLB Using tlbilx/tlbivax/MMUCSR0. > * tlbilx used here. > * > * Add: save/restore PID > * > * We must restore PID register, because TLB will use PID. The > * hibernation suspend flow is trapped from user space to kernel > * space, the PID register is user thread pid. > * > * The hibernation resume is begin in kernel start flow, the PID > * alway 0. After the kernel thread back to user thread, there is > * not have context switch and the pid can not update, because the > * kernel thread is trapped form user space. So if we did't restore > * PID the user space of thread will be addressing in the kernel > * space. 
> * > * There are two ways to restore PID: > * 1/ In this file save/resotre PID register. > * 2/ Form restore_processor_state to restore. this function will > *do context switch. > *switch_mmu_context(current->active_mm, current->active_mm) > * > * PPC32 Using the second method. For consistency reason, PPC64 > * using the same way. > * > * History: > * Wood Scott(A): Please investigate the issue of whether we are loading > *kernel module code in this step > * R: Kernel will allocate the memory for module code segment and data > *segment. First allocate a memory, and copy the umod to hdr members > *of the struct load_info. Due to the temporary assigned module > belongs > *to the kernel space, so it belongs to the kernel data. > * > *The kernel of all data will be saved when hibernation suspend. So > *the module which has already been inserted will be saved. > > arch/powerpc/kernel/swsusp_asm64.S | 102 > - > 1 file changed, 100 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/kernel/swsusp_asm64.S > b/arch/powerpc/kernel/swsusp_asm64.S > index 86ac1d9..c7e2b4a 100644 > --- a/arch/powerpc/kernel/swsusp_asm64.S > +++ b/arch/powerpc/kernel/swsusp_asm64.S > @@ -46,10 +46,30 @@ > #define SL_r29 0xe8 > #define SL_r30 0xf0 > #define SL_r31 0xf8 > -#define SL_SIZE SL_r31+8 > +#define SL_SPRG0 0x100 > +#define SL_SPRG1 0x108 > +#define SL_SPRG2 0x110 > +#define SL_SPRG3 0x118 > +#define SL_SPRG4 0x120 > +#define SL_SPRG5 0x128 > +#define SL_SPRG6 0x130 > +#define SL_SPRG7 0x138 > +#define SL_TCR 0x140 > +#define SL_PID 0x148 > +#define SL_SIZE SL_PID+8 > > /* these macros rely on the save area being > * pointed to by r11 */ > + > +#define SAVE_SPR(register) \ > + mfspr r0,SPRN_##register ;\ > + std r0,SL_##register(r11) > +#define RESTORE_SPR(register)\ > + ld r0,SL_##register(r11) ;\ > + mtspr SPRN_##register,r0 > +#define RESTORE_SPRG(n) \ > + ld r0,SL_SPRG##n(r11) ;\ > + mtsprg n,r0 > #define SAVE_SPECIAL(special)\ > mf##special r0 ;\ > std r0, SL_##special(r11) 
> @@ -103,8 +123,22 @@ _GLOBAL(swsusp_arch_suspend) > SAVE_REGISTER(r30) > SAVE_REGISTER(r31) > SAVE_SPECIAL(MSR) > - SAVE_SPECIAL(SDR1) > SAVE_SPECIAL(XER) > +#ifdef CONFIG_PPC_BOOK3S_64 > + SAVE_SPECIAL(SDR1) > +#else > + SAVE_SPR(TCR) > + /* Save SPRGs */ > + SAVE_SPR(SPRG0) > + SAVE_SPR(SPRG1) > + SAVE_SPR(SPRG2) > + SAVE_SPR(SPRG3) > + SAVE_SPR(SPRG4) > + SAVE_SPR(SPRG5) > + SAVE_SPR(SPRG6) > + SAVE_SPR(SPRG7) > + SAVE_SPR(PID); > +#endif > > /* we push the stack up 128 bytes but don't store the >* stack pointer on the stack like a real stackframe */ > @@ -151,6 +185,7 @@ copy_page_loop: > bn
[PATCH 2/2] powerpc/hibernate: add restore mmu context after resume
add restore_mmu_context to replace switch_mmu_context in restore_processor_state, because the switch_mmu_context will do a whole pile of stuff that are probably completely unnecessary. There just need to restore the existing process context, and invalidate TLB for boot core. Signed-off-by: Wang Dongsheng --- arch/powerpc/include/asm/tlbflush.h | 2 ++ arch/powerpc/kernel/swsusp.c| 4 +--- arch/powerpc/mm/tlb_nohash.c| 12 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 61a5927..c401fe3 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -44,6 +44,8 @@ extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int tsize, int ind); +extern void restore_mmu_context(void); + #ifdef CONFIG_SMP extern void flush_tlb_mm(struct mm_struct *mm); extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index eae33e1..0b104d7 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -32,7 +32,5 @@ void save_processor_state(void) void restore_processor_state(void) { -#ifdef CONFIG_PPC32 - switch_mmu_context(current->active_mm, current->active_mm); -#endif + restore_mmu_context(); } diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index df32a83..a5a0708 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -39,10 +39,12 @@ #include #include +#include #include #include #include #include +#include #include "mmu_decl.h" @@ -193,6 +195,16 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(local_flush_tlb_page); +void restore_mmu_context(void) +{ + struct mm_struct *mm = current->active_mm; + + set_context(mm->context.id, mm->pgd); + + 
_tlbil_all(); +} +EXPORT_SYMBOL(restore_mmu_context); + /* * And here are the SMP non-local implementations */ -- 1.8.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2] powerpc: add Book E support to 64-bit hibernation
Update the 64-bit hibernation code to support Book E CPUs. Some registers and instructions are not defined for Book3e (SDR reg, tlbia instruction). SDR: Storage Description Register. Book3S and Book3E have different address translation mode, we do not need HTABORG & HTABSIZE to translate virtual address to real address. More registers are saved in BookE-64bit.(TCR, SPRGx, ...) Signed-off-by: Wang Dongsheng --- * History: * Wood Scott(A): Please investigate the issue of whether we are loading *kernel module code in this step * R: Kernel will allocate the memory for module code segment and data *segment. First allocate a memory, and copy the umod to hdr members *of the struct load_info. Due to the temporary assigned module belongs *to the kernel space, so it belongs to the kernel data. * *The kernel of all data will be saved when hibernation suspend. So *the module which has already been inserted will be saved. arch/powerpc/kernel/swsusp_asm64.S | 64 -- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 86ac1d9..608e4ceb 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -46,10 +46,29 @@ #define SL_r29 0xe8 #define SL_r30 0xf0 #define SL_r31 0xf8 -#define SL_SIZESL_r31+8 +#define SL_SPRG0 0x100 +#define SL_SPRG1 0x108 +#define SL_SPRG2 0x110 +#define SL_SPRG3 0x118 +#define SL_SPRG4 0x120 +#define SL_SPRG5 0x128 +#define SL_SPRG6 0x130 +#define SL_SPRG7 0x138 +#define SL_TCR 0x140 +#define SL_SIZESL_TCR+8 /* these macros rely on the save area being * pointed to by r11 */ + +#define SAVE_SPR(register) \ + mfspr r0,SPRN_##register ;\ + std r0,SL_##register(r11) +#define RESTORE_SPR(register) \ + ld r0,SL_##register(r11) ;\ + mtspr SPRN_##register,r0 +#define RESTORE_SPRG(n)\ + ld r0,SL_SPRG##n(r11) ;\ + mtsprg n,r0 #define SAVE_SPECIAL(special) \ mf##special r0 ;\ std r0, SL_##special(r11) @@ -103,8 +122,21 @@ _GLOBAL(swsusp_arch_suspend) 
SAVE_REGISTER(r30) SAVE_REGISTER(r31) SAVE_SPECIAL(MSR) - SAVE_SPECIAL(SDR1) SAVE_SPECIAL(XER) +#ifdef CONFIG_PPC_BOOK3S_64 + SAVE_SPECIAL(SDR1) +#else + SAVE_SPR(TCR) + /* Save SPRGs */ + SAVE_SPR(SPRG0) + SAVE_SPR(SPRG1) + SAVE_SPR(SPRG2) + SAVE_SPR(SPRG3) + SAVE_SPR(SPRG4) + SAVE_SPR(SPRG5) + SAVE_SPR(SPRG6) + SAVE_SPR(SPRG7) +#endif /* we push the stack up 128 bytes but don't store the * stack pointer on the stack like a real stackframe */ @@ -151,6 +183,7 @@ copy_page_loop: bne+copyloop nothing_to_copy: +#ifdef CONFIG_PPC_BOOK3S_64 /* flush caches */ lis r3, 0x10 mtctr r3 @@ -167,6 +200,7 @@ nothing_to_copy: sync tlbia +#endif ld r11,swsusp_save_area_ptr@toc(r2) @@ -208,16 +242,42 @@ nothing_to_copy: RESTORE_REGISTER(r29) RESTORE_REGISTER(r30) RESTORE_REGISTER(r31) + +#ifdef CONFIG_PPC_BOOK3S_64 /* can't use RESTORE_SPECIAL(MSR) */ ld r0, SL_MSR(r11) mtmsrd r0, 0 RESTORE_SPECIAL(SDR1) +#else + /* Save SPRGs */ + RESTORE_SPRG(0) + RESTORE_SPRG(1) + RESTORE_SPRG(2) + RESTORE_SPRG(3) + RESTORE_SPRG(4) + RESTORE_SPRG(5) + RESTORE_SPRG(6) + RESTORE_SPRG(7) + + RESTORE_SPECIAL(MSR) + + /* Restore TCR and clear any pending bits in TSR. */ + RESTORE_SPR(TCR) + lis r0, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h + mtspr SPRN_TSR,r0 + + /* Kick decrementer */ + li r0,1 + mtdec r0 +#endif RESTORE_SPECIAL(XER) sync addir1,r1,-128 +#ifdef CONFIG_PPC_BOOK3S_64 bl slb_flush_and_rebolt +#endif bl do_after_copyback addir1,r1,128 -- 1.8.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 5/5] powerpc/tm: Fix return of active 64bit signals
Currently we only restore signals which are transactionally suspended but it's possible that the transaction can be restored even when it's active. Most likely this will result in a transactional rollback by the hardware as the transaction will have been doomed by an earlier treclaim. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes assumptions based on having software rollback. This changes the signal return code to always restore both contexts on 64 bit signal return. It also ensures that the MSR TM bits are properly restored from the signal context which they are not currently. Signed-off-by: Michael Neuling cc: sta...@vger.kernel.org (v3.9 only) --- arch/powerpc/kernel/signal_64.c |8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 3459473..887e99d 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + /* pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ @@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this: */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { @@ -654,7 +656,7 @@ int 
sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; - if (MSR_TM_SUSPENDED(msr)) { + if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 4/5] powerpc/tm: Fix return of 32bit rt signals to active transactions
Currently we only restore signals which are transactionally suspended but it's possible that the transaction can be restored even when it's active. Most likely this will result in a transactional rollback by the hardware as the transaction will have been doomed by an earlier treclaim. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes assumptions based on having software rollback. This changes the signal return code to always restore both contexts on 32 bit rt signal return. Signed-off-by: Michael Neuling cc: sta...@vger.kernel.org (v3.9 only) --- arch/powerpc/kernel/signal_32.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 364cb1e..0f83122 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1245,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR])) goto bad; - if (MSR_TM_SUSPENDED(msr_hi<<32)) { + if (MSR_TM_ACTIVE(msr_hi<<32)) { /* We only recheckpoint on return if we're * transaction. */ -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 3/5] powerpc/tm: Fix restoration of MSR on 32bit signal return
Currently we clear out the MSR TM bits on signal return assuming that the signal should never return to an active transaction. This is bogus as the user may do this. It's most likely the transaction will be doomed due to a treclaim but that's a problem for the HW not the kernel. The current code is a legacy of earlier kernel implementations which did software rollback of active transactions in the kernel. That code has now gone but we didn't correctly fix up this part of the signals code which still makes the assumption that it must be returning to a suspended transaction. This pulls out both MSR TM bits from the user supplied context rather than just setting TM suspend. We pull out only the bits needed to ensure the user can't do anything dangerous to the MSR. Signed-off-by: Michael Neuling cc: sta...@vger.kernel.org (v3.9 only) --- arch/powerpc/kernel/signal_32.c |9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fa81462..364cb1e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -754,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { long err; - unsigned long msr; + unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; #endif @@ -859,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_enable(); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); - /* The task has moved into TM state S, so ensure MSR reflects this */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; + /* Get the top half of the MSR */ + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + return 1; + /* Pull in MSR TM from user context */ + regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK); /* This loads the speculative FP/VEC state, if used */ if (msr & MSR_FP) { -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org 
https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 2/5] powerpc/tm: Fix 32 bit non-rt signals
Currently sys_sigreturn() is TM unaware. Therefore, if we take a 32 bit signal without SIGINFO (non RT) inside a transaction, on signal return we don't restore the signal frame correctly. This checks if the signal frame being restoring is an active transaction, and if so, it copies the additional state to ptregs so it can be restored. Signed-off-by: Michael Neuling cc: sta...@vger.kernel.org (v3.9 only) --- arch/powerpc/kernel/signal_32.c | 30 +- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5bc819f..fa81462 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1494,16 +1494,22 @@ badframe: long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, struct pt_regs *regs) { + struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; void __user *addr; sigset_t set; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + struct mcontext __user *mcp, *tm_mcp; + unsigned long msr_hi; +#endif /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; - sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); + sc = &sf->sctx; addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1520,11 +1526,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif set_current_blocked(&set); - sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mcp = (struct mcontext __user *)&sf->mctx; + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; + if (MSR_TM_ACTIVE(msr_hi<<32)) { + if (!cpu_has_feature(CPU_FTR_TM)) + 
goto badframe; + if (restore_tm_user_regs(regs, mcp, tm_mcp)) + goto badframe; + } else +#endif + { + sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); + addr = sr; + if (!access_ok(VERIFY_READ, sr, sizeof(*sr)) + || restore_user_regs(regs, sr, 1)) + goto badframe; + } set_thread_flag(TIF_RESTOREALL); return 0; -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/5] powerpc/tm: Fix writing top half of MSR on 32 bit signals
The MSR TM controls are in the top 32 bits of the MSR hence on 32 bit signals, we stick the top half of the MSR in the checkpointed signal context so that the user can access it. Unfortunately, we don't currently write anything to the checkpointed signal context when coming in a from a non transactional process and hence the top MSR bits can contain junk. This updates the 32 bit signal handling code to always write something to the top MSR bits so that users know if the process is transactional or not and the kernel can use it on signal return. Signed-off-by: Michael Neuling cc: sta...@vger.kernel.org (v3.9 only) --- arch/powerpc/kernel/signal_32.c | 29 + 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 201385c..5bc819f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, * altivec/spe instructions at some point. 
*/ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - int sigret, int ctx_has_vsx_region) + struct mcontext __user *tm_frame, int sigret, + int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, if (__put_user(msr, &frame->mc_gregs[PT_MSR])) return 1; + /* We need to write 0 the MSR top 32 bits in the tm frame so that we +* can check it on the restore to see if TM is active +*/ + if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR])) + return 1; + if (sigret) { /* Set up the sigreturn trampoline: li r0,sigret; sc */ if (__put_user(0x3800UL + sigret, &frame->tramp[0]) @@ -952,6 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; + struct mcontext __user *tm_frame = NULL; void __user *addr; unsigned long newsp = 0; int sigret; @@ -985,23 +993,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_frame = &rt_sf->uc_transact.uc_mcontext; if (MSR_TM_ACTIVE(regs->msr)) { - if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext, - &rt_sf->uc_transact.uc_mcontext, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret)) goto badframe; } else #endif - if (save_user_regs(regs, frame, sigret, 1)) + { + if (save_user_regs(regs, frame, tm_frame, sigret, 1)) goto badframe; + } regs->link = tramp; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) - || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext), - &rt_sf->uc_transact.uc_regs)) + || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; } else @@ -1170,7 +1179,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx, mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); if 
(!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || save_user_regs(regs, mctx, 0, ctx_has_vsx_region) + || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked) || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) return -EFAULT; @@ -1392,6 +1401,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, { struct sigcontext __user *sc; struct sigframe __user *frame; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; int sigret; unsigned long tramp; @@ -1425,6 +1435,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; if (MSR_TM_ACTIVE(regs->msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, sigret)) @@ -1432,8 +1443,10 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, } else #endif - if (save_user_regs(regs, &frame->mctx, sigret, 1)) + { + if (save_user_regs(regs, &frame->mctx, tm_mct
Re: [PATCH 2/5] powerpc/tm: Fix 32 bit non-rt signals
Benjamin Herrenschmidt wrote: > On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote: > > Currently sys_sigreturn() is TM unaware. Therefore, if we take a 32 bit > > signal > > without SIGINFO (non RT) inside a transaction, on signal return we don't > > restore the signal frame correctly. > > > > This checks if the signal frame being restoring is an active transaction, > > and > > if so, it copies the additional state to ptregs so it can be restored. > > > > Signed-off-by: Michael Neuling > > --- > > .../... > > > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > > + mcp = (struct mcontext __user *)&sf->mctx; > > + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; > > + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) > > goto badframe; > > + if MSR_TM_ACTIVE(msr_hi<<32) { > > Mising ( and ). I'll apply that fix locally. > > Appart from that, I suppose it's ok. I don't see any exposure > coming from users "cooking" the tm_frame and calling sigreturn, > so as long as we are confident userspace generally only uses > sigreturn with frames it got from an actual signal, and doesn't > try to "generate" frames by hand, we should be ok. We should add a has_cpu_feature(TM) here also in case someone cooks up an sig frame with MSR TM active, but on a non TM CPU. This could possibly result in a trecheckpoint on a non TM CPU hence an illegal in the kernel. I'll repost. Thanks, Mikey ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/5] powerpc/tm: Fix restoration of MSR on 32bit signal return
Benjamin Herrenschmidt wrote: > On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote: > > Currently we clear out the MSR TM bits on signal return assuming that the > > signal should never return to an active transaction. > > > > This is bogus as the user may do this. It's most likely the transaction > > will > > be doomed due to a treclaim but that's a problem for the HW not the kernel. > > > > This removes the stripping of these MSR TM bits. > > > > Signed-off-by: Michael Neuling > > --- > > > @@ -859,8 +860,10 @@ static long restore_tm_user_regs(struct pt_regs *regs, > > tm_enable(); > > /* This loads the checkpointed FP/VEC state, if used */ > > tm_recheckpoint(&current->thread, msr); > > - /* The task has moved into TM state S, so ensure MSR reflects this */ > > - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; > > + /* Retore the top half of the MSR */ > > + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) > > + return 1; > > + regs->msr = (regs->msr | (((unsigned long)msr_hi) << 32)); > > What kind of damage can I do by calling sigreturn with a cooked > frame with random MSR bits set ? You should probably filter > what bits you allow to come from the frame. Lots of damage... good point. We do that in the 64 bit version of this. I'll update to do the same. > Additionally, I would also make sure I only do that if the CPU > features say TM is supported in case that MSR bit means something else > on a different/older CPU... Ok, I'll add that also. Mikey ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Linux 3.9.5 [patch to fix powerpc build error attached]
Guenter Roeck wrote: > On Fri, Jun 07, 2013 at 12:58:16PM -0700, Greg KH wrote: > > I'm announcing the release of the 3.9.5 kernel. > > > > All users of the 3.9 kernel series must upgrade. > > > > The updated 3.9.y git tree can be found at: > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git > > linux-3.9.y > > and can be browsed at the normal kernel.org git web browser: > > > > http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary > > > Powerpc build is fixed with the following patch. > > From 8ce3edac0a86f103bd1037110662d5d80a42dd5b Mon Sep 17 00:00:00 2001 > From: Guenter Roeck > Date: Sat, 8 Jun 2013 07:23:47 -0700 > Subject: [PATCH] powerpc: Fix build error in stable/3.9 > > Commit e71c42189 (powerpc/tm: Abort on emulation and alignment faults) > introduced a powerpc build error in 3.9.5. ACK. Looks like ba12eed (powerpc: Exception hooks for context tracking subsyste) was the cause (but not a prerequisite) that was introduced post 3.9. Thanks, Mikey > > Signed-off-by: Guenter Roeck > --- > No upstream commit. Compile tested only. > > arch/powerpc/kernel/traps.c |2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c > index 1c22b2d..29857c6 100644 > --- a/arch/powerpc/kernel/traps.c > +++ b/arch/powerpc/kernel/traps.c > @@ -1151,7 +1151,7 @@ void alignment_exception(struct pt_regs *regs) > local_irq_enable(); > > if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) > - goto bail; > + return; > > /* we don't implement logging of alignment exceptions */ > if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) > -- > 1.7.9.7 > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH] powerpc/mpc85xx: fix non-bootcpu cannot up after hibernation resume
Hi kumar, Could you apply this patch? Thanks. -dongsheng > -Original Message- > From: Anton Vorontsov [mailto:an...@scarybugs.org] On Behalf Of Anton > Vorontsov > Sent: Friday, May 24, 2013 1:34 AM > To: Wang Dongsheng-B40534 > Cc: pau...@samba.org; r...@sisk.pl; b...@kernel.crashing.org; > johan...@sipsolutions.net; Wood Scott-B07421; Li Yang-R58472; Zhao > Chenhui-B35336; linuxppc-dev@lists.ozlabs.org > Subject: Re: [PATCH] powerpc/mpc85xx: fix non-bootcpu cannot up after > hibernation resume > > Hi! > > On Tue, May 14, 2013 at 08:59:13AM +, Wang Dongsheng-B40534 wrote: > > I send to a wrong email address "Anton Vorontsov > " > > > > Add Anton Vorontsov to this email. > > I don't have any means to test it, but the patch itself looks good and > the description makes sense. So, > > Reviewed-by: Anton Vorontsov > > Thanks! > > > > > Thanks all. > > > > > -Original Message- > > > From: Wang Dongsheng-B40534 > > > Sent: Tuesday, May 14, 2013 4:06 PM > > > To: avoront...@ru.mvista.com > > > Cc: pau...@samba.org; r...@sisk.pl; b...@kernel.crashing.org; > > > johan...@sipsolutions.net; Wood Scott-B07421; Li Yang-R58472; Zhao > > > Chenhui-B35336; linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534 > > > Subject: [PATCH] powerpc/mpc85xx: fix non-bootcpu cannot up after > > > hibernation resume > > > > > > This problem belongs to the core synchronization issues. > > > The cpu1 already updated spin_table values, but bootcore cannot get > > > this value in time. > > > > > > After bootcpu hibiernation restore the pages. we are now running > > > with the kernel data of the old kernel fully restored. if we reset > > > the non-bootcpus that will be reset cache(tlb), the non-bootcpus > > > will get new address(map virtual and physical address spaces). > > > but bootcpu tlb cache still use boot kernel data, so we need to > > > invalidate the bootcpu tlb cache make it to get new main memory data. > > > > > > log: > > > Enabling non-boot CPUs ... 
> > > smp_85xx_kick_cpu: timeout waiting for core 1 to reset > > > smp: failed starting cpu 1 (rc -2) > > > Error taking CPU1 up: -2 > > > > > > Signed-off-by: Wang Dongsheng > > > > > > diff --git a/arch/powerpc/kernel/swsusp_booke.S > > > b/arch/powerpc/kernel/swsusp_booke.S > > > index 11a3930..9503249 100644 > > > --- a/arch/powerpc/kernel/swsusp_booke.S > > > +++ b/arch/powerpc/kernel/swsusp_booke.S > > > @@ -141,6 +141,19 @@ _GLOBAL(swsusp_arch_resume) > > > lis r11,swsusp_save_area@h > > > ori r11,r11,swsusp_save_area@l > > > > > > + /* > > > + * The boot core get a virtual address, when the boot process, > > > + * the virtual address corresponds to a physical address. After > > > + * hibernation resume memory snapshots, The corresponding > > > + * relationship between the virtual memory and physical memory > > > + * might change again. We need to get a new page table. So we > > > + * need to invalidate TLB after resume pages. > > > + * > > > + * Invalidations TLB Using tlbilx/tlbivax/MMUCSR0. > > > + * tlbilx used here. > > > + */ > > > + bl _tlbil_all > > > + > > > lwz r4,SL_SPRG0(r11) > > > mtsprg 0,r4 > > > lwz r4,SL_SPRG1(r11) > > > -- > > > 1.8.0 > > > > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support
Hi ben, Could you apply these patches? Thanks. :) - dongsheng > -Original Message- > From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org] > Sent: Monday, May 13, 2013 1:00 PM > To: Wang Dongsheng-B40534 > Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; > ga...@kernel.crashing.org > Subject: Re: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support > > On Mon, 2013-05-13 at 04:25 +, Wang Dongsheng-B40534 wrote: > > Hi Benjamin, > > > > Could you apply these patches? > > I'll have a look, I was assuming Kumar would take them but since not I'll > queue them up. > > Cheers, > Ben. > > > Scott already ACK. > > > > [v3,1/4] powerpc/mpic: add irq_set_wake support > > http://patchwork.ozlabs.org/patch/234934/ > > > > [v3,2/4] powerpc/mpic: add global timer support > > http://patchwork.ozlabs.org/patch/234935/ > > > > [v3,3/4] powerpc/mpic: create mpic subsystem object > > http://patchwork.ozlabs.org/patch/234936/ > > > > [v3,4/4] powerpc/fsl: add MPIC timer wakeup support > > http://patchwork.ozlabs.org/patch/234937/ > > > > Thanks. > > > > > -Original Message- > > > From: Wang Dongsheng-B40534 > > > Sent: Friday, May 03, 2013 9:54 AM > > > To: 'ga...@kernel.crashing.org' > > > Cc: 'linuxppc-dev@lists.ozlabs.org'; Wood Scott-B07421; > > > 'b...@kernel.crashing.org' > > > Subject: RE: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support > > > > > > Hi Kumar, > > > > > > Could you apply these patches? > > > > > > Thanks. > > > > > > > -Original Message- > > > > From: Wang Dongsheng-B40534 > > > > Sent: Tuesday, April 23, 2013 6:10 PM > > > > To: ga...@kernel.crashing.org > > > > Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421 > > > > Subject: RE: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support > > > > > > > > Hi Kumar, > > > > > > > > Could you apply these patches? > > > > > > > > Thanks. 
> > > > > > > > [v3,1/4] powerpc/mpic: add irq_set_wake support > > > > http://patchwork.ozlabs.org/patch/234934/ > > > > > > > > [v3,2/4] powerpc/mpic: add global timer support > > > > http://patchwork.ozlabs.org/patch/234935/ > > > > > > > > [v3,3/4] powerpc/mpic: create mpic subsystem object > > > > http://patchwork.ozlabs.org/patch/234936/ > > > > > > > > [v3,4/4] powerpc/fsl: add MPIC timer wakeup support > > > > http://patchwork.ozlabs.org/patch/234937/ > > > > > > > > > > > > > -Original Message- > > > > > From: Wood Scott-B07421 > > > > > Sent: Wednesday, April 17, 2013 7:30 AM > > > > > To: Wang Dongsheng-B40534 > > > > > Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org; > > > > > ga...@kernel.crashing.org > > > > > Subject: Re: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake > > > > > support > > > > > > > > > > ACK > > > > > > > > > > -Scott > > > > > > > > > > On 04/16/2013 05:58:52 AM, Wang Dongsheng-B40534 wrote: > > > > > > Hi scott, > > > > > > > > > > > > Could you ACK these patches? > > > > > > > > > > > > [PATCH v3 2/4] powerpc/mpic: add global timer support [PATCH > > > > > > v3 3/4] > > > > > > powerpc/mpic: create mpic subsystem object [PATCH v3 4/4] > > > > > > powerpc/fsl: add MPIC timer wakeup support > > > > > > > > > > > > Thanks. > > > > > > > > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[git pull] Please pull powerpc.git merge branch
Hi Linus ! Here are a few more powerpc changes for 3.10. I've merged your tree in at some point (which I generally avoid) in order to get the compat network fixes as soon as possible. Mostly regressions, and stuff I judged could/should still go in at this stage. I'm still waiting on some more fixes to the signal handling vs. transactional memory (this is really ugly btw) which are hopefully coming next week. Cheers, Ben. The following changes since commit 1612e111e4e565422242727efb59499cce8738e4: Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2013-06-06 18:09:05 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge for you to fetch changes up to 227331303afdca06e44dcda41d27b72015153827: powerpc: Print instruction when logging unhandled exceptions (2013-06-09 17:29:16 +1000) Anton Blanchard (1): powerpc: Print instruction when logging unhandled exceptions Benjamin Herrenschmidt (1): Merge remote-tracking branch 'origin/master' into merge Gavin Shan (1): powerpc/eeh: Don't check RTAS token to get PE addr Kevin Hao (1): powerpc/pci: Check the bus address instead of resource address in pcibios_fixup_resources Michael Ellerman (3): powerpc: Rename PMU interrupts from CNT to PMI powerpc/perf: Fix deadlock caused by calling printk() in PMU exception powerpc: Partial revert of "Context switch more PMU related SPRs" Michael Neuling (4): powerpc/power8: Fix oprofile and perf powerpc/pseries: Simplify denormalization handler powerpc/power8: Update denormalization handler powerpc/hw_breakpoints: Add DABRX cpu feature to fix 32-bit regression arch/powerpc/include/asm/cputable.h | 17 +++-- arch/powerpc/kernel/cputable.c |8 +-- arch/powerpc/kernel/entry_64.S | 28 arch/powerpc/kernel/exceptions-64s.S | 90 -- arch/powerpc/kernel/irq.c|2 +- arch/powerpc/kernel/pci-common.c |4 +- arch/powerpc/kernel/process.c|3 +- arch/powerpc/kernel/traps.c | 11 +++- arch/powerpc/perf/core-book3s.c |2 +- 
arch/powerpc/platforms/pseries/eeh_pseries.c | 12 ++-- 10 files changed, 60 insertions(+), 117 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space
> -Original Message- > From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org] > Sent: Sunday, June 09, 2013 3:46 PM > To: Wang Dongsheng-B40534 > Cc: johan...@sipsolutions.net; an...@enomsg.org; Wood Scott-B07421; > ga...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org > Subject: Re: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads > access to kernel space > > On Sun, 2013-06-09 at 07:44 +, Wang Dongsheng-B40534 wrote: > > So we just need set set_context() in restore_mmu_context(). > > > > void restore_mmu_context(struct mm_struct *next) { > > set_context(next->context.id, next->pgd); } > > We probably also want to flush the TLB, just in case the boot kernel has > left "something" there (though I wouldn't expect it to have run userspace > it's not completely impossible). > Yes, but the TLB has already been invalidated. See my other patch: http://patchwork.ozlabs.org/patch/250006/ > Cheers, > Ben. > > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space
On Sun, 2013-06-09 at 07:44 +, Wang Dongsheng-B40534 wrote: > So we just need set set_context() in restore_mmu_context(). > > void restore_mmu_context(struct mm_struct *next) { > set_context(next->context.id, next->pgd); > } We probably also want to flush the TLB, just in case the boot kernel has left "something" there (though I wouldn't expect it to have run userspace it's not completely impossible). Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space
> -Original Message- > From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org] > Sent: Sunday, June 09, 2013 2:44 PM > To: Wang Dongsheng-B40534 > Cc: johan...@sipsolutions.net; an...@enomsg.org; Wood Scott-B07421; > ga...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org > Subject: Re: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads > access to kernel space > > On Sun, 2013-06-09 at 13:22 +0800, Wang Dongsheng wrote: > > If PID is used in the TLB, after hibernation resume, the user threads > > will access to kernel space. > > .../... > > I think the explanation is way more convoluted and confusing here than > anything else. > > Simply say that upon resume from hibernation, the MMU context needs to be > restored (this includes the PID register today it might include more if > we decided to pre-set some MAS for example > etc...) and be done with it. > > Note that switch_mmu_context() used the way you do is quite "full on", it > will do a whole pile of stuff that are probably completely unnecessary, > and in addition might still miss the need to completely flush the TLB > anyway. > > I would suggest that instead, somebody adds the necessary routine to > tlb_nohash.c, something like restore_mmu_context() which will do that. > Thanks Ben, this is a good idea. We do not need to check whether the current thread has a context in restore_mmu_context(), because the current thread already got a context in the hibernation suspend flow. So we just need to call set_context() in restore_mmu_context(). void restore_mmu_context(struct mm_struct *next) { set_context(next->context.id, next->pgd); } -dongsheng > Cheers, > Ben. 
> > > Signed-off-by: Wang Dongsheng > > --- > > arch/powerpc/kernel/swsusp.c | 2 -- > > 1 file changed, 2 deletions(-) > > > > diff --git a/arch/powerpc/kernel/swsusp.c > > b/arch/powerpc/kernel/swsusp.c index eae33e1..1930e44 100644 > > --- a/arch/powerpc/kernel/swsusp.c > > +++ b/arch/powerpc/kernel/swsusp.c > > @@ -32,7 +32,5 @@ void save_processor_state(void) > > > > void restore_processor_state(void) > > { > > -#ifdef CONFIG_PPC32 > > switch_mmu_context(current->active_mm, current->active_mm); -#endif > > } > > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/5] powerpc/tm: Fix restoration of MSR on 32bit signal return
On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote: > Currently we clear out the MSR TM bits on signal return assuming that the > signal should never return to an active transaction. > > This is bogus as the user may do this. It's most likely the transaction will > be doomed due to a treclaim but that's a problem for the HW not the kernel. > > This removes the stripping of these MSR TM bits. > > Signed-off-by: Michael Neuling > --- > @@ -859,8 +860,10 @@ static long restore_tm_user_regs(struct pt_regs *regs, > tm_enable(); > /* This loads the checkpointed FP/VEC state, if used */ > tm_recheckpoint(&current->thread, msr); > - /* The task has moved into TM state S, so ensure MSR reflects this */ > - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S; > + /* Retore the top half of the MSR */ > + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) > + return 1; > + regs->msr = (regs->msr | (((unsigned long)msr_hi) << 32)); What kind of damage can I do by calling sigreturn with a cooked frame with random MSR bits set? You should probably filter what bits you allow to come from the frame. Additionally, I would also make sure I only do that if the CPU features say TM is supported in case that MSR bit means something else on a different/older CPU... Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/5] powerpc/tm: Fix 32 bit non-rt signals
On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote: > Currently sys_sigreturn() is TM unaware. Therefore, if we take a 32 bit > signal > without SIGINFO (non RT) inside a transaction, on signal return we don't > restore the signal frame correctly. > > This checks if the signal frame being restoring is an active transaction, and > if so, it copies the additional state to ptregs so it can be restored. > > Signed-off-by: Michael Neuling > --- .../... > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM > + mcp = (struct mcontext __user *)&sf->mctx; > + tm_mcp = (struct mcontext __user *)&sf->mctx_transact; > + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) > goto badframe; > + if MSR_TM_ACTIVE(msr_hi<<32) { Missing ( and ). I'll apply that fix locally. Apart from that, I suppose it's ok. I don't see any exposure coming from users "cooking" the tm_frame and calling sigreturn, so as long as we are confident userspace generally only uses sigreturn with frames it got from an actual signal, and doesn't try to "generate" frames by hand, we should be ok. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/3] powerpc/8xx: Remove 8xx specific "minimal FPU emulation"
This is duplicated code from math-emu and implements such a small subset of the FPU (load/stores/fmr) that it's essentially pointless nowadays. Signed-off-by: Benjamin Herrenschmidt --- Note that arch/powerpc/math-emu/math.c seems to have code in there to do exactly the same thing (subset emulation) if CONFIG_MATH_EMULATION isn't set. However that code isn't used since we simply don't call do_mathemu() nor build any of the relevant files when this is not set. I'm not removing this just yet. If somebody thinks there is value in supporting that "minimal set emulation", then send me a patch that provides a Kconfig/Makefile way of enabling that code, which makes it work on all soft-fpu powerpc's and not just 8xx. If nobody reacts, I'll probably just rip the code out by the next kernel version. arch/powerpc/Kconfig| 11 --- arch/powerpc/kernel/traps.c | 22 +- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e3009ab..5374776 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -319,17 +319,6 @@ config PPC_TRANSACTIONAL_MEM ---help--- Support user-mode Transactional Memory on POWERPC. -config 8XX_MINIMAL_FPEMU - bool "Minimal math emulation for 8xx" - depends on 8xx && !MATH_EMULATION - help - Older arch/ppc kernels still emulated a few floating point - instructions such as load and store, even when full math - emulation is disabled. Say "Y" here if you want to preserve - this behavior. - - It is recommended that you build a soft-float userspace instead. 
- config IOMMU_HELPER def_bool PPC64 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ff315a7..8093156 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1401,8 +1401,7 @@ void performance_monitor_exception(struct pt_regs *regs) void SoftwareEmulation(struct pt_regs *regs) { extern int do_mathemu(struct pt_regs *); - extern int Soft_emulate_8xx(struct pt_regs *); -#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU) +#if defined(CONFIG_MATH_EMULATION) int errcode; #endif @@ -1435,23 +1434,6 @@ void SoftwareEmulation(struct pt_regs *regs) _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; } - -#elif defined(CONFIG_8XX_MINIMAL_FPEMU) - errcode = Soft_emulate_8xx(regs); - if (errcode >= 0) - PPC_WARN_EMULATED(8xx, regs); - - switch (errcode) { - case 0: - emulate_single_step(regs); - return; - case 1: - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - case -EFAULT: - _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); - return; - } #else _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); #endif @@ -1801,8 +1783,6 @@ struct ppc_emulated ppc_emulated = { WARN_EMULATED_SETUP(unaligned), #ifdef CONFIG_MATH_EMULATION WARN_EMULATED_SETUP(math), -#elif defined(CONFIG_8XX_MINIMAL_FPEMU) - WARN_EMULATED_SETUP(8xx), #endif #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/3] powerpc/math-emu: Allow math-emu to be used for HW FPU
(Including 64-bit ones) This allows SW emulation by the kernel of optional instructions such as fsqrt which aren't implemented on some processors, and thus fixes some Fedora 19 issues such as Anaconda since the compiler is set to generate those by default on 64-bit. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig|6 +- arch/powerpc/kernel/traps.c | 12 +++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c33e3ad..e3009ab 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -298,7 +298,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" - depends on 4xx || 8xx || E200 || PPC_MPC832x || E500 + depends on 4xx || 8xx || PPC_MPC832x || BOOKE ---help--- Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -307,6 +307,10 @@ config MATH_EMULATION unit, which will allow programs that use floating-point instructions to run. + This is also useful to emulate missing (optional) instructions + such as fsqrt on cores that do have an FPU but do not implement + them (such as Freescale BookE). + config PPC_TRANSACTIONAL_MEM bool "Transactional Memory support for POWERPC" depends on PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6dfbb38..ff315a7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1130,7 +1130,17 @@ void __kprobes program_check_exception(struct pt_regs *regs) * ESR_DST (!?) or 0. In the process of chasing this with the * hardware people - not sure if it can happen on any illegal * instruction or only on FP instructions, whether there is a -* pattern to occurrences etc. -dgibson 31/Mar/2003 */ +* pattern to occurrences etc. 
-dgibson 31/Mar/2003 +*/ + + /* +* If we support a HW FPU, we need to ensure the FP state +* if flushed into the thread_struct before attempting +* emulation +*/ +#ifdef CONFIG_PPC_FPU + flush_fp_to_thread(current); +#endif switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/3] powerpc/math-emu: Fix decoding of some instructions
The decoding of some instructions such as fsqrt{s} was incorrect, using the wrong registers, and thus could not work. This fixes it and also adds a couple of place holders for missing instructions. Signed-off-by: Benjamin Herrenschmidt --- Note: fre.c etc.. are still empty placeholders, just like fres.c is today. Eventually somebody needs to fill-in the missing bits arch/powerpc/math-emu/Makefile |3 ++- arch/powerpc/math-emu/fre.c | 11 +++ arch/powerpc/math-emu/frsqrtes.c | 11 +++ arch/powerpc/math-emu/math.c | 14 ++ 4 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 arch/powerpc/math-emu/fre.c create mode 100644 arch/powerpc/math-emu/frsqrtes.c diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile index 7d1dba0..8d035d2 100644 --- a/arch/powerpc/math-emu/Makefile +++ b/arch/powerpc/math-emu/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_MATH_EMULATION)+= fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \ fmadd.o fmadds.o fmsub.o fmsubs.o \ fmul.o fmuls.o fnabs.o fneg.o \ fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \ - fres.o frsp.o frsqrte.o fsel.o lfs.o \ + fres.o fre.o frsp.o fsel.o lfs.o \ + frsqrte.o frsqrtes.o \ fsqrt.o fsqrts.o fsub.o fsubs.o \ mcrfs.o mffs.o mtfsb0.o mtfsb1.o \ mtfsf.o mtfsfi.o stfiwx.o stfs.o \ diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c new file mode 100644 index 000..49ccf2c --- /dev/null +++ b/arch/powerpc/math-emu/fre.c @@ -0,0 +1,11 @@ +#include +#include +#include + +int fre(void *frD, void *frB) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __func__, frD, frB); +#endif + return -ENOSYS; +} diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c new file mode 100644 index 000..7e838e3 --- /dev/null +++ b/arch/powerpc/math-emu/frsqrtes.c @@ -0,0 +1,11 @@ +#include +#include +#include + +int frsqrtes(void *frD, void *frB) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __func__, frD, frB); +#endif + return 0; +} diff --git a/arch/powerpc/math-emu/math.c 
b/arch/powerpc/math-emu/math.c index 164d559..0328e66 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -58,8 +58,10 @@ FLOATFUNC(fnabs); FLOATFUNC(fneg); /* Optional */ +FLOATFUNC(fre); FLOATFUNC(fres); FLOATFUNC(frsqrte); +FLOATFUNC(frsqrtes); FLOATFUNC(fsel); FLOATFUNC(fsqrt); FLOATFUNC(fsqrts); @@ -97,6 +99,7 @@ FLOATFUNC(fsqrts); #define FSQRTS 0x016 /* 22 */ #define FRES 0x018 /* 24 */ #define FMULS 0x019 /* 25 */ +#define FRSQRTES 0x01a /* 26 */ #define FMSUBS 0x01c /* 28 */ #define FMADDS 0x01d /* 29 */ #define FNMSUBS0x01e /* 30 */ @@ -109,6 +112,7 @@ FLOATFUNC(fsqrts); #define FADD 0x015 /* 21 */ #define FSQRT 0x016 /* 22 */ #define FSEL 0x017 /* 23 */ +#define FRE0x018 /* 24 */ #define FMUL 0x019 /* 25 */ #define FRSQRTE0x01a /* 26 */ #define FMSUB 0x01c /* 28 */ @@ -299,9 +303,10 @@ do_mathemu(struct pt_regs *regs) case FDIVS: func = fdivs; type = AB; break; case FSUBS: func = fsubs; type = AB; break; case FADDS: func = fadds; type = AB; break; - case FSQRTS:func = fsqrts; type = AB; break; - case FRES: func = fres;type = AB; break; + case FSQRTS:func = fsqrts; type = XB; break; + case FRES: func = fres;type = XB; break; case FMULS: func = fmuls; type = AC; break; + case FRSQRTES: func = frsqrtes;type = XB; break; case FMSUBS:func = fmsubs; type = ABC; break; case FMADDS:func = fmadds; type = ABC; break; case FNMSUBS: func = fnmsubs; type = ABC; break; @@ -317,10 +322,11 @@ do_mathemu(struct pt_regs *regs) case FDIV: func = fdiv;type = AB; break; case FSUB: func = fsub;type = AB; break; case FADD: func = fadd;type = AB; break; - case FSQRT: func = fsqrt; type = AB; break; + case FSQRT: func = fsqrt; type = XB; break; + case FRE: func = fre; type = XB;