[RFC 10/10] irqchip: Make versatile fpga irq driver a generic chip

2013-06-09 Thread Grant Likely
This is an RFC patch to convert the versatile FPGA irq controller driver
to use generic irq chip. It builds on the series that extends the
generic chip code to allow a linear irq domain to contain one or more
generic irq chips so that each interrupt controller doesn't need to hand
code the generic chip setup.

I've written this as a proof of concept to see if the new generic irq
code does what it needs to. I had to extend it slightly to properly
handle the valid mask used by the versatile FPGA driver.

Tested on QEMU, but not on real hardware.

Signed-off-by: Grant Likely 
Cc: Russell King 
Cc: Linus Walleij 
Cc: Thomas Gleixner 
---
 drivers/irqchip/Kconfig  |   1 +
 drivers/irqchip/irq-versatile-fpga.c | 104 +--
 2 files changed, 39 insertions(+), 66 deletions(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 4a33351..8765502 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -14,6 +14,7 @@ config ARM_VIC
bool
select IRQ_DOMAIN
select MULTI_IRQ_HANDLER
+   select GENERIC_IRQ_CHIP
 
 config ARM_VIC_NR
int
diff --git a/drivers/irqchip/irq-versatile-fpga.c 
b/drivers/irqchip/irq-versatile-fpga.c
index 47a52ab..8c7097b 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -34,37 +34,18 @@
  * @used_irqs: number of active IRQs on this controller
  */
 struct fpga_irq_data {
-   void __iomem *base;
-   struct irq_chip chip;
-   u32 valid;
struct irq_domain *domain;
-   u8 used_irqs;
 };
 
 /* we cannot allocate memory when the controllers are initially registered */
 static struct fpga_irq_data fpga_irq_devices[CONFIG_VERSATILE_FPGA_IRQ_NR];
 static int fpga_irq_id;
 
-static void fpga_irq_mask(struct irq_data *d)
-{
-   struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
-   u32 mask = 1 << d->hwirq;
-
-   writel(mask, f->base + IRQ_ENABLE_CLEAR);
-}
-
-static void fpga_irq_unmask(struct irq_data *d)
-{
-   struct fpga_irq_data *f = irq_data_get_irq_chip_data(d);
-   u32 mask = 1 << d->hwirq;
-
-   writel(mask, f->base + IRQ_ENABLE_SET);
-}
-
 static void fpga_irq_handle(unsigned int irq, struct irq_desc *desc)
 {
-   struct fpga_irq_data *f = irq_desc_get_handler_data(desc);
-   u32 status = readl(f->base + IRQ_STATUS);
+   struct irq_domain *domain = irq_desc_get_handler_data(desc);
+   struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
+   u32 status = readl(gc->reg_base + IRQ_STATUS);
 
if (status == 0) {
do_bad_IRQ(irq, desc);
@@ -74,7 +55,7 @@ static void fpga_irq_handle(unsigned int irq, struct irq_desc 
*desc)
do {
irq = ffs(status) - 1;
status &= ~(1 << irq);
-   generic_handle_irq(irq_find_mapping(f->domain, irq));
+   generic_handle_irq(irq_find_mapping(domain, irq));
} while (status);
 }
 
@@ -85,11 +66,12 @@ static void fpga_irq_handle(unsigned int irq, struct 
irq_desc *desc)
  */
 static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
 {
+   struct irq_chip_generic *gc = irq_get_domain_generic_chip(f->domain, 0);
int handled = 0;
int irq;
u32 status;
 
-   while ((status  = readl(f->base + IRQ_STATUS))) {
+   while ((status = readl(gc->reg_base + IRQ_STATUS))) {
irq = ffs(status) - 1;
handle_IRQ(irq_find_mapping(f->domain, irq), regs);
handled = 1;
@@ -112,63 +94,53 @@ asmlinkage void __exception_irq_entry 
fpga_handle_irq(struct pt_regs *regs)
} while (handled);
 }
 
-static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq,
-   irq_hw_number_t hwirq)
-{
-   struct fpga_irq_data *f = d->host_data;
-
-   /* Skip invalid IRQs, only register handlers for the real ones */
-   if (!(f->valid & BIT(hwirq)))
-   return -EPERM;
-   irq_set_chip_data(irq, f);
-   irq_set_chip_and_handler(irq, &f->chip,
-   handle_level_irq);
-   set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-   return 0;
-}
-
-static struct irq_domain_ops fpga_irqdomain_ops = {
-   .map = fpga_irqdomain_map,
-   .xlate = irq_domain_xlate_onetwocell,
-};
-
 void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start,
  int parent_irq, u32 valid, struct device_node *node)
 {
+   struct irq_chip_generic *gc;
struct fpga_irq_data *f;
-   int i;
+   int ret;
 
if (fpga_irq_id >= ARRAY_SIZE(fpga_irq_devices)) {
pr_err("%s: too few FPGA IRQ controllers, increase 
CONFIG_VERSATILE_FPGA_IRQ_NR\n", __func__);
return;
}
f = &fpga_irq_devices[fpga_irq_id];
-   f->base = base;
-   f->chip.name = name;
-   f->chip.irq_ack = fpga_irq_mask;
-   f->chip.irq_mask = fpga_

[RFC 09/10] irqdomain: remove irq_domain_generate_simple()

2013-06-09 Thread Grant Likely
Nobody calls it; remove the function

Signed-off-by: Grant Likely 
---
 include/linux/irqdomain.h |  8 
 kernel/irq/irqdomain.c| 15 ---
 2 files changed, 23 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index f9e8e06..fe7c57d 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -205,14 +205,6 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, 
struct device_node *ctrlr,
const u32 *intspec, unsigned int intsize,
irq_hw_number_t *out_hwirq, unsigned int *out_type);
 
-#if defined(CONFIG_OF_IRQ)
-extern void irq_domain_generate_simple(const struct of_device_id *match,
-   u64 phys_base, unsigned int irq_start);
-#else /* CONFIG_OF_IRQ */
-static inline void irq_domain_generate_simple(const struct of_device_id *match,
-   u64 phys_base, unsigned int irq_start) 
{ }
-#endif /* !CONFIG_OF_IRQ */
-
 #else /* CONFIG_IRQ_DOMAIN */
 static inline void irq_dispose_mapping(unsigned int virq) { }
 #endif /* !CONFIG_IRQ_DOMAIN */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 80e9249..e47b356 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -741,18 +741,3 @@ const struct irq_domain_ops irq_domain_simple_ops = {
.xlate = irq_domain_xlate_onetwocell,
 };
 EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
-
-#ifdef CONFIG_OF_IRQ
-void irq_domain_generate_simple(const struct of_device_id *match,
-   u64 phys_base, unsigned int irq_start)
-{
-   struct device_node *node;
-   pr_debug("looking for phys_base=%llx, irq_start=%i\n",
-   (unsigned long long) phys_base, (int) irq_start);
-   node = of_find_matching_node_by_address(NULL, match, phys_base);
-   if (node)
-   irq_domain_add_legacy(node, 32, irq_start, 0,
- &irq_domain_simple_ops, NULL);
-}
-EXPORT_SYMBOL_GPL(irq_domain_generate_simple);
-#endif
-- 
1.8.1.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC 08/10] irqdomain: Refactor irq_domain_associate_many()

2013-06-09 Thread Grant Likely
Originally, irq_domain_associate_many() was designed to unwind the
mapped irqs on a failure of any individual association. However, that
proved to be a problem with certain IRQ controllers. Some of them only
support a subset of irqs, and will fail when attempting to map a
reserved IRQ. In those cases we want to map as many IRQs as possible, so
instead it is better for irq_domain_associate_many() to make a
best-effort attempt to map irqs, but not fail if any or all of them
don't succeed. If a caller really cares about how many irqs got
associated, then it should instead go back and check that all of the
irqs it cares about were mapped.

The original design open-coded the individual association code into the
body of irq_domain_associate_many(), but with no longer needing to
unwind associations, the code becomes simpler to split out
irq_domain_associate() to contain the bulk of the logic, and
irq_domain_associate_many() to be a simple loop wrapper.

This patch also adds a new error check to the associate path to make
sure it isn't called for an irq larger than the controller can handle,
and adds locking so that the irq_domain_mutex is held while setting up a
new association.

Signed-off-by: Grant Likely 
---
 include/linux/irqdomain.h |  22 +++---
 kernel/irq/irqdomain.c| 185 +++---
 2 files changed, 101 insertions(+), 106 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index fd4b26f..f9e8e06 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -103,6 +103,7 @@ struct irq_domain {
struct irq_domain_chip_generic *gc;
 
/* reverse map data. The linear map gets appended to the irq_domain */
+   irq_hw_number_t hwirq_max;
unsigned int revmap_direct_max_irq;
unsigned int revmap_size;
struct radix_tree_root revmap_tree;
@@ -110,8 +111,8 @@ struct irq_domain {
 };
 
 #ifdef CONFIG_IRQ_DOMAIN
-struct irq_domain *__irq_domain_add(struct device_node *of_node,
-   int size, int direct_max,
+struct irq_domain *__irq_domain_add(struct device_node *of_node, int size,
+   irq_hw_number_t hwirq_max, int direct_max,
const struct irq_domain_ops *ops,
void *host_data);
 struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
@@ -140,14 +141,14 @@ static inline struct irq_domain 
*irq_domain_add_linear(struct device_node *of_no
 const struct irq_domain_ops *ops,
 void *host_data)
 {
-   return __irq_domain_add(of_node, size, 0, ops, host_data);
+   return __irq_domain_add(of_node, size, size, 0, ops, host_data);
 }
 static inline struct irq_domain *irq_domain_add_nomap(struct device_node 
*of_node,
 unsigned int max_irq,
 const struct irq_domain_ops *ops,
 void *host_data)
 {
-   return __irq_domain_add(of_node, 0, max_irq, ops, host_data);
+   return __irq_domain_add(of_node, 0, max_irq, max_irq, ops, host_data);
 }
 static inline struct irq_domain *irq_domain_add_legacy_isa(
struct device_node *of_node,
@@ -166,14 +167,11 @@ static inline struct irq_domain 
*irq_domain_add_tree(struct device_node *of_node
 
 extern void irq_domain_remove(struct irq_domain *host);
 
-extern int irq_domain_associate_many(struct irq_domain *domain,
-unsigned int irq_base,
-irq_hw_number_t hwirq_base, int count);
-static inline int irq_domain_associate(struct irq_domain *domain, unsigned int 
irq,
-   irq_hw_number_t hwirq)
-{
-   return irq_domain_associate_many(domain, irq, hwirq, 1);
-}
+extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
+   irq_hw_number_t hwirq);
+extern void irq_domain_associate_many(struct irq_domain *domain,
+ unsigned int irq_base,
+ irq_hw_number_t hwirq_base, int count);
 
 extern unsigned int irq_create_mapping(struct irq_domain *host,
   irq_hw_number_t hwirq);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 280b804..80e9249 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -35,8 +35,8 @@ static struct irq_domain *irq_default_domain;
  * register allocated irq_domain with irq_domain_register().  Returns pointer
  * to IRQ domain, or NULL on failure.
  */
-struct irq_domain *__irq_domain_add(struct device_node *of_node,
-   int size, int direct_max,
+struct irq_domain *__irq_domain_add(struct device_node *of_node, int size,
+

[RFC 06/10] irqdomain: Clean up aftermath of irq_domain refactoring

2013-06-09 Thread Grant Likely
After refactoring the irqdomain code, there are a number of API
functions that are merely empty wrappers around core code. Drop those
wrappers out of the C file and replace them with static inlines in the
header.

Signed-off-by: Grant Likely 
---
 arch/powerpc/platforms/cell/beat_interrupt.c |   2 +-
 arch/powerpc/platforms/powermac/smp.c|   2 +-
 include/linux/irqdomain.h|  31 +--
 kernel/irq/irqdomain.c   | 127 ---
 4 files changed, 62 insertions(+), 100 deletions(-)

diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c 
b/arch/powerpc/platforms/cell/beat_interrupt.c
index 8c6dc42..9e5dfbc 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void)
ppc_md.get_irq = beatic_get_irq;
 
/* Allocate an irq host */
-   beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL);
+   beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, 
NULL);
BUG_ON(beatic_host == NULL);
irq_set_default_host(beatic_host);
 }
diff --git a/arch/powerpc/platforms/powermac/smp.c 
b/arch/powerpc/platforms/powermac/smp.c
index bdb738a..f921067 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void)
 {
int rc = -ENOMEM;
 
-   psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL);
+   psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
 
if (psurge_host)
psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 51ef84a..fd4b26f 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -110,6 +110,10 @@ struct irq_domain {
 };
 
 #ifdef CONFIG_IRQ_DOMAIN
+struct irq_domain *__irq_domain_add(struct device_node *of_node,
+   int size, int direct_max,
+   const struct irq_domain_ops *ops,
+   void *host_data);
 struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
 unsigned int size,
 unsigned int first_irq,
@@ -121,17 +125,30 @@ struct irq_domain *irq_domain_add_legacy(struct 
device_node *of_node,
 irq_hw_number_t first_hwirq,
 const struct irq_domain_ops *ops,
 void *host_data);
-struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
+extern struct irq_domain *irq_find_host(struct device_node *node);
+extern void irq_set_default_host(struct irq_domain *host);
+
+/**
+ * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
+ * @of_node: pointer to interrupt controller's device tree node.
+ * @size: Number of interrupts in the domain.
+ * @ops: map/unmap domain callbacks
+ * @host_data: Controller private data pointer
+ */
+static inline struct irq_domain *irq_domain_add_linear(struct device_node 
*of_node,
 unsigned int size,
 const struct irq_domain_ops *ops,
-void *host_data);
-struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
+void *host_data)
+{
+   return __irq_domain_add(of_node, size, 0, ops, host_data);
+}
+static inline struct irq_domain *irq_domain_add_nomap(struct device_node 
*of_node,
 unsigned int max_irq,
 const struct irq_domain_ops *ops,
-void *host_data);
-extern struct irq_domain *irq_find_host(struct device_node *node);
-extern void irq_set_default_host(struct irq_domain *host);
-
+void *host_data)
+{
+   return __irq_domain_add(of_node, 0, max_irq, ops, host_data);
+}
 static inline struct irq_domain *irq_domain_add_legacy_isa(
struct device_node *of_node,
const struct irq_domain_ops *ops,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index c38be78..e0db59e 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -23,8 +23,11 @@ static DEFINE_MUTEX(revmap_trees_mutex);
 static struct irq_domain *irq_default_domain;
 
 /**
- * irq_domain_alloc() - Allocate a new irq_domain data structure
+ * __irq_domain_add() - Allocate a new irq_domain data structure
  * @of_node: optional device-tree node of the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @direct_max: Maximum value of direct maps; Use ~0 for no li

[RFC 07/10] irqdomain: Beef up debugfs output

2013-06-09 Thread Grant Likely
This patch increases the amount of output produced by the
irq_domain_mapping debugfs file by first listing all of the registered
irq domains at the beginning of the output, and then by including all
mapped IRQs in the output, not just the active ones. It is very useful
when debugging irqdomain issues to be able to see the entire list of
mapped irqs, not just the ones that happen to be connected to devices.

Signed-off-by: Grant Likely 
---
 kernel/irq/irqdomain.c | 35 ++-
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e0db59e..280b804 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -596,12 +596,29 @@ static int virq_debug_show(struct seq_file *m, void 
*private)
 {
unsigned long flags;
struct irq_desc *desc;
-   void *data;
+   struct irq_domain *domain;
+   struct radix_tree_iter iter;
+   void *data, **slot;
int i;
 
-   seq_printf(m, "%-5s  %-7s  %-15s  %-*s  %s\n", "irq", "hwirq",
+   seq_printf(m, " %-16s  %-6s  %-10s  %-10s  %s\n",
+  "name", "mapped", "linear-max", "direct-max", 
"devtree-node");
+   mutex_lock(&irq_domain_mutex);
+   list_for_each_entry(domain, &irq_domain_list, link) {
+   int count = 0;
+   radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0)
+   count++;
+   seq_printf(m, "%c%-16s  %6u  %10u  %10u  %s\n",
+  domain == irq_default_domain ? '*' : ' ', 
domain->name,
+  domain->revmap_size + count, domain->revmap_size,
+  domain->revmap_direct_max_irq,
+  domain->of_node ? of_node_full_name(domain->of_node) 
: "");
+   }
+   mutex_unlock(&irq_domain_mutex);
+
+   seq_printf(m, "%-5s  %-7s  %-15s  %-*s  %6s  %-14s  %s\n", "irq", 
"hwirq",
  "chip name", (int)(2 * sizeof(void *) + 2), "chip data",
- "domain name");
+ "active", "type", "domain");
 
for (i = 1; i < nr_irqs; i++) {
desc = irq_to_desc(i);
@@ -609,12 +626,15 @@ static int virq_debug_show(struct seq_file *m, void 
*private)
continue;
 
raw_spin_lock_irqsave(&desc->lock, flags);
+   domain = desc->irq_data.domain;
 
-   if (desc->action && desc->action->handler) {
+   if (domain) {
struct irq_chip *chip;
+   int hwirq = desc->irq_data.hwirq;
+   bool direct;
 
seq_printf(m, "%5d  ", i);
-   seq_printf(m, "0x%05lx  ", desc->irq_data.hwirq);
+   seq_printf(m, "0x%05x  ", hwirq);
 
chip = irq_desc_get_chip(desc);
seq_printf(m, "%-15s  ", (chip && chip->name) ? 
chip->name : "none");
@@ -622,6 +642,11 @@ static int virq_debug_show(struct seq_file *m, void 
*private)
data = irq_desc_get_chip_data(desc);
seq_printf(m, data ? "0x%p  " : "  %p  ", data);
 
+   seq_printf(m, "   %c", (desc->action && 
desc->action->handler) ? '*' : ' ');
+   direct = (i == hwirq) && (i < 
domain->revmap_direct_max_irq);
+   seq_printf(m, "%6s%-8s  ",
+  (hwirq < domain->revmap_size) ? "LINEAR" : 
"RADIX",
+  direct ? "(DIRECT)" : "");
seq_printf(m, "%s\n", desc->irq_data.domain->name);
}
 
-- 
1.8.1.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC 05/10] irqdomain: Eliminate revmap type

2013-06-09 Thread Grant Likely
The NOMAP irq_domain type is only used by a handful of interrupt
controllers and it unnecessarily complicates the code by adding special
cases on how to look up mappings. A different revmap function is used
for each type, and each one needs to validate that the correct type was
passed to it before performing the reverse map. Eliminating the revmap_type and
making a single reverse mapping function simplifies the code. It also
shouldn't be any slower than having separate revmap functions because
the type of the revmap needed to be checked anyway.

The linear and tree revmap types were already merged in a previous
patch. This patch rolls the NOMAP or direct mapping behaviour into the
same domain code making it possible for an irq domain to do any mapping
type; linear, tree or direct; and that the mapping will be transparent
to the interrupt controller driver.

With this change, direct mappings will get stored in the linear or tree
mapping for consistency. Reverse mapping from the hwirq to virq will go
through the normal lookup process. However, any controller using a
direct mapping can take advantage of knowing that hwirq==virq for any
mapped interrupts and skip doing a revmap lookup when handling IRQs.

Signed-off-by: Grant Likely 
---
 include/linux/irqdomain.h | 48 +
 kernel/irq/generic-chip.c |  5 +
 kernel/irq/irqdomain.c| 55 +++
 3 files changed, 43 insertions(+), 65 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 1cbb741..51ef84a 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -73,50 +73,42 @@ struct irq_domain_chip_generic;
 /**
  * struct irq_domain - Hardware interrupt number translation object
  * @link: Element in global irq_domain list.
- * @revmap_type: Method used for reverse mapping hwirq numbers to linux irq. 
This
- *   will be one of the IRQ_DOMAIN_MAP_* values.
+ * @name: Name of interrupt domain
  * @ops: pointer to irq_domain methods
  * @host_data: private data pointer for use by owner.  Not touched by 
irq_domain
  * core code.
- * @irq_base: Start of irq_desc range assigned to the irq_domain.  The creator
- *of the irq_domain is responsible for allocating the array of
- *irq_desc structures.
- * @nr_irq: Number of irqs managed by the irq domain
- * @hwirq_base: Starting number for hwirqs managed by the irq domain
- * @of_node: (optional) Pointer to device tree nodes associated with the
- *   irq_domain.  Used when decoding device tree interrupt specifiers.
+ *
+ * Optional elements
+ * @of_node: Pointer to device tree nodes associated with the irq_domain. Used
+ *   when decoding device tree interrupt specifiers.
+ * @gc: Pointer to a list of generic chips. There is a helper function for
+ *  setting up one or more generic chips for interrupt controllers
+ *  drivers using the generic chip library which uses this pointer.
+ *
+ * Revmap data, used internally by irq_domain
+ * @revmap_direct_max_irq: The largest hwirq that can be set for controllers 
that
+ * support direct mapping
+ * @revmap_size: Size of the linear map table @linear_revmap[]
+ * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
+ * @linear_revmap: Linear table of hwirq->virq reverse mappings
  */
 struct irq_domain {
struct list_head link;
const char *name;
-
-   /* type of reverse mapping_technique */
-   unsigned int revmap_type;
-   struct {
-   struct {
-   unsigned int size;
-   } linear;
-   struct {
-   unsigned int max_irq;
-   } nomap;
-   struct radix_tree_root tree;
-   } revmap_data;
const struct irq_domain_ops *ops;
void *host_data;
-   irq_hw_number_t inval_irq;
 
-   /* Optional device node pointer */
+   /* Optional data */
struct device_node *of_node;
-   /* Optional pointer to generic interrupt chips */
struct irq_domain_chip_generic *gc;
 
-   /* Linear reverse map */
+   /* reverse map data. The linear map gets appended to the irq_domain */
+   unsigned int revmap_direct_max_irq;
+   unsigned int revmap_size;
+   struct radix_tree_root revmap_tree;
unsigned int linear_revmap[];
 };
 
-#define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */
-#define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */
-
 #ifdef CONFIG_IRQ_DOMAIN
 struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
 unsigned int size,
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index ca98cc5..4b01106 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -270,10 +270,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, 
int irqs_per_chip,
if (d->gc)
 

[RFC 04/10] irqdomain: merge linear and tree reverse mappings.

2013-06-09 Thread Grant Likely
From: Grant Likely 

Keeping them separate makes irq_domain more complex and adds a lot of
code (as proven by the diffstat).  Merging them simplifies the whole
scheme.  This change makes it so both the tree and linear methods can be
used by the same irq_domain instance.  If the hwirq is less than the
->linear_size, then the linear map is used to reverse map the hwirq.
Otherwise the radix tree is used.  The test for which map to use is no
more expensive than the existing code, so the performance of the fast path
is preserved.

It also means that complex interrupt controllers can use both the
linear map and a tree in the same domain.  This may be useful for an
interrupt controller with a base set of core irqs and a large number
of GPIOs which might be used as irqs.  The linear map could cover the
core irqs, and the tree used for the gpios.

v2: Drop reorganization of revmap data

Signed-off-by: Grant Likely 
Cc: Paul Mundt 
Cc: Benjamin Herrenschmidt 
Cc: Thomas Gleixner 
Cc: Rob Herring 
---
 include/linux/irqdomain.h |  18 
 kernel/irq/irqdomain.c| 107 +-
 2 files changed, 39 insertions(+), 86 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index e5e513c..1cbb741 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -75,7 +75,6 @@ struct irq_domain_chip_generic;
  * @link: Element in global irq_domain list.
  * @revmap_type: Method used for reverse mapping hwirq numbers to linux irq. 
This
  *   will be one of the IRQ_DOMAIN_MAP_* values.
- * @revmap_data: Revmap method specific data.
  * @ops: pointer to irq_domain methods
  * @host_data: private data pointer for use by owner.  Not touched by 
irq_domain
  * core code.
@@ -93,10 +92,9 @@ struct irq_domain {
 
/* type of reverse mapping_technique */
unsigned int revmap_type;
-   union {
+   struct {
struct {
unsigned int size;
-   unsigned int *revmap;
} linear;
struct {
unsigned int max_irq;
@@ -111,11 +109,13 @@ struct irq_domain {
struct device_node *of_node;
/* Optional pointer to generic interrupt chips */
struct irq_domain_chip_generic *gc;
+
+   /* Linear reverse map */
+   unsigned int linear_revmap[];
 };
 
 #define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */
 #define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */
-#define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */
 
 #ifdef CONFIG_IRQ_DOMAIN
 struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
@@ -137,10 +137,6 @@ struct irq_domain *irq_domain_add_nomap(struct device_node 
*of_node,
 unsigned int max_irq,
 const struct irq_domain_ops *ops,
 void *host_data);
-struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
-const struct irq_domain_ops *ops,
-void *host_data);
-
 extern struct irq_domain *irq_find_host(struct device_node *node);
 extern void irq_set_default_host(struct irq_domain *host);
 
@@ -152,6 +148,12 @@ static inline struct irq_domain *irq_domain_add_legacy_isa(
return irq_domain_add_legacy(of_node, NUM_ISA_INTERRUPTS, 0, 0, ops,
 host_data);
 }
+static inline struct irq_domain *irq_domain_add_tree(struct device_node 
*of_node,
+const struct irq_domain_ops *ops,
+void *host_data)
+{
+   return irq_domain_add_linear(of_node, 0, ops, host_data);
+}
 
 extern void irq_domain_remove(struct irq_domain *host);
 
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index b1b5e67..5a1d8ec 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -34,22 +34,24 @@ static struct irq_domain *irq_default_domain;
  * to IRQ domain, or NULL on failure.
  */
 static struct irq_domain *irq_domain_alloc(struct device_node *of_node,
-  unsigned int revmap_type,
+  unsigned int revmap_type, int size,
   const struct irq_domain_ops *ops,
   void *host_data)
 {
struct irq_domain *domain;
 
-   domain = kzalloc_node(sizeof(*domain), GFP_KERNEL,
- of_node_to_nid(of_node));
+   domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
+ GFP_KERNEL, of_node_to_nid(of_node));
if (WARN_ON(!domain))
return NULL;
 
/* Fill structure */
+   INIT_RADIX_TREE(&domain->revmap_data.tree, GFP

[RFC 03/10] irqdomain: Add a name field

2013-06-09 Thread Grant Likely
This patch adds a name field to the irq_domain structure to help mere
mortals understand the mappings between irq domains and virqs. It also
converts a number of places that have open-coded some kind of fudging
an irqdomain name to use the new field. This means a more consistent
display of names in irq domain log messages and debugfs output.

Signed-off-by: Grant Likely 
---
 include/linux/irqdomain.h |  1 +
 kernel/irq/generic-chip.c |  1 +
 kernel/irq/irqdomain.c| 19 ++-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 6f06241..e5e513c 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -89,6 +89,7 @@ struct irq_domain_chip_generic;
  */
 struct irq_domain {
struct list_head link;
+   const char *name;
 
/* type of reverse mapping_technique */
unsigned int revmap_type;
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 95575d8..ca98cc5 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -305,6 +305,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, 
int irqs_per_chip,
/* Calc pointer to the next generic chip */
tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
}
+   d->name = name;
return 0;
 }
 EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 1ac8cf4..b1b5e67 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -410,12 +410,15 @@ int irq_domain_associate_many(struct irq_domain *domain, 
unsigned int irq_base,
 */
if (ret != -EPERM) {
pr_info("%s didn't like hwirq-0x%lx to 
VIRQ%i mapping (rc=%d)\n",
-  
of_node_full_name(domain->of_node), hwirq, virq, ret);
+  domain->name, hwirq, virq, ret);
}
irq_data->domain = NULL;
irq_data->hwirq = 0;
continue;
}
+   /* If not already assigned, give the domain the chip's 
name */
+   if (!domain->name && irq_data->chip)
+   domain->name = irq_data->chip->name;
}
 
switch (domain->revmap_type) {
@@ -708,8 +711,6 @@ static int virq_debug_show(struct seq_file *m, void 
*private)
 {
unsigned long flags;
struct irq_desc *desc;
-   const char *p;
-   static const char none[] = "none";
void *data;
int i;
 
@@ -731,20 +732,12 @@ static int virq_debug_show(struct seq_file *m, void 
*private)
seq_printf(m, "0x%05lx  ", desc->irq_data.hwirq);
 
chip = irq_desc_get_chip(desc);
-   if (chip && chip->name)
-   p = chip->name;
-   else
-   p = none;
-   seq_printf(m, "%-15s  ", p);
+   seq_printf(m, "%-15s  ", (chip && chip->name) ? 
chip->name : "none");
 
data = irq_desc_get_chip_data(desc);
seq_printf(m, data ? "0x%p  " : "  %p  ", data);
 
-   if (desc->irq_data.domain)
-   p = 
of_node_full_name(desc->irq_data.domain->of_node);
-   else
-   p = none;
-   seq_printf(m, "%s\n", p);
+   seq_printf(m, "%s\n", desc->irq_data.domain->name);
}
 
raw_spin_unlock_irqrestore(&desc->lock, flags);
-- 
1.8.1.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC 02/10] irqdomain: Replace LEGACY mapping with LINEAR

2013-06-09 Thread Grant Likely
The LEGACY mapping unnecessarily complicates the irqdomain code and
can easily be implemented with a linear mapping.  By ripping it out
and replacing it with the LINEAR mapping the object size of
irqdomain.c shrinks by about 330 bytes (ARMv7) which offsets the
additional allocation required by the linear map.  It also makes it
possible for current LEGACY map users to pre-allocate irq_descs for a
subset of the hwirqs and dynamically allocate the rest as needed.

Signed-off-by: Grant Likely 
Cc: Paul Mundt 
Cc: Benjamin Herrenschmidt 
Cc: Thomas Gleixner 
Cc: Rob Herring 
---
 include/linux/irqdomain.h |  7 
 kernel/irq/irqdomain.c| 84 ---
 2 files changed, 6 insertions(+), 85 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index ba2c708..6f06241 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -95,11 +95,6 @@ struct irq_domain {
union {
struct {
unsigned int size;
-   unsigned int first_irq;
-   irq_hw_number_t first_hwirq;
-   } legacy;
-   struct {
-   unsigned int size;
unsigned int *revmap;
} linear;
struct {
@@ -117,8 +112,6 @@ struct irq_domain {
struct irq_domain_chip_generic *gc;
 };
 
-#define IRQ_DOMAIN_MAP_LEGACY 0 /* driver allocated fixed range of irqs.
-* ie. legacy 8259, gets irqs 1..15 */
 #define IRQ_DOMAIN_MAP_NOMAP 1 /* no fast reverse mapping */
 #define IRQ_DOMAIN_MAP_LINEAR 2 /* linear map of interrupts */
 #define IRQ_DOMAIN_MAP_TREE 3 /* radix tree */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 61d6d3c..1ac8cf4 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -82,13 +82,6 @@ void irq_domain_remove(struct irq_domain *domain)
mutex_lock(&irq_domain_mutex);
 
switch (domain->revmap_type) {
-   case IRQ_DOMAIN_MAP_LEGACY:
-   /*
-* Legacy domains don't manage their own irq_desc
-* allocations, we expect the caller to handle irq_desc
-* freeing on their own.
-*/
-   break;
case IRQ_DOMAIN_MAP_TREE:
/*
 * radix_tree_delete() takes care of destroying the root
@@ -122,17 +115,6 @@ void irq_domain_remove(struct irq_domain *domain)
 }
 EXPORT_SYMBOL_GPL(irq_domain_remove);
 
-static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
-irq_hw_number_t hwirq)
-{
-   irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq;
-   int size = domain->revmap_data.legacy.size;
-
-   if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size))
-   return 0;
-   return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq;
-}
-
 /**
  * irq_domain_add_simple() - Allocate and register a simple irq_domain.
  * @of_node: pointer to interrupt controller's device tree node.
@@ -213,57 +195,17 @@ struct irq_domain *irq_domain_add_legacy(struct 
device_node *of_node,
 void *host_data)
 {
struct irq_domain *domain;
-   unsigned int i;
 
-   domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, 
host_data);
+   pr_debug("Setting up legacy domain virq[%i:%i] ==> hwirq[%i:%i]\n",
+first_irq, first_irq + size - 1,
+(int)first_hwirq, (int)first_hwirq + size -1);
+
+   domain = irq_domain_add_linear(of_node, first_hwirq + size, ops, 
host_data);
if (!domain)
return NULL;
 
-   domain->revmap_data.legacy.first_irq = first_irq;
-   domain->revmap_data.legacy.first_hwirq = first_hwirq;
-   domain->revmap_data.legacy.size = size;
-
-   mutex_lock(&irq_domain_mutex);
-   /* Verify that all the irqs are available */
-   for (i = 0; i < size; i++) {
-   int irq = first_irq + i;
-   struct irq_data *irq_data = irq_get_irq_data(irq);
-
-   if (WARN_ON(!irq_data || irq_data->domain)) {
-   mutex_unlock(&irq_domain_mutex);
-   irq_domain_free(domain);
-   return NULL;
-   }
-   }
+   WARN_ON(irq_domain_associate_many(domain, first_irq, first_hwirq, 
size));
 
-   /* Claim all of the irqs before registering a legacy domain */
-   for (i = 0; i < size; i++) {
-   struct irq_data *irq_data = irq_get_irq_data(first_irq + i);
-   irq_data->hwirq = first_hwirq + i;
-   irq_data->domain = domain;
-   }
-   mutex_unlock(&irq_domain_mutex);
-
-   for (i = 0; i < size; i++) {
-   int irq = first_irq + i;
-   int hwirq = first_hwirq + i;
-
-   /* IRQ0 gets ignored

[RFC 01/10] irqdomain: Relax failure path on setting up mappings

2013-06-09 Thread Grant Likely
Commit 98aa468e, "irqdomain: Support for static IRQ mapping and
association" introduced an API for directly associating blocks of hwirqs
to linux irqs. However, if any irq in that block failed to map (say if
the mapping function returns an error because the irq is already
mapped) then the whole thing will fail and roll back. This is probably
too aggressive since there are valid reasons why a mapping may fail.
For example, firmware may have a particular IRQ marked as unusable.

This patch drops the error path out of irq_domain_associate(). If a
mapping fails, then it is simply skipped. There is no reason to fail the
entire allocation.

v2: Still output an information message on failed mappings and make sure
attempted mapping gets cleared out of the irq_data structure.

Signed-off-by: Grant Likely 
Cc: Paul Mundt 
Cc: Benjamin Herrenschmidt 
Cc: Thomas Gleixner 
---
 kernel/irq/irqdomain.c | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 20b677d..61d6d3c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -464,23 +464,15 @@ int irq_domain_associate_many(struct irq_domain *domain, 
unsigned int irq_base,
/*
 * If map() returns -EPERM, this interrupt is 
protected
 * by the firmware or some other service and 
shall not
-* be mapped.
-*
-* Since on some platforms we blindly try to 
map everything
-* we end up with a log full of backtraces.
-*
-* So instead, we silently fail on -EPERM, it 
is the
-* responsibility of the PIC driver to display 
a relevant
-* message if needed.
+* be mapped. Don't bother telling the user 
about it.
 */
if (ret != -EPERM) {
-   pr_err("irq-%i==>hwirq-0x%lx mapping 
failed: %d\n",
-  virq, hwirq, ret);
-   WARN_ON(1);
+   pr_info("%s didn't like hwirq-0x%lx to 
VIRQ%i mapping (rc=%d)\n",
+  
of_node_full_name(domain->of_node), hwirq, virq, ret);
}
irq_data->domain = NULL;
irq_data->hwirq = 0;
-   goto err_unmap;
+   continue;
}
}
 
-- 
1.8.1.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


PowerPC assembler question

2013-06-09 Thread Erik de Castro Lopo
Hi all,

I'm trying to fix a problem in the PowerPC backend of the Glasgow
Haskell Compiler (GHC) and have a problem with the following
instruction form:

lwz 30, .label - (1b)(31)

Reading the documentation I could find, I have figured out that this
loads a 16 bit value into register 30. How it calculates that 16 bit
value has got me somewhat flummoxed.

Anybody care to explain?

Cheers,
Erik
-- 
--
Erik de Castro Lopo
http://www.mega-nerd.com/
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC 00/10] Refactor irqdomain

2013-06-09 Thread Grant Likely
I've done a bunch of refactoring work on the irq_domain infrastructure.
Some of these patches I've posted before, and some are brand new. The
goal of this is to greatly simplify how irq_domains work. With this
series, instead of there being multiple different types of irq domains,
each with different mapping rules, there is now only one type of
irq_domain that contains both kinds of map; the linear map for irqs
below a certain value, and the radix tree for large & sparse irq
controllers. As you can see from the following diffstat, the result is a
fair bit less code. It should make it easier to understand irqdomains
too.

 arch/powerpc/platforms/cell/beat_interrupt.c |   2 +-
 arch/powerpc/platforms/powermac/smp.c|   2 +-
 drivers/irqchip/Kconfig  |   1 +
 drivers/irqchip/irq-versatile-fpga.c | 104 --
 include/linux/irqdomain.h| 123 ++-
 kernel/irq/generic-chip.c|   6 +-
 kernel/irq/irqdomain.c   | 555
 --
 7 files changed, 282 insertions(+), 511 deletions(-)

I've pushed this series out to my git server at the following branch:

git://git.secretlab.ca/git/linux irqdomain/next

It depends on the tip tree's irq/for-arm branch and also Linus' mainline
(they need to be merged). The branch above includes both.

I've tested this on ARM qemu models, but not much else. I'll test on
real hardware before pushing out, but I would appreciate anybody doing
additional testing, particularly on PowerPC and other non-ARM platforms.

Cheers,
g.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V3 1/2] powerpc, perf: Ignore separate BHRB privilege state filter request

2013-06-09 Thread Anshuman Khandual
Completely ignore BHRB privilege state filter request as we are
already configuring that with privilege state filtering attribute
for the accompanying PMU event. This would help achieve cleaner
user space interaction for BHRB.

This patch fixes a situation like this

Before patch:-

./perf record -j any -e branch-misses:k ls
Error:
The sys_perf_event_open() syscall returned with 95 (Operation not
supported) for event (branch-misses:k).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?

Here 'perf record' actually copies over ':k' filter request into BHRB
privilege state filter config and our previous check in kernel would
fail that.

After patch:-
-
./perf record -j any -e branch-misses:k ls
perf  perf.data  perf.data.old  test-mmap-ring
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~102 samples)]

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index f7d1c4f..371c6e7 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -523,18 +523,13 @@ static int power8_generic_events[] = {
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
u64 pmu_bhrb_filter = 0;
-   u64 br_privilege = branch_sample_type & ONLY_PLM;
 
-   /* BHRB and regular PMU events share the same prvillege state
+   /* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
-* regular PMU event. So privilege state filter criteria for BHRB
-* and the companion PMU events has to be the same. As a default
-* "perf record" tool sets all privillege bits ON when no filter
-* criteria is provided in the command line. So as along as all
-* privillege bits are ON or they are OFF, we are good to go.
+* regular PMU event. As the privilege state filter is handled
+* in the basic PMC configuration of the accompanying regular
+* PMU event, we ignore any separate BHRB specific request.
 */
-   if ((br_privilege != 7) && (br_privilege != 0))
-   return -1;
 
/* No branch filter requested */
if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V3 2/2] powerpc, perf: BHRB filter configuration should follow the task

2013-06-09 Thread Anshuman Khandual
When the task moves around the system, the corresponding cpuhw
per cpu structure should be populated with the BHRB filter
request value so that PMU could be configured appropriately with
that during the next call into power_pmu_enable().

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 426180b..48c68a8 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1122,8 +1122,11 @@ nocheck:
 
ret = 0;
  out:
-   if (has_branch_stack(event))
+   if (has_branch_stack(event)) {
power_pmu_bhrb_enable(event);
+   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   event->attr.branch_sample_type);
+   }
 
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V3 0/2] Improvement and fixes for BHRB

2013-06-09 Thread Anshuman Khandual
(1) The first patch fixes a situation like this

Before patch:-


./perf record -j any -e branch-misses:k ls
Error:
The sys_perf_event_open() syscall returned with 95 (Operation not supported) 
for event (branch-misses:k).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?

Here 'perf record' actually copies over ':k' filter request into BHRB
privilege state filter config and our previous check in kernel would
fail that.

After patch:-
-

./perf record -j any -e branch-misses:k ls
perf  perf.data  perf.data.old  test-mmap-ring
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~102 samples) ]

(2) The second patch fixes context migration for BHRB filter configuration

Changes in V2
-
(1) Updated the comment section of the code
(2) Removed the informational print
(3) Updated the commit section of the first patch

Changes in V3
-
(1) Fixed the compilation warning problem after removing the unused variable

Anshuman Khandual (2):
  powerpc, perf: Ignore separate BHRB privilege state filter request
  powerpc, perf: BHRB filter configuration should follow the task

 arch/powerpc/perf/core-book3s.c |  5 -
 arch/powerpc/perf/power8-pmu.c  | 13 -
 2 files changed, 8 insertions(+), 10 deletions(-)

-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V3 1/2] powerpc, perf: Ignore separate BHRB privilege state filter request

2013-06-09 Thread Anshuman Khandual
Completely ignore BHRB privilege state filter request as we are
already configuring that with privilege state filtering attribute
for the accompanying PMU event. This would help achieve cleaner
user space interaction for BHRB.

This patch fixes a situation like this

Before patch:-

./perf record -j any -e branch-misses:k ls
Error:
The sys_perf_event_open() syscall returned with 95 (Operation not
supported) for event (branch-misses:k).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?

Here 'perf record' actually copies over ':k' filter request into BHRB
privilege state filter config and our previous check in kernel would
fail that.

After patch:-
-
./perf record -j any -e branch-misses:k ls
perf  perf.data  perf.data.old  test-mmap-ring
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~102 samples)]

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/power8-pmu.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index f7d1c4f..371c6e7 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -523,18 +523,13 @@ static int power8_generic_events[] = {
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
u64 pmu_bhrb_filter = 0;
-   u64 br_privilege = branch_sample_type & ONLY_PLM;
 
-   /* BHRB and regular PMU events share the same prvillege state
+   /* BHRB and regular PMU events share the same privilege state
 * filter configuration. BHRB is always recorded along with a
-* regular PMU event. So privilege state filter criteria for BHRB
-* and the companion PMU events has to be the same. As a default
-* "perf record" tool sets all privillege bits ON when no filter
-* criteria is provided in the command line. So as along as all
-* privillege bits are ON or they are OFF, we are good to go.
+* regular PMU event. As the privilege state filter is handled
+* in the basic PMC configuration of the accompanying regular
+* PMU event, we ignore any separate BHRB specific request.
 */
-   if ((br_privilege != 7) && (br_privilege != 0))
-   return -1;
 
/* No branch filter requested */
if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V3 2/2] powerpc, perf: BHRB filter configuration should follow the task

2013-06-09 Thread Anshuman Khandual
When the task moves around the system, the corresponding cpuhw
per cpu structure should be populated with the BHRB filter
request value so that PMU could be configured appropriately with
that during the next call into power_pmu_enable().

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 426180b..48c68a8 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1122,8 +1122,11 @@ nocheck:
 
ret = 0;
  out:
-   if (has_branch_stack(event))
+   if (has_branch_stack(event)) {
power_pmu_bhrb_enable(event);
+   cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+   event->attr.branch_sample_type);
+   }
 
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH V3 0/2] Improvement and fixes for BHRB

2013-06-09 Thread Anshuman Khandual
(1) The first patch fixes a situation like this

Before patch:-


./perf record -j any -e branch-misses:k ls
Error:
The sys_perf_event_open() syscall returned with 95 (Operation not supported) 
for event (branch-misses:k).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?

Here 'perf record' actually copies over ':k' filter request into BHRB
privilege state filter config and our previous check in kernel would
fail that.

After patch:-
-

./perf record -j any -e branch-misses:k ls
perf  perf.data  perf.data.old  test-mmap-ring
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~102 samples) ]

(2) The second patch fixes context migration for BHRB filter configuration

Changes in V2
-
(1) Updated the comment section of the code
(2) Removed the informational print
(3) Updated the commit section of the first patch

Changes in V3
-
(1) Fixed the compilation warning problem after removing the unused variable

Anshuman Khandual (2):
  powerpc, perf: Ignore separate BHRB privilege state filter request
  powerpc, perf: BHRB filter configuration should follow the task

 arch/powerpc/perf/core-book3s.c |  5 -
 arch/powerpc/perf/power8-pmu.c  | 13 -
 2 files changed, 8 insertions(+), 10 deletions(-)

-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Linus Torvalds
On Sun, Jun 9, 2013 at 9:20 PM, Jeremy Kerr  wrote:
>
> So, we now use the original date header (if present) in the mbox views:
>
> $ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date
> Date: Fri, 7 Jun 2013 15:42:54 +1000
>
> ... for all your data-mining needs.

Goodie, and I see that it even works with old patches. Thanks.

   Linus
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Jeremy Kerr
Hi Linus,

> No. The date from the email was
> 
> Date: Fri, 7 Jun 2013 15:42:54 +1000
> 
> and we want *that* date.

Ah, gotchya.

So, we now use the original date header (if present) in the mbox views:

$ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date
Date: Fri, 7 Jun 2013 15:42:54 +1000

... for all your data-mining needs.

Cheers,


Jeremy
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: Partial revert of "Context switch more PMU related SPRs"

2013-06-09 Thread Anshuman Khandual
On 06/06/2013 09:33 AM, Michael Ellerman wrote:
> In commit 59affcd I added context switching of more PMU SPRs, because
> they are potentially exposed to userspace on Power8. However despite me
> being a smart arse in the commit message it's actually not correct. In
> particular it interacts badly with a global perf record.

Could you please explain how it would interact badly with a global perf record?
If any user space tries to mess around with the MMCR* registers itself, it would
definitely interfere with the kernel's own PMU config going on during perf 
record.
But that's how it works. So when we are actually dealing with MMCR* registers
directly, we should not invoke a "perf record" session which can potentially run on
the same PMU.

Regards
Anshuman


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Linus Torvalds
On Sun, Jun 9, 2013 at 8:20 PM, Linus Torvalds
 wrote:
>
> .. the rationale for this is that the work pattern of people is
> actually interesting information. You can do things like this:
>
> git log --pretty=%aD --author=Torvalds

Final side note: for me, and other git users that apply other people's
patches, it's probably better to use

   git log --pretty=%cD --committer=Torvalds

instead.

Interestingly, that shows a different pattern than my "authorship"
statistics, which are mainly pull requests. It turns out I commit
patches much more in the afternoon. The reason is probably simple: in
the mornings, I have pull requests waiting from overnight, so a fair
number of pull requests where I am author at 9-11. But my biggest
source of patches tends to be Andrew Morton, who sends the patches in
the afternoon, so suddenly the commit counts skew towards being
between 3pm-8pm when you take all my commits into account.

Doing

git log --since=6.months --pretty=%aD --grep=Signed.*Andrew.Morton

backs that up: most of the commits that have sign-offs by Andrew are
sent in the afternoon.

I just find details like that really interesting, where you can
actually mine for the workpatterns of people.

   Linus
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Linus Torvalds
On Sun, Jun 9, 2013 at 8:06 PM, Linus Torvalds
 wrote:
>
> And it does matter.

.. the rationale for this is that the work pattern of people is
actually interesting information. You can do things like this:

git log --pretty=%aD --author=Torvalds

to see what my work pattern is, and I think that's *interesting*.
Gathering statistics like whether people are generally doing 9-5
Mon-Fri is actually interesting data. You can do things like this:

git log --since=6.months --pretty=%aD --author=Torvalds |
cut -c1-3 |
sort |
uniq -c |
sort -n

and see (for example) that I do slow down on weekends.

Same goes for things like what time of day ends up being most
productive. You can do the statistics for me, and see that I tend to
do the bulk of my pulls in the mornings (peak between 9-11) and that
I'm not a night-owl (*big* drop-off after 8PM - that's what kids do to
you). You can see a few really early-morning cases, but I suspect they
were when I was jetlagged.

So the date data is actually meaningful data. It's not just random noise.

And to do these kinds of things, you absolutely have to have
local-time with proper timezone information. Anything that screws that
up is *broken*. git gets this right, unlike a lot of other broken
SCM's. Git gets it right for a reason.

Yeah, yeah, when people forward other peoples patches they often drop
the date field, and the date of the patch ends up being the time that
the last version of the patch got sent rather than anything else, so
many of the statistics aren't valid. But a _tool_ that actively
corrupts the date and time of a patch is just broken.

 Linus
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Linus Torvalds
On Sun, Jun 9, 2013 at 7:44 PM, Jeremy Kerr  wrote:
>
> We keep all patch dates in UTC, but were generating the Date header
> incorrectly. Now fixed:

No, not fixed.

Keeping patch dates in UTC *corrupts* the date.

I'll ask people to stop using patchworks if it cannot keep track of
emailed dates. The date very much is a local time WITH A TIMEZONE.

And it does matter.

> $ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date
> Date: Fri, 07 Jun 2013 05:42:54 -0000

No. The date from the email was

Date: Fri, 7 Jun 2013 15:42:54 +1000

and we want *that* date. Not some random date that patchwork makes up
that has no relevance.

I know you have that date, because it shows up when asking for the
headers in patchwork. Just use the right one, don't make up incorrect
ones.

   Linus
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Jeremy Kerr
Hi Linus,

> Is Jeremy the patchwork maintainer?

Yep, that's me.

> and it turns out that apparently 'patchwork' is just making up random
> times, because when you download the email as an mbox, it will turn
> this into that corrupt and incorrect
> 
> Date: Thu, 06 Jun 2013 19:42:54 -0000
> 
> thing which is apparently how you got the wrong timestamp to begin with.

We keep all patch dates in UTC, but were generating the Date header
incorrectly. Now fixed:

$ wget -qO - http://patchwork.ozlabs.org/patch/249598/mbox/ | grep ^Date
Date: Fri, 07 Jun 2013 05:42:54 -0000

Commit is at:

 http://git.ozlabs.org/?p=patchwork;a=commitdiff;h=e7353352

Cheers,


Jeremy
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: PowerPC assembler question

2013-06-09 Thread Benjamin Herrenschmidt
On Mon, 2013-06-10 at 09:01 +1000, Erik de Castro Lopo wrote:
> Hi all,
> 
> I'm trying to fix a problem in the PowerPC backend of the Glasgow
> Haskell Compiler (GHC) and have a problem with the following
> instruction form:
> 
> lwz 30, .label - (1b)(31)
> 
> Reading the documentation I could find, I have figured out that this
> loads a 16 bit value into register 30. How it calculates that 16 bit
> value has got me somewhat flummoxed.
> 
> Anybody care to explain?

No, this loads a 32-bit value (16-bit would be lhz).

Note: It's more readable if you use the register names, ie:

lwz %r30, .label - (1b)(%r31)

The form of lwz is

lwz dest_reg, offset(address_reg)

So it will load a 32-bit value from memory at the address contained in
r31 offset by ".label - 1b" which is itself the difference between
two labels, "label", and the first "1:" label before the instruction

(gcc supports numeric labels that can be referenced with the suffix "b"
for backward and "f" for forward which are handy for small
displacements)

So for example if 1: was the base of the structure and .label a field
in the structure, it would load the 32-bit value of that field for the
structure instance starting at %r31.

In this case, this looks more like some kind of position-independent
code though.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


PowerPC assembler question

2013-06-09 Thread Erik de Castro Lopo
Hi all,

I'm trying to fix a problem in the PowerPC backend of the Glasgow
Haskell Compiler (GHC) and have a problem with the following
instruction form:

lwz 30, .label - (1b)(31)

Reading the documentation I could find, I have figured out that this
loads a 16 bit value into register 30. How it calculates that 16 bit
value has got me somewhat flummoxed.

Anybody care to explain?

Cheers,
Erik
-- 
--
Erik de Castro Lopo
http://www.mega-nerd.com/
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Benjamin Herrenschmidt
Hi Linus !

Here's the previous pull request with a couple of commits removed,
this is purely regressions (though not all recent ones) or stable
material.

Cheers,
Ben.


The following changes since commit 1612e111e4e565422242727efb59499cce8738e4:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2013-06-06 
18:09:05 -0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge

for you to fetch changes up to b11ae95100f7061b39a15e5c1ecbf862464ac4b4:

  powerpc: Partial revert of "Context switch more PMU related SPRs" (2013-06-10 
08:36:35 +1000)


Gavin Shan (1):
  powerpc/eeh: Don't check RTAS token to get PE addr

Kevin Hao (1):
  powerpc/pci: Check the bus address instead of resource address in 
pcibios_fixup_resources

Michael Ellerman (2):
  powerpc/perf: Fix deadlock caused by calling printk() in PMU exception
  powerpc: Partial revert of "Context switch more PMU related SPRs"

Michael Neuling (4):
  powerpc/power8: Fix oprofile and perf
  powerpc/pseries: Simplify denormalization handler
  powerpc/power8: Update denormalization handler
  powerpc/hw_breakpoints: Add DABRX cpu feature to fix 32-bit regression

 arch/powerpc/include/asm/cputable.h  |   17 +++--
 arch/powerpc/kernel/cputable.c   |8 +--
 arch/powerpc/kernel/entry_64.S   |   28 
 arch/powerpc/kernel/exceptions-64s.S |   90 --
 arch/powerpc/kernel/pci-common.c |4 +-
 arch/powerpc/kernel/process.c|3 +-
 arch/powerpc/perf/core-book3s.c  |2 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |   12 ++--
 8 files changed, 51 insertions(+), 113 deletions(-)


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Benjamin Herrenschmidt
On Sun, 2013-06-09 at 14:33 -0700, Linus Torvalds wrote:
> On Sun, Jun 9, 2013 at 12:56 AM, Benjamin Herrenschmidt
>  wrote:
> >
> > Here are a few more powerpc changes for 3.10. I've merged your
> > tree in at some point (which I generally avoid) in order to get
> > the compat network fixes as soon as possible.
> >
> > Mostly regressions, and stuff I judged could/should still go in at
> > this stage.
> 
> Not pulled, because your hamster smells of elderberries.
> 
> This is not just bugfixes. In fact, as far as I can tell, this
> *introduces* bugs, with that "get_user()" in the exception path that
> can apparently happen with irqs disabled and will thus potentially
> result in new warnings that just make things unreadable.

Ah right, brown paper bag for Anton and I :(

I shouldn't have put that one in, it was a last minute bad decision
after spending time tracking another stupid sigill in userspace (this
time FSL CPUs not implementing some optional instructions that Fedora
compiler seems configured to generate nowadays).

> I'm f*cking tired of people having problems understanding "we're past
> rc5". If it's not something you would call stable material, you
> shouldn't send it to me.

I've taken out that commit and the rename of the PMU interrupt (which
while trivial probably wasn't important enough). Everything else is
regressions/stable material.

That does mean I rebased, but normally nobody bases on that merge branch
so it should be fine.

I'll send a new pull request.

Cheers,
Ben.

>   Linus
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Linus Torvalds
[ Is Jeremy the patchwork maintainer? If not, can people forward this
to the real maintainer? ]

On Sun, Jun 9, 2013 at 2:33 PM, Linus Torvalds
 wrote:
>
> This is not just bugfixes. In fact, as far as I can tell, this
> *introduces* bugs, with that "get_user()" in the exception path that
> can apparently happen with irqs disabled and will thus potentially
> result in new warnings that just make things unreadable.

Looking at that particular commit, I also notice that the commit
itself is buggered in other ways too.

It says:

Date:   Thu Jun 6 19:42:54 2013 +0000

which surprised me due to the odd timezone, and it turns out it is
pure and utter crap.

Google finds the patch in patchwork, and "Show headers" there shows
the expected timezone

Date: Fri, 7 Jun 2013 15:42:54 +1000

and it turns out that apparently 'patchwork' is just making up random
times, because when you download the email as an mbox, it will turn
this into that corrupt and incorrect

Date: Thu, 06 Jun 2013 19:42:54 -0000

thing which is apparently how you got the wrong timestamp to begin with.

That odd time doesn't even make any sense that I can see, because
those two times have absolutely nothing in common afaik. Unless
timezones work differently down under. It looks like some west-coast
local time, but then it says "-0000" which is code for "I have no
f*cking clue what I'm doing".

Just to make things extra exciting, patchwork actually shows yet
*another* date string when you just look at the patch in the web
interface:

Date June 7, 2013, 5:42 a.m.

and that actually seems to be the *correct* UTC version of that
original email date. I have no idea what that "Thu, 06 Jun 2013
19:42:54 -0000" date is, and where it came from. But it is utter
shite.

Can somebody please make sure that patchwork doesn't destroy
timezone/date information? I'm assuming this has been going on
forever, and I just noticed because I looked at that particular commit
for other reasons, and went "Is Anton in Europe now?".

 Linus
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Linus Torvalds
On Sun, Jun 9, 2013 at 12:56 AM, Benjamin Herrenschmidt
 wrote:
>
> Here are a few more powerpc changes for 3.10. I've merged your
> tree in at some point (which I generally avoid) in order to get
> the compat network fixes as soon as possible.
>
> Mostly regressions, and stuff I judged could/should still go in at
> this stage.

Not pulled, because your hamster smells of elderberries.

This is not just bugfixes. In fact, as far as I can tell, this
*introduces* bugs, with that "get_user()" in the exception path that
can apparently happen with irqs disabled and will thus potentially
result in new warnings that just make things unreadable.

I'm f*cking tired of people having problems understanding "we're past
rc5". If it's not something you would call stable material, you
shouldn't send it to me.

  Linus
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space

2013-06-09 Thread Wang Dongsheng-B40534
Sorry, please ignore this patch.
It has been superseded.
Replaced by: http://patchwork.ozlabs.org/patch/250033/

- dongsheng

> -----Original Message-----
> From: Wang Dongsheng-B40534
> Sent: Sunday, June 09, 2013 1:23 PM
> To: b...@kernel.crashing.org; johan...@sipsolutions.net; an...@enomsg.org
> Cc: Wood Scott-B07421; ga...@kernel.crashing.org; linuxppc-
> d...@lists.ozlabs.org; Wang Dongsheng-B40534
> Subject: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access
> to kernel space
> 
> If PID is used in the TLB, after hibernation resume, the user
> threads will access to kernel space.
> 
> We must restore PID register, because TLB will use PID. The
> hibernation suspend flow is trapped from user space to kernel
> space, the PID register is user thread pid.
> 
> The hibernation resume begins in the kernel start flow, where the PID
> is always 0. After the kernel thread returns to the user thread, no
> context switch takes place and the PID is not updated, because the
> kernel thread was trapped from user space. So if we didn't restore
> the PID, the user-space thread would be addressing kernel
> space.
> 
> There are two ways to restore PID:
> 1/ In swsusp_arch_suspend/swsusp_arch_resume, save/restore PID register.
> 2/ Restore it from restore_processor_state; this function will
>do context switch.
>switch_mmu_context(current->active_mm, current->active_mm)
> 
> PPC32 Using the second method. For consistency reason, PPC64 using
> the same way.
> 
> Signed-off-by: Wang Dongsheng 
> ---
>  arch/powerpc/kernel/swsusp.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
> index eae33e1..1930e44 100644
> --- a/arch/powerpc/kernel/swsusp.c
> +++ b/arch/powerpc/kernel/swsusp.c
> @@ -32,7 +32,5 @@ void save_processor_state(void)
> 
>  void restore_processor_state(void)
>  {
> -#ifdef CONFIG_PPC32
>   switch_mmu_context(current->active_mm, current->active_mm);
> -#endif
>  }
> --
> 1.8.0


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH v2 1/2] powerpc: add Book E support to 64-bit hibernation

2013-06-09 Thread Wang Dongsheng-B40534
Sorry, please ignore this patch.
It has been superseded.
Replaced by: http://patchwork.ozlabs.org/patch/250032/

- dongsheng

> -----Original Message-----
> From: Wang Dongsheng-B40534
> Sent: Sunday, June 09, 2013 1:21 PM
> To: johan...@sipsolutions.net; an...@enomsg.org; Wood Scott-B07421
> Cc: ga...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org; Wang
> Dongsheng-B40534
> Subject: [PATCH v2 1/2] powerpc: add Book E support to 64-bit hibernation
> 
> Update the 64-bit hibernation code to support Book E CPUs.
> Some registers and instructions are not defined for Book3e
> (SDR reg, tlbia instruction).
> 
> SDR: Storage Description Register. Book3S and Book3E have different
> address translation mode, we do not need HTABORG & HTABSIZE to
> translate virtual address to real address.
> 
> More registers are saved in BookE-64bit.(TCR, SPRGx, ...)
> 
> Signed-off-by: Wang Dongsheng 
> ---
> v2:
> * Add: _tlbil_all
> *
> * The boot core get a virtual address, when the boot process,
> * the virtual address corresponds to a physical address. After
> * hibernation resume memory snapshots, The corresponding
> * relationship between the virtual memory and physical memory
> * might change again. We need to get a new page table. So we
> * need to invalidate TLB after resume pages.
> *
> * Invalidations TLB Using tlbilx/tlbivax/MMUCSR0.
> * tlbilx used here.
> *
> * Add: save/restore PID
> *
> * We must restore PID register, because TLB will use PID. The
> * hibernation suspend flow is trapped from user space to kernel
> * space, the PID register is user thread pid.
> *
> * The hibernation resume begins in the kernel start flow, where the PID
> * is always 0. After the kernel thread returns to the user thread, no
> * context switch takes place and the PID is not updated, because the
> * kernel thread was trapped from user space. So if we didn't restore
> * the PID, the user-space thread would be addressing kernel
> * space.
> *
> * There are two ways to restore PID:
> * 1/ In this file save/restore PID register.
> * 2/ Restore it from restore_processor_state; this function will
> *do context switch.
> *switch_mmu_context(current->active_mm, current->active_mm)
> *
> * PPC32 Using the second method. For consistency reason, PPC64
> * using the same way.
> *
> * History:
> * Wood Scott(A): Please investigate the issue of whether we are loading
> *kernel module code in this step
> * R: Kernel will allocate the memory for module code segment and data
> *segment. First allocate a memory, and copy the umod to hdr members
> *of the struct load_info. Due to the temporary assigned module
> belongs
> *to the kernel space, so it belongs to the kernel data.
> *
> *The kernel of all data will be saved when hibernation suspend. So
> *the module which has already been inserted will be saved.
> 
>  arch/powerpc/kernel/swsusp_asm64.S | 102
> -
>  1 file changed, 100 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/swsusp_asm64.S
> b/arch/powerpc/kernel/swsusp_asm64.S
> index 86ac1d9..c7e2b4a 100644
> --- a/arch/powerpc/kernel/swsusp_asm64.S
> +++ b/arch/powerpc/kernel/swsusp_asm64.S
> @@ -46,10 +46,30 @@
>  #define SL_r29   0xe8
>  #define SL_r30   0xf0
>  #define SL_r31   0xf8
> -#define SL_SIZE  SL_r31+8
> +#define SL_SPRG0 0x100
> +#define SL_SPRG1 0x108
> +#define SL_SPRG2 0x110
> +#define SL_SPRG3 0x118
> +#define SL_SPRG4 0x120
> +#define SL_SPRG5 0x128
> +#define SL_SPRG6 0x130
> +#define SL_SPRG7 0x138
> +#define SL_TCR   0x140
> +#define SL_PID   0x148
> +#define SL_SIZE  SL_PID+8
> 
>  /* these macros rely on the save area being
>   * pointed to by r11 */
> +
> +#define SAVE_SPR(register)   \
> + mfspr   r0,SPRN_##register  ;\
> + std r0,SL_##register(r11)
> +#define RESTORE_SPR(register)\
> + ld  r0,SL_##register(r11)   ;\
> + mtspr   SPRN_##register,r0
> +#define RESTORE_SPRG(n)  \
> + ld  r0,SL_SPRG##n(r11)  ;\
> + mtsprg  n,r0
>  #define SAVE_SPECIAL(special)\
>   mf##special r0  ;\
>   std r0, SL_##special(r11)
> @@ -103,8 +123,22 @@ _GLOBAL(swsusp_arch_suspend)
>   SAVE_REGISTER(r30)
>   SAVE_REGISTER(r31)
>   SAVE_SPECIAL(MSR)
> - SAVE_SPECIAL(SDR1)
>   SAVE_SPECIAL(XER)
> +#ifdef CONFIG_PPC_BOOK3S_64
> + SAVE_SPECIAL(SDR1)
> +#else
> + SAVE_SPR(TCR)
> + /* Save SPRGs */
> + SAVE_SPR(SPRG0)
> + SAVE_SPR(SPRG1)
> + SAVE_SPR(SPRG2)
> + SAVE_SPR(SPRG3)
> + SAVE_SPR(SPRG4)
> + SAVE_SPR(SPRG5)
> + SAVE_SPR(SPRG6)
> + SAVE_SPR(SPRG7)
> + SAVE_SPR(PID);
> +#endif
> 
>   /* we push the stack up 128 bytes but don't store the
>* stack pointer on the stack like a real stackframe */
> @@ -151,6 +185,7 @@ copy_page_loop:
>   bn

[PATCH 2/2] powerpc/hibernate: add restore mmu context after resume

2013-06-09 Thread Wang Dongsheng
Add restore_mmu_context to replace switch_mmu_context in
restore_processor_state, because switch_mmu_context does
a whole pile of stuff that is probably completely unnecessary.

We just need to restore the existing process context and
invalidate the TLB for the boot core.

Signed-off-by: Wang Dongsheng 
---
 arch/powerpc/include/asm/tlbflush.h |  2 ++
 arch/powerpc/kernel/swsusp.c|  4 +---
 arch/powerpc/mm/tlb_nohash.c| 12 
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/tlbflush.h 
b/arch/powerpc/include/asm/tlbflush.h
index 61a5927..c401fe3 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -44,6 +44,8 @@ extern void local_flush_tlb_page(struct vm_area_struct *vma, 
unsigned long vmadd
 extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
   int tsize, int ind);
 
+extern void restore_mmu_context(void);
+
 #ifdef CONFIG_SMP
 extern void flush_tlb_mm(struct mm_struct *mm);
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index eae33e1..0b104d7 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -32,7 +32,5 @@ void save_processor_state(void)
 
 void restore_processor_state(void)
 {
-#ifdef CONFIG_PPC32
-   switch_mmu_context(current->active_mm, current->active_mm);
-#endif
+   restore_mmu_context();
 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index df32a83..a5a0708 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -39,10 +39,12 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #include "mmu_decl.h"
 
@@ -193,6 +195,16 @@ void local_flush_tlb_page(struct vm_area_struct *vma, 
unsigned long vmaddr)
 }
 EXPORT_SYMBOL(local_flush_tlb_page);
 
+void restore_mmu_context(void)
+{
+   struct mm_struct *mm = current->active_mm;
+
+   set_context(mm->context.id, mm->pgd);
+
+   _tlbil_all();
+}
+EXPORT_SYMBOL(restore_mmu_context);
+
 /*
  * And here are the SMP non-local implementations
  */
-- 
1.8.0


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/2] powerpc: add Book E support to 64-bit hibernation

2013-06-09 Thread Wang Dongsheng
Update the 64-bit hibernation code to support Book E CPUs.
Some registers and instructions are not defined for Book3e
(SDR reg, tlbia instruction).

SDR: Storage Description Register. Book3S and Book3E have different
address translation mode, we do not need HTABORG & HTABSIZE to
translate virtual address to real address.

More registers are saved in BookE-64bit.(TCR, SPRGx, ...)

Signed-off-by: Wang Dongsheng 
---
* History:
* Wood Scott(A): Please investigate the issue of whether we are loading
*kernel module code in this step
* R: Kernel will allocate the memory for module code segment and data
*segment. First allocate a memory, and copy the umod to hdr members
*of the struct load_info. Due to the temporary assigned module belongs
*to the kernel space, so it belongs to the kernel data.
*
*The kernel of all data will be saved when hibernation suspend. So
*the module which has already been inserted will be saved.

 arch/powerpc/kernel/swsusp_asm64.S | 64 --
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/swsusp_asm64.S 
b/arch/powerpc/kernel/swsusp_asm64.S
index 86ac1d9..608e4ceb 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -46,10 +46,29 @@
 #define SL_r29 0xe8
 #define SL_r30 0xf0
 #define SL_r31 0xf8
-#define SL_SIZESL_r31+8
+#define SL_SPRG0   0x100
+#define SL_SPRG1   0x108
+#define SL_SPRG2   0x110
+#define SL_SPRG3   0x118
+#define SL_SPRG4   0x120
+#define SL_SPRG5   0x128
+#define SL_SPRG6   0x130
+#define SL_SPRG7   0x138
+#define SL_TCR 0x140
+#define SL_SIZESL_TCR+8
 
 /* these macros rely on the save area being
  * pointed to by r11 */
+
+#define SAVE_SPR(register) \
+   mfspr   r0,SPRN_##register  ;\
+   std r0,SL_##register(r11)
+#define RESTORE_SPR(register)  \
+   ld  r0,SL_##register(r11)   ;\
+   mtspr   SPRN_##register,r0
+#define RESTORE_SPRG(n)\
+   ld  r0,SL_SPRG##n(r11)  ;\
+   mtsprg  n,r0
 #define SAVE_SPECIAL(special)  \
mf##special r0  ;\
std r0, SL_##special(r11)
@@ -103,8 +122,21 @@ _GLOBAL(swsusp_arch_suspend)
SAVE_REGISTER(r30)
SAVE_REGISTER(r31)
SAVE_SPECIAL(MSR)
-   SAVE_SPECIAL(SDR1)
SAVE_SPECIAL(XER)
+#ifdef CONFIG_PPC_BOOK3S_64
+   SAVE_SPECIAL(SDR1)
+#else
+   SAVE_SPR(TCR)
+   /* Save SPRGs */
+   SAVE_SPR(SPRG0)
+   SAVE_SPR(SPRG1)
+   SAVE_SPR(SPRG2)
+   SAVE_SPR(SPRG3)
+   SAVE_SPR(SPRG4)
+   SAVE_SPR(SPRG5)
+   SAVE_SPR(SPRG6)
+   SAVE_SPR(SPRG7)
+#endif
 
/* we push the stack up 128 bytes but don't store the
 * stack pointer on the stack like a real stackframe */
@@ -151,6 +183,7 @@ copy_page_loop:
bne+copyloop
 nothing_to_copy:
 
+#ifdef CONFIG_PPC_BOOK3S_64
/* flush caches */
lis r3, 0x10
mtctr   r3
@@ -167,6 +200,7 @@ nothing_to_copy:
sync
 
tlbia
+#endif
 
ld  r11,swsusp_save_area_ptr@toc(r2)
 
@@ -208,16 +242,42 @@ nothing_to_copy:
RESTORE_REGISTER(r29)
RESTORE_REGISTER(r30)
RESTORE_REGISTER(r31)
+
+#ifdef CONFIG_PPC_BOOK3S_64
/* can't use RESTORE_SPECIAL(MSR) */
ld  r0, SL_MSR(r11)
mtmsrd  r0, 0
RESTORE_SPECIAL(SDR1)
+#else
+   /* Restore SPRGs */
+   RESTORE_SPRG(0)
+   RESTORE_SPRG(1)
+   RESTORE_SPRG(2)
+   RESTORE_SPRG(3)
+   RESTORE_SPRG(4)
+   RESTORE_SPRG(5)
+   RESTORE_SPRG(6)
+   RESTORE_SPRG(7)
+
+   RESTORE_SPECIAL(MSR)
+
+   /* Restore TCR and clear any pending bits in TSR. */
+   RESTORE_SPR(TCR)
+   lis r0, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
+   mtspr   SPRN_TSR,r0
+
+   /* Kick decrementer */
+   li  r0,1
+   mtdec   r0
+#endif
RESTORE_SPECIAL(XER)
 
sync
 
addir1,r1,-128
+#ifdef CONFIG_PPC_BOOK3S_64
bl  slb_flush_and_rebolt
+#endif
bl  do_after_copyback
addir1,r1,128
 
-- 
1.8.0


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 5/5] powerpc/tm: Fix return of active 64bit signals

2013-06-09 Thread Michael Neuling
Currently we only restore signals which are transactionally suspended but it's
possible that the transaction can be restored even when it's active.  Most
likely this will result in a transactional rollback by the hardware as the
transaction will have been doomed by an earlier treclaim.

The current code is a legacy of earlier kernel implementations which did
software rollback of active transactions in the kernel.  That code has now gone
but we didn't correctly fix up this part of the signals code which still makes
assumptions based on having software rollback.

This changes the signal return code to always restore both contexts on 64 bit
signal return.  It also ensures that the MSR TM bits are properly restored from
the signal context which they are not currently.

Signed-off-by: Michael Neuling 
cc: sta...@vger.kernel.org (v3.9 only)
---
 arch/powerpc/kernel/signal_64.c |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 3459473..887e99d 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -410,6 +410,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 
/* get MSR separately, transfer the LE bit if doing signal return */
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+   /* pull in MSR TM from user context */
+   regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+
+   /* pull in MSR LE from user context */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
/* The following non-GPR non-FPR non-VR state is also checkpointed: */
@@ -505,8 +509,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
tm_enable();
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
-   /* The task has moved into TM state S, so ensure MSR reflects this: */
-   regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33);
 
/* This loads the speculative FP/VEC state, if used */
if (msr & MSR_FP) {
@@ -654,7 +656,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, 
unsigned long r5,
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
-   if (MSR_TM_SUSPENDED(msr)) {
+   if (MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
if (__get_user(uc_transact, &uc->uc_link))
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 4/5] powerpc/tm: Fix return of 32bit rt signals to active transactions

2013-06-09 Thread Michael Neuling
Currently we only restore signals which are transactionally suspended but it's
possible that the transaction can be restored even when it's active.  Most
likely this will result in a transactional rollback by the hardware as the
transaction will have been doomed by an earlier treclaim.

The current code is a legacy of earlier kernel implementations which did
software rollback of active transactions in the kernel.  That code has now gone
but we didn't correctly fix up this part of the signals code which still makes
assumptions based on having software rollback.

This changes the signal return code to always restore both contexts on 32 bit
rt signal return.

Signed-off-by: Michael Neuling 
cc: sta...@vger.kernel.org (v3.9 only)
---
 arch/powerpc/kernel/signal_32.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 364cb1e..0f83122 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1245,7 +1245,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int 
r7, int r8,
if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
goto bad;
 
-   if (MSR_TM_SUSPENDED(msr_hi<<32)) {
+   if (MSR_TM_ACTIVE(msr_hi<<32)) {
/* We only recheckpoint on return if we're
 * transaction.
 */
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 3/5] powerpc/tm: Fix restoration of MSR on 32bit signal return

2013-06-09 Thread Michael Neuling
Currently we clear out the MSR TM bits on signal return assuming that the
signal should never return to an active transaction.

This is bogus as the user may do this.  It's most likely the transaction will
be doomed due to a treclaim but that's a problem for the HW not the kernel.

The current code is a legacy of earlier kernel implementations which did
software rollback of active transactions in the kernel.  That code has now gone
but we didn't correctly fix up this part of the signals code which still makes
the assumption that it must be returning to a suspended transaction.

This pulls out both MSR TM bits from the user supplied context rather than just
setting TM suspend.  We pull out only the bits needed to ensure the user can't
do anything dangerous to the MSR.

Signed-off-by: Michael Neuling 
cc: sta...@vger.kernel.org (v3.9 only)
---
 arch/powerpc/kernel/signal_32.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index fa81462..364cb1e 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -754,7 +754,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 struct mcontext __user *tm_sr)
 {
long err;
-   unsigned long msr;
+   unsigned long msr, msr_hi;
 #ifdef CONFIG_VSX
int i;
 #endif
@@ -859,8 +859,11 @@ static long restore_tm_user_regs(struct pt_regs *regs,
tm_enable();
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
-   /* The task has moved into TM state S, so ensure MSR reflects this */
-   regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S;
+   /* Get the top half of the MSR */
+   if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+   return 1;
+   /* Pull in MSR TM from user context */
+   regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
 
/* This loads the speculative FP/VEC state, if used */
if (msr & MSR_FP) {
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 2/5] powerpc/tm: Fix 32 bit non-rt signals

2013-06-09 Thread Michael Neuling
Currently sys_sigreturn() is TM unaware.  Therefore, if we take a 32 bit signal
without SIGINFO (non RT) inside a transaction, on signal return we don't
restore the signal frame correctly.

This checks if the signal frame being restored is an active transaction, and
if so, it copies the additional state to ptregs so it can be restored.

Signed-off-by: Michael Neuling 
cc: sta...@vger.kernel.org (v3.9 only)
---
 arch/powerpc/kernel/signal_32.c |   30 +-
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 5bc819f..fa81462 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1494,16 +1494,22 @@ badframe:
 long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
   struct pt_regs *regs)
 {
+   struct sigframe __user *sf;
struct sigcontext __user *sc;
struct sigcontext sigctx;
struct mcontext __user *sr;
void __user *addr;
sigset_t set;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   struct mcontext __user *mcp, *tm_mcp;
+   unsigned long msr_hi;
+#endif
 
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-   sc = (struct sigcontext __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+   sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
+   sc = &sf->sctx;
addr = sc;
if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
goto badframe;
@@ -1520,11 +1526,25 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int 
r7, int r8,
 #endif
set_current_blocked(&set);
 
-   sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
-   addr = sr;
-   if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
-   || restore_user_regs(regs, sr, 1))
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   mcp = (struct mcontext __user *)&sf->mctx;
+   tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
+   if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
goto badframe;
+   if (MSR_TM_ACTIVE(msr_hi<<32)) {
+   if (!cpu_has_feature(CPU_FTR_TM))
+   goto badframe;
+   if (restore_tm_user_regs(regs, mcp, tm_mcp))
+   goto badframe;
+   } else
+#endif
+   {
+   sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
+   addr = sr;
+   if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
+   || restore_user_regs(regs, sr, 1))
+   goto badframe;
+   }
 
set_thread_flag(TIF_RESTOREALL);
return 0;
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 1/5] powerpc/tm: Fix writing top half of MSR on 32 bit signals

2013-06-09 Thread Michael Neuling
The MSR TM controls are in the top 32 bits of the MSR hence on 32 bit signals,
we stick the top half of the MSR in the checkpointed signal context so that the
user can access it.

Unfortunately, we don't currently write anything to the checkpointed signal
context when coming in from a non-transactional process and hence the top MSR
bits can contain junk.

This updates the 32 bit signal handling code to always write something to the
top MSR bits so that users know if the process is transactional or not and the
kernel can use it on signal return.

Signed-off-by: Michael Neuling 
cc: sta...@vger.kernel.org (v3.9 only)
---
 arch/powerpc/kernel/signal_32.c |   29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 201385c..5bc819f 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -407,7 +407,8 @@ inline unsigned long copy_transact_fpr_from_user(struct 
task_struct *task,
  * altivec/spe instructions at some point.
  */
 static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
-   int sigret, int ctx_has_vsx_region)
+ struct mcontext __user *tm_frame, int sigret,
+ int ctx_has_vsx_region)
 {
unsigned long msr = regs->msr;
 
@@ -475,6 +476,12 @@ static int save_user_regs(struct pt_regs *regs, struct 
mcontext __user *frame,
 
if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
return 1;
+   /* We need to write 0 the MSR top 32 bits in the tm frame so that we
+* can check it on the restore to see if TM is active
+*/
+   if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
+   return 1;
+
if (sigret) {
/* Set up the sigreturn trampoline: li r0,sigret; sc */
if (__put_user(0x3800UL + sigret, &frame->tramp[0])
@@ -952,6 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct 
k_sigaction *ka,
 {
struct rt_sigframe __user *rt_sf;
struct mcontext __user *frame;
+   struct mcontext __user *tm_frame = NULL;
void __user *addr;
unsigned long newsp = 0;
int sigret;
@@ -985,23 +993,24 @@ int handle_rt_signal32(unsigned long sig, struct 
k_sigaction *ka,
}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   tm_frame = &rt_sf->uc_transact.uc_mcontext;
if (MSR_TM_ACTIVE(regs->msr)) {
-   if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext,
- &rt_sf->uc_transact.uc_mcontext, sigret))
+   if (save_tm_user_regs(regs, frame, tm_frame, sigret))
goto badframe;
}
else
 #endif
-   if (save_user_regs(regs, frame, sigret, 1))
+   {
+   if (save_user_regs(regs, frame, tm_frame, sigret, 1))
goto badframe;
+   }
regs->link = tramp;
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr)) {
if (__put_user((unsigned long)&rt_sf->uc_transact,
   &rt_sf->uc.uc_link)
-   || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext),
- &rt_sf->uc_transact.uc_regs))
+   || __put_user((unsigned long)tm_frame, 
&rt_sf->uc_transact.uc_regs))
goto badframe;
}
else
@@ -1170,7 +1179,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
mctx = (struct mcontext __user *)
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
-   || save_user_regs(regs, mctx, 0, ctx_has_vsx_region)
+   || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
|| put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
|| __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
return -EFAULT;
@@ -1392,6 +1401,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction 
*ka,
 {
struct sigcontext __user *sc;
struct sigframe __user *frame;
+   struct mcontext __user *tm_mctx = NULL;
unsigned long newsp = 0;
int sigret;
unsigned long tramp;
@@ -1425,6 +1435,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction 
*ka,
}
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   tm_mctx = &frame->mctx_transact;
if (MSR_TM_ACTIVE(regs->msr)) {
if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
  sigret))
@@ -1432,8 +1443,10 @@ int handle_signal32(unsigned long sig, struct 
k_sigaction *ka,
}
else
 #endif
-   if (save_user_regs(regs, &frame->mctx, sigret, 1))
+   {
+   if (save_user_regs(regs, &frame->mctx, tm_mct

Re: [PATCH 2/5] powerpc/tm: Fix 32 bit non-rt signals

2013-06-09 Thread Michael Neuling
Benjamin Herrenschmidt  wrote:

> On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote:
> > Currently sys_sigreturn() is TM unaware.  Therefore, if we take a 32 bit 
> > signal
> > without SIGINFO (non RT) inside a transaction, on signal return we don't
> > restore the signal frame correctly.
> > 
> > This checks if the signal frame being restored is an active transaction, 
> > and
> > if so, it copies the additional state to ptregs so it can be restored.
> > 
> > Signed-off-by: Michael Neuling 
> > ---
> 
>  .../...
> 
> > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > +   mcp = (struct mcontext __user *)&sf->mctx;
> > +   tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
> > +   if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
> > goto badframe;
> > +   if MSR_TM_ACTIVE(msr_hi<<32) {
> 
> Missing ( and ). I'll apply that fix locally.
> 
> Apart from that, I suppose it's ok. I don't see any exposure
> coming from users "cooking" the tm_frame and calling sigreturn,
> so as long as we are confident userspace generally only uses
> sigreturn with frames it got from an actual signal, and doesn't
> try to "generate" frames by hand, we should be ok.

We should add a has_cpu_feature(TM) here also in case someone cooks up
a sig frame with MSR TM active, but on a non TM CPU.  This could possibly
result in a trecheckpoint on a non TM CPU, hence an illegal instruction
in the kernel.

I'll repost.

Thanks,
Mikey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/5] powerpc/tm: Fix restoration of MSR on 32bit signal return

2013-06-09 Thread Michael Neuling
Benjamin Herrenschmidt  wrote:

> On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote:
> > Currently we clear out the MSR TM bits on signal return assuming that the
> > signal should never return to an active transaction.
> > 
> > This is bogus as the user may do this.  It's most likely the transaction 
> > will
> > be doomed due to a treclaim but that's a problem for the HW not the kernel.
> > 
> > This removes the stripping of these MSR TM bits.
> > 
> > Signed-off-by: Michael Neuling 
> > ---
> 
> > @@ -859,8 +860,10 @@ static long restore_tm_user_regs(struct pt_regs *regs,
> > tm_enable();
> > /* This loads the checkpointed FP/VEC state, if used */
> > tm_recheckpoint(&current->thread, msr);
> > -   /* The task has moved into TM state S, so ensure MSR reflects this */
> > -   regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S;
> > +   /* Restore the top half of the MSR */
> > +   if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
> > +   return 1;
> > +   regs->msr = (regs->msr | (((unsigned long)msr_hi) << 32));
> 
> What kind of damage can I do by calling sigreturn with a cooked
> frame with random MSR bits set ? You should probably filter
> what bits you allow to come from the frame.

Lots of damage... good point.

We do that in the 64 bit version of this.  I'll update to do the same.

> Additionally, I would also make sure I only do that if the CPU
> features say TM is supported in case that MSR bit means something else
> on a different/older CPU...

Ok, I'll add that also.

Mikey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Linux 3.9.5 [patch to fix powerpc build error attached]

2013-06-09 Thread Michael Neuling
Guenter Roeck  wrote:

> On Fri, Jun 07, 2013 at 12:58:16PM -0700, Greg KH wrote:
> > I'm announcing the release of the 3.9.5 kernel.
> > 
> > All users of the 3.9 kernel series must upgrade.
> > 
> > The updated 3.9.y git tree can be found at:
> > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
> > linux-3.9.y
> > and can be browsed at the normal kernel.org git web browser:
> > 
> > http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
> > 
> Powerpc build is fixed with the following patch.
> 
> From 8ce3edac0a86f103bd1037110662d5d80a42dd5b Mon Sep 17 00:00:00 2001
> From: Guenter Roeck 
> Date: Sat, 8 Jun 2013 07:23:47 -0700
> Subject: [PATCH] powerpc: Fix build error in stable/3.9
> 
> Commit e71c42189 (powerpc/tm: Abort on emulation and alignment faults)
> introduced a powerpc build error in 3.9.5.

ACK.

Looks like ba12eed (powerpc: Exception hooks for context tracking
subsystem) was the cause (but not a prerequisite) that was introduced
post 3.9.

Thanks,
Mikey

> 
> Signed-off-by: Guenter Roeck 
> ---
> No upstream commit. Compile tested only.
> 
>  arch/powerpc/kernel/traps.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 1c22b2d..29857c6 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1151,7 +1151,7 @@ void alignment_exception(struct pt_regs *regs)
>   local_irq_enable();
>  
>   if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
> - goto bail;
> + return;
>  
>   /* we don't implement logging of alignment exceptions */
>   if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> -- 
> 1.7.9.7
> 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH] powerpc/mpc85xx: fix non-bootcpu cannot up after hibernation resume

2013-06-09 Thread Wang Dongsheng-B40534
Hi kumar,

Could you apply this patch?

Thanks.

-dongsheng

> -Original Message-
> From: Anton Vorontsov [mailto:an...@scarybugs.org] On Behalf Of Anton
> Vorontsov
> Sent: Friday, May 24, 2013 1:34 AM
> To: Wang Dongsheng-B40534
> Cc: pau...@samba.org; r...@sisk.pl; b...@kernel.crashing.org;
> johan...@sipsolutions.net; Wood Scott-B07421; Li Yang-R58472; Zhao
> Chenhui-B35336; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH] powerpc/mpc85xx: fix non-bootcpu cannot up after
> hibernation resume
> 
> Hi!
> 
> On Tue, May 14, 2013 at 08:59:13AM +, Wang Dongsheng-B40534 wrote:
> > I send to a wrong email address "Anton Vorontsov
> "
> >
> > Add Anton Vorontsov  to this email.
> 
> I don't have any means to test it, but the patch itself looks good and
> the description makes sense. So,
> 
> Reviewed-by: Anton Vorontsov 
> 
> Thanks!
> 
> >
> > Thanks all.
> >
> > > -Original Message-
> > > From: Wang Dongsheng-B40534
> > > Sent: Tuesday, May 14, 2013 4:06 PM
> > > To: avoront...@ru.mvista.com
> > > Cc: pau...@samba.org; r...@sisk.pl; b...@kernel.crashing.org;
> > > johan...@sipsolutions.net; Wood Scott-B07421; Li Yang-R58472; Zhao
> > > Chenhui-B35336; linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> > > Subject: [PATCH] powerpc/mpc85xx: fix non-bootcpu cannot up after
> > > hibernation resume
> > >
> > > This problem belongs to the core synchronization issues.
> > > The cpu1 already updated spin_table values, but bootcore cannot get
> > > this value in time.
> > >
> > > After bootcpu hibernation restores the pages, we are now running
> > > with the kernel data of the old kernel fully restored. if we reset
> > > the non-bootcpus that will be reset cache(tlb), the non-bootcpus
> > > will get new address(map virtual and physical address spaces).
> > > but bootcpu tlb cache still use boot kernel data, so we need to
> > > invalidate the bootcpu tlb cache make it to get new main memory data.
> > >
> > > log:
> > > Enabling non-boot CPUs ...
> > > smp_85xx_kick_cpu: timeout waiting for core 1 to reset
> > > smp: failed starting cpu 1 (rc -2)
> > > Error taking CPU1 up: -2
> > >
> > > Signed-off-by: Wang Dongsheng 
> > >
> > > diff --git a/arch/powerpc/kernel/swsusp_booke.S
> > > b/arch/powerpc/kernel/swsusp_booke.S
> > > index 11a3930..9503249 100644
> > > --- a/arch/powerpc/kernel/swsusp_booke.S
> > > +++ b/arch/powerpc/kernel/swsusp_booke.S
> > > @@ -141,6 +141,19 @@ _GLOBAL(swsusp_arch_resume)
> > >   lis r11,swsusp_save_area@h
> > >   ori r11,r11,swsusp_save_area@l
> > >
> > > + /*
> > > +  * The boot core get a virtual address, when the boot process,
> > > +  * the virtual address corresponds to a physical address. After
> > > +  * hibernation resume memory snapshots, The corresponding
> > > +  * relationship between the virtual memory and physical memory
> > > +  * might change again. We need to get a new page table. So we
> > > +  * need to invalidate TLB after resume pages.
> > > +  *
> > > +  * Invalidations TLB Using tlbilx/tlbivax/MMUCSR0.
> > > +  * tlbilx used here.
> > > +  */
> > > + bl  _tlbil_all
> > > +
> > >   lwz r4,SL_SPRG0(r11)
> > >   mtsprg  0,r4
> > >   lwz r4,SL_SPRG1(r11)
> > > --
> > > 1.8.0
> >
> >

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support

2013-06-09 Thread Wang Dongsheng-B40534
Hi ben,

Could you apply these patches? Thanks. :)

- dongsheng

> -Original Message-
> From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
> Sent: Monday, May 13, 2013 1:00 PM
> To: Wang Dongsheng-B40534
> Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421;
> ga...@kernel.crashing.org
> Subject: Re: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support
> 
> On Mon, 2013-05-13 at 04:25 +, Wang Dongsheng-B40534 wrote:
> > Hi Benjamin,
> >
> > Could you apply these patches?
> 
> I'll have a look, I was assuming Kumar would take them but since not I'll
> queue them up.
> 
> Cheers,
> Ben.
> 
> > Scott already ACK.
> >
> > [v3,1/4] powerpc/mpic: add irq_set_wake support
> > http://patchwork.ozlabs.org/patch/234934/
> >
> > [v3,2/4] powerpc/mpic: add global timer support
> > http://patchwork.ozlabs.org/patch/234935/
> >
> > [v3,3/4] powerpc/mpic: create mpic subsystem object
> > http://patchwork.ozlabs.org/patch/234936/
> >
> > [v3,4/4] powerpc/fsl: add MPIC timer wakeup support
> > http://patchwork.ozlabs.org/patch/234937/
> >
> > Thanks.
> >
> > > -Original Message-
> > > From: Wang Dongsheng-B40534
> > > Sent: Friday, May 03, 2013 9:54 AM
> > > To: 'ga...@kernel.crashing.org'
> > > Cc: 'linuxppc-dev@lists.ozlabs.org'; Wood Scott-B07421;
> > > 'b...@kernel.crashing.org'
> > > Subject: RE: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support
> > >
> > > Hi Kumar,
> > >
> > > Could you apply these patches?
> > >
> > > Thanks.
> > >
> > > > -Original Message-
> > > > From: Wang Dongsheng-B40534
> > > > Sent: Tuesday, April 23, 2013 6:10 PM
> > > > To: ga...@kernel.crashing.org
> > > > Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421
> > > > Subject: RE: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake support
> > > >
> > > > Hi Kumar,
> > > >
> > > > Could you apply these patches?
> > > >
> > > > Thanks.
> > > >
> > > > [v3,1/4] powerpc/mpic: add irq_set_wake support
> > > > http://patchwork.ozlabs.org/patch/234934/
> > > >
> > > > [v3,2/4] powerpc/mpic: add global timer support
> > > > http://patchwork.ozlabs.org/patch/234935/
> > > >
> > > > [v3,3/4] powerpc/mpic: create mpic subsystem object
> > > > http://patchwork.ozlabs.org/patch/234936/
> > > >
> > > > [v3,4/4] powerpc/fsl: add MPIC timer wakeup support
> > > > http://patchwork.ozlabs.org/patch/234937/
> > > >
> > > >
> > > > > -Original Message-
> > > > > From: Wood Scott-B07421
> > > > > Sent: Wednesday, April 17, 2013 7:30 AM
> > > > > To: Wang Dongsheng-B40534
> > > > > Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org;
> > > > > ga...@kernel.crashing.org
> > > > > Subject: Re: [PATCH v3 1/4] powerpc/mpic: add irq_set_wake
> > > > > support
> > > > >
> > > > > ACK
> > > > >
> > > > > -Scott
> > > > >
> > > > > On 04/16/2013 05:58:52 AM, Wang Dongsheng-B40534 wrote:
> > > > > > Hi scott,
> > > > > >
> > > > > > Could you ACK these patches?
> > > > > >
> > > > > > [PATCH v3 2/4] powerpc/mpic: add global timer support [PATCH
> > > > > > v3 3/4]
> > > > > > powerpc/mpic: create mpic subsystem object [PATCH v3 4/4]
> > > > > > powerpc/fsl: add MPIC timer wakeup support
> > > > > >
> > > > > > Thanks.
> > > > > >
> 
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[git pull] Please pull powerpc.git merge branch

2013-06-09 Thread Benjamin Herrenschmidt
Hi Linus !

Here are a few more powerpc changes for 3.10. I've merged your
tree in at some point (which I generally avoid) in order to get
the compat network fixes as soon as possible.

Mostly regressions, and stuff I judged could/should still go in at
this stage.

I'm still waiting on some more fixes to the signal handling
vs. transactional memory (this is really ugly btw) which are
hopefully coming next week.

Cheers,
Ben.

The following changes since commit 1612e111e4e565422242727efb59499cce8738e4:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2013-06-06 
18:09:05 -0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge

for you to fetch changes up to 227331303afdca06e44dcda41d27b72015153827:

  powerpc: Print instruction when logging unhandled exceptions (2013-06-09 
17:29:16 +1000)


Anton Blanchard (1):
  powerpc: Print instruction when logging unhandled exceptions

Benjamin Herrenschmidt (1):
  Merge remote-tracking branch 'origin/master' into merge

Gavin Shan (1):
  powerpc/eeh: Don't check RTAS token to get PE addr

Kevin Hao (1):
  powerpc/pci: Check the bus address instead of resource address in 
pcibios_fixup_resources

Michael Ellerman (3):
  powerpc: Rename PMU interrupts from CNT to PMI
  powerpc/perf: Fix deadlock caused by calling printk() in PMU exception
  powerpc: Partial revert of "Context switch more PMU related SPRs"

Michael Neuling (4):
  powerpc/power8: Fix oprofile and perf
  powerpc/pseries: Simplify denormalization handler
  powerpc/power8: Update denormalization handler
  powerpc/hw_breakpoints: Add DABRX cpu feature to fix 32-bit regression

 arch/powerpc/include/asm/cputable.h  |   17 +++--
 arch/powerpc/kernel/cputable.c   |8 +--
 arch/powerpc/kernel/entry_64.S   |   28 
 arch/powerpc/kernel/exceptions-64s.S |   90 --
 arch/powerpc/kernel/irq.c|2 +-
 arch/powerpc/kernel/pci-common.c |4 +-
 arch/powerpc/kernel/process.c|3 +-
 arch/powerpc/kernel/traps.c  |   11 +++-
 arch/powerpc/perf/core-book3s.c  |2 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |   12 ++--
 10 files changed, 60 insertions(+), 117 deletions(-)


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space

2013-06-09 Thread Wang Dongsheng-B40534


> -Original Message-
> From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
> Sent: Sunday, June 09, 2013 3:46 PM
> To: Wang Dongsheng-B40534
> Cc: johan...@sipsolutions.net; an...@enomsg.org; Wood Scott-B07421;
> ga...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads
> access to kernel space
> 
> On Sun, 2013-06-09 at 07:44 +, Wang Dongsheng-B40534 wrote:
> > So we just need set set_context() in restore_mmu_context().
> >
> > void restore_mmu_context(struct mm_struct *next) {
> > set_context(next->context.id, next->pgd); }
> 
> We probably also want to flush the TLB, just in case the boot kernel has
> left "something" there (though I wouldn't expect it to have run userspace
> it's not completely impossible).
> 
Yes, but the TLB has already been invalidated.
See my other patch:
http://patchwork.ozlabs.org/patch/250006/

> Cheers,
> Ben.
> 
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space

2013-06-09 Thread Benjamin Herrenschmidt
On Sun, 2013-06-09 at 07:44 +, Wang Dongsheng-B40534 wrote:
> So we just need set set_context() in restore_mmu_context().
> 
> void restore_mmu_context(struct mm_struct *next) {
> set_context(next->context.id, next->pgd);
> }

We probably also want to flush the TLB, just in case the boot kernel has
left "something" there (though I wouldn't expect it to have run
userspace it's not completely impossible).

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads access to kernel space

2013-06-09 Thread Wang Dongsheng-B40534


> -Original Message-
> From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
> Sent: Sunday, June 09, 2013 2:44 PM
> To: Wang Dongsheng-B40534
> Cc: johan...@sipsolutions.net; an...@enomsg.org; Wood Scott-B07421;
> ga...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH v2 2/2] powerpc/hibernate: PPC64 fix user threads
> access to kernel space
> 
> On Sun, 2013-06-09 at 13:22 +0800, Wang Dongsheng wrote:
> > If PID is used in the TLB, after hibernation resume, the user threads
> > will access to kernel space.
> 
>  .../...
> 
> I think the explanation is way more convoluted and confusing here than
> anything else.
> 
> Simply say that upon resume from hibernation, the MMU context needs to be
> restored (this includes the PID register today it might include more if
> we decided to pre-set some MAS for example
> etc...) and be done with it.
> 
> Note that switch_mmu_context() used the way you do is quite "full on", it
> will do a whole pile of stuff that are probably completely unnecessary,
> and in addition might still miss the need to completely flush the TLB
> anyway.
> 
> I would suggest that instead, somebody adds the necessary routine to
> tlb_nohash.c, something like restore_mmu_context() which will do that.
> 
Thanks ben, This is a good idea.

We do not need to check whether the current thread has a context in
restore_mmu_context(), because the current thread has already got a
context in the hibernation suspend flow.

So we just need to call set_context() in restore_mmu_context().

void restore_mmu_context(struct mm_struct *next) {
set_context(next->context.id, next->pgd);
}

-dongsheng

> Cheers,
> Ben.
> 
> > Signed-off-by: Wang Dongsheng 
> > ---
> >  arch/powerpc/kernel/swsusp.c | 2 --
> >  1 file changed, 2 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/swsusp.c
> > b/arch/powerpc/kernel/swsusp.c index eae33e1..1930e44 100644
> > --- a/arch/powerpc/kernel/swsusp.c
> > +++ b/arch/powerpc/kernel/swsusp.c
> > @@ -32,7 +32,5 @@ void save_processor_state(void)
> >
> >  void restore_processor_state(void)
> >  {
> > -#ifdef CONFIG_PPC32
> > switch_mmu_context(current->active_mm, current->active_mm); -#endif
> > }
> 
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/5] powerpc/tm: Fix restoration of MSR on 32bit signal return

2013-06-09 Thread Benjamin Herrenschmidt
On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote:
> Currently we clear out the MSR TM bits on signal return assuming that the
> signal should never return to an active transaction.
> 
> This is bogus as the user may do this.  It's most likely the transaction will
> be doomed due to a treclaim but that's a problem for the HW not the kernel.
> 
> This removes the stripping of these MSR TM bits.
> 
> Signed-off-by: Michael Neuling 
> ---

> @@ -859,8 +860,10 @@ static long restore_tm_user_regs(struct pt_regs *regs,
>   tm_enable();
>   /* This loads the checkpointed FP/VEC state, if used */
>   tm_recheckpoint(&current->thread, msr);
> - /* The task has moved into TM state S, so ensure MSR reflects this */
> - regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S;
> + /* Restore the top half of the MSR */
> + if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
> + return 1;
> + regs->msr = (regs->msr | (((unsigned long)msr_hi) << 32));

What kind of damage can I do by calling sigreturn with a cooked
frame with random MSR bits set ? You should probably filter
what bits you allow to come from the frame.

Additionally, I would also make sure I only do that if the CPU
features say TM is supported in case that MSR bit means something else
on a different/older CPU...

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/5] powerpc/tm: Fix 32 bit non-rt signals

2013-06-09 Thread Benjamin Herrenschmidt
On Fri, 2013-06-07 at 20:36 +1000, Michael Neuling wrote:
> Currently sys_sigreturn() is TM unaware.  Therefore, if we take a 32 bit 
> signal
> without SIGINFO (non RT) inside a transaction, on signal return we don't
> restore the signal frame correctly.
> 
> This checks if the signal frame being restored is an active transaction, and
> if so, it copies the additional state to ptregs so it can be restored.
> 
> Signed-off-by: Michael Neuling 
> ---

 .../...

> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> + mcp = (struct mcontext __user *)&sf->mctx;
> + tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
> + if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
>   goto badframe;
> + if MSR_TM_ACTIVE(msr_hi<<32) {

Missing ( and ). I'll apply that fix locally.

Apart from that, I suppose it's ok. I don't see any exposure
coming from users "cooking" the tm_frame and calling sigreturn,
so as long as we are confident userspace generally only uses
sigreturn with frames it got from an actual signal, and doesn't
try to "generate" frames by hand, we should be ok.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 3/3] powerpc/8xx: Remove 8xx specific "minimal FPU emulation"

2013-06-09 Thread Benjamin Herrenschmidt
This is duplicated code from math-emu and implements such a small
subset of the FPU (load/stores/fmr) that it's essentially pointless
nowadays.

Signed-off-by: Benjamin Herrenschmidt 
---

Note that arch/powerpc/math-emu/math.c seems to have code in there
to do exactly the same thing (subset emulation) if CONFIG_MATH_EMULATION
isn't set. However that code isn't used since we simply don't call
do_mathemu() nor build any of the relevant files when this is not
set.

I'm not removing this just yet. If somebody thinks there is value
in supporting that "minimal set emulation", then send me a patch
that provides a Kconfig/Makefile way of enabling that code, which
makes it work on all soft-fpu powerpc's and not just 8xx.

If nobody reacts, I'll probably just rip the code out by the next
kernel version.

 arch/powerpc/Kconfig|   11 ---
 arch/powerpc/kernel/traps.c |   22 +-
 2 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e3009ab..5374776 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -319,17 +319,6 @@ config PPC_TRANSACTIONAL_MEM
---help---
  Support user-mode Transactional Memory on POWERPC.
 
-config 8XX_MINIMAL_FPEMU
-   bool "Minimal math emulation for 8xx"
-   depends on 8xx && !MATH_EMULATION
-   help
- Older arch/ppc kernels still emulated a few floating point
- instructions such as load and store, even when full math
- emulation is disabled.  Say "Y" here if you want to preserve
- this behavior.
-
- It is recommended that you build a soft-float userspace instead.
-
 config IOMMU_HELPER
def_bool PPC64
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index ff315a7..8093156 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1401,8 +1401,7 @@ void performance_monitor_exception(struct pt_regs *regs)
 void SoftwareEmulation(struct pt_regs *regs)
 {
extern int do_mathemu(struct pt_regs *);
-   extern int Soft_emulate_8xx(struct pt_regs *);
-#if defined(CONFIG_MATH_EMULATION) || defined(CONFIG_8XX_MINIMAL_FPEMU)
+#if defined(CONFIG_MATH_EMULATION)
int errcode;
 #endif
 
@@ -1435,23 +1434,6 @@ void SoftwareEmulation(struct pt_regs *regs)
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return;
}
-
-#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
-   errcode = Soft_emulate_8xx(regs);
-   if (errcode >= 0)
-   PPC_WARN_EMULATED(8xx, regs);
-
-   switch (errcode) {
-   case 0:
-   emulate_single_step(regs);
-   return;
-   case 1:
-   _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-   return;
-   case -EFAULT:
-   _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
-   return;
-   }
 #else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 #endif
@@ -1801,8 +1783,6 @@ struct ppc_emulated ppc_emulated = {
WARN_EMULATED_SETUP(unaligned),
 #ifdef CONFIG_MATH_EMULATION
WARN_EMULATED_SETUP(math),
-#elif defined(CONFIG_8XX_MINIMAL_FPEMU)
-   WARN_EMULATED_SETUP(8xx),
 #endif
 #ifdef CONFIG_VSX
WARN_EMULATED_SETUP(vsx),


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/3] powerpc/math-emu: Allow math-emu to be used for HW FPU

2013-06-09 Thread Benjamin Herrenschmidt
(Including 64-bit ones)

This allows SW emulation by the kernel of optional instructions
such as fsqrt which aren't implemented on some processors, and
thus fixes some Fedora 19 issues such as Anaconda since the
compiler is set to generate those by default on 64-bit.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/Kconfig|6 +-
 arch/powerpc/kernel/traps.c |   12 +++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c33e3ad..e3009ab 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -298,7 +298,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 
 config MATH_EMULATION
bool "Math emulation"
-   depends on 4xx || 8xx || E200 || PPC_MPC832x || E500
+   depends on 4xx || 8xx || PPC_MPC832x || BOOKE
---help---
  Some PowerPC chips designed for embedded applications do not have
  a floating-point unit and therefore do not implement the
@@ -307,6 +307,10 @@ config MATH_EMULATION
  unit, which will allow programs that use floating-point
  instructions to run.
 
+ This is also useful to emulate missing (optional) instructions
+ such as fsqrt on cores that do have an FPU but do not implement
+ them (such as Freescale BookE).
+
 config PPC_TRANSACTIONAL_MEM
bool "Transactional Memory support for POWERPC"
depends on PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 6dfbb38..ff315a7 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1130,7 +1130,17 @@ void __kprobes program_check_exception(struct pt_regs 
*regs)
 * ESR_DST (!?) or 0.  In the process of chasing this with the
 * hardware people - not sure if it can happen on any illegal
 * instruction or only on FP instructions, whether there is a
-* pattern to occurrences etc. -dgibson 31/Mar/2003 */
+* pattern to occurrences etc. -dgibson 31/Mar/2003
+*/
+
+   /*
+* If we support a HW FPU, we need to ensure the FP state
+* is flushed into the thread_struct before attempting
+* emulation
+*/
+#ifdef CONFIG_PPC_FPU
+   flush_fp_to_thread(current);
+#endif
switch (do_mathemu(regs)) {
case 0:
emulate_single_step(regs);


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/3] powerpc/math-emu: Fix decoding of some instructions

2013-06-09 Thread Benjamin Herrenschmidt
The decoding of some instructions such as fsqrt{s} was incorrect,
using the wrong registers, and thus could not work.

This fixes it and also adds a couple of place holders for missing
instructions.

Signed-off-by: Benjamin Herrenschmidt 
---

Note: fre.c etc.. are still empty placeholders, just like fres.c
is today. Eventually somebody needs to fill-in the missing bits

 arch/powerpc/math-emu/Makefile   |3 ++-
 arch/powerpc/math-emu/fre.c  |   11 +++
 arch/powerpc/math-emu/frsqrtes.c |   11 +++
 arch/powerpc/math-emu/math.c |   14 ++
 4 files changed, 34 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/math-emu/fre.c
 create mode 100644 arch/powerpc/math-emu/frsqrtes.c

diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index 7d1dba0..8d035d2 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -4,7 +4,8 @@ obj-$(CONFIG_MATH_EMULATION)+= fabs.o fadd.o fadds.o 
fcmpo.o fcmpu.o \
fmadd.o fmadds.o fmsub.o fmsubs.o \
fmul.o fmuls.o fnabs.o fneg.o \
fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \
-   fres.o frsp.o frsqrte.o fsel.o lfs.o \
+   fres.o fre.o frsp.o fsel.o lfs.o \
+   frsqrte.o frsqrtes.o \
fsqrt.o fsqrts.o fsub.o fsubs.o \
mcrfs.o mffs.o mtfsb0.o mtfsb1.o \
mtfsf.o mtfsfi.o stfiwx.o stfs.o \
diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c
new file mode 100644
index 000..49ccf2c
--- /dev/null
+++ b/arch/powerpc/math-emu/fre.c
@@ -0,0 +1,11 @@
+#include 
+#include 
+#include 
+
+int fre(void *frD, void *frB)
+{
+#ifdef DEBUG
+   printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+   return -ENOSYS;
+}
diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c
new file mode 100644
index 000..7e838e3
--- /dev/null
+++ b/arch/powerpc/math-emu/frsqrtes.c
@@ -0,0 +1,11 @@
+#include 
+#include 
+#include 
+
+int frsqrtes(void *frD, void *frB)
+{
+#ifdef DEBUG
+   printk("%s: %p %p\n", __func__, frD, frB);
+#endif
+   return 0;
+}
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 164d559..0328e66 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -58,8 +58,10 @@ FLOATFUNC(fnabs);
 FLOATFUNC(fneg);
 
 /* Optional */
+FLOATFUNC(fre);
 FLOATFUNC(fres);
 FLOATFUNC(frsqrte);
+FLOATFUNC(frsqrtes);
 FLOATFUNC(fsel);
 FLOATFUNC(fsqrt);
 FLOATFUNC(fsqrts);
@@ -97,6 +99,7 @@ FLOATFUNC(fsqrts);
 #define FSQRTS 0x016   /*   22 */
 #define FRES   0x018   /*   24 */
 #define FMULS  0x019   /*   25 */
+#define FRSQRTES   0x01a   /*   26 */
 #define FMSUBS 0x01c   /*   28 */
 #define FMADDS 0x01d   /*   29 */
 #define FNMSUBS0x01e   /*   30 */
@@ -109,6 +112,7 @@ FLOATFUNC(fsqrts);
 #define FADD   0x015   /*   21 */
 #define FSQRT  0x016   /*   22 */
 #define FSEL   0x017   /*   23 */
+#define FRE0x018   /*   24 */
 #define FMUL   0x019   /*   25 */
 #define FRSQRTE0x01a   /*   26 */
 #define FMSUB  0x01c   /*   28 */
@@ -299,9 +303,10 @@ do_mathemu(struct pt_regs *regs)
case FDIVS: func = fdivs;   type = AB;  break;
case FSUBS: func = fsubs;   type = AB;  break;
case FADDS: func = fadds;   type = AB;  break;
-   case FSQRTS:func = fsqrts;  type = AB;  break;
-   case FRES:  func = fres;type = AB;  break;
+   case FSQRTS:func = fsqrts;  type = XB;  break;
+   case FRES:  func = fres;type = XB;  break;
case FMULS: func = fmuls;   type = AC;  break;
+   case FRSQRTES:  func = frsqrtes;type = XB;  break;
case FMSUBS:func = fmsubs;  type = ABC; break;
case FMADDS:func = fmadds;  type = ABC; break;
case FNMSUBS:   func = fnmsubs; type = ABC; break;
@@ -317,10 +322,11 @@ do_mathemu(struct pt_regs *regs)
case FDIV:  func = fdiv;type = AB;  break;
case FSUB:  func = fsub;type = AB;  break;
case FADD:  func = fadd;type = AB;  break;
-   case FSQRT: func = fsqrt;   type = AB;  break;
+   case FSQRT: func = fsqrt;   type = XB;  break;
+   case FRE:   func = fre; type = XB;