[PATCH v2 6/6] powerpc/eeh: Rework eeh_ops->probe()

2020-03-05 Thread Oliver O'Halloran
With the EEH early probe now being pseries specific there's no need for
eeh_ops->probe() to take a pci_dn. Instead, we can make it take a pci_dev
and use the probe function to map a pci_dev to an eeh_dev. This allows
the platform to implement its own method for finding (or creating) an
eeh_dev for a given pci_dev which also removes a use of pci_dn in
generic EEH code.

This patch also renames eeh_add_device_late() to eeh_probe_device(). This
better reflects what it does and removes the last vestiges of the
early/late EEH probe split.

Reviewed-by: Sam Bobroff 
Signed-off-by: Oliver O'Halloran 
---
v2: Fixed the comment block above eeh_probe_device() to use the new
function name.
---
 arch/powerpc/include/asm/eeh.h   |  6 ++--
 arch/powerpc/kernel/eeh.c| 44 +++-
 arch/powerpc/platforms/powernv/eeh-powernv.c | 30 +--
 arch/powerpc/platforms/pseries/eeh_pseries.c | 23 ++-
 4 files changed, 62 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8580238..964a542 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -215,7 +215,7 @@ enum {
 struct eeh_ops {
char *name;
int (*init)(void);
-   void* (*probe)(struct pci_dn *pdn, void *data);
+   struct eeh_dev *(*probe)(struct pci_dev *pdev);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_pe_addr)(struct eeh_pe *pe);
int (*get_state)(struct eeh_pe *pe, int *delay);
@@ -301,7 +301,7 @@ int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_init(void);
-void eeh_add_device_late(struct pci_dev *);
+void eeh_probe_device(struct pci_dev *pdev);
 void eeh_remove_device(struct pci_dev *);
 int eeh_unfreeze_pe(struct eeh_pe *pe);
 int eeh_pe_reset_and_recover(struct eeh_pe *pe);
@@ -356,7 +356,7 @@ static inline int eeh_check_failure(const volatile void 
__iomem *token)
 
 static inline void eeh_addr_cache_init(void) { }
 
-static inline void eeh_add_device_late(struct pci_dev *dev) { }
+static inline void eeh_probe_device(struct pci_dev *dev) { }
 
 static inline void eeh_remove_device(struct pci_dev *dev) { }
 
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 55d3ef6..7cdcb41 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1107,35 +1107,43 @@ static int eeh_init(void)
 core_initcall_sync(eeh_init);
 
 /**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci 
device
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
  * @dev: pci device for which to set up EEH
  *
  * This routine must be used to complete EEH initialization for PCI
  * devices that were added after system boot (e.g. hotplug, dlpar).
  */
-void eeh_add_device_late(struct pci_dev *dev)
+void eeh_probe_device(struct pci_dev *dev)
 {
-   struct pci_dn *pdn;
struct eeh_dev *edev;
 
-   if (!dev)
+   pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+   /*
+* pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+* already called for this device.
+*/
+   if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+   pci_dbg(dev, "Already bound to an eeh_dev!\n");
return;
+   }
 
-   pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
-   edev = pdn_to_eeh_dev(pdn);
-   eeh_edev_dbg(edev, "Adding device\n");
-   if (edev->pdev == dev) {
-   eeh_edev_dbg(edev, "Device already referenced!\n");
+   edev = eeh_ops->probe(dev);
+   if (!edev) {
+   pr_debug("EEH: Adding device failed\n");
return;
}
 
/*
-* The EEH cache might not be removed correctly because of
-* unbalanced kref to the device during unplug time, which
-* relies on pcibios_release_device(). So we have to remove
-* that here explicitly.
+* FIXME: We rely on pcibios_release_device() to remove the
+* existing EEH state. The release function is only called if
+* the pci_dev's refcount drops to zero so if something is
+* keeping a ref to a device (e.g. a filesystem) we need to
+* remove the old EEH state.
+*
+* FIXME: HEY MA, LOOK AT ME, NO LOCKING!
 */
-   if (edev->pdev) {
+   if (edev->pdev && edev->pdev != dev) {
eeh_rmv_from_parent_pe(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
@@ -1146,17 +1154,11 @@ void eeh_add_device_late(struct pci_dev *dev)
 * into error handler afterwards.
 */
edev->mode |= EEH_DEV_NO_HANDLER;
-
-   edev->pdev = NULL;
-   dev->dev.archdata.edev = 

[PATCH v2 5/6] powerpc/eeh: Make early EEH init pseries specific

2020-03-05 Thread Oliver O'Halloran
The eeh_ops->probe() function is called from two different contexts:

1. On pseries, where we set EEH_PROBE_MODE_DEVTREE, it's called in
   eeh_add_device_early() which is supposed to run before we create
   a pci_dev.

2. On PowerNV, where we set EEH_PROBE_MODE_DEV, it's called in
   eeh_add_device_late() which is supposed to run *after* the
   pci_dev is created.

The "early" probe is required because PAPR requires that we perform an RTAS
call to enable EEH support on a device before we start interacting with it
via config space or MMIO. This requirement doesn't exist on PowerNV and
shoehorning two completely separate initialisation paths into a common
interface just results in convoluted code everywhere.

Additionally the early probe requires the probe function to take a pci_dn
rather than a pci_dev argument. We'd like to make pci_dn a pseries specific
data structure since there's no real requirement for them on PowerNV. To
help with both goals, move the early probe into the pseries containment zone
so the platform dependence is more explicit.

Reviewed-by: Sam Bobroff 
Signed-off-by: Oliver O'Halloran 
---
v2: s/set set/we set/ in the commit message
---
 arch/powerpc/include/asm/eeh.h   | 14 +++---
 arch/powerpc/kernel/eeh.c| 46 
 arch/powerpc/kernel/of_platform.c|  6 +--
 arch/powerpc/platforms/powernv/eeh-powernv.c |  6 ---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 65 ++--
 arch/powerpc/platforms/pseries/pci_dlpar.c   |  2 +-
 drivers/pci/hotplug/rpadlpar_core.c  |  2 +-
 drivers/pci/hotplug/rpaphp_core.c|  2 +-
 drivers/pci/hotplug/rpaphp_pci.c |  2 +-
 9 files changed, 64 insertions(+), 81 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 5d10781..8580238 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -301,8 +301,6 @@ int __exit eeh_ops_unregister(const char *name);
 int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_init(void);
-void eeh_add_device_early(struct pci_dn *);
-void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
 void eeh_remove_device(struct pci_dev *);
 int eeh_unfreeze_pe(struct eeh_pe *pe);
@@ -358,10 +356,6 @@ static inline int eeh_check_failure(const volatile void 
__iomem *token)
 
 static inline void eeh_addr_cache_init(void) { }
 
-static inline void eeh_add_device_early(struct pci_dn *pdn) { }
-
-static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { }
-
 static inline void eeh_add_device_late(struct pci_dev *dev) { }
 
 static inline void eeh_remove_device(struct pci_dev *dev) { }
@@ -370,6 +364,14 @@ static inline void eeh_remove_device(struct pci_dev *dev) 
{ }
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
 
+#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH)
+void pseries_eeh_init_edev(struct pci_dn *pdn);
+void pseries_eeh_init_edev_recursive(struct pci_dn *pdn);
+#else
+static inline void pseries_eeh_add_device_early(struct pci_dn *pdn) { }
+static inline void pseries_eeh_add_device_tree_early(struct pci_dn *pdn) { }
+#endif
+
 #ifdef CONFIG_PPC64
 /*
  * MMIO read/write operations with EEH support.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index a9e4ca7..55d3ef6 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1107,52 +1107,6 @@ static int eeh_init(void)
 core_initcall_sync(eeh_init);
 
 /**
- * eeh_add_device_early - Enable EEH for the indicated device node
- * @pdn: PCI device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-void eeh_add_device_early(struct pci_dn *pdn)
-{
-   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-
-   if (!edev)
-   return;
-
-   if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
-   return;
-
-   eeh_ops->probe(pdn, NULL);
-}
-
-/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @pdn: PCI device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct pci_dn *pdn)
-{
-   struct pci_dn *n;
-
-   if (!pdn)
-   return;
-
-   list_for_each_entry(n, >child_list, list)
-   eeh_add_device_tree_early(n);
-   eeh_add_device_early(pdn);
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
-
-/**
  * 

[PATCH v2 4/6] powerpc/eeh: Remove PHB check in probe

2020-03-05 Thread Oliver O'Halloran
This check for a missing PHB has existed in various forms since the
initial PPC64 port was upstreamed in 2002. The idea seems to be that we
need to guard against creating pci-specific data structures for the non-pci
children of a PCI device tree node (e.g. USB devices). However, we only
create pci_dn structures for DT nodes that correspond to PCI devices so
there's not much point in doing this check in the eeh_probe path.

Reviewed-by: Sam Bobroff 
Signed-off-by: Oliver O'Halloran 
---
 arch/powerpc/kernel/eeh.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9cb3370..a9e4ca7 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1120,7 +1120,6 @@ core_initcall_sync(eeh_init);
  */
 void eeh_add_device_early(struct pci_dn *pdn)
 {
-   struct pci_controller *phb = pdn ? pdn->phb : NULL;
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
if (!edev)
@@ -1129,11 +1128,6 @@ void eeh_add_device_early(struct pci_dn *pdn)
if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
return;
 
-   /* USB Bus children of PCI devices will not have BUID's */
-   if (NULL == phb ||
-   (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
-   return;
-
eeh_ops->probe(pdn, NULL);
 }
 
-- 
2.9.5



[PATCH v2 3/6] powerpc/eeh: Do early EEH init only when required

2020-03-05 Thread Oliver O'Halloran
The pci hotplug helper (pci_hp_add_devices()) calls
eeh_add_device_tree_early() to scan the device-tree for new PCI devices and
do the early EEH probe before the device is scanned. This early probe is a
no-op in a lot of cases because:

a) The early init is only required to satisfy a PAPR requirement that EEH
   be configured before we start doing config accesses. On PowerNV it is
   a no-op.

b) It's a no-op for devices that have already had their eeh_dev
   initialised.

There are four callers of pci_hp_add_devices():

1. arch/powerpc/kernel/eeh_driver.c
Here the hotplug helper is called when re-scanning pci_devs that
were removed during an EEH recovery pass. The EEH state for each
removed device (the eeh_dev) is retained across a recovery pass
so the early init is a no-op in this case.

2. drivers/pci/hotplug/pnv_php.c
This is also a no-op since the PowerNV hotplug driver is, surprisingly,
PowerNV specific.

3. drivers/pci/hotplug/rpaphp_core.c
4. drivers/pci/hotplug/rpaphp_pci.c
In these two cases new devices have been hotplugged and FW has
provided new DT nodes for each. These are the only two cases where
we might have new PCI device nodes in the DT so these are
the only two cases where the early EEH probe needs to be done.

We can move the calls to eeh_add_device_tree_early() to the locations where
it's needed and remove it from the generic path. This is preparation for
making the early EEH probe pseries specific.

Reviewed-by: Sam Bobroff 
Signed-off-by: Oliver O'Halloran 
---
 arch/powerpc/kernel/pci-hotplug.c | 2 --
 drivers/pci/hotplug/rpaphp_core.c | 2 ++
 drivers/pci/hotplug/rpaphp_pci.c  | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f8..bf83f76 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -112,8 +112,6 @@ void pci_hp_add_devices(struct pci_bus *bus)
struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
 
-   eeh_add_device_tree_early(PCI_DN(dn));
-
phb = pci_bus_to_host(bus);
 
mode = PCI_PROBE_NORMAL;
diff --git a/drivers/pci/hotplug/rpaphp_core.c 
b/drivers/pci/hotplug/rpaphp_core.c
index e408e40..9c1e43e 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -494,6 +494,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
return retval;
 
if (state == PRESENT) {
+   eeh_add_device_tree_early(PCI_DN(slot->dn));
+
pci_lock_rescan_remove();
pci_hp_add_devices(slot->bus);
pci_unlock_rescan_remove();
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index beca61b..61ebbd8 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -95,8 +95,10 @@ int rpaphp_enable_slot(struct slot *slot)
return -EINVAL;
}
 
-   if (list_empty(>devices))
+   if (list_empty(>devices)) {
+   eeh_add_device_tree_early(PCI_DN(slot->dn));
pci_hp_add_devices(bus);
+   }
 
if (!list_empty(>devices)) {
slot->state = CONFIGURED;
-- 
2.9.5



[PATCH v2 2/6] powerpc/eeh: Remove eeh_add_device_tree_late()

2020-03-05 Thread Oliver O'Halloran
On pseries and PowerNV pcibios_bus_add_device() calls eeh_add_device_late()
so there's no need to do a separate tree traversal to bind the eeh_dev and
pci_dev together when setting up the PHB at boot. As a result we can remove
eeh_add_device_tree_late().

Reviewed-by: Sam Bobroff 
Signed-off-by: Oliver O'Halloran 
---
 arch/powerpc/include/asm/eeh.h|  3 ---
 arch/powerpc/kernel/eeh.c | 25 -
 arch/powerpc/kernel/of_platform.c |  3 ---
 arch/powerpc/kernel/pci-common.c  |  3 ---
 4 files changed, 34 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 5a34907..5d10781 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -304,7 +304,6 @@ void eeh_addr_cache_init(void);
 void eeh_add_device_early(struct pci_dn *);
 void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
-void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_remove_device(struct pci_dev *);
 int eeh_unfreeze_pe(struct eeh_pe *pe);
 int eeh_pe_reset_and_recover(struct eeh_pe *pe);
@@ -365,8 +364,6 @@ static inline void eeh_add_device_tree_early(struct pci_dn 
*pdn) { }
 
 static inline void eeh_add_device_late(struct pci_dev *dev) { }
 
-static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
-
 static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 0878912..9cb3370 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1214,31 +1214,6 @@ void eeh_add_device_late(struct pci_dev *dev)
 }
 
 /**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI 
bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
-   struct pci_dev *dev;
-
-   if (eeh_has_flag(EEH_FORCE_DISABLED))
-   return;
-   list_for_each_entry(dev, >devices, bus_list) {
-   eeh_add_device_late(dev);
-   if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-   struct pci_bus *subbus = dev->subordinate;
-   if (subbus)
-   eeh_add_device_tree_late(subbus);
-   }
-   }
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
  * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
  *
diff --git a/arch/powerpc/kernel/of_platform.c 
b/arch/powerpc/kernel/of_platform.c
index cb68800..64edac81 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -80,9 +80,6 @@ static int of_pci_phb_probe(struct platform_device *dev)
 */
pcibios_claim_one_bus(phb->bus);
 
-   /* Finish EEH setup */
-   eeh_add_device_tree_late(phb->bus);
-
/* Add probed PCI devices to the device model */
pci_bus_add_devices(phb->bus);
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 3d2b1cf..8983afa 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1399,9 +1399,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
pci_assign_unassigned_bus_resources(bus);
}
 
-   /* Fixup EEH */
-   eeh_add_device_tree_late(bus);
-
/* Add new devices to global lists.  Register in proc, sysfs. */
pci_bus_add_devices(bus);
 }
-- 
2.9.5



[PATCH v2 1/6] powerpc/eeh: Add sysfs files in late probe

2020-03-05 Thread Oliver O'Halloran
Move creating the EEH specific sysfs files into eeh_add_device_late()
rather than being open-coded all over the place. Calling the function is
generally done immediately after calling eeh_add_device_late() anyway. This
is also a correctness fix since currently the sysfs files will be added
even if the EEH probe happens to fail.

Similarly, on pseries we currently add the sysfs files before calling
eeh_add_device_late(). This is flat-out broken since the sysfs files
require the pci_dev->dev.archdata.edev pointer to be set, and that is done
in eeh_add_device_late().

Reviewed-by: Sam Bobroff 
Signed-off-by: Oliver O'Halloran 
---
v2: Reworded commit message based on Sam Bobroff's comments about the
current behaviour being broken.
---
 arch/powerpc/include/asm/eeh.h   |  3 ---
 arch/powerpc/kernel/eeh.c| 24 +---
 arch/powerpc/kernel/of_platform.c|  3 ---
 arch/powerpc/kernel/pci-common.c |  3 ---
 arch/powerpc/platforms/powernv/eeh-powernv.c |  1 -
 arch/powerpc/platforms/pseries/eeh_pseries.c |  3 +--
 6 files changed, 2 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 6f9b2a1..5a34907 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -305,7 +305,6 @@ void eeh_add_device_early(struct pci_dn *);
 void eeh_add_device_tree_early(struct pci_dn *);
 void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
-void eeh_add_sysfs_files(struct pci_bus *);
 void eeh_remove_device(struct pci_dev *);
 int eeh_unfreeze_pe(struct eeh_pe *pe);
 int eeh_pe_reset_and_recover(struct eeh_pe *pe);
@@ -368,8 +367,6 @@ static inline void eeh_add_device_late(struct pci_dev *dev) 
{ }
 
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
-static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
-
 static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9..0878912 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1210,6 +1210,7 @@ void eeh_add_device_late(struct pci_dev *dev)
dev->dev.archdata.edev = edev;
 
eeh_addr_cache_insert_dev(dev);
+   eeh_sysfs_add_device(dev);
 }
 
 /**
@@ -1238,29 +1239,6 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
 
 /**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
-   struct pci_dev *dev;
-
-   list_for_each_entry(dev, >devices, bus_list) {
-   eeh_sysfs_add_device(dev);
-   if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-   struct pci_bus *subbus = dev->subordinate;
-   if (subbus)
-   eeh_add_sysfs_files(subbus);
-   }
-   }
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
  * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
  *
diff --git a/arch/powerpc/kernel/of_platform.c 
b/arch/powerpc/kernel/of_platform.c
index 427fc22..cb68800 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -86,9 +86,6 @@ static int of_pci_phb_probe(struct platform_device *dev)
/* Add probed PCI devices to the device model */
pci_bus_add_devices(phb->bus);
 
-   /* sysfs files should only be added after devices are added */
-   eeh_add_sysfs_files(phb->bus);
-
return 0;
 }
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index c6c0341..3d2b1cf 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1404,9 +1404,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
 
/* Add new devices to global lists.  Register in proc, sysfs. */
pci_bus_add_devices(bus);
-
-   /* sysfs files should only be added after devices are added */
-   eeh_add_sysfs_files(bus);
 }
 EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
 
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 6f300ab..ef727ec 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -48,7 +48,6 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
dev_dbg(>dev, "EEH: Setting up device\n");
eeh_add_device_early(pdn);
eeh_add_device_late(pdev);
-   eeh_sysfs_add_device(pdev);
 }
 
 static int pnv_eeh_init(void)
diff --git 

[PATCH v3] powerpc: setup_64: set up PACA earlier to avoid kcov problems

2020-03-05 Thread Daniel Axtens
kcov instrumentation is collected by the __sanitizer_cov_trace_pc hook in
kernel/kcov.c. The compiler inserts these hooks into every basic block
unless kcov is disabled for that file.

We then have a deep call-chain:
 - __sanitizer_cov_trace_pc calls to check_kcov_mode()
 - check_kcov_mode() (kernel/kcov.c) calls in_task()
 - in_task() (include/linux/preempt.h) calls preempt_count().
 - preempt_count() (include/asm-generic/preempt.h) calls
 current_thread_info()
 - because powerpc has THREAD_INFO_IN_TASK, current_thread_info()
 (include/linux/thread_info.h) is defined to 'current'
 - current (arch/powerpc/include/asm/current.h) is defined to
 get_current().
 - get_current (same file) loads an offset of r13.
 - arch/powerpc/include/asm/paca.h makes r13 a register variable
 called local_paca - it is the PACA for the current CPU, so
 this has the effect of loading the current task from PACA.
 - get_current returns the current task from PACA,
 - current_thread_info returns the task cast to a thread_info
 - preempt_count dereferences the thread_info to load preempt_count
 - that value is used by in_task and so on up the chain

The problem is:

 - kcov instrumentation is enabled for arch/powerpc/kernel/dt_cpu_ftrs.c

 - even if it were not, dt_cpu_ftrs_init calls generic dt parsing code
   which should definitely have instrumentation enabled.

 - setup_64.c calls dt_cpu_ftrs_init before it sets up a PACA.

 - If we don't set up a paca, r13 will contain unpredictable data.

 - In a zImage compiled with kcov and KASAN, we see r13 containing a value
   that leads to dereferencing invalid memory (something like
   912a72603d420015).

 - Weirdly, the same kernel as a vmlinux loaded directly by qemu does not
   crash. Investigating with gdb, it seems that in the vmlinux boot case,
   r13 is near enough to zero that we just happen to be able to read that
   part of memory (we're operating with translation off at this point) and
   the current pointer also happens to land in readable memory and
   everything just works.

 - PACA setup refers to CPU features - setup_paca() looks at
   early_cpu_has_feature(CPU_FTR_HVMODE)

There's no generic kill switch for kcov (as far as I can tell), and we
don't want to have to turn off instrumentation in the generic dt parsing
code (which lives outside arch/powerpc/) just because we don't have a real
paca or task yet.

So:
 - change the test when setting up a PACA to consider the actual value of
   the MSR rather than the CPU feature.

 - move the PACA setup to before the cpu feature parsing.

Translations get switched on once we leave early_setup, so I think we'd
already catch any other cases where the PACA or task aren't set up.

Boot tested on a P9 guest and host.

Fixes: fb0b0a73b223 ("powerpc: Enable kcov")
Cc: Andrew Donnellan 
Suggested-by: Michael Ellerman 
Signed-off-by: Daniel Axtens 

---

Regarding moving the comment about printk()-safety:
I am about 75% sure that the thing that makes printk() safe is the PACA,
not the CPU features. That's what commit 24d9649574fb ("[POWERPC] Document
when printk is useable") seems to indicate, but as someone wise recently
told me, "bootstrapping is hard", so I may be totally wrong.

v3: Update comment, thanks Christophe Leroy.
Remove a comment in dt_cpu_ftrs.c that is no longer accurate - thanks
  Andrew. I think we want to retain all the code still, but I'm open to
  being told otherwise.
---
 arch/powerpc/kernel/dt_cpu_ftrs.c |  1 -
 arch/powerpc/kernel/paca.c|  2 +-
 arch/powerpc/kernel/setup_64.c| 20 +++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 182b4047c1ef..36bc0d5c4f3a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void)
/* Initialize the base environment -- clear FSCR/HFSCR.  */
hv_mode = !!(mfmsr() & MSR_HV);
if (hv_mode) {
-   /* CPU_FTR_HVMODE is used early in PACA setup */
cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
mtspr(SPRN_HFSCR, 0);
}
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 949eceb254d8..347e947b9d4b 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -218,7 +218,7 @@ void setup_paca(struct paca_struct *new_paca)
 * if we do a GET_PACA() before the feature fixups have been
 * applied
 */
-   if (early_cpu_has_feature(CPU_FTR_HVMODE))
+   if (mfmsr() & MSR_HV)
mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e05e6dd67ae6..2259da8e8685 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -285,18 +285,28 @@ void __init 

Re: [PATCH v3] powerpc/64s/pgtable: fix an undefined behaviour

2020-03-05 Thread Christophe Leroy




Le 06/03/2020 à 05:48, Qian Cai a écrit :

Booting a power9 server with hash MMU could trigger an undefined
behaviour because pud_offset(p4d, 0) will do,

0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)

Fix it by converting pud_index() and friends to static inline
functions.

UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
shift exponent 34 is too large for 32-bit type 'int'
CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
Call Trace:
dump_stack+0xf4/0x164 (unreliable)
ubsan_epilogue+0x18/0x78
__ubsan_handle_shift_out_of_bounds+0x160/0x21c
walk_pagetables+0x2cc/0x700
walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
(inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
ptdump_check_wx+0x8c/0xf0
mark_rodata_ro+0x48/0x80
kernel_init+0x74/0x194
ret_from_kernel_thread+0x5c/0x74

Suggested-by: Christophe Leroy 
Signed-off-by: Qian Cai 


Reviewed-by: Christophe Leroy 


---

v3: convert pud_index() etc to static inline functions.
v2: convert pud_offset() etc to static inline functions.

  arch/powerpc/include/asm/book3s/64/pgtable.h | 23 
  1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 201a69e6a355..bd432c6706b9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -998,10 +998,25 @@ extern struct page *pgd_page(pgd_t pgd);
  #define pud_page_vaddr(pud)   __va(pud_val(pud) & ~PUD_MASKED_BITS)
  #define pgd_page_vaddr(pgd)   __va(pgd_val(pgd) & ~PGD_MASKED_BITS)
  
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))

-#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
-#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
-#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+static inline unsigned long pgd_index(unsigned long address)
+{
+   return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
+}
+
+static inline unsigned long pud_index(unsigned long address)
+{
+   return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
+static inline unsigned long pmd_index(unsigned long address)
+{
+   return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+}
+
+static inline unsigned long pte_index(unsigned long address)
+{
+   return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+}
  
  /*

   * Find an entry in a page-table-directory.  We combine the address region



[PATCH v4 5/6] powerpc/fsl_booke/64: clear the original kernel if randomized

2020-03-05 Thread Jason Yan
The original kernel still exists in memory; clear it now.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/mm/nohash/kaslr_booke.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c 
b/arch/powerpc/mm/nohash/kaslr_booke.c
index bf60f956dc91..f7ab97aa2127 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -379,8 +379,10 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)
unsigned long kernel_sz;
 
if (IS_ENABLED(CONFIG_PPC64)) {
-   if (__run_at_load == 1)
+   if (__run_at_load == 1) {
+   kaslr_late_init();
return;
+   }
 
/* Setup flat device-tree pointer */
initial_boot_params = dt_ptr;
-- 
2.17.2



[PATCH v4 4/6] powerpc/fsl_booke/64: do not clear the BSS for the second pass

2020-03-05 Thread Jason Yan
The BSS section has already been cleared out in the first pass. No need to
clear it again. This can save some time when booting with KASLR
enabled.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/kernel/head_64.S | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 454129a3c259..9354c292b709 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -913,6 +913,13 @@ start_here_multiplatform:
bl  relative_toc
tovirt(r2,r2)
 
+   /* Do not clear the BSS for the second pass if randomized */
+   LOAD_REG_ADDR(r3, kernstart_virt_addr)
+   ld  r3,0(r3)
+   LOAD_REG_IMMEDIATE(r4, KERNELBASE)
+   cmpdr3,r4
+   bne 4f
+
/* Clear out the BSS. It may have been done in prom_init,
 * already but that's irrelevant since prom_init will soon
 * be detached from the kernel completely. Besides, we need
-- 
2.17.2



[PATCH v4 6/6] powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst and add 64bit part

2020-03-05 Thread Jason Yan
We now support both 32-bit and 64-bit KASLR for fsl booke. Add documentation
for the 64-bit part and rename kaslr-booke32.rst to kaslr-booke.rst.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 Documentation/powerpc/index.rst   |  2 +-
 .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 ---
 2 files changed, 32 insertions(+), 5 deletions(-)
 rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%)

diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
index 0d45f0fc8e57..3bad36943b22 100644
--- a/Documentation/powerpc/index.rst
+++ b/Documentation/powerpc/index.rst
@@ -20,7 +20,7 @@ powerpc
 hvcs
 imc
 isa-versions
-kaslr-booke32
+kaslr-booke
 mpc52xx
 papr_hcalls
 pci_iov_resource_on_powernv
diff --git a/Documentation/powerpc/kaslr-booke32.rst 
b/Documentation/powerpc/kaslr-booke.rst
similarity index 59%
rename from Documentation/powerpc/kaslr-booke32.rst
rename to Documentation/powerpc/kaslr-booke.rst
index 8b259fdfdf03..42121fed8249 100644
--- a/Documentation/powerpc/kaslr-booke32.rst
+++ b/Documentation/powerpc/kaslr-booke.rst
@@ -1,15 +1,18 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-===========================
-KASLR for Freescale BookE32
-===========================
+=========================
+KASLR for Freescale BookE
+=========================
 
 The word KASLR stands for Kernel Address Space Layout Randomization.
 
 This document tries to explain the implementation of the KASLR for
-Freescale BookE32. KASLR is a security feature that deters exploit
+Freescale BookE. KASLR is a security feature that deters exploit
 attempts relying on knowledge of the location of kernel internals.
 
+KASLR for Freescale BookE32
+---------------------------
+
 Since CONFIG_RELOCATABLE has already supported, what we need to do is
 map or copy kernel to a proper place and relocate. Freescale Book-E
 parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1
@@ -38,5 +41,29 @@ bit of the entropy to decide the index of the 64M zone. Then 
we chose a
 
   kernstart_virt_addr
 
+
+KASLR for Freescale BookE64
+---------------------------
+
+The implementation for Freescale BookE64 is similar to that for BookE32. One
+difference is that Freescale BookE64 sets up a TLB mapping of 1G during
+booting. Another difference is that ppc64 needs the kernel to be
+64K-aligned. So we can randomize the kernel in this 1G mapping and make
+it 64K-aligned. This saves the code needed to create another TLB mapping at
+early boot. The disadvantage is that we only have about 1G/64K = 16384 slots
+to put the kernel in::
+
+    KERNELBASE
+
+          64K                     |--> kernel <--|
+           |                      |              |
+        +--+--+--+    +--+--+--+--+--+--+--+--+--+    +--+--+
+        |  |  |  |....|  |  |  |  |  |  |  |  |  |....|  |  |
+        +--+--+--+    +--+--+--+--+--+--+--+--+--+    +--+--+
+        |                         |                        1G
+        |----->   offset    <-----|
+
+                              kernstart_virt_addr
+
 To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enable and you
 want to disable it at runtime, add "nokaslr" to the kernel cmdline.
-- 
2.17.2



[PATCH v4 2/6] powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper

2020-03-05 Thread Jason Yan
Like the 32bit code, we introduce reloc_kernel_entry() helper to prepare
for the KASLR 64bit version. And move the C declaration of this function
out of CONFIG_PPC32 and use long instead of int for the parameter 'addr'.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/exceptions-64e.S | 13 +
 arch/powerpc/mm/mmu_decl.h   |  3 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..1b9b174bee86 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1679,3 +1679,16 @@ _GLOBAL(setup_ehv_ivors)
 _GLOBAL(setup_lrat_ivor)
SET_IVOR(42, 0x340) /* LRAT Error */
blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+   mfmsr   r7
+   rlwinm  r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+   mtspr   SPRN_SRR0,r4
+   mtspr   SPRN_SRR1,r7
+   rfi
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7097e07a209a..605129b5ccdf 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -140,9 +140,10 @@ extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
 void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
-void reloc_kernel_entry(void *fdt, int addr);
 extern int is_second_reloc;
 #endif
+
+void reloc_kernel_entry(void *fdt, long addr);
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
 
-- 
2.17.2



[PATCH v4 3/6] powerpc/fsl_booke/64: implement KASLR for fsl_booke64

2020-03-05 Thread Jason Yan
The implementation for Freescale BookE64 is similar to that for BookE32.
One difference is that Freescale BookE64 sets up a TLB mapping of 1G during
booting. Another difference is that ppc64 needs the kernel to be
64K-aligned. So we can randomize the kernel in this 1G mapping and make
it 64K-aligned. This saves the code needed to create another TLB mapping
at early boot. The disadvantage is that we only have about 1G/64K = 16384
slots to put the kernel in.

To support secondary CPU boot-up, a variable __kaslr_offset is added to the
first_256B section. This lets a secondary CPU get the KASLR offset
before the 1:1 mapping has been set up.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/Kconfig |  2 +-
 arch/powerpc/kernel/exceptions-64e.S | 10 
 arch/powerpc/kernel/head_64.S|  6 +++
 arch/powerpc/kernel/setup_64.c   |  3 ++
 arch/powerpc/mm/mmu_decl.h   | 20 
 arch/powerpc/mm/nohash/kaslr_booke.c | 72 +++-
 6 files changed, 80 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 497b7d0b2d7e..0c76601fdd59 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -564,7 +564,7 @@ config RELOCATABLE
 
 config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
-   depends on (FSL_BOOKE && FLATMEM && PPC32)
+   depends on (PPC_FSL_BOOK3E && FLATMEM)
depends on RELOCATABLE
help
  Randomizes the virtual address at which the kernel image is
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 1b9b174bee86..260cf1f1e71c 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1378,6 +1378,7 @@ skpinv:   addir6,r6,1 /* 
Increment */
 1: mflrr6
addir6,r6,(2f - 1b)
tovirt(r6,r6)
+   add r6,r6,r19
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
mtspr   SPRN_SRR0,r6
@@ -1400,6 +1401,7 @@ skpinv:   addir6,r6,1 /* 
Increment */
 
/* We translate LR and return */
tovirt(r8,r8)
+   add r8,r8,r19
mtlrr8
blr
 
@@ -1528,6 +1530,7 @@ a2_tlbinit_code_end:
  */
 _GLOBAL(start_initialization_book3e)
mflrr28
+   li  r19, 0
 
/* First, we need to setup some initial TLBs to map the kernel
 * text, data and bss at PAGE_OFFSET. We don't have a real mode
@@ -1570,6 +1573,12 @@ _GLOBAL(book3e_secondary_core_init)
cmplwi  r4,0
bne 2f
 
+   li  r19, 0
+#ifdef CONFIG_RANDOMIZE_BASE
+   LOAD_REG_ADDR_PIC(r19, __kaslr_offset)
+   ld  r19,0(r19)
+   rlwinm  r19,r19,0,0,5
+#endif
/* Setup TLB for this core */
bl  initial_tlb_book3e
 
@@ -1602,6 +1611,7 @@ _GLOBAL(book3e_secondary_core_init)
lis r3,PAGE_OFFSET@highest
sldir3,r3,32
or  r28,r28,r3
+   add r28,r28,r19
 1: mtlrr28
blr
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ad79fddb974d..454129a3c259 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -104,6 +104,12 @@ __secondary_hold_acknowledge:
.8byte  0x0
 
 #ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_RANDOMIZE_BASE
+   .globl  __kaslr_offset
+__kaslr_offset:
+   .8byte  0x0
+#endif
+
/* This flag is set to 1 by a loader if the kernel should run
 * at the loaded address instead of the linked address.  This
 * is used by kexec-tools to keep the the kdump kernel in the
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e05e6dd67ae6..836e202dfd5b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -67,6 +67,7 @@
 #include 
 #include 
 
+#include 
 #include "setup.h"
 
 int spinning_secondaries;
@@ -300,6 +301,8 @@ void __init early_setup(unsigned long dt_ptr)
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
 
+   kaslr_early_init(__va(dt_ptr), 0);
+
udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
 
/*
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 605129b5ccdf..6efbd7fd88a4 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -139,22 +139,16 @@ extern unsigned long calc_cam_sz(unsigned long ram, 
unsigned long virt,
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+#endif
 void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
 extern int is_second_reloc;
-#endif
+extern unsigned long __kaslr_offset;
+extern unsigned int 

[PATCH v4 0/6] implement KASLR for powerpc/fsl_booke/64

2020-03-05 Thread Jason Yan
This is an attempt to implement KASLR for Freescale BookE64, based on
my earlier implementation for Freescale BookE32:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=131718=*

The implementation for Freescale BookE64 is similar to that for BookE32.
One difference is that Freescale BookE64 sets up a TLB mapping of 1G during
booting. Another difference is that ppc64 needs the kernel to be
64K-aligned. So we can randomize the kernel in this 1G mapping and make
it 64K-aligned. This saves the code needed to create another TLB mapping
at early boot. The disadvantage is that we only have about 1G/64K = 16384
slots to put the kernel in.

KERNELBASE

      64K                     |--> kernel <--|
       |                      |              |
    +--+--+--+    +--+--+--+--+--+--+--+--+--+    +--+--+
    |  |  |  |....|  |  |  |  |  |  |  |  |  |....|  |  |
    +--+--+--+    +--+--+--+--+--+--+--+--+--+    +--+--+
    |                         |                        1G
    |----->   offset    <-----|

                          kernstart_virt_addr

I'm not sure whether the number of slots is sufficient or whether the design
has any defects. If you have better ideas, I would be happy to hear them.

Thank you all.

v3->v4:
  Do not define __kaslr_offset as a fixed symbol. Reference __run_at_load and
__kaslr_offset by symbol instead of magic offsets.
  Use IS_ENABLED(CONFIG_PPC32) instead of #ifdef CONFIG_PPC32.
  Change kaslr-booke32 to kaslr-booke in index.rst
  Switch some instructions to 64-bit.
v2->v3:
  Fix build error when KASLR is disabled.
v1->v2:
  Add __kaslr_offset for the secondary cpu boot up.

Jason Yan (6):
  powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and
kaslr_early_init()
  powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper
  powerpc/fsl_booke/64: implement KASLR for fsl_booke64
  powerpc/fsl_booke/64: do not clear the BSS for the second pass
  powerpc/fsl_booke/64: clear the original kernel if randomized
  powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst
and add 64bit part

 Documentation/powerpc/index.rst   |  2 +-
 .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 +++-
 arch/powerpc/Kconfig  |  2 +-
 arch/powerpc/kernel/exceptions-64e.S  | 23 +
 arch/powerpc/kernel/head_64.S | 13 +++
 arch/powerpc/kernel/setup_64.c|  3 +
 arch/powerpc/mm/mmu_decl.h| 23 ++---
 arch/powerpc/mm/nohash/kaslr_booke.c  | 88 +--
 8 files changed, 144 insertions(+), 45 deletions(-)
 rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%)

-- 
2.17.2



[PATCH v4 1/6] powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and kaslr_early_init()

2020-03-05 Thread Jason Yan
Refactor kaslr_legal_offset() and kaslr_early_init(). No functional
change. This is preparation for KASLR on fsl_booke64.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/mm/nohash/kaslr_booke.c | 34 +++-
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c 
b/arch/powerpc/mm/nohash/kaslr_booke.c
index 4a75f2d9bf0e..6ebff31fefcc 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -25,6 +25,7 @@ struct regions {
unsigned long pa_start;
unsigned long pa_end;
unsigned long kernel_size;
+   unsigned long linear_sz;
unsigned long dtb_start;
unsigned long dtb_end;
unsigned long initrd_start;
@@ -260,11 +261,23 @@ static __init void get_cell_sizes(const void *fdt, int 
node, int *addr_cells,
*size_cells = fdt32_to_cpu(*prop);
 }
 
-static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long 
index,
-  unsigned long offset)
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long 
random)
 {
unsigned long koffset = 0;
unsigned long start;
+   unsigned long index;
+   unsigned long offset;
+
+   /*
+* Decide which 64M we want to start
+* Only use the low 8 bits of the random seed
+*/
+   index = random & 0xFF;
+   index %= regions.linear_sz / SZ_64M;
+
+   /* Decide offset inside 64M */
+   offset = random % (SZ_64M - regions.kernel_size);
+   offset = round_down(offset, SZ_16K);
 
while ((long)index >= 0) {
offset = memstart_addr + index * SZ_64M + offset;
@@ -289,10 +302,9 @@ static inline __init bool kaslr_disabled(void)
 static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t 
size,
  unsigned long kernel_sz)
 {
-   unsigned long offset, random;
+   unsigned long random;
unsigned long ram, linear_sz;
u64 seed;
-   unsigned long index;
 
kaslr_get_cmdline(dt_ptr);
if (kaslr_disabled())
@@ -333,22 +345,12 @@ static unsigned long __init kaslr_choose_location(void 
*dt_ptr, phys_addr_t size
regions.dtb_start = __pa(dt_ptr);
regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
regions.kernel_size = kernel_sz;
+   regions.linear_sz = linear_sz;
 
get_initrd_range(dt_ptr);
get_crash_kernel(dt_ptr, ram);
 
-   /*
-* Decide which 64M we want to start
-* Only use the low 8 bits of the random seed
-*/
-   index = random & 0xFF;
-   index %= linear_sz / SZ_64M;
-
-   /* Decide offset inside 64M */
-   offset = random % (SZ_64M - kernel_sz);
-   offset = round_down(offset, SZ_16K);
-
-   return kaslr_legal_offset(dt_ptr, index, offset);
+   return kaslr_legal_offset(dt_ptr, random);
 }
 
 /*
-- 
2.17.2



linux-next: manual merge of the akpm tree with the powerpc tree

2020-03-05 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the akpm tree got conflicts in:

  arch/powerpc/mm/book3s32/mmu.c
  arch/powerpc/mm/book3s32/tlb.c
  arch/powerpc/mm/kasan/kasan_init_32.c
  arch/powerpc/mm/mem.c
  arch/powerpc/mm/nohash/40x.c
  arch/powerpc/mm/pgtable_32.c

between commits:

  0b1c524caaae ("powerpc/32: refactor pmd_offset(pud_offset(pgd_offset...")
  2efc7c085f05 ("powerpc/32: drop get_pteptr()")

from the powerpc tree and patch:

  "powerpc: add support for folded p4d page tables"

from the akpm tree.

I fixed it up (all the files above no longer need modifying, and the
extra patch is below) and can carry the fix as necessary. This is now
fixed as far as linux-next is concerned, but any non trivial conflicts
should be mentioned to your upstream maintainer when your tree is
submitted for merging.  You may also want to consider cooperating with
the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

diff --cc arch/powerpc/mm/book3s32/mmu.c
index 39ba53ca5bb5,edef17c97206..
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
diff --cc arch/powerpc/mm/book3s32/tlb.c
index dc9039a170aa,175bc33b41b7..
--- a/arch/powerpc/mm/book3s32/tlb.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
diff --cc arch/powerpc/mm/kasan/kasan_init_32.c
index f19526e7d3dc,88e2e16380b5..
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
diff --cc arch/powerpc/mm/mem.c
index 9b4f5fb719e0,8262b384dcf3..
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
diff --cc arch/powerpc/mm/nohash/40x.c
index 82862723ab42,7aaf7155e350..
--- a/arch/powerpc/mm/nohash/40x.c
+++ b/arch/powerpc/mm/nohash/40x.c
diff --cc arch/powerpc/mm/pgtable_32.c
index f62de06e3d07,5774d4bc94d0..
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index df7f63d37b74..74f890d6218c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -44,12 +44,12 @@ struct mm_struct;
 #ifdef CONFIG_PPC32
 static inline pmd_t *pmd_ptr(struct mm_struct *mm, unsigned long va)
 {
-   return pmd_offset(pud_offset(pgd_offset(mm, va), va), va);
+   return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va) , va), 
va);
 }
 
 static inline pmd_t *pmd_ptr_k(unsigned long va)
 {
-   return pmd_offset(pud_offset(pgd_offset_k(va), va), va);
+   return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va) , va), 
va);
 }
 
 static inline pte_t *virt_to_kpte(unsigned long vaddr)

-- 
Cheers,
Stephen Rothwell

Just to be clear, below is the new version of "powerpc: add support for
folded p4d page tables" (I am not sure it is completely correct, but
ppc64_defconfig, 32 and 64 bit allnoconfig, allyesconfig,
ppc44x_defconfig and pseries_le_defconfig all build without new
warnings):

From: Mike Rapoport 
Date: Wed, 4 Mar 2020 22:32:05 +1100
Subject: [PATCH] powerpc: add support for folded p4d page tables

Implement primitives necessary for the 4th level folding, add walks of p4d
level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h.

Link: http://lkml.kernel.org/r/20200227084608.18223-9-r...@kernel.org
Signed-off-by: Mike Rapoport 
Tested-by: Christophe Leroy[8xx and 83xx]
Cc: Arnd Bergmann 
Cc: Benjamin Herrenschmidt 
Cc: Brian Cain 
Cc: Catalin Marinas 
Cc: Fenghua Yu 
Cc: Geert Uytterhoeven 
Cc: Guan Xuetao 
Cc: James Morse 
Cc: Jonas Bonn 
Cc: Julien Thierry 
Cc: Ley Foon Tan 
Cc: Marc Zyngier 
Cc: Michael Ellerman 
Cc: Paul Mackerras 
Cc: Rich Felker 
Cc: Russell King 
Cc: Stafford Horne 
Cc: Stefan Kristiansson 
Cc: Suzuki K Poulose 
Cc: Tony Luck 
Cc: Will Deacon 
Cc: Yoshinori Sato 
Cc: Anders Roxell 
Cc: Anshuman Khandual 
Cc: Naresh Kamboju 
Cc: Stephen Rothwell
Signed-off-by: Andrew Morton 
Signed-off-by: Stephen Rothwell 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h  |  1 -
 arch/powerpc/include/asm/book3s/64/hash.h |  4 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h  |  4 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 60 ++-
 arch/powerpc/include/asm/book3s/64/radix.h|  6 +-
 arch/powerpc/include/asm/nohash/32/pgtable.h  |  1 -
 arch/powerpc/include/asm/nohash/64/pgalloc.h  |  2 +-
 .../include/asm/nohash/64/pgtable-4k.h| 32 +-
 arch/powerpc/include/asm/nohash/64/pgtable.h  |  6 +-
 arch/powerpc/include/asm/pgtable.h| 10 ++--
 arch/powerpc/kvm/book3s_64_mmu_radix.c| 30 ++
 arch/powerpc/lib/code-patching.c  |  7 ++-
 arch/powerpc/mm/book3s64/hash_pgtable.c   |  4 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c  | 26 +---
 arch/powerpc/mm/book3s64/subpage_prot.c   |  6 +-
 arch/powerpc/mm/hugetlbpage.c | 28 +
 arch/powerpc/mm/nohash/book3e_pgtable.c   | 15 ++---
 arch/powerpc/mm/pgtable.c | 30 

linux-next: manual merge of the akpm tree with the powerpc tree

2020-03-05 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the akpm tree got a conflict in:

  arch/powerpc/mm/pgtable_32.c

between commit:

  2efc7c085f05 ("powerpc/32: drop get_pteptr()")

from the powerpc tree and patch:

  "powerpc/32: drop get_pteptr()"

from the akpm tree.

I fixed it up (I just dropped the latter version) and can carry the fix
as necessary. This is now fixed as far as linux-next is concerned, but
any non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


pgpF8g8zJblRL.pgp
Description: OpenPGP digital signature


[powerpc:merge] BUILD SUCCESS ab326587bb5fb91cc97df9b9f48e9e1469f04621

2020-03-05 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
merge
branch HEAD: ab326587bb5fb91cc97df9b9f48e9e1469f04621  Automatic merge of 
branches 'master', 'next' and 'fixes' into merge

elapsed time: 1023m

configs tested: 175
configs skipped: 0

The following configs have been built successfully.
More configs may be tested in the coming days.

arm64allyesconfig
arm  allyesconfig
arm64allmodconfig
arm64 allnoconfig
arm   allnoconfig
arm at91_dt_defconfig
arm   efm32_defconfig
arm  exynos_defconfig
armmulti_v5_defconfig
armmulti_v7_defconfig
armshmobile_defconfig
arm   sunxi_defconfig
arm64   defconfig
sparcallyesconfig
mips  allnoconfig
s390 alldefconfig
ia64defconfig
i386  allnoconfig
i386 alldefconfig
i386 allyesconfig
i386defconfig
ia64 alldefconfig
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
arm  allmodconfig
nios2 3c120_defconfig
nios2 10m50_defconfig
c6xevmc6678_defconfig
xtensa  iss_defconfig
c6x  allyesconfig
xtensa   common_defconfig
openrisc simple_smp_defconfig
openriscor1ksim_defconfig
alpha   defconfig
cskydefconfig
nds32 allnoconfig
nds32   defconfig
h8300 edosk2674_defconfig
h8300h8300h-sim_defconfig
h8300   h8s-sim_defconfig
m68k allmodconfig
m68k   m5475evb_defconfig
m68k  multi_defconfig
m68k   sun3_defconfig
arc  allyesconfig
arc defconfig
microblaze  mmu_defconfig
microblazenommu_defconfig
powerpc   allnoconfig
powerpc defconfig
powerpc   ppc64_defconfig
powerpc  rhel-kconfig
mips   32r2_defconfig
mips 64r6el_defconfig
mips allmodconfig
mips allyesconfig
mips  fuloong2e_defconfig
mips  malta_kvm_defconfig
pariscallnoconfig
parisc   allyesconfig
pariscgeneric-32bit_defconfig
pariscgeneric-64bit_defconfig
x86_64   randconfig-a001-20200305
x86_64   randconfig-a002-20200305
x86_64   randconfig-a003-20200305
i386 randconfig-a001-20200305
i386 randconfig-a002-20200305
i386 randconfig-a003-20200305
alpharandconfig-a001-20200305
m68k randconfig-a001-20200305
mips randconfig-a001-20200305
nds32randconfig-a001-20200305
parisc   randconfig-a001-20200305
riscvrandconfig-a001-20200305
c6x  randconfig-a001-20200305
h8300randconfig-a001-20200305
microblaze   randconfig-a001-20200305
nios2randconfig-a001-20200305
sparc64  randconfig-a001-20200305
csky randconfig-a001-20200305
openrisc randconfig-a001-20200305
s390 randconfig-a001-20200305
sh   randconfig-a001-20200305
xtensa   randconfig-a001-20200305
x86_64   randconfig-b002-20200306
x86_64   randconfig-b001-20200306
i386 randconfig-b001-20200306
i386 randconfig-b003-20200306
i386 randconfig-b002-20200306
x86_64   randconfig-b003-20200306
i386 randconfig-c002-20200306
i386 randconfig-c001-20200306
x86_64   randconfig-c003-20200306
x86_64   randconfig-c002-20200306
i386 randconfig-c003-20200306
x86_64   randconfig-c001-20200306
x86_64   randconfig-d001-20200305
x86_64   randconfig-d002-20200305
x86_64   randconfig-d003-20200305
i386

[powerpc:fixes-test] BUILD SUCCESS 59bee45b9712c759ea4d3dcc4eff1752f3a66558

2020-03-05 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
fixes-test
branch HEAD: 59bee45b9712c759ea4d3dcc4eff1752f3a66558  powerpc/mm: Fix missing 
KUAP disable in flush_coherent_icache()

elapsed time: 1371m

configs tested: 206
configs skipped: 207

The following configs have been built successfully.
More configs may be tested in the coming days.

arm  allmodconfig
arm   allnoconfig
arm  allyesconfig
arm64allmodconfig
arm64 allnoconfig
arm64allyesconfig
arm at91_dt_defconfig
arm   efm32_defconfig
arm  exynos_defconfig
armmulti_v5_defconfig
armmulti_v7_defconfig
armshmobile_defconfig
arm   sunxi_defconfig
arm64   defconfig
sparcallyesconfig
sh   allmodconfig
ia64defconfig
sh  sh7785lcr_32bit_defconfig
ia64  allnoconfig
h8300h8300h-sim_defconfig
pariscallnoconfig
s390defconfig
sparc64  allyesconfig
powerpc defconfig
i386  allnoconfig
i386 alldefconfig
i386 allyesconfig
i386defconfig
ia64 alldefconfig
ia64 allmodconfig
ia64 allyesconfig
c6x  allyesconfig
c6xevmc6678_defconfig
nios2 10m50_defconfig
nios2 3c120_defconfig
openriscor1ksim_defconfig
openrisc simple_smp_defconfig
xtensa   common_defconfig
xtensa  iss_defconfig
alpha   defconfig
cskydefconfig
nds32 allnoconfig
nds32   defconfig
h8300 edosk2674_defconfig
h8300   h8s-sim_defconfig
m68k allmodconfig
m68k   m5475evb_defconfig
m68k  multi_defconfig
m68k   sun3_defconfig
arc  allyesconfig
arc defconfig
microblaze  mmu_defconfig
microblazenommu_defconfig
powerpc   allnoconfig
powerpc   ppc64_defconfig
powerpc  rhel-kconfig
mips   32r2_defconfig
mips 64r6el_defconfig
mips allmodconfig
mips  allnoconfig
mips allyesconfig
mips  fuloong2e_defconfig
mips  malta_kvm_defconfig
parisc   allyesconfig
pariscgeneric-32bit_defconfig
pariscgeneric-64bit_defconfig
x86_64   randconfig-a001-20200306
x86_64   randconfig-a002-20200306
x86_64   randconfig-a003-20200306
i386 randconfig-a001-20200306
i386 randconfig-a002-20200306
i386 randconfig-a003-20200306
alpharandconfig-a001-20200306
m68k randconfig-a001-20200306
mips randconfig-a001-20200306
nds32randconfig-a001-20200306
parisc   randconfig-a001-20200306
riscvrandconfig-a001-20200306
alpharandconfig-a001-20200305
m68k randconfig-a001-20200305
mips randconfig-a001-20200305
nds32randconfig-a001-20200305
parisc   randconfig-a001-20200305
riscvrandconfig-a001-20200305
c6x  randconfig-a001-20200305
h8300randconfig-a001-20200305
microblaze   randconfig-a001-20200305
nios2randconfig-a001-20200305
sparc64  randconfig-a001-20200305
csky randconfig-a001-20200305
openrisc randconfig-a001-20200305
s390 randconfig-a001-20200305
sh   randconfig-a001-20200305
xtensa   randconfig-a001-20200305
x86_64   randconfig-b001-20200306
x86_64   randconfig-b002-20200306
x86_64   randconfig-b003-20200306
i386 randconfig-b001-20200306
i386 randconfig-b002-20200306
i386 randconfig-b003-20200306
x86_64

[powerpc:next-test] BUILD SUCCESS 5c61987c29055c619e116977c7d5db772d0f5239

2020-03-05 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
next-test
branch HEAD: 5c61987c29055c619e116977c7d5db772d0f5239  powerpc/lib: Fix 
emulate_step() std test

elapsed time: 1022m

configs tested: 176
configs skipped: 11

The following configs have been built successfully.
More configs may be tested in the coming days.

arm64allyesconfig
arm  allyesconfig
arm64 allnoconfig
arm   allnoconfig
arm  allmodconfig
arm64allmodconfig
arm at91_dt_defconfig
arm   efm32_defconfig
arm  exynos_defconfig
armmulti_v5_defconfig
armmulti_v7_defconfig
armshmobile_defconfig
arm   sunxi_defconfig
arm64   defconfig
sparcallyesconfig
sparc64   allnoconfig
ia64defconfig
powerpc defconfig
m68k   m5475evb_defconfig
parisc   allyesconfig
i386  allnoconfig
i386 alldefconfig
i386 allyesconfig
i386defconfig
ia64 alldefconfig
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
c6x  allyesconfig
c6xevmc6678_defconfig
nios2 10m50_defconfig
nios2 3c120_defconfig
openriscor1ksim_defconfig
openrisc simple_smp_defconfig
xtensa   common_defconfig
xtensa  iss_defconfig
alpha   defconfig
cskydefconfig
nds32 allnoconfig
nds32   defconfig
h8300 edosk2674_defconfig
h8300h8300h-sim_defconfig
h8300   h8s-sim_defconfig
m68k allmodconfig
m68k  multi_defconfig
m68k   sun3_defconfig
arc  allyesconfig
arc defconfig
microblaze  mmu_defconfig
microblazenommu_defconfig
powerpc   allnoconfig
powerpc   ppc64_defconfig
powerpc  rhel-kconfig
mips   32r2_defconfig
mips 64r6el_defconfig
mips allmodconfig
mips  allnoconfig
mips allyesconfig
mips  fuloong2e_defconfig
mips  malta_kvm_defconfig
pariscallnoconfig
pariscgeneric-32bit_defconfig
pariscgeneric-64bit_defconfig
x86_64   randconfig-a001-20200305
x86_64   randconfig-a002-20200305
x86_64   randconfig-a003-20200305
i386 randconfig-a001-20200305
i386 randconfig-a002-20200305
i386 randconfig-a003-20200305
x86_64   randconfig-a001-20200306
x86_64   randconfig-a002-20200306
x86_64   randconfig-a003-20200306
i386 randconfig-a001-20200306
i386 randconfig-a002-20200306
i386 randconfig-a003-20200306
alpharandconfig-a001-20200305
m68k randconfig-a001-20200305
mips randconfig-a001-20200305
nds32randconfig-a001-20200305
parisc   randconfig-a001-20200305
riscvrandconfig-a001-20200305
c6x  randconfig-a001-20200305
h8300randconfig-a001-20200305
microblaze   randconfig-a001-20200305
nios2randconfig-a001-20200305
sparc64  randconfig-a001-20200305
csky randconfig-a001-20200305
openrisc randconfig-a001-20200305
s390 randconfig-a001-20200305
sh   randconfig-a001-20200305
xtensa   randconfig-a001-20200305
x86_64   randconfig-b001-20200305
x86_64   randconfig-b002-20200305
x86_64   randconfig-b003-20200305
i386 randconfig-b001-20200305
i386 randconfig-b002-20200305
i386 randconfig-b003-20200305
x86_64   randconfig-c001-20200306
x86_64   randconfig-c002-20200306
x86_64   randconfig-c003-20200306
i386 randconfig-c001-20200306

[powerpc:next] BUILD SUCCESS 247257b03b04398ca07da4bce3d17bee25d623cb

2020-03-05 Thread kbuild test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
next
branch HEAD: 247257b03b04398ca07da4bce3d17bee25d623cb  powerpc/numa: Remove 
late request for home node associativity

elapsed time: 1022m

configs tested: 230
configs skipped: 12

The following configs have been built successfully.
More configs may be tested in the coming days.

arm  allmodconfig
arm   allnoconfig
arm  allyesconfig
arm64allmodconfig
arm64 allnoconfig
arm64allyesconfig
arm at91_dt_defconfig
arm   efm32_defconfig
arm  exynos_defconfig
armmulti_v5_defconfig
armmulti_v7_defconfig
armshmobile_defconfig
arm   sunxi_defconfig
arm64   defconfig
sparcallyesconfig
microblazenommu_defconfig
mips  allnoconfig
ia64defconfig
h8300h8300h-sim_defconfig
pariscallnoconfig
s390defconfig
powerpc defconfig
m68k   m5475evb_defconfig
parisc   allyesconfig
i386  allnoconfig
i386 allyesconfig
i386 alldefconfig
i386defconfig
ia64 alldefconfig
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
nios2 3c120_defconfig
nios2 10m50_defconfig
c6xevmc6678_defconfig
xtensa  iss_defconfig
c6x  allyesconfig
xtensa   common_defconfig
openrisc simple_smp_defconfig
openriscor1ksim_defconfig
alpha   defconfig
cskydefconfig
nds32 allnoconfig
nds32   defconfig
h8300 edosk2674_defconfig
h8300   h8s-sim_defconfig
m68k allmodconfig
m68k  multi_defconfig
m68k   sun3_defconfig
arc  allyesconfig
arc defconfig
microblaze  mmu_defconfig
powerpc   allnoconfig
powerpc   ppc64_defconfig
powerpc  rhel-kconfig
mips   32r2_defconfig
mips 64r6el_defconfig
mips allmodconfig
mips allyesconfig
mips  fuloong2e_defconfig
mips  malta_kvm_defconfig
pariscgeneric-32bit_defconfig
pariscgeneric-64bit_defconfig
x86_64   randconfig-a001-20200305
x86_64   randconfig-a002-20200305
x86_64   randconfig-a003-20200305
i386 randconfig-a001-20200305
i386 randconfig-a002-20200305
i386 randconfig-a003-20200305
x86_64   randconfig-a001-20200306
x86_64   randconfig-a002-20200306
x86_64   randconfig-a003-20200306
i386 randconfig-a001-20200306
i386 randconfig-a002-20200306
i386 randconfig-a003-20200306
alpharandconfig-a001-20200305
m68k randconfig-a001-20200305
mips randconfig-a001-20200305
nds32randconfig-a001-20200305
parisc   randconfig-a001-20200305
riscvrandconfig-a001-20200305
alpharandconfig-a001-20200306
m68k randconfig-a001-20200306
mips randconfig-a001-20200306
nds32randconfig-a001-20200306
parisc   randconfig-a001-20200306
riscvrandconfig-a001-20200306
c6x  randconfig-a001-20200305
h8300randconfig-a001-20200305
microblaze   randconfig-a001-20200305
nios2randconfig-a001-20200305
sparc64  randconfig-a001-20200305
csky randconfig-a001-20200305
openrisc randconfig-a001-20200305
s390 randconfig-a001-20200305
sh   randconfig-a001-20200305
xtensa   randconfig-a001-20200305
x86_64   randconfig-b001-20200305
x86_64   randconfig-b002-20200305
x86_64   randconfig-b003-20200305
i386 randconfig

Re: [PATCH v3 6/8] perf/tools: Enhance JSON/metric infrastructure to handle "?"

2020-03-05 Thread kajoljain



On 3/2/20 8:38 PM, Jiri Olsa wrote:
> On Sat, Feb 29, 2020 at 03:11:57PM +0530, Kajol Jain wrote:
> 
> SNIP
> 
>>  #define PVR_VER(pvr)(((pvr) >>  16) & 0xFFFF) /* Version field */
>>  #define PVR_REV(pvr)(((pvr) >>   0) & 0xFFFF) /* Revison field */
>>  
>> +#define SOCKETS_INFO_FILE_PATH "/devices/hv_24x7/interface/"
>> +
>>  int
>>  get_cpuid(char *buffer, size_t sz)
>>  {
>> @@ -44,3 +51,43 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
>>  
>>  return bufp;
>>  }
>> +
>> +int arch_get_runtimeparam(void)
>> +{
>> +int count = 0;
>> +DIR *dir;
>> +char path[PATH_MAX];
>> +const char *sysfs = sysfs__mountpoint();
>> +char filename[] = "sockets";
>> +FILE *file;
>> +char buf[16], *num;
>> +int data;
>> +
>> +if (!sysfs)
>> +goto out;
>> +
>> +snprintf(path, PATH_MAX,
>> + "%s" SOCKETS_INFO_FILE_PATH, sysfs);
>> +dir = opendir(path);
>> +
>> +if (!dir)
>> +goto out;
>> +
>> +strcat(path, filename);
>> +file = fopen(path, "r");
>> +
>> +if (!file)
>> +goto out;
>> +
>> +data = fread(buf, 1, sizeof(buf), file);
>> +
>> +if (data == 0)
>> +goto out;
>> +
>> +count = strtol(buf, &num, 10);
>> +out:
>> +if (!count)
>> +count = 1;
>> +
>> +return count;
> 
> we have sysfs__read_ull for this
> 

Hi Jiri,
Thanks for suggesting it. Will update.

Kajol

> jirka
> 


Re: [PATCH -next v2] powerpc/64s/pgtable: fix an undefined behaviour

2020-03-05 Thread Qian Cai



> On Mar 5, 2020, at 2:22 PM, Christophe Leroy  wrote:
> 
> 
> 
> Le 05/03/2020 à 15:32, Qian Cai a écrit :
>> Booting a power9 server with hash MMU could trigger an undefined
>> behaviour because pud_offset(p4d, 0) will do,
>> 0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)
>> Fix it by converting pud_offset() and friends to static inline
>> functions.
> 
> I was suggesting to convert pud_index() to static inline, because that's 
> where the shift sits. Is that not possible?
> 
> Here you seem to fix the problem for now, but if someone reuses pud_index() 
> in another macro one day, the same problem may happen again.
> 

Sounds reasonable. I sent out a v3:

https://lore.kernel.org/lkml/20200306044852.3236-1-...@lca.pw/T/#u

> Christophe
> 
>>  UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
>>  shift exponent 34 is too large for 32-bit type 'int'
>>  CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
>>  Call Trace:
>>  dump_stack+0xf4/0x164 (unreliable)
>>  ubsan_epilogue+0x18/0x78
>>  __ubsan_handle_shift_out_of_bounds+0x160/0x21c
>>  walk_pagetables+0x2cc/0x700
>>  walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
>>  (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
>>  ptdump_check_wx+0x8c/0xf0
>>  mark_rodata_ro+0x48/0x80
>>  kernel_init+0x74/0x194
>>  ret_from_kernel_thread+0x5c/0x74
>> Suggested-by: Christophe Leroy 
>> Signed-off-by: Qian Cai 
>> ---
>>  arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++--
>>  1 file changed, 14 insertions(+), 6 deletions(-)
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index fa60e8594b9f..4967bc9e25e2 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -1016,12 +1016,20 @@ static inline bool p4d_access_permitted(p4d_t p4d, 
>> bool write)
>>#define pgd_offset(mm, address)((mm)->pgd + pgd_index(address))
>>  -#define pud_offset(p4dp, addr) \
>> -(((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr))
>> -#define pmd_offset(pudp,addr) \
>> -(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
>> -#define pte_offset_kernel(dir,addr) \
>> -(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
>> +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
>> +{
>> +return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
>> +}
>> +
>> +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
>> +{
>> +return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
>> +}
>> +
>> +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
>> +{
>> +return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
>> +}
>>#define pte_offset_map(dir,addr)  pte_offset_kernel((dir), (addr))
>>  



[PATCH v3] powerpc/64s/pgtable: fix an undefined behaviour

2020-03-05 Thread Qian Cai
Booting a power9 server with hash MMU could trigger an undefined
behaviour because pud_offset(p4d, 0) will do,

0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)

Fix it by converting pud_index() and friends to static inline
functions.

UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
shift exponent 34 is too large for 32-bit type 'int'
CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
Call Trace:
dump_stack+0xf4/0x164 (unreliable)
ubsan_epilogue+0x18/0x78
__ubsan_handle_shift_out_of_bounds+0x160/0x21c
walk_pagetables+0x2cc/0x700
walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
(inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
ptdump_check_wx+0x8c/0xf0
mark_rodata_ro+0x48/0x80
kernel_init+0x74/0x194
ret_from_kernel_thread+0x5c/0x74

Suggested-by: Christophe Leroy 
Signed-off-by: Qian Cai 
---

v3: convert pud_index() etc to static inline functions.
v2: convert pud_offset() etc to static inline functions.

 arch/powerpc/include/asm/book3s/64/pgtable.h | 23 
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 201a69e6a355..bd432c6706b9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -998,10 +998,25 @@ extern struct page *pgd_page(pgd_t pgd);
 #define pud_page_vaddr(pud)__va(pud_val(pud) & ~PUD_MASKED_BITS)
 #define pgd_page_vaddr(pgd)__va(pgd_val(pgd) & ~PGD_MASKED_BITS)
 
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
-#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
-#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
-#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+static inline unsigned long pgd_index(unsigned long address)
+{
+   return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
+}
+
+static inline unsigned long pud_index(unsigned long address)
+{
+   return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
+static inline unsigned long pmd_index(unsigned long address)
+{
+   return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+}
+
+static inline unsigned long pte_index(unsigned long address)
+{
+   return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+}
 
 /*
  * Find an entry in a page-table-directory.  We combine the address region
-- 
2.21.0 (Apple Git-122.2)



Re: [PATCH v2] powerpc: setup_64: set up PACA earlier to avoid kcov problems

2020-03-05 Thread Andrew Donnellan

On 6/3/20 3:40 pm, Daniel Axtens wrote:

There's some special handling for CPU_FTR_HVMODE in
cpufeatures_setup_cpu() in kernel/dt_cpu_ftrs.c:

  /* Initialize the base environment -- clear FSCR/HFSCR.  */
  hv_mode = !!(mfmsr() & MSR_HV);
  if (hv_mode) {
  /* CPU_FTR_HVMODE is used early in PACA setup */
  cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
  mtspr(SPRN_HFSCR, 0);
  }

With this patch, the comment about PACA setup I assume is no longer
true. It looks like we still rely on hv_mode being set to deal with
discrepancies between the device tree and the MSR.


This code confuses me. IIUC it sets the CPU feature if we're in HV mode,
which will catch the case where the HV bit is set in the MSR but for
some reason it's not listed in the DT. With my patch, we'll directly
test the MSR so we don't need the cpu feature set for that.

However, the CPU feature is tested elsewhere, so I think the correct
behaviour is to keep the code but drop the comment. Having said that
bootstrapping is hard so lmk if I've misunderstood.


That was my thinking too.

--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited



Re: [PATCH v2] powerpc: setup_64: set up PACA earlier to avoid kcov problems

2020-03-05 Thread Daniel Axtens
> There's some special handling for CPU_FTR_HVMODE in 
> cpufeatures_setup_cpu() in kernel/dt_cpu_ftrs.c:
>
>  /* Initialize the base environment -- clear FSCR/HFSCR.  */
>  hv_mode = !!(mfmsr() & MSR_HV);
>  if (hv_mode) {
>  /* CPU_FTR_HVMODE is used early in PACA setup */
>  cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
>  mtspr(SPRN_HFSCR, 0);
>  }
>
> With this patch, the comment about PACA setup I assume is no longer 
> true. It looks like we still rely on hv_mode being set to deal with 
> discrepancies between the device tree and the MSR.

This code confuses me. IIUC it sets the CPU feature if we're in HV mode,
which will catch the case where the HV bit is set in the MSR but for
some reason it's not listed in the DT. With my patch, we'll directly
test the MSR so we don't need the cpu feature set for that.

However, the CPU feature is tested elsewhere, so I think the correct
behaviour is to keep the code but drop the comment. Having said that
bootstrapping is hard so lmk if I've misunderstood.

Regards,
Daniel
>
> -- 
> Andrew Donnellan  OzLabs, ADL Canberra
> a...@linux.ibm.com IBM Australia Limited


Re: [PATCH] powerpc/64s/radix: Fix !SMP build

2020-03-05 Thread Anton Blanchard
Thanks Nick,

> Signed-off-by: Nicholas Piggin 

Tested-by: Anton Blanchard 

> ---
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 1 +
>  arch/powerpc/mm/book3s64/radix_tlb.c | 7 ++-
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c
> b/arch/powerpc/mm/book3s64/radix_pgtable.c index
> dd1bea45325c..2a9a0cd79490 100644 ---
> a/arch/powerpc/mm/book3s64/radix_pgtable.c +++
> b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c
> b/arch/powerpc/mm/book3s64/radix_tlb.c index
> 03f43c924e00..758ade2c2b6e 100644 ---
> a/arch/powerpc/mm/book3s64/radix_tlb.c +++
> b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -587,6 +587,11 @@ void
> radix__local_flush_all_mm(struct mm_struct *mm) preempt_enable();
>  }
>  EXPORT_SYMBOL(radix__local_flush_all_mm);
> +
> +static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
> +{
> + radix__local_flush_all_mm(mm);
> +}
>  #endif /* CONFIG_SMP */
>  
>  void radix__local_flush_tlb_page_psize(struct mm_struct *mm,
> unsigned long vmaddr, @@ -777,7 +782,7 @@ void
> radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long
> vmaddr) EXPORT_SYMBOL(radix__flush_tlb_page); 
>  #else /* CONFIG_SMP */
> -#define radix__flush_all_mm radix__local_flush_all_mm
> +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
>  #endif /* CONFIG_SMP */
>  
>  static void do_tlbiel_kernel(void *info)



Re: [PATCH v2] powerpc/Makefile: Mark phony targets as PHONY

2020-03-05 Thread Masahiro Yamada
On Fri, Mar 6, 2020 at 9:27 AM Michael Ellerman
 wrote:
>
> On Wed, 2020-02-19 at 00:04:34 UTC, Michael Ellerman wrote:
> > Some of our phony targets are not marked as such. This can lead to
> > confusing errors, eg:
> >
> >   $ make clean
> >   $ touch install
> >   $ make install
> >   make: 'install' is up to date.
> >   $
> >
> > Fix it by adding them to the PHONY variable which is marked phony in
> > the top-level Makefile, or in scripts/Makefile.build for the boot
> > Makefile.
> >
> > Suggested-by: Masahiro Yamada 
> > Signed-off-by: Michael Ellerman 
>
> Applied to powerpc next.
>
> https://git.kernel.org/powerpc/c/d42c6d0f8d004c3661dde3c376ed637e9f292c22
>

You do not have to double your Signed-off-by.



-- 
Best Regards
Masahiro Yamada


Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs

2020-03-05 Thread Alastair D'Silva
On Wed, 2020-03-04 at 17:53 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > +   struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > +   struct ioctl_ocxl_pmem_controller_dump_data args;
> > +   u16 i;
> > +   u64 val;
> > +   int rc;
> > +
> > +   if (copy_from_user(&args, uarg, sizeof(args)))
> > +   return -EFAULT;
> > +
> > +   if (args.buf_size % 8)
> > +   return -EINVAL;
> > +
> > +   if (args.buf_size > ocxlpmem->admin_command.data_size)
> > +   return -EINVAL;
> > +
> > +   mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +   rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > +   if (rc)
> > +   goto out;
> > +
> > +   val = ((u64)args.offset) << 32;
> > +   val |= args.buf_size;
> > +   rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > + OCXL_LITTLE_ENDIAN, val);
> > +   if (rc)
> > +   goto out;
> > +
> > +   rc = admin_command_execute(ocxlpmem);
> > +   if (rc)
> > +   goto out;
> > +
> > +   rc = admin_command_complete_timeout(ocxlpmem,
> > +   ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > +   if (rc < 0) {
> > +   dev_warn(>dev, "Controller dump timed
> > out\n");
> > +   goto out;
> > +   }
> > +
> > +   rc = admin_response(ocxlpmem);
> > +   if (rc < 0)
> > +   goto out;
> > +   if (rc != STATUS_SUCCESS) {
> > +   warn_status(ocxlpmem,
> > +   "Unexpected status from retrieve error
> > log",
> 
> Controller dump
> 

Ok

> > +   rc);
> > +   goto out;
> > +   }
> > +
> > +   for (i = 0; i < args.buf_size; i += 8) {
> > +   u64 val;
> > +
> > +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +ocxlpmem-
> > >admin_command.data_offset + i,
> > +OCXL_HOST_ENDIAN, &val);
> 
> Is a controller dump something where we want to do endian swapping?
> 

No, we just have raw binary data that we want to pass through.
OCXL_HOST_ENDIAN does no swapping.

> Any reason we're not doing the usual check of the data identifier, 
> additional data length etc?
> 

I'll add that

> > +   if (rc)
> > +   goto out;
> > +
> > +   if (copy_to_user([i], , sizeof(u64))) {
> > +   rc = -EFAULT;
> > +   goto out;
> > +   }
> > +   }
> > +
> > +   if (copy_to_user(uarg, &args, sizeof(args))) {
> > +   rc = -EFAULT;
> > +   goto out;
> > +   }
> > +
> > +   rc = admin_response_handled(ocxlpmem);
> > +   if (rc)
> > +   goto out;
> > +
> > +out:
> > +   mutex_unlock(&ocxlpmem->admin_command.lock);
> > +   return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > +   int rc;
> > +   u64 busy = 1;
> > +
> > +   rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > +   OCXL_LITTLE_ENDIAN,
> > +   GLOBAL_MMIO_CHI_CDA);
> 
> This return code is ignored
> 
> > +
> > +
> > +   rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +   OCXL_LITTLE_ENDIAN,
> > +   GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > +   if (rc)
> > +   return rc;
> > +
> > +   while (busy) {
> > +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +GLOBAL_MMIO_HCI,
> > +OCXL_LITTLE_ENDIAN,
> > &busy);
> > +   if (rc)
> > +   return rc;
> > +
> > +   busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> > +   cond_resched();
> > +   }
> > +
> > +   return 0;
> > +}
> 
> 
-- 
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819



[PATCH] powerpc/vdso: Fix multiple issues with sys_call_table

2020-03-05 Thread Anton Blanchard
The VDSO exports a bitmap of valid syscalls. vdso_setup_syscall_map()
sets this up, but there are both little and big endian bugs. The issue
is with:

   if (sys_call_table[i] != sys_ni_syscall)

On little endian, instead of comparing pointers to the two functions,
we compare the first two instructions of each function. If a function
happens to have the same first two instructions as sys_ni_syscall, then
we have a spurious match and mark the syscall as not implemented.
Fix this by removing the inline declarations.

On big endian we have a further issue where sys_ni_syscall is a function
descriptor and sys_call_table[] holds pointers to the instruction text.
Fix this by using dereference_kernel_function_descriptor().

Cc: sta...@vger.kernel.org
Signed-off-by: Anton Blanchard 

---
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index b9a108411c0d..d186b729026e 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #undef DEBUG
 
@@ -644,19 +646,16 @@ static __init int vdso_setup(void)
 static void __init vdso_setup_syscall_map(void)
 {
unsigned int i;
-   extern unsigned long *sys_call_table;
-#ifdef CONFIG_PPC64
-   extern unsigned long *compat_sys_call_table;
-#endif
-   extern unsigned long sys_ni_syscall;
+   unsigned long ni_syscall;
 
+   ni_syscall = (unsigned 
long)dereference_kernel_function_descriptor(sys_ni_syscall);
 
for (i = 0; i < NR_syscalls; i++) {
 #ifdef CONFIG_PPC64
-   if (sys_call_table[i] != sys_ni_syscall)
+   if (sys_call_table[i] != ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
-   if (compat_sys_call_table[i] != sys_ni_syscall)
+   if (compat_sys_call_table[i] != ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
 #else /* CONFIG_PPC64 */


Re: [PATCH] Add OPAL_GET_SYMBOL / OPAL_LOOKUP_SYMBOL

2020-03-05 Thread Oliver O'Halloran
On Fri, Feb 28, 2020 at 2:09 PM Nicholas Piggin  wrote:
>
> These calls can be used by Linux to annotate BUG addresses with symbols,
> look up symbol addresses in xmon, etc.
>
> This is preferable over having Linux parse the OPAL symbol map itself,
> because OPAL's parsing code already exists for its own symbol printing,
> and it can support other code regions than the skiboot symbols, e.g.,
> the wake-up code in the HOMER (where CPUs have been seen to get stuck).
>
> Signed-off-by: Nicholas Piggin 
> ---
>  core/opal.c |  2 +
>  core/utils.c| 92 +++--
>  doc/opal-api/opal-get-symbol-181.rst| 42 +++
>  doc/opal-api/opal-lookup-symbol-182.rst | 35 ++
>  include/opal-api.h  |  4 +-
>  5 files changed, 168 insertions(+), 7 deletions(-)
>  create mode 100644 doc/opal-api/opal-get-symbol-181.rst
>  create mode 100644 doc/opal-api/opal-lookup-symbol-182.rst
>
> diff --git a/core/opal.c b/core/opal.c
> index d6ff6027b..d9fc4fe05 100644
> --- a/core/opal.c
> +++ b/core/opal.c
> @@ -142,6 +142,8 @@ int64_t opal_entry_check(struct stack_frame *eframe)
> case OPAL_CEC_REBOOT:
> case OPAL_CEC_REBOOT2:
> case OPAL_SIGNAL_SYSTEM_RESET:
> +   case OPAL_GET_SYMBOL:
> +   case OPAL_LOOKUP_SYMBOL:

These names are still awful :|

> break;
> default:
> printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx 
> cpu @%p -> pir=%04x token=%llu\n",
> diff --git a/core/utils.c b/core/utils.c
> index 8fd63fcb7..5f0d5130b 100644
> --- a/core/utils.c
> +++ b/core/utils.c
> @@ -48,40 +48,120 @@ char __attrconst tohex(uint8_t nibble)
> return __tohex[nibble];
>  }
>
> -static unsigned long get_symbol(unsigned long addr, char **sym, char 
> **sym_end)
> +static unsigned long get_symbol(unsigned long addr, char **sym, char 
> **sym_end, unsigned long *size)
>  {
> unsigned long prev = 0, next;
> char *psym = NULL, *p = __sym_map_start;
>
> *sym = *sym_end = NULL;
> -   while(p < __sym_map_end) {
> +   while (p < __sym_map_end) {
> next = strtoul(p, &p, 16) | SKIBOOT_BASE;
> if (next > addr && prev <= addr) {
> -   p = psym + 3;;
> +   p = psym + 3;
> if (p >= __sym_map_end)
> return 0;
> *sym = p;
> -   while(p < __sym_map_end && *p != 10)
> +   while (p < __sym_map_end && *p != '\n')
> p++;
> *sym_end = p;
> +   *size = next - prev;
> return prev;
> }
> prev = next;
> psym = p;
> -   while(p < __sym_map_end && *p != 10)
> +   while (p < __sym_map_end && *p != '\n')
> p++;
> p++;
> }
> return 0;
>  }
>
> +static unsigned long lookup_symbol(const char *name, unsigned long *size)
> +{
> +   size_t len = strlen(name);
> +   unsigned long addr = 0;
> +   char *sym;
> +   char *p = __sym_map_start;
> +
> +   while (p < __sym_map_end) {
> +   addr = strtoul(p, &p, 16) | SKIBOOT_BASE;
> +   p += 3;
> +   if (p >= __sym_map_end)
> +   return 0;
> +
> +   if (*(p + len) == '\n' && !strncmp(name, p, len)) {
> +   char *sym_end;
> +
> +   if (get_symbol(addr, &sym, &sym_end, size) == 0) {
> +   assert(!strcmp(name, "_end"));
> +   *size = 0;
> +   }
> +
> +   /*
> +* May be more than one symbol at this address but
> +* symbol length calculation should still work in
> +* that case.
> +*/
> +
> +   return addr;
> +   }
> +
> +   while(p < __sym_map_end && *p != '\n')
> +   p++;
> +   p++;
> +   }
> +   return 0;
> +}
> +
> +static int64_t opal_get_symbol(uint64_t addr, __be64 *symaddr, __be64 
> *symsize, char *namebuf, uint64_t buflen)
> +{
> +   unsigned long saddr;
> +   unsigned long ssize;
> +   char *sym, *sym_end;
> +   size_t l;
> +
> +   saddr = get_symbol(addr, &sym, &sym_end, &ssize);
> +   if (!saddr)
> +   return OPAL_RESOURCE;
> +
> +   if (buflen > sym_end - sym)
> +   l = sym_end - sym;
> +   else
> +   l = buflen - 1;
> +   memcpy(namebuf, sym, l);
> +   namebuf[l] = '\0';
> +
> +   *symaddr = cpu_to_be64(saddr);
> +   *symsize = cpu_to_be64(ssize);
> +
> +   return OPAL_SUCCESS;

Re: [PATCH] KVM: PPC: Book3S HV: Fix typos in comments

2020-03-05 Thread Gabriel Paubert
On Fri, Mar 06, 2020 at 11:26:36AM +1100, Gustavo Romero wrote:
> Fix typos found in comments about the parameter passed
> through r5 to kvmppc_{save,restore}_tm_hv functions.

Actually "iff" is a common shorthand in some fields and not necessarily
a spelling error:

https://en.wikipedia.org/wiki/If_and_only_if

Gabriel
> 
> Signed-off-by: Gustavo Romero 
> ---
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
> b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index dbc2fec..a55dbe8 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -3121,7 +3121,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
>   * Save transactional state and TM-related registers.
>   * Called with r3 pointing to the vcpu struct and r4 containing
>   * the guest MSR value.
> - * r5 is non-zero iff non-volatile register state needs to be maintained.
> + * r5 is non-zero if non-volatile register state needs to be maintained.
>   * If r5 == 0, this can modify all checkpointed registers, but
>   * restores r1 and r2 before exit.
>   */
> @@ -3194,7 +3194,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
>   * Restore transactional state and TM-related registers.
>   * Called with r3 pointing to the vcpu struct
>   * and r4 containing the guest MSR value.
> - * r5 is non-zero iff non-volatile register state needs to be maintained.
> + * r5 is non-zero if non-volatile register state needs to be maintained.
>   * This potentially modifies all checkpointed registers.
>   * It restores r1 and r2 from the PACA.
>   */
> -- 
> 1.8.3.1
> 


Re: [PATCH] KVM: PPC: Book3S HV: Fix typos in comments

2020-03-05 Thread Gustavo Romero

Hi Gabriel,

On 03/06/2020 01:06 PM, Gabriel Paubert wrote:

On Fri, Mar 06, 2020 at 11:26:36AM +1100, Gustavo Romero wrote:

Fix typos found in comments about the parameter passed
through r5 to kvmppc_{save,restore}_tm_hv functions.


Actually "iff" is a common shorthand in some fields and not necessarily
a spelling error:

https://en.wikipedia.org/wiki/If_and_only_if


I see. Thank you.


Best regards,
Gustavo



Re: [PATCH] crypto: Replace zero-length array with flexible-array member

2020-03-05 Thread Herbert Xu
On Mon, Feb 24, 2020 at 10:21:00AM -0600, Gustavo A. R. Silva wrote:
> The current codebase makes use of the zero-length array language
> extension to the C90 standard, but the preferred mechanism to declare
> variable-length types such as these ones is a flexible array member[1][2],
> introduced in C99:
> 
> struct foo {
> int stuff;
> struct boo array[];
> };
> 
> By making use of the mechanism above, we will get a compiler warning
> in case the flexible array does not occur last in the structure, which
> will help us prevent some kind of undefined behavior bugs from being
> inadvertently introduced[3] to the codebase from now on.
> 
> Also, notice that dynamic memory allocations won't be affected by
> this change:
> 
> "Flexible array members have incomplete type, and so the sizeof operator
> may not be applied. As a quirk of the original implementation of
> zero-length arrays, sizeof evaluates to zero."[1]
> 
> This issue was found with the help of Coccinelle.
> 
> [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
> [2] https://github.com/KSPP/linux/issues/21
> [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")
> 
> Signed-off-by: Gustavo A. R. Silva 
> ---
>  drivers/crypto/caam/caamalg.c  | 2 +-
>  drivers/crypto/caam/caamalg_qi.c   | 4 ++--
>  drivers/crypto/caam/caamalg_qi2.h  | 6 +++---
>  drivers/crypto/caam/caamhash.c | 2 +-
>  drivers/crypto/cavium/nitrox/nitrox_main.c | 2 +-
>  drivers/crypto/chelsio/chcr_core.h | 2 +-
>  drivers/crypto/mediatek/mtk-sha.c  | 2 +-
>  drivers/crypto/nx/nx.h | 2 +-
>  drivers/crypto/omap-sham.c | 4 ++--
>  include/crypto/if_alg.h| 2 +-
>  10 files changed, 14 insertions(+), 14 deletions(-)

Patch applied.  Thanks.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


[PATCH] KVM: PPC: Book3S HV: Fix typos in comments

2020-03-05 Thread Gustavo Romero
Fix typos found in comments about the parameter passed
through r5 to kvmppc_{save,restore}_tm_hv functions.

Signed-off-by: Gustavo Romero 
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index dbc2fec..a55dbe8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -3121,7 +3121,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
  * Save transactional state and TM-related registers.
  * Called with r3 pointing to the vcpu struct and r4 containing
  * the guest MSR value.
- * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * r5 is non-zero if non-volatile register state needs to be maintained.
  * If r5 == 0, this can modify all checkpointed registers, but
  * restores r1 and r2 before exit.
  */
@@ -3194,7 +3194,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
  * Restore transactional state and TM-related registers.
  * Called with r3 pointing to the vcpu struct
  * and r4 containing the guest MSR value.
- * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * r5 is non-zero if non-volatile register state needs to be maintained.
  * This potentially modifies all checkpointed registers.
  * It restores r1 and r2 from the PACA.
  */
-- 
1.8.3.1



Re: [PATCH v3 1/5] powerpc: Rename current_stack_pointer() to current_stack_frame()

2020-03-05 Thread Michael Ellerman
On Thu, 2020-02-20 at 11:51:37 UTC, Michael Ellerman wrote:
> current_stack_pointer(), which was called __get_SP(), used to just
> return the value in r1.
> 
> But that caused problems in some cases, so it was turned into a
> function in commit bfe9a2cfe91a ("powerpc: Reimplement __get_SP() as a
> function not a define").
> 
> Because it's a function in a separate compilation unit to all its
> callers, it has the effect of causing a stack frame to be created, and
> then returns the address of that frame. This is good in some cases
> like those described in the above commit, but in other cases it's
> overkill, we just need to know what stack page we're on.
> 
> On some other arches current_stack_pointer is just a register global
> giving the stack pointer, and we'd like to do that too. So rename our
> current_stack_pointer() to current_stack_frame() to make that
> possible.
> 
> Signed-off-by: Michael Ellerman 

Series applied to powerpc next.

https://git.kernel.org/powerpc/c/3d13e839e801e081bdece0127c2affa33d0f77cf

cheers


Re: [PATCH v2] powerpc/Makefile: Mark phony targets as PHONY

2020-03-05 Thread Michael Ellerman
On Wed, 2020-02-19 at 00:04:34 UTC, Michael Ellerman wrote:
> Some of our phony targets are not marked as such. This can lead to
> confusing errors, eg:
> 
>   $ make clean
>   $ touch install
>   $ make install
>   make: 'install' is up to date.
>   $
> 
> Fix it by adding them to the PHONY variable which is marked phony in
> the top-level Makefile, or in scripts/Makefile.build for the boot
> Makefile.
> 
> Suggested-by: Masahiro Yamada 
> Signed-off-by: Michael Ellerman 

Applied to powerpc next.

https://git.kernel.org/powerpc/c/d42c6d0f8d004c3661dde3c376ed637e9f292c22

cheers


Re: [PATCH] powerpc/mm: Don't kmap_atomic() in pte_offset_map() on PPC32

2020-03-05 Thread Michael Ellerman
On Mon, 2020-02-17 at 09:41:35 UTC, Christophe Leroy wrote:
> On PPC32, pte_offset_map() does a kmap_atomic() in order to support
> page tables allocated in high memory, just like ARM and x86/32.
> 
> But since at least 2008 and commit 8054a3428fbe ("powerpc: Remove dead
> CONFIG_HIGHPTE"), page tables are never allocated in high memory.
> 
> When the page is in low mem, kmap_atomic() just returns the page
> address but still disables preemption and page faults. And it is
> not an inlined function, so we suffer a function call for no reason.
> 
> Make pte_offset_map() the same as pte_offset_kernel() and make
> pte_unmap() void, in the same way as PPC64 which doesn't have HIGHMEM.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/6453f9ed9d4e4b4cdf201bf34bf460c436bf50ea

cheers


Re: [PATCH 1/2] powerpc/kernel/sysfs: Refactor current sysfs.c

2020-03-05 Thread Michael Ellerman
On Fri, 2020-02-14 at 08:06:05 UTC, Kajol Jain wrote:
> From: Madhavan Srinivasan 
> 
> An attempt to refactor the current sysfs.c file.
> To start with, a big chunk of macro #defines and dscr
> functions is moved to the start of the file. Secondly,
> HAS_ #define macros are cleaned up based on CONFIG_ options.
> 
> Finally new HAS_ macro added:
> 1. HAS_PPC_PA6T (for PA6T) to separate out non-PMU SPRs.
> 2. HAS_PPC_PMC56 to separate out PMC SPR's from HAS_PPC_PMC_CLASSIC
>which come under CONFIG_PPC64.
> 
> Signed-off-by: Madhavan Srinivasan 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fcdb524d440d6326c286006e16f252b40ba4fd6a

cheers


Re: [PATCH 1/2] powerpc/powernv: Treat an empty reboot string as default

2020-03-05 Thread Michael Ellerman
On Mon, 2020-02-17 at 02:48:32 UTC, Oliver O'Halloran wrote:
> Treat an empty reboot cmd string the same as a NULL string. This squashes a
> spurious unsupported reboot message that sometimes gets out when using
> xmon.
> 
> Signed-off-by: Oliver O'Halloran 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/16985f2d25095899685952296f128a71f0aff05c

cheers


Re: [PATCH 1/6] powerpc: kernel: no need to check return value of debugfs_create functions

2020-03-05 Thread Michael Ellerman
On Sun, 2020-02-09 at 10:58:56 UTC, Greg Kroah-Hartman wrote:
> When calling debugfs functions, there is no need to ever check the
> return value.  The function can work or not, but the code logic should
> never do something different based on this.
> 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Hari Bathini 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Greg Kroah-Hartman 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/860286cf33963fa8a0fe542995bdec2df5cb3abb

cheers


Re: [PATCH 1/2] powerpc/83xx: Fix some typo in some warning message

2020-03-05 Thread Michael Ellerman
On Sat, 2020-02-08 at 14:09:04 UTC, Christophe JAILLET wrote:
> "couldn;t" should be "couldn't".
> 
> Signed-off-by: Christophe JAILLET 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/365ad0b60d944050d61252e123e6a8b2c3950398

cheers


Re: [PATCH] powerpc/32s: Slenderize _tlbia() for powerpc 603/603e

2020-03-05 Thread Michael Ellerman
On Mon, 2020-02-03 at 16:47:37 UTC, Christophe Leroy wrote:
> _tlbia() is a function used only on 603/603e core, ie on CPUs which
> don't have a hash table.
> 
> _tlbia() uses the tlbia macro which implements a loop of 1024 tlbie.
> 
> On the 603/603e core, flushing the entire TLB requires no more than
> 32 tlbie.
> 
> Replace tlbia by a loop of 32 tlbie.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/e1347a020b81fe47c80cd277bfaa61295a9482a4

cheers


Re: [PATCH v2] powerpc/32s: Don't flush all TLBs when flushing one page

2020-03-05 Thread Michael Ellerman
On Sat, 2020-02-01 at 08:04:31 UTC, Christophe Leroy wrote:
> When flushing any memory range, the flushing function
> flushes all TLBs.
> 
> When (start) and (end - 1) are in the same memory page,
> flush that page instead.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/030e347430957f6f7f29db9099368f8b86c0bf76

cheers


Re: [PATCH v2] powerpc: drmem: avoid NULL pointer dereference when drmem is unavailable

2020-03-05 Thread Michael Ellerman
On Fri, 2020-01-31 at 13:28:29 UTC, Michal Suchanek wrote:
> 
> From: Libor Pechacek 
> 
> In guests without hotpluggable memory, the drmem structure is only zero
> initialized. Trying to manipulate DLPAR parameters results in a crash.
> 
> $ echo "memory add count 1" > /sys/kernel/dlpar
> Oops: Kernel access of bad area, sig: 11 [#1]
> LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> Modules linked in: af_packet(E) rfkill(E) nvram(E) vmx_crypto(E)
> gf128mul(E) e1000(E) virtio_balloon(E) rtc_generic(E) crct10dif_vpmsum(E)
> btrfs(E) blake2b_generic(E) libcrc32c(E) xor(E) raid6_pq(E) virtio_rng(E)
> virtio_blk(E) ohci_pci(E) ehci_pci(E) ohci_hcd(E) ehci_hcd(E)
> crc32c_vpmsum(E) usbcore(E) virtio_pci(E) virtio_ring(E) virtio(E) sg(E)
> dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E)
> scsi_mod(E)
> CPU: 1 PID: 4114 Comm: bash Kdump: loaded Tainted: GE 
> 5.5.0-rc6-2-default #1
> NIP:  c00ff294 LR: c00ff248 CTR: 
> REGS: c000fb9d3880 TRAP: 0300   Tainted: GE  
> (5.5.0-rc6-2-default)
> MSR:  80009033   CR: 28242428  XER: 2000
> CFAR: c09a6c10 DAR: 0010 DSISR: 4000 IRQMASK: 0
> GPR00: c00ff248 c000fb9d3b10 c1682e00 0033
> GPR04: c000ff30bf90 c000ff394800 5110 ffe8
> GPR08:   fe1c 
> GPR12: 2200 c0003fffee00  00011cbc37c0
> GPR16: 00011cb27ed0  00011cb6dd10 
> GPR20: 00011cb7db28 01003ce035f0 00011cbc7828 00011cbc6c70
> GPR24: 01003cf01210  c000ffade4e0 c2d7216b
> GPR28: 0001 c2d78560  c15458d0
> NIP [c00ff294] dlpar_memory+0x6e4/0xd00
> LR [c00ff248] dlpar_memory+0x698/0xd00
> Call Trace:
> [c000fb9d3b10] [c00ff248] dlpar_memory+0x698/0xd00 (unreliable)
> [c000fb9d3ba0] [c00f5990] handle_dlpar_errorlog+0xc0/0x190
> [c000fb9d3c10] [c00f5c58] dlpar_store+0x198/0x4a0
> [c000fb9d3cd0] [c0c4cb00] kobj_attr_store+0x30/0x50
> [c000fb9d3cf0] [c05a37b4] sysfs_kf_write+0x64/0x90
> [c000fb9d3d10] [c05a2c90] kernfs_fop_write+0x1b0/0x290
> [c000fb9d3d60] [c04a2bec] __vfs_write+0x3c/0x70
> [c000fb9d3d80] [c04a6560] vfs_write+0xd0/0x260
> [c000fb9d3dd0] [c04a69ac] ksys_write+0xdc/0x130
> [c000fb9d3e20] [c000b478] system_call+0x5c/0x68
> Instruction dump:
> ebc9 1ce70018 38e7ffe8 7cfe3a14 7fbe3840 419dff14 fb610068 7fc9f378
> 3900 480c 6000 4195fef4 <81490010> 39290018 38c80001 7ea93840
> ---[ end trace cc2dd8152608c295 ]---
> 
> Taking closer look at the code, I can see that for_each_drmem_lmb is a
> macro expanding into `for (lmb = &drmem_info->lmbs[0]; lmb <=
> &drmem_info->lmbs[drmem_info->n_lmbs - 1]; lmb++)`. When drmem_info->lmbs
> is NULL, the loop would iterate through the whole address range if it
> weren't stopped by the NULL pointer dereference on the next line.
> 
> This patch aligns for_each_drmem_lmb and for_each_drmem_lmb_in_range macro
> behavior with the common C semantics, where the end marker does not belong
> to the scanned range, and alters get_lmb_range() semantics. As a side
> effect, the wraparound observed in the crash is prevented.
> 
> Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT 
> format")
> Cc: Michal Suchanek 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Libor Pechacek 
> Signed-off-by: Michal Suchanek 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a83836dbc53e96f13fec248ecc201d18e1e3111d

cheers


Re: [PATCH v3 1/2] powerpc/32: Warn and return ENOSYS on syscalls from kernel

2020-03-05 Thread Michael Ellerman
On Fri, 2020-01-31 at 11:34:54 UTC, Christophe Leroy wrote:
> Since commit b86fb88855ea ("powerpc/32: implement fast entry for
> syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32:
> implement fast entry for syscalls on BOOKE"), syscalls from
> kernel are unexpected and can have catastrophic consequences
> as it will destroy the kernel stack.
> 
> Test MSR_PR on syscall entry. In case syscall is from kernel,
> emit a warning and return ENOSYS error.
> 
> Signed-off-by: Christophe Leroy 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/9e27086292aa880921a0f2b8501e5189d5efcf03

cheers


Re: [PATCH 1/2] pseries/vio: Remove stray #ifdef CONFIG_PPC_PSERIES

2020-03-05 Thread Michael Ellerman
On Thu, 2020-01-30 at 06:31:52 UTC, Oliver O'Halloran wrote:
> vio.c requires CONFIG_IBMVIO which in turn depends on PPC_PSERIES.
> In other words, this ifdef is pointless. At a guess it's a carry-over
> from pre-history.
> 
> Signed-off-by: Oliver O'Halloran 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/f98df5ed0a670f2c4c1a50d7901acbb862a247c7

cheers


Re: [PATCH] powerpc/papr_scm: Mark papr_scm_ndctl() as static

2020-03-05 Thread Michael Ellerman
On Thu, 2020-01-30 at 04:02:06 UTC, Vaibhav Jain wrote:
> Function papr_scm_ndctl() is neither exported from the module nor
> called directly from outside 'papr.c' hence should be marked 'static'.
> 
> Signed-off-by: Vaibhav Jain 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/72c4ebbac476b8375e69fd09390e6b64c2891716

cheers


Re: [PATCH] powerpc/process: Remove unneccessary #ifdef CONFIG_PPC64 in copy_thread_tls()

2020-03-05 Thread Michael Ellerman
On Wed, 2020-01-29 at 19:50:07 UTC, Christophe Leroy wrote:
> is_32bit_task() exists on both PPC64 and PPC32, no need of an ifdefery.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ba32f4b02105e57627912b42e141d65d90074c64

cheers


Re: [PATCH v6 1/5] powerpc/vphn: Check for error from hcall_vphn

2020-03-05 Thread Michael Ellerman
On Wed, 2020-01-29 at 13:52:57 UTC, Srikar Dronamraju wrote:
> There is no value in unpacking associativity, if
> H_HOME_NODE_ASSOCIATIVITY hcall has returned an error.
> 
> Signed-off-by: Srikar Dronamraju 
> Cc: Michael Ellerman 
> Cc: Nicholas Piggin 
> Cc: Nathan Lynch 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Abdul Haleem 
> Cc: Satheesh Rajendran 
> Reported-by: Abdul Haleem 
> Reviewed-by: Nathan Lynch 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/e7214ae9d85aa325c9f3cb34bf4fad7f112861d7

cheers


Re: [PATCH v4] powerpc/smp: Use nid as fallback for package_id

2020-03-05 Thread Michael Ellerman
On Wed, 2020-01-29 at 13:51:21 UTC, Srikar Dronamraju wrote:
> Package_id is to find out all cores that are part of the same chip. On
> PowerNV machines, package_id defaults to chip_id. However ibm,chip_id
> property is not present in device-tree of PowerVM Lpars. Hence lscpu
> output shows one core per socket and multiple cores.
> 
> To overcome this, use nid as the package_id on PowerVM Lpars.
> 
> Before the patch.
> ---
> Architecture:ppc64le
> Byte Order:  Little Endian
> CPU(s):  128
> On-line CPU(s) list: 0-127
> Thread(s) per core:  8
> Core(s) per socket:  1   <--
> Socket(s):   16  <--
> NUMA node(s):2
> Model:   2.2 (pvr 004e 0202)
> Model name:  POWER9 (architected), altivec supported
> Hypervisor vendor:   pHyp
> Virtualization type: para
> L1d cache:   32K
> L1i cache:   32K
> L2 cache:512K
> L3 cache:10240K
> NUMA node0 CPU(s):   0-63
> NUMA node1 CPU(s):   64-127
>  #
>  # cat /sys/devices/system/cpu/cpu0/topology/physical_package_id
>  -1
>  #
> 
> After the patch
> ---
> Architecture:ppc64le
> Byte Order:  Little Endian
> CPU(s):  128
> On-line CPU(s) list: 0-127
> Thread(s) per core:  8<--
> Core(s) per socket:  8<--
> Socket(s):   2
> NUMA node(s):2
> Model:   2.2 (pvr 004e 0202)
> Model name:  POWER9 (architected), altivec supported
> Hypervisor vendor:   pHyp
> Virtualization type: para
> L1d cache:   32K
> L1i cache:   32K
> L2 cache:512K
> L3 cache:10240K
> NUMA node0 CPU(s):   0-63
> NUMA node1 CPU(s):   64-127
>  #
>  # cat /sys/devices/system/cpu/cpu0/topology/physical_package_id
>  0
>  #
> Now lscpu output is more in line with the system configuration.
> 
> Signed-off-by: Srikar Dronamraju 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: Michael Ellerman 
> Cc: Vasant Hegde 
> Cc: Vaidyanathan Srinivasan 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a05f0e5be4e81e4977d3f92aaf7688ee0cb7d5db

cheers


Re: [PATCH v3 1/2] powerpc/32: refactor pmd_offset(pud_offset(pgd_offset...

2020-03-05 Thread Michael Ellerman
On Thu, 2020-01-09 at 08:25:25 UTC, Christophe Leroy wrote:
> At several places pmd pointer is retrieved through the same action:
> 
>   pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
> 
> or
> 
>   pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr);
> 
> Refactor this by implementing two helpers pmd_ptr() and pmd_ptr_k()
> 
> This will help when adding the p4d level.
> 
> Signed-off-by: Christophe Leroy 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/0b1c524caaae2428b20e714297243e5551251eb5

cheers


Re: [PATCH] powerpc/32: don't restore r0, r6-r8 on exception entry path after trace_hardirqs_off()

2020-03-05 Thread Michael Ellerman
On Tue, 2020-01-07 at 09:16:40 UTC, Christophe Leroy wrote:
> Since commit b86fb88855ea ("powerpc/32: implement fast entry for
> syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32:
> implement fast entry for syscalls on BOOKE"), syscalls don't
> use the exception entry path anymore. It is therefore pointless
> to restore r0 and r6-r8 after calling trace_hardirqs_off().
> 
> In the meantime, drop the '2:' label which is unused and misleading.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/05642cf7289c5562e5939d2ee8a0529d310010b8

cheers


Re: [PATCH kernel v3] powerpc/book3s64: Fix error handling in mm_iommu_do_alloc()

2020-03-05 Thread Michael Ellerman
On Mon, 2019-12-23 at 06:03:51 UTC, Alexey Kardashevskiy wrote:
> The last jump to free_exit in mm_iommu_do_alloc() happens after page
> pointers in struct mm_iommu_table_group_mem_t were already converted to
> physical addresses. Thus calling put_page() on these physical addresses
> will likely crash.
> 
> This moves the loop which calculates the pageshift and converts page
> struct pointers to physical addresses later after the point when
> we cannot fail; thus eliminating the need to convert pointers back.
> 
> Fixes: eb9d7a62c386 ("powerpc/mm_iommu: Fix potential deadlock")
> Reported-by: Jan Kara 
> Signed-off-by: Alexey Kardashevskiy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/c4b78169e3667413184c9a20e11b5832288a109f

cheers


Re: [PATCH v6 1/6] Documentation/ABI: add ABI documentation for /sys/kernel/fadump_*

2020-03-05 Thread Michael Ellerman
On Wed, 2019-12-11 at 16:09:05 UTC, Sourabh Jain wrote:
> Add missing ABI documentation for existing FADump sysfs files.
> 
> Signed-off-by: Sourabh Jain 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/f52153ab383f04a45c38d8a7f55a4249477b20df

cheers


Re: [RFC PATCH v2 1/1] powerpc/kernel: Enables memory hot-remove after reboot on pseries guests

2020-03-05 Thread Leonardo Bras
On Thu, 2020-03-05 at 20:32 -0300, Leonardo Bras wrote:
> I will send the matching qemu change as a reply later.

http://patchwork.ozlabs.org/patch/1249931/


signature.asc
Description: This is a digitally signed message part


Re: [PATCH v2] powerpc/mm: Fix missing KUAP disable in flush_coherent_icache()

2020-03-05 Thread Michael Ellerman
On Tue, 2020-03-03 at 23:57:08 UTC, Michael Ellerman wrote:
> We received a report of strange kernel faults which turned out to be
> due to a missing KUAP disable in flush_coherent_icache() called
> from flush_icache_range().
> 
> The fault looks like:
> 
>   Kernel attempted to access user page (7fffc30d9c00) - exploit attempt? 
> (uid: 1009)
>   BUG: Unable to handle kernel data access on read at 0x7fffc30d9c00
>   Faulting instruction address: 0xc007232c
>   Oops: Kernel access of bad area, sig: 11 [#1]
>   LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
>   CPU: 35 PID: 5886 Comm: sigtramp Not tainted 
> 5.6.0-rc2-gcc-8.2.0-3-gfc37a1632d40 #79
>   NIP:  c007232c LR: c003b7fc CTR: 
>   REGS: c01e11093940 TRAP: 0300   Not tainted  
> (5.6.0-rc2-gcc-8.2.0-3-gfc37a1632d40)
>   MSR:  9280b033   CR: 28000884  
> XER: 
>   CFAR: c00722fc DAR: 7fffc30d9c00 DSISR: 0800 IRQMASK: 0
>   GPR00: c003b7fc c01e11093bd0 c23ac200 7fffc30d9c00
>   GPR04: 7fffc30d9c18  c01e11093bd4 
>   GPR08:  0001  c01e1104ed80
>   GPR12:  c01fff6ab380 c16be2d0 4000
>   GPR16: c000 bfff  
>   GPR20: 7fffc30d9c00 7fffc30d8f58 7fffc30d9c18 7fffc30d9c20
>   GPR24: 7fffc30d9c18  c01e11093d90 c01e1104ed80
>   GPR28: c01e11093e90  c23d9d18 7fffc30d9c00
>   NIP flush_icache_range+0x5c/0x80
>   LR  handle_rt_signal64+0x95c/0xc2c
>   Call Trace:
> 0xc01e11093d90 (unreliable)
> handle_rt_signal64+0x93c/0xc2c
> do_notify_resume+0x310/0x430
> ret_from_except_lite+0x70/0x74
>   Instruction dump:
>   409e002c 7c0802a6 3c62ff31 3863f6a0 f8010080 48195fed 6000 48fe4c8d
>   6000 e8010080 7c0803a6 7c0004ac <7c00ffac> 7c0004ac 4c00012c 38210070
> 
> This path through handle_rt_signal64() to setup_trampoline() and
> flush_icache_range() is only triggered by 64-bit processes that have
> unmapped their VDSO, which is rare.
> 
> flush_icache_range() takes a range of addresses to flush. In
> flush_coherent_icache() we implement an optimisation for CPUs where we
> know we don't actually have to flush the whole range, we just need to
> do a single icbi.
> 
> However we still execute the icbi on the user address of the start of
> the range we're flushing. On CPUs that also implement KUAP (Power9)
> that leads to the spurious fault above.
> 
> We should be able to pass any address, including a kernel address, to
> the icbi on these CPUs, which would avoid any interaction with KUAP.
> But I don't want to make that change in a bug fix, just in case it
> surfaces some strange behaviour on some CPU.
> 
> So for now just disable KUAP around the icbi. Note the icbi is treated
> as a load, so we allow read access, not write as you'd expect.
> 
> Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU")
> Cc: sta...@vger.kernel.org # v5.2+
> Signed-off-by: Michael Ellerman 

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/59bee45b9712c759ea4d3dcc4eff1752f3a66558

cheers


Re: [PATCH, v2] powerpc: fix hardware PMU exception bug on PowerVM compatibility mode systems

2020-03-05 Thread Michael Ellerman
On Thu, 2020-02-27 at 13:47:15 UTC, "Desnes A. Nunes do Rosario" wrote:
> PowerVM systems running compatibility mode on a few Power8 revisions are
> still vulnerable to the hardware defect that loses PMU exceptions arriving
> prior to a context switch.
> 
> The software fix for this issue is enabled through the CPU_FTR_PMAO_BUG
> cpu_feature bit, nevertheless this bit also needs to be set for PowerVM
> compatibility mode systems.
> 
> Fixes: 68f2f0d431d9ea4 ("powerpc: Add a cpu feature CPU_FTR_PMAO_BUG")
> Signed-off-by: Desnes A. Nunes do Rosario 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/fc37a1632d40c80c067eb1bc235139f5867a2667

cheers


[Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm

2020-03-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=206695

--- Comment #5 from m...@ellerman.id.au ---
bugzilla-dae...@bugzilla.kernel.org writes:
> https://bugzilla.kernel.org/show_bug.cgi?id=206695
>
> --- Comment #4 from Erhard F. (erhar...@mailbox.org) ---
> (In reply to mpe from comment #3)
>> Can you try this patch?
>
> Applied your patch on top of 5.6-rc4 +
> https://patchwork.ozlabs.org/patch/1248350/ and let the G5 do a few hours
> compiling.
>
> Only getting those nice memleaks from bug #206203 but no windfarm_pm112
> memleak
> any longer. So your patch works well it seems. Thanks!

Thanks.

Can you try this one instead, it changes the order of operations to make
the code flow a bit nicer.

cheers

diff --git a/drivers/macintosh/windfarm_pm112.c
b/drivers/macintosh/windfarm_pm112.c
index 4150301a89a5..e8377ce0a95a 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -132,14 +132,6 @@ static int create_cpu_loop(int cpu)
s32 tmax;
int fmin;

-   /* Get PID params from the appropriate SAT */
-   hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL);
-   if (hdr == NULL) {
-   printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n");
-   return -EINVAL;
-   }
-   piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];
-
/* Get FVT params to get Tmax; if not found, assume default */
hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
if (hdr) {
@@ -152,6 +144,16 @@ static int create_cpu_loop(int cpu)
if (tmax < cpu_all_tmax)
cpu_all_tmax = tmax;

+   kfree(hdr);
+
+   /* Get PID params from the appropriate SAT */
+   hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL);
+   if (hdr == NULL) {
+   printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n");
+   return -EINVAL;
+   }
+   piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];
+
/*
 * Darwin has a minimum fan speed of 1000 rpm for the 4-way and
 * 515 for the 2-way.  That appears to be overkill, so for now,
@@ -174,6 +176,9 @@ static int create_cpu_loop(int cpu)
pid.min = fmin;

wf_cpu_pid_init(&cpu_pid[cpu], &pid);
+
+   kfree(hdr);
+
return 0;
 }

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm

2020-03-05 Thread Michael Ellerman
bugzilla-dae...@bugzilla.kernel.org writes:
> https://bugzilla.kernel.org/show_bug.cgi?id=206695
>
> --- Comment #4 from Erhard F. (erhar...@mailbox.org) ---
> (In reply to mpe from comment #3)
>> Can you try this patch?
>
> Applied your patch on top of 5.6-rc4 +
> https://patchwork.ozlabs.org/patch/1248350/ and let the G5 do a few hours
> compiling.
>
> Only getting those nice memleaks from bug #206203 but no windfarm_pm112 
> memleak
> any longer. So your patch works well it seems. Thanks!

Thanks.

Can you try this one instead, it changes the order of operations to make
the code flow a bit nicer.

cheers

diff --git a/drivers/macintosh/windfarm_pm112.c 
b/drivers/macintosh/windfarm_pm112.c
index 4150301a89a5..e8377ce0a95a 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -132,14 +132,6 @@ static int create_cpu_loop(int cpu)
s32 tmax;
int fmin;
 
-   /* Get PID params from the appropriate SAT */
-   hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL);
-   if (hdr == NULL) {
-   printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n");
-   return -EINVAL;
-   }
-   piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];
-
/* Get FVT params to get Tmax; if not found, assume default */
hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
if (hdr) {
@@ -152,6 +144,16 @@ static int create_cpu_loop(int cpu)
if (tmax < cpu_all_tmax)
cpu_all_tmax = tmax;
 
+   kfree(hdr);
+
+   /* Get PID params from the appropriate SAT */
+   hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL);
+   if (hdr == NULL) {
+   printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n");
+   return -EINVAL;
+   }
+   piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];
+
/*
 * Darwin has a minimum fan speed of 1000 rpm for the 4-way and
 * 515 for the 2-way.  That appears to be overkill, so for now,
@@ -174,6 +176,9 @@ static int create_cpu_loop(int cpu)
pid.min = fmin;
 
wf_cpu_pid_init(&cpu_pid[cpu], &pid);
+
+   kfree(hdr);
+
return 0;
 }
 



[PATCH V15] mm/debug: Add tests validating architecture page table helpers

2020-03-05 Thread Anshuman Khandual
This adds tests which will validate architecture page table helpers and
other accessors in their compliance with expected generic MM semantics.
This will help various architectures in validating changes to existing
page table helpers or addition of new ones.

This test covers basic page table entry transformations including but not
limited to old, young, dirty, clean, write, write protect etc at various
levels along with populating intermediate entries with next page table page
and validating them.

Test page table pages are allocated from system memory with required size
and alignments. The mapped pfns at page table levels are derived from a
real pfn representing a valid kernel text symbol. This test gets called
inside kernel_init() right after async_synchronize_full().

This test gets built and run when CONFIG_DEBUG_VM_PGTABLE is selected. Any
architecture that is willing to subscribe to this test will need to select
ARCH_HAS_DEBUG_VM_PGTABLE. For now this is limited to arc, arm64, x86, s390
and ppc32 platforms where the test is known to build and run successfully.
Going forward, other architectures too can subscribe to the test after fixing
any build or runtime problems with their page table helpers. Meanwhile for
better platform coverage, the test can also be enabled with CONFIG_EXPERT
even without ARCH_HAS_DEBUG_VM_PGTABLE.

Folks interested in making sure that a given platform's page table helpers
conform to expected generic MM semantics should enable the above config
which will just trigger this test during boot. Any non-conformity here will
be reported as a warning which would need to be fixed. This test will help
catch any changes to the agreed upon semantics expected from generic MM and
enable platforms to accommodate it thereafter.

Cc: Andrew Morton 
Cc: Mike Rapoport 
Cc: Vineet Gupta 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Heiko Carstens 
Cc: Vasily Gorbik 
Cc: Christian Borntraeger 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: "H. Peter Anvin" 
Cc: Kirill A. Shutemov 
Cc: Paul Walmsley 
Cc: Palmer Dabbelt 
Cc: linux-snps-...@lists.infradead.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: x...@kernel.org
Cc: linux-a...@vger.kernel.org
Cc: linux-ker...@vger.kernel.org

Suggested-by: Catalin Marinas 
Reviewed-by: Ingo Molnar 
Tested-by: Gerald Schaefer  # s390
Tested-by: Christophe Leroy# ppc32
Signed-off-by: Qian Cai 
Signed-off-by: Andrew Morton 
Signed-off-by: Christophe Leroy 
Signed-off-by: Anshuman Khandual 
---
This adds a test validation for architecture exported page table helpers.
Patch adds basic transformation tests at various levels of the page table.

This test was originally suggested by Catalin during arm64 THP migration
RFC discussion earlier. Going forward it can include more specific tests
with respect to various generic MM functions like THP, HugeTLB etc and
platform specific tests.

https://lore.kernel.org/linux-mm/20190628102003.ga56...@arrakis.emea.arm.com/

Needs to be applied on linux V5.6-rc4

Changes in V15:

- Replaced __pa() with __pa_symbol() 
(https://patchwork.kernel.org/patch/11407715/) 
- Replaced pte_alloc_map() with pte_alloc_map_lock() per Qian
- Replaced pte_unmap() with pte_unmap_unlock() per Qian
- Added address to pte_clear_tests() and passed it down till pte_clear() per 
Qian

Changes in V14: 
(https://patchwork.kernel.org/project/linux-mm/list/?series=242305)

- Disabled DEBUG_VM_PGTABLE for IA64 and ARM (32 Bit) per Andrew and Christophe
- Updated DEBUG_VM_PGTABLE documentation wrt EXPERT and disabled platforms
- Updated RANDOM_[OR|NZ]VALUE open encodings with GENMASK() per Catalin
- Updated s390 constraint bits from 12 to 4 (S390_MASK_BITS) per Gerald
- Updated in-code documentation for RANDOM_ORVALUE per Gerald
- Updated pxx_basic_tests() to use invert functions first per Catalin
- Dropped ARCH_HAS_4LEVEL_HACK check from pud_basic_tests()
- Replaced __ARCH_HAS_[4|5]LEVEL_HACK with __PAGETABLE_[PUD|P4D]_FOLDED per 
Catalin
- Trimmed the CC list on the commit message per Catalin

Changes in V13: 
(https://patchwork.kernel.org/project/linux-mm/list/?series=237125)

- Subscribed s390 platform and updated debug-vm-pgtable/arch-support.txt per 
Gerald
- Dropped keyword 'extern' from debug_vm_pgtable() declaration per Christophe
- Moved debug_vm_pgtable() declarations to <linux/mmdebug.h> per Christophe
- Moved debug_vm_pgtable() call site into kernel_init() per Christophe
- Changed CONFIG_DEBUG_VM_PGTABLE rules per Christophe
- Updated commit to include new supported platforms and changed config selection

Changes in V12: 
(https://patchwork.kernel.org/project/linux-mm/list/?series=233905)

- Replaced __mmdrop() with mmdrop()
- Enable ARCH_HAS_DEBUG_VM_PGTABLE on X86 for non CONFIG_X86_PAE platforms as 
the
  test procedure interferes with pre-allocated PMDs attached to the PGD

Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs

2020-03-05 Thread Alastair D'Silva
On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote:
> 
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva 
> > 
> > This patch adds IOCTLs to allow userspace to request & fetch dumps
> > of the internal controller state.
> > 
> > This is useful during debugging or when a fatal error on the
> > controller
> > has occurred.
> > 
> > Signed-off-by: Alastair D'Silva 
> > ---
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c | 132
> > +
> >   include/uapi/nvdimm/ocxl-pmem.h|  15 +++
> >   2 files changed, 147 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 2b64504f9129..2cabafe1fc58 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem
> > *ocxlpmem,
> > return 0;
> >   }
> >   
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > +   struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > +   struct ioctl_ocxl_pmem_controller_dump_data args;
> > +   u16 i;
> > +   u64 val;
> > +   int rc;
> > +
> > > +   if (copy_from_user(&args, uarg, sizeof(args)))
> > +   return -EFAULT;
> > +
> > +   if (args.buf_size % 8)
> > +   return -EINVAL;
> > +
> > +   if (args.buf_size > ocxlpmem->admin_command.data_size)
> > +   return -EINVAL;
> > +
> > > +   mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +   rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > +   if (rc)
> > +   goto out;
> > +
> > +   val = ((u64)args.offset) << 32;
> > +   val |= args.buf_size;
> > +   rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > + ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > + OCXL_LITTLE_ENDIAN, val);
> > +   if (rc)
> > +   goto out;
> > +
> > +   rc = admin_command_execute(ocxlpmem);
> > +   if (rc)
> > +   goto out;
> > +
> > +   rc = admin_command_complete_timeout(ocxlpmem,
> > +   ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > +   if (rc < 0) {
> > +   dev_warn(>dev, "Controller dump timed
> > out\n");
> > +   goto out;
> > +   }
> > +
> > +   rc = admin_response(ocxlpmem);
> > +   if (rc < 0)
> > +   goto out;
> > +   if (rc != STATUS_SUCCESS) {
> > +   warn_status(ocxlpmem,
> > +   "Unexpected status from retrieve error
> > log",
> > +   rc);
> > +   goto out;
> > +   }
> 
> 
> It would help if there was a comment indicating how the 3 ioctls are 
> used. My understanding is that the userland is:
> - requesting the controller to prepare a state dump
> - then one or more ioctls to fetch the data. The number of calls 
> required to get the full state really depends on the size of the
> buffer 
> passed by user
> - a last ioctl to tell the controller that we're done, presumably to
> let 
> it free some resources.
> 

Ok, will add it to the blurb.
> 
> > +
> > +   for (i = 0; i < args.buf_size; i += 8) {
> > +   u64 val;
> > +
> > +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +ocxlpmem-
> > >admin_command.data_offset + i,
> > > +OCXL_HOST_ENDIAN, &val);
> > +   if (rc)
> > +   goto out;
> > +
> > +   if (copy_to_user([i], , sizeof(u64))) {
> > +   rc = -EFAULT;
> > +   goto out;
> > +   }
> > +   }
> > +
> > > +   if (copy_to_user(uarg, &args, sizeof(args))) {
> > +   rc = -EFAULT;
> > +   goto out;
> > +   }
> > +
> > +   rc = admin_response_handled(ocxlpmem);
> > +   if (rc)
> > +   goto out;
> > +
> > +out:
> > > +   mutex_unlock(&ocxlpmem->admin_command.lock);
> > +   return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > +   int rc;
> > +   u64 busy = 1;
> > +
> > +   rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > +   OCXL_LITTLE_ENDIAN,
> > +   GLOBAL_MMIO_CHI_CDA);
> > +
> 
> rc is not checked here.

Whoops

> 
> 
> > +
> > +   rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +   OCXL_LITTLE_ENDIAN,
> > +   GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > +   if (rc)
> > +   return rc;
> > +
> > +   while (busy) {
> > +   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +GLOBAL_MMIO_HCI,
> > +OCXL_LITTLE_ENDIAN,
> > );
> > +   if (rc)
> > +   return rc;
> > +
> > +   busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> 
> Setting 'busy' doesn't hurt, but it's not really 

[RFC PATCH v2 1/1] powerpc/kernel: Enables memory hot-remove after reboot on pseries guests

2020-03-05 Thread Leonardo Bras
When providing guests, it's desirable to be able to resize their memory on
demand.

Currently, it's possible to do so by creating a guest with a small base
memory, hot-plugging all the rest, and using the 'movable_node' kernel
command-line parameter, which puts all hot-plugged memory in
ZONE_MOVABLE, allowing it to be removed whenever needed.

But there is an issue regarding guest reboot:
If memory is hot-plugged, and then the guest is rebooted, all hot-plugged
memory goes to ZONE_NORMAL, which offers no guaranteed hot-removal.
This usually prevents the memory from being hot-removed from the guest.

It's possible to use device-tree information to fix that behavior, as
it stores flags for LMB ranges on ibm,dynamic-memory-vN.
It involves marking each memblock with the correct flags as hotpluggable
memory, which mm/memblock.c puts in ZONE_MOVABLE during boot if
'movable_node' is passed.

For carrying such information, the new flag DRCONF_MEM_HOTPLUGGED is
proposed, which should be true if memory was hot-plugged on guest, and
false if it's base memory.

During boot, the guest kernel reads the device-tree and early_init_drmem_lmb()
is called for every added LMB. Here, checking for this new flag and
marking memblocks as hotpluggable memory is enough to get the desired
behavior.

This should cause no change if 'movable_node' parameter is not passed
in kernel command-line.

Signed-off-by: Leonardo Bras 

---
The new flag was already proposed on Power Architecture documentation,
and it's waiting for approval.

I would like to get your comments on this change, but it's still not
ready for being merged.

I will send the matching qemu change as a reply later.

Changes since v1:
- Adds new flag, so PowerVM is compatible with the change.
- Fixes mistakes in code
---
 arch/powerpc/include/asm/drmem.h | 1 +
 arch/powerpc/kernel/prom.c   | 9 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 3d76e1c388c2..92083b4565f6 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -65,6 +65,7 @@ struct of_drconf_cell_v2 {
 #define DRCONF_MEM_ASSIGNED0x0008
 #define DRCONF_MEM_AI_INVALID  0x0040
 #define DRCONF_MEM_RESERVED0x0080
+#define DRCONF_MEM_HOTPLUGGED  0x0100
 
 static inline u32 drmem_lmb_size(void)
 {
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6620f37abe73..9c5cb2e8049e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -515,9 +515,14 @@ static void __init early_init_drmem_lmb(struct drmem_lmb 
*lmb,
size = 0x8000ul - base;
}
 
+   if (!validate_mem_limit(base, &size))
+   continue;
+
DBG("Adding: %llx -> %llx\n", base, size);
-   if (validate_mem_limit(base, &size))
-   memblock_add(base, size);
+   memblock_add(base, size);
+
+   if (lmb->flags & DRCONF_MEM_HOTPLUGGED)
+   memblock_mark_hotplug(base, size);
} while (--rngs);
 }
 #endif /* CONFIG_PPC_PSERIES */
-- 
2.24.1



linux-next: manual merge of the net-next tree with the powerpc tree

2020-03-05 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  fs/sysfs/group.c

between commit:

  9255782f7061 ("sysfs: Wrap __compat_only_sysfs_link_entry_to_kobj function to 
change the symlink name")

from the powerpc tree and commit:

  303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc fs/sysfs/group.c
index 1e2a096057bc,5afe0e7ff7cd..
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@@ -478,4 -457,118 +479,118 @@@ int compat_only_sysfs_link_entry_to_kob
kernfs_put(target);
return PTR_ERR_OR_ZERO(link);
  }
 -EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj);
 +EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj);
+ 
+ static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn,
+ const struct attribute_group *grp,
+ struct iattr *newattrs)
+ {
+   struct kernfs_node *kn;
+   int error;
+ 
+   if (grp->attrs) {
+   struct attribute *const *attr;
+ 
+   for (attr = grp->attrs; *attr; attr++) {
+   kn = kernfs_find_and_get(grp_kn, (*attr)->name);
+   if (!kn)
+   return -ENOENT;
+ 
+   error = kernfs_setattr(kn, newattrs);
+   kernfs_put(kn);
+   if (error)
+   return error;
+   }
+   }
+ 
+   if (grp->bin_attrs) {
+   struct bin_attribute *const *bin_attr;
+ 
+   for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
+   kn = kernfs_find_and_get(grp_kn, 
(*bin_attr)->attr.name);
+   if (!kn)
+   return -ENOENT;
+ 
+   error = kernfs_setattr(kn, newattrs);
+   kernfs_put(kn);
+   if (error)
+   return error;
+   }
+   }
+ 
+   return 0;
+ }
+ 
+ /**
+  * sysfs_group_change_owner - change owner of an attribute group.
+  * @kobj: The kobject containing the group.
+  * @grp:  The attribute group.
+  * @kuid: new owner's kuid
+  * @kgid: new owner's kgid
+  *
+  * Returns 0 on success or error code on failure.
+  */
+ int sysfs_group_change_owner(struct kobject *kobj,
+const struct attribute_group *grp, kuid_t kuid,
+kgid_t kgid)
+ {
+   struct kernfs_node *grp_kn;
+   int error;
+   struct iattr newattrs = {
+   .ia_valid = ATTR_UID | ATTR_GID,
+   .ia_uid = kuid,
+   .ia_gid = kgid,
+   };
+ 
+   if (!kobj->state_in_sysfs)
+   return -EINVAL;
+ 
+   if (grp->name) {
+   grp_kn = kernfs_find_and_get(kobj->sd, grp->name);
+   } else {
+   kernfs_get(kobj->sd);
+   grp_kn = kobj->sd;
+   }
+   if (!grp_kn)
+   return -ENOENT;
+ 
+   error = kernfs_setattr(grp_kn, &newattrs);
+   if (!error)
+   error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs);
+ 
+   kernfs_put(grp_kn);
+ 
+   return error;
+ }
+ EXPORT_SYMBOL_GPL(sysfs_group_change_owner);
+ 
+ /**
+  * sysfs_groups_change_owner - change owner of a set of attribute groups.
+  * @kobj: The kobject containing the groups.
+  * @groups:   The attribute groups.
+  * @kuid: new owner's kuid
+  * @kgid: new owner's kgid
+  *
+  * Returns 0 on success or error code on failure.
+  */
+ int sysfs_groups_change_owner(struct kobject *kobj,
+ const struct attribute_group **groups,
+ kuid_t kuid, kgid_t kgid)
+ {
+   int error = 0, i;
+ 
+   if (!kobj->state_in_sysfs)
+   return -EINVAL;
+ 
+   if (!groups)
+   return 0;
+ 
+   for (i = 0; groups[i]; i++) {
+   error = sysfs_group_change_owner(kobj, groups[i], kuid, kgid);
+   if (error)
+   break;
+   }
+ 
+   return error;
+ }
+ EXPORT_SYMBOL_GPL(sysfs_groups_change_owner);


pgpGyk52FBWn2.pgp
Description: OpenPGP digital signature


Re: [PATCH] powerpc/32: Stop printing the virtual memory layout

2020-03-05 Thread Tycho Andersen
On Thu, Mar 05, 2020 at 10:08:37AM -0500, Arvind Sankar wrote:
> For security, don't display the kernel's virtual memory layout.
> 
> Kees Cook points out:
> "These have been entirely removed on other architectures, so let's
> just do the same for ia32 and remove it unconditionally."
> 
> 071929dbdd86 ("arm64: Stop printing the virtual memory layout")
> 1c31d4e96b8c ("ARM: 8820/1: mm: Stop printing the virtual memory layout")
> 3182f798 ("m68k/mm: Stop printing the virtual memory layout")
> fd8d0ca25631 ("parisc: Hide virtual kernel memory layout")
> adb1fe9ae2ee ("mm/page_alloc: Remove kernel address exposure in 
> free_reserved_area()")
> 
> Signed-off-by: Arvind Sankar 

Acked-by: Tycho Andersen 


[PATCH] powerpc/32: Stop printing the virtual memory layout

2020-03-05 Thread Arvind Sankar
For security, don't display the kernel's virtual memory layout.

Kees Cook points out:
"These have been entirely removed on other architectures, so let's
just do the same for ia32 and remove it unconditionally."

071929dbdd86 ("arm64: Stop printing the virtual memory layout")
1c31d4e96b8c ("ARM: 8820/1: mm: Stop printing the virtual memory layout")
3182f798 ("m68k/mm: Stop printing the virtual memory layout")
fd8d0ca25631 ("parisc: Hide virtual kernel memory layout")
adb1fe9ae2ee ("mm/page_alloc: Remove kernel address exposure in 
free_reserved_area()")

Signed-off-by: Arvind Sankar 
---
 arch/powerpc/mm/mem.c | 17 -
 1 file changed, 17 deletions(-)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index ef7b1119b2e2..df2c143b6bf7 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -331,23 +331,6 @@ void __init mem_init(void)
 #endif
 
mem_init_print_info(NULL);
-#ifdef CONFIG_PPC32
-   pr_info("Kernel virtual memory layout:\n");
-#ifdef CONFIG_KASAN
-   pr_info("  * 0x%08lx..0x%08lx  : kasan shadow mem\n",
-   KASAN_SHADOW_START, KASAN_SHADOW_END);
-#endif
-   pr_info("  * 0x%08lx..0x%08lx  : fixmap\n", FIXADDR_START, FIXADDR_TOP);
-#ifdef CONFIG_HIGHMEM
-   pr_info("  * 0x%08lx..0x%08lx  : highmem PTEs\n",
-   PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
-#endif /* CONFIG_HIGHMEM */
-   if (ioremap_bot != IOREMAP_TOP)
-   pr_info("  * 0x%08lx..0x%08lx  : early ioremap\n",
-   ioremap_bot, IOREMAP_TOP);
-   pr_info("  * 0x%08lx..0x%08lx  : vmalloc & ioremap\n",
-   VMALLOC_START, VMALLOC_END);
-#endif /* CONFIG_PPC32 */
 }
 
 void free_initmem(void)
-- 
2.24.1



Re: [PATCH] dt: Remove booting-without-of.txt

2020-03-05 Thread Frank Rowand
On 3/4/20 12:45 PM, Rob Herring wrote:
> Well, not quite removed yet... Mauro is looking at moving this to ReST,
> but I think it would be better to trim or remove it.
> 
> booting-without-of.txt is an ancient document that first outlined
> Flattened DeviceTree. The DT world has evolved a lot in the 15 years
> since and booting-without-of.txt is pretty stale. The name of the document
> itself is confusing if you don't understand the evolution from real
> 'OpenFirmware'. Much of what booting-without-of.txt contains is now in
> the DT specification (which evolved out of the ePAPR).
> 
> This is a first pass of removing everything that has a DT spec
> equivalent or is no longer standard practice (e.g. soc for SoC
> nodes) in order to see what's left. This is what I have:
> 
> TODO
> - Move boot interface details to arch specific docs
> - Document 'serial-number' property in DT spec
> - Document the 'hotpluggable' memory property in DT spec
> - Document the 'sleep' property (PPC only)
> - Document the 'dma-coherent' property in DT spec
> - Need the history of node names and 'name' property?
> - Need how addresses work?

Looks good.  Since this is a first pass, I'm expecting that polishing
(things like updating section numbers) would happen in subsequent
patches after more of the content changes are done, so no need
to do so in this patch.

Reviewed-by: Frank Rowand 

-Frank

> 
> Cc: Frank Rowand 
> Cc: Mauro Carvalho Chehab 
> Cc: Benjamin Herrenschmidt 
> Cc: Geert Uytterhoeven 
> Cc: Michael Ellerman 
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Rob Herring 
> ---
>  .../devicetree/booting-without-of.txt | 1027 +
>  1 file changed, 1 insertion(+), 1026 deletions(-)
> 
> diff --git a/Documentation/devicetree/booting-without-of.txt 
> b/Documentation/devicetree/booting-without-of.txt
> index 4660ccee35a3..97beee828ba4 100644
> --- a/Documentation/devicetree/booting-without-of.txt
> +++ b/Documentation/devicetree/booting-without-of.txt
> @@ -19,44 +19,17 @@ Table of Contents
>  5) Entry point for arch/sh
>  
>II - The DT block format
> -1) Header
>  2) Device tree generalities
> -3) Device tree "structure" block
> -4) Device tree "strings" block
>  
>III - Required content of the device tree
>  1) Note about cells and address representation
> -2) Note about "compatible" properties
> -3) Note about "name" properties
> -4) Note about node and property names and character set
>  5) Required nodes and properties
>a) The root node
> -  b) The /cpus node
> -  c) The /cpus/* nodes
> -  d) the /memory node(s)
> -  e) The /chosen node
> -  f) the /soc node
> -
> -  IV - "dtc", the device tree compiler
> -
> -  V - Recommendations for a bootloader
> -
> -  VI - System-on-a-chip devices and nodes
> -1) Defining child nodes of an SOC
> -2) Representing devices without a current OF specification
> -
> -  VII - Specifying interrupt information for devices
> -1) interrupts property
> -2) interrupt-parent property
> -3) OpenPIC Interrupt Controllers
> -4) ISA Interrupt Controllers
>  
>VIII - Specifying device power management information (sleep property)
>  
>IX - Specifying dma bus information
>  
> -  Appendix A - Sample SOC node for MPC8540
> -
>  
>  Revision Information
>  
> @@ -105,19 +78,6 @@ Revision Information
>- Added chapter VI
>  
>  
> - ToDo:
> - - Add some definitions of interrupt tree (simple/complex)
> - - Add some definitions for PCI host bridges
> - - Add some common address format examples
> - - Add definitions for standard properties and "compatible"
> -   names for cells that are not already defined by the existing
> -   OF spec.
> - - Compare FSL SOC use of PCI to standard and make sure no new
> -   node definition required.
> - - Add more information about node definitions for SOC devices
> -   that currently have no standard, like the FSL CPM.
> -
> -
>  I - Introduction
>  
>  
> @@ -333,196 +293,17 @@ II - The DT block format
>  
>  
>  
> -This chapter defines the actual format of the flattened device-tree
> -passed to the kernel. The actual content of it and kernel requirements
> -are described later. You can find example of code manipulating that
> -format in various places, including arch/powerpc/kernel/prom_init.c
> -which will generate a flattened device-tree from the Open Firmware
> -representation, or the fs2dt utility which is part of the kexec tools
> -which will generate one from a filesystem representation. It is
> -expected that a bootloader like uboot provides a bit more support,
> -that will be discussed later as well.
> -
>  Note: The block has to be in main memory. It has to be accessible in
>  both real mode and virtual mode with no mapping other than main
>  memory. If you are writing a simple flash bootloader, it should 

[Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm

2020-03-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=206695

--- Comment #4 from Erhard F. (erhar...@mailbox.org) ---
(In reply to mpe from comment #3)
> Can you try this patch?

Applied your patch on top of 5.6-rc4 +
https://patchwork.ozlabs.org/patch/1248350/ and let the G5 do a few hours
compiling.

Only getting those nice memleaks from bug #206203 but no windfarm_pm112 memleak
any longer. So your patch works well it seems. Thanks!

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [RFC 00/11] perf: Enhancing perf to export processor hazard information

2020-03-05 Thread Kim Phillips
On 3/4/20 10:46 PM, Ravi Bangoria wrote:
> Hi Kim,

Hi Ravi,

> On 3/3/20 3:55 AM, Kim Phillips wrote:
>> On 3/2/20 2:21 PM, Stephane Eranian wrote:
>>> On Mon, Mar 2, 2020 at 2:13 AM Peter Zijlstra  wrote:

 On Mon, Mar 02, 2020 at 10:53:44AM +0530, Ravi Bangoria wrote:
> Modern processors export such hazard data in Performance
> Monitoring Unit (PMU) registers. Ex, 'Sampled Instruction Event
> Register' on IBM PowerPC[1][2] and 'Instruction-Based Sampling' on
> AMD[3] provides similar information.
>
> Implementation detail:
>
> A new sample_type called PERF_SAMPLE_PIPELINE_HAZ is introduced.
> If it's set, kernel converts arch specific hazard information
> into generic format:
>
>    struct perf_pipeline_haz_data {
>   /* Instruction/Opcode type: Load, Store, Branch  */
>   __u8    itype;
>   /* Instruction Cache source */
>   __u8    icache;
>   /* Instruction suffered hazard in pipeline stage */
>   __u8    hazard_stage;
>   /* Hazard reason */
>   __u8    hazard_reason;
>   /* Instruction suffered stall in pipeline stage */
>   __u8    stall_stage;
>   /* Stall reason */
>   __u8    stall_reason;
>   __u16   pad;
>    };

 Kim, does this format indeed work for AMD IBS?
>>
>> It's not really 1:1, we don't have these separations of stages
>> and reasons, for example: we have missed in L2 cache, for example.
>> So IBS output is flatter, with more cycle latency figures than
>> IBM's AFAICT.
> 
> AMD IBS captures pipeline latency data in the case of Fetch sampling, like the
> Fetch latency, tag to retire latency, completion to retire latency and
> so on. Yes, Ops sampling does provide more data on load/store centric
> information. But it also captures more detailed data for Branch instructions.
> And we also looked at ARM SPE, which also captures more detailed pipeline
> data and latency information.
> 
>>> Personally, I don't like the term hazard. This is too IBM Power
>>> specific. We need to find a better term, maybe stall or penalty.
>>
>> Right, IBS doesn't have a filter to only count stalled or otherwise
>> bad events.  IBS' PPR descriptions has one occurrence of the
>> word stall, and no penalty.  The way I read IBS is it's just
>> reporting more sample data than just the precise IP: things like
>> hits, misses, cycle latencies, addresses, types, etc., so words
>> like 'extended', or the 'auxiliary' already used today even
>> are more appropriate for IBS, although I'm the last person to
>> bikeshed.
> 
> We are thinking of using "pipeline" word instead of Hazard.

Hm, the word 'pipeline' occurs 0 times in IBS documentation.

I realize there are a couple of core pipeline-specific pieces
of information coming out of it, but the vast majority
are addresses, latencies of various components in the memory
hierarchy, and various component hit/miss bits.

What's needed here is a vendor-specific extended
sample information that all these technologies gather,
of which things like e.g., 'L1 TLB cycle latency' we
all should have in common.

I'm not sure why a new PERF_SAMPLE_PIPELINE_HAZ is needed
either.  Can we use PERF_SAMPLE_AUX instead?  Take a look at
commit 98dcf14d7f9c "perf tools: Add kernel AUX area sampling
definitions".  The sample identifier can be used to determine
which vendor's sampling IP's data is in it, and events can
be recorded just by copying the content of the SIER, etc.
registers, and then events get synthesized from the aux
sample at report/inject/annotate etc. time.  This allows
for less sample recording overhead, and moves all the vendor
specific decoding and common event conversions for userspace
to figure out.

>>> Also worth considering is the support of ARM SPE (Statistical
>>> Profiling Extension) which is their version of IBS.
>>> Whatever gets added need to cover all three with no limitations.
>>
>> I thought Intel's various LBR, PEBS, and PT supported providing
>> similar sample data in perf already, like with perf mem/c2c?
> 
> perf-mem is more of data centric in my opinion. It is more towards
> memory profiling. So proposal here is to expose pipeline related
> details like stalls and latencies.

Like I said, I don't see it that way, I see it as "any particular
vendor's event's extended details", and these pipeline details
have overlap with existing infrastructure within perf, e.g., L2
cache misses.

Kim


Re: [PATCH v2] powerpc/mm: Fix missing KUAP disable in flush_coherent_icache()

2020-03-05 Thread Russell Currey
On Wed, 2020-03-04 at 10:57 +1100, Michael Ellerman wrote:
> We received a report of strange kernel faults which turned out to be
> due to a missing KUAP disable in flush_coherent_icache() called
> from flush_icache_range().
> 
> The fault looks like:
> 
>   Kernel attempted to access user page (7fffc30d9c00) - exploit
> attempt? (uid: 1009)
>   BUG: Unable to handle kernel data access on read at 0x7fffc30d9c00
>   Faulting instruction address: 0xc007232c
>   Oops: Kernel access of bad area, sig: 11 [#1]
>   LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
>   CPU: 35 PID: 5886 Comm: sigtramp Not tainted 5.6.0-rc2-gcc-8.2.0-
> 3-gfc37a1632d40 #79
>   NIP:  c007232c LR: c003b7fc CTR: 
>   REGS: c01e11093940 TRAP: 0300   Not tainted  (5.6.0-rc2-gcc-
> 8.2.0-3-gfc37a1632d40)
>   MSR:  9280b033   CR:
> 28000884  XER: 
>   CFAR: c00722fc DAR: 7fffc30d9c00 DSISR: 0800
> IRQMASK: 0
>   GPR00: c003b7fc c01e11093bd0 c23ac200
> 7fffc30d9c00
>   GPR04: 7fffc30d9c18  c01e11093bd4
> 
>   GPR08:  0001 
> c01e1104ed80
>   GPR12:  c01fff6ab380 c16be2d0
> 4000
>   GPR16: c000 bfff 
> 
>   GPR20: 7fffc30d9c00 7fffc30d8f58 7fffc30d9c18
> 7fffc30d9c20
>   GPR24: 7fffc30d9c18  c01e11093d90
> c01e1104ed80
>   GPR28: c01e11093e90  c23d9d18
> 7fffc30d9c00
>   NIP flush_icache_range+0x5c/0x80
>   LR  handle_rt_signal64+0x95c/0xc2c
>   Call Trace:
> 0xc01e11093d90 (unreliable)
> handle_rt_signal64+0x93c/0xc2c
> do_notify_resume+0x310/0x430
> ret_from_except_lite+0x70/0x74
>   Instruction dump:
>   409e002c 7c0802a6 3c62ff31 3863f6a0 f8010080 48195fed 6000
> 48fe4c8d
>   6000 e8010080 7c0803a6 7c0004ac <7c00ffac> 7c0004ac 4c00012c
> 38210070
> 
> This path through handle_rt_signal64() to setup_trampoline() and
> flush_icache_range() is only triggered by 64-bit processes that have
> unmapped their VDSO, which is rare.
> 
> flush_icache_range() takes a range of addresses to flush. In
> flush_coherent_icache() we implement an optimisation for CPUs where
> we
> know we don't actually have to flush the whole range, we just need to
> do a single icbi.
> 
> However we still execute the icbi on the user address of the start of
> the range we're flushing. On CPUs that also implement KUAP (Power9)
> that leads to the spurious fault above.
> 
> We should be able to pass any address, including a kernel address, to
> the icbi on these CPUs, which would avoid any interaction with KUAP.
> But I don't want to make that change in a bug fix, just in case it
> surfaces some strange behaviour on some CPU.
> 
> So for now just disable KUAP around the icbi. Note the icbi is
> treated
> as a load, so we allow read access, not write as you'd expect.
> 
> Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU")
> Cc: sta...@vger.kernel.org # v5.2+
> Signed-off-by: Michael Ellerman 
> ---

Reviewed-by: Russell Currey 



Re: [PATCH -next v2] powerpc/64s/pgtable: fix an undefined behaviour

2020-03-05 Thread Christophe Leroy




Le 05/03/2020 à 15:32, Qian Cai a écrit :

Booting a power9 server with hash MMU could trigger an undefined
behaviour because pud_offset(p4d, 0) will do,

0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)

Fix it by converting pud_offset() and friends to static inline
functions.


I was suggesting to convert pud_index() to static inline, because that's 
where the shift sits. Is it not possible ?


Here you seem to fix the problem for now, but if someone reuses
pud_index() in another macro one day, the same problem may happen again.


Christophe



  UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
  shift exponent 34 is too large for 32-bit type 'int'
  CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
  Call Trace:
  dump_stack+0xf4/0x164 (unreliable)
  ubsan_epilogue+0x18/0x78
  __ubsan_handle_shift_out_of_bounds+0x160/0x21c
  walk_pagetables+0x2cc/0x700
  walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
  (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
  ptdump_check_wx+0x8c/0xf0
  mark_rodata_ro+0x48/0x80
  kernel_init+0x74/0x194
  ret_from_kernel_thread+0x5c/0x74

Suggested-by: Christophe Leroy 
Signed-off-by: Qian Cai 
---
  arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++--
  1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index fa60e8594b9f..4967bc9e25e2 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1016,12 +1016,20 @@ static inline bool p4d_access_permitted(p4d_t p4d, bool 
write)
  
  #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
  
-#define pud_offset(p4dp, addr)	\

-   (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr))
-#define pmd_offset(pudp,addr) \
-   (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-#define pte_offset_kernel(dir,addr) \
-   (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+   return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+   return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+}
+
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+   return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
+}
  
  #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
  



Re: [PATCH rebased 1/2] powerpc: reserve memory for capture kernel after hugepages init

2020-03-05 Thread Michal Suchánek
Hello,

This seems to cause crash with kdump reservation 1GB quite reliably.

Thanks

Michal

On Tue, Feb 18, 2020 at 05:28:34PM +0100, Michal Suchanek wrote:
> From: Hari Bathini 
> 
> Sometimes, memory reservation for KDump/FADump can overlap with memory
> marked for hugepages. This overlap leads to error, hang in KDump case
> and copy error reported by f/w in case of FADump, while trying to
> capture dump. Report error while setting up memory for the capture
> kernel instead of running into issues while capturing dump, by moving
> KDump/FADump reservation below MMU early init and failing gracefully
> when hugepages memory overlaps with capture kernel memory.
> 
> Signed-off-by: Hari Bathini 
> ---
>  arch/powerpc/kernel/prom.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> index 6620f37abe73..0f14dc9c4dab 100644
> --- a/arch/powerpc/kernel/prom.c
> +++ b/arch/powerpc/kernel/prom.c
> @@ -735,14 +735,6 @@ void __init early_init_devtree(void *params)
>   if (PHYSICAL_START > MEMORY_START)
>   memblock_reserve(MEMORY_START, 0x8000);
>   reserve_kdump_trampoline();
> -#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
> - /*
> -  * If we fail to reserve memory for firmware-assisted dump then
> -  * fallback to kexec based kdump.
> -  */
> - if (fadump_reserve_mem() == 0)
> -#endif
> - reserve_crashkernel();
>   early_reserve_mem();
>  
>   /* Ensure that total memory size is page-aligned. */
> @@ -781,6 +773,14 @@ void __init early_init_devtree(void *params)
>  #endif
>  
>   mmu_early_init_devtree();
> +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
> + /*
> +  * If we fail to reserve memory for firmware-assisted dump then
> +  * fallback to kexec based kdump.
> +  */
> + if (fadump_reserve_mem() == 0)
> +#endif
> + reserve_crashkernel();
>  
>  #ifdef CONFIG_PPC_POWERNV
>   /* Scan and build the list of machine check recoverable ranges */
> -- 
> 2.23.0
> 


Re: [PATCH 2/2] powerpc: Suppress .eh_frame generation

2020-03-05 Thread Segher Boessenkool
On Thu, Mar 05, 2020 at 08:05:30PM +0530, Naveen N. Rao wrote:
> GCC v8 defaults to enabling -fasynchronous-unwind-tables due to
> https://gcc.gnu.org/r259298, which results in .eh_frame section being
> generated. This results in additional disk usage by the build, as well
> as the kernel modules. Since the kernel has no use for this, this
> section is discarded.
> 
> Add -fno-asynchronous-unwind-tables to KBUILD_CFLAGS to suppress
> generation of .eh_frame section. Note that our VDSOs need .eh_frame, but
> are not affected by this change since our VDSO code are all in assembly.

That may change, but it is easy to change again for just the VDSOs.

> Reported-by: Rasmus Villemoes 
> Signed-off-by: Naveen N. Rao 

Reviewed-by: Segher Boessenkool 

Thanks!


Segher


RE: [RFC PATCH v1] powerpc/prom_init: disable XIVE in Secure VM.

2020-03-05 Thread Cédric Le Goater
On 3/5/20 4:15 PM, Ram Pai wrote:
> On Thu, Mar 05, 2020 at 10:55:45AM +1100, David Gibson wrote:
>> On Wed, Mar 04, 2020 at 04:56:09PM +0100, Cédric Le Goater wrote:
>>> [ ... ]
>>>
>>>> (1) applied the patch which shares the EQ-page with the hypervisor.
>>>> (2) set "kernel_irqchip=off"
>>>> (3) set "ic-mode=xive"
>>>
>>> you don't have to set the interrupt mode. xive should be negotiated
>>> by default.
>>>
>>>> (4) set "svm=on" on the kernel command line.
>>>> (5) no changes to the hypervisor and ultravisor.
>>>>
>>>> And Boom it works!.   So you were right.
>>>
>>> Excellent.
>>>  
>>>> I am sending out the patch for (1) above ASAP.
>>>
>>> Next step, could you please try to do the same with the TIMA and ESB pfn ?
>>> and use KVM.
>>
>> I'm a bit confused by this.  Aren't the TIMA and ESB pages essentially
>> IO pages, rather than memory pages from the guest's point of view?  I
>> assume only memory pages are protected with PEF - I can't even really
>> see what protecting an IO page would even mean.
> 
> It means, that the hypervisor and qemu cannot access the addresses used
> to access the I/O pages. It can only be accessed by Ultravisor and the
> SVM.
> 
> As it stands today, those pages are accessible from the hypervisor
> and not from the SVM or the ultravisor.
> 
> To make it work, we need to enable access to those pages from the SVM
> and from the ultravisor.  One thing I am not clear on is whether we should
> block access to those pages from the hypervisor.  If yes, then there is no
> good way to do that, without hardware help.  If no, then those GPA pages
> can be shared, so that hypervisor/ultravisor/qemu/SVM can all access
> those pages.

They are shared.

KVM will also access them, at interrupt creation, device reset and 
passthrough. QEMU will use them to mask on/off the interrupts in
case of guest migration or machine stop/continue. 

C.



RE: [RFC PATCH v1] powerpc/prom_init: disable XIVE in Secure VM.

2020-03-05 Thread Ram Pai
On Thu, Mar 05, 2020 at 10:55:45AM +1100, David Gibson wrote:
> On Wed, Mar 04, 2020 at 04:56:09PM +0100, Cédric Le Goater wrote:
> > [ ... ]
> > 
> > > (1) applied the patch which shares the EQ-page with the hypervisor.
> > > (2) set "kernel_irqchip=off"
> > > (3) set "ic-mode=xive"
> > 
> > you don't have to set the interrupt mode. xive should be negotiated
> > by default.
> > 
> > > (4) set "svm=on" on the kernel command line.
> > > (5) no changes to the hypervisor and ultravisor.
> > > 
> > > And Boom it works!.   So you were right.
> > 
> > Excellent.
> >  
> > > I am sending out the patch for (1) above ASAP.
> > 
> > Next step, could you please try to do the same with the TIMA and ESB pfn ?
> > and use KVM.
> 
> I'm a bit confused by this.  Aren't the TIMA and ESB pages essentially
> IO pages, rather than memory pages from the guest's point of view?  I
> assume only memory pages are protected with PEF - I can't even really
> see what protecting an IO page would even mean.

It means, that the hypervisor and qemu cannot access the addresses used
to access the I/O pages. It can only be accessed by Ultravisor and the
SVM.

As it stands today, those pages are accessible from the hypervisor
and not from the SVM or the ultravisor.

To make it work, we need to enable access to those pages from the SVM
and from the ultravisor.  One thing I am not clear on is whether we should
block access to those pages from the hypervisor.  If yes, then there is no
good way to do that, without hardware help.  If no, then those GPA pages
can be shared, so that hypervisor/ultravisor/qemu/SVM can all access
those pages.

RP



[PATCH 2/2] powerpc: Suppress .eh_frame generation

2020-03-05 Thread Naveen N. Rao
GCC v8 defaults to enabling -fasynchronous-unwind-tables due to
https://gcc.gnu.org/r259298, which results in .eh_frame section being
generated. This results in additional disk usage by the build, as well
as the kernel modules. Since the kernel has no use for this, this
section is discarded.

Add -fno-asynchronous-unwind-tables to KBUILD_CFLAGS to suppress
generation of .eh_frame section. Note that our VDSOs need .eh_frame, but
are not affected by this change since our VDSO code are all in assembly.

Reported-by: Rasmus Villemoes 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 89956c4f1ce3..f310c32e88a4 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -239,6 +239,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
 KBUILD_CFLAGS += $(call cc-option,-mno-spe)
 KBUILD_CFLAGS += $(call cc-option,-mspe=no)
 
+# Don't emit .eh_frame since we have no use for it
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+
 # Never use string load/store instructions as they are
 # often slow when they are implemented at all
 KBUILD_CFLAGS  += $(call cc-option,-mno-string)
-- 
2.24.1



[PATCH 1/2] powerpc: Drop -fno-dwarf2-cfi-asm

2020-03-05 Thread Naveen N. Rao
The original commit/discussion adding -fno-dwarf2-cfi-asm refers to
R_PPC64_REL32 relocations not being handled by our module loader:
http://lkml.kernel.org/r/20090224065112.ga6...@bombadil.infradead.org

However, that is now handled thanks to commit 9f751b82b491d
("powerpc/module: Add support for R_PPC64_REL32 relocations").

So, drop this flag from our Makefile.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/Makefile | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index cbe5ca4f0ee5..89956c4f1ce3 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -239,11 +239,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
 KBUILD_CFLAGS += $(call cc-option,-mno-spe)
 KBUILD_CFLAGS += $(call cc-option,-mspe=no)
 
-# FIXME: the module load should be taught about the additional relocs
-# generated by this.
-# revert to pre-gcc-4.4 behaviour of .eh_frame
-KBUILD_CFLAGS  += $(call cc-option,-fno-dwarf2-cfi-asm)
-
 # Never use string load/store instructions as they are
 # often slow when they are implemented at all
 KBUILD_CFLAGS  += $(call cc-option,-mno-string)
-- 
2.24.1



[PATCH -next v2] powerpc/64s/pgtable: fix an undefined behaviour

2020-03-05 Thread Qian Cai
Booting a power9 server with hash MMU could trigger an undefined
behaviour because pud_offset(p4d, 0) will do,

0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)

Fix it by converting pud_offset() and friends to static inline
functions.

 UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
 shift exponent 34 is too large for 32-bit type 'int'
 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
 Call Trace:
 dump_stack+0xf4/0x164 (unreliable)
 ubsan_epilogue+0x18/0x78
 __ubsan_handle_shift_out_of_bounds+0x160/0x21c
 walk_pagetables+0x2cc/0x700
 walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
 (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
 ptdump_check_wx+0x8c/0xf0
 mark_rodata_ro+0x48/0x80
 kernel_init+0x74/0x194
 ret_from_kernel_thread+0x5c/0x74

Suggested-by: Christophe Leroy 
Signed-off-by: Qian Cai 
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index fa60e8594b9f..4967bc9e25e2 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1016,12 +1016,20 @@ static inline bool p4d_access_permitted(p4d_t p4d, bool 
write)
 
 #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
 
-#define pud_offset(p4dp, addr) \
-   (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr))
-#define pmd_offset(pudp,addr) \
-   (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-#define pte_offset_kernel(dir,addr) \
-   (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+   return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+   return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+}
+
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+   return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
+}
 
 #define pte_offset_map(dir,addr)   pte_offset_kernel((dir), (addr))
 
-- 
1.8.3.1



eh_frame confusion

2020-03-05 Thread Naveen N. Rao

Naveen N. Rao wrote:

Naveen N. Rao wrote:

Rasmus Villemoes wrote:


Can you check if the below patch works? I am yet to test this in more 
detail, but would be good to know the implications for ppc32.


- Naveen


---
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index f35730548e42..5b5bf98b8217 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -239,10 +239,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
 KBUILD_CFLAGS += $(call cc-option,-mno-spe)
 KBUILD_CFLAGS += $(call cc-option,-mspe=no)
 
-# FIXME: the module load should be taught about the additional relocs

-# generated by this.
-# revert to pre-gcc-4.4 behaviour of .eh_frame
-KBUILD_CFLAGS  += $(call cc-option,-fno-dwarf2-cfi-asm)
+KBUILD_CFLAGS  += $(call cc-option,-fno-asynchronous-unwind-tables)


In terms of the CFI information, the primary difference I see with 
-fno-dwarf2-cfi-asm is that when dumping the debug frames, CIE indicates 
version 3, while otherwise (i.e., without -fno-dwarf2-cfi-asm and 
with/without -fasynchronous-unwind-tables), it is version 1, regardless 
of -gdwarf-2/-gdwarf-4. There are few more minor changes, but none of 
these looked significant to me.


 
 # Never use string load/store instructions as they are

 # often slow when they are implemented at all
diff --git a/arch/powerpc/kernel/vdso32/Makefile 
b/arch/powerpc/kernel/vdso32/Makefile
index e147bbdc12cd..d43b0b18137c 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -25,6 +25,7 @@ KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib \

+   -fasynchronous-unwind-tables \
-Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
 asflags-y := -D__VDSO32__ -s
 
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile

index 32ebb3522ea1..b2cbb5c49bad 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -13,6 +13,7 @@ KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib \

+   -fasynchronous-unwind-tables \
-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
 asflags-y := -D__VDSO64__ -s


The above vdso hunks can be dropped since all our VDSO are assembly, so 
the above have no impact.



- Naveen



Re: eh_frame confusion

2020-03-05 Thread Naveen N. Rao

Michael Ellerman wrote:

"Naveen N. Rao"  writes:

Rasmus Villemoes wrote:

I'm building a ppc32 kernel, and noticed that after upgrading from gcc-7
to gcc-8 all object files now end up having .eh_frame section. For
vmlinux, that's not a problem, because they all get discarded in
arch/powerpc/kernel/vmlinux.lds.S . However, they stick around in
modules, which doesn't seem to be useful - given that everything worked
just fine with gcc-7, and I don't see anything in the module loader that
handles .eh_frame.

The reason I care is that my target has a rather tight rootfs budget,
and the .eh_frame section seem to occupy 10-30% of the file size
(obviously very depending on the particular module).

Comparing the .foo.o.cmd files, I don't see change in options that might
explain this (there's a bunch of new -Wno-*, and the -mspe=no spelling
is apparently no longer supported in gcc-8). Both before and after, there's

-fno-dwarf2-cfi-asm

about which gcc's documentation says

'-fno-dwarf2-cfi-asm'
 Emit DWARF unwind info as compiler generated '.eh_frame' section
 instead of using GAS '.cfi_*' directives.

Looking into where that comes from got me even more confused, because
both arm and unicore32 say

# Never generate .eh_frame
KBUILD_CFLAGS   += $(call cc-option,-fno-dwarf2-cfi-asm)

while the ppc32 case at hand says

# FIXME: the module load should be taught about the additional relocs
# generated by this.
# revert to pre-gcc-4.4 behaviour of .eh_frame


Michael opened a task to look into this recently and I had spent some 
time last week on this. The original commit/discussion adding 
-fno-dwarf2-cfi-asm refers to R_PPC64_REL32 relocations not being 
handled by our module loader:

http://lkml.kernel.org/r/20090224065112.ga6...@bombadil.infradead.org


I opened that issue purely based on noticing the wart in the Makefile,
not because I'd actually tested it.


However, that is now handled thanks to commit 9f751b82b491d:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9f751b82b491d


Haha, written by me, what an idiot.

So the Makefile hack can presumably be dropped, because the module
loader can handle the relocations.

And then maybe we also want to turn off the unwind tables, but that
would be a separate patch.

I did a test build and a simple module loaded fine, so I think 
-fno-dwarf2-cfi-asm is not required anymore, unless Michael has seen 
some breakages with it. Michael?


No, as I said above it was just reading the Makefile.


Ok, thanks for clarifying. To test, I did 'allmodconfig' builds across 
three environments:

- gcc (Ubuntu 9.2.1-9ubuntu2) 9.2.1 20191008 -- ppc64le
- gcc (SUSE Linux) 7.5.0 -- ppc64le
- gcc (GCC) 8.2.1 20181215 (Red Hat 8.2.1-6) -- ppc64 (BE)

Then, used the below command to list all relocations in the modules:
$ find . -name '*.ko' | xargs -n 1 readelf -Wr  | grep -v "Relocation " | grep -v 
"Offset " | cut -d' ' -f4 | sort | uniq

R_PPC64_ADDR32
R_PPC64_ADDR64
R_PPC64_ENTRY
R_PPC64_REL24
R_PPC64_REL32
R_PPC64_REL64
R_PPC64_TOC
R_PPC64_TOC16_HA
R_PPC64_TOC16_LO
R_PPC64_TOC16_LO_DS

All three environments show a similar set of relocations, all of which 
we handle in the module loader today.


If Rasmus/Christophe can confirm that this is true for ppc32 as well, 
then we should be fine.


- Naveen



[Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm

2020-03-05 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=206695

--- Comment #3 from m...@ellerman.id.au ---
Can you try this patch?

diff --git a/drivers/macintosh/windfarm_pm112.c
b/drivers/macintosh/windfarm_pm112.c
index 4150301a89a5..a16f43a1def9 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -125,7 +125,7 @@ static int create_cpu_loop(int cpu)
 {
int chip = cpu / 2;
int core = cpu & 1;
-   struct smu_sdbp_header *hdr;
+   struct smu_sdbp_header *hdr, *hdr2;
struct smu_sdbp_cpupiddata *piddata;
struct wf_cpu_pid_param pid;
struct wf_control *main_fan = cpu_fans[0];
@@ -141,9 +141,9 @@ static int create_cpu_loop(int cpu)
piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];

/* Get FVT params to get Tmax; if not found, assume default */
-   hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
-   if (hdr) {
-   struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr[1];
+   hdr2 = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
+   if (hdr2) {
+   struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr2[1];
tmax = fvt->maxtemp << 16;
} else
tmax = 95 << 16;/* default to 95 degrees C */
@@ -174,6 +174,10 @@ static int create_cpu_loop(int cpu)
pid.min = fmin;

wf_cpu_pid_init(&cpu_pid[cpu], &pid);
+
+   kfree(hdr);
+   kfree(hdr2);
+
return 0;
 }

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [Bug 206695] New: kmemleak reports leaks in drivers/macintosh/windfarm

2020-03-05 Thread Michael Ellerman
Can you try this patch?

diff --git a/drivers/macintosh/windfarm_pm112.c 
b/drivers/macintosh/windfarm_pm112.c
index 4150301a89a5..a16f43a1def9 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -125,7 +125,7 @@ static int create_cpu_loop(int cpu)
 {
int chip = cpu / 2;
int core = cpu & 1;
-   struct smu_sdbp_header *hdr;
+   struct smu_sdbp_header *hdr, *hdr2;
struct smu_sdbp_cpupiddata *piddata;
struct wf_cpu_pid_param pid;
struct wf_control *main_fan = cpu_fans[0];
@@ -141,9 +141,9 @@ static int create_cpu_loop(int cpu)
piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];
 
/* Get FVT params to get Tmax; if not found, assume default */
-   hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
-   if (hdr) {
-   struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr[1];
+   hdr2 = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
+   if (hdr2) {
+   struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr2[1];
tmax = fvt->maxtemp << 16;
} else
tmax = 95 << 16;/* default to 95 degrees C */
@@ -174,6 +174,10 @@ static int create_cpu_loop(int cpu)
pid.min = fmin;
 
wf_cpu_pid_init(&cpu_pid[cpu], &pid);
+
+   kfree(hdr);
+   kfree(hdr2);
+
return 0;
 }
 


Re: [PATCH 2/2] mm/vma: Introduce VM_ACCESS_FLAGS

2020-03-05 Thread Vlastimil Babka
On 3/5/20 7:50 AM, Anshuman Khandual wrote:
> There are many places where all basic VMA access flags (read, write, exec)
> are initialized or checked against as a group. One such example is during
> page fault. Existing vma_is_accessible() wrapper already creates the notion
> of VMA accessibility as a group access permissions. Hence lets just create
> VM_ACCESS_FLAGS (VM_READ|VM_WRITE|VM_EXEC) which will not only reduce code
> duplication but also extend the VMA accessibility concept in general.
> 
> Cc: Russell King 
> CC: Catalin Marinas 
> CC: Mark Salter 
> Cc: Nick Hu 
> CC: Ley Foon Tan 
> Cc: Michael Ellerman 
> Cc: Heiko Carstens 
> Cc: Yoshinori Sato 
> Cc: Guan Xuetao 
> Cc: Dave Hansen 
> Cc: Thomas Gleixner 
> Cc: Rob Springer 
> Cc: Greg Kroah-Hartman 
> Cc: Andrew Morton 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-s...@vger.kernel.org
> Cc: de...@driverdev.osuosl.org
> Cc: linux...@kvack.org
> Cc: linux-ker...@vger.kernel.org
> Signed-off-by: Anshuman Khandual 

Reviewed-by: Vlastimil Babka 

Thanks.


Re: [RFC PATCH v1] powerpc/prom_init: disable XIVE in Secure VM.

2020-03-05 Thread Cédric Le Goater
[ ... ] 

>> yes because you also need to share the XIVE TIMA and ESB pages mapped 
>> in xive_native_esb_fault() and xive_native_tima_fault(). 
> 
> These pages belong to the xive memory slot right? If that is the case,
> they are implicitly shared. The Ultravisor will set them up to be
> shared. The guest kernel should not be doing anything.
> 
> We still need some fixes in KVM and Ultravisor to correctly map the
> hardware pages to GPA ranges of the xive memory slot. Work is in progress...

ok. Since this is already done for KVM, I suppose it's not too hard. 
the VMA has VM_IO | VM_PFNMAP flags.

Otherwise you could still pick up the XIVE ESB and TIMA HW MMIO ranges 
in OPAL and brutally declare the whole as shared, if that's possible.

C.


Re: [PATCH] powerpc/64: BE option to use ELFv2 ABI for big endian kernels

2020-03-05 Thread Segher Boessenkool
On Thu, Mar 05, 2020 at 01:34:22PM +1000, Nicholas Piggin wrote:
> Segher Boessenkool's on March 4, 2020 9:09 am:
> >> +override flavour := linux-ppc64v2
> > 
> > That isn't a good name, heh.  This isn't "v2" of anything...  Spell out
> > the name "ELFv2"?  Or as "elfv2"?  It is just a name after all, it is
> > version 1 in all three version fields in the ELF headers!
> 
> Yeah okay. This part is only for some weird little perl asm generator
> script, but probably better to be careful. linux-ppc64-elfv2 ?

That generator is from openssl, or inspired by it, it is everywhere.
So it is more important to get it right than it would seem at first
glance ;-)

That name looks perfect to me.  You'll have to update REs expecting the
arch at the end (like /le$/), but you had to already I think?


Segher


Re: [PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command

2020-03-05 Thread Frederic Barrat




+   if (rc)
+   goto out;
+
+   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+ocxlpmem-

admin_command.data_offset + 0x28,

+OCXL_HOST_ENDIAN, &log->wwid[1]);
+   if (rc)
+   goto out;
+
+   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+ocxlpmem-

admin_command.data_offset + 0x30,

+OCXL_HOST_ENDIAN, (u64 *)log-

fw_revision);

+   if (rc)
+   goto out;
+   log->fw_revision[8] = '\0';
+
+   buf_length = (user_buf_length < log->buf_size) ?
+user_buf_length : log->buf_size;
+   for (i = 0; i < buf_length + 0x48; i += 8) {
+   u64 val;
+
+   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+ocxlpmem-

admin_command.data_offset + i,

+OCXL_HOST_ENDIAN, &val);
+   if (rc)
+   goto out;
+
+   if (buf_is_user) {
+   if (copy_to_user(&log->buf[i], &val,
sizeof(u64))) {
+   rc = -EFAULT;
+   goto out;
+   }
+   } else
+   log->buf[i] = val;
+   }



I think it could be a bit simplified by keeping the handling of the
user
buffer out of this function. Always call it with a kernel buffer.
And
have only one copy_to_user() call on the ioctl() path. You'd need to
allocate a kernel buf on the ioctl path, but you're already doing it
on
the probe() path, so it should be doable to share code.


Hmm, the problem then is that on the IOCTL side, I'll have to save,
modify, then restore the buf member of struct
ioctl_ocxl_pmem_error_log, which would be uglier.



buf is just an output buffer. All you'd need to do is allocate a kernel 
buf, like it's already done for the "probe" case in dump_error_log(). 
And add a global copy_to_user() of the buf at the end of the ioctl path, 
instead of having multiple smaller copy_to_user() in the loop here.
copy_to_user() is a bit expensive so it's usually better to regroup 
them. I think it's easy here and makes sense since that function is also 
trying to handle both kernel and user space buffers.
But we're not in a critical path, and after this patch, there are others 
copying out mmio content to user buffers and those don't have a kernel 
buffer to handle, so the copy_to_user() in a loop makes things easier.

So I guess the conclusion is whatever you think is the easiest...







+
+   rc = admin_response_handled(ocxlpmem);
+   if (rc)
+   goto out;
+
+out:
+   mutex_unlock(&ocxlpmem->admin_command.lock);
+   return rc;
+
+}
+
+static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
+   struct ioctl_ocxl_pmem_error_log __user *uarg)
+{
+   struct ioctl_ocxl_pmem_error_log args;
+   int rc;
+
+   if (copy_from_user(&args, uarg, sizeof(args)))
+   return -EFAULT;
+
+   rc = read_error_log(ocxlpmem, &args, true);
+   if (rc)
+   return rc;
+
+   if (copy_to_user(uarg, &args, sizeof(args)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static long file_ioctl(struct file *file, unsigned int cmd,
unsigned long args)
+{
+   struct ocxlpmem *ocxlpmem = file->private_data;
+   int rc = -EINVAL;
+
+   switch (cmd) {
+   case IOCTL_OCXL_PMEM_ERROR_LOG:
+   rc = ioctl_error_log(ocxlpmem,
+(struct ioctl_ocxl_pmem_error_log
__user *)args);
+   break;
+   }
+   return rc;
+}
+
   static const struct file_operations fops = {
.owner  = THIS_MODULE,
.open   = file_open,
.release= file_release,
+   .unlocked_ioctl = file_ioctl,
+   .compat_ioctl   = file_ioctl,
   };
   
   /**

@@ -527,6 +736,60 @@ static int read_device_metadata(struct
ocxlpmem *ocxlpmem)
return 0;
   }
   
+static const char *decode_error_log_type(u8 error_log_type)

+{
+   switch (error_log_type) {
+   case 0x00:
+   return "general";
+   case 0x01:
+   return "predictive failure";
+   case 0x02:
+   return "thermal warning";
+   case 0x03:
+   return "data loss";
+   case 0x04:
+   return "health & performance";
+   default:
+   return "unknown";
+   }
+}
+
+static void dump_error_log(struct ocxlpmem *ocxlpmem)
+{
+   struct ioctl_ocxl_pmem_error_log log;
+   u32 buf_size;
+   u8 *buf;
+   int rc;
+
+   if (ocxlpmem->admin_command.data_size == 0)
+   return;
+
+   buf_size = ocxlpmem->admin_command.data_size - 0x48;
+   buf = kzalloc(buf_size, GFP_KERNEL);
+   if (!buf)
+   return;
+
+   log.buf = buf;
+   log.buf_size = buf_size;
+
+   rc = 

Re: [PATCH] selftests/powerpc: Turn off timeout setting for benchmarks, dscr, signal, tm

2020-03-05 Thread Po-Hsu Lin
On Thu, Mar 5, 2020 at 3:32 PM Michael Ellerman  wrote:
>
> Po-Hsu Lin  writes:
> > Some specific tests in powerpc can take longer than the default 45
> > seconds that was added in commit 852c8cbf (selftests/kselftest/runner.sh:
> > Add 45 second timeout per test) to run, the following test result was
> > collected across 2 Power8 nodes and 1 Power9 node in our pool:
> >   powerpc/benchmarks/futex_bench - 52s
> >   powerpc/dscr/dscr_sysfs_test - 116s
> >   powerpc/signal/signal_fuzzer - 88s
> >   powerpc/tm/tm_unavailable_test - 168s
> >   powerpc/tm/tm-poison - 240s
> >
> > Thus they will fail with TIMEOUT error. Disable the timeout setting
> > for these sub-tests to allow them to finish properly.
>
> I run the powerpc tests with run-parts, rather than the kselftest
> script, we already have our own test runner with a 120s timeout.
>
> I didn't think the kselftests runner actually worked with the powerpc
> tests? Because we override RUN_TESTS.
>
Hello Michael,

I have done a small experiment with timeout=1 in settings and used
run-parts to run the executables; it looks like this change won't
affect run-parts.

Not quite sure about the RUN_TESTS you mentioned here; we're testing
it with a command like:
sudo make -C linux/tools/testing/selftests TARGETS=powerpc run_tests
And the timeout setting will take effect with this.

Thanks

> cheers
>
>
> > https://bugs.launchpad.net/bugs/1864642
> > Signed-off-by: Po-Hsu Lin 
> > ---
> >  tools/testing/selftests/powerpc/benchmarks/Makefile | 2 ++
> >  tools/testing/selftests/powerpc/benchmarks/settings | 1 +
> >  tools/testing/selftests/powerpc/dscr/Makefile   | 2 ++
> >  tools/testing/selftests/powerpc/dscr/settings   | 1 +
> >  tools/testing/selftests/powerpc/signal/Makefile | 2 ++
> >  tools/testing/selftests/powerpc/signal/settings | 1 +
> >  tools/testing/selftests/powerpc/tm/Makefile | 2 ++
> >  tools/testing/selftests/powerpc/tm/settings | 1 +
> >  8 files changed, 12 insertions(+)
> >  create mode 100644 tools/testing/selftests/powerpc/benchmarks/settings
> >  create mode 100644 tools/testing/selftests/powerpc/dscr/settings
> >  create mode 100644 tools/testing/selftests/powerpc/signal/settings
> >  create mode 100644 tools/testing/selftests/powerpc/tm/settings
> >
> > diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile 
> > b/tools/testing/selftests/powerpc/benchmarks/Makefile
> > index d40300a..a32a6ab 100644
> > --- a/tools/testing/selftests/powerpc/benchmarks/Makefile
> > +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
> > @@ -2,6 +2,8 @@
> >  TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench 
> > null_syscall
> >  TEST_GEN_FILES := exec_target
> >
> > +TEST_FILES := settings
> > +
> >  CFLAGS += -O2
> >
> >  top_srcdir = ../../../../..
> > diff --git a/tools/testing/selftests/powerpc/benchmarks/settings 
> > b/tools/testing/selftests/powerpc/benchmarks/settings
> > new file mode 100644
> > index 000..e7b9417
> > --- /dev/null
> > +++ b/tools/testing/selftests/powerpc/benchmarks/settings
> > @@ -0,0 +1 @@
> > +timeout=0
> > diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
> > b/tools/testing/selftests/powerpc/dscr/Makefile
> > index 5df4763..cfa6eed 100644
> > --- a/tools/testing/selftests/powerpc/dscr/Makefile
> > +++ b/tools/testing/selftests/powerpc/dscr/Makefile
> > @@ -3,6 +3,8 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test 
> > dscr_user_test \
> > dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test  \
> > dscr_sysfs_thread_test
> >
> > +TEST_FILES := settings
> > +
> >  top_srcdir = ../../../../..
> >  include ../../lib.mk
> >
> > diff --git a/tools/testing/selftests/powerpc/dscr/settings 
> > b/tools/testing/selftests/powerpc/dscr/settings
> > new file mode 100644
> > index 000..e7b9417
> > --- /dev/null
> > +++ b/tools/testing/selftests/powerpc/dscr/settings
> > @@ -0,0 +1 @@
> > +timeout=0
> > diff --git a/tools/testing/selftests/powerpc/signal/Makefile 
> > b/tools/testing/selftests/powerpc/signal/Makefile
> > index 113838f..153fafc 100644
> > --- a/tools/testing/selftests/powerpc/signal/Makefile
> > +++ b/tools/testing/selftests/powerpc/signal/Makefile
> > @@ -5,6 +5,8 @@ CFLAGS += -maltivec
> >  $(OUTPUT)/signal_tm: CFLAGS += -mhtm
> >  $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64
> >
> > +TEST_FILES := settings
> > +
> >  top_srcdir = ../../../../..
> >  include ../../lib.mk
> >
> > diff --git a/tools/testing/selftests/powerpc/signal/settings 
> > b/tools/testing/selftests/powerpc/signal/settings
> > new file mode 100644
> > index 000..e7b9417
> > --- /dev/null
> > +++ b/tools/testing/selftests/powerpc/signal/settings
> > @@ -0,0 +1 @@
> > +timeout=0
> > diff --git a/tools/testing/selftests/powerpc/tm/Makefile 
> > b/tools/testing/selftests/powerpc/tm/Makefile
> > index b15a1a3..7b99d09 100644
> > --- a/tools/testing/selftests/powerpc/tm/Makefile
> > +++ b/tools/testing/selftests/powerpc/tm/Makefile
> > @@ -7,6