[PATCH v2 6/6] powerpc/eeh: Rework eeh_ops->probe()
With the EEH early probe now being pseries specific there's no need for eeh_ops->probe() to take a pci_dn. Instead, we can make it take a pci_dev and use the probe function to map a pci_dev to an eeh_dev. This allows the platform to implement its own method for finding (or creating) an eeh_dev for a given pci_dev which also removes a use of pci_dn in generic EEH code. This patch also renames eeh_device_add_late() to eeh_device_probe(). This better reflects what it does and removes the last vestiges of the early/late EEH probe split. Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran --- v2: Fixed the comment block above eeh_probe_device() to use the new function name. --- arch/powerpc/include/asm/eeh.h | 6 ++-- arch/powerpc/kernel/eeh.c| 44 +++- arch/powerpc/platforms/powernv/eeh-powernv.c | 30 +-- arch/powerpc/platforms/pseries/eeh_pseries.c | 23 ++- 4 files changed, 62 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8580238..964a542 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -215,7 +215,7 @@ enum { struct eeh_ops { char *name; int (*init)(void); - void* (*probe)(struct pci_dn *pdn, void *data); + struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *delay); @@ -301,7 +301,7 @@ int __exit eeh_ops_unregister(const char *name); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); -void eeh_add_device_late(struct pci_dev *); +void eeh_probe_device(struct pci_dev *pdev); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -356,7 +356,7 @@ static inline int eeh_check_failure(const volatile void __iomem *token) static inline void eeh_addr_cache_init(void) { } 
-static inline void eeh_add_device_late(struct pci_dev *dev) { } +static inline void eeh_probe_device(struct pci_dev *dev) { } static inline void eeh_remove_device(struct pci_dev *dev) { } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 55d3ef6..7cdcb41 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1107,35 +1107,43 @@ static int eeh_init(void) core_initcall_sync(eeh_init); /** - * eeh_add_device_late - Perform EEH initialization for the indicated pci device + * eeh_probe_device() - Perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH * * This routine must be used to complete EEH initialization for PCI * devices that were added after system boot (e.g. hotplug, dlpar). */ -void eeh_add_device_late(struct pci_dev *dev) +void eeh_probe_device(struct pci_dev *dev) { - struct pci_dn *pdn; struct eeh_dev *edev; - if (!dev) + pr_debug("EEH: Adding device %s\n", pci_name(dev)); + + /* +* pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was +* already called for this device. +*/ + if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) { + pci_dbg(dev, "Already bound to an eeh_dev!\n"); return; + } - pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); - edev = pdn_to_eeh_dev(pdn); - eeh_edev_dbg(edev, "Adding device\n"); - if (edev->pdev == dev) { - eeh_edev_dbg(edev, "Device already referenced!\n"); + edev = eeh_ops->probe(dev); + if (!edev) { + pr_debug("EEH: Adding device failed\n"); return; } /* -* The EEH cache might not be removed correctly because of -* unbalanced kref to the device during unplug time, which -* relies on pcibios_release_device(). So we have to remove -* that here explicitly. +* FIXME: We rely on pcibios_release_device() to remove the +* existing EEH state. The release function is only called if +* the pci_dev's refcount drops to zero so if something is +* keeping a ref to a device (e.g. a filesystem) we need to +* remove the old EEH state. 
+* +* FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ - if (edev->pdev) { + if (edev->pdev && edev->pdev != dev) { eeh_rmv_from_parent_pe(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1146,17 +1154,11 @@ void eeh_add_device_late(struct pci_dev *dev) * into error handler afterwards. */ edev->mode |= EEH_DEV_NO_HANDLER; - - edev->pdev = NULL; - dev->dev.archdata.edev =
[PATCH v2 5/6] powerpc/eeh: Make early EEH init pseries specific
The eeh_ops->probe() function is called from two different contexts: 1. On pseries, where we set EEH_PROBE_MODE_DEVTREE, it's called in eeh_add_device_early() which is supposed to run before we create a pci_dev. 2. On PowerNV, where we set EEH_PROBE_MODE_DEV, it's called in eeh_device_add_late() which is supposed to run *after* the pci_dev is created. The "early" probe is required because PAPR requires that we perform an RTAS call to enable EEH support on a device before we start interacting with it via config space or MMIO. This requirement doesn't exist on PowerNV and shoehorning two completely separate initialisation paths into a common interface just results in convoluted code everywhere. Additionally the early probe requires the probe function to take a pci_dn rather than a pci_dev argument. We'd like to make pci_dn a pseries specific data structure since there's no real requirement for them on PowerNV. To help both goals move the early probe into the pseries containment zone so the platform dependence is more explicit. 
Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran --- v2: s/set set/we set/ in the commit message --- arch/powerpc/include/asm/eeh.h | 14 +++--- arch/powerpc/kernel/eeh.c| 46 arch/powerpc/kernel/of_platform.c| 6 +-- arch/powerpc/platforms/powernv/eeh-powernv.c | 6 --- arch/powerpc/platforms/pseries/eeh_pseries.c | 65 ++-- arch/powerpc/platforms/pseries/pci_dlpar.c | 2 +- drivers/pci/hotplug/rpadlpar_core.c | 2 +- drivers/pci/hotplug/rpaphp_core.c| 2 +- drivers/pci/hotplug/rpaphp_pci.c | 2 +- 9 files changed, 64 insertions(+), 81 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5d10781..8580238 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -301,8 +301,6 @@ int __exit eeh_ops_unregister(const char *name); int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_init(void); -void eeh_add_device_early(struct pci_dn *); -void eeh_add_device_tree_early(struct pci_dn *); void eeh_add_device_late(struct pci_dev *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); @@ -358,10 +356,6 @@ static inline int eeh_check_failure(const volatile void __iomem *token) static inline void eeh_addr_cache_init(void) { } -static inline void eeh_add_device_early(struct pci_dn *pdn) { } - -static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } - static inline void eeh_add_device_late(struct pci_dev *dev) { } static inline void eeh_remove_device(struct pci_dev *dev) { } @@ -370,6 +364,14 @@ static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_IO_ERROR_VALUE(size) (-1UL) #endif /* CONFIG_EEH */ +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH) +void pseries_eeh_init_edev(struct pci_dn *pdn); +void pseries_eeh_init_edev_recursive(struct pci_dn *pdn); +#else +static inline void pseries_eeh_add_device_early(struct pci_dn *pdn) { } +static inline void 
pseries_eeh_add_device_tree_early(struct pci_dn *pdn) { } +#endif + #ifdef CONFIG_PPC64 /* * MMIO read/write operations with EEH support. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index a9e4ca7..55d3ef6 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1107,52 +1107,6 @@ static int eeh_init(void) core_initcall_sync(eeh_init); /** - * eeh_add_device_early - Enable EEH for the indicated device node - * @pdn: PCI device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. - */ -void eeh_add_device_early(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - - if (!edev) - return; - - if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) - return; - - eeh_ops->probe(pdn, NULL); -} - -/** - * eeh_add_device_tree_early - Enable EEH for the indicated device - * @pdn: PCI device node - * - * This routine must be used to perform EEH initialization for the - * indicated PCI device that was added after system boot (e.g. - * hotplug, dlpar). - */ -void eeh_add_device_tree_early(struct pci_dn *pdn) -{ - struct pci_dn *n; - - if (!pdn) - return; - - list_for_each_entry(n, >child_list, list) - eeh_add_device_tree_early(n); - eeh_add_device_early(pdn); -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); - -/** *
[PATCH v2 4/6] powerpc/eeh: Remove PHB check in probe
This check for a missing PHB has existed in various forms since the initial PPC64 port was upstreamed in 2002. The idea seems to be that we need to guard against creating pci-specific data structures for the non-pci children of a PCI device tree node (e.g. USB devices). However, we only create pci_dn structures for DT nodes that correspond to PCI devices so there's not much point in doing this check in the eeh_probe path. Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran --- arch/powerpc/kernel/eeh.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9cb3370..a9e4ca7 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1120,7 +1120,6 @@ core_initcall_sync(eeh_init); */ void eeh_add_device_early(struct pci_dn *pdn) { - struct pci_controller *phb = pdn ? pdn->phb : NULL; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); if (!edev) @@ -1129,11 +1128,6 @@ void eeh_add_device_early(struct pci_dn *pdn) if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) return; - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || - (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) - return; - eeh_ops->probe(pdn, NULL); } -- 2.9.5
[PATCH v2 3/6] powerpc/eeh: Do early EEH init only when required
The pci hotplug helper (pci_hp_add_devices()) calls eeh_add_device_tree_early() to scan the device-tree for new PCI devices and do the early EEH probe before the device is scanned. This early probe is a no-op in a lot of cases because: a) The early init is only required to satisfy a PAPR requirement that EEH be configured before we start doing config accesses. On PowerNV it is a no-op. b) It's a no-op for devices that have already had their eeh_dev initialised. There are four callers of pci_hp_add_devices(): 1. arch/powerpc/kernel/eeh_driver.c Here the hotplug helper is called when re-scanning pci_devs that were removed during an EEH recovery pass. The EEH state for each removed device (the eeh_dev) is retained across a recovery pass so the early init is a no-op in this case. 2. drivers/pci/hotplug/pnv_php.c This is also a no-op since the PowerNV hotplug driver is, surprisingly, PowerNV specific. 3. drivers/pci/hotplug/rpaphp_core.c 4. drivers/pci/hotplug/rpaphp_pci.c In these two cases new devices have been hotplugged and FW has provided new DT nodes for each. These are the only two cases where we might have new PCI device nodes in the DT so these are the only two cases where the early EEH probe needs to be done. We can move the calls to eeh_add_device_tree_early() to the locations where it's needed and remove it from the generic path. This is preparation for making the early EEH probe pseries specific. 
Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran --- arch/powerpc/kernel/pci-hotplug.c | 2 -- drivers/pci/hotplug/rpaphp_core.c | 2 ++ drivers/pci/hotplug/rpaphp_pci.c | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index d6a67f8..bf83f76 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -112,8 +112,6 @@ void pci_hp_add_devices(struct pci_bus *bus) struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); - eeh_add_device_tree_early(PCI_DN(dn)); - phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index e408e40..9c1e43e 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -494,6 +494,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) return retval; if (state == PRESENT) { + eeh_add_device_tree_early(PCI_DN(slot->dn)); + pci_lock_rescan_remove(); pci_hp_add_devices(slot->bus); pci_unlock_rescan_remove(); diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index beca61b..61ebbd8 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -95,8 +95,10 @@ int rpaphp_enable_slot(struct slot *slot) return -EINVAL; } - if (list_empty(>devices)) + if (list_empty(>devices)) { + eeh_add_device_tree_early(PCI_DN(slot->dn)); pci_hp_add_devices(bus); + } if (!list_empty(>devices)) { slot->state = CONFIGURED; -- 2.9.5
[PATCH v2 2/6] powerpc/eeh: Remove eeh_add_device_tree_late()
On pseries and PowerNV pcibios_bus_add_device() calls eeh_add_device_late() so there's no need to do a separate tree traversal to bind the eeh_dev and pci_dev together setting up the PHB at boot. As a result we can remove eeh_add_device_tree_late(). Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran --- arch/powerpc/include/asm/eeh.h| 3 --- arch/powerpc/kernel/eeh.c | 25 - arch/powerpc/kernel/of_platform.c | 3 --- arch/powerpc/kernel/pci-common.c | 3 --- 4 files changed, 34 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5a34907..5d10781 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -304,7 +304,6 @@ void eeh_addr_cache_init(void); void eeh_add_device_early(struct pci_dn *); void eeh_add_device_tree_early(struct pci_dn *); void eeh_add_device_late(struct pci_dev *); -void eeh_add_device_tree_late(struct pci_bus *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -365,8 +364,6 @@ static inline void eeh_add_device_tree_early(struct pci_dn *pdn) { } static inline void eeh_add_device_late(struct pci_dev *dev) { } -static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } - static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0878912..9cb3370 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1214,31 +1214,6 @@ void eeh_add_device_late(struct pci_dev *dev) } /** - * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to perform EEH initialization for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. 
- */ -void eeh_add_device_tree_late(struct pci_bus *bus) -{ - struct pci_dev *dev; - - if (eeh_has_flag(EEH_FORCE_DISABLED)) - return; - list_for_each_entry(dev, >devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); - -/** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed * diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index cb68800..64edac81 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -80,9 +80,6 @@ static int of_pci_phb_probe(struct platform_device *dev) */ pcibios_claim_one_bus(phb->bus); - /* Finish EEH setup */ - eeh_add_device_tree_late(phb->bus); - /* Add probed PCI devices to the device model */ pci_bus_add_devices(phb->bus); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 3d2b1cf..8983afa 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1399,9 +1399,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) pci_assign_unassigned_bus_resources(bus); } - /* Fixup EEH */ - eeh_add_device_tree_late(bus); - /* Add new devices to global lists. Register in proc, sysfs. */ pci_bus_add_devices(bus); } -- 2.9.5
[PATCH v2 1/6] powerpc/eeh: Add sysfs files in late probe
Move creating the EEH specific sysfs files into eeh_add_device_late() rather than being open-coded all over the place. Calling the function is generally done immediately after calling eeh_add_device_late() anyway. This is also a correctness fix since currently the sysfs files will be added even if the EEH probe happens to fail. Similarly, on pseries we currently add the sysfs files before calling eeh_add_device_late(). This is flat-out broken since the sysfs files require the pci_dev->dev.archdata.edev pointer to be set, and that is done in eeh_add_device_late(). Reviewed-by: Sam Bobroff Signed-off-by: Oliver O'Halloran --- v2: Reworded commit message based on Sam Bobroff's comments. About the current behaviour being broken. --- arch/powerpc/include/asm/eeh.h | 3 --- arch/powerpc/kernel/eeh.c| 24 +--- arch/powerpc/kernel/of_platform.c| 3 --- arch/powerpc/kernel/pci-common.c | 3 --- arch/powerpc/platforms/powernv/eeh-powernv.c | 1 - arch/powerpc/platforms/pseries/eeh_pseries.c | 3 +-- 6 files changed, 2 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 6f9b2a1..5a34907 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -305,7 +305,6 @@ void eeh_add_device_early(struct pci_dn *); void eeh_add_device_tree_early(struct pci_dn *); void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); -void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); @@ -368,8 +367,6 @@ static inline void eeh_add_device_late(struct pci_dev *dev) { } static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } -static inline void eeh_add_sysfs_files(struct pci_bus *bus) { } - static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) diff --git a/arch/powerpc/kernel/eeh.c 
b/arch/powerpc/kernel/eeh.c index 17cb3e9..0878912 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1210,6 +1210,7 @@ void eeh_add_device_late(struct pci_dev *dev) dev->dev.archdata.edev = edev; eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); } /** @@ -1238,29 +1239,6 @@ void eeh_add_device_tree_late(struct pci_bus *bus) EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); /** - * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to add EEH sysfs files for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_sysfs_files(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, >devices, bus_list) { - eeh_sysfs_add_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_sysfs_files(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); - -/** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed * diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 427fc22..cb68800 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -86,9 +86,6 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Add probed PCI devices to the device model */ pci_bus_add_devices(phb->bus); - /* sysfs files should only be added after devices are added */ - eeh_add_sysfs_files(phb->bus); - return 0; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c6c0341..3d2b1cf 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1404,9 +1404,6 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Add new devices to global lists. Register in proc, sysfs. 
*/ pci_bus_add_devices(bus); - - /* sysfs files should only be added after devices are added */ - eeh_add_sysfs_files(bus); } EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 6f300ab..ef727ec 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -48,7 +48,6 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev) dev_dbg(>dev, "EEH: Setting up device\n"); eeh_add_device_early(pdn); eeh_add_device_late(pdev); - eeh_sysfs_add_device(pdev); } static int pnv_eeh_init(void) diff --git
[PATCH v3] powerpc: setup_64: set up PACA earlier to avoid kcov problems
kcov instrumentation is collected the __sanitizer_cov_trace_pc hook in kernel/kcov.c. The compiler inserts these hooks into every basic block unless kcov is disabled for that file. We then have a deep call-chain: - __sanitizer_cov_trace_pc calls to check_kcov_mode() - check_kcov_mode() (kernel/kcov.c) calls in_task() - in_task() (include/linux/preempt.h) calls preempt_count(). - preempt_count() (include/asm-generic/preempt.h) calls current_thread_info() - because powerpc has THREAD_INFO_IN_TASK, current_thread_info() (include/linux/thread_info.h) is defined to 'current' - current (arch/powerpc/include/asm/current.h) is defined to get_current(). - get_current (same file) loads an offset of r13. - arch/powerpc/include/asm/paca.h makes r13 a register variable called local_paca - it is the PACA for the current CPU, so this has the effect of loading the current task from PACA. - get_current returns the current task from PACA, - current_thread_info returns the task cast to a thread_info - preempt_count dereferences the thread_info to load preempt_count - that value is used by in_task and so on up the chain The problem is: - kcov instrumentation is enabled for arch/powerpc/kernel/dt_cpu_ftrs.c - even if it were not, dt_cpu_ftrs_init calls generic dt parsing code which should definitely have instrumentation enabled. - setup_64.c calls dt_cpu_ftrs_init before it sets up a PACA. - If we don't set up a paca, r13 will contain unpredictable data. - In a zImage compiled with kcov and KASAN, we see r13 containing a value that leads to dereferencing invalid memory (something like 912a72603d420015). - Weirdly, the same kernel as a vmlinux loaded directly by qemu does not crash. Investigating with gdb, it seems that in the vmlinux boot case, r13 is near enough to zero that we just happen to be able to read that part of memory (we're operating with translation off at this point) and the current pointer also happens to land in readable memory and everything just works. 
- PACA setup refers to CPU features - setup_paca() looks at early_cpu_has_feature(CPU_FTR_HVMODE) There's no generic kill switch for kcov (as far as I can tell), and we don't want to have to turn off instrumentation in the generic dt parsing code (which lives outside arch/powerpc/) just because we don't have a real paca or task yet. So: - change the test when setting up a PACA to consider the actual value of the MSR rather than the CPU feature. - move the PACA setup to before the cpu feature parsing. Translations get switched on once we leave early_setup, so I think we'd already catch any other cases where the PACA or task aren't set up. Boot tested on a P9 guest and host. Fixes: fb0b0a73b223 ("powerpc: Enable kcov") Cc: Andrew Donnellan Suggested-by: Michael Ellerman Signed-off-by: Daniel Axtens --- Regarding moving the comment about printk()-safety: I am about 75% sure that the thing that makes printk() safe is the PACA, not the CPU features. That's what commit 24d9649574fb ("[POWERPC] Document when printk is useable") seems to indicate, but as someone wise recently told me, "bootstrapping is hard", so I may be totally wrong. v3: Update comment, thanks Christophe Leroy. Remove a comment in dt_cpu_ftrs.c that is no longer accurate - thanks Andrew. I think we want to retain all the code still, but I'm open to being told otherwise. --- arch/powerpc/kernel/dt_cpu_ftrs.c | 1 - arch/powerpc/kernel/paca.c| 2 +- arch/powerpc/kernel/setup_64.c| 20 +++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 182b4047c1ef..36bc0d5c4f3a 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void) /* Initialize the base environment -- clear FSCR/HFSCR. 
*/ hv_mode = !!(mfmsr() & MSR_HV); if (hv_mode) { - /* CPU_FTR_HVMODE is used early in PACA setup */ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; mtspr(SPRN_HFSCR, 0); } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 949eceb254d8..347e947b9d4b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -218,7 +218,7 @@ void setup_paca(struct paca_struct *new_paca) * if we do a GET_PACA() before the feature fixups have been * applied */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) + if (mfmsr() & MSR_HV) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e05e6dd67ae6..2259da8e8685 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -285,18 +285,28 @@ void __init
Re: [PATCH v3] powerpc/64s/pgtable: fix an undefined behaviour
Le 06/03/2020 à 05:48, Qian Cai a écrit : Booting a power9 server with hash MMU could trigger an undefined behaviour because pud_offset(p4d, 0) will do, 0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10) Fix it by converting pud_index() and friends to static inline functions. UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15 shift exponent 34 is too large for 32-bit type 'int' CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13 Call Trace: dump_stack+0xf4/0x164 (unreliable) ubsan_epilogue+0x18/0x78 __ubsan_handle_shift_out_of_bounds+0x160/0x21c walk_pagetables+0x2cc/0x700 walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282 (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311 ptdump_check_wx+0x8c/0xf0 mark_rodata_ro+0x48/0x80 kernel_init+0x74/0x194 ret_from_kernel_thread+0x5c/0x74 Suggested-by: Christophe Leroy Signed-off-by: Qian Cai Reviewed-by: Christophe Leroy --- v3: convert pud_index() etc to static inline functions. v2: convert pud_offset() etc to static inline functions. 
arch/powerpc/include/asm/book3s/64/pgtable.h | 23 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 201a69e6a355..bd432c6706b9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -998,10 +998,25 @@ extern struct page *pgd_page(pgd_t pgd); #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) #define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) +static inline unsigned long pgd_index(unsigned long address) +{ + return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); +} + +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} /* * Find an entry in a page-table-directory. We combine the address region
[PATCH v4 5/6] powerpc/fsl_booke/64: clear the original kernel if randomized
The original kernel still exists in memory, clear it now. Signed-off-by: Jason Yan Cc: Scott Wood Cc: Diana Craciun Cc: Michael Ellerman Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Nicholas Piggin Cc: Kees Cook --- arch/powerpc/mm/nohash/kaslr_booke.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index bf60f956dc91..f7ab97aa2127 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -379,8 +379,10 @@ notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size) unsigned long kernel_sz; if (IS_ENABLED(CONFIG_PPC64)) { - if (__run_at_load == 1) + if (__run_at_load == 1) { + kaslr_late_init(); return; + } /* Setup flat device-tree pointer */ initial_boot_params = dt_ptr; -- 2.17.2
[PATCH v4 4/6] powerpc/fsl_booke/64: do not clear the BSS for the second pass
The BSS section has already been cleared out in the first pass. No need to clear it again. This can save some time when booting with KASLR enabled. Signed-off-by: Jason Yan Cc: Scott Wood Cc: Diana Craciun Cc: Michael Ellerman Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Nicholas Piggin Cc: Kees Cook --- arch/powerpc/kernel/head_64.S | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 454129a3c259..9354c292b709 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -913,6 +913,13 @@ start_here_multiplatform: bl relative_toc tovirt(r2,r2) + /* Do not clear the BSS for the second pass if randomized */ + LOAD_REG_ADDR(r3, kernstart_virt_addr) + ld r3,0(r3) + LOAD_REG_IMMEDIATE(r4, KERNELBASE) + cmpdr3,r4 + bne 4f + /* Clear out the BSS. It may have been done in prom_init, * already but that's irrelevant since prom_init will soon * be detached from the kernel completely. Besides, we need -- 2.17.2
[PATCH v4 6/6] powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst and add 64bit part
Now we support both 32 and 64 bit KASLR for fsl booke. Add document for 64 bit part and rename kaslr-booke32.rst to kaslr-booke.rst. Signed-off-by: Jason Yan Cc: Scott Wood Cc: Diana Craciun Cc: Michael Ellerman Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Nicholas Piggin Cc: Kees Cook --- Documentation/powerpc/index.rst | 2 +- .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 --- 2 files changed, 32 insertions(+), 5 deletions(-) rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%) diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index 0d45f0fc8e57..3bad36943b22 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -20,7 +20,7 @@ powerpc hvcs imc isa-versions -kaslr-booke32 +kaslr-booke mpc52xx papr_hcalls pci_iov_resource_on_powernv diff --git a/Documentation/powerpc/kaslr-booke32.rst b/Documentation/powerpc/kaslr-booke.rst similarity index 59% rename from Documentation/powerpc/kaslr-booke32.rst rename to Documentation/powerpc/kaslr-booke.rst index 8b259fdfdf03..42121fed8249 100644 --- a/Documentation/powerpc/kaslr-booke32.rst +++ b/Documentation/powerpc/kaslr-booke.rst @@ -1,15 +1,18 @@ .. SPDX-License-Identifier: GPL-2.0 -=== -KASLR for Freescale BookE32 -=== += +KASLR for Freescale BookE += The word KASLR stands for Kernel Address Space Layout Randomization. This document tries to explain the implementation of the KASLR for -Freescale BookE32. KASLR is a security feature that deters exploit +Freescale BookE. KASLR is a security feature that deters exploit attempts relying on knowledge of the location of kernel internals. +KASLR for Freescale BookE32 +- + Since CONFIG_RELOCATABLE has already supported, what we need to do is map or copy kernel to a proper place and relocate. Freescale Book-E parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1 @@ -38,5 +41,29 @@ bit of the entropy to decide the index of the 64M zone. 
Then we chose a kernstart_virt_addr + +KASLR for Freescale BookE64 +--- + +The implementation for Freescale BookE64 is similar as BookE32. One +difference is that Freescale BookE64 set up a TLB mapping of 1G during +booting. Another difference is that ppc64 needs the kernel to be +64K-aligned. So we can randomize the kernel in this 1G mapping and make +it 64K-aligned. This can save some code to creat another TLB map at early +boot. The disadvantage is that we only have about 1G/64K = 16384 slots to +put the kernel in:: + +KERNELBASE + + 64K |--> kernel <--| + | | | ++--+--+--++--+--+--+--+--+--+--+--+--++--+--+ +| | | || | | | | | | | | || | | ++--+--+--++--+--+--+--+--+--+--+--+--++--+--+ +| |1G +|-> offset<-| + + kernstart_virt_addr + To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enable and you want to disable it at runtime, add "nokaslr" to the kernel cmdline. -- 2.17.2
[PATCH v4 2/6] powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper
Like the 32bit code, we introduce reloc_kernel_entry() helper to prepare for the KASLR 64bit version. And move the C declaration of this function out of CONFIG_PPC32 and use long instead of int for the parameter 'addr'. Signed-off-by: Jason Yan Cc: Scott Wood Cc: Diana Craciun Cc: Michael Ellerman Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Nicholas Piggin Cc: Kees Cook Reviewed-by: Christophe Leroy --- arch/powerpc/kernel/exceptions-64e.S | 13 + arch/powerpc/mm/mmu_decl.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e4076e3c072d..1b9b174bee86 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1679,3 +1679,16 @@ _GLOBAL(setup_ehv_ivors) _GLOBAL(setup_lrat_ivor) SET_IVOR(42, 0x340) /* LRAT Error */ blr + +/* + * Return to the start of the relocated kernel and run again + * r3 - virtual address of fdt + * r4 - entry of the kernel + */ +_GLOBAL(reloc_kernel_entry) + mfmsr r7 + rlwinm r7, r7, 0, ~(MSR_IS | MSR_DS) + + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r7 + rfi diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7097e07a209a..605129b5ccdf 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -140,9 +140,10 @@ extern void adjust_total_lowmem(void); extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); -void reloc_kernel_entry(void *fdt, int addr); extern int is_second_reloc; #endif + +void reloc_kernel_entry(void *fdt, long addr); extern void loadcam_entry(unsigned int index); extern void loadcam_multi(int first_idx, int num, int tmp_idx); -- 2.17.2
[PATCH v4 3/6] powerpc/fsl_booke/64: implement KASLR for fsl_booke64
The implementation for Freescale BookE64 is similar as BookE32. One difference is that Freescale BookE64 set up a TLB mapping of 1G during booting. Another difference is that ppc64 needs the kernel to be 64K-aligned. So we can randomize the kernel in this 1G mapping and make it 64K-aligned. This can save some code to creat another TLB map at early boot. The disadvantage is that we only have about 1G/64K = 16384 slots to put the kernel in. To support secondary cpu boot up, a variable __kaslr_offset was added in first_256B section. This can help secondary cpu get the kaslr offset before the 1:1 mapping has been setup. Signed-off-by: Jason Yan Cc: Scott Wood Cc: Diana Craciun Cc: Michael Ellerman Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Nicholas Piggin Cc: Kees Cook --- arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/exceptions-64e.S | 10 arch/powerpc/kernel/head_64.S| 6 +++ arch/powerpc/kernel/setup_64.c | 3 ++ arch/powerpc/mm/mmu_decl.h | 20 arch/powerpc/mm/nohash/kaslr_booke.c | 72 +++- 6 files changed, 80 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 497b7d0b2d7e..0c76601fdd59 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -564,7 +564,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - depends on (FSL_BOOKE && FLATMEM && PPC32) + depends on (PPC_FSL_BOOK3E && FLATMEM) depends on RELOCATABLE help Randomizes the virtual address at which the kernel image is diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1b9b174bee86..260cf1f1e71c 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1378,6 +1378,7 @@ skpinv: addir6,r6,1 /* Increment */ 1: mflrr6 addir6,r6,(2f - 1b) tovirt(r6,r6) + add r6,r6,r19 lis r7,MSR_KERNEL@h ori r7,r7,MSR_KERNEL@l mtspr SPRN_SRR0,r6 @@ -1400,6 +1401,7 @@ skpinv: addir6,r6,1 /* Increment */ /* We translate LR and 
return */ tovirt(r8,r8) + add r8,r8,r19 mtlrr8 blr @@ -1528,6 +1530,7 @@ a2_tlbinit_code_end: */ _GLOBAL(start_initialization_book3e) mflrr28 + li r19, 0 /* First, we need to setup some initial TLBs to map the kernel * text, data and bss at PAGE_OFFSET. We don't have a real mode @@ -1570,6 +1573,12 @@ _GLOBAL(book3e_secondary_core_init) cmplwi r4,0 bne 2f + li r19, 0 +#ifdef CONFIG_RANDOMIZE_BASE + LOAD_REG_ADDR_PIC(r19, __kaslr_offset) + ld r19,0(r19) + rlwinm r19,r19,0,0,5 +#endif /* Setup TLB for this core */ bl initial_tlb_book3e @@ -1602,6 +1611,7 @@ _GLOBAL(book3e_secondary_core_init) lis r3,PAGE_OFFSET@highest sldir3,r3,32 or r28,r28,r3 + add r28,r28,r19 1: mtlrr28 blr diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ad79fddb974d..454129a3c259 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -104,6 +104,12 @@ __secondary_hold_acknowledge: .8byte 0x0 #ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_RANDOMIZE_BASE + .globl __kaslr_offset +__kaslr_offset: + .8byte 0x0 +#endif + /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. 
This * is used by kexec-tools to keep the the kdump kernel in the diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e05e6dd67ae6..836e202dfd5b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,6 +67,7 @@ #include #include +#include #include "setup.h" int spinning_secondaries; @@ -300,6 +301,8 @@ void __init early_setup(unsigned long dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); + kaslr_early_init(__va(dt_ptr), 0); + udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr); /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 605129b5ccdf..6efbd7fd88a4 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -139,22 +139,16 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, extern void adjust_total_lowmem(void); extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); +#endif void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); extern int is_second_reloc; -#endif +extern unsigned long __kaslr_offset; +extern unsigned int
[PATCH v4 0/6] implement KASLR for powerpc/fsl_booke/64
This is a try to implement KASLR for Freescale BookE64 which is based on my earlier implementation for Freescale BookE32: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=131718=* The implementation for Freescale BookE64 is similar to BookE32. One difference is that Freescale BookE64 sets up a TLB mapping of 1G during booting. Another difference is that ppc64 needs the kernel to be 64K-aligned. So we can randomize the kernel in this 1G mapping and make it 64K-aligned. This can save some code to create another TLB map at early boot. The disadvantage is that we only have about 1G/64K = 16384 slots to put the kernel in. KERNELBASE 64K |--> kernel <--| | | | +--+--+--++--+--+--+--+--+--+--+--+--++--+--+ | | | || | | | | | | | | || | | +--+--+--++--+--+--+--+--+--+--+--+--++--+--+ | |1G |-> offset<-| kernstart_virt_addr I'm not sure if the number of slots is enough or the design has any defects. If you have some better ideas, I would be happy to hear that. Thank you all. v3->v4: Do not define __kaslr_offset as a fixed symbol. Reference __run_at_load and __kaslr_offset by symbol instead of magic offsets. Use IS_ENABLED(CONFIG_PPC32) instead of #ifdef CONFIG_PPC32. Change kaslr-booke32 to kaslr-booke in index.rst Switch some instructions to 64-bit. v2->v3: Fix build error when KASLR is disabled. v1->v2: Add __kaslr_offset for the secondary cpu boot up. 
Jason Yan (6): powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and kaslr_early_init() powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper powerpc/fsl_booke/64: implement KASLR for fsl_booke64 powerpc/fsl_booke/64: do not clear the BSS for the second pass powerpc/fsl_booke/64: clear the original kernel if randomized powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst and add 64bit part Documentation/powerpc/index.rst | 2 +- .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 +++- arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/exceptions-64e.S | 23 + arch/powerpc/kernel/head_64.S | 13 +++ arch/powerpc/kernel/setup_64.c| 3 + arch/powerpc/mm/mmu_decl.h| 23 ++--- arch/powerpc/mm/nohash/kaslr_booke.c | 88 +-- 8 files changed, 144 insertions(+), 45 deletions(-) rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%) -- 2.17.2
[PATCH v4 1/6] powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and kaslr_early_init()
Some code refactor in kaslr_legal_offset() and kaslr_early_init(). No functional change. This is a preparation for KASLR fsl_booke64. Signed-off-by: Jason Yan Cc: Scott Wood Cc: Diana Craciun Cc: Michael Ellerman Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Nicholas Piggin Cc: Kees Cook --- arch/powerpc/mm/nohash/kaslr_booke.c | 34 +++- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4a75f2d9bf0e..6ebff31fefcc 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -25,6 +25,7 @@ struct regions { unsigned long pa_start; unsigned long pa_end; unsigned long kernel_size; + unsigned long linear_sz; unsigned long dtb_start; unsigned long dtb_end; unsigned long initrd_start; @@ -260,11 +261,23 @@ static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells, *size_cells = fdt32_to_cpu(*prop); } -static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index, - unsigned long offset) +static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long random) { unsigned long koffset = 0; unsigned long start; + unsigned long index; + unsigned long offset; + + /* +* Decide which 64M we want to start +* Only use the low 8 bits of the random seed +*/ + index = random & 0xFF; + index %= regions.linear_sz / SZ_64M; + + /* Decide offset inside 64M */ + offset = random % (SZ_64M - regions.kernel_size); + offset = round_down(offset, SZ_16K); while ((long)index >= 0) { offset = memstart_addr + index * SZ_64M + offset; @@ -289,10 +302,9 @@ static inline __init bool kaslr_disabled(void) static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size, unsigned long kernel_sz) { - unsigned long offset, random; + unsigned long random; unsigned long ram, linear_sz; u64 seed; - unsigned long index; kaslr_get_cmdline(dt_ptr); if (kaslr_disabled()) @@ -333,22 +345,12 @@ 
static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size regions.dtb_start = __pa(dt_ptr); regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr); regions.kernel_size = kernel_sz; + regions.linear_sz = linear_sz; get_initrd_range(dt_ptr); get_crash_kernel(dt_ptr, ram); - /* -* Decide which 64M we want to start -* Only use the low 8 bits of the random seed -*/ - index = random & 0xFF; - index %= linear_sz / SZ_64M; - - /* Decide offset inside 64M */ - offset = random % (SZ_64M - kernel_sz); - offset = round_down(offset, SZ_16K); - - return kaslr_legal_offset(dt_ptr, index, offset); + return kaslr_legal_offset(dt_ptr, random); } /* -- 2.17.2
linux-next: manual merge of the akpm tree with the powerpc tree
Hi all, Today's linux-next merge of the akpm tree got conflicts in: arch/powerpc/mm/book3s32/mmu.c arch/powerpc/mm/book3s32/tlb.c arch/powerpc/mm/kasan/kasan_init_32.c arch/powerpc/mm/mem.c arch/powerpc/mm/nohash/40x.c arch/powerpc/mm/pgtable_32.c between commits: 0b1c524caaae ("powerpc/32: refactor pmd_offset(pud_offset(pgd_offset...") 2efc7c085f05 ("powerpc/32: drop get_pteptr()") from the powerpc tree and patch: "powerpc: add support for folded p4d page tables" from the akpm tree. I fixed it up (all the files above no longer need modifiying, and the extra patch is below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. diff --cc arch/powerpc/mm/book3s32/mmu.c index 39ba53ca5bb5,edef17c97206.. --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c diff --cc arch/powerpc/mm/book3s32/tlb.c index dc9039a170aa,175bc33b41b7.. --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c diff --cc arch/powerpc/mm/kasan/kasan_init_32.c index f19526e7d3dc,88e2e16380b5.. --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c diff --cc arch/powerpc/mm/mem.c index 9b4f5fb719e0,8262b384dcf3.. --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c diff --cc arch/powerpc/mm/nohash/40x.c index 82862723ab42,7aaf7155e350.. --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c diff --cc arch/powerpc/mm/pgtable_32.c index f62de06e3d07,5774d4bc94d0.. 
--- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index df7f63d37b74..74f890d6218c 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -44,12 +44,12 @@ struct mm_struct; #ifdef CONFIG_PPC32 static inline pmd_t *pmd_ptr(struct mm_struct *mm, unsigned long va) { - return pmd_offset(pud_offset(pgd_offset(mm, va), va), va); + return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va) , va), va); } static inline pmd_t *pmd_ptr_k(unsigned long va) { - return pmd_offset(pud_offset(pgd_offset_k(va), va), va); + return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va) , va), va); } static inline pte_t *virt_to_kpte(unsigned long vaddr) -- Cheers, Stephen Rothwell Just to be clear, below is the new version of "powerpc: add support for folded p4d page tables" (I am not sure it is completely correct, but ppc64_defconfig, 32 and 64 bit allnoconfig, allyesconfig, ppc44x_defconfig and pseries_le_defconfig all build without new warnings): From: Mike Rapoport Date: Wed, 4 Mar 2020 22:32:05 +1100 Subject: [PATCH] powerpc: add support for folded p4d page tables Implement primitives necessary for the 4th level folding, add walks of p4d level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. 
Link: http://lkml.kernel.org/r/20200227084608.18223-9-r...@kernel.org Signed-off-by: Mike Rapoport Tested-by: Christophe Leroy[8xx and 83xx] Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Brian Cain Cc: Catalin Marinas Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Guan Xuetao Cc: James Morse Cc: Jonas Bonn Cc: Julien Thierry Cc: Ley Foon Tan Cc: Marc Zyngier Cc: Michael Ellerman Cc: Paul Mackerras Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Suzuki K Poulose Cc: Tony Luck Cc: Will Deacon Cc: Yoshinori Sato Cc: Anders Roxell Cc: Anshuman Khandual Cc: Naresh Kamboju Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/include/asm/book3s/32/pgtable.h | 1 - arch/powerpc/include/asm/book3s/64/hash.h | 4 +- arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 60 ++- arch/powerpc/include/asm/book3s/64/radix.h| 6 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- .../include/asm/nohash/64/pgtable-4k.h| 32 +- arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +- arch/powerpc/include/asm/pgtable.h| 10 ++-- arch/powerpc/kvm/book3s_64_mmu_radix.c| 30 ++ arch/powerpc/lib/code-patching.c | 7 ++- arch/powerpc/mm/book3s64/hash_pgtable.c | 4 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 26 +--- arch/powerpc/mm/book3s64/subpage_prot.c | 6 +- arch/powerpc/mm/hugetlbpage.c | 28 + arch/powerpc/mm/nohash/book3e_pgtable.c | 15 ++--- arch/powerpc/mm/pgtable.c | 30
linux-next: manual merge of the akpm tree with the powerpc tree
Hi all, Today's linux-next merge of the akpm tree got a conflict in: arch/powerpc/mm/pgtable_32.c between commit: 2efc7c085f05 ("powerpc/32: drop get_pteptr()") from the powerpc tree and patch: "powerpc/32: drop get_pteptr()" from the akpm tree. I fixed it up (I just dropped the latter version) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell pgpF8g8zJblRL.pgp Description: OpenPGP digital signature
[powerpc:merge] BUILD SUCCESS ab326587bb5fb91cc97df9b9f48e9e1469f04621
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge branch HEAD: ab326587bb5fb91cc97df9b9f48e9e1469f04621 Automatic merge of branches 'master', 'next' and 'fixes' into merge elapsed time: 1023m configs tested: 175 configs skipped: 0 The following configs have been built successfully. More configs may be tested in the coming days. arm64allyesconfig arm allyesconfig arm64allmodconfig arm64 allnoconfig arm allnoconfig arm at91_dt_defconfig arm efm32_defconfig arm exynos_defconfig armmulti_v5_defconfig armmulti_v7_defconfig armshmobile_defconfig arm sunxi_defconfig arm64 defconfig sparcallyesconfig mips allnoconfig s390 alldefconfig ia64defconfig i386 allnoconfig i386 alldefconfig i386 allyesconfig i386defconfig ia64 alldefconfig ia64 allmodconfig ia64 allnoconfig ia64 allyesconfig arm allmodconfig nios2 3c120_defconfig nios2 10m50_defconfig c6xevmc6678_defconfig xtensa iss_defconfig c6x allyesconfig xtensa common_defconfig openrisc simple_smp_defconfig openriscor1ksim_defconfig alpha defconfig cskydefconfig nds32 allnoconfig nds32 defconfig h8300 edosk2674_defconfig h8300h8300h-sim_defconfig h8300 h8s-sim_defconfig m68k allmodconfig m68k m5475evb_defconfig m68k multi_defconfig m68k sun3_defconfig arc allyesconfig arc defconfig microblaze mmu_defconfig microblazenommu_defconfig powerpc allnoconfig powerpc defconfig powerpc ppc64_defconfig powerpc rhel-kconfig mips 32r2_defconfig mips 64r6el_defconfig mips allmodconfig mips allyesconfig mips fuloong2e_defconfig mips malta_kvm_defconfig pariscallnoconfig parisc allyesconfig pariscgeneric-32bit_defconfig pariscgeneric-64bit_defconfig x86_64 randconfig-a001-20200305 x86_64 randconfig-a002-20200305 x86_64 randconfig-a003-20200305 i386 randconfig-a001-20200305 i386 randconfig-a002-20200305 i386 randconfig-a003-20200305 alpharandconfig-a001-20200305 m68k randconfig-a001-20200305 mips randconfig-a001-20200305 nds32randconfig-a001-20200305 parisc randconfig-a001-20200305 
riscvrandconfig-a001-20200305 c6x randconfig-a001-20200305 h8300randconfig-a001-20200305 microblaze randconfig-a001-20200305 nios2randconfig-a001-20200305 sparc64 randconfig-a001-20200305 csky randconfig-a001-20200305 openrisc randconfig-a001-20200305 s390 randconfig-a001-20200305 sh randconfig-a001-20200305 xtensa randconfig-a001-20200305 x86_64 randconfig-b002-20200306 x86_64 randconfig-b001-20200306 i386 randconfig-b001-20200306 i386 randconfig-b003-20200306 i386 randconfig-b002-20200306 x86_64 randconfig-b003-20200306 i386 randconfig-c002-20200306 i386 randconfig-c001-20200306 x86_64 randconfig-c003-20200306 x86_64 randconfig-c002-20200306 i386 randconfig-c003-20200306 x86_64 randconfig-c001-20200306 x86_64 randconfig-d001-20200305 x86_64 randconfig-d002-20200305 x86_64 randconfig-d003-20200305 i386
[powerpc:fixes-test] BUILD SUCCESS 59bee45b9712c759ea4d3dcc4eff1752f3a66558
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test branch HEAD: 59bee45b9712c759ea4d3dcc4eff1752f3a66558 powerpc/mm: Fix missing KUAP disable in flush_coherent_icache() elapsed time: 1371m configs tested: 206 configs skipped: 207 The following configs have been built successfully. More configs may be tested in the coming days. arm allmodconfig arm allnoconfig arm allyesconfig arm64allmodconfig arm64 allnoconfig arm64allyesconfig arm at91_dt_defconfig arm efm32_defconfig arm exynos_defconfig armmulti_v5_defconfig armmulti_v7_defconfig armshmobile_defconfig arm sunxi_defconfig arm64 defconfig sparcallyesconfig sh allmodconfig ia64defconfig sh sh7785lcr_32bit_defconfig ia64 allnoconfig h8300h8300h-sim_defconfig pariscallnoconfig s390defconfig sparc64 allyesconfig powerpc defconfig i386 allnoconfig i386 alldefconfig i386 allyesconfig i386defconfig ia64 alldefconfig ia64 allmodconfig ia64 allyesconfig c6x allyesconfig c6xevmc6678_defconfig nios2 10m50_defconfig nios2 3c120_defconfig openriscor1ksim_defconfig openrisc simple_smp_defconfig xtensa common_defconfig xtensa iss_defconfig alpha defconfig cskydefconfig nds32 allnoconfig nds32 defconfig h8300 edosk2674_defconfig h8300 h8s-sim_defconfig m68k allmodconfig m68k m5475evb_defconfig m68k multi_defconfig m68k sun3_defconfig arc allyesconfig arc defconfig microblaze mmu_defconfig microblazenommu_defconfig powerpc allnoconfig powerpc ppc64_defconfig powerpc rhel-kconfig mips 32r2_defconfig mips 64r6el_defconfig mips allmodconfig mips allnoconfig mips allyesconfig mips fuloong2e_defconfig mips malta_kvm_defconfig parisc allyesconfig pariscgeneric-32bit_defconfig pariscgeneric-64bit_defconfig x86_64 randconfig-a001-20200306 x86_64 randconfig-a002-20200306 x86_64 randconfig-a003-20200306 i386 randconfig-a001-20200306 i386 randconfig-a002-20200306 i386 randconfig-a003-20200306 alpharandconfig-a001-20200306 m68k randconfig-a001-20200306 mips randconfig-a001-20200306 
nds32randconfig-a001-20200306 parisc randconfig-a001-20200306 riscvrandconfig-a001-20200306 alpharandconfig-a001-20200305 m68k randconfig-a001-20200305 mips randconfig-a001-20200305 nds32randconfig-a001-20200305 parisc randconfig-a001-20200305 riscvrandconfig-a001-20200305 c6x randconfig-a001-20200305 h8300randconfig-a001-20200305 microblaze randconfig-a001-20200305 nios2randconfig-a001-20200305 sparc64 randconfig-a001-20200305 csky randconfig-a001-20200305 openrisc randconfig-a001-20200305 s390 randconfig-a001-20200305 sh randconfig-a001-20200305 xtensa randconfig-a001-20200305 x86_64 randconfig-b001-20200306 x86_64 randconfig-b002-20200306 x86_64 randconfig-b003-20200306 i386 randconfig-b001-20200306 i386 randconfig-b002-20200306 i386 randconfig-b003-20200306 x86_64
[powerpc:next-test] BUILD SUCCESS 5c61987c29055c619e116977c7d5db772d0f5239
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test branch HEAD: 5c61987c29055c619e116977c7d5db772d0f5239 powerpc/lib: Fix emulate_step() std test elapsed time: 1022m configs tested: 176 configs skipped: 11 The following configs have been built successfully. More configs may be tested in the coming days. arm64allyesconfig arm allyesconfig arm64 allnoconfig arm allnoconfig arm allmodconfig arm64allmodconfig arm at91_dt_defconfig arm efm32_defconfig arm exynos_defconfig armmulti_v5_defconfig armmulti_v7_defconfig armshmobile_defconfig arm sunxi_defconfig arm64 defconfig sparcallyesconfig sparc64 allnoconfig ia64defconfig powerpc defconfig m68k m5475evb_defconfig parisc allyesconfig i386 allnoconfig i386 alldefconfig i386 allyesconfig i386defconfig ia64 alldefconfig ia64 allmodconfig ia64 allnoconfig ia64 allyesconfig c6x allyesconfig c6xevmc6678_defconfig nios2 10m50_defconfig nios2 3c120_defconfig openriscor1ksim_defconfig openrisc simple_smp_defconfig xtensa common_defconfig xtensa iss_defconfig alpha defconfig cskydefconfig nds32 allnoconfig nds32 defconfig h8300 edosk2674_defconfig h8300h8300h-sim_defconfig h8300 h8s-sim_defconfig m68k allmodconfig m68k multi_defconfig m68k sun3_defconfig arc allyesconfig arc defconfig microblaze mmu_defconfig microblazenommu_defconfig powerpc allnoconfig powerpc ppc64_defconfig powerpc rhel-kconfig mips 32r2_defconfig mips 64r6el_defconfig mips allmodconfig mips allnoconfig mips allyesconfig mips fuloong2e_defconfig mips malta_kvm_defconfig pariscallnoconfig pariscgeneric-32bit_defconfig pariscgeneric-64bit_defconfig x86_64 randconfig-a001-20200305 x86_64 randconfig-a002-20200305 x86_64 randconfig-a003-20200305 i386 randconfig-a001-20200305 i386 randconfig-a002-20200305 i386 randconfig-a003-20200305 x86_64 randconfig-a001-20200306 x86_64 randconfig-a002-20200306 x86_64 randconfig-a003-20200306 i386 randconfig-a001-20200306 i386 randconfig-a002-20200306 i386 randconfig-a003-20200306 
alpharandconfig-a001-20200305 m68k randconfig-a001-20200305 mips randconfig-a001-20200305 nds32randconfig-a001-20200305 parisc randconfig-a001-20200305 riscvrandconfig-a001-20200305 c6x randconfig-a001-20200305 h8300randconfig-a001-20200305 microblaze randconfig-a001-20200305 nios2randconfig-a001-20200305 sparc64 randconfig-a001-20200305 csky randconfig-a001-20200305 openrisc randconfig-a001-20200305 s390 randconfig-a001-20200305 sh randconfig-a001-20200305 xtensa randconfig-a001-20200305 x86_64 randconfig-b001-20200305 x86_64 randconfig-b002-20200305 x86_64 randconfig-b003-20200305 i386 randconfig-b001-20200305 i386 randconfig-b002-20200305 i386 randconfig-b003-20200305 x86_64 randconfig-c001-20200306 x86_64 randconfig-c002-20200306 x86_64 randconfig-c003-20200306 i386 randconfig-c001-20200306
[powerpc:next] BUILD SUCCESS 247257b03b04398ca07da4bce3d17bee25d623cb
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next branch HEAD: 247257b03b04398ca07da4bce3d17bee25d623cb powerpc/numa: Remove late request for home node associativity elapsed time: 1022m configs tested: 230 configs skipped: 12 The following configs have been built successfully. More configs may be tested in the coming days. arm allmodconfig arm allnoconfig arm allyesconfig arm64allmodconfig arm64 allnoconfig arm64allyesconfig arm at91_dt_defconfig arm efm32_defconfig arm exynos_defconfig armmulti_v5_defconfig armmulti_v7_defconfig armshmobile_defconfig arm sunxi_defconfig arm64 defconfig sparcallyesconfig microblazenommu_defconfig mips allnoconfig ia64defconfig h8300h8300h-sim_defconfig pariscallnoconfig s390defconfig powerpc defconfig m68k m5475evb_defconfig parisc allyesconfig i386 allnoconfig i386 allyesconfig i386 alldefconfig i386defconfig ia64 alldefconfig ia64 allmodconfig ia64 allnoconfig ia64 allyesconfig nios2 3c120_defconfig nios2 10m50_defconfig c6xevmc6678_defconfig xtensa iss_defconfig c6x allyesconfig xtensa common_defconfig openrisc simple_smp_defconfig openriscor1ksim_defconfig alpha defconfig cskydefconfig nds32 allnoconfig nds32 defconfig h8300 edosk2674_defconfig h8300 h8s-sim_defconfig m68k allmodconfig m68k multi_defconfig m68k sun3_defconfig arc allyesconfig arc defconfig microblaze mmu_defconfig powerpc allnoconfig powerpc ppc64_defconfig powerpc rhel-kconfig mips 32r2_defconfig mips 64r6el_defconfig mips allmodconfig mips allyesconfig mips fuloong2e_defconfig mips malta_kvm_defconfig pariscgeneric-32bit_defconfig pariscgeneric-64bit_defconfig x86_64 randconfig-a001-20200305 x86_64 randconfig-a002-20200305 x86_64 randconfig-a003-20200305 i386 randconfig-a001-20200305 i386 randconfig-a002-20200305 i386 randconfig-a003-20200305 x86_64 randconfig-a001-20200306 x86_64 randconfig-a002-20200306 x86_64 randconfig-a003-20200306 i386 randconfig-a001-20200306 i386 randconfig-a002-20200306 i386 randconfig-a003-20200306 
alpharandconfig-a001-20200305 m68k randconfig-a001-20200305 mips randconfig-a001-20200305 nds32randconfig-a001-20200305 parisc randconfig-a001-20200305 riscvrandconfig-a001-20200305 alpharandconfig-a001-20200306 m68k randconfig-a001-20200306 mips randconfig-a001-20200306 nds32randconfig-a001-20200306 parisc randconfig-a001-20200306 riscvrandconfig-a001-20200306 c6x randconfig-a001-20200305 h8300randconfig-a001-20200305 microblaze randconfig-a001-20200305 nios2randconfig-a001-20200305 sparc64 randconfig-a001-20200305 csky randconfig-a001-20200305 openrisc randconfig-a001-20200305 s390 randconfig-a001-20200305 sh randconfig-a001-20200305 xtensa randconfig-a001-20200305 x86_64 randconfig-b001-20200305 x86_64 randconfig-b002-20200305 x86_64 randconfig-b003-20200305 i386 randconfig
Re: [PATCH v3 6/8] perf/tools: Enhance JSON/metric infrastructure to handle "?"
On 3/2/20 8:38 PM, Jiri Olsa wrote: > On Sat, Feb 29, 2020 at 03:11:57PM +0530, Kajol Jain wrote: > > SNIP > >> #define PVR_VER(pvr)(((pvr) >> 16) & 0x) /* Version field */ >> #define PVR_REV(pvr)(((pvr) >> 0) & 0x) /* Revison field */ >> >> +#define SOCKETS_INFO_FILE_PATH "/devices/hv_24x7/interface/" >> + >> int >> get_cpuid(char *buffer, size_t sz) >> { >> @@ -44,3 +51,43 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) >> >> return bufp; >> } >> + >> +int arch_get_runtimeparam(void) >> +{ >> +int count = 0; >> +DIR *dir; >> +char path[PATH_MAX]; >> +const char *sysfs = sysfs__mountpoint(); >> +char filename[] = "sockets"; >> +FILE *file; >> +char buf[16], *num; >> +int data; >> + >> +if (!sysfs) >> +goto out; >> + >> +snprintf(path, PATH_MAX, >> + "%s" SOCKETS_INFO_FILE_PATH, sysfs); >> +dir = opendir(path); >> + >> +if (!dir) >> +goto out; >> + >> +strcat(path, filename); >> +file = fopen(path, "r"); >> + >> +if (!file) >> +goto out; >> + >> +data = fread(buf, 1, sizeof(buf), file); >> + >> +if (data == 0) >> +goto out; >> + >> +count = strtol(buf, , 10); >> +out: >> +if (!count) >> +count = 1; >> + >> +return count; > > we have sysfs__read_ull for this > Hi Jiri, Thanks for suggesting it. Will update. Kajol > jirka >
Re: [PATCH -next v2] powerpc/64s/pgtable: fix an undefined behaviour
> On Mar 5, 2020, at 2:22 PM, Christophe Leroy wrote: > > > > Le 05/03/2020 à 15:32, Qian Cai a écrit : >> Booting a power9 server with hash MMU could trigger an undefined >> behaviour because pud_offset(p4d, 0) will do, >> 0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10) >> Fix it by converting pud_offset() and friends to static inline >> functions. > > I was suggesting to convert pud_index() to static inline, because that's > where the shift sits. Is it not possible ? > > Here you seems to fix the problem for now, but if someone reuses pud_index() > in another macro one day, the same problem may happen again. > Sounds reasonable. I send out a v3, https://lore.kernel.org/lkml/20200306044852.3236-1-...@lca.pw/T/#u > Christophe > >> UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15 >> shift exponent 34 is too large for 32-bit type 'int' >> CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13 >> Call Trace: >> dump_stack+0xf4/0x164 (unreliable) >> ubsan_epilogue+0x18/0x78 >> __ubsan_handle_shift_out_of_bounds+0x160/0x21c >> walk_pagetables+0x2cc/0x700 >> walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282 >> (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311 >> ptdump_check_wx+0x8c/0xf0 >> mark_rodata_ro+0x48/0x80 >> kernel_init+0x74/0x194 >> ret_from_kernel_thread+0x5c/0x74 >> Suggested-by: Christophe Leroy >> Signed-off-by: Qian Cai >> --- >> arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++-- >> 1 file changed, 14 insertions(+), 6 deletions(-) >> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h >> b/arch/powerpc/include/asm/book3s/64/pgtable.h >> index fa60e8594b9f..4967bc9e25e2 100644 >> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h >> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h >> @@ -1016,12 +1016,20 @@ static inline bool p4d_access_permitted(p4d_t p4d, >> bool write) >>#define pgd_offset(mm, address)((mm)->pgd + pgd_index(address)) >> -#define pud_offset(p4dp, addr) \ >> 
-(((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) >> -#define pmd_offset(pudp,addr) \ >> -(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) >> -#define pte_offset_kernel(dir,addr) \ >> -(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) >> +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) >> +{ >> +return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); >> +} >> + >> +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) >> +{ >> +return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); >> +} >> + >> +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) >> +{ >> +return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); >> +} >>#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) >>
[PATCH v3] powerpc/64s/pgtable: fix an undefined behaviour
Booting a power9 server with hash MMU could trigger an undefined behaviour because pud_offset(p4d, 0) will do, 0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10) Fix it by converting pud_index() and friends to static inline functions. UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15 shift exponent 34 is too large for 32-bit type 'int' CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13 Call Trace: dump_stack+0xf4/0x164 (unreliable) ubsan_epilogue+0x18/0x78 __ubsan_handle_shift_out_of_bounds+0x160/0x21c walk_pagetables+0x2cc/0x700 walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282 (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311 ptdump_check_wx+0x8c/0xf0 mark_rodata_ro+0x48/0x80 kernel_init+0x74/0x194 ret_from_kernel_thread+0x5c/0x74 Suggested-by: Christophe Leroy Signed-off-by: Qian Cai --- v3: convert pud_index() etc to static inline functions. v2: convert pud_offset() etc to static inline functions. arch/powerpc/include/asm/book3s/64/pgtable.h | 23 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 201a69e6a355..bd432c6706b9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -998,10 +998,25 @@ extern struct page *pgd_page(pgd_t pgd); #define pud_page_vaddr(pud)__va(pud_val(pud) & ~PUD_MASKED_BITS) #define pgd_page_vaddr(pgd)__va(pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) +static inline unsigned long pgd_index(unsigned long address) +{ + return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); +} + +static inline unsigned long 
pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} /* * Find an entry in a page-table-directory. We combine the address region -- 2.21.0 (Apple Git-122.2)
Re: [PATCH v2] powerpc: setup_64: set up PACA earlier to avoid kcov problems
On 6/3/20 3:40 pm, Daniel Axtens wrote: There's some special handling for CPU_FTR_HVMODE in cpufeatures_setup_cpu() in kernel/dt_cpu_ftrs.c: /* Initialize the base environment -- clear FSCR/HFSCR. */ hv_mode = !!(mfmsr() & MSR_HV); if (hv_mode) { /* CPU_FTR_HVMODE is used early in PACA setup */ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; mtspr(SPRN_HFSCR, 0); } With this patch, the comment about PACA setup I assume is no longer true. It looks like we still rely on hv_mode being set to deal with discrepancies between the device tree and the MSR. This code confuses me. IIUC it sets the CPU feature if we're in HV mode, which will catch the case where the HV bit is set in the MSR but for some reason it's not listed in the DT. With my patch, we'll directly test the MSR so we don't need the cpu feature set for that. However, the CPU feature is tested elsewhere, so I think the correct behaviour is to keep the code but drop the comment. Having said that bootstrapping is hard so lmk if I've misunderstood. That was my thinking too. -- Andrew Donnellan OzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
Re: [PATCH v2] powerpc: setup_64: set up PACA earlier to avoid kcov problems
> There's some special handling for CPU_FTR_HVMODE in > cpufeatures_setup_cpu() in kernel/dt_cpu_ftrs.c: > > /* Initialize the base environment -- clear FSCR/HFSCR. */ > hv_mode = !!(mfmsr() & MSR_HV); > if (hv_mode) { > /* CPU_FTR_HVMODE is used early in PACA setup */ > cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; > mtspr(SPRN_HFSCR, 0); > } > > With this patch, the comment about PACA setup I assume is no longer > true. It looks like we still rely on hv_mode being set to deal with > discrepancies between the device tree and the MSR. This code confuses me. IIUC it sets the CPU feature if we're in HV mode, which will catch the case where the HV bit is set in the MSR but for some reason it's not listed in the DT. With my patch, we'll directly test the MSR so we don't need the cpu feature set for that. However, the CPU feature is tested elsewhere, so I think the correct behaviour is to keep the code but drop the comment. Having said that bootstrapping is hard so lmk if I've misunderstood. Regards, Daniel > > -- > Andrew Donnellan OzLabs, ADL Canberra > a...@linux.ibm.com IBM Australia Limited
Re: [PATCH] powerpc/64s/radix: Fix !SMP build
Thanks Nick, > Signed-off-by: Nicholas Piggin Tested-by: Anton Blanchard > --- > arch/powerpc/mm/book3s64/radix_pgtable.c | 1 + > arch/powerpc/mm/book3s64/radix_tlb.c | 7 ++- > 2 files changed, 7 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > b/arch/powerpc/mm/book3s64/radix_pgtable.c index > dd1bea45325c..2a9a0cd79490 100644 --- > a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ > b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -26,6 +26,7 @@ > #include > #include > #include > +#include > #include > #include > #include > diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c > b/arch/powerpc/mm/book3s64/radix_tlb.c index > 03f43c924e00..758ade2c2b6e 100644 --- > a/arch/powerpc/mm/book3s64/radix_tlb.c +++ > b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -587,6 +587,11 @@ void > radix__local_flush_all_mm(struct mm_struct *mm) preempt_enable(); > } > EXPORT_SYMBOL(radix__local_flush_all_mm); > + > +static void __flush_all_mm(struct mm_struct *mm, bool fullmm) > +{ > + radix__local_flush_all_mm(mm); > +} > #endif /* CONFIG_SMP */ > > void radix__local_flush_tlb_page_psize(struct mm_struct *mm, > unsigned long vmaddr, @@ -777,7 +782,7 @@ void > radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long > vmaddr) EXPORT_SYMBOL(radix__flush_tlb_page); > #else /* CONFIG_SMP */ > -#define radix__flush_all_mm radix__local_flush_all_mm > +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } > #endif /* CONFIG_SMP */ > > static void do_tlbiel_kernel(void *info)
Re: [PATCH v2] powerpc/Makefile: Mark phony targets as PHONY
On Fri, Mar 6, 2020 at 9:27 AM Michael Ellerman wrote: > > On Wed, 2020-02-19 at 00:04:34 UTC, Michael Ellerman wrote: > > Some of our phony targets are not marked as such. This can lead to > > confusing errors, eg: > > > > $ make clean > > $ touch install > > $ make install > > make: 'install' is up to date. > > $ > > > > Fix it by adding them to the PHONY variable which is marked phony in > > the top-level Makefile, or in scripts/Makefile.build for the boot > > Makefile. > > > > Suggested-by: Masahiro Yamada > > Signed-off-by: Michael Ellerman > > Applied to powerpc next. > > https://git.kernel.org/powerpc/c/d42c6d0f8d004c3661dde3c376ed637e9f292c22 > You do not have to double your Signed-off-by. -- Best Regards Masahiro Yamada
Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
On Wed, 2020-03-04 at 17:53 +1100, Andrew Donnellan wrote: > On 21/2/20 2:27 pm, Alastair D'Silva wrote: > > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, > > + struct ioctl_ocxl_pmem_controller_dump_data __user > > *uarg) > > +{ > > + struct ioctl_ocxl_pmem_controller_dump_data args; > > + u16 i; > > + u64 val; > > + int rc; > > + > > + if (copy_from_user(, uarg, sizeof(args))) > > + return -EFAULT; > > + > > + if (args.buf_size % 8) > > + return -EINVAL; > > + > > + if (args.buf_size > ocxlpmem->admin_command.data_size) > > + return -EINVAL; > > + > > + mutex_lock(>admin_command.lock); > > + > > + rc = admin_command_request(ocxlpmem, > > ADMIN_COMMAND_CONTROLLER_DUMP); > > + if (rc) > > + goto out; > > + > > + val = ((u64)args.offset) << 32; > > + val |= args.buf_size; > > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, > > + ocxlpmem- > > >admin_command.request_offset + 0x08, > > + OCXL_LITTLE_ENDIAN, val); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_execute(ocxlpmem); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_complete_timeout(ocxlpmem, > > + ADMIN_COMMAND_CONTROLLER_DU > > MP); > > + if (rc < 0) { > > + dev_warn(>dev, "Controller dump timed > > out\n"); > > + goto out; > > + } > > + > > + rc = admin_response(ocxlpmem); > > + if (rc < 0) > > + goto out; > > + if (rc != STATUS_SUCCESS) { > > + warn_status(ocxlpmem, > > + "Unexpected status from retrieve error > > log", > > Controller dump > Ok > > + rc); > > + goto out; > > + } > > + > > + for (i = 0; i < args.buf_size; i += 8) { > > + u64 val; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +ocxlpmem- > > >admin_command.data_offset + i, > > +OCXL_HOST_ENDIAN, ); > > Is a controller dump something where we want to do endian swapping? > No, we just have raw binary data that we want to pass through. OCXL_HOST_ENDIAN does no swapping. > Any reason we're not doing the usual check of the data identifier, > additional data length etc? 
> I'll add that > > + if (rc) > > + goto out; > > + > > + if (copy_to_user([i], , sizeof(u64))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + } > > + > > + if (copy_to_user(uarg, , sizeof(args))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + > > + rc = admin_response_handled(ocxlpmem); > > + if (rc) > > + goto out; > > + > > +out: > > + mutex_unlock(>admin_command.lock); > > + return rc; > > +} > > + > > +int request_controller_dump(struct ocxlpmem *ocxlpmem) > > +{ > > + int rc; > > + u64 busy = 1; > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_CHIC, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_CHI_CDA); > > This return code is ignored > > > + > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); > > + if (rc) > > + return rc; > > + > > + while (busy) { > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +GLOBAL_MMIO_HCI, > > +OCXL_LITTLE_ENDIAN, > > ); > > + if (rc) > > + return rc; > > + > > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP; > > + cond_resched(); > > + } > > + > > + return 0; > > +} > > -- Alastair D'Silva Open Source Developer Linux Technology Centre, IBM Australia mob: 0423 762 819
[PATCH] powerpc/vdso: Fix multiple issues with sys_call_table
The VDSO exports a bitmap of valid syscalls. vdso_setup_syscall_map() sets this up, but there are both little and big endian bugs. The issue is with: if (sys_call_table[i] != sys_ni_syscall) On little endian, instead of comparing pointers to the two functions, we compare the first two instructions of each function. If a function happens to have the same first two instructions as sys_ni_syscall, then we have a spurious match and mark the instruction as not implemented. Fix this by removing the inline declarations. On big endian we have a further issue where sys_ni_syscall is a function descriptor and sys_call_table[] holds pointers to the instruction text. Fix this by using dereference_kernel_function_descriptor(). Cc: sta...@vger.kernel.org Signed-off-by: Anton Blanchard --- diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b9a108411c0d..d186b729026e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #undef DEBUG @@ -644,19 +646,16 @@ static __init int vdso_setup(void) static void __init vdso_setup_syscall_map(void) { unsigned int i; - extern unsigned long *sys_call_table; -#ifdef CONFIG_PPC64 - extern unsigned long *compat_sys_call_table; -#endif - extern unsigned long sys_ni_syscall; + unsigned long ni_syscall; + ni_syscall = (unsigned long)dereference_kernel_function_descriptor(sys_ni_syscall); for (i = 0; i < NR_syscalls; i++) { #ifdef CONFIG_PPC64 - if (sys_call_table[i] != sys_ni_syscall) + if (sys_call_table[i] != ni_syscall) vdso_data->syscall_map_64[i >> 5] |= 0x8000UL >> (i & 0x1f); - if (compat_sys_call_table[i] != sys_ni_syscall) + if (compat_sys_call_table[i] != ni_syscall) vdso_data->syscall_map_32[i >> 5] |= 0x8000UL >> (i & 0x1f); #else /* CONFIG_PPC64 */
Re: [PATCH] Add OPAL_GET_SYMBOL / OPAL_LOOKUP_SYMBOL
On Fri, Feb 28, 2020 at 2:09 PM Nicholas Piggin wrote: > > These calls can be used by Linux to annotate BUG addresses with symbols, > look up symbol addresses in xmon, etc. > > This is preferable over having Linux parse the OPAL symbol map itself, > because OPAL's parsing code already exists for its own symbol printing, > and it can support other code regions than the skiboot symbols, e.g., > the wake-up code in the HOMER (where CPUs have been seen to get stuck). > > Signed-off-by: Nicholas Piggin > --- > core/opal.c | 2 + > core/utils.c| 92 +++-- > doc/opal-api/opal-get-symbol-181.rst| 42 +++ > doc/opal-api/opal-lookup-symbol-182.rst | 35 ++ > include/opal-api.h | 4 +- > 5 files changed, 168 insertions(+), 7 deletions(-) > create mode 100644 doc/opal-api/opal-get-symbol-181.rst > create mode 100644 doc/opal-api/opal-lookup-symbol-182.rst > > diff --git a/core/opal.c b/core/opal.c > index d6ff6027b..d9fc4fe05 100644 > --- a/core/opal.c > +++ b/core/opal.c > @@ -142,6 +142,8 @@ int64_t opal_entry_check(struct stack_frame *eframe) > case OPAL_CEC_REBOOT: > case OPAL_CEC_REBOOT2: > case OPAL_SIGNAL_SYSTEM_RESET: > + case OPAL_GET_SYMBOL: > + case OPAL_LOOKUP_SYMBOL: These names are still awful :| > break; > default: > printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! 
PIR=%04lx > cpu @%p -> pir=%04x token=%llu\n", > diff --git a/core/utils.c b/core/utils.c > index 8fd63fcb7..5f0d5130b 100644 > --- a/core/utils.c > +++ b/core/utils.c > @@ -48,40 +48,120 @@ char __attrconst tohex(uint8_t nibble) > return __tohex[nibble]; > } > > -static unsigned long get_symbol(unsigned long addr, char **sym, char > **sym_end) > +static unsigned long get_symbol(unsigned long addr, char **sym, char > **sym_end, unsigned long *size) > { > unsigned long prev = 0, next; > char *psym = NULL, *p = __sym_map_start; > > *sym = *sym_end = NULL; > - while(p < __sym_map_end) { > + while (p < __sym_map_end) { > next = strtoul(p, , 16) | SKIBOOT_BASE; > if (next > addr && prev <= addr) { > - p = psym + 3;; > + p = psym + 3; > if (p >= __sym_map_end) > return 0; > *sym = p; > - while(p < __sym_map_end && *p != 10) > + while (p < __sym_map_end && *p != '\n') > p++; > *sym_end = p; > + *size = next - prev; > return prev; > } > prev = next; > psym = p; > - while(p < __sym_map_end && *p != 10) > + while (p < __sym_map_end && *p != '\n') > p++; > p++; > } > return 0; > } > > +static unsigned long lookup_symbol(const char *name, unsigned long *size) > +{ > + size_t len = strlen(name); > + unsigned long addr = 0; > + char *sym; > + char *p = __sym_map_start; > + > + while (p < __sym_map_end) { > + addr = strtoul(p, , 16) | SKIBOOT_BASE; > + p += 3; > + if (p >= __sym_map_end) > + return 0; > + > + if (*(p + len) == '\n' && !strncmp(name, p, len)) { > + char *sym_end; > + > + if (get_symbol(addr, , _end, size) == 0) { > + assert(!strcmp(name, "_end")); > + *size = 0; > + } > + > + /* > +* May be more than one symbol at this address but > +* symbol length calculation should still work in > +* that case. 
> +*/ > + > + return addr; > + } > + > + while(p < __sym_map_end && *p != '\n') > + p++; > + p++; > + } > + return 0; > +} > + > +static int64_t opal_get_symbol(uint64_t addr, __be64 *symaddr, __be64 > *symsize, char *namebuf, uint64_t buflen) > +{ > + unsigned long saddr; > + unsigned long ssize; > + char *sym, *sym_end; > + size_t l; > + > + saddr = get_symbol(addr, , _end, ); > + if (!saddr) > + return OPAL_RESOURCE; > + > + if (buflen > sym_end - sym) > + l = sym_end - sym; > + else > + l = buflen - 1; > + memcpy(namebuf, sym, l); > + namebuf[l] = '\0'; > + > + *symaddr = cpu_to_be64(saddr); > + *symsize = cpu_to_be64(ssize); > + > + return OPAL_SUCCESS;
Re: [PATCH] KVM: PPC: Book3S HV: Fix typos in comments
On Fri, Mar 06, 2020 at 11:26:36AM +1100, Gustavo Romero wrote: > Fix typos found in comments about the parameter passed > through r5 to kvmppc_{save,restore}_tm_hv functions. Actually "iff" is a common shorthand in some fields and not necessarily a spelling error: https://en.wikipedia.org/wiki/If_and_only_if Gabriel > > Signed-off-by: Gustavo Romero > --- > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index dbc2fec..a55dbe8 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -3121,7 +3121,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) > * Save transactional state and TM-related registers. > * Called with r3 pointing to the vcpu struct and r4 containing > * the guest MSR value. > - * r5 is non-zero iff non-volatile register state needs to be maintained. > + * r5 is non-zero if non-volatile register state needs to be maintained. > * If r5 == 0, this can modify all checkpointed registers, but > * restores r1 and r2 before exit. > */ > @@ -3194,7 +3194,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) > * Restore transactional state and TM-related registers. > * Called with r3 pointing to the vcpu struct > * and r4 containing the guest MSR value. > - * r5 is non-zero iff non-volatile register state needs to be maintained. > + * r5 is non-zero if non-volatile register state needs to be maintained. > * This potentially modifies all checkpointed registers. > * It restores r1 and r2 from the PACA. > */ > -- > 1.8.3.1 >
Re: [PATCH] KVM: PPC: Book3S HV: Fix typos in comments
Hi Gabriel, On 03/06/2020 01:06 PM, Gabriel Paubert wrote: On Fri, Mar 06, 2020 at 11:26:36AM +1100, Gustavo Romero wrote: Fix typos found in comments about the parameter passed through r5 to kvmppc_{save,restore}_tm_hv functions. Actually "iff" is a common shorthand in some fields and not necessarily a spelling error: https://en.wikipedia.org/wiki/If_and_only_if I see. Thank you. Best regards, Gustavo
Re: [PATCH] crypto: Replace zero-length array with flexible-array member
On Mon, Feb 24, 2020 at 10:21:00AM -0600, Gustavo A. R. Silva wrote: > The current codebase makes use of the zero-length array language > extension to the C90 standard, but the preferred mechanism to declare > variable-length types such as these ones is a flexible array member[1][2], > introduced in C99: > > struct foo { > int stuff; > struct boo array[]; > }; > > By making use of the mechanism above, we will get a compiler warning > in case the flexible array does not occur last in the structure, which > will help us prevent some kind of undefined behavior bugs from being > inadvertently introduced[3] to the codebase from now on. > > Also, notice that, dynamic memory allocations won't be affected by > this change: > > "Flexible array members have incomplete type, and so the sizeof operator > may not be applied. As a quirk of the original implementation of > zero-length arrays, sizeof evaluates to zero."[1] > > This issue was found with the help of Coccinelle. > > [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html > [2] https://github.com/KSPP/linux/issues/21 > [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") > > Signed-off-by: Gustavo A. R. Silva > --- > drivers/crypto/caam/caamalg.c | 2 +- > drivers/crypto/caam/caamalg_qi.c | 4 ++-- > drivers/crypto/caam/caamalg_qi2.h | 6 +++--- > drivers/crypto/caam/caamhash.c | 2 +- > drivers/crypto/cavium/nitrox/nitrox_main.c | 2 +- > drivers/crypto/chelsio/chcr_core.h | 2 +- > drivers/crypto/mediatek/mtk-sha.c | 2 +- > drivers/crypto/nx/nx.h | 2 +- > drivers/crypto/omap-sham.c | 4 ++-- > include/crypto/if_alg.h| 2 +- > 10 files changed, 14 insertions(+), 14 deletions(-) Patch applied. Thanks. -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
[PATCH] KVM: PPC: Book3S HV: Fix typos in comments
Fix typos found in comments about the parameter passed through r5 to kvmppc_{save,restore}_tm_hv functions. Signed-off-by: Gustavo Romero --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index dbc2fec..a55dbe8 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -3121,7 +3121,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) * Save transactional state and TM-related registers. * Called with r3 pointing to the vcpu struct and r4 containing * the guest MSR value. - * r5 is non-zero iff non-volatile register state needs to be maintained. + * r5 is non-zero if non-volatile register state needs to be maintained. * If r5 == 0, this can modify all checkpointed registers, but * restores r1 and r2 before exit. */ @@ -3194,7 +3194,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) * Restore transactional state and TM-related registers. * Called with r3 pointing to the vcpu struct * and r4 containing the guest MSR value. - * r5 is non-zero iff non-volatile register state needs to be maintained. + * r5 is non-zero if non-volatile register state needs to be maintained. * This potentially modifies all checkpointed registers. * It restores r1 and r2 from the PACA. */ -- 1.8.3.1
Re: [PATCH v3 1/5] powerpc: Rename current_stack_pointer() to current_stack_frame()
On Thu, 2020-02-20 at 11:51:37 UTC, Michael Ellerman wrote: > current_stack_pointer(), which was called __get_SP(), used to just > return the value in r1. > > But that caused problems in some cases, so it was turned into a > function in commit bfe9a2cfe91a ("powerpc: Reimplement __get_SP() as a > function not a define"). > > Because it's a function in a separate compilation unit to all its > callers, it has the effect of causing a stack frame to be created, and > then returns the address of that frame. This is good in some cases > like those described in the above commit, but in other cases it's > overkill, we just need to know what stack page we're on. > > On some other arches current_stack_pointer is just a register global > giving the stack pointer, and we'd like to do that too. So rename our > current_stack_pointer() to current_stack_frame() to make that > possible. > > Signed-off-by: Michael Ellerman Series applied to powerpc next. https://git.kernel.org/powerpc/c/3d13e839e801e081bdece0127c2affa33d0f77cf cheers
Re: [PATCH v2] powerpc/Makefile: Mark phony targets as PHONY
On Wed, 2020-02-19 at 00:04:34 UTC, Michael Ellerman wrote: > Some of our phony targets are not marked as such. This can lead to > confusing errors, eg: > > $ make clean > $ touch install > $ make install > make: 'install' is up to date. > $ > > Fix it by adding them to the PHONY variable which is marked phony in > the top-level Makefile, or in scripts/Makefile.build for the boot > Makefile. > > Suggested-by: Masahiro Yamada > Signed-off-by: Michael Ellerman Applied to powerpc next. https://git.kernel.org/powerpc/c/d42c6d0f8d004c3661dde3c376ed637e9f292c22 cheers
Re: [PATCH] powerpc/mm: Don't kmap_atomic() in pte_offset_map() on PPC32
On Mon, 2020-02-17 at 09:41:35 UTC, Christophe Leroy wrote: > On PPC32, pte_offset_map() does a kmap_atomic() in order to support > page tables allocated in high memory, just like ARM and x86/32. > > But since at least 2008 and commit 8054a3428fbe ("powerpc: Remove dead > CONFIG_HIGHPTE"), page tables are never allocated in high memory. > > When the page is in low mem, kmap_atomic() just returns the page > address but still disable preemption and pagefault. And it is > not an inlined function, so we suffer function call for no reason. > > Make pte_offset_map() the same as pte_offset_kernel() and make > pte_unmap() void, in the same way as PPC64 which doesn't have HIGHMEM. > > Signed-off-by: Christophe Leroy Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/6453f9ed9d4e4b4cdf201bf34bf460c436bf50ea cheers
Re: [PATCH 1/2] powerpc/kernel/sysfs: Refactor current sysfs.c
On Fri, 2020-02-14 at 08:06:05 UTC, Kajol Jain wrote: > From: Madhavan Srinivasan > > An attempt to refactor the current sysfs.c file. > To start with a big chuck of macro #defines and dscr > functions are moved to start of the file. Secondly, > HAS_ #define macros are cleanup based on CONFIG_ options > > Finally new HAS_ macro added: > 1. HAS_PPC_PA6T (for PA6T) to separate out non-PMU SPRs. > 2. HAS_PPC_PMC56 to separate out PMC SPR's from HAS_PPC_PMC_CLASSIC >which come under CONFIG_PPC64. > > Signed-off-by: Madhavan Srinivasan Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/fcdb524d440d6326c286006e16f252b40ba4fd6a cheers
Re: [PATCH 1/2] powerpc/powernv: Treat an empty reboot string as default
On Mon, 2020-02-17 at 02:48:32 UTC, Oliver O'Halloran wrote: > Treat an empty reboot cmd string the same as a NULL string. This squashes a > spurious unsupported reboot message that sometimes gets out when using > xmon. > > Signed-off-by: Oliver O'Halloran Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/16985f2d25095899685952296f128a71f0aff05c cheers
Re: [PATCH 1/6] powerpc: kernel: no need to check return value of debugfs_create functions
On Sun, 2020-02-09 at 10:58:56 UTC, Greg Kroah-Hartman wrote: > When calling debugfs functions, there is no need to ever check the > return value. The function can work or not, but the code logic should > never do something different based on this. > > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: Hari Bathini > Cc: linuxppc-dev@lists.ozlabs.org > Signed-off-by: Greg Kroah-Hartman Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/860286cf33963fa8a0fe542995bdec2df5cb3abb cheers
Re: [PATCH 1/2] powerpc/83xx: Fix some typo in some warning message
On Sat, 2020-02-08 at 14:09:04 UTC, Christophe JAILLET wrote: > "couldn;t" should be "couldn't". > > Signed-off-by: Christophe JAILLET Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/365ad0b60d944050d61252e123e6a8b2c3950398 cheers
Re: [PATCH] powerpc/32s: Slenderize _tlbia() for powerpc 603/603e
On Mon, 2020-02-03 at 16:47:37 UTC, Christophe Leroy wrote: > _tlbia() is a function used only on 603/603e core, ie on CPUs which > don't have a hash table. > > _tlbia() uses the tlbia macro which implements a loop of 1024 tlbie. > > On the 603/603e core, flushing the entire TLB requires no more than > 32 tlbie. > > Replace tlbia by a loop of 32 tlbie. > > Signed-off-by: Christophe Leroy Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/e1347a020b81fe47c80cd277bfaa61295a9482a4 cheers
Re: [PATCH v2] powerpc/32s: Don't flush all TLBs when flushing one page
On Sat, 2020-02-01 at 08:04:31 UTC, Christophe Leroy wrote: > When flushing any memory range, the flushing function > flushes all TLBs. > > When (start) and (end - 1) are in the same memory page, > flush that page instead. > > Signed-off-by: Christophe Leroy Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/030e347430957f6f7f29db9099368f8b86c0bf76 cheers
Re: [PATCH v2] powerpc: drmem: avoid NULL pointer dereference when drmem is unavailable
On Fri, 2020-01-31 at 13:28:29 UTC, Michal Suchanek wrote: > > From: Libor Pechacek > > In guests without hotplugagble memory drmem structure is only zero > initialized. Trying to manipulate DLPAR parameters results in a crash. > > $ echo "memory add count 1" > /sys/kernel/dlpar > Oops: Kernel access of bad area, sig: 11 [#1] > LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries > Modules linked in: af_packet(E) rfkill(E) nvram(E) vmx_crypto(E) > gf128mul(E) e1000(E) virtio_balloon(E) rtc_generic(E) crct10dif_vpmsum(E) > btrfs(E) blake2b_generic(E) libcrc32c(E) xor(E) raid6_pq(E) virtio_rng(E) > virtio_blk(E) ohci_pci(E) ehci_pci(E) ohci_hcd(E) ehci_hcd(E) > crc32c_vpmsum(E) usbcore(E) virtio_pci(E) virtio_ring(E) virtio(E) sg(E) > dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) > scsi_mod(E) > CPU: 1 PID: 4114 Comm: bash Kdump: loaded Tainted: GE > 5.5.0-rc6-2-default #1 > NIP: c00ff294 LR: c00ff248 CTR: > REGS: c000fb9d3880 TRAP: 0300 Tainted: GE > (5.5.0-rc6-2-default) > MSR: 80009033 CR: 28242428 XER: 2000 > CFAR: c09a6c10 DAR: 0010 DSISR: 4000 IRQMASK: 0 > GPR00: c00ff248 c000fb9d3b10 c1682e00 0033 > GPR04: c000ff30bf90 c000ff394800 5110 ffe8 > GPR08: fe1c > GPR12: 2200 c0003fffee00 00011cbc37c0 > GPR16: 00011cb27ed0 00011cb6dd10 > GPR20: 00011cb7db28 01003ce035f0 00011cbc7828 00011cbc6c70 > GPR24: 01003cf01210 c000ffade4e0 c2d7216b > GPR28: 0001 c2d78560 c15458d0 > NIP [c00ff294] dlpar_memory+0x6e4/0xd00 > LR [c00ff248] dlpar_memory+0x698/0xd00 > Call Trace: > [c000fb9d3b10] [c00ff248] dlpar_memory+0x698/0xd00 (unreliable) > [c000fb9d3ba0] [c00f5990] handle_dlpar_errorlog+0xc0/0x190 > [c000fb9d3c10] [c00f5c58] dlpar_store+0x198/0x4a0 > [c000fb9d3cd0] [c0c4cb00] kobj_attr_store+0x30/0x50 > [c000fb9d3cf0] [c05a37b4] sysfs_kf_write+0x64/0x90 > [c000fb9d3d10] [c05a2c90] kernfs_fop_write+0x1b0/0x290 > [c000fb9d3d60] [c04a2bec] __vfs_write+0x3c/0x70 > [c000fb9d3d80] [c04a6560] vfs_write+0xd0/0x260 > [c000fb9d3dd0] [c04a69ac] 
ksys_write+0xdc/0x130 > [c000fb9d3e20] [c000b478] system_call+0x5c/0x68 > Instruction dump: > ebc9 1ce70018 38e7ffe8 7cfe3a14 7fbe3840 419dff14 fb610068 7fc9f378 > 3900 480c 6000 4195fef4 <81490010> 39290018 38c80001 7ea93840 > ---[ end trace cc2dd8152608c295 ]--- > > Taking closer look at the code, I can see that for_each_drmem_lmb is a > macro expanding into `for (lmb = _info->lmbs[0]; lmb <= > _info->lmbs[drmem_info->n_lmbs - 1]; lmb++)`. When drmem_info->lmbs > is NULL, the loop would iterate through the whole address range if it > weren't stopped by the NULL pointer dereference on the next line. > > This patch aligns for_each_drmem_lmb and for_each_drmem_lmb_in_range macro > behavior with the common C semantics, where the end marker does not belong > to the scanned range, and alters get_lmb_range() semantics. As a side > effect, the wraparound observed in the crash is prevented. > > Fixes: 6c6ea53725b3 ("powerpc/mm: Separate ibm, dynamic-memory data from DT > format") > Cc: Michal Suchanek > Cc: sta...@vger.kernel.org > Signed-off-by: Libor Pechacek > Signed-off-by: Michal Suchanek Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/a83836dbc53e96f13fec248ecc201d18e1e3111d cheers
Re: [PATCH v3 1/2] powerpc/32: Warn and return ENOSYS on syscalls from kernel
On Fri, 2020-01-31 at 11:34:54 UTC, Christophe Leroy wrote: > Since commit b86fb88855ea ("powerpc/32: implement fast entry for > syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32: > implement fast entry for syscalls on BOOKE"), syscalls from > kernel are unexpected and can have catastrophic consequences > as it will destroy the kernel stack. > > Test MSR_PR on syscall entry. In case syscall is from kernel, > emit a warning and return ENOSYS error. > > Signed-off-by: Christophe Leroy Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/9e27086292aa880921a0f2b8501e5189d5efcf03 cheers
Re: [PATCH 1/2] pseries/vio: Remove stray #ifdef CONFIG_PPC_PSERIES
On Thu, 2020-01-30 at 06:31:52 UTC, Oliver O'Halloran wrote: > vio.c requires CONFIG_IBMVIO which in turn depends on PPC_PSERIES. > In other words, this ifdef is pointless. At a guess it's a carry-over > from pre-history. > > Signed-off-by: Oliver O'Halloran Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/f98df5ed0a670f2c4c1a50d7901acbb862a247c7 cheers
Re: [PATCH] powerpc/papr_scm: Mark papr_scm_ndctl() as static
On Thu, 2020-01-30 at 04:02:06 UTC, Vaibhav Jain wrote: > Function papr_scm_ndctl() is neither exported from the module nor > called directly from outside 'papr.c' hence should be marked 'static'. > > Signed-off-by: Vaibhav Jain Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/72c4ebbac476b8375e69fd09390e6b64c2891716 cheers
Re: [PATCH] powerpc/process: Remove unnecessary #ifdef CONFIG_PPC64 in copy_thread_tls()
On Wed, 2020-01-29 at 19:50:07 UTC, Christophe Leroy wrote: > is_32bit_task() exists on both PPC64 and PPC32, no need of an ifdefery. > > Signed-off-by: Christophe Leroy Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/ba32f4b02105e57627912b42e141d65d90074c64 cheers
Re: [PATCH v6 1/5] powerpc/vphn: Check for error from hcall_vphn
On Wed, 2020-01-29 at 13:52:57 UTC, Srikar Dronamraju wrote: > There is no value in unpacking associativity, if > H_HOME_NODE_ASSOCIATIVITY hcall has returned an error. > > Signed-off-by: Srikar Dronamraju > Cc: Michael Ellerman > Cc: Nicholas Piggin > Cc: Nathan Lynch > Cc: linuxppc-dev@lists.ozlabs.org > Cc: Abdul Haleem > Cc: Satheesh Rajendran > Reported-by: Abdul Haleem > Reviewed-by: Nathan Lynch Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/e7214ae9d85aa325c9f3cb34bf4fad7f112861d7 cheers
Re: [PATCH v4] powerpc/smp: Use nid as fallback for package_id
On Wed, 2020-01-29 at 13:51:21 UTC, Srikar Dronamraju wrote: > Package_id is to find out all cores that are part of the same chip. On > PowerNV machines, package_id defaults to chip_id. However ibm,chip_id > property is not present in device-tree of PowerVM Lpars. Hence lscpu > output shows one core per socket and multiple cores. > > To overcome this, use nid as the package_id on PowerVM Lpars. > > Before the patch. > --- > Architecture:ppc64le > Byte Order: Little Endian > CPU(s): 128 > On-line CPU(s) list: 0-127 > Thread(s) per core: 8 > Core(s) per socket: 1 <-- > Socket(s): 16 <-- > NUMA node(s):2 > Model: 2.2 (pvr 004e 0202) > Model name: POWER9 (architected), altivec supported > Hypervisor vendor: pHyp > Virtualization type: para > L1d cache: 32K > L1i cache: 32K > L2 cache:512K > L3 cache:10240K > NUMA node0 CPU(s): 0-63 > NUMA node1 CPU(s): 64-127 > # > # cat /sys/devices/system/cpu/cpu0/topology/physical_package_id > -1 > # > > After the patch > --- > Architecture:ppc64le > Byte Order: Little Endian > CPU(s): 128 > On-line CPU(s) list: 0-127 > Thread(s) per core: 8<-- > Core(s) per socket: 8<-- > Socket(s): 2 > NUMA node(s):2 > Model: 2.2 (pvr 004e 0202) > Model name: POWER9 (architected), altivec supported > Hypervisor vendor: pHyp > Virtualization type: para > L1d cache: 32K > L1i cache: 32K > L2 cache:512K > L3 cache:10240K > NUMA node0 CPU(s): 0-63 > NUMA node1 CPU(s): 64-127 > # > # cat /sys/devices/system/cpu/cpu0/topology/physical_package_id > 0 > # > Now lscpu output is more in line with the system configuration. > > Signed-off-by: Srikar Dronamraju > Cc: linuxppc-dev@lists.ozlabs.org > Cc: Michael Ellerman > Cc: Vasant Hegde > Cc: Vaidyanathan Srinivasan Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/a05f0e5be4e81e4977d3f92aaf7688ee0cb7d5db cheers
Re: [PATCH v3 1/2] powerpc/32: refactor pmd_offset(pud_offset(pgd_offset...
On Thu, 2020-01-09 at 08:25:25 UTC, Christophe Leroy wrote: > At several places pmd pointer is retrieved through the same action: > > pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); > > or > > pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); > > Refactor this by implementing two helpers pmd_ptr() and pmd_ptr_k() > > This will help when adding the p4d level. > > Signed-off-by: Christophe Leroy Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/0b1c524caaae2428b20e714297243e5551251eb5 cheers
Re: [PATCH] powerpc/32: don't restore r0, r6-r8 on exception entry path after trace_hardirqs_off()
On Tue, 2020-01-07 at 09:16:40 UTC, Christophe Leroy wrote: > Since commit b86fb88855ea ("powerpc/32: implement fast entry for > syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32: > implement fast entry for syscalls on BOOKE"), syscalls don't > use the exception entry path anymore. It is therefore pointless > to restore r0 and r6-r8 after calling trace_hardirqs_off(). > > In the meantime, drop the '2:' label which is unused and misleading. > > Signed-off-by: Christophe Leroy Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/05642cf7289c5562e5939d2ee8a0529d310010b8 cheers
Re: [PATCH kernel v3] powerpc/book3s64: Fix error handling in mm_iommu_do_alloc()
On Mon, 2019-12-23 at 06:03:51 UTC, Alexey Kardashevskiy wrote: > The last jump to free_exit in mm_iommu_do_alloc() happens after page > pointers in struct mm_iommu_table_group_mem_t were already converted to > physical addresses. Thus calling put_page() on these physical addresses > will likely crash. > > This moves the loop which calculates the pageshift and converts page > struct pointers to physical addresses later after the point when > we cannot fail; thus eliminating the need to convert pointers back. > > Fixes: eb9d7a62c386 ("powerpc/mm_iommu: Fix potential deadlock") > Reported-by: Jan Kara > Signed-off-by: Alexey Kardashevskiy Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/c4b78169e3667413184c9a20e11b5832288a109f cheers
Re: [PATCH v6 1/6] Documentation/ABI: add ABI documentation for /sys/kernel/fadump_*
On Wed, 2019-12-11 at 16:09:05 UTC, Sourabh Jain wrote: > Add missing ABI documentation for existing FADump sysfs files. > > Signed-off-by: Sourabh Jain Series applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/f52153ab383f04a45c38d8a7f55a4249477b20df cheers
Re: [RFC PATCH v2 1/1] powerpc/kernel: Enables memory hot-remove after reboot on pseries guests
On Thu, 2020-03-05 at 20:32 -0300, Leonardo Bras wrote: > I will send the matching qemu change as a reply later. http://patchwork.ozlabs.org/patch/1249931/ signature.asc Description: This is a digitally signed message part
Re: [PATCH v2] powerpc/mm: Fix missing KUAP disable in flush_coherent_icache()
On Tue, 2020-03-03 at 23:57:08 UTC, Michael Ellerman wrote: > We received a report of strange kernel faults which turned out to be > due to a missing KUAP disable in flush_coherent_icache() called > from flush_icache_range(). > > The fault looks like: > > Kernel attempted to access user page (7fffc30d9c00) - exploit attempt? > (uid: 1009) > BUG: Unable to handle kernel data access on read at 0x7fffc30d9c00 > Faulting instruction address: 0xc007232c > Oops: Kernel access of bad area, sig: 11 [#1] > LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV > CPU: 35 PID: 5886 Comm: sigtramp Not tainted > 5.6.0-rc2-gcc-8.2.0-3-gfc37a1632d40 #79 > NIP: c007232c LR: c003b7fc CTR: > REGS: c01e11093940 TRAP: 0300 Not tainted > (5.6.0-rc2-gcc-8.2.0-3-gfc37a1632d40) > MSR: 9280b033 CR: 28000884 > XER: > CFAR: c00722fc DAR: 7fffc30d9c00 DSISR: 0800 IRQMASK: 0 > GPR00: c003b7fc c01e11093bd0 c23ac200 7fffc30d9c00 > GPR04: 7fffc30d9c18 c01e11093bd4 > GPR08: 0001 c01e1104ed80 > GPR12: c01fff6ab380 c16be2d0 4000 > GPR16: c000 bfff > GPR20: 7fffc30d9c00 7fffc30d8f58 7fffc30d9c18 7fffc30d9c20 > GPR24: 7fffc30d9c18 c01e11093d90 c01e1104ed80 > GPR28: c01e11093e90 c23d9d18 7fffc30d9c00 > NIP flush_icache_range+0x5c/0x80 > LR handle_rt_signal64+0x95c/0xc2c > Call Trace: > 0xc01e11093d90 (unreliable) > handle_rt_signal64+0x93c/0xc2c > do_notify_resume+0x310/0x430 > ret_from_except_lite+0x70/0x74 > Instruction dump: > 409e002c 7c0802a6 3c62ff31 3863f6a0 f8010080 48195fed 6000 48fe4c8d > 6000 e8010080 7c0803a6 7c0004ac <7c00ffac> 7c0004ac 4c00012c 38210070 > > This path through handle_rt_signal64() to setup_trampoline() and > flush_icache_range() is only triggered by 64-bit processes that have > unmapped their VDSO, which is rare. > > flush_icache_range() takes a range of addresses to flush. In > flush_coherent_icache() we implement an optimisation for CPUs where we > know we don't actually have to flush the whole range, we just need to > do a single icbi. 
> > However we still execute the icbi on the user address of the start of > the range we're flushing. On CPUs that also implement KUAP (Power9) > that leads to the spurious fault above. > > We should be able to pass any address, including a kernel address, to > the icbi on these CPUs, which would avoid any interaction with KUAP. > But I don't want to make that change in a bug fix, just in case it > surfaces some strange behaviour on some CPU. > > So for now just disable KUAP around the icbi. Note the icbi is treated > as a load, so we allow read access, not write as you'd expect. > > Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") > Cc: sta...@vger.kernel.org # v5.2+ > Signed-off-by: Michael Ellerman Applied to powerpc fixes. https://git.kernel.org/powerpc/c/59bee45b9712c759ea4d3dcc4eff1752f3a66558 cheers
Re: [PATCH, v2] powerpc: fix hardware PMU exception bug on PowerVM compatibility mode systems
On Thu, 2020-02-27 at 13:47:15 UTC, "Desnes A. Nunes do Rosario" wrote: > PowerVM systems running compatibility mode on a few Power8 revisions are > still vulnerable to the hardware defect that loses PMU exceptions arriving > prior to a context switch. > > The software fix for this issue is enabled through the CPU_FTR_PMAO_BUG > cpu_feature bit, nevertheless this bit also needs to be set for PowerVM > compatibility mode systems. > > Fixes: 68f2f0d431d9ea4 ("powerpc: Add a cpu feature CPU_FTR_PMAO_BUG") > Signed-off-by: Desnes A. Nunes do Rosario Applied to powerpc fixes, thanks. https://git.kernel.org/powerpc/c/fc37a1632d40c80c067eb1bc235139f5867a2667 cheers
[Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm
https://bugzilla.kernel.org/show_bug.cgi?id=206695 --- Comment #5 from m...@ellerman.id.au --- bugzilla-dae...@bugzilla.kernel.org writes: > https://bugzilla.kernel.org/show_bug.cgi?id=206695 > > --- Comment #4 from Erhard F. (erhar...@mailbox.org) --- > (In reply to mpe from comment #3) >> Can you try this patch? > > Applied your patch on top of 5.6-rc4 + > https://patchwork.ozlabs.org/patch/1248350/ and let the G5 do a few hours > compiling. > > Only getting those nice memleaks from bug #206203 but no windfarm_pm112 > memleak > any longer. So your patch works well it seems. Thanks! Thanks. Can you try this one instead, it changes the order of operations to make the code flow a bit nicer. cheers diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c index 4150301a89a5..e8377ce0a95a 100644 --- a/drivers/macintosh/windfarm_pm112.c +++ b/drivers/macintosh/windfarm_pm112.c @@ -132,14 +132,6 @@ static int create_cpu_loop(int cpu) s32 tmax; int fmin; - /* Get PID params from the appropriate SAT */ - hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL); - if (hdr == NULL) { - printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n"); - return -EINVAL; - } - piddata = (struct smu_sdbp_cpupiddata *)[1]; - /* Get FVT params to get Tmax; if not found, assume default */ hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL); if (hdr) { @@ -152,6 +144,16 @@ static int create_cpu_loop(int cpu) if (tmax < cpu_all_tmax) cpu_all_tmax = tmax; + kfree(hdr); + + /* Get PID params from the appropriate SAT */ + hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL); + if (hdr == NULL) { + printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n"); + return -EINVAL; + } + piddata = (struct smu_sdbp_cpupiddata *)[1]; + /* * Darwin has a minimum fan speed of 1000 rpm for the 4-way and * 515 for the 2-way. 
That appears to be overkill, so for now, @@ -174,6 +176,9 @@ static int create_cpu_loop(int cpu) pid.min = fmin; wf_cpu_pid_init(_pid[cpu], ); + + kfree(hdr); + return 0; } -- You are receiving this mail because: You are watching the assignee of the bug.
Re: [Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm
bugzilla-dae...@bugzilla.kernel.org writes: > https://bugzilla.kernel.org/show_bug.cgi?id=206695 > > --- Comment #4 from Erhard F. (erhar...@mailbox.org) --- > (In reply to mpe from comment #3) >> Can you try this patch? > > Applied your patch on top of 5.6-rc4 + > https://patchwork.ozlabs.org/patch/1248350/ and let the G5 do a few hours > compiling. > > Only getting those nice memleaks from bug #206203 but no windfarm_pm112 > memleak > any longer. So your patch works well it seems. Thanks! Thanks. Can you try this one instead, it changes the order of operations to make the code flow a bit nicer. cheers diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c index 4150301a89a5..e8377ce0a95a 100644 --- a/drivers/macintosh/windfarm_pm112.c +++ b/drivers/macintosh/windfarm_pm112.c @@ -132,14 +132,6 @@ static int create_cpu_loop(int cpu) s32 tmax; int fmin; - /* Get PID params from the appropriate SAT */ - hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL); - if (hdr == NULL) { - printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n"); - return -EINVAL; - } - piddata = (struct smu_sdbp_cpupiddata *)[1]; - /* Get FVT params to get Tmax; if not found, assume default */ hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL); if (hdr) { @@ -152,6 +144,16 @@ static int create_cpu_loop(int cpu) if (tmax < cpu_all_tmax) cpu_all_tmax = tmax; + kfree(hdr); + + /* Get PID params from the appropriate SAT */ + hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL); + if (hdr == NULL) { + printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n"); + return -EINVAL; + } + piddata = (struct smu_sdbp_cpupiddata *)[1]; + /* * Darwin has a minimum fan speed of 1000 rpm for the 4-way and * 515 for the 2-way. That appears to be overkill, so for now, @@ -174,6 +176,9 @@ static int create_cpu_loop(int cpu) pid.min = fmin; wf_cpu_pid_init(_pid[cpu], ); + + kfree(hdr); + return 0; }
[PATCH V15] mm/debug: Add tests validating architecture page table helpers
This adds tests which will validate architecture page table helpers and other accessors in their compliance with expected generic MM semantics. This will help various architectures in validating changes to existing page table helpers or addition of new ones. This test covers basic page table entry transformations including but not limited to old, young, dirty, clean, write, write protect etc at various level along with populating intermediate entries with next page table page and validating them. Test page table pages are allocated from system memory with required size and alignments. The mapped pfns at page table levels are derived from a real pfn representing a valid kernel text symbol. This test gets called inside kernel_init() right after async_synchronize_full(). This test gets built and run when CONFIG_DEBUG_VM_PGTABLE is selected. Any architecture, which is willing to subscribe this test will need to select ARCH_HAS_DEBUG_VM_PGTABLE. For now this is limited to arc, arm64, x86, s390 and ppc32 platforms where the test is known to build and run successfully. Going forward, other architectures too can subscribe the test after fixing any build or runtime problems with their page table helpers. Meanwhile for better platform coverage, the test can also be enabled with CONFIG_EXPERT even without ARCH_HAS_DEBUG_VM_PGTABLE. Folks interested in making sure that a given platform's page table helpers conform to expected generic MM semantics should enable the above config which will just trigger this test during boot. Any non conformity here will be reported as an warning which would need to be fixed. This test will help catch any changes to the agreed upon semantics expected from generic MM and enable platforms to accommodate it thereafter. 
Cc: Andrew Morton Cc: Mike Rapoport Cc: Vineet Gupta Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Kirill A. Shutemov Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: linux-snps-...@lists.infradead.org Cc: linux-arm-ker...@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s...@vger.kernel.org Cc: linux-ri...@lists.infradead.org Cc: x...@kernel.org Cc: linux-a...@vger.kernel.org Cc: linux-ker...@vger.kernel.org Suggested-by: Catalin Marinas Reviewed-by: Ingo Molnar Tested-by: Gerald Schaefer # s390 Tested-by: Christophe Leroy# ppc32 Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Christophe Leroy Signed-off-by: Anshuman Khandual --- This adds a test validation for architecture exported page table helpers. Patch adds basic transformation tests at various levels of the page table. This test was originally suggested by Catalin during arm64 THP migration RFC discussion earlier. Going forward it can include more specific tests with respect to various generic MM functions like THP, HugeTLB etc and platform specific tests. 
https://lore.kernel.org/linux-mm/20190628102003.ga56...@arrakis.emea.arm.com/ Needs to be applied on linux V5.6-rc4 Changes in V15: - Replaced __pa() with __pa_symbol() (https://patchwork.kernel.org/patch/11407715/) - Replaced pte_alloc_map() with pte_alloc_map_lock() per Qian - Replaced pte_unmap() with pte_unmap_unlock() per Qian - Added address to pte_clear_tests() and passed it down till pte_clear() per Qian Changes in V14: (https://patchwork.kernel.org/project/linux-mm/list/?series=242305) - Disabled DEBUG_VM_PGTABLE for IA64 and ARM (32 Bit) per Andrew and Christophe - Updated DEBUG_VM_PGTABLE documentation wrt EXPERT and disabled platforms - Updated RANDOM_[OR|NZ]VALUE open encodings with GENMASK() per Catalin - Updated s390 constraint bits from 12 to 4 (S390_MASK_BITS) per Gerald - Updated in-code documentation for RANDOM_ORVALUE per Gerald - Updated pxx_basic_tests() to use invert functions first per Catalin - Dropped ARCH_HAS_4LEVEL_HACK check from pud_basic_tests() - Replaced __ARCH_HAS_[4|5]LEVEL_HACK with __PAGETABLE_[PUD|P4D]_FOLDED per Catalin - Trimmed the CC list on the commit message per Catalin Changes in V13: (https://patchwork.kernel.org/project/linux-mm/list/?series=237125) - Subscribed s390 platform and updated debug-vm-pgtable/arch-support.txt per Gerald - Dropped keyword 'extern' from debug_vm_pgtable() declaration per Christophe - Moved debug_vm_pgtable() declarations to per Christophe - Moved debug_vm_pgtable() call site into kernel_init() per Christophe - Changed CONFIG_DEBUG_VM_PGTABLE rules per Christophe - Updated commit to include new supported platforms and changed config selection Changes in V12: (https://patchwork.kernel.org/project/linux-mm/list/?series=233905) - Replaced __mmdrop() with mmdrop() - Enable ARCH_HAS_DEBUG_VM_PGTABLE on X86 for non CONFIG_X86_PAE platforms as the test procedure interfere with pre-allocated PMDs attached to the PGD
Re: [PATCH v3 18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote: > > Le 21/02/2020 à 04:27, Alastair D'Silva a écrit : > > From: Alastair D'Silva > > > > This patch adds IOCTLs to allow userspace to request & fetch dumps > > of the internal controller state. > > > > This is useful during debugging or when a fatal error on the > > controller > > has occurred. > > > > Signed-off-by: Alastair D'Silva > > --- > > arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 > > + > > include/uapi/nvdimm/ocxl-pmem.h| 15 +++ > > 2 files changed, 147 insertions(+) > > > > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > index 2b64504f9129..2cabafe1fc58 100644 > > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c > > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c > > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem > > *ocxlpmem, > > return 0; > > } > > > > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem, > > + struct ioctl_ocxl_pmem_controller_dump_data __user > > *uarg) > > +{ > > + struct ioctl_ocxl_pmem_controller_dump_data args; > > + u16 i; > > + u64 val; > > + int rc; > > + > > + if (copy_from_user(, uarg, sizeof(args))) > > + return -EFAULT; > > + > > + if (args.buf_size % 8) > > + return -EINVAL; > > + > > + if (args.buf_size > ocxlpmem->admin_command.data_size) > > + return -EINVAL; > > + > > + mutex_lock(>admin_command.lock); > > + > > + rc = admin_command_request(ocxlpmem, > > ADMIN_COMMAND_CONTROLLER_DUMP); > > + if (rc) > > + goto out; > > + > > + val = ((u64)args.offset) << 32; > > + val |= args.buf_size; > > + rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, > > + ocxlpmem- > > >admin_command.request_offset + 0x08, > > + OCXL_LITTLE_ENDIAN, val); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_execute(ocxlpmem); > > + if (rc) > > + goto out; > > + > > + rc = admin_command_complete_timeout(ocxlpmem, > > + ADMIN_COMMAND_CONTROLLER_DU > > MP); > > + if (rc < 0) { > > + 
dev_warn(>dev, "Controller dump timed > > out\n"); > > + goto out; > > + } > > + > > + rc = admin_response(ocxlpmem); > > + if (rc < 0) > > + goto out; > > + if (rc != STATUS_SUCCESS) { > > + warn_status(ocxlpmem, > > + "Unexpected status from retrieve error > > log", > > + rc); > > + goto out; > > + } > > > It would help if there was a comment indicating how the 3 ioctls are > used. My understanding is that the userland is: > - requesting the controller to prepare a state dump > - then one or more ioctls to fetch the data. The number of calls > required to get the full state really depends on the size of the > buffer > passed by user > - a last ioctl to tell the controller that we're done, presumably to > let > it free some resources. > Ok, will add it to the blurb. > > > + > > + for (i = 0; i < args.buf_size; i += 8) { > > + u64 val; > > + > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +ocxlpmem- > > >admin_command.data_offset + i, > > +OCXL_HOST_ENDIAN, ); > > + if (rc) > > + goto out; > > + > > + if (copy_to_user([i], , sizeof(u64))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + } > > + > > + if (copy_to_user(uarg, , sizeof(args))) { > > + rc = -EFAULT; > > + goto out; > > + } > > + > > + rc = admin_response_handled(ocxlpmem); > > + if (rc) > > + goto out; > > + > > +out: > > + mutex_unlock(>admin_command.lock); > > + return rc; > > +} > > + > > +int request_controller_dump(struct ocxlpmem *ocxlpmem) > > +{ > > + int rc; > > + u64 busy = 1; > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_CHIC, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_CHI_CDA); > > + > > rc is not checked here. 
Whoops > > > > + > > + rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, > > GLOBAL_MMIO_HCI, > > + OCXL_LITTLE_ENDIAN, > > + GLOBAL_MMIO_HCI_CONTROLLER_DUMP); > > + if (rc) > > + return rc; > > + > > + while (busy) { > > + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, > > +GLOBAL_MMIO_HCI, > > +OCXL_LITTLE_ENDIAN, > > ); > > + if (rc) > > + return rc; > > + > > + busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP; > > Setting 'busy' doesn't hurt, but it's not really
[RFC PATCH v2 1/1] powerpc/kernel: Enables memory hot-remove after reboot on pseries guests
While providing guests, it's desirable to resize it's memory on demand. By now, it's possible to do so by creating a guest with a small base memory, hot-plugging all the rest, and using 'movable_node' kernel command-line parameter, which puts all hot-plugged memory in ZONE_MOVABLE, allowing it to be removed whenever needed. But there is an issue regarding guest reboot: If memory is hot-plugged, and then the guest is rebooted, all hot-plugged memory goes to ZONE_NORMAL, which offers no guaranteed hot-removal. It usually prevents this memory to be hot-removed from the guest. It's possible to use device-tree information to fix that behavior, as it stores flags for LMB ranges on ibm,dynamic-memory-vN. It involves marking each memblock with the correct flags as hotpluggable memory, which mm/memblock.c puts in ZONE_MOVABLE during boot if 'movable_node' is passed. For carrying such information, the new flag DRCONF_MEM_HOTPLUGGED is proposed, which should be true if memory was hot-plugged on guest, and false if it's base memory. During boot, guest kernel reads the device-tree, early_init_drmem_lmb() is called for every added LMBs. Here, checking for this new flag and marking memblocks as hotplugable memory is enough to get the desirable behavior. This should cause no change if 'movable_node' parameter is not passed in kernel command-line. Signed-off-by: Leonardo Bras --- The new flag was already proposed on Power Architecture documentation, and it's waiting for approval. I would like to get your comments on this change, but it's still not ready for being merged. I will send the matching qemu change as a reply later. Changes since v1: - Adds new flag, so PowerVM is compatible with the change. 
- Fixes mistakes in code --- arch/powerpc/include/asm/drmem.h | 1 + arch/powerpc/kernel/prom.c | 9 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 3d76e1c388c2..92083b4565f6 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -65,6 +65,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_ASSIGNED0x0008 #define DRCONF_MEM_AI_INVALID 0x0040 #define DRCONF_MEM_RESERVED0x0080 +#define DRCONF_MEM_HOTPLUGGED 0x0100 static inline u32 drmem_lmb_size(void) { diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6620f37abe73..9c5cb2e8049e 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -515,9 +515,14 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, size = 0x8000ul - base; } + if (!validate_mem_limit(base, )) + continue; + DBG("Adding: %llx -> %llx\n", base, size); - if (validate_mem_limit(base, )) - memblock_add(base, size); + memblock_add(base, size); + + if (lmb->flags & DRCONF_MEM_HOTPLUGGED) + memblock_mark_hotplug(base, size); } while (--rngs); } #endif /* CONFIG_PPC_PSERIES */ -- 2.24.1
linux-next: manual merge of the net-next tree with the powerpc tree
Hi all, Today's linux-next merge of the net-next tree got a conflict in: fs/sysfs/group.c between commit: 9255782f7061 ("sysfs: Wrap __compat_only_sysfs_link_entry_to_kobj function to change the symlink name") from the powerpc tree and commit: 303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()") from the net-next tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc fs/sysfs/group.c index 1e2a096057bc,5afe0e7ff7cd.. --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@@ -478,4 -457,118 +479,118 @@@ int compat_only_sysfs_link_entry_to_kob kernfs_put(target); return PTR_ERR_OR_ZERO(link); } -EXPORT_SYMBOL_GPL(__compat_only_sysfs_link_entry_to_kobj); +EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); + + static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, + const struct attribute_group *grp, + struct iattr *newattrs) + { + struct kernfs_node *kn; + int error; + + if (grp->attrs) { + struct attribute *const *attr; + + for (attr = grp->attrs; *attr; attr++) { + kn = kernfs_find_and_get(grp_kn, (*attr)->name); + if (!kn) + return -ENOENT; + + error = kernfs_setattr(kn, newattrs); + kernfs_put(kn); + if (error) + return error; + } + } + + if (grp->bin_attrs) { + struct bin_attribute *const *bin_attr; + + for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name); + if (!kn) + return -ENOENT; + + error = kernfs_setattr(kn, newattrs); + kernfs_put(kn); + if (error) + return error; + } + } + + return 0; + } + + /** + * sysfs_group_change_owner - change owner of an attribute group. + * @kobj: The kobject containing the group. 
+ * @grp: The attribute group. + * @kuid: new owner's kuid + * @kgid: new owner's kgid + * + * Returns 0 on success or error code on failure. + */ + int sysfs_group_change_owner(struct kobject *kobj, +const struct attribute_group *grp, kuid_t kuid, +kgid_t kgid) + { + struct kernfs_node *grp_kn; + int error; + struct iattr newattrs = { + .ia_valid = ATTR_UID | ATTR_GID, + .ia_uid = kuid, + .ia_gid = kgid, + }; + + if (!kobj->state_in_sysfs) + return -EINVAL; + + if (grp->name) { + grp_kn = kernfs_find_and_get(kobj->sd, grp->name); + } else { + kernfs_get(kobj->sd); + grp_kn = kobj->sd; + } + if (!grp_kn) + return -ENOENT; + + error = kernfs_setattr(grp_kn, ); + if (!error) + error = sysfs_group_attrs_change_owner(grp_kn, grp, ); + + kernfs_put(grp_kn); + + return error; + } + EXPORT_SYMBOL_GPL(sysfs_group_change_owner); + + /** + * sysfs_groups_change_owner - change owner of a set of attribute groups. + * @kobj: The kobject containing the groups. + * @groups: The attribute groups. + * @kuid: new owner's kuid + * @kgid: new owner's kgid + * + * Returns 0 on success or error code on failure. + */ + int sysfs_groups_change_owner(struct kobject *kobj, + const struct attribute_group **groups, + kuid_t kuid, kgid_t kgid) + { + int error = 0, i; + + if (!kobj->state_in_sysfs) + return -EINVAL; + + if (!groups) + return 0; + + for (i = 0; groups[i]; i++) { + error = sysfs_group_change_owner(kobj, groups[i], kuid, kgid); + if (error) + break; + } + + return error; + } + EXPORT_SYMBOL_GPL(sysfs_groups_change_owner); pgpGyk52FBWn2.pgp Description: OpenPGP digital signature
Re: [PATCH] powerpc/32: Stop printing the virtual memory layout
On Thu, Mar 05, 2020 at 10:08:37AM -0500, Arvind Sankar wrote: > For security, don't display the kernel's virtual memory layout. > > Kees Cook points out: > "These have been entirely removed on other architectures, so let's > just do the same for ia32 and remove it unconditionally." > > 071929dbdd86 ("arm64: Stop printing the virtual memory layout") > 1c31d4e96b8c ("ARM: 8820/1: mm: Stop printing the virtual memory layout") > 3182f798 ("m68k/mm: Stop printing the virtual memory layout") > fd8d0ca25631 ("parisc: Hide virtual kernel memory layout") > adb1fe9ae2ee ("mm/page_alloc: Remove kernel address exposure in > free_reserved_area()") > > Signed-off-by: Arvind Sankar Acked-by: Tycho Andersen
[PATCH] powerpc/32: Stop printing the virtual memory layout
For security, don't display the kernel's virtual memory layout. Kees Cook points out: "These have been entirely removed on other architectures, so let's just do the same for ia32 and remove it unconditionally." 071929dbdd86 ("arm64: Stop printing the virtual memory layout") 1c31d4e96b8c ("ARM: 8820/1: mm: Stop printing the virtual memory layout") 3182f798 ("m68k/mm: Stop printing the virtual memory layout") fd8d0ca25631 ("parisc: Hide virtual kernel memory layout") adb1fe9ae2ee ("mm/page_alloc: Remove kernel address exposure in free_reserved_area()") Signed-off-by: Arvind Sankar --- arch/powerpc/mm/mem.c | 17 - 1 file changed, 17 deletions(-) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ef7b1119b2e2..df2c143b6bf7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -331,23 +331,6 @@ void __init mem_init(void) #endif mem_init_print_info(NULL); -#ifdef CONFIG_PPC32 - pr_info("Kernel virtual memory layout:\n"); -#ifdef CONFIG_KASAN - pr_info(" * 0x%08lx..0x%08lx : kasan shadow mem\n", - KASAN_SHADOW_START, KASAN_SHADOW_END); -#endif - pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); -#ifdef CONFIG_HIGHMEM - pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", - PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); -#endif /* CONFIG_HIGHMEM */ - if (ioremap_bot != IOREMAP_TOP) - pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", - ioremap_bot, IOREMAP_TOP); - pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", - VMALLOC_START, VMALLOC_END); -#endif /* CONFIG_PPC32 */ } void free_initmem(void) -- 2.24.1
Re: [PATCH] dt: Remove booting-without-of.txt
On 3/4/20 12:45 PM, Rob Herring wrote: > Well, not quite removed yet... Mauro is looking at moving this to ReST, > but I think it would be better to trim or remove it. > > boot-without-of.txt is an ancient document that first outlined > Flattened DeviceTree. The DT world has evolved a lot in the 15 years > since and boot-without-of.txt is pretty stale. The name of the document > itself is confusing if you don't understand the evolution from real > 'OpenFirmware'. Much of what booting-without-of.txt contains is now in > the DT specification (which evolved out of the ePAPR). > > This is a first pass of removing everything that has a DT spec > equivalent or is no longer standard practice (e.g. soc for SoC > nodes) in order to see what's left. This is what I have: > > TODO > - Move boot interface details to arch specific docs > - Document 'serial-number' property in DT spec > - Document the 'hotpluggable' memory property in DT spec > - Document the 'sleep' property (PPC only) > - Document the 'dma-coherent' property in DT spec > - Need the history of node names and 'name' property? > - Need how addresses work? Looks good. Since this is a first pass, I'm expecting that polishing (things like updating section numbers) would happen in subsequent patches after more of the content changes are done, so no need to do so in this patch. 
Reviewed-by: Frank Rowand -Frank > > Cc: Frank Rowand > Cc: Mauro Carvalho Chehab > Cc: Benjamin Herrenschmidt > Cc: Geert Uytterhoeven > Cc: Michael Ellerman > Cc: linuxppc-dev@lists.ozlabs.org > Signed-off-by: Rob Herring > --- > .../devicetree/booting-without-of.txt | 1027 + > 1 file changed, 1 insertion(+), 1026 deletions(-) > > diff --git a/Documentation/devicetree/booting-without-of.txt > b/Documentation/devicetree/booting-without-of.txt > index 4660ccee35a3..97beee828ba4 100644 > --- a/Documentation/devicetree/booting-without-of.txt > +++ b/Documentation/devicetree/booting-without-of.txt > @@ -19,44 +19,17 @@ Table of Contents > 5) Entry point for arch/sh > >II - The DT block format > -1) Header > 2) Device tree generalities > -3) Device tree "structure" block > -4) Device tree "strings" block > >III - Required content of the device tree > 1) Note about cells and address representation > -2) Note about "compatible" properties > -3) Note about "name" properties > -4) Note about node and property names and character set > 5) Required nodes and properties >a) The root node > - b) The /cpus node > - c) The /cpus/* nodes > - d) the /memory node(s) > - e) The /chosen node > - f) the /soc node > - > - IV - "dtc", the device tree compiler > - > - V - Recommendations for a bootloader > - > - VI - System-on-a-chip devices and nodes > -1) Defining child nodes of an SOC > -2) Representing devices without a current OF specification > - > - VII - Specifying interrupt information for devices > -1) interrupts property > -2) interrupt-parent property > -3) OpenPIC Interrupt Controllers > -4) ISA Interrupt Controllers > >VIII - Specifying device power management information (sleep property) > >IX - Specifying dma bus information > > - Appendix A - Sample SOC node for MPC8540 > - > > Revision Information > > @@ -105,19 +78,6 @@ Revision Information >- Added chapter VI > > > - ToDo: > - - Add some definitions of interrupt tree (simple/complex) > - - Add some definitions for PCI 
host bridges > - - Add some common address format examples > - - Add definitions for standard properties and "compatible" > - names for cells that are not already defined by the existing > - OF spec. > - - Compare FSL SOC use of PCI to standard and make sure no new > - node definition required. > - - Add more information about node definitions for SOC devices > - that currently have no standard, like the FSL CPM. > - > - > I - Introduction > > > @@ -333,196 +293,17 @@ II - The DT block format > > > > -This chapter defines the actual format of the flattened device-tree > -passed to the kernel. The actual content of it and kernel requirements > -are described later. You can find example of code manipulating that > -format in various places, including arch/powerpc/kernel/prom_init.c > -which will generate a flattened device-tree from the Open Firmware > -representation, or the fs2dt utility which is part of the kexec tools > -which will generate one from a filesystem representation. It is > -expected that a bootloader like uboot provides a bit more support, > -that will be discussed later as well. > - > Note: The block has to be in main memory. It has to be accessible in > both real mode and virtual mode with no mapping other than main > memory. If you are writing a simple flash bootloader, it should
[Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm
https://bugzilla.kernel.org/show_bug.cgi?id=206695 --- Comment #4 from Erhard F. (erhar...@mailbox.org) --- (In reply to mpe from comment #3) > Can you try this patch? Applied your patch on top of 5.6-rc4 + https://patchwork.ozlabs.org/patch/1248350/ and let the G5 do a few hours compiling. Only getting those nice memleaks from bug #206203 but no windfarm_pm112 memleak any longer. So your patch works well it seems. Thanks! -- You are receiving this mail because: You are watching the assignee of the bug.
Re: [RFC 00/11] perf: Enhancing perf to export processor hazard information
On 3/4/20 10:46 PM, Ravi Bangoria wrote: > Hi Kim, Hi Ravi, > On 3/3/20 3:55 AM, Kim Phillips wrote: >> On 3/2/20 2:21 PM, Stephane Eranian wrote: >>> On Mon, Mar 2, 2020 at 2:13 AM Peter Zijlstra wrote: On Mon, Mar 02, 2020 at 10:53:44AM +0530, Ravi Bangoria wrote: > Modern processors export such hazard data in Performance > Monitoring Unit (PMU) registers. Ex, 'Sampled Instruction Event > Register' on IBM PowerPC[1][2] and 'Instruction-Based Sampling' on > AMD[3] provides similar information. > > Implementation detail: > > A new sample_type called PERF_SAMPLE_PIPELINE_HAZ is introduced. > If it's set, kernel converts arch specific hazard information > into generic format: > > struct perf_pipeline_haz_data { > /* Instruction/Opcode type: Load, Store, Branch */ > __u8 itype; > /* Instruction Cache source */ > __u8 icache; > /* Instruction suffered hazard in pipeline stage */ > __u8 hazard_stage; > /* Hazard reason */ > __u8 hazard_reason; > /* Instruction suffered stall in pipeline stage */ > __u8 stall_stage; > /* Stall reason */ > __u8 stall_reason; > __u16 pad; > }; Kim, does this format indeed work for AMD IBS? >> >> It's not really 1:1, we don't have these separations of stages >> and reasons, for example: we have missed in L2 cache, for example. >> So IBS output is flatter, with more cycle latency figures than >> IBM's AFAICT. > > AMD IBS captures pipeline latency data incase Fetch sampling like the > Fetch latency, tag to retire latency, completion to retire latency and > so on. Yes, Ops sampling do provide more data on load/store centric > information. But it also captures more detailed data for Branch instructions. > And we also looked at ARM SPE, which also captures more details pipeline > data and latency information. > >>> Personally, I don't like the term hazard. This is too IBM Power >>> specific. We need to find a better term, maybe stall or penalty. >> >> Right, IBS doesn't have a filter to only count stalled or otherwise >> bad events. 
IBS' PPR descriptions has one occurrence of the >> word stall, and no penalty. The way I read IBS is it's just >> reporting more sample data than just the precise IP: things like >> hits, misses, cycle latencies, addresses, types, etc., so words >> like 'extended', or the 'auxiliary' already used today even >> are more appropriate for IBS, although I'm the last person to >> bikeshed. > > We are thinking of using "pipeline" word instead of Hazard. Hm, the word 'pipeline' occurs 0 times in IBS documentation. I realize there are a couple of core pipeline-specific pieces of information coming out of it, but the vast majority are addresses, latencies of various components in the memory hierarchy, and various component hit/miss bits. What's needed here is a vendor-specific extended sample information that all these technologies gather, of which things like e.g., 'L1 TLB cycle latency' we all should have in common. I'm not sure why a new PERF_SAMPLE_PIPELINE_HAZ is needed either. Can we use PERF_SAMPLE_AUX instead? Take a look at commit 98dcf14d7f9c "perf tools: Add kernel AUX area sampling definitions". The sample identifier can be used to determine which vendor's sampling IP's data is in it, and events can be recorded just by copying the content of the SIER, etc. registers, and then events get synthesized from the aux sample at report/inject/annotate etc. time. This allows for less sample recording overhead, and moves all the vendor specific decoding and common event conversions for userspace to figure out. >>> Also worth considering is the support of ARM SPE (Statistical >>> Profiling Extension) which is their version of IBS. >>> Whatever gets added need to cover all three with no limitations. >> >> I thought Intel's various LBR, PEBS, and PT supported providing >> similar sample data in perf already, like with perf mem/c2c? > > perf-mem is more of data centric in my opinion. It is more towards > memory profiling. 
So proposal here is to expose pipeline related > details like stalls and latencies. Like I said, I don't see it that way, I see it as "any particular vendor's event's extended details', and these pipeline details have overlap with existing infrastructure within perf, e.g., L2 cache misses. Kim
Re: [PATCH v2] powerpc/mm: Fix missing KUAP disable in flush_coherent_icache()
On Wed, 2020-03-04 at 10:57 +1100, Michael Ellerman wrote: > We received a report of strange kernel faults which turned out to be > due to a missing KUAP disable in flush_coherent_icache() called > from flush_icache_range(). > > The fault looks like: > > Kernel attempted to access user page (7fffc30d9c00) - exploit > attempt? (uid: 1009) > BUG: Unable to handle kernel data access on read at 0x7fffc30d9c00 > Faulting instruction address: 0xc007232c > Oops: Kernel access of bad area, sig: 11 [#1] > LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV > CPU: 35 PID: 5886 Comm: sigtramp Not tainted 5.6.0-rc2-gcc-8.2.0- > 3-gfc37a1632d40 #79 > NIP: c007232c LR: c003b7fc CTR: > REGS: c01e11093940 TRAP: 0300 Not tainted (5.6.0-rc2-gcc- > 8.2.0-3-gfc37a1632d40) > MSR: 9280b033 CR: > 28000884 XER: > CFAR: c00722fc DAR: 7fffc30d9c00 DSISR: 0800 > IRQMASK: 0 > GPR00: c003b7fc c01e11093bd0 c23ac200 > 7fffc30d9c00 > GPR04: 7fffc30d9c18 c01e11093bd4 > > GPR08: 0001 > c01e1104ed80 > GPR12: c01fff6ab380 c16be2d0 > 4000 > GPR16: c000 bfff > > GPR20: 7fffc30d9c00 7fffc30d8f58 7fffc30d9c18 > 7fffc30d9c20 > GPR24: 7fffc30d9c18 c01e11093d90 > c01e1104ed80 > GPR28: c01e11093e90 c23d9d18 > 7fffc30d9c00 > NIP flush_icache_range+0x5c/0x80 > LR handle_rt_signal64+0x95c/0xc2c > Call Trace: > 0xc01e11093d90 (unreliable) > handle_rt_signal64+0x93c/0xc2c > do_notify_resume+0x310/0x430 > ret_from_except_lite+0x70/0x74 > Instruction dump: > 409e002c 7c0802a6 3c62ff31 3863f6a0 f8010080 48195fed 6000 > 48fe4c8d > 6000 e8010080 7c0803a6 7c0004ac <7c00ffac> 7c0004ac 4c00012c > 38210070 > > This path through handle_rt_signal64() to setup_trampoline() and > flush_icache_range() is only triggered by 64-bit processes that have > unmapped their VDSO, which is rare. > > flush_icache_range() takes a range of addresses to flush. In > flush_coherent_icache() we implement an optimisation for CPUs where > we > know we don't actually have to flush the whole range, we just need to > do a single icbi. 
> > However we still execute the icbi on the user address of the start of > the range we're flushing. On CPUs that also implement KUAP (Power9) > that leads to the spurious fault above. > > We should be able to pass any address, including a kernel address, to > the icbi on these CPUs, which would avoid any interaction with KUAP. > But I don't want to make that change in a bug fix, just in case it > surfaces some strange behaviour on some CPU. > > So for now just disable KUAP around the icbi. Note the icbi is > treated > as a load, so we allow read access, not write as you'd expect. > > Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") > Cc: sta...@vger.kernel.org # v5.2+ > Signed-off-by: Michael Ellerman > --- Reviewed-by: Russell Currey
Re: [PATCH -next v2] powerpc/64s/pgtable: fix an undefined behaviour
Le 05/03/2020 à 15:32, Qian Cai a écrit : Booting a power9 server with hash MMU could trigger an undefined behaviour because pud_offset(p4d, 0) will do, 0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10) Fix it by converting pud_offset() and friends to static inline functions. I was suggesting to convert pud_index() to static inline, because that's where the shift sits. Is it not possible ? Here you seems to fix the problem for now, but if someone reuses pud_index() in another macro one day, the same problem may happen again. Christophe UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15 shift exponent 34 is too large for 32-bit type 'int' CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13 Call Trace: dump_stack+0xf4/0x164 (unreliable) ubsan_epilogue+0x18/0x78 __ubsan_handle_shift_out_of_bounds+0x160/0x21c walk_pagetables+0x2cc/0x700 walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282 (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311 ptdump_check_wx+0x8c/0xf0 mark_rodata_ro+0x48/0x80 kernel_init+0x74/0x194 ret_from_kernel_thread+0x5c/0x74 Suggested-by: Christophe Leroy Signed-off-by: Qian Cai --- arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++-- 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index fa60e8594b9f..4967bc9e25e2 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1016,12 +1016,20 @@ static inline bool p4d_access_permitted(p4d_t p4d, bool write) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pud_offset(p4dp, addr) \ - (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) -#define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) -#define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) +static inline pud_t *pud_offset(p4d_t 
*p4d, unsigned long address) +{ + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); +} + +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); +} + +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) +{ + return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); +} #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
Re: [PATCH rebased 1/2] powerpc: reserve memory for capture kernel after hugepages init
Hello, This seems to cause crash with kdump reservation 1GB quite reliably. Thanks Michal On Tue, Feb 18, 2020 at 05:28:34PM +0100, Michal Suchanek wrote: > From: Hari Bathini > > Sometimes, memory reservation for KDump/FADump can overlap with memory > marked for hugepages. This overlap leads to error, hang in KDump case > and copy error reported by f/w in case of FADump, while trying to > capture dump. Report error while setting up memory for the capture > kernel instead of running into issues while capturing dump, by moving > KDump/FADump reservation below MMU early init and failing gracefully > when hugepages memory overlaps with capture kernel memory. > > Signed-off-by: Hari Bathini > --- > arch/powerpc/kernel/prom.c | 16 > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c > index 6620f37abe73..0f14dc9c4dab 100644 > --- a/arch/powerpc/kernel/prom.c > +++ b/arch/powerpc/kernel/prom.c > @@ -735,14 +735,6 @@ void __init early_init_devtree(void *params) > if (PHYSICAL_START > MEMORY_START) > memblock_reserve(MEMORY_START, 0x8000); > reserve_kdump_trampoline(); > -#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) > - /* > - * If we fail to reserve memory for firmware-assisted dump then > - * fallback to kexec based kdump. > - */ > - if (fadump_reserve_mem() == 0) > -#endif > - reserve_crashkernel(); > early_reserve_mem(); > > /* Ensure that total memory size is page-aligned. */ > @@ -781,6 +773,14 @@ void __init early_init_devtree(void *params) > #endif > > mmu_early_init_devtree(); > +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) > + /* > + * If we fail to reserve memory for firmware-assisted dump then > + * fallback to kexec based kdump. > + */ > + if (fadump_reserve_mem() == 0) > +#endif > + reserve_crashkernel(); > > #ifdef CONFIG_PPC_POWERNV > /* Scan and build the list of machine check recoverable ranges */ > -- > 2.23.0 >
Re: [PATCH 2/2] powerpc: Suppress .eh_frame generation
On Thu, Mar 05, 2020 at 08:05:30PM +0530, Naveen N. Rao wrote: > GCC v8 defaults to enabling -fasynchronous-unwind-tables due to > https://gcc.gnu.org/r259298, which results in .eh_frame section being > generated. This results in additional disk usage by the build, as well > as the kernel modules. Since the kernel has no use for this, this > section is discarded. > > Add -fno-asynchronous-unwind-tables to KBUILD_CFLAGS to suppress > generation of .eh_frame section. Note that our VDSOs need .eh_frame, but > are not affected by this change since our VDSO code are all in assembly. That may change, but it is easy to change again for just the VDSOs. > Reported-by: Rasmus Villemoes > Signed-off-by: Naveen N. Rao Reviewed-by: Segher Boessenkool Thanks! Segher
RE: [RFC PATCH v1] powerpc/prom_init: disable XIVE in Secure VM.
On 3/5/20 4:15 PM, Ram Pai wrote: > On Thu, Mar 05, 2020 at 10:55:45AM +1100, David Gibson wrote: >> On Wed, Mar 04, 2020 at 04:56:09PM +0100, Cédric Le Goater wrote: >>> [ ... ] >>> (1) applied the patch which shares the EQ-page with the hypervisor. (2) set "kernel_irqchip=off" (3) set "ic-mode=xive" >>> >>> you don't have to set the interrupt mode. xive should be negotiated >>> by default. >>> (4) set "svm=on" on the kernel command line. (5) no changes to the hypervisor and ultravisor. And Boom it works!. So you were right. >>> >>> Excellent. >>> I am sending out the patch for (1) above ASAP. >>> >>> Next step, could you please try to do the same with the TIMA and ESB pfn ? >>> and use KVM. >> >> I'm a bit confused by this. Aren't the TIMA and ESB pages essentially >> IO pages, rather than memory pages from the guest's point of view? I >> assume only memory pages are protected with PEF - I can't even really >> see what protecting an IO page would even mean. > > It means, that the hypervisor and qemu cannot access the addresses used > to access the I/O pages. It can only be accessed by Ultravisor and the > SVM. > > As it stands today, those pages are accessible from the hypervisor > and not from the SVM or the ultravisor. > > To make it work, we need to enable access to those pages from the SVM > and from the ultravisor. One thing I am not clear is should we block > access to those pages from the hypervisor. If yes, then there is no > good way > to do that, without hardware help. If no, then those GPA pages > can be shared, so that hypervisor/ultravisor/qemu/SVM can all access > those pages. They are shared. KVM will also access them, at interrupt creation, device reset and passthrough. QEMU will use them to mask on/off the interrupts in case of guest migration or machine stop/continue. C.
RE: [RFC PATCH v1] powerpc/prom_init: disable XIVE in Secure VM.
On Thu, Mar 05, 2020 at 10:55:45AM +1100, David Gibson wrote: > On Wed, Mar 04, 2020 at 04:56:09PM +0100, Cédric Le Goater wrote: > > [ ... ] > > > > > (1) applied the patch which shares the EQ-page with the hypervisor. > > > (2) set "kernel_irqchip=off" > > > (3) set "ic-mode=xive" > > > > you don't have to set the interrupt mode. xive should be negotiated > > by default. > > > > > (4) set "svm=on" on the kernel command line. > > > (5) no changes to the hypervisor and ultravisor. > > > > > > And Boom it works!. So you were right. > > > > Excellent. > > > > > I am sending out the patch for (1) above ASAP. > > > > Next step, could you please try to do the same with the TIMA and ESB pfn ? > > and use KVM. > > I'm a bit confused by this. Aren't the TIMA and ESB pages essentially > IO pages, rather than memory pages from the guest's point of view? I > assume only memory pages are protected with PEF - I can't even really see what protecting an IO page would even mean. It means, that the hypervisor and qemu cannot access the addresses used to access the I/O pages. It can only be accessed by Ultravisor and the SVM. As it stands today, those pages are accessible from the hypervisor and not from the SVM or the ultravisor. To make it work, we need to enable access to those pages from the SVM and from the ultravisor. One thing I am not clear is should we block access to those pages from the hypervisor. If yes, then there is no good way to do that, without hardware help. If no, then those GPA pages can be shared, so that hypervisor/ultravisor/qemu/SVM can all access those pages. RP
[PATCH 2/2] powerpc: Suppress .eh_frame generation
GCC v8 defaults to enabling -fasynchronous-unwind-tables due to https://gcc.gnu.org/r259298, which results in .eh_frame section being generated. This results in additional disk usage by the build, as well as the kernel modules. Since the kernel has no use for this, this section is discarded. Add -fno-asynchronous-unwind-tables to KBUILD_CFLAGS to suppress generation of .eh_frame section. Note that our VDSOs need .eh_frame, but are not affected by this change since our VDSO code is all in assembly. Reported-by: Rasmus Villemoes Signed-off-by: Naveen N. Rao --- arch/powerpc/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 89956c4f1ce3..f310c32e88a4 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -239,6 +239,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx) KBUILD_CFLAGS += $(call cc-option,-mno-spe) KBUILD_CFLAGS += $(call cc-option,-mspe=no) +# Don't emit .eh_frame since we have no use for it +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + # Never use string load/store instructions as they are # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -- 2.24.1
[PATCH 1/2] powerpc: Drop -fno-dwarf2-cfi-asm
The original commit/discussion adding -fno-dwarf2-cfi-asm refers to R_PPC64_REL32 relocations not being handled by our module loader: http://lkml.kernel.org/r/20090224065112.ga6...@bombadil.infradead.org However, that is now handled thanks to commit 9f751b82b491d ("powerpc/module: Add support for R_PPC64_REL32 relocations"). So, drop this flag from our Makefile. Signed-off-by: Naveen N. Rao --- arch/powerpc/Makefile | 5 - 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index cbe5ca4f0ee5..89956c4f1ce3 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -239,11 +239,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx) KBUILD_CFLAGS += $(call cc-option,-mno-spe) KBUILD_CFLAGS += $(call cc-option,-mspe=no) -# FIXME: the module load should be taught about the additional relocs -# generated by this. -# revert to pre-gcc-4.4 behaviour of .eh_frame -KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) - # Never use string load/store instructions as they are # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -- 2.24.1
[PATCH -next v2] powerpc/64s/pgtable: fix an undefined behaviour
Booting a power9 server with hash MMU could trigger an undefined behaviour because pud_offset(p4d, 0) will do, 0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10) Fix it by converting pud_offset() and friends to static inline functions. UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15 shift exponent 34 is too large for 32-bit type 'int' CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13 Call Trace: dump_stack+0xf4/0x164 (unreliable) ubsan_epilogue+0x18/0x78 __ubsan_handle_shift_out_of_bounds+0x160/0x21c walk_pagetables+0x2cc/0x700 walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282 (inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311 ptdump_check_wx+0x8c/0xf0 mark_rodata_ro+0x48/0x80 kernel_init+0x74/0x194 ret_from_kernel_thread+0x5c/0x74 Suggested-by: Christophe Leroy Signed-off-by: Qian Cai --- arch/powerpc/include/asm/book3s/64/pgtable.h | 20 ++-- 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index fa60e8594b9f..4967bc9e25e2 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1016,12 +1016,20 @@ static inline bool p4d_access_permitted(p4d_t p4d, bool write) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pud_offset(p4dp, addr) \ - (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) -#define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) -#define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); +} + +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); +} + +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) 
+{ + return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); +} #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -- 1.8.3.1
eh_frame confusion
Naveen N. Rao wrote: Naveen N. Rao wrote: Rasmus Villemoes wrote: Can you check if the below patch works? I am yet to test this in more detail, but would be good to know the implications for ppc32. - Naveen --- diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f35730548e42..5b5bf98b8217 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -239,10 +239,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-vsx) KBUILD_CFLAGS += $(call cc-option,-mno-spe) KBUILD_CFLAGS += $(call cc-option,-mspe=no) -# FIXME: the module load should be taught about the additional relocs -# generated by this. -# revert to pre-gcc-4.4 behaviour of .eh_frame -KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) +KBUILD_CFLAGS += $(call cc-option,-fno-asynchronous-unwind-tables) In terms of the CFI information, the primary difference I see with -fno-dwarf2-cfi-asm is that when dumping the debug frames, CIE indicates version 3, while otherwise (i.e., without -fno-dwarf2-cfi-asm and with/without -fasynchronous-unwind-tables), it is version 1, regardless of -gdwarf-2/-gdwarf-4. There are few more minor changes, but none of these looked significant to me. 
# Never use string load/store instructions as they are # often slow when they are implemented at all diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index e147bbdc12cd..d43b0b18137c 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -25,6 +25,7 @@ KCOV_INSTRUMENT := n UBSAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -fasynchronous-unwind-tables \ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO32__ -s diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 32ebb3522ea1..b2cbb5c49bad 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -13,6 +13,7 @@ KCOV_INSTRUMENT := n UBSAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -fasynchronous-unwind-tables \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO64__ -s The above vdso hunks can be dropped since all our VDSO are assembly, so the above have no impact. - Naveen
Re: eh_frame confusion
Michael Ellerman wrote: "Naveen N. Rao" writes: Rasmus Villemoes wrote: I'm building a ppc32 kernel, and noticed that after upgrading from gcc-7 to gcc-8 all object files now end up having .eh_frame section. For vmlinux, that's not a problem, because they all get discarded in arch/powerpc/kernel/vmlinux.lds.S . However, they stick around in modules, which doesn't seem to be useful - given that everything worked just fine with gcc-7, and I don't see anything in the module loader that handles .eh_frame. The reason I care is that my target has a rather tight rootfs budget, and the .eh_frame section seem to occupy 10-30% of the file size (obviously very depending on the particular module). Comparing the .foo.o.cmd files, I don't see change in options that might explain this (there's a bunch of new -Wno-*, and the -mspe=no spelling is apparently no longer supported in gcc-8). Both before and after, there's -fno-dwarf2-cfi-asm about which gcc's documentation says '-fno-dwarf2-cfi-asm' Emit DWARF unwind info as compiler generated '.eh_frame' section instead of using GAS '.cfi_*' directives. Looking into where that comes from got me even more confused, because both arm and unicore32 say # Never generate .eh_frame KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) while the ppc32 case at hand says # FIXME: the module load should be taught about the additional relocs # generated by this. # revert to pre-gcc-4.4 behaviour of .eh_frame Michael opened a task to look into this recently and I had spent some time last week on this. The original commit/discussion adding -fno-dwarf2-cfi-asm refers to R_PPC64_REL32 relocations not being handled by our module loader: http://lkml.kernel.org/r/20090224065112.ga6...@bombadil.infradead.org I opened that issue purely based on noticing the wart in the Makefile, not because I'd actually tested it. 
However, that is now handled thanks to commit 9f751b82b491d: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9f751b82b491d Haha, written by me, what an idiot. So the Makefile hack can presumably be dropped, because the module loader can handle the relocations. And then maybe we also want to turn off the unwind tables, but that would be a separate patch. I did a test build and a simple module loaded fine, so I think -fno-dwarf2-cfi-asm is not required anymore, unless Michael has seen some breakages with it. Michael? No, as I said above it was just reading the Makefile. Ok, thanks for clarifying. To test, I did 'allmodconfig' builds across three environments: - gcc (Ubuntu 9.2.1-9ubuntu2) 9.2.1 20191008 -- ppc64le - gcc (SUSE Linux) 7.5.0 -- ppc64le - gcc (GCC) 8.2.1 20181215 (Red Hat 8.2.1-6) -- ppc64 (BE) Then, used the below command to list all relocations in the modules: $ find . -name '*.ko' | xargs -n 1 readelf -Wr | grep -v "Relocation " | grep -v "Offset " | cut -d' ' -f4 | sort | uniq R_PPC64_ADDR32 R_PPC64_ADDR64 R_PPC64_ENTRY R_PPC64_REL24 R_PPC64_REL32 R_PPC64_REL64 R_PPC64_TOC R_PPC64_TOC16_HA R_PPC64_TOC16_LO R_PPC64_TOC16_LO_DS All three environments show up similar set of relocations, all of which we handle in the module loader today. If Rasmus/Christophe can confirm that this is true for ppc32 as well, then we should be fine. - Naveen
[Bug 206695] kmemleak reports leaks in drivers/macintosh/windfarm
https://bugzilla.kernel.org/show_bug.cgi?id=206695 --- Comment #3 from m...@ellerman.id.au --- Can you try this patch? diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c index 4150301a89a5..a16f43a1def9 100644 --- a/drivers/macintosh/windfarm_pm112.c +++ b/drivers/macintosh/windfarm_pm112.c @@ -125,7 +125,7 @@ static int create_cpu_loop(int cpu) { int chip = cpu / 2; int core = cpu & 1; - struct smu_sdbp_header *hdr; + struct smu_sdbp_header *hdr, *hdr2; struct smu_sdbp_cpupiddata *piddata; struct wf_cpu_pid_param pid; struct wf_control *main_fan = cpu_fans[0]; @@ -141,9 +141,9 @@ static int create_cpu_loop(int cpu) piddata = (struct smu_sdbp_cpupiddata *)[1]; /* Get FVT params to get Tmax; if not found, assume default */ - hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL); - if (hdr) { - struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)[1]; + hdr2 = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL); + if (hdr2) { + struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)[1]; tmax = fvt->maxtemp << 16; } else tmax = 95 << 16;/* default to 95 degrees C */ @@ -174,6 +174,10 @@ static int create_cpu_loop(int cpu) pid.min = fmin; wf_cpu_pid_init(_pid[cpu], ); + + kfree(hdr); + kfree(hdr2); + return 0; } -- You are receiving this mail because: You are watching the assignee of the bug.
Re: [Bug 206695] New: kmemleak reports leaks in drivers/macintosh/windfarm
Can you try this patch? diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c index 4150301a89a5..a16f43a1def9 100644 --- a/drivers/macintosh/windfarm_pm112.c +++ b/drivers/macintosh/windfarm_pm112.c @@ -125,7 +125,7 @@ static int create_cpu_loop(int cpu) { int chip = cpu / 2; int core = cpu & 1; - struct smu_sdbp_header *hdr; + struct smu_sdbp_header *hdr, *hdr2; struct smu_sdbp_cpupiddata *piddata; struct wf_cpu_pid_param pid; struct wf_control *main_fan = cpu_fans[0]; @@ -141,9 +141,9 @@ static int create_cpu_loop(int cpu) piddata = (struct smu_sdbp_cpupiddata *)&hdr[1]; /* Get FVT params to get Tmax; if not found, assume default */ - hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL); - if (hdr) { - struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr[1]; + hdr2 = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL); + if (hdr2) { + struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr2[1]; tmax = fvt->maxtemp << 16; } else tmax = 95 << 16;/* default to 95 degrees C */ @@ -174,6 +174,10 @@ static int create_cpu_loop(int cpu) pid.min = fmin; wf_cpu_pid_init(&cpu_pid[cpu], &pid); + + kfree(hdr); + kfree(hdr2); + return 0; }
Re: [PATCH 2/2] mm/vma: Introduce VM_ACCESS_FLAGS
On 3/5/20 7:50 AM, Anshuman Khandual wrote: > There are many places where all basic VMA access flags (read, write, exec) > are initialized or checked against as a group. One such example is during > page fault. Existing vma_is_accessible() wrapper already creates the notion > of VMA accessibility as a group access permissions. Hence lets just create > VM_ACCESS_FLAGS (VM_READ|VM_WRITE|VM_EXEC) which will not only reduce code > duplication but also extend the VMA accessibility concept in general. > > Cc: Russell King > CC: Catalin Marinas > CC: Mark Salter > Cc: Nick Hu > CC: Ley Foon Tan > Cc: Michael Ellerman > Cc: Heiko Carstens > Cc: Yoshinori Sato > Cc: Guan Xuetao > Cc: Dave Hansen > Cc: Thomas Gleixner > Cc: Rob Springer > Cc: Greg Kroah-Hartman > Cc: Andrew Morton > Cc: linux-arm-ker...@lists.infradead.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-s...@vger.kernel.org > Cc: de...@driverdev.osuosl.org > Cc: linux...@kvack.org > Cc: linux-ker...@vger.kernel.org > Signed-off-by: Anshuman Khandual Reviewed-by: Vlastimil Babka Thanks.
Re: [RFC PATCH v1] powerpc/prom_init: disable XIVE in Secure VM.
[ ... ] >> yes because you also need to share the XIVE TIMA and ESB pages mapped >> in xive_native_esb_fault() and xive_native_tima_fault(). > > These pages belong to the xive memory slot right? If that is the case, > they are implicitly shared. The Ultravisor will set them up to be > shared. The guest kernel should not be doing anything. > > We still need some fixes in KVM and Ultravisor to correctly map the > hardware pages to GPA ranges of the xive memory slot. Work is in progress... ok. Since this is already done for KVM, I suppose it's not too hard. the VMA has VM_IO | VM_PFNMAP flags. Otherwise you could still pick up the XIVE ESB and TIMA HW MMIO ranges in OPAL and brutally declare the whole as shared, if that's possible. C.
Re: [PATCH] powerpc/64: BE option to use ELFv2 ABI for big endian kernels
On Thu, Mar 05, 2020 at 01:34:22PM +1000, Nicholas Piggin wrote: > Segher Boessenkool's on March 4, 2020 9:09 am: > >> +override flavour := linux-ppc64v2 > > > > That isn't a good name, heh. This isn't "v2" of anything... Spell out > > the name "ELFv2"? Or as "elfv2"? It is just a name after all, it is > > version 1 in all three version fields in the ELF headers! > > Yeah okay. This part is only for some weird little perl asm generator > script, but probably better to be careful. linux-ppc64-elfv2 ? That generator is from openssl, or inspired by it, it is everywhere. So it is more important to get it right than it would seem at first glance ;-) That name looks perfect to me. You'll have to update REs expecting the arch at the end (like /le$/), but you had to already I think? Segher
Re: [PATCH v3 17/27] powerpc/powernv/pmem: Implement the Read Error Log command
+ if (rc) + goto out; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x28, +OCXL_HOST_ENDIAN, &log->wwid[1]); + if (rc) + goto out; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + 0x30, +OCXL_HOST_ENDIAN, (u64 *)log->fw_revision); + if (rc) + goto out; + log->fw_revision[8] = '\0'; + + buf_length = (user_buf_length < log->buf_size) ? +user_buf_length : log->buf_size; + for (i = 0; i < buf_length + 0x48; i += 8) { + u64 val; + + rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, +ocxlpmem->admin_command.data_offset + i, +OCXL_HOST_ENDIAN, &val); + if (rc) + goto out; + + if (buf_is_user) { + if (copy_to_user(&log->buf[i], &val, sizeof(u64))) { + rc = -EFAULT; + goto out; + } + } else + log->buf[i] = val; + } I think it could be a bit simplified by keeping the handling of the user buffer out of this function. Always call it with a kernel buffer. And have only one copy_to_user() call on the ioctl() path. You'd need to allocate a kernel buf on the ioctl path, but you're already doing it on the probe() path, so it should be doable to share code. Hmm, the problem then is that on the IOCTL side, I'll have to save, modify, then restore the buf member of struct ioctl_ocxl_pmem_error_log, which would be uglier. buf is just an output buffer. All you'd need to do is allocate a kernel buf, like it's already done for the "probe" case in dump_error_log(). And add a global copy_to_user() of the buf at the end of the ioctl path, instead of having multiple smaller copy_to_user() in the loop here. copy_to_user() is a bit expensive so it's usually better to regroup them. I think it's easy here and makes sense since that function is also trying to handle both kernel and user space buffers. But we're not in a critical path, and after this patch, there are others copying out mmio content to user buffers and those don't have a kernel buffer to handle, so the copy_to_user() in a loop makes things easier. 
So I guess the conclusion is whatever you think is the easiest... + + rc = admin_response_handled(ocxlpmem); + if (rc) + goto out; + +out: + mutex_unlock(&ocxlpmem->admin_command.lock); + return rc; + +} + +static int ioctl_error_log(struct ocxlpmem *ocxlpmem, + struct ioctl_ocxl_pmem_error_log __user *uarg) +{ + struct ioctl_ocxl_pmem_error_log args; + int rc; + + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + rc = read_error_log(ocxlpmem, &args, true); + if (rc) + return rc; + + if (copy_to_user(uarg, &args, sizeof(args))) + return -EFAULT; + + return 0; +} + +static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args) +{ + struct ocxlpmem *ocxlpmem = file->private_data; + int rc = -EINVAL; + + switch (cmd) { + case IOCTL_OCXL_PMEM_ERROR_LOG: + rc = ioctl_error_log(ocxlpmem, +(struct ioctl_ocxl_pmem_error_log __user *)args); + break; + } + return rc; +} + static const struct file_operations fops = { .owner = THIS_MODULE, .open = file_open, .release= file_release, + .unlocked_ioctl = file_ioctl, + .compat_ioctl = file_ioctl, }; /** @@ -527,6 +736,60 @@ static int read_device_metadata(struct ocxlpmem *ocxlpmem) return 0; } +static const char *decode_error_log_type(u8 error_log_type) +{ + switch (error_log_type) { + case 0x00: + return "general"; + case 0x01: + return "predictive failure"; + case 0x02: + return "thermal warning"; + case 0x03: + return "data loss"; + case 0x04: + return "health & performance"; + default: + return "unknown"; + } +} + +static void dump_error_log(struct ocxlpmem *ocxlpmem) +{ + struct ioctl_ocxl_pmem_error_log log; + u32 buf_size; + u8 *buf; + int rc; + + if (ocxlpmem->admin_command.data_size == 0) + return; + + buf_size = ocxlpmem->admin_command.data_size - 0x48; + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return; + + log.buf = buf; + log.buf_size = buf_size; + + rc =
Re: [PATCH] selftests/powerpc: Turn off timeout setting for benchmarks, dscr, signal, tm
On Thu, Mar 5, 2020 at 3:32 PM Michael Ellerman wrote: > > Po-Hsu Lin writes: > > Some specific tests in powerpc can take longer than the default 45 > > seconds that added in commit 852c8cbf (selftests/kselftest/runner.sh: > > Add 45 second timeout per test) to run, the following test result was > > collected across 2 Power8 nodes and 1 Power9 node in our pool: > > powerpc/benchmarks/futex_bench - 52s > > powerpc/dscr/dscr_sysfs_test - 116s > > powerpc/signal/signal_fuzzer - 88s > > powerpc/tm/tm_unavailable_test - 168s > > powerpc/tm/tm-poison - 240s > > > > Thus they will fail with TIMEOUT error. Disable the timeout setting > > for these sub-tests to allow them finish properly. > > I run the powerpc tests with run-parts, rather than the kselftest > script, we already have our own test runner with a 120s timeout. > > I didn't think the kselftests runner actually worked with the powerpc > tests? Because we override RUN_TESTS. > Hello Michael, I have done a small experiment with timeout=1 in settings and use run-parts to run the executables, it looks like this change won't affect run-parts. Not quite sure about the RUN_TESTS you mentioned here, we're testing it with command like: sudo make -C linux/tools/testing/selftests TARGETS=powerpc run_tests And the timeout setting will take effect with this. 
Thanks > cheers > > > > https://bugs.launchpad.net/bugs/1864642 > > Signed-off-by: Po-Hsu Lin > > --- > > tools/testing/selftests/powerpc/benchmarks/Makefile | 2 ++ > > tools/testing/selftests/powerpc/benchmarks/settings | 1 + > > tools/testing/selftests/powerpc/dscr/Makefile | 2 ++ > > tools/testing/selftests/powerpc/dscr/settings | 1 + > > tools/testing/selftests/powerpc/signal/Makefile | 2 ++ > > tools/testing/selftests/powerpc/signal/settings | 1 + > > tools/testing/selftests/powerpc/tm/Makefile | 2 ++ > > tools/testing/selftests/powerpc/tm/settings | 1 + > > 8 files changed, 12 insertions(+) > > create mode 100644 tools/testing/selftests/powerpc/benchmarks/settings > > create mode 100644 tools/testing/selftests/powerpc/dscr/settings > > create mode 100644 tools/testing/selftests/powerpc/signal/settings > > create mode 100644 tools/testing/selftests/powerpc/tm/settings > > > > diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile > > b/tools/testing/selftests/powerpc/benchmarks/Makefile > > index d40300a..a32a6ab 100644 > > --- a/tools/testing/selftests/powerpc/benchmarks/Makefile > > +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile > > @@ -2,6 +2,8 @@ > > TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench > > null_syscall > > TEST_GEN_FILES := exec_target > > > > +TEST_FILES := settings > > + > > CFLAGS += -O2 > > > > top_srcdir = ../../../../.. 
> > diff --git a/tools/testing/selftests/powerpc/benchmarks/settings > > b/tools/testing/selftests/powerpc/benchmarks/settings > > new file mode 100644 > > index 000..e7b9417 > > --- /dev/null > > +++ b/tools/testing/selftests/powerpc/benchmarks/settings > > @@ -0,0 +1 @@ > > +timeout=0 > > diff --git a/tools/testing/selftests/powerpc/dscr/Makefile > > b/tools/testing/selftests/powerpc/dscr/Makefile > > index 5df4763..cfa6eed 100644 > > --- a/tools/testing/selftests/powerpc/dscr/Makefile > > +++ b/tools/testing/selftests/powerpc/dscr/Makefile > > @@ -3,6 +3,8 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test > > dscr_user_test \ > > dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ > > dscr_sysfs_thread_test > > > > +TEST_FILES := settings > > + > > top_srcdir = ../../../../.. > > include ../../lib.mk > > > > diff --git a/tools/testing/selftests/powerpc/dscr/settings > > b/tools/testing/selftests/powerpc/dscr/settings > > new file mode 100644 > > index 000..e7b9417 > > --- /dev/null > > +++ b/tools/testing/selftests/powerpc/dscr/settings > > @@ -0,0 +1 @@ > > +timeout=0 > > diff --git a/tools/testing/selftests/powerpc/signal/Makefile > > b/tools/testing/selftests/powerpc/signal/Makefile > > index 113838f..153fafc 100644 > > --- a/tools/testing/selftests/powerpc/signal/Makefile > > +++ b/tools/testing/selftests/powerpc/signal/Makefile > > @@ -5,6 +5,8 @@ CFLAGS += -maltivec > > $(OUTPUT)/signal_tm: CFLAGS += -mhtm > > $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 > > > > +TEST_FILES := settings > > + > > top_srcdir = ../../../../.. 
> > include ../../lib.mk > > > > diff --git a/tools/testing/selftests/powerpc/signal/settings > > b/tools/testing/selftests/powerpc/signal/settings > > new file mode 100644 > > index 000..e7b9417 > > --- /dev/null > > +++ b/tools/testing/selftests/powerpc/signal/settings > > @@ -0,0 +1 @@ > > +timeout=0 > > diff --git a/tools/testing/selftests/powerpc/tm/Makefile > > b/tools/testing/selftests/powerpc/tm/Makefile > > index b15a1a3..7b99d09 100644 > > --- a/tools/testing/selftests/powerpc/tm/Makefile > > +++ b/tools/testing/selftests/powerpc/tm/Makefile > > @@ -7,6