[PATCH v4 24/25] nvdimm/ocxl: Expose the serial number & firmware version in sysfs

2020-03-29 Thread Alastair D'Silva
This patch exposes the serial number & firmware version in sysfs,
which will be used by ndctl in userspace to help users identify
the device.

Signed-off-by: Alastair D'Silva 
---
 drivers/nvdimm/ocxl/main.c | 42 --
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index 92b4389e8cbb..1f422f0d51ef 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -235,6 +235,43 @@ static int reserve_metadata(struct ocxlpmem *ocxlpmem,
return 0;
 }
 
+static ssize_t serial_show(struct device *device, struct device_attribute 
*attr,
+  char *buf)
+{
+   struct nvdimm *nvdimm = to_nvdimm(device);
+   struct ocxlpmem *ocxlpmem = nvdimm_provider_data(nvdimm);
+   const struct ocxl_fn_config *fn_config = 
ocxl_function_config(ocxlpmem->ocxl_fn);
+
+   return scnprintf(buf, PAGE_SIZE, "%llu\n", fn_config->serial);
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t fw_version_show(struct device *device,
+  struct device_attribute *attr, char *buf)
+{
+   struct nvdimm *nvdimm = to_nvdimm(device);
+   struct ocxlpmem *ocxlpmem = nvdimm_provider_data(nvdimm);
+
+   return scnprintf(buf, PAGE_SIZE, "%s\n", ocxlpmem->fw_version);
+}
+static DEVICE_ATTR_RO(fw_version);
+
+static struct attribute *ocxl_pmem_attrs[] = {
+   &dev_attr_serial.attr,
+   &dev_attr_fw_version.attr,
+   NULL,
+};
+
+static const struct attribute_group ocxl_pmem_attribute_group = {
+   .name = "ocxlpmem",
+   .attrs = ocxl_pmem_attrs,
+};
+
+static const struct attribute_group *ocxl_pmem_dimm_attribute_groups[] = {
+   &ocxl_pmem_attribute_group,
+   NULL,
+};
+
 /**
  * register_lpc_mem() - Discover persistent memory on a device and register it 
with the NVDIMM subsystem
  * @ocxlpmem: the device metadata
@@ -291,8 +328,9 @@ static int register_lpc_mem(struct ocxlpmem *ocxlpmem)
 
snprintf(serial, sizeof(serial), "%llx", fn_config->serial);
nd_mapping_desc.nvdimm = nvdimm_create(ocxlpmem->nvdimm_bus, ocxlpmem,
-  NULL, nvdimm_flags,
-  nvdimm_cmd_mask, 0, NULL);
+  ocxl_pmem_dimm_attribute_groups,
+  nvdimm_flags, nvdimm_cmd_mask, 0,
+  NULL);
if (!nd_mapping_desc.nvdimm)
return -ENOMEM;
 
-- 
2.24.1



[PATCH v4 20/25] nvdimm/ocxl: Add an IOCTL to request controller health & perf data

2020-03-29 Thread Alastair D'Silva
When health & performance data is requested from the controller,
it responds with an error log containing the requested information.

This patch allows the request to be issued via an IOCTL. The data
can later be collected in userspace via the error log IOCTL introduced
in a previous patch. Userspace will be notified of pending error
logs via an event.

Signed-off-by: Alastair D'Silva 
---
 drivers/nvdimm/ocxl/main.c | 16 
 include/uapi/nvdimm/ocxlpmem.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index cb6cdc9eb899..a4315472683c 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -991,6 +991,18 @@ static int ioctl_event_check(struct ocxlpmem *ocxlpmem, 
u64 __user *uarg)
return rc;
 }
 
+/**
+ * req_controller_health_perf() - Request controller health & performance data
+ * @ocxlpmem: the device metadata
+ * Return: 0 on success, negative on failure
+ */
+int req_controller_health_perf(struct ocxlpmem *ocxlpmem)
+{
+   return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+ OCXL_LITTLE_ENDIAN,
+ GLOBAL_MMIO_HCI_REQ_HEALTH_PERF);
+}
+
 static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 {
struct ocxlpmem *ocxlpmem = file->private_data;
@@ -1028,6 +1040,10 @@ static long file_ioctl(struct file *file, unsigned int 
cmd, unsigned long args)
case IOCTL_OCXLPMEM_EVENT_CHECK:
rc = ioctl_event_check(ocxlpmem, (u64 __user *)args);
break;
+
+   case IOCTL_OCXLPMEM_REQUEST_HEALTH:
+   rc = req_controller_health_perf(ocxlpmem);
+   break;
}
 
return rc;
diff --git a/include/uapi/nvdimm/ocxlpmem.h b/include/uapi/nvdimm/ocxlpmem.h
index d573bd307e35..9c5c8585c1c2 100644
--- a/include/uapi/nvdimm/ocxlpmem.h
+++ b/include/uapi/nvdimm/ocxlpmem.h
@@ -91,5 +91,6 @@ struct ioctl_ocxlpmem_eventfd {
 #define IOCTL_OCXLPMEM_CONTROLLER_STATS
_IO(OCXLPMEM_MAGIC, 0x34)
 #define IOCTL_OCXLPMEM_EVENTFD _IOW(OCXLPMEM_MAGIC, 
0x35, struct ioctl_ocxlpmem_eventfd)
 #define IOCTL_OCXLPMEM_EVENT_CHECK _IOR(OCXLPMEM_MAGIC, 
0x36, __u64)
+#define IOCTL_OCXLPMEM_REQUEST_HEALTH  _IO(OCXLPMEM_MAGIC, 
0x37)
 
 #endif /* _UAPI_OCXL_SCM_H */
-- 
2.24.1



[PATCH v4 21/25] nvdimm/ocxl: Implement the heartbeat command

2020-03-29 Thread Alastair D'Silva
The heartbeat admin command is a simple admin command that exercises
the communication mechanisms within the controller.

This patch issues a heartbeat command to the card during init to ensure
we can communicate with the card's controller.

Signed-off-by: Alastair D'Silva 
Reviewed-by: Andrew Donnellan 
---
 drivers/nvdimm/ocxl/main.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index a4315472683c..2fbe3f2f77d9 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -272,6 +272,30 @@ static int setup_command_metadata(struct ocxlpmem 
*ocxlpmem)
return 0;
 }
 
+/**
+ * heartbeat() - Issue a heartbeat command to the controller
+ * @ocxlpmem: the device metadata
+ * Return: 0 if the controller responded correctly, negative on error
+ */
+static int heartbeat(struct ocxlpmem *ocxlpmem)
+{
+   int rc;
+
+   mutex_lock(&ocxlpmem->admin_command.lock);
+
+   rc = admin_command_execute(ocxlpmem, ADMIN_COMMAND_HEARTBEAT);
+   if (rc < 0)
+   goto out;
+   if (rc != STATUS_SUCCESS)
+   warn_status(ocxlpmem, "Unexpected status from heartbeat", rc);
+
+   (void)admin_response_handled(ocxlpmem);
+
+out:
+   mutex_unlock(&ocxlpmem->admin_command.lock);
+   return rc;
+}
+
 /**
  * allocate_minor() - Allocate a minor number to use for an OpenCAPI pmem 
device
  * @ocxlpmem: the device metadata
@@ -1460,6 +1484,12 @@ static int probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
goto err;
}
 
+   rc = heartbeat(ocxlpmem);
+   if (rc) {
+   dev_err(&pdev->dev, "Heartbeat failed\n");
+   goto err;
+   }
+
elapsed = 0;
timeout = ocxlpmem->readiness_timeout +
  ocxlpmem->memory_available_timeout;
-- 
2.24.1



[PATCH v4 25/25] MAINTAINERS: Add myself & nvdimm/ocxl to ocxl

2020-03-29 Thread Alastair D'Silva
The OpenCAPI Persistent Memory driver will be maintained as part of
the ppc tree.

I'm also adding myself as an author of the driver & contributor to
the generic ocxl driver.

Signed-off-by: Alastair D'Silva 
---
 MAINTAINERS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f8670989ec91..3fb9a9f576a7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12064,13 +12064,16 @@ F:tools/objtool/
 OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
 M: Frederic Barrat 
 M: Andrew Donnellan 
+M: Alastair D'Silva 
 L: linuxppc-dev@lists.ozlabs.org
 S: Supported
 F: arch/powerpc/platforms/powernv/ocxl.c
+F: arch/powerpc/platforms/powernv/pmem/*
 F: arch/powerpc/include/asm/pnv-ocxl.h
 F: drivers/misc/ocxl/
 F: include/misc/ocxl*
 F: include/uapi/misc/ocxl.h
+F: include/uapi/nvdimm/ocxl-pmem.h
 F: Documentation/userspace-api/accelerators/ocxl.rst
 
 OMAP AUDIO SUPPORT
-- 
2.24.1



[PATCH v4 05/25] ocxl: Address kernel doc errors & warnings

2020-03-29 Thread Alastair D'Silva
This patch addresses warnings and errors from the kernel doc scripts for
the OpenCAPI driver.

It also makes minor tweaks to make the docs more consistent.

Signed-off-by: Alastair D'Silva 
Acked-by: Andrew Donnellan 
---
 drivers/misc/ocxl/config.c| 24 
 drivers/misc/ocxl/ocxl_internal.h |  9 +--
 include/misc/ocxl.h   | 96 ---
 3 files changed, 55 insertions(+), 74 deletions(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index c8e19bfb5ef9..a62e3d7db2bf 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -273,16 +273,16 @@ static int read_afu_info(struct pci_dev *dev, struct 
ocxl_fn_config *fn,
 }
 
 /**
- * Read the template version from the AFU
- * dev: the device for the AFU
- * fn: the AFU offsets
- * len: outputs the template length
- * version: outputs the major<<8,minor version
+ * read_template_version() - Read the template version from the AFU
+ * @dev: the device for the AFU
+ * @fn: the AFU offsets
+ * @len: outputs the template length
+ * @version: outputs the major<<8,minor version
  *
  * Returns 0 on success, negative on failure
  */
 static int read_template_version(struct pci_dev *dev, struct ocxl_fn_config 
*fn,
-   u16 *len, u16 *version)
+u16 *len, u16 *version)
 {
u32 val32;
u8 major, minor;
@@ -476,16 +476,16 @@ static int validate_afu(struct pci_dev *dev, struct 
ocxl_afu_config *afu)
 }
 
 /**
- * Populate AFU metadata regarding LPC memory
- * dev: the device for the AFU
- * fn: the AFU offsets
- * afu: the AFU struct to populate the LPC metadata into
+ * read_afu_lpc_memory_info() - Populate AFU metadata regarding LPC memory
+ * @dev: the device for the AFU
+ * @fn: the AFU offsets
+ * @afu: the AFU struct to populate the LPC metadata into
  *
  * Returns 0 on success, negative on failure
  */
 static int read_afu_lpc_memory_info(struct pci_dev *dev,
-   struct ocxl_fn_config *fn,
-   struct ocxl_afu_config *afu)
+   struct ocxl_fn_config *fn,
+   struct ocxl_afu_config *afu)
 {
int rc;
u32 val32;
diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 345bf843a38e..198e4e4bc51d 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -122,11 +122,12 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
struct ocxl_fn_config *fn, int afu_idx);
 
 /**
- * Update values within a Process Element
+ * ocxl_link_update_pe() - Update values within a Process Element
+ * @link_handle: the link handle associated with the process element
+ * @pasid: the PASID for the AFU context
+ * @tid: the new thread id for the process element
  *
- * link_handle: the link handle associated with the process element
- * pasid: the PASID for the AFU context
- * tid: the new thread id for the process element
+ * Returns 0 on success
  */
 int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
 
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 0a762e387418..357ef1aadbc0 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -62,8 +62,7 @@ struct ocxl_context;
 // Device detection & initialisation
 
 /**
- * Open an OpenCAPI function on an OpenCAPI device
- *
+ * ocxl_function_open() - Open an OpenCAPI function on an OpenCAPI device
  * @dev: The PCI device that contains the function
  *
  * Returns an opaque pointer to the function, or an error pointer (check with 
IS_ERR)
@@ -71,8 +70,7 @@ struct ocxl_context;
 struct ocxl_fn *ocxl_function_open(struct pci_dev *dev);
 
 /**
- * Get the list of AFUs associated with a PCI function device
- *
+ * ocxl_function_afu_list() - Get the list of AFUs associated with a PCI 
function device
  * Returns a list of struct ocxl_afu *
  *
  * @fn: The OpenCAPI function containing the AFUs
@@ -80,8 +78,7 @@ struct ocxl_fn *ocxl_function_open(struct pci_dev *dev);
 struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn);
 
 /**
- * Fetch an AFU instance from an OpenCAPI function
- *
+ * ocxl_function_fetch_afu() - Fetch an AFU instance from an OpenCAPI function
  * @fn: The OpenCAPI function to get the AFU from
  * @afu_idx: The index of the AFU to get
  *
@@ -92,23 +89,20 @@ struct list_head *ocxl_function_afu_list(struct ocxl_fn 
*fn);
 struct ocxl_afu *ocxl_function_fetch_afu(struct ocxl_fn *fn, u8 afu_idx);
 
 /**
- * Take a reference to an AFU
- *
+ * ocxl_afu_get() - Take a reference to an AFU
  * @afu: The AFU to increment the reference count on
  */
 void ocxl_afu_get(struct ocxl_afu *afu);
 
 /**
- * Release a reference to an AFU
- *
+ * ocxl_afu_put() - Release a reference to an AFU
  * @afu: The AFU to decrement the reference count on
  */
 void ocxl_afu_put(struct ocxl_afu *afu);
 
 
 /**
- * Get the configuration information for an 

[PATCH v4 07/25] ocxl: Add functions to map/unmap LPC memory

2020-03-29 Thread Alastair D'Silva
Add functions to map/unmap LPC memory

Signed-off-by: Alastair D'Silva 
Acked-by: Frederic Barrat 
---
 drivers/misc/ocxl/core.c  | 51 +++
 drivers/misc/ocxl/ocxl_internal.h |  3 ++
 include/misc/ocxl.h   | 21 +
 3 files changed, 75 insertions(+)

diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c
index 2531c6cf19a0..75ff14e3882a 100644
--- a/drivers/misc/ocxl/core.c
+++ b/drivers/misc/ocxl/core.c
@@ -210,6 +210,56 @@ static void unmap_mmio_areas(struct ocxl_afu *afu)
release_fn_bar(afu->fn, afu->config.global_mmio_bar);
 }
 
+int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu)
+{
+   struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
+
+   if ((afu->config.lpc_mem_size + afu->config.special_purpose_mem_size) 
== 0)
+   return 0;
+
+   afu->lpc_base_addr = ocxl_link_lpc_map(afu->fn->link, dev);
+   if (afu->lpc_base_addr == 0)
+   return -EINVAL;
+
+   if (afu->config.lpc_mem_size > 0) {
+   afu->lpc_res.start = afu->lpc_base_addr + 
afu->config.lpc_mem_offset;
+   afu->lpc_res.end = afu->lpc_res.start + 
afu->config.lpc_mem_size - 1;
+   }
+
+   if (afu->config.special_purpose_mem_size > 0) {
+   afu->special_purpose_res.start = afu->lpc_base_addr +
+
afu->config.special_purpose_mem_offset;
+   afu->special_purpose_res.end = afu->special_purpose_res.start +
+  
afu->config.special_purpose_mem_size - 1;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_afu_map_lpc_mem);
+
+struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu)
+{
+   return &afu->lpc_res;
+}
+EXPORT_SYMBOL_GPL(ocxl_afu_lpc_mem);
+
+static void unmap_lpc_mem(struct ocxl_afu *afu)
+{
+   struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
+
+   if (afu->lpc_res.start || afu->special_purpose_res.start) {
+   void *link = afu->fn->link;
+
+   // only release the link when the last consumer calls 
release
+   ocxl_link_lpc_release(link, dev);
+
+   afu->lpc_res.start = 0;
+   afu->lpc_res.end = 0;
+   afu->special_purpose_res.start = 0;
+   afu->special_purpose_res.end = 0;
+   }
+}
+
 static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
 {
int rc;
@@ -251,6 +301,7 @@ static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, 
struct pci_dev *dev)
 
 static void deconfigure_afu(struct ocxl_afu *afu)
 {
+   unmap_lpc_mem(afu);
unmap_mmio_areas(afu);
reclaim_afu_pasid(afu);
reclaim_afu_actag(afu);
diff --git a/drivers/misc/ocxl/ocxl_internal.h 
b/drivers/misc/ocxl/ocxl_internal.h
index 2d7575225bd7..7b975a89db7b 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -52,6 +52,9 @@ struct ocxl_afu {
void __iomem *global_mmio_ptr;
u64 pp_mmio_start;
void *private;
+   u64 lpc_base_addr; /* Covers both LPC & special purpose memory */
+   struct resource lpc_res;
+   struct resource special_purpose_res;
 };
 
 enum ocxl_context_status {
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 357ef1aadbc0..d8b0b4d46bfb 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -203,6 +203,27 @@ int ocxl_irq_set_handler(struct ocxl_context *ctx, int 
irq_id,
 
 // AFU Metadata
 
+/**
+ * ocxl_afu_map_lpc_mem() - Map the LPC system & special purpose memory for an 
AFU
+ * Do not call this during device discovery, as there may be multiple
+ * devices on a link, and the memory is mapped for the whole link, not
+ * just one device. It should only be called after all devices have
+ * registered their memory on the link.
+ *
+ * @afu: The AFU that has the LPC memory to map
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ocxl_afu_map_lpc_mem(struct ocxl_afu *afu);
+
+/**
+ * ocxl_afu_lpc_mem() - Get the physical address range of LPC memory for an AFU
+ * @afu: The AFU associated with the LPC memory
+ *
+ * Returns a pointer to the resource struct for the physical address range
+ */
+struct resource *ocxl_afu_lpc_mem(struct ocxl_afu *afu);
+
 /**
  * ocxl_afu_config() - Get a pointer to the config for an AFU
  * @afu: a pointer to the AFU to get the config for
-- 
2.24.1



[PATCH v4 04/25] ocxl: Remove unnecessary externs

2020-03-29 Thread Alastair D'Silva
Function declarations don't need externs; remove the existing ones
so they are consistent with newer code.

Signed-off-by: Alastair D'Silva 
Acked-by: Andrew Donnellan 
Acked-by: Frederic Barrat 
---
 arch/powerpc/include/asm/pnv-ocxl.h | 40 ++---
 include/misc/ocxl.h |  6 ++---
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h 
b/arch/powerpc/include/asm/pnv-ocxl.h
index 560a19bb71b7..205efc41a33c 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -9,29 +9,27 @@
 #define PNV_OCXL_TL_BITS_PER_RATE   4
 #define PNV_OCXL_TL_RATE_BUF_SIZE   ((PNV_OCXL_TL_MAX_TEMPLATE+1) * 
PNV_OCXL_TL_BITS_PER_RATE / 8)
 
-extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
-   u16 *supported);
-extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 
*supported);
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
 
-extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
char *rate_buf, int rate_buf_size);
-extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
-   uint64_t rate_buf_phys, int rate_buf_size);
-
-extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
-extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
-   void __iomem *tfc, void __iomem *pe_handle);
-extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
-   void __iomem **dar, void __iomem **tfc,
-   void __iomem **pe_handle);
-
-extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
-   void **platform_data);
-extern void pnv_ocxl_spa_release(void *platform_data);
-extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int 
pe_handle);
-
-extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
-extern void pnv_ocxl_free_xive_irq(u32 irq);
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+uint64_t rate_buf_phys, int rate_buf_size);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+void __iomem *tfc, void __iomem *pe_handle);
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle);
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void 
**platform_data);
+void pnv_ocxl_spa_release(void *platform_data);
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
+
+int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
+void pnv_ocxl_free_xive_irq(u32 irq);
 u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size);
 void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev);
 
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 06dd5839e438..0a762e387418 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -173,7 +173,7 @@ int ocxl_context_detach(struct ocxl_context *ctx);
  *
  * Returns 0 on success, negative on failure
  */
-extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id);
+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id);
 
 /**
  * Frees an IRQ associated with an AFU context
@@ -182,7 +182,7 @@ extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int 
*irq_id);
  *
  * Returns 0 on success, negative on failure
  */
-extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id);
+int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id);
 
 /**
  * Gets the address of the trigger page for an IRQ
@@ -193,7 +193,7 @@ extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int 
irq_id);
  *
  * returns the trigger page address, or 0 if the IRQ is not valid
  */
-extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id);
+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id);
 
 /**
  * Provide a callback to be called when an IRQ is triggered
-- 
2.24.1



[PATCH v4 14/25] nvdimm/ocxl: Add support for Admin commands

2020-03-29 Thread Alastair D'Silva
Admin commands for these devices are the primary means of interacting
with the device controller to provide functionality beyond the load/store
capabilities offered via the NPU.

For example, SMART data, firmware update, and device error logs are
implemented via admin commands.

This patch requests the metadata required to issue admin commands, and
adds some helper functions to construct the commands and check their
completion.

Signed-off-by: Alastair D'Silva 
---
 drivers/nvdimm/ocxl/main.c  |  65 ++
 drivers/nvdimm/ocxl/ocxlpmem.h  |  50 -
 drivers/nvdimm/ocxl/ocxlpmem_internal.c | 261 
 3 files changed, 375 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/ocxl/main.c b/drivers/nvdimm/ocxl/main.c
index be76acd33d74..8db573036423 100644
--- a/drivers/nvdimm/ocxl/main.c
+++ b/drivers/nvdimm/ocxl/main.c
@@ -217,6 +217,58 @@ static int register_lpc_mem(struct ocxlpmem *ocxlpmem)
return 0;
 }
 
+/**
+ * extract_command_metadata() - Extract command data from MMIO & save it for 
further use
+ * @ocxlpmem: the device metadata
+ * @offset: The base address of the command data structures (address of CREQO)
+ * @command_metadata: A pointer to the command metadata to populate
+ * Return: 0 on success, negative on failure
+ */
+static int extract_command_metadata(struct ocxlpmem *ocxlpmem, u32 offset,
+   struct command_metadata *command_metadata)
+{
+   int rc;
+   u64 tmp;
+
+   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, offset,
+OCXL_LITTLE_ENDIAN, &tmp);
+   if (rc)
+   return rc;
+
+   command_metadata->request_offset = tmp >> 32;
+   command_metadata->response_offset = tmp & 0xFFFFFFFF;
+
+   rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, offset + 8,
+OCXL_LITTLE_ENDIAN, &tmp);
+   if (rc)
+   return rc;
+
+   command_metadata->data_offset = tmp >> 32;
+   command_metadata->data_size = tmp & 0xFFFFFFFF;
+
+   command_metadata->id = 0;
+
+   return 0;
+}
+
+/**
+ * setup_command_metadata() - Set up the command metadata
+ * @ocxlpmem: the device metadata
+ */
+static int setup_command_metadata(struct ocxlpmem *ocxlpmem)
+{
+   int rc;
+
+   mutex_init(&ocxlpmem->admin_command.lock);
+
+   rc = extract_command_metadata(ocxlpmem, GLOBAL_MMIO_ACMA_CREQO,
+ &ocxlpmem->admin_command);
+   if (rc)
+   return rc;
+
+   return 0;
+}
+
 /**
  * allocate_minor() - Allocate a minor number to use for an OpenCAPI pmem 
device
  * @ocxlpmem: the device metadata
@@ -421,6 +473,14 @@ static int probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
 
ocxlpmem->pdev = pci_dev_get(pdev);
 
+   ocxlpmem->timeouts[ADMIN_COMMAND_ERRLOG] = 2000; // ms
+   ocxlpmem->timeouts[ADMIN_COMMAND_HEARTBEAT] = 100; // ms
+   ocxlpmem->timeouts[ADMIN_COMMAND_SMART] = 100; // ms
+   ocxlpmem->timeouts[ADMIN_COMMAND_CONTROLLER_DUMP] = 1000; // ms
+   ocxlpmem->timeouts[ADMIN_COMMAND_CONTROLLER_STATS] = 100; // ms
+   ocxlpmem->timeouts[ADMIN_COMMAND_SHUTDOWN] = 1000; // ms
+   ocxlpmem->timeouts[ADMIN_COMMAND_FW_UPDATE] = 16000; // ms
+
pci_set_drvdata(pdev, ocxlpmem);
 
ocxlpmem->ocxl_fn = ocxl_function_open(pdev);
@@ -467,6 +527,11 @@ static int probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
goto err;
}
 
+   if (setup_command_metadata(ocxlpmem)) {
+   dev_err(&pdev->dev, "Could not read command metadata\n");
+   goto err;
+   }
+
elapsed = 0;
timeout = ocxlpmem->readiness_timeout +
  ocxlpmem->memory_available_timeout;
diff --git a/drivers/nvdimm/ocxl/ocxlpmem.h b/drivers/nvdimm/ocxl/ocxlpmem.h
index 3eadbe19f6d0..b72b3f909fc3 100644
--- a/drivers/nvdimm/ocxl/ocxlpmem.h
+++ b/drivers/nvdimm/ocxl/ocxlpmem.h
@@ -7,6 +7,7 @@
 #include 
 
 #define LABEL_AREA_SIZEBIT_ULL(PA_SECTION_SHIFT)
+#define DEFAULT_TIMEOUT 100
 
 #define GLOBAL_MMIO_CHI0x000
 #define GLOBAL_MMIO_CHIC   0x008
@@ -57,6 +58,7 @@
 #define GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED  BIT_ULL(5) // CDC
 #define GLOBAL_MMIO_HCI_REQ_HEALTH_PERFBIT_ULL(6) // 
CHPD
 
+// must be maintained with admin_command_names in ocxlpmem_internal.c
 #define ADMIN_COMMAND_HEARTBEAT0x00u
 #define ADMIN_COMMAND_SHUTDOWN 0x01u
 #define ADMIN_COMMAND_FW_UPDATE0x02u
@@ -68,6 +70,13 @@
 #define ADMIN_COMMAND_CMD_CAPS 0x08u
 #define ADMIN_COMMAND_MAX  0x08u
 
+#define NS_COMMAND_SECURE_ERASE0x20ull
+
+#define NS_RESPONSE_SECURE_ERASE_ACCESSIBLE_SUCCESS 0x20
+#define NS_RESPONSE_SECURE_ERASE_ACCESSIBLE_ATTEMPTED 0x28
+#define NS_RESPONSE_SECURE_ERASE_DEFECTIVE_SUCCESS 0x30
+#define NS_RESPONSE_SECURE_ERASE_DEFECTIVE_ATTEMPTED 0x38
+
 #define 

[PATCH v4 11/25] powerpc: Enable the OpenCAPI Persistent Memory driver for powernv_defconfig

2020-03-29 Thread Alastair D'Silva
This patch enables the OpenCAPI Persistent Memory driver, as well
as DAX support, for the 'powernv' defconfig.

DAX is not a strict requirement for the functioning of the driver, but it
is likely that a user will want to create a DAX device on top of their
persistent memory device.

Signed-off-by: Alastair D'Silva 
Reviewed-by: Andrew Donnellan 
---
 arch/powerpc/configs/powernv_defconfig | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/configs/powernv_defconfig 
b/arch/powerpc/configs/powernv_defconfig
index 71749377d164..921d77bbd3d2 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -348,3 +348,8 @@ CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=m
 CONFIG_VHOST_NET=m
 CONFIG_PRINTK_TIME=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_OCXL_PMEM=m
+CONFIG_DEV_DAX=m
+CONFIG_DEV_DAX_PMEM=m
+CONFIG_FS_DAX=y
-- 
2.24.1



[PATCH v4 02/25] mm/memory_hotplug: Allow check_hotplug_memory_addressable to be called from drivers

2020-03-29 Thread Alastair D'Silva
When setting up OpenCAPI connected persistent memory, the range check may
not be performed until quite late (or perhaps not at all, if the user does
not establish a DAX device).

This patch makes the range check callable so we can perform the check while
probing the OpenCAPI Persistent Memory device.

Signed-off-by: Alastair D'Silva 
Reviewed-by: Andrew Donnellan 
---
 include/linux/memory_hotplug.h | 5 +
 mm/memory_hotplug.c| 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f4d59155f3d4..9a19ae0d7e31 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -337,6 +337,11 @@ static inline void __remove_memory(int nid, u64 start, u64 
size) {}
 extern void set_zone_contiguous(struct zone *zone);
 extern void clear_zone_contiguous(struct zone *zone);
 
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+int check_hotplug_memory_addressable(unsigned long pfn,
+unsigned long nr_pages);
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+
 extern void __ref free_area_init_core_hotplug(int nid);
 extern int __add_memory(int nid, u64 start, u64 size);
 extern int add_memory(int nid, u64 start, u64 size);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0a54ffac8c68..14945f033594 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -276,8 +276,8 @@ static int check_pfn_span(unsigned long pfn, unsigned long 
nr_pages,
return 0;
 }
 
-static int check_hotplug_memory_addressable(unsigned long pfn,
-   unsigned long nr_pages)
+int check_hotplug_memory_addressable(unsigned long pfn,
+unsigned long nr_pages)
 {
const u64 max_addr = PFN_PHYS(pfn + nr_pages) - 1;
 
-- 
2.24.1



[PATCH v4 03/25] powerpc/powernv: Map & release OpenCAPI LPC memory

2020-03-29 Thread Alastair D'Silva
This patch adds OPAL calls to powernv so that the OpenCAPI
driver can map & release LPC (Lowest Point of Coherency) memory.

Signed-off-by: Alastair D'Silva 
Reviewed-by: Andrew Donnellan 
---
 arch/powerpc/include/asm/pnv-ocxl.h   |  2 ++
 arch/powerpc/platforms/powernv/ocxl.c | 43 +++
 2 files changed, 45 insertions(+)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h 
b/arch/powerpc/include/asm/pnv-ocxl.h
index 7de82647e761..560a19bb71b7 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -32,5 +32,7 @@ extern int pnv_ocxl_spa_remove_pe_from_cache(void 
*platform_data, int pe_handle)
 
 extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
 extern void pnv_ocxl_free_xive_irq(u32 irq);
+u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size);
+void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev);
 
 #endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/platforms/powernv/ocxl.c 
b/arch/powerpc/platforms/powernv/ocxl.c
index 8c65aacda9c8..f13119a7c026 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -475,6 +475,49 @@ void pnv_ocxl_spa_release(void *platform_data)
 }
 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
 
+u64 pnv_ocxl_platform_lpc_setup(struct pci_dev *pdev, u64 size)
+{
+   struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+   struct pnv_phb *phb = hose->private_data;
+   u32 bdfn = pci_dev_id(pdev);
+   __be64 base_addr_be64;
+   u64 base_addr;
+   int rc;
+
+   rc = opal_npu_mem_alloc(phb->opal_id, bdfn, size, &base_addr_be64);
+   if (rc) {
+   dev_warn(&pdev->dev,
+"OPAL could not allocate LPC memory, rc=%d\n", rc);
+   return 0;
+   }
+
+   base_addr = be64_to_cpu(base_addr_be64);
+
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+   rc = check_hotplug_memory_addressable(base_addr >> PAGE_SHIFT,
+ size >> PAGE_SHIFT);
+   if (rc)
+   return 0;
+#endif
+
+   return base_addr;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_setup);
+
+void pnv_ocxl_platform_lpc_release(struct pci_dev *pdev)
+{
+   struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+   struct pnv_phb *phb = hose->private_data;
+   u32 bdfn = pci_dev_id(pdev);
+   int rc;
+
+   rc = opal_npu_mem_release(phb->opal_id, bdfn);
+   if (rc)
+   dev_warn(&pdev->dev,
+"OPAL reported rc=%d when releasing LPC memory\n", rc);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_platform_lpc_release);
+
 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
 {
struct spa_data *data = (struct spa_data *) platform_data;
-- 
2.24.1



[PATCH v4 00/25] Add support for OpenCAPI Persistent Memory devices

2020-03-29 Thread Alastair D'Silva
This series adds support for OpenCAPI Persistent Memory devices on bare metal 
(arch/powernv), exposing them as nvdimms so that we can make use of the 
existing infrastructure. There already exists a driver for the same devices 
abstracted through PowerVM (arch/pseries): 
arch/powerpc/platforms/pseries/papr_scm.c

These devices are connected via OpenCAPI, and present as LPC (lowest coherence 
point) memory to the system. Practically, that means that memory on these cards 
could be treated as conventional, cache-coherent memory.

Since the devices are connected via OpenCAPI, they are not enumerated via ACPI. 
Instead, OpenCAPI links present as pseudo-PCI bridges, with devices below them.

This series introduces a driver that exposes the memory on these cards as 
nvdimms, with each card getting its own bus. This is somewhat complicated by 
the fact that the cards do not have out of band persistent storage for 
metadata, so 1 SECTION_SIZE's (see SPARSEMEM) worth of storage is carved out of 
the top of the card storage to implement the ndctl_config_* calls.

The driver is not responsible for configuring the NPU (NVLink Processing Unit) 
BARs to map the LPC memory from the card into the system's physical address 
space, instead, it requests this to be done via OPAL calls (typically 
implemented by Skiboot).

The series is structured as follows:
 - Required infrastructure changes & cleanup
 - A minimal driver implementation
 - Implementing additional features within the driver

Changelog:
V4:
  - Rebase on next-20200320
  - Bump copyright to 2020
  - Ensure all uapi headers use C89 compatible comments (missed ocxlpmem.h)
  - Move the driver back to drivers/nvdimm/ocxl, after confirmation
that this location is desirable
  - Rename ocxl.c to ocxlpmem.c (+ support files)
  - Rename all ocxl_pmem to ocxlpmem
  - Address checkpatch --strict issues
  - "powerpc/powernv: Add OPAL calls for LPC memory alloc/release"
- Pass base address as __be64
  - "ocxl: Tally up the LPC memory on a link & allow it to be mapped"
- Address checkpatch spacing warnings
- Reword blurb
- Reword size description for ocxl_link_add_lpc_mem()
- Add an early exit in ocxl_link_lpc_release() to avoid triggering
  bogus warnings if called after ocxl_link_lpc_map() fails
  - "powerpc/powernv: Add OPAL calls for LPC memory alloc/release"
- Reword blurb
  - "powerpc/powernv: Map & release OpenCAPI LPC memory"
- Reword blurb
  - Move minor_idr init from file_init() to ocxlpmem_init() (fixes runtime error
in "nvdimm: Add driver for OpenCAPI Persistent Memory")
  - Wrap long lines
  - "nvdimm: Add driver for OpenCAPI Storage Class Memory"
- Remove '+ 1' workaround from serial number->cookie assignment
- Drop out of memory message for ocxlpmem in probe()
- Fix leaks of ocxlpmem & ocxlpmem->ocxl_fn in probe()
- remove struct ocxlpmem_function0, it didn't value add
- factor out err_unregistered label in probe
- Address more checkpatch warnings
- get/put the pci dev on probe/free
- Drop ocxlpmem_ prefix from static functions
- Propagate errors up from called functions in probe()
- Set MODULE_LICENSE to GPLv2
- Add myself as module author
- Call nvdimm_bus_unregister() in remove() to release references
- Don't call devm_memunmap on metadata_address, the release handler on
 the device already deals with this
  - "nvdimm/ocxl: Read the capability registers & wait for device ready"
- Fix mask for read_latency
- Fold in is_usable logic into timeout to remove error message race
- propagate bad rc from read_device_metadata
  - "nvdimm/ocxl: Add register addresses & status values to the header"
- Add comments for register abbreviations where names have been
  expanded
- Add missing status for blocked on background task
- Alias defines for firmware update status to show that the duplication
  of values is intentional
  - "nvdimm/ocxl: Register a character device for userspace to interact with"
- Add lock around minors IDR, delete the cdev before device_unregister
- Propagate errors up from called functions in probe()
  - "nvdimm/ocxl: Add support for Admin commands"
- Fix typo in setup_command_data error message, and drop 'ocxl' from it
- Drop vestigial CHI read from admin_command_request
- Change command ID mismatch message to dev_err, and return an error
- Use jiffies to implement admin_command_complete_timeout()
- Flesh out blurb
- Create a wrapper to issue the command & wait for timeout
  - "nvdimm/ocxl: Add support for near storage commands"
- dropped (will submit with the patches for nvdimm overwrite)
  - "nvdimm/ocxl: Implement the Read Error Log command"
- Remove stray blank line
- change misplaced goto to an early exit in 

[PATCH 4/4] powerpc/eeh: Clean up edev cleanup for VFs

2020-03-29 Thread Sam Bobroff
Because the bus notifier calls eeh_rmv_from_parent_pe() (via
eeh_remove_device()) when a VF is removed, the call in
remove_sriov_vf_pdns() is redundant.

So remove the call.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/pci_dn.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 4e654df55969..f6ac25f7af63 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -236,14 +236,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev)
 */
edev = pdn_to_eeh_dev(pdn);
if (edev) {
-   /*
-* We allocate pci_dn's for the totalvfs count,
-* but only only the vfs that were activated
-* have a configured PE.
-*/
-   if (edev->pe)
-   eeh_rmv_from_parent_pe(edev);
-
+   WARN_ON_ONCE(edev->pe);
pdn->edev = NULL;
kfree(edev);
}
-- 
2.22.0.216.g00a2a96fc9



[PATCH 3/4] powerpc/eeh: Remove workaround from eeh_add_device_late()

2020-03-29 Thread Sam Bobroff
When EEH device state was released asynchronously by the device
release handler, it was possible for an outstanding reference to
prevent its release and it was necessary to work around that if a
device was re-discovered at the same PCI location.

Now that the state is released synchronously that is no longer
possible and the workaround is no longer necessary.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 23 +--
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c36c5a7db5ca..12c248a16527 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1206,28 +1206,7 @@ void eeh_add_device_late(struct pci_dev *dev)
eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
-
-   /*
-* The EEH cache might not be removed correctly because of
-* unbalanced kref to the device during unplug time, which
-* relies on pcibios_release_device(). So we have to remove
-* that here explicitly.
-*/
-   if (edev->pdev) {
-   eeh_rmv_from_parent_pe(edev);
-   eeh_addr_cache_rmv_dev(edev->pdev);
-   eeh_sysfs_remove_device(edev->pdev);
-
-   /*
-* We definitely should have the PCI device removed
-* though it wasn't correctly. So we needn't call
-* into error handler afterwards.
-*/
-   edev->mode |= EEH_DEV_NO_HANDLER;
-
-   edev->pdev = NULL;
-   dev->dev.archdata.edev = NULL;
-   }
+   WARN_ON_ONCE(edev->pdev);
 
if (eeh_has_flag(EEH_PROBE_MODE_DEV))
eeh_ops->probe(pdn, NULL);
-- 
2.22.0.216.g00a2a96fc9



[PATCH 1/4] powerpc/eeh: fix pseries_eeh_configure_bridge()

2020-03-29 Thread Sam Bobroff
If a device is hot unplugged during EEH recovery, it's possible for the
RTAS call to ibm,configure-pe in pseries_eeh_configure() to return
parameter error (-3), however negative return values are not checked
for and this leads to an infinite loop.

Fix this by correctly bailing out on negative values.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 893ba3f562c4..c4ef03bec0de 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -605,7 +605,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
config_addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
 
-   if (!ret)
+   if (ret <= 0)
return ret;
 
/*
-- 
2.22.0.216.g00a2a96fc9



[PATCH 2/4] powerpc/eeh: Release EEH device state synchronously

2020-03-29 Thread Sam Bobroff
EEH device state is currently removed (by eeh_remove_device()) during
the device release handler, which is invoked as the device's reference
count drops to zero. This may take some time, or forever, as other
threads may hold references.

However, the PCI device state is released synchronously by
pci_stop_and_remove_bus_device(). This mismatch causes problems, for
example the device may be re-discovered as a new device before the
release handler has been called, leaving the PCI and EEH state
mismatched.

So instead, call eeh_remove_device() from the bus device removal
handlers, which are called synchronously in the removal path.

Signed-off-by: Sam Bobroff 
---
 arch/powerpc/kernel/eeh.c | 26 ++
 arch/powerpc/kernel/pci-hotplug.c |  2 --
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 17cb3e9b5697..c36c5a7db5ca 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1106,6 +1106,32 @@ static int eeh_init(void)
 
 core_initcall_sync(eeh_init);
 
+static int eeh_device_notifier(struct notifier_block *nb,
+  unsigned long action, void *data)
+{
+   struct device *dev = data;
+
+   switch (action) {
+   case BUS_NOTIFY_DEL_DEVICE:
+   eeh_remove_device(to_pci_dev(dev));
+   break;
+   default:
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+   .notifier_call = eeh_device_notifier,
+};
+
+static __init int eeh_set_bus_notifier(void)
+{
+   bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+   return 0;
+}
+arch_initcall(eeh_set_bus_notifier);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device node
  * @pdn: PCI device node for which to set up EEH
diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index d6a67f814983..28e9aa274f64 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -57,8 +57,6 @@ void pcibios_release_device(struct pci_dev *dev)
struct pci_controller *phb = pci_bus_to_host(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
 
-   eeh_remove_device(dev);
-
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
 
-- 
2.22.0.216.g00a2a96fc9



[PATCH 0/4] powerpc/eeh: Release EEH device state synchronously

2020-03-29 Thread Sam Bobroff
Hi everyone,

Here are some fixes and cleanups that have come from other work but that I
think stand on their own.

Only one patch ("Release EEH device state synchronously", suggested by Oliver
O'Halloran) is a significant change: it moves the cleanup of some EEH device
data out of the (possibly asynchronous) device release handler and into the
(synchronously called) bus notifier. This is useful for future work as it makes
it easier to reason about the lifetimes of EEH structures.

Note that I've left a few WARN_ON_ONCEs in the code because I'm paranoid, but I
have not been able to hit them during testing.

Cheers,
Sam.

Sam Bobroff (4):
  powerpc/eeh: fix pseries_eeh_configure_bridge()
  powerpc/eeh: Release EEH device state synchronously
  powerpc/eeh: Remove workaround from eeh_add_device_late()
  powerpc/eeh: Clean up edev cleanup for VFs

 arch/powerpc/kernel/eeh.c| 49 +++-
 arch/powerpc/kernel/pci-hotplug.c|  2 -
 arch/powerpc/kernel/pci_dn.c |  9 +---
 arch/powerpc/platforms/pseries/eeh_pseries.c |  2 +-
 4 files changed, 29 insertions(+), 33 deletions(-)

-- 
2.22.0.216.g00a2a96fc9



Re: linux-next: build failure after merge of the tip tree

2020-03-29 Thread Stephen Rothwell
Hi H.J.,

On Sun, 29 Mar 2020 20:53:42 -0700 "H.J. Lu"  wrote:
>
> Please see my enclosing email.   Is anyone from PPC community reading emails?

What you really need is an Ack from the PowerPC people for the fix you
suggested and then the fix should go in the same series that is now
causing the failure (preferably before the problematic (for PowerPC)
patch).

For reference, the change is (white space damaged):

diff --git a/arch/powerpc/kernel/vmlinux.lds.S 
b/arch/powerpc/kernel/vmlinux.lds.S
index b4c89a1acebb..076b3e8a849d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -365,9 +365,12 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(*.EMB.apuinfo)
-   *(.glink .iplt .plt .rela* .comment)
+   *(.glink .iplt .plt .comment)
*(.gnu.version*)
*(.gnu.attributes)
*(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+   *(.rela*)
+#endif
}
 }

-- 
Cheers,
Stephen Rothwell


pgpRDnjg8B0lk.pgp
Description: OpenPGP digital signature


[PATCH] powerpc/fadump: fix race between pstore write and fadump crash trigger

2020-03-29 Thread Sourabh Jain
When we enter into fadump crash path via system reset we fail to update
the pstore.

On the system reset path we first update the pstore then we go for fadump
crash. But the problem here is when all the CPUs try to get the pstore
lock to initiate the pstore write, only one CPU will acquire the lock
and proceed with the pstore write. Meanwhile, the other CPUs that did not get
the lock do not wait for their turn to write to the pstore and simply
proceed with the next operation, which is the fadump crash. One of the CPUs
that proceeded on the fadump crash path triggers the crash and does not wait
for the CPU that got the pstore lock to complete the pstore update.

Timeline diagram depicting the sequence of events that leads to an
unsuccessful pstore update when we hit fadump crash path via system reset.

 12 3...  n   CPU Threads
 || | |
 || | |
 Reached to   -->|--->|>| --->|
 system reset|| | |
 path|| | |
 || | |
 Try to   -->|--->|>|>|
 acquire the || | |
 pstore lock || | |
 || | |
 || | |
 Got the  -->| +->| | |<-+
 pstore lock | |  | | |  |-->  Didn't get the
 | --+ lock and moving
 || | |ahead on fadump
 || | |crash path
 || | |
  Begins the  -->|| | |
  process to || | |<-- Got the chance to
  update the || | |trigger the crash
  pstore | -> | |... <-   |
 | |  | | |   |
 | |  | | |   |<-- Triggers the
 | |  | | |   |crash
 | |  | | |   |  ^
 | |  | | |   |  |
  Writing to  -->| |  | | |   |  |
  pstore | |  | | |   |  |
   |  |  |
   ^   |__|  |
   |   CPU Relax |
   | |
   +-+
  |
  v
Race: crash triggered before pstore
  update completes

In order to avoid the race between the CPU who proceeds with the pstore
and the CPU who triggers the crash, a delay of 100 milliseconds is added
on fadump crash path to allow pstore update to complete before we trigger
the crash.

Signed-off-by: Sourabh Jain 
---
 arch/powerpc/kernel/fadump.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ff0114aeba9b..9872bb528389 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -32,6 +32,15 @@
 #include 
 #include 
 
+
+/* The CPU who acquired the lock to trigger the fadump crash should
+ * wait for other CPUs to complete their tasks (for example updating
+ * pstore) before triggering the crash.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT  100
+
 static struct fw_dump fw_dump;
 
 static void __init fadump_reserve_crash_area(u64 base);
@@ -634,6 +643,13 @@ void crash_fadump(struct pt_regs *regs, const char *str)
 
fdh->online_mask = *cpu_online_mask;
 
+
+   /* If we reached here via system reset path then let's
+* wait for other CPU to complete the pstore update.
+*/
+   if (TRAP(regs) == 0x100)
+   mdelay(CRASH_TIMEOUT);
+
fw_dump.ops->fadump_trigger(fdh, str);
 }
 
-- 
2.21.1



[PATCH V2 7/9] powerpc/ps3: Add check for otheros image size

2020-03-29 Thread Geoff Levand
The ps3's otheros flash loader has a size limit of 16 MiB for the
uncompressed image.  If that limit is exceeded, output the
flash image file as 'otheros-too-big.bld'.

Signed-off-by: Geoff Levand 
---
 arch/powerpc/boot/wrapper | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 35ace40d9fc2..ab1e3ddc79f3 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -571,7 +571,20 @@ ps3)
 count=$overlay_size bs=1
 
 odir="$(dirname "$ofile.bin")"
-rm -f "$odir/otheros.bld"
-gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+
+# The ps3's flash loader has a size limit of 16 MiB for the uncompressed
+# image.  If a compressed image that exceeded this limit is written to
+# flash the loader will decompress that image until the 16 MiB limit is
+# reached, then enter the system reset vector of the partially decompressed
+# image.  No warning is issued.
+rm -f "$odir"/{otheros,otheros-too-big}.bld
+size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' 
' -f1)
+bld="otheros.bld"
+if [ $size -gt $((0x1000000)) ]; then
+bld="otheros-too-big.bld"
+echo "  INFO: Uncompressed kernel is too large to program into PS3 
flash memory;" \
+"size=0x$(printf "%x\n" $size), limit=0x1000000."
+fi
+gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld"
 ;;
 esac
-- 
2.20.1



Re: [PATCH 7/9] powerpc/ps3: Add check for otheros image size

2020-03-29 Thread Geoff Levand
Hi Geert,

On 3/29/20 7:00 AM, Geert Uytterhoeven wrote:
>> --- a/arch/powerpc/boot/wrapper
>> +++ b/arch/powerpc/boot/wrapper
>>
>>  odir="$(dirname "$ofile.bin")"
>> -rm -f "$odir/otheros.bld"
>> -gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
>> +
>> +# The ps3's flash loader has a size limit of 16 MiB for the uncompressed
>> +# image.  If a compressed image that exceeded this limit is written to
>> +# flash the loader will decompress that image until the 16 MiB limit is
>> +# reached, then enter the system reset vector of the partially 
>> decompressed
>> +# image.  No warning is issued.
>> +rm -f "$odir"/{otheros,otheros-too-big}.bld
>> +size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut 
>> -d' ' -f1)
>> +bld="otheros.bld"
>> +[ $size -le 16777216 ] || bld="otheros-too-big.bld"
>> +gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld"
>>  ;;
>>  esac
> 
> Why not print an error message and exit 1 instead, like is done for
> other fatal errors?

This is not really a fatal error for the entire build.  The default
make target will build both a vmlinux file and a .bld file.  The
.bld file is the one that can be programmed to the OtherOS flash
memory (bld = boot loader).  Even if the .bld file is too big, a
big vmlinux file from such a build would be completely fine for
petitboot to load.

It may be good to print an 'info' message though.  I'll post an
updated patch.

-Geoff




[PATCH v5 1/6] powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and kaslr_early_init()

2020-03-29 Thread Jason Yan
Some code refactor in kaslr_legal_offset() and kaslr_early_init(). No
functional change. This is a preparation for KASLR fsl_booke64.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/mm/nohash/kaslr_booke.c | 34 +++-
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c 
b/arch/powerpc/mm/nohash/kaslr_booke.c
index 4a75f2d9bf0e..6ebff31fefcc 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -25,6 +25,7 @@ struct regions {
unsigned long pa_start;
unsigned long pa_end;
unsigned long kernel_size;
+   unsigned long linear_sz;
unsigned long dtb_start;
unsigned long dtb_end;
unsigned long initrd_start;
@@ -260,11 +261,23 @@ static __init void get_cell_sizes(const void *fdt, int 
node, int *addr_cells,
*size_cells = fdt32_to_cpu(*prop);
 }
 
-static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long 
index,
-  unsigned long offset)
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long 
random)
 {
unsigned long koffset = 0;
unsigned long start;
+   unsigned long index;
+   unsigned long offset;
+
+   /*
+* Decide which 64M we want to start
+* Only use the low 8 bits of the random seed
+*/
+   index = random & 0xFF;
+   index %= regions.linear_sz / SZ_64M;
+
+   /* Decide offset inside 64M */
+   offset = random % (SZ_64M - regions.kernel_size);
+   offset = round_down(offset, SZ_16K);
 
while ((long)index >= 0) {
offset = memstart_addr + index * SZ_64M + offset;
@@ -289,10 +302,9 @@ static inline __init bool kaslr_disabled(void)
 static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t 
size,
  unsigned long kernel_sz)
 {
-   unsigned long offset, random;
+   unsigned long random;
unsigned long ram, linear_sz;
u64 seed;
-   unsigned long index;
 
kaslr_get_cmdline(dt_ptr);
if (kaslr_disabled())
@@ -333,22 +345,12 @@ static unsigned long __init kaslr_choose_location(void 
*dt_ptr, phys_addr_t size
regions.dtb_start = __pa(dt_ptr);
regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
regions.kernel_size = kernel_sz;
+   regions.linear_sz = linear_sz;
 
get_initrd_range(dt_ptr);
get_crash_kernel(dt_ptr, ram);
 
-   /*
-* Decide which 64M we want to start
-* Only use the low 8 bits of the random seed
-*/
-   index = random & 0xFF;
-   index %= linear_sz / SZ_64M;
-
-   /* Decide offset inside 64M */
-   offset = random % (SZ_64M - kernel_sz);
-   offset = round_down(offset, SZ_16K);
-
-   return kaslr_legal_offset(dt_ptr, index, offset);
+   return kaslr_legal_offset(dt_ptr, random);
 }
 
 /*
-- 
2.17.2



[PATCH v5 4/6] powerpc/fsl_booke/64: do not clear the BSS for the second pass

2020-03-29 Thread Jason Yan
The BSS section has already been cleared out in the first pass. No need to
clear it again. This can save some time when booting with KASLR
enabled.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/kernel/head_64.S | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 454129a3c259..9354c292b709 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -913,6 +913,13 @@ start_here_multiplatform:
bl  relative_toc
tovirt(r2,r2)
 
+   /* Do not clear the BSS for the second pass if randomized */
+   LOAD_REG_ADDR(r3, kernstart_virt_addr)
+   ld  r3,0(r3)
+   LOAD_REG_IMMEDIATE(r4, KERNELBASE)
+   cmpdr3,r4
+   bne 4f
+
/* Clear out the BSS. It may have been done in prom_init,
 * already but that's irrelevant since prom_init will soon
 * be detached from the kernel completely. Besides, we need
-- 
2.17.2



[PATCH v5 6/6] powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst and add 64bit part

2020-03-29 Thread Jason Yan
Now we support both 32 and 64 bit KASLR for fsl booke. Add document for
64 bit part and rename kaslr-booke32.rst to kaslr-booke.rst.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 Documentation/powerpc/index.rst   |  2 +-
 .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 ---
 2 files changed, 32 insertions(+), 5 deletions(-)
 rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%)

diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
index 0d45f0fc8e57..3bad36943b22 100644
--- a/Documentation/powerpc/index.rst
+++ b/Documentation/powerpc/index.rst
@@ -20,7 +20,7 @@ powerpc
 hvcs
 imc
 isa-versions
-kaslr-booke32
+kaslr-booke
 mpc52xx
 papr_hcalls
 pci_iov_resource_on_powernv
diff --git a/Documentation/powerpc/kaslr-booke32.rst 
b/Documentation/powerpc/kaslr-booke.rst
similarity index 59%
rename from Documentation/powerpc/kaslr-booke32.rst
rename to Documentation/powerpc/kaslr-booke.rst
index 8b259fdfdf03..27a862963242 100644
--- a/Documentation/powerpc/kaslr-booke32.rst
+++ b/Documentation/powerpc/kaslr-booke.rst
@@ -1,15 +1,18 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-===
-KASLR for Freescale BookE32
-===
+=
+KASLR for Freescale BookE
+=
 
 The word KASLR stands for Kernel Address Space Layout Randomization.
 
 This document tries to explain the implementation of the KASLR for
-Freescale BookE32. KASLR is a security feature that deters exploit
+Freescale BookE. KASLR is a security feature that deters exploit
 attempts relying on knowledge of the location of kernel internals.
 
+KASLR for Freescale BookE32
+-
+
 Since CONFIG_RELOCATABLE has already supported, what we need to do is
 map or copy kernel to a proper place and relocate. Freescale Book-E
 parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1
@@ -38,5 +41,29 @@ bit of the entropy to decide the index of the 64M zone. Then 
we chose a
 
   kernstart_virt_addr
 
+
+KASLR for Freescale BookE64
+---
+
+The implementation for Freescale BookE64 is similar to BookE32. One
+difference is that Freescale BookE64 set up a TLB mapping of 1G during
+booting. Another difference is that ppc64 needs the kernel to be
+64K-aligned. So we can randomize the kernel in this 1G mapping and make
+it 64K-aligned. This can save some code to creat another TLB map at early
+boot. The disadvantage is that we only have about 1G/64K = 16384 slots to
+put the kernel in::
+
+KERNELBASE
+
+  64K |--> kernel <--|
+   |  |  |
++--+--+--++--+--+--+--+--+--+--+--+--++--+--+
+|  |  |  ||  |  |  |  |  |  |  |  |  ||  |  |
++--+--+--++--+--+--+--+--+--+--+--+--++--+--+
+| |1G
+|->   offset<-|
+
+  kernstart_virt_addr
+
 To enable KASLR, set CONFIG_RANDOMIZE_BASE = y. If KASLR is enable and you
 want to disable it at runtime, add "nokaslr" to the kernel cmdline.
-- 
2.17.2



[PATCH v5 5/6] powerpc/fsl_booke/64: clear the original kernel if randomized

2020-03-29 Thread Jason Yan
The original kernel still exists in the memory, clear it now.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/mm/nohash/kaslr_booke.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c 
b/arch/powerpc/mm/nohash/kaslr_booke.c
index 24ad34641869..6c46f24cc53d 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -382,8 +382,10 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)
unsigned long kernel_sz;
 
if (IS_ENABLED(CONFIG_PPC64)) {
-   if (__run_at_load == 1)
+   if (__run_at_load == 1) {
+   kaslr_late_init();
return;
+   }
 
/* Get the first memblock size */
early_get_first_memblock_info(dt_ptr, &size);
-- 
2.17.2



[PATCH v5 3/6] powerpc/fsl_booke/64: implement KASLR for fsl_booke64

2020-03-29 Thread Jason Yan
The implementation for Freescale BookE64 is similar to BookE32. One
difference is that Freescale BookE64 set up a TLB mapping of 1G during
booting. Another difference is that ppc64 needs the kernel to be
64K-aligned. So we can randomize the kernel in this 1G mapping and make
it 64K-aligned. This can save some code to create another TLB map at
early boot. The disadvantage is that we only have about 1G/64K = 16384
slots to put the kernel in.

To support secondary cpu boot up, a variable __kaslr_offset was added in
first_256B section. This can help secondary cpu get the kaslr offset
before the 1:1 mapping has been setup.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
---
 arch/powerpc/Kconfig |  2 +-
 arch/powerpc/kernel/exceptions-64e.S | 10 
 arch/powerpc/kernel/head_64.S|  6 +++
 arch/powerpc/kernel/setup_64.c   |  3 ++
 arch/powerpc/mm/mmu_decl.h   | 20 
 arch/powerpc/mm/nohash/kaslr_booke.c | 75 +++-
 6 files changed, 83 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 497b7d0b2d7e..0c76601fdd59 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -564,7 +564,7 @@ config RELOCATABLE
 
 config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
-   depends on (FSL_BOOKE && FLATMEM && PPC32)
+   depends on (PPC_FSL_BOOK3E && FLATMEM)
depends on RELOCATABLE
help
  Randomizes the virtual address at which the kernel image is
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 1b9b174bee86..260cf1f1e71c 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1378,6 +1378,7 @@ skpinv:   addir6,r6,1 /* 
Increment */
 1: mflrr6
addir6,r6,(2f - 1b)
tovirt(r6,r6)
+   add r6,r6,r19
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
mtspr   SPRN_SRR0,r6
@@ -1400,6 +1401,7 @@ skpinv:   addir6,r6,1 /* 
Increment */
 
/* We translate LR and return */
tovirt(r8,r8)
+   add r8,r8,r19
mtlrr8
blr
 
@@ -1528,6 +1530,7 @@ a2_tlbinit_code_end:
  */
 _GLOBAL(start_initialization_book3e)
mflrr28
+   li  r19, 0
 
/* First, we need to setup some initial TLBs to map the kernel
 * text, data and bss at PAGE_OFFSET. We don't have a real mode
@@ -1570,6 +1573,12 @@ _GLOBAL(book3e_secondary_core_init)
cmplwi  r4,0
bne 2f
 
+   li  r19, 0
+#ifdef CONFIG_RANDOMIZE_BASE
+   LOAD_REG_ADDR_PIC(r19, __kaslr_offset)
+   ld  r19,0(r19)
+   rlwinm  r19,r19,0,0,5
+#endif
/* Setup TLB for this core */
bl  initial_tlb_book3e
 
@@ -1602,6 +1611,7 @@ _GLOBAL(book3e_secondary_core_init)
lis r3,PAGE_OFFSET@highest
sldir3,r3,32
or  r28,r28,r3
+   add r28,r28,r19
 1: mtlrr28
blr
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index ad79fddb974d..454129a3c259 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -104,6 +104,12 @@ __secondary_hold_acknowledge:
.8byte  0x0
 
 #ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_RANDOMIZE_BASE
+   .globl  __kaslr_offset
+__kaslr_offset:
+   .8byte  0x0
+#endif
+
/* This flag is set to 1 by a loader if the kernel should run
 * at the loaded address instead of the linked address.  This
 * is used by kexec-tools to keep the the kdump kernel in the
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e05e6dd67ae6..836e202dfd5b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -67,6 +67,7 @@
 #include 
 #include 
 
+#include 
 #include "setup.h"
 
 int spinning_secondaries;
@@ -300,6 +301,8 @@ void __init early_setup(unsigned long dt_ptr)
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
 
+   kaslr_early_init(__va(dt_ptr), 0);
+
udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
 
/*
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 605129b5ccdf..6efbd7fd88a4 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -139,22 +139,16 @@ extern unsigned long calc_cam_sz(unsigned long ram, 
unsigned long virt,
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+#endif
 void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
 extern int is_second_reloc;
-#endif
+extern unsigned long __kaslr_offset;
+extern unsigned int 

[PATCH v5 0/6] implement KASLR for powerpc/fsl_booke/64

2020-03-29 Thread Jason Yan
This is an attempt to implement KASLR for Freescale BookE64, which is based on
my earlier implementation for Freescale BookE32:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=131718&state=*

The implementation for Freescale BookE64 is similar to BookE32. One
difference is that Freescale BookE64 sets up a TLB mapping of 1G during
booting. Another difference is that ppc64 needs the kernel to be
64K-aligned. So we can randomize the kernel within this 1G mapping and
keep it 64K-aligned. This saves the code that would otherwise be needed
to create another TLB mapping at early boot. The disadvantage is that we
only have about 1G/64K = 16384 slots to put the kernel in.

KERNELBASE

      64K                     |--> kernel <--|
       |                      |              |
    +--+--+--+    +--+--+--+--+--+--+--+--+--+    +--+--+
    |  |  |  |....|  |  |  |  |  |  |  |  |  |....|  |  |
    +--+--+--+    +--+--+--+--+--+--+--+--+--+    +--+--+
    |                         |                        1G
    |----->   offset    <-----|

                          kernstart_virt_addr

I'm not sure whether the number of slots is enough or whether the design
has any defects. If you have any better ideas, I would be happy to hear them.

Thank you all.

v4->v5:
  Fix "-Werror=maybe-uninitialized" compile error.
  Fix typo "similar as" -> "similar to".
v3->v4:
  Do not define __kaslr_offset as a fixed symbol. Reference __run_at_load and
__kaslr_offset by symbol instead of magic offsets.
  Use IS_ENABLED(CONFIG_PPC32) instead of #ifdef CONFIG_PPC32.
  Change kaslr-booke32 to kaslr-booke in index.rst
  Switch some instructions to 64-bit.
v2->v3:
  Fix build error when KASLR is disabled.
v1->v2:
  Add __kaslr_offset for the secondary cpu boot up.

Jason Yan (6):
  powerpc/fsl_booke/kaslr: refactor kaslr_legal_offset() and
kaslr_early_init()
  powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper
  powerpc/fsl_booke/64: implement KASLR for fsl_booke64
  powerpc/fsl_booke/64: do not clear the BSS for the second pass
  powerpc/fsl_booke/64: clear the original kernel if randomized
  powerpc/fsl_booke/kaslr: rename kaslr-booke32.rst to kaslr-booke.rst
and add 64bit part

 Documentation/powerpc/index.rst   |  2 +-
 .../{kaslr-booke32.rst => kaslr-booke.rst}| 35 ++-
 arch/powerpc/Kconfig  |  2 +-
 arch/powerpc/kernel/exceptions-64e.S  | 23 +
 arch/powerpc/kernel/head_64.S | 13 +++
 arch/powerpc/kernel/setup_64.c|  3 +
 arch/powerpc/mm/mmu_decl.h| 23 +++--
 arch/powerpc/mm/nohash/kaslr_booke.c  | 91 +--
 8 files changed, 147 insertions(+), 45 deletions(-)
 rename Documentation/powerpc/{kaslr-booke32.rst => kaslr-booke.rst} (59%)

-- 
2.17.2



[PATCH v5 2/6] powerpc/fsl_booke/64: introduce reloc_kernel_entry() helper

2020-03-29 Thread Jason Yan
Like the 32bit code, we introduce reloc_kernel_entry() helper to prepare
for the KASLR 64bit version. And move the C declaration of this function
out of CONFIG_PPC32 and use long instead of int for the parameter 'addr'.

Signed-off-by: Jason Yan 
Cc: Scott Wood 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/exceptions-64e.S | 13 +
 arch/powerpc/mm/mmu_decl.h   |  3 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index e4076e3c072d..1b9b174bee86 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1679,3 +1679,16 @@ _GLOBAL(setup_ehv_ivors)
 _GLOBAL(setup_lrat_ivor)
SET_IVOR(42, 0x340) /* LRAT Error */
blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+   mfmsr   r7
+   rlwinm  r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+   mtspr   SPRN_SRR0,r4
+   mtspr   SPRN_SRR1,r7
+   rfi
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7097e07a209a..605129b5ccdf 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -140,9 +140,10 @@ extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
 void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
-void reloc_kernel_entry(void *fdt, int addr);
 extern int is_second_reloc;
 #endif
+
+void reloc_kernel_entry(void *fdt, long addr);
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
 
-- 
2.17.2



[PATCH v2] powerpc/ptrace: Do not return ENOSYS if invalid syscall

2020-03-29 Thread Thadeu Lima de Souza Cascardo
If a tracer sets the syscall number to an invalid one, allow the return
value set by the tracer to be returned to the tracee.

The test for NR_syscalls is already at entry_64.S, and it's at
do_syscall_trace_enter only to skip audit and trace.

After this, two previously failing seccomp_bpf selftests complete just
fine. The failing tests were using ptrace to change the syscall to return
an error or a fake value, but they were failing because -ENOSYS was always
returned.

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 arch/powerpc/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 25c0424e8868..557ae4bc2331 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3314,7 +3314,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
/* Avoid trace and audit when syscall is invalid. */
if (regs->gpr[0] >= NR_syscalls)
-   goto skip;
+   return regs->gpr[0];
 
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->gpr[0]);
-- 
2.17.1



Re: [PATCH V2 0/3] mm/debug: Add more arch page table helper tests

2020-03-29 Thread Anshuman Khandual


On 03/27/2020 12:30 PM, Christophe Leroy wrote:
> 
> 
> On 03/27/2020 06:46 AM, Anshuman Khandual wrote:
>>
>> On 03/26/2020 08:53 PM, Christophe Leroy wrote:
>>>
>>>
>>> Le 26/03/2020 à 03:23, Anshuman Khandual a écrit :


 On 03/24/2020 10:52 AM, Anshuman Khandual wrote:
> This series adds more arch page table helper tests. The new tests here are
> either related to core memory functions or advanced arch pgtable helpers.
> This also creates a documentation file enlisting all expected semantics as
> suggested by Mike Rapoport (https://lkml.org/lkml/2020/1/30/40).
>
> This series has been tested on arm64 and x86 platforms.

 If folks can test these patches out on remaining ARCH_HAS_DEBUG_VM_PGTABLE
 enabled platforms i.e s390, arc, powerpc (32 and 64), that will be really
 appreciated. Thank you.

>>>
>>> On powerpc 8xx (PPC32), I get:
>>>
>>> [   53.338368] debug_vm_pgtable: debug_vm_pgtable: Validating architecture 
>>> page table helpers
>>> [   53.347403] [ cut here ]
>>> [   53.351832] WARNING: CPU: 0 PID: 1 at mm/debug_vm_pgtable.c:647 
>>> debug_vm_pgtable+0x280/0x3f4
>>
>> mm/debug_vm_pgtable.c:647 ?
>>
>> With the following commits in place
>>
>> 53a8338ce (HEAD) Documentation/mm: Add descriptions for arch page table 
>> helper
>> 5d4913fc1 mm/debug: Add tests validating arch advanced page table helpers
>> bcaf120a7 mm/debug: Add tests validating arch page table helpers for core 
>> features
>> d6ed5a4a5 x86/memory: Drop pud_mknotpresent()
>> 0739d1f8d mm/debug: Add tests validating architecture page table helpers
>> 16fbf79b0 (tag: v5.6-rc7) Linux 5.6-rc7
> 
> I have:
> 
> facaa5eb5909 (HEAD -> helpers0) mm/debug: Add tests validating arch advanced 
> page table helpers
> 6389fed515fc mm/debug: Add tests validating arch page table helpers for core 
> features
> dc14ecc8b94e mm/debug: add tests validating architecture page table helpers
> c6624071c338 (origin/merge, merge) Automatic merge of branches 'master', 
> 'next' and 'fixes' into merge
> 58e05c5508e6 Automatic merge of branches 'master', 'next' and 'fixes' into 
> merge
> 1b649e0bcae7 (origin/master, origin/HEAD) Merge 
> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
> 
> origin is https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
> 
> I can't see your last patch in powerpc mailing list 
> (https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=166237)

My bad, did not update the last patch with required lists (will fix).

> 
>>
>> mm/debug_vm_pgtable.c:647 is here.
> 
> Line 647 is:
> 
> WARN_ON(!pte_same(pte, __swp_entry_to_pte(swp)));

Both sets of definitions suggest that the last three bits (if present)
on the PTE will be discarded during PTE->SWP->PTE conversion which
might be leading to this mismatch and subsequent failure.

arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/book3s/32/pgtable.h

#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x)   ((pte_t) { (x).val << 3 })

There are also other architectures (microblaze, sh, etc.) where these
conversions do not always preserve the original PTE value. On powerpc64, the
conversion sets _PAGE_PTE back irrespective of whether the bit was originally
set or not.

It is probably wrong to expect that the PTE->SWP->PTE conversion will always
preserve the original value. So I am wondering whether it is worth changing
this test to accommodate all such architectures, or whether it should just be
dropped instead.

> 
> 
>>
>> #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
>> static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
>> {
>>  swp_entry_t swp;
>>  pmd_t pmd;  -> Line #647
>>
>>  pmd = pfn_pmd(pfn, prot);
>>  swp = __pmd_to_swp_entry(pmd);
>>  WARN_ON(!pmd_same(pmd, __swp_entry_to_pmd(swp)));
>> }
>> #else
>> static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { }
>> #end
>>
>> Did I miss something ?
>>
> 
> [...]
> 
>> Could you please point me to the exact test which is failing ?
>>
>>> [   53.519778] Freeing unused kernel memory: 608K
>>>
>>>
>> So I assume that the system came up to runtime just fine, apart from
>> the above warning message.
>>
> 
> Yes it boots fine otherwise.

Cool, that is good to know.

> 
> Christophe
> 


Re: [PATCH 7/9] powerpc/ps3: Add check for otheros image size

2020-03-29 Thread Geert Uytterhoeven
Hi Geoff,

On Fri, Mar 27, 2020 at 9:26 PM Geoff Levand  wrote:
> The ps3's otheros flash loader has a size limit of 16 MiB for the
> uncompressed image.  If that limit is reached, output the
> flash image file as 'otheros-too-big.bld'.
>
> Signed-off-by: Geoff Levand 

Thanks for your patch!

> --- a/arch/powerpc/boot/wrapper
> +++ b/arch/powerpc/boot/wrapper
> @@ -570,7 +570,16 @@ ps3)
>  count=$overlay_size bs=1
>
>  odir="$(dirname "$ofile.bin")"
> -rm -f "$odir/otheros.bld"
> -gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
> +
> +# The ps3's flash loader has a size limit of 16 MiB for the uncompressed
> +# image.  If a compressed image that exceeded this limit is written to
> +# flash the loader will decompress that image until the 16 MiB limit is
> +# reached, then enter the system reset vector of the partially 
> decompressed
> +# image.  No warning is issued.
> +rm -f "$odir"/{otheros,otheros-too-big}.bld
> +size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut 
> -d' ' -f1)
> +bld="otheros.bld"
> +[ $size -le 16777216 ] || bld="otheros-too-big.bld"
> +gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld"
>  ;;
>  esac

Why not print an error message and exit 1 instead, like is done for
other fatal errors?

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH 0/2] mm/thp: Rename pmd_mknotpresent() as pmd_mknotvalid()

2020-03-29 Thread Anshuman Khandual



On 03/20/2020 10:24 AM, Anshuman Khandual wrote:
> This series renames pmd_mknotpresent() as pmd_mknotvalid(). Before that it
> drops an existing pmd_mknotpresent() definition from powerpc platform which
> was never required as it defines its pmdp_invalidate() through subscribing
> __HAVE_ARCH_PMDP_INVALIDATE. This does not create any functional change.
> 
> This rename was suggested by Catalin during a previous discussion while we
> were trying to change the THP helpers on arm64 platform for migration.
> 
> https://patchwork.kernel.org/patch/11019637/
> 
> This series is based on v5.6-rc6.
> 
> Boot tested on arm64 and x86 platforms.
> Built tested on many other platforms including the ones changed here.

Gentle ping, any updates regarding this ?


Re: [PATCH v2] powerpc/kprobes: Blacklist functions running with MMU disabled on PPC32

2020-03-29 Thread Christophe Leroy




Le 27/03/2020 à 10:07, Naveen N. Rao a écrit :

Christophe Leroy wrote:

kprobe does not handle events happening in real mode, all
functions running with MMU disabled have to be blacklisted.

As already done for PPC64, do it for PPC32.

Signed-off-by: Christophe Leroy 
---
v2:
- Don't rename nonrecoverable as local, mark it noprobe instead.
- Add missing linux/kprobes.h include in pq2.c
---
 arch/powerpc/include/asm/ppc_asm.h   | 10 +++
 arch/powerpc/kernel/cpu_setup_6xx.S  |  4 +-
 arch/powerpc/kernel/entry_32.S   | 65 
 arch/powerpc/kernel/fpu.S    |  1 +
 arch/powerpc/kernel/idle_6xx.S   |  2 +-
 arch/powerpc/kernel/idle_e500.S  |  2 +-
 arch/powerpc/kernel/l2cr_6xx.S   |  2 +-
 arch/powerpc/kernel/misc.S   |  2 +
 arch/powerpc/kernel/misc_32.S    |  4 +-
 arch/powerpc/kernel/swsusp_32.S  |  6 +-
 arch/powerpc/kernel/vector.S |  1 +
 arch/powerpc/mm/book3s32/hash_low.S  | 38 ++--
 arch/powerpc/mm/mem.c    |  2 +
 arch/powerpc/platforms/52xx/lite5200_sleep.S |  2 +
 arch/powerpc/platforms/82xx/pq2.c    |  3 +
 arch/powerpc/platforms/83xx/suspend-asm.S    |  1 +
 arch/powerpc/platforms/powermac/cache.S  |  2 +
 arch/powerpc/platforms/powermac/sleep.S  | 13 ++--
 18 files changed, 86 insertions(+), 74 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h

index 6b03dff61a05..e8f34ba89497 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -267,8 +267,18 @@ GLUE(.,name):
 .pushsection "_kprobe_blacklist","aw";    \
 PPC_LONG (entry) ;    \
 .popsection
+#define _NOKPROBE_ENTRY(entry)    \
+    _ASM_NOKPROBE_SYMBOL(entry)    \
+    _ENTRY(entry)
+#define _NOKPROBE_GLOBAL(entry)    \
+    _ASM_NOKPROBE_SYMBOL(entry)    \
+    _GLOBAL(entry)
 #else
 #define _ASM_NOKPROBE_SYMBOL(entry)
+#define _NOKPROBE_ENTRY(entry)    \
+    _ENTRY(entry)
+#define _NOKPROBE_GLOBAL(entry)    \
+    _GLOBAL(entry)
 #endif


Michael previously preferred not to include NOKPROBE variants of those
macros, since he would like to see some cleanups there first:

https://patchwork.ozlabs.org/patch/696138/


Ok

[...]


@@ -194,8 +188,7 @@ transfer_to_handler:
 bt-    31-TLF_NAPPING,4f
 bt-    31-TLF_SLEEPING,7f
 #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
-    .globl transfer_to_handler_cont
-transfer_to_handler_cont:
+_NOKPROBE_ENTRY(transfer_to_handler_cont)
 3:
 mflr    r9
 tovirt_novmstack r2, r2 /* set r2 to current */
@@ -297,6 +290,7 @@ reenable_mmu:
  * On kernel stack overflow, load up an initial stack pointer
  * and call StackOverflow(regs), which should not return.
  */
+_ASM_NOKPROBE_SYMBOL(stack_ovf)
 stack_ovf:


The current convention is to add the NOKPROBE annotation at the _end_ of 
the associated function/symbol...


Ok. For big functions that looks impractical, but I'll do that.

[...]


@@ -1033,17 +1026,15 @@ exc_exit_restart_end:
 li    r10, 0
 stw    r10, 8(r1)
 REST_2GPRS(9, r1)
-    .globl exc_exit_restart
+_NOKPROBE_ENTRY(exc_exit_restart)
 exc_exit_restart:
 lwz    r11,_NIP(r1)
 lwz    r12,_MSR(r1)
-exc_exit_start:
 mtspr    SPRN_SRR0,r11
 mtspr    SPRN_SRR1,r12
 REST_2GPRS(11, r1)
 lwz    r1,GPR1(r1)
-    .globl exc_exit_restart_end
-exc_exit_restart_end:
+.Lexc_exit_restart_end:


I think it would be good to break this into smaller patches to handle 
specific code paths, if possible. At the very least, it would be good to 
move changes to symbol visibility to a separate patch since this also 
changes the names printed in a backtrace.


Ok.

I removed most symbol visibility changes. I only kept the ones in 
book3s32/hash_low.S and did a separate patch for it.


I split into patches per platform, then one bigger for everything in 
arch/powerpc/kernel/ except entries, then I did one for exception entry, 
one for syscall exit and one for exception exit.


Christophe


[PATCH 11/12] powerpc/entry32: Blacklist syscall exit points for kprobe.

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode.

The very last part of syscall cannot support a trap.
Add a symbol syscall_exit_finish to identify that part and
blacklist it from kprobe.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9a1a45d6038a..7035e838d422 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -463,6 +463,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
+syscall_exit_finish:
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr   SPRN_NRI, r0
 #endif
@@ -470,6 +471,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
mtspr   SPRN_SRR1,r8
SYNC
RFI
+_ASM_NOKPROBE_SYMBOL(syscall_exit_finish)
 #ifdef CONFIG_44x
 2: li  r7,0
iccci   r0,r0
@@ -604,6 +606,7 @@ ret_from_kernel_syscall:
mtspr   SPRN_SRR1, r10
SYNC
RFI
+_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
 
 /*
  * The fork/clone functions need to copy the full register set into
-- 
2.25.0



[PATCH 12/12] powerpc/entry32: Blacklist exception exit points for kprobe.

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode.

The very last part of exception exits cannot support a trap.
Blacklist them from kprobe.

While we are at it, remove exc_exit_start symbol which is not
used to avoid having to blacklist it.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 7035e838d422..e161fb7a0568 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -810,6 +810,7 @@ fast_exception_return:
lwz r11,GPR11(r11)
SYNC
RFI
+_ASM_NOKPROBE_SYMBOL(fast_exception_return)
 
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 /* check if the exception happened in a restartable section */
@@ -1049,6 +1050,8 @@ exc_exit_restart:
 exc_exit_restart_end:
SYNC
RFI
+_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
+_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end)
 
 #else /* !(CONFIG_4xx || CONFIG_BOOKE) */
/*
@@ -1070,7 +1073,6 @@ exc_exit_restart_end:
 exc_exit_restart:
lwz r11,_NIP(r1)
lwz r12,_MSR(r1)
-exc_exit_start:
mtspr   SPRN_SRR0,r11
mtspr   SPRN_SRR1,r12
REST_2GPRS(11, r1)
@@ -1080,6 +1082,7 @@ exc_exit_restart_end:
PPC405_ERR77_SYNC
rfi
b   .   /* prevent prefetch past rfi */
+_ASM_NOKPROBE_SYMBOL(exc_exit_restart)
 
 /*
  * Returning from a critical interrupt in user mode doesn't need
@@ -1193,6 +1196,7 @@ ret_from_crit_exc:
mtspr   SPRN_SRR0,r9;
mtspr   SPRN_SRR1,r10;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
 #endif /* CONFIG_40x */
 
 #ifdef CONFIG_BOOKE
@@ -1204,6 +1208,7 @@ ret_from_crit_exc:
RESTORE_xSRR(SRR0,SRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
 
.globl  ret_from_debug_exc
 ret_from_debug_exc:
@@ -1214,6 +1219,7 @@ ret_from_debug_exc:
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
+_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
 
.globl  ret_from_mcheck_exc
 ret_from_mcheck_exc:
@@ -1225,6 +1231,7 @@ ret_from_mcheck_exc:
RESTORE_xSRR(DSRR0,DSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
 #endif /* CONFIG_BOOKE */
 
 /*
-- 
2.25.0



[PATCH 10/12] powerpc/entry32: Blacklist exception entry points for kprobe.

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode.

As exception entry points are running with MMU disabled,
blacklist them.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 94f78c03cb79..9a1a45d6038a 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -51,6 +51,7 @@ mcheck_transfer_to_handler:
mfspr   r0,SPRN_DSRR1
stw r0,_DSRR1(r11)
/* fall through */
+_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler)
 
.globl  debug_transfer_to_handler
 debug_transfer_to_handler:
@@ -59,6 +60,7 @@ debug_transfer_to_handler:
mfspr   r0,SPRN_CSRR1
stw r0,_CSRR1(r11)
/* fall through */
+_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler)
 
.globl  crit_transfer_to_handler
 crit_transfer_to_handler:
@@ -94,6 +96,7 @@ crit_transfer_to_handler:
rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
+_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #endif
 
 #ifdef CONFIG_40x
@@ -115,6 +118,7 @@ crit_transfer_to_handler:
rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
+_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #endif
 
 /*
@@ -127,6 +131,7 @@ crit_transfer_to_handler:
.globl  transfer_to_handler_full
 transfer_to_handler_full:
SAVE_NVGPRS(r11)
+_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full)
/* fall through */
 
.globl  transfer_to_handler
@@ -286,6 +291,8 @@ reenable_mmu:
lwz r2, GPR2(r11)
b   fast_exception_return
 #endif
+_ASM_NOKPROBE_SYMBOL(transfer_to_handler)
+_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
 #ifndef CONFIG_VMAP_STACK
 /*
-- 
2.25.0



[PATCH 06/12] powerpc/32s: Make local symbols non visible in hash_low.

2020-03-29 Thread Christophe Leroy
In hash_low.S, a lot of named local symbols are used instead of
numbers to improve code readability. However, they don't need to be
visible.

In order to ease blacklisting of functions running with MMU
disabled for kprobe, rename the symbols to .Lsymbols in order
to hide them as if they were numbered labels.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s32/hash_low.S | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/mm/book3s32/hash_low.S 
b/arch/powerpc/mm/book3s32/hash_low.S
index 6d236080cb1a..2afa3fa2012d 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -81,7 +81,7 @@ _GLOBAL(hash_page)
rlwinm. r8,r8,0,0,20/* extract pt base address */
 #endif
 #ifdef CONFIG_SMP
-   beq-hash_page_out   /* return if no mapping */
+   beq-.Lhash_page_out /* return if no mapping */
 #else
/* XXX it seems like the 601 will give a machine fault on the
   rfi if its alignment is wrong (bottom 4 bits of address are
@@ -109,11 +109,11 @@ _GLOBAL(hash_page)
 #if (PTE_FLAGS_OFFSET != 0)
addir8,r8,PTE_FLAGS_OFFSET
 #endif
-retry:
+.Lretry:
lwarx   r6,0,r8 /* get linux-style pte, flag word */
andc.   r5,r3,r6/* check access & ~permission */
 #ifdef CONFIG_SMP
-   bne-hash_page_out   /* return if access not permitted */
+   bne-.Lhash_page_out /* return if access not permitted */
 #else
bnelr-
 #endif
@@ -128,7 +128,7 @@ retry:
 #endif /* CONFIG_SMP */
 #endif /* CONFIG_PTE_64BIT */
stwcx.  r5,0,r8 /* attempt to update PTE */
-   bne-retry   /* retry if someone got there first */
+   bne-.Lretry /* retry if someone got there first */
 
mfsrin  r3,r4   /* get segment reg for segment */
 #ifndef CONFIG_VMAP_STACK
@@ -156,7 +156,7 @@ retry:
 #endif
 
 #ifdef CONFIG_SMP
-hash_page_out:
+.Lhash_page_out:
eieio
lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li  r0,0
@@ -358,7 +358,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 1: LDPTEu  r6,HPTE_SIZE(r4)/* get next PTE */
CMPPTE  0,r6,r5
bdnzf   2,1b/* loop while ctr != 0 && !cr0.eq */
-   beq+found_slot
+   beq+.Lfound_slot
 
patch_site  0f, patch__hash_page_B
/* Search the secondary PTEG for a matching PTE */
@@ -370,7 +370,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 2: LDPTEu  r6,HPTE_SIZE(r4)
CMPPTE  0,r6,r5
bdnzf   2,2b
-   beq+found_slot
+   beq+.Lfound_slot
xorir5,r5,PTE_H /* clear H bit again */
 
/* Search the primary PTEG for an empty slot */
@@ -379,7 +379,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 1: LDPTEu  r6,HPTE_SIZE(r4)/* get next PTE */
TST_V(r6)   /* test valid bit */
bdnzf   2,1b/* loop while ctr != 0 && !cr0.eq */
-   beq+found_empty
+   beq+.Lfound_empty
 
/* update counter of times that the primary PTEG is full */
lis r4, (primary_pteg_full - PAGE_OFFSET)@ha
@@ -397,7 +397,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 2: LDPTEu  r6,HPTE_SIZE(r4)
TST_V(r6)
bdnzf   2,2b
-   beq+found_empty
+   beq+.Lfound_empty
xorir5,r5,PTE_H /* clear H bit again */
 
/*
@@ -435,9 +435,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 
 #ifndef CONFIG_SMP
/* Store PTE in PTEG */
-found_empty:
+.Lfound_empty:
STPTE   r5,0(r4)
-found_slot:
+.Lfound_slot:
STPTE   r8,HPTE_SIZE/2(r4)
 
 #else /* CONFIG_SMP */
@@ -458,8 +458,8 @@ found_slot:
  * We do however have to make sure that the PTE is never in an invalid
  * state with the V bit set.
  */
-found_empty:
-found_slot:
+.Lfound_empty:
+.Lfound_slot:
CLR_V(r5,r0)/* clear V (valid) bit in PTE */
STPTE   r5,0(r4)
sync
-- 
2.25.0



[PATCH 09/12] powerpc/32: Blacklist functions running with MMU disabled for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/cpu_setup_6xx.S | 2 ++
 arch/powerpc/kernel/entry_32.S  | 3 +++
 arch/powerpc/kernel/fpu.S   | 1 +
 arch/powerpc/kernel/idle_6xx.S  | 1 +
 arch/powerpc/kernel/idle_e500.S | 1 +
 arch/powerpc/kernel/l2cr_6xx.S  | 1 +
 arch/powerpc/kernel/misc.S  | 2 ++
 arch/powerpc/kernel/misc_32.S   | 2 ++
 arch/powerpc/kernel/swsusp_32.S | 2 ++
 arch/powerpc/kernel/vector.S| 1 +
 10 files changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S 
b/arch/powerpc/kernel/cpu_setup_6xx.S
index f6517f67265a..f8b5ff64b604 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -288,6 +288,7 @@ _GLOBAL(__init_fpu_registers)
mtmsr   r10
isync
blr
+_ASM_NOKPROBE_SYMBOL(__init_fpu_registers)
 
 
 /* Definitions for the table use to save CPU states */
@@ -483,4 +484,5 @@ _GLOBAL(__restore_cpu_setup)
 1:
mtcrr7
blr
+_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e652f6506888..94f78c03cb79 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -313,6 +313,7 @@ stack_ovf:
mtspr   SPRN_SRR1,r10
SYNC
RFI
+_ASM_NOKPROBE_SYMBOL(stack_ovf)
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -1337,6 +1338,7 @@ nonrecoverable:
bl  unrecoverable_exception
/* shouldn't return */
b   4b
+_ASM_NOKPROBE_SYMBOL(nonrecoverable)
 
.section .bss
.align  2
@@ -1391,4 +1393,5 @@ _GLOBAL(enter_rtas)
mtspr   SPRN_SRR0,r8
mtspr   SPRN_SRR1,r9
RFI /* return to caller */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
 #endif /* CONFIG_PPC_RTAS */
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 3235a8da6af7..1dfccf58fbb1 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -119,6 +119,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
/* restore registers and return */
/* we haven't used ctr or xer or lr */
blr
+_ASM_NOKPROBE_SYMBOL(load_up_fpu)
 
 /*
  * save_fpu(tsk)
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 433d97bea1f3..69df840f7253 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -187,6 +187,7 @@ BEGIN_FTR_SECTION
mtspr   SPRN_HID1, r9
 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
b   transfer_to_handler_cont
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
 
.data
 
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 308f499e146c..72c85b6f3898 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -90,3 +90,4 @@ _GLOBAL(power_save_ppc32_restore)
 #endif
 
b   transfer_to_handler_cont
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 2020d255585f..5f07aa5e9851 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -455,5 +455,6 @@ _GLOBAL(__inval_enable_L1)
sync
 
blr
+_ASM_NOKPROBE_SYMBOL(__inval_enable_L1)
 
 
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 65f9f731c229..5be96feccb55 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -36,6 +36,8 @@ _GLOBAL(add_reloc_offset)
add r3,r3,r5
mtlrr0
blr
+_ASM_NOKPROBE_SYMBOL(reloc_offset)
+_ASM_NOKPROBE_SYMBOL(add_reloc_offset)
 
.align  3
 2: PPC_LONG 1b
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d80212be8698..1edcc41e15fc 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -246,6 +246,7 @@ _GLOBAL(real_readb)
sync
isync
blr
+_ASM_NOKPROBE_SYMBOL(real_readb)
 
/*
  * Do an IO access in real mode
@@ -263,6 +264,7 @@ _GLOBAL(real_writeb)
sync
isync
blr
+_ASM_NOKPROBE_SYMBOL(real_writeb)
 
 #endif /* CONFIG_40x */
 
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index cbdf86228eaa..f73f4d72fea4 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -395,6 +395,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 
li  r3,0
blr
+_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume)
 
 /* FIXME:This construct is actually not useful since we don't shut
  * down the instruction MMU, we could just flip back MSR-DR on.
@@ -406,4 +407,5 @@ turn_on_mmu:
sync
isync
rfi
+_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
 
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 25c14a0981bf..403ab2c32dc8 100644
--- 

[PATCH 08/12] powerpc/rtas: Remove machine_check_in_rtas()

2020-03-29 Thread Christophe Leroy
machine_check_in_rtas() is just a trap.

Do the trap directly in the machine check exception handler.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S | 6 --
 arch/powerpc/kernel/head_32.S  | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index a6371fb8f761..e652f6506888 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1391,10 +1391,4 @@ _GLOBAL(enter_rtas)
mtspr   SPRN_SRR0,r8
mtspr   SPRN_SRR1,r9
RFI /* return to caller */
-
-   .globl  machine_check_in_rtas
-machine_check_in_rtas:
-   twi 31,0,0
-   /* XXX load up BATs and panic */
-
 #endif /* CONFIG_PPC_RTAS */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index daaa153950c2..cbd30cac2496 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -297,7 +297,7 @@ MachineCheck:
cmpwi   cr1, r4, 0
 #endif
beq cr1, machine_check_tramp
-   b   machine_check_in_rtas
+   twi 31, 0, 0
 #else
b   machine_check_tramp
 #endif
-- 
2.25.0



[PATCH 07/12] powerpc/32s: Blacklist functions running with MMU disabled for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s32/hash_low.S | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/mm/book3s32/hash_low.S 
b/arch/powerpc/mm/book3s32/hash_low.S
index 2afa3fa2012d..f5f836477009 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -163,6 +163,7 @@ _GLOBAL(hash_page)
stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
blr
 #endif /* CONFIG_SMP */
+_ASM_NOKPROBE_SYMBOL(hash_page)
 
 /*
  * Add an entry for a particular page to the hash table.
@@ -267,6 +268,7 @@ _GLOBAL(add_hash_page)
lwz r0,4(r1)
mtlr r0
blr
+_ASM_NOKPROBE_SYMBOL(add_hash_page)
 
 /*
  * This routine adds a hardware PTE to the hash table.
@@ -472,6 +474,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 
sync /* make sure pte updates get to memory */
blr
+_ASM_NOKPROBE_SYMBOL(create_hpte)
 
.section .bss
.align  2
@@ -628,6 +631,7 @@ _GLOBAL(flush_hash_pages)
isync
blr
 EXPORT_SYMBOL(flush_hash_pages)
+_ASM_NOKPROBE_SYMBOL(flush_hash_pages)
 
 /*
  * Flush an entry from the TLB
@@ -665,6 +669,7 @@ _GLOBAL(_tlbie)
sync
 #endif /* CONFIG_SMP */
blr
+_ASM_NOKPROBE_SYMBOL(_tlbie)
 
 /*
  * Flush the entire TLB. 603/603e only
@@ -706,3 +711,4 @@ _GLOBAL(_tlbia)
isync
 #endif /* CONFIG_SMP */
blr
+_ASM_NOKPROBE_SYMBOL(_tlbia)
-- 
2.25.0



[PATCH 04/12] powerpc/powermac: Blacklist functions running with MMU disabled for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, so all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/platforms/powermac/cache.S | 2 ++
 arch/powerpc/platforms/powermac/sleep.S | 5 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
index da69e0fcb4f1..ced225415486 100644
--- a/arch/powerpc/platforms/powermac/cache.S
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -184,6 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 
mtlr r10
blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_75x)
 
 /* This code is for 745x processors */
 flush_disable_745x:
@@ -351,4 +352,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
mtmsr   r11 /* restore DR and EE */
isync
blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_745x)
 #endif /* CONFIG_PPC_BOOK3S_32 */
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index bd6085b470b7..f9a680fdd9c4 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -244,7 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtmsr   r2
isync
b   1b
-
+_ASM_NOKPROBE_SYMBOL(low_cpu_die)
 /*
  * Here is the resume code.
  */
@@ -282,6 +282,7 @@ _GLOBAL(core99_wake_up)
lwz r1,0(r3)
 
/* Pass thru to older resume code ... */
+_ASM_NOKPROBE_SYMBOL(core99_wake_up)
 /*
  * Here is the resume code for older machines.
  * r1 has the physical address of SL_PC(sp).
@@ -429,6 +430,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
lwz r0,4(r1)
mtlr r0
blr
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
 
 turn_on_mmu:
mflr r4
@@ -438,6 +440,7 @@ turn_on_mmu:
sync
isync
rfi
+_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
 
 #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
 
-- 
2.25.0



[PATCH 02/12] powerpc/82xx: Blacklist pq2_restart() for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, so all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/platforms/82xx/pq2.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
index 1cdd5ed9d896..3b5cb39a564c 100644
--- a/arch/powerpc/platforms/82xx/pq2.c
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -10,6 +10,8 @@
  * Copyright (c) 2006 MontaVista Software, Inc.
  */
 
+#include <linux/kprobes.h>
+
 #include 
 #include 
 #include 
@@ -29,6 +31,7 @@ void __noreturn pq2_restart(char *cmd)
 
panic("Restart failed\n");
 }
+NOKPROBE_SYMBOL(pq2_restart)
 
 #ifdef CONFIG_PCI
 static int pq2_pci_exclude_device(struct pci_controller *hose,
-- 
2.25.0



[PATCH 03/12] powerpc/83xx: Blacklist mpc83xx_deep_resume() for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, so all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/platforms/83xx/suspend-asm.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
index 3acd7470dc5e..bc6bd4d0ae96 100644
--- a/arch/powerpc/platforms/83xx/suspend-asm.S
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -548,3 +548,4 @@ mpc83xx_deep_resume:
mtdec   r0
 
rfi
+_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume)
-- 
2.25.0



[PATCH 05/12] powerpc/mem: Blacklist flush_dcache_icache_phys() for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, so all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/mem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9b4f5fb719e0..bcb6af6ba29a 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include <linux/kprobes.h>
 
 #include 
 #include 
@@ -466,6 +467,7 @@ static void flush_dcache_icache_phys(unsigned long physaddr)
: "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
: "ctr", "memory");
 }
+NOKPROBE_SYMBOL(flush_dcache_icache_phys)
 #endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
 
 /*
-- 
2.25.0



[PATCH 01/12] powerpc/52xx: Blacklist functions running with MMU disabled for kprobe

2020-03-29 Thread Christophe Leroy
kprobe does not handle events happening in real mode, so all
functions running with MMU disabled have to be blacklisted.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/platforms/52xx/lite5200_sleep.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 3a9969c429b3..70083649c9ea 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -248,6 +248,7 @@ mmu_on:
 
 
blr
+_ASM_NOKPROBE_SYMBOL(lite5200_wakeup)
 
 
 /* -- */
@@ -391,6 +392,7 @@ restore_regs:
LOAD_SPRN(TBWU,  0x5b);
 
blr
+_ASM_NOKPROBE_SYMBOL(restore_regs)
 
 
 
-- 
2.25.0