[PATCH v5] powerpc/powernv: Poweroff (EPOW, DPO) events support for PowerNV platform

2015-05-18 Thread Vipin K Parashar
This patch adds support for FSP EPOW (Early Power Off Warning) and
DPO (Delayed Power Off) events for PowerNV platform. EPOW events are
generated by SPCN/FSP due to various critical system conditions that
need system shutdown. A few examples of these conditions are high
ambient temperature or system running on UPS power with low UPS battery.
DPO event is generated in response to admin initiated shutdown request.
Upon receipt of EPOW and DPO events host kernel invokes
orderly_poweroff for performing graceful system shutdown. System admin
can also add systemd service shutdown scripts to perform any specific
actions like graceful guest shutdown upon system poweroff. libvirt-guests
is a systemd service available on recent distros for management of guests
at system start/shutdown time.

Changes in v5:
 - Made changes to address review comments on previous patch.

Changes in v4:
 - Made changes to address review comments on previous patch.

Changes in v3:
 - Made changes to immediately call orderly_poweroff upon receipt of
   OPAL EPOW, DPO notifications.
 - Made code changes to address review comments on previous patch.
 - Made code changes to use existing OPAL EPOW API.
 - Removed patch to extract EPOW event timeout from OPAL device-tree.

Changes in v2:
 - Made code changes to improve code as per previous review comments.
 - Added patch to obtain EPOW event timeout values from OPAL device-tree.

Vipin K Parashar (1):
  powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV
platform

 arch/powerpc/include/asm/opal-api.h|  44 
 arch/powerpc/include/asm/opal.h|   3 +-
 arch/powerpc/platforms/powernv/opal-power.c| 147 ++---
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 179 insertions(+), 16 deletions(-)

-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5] powerpc/powernv: Add poweroff (EPOW, DPO) events support for PowerNV platform

2015-05-18 Thread Vipin K Parashar
This patch adds support for FSP EPOW (Early Power Off Warning) and
DPO (Delayed Power Off) events for PowerNV platform. EPOW events are
generated by SPCN/FSP due to various critical system conditions that
need system shutdown. A few examples of these conditions are high
ambient temperature or system running on UPS power with low UPS battery.
DPO event is generated in response to admin initiated shutdown request.
Upon receipt of EPOW and DPO events host kernel invokes
orderly_poweroff for performing graceful system shutdown. System admin
can also add systemd service shutdown scripts to perform any specific
actions like graceful guest shutdown upon system poweroff. libvirt-guests
is a systemd service available on recent distros for management of guests
at system start/shutdown time.

Signed-off-by: Vipin K Parashar vi...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/opal-api.h|  44 
 arch/powerpc/include/asm/opal.h|   3 +-
 arch/powerpc/platforms/powernv/opal-power.c| 147 ++---
 arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
 4 files changed, 179 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 0321a90..90fa364 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -355,6 +355,10 @@ enum opal_msg_type {
OPAL_MSG_TYPE_MAX,
 };
 
+/* OPAL_MSG_SHUTDOWN parameter values */
+#defineSOFT_OFF0x00
+#defineSOFT_REBOOT 0x01
+
 struct opal_msg {
__be32 msg_type;
__be32 reserved;
@@ -730,6 +734,46 @@ struct opal_i2c_request {
__be64 buffer_ra;   /* Buffer real address */
 };
 
+/*
+ * EPOW status sharing (OPAL and the host)
+ *
+ * The host will pass to OPAL a buffer of length OPAL_SYSEPOW_MAX
+ * with individual elements being 16 bits wide to fetch the system
+ * wide EPOW status. Each element in the buffer will contain the
+ * EPOW status in its bit representation for a particular EPOW sub
+ * class as defined here. So multiple detailed EPOW status bits
+ * specific for any sub class can be represented in a single buffer
+ * element as its bit representation.
+ */
+
+/* System EPOW type */
+enum OpalSysEpow {
+   OPAL_SYSEPOW_POWER  = 0,/* Power EPOW */
+   OPAL_SYSEPOW_TEMP   = 1,/* Temperature EPOW */
+   OPAL_SYSEPOW_COOLING= 2,/* Cooling EPOW */
+   OPAL_SYSEPOW_MAX= 3,/* Max EPOW categories */
+};
+
+/* Power EPOW */
+enum OpalSysPower {
+   OPAL_SYSPOWER_UPS   = 0x0001, /* System on UPS power */
+   OPAL_SYSPOWER_CHNG  = 0x0002, /* System power config change */
+   OPAL_SYSPOWER_FAIL  = 0x0004, /* System impending power failure */
+   OPAL_SYSPOWER_INCL  = 0x0008, /* System incomplete power */
+};
+
+/* Temperature EPOW */
+enum OpalSysTemp {
+   OPAL_SYSTEMP_AMB= 0x0001, /* System over ambient temperature */
+   OPAL_SYSTEMP_INT= 0x0002, /* System over internal temperature */
+   OPAL_SYSTEMP_HMD= 0x0004, /* System over ambient humidity */
+};
+
+/* Cooling EPOW */
+enum OpalSysCooling {
+   OPAL_SYSCOOL_INSF   = 0x0001, /* System insufficient cooling */
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 042af1a..d30766f 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -141,7 +141,8 @@ int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t 
error_type, uint8_t mask_action);
 int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t 
led_type, uint8_t led_action);
-int64_t opal_get_epow_status(__be64 *status);
+int64_t opal_get_epow_status(uint16_t *status, uint16_t *length);
+int64_t opal_get_dpo_status(int64_t *dpo_timeout);
 int64_t opal_set_system_attention_led(uint8_t led_action);
 int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
__be16 *pci_error_type, __be16 *severity);
diff --git a/arch/powerpc/platforms/powernv/opal-power.c 
b/arch/powerpc/platforms/powernv/opal-power.c
index ac46c2c..581bbd8 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -1,5 +1,5 @@
 /*
- * PowerNV OPAL power control for graceful shutdown handling
+ * PowerNV support for OPAL power-control, poweroff events
  *
  * Copyright 2015 IBM Corp.
  *
@@ -9,18 +9,87 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt)OPAL-POWER:   fmt
+
 #include linux/kernel.h
+#include linux/spinlock.h
+#include linux/timer.h
 #include linux/reboot.h
-#include linux/notifier.h
-
+#include linux/of.h
 #include asm/opal.h
 

Re: [PATCH] ppc64 ftrace: mark data_access callees notrace (pt.1)

2015-05-18 Thread Jiri Kosina
On Sat, 16 May 2015, Torsten Duwe wrote:

   There's got to be a better solution than this.
 
  Can you think of a better approach?
 
 Maybe a per thread variable to lock out a recursion into tracing?
 Thanks for your doubt.

ftrace already handles recursion protection by itself (depending on the 
per-ftrace-ops FTRACE_OPS_FL_RECURSION_SAFE flag).

It's however not really well-defined what to do when recursion would
happen. Therefore the __notrace__ annotation, which just completely avoids
such a situation by making tracing impossible, looks like a saner general
solution to me.

-- 
Jiri Kosina
SUSE Labs

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/fsl: Add FMan best effort port compatible

2015-05-18 Thread Igal . Liberman
From: Igal Liberman igal.liber...@freescale.com

This patch adds a compatible which represents FMan V3 best effort ports.
An FMan best effort port is configured as a 10G port; however, it uses 1G
hardware.

Signed-off-by: Igal Liberman igal.liber...@freescale.com
---
 .../devicetree/bindings/powerpc/fsl/fman.txt   |5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt 
b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
index edda55f..c2e3ec3 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
@@ -166,6 +166,11 @@ PROPERTIES
- fsl,fman-v3-port-oh for FManV3 OH ports
- fsl,fman-v3-port-rx for FManV3 RX ports
- fsl,fman-v3-port-tx for FManV3 TX ports
+   Optional compatible which can be used in addition to the
+   compatibles above is:
+   - fsl,fman-v3-best-effort-port
+   This compatible represents 10G best effort ports:
+   Port configured as 10G, using 1G hardware.
 
 - cell-index
Usage: required
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: mm: BUG_ON with NUMA_BALANCING (kernel BUG at include/linux/swapops.h:131!)

2015-05-18 Thread Mel Gorman
On Mon, May 18, 2015 at 12:32:29AM -0700, Haren Myneni wrote:
 Mel,
 I am hitting this issue with 4.0 kernel and even with 3.19 and
 3.17 kernels. I will also try with previous versions. Please let me
 know any suggestions on the debugging.
 

Please keep going further back in time to see if there was a point where
this was ever working. It could be a ppc64-specific bug but right now,
I'm still drawing a blank.

-- 
Mel Gorman
SUSE Labs
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: mm: BUG_ON with NUMA_BALANCING (kernel BUG at include/linux/swapops.h:131!)

2015-05-18 Thread Haren Myneni
On 5/18/15, Mel Gorman mgor...@suse.de wrote:
 On Mon, May 18, 2015 at 12:32:29AM -0700, Haren Myneni wrote:
 Mel,
 I am hitting this issue with 4.0 kernel and even with 3.19 and
 3.17 kernels. I will also try with previous versions. Please let me
 know any suggestions on the debugging.


 Please keep going further back in time to see if there was a point where
 this was ever working. It could be a ppc64-specific bug but right now,
 I'm still drawing a blank.

Sure, will do. I am running PPC64 LE kernel, but it does not show any
LE issue so far.

Thanks
Haren


 --
 Mel Gorman
 SUSE Labs

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/fsl: Add FMan best effort port compatible

2015-05-18 Thread Scott Wood
On Mon, 2015-05-18 at 11:41 +0300, Igal.Liberman wrote:
 From: Igal Liberman igal.liber...@freescale.com
 
 This patch adds a compatible which represents FMan V3 best effort ports.
 FMan best effort port is configured as 10G ports, however, it uses 1G
 hardware.
 
 Signed-off-by: Igal Liberman igal.liber...@freescale.com
 ---
  .../devicetree/bindings/powerpc/fsl/fman.txt   |5 +
  1 file changed, 5 insertions(+)
 
 diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt 
 b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
 index edda55f..c2e3ec3 100644
 --- a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
 +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
 @@ -166,6 +166,11 @@ PROPERTIES
   - fsl,fman-v3-port-oh for FManV3 OH ports
   - fsl,fman-v3-port-rx for FManV3 RX ports
   - fsl,fman-v3-port-tx for FManV3 TX ports
 + Optional compatible which can be used in addition to the
 + compatibles above is:
 + - fsl,fman-v3-best-effort-port
 + This compatible represents 10G best effort ports:
 + Port configured as 10G, using 1G hardware.

What does this mean?  If it's using 1G hardware then it's a 1G port,
right?  How can you configure a 1G port to be 10G?

Why is this compatible in addition to others (note that this implies
such ports are 100% backwards compatible with hardware that lacks the
new compatible)?  You'd have the same compatible on rx and tx nodes (I'm
assuming this isn't applicable to oh)?

What are the implications of this that warrant adding a compatible?

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 00/10] VF EEH on Power8

2015-05-18 Thread Wei Yang
This patchset enables EEH on SRIOV VFs. The general idea is to create proper
VF edev and VF PE and handle them properly.

Unlike a Bus PE, a VF PE contains just one VF. This makes EEH error
handling on a VF PE different from that on a Bus PE. Generally, there are
several differences.

First, the VF's removal and re-enumeration rely on its PF. A VF has a tight
relationship with its PF, so it is not proper to enumerate a VF by the usual
scan procedure. That's why virtfn_add/virtfn_remove are exported in this patch
set.

Second, the reset/restore of a VF is done in kernel space. FW is not aware of
the VF, which means the usual reset function done in FW will not work. One of
the patches imitates the reset/restore function in kernel space.

Third, the VF may be removed during the PF's error_detected function. In this
case, the original error_detected->slot_reset->resume sequence is not proper
for those removed VFs, since they are re-created by the PF in a fresh state.
A flag in eeh_dev is introduced to mark that the eeh_dev is in an error state.
By doing so, we track whether this device needs to be reset or not.

This has been tested both on host and in guest on Power8 with latest kernel
version.

v6:
   * code / commit log refactor by Gavin
v5:
   * remove the compound field, iterate on Master VF PE instead
   * some code refine on PCI config restore and reset on VF
 the wait time for assert and deassert
 PCI device address format
 check on edev-pcie_cap and edev-aer_cap before access them
v4:
   * refine the change logs, comment and code style
   * change pnv_pci_fixup_vf_eeh() to pnv_eeh_vf_final_fixup() and remove the
 CONFIG_PCI_IOV macro
   * reorder patch 5/6 to make the logic more reasonable
   * remove remove_dev_pci_data()
   * remove the EEH_DEV_VF flag, use edev-physfn to identify a VF EEH DEV and
 remove related CONFIG_PCI_IOV macro
   * add the option for VF reset
   * fix the pnv_eeh_cfg_blocked() logic
   * replace pnv_pci_cfg_{read,write} with eeh_ops-{read,write}_config in
 pnv_eeh_vf_restore_config()
   * rename pnv_eeh_vf_restore_config() to pnv_eeh_restore_vf_config()
   * rename pnv_pci_fixup_vf_caps() to pnv_pci_vf_header_fixup() and move it
 to arch/powerpc/platforms/powernv/pci.c
   * add a field compound in pnv_ioda_pe to link compound PEs
   * handle compound PE for VF PEs
v3:
   * add back vf_index in pci_dn to track the VF's index
   * rename ppdev in eeh_dev to physfn for consistency
   * move edev-physfn assignment before dev-dev.archdata.edev is set
   * move pnv_pci_fixup_vf_eeh() and pnv_pci_fixup_vf_caps() to eeh-powernv.c
   * more clear and detail in commit log and comment in code
   * merge eeh_rmv_virt_device() with eeh_rmv_device()
   * move the cfg_blocked check logic from pnv_eeh_read/write_config() to
 pnv_eeh_cfg_blocked()
   * move the vf reset/restore logic into its own patch, two patches are
 created.
 powerpc/powernv: Support PCI config restore for VFs
 powerpc/powernv: Support EEH reset for VFs
   * simplify the vf reset logic
v2:
   * add prefix pci_iov_ to virtfn_add/virtfn_remove
   * use EEH_DEV_VF as a flag for a VF's eeh_dev
   * use eeh_dev instead of edev in change log
   * remove vf_index in eeh_dev, calculate it from pdn-busno and devfn
   * do eeh_add_device_late() and eeh_sysfs_add_device() both after pci_dev is
 well initialized
   * do FLR to reset a VF PE
   * imitate the restore function in FW for VF
   * remove the reverse order patch, since it is still under discussion

Wei Yang (10):
  PCI/IOV: Rename and export virtfn_add/virtfn_remove
  powerpc/pci: Cache VF index in pci_dn
  powerpc/pci: Remove VFs prior to PF
  powerpc/eeh: Trace first 7 BARs in address cache
  powerpc/powernv: EEH device for VF
  powerpc/eeh: Create PE for VFs
  powerpc/powernv: Support EEH reset for VF PE
  powerpc/powernv: Support PCI config restore for VFs
  powerpc/eeh: Support error recovery for VF PE
  powerpc/powernv: compound PE for VFs

 arch/powerpc/include/asm/eeh.h   |4 +
 arch/powerpc/include/asm/pci-bridge.h|2 +
 arch/powerpc/kernel/eeh.c|8 +
 arch/powerpc/kernel/eeh_cache.c  |2 +-
 arch/powerpc/kernel/eeh_driver.c |  100 +---
 arch/powerpc/kernel/eeh_pe.c |   13 +-
 arch/powerpc/kernel/pci-hotplug.c|2 +-
 arch/powerpc/kernel/pci_dn.c |   16 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c |  221 +-
 arch/powerpc/platforms/powernv/pci-ioda.c|   46 +-
 arch/powerpc/platforms/powernv/pci.c |   35 +++-
 drivers/pci/iov.c|   10 +-
 include/linux/pci.h  |2 +
 13 files changed, 420 insertions(+), 41 deletions(-)

-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 09/10] powerpc/eeh: Support error recovery for VF PE

2015-05-18 Thread Wei Yang
Unlike a PCI bus dependent PE, a PE for VFs doesn't have the
primary bus on which PCI hotplug is implemented. The patch
supports error recovery, especially PCI hotplug, for a VF's PE.
Hotplug on a VF's PE is implemented based on VFs instead of
the PCI bus.

[gwshan: changelog and code refactoring]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h   |1 +
 arch/powerpc/kernel/eeh.c|8 +++
 arch/powerpc/kernel/eeh_driver.c |  100 ++
 arch/powerpc/kernel/eeh_pe.c |3 +-
 4 files changed, 90 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3d64cf3..d24382c 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -140,6 +140,7 @@ struct eeh_dev {
struct pci_controller *phb; /* Associated PHB   */
struct pci_dn *pdn; /* Associated PCI device node   */
struct pci_dev *pdev;   /* Associated PCI device*/
+   intin_error;/* Error flag for eeh_dev   */
struct pci_dev *physfn; /* Associated PF PORT   */
struct pci_bus *bus;/* PCI bus for partial hotplug  */
 };
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9ee61d1..1207547 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1229,6 +1229,14 @@ void eeh_remove_device(struct pci_dev *dev)
 * from the parent PE during the BAR resotre.
 */
edev-pdev = NULL;
+
+   /*
+* The flag in_error is used to trace EEH devices for VFs
+* in error state or not. It's set in eeh_report_error(). If
+* it's not set, eeh_report_{reset,resume}() won't be called
+* for the VF EEH device.
+*/
+   edev-in_error = 0;
dev-dev.archdata.edev = NULL;
if (!(edev-pe-state  EEH_PE_KEEP))
eeh_rmv_from_parent_pe(edev);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 24768ff..63a2c33 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
+   edev-in_error = 1;
eeh_pcid_put(dev);
return NULL;
 }
@@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
 
if (!driver-err_handler ||
!driver-err_handler-slot_reset ||
-   (edev-mode  EEH_DEV_NO_HANDLER)) {
+   (edev-mode  EEH_DEV_NO_HANDLER) ||
+   (!edev-in_error)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -339,14 +341,16 @@ static void *eeh_report_resume(void *data, void *userdata)
 
if (!driver-err_handler ||
!driver-err_handler-resume ||
-   (edev-mode  EEH_DEV_NO_HANDLER)) {
+   (edev-mode  EEH_DEV_NO_HANDLER) ||
+   (!edev-in_error)) {
edev-mode = ~EEH_DEV_NO_HANDLER;
-   eeh_pcid_put(dev);
-   return NULL;
+   goto out;
}
 
driver-err_handler-resume(dev);
 
+out:
+   edev-in_error = 0;
eeh_pcid_put(dev);
return NULL;
 }
@@ -386,12 +390,38 @@ static void *eeh_report_failure(void *data, void 
*userdata)
return NULL;
 }
 
+static void *eeh_add_virt_device(void *data, void *userdata)
+{
+   struct pci_driver *driver;
+   struct eeh_dev *edev = (struct eeh_dev *)data;
+   struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+   struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+   if (!(edev-physfn)) {
+   pr_warn(%s: EEH dev %04x:%02x:%02x.%01x not for VF\n,
+   __func__, edev-phb-global_number, pdn-busno,
+   PCI_SLOT(pdn-devfn), PCI_FUNC(pdn-devfn));
+   return NULL;
+   }
+
+   driver = eeh_pcid_get(dev);
+   if (driver) {
+   eeh_pcid_put(dev);
+   if (driver-err_handler)
+   return NULL;
+   }
+
+   pci_iov_virtfn_add(edev-physfn, pdn-vf_index, 0);
+   return NULL;
+}
+
 static void *eeh_rmv_device(void *data, void *userdata)
 {
struct pci_driver *driver;
struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
int *removed = (int *)userdata;
+   struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
/*
 * Actually, we should remove the PCI bridges as well.
@@ -416,7 +446,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
driver = eeh_pcid_get(dev);
if (driver) {
eeh_pcid_put(dev);
-   if (driver-err_handler)
+   if (removed  

[PATCH V6 10/10] powerpc/powernv: compound PE for VFs

2015-05-18 Thread Wei Yang
When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
which means those VFs in a group should form a compound PE.

This patch links those VF PEs into compound PE in this case.

[gwshan: code refactoring for a bit]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   46 +
 arch/powerpc/platforms/powernv/pci.c  |   17 +--
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index f8bc950..56e7b65 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1363,9 +1363,20 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, 
u16 num_vfs)
}
 
list_for_each_entry_safe(pe, pe_n, phb-ioda.pe_list, list) {
+   struct pnv_ioda_pe *s, *sn;
if (pe-parent_dev != pdev)
continue;
 
+   if ((pe-flags  PNV_IODA_PE_MASTER) 
+   (pe-flags  PNV_IODA_PE_VF)) {
+   list_for_each_entry_safe(s, sn, pe-slaves, list) {
+   pnv_pci_ioda2_release_dma_pe(pdev, s);
+   list_del(s-list);
+   pnv_ioda_deconfigure_pe(phb, s);
+   pnv_ioda_free_pe(phb, s-pe_number);
+   }
+   }
+
pnv_pci_ioda2_release_dma_pe(pdev, pe);
 
/* Remove from list */
@@ -1418,7 +1429,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
u16 num_vfs)
struct pci_bus*bus;
struct pci_controller *hose;
struct pnv_phb*phb;
-   struct pnv_ioda_pe*pe;
+   struct pnv_ioda_pe*pe, *master_pe;
intpe_num;
u16vf_index;
struct pci_dn *pdn;
@@ -1464,10 +1475,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
u16 num_vfs)
GFP_KERNEL, hose-node);
pe-tce32_table-data = pe;
 
-   /* Put PE to the list */
-   mutex_lock(phb-ioda.pe_list_mutex);
-   list_add_tail(pe-list, phb-ioda.pe_list);
-   mutex_unlock(phb-ioda.pe_list_mutex);
+   /* Put PE to the list, or postpone it for compound PEs */
+   if ((pdn-m64_per_iov != M64_PER_IOV) ||
+   (num_vfs = M64_PER_IOV)) {
+   mutex_lock(phb-ioda.pe_list_mutex);
+   list_add_tail(pe-list, phb-ioda.pe_list);
+   mutex_unlock(phb-ioda.pe_list_mutex);
+   }
 
pnv_pci_ioda2_setup_dma_pe(phb, pe);
}
@@ -1480,10 +1494,32 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
u16 num_vfs)
vf_per_group = roundup_pow_of_two(num_vfs) / pdn-m64_per_iov;
 
for (vf_group = 0; vf_group  M64_PER_IOV; vf_group++) {
+   master_pe = NULL;
+
for (vf_index = vf_group * vf_per_group;
 vf_index  (vf_group + 1) * vf_per_group 
 vf_index  num_vfs;
 vf_index++) {
+
+   /*
+* Figure out the master PE and put all slave
+* PEs to master PE's list.
+*/
+   pe = phb-ioda.pe_array[pdn-offset + 
vf_index];
+   if (!master_pe) {
+   pe-flags |= PNV_IODA_PE_MASTER;
+   INIT_LIST_HEAD(pe-slaves);
+   master_pe = pe;
+   mutex_lock(phb-ioda.pe_list_mutex);
+   list_add_tail(pe-list, 
phb-ioda.pe_list);
+   mutex_unlock(phb-ioda.pe_list_mutex);
+   } else {
+   pe-flags |= PNV_IODA_PE_SLAVE;
+   pe-master = master_pe;
+   list_add_tail(pe-list,
+   master_pe-slaves);
+   }
+
for (vf_index1 = vf_group * vf_per_group;
 vf_index1  (vf_group + 1) * vf_per_group 

 vf_index1  num_vfs;
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index 10bc8c3..717a58e 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -667,7 +667,7 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
   

Re: [PATCH V6 01/10] PCI/IOV: Rename and export virtfn_add/virtfn_remove

2015-05-18 Thread Wei Yang
Bjorn,

This patch set is dedicated to VF EEH on Power. Most of the patches are in
the powerpc arch, while this first one is PCI core related. The following
patches have been reviewed and acked by Gavin, so your ack on this one is
important.

I'd appreciate it a lot if you have time to take a look at this one.

Thanks a lot.

On Tue, May 19, 2015 at 09:35:03AM +0800, Wei Yang wrote:
During EEH recovery, hotplug is applied to the devices which don't
have drivers or their drivers don't support EEH. However, the hotplug,
which was implemented based on PCI bus, can't be applied to VF directly.

The patch renames virtfn_{add,remove}() and exports them so that they
can be used in PCI hotplug during EEH recovery.

[gwshan: changelog]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 drivers/pci/iov.c   |   10 +-
 include/linux/pci.h |2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index ee0ebff..cc941dd 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -108,7 +108,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, 
int resno)
   return dev-sriov-barsz[resno - PCI_IOV_RESOURCES];
 }

-static int virtfn_add(struct pci_dev *dev, int id, int reset)
+int pci_iov_virtfn_add(struct pci_dev *dev, int id, int reset)
 {
   int i;
   int rc = -ENOMEM;
@@ -183,7 +183,7 @@ failed:
   return rc;
 }

-static void virtfn_remove(struct pci_dev *dev, int id, int reset)
+void pci_iov_virtfn_remove(struct pci_dev *dev, int id, int reset)
 {
   char buf[VIRTFN_ID_LEN];
   struct pci_dev *virtfn;
@@ -320,7 +320,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
   }

   for (i = 0; i  initial; i++) {
-  rc = virtfn_add(dev, i, 0);
+  rc = pci_iov_virtfn_add(dev, i, 0);
   if (rc)
   goto failed;
   }
@@ -332,7 +332,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)

 failed:
   for (j = 0; j  i; j++)
-  virtfn_remove(dev, j, 0);
+  pci_iov_virtfn_remove(dev, j, 0);

   iov-ctrl = ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
   pci_cfg_access_lock(dev);
@@ -361,7 +361,7 @@ static void sriov_disable(struct pci_dev *dev)
   return;

   for (i = 0; i  iov-num_VFs; i++)
-  virtfn_remove(dev, i, 0);
+  pci_iov_virtfn_remove(dev, i, 0);

   pcibios_sriov_disable(dev);

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8d..94bacfa 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1679,6 +1679,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id);

 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
 void pci_disable_sriov(struct pci_dev *dev);
+int pci_iov_virtfn_add(struct pci_dev *dev, int id, int reset);
+void pci_iov_virtfn_remove(struct pci_dev *dev, int id, int reset);
 int pci_num_vf(struct pci_dev *dev);
 int pci_vfs_assigned(struct pci_dev *dev);
 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
-- 
1.7.9.5

-- 
Richard Yang
Help you, Help me

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] ppc64 ftrace: mark data_access callees notrace (pt.1)

2015-05-18 Thread Michael Ellerman
On Mon, 2015-05-18 at 14:29 +0200, Jiri Kosina wrote:
 On Sat, 16 May 2015, Torsten Duwe wrote:
 
There's got to be a better solution than this.
  
   Can you think of a better approach?
  
  Maybe a per thread variable to lock out a recursion into tracing?
  Thanks for your doubt.
 
 ftrace already handles recursion protection by itself (depending on the 
 per-ftrace-ops FTRACE_OPS_FL_RECURSION_SAFE flag).

OK, so I wonder why that's not working for us?

 It's however not really well-defined what to do when recursion would 
 happen. Therefore __notrace__ annotation, that just completely avoid such 
 situation by making tracing impossible, looks like saner general solution 
 to me.

I disagree. Correctly annotating all functions that might be called ever and
for all time is a maintenance nightmare and is never going to work in the long
term.

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v4 4/9] hvc: Convert to using interrupts instead of opal events

2015-05-18 Thread Michael Ellerman
On Fri, 2015-05-15 at 14:06 +1000, Alistair Popple wrote:
 Convert the opal hvc driver to use the new irqchip to register for
 opal events. As older firmware versions may not have device tree
 bindings for the interrupt parent we just use a hardcoded hwirq based
 on the event number.

This is still breaking the mambo skiboot test for me.

Nothing obvious on the console

cheers



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v4 4/9] hvc: Convert to using interrupts instead of opal events

2015-05-18 Thread Alistair Popple
On Tue, 19 May 2015 14:33:39 Michael Ellerman wrote:
 On Fri, 2015-05-15 at 14:06 +1000, Alistair Popple wrote:
  Convert the opal hvc driver to use the new irqchip to register for
  opal events. As older firmware versions may not have device tree
  bindings for the interrupt parent we just use a hardcoded hwirq based
  on the event number.
 
 This is still breaking the mambo skiboot test for me.

As in you still can't get to userspace?

You probably need http://patchwork.ozlabs.org/patch/472202/ which fixes a bug 
in skiboot.

 Nothing obvious on the console
 
 cheers

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 01/10] PCI/IOV: Rename and export virtfn_add/virtfn_remove

2015-05-18 Thread Wei Yang
During EEH recovery, hotplug is applied to the devices which don't
have drivers or their drivers don't support EEH. However, the hotplug,
which was implemented based on PCI bus, can't be applied to VF directly.

The patch renames virtfn_{add,remove}() and exports them so that they
can be used in PCI hotplug during EEH recovery.

[gwshan: changelog]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 drivers/pci/iov.c   |   10 +-
 include/linux/pci.h |2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index ee0ebff..cc941dd 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -108,7 +108,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, 
int resno)
return dev-sriov-barsz[resno - PCI_IOV_RESOURCES];
 }
 
-static int virtfn_add(struct pci_dev *dev, int id, int reset)
+int pci_iov_virtfn_add(struct pci_dev *dev, int id, int reset)
 {
int i;
int rc = -ENOMEM;
@@ -183,7 +183,7 @@ failed:
return rc;
 }
 
-static void virtfn_remove(struct pci_dev *dev, int id, int reset)
+void pci_iov_virtfn_remove(struct pci_dev *dev, int id, int reset)
 {
char buf[VIRTFN_ID_LEN];
struct pci_dev *virtfn;
@@ -320,7 +320,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
}
 
for (i = 0; i  initial; i++) {
-   rc = virtfn_add(dev, i, 0);
+   rc = pci_iov_virtfn_add(dev, i, 0);
if (rc)
goto failed;
}
@@ -332,7 +332,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 
 failed:
for (j = 0; j  i; j++)
-   virtfn_remove(dev, j, 0);
+   pci_iov_virtfn_remove(dev, j, 0);
 
iov-ctrl = ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
pci_cfg_access_lock(dev);
@@ -361,7 +361,7 @@ static void sriov_disable(struct pci_dev *dev)
return;
 
for (i = 0; i  iov-num_VFs; i++)
-   virtfn_remove(dev, i, 0);
+   pci_iov_virtfn_remove(dev, i, 0);
 
pcibios_sriov_disable(dev);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 353db8d..94bacfa 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1679,6 +1679,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id);
 
 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
 void pci_disable_sriov(struct pci_dev *dev);
+int pci_iov_virtfn_add(struct pci_dev *dev, int id, int reset);
+void pci_iov_virtfn_remove(struct pci_dev *dev, int id, int reset);
 int pci_num_vf(struct pci_dev *dev);
 int pci_vfs_assigned(struct pci_dev *dev);
 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 02/10] powerpc/pci: Cache VF index in pci_dn

2015-05-18 Thread Wei Yang
The patch caches the VF index in pci_dn, which can be used to calculate
VF's bus, device and function number. That information helps to locate
the VF's PCI device instance when doing hotplug during EEH recovery if
necessary.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h |1 +
 arch/powerpc/kernel/pci_dn.c  |4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index 1811c44..d78afe4 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -199,6 +199,7 @@ struct pci_dn {
 #ifdef CONFIG_PCI_IOV
u16 vfs_expanded;   /* number of VFs IOV BAR expanded */
u16 num_vfs;/* number of VFs enabled*/
+   int vf_index;   /* VF index in the PF */
int offset; /* PE# for the first VF PE */
 #define M64_PER_IOV 4
int m64_per_iov;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index b3b4df9..f771130 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
 #ifdef CONFIG_PCI_IOV
 static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
   struct pci_dev *pdev,
+  int vf_index,
   int busno, int devfn)
 {
struct pci_dn *pdn;
@@ -157,6 +158,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn 
*parent,
pdn-parent = parent;
pdn-busno = busno;
pdn-devfn = devfn;
+   pdn-vf_index = vf_index;
 #ifdef CONFIG_PPC_POWERNV
pdn-pe_number = IODA_INVALID_PE;
 #endif
@@ -196,7 +198,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
return NULL;
 
for (i = 0; i  pci_sriov_get_totalvfs(pdev); i++) {
-   pdn = add_one_dev_pci_data(parent, NULL,
+   pdn = add_one_dev_pci_data(parent, NULL, i,
   pci_iov_virtfn_bus(pdev, i),
   pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 08/10] powerpc/powernv: Support PCI config restore for VFs

2015-05-18 Thread Wei Yang
After PE reset, OPAL API opal_pci_reinit() is called on all devices
contained in the PE to reinitialize them. However, VFs can't be seen
from skiboot firmware. We have to implement the functions, similar to
those in skiboot firmware, to reinitialize VFs after reset on PE
for VFs.

[gwshan: changelog and code refactoring]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/pci-bridge.h|1 +
 arch/powerpc/platforms/powernv/eeh-powernv.c |   70 +-
 arch/powerpc/platforms/powernv/pci.c |   18 +++
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h 
b/arch/powerpc/include/asm/pci-bridge.h
index d78afe4..168b991 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -205,6 +205,7 @@ struct pci_dn {
int m64_per_iov;
 #define IODA_INVALID_M64(-1)
int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+   int mps;
 #endif /* CONFIG_PCI_IOV */
 #endif
struct list_head child_list;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 7af3c1e..33deb78 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1612,6 +1612,67 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
 }
 
+static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
+{
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+   u32 devctl, cmd, cap2, aer_capctl;
+   int old_mps;
+
+   /* Restore MPS */
+   if (edev-pcie_cap) {
+   old_mps = (ffs(pdn-mps) - 8)  5;
+   eeh_ops-read_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+2, devctl);
+   devctl = ~PCI_EXP_DEVCTL_PAYLOAD;
+   devctl |= old_mps;
+   eeh_ops-write_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+   }
+
+   /* Disable Completion Timeout */
+   if (edev-pcie_cap) {
+   eeh_ops-read_config(pdn, edev-pcie_cap + PCI_EXP_DEVCAP2,
+4, cap2);
+   if (cap2  0x10) {
+   eeh_ops-read_config(pdn,
+   edev-pcie_cap + PCI_EXP_DEVCTL2,
+   4, cap2);
+   cap2 |= 0x10;
+   eeh_ops-write_config(pdn,
+   edev-pcie_cap + PCI_EXP_DEVCTL2,
+   4, cap2);
+   }
+   }
+
+   /* Enable SERR and parity checking */
+   eeh_ops-read_config(pdn, PCI_COMMAND, 2, cmd);
+   cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+   eeh_ops-write_config(pdn, PCI_COMMAND, 2, cmd);
+
+   /* Enable report various errors */
+   if (edev-pcie_cap) {
+   eeh_ops-read_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+   2, devctl);
+   devctl = ~PCI_EXP_DEVCTL_CERE;
+   devctl |= (PCI_EXP_DEVCTL_NFERE |
+  PCI_EXP_DEVCTL_FERE |
+  PCI_EXP_DEVCTL_URRE);
+   eeh_ops-write_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+   2, devctl);
+   }
+
+   /* Enable ECRC generation and check */
+   if (edev-pcie_cap  edev-aer_cap) {
+   eeh_ops-read_config(pdn, edev-aer_cap + PCI_ERR_CAP,
+   4, aer_capctl);
+   aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+   eeh_ops-write_config(pdn, edev-aer_cap + PCI_ERR_CAP,
+   4, aer_capctl);
+   }
+
+   return 0;
+}
+
 static int pnv_eeh_restore_config(struct pci_dn *pdn)
 {
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -1622,7 +1683,14 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
return -EEXIST;
 
phb = edev-phb-private_data;
-   ret = opal_pci_reinit(phb-opal_id,
+   /*
+* We have to restore the PCI config space after reset since the
+* firmware can't see SRIOV VFs.
+*/
+   if (edev-physfn)
+   ret = pnv_eeh_restore_vf_config(pdn);
+   else
+   ret = opal_pci_reinit(phb-opal_id,
  OPAL_REINIT_PCI_DEV, edev-config_addr);
if (ret) {
pr_warn(%s: Can't reinit PCI dev 0x%x (%lld)\n,
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index bca2aeb..10bc8c3 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -729,6 +729,24 @@ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
 

[PATCH V6 07/10] powerpc/powernv: Support EEH reset for VF PE

2015-05-18 Thread Wei Yang
PEs for VFs don't have a primary bus, so they have to have their own reset
backend, which is used during EEH recovery. The patch implements the reset
backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained
in the PE.

[gwshan: changelog and code refactoring]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h   |1 +
 arch/powerpc/platforms/powernv/eeh-powernv.c |  134 +-
 2 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c1fde48..3d64cf3 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -134,6 +134,7 @@ struct eeh_dev {
int pcix_cap;   /* Saved PCIx capability*/
int pcie_cap;   /* Saved PCIe capability*/
int aer_cap;/* Saved AER capability */
+   int af_cap; /* Saved AF capability  */
struct eeh_pe *pe;  /* Associated PE*/
struct list_head list;  /* Form link list in the PE */
struct pci_controller *phb; /* Associated PHB   */
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index c505036..7af3c1e 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -402,6 +402,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
edev-pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
edev-pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
edev-aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+   edev-af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
if ((edev-class_code  8) == PCI_CLASS_BRIDGE_PCI) {
edev-mode |= EEH_DEV_BRIDGE;
if (edev-pcie_cap) {
@@ -891,6 +892,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int 
option)
return 0;
 }
 
+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos,
+u16 mask, bool af_flr_rst)
+{
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+   int status, i;
+
+   /* Wait for Transaction Pending bit to be cleared */
+   for (i = 0; i  4; i++) {
+   eeh_ops-read_config(pdn, pos, 2, status);
+   if (!(status  mask))
+   return;
+
+   msleep((1  i) * 100);
+   }
+
+   pr_warn(%s: Pending transaction while issuing %s FLR to 
+   %04x:%02x:%02x.%01x\n,
+   __func__, af_flr_rst ? AF : ,
+   edev-phb-global_number, pdn-busno,
+   PCI_SLOT(pdn-devfn), PCI_FUNC(pdn-devfn));
+}
+
+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
+{
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+   u32 reg;
+
+   if (!edev-pcie_cap)
+   return -ENOTTY;
+
+   eeh_ops-read_config(pdn, edev-pcie_cap + PCI_EXP_DEVCAP, 4, reg);
+   if (!(reg  PCI_EXP_DEVCAP_FLR))
+   return -ENOTTY;
+
+   switch (option) {
+   case EEH_RESET_HOT:
+   case EEH_RESET_FUNDAMENTAL:
+   pnv_eeh_wait_for_pending(pdn, edev-pcie_cap + PCI_EXP_DEVSTA,
+PCI_EXP_DEVSTA_TRPND, false);
+   eeh_ops-read_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+4, reg);
+   reg |= PCI_EXP_DEVCTL_BCR_FLR;
+   eeh_ops-write_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+   msleep(EEH_PE_RST_HOLD_TIME);
+   break;
+   case EEH_RESET_DEACTIVATE:
+   eeh_ops-read_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+4, reg);
+   reg = ~PCI_EXP_DEVCTL_BCR_FLR;
+   eeh_ops-write_config(pdn, edev-pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+   msleep(EEH_PE_RST_SETTLE_TIME);
+   break;
+   }
+
+   return 0;
+}
+
+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
+{
+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+   u32 cap;
+
+   if (!edev-af_cap)
+   return -ENOTTY;
+
+   eeh_ops-read_config(pdn, edev-af_cap + PCI_AF_CAP, 1, cap);
+   if (!(cap  PCI_AF_CAP_TP) || !(cap  PCI_AF_CAP_FLR))
+   return -ENOTTY;
+
+   switch (option) {
+   case EEH_RESET_HOT:
+   case EEH_RESET_FUNDAMENTAL:
+   /*
+* Wait for Transaction Pending bit to clear. A word-aligned
+* test is used, so we use the conrol offset rather than status
+* and shift the test bit to match.
+*/
+   pnv_eeh_wait_for_pending(pdn, 

[PATCH V6 04/10] powerpc/eeh: Trace first 7 BARs in address cache

2015-05-18 Thread Wei Yang
EEH address cache, which helps to locate the PCI device according to
the given (physical) MMIO address, didn't cover PCI bridges. Also, it
shouldn't return PF with address in PF's IOV BARs. Instead, the VFs
should be returned.

The patch restricts the address cache to cover first 7 BARs for the
above purposes.

[gwshan: changelog]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/eeh_cache.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index eeabeab..f6c5f05 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -196,7 +196,7 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
}
 
/* Walk resources on this device, poke them into the tree */
-   for (i = 0; i  DEVICE_COUNT_RESOURCE; i++) {
+   for (i = 0; i = PCI_ROM_RESOURCE; i++) {
unsigned long start = pci_resource_start(dev,i);
unsigned long end = pci_resource_end(dev,i);
unsigned int flags = pci_resource_flags(dev,i);
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[git pull] Please pull mpe/linux.git powerpc-4.1-4 tag

2015-05-18 Thread Michael Ellerman
Hi Linus,

Please pull some more powerpc fixes for 4.1:

The following changes since commit 030bbdbf4c833bc69f502eae58498bc5572db736:

  Linux 4.1-rc3 (2015-05-10 15:12:29 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux.git tags/powerpc-4.1-4

for you to fetch changes up to 5e95235ccd5442d4a4fe11ec4eb99ba1b7959368:

  powerpc: Align TOC to 256 bytes (2015-05-14 16:59:21 +1000)


powerpc fixes for 4.1 # 3

- THP/hugetlb fixes from Aneesh.
- MCE fix from Daniel.
- TOC fix from Anton.


Aneesh Kumar K.V (2):
  powerpc/thp: Serialize pmd clear against a linux page table walk.
  powerpc/mm: Return NULL for not present hugetlb page

Anton Blanchard (1):
  powerpc: Align TOC to 256 bytes

Daniel Axtens (1):
  powerpc/mce: fix off by one errors in mce event handling

 arch/powerpc/kernel/mce.c |  4 ++--
 arch/powerpc/kernel/vmlinux.lds.S |  1 +
 arch/powerpc/mm/hugetlbpage.c | 25 -
 arch/powerpc/mm/pgtable_64.c  | 11 +++
 4 files changed, 30 insertions(+), 11 deletions(-)




signature.asc
Description: This is a digitally signed message part
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 03/10] powerpc/pci: Remove VFs prior to PF

2015-05-18 Thread Wei Yang
As commit ac205b7b (PCI: make sriov work with hotplug remove) indicates,
VFs, which might be hooked to the same PCI bus as their PF, should be
removed before the PF. Otherwise, PCI hot unplugging on the PCI bus would
cause a kernel crash.

The patch applies the above pattern to PowerPC PCI hotplug path.

[gwshan: changelog]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/pci-hotplug.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci-hotplug.c 
b/arch/powerpc/kernel/pci-hotplug.c
index 7ed85a6..98f84ed 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -50,7 +50,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus)
 
pr_debug(PCI: Removing devices on bus %04x:%02x\n,
 pci_domain_nr(bus),  bus-number);
-   list_for_each_entry_safe(dev, tmp, bus-devices, bus_list) {
+   list_for_each_entry_safe_reverse(dev, tmp, bus-devices, bus_list) {
pr_debug(   Removing %s...\n, pci_name(dev));
pci_stop_and_remove_bus_device(dev);
}
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 06/10] powerpc/eeh: Create PE for VFs

2015-05-18 Thread Wei Yang
The current EEH recovery code works on the assumption that the PE has a
primary bus. Unfortunately, that's not true for VF PEs, which generally
contain one or multiple VFs (for the VF group case). The patch creates PEs
for VFs at PCI final fixup time. Those PEs for VFs are identified with the
newly introduced flag EEH_PE_VF so that we handle them differently during
EEH recovery.

[gwshan: changelog and code refactoring]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h   |1 +
 arch/powerpc/kernel/eeh_pe.c |   10 --
 arch/powerpc/platforms/powernv/eeh-powernv.c |   17 +
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 1b3614d..c1fde48 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -70,6 +70,7 @@ struct pci_dn;
 #define EEH_PE_PHB (1  1)/* PHB PE*/
 #define EEH_PE_DEVICE  (1  2)/* Device PE */
 #define EEH_PE_BUS (1  3)/* Bus PE*/
+#define EEH_PE_VF  (1  4)/* VF PE */
 
 #define EEH_PE_ISOLATED(1  0)/* Isolated PE  
*/
 #define EEH_PE_RECOVERING  (1  1)/* Recovering PE*/
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 35f0b62..260a701 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev 
*edev)
 * EEH device already having associated PE, but
 * the direct parent EEH device doesn't have yet.
 */
-   pdn = pdn ? pdn-parent : NULL;
+   if (edev-physfn)
+   pdn = pci_get_pdn(edev-physfn);
+   else
+   pdn = pdn ? pdn-parent : NULL;
while (pdn) {
/* We're poking out of PCI territory */
parent = pdn_to_eeh_dev(pdn);
@@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
}
 
/* Create a new EEH PE */
-   pe = eeh_pe_alloc(edev-phb, EEH_PE_DEVICE);
+   if (edev-physfn)
+   pe = eeh_pe_alloc(edev-phb, EEH_PE_VF);
+   else
+   pe = eeh_pe_alloc(edev-phb, EEH_PE_DEVICE);
if (!pe) {
pr_err(%s: out of memory!\n, __func__);
return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index ce738ab..c505036 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1520,6 +1520,23 @@ static struct eeh_ops pnv_eeh_ops = {
.restore_config = pnv_eeh_restore_config
 };
 
+static void pnv_eeh_vf_final_fixup(struct pci_dev *pdev)
+{
+   struct pci_dn *pdn = pci_get_pdn(pdev);
+
+   if (!pdev-is_virtfn)
+   return;
+
+   /*
+* The following operations will fail if VF's sysfs files
+* aren't created or its resources aren't finalized.
+*/
+   eeh_add_device_early(pdn);
+   eeh_add_device_late(pdev);
+   eeh_sysfs_add_device(pdev);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pnv_eeh_vf_final_fixup);
+
 /**
  * eeh_powernv_init - Register platform dependent EEH operations
  *
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 05/10] powerpc/powernv: EEH device for VF

2015-05-18 Thread Wei Yang
VFs and their corresponding pci_dn instances are created and released
dynamically as their PF's SRIOV capability is enabled and disabled.
The patch creates and releases EEH devices for VFs when creating and
releasing their pci_dn instances, which means EEH devices and pci_dn
instances have the same life cycle. Also, a VF's EEH device is identified
by (struct eeh_dev::physfn).

[gwshan: changelog and removed CONFIG_PCI_IOV]
Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
Acked-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/eeh.h |1 +
 arch/powerpc/kernel/pci_dn.c   |   12 
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index a52db28..1b3614d 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -138,6 +138,7 @@ struct eeh_dev {
struct pci_controller *phb; /* Associated PHB   */
struct pci_dn *pdn; /* Associated PCI device node   */
struct pci_dev *pdev;   /* Associated PCI device*/
+   struct pci_dev *physfn; /* Associated PF PORT   */
struct pci_bus *bus;/* PCI bus for partial hotplug  */
 };
 
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index f771130..f0ddde7 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -180,7 +180,9 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn 
*parent,
 struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
 {
 #ifdef CONFIG_PCI_IOV
+   struct pci_controller *hose = pci_bus_to_host(pdev-bus);
struct pci_dn *parent, *pdn;
+   struct eeh_dev *edev;
int i;
 
/* Only support IOV for now */
@@ -206,6 +208,9 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
 __func__, i);
return NULL;
}
+   eeh_dev_init(pdn, hose);
+   edev = pdn_to_eeh_dev(pdn);
+   edev-physfn = pdev;
}
 #endif /* CONFIG_PCI_IOV */
 
@@ -254,10 +259,17 @@ void remove_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i  pci_sriov_get_totalvfs(pdev); i++) {
list_for_each_entry_safe(pdn, tmp,
parent-child_list, list) {
+   struct eeh_dev *edev;
if (pdn-busno != pci_iov_virtfn_bus(pdev, i) ||
pdn-devfn != pci_iov_virtfn_devfn(pdev, i))
continue;
 
+   edev = pdn_to_eeh_dev(pdn);
+   if (edev) {
+   pdn-edev = NULL;
+   kfree(edev);
+   }
+
if (!list_empty(pdn-list))
list_del(pdn-list);
 
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: powerpc/iommu: Support hybrid iommu/direct DMA ops for coherent_mask < dma_mask

2015-05-18 Thread Michael Ellerman
On Mon, 2015-18-05 at 03:56:51 UTC, Benjamin Herrenschmidt wrote:
 This patch adds the ability to the DMA direct ops to fallback to the IOMMU
 ops for coherent alloc/free if the coherent mask of the device isn't
 suitable for accessing the direct DMA space and the device also happens
 to have an active IOMMU table.

Can you do the removal of the union, the #ifdef PPC64 and the static inlines as
a precursor patch, that would remove some of the noise in the diff.

And can you explain the changes to dart, pseries and powernv.

There's also some whitespace changes in iommu.h that I assume you didn't want?

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v7 1/2] arm64: dts: Add the arasan mmc nodes in apm-storm.dtsi

2015-05-18 Thread Suman Tripathi
Hi Arnd,

On Wed, May 13, 2015 at 5:21 PM, Suman Tripathi stripa...@apm.com wrote:
 This patch adds the arasan mmc nodes to reuse the of-arasan
 driver for APM X-Gene SoC.

 Signed-off-by: Suman Tripathi stripa...@apm.com
 ---
 ---
  arch/arm64/boot/dts/apm/apm-mustang.dts |  4 +++
  arch/arm64/boot/dts/apm/apm-storm.dtsi  | 43 
 +
  2 files changed, 47 insertions(+)

 diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts 
 b/arch/arm64/boot/dts/apm/apm-mustang.dts
 index 83578e7..7a3ea72 100644
 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts
 +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
 @@ -52,3 +52,7 @@
  xgenet {
 status = ok;
  };
 +
 +mmc0 {
 +   status = ok;
 +};
 diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi 
 b/arch/arm64/boot/dts/apm/apm-storm.dtsi
 index c8d3e0e..8e03ecd 100644
 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
 +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
 @@ -145,6 +145,40 @@
 clock-output-names = socplldiv2;
 };

 +   ahbclk: ahbclk@1f2ac000 {
 +   compatible = apm,xgene-device-clock;
 +   #clock-cells = 1;
 +   clocks = socplldiv2 0;
 +   reg = 0x0 0x1f2ac000 0x0 0x1000
 +   0x0 0x1700 0x0 0x2000;
 +   reg-names = csr-reg, div-reg;
 +   csr-offset = 0x0;
 +   csr-mask = 0x1;
 +   enable-offset = 0x8;
 +   enable-mask = 0x1;
 +   divider-offset = 0x164;
 +   divider-width = 0x5;
 +   divider-shift = 0x0;
 +   clock-output-names = ahbclk;
 +   };
 +
 +   sdioclk: sdioclk@1f2ac000 {
 +   compatible = apm,xgene-device-clock;
 +   #clock-cells = 1;
 +   clocks = socplldiv2 0;
 +   reg = 0x0 0x1f2ac000 0x0 0x1000
 +   0x0 0x1700 0x0 0x2000;
 +   reg-names = csr-reg, div-reg;
 +   csr-offset = 0x0;
 +   csr-mask = 0x2;
 +   enable-offset = 0x8;
 +   enable-mask = 0x2;
 +   divider-offset = 0x178;
 +   divider-width = 0x8;
 +   divider-shift = 0x0;
 +   clock-output-names = sdioclk;
 +   };
 +
 qmlclk: qmlclk {
 compatible = apm,xgene-device-clock;
 #clock-cells = 1;
 @@ -533,6 +567,15 @@
 interrupts = 0x0 0x4f 0x4;
 };

 +   mmc0: mmc@1c00 {
 +   compatible = arasan,sdhci-4.9a;
 +   reg = 0x0 0x1c00 0x0 0x100;
 +   interrupts = 0x0 0x49 0x4;
 +   dma-coherent;
 +   clock-names = clk_xin, clk_ahb;
 +   clocks = sdioclk 0, ahbclk 0;
 +   };
 +
 phy1: phy@1f21a000 {
 compatible = apm,xgene-phy;
 reg = 0x0 0x1f21a000 0x0 0x100;
 --
 1.8.2.1


Can you review this ?


-- 
Thanks,
with regards,
Suman Tripathi
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v7 2/2] mmc: host: sdhci: Add support to disable SDR104/SDR50/DDR50 based on capability register 0.

2015-05-18 Thread Suman Tripathi
On Wed, May 13, 2015 at 5:21 PM, Suman Tripathi stripa...@apm.com wrote:
 The sdhci framework disables SDR104/SDR50/DDR50 based on only quirk.
 This patch adds the support to disable SDR104/SDR50/DDR50 based on
 reading the capability register 0.

 Signed-off-by: Suman Tripathi stripa...@apm.com
 ---
 ---
  drivers/mmc/host/sdhci.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
 index 58c1770..a3d9b8a 100644
 --- a/drivers/mmc/host/sdhci.c
 +++ b/drivers/mmc/host/sdhci.c
 @@ -3118,7 +3118,8 @@ int sdhci_add_host(struct sdhci_host *host)
 }
 }

 -   if (host-quirks2  SDHCI_QUIRK2_NO_1_8_V)
 +   if (host-quirks2  SDHCI_QUIRK2_NO_1_8_V ||
 +   !(caps[0]  SDHCI_CAN_VDD_180))
 caps[1] = ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 |
SDHCI_SUPPORT_DDR50);

 --
 1.8.2.1


I didn't get any comments on this patch . Can anyone review it
(Michal , Ulf , Arnd etc ...)?


-- 
Thanks,
with regards,
Suman Tripathi
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: mm: BUG_ON with NUMA_BALANCING (kernel BUG at include/linux/swapops.h:131!)

2015-05-18 Thread Haren Myneni
Mel,
I am hitting this issue with 4.0 kernel and even with 3.19 and
3.17 kernels. I will also try with previous versions. Please let me
know any suggestions on the debugging.

Thanks
Haren

On 5/14/15, Haren Myneni hmyn...@gmail.com wrote:
 On 5/14/15, Mel Gorman mgor...@suse.de wrote:
 On Wed, May 13, 2015 at 01:17:54AM -0700, Haren Myneni wrote:
 Hi,

  I am getting BUG_ON in migration_entry_to_page() with 4.1.0-rc2
 kernel on powerpc system which has 512 CPUs (64 cores - 16 nodes) and
 1.6 TB memory. We can easily recreate this issue with kernel compile
 (make -j500). But I could not reproduce with numa_balancing=disable.


 Is this patched in any way? I ask because line 134 on 4.1.0-rc2 does not
 match up with a BUG_ON. It's close to a PageLocked check but I want to
 be sure there are no other modifications.

 Mel, Thanks for your help. I added some printks and dump_page() to get
 the page struct and swp_entry information.


 Otherwise, when was the last time this worked? Was 4.0 ok? As it can be
 easily reproduced, can the problem be bisected please?

 I did not try previous versions other than RHEL kernel (3.10.*). I
 will try with previous versions.

 In the failure case, also noticed pte and address values are matched
 in try_to_unmap_one() and remove_migration_pte(), but entry
 (swp_entry_t) value is different. So it looks like the page struct address in
 migration_entry_to_page() is not valid.

 try_to_unmap_one()
 {

 ...
 } else if (IS_ENABLED(CONFIG_MIGRATION)) {
 /*
  * Store the pfn of the page in a special migration
  * pte. do_swap_page() will wait until the
 migration
  * pte is removed and then restart fault handling.
  */
 BUG_ON(!(flags  TTU_MIGRATION));
 entry = make_migration_entry(page,
 pte_write(pteval));
 }
 swp_pte = swp_entry_to_pte(entry);
 if (pte_soft_dirty(pteval))
 swp_pte = pte_swp_mksoft_dirty(swp_pte);
 set_pte_at(mm, address, pte, swp_pte);

 /*pte=0xb16b8d0f8000 address=0x1815
 page=0xf00513f3e1e0  entry=0x3e000ec5ae34 */
 ...
 }

  remove_migration_pte()
 {
 ...
 /* address=0x1815 pte=0xb16b8d0f8000
 *old=0xf00513f3e1e0 */
 if (!is_migration_entry(entry) ||
 migration_entry_to_page(entry) != old)
 goto unlock;
 ...
 }

  migration_entry_to_page()  {
 pte=0xb16b8d0f8000  entry=0x3e0002c5ae34
 page=0xf000f3f3e1e0
 }


 Thanks
 Haren


 --
 Mel Gorman
 SUSE Labs


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [alsa-devel] [PATCH RESEND] sound: ppc: keywest: drop using attach adapter

2015-05-18 Thread Takashi Iwai
At Sat,  9 May 2015 19:42:22 +0200,
Wolfram Sang wrote:
 
 As we now have deferred probing, we can use a custom mechanism and
 finally get rid of this legacy interface from the i2c core.
 
 Signed-off-by: Wolfram Sang w...@the-dreams.de
 Tested-by: Dan DeVoto dand1...@yahoo.com
 Tested-by: Mark Elliott txliteb...@gmail.com
 ---
 
 Changes since last time: Added two test tags from the Debian PPC list.
 
 I'd love to have that in 4.2, so I can get rid of that legacy interface.
 I can take it via I2C but ALSA looks like the proper path to me.

I applied the patch now.  Sorry for the late response, as I've been on
vacation for last two weeks.


thanks,

Takashi

 
  sound/ppc/keywest.c | 23 +++
  1 file changed, 19 insertions(+), 4 deletions(-)
 
 diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
 index 0d1c27e911b8dd..d7627bae08362e 100644
 --- a/sound/ppc/keywest.c
 +++ b/sound/ppc/keywest.c
 @@ -52,7 +52,7 @@ static int keywest_attach_adapter(struct i2c_adapter 
 *adapter)
   return -EINVAL;
  
   if (strncmp(adapter-name, mac-io, 6))
 - return 0; /* ignored */
 + return -EINVAL; /* ignored */
  
   memset(info, 0, sizeof(struct i2c_board_info));
   strlcpy(info.type, keywest, I2C_NAME_SIZE);
 @@ -100,7 +100,6 @@ static struct i2c_driver keywest_driver = {
   .driver = {
   .name = PMac Keywest Audio,
   },
 - .attach_adapter = keywest_attach_adapter,
   .probe = keywest_probe,
   .remove = keywest_remove,
   .id_table = keywest_i2c_id,
 @@ -132,16 +131,32 @@ int snd_pmac_tumbler_post_init(void)
  /* exported */
  int snd_pmac_keywest_init(struct pmac_keywest *i2c)
  {
 - int err;
 + struct i2c_adapter *adap;
 + int err, i = 0;
  
   if (keywest_ctx)
   return -EBUSY;
  
 + adap = i2c_get_adapter(0);
 + if (!adap)
 + return -EPROBE_DEFER;
 +
   keywest_ctx = i2c;
  
   if ((err = i2c_add_driver(keywest_driver))) {
   snd_printk(KERN_ERR cannot register keywest i2c driver\n);
 + i2c_put_adapter(adap);
   return err;
   }
 - return 0;
 +
 + /* We assume Macs have consecutive I2C bus numbers starting at 0 */
 + while (adap) {
 + err = keywest_attach_adapter(adap);
 + if (!err)
 + return 0;
 + i2c_put_adapter(adap);
 + adap = i2c_get_adapter(++i);
 + }
 +
 + return -ENODEV;
  }
 -- 
 2.1.4
 
 ___
 Alsa-devel mailing list
 alsa-de...@alsa-project.org
 http://mailman.alsa-project.org/mailman/listinfo/alsa-devel
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3 09/13] selftests, powerpc: Add test for DSCR value inheritence across fork

2015-05-18 Thread Anshuman Khandual
On 05/18/2015 10:39 AM, Anton Blanchard wrote:
 Hi Anshuman,
 
 Thanks for getting these testcases into the kernel.
 
 This patch adds a test to verify that the changed DSCR value inside
 any process would be inherited by its child process across the fork
 system call.
 
 One issue I do notice (a bug in my original test cases too), is that we
 don't restore the DSCR on exit. I'm not sure we need to go to the
 trouble of saving and restoring it, but we should at least get it back
 to 0 when done.

Will save/restore the system DSCR default before exiting.

 
 Also a tiny nit, no need for a newline in perror():
 
 open() failed
 : Permission denied
 

sure, will remove them.

 With those changes you can add:
 
 Signed-off-by: Anton Blanchard an...@samba.org
 
 to the patches based on my testcases.

Thanks.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 13/13] selftests, powerpc: Add gitignore file for the new DSCR tests

2015-05-18 Thread Anshuman Khandual
This patch adds .gitignore for all the newly added DSCR tests.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 tools/testing/selftests/powerpc/dscr/.gitignore | 7 +++
 1 file changed, 7 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/dscr/.gitignore

diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore 
b/tools/testing/selftests/powerpc/dscr/.gitignore
new file mode 100644
index 000..b585c6c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 04/13] powerpc, dscr: Added some in-code documentation

2015-05-18 Thread Anshuman Khandual
This patch adds some in-code documentation to the DSCR related
code to make it more readable without having any functional
change to it.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/processor.h |  9 +
 arch/powerpc/kernel/sysfs.c  | 38 
 2 files changed, 47 insertions(+)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index bf117d8..28ded5d 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -295,6 +295,15 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC64
unsigned long   dscr;
+   /*
+* This member element dscr_inherit indicates that the process
+* has explicitly attempted and changed the DSCR register value
+* for itself. Hence kernel wont use the default CPU DSCR value
+* contained in the PACA structure anymore during process context
+* switch. Once this variable is set, this behaviour will also be
+* inherited to all the children of this process from that point
+* onwards.
+*/
int dscr_inherit;
unsigned long   ppr;/* used to save/restore SMT priority */
 #endif
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index fa1fd8a..692873b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -496,13 +496,34 @@ static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
 
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
 static unsigned long dscr_default;
 
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val:   Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in its PACA structure.
+ */
 static void read_dscr(void *val)
 {
*(unsigned long *)val = get_paca()-dscr_default;
 }
 
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val:   New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in its PACA structure.
+ */
 static void write_dscr(void *val)
 {
get_paca()-dscr_default = *(unsigned long *)val;
@@ -520,12 +541,29 @@ static void add_write_permission_dev_attr(struct 
device_attribute *attr)
attr-attr.mode |= 0200;
 }
 
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev:   Device structure
+ * @attr:  Device attribute structure
+ * @buf:   Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
 static ssize_t show_dscr_default(struct device *dev,
struct device_attribute *attr, char *buf)
 {
return sprintf(buf, %lx\n, dscr_default);
 }
 
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev:   Device structure
+ * @attr:  Device attribute structure
+ * @buf:   Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
 static ssize_t __used store_dscr_default(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 00/13] POWER DSCR fixes, improvements, docs and tests

2015-05-18 Thread Anshuman Khandual
This patch series has patches for POWER DSCR fixes, improvements,
in-code documentation, kernel support user documentation and selftest based
test cases. It has got five test cases which are derived from Anton's DSCR
test bucket which can be listed as follows.

(1) http://ozlabs.org/~anton/junkcode/dscr_default_test.c
(2) http://ozlabs.org/~anton/junkcode/dscr_explicit_test.c
(3) http://ozlabs.org/~anton/junkcode/dscr_inherit_exec_test.c
(4) http://ozlabs.org/~anton/junkcode/dscr_inherit_test.c
(5) http://ozlabs.org/~anton/junkcode/user_dscr_test.c

NOTE1: Anton's original inherit exec test expected the child to have system
default DSCR value instead of the inherited DSCR value from its parent.
But it looks like that's not the case when we execute the test; it always
inherits its parent's DSCR value over the exec call as well. So I have
changed the test program assuming it's correct to have the inherited DSCR
value in the fork/execed child program. Please let me know if that's not
correct and I am missing something there.

Changes in V4:
- Restoring the system DSCR default value in test cases before exiting
- Removed new-line from all the perror() calls
- Some minor cosmetic changes to the test cases
- Added Anton's SOB to the above mentioned five derived test cases

Changes in V3:
-
- Minor changes to last couple of sysfs test cases
- Added .gitignore file for the new test directory

Changes in V2: (https://lkml.org/lkml/2015/1/13/148)
-
- Updated the thread struct DSCR value inside mtspr facility exception path
- Modified the in code documentation to follow the kernel-doc format
- Added seven selftest based DSCR related test cases under powerpc

Original V1:

- Posted here at https://patchwork.ozlabs.org/patch/418583/

Anshuman Khandual (13):
  powerpc: Fix handling of DSCR related facility unavailable exception
  powerpc, process: Remove the unused extern dscr_default
  powerpc, offset: Change PACA_DSCR to PACA_DSCR_DEFAULT
  powerpc, dscr: Added some in-code documentation
  documentation, powerpc: Add documentation for DSCR support
  selftests, powerpc: Add test for system wide DSCR default
  selftests, powerpc: Add test for explicitly changing DSCR value
  selftests, powerpc: Add test for DSCR SPR numbers
  selftests, powerpc: Add test for DSCR value inheritence across fork
  selftests, powerpc: Add test for DSCR inheritence across fork & exec
  selftests, powerpc: Add test for all DSCR sysfs interfaces
  selftests, powerpc: Add thread based stress test for DSCR sysfs interfaces
  selftests, powerpc: Add gitignore file for the new DSCR tests

 Documentation/powerpc/00-INDEX |   2 +
 Documentation/powerpc/dscr.txt |  83 +
 arch/powerpc/include/asm/processor.h   |   9 ++
 arch/powerpc/kernel/asm-offsets.c  |   2 +-
 arch/powerpc/kernel/entry_64.S |   2 +-
 arch/powerpc/kernel/process.c  |   1 -
 arch/powerpc/kernel/sysfs.c|  38 ++
 arch/powerpc/kernel/tm.S   |   4 +-
 arch/powerpc/kernel/traps.c|  45 +++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S|   2 +-
 tools/testing/selftests/powerpc/Makefile   |   2 +-
 tools/testing/selftests/powerpc/dscr/.gitignore|   7 ++
 tools/testing/selftests/powerpc/dscr/Makefile  |  19 +++
 tools/testing/selftests/powerpc/dscr/dscr.h| 120 +++
 .../selftests/powerpc/dscr/dscr_default_test.c | 128 +
 .../selftests/powerpc/dscr/dscr_explicit_test.c|  72 
 .../powerpc/dscr/dscr_inherit_exec_test.c  | 118 +++
 .../selftests/powerpc/dscr/dscr_inherit_test.c |  96 
 .../selftests/powerpc/dscr/dscr_sysfs_test.c   |  98 
 .../powerpc/dscr/dscr_sysfs_thread_test.c  | 123 
 .../selftests/powerpc/dscr/dscr_user_test.c|  62 ++
 21 files changed, 1021 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/powerpc/dscr.txt
 create mode 100644 tools/testing/selftests/powerpc/dscr/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/dscr/Makefile
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr.h
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_default_test.c
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
 create mode 100644 
tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
 create mode 100644 
tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_user_test.c

-- 
1.9.3

___
Linuxppc-dev mailing list

[PATCH V4 10/13] selftests, powerpc: Add test for DSCR inheritence across fork exec

2015-05-18 Thread Anshuman Khandual
This patch adds a test case to verify that the changed DSCR value
inside any process would be inherited by its child across the fork
and exec system calls.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Signed-off-by: Anton Blanchard an...@samba.org
---
 tools/testing/selftests/powerpc/dscr/Makefile  |   2 +-
 .../powerpc/dscr/dscr_inherit_exec_test.c  | 118 +
 2 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 
tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index 81239e2..4e84309 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,5 +1,5 @@
 PROGS := dscr_default_test dscr_explicit_test dscr_user_test   \
-dscr_inherit_test
+dscr_inherit_test dscr_inherit_exec_test
 
 CFLAGS := $(CFLAGS) -lpthread
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644
index 000..7283954
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -0,0 +1,118 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright (C) 2012 Anton Blanchard an...@au.ibm.com, IBM
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include dscr.h
+
+static char prog[LEN_MAX];
+
+static void do_exec(unsigned long parent_dscr)
+{
+   unsigned long cur_dscr, cur_dscr_usr;
+
+   cur_dscr = get_dscr();
+   cur_dscr_usr = get_dscr_usr();
+
+   if (cur_dscr != parent_dscr) {
+   fprintf(stderr, Parent DSCR %ld was not inherited 
+   over exec (kernel value)\n, parent_dscr);
+   exit(1);
+   }
+
+   if (cur_dscr_usr != parent_dscr) {
+   fprintf(stderr, Parent DSCR %ld was not inherited 
+   over exec (user value)\n, parent_dscr);
+   exit(1);
+   }
+   exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+   unsigned long i, dscr = 0;
+   pid_t pid;
+
+   for (i = 0; i  COUNT; i++) {
+   dscr++;
+   if (dscr  DSCR_MAX)
+   dscr = 0;
+
+   if (dscr == get_default_dscr())
+   continue;
+
+   if (i % 2 == 0)
+   set_dscr_usr(dscr);
+   else
+   set_dscr(dscr);
+
+   /*
+* XXX: Force a context switch out so that DSCR
+* current value is copied into the thread struct
+* which is required for the child to inherit the
+* changed value.
+*/
+   sleep(1);
+
+   pid = fork();
+   if (pid == -1) {
+   perror(fork() failed);
+   exit(1);
+   } else if (pid) {
+   int status;
+
+   if (waitpid(pid, status, 0) == -1) {
+   perror(waitpid() failed);
+   exit(1);
+   }
+
+   if (!WIFEXITED(status)) {
+   fprintf(stderr, Child didn't exit cleanly\n);
+   exit(1);
+   }
+
+   if (WEXITSTATUS(status) != 0) {
+   fprintf(stderr, Child didn't exit cleanly\n);
+   return 1;
+   }
+   } else {
+   char dscr_str[16];
+
+   sprintf(dscr_str, %ld, dscr);
+   execlp(prog, prog, exec, dscr_str, NULL);
+   exit(1);
+   }
+   }
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   if (argc == 3  !strcmp(argv[1], exec)) {
+   unsigned long parent_dscr;
+
+   parent_dscr = atoi(argv[2]);
+   do_exec(parent_dscr);
+   } else if (argc != 1) {
+   fprintf(stderr, 

[PATCH V4 09/13] selftests, powerpc: Add test for DSCR value inheritence across fork

2015-05-18 Thread Anshuman Khandual
This patch adds a test to verify that the changed DSCR value inside
any process would be inherited by its child process across the fork
system call.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Signed-off-by: Anton Blanchard an...@samba.org
---
 tools/testing/selftests/powerpc/dscr/Makefile  |  3 +-
 .../selftests/powerpc/dscr/dscr_inherit_test.c | 96 ++
 2 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index ae865d8..81239e2 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,4 +1,5 @@
-PROGS := dscr_default_test dscr_explicit_test dscr_user_test
+PROGS := dscr_default_test dscr_explicit_test dscr_user_test   \
+dscr_inherit_test
 
 CFLAGS := $(CFLAGS) -lpthread
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
new file mode 100644
index 000..9d0ca37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -0,0 +1,96 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright (C) 2012 Anton Blanchard an...@au.ibm.com, IBM
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include dscr.h
+
+int dscr_inherit(void)
+{
+   unsigned long i, dscr = 0;
+   pid_t pid;
+
+   srand(getpid());
+   set_dscr(dscr);
+
+   for (i = 0; i  COUNT; i++) {
+   unsigned long cur_dscr, cur_dscr_usr;
+
+   dscr++;
+   if (dscr  DSCR_MAX)
+   dscr = 0;
+
+   if (i % 2 == 0)
+   set_dscr_usr(dscr);
+   else
+   set_dscr(dscr);
+
+   /*
+* XXX: Force a context switch out so that DSCR
+* current value is copied into the thread struct
+* which is required for the child to inherit the
+* changed value.
+*/
+   sleep(1);
+
+   pid = fork();
+   if (pid == -1) {
+   perror(fork() failed);
+   exit(1);
+   } else if (pid) {
+   int status;
+
+   if (waitpid(pid, status, 0) == -1) {
+   perror(waitpid() failed);
+   exit(1);
+   }
+
+   if (!WIFEXITED(status)) {
+   fprintf(stderr, Child didn't exit cleanly\n);
+   exit(1);
+   }
+
+   if (WEXITSTATUS(status) != 0) {
+   fprintf(stderr, Child didn't exit cleanly\n);
+   return 1;
+   }
+   } else {
+   cur_dscr = get_dscr();
+   if (cur_dscr != dscr) {
+   fprintf(stderr, Kernel DSCR should be %ld 
+   but is %ld\n, dscr, cur_dscr);
+   exit(1);
+   }
+
+   cur_dscr_usr = get_dscr_usr();
+   if (cur_dscr_usr != dscr) {
+   fprintf(stderr, User DSCR should be %ld 
+   but is %ld\n, dscr, cur_dscr_usr);
+   exit(1);
+   }
+   exit(0);
+   }
+   }
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(dscr_inherit, dscr_inherit_test);
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 01/13] powerpc: Fix handling of DSCR related facility unavailable exception

2015-05-18 Thread Anshuman Khandual
Currently DSCR (Data Stream Control Register) can be accessed with
mfspr or mtspr instructions inside a thread via two different SPR
numbers. One being the user accessible problem state SPR number 0x03
and the other being the privilege state SPR number 0x11. All access
through the privilege state SPR number get emulated through illegal
instruction exception. Any access through the problem state SPR number
raises one facility unavailable exception which sets the thread based
dscr_inherit bit and enables DSCR facility through FSCR register thus
allowing direct access to DSCR without going through this exception in
the future. We set the thread.dscr_inherit bit whether the access was
with mfspr or mtspr instruction which is neither correct nor does it
match the behaviour through the instruction emulation code path driven
from privilege state SPR number. User currently observes two different
kind of behaviour when accessing the DSCR through these two SPR numbers.
This problem can be observed through these two test cases by replacing
the privilege state SPR number with the problem state SPR number.

(1) http://ozlabs.org/~anton/junkcode/dscr_default_test.c
(2) http://ozlabs.org/~anton/junkcode/dscr_explicit_test.c

This patch fixes the problem by making sure that the behaviour visible
to the user remains the same irrespective of which SPR number is being
used. Inside facility unavailable exception, we check whether it was
caused by a mfspr or a mtspr instruction. In case of mfspr instruction,
just emulate the instruction. In case of mtspr instruction, set the
thread based dscr_inherit bit and also enable the facility through FSCR.
All user SPR based mfspr instruction will be emulated till one user SPR
based mtspr has been executed.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/traps.c | 45 -
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 19e4744..6530f1b 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
};
char *facility = unknown;
u64 value;
+   u32 instword, rd;
u8 status;
bool hv;
 
@@ -1388,12 +1389,46 @@ void facility_unavailable_exception(struct pt_regs 
*regs)
 
status = value  56;
if (status == FSCR_DSCR_LG) {
-   /* User is acessing the DSCR.  Set the inherit bit and allow
-* the user to set it directly in future by setting via the
-* FSCR DSCR bit.  We always leave HFSCR DSCR set.
+   /*
+* User is accessing the DSCR register using the problem
+* state only SPR number (0x03) either through a mfspr or
+* a mtspr instruction. If it is a write attempt through
+* a mtspr, then we set the inherit bit. This also allows
+* the user to write or read the register directly in the
+* future by setting via the FSCR DSCR bit. But in case it
+* is a read DSCR attempt through a mfspr instruction, we
+* just emulate the instruction instead. This code path will
+* always emulate all the mfspr instructions till the user
+* has attempted atleast one mtspr instruction. This way it
+* preserves the same behaviour when the user is accessing
+* the DSCR through privilege level only SPR number (0x11)
+* which is emulated through illegal instruction exception.
+* We always leave HFSCR DSCR set.
 */
-   current-thread.dscr_inherit = 1;
-   mtspr(SPRN_FSCR, value | FSCR_DSCR);
+   if (get_user(instword, (u32 __user *)(regs-nip))) {
+   pr_err(Failed to fetch the user instruction\n);
+   return;
+   }
+
+   /* Write into DSCR (mtspr 0x03, RS) */
+   if ((instword  PPC_INST_MTSPR_DSCR_USER_MASK)
+   == PPC_INST_MTSPR_DSCR_USER) {
+   rd = (instword  21)  0x1f;
+   current-thread.dscr = regs-gpr[rd];
+   current-thread.dscr_inherit = 1;
+   mtspr(SPRN_FSCR, value | FSCR_DSCR);
+   }
+
+   /* Read from DSCR (mfspr RT, 0x03) */
+   if ((instword  PPC_INST_MFSPR_DSCR_USER_MASK)
+   == PPC_INST_MFSPR_DSCR_USER) {
+   if (emulate_instruction(regs)) {
+   pr_err(DSCR based mfspr emulation failed\n);
+   return;
+   }
+   regs-nip += 4;
+   emulate_single_step(regs);
+   }
 

[PATCH V4 08/13] selftests, powerpc: Add test for DSCR SPR numbers

2015-05-18 Thread Anshuman Khandual
This patch adds a test which verifies the DSCR privilege and
problem state SPR read & write accesses while making sure that the
results are always the same irrespective of which SPR number is
being used.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Signed-off-by: Anton Blanchard an...@samba.org
---
 tools/testing/selftests/powerpc/dscr/Makefile  |  2 +-
 .../selftests/powerpc/dscr/dscr_user_test.c| 62 ++
 2 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_user_test.c

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index aede453..ae865d8 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,4 +1,4 @@
-PROGS := dscr_default_test dscr_explicit_test
+PROGS := dscr_default_test dscr_explicit_test dscr_user_test
 
 CFLAGS := $(CFLAGS) -lpthread
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
new file mode 100644
index 000..c23c2f3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -0,0 +1,62 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value through both the SPR number
+ * based mtspr instruction and then makes sure that the same is
+ * reflected through mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright (C) 2013 Anton Blanchard an...@au.ibm.com, IBM
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include dscr.h
+
+static int check_dscr(char *str)
+{
+   unsigned long cur_dscr, cur_dscr_usr;
+
+   cur_dscr = get_dscr();
+   cur_dscr_usr = get_dscr_usr();
+   if (cur_dscr != cur_dscr_usr) {
+   printf(%s set, kernel get %lx != user get %lx\n,
+   str, cur_dscr, cur_dscr_usr);
+   return 1;
+   }
+   return 0;
+}
+
+int dscr_user(void)
+{
+   int i;
+
+   check_dscr();
+
+   for (i = 0; i  COUNT; i++) {
+   set_dscr(i);
+   if (check_dscr(kernel))
+   return 1;
+   }
+
+   for (i = 0; i  COUNT; i++) {
+   set_dscr_usr(i);
+   if (check_dscr(user))
+   return 1;
+   }
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(dscr_user, dscr_user_test);
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 03/13] powerpc, offset: Change PACA_DSCR to PACA_DSCR_DEFAULT

2015-05-18 Thread Anshuman Khandual
PACA_DSCR offset macro tracks dscr_default element in the paca
structure. Better change the name of this macro to match that
of the data element it tracks. Makes the code more readable.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/asm-offsets.c   | 2 +-
 arch/powerpc/kernel/entry_64.S  | 2 +-
 arch/powerpc/kernel/tm.S| 4 ++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 0034b6b..9823057 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -247,7 +247,7 @@ int main(void)
 #endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
-   DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
+   DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, 
starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index afbc200..27e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -556,7 +556,7 @@ BEGIN_FTR_SECTION
ld  r0,THREAD_DSCR(r4)
cmpwi   r6,0
bne 1f
-   ld  r0,PACA_DSCR(r13)
+   ld  r0,PACA_DSCR_DEFAULT(r13)
 1:
 BEGIN_FTR_SECTION_NESTED(70)
mfspr   r8, SPRN_FSCR
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 5754b22..bf8f34a 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -293,7 +293,7 @@ dont_backup_fp:
ld  r2, STK_GOT(r1)
 
/* Load CPU's default DSCR */
-   ld  r0, PACA_DSCR(r13)
+   ld  r0, PACA_DSCR_DEFAULT(r13)
mtspr   SPRN_DSCR, r0
 
blr
@@ -473,7 +473,7 @@ restore_gprs:
ld  r2, STK_GOT(r1)
 
/* Load CPU's default DSCR */
-   ld  r0, PACA_DSCR(r13)
+   ld  r0, PACA_DSCR_DEFAULT(r13)
mtspr   SPRN_DSCR, r0
 
blr
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4d70df2..faa86e9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -324,7 +324,7 @@ kvm_start_guest:
 kvm_secondary_got_guest:
 
/* Set HSTATE_DSCR(r13) to something sensible */
-   ld  r6, PACA_DSCR(r13)
+   ld  r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
 
/* Order load of vcore, ptid etc. after load of vcpu */
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 11/13] selftests, powerpc: Add test for all DSCR sysfs interfaces

2015-05-18 Thread Anshuman Khandual
This test continuously updates the system wide DSCR default value
in the sysfs interface and makes sure that the same is reflected
across all the sysfs interfaces for each individual CPUs present
on the system.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 tools/testing/selftests/powerpc/dscr/Makefile  |  3 +-
 .../selftests/powerpc/dscr/dscr_sysfs_test.c   | 98 ++
 2 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index 4e84309..fada526 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,5 +1,6 @@
 PROGS := dscr_default_test dscr_explicit_test dscr_user_test   \
-dscr_inherit_test dscr_inherit_exec_test
+dscr_inherit_test dscr_inherit_exec_test   \
+dscr_sysfs_test
 
 CFLAGS := $(CFLAGS) -lpthread
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644
index 000..f7082ce
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -0,0 +1,98 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates the system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well, as verified from their sysfs interfaces.
+ *
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include dscr.h
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+   char buf[10];
+   int fd, rc;
+
+   fd = open(file, O_RDWR);
+   if (fd == -1) {
+   perror(open() failed);
+   return 1;
+   }
+
+   rc = read(fd, buf, sizeof(buf));
+   if (rc == -1) {
+   perror(read() failed);
+   return 1;
+   }
+   close(fd);
+
+   buf[rc] = '\0';
+   if (strtol(buf, NULL, 16) != val) {
+   printf(DSCR match failed: %ld (system) %ld (cpu)\n,
+   val, strtol(buf, NULL, 16));
+   return 1;
+   }
+   return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+   DIR *sysfs;
+   struct dirent *dp;
+   char file[LEN_MAX];
+
+   sysfs = opendir(CPU_PATH);
+   if (!sysfs) {
+   perror(opendir() failed);
+   return 1;
+   }
+
+   while ((dp = readdir(sysfs))) {
+   if (!(dp-d_type  DT_DIR))
+   continue;
+   if (!strcmp(dp-d_name, cpuidle))
+   continue;
+   if (!strstr(dp-d_name, cpu))
+   continue;
+
+   sprintf(file, %s%s/dscr, CPU_PATH, dp-d_name);
+   if (access(file, F_OK))
+   continue;
+
+   if (check_cpu_dscr_default(file, val))
+   return 1;
+   }
+   closedir(sysfs);
+   return 0;
+}
+
+int dscr_sysfs(void)
+{
+   unsigned long orig_dscr_default;
+   int i, j;
+
+   orig_dscr_default = get_default_dscr();
+   for (i = 0; i  COUNT; i++) {
+   for (j = 0; j  DSCR_MAX; j++) {
+   set_default_dscr(j);
+   if (check_all_cpu_dscr_defaults(j))
+   goto fail;
+   }
+   }
+   set_default_dscr(orig_dscr_default);
+   return 0;
+fail:
+   set_default_dscr(orig_dscr_default);
+   return 1;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(dscr_sysfs, dscr_sysfs_test);
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 02/13] powerpc, process: Remove the unused extern dscr_default

2015-05-18 Thread Anshuman Khandual
The process context switch code no longer uses dscr_default variable
from the sysfs.c file. The variable became unused when we started
storing the CPU specific DSCR value in the PACA structure instead.
This patch just removes this extern declaration. It was originally
added by the following commit.

efcac658: powerpc: Per process DSCR + some fixes (try#4)

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/process.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index febb50d..8005e18 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1112,7 +1112,6 @@ static void setup_ksp_vsid(struct task_struct *p, 
unsigned long sp)
 /*
  * Copy a thread..
  */
-extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
 
 /*
  * Copy architecture-specific thread state
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 05/13] documentation, powerpc: Add documentation for DSCR support

2015-05-18 Thread Anshuman Khandual
This patch adds a new documentation file explaining the DSCR
support on powerpc platforms. This explains DSCR related data
structure, code paths and also available user interfaces. Any
further functional changes to the DSCR support in the kernel
should definitely update the documentation here.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 Documentation/powerpc/00-INDEX |  2 +
 Documentation/powerpc/dscr.txt | 83 ++
 2 files changed, 85 insertions(+)
 create mode 100644 Documentation/powerpc/dscr.txt

diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX
index 6fd0e8b..9dc845c 100644
--- a/Documentation/powerpc/00-INDEX
+++ b/Documentation/powerpc/00-INDEX
@@ -30,3 +30,5 @@ ptrace.txt
- Information on the ptrace interfaces for hardware debug registers.
 transactional_memory.txt
- Overview of the Power8 transactional memory support.
+dscr.txt
+   - Overview of DSCR (Data Stream Control Register) support.
diff --git a/Documentation/powerpc/dscr.txt b/Documentation/powerpc/dscr.txt
new file mode 100644
index 000..1ff4400
--- /dev/null
+++ b/Documentation/powerpc/dscr.txt
@@ -0,0 +1,83 @@
+   DSCR (Data Stream Control Register)
+   
+
+DSCR register in powerpc allows user to have some control of prefetch of data
+stream in the processor. Please refer to the ISA documents or related manual
+for more detailed information regarding how to use this DSCR to attain this
+control of the prefetches. This document here provides an overview of kernel
+support for DSCR, related kernel objects, its functionalities and exported
+user interface.
+
+(A) Data Structures:
+
+   (1) thread_struct:
+   dscr/* Thread DSCR value */
+   dscr_inherit/* Thread has changed default DSCR */
+
+   (2) PACA:
+   dscr_default/* per-CPU DSCR default value */
+
+   (3) sysfs.c:
+   dscr_default/* System DSCR default value */
+
+(B) Scheduler Changes:
+
+   Scheduler will write the per-CPU DSCR default which is stored in the
+   CPU's PACA value into the register if the thread has dscr_inherit value
+   cleared which means that it has not changed the default DSCR till now.
+   If the dscr_inherit value is set which means that it has changed the
+   default DSCR value, scheduler will write the changed value which will
+   now be contained in thread struct's dscr into the register instead of
+   the per-CPU default PACA based DSCR value.
+
+   NOTE: Please note here that the system wide global DSCR value never
+   gets used directly in the scheduler process context switch at all.
+
+(C) SYSFS Interface:
+
+   Global DSCR default:/sys/devices/system/cpu/dscr_default
+   CPU specific DSCR default:  /sys/devices/system/cpu/cpuN/dscr
+
+   Changing the global DSCR default in the sysfs will change all the CPU
+   specific DSCR defaults immediately in their PACA structures. Again if
+   the current process has the dscr_inherit clear, it also writes the new
+   value into every CPU's DSCR register right away and updates the current
+   thread's DSCR value as well.
+
+   Changing the CPU specific DSCR default value in the sysfs does exactly
+   the same thing as above but unlike the global one above, it just changes
+   stuff for that particular CPU instead of for all the CPUs on the system.
+
+(D) User Space Instructions:
+
+   The DSCR register can be accessed in the user space using any of these
+   two SPR numbers available for that purpose.
+
+   (1) Problem state SPR:  0x03(Un-privileged, POWER8 only)
+   (2) Privileged state SPR:   0x11(Privileged)
+
+   Accessing DSCR through privileged SPR number (0x11) from user space
+   works, as it is emulated following an illegal instruction exception
+   inside the kernel. Both mfspr and mtspr instructions are emulated.
+
+   Accessing DSCR through user level SPR (0x03) from user space will first
+   create a facility unavailable exception. Inside this exception handler
+   all mfspr instruction based read attempts will get emulated and returned
+   whereas the first mtspr instruction based write attempts will enable
+   the DSCR facility for the next time around (both for read and write) by
+   setting DSCR facility in the FSCR register.
+
+(E) Specifics about 'dscr_inherit':
+
+   The thread struct element 'dscr_inherit' represents whether the thread
+   in question has attempted and changed the DSCR itself using any of the
+   following methods. This element signifies whether the thread wants to
+   use the CPU default DSCR value or its own changed DSCR value in the
+   kernel.
+
+   (1) mtspr instruction   (SPR number 0x03)
+   (2) 

[PATCH V4 07/13] selftests, powerpc: Add test for explicitly changing DSCR value

2015-05-18 Thread Anshuman Khandual
This patch adds a test which modifies the DSCR using mtspr instruction
and verifies the change using mfspr instruction. It uses both the
privilege state SPR as well as the problem state SPR for the purpose.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Signed-off-by: Anton Blanchard an...@samba.org
---
 tools/testing/selftests/powerpc/dscr/Makefile  |  2 +-
 .../selftests/powerpc/dscr/dscr_explicit_test.c| 72 ++
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index 0aa90ab..aede453 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,4 +1,4 @@
-PROGS := dscr_default_test
+PROGS := dscr_default_test dscr_explicit_test
 
 CFLAGS := $(CFLAGS) -lpthread
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
new file mode 100644
index 000..a0fe368
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -0,0 +1,72 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright (C) 2012 Anton Blanchard an...@au.ibm.com, IBM
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include dscr.h
+
+int dscr_explicit(void)
+{
+   unsigned long i, dscr = 0;
+
+   srand(getpid());
+   set_dscr(dscr);
+
+   for (i = 0; i  COUNT; i++) {
+   unsigned long cur_dscr, cur_dscr_usr;
+   double ret = uniform_deviate(rand());
+
+   if (ret  0.001) {
+   dscr++;
+   if (dscr  DSCR_MAX)
+   dscr = 0;
+
+   set_dscr(dscr);
+   }
+
+   cur_dscr = get_dscr();
+   if (cur_dscr != dscr) {
+   fprintf(stderr, Kernel DSCR should be %ld but 
+   is %ld\n, dscr, cur_dscr);
+   return 1;
+   }
+
+   ret = uniform_deviate(rand());
+   if (ret  0.001) {
+   dscr++;
+   if (dscr  DSCR_MAX)
+   dscr = 0;
+
+   set_dscr_usr(dscr);
+   }
+
+   cur_dscr_usr = get_dscr_usr();
+   if (cur_dscr_usr != dscr) {
+   fprintf(stderr, User DSCR should be %ld but 
+   is %ld\n, dscr, cur_dscr_usr);
+   return 1;
+   }
+   }
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   return test_harness(dscr_explicit, dscr_explicit_test);
+}
-- 
1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V4 12/13] selftests, powerpc: Add thread based stress test for DSCR sysfs interfaces

2015-05-18 Thread Anshuman Khandual
This patch adds a test to update the system wide DSCR value repeatedly
and then verifies that any thread on any given CPU on the system must
be able to see the same DSCR value whether it is being read through
the problem state based SPR or the privilege state based SPR.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
---
 tools/testing/selftests/powerpc/dscr/Makefile  |   2 +-
 .../powerpc/dscr/dscr_sysfs_thread_test.c  | 123 +
 2 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 
tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c

diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
index fada526..834ef88 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,6 +1,6 @@
 PROGS := dscr_default_test dscr_explicit_test dscr_user_test   \
 dscr_inherit_test dscr_inherit_exec_test   \
-dscr_sysfs_test
+dscr_sysfs_test dscr_sysfs_thread_test
 
 CFLAGS := $(CFLAGS) -lpthread
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644
index 000..bf4a8a6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -0,0 +1,123 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * sysfs interface which should then update all the CPU specific
+ * DSCR default values which must also be then visible to threads
+ * executing on individual CPUs on the system.
+ *
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define _GNU_SOURCE
+#include dscr.h
+
+static pthread_mutex_t lock;   /* Pthread lock */
+static cpu_set_t cpuset;   /* Thread cpu set */
+static int target; /* Thread target cpu */
+static unsigned long *result;  /* Thread exit status array */
+
+#define NR_ONLN sysconf(_SC_NPROCESSORS_ONLN)
+
+static void *test_thread_dscr(void *in)
+{
+   unsigned long cur_dscr, cur_dscr_usr;
+   unsigned long val = (unsigned long)in;
+
+   pthread_mutex_lock(lock);
+   target++;
+   if (target == NR_ONLN)
+   target =  0;
+   CPU_ZERO(cpuset);
+   CPU_SET(target, cpuset);
+
+   if (sched_setaffinity(0, sizeof(cpuset), cpuset)) {
+   perror(sched_setaffinity() failed);
+   pthread_mutex_unlock(lock);
+   result[target] = 1;
+   pthread_exit(result[target]);
+   }
+   pthread_mutex_unlock(lock);
+
+   cur_dscr = get_dscr();
+   cur_dscr_usr = get_dscr_usr();
+
+   if (val != cur_dscr) {
+   printf([cpu %d] Kernel DSCR should be %ld but is %ld\n,
+   sched_getcpu(), val, cur_dscr);
+   result[target] = 1;
+   pthread_exit(result[target]);
+   }
+
+   if (val != cur_dscr_usr) {
+   printf([cpu %d] User DSCR should be %ld but is %ld\n,
+   sched_getcpu(), val, cur_dscr_usr);
+   result[target] = 1;
+   pthread_exit(result[target]);
+   }
+   result[target] = 0;
+   pthread_exit(result[target]);
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+   pthread_t threads[NR_ONLN];
+   unsigned long *status[NR_ONLN];
+   unsigned long i;
+
+   for (i = 0; i  NR_ONLN; i++) {
+   if (pthread_create(threads[i], NULL,
+   test_thread_dscr, (void *)val)) {
+   perror(pthread_create() failed);
+   return 1;
+   }
+   }
+
+   for (i = 0; i  NR_ONLN; i++) {
+   if (pthread_join(threads[i], (void **)(status[i]))) {
+   perror(pthread_join() failed);
+   return 1;
+   }
+
+   if (*status[i]) {
+   printf( %ldth thread join failed with %ld\n,
+   i, *status[i]);
+   return 1;
+   }
+   }
+   return 0;
+}
+
+int dscr_sysfs_thread(void)
+{
+   unsigned long orig_dscr_default;
+   int i, j;
+
+   result = malloc(sizeof(unsigned long) * NR_ONLN);
+   pthread_mutex_init(lock, NULL);
+   target = 0;
+   orig_dscr_default = get_default_dscr();
+   for (i = 0; i  COUNT; i++) {
+   for (j = 0; j  DSCR_MAX; j++) {
+   

[PATCH V4 06/13] selftests, powerpc: Add test for system wide DSCR default

2015-05-18 Thread Anshuman Khandual
This patch adds a test case for the system wide DSCR default
value, which when changed through its sysfs interface must
be visible to all threads reading DSCR either through the
privilege state SPR or the problem state SPR. The DSCR value
change should be immediate as well.

Acked-by: Shuah Khan shua...@osg.samsung.com
Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
Signed-off-by: Anton Blanchard an...@samba.org
---
 tools/testing/selftests/powerpc/Makefile   |   2 +-
 tools/testing/selftests/powerpc/dscr/Makefile  |  17 +++
 tools/testing/selftests/powerpc/dscr/dscr.h| 120 +++
 .../selftests/powerpc/dscr/dscr_default_test.c | 128 +
 4 files changed, 266 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/dscr/Makefile
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr.h
 create mode 100644 tools/testing/selftests/powerpc/dscr/dscr_default_test.c

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 5ad0423..03ca2e6 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -12,7 +12,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror 
-DGIT_VERSION='$(GIT_VERSION)' -I$(CUR
 
 export CFLAGS
 
-SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian
+SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian dscr
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile 
b/tools/testing/selftests/powerpc/dscr/Makefile
new file mode 100644
index 000..0aa90ab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -0,0 +1,17 @@
+PROGS := dscr_default_test
+
+CFLAGS := $(CFLAGS) -lpthread
+
+all: $(PROGS)
+
+$(PROGS): ../harness.c
+
+run_tests: all
+   @-for PROG in $(PROGS); do \
+   ./$$PROG; \
+   done;
+
+clean:
+   rm -f $(PROGS) *.o
+
+.PHONY: all run_tests clean
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h 
b/tools/testing/selftests/powerpc/dscr/dscr.h
new file mode 100644
index 000..b9e734d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -0,0 +1,120 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright (C) 2012 Anton Blanchard an...@au.ibm.com, IBM
+ * Copyright (C) 2015 Anshuman Khandual khand...@linux.vnet.ibm.com, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include unistd.h
+#include stdio.h
+#include stdlib.h
+#include string.h
+#include fcntl.h
+#include dirent.h
+#include pthread.h
+#include sched.h
+#include sys/types.h
+#include sys/stat.h
+#include sys/wait.h
+
+#include utils.h
+
+#define SPRN_DSCR  0x11/* Privilege state SPR */
+#define SPRN_DSCR_USR  0x03/* Problem state SPR */
+#define THREADS100 /* Max threads */
+#define COUNT  100 /* Max iterations */
+#define DSCR_MAX   16  /* Max DSCR value */
+#define LEN_MAX100 /* Max name length */
+
+#define DSCR_DEFAULT   /sys/devices/system/cpu/dscr_default
+#define CPU_PATH   /sys/devices/system/cpu/
+
+#define rmb()  asm volatile(lwsync:::memory)
+#define wmb()  asm volatile(lwsync:::memory)
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)(x))
+
+/* Privilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+   unsigned long ret;
+
+   asm volatile(mfspr %0,%1 : =r (ret): i (SPRN_DSCR));
+
+   return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+   asm volatile(mtspr %1,%0 : : r (val), i (SPRN_DSCR));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+   unsigned long ret;
+
+   asm volatile(mfspr %0,%1 : =r (ret): i (SPRN_DSCR_USR));
+
+   return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+   asm volatile(mtspr %1,%0 : : r (val), i (SPRN_DSCR_USR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+   int fd = -1;
+   char buf[16];
+   unsigned long val;
+
+   if (fd == -1) {
+   fd = open(DSCR_DEFAULT, O_RDONLY);
+   if (fd == -1) {
+   perror(open() failed);
+   exit(1);
+   }
+   }
+   memset(buf, 0, sizeof(buf));
+   lseek(fd, 0, SEEK_SET);
+   read(fd, buf, sizeof(buf));
+   sscanf(buf, %lx, val);
+   close(fd);
+   return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+   int fd = -1;
+   char buf[16];
+
+   if (fd == -1) {
+   fd =