[PATCH 2/2] powerpc: Implement arch_spin_is_locked() using arch_spin_value_unlocked()

2014-01-14 Thread Michael Ellerman
At a glance these are just the inverse of each other. The one subtlety
is that arch_spin_value_unlocked() takes the lock by value, rather than
as a pointer, which is important for the lockref code.

On the other hand arch_spin_is_locked() doesn't really care, so
implement it in terms of arch_spin_value_unlocked().

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/spinlock.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index 5162f8c..a30ef69 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,8 +28,6 @@
 #include 
 #include 
 
-#define arch_spin_is_locked(x) ((x)->slock != 0)
-
 #ifdef CONFIG_PPC64
 /* use 0x80yy when locked, where yy == CPU number */
 #ifdef __BIG_ENDIAN__
@@ -59,6 +57,11 @@ static __always_inline int 
arch_spin_value_unlocked(arch_spinlock_t lock)
return lock.slock == 0;
 }
 
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+   return !arch_spin_value_unlocked(*lock);
+}
+
 /*
  * This returns the old value in the lock, so we succeeded
  * in getting the lock if the return value is 0.
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/2] powerpc: Add support for the optimised lockref implementation

2014-01-14 Thread Michael Ellerman
This commit adds the architecture support required to enable the
optimised implementation of lockrefs.

That's as simple as defining arch_spin_value_unlocked() and selecting
the Kconfig option.

We also define cmpxchg64_relaxed(), because the lockref code does not
need the cmpxchg to have barrier semantics.

Using Linus' test case[1] on one system I see a 4x improvement for the
basic enablement, and a further 1.3x for cmpxchg64_relaxed(), for a
total of 5.3x vs the baseline.

On another system I see more like 2x improvement.

[1]: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/Kconfig| 1 +
 arch/powerpc/include/asm/cmpxchg.h  | 1 +
 arch/powerpc/include/asm/spinlock.h | 5 +
 3 files changed, 7 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b44b52c..b34b53d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,6 +139,7 @@ config PPC
select OLD_SIGACTION if PPC32
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_IRQ_EXIT_ON_IRQ_STACK
+   select ARCH_USE_CMPXCHG_LOCKREF if PPC64
 
 config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/include/asm/cmpxchg.h 
b/arch/powerpc/include/asm/cmpxchg.h
index e245aab..d463c68 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -300,6 +300,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, 
unsigned long new,
BUILD_BUG_ON(sizeof(*(ptr)) != 8);  \
cmpxchg_local((ptr), (o), (n)); \
   })
+#define cmpxchg64_relaxed  cmpxchg64_local
 #else
 #include 
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
diff --git a/arch/powerpc/include/asm/spinlock.h 
b/arch/powerpc/include/asm/spinlock.h
index 5f54a74..5162f8c 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -54,6 +54,11 @@
 #define SYNC_IO
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+   return lock.slock == 0;
+}
+
 /*
  * This returns the old value in the lock, so we succeeded
  * in getting the lock if the return value is 0.
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc, perf: Define perf_event_print_debug() to print PMU register values

2014-01-14 Thread Anshuman Khandual
As of now "echo p > /proc/sysrq-trigger" command does not print anything on
the console as we have a blank perf_event_print_debug function. This patch
defines perf_event_print_debug function to print various PMU registers.

With this patch, "echo p > /proc/sysrq-trigger" command on a POWER8 system
generates this sample output on the console.

echo p > /proc/sysrq-trigger
SysRq : Show Regs
CPU#5 PMC#1:   PMC#2:  
CPU#5 PMC#3:   PMC#4:  
CPU#5 PMC#5:  d03737ba PMC#6:  843aaf8c
CPU#5 MMCR0:   MMCR1:  
CPU#5 MMCRA:   SIAR:   
CPU#5 SDAR:   
CPU#5 SIER:   
CPU#5 MMCR2:   EBBHR:  
CPU#5 EBBRR:   BESCR:  

Signed-off-by: Anshuman Khandual 
---
 arch/powerpc/perf/core-book3s.c | 54 +
 1 file changed, 54 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 29b89e8..ac35aae 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -562,9 +562,63 @@ out:
 #endif /* CONFIG_PPC64 */
 
 static void perf_event_interrupt(struct pt_regs *regs);
+static unsigned long read_pmc(int idx);
 
+/* Called from generic sysrq dump register handler */
 void perf_event_print_debug(void)
 {
+   unsigned long flags;
+   int cpu, idx;
+
+   if (!ppmu->n_counter)
+   return;
+
+   local_irq_save(flags);
+
+   cpu = smp_processor_id();
+
+   /* General PMU counters */
+   for (idx = 1; idx <= ppmu->n_counter; idx = idx + 2)
+   pr_info("CPU#%d PMC#%d:  %08lx PMC#%d:  %08lx\n",
+   cpu, idx, read_pmc(idx), idx + 1, read_pmc(idx + 1));
+
+   /* General PMU config registers */
+   pr_info("CPU#%d MMCR0:  %016lx MMCR1:  %016lx\n", cpu,
+   mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1));
+   pr_info("CPU#%d MMCRA:  %016lx SIAR:   %016lx\n", cpu,
+   mfspr(SPRN_MMCRA), mfspr(SPRN_SIAR));
+
+#ifdef CONFIG_PPC64
+   pr_info("CPU#%d SDAR:   %016lx\n", cpu, mfspr(SPRN_SDAR));
+#endif /* CONFIG_PPC64 */
+
+   /* PMU specific config registers */
+   if (ppmu->flags & PPMU_HAS_SIER)
+   pr_info("CPU#%d SIER:   %016lx\n", cpu, mfspr(SPRN_SIER));
+
+
+   if (ppmu->flags & PPMU_EBB) {
+   pr_info("CPU#%d MMCR2:  %016lx EBBHR:  %016lx\n", cpu,
+   mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR));
+   pr_info("CPU#%d EBBRR:  %016lx BESCR:  %016lx\n", cpu,
+   mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
+   }
+
+   if (ppmu->flags & PPMU_BHRB) {
+   u64 val;
+
+   for (idx = 0; idx < ppmu->bhrb_nr; idx++) {
+   val = read_bhrb(idx);
+
+   /* BHRB terminal marker */
+   if (!val)
+   break;
+
+   pr_info("CPU#%d BHRBE[%d]:  %016llx\n", cpu, idx, val);
+   }
+   }
+
+   local_irq_restore(flags);
 }
 
 /*
-- 
1.7.11.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 0/4] powernv: kvm: numa fault improvement

2014-01-14 Thread Liu ping fan
On Thu, Jan 9, 2014 at 8:08 PM, Alexander Graf  wrote:
>
> On 11.12.2013, at 09:47, Liu Ping Fan  wrote:
>
>> This series is based on Aneesh's series  "[PATCH -V2 0/5] powerpc: mm: Numa 
>> faults support for ppc64"
>>
>> For this series, I apply the same idea from the previous thread "[PATCH 0/3] 
>> optimize for powerpc _PAGE_NUMA"
>> (for which, I still try to get a machine to show nums)
>>
>> But for this series, I think that I have a good justification -- the fact of 
>> heavy cost when switching context between guest and host,
>> which is  well known.
>
> This cover letter isn't really telling me anything. Please put a proper 
> description of what you're trying to achieve, why you're trying to achieve 
> what you're trying and convince your readers that it's a good idea to do it 
> the way you do it.
>
Sorry for the unclear message. After the introduction of _PAGE_NUMA,
kvmppc_do_h_enter() cannot fill in the hpte for the guest. Instead, it
has to rely on the host's kvmppc_book3s_hv_page_fault() to call
do_numa_page() to do the NUMA fault check. This incurs overhead
when exiting from rmode to vmode.  My idea is that in
kvmppc_do_h_enter() we do a quick check: if the page is correctly placed,
there is no need to exit to vmode (i.e. saving htab, slab switching).

>> If my suppose is correct, will CCing k...@vger.kernel.org from next version.
>
> This translates to me as "This is an RFC"?
>
Yes, I am not quite sure about it. I have no bare-metal to verify it.
So I hope at least, from the theory, it is correct.

Thanks and regards,
Ping Fan
>
> Alex
>
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


powerpc/powernv: Call OPAL sync before kexec'ing

2014-01-14 Thread Benjamin Herrenschmidt
From: Vasant Hegde 

It's possible that OPAL may be writing to host memory during
kexec (e.g. in the dump retrieval scenario). In this situation we
might end up corrupting host memory.

This patch makes an OPAL sync call to make sure OPAL stops
writing to host memory before kexec'ing.

Signed-off-by: Vasant Hegde 
Signed-off-by: Benjamin Herrenschmidt 
---

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0ac6f04..1920f70 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -162,6 +162,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_DUMP_ACK  84
 #define OPAL_GET_MSG   85
 #define OPAL_CHECK_ASYNC_COMPLETION86
+#define OPAL_SYNC_HOST_REBOOT  87
 
 #ifndef __ASSEMBLY__
 
@@ -841,6 +842,7 @@ int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer);
 int64_t opal_dump_ack(uint32_t dump_id);
 int64_t opal_get_msg(uint64_t buffer, size_t size);
 int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token);
+int64_t opal_sync_host_reboot(void);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int 
depth, void *data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index d58fcae..9e7ca21 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -127,3 +127,4 @@ OPAL_CALL(opal_dump_read,   OPAL_DUMP_READ);
 OPAL_CALL(opal_dump_ack,   OPAL_DUMP_ACK);
 OPAL_CALL(opal_get_msg,OPAL_GET_MSG);
 OPAL_CALL(opal_check_completion,   OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_sync_host_reboot,   OPAL_SYNC_HOST_REBOOT);
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 0ba1ccb..619b94a 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -581,10 +582,25 @@ subsys_initcall(opal_init);
 void opal_shutdown(void)
 {
unsigned int i;
+   long rc = OPAL_BUSY;
 
+   /* First free interrupts, which will also mask them */
for (i = 0; i < opal_irq_count; i++) {
if (opal_irqs[i])
free_irq(opal_irqs[i], 0);
opal_irqs[i] = 0;
}
+
+   /*
+* Then sync with OPAL which ensure anything that can
+* potentially write to our memory has completed such
+* as an ongoing dump retrieval
+*/
+   while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+   rc = opal_sync_host_reboot();
+   if (rc == OPAL_BUSY)
+   opal_poll_events(NULL);
+   else
+   mdelay(10);
+   }
 }
diff --git a/arch/powerpc/platforms/powernv/setup.c 
b/arch/powerpc/platforms/powernv/setup.c
index ddc9690..86733d6 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -153,8 +153,10 @@ static void pnv_shutdown(void)
/* Let the PCI code clear up IODA tables */
pnv_pci_shutdown();
 
-   /* And unregister all OPAL interrupts so they don't fire
-* up while we kexec
+   /*
+* Stop OPAL activity: Unregister all OPAL interrupts so they
+* don't fire up while we kexec and make sure all potentially
+* DMA'ing ops are complete (such as dump retrieval).
 */
opal_shutdown();
 }


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 2/3] powerpc/eeh: Hotplug improvement

2014-01-14 Thread Gavin Shan
When an EEH error hits a specific PCI device before its driver
is loaded, we apply hotplug to recover from the error. During the
plug phase, the PCI device is probed and its driver is loaded.
We then wrongly call the error handlers if the driver supports
EEH explicitly.

The patch fixes this by introducing the flag EEH_DEV_NO_HANDLER and
setting it before we remove the PCI device. In turn, we avoid wrongly
calling the error handlers of the PCI device after its driver is loaded.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h   |3 ++-
 arch/powerpc/kernel/eeh.c|   15 +++
 arch/powerpc/kernel/eeh_driver.c |   10 +++---
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index e37db7f..8e31dad 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -90,7 +90,8 @@ struct eeh_pe {
 #define EEH_DEV_IRQ_DISABLED   (1 << 3)/* Interrupt disabled   */
 #define EEH_DEV_DISCONNECTED   (1 << 4)/* Removing from PE */
 
-#define EEH_DEV_SYSFS  (1 << 8)/* Sysfs created*/
+#define EEH_DEV_NO_HANDLER (1 << 8)/* No error handler */
+#define EEH_DEV_SYSFS  (1 << 9)/* Sysfs created*/
 
 struct eeh_dev {
int mode;   /* EEH mode */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 4bd687d..6a118db 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -921,6 +921,13 @@ void eeh_add_device_late(struct pci_dev *dev)
eeh_sysfs_remove_device(edev->pdev);
edev->mode &= ~EEH_DEV_SYSFS;
 
+   /*
+* We definitely should have the PCI device removed
+* though it wasn't correctly. So we needn't call
+* into error handler afterwards.
+*/
+   edev->mode |= EEH_DEV_NO_HANDLER;
+
edev->pdev = NULL;
dev->dev.archdata.edev = NULL;
}
@@ -1023,6 +1030,14 @@ void eeh_remove_device(struct pci_dev *dev)
else
edev->mode |= EEH_DEV_DISCONNECTED;
 
+   /*
+* We're removing from the PCI subsystem, that means
+* the PCI device driver can't support EEH or not
+* well. So we rely on hotplug completely to do recovery
+* for the specific PCI device.
+*/
+   edev->mode |= EEH_DEV_NO_HANDLER;
+
eeh_addr_cache_rmv_dev(dev);
eeh_sysfs_remove_device(dev);
edev->mode &= ~EEH_DEV_SYSFS;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index d3a132c..ce3a698 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void 
*userdata)
if (!driver) return NULL;
 
if (!driver->err_handler ||
-   !driver->err_handler->mmio_enabled) {
+   !driver->err_handler->mmio_enabled ||
+   (edev->mode & EEH_DEV_NO_HANDLER)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata)
eeh_enable_irq(dev);
 
if (!driver->err_handler ||
-   !driver->err_handler->slot_reset) {
+   !driver->err_handler->slot_reset ||
+   (edev->mode & EEH_DEV_NO_HANDLER)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata)
eeh_enable_irq(dev);
 
if (!driver->err_handler ||
-   !driver->err_handler->resume) {
+   !driver->err_handler->resume ||
+   (edev->mode & EEH_DEV_NO_HANDLER)) {
+   edev->mode &= ~EEH_DEV_NO_HANDLER;
eeh_pcid_put(dev);
return NULL;
}
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 1/3] powerpc/eeh: Handle multiple EEH errors

2014-01-14 Thread Gavin Shan
A single PCI-error-related OPAL event may correspond to multiple
EEH errors, for example multiple frozen PEs detected on
different PHBs. Unfortunately, we didn't cover that case. The patch
enumerates the return values from eeh_ops::next_error() and changes
eeh_handle_special_event() and eeh_ops::next_error() to handle all
existing EEH errors.

As Ben pointed out, we needn't list_for_each_entry_safe() since we
are not deleting any PHB from the hose_list and the EEH serialized
lock should be held while purging EEH events. The patch covers those
suggestions as well.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h|   10 ++
 arch/powerpc/kernel/eeh_driver.c  |  150 +++--
 arch/powerpc/platforms/powernv/eeh-ioda.c |   39 +---
 3 files changed, 112 insertions(+), 87 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d3e5e9b..e37db7f 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -117,6 +117,16 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct 
eeh_dev *edev)
return edev ? edev->pdev : NULL;
 }
 
+/* Return values from eeh_ops::next_error */
+enum {
+   EEH_NEXT_ERR_NONE = 0,
+   EEH_NEXT_ERR_INF,
+   EEH_NEXT_ERR_FROZEN_PE,
+   EEH_NEXT_ERR_FENCED_PHB,
+   EEH_NEXT_ERR_DEAD_PHB,
+   EEH_NEXT_ERR_DEAD_IOC
+};
+
 /*
  * The struct is used to trace the registered EEH operation
  * callback functions. Actually, those operation callback
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 36bed5a..d3a132c 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -626,84 +626,90 @@ static void eeh_handle_special_event(void)
 {
struct eeh_pe *pe, *phb_pe;
struct pci_bus *bus;
-   struct pci_controller *hose, *tmp;
+   struct pci_controller *hose;
unsigned long flags;
-   int rc = 0;
+   int rc;
 
-   /*
-* The return value from next_error() has been classified as follows.
-* It might be good to enumerate them. However, next_error() is only
-* supported by PowerNV platform for now. So it would be fine to use
-* integer directly:
-*
-* 4 - Dead IOC   3 - Dead PHB
-* 2 - Fenced PHB 1 - Frozen PE
-* 0 - No error found
-*
-*/
-   rc = eeh_ops->next_error(&pe);
-   if (rc <= 0)
-   return;
 
-   switch (rc) {
-   case 4:
-   /* Mark all PHBs in dead state */
-   eeh_serialize_lock(&flags);
-   list_for_each_entry_safe(hose, tmp,
-   &hose_list, list_node) {
-   phb_pe = eeh_phb_pe_get(hose);
-   if (!phb_pe) continue;
-
-   eeh_pe_state_mark(phb_pe,
-   EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+   do {
+   rc = eeh_ops->next_error(&pe);
+
+   switch (rc) {
+   case EEH_NEXT_ERR_DEAD_IOC:
+   /* Mark all PHBs in dead state */
+   eeh_serialize_lock(&flags);
+
+   /* Purge all events */
+   eeh_remove_event(NULL);
+
+   list_for_each_entry(hose, &hose_list, list_node) {
+   phb_pe = eeh_phb_pe_get(hose);
+   if (!phb_pe) continue;
+
+   eeh_pe_state_mark(phb_pe,
+   EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+   }
+
+   eeh_serialize_unlock(flags);
+
+   break;
+   case EEH_NEXT_ERR_FROZEN_PE:
+   case EEH_NEXT_ERR_FENCED_PHB:
+   case EEH_NEXT_ERR_DEAD_PHB:
+   /* Mark the PE in fenced state */
+   eeh_serialize_lock(&flags);
+
+   /* Purge all events of the PHB */
+   eeh_remove_event(pe);
+
+   if (rc == EEH_NEXT_ERR_DEAD_PHB)
+   eeh_pe_state_mark(pe,
+   EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
+   else
+   eeh_pe_state_mark(pe,
+   EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+
+   eeh_serialize_unlock(flags);
+
+   break;
+   case EEH_NEXT_ERR_NONE:
+   return;
+   default:
+   pr_warn("%s: Invalid value %d from next_error()\n",
+   __func__, rc);
+   return;
}
-   eeh_serialize_unlock(flags);
-
-   /* Purge all events */
-   eeh_remove_event(NULL);
-   break;
-   case

[PATCH v2 3/3] powerpc/eeh: Escalate error on non-existing PE

2014-01-14 Thread Gavin Shan
Sometimes, especially in the scenario of loading another kernel with kdump,
we get an EEH error on a non-existing PE. That means the PEEV / PEST in
the corresponding PHB is messy and we can't handle that case.
The patch escalates the error to a fenced PHB so that the PHB can be
reset in order to recover from the errors on non-existing PEs.

Reported-by: Mahesh Salgaonkar 
Signed-off-by: Gavin Shan 
Tested-by: Mahesh Salgaonkar 
---
 arch/powerpc/platforms/powernv/eeh-ioda.c |   31 +++--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c 
b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e0b12d0..92aa1f9 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -862,11 +862,7 @@ static int ioda_eeh_get_pe(struct pci_controller *hose,
dev.phb = hose;
dev.pe_config_addr = pe_no;
dev_pe = eeh_pe_get(&dev);
-   if (!dev_pe) {
-   pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n",
-  __func__, hose->global_number, pe_no);
-   return -EEXIST;
-   }
+   if (!dev_pe) return -EEXIST;
 
*pe = dev_pe;
return 0;
@@ -980,12 +976,27 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 
break;
case OPAL_EEH_PE_ERROR:
-   if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
-   break;
+   /*
+* If we can't find the corresponding PE, the
+* PEEV / PEST would be messy. So we force an
+* fenced PHB so that it can be recovered.
+*/
+   if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) {
+   if (!ioda_eeh_get_phb_pe(hose, pe)) {
+   pr_err("EEH: Escalated fenced PHB#%x "
+  "detected for PE#%llx\n",
+   hose->global_number,
+   frozen_pe_no);
+   ret = EEH_NEXT_ERR_FENCED_PHB;
+   } else {
+   ret = EEH_NEXT_ERR_NONE;
+   }
+   } else {
+   pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+   (*pe)->addr, (*pe)->phb->global_number);
+   ret = EEH_NEXT_ERR_FROZEN_PE;
+   }
 
-   pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
-   (*pe)->addr, (*pe)->phb->global_number);
-   ret = EEH_NEXT_ERR_FROZEN_PE;
break;
default:
pr_warn("%s: Unexpected error type %d\n",
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[git pull] Please pull powerpc.git merge branch

2014-01-14 Thread Benjamin Herrenschmidt
Hi Linus !

So you make the call on whether to take that one now or wait for the
merge window. It's a bug fix for a crash in mremap that occurs on
powerpc with THP enabled.

The fix however requires a small change in the generic code. It moves a
condition into a helper we can override from the arch which is harmless,
but it *also* slightly changes the order of the set_pmd and the withdraw
& deposit, which should be fine according to Kirill (who wrote that
code) but I agree -rc8 is a bit late...

It was acked by Kirill and Andrew told me to just merge it via powerpc.

My original intent was to put it in powerpc-next and then shoot it to
stable, but it got a tad annoying (due to churn it needs to be applied
at least on rc4 or later while my next is at rc1 and clean that way), so
I put it in the merge branch.

From there, you tell me if you want to take it now, if not, I'll send
you that branch along with my normal next one after you open the merge
window.

Cheers,
Ben.

The following changes since commit a6da83f98267bc8ee4e34aa899169991eb0ceb93:

  Merge branch 'merge' of 
git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc (2014-01-13 10:59:05 
+0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge

for you to fetch changes up to b3084f4db3aeb991c507ca774337c7e7893ed04f:

  powerpc/thp: Fix crash on mremap (2014-01-15 15:46:38 +1100)


Aneesh Kumar K.V (1):
  powerpc/thp: Fix crash on mremap

 arch/powerpc/include/asm/pgtable-ppc64.h | 14 ++
 include/asm-generic/pgtable.h| 12 
 mm/huge_memory.c | 14 +-
 3 files changed, 31 insertions(+), 9 deletions(-)


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Pull request: scottwood/linux.git

2014-01-14 Thread Benjamin Herrenschmidt
On Fri, 2014-01-10 at 18:44 -0600, Scott Wood wrote:
> Highlights include 32-bit booke relocatable support, e6500 hardware
> tablewalk support, various e500 SPE fixes, some new/revived boards, and
> e6500 deeper idle and altivec powerdown modes.

This breaks WSP (A2) build with 64K pages:

/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S: Assembler messages:
/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S:334: Error: can't 
resolve `L0^A' {*ABS* section} - `PUD_SHIFT' {*UND* section}
/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S:334: Error: 
expression too complex
/home/benh/linux-powerpc-test/arch/powerpc/mm/tlb_low_64e.S:334: Error: operand 
out of range (67 is not between 0 and 63)
make[2]: *** [arch/powerpc/mm/tlb_low_64e.o] Error 1

I'm merging anyway because nobody uses WSP anymore (I'm keen to remove it by 
3.15 or so)
but in the meantime you may want to fix it (probably just ifdef the PUD level 
walk on
64k pages, look at what I do elsewhere).

Cheers,
Ben.

> The following changes since commit dece8ada993e1764a115bdff0f1eaa5fc8dc:
> 
>   Merge branch 'merge' into next (2013-12-30 15:19:31 +1100)
> 
> are available in the git repository at:
> 
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git next
> 
> for you to fetch changes up to d064f30e5063ec54ab50af08c64fb5055e759bfd:
> 
>   powerpc/fsl_pci: add versionless pci compatible (2014-01-10 17:38:56 -0600)
> 
> 
> Anton Blanchard (1):
>   drivers/tty: ehv_bytechan fails to build as a module
> 
> Christian Engelmayer (1):
>   powerpc/sysdev: Fix a pci section mismatch for Book E
> 
> Diana Craciun (1):
>   powerpc: Replaced tlbilx with tlbwe in the initialization code
> 
> Joseph Myers (6):
>   powerpc: fix exception clearing in e500 SPE float emulation
>   powerpc: fix e500 SPE float rounding inexactness detection
>   math-emu: fix floating-point to integer unsigned saturation
>   math-emu: fix floating-point to integer overflow detection
>   powerpc: fix e500 SPE float to integer and fixed-point conversions
>   powerpc: fix e500 SPE float SIGFPE generation
> 
> Kevin Hao (11):
>   powerpc/85xx: don't init the mpic ipi for the SoC which has doorbell 
> support
>   powerpc/fsl_booke: protect the access to MAS7
>   powerpc/fsl_booke: introduce get_phys_addr function
>   powerpc: introduce macro LOAD_REG_ADDR_PIC
>   powerpc: enable the relocatable support for the fsl booke 32bit kernel
>   powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
>   powerpc: introduce early_get_first_memblock_info
>   powerpc/fsl_booke: introduce map_mem_in_cams_addr
>   powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for 
> relocatable kernel
>   powerpc/fsl_booke: smp support for booting a relocatable kernel above 
> 64M
>   powerpc/fsl_booke: enable the relocatable for the kdump kernel
> 
> LEROY Christophe (1):
>   powerpc 8xx: defconfig: slice by 4 is more efficient than the default 
> slice by 8 on Powerpc 8xx.
> 
> Lijun Pan (1):
>   powerpc/85xx: Merge 85xx/p1023_defconfig into mpc85xx_smp and mpc85xx
> 
> Mihai Caraman (1):
>   powerpc/booke64: Add LRAT error exception handler
> 
> Paul Gortmaker (1):
>   powerpc: fix 8xx and 6xx final link failures
> 
> Scott Wood (5):
>   powerpc/fsl-booke: Use SPRN_SPRGn rather than mfsprg/mtsprg
>   powerpc: add barrier after writing kernel PTE
>   powerpc/e6500: TLB miss handler with hardware tablewalk support
>   powerpc/fsl-book3e-64: Use paca for hugetlb TLB1 entry selection
>   powerpc/booke-64: fix tlbsrx. path in bolted tlb handler
> 
> Shaohui Xie (1):
>   powerpc/85xx: handle the eLBC error interrupt if it exists in dts
> 
> Shengzhou Liu (2):
>   powerpc/85xx/dts: add third elo3 dma component
>   powerpc/fsl_pci: add versionless pci compatible
> 
> Stephen Chivers (1):
>   powerpc/embedded6xx: Add support for Motorola/Emerson MVME5100
> 
> Wang Dongsheng (9):
>   powerpc/fsl: add E6500 PVR and SPRN_PWRMGTCR0 define
>   powerpc/85xx: add hardware automatically enter altivec idle state
>   powerpc/85xx: add hardware automatically enter pw20 state
>   powerpc/85xx: add sysfs for pw20 state and altivec idle
>   powerpc/p1022ds: fix rtc compatible string
>   powerpc/p1022ds: add a interrupt for rtc node
>   powerpc/mpic_timer: fix the time is not accurate caused by GTCRR toggle 
> bit
>   powerpc/mpic_timer: fix convert ticks to time subtraction overflow
>   powerpc/dts: fix lbc lack of error interrupt
> 
> Xie Xiaobo (2):
>   powerpc/85xx: Add QE common init function
>   powerpc/85xx: Add TWR-P1025 board support
> 
> Zhao Qiang (3):
>   powerpc/p1010rdb:update dts to adapt to both old and new p1010rdb
>   powerpc/p1010rdb:update mtd of nand to adapt to both old and new 
> p1010rdb
>

Re: [PATCH] powerpc: dma-mapping: Return dma_direct_ops variable when dev == NULL

2014-01-14 Thread Benjamin Herrenschmidt
On Wed, 2014-01-15 at 11:36 +0800, Chunhe Lan wrote:

> >
> >> Signed-off-by: Chunhe Lan 
> >> Cc: Benjamin Herrenschmidt 
> >> Tested-by: Chunhe Lan 
> >> ---
> >>   arch/powerpc/include/asm/dma-mapping.h |   13 +
> >>   1 files changed, 9 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/arch/powerpc/include/asm/dma-mapping.h 
> >> b/arch/powerpc/include/asm/dma-mapping.h
> >> index e27e9ad..b8c10de 100644
> >> --- a/arch/powerpc/include/asm/dma-mapping.h
> >> +++ b/arch/powerpc/include/asm/dma-mapping.h
> >> @@ -84,10 +84,15 @@ static inline struct dma_map_ops *get_dma_ops(struct 
> >> device *dev)
>  I see the get_dma_ops function in 
> arch/*x86*/include/asm/dma-mapping.h as the following:
> 
>   32 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
>   33 {
>   34 #ifndef CONFIG_X86_DEV_DMA_OPS
>   35 return dma_ops;
>   36 #else
>   37 if (unlikely(!dev) || !dev->archdata.dma_ops)
>   38 return dma_ops;
>   39 else
>   40 return dev->archdata.dma_ops;
>   41 #endif
>   42 }
> 
>  And also  see the get_dma_ops function in  
> arch/*arm*/include/asm/dma-mapping.h as the following:
> 
>   18 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
>   19 {
>   20 if (dev && dev->archdata.dma_ops)
>   21 return dev->archdata.dma_ops;
>   22 return &arm_dma_ops;
>   23 }
> 
>Why not powerpc use this method to process dev == NULL ?

Because we don't :-) We used to and removed this. Due to how our HW
works it might not be correct. When an iommu is enabled for example
you simply cannot use the direct ops.

So the right fix is to properly establish the iommu for the VFs like
we do for the PFs.

> Thanks,
> -Chunhe
> 
> >> * only ISA DMA device we support is the floppy and we have a hack
> >> * in the floppy driver directly to get a device for us.
> >> */
> >> -  if (unlikely(dev == NULL))
> >> -  return NULL;
> >> -
> >> -  return dev->archdata.dma_ops;
> >> +  if (dev && dev->archdata.dma_ops)
> >> +  return dev->archdata.dma_ops;
> >> +  /*
> >> +   * In some cases (for example, use the Intel(R) 10 Gigabit PCI
> >> +   * expression Virtual Function Network Driver -- ixgbevf.ko),
> >> +   * their value of dev is the NULL. If return NULL, the driver is
> >> +   * aborting. So return dma_direct_ops variable when dev == NULL.
> >> +   */
> >> +  return &dma_direct_ops;
> >>   }
> >>   
> >>   static inline void set_dma_ops(struct device *dev, struct dma_map_ops 
> >> *ops)
> >
> >
> >
> 
> 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: dma-mapping: Return dma_direct_ops variable when dev == NULL

2014-01-14 Thread Chunhe Lan

On 01/14/2014 06:14 PM, Benjamin Herrenschmidt wrote:

On Tue, 2014-01-14 at 17:44 +0800, Chunhe Lan wrote:

Without this patch, an error like the one below is printed when
'insmod ixgbevf.ko' is executed:

 ixgbevf: Intel(R) 10 Gigabit PCI Express Virtual Function
  Network Driver - version 2.7.12-k
 ixgbevf: Copyright (c) 2009 - 2012 Intel Corporation.
 ixgbevf :01:10.0: enabling device ( -> 0002)
 ixgbevf :01:10.0: No usable DMA configuration, aborting
 ixgbevf: probe of :01:10.0 failed with error -5
 ..
 ..

That's not right. The DMA ops must be set properly for the VF somewhere
in the arch code instead. When creating VFs, is there a hook allowing
the arch to fix things up ?

(Also adding linux-pci on CC)

Ben.


Signed-off-by: Chunhe Lan 
Cc: Benjamin Herrenschmidt 
Tested-by: Chunhe Lan 
---
  arch/powerpc/include/asm/dma-mapping.h |   13 +
  1 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index e27e9ad..b8c10de 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -84,10 +84,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device 
*dev)
I see the get_dma_ops function in 
arch/*x86*/include/asm/dma-mapping.h as the following:


 32 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 33 {
 34 #ifndef CONFIG_X86_DEV_DMA_OPS
 35 return dma_ops;
 36 #else
 37 if (unlikely(!dev) || !dev->archdata.dma_ops)
 38 return dma_ops;
 39 else
 40 return dev->archdata.dma_ops;
 41 #endif
 42 }

And also  see the get_dma_ops function in 
arch/*arm*/include/asm/dma-mapping.h as the following:


 18 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 19 {
 20 if (dev && dev->archdata.dma_ops)
 21 return dev->archdata.dma_ops;
 22 return &arm_dma_ops;
 23 }

  Why doesn't powerpc use this method to handle dev == NULL?

Thanks,
-Chunhe


 * only ISA DMA device we support is the floppy and we have a hack
 * in the floppy driver directly to get a device for us.
 */
-   if (unlikely(dev == NULL))
-   return NULL;
-
-   return dev->archdata.dma_ops;
+   if (dev && dev->archdata.dma_ops)
+   return dev->archdata.dma_ops;
+   /*
+* In some cases (for example, use the Intel(R) 10 Gigabit PCI
+* expression Virtual Function Network Driver -- ixgbevf.ko),
+* their value of dev is the NULL. If return NULL, the driver is
+* aborting. So return dma_direct_ops variable when dev == NULL.
+*/
+   return &dma_direct_ops;
  }
  
  static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)








___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 2/3] powerpc/85xx: Provide two functions to save/restore the core registers

2014-01-14 Thread dongsheng.w...@freescale.com


> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, January 15, 2014 7:51 AM
> To: Wang Dongsheng-B40534
> Cc: b...@kernel.crashing.org; Zhao Chenhui-B35336; an...@enomsg.org; linuxppc-
> d...@lists.ozlabs.org
> Subject: Re: [PATCH 2/3] powerpc/85xx: Provide two functions to save/restore 
> the
> core registers
> 
> On Tue, 2014-01-14 at 15:59 +0800, Dongsheng Wang wrote:
> > From: Wang Dongsheng 
> >
> > Add fsl_cpu_state_save/fsl_cpu_state_restore functions, used for deep
> > sleep and hibernation to save/restore core registers. We abstract out
> > save/restore code for use in various modules, to make them don't need
> > to maintain.
> >
> > Currently supported processors type are E6500, E5500, E500MC, E500v2
> > and E500v1.
> >
> > Signed-off-by: Wang Dongsheng 
> 
> What is there that is specific to a particular core type that can't be handled
> from C code?
> 

The calling context may not be a C environment. (Deep sleep calls these
interfaces without a C environment.)

> > +   /*
> > +* Need to save float-point registers if MSR[FP] = 1.
> > +*/
> > +   mfmsr   r12
> > +   andi.   r12, r12, MSR_FP
> > +   beq 1f
> > +   do_sr_fpr_regs(save)
> 
> C code should have already ensured that MSR[FP] is not 1 (and thus the FP
> context has been saved).
> 

Yes, right. But I mean that if the FP is still in use during the core save flow,
we need to save it.
In this process, I don't care what other code does; we need to focus on not
losing valuable data.

> > +/*
> > + * r3 = the virtual address of buffer
> > + * r4 = suspend type, 0-BASE_SAVE, 1-ALL_SAVE
> 
> #define these magic numbers, and define what is meant by "base save"
> versus "all save".

Ok, thanks.

-Dongsheng

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 3/3] powerpc/fsl: Use the new interface to save or restore registers

2014-01-14 Thread dongsheng.w...@freescale.com


> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, January 15, 2014 7:30 AM
> To: Wang Dongsheng-B40534
> Cc: b...@kernel.crashing.org; Zhao Chenhui-B35336; an...@enomsg.org; linuxppc-
> d...@lists.ozlabs.org
> Subject: Re: [PATCH 3/3] powerpc/fsl: Use the new interface to save or restore
> registers
> 
> On Tue, 2014-01-14 at 15:59 +0800, Dongsheng Wang wrote:
> > From: Wang Dongsheng 
> >
> > Use fsl_cpu_state_save/fsl_cpu_state_restore to save/restore registers.
> > Use the functions to save/restore registers, so we don't need to
> > maintain the code.
> >
> > Signed-off-by: Wang Dongsheng 
> 
> Is there any functional change with this patchset (e.g. suspend
> supported on chips where it wasn't before), or is it just cleanup?  A
> cover letter would be useful to describe the purpose of the overall
> patchset when it isn't obvious.
> 

Yes, just cleanup..

> > +
> > +   /* Restore base register */
> > +   li  r4, 0
> > +   bl  fsl_cpu_state_restore
> 
> Why are you calling anything with "fsl" in the name from code that is
> supposed to be for all booke?
> 
E200 and E300 are not supported.
E500, E500v2, E500MC, E5500 and E6500 are supported.

Do you have any suggestions about this?

Thanks,
-Dongsheng

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v6,2/5] powerpc/book3e: store crit/mc/dbg exception thread info

2014-01-14 Thread Scott Wood
On Wed, Oct 23, 2013 at 05:31:22PM +0800, Tiejun Chen wrote:
> We need to store thread info to these exception thread info like something
> we already did for PPC32.
> 
> Signed-off-by: Tiejun Chen 
> 
> ---
> arch/powerpc/kernel/exceptions-64e.S |   22 +++---
>  1 file changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/exceptions-64e.S 
> b/arch/powerpc/kernel/exceptions-64e.S
> index 68d74b4..a55cf62 100644
> --- a/arch/powerpc/kernel/exceptions-64e.S
> +++ b/arch/powerpc/kernel/exceptions-64e.S
> @@ -36,6 +36,19 @@
>   */
>  #define  SPECIAL_EXC_FRAME_SIZE  INT_FRAME_SIZE
>  
> +/* Now we only store something to exception thread info */

Now as opposed to when?  Only as opposed to what else?

> +#define  EXC_LEVEL_EXCEPTION_PROLOG(type)
> \

I'd prefer .macro over #define.

> + ld  r14,PACAKSAVE(r13); \
> + CURRENT_THREAD_INFO(r14, r14);  \
> + CURRENT_THREAD_INFO(r15, r1);   \
> + ld  r10,TI_FLAGS(r14);  \
> + std r10,TI_FLAGS(r15);  \
> + ld  r10,TI_PREEMPT(r14);\
> + std r10,TI_PREEMPT(r15);\
> + ld  r10,TI_TASK(r14);   \
> + std r10,TI_TASK(r15);

This is a start, but we'll also need to save some more context to allow
TLB misses from within the exception (e.g. if a machine check handler or
GDB stub writes to a serial port, and the I/O registers aren't in the
TLB).  At a minimum I think we need to save SRR0, SRR1,
SPRN_SPRG_GEN_SCRATCH, SPRN_SPRG_TLB_SCRATCH, and the MAS registers. 
We'll also need to make the bolted TLB miss handlers capable of pointing
to different extables (though they won't need to auto-advance as the
original TLB miss handlers do -- we would advance SPRN_SPRG_TLB_EXFRAME
from this code), and the original TLB miss handlers will now need to
support more than 3 levels of nesting.

For the e6500 tablewalk TLB miss handler, we'll need to do something
special if we interrupt it when the lock is held, to revoke the lock and
return to code that retries.

Is there anything else I'm missing?

-Scott
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/3] powerpc/85xx: Provide two functions to save/restore the core registers

2014-01-14 Thread Scott Wood
On Tue, 2014-01-14 at 15:59 +0800, Dongsheng Wang wrote:
> From: Wang Dongsheng 
> 
> Add fsl_cpu_state_save/fsl_cpu_state_restore functions, used for deep
> sleep and hibernation to save/restore core registers. We abstract out
> save/restore code for use in various modules, to make them don't need
> to maintain.
> 
> Currently supported processors type are E6500, E5500, E500MC, E500v2 and
> E500v1.
> 
> Signed-off-by: Wang Dongsheng 

What is there that is specific to a particular core type that can't be
handled from C code?

> + /*
> +  * Need to save float-point registers if MSR[FP] = 1.
> +  */
> + mfmsr   r12
> + andi.   r12, r12, MSR_FP
> + beq 1f
> + do_sr_fpr_regs(save)

C code should have already ensured that MSR[FP] is not 1 (and thus the
FP context has been saved).

> +/*
> + * r3 = the virtual address of buffer
> + * r4 = suspend type, 0-BASE_SAVE, 1-ALL_SAVE

#define these magic numbers, and define what is meant by "base save"
versus "all save".

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 3/3] powerpc/fsl: Use the new interface to save or restore registers

2014-01-14 Thread Scott Wood
On Tue, 2014-01-14 at 15:59 +0800, Dongsheng Wang wrote:
> From: Wang Dongsheng 
> 
> Use fsl_cpu_state_save/fsl_cpu_state_restore to save/restore registers.
> Use the functions to save/restore registers, so we don't need to
> maintain the code.
> 
> Signed-off-by: Wang Dongsheng 

Is there any functional change with this patchset (e.g. suspend
supported on chips where it wasn't before), or is it just cleanup?  A
cover letter would be useful to describe the purpose of the overall
patchset when it isn't obvious.

> 
> diff --git a/arch/powerpc/kernel/swsusp_booke.S 
> b/arch/powerpc/kernel/swsusp_booke.S
> index 553c140..b5992db 100644
> --- a/arch/powerpc/kernel/swsusp_booke.S
> +++ b/arch/powerpc/kernel/swsusp_booke.S
> @@ -4,92 +4,28 @@
>   * Copyright (c) 2009-2010 MontaVista Software, LLC.
>   */
>  
> -#include 
> -#include 
>  #include 
> -#include 
> -#include 
>  #include 
>  #include 
>  #include 
> -
> -/*
> - * Structure for storing CPU registers on the save area.
> - */
> -#define SL_SP0
> -#define SL_PC4
> -#define SL_MSR   8
> -#define SL_TCR   0xc
> -#define SL_SPRG0 0x10
> -#define SL_SPRG1 0x14
> -#define SL_SPRG2 0x18
> -#define SL_SPRG3 0x1c
> -#define SL_SPRG4 0x20
> -#define SL_SPRG5 0x24
> -#define SL_SPRG6 0x28
> -#define SL_SPRG7 0x2c
> -#define SL_TBU   0x30
> -#define SL_TBL   0x34
> -#define SL_R20x38
> -#define SL_CR0x3c
> -#define SL_LR0x40
> -#define SL_R12   0x44/* r12 to r31 */
> -#define SL_SIZE  (SL_R12 + 80)
> -
> - .section .data
> - .align  5
> -
> -_GLOBAL(swsusp_save_area)
> - .space  SL_SIZE
> -
> +#include 
>  
>   .section .text
>   .align  5
>  
>  _GLOBAL(swsusp_arch_suspend)
> - lis r11,swsusp_save_area@h
> - ori r11,r11,swsusp_save_area@l
> -
> - mflrr0
> - stw r0,SL_LR(r11)
> - mfcrr0
> - stw r0,SL_CR(r11)
> - stw r1,SL_SP(r11)
> - stw r2,SL_R2(r11)
> - stmwr12,SL_R12(r11)
> -
> - /* Save MSR & TCR */
> - mfmsr   r4
> - stw r4,SL_MSR(r11)
> - mfspr   r4,SPRN_TCR
> - stw r4,SL_TCR(r11)
> -
> - /* Get a stable timebase and save it */
> -1:   mfspr   r4,SPRN_TBRU
> - stw r4,SL_TBU(r11)
> - mfspr   r5,SPRN_TBRL
> - stw r5,SL_TBL(r11)
> - mfspr   r3,SPRN_TBRU
> - cmpwr3,r4
> - bne 1b
> + mflrr15
> + lis r3, core_registers_save_area@h
> + ori r3, r3, core_registers_save_area@l
> +
> + /* Save base register */
> + li  r4, 0
> + bl  fsl_cpu_state_save
>  
> - /* Save SPRGs */
> - mfspr   r4,SPRN_SPRG0
> - stw r4,SL_SPRG0(r11)
> - mfspr   r4,SPRN_SPRG1
> - stw r4,SL_SPRG1(r11)
> - mfspr   r4,SPRN_SPRG2
> - stw r4,SL_SPRG2(r11)
> - mfspr   r4,SPRN_SPRG3
> - stw r4,SL_SPRG3(r11)
> - mfspr   r4,SPRN_SPRG4
> - stw r4,SL_SPRG4(r11)
> - mfspr   r4,SPRN_SPRG5
> - stw r4,SL_SPRG5(r11)
> - mfspr   r4,SPRN_SPRG6
> - stw r4,SL_SPRG6(r11)
> - mfspr   r4,SPRN_SPRG7
> - stw r4,SL_SPRG7(r11)
> + /* Save LR */
> + lis r3, core_registers_save_area@h
> + ori r3, r3, core_registers_save_area@l
> + stw r15, SR_LR(r3)
>  
>   /* Call the low level suspend stuff (we should probably have made
>* a stackframe...
> @@ -97,11 +33,12 @@ _GLOBAL(swsusp_arch_suspend)
>   bl  swsusp_save
>  
>   /* Restore LR from the save area */
> - lis r11,swsusp_save_area@h
> - ori r11,r11,swsusp_save_area@l
> - lwz r0,SL_LR(r11)
> - mtlrr0
> + lis r3, core_registers_save_area@h
> + ori r3, r3, core_registers_save_area@l
> + lwz r15, SR_LR(r3)
> + mtlrr15
>  
> + li  r3, 0
>   blr
>  
>  _GLOBAL(swsusp_arch_resume)
> @@ -138,9 +75,6 @@ _GLOBAL(swsusp_arch_resume)
>   bl flush_dcache_L1
>   bl flush_instruction_cache
>  
> - lis r11,swsusp_save_area@h
> - ori r11,r11,swsusp_save_area@l
> -
>   /*
>* Mappings from virtual addresses to physical addresses may be
>* different than they were prior to restoring hibernation state. 
> @@ -149,53 +83,12 @@ _GLOBAL(swsusp_arch_resume)
>*/
>   bl  _tlbil_all
>  
> - lwz r4,SL_SPRG0(r11)
> - mtspr   SPRN_SPRG0,r4
> - lwz r4,SL_SPRG1(r11)
> - mtspr   SPRN_SPRG1,r4
> - lwz r4,SL_SPRG2(r11)
> - mtspr   SPRN_SPRG2,r4
> - lwz r4,SL_SPRG3(r11)
> - mtspr   SPRN_SPRG3,r4
> - lwz r4,SL_SPRG4(r11)
> - mtspr   SPRN_SPRG4,r4
> - lwz r4,SL_SPRG5(r11)
> - mtspr   SPRN_SPRG5,r4
> - lwz r4,SL_SPRG6(r11)
> - mtspr   SPRN_SPRG6,r4
> - lwz r4,SL_SPRG7(r11)
> - mtspr   SPRN_SPRG7,r4
> -
> - /* restore the MSR */
> - lwz 

Re: [PATCH 3/4] powerpc: use subsys_initcall for Freescale Local Bus

2014-01-14 Thread Scott Wood
On Mon, 2014-01-13 at 11:21 -0500, Paul Gortmaker wrote:
> The FSL_SOC option is bool, and hence this code is either
> present or absent.  It will never be modular, so using
> module_init as an alias for __initcall is rather misleading.
> 
> Fix this up now, so that we can relocate module_init from
> init.h into module.h in the future.  If we don't do this, we'd
> have to add module.h to obviously non-modular code, and that
> would be a worse thing.
> 
> Note that direct use of __initcall is discouraged, vs. one
> of the priority categorized subgroups.  As __initcall gets
> mapped onto device_initcall, our use of subsys_initcall (which
> makes sense for bus code) will thus change this registration
> from level 6-device to level 4-subsys (i.e. slightly earlier).
> However no observable impact of that small difference has
> been observed during testing, or is expected.
> 
> Signed-off-by: Paul Gortmaker 
> ---
>  arch/powerpc/sysdev/fsl_lbc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
> index 6bc5a546d49f..9f00e5f84abe 100644
> --- a/arch/powerpc/sysdev/fsl_lbc.c
> +++ b/arch/powerpc/sysdev/fsl_lbc.c
> @@ -388,4 +388,4 @@ static int __init fsl_lbc_init(void)
>  {
>   return platform_driver_register(&fsl_lbc_ctrl_driver);
>  }
> -module_init(fsl_lbc_init);
> +subsys_initcall(fsl_lbc_init);

Acked-by: Scott Wood 

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2] Move processing of MCE queued event out from syscall exit path.

2014-01-14 Thread Benjamin Herrenschmidt
On Tue, 2014-01-14 at 11:48 -0800, Hugh Dickins wrote:
> On Tue, 14 Jan 2014, Mahesh J Salgaonkar wrote:
> > From: Mahesh Salgaonkar 
> > 
> > Hugh Dickins reported an issue that b5ff4211a829
> > "powerpc/book3s: Queue up and process delayed MCE events" breaks the
> > PowerMac G5 boot. This patch fixes it by moving the mce event processing
> > away from syscall exit, which was the wrong place for it to begin with, and
> > using the irq work framework to delay processing of the mce event.
> > 
> > Reported-by: Hugh Dickins
> > Signed-off-by: Mahesh Salgaonkar
> 
> This version also boots and runs fine for me on the G5
> (but of course, I'm probably not testing delayed MCE events at all).

Thanks Hugh !

Cheers,
Ben.

> Hugh
> 
> > ---
> >  arch/powerpc/include/asm/mce.h |1 -
> >  arch/powerpc/kernel/entry_64.S |5 -
> >  arch/powerpc/kernel/mce.c  |   13 ++---
> >  3 files changed, 10 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> > index 2257d1e..f97d8cb 100644
> > --- a/arch/powerpc/include/asm/mce.h
> > +++ b/arch/powerpc/include/asm/mce.h
> > @@ -192,7 +192,6 @@ extern void save_mce_event(struct pt_regs *regs, long 
> > handled,
> >  extern int get_mce_event(struct machine_check_event *mce, bool release);
> >  extern void release_mce_event(void);
> >  extern void machine_check_queue_event(void);
> > -extern void machine_check_process_queued_event(void);
> >  extern void machine_check_print_event_info(struct machine_check_event 
> > *evt);
> >  extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
> >  
> > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> > index 770d6d6..bbfb029 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -184,11 +184,6 @@ syscall_exit:
> > bl  .do_show_syscall_exit
> > ld  r3,RESULT(r1)
> >  #endif
> > -#ifdef CONFIG_PPC_BOOK3S_64
> > -BEGIN_FTR_SECTION
> > -   bl  .machine_check_process_queued_event
> > -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> > -#endif
> > CURRENT_THREAD_INFO(r12, r1)
> >  
> > ld  r8,_MSR(r1)
> > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> > index d6edf2b..a7fd4cb 100644
> > --- a/arch/powerpc/kernel/mce.c
> > +++ b/arch/powerpc/kernel/mce.c
> > @@ -26,6 +26,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  
> >  static DEFINE_PER_CPU(int, mce_nest_count);
> > @@ -35,6 +36,11 @@ static DEFINE_PER_CPU(struct 
> > machine_check_event[MAX_MC_EVT], mce_event);
> >  static DEFINE_PER_CPU(int, mce_queue_count);
> >  static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], 
> > mce_event_queue);
> >  
> > +static void machine_check_process_queued_event(struct irq_work *work);
> > +struct irq_work mce_event_process_work = {
> > +.func = machine_check_process_queued_event,
> > +};
> > +
> >  static void mce_set_error_info(struct machine_check_event *mce,
> >struct mce_error_info *mce_err)
> >  {
> > @@ -185,17 +191,19 @@ void machine_check_queue_event(void)
> > return;
> > }
> > __get_cpu_var(mce_event_queue[index]) = evt;
> > +
> > +   /* Queue irq work to process this event later. */
> > +   irq_work_queue(&mce_event_process_work);
> >  }
> >  
> >  /*
> >   * process pending MCE event from the mce event queue. This function will 
> > be
> >   * called during syscall exit.
> >   */
> > -void machine_check_process_queued_event(void)
> > +static void machine_check_process_queued_event(struct irq_work *work)
> >  {
> > int index;
> >  
> > -   preempt_disable();
> > /*
> >  * For now just print it to console.
> >  * TODO: log this error event to FSP or nvram.
> > @@ -206,7 +214,6 @@ void machine_check_process_queued_event(void)
> > &__get_cpu_var(mce_event_queue[index]));
> > __get_cpu_var(mce_queue_count)--;
> > }
> > -   preempt_enable();
> >  }
> >  
> >  void machine_check_print_event_info(struct machine_check_event *evt)
> > 
> > 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Disable sleep states on P7+

2014-01-14 Thread Steven Pratt
On 01/14/2014 10:10 AM, Preeti U Murthy wrote:
> Hi Steven,
>
> On 01/14/2014 08:06 PM, Steven Pratt wrote:
>> I am looking for info on when and how we are able to disable power saving 
>> features of current (P7, P7+) chips in order to reduce latency. This is 
>> often done in latency sensitive applications when power consumption is not 
>> an issue. On Intel boxes we can disable P-state frequency changes as well as 
>> disabling C-State or sleep state changes. In fact we can control how deep a 
>> sleep the processor can go into.  I know we have control Dynamic Processor 
>> Scaling and Idle Power Savings, but what states do these really affect?  Can 
>> I really disable Nap mode of a processor? If so how?  Can I disable even the 
>> lightest winkle mode?  Looking for current information (read RHEL 6 and 
>> SLES11), future changes are interesting.
>>
>> Steve
> I can answer this question with respect to cpuidle on PowerNV platforms.
>
> 1. In order to disable cpuidle states management altogether, one can
> pass the powersave=off kernel cmd line parameter during boot up of the
> kernel. This will ensure that each time a CPU has nothing to do, it can
> enter low thread priority which could lower power consumption to some
> extent but is not expected to hit latency of applications noticeably.
>
> 2. In order to control exactly which cpuidle states idle CPUs
> can enter during runtime, one can make use of the sysfs files under:
> /sys/devices/system/cpu/cpux/cpuidle/statex/disable option to
> selectively disable any state.
>
> However if one is using the menu cpuidle governor, disabling an idle
> state does not disable the idle states which are deeper than it. They
> continue to remain active unless they are specifically disabled. What
> this means is that one cannot control the depth of the idle states
> available for a CPU, although we can control the exact idle states
> available for a processor.
>
> But if the ladder governor is used, one can control the depth of the
> idle states that a CPU can enter into. The governor can be chosen by
> echoing either menu/ladder to
> /sys/devices/system/cpu/cpuidle/current_governor_ro. The cpuidle
> governor takes decisions about the idle state for a cpu to enter into
> depending on its idle history. The popular governor used by most archs
> is the menu governor.
>
> Hence nap/sleep/winkle any of these states can be disabled. The code
> which enables the above mentioned functionalities on powernv is yet to
> go upstream although the same is already upstream and can be used for
> the pseries platform to disable/enable the idle states on it.
>
> Today on powernv the default idle state nap is entered into all the
> time. One can disable it by echoing 0 to powersave_nap under
> /proc/sys/kernel/powersave_nap, in which case the cpu enters low thread

Thanks, that is great information going forward, now I just need info on what 
works today in PowerVM.

Steve

> priority.
>
> Thanks
>
> Regards
> Preeti U Murthy
>
>> ___
>> Linuxppc-dev mailing list
>> Linuxppc-dev@lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/linuxppc-dev
>>

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Disable sleep states on P7+

2014-01-14 Thread Preeti U Murthy
Hi Steven,

On 01/14/2014 08:06 PM, Steven Pratt wrote:
> I am looking for info on when and how we are able to disable power saving 
> features of current (P7, P7+) chips in order to reduce latency. This is often 
> done in latency sensitive applications when power consumption is not an 
> issue. On Intel boxes we can disable P-state frequency changes as well as 
> disabling C-State or sleep state changes. In fact we can control how deep a 
> sleep the processor can go into.  I know we have control Dynamic Processor 
> Scaling and Idle Power Savings, but what states do these really affect?  Can 
> I really disable Nap mode of a processor? If so how?  Can I disable even the 
> lightest winkle mode?  Looking for current information (read RHEL 6 and 
> SLES11), future changes are interesting.
> 
> Steve

I can answer this question with respect to cpuidle on PowerNV platforms.

1. In order to disable cpuidle states management altogether, one can
pass the powersave=off kernel cmd line parameter during boot up of the
kernel. This will ensure that each time a CPU has nothing to do, it can
enter low thread priority which could lower power consumption to some
extent but is not expected to hit latency of applications noticeably.

2. In order to control exactly which cpuidle states idle CPUs
can enter during runtime, one can make use of the sysfs files under:
/sys/devices/system/cpu/cpux/cpuidle/statex/disable option to
selectively disable any state.

However if one is using the menu cpuidle governor, disabling an idle
state does not disable the idle states which are deeper than it. They
continue to remain active unless they are specifically disabled. What
this means is that one cannot control the depth of the idle states
available for a CPU, although we can control the exact idle states
available for a processor.

But if the ladder governor is used, one can control the depth of the
idle states that a CPU can enter into. The governor can be chosen by
echoing either menu/ladder to
/sys/devices/system/cpu/cpuidle/current_governor_ro. The cpuidle
governor takes decisions about the idle state for a cpu to enter into
depending on its idle history. The popular governor used by most archs
is the menu governor.

Hence nap/sleep/winkle any of these states can be disabled. The code
which enables the above mentioned functionalities on powernv is yet to
go upstream although the same is already upstream and can be used for
the pseries platform to disable/enable the idle states on it.

Today on powernv the default idle state nap is entered into all the
time. One can disable it by echoing 0 to powersave_nap under
/proc/sys/kernel/powersave_nap, in which case the cpu enters low thread
priority.

Thanks

Regards
Preeti U Murthy

> 
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Disable sleep states on P7+

2014-01-14 Thread Steven Pratt
I am looking for info on when and how we are able to disable power saving 
features of current (P7, P7+) chips in order to reduce latency. This is often 
done in latency sensitive applications when power consumption is not an issue. 
On Intel boxes we can disable P-state frequency changes as well as disabling 
C-State or sleep state changes. In fact we can control how deep a sleep the 
processor can go into.  I know we have control Dynamic Processor Scaling and 
Idle Power Savings, but what states do these really affect?  Can I really 
disable Nap mode of a processor? If so how?  Can I disable even the lightest 
winkle mode?  Looking for current information (read RHEL 6 and SLES11), future 
changes are interesting.

Steve

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] cpuidle/menu: Fail cpuidle_idle_call() if no idle state is acceptable

2014-01-14 Thread Preeti U Murthy
On 01/14/2014 01:07 PM, Srivatsa S. Bhat wrote:
> On 01/14/2014 12:30 PM, Srivatsa S. Bhat wrote:
>> On 01/14/2014 11:35 AM, Preeti U Murthy wrote:
>>> On PowerPC, in a particular test scenario, all the cpu idle states were 
>>> disabled.
>>> In spite of this it was observed that the idle state count of the shallowest
>>> idle state, snooze, was increasing.
>>>
>>> This is because the governor returns the idle state index as 0 even in
>>> scenarios when no idle state can be chosen. These scenarios could be when 
>>> the
>>> latency requirement is 0 or as mentioned above when the user wants to 
>>> disable
>>> certain cpu idle states at runtime. In the latter case, it's possible that no
>>> cpu idle state is valid because the suitable states were disabled
>>> and the rest did not match the menu governor criteria to be chosen as the
>>> next idle state.
>>>
>>> This patch adds the code to indicate that a valid cpu idle state could not 
>>> be
>>> chosen by the menu governor and reports back to arch so that it can take 
>>> some
>>> default action.
>>>
>>
>> That sounds fair enough. However, the "default" action of pseries idle loop
>> (pseries_lpar_idle()) surprises me. It enters Cede, which is _deeper_ than 
>> doing
>> a snooze! IOW, a user might "disable" cpuidle or set the 
>> PM_QOS_CPU_DMA_LATENCY
>> to 0 hoping to prevent the CPUs from going to deep idle states, but then the
>> machine would still end up going to Cede, even though that won't get reflected
>> in the idle state counts. IMHO that scenario needs some thought as well...
>>
> 
> I checked the git history and found that the default idle was changed (on 
> purpose)
> to cede the processor, in order to speed up booting.. Hmm..
> 
> commit 363edbe2614aa90df706c0f19ccfa2a6c06af0be
> Author: Vaidyanathan Srinivasan 
> Date:   Fri Sep 6 00:25:06 2013 +0530
> 
> powerpc: Default arch idle could cede processor on pseries

This issue is not powerpc specific as I observed on digging a bit into
the default idle routines of the common archs. The way that archs
perceive the call to cpuidle framework today is that if it fails, it
means that cpuidle backend driver fails to *function* due to some reason
(as is mentioned in the above commit: either since cpuidle driver is not
registered or it does not work on some specific platforms) and that
therefore the archs should decide on an idle state themselves. They
therefore end up choosing a convenient idle state which could very well
be one of the idle states in the cpuidle state table.

The archs do not see failed call to cpuidle driver as "cpuidle driver
says no idle state can be entered now because there are strict latency
requirements or the idle states are disabled". IOW, the call to cpuidle
driver is currently based on if cpuidle driver exists rather than if it
agrees on entry into any of the idle states.

This patch brings in the need for the archs to incorporate this
additional check of "did cpuidle_idle_call() fail because it did not
find it wise to enter any of the idle states". In which case they should
simply exit without taking any *default action*.

Need to give this some thought and reconsider the patch.

Regards
Preeti U Murthy
> 
> 
> Regards,
> Srivatsa S. Bhat
> 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1] powernv/cpuidle: Back-end cpuidle driver for powernv platform.

2014-01-14 Thread Deepthi Dharwar
The following patch ports the cpuidle framework to the powernv
platform and also implements a cpuidle back-end powernv
idle driver that calls into the power7_nap and snooze idle states.

Signed-off-by: Deepthi Dharwar 
---
 arch/powerpc/platforms/powernv/setup.c |   13 ++
 drivers/cpuidle/Kconfig.powerpc|9 ++
 drivers/cpuidle/Makefile   |1 
 drivers/cpuidle/cpuidle-powernv.c  |  169 
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 drivers/cpuidle/cpuidle-powernv.c

diff --git a/arch/powerpc/platforms/powernv/setup.c 
b/arch/powerpc/platforms/powernv/setup.c
index 19884b2..764a14e 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -214,6 +215,16 @@ static int __init pnv_probe(void)
return 1;
 }
 
+void powernv_idle(void)
+{
+   /* Hook to cpuidle framework if available, else
+* call on default platform idle code
+*/
+   if (cpuidle_idle_call()) {
+   power7_idle();
+   }
+}
+
 define_machine(powernv) {
.name   = "PowerNV",
.probe  = pnv_probe,
@@ -223,7 +234,7 @@ define_machine(powernv) {
.show_cpuinfo   = pnv_show_cpuinfo,
.progress   = pnv_progress,
.machine_shutdown   = pnv_shutdown,
-   .power_save = power7_idle,
+   .power_save = powernv_idle,
.calibrate_decr = generic_calibrate_decr,
 #ifdef CONFIG_KEXEC
.kexec_cpu_down = pnv_kexec_cpu_down,
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
index 8147de5..66c3a09 100644
--- a/drivers/cpuidle/Kconfig.powerpc
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -9,3 +9,12 @@ config PSERIES_CPUIDLE
help
  Select this option to enable processor idle state management
  through cpuidle subsystem.
+
+config POWERNV_CPUIDLE
+   bool "Cpuidle driver for powernv platforms"
+   depends on CPU_IDLE
+   depends on PPC_POWERNV
+   default y
+   help
+ Select this option to enable processor idle state management
+ through cpuidle subsystem.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index a6331ad..f71ae1b 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_ARM_AT91_CPUIDLE)  += cpuidle-at91.o
 ###
 # POWERPC drivers
 obj-$(CONFIG_PSERIES_CPUIDLE)  += cpuidle-pseries.o
+obj-$(CONFIG_POWERNV_CPUIDLE)  += cpuidle-powernv.o
diff --git a/drivers/cpuidle/cpuidle-powernv.c 
b/drivers/cpuidle/cpuidle-powernv.c
new file mode 100644
index 000..78fd174
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -0,0 +1,169 @@
+/*
+ *  cpuidle-powernv - idle state cpuidle driver.
+ *  Adapted from drivers/cpuidle/cpuidle-pseries
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+struct cpuidle_driver powernv_idle_driver = {
+   .name = "powernv_idle",
+   .owner= THIS_MODULE,
+};
+
+static int max_idle_state;
+static struct cpuidle_state *cpuidle_state_table;
+
+static int snooze_loop(struct cpuidle_device *dev,
+   struct cpuidle_driver *drv,
+   int index)
+{
+   local_irq_enable();
+   set_thread_flag(TIF_POLLING_NRFLAG);
+
+   while (!need_resched()) {
+   HMT_low();
+   HMT_very_low();
+   }
+
+   HMT_medium();
+   clear_thread_flag(TIF_POLLING_NRFLAG);
+   smp_mb();
+   return index;
+}
+
+static int nap_loop(struct cpuidle_device *dev,
+   struct cpuidle_driver *drv,
+   int index)
+{
+   power7_idle();
+   return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state powernv_states[] = {
+   { /* Snooze */
+   .name = "snooze",
+   .desc = "snooze",
+   .flags = CPUIDLE_FLAG_TIME_VALID,
+   .exit_latency = 0,
+   .target_residency = 0,
+   .enter = &snooze_loop },
+   { /* NAP */
+   .name = "NAP",
+   .desc = "NAP",
+   .flags = CPUIDLE_FLAG_TIME_VALID,
+   .exit_latency = 10,
+   .target_residency = 100,
+   .enter = &nap_loop },
+};
+
+static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
+   unsigned long action, void *hcpu)
+{
+   int hotcpu = (unsigned long)hcpu;
+   struct cpuidle_device *dev =
+   per_cpu(cpuidle_devices, hotcpu);
+
+   if (dev && cpuidle_get_driver()) {
+   switch (action) {
+  

[PATCH v1] powernv/cpuidle: Back-end cpuidle driver for powernv platform for idle state management.

2014-01-14 Thread Deepthi Dharwar
Following patch ports the cpuidle framework for powernv
platform and also implements a cpuidle back-end powernv 
idle driver calling on to power7_nap and snooze idle states.

Moving the idle states over to cpuidle framework can take advantage 
of advanced heuristics, tunables and features provided by cpuidle 
framework. Additional idle states can be exploited using the cpuidle 
framework. The statistics and tracing infrastructure provided by 
the cpuidle framework also helps in enabling power management 
related tools and help tune the system and applications.

This series aims to maintain compatibility and functionality to
existing powernv idle cpu management code.  There are no new functions
or idle states added as part of this series. This can be extended by 
adding more states to this existing framework.

For the POWERNV platform to hook into the CPUIDLE framework, one
needs to enable CONFIG_POWERNV_CPUIDLE.

This patch series applies on pseries cpuidle backend driver
fixes patchset posted earlier.
pseries/cpuidle: pseries cpuidle backend driver clean-ups.

 Deepthi Dharwar (1):
  powernv/cpuidle: Back-end cpuidle driver for powernv platform.


 arch/powerpc/platforms/powernv/setup.c |   13 ++
 drivers/cpuidle/Kconfig.powerpc|9 ++
 drivers/cpuidle/Makefile   |1 
 drivers/cpuidle/cpuidle-powernv.c  |  169 
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 drivers/cpuidle/cpuidle-powernv.c


-- Deepthi

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1 6/6] pseries/cpuidle: smt-snooze-delay cleanup.

2014-01-14 Thread Deepthi Dharwar
smt-snooze-delay was designed to disable the NAP state or delay the entry
to the NAP state prior to the adoption of the cpuidle framework. This
is a per-cpu variable. With the coming of the CPUIDLE framework,
states can be disabled on a per-cpu basis using the cpuidle/enable
sysfs entry.

Also, with the coming of cpuidle driver each state's target residency
is per-driver unlike earlier which was per-device. Therefore,
the per-cpu sysfs smt-snooze-delay which decides the target residency
of the idle state on a particular cpu causes more confusion to the user
as we cannot have different smt-snooze-delay (target residency)
values for each cpu.

In the current code, smt-snooze-delay functionality is completely broken.
It makes sense to remove smt-snooze-delay from idle driver with the
coming of cpuidle framework.
However, the sysfs files are retained because ppc64_util currently
uses them. Once ppc64_util is fixed, we propose to clean
up the remaining kernel code.

Signed-off-by: Deepthi Dharwar 
---
 arch/powerpc/include/asm/processor.h |7 ---
 arch/powerpc/kernel/sysfs.c  |2 --
 drivers/cpuidle/cpuidle-pseries.c|   17 -
 3 files changed, 26 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index fa98fdf..027fefd 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -444,13 +444,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, 
IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 extern void power7_nap(void);
-
-#ifdef CONFIG_PSERIES_CPUIDLE
-extern void update_smt_snooze_delay(int cpu, int residency);
-#else
-static inline void update_smt_snooze_delay(int cpu, int residency) {}
-#endif
-
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b4e6676..7f9e130 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -51,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev,
return -EINVAL;
 
per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
-   update_smt_snooze_delay(cpu->dev.id, snooze);
-
return count;
 }
 
diff --git a/drivers/cpuidle/cpuidle-pseries.c 
b/drivers/cpuidle/cpuidle-pseries.c
index bb56091..7ab564a 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -162,23 +162,6 @@ static struct cpuidle_state shared_states[] = {
.enter = &shared_cede_loop },
 };
 
-void update_smt_snooze_delay(int cpu, int residency)
-{
-   struct cpuidle_driver *drv = cpuidle_get_driver();
-   struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
-
-   if (cpuidle_state_table != dedicated_states)
-   return;
-
-   if (residency < 0) {
-   /* Disable the Nap state on that cpu */
-   if (dev)
-   dev->states_usage[1].disable = 1;
-   } else
-   if (drv)
-   drv->states[1].target_residency = residency;
-}
-
 static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
unsigned long action, void *hcpu)
 {

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1 4/6] pseries/cpuidle: Make cpuidle-pseries backend driver a non-module.

2014-01-14 Thread Deepthi Dharwar
Currently cpuidle-pseries backend driver cannot be
built as a module due to dependencies wrt cpuidle framework.
This patch removes all the module related code in the driver.

Signed-off-by: Deepthi Dharwar 
---
 drivers/cpuidle/cpuidle-pseries.c |   15 +--
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-pseries.c 
b/drivers/cpuidle/cpuidle-pseries.c
index 32d86bc..5e13f6c 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -289,17 +289,4 @@ static int __init pseries_processor_idle_init(void)
return 0;
 }
 
-static void __exit pseries_processor_idle_exit(void)
-{
-
-   unregister_cpu_notifier(&setup_hotplug_notifier);
-   cpuidle_unregister(&pseries_idle_driver);
-   return;
-}
-
-module_init(pseries_processor_idle_init);
-module_exit(pseries_processor_idle_exit);
-
-MODULE_AUTHOR("Deepthi Dharwar ");
-MODULE_DESCRIPTION("Cpuidle driver for POWER");
-MODULE_LICENSE("GPL");
+device_initcall(pseries_processor_idle_init);

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1 5/6] pseries/cpuidle: Remove MAX_IDLE_STATE macro.

2014-01-14 Thread Deepthi Dharwar
This patch removes the usage of the MAX_IDLE_STATE macro
and the dead code around it. The number of states
is determined at run time based on the cpuidle
state table selected on a given platform.

Signed-off-by: Deepthi Dharwar 
---
 drivers/cpuidle/cpuidle-pseries.c |   28 ++--
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-pseries.c 
b/drivers/cpuidle/cpuidle-pseries.c
index 5e13f6c..bb56091 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -24,9 +24,7 @@ struct cpuidle_driver pseries_idle_driver = {
.owner= THIS_MODULE,
 };
 
-#define MAX_IDLE_STATE_COUNT   2
-
-static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
+static int max_idle_state;
 static struct cpuidle_state *cpuidle_state_table;
 
 static inline void idle_loop_prolog(unsigned long *in_purr)
@@ -134,7 +132,7 @@ static int shared_cede_loop(struct cpuidle_device *dev,
 /*
  * States for dedicated partition case.
  */
-static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
+static struct cpuidle_state dedicated_states[] = {
{ /* Snooze */
.name = "snooze",
.desc = "snooze",
@@ -154,7 +152,7 @@ static struct cpuidle_state 
dedicated_states[MAX_IDLE_STATE_COUNT] = {
 /*
  * States for shared partition case.
  */
-static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
+static struct cpuidle_state shared_states[] = {
{ /* Shared Cede */
.name = "Shared Cede",
.desc = "Shared Cede",
@@ -225,12 +223,8 @@ static int pseries_cpuidle_driver_init(void)
 
drv->state_count = 0;
 
-   for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
-
-   if (idle_state > max_idle_state)
-   break;
-
-   /* is the state not enabled? */
+   for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+   /* Is the state not enabled? */
if (cpuidle_state_table[idle_state].enter == NULL)
continue;
 
@@ -253,16 +247,14 @@ static int pseries_idle_probe(void)
if (cpuidle_disable != IDLE_NO_OVERRIDE)
return -ENODEV;
 
-   if (max_idle_state == 0) {
-   printk(KERN_DEBUG "pseries processor idle disabled.\n");
-   return -EPERM;
-   }
-
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-   if (lppaca_shared_proc(get_lppaca()))
+   if (lppaca_shared_proc(get_lppaca())) {
cpuidle_state_table = shared_states;
-   else
+   max_idle_state = ARRAY_SIZE(shared_states);
+   } else {
cpuidle_state_table = dedicated_states;
+   max_idle_state = ARRAY_SIZE(dedicated_states);
+   }
} else
return -ENODEV;
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1 3/6] pseries/cpuidle: Use cpuidle_register() for initialisation.

2014-01-14 Thread Deepthi Dharwar
This patch replaces the cpuidle driver and devices initialisation
calls with a single generic cpuidle_register() call
and also includes minor refactoring of the code around it.

Remove the cpu online check in the snooze loop, as this code can
run locally on a cpu only if that cpu is online. Therefore,
this check is not required.

Signed-off-by: Deepthi Dharwar 
---
 drivers/cpuidle/cpuidle-pseries.c |   78 +
 1 file changed, 11 insertions(+), 67 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-pseries.c 
b/drivers/cpuidle/cpuidle-pseries.c
index 2115478..32d86bc 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -27,7 +27,6 @@ struct cpuidle_driver pseries_idle_driver = {
 #define MAX_IDLE_STATE_COUNT   2
 
 static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
-static struct cpuidle_device __percpu *pseries_cpuidle_devices;
 static struct cpuidle_state *cpuidle_state_table;
 
 static inline void idle_loop_prolog(unsigned long *in_purr)
@@ -55,13 +54,12 @@ static int snooze_loop(struct cpuidle_device *dev,
int index)
 {
unsigned long in_purr;
-   int cpu = dev->cpu;
 
idle_loop_prolog(&in_purr);
local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);
 
-   while ((!need_resched()) && cpu_online(cpu)) {
+   while (!need_resched()) {
HMT_low();
HMT_very_low();
}
@@ -188,7 +186,7 @@ static int pseries_cpuidle_add_cpu_notifier(struct 
notifier_block *n,
 {
int hotcpu = (unsigned long)hcpu;
struct cpuidle_device *dev =
-   per_cpu_ptr(pseries_cpuidle_devices, hotcpu);
+   per_cpu(cpuidle_devices, hotcpu);
 
if (dev && cpuidle_get_driver()) {
switch (action) {
@@ -245,50 +243,6 @@ static int pseries_cpuidle_driver_init(void)
return 0;
 }
 
-/* pseries_idle_devices_uninit(void)
- * unregister cpuidle devices and de-allocate memory
- */
-static void pseries_idle_devices_uninit(void)
-{
-   int i;
-   struct cpuidle_device *dev;
-
-   for_each_possible_cpu(i) {
-   dev = per_cpu_ptr(pseries_cpuidle_devices, i);
-   cpuidle_unregister_device(dev);
-   }
-
-   free_percpu(pseries_cpuidle_devices);
-   return;
-}
-
-/* pseries_idle_devices_init()
- * allocate, initialize and register cpuidle device
- */
-static int pseries_idle_devices_init(void)
-{
-   int i;
-   struct cpuidle_driver *drv = &pseries_idle_driver;
-   struct cpuidle_device *dev;
-
-   pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-   if (pseries_cpuidle_devices == NULL)
-   return -ENOMEM;
-
-   for_each_possible_cpu(i) {
-   dev = per_cpu_ptr(pseries_cpuidle_devices, i);
-   dev->state_count = drv->state_count;
-   dev->cpu = i;
-   if (cpuidle_register_device(dev)) {
-   printk(KERN_DEBUG \
-   "cpuidle_register_device %d failed!\n", i);
-   return -EIO;
-   }
-   }
-
-   return 0;
-}
-
 /*
  * pseries_idle_probe()
  * Choose state table for shared versus dedicated partition
@@ -296,9 +250,6 @@ static int pseries_idle_devices_init(void)
 static int pseries_idle_probe(void)
 {
 
-   if (!firmware_has_feature(FW_FEATURE_SPLPAR))
-   return -ENODEV;
-
if (cpuidle_disable != IDLE_NO_OVERRIDE)
return -ENODEV;
 
@@ -307,10 +258,13 @@ static int pseries_idle_probe(void)
return -EPERM;
}
 
-   if (lppaca_shared_proc(get_lppaca()))
-   cpuidle_state_table = shared_states;
-   else
-   cpuidle_state_table = dedicated_states;
+   if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+   if (lppaca_shared_proc(get_lppaca()))
+   cpuidle_state_table = shared_states;
+   else
+   cpuidle_state_table = dedicated_states;
+   } else
+   return -ENODEV;
 
return 0;
 }
@@ -324,22 +278,14 @@ static int __init pseries_processor_idle_init(void)
return retval;
 
pseries_cpuidle_driver_init();
-   retval = cpuidle_register_driver(&pseries_idle_driver);
+   retval = cpuidle_register(&pseries_idle_driver, NULL);
if (retval) {
printk(KERN_DEBUG "Registration of pseries driver failed.\n");
return retval;
}
 
-   retval = pseries_idle_devices_init();
-   if (retval) {
-   pseries_idle_devices_uninit();
-   cpuidle_unregister_driver(&pseries_idle_driver);
-   return retval;
-   }
-
register_cpu_notifier(&setup_hotplug_notifier);
printk(KERN_DEBUG "pseries_idle_driver registered\n");
-
return 0;
 }
 
@@ -347,9 +293,7 @@ static void __exit pseries_pr

[PATCH v1 2/6] pseries/cpuidle: Move processor_idle.c to drivers/cpuidle.

2014-01-14 Thread Deepthi Dharwar
Move the file from arch specific pseries/processor_idle.c
to drivers/cpuidle/cpuidle-pseries.c
Make the relevant Makefile and Kconfig changes.
Also, introduce Kconfig.powerpc in drivers/cpuidle
for all powerpc cpuidle drivers.

Signed-off-by: Deepthi Dharwar 
---
 arch/powerpc/include/asm/processor.h|2 
 arch/powerpc/platforms/pseries/Kconfig  |9 -
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  361 ---
 drivers/cpuidle/Kconfig |5 
 drivers/cpuidle/Kconfig.powerpc |   11 +
 drivers/cpuidle/Makefile|4 
 drivers/cpuidle/cpuidle-pseries.c   |  361 +++
 8 files changed, 382 insertions(+), 372 deletions(-)
 delete mode 100644 arch/powerpc/platforms/pseries/processor_idle.c
 create mode 100644 drivers/cpuidle/Kconfig.powerpc
 create mode 100644 drivers/cpuidle/cpuidle-pseries.c

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index fc14a38..fa98fdf 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -445,7 +445,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, 
IDLE_POWERSAVE_OFF};
 extern int powersave_nap;  /* set if nap mode can be used in idle loop */
 extern void power7_nap(void);
 
-#ifdef CONFIG_PSERIES_IDLE
+#ifdef CONFIG_PSERIES_CPUIDLE
 extern void update_smt_snooze_delay(int cpu, int residency);
 #else
 static inline void update_smt_snooze_delay(int cpu, int residency) {}
diff --git a/arch/powerpc/platforms/pseries/Kconfig 
b/arch/powerpc/platforms/pseries/Kconfig
index 62b4f80..bb59bb0 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -119,12 +119,3 @@ config DTL
  which are accessible through a debugfs file.
 
  Say N if you are unsure.
-
-config PSERIES_IDLE
-   bool "Cpuidle driver for pSeries platforms"
-   depends on CPU_IDLE
-   depends on PPC_PSERIES
-   default y
-   help
- Select this option to enable processor idle state management
- through cpuidle subsystem.
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index fbccac9..0348079 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -21,7 +21,6 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
 obj-$(CONFIG_CMM)  += cmm.o
 obj-$(CONFIG_DTL)  += dtl.o
 obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
-obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o
 obj-$(CONFIG_LPARCFG)  += lparcfg.o
 
 ifeq ($(CONFIG_PPC_PSERIES),y)
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c 
b/arch/powerpc/platforms/pseries/processor_idle.c
deleted file mode 100644
index 09e4f56..000
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- *  processor_idle - idle state cpuidle driver.
- *  Adapted from drivers/idle/intel_idle.c and
- *  drivers/acpi/processor_idle.c
- *
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
-#include 
-#include 
-
-struct cpuidle_driver pseries_idle_driver = {
-   .name = "pseries_idle",
-   .owner= THIS_MODULE,
-};
-
-#define MAX_IDLE_STATE_COUNT   2
-
-static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
-static struct cpuidle_device __percpu *pseries_cpuidle_devices;
-static struct cpuidle_state *cpuidle_state_table;
-
-static inline void idle_loop_prolog(unsigned long *in_purr)
-{
-   *in_purr = mfspr(SPRN_PURR);
-   /*
-* Indicate to the HV that we are idle. Now would be
-* a good time to find other work to dispatch.
-*/
-   get_lppaca()->idle = 1;
-}
-
-static inline void idle_loop_epilog(unsigned long in_purr)
-{
-   u64 wait_cycles;
-
-   wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
-   wait_cycles += mfspr(SPRN_PURR) - in_purr;
-   get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
-   get_lppaca()->idle = 0;
-}
-
-static int snooze_loop(struct cpuidle_device *dev,
-   struct cpuidle_driver *drv,
-   int index)
-{
-   unsigned long in_purr;
-   int cpu = dev->cpu;
-
-   idle_loop_prolog(&in_purr);
-   local_irq_enable();
-   set_thread_flag(TIF_POLLING_NRFLAG);
-
-   while ((!need_resched()) && cpu_online(cpu)) {
-   HMT_low();
-   HMT_very_low();
-   }
-
-   HMT_medium();
-   clear_thread_flag(TIF_POLLING_NRFLAG);
-   smp_mb();
-
-   idle_loop_epilog(in_purr);
-
-   return index;
-}
-
-static void check_and_cede_processor(void)
-{
-   /*
-* Ensure our interrupt state is properly tracked,
-* also checks if no interrupt has occurred while we
-

[PATCH v1 1/6] pseries/cpuidle: Remove redundant call to ppc64_runlatch_off() in cpu idle routines

2014-01-14 Thread Deepthi Dharwar
From: Preeti U Murthy 

Commit fbd7740fdfdf9475f(powerpc: Simplify pSeries idle loop) switched pseries
cpu idle handling from complete idle loops to ppc_md.powersave functions.
Prior to this switch, ppc64_runlatch_off() had to be called in each of the
idle routines. But after the switch, this call is handled in arch_cpu_idle(),
just before the call to ppc_md.powersave, where platform specific idle
routines are called.

As a consequence, the call to ppc64_runlatch_off() got duplicated in the
arch_cpu_idle() routine as well as in some of the idle routines in
pseries, and commit fbd7740fdfdf9475f failed to get rid of these redundant
calls. These calls were carried over through subsequent enhancements to the
pseries cpuidle routines.

Although multiple calls to ppc64_runlatch_off() are harmless, there is still some
overhead due to them. Besides that, these calls could also give rise to the
misunderstanding that it is *necessary* to call ppc64_runlatch_off() multiple
times, when that is not the case. Hence this patch takes care of eliminating
this redundancy.

Signed-off-by: Preeti U Murthy 
Reviewed-by: Srivatsa S. Bhat 
Signed-off-by: Deepthi Dharwar 
---
 arch/powerpc/platforms/pseries/processor_idle.c |3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/processor_idle.c 
b/arch/powerpc/platforms/pseries/processor_idle.c
index a166e38..09e4f56 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -17,7 +17,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 struct cpuidle_driver pseries_idle_driver = {
@@ -63,7 +62,6 @@ static int snooze_loop(struct cpuidle_device *dev,
set_thread_flag(TIF_POLLING_NRFLAG);
 
while ((!need_resched()) && cpu_online(cpu)) {
-   ppc64_runlatch_off();
HMT_low();
HMT_very_low();
}
@@ -103,7 +101,6 @@ static int dedicated_cede_loop(struct cpuidle_device *dev,
idle_loop_prolog(&in_purr);
get_lppaca()->donate_dedicated_cpu = 1;
 
-   ppc64_runlatch_off();
HMT_medium();
check_and_cede_processor();
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v1 0/6] pseries/cpuidle: pseries cpuidle backend driver clean-ups.

2014-01-14 Thread Deepthi Dharwar
The following patch series includes a bunch of clean-ups for the
pseries cpuidle backend driver. This includes,
moving the driver from arch/powerpc/platforms/pseries to
driver/cpuidle, refactoring of the code, making it a non-module,
removing smt-snooze-delay update and dead code around it.

After a number of attempts to consolidate the backend cpuidle driver
for the pSeries and powernv platforms, it seems best to have separate
idle drivers for the two platforms, as any kind of code duplication
ceases to exist beyond the snooze loop and a hot plug notifier.
Going further, with the addition of device tree parsing to set up
idle states and related changes just for the powernv platform, it is
best to keep these drivers separate, avoiding added complexity
and thus improving readability of both platform drivers.

The clean-up undertaken here was posted earlier as part of
generic powerpc cpuidle driver clean-up.

V1 -> http://lkml.org/lkml/2013/7/23/143
V2 -> https://lkml.org/lkml/2013/7/30/872
V3 -> http://comments.gmane.org/gmane.linux.ports.ppc.embedded/63093
V4 -> https://lkml.org/lkml/2013/8/22/25
V5 -> http://lkml.org/lkml/2013/8/22/184
V6 -> https://lkml.org/lkml/2013/8/27/432
V7 -> https://lkml.org/lkml/2013/10/29/216
V8 -> https://lkml.org/lkml/2013/11/11/29

 Deepthi Dharwar (5):
  pseries/cpuidle: Move processor_idle.c to drivers/cpuidle.
  pseries/cpuidle: Use cpuidle_register() for initialisation.
  pseries/cpuidle: Make cpuidle-pseries backend driver a non-module.
  pseries/cpuidle: Remove MAX_IDLE_STATE macro.
  pseries/cpuidle: smt-snooze-delay cleanup.

Preeti U Murthy (1):
  pseries/cpuidle: Remove redundant call to ppc64_runlatch_off() in cpu 
idle routines


 arch/powerpc/include/asm/processor.h|7 
 arch/powerpc/kernel/sysfs.c |2 
 arch/powerpc/platforms/pseries/Kconfig  |9 -
 arch/powerpc/platforms/pseries/Makefile |1 
 arch/powerpc/platforms/pseries/processor_idle.c |  364 ---
 drivers/cpuidle/Kconfig |5 
 drivers/cpuidle/Kconfig.powerpc |   11 +
 drivers/cpuidle/Makefile|4 
 drivers/cpuidle/cpuidle-pseries.c   |  267 +
 9 files changed, 287 insertions(+), 383 deletions(-)
 delete mode 100644 arch/powerpc/platforms/pseries/processor_idle.c
 create mode 100644 drivers/cpuidle/Kconfig.powerpc
 create mode 100644 drivers/cpuidle/cpuidle-pseries.c


-- Deepthi

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2] Move processing of MCE queued event out from syscall exit path.

2014-01-14 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

Hugh Dickins reported an issue that b5ff4211a829
"powerpc/book3s: Queue up and process delayed MCE events" breaks the
PowerMac G5 boot. This patch fixes it by moving the mce event processing
away from syscall exit, which was the wrong place to do it in the first
place, and using the irq work framework to delay processing of the mce event.

Reported-by: Hugh Dickins 
---
 arch/powerpc/include/asm/mce.h |1 -
 arch/powerpc/kernel/entry_64.S |5 -
 arch/powerpc/kernel/mce.c  |   13 ++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 2257d1e..f97d8cb 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -192,7 +192,6 @@ extern void save_mce_event(struct pt_regs *regs, long 
handled,
 extern int get_mce_event(struct machine_check_event *mce, bool release);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
-extern void machine_check_process_queued_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt);
 extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 770d6d6..bbfb029 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -184,11 +184,6 @@ syscall_exit:
bl  .do_show_syscall_exit
ld  r3,RESULT(r1)
 #endif
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_FTR_SECTION
-   bl  .machine_check_process_queued_event
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-#endif
CURRENT_THREAD_INFO(r12, r1)
 
ld  r8,_MSR(r1)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index d6edf2b..a7fd4cb 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 static DEFINE_PER_CPU(int, mce_nest_count);
@@ -35,6 +36,11 @@ static DEFINE_PER_CPU(struct 
machine_check_event[MAX_MC_EVT], mce_event);
 static DEFINE_PER_CPU(int, mce_queue_count);
 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
 
+static void machine_check_process_queued_event(struct irq_work *work);
+struct irq_work mce_event_process_work = {
+.func = machine_check_process_queued_event,
+};
+
 static void mce_set_error_info(struct machine_check_event *mce,
   struct mce_error_info *mce_err)
 {
@@ -185,17 +191,19 @@ void machine_check_queue_event(void)
return;
}
__get_cpu_var(mce_event_queue[index]) = evt;
+
+   /* Queue irq work to process this event later. */
+   irq_work_queue(&mce_event_process_work);
 }
 
 /*
  * process pending MCE event from the mce event queue. This function will be
  * called during syscall exit.
  */
-void machine_check_process_queued_event(void)
+static void machine_check_process_queued_event(struct irq_work *work)
 {
int index;
 
-   preempt_disable();
/*
 * For now just print it to console.
 * TODO: log this error event to FSP or nvram.
@@ -206,7 +214,6 @@ void machine_check_process_queued_event(void)
&__get_cpu_var(mce_event_queue[index]));
__get_cpu_var(mce_queue_count)--;
}
-   preempt_enable();
 }
 
 void machine_check_print_event_info(struct machine_check_event *evt)

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: dma-mapping: Return dma_direct_ops variable when dev == NULL

2014-01-14 Thread Benjamin Herrenschmidt
On Tue, 2014-01-14 at 17:44 +0800, Chunhe Lan wrote:
> Without this patch, kind of below error will be dumped if
> 'insmod ixgbevf.ko' is executed:
> 
> ixgbevf: Intel(R) 10 Gigabit PCI Express Virtual Function
>  Network Driver - version 2.7.12-k
> ixgbevf: Copyright (c) 2009 - 2012 Intel Corporation.
> ixgbevf :01:10.0: enabling device ( -> 0002)
> ixgbevf :01:10.0: No usable DMA configuration, aborting
> ixgbevf: probe of :01:10.0 failed with error -5
> ..
> ..

That's not right. The DMA ops must be set properly for the VF somewhere
in the arch code instead. When creating VFs, is there a hook allowing
the arch to fix things up ?

(Also adding linux-pci on CC)

Ben.

> Signed-off-by: Chunhe Lan 
> Cc: Benjamin Herrenschmidt 
> Tested-by: Chunhe Lan 
> ---
>  arch/powerpc/include/asm/dma-mapping.h |   13 +
>  1 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/dma-mapping.h 
> b/arch/powerpc/include/asm/dma-mapping.h
> index e27e9ad..b8c10de 100644
> --- a/arch/powerpc/include/asm/dma-mapping.h
> +++ b/arch/powerpc/include/asm/dma-mapping.h
> @@ -84,10 +84,15 @@ static inline struct dma_map_ops *get_dma_ops(struct 
> device *dev)
>* only ISA DMA device we support is the floppy and we have a hack
>* in the floppy driver directly to get a device for us.
>*/
> - if (unlikely(dev == NULL))
> - return NULL;
> -
> - return dev->archdata.dma_ops;
> + if (dev && dev->archdata.dma_ops)
> + return dev->archdata.dma_ops;
> + /*
> +  * In some cases (for example, use the Intel(R) 10 Gigabit PCI
> +  * expression Virtual Function Network Driver -- ixgbevf.ko),
> +  * their value of dev is the NULL. If return NULL, the driver is
> +  * aborting. So return dma_direct_ops variable when dev == NULL.
> +  */
> + return &dma_direct_ops;
>  }
>  
>  static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: dma-mapping: Return dma_direct_ops variable when dev == NULL

2014-01-14 Thread Chunhe Lan
Without this patch, kind of below error will be dumped if
'insmod ixgbevf.ko' is executed:

ixgbevf: Intel(R) 10 Gigabit PCI Express Virtual Function
 Network Driver - version 2.7.12-k
ixgbevf: Copyright (c) 2009 - 2012 Intel Corporation.
ixgbevf :01:10.0: enabling device ( -> 0002)
ixgbevf :01:10.0: No usable DMA configuration, aborting
ixgbevf: probe of :01:10.0 failed with error -5
..
..

Signed-off-by: Chunhe Lan 
Cc: Benjamin Herrenschmidt 
Tested-by: Chunhe Lan 
---
 arch/powerpc/include/asm/dma-mapping.h |   13 +
 1 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/dma-mapping.h 
b/arch/powerpc/include/asm/dma-mapping.h
index e27e9ad..b8c10de 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -84,10 +84,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device 
*dev)
 * only ISA DMA device we support is the floppy and we have a hack
 * in the floppy driver directly to get a device for us.
 */
-   if (unlikely(dev == NULL))
-   return NULL;
-
-   return dev->archdata.dma_ops;
+   if (dev && dev->archdata.dma_ops)
+   return dev->archdata.dma_ops;
+   /*
+* In some cases (for example, with the Intel(R) 10 Gigabit PCI
+* Express Virtual Function Network Driver -- ixgbevf.ko), dev is
+* NULL or dev->archdata.dma_ops is unset. Returning NULL makes the
+* driver abort, so fall back to dma_direct_ops in those cases.
+*/
+   return &dma_direct_ops;
 }
 
 static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
-- 
1.7.6.5


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v3] watchdog: mpc8xxx_wdt convert to watchdog core

2014-01-14 Thread Wim Van Sebroeck
Hi Christophe,

> Convert mpc8xxx_wdt.c to the new watchdog API.
> 
> Signed-off-by: Christophe Leroy 

This patch has been added to linux-watchdog-next.

Kind regards,
Wim.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] cpuidle/menu: Fail cpuidle_idle_call() if no idle state is acceptable

2014-01-14 Thread Preeti U Murthy
Hi Srivatsa,

On 01/14/2014 12:30 PM, Srivatsa S. Bhat wrote:
> On 01/14/2014 11:35 AM, Preeti U Murthy wrote:
>> On PowerPC, in a particular test scenario, all the cpu idle states were 
>> disabled.
>> In spite of this it was observed that the idle state count of the shallowest
>> idle state, snooze, was increasing.
>>
>> This is because the governor returns the idle state index as 0 even in
>> scenarios when no idle state can be chosen. These scenarios could be when the
>> latency requirement is 0 or as mentioned above when the user wants to disable
>> certain cpu idle states at runtime. In the latter case, it's possible that no
>> cpu idle state is valid because the suitable states were disabled
>> and the rest did not match the menu governor criteria to be chosen as the
>> next idle state.
>>
>> This patch adds the code to indicate that a valid cpu idle state could not be
>> chosen by the menu governor and reports back to arch so that it can take some
>> default action.
>>
> 
> That sounds fair enough. However, the "default" action of pseries idle loop
> (pseries_lpar_idle()) surprises me. It enters Cede, which is _deeper_ than 
> doing
> a snooze! IOW, a user might "disable" cpuidle or set the 
> PM_QOS_CPU_DMA_LATENCY
> to 0 hoping to prevent the CPUs from going to deep idle states, but then the
> machine would still end up going to Cede, even though that wont get reflected
> in the idle state counts. IMHO that scenario needs some thought as well...

Yes I did see this, but since the patch intends to only communicate
whether the cpuidle governor was successful in choosing an idle state on
its part, I wished to address the default action of pseries idle loop
separately. You are right we will need to understand the patch which
introduced this action. I will take a look at it.

> 
>> Signed-off-by: Preeti U Murthy 
>> ---
>>
>>  drivers/cpuidle/cpuidle.c|6 +-
>>  drivers/cpuidle/governors/menu.c |7 ---
>>  2 files changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>> index a55e68f..5bf06bb 100644
>> --- a/drivers/cpuidle/cpuidle.c
>> +++ b/drivers/cpuidle/cpuidle.c
>> @@ -131,8 +131,9 @@ int cpuidle_idle_call(void)
>>
>>  /* ask the governor for the next state */
>>  next_state = cpuidle_curr_governor->select(drv, dev);
>> +
>> +dev->last_residency = 0;
>>  if (need_resched()) {
>> -dev->last_residency = 0;
>>  /* give the governor an opportunity to reflect on the outcome */
>>  if (cpuidle_curr_governor->reflect)
>>  cpuidle_curr_governor->reflect(dev, next_state);
> 
> The comments on top of the .reflect() routines of the governors say that the
> second parameter is the index of the actual state entered. But after this 
> patch,
> next_state can be negative, indicating an invalid index. So those comments 
> need
> to be updated accordingly.

Right, I will take care of the comment in the next post.
> 
>> @@ -140,6 +141,9 @@ int cpuidle_idle_call(void)
>>  return 0;
>>  }
>>
>> +if (next_state < 0)
>> +return -EINVAL;
> 
> The exit path above (due to need_resched) returns with irqs enabled, but the 
> new
> one you are adding (next_state < 0) returns with irqs disabled. This is 
> correct,
> because in the latter case, "idle" is still in progress and the arch will 
> choose
> a default handler to execute (unlike the former case where "idle" is over and
> hence its time to enable interrupts).

Correct.
> 
> IMHO it would be good to add comments around this code to explain this subtle
> difference. We can never be too careful with these things... ;-)

Ok, will do so.
> 
>> +
>>  trace_cpu_idle_rcuidle(next_state, dev->cpu);
>>
>>  broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
>> diff --git a/drivers/cpuidle/governors/menu.c 
>> b/drivers/cpuidle/governors/menu.c
>> index cf7f2f0..6921543 100644
>> --- a/drivers/cpuidle/governors/menu.c
>> +++ b/drivers/cpuidle/governors/menu.c
>> @@ -283,6 +283,7 @@ again:
>>   * menu_select - selects the next idle state to enter
>>   * @drv: cpuidle driver containing state data
>>   * @dev: the CPU
>> + * Returns -1 when no idle state is suitable
>>   */
>>  static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device 
>> *dev)
>>  {
>> @@ -292,17 +293,17 @@ static int menu_select(struct cpuidle_driver *drv, 
>> struct cpuidle_device *dev)
>>  int multiplier;
>>  struct timespec t;
>>
>> -if (data->needs_update) {
>> +if (data->last_state_idx >= 0 && data->needs_update) {
>^
> Doesn't hurt, but actually unnecessary, since ->needs_update is set to 1
> only when index >= 0.

Right we do not need this check. I was assuming that needs_update would
be consistent with the index >= 0 only in the need_resched() case. But
needs_update will get unset each time the governor is invoked to be set
only if index >= 0 t

Re: [PATCH] Move processing of MCE queued event out from syscall exit path.

2014-01-14 Thread Benjamin Herrenschmidt
On Mon, 2014-01-13 at 23:47 -0800, Hugh Dickins wrote:
> 
> And I may be quite wrong to point a finger at ATA errors: perhaps
> they're always shown, and quickly cleared off screen in successful
> boots,
> but left visible when root cannot be mounted for some other reason.

dmesg would tell...

> I don't know, and won't have time to investigate further - bisecting
> intermittents is not much fun!  I'll just have to hope that it's
> sorted out before it reaches 3.14-rc, or else bite the bullet and
> investigate on that.)

Right :-) Oh well, I still use a G5 as a desktop so I might eventually
stumble upon them !

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 3/3] powerpc/fsl: Use the new interface to save or restore registers

2014-01-14 Thread Dongsheng Wang
From: Wang Dongsheng 

Use fsl_cpu_state_save/fsl_cpu_state_restore to save/restore registers,
so that the hibernation code no longer needs to maintain its own copy
of the register save/restore sequence.

Signed-off-by: Wang Dongsheng 

diff --git a/arch/powerpc/kernel/swsusp_booke.S 
b/arch/powerpc/kernel/swsusp_booke.S
index 553c140..b5992db 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -4,92 +4,28 @@
  * Copyright (c) 2009-2010 MontaVista Software, LLC.
  */
 
-#include 
-#include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
-
-/*
- * Structure for storing CPU registers on the save area.
- */
-#define SL_SP  0
-#define SL_PC  4
-#define SL_MSR 8
-#define SL_TCR 0xc
-#define SL_SPRG0   0x10
-#define SL_SPRG1   0x14
-#define SL_SPRG2   0x18
-#define SL_SPRG3   0x1c
-#define SL_SPRG4   0x20
-#define SL_SPRG5   0x24
-#define SL_SPRG6   0x28
-#define SL_SPRG7   0x2c
-#define SL_TBU 0x30
-#define SL_TBL 0x34
-#define SL_R2  0x38
-#define SL_CR  0x3c
-#define SL_LR  0x40
-#define SL_R12 0x44 /* r12 to r31 */
-#define SL_SIZE (SL_R12 + 80)
-
-   .section .data
-   .align  5
-
-_GLOBAL(swsusp_save_area)
-   .space  SL_SIZE
-
+#include 
 
.section .text
.align  5
 
 _GLOBAL(swsusp_arch_suspend)
-   lis r11,swsusp_save_area@h
-   ori r11,r11,swsusp_save_area@l
-
-   mflr r0
-   stw r0,SL_LR(r11)
-   mfcr r0
-   stw r0,SL_CR(r11)
-   stw r1,SL_SP(r11)
-   stw r2,SL_R2(r11)
-   stmw r12,SL_R12(r11)
-
-   /* Save MSR & TCR */
-   mfmsr   r4
-   stw r4,SL_MSR(r11)
-   mfspr   r4,SPRN_TCR
-   stw r4,SL_TCR(r11)
-
-   /* Get a stable timebase and save it */
-1: mfspr   r4,SPRN_TBRU
-   stw r4,SL_TBU(r11)
-   mfspr   r5,SPRN_TBRL
-   stw r5,SL_TBL(r11)
-   mfspr   r3,SPRN_TBRU
-   cmpw r3,r4
-   bne 1b
+   mflr r15
+   lis r3, core_registers_save_area@h
+   ori r3, r3, core_registers_save_area@l
+
+   /* Save base register */
+   li  r4, 0
+   bl  fsl_cpu_state_save
 
-   /* Save SPRGs */
-   mfspr   r4,SPRN_SPRG0
-   stw r4,SL_SPRG0(r11)
-   mfspr   r4,SPRN_SPRG1
-   stw r4,SL_SPRG1(r11)
-   mfspr   r4,SPRN_SPRG2
-   stw r4,SL_SPRG2(r11)
-   mfspr   r4,SPRN_SPRG3
-   stw r4,SL_SPRG3(r11)
-   mfspr   r4,SPRN_SPRG4
-   stw r4,SL_SPRG4(r11)
-   mfspr   r4,SPRN_SPRG5
-   stw r4,SL_SPRG5(r11)
-   mfspr   r4,SPRN_SPRG6
-   stw r4,SL_SPRG6(r11)
-   mfspr   r4,SPRN_SPRG7
-   stw r4,SL_SPRG7(r11)
+   /* Save LR */
+   lis r3, core_registers_save_area@h
+   ori r3, r3, core_registers_save_area@l
+   stw r15, SR_LR(r3)
 
/* Call the low level suspend stuff (we should probably have made
 * a stackframe...
@@ -97,11 +33,12 @@ _GLOBAL(swsusp_arch_suspend)
bl  swsusp_save
 
/* Restore LR from the save area */
-   lis r11,swsusp_save_area@h
-   ori r11,r11,swsusp_save_area@l
-   lwz r0,SL_LR(r11)
-   mtlr r0
+   lis r3, core_registers_save_area@h
+   ori r3, r3, core_registers_save_area@l
+   lwz r15, SR_LR(r3)
+   mtlr r15
 
+   li  r3, 0
blr
 
 _GLOBAL(swsusp_arch_resume)
@@ -138,9 +75,6 @@ _GLOBAL(swsusp_arch_resume)
bl flush_dcache_L1
bl flush_instruction_cache
 
-   lis r11,swsusp_save_area@h
-   ori r11,r11,swsusp_save_area@l
-
/*
 * Mappings from virtual addresses to physical addresses may be
 * different than they were prior to restoring hibernation state. 
@@ -149,53 +83,12 @@ _GLOBAL(swsusp_arch_resume)
 */
bl  _tlbil_all
 
-   lwz r4,SL_SPRG0(r11)
-   mtspr   SPRN_SPRG0,r4
-   lwz r4,SL_SPRG1(r11)
-   mtspr   SPRN_SPRG1,r4
-   lwz r4,SL_SPRG2(r11)
-   mtspr   SPRN_SPRG2,r4
-   lwz r4,SL_SPRG3(r11)
-   mtspr   SPRN_SPRG3,r4
-   lwz r4,SL_SPRG4(r11)
-   mtspr   SPRN_SPRG4,r4
-   lwz r4,SL_SPRG5(r11)
-   mtspr   SPRN_SPRG5,r4
-   lwz r4,SL_SPRG6(r11)
-   mtspr   SPRN_SPRG6,r4
-   lwz r4,SL_SPRG7(r11)
-   mtspr   SPRN_SPRG7,r4
-
-   /* restore the MSR */
-   lwz r3,SL_MSR(r11)
-   mtmsr   r3
-
-   /* Restore TB */
-   li  r3,0
-   mtspr   SPRN_TBWL,r3
-   lwz r3,SL_TBU(r11)
-   lwz r4,SL_TBL(r11)
-   mtspr   SPRN_TBWU,r3
-   mtspr   SPRN_TBWL,r4
-
-   /* Restore TCR and clear any pending bits in TSR. */
-   lwz r4,SL_TCR(r11)
-   mtspr   SPRN_TCR,r4
-   lis r4, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h
-   mtspr   SPRN_TSR,r4
-
-   /* Kick decrementer */
-

[PATCH 2/3] powerpc/85xx: Provide two functions to save/restore the core registers

2014-01-14 Thread Dongsheng Wang
From: Wang Dongsheng 

Add fsl_cpu_state_save/fsl_cpu_state_restore functions, used for deep
sleep and hibernation to save/restore core registers. The save/restore
code is factored out for use by various modules, so that they do not
each need to maintain their own copy.

Currently supported processor types are E6500, E5500, E500MC, E500v2 and
E500v1.

Signed-off-by: Wang Dongsheng 

diff --git a/arch/powerpc/include/asm/fsl_sleep.h 
b/arch/powerpc/include/asm/fsl_sleep.h
new file mode 100644
index 000..31c8a9b
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_sleep.h
@@ -0,0 +1,98 @@
+/*
+ * Freescale 85xx Power management set
+ *
+ * Author: Wang Dongsheng 
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_FSL_SLEEP_H
+#define __ASM_FSL_SLEEP_H
+
+/*
+ * Freescale 85xx Core registers set, core register map definition
+ * Offsets are relative to r3. To be compatible with both 32-bit and 64-bit,
+ * each slot is 64 bits wide (a double word).
+ *
+ * Acronyms:
+ * dw(data width)  0x08
+ *
+ * Map:
+ * General-Purpose Registers
+ * GPR1(sp) 0
+ * GPR2 0x8 (dw * 1)
+ * GPR13 - GPR31   0x10 ~ 0xa0 (dw * 2 ~ dw * 20)
+ * Floating-point registers
+ * FPR14 - FPR31   0xa8 ~ 0x130 (dw * 21 ~ dw * 38)
+ * Registers for Branch Operations
+ * CR  0x138   (dw * 39)
+ * LR  0x140   (dw * 40)
+ * Processor Control Registers
+ * MSR 0x148   (dw * 41)
+ * EPCR 0x150   (dw * 42)
+ *
+ * Only e500, e500v2 need to save HID0 - HID1
+ * HID0 - HID1 0x158 ~ 0x160 (dw * 43 ~ dw * 44)
+ * Timer Registers
+ * TCR 0x168   (dw * 45)
+ * TB(64bit)   0x170   (dw * 46)
+ * TBU(32bit)  0x178   (dw * 47)
+ * TBL(32bit)  0x180   (dw * 48)
+ * Interrupt Registers
+ * IVPR 0x188   (dw * 49)
+ * IVOR0 - IVOR15  0x190 ~ 0x208   (dw * 50 ~ dw * 65)
+ * IVOR32 - IVOR41 0x210 ~ 0x258   (dw * 66 ~ dw * 75)
+ * Software-Use Registers
+ * SPRG1   0x260   (dw * 76), 64-bit need to save.
+ * SPRG3   0x268   (dw * 77), 32-bit need to save.
+ * MMU Registers
+ * PID0 - PID2 0x270 ~ 0x280   (dw * 78 ~ dw * 80)
+ * Debug Registers
+ * DBCR0 - DBCR2   0x288 ~ 0x298   (dw * 81 ~ dw * 83)
+ * IAC1 - IAC4 0x2a0 ~ 0x2b8   (dw * 84 ~ dw * 87)
+ * DAC1 - DAC2 0x2c0 ~ 0x2c8   (dw * 88 ~ dw * 89)
+ *
+ */
+
+#define SR_GPR1 0x000
+#define SR_GPR2 0x008
+#define SR_GPR13   0x010
+#define SR_FPR14   0x0a8
+#define SR_CR  0x138
+#define SR_LR  0x140
+#define SR_MSR 0x148
+#define SR_EPCR 0x150
+#define SR_HID0 0x158
+#define SR_TCR 0x168
+#define SR_TB  0x170
+#define SR_TBU 0x178
+#define SR_TBL 0x180
+#define SR_IVPR 0x188
+#define SR_IVOR0   0x190
+#define SR_IVOR32  0x210
+#define SR_SPRG1   0x260
+#define SR_SPRG3   0x268
+#define SR_PID0 0x270
+#define SR_DBCR0   0x288
+#define SR_IAC1 0x2a0
+#define SR_DAC1 0x2c0
+#define FSL_CPU_SR_SIZE (SR_DAC1 + 0x10)
+
+#ifndef __ASSEMBLY__
+
+enum core_save_type {
+   BASE_SAVE = 0,
+   ALL_SAVE = 1,
+};
+
+extern int fsl_cpu_state_save(void *save_page, enum core_save_type type);
+extern int fsl_cpu_state_restore(void *restore_page, enum core_save_type type);
+
+#endif
+
+#endif
+
diff --git a/arch/powerpc/platforms/85xx/Makefile 
b/arch/powerpc/platforms/85xx/Makefile
index 25cebe7..650a01c 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -4,6 +4,7 @@
 obj-$(CONFIG_SMP) += smp.o
 
 obj-y += common.o
+obj-y += save-core.o
 
 obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o
 obj-$(CONFIG_C293_PCIE)   += c293pcie.o
diff --git a/arch/powerpc/platforms/85xx/save-core.S 
b/arch/powerpc/platforms/85xx/save-core.S
new file mode 100644
index 000..a6b93b8
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/save-core.S
@@ -0,0 +1,497 @@
+/*
+ * Freescale Power Management, Save/Restore core state
+ *
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ * Author: Wang Dongsheng 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+

[PATCH 1/3] powerpc/fsl: add E500MC and E5500 PVR define

2014-01-14 Thread Dongsheng Wang
From: Wang Dongsheng 

E500MC and E5500 PVR will be used in subsequent save/restore core
state patches.

Signed-off-by: Wang Dongsheng 

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 62b114e..cd7b630 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1075,6 +1075,8 @@
 #define PVR_8560   0x8020
 #define PVR_VER_E500V1 0x8020
 #define PVR_VER_E500V2 0x8021
+#define PVR_VER_E500MC 0x8023
+#define PVR_VER_E5500  0x8024
 #define PVR_VER_E6500  0x8040
 
 /*
-- 
1.8.5


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] cpuidle/menu: Fail cpuidle_idle_call() if no idle state is acceptable

2014-01-14 Thread Deepthi Dharwar
On 01/14/2014 12:30 PM, Srivatsa S. Bhat wrote:
> On 01/14/2014 11:35 AM, Preeti U Murthy wrote:
>> On PowerPC, in a particular test scenario, all the cpu idle states were 
>> disabled.
>> In spite of this it was observed that the idle state count of the shallowest
>> idle state, snooze, was increasing.
>>
>> This is because the governor returns the idle state index as 0 even in
>> scenarios when no idle state can be chosen. These scenarios could be when the
>> latency requirement is 0 or as mentioned above when the user wants to disable
>> certain cpu idle states at runtime. In the latter case, it's possible that no
>> cpu idle state is valid because the suitable states were disabled
>> and the rest did not match the menu governor criteria to be chosen as the
>> next idle state.
>>
>> This patch adds the code to indicate that a valid cpu idle state could not be
>> chosen by the menu governor and reports back to arch so that it can take some
>> default action.
>>
> 
> That sounds fair enough. However, the "default" action of pseries idle loop
> (pseries_lpar_idle()) surprises me. It enters Cede, which is _deeper_ than 
> doing
> a snooze! IOW, a user might "disable" cpuidle or set the 
> PM_QOS_CPU_DMA_LATENCY
> to 0 hoping to prevent the CPUs from going to deep idle states, but then the
> machine would still end up going to Cede, even though that wont get reflected
> in the idle state counts. IMHO that scenario needs some thought as well...

It was the snooze loop earlier but later we changed it to cede in commit
363edbe2614 powerpc: Default arch idle will cede the processor on
pseries to address the following regressions:

>>snippet from the patch.
When adding cpuidle support to pSeries, we introduced two
regressions:

  - The new cpuidle backend driver only works under hypervisors
supporting the "SLPLAR" option, which isn't the case of the
old POWER4 hypervisor and the HV "light" used on js2x blades

  - The cpuidle driver registers fairly late, meaning that for
a significant portion of the boot process, we end up having
all threads spinning. This slows down the boot process and
increases the overall resource usage if the hypervisor has
shared processors.

This fixes both by implementing a "default" idle that will cede
to the hypervisor when possible, in a very simple way without
all the bells and whistles of cpuidle.

Regards,
Deepthi


>> Signed-off-by: Preeti U Murthy 
>> ---
>>
>>  drivers/cpuidle/cpuidle.c|6 +-
>>  drivers/cpuidle/governors/menu.c |7 ---
>>  2 files changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
>> index a55e68f..5bf06bb 100644
>> --- a/drivers/cpuidle/cpuidle.c
>> +++ b/drivers/cpuidle/cpuidle.c
>> @@ -131,8 +131,9 @@ int cpuidle_idle_call(void)
>>
>>  /* ask the governor for the next state */
>>  next_state = cpuidle_curr_governor->select(drv, dev);
>> +
>> +dev->last_residency = 0;
>>  if (need_resched()) {
>> -dev->last_residency = 0;
>>  /* give the governor an opportunity to reflect on the outcome */
>>  if (cpuidle_curr_governor->reflect)
>>  cpuidle_curr_governor->reflect(dev, next_state);
> 
> The comments on top of the .reflect() routines of the governors say that the
> second parameter is the index of the actual state entered. But after this 
> patch,
> next_state can be negative, indicating an invalid index. So those comments 
> need
> to be updated accordingly.
> 
>> @@ -140,6 +141,9 @@ int cpuidle_idle_call(void)
>>  return 0;
>>  }
>>
>> +if (next_state < 0)
>> +return -EINVAL;
> 
> The exit path above (due to need_resched) returns with irqs enabled, but the 
> new
> one you are adding (next_state < 0) returns with irqs disabled. This is 
> correct,
> because in the latter case, "idle" is still in progress and the arch will 
> choose
> a default handler to execute (unlike the former case where "idle" is over and
> hence its time to enable interrupts).
> 
> IMHO it would be good to add comments around this code to explain this subtle
> difference. We can never be too careful with these things... ;-)
> 
>> +
>>  trace_cpu_idle_rcuidle(next_state, dev->cpu);
>>
>>  broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
>> diff --git a/drivers/cpuidle/governors/menu.c 
>> b/drivers/cpuidle/governors/menu.c
>> index cf7f2f0..6921543 100644
>> --- a/drivers/cpuidle/governors/menu.c
>> +++ b/drivers/cpuidle/governors/menu.c
>> @@ -283,6 +283,7 @@ again:
>>   * menu_select - selects the next idle state to enter
>>   * @drv: cpuidle driver containing state data
>>   * @dev: the CPU
>> + * Returns -1 when no idle state is suitable
>>   */
>>  static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device 
>> *dev)
>>  {
>> @@ -292,17 +293,17 @@ static int menu_select(struct cpuidle_