[PATCH v3] powerpc/kernel/sysfs: Add new config option PMU_SYSFS to enable PMU SPRs sysfs file creation

2020-01-06 Thread Kajol Jain
Many of the performance monitoring unit (PMU) SPRs are
exposed in sysfs. This may not be desirable, since the
"perf" API is the primary interface to program the PMU and
collect counter data on the system. That said, we can't
remove these sysfs files, since we don't know whether
anyone/anything is using them.

So the patch adds a new, user-selectable CONFIG option
'CONFIG_PMU_SYSFS' that gates sysfs file creation for the
PMU SPRs. The new option is disabled by default, but can be
enabled if the user needs it.

Tested this patch's behaviour on powernv and pseries machines.
Also compile-tested it for different architectures, including
x86, mips, mips64, alpha and arm. The patch is also compile-tested
for pmac32_defconfig.
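
A minimal sketch of the guard this adds in sysfs.c (illustrative only,
not the exact hunk; the helper name register_pmc_attrs is made up):

/*
 * Register the PMC attribute files only when CONFIG_PMU_SYSFS is set.
 * IS_ENABLED() keeps the dead branch visible to the compiler (so it is
 * still type-checked) while letting it be optimised away.
 */
static void register_pmc_attrs(struct device *s,
			       struct device_attribute *attrs, int nattrs)
{
	int i;

	if (!IS_ENABLED(CONFIG_PMU_SYSFS))
		return;

	for (i = 0; i < nattrs; i++)
		device_create_file(s, &attrs[i]);
}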

Signed-off-by: Kajol Jain 
---
 arch/powerpc/kernel/sysfs.c| 22 +-
 arch/powerpc/platforms/Kconfig.cputype |  6 ++
 2 files changed, 19 insertions(+), 9 deletions(-)

---
Changelog:
v2 -> v3
- Make the 'PMU_SYSFS' config option user selectable
  and remove its dependency on the 'PERF_EVENTS' option.
- Add a PMU_SYSFS config check at the time of registering/unregistering
  PMU SPRs.
- Replace #ifdefs with IS_ENABLED() while registering/unregistering
  PMU SPRs.

Resend v2
Added 'Reviewed-by' and 'Tested-by' tags along with test scenarios.

v1 -> v2
- Added a new config option 'PMU_SYSFS' for PMU SPR file creation
  rather than using the PERF_EVENTS config option directly, and made
  sure the SPR files are created only if 'CONFIG_PERF_EVENTS' is disabled.
---
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676da11cb..d4faa60f1d27 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -606,12 +606,14 @@ static void sysfs_create_dscr_default(void)
 #endif /* CONFIG_PPC64 */
 
 #ifdef HAS_PPC_PMC_PA6T
+#ifdef CONFIG_PMU_SYSFS
 SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
 SYSFS_PMCSETUP(pa6t_pmc1, SPRN_PA6T_PMC1);
 SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
 SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
 SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
 SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
+#endif /* CONFIG_PMU_SYSFS */
 #ifdef CONFIG_DEBUG_MISC
 SYSFS_SPRSETUP(hid0, SPRN_HID0);
 SYSFS_SPRSETUP(hid1, SPRN_HID1);
@@ -645,21 +647,21 @@ SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
 #endif /* HAS_PPC_PMC_PA6T */
 
 #ifdef HAS_PPC_PMC_IBM
-static struct device_attribute ibm_common_attrs[] = {
+static  __maybe_unused struct device_attribute ibm_common_attrs[] = {
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
 };
 #endif /* HAS_PPC_PMC_G4 */
 
 #ifdef HAS_PPC_PMC_G4
-static struct device_attribute g4_common_attrs[] = {
+static  __maybe_unused struct device_attribute g4_common_attrs[] = {
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
__ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
 };
 #endif /* HAS_PPC_PMC_G4 */
 
-static struct device_attribute classic_pmc_attrs[] = {
+static  __maybe_unused struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
__ATTR(pmc3, 0600, show_pmc3, store_pmc3),
@@ -674,6 +676,7 @@ static struct device_attribute classic_pmc_attrs[] = {
 
 #ifdef HAS_PPC_PMC_PA6T
 static struct device_attribute pa6t_attrs[] = {
+#ifdef CONFIG_PMU_SYSFS
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
@@ -682,6 +685,7 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
+#endif /* CONFIG_PMU_SYSFS */
 #ifdef CONFIG_DEBUG_MISC
__ATTR(hid0, 0600, show_hid0, store_hid0),
__ATTR(hid1, 0600, show_hid1, store_hid1),
@@ -751,13 +755,12 @@ static int register_cpu_online(unsigned int cpu)
 
/* PMC stuff */
switch (cur_cpu_spec->pmc_type) {
-#ifdef HAS_PPC_PMC_IBM
+#ifdef CONFIG_PMU_SYSFS
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
pmc_attrs = classic_pmc_attrs;
break;
-#endif /* HAS_PPC_PMC_IBM */
 #ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
@@ -765,6 +768,7 @@ static int register_cpu_online(unsigned int cpu)
pmc_attrs = classic_pmc_attrs;
break;
 #endif /* HAS_PPC_PMC_G4 */
+#endif /* CONFIG_PMU_SYSFS */
 #ifdef HAS_PPC_PMC_PA6T
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
@@ -787,7 +791,7 @@ static int register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
 
 #ifdef CONFIG_PPC64
-   if (cpu_has_feature(CPU_FTR_MMCRA))

Re: [PATCH v11 00/25] mm/gup: track dma-pinned pages: FOLL_PIN

2020-01-06 Thread Jan Kara
On Sat 28-12-19 20:33:32, John Hubbard wrote:
> On 12/27/19 1:56 PM, John Hubbard wrote:
> ...
> >> It is an ancient verification test (~10y), and it's not an easy task to
> >> make it understandable and standalone :).
> >>
> > 
> > Is this the only test that fails, btw? No other test failures or hints of
> > problems?
> > 
> > (Also, maybe hopeless, but can *anyone* on the RDMA list provide some
> > characterization of the test, such as how many pins per page, what page
> > sizes are used? I'm still hoping to write a test to trigger something
> > close to this...)
> > 
> > I do have a couple more ideas for test runs:
> > 
> > 1. Reduce GUP_PIN_COUNTING_BIAS to 1. That would turn the whole override of
> > page->_refcount into a no-op, and so if all is well (it may not be!) with 
> > the
> > rest of the patch, then we'd expect this problem to not reappear.
> > 
> > 2. Activate /proc/vmstat *foll_pin* statistics unconditionally (just for these
> > tests, of course), so we can see if there is a get/put mismatch. However, 
> > that
> > will change the timing, and so it must be attempted independently of (1), in
> > order to see if it ends up hiding the repro.
> > 
> > I've updated this branch to implement (1), but not (2), hoping you can give
> > this one a spin?
> > 
> >     g...@github.com:johnhubbard/linux.git  
> > pin_user_pages_tracking_v11_with_diags
> > 
> > 
> 
> Also, looking ahead:
> 
> a) if the problem disappears with the latest above test, then we likely have
>a huge page refcount overflow, and there are a couple of different ways to
>fix it. 
> 
> b) if it still reproduces with the above, then it's some other random mistake,
>and in that case I'd be inclined to do a sort of guided (or classic, 
> unguided)
>git bisect of the series. Because it could be any of several patches.
> 
>If that's too much trouble, then I'd have to fall back to submitting a few
>patches at a time and working my way up to the tracking patch...

It could also be that an ordinary page reference is dropped with 'unpin'
thus underflowing the page refcount...
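
As a hedged illustration of that mismatch (not code from the series; the
function names approximate its API, and GUP_PIN_COUNTING_BIAS is whatever
the series defines):

	get_page(page);         /* ordinary reference: _refcount += 1         */
	...
	unpin_user_page(page);  /* pin-aware release:                         */
	                        /* _refcount -= GUP_PIN_COUNTING_BIAS,        */
	                        /* i.e. an underflow of BIAS - 1 on this page */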

Honza

-- 
Jan Kara 
SUSE Labs, CR


Re: [RFC 0/3] Integrate Support for self-save and determine

2020-01-06 Thread Pratik Sampat

Thanks for your comments, Ram.

A list of preferred SPRs is maintained in the kernel which contains two
properties:
1. supported_mode: Helps in identifying if it strictly supports self
save or restore or both.

It would be good to capture the information that 'supported_mode' gets
initialized using the information from the device tree.


2. preferred_mode: Calls out what mode is preferred for each SPR. It
could be strictly self save or restore, or it can also
determine the preference of one mode over the other if both
are present, by encapsulating the other in a bitmask from
LSB to MSB.

and 'preferred_mode' is statically initialized.


Sure thing, I'll mention that.


Below is a table showing the Scenario::Consequence when the self save and
self restore modes are available or disabled in different combinations, as
perceived from the device tree, thus giving complete backwards compatibility
regardless of whether an older firmware runs a newer kernel or vice versa.

SR = Self restore; SS = Self save

.-----------------------------------+-----------------------------------------.
| Scenario                          | Consequence                             |
:-----------------------------------+-----------------------------------------:
| Legacy Firmware. No SS or SR node | Self restore is called for all          |
|                                   | supported SPRs                          |
:-----------------------------------+-----------------------------------------:
| SR: !active SS: !active           | Deep stop states disabled               |
:-----------------------------------+-----------------------------------------:
| SR: active SS: !active            | Self restore is called for all          |
|                                   | supported SPRs                          |
:-----------------------------------+-----------------------------------------:
| SR: active SS: active             | Goes through the preferences for each   |
|                                   | SPR and executes one of the modes       |
|                                   | accordingly. Currently, self restore is |
|                                   | called for all the SPRs except PSSCR,   |
|                                   | which is self saved                     |
:-----------------------------------+-----------------------------------------:
| SR: active(only HID0) SS: active  | Self save is called for all supported   |
|                                   | registers except HID0 (as HID0 cannot   |
|                                   | be self saved currently)                |

Not clear how this will be conveyed to the hypervisor. Through the
device tree or through some other means?


This information will be conveyed through the device tree. I'll frame a
sentence that makes this more explicit.


:-----------------------------------+-----------------------------------------:
| SR: !active SS: active            | Currently will disable deep states as   |
|                                   | HID0 needs to be self restored and      |
|                                   | cannot be self saved                    |
'-----------------------------------+-----------------------------------------'

Pratik Rajesh Sampat (3):
   powerpc/powernv: Interface to define support and preference for a SPR
   powerpc/powernv: Introduce Self save support
   powerpc/powernv: Parse device tree, population of SPR support

  arch/powerpc/include/asm/opal-api.h|   3 +-
  arch/powerpc/include/asm/opal.h|   1 +
  arch/powerpc/platforms/powernv/idle.c  | 431 ++---
  arch/powerpc/platforms/powernv/opal-call.c |   1 +
  4 files changed, 379 insertions(+), 57 deletions(-)

--
2.21.0




Re: [RFC 1/3] powerpc/powernv: Interface to define support and preference for a SPR

2020-01-06 Thread Pratik Sampat

Hello Ram,

Thank you for reviewing the patches.


+/* Interface for the stop state supported and preference */
+#define SELF_RESTORE_TYPE   0
+#define SELF_SAVE_TYPE      1
+
+#define NR_PREFERENCES      2
+#define PREFERENCE_SHIFT    8
+#define PREFERENCE_MASK     0xff
+
+#define UNSUPPORTED         0x0
+#define SELF_RESTORE_STRICT 0x01
+#define SELF_SAVE_STRICT    0x10
+
+/*
+ * Bitmask defining the kind of preferences available.
+ * Note : The higher to lower preference is from LSB to MSB, with a shift of
+ * 8 bits.

A minor comment.

Is there a reason why shift is 8?  Shift of 4 must be sufficient,
and a mask of '0xf' should do. And SELF_SAVE_STRICT can be 0x2.



Yes, you're right! We could make do with fewer bits here.


+/* Caching the lpcr & ptcr support to use later */
+static bool is_lpcr_self_save;
+static bool is_ptcr_self_save;

I understand why you need to track the status of the PTCR register.
But it's not clear why the LPCR register's save status needs to be tracked.


Normally it would not, but LPCR was previously unsupported by self-restore,
and the kernel saved and restored its value in context. Now that we have
support for saving LPCR automatically, I believe we can leverage it and
make sure the kernel does not do redundant work.


+
+struct preferred_sprs {
+   u64 spr;
+   u32 preferred_mode;
+   u32 supported_mode;
+};
+
+struct preferred_sprs preferred_sprs[] = {
+   {
+   .spr = SPRN_HSPRG0,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_LPCR,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_PTCR,
+   .preferred_mode = PREFER_SAVE_RESTORE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HMEER,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID0,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = P9_STOP_SPR_MSR,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = P9_STOP_SPR_PSSCR,
+   .preferred_mode = PREFER_SAVE_RESTORE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID1,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID4,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID5,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   }
+};

What determines the list of registers tracked in this table?


.snip..


This list contains all the SPRs that the kernel is interested in
at wakeup. It has been refactored out as a list from what the kernel
used previously.




Re: [PATCH v2 2/2] powerpc/mm: Warn if W+X pages found on boot

2020-01-06 Thread Christophe Leroy




On 02/05/2019 at 09:39, Russell Currey wrote:

Implement code to walk all pages and warn if any are found to be both
writable and executable.  Depends on STRICT_KERNEL_RWX enabled, and is
behind the DEBUG_WX config option.

This only runs on boot and has no runtime performance implications.

Very heavily influenced (and in some cases copied verbatim) from the
ARM64 code written by Laura Abbott (thanks!), since our ptdump
infrastructure is similar.

Signed-off-by: Russell Currey 
---
v2: A myriad of fixes and cleanups thanks to Christophe Leroy

  arch/powerpc/Kconfig.debug | 19 ++
  arch/powerpc/include/asm/pgtable.h |  6 +
  arch/powerpc/mm/pgtable_32.c   |  3 +++
  arch/powerpc/mm/pgtable_64.c   |  3 +++
  arch/powerpc/mm/ptdump/ptdump.c| 41 +-
  5 files changed, 71 insertions(+), 1 deletion(-)



[...]


diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index a4a132f92810..e69b53a8a841 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -31,7 +31,7 @@
  #include "ptdump.h"
  
  #ifdef CONFIG_PPC32

-#define KERN_VIRT_START 0
+#define KERN_VIRT_START PAGE_OFFSET
  #endif
  
  /*

@@ -68,6 +68,8 @@ struct pg_state {
unsigned long last_pa;
unsigned int level;
u64 current_flags;
+   bool check_wx;
+   unsigned long wx_pages;
  };
  
  struct addr_marker {

@@ -177,6 +179,20 @@ static void dump_addr(struct pg_state *st, unsigned long 
addr)
  
  }
  
+static void note_prot_wx(struct pg_state *st, unsigned long addr)

+{
+   if (!st->check_wx)
+   return;
+
+   if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
+   return;
+


I just realised that the above test is insufficient, although it works
by chance.


If I understand correctly, you want to make sure that no page is set
with PAGE_KERNEL_X, i.e. that all X pages are PAGE_KERNEL_ROX.


If you take the example of the 8xx, we have:

#define PAGE_KERNEL_X   __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)

#define _PAGE_KERNEL_RWX(_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
#define _PAGE_KERNEL_ROX(_PAGE_SH | _PAGE_RO | _PAGE_EXEC)

Your test checks which bits are set, but doesn't check which bits are
not set. So your test only relies on the fact that _PAGE_DIRTY is set
when the page is RW. That looks rather fragile, as for some reason a
page might be RW without being DIRTY yet.


I think the test should be more robust, something like:

pte_t pte = __pte(st->current_flags);

if (!pte_exec(pte) || !pte_write(pte))
return;
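
Folded into note_prot_wx(), that gives roughly the following (a sketch
only; the WARN/accounting tail assumes the function's existing
st->start_address bookkeeping):

static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
	pte_t pte = __pte(st->current_flags);

	if (!st->check_wx)
		return;

	/* only complain about pages that are both writable and executable */
	if (!pte_exec(pte) || !pte_write(pte))
		return;

	WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p\n",
		  (void *)st->start_address);
	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}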

Christophe


[PATCH v2 0/3] Introduce Self-Save API for deep stop states

2020-01-06 Thread Pratik Rajesh Sampat
RFC v1 patches: https://lkml.org/lkml/2019/12/4/193
Changelog
RFC v1 --> v2
1. Optimized preference bitmask
2. Addressed comments from Ram Pai

Currently the stop-API supports a mechanism called self-restore,
which allows us to restore the values of certain SPRs on wakeup from a
deep-stop state to a desired value. To use this, the Kernel makes an
OPAL call passing the PIR of the CPU, the SPR number and the value to
which the SPR should be restored when that CPU wakes up from a deep
stop state.

Recently, a new feature named self-save has been enabled in the
stop-api, which is an alternative mechanism to do the same, except
that self-save will save the current content of the SPR before
entering a deep stop state and also restore the content back on
waking up from a deep stop state.

This patch series aims at introducing and leveraging the self-save feature in
the kernel.

Now, as the kernel has a choice to prefer one mode over the other and
there can be registers in both the save/restore SPR list which are sent
from the device tree, a new interface has been defined for the seamless
handling of the modes for each SPR.

A list of preferred SPRs is maintained in the kernel which contains two
properties:
1. supported_mode: Helps in identifying if it strictly supports self
   save or restore or both.
   Initialized using the information from device tree.
2. preferred_mode: Calls out what mode is preferred for each SPR. It
   could be strictly self save or restore, or it can also
   determine the preference of one mode over the other if both
   are present, by encapsulating the other in a bitmask from
   LSB to MSB.
   Initialized statically.

Below is a table showing the Scenario::Consequence when the self save and
self restore modes are available or disabled in different combinations, as
perceived from the device tree, thus giving complete backwards compatibility
regardless of whether an older firmware runs a newer kernel or vice versa.
Support for self save or self-restore is embedded in the device tree,
along with the set of registers it supports.

SR = Self restore; SS = Self save

.-----------------------------------+-----------------------------------------.
| Scenario                          | Consequence                             |
:-----------------------------------+-----------------------------------------:
| Legacy Firmware. No SS or SR node | Self restore is called for all          |
|                                   | supported SPRs                          |
:-----------------------------------+-----------------------------------------:
| SR: !active SS: !active           | Deep stop states disabled               |
:-----------------------------------+-----------------------------------------:
| SR: active SS: !active            | Self restore is called for all          |
|                                   | supported SPRs                          |
:-----------------------------------+-----------------------------------------:
| SR: active SS: active             | Goes through the preferences for each   |
|                                   | SPR and executes one of the modes       |
|                                   | accordingly. Currently, self restore is |
|                                   | called for all the SPRs except PSSCR,   |
|                                   | which is self saved                     |
:-----------------------------------+-----------------------------------------:
| SR: active(only HID0) SS: active  | Self save is called for all supported   |
|                                   | registers except HID0 (as HID0 cannot   |
|                                   | be self saved currently)                |
:-----------------------------------+-----------------------------------------:
| SR: !active SS: active            | Currently will disable deep states as   |
|                                   | HID0 needs to be self restored and      |
|                                   | cannot be self saved                    |
'-----------------------------------+-----------------------------------------'

Pratik Rajesh Sampat (3):
  powerpc/powernv: Interface to define support and preference for a SPR
  powerpc/powernv: Introduce Self save support
  powerpc/powernv: Parse device tree, population of SPR support

 arch/powerpc/include/asm/opal-api.h|   3 +-
 arch/powerpc/include/asm/opal.h|   1 +
 arch/powerpc/platforms/powernv/idle.c  | 433 ++---
 arch/powerpc/platforms/powernv/opal-call.c |   1 +
 4 files changed, 381 insertions(+), 57 deletions(-)

-- 
2.24.1



[PATCH v2 1/3] powerpc/powernv: Interface to define support and preference for a SPR

2020-01-06 Thread Pratik Rajesh Sampat
Define a bitmask interface to determine support for the Self Restore,
Self Save or both.

Also define an interface to determine the preference for that SPR: to
be strictly saved or restored, or encapsulated with an order of preference.

The preference bitmask is shown below:

----------------------------
|... | 2nd pref | 1st pref |
----------------------------
MSB                      LSB

The preference from higher to lower is from LSB to MSB, with a shift of 4
bits.
Example:
Prefer self save first; if not available, then prefer self restore.
The preference mask for this scenario will be seen as below:
((SELF_RESTORE_STRICT << PREFERENCE_SHIFT) | SELF_SAVE_STRICT)
---------------------------------
|... | Self restore | Self save |
---------------------------------
MSB                           LSB
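
A hedged sketch of how such a mask might be consumed (illustrative only;
the helper name pick_spr_mode is made up, while the macros are the ones
this patch defines):

/*
 * Walk the 4-bit preference slots from LSB (highest preference) to MSB
 * and return the first mode that the SPR actually supports.
 */
static u32 pick_spr_mode(u32 preferred_mode, u32 supported_mode)
{
	int i;

	for (i = 0; i < NR_PREFERENCES; i++) {
		u32 mode = (preferred_mode >> (i * PREFERENCE_SHIFT)) &
			   PREFERENCE_MASK;

		if (mode & supported_mode)
			return mode;
	}
	return UNSUPPORTED;
}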

Finally, declare a list of preferred SPRs which encapsulate the bitmasks
for preferred and supported modes, with defaults of both being set to
support legacy firmware.

This commit also implements the use of the above interface and retains the
legacy functionality of self restore.

Signed-off-by: Pratik Rajesh Sampat 
---
 arch/powerpc/platforms/powernv/idle.c | 327 +-
 1 file changed, 271 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 78599bca66c2..2f328403b0dc 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -32,9 +32,106 @@
 #define P9_STOP_SPR_MSR 2000
 #define P9_STOP_SPR_PSSCR  855
 
+/* Interface for the stop state supported and preference */
+#define SELF_RESTORE_TYPE   0
+#define SELF_SAVE_TYPE      1
+
+#define NR_PREFERENCES      2
+#define PREFERENCE_SHIFT    4
+#define PREFERENCE_MASK     0xf
+
+#define UNSUPPORTED         0x0
+#define SELF_RESTORE_STRICT 0x1
+#define SELF_SAVE_STRICT    0x2
+
+/*
+ * Bitmask defining the kind of preferences available.
+ * Note : The higher to lower preference is from LSB to MSB, with a shift of
+ * 4 bits.
+ * ----------------------------
+ * |... | 2nd pref | 1st pref |
+ * ----------------------------
+ * MSB                      LSB
+ */
+/* Prefer Restore if available, otherwise unsupported */
+#define PREFER_SELF_RESTORE_ONLY   SELF_RESTORE_STRICT
+/* Prefer Save if available, otherwise unsupported */
+#define PREFER_SELF_SAVE_ONLY  SELF_SAVE_STRICT
+/* Prefer Restore when available, otherwise prefer Save */
+#define PREFER_RESTORE_SAVE((SELF_SAVE_STRICT << \
+ PREFERENCE_SHIFT)\
+ | SELF_RESTORE_STRICT)
+/* Prefer Save when available, otherwise prefer Restore*/
+#define PREFER_SAVE_RESTORE((SELF_RESTORE_STRICT <<\
+ PREFERENCE_SHIFT)\
+ | SELF_SAVE_STRICT)
 static u32 supported_cpuidle_states;
 struct pnv_idle_states_t *pnv_idle_states;
 int nr_pnv_idle_states;
+/* Caching the lpcr & ptcr support to use later */
+static bool is_lpcr_self_save;
+static bool is_ptcr_self_save;
+
+struct preferred_sprs {
+   u64 spr;
+   u32 preferred_mode;
+   u32 supported_mode;
+};
+
+struct preferred_sprs preferred_sprs[] = {
+   {
+   .spr = SPRN_HSPRG0,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_LPCR,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_PTCR,
+   .preferred_mode = PREFER_SAVE_RESTORE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HMEER,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID0,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = P9_STOP_SPR_MSR,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = P9_STOP_SPR_PSSCR,
+   .preferred_mode = PREFER_SAVE_RESTORE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID1,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID4,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   },
+   {
+   .spr = SPRN_HID5,
+   .preferred_mode = PREFER_RESTORE_SAVE,
+   .supported_mode = SELF_RESTORE_STRICT,
+   }
+};
+
+const int nr_preferred_sprs = ARRAY_SIZE(preferred_sprs);
 
 /*

[PATCH v2 2/3] powerpc/powernv: Introduce Self save support

2020-01-06 Thread Pratik Rajesh Sampat
This commit introduces and leverages the Self save API which OPAL now
supports.

Add the new Self Save OPAL API call in the list of OPAL calls.
Implement the self saving of the SPRs based on the support populated,
while respecting their preferences.

This implementation allows mixing of support for the SPRs, which
means that one SPR can be self restored while another SPR is self saved,
if they support and prefer it to be so.

Signed-off-by: Pratik Rajesh Sampat 
---
 arch/powerpc/include/asm/opal-api.h| 3 ++-
 arch/powerpc/include/asm/opal.h| 1 +
 arch/powerpc/platforms/powernv/idle.c  | 2 ++
 arch/powerpc/platforms/powernv/opal-call.c | 1 +
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index c1f25a760eb1..89b7c44124e6 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -214,7 +214,8 @@
#define OPAL_SECVAR_GET    176
 #define OPAL_SECVAR_GET_NEXT   177
 #define OPAL_SECVAR_ENQUEUE_UPDATE 178
-#define OPAL_LAST  178
+#define OPAL_SLW_SELF_SAVE_REG 179
+#define OPAL_LAST  179
 
 #define QUIESCE_HOLD   1 /* Spin all calls at entry */
 #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac34b8e2..389a85b63805 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -203,6 +203,7 @@ int64_t opal_handle_hmi(void);
 int64_t opal_handle_hmi2(__be64 *out_flags);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_self_save_reg(uint64_t cpu_pir, uint64_t sprn);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 2f328403b0dc..d67d4d0b169b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -1172,6 +1172,8 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
if (!is_lpcr_self_save)
opal_slw_set_reg(pir, SPRN_LPCR,
 lpcr_val);
+   else
+   opal_slw_self_save_reg(pir, SPRN_LPCR);
}
 }
 
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52d258f..11e0ceb90de0 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -223,6 +223,7 @@ OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
 OPAL_CALL(opal_handle_hmi2,OPAL_HANDLE_HMI2);
 OPAL_CALL(opal_config_cpu_idle_state,  OPAL_CONFIG_CPU_IDLE_STATE);
 OPAL_CALL(opal_slw_set_reg,OPAL_SLW_SET_REG);
+OPAL_CALL(opal_slw_self_save_reg,  OPAL_SLW_SELF_SAVE_REG);
 OPAL_CALL(opal_register_dump_region,   OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
 OPAL_CALL(opal_pci_set_phb_cxl_mode,   OPAL_PCI_SET_PHB_CAPI_MODE);
-- 
2.24.1



[PATCH v2 3/3] powerpc/powernv: Parse device tree, population of SPR support

2020-01-06 Thread Pratik Rajesh Sampat
Parse the device tree for the self-save and self-restore nodes and
populate support for the preferred SPRs based on what was advertised
by the device tree.

Signed-off-by: Pratik Rajesh Sampat 
---
 arch/powerpc/platforms/powernv/idle.c | 104 ++
 1 file changed, 104 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index d67d4d0b169b..e910ff40b7e6 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -1429,6 +1429,107 @@ static void __init pnv_probe_idle_states(void)
supported_cpuidle_states |= pnv_idle_states[i].flags;
 }
 
+/*
+ * Extracts and populates the self save or restore capabilities
+ * passed from the device tree node
+ */
+static int extract_save_restore_state_dt(struct device_node *np, int type)
+{
+   int nr_sprns = 0, i, bitmask_index;
+   int rc = 0;
+   u64 *temp_u64;
+   const char *state_prop;
+   u64 bit_pos;
+
+   state_prop = of_get_property(np, "status", NULL);
+   if (!state_prop) {
+   pr_warn("opal: failed to find the active value for self 
save/restore node");
+   return -EINVAL;
+   }
+   if (strncmp(state_prop, "disabled", 8) == 0) {
+   /*
+* if the feature is not active, strip the preferred_sprs from
+* that capability.
+*/
+   if (type == SELF_RESTORE_TYPE) {
+   for (i = 0; i < nr_preferred_sprs; i++) {
+   preferred_sprs[i].supported_mode &=
+   ~SELF_RESTORE_STRICT;
+   }
+   } else {
+   for (i = 0; i < nr_preferred_sprs; i++) {
+   preferred_sprs[i].supported_mode &=
+   ~SELF_SAVE_STRICT;
+   }
+   }
+   return 0;
+   }
+   nr_sprns = of_property_count_u64_elems(np, "sprn-bitmask");
+   if (nr_sprns <= 0)
+   return rc;
+   temp_u64 = kcalloc(nr_sprns, sizeof(u64), GFP_KERNEL);
+   if (of_property_read_u64_array(np, "sprn-bitmask",
+  temp_u64, nr_sprns)) {
+   pr_warn("cpuidle-powernv: failed to find registers in DT\n");
+   kfree(temp_u64);
+   return -EINVAL;
+   }
+   /*
+* Populate acknowledgment of support for the sprs in the global vector
+* gotten by the registers supplied by the firmware.
+* The registers are in a bitmask, bit index within
+* that specifies the SPR
+*/
+   for (i = 0; i < nr_preferred_sprs; i++) {
+   bitmask_index = preferred_sprs[i].spr / 64;
+   bit_pos = preferred_sprs[i].spr % 64;
+   if ((temp_u64[bitmask_index] & (1UL << bit_pos)) == 0) {
+   if (type == SELF_RESTORE_TYPE)
+   preferred_sprs[i].supported_mode &=
+   ~SELF_RESTORE_STRICT;
+   else
+   preferred_sprs[i].supported_mode &=
+   ~SELF_SAVE_STRICT;
+   continue;
+   }
+   if (type == SELF_RESTORE_TYPE) {
+   preferred_sprs[i].supported_mode |=
+   SELF_RESTORE_STRICT;
+   } else {
+   preferred_sprs[i].supported_mode |=
+   SELF_SAVE_STRICT;
+   }
+   }
+
+   kfree(temp_u64);
+   return rc;
+}
+
+static int pnv_parse_deepstate_dt(void)
+{
+   struct device_node *np, *np1;
+   int rc = 0;
+
+   /* Self restore register population */
+   np = of_find_node_by_path("/ibm,opal/power-mgt/self-restore");
+   if (!np) {
+   pr_warn("opal: self restore Node not found");
+   } else {
+   rc = extract_save_restore_state_dt(np, SELF_RESTORE_TYPE);
+   if (rc != 0)
+   return rc;
+   }
+   /* Self save register population */
+   np1 = of_find_node_by_path("/ibm,opal/power-mgt/self-save");
+   if (!np1) {
+   pr_warn("opal: self save Node not found");
+   pr_warn("Legacy firmware. Assuming default self-restore 
support");
+   } else {
+   rc = extract_save_restore_state_dt(np1, SELF_SAVE_TYPE);
+   }
+   return rc;
+}
+
 /*
  * This function parses device-tree and populates all the information
  * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
@@ -1577,6 +1678,9 @@ static int __init pnv_init_idle_states(void)
return rc;
pnv_probe_idle_states();
 
+   rc = pnv_parse_deepstate_dt();
+   if (rc)
+   return rc;
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
   

Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Aneesh Kumar K.V
Romain Dolbeau  writes:

> On Sat, 21 Dec 2019 at 05:31, Aneesh Kumar K.V wrote:
>> I don't have direct access to this system, I have asked if we can get a run
>> with 64K.
>
> OK, thanks! Do you know which model it is? It seems to be working on
> some systems,
> but we don't have enough samples to figure out why at this time, I think.
>
>> Meanwhile is there a way to find out what caused MachineCheck? more
>> details on this? I was checking the manual and I don't see any
>> restrictions w.r.t effective address. We now have very high EA with 64K
>> page size.
>
> Sorry, no idea, completely out of my depth here. I can try some kernel
> (build, runtime) options and/or patch, but someone will have to tell
> me what to try,
> as I have no ideas.


Can you try this change.

modified   arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -580,7 +580,7 @@ extern void slb_set_size(u16 size);
 #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
 #define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
 #else
-#define MAX_KERNEL_CTX_CNT 1
+#define MAX_KERNEL_CTX_CNT 4
 #endif
 
 #define MAX_VMALLOC_CTX_CNT1


-aneesh


Re: [PATCH 2/2] powerpc: avoid adjusting memory_limit for capture kernel memory reservation

2020-01-06 Thread Michal Suchánek
On Wed, Jul 24, 2019 at 11:26:59AM +0530, Mahesh Jagannath Salgaonkar wrote:
> On 7/22/19 11:19 PM, Michal Suchánek wrote:
> > On Fri, 28 Jun 2019 00:51:19 +0530
> > Hari Bathini  wrote:
> > 
> >> Currently, if memory_limit is specified and it overlaps with memory to
> >> be reserved for capture kernel, memory_limit is adjusted to accommodate
> >> capture kernel. With memory reservation for capture kernel moved later
> >> (after enforcing memory limit), this adjustment no longer holds water.
> >> So, avoid adjusting memory_limit and error out instead.
> > 
> > Can you split out the memory limit adjustment out of memory reservation
> > so it can still be adjusted?
> 
> Do you mean adjust the memory limit before we do the actual reservation ?

Yes, without that you get a regression in the ability to enable fadump
with limited memory - something like the patch below should fix it. Then
again, there is no code to un-move the memory_limit in case the allocation
fails, and we now have CMA allocation, which is dubious to allocate
beyond memory_limit. So maybe removing the memory_limit adjustment is a
bugfix removing a 'feature' that has bitrotted over time.

Thanks

Michal

From: Michal Suchanek 
Date: Mon, 6 Jan 2020 14:55:40 +0100
Subject: [PATCH 2/2] powerpc/fadump: adjust memlimit before MMU early init

Moving the fadump memory reservation before early MMU init makes the
memlimit adjustment to make room for fadump ineffective.

Move the adjustment back before early MMU init.

Signed-off-by: Michal Suchanek 
---
 arch/powerpc/include/asm/fadump.h |  3 +-
 arch/powerpc/kernel/fadump.c  | 80 +++
 arch/powerpc/kernel/prom.c|  3 ++
 3 files changed, 66 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 526a6a647312..76d3cbe1379c 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -30,6 +30,7 @@ static inline void fadump_cleanup(void) { }
 #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
 extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
  int depth, void *data);
-extern int fadump_reserve_mem(void);
+int fadump_adjust_memlimit(void);
+int fadump_reserve_mem(void);
 #endif
 #endif /* _ASM_POWERPC_FADUMP_H */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ad6d8d1cdbe..4d76452dcb3d 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -431,19 +431,22 @@ static int __init fadump_get_boot_mem_regions(void)
return ret;
 }
 
-int __init fadump_reserve_mem(void)
+static inline u64 fadump_get_reserve_alignment(void)
 {
-   u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
-   bool is_memblock_bottom_up = memblock_bottom_up();
-   int ret = 1;
+   u64 align = PAGE_SIZE;
 
-   if (!fw_dump.fadump_enabled)
-   return 0;
+#ifdef CONFIG_CMA
+   if (!fw_dump.nocma)
+   align = FADUMP_CMA_ALIGNMENT;
+#endif
 
-   if (!fw_dump.fadump_supported) {
-   pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
-   goto error_out;
-   }
+   return align;
+}
+
+static inline u64 fadump_get_bootmem_min(void)
+{
+   u64 bootmem_min = 0;
+   u64 align = fadump_get_reserve_alignment();
 
/*
 * Initialize boot memory size
@@ -455,7 +458,6 @@ int __init fadump_reserve_mem(void)
PAGE_ALIGN(fadump_calculate_reserve_size());
 #ifdef CONFIG_CMA
if (!fw_dump.nocma) {
-   align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
ALIGN(fw_dump.boot_memory_size, align);
}
@@ -472,8 +474,43 @@ int __init fadump_reserve_mem(void)
pr_err("Too many holes in boot memory area to enable 
fadump\n");
goto error_out;
}
+
+   }
+
+   return bootmem_min;
+error_out:
+   fw_dump.fadump_enabled = 0;
+   return 0;
+}
+
+int __init fadump_adjust_memlimit(void)
+{
+   u64 size, bootmem_min;
+
+   if (!fw_dump.fadump_enabled)
+   return 0;
+
+   if (!fw_dump.fadump_supported) {
+   pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+   fw_dump.fadump_enabled = 0;
+   return 0;
}
 
+#ifdef CONFIG_HUGETLB_PAGE
+   if (fw_dump.dump_active) {
+   /*
+* FADump capture kernel doesn't care much about hugepages.
+* In fact, handling hugepages in capture kernel is asking for
+* trouble. So, disable HugeTLB support when fadump is active.
+*/
+   hugetlb_disabled = true;
+   }
+#endif
+
+   bootmem_min = fadump_get_bootmem_min();
+   if (!bootmem_min)
+   

Re: "ftrace: Rework event_create_dir()" triggers boot error messages

2020-01-06 Thread Qian Cai



> On Dec 18, 2019, at 11:31 PM, Steven Rostedt  wrote:
> 
> On Wed, 18 Dec 2019 22:58:23 -0500
> Qian Cai  wrote:
> 
>> The linux-next commit "ftrace: Rework event_create_dir()" [1] triggers boot
>> warnings for Clang-built (Clang version 8.0.1) kernels (reproduced on both
>> arm64 and powerpc). Reverting it (with trivial conflict fixes) on top of
>> today's linux-next fixed the issue.
>> 
>> configs:
>> https://raw.githubusercontent.com/cailca/linux-mm/master/arm64.config
>> https://raw.githubusercontent.com/cailca/linux-mm/master/powerpc.config
>> 
>> [1] https://lore.kernel.org/lkml/2019132458.342979...@infradead.org/
>> 
>> [  115.799327][T1] Registered efivars operations
>> [  115.849770][T1] clocksource: Switched to clocksource arch_sys_counter
>> [  115.901145][T1] Could not initialize trace point 
>> events/sys_enter_rt_sigreturn
>> [  115.908854][T1] Could not create directory for event 
>> sys_enter_rt_sigreturn
>> [  115.998949][T1] Could not initialize trace point 
>> events/sys_enter_restart_syscall
>> [  116.006802][T1] Could not create directory for event 
>> sys_enter_restart_syscall
>> [  116.062702][T1] Could not initialize trace point 
>> events/sys_enter_getpid
>> [  116.069828][T1] Could not create directory for event sys_enter_getpid
>> [  116.078058][T1] Could not initialize trace point 
>> events/sys_enter_gettid
>> [  116.085181][T1] Could not create directory for event sys_enter_gettid
>> [  116.093405][T1] Could not initialize trace point 
>> events/sys_enter_getppid
>> [  116.100612][T1] Could not create directory for event sys_enter_getppid
>> [  116.108989][T1] Could not initialize trace point 
>> events/sys_enter_getuid
>> [  116.116058][T1] Could not create directory for event sys_enter_getuid
>> [  116.124250][T1] Could not initialize trace point 
>> events/sys_enter_geteuid
>> [  116.131457][T1] Could not create directory for event sys_enter_geteuid
>> [  116.139840][T1] Could not initialize trace point 
>> events/sys_enter_getgid
>> [  116.146908][T1] Could not create directory for event sys_enter_getgid
>> [  116.155163][T1] Could not initialize trace point 
>> events/sys_enter_getegid
>> [  116.162370][T1] Could not create directory for event sys_enter_getegid
>> [  116.178015][T1] Could not initialize trace point 
>> events/sys_enter_setsid
>> [  116.185138][T1] Could not create directory for event sys_enter_setsid
>> [  116.269307][T1] Could not initialize trace point 
>> events/sys_enter_sched_yield
>> [  116.276811][T1] Could not create directory for event 
>> sys_enter_sched_yield
>> [  116.527652][T1] Could not initialize trace point 
>> events/sys_enter_munlockall
>> [  116.535126][T1] Could not create directory for event 
>> sys_enter_munlockall
>> [  116.622096][T1] Could not initialize trace point 
>> events/sys_enter_vhangup
>> [  116.629307][T1] Could not create directory for event sys_enter_vhangup
>> [  116.783867][T1] Could not initialize trace point events/sys_enter_sync
>> [  116.790819][T1] Could not create directory for event sys_enter_sync
>> [  117.723402][T1] pnp: PnP ACPI init
> 
> I noticed that all of the above have zero parameters (so the loop below
> never runs and 'ret' is returned uninitialized). Does the following patch
> fix it?
> 
> (note, I prefer "ret" and "i" on different lines anyway)
> 
> -- Steve
> 
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 53935259f701..abb70c71fe60 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -269,7 +269,8 @@ static int __init syscall_enter_define_fields(struct 
> trace_event_call *call)
>   struct syscall_trace_enter trace;
>   struct syscall_metadata *meta = call->data;
>   int offset = offsetof(typeof(trace), args);
> - int ret, i;
> + int ret = 0;
> + int i;
> 
>   for (i = 0; i < meta->nb_args; i++) {
>   ret = trace_define_field(call, meta->types[i],

Steve, those errors are still there in today’s linux-next. Is this patch on the 
way to the linux-next?



Re: [PATCH v3 02/22] compat: provide compat_ptr() on all architectures

2020-01-06 Thread Will Deacon
On Thu, Jan 02, 2020 at 03:55:20PM +0100, Arnd Bergmann wrote:
> In order to avoid needless #ifdef CONFIG_COMPAT checks,
> move the compat_ptr() definition to linux/compat.h
> where it can be seen by any file regardless of the
> architecture.
> 
> Only s390 needs a special definition, this can use the
> self-#define trick we have elsewhere.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/compat.h   | 17 -
>  arch/mips/include/asm/compat.h| 18 --
>  arch/parisc/include/asm/compat.h  | 17 -
>  arch/powerpc/include/asm/compat.h | 17 -
>  arch/powerpc/oprofile/backtrace.c |  2 +-
>  arch/s390/include/asm/compat.h|  6 +-
>  arch/sparc/include/asm/compat.h   | 17 -
>  arch/x86/include/asm/compat.h | 17 -
>  include/linux/compat.h| 18 ++
>  9 files changed, 20 insertions(+), 109 deletions(-)
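
For context, the generic definition being centralized is essentially the
following (a sketch of the result in linux/compat.h; s390 keeps its own
version via the self-#define mentioned above):

/* Fallback used by every architecture that doesn't override compat_ptr() */
#ifndef compat_ptr
static inline void __user *compat_ptr(compat_uptr_t uptr)
{
	return (void __user *)(unsigned long)uptr;
}
#endif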

For arm64:

Acked-by: Will Deacon 

Will


Re: "ftrace: Rework event_create_dir()" triggers boot error messages

2020-01-06 Thread Steven Rostedt
On Mon, 6 Jan 2020 12:05:58 -0500
Qian Cai  wrote:

> > diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> > index 53935259f701..abb70c71fe60 100644
> > --- a/kernel/trace/trace_syscalls.c
> > +++ b/kernel/trace/trace_syscalls.c
> > @@ -269,7 +269,8 @@ static int __init syscall_enter_define_fields(struct 
> > trace_event_call *call)
> > struct syscall_trace_enter trace;
> > struct syscall_metadata *meta = call->data;
> > int offset = offsetof(typeof(trace), args);
> > -   int ret, i;
> > +   int ret = 0;
> > +   int i;
> > 
> > for (i = 0; i < meta->nb_args; i++) {
> > ret = trace_define_field(call, meta->types[i],  
> 
> Steve, those errors are still there in today’s linux-next. Is this patch on 
> the way to the linux-next?

No, because this bug is not in my tree.

I'll send a proper patch to the tip folks.

-- Steve


Re: [PATCH 05/10] powerpc/83xx: use resource_size

2020-01-06 Thread Scott Wood
On Wed, 2020-01-01 at 18:49 +0100, Julia Lawall wrote:
> Use resource_size rather than a verbose computation on
> the end and start fields.
> 
> The semantic patch that makes this change is as follows:
> (http://coccinelle.lip6.fr/)
> 
> 
> @@ struct resource ptr; @@
> - (ptr.end - ptr.start + 1)
> + resource_size(&ptr)
> 
> 
> Signed-off-by: Julia Lawall 
> 
> ---
>  arch/powerpc/platforms/83xx/km83xx.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
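
The resulting change has this shape (an illustrative before/after, not
the km83xx.c hunk itself; res is some struct resource already filled in):

	/* before: verbose open-coded size computation */
	size = res.end - res.start + 1;

	/* after: same value, clearer intent */
	size = resource_size(&res);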

Acked-by: Scott Wood 

-Scott




Re: [PATCH v3 2/2] powerpc/mpc85xx: also write addr_h to spin table for 64bit boot entry

2020-01-06 Thread Scott Wood
On Mon, 2020-01-06 at 12:29 +0800, yingjie_...@126.com wrote:
> From: Bai Yingjie 
> 
> CPUs like the P4080 have a 36-bit physical address; their DDR physical
> start address can be configured above 4G by the LAW registers.
> 
> For such systems, in which the physical memory start address is
> configured higher than 4G, we also need to write addr_h into the spin
> table of the target secondary CPU, so that addr_h and addr_l together
> represent a 64-bit physical address.
> Otherwise the secondary core cannot get the correct entry point to start from.
> 
> Signed-off-by: Bai Yingjie 
> ---
>  arch/powerpc/platforms/85xx/smp.c | 9 +
>  1 file changed, 9 insertions(+)
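
A hedged sketch of the described write (field names follow the ePAPR
spin-table layout; this is illustrative, not the patch's actual hunk):

	/* Publish the full 64-bit entry point: a secondary core whose
	 * memory sits above 4G needs addr_h as well as addr_l. */
	out_be32(&spin_table->addr_h, upper_32_bits(entry));
	out_be32(&spin_table->addr_l, lower_32_bits(entry));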

Acked-by: Scott Wood 

-Scott




Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Romain Dolbeau
On Mon, 6 Jan 2020 at 15:06, Aneesh Kumar K.V wrote:
> Can you try this change.

Applied, recompiled with 64 KiB pages, still crashes.

The backtrace seems more readable this time (and wasn't overwritten by
something else), bad photo here:


Cordially,

-- 
Romain Dolbeau


Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Romain Dolbeau
On Sun, 5 Jan 2020 at 16:06, Bertrand Dekoninck wrote:
> I can now test on powermac 7,3 (with an ATI card)
> How can I build a deb package of this kernel ? Or is there a package to 
> download somewhere ?

I usually cross-compile on x86-64 from upstream sources. On a Debian
Buster with the powerpc tools installed,
it's just:

#
make ARCH=powerpc CROSS_COMPILE=powerpc64-linux-gnu- oldconfig && \
  nice -19 make ARCH=powerpc CROSS_COMPILE=powerpc64-linux-gnu- -j56 bindeb-pkg
#

(alter the -j56 for your own build system). As for dependencies, as far
as I remember I only needed "gcc-powerpc64-linux-gnu" and its
dependencies. My '.config' is Debian's 5.3 plus default values for
changes - with the exception of 4 KiB pages.

I've also uploaded the working kernel with 4 KiB pages DEB here:
, as it might be easier for a quick test.

Cordially,

-- 
Romain Dolbeau


Re: [PATCH v3 02/22] compat: provide compat_ptr() on all architectures

2020-01-06 Thread H. Peter Anvin
On 2020-01-02 06:55, Arnd Bergmann wrote:
> In order to avoid needless #ifdef CONFIG_COMPAT checks,
> move the compat_ptr() definition to linux/compat.h
> where it can be seen by any file regardless of the
> architecture.
> 
> Only s390 needs a special definition, this can use the
> self-#define trick we have elsewhere.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/compat.h   | 17 -
>  arch/mips/include/asm/compat.h| 18 --
>  arch/parisc/include/asm/compat.h  | 17 -
>  arch/powerpc/include/asm/compat.h | 17 -
>  arch/powerpc/oprofile/backtrace.c |  2 +-
>  arch/s390/include/asm/compat.h|  6 +-
>  arch/sparc/include/asm/compat.h   | 17 -
>  arch/x86/include/asm/compat.h | 17 -
>  include/linux/compat.h| 18 ++
>  9 files changed, 20 insertions(+), 109 deletions(-)
> 

For x86:

Reviewed-by: H. Peter Anvin 

It still suffers from the zero-one-infinity rule failure of the compat
architecture as a whole, but that is a very different problem. In this case
"compat" obviously means "a 32-on-64 ABI" and simply centralizes a common
API, which is a Good Thing[TM].

-hpa



Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Lennart Sorensen
On Mon, Jan 06, 2020 at 07:18:30PM +0100, Romain Dolbeau wrote:
> Applied, recompiled with 64 KiB pages, still crashes.
> 
> The backtrace seems more readable this time (and wasn't overwritten by
> something else), bad photo here:
> 

Is it possible this has to do with nouveau and not supporting 64K page
size on older nvidia chips?  My reading of the driver is that only
NV50 and above has implemented support for anything other than 4K pages,
so a geforce 6xxx series that I believe some of the G5 machines had would
be a problem with 64K pages, while those with ATI cards would probably
not have a problem.

Maybe I read the driver changes wrong, but it sure looks like only
NV50/G84 and up got the needed fixes a couple of years ago.

-- 
Len Sorensen


Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Romain Dolbeau
On Mon, 6 Jan 2020 at 19:54, Lennart Sorensen wrote:
> Is it possible this has to do with nouveau and not supporting 64K page
> size on older nvidia chips?

Interesting idea (and I have a 6600 aka NV43 in there, indeed) but I
don't think so, as
a) 'nouveau' works in 4.19 with 64 KiB pages
b) using "module_blacklist=nouveau" doesn't help, I just tried
c) my original 'bisect' was probably using 'nouveau' when the kernel
was booting, so at least some 5.x w/o the offending commit and 64 KiB
pages is fine
d) to my untrained eye, the crash happens _before_ nouveau is loaded
(it seems to me I'm still on the OpenFirmware framebuffer, font change
occurs later).

Unfortunately I don't have a PCIe OpenFirmware ATI card to test the
theory further.
(... well I _do_ have a Sun XVR-300 ... technically it fits the bill ... )

Cordially,

-- 
Romain Dolbeau


Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Lennart Sorensen
On Mon, Jan 06, 2020 at 08:11:47PM +0100, Romain Dolbeau wrote:
> Interesting idea (and I have a 6600 aka NV43 in there, indeed) but I
> don't think so, as
> a) 'nouveau' works in 4.19 with 64 KiB pages
> b) using "module_blacklist=nouveau" doesn't help, I just tried
> c) my original 'bisect' was probably using 'nouveau' when the kernel
> was booting, so at least some 5.x w/o the offending commit and 64 KiB
> pages is fine
> d) to my untrained eye, the crash happens _before_ nouveau is loaded
> (it seems to me I'm still on the OpenFirmware framebuffer, font change
> occurs later).
> 
> Unfortunately I don't have a PCIe OpenFirmware ATI card to test the
> theory further.
> (... well I _do_ have a Sun XVR-300 ... technically it fits the bill ... )

Oh well.  I guess that means they did fix it for all cards and I just
don't see which change was relevant for the older chips then.

Unless something was missed that only triggers occasionally.  That would
be annoying.

-- 
Len Sorensen


Re: [PATCH v2] selftests/powerpc: Add a test of bad (out-of-range) accesses

2020-01-06 Thread Michael Ellerman
On Mon, 2019-05-20 at 10:20:51 UTC, Michael Ellerman wrote:
> Userspace isn't allowed to access certain address ranges, make sure we
> actually test that to at least some degree.
> 
> This would have caught the recent bug where the SLB fault handler was
> incorrectly called on an out-of-range access when using the Radix MMU.
> It also would have caught the bug we had in get_region_id() where we
> were inserting SLB entries for bad addresses.
> 
> Signed-off-by: Michael Ellerman 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next.

https://git.kernel.org/powerpc/c/5eb7cfb3a2b178f3d443301cda0825bb9f475657

cheers


Re: [PATCH v2 1/3] powernv/iov: Ensure the pdn for VFs always contains a valid PE number

2020-01-06 Thread Michael Ellerman
On Mon, 2019-10-28 at 08:54:22 UTC, Oliver O'Halloran wrote:
> On pseries there is a bug with adding hotplugged devices to an IOMMU group.
> For a number of dumb reasons fixing that bug first requires re-working how
> VFs are configured on PowerNV. For background, on PowerNV we use the
> pcibios_sriov_enable() hook to do two things:
> 
> 1. Create a pci_dn structure for each of the VFs, and
> 2. Configure the PHB's internal BARs so the MMIO range for each VF
>maps to a unique PE.
> 
> Roughly speaking a PE is the hardware counterpart to a Linux IOMMU group
> since all the devices in a PE share the same IOMMU table. A PE also defines
> the set of devices that should be isolated in response to a PCI error (i.e.
> bad DMA, UR/CA, AER events, etc). When isolated all MMIO and DMA traffic to
> and from devicein the PE is blocked by the root complex until the PE is
> recovered by the OS.
> 
> The requirement to block MMIO causes a giant headache because the P8 PHB
> generally uses a fixed mapping between MMIO addresses and PEs.  As a result
> we need to delay configuring the IOMMU groups for devices until after MMIO
> resources are assigned. For physical devices (i.e. non-VFs) the PE
> assignment is done in pcibios_setup_bridge() which is called immediately
> after the MMIO resources for downstream devices (and the bridge's windows)
> are assigned. For VFs the setup is more complicated because:
> 
> a) pcibios_setup_bridge() is not called again when VFs are activated, and
> b) The pci_dev for VFs are created by generic code which runs after
>pcibios_sriov_enable() is called.
> 
> The work around for this is a two step process:
> 
> 1. A fixup in pcibios_add_device() is used to initialise the cached
>pe_number in pci_dn, then
> 2. A bus notifier then adds the device to the IOMMU group for the PE
>specified in pci_dn->pe_number.
> 
> A side effect of fixing the pseries bug mentioned in the first paragraph is
> moving the fixup out of pcibios_add_device() and into
> pcibios_bus_add_device(), which is called much later. This results in step
> 2. failing because pci_dn->pe_number won't be initialised when the bus
> notifier is run.
> 
> We can fix this by removing the need for the fixup. The PE for a VF is
> known before the VF is even scanned, so we can initialise pci_dn->pe_number
> in pcibios_sriov_enable() instead. Unfortunately, moving the initialisation
> causes two problems:
> 
> 1. We trip the WARN_ON() in the current fixup code, and
> 2. The EEH core clears pdn->pe_number when recovering a VF and relies
>on the fixup to correctly re-set it.
> 
> The only justification for either of these is a comment in eeh_rmv_device()
> suggesting that pdn->pe_number *must* be set to IODA_INVALID_PE in order
> for the VF to be scanned. However, this comment appears to have no basis
> in reality. Both bugs can be fixed by just deleting the code.
> 
> Tested-by: Alexey Kardashevskiy 
> Reviewed-by: Alexey Kardashevskiy 
> Signed-off-by: Oliver O'Halloran 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/3b5b9997b331e77ce967eba2c4bc80dc3134a7fe

cheers


Re: [PATCH v2] powerpc/imc: Add documentation for IMC and trace-mode

2020-01-06 Thread Michael Ellerman
On Mon, 2019-10-28 at 10:08:16 UTC, Michael Ellerman wrote:
> From: Anju T Sudhakar 
> 
> Documentation for IMC (In-Memory Collection Counters) infrastructure
> and trace-mode of IMC.
> 
> Signed-off-by: Anju T Sudhakar 
> [mpe: Convert to rst, minor rewording, make PMI example more concise]
> Signed-off-by: Michael Ellerman 

Applied to powerpc next.

https://git.kernel.org/powerpc/c/1a3ec143a90a4674e01099c3ba47c3268536a462

cheers


Re: [PATCH] powerpc/papr_scm: Update debug message

2020-01-06 Thread Michael Ellerman
On Mon, 2019-12-02 at 06:38:55 UTC, "Aneesh Kumar K.V" wrote:
> Resource struct p->res is assigned later. Avoid using %pR before the resource
> struct is assigned.
> 
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/0eb59382dff23910e7104c397b617fb0fede538e

cheers


Re: [PATCH] powerpc/44x: Adjust indentation in ibm4xx_denali_fixup_memsize

2020-01-06 Thread Michael Ellerman
On Mon, 2019-12-09 at 20:03:38 UTC, Nathan Chancellor wrote:
> Clang warns:
> 
> ../arch/powerpc/boot/4xx.c:231:3: warning: misleading indentation;
> statement is not part of the previous 'else' [-Wmisleading-indentation]
> val = SDRAM0_READ(DDR0_42);
> ^
> ../arch/powerpc/boot/4xx.c:227:2: note: previous statement is here
> else
> ^
> 
> This is because there is a space at the beginning of this line; remove
> it so that the indentation is consistent according to the Linux kernel
> coding style and clang no longer warns.
> 
> Fixes: d23f5099297c ("[POWERPC] 4xx: Adds decoding of 440SPE memory size to 
> boot wrapper library")
> Link: https://github.com/ClangBuiltLinux/linux/issues/780
> Signed-off-by: Nathan Chancellor 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/c3aae14e5d468d18dbb5d7c0c8c7e2968cc14aad

cheers


Re: [PATCH] powerpc/64: Use {SAVE,REST}_NVGPRS macros

2020-01-06 Thread Michael Ellerman
On Wed, 2019-12-11 at 02:35:52 UTC, Jordan Niethe wrote:
> In entry_64.S there are places that open code saving and restoring the
> non-volatile registers. There are already macros for doing this so use
> them.
> 
> Signed-off-by: Jordan Niethe 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/5290ae2b8e5fecc465e2fe92350ff02aa2e5acae

cheers


Re: [PATCH kernel v2 1/4] Revert "powerpc/pseries/iommu: Don't use dma_iommu_ops on secure guests"

2020-01-06 Thread Michael Ellerman
On Mon, 2019-12-16 at 04:19:21 UTC, Alexey Kardashevskiy wrote:
> From: Ram Pai 
> 
> This reverts commit edea902c1c1efb855f77e041f9daf1abe7a9768a.
> 
> At the time the change allowed direct DMA ops for secure VMs; however
> since then we switched on using SWIOTLB backed with IOMMU (direct mapping)
> and to make this work, we need dma_iommu_ops which handles all cases
> including TCE mapping I/O pages in the presence of an IOMMU.
> 
> Fixes: edea902c1c1e ("powerpc/pseries/iommu: Don't use dma_iommu_ops on 
> secure guests")
> Signed-off-by: Ram Pai 
> [aik: added "revert" and "fixes:"]
> Signed-off-by: Alexey Kardashevskiy 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d862b44133b7a1d7de25288e09eabf4df415e971

cheers


Re: [PATCH] powerpc/512x: Use dma_request_chan() instead dma_request_slave_channel()

2020-01-06 Thread Michael Ellerman
On Tue, 2019-12-17 at 07:37:30 UTC, Peter Ujfalusi wrote:
> dma_request_slave_channel() is a wrapper on top of dma_request_chan()
> eating up the error code.
> 
> By using dma_request_chan() directly the driver can support deferred
> probing against DMA.
> 
> Signed-off-by: Peter Ujfalusi 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fb185a4052b18c97ebc98f6a8db30a60abca35e0

cheers


Re: [PATCH] powerpc/pseries: Remove redundant select of PPC_DOORBELL

2020-01-06 Thread Michael Ellerman
On Thu, 2019-12-19 at 12:58:40 UTC, Michael Ellerman wrote:
> Commit d4e58e5928f8 ("powerpc/powernv: Enable POWER8 doorbell IPIs")
> added a select of PPC_DOORBELL to PPC_PSERIES, but it already had a
> select of PPC_DOORBELL. One is enough.
> 
> Reported-by: Jason A. Donenfeld 
> Signed-off-by: Michael Ellerman 

Applied to powerpc next.

https://git.kernel.org/powerpc/c/4a8e274e2d8cc5628d3027be0900e8835a2dfa7b

cheers
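
For illustration, the shape of the duplication being removed (a
hypothetical Kconfig excerpt, not the actual PPC_PSERIES entry):

	config PPC_PSERIES
		bool "IBM pSeries platform support"
		select PPC_DOORBELL
		select PPC_DOORBELL	# duplicate line the patch drops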


Re: [PATCH] powerpc/85xx: Get twr_p102x to compile again

2020-01-06 Thread Michael Ellerman
On Thu, 2019-12-19 at 15:16:02 UTC, Sebastian Andrzej Siewior wrote:
> With CONFIG_QUICC_ENGINE enabled and CONFIG_UCC_GETH + CONFIG_SERIAL_QE
> disabled we have an unused variable (np). The code won't compile with
> -Werror.
> 
> Move the np variable to the block where it is actually used.
> 
> Signed-off-by: Sebastian Andrzej Siewior 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/3a9d970f17e05a7b26f782beb8f7f2118d1741ea

cheers


Re: PPC64: G5 & 4k/64k page size (was: Re: Call for report - G5/PPC970 status)

2020-01-06 Thread Michael Ellerman
"Aneesh Kumar K.V"  writes:
> Romain Dolbeau  writes:
>
>> On Sat, 21 Dec 2019 at 05:31, Aneesh Kumar K.V
>>  wrote:
>>> I don't have direct access to this system, I have asked if we can get a run
>>> with 64K.
>>
>> OK, thanks! Do you know which model it is? It seems to be working on
>> some systems,
>> but we don't have enough samples to figure out why at this time, I think.
>>
>>> Meanwhile, is there a way to find out what caused the Machine Check? Any
>>> more details on this? I was checking the manual and I don't see any
>>> restrictions w.r.t. the effective address. We now have very high EAs with
>>> the 64K page size.
>>
>> Sorry, no idea, completely out of my depth here. I can try some kernel
>> (build, runtime) options and/or patch, but someone will have to tell
>> me what to try,
>> as I have no ideas.
>
>
> Can you try this change?
>
> modified   arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -580,7 +580,7 @@ extern void slb_set_size(u16 size);
>  #if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
>  #define MAX_KERNEL_CTX_CNT   (1UL << (MAX_PHYSMEM_BITS - 
> MAX_EA_BITS_PER_CONTEXT))
>  #else
> -#define MAX_KERNEL_CTX_CNT   1
> +#define MAX_KERNEL_CTX_CNT   4
>  #endif

Didn't help.

Same crash, here's a previous one OCR'ed from a photo:

Oops: Machine check, sig: 7 [#1]
BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=4 NUMA PowerMac
Modules linked in:
CPU: PID: 1 Comm: init Tainted: G M 5.5.0-rc4-gcc-8.2.0-00919-g443b9413a05e #1465
NIP: c026f528 LR: c0296138 CTR: 
REGS: ce000ffa3d70 TRAP: 0200 Tainted: G M (5.5.0-rc4-gcc-8.2.0-00919-g443b9413a05e)
MSR: 90109032  CR: 24282048 XER: 
DAR: c00c00612c80 DSISR: 0400 IRQMASK: 8
GPR00: c02970d0 c001bc343a90 c1399600 c001bc01c800
GPR04: ce01bc39 ce01bc3439d4 c001bc343a9c c001bb4b73b8
GPR08: c001bc32 00612c78 c1442a98 0fe0
GPR12: 7f7f7f7f7f7f7f7f c16a  f7df38c8
GPR16: 0002  ffb14bac f7df5690
GPR20: f7df26c4 000d 8000 f7ddfc0c
GPR24: ff9c 0010 0002 c001bc343db8
GPR28: c0296138 c00c00612c78 c001bc39 c001bc01c800
NIP [c026f528] .kmem_cache_free+0x58/0x140
LR [c0296138] .putname+0x88/0xa0
Call Trace:
c001bc343b40 [c0296138] .putname+0x88/0xa0
c001bc343bc0 [c02970d0] .filename_lookup.part.76+0xb0/0x160
ce01bc343d40 [c08000279b20] .do_faccessat+0xe0/0x380
c001bc343e20 [c000a40c] system_call+0x5c/0x68
Instruction dump:
408201e8 2fa3 419e0080 fb8100c0 fb410080 fb610088 789d8502 3d22000b
39499498 1d3d0038 ebaa 7fbd4a14  712a0001 40820240 3422001e


cheers



Re: [PATCH v11 00/25] mm/gup: track dma-pinned pages: FOLL_PIN

2020-01-06 Thread John Hubbard

On 1/6/20 1:01 AM, Jan Kara wrote:
...

Also, looking ahead:

a) if the problem disappears with the latest above test, then we likely have
a huge page refcount overflow, and there are a couple of different ways to
fix it.

b) if it still reproduces with the above, then it's some other random mistake,
and in that case I'd be inclined to do a sort of guided (or classic, unguided)
git bisect of the series. Because it could be any of several patches.

If that's too much trouble, then I'd have to fall back to submitting a few
patches at a time and working my way up to the tracking patch...


It could also be that an ordinary page reference is dropped with 'unpin'
thus underflowing the page refcount...

Honza



Yes.

And, I think I'm about out of time for this release cycle, so I'm probably
going to submit the prerequisite patches (patches 1-10, or more boldly,
1-22) as candidates for 5.6.


thanks,
--
John Hubbard
NVIDIA


Re: [PATCH v4 2/2] KVM: PPC: Implement H_SVM_INIT_ABORT hcall

2020-01-06 Thread Sukadev Bhattiprolu
Ram Pai [linux...@us.ibm.com] wrote:
>
> One small comment: H_STATE is a better return code than H_UNSUPPORTED.
> 

Here is the updated patch - we now return H_STATE if the abort call is
made after the VM has gone secure.
---
From 73fe1fa5aff2829f2fae6a339169e56dc0bbae06 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu 
Date: Fri, 27 Sep 2019 14:30:36 -0500
Subject: [PATCH 2/2] KVM: PPC: Implement H_SVM_INIT_ABORT hcall

Implement the H_SVM_INIT_ABORT hcall which the Ultravisor can use to
abort an SVM after it has issued the H_SVM_INIT_START and before the
H_SVM_INIT_DONE hcalls. This hcall could be used when Ultravisor
encounters security violations or other errors when starting an SVM.

Note that this hcall is different from the UV_SVM_TERMINATE ucall, which
is used by the HV to terminate/cleanup a VM that has become secure.

H_SVM_INIT_ABORT should basically undo the operations that were done
since the H_SVM_INIT_START hcall - i.e. page out all the VM pages back
to normal memory, and terminate the SVM.

(If we do not bring the pages back to normal memory, the text/data
of the VM would be stuck in secure memory, and since the SVM did not
go secure, its MSR_S bit will be clear and the VM won't be able to
access its pages even to do a clean exit.)

Based on patches and discussion with Paul Mackerras, Ram Pai and
Bharata Rao.

Signed-off-by: Ram Pai 
Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Bharata B Rao 
---
Changelog[v4]:
- [Bharata Rao] Add missing rcu locking
- [Paul Mackerras] simplify code that walks memslots
- Add a check to ensure that H_SVM_INIT_ABORT is called before
  H_SVM_INIT_DONE hcall (i.e the SVM is not already secure).
- [Ram Pai] Return H_STATE if hcall is called after *INIT_DONE.

Changelog[v3]:
- Rather than pass the NIP/MSR as parameters, load them into
  SRR0/SRR1 (like we do with other registers) and terminate
  the VM after paging out pages
- Move the code to add a skip_page_out parameter into a
  separate patch.

Changelog[v2]:
[Paul Mackerras] avoid returning to UV "one last time" after
the state is cleaned up.  So, we now have H_SVM_INIT_ABORT:
- take the VM's NIP/MSR register states as parameters
- inherit the state of other registers as at UV_ESM call.
After cleaning up the partial state, HV uses these to return
directly to the VM with a failed UV_ESM call.
---
 Documentation/powerpc/ultravisor.rst| 60 +
 arch/powerpc/include/asm/hvcall.h   |  1 +
 arch/powerpc/include/asm/kvm_book3s_uvmem.h |  6 +++
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/kvm/book3s_hv.c|  3 ++
 arch/powerpc/kvm/book3s_hv_uvmem.c  | 28 ++
 6 files changed, 99 insertions(+)

diff --git a/Documentation/powerpc/ultravisor.rst 
b/Documentation/powerpc/ultravisor.rst
index 730854f73830..363736d7fd36 100644
--- a/Documentation/powerpc/ultravisor.rst
+++ b/Documentation/powerpc/ultravisor.rst
@@ -948,6 +948,66 @@ Use cases
 up its internal state for this virtual machine.
 
 
+H_SVM_INIT_ABORT
+----------------
+
+Abort the process of securing an SVM.
+
+Syntax
+~~~~~~
+
+.. code-block:: c
+
+   uint64_t hypercall(const uint64_t H_SVM_INIT_ABORT)
+
+Return values
+~~~~~~~~~~~~~
+
+One of the following values:
+
+   * H_PARAMETER   on successfully cleaning up the state,
+   Hypervisor will return this value to the
+   **guest**, to indicate that the underlying
+   UV_ESM ultracall failed.
+
+   * H_STATE   if called after a VM has gone secure (i.e
+   H_SVM_INIT_DONE hypercall was successful).
+
+   * H_UNSUPPORTED if called from a wrong context (e.g. from a
+   normal VM).
+
+Description
+~~~~~~~~~~~
+
+Abort the process of securing a virtual machine. This call must
+be made after a prior call to ``H_SVM_INIT_START`` hypercall and
+before a call to ``H_SVM_INIT_DONE``.
+
+On entry into this hypercall the non-volatile GPRs and FPRs are
+expected to contain the values they had at the time the VM issued
+the UV_ESM ultracall. Further ``SRR0`` is expected to contain the
+address of the instruction after the ``UV_ESM`` ultracall and ``SRR1``
+the MSR value with which to return to the VM.
+
+This hypercall will clean up any partial state that was established for
+the VM since the prior ``H_SVM_INIT_START`` hypercall, including paging
+out pages that were paged into secure memory, and issue the
+``UV_SVM_TERMINATE`` ultracall to terminate the VM.
+
+After the partial state is cleaned up, control returns to the VM
+(**not Ultravisor**), at the address specified in ``SRR0`` with the
+MSR values set to the value in ``SRR1``.
+
+Use cases
+~~~~~~~~~
+
+I

Re: [PATCH v3 02/22] compat: provide compat_ptr() on all architectures

2020-01-06 Thread Michael Ellerman
Arnd Bergmann  writes:
> In order to avoid needless #ifdef CONFIG_COMPAT checks,
> move the compat_ptr() definition to linux/compat.h
> where it can be seen by any file regardless of the
> architecture.
>
> Only s390 needs a special definition, this can use the
> self-#define trick we have elsewhere.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm64/include/asm/compat.h   | 17 -
>  arch/mips/include/asm/compat.h| 18 --
>  arch/parisc/include/asm/compat.h  | 17 -
>  arch/powerpc/include/asm/compat.h | 17 -
>  arch/powerpc/oprofile/backtrace.c |  2 +-

LGTM.

Acked-by: Michael Ellerman  (powerpc)

One minor comment:

> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 68f79d855c3d..11083d84eb23 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -958,4 +958,22 @@ static inline bool in_compat_syscall(void) { return 
> false; }
>  
>  #endif /* CONFIG_COMPAT */
>  
> +/*
> + * A pointer passed in from user mode. This should not
> + * be used for syscall parameters, just declare them
> + * as pointers because the syscall entry code will have
> + * appropriately converted them already.
> + */
> +#ifndef compat_ptr
> +static inline void __user *compat_ptr(compat_uptr_t uptr)
> +{
> + return (void __user *)(unsigned long)uptr;
> +}
> +#endif
> +
> +static inline compat_uptr_t ptr_to_compat(void __user *uptr)
> +{
> + return (u32)(unsigned long)uptr;
> +}

Is there a reason we cast to u32 directly instead of using compat_uptr_t?

cheers
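
As a usage sketch (hypothetical driver code; foo_compat_ioctl() and its
command are made up, only compat_ptr() is the real helper):

	#include <linux/compat.h>
	#include <linux/fs.h>
	#include <linux/uaccess.h>

	static long foo_compat_ioctl(struct file *file, unsigned int cmd,
				     unsigned long arg)
	{
		/* arg carries a 32-bit user pointer; compat_ptr() widens
		 * it to a native void __user * before dereferencing. */
		void __user *argp = compat_ptr(arg);
		u32 val;

		if (copy_from_user(&val, argp, sizeof(val)))
			return -EFAULT;
		/* ... act on val ... */
		return 0;
	}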


powerpc/xmon: don't access ASDR in VMs

2020-01-06 Thread Sukadev Bhattiprolu
From 91a77dbea3c909ff15c66cded37f1334304a293d Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu 
Date: Mon, 6 Jan 2020 13:50:02 -0600
Subject: [PATCH 1/1] powerpc/xmon: don't access ASDR in VMs

ASDR is HV-privileged and must only be accessed in HV-mode.
Fixes a Program Check (0x700) when xmon in a VM dumps SPRs.

Signed-off-by: Sukadev Bhattiprolu 
---
 arch/powerpc/xmon/xmon.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 02fae453c2ec..b8d179b5cf4f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1949,15 +1949,14 @@ static void dump_300_sprs(void)
 
printf("pidr   = %.16lx  tidr  = %.16lx\n",
mfspr(SPRN_PID), mfspr(SPRN_TIDR));
-   printf("asdr   = %.16lx  psscr = %.16lx\n",
-   mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR)
-   : mfspr(SPRN_PSSCR_PR));
+   printf("psscr  = %.16lx\n",
+   hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));
 
if (!hv)
return;
 
-   printf("ptcr   = %.16lx\n",
-   mfspr(SPRN_PTCR));
+   printf("ptcr   = %.16lx  asdr  = %.16lx\n",
+   mfspr(SPRN_PTCR), mfspr(SPRN_ASDR));
 #endif
 }
 
-- 
2.17.2



[Bug 206049] alg: skcipher: p8_aes_xts encryption unexpectedly succeeded on test vector "random: len=0 klen=64"; expected_error=-22, cfg="random: inplace may_sleep use_finup src_divs=[66.99%@+1

2020-01-06 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=206049

Daniel Axtens (d...@axtens.net) changed:

   What|Removed |Added

 CC||d...@axtens.net

--- Comment #2 from Daniel Axtens (d...@axtens.net) ---
Hi Erhard,

I'm having a look. Does this reproduce reliably/often? Or was it a one-off?

Regards,
Daniel

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: powerpc/xmon: don't access ASDR in VMs

2020-01-06 Thread Andrew Donnellan

On 7/1/20 1:16 pm, Sukadev Bhattiprolu wrote:

 From 91a77dbea3c909ff15c66cded37f1334304a293d Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu 
Date: Mon, 6 Jan 2020 13:50:02 -0600
Subject: [PATCH 1/1] powerpc/xmon: don't access ASDR in VMs

ASDR is HV-privileged and must only be accessed in HV-mode.
Fixes a Program Check (0x700) when xmon in a VM dumps SPRs.

Signed-off-by: Sukadev Bhattiprolu 


I think this should be:

 Fixes: d1e1b351f50f9 ("powerpc/xmon: Add ISA v3.0 SPRs to SPR dump")
 Cc: sta...@vger.kernel.org

Apart from that

Reviewed-by: Andrew Donnellan 

--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited



Re: [PATCH 05/18] powerpc sstep: Prepare to support prefixed instructions

2020-01-06 Thread Jordan Niethe
On Fri, Dec 20, 2019 at 4:17 PM Jordan Niethe  wrote:
>
> On Thu, Dec 19, 2019 at 1:15 AM Daniel Axtens  wrote:
> >
> > Jordan Niethe  writes:
> >
> > > Currently all instructions are a single word long. A future ISA version
> > > will include prefixed instructions which have a double word length. The
> > > functions used for analysing and emulating instructions need to be
> > > modified so that they can handle these new instruction types.
> > >
> > > A prefixed instruction is a word prefix followed by a word suffix. All
> > > prefixes uniquely have the primary op-code 1. Suffixes may be valid word
> > > instructions or instructions that only exist as suffixes.
> > >
> > > In handling prefixed instructions it will be convenient to treat the
> > > suffix and prefix as separate words. To facilitate this modify
> > > analyse_instr() and emulate_step() to take a suffix as a
> > > parameter. For word instructions it does not matter what is passed in
> > > here - it will be ignored.
> > >
> > > We also define a new flag, PREFIXED, to be used in instruction_op:type.
> > > This flag will indicate when emulating an analysed instruction if the
> > > NIP should be advanced by word length or double word length.
> > >
> > > The callers of analyse_instr() and emulate_step() will need their own
> > > changes to be able to support prefixed instructions. For now modify them
> > > to pass in 0 as a suffix.
> > >
> > > Note that at this point no prefixed instructions are emulated or
> > > analysed - this is just making it possible to do so.
> > >
> > > Signed-off-by: Jordan Niethe 
> > > ---
> > >  arch/powerpc/include/asm/ppc-opcode.h |  3 +++
> > >  arch/powerpc/include/asm/sstep.h  |  8 +--
> > >  arch/powerpc/include/asm/uaccess.h| 30 +++
> > >  arch/powerpc/kernel/align.c   |  2 +-
> > >  arch/powerpc/kernel/hw_breakpoint.c   |  4 ++--
> > >  arch/powerpc/kernel/kprobes.c |  2 +-
> > >  arch/powerpc/kernel/mce_power.c   |  2 +-
> > >  arch/powerpc/kernel/optprobes.c   |  2 +-
> > >  arch/powerpc/kernel/uprobes.c |  2 +-
> > >  arch/powerpc/kvm/emulate_loadstore.c  |  2 +-
> > >  arch/powerpc/lib/sstep.c  | 12 ++-
> > >  arch/powerpc/lib/test_emulate_step.c  | 30 +--
> > >  arch/powerpc/xmon/xmon.c  |  4 ++--
> > >  13 files changed, 71 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
> > > b/arch/powerpc/include/asm/ppc-opcode.h
> > > index c1df75edde44..a1dfa4bdd22f 100644
> > > --- a/arch/powerpc/include/asm/ppc-opcode.h
> > > +++ b/arch/powerpc/include/asm/ppc-opcode.h
> > > @@ -377,6 +377,9 @@
> > >  #define PPC_INST_VCMPEQUD0x10c7
> > >  #define PPC_INST_VCMPEQUB0x1006
> > >
> > > +/* macro to check if a word is a prefix */
> > > +#define IS_PREFIX(x) (((x) >> 26) == 1)
> > > +
> > >  /* macros to insert fields into opcodes */
> > >  #define ___PPC_RA(a) (((a) & 0x1f) << 16)
> > >  #define ___PPC_RB(b) (((b) & 0x1f) << 11)
> > > diff --git a/arch/powerpc/include/asm/sstep.h 
> > > b/arch/powerpc/include/asm/sstep.h
> > > index 769f055509c9..6d4cb602e231 100644
> > > --- a/arch/powerpc/include/asm/sstep.h
> > > +++ b/arch/powerpc/include/asm/sstep.h
> > > @@ -89,6 +89,9 @@ enum instruction_type {
> > >  #define VSX_LDLEFT   4   /* load VSX register from left */
> > >  #define VSX_CHECK_VEC8   /* check MSR_VEC not MSR_VSX for 
> > > reg >= 32 */
> > >
> > > +/* Prefixed flag, ORed in with type */
> > > +#define PREFIXED 0x800
> > > +
> > >  /* Size field in type word */
> > >  #define SIZE(n)  ((n) << 12)
> > >  #define GETSIZE(w)   ((w) >> 12)
> > > @@ -132,7 +135,7 @@ union vsx_reg {
> > >   * otherwise.
> > >   */
> > >  extern int analyse_instr(struct instruction_op *op, const struct pt_regs 
> > > *regs,
> > > -  unsigned int instr);
> > > +  unsigned int instr, unsigned int sufx);
> > >
> >
> > I'm not saying this is necessarily better, but did you consider:
> >
> >  - making instr 64 bits and using masking and shifting macros to get the
> >prefix and suffix?
> >
> >  - defining an instruction type/struct/union/whatever that contains both
> >halves in one object?
> >
> > I'm happy to be told that it ends up being way, way uglier/worse/etc,
> > but I just thought I'd ask.
> >
> > Regards,
> > Daniel
>
> It is a good question and something I thought about, and I am not completely
> confident that this approach is the best. Basically what I ended up thinking
> was that the prefixed instructions were a bit of a special case, and by doing
> it like this the normal word instructions would just carry on the same as
> before.
>
> I can see this is a pretty flimsy reason, so I am happy for suggestions as
> to what would end up being clearer.
>
>

Sorry I was pretty vague here. Some more thoughts:
The current representation of an instruction is a
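
For reference, a rough sketch of the 64-bit alternative Daniel floats
above (the PPC_PREFIX()/PPC_SUFFIX() helpers are made up for
illustration; only IS_PREFIX() comes from the patch):

	#include <linux/types.h>

	#define IS_PREFIX(x)	(((x) >> 26) == 1)	/* from the patch */

	/* hypothetical accessors for a combined 64-bit instruction */
	#define PPC_PREFIX(x)	((u32)((x) >> 32))
	#define PPC_SUFFIX(x)	((u32)((x) & 0xffffffff))

	static inline bool is_prefixed_insn(u64 instr)
	{
		return IS_PREFIX(PPC_PREFIX(instr));
	}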

Re: [PATCH] powerpc: add support for folded p4d page tables

2020-01-06 Thread Michael Ellerman
Mike Rapoport  writes:
> On Mon, Jan 06, 2020 at 02:31:41PM +1100, Michael Ellerman wrote:
>> Mike Rapoport  writes:
>> > Any updates on this?
>> 
>> It's very ... big, and kind of intrusive.
>
> I've tried to split it to several smaller ones, but I couldn't find a way
> to do it without breaking bisectability.

Yeah I didn't necessarily mean splitting it, it's just a lot of churn.

It seems to break qemu mac99 booting pmac32 defconfig, haven't had time
to look any further:

  Loading compiled-in X.509 certificates
  rtc-generic rtc-generic: setting system clock to 2020-01-07T02:42:59 UTC 
(1578364979)
  BUG: Unable to handle kernel unaligned access at 0xf10af004
  Faulting instruction address: 0xc01cc6c4
  Vector: 600 (Alignment) at [ef0b5de0]
  pc: c01cc6c4: f_dupfd+0x6c/0xb8
  lr: c01cc698: f_dupfd+0x40/0xb8
  sp: ef0b5e98
 msr: 9032
 dar: f10af004
   dsisr: 4140
current = 0xef0b
  pid   = 1, comm = swapper
  Linux version 5.5.0-rc2+ (michael@alpine1-p1) (gcc version 9.2.1 20191127 
(Ubuntu 9.2.1-20ubuntu3)) #8 Tue Jan 7 13:38:04 AEDT 2020
  enter ? for help
  [ef0b5eb8] c000550c console_on_rootfs+0x44/0x90
  [ef0b5ed8] c090d7c0 kernel_init_freeable+0x1a4/0x24c
  [ef0b5f18] c0005770 kernel_init+0x18/0x108
  [ef0b5f38] c0017274 ret_from_kernel_thread+0x14/0x1c
  FAIL! Booting BE pmac32


cheers


Re: [PATCH 2/2] powerpc/pseries/svm: Disable BHRB/EBB/PMU access

2020-01-06 Thread maddy




On 12/27/19 10:59 AM, Sukadev Bhattiprolu wrote:

Sukadev Bhattiprolu [suka...@linux.ibm.com] wrote:

Ultravisor disables some CPU features like BHRB, EBB and PMU in
secure virtual machines (SVMs). Skip accessing those registers
in SVMs to avoid getting a Program Interrupt.

Here is an updated patch that explicitly includes  in
some files to fix build errors reported by .
---

From: Sukadev Bhattiprolu 
Date: Thu, 16 May 2019 20:57:12 -0500
Subject: [PATCH 2/2] powerpc/pseries/svm: Disable BHRB/EBB/PMU access

Ultravisor disables some CPU features like BHRB, EBB and PMU in
secure virtual machines (SVMs). Skip accessing those registers
in SVMs to avoid getting a Program Interrupt.

Signed-off-by: Sukadev Bhattiprolu 
---
Changelog[v2]
- [Michael Ellerman] Optimize the code using FW_FEATURE_SVM
- Merged EBB/BHRB and PMU patches into one and reorganized code.
- Fix some build errors reported by 
---
  arch/powerpc/kernel/cpu_setup_power.S   | 21 
  arch/powerpc/kernel/process.c   | 23 ++---
  arch/powerpc/kvm/book3s_hv.c| 33 -
  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 32 +++-
  arch/powerpc/kvm/book3s_hv_tm_builtin.c | 21 ++--
  arch/powerpc/perf/core-book3s.c |  6 +
  arch/powerpc/xmon/xmon.c| 30 +-
  7 files changed, 114 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.S 
b/arch/powerpc/kernel/cpu_setup_power.S
index a460298c7ddb..9e895d8db468 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -206,14 +206,35 @@ __init_PMU_HV_ISA207:
blr

  __init_PMU:
+#ifdef CONFIG_PPC_SVM
+   /*
+* SVMs are restricted from accessing the PMU, so skip.
+*/
+   mfmsr   r5
+   rldicl  r5, r5, 64-MSR_S_LG, 62
+   cmpwi   r5,1
+   beq skip1


I know all the MMCR* registers are loaded with 0. But
it would be better if the PEF code loaded MMCR0
with the freeze bits on. I will send a separate
patch to handle the non-SVM case.

Rest looks good.
Acked-by: Madhavan Srinivasan 
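
A minimal sketch of that suggestion in C terms, assuming MMCR0_FC is
the freeze-counters bit from asm/reg.h (untested):

	/* freeze all counters rather than leaving MMCR0 zeroed */
	mtspr(SPRN_MMCR0, MMCR0_FC);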


+#endif
li  r5,0
mtspr   SPRN_MMCRA,r5
mtspr   SPRN_MMCR0,r5
mtspr   SPRN_MMCR1,r5
mtspr   SPRN_MMCR2,r5
+skip1:
blr

  __init_PMU_ISA207:
+
+#ifdef CONFIG_PPC_SVM
+   /*
+* SVMs are restricted from accessing the PMU, so skip.
+*/
+   mfmsr   r5
+   rldicl  r5, r5, 64-MSR_S_LG, 62
+   cmpwi   r5,1
+   beq skip2
+#endif
li  r5,0
mtspr   SPRN_MMCRS,r5
+skip2:
blr
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 639ceae7da9d..83c7c4119305 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -64,6 +64,7 @@
  #include 
  #include 
  #include 
+#include 

  #include 
  #include 
@@ -1059,9 +1060,11 @@ static inline void save_sprs(struct thread_struct *t)
t->dscr = mfspr(SPRN_DSCR);

if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
-   t->bescr = mfspr(SPRN_BESCR);
-   t->ebbhr = mfspr(SPRN_EBBHR);
-   t->ebbrr = mfspr(SPRN_EBBRR);
+   if (!is_secure_guest()) {
+   t->bescr = mfspr(SPRN_BESCR);
+   t->ebbhr = mfspr(SPRN_EBBHR);
+   t->ebbrr = mfspr(SPRN_EBBRR);
+   }

t->fscr = mfspr(SPRN_FSCR);

@@ -1097,12 +1100,14 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
}

if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
-   if (old_thread->bescr != new_thread->bescr)
-   mtspr(SPRN_BESCR, new_thread->bescr);
-   if (old_thread->ebbhr != new_thread->ebbhr)
-   mtspr(SPRN_EBBHR, new_thread->ebbhr);
-   if (old_thread->ebbrr != new_thread->ebbrr)
-   mtspr(SPRN_EBBRR, new_thread->ebbrr);
+   if (!is_secure_guest()) {
+   if (old_thread->bescr != new_thread->bescr)
+   mtspr(SPRN_BESCR, new_thread->bescr);
+   if (old_thread->ebbhr != new_thread->ebbhr)
+   mtspr(SPRN_EBBHR, new_thread->ebbhr);
+   if (old_thread->ebbrr != new_thread->ebbrr)
+   mtspr(SPRN_EBBRR, new_thread->ebbrr);
+   }

if (old_thread->fscr != new_thread->fscr)
mtspr(SPRN_FSCR, new_thread->fscr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 709cf1fd4cf4..29a2640108d1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -42,6 +42,7 @@
  #include 
  #include 
  #include 
+#include 

  #include 
  #include 
@@ -3568,9 +3569,11 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 
time_limit,
mtspr(SPRN_PS

Re: [mm/debug] 87c4696d57: kernel_BUG_at_include/linux/mm.h

2020-01-06 Thread Anshuman Khandual
On 12/27/2019 07:52 PM, kernel test robot wrote:
> [9.781974] kernel BUG at include/linux/mm.h:592!
> [9.782810] invalid opcode:  [#1] PTI
> [9.783443] CPU: 0 PID: 1 Comm: swapper Not tainted 
> 5.5.0-rc3-1-g87c4696d57b5e #1
> [9.784528] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.10.2-1 04/01/2014
> [9.785756] EIP: __free_pages+0x14/0x40
> [9.786442] Code: 0c 9c 5e fa 89 d8 e8 5b f3 ff ff 56 9d 5b 5e 5d c3 8d 74 
> 26 00 90 8b 48 1c 55 89 e5 85 c9 75 16 ba b4 b6 84 d6 e8 ac 49 fe ff <0f> 0b 
> 8d b4 26 00 00 00 00 8d 76 00 ff 48 1c 75 10 85 d2 75 07 e8
> [9.789697] EAX: d68761f7 EBX: ea52f000 ECX: ea4f8520 EDX: d684b6b4
> [9.790850] ESI:  EDI: ef45e000 EBP: ea501f08 ESP: ea501f08
> [9.791879] DS: 007b ES: 007b FS:  GS:  SS: 0068 EFLAGS: 00010286
> [9.792783] CR0: 80050033 CR2:  CR3: 16d0 CR4: 000406b0
> [9.792783] Call Trace:
> [9.792783]  free_pages+0x3c/0x50
> [9.792783]  pgd_free+0x5a/0x170
> [9.792783]  __mmdrop+0x42/0xe0
> [9.792783]  debug_vm_pgtable+0x54f/0x567
> [9.792783]  kernel_init_freeable+0x90/0x1e3
> [9.792783]  ? rest_init+0xf0/0xf0
> [9.792783]  kernel_init+0x8/0xf0
> [9.792783]  ret_from_fork+0x19/0x24
> [9.792783] Modules linked in:
> [9.792803] ---[ end trace 91b7335adcf0b656 ]---
> 
> 
> To reproduce:
> 
> # build kernel
>   cd linux
>   cp config-5.5.0-rc3-1-g87c4696d57b5e .config
>   make HOSTCC=gcc-7 CC=gcc-7 ARCH=i386 olddefconfig prepare 
> modules_prepare bzImage
> 
> git clone https://github.com/intel/lkp-tests.git
> cd lkp-tests
> bin/lkp qemu -k  job-script # job-script is attached in this 
> email

Hello,

As the failure might be happening during boot when the test executes,
do we really need to run the LKP-based QEMU environment in order to
reproduce the problem? Could this not be recreated on a standalone
system?

- Anshuman


[PATCH] powerpc/32: warn and return error on syscalls from kernel

2020-01-06 Thread Christophe Leroy
Since commit b86fb88855ea ("powerpc/32: implement fast entry for
syscalls on non BOOKE") and commit 1a4b739bbb4f ("powerpc/32:
implement fast entry for syscalls on BOOKE"), syscalls from the
kernel are unexpected and can have catastrophic consequences,
as they will destroy the kernel stack.

Test MSR_PR on syscall entry. In case the syscall is from the kernel,
emit a warning and return an ENOSYS error.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/entry_32.S   | 26 ++
 arch/powerpc/kernel/head_32.h|  9 ++---
 arch/powerpc/kernel/head_booke.h |  5 -
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d60908ea37fb..4a7cd22a8aaf 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -568,6 +568,32 @@ syscall_exit_work:
bl  do_syscall_trace_leave
b   ret_from_except_full
 
+   /*
+* System call was called from kernel. We get here with SRR1 in r9.
+* Mark the exception as recoverable once we have retrieved SRR0,
+* trap a warning and return ENOSYS with CR[SO] set.
+*/
+   .globl  ret_from_kernel_syscall
+ret_from_kernel_syscall:
+   mfspr   r11, SPRN_SRR0
+#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE)
+   LOAD_REG_IMMEDIATE(r12, MSR_KERNEL & ~(MSR_IR|MSR_DR))
+   MTMSRD(r12)
+#endif
+
+0: trap
+   EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
+   li  r3, ENOSYS
+   crset   so
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+   mtspr   SPRN_NRI, r0
+#endif
+   mtspr   SPRN_SRR1, r9
+   mtspr   SPRN_SRR0, r11
+   SYNC
+   RFI
+
 /*
  * The fork/clone functions need to copy the full register set into
  * the child process. Therefore we need to save all the nonvolatile
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 8abc7783dbe5..07524be96ca7 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -62,16 +62,18 @@
 
 .macro SYSCALL_ENTRY trapno
mfspr   r12,SPRN_SPRG_THREAD
+   mfspr   r9, SPRN_SRR1
	mfcr    r10
+   andi.   r11, r9, MSR_PR
	lwz r11,TASK_STACK-THREAD(r12)
-   mflr    r9
+   beq-    99f
	addi    r11,r11,THREAD_SIZE - INT_FRAME_SIZE
	rlwinm  r10,r10,0,4,2   /* Clear SO bit in CR */
	tophys(r11,r11)
	stw r10,_CCR(r11)   /* save registers */
+   mflr    r10
+   stw r10, _LINK(r11)
mfspr   r10,SPRN_SRR0
-   stw r9,_LINK(r11)
-   mfspr   r9,SPRN_SRR1
stw r1,GPR1(r11)
stw r1,0(r11)
tovirt(r1,r11)  /* set new kernel sp */
@@ -139,6 +141,7 @@
mtspr   SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+99:b   ret_from_kernel_syscall
 .endm
 
 /*
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 2ae635df9026..f2d11cb8102f 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -104,16 +104,18 @@ FTR_SECTION_ELSE
 #ifdef CONFIG_KVM_BOOKE_HV
 ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 #endif
+   mfspr   r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
+   andi.   r11, r9, MSR_PR
lwz r11, TASK_STACK - THREAD(r10)
rlwinm  r12,r12,0,4,2   /* Clear SO bit in CR */
+   beq-99f
ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
stw r12, _CCR(r11)  /* save various registers */
	mflr    r12
stw r12,_LINK(r11)
mfspr   r12,SPRN_SRR0
stw r1, GPR1(r11)
-   mfspr   r9,SPRN_SRR1
stw r1, 0(r11)
mr  r1, r11
stw r12,_NIP(r11)
@@ -176,6 +178,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
mtspr   SPRN_SRR0,r11
SYNC
RFI /* jump to handler, enable MMU */
+99:b   ret_from_kernel_syscall
 .endm
 
 /* To handle the additional exception priority levels on 40x and Book-E
-- 
2.13.3
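
For readers unfamiliar with the test: MSR_PR is the problem-state bit,
so a clear MSR_PR in SRR1 means the syscall was issued from the kernel.
A minimal C restatement of the check, illustrative only:

	#include <linux/types.h>
	#include <asm/reg.h>

	/* MSR_PR set => the interrupted context was user (problem) state;
	 * the patch branches to ret_from_kernel_syscall when it is clear. */
	static inline bool syscall_from_kernel(unsigned long srr1)
	{
		return (srr1 & MSR_PR) == 0;
	}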