Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Stephen Rothwell
Hi all,

On Thu, 4 Jun 2020 16:52:46 +1000 Stephen Rothwell  
wrote:
>
> diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
> b/arch/powerpc/include/asm/nohash/32/pgtable.h
> index c188a6f64bcd..1927e1b653f2 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp)
>   *pmdp = __pmd(0);
>  }
>  
> -
> -/* to find an entry in a kernel page-table-directory */
> -#define pgd_offset_k(address) pgd_offset(&init_mm, address)
> -
>  /* to find an entry in a page-table-directory */
>  #define pgd_index(address)((address) >> PGDIR_SHIFT)
>  #define pgd_offset(mm, address)   ((mm)->pgd + pgd_index(address))
> @@ -241,7 +237,7 @@ static inline pte_basic_t pte_update(struct mm_struct 
> *mm, unsigned long addr, p
>   pte_basic_t old = pte_val(*p);
>   pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
>   int num, i;
> - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
> + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), 
> addr), addr), addr);
>  
>   if (!huge)
>   num = PAGE_SIZE / SZ_4K;
> @@ -341,6 +337,10 @@ static inline int pte_young(pte_t pte)
>   pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
>  #endif
>  
> +#define pte_offset_kernel(dir, addr) \
> + (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \
> +   pte_index(addr))
> +
>  /*
>   * Encode and decode a swap entry.
>   * Note that the bits we use in a PTE for representing a swap entry

Sorry, that ended up:

diff --cc arch/powerpc/include/asm/nohash/32/pgtable.h
index 639f3b3713ec,eb8538c85077..1927e1b653f2
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@@ -204,13 -205,6 +205,9 @@@ static inline void pmd_clear(pmd_t *pmd
*pmdp = __pmd(0);
  }
  
- 
- /* to find an entry in a kernel page-table-directory */
- #define pgd_offset_k(address) pgd_offset(&init_mm, address)
- 
 +/* to find an entry in a page-table-directory */
 +#define pgd_index(address) ((address) >> PGDIR_SHIFT)
 +#define pgd_offset(mm, address)((mm)->pgd + pgd_index(address))
  
  /*
   * PTE updates. This function is called whenever an existing
@@@ -240,7 -234,7 +237,7 @@@ static inline pte_basic_t pte_update(st
pte_basic_t old = pte_val(*p);
pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
int num, i;
--  pmd_t *pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
++  pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), 
addr), addr), addr);
  
if (!huge)
num = PAGE_SIZE / SZ_4K;
@@@ -342,15 -334,6 +337,10 @@@ static inline int pte_young(pte_t pte
pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
  #endif
  
- /* Find an entry in the third-level page table.. */
- #define pte_index(address)\
-   (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 +#define pte_offset_kernel(dir, addr)  \
 +  (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \
 +pte_index(addr))
- #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr))
- static inline void pte_unmap(pte_t *pte) { }
 +
  /*
   * Encode and decode a swap entry.
   * Note that the bits we use in a PTE for representing a swap entry

-- 
Cheers,
Stephen Rothwell


pgpQWxAVfnL7c.pgp
Description: OpenPGP digital signature


[PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Joerg Roedel
From: Joerg Roedel 

The pud_alloc_track() needs to do different checks based on whether
__ARCH_HAS_5LEVEL_HACK is defined, like it already does in
pud_alloc(). Otherwise it causes boot failures on PowerPC.

Provide the correct implementations for both possible settings of
__ARCH_HAS_5LEVEL_HACK to fix the boot problems.

Reported-by: Abdul Haleem 
Tested-by: Abdul Haleem 
Tested-by: Satheesh Rajendran 
Fixes: d8626138009b ("mm: add functions to track page directory modifications")
Signed-off-by: Joerg Roedel 
---
 include/asm-generic/5level-fixup.h |  5 +
 include/linux/mm.h | 26 +-
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index 58046ddc08d0..afbab31fbd7e 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,6 +17,11 @@
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
 
+#define pud_alloc_track(mm, p4d, address, mask)
\
+   ((unlikely(pgd_none(*(p4d))) && 
\
+ (__pud_alloc(mm, p4d, address) || 
({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
+ NULL : pud_offset(p4d, address))
+
 #define p4d_alloc(mm, pgd, address)(pgd)
 #define p4d_alloc_track(mm, pgd, address, mask)(pgd)
 #define p4d_offset(pgd, start) (pgd)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 66e0977f970a..ad3b31c5bcc3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
-
 {
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
}
 
-   return p4d_offset(pgd, address);
+   return pud_offset(p4d, address);
 }
 
-#endif /* !__ARCH_HAS_5LEVEL_HACK */
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
 unsigned long address,
 pgtbl_mod_mask *mod_mask)
+
 {
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
}
 
-   return pud_offset(p4d, address);
+   return p4d_offset(pgd, address);
 }
 
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
+
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
-- 
2.26.2



Re: [PATCH v4] ocxl: control via sysfs whether the FPGA is reloaded on a link reset

2020-06-04 Thread Andrew Donnellan

On 30/3/20 7:34 pm, Frederic Barrat wrote:

From: Philippe Bergheaud 

Some opencapi FPGA images allow to control if the FPGA should be reloaded
on the next adapter reset. If it is supported, the image specifies it
through a Vendor Specific DVSEC in the config space of function 0.

Signed-off-by: Philippe Bergheaud 
Signed-off-by: Frederic Barrat 


Thanks for the cleanups.

My earlier concerns have been addressed thanks to an update to the 
relevant specification - a Vendor Specific DVSEC with an IBM vendor ID 
and IBM-specific DVSEC ID is specific to the IBM CFG subsystem 
implementation; alternative implementations will need to use different 
vendor IDs and DVSEC IDs.



---

Changelog:
v2:
   - refine ResetReload debug message
   - do not call get_function_0() if pci_dev is for function 0
v3:
   - avoid get_function_0() in ocxl_config_set_reset_reload also
v4:
   - simplify parsing of Vendor Specific DVSEC during AFU init
   - only set/unset bit 0 of the config space register
   - commonize code to fetch the right PCI function and DVSEC offset
   - use kstrtoint() when parsing the sysfs buffer


  Documentation/ABI/testing/sysfs-class-ocxl | 10 +++
  drivers/misc/ocxl/config.c | 81 --
  drivers/misc/ocxl/ocxl_internal.h  |  6 ++
  drivers/misc/ocxl/sysfs.c  | 35 ++
  include/misc/ocxl-config.h |  1 +
  5 files changed, 128 insertions(+), 5 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-ocxl 
b/Documentation/ABI/testing/sysfs-class-ocxl
index b5b1fa197592..b9ea671d5805 100644
--- a/Documentation/ABI/testing/sysfs-class-ocxl
+++ b/Documentation/ABI/testing/sysfs-class-ocxl
@@ -33,3 +33,13 @@ Date:January 2018
  Contact:  linuxppc-dev@lists.ozlabs.org
  Description:  read/write
Give access the global mmio area for the AFU
+
+What:  /sys/class/ocxl//reload_on_reset
+Date:  February 2020
+Contact:   linuxppc-dev@lists.ozlabs.org
+Description:   read/write
+   Control whether the FPGA is reloaded on a link reset
+   0   Do not reload FPGA image from flash
+   1   Reload FPGA image from flash
+   unavailable
+   The device does not support this capability


We should perhaps document here that this is specific to the IBM CFG 
implementation and the IBM-specific DVSEC?


--
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com IBM Australia Limited


Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Stephen Rothwell
Hi all,

On Thu, 4 Jun 2020 16:52:46 +1000 Stephen Rothwell  
wrote:
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 25c3cb8272c0..a6799723cd98 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -1008,6 +1008,12 @@ extern struct page *p4d_page(p4d_t p4d);
>  #define pud_page_vaddr(pud)  __va(pud_val(pud) & ~PUD_MASKED_BITS)
>  #define p4d_page_vaddr(p4d)  __va(p4d_val(p4d) & ~P4D_MASKED_BITS)
>  
> +static inline unsigned long pgd_index(unsigned long address)
> +{
> + return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
> +}
> +#define pgd_index pgd_index
> +
>  #define pte_ERROR(e) \
>   pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
>  #define pmd_ERROR(e) \

I have added that hunk to linux-next for tomorrow as a fix for
mm-consolidate-pgd_index-and-pgd_offset_k-definitions.

It's not strictly necessary, but Michael expressed a preference for the
inline function.  I was wondering if pgd_index "Must be a compile-time
constant" on one (or a few) architectures, then why not leave the
default as an inline function and special case it as a macro where
needed ...

-- 
Cheers,
Stephen Rothwell


pgpLFbPUOA6tM.pgp
Description: OpenPGP digital signature


Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Stephen Rothwell
Hi all,

On Thu, 4 Jun 2020 16:52:46 +1000 Stephen Rothwell  
wrote:
>
> diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
> index db4ef44af22f..569d98a41881 100644
> --- a/arch/powerpc/mm/kasan/8xx.c
> +++ b/arch/powerpc/mm/kasan/8xx.c
> @@ -10,7 +10,7 @@
>  static int __init
>  kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
>  {
> - pmd_t *pmd = pmd_ptr_k(k_start);
> + pmd_t *pmd = pmd_off_k(k_start);
>   unsigned long k_cur, k_next;
>  
>   for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block 
> += SZ_8M) {
> @@ -59,7 +59,7 @@ int __init kasan_init_region(void *start, size_t size)
>   return ret;
>  
>   for (; k_cur < k_end; k_cur += PAGE_SIZE) {
> - pmd_t *pmd = pmd_ptr_k(k_cur);
> + pmd_t *pmd = pmd_off_k(k_cur);
>   void *va = block + k_cur - k_start;
>   pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
>  
> diff --git a/arch/powerpc/mm/kasan/book3s_32.c 
> b/arch/powerpc/mm/kasan/book3s_32.c
> index 4bc491a4a1fd..a32b4640b9de 100644
> --- a/arch/powerpc/mm/kasan/book3s_32.c
> +++ b/arch/powerpc/mm/kasan/book3s_32.c
> @@ -46,7 +46,7 @@ int __init kasan_init_region(void *start, size_t size)
>   kasan_update_early_region(k_start, k_cur, __pte(0));
>  
>   for (; k_cur < k_end; k_cur += PAGE_SIZE) {
> - pmd_t *pmd = pmd_ptr_k(k_cur);
> + pmd_t *pmd = pmd_off_k(k_cur);
>   void *va = block + k_cur - k_start;
>   pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
>  
> diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
> index 286441bbbe49..92e8929cbe3e 100644
> --- a/arch/powerpc/mm/nohash/8xx.c
> +++ b/arch/powerpc/mm/nohash/8xx.c
> @@ -74,7 +74,7 @@ static pte_t __init *early_hugepd_alloc_kernel(hugepd_t 
> *pmdp, unsigned long va)
>  static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t 
> pa,
>pgprot_t prot, int psize, bool new)
>  {
> - pmd_t *pmdp = pmd_ptr_k(va);
> + pmd_t *pmdp = pmd_off_k(va);
>   pte_t *ptep;
>  
>   if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M))
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 45a0556089e8..1136257c3a99 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -264,7 +264,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
>  #if defined(CONFIG_PPC_8xx)
>  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 
> pte_t pte)
>  {
> - pmd_t *pmd = pmd_ptr(mm, addr);
> + pmd_t *pmd = pmd_off(mm, addr);
>   pte_basic_t val;
>   pte_basic_t *entry = &ptep->pte;
>   int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K;
> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
> index e2d054c9575e..6eb4eab79385 100644
> --- a/arch/powerpc/mm/pgtable_32.c
> +++ b/arch/powerpc/mm/pgtable_32.c
> @@ -40,7 +40,7 @@ notrace void __init early_ioremap_init(void)
>  {
>   unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE);
>   pte_t *ptep = (pte_t *)early_fixmap_pagetable;
> - pmd_t *pmdp = pmd_ptr_k(addr);
> + pmd_t *pmdp = pmd_off_k(addr);
>  
>   for (; (s32)(FIXADDR_TOP - addr) > 0;
>addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++)

I have added the above hunks to linux-next for tomorrow as a fix for
mm-pgtable-add-shortcuts-for-accessing-kernel-pmd-and-pte.

-- 
Cheers,
Stephen Rothwell


pgpDTew8KHcbm.pgp
Description: OpenPGP digital signature


Re: [PATCH 13/13] fs: move binfmt_misc sysctl to its own file

2020-06-04 Thread Xiaoming Ni

On 2020/5/29 15:41, Luis Chamberlain wrote:

This moves the binfmt_misc sysctl to its own file to help remove
clutter from kernel/sysctl.c.

Signed-off-by: Luis Chamberlain 
---
  fs/binfmt_misc.c | 1 +
  kernel/sysctl.c  | 7 ---
  2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index f69a043f562b..656b3f5f3bbf 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -821,6 +821,7 @@ static int __init init_misc_binfmt(void)
int err = register_filesystem(&bm_fs_type);
if (!err)
insert_binfmt(&misc_format);
+   register_sysctl_empty_subdir("fs", "binfmt_misc");
return err;
  }

build error when CONFIG_BINFMT_MISC=m

ERROR: modpost: "register_sysctl_empty_subdir" [fs/binfmt_misc.ko] 
undefined!


diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 27f0c9ea..4129dfb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2853,6 +2853,7 @@ void register_sysctl_empty_subdir(const char *base,
 {
register_sysctl_subdir(base, subdir, sysctl_mount_point);
 }
+EXPORT_SYMBOL_GPL(register_sysctl_empty_subdir);
 #endif /* CONFIG_SYSCTL */


Thanks
Xiaoming Ni




Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel panic on powerpc

2020-06-04 Thread Naresh Kamboju
On Wed, 3 Jun 2020 at 19:03, Joerg Roedel  wrote:
>
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
>
> Can you please try the attached patch?

@Joerg, Linaro test farm noticed this kernel crash on nxp ls2088
Machine model: Freescale Layerscape 2088A RDB Board
while booting Linux mainline 5.7.0 version kernel.

Applying your proposed patch fixed the boot problem.

Tested-by: Naresh Kamboju 

Test ref:
https://lavalab.nxp.com/scheduler/job/23787#L426

Here is the kernel crash log from before the patch was applied:

[0.00] Linux version 5.7.0-03887-gf6aee505c71b
(TuxBuild@ecb9ef34f06f) (gcc version 9.3.0 (Debian 9.3.0-8), GNU ld
(GNU Binutils for Debian) 2.34) #1 SMP PREEMPT Wed Jun 3 18:21:26 UTC
2020
[0.00] Machine model: Freescale Layerscape 2088A RDB Board
<>
[0.00] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
[0.00] Unable to handle kernel paging request at virtual
address fffe8000
[0.00] Mem abort info:
[0.00]   ESR = 0x9604
[0.00]   EC = 0x25: DABT (current EL), IL = 32 bits
[0.00]   SET = 0, FnV = 0
[0.00]   EA = 0, S1PTW = 0
[0.00] Data abort info:
[0.00]   ISV = 0, ISS = 0x0004
[0.00]   CM = 0, WnR = 0
[0.00] [fffe8000] address between user and kernel address ranges
[0.00] Internal error: Oops: 9604 [#1] PREEMPT SMP
[0.00] Modules linked in:
[0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.7.0-03887-gf6aee505c71b #1
[0.00] Hardware name: Freescale Layerscape 2088A RDB Board (DT)
[0.00] pstate: 8085 (Nzcv daIf -PAN -UAO BTYPE=--)
[0.00] pc : map_kernel_range_noflush+0xc0/0x280
[0.00] lr : __vmalloc_node_range+0x154/0x2a0
[0.00] sp : b3b1dcbc3e20
[0.00] x29: b3b1dcbc3e20 x28: fffe8000
[0.00] x27: 800010004000 x26: 80001000
[0.00] x25: 00402dc2 x24: b3b1dc53c000
[0.00] x23: 00680f13 x22: 0004
[0.00] x21: b3b1dc53cf48 x20: 
[0.00] x19: b3b1dc627800 x18: 00c0
[0.00] x17:  x16: 0007
[0.00] x15: dead0100 x14: fe020b990600
[0.00] x13: dead0122 x12: 0001
[0.00] x11:  x10: 0082fe3fdec0
[0.00] x9 : 0082fe342d58 x8 : 4cd121ba5000
[0.00] x7 : 80801000 x6 : 0004
[0.00] x5 : fffd x4 : 4000
[0.00] x3 : 80005000 x2 : 00018000
[0.00] x1 :  x0 : 800010003fff
[0.00] Call trace:
[0.00]  map_kernel_range_noflush+0xc0/0x280
[0.00]  __vmalloc_node_range+0x154/0x2a0
[0.00]  __vmalloc_node+0x5c/0x70
[0.00]  init_IRQ+0xac/0xf8
[0.00]  start_kernel+0x2d0/0x4dc
[0.00] Code: f90047e0 d503201f d2a80003 8b030343 (f9400380)
[0.00] random: get_random_bytes called from
print_oops_end_marker+0x2c/0x58 with crng_init=0
[0.00] ---[ end trace  ]---
[0.00] Kernel panic - not syncing: Attempted to kill the idle task!

ref:
https://lavalab.nxp.com/scheduler/job/23596#L603

-- 
Linaro LKFT
https://lkft.linaro.org


RE: [RESEND PATCH v9 4/5] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods

2020-06-04 Thread Vaibhav Jain
Hi Dan,

Thanks for review and insights on this. My responses below:

"Williams, Dan J"  writes:

> [ forgive formatting I'm temporarily stuck using Outlook this week... ]
>
>> From: Vaibhav Jain 
> [..]
>> 
>> Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm
>> module and add the command family NVDIMM_FAMILY_PAPR to the white
>> list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for
>> the nvdimm command mask and implement necessary scaffolding in the
>> module to handle ND_CMD_CALL ioctl and PDSM requests that we receive.
>> 
>> The layout of the PDSM request as we expect from libnvdimm/libndctl is
>> described in newly introduced uapi header 'papr_pdsm.h' which defines a
>> new 'struct nd_pdsm_cmd_pkg' header. This header is used to communicate
>> the PDSM request via member 'nd_cmd_pkg.nd_command' and size of
>> payload that need to be sent/received for servicing the PDSM.
>> 
>> A new function is_cmd_valid() is implemented that reads the args to
>> papr_scm_ndctl() and performs sanity tests on them. A new function
>> papr_scm_service_pdsm() is introduced and is called from
>> papr_scm_ndctl() in case of a PDSM request is received via ND_CMD_CALL
>> command from libnvdimm.
>> 
>> Cc: "Aneesh Kumar K . V" 
>> Cc: Dan Williams 
>> Cc: Michael Ellerman 
>> Cc: Ira Weiny 
>> Reviewed-by: Aneesh Kumar K.V 
>> Signed-off-by: Vaibhav Jain 
>> ---
>> Changelog:
>> 
>> Resend:
>> * Added ack from Aneesh.
>> 
>> v8..v9:
>> * Reduced the usage of term SCM replacing it with appropriate
>>   replacement [ Dan Williams, Aneesh ]
>> * Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
>> * s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
>> * s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
>> * Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'.
>> * Minor update to patch description.
>> 
>> v7..v8:
>> * Removed the 'payload_offset' field from 'struct
>>   nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start
>>   at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
>> * To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
>>   'reserved' field of 10-bytes is introduced. [ Aneesh ]
>> * Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
>>   [ Ira ]
>> 
>> Resend:
>> * None
>> 
>> v6..v7 :
>> * Removed the re-definitions of __packed macro from papr_scm_pdsm.h
>>   [Mpe].
>> * Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
>> * Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
>>   [Mpe].
>> * Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]
>> 
>> v5..v6 :
>> * Changed the usage of the term DSM to PDSM to distinguish it from the
>>   ACPI term [ Dan Williams ]
>> * Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various
>> struct
>>   to reflect the new terminology.
>> * Updated the patch description and title to reflect the new terminology.
>> * Squashed patch to introduce new command family in 'ndctl.h' with
>>   this patch [ Dan Williams ]
>> * Updated the papr_scm_pdsm method starting index from 0x1 to 0x0
>>   [ Dan Williams ]
>> * Removed redundant license text from the papr_scm_psdm.h file.
>>   [ Dan Williams ]
>> * s/envelop/envelope/ at various places [ Dan Williams ]
>> * Added '__packed' attribute to command package header to gaurd
>>   against different compiler adding paddings between the fields.
>>   [ Dan Williams]
>> * Converted various pr_debug to dev_debug [ Dan Williams ]
>> 
>> v4..v5 :
>> * None
>> 
>> v3..v4 :
>> * None
>> 
>> v2..v3 :
>> * Updated the patch prefix to 'ndctl/uapi' [Aneesh]
>> 
>> v1..v2 :
>> * None
>> ---
>>  arch/powerpc/include/uapi/asm/papr_pdsm.h | 136
>> ++  arch/powerpc/platforms/pseries/papr_scm.c |
>> 101 +++-
>>  include/uapi/linux/ndctl.h|   1 +
>>  3 files changed, 232 insertions(+), 6 deletions(-)  create mode 100644
>> arch/powerpc/include/uapi/asm/papr_pdsm.h
>> 
>> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> b/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> new file mode 100644
>> index ..6407fefcc007
>> --- /dev/null
>> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> @@ -0,0 +1,136 @@
>> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
>> +/*
>> + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
>> + *
>> + * (C) Copyright IBM 2020
>> + *
>> + * Author: Vaibhav Jain   */
>> +
>> +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
>> +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
>> +
>> +#include 
>> +
>> +/*
>> + * PDSM Envelope:
>> + *
>> + * The ioctl ND_CMD_CALL transfers data between user-space and kernel
>> +via
>> + * envelope which consists of a header and user-defined payload sections.
>> + * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
>> + * payload following it and accessible via 'nd_pdsm_cmd_pkg.payload' field.
>> + * There is reserved field that can used to introduce new fields to the
>> + * structure in future. It also tries to en

Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-06-04 Thread Guenter Roeck
On 6/3/20 11:22 PM, Ira Weiny wrote:
[ ... ]
> 
> s390: (does not compile)
> 
> :1511:2: warning: #warning syscall clone3 not implemented [-Wcpp]
> In file included from ./arch/sparc/include/asm/bug.h:6:0,
>  from ./include/linux/bug.h:5,
>  from ./include/linux/mmdebug.h:5,
>  from ./include/linux/mm.h:9,
>  from mm/huge_memory.c:8:
> mm/huge_memory.c: In function 'hugepage_init':
> ./include/linux/compiler.h:403:38: error: call to '__compiletime_assert_127' 
> declared with attribute error: BUILD_BUG_ON failed: ((13 + (13-3))-13) >= 9
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ./include/linux/compiler.h:384:4: note: in definition of macro 
> '__compiletime_assert'
> prefix ## suffix();\
> ^~
> ./include/linux/compiler.h:403:2: note: in expansion of macro 
> '_compiletime_assert'
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^~~
> ./include/linux/build_bug.h:39:37: note: in expansion of macro 
> 'compiletime_assert'
>  #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
>  ^~
> ./include/linux/build_bug.h:50:2: note: in expansion of macro 
> 'BUILD_BUG_ON_MSG'
>   BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
>   ^~~~
> ./include/linux/bug.h:24:4: note: in expansion of macro 'BUILD_BUG_ON'
> BUILD_BUG_ON(cond); \
> ^~~~
> mm/huge_memory.c:403:2: note: in expansion of macro 'MAYBE_BUILD_BUG_ON'
>   MAYBE_BUILD_BUG_ON(HPAGE_PMD_ORDER >= MAX_ORDER);
>   ^~
> make[1]: *** [scripts/Makefile.build:267: mm/huge_memory.o] Error 1
> make[1]: *** Waiting for unfinished jobs
> make: *** [Makefile:1735: mm] Error 2
> make: *** Waiting for unfinished jobs
> 
> 
> 
> The s390 error is the same on Linus' master and linux-next.  So whatever is
> causing that has slipped into mainline and/or is something I've broken in the
> test scripts.
> 

Compiler version related. gcc version 8.x and later no longer work.
Bisect points to commit a148866489f ("sched: Replace rq::wake_list").
Oddly enough x86 images are broken as well. You'll have to use an
older version of gcc (or presumably clang) until this is fixed.

Guenter


Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-06-04 Thread Mike Rapoport
On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote:
> 
> sparc32 smp images in next-20200603 still crash for me with a spinlock
> recursion. s390 images hang early in boot. Several others (alpha, arm64,
> various ppc) don't even compile. I can run some more bisects over time,
> but this is becoming a full-time job :-(.

I've been able to bisect s390 hang to commit b614345f52bc ("x86/entry:
Clarify irq_{enter,exit}_rcu()").

After this commit, lockdep_hardirq_exit() is called twice on s390 (and
others) - one time in irq_exit_rcu() and another one in irq_exit():

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
__irq_exit_rcu();
 /* must be last! */
lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
irq_exit_rcu();
rcu_irq_exit();
 /* must be last! */
lockdep_hardirq_exit();
}

Removing the call in irq_exit() makes s390 boot again, and judging by the
x86 entry code, the comment /* must be last! */ is stale...

@Peter, @Thomas, can you comment please?

>From e51d50ee6f4d1f446decf91c2c67230da14ff82c Mon Sep 17 00:00:00 2001
From: Mike Rapoport 
Date: Thu, 4 Jun 2020 12:37:03 +0300
Subject: [PATCH] softirq: don't call lockdep_hardirq_exit() twice

After commit b614345f52bc ("x86/entry: Clarify irq_{enter,exit}_rcu()")
lockdep_hardirq_exit() is called twice on every architecture that uses
irq_exit(): one time in irq_exit_rcu() and another one in irq_exit().

Remove the extra call in irq_exit().

Signed-off-by: Mike Rapoport 
---
 kernel/softirq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index a3eb6eba8c41..7523f4ce4c1d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -427,7 +427,6 @@ static inline void __irq_exit_rcu(void)
 void irq_exit_rcu(void)
 {
__irq_exit_rcu();
-/* must be last! */
lockdep_hardirq_exit();
 }
 
@@ -440,8 +439,6 @@ void irq_exit(void)
 {
irq_exit_rcu();
rcu_irq_exit();
-/* must be last! */
-   lockdep_hardirq_exit();
 }
 
 /*
-- 
2.26.2



> Guenter

-- 
Sincerely yours,
Mike.


Re: [PATCH v4 08/14] powerpc: add support for folded p4d page tables

2020-06-04 Thread Qian Cai



> On Jun 3, 2020, at 3:05 PM, Andrew Morton  wrote:
> 
> A bunch of new material just landed in linux-next/powerpc.
> 
> The timing is awkward!  I trust this will be going into mainline during
> this merge window?  If not, please drop it and repull after -rc1.

I have noticed the same pattern over and over again, i.e., a lot of new powerpc 
material has shown up in linux-next for only a few days before being sent in 
a pull request to Linus.

There is absolutely no safety net for this kind of practice. The main problem is 
that Linus seems totally fine with it.

linux-next: build failure on powerpc 8xx with 16k pages

2020-06-04 Thread Christophe Leroy

Hi all,

Using mpc885_ads_defconfig with CONFIG_PPC_16K_PAGES instead of 
CONFIG_PPC_4K_PAGES, getting the following build failure:


  CC  mm/gup.o
In file included from ./include/linux/kernel.h:11:0,
 from mm/gup.c:2:
In function 'gup_hugepte.constprop',
inlined from 'gup_huge_pd.isra.78' at mm/gup.c:2465:8:
./include/linux/compiler.h:392:38: error: call to 
'__compiletime_assert_257' declared with attribute error: Unsupported 
access size for {READ,WRITE}_ONCE().

  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^
./include/linux/compiler.h:373:4: note: in definition of macro 
'__compiletime_assert'

prefix ## suffix();\
^
./include/linux/compiler.h:392:2: note: in expansion of macro 
'_compiletime_assert'

  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^
./include/linux/compiler.h:405:2: note: in expansion of macro 
'compiletime_assert'

  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
  ^
./include/linux/compiler.h:291:2: note: in expansion of macro 
'compiletime_assert_rwonce_type'

  compiletime_assert_rwonce_type(x);\
  ^
mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE'
  pte = READ_ONCE(*ptep);
^
In function 'gup_get_pte',
inlined from 'gup_pte_range' at mm/gup.c:2228:9,
inlined from 'gup_pmd_range' at mm/gup.c:2613:15,
inlined from 'gup_pud_range' at mm/gup.c:2641:15,
inlined from 'gup_p4d_range' at mm/gup.c:2666:15,
inlined from 'gup_pgd_range' at mm/gup.c:2694:15,
inlined from 'internal_get_user_pages_fast' at mm/gup.c:2785:3:
./include/linux/compiler.h:392:38: error: call to 
'__compiletime_assert_254' declared with attribute error: Unsupported 
access size for {READ,WRITE}_ONCE().

  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^
./include/linux/compiler.h:373:4: note: in definition of macro 
'__compiletime_assert'

prefix ## suffix();\
^
./include/linux/compiler.h:392:2: note: in expansion of macro 
'_compiletime_assert'

  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^
./include/linux/compiler.h:405:2: note: in expansion of macro 
'compiletime_assert'

  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
  ^
./include/linux/compiler.h:291:2: note: in expansion of macro 
'compiletime_assert_rwonce_type'

  compiletime_assert_rwonce_type(x);\
  ^
mm/gup.c:2199:9: note: in expansion of macro 'READ_ONCE'
  return READ_ONCE(*ptep);
 ^
make[2]: *** [mm/gup.o] Error 1


Bisected to:

2ab3a0a02905 (HEAD, refs/bisect/bad) READ_ONCE: Enforce atomicity for 
{READ,WRITE}_ONCE() memory accesses


Christophe



Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Stephen Rothwell
Hi all,

On Thu, 4 Jun 2020 17:49:25 +1000 Stephen Rothwell  
wrote:
>
> diff --cc arch/powerpc/include/asm/nohash/32/pgtable.h
> index 639f3b3713ec,eb8538c85077..1927e1b653f2
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@@ -204,13 -205,6 +205,9 @@@ static inline void pmd_clear(pmd_t *pmd
>   *pmdp = __pmd(0);
>   }
>   
> - 
> - /* to find an entry in a kernel page-table-directory */
> - #define pgd_offset_k(address) pgd_offset(&init_mm, address)
> - 
>  +/* to find an entry in a page-table-directory */
>  +#define pgd_index(address)   ((address) >> PGDIR_SHIFT)
>  +#define pgd_offset(mm, address)  ((mm)->pgd + pgd_index(address))
>   
>   /*
>* PTE updates. This function is called whenever an existing
> @@@ -240,7 -234,7 +237,7 @@@ static inline pte_basic_t pte_update(st
>   pte_basic_t old = pte_val(*p);
>   pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
>   int num, i;
> --pmd_t *pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
> ++pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), 
> addr), addr), addr);
>   
>   if (!huge)
>   num = PAGE_SIZE / SZ_4K;

I have added those hunks (more or less) to linux-next for tomorrow as a
fix for mm-consolidate-pgd_index-and-pgd_offset_k-definitions.
-- 
Cheers,
Stephen Rothwell


pgpYLaVqz4JF5.pgp
Description: OpenPGP digital signature


Re: linux-next: build failure on powerpc 8xx with 16k pages

2020-06-04 Thread Will Deacon
Hi, [+Peter]

On Thu, Jun 04, 2020 at 10:48:03AM +0000, Christophe Leroy wrote:
> Using mpc885_ads_defconfig with CONFIG_PPC_16K_PAGES instead of
> CONFIG_PPC_4K_PAGES, getting the following build failure:
> 
>   CC  mm/gup.o
> In file included from ./include/linux/kernel.h:11:0,
>  from mm/gup.c:2:
> In function 'gup_hugepte.constprop',
> inlined from 'gup_huge_pd.isra.78' at mm/gup.c:2465:8:
> ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_257'
> declared with attribute error: Unsupported access size for
> {READ,WRITE}_ONCE().
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ./include/linux/compiler.h:373:4: note: in definition of macro
> '__compiletime_assert'
> prefix ## suffix();\
> ^
> ./include/linux/compiler.h:392:2: note: in expansion of macro
> '_compiletime_assert'
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ./include/linux/compiler.h:405:2: note: in expansion of macro
> 'compiletime_assert'
>   compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
>   ^
> ./include/linux/compiler.h:291:2: note: in expansion of macro
> 'compiletime_assert_rwonce_type'
>   compiletime_assert_rwonce_type(x);\
>   ^
> mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE'
>   pte = READ_ONCE(*ptep);
> ^
> In function 'gup_get_pte',
> inlined from 'gup_pte_range' at mm/gup.c:2228:9,
> inlined from 'gup_pmd_range' at mm/gup.c:2613:15,
> inlined from 'gup_pud_range' at mm/gup.c:2641:15,
> inlined from 'gup_p4d_range' at mm/gup.c:2666:15,
> inlined from 'gup_pgd_range' at mm/gup.c:2694:15,
> inlined from 'internal_get_user_pages_fast' at mm/gup.c:2785:3:

At first glance, this looks like a real bug in the 16k page code -- you're
loading the pte non-atomically on the fast GUP path and so you're prone to
tearing, which probably isn't what you want. For a short-term hack, I'd
suggest having CONFIG_HAVE_FAST_GUP depend on !CONFIG_PPC_16K_PAGES, but if
you want to support this then you'll need to rework your pte_t so that it
can be loaded atomically.

Will


Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Stephen Rothwell
Hi all,

On Thu, 4 Jun 2020 17:49:25 +1000 Stephen Rothwell  
wrote:
>
> diff --cc arch/powerpc/include/asm/nohash/32/pgtable.h
> index 639f3b3713ec,eb8538c85077..1927e1b653f2
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@@ -342,15 -334,6 +337,10 @@@ static inline int pte_young(pte_t pte
>   pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
>   #endif
>   
> - /* Find an entry in the third-level page table.. */
> - #define pte_index(address)  \
> - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
>  +#define pte_offset_kernel(dir, addr)\
>  +(pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \
>  +  pte_index(addr))
> - #define pte_offset_map(dir, addr)   pte_offset_kernel((dir), (addr))
> - static inline void pte_unmap(pte_t *pte) { }
>  +
>   /*
>* Encode and decode a swap entry.
>* Note that the bits we use in a PTE for representing a swap entry

I have added this hunk (sort of - see below) to linux-next for tomorrow
as a fix for mm-consolidate-pte_index-and-pte_offset_-definitions.

From: Stephen Rothwell 
Date: Thu, 4 Jun 2020 21:16:19 +1000
Subject: [PATCH] mm-consolidate-pte_index-and-pte_offset_-definitions-fix

Signed-off-by: Stephen Rothwell 
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index c188a6f64bcd..d94bcd117c5b 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -341,6 +341,10 @@ static inline int pte_young(pte_t pte)
pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
 #endif
 
+#define pte_offset_kernel(dir, addr)   \
+   (pmd_bad(*(dir)) ? NULL : (pte_t *)pmd_page_vaddr(*(dir)) + \
+ pte_index(addr))
+
 /*
  * Encode and decode a swap entry.
  * Note that the bits we use in a PTE for representing a swap entry
-- 
2.27.0.rc2

-- 
Cheers,
Stephen Rothwell


pgprESmm_ngbi.pgp
Description: OpenPGP digital signature


Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Michael Ellerman
Stephen Rothwell  writes:
> Hi all,
>
> On Thu, 4 Jun 2020 16:52:46 +1000 Stephen Rothwell  
> wrote:
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 25c3cb8272c0..a6799723cd98 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -1008,6 +1008,12 @@ extern struct page *p4d_page(p4d_t p4d);
>>  #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
>>  #define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS)
>>  
>> +static inline unsigned long pgd_index(unsigned long address)
>> +{
>> +return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
>> +}
>> +#define pgd_index pgd_index
>> +
>>  #define pte_ERROR(e) \
>>  pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
>>  #define pmd_ERROR(e) \
>
> I have added that hunk to linux-next for tomorrow as a fix for
> mm-consolidate-pgd_index-and-pgd_offset_k-definitions.
>
> It's not strictly necessary, but Michael expressed a preference for the
> inline function.

That was because we just recently converted it into a static inline to
avoid UBSAN warnings:

commit c2e929b18cea6cbf71364f22d742d9aad7f4677a
Author: Qian Cai 
AuthorDate: Thu Mar 5 23:48:52 2020 -0500

powerpc/64s/pgtable: fix an undefined behaviour

Booting a power9 server with hash MMU could trigger an undefined
behaviour because pud_offset(p4d, 0) will do,

0 >> (PAGE_SHIFT:16 + PTE_INDEX_SIZE:8 + H_PMD_INDEX_SIZE:10)

Fix it by converting pud_index() and friends to static inline
functions.

UBSAN: shift-out-of-bounds in arch/powerpc/mm/ptdump/ptdump.c:282:15
shift exponent 34 is too large for 32-bit type 'int'
CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4-next-20200303+ #13
Call Trace:
dump_stack+0xf4/0x164 (unreliable)
ubsan_epilogue+0x18/0x78
__ubsan_handle_shift_out_of_bounds+0x160/0x21c
walk_pagetables+0x2cc/0x700
walk_pud at arch/powerpc/mm/ptdump/ptdump.c:282
(inlined by) walk_pagetables at arch/powerpc/mm/ptdump/ptdump.c:311
ptdump_check_wx+0x8c/0xf0
mark_rodata_ro+0x48/0x80
kernel_init+0x74/0x194
ret_from_kernel_thread+0x5c/0x74


> I was wondering if pgd_index "Must be a compile-time
> constant" on one (or a few) architectures, then why not leave the
> default as an inline function and special case it as a macro where
> needed ...

AIUI that requirement comes from x86 which has:

#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
...
#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
...
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];


Which will produce a variable length array if pgd_index() isn't a
compile-time constant.

cheers


Re: [PATCH v2 0/5] Statsfs: a new ram-based file system for Linux kernel statistics

2020-06-04 Thread Amit Kucheria
On Tue, May 5, 2020 at 3:07 AM David Rientjes  wrote:
>
> On Mon, 4 May 2020, Emanuele Giuseppe Esposito wrote:
>
> > There is currently no common way for Linux kernel subsystems to expose
> > statistics to userspace shared throughout the Linux kernel; subsystems
> > have to take care of gathering and displaying statistics by themselves,
> > for example in the form of files in debugfs. For example KVM has its own
> > code section that takes care of this in virt/kvm/kvm_main.c, where it sets
> > up debugfs handlers for displaying values and aggregating them from
> > various subfolders to obtain information about the system state (i.e.
> > displaying the total number of exits, calculated by summing all exits of
> > all cpus of all running virtual machines).
> >
> > Allowing each section of the kernel to do so has two disadvantages. First,
> > it will introduce redundant code. Second, debugfs is anyway not the right
> > place for statistics (for example it is affected by lockdown)
> >
> > In this patch series I introduce statsfs, a synthetic ram-based virtual
> > filesystem that takes care of gathering and displaying statistics for the
> > Linux kernel subsystems.
> >
>
> This is exciting, we have been looking in the same area recently.  Adding
> Jonathan Adams .
>
> In your diffstat, one thing I notice that is omitted: an update to
> Documentation/* :)  Any chance of getting some proposed Documentation/
> updates with structure of the fs, the per subsystem breakdown, and best
> practices for managing the stats from the kernel level?
>
> > The file system is mounted on /sys/kernel/stats and would be already used
> > by kvm. Statsfs was initially introduced by Paolo Bonzini [1].
> >
> > Statsfs offers a generic and stable API, allowing any kind of
> > directory/file organization and supporting multiple kind of aggregations
> > (not only sum, but also average, max, min and count_zero) and data types
> > (all unsigned and signed types plus boolean). The implementation, which is
> > a generalization of KVM’s debugfs statistics code, takes care of gathering
> > and displaying information at run time; users only need to specify the
> > values to be included in each source.
> >
> > Statsfs would also be a different mountpoint from debugfs, and would not
> > suffer from limited access due to the security lock down patches. Its main
> > function is to display each statistics as a file in the desired folder
> > hierarchy defined through the API. Statsfs files can be read, and possibly
> > cleared if their file mode allows it.
> >
> > Statsfs has two main components: the public API defined by
> > include/linux/statsfs.h, and the virtual file system which should end up
> > in /sys/kernel/stats.
> >
> > The API has two main elements, values and sources. Kernel subsystems like
> > KVM can use the API to create a source, add child
> > sources/values/aggregates and register it to the root source (that on the
> > virtual fs would be /sys/kernel/statsfs).
> >
> > Sources are created via statsfs_source_create(), and each source becomes a
> > directory in the file system. Sources form a parent-child relationship;
> > root sources are added to the file system via statsfs_source_register().
> > Every other source is added to or removed from a parent through the
> > statsfs_source_add_subordinate and statsfs_source_remove_subordinate APIs.
> > Once a source is created and added to the tree (via add_subordinate), it
> > will be used to compute aggregate values in the parent source.
> >
> > Values represent quantities that are gathered by the statsfs user. Examples
> > of values include the number of vm exits of a given kind, the amount of
> > memory used by some data structure, the length of the longest hash table
> > chain, or anything like that. Values are defined with the
> > statsfs_source_add_values function. Each value is defined by a struct
> > statsfs_value; the same statsfs_value can be added to many different
> > sources. A value can be considered "simple" if it fetches data from a
> > user-provided location, or "aggregate" if it groups all values in the
> > subordinates sources that include the same statsfs_value.
> >
>
> This seems like it could have a lot of overhead if we wanted to
> periodically track the totality of subsystem stats as a form of telemetry
> gathering from userspace.  To collect telemetry for 1,000 different stats,
> do we need to issue lseek()+read() syscalls for each of them individually
> (or, worse, open()+read()+close())?
>
> Any thoughts on how that can be optimized?  A couple of ideas:
>
>  - an interface that allows gathering of all stats for a particular
>interface through a single file that would likely be encoded in binary
>and the responsibility of userspace to disseminate, or
>
>  - an interface that extends beyond this proposal and allows the reader to
>specify which stats they are interested in collecting and then the
>kernel will only provide these stats in a well formed 

Re: linux-next: build failure on powerpc 8xx with 16k pages

2020-06-04 Thread Peter Zijlstra
On Thu, Jun 04, 2020 at 12:17:23PM +0100, Will Deacon wrote:
> Hi, [+Peter]
> 
> > On Thu, Jun 04, 2020 at 10:48:03AM +0000, Christophe Leroy wrote:
> > Using mpc885_ads_defconfig with CONFIG_PPC_16K_PAGES instead of
> > CONFIG_PPC_4K_PAGES, getting the following build failure:
> > 
> >   CC  mm/gup.o
> > In file included from ./include/linux/kernel.h:11:0,
> >  from mm/gup.c:2:
> > In function 'gup_hugepte.constprop',
> > inlined from 'gup_huge_pd.isra.78' at mm/gup.c:2465:8:
> > ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_257'
> > declared with attribute error: Unsupported access size for
> > {READ,WRITE}_ONCE().
> >   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> >   ^
> > ./include/linux/compiler.h:373:4: note: in definition of macro
> > '__compiletime_assert'
> > prefix ## suffix();\
> > ^
> > ./include/linux/compiler.h:392:2: note: in expansion of macro
> > '_compiletime_assert'
> >   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> >   ^
> > ./include/linux/compiler.h:405:2: note: in expansion of macro
> > 'compiletime_assert'
> >   compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
> >   ^
> > ./include/linux/compiler.h:291:2: note: in expansion of macro
> > 'compiletime_assert_rwonce_type'
> >   compiletime_assert_rwonce_type(x);\
> >   ^
> > mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE'
> >   pte = READ_ONCE(*ptep);
> > ^
> > In function 'gup_get_pte',
> > inlined from 'gup_pte_range' at mm/gup.c:2228:9,
> > inlined from 'gup_pmd_range' at mm/gup.c:2613:15,
> > inlined from 'gup_pud_range' at mm/gup.c:2641:15,
> > inlined from 'gup_p4d_range' at mm/gup.c:2666:15,
> > inlined from 'gup_pgd_range' at mm/gup.c:2694:15,
> > inlined from 'internal_get_user_pages_fast' at mm/gup.c:2785:3:
> 
> At first glance, this looks like a real bug in the 16k page code -- you're
> loading the pte non-atomically on the fast GUP path and so you're prone to
> tearing, which probably isn't what you want. For a short-term hack, I'd
> suggest having CONFIG_HAVE_FAST_GUP depend on !CONFIG_PPC_16K_PAGES, but if
> you want to support this then you'll need to rework your pte_t so that it
> can be loaded atomically.

Looking at commit 55c8fc3f49302, they're all the exact same value, so
what they could do is grow another special gup_get_pte() variant that
just loads the first value.

Also, per that very same commit, there's a distinct lack of WRITE_ONCE()
in the pte_update() / __set_pte_at() paths for much of Power.


Re: linux-next: fix ups for clashes between akpm and powerpc trees

2020-06-04 Thread Michael Ellerman
Stephen Rothwell  writes:
> Hi all,
>
> On Thu, 4 Jun 2020 16:52:46 +1000 Stephen Rothwell  
> wrote:
>>
>> diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
>> index db4ef44af22f..569d98a41881 100644
>> --- a/arch/powerpc/mm/kasan/8xx.c
>> +++ b/arch/powerpc/mm/kasan/8xx.c
>> @@ -10,7 +10,7 @@
>>  static int __init
>>  kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void 
>> *block)
>>  {
>> -pmd_t *pmd = pmd_ptr_k(k_start);
>> +pmd_t *pmd = pmd_off_k(k_start);
>>  unsigned long k_cur, k_next;
>>  
>>  for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block 
>> += SZ_8M) {
>> @@ -59,7 +59,7 @@ int __init kasan_init_region(void *start, size_t size)
>>  return ret;
>>  
>>  for (; k_cur < k_end; k_cur += PAGE_SIZE) {
>> -pmd_t *pmd = pmd_ptr_k(k_cur);
>> +pmd_t *pmd = pmd_off_k(k_cur);
>>  void *va = block + k_cur - k_start;
>>  pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
>>  
>> diff --git a/arch/powerpc/mm/kasan/book3s_32.c 
>> b/arch/powerpc/mm/kasan/book3s_32.c
>> index 4bc491a4a1fd..a32b4640b9de 100644
>> --- a/arch/powerpc/mm/kasan/book3s_32.c
>> +++ b/arch/powerpc/mm/kasan/book3s_32.c
>> @@ -46,7 +46,7 @@ int __init kasan_init_region(void *start, size_t size)
>>  kasan_update_early_region(k_start, k_cur, __pte(0));
>>  
>>  for (; k_cur < k_end; k_cur += PAGE_SIZE) {
>> -pmd_t *pmd = pmd_ptr_k(k_cur);
>> +pmd_t *pmd = pmd_off_k(k_cur);
>>  void *va = block + k_cur - k_start;
>>  pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
>>  
>> diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
>> index 286441bbbe49..92e8929cbe3e 100644
>> --- a/arch/powerpc/mm/nohash/8xx.c
>> +++ b/arch/powerpc/mm/nohash/8xx.c
>> @@ -74,7 +74,7 @@ static pte_t __init *early_hugepd_alloc_kernel(hugepd_t 
>> *pmdp, unsigned long va)
>>  static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t 
>> pa,
>>   pgprot_t prot, int psize, bool new)
>>  {
>> -pmd_t *pmdp = pmd_ptr_k(va);
>> +pmd_t *pmdp = pmd_off_k(va);
>>  pte_t *ptep;
>>  
>>  if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M))
>> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
>> index 45a0556089e8..1136257c3a99 100644
>> --- a/arch/powerpc/mm/pgtable.c
>> +++ b/arch/powerpc/mm/pgtable.c
>> @@ -264,7 +264,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct 
>> *vma,
>>  #if defined(CONFIG_PPC_8xx)
>>  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 
>> pte_t pte)
>>  {
>> -pmd_t *pmd = pmd_ptr(mm, addr);
>> +pmd_t *pmd = pmd_off(mm, addr);
>>  pte_basic_t val;
>>  pte_basic_t *entry = &ptep->pte;
>>  int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K;
>> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
>> index e2d054c9575e..6eb4eab79385 100644
>> --- a/arch/powerpc/mm/pgtable_32.c
>> +++ b/arch/powerpc/mm/pgtable_32.c
>> @@ -40,7 +40,7 @@ notrace void __init early_ioremap_init(void)
>>  {
>>  unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE);
>>  pte_t *ptep = (pte_t *)early_fixmap_pagetable;
>> -pmd_t *pmdp = pmd_ptr_k(addr);
>> +pmd_t *pmdp = pmd_off_k(addr);
>>  
>>  for (; (s32)(FIXADDR_TOP - addr) > 0;
>>   addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++)
>
> I have added the above hunks to linux-next for tomorrow as a fix for
> mm-pgtable-add-shortcuts-for-accessing-kernel-pmd-and-pte.

Looks good. Thanks.

cheers


[PATCH v3 0/3] selftests: powerpc: Fixes and execute-disable test for pkeys

2020-06-04 Thread Sandipan Das
This fixes the way the Authority Mask Register (AMR) is updated
by the existing pkey tests and adds a new test to verify the
functionality of execute-disabled pkeys.

Previous versions can be found at:
v2: 
https://lore.kernel.org/linuxppc-dev/20200527030342.13712-1-sandi...@linux.ibm.com/
v1: 
https://lore.kernel.org/linuxppc-dev/20200508162332.65316-1-sandi...@linux.ibm.com/

Changes in v3:
- Fixed AMR writes for existing pkey tests (new patch).
- Moved Hash MMU check under utilities (new patch) and removed duplicate
  code.
- Fixed comments on why the pkey permission bits were redefined.
- Switched to existing mfspr() macro for reading AMR.
- Switched to sig_atomic_t as data type for variables updated in the
  signal handlers.
- Switched to exit()-ing if the signal handlers come across an unexpected
  condition instead of trying to reset page and pkey permissions.
- Switched to write() from printf() for printing error messages from
  the signal handlers.
- Switched to getpagesize().
- Renamed fault counter to denote remaining faults.
- Dropped unnecessary randomization for choosing an address to fault at.
- Added additional information on change in permissions due to AMR and
  IAMR bits in comments.
- Switched the first instruction word of the executable region to a trap
  to test if it is actually overwritten by a no-op later.
- Added a new test scenario where the pkey imposes no restrictions and
  an attempt is made to jump to the executable region again.

Changes in v2:
- Added .gitignore entry for test binary.
- Fixed builds for older distros where siginfo_t might not have si_pkey as
  a formal member based on discussion with Michael.

Sandipan Das (3):
  selftests: powerpc: Fix pkey access right updates
  selftests: powerpc: Move Hash MMU check to utilities
  selftests: powerpc: Add test for execute-disabled pkeys

 tools/testing/selftests/powerpc/include/reg.h |   6 +
 .../testing/selftests/powerpc/include/utils.h |   1 +
 tools/testing/selftests/powerpc/mm/.gitignore |   1 +
 tools/testing/selftests/powerpc/mm/Makefile   |   5 +-
 .../selftests/powerpc/mm/bad_accesses.c   |  28 --
 .../selftests/powerpc/mm/pkey_exec_prot.c | 388 ++
 .../selftests/powerpc/ptrace/core-pkey.c  |   2 +-
 .../selftests/powerpc/ptrace/ptrace-pkey.c|   2 +-
 tools/testing/selftests/powerpc/utils.c   |  28 ++
 9 files changed, 429 insertions(+), 32 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/pkey_exec_prot.c

-- 
2.25.1



[PATCH v3 2/3] selftests: powerpc: Move Hash MMU check to utilities

2020-06-04 Thread Sandipan Das
This moves a function to test if the MMU is in Hash mode
under the generic test utilities.

Signed-off-by: Sandipan Das 
---
 .../testing/selftests/powerpc/include/utils.h |  1 +
 tools/testing/selftests/powerpc/mm/Makefile   |  2 +-
 .../selftests/powerpc/mm/bad_accesses.c   | 28 ---
 tools/testing/selftests/powerpc/utils.c   | 28 +++
 4 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/tools/testing/selftests/powerpc/include/utils.h 
b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c30d9a..ad2728736ae5 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -60,6 +60,7 @@ static inline bool have_hwcap2(unsigned long ftr2)
 #endif
 
 bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
 
 /* Yes, this is evil */
 #define FAIL_IF(x) \
diff --git a/tools/testing/selftests/powerpc/mm/Makefile 
b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..2389bf791fd6 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -10,7 +10,7 @@ TEST_GEN_FILES := tempfile
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 $(OUTPUT)/prot_sao: ../utils.c
 
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c 
b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f499ef..a864ed7e2008 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -64,34 +64,6 @@ int bad_access(char *p, bool write)
return 0;
 }
 
-static int using_hash_mmu(bool *using_hash)
-{
-   char line[128];
-   FILE *f;
-   int rc;
-
-   f = fopen("/proc/cpuinfo", "r");
-   FAIL_IF(!f);
-
-   rc = 0;
-   while (fgets(line, sizeof(line), f) != NULL) {
-   if (strcmp(line, "MMU   : Hash\n") == 0) {
-   *using_hash = true;
-   goto out;
-   }
-
-   if (strcmp(line, "MMU   : Radix\n") == 0) {
-   *using_hash = false;
-   goto out;
-   }
-   }
-
-   rc = -1;
-out:
-   fclose(f);
-   return rc;
-}
-
 static int test(void)
 {
unsigned long i, j, addr, region_shift, page_shift, page_size;
diff --git a/tools/testing/selftests/powerpc/utils.c 
b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98c4896..933678f1ed0a 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -293,3 +293,31 @@ void set_dscr(unsigned long val)
 
asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
 }
+
+int using_hash_mmu(bool *using_hash)
+{
+   char line[128];
+   FILE *f;
+   int rc;
+
+   f = fopen("/proc/cpuinfo", "r");
+   FAIL_IF(!f);
+
+   rc = 0;
+   while (fgets(line, sizeof(line), f) != NULL) {
+   if (strcmp(line, "MMU   : Hash\n") == 0) {
+   *using_hash = true;
+   goto out;
+   }
+
+   if (strcmp(line, "MMU   : Radix\n") == 0) {
+   *using_hash = false;
+   goto out;
+   }
+   }
+
+   rc = -1;
+out:
+   fclose(f);
+   return rc;
+}
-- 
2.25.1



[PATCH v3 3/3] selftests: powerpc: Add test for execute-disabled pkeys

2020-06-04 Thread Sandipan Das
Apart from read and write access, memory protection keys can
also be used for restricting execute permission of pages on
powerpc. This adds a test to verify if the feature works as
expected.

Signed-off-by: Sandipan Das 
---
 tools/testing/selftests/powerpc/mm/.gitignore |   1 +
 tools/testing/selftests/powerpc/mm/Makefile   |   3 +-
 .../selftests/powerpc/mm/pkey_exec_prot.c | 388 ++
 3 files changed, 391 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/pkey_exec_prot.c

diff --git a/tools/testing/selftests/powerpc/mm/.gitignore 
b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..8f841f925baa 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -8,3 +8,4 @@ wild_bctr
 large_vm_fork_separation
 bad_accesses
 tlbie_test
+pkey_exec_prot
diff --git a/tools/testing/selftests/powerpc/mm/Makefile 
b/tools/testing/selftests/powerpc/mm/Makefile
index 2389bf791fd6..f9fa0ba7435c 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,7 +3,7 @@ noarg:
$(MAKE) -C ../
 
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors 
wild_bctr \
- large_vm_fork_separation bad_accesses
+ large_vm_fork_separation bad_accesses pkey_exec_prot
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
 
@@ -17,6 +17,7 @@ $(OUTPUT)/prot_sao: ../utils.c
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 $(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
 
 $(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c 
b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 000000000000..7c7c93425c5e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE   0x4
+
+/* Older versions of libc do not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR4
+#endif
+
+#define SI_PKEY_OFFSET 0x20
+
+#define SYS_pkey_mprotect  386
+#define SYS_pkey_alloc 384
+#define SYS_pkey_free  385
+
+#define PKEY_BITS_PER_PKEY 2
+#define NR_PKEYS   32
+#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+#define PPC_INST_NOP   0x60000000
+#define PPC_INST_TRAP  0x7fe00008
+#define PPC_INST_BLR   0x4e800020
+
+#define sigsafe_err(msg)   ({ \
+   ssize_t nbytes __attribute__((unused)); \
+   nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
+static inline unsigned long pkeyreg_get(void)
+{
+   return mfspr(SPRN_AMR);
+}
+
+static inline void pkeyreg_set(unsigned long amr)
+{
+   set_amr(amr);
+}
+
+static void pkey_set_rights(int pkey, unsigned long rights)
+{
+   unsigned long amr, shift;
+
+   shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+   amr = pkeyreg_get();
+   amr &= ~(PKEY_BITS_MASK << shift);
+   amr |= (rights & PKEY_BITS_MASK) << shift;
+   pkeyreg_set(amr);
+}
+
+static int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+   return syscall(SYS_pkey_mprotect, addr, len, prot, pkey);
+}
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+   return syscall(SYS_pkey_alloc, flags, rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+   return syscall(SYS_pkey_free, pkey);
+}
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+   /* Check if this fault originated from the expected address */
+   if (sinfo->si_addr != (void *) fault_addr)
+   sigsafe_err("got a fault for an unexpected address\n");
+
+   _exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+   int signal_pkey;
+
+   /*
+* In older versions of libc, siginfo_t does not have si_pkey as
+* a member.
+*/
+#ifdef si_pkey
+   signal_pkey = sinfo->si_pkey;
+#else
+   signal_pkey = *(

[PATCH v3 1/3] selftests: powerpc: Fix pkey access right updates

2020-06-04 Thread Sandipan Das
The Power ISA mandates that all writes to the Authority
Mask Register (AMR) must always be preceded as well as
succeeded by a context synchronizing instruction.

This makes sure that the tests follow this requirement
when attempting to update a pkey's access rights.

Signed-off-by: Sandipan Das 
---
 tools/testing/selftests/powerpc/include/reg.h| 6 ++
 tools/testing/selftests/powerpc/ptrace/core-pkey.c   | 2 +-
 tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/include/reg.h 
b/tools/testing/selftests/powerpc/include/reg.h
index 022c5076b2c5..c0f2742a3a59 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
 #define SPRN_PPR   896 /* Program Priority Register */
 #define SPRN_AMR   13  /* Authority Mask Register - problem state */
 
+#define set_amr(v) asm volatile("isync;" \
+"mtspr " __stringify(SPRN_AMR) ",%0;" \
+"isync" : \
+   : "r" ((unsigned long)(v)) \
+   : "memory")
+
 /* TEXASR register bits */
 #define TEXASR_FC  0xFE00000000000000
 #define TEXASR_FP  0x0100000000000000
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c 
b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fee032d..bbc05ffc5860 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@ static int child(struct shared_info *info)
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
   user_write, info->amr, pkey1, pkey2, pkey3);
 
-   mtspr(SPRN_AMR, info->amr);
+   set_amr(info->amr);
 
/*
 * We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c 
b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8431e0..904c04f8c919 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -126,7 +126,7 @@ static int child(struct shared_info *info)
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
   user_write, info->amr1, pkey1, pkey2, pkey3);
 
-   mtspr(SPRN_AMR, info->amr1);
+   set_amr(info->amr1);
 
/* Wait for parent to read our AMR value and write a new one. */
ret = prod_parent(&info->child_sync);
-- 
2.25.1



Re: linux-next: build failure on powerpc 8xx with 16k pages

2020-06-04 Thread Christophe Leroy




On 06/04/2020 11:17 AM, Will Deacon wrote:

Hi, [+Peter]

On Thu, Jun 04, 2020 at 10:48:03AM +0000, Christophe Leroy wrote:

Using mpc885_ads_defconfig with CONFIG_PPC_16K_PAGES instead of
CONFIG_PPC_4K_PAGES, getting the following build failure:

   CC  mm/gup.o
In file included from ./include/linux/kernel.h:11:0,
  from mm/gup.c:2:
In function 'gup_hugepte.constprop',
 inlined from 'gup_huge_pd.isra.78' at mm/gup.c:2465:8:
./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_257'
declared with attribute error: Unsupported access size for
{READ,WRITE}_ONCE().
   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
   ^
./include/linux/compiler.h:373:4: note: in definition of macro
'__compiletime_assert'
 prefix ## suffix();\
 ^
./include/linux/compiler.h:392:2: note: in expansion of macro
'_compiletime_assert'
   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
   ^
./include/linux/compiler.h:405:2: note: in expansion of macro
'compiletime_assert'
   compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
   ^
./include/linux/compiler.h:291:2: note: in expansion of macro
'compiletime_assert_rwonce_type'
   compiletime_assert_rwonce_type(x);\
   ^
mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE'
   pte = READ_ONCE(*ptep);
 ^
In function 'gup_get_pte',
 inlined from 'gup_pte_range' at mm/gup.c:2228:9,
 inlined from 'gup_pmd_range' at mm/gup.c:2613:15,
 inlined from 'gup_pud_range' at mm/gup.c:2641:15,
 inlined from 'gup_p4d_range' at mm/gup.c:2666:15,
 inlined from 'gup_pgd_range' at mm/gup.c:2694:15,
 inlined from 'internal_get_user_pages_fast' at mm/gup.c:2785:3:


At first glance, this looks like a real bug in the 16k page code -- you're
loading the pte non-atomically on the fast GUP path and so you're prone to
tearing, which probably isn't what you want. For a short-term hack, I'd
suggest having CONFIG_HAVE_FAST_GUP depend on !CONFIG_PPC_16K_PAGES, but if
you want to support this then you'll need to rework your pte_t so that it
can be loaded atomically.


What do you mean by *rework* pte_t ?
PTEs are 32-bit words and are spread every 4 words in memory.
Therefore pte_t has to be 128 bits because, unlike huge_pte handling,
which always uses huge_pte_offset() in loops, many places in the
kernel do pte++, so we need the pte type to be the size of the interval
from one pte to the next one.


Christophe


Re: [PATCH] ASoC: fsl-asoc-card: Defer probe when fail to find codec device

2020-06-04 Thread Mark Brown
On Thu, 4 Jun 2020 14:25:30 +0800, Shengjiu Wang wrote:
> Defer probe when fail to find codec device, because the codec
> device maybe probed later than machine driver.

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl-asoc-card: Defer probe when fail to find codec device
  commit: e396dec46c5600d426b2ca8a01a877928b50d1d9

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


Boot issue with the latest Git kernel

2020-06-04 Thread Christian Zigotzky

Hi All,

I tested the latest Git kernel today. [1]. Unfortunately it doesn't boot 
on my PowerPC machines.


Could you please test the latest Git kernel with your PowerPC machine?

BTW, it doesn't boot in a virtual QEMU PowerPC machine either.

Thanks,
Christian

[1] 
https://forum.hyperion-entertainment.com/viewtopic.php?f=58&p=50758&sid=3f816f078869510dea9fe4baca3605db#p50758


Re: Boot issue with the latest Git kernel

2020-06-04 Thread Christophe Leroy

Hi,


Le 04/06/2020 à 16:16, Christian Zigotzky a écrit :

Hi All,

I tested the latest Git kernel today. [1]. Unfortunately it doesn't boot 
on my PowerPC machines.


Could you please test the latest Git kernel with your PowerPC machine?

BTW, it doesn't boot in a virtual QEMU PowerPC machine either.



Which machine/platform ? Which defconfig are you using ?

Christophe



Re: Boot issue with the latest Git kernel

2020-06-04 Thread Christophe Leroy




Le 04/06/2020 à 16:26, Christophe Leroy a écrit :

Hi,


Le 04/06/2020 à 16:16, Christian Zigotzky a écrit :

Hi All,

I tested the latest Git kernel today. [1]. Unfortunately it doesn't 
boot on my PowerPC machines.


Could you please test the latest Git kernel with your PowerPC machine?

BTW, it doesn't boot in a virtual QEMU PowerPC machine either.



Which machine/platform ? Which defconfig are you using ?




And are you able to perform a 'git bisect' to identify the guilty commit ?

Thanks
Christophe


Re: linux-next: build failure on powerpc 8xx with 16k pages

2020-06-04 Thread Christophe Leroy




On 06/04/2020 12:00 PM, Peter Zijlstra wrote:

On Thu, Jun 04, 2020 at 12:17:23PM +0100, Will Deacon wrote:

Hi, [+Peter]

On Thu, Jun 04, 2020 at 10:48:03AM +0000, Christophe Leroy wrote:

Using mpc885_ads_defconfig with CONFIG_PPC_16K_PAGES instead of
CONFIG_PPC_4K_PAGES, getting the following build failure:

   CC  mm/gup.o
In file included from ./include/linux/kernel.h:11:0,
  from mm/gup.c:2:
In function 'gup_hugepte.constprop',
 inlined from 'gup_huge_pd.isra.78' at mm/gup.c:2465:8:
./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_257'
declared with attribute error: Unsupported access size for
{READ,WRITE}_ONCE().
   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
   ^
./include/linux/compiler.h:373:4: note: in definition of macro
'__compiletime_assert'
 prefix ## suffix();\
 ^
./include/linux/compiler.h:392:2: note: in expansion of macro
'_compiletime_assert'
   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
   ^
./include/linux/compiler.h:405:2: note: in expansion of macro
'compiletime_assert'
   compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
   ^
./include/linux/compiler.h:291:2: note: in expansion of macro
'compiletime_assert_rwonce_type'
   compiletime_assert_rwonce_type(x);\
   ^
mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE'
   pte = READ_ONCE(*ptep);
 ^
In function 'gup_get_pte',
 inlined from 'gup_pte_range' at mm/gup.c:2228:9,
 inlined from 'gup_pmd_range' at mm/gup.c:2613:15,
 inlined from 'gup_pud_range' at mm/gup.c:2641:15,
 inlined from 'gup_p4d_range' at mm/gup.c:2666:15,
 inlined from 'gup_pgd_range' at mm/gup.c:2694:15,
 inlined from 'internal_get_user_pages_fast' at mm/gup.c:2785:3:


At first glance, this looks like a real bug in the 16k page code -- you're
loading the pte non-atomically on the fast GUP path and so you're prone to
tearing, which probably isn't what you want. For a short-term hack, I'd
suggest having CONFIG_HAVE_FAST_GUP depend on !CONFIG_PPC_16K_PAGES, but if
you want to support this then you'll need to rework your pte_t so that it
can be loaded atomically.


Looking at commit 55c8fc3f49302, they're all the exact same value, so
what they could do is grow another special gup_get_pte() variant that
just loads the first value.

Also, per that very same commit, there's a distinct lack of WRITE_ONCE()
in the pte_update() / __set_pte_at() paths for much of Power.



Thanks for the idea.

Now I get the same issue at

   CC  mm/mincore.o
In file included from ./include/asm-generic/bug.h:5:0,
 from ./arch/powerpc/include/asm/bug.h:109,
 from ./include/linux/bug.h:5,
 from ./include/linux/mmdebug.h:5,
 from ./include/linux/mm.h:9,
 from ./include/linux/pagemap.h:8,
 from mm/mincore.c:11:
In function 'huge_ptep_get',
inlined from 'mincore_hugetlb' at mm/mincore.c:35:20:
./include/linux/compiler.h:392:38: error: call to 
'__compiletime_assert_218' declared with attribute error: Unsupported 
access size for {READ,WRITE}_ONCE().

  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^
./include/linux/compiler.h:373:4: note: in definition of macro 
'__compiletime_assert'

prefix ## suffix();\
^
./include/linux/compiler.h:392:2: note: in expansion of macro 
'_compiletime_assert'

  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^
./include/linux/compiler.h:405:2: note: in expansion of macro 
'compiletime_assert'

  compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
  ^
./include/linux/compiler.h:291:2: note: in expansion of macro 
'compiletime_assert_rwonce_type'

  compiletime_assert_rwonce_type(x);\
  ^
./include/asm-generic/hugetlb.h:125:9: note: in expansion of macro 
'READ_ONCE'

  return READ_ONCE(*ptep);
 ^
make[2]: *** [mm/mincore.o] Error 1

I guess for this one I have to implement platform specific huge_ptep_get()

Christophe


Re: Boot issue with the latest Git kernel

2020-06-04 Thread Christian Zigotzky



> On 4. Jun 2020, at 16:29, Christophe Leroy  
> wrote:
> 
> And are you able to perform a 'git bisect' to identify the guilty commit ?
> 
> Thanks
> Christophe

Hello Christophe,

Unfortunately I haven’t had time to bisect the latest Git kernel. Does it boot 
on your PowerPC machine?

Thanks,
Christian

Re: linux-next: build failure on powerpc 8xx with 16k pages

2020-06-04 Thread Will Deacon
[+Arnd since I think we spoke about this on IRC once]

On Thu, Jun 04, 2020 at 02:35:14PM +0000, Christophe Leroy wrote:
> Now I get the same issue at
> 
>CC  mm/mincore.o
> In file included from ./include/asm-generic/bug.h:5:0,
>  from ./arch/powerpc/include/asm/bug.h:109,
>  from ./include/linux/bug.h:5,
>  from ./include/linux/mmdebug.h:5,
>  from ./include/linux/mm.h:9,
>  from ./include/linux/pagemap.h:8,
>  from mm/mincore.c:11:
> In function 'huge_ptep_get',
> inlined from 'mincore_hugetlb' at mm/mincore.c:35:20:
> ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_218'
> declared with attribute error: Unsupported access size for
> {READ,WRITE}_ONCE().
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ./include/linux/compiler.h:373:4: note: in definition of macro
> '__compiletime_assert'
> prefix ## suffix();\
> ^
> ./include/linux/compiler.h:392:2: note: in expansion of macro
> '_compiletime_assert'
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ./include/linux/compiler.h:405:2: note: in expansion of macro
> 'compiletime_assert'
>   compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
>   ^
> ./include/linux/compiler.h:291:2: note: in expansion of macro
> 'compiletime_assert_rwonce_type'
>   compiletime_assert_rwonce_type(x);\
>   ^
> ./include/asm-generic/hugetlb.h:125:9: note: in expansion of macro
> 'READ_ONCE'
>   return READ_ONCE(*ptep);
>  ^
> make[2]: *** [mm/mincore.o] Error 1
> 
> I guess for this one I have to implement platform specific huge_ptep_get()

Yeah, or bite the bullet and introduce proper accessors for all these
things:

pte_read()
pmd_read()
pud_read()
etc

with the default implementation pointing at READ_ONCE(), but allowing an
architecture override. It's a big job because mm/ would need repainting,
but it would have the benefit of being able to remove aggregate types from
READ_ONCE() entirely and using a special accessor just for the page-table
types.

That might also mean that we could have asm-generic versions of things
like ptep_get_and_clear() that work for architectures with hardware
update and need atomic rmw. But I'm getting ahead of myself.

Will


Re: [PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Mike Rapoport
On Thu, Jun 04, 2020 at 09:44:46AM +0200, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> The pud_alloc_track() needs to do different checks based on whether
> __ARCH_HAS_5LEVEL_HACK is defined, like it already does in
> pud_alloc(). Otherwise it causes boot failures on PowerPC.
> 
> Provide the correct implementations for both possible settings of
> __ARCH_HAS_5LEVEL_HACK to fix the boot problems.

There is a patch in mmotm [1] that completely removes
__ARCH_HAS_5LEVEL_HACK which is a part of the series [2] that updates
p4d folding across architectures. This should fix boot on PowerPC and
the addition of pXd_alloc_track() for __ARCH_HAS_5LEVEL_HACK wouldn't be
necessary.


[1] 
https://github.com/hnaz/linux-mm/commit/cfae68792af3731ac902ea6ba5ed8df5a0f6bd2f
[2] https://lore.kernel.org/kvmarm/20200414153455.21744-1-r...@kernel.org/

> Reported-by: Abdul Haleem 
> Tested-by: Abdul Haleem 
> Tested-by: Satheesh Rajendran 
> Fixes: d8626138009b ("mm: add functions to track page directory 
> modifications")
> Signed-off-by: Joerg Roedel 
> ---
>  include/asm-generic/5level-fixup.h |  5 +
>  include/linux/mm.h | 26 +-
>  2 files changed, 18 insertions(+), 13 deletions(-)
> 
> diff --git a/include/asm-generic/5level-fixup.h 
> b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
>   ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
>   NULL : pud_offset(p4d, address))
>  
> +#define pud_alloc_track(mm, p4d, address, mask) \
> + ((unlikely(pgd_none(*(p4d))) && \
> +   (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? \
> +   NULL : pud_offset(p4d, address))
> +
>  #define p4d_alloc(mm, pgd, address)  (pgd)
>  #define p4d_alloc_track(mm, pgd, address, mask)  (pgd)
>  #define p4d_offset(pgd, start)   (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 66e0977f970a..ad3b31c5bcc3 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
> p4d_t *p4d,
>   NULL : pud_offset(p4d, address);
>  }
>  
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> -
>  {
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
>   return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
>   }
>  
> - return p4d_offset(pgd, address);
> + return pud_offset(p4d, address);
>  }
>  
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> +
>  {
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
>   return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
>   }
>  
> - return pud_offset(p4d, address);
> + return p4d_offset(pgd, address);
>  }
>  
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
>  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned 
> long address)
>  {
>   return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
> -- 
> 2.26.2
> 

-- 
Sincerely yours,
Mike.


Re: [RFC][PATCH v3 1/5] sparc64: Fix asm/percpu.h build error

2020-06-04 Thread Peter Zijlstra
On Fri, May 29, 2020 at 04:29:17PM -0700, David Miller wrote:
> From: Peter Zijlstra 
> Date: Fri, 29 May 2020 23:35:51 +0200
> 
> > ../arch/sparc/include/asm/percpu_64.h:7:24: warning: call-clobbered 
> > register used for global register variable
> > register unsigned long __local_per_cpu_offset asm("g5");
> 
> The "-ffixed-g5" option on the command line tells gcc that we are
> using 'g5' as a fixed register, so some part of your build isn't using
> the:
> 
> KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
> 
> from arch/sparc/Makefile for some reason.

Thanks, that was the clue I needed.

I think I see, what happens is that these headers end up in the VDSO
build, and that doesn't have these CFLAGS, because userspace.

Let me see what to do about that.


Re: [PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Sedat Dilek
On Thu, Jun 4, 2020 at 6:49 PM Mike Rapoport  wrote:
>
> On Thu, Jun 04, 2020 at 09:44:46AM +0200, Joerg Roedel wrote:
> > From: Joerg Roedel 
> >
> > The pud_alloc_track() needs to do different checks based on whether
> > __ARCH_HAS_5LEVEL_HACK is defined, like it already does in
> > pud_alloc(). Otherwise it causes boot failures on PowerPC.
> >
> > Provide the correct implementations for both possible settings of
> > __ARCH_HAS_5LEVEL_HACK to fix the boot problems.
>
> There is a patch in mmotm [1] that completely removes
> __ARCH_HAS_5LEVEL_HACK which is a part of the series [2] that updates
> p4d folding across architectures. This should fix boot on PowerPC and
> the addition of pXd_alloc_track() for __ARCH_HAS_5LEVEL_HACK wouldn't be
> necessary.
>
>
> [1] 
> https://github.com/hnaz/linux-mm/commit/cfae68792af3731ac902ea6ba5ed8df5a0f6bd2f
> [2] https://lore.kernel.org/kvmarm/20200414153455.21744-1-r...@kernel.org/
>

That link shows an overview of v4 and is easily downloadable as a
single mbox file.
See " Series = mm: remove __ARCH_HAS_5LEVEL_HACK"

- Sedat -

[1] https://lore.kernel.org/patchwork/project/lkml/list/?series=438627

> > Reported-by: Abdul Haleem 
> > Tested-by: Abdul Haleem 
> > Tested-by: Satheesh Rajendran 
> > Fixes: d8626138009b ("mm: add functions to track page directory 
> > modifications")
> > Signed-off-by: Joerg Roedel 
> > ---
> >  include/asm-generic/5level-fixup.h |  5 +
> >  include/linux/mm.h | 26 +-
> >  2 files changed, 18 insertions(+), 13 deletions(-)
> >
> > diff --git a/include/asm-generic/5level-fixup.h 
> > b/include/asm-generic/5level-fixup.h
> > index 58046ddc08d0..afbab31fbd7e 100644
> > --- a/include/asm-generic/5level-fixup.h
> > +++ b/include/asm-generic/5level-fixup.h
> > @@ -17,6 +17,11 @@
> >   ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
> >   NULL : pud_offset(p4d, address))
> >
> > +#define pud_alloc_track(mm, p4d, address, mask) \
> > + ((unlikely(pgd_none(*(p4d))) && \
> > +   (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? \
> > +   NULL : pud_offset(p4d, address))
> > +
> >  #define p4d_alloc(mm, pgd, address)  (pgd)
> >  #define p4d_alloc_track(mm, pgd, address, mask)  (pgd)
> >  #define p4d_offset(pgd, start)   (pgd)
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 66e0977f970a..ad3b31c5bcc3 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct 
> > *mm, p4d_t *p4d,
> >   NULL : pud_offset(p4d, address);
> >  }
> >
> > -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> > +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> >unsigned long address,
> >pgtbl_mod_mask *mod_mask)
> > -
> >  {
> > - if (unlikely(pgd_none(*pgd))) {
> > - if (__p4d_alloc(mm, pgd, address))
> > + if (unlikely(p4d_none(*p4d))) {
> > + if (__pud_alloc(mm, p4d, address))
> >   return NULL;
> > - *mod_mask |= PGTBL_PGD_MODIFIED;
> > + *mod_mask |= PGTBL_P4D_MODIFIED;
> >   }
> >
> > - return p4d_offset(pgd, address);
> > + return pud_offset(p4d, address);
> >  }
> >
> > -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> > -
> > -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> > +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> >unsigned long address,
> >pgtbl_mod_mask *mod_mask)
> > +
> >  {
> > - if (unlikely(p4d_none(*p4d))) {
> > - if (__pud_alloc(mm, p4d, address))
> > + if (unlikely(pgd_none(*pgd))) {
> > + if (__p4d_alloc(mm, pgd, address))
> >   return NULL;
> > - *mod_mask |= PGTBL_P4D_MODIFIED;
> > + *mod_mask |= PGTBL_PGD_MODIFIED;
> >   }
> >
> > - return pud_offset(p4d, address);
> > + return p4d_offset(pgd, address);
> >  }
> >
> > +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> > +
> >  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned 
> > long address)
> >  {
> >   return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
> > --
> > 2.26.2
> >
>
> --
> Sincerely yours,
> Mike.


Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> well, ppc64le already cannot be run on those, as far as I know (I don't think 
> it's possible to build ppc64le userland without VSX in any configuration)

VSX is required by the ELFv2 ABI:

"""
Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
compliant processors must implement the following categories:

[...]

Vector-Scalar
"""


Segher


Re: Boot issue with the latest Git kernel

2020-06-04 Thread Christophe Leroy




Le 04/06/2020 à 17:53, Christian Zigotzky a écrit :




On 4. Jun 2020, at 16:29, Christophe Leroy  wrote:

And are you able to perform a 'git bisect' to identify the guilty commit ?

Thanks
Christophe


Hello Christophe,

Unfortunately I haven’t had time to bisect the latest Git kernel. Does it boot 
on your PowerPC machine?



Yes today's linux-next boots on my powerpc 8xx board.

Christophe


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Rich Felker
On Thu, Jun 04, 2020 at 12:12:32PM -0500, Segher Boessenkool wrote:
> On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> > well, ppc64le already cannot be run on those, as far as I know (I
> > don't think it's possible to build ppc64le userland without VSX in
> > any configuration)
> 
> VSX is required by the ELFv2 ABI:
> 
> """
> Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
> compliant processors must implement the following categories:

This is not actually ABI but IBM policy laundered into an ABI
document, which musl does not honor.

Rich


Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
On Tue, Jun 02, 2020 at 05:27:24PM +0200, Michal Suchánek wrote:
> Naturally on POWER the first cpu that has LE support is POWER8 so you
> can count on all other POWER8 features to be present.

This is not true.

The oldest CPU the ELFv2 ABI (and so, powerpc64le-linux) supports is
POWER8, but most 6xx/7xx CPUs had a working LE mode already.  There are
very old ABIs that support LE as well.


Segher


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
On Thu, Jun 04, 2020 at 01:18:44PM -0400, Rich Felker wrote:
> On Thu, Jun 04, 2020 at 12:12:32PM -0500, Segher Boessenkool wrote:
> > On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> > > well, ppc64le already cannot be run on those, as far as I know (I
> > > don't think it's possible to build ppc64le userland without VSX in
> > > any configuration)
> > 
> > VSX is required by the ELFv2 ABI:
> > 
> > """
> > Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
> > compliant processors must implement the following categories:
> 
> This is not actually ABI but IBM policy laundered into an ABI
> document, which musl does not honor.

It is the ABI.  If you think it should be different, make your own ABI,
don't pretend the existing ABI is different than what it is.  Thank you.


Segher


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Rich Felker
On Thu, Jun 04, 2020 at 12:33:12PM -0500, Segher Boessenkool wrote:
> On Thu, Jun 04, 2020 at 01:18:44PM -0400, Rich Felker wrote:
> > On Thu, Jun 04, 2020 at 12:12:32PM -0500, Segher Boessenkool wrote:
> > > On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> > > > well, ppc64le already cannot be run on those, as far as I know (I
> > > > don't think it's possible to build ppc64le userland without VSX in
> > > > any configuration)
> > > 
> > > VSX is required by the ELFv2 ABI:
> > > 
> > > """
> > > Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
> > > compliant processors must implement the following categories:
> > 
> > This is not actually ABI but IBM policy laundered into an ABI
> > document, which musl does not honor.
> 
> It is the ABI.  If you think it should be different, make your own ABI,
> don't pretend the existing ABI is different than what it is.  Thank you.

Our ABI is as specified in the ELFv2 document, but with ld as ld64,
and minus gratuitous requirements on ISA level that are not part of
implementing linkage.

Rich


Re: [PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Guenter Roeck
On Thu, Jun 04, 2020 at 09:44:46AM +0200, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> The pud_alloc_track() needs to do different checks based on whether
> __ARCH_HAS_5LEVEL_HACK is defined, like it already does in
> pud_alloc(). Otherwise it causes boot failures on PowerPC.
> 
> Provide the correct implementations for both possible settings of
> __ARCH_HAS_5LEVEL_HACK to fix the boot problems.
> 
> Reported-by: Abdul Haleem 
> Tested-by: Abdul Haleem 
> Tested-by: Satheesh Rajendran 
> Fixes: d8626138009b ("mm: add functions to track page directory 
> modifications")
> Signed-off-by: Joerg Roedel 

Tested-by: Guenter Roeck 

> ---
>  include/asm-generic/5level-fixup.h |  5 +
>  include/linux/mm.h | 26 +-
>  2 files changed, 18 insertions(+), 13 deletions(-)
> 
> diff --git a/include/asm-generic/5level-fixup.h 
> b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
>   ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
>   NULL : pud_offset(p4d, address))
>  
> +#define pud_alloc_track(mm, p4d, address, mask) \
> + ((unlikely(pgd_none(*(p4d))) && \
> +   (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? \
> +   NULL : pud_offset(p4d, address))
> +
>  #define p4d_alloc(mm, pgd, address)  (pgd)
>  #define p4d_alloc_track(mm, pgd, address, mask)  (pgd)
>  #define p4d_offset(pgd, start)   (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 66e0977f970a..ad3b31c5bcc3 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
> p4d_t *p4d,
>   NULL : pud_offset(p4d, address);
>  }
>  
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> -
>  {
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
>   return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
>   }
>  
> - return p4d_offset(pgd, address);
> + return pud_offset(p4d, address);
>  }
>  
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> +
>  {
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
>   return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
>   }
>  
> - return pud_offset(p4d, address);
> + return p4d_offset(pgd, address);
>  }
>  
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
>  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned 
> long address)
>  {
>   return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
> -- 
> 2.26.2
> 


Re: [RESEND PATCH v9 4/5] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods

2020-06-04 Thread Vaibhav Jain


Ira Weiny  writes:

> On Wed, Jun 03, 2020 at 11:41:42PM +0530, Vaibhav Jain wrote:
>> Hi Ira,
>> 
>> Thanks for reviewing this patch. My responses below:
>> 
>> Ira Weiny  writes:
>> 
>
> ...
>
>> >> + *
>> >> + * Payload Version:
>> >> + *
>> >> + * A 'payload_version' field is present in PDSM header that indicates a 
>> >> specific
>> >> + * version of the structure present in PDSM Payload for a given PDSM 
>> >> command.
>> >> + * This provides backward compatibility in case the PDSM Payload 
>> >> structure
>> >> + * evolves and different structures are supported by 'papr_scm' and 
>> >> 'libndctl'.
>> >> + *
>> >> + * When sending a PDSM Payload to 'papr_scm', 'libndctl' should send the 
>> >> version
>> >> + * of the payload struct it supports via 'payload_version' field. The 
>> >> 'papr_scm'
>> >> + * module when servicing the PDSM envelope checks the 'payload_version' 
>> >> and then
>> >> + * uses 'payload struct version' == MIN('payload_version field',
>> >> + * 'max payload-struct-version supported by papr_scm') to service the 
>> >> PDSM.
>> >> + * After servicing the PDSM, 'papr_scm' put the negotiated version of 
>> >> payload
>> >> + * struct in returned 'payload_version' field.
>> >> + *
>> >> + * Libndctl on receiving the envelope back from papr_scm again checks the
>> >> + * 'payload_version' field and based on it use the appropriate version 
>> >> dsm
>> >> + * struct to parse the results.
>> >> + *
>> >> + * Backward Compatibility:
>> >> + *
>> >> + * Above scheme of exchanging different versioned PDSM struct between 
>> >> libndctl
>> >> + * and papr_scm should provide backward compatibility until following two
>> >> + * assumptions/conditions when defining new PDSM structs hold:
>> >> + *
>> >> + * Let T(X) = { set of attributes in PDSM struct 'T' versioned X }
>> >> + *
>> >> + * 1. T(X) is a proper subset of T(Y) if Y > X.
>> >> + *i.e Each new version of PDSM struct should retain existing struct
>> >> + *attributes from previous version
>> >> + *
>> >> + * 2. If an entity (libndctl or papr_scm) supports a PDSM struct T(X) 
>> >> then
>> >> + *it should also support T(1), T(2)...T(X - 1).
>> >> + *i.e When adding support for new version of a PDSM struct, libndctl
>> >> + *and papr_scm should retain support of the existing PDSM struct
>> >> + *version they support.
>> >
>> > Please see this thread for an example why versions are a bad idea in UAPIs:
>> >
>> > https://lkml.org/lkml/2020/3/26/213
>> >
>> 
>> > While the use of version is different in that thread the fundamental 
>> > issues are
>> > the same.  You end up with some weird matrix of supported features and
>> > structure definitions.  For example, you are opening up the possibility of
>> > changing structures with a different version for no good reason.
>> 
>> Not really sure I understand the statement correctly "you are opening up
>> the possibility of changing structures with a different version for no
>> good reason."
>
[..]
> What I mean is:
>
> struct v1 {
>   u32 x;
>   u32 y;
> };
>
> struct v2 {
>   u32 y;
>   u32 x;
> };
>
> x and y are the same data but you have now redefined the order of the struct.
> You don't need that flexibility/complexity.
>
> Generally I think you are defining:
>
> struct v1 {
>   u32 x;
>   u32 y;
> };
>
> struct v2 {
>   u32 x;
>   u32 y;
>   u32 z;
>   u32 a;
> };
>
> Which becomes 2 structures...  There is no need.
>
> The easiest thing to do is:
>
> struct user_data {
>   u32 x;
>   u32 y;
> };
>
> And later you modify user_data to:
>
> struct user_data {
>   u32 x;
>   u32 y;
>   u32 z;
>   u32 a;
> };
>
> libndctl always passes sizeof(struct user_data) to the call. [Do ensure
> structures are 64bit aligned for this to work.]
>
> The kernel sees the size and returns the amount of data up to that size.
>
> Therefore, older kernels automatically fill in x and y,  newer kernels fill in
> z/a if the buffer was big enough.  libndctl only uses the fields it knows 
> about.
>
> It is _much_ easier this way.  Almost nothing needs to get changed as versions
> roll forward.  The only big issue is if libndctl _needs_ z then it has to 
> check
> if z is returned.
>
> In that case add a cap_mask with bit fields which the kernel can fill in for
> which fields are valid.
>
> struct user_data {
>   u64 cap_mask;  /* where bits define extra future capabilities */
>   u32 x;
>   u32 y;
> };
>
> IFF you need to add data within fields which are reserved you can use
> capability flags to indicate which fields are requested and which are returned
> by the kernel.
>
> But I _think_ for what you want libndctl must survive if z/a are not available
> right?  So just adding to the structure should be fine.
Agreed. But as I mentioned in my response to Dan's review comments [1], we
will be removing the version field altogether and instead will introduce
new psdm requests bound to new struct definitions in conj

Re: [RESEND PATCH v9 5/5] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH

2020-06-04 Thread Vaibhav Jain
Hi Ira,

Thanks again for looking into patch. My responses below:

Ira Weiny  writes:

> On Thu, Jun 04, 2020 at 12:34:04AM +0530, Vaibhav Jain wrote:
>> Hi Ira,
>> 
>> Thanks for reviewing this patch. My responses below:
>> 
>> Ira Weiny  writes:
>> 
>> > On Tue, Jun 02, 2020 at 03:44:38PM +0530, Vaibhav Jain wrote:
>> >> This patch implements support for PDSM request 'PAPR_PDSM_HEALTH'
>> >> that returns a newly introduced 'struct nd_papr_pdsm_health' instance
>> >> containing dimm health information back to user space in response to
>> >> ND_CMD_CALL. This functionality is implemented in newly introduced
>> >> papr_pdsm_health() that queries the nvdimm health information and
>> >> then copies this information to the package payload whose layout is
>> >> defined by 'struct nd_papr_pdsm_health'.
>> >> 
>> >> The patch also introduces a new member 'struct papr_scm_priv.health'
>> >> that's an instance of 'struct nd_papr_pdsm_health' to cache the health
>> >> information of a nvdimm. As a result functions drc_pmem_query_health()
>> >> and flags_show() are updated to populate and use this new struct
>> >> instead of a u64 integer that was earlier used.
>> >> 
>> >> Cc: "Aneesh Kumar K . V" 
>> >> Cc: Dan Williams 
>> >> Cc: Michael Ellerman 
>> >> Cc: Ira Weiny 
>> >> Reviewed-by: Aneesh Kumar K.V 
>> >> Signed-off-by: Vaibhav Jain 
>> >> ---
>> >> Changelog:
>> >> 
>> >> Resend:
>> >> * Added ack from Aneesh.
>> >> 
>> >> v8..v9:
>> >> * s/PAPR_SCM_PDSM_HEALTH/PAPR_PDSM_HEALTH/g  [ Dan , Aneesh ]
>> >> * s/PAPR_SCM_PSDM_DIMM_*/PAPR_PDSM_DIMM_*/g
>> >> * Renamed papr_scm_get_health() to papr_pdsm_health()
>> >> * Updated patch description to replace papr-scm dimm with nvdimm.
>> >> 
>> >> v7..v8:
>> >> * None
>> >> 
>> >> Resend:
>> >> * None
>> >> 
>> >> v6..v7:
>> >> * Updated flags_show() to use seq_buf_printf(). [Mpe]
>> >> * Updated papr_scm_get_health() to use newly introduced
>> >>   __drc_pmem_query_health() bypassing the cache [Mpe].
>> >> 
>> >> v5..v6:
>> >> * Added attribute '__packed' to 'struct nd_papr_pdsm_health_v1' to
>> >>   guard against possibility of different compilers adding different
>> >>   paddings to the struct [ Dan Williams ]
>> >> 
>> >> * Updated 'struct nd_papr_pdsm_health_v1' to use __u8 instead of
>> >>   'bool' and also updated drc_pmem_query_health() to take this into
>> >>   account. [ Dan Williams ]
>> >> 
>> >> v4..v5:
>> >> * None
>> >> 
>> >> v3..v4:
>> >> * Call the DSM_PAPR_SCM_HEALTH service function from
>> >>   papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh]
>> >> 
>> >> v2..v3:
>> >> * Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' types
>> >>   as its exported to the userspace [Aneesh]
>> >> * Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm health
>> >>   from enum to #defines [Aneesh]
>> >> 
>> >> v1..v2:
>> >> * New patch in the series
>> >> ---
>> >>  arch/powerpc/include/uapi/asm/papr_pdsm.h |  39 +++
>> >>  arch/powerpc/platforms/pseries/papr_scm.c | 125 +++---
>> >>  2 files changed, 147 insertions(+), 17 deletions(-)
>> >> 
>> >> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h 
>> >> b/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> >> index 6407fefcc007..411725a91591 100644
>> >> --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> >> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
>> >> @@ -115,6 +115,7 @@ struct nd_pdsm_cmd_pkg {
>> >>   */
>> >>  enum papr_pdsm {
>> >>   PAPR_PDSM_MIN = 0x0,
>> >> + PAPR_PDSM_HEALTH,
>> >>   PAPR_PDSM_MAX,
>> >>  };
>> >>  
>> >> @@ -133,4 +134,42 @@ static inline void *pdsm_cmd_to_payload(struct 
>> >> nd_pdsm_cmd_pkg *pcmd)
>> >>   return (void *)(pcmd->payload);
>> >>  }
>> >>  
>> >> +/* Various nvdimm health indicators */
>> >> +#define PAPR_PDSM_DIMM_HEALTHY   0
>> >> +#define PAPR_PDSM_DIMM_UNHEALTHY 1
>> >> +#define PAPR_PDSM_DIMM_CRITICAL  2
>> >> +#define PAPR_PDSM_DIMM_FATAL 3
>> >> +
>> >> +/*
>> >> + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
>> >> + * Various flags indicate the health status of the dimm.
>> >> + *
>> >> + * dimm_unarmed  : Dimm not armed. So contents wont persist.
>> >> + * dimm_bad_shutdown : Previous shutdown did not persist contents.
>> >> + * dimm_bad_restore  : Contents from previous shutdown werent 
>> >> restored.
>> >> + * dimm_scrubbed : Contents of the dimm have been scrubbed.
>> >> + * dimm_locked   : Contents of the dimm cant be modified until 
>> >> CEC reboot
>> >> + * dimm_encrypted: Contents of dimm are encrypted.
>> >> + * dimm_health   : Dimm health indicator. One of 
>> >> PAPR_PDSM_DIMM_
>> >> + */
>> >> +struct nd_papr_pdsm_health_v1 {
>> >> + __u8 dimm_unarmed;
>> >> + __u8 dimm_bad_shutdown;
>> >> + __u8 dimm_bad_restore;
>> >> + __u8 dimm_scrubbed;
>> >> + __u8 dimm_locked;
>> >> + __u8 dimm_encrypted;
>> >> + __u16 dimm_health;
>> >> +} __packed;
>> >> +
>> >> +/*
>> >> + * Typedef the current struct for dimm_h

[PATCH v4] powerpc/fadump: fix race between pstore write and fadump crash trigger

2020-06-04 Thread Sourabh Jain
When we enter into fadump crash path via system reset we fail to update
the pstore.

On the system reset path we first update the pstore and then go for the
fadump crash. The problem is that when all the CPUs try to get the pstore
lock to initiate the pstore write, only one CPU will acquire the lock
and proceed with the pstore write. Since this is NMI context, CPUs that
fail to get the lock do not wait for their turn to write to the pstore and
simply proceed with the next operation, which is the fadump crash. One of
the CPUs that proceeded on the fadump crash path triggers the crash and does
not wait for the CPU that holds the pstore lock to complete the pstore update.

Timeline diagram depicting the sequence of events that leads to an
unsuccessful pstore update when we hit the fadump crash path via system reset.

 12 3...  n   CPU Threads
 || | |
 || | |
 Reached to   -->|--->|>| --->|
 system reset|| | |
 path|| | |
 || | |
 Try to   -->|--->|>|>|
 acquire the || | |
 pstore lock || | |
 || | |
 || | |
 Got the  -->| +->| | |<-+
 pstore lock | |  | | |  |-->  Didn't get the
 | --+ lock and moving
 || | |ahead on fadump
 || | |crash path
 || | |
  Begins the  -->|| | |
  process to || | |<-- Got the chance to
  update the || | |trigger the crash
  pstore | -> | |... <-   |
 | |  | | |   |
 | |  | | |   |<-- Triggers the
 | |  | | |   |crash
 | |  | | |   |  ^
 | |  | | |   |  |
  Writing to  -->| |  | | |   |  |
  pstore | |  | | |   |  |
   |  |  |
   ^   |__|  |
   |   CPU Relax |
   | |
   +-+
  |
  v
Race: crash triggered before pstore
  update completes

To avoid this race condition a barrier is added on the crash_fadump path; it
prevents the CPU from triggering the crash until all the online CPUs complete
their task.

A barrier is added to make sure all the secondary CPUs hit the
crash_fadump function before we initiate the crash. A timeout is kept to
ensure the primary CPU (the one that initiates the crash) does not wait for
secondary CPUs indefinitely.

Signed-off-by: Sourabh Jain 
---
 arch/powerpc/kernel/fadump.c | 24 
 1 file changed, 24 insertions(+)

 ---
Changelog:

v1 -> v3:
   - https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208267.html

v3 -> v4:

   - Now the primary CPU (one who triggers dump) waits for all secondary
 CPUs to enter and then initiates the crash.

 ---

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 59e60a9a9f5c..4953f3246220 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -32,6 +32,14 @@
 #include 
 #include 
 
+/*
+ * The CPU who acquired the lock to trigger the fadump crash should
+ * wait for other CPUs to enter.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT  500
+
 static struct fw_dump fw_dump;
 
 static void __init fadump_reserve_crash_area(u64 base);
@@ -46,6 +54,8 @@ struct fadump_mrange_info reserved_mrange_info = { 
"reserved", NULL, 0, 0, 0 };
 #ifdef CONFIG_CMA
 static struct cma *fadump_cma;
 
+static atomic_t cpus_in_crash;
+
 /*
  * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
  *
@@ -596,8 +606,10 @@ early_param("fadump_reserve_mem", 
early_fadump_reserve_mem);
 
 void crash_fadump(struct pt_regs *regs, const char *str)
 {
+   unsigned int msecs;
struct fadump_crash_info_header *fdh = NULL;
int old_cpu, this_cpu;
+   unsigned int ncpus = num_online_cpus() - 1; /* Do not include first CPU 
*/
 
if (!should_fadump_crash())
return;
@@ -613,6 +625,8 @@ void crash_fadump(struct pt_regs *regs, const char *str)
old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
 
if (old_cpu != -1) {
+   atomic_inc(&cpus_in_crash);
+
/*
 * We can't loop here indefinitely. Wait as long as fadump
 * is in force. If we race with fadump un-registration th

Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Rich Felker
On Thu, Jun 04, 2020 at 03:00:51PM -0400, David Edelsohn wrote:
> On Thu, Jun 4, 2020 at 1:46 PM Rich Felker  wrote:
> >
> > On Thu, Jun 04, 2020 at 12:33:12PM -0500, Segher Boessenkool wrote:
> > > On Thu, Jun 04, 2020 at 01:18:44PM -0400, Rich Felker wrote:
> > > > On Thu, Jun 04, 2020 at 12:12:32PM -0500, Segher Boessenkool wrote:
> > > > > On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> > > > > > well, ppc64le already cannot be run on those, as far as I know (I
> > > > > > don't think it's possible to build ppc64le userland without VSX in
> > > > > > any configuration)
> > > > >
> > > > > VSX is required by the ELFv2 ABI:
> > > > >
> > > > > """
> > > > > Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
> > > > > compliant processors must implement the following categories:
> > > >
> > > > This is not actually ABI but IBM policy laundered into an ABI
> > > > document, which musl does not honor.
> > >
> > > It is the ABI.  If you think it should be different, make your own ABI,
> > > don't pretend the existing ABI is different than what it is.  Thank you.
> >
> > Our ABI is as specified in the ELFv2 document, but with ld as ld64,
> > and minus gratuitous requirements on ISA level that are not part of
> > implementing linkage.
> 
> Rich,
> 
> If you are changing the Power ELFv2 ABI then it is not the Power ELFv2
> ABI.  You can't cherry-pick what you like and claim that it is
> compatible.  You are not conforming to the ABI.

You are aware of this and you are aware that I don't care about your
opinion on the matter, and that I don't appreciate your ongoing
harassment of users of musl who run on pre-POWER8 hardware that IBM
does not approve them using.

Rich


Re: [PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Andrew Morton
On Thu, 4 Jun 2020 19:48:14 +0300 Mike Rapoport  wrote:

> On Thu, Jun 04, 2020 at 09:44:46AM +0200, Joerg Roedel wrote:
> > From: Joerg Roedel 
> > 
> > The pud_alloc_track() needs to do different checks based on whether
> > __ARCH_HAS_5LEVEL_HACK is defined, like it already does in
> > pud_alloc(). Otherwise it causes boot failures on PowerPC.
> > 
> > Provide the correct implementations for both possible settings of
> > __ARCH_HAS_5LEVEL_HACK to fix the boot problems.
> 
> There is a patch in mmotm [1] that completely removes
> __ARCH_HAS_5LEVEL_HACK which is a part of the series [2] that updates
> p4d folding across architectures. This should fix boot on PowerPC and
> the addition of pXd_alloc_track() for __ARCH_HAS_5LEVEL_HACK wouldn't be
> necessary.
> 
> 
> [1] 
> https://github.com/hnaz/linux-mm/commit/cfae68792af3731ac902ea6ba5ed8df5a0f6bd2f
> [2] https://lore.kernel.org/kvmarm/20200414153455.21744-1-r...@kernel.org/

That patchset is stacked up behind many other patches, including all
the powerpc stuff in linux-next :(

As it's a big bug fix, I'll pull those patches forward, hopefully send it
all Linuswards later today...


Re: [RFC][PATCH v3 1/5] sparc64: Fix asm/percpu.h build error

2020-06-04 Thread Peter Zijlstra
On Thu, Jun 04, 2020 at 06:57:03PM +0200, Peter Zijlstra wrote:

> I think I see, what happens is that these headers end up in the VDSO
> build, and that doesn't have these CFLAGS, because userspace.
> 
> Let me see what to do about that.

I feel like the below is cheating, but it's the best I could find :/
VDSO including kernel headers and the utter maze that our kernel headers
are makes it really hard to untangle :/

This builds sparc64-defconfig and sparc64-all{no,mod}config.

Dave, does this work for you, or should I try harder?

---
 arch/sparc/include/asm/percpu_64.h  | 2 ++
 arch/sparc/include/asm/trap_block.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/sparc/include/asm/percpu_64.h 
b/arch/sparc/include/asm/percpu_64.h
index 32ef6f05cc565..a8786a4b90b6b 100644
--- a/arch/sparc/include/asm/percpu_64.h
+++ b/arch/sparc/include/asm/percpu_64.h
@@ -4,7 +4,9 @@

 #include 

+#ifndef BUILD_VDSO
 register unsigned long __local_per_cpu_offset asm("g5");
+#endif

 #ifdef CONFIG_SMP

diff --git a/arch/sparc/include/asm/trap_block.h 
b/arch/sparc/include/asm/trap_block.h
index 0f6d0c4f66838..ace0d48e837e5 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -2,6 +2,8 @@
 #ifndef _SPARC_TRAP_BLOCK_H
 #define _SPARC_TRAP_BLOCK_H

+#include 
+
 #include 
 #include 




Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Daniel Kolesa
On Thu, Jun 4, 2020, at 19:33, Segher Boessenkool wrote:
> On Thu, Jun 04, 2020 at 01:18:44PM -0400, Rich Felker wrote:
> > On Thu, Jun 04, 2020 at 12:12:32PM -0500, Segher Boessenkool wrote:
> > > On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> > > > well, ppc64le already cannot be run on those, as far as I know (I
> > > > don't think it's possible to build ppc64le userland without VSX in
> > > > any configuration)
> > > 
> > > VSX is required by the ELFv2 ABI:
> > > 
> > > """
> > > Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
> > > compliant processors must implement the following categories:
> > 
> > This is not actually ABI but IBM policy laundered into an ABI
> > document, which musl does not honor.
> 
> It is the ABI.  If you think it should be different, make your own ABI,
> don't pretend the existing ABI is different than what it is.  Thank you.
> 

Well then - in that case, what do you suggest that I do?

Void currently ships an ELFv2 (or apparently not, I guess) 64-bit big endian 
port that works on 970/G5 up. It is important to me that it stays that way (a 
large amount of users are running 970s, so introducing a VSX dependency means I 
might as well abandon the port entirely).

It currently works out of box - there are no changes required in glibc, and 
nearly the entire userland builds and works (about ~11500 out of ~12000 
software packages, those that don't work either don't work on ppc64le either, 
or have issues related to endianness, or some other unrelated reason).

I'd like to eventually get this into a state where I don't have to worry about 
glibc arbitrarily breaking it - which means it would be necessary to stabilize 
it upstream. While I can probably maintain a downstream patchset when it comes 
to it, I'd much prefer if I didn't have to - but this sounds like an official 
ELFv2 glibc BE port would be impossible unless the VSX requirement (and thus 
IEEE 128-bit long double and so on) was in place, which would defeat the point 
of the port.

Is there *any* way I can take that would make upstreams of all parts of the 
toolchain happy? I explicitly don't want to go back to ELFv1. While at it, I'd 
like to transition to ld64 long double format, to match musl and improve 
software compatibility, which I feel will raise more objections from IBM side.

> 
> Segher
>

Daniel


Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-06-04 Thread Ira Weiny
On Thu, Jun 04, 2020 at 12:41:33PM +0300, Mike Rapoport wrote:
> On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote:
> > 
> > sparc32 smp images in next-20200603 still crash for me with a spinlock
> > recursion. s390 images hang early in boot. Several others (alpha, arm64,
> > various ppc) don't even compile. I can run some more bisects over time,
> > but this is becoming a full-time job :-(.
> 
> I've been able to bisect s390 hang to commit b614345f52bc ("x86/entry:
> Clarify irq_{enter,exit}_rcu()").
> 
> After this commit, lockdep_hardirq_exit() is called twice on s390 (and
> others) - one time in irq_exit_rcu() and another one in irq_exit():
> 
> /**
>  * irq_exit_rcu() - Exit an interrupt context without updating RCU
>  *
>  * Also processes softirqs if needed and possible.
>  */
> void irq_exit_rcu(void)
> {
>   __irq_exit_rcu();
>/* must be last! */
>   lockdep_hardirq_exit();
> }
> 
> /**
>  * irq_exit - Exit an interrupt context, update RCU and lockdep
>  *
>  * Also processes softirqs if needed and possible.
>  */
> void irq_exit(void)
> {
>   irq_exit_rcu();
>   rcu_irq_exit();
>/* must be last! */
>   lockdep_hardirq_exit();
> }
> 
> Removing the call in irq_exit() makes s390 boot again, and judging by the
> x86 entry code, the comment /* must be last! */ is stale...

FWIW I got s390 to compile and this patch fixes s390 booting for me as well.

13:05:25 > /home/iweiny/dev/linux-build-test/rootfs/s390/run-qemu-s390.sh 
Build reference: next-20200603-4-g840714292d8c

Building s390:defconfig:initrd ... running ... passed
Building s390:defconfig:virtio-blk-ccw:rootfs ... running ... passed
Building s390:defconfig:scsi[virtio-ccw]:rootfs ... running ..  
passed
Building s390:defconfig:virtio-pci:rootfs ... running ... passed
Building s390:defconfig:scsi[virtio-pci]:rootfs ... running ... passed

Ira

> 
> @Peter, @Thomas, can you comment please?
> 
> From e51d50ee6f4d1f446decf91c2c67230da14ff82c Mon Sep 17 00:00:00 2001
> From: Mike Rapoport 
> Date: Thu, 4 Jun 2020 12:37:03 +0300
> Subject: [PATCH] softirq: don't call lockdep_hardirq_exit() twice
> 
> After commit b614345f52bc ("x86/entry: Clarify irq_{enter,exit}_rcu()")
> lockdep_hardirq_exit() is called twice on every architecture that uses
> irq_exit(): one time in irq_exit_rcu() and another one in irq_exit().
> 
> Remove the extra call in irq_exit().
> 
> Signed-off-by: Mike Rapoport 
> ---
>  kernel/softirq.c | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index a3eb6eba8c41..7523f4ce4c1d 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -427,7 +427,6 @@ static inline void __irq_exit_rcu(void)
>  void irq_exit_rcu(void)
>  {
>   __irq_exit_rcu();
> -  /* must be last! */
>   lockdep_hardirq_exit();
>  }
>  
> @@ -440,8 +439,6 @@ void irq_exit(void)
>  {
>   irq_exit_rcu();
>   rcu_irq_exit();
> -  /* must be last! */
> - lockdep_hardirq_exit();
>  }
>  
>  /*
> -- 
> 2.26.2
> 
> 
> 
> > Guenter
> 
> -- 
> Sincerely yours,
> Mike.


Re: [PATCH v3 2/7] documentation for stats_fs

2020-06-04 Thread Emanuele Giuseppe Esposito

Hi,


+
+The STATS_FS_HIDDEN attribute won't affect the aggregation, it will only
+block the creation of the files.


Why does HIDDEN block the creation of files?  instead of their visibility?


The file itself is used to allow the user to view the content of a 
value. In order to make it hidden, the framework just doesn't create the 
file.

The structure is still present and considered in statsfs, however.

Hidden in this case means not visible at all thus not created, not the 
hidden file concept of dotted files (".filename")





+
+Add values to parent and child (also here order doesn't matter)::
+
+struct kvm *base_ptr = kmalloc(..., sizeof(struct kvm));
+...
+stats_fs_source_add_values(child_source, kvm_stats, base_ptr, 0);
+stats_fs_source_add_values(parent_source, kvm_stats, NULL, 
STATS_FS_HIDDEN);
+
+``child_source`` will be a simple value, since it has a non-NULL base
+pointer, while ``parent_source`` will be an aggregate. During the adding
+phase, also values can optionally be marked as hidden, so that the folder
+and other values can be still shown.
+
+Of course the same ``struct stats_fs_value`` array can be also passed with a
+different base pointer, to represent the same value but in another instance
+of the kvm struct.
+
+Search:
+
+Fetch a value from the child source, returning the value
+pointed by ``(uint64_t *) base_ptr + kvm_stats[0].offset``::
+
+uint64_t ret_child, ret_parent;
+
+stats_fs_source_get_value(child_source, &kvm_stats[0], &ret_child);
+
+Fetch an aggregate value, searching all subsources of ``parent_source`` for
+the specified ``struct stats_fs_value``::
+
+stats_fs_source_get_value(parent_source, &kvm_stats[0], &ret_parent);
+
+assert(ret_child == ret_parent); // check expected result
+
+To make it more interesting, add another child::
+
+struct stats_fs_source child_source2 = stats_fs_source_create(0, 
"child2");
+
+stats_fs_source_add_subordinate(parent_source, child_source2);
+// now  the structure is parent -> child1
+//  -> child2


Is that the same as parent -> child1 -> child2
?  It could almost be read as
 parent -> child1
 parent -> child2


No the example in the documentation shows the relationship
parent -> child1 and
parent -> child2.
It's not the same as
parent -> child1 -> child2.
In order to do the latter, one would need to do:

stats_fs_source_add_subordinate(parent_source, child_source1);
stats_fs_source_add_subordinate(child_source1, child_source2);

Hope that this clarifies it.



Whichever it is, can you make it more explicit, please?



+
+struct kvm *other_base_ptr = kmalloc(..., sizeof(struct kvm));
+...
+stats_fs_source_add_values(child_source2, kvm_stats, other_base_ptr, 
0);
+
+Note that other_base_ptr points to another instance of kvm, so the struct
+stats_fs_value is the same but the address at which they point is not.
+
+Now get the aggregate value::
+
+uint64_t ret_child, ret_child2, ret_parent;
+
+stats_fs_source_get_value(child_source, &kvm_stats[0], &ret_child);
+stats_fs_source_get_value(parent_source, &kvm_stats[0], &ret_parent);
+stats_fs_source_get_value(child_source2, &kvm_stats[0], &ret_child2);
+
+assert((ret_child + ret_child2) == ret_parent);
+
+Cleanup::
+
+stats_fs_source_remove_subordinate(parent_source, child_source);
+stats_fs_source_revoke(child_source);
+stats_fs_source_put(child_source);
+
+stats_fs_source_remove_subordinate(parent_source, child_source2);
+stats_fs_source_revoke(child_source2);
+stats_fs_source_put(child_source2);
+
+stats_fs_source_put(parent_source);
+kfree(other_base_ptr);
+kfree(base_ptr);
+
+Calling stats_fs_source_revoke is very important, because it will ensure


stats_fs_source_revoke()


+that stats_fs will not access the data that were passed to
+stats_fs_source_add_value for this source.
+
+Because open files increase the reference count for a stats_fs_source, the
+source can end up living longer than the data that provides the values for
+the source.  Calling stats_fs_source_revoke just before the backing data


 stats_fs_source_revoke()


+is freed avoids accesses to freed data structures. The sources will return
+0.
+
+This is not needed for the parent_source, since it just contains
+aggregates that would be 0 anyways if no matching child value exist.
+
+API Documentation
+=
+
+.. kernel-doc:: include/linux/stats_fs.h
+   :export: fs/stats_fs/*.c
\ No newline at end of file


Please fix that. ^


Thanks for the documentation.



Thank you for the feedback,
Emanuele


Re: Boot issue with the latest Git kernel

2020-06-04 Thread Christian Zigotzky
Hello Christophe,

I tested it on my Nemo board with a P.A. Semi PA6T CPU [1], on my Cyrus+ board 
with a FSL P5040 CPU [2] and in a virtual e5500 QEMU machine. You can find the 
kernel configs in the following package.

Link: http://www.xenosoft.de/linux-image-5.8-alpha1-X1000_X5000.tar.gz

Cheers,
Christian

[1] https://en.m.wikipedia.org/wiki/AmigaOne_X1000
[2] https://www.amigaos.net/hardware/133/amigaone-x5000


> On 4. Jun 2020, at 16:29, Christophe Leroy  
> wrote:
> 
> 
> 
>> Le 04/06/2020 à 16:26, Christophe Leroy a écrit :
>> Hi,
>>> Le 04/06/2020 à 16:16, Christian Zigotzky a écrit :
>>> Hi All,
>>> 
>>> I tested the latest Git kernel today. [1]. Unfortunately it doesn't boot on 
>>> my PowerPC machines.
>>> 
>>> Could you please test the latest Git kernel with your PowerPC machine?
>>> 
>>> BTW, it doesn't boot in a virtual QEMU PowerPC machine either.
>>> 
>> Which machine/platform ? Which defconfig are you using ?
> 
> 
> And are you able to perform a 'git bisect' to identify the guilty commit ?
> 
> Thanks
> Christophe


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread David Edelsohn
On Thu, Jun 4, 2020 at 1:46 PM Rich Felker  wrote:
>
> On Thu, Jun 04, 2020 at 12:33:12PM -0500, Segher Boessenkool wrote:
> > On Thu, Jun 04, 2020 at 01:18:44PM -0400, Rich Felker wrote:
> > > On Thu, Jun 04, 2020 at 12:12:32PM -0500, Segher Boessenkool wrote:
> > > > On Tue, Jun 02, 2020 at 05:13:25PM +0200, Daniel Kolesa wrote:
> > > > > well, ppc64le already cannot be run on those, as far as I know (I
> > > > > don't think it's possible to build ppc64le userland without VSX in
> > > > > any configuration)
> > > >
> > > > VSX is required by the ELFv2 ABI:
> > > >
> > > > """
> > > > Specifically, to use this ABI and ABI-compliant programs, OpenPOWER-
> > > > compliant processors must implement the following categories:
> > >
> > > This is not actually ABI but IBM policy laundered into an ABI
> > > document, which musl does not honor.
> >
> > It is the ABI.  If you think it should be different, make your own ABI,
> > don't pretend the existing ABI is different than what it is.  Thank you.
>
> Our ABI is as specified in the ELFv2 document, but with ld as ld64,
> and minus gratuitous requirements on ISA level that are not part of
> implementing linkage.

Rich,

If you are changing the Power ELFv2 ABI then it is not the Power ELFv2
ABI.  You can't cherry-pick what you like and claim that it is
compatible.  You are not conforming to the ABI.

Thanks, David


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
Hi!

On Thu, Jun 04, 2020 at 10:39:30PM +0200, Daniel Kolesa wrote:
> On Thu, Jun 4, 2020, at 19:33, Segher Boessenkool wrote:
> > It is the ABI.  If you think it should be different, make your own ABI,
> > don't pretend the existing ABI is different than what it is.  Thank you.
> 
> Well then - in that case, what do you suggest that I do?
> 
> Void currently ships an ELFv2 (or apparently not, I guess) 64-bit big endian 
> port that works on 970/G5 up. It is important to me that it stays that way (a 
> large amount of users are running 970s, so introducing a VSX dependency means 
> I might as well abandon the port entirely).

You can just clearly document what ABI changes you use, and try to make
sure that everyone who uses your distro / your tools / your ABI variant
knows about it.  Telling your users that it is ELFv2, without telling
them it is not compliant, namely X Y Z are different, is a bit of a
disservice to your users, and worse to everyone else involved.

If you always use -mcpu=970 (or similar), then not very much is
different for you most likely -- except of course there is no promise
to the user that they can use VSX and all instructions in ISA 2.07,
which is a very useful promise to have normally.

> It currently works out of box - there are no changes required in glibc, and 
> nearly the entire userland builds and works (about ~11500 out of ~12000 
> software packages, those that don't work either don't work on ppc64le either, 
> or have issues related to endianness, or some other unrelated reason).

Very nice!

> I'd like to eventually get this into a state where I don't have to worry 
> about glibc arbitrarily breaking it - which means it would be necessary to 
> stabilize it upstream. While I can probably maintain a downstream patchset 
> when it comes to it, I'd much prefer if I didn't have to - but this sounds 
> like an official ELFv2 glibc BE port would be impossible unless the VSX 
> requirement (and thus IEEE 128-bit long double and so on) was in place, which 
> would defeat the point of the port.
> 
> Is there *any* way I can take that would make upstreams of all parts of the 
> toolchain happy? I explicitly don't want to go back to ELFv1.

Oh absolutely, it sounds like things are in quite good shape already!
It will save a lot of grief on all sides if you make clear this is not
"plain" ELFv2, and in what ways it differs.

Btw, if you use GCC, *please* send in testresults?  :-)

> While at it, I'd like to transition to ld64 long double format, to match musl 
> and improve software compatibility, which I feel will raise more objections 
> from IBM side.

I have no idea what "ld64 long double" is?  Is that just IEEE DP float?
Aka "long double is the same as double".  That is likely easier for new
ports than "double-double", yes, even if the eventual goal should be
IEEE QP float -- a much smoother transition.

Same goes here: document it!  If your users know that the ELFv2 variant
you give them is not *the* ELFv2, but it differs in some clear ways,
everyone will be happier :-)


Segher


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Daniel Kolesa
On Thu, Jun 4, 2020, at 23:10, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Jun 04, 2020 at 10:39:30PM +0200, Daniel Kolesa wrote:
> > On Thu, Jun 4, 2020, at 19:33, Segher Boessenkool wrote:
> > > It is the ABI.  If you think it should be different, make your own ABI,
> > > don't pretend the existing ABI is different than what it is.  Thank you.
> > 
> > Well then - in that case, what do you suggest that I do?
> > 
> > Void currently ships an ELFv2 (or apparently not, I guess) 64-bit big 
> > endian port that works on 970/G5 up. It is important to me that it stays 
> > that way (a large amount of users are running 970s, so introducing a VSX 
> > dependency means I might as well abandon the port entirely).
> 
> You can just clearly document what ABI changes you use, and try to make
> sure that everyone who uses your distro / your tools / your ABI variant
> knows about it.  Telling your users that it is ELFv2, without telling
> them it is not compliant, namely X Y Z are different, is a bit of a
> disservice to your users, and worse to everyone else involved.

The thing is, I've yet to see in which way the ELFv2 ABI *actually* requires 
VSX - I don't think compiling for 970 introduces any actual differences. There 
will be omissions, yes - but then the more accurate thing would be to say that 
a subset of ELFv2 is used, rather than it being a different ABI per se.

The ELFv2 document specifies things like passing of quadruple precision floats. 
Indeed, VSX is needed there, but that's not a concern if you *don't* use 
quadruple precision floats.

> 
> If you always use -mcpu=970 (or similar), then not very much is
> different for you most likely -- except of course there is no promise
> to the user that they can use VSX and all instructions in ISA 2.07,
> which is a very useful promise to have normally.

Yes, -mcpu=970 is used for default packages. *However*, it is possible that the 
user compiles their own packages with -mcpu=power9 or something similar, and 
then it'll be possible to utilize VSX and all, and it should still work with 
the existing userland. When speaking of ABI, what matters is... well, the 
binary interface, which is the same - so I believe this is still ELFv2. A 
subset is always compliant with the whole.

That's why I'm worried when you speak of introducing a new ABI. As it is, we 
can benefit from having the compiler being generally the same (-mabi=elfv2 
producing correct results even for 970) and retaining interoperability when 
people compile their own code for modern targets that cover the ELFv2 ABI as a 
whole. As I said, it's perfectly possible for somebody to run BE Void on their 
POWER9 machine, then compile their software for POWER9, and still have it work 
with the system packages built for 970 baseline. Pretty sure glibc will still 
provide optimized stuff (e.g. memcpy and so on) for the modern targets based on 
runtime detection, too.

So the "differences" in our case come down to "This is ELFv2, except you can't 
strictly assume that all features are present. In general that means no quad 
precision floating point for you if you want things to run on 970, since you 
don't have VSX regs"

> 
> > It currently works out of box - there are no changes required in glibc, and 
> > nearly the entire userland builds and works (about ~11500 out of ~12000 
> > software packages, those that don't work either don't work on ppc64le 
> > either, or have issues related to endianness, or some other unrelated 
> > reason).
> 
> Very nice!
> 
> > I'd like to eventually get this into a state where I don't have to worry 
> > about glibc arbitrarily breaking it - which means it would be necessary to 
> > stabilize it upstream. While I can probably maintain a downstream patchset 
> > when it comes to it, I'd much prefer if I didn't have to - but this sounds 
> > like an official ELFv2 glibc BE port would be impossible unless the VSX 
> > requirement (and thus IEEE 128-bit long double and so on) was in place, 
> > which would defeat the point of the port.
> > 
> > Is there *any* way I can take that would make upstreams of all parts of the 
> > toolchain happy? I explicitly don't want to go back to ELFv1.
> 
> Oh absolutely, it sounds like things are in quite good shape already!
> It will save a lot of grief on all sides if you make clear this is not
> "plain" ELFv2, and in what ways it differs.

See above.

> 
> Btw, if you use GCC, *please* send in testresults?  :-)

Yes, it's all gcc (we do have clang, but compiling repo packages with clang is 
generally frowned upon in the project, as we have vast majority of packages 
cross-compilable, and our cross-compiling infrastructure is gcc-centric, plus 
we enable certain things by default such as hardening flags that clang does not 
support). I'll try to remember next time I'm running tests.

> 
> > While at it, I'd like to transition to ld64 long double format, to match 
> > musl and improve software compatibility, which I feel will raise more 
> > o

Re: [PATCH] pwm: Add missing "CONFIG_" prefix

2020-06-04 Thread Kees Cook
On Wed, Jun 03, 2020 at 04:04:31PM -0700, Joe Perches wrote:
> On Wed, 2020-06-03 at 15:40 -0700, Kees Cook wrote:
> > The IS_ENABLED() use was missing the CONFIG_ prefix which would have
> > led to skipping this code.
> > 
> > Fixes: 3ad1f3a33286 ("pwm: Implement some checks for lowlevel drivers")
> > Signed-off-by: Kees Cook 
> > ---
> >  drivers/pwm/core.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
> > index 9973c442b455..6b3cbc0490c6 100644
> > --- a/drivers/pwm/core.c
> > +++ b/drivers/pwm/core.c
> > @@ -121,7 +121,7 @@ static int pwm_device_request(struct pwm_device *pwm, 
> > const char *label)
> > pwm->chip->ops->get_state(pwm->chip, pwm, &pwm->state);
> > trace_pwm_get(pwm, &pwm->state);
> >  
> > -   if (IS_ENABLED(PWM_DEBUG))
> > +   if (IS_ENABLED(CONFIG_PWM_DEBUG))
> > pwm->last = pwm->state;
> > }
> >  
> > -- 
> > 2.25.1
> > 
> 
> more odd uses (mostly in comments)
> 
> $ git grep -P -oh '\bIS_ENABLED\s*\(\s*\w+\s*\)'| \
>   sed -r 's/\s+//g'| \
>   grep -v '(CONFIG_' | \
>   sort | uniq -c | sort -rn
>   7 IS_ENABLED(DEBUG)
>   4 IS_ENABLED(DRM_I915_SELFTEST)
>   4 IS_ENABLED(cfg)
>   2 IS_ENABLED(opt_name)
>   2 IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)
>   2 IS_ENABLED(config)
>   2 IS_ENABLED(cond)
>   2 IS_ENABLED(__BIG_ENDIAN)
>   1 IS_ENABLED(x)
>   1 IS_ENABLED(STRICT_KERNEL_RWX)
>   1 IS_ENABLED(PWM_DEBUG)
>   1 IS_ENABLED(option)
>   1 IS_ENABLED(ETHTOOL_NETLINK)
>   1 IS_ENABLED(DEBUG_RANDOM_TRIE)
>   1 IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST)
> 
> STRICT_KERNEL_RWX is misused here in ppc
> 
> ---
> 
> Fix pr_warn without newline too.
> 
>  arch/powerpc/mm/book3s64/hash_utils.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
> b/arch/powerpc/mm/book3s64/hash_utils.c
> index 51e3c15f7aff..dd60c5f2b991 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -660,11 +660,10 @@ static void __init htab_init_page_sizes(void)
>* Pick a size for the linear mapping. Currently, we only
>* support 16M, 1M and 4K which is the default
>*/
> - if (IS_ENABLED(STRICT_KERNEL_RWX) &&
> + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
>   (unsigned long)_stext % 0x100) {
>   if (mmu_psize_defs[MMU_PAGE_16M].shift)
> - pr_warn("Kernel not 16M aligned, "
> - "disabling 16M linear map alignment");
> + pr_warn("Kernel not 16M aligned, disabling 16M 
> linear map alignment\n");
>   aligned = false;
>   }

Joe, I was going to send all of the fixes for these issues, but your
patch doesn't have a SoB. Shall I add one for the above patch?

-- 
Kees Cook


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Daniel Kolesa
On Thu, Jun 4, 2020, at 23:55, Phil Blundell wrote:
> On Thu, Jun 04, 2020 at 10:39:30PM +0200, Daniel Kolesa wrote:
> > Is there *any* way I can take that would make upstreams of all parts 
> > of the toolchain happy? I explicitly don't want to go back to ELFv1. 
> > While at it, I'd like to transition to ld64 long double format, to 
> > match musl and improve software compatibility, which I feel will raise 
> > more objections from IBM side.
> 
> Although I don't pretend to understand all the nuances of your port, and 
> in particular I have no idea what the thing about "ld64 long double 
> format" means, this doesn't sound like a particularly unusual situation.  
> If I understand correctly, you are in the position of essentially 
> wanting to implement the calling-standard part of the ABI on hardware 
> that isn't capable of implementing the full ABI as documented.

Well, the ld64 part is a separate issue. Defining a new long double ABI would 
break the ELFv2 ABI, since ELFv2 says long double must be 16-byte, of either 
IBM double-double format or IEEE754 binary128 :)

However, when I was talking about ELFv2 on 970 being a subset, I meant with the 
IBM double-double format, which has been present since glibc 2.4 at least, and 
doesn't require any vector functionality (it works even on 32-bit PowerPC)

So, defining a new long double ABI would indeed be a change compared to 
standard ELFv2. But, if we were doing a new port anyway, I think it'd be 
potentially worth it.

> 
> If that's the case then, depending on exactly what instructions are
> missing, I think your choices are:
> 
> 1a. Define your own subset of ELFv2 which is interworkable with the full 
> ABI at the function call interface but doesn't make all the same 
> guarantees about binary compatibility.  That would mean that a binary 
> built with your toolchain and conforming to the subset ABI would run on 
> any system that implements the full ELFv2 ABI, but the opposite is not 
> necessarily true.  There should be no impediment to getting support for 
> such an ABI upstream in any part of the GNU toolchain where it's 
> required if you can demonstrate that there's a non-trivial userbase for 
> it.  The hardest part may be thinking of a name.

Yes, this is the approach I would like to take.

> 
> 1b. Or, if the missing instructions are severe enough that it simply 
> isn't possible to have an interworkable implementation, you just need to 
> define your own ABI that fits your needs.  You can still borrow as much 
> as necessary from ELFv2 but you definitely need to call it something 
> else at that point.  All the other comments from 1a above still apply.
> 
> 2. Implement kernel emulation for the missing instructions.  If they
> are seldom used in practice then this might be adequate.  Of course,
> binaries that use them intensively will be slow; you'd have to judge
> whether this is better or worse than having them not run at all.  If
> you do this then you can implement the full ELFv2 ABI; your own
> toolchain might still choose not to use the instructions that it knows
> are going to be emulated, but at least other binaries will still run
> and you can call yourself compatible.
> 
> 3. Persuade whoever controls the ELFv2 ABI to relax their requirements.
> But I assume they didn't make the original decision capriciously so
> this might be hard/impossible.  ABI definitions from hardware vendors
> are always slightly political and we just have to accept this.

IBM has their commercial interests here and I don't think it'd be wise to take 
this kind of path. Implementing a new variant would probably be better; if we 
were documenting such differences, it'd probably be worthwhile to sync up with 
musl, since it'd be exactly the same ABI.

> 
> FWIW, we faced a similar situation about 20 years ago when the then-new 
> ARM EABI was defined.  This essentially required implementations to 
> support the ARMv5T instruction set; the committee that defined the ABI 
> took the view that requiring implementations to cater for older 
> architectures would be too onerous.  It was entirely possible to 
> implement 99% of the EABI on older processors; such implementations 
> weren't strictly conforming but they were interworkable enough to be 
> useful in practice, and the "almost-EABI" was still significantly
> better than what had gone before.
> 
> Phil
>

Daniel


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Joseph Myers
On Thu, 4 Jun 2020, Daniel Kolesa wrote:

> The ELFv2 document specifies things like passing of quadruple precision 
> floats. Indeed, VSX is needed there, but that's not a concern if you 
> *don't* use quadruple precision floats.

My understanding is that the registers used for argument passing are all 
ones that exactly correspond to the Vector registers in earlier 
instruction set versions.  In other words, you could *in principle* 
produce an object, or a whole libm shared library, that (a) passes or 
receives _Float128 values in registers, (b) does not use any instructions 
beyond those available with -mcpu=970, (c) would work as intended whether 
executed on a 970 or on POWER8 and (d) when executed on POWER8, would 
fully interoperate with objects receiving or passing _Float128 values and 
compiled for POWER8 to use VSX instructions for that purpose.  GCC may not 
support _Float128 for older processors, but that doesn't prevent you from 
maintaining patches to add such support.  (But if you want to support 
those 64-bit processors that don't have Vector registers at all, you 
indeed can't use binary128 and interoperate with code using VSX for that 
format in POWER8.)

(Cf. how the Arm hard-float ABI variant works even on processors with 
single-precision-only VFP, because such processors still have the 
double-precision loads and stores although not double-precision 
arithmetic.  When working on that ABI support in GCC some years ago, I 
also made sure that GNU vector types corresponding to NEON vector types 
were passed consistently for the hard-float ABI whether or not any vector 
instructions were present - thus, avoiding depending on the machine modes 
for those vector types because GCC could choose a different machine mode 
depending on the instructions available.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Daniel Kolesa
On Fri, Jun 5, 2020, at 00:08, Joseph Myers wrote:
> On Thu, 4 Jun 2020, Daniel Kolesa wrote:
> 
> > The ELFv2 document specifies things like passing of quadruple precision 
> > floats. Indeed, VSX is needed there, but that's not a concern if you 
> > *don't* use quadruple precision floats.
> 
> My understanding is that the registers used for argument passing are all 
> ones that exactly correspond to the Vector registers in earlier 
> instruction set versions.  In other words, you could *in principle* 
> produce an object, or a whole libm shared library, that (a) passes or 
> receives _Float128 values in registers, (b) does not use any instructions 
> beyond those available with -mcpu=970, (c) would work as intended whether 
> executed on a 970 or on POWER8 and (d) when executed on POWER8, would 
> fully interoperate with objects receiving or passing _Float128 values and 
> compiled for POWER8 to use VSX instructions for that purpose.  GCC may not 
> support _Float128 for older processors, but that doesn't prevent you from 
> maintaining patches to add such support.  (But if you want to support 
> those 64-bit processors that don't have Vector registers at all, you 
> indeed can't use binary128 and interoperate with code using VSX for that 
> format in POWER8.)

There's a potential userbase with 64-bit BE processors from Freescale/NXP that 
don't have any AltiVec support, I believe they are still in production - I'd 
like to retain support for these targets, as well as older IBM processors. The 
userland generally also supports that, and we've had multiple requests for 
support of this kind of hardware.

And while implementing it with just VMX may be possible, most hardware running 
this ABI wouldn't have any support for quad precision FP, and would perform 
better with using just double-precision.

We're not a commercial project, so we're just trying to support users within 
the FOSS community; I definitely wouldn't mind having this be just an ABI 
variant parallel to the others. Using 64-bit long doubles also has the benefit 
of being the same ABI as musl, which would enable things such as gcompat to 
work.

Either way I'll think about it some more and possibly prepare an RFC port. I'm 
definitely willing to put in the work and later maintenance effort if that's 
what it takes to make it happen.

> 
> (Cf. how the Arm hard-float ABI variant works even on processors with 
> single-precision-only VFP, because such processors still have the 
> double-precision loads and stores although not double-precision 
> arithmetic.  When working on that ABI support in GCC some years ago, I 
> also made sure that GNU vector types corresponding to NEON vector types 
> were passed consistently for the hard-float ABI whether or not any vector 
> instructions were present - thus, avoiding depending on the machine modes 
> for those vector types because GCC could choose a different machine mode 
> depending on the instructions available.)
> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com
>

Daniel


Re: [PATCH] net: ethernet: freescale: remove unneeded include for ucc_geth

2020-06-04 Thread David Miller
From: Valentin Longchamp 
Date: Wed,  3 Jun 2020 23:28:23 +0200

> net/sch_generic.h does not need to be included, remove it.
> 
> Signed-off-by: Valentin Longchamp 

Applied.


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
Hi!

On Thu, Jun 04, 2020 at 11:55:11PM +0200, Phil Blundell wrote:
> 1a. Define your own subset of ELFv2 which is interworkable with the full 
> ABI at the function call interface but doesn't make all the same 
> guarantees about binary compatibility.  That would mean that a binary 
> built with your toolchain and conforming to the subset ABI would run on 
> any system that implements the full ELFv2 ABI, but the opposite is not 
> necessarily true.  There should be no impediment to getting support for 
> such an ABI upstream in any part of the GNU toolchain where it's 
> required if you can demonstrate that there's a non-trivial userbase for 
> it.  The hardest part may be thinking of a name.

And you can only use shared objects also built for that subset ABI.  If
you use some binary distribution, then it will also have to be built for
that subset, practically anyway.

This is very similar to soft-float targets.  There are "standard" ways
to deal with this.  Distros usually balk at having to maintain multiple
variants of a target, and users do not usually want to be restricted to
the lowest common denominator.  There always is that tension.

> 1b. Or, if the missing instructions are severe enough that it simply 
> isn't possible to have an interworkable implementation, you just need to 
> define your own ABI that fits your needs.  You can still borrow as much 
> as necessary from ELFv2 but you definitely need to call it something 
> else at that point.  All the other comments from 1a above still apply.

A different name is handy in casual conversation then, yes; but also in
case 1a, it should be clear what is what somehow.

> 2. Implement kernel emulation for the missing instructions.  If they
> are seldom used in practice then this might be adequate.  Of course,
> binaries that use them intensively will be slow; you'd have to judge
> whether this is better or worse than having them not run at all.  If
> you do this then you can implement the full ELFv2 ABI; your own
> toolchain might still choose not to use the instructions that it knows
> are going to be emulated, but at least other binaries will still run
> and you can call yourself compatible.

But not just instructions, there are actual new registers!  This might
be way too much work in the case of VSX.

But it is possible that implementing QP float (binary128) this way is
a feasible way forward, _if_ you have AltiVec enabled.

> 3. Persuade whoever controls the ELFv2 ABI to relax their requirements.
> But I assume they didn't make the original decision capriciously so
> this might be hard/impossible.  ABI definitions from hardware vendors
> are always slightly political and we just have to accept this.

There is more process involved than most open source people are
comfortable with :-/

> FWIW, we faced a similar situation about 20 years ago when the then-new 
> ARM EABI was defined.  This essentially required implementations to 
> support the ARMv5T instruction set; the committee that defined the ABI 
> took the view that requiring implementations to cater for older 
> architectures would be too onerous.  It was entirely possible to 
> implement 99% of the EABI on older processors; such implementations 
> weren't strictly conforming but they were interworkable enough to be 
> useful in practice, and the "almost-EABI" was still significantly
> better than what had gone before.

Yeah, this situation is quite similar in some ways :-)

The compilers should be able to adjust to what you need pretty easily.
Since you seem to have a distribution on-board already, the biggest
hurdle left is getting glibc to accept the new port, I think.  I don't
know if it will be easy for them, or a lot of work instead.

Thanks,


Segher


Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
Hi!

On Thu, Jun 04, 2020 at 11:43:53PM +0200, Daniel Kolesa wrote:
> The thing is, I've yet to see in which way the ELFv2 ABI *actually* requires 
> VSX - I don't think compiling for 970 introduces any actual differences. 
> There will be omissions, yes - but then the more accurate thing would be to 
> say that a subset of ELFv2 is used, rather than it being a different ABI per 
> se.

Two big things are that binaries that someone else made are supposed to
work for you as well -- including binaries using VSX registers, or any
instructions that require ISA 2.07 (or some older ISA after 970).  This
includes DSOs (shared libraries).  So for a distribution this means that
they will not use VSX *anywhere*, or only in very specialised things.
That is a many-years setback, for people/situations where it could be
used.

> The ELFv2 document specifies things like passing of quadruple precision 
> floats. Indeed, VSX is needed there, but that's not a concern if you *don't* 
> use quadruple precision floats.

As Joseph says, QP float is passed in the VRs, so that works just fine
*if* you have AltiVec.  If not, you probably should pass such values in
the GPRs, or however a struct of 16 bytes is passed in whatever ABI you
use (this is a much more general problem than just BE ELFv2 ;-) )  And
then you have some kind of "partial soft float".  Fun!  (Or not...)

> > If you always use -mcpu=970 (or similar), then not very much is
> > different for you most likely -- except of course there is no promise
> > to the user that they can use VSX and all instructions in ISA 2.07,
> > which is a very useful promise to have normally.
> 
> Yes, -mcpu=970 is used for default packages. *However*, it is possible that 
> the user compiles their own packages with -mcpu=power9 or something similar, 
> and then it'll be possible to utilize VSX and all, and it should still work 
> with the existing userland. When speaking of ABI, what matters is... well, 
> the binary interface, which is the same - so I believe this is still ELFv2. A 
> subset is always compliant with the whole.

The same calling convention will probably work, yes.  An ABI is more
than that though.


> > Btw, if you use GCC, *please* send in testresults?  :-)
> 
> Yes, it's all gcc (we do have clang, but compiling repo packages with clang 
> is generally frowned upon in the project, as we have vast majority of 
> packages cross-compilable, and our cross-compiling infrastructure is 
> gcc-centric, plus we enable certain things by default such as hardening flags 
> that clang does not support). I'll try to remember next time I'm running 
> tests.

Thanks in advance!

> I have a feeling that glibc would object to such port, since it means it 
> would have to exist in parallel with a potential different ELFv2 port that 
> does have a POWER8 minimal requirement; gcc would need a way to tell them 
> apart, too (based on what would they be told apart? the simplest way would 
> probably be something like, if --with-abi=elfv2 { if --with-cpu < power8 -> 
> use glibc-novsx else use glibc-vsx } ...)

The target name allows to make such distinctions: this could for example
be  powerpc64-*-linux-void  (maybe I put the distinction in the wrong
part of the name here?  The glibc people will know better, and "void" is
probably not a great name anyway).


Segher


[PATCH v10 0/6] powerpc/papr_scm: Add support for reporting nvdimm health

2020-06-04 Thread Vaibhav Jain
Changes since v9 [1]:
* Addressed review comments from Ira and Dan Williams.
* Removed the contentious 'payload_version' field from struct
  nd_pdsm_cmd_pkg.
* Also removed code/defines related to handling of different version
  of a pdsm payload struct.  
* Consolidated validation checks for nd_pdsm_cmd_pkg in
  is_cmd_valid().
* Added a check in is_cmd_valid() to ensure reserved fields in struct
  nd_pdsm_cmd_pkg are set to '0'.
* Reworked papr_pdsm_health() to avoid removing code that was added in
  initial part of this patch-series.
* Added a new patch to the series to move out some proposed changes to
  papr_scm_ndctl() in an independent patch.
* Reworked papr_pdsm_health() to ensure correct payload_size in the
  pdsm command package.

[1] 
https://lore.kernel.org/linux-nvdimm/20200602101438.73929-1-vaib...@linux.ibm.com
---

The PAPR standard[2][4] provides mechanisms to query the health and
performance stats of an NVDIMM via various hcalls as described in
Ref[3].  Until now these stats were never available nor exposed to the
user-space tools like 'ndctl'. This is partly due to PAPR platform not
having support for ACPI and NFIT. Hence 'ndctl' is unable to query and
report the dimm health status and a user had no way to determine the
current health status of an NVDIMM.

To overcome this limitation, this patch-set updates papr_scm kernel
module to query and fetch NVDIMM health stats using hcalls described
in Ref[3].  These health and performance stats are then exposed to
userspace via sysfs and PAPR-NVDIMM-Specific-Methods(PDSM) issued by
libndctl.

These changes coupled with proposed ndctl changes located at Ref[5]
should provide a way for the user to retrieve NVDIMM health status
using ndctl.

Below is a sample output using proposed kernel + ndctl for PAPR NVDIMM
in an emulation environment:

 # ndctl list -DH
[
  {
"dev":"nmem0",
"health":{
  "health_state":"fatal",
  "shutdown_state":"dirty"
}
  }
]

Dimm health report output on a pseries guest lpar with vPMEM or HMS
based NVDIMMs that are in perfectly healthy conditions:

 # ndctl list -d nmem0 -H
[
  {
"dev":"nmem0",
"health":{
  "health_state":"ok",
  "shutdown_state":"clean"
}
  }
]

PAPR NVDIMM-Specific-Methods(PDSM)
==================================

PDSM requests are issued by vendor specific code in libndctl to
execute certain operations or fetch information from NVDIMMs. PDSM
requests can be sent to papr_scm module via libndctl(userspace) and
libnvdimm (kernel) using the ND_CMD_CALL ioctl command which can be
handled in the dimm control function papr_scm_ndctl(). Current
patchset proposes a single PDSM to retrieve NVDIMM health, defined in
the newly introduced uapi header named 'papr_pdsm.h'. Support for
more PDSMs will be added in future.

Structure of the patch-set
==========================

The patch-set starts with a doc patch documenting details of hcall
H_SCM_HEALTH. Second patch exports kernel symbol seq_buf_printf()
that's used in subsequent patches to generate sysfs attribute content.

Third patch implements support for fetching NVDIMM health information
from PHYP and partially exposing it to user-space via a NVDIMM sysfs
flag.

Fourth patch updates papr_scm_ndctl() to handle a possible error case
and also improve debug logging.

Fifth patch deals with implementing support for servicing PDSM
commands in papr_scm module.

Finally the last patch implements support for servicing PDSM
'PAPR_PDSM_HEALTH' that returns the NVDIMM health information to
libndctl.

References:
[2] "Power Architecture Platform Reference"
  https://en.wikipedia.org/wiki/Power_Architecture_Platform_Reference
[3] commit 58b278f568f0
 ("powerpc: Provide initial documentation for PAPR hcalls")
[4] "Linux on Power Architecture Platform Reference"
 https://members.openpowerfoundation.org/document/dl/469
[5] https://github.com/vaibhav92/ndctl/tree/papr_scm_health_v10

---

Vaibhav Jain (6):
  powerpc: Document details on H_SCM_HEALTH hcall
  seq_buf: Export seq_buf_printf
  powerpc/papr_scm: Fetch nvdimm health information from PHYP
  powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl()
  ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods
  powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH

 Documentation/ABI/testing/sysfs-bus-papr-pmem |  27 ++
 Documentation/powerpc/papr_hcalls.rst |  46 ++-
 arch/powerpc/include/uapi/asm/papr_pdsm.h | 131 +++
 arch/powerpc/platforms/pseries/papr_scm.c | 361 +-
 include/uapi/linux/ndctl.h|   1 +
 lib/seq_buf.c |   1 +
 6 files changed, 554 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem
 create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h

-- 
2.26.2



[PATCH v10 1/6] powerpc: Document details on H_SCM_HEALTH hcall

2020-06-04 Thread Vaibhav Jain
Add documentation to 'papr_hcalls.rst' describing the bitmap flags
that are returned from H_SCM_HEALTH hcall as per the PAPR-SCM
specification.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Acked-by: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v9..v10:
* Added ack from Ira.

Resend:
* None

v8..v9:
* s/SCM/PMEM device. [ Dan Williams, Aneesh ]

v7..v8:
* Added a clarification on bit-ordering of Health Bitmap

Resend:
* None

v6..v7:
* None

v5..v6:
* New patch in the series
---
 Documentation/powerpc/papr_hcalls.rst | 46 ---
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/Documentation/powerpc/papr_hcalls.rst 
b/Documentation/powerpc/papr_hcalls.rst
index 3493631a60f8..48fcf1255a33 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -220,13 +220,51 @@ from the LPAR memory.
 **H_SCM_HEALTH**
 
 | Input: drcIndex
-| Out: *health-bitmap, health-bit-valid-bitmap*
+| Out: *health-bitmap (r4), health-bit-valid-bitmap (r5)*
 | Return Value: *H_Success, H_Parameter, H_Hardware*
 
 Given a DRC Index return the info on predictive failure and overall health of
-the NVDIMM. The asserted bits in the health-bitmap indicate a single predictive
-failure and health-bit-valid-bitmap indicate which bits in health-bitmap are
-valid.
+the PMEM device. The asserted bits in the health-bitmap indicate one or more 
states
+(described in table below) of the PMEM device and health-bit-valid-bitmap 
indicate
+which bits in health-bitmap are valid. The bits are reported in
+reverse bit ordering for example a value of 0xC400000000000000
+indicates bits 0, 1, and 5 are valid.
+
+Health Bitmap Flags:
+
++--+---+
+|  Bit |   Definition  
|
++==+===+
+|  00  | PMEM device is unable to persist memory contents. 
|
+|  | If the system is powered down, nothing will be saved. 
|
++--+---+
+|  01  | PMEM device failed to persist memory contents. Either contents were   
|
+|  | not saved successfully on power down or were not restored properly on 
|
+|  | power up. 
|
++--+---+
+|  02  | PMEM device contents are persisted from previous IPL. The data from   
|
+|  | the last boot were successfully restored. 
|
++--+---+
+|  03  | PMEM device contents are not persisted from previous IPL. There was 
no|
+|  | data to restore from the last boot.   
|
++--+---+
+|  04  | PMEM device memory life remaining is critically low   
|
++--+---+
+|  05  | PMEM device will be garded off next IPL due to failure
|
++--+---+
+|  06  | PMEM device contents cannot persist due to current platform health
|
+|  | status. A hardware failure may prevent data from being saved or   
|
+|  | restored. 
|
++--+---+
+|  07  | PMEM device is unable to persist memory contents in certain 
conditions|
++--+---+
+|  08  | PMEM device is encrypted  
|
++--+---+
+|  09  | PMEM device has successfully completed a requested erase or secure
|
+|  | erase procedure.  
|
++--+---+
+|10:63 | Reserved / Unused 
|
++--+---+
 
 **H_SCM_PERFORMANCE_STATS**
 
-- 
2.26.2



[PATCH v10 5/6] ndctl/papr_scm, uapi: Add support for PAPR nvdimm specific methods

2020-06-04 Thread Vaibhav Jain
Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm
module and add the command family NVDIMM_FAMILY_PAPR to the white list
of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the
nvdimm command mask and implement necessary scaffolding in the module
to handle ND_CMD_CALL ioctl and PDSM requests that we receive.

The layout of the PDSM request as we expect from libnvdimm/libndctl is
described in newly introduced uapi header 'papr_pdsm.h' which
defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used
to communicate the PDSM request via member
'nd_cmd_pkg.nd_command' and the size of the payload that needs to be
sent/received for servicing the PDSM.

A new function is_cmd_valid() is implemented that reads the args to
papr_scm_ndctl() and performs sanity tests on them. A new function
papr_scm_service_pdsm() is introduced and is called from
papr_scm_ndctl() in case a PDSM request is received via the ND_CMD_CALL
command from libnvdimm.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v9..v10:
* Simplified 'struct nd_pdsm_cmd_pkg' by removing the
  'payload_version' field.
* Removed the corresponding documentation on versioning and backward
  compatibility from 'papr_pdsm.h'
* Reduced the size of reserved fields to 4-bytes making 'struct
  nd_pdsm_cmd_pkg' 64 + 8 bytes long.
* Updated is_cmd_valid() to enforce validation checks on pdsm
  commands. [ Dan Williams ]
* Added check for reserved fields being set to '0' in is_cmd_valid()
  [ Ira ]
* Moved changes for checking cmd_rc == NULL and logging improvements
  to a separate prelim patch [ Ira ].
* Moved  pdsm package validation checks from papr_scm_service_pdsm()
  to is_cmd_valid().
* Marked papr_scm_service_pdsm() return type as 'void' since errors
  are reported in nd_pdsm_cmd_pkg.cmd_status field.

Resend:
* Added ack from Aneesh.

v8..v9:
* Reduced the usage of term SCM replacing it with appropriate
  replacement [ Dan Williams, Aneesh ]
* Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
* s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
* s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
* Minor updates to 'papr_pdsm.h' to replace usage of term 'SCM'.
* Minor update to patch description.

v7..v8:
* Removed the 'payload_offset' field from 'struct
  nd_pdsm_cmd_pkg'. Instead command payload is always assumed to start
  at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
* To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
  'reserved' field of 10-bytes is introduced. [ Aneesh ]
* Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
  [ Ira ]

Resend:
* None

v6..v7 :
* Removed the re-definitions of __packed macro from papr_scm_pdsm.h
  [Mpe].
* Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h [Mpe].
* Removed macros that were unused in papr_scm.c from papr_scm_pdsm.h
  [Mpe].
* Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]

v5..v6 :
* Changed the usage of the term DSM to PDSM to distinguish it from the
  ACPI term [ Dan Williams ]
* Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various struct
  to reflect the new terminology.
* Updated the patch description and title to reflect the new terminology.
* Squashed patch to introduce new command family in 'ndctl.h' with
  this patch [ Dan Williams ]
* Updated the papr_scm_pdsm method starting index from 0x1 to 0x0
  [ Dan Williams ]
* Removed redundant license text from the papr_scm_pdsm.h file.
  [ Dan Williams ]
* s/envelop/envelope/ at various places [ Dan Williams ]
* Added '__packed' attribute to command package header to guard
  against different compilers adding padding between the fields.
  [ Dan Williams]
* Converted various pr_debug to dev_debug [ Dan Williams ]

v4..v5 :
* None

v3..v4 :
* None

v2..v3 :
* Updated the patch prefix to 'ndctl/uapi' [Aneesh]

v1..v2 :
* None
---
 arch/powerpc/include/uapi/asm/papr_pdsm.h |  98 +++
 arch/powerpc/platforms/pseries/papr_scm.c | 113 +-
 include/uapi/linux/ndctl.h|   1 +
 3 files changed, 207 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h

diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h 
b/arch/powerpc/include/uapi/asm/papr_pdsm.h
new file mode 100644
index ..8b1a4f8fa316
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
+ *
+ * (C) Copyright IBM 2020
+ *
+ * Author: Vaibhav Jain 
+ */
+
+#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_
+#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_
+
+#include 
+
+/*
+ * PDSM Envelope:
+ *
+ * The ioctl ND_CMD_CALL transfers data between user-space and kernel via
+ * envelope which consists of a header and user-defined payload sections.
+ * The header is described by 'struct nd_pdsm_cmd_pkg' which expects a
+ * payload following it an

[PATCH v10 3/6] powerpc/papr_scm: Fetch nvdimm health information from PHYP

2020-06-04 Thread Vaibhav Jain
Implement support for fetching nvdimm health information via
H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair
of 64-bit bitmap, bitwise-and of which is then stored in
'struct papr_scm_priv' and subsequently partially exposed to
user-space via newly introduced dimm specific attribute
'papr/flags'. Since the hcall is costly, the health information is
cached and only re-queried, 60s after the previous successful hcall.

The patch also adds documentation text describing the flags reported by
the new sysfs attribute 'papr/flags' at
Documentation/ABI/testing/sysfs-bus-papr-pmem.

[1] commit 58b278f568f0 ("powerpc: Provide initial documentation for
PAPR hcalls")

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v9..v10:
* Removed an avoidable 'goto' in __drc_pmem_query_health. [ Ira ].

Resend:
* Added ack from Aneesh.

v8..v9:
* Rename some variables and defines to reduce usage of term SCM
  replacing it with PMEM [Dan Williams, Aneesh]
* s/PAPR_SCM_DIMM/PAPR_PMEM/g
* s/papr_scm_nd_attributes/papr_nd_attributes/g
* s/papr_scm_nd_attribute_group/papr_nd_attribute_group/g
* s/papr_scm_dimm_attr_groups/papr_nd_attribute_groups/g
* Renamed file sysfs-bus-papr-scm to sysfs-bus-papr-pmem

v7..v8:
* Update type of variable 'rc' in __drc_pmem_query_health() and
  drc_pmem_query_health() to long and int respectively. [ Ira ]
* Updated the patch description to s/64 bit Big Endian Number/64-bit
  bitmap/ [ Ira, Aneesh ].

Resend:
* None

v6..v7 :
* Used the exported buf_seq_printf() function to generate content for
  'papr/flags'
* Moved the PAPR_SCM_DIMM_* bit-flags macro definitions to papr_scm.c
  and removed the papr_scm.h file [Mpe]
* Some minor consistency issues in sysfs-bus-papr-scm
  documentation. [Mpe]
* s/dimm_mutex/health_mutex/g [Mpe]
* Split drc_pmem_query_health() into two function one of which takes
  care of caching and locking. [Mpe]
* Fixed a local copy creation of dimm health information using
  READ_ONCE(). [Mpe]

v5..v6 :
* Change the flags sysfs attribute from 'papr_flags' to 'papr/flags'
  [Dan Williams]
* Include documentation for 'papr/flags' attr [Dan Williams]
* Change flag 'save_fail' to 'flush_fail' [Dan Williams]
* Caching of health bitmap to reduce expensive hcalls [Dan Williams]
* Removed usage of PPC_BIT from 'papr-scm.h' header [Mpe]
* Replaced two __be64 integers from papr_scm_priv to a single u64
  integer [Mpe]
* Updated patch description to reflect the changes made in this
  version.
* Removed avoidable usage of 'papr_scm_priv.dimm_mutex' from
  flags_show() [Dan Williams]

v4..v5 :
* None

v3..v4 :
* None

v2..v3 :
* Removed PAPR_SCM_DIMM_HEALTH_NON_CRITICAL as a condition for
 NVDIMM unarmed [Aneesh]

v1..v2 :
* New patch in the series.
---
 Documentation/ABI/testing/sysfs-bus-papr-pmem |  27 +++
 arch/powerpc/platforms/pseries/papr_scm.c | 168 +-
 2 files changed, 193 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-papr-pmem

diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem 
b/Documentation/ABI/testing/sysfs-bus-papr-pmem
new file mode 100644
index ..5b10d036a8d4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -0,0 +1,27 @@
+What:  /sys/bus/nd/devices/nmemX/papr/flags
+Date:  Apr, 2020
+KernelVersion: v5.8
+Contact:   linuxppc-dev , 
linux-nvd...@lists.01.org,
+Description:
+   (RO) Report flags indicating various states of a
+   papr-pmem NVDIMM device. Each flag maps to one or
+   more bits set in the dimm-health-bitmap retrieved in
+   response to H_SCM_HEALTH hcall. The details of the bit
+   flags returned in response to this hcall is available
+   at 'Documentation/powerpc/papr_hcalls.rst' . Below are
+   the flags reported in this sysfs file:
+
+   * "not_armed"   : Indicates that NVDIMM contents will not
+ survive a power cycle.
+   * "flush_fail"  : Indicates that NVDIMM contents
+ couldn't be flushed during last
+ shut-down event.
+   * "restore_fail": Indicates that NVDIMM contents
+ couldn't be restored during NVDIMM
+ initialization.
+   * "encrypted"   : NVDIMM contents are encrypted.
+   * "smart_notify": There is health event for the NVDIMM.
+   * "scrubbed": Indicating that contents of the
+ NVDIMM have been scrubbed.
+   * "locked"  : Indicating that NVDIMM contents cant
+ be modified until next power cycle.
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index f35592423380.

[PATCH v10 4/6] powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl()

2020-06-04 Thread Vaibhav Jain
Since papr_scm_ndctl() can be called from outside papr_scm, it is
exposed to the possibility of receiving NULL as the value of the 'cmd_rc'
argument. This patch updates papr_scm_ndctl() to protect against that
possibility by pointing cmd_rc at a local variable in case cmd_rc
== NULL.

Finally the patch also updates the 'default' clause of the switch-case
block removing a 'return' statement thereby ensuring that value of
'cmd_rc' is always logged when papr_scm_ndctl() returns.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v9..v10
* New patch in the series
---
 arch/powerpc/platforms/pseries/papr_scm.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 0c091622b15e..6512fe6a2874 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor 
*nd_desc,
 {
struct nd_cmd_get_config_size *get_size_hdr;
struct papr_scm_priv *p;
+   int rc;
 
/* Only dimm-specific calls are supported atm */
if (!nvdimm)
return -EINVAL;
 
+   /* Use a local variable in case cmd_rc pointer is NULL */
+   if (!cmd_rc)
+   cmd_rc = &rc;
+
p = nvdimm_provider_data(nvdimm);
 
switch (cmd) {
@@ -381,12 +386,13 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor 
*nd_desc,
break;
 
default:
-   return -EINVAL;
+   dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
+   *cmd_rc = -EINVAL;
}
 
dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
 
-   return 0;
+   return *cmd_rc;
 }
 
 static ssize_t flags_show(struct device *dev,
-- 
2.26.2



[PATCH v10 2/6] seq_buf: Export seq_buf_printf

2020-06-04 Thread Vaibhav Jain
'seq_buf' provides a very useful abstraction for writing to a string
buffer without needing to worry about it over-flowing. However, even
though the API has been stable for a couple of years now, it is still not
exported to kernel loadable modules, limiting its usage.

Hence this patch proposes update to 'seq_buf.c' to mark
seq_buf_printf() which is part of the seq_buf API to be exported to
kernel loadable GPL modules. This symbol will be used in later parts
of this patch-set to simplify content creation for a sysfs attribute.

Cc: Piotr Maziarz 
Cc: Cezary Rojewski 
Cc: Christoph Hellwig 
Cc: Steven Rostedt 
Cc: Borislav Petkov 
Acked-by: Steven Rostedt (VMware) 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v9..v10:
* None

Resend:
* Added ack from Steven Rostedt

v8..v9:
* None

v7..v8:
* Updated the patch title [ Christoph Hellwig ]
* Updated patch description to replace confusing term 'external kernel
  modules' to 'kernel loadable modules'.

Resend:
* Added ack from Steven Rostedt

v6..v7:
* New patch in the series
---
 lib/seq_buf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 4e865d42ab03..707453f5d58e 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -91,6 +91,7 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...)
 
return ret;
 }
+EXPORT_SYMBOL_GPL(seq_buf_printf);
 
 #ifdef CONFIG_BINARY_PRINTF
 /**
-- 
2.26.2



Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
Hi!

On Thu, Jun 04, 2020 at 10:08:02PM +, Joseph Myers wrote:
> > The ELFv2 document specifies things like passing of quadruple precision 
> > floats. Indeed, VSX is needed there, but that's not a concern if you 
> > *don't* use quadruple precision floats.
> 
> My understanding is that the registers used for argument passing are all 
> ones that exactly correspond to the Vector registers in earlier 
> instruction set versions.  In other words, you could *in principle* 
> produce an object, or a whole libm shared library,  [...]

And then there is the VRSAVE register, if your OS still uses that.
Let's hope not :-)

This is similar to what -mno-float128-hardware does (which actually
requires VSX hardware for the emulation library currently).


Segher


[PATCH v10 6/6] powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH

2020-06-04 Thread Vaibhav Jain
This patch implements support for PDSM request 'PAPR_PDSM_HEALTH'
that returns a newly introduced 'struct nd_papr_pdsm_health' instance
containing dimm health information back to user space in response to
ND_CMD_CALL. This functionality is implemented in newly introduced
papr_pdsm_health() that queries the nvdimm health information and
then copies this information to the package payload whose layout is
defined by 'struct nd_papr_pdsm_health'.

Cc: "Aneesh Kumar K . V" 
Cc: Dan Williams 
Cc: Michael Ellerman 
Cc: Ira Weiny 
Signed-off-by: Vaibhav Jain 
---
Changelog:

v9..v10:
* Removed code in papr_pdsm_health that performed validation on pdsm
  payload version and corresponding struct and defines used for
  validation of payload version.
* Dropped usage of struct papr_pdsm_health in 'struct
  papr_scm_priv'. Instead papr_pdsm_health() now uses
  'papr_scm_priv.health_bitmap' to populate the pdsm payload.
* Above change also fixes the problem where this patch was removing
  the code that was previously introduced in this patch-series.
  [ Ira ]
* Introduced a new def ND_PDSM_ENVELOPE_HDR_SIZE that indicates the
  space allocated to 'struct nd_pdsm_cmd_pkg' fields except 'struct
  nd_cmd_pkg'. This def is useful in validating payload sizes.
* Reworked papr_pdsm_health() to enforce a specific payload size for
  'PAPR_PDSM_HEALTH' pdsm request.

Resend:
* Added ack from Aneesh.

v8..v9:
* s/PAPR_SCM_PDSM_HEALTH/PAPR_PDSM_HEALTH/g  [ Dan , Aneesh ]
* s/PAPR_SCM_PSDM_DIMM_*/PAPR_PDSM_DIMM_*/g
* Renamed papr_scm_get_health() to papr_pdsm_health()
* Updated patch description to replace papr-scm dimm with nvdimm.

v7..v8:
* None

Resend:
* None

v6..v7:
* Updated flags_show() to use seq_buf_printf(). [Mpe]
* Updated papr_scm_get_health() to use newly introduced
  __drc_pmem_query_health() bypassing the cache [Mpe].

v5..v6:
* Added attribute '__packed' to 'struct nd_papr_pdsm_health_v1' to
  guard against the possibility of different compilers adding different
  padding to the struct [ Dan Williams ]

* Updated 'struct nd_papr_pdsm_health_v1' to use __u8 instead of
  'bool' and also updated drc_pmem_query_health() to take this into
  account. [ Dan Williams ]

v4..v5:
* None

v3..v4:
* Call the DSM_PAPR_SCM_HEALTH service function from
  papr_scm_service_dsm() instead of papr_scm_ndctl(). [Aneesh]

v2..v3:
* Updated struct nd_papr_scm_dimm_health_stat_v1 to use '__xx' types
  as its exported to the userspace [Aneesh]
* Changed the constants DSM_PAPR_SCM_DIMM_XX indicating dimm health
  from enum to #defines [Aneesh]

v1..v2:
* New patch in the series
---
 arch/powerpc/include/uapi/asm/papr_pdsm.h | 33 +++
 arch/powerpc/platforms/pseries/papr_scm.c | 70 +++
 2 files changed, 103 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h 
b/arch/powerpc/include/uapi/asm/papr_pdsm.h
index 8b1a4f8fa316..c4c990ede5d4 100644
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -71,12 +71,17 @@ struct nd_pdsm_cmd_pkg {
__u8 payload[]; /* In/Out: Sub-cmd data buffer */
 } __packed;
 
+/* Calculate size used by the pdsm header fields minus 'struct nd_cmd_pkg' */
+#define ND_PDSM_ENVELOPE_HDR_SIZE \
+   (sizeof(struct nd_pdsm_cmd_pkg) - sizeof(struct nd_cmd_pkg))
+
 /*
  * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel
  * via 'nd_pdsm_cmd_pkg.hdr.nd_command' member of the ioctl struct
  */
 enum papr_pdsm {
PAPR_PDSM_MIN = 0x0,
+   PAPR_PDSM_HEALTH,
PAPR_PDSM_MAX,
 };
 
@@ -95,4 +100,32 @@ static inline void *pdsm_cmd_to_payload(struct 
nd_pdsm_cmd_pkg *pcmd)
return (void *)(pcmd->payload);
 }
 
+/* Various nvdimm health indicators */
+#define PAPR_PDSM_DIMM_HEALTHY   0
+#define PAPR_PDSM_DIMM_UNHEALTHY 1
+#define PAPR_PDSM_DIMM_CRITICAL  2
+#define PAPR_PDSM_DIMM_FATAL 3
+
+/*
+ * Struct exchanged between kernel & ndctl for PAPR_PDSM_HEALTH
+ * Various flags indicate the health status of the dimm.
+ *
+ * dimm_unarmed: Dimm not armed. So contents wont persist.
+ * dimm_bad_shutdown   : Previous shutdown did not persist contents.
+ * dimm_bad_restore: Contents from previous shutdown werent restored.
+ * dimm_scrubbed   : Contents of the dimm have been scrubbed.
+ * dimm_locked : Contents of the dimm cant be modified until CEC reboot
+ * dimm_encrypted  : Contents of dimm are encrypted.
+ * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_
+ */
+struct nd_papr_pdsm_health {
+   __u8 dimm_unarmed;
+   __u8 dimm_bad_shutdown;
+   __u8 dimm_bad_restore;
+   __u8 dimm_scrubbed;
+   __u8 dimm_locked;
+   __u8 dimm_encrypted;
+   __u16 dimm_health;
+} __packed;
+
 #endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
b/arch/powerpc/platforms/pseries/papr_scm.c
index 05eb56ecab5e..984942be24c1 100644
--- a/arch/pow

Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Segher Boessenkool
Hi!

On Fri, Jun 05, 2020 at 12:26:22AM +0200, Daniel Kolesa wrote:
> Either way I'll think about it some more and possibly prepare an RFC port. 
> I'm definitely willing to put in the work and later maintenance effort if 
> that's what it takes to make it happen.

Yeah, you'll need to convince all parties involved that it will not be
more work to them than they are willing to put in.  Initial development
and ongoing work, as you say.

For GCC it is probably an easy decision (but we'll see your proposed ABI
amendments): it shouldn't be much work at all, and it even benefits us
directly (it'll fill some holes in our testing matrix).


Segher


RE: [RESEND PATCH v9 4/5] ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods

2020-06-04 Thread Williams, Dan J



> -Original Message-
> From: Vaibhav Jain 
> Sent: Thursday, June 4, 2020 2:06 AM
> To: Williams, Dan J ; linuxppc-
> d...@lists.ozlabs.org; linux-nvd...@lists.01.org; linux-
> ker...@vger.kernel.org
> Cc: Santosh Sivaraj ; Aneesh Kumar K . V
> ; Steven Rostedt ;
> Oliver O'Halloran ; Weiny, Ira 
> Subject: RE: [RESEND PATCH v9 4/5] ndctl/papr_scm,uapi: Add support for
> PAPR nvdimm specific methods
> 
> Hi Dan,
> 
> Thanks for review and insights on this. My responses below:
> 
> "Williams, Dan J"  writes:
> 
> > [ forgive formatting I'm temporarily stuck using Outlook this week...
> > ]
> >
> >> From: Vaibhav Jain 
> > [..]
> >>
> >> Introduce support for PAPR NVDIMM Specific Methods (PDSM) in
> papr_scm
> >> module and add the command family NVDIMM_FAMILY_PAPR to the
> white
> >> list of NVDIMM command sets. Also advertise support for ND_CMD_CALL
> >> for the nvdimm command mask and implement necessary scaffolding in
> >> the module to handle ND_CMD_CALL ioctl and PDSM requests that we
> receive.
> >>
> >> The layout of the PDSM request as we expect from libnvdimm/libndctl
> >> is described in newly introduced uapi header 'papr_pdsm.h' which
> >> defines a new 'struct nd_pdsm_cmd_pkg' header. This header is used to
> >> communicate the PDSM request via member
> 'nd_cmd_pkg.nd_command' and
> >> size of payload that need to be sent/received for servicing the PDSM.
> >>
> >> A new function is_cmd_valid() is implemented that reads the args to
> >> papr_scm_ndctl() and performs sanity tests on them. A new function
> >> papr_scm_service_pdsm() is introduced and is called from
> >> papr_scm_ndctl() in case of a PDSM request is received via
> >> ND_CMD_CALL command from libnvdimm.
> >>
> >> Cc: "Aneesh Kumar K . V" 
> >> Cc: Dan Williams 
> >> Cc: Michael Ellerman 
> >> Cc: Ira Weiny 
> >> Reviewed-by: Aneesh Kumar K.V 
> >> Signed-off-by: Vaibhav Jain 
> >> ---
> >> Changelog:
> >>
> >> Resend:
> >> * Added ack from Aneesh.
> >>
> >> v8..v9:
> >> * Reduced the usage of term SCM replacing it with appropriate
> >>   replacement [ Dan Williams, Aneesh ]
> >> * Renamed 'papr_scm_pdsm.h' to 'papr_pdsm.h'
> >> * s/PAPR_SCM_PDSM_*/PAPR_PDSM_*/g
> >> * s/NVDIMM_FAMILY_PAPR_SCM/NVDIMM_FAMILY_PAPR/g
> >> * Minor updates to 'papr_psdm.h' to replace usage of term 'SCM'.
> >> * Minor update to patch description.
> >>
> >> v7..v8:
> >> * Removed the 'payload_offset' field from 'struct
> >>   nd_pdsm_cmd_pkg'. Instead command payload is always assumed to
> start
> >>   at 'nd_pdsm_cmd_pkg.payload'. [ Aneesh ]
> >> * To enable introducing new fields to 'struct nd_pdsm_cmd_pkg',
> >>   'reserved' field of 10-bytes is introduced. [ Aneesh ]
> >> * Fixed a typo in "Backward Compatibility" section of papr_scm_pdsm.h
> >>   [ Ira ]
> >>
> >> Resend:
> >> * None
> >>
> >> v6..v7 :
> >> * Removed the re-definitions of __packed macro from papr_scm_pdsm.h
> >>   [Mpe].
> >> * Removed the usage of __KERNEL__ macros in papr_scm_pdsm.h
> [Mpe].
> >> * Removed macros that were unused in papr_scm.c from
> papr_scm_pdsm.h
> >>   [Mpe].
> >> * Made functions defined in papr_scm_pdsm.h as static inline. [Mpe]
> >>
> >> v5..v6 :
> >> * Changed the usage of the term DSM to PDSM to distinguish it from the
> >>   ACPI term [ Dan Williams ]
> >> * Renamed papr_scm_dsm.h to papr_scm_pdsm.h and updated various
> >> struct
> >>   to reflect the new terminology.
> >> * Updated the patch description and title to reflect the new terminology.
> >> * Squashed patch to introduce new command family in 'ndctl.h' with
> >>   this patch [ Dan Williams ]
> >> * Updated the papr_scm_pdsm method starting index from 0x1 to
> 0x0
> >>   [ Dan Williams ]
> >> * Removed redundant license text from the papr_scm_psdm.h file.
> >>   [ Dan Williams ]
> >> * s/envelop/envelope/ at various places [ Dan Williams ]
> >> * Added '__packed' attribute to command package header to gaurd
> >>   against different compiler adding paddings between the fields.
> >>   [ Dan Williams]
> >> * Converted various pr_debug to dev_debug [ Dan Williams ]
> >>
> >> v4..v5 :
> >> * None
> >>
> >> v3..v4 :
> >> * None
> >>
> >> v2..v3 :
> >> * Updated the patch prefix to 'ndctl/uapi' [Aneesh]
> >>
> >> v1..v2 :
> >> * None
> >> ---
> >>  arch/powerpc/include/uapi/asm/papr_pdsm.h | 136
> >> ++
> arch/powerpc/platforms/pseries/papr_scm.c |
> >> 101 +++-
> >>  include/uapi/linux/ndctl.h|   1 +
> >>  3 files changed, 232 insertions(+), 6 deletions(-)  create mode
> >> 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h
> >>
> >> diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h
> >> b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> >> new file mode 100644
> >> index ..6407fefcc007
> >> --- /dev/null
> >> +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
> >> @@ -0,0 +1,136 @@
> >> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> >> +/*
> >> + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl
> >> + 

Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Daniel Kolesa
On Fri, Jun 5, 2020, at 01:35, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Jun 04, 2020 at 11:43:53PM +0200, Daniel Kolesa wrote:
> > The thing is, I've yet to see in which way the ELFv2 ABI *actually* 
> > requires VSX - I don't think compiling for 970 introduces any actual 
> > differences. There will be omissions, yes - but then the more accurate 
> > thing would be to say that a subset of ELFv2 is used, rather than it being 
> > a different ABI per se.
> 
> Two big things are that binaries that someone else made are supposed to
> work for you as well -- including binaries using VSX registers, or any
> instructions that require ISA 2.07 (or some older ISA after 970).  This
> includes DSOs (shared libraries).  So for a distribution this means that
> they will not use VSX *anywhere*, or only in very specialised things.
> That is a many-years setback, for people/situations where it could be
> used.

Third party precompiled stuff doesn't really need to concern us, since none 
really exists. It's also still an upgrade over ELFv1 regardless (I mean, the 
same things apply there). I'm also not really all that convinced that vectors 
make a huge difference in non-specialized code (autovectorization still has a 
way to go) and code written to use vector instructions should probably check 
auxval and take those paths at runtime. As for other instructions, fair enough, 
but from my rough testing, it doesn't make such a massive difference for 
average case (and where it does, one can always rebuild their thing with 
CFLAGS=-mcpu=power9)

> 
> > The ELFv2 document specifies things like passing of quadruple precision 
> > floats. Indeed, VSX is needed there, but that's not a concern if you 
> > *don't* use quadruple precision floats.
> 
> As Joseph says, QP float is passed in the VRs, so that works just fine
> *if* you have AltiVec.  If not, you probably should pass such values in
> the GPRs, or however a struct of 16 bytes is passed in whatever ABI you
> use (this is a much more general problem than just BE ELFv2 ;-) )  And
> then you have some kind of "partial soft float".  Fun!  (Or not...)

As I mentioned previously, I kinda want to be able to cover the same targets as 
ELFv1 can, which also means non-altivec hardware, so it doesn't matter that 
much either way... being able to cover existing stable compilers with a 
manageable patchset would be nice, too.

> 
> > > If you always use -mcpu=970 (or similar), then not very much is
> > > different for you most likely -- except of course there is no promise
> > > to the user that they can use VSX and all instructions in ISA 2.07,
> > > which is a very useful promise to have normally.
> > 
> > Yes, -mcpu=970 is used for default packages. *However*, it is possible that 
> > the user compiles their own packages with -mcpu=power9 or something 
> > similar, and then it'll be possible to utilize VSX and all, and it should 
> > still work with the existing userland. When speaking of ABI, what matters 
> > is... well, the binary interface, which is the same - so I believe this is 
> > still ELFv2. A subset is always compliant with the whole.
> 
> The same calling convention will probably work, yes.  An ABI is more
> than that though.
> 
> 
> > > Btw, if you use GCC, *please* send in testresults?  :-)
> > 
> > Yes, it's all gcc (we do have clang, but compiling repo packages with clang 
> > is generally frowned upon in the project, as we have vast majority of 
> > packages cross-compilable, and our cross-compiling infrastructure is 
> > gcc-centric, plus we enable certain things by default such as hardening 
> > flags that clang does not support). I'll try to remember next time I'm 
> > running tests.
> 
> Thanks in advance!
> 
> > I have a feeling that glibc would object to such port, since it means it 
> > would have to exist in parallel with a potential different ELFv2 port that 
> > does have a POWER8 minimal requirement; gcc would need a way to tell them 
> > apart, too (based on what would they be told apart? the simplest way would 
> > probably be something like, if --with-abi=elfv2 { if --with-cpu < power8 -> 
> > use glibc-novsx else use glibc-vsx } ...)
> 
> The target name allows to make such distinctions: this could for example
> be  powerpc64-*-linux-void  (maybe I put the distinction in the wrong
> part of the name here?  The glibc people will know better, and "void" is
> probably not a great name anyway).

Hm, I'm not a huge fan of putting ABI specifics in the triplet, it feels wrong 
- there is no precedent for it with POWER (ARM did it with EABI though), the 
last part should remain 'gnu' as it's still glibc; besides, gcc is compiled for 
exactly one target triplet, and traditionally with ppc compilers it's always 
been possible to target everything with just one compiler (endian, 32bit, 
64bit, abi...). Detection based on CPU is probably quirky too, actually, since 
it'd mean different CPU types selecting different dynamic linkers.

The best way would probab

Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Phil Blundell
On Thu, Jun 04, 2020 at 10:39:30PM +0200, Daniel Kolesa wrote:
> Is there *any* way I can take that would make upstreams of all parts 
> of the toolchain happy? I explicitly don't want to go back to ELFv1. 
> While at it, I'd like to transition to ld64 long double format, to 
> match musl and improve software compatibility, which I feel will raise 
> more objections from IBM side.

Although I don't pretend to understand all the nuances of your port, and 
in particular I have no idea what the thing about "ld64 long double 
format" means, this doesn't sound like a particularly unusual situation.  
If I understand correctly, you are in the position of essentially 
wanting to implement the calling-standard part of the ABI on hardware 
that isn't capable of implementing the full ABI as documented.

If that's the case then, depending on exactly what instructions are
missing, I think your choices are:

1a. Define your own subset of ELFv2 which is interworkable with the full 
ABI at the function call interface but doesn't make all the same 
guarantees about binary compatibility.  That would mean that a binary 
built with your toolchain and conforming to the subset ABI would run on 
any system that implements the full ELFv2 ABI, but the opposite is not 
necessarily true.  There should be no impediment to getting support for 
such an ABI upstream in any part of the GNU toolchain where it's 
required if you can demonstrate that there's a non-trivial userbase for 
it.  The hardest part may be thinking of a name.

1b. Or, if the missing instructions are severe enough that it simply 
isn't possible to have an interworkable implementation, you just need to 
define your own ABI that fits your needs.  You can still borrow as much 
as necessary from ELFv2 but you definitely need to call it something 
else at that point.  All the other comments from 1a above still apply.

2. Implement kernel emulation for the missing instructions.  If they
are seldom used in practice then this might be adequate.  Of course,
binaries that use them intensively will be slow; you'd have to judge
whether this is better or worse than having them not run at all.  If
you do this then you can implement the full ELFv2 ABI; your own
toolchain might still choose not to use the instructions that it knows
are going to be emulated, but at least other binaries will still run
and you can call yourself compatible.

3. Persuade whoever controls the ELFv2 ABI to relax their requirements.
But I assume they didn't make the original decision capriciously so
this might be hard/impossible.  ABI definitions from hardware vendors
are always slightly political and we just have to accept this.

FWIW, we faced a similar situation about 20 years ago when the then-new 
ARM EABI was defined.  This essentially required implementations to 
support the ARMv5T instruction set; the committee that defined the ABI 
took the view that requiring implementations to cater for older 
architectures would be too onerous.  It was entirely possible to 
implement 99% of the EABI on older processors; such implementations 
weren't strictly conforming but they were interworkable enough to be 
useful in practice, and the "almost-EABI" was still significantly
better than what had gone before.

Phil


[powerpc:merge] BUILD SUCCESS 787e1419e6c96ab8641adb4391b8955bc2e76817

2020-06-04 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
merge
branch HEAD: 787e1419e6c96ab8641adb4391b8955bc2e76817  Automatic merge of 
branch 'next' into merge

elapsed time: 861m

configs tested: 108
configs skipped: 8

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
mipsar7_defconfig
csky alldefconfig
sh shx3_defconfig
mipsjmr3927_defconfig
xtensa  defconfig
nios2   defconfig
alpha   defconfig
powerpc  mpc866_ads_defconfig
mipsworkpad_defconfig
s390 alldefconfig
shsh7757lcr_defconfig
nds32alldefconfig
m68km5407c3_defconfig
mips  malta_defconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
m68k allyesconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips allyesconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
powerpc defconfig
i386 randconfig-a001-20200604
i386 randconfig-a006-20200604
i386 randconfig-a002-20200604
i386 randconfig-a005-20200604
i386 randconfig-a004-20200604
i386 randconfig-a003-20200604
x86_64   randconfig-a011-20200604
x86_64   randconfig-a016-20200604
x86_64   randconfig-a013-20200604
x86_64   randconfig-a014-20200604
x86_64   randconfig-a012-20200604
x86_64   randconfig-a015-20200604
i386 randconfig-a014-20200604
i386 randconfig-a015-20200604
i386 randconfig-a011-20200604
i386 randconfig-a016-20200604
i386 randconfig-a012-20200604
i386 randconfig-a013-20200604
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
sparcallyesconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
um   allmodconfig
umallnoconfig
um  defconfig
um   al

[powerpc:next] BUILD SUCCESS 1395375c592770fe5158a592944aaeed67fa94ff

2020-06-04 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
next
branch HEAD: 1395375c592770fe5158a592944aaeed67fa94ff  Merge branch 
'topic/ppc-kvm' into next

elapsed time: 860m

configs tested: 99
configs skipped: 9

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
mipsar7_defconfig
csky alldefconfig
sh shx3_defconfig
mips  cavium_octeon_defconfig
um   x86_64_defconfig
ia64  tiger_defconfig
arc nps_defconfig
arc   tb10x_defconfig
arm   spear13xx_defconfig
nios2   defconfig
mipsjmr3927_defconfig
alpha   defconfig
xtensa  defconfig
powerpc  mpc866_ads_defconfig
mipsworkpad_defconfig
s390 alldefconfig
shsh7757lcr_defconfig
nds32alldefconfig
m68km5407c3_defconfig
mips  malta_defconfig
x86_64  defconfig
arm assabet_defconfig
arm   multi_v4t_defconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
m68k allyesconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips allyesconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconfig
s390  allnoconfig
s390 allmodconfig
s390defconfig
sparcallyesconfig
sparc   defconfig
sparc64 defconfig
sparc64   allnoconfig
sparc64  allyesconfig
sparc64  allmodconfig
um   allmodconfig
um   allyesconfig
umallnoconfig
um  defconfig
x86_64   rhel
x86_64   rhel-7.6
x86_64rhel-7.6-kselftests
x86_64 rhel-7.2-clear
x86_64lkp
x86_64  fedora-25
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lis

Re: [musl] Re: ppc64le and 32-bit LE userland compatibility

2020-06-04 Thread Phil Blundell
On Thu, Jun 04, 2020 at 06:06:39PM -0500, Segher Boessenkool wrote:
> On Thu, Jun 04, 2020 at 11:55:11PM +0200, Phil Blundell wrote:
> > 1a. Define your own subset of ELFv2 which is interworkable with the full 
> > ABI at the function call interface but doesn't make all the same 
> > guarantees about binary compatibility.  That would mean that a binary 
> > built with your toolchain and conforming to the subset ABI would run on 
> > any system that implements the full ELFv2 ABI, but the opposite is not 
> > necessarily true.
> 
> And you can only use shared objects also built for that subset ABI.  If
> you use some binary distribution, then it will also have to be built for
> that subset, practically anyway.

Right, absolutely.  Any place that I wrote "binary", I meant to include
both DSOs and executables.

> This is very similar to soft-float targets.

Yes, agreed.

> There is more process involved than most open source people are
> comfortable with :-/

Yes, that's unfortunate but it goes with the territory.  I think we have 
to accept that any attempt to define a single ABI where there are 
multiple interests involved will be a significant effort involving 
thousands of person-hours of work, much discussion, and a certain amount 
of politics and compromise.  Inevitably, some people/organisations at 
the margins will decide that the game isn't worth the candle.  If they 
don't participate in the general ABI effort then they can hardly 
complain about the results, but equally there is nothing to stop these 
folks from defining their own ABIs.  If they can attain a critical mass 
to support such a variant ABI then, as far as I'm concerned, that's a 
fine thing and all power to them.

p.


Re: [PATCH] pwm: Add missing "CONFIG_" prefix

2020-06-04 Thread Joe Perches
On Thu, 2020-06-04 at 14:52 -0700, Kees Cook wrote:
> On Wed, Jun 03, 2020 at 04:04:31PM -0700, Joe Perches wrote:
> > On Wed, 2020-06-03 at 15:40 -0700, Kees Cook wrote:
> > > The IS_ENABLED() use was missing the CONFIG_ prefix which would have
> > > > led to skipping this code.
> > > 
> > > Fixes: 3ad1f3a33286 ("pwm: Implement some checks for lowlevel drivers")
> > > Signed-off-by: Kees Cook 
> > > ---
> > >  drivers/pwm/core.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
> > > index 9973c442b455..6b3cbc0490c6 100644
> > > --- a/drivers/pwm/core.c
> > > +++ b/drivers/pwm/core.c
> > > @@ -121,7 +121,7 @@ static int pwm_device_request(struct pwm_device *pwm, 
> > > const char *label)
> > >   pwm->chip->ops->get_state(pwm->chip, pwm, &pwm->state);
> > >   trace_pwm_get(pwm, &pwm->state);
> > >  
> > > - if (IS_ENABLED(PWM_DEBUG))
> > > + if (IS_ENABLED(CONFIG_PWM_DEBUG))
> > >   pwm->last = pwm->state;
> > >   }
> > >  
> > > -- 
> > > 2.25.1
> > > 
> > 
> > more odd uses (mostly in comments)
> > 
> > $ git grep -P -oh '\bIS_ENABLED\s*\(\s*\w+\s*\)'| \
> >   sed -r 's/\s+//g'| \
> >   grep -v '(CONFIG_' | \
> >   sort | uniq -c | sort -rn
> >   7 IS_ENABLED(DEBUG)
> >   4 IS_ENABLED(DRM_I915_SELFTEST)
> >   4 IS_ENABLED(cfg)
> >   2 IS_ENABLED(opt_name)
> >   2 IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)
> >   2 IS_ENABLED(config)
> >   2 IS_ENABLED(cond)
> >   2 IS_ENABLED(__BIG_ENDIAN)
> >   1 IS_ENABLED(x)
> >   1 IS_ENABLED(STRICT_KERNEL_RWX)
> >   1 IS_ENABLED(PWM_DEBUG)
> >   1 IS_ENABLED(option)
> >   1 IS_ENABLED(ETHTOOL_NETLINK)
> >   1 IS_ENABLED(DEBUG_RANDOM_TRIE)
> >   1 IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST)
> > 
> > STRICT_KERNEL_RWX is misused here in ppc
> > 
> > ---
> > 
> > Fix pr_warn without newline too.
> > 
> >  arch/powerpc/mm/book3s64/hash_utils.c | 5 ++---
> >  1 file changed, 2 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/mm/book3s64/hash_utils.c 
> > b/arch/powerpc/mm/book3s64/hash_utils.c
> > index 51e3c15f7aff..dd60c5f2b991 100644
> > --- a/arch/powerpc/mm/book3s64/hash_utils.c
> > +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> > @@ -660,11 +660,10 @@ static void __init htab_init_page_sizes(void)
> >  * Pick a size for the linear mapping. Currently, we only
> >  * support 16M, 1M and 4K which is the default
> >  */
> > -   if (IS_ENABLED(STRICT_KERNEL_RWX) &&
> > +   if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
> > > (unsigned long)_stext % 0x1000000) {
> > if (mmu_psize_defs[MMU_PAGE_16M].shift)
> > -   pr_warn("Kernel not 16M aligned, "
> > -   "disabling 16M linear map alignment");
> > +   pr_warn("Kernel not 16M aligned, disabling 16M 
> > linear map alignment\n");
> > aligned = false;
> > }
> 
> Joe, I was going to send all of the fixes for these issues, but your
> patch doesn't have a SoB. Shall I add one for the above patch?

 sure if you want, or submit it yourself.

My feeling about these types of changes is that the maintainers
of the subsystem, in this case ppc, should handle them
themselves rather than requiring anyone else to go to the
trouble of sending real patches.




linux-next 04 June: warning: "ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE" is not defined

2020-06-04 Thread Christophe Leroy

Hi all,

Getting the following warning on linux-next from yesterday,

  CC  net/sunrpc/svcsock.o
net/sunrpc/svcsock.c:227:5: warning: "ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE" 
is not defined [-Wundef]

 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 ^

Bisected to ca07eda33e01 (refs/bisect/bad) SUNRPC: Refactor svc_recvfrom()

Missing #include <asm/cacheflush.h> ?

Christophe


[PATCH] tpm: ibmvtpm: Wait for ready buffer before probing for TPM2 attributes

2020-06-04 Thread David Gibson
The tpm2_get_cc_attrs_tbl() call will result in TPM commands being issued,
which will need the use of the internal command/response buffer.  But,
we're issuing this *before* we've waited to make sure that buffer is
allocated.

This can result in intermittent failures to probe if the hypervisor / TPM
implementation doesn't respond quickly enough.  I find it fails almost
every time with an 8 vcpu guest under KVM with software emulated TPM.

Fixes: 18b3670d79ae ("tpm: ibmvtpm: Add support for TPM2")
Signed-off-by: David Gibson 
---
 drivers/char/tpm/tpm_ibmvtpm.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 09fe45246b8c..994385bf37c0 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -683,13 +683,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
if (rc)
goto init_irq_cleanup;
 
-   if (!strcmp(id->compat, "IBM,vtpm20")) {
-   chip->flags |= TPM_CHIP_FLAG_TPM2;
-   rc = tpm2_get_cc_attrs_tbl(chip);
-   if (rc)
-   goto init_irq_cleanup;
-   }
-
if (!wait_event_timeout(ibmvtpm->crq_queue.wq,
ibmvtpm->rtce_buf != NULL,
HZ)) {
@@ -697,6 +690,13 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
goto init_irq_cleanup;
}
 
+   if (!strcmp(id->compat, "IBM,vtpm20")) {
+   chip->flags |= TPM_CHIP_FLAG_TPM2;
+   rc = tpm2_get_cc_attrs_tbl(chip);
+   if (rc)
+   goto init_irq_cleanup;
+   }
+
return tpm_chip_register(chip);
 init_irq_cleanup:
do {
-- 
2.26.2