[tip: objtool/core] x86/insn: Support big endian cross-compiles

2021-01-18 Thread tip-bot2 for Martin Schwidefsky
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 1d509f2a6ebca1aea3089c769f6375f01a832e9b
Gitweb:
https://git.kernel.org/tip/1d509f2a6ebca1aea3089c769f6375f01a832e9b
Author:Martin Schwidefsky 
AuthorDate:Fri, 13 Nov 2020 00:03:23 +01:00
Committer: Josh Poimboeuf 
CommitterDate: Wed, 13 Jan 2021 18:13:11 -06:00

x86/insn: Support big endian cross-compiles

The x86 instruction decoder code is shared across the kernel source and
the tools. Currently objtool seems to be the only one of the required
build tools which breaks x86 cross-compilation on big endian systems.
Make the x86 instruction decoder agnostic to the build host's endianness
to support x86 cross-compilation, and to enable objtool to become
endianness-aware in support of big endian architectures.

Signed-off-by: Martin Schwidefsky 
Co-developed-by: Vasily Gorbik 
Signed-off-by: Vasily Gorbik 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Masami Hiramatsu 
Signed-off-by: Josh Poimboeuf 
---
 arch/x86/include/asm/insn.h   |  33 +-
 arch/x86/lib/insn.c   | 101 +
 arch/x86/tools/insn_sanity.c  |   4 +-
 tools/arch/x86/include/asm/insn.h |  33 +-
 tools/arch/x86/lib/insn.c | 101 +
 5 files changed, 160 insertions(+), 112 deletions(-)

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index a8c3d28..090863c 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -7,9 +7,12 @@
  * Copyright (C) IBM Corporation, 2009
  */
 
+#include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
 #include <asm/inat.h>
 
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
 struct insn_field {
union {
insn_value_t value;
@@ -20,6 +23,36 @@ struct insn_field {
unsigned char nbytes;
 };
 
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+   p->value = v;
+   p->nbytes = n;
+}
+
+#else
+
+struct insn_field {
+   insn_value_t value;
+   union {
+   insn_value_t little;
+   insn_byte_t bytes[4];
+   };
+   /* !0 if we've run insn_get_xxx() for this field */
+   unsigned char got;
+   unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+   p->value = v;
+   p->little = __cpu_to_le32(v);
+   p->nbytes = n;
+}
+
+#endif
+
 struct insn {
struct insn_field prefixes; /*
 * Prefixes
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 4042795..520b31f 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -5,6 +5,7 @@
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
 
+#include <linux/kernel.h>
 #ifdef __KERNEL__
 #include <linux/string.h>
 #else
@@ -15,15 +16,28 @@
 
 #include <asm/emulate_prefix.h>
 
+#define leXX_to_cpu(t, r)  \
+({ \
+   __typeof__(t) v;\
+   switch (sizeof(t)) {\
+   case 4: v = le32_to_cpu(r); break;  \
+   case 2: v = le16_to_cpu(r); break;  \
+   case 1: v = r; break;   \
+   default:\
+   BUILD_BUG(); break; \
+   }   \
+   v;  \
+})
+
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)  \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
 
 #define __get_next(t, insn)\
-   ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+   ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
 
 #define __peek_nbyte_next(t, insn, n)  \
-   ({ t r = *(t*)((insn)->next_byte + n); r; })
+   ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
 
 #define get_next(t, insn)  \
 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -157,8 +171,7 @@ found:
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
-   insn->rex_prefix.value = b;
-   insn->rex_prefix.nbytes = 1;
+   insn_field_set(&insn->rex_prefix, b, 1);
insn->next_byte++;
if (X86_REX_W(b))
/*

[tip: objtool/core] objtool: Fix reloc generation on big endian cross-compiles

2021-01-18 Thread tip-bot2 for Martin Schwidefsky
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: a1a664ece586457e9f7652b0bc5b08386259e358
Gitweb:
https://git.kernel.org/tip/a1a664ece586457e9f7652b0bc5b08386259e358
Author:Martin Schwidefsky 
AuthorDate:Fri, 13 Nov 2020 00:03:26 +01:00
Committer: Josh Poimboeuf 
CommitterDate: Wed, 13 Jan 2021 18:13:12 -06:00

objtool: Fix reloc generation on big endian cross-compiles

Relocations generated in elf_rebuild_rel[a]_reloc_section() are broken
if objtool is built and run on a big endian system.

The following errors pop up during x86 cross-compilation:

  x86_64-9.1.0-ld: fs/efivarfs/inode.o: bad reloc symbol index (0x200 >= 
0x22) for offset 0 in section `.orc_unwind_ip'
  x86_64-9.1.0-ld: final link failed: bad value

Convert those functions to use gelf_update_rel[a](), similar to what
elf_write_reloc() does.

Signed-off-by: Martin Schwidefsky 
Co-developed-by: Vasily Gorbik 
Signed-off-by: Vasily Gorbik 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Masami Hiramatsu 
Signed-off-by: Josh Poimboeuf 
---
 tools/objtool/elf.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index be89c74..c784122 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -855,25 +855,27 @@ static int elf_rebuild_rel_reloc_section(struct section 
*sec, int nr)
 {
struct reloc *reloc;
int idx = 0, size;
-   GElf_Rel *relocs;
+   void *buf;
 
/* Allocate a buffer for relocations */
-   size = nr * sizeof(*relocs);
-   relocs = malloc(size);
-   if (!relocs) {
+   size = nr * sizeof(GElf_Rel);
+   buf = malloc(size);
+   if (!buf) {
perror("malloc");
return -1;
}
 
-   sec->data->d_buf = relocs;
+   sec->data->d_buf = buf;
sec->data->d_size = size;
+   sec->data->d_type = ELF_T_REL;
 
sec->sh.sh_size = size;
 
idx = 0;
 list_for_each_entry(reloc, &sec->reloc_list, list) {
-   relocs[idx].r_offset = reloc->offset;
-   relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   reloc->rel.r_offset = reloc->offset;
+   reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   gelf_update_rel(sec->data, idx, &reloc->rel);
idx++;
}
 
@@ -884,26 +886,28 @@ static int elf_rebuild_rela_reloc_section(struct section 
*sec, int nr)
 {
struct reloc *reloc;
int idx = 0, size;
-   GElf_Rela *relocs;
+   void *buf;
 
/* Allocate a buffer for relocations with addends */
-   size = nr * sizeof(*relocs);
-   relocs = malloc(size);
-   if (!relocs) {
+   size = nr * sizeof(GElf_Rela);
+   buf = malloc(size);
+   if (!buf) {
perror("malloc");
return -1;
}
 
-   sec->data->d_buf = relocs;
+   sec->data->d_buf = buf;
sec->data->d_size = size;
+   sec->data->d_type = ELF_T_RELA;
 
sec->sh.sh_size = size;
 
idx = 0;
 list_for_each_entry(reloc, &sec->reloc_list, list) {
-   relocs[idx].r_offset = reloc->offset;
-   relocs[idx].r_addend = reloc->addend;
-   relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   reloc->rela.r_offset = reloc->offset;
+   reloc->rela.r_addend = reloc->addend;
+   reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   gelf_update_rela(sec->data, idx, &reloc->rela);
idx++;
}
 


[tip: objtool/core] objtool: Fix reloc generation on big endian cross compiles

2020-10-07 Thread tip-bot2 for Martin Schwidefsky
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 317664a7fcc99d9bcc0cb33ac05e27764f3024e4
Gitweb:
https://git.kernel.org/tip/317664a7fcc99d9bcc0cb33ac05e27764f3024e4
Author:Martin Schwidefsky 
AuthorDate:Sun, 04 Oct 2020 16:30:50 +02:00
Committer: Josh Poimboeuf 
CommitterDate: Tue, 06 Oct 2020 09:36:48 -05:00

objtool: Fix reloc generation on big endian cross compiles

Currently, the relocations generated by the elf_rebuild_rel_reloc_section()
and elf_rebuild_rela_reloc_section() functions are broken if objtool is
built and run on a big endian system. E.g. the following errors pop up
during x86 cross compilation:
x86_64-9.1.0-ld: fs/efivarfs/inode.o: bad reloc symbol index (0x200 >=
0x22) for offset 0 in section `.orc_unwind_ip'
x86_64-9.1.0-ld: final link failed: bad value

To address that, convert those functions to work similarly to
elf_write_reloc() and reuse the gelf_update_rel()/gelf_update_rela()
libelf library functions.

Signed-off-by: Martin Schwidefsky 
Co-developed-by: Vasily Gorbik 
Signed-off-by: Vasily Gorbik 
Signed-off-by: Josh Poimboeuf 
---
 tools/objtool/elf.c | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 4e1d746..5c0341b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -829,25 +829,27 @@ static int elf_rebuild_rel_reloc_section(struct section 
*sec, int nr)
 {
struct reloc *reloc;
int idx = 0, size;
-   GElf_Rel *relocs;
+   void *buf;
 
/* Allocate a buffer for relocations */
-   size = nr * sizeof(*relocs);
-   relocs = malloc(size);
-   if (!relocs) {
+   size = nr * sizeof(GElf_Rel);
+   buf = malloc(size);
+   if (!buf) {
perror("malloc");
return -1;
}
 
-   sec->data->d_buf = relocs;
+   sec->data->d_buf = buf;
sec->data->d_size = size;
+   sec->data->d_type = ELF_T_REL;
 
sec->sh.sh_size = size;
 
idx = 0;
 list_for_each_entry(reloc, &sec->reloc_list, list) {
-   relocs[idx].r_offset = reloc->offset;
-   relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   reloc->rel.r_offset = reloc->offset;
+   reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   gelf_update_rel(sec->data, idx, &reloc->rel);
idx++;
}
 
@@ -858,26 +860,28 @@ static int elf_rebuild_rela_reloc_section(struct section 
*sec, int nr)
 {
struct reloc *reloc;
int idx = 0, size;
-   GElf_Rela *relocs;
+   void *buf;
 
/* Allocate a buffer for relocations with addends */
-   size = nr * sizeof(*relocs);
-   relocs = malloc(size);
-   if (!relocs) {
+   size = nr * sizeof(GElf_Rela);
+   buf = malloc(size);
+   if (!buf) {
perror("malloc");
return -1;
}
 
-   sec->data->d_buf = relocs;
+   sec->data->d_buf = buf;
sec->data->d_size = size;
+   sec->data->d_type = ELF_T_RELA;
 
sec->sh.sh_size = size;
 
idx = 0;
 list_for_each_entry(reloc, &sec->reloc_list, list) {
-   relocs[idx].r_offset = reloc->offset;
-   relocs[idx].r_addend = reloc->addend;
-   relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   reloc->rela.r_offset = reloc->offset;
+   reloc->rela.r_addend = reloc->addend;
+   reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+   gelf_update_rela(sec->data, idx, &reloc->rela);
idx++;
}
 


[tip: objtool/core] x86/insn: Support big endian cross-compiles

2020-10-07 Thread tip-bot2 for Martin Schwidefsky
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 2a522b53c47051d3bf98748418f4f8e5f20d2c04
Gitweb:
https://git.kernel.org/tip/2a522b53c47051d3bf98748418f4f8e5f20d2c04
Author:Martin Schwidefsky 
AuthorDate:Mon, 05 Oct 2020 17:50:31 +02:00
Committer: Josh Poimboeuf 
CommitterDate: Tue, 06 Oct 2020 09:32:29 -05:00

x86/insn: Support big endian cross-compiles

The x86 instruction decoder code is shared across the kernel source and
the tools. Currently objtool seems to be the only one of the required
build tools which breaks x86 cross compilation on big endian systems.
Make the x86 instruction decoder agnostic to the build host's endianness
to support x86 cross compilation, and to enable objtool to become
endianness-aware in support of big endian architectures.

Signed-off-by: Martin Schwidefsky 
Co-developed-by: Vasily Gorbik 
Signed-off-by: Vasily Gorbik 
Acked-by: Masami Hiramatsu 
Signed-off-by: Josh Poimboeuf 
---
 arch/x86/include/asm/insn.h   |  33 +-
 arch/x86/lib/insn.c   | 101 +
 tools/arch/x86/include/asm/insn.h |  33 +-
 tools/arch/x86/lib/insn.c | 101 +
 4 files changed, 160 insertions(+), 108 deletions(-)

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3e..004e27b 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -7,9 +7,12 @@
  * Copyright (C) IBM Corporation, 2009
  */
 
+#include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
 #include <asm/inat.h>
 
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
 struct insn_field {
union {
insn_value_t value;
@@ -20,6 +23,36 @@ struct insn_field {
unsigned char nbytes;
 };
 
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+   p->value = v;
+   p->nbytes = n;
+}
+
+#else
+
+struct insn_field {
+   insn_value_t value;
+   union {
+   insn_value_t little;
+   insn_byte_t bytes[4];
+   };
+   /* !0 if we've run insn_get_xxx() for this field */
+   unsigned char got;
+   unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+ unsigned char n)
+{
+   p->value = v;
+   p->little = __cpu_to_le32(v);
+   p->nbytes = n;
+}
+
+#endif
+
 struct insn {
struct insn_field prefixes; /*
 * Prefixes
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 4042795..520b31f 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -5,6 +5,7 @@
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
 
+#include <linux/kernel.h>
 #ifdef __KERNEL__
 #include <linux/string.h>
 #else
@@ -15,15 +16,28 @@
 
 #include <asm/emulate_prefix.h>
 
+#define leXX_to_cpu(t, r)  \
+({ \
+   __typeof__(t) v;\
+   switch (sizeof(t)) {\
+   case 4: v = le32_to_cpu(r); break;  \
+   case 2: v = le16_to_cpu(r); break;  \
+   case 1: v = r; break;   \
+   default:\
+   BUILD_BUG(); break; \
+   }   \
+   v;  \
+})
+
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)  \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
 
 #define __get_next(t, insn)\
-   ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+   ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
 
 #define __peek_nbyte_next(t, insn, n)  \
-   ({ t r = *(t*)((insn)->next_byte + n); r; })
+   ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
 
 #define get_next(t, insn)  \
 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -157,8 +171,7 @@ found:
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
-   insn->rex_prefix.value = b;
-   insn->rex_prefix.nbytes = 1;
+   insn_field_set(&insn->rex_prefix, b, 1);
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
@@ -295,8 +308,7 @@ void insn_get_modrm(stru

Re: [PATCH] s390: mark __cpacf_check_opcode() and cpacf_query_func() as __always_inline

2019-05-17 Thread Martin Schwidefsky
On Fri, 17 May 2019 15:54:24 +0900
Masahiro Yamada  wrote:

> Commit e60fb8bf68d4 ("s390/cpacf: mark scpacf_query() as __always_inline")
> was not enough to make sure to meet the 'i' (immediate) constraint for the
> asm operands.
> 
> With CONFIG_OPTIMIZE_INLINING enabled, Laura Abbott reported error
> with gcc 9.1.1:
> 
>   In file included from arch/s390/crypto/prng.c:29:
>   ./arch/s390/include/asm/cpacf.h: In function 'cpacf_query_func':
>   ./arch/s390/include/asm/cpacf.h:170:2: warning: asm operand 3 probably 
> doesn't match constraints
> 170 |  asm volatile(
> |  ^~~
>   ./arch/s390/include/asm/cpacf.h:170:2: error: impossible constraint in 'asm'
> 
> Add more __always_inline to force inlining.
> 
> Fixes: 9012d011660e ("compiler: allow all arches to enable 
> CONFIG_OPTIMIZE_INLINING")
> Reported-by: Laura Abbott 
> Signed-off-by: Masahiro Yamada 

Added to our internal tree and I will add it to s390/linux soon. Thanks.

Do you have a Kconfig patch in the works to enable OPTIMIZE_INLINING?
Otherwise we could just add it.

> ---
> 
>  arch/s390/include/asm/cpacf.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
> index f316de40e51b..19459dfb4295 100644
> --- a/arch/s390/include/asm/cpacf.h
> +++ b/arch/s390/include/asm/cpacf.h
> @@ -177,7 +177,7 @@ static inline void __cpacf_query(unsigned int opcode, 
> cpacf_mask_t *mask)
>   : "cc");
>  }
> 
> -static inline int __cpacf_check_opcode(unsigned int opcode)
> +static __always_inline int __cpacf_check_opcode(unsigned int opcode)
>  {
>   switch (opcode) {
>   case CPACF_KMAC:
> @@ -217,7 +217,7 @@ static inline int cpacf_test_func(cpacf_mask_t *mask, 
> unsigned int func)
>   return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
>  }
> 
> -static inline int cpacf_query_func(unsigned int opcode, unsigned int func)
> +static __always_inline int cpacf_query_func(unsigned int opcode, unsigned 
> int func)
>  {
>   cpacf_mask_t mask;
> 


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390: add unreachable() to dump_fault_info() to fix -Wmaybe-uninitialized

2019-05-17 Thread Martin Schwidefsky
On Fri, 17 May 2019 15:49:22 +0900
Masahiro Yamada  wrote:

> When CONFIG_OPTIMIZE_INLINING is enabled for s390, I see this warning:
> 
> arch/s390/mm/fault.c:127:15: warning: 'asce' may be used uninitialized in 
> this function [-Wmaybe-uninitialized]
>   switch (asce & _ASCE_TYPE_MASK) {
> arch/s390/mm/fault.c:177:16: note: 'asce' was declared here
>   unsigned long asce;
> ^~~~
> 
> If get_fault_type() is not inlined, the compiler cannot deduce that
> all the possible paths in the 'switch' statement are covered.
> 
> Of course, we could mark get_fault_type() as __always_inline to get
> back the original behavior, but I do not think it sensible to force
> inlining just for the purpose of suppressing the warning. Since this
> is just a matter of warning, I want to keep as much room for compiler
> optimization as possible.
> 
> I added unreachable() to teach the compiler that the 'default' label
> is unreachable.
> 
> I got rid of the 'inline' marker. Even without the 'inline' hint,
> the compiler inlines functions based on its inlining heuristic.
> 
> Fixes: 9012d011660e ("compiler: allow all arches to enable 
> CONFIG_OPTIMIZE_INLINING")
> Signed-off-by: Masahiro Yamada 

Added to our internal tree and I will add it to s390/linux soon. Thanks.

> ---
> 
>  arch/s390/mm/fault.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
> index c220399ae196..91ce03fd0c84 100644
> --- a/arch/s390/mm/fault.c
> +++ b/arch/s390/mm/fault.c
> @@ -85,7 +85,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
>   * Find out which address space caused the exception.
>   * Access register mode is impossible, ignore space == 3.
>   */
> -static inline enum fault_type get_fault_type(struct pt_regs *regs)
> +static enum fault_type get_fault_type(struct pt_regs *regs)
>  {
>   unsigned long trans_exc_code;
> 
> @@ -211,6 +211,8 @@ static void dump_fault_info(struct pt_regs *regs)
>   asce = S390_lowcore.kernel_asce;
>   pr_cont("kernel ");
>   break;
> + default:
> + unreachable();
>   }
>   pr_cont("ASCE.\n");
>   dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for the 5.2 merge window #2

2019-05-17 Thread Martin Schwidefsky
The following changes since commit 71ae5fc87c34ecbdca293c2a5c563d6be2576558:

  Merge tag 'linux-kselftest-5.2-rc1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest (2019-05-06 
20:29:45 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux tags/s390-5.2-2

for you to fetch changes up to f3e20ad67b4c8365df9818fd3c8026e105d6b53a:

  s390: move arch/s390/defconfig to arch/s390/configs/defconfig (2019-05-14 
17:54:48 +0200)


s390 updates for the 5.2 merge window #2

 - Enhancements for the QDIO layer

 - Remove the RCHP trace event

 - Avoid three build issues

 - Move the defconfig to the configs directory


Farhan Ali (1):
  s390/cio: Remove tracing for rchp instruction

Julian Wiedmann (3):
  s390/qdio: allow to scan all Output SBALs in one go
  s390/qdio: use get_buf_state() in debug_get_buf_state()
  s390/qdio: optimize state inspection of HW-owned SBALs

Martin Schwidefsky (1):
  s390/boot: fix compiler error due to missing awk strtonum

Masahiro Yamada (1):
  s390: move arch/s390/defconfig to arch/s390/configs/defconfig

Vasily Gorbik (2):
  latent_entropy: avoid build error when plugin cflags are not set
  s390/kasan: adapt disabled_wait usage to avoid build error

 arch/s390/Makefile  |  2 ++
 arch/s390/boot/Makefile |  1 -
 arch/s390/boot/compressed/vmlinux.lds.S |  2 ++
 arch/s390/{ => configs}/defconfig   |  0
 arch/s390/mm/kasan_init.c   |  2 +-
 drivers/s390/cio/qdio_main.c| 19 +++
 drivers/s390/cio/trace.c|  1 -
 drivers/s390/cio/trace.h| 23 ---
 include/linux/random.h  |  2 +-
 9 files changed, 17 insertions(+), 35 deletions(-)
 rename arch/s390/{ => configs}/defconfig (100%)

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index df1d6a1..de8521f 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -10,6 +10,8 @@
 # Copyright (C) 1994 by Linus Torvalds
 #
 
+KBUILD_DEFCONFIG := defconfig
+
 LD_BFD := elf64-s390
 KBUILD_LDFLAGS := -m elf64_s390
 KBUILD_AFLAGS_MODULE += -fPIC
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index c51496b..7cba96e 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -58,7 +58,6 @@ define cmd_section_cmp
touch $@
 endef
 
-OBJCOPYFLAGS_bzImage := --pad-to $$(readelf -s $(obj)/compressed/vmlinux | awk 
'/\<_end\>/ {print or(strtonum("0x"$$2),4095)+1}')
 $(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data 
$(obj)/section_cmp.boot.preserved.data FORCE
$(call if_changed,objcopy)
 
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S 
b/arch/s390/boot/compressed/vmlinux.lds.S
index 112b8d9..635217e 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -77,6 +77,8 @@ SECTIONS
_compressed_start = .;
*(.vmlinux.bin.compressed)
_compressed_end = .;
+   FILL(0xff);
+   . = ALIGN(4096);
}
. = ALIGN(256);
.bss : {
diff --git a/arch/s390/defconfig b/arch/s390/configs/defconfig
similarity index 100%
rename from arch/s390/defconfig
rename to arch/s390/configs/defconfig
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 01892dc..0c1f257 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -28,7 +28,7 @@ static void __init kasan_early_panic(const char *reason)
 {
sclp_early_printk("The Linux kernel failed to boot with the 
KernelAddressSanitizer:\n");
sclp_early_printk(reason);
-   disabled_wait(0);
+   disabled_wait();
 }
 
 static void * __init kasan_early_alloc_segment(void)
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index cfce255..7b7620d 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -205,17 +205,22 @@ static inline int get_buf_states(struct qdio_q *q, 
unsigned int bufnr,
 int auto_ack, int merge_pending)
 {
unsigned char __state = 0;
-   int i;
+   int i = 1;
 
if (is_qebsm(q))
return qdio_do_eqbs(q, state, bufnr, count, auto_ack);
 
/* get initial state: */
__state = q->slsb.val[bufnr];
+
+   /* Bail out early if there is no work on the queue: */
+   if (__state & SLSB_OWNER_CU)
+   goto out;
+
if (merge_pending && __state == SLSB_P_OUTPUT_PENDING)
__state = SLSB_P_OUTPUT_EMPTY;
 
-   for (i = 1; i < count; i++) {
+   for (; i < count; i++) {
bufnr = next_buf(bufnr);
 
/* merge PENDING into EMPTY: */
@@ -228,6 +233,8

Re: [PATCH] s390: move arch/s390/defconfig to arch/s390/configs/defconfig

2019-05-15 Thread Martin Schwidefsky
On Mon, 13 May 2019 11:11:16 +0900
Masahiro Yamada  wrote:

> As of Linux 5.1, alpha and s390 are the last architectures that
> have defconfig in arch/*/ instead of arch/*/configs/.
> 
>   $ find arch -name defconfig | sort
>   arch/alpha/defconfig
>   arch/arm64/configs/defconfig
>   arch/csky/configs/defconfig
>   arch/nds32/configs/defconfig
>   arch/riscv/configs/defconfig
>   arch/s390/defconfig
> 
> The arch/$(ARCH)/defconfig is the hard-coded default in Kconfig,
> and I want to deprecate it after evacuating the remaining defconfig
> into the standard location, arch/*/configs/.
> 
> Define KBUILD_DEFCONFIG like other architectures, and move defconfig
> into the configs/ subdirectory.
> 
> Signed-off-by: Masahiro Yamada 

I have added this patch to my linux/s390 tree and will push it upstream
soon. Thanks!

> ---
> 
>  arch/s390/Makefile| 2 ++
>  arch/s390/{ => configs}/defconfig | 0
>  2 files changed, 2 insertions(+)
>  rename arch/s390/{ => configs}/defconfig (100%)
> 
> diff --git a/arch/s390/Makefile b/arch/s390/Makefile
> index df1d6a150f30..de8521fc9de5 100644
> --- a/arch/s390/Makefile
> +++ b/arch/s390/Makefile
> @@ -10,6 +10,8 @@
>  # Copyright (C) 1994 by Linus Torvalds
>  #
> 
> +KBUILD_DEFCONFIG := defconfig
> +
>  LD_BFD   := elf64-s390
>  KBUILD_LDFLAGS   := -m elf64_s390
>  KBUILD_AFLAGS_MODULE += -fPIC
> diff --git a/arch/s390/defconfig b/arch/s390/configs/defconfig
> similarity index 100%
> rename from arch/s390/defconfig
> rename to arch/s390/configs/defconfig


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] latent_entropy: avoid build error when plugin cflags are not set

2019-05-08 Thread Martin Schwidefsky
On Tue, 7 May 2019 09:16:29 -0700
Kees Cook  wrote:

> On Tue, May 7, 2019 at 7:28 AM Vasily Gorbik  wrote:
> > Some architectures set up CFLAGS for linux decompressor phase from
> > scratch and do not include GCC_PLUGINS_CFLAGS. Since "latent_entropy"
> > variable declaration is generated by the plugin code itself including
> > linux/random.h in decompressor code then would cause a build
> > error. E.g. on s390:
> >
> > In file included from ./include/linux/net.h:22,
> >  from ./include/linux/skbuff.h:29,
> >  from ./include/linux/if_ether.h:23,
> >  from ./arch/s390/include/asm/diag.h:12,
> >  from arch/s390/boot/startup.c:8:
> > ./include/linux/random.h: In function 'add_latent_entropy':
> > ./include/linux/random.h:26:39: error: 'latent_entropy' undeclared
> > (first use in this function); did you mean 'add_latent_entropy'?
> >26 |  add_device_randomness((const void *)_entropy,
> >   |   ^~
> >   |   add_latent_entropy
> > ./include/linux/random.h:26:39: note: each undeclared identifier is
> > reported only once for each function it appears in
> >
> > The build error is triggered by commit a80313ff91ab ("s390/kernel:
> > introduce .dma sections") which made it into 5.2 merge window.
> >
> > To address that avoid using CONFIG_GCC_PLUGIN_LATENT_ENTROPY in
> > favour of LATENT_ENTROPY_PLUGIN definition which is defined as a
> > part of gcc plugins cflags and hence reflect more accurately when gcc
> > plugin is active. Besides that it is also used for similar purpose in
> > linux/compiler-gcc.h for latent_entropy attribute definition.
> >
> > Signed-off-by: Vasily Gorbik   
> 
> Thanks for fixing this! Do you want to take it via the s390 tree?
> 
> Acked-by: Kees Cook 

Sure, I can take it via the s390 tree.
 
> > ---
> >  include/linux/random.h | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/include/linux/random.h b/include/linux/random.h
> > index 445a0ea4ff49..d4eb9b3789ad 100644
> > --- a/include/linux/random.h
> > +++ b/include/linux/random.h
> > @@ -20,7 +20,7 @@ struct random_ready_callback {
> >
> >  extern void add_device_randomness(const void *, unsigned int);
> >
> > -#if defined(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) && !defined(__CHECKER__)
> > +#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
> >  static inline void add_latent_entropy(void)
> >  {
> > > add_device_randomness((const void *)&latent_entropy,
> > --
> > 2.18.0.13.gd42ae10
> >  
> 
> 


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for the 5.2 merge window

2019-05-06 Thread Martin Schwidefsky
The following changes since commit 2cc9637ce825f3a9f51f8f78af7474e9e85bfa5f:

  s390/dasd: Fix capacity calculation for large volumes (2019-03-29 07:23:44 
+0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux s390-5.2-1

for you to fetch changes up to ce968f6012f632bbe071839d229db77c45fc38d1:

  s390/vdso: drop unnecessary cc-ldoption (2019-05-03 17:17:58 +0200)


s390 updates for the 5.2 merge window

 - Support for kernel address space layout randomization

 - Add support for kernel image signature verification

 - Convert s390 to the generic get_user_pages_fast code

 - Convert s390 to the stack unwind API analog to x86

 - Add support for CPU directed interrupts for PCI devices

 - Provide support for MIO instructions to the PCI base layer, this
   will allow the use of direct PCI mappings in user space code

 - Add the basic KVM guest ultravisor interface for protected VMs

 - Add AT_HWCAP bits for several new hardware capabilities

 - Update the CPU measurement facility counter definitions to SVN 6

 - Arnd's cleanup patches for his quest to get LLVM compiles working

 - A vfio-ccw update with bug fixes and support for halt and clear

 - Improvements for the hardware TRNG code

 - Another round of cleanup for the QDIO layer

 - Numerous cleanups and bug fixes


Arnd Bergmann (12):
  s390: cio: fix cio_irb declaration
  s390: remove -fno-strength-reduce flag
  s390: don't build vdso32 with clang
  s390: syscall_wrapper: avoid clang warning
  s390: make __load_psw_mask work with clang
  s390: make chkbss work with clang
  s390: avoid __builtin_return_address(n) on clang
  s390: zcrypt: initialize variables before_use
  s390: only build for new CPUs with clang
  s390: boot, purgatory: pass $(CLANG_FLAGS) where needed
  s390: drop CONFIG_VIRT_TO_BUS
  s390: fix clang -Wpointer-sign warnigns in boot code

Cornelia Huck (6):
  vfio-ccw: make it safe to access channel programs
  vfio-ccw: rework ssch state handling
  vfio-ccw: protect the I/O region
  vfio-ccw: add capabilities chain
  s390/cio: export hsch to modules
  vfio-ccw: add handling for async channel instructions

Farhan Ali (3):
  vfio-ccw: Do not call flush_workqueue while holding the spinlock
  vfio-ccw: Release any channel program when releasing/removing vfio-ccw 
mdev
  vfio-ccw: Prevent quiesce function going into an infinite loop

Gerald Schaefer (8):
  s390: introduce .boot.preserved.data section
  s390/kernel: build a relocatable kernel
  s390/kernel: convert SYSCALL and PGM_CHECK handlers to .quad
  s390/kprobes: use static buffer for insn_page
  s390/sclp: do not use static sccbs
  s390/kernel: introduce .dma sections
  s390/kernel: add support for kernel address space layout randomization 
(KASLR)
  locking/lockdep: check for freed initmem in static_obj()

Harald Freudenberger (2):
  s390/crypto: rework generate_entropy function for pseudo random dd
  s390/crypto: use TRNG for seeding/reseeding

Joe Perches (1):
  s390: Convert IS_ENABLED uses to __is_defined

Julian Wiedmann (8):
  s390/qdio: clean up pci_out_supported()
  s390/qdio: clean up qdio_check_outbound_after_thinint()
  s390/qdio: fix output of DSCI value in debug file
  s390/qdio: pass up count of ready-to-process SBALs
  s390/qdio: simplify SBAL range calculation
  s390/qdio: eliminate queue's last_move cursor
  s390/qdio: limit direct access to first_to_check cursor
  s390/qdio: consolidate index tracking for queue scan

Martin Schwidefsky (21):
  s390/rseq: use trap4 for RSEQ_SIG
  s390: fine-tune stack switch helper
  s390/mm: make the pxd_offset functions more robust
  s390/mm: convert to the generic get_user_pages_fast code
  s390/mm: fix pxd_bad with folded page tables
  Merge tag 'vfio-ccw-20190425' of 
https://git.kernel.org/.../kvms390/vfio-ccw into features
  s390: report new CPU capabilities
  s390/ipl: make ipl_info less confusing
  s390/ipl: provide uapi header for list directed IPL
  s390/ipl: add definitions for the IPL report block
  s390/ipl: read IPL report at early boot
  s390/ipl: add helper functions to create an IPL report
  s390/boot: pad bzImage to 4K
  s390: add missing ENDPROC statements to assembler functions
  s390/nospec: rename assembler generated expoline thunks
  s390: use proper expoline sections for .dma code
  s390/bug: add entry size to the __bug_table section
  s390/opcodes: add missing instructions to the disassembler
  s390/unwind: introduce stack unwind API
  s390/ftrace: use HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
  s390: simplify disabled_wait

Nick Desaulniers (1):
  s390/vdso: drop unnecessary cc-ldoption

Philipp

Re: Linux 5.1-rc5

2019-05-02 Thread Martin Schwidefsky
On Thu, 2 May 2019 16:31:10 +0200
Greg KH  wrote:

> On Thu, May 02, 2019 at 04:17:58PM +0200, Martin Schwidefsky wrote:
> > On Thu, 2 May 2019 14:21:28 +0200
> > Greg KH  wrote:
> >   
> > > On Mon, Apr 15, 2019 at 09:17:10AM -0700, Linus Torvalds wrote:  
> > > > On Sun, Apr 14, 2019 at 10:19 PM Christoph Hellwig  
> > > > wrote:
> > > > >
> > > > > Can we please have the page refcount overflow fixes out on the list
> > > > > for review, even if it is after the fact?
> > > > 
> > > > They were actually on a list for review long before the fact, but it
> > > > was the security mailing list. The issue actually got discussed back
> > > > in January along with early versions of the patches, but then we
> > > > dropped the ball because it just wasn't on anybody's radar and it got
> > > > resurrected late March. Willy wrote a rather bigger patch-series, and
> > > > review of that is what then resulted in those commits. So they may
> > > > look recent, but that's just because the original patches got
> > > > seriously edited down and rewritten.
> > > > 
> > > > That said, powerpc and s390 should at least look at maybe adding a
> > > > check for the page ref in their gup paths too. Powerpc has the special
> > > > gup_hugepte() case, and s390 has its own version of gup entirely. I
> > > > was actually hoping the s390 guys would look at using the generic gup
> > > > code.
> > > > 
> > > > I ruthlessly also entirely ignored MIPS, SH and sparc, since they seem
> > > > largely irrelevant, partly since even theoretically this whole issue
> > > > needs a _lot_ of memory.
> > > > 
> > > > Michael, Martin, see commit 6b3a70773630 ("Merge branch 'page-refs'
> > > > (page ref overflow)"). You may or may not really care.
> > > 
> > > I've now queued these patches up for the next round of stable releases,
> > > as some people seem to care about these.
> > > 
> > > I didn't see any follow-on patches for s390 or ppc64 hit the tree for
> > > these changes, am I just missing them and should also queue up a few
> > > more to handle this issue on those platforms?  
> > 
> > I fixed that with a different approach. The following two patches are
> > queued for the next merge window:
> > 
> > d1874a0c2805 "s390/mm: make the pxd_offset functions more robust"
> > 1a42010cdc26 "s390/mm: convert to the generic get_user_pages_fast code"
> > 
> > With these two s390 now uses the generic gup code in mm/gup.c  
> 
> Nice!  Do you want me to queue those up for the stable backports once
> they hit a public -rc release?

Yes please!

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for 5.1-rc6

2019-04-18 Thread Martin Schwidefsky
The following changes since commit 9936328b41ce4bce8f20269dcac8cb476c8d0820:

  Merge tag 'pci-v5.1-fixes-1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci (2019-03-28 13:29:09 
-0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux tags/s390-5.1-3

for you to fetch changes up to 35af0d469c6694c05f06e75c5d75caee9be66122:

  s390: correct some inline assembly constraints (2019-04-17 10:40:57 +0200)


s390 update with bug fixes for 5.1-rc6

 - Fix overwrite of the initial ramdisk due to misuse of IS_ENABLED

 - Fix integer overflow in the dasd driver resulting in incorrect number
   of blocks for large devices

 - Fix a lockdep false positive in the 3270 driver

 - Fix a deadlock in the zcrypt driver

 - Fix incorrect debug feature entries in the pkey api

 - Fix inline assembly constraints fallout with CONFIG_KASAN=y


Harald Freudenberger (2):
  s390/zcrypt: fix possible deadlock situation on ap queue remove
  s390/pkey: add one more argument space for debug feature entry

Joe Perches (1):
  s390/mem_detect: Use IS_ENABLED(CONFIG_BLK_DEV_INITRD)

Martin Schwidefsky (1):
  s390/3270: fix lockdep false positive on view->lock

Peter Oberparleiter (1):
  s390/dasd: Fix capacity calculation for large volumes

Vasily Gorbik (1):
  s390: correct some inline assembly constraints

 arch/s390/boot/mem_detect.c| 2 +-
 arch/s390/kernel/fpu.c | 2 +-
 arch/s390/kernel/vtime.c   | 8 
 drivers/s390/block/dasd_eckd.c | 6 +++---
 drivers/s390/char/con3270.c| 2 +-
 drivers/s390/char/fs3270.c | 3 ++-
 drivers/s390/char/raw3270.c| 3 ++-
 drivers/s390/char/raw3270.h| 4 +++-
 drivers/s390/char/tty3270.c| 3 ++-
 drivers/s390/crypto/ap_queue.c | 2 +-
 drivers/s390/crypto/pkey_api.c | 3 ++-
 11 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 4cb771b..5d316fe 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void)
 {
unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
 
-   if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+   if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
INITRD_START < offset + ENTRIES_EXTENDED_MAX)
offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
 
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 594464f..0da378e 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -23,7 +23,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 
if (flags & KERNEL_FPC)
/* Save floating point control */
-   asm volatile("stfpc %0" : "=m" (state->fpc));
+   asm volatile("stfpc %0" : "=Q" (state->fpc));
 
if (!MACHINE_HAS_VX) {
if (flags & KERNEL_VXR_V0V7) {
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index a69a091..c475ca4 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -37,7 +37,7 @@ static inline u64 get_vtimer(void)
 {
u64 timer;
 
-   asm volatile("stpt %0" : "=m" (timer));
+   asm volatile("stpt %0" : "=Q" (timer));
return timer;
 }
 
@@ -48,7 +48,7 @@ static inline void set_vtimer(u64 expires)
asm volatile(
"   stpt%0\n"   /* Store current cpu timer value */
"   spt %1" /* Set new value imm. afterwards */
-   : "=m" (timer) : "m" (expires));
+   : "=Q" (timer) : "Q" (expires));
S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
S390_lowcore.last_update_timer = expires;
 }
@@ -135,8 +135,8 @@ static int do_account_vtime(struct task_struct *tsk)
 #else
"   stck%1" /* Store current tod clock value */
 #endif
-   : "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock));
+   : "=Q" (S390_lowcore.last_update_timer),
+ "=Q" (S390_lowcore.last_update_clock));
clock = S390_lowcore.last_update_clock - clock;
timer -= S390_lowcore.last_update_timer;
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 6e294b4..f89f9d0 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2004,14 +2004,14 @@ static int dasd_eckd_end_analysis(struct dasd_block 
*block)
blk_per_trk 

Re: [PATCH 2/2] s390: boot, purgatory: pass $(CLANG_FLAGS) where needed

2019-04-15 Thread Martin Schwidefsky
On Thu, 11 Apr 2019 11:08:31 -0700
Nick Desaulniers  wrote:

> On Thu, Apr 11, 2019 at 1:52 AM Arnd Bergmann  wrote:
> >
> > On Thu, Apr 11, 2019 at 12:14 AM 'Nick Desaulniers' via Clang Built
> > Linux  wrote:  
> > > On Wed, Apr 10, 2019 at 1:13 PM Arnd Bergmann  wrote:  
> > > >
> > > > The purgatory and boot Makefiles do not inherit the original cflags,
> > > > so clang falls back to the default target architecture when building it,
> > > > typically this would be x86 when cross-compiling.
> > > >
> > > > Add $(CLANG_FLAGS) everywhere so we pass the correct 
> > > > --target=s390x-linux
> > > > option when cross-compiling.
> > > >
> > > > Signed-off-by: Arnd Bergmann 
> > > > ---
> > > >  arch/s390/Makefile   | 5 +++--
> > > >  arch/s390/purgatory/Makefile | 1 +
> > > >  2 files changed, 4 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/arch/s390/Makefile b/arch/s390/Makefile
> > > > index 9c079a506325..443990791099 100644
> > > > --- a/arch/s390/Makefile
> > > > +++ b/arch/s390/Makefile
> > > > @@ -17,12 +17,13 @@ KBUILD_CFLAGS_MODULE += -fPIC
> > > >  KBUILD_AFLAGS  += -m64
> > > >  KBUILD_CFLAGS  += -m64
> > > >  aflags_dwarf   := -Wa,-gdwarf-2
> > > > -KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__
> > > > +KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
> > > >  KBUILD_AFLAGS_DECOMPRESSOR += $(if 
> > > > $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
> > > > -KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2
> > > > +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2
> > > >  KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
> > > >  KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks 
> > > > -msoft-float
> > > >  KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables  
> > >
> > > Thanks for the respin with Nathan's suggestion.
> > >  
> > > > +KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning,pointer-sign)  
> > >
> > > What's up with this ^ ?  Seems like the top level sets it (without
> > > cc-disable-warning :( ), but then KBUILD_CFLAGS_DECOMPRESSOR discards
> > > it.  Does Clang actually flag code in this arch (that GCC doesn't)?  
> >
> > Oops, that should have been a separate patch.
> >
> > I think what happens is that clang warns more aggressively about pointer 
> > sign
> > bugs than gcc in some cases, and some of those cases happen in s390
> > header files that are included by both the kernel and the decompressor.
> >
> > The full warning log without this change is rather long, see
> > https://pastebin.com/KG9xaTNB  
> 
> From this link, it looks like the definitions of:
> __atomic64_or
> __atomic64_and
> __atomic64_xor
> and their *_barrier variants are problematic.  I think converting
> those to use unsigned long is the way to go.  Shouldn't you be doing
> bitwise ops on unsigned types anyways?

These functions follow the type of atomic64_t which is a "long" wrapped
in a structure. We do not want to change that to unsigned long, do we?
Then having some of the functions operate on "long" and others on
"unsigned long" seems odd.

> The warnings with __atomic64_add are tougher to read/understand since
> at that point the log lines look like they start to mix together.
> 
> >
> > I also tried patching the code to avoid the warnings, but I'm not entirely
> > happy with that result either, see
> > https://pastebin.com/pSMz5eZA  
> 
> That's not terrible, IMO, particularly with the change I suggest above.

That is not too bad, the only change I do not like is the s/u8/char/ in
struct ipl_block_fcp.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 05/12] s390: zcrypt: initialize variables before_use

2019-04-11 Thread Martin Schwidefsky
On Wed, 10 Apr 2019 20:57:21 +0200
Arnd Bergmann  wrote:

> On Wed, Apr 10, 2019 at 5:59 PM Martin Schwidefsky
>  wrote:
> > On Tue, 9 Apr 2019 11:54:30 +0200 Harald Freudenberger 
> >  wrote:  
> > > On 08.04.19 23:26, Arnd Bergmann wrote:  
> > > > }  
> > > Thanks Arnd, but as Nathan already wrote, I'd prefer to have the
> > > variable initialized with 0 instead of -1.
> > > If you agree with this, I'll rewrite the patch and apply it to our
> > > internal git and it will appear at kernel org with the next s390 code 
> > > merge then.  
> >
> > Do we have agreement on func_code=0 for this one?
> 
> Yes, I think that was the consensus.
> 
>Arnd
> 

Ok, committed with func_code=0.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 10/12] s390: avoid __builtin_return_address(n) on clang

2019-04-11 Thread Martin Schwidefsky
On Wed, 10 Apr 2019 21:07:56 +0200
Arnd Bergmann  wrote:

> On Wed, Apr 10, 2019 at 6:14 PM Steven Rostedt  wrote:
> > On Wed, 10 Apr 2019 18:03:57 +0200 Martin Schwidefsky 
> >  wrote:
> >  
> > > > --- a/arch/s390/include/asm/ftrace.h
> > > > +++ b/arch/s390/include/asm/ftrace.h
> > > > @@ -13,7 +13,12 @@
> > > >
> > > >  #ifndef __ASSEMBLY__
> > > >
> > > > +#ifdef CONFIG_CC_IS_CLANG
> > > > +/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
> > > > +#define ftrace_return_address(n) __builtin_return_address(0)
> > > > +#else
> > > >  #define ftrace_return_address(n) __builtin_return_address(n)
> > > > +#endif
> > > >
> > > >  void _mcount(void);
> > > >  void ftrace_caller(void);  
> > >
> > > I can't say I like this one. If the compiler can not do 
> > > __builtin_return_address(n)
> > > it feels wrong to just use __builtin_return_address(0).  
> >
> > I agree. The proper return value is 0UL, see include/linux/ftrace.h
> >
> > /* Archs may use other ways for ADDR1 and beyond */
> > #ifndef ftrace_return_address
> > # ifdef CONFIG_FRAME_POINTER
> > #  define ftrace_return_address(n) __builtin_return_address(n)
> > # else
> > #  define ftrace_return_address(n) 0UL
> > # endif
> > #endif
> >
> > This is why we treat zero differently:
> >
> > #define CALLER_ADDR0 ((unsigned long)ftrace_return_address0)
> > #define CALLER_ADDR1 ((unsigned long)ftrace_return_address(1))
> > #define CALLER_ADDR2 ((unsigned long)ftrace_return_address(2))
> > #define CALLER_ADDR3 ((unsigned long)ftrace_return_address(3))
> > #define CALLER_ADDR4 ((unsigned long)ftrace_return_address(4))
> > #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
> > #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))  
> 
> Right, got it.
> 
> Martin, do you want me to send a replacement patch, or can you
> commit the patch with
> 
> #ifdef CONFIG_CC_IS_CLANG
> /* https://bugs.llvm.org/show_bug.cgi?id=41424 */
> #define ftrace_return_address(n) 0UL
> #else
> #define ftrace_return_address(n) __builtin_return_address(n)
> #endif
> 
> instead?

Ok, done.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: rseq/s390: choosing code signature

2019-04-10 Thread Martin Schwidefsky
On Wed, 10 Apr 2019 11:57:36 -0400 (EDT)
Mathieu Desnoyers  wrote:

> - On Apr 10, 2019, at 11:52 AM, schwidefsky schwidef...@de.ibm.com wrote:
> 
> > On Wed, 10 Apr 2019 11:50:39 -0400 (EDT)
> > Mathieu Desnoyers  wrote:
> >   
> >> - On Apr 10, 2019, at 6:32 AM, schwidefsky schwidef...@de.ibm.com 
> >> wrote:
> >>   
> >> > On Tue, 9 Apr 2019 15:32:22 -0400 (EDT)
> >> > Mathieu Desnoyers  wrote:
> >> > 
> >> >> Hi,
> >> >> 
> >> >> We are about to include the code signature required prior to restartable
> >> >> sequences abort handlers into glibc, which will make this ABI choice 
> >> >> final.
> >> >> We need architecture maintainer input on that signature value.
> >> >> 
> >> >> That code signature is placed before each abort handler, so the kernel 
> >> >> can
> >> >> validate that it is indeed jumping to an abort handler (and not some
> >> >> arbitrary attacker-chosen code). The signature is never executed.
> >> >> 
> >> >> The current discussion thread on the glibc mailing list leads us towards
> >> >> using a trap with uncommon immediate operand, which simplifies 
> >> >> integration
> >> >> with disassemblers, emulators, makes it easier to debug if the control
> >> >> flow gets redirected there by mistake, and is nicer for some 
> >> >> architecture's
> >> >> speculative execution.
> >> >> 
> >> >> We can have different signatures for each sub-architecture, as long as 
> >> >> they
> >> >> don't have to co-exist within the same process. We can special-case with
> >> >> #ifdef for each sub-architecture and endianness if need be. If the 
> >> >> architecture
> >> >> has instruction set extensions that can co-exist with the architecture
> >> >> instruction set within the same process, we need to take into account 
> >> >> to which
> >> >> instruction the chosen signature value would map (and possibly decide 
> >> >> if we
> >> >> need to extend rseq to support many signatures).
> >> >> 
> >> >> Here is an example of rseq signature definition template:
> >> >> 
> >> >> /*
> >> >>  * TODO: document trap instruction objdump output on each 
> >> >> sub-architecture
> >> >>  * instruction sets, as well as instruction set extensions.
> >> >>  */
> >> >> #define RSEQ_SIG 0x
> >> >> 
> >> >> Ideally we'd need a patch on top of the Linux kernel
> >> >> tools/testing/selftests/rseq/rseq-s390.h file that updates
> >> >> the signature value, so I can then pick it up for the glibc
> >> >> patchset.  
> >> > 
> >> > The trap4 instruction is a suitable one. The patch would look like this  
> >> 
> >> Great! I'm picking it up into my rseq tree if that's OK with you.  
> > 
> > Just added the patch to s390/linux:features for the next merge window as 
> > well.  
> 
> Sounds good! I'll carry it in my tree to have a comprehensive up-to-date list 
> of
> rseq signatures for all architectures in a single tree. Worst case the exact 
> same
> change will be pulled from both architecture and rseq trees, which I don't 
> think
> should be an issue, right ?

Should be fine, the worst that can happen is a minor merge conflict.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 10/12] s390: avoid __builtin_return_address(n) on clang

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:23 +0200
Arnd Bergmann  wrote:

> llvm on s390 has problems with __builtin_return_address(n), with n>0,
> this results in a somewhat cryptic error message:
> 
> fatal error: error in backend: Unsupported stack frame traversal count
> 
> To work around it, use the direct return address directly. This
> is probably not ideal here, but gets things to compile and should
> only lead to inferior reporting, not to misbehavior of the generated
> code.
> 
> Link: https://bugs.llvm.org/show_bug.cgi?id=41424
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/s390/include/asm/ftrace.h | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
> index 5a3c95b11952..7923c63946fb 100644
> --- a/arch/s390/include/asm/ftrace.h
> +++ b/arch/s390/include/asm/ftrace.h
> @@ -13,7 +13,12 @@
> 
>  #ifndef __ASSEMBLY__
> 
> +#ifdef CONFIG_CC_IS_CLANG
> +/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
> +#define ftrace_return_address(n) __builtin_return_address(0)
> +#else
>  #define ftrace_return_address(n) __builtin_return_address(n)
> +#endif
> 
>  void _mcount(void);
>  void ftrace_caller(void);

I can't say I like this one. If the compiler can not do 
__builtin_return_address(n)
it feels wrong to just use __builtin_return_address(0).

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 11/12] s390: make chkbss work with clang

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:24 +0200
Arnd Bergmann  wrote:

> llvm skips an empty .bss section entirely, which makes
> the check fail with an unexpected error:
> 
> /tmp/binutils-multi-test/bin/s390x-linux-gnu-objdump: section '.bss' 
> mentioned in a -j option, but not found in any input file
> error: arch/s390/boot/compressed/decompressor.o .bss section is not empty
> ../arch/s390/scripts/Makefile.chkbss:20: recipe for target 
> 'arch/s390/boot/compressed/decompressor.o.chkbss' failed
> 
> Change the check so we first see if a .bss section exists
> before trying to read its size.
> 
> Signed-off-by: Arnd Bergmann 

Added to s390/linux:features for the next merge window. Thanks.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 08/12] s390: syscall_wrapper: avoid clang warning

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:21 +0200
Arnd Bergmann  wrote:

> Building system calls with clang results in a warning
> about an alias from a global function to a static one:
> 
> ../fs/namei.c:3847:1: warning: unused function '__se_sys_mkdirat' 
> [-Wunused-function]
> SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, 
> mode)
> ^
> ../include/linux/syscalls.h:219:36: note: expanded from macro 
> 'SYSCALL_DEFINE3'
>  #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
>^
> ../include/linux/syscalls.h:228:2: note: expanded from macro 'SYSCALL_DEFINEx'
> __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
> ^
> ../arch/s390/include/asm/syscall_wrapper.h:126:18: note: expanded from macro 
> '__SYSCALL_DEFINEx'
> asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))
>   \
> ^
> :31:1: note: expanded from here
> __se_sys_mkdirat
> ^
> 
> The only reference to the static __se_sys_mkdirat() here is the alias, but
> this only gets evaluated later. Making this function global as well avoids
> the warning.
> 
> Signed-off-by: Arnd Bergmann 

Added to s390/linux:features for the next merge window. Thanks.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 09/12] s390: make __load_psw_mask work with clang

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:22 +0200
Arnd Bergmann  wrote:

> clang fails to use the %O and %R inline assembly modifiers
> the same way as gcc, leading to build failures with every use
> of __load_psw_mask():
> 
> /tmp/nmi-4a9f80.s: Assembler messages:
> /tmp/nmi-4a9f80.s:571: Error: junk at end of line: `+8(160(%r11))'
> /tmp/nmi-4a9f80.s:626: Error: junk at end of line: `+8(160(%r11))'
> 
> Replace these with a more conventional way of passing the addresses
> that should work with both clang and gcc.
> 
> Signed-off-by: Arnd Bergmann 

Added to s390/linux:features for the next merge window. Thanks.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 06/12] s390: ctcm: fix ctcm_new_device error return code

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:19 +0200
Arnd Bergmann  wrote:

> clang points out that the return code from this function is
> undefined for one of the error paths:
> 
> ../drivers/s390/net/ctcm_main.c:1595:7: warning: variable 'result' is used 
> uninitialized whenever 'if' condition is true
>   [-Wsometimes-uninitialized]
> if (priv->channel[direction] == NULL) {
> ^~~~
> ../drivers/s390/net/ctcm_main.c:1638:9: note: uninitialized use occurs here
> return result;
>^~
> ../drivers/s390/net/ctcm_main.c:1595:3: note: remove the 'if' if its 
> condition is always false
> if (priv->channel[direction] == NULL) {
> ^~~
> ../drivers/s390/net/ctcm_main.c:1539:12: note: initialize the variable 
> 'result' to silence this warning
> int result;
>   ^
> 
> Make it return -ENODEV here, as in the related failure cases.
> gcc has a known bug in underreporting some of these warnings
> when it has already eliminated the assignment of the return code
> based on some earlier optimization step.
> 
> Signed-off-by: Arnd Bergmann 

Added to our internal tree for Julian to pick up.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 05/12] s390: zcrypt: initialize variables before_use

2019-04-10 Thread Martin Schwidefsky
On Tue, 9 Apr 2019 11:54:30 +0200
Harald Freudenberger  wrote:

> On 08.04.19 23:26, Arnd Bergmann wrote:
> > The 'func_code' variable gets printed in debug statements without
> > a prior initialization in multiple functions, as reported when building
> > with clang:
> >
> > drivers/s390/crypto/zcrypt_api.c:659:6: warning: variable 'func_code' is 
> > used uninitialized whenever 'if' condition is true
> >   [-Wsometimes-uninitialized]
> > if (mex->outputdatalength < mex->inputdatalength) {
> > ^~~~
> > drivers/s390/crypto/zcrypt_api.c:725:29: note: uninitialized use occurs here
> > trace_s390_zcrypt_rep(mex, func_code, rc,
> >^
> > drivers/s390/crypto/zcrypt_api.c:659:2: note: remove the 'if' if its 
> > condition is always false
> > if (mex->outputdatalength < mex->inputdatalength) {
> > ^~~
> > drivers/s390/crypto/zcrypt_api.c:654:24: note: initialize the variable 
> > 'func_code' to silence this warning
> > unsigned int func_code;
> >   ^
> >
> > Add initializations to all affected code paths to shut up the warning
> > and make the warning output consistent.
> >
> > Signed-off-by: Arnd Bergmann 
> > ---
> >  drivers/s390/crypto/zcrypt_api.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/s390/crypto/zcrypt_api.c 
> > b/drivers/s390/crypto/zcrypt_api.c
> > index eb93c2d27d0a..23472063d9a8 100644
> > --- a/drivers/s390/crypto/zcrypt_api.c
> > +++ b/drivers/s390/crypto/zcrypt_api.c
> > @@ -657,6 +657,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms,
> > trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO);
> >  
> > if (mex->outputdatalength < mex->inputdatalength) {
> > +   func_code = -1;
> > rc = -EINVAL;
> > goto out;
> > }
> > @@ -739,6 +740,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms,
> > trace_s390_zcrypt_req(crt, TP_ICARSACRT);
> >  
> > if (crt->outputdatalength < crt->inputdatalength) {
> > +   func_code = -1;
> > rc = -EINVAL;
> > goto out;
> > }
> > @@ -946,6 +948,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms 
> > *perms,
> >  
> > targets = kcalloc(target_num, sizeof(*targets), GFP_KERNEL);
> > if (!targets) {
> > +   func_code = -1;
> > rc = -ENOMEM;
> > goto out;
> > }
> > @@ -953,6 +956,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms 
> > *perms,
> > uptr = (struct ep11_target_dev __force __user *) xcrb->targets;
> > if (copy_from_user(targets, uptr,
> >target_num * sizeof(*targets))) {
> > +   func_code = -1;
> > rc = -EFAULT;
> > goto out_free;
> > }  
> Thanks Arnd, but as Nathan already wrote, I'd prefer to have the
> variable initialized with 0 instead of -1.
> If you agree with this, I'll rewrite the patch and apply it to our
> internal git and it will appear at kernel org with the next s390 code merge 
> then.

Do we have agreement on func_code=0 for this one?

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 04/12] s390: qeth: address type mismatch warning

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:17 +0200
Arnd Bergmann  wrote:

> clang produces a harmless warning for each use of the qeth_adp_supported
> macro:
> 
> drivers/s390/net/qeth_l2_main.c:559:31: warning: implicit conversion from 
> enumeration type 'enum qeth_ipa_setadp_cmd' to
>   different enumeration type 'enum qeth_ipa_funcs' [-Wenum-conversion]
> if (qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE))
> ~^~~~
> drivers/s390/net/qeth_core.h:179:41: note: expanded from macro 
> 'qeth_adp_supported'
> qeth_is_ipa_supported(&c->options.adp, f)
> ~  ^
> 
> Add a version of this macro that uses the correct types, and
> remove the unused qeth_adp_enabled() macro that has the same
> problem.
> 
> Signed-off-by: Arnd Bergmann 

I have added this to our internal tree for Julian to pick up.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 02/12] s390: don't build vdso32 with clang

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:15 +0200
Arnd Bergmann  wrote:

> clang does not support 31 bit object files on s390, so skip
> the 32-bit vdso here, and only build it when using gcc to compile
> the kernel.
> 
> Signed-off-by: Arnd Bergmann 

Added to s390/linux:features for the next merge window. Thanks.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 01/12] s390: remove -fno-strength-reduce flag

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:14 +0200
Arnd Bergmann  wrote:

> This was added as a workaround for really old compilers, and it prevents
> building with clang now. I can see no reason for keeping it, as it has
> already been removed for most architectures in the pre-git era, so
> let's remove it everywhere, rather than only for clang.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/s390/Makefile | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/s390/Makefile b/arch/s390/Makefile
> index 86c76b149cc2..0087d11e3eaf 100644
> --- a/arch/s390/Makefile
> +++ b/arch/s390/Makefile
> @@ -114,7 +114,7 @@ endif
>  cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 
> 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1)
> 
>  KBUILD_CFLAGS+= -mbackchain -msoft-float $(cflags-y)
> -KBUILD_CFLAGS+= -pipe -fno-strength-reduce -Wno-sign-compare
> +KBUILD_CFLAGS+= -pipe -Wno-sign-compare
>  KBUILD_CFLAGS+= -fno-asynchronous-unwind-tables $(cfi)
>  KBUILD_AFLAGS+= $(aflags-y) $(cfi)
>  export KBUILD_AFLAGS_DECOMPRESSOR

Added to s390/linux:features for the next merge window. Thanks.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: rseq/s390: choosing code signature

2019-04-10 Thread Martin Schwidefsky
On Wed, 10 Apr 2019 11:50:39 -0400 (EDT)
Mathieu Desnoyers  wrote:

> - On Apr 10, 2019, at 6:32 AM, schwidefsky schwidef...@de.ibm.com wrote:
> 
> > On Tue, 9 Apr 2019 15:32:22 -0400 (EDT)
> > Mathieu Desnoyers  wrote:
> >   
> >> Hi,
> >> 
> >> We are about to include the code signature required prior to restartable
> >> sequences abort handlers into glibc, which will make this ABI choice final.
> >> We need architecture maintainer input on that signature value.
> >> 
> >> That code signature is placed before each abort handler, so the kernel can
> >> validate that it is indeed jumping to an abort handler (and not some
> >> arbitrary attacker-chosen code). The signature is never executed.
> >> 
> >> The current discussion thread on the glibc mailing list leads us towards
> >> using a trap with uncommon immediate operand, which simplifies integration
> >> with disassemblers, emulators, makes it easier to debug if the control
> >> flow gets redirected there by mistake, and is nicer for some architecture's
> >> speculative execution.
> >> 
> >> We can have different signatures for each sub-architecture, as long as they
> >> don't have to co-exist within the same process. We can special-case with
> >> #ifdef for each sub-architecture and endianness if need be. If the 
> >> architecture
> >> has instruction set extensions that can co-exist with the architecture
> >> instruction set within the same process, we need to take into account to 
> >> which
> >> instruction the chosen signature value would map (and possibly decide if we
> >> need to extend rseq to support many signatures).
> >> 
> >> Here is an example of rseq signature definition template:
> >> 
> >> /*
> >>  * TODO: document trap instruction objdump output on each sub-architecture
> >>  * instruction sets, as well as instruction set extensions.
> >>  */
> >> #define RSEQ_SIG 0x
> >> 
> >> Ideally we'd need a patch on top of the Linux kernel
> >> tools/testing/selftests/rseq/rseq-s390.h file that updates
> >> the signature value, so I can then pick it up for the glibc
> >> patchset.  
> > 
> > The trap4 instruction is a suitable one. The patch would look like this  
> 
> Great! I'm picking it up into my rseq tree if that's OK with you.

Just added the patch to s390/linux:features for the next merge window as well.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 12/12] [PROBABLY WRONG] s390: void '0' constraint in inline assembly

2019-04-10 Thread Martin Schwidefsky
On Mon,  8 Apr 2019 23:26:25 +0200
Arnd Bergmann  wrote:

> clang does not understand the constraint "0" in the CALL_ON_STACK()
> macro:
> 
> ../arch/s390/mm/maccess.c:117:10: error: invalid input constraint '0' in asm
> return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
>^
> ../arch/s390/include/asm/processor.h:292:20: note: expanded from macro 
> 'CALL_ON_STACK'
>   [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr);\
>  ^
> :207:1: note: expanded from here
> CALL_FMT_3
> ^
> ../arch/s390/include/asm/processor.h:267:20: note: expanded from macro 
> 'CALL_FMT_3'
>  #define CALL_FMT_3 CALL_FMT_2, "d" (r4)
>^
> ../arch/s390/include/asm/processor.h:266:20: note: expanded from macro 
> 'CALL_FMT_2'
>  #define CALL_FMT_2 CALL_FMT_1, "d" (r3)
>^
> ../arch/s390/include/asm/processor.h:265:32: note: expanded from macro 
> 'CALL_FMT_1'
>  #define CALL_FMT_1 CALL_FMT_0, "0" (r2)
>^
> 
> I don't know what the correct fix here would be, changing it to "d" made
> it build, since clang does understand this one.
> 
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/s390/include/asm/processor.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/s390/include/asm/processor.h 
> b/arch/s390/include/asm/processor.h
> index 700c650ffd4f..84c59c99668a 100644
> --- a/arch/s390/include/asm/processor.h
> +++ b/arch/s390/include/asm/processor.h
> @@ -262,7 +262,7 @@ static __no_kasan_or_inline unsigned short stap(void)
>   register unsigned long r4 asm("6") = (unsigned long)(arg5)
> 
>  #define CALL_FMT_0
> -#define CALL_FMT_1 CALL_FMT_0, "0" (r2)
> +#define CALL_FMT_1 CALL_FMT_0, "d" (r2)
>  #define CALL_FMT_2 CALL_FMT_1, "d" (r3)
>  #define CALL_FMT_3 CALL_FMT_2, "d" (r4)
>  #define CALL_FMT_4 CALL_FMT_3, "d" (r5)

This is (slightly) wrong. %r2 is used as the input register for the first 
argument
and the result value for the call. With your patch you force the compiler to 
load
the first argument in two registers. One solution would be to define CALL_FMT_1 as

#define CALL_FMT_1 CALL_FMT_0

It still is not optimal though as for CALL_FMT_0 the "+" (r2) indicates an
input but CALL_ARGS_0 does not initialize r2.

I am thinking about the following patch to cover all cases:
--
>From 91a4abbec91a9f26f84f7386f2c0f96de669b0eb Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky 
Date: Wed, 10 Apr 2019 15:48:43 +0200
Subject: [PATCH] s390: fine-tune stack switch helper

The CALL_ON_STACK helper currently does not work with clang and for
calls without arguments it does not initialize r2 although the constraint
is "+". Rework the CALL_FMT_x and the CALL_ON_STACK macros to work
with clang and produce optimal code in all cases.

Reported-by: Arnd Bergmann 
Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/processor.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/s390/include/asm/processor.h 
b/arch/s390/include/asm/processor.h
index 81038ab357ce..0ee022247580 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -261,12 +261,12 @@ static __no_kasan_or_inline unsigned short stap(void)
CALL_ARGS_4(arg1, arg2, arg3, arg4);\
register unsigned long r4 asm("6") = (unsigned long)(arg5)
 
-#define CALL_FMT_0
-#define CALL_FMT_1 CALL_FMT_0, "0" (r2)
-#define CALL_FMT_2 CALL_FMT_1, "d" (r3)
-#define CALL_FMT_3 CALL_FMT_2, "d" (r4)
-#define CALL_FMT_4 CALL_FMT_3, "d" (r5)
-#define CALL_FMT_5 CALL_FMT_4, "d" (r6)
+#define CALL_FMT_0 "=" (r2) :
+#define CALL_FMT_1 "+" (r2) :
+#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
+#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
+#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
+#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
 
 #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
 #define CALL_CLOBBER_4 CALL_CLOBBER_5
@@ -286,10 +286,10 @@ static __no_kasan_or_inline unsigned short stap(void)
"   stg %[_prev],%[_bc](15)\n"  \
"   brasl   14,%[_fn]\n"\
"   la  15,0(%[_prev])\n"   \
-   : "+" (r2), [_prev] "=" (prev)  \
-   : [_stack] "a" (stack), \
+   : [_prev] "=" (prev), CALL_FMT_##nr   \
+ [_stack] "a" (stack), \
  [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
- [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr);\
+ [_fn] "X" (fn) : CALL_CLOBBER_##nr);  \
r2; \
 })
 
-- 
2.16.4
-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: rseq/s390: choosing code signature

2019-04-10 Thread Martin Schwidefsky
On Tue, 9 Apr 2019 15:32:22 -0400 (EDT)
Mathieu Desnoyers  wrote:

> Hi,
> 
> We are about to include the code signature required prior to restartable
> sequences abort handlers into glibc, which will make this ABI choice final.
> We need architecture maintainer input on that signature value.
> 
> That code signature is placed before each abort handler, so the kernel can
> validate that it is indeed jumping to an abort handler (and not some
> arbitrary attacker-chosen code). The signature is never executed.
> 
> The current discussion thread on the glibc mailing list leads us towards
> using a trap with uncommon immediate operand, which simplifies integration
> with disassemblers, emulators, makes it easier to debug if the control
> flow gets redirected there by mistake, and is nicer for some architecture's
> speculative execution.
> 
> We can have different signatures for each sub-architecture, as long as they
> don't have to co-exist within the same process. We can special-case with
> #ifdef for each sub-architecture and endianness if need be. If the 
> architecture
> has instruction set extensions that can co-exist with the architecture
> instruction set within the same process, we need to take into account to which
> instruction the chosen signature value would map (and possibly decide if we
> need to extend rseq to support many signatures).
> 
> Here is an example of rseq signature definition template:
> 
> /*
>  * TODO: document trap instruction objdump output on each sub-architecture
>  * instruction sets, as well as instruction set extensions.
>  */
> #define RSEQ_SIG 0x
> 
> Ideally we'd need a patch on top of the Linux kernel
> tools/testing/selftests/rseq/rseq-s390.h file that updates
> the signature value, so I can then pick it up for the glibc
> patchset.

The trap4 instruction is a suitable one. The patch would look like this
--
commit 2ee28f6d1de968a71f074ab150384b90b4121216
Author: Martin Schwidefsky 
Date:   Wed Apr 10 12:28:41 2019 +0200

s390/rseq: use trap4 for RSEQ_SIG

Use trap4 as the guard instruction for the restartable sequence abort
handler.

Signed-off-by: Martin Schwidefsky 

diff --git a/tools/testing/selftests/rseq/rseq-s390.h 
b/tools/testing/selftests/rseq/rseq-s390.h
index 1069e85258ce..d4c8e1147d86 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -1,6 +1,13 @@
 /* SPDX-License-Identifier: LGPL-2.1 OR MIT */
 
-#define RSEQ_SIG   0x53053053
+/*
+ * RSEQ_SIG uses the trap4 instruction. As Linux does not make use of the
+ * access-register mode nor the linkage stack this instruction will always
+ * cause a special-operation exception (the trap-enabled bit in the DUCT
+ * is and will stay 0). The instruction pattern is
+ * b2 ff 0f ff trap4   4095(%r0)
+ */
+#define RSEQ_SIG   0xB2FF0FFF
 
 #define rseq_smp_mb()  __asm__ __volatile__ ("bcr 15,0" ::: "memory")
 #define rseq_smp_rmb() rseq_smp_mb()
-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for 5.1 #2

2019-03-28 Thread Martin Schwidefsky
The following changes since commit 3717f613f48df0222311f974cf8a06c8a6c97bae:

  Merge branch 'core-rcu-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (2019-03-05 14:49:11 
-0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux tags/s390-5.1-2

for you to fetch changes up to b6ffdf27f3d4f1e9af56effe6f86989170d71e95:

  s390/cpumf: Fix warning from check_processor_id (2019-03-28 09:28:42 +0100)


s390 update with improvements and bug fixes for 5.1-rc2

 - Fix early free of the channel program in vfio

 - On AP device removal make sure that all messages are flushed
   with the driver still attached that queued the message

 - Limit brk randomization to 32MB to reduce the chance that the
   heap of ld.so is placed after the main stack

 - Add a rolling average for the steal time of a CPU, this will be
   needed for KVM to decide when to do busy waiting

 - Fix a warning in the CPU-MF code

 - Add a notification handler for AP configuration change to react
   faster to new AP devices


Cornelia Huck (1):
  vfio: ccw: only free cp on final interrupt

Harald Freudenberger (1):
  s390/zcrypt: revisit ap device remove procedure

Martin Schwidefsky (3):
  s390: limit brk randomization to 32MB
  s390/vtime: steal time exponential moving average
  Merge tag 'vfio-ccw-20190311' of 
git://git.kernel.org/.../kvms390/vfio-ccw into fixes

Thomas Richter (1):
  s390/cpumf: Fix warning from check_processor_id

Tony Krowiak (1):
  zcrypt: handle AP Info notification from CHSC SEI command

 arch/s390/include/asm/ap.h   | 11 +++
 arch/s390/include/asm/elf.h  | 11 ---
 arch/s390/include/asm/lowcore.h  | 61 ++--
 arch/s390/kernel/perf_cpum_cf_diag.c | 19 +++
 arch/s390/kernel/smp.c   |  3 +-
 arch/s390/kernel/vtime.c | 19 ++-
 drivers/s390/cio/chsc.c  | 13 
 drivers/s390/cio/vfio_ccw_drv.c  |  8 +++--
 drivers/s390/crypto/ap_bus.c | 19 ++-
 drivers/s390/crypto/ap_bus.h |  2 ++
 drivers/s390/crypto/ap_queue.c   | 26 ---
 drivers/s390/crypto/zcrypt_api.c | 30 +++---
 12 files changed, 154 insertions(+), 68 deletions(-)



Re: [PATCH 1/4] glibc: Perform rseq(2) registration at C startup and thread creation (v7)

2019-03-28 Thread Martin Schwidefsky
On Wed, 27 Mar 2019 16:38:32 -0400
"Carlos O'Donell"  wrote:

> On 3/27/19 5:16 AM, Martin Schwidefsky wrote:
> > On Mon, 25 Mar 2019 11:54:32 -0400 (EDT)
> > Mathieu Desnoyers  wrote:
> >   
> >>>> +++ b/sysdeps/unix/sysv/linux/s390/bits/rseq.h  
> >> [...]  
> >>>> +
> >>>> +/* Signature required before each abort handler code.  */
> >>>> +#define RSEQ_SIG 0x53053053  
> >>>
> >>> Why not a s390 specific value here?  
> >>
> >> s390 also has the abort handler in a __rseq_failure section:
> >>
> >> #define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
> >>  ".pushsection __rseq_failure, \"ax\"\n\t"   \
> >>  ".long " __rseq_str(RSEQ_SIG) "\n\t"\
> >>  __rseq_str(label) ":\n\t"   \
> >>  teardown\
> >>  "j %l[" __rseq_str(abort_label) "]\n\t" \
> >>  ".popsection\n\t"
> >>
> >> Same question applies as powerpc: since disassemblers will try to decode
> >> that instruction, would it be better to define it as a valid one ?
> >>
> >> [...]  
> > 
> > A 4-byte sequence starting with 0x53 is decoded as a "diebr" instruction.
> > And please replace that "j %l[...]" with a "jg %l[...]", the branch target
> > range of the "j" instruction is 64K, not enough for the general case.  
> 
> Why was this particular operated selected?
>   
> So on s390 the RSEQ_SIG will show up as an unexpected "divide to integer"
> instruction that can't be reached by any control flow?
> 
> Can we use a NOP with a unique value in an immediate operand?
> 
> The goal being to have something that won't confuse during a debug
> session, or that the debugger can ignore (like constant pools on Arm)

I was looking at the wrong table in regard to opcode 0x53. The pattern
0x53.. is not a known instruction as far as the disassembler is
concerned. As Mathieu pointed out, "diebr" is actually 0xb353.
Sorry about the confusion.

But why do we need this value in the first place if it can not be reached?

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 1/4] glibc: Perform rseq(2) registration at C startup and thread creation (v7)

2019-03-27 Thread Martin Schwidefsky
On Mon, 25 Mar 2019 11:54:32 -0400 (EDT)
Mathieu Desnoyers  wrote:

> >> +++ b/sysdeps/unix/sysv/linux/s390/bits/rseq.h  
> [...]
> >> +
> >> +/* Signature required before each abort handler code.  */
> >> +#define RSEQ_SIG 0x53053053  
> > 
> > Why not a s390 specific value here?  
> 
> s390 also has the abort handler in a __rseq_failure section:
> 
> #define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
> ".pushsection __rseq_failure, \"ax\"\n\t"   \
> ".long " __rseq_str(RSEQ_SIG) "\n\t"\
> __rseq_str(label) ":\n\t"   \
> teardown\
> "j %l[" __rseq_str(abort_label) "]\n\t" \
> ".popsection\n\t"
> 
> Same question applies as powerpc: since disassemblers will try to decode
> that instruction, would it be better to define it as a valid one ?
> 
> [...]

A 4-byte sequence starting with 0x53 is decoded as a "diebr" instruction.
And please replace that "j %l[...]" with a "jg %l[...]", the branch target
range of the "j" instruction is 64K, not enough for the general case.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390/mem_detect: Use IS_ENABLED(CONFIG_BLK_DEV_INITRD)

2019-03-07 Thread Martin Schwidefsky
On Thu, 07 Mar 2019 15:51:45 -0800
Joe Perches  wrote:

> IS_ENABLED should generally use CONFIG_ prefaced symbols and
> it doesn't appear as if there is a BLK_DEV_INITRD define.
> 
> Signed-off-by: Joe Perches 
> ---
>  arch/s390/boot/mem_detect.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
> index 4cb771ba13fa..5d316fe40480 100644
> --- a/arch/s390/boot/mem_detect.c
> +++ b/arch/s390/boot/mem_detect.c
> @@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void)
>  {
>   unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
> 
> - if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
> + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
>   INITRD_START < offset + ENTRIES_EXTENDED_MAX)
>   offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
> 
> 

With more than the 255 embedded entries and an initrd that sits in the
wrong location that would have resulted in a corrupted ramdisk.
Nice catch, thanks. I'll add the patch to my queue.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for the 5.1 merge window

2019-03-04 Thread Martin Schwidefsky
The following changes since commit 1c7fc5cbc33980acd13d668f1c8f0313d6ae9fd8:

  Linux 5.0-rc2 (2019-01-14 10:41:12 +1200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux tags/s390-5.1-1

for you to fetch changes up to c533b46e552711575e6120a2ada2b5a7e2354573:

  Revert "s390/cpum_cf: Add kernel message exaplanations" (2019-03-04 08:25:00 
+0100)


s390 updates for the 5.1 merge window

 - A copy of Arnd's compat wrapper generation series

 - Pass information about the KVM guest to the host in the form of the control
   program code and the control program version code

 - Map IOV resources to support PCI physical functions on s390

 - Add vector load and store alignment hints to improve performance

 - Use the "jdd" constraint with gcc 9 to make jump labels work again

 - Remove amode workaround for old z/VM releases from the DCSS code

 - Add support for in-kernel performance measurements using the
   CPU measurement counter facility

 - Introduce a new PMU device cpum_cf_diag to capture counters and
   store them as event raw data.

 - Bug fixes and cleanups


Arnd Bergmann (5):
  s390: open-code s390_personality syscall
  ipc: introduce ksys_ipc()/compat_ksys_ipc() for s390
  s390: use generic UID16 implementation
  s390: autogenerate compat syscall wrappers
  s390: remove compat_wrapper.c

Christoph Hellwig (1):
  s390: remove the ptep_modify_prot_{start,commit} exports

Collin Walling (1):
  s390/setup: set control program code via diag 318

Eric Farman (2):
  s390/cio: Fix vfio-ccw handling of recursive TICs
  s390/cio: Use cpa range elsewhere within vfio-ccw

Farhan Ali (1):
  vfio-ccw: Don't assume there are more ccws after a TIC

Gerald Schaefer (4):
  s390/setup: remove obsolete #ifdef
  s390: remove dead code
  s390/extmem: remove code for 31 bit addressing mode
  s390/extmem: print DCSS range with %px

Greg Kroah-Hartman (3):
  s390/hypfs: no need to check return value of debugfs_create functions
  s390: pci: no need to check return value of debugfs_create functions
  s390: no need to check return value of debugfs_create functions

Gustavo A. R. Silva (1):
  s390/hypfs: Use struct_size() in kzalloc()

Harald Freudenberger (1):
  s390/zcrypt: use new state UNBOUND during queue driver rebind

Heiko Carstens (1):
  s390: fix system call tracing

Hendrik Brueckner (11):
  s390/cpum_cf: move counter set controls to a new header file
  s390/cpum_cf: prepare for in-kernel counter measurements
  s390/cpum_cf: rename per-CPU counter facility structure and variables
  s390/cpu_mf: move struct cpu_cf_events and per-CPU variable to header file
  s390/cpum_cf: introduce kernel_cpumcf_alert() to obtain measurement alerts
  s390/cpum_cf: Add minimal in-kernel interface for counter measurements
  s390/cpu_mf: add store cpu counter multiple instruction support
  s390/cpu_mf: replace stcctm5() with the stcctm() function
  s390/cpum_cf: introduce kernel_cpumcf_avail() function
  s390/cpum_cf: move common functions into a separate file
  s390/cpum_cf: add ctr_stcctm() function

Ilya Leoshkevich (1):
  s390/jump_label: Use "jdd" constraint on gcc9

Ingo Franzki (1):
  pkey: Indicate old mkvp only if old and current mkvp are different

Julian Wiedmann (1):
  s390/qdio: make SBAL address array type-safe

Martin Schwidefsky (10):
  Merge branch 'compat' of git://git.kernel.org/.../s390/linux into features
  s390/mmap: take stack_guard_gap into account for mmap_base
  s390: add alignment hints to vector load and store
  Merge tag 'vfio-ccw-20190204' of 
git://git.kernel.org/.../kvms390/vfio-ccw into features
  s390/setup: fix early warning messages
  s390/setup: fix boot crash for machine without EDAT-1
  Merge tag 'vfio-ccw-20190227' of 
git://git.kernel.org/.../kvms390/vfio-ccw into features
  s390/suspend: fix prefix register reset in swsusp_arch_resume
  s390/dasd: fix read device characteristic with CONFIG_VMAP_STACK=y
  Revert "s390/cpum_cf: Add kernel message exaplanations"

Pierre Morel (1):
  s390: vfio_ap: link the vfio_ap devices to the vfio_ap bus subsystem

Sebastian Ott (3):
  s390/pci: improve bar check
  s390/pci: map IOV resources
  s390/ism: ignore some errors during deregistration

Thomas Richter (3):
  s390/cpum_cf_diag: Add support for s390 counter facility diagnostic trace
  s390/cpum_cf: Add kernel message exaplanations
  s390/cpum_cf: Handle EBUSY return code from CPU counter facility 
reservation

Vasily Gorbik (5):
  s390/kasan: improve string/memory functions checks
  s390/als: remove duplicated in-place implementation of stfle
  s390: clean up redundant facilities

Re: [PATCH v3 18/34] s390: mm: Add p?d_large() definitions

2019-02-27 Thread Martin Schwidefsky
On Wed, 27 Feb 2019 17:05:52 +
Steven Price  wrote:

> walk_page_range() is going to be allowed to walk page tables other than
> those of user space. For this it needs to know when it has reached a
> 'leaf' entry in the page tables. This information is provided by the
> p?d_large() functions/macros.
> 
> For s390, we don't support large pages, so add a stub returning 0.

Well s390 does support 1MB and 2GB large pages, pmd_large() and pud_large()
are non-empty. We do not support 4TB or 8PB large pages though, which
makes the patch itself correct. Just the wording is slightly off.
 
> CC: Martin Schwidefsky 
> CC: Heiko Carstens 
> CC: linux-s...@vger.kernel.org
> Signed-off-by: Steven Price 
> ---
>  arch/s390/include/asm/pgtable.h | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
> index 063732414dfb..9617f1fb69b4 100644
> --- a/arch/s390/include/asm/pgtable.h
> +++ b/arch/s390/include/asm/pgtable.h
> @@ -605,6 +605,11 @@ static inline int pgd_present(pgd_t pgd)
>   return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
>  }
> 
> +static inline int pgd_large(pgd_t pgd)
> +{
> + return 0;
> +}
> +
>  static inline int pgd_none(pgd_t pgd)
>  {
>   if (pgd_folded(pgd))
> @@ -645,6 +650,11 @@ static inline int p4d_present(p4d_t p4d)
>   return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
>  }
> 
> +static inline int p4d_large(p4d_t p4d)
> +{
> + return 0;
> +}
> +
>  static inline int p4d_none(p4d_t p4d)
>  {
>   if (p4d_folded(p4d))


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390/setup: fix early warning messages

2019-02-18 Thread Martin Schwidefsky
On Mon, 18 Feb 2019 18:01:46 +0100
Martin Schwidefsky  wrote:

> On Mon, 18 Feb 2019 07:46:40 -0800
> Guenter Roeck  wrote:
> 
> > Hi,
> > 
> > On Thu, Feb 14, 2019 at 03:40:56PM +0100, Martin Schwidefsky wrote:  
> > > The setup_lowcore() function creates a new prefix page for the boot CPU.
> > > The PSW mask for the system_call, external interrupt, i/o interrupt and
> > > the program check handler have the DAT bit set in this new prefix page.
> > > 
> > > At the time setup_lowcore is called the system still runs without virtual
> > > address translation, the paging_init() function creates the kernel page
> > > table and loads the CR13 with the kernel ASCE.
> > > 
> > > Any code between setup_lowcore() and the end of paging_init() that has
> > > a BUG or WARN statement will create a program check that can not be
> > > handled correctly as there is no kernel page table yet.
> > > 
> > > To allow early WARN statements initially setup the lowcore with DAT off
> > > and set the DAT bit only after paging_init() has completed.
> > > 
> > > Cc: sta...@vger.kernel.org
> > > Signed-off-by: Martin Schwidefsky 
> > 
> > This patch causes s390 qemu emulations to crash with a kernel stack 
> > overflow.
> > Reverting the patch fixes the problem. Crash log and bisect results below.  
> 
> Urgs, yes. That is EDAT-1 again that makes it work with 1MB pages but breaks
> with 4K mapping where the prefix page is mapped to absolute zero.
> 
> Just using S390_lowcore instead of lowcore_ptr[0] does not work either
> because low-address protection is already active. I'll think of something.
> 
> Thanks for bug report!
 
This patch should fix the problem:
--
>From d4393e82c3ec9b2fe5dba4b0d1b6eef29f8d15c8 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky 
Date: Mon, 18 Feb 2019 18:10:08 +0100
Subject: [PATCH] s390/setup: fix boot crash for machine without EDAT-1

The fix to make WARN work in the early boot code created a problem
on older machines without EDAT-1. The setup_lowcore_dat_on function
uses the pointer from lowcore_ptr[0] to set the DAT bit in the new
PSWs. That does not work if the kernel page table is set up with
4K pages as the prefix address maps to absolute zero.

To make this work the PSWs need to be changed via address 0 in
form of the S390_lowcore definition.

Cc: sta...@vger.kernel.org
Fixes: 94f85ed3e2 ("s390/setup: fix early warning messages")
Signed-off-by: Martin Schwidefsky 
---
 arch/s390/kernel/setup.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 65b22ef5141a..12934e8fbb91 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -451,13 +451,12 @@ static void __init setup_lowcore_dat_off(void)
 
 static void __init setup_lowcore_dat_on(void)
 {
-   struct lowcore *lc;
-
-   lc = lowcore_ptr[0];
-   lc->external_new_psw.mask |= PSW_MASK_DAT;
-   lc->svc_new_psw.mask |= PSW_MASK_DAT;
-   lc->program_new_psw.mask |= PSW_MASK_DAT;
-   lc->io_new_psw.mask |= PSW_MASK_DAT;
+   __ctl_clear_bit(0, 28);
+   S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
+   S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
+   S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
+   S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
+   __ctl_set_bit(0, 28);
 }
 
 static struct resource code_resource = {
-- 
2.16.4


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390/setup: fix early warning messages

2019-02-18 Thread Martin Schwidefsky
On Mon, 18 Feb 2019 07:46:40 -0800
Guenter Roeck  wrote:

> Hi,
> 
> On Thu, Feb 14, 2019 at 03:40:56PM +0100, Martin Schwidefsky wrote:
> > The setup_lowcore() function creates a new prefix page for the boot CPU.
> > The PSW mask for the system_call, external interrupt, i/o interrupt and
> > the program check handler have the DAT bit set in this new prefix page.
> > 
> > At the time setup_lowcore is called the system still runs without virtual
> > address translation, the paging_init() function creates the kernel page
> > table and loads the CR13 with the kernel ASCE.
> > 
> > Any code between setup_lowcore() and the end of paging_init() that has
> > a BUG or WARN statement will create a program check that can not be
> > handled correctly as there is no kernel page table yet.
> > 
> > To allow early WARN statements initially setup the lowcore with DAT off
> > and set the DAT bit only after paging_init() has completed.
> > 
> > Cc: sta...@vger.kernel.org
> > Signed-off-by: Martin Schwidefsky   
> 
> This patch causes s390 qemu emulations to crash with a kernel stack overflow.
> Reverting the patch fixes the problem. Crash log and bisect results below.

Urgs, yes. That is EDAT-1 again that makes it work with 1MB pages but breaks
with 4K mapping where the prefix page is mapped to absolute zero.

Just using S390_lowcore instead of lowcore_ptr[0] does not work either
because low-address protection is already active. I'll think of something.

Thanks for the bug report!

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for 5.0 #3

2019-02-10 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes for 5.0-rc7

The following changes since commit f17b5f06cb92ef2250513a1e154c47b78df07d40:

  Linux 5.0-rc4 (2019-01-27 15:18:05 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-5.0-3

for you to fetch changes up to 614db26954ff08fa8e92b04100e31ebc04b817cf:

  Revert "s390/pci: remove bit_lock usage in interrupt handler" (2019-02-07 
11:56:29 +0100)


s390 update with bug fixes for 5.0-rc6

 - Fix specification exception on z196 during ap probe

 - A fix for suspend-to-disk, the VMAP stack patch broke the
   swsusp_arch_suspend function

 - The EMC CKD ioctl of the dasd driver needs an additional size
   check for user space data

 - Revert an incorrect patch for the PCI base code that removed
   a bit lock that turned out to be required after all


Harald Freudenberger (1):
  s390/zcrypt: fix specification exception on z196 during ap probe

Martin Schwidefsky (1):
  s390/suspend: fix stack setup in swsusp_arch_suspend

Sebastian Ott (1):
  Revert "s390/pci: remove bit_lock usage in interrupt handler"

Stefan Haberland (1):
  s390/dasd: fix using offset into zero size array error

 arch/s390/kernel/swsusp.S  | 4 ++--
 arch/s390/pci/pci.c| 4 +++-
 drivers/s390/block/dasd_eckd.c | 8 
 drivers/s390/crypto/ap_bus.c   | 3 ++-
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 537f97f..b6796e6 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -30,10 +30,10 @@
.section .text
 ENTRY(swsusp_arch_suspend)
lg  %r1,__LC_NODAT_STACK
-   aghi%r1,-STACK_FRAME_OVERHEAD
stmg%r6,%r15,__SF_GPRS(%r1)
+   aghi%r1,-STACK_FRAME_OVERHEAD
stg %r15,__SF_BACKCHAIN(%r1)
-   lgr %r1,%r15
+   lgr %r15,%r1
 
/* Store FPU registers */
brasl   %r14,save_fpu_regs
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index a966d7b..4266a4d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -382,7 +382,9 @@ static void zpci_irq_handler(struct airq_struct *airq)
if (ai == -1UL)
break;
inc_irq_stat(IRQIO_MSI);
+   airq_iv_lock(aibv, ai);
generic_handle_irq(airq_iv_get_data(aibv, ai));
+   airq_iv_unlock(aibv, ai);
}
}
 }
@@ -408,7 +410,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int 
type)
zdev->aisb = aisb;
 
/* Create adapter interrupt vector */
-   zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA);
+   zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
if (!zdev->aibv)
return -ENOMEM;
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 4e7b55a..6e294b4 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -4469,6 +4469,14 @@ static int dasd_symm_io(struct dasd_device *device, void 
__user *argp)
usrparm.psf_data &= 0x7fffffffULL;
usrparm.rssd_result &= 0x7fffffffULL;
}
+   /* at least 2 bytes are accessed and should be allocated */
+   if (usrparm.psf_data_len < 2) {
+   DBF_DEV_EVENT(DBF_WARNING, device,
+ "Symmetrix ioctl invalid data length %d",
+ usrparm.psf_data_len);
+   rc = -EINVAL;
+   goto out;
+   }
/* alloc I/O data area */
psf_data = kzalloc(usrparm.psf_data_len, GFP_KERNEL | GFP_DMA);
rssd_result = kzalloc(usrparm.rssd_result_len, GFP_KERNEL | GFP_DMA);
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 48ea000..5a69974 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -248,7 +248,8 @@ static inline int ap_test_config(unsigned int *field, 
unsigned int nr)
 static inline int ap_test_config_card_id(unsigned int id)
 {
if (!ap_configuration)  /* QCI not supported */
-   return 1;
+   /* only ids 0...3F may be probed */
+   return id < 0x40 ? 1 : 0;
return ap_test_config(ap_configuration->apm, id);
 }
 



Re: [PATCH] zcrypt: handle AP Info notification from CHSC SEI command

2019-02-01 Thread Martin Schwidefsky
On Fri, 1 Feb 2019 10:01:59 +0100
Heiko Carstens  wrote:

> On Thu, Jan 31, 2019 at 06:28:39PM -0500, Tony Krowiak wrote:
> > On 1/30/19 1:32 PM, Sebastian Ott wrote:  
> > >On Wed, 30 Jan 2019, Tony Krowiak wrote:  
> > >>+#if IS_ENABLED(CONFIG_ZCRYPT)
> > >>+void ap_bus_cfg_chg(void);
> > >>+#else
> > >>+#error "no CONFIG_ZCRYPT"  
> > >^
> > >I don't think that's the right thing to do here.  
> > 
> > I'd like to leave it. If somebody edits .config
> > and sets CONFIG_ZCRYPT=n, then the build will
> > fail. The preprocessor error above tells them
> > why.  
> 
> No, the kernel build should never fail if a config option is not set.
> Also the above should be "#ifdef CONFIG_ZCRYPT".
> 
> In addition (this isn't quoted unfortunately) the alternative function
> in the header file is missing the "inline" attribute. Can you please
> add that too?
> 
> static inline void ap_bus_cfg_chg(void) { }
> 
> > >>+* A config change has happened, Force an ap bus rescan.
> > >>+*/
> > >>+void ap_bus_cfg_chg(void)
> > >>+{
> > >>+ AP_DBF(DBF_INFO, "%s config change, forcing bus rescan\n", __func__);
> > >>+
> > >>+ ap_bus_force_rescan();
> > >>+}
> > >>+EXPORT_SYMBOL(ap_bus_cfg_chg);  
> > >
> > >There is no need for the export symbol - you don't call that function
> > >from module code.
> > >As an unrelated question, just to be sure: ap_bus.c is compiled as
> > >built-in even with ZCRYPT=m, right?  
> > 
> > No. If you edit .config and set CONFIG_ZCRYPT=m, ap_bus.c will be built
> > into the zcrypt.ko module. Through some other magic, the zcrypt module
> > is loaded when linux boots.  
> 
> If that happens, then we have a build problem that needs to be
> fixed. What exactly are you doing to get the ap code linked into the
> zcrypt module?

Current upstream code:

ap-objs := ap_bus.o ap_card.o ap_queue.o
obj-$(subst m,y,$(CONFIG_ZCRYPT)) += ap.o

The ap_bus.o file is either not built at all or it is linked into the
main kernel image. If ap_bus.o is built then it is guaranteed that
CONFIG_ZCRYPT is either "m" or "y".

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for 5.0 #2

2019-01-23 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes for 5.0-rc4

The following changes since commit 1bdbe227492075d058e37cb3d400e6468d0095b5:

  Merge tag 'vfio-v5.0-rc2' of git://github.com/awilliam/linux-vfio (2019-01-10 
09:20:46 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-5.0-2

for you to fetch changes up to 60f1bf29c0b2519989927cae640cd1f50f59dc7f:

  s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU (2019-01-11 17:12:03 
+0100)


s390 update with bug fixes for 5.0-rc4

 - Do not claim to run under z/VM if the hypervisor can not be identified

 - Fix crashes due to outdated ASCEs in CR1

 - Avoid a deadlock in regard to CPU hotplug

 - Really fix the vdso mapping issue for compat tasks

 - Avoid crash on restart due to an incorrect stack address


Christian Borntraeger (1):
  s390/early: improve machine detection

David Hildenbrand (1):
  s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU

Gerald Schaefer (1):
  s390/smp: fix CPU hotplug deadlock with CPU rescan

Martin Schwidefsky (1):
  s390/mm: always force a load of the primary ASCE on context switch

Vasily Gorbik (1):
  s390/vdso: correct vdso mapping for compat tasks

 arch/s390/include/asm/mmu_context.h |  7 +++
 arch/s390/kernel/early.c|  4 ++--
 arch/s390/kernel/setup.c|  2 ++
 arch/s390/kernel/smp.c  | 11 ++-
 arch/s390/kernel/vdso.c |  5 ++---
 drivers/s390/char/sclp_config.c |  2 ++
 6 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index ccbb53e..8d04e6f 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -25,7 +25,7 @@ static inline int init_new_context(struct task_struct *tsk,
atomic_set(&mm->context.flush_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
-   mm->context.compat_mm = 0;
+   mm->context.compat_mm = test_thread_flag(TIF_31BIT);
 #ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
@@ -90,8 +90,6 @@ static inline void switch_mm(struct mm_struct *prev, struct 
mm_struct *next,
 {
int cpu = smp_processor_id();
 
-   if (prev == next)
-   return;
S390_lowcore.user_asce = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR1 and CR7 */
@@ -103,7 +101,8 @@ static inline void switch_mm(struct mm_struct *prev, struct 
mm_struct *next,
__ctl_load(S390_lowcore.vdso_asce, 7, 7);
clear_cpu_flag(CIF_ASCE_SECONDARY);
}
-   cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+   if (prev != next)
+   cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
 
 #define finish_arch_post_lock_switch finish_arch_post_lock_switch
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index af5c2b3..a8c7789 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -63,10 +63,10 @@ static noinline __init void detect_machine_type(void)
if (stsi(vmms, 3, 2, 2) || !vmms->count)
return;
 
-   /* Running under KVM? If not we assume z/VM */
+   /* Detect known hypervisors */
if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
-   else
+   else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
 }
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 72dd23e..7ed90a7 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1006,6 +1006,8 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running under KVM in 64-bit mode\n");
else if (MACHINE_IS_LPAR)
pr_info("Linux is running natively in 64-bit mode\n");
+   else
+   pr_info("Linux is running as a guest in 64-bit mode\n");
 
/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index f82b3d3..b198ece 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -381,8 +381,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
  */
 void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
+   struct lowcore *lc = pcpu_devices->lowcore;
+
+   if (pcpu_devices[0].address == stap())
+   lc = &S390_lowcore;
+
pcpu_delegate(&pcpu_devices[0], fun

Re: [PATCH 0/5] s390: rework compat wrapper generation

2019-01-18 Thread Martin Schwidefsky
On Thu, 17 Jan 2019 21:19:20 +0100
Heiko Carstens  wrote:

> On Thu, Jan 17, 2019 at 05:21:50PM +0100, Arnd Bergmann wrote:
> > On Thu, Jan 17, 2019 at 2:36 PM Heiko Carstens
> >  wrote:  
> > >
> > > On Wed, Jan 16, 2019 at 02:15:18PM +0100, Arnd Bergmann wrote:  
> >   
> > > > I did not test the changes at runtime, but I looked at the
> > > > generated object code, which seems fine here and includes
> > > > the same conversions as before.  
> > >
> > > All looks good and seems to work fine. This is a very nice
> > > simplification of our compat code, even if it adds some dead code to
> > > the kernel image.
> > >
> > > I did some tests and it all looks good. Also the generated code looks
> > > fine. So, if nothing breaks, this will go upstream with next merge
> > > window via the s390 tree.
> > >
> > > Thanks again for your work!  
> > 
> > Awesome, thanks for testing it so quickly and agreeing to merge it!
> > 
> > There is a dependency that I now have for my y2038 syscall series
> > of course, so I'd need to have those patches on top of the s390 series.
> > 
> > I think we can either have a shared git branch that gets merged both
> > into your s390 tree and my y2038 tree, or we merge it only through
> > my tree, with your Ack.
> > 
> > Does that work for you?  
> 
> I'll discuss this with Martin tomorrow, however I think a shared git
> branch would be the best solution. I want to get as much testing as
> possible for this patch set, which means this must also be in the s390
> tree.

Yes, a shared git branch seems like the best solution, this is now
available as a topic branch on the s390/linux:

git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git compat

The branch has been merged to the features branch for v5.1.

Thanks for the work on the compat code, good stuff!

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH v1] s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU

2019-01-11 Thread Martin Schwidefsky
On Fri, 11 Jan 2019 15:18:22 +0100
David Hildenbrand  wrote:

> When calling smp_call_ipl_cpu() from the IPL CPU, we will try to read
> from pcpu_devices->lowcore. However, due to prefixing, that will result
> in reading from absolute address 0 on that CPU. We have to go via the
> actual lowcore instead.
> 
> This means that right now, we will read lc->nodat_stack == 0 and
> therefore work on a very wrong stack.
> 
> This BUG essentially broke rebooting under QEMU TCG (which will report
> a low address protection exception). And checking under KVM, it is
> also broken under KVM. With 1 VCPU it can be easily triggered.
> 
> :/# echo 1 > /proc/sys/kernel/sysrq
> :/# echo b > /proc/sysrq-trigger
> [   28.476745] sysrq: SysRq : Resetting
> [   28.476793] Kernel stack overflow.
> [   28.476817] CPU: 0 PID: 424 Comm: sh Not tainted 5.0.0-rc1+ #13
> [   28.476820] Hardware name: IBM 2964 NE1 716 (KVM/Linux)
> [   28.476826] Krnl PSW : 0400c0018000 00115c0c 
> (pcpu_delegate+0x12c/0x140)
> [   28.476861]R:0 T:1 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:3 CC:0 PM:0 
> RI:0 EA:3
> [   28.476863] Krnl GPRS:   0010dff8 
> 
> [   28.476864]  00ab7090 
> 03e0006efbf0
> [   28.476864]0010dff8   
> 
> [   28.476865]7fffc000 00730408 03e0006efc58 
> 
> [   28.476887] Krnl Code: 00115bfe: 4170f000la  
> %r7,0(%r15)
> [   28.476887]00115c02: 41f0a000la  
> %r15,0(%r10)
> [   28.476887]   #00115c06: e370f0980024stg 
> %r7,152(%r15)
> [   28.476887]   >00115c0c: c0e5f86ebrasl   
> %r14,114ce8
> [   28.476887]00115c12: 41f07000la  
> %r15,0(%r7)
> [   28.476887]00115c16: a7f4ffa8brc 
> 15,115b66
> [   28.476887]00115c1a: 0707bcr 0,%r7
> [   28.476887]00115c1c: 0707bcr 0,%r7
> [   28.476901] Call Trace:
> [   28.476902] Last Breaking-Event-Address:
> [   28.476920]  [<00a01c4a>] arch_call_rest_init+0x22/0x80
> [   28.476927] Kernel panic - not syncing: Corrupt kernel stack, can't 
> continue.
> [   28.476930] CPU: 0 PID: 424 Comm: sh Not tainted 5.0.0-rc1+ #13
> [   28.476932] Hardware name: IBM 2964 NE1 716 (KVM/Linux)
> [   28.476932] Call Trace:
> 
> Reported-by: Cornelia Huck 
> Signed-off-by: David Hildenbrand 
> ---
>  arch/s390/kernel/smp.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
> index f82b3d3c36e2..be32dd0b4191 100644
> --- a/arch/s390/kernel/smp.c
> +++ b/arch/s390/kernel/smp.c
> @@ -381,8 +381,13 @@ void smp_call_online_cpu(void (*func)(void *), void 
> *data)
>   */
>  void smp_call_ipl_cpu(void (*func)(void *), void *data)
>  {
> + struct lowcore *lc = pcpu_devices->lowcore;
> +
> + if (pcpu_devices[0].address == stap())
> + lc = &S390_lowcore;
> +
>   pcpu_delegate(&pcpu_devices[0], func, data,
> -   pcpu_devices->lowcore->nodat_stack);
> +   lc->nodat_stack);
>  }
> 
>  int smp_find_processor_id(u16 address)

Uhh, subtle. With EDAT-1 this sort of works because the lowcore of CPU #0
is allocated in an area of the kernel address space that is backed with 1M
pages. It is nevertheless broken.

I have added
Fixes: 2f859d0dad81 ("s390/smp: reduce size of struct pcpu")
Cc: sta...@vger.kernel.org # 4.0+   

and will queue the patch for the next pull. Thanks!

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: Kconfig label updates

2019-01-09 Thread Martin Schwidefsky
On Tue, 8 Jan 2019 16:30:24 -0600
Bjorn Helgaas  wrote:

> Hi,
> 
> I want to update the PCI Kconfig labels so they're more consistent and
> useful to users, something like the patch below.  IIUC, the items
> below are all IBM-related; please correct me if not.
> 
> I'd also like to expand (or remove) "RPA" because Google doesn't find
> anything about "IBM RPA", except Robotic Process Automation, which I
> think must be something else.
> 
> Is there some text expansion of RPA that we could use that would be
> meaningful to a user, i.e., something he/she might find on a nameplate
> or in a user manual?
> 
> Ideally the PCI Kconfig labels would match the terms used in
> arch/.../Kconfig, e.g.,
> 
>   config PPC_POWERNV
> bool "IBM PowerNV (Non-Virtualized) platform support"
> 
>   config PPC_PSERIES
> bool "IBM pSeries & new (POWER5-based) iSeries"
> 
>   config MARCH_Z900
> bool "IBM zSeries model z800 and z900"
> 
>   config MARCH_Z9_109
> bool "IBM System z9"
> 
> Bjorn
> 
> 
> diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
> index e9f78eb390d2..1c1d145bfd84 100644
> --- a/drivers/pci/hotplug/Kconfig
> +++ b/drivers/pci/hotplug/Kconfig
> @@ -112,7 +112,7 @@ config HOTPLUG_PCI_SHPC
> When in doubt, say N.
> 
>  config HOTPLUG_PCI_POWERNV
> - tristate "PowerPC PowerNV PCI Hotplug driver"
> + tristate "IBM PowerNV PCI Hotplug driver"
>   depends on PPC_POWERNV && EEH
>   select OF_DYNAMIC
>   help
> @@ -125,10 +125,11 @@ config HOTPLUG_PCI_POWERNV
> When in doubt, say N.
> 
>  config HOTPLUG_PCI_RPA
> - tristate "RPA PCI Hotplug driver"
> + tristate "IBM Power Systems RPA PCI Hotplug driver"
>   depends on PPC_PSERIES && EEH
>   help
> Say Y here if you have a RPA system that supports PCI Hotplug.
> +   This includes the earlier pSeries and iSeries.
> 
> To compile this driver as a module, choose M here: the
> module will be called rpaphp.
> @@ -136,7 +137,7 @@ config HOTPLUG_PCI_RPA
> When in doubt, say N.
> 
>  config HOTPLUG_PCI_RPA_DLPAR
> - tristate "RPA Dynamic Logical Partitioning for I/O slots"
> + tristate "IBM RPA Dynamic Logical Partitioning for I/O slots"
>   depends on HOTPLUG_PCI_RPA
>   help
> Say Y here if your system supports Dynamic Logical Partitioning
> @@ -157,7 +158,7 @@ config HOTPLUG_PCI_SGI
> When in doubt, say N.
> 
>  config HOTPLUG_PCI_S390
> - bool "System z PCI Hotplug Support"
> + bool "IBM System z PCI Hotplug Support"
>   depends on S390 && 64BIT
>   help
> Say Y here if you want to use the System z PCI Hotplug
> 

The rewording of the HOTPLUG_PCI_S390 entry is fine with me.
Acked-by: Martin Schwidefsky 

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for the 4.21 merge window

2019-01-01 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes and features for 4.21

The following changes since commit 94f371cb73944b410a269d570d6946c042f2ddd0:

  Merge tag 'acpi-4.20-rc5' of 
git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm (2018-11-29 
15:54:12 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-4.21-1

for you to fetch changes up to ec10574d00da0d8b6ec9d0099410aae8aad4695a:

  Merge tag 'vfio-ccw-20181213' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw into features 
(2018-12-13 10:44:18 +0100)


s390 updates for the 4.21 merge window

 - A larger update for the zcrypt / AP bus code
   + Update two inline assemblies in the zcrypt driver to make gcc happy
   + Add a missing reply code for invalid special commands for zcrypt
   + Allow AP device reset to be triggered from user space
   + Split the AP scan function into smaller, more readable functions

 - Updates for vfio-ccw and vfio-ap
   + Add maintainers and reviewer for vfio-ccw
   + Include facility.h in vfio_ap_drv.c to avoid fragile include chain
   + Simplify vfio-ccw state machine

 - Use the common code version of bust_spinlocks

 - Make use of the DEFINE_SHOW_ATTRIBUTE

 - Fix three incorrect file permissions in the DASD driver

 - Remove bit spin-lock from the PCI interrupt handler

 - Fix GFP_ATOMIC vs GFP_KERNEL in the PCI code


Christian Borntraeger (1):
  MAINTAINERS/vfio-ccw: add Farhan and Eric, make Halil Reviewer

Harald Freudenberger (4):
  s390/ap: rework assembler functions to use unions for in/out register 
variables
  s390/zcrypt: improve special ap message cmd handling
  s390/zcrypt: make sysfs reset attribute trigger queue reset
  s390/zcrypt: rework ap scan bus code

Martin Schwidefsky (1):
  Merge tag 'vfio-ccw-20181213' of 
git://git.kernel.org/.../kvms390/vfio-ccw into features

Petr Tesarik (1):
  s390: vfio-ap: include <asm/facility.h> for test_facility()

Pierre Morel (1):
  vfio: ccw: Merge BUSY and BOXED states

Sebastian Ott (3):
  s390/drivers: fix proc/debugfs file permissions
  s390/pci: remove bit_lock usage in interrupt handler
  s390/pci: fix sleeping in atomic during hotplug

Sergey Senozhatsky (1):
  s390: use common bust_spinlocks()

Yangtao Li (1):
  s390: convert to DEFINE_SHOW_ATTRIBUTE

 MAINTAINERS |   4 +-
 arch/s390/include/asm/ap.h  |  28 ++--
 arch/s390/include/uapi/asm/zcrypt.h |   4 +-
 arch/s390/mm/fault.c|  24 
 arch/s390/pci/pci.c |   4 +-
 arch/s390/pci/pci_clp.c |   2 +-
 drivers/s390/block/dasd.c   |  15 +-
 drivers/s390/block/dasd_proc.c  |   3 +-
 drivers/s390/char/tape_proc.c   |   7 +-
 drivers/s390/cio/qdio_debug.c   |  18 +--
 drivers/s390/cio/vfio_ccw_fsm.c |   7 +-
 drivers/s390/cio/vfio_ccw_private.h |   1 -
 drivers/s390/crypto/ap_bus.c| 277 +---
 drivers/s390/crypto/ap_queue.c  |  23 ++-
 drivers/s390/crypto/vfio_ap_drv.c   |   1 +
 drivers/s390/crypto/zcrypt_error.h  |   2 +
 lib/bust_spinlocks.c|   6 +-
 17 files changed, 222 insertions(+), 204 deletions(-)



Re: [PATCH 2/6] __wr_after_init: write rare for static allocation

2018-12-12 Thread Martin Schwidefsky
On Wed, 5 Dec 2018 15:13:56 -0800
Andy Lutomirski  wrote:

> I added some s390 and powerpc people.
> 
> On Tue, Dec 4, 2018 at 4:18 AM Igor Stoppa  wrote:
> >
> > Implementation of write rare for statically allocated data, located in a
> > specific memory section through the use of the __write_rare label.
> >
> > The basic functions are:
> > - wr_memset(): write rare counterpart of memset()
> > - wr_memcpy(): write rare counterpart of memcpy()
> > - wr_assign(): write rare counterpart of the assignment ('=') operator
> > - wr_rcu_assign_pointer(): write rare counterpart of rcu_assign_pointer()
> >
> > The implementation is based on code from Andy Lutomirski and Nadav Amit
> > for patching the text on x86 [here goes reference to commits, once merged]
> >
> > The modification of write protected data is done through an alternate
> > mapping of the same pages, as writable.
> > This mapping is local to each core and is active only for the duration
> > of each write operation.
> > Local interrupts are disabled, while the alternate mapping is active.
> >
> > In theory, it could introduce a non-predictable delay, in a preemptible
> > system, however the amount of data to be altered is likely to be far
> > smaller than a page.
> >
> > Signed-off-by: Igor Stoppa 
> >
> > CC: Andy Lutomirski 
> > CC: Nadav Amit 
> > CC: Matthew Wilcox 
> > CC: Peter Zijlstra 
> > CC: Kees Cook 
> > CC: Dave Hansen 
> > CC: linux-integr...@vger.kernel.org
> > CC: kernel-harden...@lists.openwall.com
> > CC: linux...@kvack.org
> > CC: linux-kernel@vger.kernel.org
> > ---
> >  include/linux/prmem.h | 133 ++
> >  init/main.c   |   2 +
> >  mm/Kconfig|   4 ++
> >  mm/Makefile   |   1 +
> >  mm/prmem.c| 124 +++
> >  5 files changed, 264 insertions(+)
> >  create mode 100644 include/linux/prmem.h
> >  create mode 100644 mm/prmem.c
> >
> > diff --git a/include/linux/prmem.h b/include/linux/prmem.h
> > new file mode 100644
> > index ..b0131c1f5dc0
> > --- /dev/null
> > +++ b/include/linux/prmem.h
> > @@ -0,0 +1,133 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * prmem.h: Header for memory protection library
> > + *
> > + * (C) Copyright 2018 Huawei Technologies Co. Ltd.
> > + * Author: Igor Stoppa 
> > + *
> > + * Support for:
> > + * - statically allocated write rare data
> > + */
> > +
> > +#ifndef _LINUX_PRMEM_H
> > +#define _LINUX_PRMEM_H
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +/**
> > + * memtst() - test n bytes of the source to match the c value
> > + * @p: beginning of the memory to test
> > + * @c: byte to compare against
> > + * @len: amount of bytes to test
> > + *
> > + * Returns 0 on success, non-zero otherwise.
> > + */
> > +static inline int memtst(void *p, int c, __kernel_size_t len)
> > +{
> > +   __kernel_size_t i;
> > +
> > +   for (i = 0; i < len; i++) {
> > +   u8 d =  *(i + (u8 *)p) - (u8)c;
> > +
> > +   if (unlikely(d))
> > +   return d;
> > +   }
> > +   return 0;
> > +}
> > +
> > +
> > +#ifndef CONFIG_PRMEM
> > +
> > +static inline void *wr_memset(void *p, int c, __kernel_size_t len)
> > +{
> > +   return memset(p, c, len);
> > +}
> > +
> > +static inline void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
> > +{
> > +   return memcpy(p, q, size);
> > +}
> > +
> > +#define wr_assign(var, val)((var) = (val))
> > +
> > +#define wr_rcu_assign_pointer(p, v)\
> > +   rcu_assign_pointer(p, v)
> > +
> > +#else
> > +
> > +enum wr_op_type {
> > +   WR_MEMCPY,
> > +   WR_MEMSET,
> > +   WR_RCU_ASSIGN_PTR,
> > +   WR_OPS_NUMBER,
> > +};
> > +
> > +void *__wr_op(unsigned long dst, unsigned long src, __kernel_size_t len,
> > + enum wr_op_type op);
> > +
> > +/**
> > + * wr_memset() - sets n bytes of the destination to the c value
> > + * @p: beginning of the memory to write to
> > + * @c: byte to replicate
> > + * @len: amount of bytes to copy
> > + *
> > + * Returns true on success, false otherwise.
> > + */
> > +static inline void *wr_memset(void *p, int c, __kernel_size_t len)
> > +{
> > +   return __wr_op((unsigned long)p, (unsigned long)c, len, WR_MEMSET);
> > +}
> > +
> > +/**
> > + * wr_memcpy() - copies n bytes from source to destination
> > + * @dst: beginning of the memory to write to
> > + * @src: beginning of the memory to read from
> > + * @n_bytes: amount of bytes to copy
> > + *
> > + * Returns pointer to the destination
> > + */
> > +static inline void *wr_memcpy(void *p, const void *q, __kernel_size_t size)
> > +{
> > +   return __wr_op((unsigned long)p, (unsigned long)q, size, WR_MEMCPY);
> > +}
> > +
> > +/**
> > + * wr_assign() - sets a write-rare variable to a specified value
> > + * @var: the variable to set
> > + * @val: the new value
> > + *
> > + 

Re: [PATCH AUTOSEL 4.19 13/73] s390/mm: fix mis-accounting of pgtable_bytes

2018-11-30 Thread Martin Schwidefsky
On Wed, 14 Nov 2018 17:21:07 -0500
Sasha Levin  wrote:

> From: Martin Schwidefsky 
> 
> [ Upstream commit e12e4044aede97974feb7f0ed726a5179a32 ]
> 
> In case a fork or a clone system call fails in copy_process and the error
> handling does the mmput() at the bad_fork_cleanup_mm label, the
> following warning messages will appear on the console:
> 
>   BUG: non-zero pgtables_bytes on freeing mm: 16384
> 
> The reason for that is the tricks we play with mm_inc_nr_puds() and
> mm_inc_nr_pmds() in init_new_context().
> 
> A normal 64-bit process has 3 levels of page table, the p4d level and
> the pud level are folded. On process termination the free_pud_range()
> function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> mm_dec_nr_puds() call, but there actually is not really a pud table.
> 
> One issue with this is the fact that pgtable_bytes is usually off
> by a few kilobytes, but the more severe problem is that for a failed
> fork or clone the free_pgtables() function is not called. In this case
> there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
> the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
> The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
> BUG message. The message itself is purely cosmetic, but annoying.
> 
> To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
> function to check for the true size of the address space.
> 
> Reported-by: Li Wang 
> Tested-by: Li Wang 
> Signed-off-by: Martin Schwidefsky 
> Signed-off-by: Sasha Levin 

The fix for the page table accounting problem initially had four git commits:
a8874e7e8a "mm: make the __PAGETABLE_PxD_FOLDED defines non-empty"
1071fc5779 "mm: introduce mm_[p4d|pud|pmd]_folded"
6d212db119 "mm: add mm_pxd_folded checks to pgtable_bytes accounting functions"
e12e4044ae "s390/mm: fix mis-accounting of pgtable_bytes"

In the meantime a fifth git commit is needed to really fix it:
814cedbc0b "s390/mm: correct pgtable_bytes on page table downgrade"

The autoselect mechanism only picked up one of the initial four patches.
Not good. May I ask *WHY* this patch was picked out of the blue?

Same for 4.18-stable.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH AUTOSEL 4.19 13/73] s390/mm: fix mis-accounting of pgtable_bytes

2018-11-30 Thread Martin Schwidefsky
On Wed, 14 Nov 2018 17:21:07 -0500
Sasha Levin  wrote:

> From: Martin Schwidefsky 
> 
> [ Upstream commit e12e4044aede97974feb7f0ed726a5179a32 ]
> 
> In case a fork or a clone system call fails in copy_process and the error
> handling does the mmput() at the bad_fork_cleanup_mm label, the
> following warning messages will appear on the console:
> 
>   BUG: non-zero pgtables_bytes on freeing mm: 16384
> 
> The reason for that is the tricks we play with mm_inc_nr_puds() and
> mm_inc_nr_pmds() in init_new_context().
> 
> A normal 64-bit process has 3 levels of page table, the p4d level and
> the pud level are folded. On process termination the free_pud_range()
> function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> mm_dec_nr_puds() call, but there actually is not really a pud table.
> 
> One issue with this is the fact that pgtable_bytes is usually off
> by a few kilobytes, but the more severe problem is that for a failed
> fork or clone the free_pgtables() function is not called. In this case
> there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
> the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
> The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
> BUG message. The message itself is purely cosmetic, but annoying.
> 
> To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
> function to check for the true size of the address space.
> 
> Reported-by: Li Wang 
> Tested-by: Li Wang 
> Signed-off-by: Martin Schwidefsky 
> Signed-off-by: Sasha Levin 

The fix for the page table accounting problem initially had four git commits:
a8874e7e8a "mm: make the __PAGETABLE_PxD_FOLDED defines non-empty"
1071fc5779 "mm: introduce mm_[p4d|pud|pmd]_folded"
6d212db119 "mm: add mm_pxd_folded checks to pgtable_bytes accounting functions"
e12e4044ae "s390/mm: fix mis-accounting of pgtable_bytes"

In the meantime a fifth git commit is needed to really fix it:
814cedbc0b "s390/mm: correct pgtable_bytes on page table downgrade"

The autoselect mechanism only picked up one of the initial four patches.
Not good. May I ask *WHY* this patch was picked out of the blue?

Same for 4.18-stable.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for 4.20 #3

2018-11-29 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes for 4.20-rc5

The following changes since commit ccda4af0f4b92f7b4c308d3acc262f4a7e3affad:

  Linux 4.20-rc2 (2018-11-11 17:12:31 -0600)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-4.20-3

for you to fetch changes up to 814cedbc0b78d75e335c96da9b9391142eab5600:

  s390/mm: correct pgtable_bytes on page table downgrade (2018-11-27 14:07:12 
+0100)


s390 updates for 4.20-rc5

 - Add two missing kfree calls on error paths in the vfio-ccw code

 - Make sure that all data structures of a mediated vfio-ccw device are
   initialized before registering it

 - Fix a sparse warning in vfio-ccw

 - A followup patch for the pgtable_bytes accounting, the page table
   downgrade for compat processes missed a mm_dec_nr_pmds()

 - Reject sampling requests in the PMU init function of the CPU
   measurement counter facility

 - With the vfio AP driver an AP queue needs to be reset on every device
   probe as the alternative driver could have modified the device state


Eric Farman (2):
  s390/cio: Fix cleanup of pfn_array alloc failure
  s390/cio: Fix cleanup when unsupported IDA format is used

Harald Freudenberger (1):
  s390/zcrypt: reinit ap queue state machine during device probe

Martin Schwidefsky (2):
  Merge tag 'vfio-ccw-20181113' of 
git://git.kernel.org/.../kvms390/vfio-ccw into fixes
  s390/mm: correct pgtable_bytes on page table downgrade

Pierre Morel (1):
  vfio: ccw: Register mediated device once all structures are initialized

Sebastian Ott (1):
  s390/cio: make vfio_ccw_io_region static

Thomas Richter (1):
  s390/cpum_cf: Reject request for sampling in event initialization

 arch/s390/kernel/perf_cpum_cf.c|  2 ++
 arch/s390/mm/pgalloc.c |  1 +
 drivers/s390/cio/vfio_ccw_cp.c |  6 --
 drivers/s390/cio/vfio_ccw_drv.c| 10 +-
 drivers/s390/crypto/ap_bus.c   |  8 
 drivers/s390/crypto/ap_bus.h   |  1 +
 drivers/s390/crypto/ap_queue.c | 15 +++
 drivers/s390/crypto/zcrypt_cex2a.c |  1 -
 drivers/s390/crypto/zcrypt_cex2c.c |  1 -
 drivers/s390/crypto/zcrypt_cex4.c  |  1 -
 10 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 74091fd..d5523ad 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event)
break;
 
case PERF_TYPE_HARDWARE:
+   if (is_sampling_event(event))   /* No sampling support */
+   return -ENOENT;
ev = attr->config;
/* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 814f265..6791562 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm)
}
 
pgd = mm->pgd;
+   mm_dec_nr_pmds(mm);
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index fd77e46..70a006ba 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -387,8 +387,10 @@ static int ccwchain_calc_length(u64 iova, struct 
channel_program *cp)
 * orb specified one of the unsupported formats, we defer
 * checking for IDAWs in unsupported formats to here.
 */
-   if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
+   if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
+   kfree(p);
return -EOPNOTSUPP;
+   }
 
if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
break;
@@ -528,7 +530,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 
ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
if (ret < 0)
-   goto out_init;
+   goto out_unpin;
 
/* Translate this direct ccw to a idal ccw. */
idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index f47d16b..a10cec0 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -22,7 +22,7 @@
 #include "vfio_ccw_private.h"
 
 struct workqueue_struct *vfio_ccw_work_q;
-struct

[GIT PULL] s390 patches for 4.20 #3

2018-11-29 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes for 4.20-rc5

The following changes since commit ccda4af0f4b92f7b4c308d3acc262f4a7e3affad:

  Linux 4.20-rc2 (2018-11-11 17:12:31 -0600)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-4.20-3

for you to fetch changes up to 814cedbc0b78d75e335c96da9b9391142eab5600:

  s390/mm: correct pgtable_bytes on page table downgrade (2018-11-27 14:07:12 
+0100)


s390 updates for 4.20-rc5

 - Add two missing kfree calls on error paths in the vfio-ccw code

 - Make sure that all data structures of a mediated vfio-ccw device are
   initialized before registering it

 - Fix a sparse warning in vfio-ccw

 - A followup patch for the pgtable_bytes accounting, the page table
   downgrade for compat processes missed a mm_dec_nr_pmds()

 - Reject sampling requests in the PMU init function of the CPU
   measurement counter facility

 - With the vfio AP driver an AP queue needs to be reset on every device
   probe as the alternative driver could have modified the device state


Eric Farman (2):
  s390/cio: Fix cleanup of pfn_array alloc failure
  s390/cio: Fix cleanup when unsupported IDA format is used

Harald Freudenberger (1):
  s390/zcrypt: reinit ap queue state machine during device probe

Martin Schwidefsky (2):
  Merge tag 'vfio-ccw-20181113' of 
git://git.kernel.org/.../kvms390/vfio-ccw into fixes
  s390/mm: correct pgtable_bytes on page table downgrade

Pierre Morel (1):
  vfio: ccw: Register mediated device once all structures are initialized

Sebastian Ott (1):
  s390/cio: make vfio_ccw_io_region static

Thomas Richter (1):
  s390/cpum_cf: Reject request for sampling in event initialization

 arch/s390/kernel/perf_cpum_cf.c|  2 ++
 arch/s390/mm/pgalloc.c |  1 +
 drivers/s390/cio/vfio_ccw_cp.c |  6 --
 drivers/s390/cio/vfio_ccw_drv.c| 10 +-
 drivers/s390/crypto/ap_bus.c   |  8 
 drivers/s390/crypto/ap_bus.h   |  1 +
 drivers/s390/crypto/ap_queue.c | 15 +++
 drivers/s390/crypto/zcrypt_cex2a.c |  1 -
 drivers/s390/crypto/zcrypt_cex2c.c |  1 -
 drivers/s390/crypto/zcrypt_cex4.c  |  1 -
 10 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 74091fd..d5523ad 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event)
break;
 
case PERF_TYPE_HARDWARE:
+   if (is_sampling_event(event))   /* No sampling support */
+   return -ENOENT;
ev = attr->config;
/* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 814f265..6791562 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm)
}
 
pgd = mm->pgd;
+   mm_dec_nr_pmds(mm);
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index fd77e46..70a006ba 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -387,8 +387,10 @@ static int ccwchain_calc_length(u64 iova, struct 
channel_program *cp)
 * orb specified one of the unsupported formats, we defer
 * checking for IDAWs in unsupported formats to here.
 */
-   if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
+   if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
+   kfree(p);
return -EOPNOTSUPP;
+   }
 
if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
break;
@@ -528,7 +530,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 
ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
if (ret < 0)
-   goto out_init;
+   goto out_unpin;
 
/* Translate this direct ccw to a idal ccw. */
idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index f47d16b..a10cec0 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -22,7 +22,7 @@
 #include "vfio_ccw_private.h"
 
 struct workqueue_struct *vfio_ccw_work_q;
-struct

Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-11-27 Thread Martin Schwidefsky
On Tue, 27 Nov 2018 08:34:12 +0100
Heiko Carstens  wrote:

> On Wed, Oct 31, 2018 at 01:36:23PM +0300, Kirill A. Shutemov wrote:
> > On Wed, Oct 31, 2018 at 11:09:44AM +0100, Heiko Carstens wrote:  
> > > On Wed, Oct 31, 2018 at 07:31:49AM +0100, Martin Schwidefsky wrote:  
> > > > Thanks for testing. Unfortunately Heiko reported another issue yesterday
> > > > with the patch applied. This time the other way around:
> > > > 
> > > > BUG: non-zero pgtables_bytes on freeing mm: -16384
> > > > 
> > > > I am trying to understand how this can happen. For now I would like to
> > > > keep the patch on hold in case they need another change.  
> > > 
> > > FWIW, Kirill: is there a reason why this "BUG:" output is done with
> > > pr_alert() and not with VM_BUG_ON() or one of the WARN*() variants?
> > > 
> > > > That would help to get more information with DEBUG_VM and / or
> > > panic_on_warn=1 set. At least for automated testing it would be nice
> > > to have such triggers.  
> > 
> > Stack trace is not helpful there. It will always show the exit path which
> > is useless.  
> 
> So, even with the updated version of these patches I can flood dmesg
> and the console with
> 
> BUG: non-zero pgtables_bytes on freeing mm: 16384
> 
> messages with this complex reproducer on s390:
> 
> echo "void main(void) {}" | gcc -m31 -xc -o compat - && ./compat

Forgot a hunk in the fix.. I claim not enough coffee :-/
Patch is queued and I will send a please pull by the end of the week.
--
>From c0499f2aa853939984ecaf0d393012486e56c7ce Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky 
Date: Tue, 27 Nov 2018 14:04:04 +0100
Subject: [PATCH] s390/mm: correct pgtable_bytes on page table downgrade

The downgrade of a page table from 3 levels to 2 levels for a 31-bit compat
process removes a pmd table which has to be counted against pgtable_bytes.

Signed-off-by: Martin Schwidefsky 
---
 arch/s390/mm/pgalloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 814f26520aa2..6791562779ee 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm)
}
 
pgd = mm->pgd;
+   mm_dec_nr_pmds(mm);
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-- 
2.16.4
-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-11-27 Thread Martin Schwidefsky
On Tue, 27 Nov 2018 08:34:12 +0100
Heiko Carstens  wrote:

> On Wed, Oct 31, 2018 at 01:36:23PM +0300, Kirill A. Shutemov wrote:
> > On Wed, Oct 31, 2018 at 11:09:44AM +0100, Heiko Carstens wrote:  
> > > On Wed, Oct 31, 2018 at 07:31:49AM +0100, Martin Schwidefsky wrote:  
> > > > Thanks for testing. Unfortunately Heiko reported another issue yesterday
> > > > with the patch applied. This time the other way around:
> > > > 
> > > > BUG: non-zero pgtables_bytes on freeing mm: -16384
> > > > 
> > > > I am trying to understand how this can happen. For now I would like to
> > > > keep the patch on hold in case they need another change.  
> > > 
> > > FWIW, Kirill: is there a reason why this "BUG:" output is done with
> > > pr_alert() and not with VM_BUG_ON() or one of the WARN*() variants?
> > > 
> > > > That would help to get more information with DEBUG_VM and / or
> > > panic_on_warn=1 set. At least for automated testing it would be nice
> > > to have such triggers.  
> > 
> > Stack trace is not helpful there. It will always show the exit path which
> > is useless.  
> 
> So, even with the updated version of these patches I can flood dmesg
> and the console with
> 
> BUG: non-zero pgtables_bytes on freeing mm: 16384
> 
> messages with this complex reproducer on s390:
> 
> echo "void main(void) {}" | gcc -m31 -xc -o compat - && ./compat

Forgot a hunk in the fix.. I claim not enough coffee :-/
Patch is queued and I will send a please pull by the end of the week.
--
>From c0499f2aa853939984ecaf0d393012486e56c7ce Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky 
Date: Tue, 27 Nov 2018 14:04:04 +0100
Subject: [PATCH] s390/mm: correct pgtable_bytes on page table downgrade

The downgrade of a page table from 3 levels to 2 levels for a 31-bit compat
process removes a pmd table which has to be counted against pgtable_bytes.

Signed-off-by: Martin Schwidefsky 
---
 arch/s390/mm/pgalloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 814f26520aa2..6791562779ee 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm)
}
 
pgd = mm->pgd;
+   mm_dec_nr_pmds(mm);
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-- 
2.16.4
-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390: Remove obsolete bust_spinlock() implementation

2018-11-26 Thread Martin Schwidefsky
On Fri, 23 Nov 2018 08:02:12 +0100
Heiko Carstens  wrote:

> On Fri, Nov 23, 2018 at 11:17:48AM +0900, Sergey Senozhatsky wrote:
> > On (11/22/18 15:15), Petr Mladek wrote:  
> > > The commit cefc8be82403cf ("Consolidate bust_spinlocks()") kept
> > > the s390-specific implementation because of the absence of CONFIG_VT.
> > > In fact, the only difference was calling console_unblank() instead of
> > > unblank_screen().
> > > 
> > > The common implementation in lib/bust_spinlocks.c started to call
> > > unblank_screen() explicitly since the commit b61312d353da187
> > > ("oops handling: ensure that any oops is flushed to the mtdoops
> > > console").
> > > 
> > > As a result, the custom implementation is no longer necessary.
> > > And we could get all the other improvements of the common
> > > implementation for free.  
> > 
> > I believe I sent a similar patch several weeks ago and it's
> > in s390 patch queue as of now, waiting for the next merge
> > window.
> > 
> > lkml.kernel.org/r/20181025081108.GB26561@osiris  
> 
> Yes, it will be added soon to the features branch of the
> s390/linux.git repository on kernel.org and then hit linux-next.

The patch is now queued for the next merge window.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390: Remove obsolete bust_spinlock() implementation

2018-11-26 Thread Martin Schwidefsky
On Fri, 23 Nov 2018 08:02:12 +0100
Heiko Carstens  wrote:

> On Fri, Nov 23, 2018 at 11:17:48AM +0900, Sergey Senozhatsky wrote:
> > On (11/22/18 15:15), Petr Mladek wrote:  
> > > The commit cefc8be82403cf ("Consolidate bust_spinlocks()") kept
> > > the s390-specific implementation because of the absence of CONFIG_VT.
> > > In fact, the only difference was calling console_unblank() instead of
> > > unblank_screen().
> > > 
> > > The common implementation in lib/bust_spinlocks.c started to call
> > > unblank_screen() explicitly since the commit b61312d353da187
> > > ("oops handling: ensure that any oops is flushed to the mtdoops
> > > console").
> > > 
> > > As a result, the custom implementation is no longer necessary.
> > > And we could get all the other improvements of the common
> > > implementation for free.  
> > 
> > I believe I sent a similar patch several weeks ago and it's
> > in s390 patch queue as of now, waiting for the next merge
> > window.
> > 
> > lkml.kernel.org/r/20181025081108.GB26561@osiris  
> 
> Yes, it will be added soon to the features branch of the
> s390/linux.git repository on kernel.org and then hit linux-next.

The patch is now queued for the next merge window.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [for-next][PATCH 11/18] s390/function_graph: Simplify with function_graph_entry()

2018-11-21 Thread Martin Schwidefsky
On Wed, 21 Nov 2018 19:28:12 -0500
Steven Rostedt  wrote:

> From: "Steven Rostedt (VMware)" 
> 
> The function_graph_enter() function does the work of calling the function
> graph hook function and the management of the shadow stack, simplifying the
> work done in the architecture dependent prepare_ftrace_return().
> 
> Have s390 use the new code, and remove the shadow stack management as well as
> having to set up the trace structure.
> 
> This is needed to prepare for a fix of a design bug on how the curr_ret_stack
> is used.
> 
> Cc: Martin Schwidefsky 
> Cc: Heiko Carstens 
> Cc: Julian Wiedmann 
> Cc: linux-s...@vger.kernel.org
> Cc: sta...@kernel.org
> Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling 
> trace return callback")
> Signed-off-by: Steven Rostedt (VMware) 

A quick test showed that the patch series works fine on s390.
Acked-by: Martin Schwidefsky 

> ---
>  arch/s390/kernel/ftrace.c | 13 ++---
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 84be7f02d0c2..39b13d71a8fe 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init);
>   */
>  unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
>  {
> - struct ftrace_graph_ent trace;
> -
>   if (unlikely(ftrace_graph_is_dead()))
>   goto out;
>   if (unlikely(atomic_read(&current->tracing_graph_pause)))
>   goto out;
>   ip -= MCOUNT_INSN_SIZE;
> - trace.func = ip;
> - trace.depth = current->curr_ret_stack + 1;
> - /* Only trace if the calling function expects to. */
> - if (!ftrace_graph_entry(&trace))
> - goto out;
> - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
> -  NULL) == -EBUSY)
> - goto out;
> - parent = (unsigned long) return_to_handler;
> + if (!function_graph_enter(parent, ip, 0, NULL))
> + parent = (unsigned long) return_to_handler;
>  out:
>   return parent;
>  }


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [for-next][PATCH 11/18] s390/function_graph: Simplify with function_graph_entry()

2018-11-21 Thread Martin Schwidefsky
On Wed, 21 Nov 2018 19:28:12 -0500
Steven Rostedt  wrote:

> From: "Steven Rostedt (VMware)" 
> 
> The function_graph_enter() function does the work of calling the function
> graph hook function and the management of the shadow stack, simplifying the
> work done in the architecture dependent prepare_ftrace_return().
> 
> Have s390 use the new code, and remove the shadow stack management as well as
> having to set up the trace structure.
> 
> This is needed to prepare for a fix of a design bug on how the curr_ret_stack
> is used.
> 
> Cc: Martin Schwidefsky 
> Cc: Heiko Carstens 
> Cc: Julian Wiedmann 
> Cc: linux-s...@vger.kernel.org
> Cc: sta...@kernel.org
> Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling 
> trace return callback")
> Signed-off-by: Steven Rostedt (VMware) 

A quick test showed that the patch series works fine on s390.
Acked-by: Martin Schwidefsky 

> ---
>  arch/s390/kernel/ftrace.c | 13 ++---
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 84be7f02d0c2..39b13d71a8fe 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init);
>   */
>  unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
>  {
> - struct ftrace_graph_ent trace;
> -
>   if (unlikely(ftrace_graph_is_dead()))
>   goto out;
>   if (unlikely(atomic_read(&current->tracing_graph_pause)))
>   goto out;
>   ip -= MCOUNT_INSN_SIZE;
> - trace.func = ip;
> - trace.depth = current->curr_ret_stack + 1;
> - /* Only trace if the calling function expects to. */
> - if (!ftrace_graph_entry(&trace))
> - goto out;
> - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
> -  NULL) == -EBUSY)
> - goto out;
> - parent = (unsigned long) return_to_handler;
> + if (!function_graph_enter(parent, ip, 0, NULL))
> + parent = (unsigned long) return_to_handler;
>  out:
>   return parent;
>  }


-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: crashkernel=512M is no longer working on this aarch64 server

2018-11-11 Thread Martin Schwidefsky
On Sun, 11 Nov 2018 08:36:09 -0500
Qian Cai  wrote:

> > On Nov 11, 2018, at 6:35 AM, Martin Schwidefsky  
> > wrote:
> > 
> > On Sat, 10 Nov 2018 23:41:34 -0500
> > Qian Cai  wrote:
> >   
> >> It was broken somewhere between b00d209241ff and 3541833fd1f2.
> >> 
> >> [0.00] cannot allocate crashkernel (size:0x2000)
> >> 
> >> Where a good one looks like this,
> >> 
> >> [0.00] crashkernel reserved: 0x0860 - 
> >> 0x2860 (512 MB)
> >> 
> >> Some commits look more suspicious than others.
> >> 
> >>  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
> >>  mm: introduce mm_[p4d|pud|pmd]_folded
> >>  mm: make the __PAGETABLE_PxD_FOLDED defines non-empty  
> > 
> > The intent of these three patches is to add extra checks to the
> > pgtable_bytes accounting function. If applied incorrectly the expected
> > result would be warnings like this:
> >  BUG: non-zero pgtables_bytes on freeing mm: 16384
> > 
> > The change Linus worried about affects the __PAGETABLE_PxD_FOLDED defines.
> > These defines are used with #ifdef, #ifndef, and __is_defined() for the
> > new mm_p?d_folded() macros. I can not see how this would make a difference
> > for your iomem setup.
> >   
> >> # diff -u ../iomem.good.txt ../iomem.bad.txt 
> >> --- ../iomem.good.txt  2018-11-10 22:28:20.092614398 -0500
> >> +++ ../iomem.bad.txt   2018-11-10 20:39:54.930294479 -0500
> >> @@ -1,9 +1,8 @@
> >> -3965 : System RAM
> >>   0008-018c : Kernel code
> >> -  018d-020a : reserved
> >> -  020b-045a : Kernel data
> >> -  0860-285f : Crash kernel
> >> -  2873-2d5a : reserved
> >> +  018d-0762 : reserved
> >> +  0763-09b2 : Kernel data
> >> +  231b-2802 : reserved
> >>   30ec-30ec : reserved
> >>   3566-3965 : reserved
> >> 3966-396f : reserved
> >> @@ -127,7 +126,7 @@
> >>   7c520-7c520 : 0004:48:00.0
> >> 104000-17fbff : System RAM
> >>   13fbfd-13fdfd : reserved
> >> -  16fba8-17fbfd : reserved
> >> +  16fafd-17fbfd : reserved
> >>   17fbfe-17fbff : reserved
> >> 18-1ffbff : System RAM
> >>   1bfbff-1bfdfe : reserved  
> > 
> > The easiest way to verify if the three commits have something to do with 
> > your
> > problem is to revert them and run your test. Can you do that please ?  
> Yes, you are right. Those commits have nothing to do with the problem. I
> should have realized it earlier as those are virtual memory vs physical
> memory. Sorry for the noise.
> 
> It turned out I made a wrong assumption that if kmemleak is disabled by 
> default,
> there should be no memory reserved for kmemleak at all which is not the case.
> 
> CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=60
> CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y
> 
> Even without kmemleak=on in the kernel cmdline, it still reserves early log
> memory which causes not enough memory for crashkernel.
> 
> Since there seems no way to turn kmemleak on later after boot, is there any
> reason for the current behavior?

Well, it seems like you do have CONFIG_DEBUG_KMEMLEAK=y in your config. The code
contains data structures for the case that you want to use the kmemleak checker.
The presence of these structures will change the sizes. The last commit in
regard to the 'early_log' buffer is from 2009, with this change:

@@ -232,8 +232,9 @@ struct early_log {
 };
 
 /* early logging buffer and current position */
-static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
-static int crt_early_log;
+static struct early_log
+   early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
+static int crt_early_log __initdata;
 
 static void kmemleak_disable(void);
 
The current behavior is imho nothing new.

Would it be possible to disable CONFIG_DEBUG_KMEMLEAK for your kdump kernel?
That seems like the simplest solution.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: crashkernel=512M is no longer working on this aarch64 server

2018-11-11 Thread Martin Schwidefsky
On Sun, 11 Nov 2018 08:36:09 -0500
Qian Cai  wrote:

> > On Nov 11, 2018, at 6:35 AM, Martin Schwidefsky  
> > wrote:
> > 
> > On Sat, 10 Nov 2018 23:41:34 -0500
> > Qian Cai  wrote:
> >   
> >> It was broken somewhere between b00d209241ff and 3541833fd1f2.
> >> 
> >> [0.00] cannot allocate crashkernel (size:0x2000)
> >> 
> >> Where a good one looks like this,
> >> 
> >> [0.00] crashkernel reserved: 0x0860 - 
> >> 0x2860 (512 MB)
> >> 
> >> Some commits look more suspicious than others.
> >> 
> >>  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
> >>  mm: introduce mm_[p4d|pud|pmd]_folded
> >>  mm: make the __PAGETABLE_PxD_FOLDED defines non-empty  
> > 
> > The intent of these three patches is to add extra checks to the
> > pgtable_bytes accounting function. If applied incorrectly the expected
> > result would be warnings like this:
> >  BUG: non-zero pgtables_bytes on freeing mm: 16384
> > 
> > The change Linus worried about affects the __PAGETABLE_PxD_FOLDED defines.
> > These defines are used with #ifdef, #ifndef, and __is_defined() for the
> > new mm_p?d_folded() macros. I can not see how this would make a difference
> > for your iomem setup.
> >   
> >> # diff -u ../iomem.good.txt ../iomem.bad.txt 
> >> --- ../iomem.good.txt  2018-11-10 22:28:20.092614398 -0500
> >> +++ ../iomem.bad.txt   2018-11-10 20:39:54.930294479 -0500
> >> @@ -1,9 +1,8 @@
> >> -3965 : System RAM
> >>   0008-018c : Kernel code
> >> -  018d-020a : reserved
> >> -  020b-045a : Kernel data
> >> -  0860-285f : Crash kernel
> >> -  2873-2d5a : reserved
> >> +  018d-0762 : reserved
> >> +  0763-09b2 : Kernel data
> >> +  231b-2802 : reserved
> >>   30ec-30ec : reserved
> >>   3566-3965 : reserved
> >> 3966-396f : reserved
> >> @@ -127,7 +126,7 @@
> >>   7c520-7c520 : 0004:48:00.0
> >> 104000-17fbff : System RAM
> >>   13fbfd-13fdfd : reserved
> >> -  16fba8-17fbfd : reserved
> >> +  16fafd-17fbfd : reserved
> >>   17fbfe-17fbff : reserved
> >> 18-1ffbff : System RAM
> >>   1bfbff-1bfdfe : reserved  
> > 
> > The easiest way to verify if the three commits have something to do with 
> > your
> > problem is to revert them and run your test. Can you do that please ?  
> Yes, you are right. Those commits have nothing to do with the problem. I
> should have realized it earlier as those are virtual memory vs physical
> memory. Sorry for the noise.
> 
> It turned out I made a wrong assumption that if kmemleak is disabled by 
> default,
> there should be no memory reserved for kmemleak at all which is not the case.
> 
> CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=60
> CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y
> 
> Even without kmemleak=on in the kernel cmdline, it still reserves early log
> memory which causes not enough memory for crashkernel.
> 
> Since there seems no way to turn kmemleak on later after boot, is there any
> reason for the current behavior?

Well, it seems like you do have CONFIG_DEBUG_KMEMLEAK=y in your config. The code
contains data structures for the case that you want to use the kmemleak checker.
The presence of these structures will change the sizes. The last commit in
regard to the 'early_log' buffer is from 2009, with this change:

@@ -232,8 +232,9 @@ struct early_log {
 };
 
 /* early logging buffer and current position */
-static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
-static int crt_early_log;
+static struct early_log
+   early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
+static int crt_early_log __initdata;
 
 static void kmemleak_disable(void);
 
The current behavior is imho nothing new.

Would it be possible to disable CONFIG_DEBUG_KMEMLEAK for your kdump kernel?
That seems like the simplest solution.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: crashkernel=512M is no longer working on this aarch64 server

2018-11-11 Thread Martin Schwidefsky
On Sat, 10 Nov 2018 23:41:34 -0500
Qian Cai  wrote:

> It was broken somewhere between b00d209241ff and 3541833fd1f2.
> 
> [0.00] cannot allocate crashkernel (size:0x2000)
> 
> Where a good one looks like this,
> 
> [0.00] crashkernel reserved: 0x0860 - 0x2860 
> (512 MB)
> 
> Some commits look more suspicious than others.
> 
>   mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
>   mm: introduce mm_[p4d|pud|pmd]_folded
>   mm: make the __PAGETABLE_PxD_FOLDED defines non-empty

The intent of these three patches is to add extra checks to the
pgtable_bytes accounting function. If applied incorrectly the expected
result would be warnings like this:
  BUG: non-zero pgtables_bytes on freeing mm: 16384

The change Linus worried about affects the __PAGETABLE_PxD_FOLDED defines.
These defines are used with #ifdef, #ifndef, and __is_defined() for the
new mm_p?d_folded() macros. I can not see how this would make a difference
for your iomem setup.

> # diff -u ../iomem.good.txt ../iomem.bad.txt 
> --- ../iomem.good.txt 2018-11-10 22:28:20.092614398 -0500
> +++ ../iomem.bad.txt  2018-11-10 20:39:54.930294479 -0500
> @@ -1,9 +1,8 @@
>  -3965 : System RAM
>0008-018c : Kernel code
> -  018d-020a : reserved
> -  020b-045a : Kernel data
> -  0860-285f : Crash kernel
> -  2873-2d5a : reserved
> +  018d-0762 : reserved
> +  0763-09b2 : Kernel data
> +  231b-2802 : reserved
>30ec-30ec : reserved
>3566-3965 : reserved
>  3966-396f : reserved
> @@ -127,7 +126,7 @@
>7c520-7c520 : 0004:48:00.0
>  104000-17fbff : System RAM
>13fbfd-13fdfd : reserved
> -  16fba8-17fbfd : reserved
> +  16fafd-17fbfd : reserved
>17fbfe-17fbff : reserved
>  18-1ffbff : System RAM
>1bfbff-1bfdfe : reserved

The easiest way to verify if the three commits have something to do with your
problem is to revert them and run your test. Can you do that please ?

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: crashkernel=512M is no longer working on this aarch64 server

2018-11-11 Thread Martin Schwidefsky
On Sat, 10 Nov 2018 23:41:34 -0500
Qian Cai  wrote:

> It was broken somewhere between b00d209241ff and 3541833fd1f2.
> 
> [0.00] cannot allocate crashkernel (size:0x2000)
> 
> Where a good one looks like this,
> 
> [0.00] crashkernel reserved: 0x0860 - 0x2860 
> (512 MB)
> 
> Some commits look more suspicious than others.
> 
>   mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
>   mm: introduce mm_[p4d|pud|pmd]_folded
>   mm: make the __PAGETABLE_PxD_FOLDED defines non-empty

The intent of these three patches is to add extra checks to the
pgtable_bytes accounting function. If applied incorrectly the expected
result would be warnings like this:
  BUG: non-zero pgtables_bytes on freeing mm: 16384

The change Linus worried about affects the __PAGETABLE_PxD_FOLDED defines.
These defines are used with #ifdef, #ifndef, and __is_defined() for the
new mm_p?d_folded() macros. I can not see how this would make a difference
for your iomem setup.

> # diff -u ../iomem.good.txt ../iomem.bad.txt 
> --- ../iomem.good.txt 2018-11-10 22:28:20.092614398 -0500
> +++ ../iomem.bad.txt  2018-11-10 20:39:54.930294479 -0500
> @@ -1,9 +1,8 @@
>  -3965 : System RAM
>0008-018c : Kernel code
> -  018d-020a : reserved
> -  020b-045a : Kernel data
> -  0860-285f : Crash kernel
> -  2873-2d5a : reserved
> +  018d-0762 : reserved
> +  0763-09b2 : Kernel data
> +  231b-2802 : reserved
>30ec-30ec : reserved
>3566-3965 : reserved
>  3966-396f : reserved
> @@ -127,7 +126,7 @@
>7c520-7c520 : 0004:48:00.0
>  104000-17fbff : System RAM
>13fbfd-13fdfd : reserved
> -  16fba8-17fbfd : reserved
> +  16fafd-17fbfd : reserved
>17fbfe-17fbff : reserved
>  18-1ffbff : System RAM
>1bfbff-1bfdfe : reserved

The easiest way to verify if the three commits have something to do with your
problem is to revert them and run your test. Can you do that please ?

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [GIT PULL] s390 patches for 4.20 #2

2018-11-09 Thread Martin Schwidefsky
On Fri, 9 Nov 2018 06:39:07 -0600
Linus Torvalds  wrote:

> On Fri, Nov 9, 2018 at 1:14 AM Martin Schwidefsky
>  wrote:
> >
> > s390 updates for 4.20-rc2  
> 
> Pulled.
> 
> >  - A fix for the pgtable_bytes misaccounting on s390. The patch changes
> >common code part in regard to page table folding and adds extra
> >checks to mm_[inc|dec]_nr_[pmds|puds].  
> 
> Ugh. This is somewhat invasive, I worry that some header include or
> architecture doesn't pick up on the subtle __PAGETABLE_XYZ_FOLDED
> things (if you don't get the includes, the mm_xyz_folded() macros will
> be mis-defined).
> 
> Has this been in linux-next or any other wide testing? The changes
> aren't _new_, but...

Just checked linux-next, the first version that picked up the patches
is 2018-11-19. The one dated 2018-10-19 does not have them.

Bad timing, no wider testing has been done. The bug itself shows up
as a false warning, nothing really breaks. I would understand if you
prefer not to have them in the official tree at this time.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [GIT PULL] s390 patches for 4.20 #2

2018-11-09 Thread Martin Schwidefsky
On Fri, 9 Nov 2018 06:39:07 -0600
Linus Torvalds  wrote:

> On Fri, Nov 9, 2018 at 1:14 AM Martin Schwidefsky
>  wrote:
> >
> > s390 updates for 4.20-rc2  
> 
> Pulled.
> 
> >  - A fix for the pgtable_bytes misaccounting on s390. The patch changes
> >common code part in regard to page table folding and adds extra
> >checks to mm_[inc|dec]_nr_[pmds|puds].  
> 
> Ugh. This is somewhat invasive, I worry that some header include or
> architecture doesn't pick up on the subtle __PAGETABLE_XYZ_FOLDED
> things (if you don't get the includes, the mm_xyz_folded() macros will
> be mis-defined).
> 
> Has this been in linux-next or any other wide testing? The changes
> aren't _new_, but...

Just checked linux-next, the first version that picked up the patches
is 2018-11-19. The one dated 2018-10-19 does not have them.

Bad timing, no wider testing has been done. The bug itself shows up
as a false warning, nothing really breaks. I would understand if you
prefer not to have them in the official tree at this time.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for 4.20 #2

2018-11-08 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes for 4.20-rc2

The following changes since commit e5f6d9afa3415104e402cd69288bb03f7165eeba:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc (2018-10-25 
18:14:31 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git s390-4.20-2

for you to fetch changes up to 0bb2ae1b26e1fb7543ec7474cdd374ac4b88c4da:

  s390/perf: Change CPUM_CF return code in event init function (2018-11-08 
07:58:16 +0100)


s390 updates for 4.20-rc2

 - A fix for the pgtable_bytes misaccounting on s390. The patch changes
   common code part in regard to page table folding and adds extra
   checks to mm_[inc|dec]_nr_[pmds|puds].

 - Add FORCE for all build targets using if_changed

 - Use non-loadable phdr for the .vmlinux.info section to avoid
   a segment overlap that confuses kexec

 - Cleanup the attribute definition for the diagnostic sampling

 - Increase stack size for CONFIG_KASAN=y builds

 - Export __node_distance to fix a build error

 - Correct return code of a PMU event init function

 - An update for the default configs


Heiko Carstens (1):
  s390: update defconfigs

Justin M. Forbes (1):
  s390/mm: Fix ERROR: "__node_distance" undefined!

Martin Schwidefsky (4):
  mm: make the __PAGETABLE_PxD_FOLDED defines non-empty
  mm: introduce mm_[p4d|pud|pmd]_folded
  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
  s390/mm: fix mis-accounting of pgtable_bytes

Thomas Richter (2):
  s390/cpum_sf: Rework attribute definition for diagnostic sampling
  s390/perf: Change CPUM_CF return code in event init function

Vasily Gorbik (4):
  s390/decompressor: add missing FORCE to build targets
  s390/vdso: add missing FORCE to build targets
  s390: avoid vmlinux segments overlap
  s390/kasan: increase instrumented stack size to 64k

 arch/arm/include/asm/pgtable-2level.h|  2 +-
 arch/m68k/include/asm/pgtable_mm.h   |  4 +-
 arch/microblaze/include/asm/pgtable.h|  2 +-
 arch/nds32/include/asm/pgtable.h |  2 +-
 arch/parisc/include/asm/pgtable.h|  2 +-
 arch/s390/Makefile   |  2 +-
 arch/s390/boot/compressed/Makefile   | 16 +++
 arch/s390/configs/debug_defconfig| 14 --
 arch/s390/configs/performance_defconfig  | 13 +-
 arch/s390/defconfig  | 79 +---
 arch/s390/include/asm/mmu_context.h  |  5 --
 arch/s390/include/asm/pgalloc.h  |  6 +--
 arch/s390/include/asm/pgtable.h  | 18 
 arch/s390/include/asm/thread_info.h  |  2 +-
 arch/s390/include/asm/tlb.h  |  6 +--
 arch/s390/kernel/entry.S |  6 +--
 arch/s390/kernel/perf_cpum_cf.c  |  2 +-
 arch/s390/kernel/perf_cpum_sf.c  | 33 +++--
 arch/s390/kernel/vdso32/Makefile |  6 +--
 arch/s390/kernel/vdso64/Makefile |  6 +--
 arch/s390/kernel/vmlinux.lds.S   |  4 +-
 arch/s390/mm/pgalloc.c   |  1 +
 arch/s390/numa/numa.c|  1 +
 include/asm-generic/4level-fixup.h   |  2 +-
 include/asm-generic/5level-fixup.h   |  2 +-
 include/asm-generic/pgtable-nop4d-hack.h |  2 +-
 include/asm-generic/pgtable-nop4d.h  |  2 +-
 include/asm-generic/pgtable-nopmd.h  |  2 +-
 include/asm-generic/pgtable-nopud.h  |  2 +-
 include/asm-generic/pgtable.h| 16 +++
 include/linux/mm.h   |  8 
 31 files changed, 175 insertions(+), 93 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-2level.h 
b/arch/arm/include/asm/pgtable-2level.h
index 92fd2c8..12659ce 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -10,7 +10,7 @@
 #ifndef _ASM_PGTABLE_2LEVEL_H
 #define _ASM_PGTABLE_2LEVEL_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Hardware-wise, we have a two level page table structure, where the first
diff --git a/arch/m68k/include/asm/pgtable_mm.h 
b/arch/m68k/include/asm/pgtable_mm.h
index 6181e41..fe3ddd7 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -55,12 +55,12 @@
  */
 #ifdef CONFIG_SUN3
 #define PTRS_PER_PTE   16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   2048
 #elif defined(CONFIG_COLDFIRE)
 #define PTRS_PER_PTE   512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   1024
 #else
diff --git a/arch/microblaze/include/asm/pgtable.h 
b/arch/microblaze/include/asm/pgtable.h
index f64ebb9..e14b662 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -63,7 +63,7 @@ extern int mem_init_done;
 

[GIT PULL] s390 patches for 4.20 #2

2018-11-08 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes for 4.20-rc2

The following changes since commit e5f6d9afa3415104e402cd69288bb03f7165eeba:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc (2018-10-25 
18:14:31 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git s390-4.20-2

for you to fetch changes up to 0bb2ae1b26e1fb7543ec7474cdd374ac4b88c4da:

  s390/perf: Change CPUM_CF return code in event init function (2018-11-08 
07:58:16 +0100)


s390 updates for 4.20-rc2

 - A fix for the pgtable_bytes misaccounting on s390. The patch changes
   common code part in regard to page table folding and adds extra
   checks to mm_[inc|dec]_nr_[pmds|puds].

 - Add FORCE for all build targets using if_changed

 - Use non-loadable phdr for the .vmlinux.info section to avoid
   a segment overlap that confuses kexec

 - Cleanup the attribute definition for the diagnostic sampling

 - Increase stack size for CONFIG_KASAN=y builds

 - Export __node_distance to fix a build error

 - Correct return code of a PMU event init function

 - An update for the default configs


Heiko Carstens (1):
  s390: update defconfigs

Justin M. Forbes (1):
  s390/mm: Fix ERROR: "__node_distance" undefined!

Martin Schwidefsky (4):
  mm: make the __PAGETABLE_PxD_FOLDED defines non-empty
  mm: introduce mm_[p4d|pud|pmd]_folded
  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
  s390/mm: fix mis-accounting of pgtable_bytes

Thomas Richter (2):
  s390/cpum_sf: Rework attribute definition for diagnostic sampling
  s390/perf: Change CPUM_CF return code in event init function

Vasily Gorbik (4):
  s390/decompressor: add missing FORCE to build targets
  s390/vdso: add missing FORCE to build targets
  s390: avoid vmlinux segments overlap
  s390/kasan: increase instrumented stack size to 64k

 arch/arm/include/asm/pgtable-2level.h|  2 +-
 arch/m68k/include/asm/pgtable_mm.h   |  4 +-
 arch/microblaze/include/asm/pgtable.h|  2 +-
 arch/nds32/include/asm/pgtable.h |  2 +-
 arch/parisc/include/asm/pgtable.h|  2 +-
 arch/s390/Makefile   |  2 +-
 arch/s390/boot/compressed/Makefile   | 16 +++
 arch/s390/configs/debug_defconfig| 14 --
 arch/s390/configs/performance_defconfig  | 13 +-
 arch/s390/defconfig  | 79 +---
 arch/s390/include/asm/mmu_context.h  |  5 --
 arch/s390/include/asm/pgalloc.h  |  6 +--
 arch/s390/include/asm/pgtable.h  | 18 
 arch/s390/include/asm/thread_info.h  |  2 +-
 arch/s390/include/asm/tlb.h  |  6 +--
 arch/s390/kernel/entry.S |  6 +--
 arch/s390/kernel/perf_cpum_cf.c  |  2 +-
 arch/s390/kernel/perf_cpum_sf.c  | 33 +++--
 arch/s390/kernel/vdso32/Makefile |  6 +--
 arch/s390/kernel/vdso64/Makefile |  6 +--
 arch/s390/kernel/vmlinux.lds.S   |  4 +-
 arch/s390/mm/pgalloc.c   |  1 +
 arch/s390/numa/numa.c|  1 +
 include/asm-generic/4level-fixup.h   |  2 +-
 include/asm-generic/5level-fixup.h   |  2 +-
 include/asm-generic/pgtable-nop4d-hack.h |  2 +-
 include/asm-generic/pgtable-nop4d.h  |  2 +-
 include/asm-generic/pgtable-nopmd.h  |  2 +-
 include/asm-generic/pgtable-nopud.h  |  2 +-
 include/asm-generic/pgtable.h| 16 +++
 include/linux/mm.h   |  8 
 31 files changed, 175 insertions(+), 93 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-2level.h 
b/arch/arm/include/asm/pgtable-2level.h
index 92fd2c8..12659ce 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -10,7 +10,7 @@
 #ifndef _ASM_PGTABLE_2LEVEL_H
 #define _ASM_PGTABLE_2LEVEL_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Hardware-wise, we have a two level page table structure, where the first
diff --git a/arch/m68k/include/asm/pgtable_mm.h 
b/arch/m68k/include/asm/pgtable_mm.h
index 6181e41..fe3ddd7 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -55,12 +55,12 @@
  */
 #ifdef CONFIG_SUN3
 #define PTRS_PER_PTE   16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   2048
 #elif defined(CONFIG_COLDFIRE)
 #define PTRS_PER_PTE   512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   1024
 #else
diff --git a/arch/microblaze/include/asm/pgtable.h 
b/arch/microblaze/include/asm/pgtable.h
index f64ebb9..e14b662 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -63,7 +63,7 @@ extern int mem_init_done;
 

Re: [PATCH] s390: numa: Export __node_distance

2018-11-04 Thread Martin Schwidefsky
On Sun,  4 Nov 2018 13:28:06 -0800
Guenter Roeck  wrote:

> __node_distance is used by nvme, resulting in:
> 
> ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined!
> 
> when trying to build nvme as module.
> 
> Fixes: f333444708f8 ("nvme: take node locality into account when selecting a 
> path")
> Cc: Christoph Hellwig 
> Signed-off-by: Guenter Roeck 
> ---
> I thought I had seen that patch already, but I don't find it anywhere.
> Maybe that was for another architecture. My apologies for the noise if it
> is already queued.
> 
>  arch/s390/numa/numa.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
> index ae0d9e889534..d31bde0870d8 100644
> --- a/arch/s390/numa/numa.c
> +++ b/arch/s390/numa/numa.c
> @@ -53,6 +53,7 @@ int __node_distance(int a, int b)
>  {
>   return mode->distance ? mode->distance(a, b) : 0;
>  }
> +EXPORT_SYMBOL(__node_distance);
> 
>  int numa_debug_enabled;
> 

That one is already queued. I'll do the __no_kasan_or_inline
change that Linus requested and then send a please pull.
Should be upstream in a few days.

For reference:

https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=for-linus&id=a541f0ebcc08ed8bc0cc492eec9a86cb280a9f24

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] s390: numa: Export __node_distance

2018-11-04 Thread Martin Schwidefsky
On Sun,  4 Nov 2018 13:28:06 -0800
Guenter Roeck  wrote:

> __node_distance is used by nvme, resulting in:
> 
> ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined!
> 
> when trying to build nvme as module.
> 
> Fixes: f333444708f8 ("nvme: take node locality into account when selecting a 
> path")
> Cc: Christoph Hellwig 
> Signed-off-by: Guenter Roeck 
> ---
> I thought I had seen that patch already, but I don't find it anywhere.
> Maybe that was for another architecture. My apologies for the noise if it
> is already queued.
> 
>  arch/s390/numa/numa.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
> index ae0d9e889534..d31bde0870d8 100644
> --- a/arch/s390/numa/numa.c
> +++ b/arch/s390/numa/numa.c
> @@ -53,6 +53,7 @@ int __node_distance(int a, int b)
>  {
>   return mode->distance ? mode->distance(a, b) : 0;
>  }
> +EXPORT_SYMBOL(__node_distance);
> 
>  int numa_debug_enabled;
> 

That one is already queued. I'll do the __no_kasan_or_inline
change that Linus requested and then send a please pull.
Should be upstream in a few days.

For reference:

https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=for-linus&id=a541f0ebcc08ed8bc0cc492eec9a86cb280a9f24

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: s390: runtime warning about pgtables_bytes

2018-11-02 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 15:57:54 -0400
Joe Lawrence  wrote:

> On Fri, Oct 12, 2018 at 05:08:33PM +0200, Martin Schwidefsky wrote:
> > On Thu, 11 Oct 2018 15:02:11 +0200
> > Martin Schwidefsky  wrote:
> >   
> > > On Thu, 11 Oct 2018 18:04:12 +0800
> > > Li Wang  wrote:
> > >   
> > > > When running s390 system with LTP/cve-2017-17052.c[1], the following 
> > > > BUG is
> > > > came out repeatedly.
> > > > I remember this warning start from kernel-4.16.0 and now it still exist 
> > > > in
> > > > kernel-4.19-rc7.
> > > > Can anyone take a look?
> > > > 
> > > > [ 2678.991496] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.001543] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.002453] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.003256] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.013689] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.024647] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.064408] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.133963] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > 
> > > > [1]:
> > > > https://github.com/linux-test-project/ltp/blob/master/testcases/cve/cve-2017-17052.c
> > > > 
> > >  
> > > Confirmed, I see this bug with cve-2017-17052 on my LPAR as well.
> > > I'll look into it.  
> >  
> > Ok, I think I understand the problem now. This is the patch I am testing
> > right now. It seems to fix the issue, but I had to change common mm
> > code for it.
> > --  
> > >From 9e3bc2e96930206ef1ece377e45224c51aca1799 Mon Sep 17 00:00:00 2001  
> > From: Martin Schwidefsky 
> > Date: Fri, 12 Oct 2018 16:32:29 +0200
> > Subject: [RFC][PATCH] s390/mm: fix mis-accounting of pgtable_bytes
> > 
> > In case a fork or a clone system call fails in copy_process and the
> > error handling does the mmput() at the bad_fork_cleanup_mm label, the
> > following warning messages will appear on the console:
> > 
> > BUG: non-zero pgtables_bytes on freeing mm: 16384
> > 
> > The reason for that is the tricks we play with mm_inc_nr_puds() and
> > mm_inc_nr_pmds() in init_new_context().
> > 
> > A normal 64-bit process has 3 levels of page table, the p4d level and
> > the pud level are folded. On process termination the free_pud_range()
> > function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> > mm_dec_nr_puds() call, but there actually is not really a pud table.
> > The s390 version of pud_free_tlb() recognizes this and does nothing,
> > the region-3 table will be freed with the pgd_free() call later on.
> > But the mm_dec_nr_puds() is done unconditionally; to counteract this
> > the init_new_context() function has an extra mm_inc_nr_puds() call.
> > 
> > Now with a failed fork or clone the free_pgtables() function is not
> > called, there is no mm_dec_nr_puds() but the mm_inc_nr_puds() has
> > been done which leads to the incorrect pgtable_bytes of 16384.
> > Nothing is broken by this, but the warning is annoying.
> > 
> > To get rid of the warning drop the mm_inc_nr_pmds() & mm_inc_nr_puds()
> > calls from init_new_context(), introduce the mm_pmd_folded(),
> > mm_pud_folded() and mm_p4d_folded() helpers, and add if-statements
> > to the functions mm_[inc|dec]_nr_[pmds|puds].
> > 
> > Signed-off-by: Martin Schwidefsky 
> > ---
> >  arch/s390/include/asm/mmu_context.h |  5 -
> >  arch/s390/include/asm/pgalloc.h |  6 ++---
> >  arch/s390/include/asm/pgtable.h | 18 +++
> >  arch/s390/include/asm/tlb.h |  6 ++---
> >  include/linux/mm.h  | 44 
> > -
> >  5 files changed, 62 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/s390/include/asm/mmu_context.h 
> > b/arch/s390/include/asm/mmu_context.h
> > index dbd689d556ce..ccbb53e22024 100644
> > --- a/arch/s390/include/asm/mmu_context.h
> > +++ b/arch/s390/include/asm/mmu_context.h
> > @@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct 
> > *tsk,
> > mm->context.asce_limit = STACK_TOP_MAX;
> > mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
> >_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
> > -   /* pgd_alloc() did not account this pud */
> > -  

Re: s390: runtime warning about pgtables_bytes

2018-11-02 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 15:57:54 -0400
Joe Lawrence  wrote:

> On Fri, Oct 12, 2018 at 05:08:33PM +0200, Martin Schwidefsky wrote:
> > On Thu, 11 Oct 2018 15:02:11 +0200
> > Martin Schwidefsky  wrote:
> >   
> > > On Thu, 11 Oct 2018 18:04:12 +0800
> > > Li Wang  wrote:
> > >   
> > > > When running s390 system with LTP/cve-2017-17052.c[1], the following 
> > > > BUG is
> > > > came out repeatedly.
> > > > I remember this warning start from kernel-4.16.0 and now it still exist 
> > > > in
> > > > kernel-4.19-rc7.
> > > > Can anyone take a look?
> > > > 
> > > > [ 2678.991496] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.001543] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.002453] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.003256] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.013689] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.024647] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.064408] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > [ 2679.133963] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > > 
> > > > [1]:
> > > > https://github.com/linux-test-project/ltp/blob/master/testcases/cve/cve-2017-17052.c
> > > > 
> > >  
> > > Confirmed, I see this bug with cve-2017-17052 on my LPAR as well.
> > > I'll look into it.  
> >  
> > Ok, I think I understand the problem now. This is the patch I am testing
> > right now. It seems to fix the issue, but I had to change common mm
> > code for it.
> > --  
> > >From 9e3bc2e96930206ef1ece377e45224c51aca1799 Mon Sep 17 00:00:00 2001  
> > From: Martin Schwidefsky 
> > Date: Fri, 12 Oct 2018 16:32:29 +0200
> > Subject: [RFC][PATCH] s390/mm: fix mis-accounting of pgtable_bytes
> > 
> > In case a fork or a clone system call fails in copy_process and the
> > error handling does the mmput() at the bad_fork_cleanup_mm label, the
> > following warning messages will appear on the console:
> > 
> > BUG: non-zero pgtables_bytes on freeing mm: 16384
> > 
> > The reason for that is the tricks we play with mm_inc_nr_puds() and
> > mm_inc_nr_pmds() in init_new_context().
> > 
> > A normal 64-bit process has 3 levels of page table, the p4d level and
> > the pud level are folded. On process termination the free_pud_range()
> > function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> > mm_dec_nr_puds() call, but there actually is not really a pud table.
> > The s390 version of pud_free_tlb() recognizes this and does nothing,
> > the region-3 table will be freed with the pgd_free() call later on.
> > But the mm_dec_nr_puds() is done unconditionally; to counteract this
> > the init_new_context() function has an extra mm_inc_nr_puds() call.
> > 
> > Now with a failed fork or clone the free_pgtables() function is not
> > called, there is no mm_dec_nr_puds() but the mm_inc_nr_puds() has
> > been done which leads to the incorrect pgtable_bytes of 16384.
> > Nothing is broken by this, but the warning is annoying.
> > 
> > To get rid of the warning drop the mm_inc_nr_pmds() & mm_inc_nr_puds()
> > calls from init_new_context(), introduce the mm_pmd_folded(),
> > mm_pud_folded() and mm_p4d_folded() helpers, and add if-statements
> > to the functions mm_[inc|dec]_nr_[pmds|puds].
> > 
> > Signed-off-by: Martin Schwidefsky 
> > ---
> >  arch/s390/include/asm/mmu_context.h |  5 -
> >  arch/s390/include/asm/pgalloc.h |  6 ++---
> >  arch/s390/include/asm/pgtable.h | 18 +++
> >  arch/s390/include/asm/tlb.h |  6 ++---
> >  include/linux/mm.h  | 44 
> > -
> >  5 files changed, 62 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/s390/include/asm/mmu_context.h 
> > b/arch/s390/include/asm/mmu_context.h
> > index dbd689d556ce..ccbb53e22024 100644
> > --- a/arch/s390/include/asm/mmu_context.h
> > +++ b/arch/s390/include/asm/mmu_context.h
> > @@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct 
> > *tsk,
> > mm->context.asce_limit = STACK_TOP_MAX;
> > mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
> >_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
> > -   /* pgd_alloc() did not account this pud */
> > -  

[PATCH 4/4] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
In case a fork or a clone system call fails in copy_process and the
error handling does the mmput() at the bad_fork_cleanup_mm label, the
following warning messages will appear on the console:

  BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.

One issue with this is the fact that pgtable_bytes is usually off
by a few kilobytes, but the more severe problem is that for a failed
fork or clone the free_pgtables() function is not called. In this case
there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
the mm_inc_nr_puds() and mm_inc_nr_pmds() in init_new_context().
The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
BUG message. The message itself is purely cosmetic, but annoying.

To fix this, override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
functions to check for the true size of the address space.

Reported-by: Li Wang 
Tested-by: Li Wang 
Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/mmu_context.h |  5 -
 arch/s390/include/asm/pgalloc.h |  6 +++---
 arch/s390/include/asm/pgtable.h | 18 ++
 arch/s390/include/asm/tlb.h |  6 +++---
 arch/s390/mm/pgalloc.c  |  1 +
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index dbd689d..ccbb53e 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-   /* pgd_alloc() did not account this pud */
-   mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-   /* pgd_alloc() did not account this pmd */
-   mm_inc_nr_pmds(mm);
-   mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf..5ee7337 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, 
unsigned long entry)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-   if (mm->context.asce_limit <= _REGION3_SIZE)
+   if (mm_pmd_folded(mm))
return _SEGMENT_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION2_SIZE)
+   if (mm_pud_folded(mm))
return _REGION3_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION1_SIZE)
+   if (mm_p4d_folded(mm))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 411d435..0637324 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr)
   _REGION_ENTRY_PROTECT | \
   _REGION_ENTRY_NOEXEC)
 
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba..b31c779 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, 
pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 

[PATCH 4/4] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
In case a fork or a clone system call fails in copy_process and the
error handling does the mmput() at the bad_fork_cleanup_mm label, the
following warning messages will appear on the console:

  BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.

One issue with this is the fact that pgtable_bytes is usually off
by a few kilobytes, but the more severe problem is that for a failed
fork or clone the free_pgtables() function is not called. In this case
there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
the mm_inc_nr_puds() and mm_inc_nr_pmds() in init_new_context().
The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
BUG message. The message itself is purely cosmetic, but annoying.

To fix this, override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
functions to check for the true size of the address space.

Reported-by: Li Wang 
Tested-by: Li Wang 
Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/mmu_context.h |  5 -
 arch/s390/include/asm/pgalloc.h |  6 +++---
 arch/s390/include/asm/pgtable.h | 18 ++
 arch/s390/include/asm/tlb.h |  6 +++---
 arch/s390/mm/pgalloc.c  |  1 +
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index dbd689d..ccbb53e 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-   /* pgd_alloc() did not account this pud */
-   mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-   /* pgd_alloc() did not account this pmd */
-   mm_inc_nr_pmds(mm);
-   mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf..5ee7337 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, 
unsigned long entry)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-   if (mm->context.asce_limit <= _REGION3_SIZE)
+   if (mm_pmd_folded(mm))
return _SEGMENT_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION2_SIZE)
+   if (mm_pud_folded(mm))
return _REGION3_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION1_SIZE)
+   if (mm_p4d_folded(mm))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 411d435..0637324 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr)
   _REGION_ENTRY_PROTECT | \
   _REGION_ENTRY_NOEXEC)
 
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba..b31c779 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, 
pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 

[PATCH 3/4] mm: add mm_pxd_folded checks to pgtable_bytes accounting functions

2018-10-31 Thread Martin Schwidefsky
The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overridden by the architecture, this allows keeping a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Acked-by: Kirill A. Shutemov 
Signed-off-by: Martin Schwidefsky 
---
 include/linux/mm.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1e52b8f..844a853 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1744,11 +1744,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, 
unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1768,11 +1772,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, 
unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
2.7.4



[PATCH 1/4] mm: make the __PAGETABLE_PxD_FOLDED defines non-empty

2018-10-31 Thread Martin Schwidefsky
Change the currently empty defines for __PAGETABLE_PMD_FOLDED,
__PAGETABLE_PUD_FOLDED and __PAGETABLE_P4D_FOLDED to expand to 1.
This makes it possible to use __is_defined() to test if the
preprocessor define exists.

Signed-off-by: Martin Schwidefsky 
---
 arch/arm/include/asm/pgtable-2level.h| 2 +-
 arch/m68k/include/asm/pgtable_mm.h   | 4 ++--
 arch/microblaze/include/asm/pgtable.h| 2 +-
 arch/nds32/include/asm/pgtable.h | 2 +-
 arch/parisc/include/asm/pgtable.h| 2 +-
 include/asm-generic/4level-fixup.h   | 2 +-
 include/asm-generic/5level-fixup.h   | 2 +-
 include/asm-generic/pgtable-nop4d-hack.h | 2 +-
 include/asm-generic/pgtable-nop4d.h  | 2 +-
 include/asm-generic/pgtable-nopmd.h  | 2 +-
 include/asm-generic/pgtable-nopud.h  | 2 +-
 11 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-2level.h 
b/arch/arm/include/asm/pgtable-2level.h
index 92fd2c8..12659ce 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -10,7 +10,7 @@
 #ifndef _ASM_PGTABLE_2LEVEL_H
 #define _ASM_PGTABLE_2LEVEL_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Hardware-wise, we have a two level page table structure, where the first
diff --git a/arch/m68k/include/asm/pgtable_mm.h 
b/arch/m68k/include/asm/pgtable_mm.h
index 6181e41..fe3ddd7 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -55,12 +55,12 @@
  */
 #ifdef CONFIG_SUN3
 #define PTRS_PER_PTE   16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   2048
 #elif defined(CONFIG_COLDFIRE)
 #define PTRS_PER_PTE   512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   1024
 #else
diff --git a/arch/microblaze/include/asm/pgtable.h 
b/arch/microblaze/include/asm/pgtable.h
index f64ebb9..e14b662 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -63,7 +63,7 @@ extern int mem_init_done;
 
 #include 
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index d3e19a5..9f52db9 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,7 +4,7 @@
 #ifndef _ASMNDS32_PGTABLE_H
 #define _ASMNDS32_PGTABLE_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #include 
 #include 
 
diff --git a/arch/parisc/include/asm/pgtable.h 
b/arch/parisc/include/asm/pgtable.h
index b941ac7..c7bb74e 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, 
unsigned long addr)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD   (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
 #else
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define BITS_PER_PMD   0
 #endif
 #define PTRS_PER_PMD(1UL << BITS_PER_PMD)
diff --git a/include/asm-generic/4level-fixup.h 
b/include/asm-generic/4level-fixup.h
index 89f3b03..e3667c9 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -3,7 +3,7 @@
 #define _4LEVEL_FIXUP_H
 
 #define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 #define PUD_SHIFT  PGDIR_SHIFT
 #define PUD_SIZE   PGDIR_SIZE
diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index 9c2e070..73474bb 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -3,7 +3,7 @@
 #define _5LEVEL_FIXUP_H
 
 #define __ARCH_HAS_5LEVEL_HACK
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 #define P4D_SHIFT  PGDIR_SHIFT
 #define P4D_SIZE   PGDIR_SIZE
diff --git a/include/asm-generic/pgtable-nop4d-hack.h 
b/include/asm-generic/pgtable-nop4d-hack.h
index 0c34215..1d6dd38 100644
--- a/include/asm-generic/pgtable-nop4d-hack.h
+++ b/include/asm-generic/pgtable-nop4d-hack.h
@@ -5,7 +5,7 @@
 #ifndef __ASSEMBLY__
 #include 
 
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 /*
  * Having the pud type consist of a pgd gets the size right, and allows
diff --git a/include/asm-generic/pgtable-nop4d.h 
b/include/asm-generic/pgtable-nop4d.h
index 1a29b2a..04cb913 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -4,7 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 typedef struct { pgd_t pgd; } p4d_t;
 
diff --git a/include/asm-generic/pgtable-nopmd.h 
b/include/asm-generic/pgtable-nopmd.h
index f35f6e8..b85b827 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-g

[PATCH 3/4] mm: add mm_pxd_folded checks to pgtable_bytes accounting functions

2018-10-31 Thread Martin Schwidefsky
The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overridden by the architecture, this allows keeping a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Acked-by: Kirill A. Shutemov 
Signed-off-by: Martin Schwidefsky 
---
 include/linux/mm.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1e52b8f..844a853 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1744,11 +1744,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, 
unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1768,11 +1772,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, 
unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
2.7.4



[PATCH 1/4] mm: make the __PAGETABLE_PxD_FOLDED defines non-empty

2018-10-31 Thread Martin Schwidefsky
Change the currently empty defines for __PAGETABLE_PMD_FOLDED,
__PAGETABLE_PUD_FOLDED and __PAGETABLE_P4D_FOLDED to expand to 1.
This makes it possible to use __is_defined() to test if the
preprocessor define exists.

Signed-off-by: Martin Schwidefsky 
---
 arch/arm/include/asm/pgtable-2level.h| 2 +-
 arch/m68k/include/asm/pgtable_mm.h   | 4 ++--
 arch/microblaze/include/asm/pgtable.h| 2 +-
 arch/nds32/include/asm/pgtable.h | 2 +-
 arch/parisc/include/asm/pgtable.h| 2 +-
 include/asm-generic/4level-fixup.h   | 2 +-
 include/asm-generic/5level-fixup.h   | 2 +-
 include/asm-generic/pgtable-nop4d-hack.h | 2 +-
 include/asm-generic/pgtable-nop4d.h  | 2 +-
 include/asm-generic/pgtable-nopmd.h  | 2 +-
 include/asm-generic/pgtable-nopud.h  | 2 +-
 11 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-2level.h 
b/arch/arm/include/asm/pgtable-2level.h
index 92fd2c8..12659ce 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -10,7 +10,7 @@
 #ifndef _ASM_PGTABLE_2LEVEL_H
 #define _ASM_PGTABLE_2LEVEL_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Hardware-wise, we have a two level page table structure, where the first
diff --git a/arch/m68k/include/asm/pgtable_mm.h 
b/arch/m68k/include/asm/pgtable_mm.h
index 6181e41..fe3ddd7 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -55,12 +55,12 @@
  */
 #ifdef CONFIG_SUN3
 #define PTRS_PER_PTE   16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   2048
 #elif defined(CONFIG_COLDFIRE)
 #define PTRS_PER_PTE   512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   1024
 #else
diff --git a/arch/microblaze/include/asm/pgtable.h 
b/arch/microblaze/include/asm/pgtable.h
index f64ebb9..e14b662 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -63,7 +63,7 @@ extern int mem_init_done;
 
 #include 
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index d3e19a5..9f52db9 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,7 +4,7 @@
 #ifndef _ASMNDS32_PGTABLE_H
 #define _ASMNDS32_PGTABLE_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #include 
 #include 
 
diff --git a/arch/parisc/include/asm/pgtable.h 
b/arch/parisc/include/asm/pgtable.h
index b941ac7..c7bb74e 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, 
unsigned long addr)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD   (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
 #else
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define BITS_PER_PMD   0
 #endif
 #define PTRS_PER_PMD(1UL << BITS_PER_PMD)
diff --git a/include/asm-generic/4level-fixup.h 
b/include/asm-generic/4level-fixup.h
index 89f3b03..e3667c9 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -3,7 +3,7 @@
 #define _4LEVEL_FIXUP_H
 
 #define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 #define PUD_SHIFT  PGDIR_SHIFT
 #define PUD_SIZE   PGDIR_SIZE
diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index 9c2e070..73474bb 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -3,7 +3,7 @@
 #define _5LEVEL_FIXUP_H
 
 #define __ARCH_HAS_5LEVEL_HACK
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 #define P4D_SHIFT  PGDIR_SHIFT
 #define P4D_SIZE   PGDIR_SIZE
diff --git a/include/asm-generic/pgtable-nop4d-hack.h 
b/include/asm-generic/pgtable-nop4d-hack.h
index 0c34215..1d6dd38 100644
--- a/include/asm-generic/pgtable-nop4d-hack.h
+++ b/include/asm-generic/pgtable-nop4d-hack.h
@@ -5,7 +5,7 @@
 #ifndef __ASSEMBLY__
 #include 
 
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 /*
  * Having the pud type consist of a pgd gets the size right, and allows
diff --git a/include/asm-generic/pgtable-nop4d.h 
b/include/asm-generic/pgtable-nop4d.h
index 1a29b2a..04cb913 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -4,7 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 typedef struct { pgd_t pgd; } p4d_t;
 
diff --git a/include/asm-generic/pgtable-nopmd.h 
b/include/asm-generic/pgtable-nopmd.h
index f35f6e8..b85b827 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-g

[PATCH 2/4] mm: introduce mm_[p4d|pud|pmd]_folded

2018-10-31 Thread Martin Schwidefsky
Add three architecture overrideable functions to test if the
p4d, pud, or pmd layer of a page table is folded or not.

Signed-off-by: Martin Schwidefsky 
---
 include/asm-generic/pgtable.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 5657a20..359fb93 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1127,4 +1127,20 @@ static inline bool arch_has_pfn_modify_check(void)
 #endif
 #endif
 
+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm)  __is_defined(__PAGETABLE_P4D_FOLDED)
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm)  __is_defined(__PAGETABLE_PUD_FOLDED)
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm)  __is_defined(__PAGETABLE_PMD_FOLDED)
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
-- 
2.7.4



[PATCH 2/4] mm: introduce mm_[p4d|pud|pmd]_folded

2018-10-31 Thread Martin Schwidefsky
Add three architecture overrideable functions to test if the
p4d, pud, or pmd layer of a page table is folded or not.

Signed-off-by: Martin Schwidefsky 
---
 include/asm-generic/pgtable.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 5657a20..359fb93 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1127,4 +1127,20 @@ static inline bool arch_has_pfn_modify_check(void)
 #endif
 #endif
 
+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm)  __is_defined(__PAGETABLE_P4D_FOLDED)
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm)  __is_defined(__PAGETABLE_PUD_FOLDED)
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm)  __is_defined(__PAGETABLE_PMD_FOLDED)
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
-- 
2.7.4



[PATCH 0/4] pgtable bytes mis-accounting v3

2018-10-31 Thread Martin Schwidefsky
Greetings,

version #3 of the fix for the pgtable_bytes mis-accounting problem
on s390. Three times is a charm..

Changes v2 -> v3:

 - Add a fourth patch to redefine __PAGETABLE_PxD_FOLDED as non-empty

 - Move mm_pxd_folded() to include/asm-generic/pgtable.h and use
__is_defined() again with the redefined __PAGETABLE_PxD_FOLDED

 - Add a missing mm_inc_nr_puds() in arch/s390/mm/pgalloc.c

Changes v1 -> v2:

 - Split the patch into three parts, one patch to add the mm_pxd_folded
helpers, one patch to use the helpers in mm_[dec|inc]_nr_[pmds|puds]
   and finally the fix for s390.

 - Drop the use of __is_defined, it does not work with the
__PAGETABLE_PxD_FOLDED defines

 - Do not change the basic #ifdef'ery in mm.h, just add the calls
to mm_pxd_folded to the pgtable_bytes accounting functions. This
   fixes the compile error on alpha (and potentially on other archs).

Martin Schwidefsky (4):
  mm: make the __PAGETABLE_PxD_FOLDED defines non-empty
  mm: introduce mm_[p4d|pud|pmd]_folded
  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
  s390/mm: fix mis-accounting of pgtable_bytes

 arch/arm/include/asm/pgtable-2level.h|  2 +-
 arch/m68k/include/asm/pgtable_mm.h   |  4 ++--
 arch/microblaze/include/asm/pgtable.h|  2 +-
 arch/nds32/include/asm/pgtable.h |  2 +-
 arch/parisc/include/asm/pgtable.h|  2 +-
 arch/s390/include/asm/mmu_context.h  |  5 -
 arch/s390/include/asm/pgalloc.h  |  6 +++---
 arch/s390/include/asm/pgtable.h  | 18 ++
 arch/s390/include/asm/tlb.h  |  6 +++---
 arch/s390/mm/pgalloc.c   |  1 +
 include/asm-generic/4level-fixup.h   |  2 +-
 include/asm-generic/5level-fixup.h   |  2 +-
 include/asm-generic/pgtable-nop4d-hack.h |  2 +-
 include/asm-generic/pgtable-nop4d.h  |  2 +-
 include/asm-generic/pgtable-nopmd.h  |  2 +-
 include/asm-generic/pgtable-nopud.h  |  2 +-
 include/asm-generic/pgtable.h| 16 
 include/linux/mm.h   |  8 
 18 files changed, 61 insertions(+), 23 deletions(-)

-- 
2.7.4



[PATCH 0/4] pgtable bytes mis-accounting v3

2018-10-31 Thread Martin Schwidefsky
Greetings,

version #3 of the fix for the pgtable_bytes mis-accounting problem
on s390. Three times is a charm..

Changes v2 -> v3:

 - Add a fourth patch to redefine __PAGETABLE_PxD_FOLDED as non-empty

 - Move mm_pxd_folded() to include/asm-generic/pgtable.h and use
__is_defined() again with the redefined __PAGETABLE_PxD_FOLDED

 - Add a missing mm_inc_nr_puds() in arch/s390/mm/pgalloc.c

Changes v1 -> v2:

 - Split the patch into three parts, one patch to add the mm_pxd_folded
helpers, one patch to use the helpers in mm_[dec|inc]_nr_[pmds|puds]
   and finally the fix for s390.

 - Drop the use of __is_defined, it does not work with the
__PAGETABLE_PxD_FOLDED defines

 - Do not change the basic #ifdef'ery in mm.h, just add the calls
to mm_pxd_folded to the pgtable_bytes accounting functions. This
   fixes the compile error on alpha (and potentially on other archs).

Martin Schwidefsky (4):
  mm: make the __PAGETABLE_PxD_FOLDED defines non-empty
  mm: introduce mm_[p4d|pud|pmd]_folded
  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
  s390/mm: fix mis-accounting of pgtable_bytes

 arch/arm/include/asm/pgtable-2level.h|  2 +-
 arch/m68k/include/asm/pgtable_mm.h   |  4 ++--
 arch/microblaze/include/asm/pgtable.h|  2 +-
 arch/nds32/include/asm/pgtable.h |  2 +-
 arch/parisc/include/asm/pgtable.h|  2 +-
 arch/s390/include/asm/mmu_context.h  |  5 -
 arch/s390/include/asm/pgalloc.h  |  6 +++---
 arch/s390/include/asm/pgtable.h  | 18 ++
 arch/s390/include/asm/tlb.h  |  6 +++---
 arch/s390/mm/pgalloc.c   |  1 +
 include/asm-generic/4level-fixup.h   |  2 +-
 include/asm-generic/5level-fixup.h   |  2 +-
 include/asm-generic/pgtable-nop4d-hack.h |  2 +-
 include/asm-generic/pgtable-nop4d.h  |  2 +-
 include/asm-generic/pgtable-nopmd.h  |  2 +-
 include/asm-generic/pgtable-nopud.h  |  2 +-
 include/asm-generic/pgtable.h| 16 
 include/linux/mm.h   |  8 
 18 files changed, 61 insertions(+), 23 deletions(-)

-- 
2.7.4



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 07:46:47 +0100
Martin Schwidefsky  wrote:

> On Wed, 31 Oct 2018 14:43:38 +0800
> Li Wang  wrote:
> 
> > On Wed, Oct 31, 2018 at 2:31 PM, Martin Schwidefsky 
> > wrote:
> >   
> > > BUG: non-zero pgtables_bytes on freeing mm: -16384
> > >
> > 
> > Okay, the problem is still triggered by LTP/cve-2017-17052.c?  
> 
> No, unfortunately we do not have a simple testcase to trigger this new bug.
> It happened once with one of our test kernels, the path that leads to this
> is completely unclear.
 
Ok, got it. There is a mm_inc_nr_puds(mm) missing in the s390 code:

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 76d89ee8b428..814f26520aa2 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long 
end)
mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+   mm_inc_nr_puds(mm);
} else {
crst_table_init(table, _REGION1_ENTRY_EMPTY);
pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);

One of our test-cases did an upgrade of a 3-level page table.
I'll update the patch and send a v3.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 07:46:47 +0100
Martin Schwidefsky  wrote:

> On Wed, 31 Oct 2018 14:43:38 +0800
> Li Wang  wrote:
> 
> > On Wed, Oct 31, 2018 at 2:31 PM, Martin Schwidefsky 
> > wrote:
> >   
> > > BUG: non-zero pgtables_bytes on freeing mm: -16384
> > >
> > 
> > Okay, the problem is still triggered by LTP/cve-2017-17052.c?  
> 
> No, unfortunately we do not have a simple testcase to trigger this new bug.
> It happened once with one of our test kernels, the path that leads to this
> is completely unclear.
 
Ok, got it. There is a mm_inc_nr_puds(mm) missing in the s390 code:

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 76d89ee8b428..814f26520aa2 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long 
end)
mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+   mm_inc_nr_puds(mm);
} else {
crst_table_init(table, _REGION1_ENTRY_EMPTY);
pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);

One of our test-cases did an upgrade of a 3-level page table.
I'll update the patch and send a v3.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 1/3] mm: introduce mm_[p4d|pud|pmd]_folded

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 12:02:55 +0300
"Kirill A. Shutemov"  wrote:

> On Mon, Oct 15, 2018 at 06:42:37PM +0200, Martin Schwidefsky wrote:
> > Add three architecture overrideable function to test if the
> > p4d, pud, or pmd layer of a page table is folded or not.
> > 
> > Signed-off-by: Martin Schwidefsky 
> > ---
> >  include/linux/mm.h | 40 
> >  1 file changed, 40 insertions(+)
> > 
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 0416a7204be3..d1029972541c 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h  
> 
> Shouldn't it be somewhere in asm-generic/pgtable*?

If you prefer the definitions in asm-generic that is fine with me.
I'll give it a try to see if it still compiles.

> > @@ -105,6 +105,46 @@ extern int mmap_rnd_compat_bits __read_mostly;
> >  #define mm_zero_struct_page(pp)  ((void)memset((pp), 0, sizeof(struct 
> > page)))
> >  #endif
> >  
> > +/*
> > + * On some architectures it depends on the mm if the p4d/pud or pmd
> > + * layer of the page table hierarchy is folded or not.
> > + */
> > +#ifndef mm_p4d_folded
> > +#define mm_p4d_folded(mm) mm_p4d_folded(mm)  
> 
> Do we need to define it in generic header?

That is true, it should work without the #define in the generic header.

> > +static inline bool mm_p4d_folded(struct mm_struct *mm)
> > +{
> > +#ifdef __PAGETABLE_P4D_FOLDED
> > +   return 1;
> > +#else
> > +   return 0;
> > +#endif  
> 
> Maybe
>   return __is_defined(__PAGETABLE_P4D_FOLDED);
> 
> ?
 
I have tried that, doesn't work. The reason is that the
__PAGETABLE_xxx_FOLDED defines do not have a value.

#define __PAGETABLE_P4D_FOLDED
#define __PAGETABLE_PMD_FOLDED
#define __PAGETABLE_PUD_FOLDED

While the definition of CONFIG_xxx symbols looks like this

#define CONFIG_xxx 1

The __is_defined needs the value for the __take_second_arg trick.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 1/3] mm: introduce mm_[p4d|pud|pmd]_folded

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 12:02:55 +0300
"Kirill A. Shutemov"  wrote:

> On Mon, Oct 15, 2018 at 06:42:37PM +0200, Martin Schwidefsky wrote:
> > Add three architecture overrideable function to test if the
> > p4d, pud, or pmd layer of a page table is folded or not.
> > 
> > Signed-off-by: Martin Schwidefsky 
> > ---
> >  include/linux/mm.h | 40 
> >  1 file changed, 40 insertions(+)
> > 
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 0416a7204be3..d1029972541c 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h  
> 
> Shouldn't it be somewhere in asm-generic/pgtable*?

If you prefer the definitions in asm-generic that is fine with me.
I'll give it a try to see if it still compiles.

> > @@ -105,6 +105,46 @@ extern int mmap_rnd_compat_bits __read_mostly;
> >  #define mm_zero_struct_page(pp)  ((void)memset((pp), 0, sizeof(struct 
> > page)))
> >  #endif
> >  
> > +/*
> > + * On some architectures it depends on the mm if the p4d/pud or pmd
> > + * layer of the page table hierarchy is folded or not.
> > + */
> > +#ifndef mm_p4d_folded
> > +#define mm_p4d_folded(mm) mm_p4d_folded(mm)  
> 
> Do we need to define it in generic header?

That is true, it should work without the #define in the generic header.

> > +static inline bool mm_p4d_folded(struct mm_struct *mm)
> > +{
> > +#ifdef __PAGETABLE_P4D_FOLDED
> > +   return 1;
> > +#else
> > +   return 0;
> > +#endif  
> 
> Maybe
>   return __is_defined(__PAGETABLE_P4D_FOLDED);
> 
> ?
 
I have tried that, doesn't work. The reason is that the
__PAGETABLE_xxx_FOLDED defines do not have a value.

#define __PAGETABLE_P4D_FOLDED
#define __PAGETABLE_PMD_FOLDED
#define __PAGETABLE_PUD_FOLDED

While the definition of CONFIG_xxx symbols looks like this

#define CONFIG_xxx 1

The __is_defined needs the value for the __take_second_arg trick.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 14:43:38 +0800
Li Wang  wrote:

> On Wed, Oct 31, 2018 at 2:31 PM, Martin Schwidefsky 
> wrote:
> 
> > On Wed, 31 Oct 2018 14:18:33 +0800
> > Li Wang  wrote:
> >  
> > > On Tue, Oct 16, 2018 at 12:42 AM, Martin Schwidefsky <  
> > schwidef...@de.ibm.com  
> > > > wrote:  
> > >  
> > > > In case a fork or a clone system fails in copy_process and the error
> > > > handling does the mmput() at the bad_fork_cleanup_mm label, the
> > > > following warning messages will appear on the console:
> > > >
> > > >   BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > >
> > > > The reason for that is the tricks we play with mm_inc_nr_puds() and
> > > > mm_inc_nr_pmds() in init_new_context().
> > > >
> > > > A normal 64-bit process has 3 levels of page table, the p4d level and
> > > > the pud level are folded. On process termination the free_pud_range()
> > > > function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> > > > mm_dec_nr_puds() call, but there actually is not really a pud table.
> > > >
> > > > One issue with this is the fact that pgtable_bytes is usually off
> > > > by a few kilobytes, but the more severe problem is that for a failed
> > > > fork or clone the free_pgtables() function is not called. In this case
> > > > there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
> > > > the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
> > > > The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
> > > > BUG message. The message itself is purely cosmetic, but annoying.
> > > >
> > > > To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
> > > > function to check for the true size of the address space.
> > > >  
> > >
> > > I can confirm that it works to the problem, the warning message is gone
> > > after applying this patch on s390x. And I also done ltp syscalls/cve test
> > > for the patch set on x86_64 arch, there has no new regression.
> > >
> > > Tested-by: Li Wang   
> >
> > Thanks for testing. Unfortunately Heiko reported another issue yesterday
> > with the patch applied. This time the other way around:
> >
> > BUG: non-zero pgtables_bytes on freeing mm: -16384
> >  
> 
> Okay, the problem is still triggered by LTP/cve-2017-17052.c?

No, unfortunately we do not have a simple testcase to trigger this new bug.
It happened once with one of our test kernels, the path that leads to this
is completely unclear.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 14:43:38 +0800
Li Wang  wrote:

> On Wed, Oct 31, 2018 at 2:31 PM, Martin Schwidefsky 
> wrote:
> 
> > On Wed, 31 Oct 2018 14:18:33 +0800
> > Li Wang  wrote:
> >  
> > > On Tue, Oct 16, 2018 at 12:42 AM, Martin Schwidefsky <  
> > schwidef...@de.ibm.com  
> > > > wrote:  
> > >  
> > > > In case a fork or a clone system fails in copy_process and the error
> > > > handling does the mmput() at the bad_fork_cleanup_mm label, the
> > > > following warning messages will appear on the console:
> > > >
> > > >   BUG: non-zero pgtables_bytes on freeing mm: 16384
> > > >
> > > > The reason for that is the tricks we play with mm_inc_nr_puds() and
> > > > mm_inc_nr_pmds() in init_new_context().
> > > >
> > > > A normal 64-bit process has 3 levels of page table, the p4d level and
> > > > the pud level are folded. On process termination the free_pud_range()
> > > > function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> > > > mm_dec_nr_puds() call, but there actually is not really a pud table.
> > > >
> > > > One issue with this is the fact that pgtable_bytes is usually off
> > > > by a few kilobytes, but the more severe problem is that for a failed
> > > > fork or clone the free_pgtables() function is not called. In this case
> > > > there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
> > > > the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
> > > > The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
> > > > BUG message. The message itself is purely cosmetic, but annoying.
> > > >
> > > > To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
> > > > function to check for the true size of the address space.
> > > >  
> > >
> > > I can confirm that it works to the problem, the warning message is gone
> > > after applying this patch on s390x. And I also done ltp syscalls/cve test
> > > for the patch set on x86_64 arch, there has no new regression.
> > >
> > > Tested-by: Li Wang   
> >
> > Thanks for testing. Unfortunately Heiko reported another issue yesterday
> > with the patch applied. This time the other way around:
> >
> > BUG: non-zero pgtables_bytes on freeing mm: -16384
> >  
> 
> Okay, the problem is still triggered by LTP/cve-2017-17052.c?

No, unfortunately we do not have a simple testcase to trigger this new bug.
It happened once with one of our test kernels, the path that leads to this
is completely unclear.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 14:18:33 +0800
Li Wang  wrote:

> On Tue, Oct 16, 2018 at 12:42 AM, Martin Schwidefsky wrote:
> 
> > In case a fork or a clone system fails in copy_process and the error
> > handling does the mmput() at the bad_fork_cleanup_mm label, the
> > following warning messages will appear on the console:
> >
> >   BUG: non-zero pgtables_bytes on freeing mm: 16384
> >
> > The reason for that is the tricks we play with mm_inc_nr_puds() and
> > mm_inc_nr_pmds() in init_new_context().
> >
> > A normal 64-bit process has 3 levels of page table, the p4d level and
> > the pud level are folded. On process termination the free_pud_range()
> > function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> > mm_dec_nr_puds() call, but there actually is not really a pud table.
> >
> > One issue with this is the fact that pgtable_bytes is usually off
> > by a few kilobytes, but the more severe problem is that for a failed
> > fork or clone the free_pgtables() function is not called. In this case
> > there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
> > the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
> > The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
> > BUG message. The message itself is purely cosmetic, but annoying.
> >
> > To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
> > function to check for the true size of the address space.
> >  
> 
> I can confirm that it works to the problem, the warning message is gone
> after applying this patch on s390x. And I also done ltp syscalls/cve test
> for the patch set on x86_64 arch, there has no new regression.
> 
> Tested-by: Li Wang 

Thanks for testing. Unfortunately Heiko reported another issue yesterday
with the patch applied. This time the other way around:

BUG: non-zero pgtables_bytes on freeing mm: -16384

I am trying to understand how this can happen. For now I would like to
keep the patch on hold in case they need another change.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-31 Thread Martin Schwidefsky
On Wed, 31 Oct 2018 14:18:33 +0800
Li Wang  wrote:

> On Tue, Oct 16, 2018 at 12:42 AM, Martin Schwidefsky wrote:
> 
> > In case a fork or a clone system fails in copy_process and the error
> > handling does the mmput() at the bad_fork_cleanup_mm label, the
> > following warning messages will appear on the console:
> >
> >   BUG: non-zero pgtables_bytes on freeing mm: 16384
> >
> > The reason for that is the tricks we play with mm_inc_nr_puds() and
> > mm_inc_nr_pmds() in init_new_context().
> >
> > A normal 64-bit process has 3 levels of page table, the p4d level and
> > the pud level are folded. On process termination the free_pud_range()
> > function in mm/memory.c will subtract 16KB from pgtable_bytes with a
> > mm_dec_nr_puds() call, but there actually is not really a pud table.
> >
> > One issue with this is the fact that pgtable_bytes is usually off
> > by a few kilobytes, but the more severe problem is that for a failed
> > fork or clone the free_pgtables() function is not called. In this case
> > there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
> > the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
> > The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
> > BUG message. The message itself is purely cosmetic, but annoying.
> >
> > To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
> > function to check for the true size of the address space.
> >  
> 
> I can confirm that it works to the problem, the warning message is gone
> after applying this patch on s390x. And I also done ltp syscalls/cve test
> for the patch set on x86_64 arch, there has no new regression.
> 
> Tested-by: Li Wang 

Thanks for testing. Unfortunately Heiko reported another issue yesterday
with the patch applied. This time the other way around:

BUG: non-zero pgtables_bytes on freeing mm: -16384

I am trying to understand how this can happen. For now I would like to
keep the patch on hold in case they need another change.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: dcache endless loop in d_invalidate

2018-10-25 Thread Martin Schwidefsky
On Tue, 16 Oct 2018 13:15:28 +0200
Martin Schwidefsky  wrote:

> In short, this if-statement in select_collect:
> 
> if (dentry->d_flags & DCACHE_SHRINK_LIST) {
> data->found++;
> }
> 
> with assumption that "somebody else" will do the shrinking seems broken.
> 
> Do you agree?

If I am not mistaken this problem should be fixed by upstream commit
4fb4887140 "restore cond_resched() in shrink_dcache_parent()"
which goes on top of
ff17fa561a "d_invalidate(): unhash immediately"

Due to the cond_resched() the task that set DCACHE_SHRINK_LIST for the
remaining two dcache entries will be scheduled eventually. This will
allow the task waiting for the deletion of these dcache entries 
to continue, although some CPU cycles may get wasted.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: dcache endless loop in d_invalidate

2018-10-25 Thread Martin Schwidefsky
On Tue, 16 Oct 2018 13:15:28 +0200
Martin Schwidefsky  wrote:

> In short, this if-statement in select_collect:
> 
> if (dentry->d_flags & DCACHE_SHRINK_LIST) {
> data->found++;
> }
> 
> with assumption that "somebody else" will do the shrinking seems broken.
> 
> Do you agree?

If I am not mistaken this problem should be fixed by upstream commit
4fb4887140 "restore cond_resched() in shrink_dcache_parent()"
which goes on top of
ff17fa561a "d_invalidate(): unhash immediately"

Due to the cond_resched() the task that set DCACHE_SHRINK_LIST for the
remaining two dcache entries will be scheduled eventually. This will
allow the task waiting for the deletion of these dcache entries 
to continue, although some CPU cycles may get wasted.

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[GIT PULL] s390 patches for the 4.20 merge window #1

2018-10-23 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes and features for 4.20

The following changes since commit 55a5542a546238354d1f209f794414168cf8c71d:

  s390/hibernate: fix error handling when suspend cpu != resume cpu (2018-09-20 
13:20:23 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-4.20-1

for you to fetch changes up to f822ad2c2c03af85a531c5174136b6d5b1abc566:

  s390/pkey: move pckmo subfunction available checks away from module init 
(2018-10-22 08:37:46 +0200)


s390 updates for the 4.20 merge window

 - Improved access control for the zcrypt driver, multiple device nodes
   can now be created with different access control lists

 - Extend the pkey API to provide random protected keys, this is useful
   for encrypted swap device with ephemeral protected keys

 - Add support for virtually mapped kernel stacks

 - Rework the early boot code, this moves the memory detection into the
   boot code that runs prior to decompression.

 - Add KASAN support

 - Bug fixes and cleanups


Chengguang Xu (1):
  s390/dasd: remove unnecessary condition check

Colin Ian King (1):
  s390/tape: fix spelling mistake "partion" -> "partition"

Halil Pasic (1):
  s390/zcrypt: enable AP bus scan without a valid default domain

Harald Freudenberger (6):
  s390/zcrypt: multiple zcrypt device nodes support
  s390/zcrypt: zcrypt device driver cleanup
  s390/zcrypt: provide apfs failure code on type 86 error reply
  s390/zcrypt: add ap_adapter_mask sysfs attribute
  s390/zcrypt: fix broken zcrypt_send_cprb in-kernel api function
  s390/pkey: move pckmo subfunction available checks away from module init

Heiko Carstens (2):
  s390/dumpstack: print psw mask and address again
  s390/mem_detect: add missing include

Ingo Franzki (8):
  s390/pkey: Introduce new API for random protected key generation
  s390/pkey: Define protected key blob format
  s390/pkey: Add sysfs attributes to emit protected key blobs
  s390/pkey: Add sysfs attributes to emit secure key blobs
  s390/pkey: Introduce new API for random protected key verification
  s390/pkey: Introduce new API for transforming key blobs
  s390/crypto: Enhance paes cipher to accept variable length key material
  s390/pkey: Load pkey kernel module automatically

Jan Höppner (1):
  s390/sclp: Allow to request adapter reset

Janosch Frank (1):
  s390/sthyi: Fix machine name validity indication

Julian Wiedmann (2):
  s390/qdio: clean up AOB handling
  s390/ccwgroup: add get_ccwgroupdev_by_busid()

Martin Schwidefsky (9):
  s390: add initial 64-bit restart PSW
  s390/appldata: pass parameter list pointer to appldata_asm
  s390/appldata: do not use stack buffers for hardware data
  s390/hypfs: do not use stack buffers for hardware data
  s390/monwriter: do not use stack buffers for hardware data
  s390/pfault: do not use stack buffers for hardware data
  init: add arch_call_rest_init to allow stack switching
  s390: add stack switch helper
  s390: add support for virtually mapped kernel stacks

Mikhail Zaslonko (1):
  s390/vmalloc: fix VMALLOC_START calculation

Thomas Richter (1):
  s390/perf: Return error when debug_register fails

Vasily Gorbik (47):
  s390/vdso: avoid 64-bit vdso mapping for compat tasks
  s390/vdso: correct CFI annotations of vDSO functions
  s390: clean up stacks setup
  s390: unify stack size definitions
  s390: remove decompressor's head.S
  s390/decompressor: rework uncompressed image info collection
  s390/decompressor: get rid of .bss usage
  s390/sclp: simplify early hsa_size detection
  s390: rescue initrd as early as possible
  s390/decompressor: clean up and rename compressed/misc.c
  s390: introduce .boot.data section
  s390/sclp: move sclp_early_read_info to sclp_early_core.c
  s390/mem_detect: move tprot loop to early boot phase
  s390: introduce .boot.data section compile time validation
  s390/mem_detect: introduce SCLP storage info
  s390/mem_detect: introduce z/VM specific diag260 call
  s390/mem_detect: use SCLP info for continuous memory detection
  s390/mem_detect: replace tprot loop with binary search
  s390/mem_detect: add info source debug print
  s390/sclp: introduce sclp_early_get_hsa_size
  s390: move ipl block and cmd line handling to early boot phase
  s390/mm: add missing pfn_to_kaddr helper
  s390/kasan: avoid vdso instrumentation
  s390/kasan: avoid instrumentation of early C code
  s390/kasan: replace some memory functions
  s390: introduce MAX_PTRS_PER_P4D
  s390: add pgd_page primitive
  s390/kasan: add initialization code and enable it
  s390/kasan: double the stack size

[GIT PULL] s390 patches for the 4.20 merge window #1

2018-10-23 Thread Martin Schwidefsky
Hi Linus,

please pull s390 fixes and features for 4.20

The following changes since commit 55a5542a546238354d1f209f794414168cf8c71d:

  s390/hibernate: fix error handling when suspend cpu != resume cpu (2018-09-20 
13:20:23 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git tags/s390-4.20-1

for you to fetch changes up to f822ad2c2c03af85a531c5174136b6d5b1abc566:

  s390/pkey: move pckmo subfunction available checks away from module init 
(2018-10-22 08:37:46 +0200)


s390 updates for the 4.20 merge window

 - Improved access control for the zcrypt driver, multiple device nodes
   can now be created with different access control lists

 - Extend the pkey API to provide random protected keys, this is useful
   for encrypted swap device with ephemeral protected keys

 - Add support for virtually mapped kernel stacks

 - Rework the early boot code, this moves the memory detection into the
   boot code that runs prior to decompression.

 - Add KASAN support

 - Bug fixes and cleanups


Chengguang Xu (1):
  s390/dasd: remove unnecessary condition check

Colin Ian King (1):
  s390/tape: fix spelling mistake "partion" -> "partition"

Halil Pasic (1):
  s390/zcrypt: enable AP bus scan without a valid default domain

Harald Freudenberger (6):
  s390/zcrypt: multiple zcrypt device nodes support
  s390/zcrypt: zcrypt device driver cleanup
  s390/zcrypt: provide apfs failure code on type 86 error reply
  s390/zcrypt: add ap_adapter_mask sysfs attribute
  s390/zcrypt: fix broken zcrypt_send_cprb in-kernel api function
  s390/pkey: move pckmo subfunction available checks away from module init

Heiko Carstens (2):
  s390/dumpstack: print psw mask and address again
  s390/mem_detect: add missing include

Ingo Franzki (8):
  s390/pkey: Introduce new API for random protected key generation
  s390/pkey: Define protected key blob format
  s390/pkey: Add sysfs attributes to emit protected key blobs
  s390/pkey: Add sysfs attributes to emit secure key blobs
  s390/pkey: Introduce new API for random protected key verification
  s390/pkey: Introduce new API for transforming key blobs
  s390/crypto: Enhance paes cipher to accept variable length key material
  s390/pkey: Load pkey kernel module automatically

Jan Höppner (1):
  s390/sclp: Allow to request adapter reset

Janosch Frank (1):
  s390/sthyi: Fix machine name validity indication

Julian Wiedmann (2):
  s390/qdio: clean up AOB handling
  s390/ccwgroup: add get_ccwgroupdev_by_busid()

Martin Schwidefsky (9):
  s390: add initial 64-bit restart PSW
  s390/appldata: pass parameter list pointer to appldata_asm
  s390/appldata: do not use stack buffers for hardware data
  s390/hypfs: do not use stack buffers for hardware data
  s390/monwriter: do not use stack buffers for hardware data
  s390/pfault: do not use stack buffers for hardware data
  init: add arch_call_rest_init to allow stack switching
  s390: add stack switch helper
  s390: add support for virtually mapped kernel stacks

Mikhail Zaslonko (1):
  s390/vmalloc: fix VMALLOC_START calculation

Thomas Richter (1):
  s390/perf: Return error when debug_register fails

Vasily Gorbik (47):
  s390/vdso: avoid 64-bit vdso mapping for compat tasks
  s390/vdso: correct CFI annotations of vDSO functions
  s390: clean up stacks setup
  s390: unify stack size definitions
  s390: remove decompressor's head.S
  s390/decompressor: rework uncompressed image info collection
  s390/decompressor: get rid of .bss usage
  s390/sclp: simplify early hsa_size detection
  s390: rescue initrd as early as possible
  s390/decompressor: clean up and rename compressed/misc.c
  s390: introduce .boot.data section
  s390/sclp: move sclp_early_read_info to sclp_early_core.c
  s390/mem_detect: move tprot loop to early boot phase
  s390: introduce .boot.data section compile time validation
  s390/mem_detect: introduce SCLP storage info
  s390/mem_detect: introduce z/VM specific diag260 call
  s390/mem_detect: use SCLP info for continuous memory detection
  s390/mem_detect: replace tprot loop with binary search
  s390/mem_detect: add info source debug print
  s390/sclp: introduce sclp_early_get_hsa_size
  s390: move ipl block and cmd line handling to early boot phase
  s390/mm: add missing pfn_to_kaddr helper
  s390/kasan: avoid vdso instrumentation
  s390/kasan: avoid instrumentation of early C code
  s390/kasan: replace some memory functions
  s390: introduce MAX_PTRS_PER_P4D
  s390: add pgd_page primitive
  s390/kasan: add initialization code and enable it
  s390/kasan: double the stack size

dcache endless loop in d_invalidate

2018-10-16 Thread Martin Schwidefsky
Hi Al,

I am currently looking into a customer dump and found what looks like
an issue in the dcache code. And I think the following commit of yours
has something to do with it:

commit fe91522a7ba82ca1a51b07e19954b3825e4aaa22
Author: Al Viro 
Date:   Sat May 3 00:02:25 2014 -0400

don't remove from shrink list in select_collect()

If we find something already on a shrink list, just increment
data->found and do nothing else.  Loops in shrink_dcache_parent() and
check_submounts_and_drop() will do the right thing - everything we
did put into our list will be evicted and if there had been nothing,
but data->found got non-zero, well, we have somebody else shrinking
those guys; just try again.

Signed-off-by: Al Viro 

The dump I got is based on kernel v4.4 but the affected dcache functions
look identical to the upstream version. Here is what I found in the dump:

A lot of "rcu_sched kthread starved for  jiffies!" messages
Only one CPU, currently running process "run-crons" task 0x65a8008
It just called check_and_drop from d_walk, full backchain:

PSW.addr   check_and_drop at 30a0e8
%r14   d_walk at 308202
 #0 [35b87b88] d_invalidate at 3096e8
 #1 [35b87bd8] proc_flush_task at 37190c
 #2 [35b87c58] release_task at 13f202
 #3 [35b87cc8] wait_task_zombie at 13fc36
 #4 [35b87d50] wait_consider_task at 140150
 #5 [35b87dc0] do_wait at 1403de
 #6 [35b87e18] sys_wait4 at 14181e
 #7 [35b87ea8] system_call at 659ec4

Task's runtime is
  sum_exec_runtime 26813717162347 # nsec = 26813 seconds,
  utime = 3991252 # cputime = 974 seconds,
  stime = 99132516783832 # cputime = 24202 seconds,
Task 0x65a8008 has TIF_NEED_RESCHED set

d_walk() just called check_and_drop via the finish() function pointer,
check_and_drop() will return and d_walk() will return as well.
Looks like an endless loop in d_invalidate().

The (struct dentry *) dentry in d_invalidate() is at 0x3cb15858
The struct detach_data data in d_invalidate() is at 0x35b87c28

dentry tree starting @ 0x3cb15858 has two entries in d_subdirs:
0x3cb15858  d_name.name: "11898"
0xb940d3d8 d_name.name: "cmdline"
0xb940dd98 d_name.name: "status"

crash> px *(struct dentry *) 0x3cb15858 | grep d_flags
  d_flags = 0x2000cc,

crash> px *(struct dentry *) 0xb940d3d8 | grep d_flags
  d_flags = 0x48048c,  # DCACHE_SHRINK_LIST is set

crash> px *(struct dentry *) 0xb940dd98 | grep d_flags
  d_flags = 0x48048c,  # DCACHE_SHRINK_LIST is set

crash> px *(struct detach_data *) 0x35b87c28
$29 = {
  select = {
start = 0x3cb15858,
dispose = {
  next = 0x35b87c30,
  prev = 0x35b87c30
},
found = 0x2
  },
  mountpoint = 0x0
}

select_collect() called from detach_and_collect() will increment
data.select.found in the struct detach_data @ 0x35b87c28 but will not
add any dentries to the dispose lists. The shrink_dentry_list() call in
d_invalidate() will do nothing as the dispose list is empty. The two
dentries 0xb940d3d8 and 0xb940dd98 are still there. After d_walk returns
d_invalidate() finds data.mountpoint == NULL and data.select.found == 2,
it will start the loop again without progress.

As this is a single CPU system without kernel preemption there is nobody
else that will do the shrinking of those dcache entries.

In short, this if-statement in select_collect:

if (dentry->d_flags & DCACHE_SHRINK_LIST) {
data->found++;
}

with assumption that "somebody else" will do the shrinking seems broken.

Do you agree?

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



dcache endless loop in d_invalidate

2018-10-16 Thread Martin Schwidefsky
Hi Al,

I am currently looking into a customer dump and found what looks like
an issue in the dcache code. And I think the following commit of yours
has something to do with it:

commit fe91522a7ba82ca1a51b07e19954b3825e4aaa22
Author: Al Viro 
Date:   Sat May 3 00:02:25 2014 -0400

don't remove from shrink list in select_collect()

If we find something already on a shrink list, just increment
data->found and do nothing else.  Loops in shrink_dcache_parent() and
check_submounts_and_drop() will do the right thing - everything we
did put into our list will be evicted and if there had been nothing,
but data->found got non-zero, well, we have somebody else shrinking
those guys; just try again.

Signed-off-by: Al Viro 

The dump I got is based on kernel v4.4 but the affected dcache functions
look identical to the upstream version. Here is what I found in the dump:

A lot of "rcu_sched kthread starved for  jiffies!" messages
Only one CPU, currently running process "run-crons" task 0x65a8008
It just called check_and_drop from d_walk, full backchain:

PSW.addr   check_and_drop at 30a0e8
%r14   d_walk at 308202
 #0 [35b87b88] d_invalidate at 3096e8
 #1 [35b87bd8] proc_flush_task at 37190c
 #2 [35b87c58] release_task at 13f202
 #3 [35b87cc8] wait_task_zombie at 13fc36
 #4 [35b87d50] wait_consider_task at 140150
 #5 [35b87dc0] do_wait at 1403de
 #6 [35b87e18] sys_wait4 at 14181e
 #7 [35b87ea8] system_call at 659ec4

Task's runtime is
  sum_exec_runtime 26813717162347 # nsec = 26813 seconds,
  utime = 3991252 # cputime = 974 seconds,
  stime = 99132516783832 # cputime = 24202 seconds,
Task 0x65a8008 has TIF_NEED_RESCHED set

d_walk() just called check_and_drop via the finish() function pointer,
check_and_drop() will return and d_walk() will return as well.
Looks like an endless loop in d_invalidate().

The (struct dentry *) dentry in d_invalidate() is at 0x3cb15858
The struct detach_data data in d_invalidate() is at 0x35b87c28

dentry tree starting @ 0x3cb15858 has two entries in d_subdirs:
0x3cb15858  d_name.name: "11898"
0xb940d3d8 d_name.name: "cmdline"
0xb940dd98 d_name.name: "status"

crash> px *(struct dentry *) 0x3cb15858 | grep d_flags
  d_flags = 0x2000cc,

crash> px *(struct dentry *) 0xb940d3d8 | grep d_flags
  d_flags = 0x48048c,  # DCACHE_SHRINK_LIST is set

crash> px *(struct dentry *) 0xb940dd98 | grep d_flags
  d_flags = 0x48048c,  # DCACHE_SHRINK_LIST is set

crash> px *(struct detach_data *) 0x35b87c28
$29 = {
  select = {
start = 0x3cb15858,
dispose = {
  next = 0x35b87c30,
  prev = 0x35b87c30
},
found = 0x2
  },
  mountpoint = 0x0
}

select_collect() called from detach_and_collect() will increment
data.select.found in the struct detach_data @ 0x35b87c28 but will not
add any dentries to the dispose lists. The shrink_dentry_list() call in
d_invalidate() will do nothing as the dispose list is empty. The two
dentries 0xb940d3d8 and 0xb940dd98 are still there. After d_walk returns
d_invalidate() finds data.mountpoint == NULL and data.select.found == 2,
it will start the loop again without progress.

As this is a single CPU system without kernel preemption there is nobody
else that will do the shrinking of those dcache entries.

In short, this if-statement in select_collect:

if (dentry->d_flags & DCACHE_SHRINK_LIST) {
data->found++;
}

with assumption that "somebody else" will do the shrinking seems broken.

Do you agree?

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



[PATCH 2/3] mm: add mm_pxd_folded checks to pgtable_bytes accounting functions

2018-10-15 Thread Martin Schwidefsky
The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overridden by the architecture, this allows keeping a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Signed-off-by: Martin Schwidefsky 
---
 include/linux/mm.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d1029972541c..67f55c71e59a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1764,11 +1764,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, 
unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1788,11 +1792,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, 
unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
2.16.4



[PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-15 Thread Martin Schwidefsky
In case a fork or a clone system call fails in copy_process and the error
handling does the mmput() at the bad_fork_cleanup_mm label, the
following warning messages will appear on the console:

  BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.

One issue with this is the fact that pgtable_bytes is usually off
by a few kilobytes, but the more severe problem is that for a failed
fork or clone the free_pgtables() function is not called. In this case
there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
BUG message. The message itself is purely cosmetic, but annoying.

To fix this, override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
functions to check for the true size of the address space.

Reported-by: Li Wang 
Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/mmu_context.h |  5 -
 arch/s390/include/asm/pgalloc.h |  6 +++---
 arch/s390/include/asm/pgtable.h | 18 ++
 arch/s390/include/asm/tlb.h |  6 +++---
 4 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index 0717ee76885d..f1ab9420ccfb 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -45,8 +45,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-   /* pgd_alloc() did not account this pud */
-   mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -62,9 +60,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-   /* pgd_alloc() did not account this pmd */
-   mm_inc_nr_pmds(mm);
-   mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..5ee733720a57 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, 
unsigned long entry)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-   if (mm->context.asce_limit <= _REGION3_SIZE)
+   if (mm_pmd_folded(mm))
return _SEGMENT_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION2_SIZE)
+   if (mm_pud_folded(mm))
return _REGION3_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION1_SIZE)
+   if (mm_p4d_folded(mm))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0e7cb0dc9c33..de05466ce50c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -485,6 +485,24 @@ static inline int is_module_addr(void *addr)
   _REGION_ENTRY_PROTECT | \
   _REGION_ENTRY_NOEXEC)
 
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba0fbb6..b31c779cf581 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, 
pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)

[PATCH 2/3] mm: add mm_pxd_folded checks to pgtable_bytes accounting functions

2018-10-15 Thread Martin Schwidefsky
The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overridden by the architecture, this allows keeping a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Signed-off-by: Martin Schwidefsky 
---
 include/linux/mm.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d1029972541c..67f55c71e59a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1764,11 +1764,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, 
unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+   if (mm_pud_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1788,11 +1792,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, 
unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+   if (mm_pmd_folded(mm))
+   return;
atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
2.16.4



[PATCH 3/3] s390/mm: fix mis-accounting of pgtable_bytes

2018-10-15 Thread Martin Schwidefsky
In case a fork or a clone system call fails in copy_process and the error
handling does the mmput() at the bad_fork_cleanup_mm label, the
following warning messages will appear on the console:

  BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.

One issue with this is the fact that pgtable_bytes is usually off
by a few kilobytes, but the more severe problem is that for a failed
fork or clone the free_pgtables() function is not called. In this case
there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
BUG message. The message itself is purely cosmetic, but annoying.

To fix this, override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
functions to check for the true size of the address space.

Reported-by: Li Wang 
Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/mmu_context.h |  5 -
 arch/s390/include/asm/pgalloc.h |  6 +++---
 arch/s390/include/asm/pgtable.h | 18 ++
 arch/s390/include/asm/tlb.h |  6 +++---
 4 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index 0717ee76885d..f1ab9420ccfb 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -45,8 +45,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-   /* pgd_alloc() did not account this pud */
-   mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -62,9 +60,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-   /* pgd_alloc() did not account this pmd */
-   mm_inc_nr_pmds(mm);
-   mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..5ee733720a57 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, 
unsigned long entry)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-   if (mm->context.asce_limit <= _REGION3_SIZE)
+   if (mm_pmd_folded(mm))
return _SEGMENT_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION2_SIZE)
+   if (mm_pud_folded(mm))
return _REGION3_ENTRY_EMPTY;
-   if (mm->context.asce_limit <= _REGION1_SIZE)
+   if (mm_p4d_folded(mm))
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0e7cb0dc9c33..de05466ce50c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -485,6 +485,24 @@ static inline int is_module_addr(void *addr)
   _REGION_ENTRY_PROTECT | \
   _REGION_ENTRY_NOEXEC)
 
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+   return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba0fbb6..b31c779cf581 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, 
pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)

[PATCH 1/3] mm: introduce mm_[p4d|pud|pmd]_folded

2018-10-15 Thread Martin Schwidefsky
Add three architecture-overridable functions to test if the
p4d, pud, or pmd layer of a page table is folded or not.

Signed-off-by: Martin Schwidefsky 
---
 include/linux/mm.h | 40 
 1 file changed, 40 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..d1029972541c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -105,6 +105,46 @@ extern int mmap_rnd_compat_bits __read_mostly;
 #define mm_zero_struct_page(pp)  ((void)memset((pp), 0, sizeof(struct page)))
 #endif
 
+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+#ifdef __PAGETABLE_P4D_FOLDED
+   return 1;
+#else
+   return 0;
+#endif
+}
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+#ifdef __PAGETABLE_PUD_FOLDED
+   return 1;
+#else
+   return 0;
+#endif
+}
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+#ifdef __PAGETABLE_PMD_FOLDED
+   return 1;
+#else
+   return 0;
+#endif
+}
+#endif
+
 /*
  * Default maximum number of active map areas, this limits the number of vmas
  * per mm struct. Users can overwrite this number by sysctl but there is a
-- 
2.16.4



[PATCH 1/3] mm: introduce mm_[p4d|pud|pmd]_folded

2018-10-15 Thread Martin Schwidefsky
Add three architecture-overridable functions to test if the
p4d, pud, or pmd layer of a page table is folded or not.

Signed-off-by: Martin Schwidefsky 
---
 include/linux/mm.h | 40 
 1 file changed, 40 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0416a7204be3..d1029972541c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -105,6 +105,46 @@ extern int mmap_rnd_compat_bits __read_mostly;
 #define mm_zero_struct_page(pp)  ((void)memset((pp), 0, sizeof(struct page)))
 #endif
 
+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+#ifdef __PAGETABLE_P4D_FOLDED
+   return 1;
+#else
+   return 0;
+#endif
+}
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+#ifdef __PAGETABLE_PUD_FOLDED
+   return 1;
+#else
+   return 0;
+#endif
+}
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+#ifdef __PAGETABLE_PMD_FOLDED
+   return 1;
+#else
+   return 0;
+#endif
+}
+#endif
+
 /*
  * Default maximum number of active map areas, this limits the number of vmas
  * per mm struct. Users can overwrite this number by sysctl but there is a
-- 
2.16.4



[RFC][PATCH 0/3] pgtable bytes mis-accounting v2

2018-10-15 Thread Martin Schwidefsky
Greetings,

the first test patch to fix the pgtable_bytes mis-accounting on s390
still had a few problems. For one it didn't work for x86 ..

Changes v1 -> v2:

 - Split the patch into three parts, one patch to add the mm_pxd_folded
   helpers, one patch to use the helpers in mm_[dec|inc]_nr_[pmds|puds]
   and finally the fix for s390.

 - Drop the use of __is_defined, it does not work with the
   __PAGETABLE_PxD_FOLDED defines

 - Do not change the basic #ifdef'ery in mm.h, just add the calls
   to mm_pxd_folded to the pgtable_bytes accounting functions. This
   fixes the compile error on alpha (and potentially on other archs).

Martin Schwidefsky (3):
  mm: introduce mm_[p4d|pud|pmd]_folded
  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
  s390/mm: fix mis-accounting of pgtable_bytes

 arch/s390/include/asm/mmu_context.h |  5 
 arch/s390/include/asm/pgalloc.h |  6 ++---
 arch/s390/include/asm/pgtable.h | 18 ++
 arch/s390/include/asm/tlb.h |  6 ++---
 include/linux/mm.h  | 48 +
 5 files changed, 72 insertions(+), 11 deletions(-)

-- 
2.16.4



[RFC][PATCH 0/3] pgtable bytes mis-accounting v2

2018-10-15 Thread Martin Schwidefsky
Greetings,

the first test patch to fix the pgtable_bytes mis-accounting on s390
still had a few problems. For one it didn't work for x86 ..

Changes v1 -> v2:

 - Split the patch into three parts, one patch to add the mm_pxd_folded
   helpers, one patch to use the helpers in mm_[dec|inc]_nr_[pmds|puds]
   and finally the fix for s390.

 - Drop the use of __is_defined, it does not work with the
   __PAGETABLE_PxD_FOLDED defines

 - Do not change the basic #ifdef'ery in mm.h, just add the calls
   to mm_pxd_folded to the pgtable_bytes accounting functions. This
   fixes the compile error on alpha (and potentially on other archs).

Martin Schwidefsky (3):
  mm: introduce mm_[p4d|pud|pmd]_folded
  mm: add mm_pxd_folded checks to pgtable_bytes accounting functions
  s390/mm: fix mis-accounting of pgtable_bytes

 arch/s390/include/asm/mmu_context.h |  5 
 arch/s390/include/asm/pgalloc.h |  6 ++---
 arch/s390/include/asm/pgtable.h | 18 ++
 arch/s390/include/asm/tlb.h |  6 ++---
 include/linux/mm.h  | 48 +
 5 files changed, 72 insertions(+), 11 deletions(-)

-- 
2.16.4



Re: s390: runtime warning about pgtables_bytes

2018-10-12 Thread Martin Schwidefsky
On Thu, 11 Oct 2018 15:02:11 +0200
Martin Schwidefsky  wrote:

> On Thu, 11 Oct 2018 18:04:12 +0800
> Li Wang  wrote:
> 
> > When running s390 system with LTP/cve-2017-17052.c[1], the following BUG is
> > came out repeatedly.
> > I remember this warning start from kernel-4.16.0 and now it still exist in
> > kernel-4.19-rc7.
> > Can anyone take a look?
> > 
> > [ 2678.991496] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.001543] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.002453] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.003256] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.013689] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.024647] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.064408] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.133963] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > 
> > [1]:
> > https://github.com/linux-test-project/ltp/blob/master/testcases/cve/cve-2017-17052.c
> >   
>  
> Confirmed, I see this bug with cve-2017-17052 on my LPAR as well.
> I'll look into it.
 
Ok, I think I understand the problem now. This is the patch I am testing
right now. It seems to fix the issue, but I had to change common mm
code for it.
--
>From 9e3bc2e96930206ef1ece377e45224c51aca1799 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky 
Date: Fri, 12 Oct 2018 16:32:29 +0200
Subject: [RFC][PATCH] s390/mm: fix mis-accounting of pgtable_bytes

In case a fork or a clone system call fails in copy_process and the error
handling does the mmput() at the bad_fork_cleanup_mm label, the following
warning messages will appear on the console:

BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.
The s390 version of pud_free_tlb() recognizes this and does nothing,
the region-3 table will be freed with the pgd_free() call later on.
But the mm_dec_nr_puds() is done unconditionally, to counteract this
the init_new_context() function has an extra mm_inc_nr_puds() call.

Now with a failed fork or clone the free_pgtables() function is not
called, there is no mm_dec_nr_puds() but the mm_inc_nr_puds() has
been done which leads to the incorrect pgtable_bytes of 16384.
Nothing is broken by this, but the warning is annoying.

To get rid of the warning drop the mm_inc_nr_pmds() & mm_inc_nr_puds()
calls from init_new_context(), introduce the mm_pmd_folded(),
mm_pud_folded() and mm_p4d_folded() helpers, and add if-statements
to the functions mm_[inc|dec]_nr_[pmds|puds].

Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/mmu_context.h |  5 -
 arch/s390/include/asm/pgalloc.h |  6 ++---
 arch/s390/include/asm/pgtable.h | 18 +++
 arch/s390/include/asm/tlb.h |  6 ++---
 include/linux/mm.h  | 44 -
 5 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index dbd689d556ce..ccbb53e22024 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-   /* pgd_alloc() did not account this pud */
-   mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-   /* pgd_alloc() did not account this pmd */
-   mm_inc_nr_pmds(mm);
-   mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..5ee733720a57 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, 
unsigned long entry)
 
 static inline unsigned long pgd_entry_

Re: s390: runtime warning about pgtables_bytes

2018-10-12 Thread Martin Schwidefsky
On Thu, 11 Oct 2018 15:02:11 +0200
Martin Schwidefsky  wrote:

> On Thu, 11 Oct 2018 18:04:12 +0800
> Li Wang  wrote:
> 
> > When running s390 system with LTP/cve-2017-17052.c[1], the following BUG is
> > came out repeatedly.
> > I remember this warning start from kernel-4.16.0 and now it still exist in
> > kernel-4.19-rc7.
> > Can anyone take a look?
> > 
> > [ 2678.991496] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.001543] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.002453] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.003256] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.013689] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.024647] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.064408] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > [ 2679.133963] BUG: non-zero pgtables_bytes on freeing mm: 16384
> > 
> > [1]:
> > https://github.com/linux-test-project/ltp/blob/master/testcases/cve/cve-2017-17052.c
> >   
>  
> Confirmed, I see this bug with cve-2017-17052 on my LPAR as well.
> I'll look into it.
 
Ok, I think I understand the problem now. This is the patch I am testing
right now. It seems to fix the issue, but I had to change common mm
code for it.
--
>From 9e3bc2e96930206ef1ece377e45224c51aca1799 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky 
Date: Fri, 12 Oct 2018 16:32:29 +0200
Subject: [RFC][PATCH] s390/mm: fix mis-accounting of pgtable_bytes

In case a fork or a clone system call fails in copy_process and the error
handling does the mmput() at the bad_fork_cleanup_mm label, the following
warning messages will appear on the console:

BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.
The s390 version of pud_free_tlb() recognizes this and does nothing,
the region-3 table will be freed with the pgd_free() call later on.
But the mm_dec_nr_puds() is done unconditionally, to counteract this
the init_new_context() function has an extra mm_inc_nr_puds() call.

Now with a failed fork or clone the free_pgtables() function is not
called, there is no mm_dec_nr_puds() but the mm_inc_nr_puds() has
been done which leads to the incorrect pgtable_bytes of 16384.
Nothing is broken by this, but the warning is annoying.

To get rid of the warning drop the mm_inc_nr_pmds() & mm_inc_nr_puds()
calls from init_new_context(), introduce the mm_pmd_folded(),
mm_pud_folded() and mm_p4d_folded() helpers, and add if-statements
to the functions mm_[inc|dec]_nr_[pmds|puds].

Signed-off-by: Martin Schwidefsky 
---
 arch/s390/include/asm/mmu_context.h |  5 -
 arch/s390/include/asm/pgalloc.h |  6 ++---
 arch/s390/include/asm/pgtable.h | 18 +++
 arch/s390/include/asm/tlb.h |  6 ++---
 include/linux/mm.h  | 44 -
 5 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h 
b/arch/s390/include/asm/mmu_context.h
index dbd689d556ce..ccbb53e22024 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_limit = STACK_TOP_MAX;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-   /* pgd_alloc() did not account this pud */
-   mm_inc_nr_puds(mm);
break;
case -PAGE_SIZE:
/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-   /* pgd_alloc() did not account this pmd */
-   mm_inc_nr_pmds(mm);
-   mm_inc_nr_puds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c03..5ee733720a57 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, 
unsigned long entry)
 
 static inline unsigned long pgd_entry_

  1   2   3   4   5   6   7   8   9   10   >