Re: [PATCH 2/2] powerpc: Use ARRAY_SIZE to replace its implementation

2018-08-13 Thread Joe Perches
On Tue, 2018-08-14 at 10:46 +0800, zhong jiang wrote:
> Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element.
> So just replace it.

Better to remove the extern and the const altogether here as well.

$ git grep -w powerpc_num_opcodes
arch/powerpc/xmon/ppc-dis.c:  opcode_end = powerpc_opcodes + 
powerpc_num_opcodes;
arch/powerpc/xmon/ppc-opc.c:const int powerpc_num_opcodes =
arch/powerpc/xmon/ppc.h:extern const int powerpc_num_opcodes;

And this one could be removed instead:

$ git grep -w vle_num_opcodes
arch/powerpc/xmon/ppc-opc.c:const int vle_num_opcodes =
arch/powerpc/xmon/ppc.h:extern const int vle_num_opcodes;

> Signed-off-by: zhong jiang 
> ---
>  arch/powerpc/xmon/ppc-opc.c | 12 
>  1 file changed, 4 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c
> index ac2b55b..f3f57a1 100644
> --- a/arch/powerpc/xmon/ppc-opc.c
> +++ b/arch/powerpc/xmon/ppc-opc.c
> @@ -966,8 +966,7 @@
>{ 0xff, 11, NULL, NULL, PPC_OPERAND_SIGNOPT },
>  };
>  
> -const unsigned int num_powerpc_operands = (sizeof (powerpc_operands)
> -/ sizeof (powerpc_operands[0]));
> +const unsigned int num_powerpc_operands = ARRAY_SIZE(powerpc_operands);
>  
>  /* The functions used to insert and extract complicated operands.  */
>  
> @@ -6980,8 +6979,7 @@
>  {"fcfidu.",  XRC(63,974,1),  XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, 
> FRB}},
>  };
>  
> -const int powerpc_num_opcodes =
> -  sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]);
> +const int powerpc_num_opcodes = ARRAY_SIZE(powerpc_opcodes);
>  
>  /* The VLE opcode table.
>  
> @@ -7219,8 +7217,7 @@
>  {"se_bl",BD8(58,0,1),BD8_MASK,   PPCVLE, 0,  {B8}},
>  };
>  
> -const int vle_num_opcodes =
> -  sizeof (vle_opcodes) / sizeof (vle_opcodes[0]);
> +const int vle_num_opcodes = ARRAY_SIZE(vle_opcodes);
>  
>  /* The macro table.  This is only used by the assembler.  */
>  
> @@ -7288,5 +7285,4 @@
>  {"e_clrlslwi",4, PPCVLE, "e_rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"},
>  };
>  
> -const int powerpc_num_macros =
> -  sizeof (powerpc_macros) / sizeof (powerpc_macros[0]);
> +const int powerpc_num_macros = ARRAY_SIZE(powerpc_macros);


Re: [PATCH 1/2] ia64: Use ARRAY_SIZE to replace its implementation

2018-08-13 Thread Joe Perches
On Tue, 2018-08-14 at 10:46 +0800, zhong jiang wrote:
> We prefer to ARRAY_SIZE rather than duplicating its implementation.
> So just replace it.
[]
> diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
[]
> @@ -4645,7 +4645,7 @@ static char *pfmfs_dname(struct dentry *dentry, char 
> *buffer, int buflen)
>  /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
> pfarg_dbreg_t, NULL),
>  /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
> pfarg_dbreg_t, NULL)
>  };
> -#define PFM_CMD_COUNT   (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
> +#define PFM_CMD_COUNT   ARRAY_SIZE(pfm_cmd_tab)

Better would be to remove the #define altogether and change
the one place where it's used to ARRAY_SIZE(...)
---
 arch/ia64/kernel/perfmon.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index a9d4dc6c0427..08ece2c7b6e1 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4645,7 +4645,6 @@ static pfm_cmd_desc_t pfm_cmd_tab[]={
 /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
pfarg_dbreg_t, NULL),
 /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
pfarg_dbreg_t, NULL)
 };
-#define PFM_CMD_COUNT  (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
 
 static int
 pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
@@ -4770,7 +4769,7 @@ sys_perfmonctl (int fd, int cmd, void __user *arg, int 
count)
 */
if (unlikely(pmu_conf == NULL)) return -ENOSYS;
 
-   if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
+   if (unlikely(cmd < 0 || cmd >= ARRAY_SIZE(pfm_cmd_tab))) {
DPRINT(("invalid cmd=%d\n", cmd));
return -EINVAL;
}



Re: [PATCH 1/2] ia64: Use ARRAY_SIZE to replace its implementation

2018-08-13 Thread zhong jiang
On 2018/8/14 12:45, Joe Perches wrote:
> On Tue, 2018-08-14 at 10:46 +0800, zhong jiang wrote:
>> We prefer to ARRAY_SIZE rather than duplicating its implementation.
>> So just replace it.
> []
>> diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
> []
>> @@ -4645,7 +4645,7 @@ static char *pfmfs_dname(struct dentry *dentry, char 
>> *buffer, int buflen)
>>  /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
>> pfarg_dbreg_t, NULL),
>>  /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
>> pfarg_dbreg_t, NULL)
>>  };
>> -#define PFM_CMD_COUNT   (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
>> +#define PFM_CMD_COUNT   ARRAY_SIZE(pfm_cmd_tab)
> Better would be to remove the #define altogether and change
> the one place where it's used to ARRAY_SIZE(...)
> ---
>  arch/ia64/kernel/perfmon.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
> index a9d4dc6c0427..08ece2c7b6e1 100644
> --- a/arch/ia64/kernel/perfmon.c
> +++ b/arch/ia64/kernel/perfmon.c
> @@ -4645,7 +4645,6 @@ static pfm_cmd_desc_t pfm_cmd_tab[]={
>  /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
> pfarg_dbreg_t, NULL),
>  /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
> pfarg_dbreg_t, NULL)
>  };
> -#define PFM_CMD_COUNT  (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
>  
>  static int
>  pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
> @@ -4770,7 +4769,7 @@ sys_perfmonctl (int fd, int cmd, void __user *arg, int 
> count)
>*/
>   if (unlikely(pmu_conf == NULL)) return -ENOSYS;
>  
> - if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
> + if (unlikely(cmd < 0 || cmd >= ARRAY_SIZE(pfm_cmd_tab))) {
>   DPRINT(("invalid cmd=%d\n", cmd));
>   return -EINVAL;
>   }
>
>
> .
>
 Thank you for the suggestion.  That's indeed better if just one place uses it.  I 
will repost in v2.

 Sincerely,
zhong jiang



Re: [PATCH v2] selftests/powerpc: Avoid remaining process/threads

2018-08-13 Thread Michael Ellerman
Breno Leitao  writes:

> Hello Michael,
>
> On 08/06/2018 08:06 AM, Michael Ellerman wrote:
>> Breno Leitao  writes:
>> 
>>> diff --git a/tools/testing/selftests/powerpc/harness.c 
>>> b/tools/testing/selftests/powerpc/harness.c
>>> index 66d31de60b9a..06c51e8d8ccb 100644
>>> --- a/tools/testing/selftests/powerpc/harness.c
>>> +++ b/tools/testing/selftests/powerpc/harness.c
>>> @@ -85,13 +85,16 @@ int run_test(int (test_function)(void), char *name)
>>> return status;
>>>  }
>>>  
>>> -static void alarm_handler(int signum)
>>> +static void sig_handler(int signum)
>>>  {
>>> -   /* Jut wake us up from waitpid */
>>> +   if (signum == SIGINT)
>>> +   kill(-pid, SIGTERM);
>> 
>> I don't think we need to do that here, if we just return then we'll pop
>> out of the waitpid() and go via the normal path.
>
> Correct, if we press ^C while the parent process is waiting at waitpid(),
> then waitpid() syscall will be interrupted (EINTR) and never restarted again
> (unless we set sa_flags = SA_RESTART), thus, the code will restart to execute
> the next instruction when the signal handler is done, as we had skipped
> waitpid().
>
> From a theoretical point of view, the user can press ^C before the process
> executes waitpid() syscall. In this case the process will not 'skip' the
> waitpid(), which will continue to wait. We can clearly force this behavior
> putting a sleep(1) before waitpid() and pressing  ^C in the very first
> second, it will 'skip' the nanosleep() syscall instead of waitpid() which
> will be there, and the ^C will be ignored (thus not calling kill(-pid, 
> SIGTERM)).

True.

Though that race also exists vs us registering the SIGINT handler, so
it's basically not solvable, the user can always press ^C before we're
ready.

cheers


Re: cxl: remove a dead branch

2018-08-13 Thread Michael Ellerman
Mathieu Malaterre  writes:
> Frederic,
>
> Could you double check with Michael what is now best to do.

I decided it had been long enough (since March), so I just merged it.

If Fred et al. want to do something better they can send me another
patch on top of it.

cheers

> On Mon, Aug 13, 2018 at 1:23 PM Michael Ellerman
>  wrote:
>>
>> On Thu, 2018-03-22 at 21:05:28 UTC, Mathieu Malaterre wrote:
>> > In commit 14baf4d9c739 ("cxl: Add guest-specific code") the following code
>> > was added:
>> >
>> >   if (afu->crs_len < 0) {
>> >   dev_err(&afu->dev, "Unexpected configuration record size 
>> > value\n");
>> >   return -EINVAL;
>> >   }
>> >
>> > However the variable `crs_len` is of type u64 and cannot be compared < 0.
>> > Remove the dead code section. Fix the following warning treated as error
>> > with W=1:
>> >
>> > ../drivers/misc/cxl/guest.c:919:19: error: comparison of unsigned 
>> > expression < 0 is always false [-Werror=type-limits]
>> >
>> > Signed-off-by: Mathieu Malaterre 
>>
>> Applied to powerpc next, thanks.
>>
>> https://git.kernel.org/powerpc/c/e4ecafb14fd9cd77d8f4320af1922e
>>
>> cheers


Re: [PATCH] powerpc/powernv/idle: Fix build error

2018-08-13 Thread Michael Ellerman
Alexey Kardashevskiy  writes:
> On 10/08/2018 17:10, Michael Ellerman wrote:
>> "Aneesh Kumar K.V"  writes:
>> 
>>> Fix the below build error using strlcpy instead of strncpy
>>>
>>> In function 'pnv_parse_cpuidle_dt',
>>> inlined from 'pnv_init_idle_states' at 
>>> arch/powerpc/platforms/powernv/idle.c:840:7,
>>> inlined from '__machine_initcall_powernv_pnv_init_idle_states' at 
>>> arch/powerpc/platforms/powernv/idle.c:870:1:
>>> arch/powerpc/platforms/powernv/idle.c:820:3: error: 'strncpy' specified 
>>> bound 16 equals destination size [-Werror=stringop-truncation]
>>>strncpy(pnv_idle_states[i].name, temp_string[i],
>>>^~~~
>>> PNV_IDLE_NAME_LEN);
>> 
>> I'm curious why I haven't seen this? What compiler are you using?
>
> gcc 8 does this.

Yeah you're right. It was hidden by another build failure in my build
scripts :/

cheers


[PATCH] powerpc/mpc85xx: fix issues in clock node

2018-08-13 Thread Yuantian Tang
The compatible string is not correct in the clock node.
The clocks property refers to the wrong node too.
This patch is to fix them.

Signed-off-by: Tang Yuantian 
---
 arch/powerpc/boot/dts/fsl/t1023si-post.dtsi |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
index 4908af5..763caf4 100644
--- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -348,7 +348,7 @@
mux0: mux0@0 {
#clock-cells = <0>;
reg = <0x0 4>;
-   compatible = "fsl,core-mux-clock";
+   compatible = "fsl,qoriq-core-mux-2.0";
clocks = <&pll0 0>, <&pll0 1>;
clock-names = "pll0_0", "pll0_1";
clock-output-names = "cmux0";
@@ -356,9 +356,9 @@
mux1: mux1@20 {
#clock-cells = <0>;
reg = <0x20 4>;
-   compatible = "fsl,core-mux-clock";
-   clocks = <&pll0 0>, <&pll0 1>;
-   clock-names = "pll0_0", "pll0_1";
+   compatible = "fsl,qoriq-core-mux-2.0";
+   clocks = <&pll1 0>, <&pll1 1>;
+   clock-names = "pll1_0", "pll1_1";
clock-output-names = "cmux1";
};
};
-- 
1.7.1



Re: [PATCH V2] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Michael Ellerman
Hi Christophe,

The patch looks fine, just a nit about the change log:

Christophe Lombard  writes:
> The AFU Information DVSEC capability is a means to extract common,
> general information about all of the AFUs associated with a Function
> independent of the specific functionality that each AFU provides.
>
> This patch fixes the access to the AFU Descriptor Data indexed by the
> AFU Info Index field.

> Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")
> Cc: stable  # 4.16
> Signed-off-by: Christophe Lombard 

When fixing a bug it's always good to describe how the bug manifests.
ie. in this case we are clearly writing to the wrong location in config
space, but what is the consequence of that? Does it kill the device, or
just fails to initialise something correctly? How could I tell if I'm
hitting this bug currently? How would I tell if the fix is applied
correctly?

cheers

> ---
> Changelog[v2]
>  - Rebase to latest upstream.
>  - Use pci_write_config_byte instead of pci_write_config_word
> ---
>  drivers/misc/ocxl/config.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> index 2e30de9..57a6bb1 100644
> --- a/drivers/misc/ocxl/config.c
> +++ b/drivers/misc/ocxl/config.c
> @@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
>   u32 val;
>   int rc, templ_major, templ_minor, len;
>  
> - pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
> + pci_write_config_byte(dev,
> + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
> + afu_idx);
>   rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
>   if (rc)
>   return rc;
> -- 
> 2.7.4


[PATCH 2/2] powerpc: Use ARRAY_SIZE to replace its implementation

2018-08-13 Thread zhong jiang
Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element.
So just replace it.

Signed-off-by: zhong jiang 
---
 arch/powerpc/xmon/ppc-opc.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c
index ac2b55b..f3f57a1 100644
--- a/arch/powerpc/xmon/ppc-opc.c
+++ b/arch/powerpc/xmon/ppc-opc.c
@@ -966,8 +966,7 @@
   { 0xff, 11, NULL, NULL, PPC_OPERAND_SIGNOPT },
 };
 
-const unsigned int num_powerpc_operands = (sizeof (powerpc_operands)
-  / sizeof (powerpc_operands[0]));
+const unsigned int num_powerpc_operands = ARRAY_SIZE(powerpc_operands);
 
 /* The functions used to insert and extract complicated operands.  */
 
@@ -6980,8 +6979,7 @@
 {"fcfidu.",XRC(63,974,1),  XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, 
FRB}},
 };
 
-const int powerpc_num_opcodes =
-  sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]);
+const int powerpc_num_opcodes = ARRAY_SIZE(powerpc_opcodes);
 
 /* The VLE opcode table.
 
@@ -7219,8 +7217,7 @@
 {"se_bl",  BD8(58,0,1),BD8_MASK,   PPCVLE, 0,  {B8}},
 };
 
-const int vle_num_opcodes =
-  sizeof (vle_opcodes) / sizeof (vle_opcodes[0]);
+const int vle_num_opcodes = ARRAY_SIZE(vle_opcodes);
 
 /* The macro table.  This is only used by the assembler.  */
 
@@ -7288,5 +7285,4 @@
 {"e_clrlslwi",4, PPCVLE, "e_rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"},
 };
 
-const int powerpc_num_macros =
-  sizeof (powerpc_macros) / sizeof (powerpc_macros[0]);
+const int powerpc_num_macros = ARRAY_SIZE(powerpc_macros);
-- 
1.7.12.4



[PATCH 1/2] ia64: Use ARRAY_SIZE to replace its implementation

2018-08-13 Thread zhong jiang
We prefer to ARRAY_SIZE rather than duplicating its implementation.
So just replace it.

Signed-off-by: zhong jiang 
---
 arch/ia64/kernel/perfmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index a9d4dc6..6cbe6e0 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4645,7 +4645,7 @@ static char *pfmfs_dname(struct dentry *dentry, char 
*buffer, int buflen)
 /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
pfarg_dbreg_t, NULL),
 /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, 
pfarg_dbreg_t, NULL)
 };
-#define PFM_CMD_COUNT  (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
+#define PFM_CMD_COUNT  ARRAY_SIZE(pfm_cmd_tab)
 
 static int
 pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
-- 
1.7.12.4



[PATCH 0/2] Use ARRAY_SIZE to replace its implementation

2018-08-13 Thread zhong jiang
The issue is detected with the help of Coccinelle.

zhong jiang (2):
  ia64: Use ARRAY_SIZE to replace its implementation
  powerpc: Use ARRAY_SIZE to replace its implementation

 arch/ia64/kernel/perfmon.c  |  2 +-
 arch/powerpc/xmon/ppc-opc.c | 12 
 2 files changed, 5 insertions(+), 9 deletions(-)

-- 
1.7.12.4



Re: [PATCH V2] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Andrew Donnellan

On 14/08/18 00:09, Christophe Lombard wrote:

The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")
Cc: stable  # 4.16
Signed-off-by: Christophe Lombard 


Thanks

Acked-by: Andrew Donnellan 


---
Changelog[v2]
  - Rebase to latest upstream.
  - Use pci_write_config_byte instead of pci_write_config_word
---
  drivers/misc/ocxl/config.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..57a6bb1 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
u32 val;
int rc, templ_major, templ_minor, len;
  
-	pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);

+   pci_write_config_byte(dev,
+   fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+   afu_idx);
rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
if (rc)
return rc;



--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH v6 00/11] hugetlb: Factorize hugetlb architecture primitives

2018-08-13 Thread Alex Ghiti

Hi everyone,

Does someone need anything more to be done regarding this series ?

Thanks,

Alex


On 08/06/2018 05:57 PM, Alexandre Ghiti wrote:

[CC linux-mm for inclusion in -mm tree]
  
In order to reduce copy/paste of functions across architectures and then

make riscv hugetlb port (and future ports) simpler and smaller, this
patchset intends to factorize the numerous hugetlb primitives that are
defined across all the architectures.
  
Except for prepare_hugepage_range, this patchset moves the versions that

are just pass-through to standard pte primitives into
asm-generic/hugetlb.h by using the same #ifdef semantic that can be
found in asm-generic/pgtable.h, i.e. __HAVE_ARCH_***.
  
s390 architecture has not been tackled in this series since it does not

use asm-generic/hugetlb.h at all.
  
This patchset has been compiled on all addressed architectures with

success (except for parisc, but the problem does not come from this
series).
  
v6:

   - Remove nohash/32 and book3s/32 powerpc specific implementations in
 order to use the generic ones.
   - Add all the Reviewed-by, Acked-by and Tested-by in the commits,
 thanks to everyone.
  
v5:

   As suggested by Mike Kravetz, no need to move the #include
for arm and x86 architectures, let it live at
   the top of the file.
  
v4:

   Fix powerpc build error due to misplacing of #include
outside of #ifdef CONFIG_HUGETLB_PAGE, as
   pointed by Christophe Leroy.
  
v1, v2, v3:

   Same version, just problems with email provider and misuse of
   --batch-size option of git send-email

Alexandre Ghiti (11):
   hugetlb: Harmonize hugetlb.h arch specific defines with pgtable.h
   hugetlb: Introduce generic version of hugetlb_free_pgd_range
   hugetlb: Introduce generic version of set_huge_pte_at
   hugetlb: Introduce generic version of huge_ptep_get_and_clear
   hugetlb: Introduce generic version of huge_ptep_clear_flush
   hugetlb: Introduce generic version of huge_pte_none
   hugetlb: Introduce generic version of huge_pte_wrprotect
   hugetlb: Introduce generic version of prepare_hugepage_range
   hugetlb: Introduce generic version of huge_ptep_set_wrprotect
   hugetlb: Introduce generic version of huge_ptep_set_access_flags
   hugetlb: Introduce generic version of huge_ptep_get

  arch/arm/include/asm/hugetlb-3level.h| 32 +-
  arch/arm/include/asm/hugetlb.h   | 30 --
  arch/arm64/include/asm/hugetlb.h | 39 +++-
  arch/ia64/include/asm/hugetlb.h  | 47 ++-
  arch/mips/include/asm/hugetlb.h  | 40 +++--
  arch/parisc/include/asm/hugetlb.h| 33 +++
  arch/powerpc/include/asm/book3s/32/pgtable.h |  6 --
  arch/powerpc/include/asm/book3s/64/pgtable.h |  1 +
  arch/powerpc/include/asm/hugetlb.h   | 43 ++
  arch/powerpc/include/asm/nohash/32/pgtable.h |  6 --
  arch/powerpc/include/asm/nohash/64/pgtable.h |  1 +
  arch/sh/include/asm/hugetlb.h| 54 ++---
  arch/sparc/include/asm/hugetlb.h | 40 +++--
  arch/x86/include/asm/hugetlb.h   | 69 --
  include/asm-generic/hugetlb.h| 88 +++-
  15 files changed, 135 insertions(+), 394 deletions(-)





Re: [PATCH v2 3/3] powerpc/mce: Handle memcpy_mcsafe

2018-08-13 Thread Reza Arbab

On Thu, Apr 05, 2018 at 05:15:00PM +1000, Balbir Singh wrote:

Add a blocking notifier callback to be called in real-mode
on machine check exceptions for UE (ld/st) errors only.


It's been a while, but is this patchset still being pursued?

This patch in particular (callbacks for MCE handling) has other device 
memory use cases and I'd like to move it along.



The patch registers a callback on boot to be notified
of machine check exceptions and returns a NOTIFY_STOP when
a page of interest is seen as the source of the machine
check exception. This page of interest is a ZONE_DEVICE
page and hence for now, for memcpy_mcsafe to work, the page
needs to belong to ZONE_DEVICE and memcpy_mcsafe should be
used to access the memory.

The patch also modifies the NIP of the exception context
to go back to the fixup handler (in memcpy_mcsafe) and does
not print any error message as the error is treated as
returned via a return value and handled.

Signed-off-by: Balbir Singh 
---
arch/powerpc/include/asm/mce.h |  3 +-
arch/powerpc/kernel/mce.c  | 77 --
2 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 3a1226e9b465..a76638e3e47e 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -125,7 +125,8 @@ struct machine_check_event {
enum MCE_UeErrorType ue_error_type:8;
uint8_t effective_address_provided;
uint8_t physical_address_provided;
-   uint8_t reserved_1[5];
+   uint8_t error_return;
+   uint8_t reserved_1[4];
uint64_t effective_address;
uint64_t physical_address;
uint8_t reserved_2[8];
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a79075..b9e4881fa8c5 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -28,7 +28,9 @@
#include 
#include 
#include 
+#include 

+#include 
#include 
#include 

@@ -54,6 +56,52 @@ static struct irq_work mce_event_process_work = {

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int register_mce_notifier(struct notifier_block *nb)
+{
+   return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_mce_notifier);
+
+int unregister_mce_notifier(struct notifier_block *nb)
+{
+   return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_mce_notifier);
+
+
+static int check_memcpy_mcsafe(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   /*
+* val contains the physical_address of the bad address
+*/
+   unsigned long pfn = val >> PAGE_SHIFT;
+   struct page *page = realmode_pfn_to_page(pfn);
+   int rc = NOTIFY_DONE;
+
+   if (!page)
+   goto out;
+
+   if (is_zone_device_page(page))  /* for HMM and PMEM */
+   rc = NOTIFY_STOP;
+out:
+   return rc;
+}
+
+struct notifier_block memcpy_mcsafe_nb = {
+   .priority = 0,
+   .notifier_call = check_memcpy_mcsafe,
+};
+
+int  mce_mcsafe_register(void)
+{
+   register_mce_notifier(&memcpy_mcsafe_nb);
+   return 0;
+}
+arch_initcall(mce_mcsafe_register);
+
static void mce_set_error_info(struct machine_check_event *mce,
   struct mce_error_info *mce_err)
{
@@ -151,9 +199,31 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
if (phys_addr != ULONG_MAX) {
+   int rc;
+   const struct exception_table_entry *entry;
+
+   /*
+* Once we have the physical address, we check to
+* see if the current nip has a fixup entry.
+* Having a fixup entry plus the notifier stating
+* that it can handle the exception is an indication
+* that we should return to the fixup entry and
+* return an error from there
+*/
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
-   machine_check_ue_event(mce);
+
+   rc = blocking_notifier_call_chain(&mce_notifier_list,
+   phys_addr, NULL);


Could we pass mce entirely here instead of just phys_addr? It would 
allow the callback itself to set error_return if needed.



+   if (rc & NOTIFY_STOP_MASK) {
+

Re: [PATCH V2] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Frederic Barrat




Le 13/08/2018 à 16:09, Christophe Lombard a écrit :

The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")
Cc: stable  # 4.16
Signed-off-by: Christophe Lombard 
---


Thanks!
Acked-by: Frederic Barrat 



Changelog[v2]
  - Rebase to latest upstream.
  - Use pci_write_config_byte instead of pci_write_config_word
---
  drivers/misc/ocxl/config.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..57a6bb1 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
u32 val;
int rc, templ_major, templ_minor, len;

-   pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+   pci_write_config_byte(dev,
+   fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+   afu_idx);
rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
if (rc)
return rc;





Re: [PATCH v2 1/2] powerpc/64s: move machine check SLB flushing to mm/slb.c

2018-08-13 Thread Nicholas Piggin
On Mon, 13 Aug 2018 09:57:33 +0530
Mahesh Jagannath Salgaonkar  wrote:

> On 08/10/2018 12:12 PM, Nicholas Piggin wrote:
> > The machine check code that flushes and restores bolted segments in
> > real mode belongs in mm/slb.c. This will also be used by pseries
> > machine check and idle code in future changes.
> > 
> > Signed-off-by: Nicholas Piggin 
> > 
> > Since v1:
> > - Restore the test for slb_shadow (mpe)
> > ---
> >  arch/powerpc/include/asm/book3s/64/mmu-hash.h |  3 ++
> >  arch/powerpc/kernel/mce_power.c   | 26 +
> >  arch/powerpc/mm/slb.c | 39 +++
> >  3 files changed, 51 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
> > b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> > index 2f74bdc805e0..d4e398185b3a 100644
> > --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> > +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> > @@ -497,6 +497,9 @@ extern void hpte_init_native(void);
> > 
> >  extern void slb_initialize(void);
> >  extern void slb_flush_and_rebolt(void);
> > +extern void slb_flush_all_realmode(void);
> > +extern void __slb_restore_bolted_realmode(void);
> > +extern void slb_restore_bolted_realmode(void);
> > 
> >  extern void slb_vmalloc_update(void);
> >  extern void slb_set_size(u16 size);
> > diff --git a/arch/powerpc/kernel/mce_power.c 
> > b/arch/powerpc/kernel/mce_power.c
> > index d6756af6ec78..3497c8329c1d 100644
> > --- a/arch/powerpc/kernel/mce_power.c
> > +++ b/arch/powerpc/kernel/mce_power.c
> > @@ -62,11 +62,8 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, 
> > unsigned long addr)
> >  #ifdef CONFIG_PPC_BOOK3S_64
> >  static void flush_and_reload_slb(void)
> >  {
> > -   struct slb_shadow *slb;
> > -   unsigned long i, n;
> > -
> > /* Invalidate all SLBs */
> > -   asm volatile("slbmte %0,%0; slbia" : : "r" (0));
> > +   slb_flush_all_realmode();
> > 
> >  #ifdef CONFIG_KVM_BOOK3S_HANDLER
> > /*
> > @@ -76,22 +73,17 @@ static void flush_and_reload_slb(void)
> > if (get_paca()->kvm_hstate.in_guest)
> > return;
> >  #endif
> > -
> > -   /* For host kernel, reload the SLBs from shadow SLB buffer. */
> > -   slb = get_slb_shadow();
> > -   if (!slb)
> > +   if (early_radix_enabled())
> > return;  
> 
> Would we ever get MCE for SLB errors when radix is enabled ?

Well I'm not 100% sure. I don't think the MMU should in radix mode,
but KVM will put guests into HPT mode and put entries into the SLB.
I'm not completely sure we would never get a MCE come through here.

> 
> > 
> > -   n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
> > -
> > -   /* Load up the SLB entries from shadow SLB */
> > -   for (i = 0; i < n; i++) {
> > -   unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
> > -   unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
> > +   /*
> > +* This probably shouldn't happen, but it may be possible it's
> > +* called in early boot before SLB shadows are allocated.
> > +*/
> > +   if (!get_slb_shadow())
> > +   return;  
> 
> Any reason you added above check here instead on mm/slb.c ? Should we
> move above check inside slb_restore_bolted_realmode() ? I guess mm/slb.c
> is right place for this check. This will also help pseries machine check
> to avoid calling this extra check explicitly.

I thought it was a corner case because the slb.c code should not
be called before it's initialised. I'd prefer these exceptional
machine check cases be tested in the MCE code. Anything else calling
flush_and_reload_slb so early would be a bad bug.

Thanks,
Nick


Re: [PATCH v7 7/9] powerpc/pseries: Dump the SLB contents on SLB MCE errors.

2018-08-13 Thread Nicholas Piggin
On Mon, 13 Aug 2018 09:47:04 +0530
Mahesh Jagannath Salgaonkar  wrote:

> On 08/11/2018 10:03 AM, Nicholas Piggin wrote:
> > On Tue, 07 Aug 2018 19:47:39 +0530
> > Mahesh J Salgaonkar  wrote:
> >   
> >> From: Mahesh Salgaonkar 
> >>
> >> If we get a machine check exceptions due to SLB errors then dump the
> >> current SLB contents which will be very much helpful in debugging the
> >> root cause of SLB errors. Introduce an exclusive buffer per cpu to hold
> >> faulty SLB entries. In real mode mce handler saves the old SLB contents
> >> into this buffer accessible through paca and print it out later in virtual
> >> mode.
> >>
> >> With this patch the console will log SLB contents like below on SLB MCE
> >> errors:
> >>
> >> [  507.297236] SLB contents of cpu 0x1
> >> [  507.297237] Last SLB entry inserted at slot 16
> >> [  507.297238] 00 c800 400ea1b217000500
> >> [  507.297239]   1T  ESID=   c0  VSID=  ea1b217 LLP:100
> >> [  507.297240] 01 d800 400d43642f000510
> >> [  507.297242]   1T  ESID=   d0  VSID=  d43642f LLP:110
> >> [  507.297243] 11 f800 400a86c85f000500
> >> [  507.297244]   1T  ESID=   f0  VSID=  a86c85f LLP:100
> >> [  507.297245] 12 7f000800 4008119624000d90
> >> [  507.297246]   1T  ESID=   7f  VSID=  8119624 LLP:110
> >> [  507.297247] 13 1800 00092885f5150d90
> >> [  507.297247]  256M ESID=1  VSID=   92885f5150 LLP:110
> >> [  507.297248] 14 01000800 4009e7cb5d90
> >> [  507.297249]   1T  ESID=1  VSID=  9e7cb50 LLP:110
> >> [  507.297250] 15 d800 400d43642f000510
> >> [  507.297251]   1T  ESID=   d0  VSID=  d43642f LLP:110
> >> [  507.297252] 16 d800 400d43642f000510
> >> [  507.297253]   1T  ESID=   d0  VSID=  d43642f LLP:110
> >> [  507.297253] --
> >> [  507.297254] SLB cache ptr value = 3
> >> [  507.297254] Valid SLB cache entries:
> >> [  507.297255] 00 EA[0-35]=7f000
> >> [  507.297256] 01 EA[0-35]=1
> >> [  507.297257] 02 EA[0-35]= 1000
> >> [  507.297257] Rest of SLB cache entries:
> >> [  507.297258] 03 EA[0-35]=7f000
> >> [  507.297258] 04 EA[0-35]=1
> >> [  507.297259] 05 EA[0-35]= 1000
> >> [  507.297260] 06 EA[0-35]=   12
> >> [  507.297260] 07 EA[0-35]=7f000
> >>
> >> Suggested-by: Aneesh Kumar K.V 
> >> Suggested-by: Michael Ellerman 
> >> Signed-off-by: Mahesh Salgaonkar 
> >> ---
> >>
> >> Changes in V7:
> >> - Print slb cache ptr value and slb cache data
> >> ---
> >>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |7 ++
> >>  arch/powerpc/include/asm/paca.h   |4 +
> >>  arch/powerpc/mm/slb.c |   73 
> >> +
> >>  arch/powerpc/platforms/pseries/ras.c  |   10 +++
> >>  arch/powerpc/platforms/pseries/setup.c|   10 +++
> >>  5 files changed, 103 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
> >> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> >> index cc00a7088cf3..5a3fe282076d 100644
> >> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> >> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> >> @@ -485,9 +485,16 @@ static inline void hpte_init_pseries(void) { }
> >>  
> >>  extern void hpte_init_native(void);
> >>  
> >> +struct slb_entry {
> >> +  u64 esid;
> >> +  u64 vsid;
> >> +};
> >> +
> >>  extern void slb_initialize(void);
> >>  extern void slb_flush_and_rebolt(void);
> >>  extern void slb_flush_and_rebolt_realmode(void);
> >> +extern void slb_save_contents(struct slb_entry *slb_ptr);
> >> +extern void slb_dump_contents(struct slb_entry *slb_ptr);
> >>  
> >>  extern void slb_vmalloc_update(void);
> >>  extern void slb_set_size(u16 size);
> >> diff --git a/arch/powerpc/include/asm/paca.h 
> >> b/arch/powerpc/include/asm/paca.h
> >> index 7f22929ce915..233d25ff6f64 100644
> >> --- a/arch/powerpc/include/asm/paca.h
> >> +++ b/arch/powerpc/include/asm/paca.h
> >> @@ -254,6 +254,10 @@ struct paca_struct {
> >>  #endif
> >>  #ifdef CONFIG_PPC_PSERIES
> >>u8 *mce_data_buf;   /* buffer to hold per cpu rtas errlog */
> >> +
> >> +  /* Capture SLB related old contents in MCE handler. */
> >> +  struct slb_entry *mce_faulty_slbs;
> >> +  u16 slb_save_cache_ptr;
> >>  #endif /* CONFIG_PPC_PSERIES */
> >>  } cacheline_aligned;
> >>  
> >> diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
> >> index e89f675f1b5e..16a53689ffd4 100644
> >> --- a/arch/powerpc/mm/slb.c
> >> +++ b/arch/powerpc/mm/slb.c
> >> @@ -151,6 +151,79 @@ void slb_flush_and_rebolt_realmode(void)
> >>get_paca()->slb_cache_ptr = 0;
> >>  }
> >>  
> >> +void slb_save_contents(struct slb_entry *slb_ptr)
> >> +{
> >> +  int i;
> >> +  unsigned long e, v;
> >> +
> >> +  /* Save slb_cache_ptr value. */
> >> +  get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;  
> > 
> > What's the point of 

[PATCH V2] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Christophe Lombard
The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")
Cc: stable  # 4.16
Signed-off-by: Christophe Lombard 
---
Changelog[v2]
 - Rebase to latest upstream.
 - Use pci_write_config_byte instead of pci_write_config_word
---
 drivers/misc/ocxl/config.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..57a6bb1 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
u32 val;
int rc, templ_major, templ_minor, len;
 
-   pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+   pci_write_config_byte(dev,
+   fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+   afu_idx);
rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
if (rc)
return rc;
-- 
2.7.4



Re: [PATCH v6 1/2] powerpc: Detect the presence of big-cores via "ibm,thread-groups"

2018-08-13 Thread Benjamin Herrenschmidt
On Mon, 2018-08-13 at 17:06 +0530, Gautham R Shenoy wrote:
> Hi Srikar,
> 
> Thanks for reviewing the patch.
> 
> On Thu, Aug 09, 2018 at 06:27:43AM -0700, Srikar Dronamraju wrote:
> > * Gautham R. Shenoy  [2018-08-09 11:02:07]:
> > 
> > > 
> > >  int threads_per_core, threads_per_subcore, threads_shift;
> > > +bool has_big_cores;
> > >  cpumask_t threads_core_mask;
> > >  EXPORT_SYMBOL_GPL(threads_per_core);
> > >  EXPORT_SYMBOL_GPL(threads_per_subcore);
> > >  EXPORT_SYMBOL_GPL(threads_shift);
> > > +EXPORT_SYMBOL_GPL(has_big_cores);
> > 
> > Why do we need EXPORT_SYMBOL_GPL?
> 
> As Christoph pointed out, I was blindly following suit.
> 
> You are right, we don't need to export it at the moment. The remaining
> EXPORT_SYMBOL_GPL are required by KVM. However, as of now, there is no
> need for "has_big_cores" in the KVM.

There is actually. KVM needs to refuse to start on big cores, at least
HV KVM. And when KVM grows support for big cores (may or may not
happen), it will need to know. So keep the GPL export.

> Will remove this in the next version.
> > 
> > >  EXPORT_SYMBOL_GPL(threads_core_mask);
> > > 
> > > + *
> > > + * Returns 0 on success, -EINVAL if the property does not exist,
> > > + * -ENODATA if property does not have a value, and -EOVERFLOW if the
> > > + * property data isn't large enough.
> > > + */
> > > +int parse_thread_groups(struct device_node *dn,
> > > + struct thread_groups *tg)
> > > +{
> > > + unsigned int nr_groups, threads_per_group, property;
> > > + int i;
> > > + u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> > > + u32 *thread_list;
> > > + size_t total_threads;
> > > + int ret;
> > > +
> > > + ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > > +  thread_group_array, 3);
> > > +
> > > + if (ret)
> > > + goto out_err;
> > > +
> > > + property = thread_group_array[0];
> > > + nr_groups = thread_group_array[1];
> > > + threads_per_group = thread_group_array[2];
> > > + total_threads = nr_groups * threads_per_group;
> 
> 
> > > +
> > 
> > Shouldnt we check for property and nr_groups
> > If the property is not 1 and nr_groups < 1, we should error out
> > No point in calling a of_property read if property is not right.
> 
> Yes, the nr_groups < 1 check can be moved into this function.
> 
> However, this function merely parses the thread group structure
> exposed by the device tree. So it should error out only if there is a
> failure in parsing, or as you said the parsed values are incorrect
> (nr_groups < 1, threads_per_group < 1, etc). Whether the thread group
> is relevant or not (in this case we are interested in thread groups
> that share L1 cache, translation etc) is something for the caller to
> decide.
> 
> However, I see what you mean. We can avoid parsing the remainder of
> the array if the property in the device-tree isn't the property that
> the caller is interested in.
> 
> This can be solved by passing the interested property value as a
> parameter and so that the code errors out if this property doesn't
> match the property in the device-tree. Will add this in the next
> version.
> 
> > 
> > 
> > Nit: 
> > Cant we directly assign to tg->property, and hence avoid local
> > variables, property, nr_groups and threads_per_group?
> 
> Will clean this up. This was from an older version where I added the
> local variables so that the statements referencing them don't need to
> be split across multiple lines. However, the code has been optimized
> since then. So, the local variables are not needed.
> 
> > 
> > > + ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > > +  thread_group_array,
> > > +  3 + total_threads);
> > > +
> > > +static inline bool dt_has_big_core(struct device_node *dn,
> > > +struct thread_groups *tg)
> > > +{
> > > + if (parse_thread_groups(dn, tg))
> > > + return false;
> > > +
> > > + if (tg->property != 1)
> > > + return false;
> > > +
> > > + if (tg->nr_groups < 1)
> > > + return false;
> > 
> > Can avoid these check if we can check in parse_thread_groups.
> > 
> > >  /**
> > >   * setup_cpu_maps - initialize the following cpu maps:
> > >   *  cpu_possible_mask
> > > @@ -457,6 +605,7 @@ void __init smp_setup_cpu_maps(void)
> > >   int cpu = 0;
> > >   int nthreads = 1;
> > > 
> > > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > > index 755dc98..f5717de 100644
> > > --- a/arch/powerpc/kernel/sysfs.c
> > > +++ b/arch/powerpc/kernel/sysfs.c
> > > @@ -18,6 +18,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > > 
> > >  #include "cacheinfo.h"
> > >  #include "setup.h"
> > > @@ -1025,6 +1026,33 @@ static ssize_t show_physical_id(struct device *dev,
> > >  }
> > >  static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> > > 
> > > +static ssize_t 

Re: [PATCH 1/3] powerpc/mm: fix a warning when a cache is common to PGD and hugepages

2018-08-13 Thread Aneesh Kumar K.V

On 08/13/2018 06:57 PM, Christophe Leroy wrote:

While implementing TLB miss HW assistance on the 8xx, the following
warning was encountered:

[  423.732965] WARNING: CPU: 0 PID: 345 at mm/slub.c:2412 
___slab_alloc.constprop.30+0x26c/0x46c
[  423.733033] CPU: 0 PID: 345 Comm: mmap Not tainted 
4.18.0-rc8-00664-g2dfff9121c55 #671
[  423.733075] NIP:  c0108f90 LR: c0109ad0 CTR: 0004
[  423.733121] REGS: c455bba0 TRAP: 0700   Not tainted  
(4.18.0-rc8-00664-g2dfff9121c55)
[  423.733147] MSR:  00021032   CR: 24224848  XER: 2000
[  423.733319]
[  423.733319] GPR00: c0109ad0 c455bc50 c4521910 c60053c0 007080c0 c0011b34 
c7fa41e0 c455be30
[  423.733319] GPR08: 0001 c00103a0 c7fa41e0 c49afcc4 24282842 10018840 
c079b37c 0040
[  423.733319] GPR16: 73f0 00210d00  0001 c455a000 0100 
0200 c455a000
[  423.733319] GPR24: c60053c0 c0011b34 007080c0 c455a000 c455a000 c7fa41e0 
 9032
[  423.734190] NIP [c0108f90] ___slab_alloc.constprop.30+0x26c/0x46c
[  423.734257] LR [c0109ad0] kmem_cache_alloc+0x210/0x23c
[  423.734283] Call Trace:
[  423.734326] [c455bc50] [0100] 0x100 (unreliable)
[  423.734430] [c455bcc0] [c0109ad0] kmem_cache_alloc+0x210/0x23c
[  423.734543] [c455bcf0] [c0011b34] huge_pte_alloc+0xc0/0x1dc
[  423.734633] [c455bd20] [c01044dc] hugetlb_fault+0x408/0x48c
[  423.734720] [c455bdb0] [c0104b20] follow_hugetlb_page+0x14c/0x44c
[  423.734826] [c455be10] [c00e8e54] __get_user_pages+0x1c4/0x3dc
[  423.734919] [c455be80] [c00e9924] __mm_populate+0xac/0x140
[  423.735020] [c455bec0] [c00db14c] vm_mmap_pgoff+0xb4/0xb8
[  423.735127] [c455bf00] [c00f27c0] ksys_mmap_pgoff+0xcc/0x1fc
[  423.735222] [c455bf40] [c000e0f8] ret_from_syscall+0x0/0x38
[  423.735271] Instruction dump:
[  423.735321] 7cbf482e 38fd0008 7fa6eb78 7fc4f378 4bfff5dd 7fe3fb78 4bfffe24 
81370010
[  423.735536] 71280004 41a2ff88 4840c571 4b80 <0fe0> 4bfffeb8 81340010 
712a0004
[  423.735757] ---[ end trace e9b222919a470790 ]---

This warning occurs when calling kmem_cache_zalloc() on a
cache having a constructor.

In this case it happens because the PGD cache and the 512k hugepte cache
are the same size (4k). While a cache with a constructor is created for
the PGD, the hugepage code creates its cache without a constructor and uses
kmem_cache_zalloc(). As both expect a cache with the same size,
the hugepages reuse the cache created for the PGD, hence the conflict.

As the constructors only aim at zeroing the allocated memory, this
patch fixes this issue by removing the constructors and using
kmem_cache_zalloc() instead.



But that means we zero out on each alloc from the slab, right? Earlier we 
zeroed only when we added memory to the slab. Also we have code that 
carefully zeroes things out when we free the page table back to the slab.
The idea there was that it is better to take the cost of zeroing out during 
free rather than at fault time.



Signed-off-by: Christophe Leroy 
---
  arch/powerpc/include/asm/book3s/32/pgalloc.h |  2 +-
  arch/powerpc/include/asm/book3s/64/pgalloc.h |  4 ++--
  arch/powerpc/include/asm/nohash/32/pgalloc.h |  2 +-
  arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 +++---
  arch/powerpc/mm/init-common.c| 21 +++--
  5 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h 
b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..4c23cc1ae7a1 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -32,7 +32,7 @@ extern struct kmem_cache *pgtable_cache[];

  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
  {
-   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+   return kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
  }

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 76234a14b97d..074359cd632a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -81,7 +81,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (radix_enabled())
return radix__pgd_alloc(mm);

-   pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+   pgd = kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
   pgtable_gfp_flags(mm, GFP_KERNEL));
/*
 * Don't scan the PGD for pointers, it contains references to PUDs but
@@ -120,7 +120,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, 
unsigned long addr)
  {
pud_t *pud;

-   pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
+   pud = kmem_cache_zalloc(PGT_CACHE(PUD_CACHE_INDEX),
   pgtable_gfp_flags(mm, GFP_KERNEL));
/*
 * Tell kmemleak to ignore the PUD, that means don't scan it for
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h 
b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 

Re: [PATCH] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread christophe lombard

Le 13/08/2018 à 11:48, Andrew Donnellan a écrit :

On 13/08/18 19:01, Christophe Lombard wrote:

From: Christophe Lombard 


Your git committer email should probably match your sign-off email.



The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")

Signed-off-by: Christophe Lombard 


This looks like it should go to stable? I assume the reason we haven't 
noticed this previously is because we have not been testing with 
multi-AFU cards.



---
  drivers/misc/ocxl/config.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..de01623 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
  u32 val;
  int rc, templ_major, templ_minor, len;
-    pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+    pci_write_config_word(dev,
+  fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+  afu_idx);


pci_write_config_byte() would be more appropriate here (see 
ocxl_config_read_afu() at line 454)



  rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
  if (rc)
  return rc;





right. Thanks for the review.



[PATCH 3/3] powerpc/mm: remove unnecessary test in pgtable_cache_init()

2018-08-13 Thread Christophe Leroy
pgtable_cache_add() gracefully handles the case when a cache of that
size already exists by returning early with the following test:

if (PGT_CACHE(shift))
return; /* Already have a cache of this size */

It is therefore not necessary to test for the existence of the cache beforehand.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/init-common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 8fb182aaf87d..ebdb76cbb468 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -78,13 +78,13 @@ void pgtable_cache_init(void)
 {
pgtable_cache_add(PGD_INDEX_SIZE);
 
-   if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX))
+   if (PMD_CACHE_INDEX)
pgtable_cache_add(PMD_CACHE_INDEX);
/*
 * In all current configs, when the PUD index exists it's the
 * same size as either the pgd or pmd index except with THP enabled
 * on book3s 64
 */
-   if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+   if (PUD_CACHE_INDEX)
pgtable_cache_add(PUD_CACHE_INDEX);
 }
-- 
2.13.3



[PATCH 2/3] powerpc/mm: remove ctor argument to pgtable_cache_add()

2018-08-13 Thread Christophe Leroy
As the previous patch has removed all pgtable cache constructors,
let's remove the ctor argument completely.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/pgtable.h |  2 +-
 arch/powerpc/mm/hugetlbpage.c  |  2 +-
 arch/powerpc/mm/init-common.c  | 10 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 14c79a7dc855..d3195ac00a0b 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -72,7 +72,7 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, 
unsigned long addr,
 /* can we use this in kvm */
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
+void pgtable_cache_add(unsigned shift);
 void pgtable_cache_init(void);
 
 #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7296a42eb62e..72f31fc70b8e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -701,7 +701,7 @@ static int __init hugetlbpage_init(void)
 * use pgt cache for hugepd.
 */
if (pdshift > shift)
-   pgtable_cache_add(pdshift - shift, NULL);
+   pgtable_cache_add(pdshift - shift);
 #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
else if (!hugepte_cache) {
/*
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 2ae15ff8f76f..8fb182aaf87d 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv 
module */
  * everything else.  Caches created by this function are used for all
  * the higher level pagetables, and for hugepage pagetables.
  */
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+void pgtable_cache_add(unsigned shift)
 {
char *name;
unsigned long table_size = sizeof(void *) << shift;
@@ -63,7 +63,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
 
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
-   new = kmem_cache_create(name, table_size, align, 0, ctor);
+   new = kmem_cache_create(name, table_size, align, 0, NULL);
if (!new)
panic("Could not allocate pgtable cache for order %d", shift);
 
@@ -76,15 +76,15 @@ EXPORT_SYMBOL_GPL(pgtable_cache_add);   /* used by 
kvm_hv module */
 
 void pgtable_cache_init(void)
 {
-   pgtable_cache_add(PGD_INDEX_SIZE, NULL);
+   pgtable_cache_add(PGD_INDEX_SIZE);
 
if (PMD_CACHE_INDEX && !PGT_CACHE(PMD_CACHE_INDEX))
-   pgtable_cache_add(PMD_CACHE_INDEX, NULL);
+   pgtable_cache_add(PMD_CACHE_INDEX);
/*
 * In all current configs, when the PUD index exists it's the
 * same size as either the pgd or pmd index except with THP enabled
 * on book3s 64
 */
if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
-   pgtable_cache_add(PUD_CACHE_INDEX, NULL);
+   pgtable_cache_add(PUD_CACHE_INDEX);
 }
-- 
2.13.3



[PATCH 1/3] powerpc/mm: fix a warning when a cache is common to PGD and hugepages

2018-08-13 Thread Christophe Leroy
While implementing TLB miss HW assistance on the 8xx, the following
warning was encountered:

[  423.732965] WARNING: CPU: 0 PID: 345 at mm/slub.c:2412 
___slab_alloc.constprop.30+0x26c/0x46c
[  423.733033] CPU: 0 PID: 345 Comm: mmap Not tainted 
4.18.0-rc8-00664-g2dfff9121c55 #671
[  423.733075] NIP:  c0108f90 LR: c0109ad0 CTR: 0004
[  423.733121] REGS: c455bba0 TRAP: 0700   Not tainted  
(4.18.0-rc8-00664-g2dfff9121c55)
[  423.733147] MSR:  00021032   CR: 24224848  XER: 2000
[  423.733319]
[  423.733319] GPR00: c0109ad0 c455bc50 c4521910 c60053c0 007080c0 c0011b34 
c7fa41e0 c455be30
[  423.733319] GPR08: 0001 c00103a0 c7fa41e0 c49afcc4 24282842 10018840 
c079b37c 0040
[  423.733319] GPR16: 73f0 00210d00  0001 c455a000 0100 
0200 c455a000
[  423.733319] GPR24: c60053c0 c0011b34 007080c0 c455a000 c455a000 c7fa41e0 
 9032
[  423.734190] NIP [c0108f90] ___slab_alloc.constprop.30+0x26c/0x46c
[  423.734257] LR [c0109ad0] kmem_cache_alloc+0x210/0x23c
[  423.734283] Call Trace:
[  423.734326] [c455bc50] [0100] 0x100 (unreliable)
[  423.734430] [c455bcc0] [c0109ad0] kmem_cache_alloc+0x210/0x23c
[  423.734543] [c455bcf0] [c0011b34] huge_pte_alloc+0xc0/0x1dc
[  423.734633] [c455bd20] [c01044dc] hugetlb_fault+0x408/0x48c
[  423.734720] [c455bdb0] [c0104b20] follow_hugetlb_page+0x14c/0x44c
[  423.734826] [c455be10] [c00e8e54] __get_user_pages+0x1c4/0x3dc
[  423.734919] [c455be80] [c00e9924] __mm_populate+0xac/0x140
[  423.735020] [c455bec0] [c00db14c] vm_mmap_pgoff+0xb4/0xb8
[  423.735127] [c455bf00] [c00f27c0] ksys_mmap_pgoff+0xcc/0x1fc
[  423.735222] [c455bf40] [c000e0f8] ret_from_syscall+0x0/0x38
[  423.735271] Instruction dump:
[  423.735321] 7cbf482e 38fd0008 7fa6eb78 7fc4f378 4bfff5dd 7fe3fb78 4bfffe24 
81370010
[  423.735536] 71280004 41a2ff88 4840c571 4b80 <0fe0> 4bfffeb8 81340010 
712a0004
[  423.735757] ---[ end trace e9b222919a470790 ]---

This warning occurs when calling kmem_cache_zalloc() on a
cache having a constructor.

In this case it happens because the PGD cache and the 512k hugepte cache
are the same size (4k). While a cache with a constructor is created for
the PGD, the hugepage code creates its cache without a constructor and uses
kmem_cache_zalloc(). As both expect a cache with the same size,
the hugepages reuse the cache created for the PGD, hence the conflict.

As the constructors only aim at zeroing the allocated memory, this
patch fixes this issue by removing the constructors and using
kmem_cache_zalloc() instead.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  4 ++--
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  6 +++---
 arch/powerpc/mm/init-common.c| 21 +++--
 5 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h 
b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 82e44b1a00ae..4c23cc1ae7a1 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -32,7 +32,7 @@ extern struct kmem_cache *pgtable_cache[];
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+   return kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h 
b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 76234a14b97d..074359cd632a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -81,7 +81,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (radix_enabled())
return radix__pgd_alloc(mm);
 
-   pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+   pgd = kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
   pgtable_gfp_flags(mm, GFP_KERNEL));
/*
 * Don't scan the PGD for pointers, it contains references to PUDs but
@@ -120,7 +120,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, 
unsigned long addr)
 {
pud_t *pud;
 
-   pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
+   pud = kmem_cache_zalloc(PGT_CACHE(PUD_CACHE_INDEX),
   pgtable_gfp_flags(mm, GFP_KERNEL));
/*
 * Tell kmemleak to ignore the PUD, that means don't scan it for
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h 
b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 8825953c225b..766cf0c90d19 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -32,7 +32,7 @@ extern struct kmem_cache *pgtable_cache[];
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-   return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+   return kmem_cache_zalloc(PGT_CACHE(PGD_INDEX_SIZE),
   

[PATCH] powerpc/mm: Don't report hugepage tables as memory leaks when using kmemleak

2018-08-13 Thread Christophe Leroy
When a process allocates a hugepage, the following leak is
reported by kmemleak. This is a false positive which is
due to the pointer to the table being stored in the PGD
as physical memory address and not virtual memory pointer.

unreferenced object 0xc30f8200 (size 512):
  comm "mmap", pid 374, jiffies 4872494 (age 627.630s)
  hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
  backtrace:
[] huge_pte_alloc+0xdc/0x1f8
[<9e0df1e1>] hugetlb_fault+0x560/0x8f8
[<7938ec6c>] follow_hugetlb_page+0x14c/0x44c
[] __get_user_pages+0x1c4/0x3dc
[] __mm_populate+0xac/0x140
[<3215421e>] vm_mmap_pgoff+0xb4/0xb8
[] ksys_mmap_pgoff+0xcc/0x1fc
[<4fcd760f>] ret_from_syscall+0x0/0x38

See commit a984506c542e2 ("powerpc/mm: Don't report PUDs as
memory leaks when using kmemleak") for detailed explanation.

To fix that, this patch tells kmemleak to ignore the allocated
hugepage table.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/hugetlbpage.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e87f9ef9115b..7296a42eb62e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include <linux/kmemleak.h>
 #include 
 #include 
 #include 
@@ -112,6 +113,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t 
*hpdp,
for (i = i - 1 ; i >= 0; i--, hpdp--)
*hpdp = __hugepd(0);
kmem_cache_free(cachep, new);
+   } else {
+   kmemleak_ignore(new);
}
spin_unlock(ptl);
return 0;
-- 
2.13.3



Re: [PATCH v6 2/2] powerpc: Use cpu_smallcore_sibling_mask at SMT level on bigcores

2018-08-13 Thread Gautham R Shenoy
On Thu, Aug 09, 2018 at 06:26:57AM -0700, Srikar Dronamraju wrote:
> * Gautham R. Shenoy  [2018-08-09 11:02:08]:
> 
> > 
> > 3) ppc64_cpu --smt=2
> >SMT domain ceases to exist as each domain consists of just one
> >group.
> > 
> 
> When seen in isolation, the above looks as if ppc64_cpu --smt=2 o/p says
> " SMT domain ceases to exist"

Ok. The intent was to say that one of the sched-domain level
collapses, thereby leaving only CACHE, DIE and NUMA. Will word it
better.

> 
> > @@ -999,7 +1012,17 @@ static void add_cpu_to_masks(int cpu)
> >  {
> > int first_thread = cpu_first_thread_sibling(cpu);
> > int chipid = cpu_to_chip_id(cpu);
> > -   int i;
> > +
> > +   struct thread_groups tg;
> > +   int i, cpu_group_start = -1;
> > +
> > +   if (has_big_cores) {
> > +   struct device_node *dn = of_get_cpu_node(cpu, NULL);
> > +
> 
> Not checking for validity of dn and no of_node_puts?

Will fix this. Thanks for catching this.

> 
> > +   parse_thread_groups(dn, &tg);
> > +   cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> > +   cpumask_set_cpu(cpu, cpu_smallcore_sibling_mask(cpu));
> > +   }
> > 
> > /*
> >  * This CPU will not be in the online mask yet so we need to manually
> 
> The rest looks good

Thanks for the review.


> 



Re: [PATCH v6 1/2] powerpc: Detect the presence of big-cores via "ibm,thread-groups"

2018-08-13 Thread Gautham R Shenoy
Hi Srikar,

Thanks for reviewing the patch.

On Thu, Aug 09, 2018 at 06:27:43AM -0700, Srikar Dronamraju wrote:
> * Gautham R. Shenoy  [2018-08-09 11:02:07]:
> 
> > 
> >  int threads_per_core, threads_per_subcore, threads_shift;
> > +bool has_big_cores;
> >  cpumask_t threads_core_mask;
> >  EXPORT_SYMBOL_GPL(threads_per_core);
> >  EXPORT_SYMBOL_GPL(threads_per_subcore);
> >  EXPORT_SYMBOL_GPL(threads_shift);
> > +EXPORT_SYMBOL_GPL(has_big_cores);
> 
> Why do we need EXPORT_SYMBOL_GPL?

As Christoph pointed out, I was blindly following suit.

You are right, we don't need to export it at the moment. The remaining
EXPORT_SYMBOL_GPL are required by KVM. However, as of now, there is no
need for "has_big_cores" in the KVM.

Will remove this in the next version.
> 
> >  EXPORT_SYMBOL_GPL(threads_core_mask);
> > 
> > + *
> > + * Returns 0 on success, -EINVAL if the property does not exist,
> > + * -ENODATA if property does not have a value, and -EOVERFLOW if the
> > + * property data isn't large enough.
> > + */
> > +int parse_thread_groups(struct device_node *dn,
> > +   struct thread_groups *tg)
> > +{
> > +   unsigned int nr_groups, threads_per_group, property;
> > +   int i;
> > +   u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> > +   u32 *thread_list;
> > +   size_t total_threads;
> > +   int ret;
> > +
> > +   ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > +thread_group_array, 3);
> > +
> > +   if (ret)
> > +   goto out_err;
> > +
> > +   property = thread_group_array[0];
> > +   nr_groups = thread_group_array[1];
> > +   threads_per_group = thread_group_array[2];
> > +   total_threads = nr_groups * threads_per_group;


> > +
> 
> Shouldnt we check for property and nr_groups
> If the property is not 1 and nr_groups < 1, we should error out
> No point in calling a of_property read if property is not right.

Yes, the nr_groups < 1 check can be moved into this function.

However, this function merely parses the thread group structure
exposed by the device tree. So it should error out only if there is a
failure in parsing, or as you said the parsed values are incorrect
(nr_groups < 1, threads_per_group < 1, etc). Whether the thread group
is relevant or not (in this case we are interested in thread groups
that share L1 cache, translation etc) is something for the caller to
decide.

However, I see what you mean. We can avoid parsing the remainder of
the array if the property in the device-tree isn't the property that
the caller is interested in.

This can be solved by passing the interested property value as a
parameter and so that the code errors out if this property doesn't
match the property in the device-tree. Will add this in the next
version.

> 
> 
> Nit: 
> Cant we directly assign to tg->property, and hence avoid local
> variables, property, nr_groups and threads_per_group?

Will clean this up. This was from an older version where I added the
local variables so that the statements referencing them don't need to
be split across multiple lines. However, the code has been optimized
since then. So, the local variables are not needed.

> 
> > +   ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> > +thread_group_array,
> > +3 + total_threads);
> > +
> > +static inline bool dt_has_big_core(struct device_node *dn,
> > +  struct thread_groups *tg)
> > +{
> > +   if (parse_thread_groups(dn, tg))
> > +   return false;
> > +
> > +   if (tg->property != 1)
> > +   return false;
> > +
> > +   if (tg->nr_groups < 1)
> > +   return false;
> 
> Can avoid these check if we can check in parse_thread_groups.
> 
> >  /**
> >   * setup_cpu_maps - initialize the following cpu maps:
> >   *  cpu_possible_mask
> > @@ -457,6 +605,7 @@ void __init smp_setup_cpu_maps(void)
> > int cpu = 0;
> > int nthreads = 1;
> > 
> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > index 755dc98..f5717de 100644
> > --- a/arch/powerpc/kernel/sysfs.c
> > +++ b/arch/powerpc/kernel/sysfs.c
> > @@ -18,6 +18,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > 
> >  #include "cacheinfo.h"
> >  #include "setup.h"
> > @@ -1025,6 +1026,33 @@ static ssize_t show_physical_id(struct device *dev,
> >  }
> >  static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
> > 
> > +static ssize_t show_small_core_siblings(struct device *dev,
> > +   struct device_attribute *attr,
> > +   char *buf)
> > +{
> > +   struct cpu *cpu = container_of(dev, struct cpu, dev);
> > +   struct device_node *dn = of_get_cpu_node(cpu->dev.id, NULL);
> > +   struct thread_groups tg;
> > +   int i, j;
> > +   ssize_t ret = 0;
> > +
> 
> Here we need to check for validity of dn and error out accordingly.

Will add this check.

> 
> 

Re: cxl: remove a dead branch

2018-08-13 Thread Mathieu Malaterre
Frederic,

Could you double check with Michael what is now best to do.

Thanks

On Mon, Aug 13, 2018 at 1:23 PM Michael Ellerman
 wrote:
>
> On Thu, 2018-03-22 at 21:05:28 UTC, Mathieu Malaterre wrote:
> > In commit 14baf4d9c739 ("cxl: Add guest-specific code") the following code
> > was added:
> >
> >   if (afu->crs_len < 0) {
> >   dev_err(&afu->dev, "Unexpected configuration record size 
> > value\n");
> >   return -EINVAL;
> >   }
> >
> > However the variable `crs_len` is of type u64 and cannot be compared < 0.
> > Remove the dead code section. Fix the following warning treated as error
> > with W=1:
> >
> > ../drivers/misc/cxl/guest.c:919:19: error: comparison of unsigned 
> > expression < 0 is always false [-Werror=type-limits]
> >
> > Signed-off-by: Mathieu Malaterre 
>
> Applied to powerpc next, thanks.
>
> https://git.kernel.org/powerpc/c/e4ecafb14fd9cd77d8f4320af1922e
>
> cheers


Re: [V2] powerpc/mm/book3s/radix: Add mapping statistics

2018-08-13 Thread Michael Ellerman
On Mon, 2018-08-13 at 05:44:57 UTC, "Aneesh Kumar K.V" wrote:
> Add statistics that show how memory is mapped within the kernel linear 
> mapping.
> This is similar to commit 37cd944c8d8f ("s390/pgtable: add mapping 
> statistics")
> 
> We don't do this with Hash translation mode. Hash uses one size 
> (mmu_linear_psize)
> to map the kernel linear mapping and we print the linear psize during boot as
> below.
> 
> "Page orders: linear mapping = 24, virtual = 16, io = 16, vmemmap = 24"
> 
> A sample output looks like:
> 
> DirectMap4k:   0 kB
> DirectMap64k:   18432 kB
> DirectMap2M: 1030144 kB
> DirectMap1G:11534336 kB
> 
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a2dc009afa9ae8b92305be77286765

cheers


Re: powerpc/uaccess: Enable get_user(u64, *p) on 32-bit

2018-08-13 Thread Michael Ellerman
On Fri, 2018-08-10 at 12:25:35 UTC, Michael Ellerman wrote:
> Currently if you build a 32-bit powerpc kernel and use get_user() to
> load a u64 value it will fail to build with eg:
> 
>   kernel/rseq.o: In function `rseq_get_rseq_cs':
>   kernel/rseq.c:123: undefined reference to `__get_user_bad'
> 
> This is hitting the check in __get_user_size() that makes sure the
> size we're copying doesn't exceed the size of the destination:
> 
>   #define __get_user_size(x, ptr, size, retval)
>   do {
>   retval = 0;
>   __chk_user_ptr(ptr);
>   if (size > sizeof(x))
>   (x) = __get_user_bad();
> 
> Which doesn't immediately make sense because the size of the
> destination is u64, but it's not really, because __get_user_check()
> etc. internally create an unsigned long and copy into that:
> 
>   #define __get_user_check(x, ptr, size)
>   ({
>   long __gu_err = -EFAULT;
>   unsigned long  __gu_val = 0;
> 
> The problem being that on 32-bit unsigned long is not big enough to
> hold a u64. We can fix this with a trick from hpa in the x86 code, we
> statically check the type of x and set the type of __gu_val to either
> unsigned long or unsigned long long.
> 
> Signed-off-by: Michael Ellerman 

Applied to powerpc next.

https://git.kernel.org/powerpc/c/f7a6947cd49b7ff4e03f1b4f7e7b22

cheers


Re: [v2, 1/2] powerpc/64s: move machine check SLB flushing to mm/slb.c

2018-08-13 Thread Michael Ellerman
On Fri, 2018-08-10 at 06:42:48 UTC, Nicholas Piggin wrote:
> The machine check code that flushes and restores bolted segments in
> real mode belongs in mm/slb.c. This will also be used by pseries
> machine check and idle code in future changes.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/e7e81847478b37a3958a3163171bf6

cheers


Re: powerpc/powernv/idle: Fix build error

2018-08-13 Thread Michael Ellerman
On Thu, 2018-08-09 at 13:37:20 UTC, "Aneesh Kumar K.V" wrote:
> Fix the below build error using strlcpy instead of strncpy
> 
> In function 'pnv_parse_cpuidle_dt',
> inlined from 'pnv_init_idle_states' at 
> arch/powerpc/platforms/powernv/idle.c:840:7,
> inlined from '__machine_initcall_powernv_pnv_init_idle_states' at 
> arch/powerpc/platforms/powernv/idle.c:870:1:
> arch/powerpc/platforms/powernv/idle.c:820:3: error: 'strncpy' specified bound 
> 16 equals destination size [-Werror=stringop-truncation]
>strncpy(pnv_idle_states[i].name, temp_string[i],
>^~~~
> PNV_IDLE_NAME_LEN);
> 
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ae24ce5e12127eeef6bf946c3ee0e9

cheers


Re: powerpc/mm/tlbflush: update the mmu_gather page size while iterating address range

2018-08-13 Thread Michael Ellerman
On Thu, 2018-08-09 at 13:36:59 UTC, "Aneesh Kumar K.V" wrote:
> This patch makes sure we update the mmu_gather page size even if we are
> requesting for a fullmm flush. This avoids triggering VM_WARN_ON in code
> paths like __tlb_remove_page_size that explicitly check for removing range 
> page
> size to be same as mmu gather page size.
> 
> Signed-off-by: Aneesh Kumar K.V 
> Acked-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/0b6aa1a20add96437c46db77c9bae2

cheers


Re: powerpc/mm/hash: Remove unnecessary do { }while(0) loop

2018-08-13 Thread Michael Ellerman
On Thu, 2018-08-09 at 13:36:42 UTC, "Aneesh Kumar K.V" wrote:
> Avoid coverity false warnings like
> 
> *** CID 187347:  Control flow issues  (UNREACHABLE)
> /arch/powerpc/mm/hash_native_64.c: 819 in native_flush_hash_range()
> 813slot += hidx & _PTEIDX_GROUP_IX;
> 814hptep = htab_address + slot;
> 815want_v = hpte_encode_avpn(vpn, psize, ssize);
> 816hpte_v = hpte_get_old_v(hptep);
> 817
> 818if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
> >>> CID 187347:  Control flow issues  (UNREACHABLE)
> 
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/f405b510c93eeb7390d0e2c6ef8d12

cheers


Re: powerpc/lib: Use patch_site to patch copy_32 functions once cache is enabled

2018-08-13 Thread Michael Ellerman
On Thu, 2018-08-09 at 08:14:41 UTC, Christophe Leroy wrote:
> The symbol memcpy_nocache_branch defined in order to allow patching
> of memset function once cache is enabled leads to confusing reports
> by perf tool.
> 
> Using the new patch_site functionality solves this issue.
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fa54a981ea7a852c145b05c95abba1

cheers


Re: powerpc/mm: remove huge_pte_offset_and_shift() prototype

2018-08-13 Thread Michael Ellerman
On Wed, 2018-08-08 at 15:36:34 UTC, Christophe Leroy wrote:
> huge_pte_offset_and_shift() has never existed
> 
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/646dbe40fa2a54118975792fa9b98c

cheers


Re: powerpc: fix size calculation using resource_size()

2018-08-13 Thread Michael Ellerman
On Wed, 2018-08-08 at 11:57:24 UTC, Dan Carpenter wrote:
> The problem is that the calculation should be "end - start + 1" but the
> plus one is missing in this calculation.
> 
> Fixes: 8626816e905e ("powerpc: add support for MPIC message register API")
> Signed-off-by: Dan Carpenter 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/c42d3be0c06f0c1c416054022aa535

cheers


Re: [v7, 3/9] powerpc/pseries: Fix endianness while restoring of r3 in MCE handler.

2018-08-13 Thread Michael Ellerman
On Tue, 2018-08-07 at 14:16:46 UTC, Mahesh J Salgaonkar wrote:
> From: Mahesh Salgaonkar 
> 
> During Machine Check interrupt on pseries platform, register r3 points
> RTAS extended event log passed by hypervisor. Since hypervisor uses r3
> to pass pointer to rtas log, it stores the original r3 value at the
> start of the memory (first 8 bytes) pointed by r3. Since hypervisor
> stores this info and rtas log is in BE format, linux should make
> sure to restore r3 value in correct endian format.
> 
> Without this patch, when the MCE handler, after recovery, returns to the code
> that caused the MCE, it may end up with a Data SLB access interrupt for an invalid
> address followed by kernel panic or hang.
> 
> [   62.878965] Severe Machine check interrupt [Recovered]
> [   62.878968]   NIP [dca301b8]: init_module+0x1b8/0x338 [bork_kernel]
> [   62.878969]   Initiator: CPU
> [   62.878970]   Error type: SLB [Multihit]
> [   62.878971] Effective address: dca7
> cpu 0xa: Vector: 380 (Data SLB Access) at [c000fc7775b0]
> pc: c09694c0: vsnprintf+0x80/0x480
> lr: c09698e0: vscnprintf+0x20/0x60
> sp: c000fc777830
>msr: 82009033
>dar: a803a30c00d0
>   current = 0xcbc9ef00
>   paca= 0xc0001eca5c00 softe: 3irq_happened: 0x01
> pid   = 8860, comm = insmod
> [c000fc7778b0] c09698e0 vscnprintf+0x20/0x60
> [c000fc7778e0] c016b6c4 vprintk_emit+0xb4/0x4b0
> [c000fc777960] c016d40c vprintk_func+0x5c/0xd0
> [c000fc777980] c016cbb4 printk+0x38/0x4c
> [c000fc7779a0] dca301c0 init_module+0x1c0/0x338 [bork_kernel]
> [c000fc777a40] c000d9c4 do_one_initcall+0x54/0x230
> [c000fc777b00] c01b3b74 do_init_module+0x8c/0x248
> [c000fc777b90] c01b2478 load_module+0x12b8/0x15b0
> [c000fc777d30] c01b29e8 sys_finit_module+0xa8/0x110
> [c000fc777e30] c000b204 system_call+0x58/0x6c
> --- Exception: c00 (System Call) at 7fff8bda0644
> SP (7fffdfbfe980) is in userspace
> 
> This patch fixes this issue.
> 
> Fixes: a08a53ea4c97 ("powerpc/le: Enable RTAS events support")
> Cc: sta...@vger.kernel.org
> Reviewed-by: Nicholas Piggin 
> Signed-off-by: Mahesh Salgaonkar 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/cd813e1cd7122f2c261dce5b54d1e0

cheers


Re: [v3] selftests/powerpc: Kill child processes on SIGINT

2018-08-13 Thread Michael Ellerman
On Tue, 2018-08-07 at 14:15:39 UTC, Breno Leitao wrote:
> There are some powerpc selftests, as tm/tm-unavailable, that run for a long
> period (>120 seconds), and if it is interrupted, as pressing CTRL-C
> (SIGINT), the foreground process (harness) dies but the child process and
> threads continue to execute (with PPID = 1 now) in background.
> 
> In this case, you'd think the whole test exited, but there are remaining
> threads and processes being executed in background. Sometimes these
> zombies processes are doing annoying things, as consuming the whole CPU or
> dumping things to STDOUT.
> 
> This patch fixes this problem by attaching an empty signal handler to
> SIGINT in the harness process. This handler will interrupt (EINTR) the
> parent process waitpid() call, letting the code to follow through the
> normal flow, which will kill all the processes in the child process group.
> 
> This patch also fixes a typo.
> 
> Signed-off-by: Breno Leitao 
> Signed-off-by: Gustavo Romero 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/7c27a26e1ed5a7dd709aa19685d2c9

cheers


Re: powerpc/cpm1: fix compilation error with CONFIG_PPC_EARLY_DEBUG_CPM

2018-08-13 Thread Michael Ellerman
On Mon, 2018-08-06 at 15:09:11 UTC, Christophe Leroy wrote:
> commit e8cb7a55eb8dc ("powerpc: remove superflous inclusions of
> asm/fixmap.h") removed inclusion of asm/fixmap.h from files not
> including objects from that file.
> 
> However, asm/mmu-8xx.h includes a call to __fix_to_virt(). The proper
> way would be to include asm/fixmap.h in asm/mmu-8xx.h but it creates
> an inclusion loop.
> 
> So we have to leave asm/fixmap.h in sysdep/cpm_common.c for
> CONFIG_PPC_EARLY_DEBUG_CPM
> 
>   CC  arch/powerpc/sysdev/cpm_common.o
> In file included from ./arch/powerpc/include/asm/mmu.h:340:0,
>  from ./arch/powerpc/include/asm/reg_8xx.h:8,
>  from ./arch/powerpc/include/asm/reg.h:29,
>  from ./arch/powerpc/include/asm/processor.h:13,
>  from ./arch/powerpc/include/asm/thread_info.h:28,
>  from ./include/linux/thread_info.h:38,
>  from ./arch/powerpc/include/asm/ptrace.h:159,
>  from ./arch/powerpc/include/asm/hw_irq.h:12,
>  from ./arch/powerpc/include/asm/irqflags.h:12,
>  from ./include/linux/irqflags.h:16,
>  from ./include/asm-generic/cmpxchg-local.h:6,
>  from ./arch/powerpc/include/asm/cmpxchg.h:537,
>  from ./arch/powerpc/include/asm/atomic.h:11,
>  from ./include/linux/atomic.h:5,
>  from ./include/linux/mutex.h:18,
>  from ./include/linux/kernfs.h:13,
>  from ./include/linux/sysfs.h:16,
>  from ./include/linux/kobject.h:20,
>  from ./include/linux/device.h:16,
>  from ./include/linux/node.h:18,
>  from ./include/linux/cpu.h:17,
>  from ./include/linux/of_device.h:5,
>  from arch/powerpc/sysdev/cpm_common.c:21:
> arch/powerpc/sysdev/cpm_common.c: In function ‘udbg_init_cpm’:
> ./arch/powerpc/include/asm/mmu-8xx.h:218:25: error: implicit declaration of 
> function ‘__fix_to_virt’ [-Werror=implicit-function-declaration]
>  #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
>  ^
> arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro 
> ‘VIRT_IMMR_BASE’
>VIRT_IMMR_BASE);
>^
> ./arch/powerpc/include/asm/mmu-8xx.h:218:39: error: ‘FIX_IMMR_BASE’ 
> undeclared (first use in this function)
>  #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
>^
> arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro 
> ‘VIRT_IMMR_BASE’
>VIRT_IMMR_BASE);
>^
> ./arch/powerpc/include/asm/mmu-8xx.h:218:39: note: each undeclared identifier 
> is reported only once for each function it appears in
>  #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
>^
> arch/powerpc/sysdev/cpm_common.c:75:7: note: in expansion of macro 
> ‘VIRT_IMMR_BASE’
>VIRT_IMMR_BASE);
>^
> cc1: all warnings being treated as errors
> make[1]: *** [arch/powerpc/sysdev/cpm_common.o] Error 1
> 
> Fixes: e8cb7a55eb8dc ("powerpc: remove superflous inclusions of asm/fixmap.h")
> Signed-off-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/6bd6d8672208e8dc0c18588d6eb458

cheers


Re: [v2, 1/2] powerpc/fadump: handle crash memory ranges array index overflow

2018-08-13 Thread Michael Ellerman
On Mon, 2018-08-06 at 20:42:45 UTC, Hari Bathini wrote:
> Crash memory ranges is an array of memory ranges of the crashing kernel
> to be exported as a dump via /proc/vmcore file. The size of the array
> is set based on INIT_MEMBLOCK_REGIONS, which works alright in most cases
> where memblock memory regions count is less than INIT_MEMBLOCK_REGIONS
> value. But this count can grow beyond INIT_MEMBLOCK_REGIONS value since
> commit 142b45a72e22 ("memblock: Add array resizing support").
> 
> On large memory systems with a few DLPAR operations, the memblock memory
> regions count could be larger than INIT_MEMBLOCK_REGIONS value. On such
> systems, registering fadump results in crash or other system failures
> like below:
> 
>   task: c7f39a290010 ti: cb738000 task.ti: cb738000
>   NIP: c0047df4 LR: c00f9e58 CTR: c010f180
>   REGS: cb73b570 TRAP: 0300   Tainted: G  L   X  (4.4.140+)
>   MSR: 80009033   CR: 22004484  XER: 2000
>   CFAR: c0008500 DAR: 07a45000 DSISR: 4000 SOFTE: 0
>   GPR00: c00f9e58 cb73b7f0 c0f09a00 001a
>   GPR04: c7f3bf774c90 0004 c0eb9a00 0800
>   GPR08: 0804 07a45000 c0fa9a00 c7ffb169ca20
>   GPR12: 22004482 cfa12c00 c7f3a0ea97a8 
>   GPR16: c7f3a0ea9a50 cb73bd60 0118 0001fe80
>   GPR20: 0118  c0b8c980 00d0
>   GPR24: 07ffb0b1 c7ffb169c980  c0b8c980
>   GPR28: 0004 c7ffb169c980 001a c7ffb169c980
>   NIP [c0047df4] smp_send_reschedule+0x24/0x80
>   LR [c00f9e58] resched_curr+0x138/0x160
>   Call Trace:
>   [cb73b7f0] [c00f9e58] resched_curr+0x138/0x160 (unreliable)
>   [cb73b820] [c00fb538] check_preempt_curr+0xc8/0xf0
>   [cb73b850] [c00fb598] ttwu_do_wakeup+0x38/0x150
>   [cb73b890] [c00fc9c4] try_to_wake_up+0x224/0x4d0
>   [cb73b900] [c011ef34] __wake_up_common+0x94/0x100
>   [cb73b960] [c034a78c] ep_poll_callback+0xac/0x1c0
>   [cb73b9b0] [c011ef34] __wake_up_common+0x94/0x100
>   [cb73ba10] [c011f810] __wake_up_sync_key+0x70/0xa0
>   [cb73ba60] [c067c3e8] sock_def_readable+0x58/0xa0
>   [cb73ba90] [c07848ac] unix_stream_sendmsg+0x2dc/0x4c0
>   [cb73bb70] [c0675a38] sock_sendmsg+0x68/0xa0
>   [cb73bba0] [c067673c] ___sys_sendmsg+0x2cc/0x2e0
>   [cb73bd30] [c0677dbc] __sys_sendmsg+0x5c/0xc0
>   [cb73bdd0] [c06789bc] SyS_socketcall+0x36c/0x3f0
>   [cb73be30] [c0009488] system_call+0x3c/0x100
>   Instruction dump:
>   4e800020 6000 6042 3c4c00ec 38421c30 7c0802a6 f8010010 6000
>   3d42000a e92ab420 2fa9 4dde0020  2fa9 419e0044 7c0802a6
>   ---[ end trace a6d1dd4bab5f8253 ]---
> 
> as array index overflow is not checked for while setting up crash memory
> ranges causing memory corruption. To resolve this issue, dynamically
> allocate memory for crash memory ranges and resize it incrementally,
> in units of pagesize, on hitting array size limit.
> 
> Fixes: 2df173d9e85d ("fadump: Initialize elfcore header and add PT_LOAD 
> program headers.")
> Cc: sta...@vger.kernel.org
> Cc: Mahesh Salgaonkar 
> Signed-off-by: Hari Bathini 
> Reviewed-by: Mahesh Salgaonkar 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/1bd6a1c4b80a28d975287630644e6b

cheers


Re: [1/2] powerpc: Allow memory that has been hot-removed to be hot-added

2018-08-13 Thread Michael Ellerman
On Fri, 2018-08-03 at 06:06:00 UTC, Rashmica Gupta wrote:
> This patch allows the memory removed by memtrace to be readded to the
> kernel. So now you don't have to reboot your system to add the memory
> back to the kernel or to have a different amount of memory removed.
> 
> Signed-off-by: Rashmica Gupta 
> Tested-by: Michael Neuling 

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d3da701d3308ce1fa457f32c6c9e21

cheers


Re: powerpc: include setup.h header file to fix warnings

2018-08-13 Thread Michael Ellerman
On Fri, 2018-06-22 at 19:26:53 UTC, Mathieu Malaterre wrote:
> Make sure to include setup.h to provide the following prototypes:
> 
> - irqstack_early_init
> - setup_power_save
> - initialize_cache_info
> 
> Fix the following warnings (treated as error in W=1):
> 
>   arch/powerpc/kernel/setup_32.c:198:13: error: no previous prototype for 
> ‘irqstack_early_init’ [-Werror=missing-prototypes]
>   arch/powerpc/kernel/setup_32.c:238:13: error: no previous prototype for 
> ‘setup_power_save’ [-Werror=missing-prototypes]
>   arch/powerpc/kernel/setup_32.c:253:13: error: no previous prototype for 
> ‘initialize_cache_info’ [-Werror=missing-prototypes]
> 
> Signed-off-by: Mathieu Malaterre 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/f2c6d0d1092e3da2bd36c768777e88

cheers


Re: powerpc/mm: remove warning about ‘type’ being set

2018-08-13 Thread Michael Ellerman
On Fri, 2018-06-22 at 19:27:47 UTC, Mathieu Malaterre wrote:
> ‘type’ is only used when CONFIG_DEBUG_HIGHMEM is set. So add a possibly
> unused tag to variable. Remove warning treated as error with W=1:
> 
>   arch/powerpc/mm/highmem.c:59:6: error: variable ‘type’ set but not used 
> [-Werror=unused-but-set-variable]
> 
> Signed-off-by: Mathieu Malaterre 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fce278af81daf8599f9e94883cbe43

cheers


Re: [14/15] tty: hvc: introduce the hv_ops.flush operation for hvc drivers

2018-08-13 Thread Michael Ellerman
On Mon, 2018-04-30 at 14:55:57 UTC, Nicholas Piggin wrote:
> Use .flush to wait for drivers to flush their console outside of
> the spinlock, to reduce lock/irq latencies.
> 
> Flush the hvc console driver after each write, which can help
> messages make it out to the console after a crash.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/9f65b81f36e31563c5a5e4df3b3b8b

cheers


Re: [13/15] tty: hvc: hvc_write may sleep

2018-08-13 Thread Michael Ellerman
On Mon, 2018-04-30 at 14:55:56 UTC, Nicholas Piggin wrote:
> Rework the hvc_write loop to drop and re-take the spinlock on each
> iteration, add a cond_resched. Don't bother with an initial hvc_push
> initially, which makes the logic simpler -- just do a hvc_push on
> each time around the loop.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/550ddadcc7580ec2a6c22d4ed04291

cheers


Re: [12/15] tty: hvc: hvc_poll may sleep

2018-08-13 Thread Michael Ellerman
On Mon, 2018-04-30 at 14:55:55 UTC, Nicholas Piggin wrote:
> Introduce points where hvc_poll drops the lock, enables interrupts,
> and reschedules.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/cfb5946b55f1dfd19e042feae1fbff

cheers


Re: [11/15] tty: hvc: hvc_poll break hv read loop

2018-08-13 Thread Michael Ellerman
On Mon, 2018-04-30 at 14:55:54 UTC, Nicholas Piggin wrote:
> Avoid looping with the spinlock held while there is read data
> being returned from the hv driver. Instead note if the entire
> size returned by tty_buffer_request_room was read, and request
> another read poll.
> 
> This limits the critical section lengths, and provides more
> even service to other consoles in case there is a pathological
> condition.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/ec97eaad1383ab2500fcf9a07ade60

cheers


Re: [10/15] tty: hvc: use mutex instead of spinlock for hvc_structs lock

2018-08-13 Thread Michael Ellerman
On Mon, 2018-04-30 at 14:55:53 UTC, Nicholas Piggin wrote:
> This allows hvc operations to sleep under the lock.
> 
> Signed-off-by: Nicholas Piggin 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/a9bf5c8a271b9a954709b7ada1bd25

cheers


Re: [v3,03/19] powerpc: Move `path` variable inside DEBUG_PROM

2018-08-13 Thread Michael Ellerman
On Wed, 2018-04-04 at 20:08:35 UTC, Mathieu Malaterre wrote:
> Add gcc attribute unused for two variables. Fix warnings treated as errors
> with W=1:
> 
>   arch/powerpc/kernel/prom_init.c:1388:8: error: variable ‘path’ set but 
> not used [-Werror=unused-but-set-variable]
> 
> Suggested-by: Christophe Leroy 
> Signed-off-by: Mathieu Malaterre 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/eab00a208eb63d863b6cc0a03f68b4

cheers


Re: [v2,07/19] powerpc/powermac: Make some functions static

2018-08-13 Thread Michael Ellerman
On Wed, 2018-03-28 at 19:39:35 UTC, Mathieu Malaterre wrote:
> These functions can all be static, make it so. Fix warnings treated as
> errors with W=1:
> 
>   arch/powerpc/platforms/powermac/pci.c:1022:6: error: no previous prototype 
> for ‘pmac_pci_fixup_ohci’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/pci.c:1057:6: error: no previous prototype 
> for ‘pmac_pci_fixup_cardbus’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/pci.c:1094:6: error: no previous prototype 
> for ‘pmac_pci_fixup_pciata’ [-Werror=missing-prototypes]
> 
> Remove has_address declaration and assignment since not used. Also add gcc
> attribute unused to fix a warning treated as error with W=1:
> 
>   arch/powerpc/platforms/powermac/pci.c:784:19: error: variable 
> ‘has_address’ set but not used [-Werror=unused-but-set-variable]
>   arch/powerpc/platforms/powermac/pci.c:907:22: error: variable ‘ht’ set 
> but not used [-Werror=unused-but-set-variable]
> 
> Suggested-by: Christophe Leroy 
> Signed-off-by: Mathieu Malaterre 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/618a89d738fdd97f6fc58b7b666c7a

cheers


Re: [v2,02/19] powerpc/powermac: Mark variable x as unused

2018-08-13 Thread Michael Ellerman
On Wed, 2018-03-28 at 19:30:28 UTC, Mathieu Malaterre wrote:
> Since the value of x is never intended to be read, remove it. Fix warning
> treated as error with W=1:
> 
>   arch/powerpc/platforms/powermac/udbg_scc.c:76:9: error: variable ‘x’ 
> set but not used [-Werror=unused-but-set-variable]
> 
> Suggested-by: Christophe Leroy 
> Signed-off-by: Mathieu Malaterre 
> Reviewed-by: Christophe Leroy 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/8921305c1ec9d9ea23e5f8eac30630

cheers


Re: cxl: remove a dead branch

2018-08-13 Thread Michael Ellerman
On Thu, 2018-03-22 at 21:05:28 UTC, Mathieu Malaterre wrote:
> In commit 14baf4d9c739 ("cxl: Add guest-specific code") the following code
> was added:
> 
>   if (afu->crs_len < 0) {
>   dev_err(&afu->dev, "Unexpected configuration record size 
> value\n");
>   return -EINVAL;
>   }
> 
> However the variable `crs_len` is of type u64 and cannot be compared < 0.
> Remove the dead code section. Fix the following warning treated as error
> with W=1:
> 
> ../drivers/misc/cxl/guest.c:919:19: error: comparison of unsigned expression 
> < 0 is always false [-Werror=type-limits]
> 
> Signed-off-by: Mathieu Malaterre 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/e4ecafb14fd9cd77d8f4320af1922e

cheers


Re: [16/19] powerpc/powermac: Add missing include of header pmac.h

2018-08-13 Thread Michael Ellerman
On Thu, 2018-03-22 at 20:20:02 UTC, Mathieu Malaterre wrote:
> The header `pmac.h` was not included, leading to the following warnings,
> treated as error with W=1:
> 
>   arch/powerpc/platforms/powermac/time.c:69:13: error: no previous prototype 
> for ‘pmac_time_init’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/time.c:207:15: error: no previous prototype 
> for ‘pmac_get_boot_time’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/time.c:222:6: error: no previous prototype 
> for ‘pmac_get_rtc_time’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/time.c:240:5: error: no previous prototype 
> for ‘pmac_set_rtc_time’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/time.c:259:12: error: no previous prototype 
> for ‘via_calibrate_decr’ [-Werror=missing-prototypes]
>   arch/powerpc/platforms/powermac/time.c:311:13: error: no previous prototype 
> for ‘pmac_calibrate_decr’ [-Werror=missing-prototypes]
> 
> The function `via_calibrate_decr` was made static to silence a warning.
> 
> Signed-off-by: Mathieu Malaterre 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/2fff0f07b8441a481eb5fc0bfcd0e4

cheers


Re: powerpc: Use common error handling code in setup_new_fdt()

2018-08-13 Thread Michael Ellerman
On Sun, 2018-03-11 at 08:16:47 UTC, SF Markus Elfring wrote:
> From: Markus Elfring 
> Date: Sun, 11 Mar 2018 09:03:42 +0100
> 
> Add a jump target so that a bit of exception handling can be better reused
> at the end of this function.
> 
> This issue was detected by using the Coccinelle software.
> 
> Signed-off-by: Markus Elfring 
> Reviewed-by: Thiago Jung Bauermann 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/baedcdf5054c151a33e34392af7d8c

cheers


Re: [RFC,v2] powerpc: xmon: Add address lookup for percpu symbols

2018-08-13 Thread Michael Ellerman
On Tue, 2016-11-22 at 09:20:09 UTC, Boqun Feng wrote:
> Currently, in xmon, there is no obvious way to get an address for a
> percpu symbol for a particular cpu. Having such an ability would be good
> for debugging the system when percpu variables got involved.
> 
> Therefore, this patch introduces a new xmon command "lp" to lookup the
> address for percpu symbols. Usage of "lp" is similar to "ls", except
> that we could add a cpu number to choose the variable of which cpu we
> want to lookup. If no cpu number is given, lookup for current cpu.
> 
> Signed-off-by: Boqun Feng 

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/302c7b0c4ff5aed585419603f835de

cheers


Re: [PATCH] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Frederic Barrat




Le 13/08/2018 à 11:48, Andrew Donnellan a écrit :

On 13/08/18 19:01, Christophe Lombard wrote:

From: Christophe Lombard 


Your git committer email should probably match your sign-off email.



The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")

Signed-off-by: Christophe Lombard 


This looks like it should go to stable? I assume the reason we haven't 
noticed this previously is because we have not been testing with 
multi-AFU cards.


Yes, it's hidden until we have more than one AFU per function. Also, the 
field we overwrite at the wrong offset is read-only, so it goes undetected.


  Fred







---
  drivers/misc/ocxl/config.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..de01623 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
  u32 val;
  int rc, templ_major, templ_minor, len;
-    pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+    pci_write_config_word(dev,
+  fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+  afu_idx);


pci_write_config_byte() would be more appropriate here (see 
ocxl_config_read_afu() at line 454)



  rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
  if (rc)
  return rc;







Re: [PATCH] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Andrew Donnellan

On 13/08/18 19:01, Christophe Lombard wrote:

From: Christophe Lombard 


Your git committer email should probably match your sign-off email.



The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")

Signed-off-by: Christophe Lombard 


This looks like it should go to stable? I assume the reason we haven't 
noticed this previously is because we have not been testing with 
multi-AFU cards.



---
  drivers/misc/ocxl/config.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..de01623 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
u32 val;
int rc, templ_major, templ_minor, len;
  
-	pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);

+   pci_write_config_word(dev,
+ fn->dvsec_afu_info_pos + 
OCXL_DVSEC_AFU_INFO_AFU_IDX,
+ afu_idx);


pci_write_config_byte() would be more appropriate here (see 
ocxl_config_read_afu() at line 454)



rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
if (rc)
return rc;



--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



[PATCH] ocxl: Fix access to the AFU Descriptor Data

2018-08-13 Thread Christophe Lombard
From: Christophe Lombard 

The AFU Information DVSEC capability is a means to extract common,
general information about all of the AFUs associated with a Function
independent of the specific functionality that each AFU provides.

This patch fixes the access to the AFU Descriptor Data indexed by the
AFU Info Index field.

Fixes: 5ef3166e8a32 ("ocxl: Driver code for 'generic' opencapi devices")

Signed-off-by: Christophe Lombard 
---
 drivers/misc/ocxl/config.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 2e30de9..de01623 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
u32 val;
int rc, templ_major, templ_minor, len;
 
-   pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+   pci_write_config_word(dev,
+ fn->dvsec_afu_info_pos + 
OCXL_DVSEC_AFU_INFO_AFU_IDX,
+ afu_idx);
rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
if (rc)
return rc;
-- 
2.7.4



[PATCH v4] powerpc/topology: Get topology for shared processors at boot

2018-08-13 Thread Srikar Dronamraju
On a shared lpar, Phyp will not update the cpu associativity at boot
time. Just after boot, the system does recognize itself as a shared lpar and
triggers a request for the correct cpu associativity. But by then the scheduler
would have already created/destroyed its sched domains.

This causes
- Broken load balance across Nodes causing islands of cores.
- Performance degradation, especially if the system is lightly loaded
- dmesg to wrongly report all cpus to be in Node 0.
- Messages in dmesg saying borken topology.
- With commit 051f3ca02e46 ("sched/topology: Introduce NUMA identity
  node sched domain"), can cause rcu stalls at boot up.

From a scheduler maintainer's perspective, moving cpus from one node to
another or creating more numa levels after boot is not appropriate
without some notification to the user space.
https://lore.kernel.org/lkml/20150406214558.ga38...@linux.vnet.ibm.com/T/#u

The sched_domains_numa_masks table which is used to generate cpumasks is
only created at boot time just before creating sched domains and never
updated.  Hence, it's better to get the topology correct before the sched
domains are created.

For example on 64 core Power 8 shared lpar, dmesg reports

[2.088360] Brought up 512 CPUs
[2.088368] Node 0 CPUs: 0-511
[2.088371] Node 1 CPUs:
[2.088373] Node 2 CPUs:
[2.088375] Node 3 CPUs:
[2.088376] Node 4 CPUs:
[2.088378] Node 5 CPUs:
[2.088380] Node 6 CPUs:
[2.088382] Node 7 CPUs:
[2.088386] Node 8 CPUs:
[2.088388] Node 9 CPUs:
[2.088390] Node 10 CPUs:
[2.088392] Node 11 CPUs:
...
[3.916091] BUG: arch topology borken
[3.916103]  the DIE domain not a subset of the NUMA domain
[3.916105] BUG: arch topology borken
[3.916106]  the DIE domain not a subset of the NUMA domain
...

numactl/lscpu output will still be correct with cores spreading across
all nodes.

Socket(s): 64
NUMA node(s):  12
Model: 2.0 (pvr 004d 0200)
Model name:POWER8 (architected), altivec supported
Hypervisor vendor: pHyp
Virtualization type:   para
L1d cache: 64K
L1i cache: 32K
NUMA node0 CPU(s): 0-7,32-39,64-71,96-103,176-183,272-279,368-375,464-471
NUMA node1 CPU(s): 8-15,40-47,72-79,104-111,184-191,280-287,376-383,472-479
NUMA node2 CPU(s): 16-23,48-55,80-87,112-119,192-199,288-295,384-391,480-487
NUMA node3 CPU(s): 24-31,56-63,88-95,120-127,200-207,296-303,392-399,488-495
NUMA node4 CPU(s): 208-215,304-311,400-407,496-503
NUMA node5 CPU(s): 168-175,264-271,360-367,456-463
NUMA node6 CPU(s): 128-135,224-231,320-327,416-423
NUMA node7 CPU(s): 136-143,232-239,328-335,424-431
NUMA node8 CPU(s): 216-223,312-319,408-415,504-511
NUMA node9 CPU(s): 144-151,240-247,336-343,432-439
NUMA node10 CPU(s):152-159,248-255,344-351,440-447
NUMA node11 CPU(s):160-167,256-263,352-359,448-455

Currently on this lpar, the scheduler detects 2 levels of NUMA and
creates numa sched domains for all cpus, but it finds a single DIE
domain consisting of all cpus. Hence it deletes all numa sched domains.

To address this, detect the shared processor and update the topology soon after
cpus are set up, so that the correct topology is in place just before the
scheduler creates the sched domains.

With the fix, dmesg reports

[0.491336] numa: Node 0 CPUs: 0-7 32-39 64-71 96-103 176-183 272-279 368-375 464-471
[0.491351] numa: Node 1 CPUs: 8-15 40-47 72-79 104-111 184-191 280-287 376-383 472-479
[0.491359] numa: Node 2 CPUs: 16-23 48-55 80-87 112-119 192-199 288-295 384-391 480-487
[0.491366] numa: Node 3 CPUs: 24-31 56-63 88-95 120-127 200-207 296-303 392-399 488-495
[0.491374] numa: Node 4 CPUs: 208-215 304-311 400-407 496-503
[0.491379] numa: Node 5 CPUs: 168-175 264-271 360-367 456-463
[0.491384] numa: Node 6 CPUs: 128-135 224-231 320-327 416-423
[0.491389] numa: Node 7 CPUs: 136-143 232-239 328-335 424-431
[0.491394] numa: Node 8 CPUs: 216-223 312-319 408-415 504-511
[0.491399] numa: Node 9 CPUs: 144-151 240-247 336-343 432-439
[0.491404] numa: Node 10 CPUs: 152-159 248-255 344-351 440-447
[0.491409] numa: Node 11 CPUs: 160-167 256-263 352-359 448-455

and lscpu would also report

Socket(s): 64
NUMA node(s):  12
Model: 2.0 (pvr 004d 0200)
Model name:POWER8 (architected), altivec supported
Hypervisor vendor: pHyp
Virtualization type:   para
L1d cache: 64K
L1i cache: 32K
NUMA node0 CPU(s): 0-7,32-39,64-71,96-103,176-183,272-279,368-375,464-471
NUMA node1 CPU(s): 8-15,40-47,72-79,104-111,184-191,280-287,376-383,472-479
NUMA node2 CPU(s): 16-23,48-55,80-87,112-119,192-199,288-295,384-391,480-487
NUMA node3 CPU(s): 24-31,56-63,88-95,120-127,200-207,296-303,392-399,488-495
NUMA node4 CPU(s): 208-215,304-311,400-407,496-503
NUMA node5 CPU(s): 168-175,264-271,360-367,456-463
NUMA node6 CPU(s): 128-135,224-231,320-327,416-423
NUMA node7 CPU(s): 

Re: [PATCH v6 1/2] powerpc: Detect the presence of big-cores via "ibm,thread-groups"

2018-08-13 Thread Christoph Hellwig
On Thu, Aug 09, 2018 at 06:27:43AM -0700, Srikar Dronamraju wrote:
> * Gautham R. Shenoy  [2018-08-09 11:02:07]:
> 
> > 
> >  int threads_per_core, threads_per_subcore, threads_shift;
> > +bool has_big_cores;
> >  cpumask_t threads_core_mask;
> >  EXPORT_SYMBOL_GPL(threads_per_core);
> >  EXPORT_SYMBOL_GPL(threads_per_subcore);
> >  EXPORT_SYMBOL_GPL(threads_shift);
> > +EXPORT_SYMBOL_GPL(has_big_cores);
> 
> Why do we need EXPORT_SYMBOL_GPL?

Because it is deeply internal, and in that matches the other related
exports.


Re: [PATCH] powerpc/powernv/idle: Fix build error

2018-08-13 Thread Alexey Kardashevskiy



On 10/08/2018 17:10, Michael Ellerman wrote:
> "Aneesh Kumar K.V"  writes:
> 
>> Fix the below build error using strlcpy instead of strncpy
>>
>> In function 'pnv_parse_cpuidle_dt',
>> inlined from 'pnv_init_idle_states' at 
>> arch/powerpc/platforms/powernv/idle.c:840:7,
>> inlined from '__machine_initcall_powernv_pnv_init_idle_states' at 
>> arch/powerpc/platforms/powernv/idle.c:870:1:
>> arch/powerpc/platforms/powernv/idle.c:820:3: error: 'strncpy' specified 
>> bound 16 equals destination size [-Werror=stringop-truncation]
>>strncpy(pnv_idle_states[i].name, temp_string[i],
>>^~~~
>> PNV_IDLE_NAME_LEN);
> 
> I'm curious why I haven't seen this? What compiler are you using?


gcc 8 does this.


> 
> cheers
> 
>> diff --git a/arch/powerpc/platforms/powernv/idle.c 
>> b/arch/powerpc/platforms/powernv/idle.c
>> index ecb002c5db83..35f699ebb662 100644
>> --- a/arch/powerpc/platforms/powernv/idle.c
>> +++ b/arch/powerpc/platforms/powernv/idle.c
>> @@ -817,7 +817,7 @@ static int pnv_parse_cpuidle_dt(void)
>>  goto out;
>>  }
>>  for (i = 0; i < nr_idle_states; i++)
>> -strncpy(pnv_idle_states[i].name, temp_string[i],
>> +strlcpy(pnv_idle_states[i].name, temp_string[i],
>>  PNV_IDLE_NAME_LEN);
>>  nr_pnv_idle_states = nr_idle_states;
>>  rc = 0;
>> -- 
>> 2.17.1

-- 
Alexey