[PATCH v2 2/2] f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard

2017-04-27 Thread Chao Yu
Introduce CP_TRIMMED_FLAG to indicate all invalid block were trimmed
before umount, so once we do mount with image which contain the flag,
we don't record invalid blocks as undiscard one, when fstrim is being
triggered, we can avoid issuing redundant discard commands.

Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
v2: add trace info.
 fs/f2fs/checkpoint.c|  3 +++
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/segment.c   | 28 
 fs/f2fs/super.c |  7 +++
 include/linux/f2fs_fs.h |  1 +
 include/trace/events/f2fs.h |  4 +++-
 6 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 27578903eeb6..ea9c317b5916 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1059,6 +1059,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
 
+   if (cpc->reason & CP_TRIMMED)
+   __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
+
if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 89bfc5787d13..baefe379d43a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -130,6 +130,7 @@ enum {
 #defineCP_SYNC 0x0004
 #defineCP_RECOVERY 0x0008
 #defineCP_DISCARD  0x0010
+#define CP_TRIMMED 0x0020
 
 #define DEF_BATCHED_TRIM_SECTIONS  2048
 #define BATCHED_TRIM_SEGMENTS(sbi) \
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 69b99a8f9a01..a32268eeb472 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3005,10 +3005,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
 
/* build discard map only one time */
if (f2fs_discard_en(sbi)) {
-   memcpy(se->discard_map, se->cur_valid_map,
-   SIT_VBLOCK_MAP_SIZE);
-   sbi->discard_blks += sbi->blocks_per_seg -
-   se->valid_blocks;
+   if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+   memset(se->discard_map, 0xff,
+   SIT_VBLOCK_MAP_SIZE);
+   } else {
+   memcpy(se->discard_map,
+   se->cur_valid_map,
+   SIT_VBLOCK_MAP_SIZE);
+   sbi->discard_blks +=
+   sbi->blocks_per_seg -
+   se->valid_blocks;
+   }
}
 
if (sbi->segs_per_sec > 1)
@@ -3032,10 +3039,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
seg_info_from_raw_sit(se, );
 
if (f2fs_discard_en(sbi)) {
-   memcpy(se->discard_map, se->cur_valid_map,
-   SIT_VBLOCK_MAP_SIZE);
-   sbi->discard_blks += old_valid_blocks -
-   se->valid_blocks;
+   if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+   memset(se->discard_map, 0xff,
+   SIT_VBLOCK_MAP_SIZE);
+   } else {
+   memcpy(se->discard_map, se->cur_valid_map,
+   SIT_VBLOCK_MAP_SIZE);
+   sbi->discard_blks += old_valid_blocks -
+   se->valid_blocks;
+   }
}
 
if (sbi->segs_per_sec > 1)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4cd3bee6775f..9a14b2590337 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -797,6 +797,13 @@ static void f2fs_put_super(struct super_block *sb)
/* be sure to wait for any on-going discard commands */
f2fs_wait_discard_bios(sbi);
 
+   if (!sbi->discard_blks) {
+   struct cp_control cpc = {
+   .reason = CP_UMOUNT | CP_TRIMMED,
+   };
+   write_checkpoint(sbi, );
+   }
+
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 093549e10ee2..b6feed6547ce 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */

[PATCH v2 2/2] f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard

2017-04-27 Thread Chao Yu
Introduce CP_TRIMMED_FLAG to indicate all invalid block were trimmed
before umount, so once we do mount with image which contain the flag,
we don't record invalid blocks as undiscard one, when fstrim is being
triggered, we can avoid issuing redundant discard commands.

Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
v2: add trace info.
 fs/f2fs/checkpoint.c|  3 +++
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/segment.c   | 28 
 fs/f2fs/super.c |  7 +++
 include/linux/f2fs_fs.h |  1 +
 include/trace/events/f2fs.h |  4 +++-
 6 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 27578903eeb6..ea9c317b5916 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1059,6 +1059,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
 
+   if (cpc->reason & CP_TRIMMED)
+   __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
+
if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 89bfc5787d13..baefe379d43a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -130,6 +130,7 @@ enum {
 #defineCP_SYNC 0x0004
 #defineCP_RECOVERY 0x0008
 #defineCP_DISCARD  0x0010
+#define CP_TRIMMED 0x0020
 
 #define DEF_BATCHED_TRIM_SECTIONS  2048
 #define BATCHED_TRIM_SEGMENTS(sbi) \
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 69b99a8f9a01..a32268eeb472 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3005,10 +3005,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
 
/* build discard map only one time */
if (f2fs_discard_en(sbi)) {
-   memcpy(se->discard_map, se->cur_valid_map,
-   SIT_VBLOCK_MAP_SIZE);
-   sbi->discard_blks += sbi->blocks_per_seg -
-   se->valid_blocks;
+   if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+   memset(se->discard_map, 0xff,
+   SIT_VBLOCK_MAP_SIZE);
+   } else {
+   memcpy(se->discard_map,
+   se->cur_valid_map,
+   SIT_VBLOCK_MAP_SIZE);
+   sbi->discard_blks +=
+   sbi->blocks_per_seg -
+   se->valid_blocks;
+   }
}
 
if (sbi->segs_per_sec > 1)
@@ -3032,10 +3039,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
seg_info_from_raw_sit(se, );
 
if (f2fs_discard_en(sbi)) {
-   memcpy(se->discard_map, se->cur_valid_map,
-   SIT_VBLOCK_MAP_SIZE);
-   sbi->discard_blks += old_valid_blocks -
-   se->valid_blocks;
+   if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+   memset(se->discard_map, 0xff,
+   SIT_VBLOCK_MAP_SIZE);
+   } else {
+   memcpy(se->discard_map, se->cur_valid_map,
+   SIT_VBLOCK_MAP_SIZE);
+   sbi->discard_blks += old_valid_blocks -
+   se->valid_blocks;
+   }
}
 
if (sbi->segs_per_sec > 1)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4cd3bee6775f..9a14b2590337 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -797,6 +797,13 @@ static void f2fs_put_super(struct super_block *sb)
/* be sure to wait for any on-going discard commands */
f2fs_wait_discard_bios(sbi);
 
+   if (!sbi->discard_blks) {
+   struct cp_control cpc = {
+   .reason = CP_UMOUNT | CP_TRIMMED,
+   };
+   write_checkpoint(sbi, );
+   }
+
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 093549e10ee2..b6feed6547ce 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -114,6 +114,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_TRIMMED_FLAG

Re: [REGRESSION next-20170426] Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices") causes oops in mvneta

2017-04-27 Thread Sricharan R
Hi Ralph,

On 4/27/2017 8:10 PM, Ralph Sennhauser wrote:
> On Thu, 27 Apr 2017 19:05:09 +0530
> Sricharan R  wrote:
> 
>> Hi,
>>
>> On 4/26/2017 9:45 PM, Ralph Sennhauser wrote:
>>> Hi Sricharan R,
>>>
>>> Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe
>>> time for platform/amba/pci bus devices") causes a kernel panic as
>>> in the log below on an armada-385. Reverting the commit fixes the
>>> issue.
>>>
>>> Regards
>>> Ralph  
>>
>> Somehow not getting a obvious clue on whats going wrong with the logs
>> below. From the log and looking in to dts, the drivers seems to the
>> one for "marvell,armada-370-neta".
> 
> Correct.
> 
>> Issue looks the data from the dma
>> has gone bad and subsequently referring the wrong data has resulted
>> in the crash. Looks like the dma_masks is the one going wrong.
>> Can i get some logs from mvneta_probe, about dev->dma_mask,
>> dev->coherent_dma_mask and dev->dma_ops with and without the patch
>> to see whats the difference ?
> 
> Not sure I understood what exactly you are after. Might be faster to
> just send me a patch with all debug print statements you like to see.
> 

Attached the patch with debug prints.

Regards,
 Sricharan

-- 
"QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
From fe77d3968d6bc35b46e5f30d5c67c7603aa10597 Mon Sep 17 00:00:00 2001
From: Sricharan R 
Date: Fri, 28 Apr 2017 09:43:26 +0530
Subject: [PATCH] Debug prints

Signed-off-by: Sricharan R 
---
 drivers/net/ethernet/marvell/mvneta.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c 
b/drivers/net/ethernet/marvell/mvneta.c
index d297011..250f0b2 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4146,6 +4146,12 @@ static int mvneta_probe(struct platform_device *pdev)
int phy_mode;
int err;
int cpu;
+   struct device *ddev = >dev;
+
+
+   dev_err(ddev, "dev->dma_mask 0x%llx\n", *(ddev->dma_mask));
+   dev_err(ddev, "dev->coherent_dma_mask 0x%llx\n", 
ddev->coherent_dma_mask);
+   dev_err(ddev, "dev->dma_ops 0x%llx\n", ddev->dma_ops);
 
dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, 
rxq_number);
if (!dev)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation



Re: [REGRESSION next-20170426] Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices") causes oops in mvneta

2017-04-27 Thread Sricharan R
Hi Ralph,

On 4/27/2017 8:10 PM, Ralph Sennhauser wrote:
> On Thu, 27 Apr 2017 19:05:09 +0530
> Sricharan R  wrote:
> 
>> Hi,
>>
>> On 4/26/2017 9:45 PM, Ralph Sennhauser wrote:
>>> Hi Sricharan R,
>>>
>>> Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe
>>> time for platform/amba/pci bus devices") causes a kernel panic as
>>> in the log below on an armada-385. Reverting the commit fixes the
>>> issue.
>>>
>>> Regards
>>> Ralph  
>>
>> Somehow not getting a obvious clue on whats going wrong with the logs
>> below. From the log and looking in to dts, the drivers seems to the
>> one for "marvell,armada-370-neta".
> 
> Correct.
> 
>> Issue looks the data from the dma
>> has gone bad and subsequently referring the wrong data has resulted
>> in the crash. Looks like the dma_masks is the one going wrong.
>> Can i get some logs from mvneta_probe, about dev->dma_mask,
>> dev->coherent_dma_mask and dev->dma_ops with and without the patch
>> to see whats the difference ?
> 
> Not sure I understood what exactly you are after. Might be faster to
> just send me a patch with all debug print statements you like to see.
> 

Attached the patch with debug prints.

Regards,
 Sricharan

-- 
"QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation
From fe77d3968d6bc35b46e5f30d5c67c7603aa10597 Mon Sep 17 00:00:00 2001
From: Sricharan R 
Date: Fri, 28 Apr 2017 09:43:26 +0530
Subject: [PATCH] Debug prints

Signed-off-by: Sricharan R 
---
 drivers/net/ethernet/marvell/mvneta.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c 
b/drivers/net/ethernet/marvell/mvneta.c
index d297011..250f0b2 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4146,6 +4146,12 @@ static int mvneta_probe(struct platform_device *pdev)
int phy_mode;
int err;
int cpu;
+   struct device *ddev = >dev;
+
+
+   dev_err(ddev, "dev->dma_mask 0x%llx\n", *(ddev->dma_mask));
+   dev_err(ddev, "dev->coherent_dma_mask 0x%llx\n", 
ddev->coherent_dma_mask);
+   dev_err(ddev, "dev->dma_ops 0x%llx\n", ddev->dma_ops);
 
dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, 
rxq_number);
if (!dev)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation



Re: [PATCH] fs: Handle register_shrinker failure

2017-04-27 Thread Al Viro
On Fri, Apr 28, 2017 at 05:30:46AM +0100, Al Viro wrote:

> I really don't like that.  Your "remove it from all lists and pray that
> nobody has picked a reference of any kind" at the very least needs a careful
> written proof of correctness.  AFAICS, somebody might've found it on the
> list and attempted to grab ->s_umount (grab_super() from another thread
> calling sget()).  Then they'd block until your up_write() in there and
> bugger the system up trying to play with ->s_umount in the object you've
> freed.
> 
> NAK.  Yes, the bug is real, but this is not a solution.

Why do we register it that early, anyway?  super_cache_scan() won't do
anything until we are done with setting the sucker up and dropped ->s_umount.
How about we initialize ->s_shrink.list in alloc_super(), have
deactivate_locked_super() call unregister_shrinker() only if list_empty(...)
and have mount_fs() do
error = register_shrinker(>s_shrink);
if (error)
goto out_sb;
sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
if (error)
goto out_sb;

Folks?  Am I missing something subtle here?


Re: [PATCH] fs: Handle register_shrinker failure

2017-04-27 Thread Al Viro
On Fri, Apr 28, 2017 at 05:30:46AM +0100, Al Viro wrote:

> I really don't like that.  Your "remove it from all lists and pray that
> nobody has picked a reference of any kind" at the very least needs a careful
> written proof of correctness.  AFAICS, somebody might've found it on the
> list and attempted to grab ->s_umount (grab_super() from another thread
> calling sget()).  Then they'd block until your up_write() in there and
> bugger the system up trying to play with ->s_umount in the object you've
> freed.
> 
> NAK.  Yes, the bug is real, but this is not a solution.

Why do we register it that early, anyway?  super_cache_scan() won't do
anything until we are done with setting the sucker up and dropped ->s_umount.
How about we initialize ->s_shrink.list in alloc_super(), have
deactivate_locked_super() call unregister_shrinker() only if list_empty(...)
and have mount_fs() do
error = register_shrinker(>s_shrink);
if (error)
goto out_sb;
sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
if (error)
goto out_sb;

Folks?  Am I missing something subtle here?


Re: [PATCH v5 31/32] x86: Add sysfs support for Secure Memory Encryption

2017-04-27 Thread Dave Young
On 04/27/17 at 08:52am, Dave Hansen wrote:
> On 04/27/2017 12:25 AM, Dave Young wrote:
> > On 04/21/17 at 02:55pm, Dave Hansen wrote:
> >> On 04/18/2017 02:22 PM, Tom Lendacky wrote:
> >>> Add sysfs support for SME so that user-space utilities (kdump, etc.) can
> >>> determine if SME is active.
> >>>
> >>> A new directory will be created:
> >>>   /sys/kernel/mm/sme/
> >>>
> >>> And two entries within the new directory:
> >>>   /sys/kernel/mm/sme/active
> >>>   /sys/kernel/mm/sme/encryption_mask
> >>
> >> Why do they care, and what will they be doing with this information?
> > 
> > Since kdump will copy old memory but need this to know if the old memory
> > was encrypted or not. With this sysfs file we can know the previous SME
> > status and pass to kdump kernel as like a kernel param.
> > 
> > Tom, have you got chance to try if it works or not?
> 
> What will the kdump kernel do with it though?  We kexec() into that
> kernel so the SME keys will all be the same, right?  So, will the kdump
> kernel be just setting the encryption bit in the PTE so it can copy the
> old plaintext out?

I assume it is for active -> non active case, the new boot need to know
the old memory is encrypted. But I think I did not read all the patches
I may miss things.

> 
> Why do we need both 'active' and 'encryption_mask'?  How could it be
> that the hardware-enumerated 'encryption_mask' changes across a kexec()?

Leave this question to Tom..

Thanks
Dave


Re: [PATCH v5 31/32] x86: Add sysfs support for Secure Memory Encryption

2017-04-27 Thread Dave Young
On 04/27/17 at 08:52am, Dave Hansen wrote:
> On 04/27/2017 12:25 AM, Dave Young wrote:
> > On 04/21/17 at 02:55pm, Dave Hansen wrote:
> >> On 04/18/2017 02:22 PM, Tom Lendacky wrote:
> >>> Add sysfs support for SME so that user-space utilities (kdump, etc.) can
> >>> determine if SME is active.
> >>>
> >>> A new directory will be created:
> >>>   /sys/kernel/mm/sme/
> >>>
> >>> And two entries within the new directory:
> >>>   /sys/kernel/mm/sme/active
> >>>   /sys/kernel/mm/sme/encryption_mask
> >>
> >> Why do they care, and what will they be doing with this information?
> > 
> > Since kdump will copy old memory but need this to know if the old memory
> > was encrypted or not. With this sysfs file we can know the previous SME
> > status and pass to kdump kernel as like a kernel param.
> > 
> > Tom, have you got chance to try if it works or not?
> 
> What will the kdump kernel do with it though?  We kexec() into that
> kernel so the SME keys will all be the same, right?  So, will the kdump
> kernel be just setting the encryption bit in the PTE so it can copy the
> old plaintext out?

I assume it is for active -> non active case, the new boot need to know
the old memory is encrypted. But I think I did not read all the patches
I may miss things.

> 
> Why do we need both 'active' and 'encryption_mask'?  How could it be
> that the hardware-enumerated 'encryption_mask' changes across a kexec()?

Leave this question to Tom..

Thanks
Dave


[PATCH v3 4/4] ACPI: Fix memory mapping leaks in current sysfs dumpable ACPI tables support

2017-04-27 Thread Lv Zheng
This patch adds acpi_put_table() to make all acpi_get_table() clone
invocations balanced for sysfs ACPI table dump code.

Since Linux does not use all of the tables, this can help to reduce some
usless memory mappings.

While originally, all tables will be remained to be mapped after a
userspace acpidump execution, potentially causing problem on server
platforms. With the new APIs, it is possible to release such useless table
mappings.

Signed-off-by: Lv Zheng 
---
 drivers/acpi/sysfs.c | 41 +++--
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 2bbf722..14425dc 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -346,11 +346,22 @@ static ssize_t acpi_table_show(struct file *filp, struct 
kobject *kobj,
return len;
 }
 
+static bool acpi_table_has_multiple_instances(char *signature)
+{
+   acpi_status status;
+   struct acpi_table_header *header;
+
+   status = acpi_get_table(signature, 2, );
+   if (ACPI_FAILURE(status))
+   return false;
+   acpi_put_table(header);
+   return true;
+}
+
 static int acpi_table_attr_init(struct kobject *tables_obj,
struct acpi_table_attr *table_attr,
struct acpi_table_header *table_header)
 {
-   struct acpi_table_header *header = NULL;
struct acpi_table_attr *attr = NULL;
char instance_str[ACPI_INST_SIZE];
 
@@ -371,9 +382,9 @@ static int acpi_table_attr_init(struct kobject *tables_obj,
 
ACPI_MOVE_NAME(table_attr->filename, table_header->signature);
table_attr->filename[ACPI_NAME_SIZE] = '\0';
-   if (table_attr->instance > 1 || (table_attr->instance == 1 &&
-!acpi_get_table
-(table_header->signature, 2, 
))) {
+   if (table_attr->instance > 1 ||
+   (table_attr->instance == 1 &&
+acpi_table_has_multiple_instances(table_header->signature))) {
snprintf(instance_str, sizeof(instance_str), "%u",
 table_attr->instance);
strcat(table_attr->filename, instance_str);
@@ -422,11 +433,11 @@ acpi_status acpi_sysfs_table_handler(u32 event, void 
*table, void *context)
 
 static int acpi_tables_sysfs_init(void)
 {
-   struct acpi_table_attr *table_attr;
+   struct acpi_table_attr *table_attr = NULL;
struct acpi_table_header *table_header = NULL;
int table_index;
acpi_status status;
-   int ret;
+   int ret = 0;
 
tables_kobj = kobject_create_and_add("tables", acpi_kobj);
if (!tables_kobj)
@@ -446,16 +457,26 @@ static int acpi_tables_sysfs_init(void)
continue;
 
table_attr = kzalloc(sizeof(*table_attr), GFP_KERNEL);
-   if (!table_attr)
-   return -ENOMEM;
+   if (!table_attr) {
+   ret = -ENOMEM;
+   goto next_table;
+   }
 
ret = acpi_table_attr_init(tables_kobj,
   table_attr, table_header);
+   if (ret)
+   goto next_table;
+   list_add_tail(_attr->node, _table_attr_list);
+
+next_table:
+   acpi_put_table(table_header);
if (ret) {
-   kfree(table_attr);
+   if (table_attr) {
+   kfree(table_attr);
+   table_attr = NULL;
+   }
return ret;
}
-   list_add_tail(_attr->node, _table_attr_list);
}
 
kobject_uevent(tables_kobj, KOBJ_ADD);
-- 
2.7.4



[PATCH v3 4/4] ACPI: Fix memory mapping leaks in current sysfs dumpable ACPI tables support

2017-04-27 Thread Lv Zheng
This patch adds acpi_put_table() to make all acpi_get_table() clone
invocations balanced for sysfs ACPI table dump code.

Since Linux does not use all of the tables, this can help to reduce some
usless memory mappings.

While originally, all tables will be remained to be mapped after a
userspace acpidump execution, potentially causing problem on server
platforms. With the new APIs, it is possible to release such useless table
mappings.

Signed-off-by: Lv Zheng 
---
 drivers/acpi/sysfs.c | 41 +++--
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 2bbf722..14425dc 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -346,11 +346,22 @@ static ssize_t acpi_table_show(struct file *filp, struct 
kobject *kobj,
return len;
 }
 
+static bool acpi_table_has_multiple_instances(char *signature)
+{
+   acpi_status status;
+   struct acpi_table_header *header;
+
+   status = acpi_get_table(signature, 2, );
+   if (ACPI_FAILURE(status))
+   return false;
+   acpi_put_table(header);
+   return true;
+}
+
 static int acpi_table_attr_init(struct kobject *tables_obj,
struct acpi_table_attr *table_attr,
struct acpi_table_header *table_header)
 {
-   struct acpi_table_header *header = NULL;
struct acpi_table_attr *attr = NULL;
char instance_str[ACPI_INST_SIZE];
 
@@ -371,9 +382,9 @@ static int acpi_table_attr_init(struct kobject *tables_obj,
 
ACPI_MOVE_NAME(table_attr->filename, table_header->signature);
table_attr->filename[ACPI_NAME_SIZE] = '\0';
-   if (table_attr->instance > 1 || (table_attr->instance == 1 &&
-!acpi_get_table
-(table_header->signature, 2, 
))) {
+   if (table_attr->instance > 1 ||
+   (table_attr->instance == 1 &&
+acpi_table_has_multiple_instances(table_header->signature))) {
snprintf(instance_str, sizeof(instance_str), "%u",
 table_attr->instance);
strcat(table_attr->filename, instance_str);
@@ -422,11 +433,11 @@ acpi_status acpi_sysfs_table_handler(u32 event, void 
*table, void *context)
 
 static int acpi_tables_sysfs_init(void)
 {
-   struct acpi_table_attr *table_attr;
+   struct acpi_table_attr *table_attr = NULL;
struct acpi_table_header *table_header = NULL;
int table_index;
acpi_status status;
-   int ret;
+   int ret = 0;
 
tables_kobj = kobject_create_and_add("tables", acpi_kobj);
if (!tables_kobj)
@@ -446,16 +457,26 @@ static int acpi_tables_sysfs_init(void)
continue;
 
table_attr = kzalloc(sizeof(*table_attr), GFP_KERNEL);
-   if (!table_attr)
-   return -ENOMEM;
+   if (!table_attr) {
+   ret = -ENOMEM;
+   goto next_table;
+   }
 
ret = acpi_table_attr_init(tables_kobj,
   table_attr, table_header);
+   if (ret)
+   goto next_table;
+   list_add_tail(_attr->node, _table_attr_list);
+
+next_table:
+   acpi_put_table(table_header);
if (ret) {
-   kfree(table_attr);
+   if (table_attr) {
+   kfree(table_attr);
+   table_attr = NULL;
+   }
return ret;
}
-   list_add_tail(_attr->node, _table_attr_list);
}
 
kobject_uevent(tables_kobj, KOBJ_ADD);
-- 
2.7.4



[PATCH v3 1/4] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling

2017-04-27 Thread Lv Zheng
In the Linux kernel side, acpi_get_table() clones haven't been fully
balanced by acpi_put_table() invocations. In ACPICA side, due to the design
change, there are also unbalanced acpi_get_table_by_index() invocations
requiring special care.

So it is not a good timing to report acpi_get_table() counting errors. The
strict balanced validation count check should only be enabled after
confirming that all invocations are safe and compliant to their designed
purposes.

Thus this patch removes the fatal error along with the error report to
fix this issue. Reported by Dan Williams, fixed by Lv Zheng.

Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() and 
early_acpi_os_unmap_memory() from Linux kernel")
Cc: Anush Seetharaman 
Cc: Dan Williams 
Reported-by: Anush Seetharaman 
Reported-by: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/acpica/tbutils.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 5a968a7..7abe665 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
 
table_desc->validation_count++;
if (table_desc->validation_count == 0) {
-   ACPI_ERROR((AE_INFO,
-   "Table %p, Validation count is zero after 
increment\n",
-   table_desc));
table_desc->validation_count--;
-   return_ACPI_STATUS(AE_LIMIT);
}
 
*out_table = table_desc->pointer;
-- 
2.7.4



[PATCH v3 1/4] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling

2017-04-27 Thread Lv Zheng
In the Linux kernel side, acpi_get_table() clones haven't been fully
balanced by acpi_put_table() invocations. In ACPICA side, due to the design
change, there are also unbalanced acpi_get_table_by_index() invocations
requiring special care.

So it is not a good timing to report acpi_get_table() counting errors. The
strict balanced validation count check should only be enabled after
confirming that all invocations are safe and compliant to their designed
purposes.

Thus this patch removes the fatal error along with the error report to
fix this issue. Reported by Dan Williams, fixed by Lv Zheng.

Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() and 
early_acpi_os_unmap_memory() from Linux kernel")
Cc: Anush Seetharaman 
Cc: Dan Williams 
Reported-by: Anush Seetharaman 
Reported-by: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/acpica/tbutils.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 5a968a7..7abe665 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
 
table_desc->validation_count++;
if (table_desc->validation_count == 0) {
-   ACPI_ERROR((AE_INFO,
-   "Table %p, Validation count is zero after 
increment\n",
-   table_desc));
table_desc->validation_count--;
-   return_ACPI_STATUS(AE_LIMIT);
}
 
*out_table = table_desc->pointer;
-- 
2.7.4



[PATCH v3 2/4] ACPICA: Tables: Add mechanism to allow to balance late stage acpi_get_table() independently

2017-04-27 Thread Lv Zheng
For all frequent late stage acpi_get_table() clone invocations, we should
only fix them altogether, otherwise, excessive acpi_put_table() could
unexpectedly unmap the table used by the other users. Thus the current plan
is to fix all acpi_get_table() clones together or to fix none of them. This
prevents kernel developers from improving the late stage code quality
without waiting for the ACPICA upstream to improve first.

This patch adds a mechanism to stop decrementing validation count to
prevent the table unmapping operations so that acpi_put_table() balance
fixes can be done independently to each others.

Cc: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/acpica/tbutils.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 7abe665..b517bd0 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -445,12 +445,18 @@ void acpi_tb_put_table(struct acpi_table_desc *table_desc)
 
ACPI_FUNCTION_TRACE(acpi_tb_put_table);
 
-   if (table_desc->validation_count == 0) {
+   if ((table_desc->validation_count + 1) == 0) {
ACPI_WARNING((AE_INFO,
- "Table %p, Validation count is zero before 
decrement\n",
+ "Table %p, Validation count is about to expire, 
decrement is unsafe\n",
  table_desc));
return_VOID;
}
+   if (table_desc->validation_count == 0) {
+   ACPI_ERROR((AE_INFO,
+  "Table %p, Validation count is zero before 
decrement\n",
+  table_desc));
+   return_VOID;
+   }
table_desc->validation_count--;
 
if (table_desc->validation_count == 0) {
-- 
2.7.4



[PATCH v3 3/4] ACPI: sysfs: Fix acpi_get_table() leak

2017-04-27 Thread Lv Zheng
From: Dan Williams 

Reading an ACPI table through the /sys/firmware/acpi/tables interface
more than 65,536 times leads to the following log message:

 ACPI Error: Table 88033595eaa8, Validation count is zero after
increment
  (20170119/tbutils-423)

Add the missing acpi_put_table() so the table ->validation_count is
decremented after each read.

Reported-by: Anush Seetharaman 
Signed-off-by: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/sysfs.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 1b5ee1e..2bbf722 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct 
kobject *kobj,
container_of(bin_attr, struct acpi_table_attr, attr);
struct acpi_table_header *table_header = NULL;
acpi_status status;
+   ssize_t len;
 
status = acpi_get_table(table_attr->name, table_attr->instance,
_header);
if (ACPI_FAILURE(status))
return -ENODEV;
 
-   return memory_read_from_buffer(buf, count, ,
-  table_header, table_header->length);
+   len = memory_read_from_buffer(buf, count, ,
+ table_header, table_header->length);
+   acpi_put_table(table_header);
+   return len;
 }
 
 static int acpi_table_attr_init(struct kobject *tables_obj,
-- 
2.7.4



[PATCH v3 2/4] ACPICA: Tables: Add mechanism to allow to balance late stage acpi_get_table() independently

2017-04-27 Thread Lv Zheng
For all frequent late stage acpi_get_table() clone invocations, we should
only fix them altogether, otherwise, excessive acpi_put_table() could
unexpectedly unmap the table used by the other users. Thus the current plan
is to fix all acpi_get_table() clones together or to fix none of them. This
prevents kernel developers from improving the late stage code quality
without waiting for the ACPICA upstream to improve first.

This patch adds a mechanism to stop decrementing validation count to
prevent the table unmapping operations so that acpi_put_table() balance
fixes can be done independently to each others.

Cc: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/acpica/tbutils.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 7abe665..b517bd0 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -445,12 +445,18 @@ void acpi_tb_put_table(struct acpi_table_desc *table_desc)
 
ACPI_FUNCTION_TRACE(acpi_tb_put_table);
 
-   if (table_desc->validation_count == 0) {
+   if ((table_desc->validation_count + 1) == 0) {
ACPI_WARNING((AE_INFO,
- "Table %p, Validation count is zero before 
decrement\n",
+ "Table %p, Validation count is about to expire, 
decrement is unsafe\n",
  table_desc));
return_VOID;
}
+   if (table_desc->validation_count == 0) {
+   ACPI_ERROR((AE_INFO,
+  "Table %p, Validation count is zero before 
decrement\n",
+  table_desc));
+   return_VOID;
+   }
table_desc->validation_count--;
 
if (table_desc->validation_count == 0) {
-- 
2.7.4



[PATCH v3 3/4] ACPI: sysfs: Fix acpi_get_table() leak

2017-04-27 Thread Lv Zheng
From: Dan Williams 

Reading an ACPI table through the /sys/firmware/acpi/tables interface
more than 65,536 times leads to the following log message:

 ACPI Error: Table 88033595eaa8, Validation count is zero after
increment
  (20170119/tbutils-423)

Add the missing acpi_put_table() so the table ->validation_count is
decremented after each read.

Reported-by: Anush Seetharaman 
Signed-off-by: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/sysfs.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 1b5ee1e..2bbf722 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct 
kobject *kobj,
container_of(bin_attr, struct acpi_table_attr, attr);
struct acpi_table_header *table_header = NULL;
acpi_status status;
+   ssize_t len;
 
status = acpi_get_table(table_attr->name, table_attr->instance,
_header);
if (ACPI_FAILURE(status))
return -ENODEV;
 
-   return memory_read_from_buffer(buf, count, ,
-  table_header, table_header->length);
+   len = memory_read_from_buffer(buf, count, ,
+ table_header, table_header->length);
+   acpi_put_table(table_header);
+   return len;
 }
 
 static int acpi_table_attr_init(struct kobject *tables_obj,
-- 
2.7.4



[PATCH v2 1/4] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling

2017-04-27 Thread Lv Zheng
In the Linux kernel side, acpi_get_table() clones haven't been fully
balanced by acpi_put_table() invocations. In ACPICA side, due to the design
change, there are also unbalanced acpi_get_table_by_index() invocations
requiring special care.

So it is not a good timing to report acpi_get_table() counting errors. The
strict balanced validation count check should only be enabled after
confirming that all invocations are safe and compliant to their designed
purposes.

Thus this patch removes the fatal error along with the error report to
fix this issue. Reported by Dan Williams, fixed by Lv Zheng.

Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() and 
early_acpi_os_unmap_memory() from Linux kernel")
Cc: Dan Williams 
Reported-by: Anush Seetharaman 
Reported-by: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/acpica/tbutils.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 5a968a7..7abe665 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
 
table_desc->validation_count++;
if (table_desc->validation_count == 0) {
-   ACPI_ERROR((AE_INFO,
-   "Table %p, Validation count is zero after 
increment\n",
-   table_desc));
table_desc->validation_count--;
-   return_ACPI_STATUS(AE_LIMIT);
}
 
*out_table = table_desc->pointer;
-- 
2.7.4



[PATCH v2 1/4] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling

2017-04-27 Thread Lv Zheng
In the Linux kernel side, acpi_get_table() clones haven't been fully
balanced by acpi_put_table() invocations. In ACPICA side, due to the design
change, there are also unbalanced acpi_get_table_by_index() invocations
requiring special care.

So it is not a good timing to report acpi_get_table() counting errors. The
strict balanced validation count check should only be enabled after
confirming that all invocations are safe and compliant to their designed
purposes.

Thus this patch removes the fatal error along with the error report to
fix this issue. Reported by Dan Williams, fixed by Lv Zheng.

Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() and 
early_acpi_os_unmap_memory() from Linux kernel")
Cc: Dan Williams 
Reported-by: Anush Seetharaman 
Reported-by: Dan Williams 
Signed-off-by: Lv Zheng 
---
 drivers/acpi/acpica/tbutils.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 5a968a7..7abe665 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
 
table_desc->validation_count++;
if (table_desc->validation_count == 0) {
-   ACPI_ERROR((AE_INFO,
-   "Table %p, Validation count is zero after 
increment\n",
-   table_desc));
table_desc->validation_count--;
-   return_ACPI_STATUS(AE_LIMIT);
}
 
*out_table = table_desc->pointer;
-- 
2.7.4



Re: [PATCH] kexec: allocate buffer in top-down, if specified, correctly

2017-04-27 Thread Dave Young
Correct Vivek's email address
On 04/28/17 at 01:19pm, Dave Young wrote:
> Vivek, can you help to give some comments about the locate hole isssue
> in kexec_file?
> 
> On 04/28/17 at 09:51am, AKASHI Takahiro wrote:
> > Thiago,
> > 
> > Thank you for the comment.
> > 
> > On Thu, Apr 27, 2017 at 07:00:04PM -0300, Thiago Jung Bauermann wrote:
> > > Hello,
> > > 
> > > Am Mittwoch, 26. April 2017, 17:22:09 BRT schrieb AKASHI Takahiro:
> > > > The current kexec_locate_mem_hole(kbuf.top_down == 1) stops searching at
> > > > the first memory region that has enough space for requested size even if
> > > > some of higher regions may also have.
> > > 
> > > kexec_locate_mem_hole expects arch_kexec_walk_mem to walk memory from top 
> > > to 
> > > bottom if top_down is true. That is what powerpc's version does.
> > 
> > Ah, I haven't noticed that, but x86 doesn't have arch_kexec_walk_mem and
> > how can it work for x86?
> > 
> > > Isn't it possible to walk resources from top to bottom?
> > 
> > Yes, it will be, but it seems to me that such a behavior is not intuitive
> > and even confusing if it doesn't come with explicit explanation.
> 
> Thing need to make clear is why do we need the change, it might be a
> problem for crashkernel=xM,low since that is for softiotlb in case
> crashkernel=xM,high being used, otherwise seems current code is fine.
>  
> Need seeking for old memory from Vivek to confirm.
> > 
> > > > This behavior is not consistent with locate_hole(hole_end == -1) 
> > > > function
> > > > of kexec-tools.
> > > > 
> > > > This patch fixes the bug, going though all the memory regions anyway.
> > > 
> > > This patch would break powerpc, because at the end of the memory walk 
> > > kbuf 
> > > would have the lowest memory hole.
> > > 
> > > If it's not possible to walk resources in reverse order, then this patch 
> > > needs 
> > > to change powerpc to always walk memory from bottom to top.
> > 
> > So I would like to hear from x86 guys.
> > 
> > Thanks
> > -Takahiro AKASHI
> > 
> > > -- 
> > > Thiago Jung Bauermann
> > > IBM Linux Technology Center
> > > 


Re: [PATCH] kexec: allocate buffer in top-down, if specified, correctly

2017-04-27 Thread Dave Young
Correct Vivek's email address
On 04/28/17 at 01:19pm, Dave Young wrote:
> Vivek, can you help to give some comments about the locate hole isssue
> in kexec_file?
> 
> On 04/28/17 at 09:51am, AKASHI Takahiro wrote:
> > Thiago,
> > 
> > Thank you for the comment.
> > 
> > On Thu, Apr 27, 2017 at 07:00:04PM -0300, Thiago Jung Bauermann wrote:
> > > Hello,
> > > 
> > > Am Mittwoch, 26. April 2017, 17:22:09 BRT schrieb AKASHI Takahiro:
> > > > The current kexec_locate_mem_hole(kbuf.top_down == 1) stops searching at
> > > > the first memory region that has enough space for requested size even if
> > > > some of higher regions may also have.
> > > 
> > > kexec_locate_mem_hole expects arch_kexec_walk_mem to walk memory from top 
> > > to 
> > > bottom if top_down is true. That is what powerpc's version does.
> > 
> > Ah, I haven't noticed that, but x86 doesn't have arch_kexec_walk_mem and
> > how can it work for x86?
> > 
> > > Isn't it possible to walk resources from top to bottom?
> > 
> > Yes, it will be, but it seems to me that such a behavior is not intuitive
> > and even confusing if it doesn't come with explicit explanation.
> 
> Thing need to make clear is why do we need the change, it might be a
> problem for crashkernel=xM,low since that is for softiotlb in case
> crashkernel=xM,high being used, otherwise seems current code is fine.
>  
> Need seeking for old memory from Vivek to confirm.
> > 
> > > > This behavior is not consistent with locate_hole(hole_end == -1) 
> > > > function
> > > > of kexec-tools.
> > > > 
> > > > This patch fixes the bug, going though all the memory regions anyway.
> > > 
> > > This patch would break powerpc, because at the end of the memory walk 
> > > kbuf 
> > > would have the lowest memory hole.
> > > 
> > > If it's not possible to walk resources in reverse order, then this patch 
> > > needs 
> > > to change powerpc to always walk memory from bottom to top.
> > 
> > So I would like to hear from x86 guys.
> > 
> > Thanks
> > -Takahiro AKASHI
> > 
> > > -- 
> > > Thiago Jung Bauermann
> > > IBM Linux Technology Center
> > > 


Re: [PATCH v5 10/10] arm: dts: genmai: Add ethernet pin group

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:48:45AM +, Chris Brandt wrote:
> Hi Geert,
> 
> On Thursday, April 27, 2017, Geert Uytterhoeven wrote:
> > > + {
> > > +   pinctrl-names = "default";
> > > +   pinctrl-0 = <_pins>;
> > > +
> > > +   status = "okay";
> > > +
> > > +   renesas,no-ether-link;
> > > +   phy-handle = <>;
> > > +   phy0: ethernet-phy@0 {
> > > +   reg = <0>;
> > 
> > Shouldn't the interrupt (connected to P1_15) be described?
> 
> 
> That interrupt pin from the PHY is not used. It did not need to be connected.

So things are fine as above or should I expect to see v6?


Re: [PATCH v5 10/10] arm: dts: genmai: Add ethernet pin group

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:48:45AM +, Chris Brandt wrote:
> Hi Geert,
> 
> On Thursday, April 27, 2017, Geert Uytterhoeven wrote:
> > > + {
> > > +   pinctrl-names = "default";
> > > +   pinctrl-0 = <_pins>;
> > > +
> > > +   status = "okay";
> > > +
> > > +   renesas,no-ether-link;
> > > +   phy-handle = <>;
> > > +   phy0: ethernet-phy@0 {
> > > +   reg = <0>;
> > 
> > Shouldn't the interrupt (connected to P1_15) be described?
> 
> 
> That interrupt pin from the PHY is not used. It did not need to be connected.

So things are fine as above or should I expect to see v6?


Re: [PATCH v5 00/10] Renesas RZ/A1 pin and gpio controller

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:42:02AM +0200, Geert Uytterhoeven wrote:
> Hi Jacopo,
> 
> On Thu, Apr 27, 2017 at 10:19 AM, Jacopo Mondi
>  wrote:
> >this is 5th round of gpio/pincontroller for RZ/A1 devices.
> >
> > I have updated the pin controller driver to use the newly introduced
> > "pinctrl_enable()" function.
> > This is required since v4.11-rc7 as otherwise, as reported by Chris Brandt,
> > the pin controller does not start.
> >
> > I have incorporated your comments on the device tree bindings documentation,
> > and added to pinctrl-generic.h header file two macros to unpack generic
> > properties and their arguments.
> >
> > Tested with SCIF, RIIC, ETHER and gpio-leds on Genmai board.
> 
> Thanks for the update!
> 
> > Jacopo Mondi (10):
> >   pinctrl: generic: Add bi-directional and output-enable
> 
> Already applied by LinusW.
> 
> >   pinctrl: generic: Add macros to unpack properties
> 
> LinusW: do you want me to queue this together with the driver for v4.13,
> or will you take this single patch for v4.12?
> 
> >   pinctrl: Renesas RZ/A1 pin and gpio controller
> >   dt-bindings: pinctrl: Add RZ/A1 bindings doc
> 
> Will queue in sh-pfc-for-v4.13.
> 
> >   arm: dts: dt-bindings: Add Renesas RZ/A1 pinctrl header
> >   arm: dts: r7s72100: Add pin controller node
> >   arm: dts: genmai: Add SCIF2 pin group
> >   arm: dts: genmai: Add RIIC2 pin group
> >   arm: dts: genmai: Add user led device nodes
> >   arm: dts: genmai: Add ethernet pin group
> 
> These are for Simon.
> 
> Does applying the DTS changes before the driver introduce regressions?
> If no, Simon can queue them for v4.13.
> If yes, they'll have to wait for v4.14.

That is my question too.


Re: [PATCH v5 00/10] Renesas RZ/A1 pin and gpio controller

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:42:02AM +0200, Geert Uytterhoeven wrote:
> Hi Jacopo,
> 
> On Thu, Apr 27, 2017 at 10:19 AM, Jacopo Mondi
>  wrote:
> >this is 5th round of gpio/pincontroller for RZ/A1 devices.
> >
> > I have updated the pin controller driver to use the newly introduced
> > "pinctrl_enable()" function.
> > This is required since v4.11-rc7 as otherwise, as reported by Chris Brandt,
> > the pin controller does not start.
> >
> > I have incorporated your comments on the device tree bindings documentation,
> > and added to pinctrl-generic.h header file two macros to unpack generic
> > properties and their arguments.
> >
> > Tested with SCIF, RIIC, ETHER and gpio-leds on Genmai board.
> 
> Thanks for the update!
> 
> > Jacopo Mondi (10):
> >   pinctrl: generic: Add bi-directional and output-enable
> 
> Already applied by LinusW.
> 
> >   pinctrl: generic: Add macros to unpack properties
> 
> LinusW: do you want me to queue this together with the driver for v4.13,
> or will you take this single patch for v4.12?
> 
> >   pinctrl: Renesas RZ/A1 pin and gpio controller
> >   dt-bindings: pinctrl: Add RZ/A1 bindings doc
> 
> Will queue in sh-pfc-for-v4.13.
> 
> >   arm: dts: dt-bindings: Add Renesas RZ/A1 pinctrl header
> >   arm: dts: r7s72100: Add pin controller node
> >   arm: dts: genmai: Add SCIF2 pin group
> >   arm: dts: genmai: Add RIIC2 pin group
> >   arm: dts: genmai: Add user led device nodes
> >   arm: dts: genmai: Add ethernet pin group
> 
> These are for Simon.
> 
> Does applying the DTS changes before the driver introduce regressions?
> If no, Simon can queue them for v4.13.
> If yes, they'll have to wait for v4.14.

That is my question too.


Re: [PATCH v5 07/10] arm: dts: genmai: Add SCIF2 pin group

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:19:51AM +0200, Jacopo Mondi wrote:
> Add pin configuration subnode for SCIF2 serial debug interface.
> 
> Signed-off-by: Jacopo Mondi 
> Reviewed-by: Geert Uytterhoeven 

As the dt-bindings (documentation) has been acked by Geert I'd be happy
to queue up this and other "arm: dts: genmai" DT patches in this series
which do not have any outstanding review comments. Is it safe to do so
without the PFC driver patches in place?

> ---
>  arch/arm/boot/dts/r7s72100-genmai.dts | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/arch/arm/boot/dts/r7s72100-genmai.dts 
> b/arch/arm/boot/dts/r7s72100-genmai.dts
> index 118a8e2..c28d74b 100644
> --- a/arch/arm/boot/dts/r7s72100-genmai.dts
> +++ b/arch/arm/boot/dts/r7s72100-genmai.dts
> @@ -11,6 +11,7 @@
>  
>  /dts-v1/;
>  #include "r7s72100.dtsi"
> +#include 
>  
>  / {
>   model = "Genmai";
> @@ -36,6 +37,14 @@
>   };
>  };
>  
> + {
> +
> + scif2_pins: serial2 {
> + /* P3_0 as TxD2; P3_2 as RxD2 */
> + pinmux = , ;
> + };
> +};
> +
>  _clk {
>   clock-frequency = <1333>;
>  };
> @@ -60,6 +69,9 @@
>  };
>  
>   {
> + pinctrl-names = "default";
> + pinctrl-0 = <_pins>;
> +
>   status = "okay";
>  };
>  
> -- 
> 2.7.4
> 


Re: [PATCH v5 07/10] arm: dts: genmai: Add SCIF2 pin group

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:19:51AM +0200, Jacopo Mondi wrote:
> Add pin configuration subnode for SCIF2 serial debug interface.
> 
> Signed-off-by: Jacopo Mondi 
> Reviewed-by: Geert Uytterhoeven 

As the dt-bindings (documentation) has been acked by Geert I'd be happy
to queue up this and other "arm: dts: genmai" DT patches in this series
which do not have any outstanding review comments. Is it safe to do so
without the PFC driver patches in place?

> ---
>  arch/arm/boot/dts/r7s72100-genmai.dts | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/arch/arm/boot/dts/r7s72100-genmai.dts 
> b/arch/arm/boot/dts/r7s72100-genmai.dts
> index 118a8e2..c28d74b 100644
> --- a/arch/arm/boot/dts/r7s72100-genmai.dts
> +++ b/arch/arm/boot/dts/r7s72100-genmai.dts
> @@ -11,6 +11,7 @@
>  
>  /dts-v1/;
>  #include "r7s72100.dtsi"
> +#include 
>  
>  / {
>   model = "Genmai";
> @@ -36,6 +37,14 @@
>   };
>  };
>  
> + {
> +
> + scif2_pins: serial2 {
> + /* P3_0 as TxD2; P3_2 as RxD2 */
> + pinmux = , ;
> + };
> +};
> +
>  _clk {
>   clock-frequency = <1333>;
>  };
> @@ -60,6 +69,9 @@
>  };
>  
>   {
> + pinctrl-names = "default";
> + pinctrl-0 = <_pins>;
> +
>   status = "okay";
>  };
>  
> -- 
> 2.7.4
> 


Re: [PATCH] kexec: allocate buffer in top-down, if specified, correctly

2017-04-27 Thread Dave Young
Vivek, can you help to give some comments about the locate hole isssue
in kexec_file?

On 04/28/17 at 09:51am, AKASHI Takahiro wrote:
> Thiago,
> 
> Thank you for the comment.
> 
> On Thu, Apr 27, 2017 at 07:00:04PM -0300, Thiago Jung Bauermann wrote:
> > Hello,
> > 
> > Am Mittwoch, 26. April 2017, 17:22:09 BRT schrieb AKASHI Takahiro:
> > > The current kexec_locate_mem_hole(kbuf.top_down == 1) stops searching at
> > > the first memory region that has enough space for requested size even if
> > > some of higher regions may also have.
> > 
> > kexec_locate_mem_hole expects arch_kexec_walk_mem to walk memory from top 
> > to 
> > bottom if top_down is true. That is what powerpc's version does.
> 
> Ah, I haven't noticed that, but x86 doesn't have arch_kexec_walk_mem and
> how can it work for x86?
> 
> > Isn't it possible to walk resources from top to bottom?
> 
> Yes, it will be, but it seems to me that such a behavior is not intuitive
> and even confusing if it doesn't come with explicit explanation.

Thing need to make clear is why do we need the change, it might be a
problem for crashkernel=xM,low since that is for softiotlb in case
crashkernel=xM,high being used, otherwise seems current code is fine.
 
Need seeking for old memory from Vivek to confirm.
> 
> > > This behavior is not consistent with locate_hole(hole_end == -1) function
> > > of kexec-tools.
> > > 
> > > This patch fixes the bug, going though all the memory regions anyway.
> > 
> > This patch would break powerpc, because at the end of the memory walk kbuf 
> > would have the lowest memory hole.
> > 
> > If it's not possible to walk resources in reverse order, then this patch 
> > needs 
> > to change powerpc to always walk memory from bottom to top.
> 
> So I would like to hear from x86 guys.
> 
> Thanks
> -Takahiro AKASHI
> 
> > -- 
> > Thiago Jung Bauermann
> > IBM Linux Technology Center
> > 


Re: [PATCH] kexec: allocate buffer in top-down, if specified, correctly

2017-04-27 Thread Dave Young
Vivek, can you help to give some comments about the locate hole isssue
in kexec_file?

On 04/28/17 at 09:51am, AKASHI Takahiro wrote:
> Thiago,
> 
> Thank you for the comment.
> 
> On Thu, Apr 27, 2017 at 07:00:04PM -0300, Thiago Jung Bauermann wrote:
> > Hello,
> > 
> > Am Mittwoch, 26. April 2017, 17:22:09 BRT schrieb AKASHI Takahiro:
> > > The current kexec_locate_mem_hole(kbuf.top_down == 1) stops searching at
> > > the first memory region that has enough space for requested size even if
> > > some of higher regions may also have.
> > 
> > kexec_locate_mem_hole expects arch_kexec_walk_mem to walk memory from top 
> > to 
> > bottom if top_down is true. That is what powerpc's version does.
> 
> Ah, I haven't noticed that, but x86 doesn't have arch_kexec_walk_mem and
> how can it work for x86?
> 
> > Isn't it possible to walk resources from top to bottom?
> 
> Yes, it will be, but it seems to me that such a behavior is not intuitive
> and even confusing if it doesn't come with explicit explanation.

Thing need to make clear is why do we need the change, it might be a
problem for crashkernel=xM,low since that is for softiotlb in case
crashkernel=xM,high being used, otherwise seems current code is fine.
 
Need seeking for old memory from Vivek to confirm.
> 
> > > This behavior is not consistent with locate_hole(hole_end == -1) function
> > > of kexec-tools.
> > > 
> > > This patch fixes the bug, going though all the memory regions anyway.
> > 
> > This patch would break powerpc, because at the end of the memory walk kbuf 
> > would have the lowest memory hole.
> > 
> > If it's not possible to walk resources in reverse order, then this patch 
> > needs 
> > to change powerpc to always walk memory from bottom to top.
> 
> So I would like to hear from x86 guys.
> 
> Thanks
> -Takahiro AKASHI
> 
> > -- 
> > Thiago Jung Bauermann
> > IBM Linux Technology Center
> > 


Re: [PATCH v5 05/10] arm: dts: dt-bindings: Add Renesas RZ/A1 pinctrl header

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:38:39AM +0200, Geert Uytterhoeven wrote:
> On Thu, Apr 27, 2017 at 10:19 AM, Jacopo Mondi
>  wrote:
> > Add dt-bindings for Renesas r7s72100 pin controller header file.
> >
> > Signed-off-by: Jacopo Mondi 
> 
> Reviewed-by: Geert Uytterhoeven 

Thanks, I have queued this up.


Re: [PATCH v5 05/10] arm: dts: dt-bindings: Add Renesas RZ/A1 pinctrl header

2017-04-27 Thread Simon Horman
On Thu, Apr 27, 2017 at 10:38:39AM +0200, Geert Uytterhoeven wrote:
> On Thu, Apr 27, 2017 at 10:19 AM, Jacopo Mondi
>  wrote:
> > Add dt-bindings for Renesas r7s72100 pin controller header file.
> >
> > Signed-off-by: Jacopo Mondi 
> 
> Reviewed-by: Geert Uytterhoeven 

Thanks, I have queued this up.


[PATCH] mnt: allow to add a mount into an existing group

2017-04-27 Thread Andrei Vagin
Now a shared group can be only inherited from a source mount.
This patch adds an ability to add a mount into an existing shared
group.

mount(source, target, NULL, MS_SET_GROUP, NULL)

mount() with the MS_SET_GROUP flag adds the "target" mount into a group
of the "source" mount. The calling process has to have the CAP_SYS_ADMIN
capability in namespaces of these mounts. The source and the target
mounts have to have the same super block.

This new functionality together with "mnt: Tuck mounts under others
instead of creating shadow/side mounts." allows CRIU to dump and restore
any set of mount namespaces.

Currently we have a lot of issues about dumping and restoring mount
namespaces. The bigest problem is that we can't construct mount trees
directly due to several reasons:
* groups can't be set, they can be only inherited
* file systems has to be mounted from the specified user namespaces
* the mount() syscall doesn't just create one mount -- the mount is
  also propagated to all members of a parent group
* umount() doesn't detach mounts from all members of a group
  (mounts with children are not umounted)
* mounts are propagated underneath of existing mounts
* mount() doesn't allow to make bind-mounts between two namespaces
* processes can have opened file descriptors to overmounted files

All these operations are non-trivial, making the task of restoring
a mount namespace practically unsolvable for reasonable time. The
proposed change allows to restore a mount namespace in a direct
manner, without any super complex logic.

Cc: Eric W. Biederman 
Cc: Alexander Viro 
Signed-off-by: Andrei Vagin 
---
 fs/namespace.c  | 66 ++---
 include/uapi/linux/fs.h |  6 +
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index cc1375ef..3bf0cd2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2355,6 +2355,57 @@ static inline int tree_contains_unbindable(struct mount 
*mnt)
return 0;
 }
 
+static int do_set_group(struct path *path, const char *sibling_name)
+{
+   struct mount *sibling, *mnt;
+   struct path sibling_path;
+   int err;
+
+   if (!sibling_name || !*sibling_name)
+   return -EINVAL;
+
+   err = kern_path(sibling_name, LOOKUP_FOLLOW, _path);
+   if (err)
+   return err;
+
+   sibling = real_mount(sibling_path.mnt);
+   mnt = real_mount(path->mnt);
+
+   namespace_lock();
+
+   err = -EPERM;
+   if (!sibling->mnt_ns ||
+   !ns_capable(sibling->mnt_ns->user_ns, CAP_SYS_ADMIN))
+   goto out_unlock;
+
+   err = -EINVAL;
+   if (sibling->mnt.mnt_sb != mnt->mnt.mnt_sb)
+   goto out_unlock;
+
+   if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt))
+   goto out_unlock;
+
+   if (IS_MNT_SLAVE(sibling)) {
+   struct mount *m = sibling->mnt_master;
+
+   list_add(>mnt_slave, >mnt_slave_list);
+   mnt->mnt_master = m;
+   }
+
+   if (IS_MNT_SHARED(sibling)) {
+   mnt->mnt_group_id = sibling->mnt_group_id;
+   list_add(>mnt_share, >mnt_share);
+   set_mnt_shared(mnt);
+   }
+
+   err = 0;
+out_unlock:
+   namespace_unlock();
+
+   path_put(_path);
+   return err;
+}
+
 static int do_move_mount(struct path *path, const char *old_name)
 {
struct path old_path, parent_path;
@@ -2769,6 +2820,7 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
struct path path;
int retval = 0;
int mnt_flags = 0;
+   unsigned long cmd;
 
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
@@ -2820,19 +2872,25 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
}
 
+   cmd = flags & (MS_REMOUNT | MS_BIND |
+  MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE |
+  MS_MOVE | MS_SET_GROUP);
+
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
   MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
 
-   if (flags & MS_REMOUNT)
+   if (cmd & MS_REMOUNT)
retval = do_remount(, flags & ~MS_REMOUNT, mnt_flags,
data_page);
-   else if (flags & MS_BIND)
+   else if (cmd & MS_BIND)
retval = do_loopback(, dev_name, flags & MS_REC);
-   else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
+   else if (cmd & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(, flags);
-   else if (flags & MS_MOVE)
+   else if (cmd & MS_MOVE)
retval = do_move_mount(, dev_name);
+   

[PATCH] mnt: allow to add a mount into an existing group

2017-04-27 Thread Andrei Vagin
Now a shared group can be only inherited from a source mount.
This patch adds an ability to add a mount into an existing shared
group.

mount(source, target, NULL, MS_SET_GROUP, NULL)

mount() with the MS_SET_GROUP flag adds the "target" mount into a group
of the "source" mount. The calling process has to have the CAP_SYS_ADMIN
capability in namespaces of these mounts. The source and the target
mounts have to have the same super block.

This new functionality together with "mnt: Tuck mounts under others
instead of creating shadow/side mounts." allows CRIU to dump and restore
any set of mount namespaces.

Currently we have a lot of issues about dumping and restoring mount
namespaces. The bigest problem is that we can't construct mount trees
directly due to several reasons:
* groups can't be set, they can be only inherited
* file systems has to be mounted from the specified user namespaces
* the mount() syscall doesn't just create one mount -- the mount is
  also propagated to all members of a parent group
* umount() doesn't detach mounts from all members of a group
  (mounts with children are not umounted)
* mounts are propagated underneath of existing mounts
* mount() doesn't allow to make bind-mounts between two namespaces
* processes can have opened file descriptors to overmounted files

All these operations are non-trivial, making the task of restoring
a mount namespace practically unsolvable for reasonable time. The
proposed change allows to restore a mount namespace in a direct
manner, without any super complex logic.

Cc: Eric W. Biederman 
Cc: Alexander Viro 
Signed-off-by: Andrei Vagin 
---
 fs/namespace.c  | 66 ++---
 include/uapi/linux/fs.h |  6 +
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index cc1375ef..3bf0cd2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2355,6 +2355,57 @@ static inline int tree_contains_unbindable(struct mount 
*mnt)
return 0;
 }
 
+static int do_set_group(struct path *path, const char *sibling_name)
+{
+   struct mount *sibling, *mnt;
+   struct path sibling_path;
+   int err;
+
+   if (!sibling_name || !*sibling_name)
+   return -EINVAL;
+
+   err = kern_path(sibling_name, LOOKUP_FOLLOW, _path);
+   if (err)
+   return err;
+
+   sibling = real_mount(sibling_path.mnt);
+   mnt = real_mount(path->mnt);
+
+   namespace_lock();
+
+   err = -EPERM;
+   if (!sibling->mnt_ns ||
+   !ns_capable(sibling->mnt_ns->user_ns, CAP_SYS_ADMIN))
+   goto out_unlock;
+
+   err = -EINVAL;
+   if (sibling->mnt.mnt_sb != mnt->mnt.mnt_sb)
+   goto out_unlock;
+
+   if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt))
+   goto out_unlock;
+
+   if (IS_MNT_SLAVE(sibling)) {
+   struct mount *m = sibling->mnt_master;
+
+   list_add(>mnt_slave, >mnt_slave_list);
+   mnt->mnt_master = m;
+   }
+
+   if (IS_MNT_SHARED(sibling)) {
+   mnt->mnt_group_id = sibling->mnt_group_id;
+   list_add(>mnt_share, >mnt_share);
+   set_mnt_shared(mnt);
+   }
+
+   err = 0;
+out_unlock:
+   namespace_unlock();
+
+   path_put(_path);
+   return err;
+}
+
 static int do_move_mount(struct path *path, const char *old_name)
 {
struct path old_path, parent_path;
@@ -2769,6 +2820,7 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
struct path path;
int retval = 0;
int mnt_flags = 0;
+   unsigned long cmd;
 
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
@@ -2820,19 +2872,25 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
}
 
+   cmd = flags & (MS_REMOUNT | MS_BIND |
+  MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE |
+  MS_MOVE | MS_SET_GROUP);
+
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
   MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
 
-   if (flags & MS_REMOUNT)
+   if (cmd & MS_REMOUNT)
retval = do_remount(, flags & ~MS_REMOUNT, mnt_flags,
data_page);
-   else if (flags & MS_BIND)
+   else if (cmd & MS_BIND)
retval = do_loopback(, dev_name, flags & MS_REC);
-   else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
+   else if (cmd & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(, flags);
-   else if (flags & MS_MOVE)
+   else if (cmd & MS_MOVE)
retval = do_move_mount(, dev_name);
+   else if (cmd & MS_SET_GROUP)
+   retval = do_set_group(, 

Re: [PATCH v2 pci] PCI/MSI: pci-xgene-msi: Enable MSI support in ACPI boot for X-Gene v1

2017-04-27 Thread Jon Masters
On 04/27/2017 08:54 PM, Khuong Dinh wrote:
> From: Khuong Dinh 
> 
> This patch makes pci-xgene-msi driver ACPI-aware and provides
> MSI capability for X-Gene v1 PCIe controllers in ACPI boot mode.
> 
> Signed-off-by: Khuong Dinh 
> Signed-off-by: Duc Dang 
> Acked-by: Marc Zyngier 

Thanks. Currently testing. Will review and followup shortly.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


Re: [PATCH v2 pci] PCI/MSI: pci-xgene-msi: Enable MSI support in ACPI boot for X-Gene v1

2017-04-27 Thread Jon Masters
On 04/27/2017 08:54 PM, Khuong Dinh wrote:
> From: Khuong Dinh 
> 
> This patch makes pci-xgene-msi driver ACPI-aware and provides
> MSI capability for X-Gene v1 PCIe controllers in ACPI boot mode.
> 
> Signed-off-by: Khuong Dinh 
> Signed-off-by: Duc Dang 
> Acked-by: Marc Zyngier 

Thanks. Currently testing. Will review and followup shortly.

Jon.

-- 
Computer Architect | Sent from my Fedora powered laptop


Re: [PATCH] xen/x86: Call xen_smp_intr_init_pv() on BSP

2017-04-27 Thread Juergen Gross
On 26/04/17 15:42, Boris Ostrovsky wrote:
> Recent code rework that split handling ov PV, HVM and PVH guests into
> separate files missed calling xen_smp_intr_init_pv() on CPU0.
> 
> Add this call.
> 
> Signed-off-by: Boris Ostrovsky 
> Reported-by: Sander Eikelenboom 

Reviewed-by: Juergen Gross 


Thanks,

Juergen



Re: [PATCH] xen/x86: Call xen_smp_intr_init_pv() on BSP

2017-04-27 Thread Juergen Gross
On 26/04/17 15:42, Boris Ostrovsky wrote:
> Recent code rework that split handling ov PV, HVM and PVH guests into
> separate files missed calling xen_smp_intr_init_pv() on CPU0.
> 
> Add this call.
> 
> Signed-off-by: Boris Ostrovsky 
> Reported-by: Sander Eikelenboom 

Reviewed-by: Juergen Gross 


Thanks,

Juergen



Re: [PATCH V4 1/9] PM / OPP: Allow OPP table to be used for power-domains

2017-04-27 Thread Viresh Kumar
On 27-04-17, 16:20, Rajendra Nayak wrote:
> 
> On 04/27/2017 03:12 PM, Sudeep Holla wrote:
> []..
> 
> >>
> >>> At qualcomm, we have an external M3 core (running its own firmware) which 
> >>> controls
> >>> a few voltage rails (including AVS on those). The devices vote for the 
> >>> voltage levels
> > 
> > Thanks for explicitly mentioning this, but ...
> > 
> >>> (or performance levels) they need by passing an integer value to the M3 
> >>> (not actual
> > 
> > you contradict here, is it just voltage or performance(i.e. frequency)
> > or both ? We need clarity there to choose the right representation.
> 
> Its just voltage.

Right. Its just voltage in this case, but we can't speak of future
platforms here and we have to consider this thing as an operating
performance point only. I still think that this thread is moving in
the right direction, specially after V6 which looks much better.

If we have anything strong against the way V6 is trying to solve it, I
want to talk about it right now and get inputs from all the parties
involved. Scrapping all this work is fine, but I would like to do it
ASAP in that case :)

-- 
viresh


Re: [PATCH V4 1/9] PM / OPP: Allow OPP table to be used for power-domains

2017-04-27 Thread Viresh Kumar
On 27-04-17, 16:20, Rajendra Nayak wrote:
> 
> On 04/27/2017 03:12 PM, Sudeep Holla wrote:
> []..
> 
> >>
> >>> At qualcomm, we have an external M3 core (running its own firmware) which 
> >>> controls
> >>> a few voltage rails (including AVS on those). The devices vote for the 
> >>> voltage levels
> > 
> > Thanks for explicitly mentioning this, but ...
> > 
> >>> (or performance levels) they need by passing an integer value to the M3 
> >>> (not actual
> > 
> > you contradict here, is it just voltage or performance(i.e. frequency)
> > or both ? We need clarity there to choose the right representation.
> 
> Its just voltage.

Right. Its just voltage in this case, but we can't speak of future
platforms here and we have to consider this thing as an operating
performance point only. I still think that this thread is moving in
the right direction, specially after V6 which looks much better.

If we have anything strong against the way V6 is trying to solve it, I
want to talk about it right now and get inputs from all the parties
involved. Scrapping all this work is fine, but I would like to do it
ASAP in that case :)

-- 
viresh


[PATCH] MAINTAINERS: Update MAX77802 PMIC entry

2017-04-27 Thread Javier Martinez Canillas
The Samsung email address will stop working soon, so use my personal
email address instead.

Also, there used to be a MFD and RTC drivers for max77802 but now these
have been merged with the max77686 MFD and RTC drivers. The only driver
that's still max77802 specific is the regulator one since there are too
many differences with max77686 there to justify merging the two drivers.

Signed-off-by: Javier Martinez Canillas 

---

 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e35370a0c04c..5e19e6913682 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8069,11 +8069,11 @@ S:  Supported
 F: drivers/power/supply/max14577_charger.c
 F: drivers/power/supply/max77693_charger.c
 
-MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Javier Martinez Canillas 
+MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
+M: Javier Martinez Canillas 
 L: linux-kernel@vger.kernel.org
 S: Supported
-F: drivers/*/*max77802*.c
+F: drivers/regulator/max77802-regulator.c
 F: Documentation/devicetree/bindings/*/*max77802.txt
 F: include/dt-bindings/*/*max77802.h
 
-- 
2.9.3



[PATCH] MAINTAINERS: Update MAX77802 PMIC entry

2017-04-27 Thread Javier Martinez Canillas
The Samsung email address will stop working soon, so use my personal
email address instead.

Also, there used to be a MFD and RTC drivers for max77802 but now these
have been merged with the max77686 MFD and RTC drivers. The only driver
that's still max77802 specific is the regulator one since there are too
many differences with max77686 there to justify merging the two drivers.

Signed-off-by: Javier Martinez Canillas 

---

 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e35370a0c04c..5e19e6913682 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8069,11 +8069,11 @@ S:  Supported
 F: drivers/power/supply/max14577_charger.c
 F: drivers/power/supply/max77693_charger.c
 
-MAXIM MAX77802 MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Javier Martinez Canillas 
+MAXIM MAX77802 PMIC REGULATOR DEVICE DRIVER
+M: Javier Martinez Canillas 
 L: linux-kernel@vger.kernel.org
 S: Supported
-F: drivers/*/*max77802*.c
+F: drivers/regulator/max77802-regulator.c
 F: Documentation/devicetree/bindings/*/*max77802.txt
 F: include/dt-bindings/*/*max77802.h
 
-- 
2.9.3



[PATCH v2 4/4] perf vendor events arm64: Add implementation defined pmu core events of ThunderX2

2017-04-27 Thread Ganapatrao Kulkarni
This is not a full event list, but a short list of useful events.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/pmu-events/arch/arm64/mapfile.csv   | 14 +
 .../arm64/thunderx2/implementation-defined.json| 62 ++
 2 files changed, 76 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/arm64/mapfile.csv
 create mode 100644 
tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json

diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv 
b/tools/perf/pmu-events/arch/arm64/mapfile.csv
new file mode 100644
index 000..bc9f798
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -0,0 +1,14 @@
+# Format:
+#  MIDR,Version,JSON/file/pathname,Type
+#
+# where
+#  MIDRProcessor version
+#  Version could be used to track version of of JSON file
+#  but currently unused.
+#  JSON/file/pathname is the path to JSON file, relative
+#  to tools/perf/pmu-events/arch/arm64/.
+#  Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x420f5161,v1,thunderx2,core
diff --git 
a/tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json 
b/tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json
new file mode 100644
index 000..cdb03db
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json
@@ -0,0 +1,62 @@
+[
+{
+"PublicDescription": "Attributable Level 1 data cache access, read",
+"EventCode": "0x40",
+"EventName": "l1d_cache_access_read",
+"BriefDescription": "l1d cache access, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data cache access, write ",
+"EventCode": "0x41",
+"EventName": "l1d_cache_access_write",
+"BriefDescription": "l1d cache access, write",
+},
+{
+"PublicDescription": "Attributable Level 1 data cache refill, read",
+"EventCode": "0x42",
+"EventName": "l1d_cache_refill_read",
+"BriefDescription": "l1d cache refill, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data cache refill, write",
+"EventCode": "0x43",
+"EventName": "l1d_cache_refill_write",
+"BriefDescription": "l1d refill, write",
+},
+{
+"PublicDescription": "Attributable Level 1 data TLB refill, read",
+"EventCode": "0x4C",
+"EventName": "l1d_tlb_refill_read",
+"BriefDescription": "l1d tlb refill, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data TLB refill, write",
+"EventCode": "0x4D",
+"EventName": "l1d_tlb_refill_write",
+"BriefDescription": "l1d tlb refill, write",
+},
+{
+"PublicDescription": "Attributable Level 1 data or unified TLB access, 
read",
+"EventCode": "0x4E",
+"EventName": "l1d_tlb_read",
+"BriefDescription": "l1d tlb, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data or unified TLB access, 
write",
+"EventCode": "0x4F",
+"EventName": "l1d_tlb_write",
+"BriefDescription": "l1d tlb, write",
+},
+{
+"PublicDescription": "Bus access, read",
+"EventCode": "0x60",
+"EventName": "bus_access_read",
+"BriefDescription": "Bus access, read",
+   },
+   {
+"PublicDescription": "Bus access, write",
+"EventCode": "0x61",
+"EventName": "bus_access_write",
+"BriefDescription": "Bus access, write",
+   }
+]
-- 
1.8.1.4



[PATCH v2 3/4] perf utils: Add helper function is_pmu_core to detect PMU CORE devices

2017-04-27 Thread Ganapatrao Kulkarni
On some platforms, PMU core devices sysfs name is not cpu.
Adding function is_pmu_core to detect as core device using
core device specific hints in sysfs.

For arm64 platforms, all core devices have file "cpus" in sysfs.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/util/pmu.c | 44 
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index c754805..48333fe 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -499,6 +499,39 @@ static struct cpu_map *pmu_cpumask(const char *name)
 }
 
 /*
+ *  PMU CORE devices have different name other than cpu in sysfs on some
+ *  platforms. looking for possible sysfs files to identify as core device.
+ */
+static int is_pmu_core(const char *name)
+{
+   struct stat st;
+   char path[PATH_MAX];
+   const char *sysfs = sysfs__mountpoint();
+   const char **template;
+   const char *templates[] = {
+"%s/bus/event_source/devices/%s/cpus",
+NULL
+   };
+
+   if (!sysfs)
+   return 0;
+
+   /* Look for cpu sysfs */
+   snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs);
+   if ((stat(path, ) == 0) &&
+   (strncmp(name, "cpu", strlen("cpu")) == 0))
+   return 1;
+
+   for (template = templates; *template; template++) {
+   snprintf(path, PATH_MAX, *template, sysfs, name);
+   if (stat(path, ) == 0)
+   return 1;
+   }
+
+   return 0;
+}
+
+/*
  * Return the CPU id as a raw string.
  *
  * Each architecture should provide a more precise id string that
@@ -552,15 +585,18 @@ static void pmu_add_cpu_aliases(struct list_head *head, 
const char *name,
 */
i = 0;
while (1) {
-   const char *pname;
 
pe = >table[i++];
if (!pe->name)
break;
 
-   pname = pe->pmu ? pe->pmu : "cpu";
-   if (strncmp(pname, name, strlen(pname)))
-   continue;
+   if (!is_pmu_core(name)) {
+   /* check for uncore devices */
+   if (pe->pmu == NULL)
+   continue;
+   if (strncmp(pe->pmu, name, strlen(pe->pmu)))
+   continue;
+   }
 
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
-- 
1.8.1.4



[PATCH v2 4/4] perf vendor events arm64: Add implementation defined pmu core events of ThunderX2

2017-04-27 Thread Ganapatrao Kulkarni
This is not a full event list, but a short list of useful events.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/pmu-events/arch/arm64/mapfile.csv   | 14 +
 .../arm64/thunderx2/implementation-defined.json| 62 ++
 2 files changed, 76 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/arm64/mapfile.csv
 create mode 100644 
tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json

diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv 
b/tools/perf/pmu-events/arch/arm64/mapfile.csv
new file mode 100644
index 000..bc9f798
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -0,0 +1,14 @@
+# Format:
+#  MIDR,Version,JSON/file/pathname,Type
+#
+# where
+#  MIDRProcessor version
+#  Version could be used to track version of of JSON file
+#  but currently unused.
+#  JSON/file/pathname is the path to JSON file, relative
+#  to tools/perf/pmu-events/arch/arm64/.
+#  Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x420f5161,v1,thunderx2,core
diff --git 
a/tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json 
b/tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json
new file mode 100644
index 000..cdb03db
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json
@@ -0,0 +1,62 @@
+[
+{
+"PublicDescription": "Attributable Level 1 data cache access, read",
+"EventCode": "0x40",
+"EventName": "l1d_cache_access_read",
+"BriefDescription": "l1d cache access, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data cache access, write ",
+"EventCode": "0x41",
+"EventName": "l1d_cache_access_write",
+"BriefDescription": "l1d cache access, write",
+},
+{
+"PublicDescription": "Attributable Level 1 data cache refill, read",
+"EventCode": "0x42",
+"EventName": "l1d_cache_refill_read",
+"BriefDescription": "l1d cache refill, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data cache refill, write",
+"EventCode": "0x43",
+"EventName": "l1d_cache_refill_write",
+"BriefDescription": "l1d refill, write",
+},
+{
+"PublicDescription": "Attributable Level 1 data TLB refill, read",
+"EventCode": "0x4C",
+"EventName": "l1d_tlb_refill_read",
+"BriefDescription": "l1d tlb refill, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data TLB refill, write",
+"EventCode": "0x4D",
+"EventName": "l1d_tlb_refill_write",
+"BriefDescription": "l1d tlb refill, write",
+},
+{
+"PublicDescription": "Attributable Level 1 data or unified TLB access, 
read",
+"EventCode": "0x4E",
+"EventName": "l1d_tlb_read",
+"BriefDescription": "l1d tlb, read",
+},
+{
+"PublicDescription": "Attributable Level 1 data or unified TLB access, 
write",
+"EventCode": "0x4F",
+"EventName": "l1d_tlb_write",
+"BriefDescription": "l1d tlb, write",
+},
+{
+"PublicDescription": "Bus access, read",
+"EventCode": "0x60",
+"EventName": "bus_access_read",
+"BriefDescription": "Bus access, read",
+   },
+   {
+"PublicDescription": "Bus access, write",
+"EventCode": "0x61",
+"EventName": "bus_access_write",
+"BriefDescription": "Bus access, write",
+   }
+]
-- 
1.8.1.4



[PATCH v2 3/4] perf utils: Add helper function is_pmu_core to detect PMU CORE devices

2017-04-27 Thread Ganapatrao Kulkarni
On some platforms, PMU core devices sysfs name is not cpu.
Adding function is_pmu_core to detect as core device using
core device specific hints in sysfs.

For arm64 platforms, all core devices have file "cpus" in sysfs.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/util/pmu.c | 44 
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index c754805..48333fe 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -499,6 +499,39 @@ static struct cpu_map *pmu_cpumask(const char *name)
 }
 
 /*
+ *  PMU CORE devices have different name other than cpu in sysfs on some
+ *  platforms. looking for possible sysfs files to identify as core device.
+ */
+static int is_pmu_core(const char *name)
+{
+   struct stat st;
+   char path[PATH_MAX];
+   const char *sysfs = sysfs__mountpoint();
+   const char **template;
+   const char *templates[] = {
+"%s/bus/event_source/devices/%s/cpus",
+NULL
+   };
+
+   if (!sysfs)
+   return 0;
+
+   /* Look for cpu sysfs */
+   snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs);
+   if ((stat(path, ) == 0) &&
+   (strncmp(name, "cpu", strlen("cpu")) == 0))
+   return 1;
+
+   for (template = templates; *template; template++) {
+   snprintf(path, PATH_MAX, *template, sysfs, name);
+   if (stat(path, ) == 0)
+   return 1;
+   }
+
+   return 0;
+}
+
+/*
  * Return the CPU id as a raw string.
  *
  * Each architecture should provide a more precise id string that
@@ -552,15 +585,18 @@ static void pmu_add_cpu_aliases(struct list_head *head, 
const char *name,
 */
i = 0;
while (1) {
-   const char *pname;
 
pe = >table[i++];
if (!pe->name)
break;
 
-   pname = pe->pmu ? pe->pmu : "cpu";
-   if (strncmp(pname, name, strlen(pname)))
-   continue;
+   if (!is_pmu_core(name)) {
+   /* check for uncore devices */
+   if (pe->pmu == NULL)
+   continue;
+   if (strncmp(pe->pmu, name, strlen(pe->pmu)))
+   continue;
+   }
 
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
-- 
1.8.1.4



[PATCH v2 1/4] perf utils: passing pmu as a parameter to function get_cpuid_str

2017-04-27 Thread Ganapatrao Kulkarni
cpuid string will not be same on all CPUs on heterogeneous
platforms like ARM's big.LITTLE, adding provision(using pmu->cpus)
to find cpuid string from associated CPUs of PMU CORE device.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/arch/powerpc/util/header.c |  3 ++-
 tools/perf/arch/x86/util/header.c |  4 +++-
 tools/perf/util/header.h  |  3 ++-
 tools/perf/util/pmu.c | 10 ++
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/header.c 
b/tools/perf/arch/powerpc/util/header.c
index 9aaa6f5..9a70cc1 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -34,10 +34,11 @@
 }
 
 char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu)
 {
char *bufp;
 
+   do { if (pmu) {} } while (0);
if (asprintf(, "%.8lx", mfspr(SPRN_PVR)) < 0)
bufp = NULL;
 
diff --git a/tools/perf/arch/x86/util/header.c 
b/tools/perf/arch/x86/util/header.c
index a74a48d..1e240ca 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -65,10 +65,12 @@
 }
 
 char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu)
 {
char *buf = malloc(128);
 
+   do { if (pmu) {} } while (0);
+
if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
free(buf);
return NULL;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d30109b..6bb14a4 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -8,6 +8,7 @@
 #include 
 #include "event.h"
 #include "env.h"
+#include "pmu.h"
 
 enum {
HEADER_RESERVED = 0,/* always cleared */
@@ -151,5 +152,5 @@ int perf_event__process_build_id(struct perf_tool *tool,
  */
 int get_cpuid(char *buffer, size_t sz);
 
-char *get_cpuid_str(void);
+char *get_cpuid_str(struct perf_pmu *pmu);
 #endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 12f84dd..c754805 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -504,8 +504,9 @@ static struct cpu_map *pmu_cpumask(const char *name)
  * Each architecture should provide a more precise id string that
  * can be use to match the architecture's "mapfile".
  */
-char * __weak get_cpuid_str(void)
+char * __weak get_cpuid_str(struct perf_pmu *pmu)
 {
+   do { if (pmu) {} } while (0);
return NULL;
 }
 
@@ -514,7 +515,8 @@ char * __weak get_cpuid_str(void)
  * to the current running CPU. Then, add all PMU events from that table
  * as aliases.
  */
-static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
+static void pmu_add_cpu_aliases(struct list_head *head, const char *name,
+   struct perf_pmu *pmu)
 {
int i;
struct pmu_events_map *map;
@@ -526,7 +528,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, 
const char *name)
if (cpuid)
cpuid = strdup(cpuid);
if (!cpuid)
-   cpuid = get_cpuid_str();
+   cpuid = get_cpuid_str(pmu);
if (!cpuid)
return;
 
@@ -601,12 +603,12 @@ static struct perf_pmu *pmu_lookup(const char *name)
if (pmu_aliases(name, ))
return NULL;
 
-   pmu_add_cpu_aliases(, name);
pmu = zalloc(sizeof(*pmu));
if (!pmu)
return NULL;
 
pmu->cpus = pmu_cpumask(name);
+   pmu_add_cpu_aliases(, name, pmu);
 
INIT_LIST_HEAD(>format);
INIT_LIST_HEAD(>aliases);
-- 
1.8.1.4



[PATCH v2 2/4] perf tools arm64: Add support for get_cpuid_str function.

2017-04-27 Thread Ganapatrao Kulkarni
function get_cpuid_str returns MIDR string of the first online
cpu from the range of cpus associated with the pmu core device.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/arch/arm64/util/Build|  1 +
 tools/perf/arch/arm64/util/header.c | 41 +
 2 files changed, 42 insertions(+)
 create mode 100644 tools/perf/arch/arm64/util/header.c

diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index cef6fb3..b1ab72d 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,3 +1,4 @@
+libperf-y += header.o
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
 
diff --git a/tools/perf/arch/arm64/util/header.c 
b/tools/perf/arch/arm64/util/header.c
new file mode 100644
index 000..d8980a6b
--- /dev/null
+++ b/tools/perf/arch/arm64/util/header.c
@@ -0,0 +1,41 @@
+#include 
+#include 
+#include "header.h"
+
+#define LINUX_SYS_CPU_DIRECTORY "/sys/devices/system/cpu/"
+#define MIDR "/regs/identification/midr_el1"
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+   char *buf = malloc(128);
+   FILE *file;
+   char *ret = NULL;
+   int cpu;
+   char sys_file[256];
+   struct cpu_map *cpus;
+
+   if (!pmu->cpus)
+   return NULL;
+
+   /* read midr from list of cpus mapped to this pmu */
+   cpus = cpu_map__get(pmu->cpus);
+
+   for (cpu = 0; cpu < cpus->nr; cpu++) {
+   sprintf(sys_file, LINUX_SYS_CPU_DIRECTORY"cpu%d"MIDR,
+   cpus->map[cpu]);
+   file = fopen(sys_file, "r");
+   if (file) {
+   ret = fgets(buf, 128, file);
+   if (ret) {
+   buf[strlen(buf)-1] = '\0';
+   fclose(file);
+   cpu_map__put(cpus);
+   return buf;
+   }
+   fclose(file);
+   }
+   }
+   cpu_map__put(cpus);
+   free(buf);
+   return ret;
+}
-- 
1.8.1.4



[PATCH v2 1/4] perf utils: passing pmu as a parameter to function get_cpuid_str

2017-04-27 Thread Ganapatrao Kulkarni
cpuid string will not be same on all CPUs on heterogeneous
platforms like ARM's big.LITTLE, adding provision(using pmu->cpus)
to find cpuid string from associated CPUs of PMU CORE device.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/arch/powerpc/util/header.c |  3 ++-
 tools/perf/arch/x86/util/header.c |  4 +++-
 tools/perf/util/header.h  |  3 ++-
 tools/perf/util/pmu.c | 10 ++
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/header.c 
b/tools/perf/arch/powerpc/util/header.c
index 9aaa6f5..9a70cc1 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -34,10 +34,11 @@
 }
 
 char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu)
 {
char *bufp;
 
+   do { if (pmu) {} } while (0);
if (asprintf(, "%.8lx", mfspr(SPRN_PVR)) < 0)
bufp = NULL;
 
diff --git a/tools/perf/arch/x86/util/header.c 
b/tools/perf/arch/x86/util/header.c
index a74a48d..1e240ca 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -65,10 +65,12 @@
 }
 
 char *
-get_cpuid_str(void)
+get_cpuid_str(struct perf_pmu *pmu)
 {
char *buf = malloc(128);
 
+   do { if (pmu) {} } while (0);
+
if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
free(buf);
return NULL;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d30109b..6bb14a4 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -8,6 +8,7 @@
 #include 
 #include "event.h"
 #include "env.h"
+#include "pmu.h"
 
 enum {
HEADER_RESERVED = 0,/* always cleared */
@@ -151,5 +152,5 @@ int perf_event__process_build_id(struct perf_tool *tool,
  */
 int get_cpuid(char *buffer, size_t sz);
 
-char *get_cpuid_str(void);
+char *get_cpuid_str(struct perf_pmu *pmu);
 #endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 12f84dd..c754805 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -504,8 +504,9 @@ static struct cpu_map *pmu_cpumask(const char *name)
  * Each architecture should provide a more precise id string that
  * can be use to match the architecture's "mapfile".
  */
-char * __weak get_cpuid_str(void)
+char * __weak get_cpuid_str(struct perf_pmu *pmu)
 {
+   do { if (pmu) {} } while (0);
return NULL;
 }
 
@@ -514,7 +515,8 @@ char * __weak get_cpuid_str(void)
  * to the current running CPU. Then, add all PMU events from that table
  * as aliases.
  */
-static void pmu_add_cpu_aliases(struct list_head *head, const char *name)
+static void pmu_add_cpu_aliases(struct list_head *head, const char *name,
+   struct perf_pmu *pmu)
 {
int i;
struct pmu_events_map *map;
@@ -526,7 +528,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, 
const char *name)
if (cpuid)
cpuid = strdup(cpuid);
if (!cpuid)
-   cpuid = get_cpuid_str();
+   cpuid = get_cpuid_str(pmu);
if (!cpuid)
return;
 
@@ -601,12 +603,12 @@ static struct perf_pmu *pmu_lookup(const char *name)
if (pmu_aliases(name, ))
return NULL;
 
-   pmu_add_cpu_aliases(, name);
pmu = zalloc(sizeof(*pmu));
if (!pmu)
return NULL;
 
pmu->cpus = pmu_cpumask(name);
+   pmu_add_cpu_aliases(, name, pmu);
 
INIT_LIST_HEAD(>format);
INIT_LIST_HEAD(>aliases);
-- 
1.8.1.4



[PATCH v2 2/4] perf tools arm64: Add support for get_cpuid_str function.

2017-04-27 Thread Ganapatrao Kulkarni
function get_cpuid_str returns MIDR string of the first online
cpu from the range of cpus associated with the pmu core device.

Signed-off-by: Ganapatrao Kulkarni 
---
 tools/perf/arch/arm64/util/Build|  1 +
 tools/perf/arch/arm64/util/header.c | 41 +
 2 files changed, 42 insertions(+)
 create mode 100644 tools/perf/arch/arm64/util/header.c

diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index cef6fb3..b1ab72d 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,3 +1,4 @@
+libperf-y += header.o
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
 
diff --git a/tools/perf/arch/arm64/util/header.c 
b/tools/perf/arch/arm64/util/header.c
new file mode 100644
index 000..d8980a6b
--- /dev/null
+++ b/tools/perf/arch/arm64/util/header.c
@@ -0,0 +1,41 @@
+#include 
+#include 
+#include "header.h"
+
+#define LINUX_SYS_CPU_DIRECTORY "/sys/devices/system/cpu/"
+#define MIDR "/regs/identification/midr_el1"
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+   char *buf = malloc(128);
+   FILE *file;
+   char *ret = NULL;
+   int cpu;
+   char sys_file[256];
+   struct cpu_map *cpus;
+
+   if (!pmu->cpus)
+   return NULL;
+
+   /* read midr from list of cpus mapped to this pmu */
+   cpus = cpu_map__get(pmu->cpus);
+
+   for (cpu = 0; cpu < cpus->nr; cpu++) {
+   sprintf(sys_file, LINUX_SYS_CPU_DIRECTORY"cpu%d"MIDR,
+   cpus->map[cpu]);
+   file = fopen(sys_file, "r");
+   if (file) {
+   ret = fgets(buf, 128, file);
+   if (ret) {
+   buf[strlen(buf)-1] = '\0';
+   fclose(file);
+   cpu_map__put(cpus);
+   return buf;
+   }
+   fclose(file);
+   }
+   }
+   cpu_map__put(cpus);
+   free(buf);
+   return ret;
+}
-- 
1.8.1.4



[PATCH v2 0/4] Add support for ThunderX2 pmu events using json files

2017-04-27 Thread Ganapatrao Kulkarni
Extending json/jevent framework for parsing arm64 event files.
Adding jevents for ThunderX2 implementation defined PMU events.

v2:
   - Updated as per Mark Rutland's suggestions.
   - added provision for get_cpuid_str to get cpu id string
 from associated cpus of pmu core device.

v1: Initial patchset.

Ganapatrao Kulkarni (4):
  perf utils: passing pmu as a parameter to function get_cpuid_str
  perf tools arm64: Add support for get_cpuid_str function.
  perf utils: Add helper function is_pmu_core to detect PMU CORE devices
  perf vendor events arm64: Add implementation defined pmu core events
of ThunderX2

 tools/perf/arch/arm64/util/Build   |  1 +
 tools/perf/arch/arm64/util/header.c| 41 ++
 tools/perf/arch/powerpc/util/header.c  |  3 +-
 tools/perf/arch/x86/util/header.c  |  4 +-
 tools/perf/pmu-events/arch/arm64/mapfile.csv   | 14 +
 .../arm64/thunderx2/implementation-defined.json| 62 ++
 tools/perf/util/header.h   |  3 +-
 tools/perf/util/pmu.c  | 54 ---
 8 files changed, 171 insertions(+), 11 deletions(-)
 create mode 100644 tools/perf/arch/arm64/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/arm64/mapfile.csv
 create mode 100644 
tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json

-- 
1.8.1.4



[PATCH v2 0/4] Add support for ThunderX2 pmu events using json files

2017-04-27 Thread Ganapatrao Kulkarni
Extending json/jevent framework for parsing arm64 event files.
Adding jevents for ThunderX2 implementation defined PMU events.

v2:
   - Updated as per Mark Rutland's suggestions.
   - added provision for get_cpuid_str to get cpu id string
 from associated cpus of pmu core device.

v1: Initial patchset.

Ganapatrao Kulkarni (4):
  perf utils: passing pmu as a parameter to function get_cpuid_str
  perf tools arm64: Add support for get_cpuid_str function.
  perf utils: Add helper function is_pmu_core to detect PMU CORE devices
  perf vendor events arm64: Add implementation defined pmu core events
of ThunderX2

 tools/perf/arch/arm64/util/Build   |  1 +
 tools/perf/arch/arm64/util/header.c| 41 ++
 tools/perf/arch/powerpc/util/header.c  |  3 +-
 tools/perf/arch/x86/util/header.c  |  4 +-
 tools/perf/pmu-events/arch/arm64/mapfile.csv   | 14 +
 .../arm64/thunderx2/implementation-defined.json| 62 ++
 tools/perf/util/header.h   |  3 +-
 tools/perf/util/pmu.c  | 54 ---
 8 files changed, 171 insertions(+), 11 deletions(-)
 create mode 100644 tools/perf/arch/arm64/util/header.c
 create mode 100644 tools/perf/pmu-events/arch/arm64/mapfile.csv
 create mode 100644 
tools/perf/pmu-events/arch/arm64/thunderx2/implementation-defined.json

-- 
1.8.1.4



[PATCH v2] [media] vb2: Fix an off by one error in 'vb2_plane_vaddr'

2017-04-27 Thread Christophe JAILLET
We should ensure that 'plane_no' is '< vb->num_planes' as done in
'vb2_plane_cookie' just a few lines below.

Cc: sta...@vger.kernel.org
Fixes: e23ccc0ad925 ("[media] v4l: add videobuf2 Video for Linux 2 driver 
framework")

Signed-off-by: Christophe JAILLET 
---
v2: add CC and Fixes tags
---
 drivers/media/v4l2-core/videobuf2-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c 
b/drivers/media/v4l2-core/videobuf2-core.c
index 94afbbf92807..c0175ea7e7ad 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -868,7 +868,7 @@ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
 
 void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
 {
-   if (plane_no > vb->num_planes || !vb->planes[plane_no].mem_priv)
+   if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
return NULL;
 
return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
-- 
2.11.0



[PATCH v2] [media] vb2: Fix an off by one error in 'vb2_plane_vaddr'

2017-04-27 Thread Christophe JAILLET
We should ensure that 'plane_no' is '< vb->num_planes' as done in
'vb2_plane_cookie' just a few lines below.

Cc: sta...@vger.kernel.org
Fixes: e23ccc0ad925 ("[media] v4l: add videobuf2 Video for Linux 2 driver 
framework")

Signed-off-by: Christophe JAILLET 
---
v2: add CC and Fixes tags
---
 drivers/media/v4l2-core/videobuf2-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c 
b/drivers/media/v4l2-core/videobuf2-core.c
index 94afbbf92807..c0175ea7e7ad 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -868,7 +868,7 @@ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
 
 void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
 {
-   if (plane_no > vb->num_planes || !vb->planes[plane_no].mem_priv)
+   if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
return NULL;
 
return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
-- 
2.11.0



Re: [PATCH] kexec: allocate buffer in top-down, if specified, correctly

2017-04-27 Thread Dave Young
Hi AKASHI
On 04/26/17 at 05:22pm, AKASHI Takahiro wrote:
> The current kexec_locate_mem_hole(kbuf.top_down == 1) stops searching at
> the first memory region that has enough space for requested size even if
> some of higher regions may also have.
> This behavior is not consistent with locate_hole(hole_end == -1) function
> of kexec-tools.

Have you seen actual bug happened or just observing this during code
review?

Till now seems we do not see any reports about this.

> 
> This patch fixes the bug, going though all the memory regions anyway.
> 
> Signed-off-by: AKASHI Takahiro 
> ---
>  kernel/kexec_file.c | 19 ++-
>  1 file changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index b118735fea9d..2f131c0d9017 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -373,8 +373,8 @@ static int locate_mem_hole_top_down(unsigned long start, 
> unsigned long end,
>   /* If we are here, we found a suitable memory range */
>   kbuf->mem = temp_start;
>  
> - /* Success, stop navigating through remaining System RAM ranges */
> - return 1;
> + /* always return zero, going through all the System RAM ranges */
> + return 0;
>  }
>  
>  static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
> @@ -439,18 +439,27 @@ static int locate_mem_hole_callback(u64 start, u64 end, 
> void *arg)
>   *
>   * Return: The memory walk will stop when func returns a non-zero value
>   * and that value will be returned. If all free regions are visited without
> - * func returning non-zero, then zero will be returned.
> + * func returning non-zero, then kbuf->mem will be additionally checked
> + * for top-down search.
> + * After all, zero will be returned if none of regions fits.
>   */
>  int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
>  int (*func)(u64, u64, void *))
>  {
> + int ret;
> +
> + kbuf->mem = 0;
>   if (kbuf->image->type == KEXEC_TYPE_CRASH)
> - return walk_iomem_res_desc(crashk_res.desc,
> + ret = walk_iomem_res_desc(crashk_res.desc,
>  IORESOURCE_SYSTEM_RAM | 
> IORESOURCE_BUSY,
>  crashk_res.start, crashk_res.end,
>  kbuf, func);
>   else
> - return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
> + ret = walk_system_ram_res(0, ULONG_MAX, kbuf, func);
> +
> + if (!ret && kbuf->mem)
> + ret = 1; /* found for top-down search */
> + return ret;
>  }
>  
>  /**
> -- 
> 2.11.1
> 


Re: [PATCH] kexec: allocate buffer in top-down, if specified, correctly

2017-04-27 Thread Dave Young
Hi AKASHI
On 04/26/17 at 05:22pm, AKASHI Takahiro wrote:
> The current kexec_locate_mem_hole(kbuf.top_down == 1) stops searching at
> the first memory region that has enough space for requested size even if
> some of higher regions may also have.
> This behavior is not consistent with locate_hole(hole_end == -1) function
> of kexec-tools.

Have you seen actual bug happened or just observing this during code
review?

Till now seems we do not see any reports about this.

> 
> This patch fixes the bug, going though all the memory regions anyway.
> 
> Signed-off-by: AKASHI Takahiro 
> ---
>  kernel/kexec_file.c | 19 ++-
>  1 file changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index b118735fea9d..2f131c0d9017 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -373,8 +373,8 @@ static int locate_mem_hole_top_down(unsigned long start, 
> unsigned long end,
>   /* If we are here, we found a suitable memory range */
>   kbuf->mem = temp_start;
>  
> - /* Success, stop navigating through remaining System RAM ranges */
> - return 1;
> + /* always return zero, going through all the System RAM ranges */
> + return 0;
>  }
>  
>  static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
> @@ -439,18 +439,27 @@ static int locate_mem_hole_callback(u64 start, u64 end, 
> void *arg)
>   *
>   * Return: The memory walk will stop when func returns a non-zero value
>   * and that value will be returned. If all free regions are visited without
> - * func returning non-zero, then zero will be returned.
> + * func returning non-zero, then kbuf->mem will be additionally checked
> + * for top-down search.
> + * After all, zero will be returned if none of regions fits.
>   */
>  int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
>  int (*func)(u64, u64, void *))
>  {
> + int ret;
> +
> + kbuf->mem = 0;
>   if (kbuf->image->type == KEXEC_TYPE_CRASH)
> - return walk_iomem_res_desc(crashk_res.desc,
> + ret = walk_iomem_res_desc(crashk_res.desc,
>  IORESOURCE_SYSTEM_RAM | 
> IORESOURCE_BUSY,
>  crashk_res.start, crashk_res.end,
>  kbuf, func);
>   else
> - return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
> + ret = walk_system_ram_res(0, ULONG_MAX, kbuf, func);
> +
> + if (!ret && kbuf->mem)
> + ret = 1; /* found for top-down search */
> + return ret;
>  }
>  
>  /**
> -- 
> 2.11.1
> 


nvdimm/pmem device lifetime

2017-04-27 Thread Mika Penttilä
Hi,

Just wondering the pmem struct device vs gendisk lifetimes.. from 
pmem_attach_disk():

device_add_disk(dev, disk);
devm_add_action_or_reset(dev, pmem_release_disk, disk);


where:
static void pmem_release_disk(void *disk)
{
del_gendisk(disk);
put_disk(disk);
}


but device_add_disk() makes disk pin dev (as a parent), and it's unpinned by 
del_gendisk()
which is called when dev is released, but it's not because of this circular 
dependency?

--Mika


nvdimm/pmem device lifetime

2017-04-27 Thread Mika Penttilä
Hi,

Just wondering the pmem struct device vs gendisk lifetimes.. from 
pmem_attach_disk():

device_add_disk(dev, disk);
devm_add_action_or_reset(dev, pmem_release_disk, disk);


where:
static void pmem_release_disk(void *disk)
{
del_gendisk(disk);
put_disk(disk);
}


but device_add_disk() makes disk pin dev (as a parent), and it's unpinned by 
del_gendisk()
which is called when dev is released, but it's not because of this circular 
dependency?

--Mika


linux-next: build failure after merge of the kvm-ppc tree

2017-04-27 Thread Stephen Rothwell
Hi Paul,

After merging the kvm-ppc tree, today's linux-next build (powerpc
ppc64_defconfig) failed like this:

arch/powerpc/kvm/book3s_xive.c: In function 'xive_debugfs_init':
arch/powerpc/kvm/book3s_xive.c:1852:52: error: 'powerpc_debugfs_root' 
undeclared (first use in this function)
  xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
^

Caused by commit

  5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt 
controller")

interacting with commit

  7644d5819cf8 ("powerpc: Create asm/debugfs.h and move powerpc_debugfs_root 
there")

from the powerpc tree.

I have added the following merge fix patch.

From: Stephen Rothwell 
Date: Fri, 28 Apr 2017 14:28:17 +1000
Subject: [PATCH] powerpc: merge fix for powerpc_debugfs_root move.

Signed-off-by: Stephen Rothwell 
---
 arch/powerpc/kvm/book3s_xive.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 7807ee17af4b..ffe1da95033a 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
-- 
2.11.0

-- 
Cheers,
Stephen Rothwell


linux-next: build failure after merge of the kvm-ppc tree

2017-04-27 Thread Stephen Rothwell
Hi Paul,

After merging the kvm-ppc tree, today's linux-next build (powerpc
ppc64_defconfig) failed like this:

arch/powerpc/kvm/book3s_xive.c: In function 'xive_debugfs_init':
arch/powerpc/kvm/book3s_xive.c:1852:52: error: 'powerpc_debugfs_root' 
undeclared (first use in this function)
  xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
^

Caused by commit

  5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt 
controller")

interacting with commit

  7644d5819cf8 ("powerpc: Create asm/debugfs.h and move powerpc_debugfs_root 
there")

from the powerpc tree.

I have added the following merge fix patch.

From: Stephen Rothwell 
Date: Fri, 28 Apr 2017 14:28:17 +1000
Subject: [PATCH] powerpc: merge fix for powerpc_debugfs_root move.

Signed-off-by: Stephen Rothwell 
---
 arch/powerpc/kvm/book3s_xive.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 7807ee17af4b..ffe1da95033a 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
-- 
2.11.0

-- 
Cheers,
Stephen Rothwell


[PATCH] of: unittest, fix possible use of unitialized variable

2017-04-27 Thread frowand . list
From: Frank Rowand 

Fix problem reported in the linux-next build.  last_sibling may be
unitialized in of_unittest() if the device tree is empty.

Signed-off-by: Frank Rowand 
---
 drivers/of/unittest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 12597ff8cfb0..8f14a43f48e5 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -2119,7 +2119,7 @@ static int __init overlay_data_add(int onum)
  */
 static __init void of_unittest_overlay_high_level(void)
 {
-   struct device_node *last_sibling;
+   struct device_node *last_sibling = NULL;
struct device_node *np;
struct device_node *of_symbols;
struct device_node *overlay_base_symbols;
-- 
Frank Rowand 



[PATCH] of: unittest, fix possible use of unitialized variable

2017-04-27 Thread frowand . list
From: Frank Rowand 

Fix problem reported in the linux-next build.  last_sibling may be
unitialized in of_unittest() if the device tree is empty.

Signed-off-by: Frank Rowand 
---
 drivers/of/unittest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 12597ff8cfb0..8f14a43f48e5 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -2119,7 +2119,7 @@ static int __init overlay_data_add(int onum)
  */
 static __init void of_unittest_overlay_high_level(void)
 {
-   struct device_node *last_sibling;
+   struct device_node *last_sibling = NULL;
struct device_node *np;
struct device_node *of_symbols;
struct device_node *overlay_base_symbols;
-- 
Frank Rowand 



Re: [PATCH] fs: Handle register_shrinker failure

2017-04-27 Thread Al Viro
On Fri, Mar 24, 2017 at 09:55:40AM +0200, Nikolay Borisov wrote:
> register_shrinker allocates dynamic memory and thus is susceptible to failures
> under low-memory situation. Currently,get_userns ignores the return value of
> register_shrinker, potentially exposing not fully initialised object. This
> can lead to a NULL-ptr deref everytime shrinker->nr_deferred is referenced.
> 
> Fix this by failing to register the filesystem in case there is not enough
> memory to fully construct the shrinker object.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  fs/super.c | 14 +-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index b8b6a086c03b..964b18447c92 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -518,7 +518,19 @@ struct super_block *sget_userns(struct file_system_type 
> *type,
>   hlist_add_head(>s_instances, >fs_supers);
>   spin_unlock(_lock);
>   get_filesystem(type);
> - register_shrinker(>s_shrink);
> + err = register_shrinker(>s_shrink);
> + if (err) {
> + spin_lock(_lock);
> + list_del(>s_list);
> + hlist_del(>s_instances);
> + spin_unlock(_lock);
> +
> + up_write(>s_umount);
> + destroy_super(s);
> + put_filesystem(type);
> + return ERR_PTR(err);

I really don't like that.  Your "remove it from all lists and pray that
nobody has picked a reference of any kind" at the very least needs a careful
written proof of correctness.  AFAICS, somebody might've found it on the
list and attempted to grab ->s_umount (grab_super() from another thread
calling sget()).  Then they'd block until your up_write() in there and
bugger the system up trying to play with ->s_umount in the object you've
freed.

NAK.  Yes, the bug is real, but this is not a solution.


Re: [PATCH] fs: Handle register_shrinker failure

2017-04-27 Thread Al Viro
On Fri, Mar 24, 2017 at 09:55:40AM +0200, Nikolay Borisov wrote:
> register_shrinker allocates dynamic memory and thus is susceptible to failures
> under low-memory situation. Currently,get_userns ignores the return value of
> register_shrinker, potentially exposing not fully initialised object. This
> can lead to a NULL-ptr deref everytime shrinker->nr_deferred is referenced.
> 
> Fix this by failing to register the filesystem in case there is not enough
> memory to fully construct the shrinker object.
> 
> Signed-off-by: Nikolay Borisov 
> ---
>  fs/super.c | 14 +-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index b8b6a086c03b..964b18447c92 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -518,7 +518,19 @@ struct super_block *sget_userns(struct file_system_type 
> *type,
>   hlist_add_head(>s_instances, >fs_supers);
>   spin_unlock(_lock);
>   get_filesystem(type);
> - register_shrinker(>s_shrink);
> + err = register_shrinker(>s_shrink);
> + if (err) {
> + spin_lock(_lock);
> + list_del(>s_list);
> + hlist_del(>s_instances);
> + spin_unlock(_lock);
> +
> + up_write(>s_umount);
> + destroy_super(s);
> + put_filesystem(type);
> + return ERR_PTR(err);

I really don't like that.  Your "remove it from all lists and pray that
nobody has picked a reference of any kind" at the very least needs a careful
written proof of correctness.  AFAICS, somebody might've found it on the
list and attempted to grab ->s_umount (grab_super() from another thread
calling sget()).  Then they'd block until your up_write() in there and
bugger the system up trying to play with ->s_umount in the object you've
freed.

NAK.  Yes, the bug is real, but this is not a solution.


Re: [REGRESSION next-20170426] Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices") causes oops in mvneta

2017-04-27 Thread Sricharan R
Hi Ralph,

On 4/27/2017 7:05 PM, Sricharan R wrote:
> Hi,
> 
> On 4/26/2017 9:45 PM, Ralph Sennhauser wrote:
>> Hi Sricharan R,
>>
>> Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time
>> for platform/amba/pci bus devices") causes a kernel panic as in the log
>> below on an armada-385. Reverting the commit fixes the issue.
>>
>> Regards
>> Ralph
> 
> Somehow not getting a obvious clue on whats going wrong with the logs
> below. From the log and looking in to dts, the drivers seems to the one for
> "marvell,armada-370-neta". Issue looks the data from the dma has gone bad
> and subsequently referring the wrong data has resulted in the crash.
> Looks like the dma_masks is the one going wrong.
> Can i get some logs from mvneta_probe, about dev->dma_mask,
> dev->coherent_dma_mask and dev->dma_ops with and without the patch
> to see whats the difference ?
> 

Attached the patch with debug prints.

Regards,
 Sricharan
From fe77d3968d6bc35b46e5f30d5c67c7603aa10597 Mon Sep 17 00:00:00 2001
From: Sricharan R 
Date: Fri, 28 Apr 2017 09:43:26 +0530
Subject: [PATCH] Debug prints

Signed-off-by: Sricharan R 
---
 drivers/net/ethernet/marvell/mvneta.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c 
b/drivers/net/ethernet/marvell/mvneta.c
index d297011..250f0b2 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4146,6 +4146,12 @@ static int mvneta_probe(struct platform_device *pdev)
int phy_mode;
int err;
int cpu;
+   struct device *ddev = >dev;
+
+
+   dev_err(ddev, "dev->dma_mask 0x%llx\n", *(ddev->dma_mask));
+   dev_err(ddev, "dev->coherent_dma_mask 0x%llx\n", 
ddev->coherent_dma_mask);
+   dev_err(ddev, "dev->dma_ops 0x%llx\n", ddev->dma_ops);
 
dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, 
rxq_number);
if (!dev)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation



Re: [REGRESSION next-20170426] Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices") causes oops in mvneta

2017-04-27 Thread Sricharan R
Hi Ralph,

On 4/27/2017 7:05 PM, Sricharan R wrote:
> Hi,
> 
> On 4/26/2017 9:45 PM, Ralph Sennhauser wrote:
>> Hi Sricharan R,
>>
>> Commit 09515ef5ddad ("of/acpi: Configure dma operations at probe time
>> for platform/amba/pci bus devices") causes a kernel panic as in the log
>> below on an armada-385. Reverting the commit fixes the issue.
>>
>> Regards
>> Ralph
> 
> Somehow not getting a obvious clue on whats going wrong with the logs
> below. From the log and looking in to dts, the drivers seems to the one for
> "marvell,armada-370-neta". Issue looks the data from the dma has gone bad
> and subsequently referring the wrong data has resulted in the crash.
> Looks like the dma_masks is the one going wrong.
> Can i get some logs from mvneta_probe, about dev->dma_mask,
> dev->coherent_dma_mask and dev->dma_ops with and without the patch
> to see whats the difference ?
> 

Attached the patch with debug prints.

Regards,
 Sricharan
From fe77d3968d6bc35b46e5f30d5c67c7603aa10597 Mon Sep 17 00:00:00 2001
From: Sricharan R 
Date: Fri, 28 Apr 2017 09:43:26 +0530
Subject: [PATCH] Debug prints

Signed-off-by: Sricharan R 
---
 drivers/net/ethernet/marvell/mvneta.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c 
b/drivers/net/ethernet/marvell/mvneta.c
index d297011..250f0b2 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4146,6 +4146,12 @@ static int mvneta_probe(struct platform_device *pdev)
int phy_mode;
int err;
int cpu;
+   struct device *ddev = >dev;
+
+
+   dev_err(ddev, "dev->dma_mask 0x%llx\n", *(ddev->dma_mask));
+   dev_err(ddev, "dev->coherent_dma_mask 0x%llx\n", 
ddev->coherent_dma_mask);
+   dev_err(ddev, "dev->dma_ops 0x%llx\n", ddev->dma_ops);
 
dev = alloc_etherdev_mqs(sizeof(struct mvneta_port), txq_number, 
rxq_number);
if (!dev)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of 
Code Aurora Forum, hosted by The Linux Foundation



Re: [PATCH V4 00/17] thermal: cpu_cooling: improve interaction with cpufreq core

2017-04-27 Thread Viresh Kumar
On 27-04-17, 09:26, Eduardo Valentin wrote:
> On Wed, Apr 26, 2017 at 04:17:52PM +0530, Viresh Kumar wrote:
> > On 26-04-17, 11:41, Lukasz Luba wrote:
> > > Hi Viresh,
> > > 
> > > I went through the v4 code and it looks good to me.
> > > Feel free to add for the v4 series
> > > Reviewed-by: Lukasz Luba 
> > 
> > Thanks a lot for testing and reviewing the series.
> > 
> > @Eduardo: Will you be able to pick it for 4.12 ?
> 
> I feel like this is late for 4.12 given that we are already -rc8. 
> Besides, it has gone through a couple of cycles of broken interactions
> with IPA (now fixed).
> 
> So, I would prefer we give this series a bit more of testing time on
> linux-next. I can pull it and put into linux-next after the coming
> merge window is closed.

Ok, sure. No hurry.

-- 
viresh


Re: [PATCH V4 00/17] thermal: cpu_cooling: improve interaction with cpufreq core

2017-04-27 Thread Viresh Kumar
On 27-04-17, 09:26, Eduardo Valentin wrote:
> On Wed, Apr 26, 2017 at 04:17:52PM +0530, Viresh Kumar wrote:
> > On 26-04-17, 11:41, Lukasz Luba wrote:
> > > Hi Viresh,
> > > 
> > > I went through the v4 code and it looks good to me.
> > > Feel free to add for the v4 series
> > > Reviewed-by: Lukasz Luba 
> > 
> > Thanks a lot for testing and reviewing the series.
> > 
> > @Eduardo: Will you be able to pick it for 4.12 ?
> 
> I feel like this is late for 4.12 given that we are already -rc8. 
> Besides, it has gone through a couple of cycles of broken interactions
> with IPA (now fixed).
> 
> So, I would prefer we give this series a bit more of testing time on
> linux-next. I can pull it and put into linux-next after the coming
> merge window is closed.

Ok, sure. No hurry.

-- 
viresh


[PATCH] MAINTAINERS: Remove myself as reviewer for Exynos

2017-04-27 Thread Javier Martinez Canillas
I left Samsung and lost access to most Exynos hardware and documentation.
Also, I likely won't be able to keep an eye on the platform anymore in the
short term so remove myself as a reviewer for Exynos.

Signed-off-by: Javier Martinez Canillas 

---

 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 45be5ef6056c..e35370a0c04c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1722,7 +1722,6 @@ N:rockchip
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M: Kukjin Kim 
 M: Krzysztof Kozlowski 
-R: Javier Martinez Canillas 
 L: linux-arm-ker...@lists.infradead.org (moderated for non-subscribers)
 L: linux-samsung-...@vger.kernel.org (moderated for non-subscribers)
 Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
-- 
2.9.3



[PATCH] MAINTAINERS: Remove myself as reviewer for Exynos

2017-04-27 Thread Javier Martinez Canillas
I left Samsung and lost access to most Exynos hardware and documentation.
Also, I likely won't be able to keep an eye on the platform anymore in the
short term so remove myself as a reviewer for Exynos.

Signed-off-by: Javier Martinez Canillas 

---

 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 45be5ef6056c..e35370a0c04c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1722,7 +1722,6 @@ N:rockchip
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M: Kukjin Kim 
 M: Krzysztof Kozlowski 
-R: Javier Martinez Canillas 
 L: linux-arm-ker...@lists.infradead.org (moderated for non-subscribers)
 L: linux-samsung-...@vger.kernel.org (moderated for non-subscribers)
 Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
-- 
2.9.3



Re: [PATCH] iio: st_pressure: st_accel: Initialise sensor platform data properly

2017-04-27 Thread Shrirang Bagul
On Wed, 2017-04-26 at 06:37 +0100, Jonathan Cameron wrote:
> On 19/04/17 15:05, Shrirang Bagul wrote:
> > This patch fixes the sensor platform data initialisation for st_pressure
> > and st_accel device drivers. Without this patch, the driver fails to
> > register the sensors when the user removes and re-loads the driver.
> > 
> > 1. Unload the kernel modules for st_pressure
> > $ sudo rmmod st_pressure_i2c
> > $ sudo rmmod st_pressure
> > 
> > 2. Re-load the driver
> > $ sudo insmod st_pressure
> > $ sudo insmod st_pressure_i2c
> > --- OR ---
> > $ sudo modprobe st_pressure_i2c
> > 
> > dmesg errors:
> > 
> > [ 160.935707] iio iio:device2: DRDY on pdata not valid.
> > [ 160.941505] st-press-i2c: probe of i2c-SMO9210:00 failed with error -22
> > 
> > The driver fails to register the pressure sensor device. Devices
> > supported by st_accel driver also suffer from the same bug.
> > 
> > Signed-off-by: Shrirang Bagul 
> 
> Unless I am missing something, the severity of this bug is fairly minor
> so whilst I'm pleased to have it fixed, I'm not intending to mark this
> for stable.
> 
> If anyone has a cunning way of exploiting it then let me know!
Stress testing st_pressure_i2c, st_pressure module load/unload does cause the 
kernel
to panic.

Apr 27 18:11:21 caracalla kernel: [ 8417.756968] BUG: unable to handle kernel 
paging
request at c0377c48
Apr 27 18:11:21 caracalla kernel: [ 8417.764869] IP: []
st_sensors_init_sensor+0x26/0x1f0 [st_sensors]
Apr 27 18:11:21 caracalla kernel: [ 8417.773550] PGD 2e0d067 PUD 2e0f067 PMD 
3792b067
PTE 0
Apr 27 18:11:21 caracalla kernel: [ 8417.779382] Oops:  [#1] SMP
Apr 27 18:11:21 caracalla kernel: [ 8417.783045] Modules linked in:
st_pressure_i2c(+) st_pressure hts221_i2c msr cmac algif_hash algif_skcipher 
af_alg
hci_vhci rfcomm bnep arc4 iTCO_wdt iTCO_vendor_support snd_soc_sst_bytcr_rt5660
dell_wmi sparse_keymap ven_rsi_sdio ven_rsi_91x intel_soc_dts_iosf bluetooth
intel_powerclamp coretemp kvm_intel kvm dcdbas mac80211 irqbypass 
punit_atom_debug
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 
cfg80211 lrw
gf128mul glue_helper ablk_helper input_leds cryptd psmouse serio_raw cdc_mbim 
cdc_wdm
cdc_ncm uas r8169 usbnet cdc_acm lpc_ich mii mei_txe mei snd_intel_sst_acpi 
shpchp
snd_intel_sst_core snd_soc_rt5660 snd_soc_sst_mfld_platform snd_soc_rl6231
snd_soc_core 8250_fintek fjes st_accel_i2c st_accel st_sensors_i2c hts221 
i2c_hid
st_sensors snd_compress ad5593r ac97_bus hid industrialio_triggered_buffer 
kfifo_buf
ad5592r_base industrialio snd_pcm_dmaengine snd_pcm tpm_crb snd_timer dw_dmac 
snd
dw_dmac_core soundcore rfkill_gpio mac_hid spi_pxa2xx_platform
i2c_designware_platform i2c_designware_core pwm_lpss_platform snd_soc_sst_acpi
8250_dw pwm_lpss iptable_filter ip_tables ip6table_filter ip6_tables x_tables 
autofs4
sdhci_pci virtio_scsi ahci libahci mmc_block i915 i2c_algo_bit drm_kms_helper
syscopyarea sysfillrect sysimgblt fb_sys_fops usb_storage drm wmi video 
sdhci_acpi
sdhci [last unloaded: st_pressure]
Apr 27 18:11:21 caracalla kernel: [ 8417.917926] CPU: 0 PID: 15523 Comm: 
modprobe Not
tainted 4.4.0-77-generic #98-Ubuntu
Apr 27 18:11:21 caracalla kernel: [ 8417.926686] Hardware name: Dell Inc. Edge
Gateway 3003/ , BIOS 00.00.28 03/23/2017
Apr 27 18:11:21 caracalla kernel: [ 8417.935739] task: 88007521a640 ti:
88007639c000 task.ti: 88007639c000
Apr 27 18:11:21 caracalla kernel: [ 8417.944202] RIP: 0010:[]
[] st_sensors_init_sensor+0x26/0x1f0 [st_sensors]
Apr 27 18:11:21 caracalla kernel: [ 8417.955627] RSP: 0018:88007639fac8 
EFLAGS:
00010206
Apr 27 18:11:21 caracalla kernel: [ 8417.961632] RAX: c0328640 RBX:
880076832800 RCX: 0003
Apr 27 18:11:21 caracalla kernel: [ 8417.969702] RDX: 8800371c9820 RSI:
c0377c48 RDI: 880076832800
Apr 27 18:11:21 caracalla kernel: [ 8417.92] RBP: 88007639fad0 R08:
88007639c000 R09: 
Apr 27 18:11:21 caracalla kernel: [ 8417.985841] R10: 00b6 R11:
 R12: 
Apr 27 18:11:21 caracalla kernel: [ 8417.993911] R13: 0003 R14:
880076832cc0 R15: c03730a0
Apr 27 18:11:21 caracalla kernel: [ 8418.001982] FS: 7f71faa83700()
GS:880070a0() knlGS:
Apr 27 18:11:21 caracalla kernel: [ 8418.011133] CS: 0010 DS:  ES:  CR0:
80050033
Apr 27 18:11:21 caracalla kernel: [ 8418.017629] CR2: c0377c48 CR3:
74d7d000 CR4: 001006f0
Apr 27 18:11:21 caracalla kernel: [ 8418.025698] Stack:
Apr 27 18:11:21 caracalla kernel: [ 8418.027967] 880076832800 
88007639faf8
c03270ea 880076832800
Apr 27 18:11:21 caracalla kernel: [ 8418.036363] 8800371c9800 
8800371c9820
88007639fb30 c037207d
Apr 27 18:11:21 caracalla kernel: [ 8418.044757] c03730a0 
8800371c9820
8800371c9800 c0372020
Apr 27 18:11:21 caracalla kernel: 

Re: [PATCH] iio: st_pressure: st_accel: Initialise sensor platform data properly

2017-04-27 Thread Shrirang Bagul
On Wed, 2017-04-26 at 06:37 +0100, Jonathan Cameron wrote:
> On 19/04/17 15:05, Shrirang Bagul wrote:
> > This patch fixes the sensor platform data initialisation for st_pressure
> > and st_accel device drivers. Without this patch, the driver fails to
> > register the sensors when the user removes and re-loads the driver.
> > 
> > 1. Unload the kernel modules for st_pressure
> > $ sudo rmmod st_pressure_i2c
> > $ sudo rmmod st_pressure
> > 
> > 2. Re-load the driver
> > $ sudo insmod st_pressure
> > $ sudo insmod st_pressure_i2c
> > --- OR ---
> > $ sudo modprobe st_pressure_i2c
> > 
> > dmesg errors:
> > 
> > [ 160.935707] iio iio:device2: DRDY on pdata not valid.
> > [ 160.941505] st-press-i2c: probe of i2c-SMO9210:00 failed with error -22
> > 
> > The driver fails to register the pressure sensor device. Devices
> > supported by st_accel driver also suffer from the same bug.
> > 
> > Signed-off-by: Shrirang Bagul 
> 
> Unless I am missing something, the severity of this bug is fairly minor
> so whilst I'm pleased to have it fixed, I'm not intending to mark this
> for stable.
> 
> If anyone has a cunning way of exploiting it then let me know!
Stress testing st_pressure_i2c, st_pressure module load/unload does cause the 
kernel
to panic.

Apr 27 18:11:21 caracalla kernel: [ 8417.756968] BUG: unable to handle kernel 
paging
request at c0377c48
Apr 27 18:11:21 caracalla kernel: [ 8417.764869] IP: []
st_sensors_init_sensor+0x26/0x1f0 [st_sensors]
Apr 27 18:11:21 caracalla kernel: [ 8417.773550] PGD 2e0d067 PUD 2e0f067 PMD 
3792b067
PTE 0
Apr 27 18:11:21 caracalla kernel: [ 8417.779382] Oops:  [#1] SMP
Apr 27 18:11:21 caracalla kernel: [ 8417.783045] Modules linked in:
st_pressure_i2c(+) st_pressure hts221_i2c msr cmac algif_hash algif_skcipher 
af_alg
hci_vhci rfcomm bnep arc4 iTCO_wdt iTCO_vendor_support snd_soc_sst_bytcr_rt5660
dell_wmi sparse_keymap ven_rsi_sdio ven_rsi_91x intel_soc_dts_iosf bluetooth
intel_powerclamp coretemp kvm_intel kvm dcdbas mac80211 irqbypass 
punit_atom_debug
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 
cfg80211 lrw
gf128mul glue_helper ablk_helper input_leds cryptd psmouse serio_raw cdc_mbim 
cdc_wdm
cdc_ncm uas r8169 usbnet cdc_acm lpc_ich mii mei_txe mei snd_intel_sst_acpi 
shpchp
snd_intel_sst_core snd_soc_rt5660 snd_soc_sst_mfld_platform snd_soc_rl6231
snd_soc_core 8250_fintek fjes st_accel_i2c st_accel st_sensors_i2c hts221 
i2c_hid
st_sensors snd_compress ad5593r ac97_bus hid industrialio_triggered_buffer 
kfifo_buf
ad5592r_base industrialio snd_pcm_dmaengine snd_pcm tpm_crb snd_timer dw_dmac 
snd
dw_dmac_core soundcore rfkill_gpio mac_hid spi_pxa2xx_platform
i2c_designware_platform i2c_designware_core pwm_lpss_platform snd_soc_sst_acpi
8250_dw pwm_lpss iptable_filter ip_tables ip6table_filter ip6_tables x_tables 
autofs4
sdhci_pci virtio_scsi ahci libahci mmc_block i915 i2c_algo_bit drm_kms_helper
syscopyarea sysfillrect sysimgblt fb_sys_fops usb_storage drm wmi video 
sdhci_acpi
sdhci [last unloaded: st_pressure]
Apr 27 18:11:21 caracalla kernel: [ 8417.917926] CPU: 0 PID: 15523 Comm: 
modprobe Not
tainted 4.4.0-77-generic #98-Ubuntu
Apr 27 18:11:21 caracalla kernel: [ 8417.926686] Hardware name: Dell Inc. Edge
Gateway 3003/ , BIOS 00.00.28 03/23/2017
Apr 27 18:11:21 caracalla kernel: [ 8417.935739] task: 88007521a640 ti:
88007639c000 task.ti: 88007639c000
Apr 27 18:11:21 caracalla kernel: [ 8417.944202] RIP: 0010:[]
[] st_sensors_init_sensor+0x26/0x1f0 [st_sensors]
Apr 27 18:11:21 caracalla kernel: [ 8417.955627] RSP: 0018:88007639fac8 
EFLAGS:
00010206
Apr 27 18:11:21 caracalla kernel: [ 8417.961632] RAX: c0328640 RBX:
880076832800 RCX: 0003
Apr 27 18:11:21 caracalla kernel: [ 8417.969702] RDX: 8800371c9820 RSI:
c0377c48 RDI: 880076832800
Apr 27 18:11:21 caracalla kernel: [ 8417.92] RBP: 88007639fad0 R08:
88007639c000 R09: 
Apr 27 18:11:21 caracalla kernel: [ 8417.985841] R10: 00b6 R11:
 R12: 
Apr 27 18:11:21 caracalla kernel: [ 8417.993911] R13: 0003 R14:
880076832cc0 R15: c03730a0
Apr 27 18:11:21 caracalla kernel: [ 8418.001982] FS: 7f71faa83700()
GS:880070a0() knlGS:
Apr 27 18:11:21 caracalla kernel: [ 8418.011133] CS: 0010 DS:  ES:  CR0:
80050033
Apr 27 18:11:21 caracalla kernel: [ 8418.017629] CR2: c0377c48 CR3:
74d7d000 CR4: 001006f0
Apr 27 18:11:21 caracalla kernel: [ 8418.025698] Stack:
Apr 27 18:11:21 caracalla kernel: [ 8418.027967] 880076832800 
88007639faf8
c03270ea 880076832800
Apr 27 18:11:21 caracalla kernel: [ 8418.036363] 8800371c9800 
8800371c9820
88007639fb30 c037207d
Apr 27 18:11:21 caracalla kernel: [ 8418.044757] c03730a0 
8800371c9820
8800371c9800 c0372020
Apr 27 18:11:21 caracalla kernel: [ 8418.053152] Call Trace:
Apr 

RE: [PATCH v2 1/2] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling

2017-04-27 Thread Zheng, Lv
Hi, Rafael

I reconsidered your comments.
Seems there are several problems you might not be aware of.
Let me reply again.

> From: linux-acpi-ow...@vger.kernel.org 
> [mailto:linux-acpi-ow...@vger.kernel.org] On Behalf Of Rafael J.
> Wysocki
> Subject: Re: [PATCH v2 1/2] ACPICA: Tables: Fix regression introduced by a 
> too early mechanism
> enabling
> 
> On Thursday, April 27, 2017 04:22:44 PM Lv Zheng wrote:
> > In the Linux kernel side, acpi_get_table() clones haven't been fully
> > balanced by acpi_put_table() invocations. In ACPICA side, due to the design
> > change, there are also unbalanced acpi_get_table_by_index() invocations
> > requiring special care to be cleaned up.
> >
> > So it is not a good timing to report acpi_get_table() counting errors for
> > this period. The strict balanced validation count check should only be
> > enabled after confirming that all invocations are safe and compliant to
> > their designed purposes.
> >
> > Thus this patch removes the fatal error along with lthe error report to
> > fix this issue. Reported by Dan Williams, fixed by Lv Zheng.
> >
> > Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() 
> > and
> early_acpi_os_unmap_memory() from Linux kernel")
> > Reported-by: Dan Williams 
> > Signed-off-by: Lv Zheng 
> 
> Please do not add #if 0 anywhere to the kernel code base.
> 
> If you need to drop some piece of code, just drop it.

OK. This comment is addressable.

> 
> And in this particular case validation_count should be dropped from the data
> type definition too.

I don't think this comment is addressable.

===
The validation_count is not only used for the late stage, it's also used
for the early boot stage. And during the early boot stage, I'm sure the
validation count is used by us now to correctly unmap early maps. If it is
dropped, the early maps will be leaked to the late stage, leading to some
kernel crashes.

The use case has already been validated to be working in previous Linux
release cycles, also with you, Dan and the other driver maintainers' help.
All crashing early cases are identified and the validation counts are
ensured to be balanced during the early stage.
===
Then I thought again if you did have some special concerns for using this
mechanism in the late stage without making the validation count balanced.
The only case we need to worry about is when acpi_get_table() is invoked
more than 65535 times, then more than 65535 acpi_put_table() could
potentially release the table mapping while there are still users using
the mapped table, this could cause kernel crashes.

What does this problem mean to us? It mean: For all frequently invoked
acpi_get_table(), without fixing them altogether, we SHOULD NOT add
acpi_put_table() for them.

As far as I can see, the followings are frequent acpi_get_table() invoking
cases, they need to be fixed altogether:
1. sysfs table access
2. load/unload tables, could potentially be invoked in a
   per-cpu-hotplugging frequent style.
3. get/put in some loadable kernel modules (this actually is not such
   frequent)
4. tables used across suspend/resume (I checked, FACS used in this case is
   safe)

I began to doubt the correctness of fixing only the sysfs case without
fixing all the other "frequent acpi_get_table() cases". It seems fixing it
only adds unwanted acpi_put_table() during the special period, and this
actually breaks the planned way, and can potentially break the end users.
===
Then I checked how we could make Load/Unload invocations balanced inside of
ACPICA. It looks we have the following things that must be done to achieve
the balanced validation count inside of ACPICA:
1. There are many "table handler" invocations, we need a single place to
   invoke get/put before/after invoking the handler.
2. There is an acpi_tb_set_table_loaded_flag() API, we need to invoke
   get/put inside of it when the flag is set/unset.
3. For all other acpi_get_table() clone invocations, we need to add
   balanced put(s) to the get(s) except those pinned as global maps.

Then we'll soon figure out that we need to do the following prerequisites:
1. Collect table handlers into 1 function;
2. Should stop invoking acpi_ns_load_table() from other places, but only
   invoke it from acpi_tb_load_table();
3. After changing due to 1 and 2, we need to make sure the table mutex is
   still correctly used;
4. After changing due to 2 and 3, we actually extends some sanity checks
   to the processes originally not covered, and we need to make sure the
   changed sanity checks is still working.
So you'll see the entire work must be based on what Hans De Geode is
asking for: "https://github.com/acpica/acpica/pull/121;. This pull request
is still pending for 

RE: [PATCH v2 1/2] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling

2017-04-27 Thread Zheng, Lv
Hi, Rafael

I reconsidered your comments.
Seems there are several problems you might not be aware of.
Let me reply again.

> From: linux-acpi-ow...@vger.kernel.org 
> [mailto:linux-acpi-ow...@vger.kernel.org] On Behalf Of Rafael J.
> Wysocki
> Subject: Re: [PATCH v2 1/2] ACPICA: Tables: Fix regression introduced by a 
> too early mechanism
> enabling
> 
> On Thursday, April 27, 2017 04:22:44 PM Lv Zheng wrote:
> > In the Linux kernel side, acpi_get_table() clones haven't been fully
> > balanced by acpi_put_table() invocations. In ACPICA side, due to the design
> > change, there are also unbalanced acpi_get_table_by_index() invocations
> > requiring special care to be cleaned up.
> >
> > So it is not a good timing to report acpi_get_table() counting errors for
> > this period. The strict balanced validation count check should only be
> > enabled after confirming that all invocations are safe and compliant to
> > their designed purposes.
> >
> > Thus this patch removes the fatal error along with lthe error report to
> > fix this issue. Reported by Dan Williams, fixed by Lv Zheng.
> >
> > Fixes: 174cc7187e6f ("ACPICA: Tables: Back port acpi_get_table_with_size() 
> > and
> early_acpi_os_unmap_memory() from Linux kernel")
> > Reported-by: Dan Williams 
> > Signed-off-by: Lv Zheng 
> 
> Please do not add #if 0 anywhere to the kernel code base.
> 
> If you need to drop some piece of code, just drop it.

OK. This comment is addressable.

> 
> And in this particular case validation_count should be dropped from the data
> type definition too.

I don't think this comment is addressable.

===
The validation_count is not only used for the late stage, it's also used
for the early boot stage. And during the early boot stage, I'm sure the
validation count is used by us now to correctly unmap early maps. If it is
dropped, the early maps will be leaked to the late stage, leading to some
kernel crashes.

The use case has already been validated to be working in previous Linux
release cycles, also with you, Dan and the other driver maintainers' help.
All crashing early cases are identified and the validation counts are
ensured to be balanced during the early stage.
===
Then I thought again if you did have some special concerns for using this
mechanism in the late stage without making the validation count balanced.
The only case we need to worry about is when acpi_get_table() is invoked
more than 65535 times, then more than 65535 acpi_put_table() could
potentially release the table mapping while there are still users using
the mapped table, this could cause kernel crashes.

What does this problem mean to us? It mean: For all frequently invoked
acpi_get_table(), without fixing them altogether, we SHOULD NOT add
acpi_put_table() for them.

As far as I can see, the followings are frequent acpi_get_table() invoking
cases, they need to be fixed altogether:
1. sysfs table access
2. load/unload tables, could potentially be invoked in a
   per-cpu-hotplugging frequent style.
3. get/put in some loadable kernel modules (this actually is not such
   frequent)
4. tables used across suspend/resume (I checked, FACS used in this case is
   safe)

I began to doubt the correctness of fixing only the sysfs case without
fixing all the other "frequent acpi_get_table() cases". It seems fixing it
only adds unwanted acpi_put_table() during the special period, and this
actually breaks the planned way, and can potentially break the end users.
===
Then I checked how we could make Load/Unload invocations balanced inside of
ACPICA. It looks we have the following things that must be done to achieve
the balanced validation count inside of ACPICA:
1. There are many "table handler" invocations, we need a single place to
   invoke get/put before/after invoking the handler.
2. There is an acpi_tb_set_table_loaded_flag() API, we need to invoke
   get/put inside of it when the flag is set/unset.
3. For all other acpi_get_table() clone invocations, we need to add
   balanced put(s) to the get(s) except those pinned as global maps.

Then we'll soon figure out that we need to do the following prerequisites:
1. Collect table handlers into 1 function;
2. Should stop invoking acpi_ns_load_table() from other places, but only
   invoke it from acpi_tb_load_table();
3. After changing due to 1 and 2, we need to make sure the table mutex is
   still correctly used;
4. After changing due to 2 and 3, we actually extends some sanity checks
   to the processes originally not covered, and we need to make sure the
   changed sanity checks is still working.
So you'll see the entire work must be based on what Hans De Geode is
asking for: "https://github.com/acpica/acpica/pull/121;. This pull request
is still pending for further Windows behavior validation for how 

[rhashtable] 6d684e5469: Kernel panic - not syncing: rtnetlink_init: cannot initialize rtnetlink

2017-04-27 Thread kernel test robot
Greetings,

0day kernel testing robot got the below dmesg and the first bad commit is

https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master

commit 6d684e54690caef45cf14051ddeb7c71beeb681b
Author: Herbert Xu 
AuthorDate: Thu Apr 27 13:44:51 2017 +0800
Commit: David S. Miller 
CommitDate: Thu Apr 27 11:48:24 2017 -0400

rhashtable: Cap total number of entries to 2^31

When max_size is not set or if it set to a sufficiently large
value, the nelems counter can overflow.  This would cause havoc
with the automatic shrinking as it would then attempt to fit a
huge number of entries into a tiny hash table.

This patch fixes this by adding max_elems to struct rhashtable
to cap the number of elements.  This is set to 2^31 as nelems is
not a precise count.  This is sufficiently smaller than UINT_MAX
that it should be safe.

When max_size is set max_elems will be lowered to at most twice
max_size as is the status quo.

Signed-off-by: Herbert Xu 
Signed-off-by: David S. Miller 

4b726e81da  tcp: tcp_rack_reo_timeout() must update tp->tcp_mstamp
6d684e5469  rhashtable: Cap total number of entries to 2^31
e221c1f0fe  Merge tag 'wireless-drivers-next-for-davem-2017-04-27' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next
+-++++
| | 
4b726e81da | 6d684e5469 | e221c1f0fe |
+-++++
| boot_successes  | 35  
   | 0  | 0  |
| boot_failures   | 0   
   | 13 | 13 |
| Kernel_panic-not_syncing:rtnetlink_init:cannot_initialize_rtnetlink | 0   
   | 13 | 13 |
| BUG:kernel_in_stage | 0   
   | 1  | 1  |
+-++++

[0.106834] futex hash table entries: 256 (order: 3, 32768 bytes)
[0.148019] xor: automatically using best checksumming function   avx   
[0.148019] xor: automatically using best checksumming function   avx   
[0.152501] NET: Registered protocol family 16
[0.152501] NET: Registered protocol family 16
[0.157635] Kernel panic - not syncing: rtnetlink_init: cannot initialize 
rtnetlink
[0.157635] 
[0.157635] Kernel panic - not syncing: rtnetlink_init: cannot initialize 
rtnetlink
[0.157635] 
[0.162789] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
4.11.0-rc8-02028-g6d684e5 #1
[0.162789] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
4.11.0-rc8-02028-g6d684e5 #1
[0.166143] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.9.3-20161025_171302-gandalf 04/01/2014
[0.166143] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.9.3-20161025_171302-gandalf 04/01/2014
[0.169615] Call Trace:
[0.169615] Call Trace:
[0.170458]  show_stack+0x72/0x76
[0.170458]  show_stack+0x72/0x76
[0.171559]  dump_stack+0x10c/0x181
[0.171559]  dump_stack+0x10c/0x181
[0.172753]  ? __mutex_unlock_slowpath+0x171/0x34a
[0.172753]  ? __mutex_unlock_slowpath+0x171/0x34a
[0.174332]  panic+0x121/0x329
[0.174332]  panic+0x121/0x329
[0.175385]  rtnetlink_init+0x39/0x1b7
[0.175385]  rtnetlink_init+0x39/0x1b7
[0.176784]  netlink_proto_init+0x1bc/0x1da
[0.176784]  netlink_proto_init+0x1bc/0x1da
[0.178399]  ? net_ns_init+0x19c/0x19c
[0.178399]  ? net_ns_init+0x19c/0x19c
[0.180171]  ? eth_offload_init+0x14/0x14
[0.180171]  ? eth_offload_init+0x14/0x14
[0.182045]  do_one_initcall+0xa2/0x1cc
[0.182045]  do_one_initcall+0xa2/0x1cc
[0.183789]  do_basic_setup+0x98/0xb8
[0.183789]  do_basic_setup+0x98/0xb8
[0.185369]  kernel_init_freeable+0x92/0x147
[0.185369]  kernel_init_freeable+0x92/0x147
[0.187076]  ? rest_init+0x170/0x170
[0.187076]  ? rest_init+0x170/0x170
[0.188694]  kernel_init+0xe/0x171
[0.188694]  kernel_init+0xe/0x171
[0.190235]  ret_from_fork+0x31/0x40
[0.190235]  ret_from_fork+0x31/0x40


  # HH:MM RESULT GOOD 
BAD GOOD_BUT_DIRTY DIRTY_NOT_BAD
git bisect start d1c34825a799a7ba4693bf46fbc79238aed374e6 
5a7ad1146caa895ad718a534399e38bd2ba721b7 --
git bisect good c6616d396b8b3e3c55cc66b24652a63108ea98dc  # 06:18  G 11 
00   0  Merge 
'linux-review/Jerry-Snitselaar/tpm-print-tpm-error-code-in-hex-for-TPM2-0-devices/20170428-043331'
 into devel-catchup-201704280512
git bisect good 

[rhashtable] 6d684e5469: Kernel panic - not syncing: rtnetlink_init: cannot initialize rtnetlink

2017-04-27 Thread kernel test robot
Greetings,

0day kernel testing robot got the below dmesg and the first bad commit is

https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git master

commit 6d684e54690caef45cf14051ddeb7c71beeb681b
Author: Herbert Xu 
AuthorDate: Thu Apr 27 13:44:51 2017 +0800
Commit: David S. Miller 
CommitDate: Thu Apr 27 11:48:24 2017 -0400

rhashtable: Cap total number of entries to 2^31

When max_size is not set or if it set to a sufficiently large
value, the nelems counter can overflow.  This would cause havoc
with the automatic shrinking as it would then attempt to fit a
huge number of entries into a tiny hash table.

This patch fixes this by adding max_elems to struct rhashtable
to cap the number of elements.  This is set to 2^31 as nelems is
not a precise count.  This is sufficiently smaller than UINT_MAX
that it should be safe.

When max_size is set max_elems will be lowered to at most twice
max_size as is the status quo.

Signed-off-by: Herbert Xu 
Signed-off-by: David S. Miller 

4b726e81da  tcp: tcp_rack_reo_timeout() must update tp->tcp_mstamp
6d684e5469  rhashtable: Cap total number of entries to 2^31
e221c1f0fe  Merge tag 'wireless-drivers-next-for-davem-2017-04-27' of 
git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next
+-++++
| | 
4b726e81da | 6d684e5469 | e221c1f0fe |
+-++++
| boot_successes  | 35  
   | 0  | 0  |
| boot_failures   | 0   
   | 13 | 13 |
| Kernel_panic-not_syncing:rtnetlink_init:cannot_initialize_rtnetlink | 0   
   | 13 | 13 |
| BUG:kernel_in_stage | 0   
   | 1  | 1  |
+-++++

[0.106834] futex hash table entries: 256 (order: 3, 32768 bytes)
[0.148019] xor: automatically using best checksumming function   avx   
[0.148019] xor: automatically using best checksumming function   avx   
[0.152501] NET: Registered protocol family 16
[0.152501] NET: Registered protocol family 16
[0.157635] Kernel panic - not syncing: rtnetlink_init: cannot initialize 
rtnetlink
[0.157635] 
[0.157635] Kernel panic - not syncing: rtnetlink_init: cannot initialize 
rtnetlink
[0.157635] 
[0.162789] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
4.11.0-rc8-02028-g6d684e5 #1
[0.162789] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 
4.11.0-rc8-02028-g6d684e5 #1
[0.166143] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.9.3-20161025_171302-gandalf 04/01/2014
[0.166143] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.9.3-20161025_171302-gandalf 04/01/2014
[0.169615] Call Trace:
[0.169615] Call Trace:
[0.170458]  show_stack+0x72/0x76
[0.170458]  show_stack+0x72/0x76
[0.171559]  dump_stack+0x10c/0x181
[0.171559]  dump_stack+0x10c/0x181
[0.172753]  ? __mutex_unlock_slowpath+0x171/0x34a
[0.172753]  ? __mutex_unlock_slowpath+0x171/0x34a
[0.174332]  panic+0x121/0x329
[0.174332]  panic+0x121/0x329
[0.175385]  rtnetlink_init+0x39/0x1b7
[0.175385]  rtnetlink_init+0x39/0x1b7
[0.176784]  netlink_proto_init+0x1bc/0x1da
[0.176784]  netlink_proto_init+0x1bc/0x1da
[0.178399]  ? net_ns_init+0x19c/0x19c
[0.178399]  ? net_ns_init+0x19c/0x19c
[0.180171]  ? eth_offload_init+0x14/0x14
[0.180171]  ? eth_offload_init+0x14/0x14
[0.182045]  do_one_initcall+0xa2/0x1cc
[0.182045]  do_one_initcall+0xa2/0x1cc
[0.183789]  do_basic_setup+0x98/0xb8
[0.183789]  do_basic_setup+0x98/0xb8
[0.185369]  kernel_init_freeable+0x92/0x147
[0.185369]  kernel_init_freeable+0x92/0x147
[0.187076]  ? rest_init+0x170/0x170
[0.187076]  ? rest_init+0x170/0x170
[0.188694]  kernel_init+0xe/0x171
[0.188694]  kernel_init+0xe/0x171
[0.190235]  ret_from_fork+0x31/0x40
[0.190235]  ret_from_fork+0x31/0x40


  # HH:MM RESULT GOOD 
BAD GOOD_BUT_DIRTY DIRTY_NOT_BAD
git bisect start d1c34825a799a7ba4693bf46fbc79238aed374e6 
5a7ad1146caa895ad718a534399e38bd2ba721b7 --
git bisect good c6616d396b8b3e3c55cc66b24652a63108ea98dc  # 06:18  G 11 
00   0  Merge 
'linux-review/Jerry-Snitselaar/tpm-print-tpm-error-code-in-hex-for-TPM2-0-devices/20170428-043331'
 into devel-catchup-201704280512
git bisect good b25b2abcfd0401937550e1d1806d83d796f1a043  # 06:50  G 10 
02   2  Merge 'iio/fixes-togreg' into 

Re: [PATCH v6 2/5] firmware: add extensible driver data API

2017-04-27 Thread AKASHI Takahiro
Luis,

On Fri, Apr 28, 2017 at 02:51:44AM +0200, Luis R. Rodriguez wrote:
> On Thu, Apr 13, 2017 at 06:36:17PM +0900, AKASHI Takahiro wrote:
> > On Wed, Mar 29, 2017 at 08:25:11PM -0700, Luis R. Rodriguez wrote:
> > > Signed-off-by: Luis R. Rodriguez 
> > > ---
> > >  Documentation/driver-api/firmware/driver_data.rst  |  77 +
> > >  Documentation/driver-api/firmware/index.rst|   1 +
> > >  Documentation/driver-api/firmware/introduction.rst |  16 +
> > 
> > I think we'd better to split code and documents into different patches
> > for easier reviews.
> 
> Sure, done.
> 
> > > --- a/Documentation/driver-api/firmware/introduction.rst
> > > +++ b/Documentation/driver-api/firmware/introduction.rst
> > > @@ -25,3 +25,19 @@ are already using asynchronous initialization 
> > > mechanisms which will not
> > >  stall or delay boot. Even if loading firmware does not take a lot of time
> > >  processing firmware might, and this can still delay boot or 
> > > initialization,
> > >  as such mechanisms such as asynchronous probe can help supplement 
> > > drivers.
> > > +
> > > +Two APIs
> > > +
> > > +
> > > +Two APIs are provided for firmware:
> > > +
> > > +* request_firmware API - old firmware API
> > > +* driver_data API - flexible API
> > 
> > You can add links:
> > 
> >   * `request_firmware API`_ - old firmware API
> >   * `driver_data API`_ - flexible API
> > 
> >   .. _`request_firmware API`: ./request_firmware.rst
> >   .. _`driver_data API`: ./driver_data.rst
> 
> Done!
> 
> > > +int driver_data_request_sync(const char *name,
> > > +  const struct driver_data_req_params *req_params,
> > > +  struct device *device)
> > > +{
> > > + const struct firmware *driver_data;
> > > + const struct driver_data_reqs *sync_reqs;
> > > + struct driver_data_params params = {
> > > + .req_params = *req_params,
> > > + };
> > > + int ret;
> > > +
> > > + if (!device || !req_params || !name || name[0] == '\0')
> > > + return -EINVAL;
> > > +
> > > + if (req_params->sync_reqs.mode != DRIVER_DATA_SYNC)
> > > + return -EINVAL;
> > > +
> > > + if (driver_data_sync_opt_cb(req_params) &&
> > > + !driver_data_param_optional(req_params))
> > > + return -EINVAL;
> > > +
> > > + sync_reqs = _sync_reqs;
> > > +
> > > + __module_get(sync_reqs->module);
> > > + get_device(device);
> > > +
> > > + ret = _request_firmware(_data, name, , device);
> > > + if (ret && driver_data_param_optional(req_params))
> > > + ret = driver_data_sync_opt_call_cb(req_params);
> > > + else
> > > + ret = driver_data_sync_call_cb(req_params, driver_data);
> > 
> > It looks a bit weird to me that a failure callback is called
> > only if "optional" is set. I think that it makes more sense
> > that a failure callback is always called if _request_firmware() fails.
> 
> Let's think about this: does it make sense for the there to be a callback
> if the file was not optional ? Keep in mind the optional flag has its own
> semantics, it affects printing on error, on file not found. The semantics
> of the "optional callback" is precisely defined for when the first file
> is optional, so its by definition.
> 
> If we were to not require optional then it would be more of a "failure 
> callback",
> as you put it, but then folks could be stuffing these with all their error
> paths, and that's not what this is for. The optional callback is to handle
> an alternative *viable* approach *iff* the first file we look for is not 
> found.

In sync case, I don't think we have a strong reason to have a callback
as we can do anything depending on a return value from _request_firmware().
The only merit would be that we could release buffers automatically?

In async case, I think that we should have a callback whether asynchronous
loader has succeeded or failed in order to know the result.
It will never be "optional" even on failure.

> > In addition, why not always return a return value of _request_firmare()?
> > Overwriting a return value by any of callback functions doesn't make sense,
> > particularly, in "sync" case.
> > One of the problems in this implementation is that we, drivers, have
> > no chance to know a return value of _request_firmware().
> 
> Ah, good point, well, we can pass it on the optional callback then, this
> way no information is lost.
> 
> Thoughts?

Depends on the discussion above.

Thanks,
-Takahiro AKASHI

> > For example, if the signature verification, which I'm now working on, fails,
> > ENOKEY or EKEYxxx will be returns. We may want to say more detailed
> > error messages depending on error code.
> 
> Makes sense. If the above suffices let me know.
> 
> > >  struct driver_data_req_params {
> > >   bool optional;
> > > + bool keep;
> > > + bool uses_api_versioning;
> > 
> > Do you have any reason that you don't use bit fields here?
> > More features are added, more 'boolean' are added.
> > (I mean it wastes memory.)
> 
> You're 

Re: [PATCH v6 2/5] firmware: add extensible driver data API

2017-04-27 Thread AKASHI Takahiro
Luis,

On Fri, Apr 28, 2017 at 02:51:44AM +0200, Luis R. Rodriguez wrote:
> On Thu, Apr 13, 2017 at 06:36:17PM +0900, AKASHI Takahiro wrote:
> > On Wed, Mar 29, 2017 at 08:25:11PM -0700, Luis R. Rodriguez wrote:
> > > Signed-off-by: Luis R. Rodriguez 
> > > ---
> > >  Documentation/driver-api/firmware/driver_data.rst  |  77 +
> > >  Documentation/driver-api/firmware/index.rst|   1 +
> > >  Documentation/driver-api/firmware/introduction.rst |  16 +
> > 
> > I think we'd better to split code and documents into different patches
> > for easier reviews.
> 
> Sure, done.
> 
> > > --- a/Documentation/driver-api/firmware/introduction.rst
> > > +++ b/Documentation/driver-api/firmware/introduction.rst
> > > @@ -25,3 +25,19 @@ are already using asynchronous initialization 
> > > mechanisms which will not
> > >  stall or delay boot. Even if loading firmware does not take a lot of time
> > >  processing firmware might, and this can still delay boot or 
> > > initialization,
> > >  as such mechanisms such as asynchronous probe can help supplement 
> > > drivers.
> > > +
> > > +Two APIs
> > > +
> > > +
> > > +Two APIs are provided for firmware:
> > > +
> > > +* request_firmware API - old firmware API
> > > +* driver_data API - flexible API
> > 
> > You can add links:
> > 
> >   * `request_firmware API`_ - old firmware API
> >   * `driver_data API`_ - flexible API
> > 
> >   .. _`request_firmware API`: ./request_firmware.rst
> >   .. _`driver_data API`: ./driver_data.rst
> 
> Done!
> 
> > > +int driver_data_request_sync(const char *name,
> > > +  const struct driver_data_req_params *req_params,
> > > +  struct device *device)
> > > +{
> > > + const struct firmware *driver_data;
> > > + const struct driver_data_reqs *sync_reqs;
> > > + struct driver_data_params params = {
> > > + .req_params = *req_params,
> > > + };
> > > + int ret;
> > > +
> > > + if (!device || !req_params || !name || name[0] == '\0')
> > > + return -EINVAL;
> > > +
> > > + if (req_params->sync_reqs.mode != DRIVER_DATA_SYNC)
> > > + return -EINVAL;
> > > +
> > > + if (driver_data_sync_opt_cb(req_params) &&
> > > + !driver_data_param_optional(req_params))
> > > + return -EINVAL;
> > > +
> > > + sync_reqs = _sync_reqs;
> > > +
> > > + __module_get(sync_reqs->module);
> > > + get_device(device);
> > > +
> > > + ret = _request_firmware(_data, name, , device);
> > > + if (ret && driver_data_param_optional(req_params))
> > > + ret = driver_data_sync_opt_call_cb(req_params);
> > > + else
> > > + ret = driver_data_sync_call_cb(req_params, driver_data);
> > 
> > It looks a bit weird to me that a failure callback is called
> > only if "optional" is set. I think that it makes more sense
> > that a failure callback is always called if _request_firmware() fails.
> 
> Let's think about this: does it make sense for the there to be a callback
> if the file was not optional ? Keep in mind the optional flag has its own
> semantics, it affects printing on error, on file not found. The semantics
> of the "optional callback" is precisely defined for when the first file
> is optional, so its by definition.
> 
> If we were to not require optional then it would be more of a "failure 
> callback",
> as you put it, but then folks could be stuffing these with all their error
> paths, and that's not what this is for. The optional callback is to handle
> an alternative *viable* approach *iff* the first file we look for is not 
> found.

In sync case, I don't think we have a strong reason to have a callback
as we can do anything depending on a return value from _request_firmware().
The only merit would be that we could release buffers automatically?

In async case, I think that we should have a callback whether asynchronous
loader has succeeded or failed in order to know the result.
It will never be "optional" even on failure.

> > In addition, why not always return a return value of _request_firmare()?
> > Overwriting a return value by any of callback functions doesn't make sense,
> > particularly, in "sync" case.
> > One of the problems in this implementation is that we, drivers, have
> > no chance to know a return value of _request_firmware().
> 
> Ah, good point, well, we can pass it on the optional callback then, this
> way no information is lost.
> 
> Thoughts?

Depends on the discussion above.

Thanks,
-Takahiro AKASHI

> > For example, if the signature verification, which I'm now working on, fails,
> > ENOKEY or EKEYxxx will be returns. We may want to say more detailed
> > error messages depending on error code.
> 
> Makes sense. If the above suffices let me know.
> 
> > >  struct driver_data_req_params {
> > >   bool optional;
> > > + bool keep;
> > > + bool uses_api_versioning;
> > 
> > Do you have any reason that you don't use bit fields here?
> > More features are added, more 'boolean' are added.
> > (I mean it wastes memory.)
> 
> You're right, will fold 

Re: [PATCH] pstore: Solve lockdep warning by moving inode locks

2017-04-27 Thread Namhyung Kim
Hi,

On Fri, Apr 28, 2017 at 8:20 AM, Kees Cook  wrote:
> Lockdep complains about a possible deadlock between mount and unlink
> (which is technically impossible), but fixing this improves possible
> future multiple-backend support, and keeps locking in the right order.
>
> The lockdep warning could be triggered by unlinking a file in the
> pstore filesystem:
>
>   -> #1 (>s_type->i_mutex_key#14){++}:
>  lock_acquire+0xc9/0x220
>  down_write+0x3f/0x70
>  pstore_mkfile+0x1f4/0x460
>  pstore_get_records+0x17a/0x320
>  pstore_fill_super+0xa4/0xc0
>  mount_single+0x89/0xb0
>  pstore_mount+0x13/0x20
>  mount_fs+0xf/0x90
>  vfs_kern_mount+0x66/0x170
>  do_mount+0x190/0xd50
>  SyS_mount+0x90/0xd0
>  entry_SYSCALL_64_fastpath+0x1c/0xb1
>
>   -> #0 (>read_mutex){+.+.+.}:
>  __lock_acquire+0x1ac0/0x1bb0
>  lock_acquire+0xc9/0x220
>  __mutex_lock+0x6e/0x990
>  mutex_lock_nested+0x16/0x20
>  pstore_unlink+0x3f/0xa0
>  vfs_unlink+0xb5/0x190
>  do_unlinkat+0x24c/0x2a0
>  SyS_unlinkat+0x16/0x30
>  entry_SYSCALL_64_fastpath+0x1c/0xb1
>
>   Possible unsafe locking scenario:
>
> CPU0CPU1
> 
>lock(>s_type->i_mutex_key#14);
> lock(>read_mutex);
> lock(>s_type->i_mutex_key#14);
>lock(>read_mutex);
>
> Reported-by: Marta Lofstedt 
> Reported-by: Chris Wilson 
> Signed-off-by: Kees Cook 

Looks good to me!

Acked-by: Namhyung Kim 

Thanks,
Namhyung


> ---
>  fs/pstore/inode.c| 37 +++--
>  fs/pstore/internal.h |  5 -
>  fs/pstore/platform.c | 10 +-
>  3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
> index 06504b69575b..792a4e5f9226 100644
> --- a/fs/pstore/inode.c
> +++ b/fs/pstore/inode.c
> @@ -311,9 +311,8 @@ bool pstore_is_mounted(void)
>   * Load it up with "size" bytes of data from "buf".
>   * Set the mtime & ctime to the date that this record was originally stored.
>   */
> -int pstore_mkfile(struct pstore_record *record)
> +int pstore_mkfile(struct dentry *root, struct pstore_record *record)
>  {
> -   struct dentry   *root = pstore_sb->s_root;
> struct dentry   *dentry;
> struct inode*inode;
> int rc = 0;
> @@ -322,6 +321,8 @@ int pstore_mkfile(struct pstore_record *record)
> unsigned long   flags;
> size_t  size = record->size + record->ecc_notice_size;
>
> +   WARN_ON(!inode_is_locked(d_inode(root)));
> +
> spin_lock_irqsave(_lock, flags);
> list_for_each_entry(pos, , list) {
> if (pos->record->type == record->type &&
> @@ -336,7 +337,7 @@ int pstore_mkfile(struct pstore_record *record)
> return rc;
>
> rc = -ENOMEM;
> -   inode = pstore_get_inode(pstore_sb);
> +   inode = pstore_get_inode(root->d_sb);
> if (!inode)
> goto fail;
> inode->i_mode = S_IFREG | 0444;
> @@ -394,11 +395,9 @@ int pstore_mkfile(struct pstore_record *record)
> break;
> }
>
> -   inode_lock(d_inode(root));
> -
> dentry = d_alloc_name(root, name);
> if (!dentry)
> -   goto fail_lockedalloc;
> +   goto fail_private;
>
> inode->i_size = private->total_size = size;
>
> @@ -413,12 +412,9 @@ int pstore_mkfile(struct pstore_record *record)
> list_add(>list, );
> spin_unlock_irqrestore(_lock, flags);
>
> -   inode_unlock(d_inode(root));
> -
> return 0;
>
> -fail_lockedalloc:
> -   inode_unlock(d_inode(root));
> +fail_private:
> free_pstore_private(private);
>  fail_alloc:
> iput(inode);
> @@ -427,6 +423,27 @@ int pstore_mkfile(struct pstore_record *record)
> return rc;
>  }
>
> +/*
> + * Read all the records from the persistent store. Create
> + * files in our filesystem.  Don't warn about -EEXIST errors
> + * when we are re-scanning the backing store looking to add new
> + * error records.
> + */
> +void pstore_get_records(int quiet)
> +{
> +   struct pstore_info *psi = psinfo;
> +   struct dentry *root;
> +
> +   if (!psi || !pstore_sb)
> +   return;
> +
> +   root = pstore_sb->s_root;
> +
> +   inode_lock(d_inode(root));
> +   pstore_get_backend_records(psi, root, quiet);
> +   inode_unlock(d_inode(root));
> +}
> +
>  static int pstore_fill_super(struct super_block *sb, void *data, int silent)
>  {
> struct inode *inode;
> diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
> index af1df5a36d86..c416e653dc4f 100644
> --- 

Re: [PATCH] pstore: Solve lockdep warning by moving inode locks

2017-04-27 Thread Namhyung Kim
Hi,

On Fri, Apr 28, 2017 at 8:20 AM, Kees Cook  wrote:
> Lockdep complains about a possible deadlock between mount and unlink
> (which is technically impossible), but fixing this improves possible
> future multiple-backend support, and keeps locking in the right order.
>
> The lockdep warning could be triggered by unlinking a file in the
> pstore filesystem:
>
>   -> #1 (>s_type->i_mutex_key#14){++}:
>  lock_acquire+0xc9/0x220
>  down_write+0x3f/0x70
>  pstore_mkfile+0x1f4/0x460
>  pstore_get_records+0x17a/0x320
>  pstore_fill_super+0xa4/0xc0
>  mount_single+0x89/0xb0
>  pstore_mount+0x13/0x20
>  mount_fs+0xf/0x90
>  vfs_kern_mount+0x66/0x170
>  do_mount+0x190/0xd50
>  SyS_mount+0x90/0xd0
>  entry_SYSCALL_64_fastpath+0x1c/0xb1
>
>   -> #0 (>read_mutex){+.+.+.}:
>  __lock_acquire+0x1ac0/0x1bb0
>  lock_acquire+0xc9/0x220
>  __mutex_lock+0x6e/0x990
>  mutex_lock_nested+0x16/0x20
>  pstore_unlink+0x3f/0xa0
>  vfs_unlink+0xb5/0x190
>  do_unlinkat+0x24c/0x2a0
>  SyS_unlinkat+0x16/0x30
>  entry_SYSCALL_64_fastpath+0x1c/0xb1
>
>   Possible unsafe locking scenario:
>
> CPU0CPU1
> 
>lock(>s_type->i_mutex_key#14);
> lock(>read_mutex);
> lock(>s_type->i_mutex_key#14);
>lock(>read_mutex);
>
> Reported-by: Marta Lofstedt 
> Reported-by: Chris Wilson 
> Signed-off-by: Kees Cook 

Looks good to me!

Acked-by: Namhyung Kim 

Thanks,
Namhyung


> ---
>  fs/pstore/inode.c| 37 +++--
>  fs/pstore/internal.h |  5 -
>  fs/pstore/platform.c | 10 +-
>  3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
> index 06504b69575b..792a4e5f9226 100644
> --- a/fs/pstore/inode.c
> +++ b/fs/pstore/inode.c
> @@ -311,9 +311,8 @@ bool pstore_is_mounted(void)
>   * Load it up with "size" bytes of data from "buf".
>   * Set the mtime & ctime to the date that this record was originally stored.
>   */
> -int pstore_mkfile(struct pstore_record *record)
> +int pstore_mkfile(struct dentry *root, struct pstore_record *record)
>  {
> -   struct dentry   *root = pstore_sb->s_root;
> struct dentry   *dentry;
> struct inode*inode;
> int rc = 0;
> @@ -322,6 +321,8 @@ int pstore_mkfile(struct pstore_record *record)
> unsigned long   flags;
> size_t  size = record->size + record->ecc_notice_size;
>
> +   WARN_ON(!inode_is_locked(d_inode(root)));
> +
> spin_lock_irqsave(_lock, flags);
> list_for_each_entry(pos, , list) {
> if (pos->record->type == record->type &&
> @@ -336,7 +337,7 @@ int pstore_mkfile(struct pstore_record *record)
> return rc;
>
> rc = -ENOMEM;
> -   inode = pstore_get_inode(pstore_sb);
> +   inode = pstore_get_inode(root->d_sb);
> if (!inode)
> goto fail;
> inode->i_mode = S_IFREG | 0444;
> @@ -394,11 +395,9 @@ int pstore_mkfile(struct pstore_record *record)
> break;
> }
>
> -   inode_lock(d_inode(root));
> -
> dentry = d_alloc_name(root, name);
> if (!dentry)
> -   goto fail_lockedalloc;
> +   goto fail_private;
>
> inode->i_size = private->total_size = size;
>
> @@ -413,12 +412,9 @@ int pstore_mkfile(struct pstore_record *record)
> list_add(>list, );
> spin_unlock_irqrestore(_lock, flags);
>
> -   inode_unlock(d_inode(root));
> -
> return 0;
>
> -fail_lockedalloc:
> -   inode_unlock(d_inode(root));
> +fail_private:
> free_pstore_private(private);
>  fail_alloc:
> iput(inode);
> @@ -427,6 +423,27 @@ int pstore_mkfile(struct pstore_record *record)
> return rc;
>  }
>
> +/*
> + * Read all the records from the persistent store. Create
> + * files in our filesystem.  Don't warn about -EEXIST errors
> + * when we are re-scanning the backing store looking to add new
> + * error records.
> + */
> +void pstore_get_records(int quiet)
> +{
> +   struct pstore_info *psi = psinfo;
> +   struct dentry *root;
> +
> +   if (!psi || !pstore_sb)
> +   return;
> +
> +   root = pstore_sb->s_root;
> +
> +   inode_lock(d_inode(root));
> +   pstore_get_backend_records(psi, root, quiet);
> +   inode_unlock(d_inode(root));
> +}
> +
>  static int pstore_fill_super(struct super_block *sb, void *data, int silent)
>  {
> struct inode *inode;
> diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
> index af1df5a36d86..c416e653dc4f 100644
> --- a/fs/pstore/internal.h
> +++ b/fs/pstore/internal.h
> @@ -25,7 +25,10 @@ extern struct pstore_info *psinfo;
>
>  extern void

Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread Namhyung Kim
Hi,

On Fri, Apr 28, 2017 at 9:31 AM, David Carrillo-Cisneros
 wrote:
> When processing tracepoint events, perf report outputs warnings about
> field not founds. The warnings are usually hidden by perf report UI
> and appear when using the --stdio option. e.g.

Yep, currently perf report TUI hides warnings since there's no space
to show them.

>
>   $ perf record -e syscalls:sys_enter_mmap some_load
>
>   $ perf report --stdio
>
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field addr not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field len not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field prot not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field flags not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field fd not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field off not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field addr not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field len not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field prot not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field flags not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field fd not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field off not found
>
> I bisected the warnings to:
>   commit d49dadea78624 ("perf tools: Make 'trace' or 'trace_fields'
> sort key default for tracepoint events").
>
> that introduced sorting by trace. The event's format in my example is:
>
>   $ cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_socket/format
>
>   name: sys_enter_socket
>   ID: 1850
>   format:
>   field:unsigned short common_type;   offset:0;
> size:2; signed:0;
>   field:unsigned char common_flags;   offset:2;
> size:1; signed:0;
>   field:unsigned char common_preempt_count;   offset:3;
>size:1; signed:0;
>   field:int common_pid;   offset:4;   size:4; signed:1;
>
>   print fmt: "family: 0x%08lx, type: 0x%08lx, protocol: 0x%08lx",
> ((unsigned long)(REC->family)), ((unsigned long)(REC->type)),
> ((unsigned long)(REC->protocol))
>
>
> An appears to be correctly parsed by event_read_print in
> tools/lib/tracevent/event-parse.c . Has anyone seen this before?

But I don't see any warning when I run it with --stdio.  Could you
show me the format file of sys_enter_mmap?


Thanks,
Namhyung


Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread Namhyung Kim
Hi,

On Fri, Apr 28, 2017 at 9:31 AM, David Carrillo-Cisneros
 wrote:
> When processing tracepoint events, perf report outputs warnings about
> field not founds. The warnings are usually hidden by perf report UI
> and appear when using the --stdio option. e.g.

Yep, currently perf report TUI hides warnings since there's no space
to show them.

>
>   $ perf record -e syscalls:sys_enter_mmap some_load
>
>   $ perf report --stdio
>
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field addr not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field len not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field prot not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field flags not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field fd not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field off not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field addr not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field len not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field prot not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field flags not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field fd not found
>   Warning: [syscalls:sys_enter_mmap] eval_num_arg: field off not found
>
> I bisected the warnings to:
>   commit d49dadea78624 ("perf tools: Make 'trace' or 'trace_fields'
> sort key default for tracepoint events").
>
> that introduced sorting by trace. The event's format in my example is:
>
>   $ cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_socket/format
>
>   name: sys_enter_socket
>   ID: 1850
>   format:
>   field:unsigned short common_type;   offset:0;
> size:2; signed:0;
>   field:unsigned char common_flags;   offset:2;
> size:1; signed:0;
>   field:unsigned char common_preempt_count;   offset:3;
>size:1; signed:0;
>   field:int common_pid;   offset:4;   size:4; signed:1;
>
>   print fmt: "family: 0x%08lx, type: 0x%08lx, protocol: 0x%08lx",
> ((unsigned long)(REC->family)), ((unsigned long)(REC->type)),
> ((unsigned long)(REC->protocol))
>
>
> An appears to be correctly parsed by event_read_print in
> tools/lib/tracevent/event-parse.c . Has anyone seen this before?

But I don't see any warning when I run it with --stdio.  Could you
show me the format file of sys_enter_mmap?


Thanks,
Namhyung


Re: [PATCH v3 7/8] arm64: exception: handle asynchronous SError interrupt

2017-04-27 Thread Xiongfeng Wang
Hi James,

Thanks for your explanation and suggests.

On 2017/4/25 1:14, James Morse wrote:
> Hi Wang Xiongfeng,
> 
> On 21/04/17 12:33, Xiongfeng Wang wrote:
>> On 2017/4/20 16:52, James Morse wrote:
>>> On 19/04/17 03:37, Xiongfeng Wang wrote:
 On 2017/4/18 18:51, James Morse wrote:
> The host expects to receive physical SError Interrupts. The ARM-ARM 
> doesn't
> describe a way to inject these as they are generated by the CPU.
>
> Am I right in thinking you want this to use SError Interrupts as an APEI
> notification? (This isn't a CPU thing so the RAS spec doesn't cover this 
> use)

 Yes, using sei as an APEI notification is one part of my consideration. 
 Another use is for ESB.
 RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External 
 Abort with ESB'
 describes the SEI recovery process when ESB is implemented.

 In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI 
 occurs in EL0 and not been taken immediately,
 and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. 
 The ESB at SVC entry is
 used for preventing the error propagating from user space to kernel space. 
 The EL3 SEI handler collects
>>>
 the errors and fills in the APEI table, and then jump to EL2 SEI handler. 
 EL2 SEI handler inject
 an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, 
 an SEI is pending.
>>>
>>> This step has confused me. How would this work with VHE where the host runs 
>>> at
>>> EL2 and there is nothing at Host:EL1?
>>
>> RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External 
>> Abort with ESB'
>> I don't know whether the step described in the section is designed for guest 
>> os or host os or both.
>> Yes, it doesn't work with VHE where the host runs at EL2.
> 
> If it uses Virtual SError, it must be for an OS running at EL1, as the code
> running at EL2 (VHE:Linux, firmware or Xen) must set HCR_EL2.AMO to make this 
> work.
> 
> I can't find the section you describe in this RAS spec:
> https://developer.arm.com/docs/ddi0587/latest/armreliabilityavailabilityandserviceabilityrasspecificationarmv8forthearmv8aarchitectureprofile
> 
I got the RAS spec from my company's ARM account. The document number is 
PRD03-PRDC-010953 .

> 
>>> >From your description I assume you have some firmware resident at EL2.
> 
>> Our actual SEI step is planned as follows:
> 
> Ah, okay. You can't use Virtual SError at all then, as firmware doesn't own 
> EL2:
> KVM does. KVM will save/restore the HCR_EL2 register as part of its world
> switch. Firmware must not modify the hyp registers behind the hyper-visors 
> back.
> 
> 
>> Host OS:  EL0/EL1 -> EL3 -> EL0/EL1
> 
> i.e. While the host was running, so EL3 sees HCR_EL2.AMO clear, and directs 
> the
> SEI to EL1.
> 
>> Guest OS:  EL0/EL1 -> EL3 -> EL2 -> EL0/EL1
> 
> i.e. While the guest was running, so EL3 sees HCR_EL2.AMO set, directs the SEI
> to EL2 where KVM performs the world-switch and returns to host EL1.
> 
> Looks sensible.
> 
> 
>> In guest os situation, we can inject an vSEI and return to where the SEI is 
>> taken from.
> 
> An SEI from firmware should always end up in the host OS. If a guest was 
> running
> KVM is responsible for doing the world switch to restore host EL1 and return
> there. This should all work today.
> 
> 
>> But in host os situation, we can't inject an vSEI (if we don't set 
>> HCR_EL2.AMO), so we have to jump to EL1 SEI vector.
> 
> Because your firmware is at EL3 you have to check PSTATE.A and jump into the
> SError vector in both cases, you just choose if its VBAR_EL2 or VBAR_EL1 based
> on HCR_EL2.AMO.
> 
> 
>> Then the process following ESB won't be executed becuase SEI is taken to EL3 
>> from the ESB instruction in EL1, and when control
>> is returned to OS, we are in EL1 SEI vector rather than the ESB instruction.
> 
> Firmware should set ELR_EL1 so that an eret from the EL1 SError vector takes 
> you
> back to the ESB instruction that took us to EL3 in the first place.
> 
> 
> There is a problem here mixing SError as the CPU->Software notification of RAS
> errors and as APEI's SEI Software->Software notification that a firmware-first
> error has been handled by EL3.
> 
> To elaborate:
> The routine in this patch was something like:
> 1 ESB
> 2 Read DISR_EL1
> 3 If set, branch to the SError handler.
> 
> If we have firmware-first ESB will generate an SError as PSTATE.A at EL{1,2}
> doesn't mask SError for EL3. Firmware can then handle the RAS error. With this
> the CPU->Software story is done. Firmware notifying the OS via APEI is a
> different problem.
> 
> If the error doesn't need to be notified to the OS via SEI, firmware can 
> return
> to step 1 above with DISR_EL1 clear. The OS may be told via another mechanism
> such as polling or an irq.
> 
> If PSTATE.A was clear, firmware can return to the SError vector. If PSTATE.A 
> was
> set and 

Re: [PATCH v3 7/8] arm64: exception: handle asynchronous SError interrupt

2017-04-27 Thread Xiongfeng Wang
Hi James,

Thanks for your explanation and suggests.

On 2017/4/25 1:14, James Morse wrote:
> Hi Wang Xiongfeng,
> 
> On 21/04/17 12:33, Xiongfeng Wang wrote:
>> On 2017/4/20 16:52, James Morse wrote:
>>> On 19/04/17 03:37, Xiongfeng Wang wrote:
 On 2017/4/18 18:51, James Morse wrote:
> The host expects to receive physical SError Interrupts. The ARM-ARM 
> doesn't
> describe a way to inject these as they are generated by the CPU.
>
> Am I right in thinking you want this to use SError Interrupts as an APEI
> notification? (This isn't a CPU thing so the RAS spec doesn't cover this 
> use)

 Yes, using sei as an APEI notification is one part of my consideration. 
 Another use is for ESB.
 RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External 
 Abort with ESB'
 describes the SEI recovery process when ESB is implemented.

 In this situation, SEI is routed to EL3 (SCR_EL3.EA = 1). When an SEI 
 occurs in EL0 and not been taken immediately,
 and then an ESB instruction at SVC entry is executed, SEI is taken to EL3. 
 The ESB at SVC entry is
 used for preventing the error propagating from user space to kernel space. 
 The EL3 SEI handler collects
>>>
 the errors and fills in the APEI table, and then jump to EL2 SEI handler. 
 EL2 SEI handler inject
 an vSEI into EL1 by setting HCR_EL2.VSE = 1, so that when returned to OS, 
 an SEI is pending.
>>>
>>> This step has confused me. How would this work with VHE where the host runs 
>>> at
>>> EL2 and there is nothing at Host:EL1?
>>
>> RAS spec 6.5.3 'Example software sequences: Variant: asynchronous External 
>> Abort with ESB'
>> I don't know whether the step described in the section is designed for guest 
>> os or host os or both.
>> Yes, it doesn't work with VHE where the host runs at EL2.
> 
> If it uses Virtual SError, it must be for an OS running at EL1, as the code
> running at EL2 (VHE:Linux, firmware or Xen) must set HCR_EL2.AMO to make this 
> work.
> 
> I can't find the section you describe in this RAS spec:
> https://developer.arm.com/docs/ddi0587/latest/armreliabilityavailabilityandserviceabilityrasspecificationarmv8forthearmv8aarchitectureprofile
> 
I got the RAS spec from my company's ARM account. The document number is 
PRD03-PRDC-010953 .

> 
>>> >From your description I assume you have some firmware resident at EL2.
> 
>> Our actual SEI step is planned as follows:
> 
> Ah, okay. You can't use Virtual SError at all then, as firmware doesn't own 
> EL2:
> KVM does. KVM will save/restore the HCR_EL2 register as part of its world
> switch. Firmware must not modify the hyp registers behind the hyper-visors 
> back.
> 
> 
>> Host OS:  EL0/EL1 -> EL3 -> EL0/EL1
> 
> i.e. While the host was running, so EL3 sees HCR_EL2.AMO clear, and directs 
> the
> SEI to EL1.
> 
>> Guest OS:  EL0/EL1 -> EL3 -> EL2 -> EL0/EL1
> 
> i.e. While the guest was running, so EL3 sees HCR_EL2.AMO set, directs the SEI
> to EL2 where KVM performs the world-switch and returns to host EL1.
> 
> Looks sensible.
> 
> 
>> In guest os situation, we can inject an vSEI and return to where the SEI is 
>> taken from.
> 
> An SEI from firmware should always end up in the host OS. If a guest was 
> running
> KVM is responsible for doing the world switch to restore host EL1 and return
> there. This should all work today.
> 
> 
>> But in host os situation, we can't inject an vSEI (if we don't set 
>> HCR_EL2.AMO), so we have to jump to EL1 SEI vector.
> 
> Because your firmware is at EL3 you have to check PSTATE.A and jump into the
> SError vector in both cases, you just choose if its VBAR_EL2 or VBAR_EL1 based
> on HCR_EL2.AMO.
> 
> 
>> Then the process following ESB won't be executed becuase SEI is taken to EL3 
>> from the ESB instruction in EL1, and when control
>> is returned to OS, we are in EL1 SEI vector rather than the ESB instruction.
> 
> Firmware should set ELR_EL1 so that an eret from the EL1 SError vector takes 
> you
> back to the ESB instruction that took us to EL3 in the first place.
> 
> 
> There is a problem here mixing SError as the CPU->Software notification of RAS
> errors and as APEI's SEI Software->Software notification that a firmware-first
> error has been handled by EL3.
> 
> To elaborate:
> The routine in this patch was something like:
> 1 ESB
> 2 Read DISR_EL1
> 3 If set, branch to the SError handler.
> 
> If we have firmware-first ESB will generate an SError as PSTATE.A at EL{1,2}
> doesn't mask SError for EL3. Firmware can then handle the RAS error. With this
> the CPU->Software story is done. Firmware notifying the OS via APEI is a
> different problem.
> 
> If the error doesn't need to be notified to the OS via SEI, firmware can 
> return
> to step 1 above with DISR_EL1 clear. The OS may be told via another mechanism
> such as polling or an irq.
> 
> If PSTATE.A was clear, firmware can return to the SError vector. If PSTATE.A 
> was
> set and 

Re: [PATCH] Enabled pstore write for powerpc

2017-04-27 Thread Michael Ellerman
Kees Cook  writes:

> On Thu, Apr 27, 2017 at 4:33 AM, Ankit Kumar  wrote:
>> After commit c950fd6f201a kernel registers pstore write based on flag set.
>> Pstore write for powerpc is broken as flags(PSTORE_FLAGS_DMESG) is not set 
>> for
>> powerpc architecture. On panic, kernel doesn't write message to
>> /fs/pstore/dmesg*(Entry doesn't gets created at all).
>>
>> This patch enables pstore write for powerpc architecture by setting
>> PSTORE_FLAGS_DMESG flag.
>>
>> Fixes:c950fd6f201a pstore: Split pstore fragile flags
>> Signed-off-by: Ankit Kumar 
>
> Argh, thanks! I thought I'd caught all of these. I'll include this for 
> -stable.

I see you've picked it up, thanks.

cheers


Re: [PATCH] Enabled pstore write for powerpc

2017-04-27 Thread Michael Ellerman
Kees Cook  writes:

> On Thu, Apr 27, 2017 at 4:33 AM, Ankit Kumar  wrote:
>> After commit c950fd6f201a kernel registers pstore write based on flag set.
>> Pstore write for powerpc is broken as flags(PSTORE_FLAGS_DMESG) is not set 
>> for
>> powerpc architecture. On panic, kernel doesn't write message to
>> /fs/pstore/dmesg*(Entry doesn't gets created at all).
>>
>> This patch enables pstore write for powerpc architecture by setting
>> PSTORE_FLAGS_DMESG flag.
>>
>> Fixes:c950fd6f201a pstore: Split pstore fragile flags
>> Signed-off-by: Ankit Kumar 
>
> Argh, thanks! I thought I'd caught all of these. I'll include this for 
> -stable.

I see you've picked it up, thanks.

cheers


Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread Steven Rostedt
On Thu, 27 Apr 2017 20:51:28 -0600
David Ahern  wrote:

> On 4/27/17 8:49 PM, Steven Rostedt wrote:
> > On Thu, 27 Apr 2017 20:13:43 -0600
> > David Ahern  wrote:
> >   
> >> On 4/27/17 7:41 PM, Steven Rostedt wrote:  
> >>> On Thu, 27 Apr 2017 19:31:12 -0500
> >>> David Carrillo-Cisneros  wrote:
> >>> 
>  When processing tracepoint events, perf report outputs warnings about
>  field not founds. The warnings are usually hidden by perf report UI
>  and appear when using the --stdio option. e.g.
> 
>    $ perf record -e syscalls:sys_enter_mmap some_load
> >>
> >> '-e syscalls' vs raw_syscalls suggests an older kernel -- like RHEL6.  
> > 
> > New kernels have both syscalls and raw_syscalls. The raw_syscalls are
> > the true tracepoints that the syscalls use. His option works on
> > 4.11-rc8.  
> 
> 
> What's old is new again?
> 
> root@kenny-jessie4:~# uname -a
> Linux kenny-jessie4 4.11.0-rc8+ #258 SMP Thu Apr 27 08:36:06 PDT 2017
> x86_64 GNU/Linux
> 
> root@kenny-jessie4:~# perf list | grep syscalls
>   raw_syscalls:sys_enter [Tracepoint event]
>   raw_syscalls:sys_exit  [Tracepoint event]
> 
> 
> Is an option needed to enable syscalls vs raw_syscalls?

Yep!

CONFIG_FTRACE_SYSCALLS=y

-- Steve


Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread Steven Rostedt
On Thu, 27 Apr 2017 20:51:28 -0600
David Ahern  wrote:

> On 4/27/17 8:49 PM, Steven Rostedt wrote:
> > On Thu, 27 Apr 2017 20:13:43 -0600
> > David Ahern  wrote:
> >   
> >> On 4/27/17 7:41 PM, Steven Rostedt wrote:  
> >>> On Thu, 27 Apr 2017 19:31:12 -0500
> >>> David Carrillo-Cisneros  wrote:
> >>> 
>  When processing tracepoint events, perf report outputs warnings about
>  field not founds. The warnings are usually hidden by perf report UI
>  and appear when using the --stdio option. e.g.
> 
>    $ perf record -e syscalls:sys_enter_mmap some_load
> >>
> >> '-e syscalls' vs raw_syscalls suggests an older kernel -- like RHEL6.  
> > 
> > New kernels have both syscalls and raw_syscalls. The raw_syscalls are
> > the true tracepoints that the syscalls use. His option works on
> > 4.11-rc8.  
> 
> 
> What's old is new again?
> 
> root@kenny-jessie4:~# uname -a
> Linux kenny-jessie4 4.11.0-rc8+ #258 SMP Thu Apr 27 08:36:06 PDT 2017
> x86_64 GNU/Linux
> 
> root@kenny-jessie4:~# perf list | grep syscalls
>   raw_syscalls:sys_enter [Tracepoint event]
>   raw_syscalls:sys_exit  [Tracepoint event]
> 
> 
> Is an option needed to enable syscalls vs raw_syscalls?

Yep!

CONFIG_FTRACE_SYSCALLS=y

-- Steve


Re: [PATCH v2 2/2] mm: skip HWPoisoned pages when onlining pages

2017-04-27 Thread Balbir Singh
On Wed, 2017-04-26 at 03:13 +, Naoya Horiguchi wrote:
> On Wed, Apr 26, 2017 at 12:10:15PM +1000, Balbir Singh wrote:
> > On Tue, 2017-04-25 at 16:27 +0200, Laurent Dufour wrote:
> > > The commit b023f46813cd ("memory-hotplug: skip HWPoisoned page when
> > > offlining pages") skip the HWPoisoned pages when offlining pages, but
> > > this should be skipped when onlining the pages too.
> > > 
> > > Signed-off-by: Laurent Dufour 
> > > ---
> > >  mm/memory_hotplug.c | 4 
> > >  1 file changed, 4 insertions(+)
> > > 
> > > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > > index 6fa7208bcd56..741ddb50e7d2 100644
> > > --- a/mm/memory_hotplug.c
> > > +++ b/mm/memory_hotplug.c
> > > @@ -942,6 +942,10 @@ static int online_pages_range(unsigned long 
> > > start_pfn, unsigned long nr_pages,
> > >   if (PageReserved(pfn_to_page(start_pfn)))
> > >   for (i = 0; i < nr_pages; i++) {
> > >   page = pfn_to_page(start_pfn + i);
> > > + if (PageHWPoison(page)) {
> > > + ClearPageReserved(page);
> > 
> > Why do we clear page reserved? Also if the page is marked PageHWPoison, it
> > was never offlined to begin with? Or do you expect this to be set on newly
> > hotplugged memory? Also don't we need to skip the entire pageblock?
> 
> If I read correctly, to "skip HWPoiosned page" in commit b023f46813cd means
> that we skip the page status check for hwpoisoned pages *not* to prevent
> memory offlining for memblocks with hwpoisoned pages. That means that
> hwpoisoned pages can be offlined.
> 
> And another reason to clear PageReserved is that we could reuse the
> hwpoisoned page after onlining back with replacing the broken DIMM.
> In this usecase, we first do unpoisoning to clear PageHWPoison,
> but it doesn't work if PageReserved is set. My simple testing shows
> the BUG below in unpoisoning (without the ClearPageReserved):
>

Fair enough, thanks for the explanation

Balbir Singh. 


Re: [PATCH v2 2/2] mm: skip HWPoisoned pages when onlining pages

2017-04-27 Thread Balbir Singh
On Wed, 2017-04-26 at 03:13 +, Naoya Horiguchi wrote:
> On Wed, Apr 26, 2017 at 12:10:15PM +1000, Balbir Singh wrote:
> > On Tue, 2017-04-25 at 16:27 +0200, Laurent Dufour wrote:
> > > The commit b023f46813cd ("memory-hotplug: skip HWPoisoned page when
> > > offlining pages") skip the HWPoisoned pages when offlining pages, but
> > > this should be skipped when onlining the pages too.
> > > 
> > > Signed-off-by: Laurent Dufour 
> > > ---
> > >  mm/memory_hotplug.c | 4 
> > >  1 file changed, 4 insertions(+)
> > > 
> > > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > > index 6fa7208bcd56..741ddb50e7d2 100644
> > > --- a/mm/memory_hotplug.c
> > > +++ b/mm/memory_hotplug.c
> > > @@ -942,6 +942,10 @@ static int online_pages_range(unsigned long 
> > > start_pfn, unsigned long nr_pages,
> > >   if (PageReserved(pfn_to_page(start_pfn)))
> > >   for (i = 0; i < nr_pages; i++) {
> > >   page = pfn_to_page(start_pfn + i);
> > > + if (PageHWPoison(page)) {
> > > + ClearPageReserved(page);
> > 
> > Why do we clear page reserved? Also if the page is marked PageHWPoison, it
> > was never offlined to begin with? Or do you expect this to be set on newly
> > hotplugged memory? Also don't we need to skip the entire pageblock?
> 
> If I read correctly, to "skip HWPoiosned page" in commit b023f46813cd means
> that we skip the page status check for hwpoisoned pages *not* to prevent
> memory offlining for memblocks with hwpoisoned pages. That means that
> hwpoisoned pages can be offlined.
> 
> And another reason to clear PageReserved is that we could reuse the
> hwpoisoned page after onlining back with replacing the broken DIMM.
> In this usecase, we first do unpoisoning to clear PageHWPoison,
> but it doesn't work if PageReserved is set. My simple testing shows
> the BUG below in unpoisoning (without the ClearPageReserved):
>

Fair enough, thanks for the explanation

Balbir Singh. 


Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread David Ahern
On 4/27/17 8:49 PM, Steven Rostedt wrote:
> On Thu, 27 Apr 2017 20:13:43 -0600
> David Ahern  wrote:
> 
>> On 4/27/17 7:41 PM, Steven Rostedt wrote:
>>> On Thu, 27 Apr 2017 19:31:12 -0500
>>> David Carrillo-Cisneros  wrote:
>>>   
 When processing tracepoint events, perf report outputs warnings about
 field not founds. The warnings are usually hidden by perf report UI
 and appear when using the --stdio option. e.g.

   $ perf record -e syscalls:sys_enter_mmap some_load  
>>
>> '-e syscalls' vs raw_syscalls suggests an older kernel -- like RHEL6.
> 
> New kernels have both syscalls and raw_syscalls. The raw_syscalls are
> the true tracepoints that the syscalls use. His option works on
> 4.11-rc8.


What's old is new again?

root@kenny-jessie4:~# uname -a
Linux kenny-jessie4 4.11.0-rc8+ #258 SMP Thu Apr 27 08:36:06 PDT 2017
x86_64 GNU/Linux

root@kenny-jessie4:~# perf list | grep syscalls
  raw_syscalls:sys_enter [Tracepoint event]
  raw_syscalls:sys_exit  [Tracepoint event]


Is an option needed to enable syscalls vs raw_syscalls?


Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread David Ahern
On 4/27/17 8:49 PM, Steven Rostedt wrote:
> On Thu, 27 Apr 2017 20:13:43 -0600
> David Ahern  wrote:
> 
>> On 4/27/17 7:41 PM, Steven Rostedt wrote:
>>> On Thu, 27 Apr 2017 19:31:12 -0500
>>> David Carrillo-Cisneros  wrote:
>>>   
 When processing tracepoint events, perf report outputs warnings about
 field not founds. The warnings are usually hidden by perf report UI
 and appear when using the --stdio option. e.g.

   $ perf record -e syscalls:sys_enter_mmap some_load  
>>
>> '-e syscalls' vs raw_syscalls suggests an older kernel -- like RHEL6.
> 
> New kernels have both syscalls and raw_syscalls. The raw_syscalls are
> the true tracepoints that the syscalls use. His option works on
> 4.11-rc8.


What's old is new again?

root@kenny-jessie4:~# uname -a
Linux kenny-jessie4 4.11.0-rc8+ #258 SMP Thu Apr 27 08:36:06 PDT 2017
x86_64 GNU/Linux

root@kenny-jessie4:~# perf list | grep syscalls
  raw_syscalls:sys_enter [Tracepoint event]
  raw_syscalls:sys_exit  [Tracepoint event]


Is an option needed to enable syscalls vs raw_syscalls?


Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread Steven Rostedt
On Thu, 27 Apr 2017 20:13:43 -0600
David Ahern  wrote:

> On 4/27/17 7:41 PM, Steven Rostedt wrote:
> > On Thu, 27 Apr 2017 19:31:12 -0500
> > David Carrillo-Cisneros  wrote:
> >   
> >> When processing tracepoint events, perf report outputs warnings about
> >> field not founds. The warnings are usually hidden by perf report UI
> >> and appear when using the --stdio option. e.g.
> >>
> >>   $ perf record -e syscalls:sys_enter_mmap some_load  
> 
> '-e syscalls' vs raw_syscalls suggests an older kernel -- like RHEL6.

New kernels have both syscalls and raw_syscalls. The raw_syscalls are
the true tracepoints that the syscalls use. His option works on
4.11-rc8.

-- Steve


> 
> 
> See the note I put into tools/perf/builtin-trace.c:
> 
> static struct perf_evsel *perf_evsel__syscall_newtp(const char
> *direction, void *handler)
> {
> struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls",
> direction);
> 
> /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
> if (IS_ERR(evsel))
> evsel = perf_evsel__newtp("syscalls", direction);
> 
> 
> Are you running top of tree perf on RHEL6?



Re: perf report warnings on tracepoint events hidden by ui

2017-04-27 Thread Steven Rostedt
On Thu, 27 Apr 2017 20:13:43 -0600
David Ahern  wrote:

> On 4/27/17 7:41 PM, Steven Rostedt wrote:
> > On Thu, 27 Apr 2017 19:31:12 -0500
> > David Carrillo-Cisneros  wrote:
> >   
> >> When processing tracepoint events, perf report outputs warnings about
> >> field not founds. The warnings are usually hidden by perf report UI
> >> and appear when using the --stdio option. e.g.
> >>
> >>   $ perf record -e syscalls:sys_enter_mmap some_load  
> 
> '-e syscalls' vs raw_syscalls suggests an older kernel -- like RHEL6.

New kernels have both syscalls and raw_syscalls. The raw_syscalls are
the true tracepoints that the syscalls use. His option works on
4.11-rc8.

-- Steve


> 
> 
> See the note I put into tools/perf/builtin-trace.c:
> 
> static struct perf_evsel *perf_evsel__syscall_newtp(const char
> *direction, void *handler)
> {
> struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls",
> direction);
> 
> /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
> if (IS_ERR(evsel))
> evsel = perf_evsel__newtp("syscalls", direction);
> 
> 
> Are you running top of tree perf on RHEL6?



Re: [PATCH 1/2] PCI: mediatek: Add Mediatek PCIe host controller support

2017-04-27 Thread Ryder Lee
Hi,

On Thu, 2017-04-27 at 20:55 +0200, Arnd Bergmann wrote:
> On Wed, Apr 26, 2017 at 10:10 AM, Ryder Lee  wrote:
> > On Tue, 2017-04-25 at 14:38 +0200, Arnd Bergmann wrote:
> >> On Sun, Apr 23, 2017 at 10:19 AM, Ryder Lee  wrote:
> 
> >> > +static int mtk_pcie_enable_ports(struct mtk_pcie *pcie)
> >> > +{
> >> > +   struct device *dev = pcie->dev;
> >> > +   struct mtk_pcie_port *port, *tmp;
> >> > +   int err, linkup = 0;
> >> > +
> >> > +   list_for_each_entry_safe(port, tmp, >ports, list) {
> >> > +   err = clk_prepare_enable(port->sys_ck);
> >> > +   if (err) {
> >> > +   dev_err(dev, "failed to enable port%d clock\n",
> >> > +   port->index);
> >> > +   continue;
> >> > +   }
> >> > +
> >> > +   /* assert RC */
> >> > +   reset_control_assert(port->reset);
> >> > +   /* de-assert RC */
> >> > +   reset_control_deassert(port->reset);
> >> > +
> >> > +   /* power on PHY */
> >> > +   err = phy_power_on(port->phy);
> >> > +   if (err) {
> >> > +   dev_err(dev, "failed to power on port%d phy\n",
> >> > +   port->index);
> >> > +   goto err_phy_on;
> >> > +   }
> >> > +
> >> > +   mtk_pcie_assert_ports(port);
> >> > +
> >>
> >> Similar to the comment I had for the binding, I wonder if it would be
> >> better to keep all the information about the ports in one place and
> >> then just deal with it at the root level.
> >>
> >> Alternatively, we could decide to standardize on the properties
> >> you have added to the pcie port node, but then I would handle
> >> them in the pcieport driver rather than in the host bridge driver.
> >
> > Sorry, I'm not sure what you want me to do here.
> >
> > I could move all clock operation in root level. But we need to keep the
> > reset and PHY operation sequence in the loop, In addition, we could
> > easily free resources if ports link fail.
> >
> > How about moving this function to mtk_pcie_parse_and_add_res()?
> 
> That could work, please try it out and see if the code gets better or
> worse. This may depend on what we end up doing with the DT
> properties.

I will try it on next version, and we can continue our discussion on
that series.

> >> > +/*
> >> > + * This IP lacks interrupt status register to check or map INTx from
> >> > + * different devices at the same time.
> >> > + */
> >> > +static int __init mtk_pcie_map_irq(const struct pci_dev *dev, u8 slot, 
> >> > u8 pin)
> >> > +{
> >> > +   struct mtk_pcie *pcie = dev->bus->sysdata;
> >> > +   struct mtk_pcie_port *port;
> >> > +
> >> > +   list_for_each_entry(port, >ports, list)
> >> > +   if (port->index == slot)
> >> > +   return port->irq;
> >> > +
> >> > +   return -1;
> >> > +}
> >>
> >> This looks odd, what is it needed for specifically? It looks like
> >> it's broken for devices behind bridges, and the interrupt mapping
> >> should normally come from the interrupt-map property, without
> >> the need for a driver specific map_irq override.
> >
> > Our hardware just has a GIC for each port and lacks interrupt status for
> > host driver to distinguish INTx. So I return port IRQ here.
> 
> You should still be able to express this with standard interrupt-map
> DT property, without having to resort to your own map_irq
> callback handler.
> 
> In the interrupt-map-mask, you can ignore the interrupt line
> only list the devfn of the root ports for each entry.

Okay, I will fix it.

> >> > +static int mtk_pcie_register_ports(struct mtk_pcie *pcie)
> >> > +{
> >> > +   struct pci_bus *bus, *child;
> >> > +
> >> > +   bus = pci_scan_root_bus(pcie->dev, 0, _pcie_ops, pcie,
> >> > +   >resources);
> >>
> >> Can you use the new pci_register_host_bridge() method instead of
> >> pci_scan_root_bus() here?
> >
> > May I know what's difference between pci_scan_root_bus() and using
> > pci_register_host_bridge() directly? What situation should we use it?
> > It seems that just tegra use this new method currently.
> 
> We introduced the new function for tegra for now, in the long run
> I would hope we can convert all other drivers to it as well, to make it
> easier to add further parameters.
> 
> The new function also has a cleaner way of dealing with the memory
> allocations, similar to how other subsystems work.

Sounds good. I will change to use that.

Thanks!



Re: [PATCH 1/2] PCI: mediatek: Add Mediatek PCIe host controller support

2017-04-27 Thread Ryder Lee
Hi,

On Thu, 2017-04-27 at 20:55 +0200, Arnd Bergmann wrote:
> On Wed, Apr 26, 2017 at 10:10 AM, Ryder Lee  wrote:
> > On Tue, 2017-04-25 at 14:38 +0200, Arnd Bergmann wrote:
> >> On Sun, Apr 23, 2017 at 10:19 AM, Ryder Lee  wrote:
> 
> >> > +static int mtk_pcie_enable_ports(struct mtk_pcie *pcie)
> >> > +{
> >> > +   struct device *dev = pcie->dev;
> >> > +   struct mtk_pcie_port *port, *tmp;
> >> > +   int err, linkup = 0;
> >> > +
> >> > +   list_for_each_entry_safe(port, tmp, >ports, list) {
> >> > +   err = clk_prepare_enable(port->sys_ck);
> >> > +   if (err) {
> >> > +   dev_err(dev, "failed to enable port%d clock\n",
> >> > +   port->index);
> >> > +   continue;
> >> > +   }
> >> > +
> >> > +   /* assert RC */
> >> > +   reset_control_assert(port->reset);
> >> > +   /* de-assert RC */
> >> > +   reset_control_deassert(port->reset);
> >> > +
> >> > +   /* power on PHY */
> >> > +   err = phy_power_on(port->phy);
> >> > +   if (err) {
> >> > +   dev_err(dev, "failed to power on port%d phy\n",
> >> > +   port->index);
> >> > +   goto err_phy_on;
> >> > +   }
> >> > +
> >> > +   mtk_pcie_assert_ports(port);
> >> > +
> >>
> >> Similar to the comment I had for the binding, I wonder if it would be
> >> better to keep all the information about the ports in one place and
> >> then just deal with it at the root level.
> >>
> >> Alternatively, we could decide to standardize on the properties
> >> you have added to the pcie port node, but then I would handle
> >> them in the pcieport driver rather than in the host bridge driver.
> >
> > Sorry, I'm not sure what you want me to do here.
> >
> > I could move all clock operation in root level. But we need to keep the
> > reset and PHY operation sequence in the loop, In addition, we could
> > easily free resources if ports link fail.
> >
> > How about moving this function to mtk_pcie_parse_and_add_res()?
> 
> That could work, please try it out and see if the code gets better or
> worse. This may depend on what we end up doing with the DT
> properties.

I will try it on next version, and we can continue our discussion on
that series.

> >> > +/*
> >> > + * This IP lacks interrupt status register to check or map INTx from
> >> > + * different devices at the same time.
> >> > + */
> >> > +static int __init mtk_pcie_map_irq(const struct pci_dev *dev, u8 slot, 
> >> > u8 pin)
> >> > +{
> >> > +   struct mtk_pcie *pcie = dev->bus->sysdata;
> >> > +   struct mtk_pcie_port *port;
> >> > +
> >> > +   list_for_each_entry(port, >ports, list)
> >> > +   if (port->index == slot)
> >> > +   return port->irq;
> >> > +
> >> > +   return -1;
> >> > +}
> >>
> >> This looks odd, what is it needed for specifically? It looks like
> >> it's broken for devices behind bridges, and the interrupt mapping
> >> should normally come from the interrupt-map property, without
> >> the need for a driver specific map_irq override.
> >
> > Our hardware just has a GIC for each port and lacks interrupt status for
> > host driver to distinguish INTx. So I return port IRQ here.
> 
> You should still be able to express this with standard interrupt-map
> DT property, without having to resort to your own map_irq
> callback handler.
> 
> In the interrupt-map-mask, you can ignore the interrupt line
> only list the devfn of the root ports for each entry.

Okay, I will fix it.

> >> > +static int mtk_pcie_register_ports(struct mtk_pcie *pcie)
> >> > +{
> >> > +   struct pci_bus *bus, *child;
> >> > +
> >> > +   bus = pci_scan_root_bus(pcie->dev, 0, _pcie_ops, pcie,
> >> > +   >resources);
> >>
> >> Can you use the new pci_register_host_bridge() method instead of
> >> pci_scan_root_bus() here?
> >
> > May I know what's difference between pci_scan_root_bus() and using
> > pci_register_host_bridge() directly? What situation should we use it?
> > It seems that just tegra use this new method currently.
> 
> We introduced the new function for tegra for now, in the long run
> I would hope we can convert all other drivers to it as well, to make it
> easier to add further parameters.
> 
> The new function also has a cleaner way of dealing with the memory
> allocations, similar to how other subsystems work.

Sounds good. I will change to use that.

Thanks!



Re: [PATCH 2/2] dt-bindings: pcie: Add documentation for Mediatek PCIe

2017-04-27 Thread Ryder Lee
On Thu, 2017-04-27 at 21:06 +0200, Arnd Bergmann wrote:
> On Wed, Apr 26, 2017 at 10:10 AM, Ryder Lee  wrote:
> > Hi
> >
> > On Tue, 2017-04-25 at 14:18 +0200, Arnd Bergmann wrote:
> >> On Sun, Apr 23, 2017 at 10:19 AM, Ryder Lee  wrote:
> >> > Add documentation for PCIe host driver available in MT7623
> >> > series SoCs.
> >> >
> >> > Signed-off-by: Ryder Lee 
> >> > ---
> >> >  .../bindings/pci/mediatek,mt7623-pcie.txt  | 153 
> >> > +
> >> >  1 file changed, 153 insertions(+)
> >> >  create mode 100644 
> >> > Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
> >> >
> >> > diff --git 
> >> > a/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt 
> >> > b/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
> >> > new file mode 100644
> >> > index 000..ee93ba2
> >> > --- /dev/null
> >> > +++ b/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
> >> > @@ -0,0 +1,153 @@
> >> > +Mediatek MT7623 PCIe controller
> >> > +
> >> > +Required properties:
> >> > +- compatible: Should contain "mediatek,mt7623-pcie".
> >>
> >> Did mediatek license the IP block from someone else or was it
> >> developed in-house? Is there a name and/or version identifier
> >> for the block itself other than identifying it as the one in mt7623?
> >
> > Originally, it license from synopsys. Our designer add a wrapper to hide
> > the DBI detail so that we cannot use them directly. Perhaps I can call
> > it "mediatek,gen2v1-pcie", because we have a plan to upstream a in-house
> > Gen2 IP in the future.
> 
> Ok, so this is the same hardware that drivers/pci/dwc/ handles, but
> it needs a separate driver because the wrapper that was added uses
> a completely different register layout, right?

Yes, that's what I mean. At first, I really want to base on
drivers/pci/dwc/ to implement this driver. Eventually I found it hard to
go on, like what I said before.

> Are any of the registers the same at all, e.g. for MSI handling?

No, It doesn't support MSI. All I can do is using the registers that designer 
provide
to me. The others are inviable for software. So I treat it as different 
hardware.
Furthermore, we hope that we can put all mediatek drivers together
regardless of in-house IP or lincense IP

We have no particular IP name but just use chip name to call it. So I
will temporarily use "mediatek,gen2v1-pcie" in patch v1.

> >> > +Required properties:
> >> > +- device_type: Must be "pci"
> >> > +- assigned-addresses: Address and size of the port configuration 
> >> > registers
> >> > +- reg: Only the first four bytes are used to refer to the correct bus 
> >> > number
> >> > +  and device number.
> >> > +- #address-cells: Must be 3
> >> > +- #size-cells: Must be 2
> >> > +- ranges: Sub-ranges distributed from the PCIe controller node. An empty
> >> > +  property is sufficient.
> >> > +- clocks: Must contain an entry for each entry in clock-names.
> >> > +  See ../clocks/clock-bindings.txt for details.
> >> > +- clock-names: Must include the following entries:
> >> > +  - sys_ck
> >> > +- resets: Must contain an entry for each entry in reset-names.
> >> > +  See ../reset/reset.txt for details.
> >>
> >> This seems odd: you have a device that is simply identified as "pci"
> >> without any more specific ID, but you require additional properties
> >> (clocks, reset, ...) that are not part of the standard PCI binding.
> >>
> >> Can you clarify how the port devices related to the root device in
> >> this hardware design?
> >
> > I will write clarify like this:
> >
> > PCIe subsys includes one Host/PCI bridge and 3 PCIe MAC port. There
> > are 3 bus master for data access and 1 slave for configuration and
> > status register access. Each port has PIPE interface to PHY and
> 
> If I understand this right, then each of the ports in your hardware
> is what we normally drive using the drivers/pci/dwc/ driver framework,
> but your implementation actually made it more PCI standard compliant
> by implementing the normal PCIe host bridge registers for all ports
> combined, something that most others don't.

In my view, it's correct to implement our driver in this way. But I
don't really understand the details about other platforms. 

> >> Have you considered moving the nonstandard properties into the host
> >> bridge node and having that device deal with setting up the links
> >> to the other drivers? That way we could use the regular pcie
> >> port driver for the children.
> >>
> >
> > OK, but I still want to use port->reset to catch reset properties in
> > driver.
> 
> Do you mean in drivers/pci/pcie/portdrv_pci.c? I see that it
> has a function called pcie_portdrv_slot_reset(), but I don't see
> how that relates to your reset line at the moment. Is this
> something you have submitted in a different series?
> 
> Or do you mean in this host driver? The problem I see with
> that approach is that the port 

Re: [PATCH 2/2] dt-bindings: pcie: Add documentation for Mediatek PCIe

2017-04-27 Thread Ryder Lee
On Thu, 2017-04-27 at 21:06 +0200, Arnd Bergmann wrote:
> On Wed, Apr 26, 2017 at 10:10 AM, Ryder Lee  wrote:
> > Hi
> >
> > On Tue, 2017-04-25 at 14:18 +0200, Arnd Bergmann wrote:
> >> On Sun, Apr 23, 2017 at 10:19 AM, Ryder Lee  wrote:
> >> > Add documentation for PCIe host driver available in MT7623
> >> > series SoCs.
> >> >
> >> > Signed-off-by: Ryder Lee 
> >> > ---
> >> >  .../bindings/pci/mediatek,mt7623-pcie.txt  | 153 
> >> > +
> >> >  1 file changed, 153 insertions(+)
> >> >  create mode 100644 
> >> > Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
> >> >
> >> > diff --git 
> >> > a/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt 
> >> > b/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
> >> > new file mode 100644
> >> > index 000..ee93ba2
> >> > --- /dev/null
> >> > +++ b/Documentation/devicetree/bindings/pci/mediatek,mt7623-pcie.txt
> >> > @@ -0,0 +1,153 @@
> >> > +Mediatek MT7623 PCIe controller
> >> > +
> >> > +Required properties:
> >> > +- compatible: Should contain "mediatek,mt7623-pcie".
> >>
> >> Did mediatek license the IP block from someone else or was it
> >> developed in-house? Is there a name and/or version identifier
> >> for the block itself other than identifying it as the one in mt7623?
> >
> > Originally, it license from synopsys. Our designer add a wrapper to hide
> > the DBI detail so that we cannot use them directly. Perhaps I can call
> > it "mediatek,gen2v1-pcie", because we have a plan to upstream a in-house
> > Gen2 IP in the future.
> 
> Ok, so this is the same hardware that drivers/pci/dwc/ handles, but
> it needs a separate driver because the wrapper that was added uses
> a completely different register layout, right?

Yes, that's what I mean. At first, I really want to base on
drivers/pci/dwc/ to implement this driver. Eventually I found it hard to
go on, like what I said before.

> Are any of the registers the same at all, e.g. for MSI handling?

No, It doesn't support MSI. All I can do is using the registers that designer 
provide
to me. The others are inviable for software. So I treat it as different 
hardware.
Furthermore, we hope that we can put all mediatek drivers together
regardless of in-house IP or lincense IP

We have no particular IP name but just use chip name to call it. So I
will temporarily use "mediatek,gen2v1-pcie" in patch v1.

> >> > +Required properties:
> >> > +- device_type: Must be "pci"
> >> > +- assigned-addresses: Address and size of the port configuration 
> >> > registers
> >> > +- reg: Only the first four bytes are used to refer to the correct bus 
> >> > number
> >> > +  and device number.
> >> > +- #address-cells: Must be 3
> >> > +- #size-cells: Must be 2
> >> > +- ranges: Sub-ranges distributed from the PCIe controller node. An empty
> >> > +  property is sufficient.
> >> > +- clocks: Must contain an entry for each entry in clock-names.
> >> > +  See ../clocks/clock-bindings.txt for details.
> >> > +- clock-names: Must include the following entries:
> >> > +  - sys_ck
> >> > +- resets: Must contain an entry for each entry in reset-names.
> >> > +  See ../reset/reset.txt for details.
> >>
> >> This seems odd: you have a device that is simply identified as "pci"
> >> without any more specific ID, but you require additional properties
> >> (clocks, reset, ...) that are not part of the standard PCI binding.
> >>
> >> Can you clarify how the port devices related to the root device in
> >> this hardware design?
> >
> > I will write clarify like this:
> >
> > PCIe subsys includes one Host/PCI bridge and 3 PCIe MAC port. There
> > are 3 bus master for data access and 1 slave for configuration and
> > status register access. Each port has PIPE interface to PHY and
> 
> If I understand this right, then each of the ports in your hardware
> is what we normally drive using the drivers/pci/dwc/ driver framework,
> but your implementation actually made it more PCI standard compliant
> by implementing the normal PCIe host bridge registers for all ports
> combined, something that most others don't.

In my view, it's correct to implement our driver in this way. But I
don't really understand the details about other platforms. 

> >> Have you considered moving the nonstandard properties into the host
> >> bridge node and having that device deal with setting up the links
> >> to the other drivers? That way we could use the regular pcie
> >> port driver for the children.
> >>
> >
> > OK, but I still want to use port->reset to catch reset properties in
> > driver.
> 
> Do you mean in drivers/pci/pcie/portdrv_pci.c? I see that it
> has a function called pcie_portdrv_slot_reset(), but I don't see
> how that relates to your reset line at the moment. Is this
> something you have submitted in a different series?
> 
> Or do you mean in this host driver? The problem I see with
> that approach is that the port device is owned by portdrv_pci,
> so the host bridge driver should not look 

Re: [lkp-robot] [perf] 924726b2b5: double_fault:#[##]

2017-04-27 Thread Ye Xiaolong
On 04/27, Sebastian Andrzej Siewior wrote:
>On 2017-04-27 10:58:36 [+0800], kernel test robot wrote:
>> 
>> FYI, we noticed the following commit:
>> 
>> commit: 924726b2b5e5000dfb8eb6032651baed1b1bdc6c ("perf: Cure hotplug lock 
>> ordering issues")
>> https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git smp/hotplug
>
>I can't find that commit in tip so I used
>   14aff62ede39 ("perf: Cure hotplug lock ordering issues")
>
>which should be the same thing.

Yes, git patch-id tells they are the same thing.

>
>> in testcase: boot
>> 
>> on test machine: qemu-system-x86_64 -enable-kvm -smp 2 -m 512M
>
>I booted this. I grabbed the command line the yaml file attached and
>quantal-core-x86_64.cgz from github.
>
>> [  203.023547] mmap: trinity-c0 (2509) uses deprecated remap_file_pages() 
>> syscall. See Documentation/vm/remap_file_pages.txt.
>> [child0:2509] uid changed! Was: 0, now 252
>> [child1:2511] child exiting.
>> Bailing main loop. Exit reason: UID changed.
>> [watchdog] [2502] Watchdog exiting
>> [  204.078036] double fault:  [#1] SMP DEBUG_PAGEALLOC
>> [  204.078036] double fault:  [#1] SMP DEBUG_PAGEALLOC
>
>I don't see that.
>

I can't reproduce it either after I rebuilt kernel for commit 14aff62ede39 for
6 times test.

>> To reproduce:
>> 
>> git clone https://github.com/01org/lkp-tests.git
>> cd lkp-tests
>> bin/lkp qemu -k  job-script  # job-script is attached in 
>> this email
>
>I tried that, too. The difference between running qemu manually and this
>seems that the manual approach runs the kernel a few times while this
>aborts after the first run. I attached my manual run. It looks similar
>(including the trinity segfault) but the kernels runs smooth.
>
>You don't seem to inject / load the kernels modules you compile.

Yes, as 0day built a large number of kernel everyday, we won't upload kernel
images and modules to github, it depends user to generate and append it to
initrd.

Thanks,
Xiaolong

>
>> Thanks,
>> Xiaolong
>
>Sebastian




Re: [lkp-robot] [perf] 924726b2b5: double_fault:#[##]

2017-04-27 Thread Ye Xiaolong
On 04/27, Sebastian Andrzej Siewior wrote:
>On 2017-04-27 10:58:36 [+0800], kernel test robot wrote:
>> 
>> FYI, we noticed the following commit:
>> 
>> commit: 924726b2b5e5000dfb8eb6032651baed1b1bdc6c ("perf: Cure hotplug lock 
>> ordering issues")
>> https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git smp/hotplug
>
>I can't find that commit in tip so I used
>   14aff62ede39 ("perf: Cure hotplug lock ordering issues")
>
>which should be the same thing.

Yes, git patch-id tells they are the same thing.

>
>> in testcase: boot
>> 
>> on test machine: qemu-system-x86_64 -enable-kvm -smp 2 -m 512M
>
>I booted this. I grabbed the command line the yaml file attached and
>quantal-core-x86_64.cgz from github.
>
>> [  203.023547] mmap: trinity-c0 (2509) uses deprecated remap_file_pages() 
>> syscall. See Documentation/vm/remap_file_pages.txt.
>> [child0:2509] uid changed! Was: 0, now 252
>> [child1:2511] child exiting.
>> Bailing main loop. Exit reason: UID changed.
>> [watchdog] [2502] Watchdog exiting
>> [  204.078036] double fault:  [#1] SMP DEBUG_PAGEALLOC
>> [  204.078036] double fault:  [#1] SMP DEBUG_PAGEALLOC
>
>I don't see that.
>

I can't reproduce it either after I rebuilt kernel for commit 14aff62ede39 for
6 times test.

>> To reproduce:
>> 
>> git clone https://github.com/01org/lkp-tests.git
>> cd lkp-tests
>> bin/lkp qemu -k  job-script  # job-script is attached in 
>> this email
>
>I tried that, too. The difference between running qemu manually and this
>seems that the manual approach runs the kernel a few times while this
>aborts after the first run. I attached my manual run. It looks similar
>(including the trinity segfault) but the kernels runs smooth.
>
>You don't seem to inject / load the kernels modules you compile.

Yes, as 0day built a large number of kernel everyday, we won't upload kernel
images and modules to github, it depends user to generate and append it to
initrd.

Thanks,
Xiaolong

>
>> Thanks,
>> Xiaolong
>
>Sebastian




linux-next: build warning after merge of the devicetree tree

2017-04-27 Thread Stephen Rothwell
Hi Rob,

After merging the devicetree tree, today's linux-next build (x86_64
allmodconfig) produced this warning:

drivers/of/unittest.c: In function 'of_unittest':
drivers/of/unittest.c:2199:25: warning: 'last_sibling' may be used 
uninitialized in this function [-Wmaybe-uninitialized]
   last_sibling->sibling = overlay_base_root->child;
 ^
drivers/of/unittest.c:2122:22: note: 'last_sibling' was declared here
  struct device_node *last_sibling;
  ^

Introduced by commit

  81d0848fc8d2 ("of: Add unit tests for applying overlays")

-- 
Cheers,
Stephen Rothwell


linux-next: build warning after merge of the devicetree tree

2017-04-27 Thread Stephen Rothwell
Hi Rob,

After merging the devicetree tree, today's linux-next build (x86_64
allmodconfig) produced this warning:

drivers/of/unittest.c: In function 'of_unittest':
drivers/of/unittest.c:2199:25: warning: 'last_sibling' may be used 
uninitialized in this function [-Wmaybe-uninitialized]
   last_sibling->sibling = overlay_base_root->child;
 ^
drivers/of/unittest.c:2122:22: note: 'last_sibling' was declared here
  struct device_node *last_sibling;
  ^

Introduced by commit

  81d0848fc8d2 ("of: Add unit tests for applying overlays")

-- 
Cheers,
Stephen Rothwell


  1   2   3   4   5   6   7   8   9   10   >