Re: [PATCH v8 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support

2017-05-09 Thread Michael Ellerman
Daniel Axtens  writes:
>>  include/linux/cpuhotplug.h |   1 +
...
>> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
>> index 62d240e..51dff54 100644
>> --- a/include/linux/cpuhotplug.h
>> +++ b/include/linux/cpuhotplug.h
>> @@ -137,6 +137,7 @@ enum cpuhp_state {
>>  CPUHP_AP_PERF_ARM_CCN_ONLINE,
>>  CPUHP_AP_PERF_ARM_L2X0_ONLINE,
>>  CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
>> +CPUHP_AP_PERF_POWERPC_NEST_ONLINE,
>>  CPUHP_AP_WORKQUEUE_ONLINE,
>>  CPUHP_AP_RCUTREE_ONLINE,
>>  CPUHP_AP_ONLINE_DYN,

> Who owns this? get_maintainer.pl doesn't give me anything helpful
> here... Do we need an Ack from anyone?

Presumably Thomas Gleixner (who I added to Cc), though I don't know if
he wants to ack every change to it.

cheers


Re: [PATCH] macintosh: move mac_hid driver to input/mouse.

2017-05-09 Thread Dmitry Torokhov
Hi Michal,

On Tue, May 09, 2017 at 09:14:18PM +0200, Michal Suchanek wrote:
> There is nothing mac-specific about this driver. Non-mac hardware with
> suboptimal built-in pointer devices exists.
> 
> This makes it possible to use this emulation not only on x86 and ppc
> notebooks but also on arm and mips.

I'd rather we did not promote this driver from drivers/macintosh to other
platforms, but rather removed it. The same functionality can be implemented
from userspace.
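
For readers wondering what doing this from userspace could look like, here is a
rough sketch using the uinput interface; the device name is made up, error
handling and the evdev side (reading the keyboard keypresses) are omitted, and
it only shows injecting one emulated right-button click:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/uinput.h>

/* Inject one input event into the virtual device. */
static void emit(int fd, int type, int code, int value)
{
	struct input_event ev;

	memset(&ev, 0, sizeof(ev));
	ev.type = type;
	ev.code = code;
	ev.value = value;
	write(fd, &ev, sizeof(ev));
}

int main(void)
{
	struct uinput_user_dev udev;
	int fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK);

	/* Declare a virtual device that can emit mouse button events. */
	ioctl(fd, UI_SET_EVBIT, EV_KEY);
	ioctl(fd, UI_SET_KEYBIT, BTN_RIGHT);

	memset(&udev, 0, sizeof(udev));
	strcpy(udev.name, "userspace-button-emulation");	/* hypothetical name */
	write(fd, &udev, sizeof(udev));
	ioctl(fd, UI_DEV_CREATE);

	/* Emulate one right-button click (press then release). */
	emit(fd, EV_KEY, BTN_RIGHT, 1);
	emit(fd, EV_SYN, SYN_REPORT, 0);
	emit(fd, EV_KEY, BTN_RIGHT, 0);
	emit(fd, EV_SYN, SYN_REPORT, 0);

	ioctl(fd, UI_DEV_DESTROY);
	close(fd);
	return 0;
}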

What hardware do you believe would benefit from this and why?

Thanks.

> 
> Signed-off-by: Michal Suchanek 
> ---
>  drivers/input/mouse/Kconfig  | 20 
>  drivers/input/mouse/Makefile |  1 +
>  drivers/{macintosh => input/mouse}/mac_hid.c |  0
>  drivers/macintosh/Kconfig| 17 -
>  drivers/macintosh/Makefile   |  1 -
>  5 files changed, 21 insertions(+), 18 deletions(-)
>  rename drivers/{macintosh => input/mouse}/mac_hid.c (100%)
> 
> diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
> index 89ebb8f39fee..5533fd3a113f 100644
> --- a/drivers/input/mouse/Kconfig
> +++ b/drivers/input/mouse/Kconfig
> @@ -12,6 +12,26 @@ menuconfig INPUT_MOUSE
>  
>  if INPUT_MOUSE
>  
> +config MAC_EMUMOUSEBTN
> + tristate "Support for mouse button 2+3 emulation"
> + depends on SYSCTL && INPUT
> + help
> +   This provides generic support for emulating the 2nd and 3rd mouse
> +   button with keypresses.  If you say Y here, the emulation is still
> +   disabled by default.  The emulation is controlled by these sysctl
> +   entries:
> +   /proc/sys/dev/mac_hid/mouse_button_emulation
> +   /proc/sys/dev/mac_hid/mouse_button2_keycode
> +   /proc/sys/dev/mac_hid/mouse_button3_keycode
> +
> +   If you have an Apple machine with a 1-button mouse, say Y here.
> +
> +   This emulation can be useful on notebooks with suboptimal touchpad
> +   hardware as well.
> +
> +   To compile this driver as a module, choose M here: the
> +   module will be called mac_hid.
> +
>  config MOUSE_PS2
>   tristate "PS/2 mouse"
>   default y
> diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile
> index 56bf0ad877c6..dfaad1dd8857 100644
> --- a/drivers/input/mouse/Makefile
> +++ b/drivers/input/mouse/Makefile
> @@ -4,6 +4,7 @@
>  
>  # Each configuration option enables a list of files.
>  
> +obj-$(CONFIG_MAC_EMUMOUSEBTN)+= mac_hid.o
>  obj-$(CONFIG_MOUSE_AMIGA)+= amimouse.o
>  obj-$(CONFIG_MOUSE_APPLETOUCH)   += appletouch.o
>  obj-$(CONFIG_MOUSE_ATARI)+= atarimouse.o
> diff --git a/drivers/macintosh/mac_hid.c b/drivers/input/mouse/mac_hid.c
> similarity index 100%
> rename from drivers/macintosh/mac_hid.c
> rename to drivers/input/mouse/mac_hid.c
> diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
> index 97a420c11eed..011df09c5167 100644
> --- a/drivers/macintosh/Kconfig
> +++ b/drivers/macintosh/Kconfig
> @@ -159,23 +159,6 @@ config INPUT_ADBHID
>  
> If unsure, say Y.
>  
> -config MAC_EMUMOUSEBTN
> - tristate "Support for mouse button 2+3 emulation"
> - depends on SYSCTL && INPUT
> - help
> -   This provides generic support for emulating the 2nd and 3rd mouse
> -   button with keypresses.  If you say Y here, the emulation is still
> -   disabled by default.  The emulation is controlled by these sysctl
> -   entries:
> -   /proc/sys/dev/mac_hid/mouse_button_emulation
> -   /proc/sys/dev/mac_hid/mouse_button2_keycode
> -   /proc/sys/dev/mac_hid/mouse_button3_keycode
> -
> -   If you have an Apple machine with a 1-button mouse, say Y here.
> -
> -   To compile this driver as a module, choose M here: the
> -   module will be called mac_hid.
> -
>  config THERM_WINDTUNNEL
>   tristate "Support for thermal management on Windtunnel G4s"
>   depends on I2C && I2C_POWERMAC && PPC_PMAC && !PPC_PMAC64
> diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
> index 516eb65bcacc..ab8b1e74d160 100644
> --- a/drivers/macintosh/Makefile
> +++ b/drivers/macintosh/Makefile
> @@ -7,7 +7,6 @@
>  obj-$(CONFIG_PPC_PMAC)   += macio_asic.o macio_sysfs.o
>  
>  obj-$(CONFIG_PMAC_MEDIABAY)  += mediabay.o
> -obj-$(CONFIG_MAC_EMUMOUSEBTN)+= mac_hid.o
>  obj-$(CONFIG_INPUT_ADBHID)   += adbhid.o
>  obj-$(CONFIG_ANSLCD) += ans-lcd.o
>  
> -- 
> 2.10.2
> 

-- 
Dmitry


Re: [PATCH v6 2/7] perf/x86/intel: Record branch type

2017-05-09 Thread Jin, Yao



On 5/9/2017 8:39 PM, Jiri Olsa wrote:

On Tue, May 09, 2017 at 07:57:11PM +0800, Jin, Yao wrote:

SNIP


+
+   type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+   mask = ~(~0 << 1);

is that a fancy way to get 1 into the mask? what do I miss?

you did not comment on this one

Sorry, I mistakenly thought this comment and the next comment had the same
meaning.

In the previous version, I used a switch/case to convert from X86_BR to
PERF_BR. I got a comment from the community that it'd be better to use a
lookup table for the conversion.

Since each bit in type represents an X86_BR type, I use a mask (0x1) to
filter the bit. Yes, it looks like I can also directly set mask to 0x1.

I wrote "mask = ~(~0 << 1)" out of coding habit. If you think I should
change it to "mask = 0x1", that's OK  :)

I'm ok with that.. was just wondering about the reason.
I guess the compiler will turn it into a single constant assignment anyway.


I think so.  The compiler should be clever enough for this optimization.

+
+   for (i = 0; i < X86_BR_TYPE_MAP_MAX; i++) {
+   if (type & mask)
+   return branch_map[i];

I wonder some bit search would be faster in here, but maybe not big deal

jirka

I just think branch_map[] doesn't contain many entries (16 entries
here), so checking 1 bit at a time should be acceptable. I just want to
keep the code simple.

But if the number of entries were larger (e.g. 64), it might be better to
check 2 or 4 bits at a time.

ook

jirka

Sorry, what's the meaning of ook? Does it mean "OK"?

just means ok ;-)

thanks,
jirka


Thanks so much!

Jin Yao
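
For reference, the "bit search" Jiri wonders about above could look roughly
like the sketch below. It reuses branch_map[] and X86_BR_TYPE_MAP_MAX from the
posted patch and is illustrative only, not part of it. (Note that ~(~0 << n)
is just a generic "lowest n bits" idiom, here with n = 1.)

/* Sketch only: jump straight to the lowest set bit instead of testing
 * one bit per loop iteration.  branch_map[] and X86_BR_TYPE_MAP_MAX are
 * the lookup table and its size from the patch under review.
 */
static int common_branch_type(int type)
{
	int i;

	type >>= 2;	/* skip X86_BR_USER and X86_BR_KERNEL */
	if (!type)
		return PERF_BR_NONE;

	i = __builtin_ctz(type);	/* index of the lowest set bit */
	if (i < X86_BR_TYPE_MAP_MAX)
		return branch_map[i];

	return PERF_BR_NONE;
}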



Re: [PATCH] powerpc/64e: Don't place the stack beyond TASK_SIZE

2017-05-09 Thread Michael Ellerman
Scott Wood  writes:

> On Tue, 2017-05-09 at 00:09 +1000, Michael Ellerman wrote:
>> Scott Wood  writes:
>> 
>> > Commit f4ea6dcb08ea ("powerpc/mm: Enable mappings above 128TB") increased
>> > the task size on book3s, and introduced a mechanism to dynamically
>> > control whether a task uses these larger addresses.  While the change to
>> > the task size itself was ifdef-protected to only apply on book3s, the
>> > change to STACK_TOP_USER64 was not.  On book3e, this had the effect of
>> > trying to use addresses up to 128TiB for the stack despite a 64TiB task
>> > size limit -- which broke 64-bit userspace producing the following errors:
>> > 
>> > Starting init: /sbin/init exists but couldn't execute it (error -14)
>> > Starting init: /bin/sh exists but couldn't execute it (error -14)
>> > Kernel panic - not syncing: No working init found.  Try passing init=
>> > option to kernel. See Linux Documentation/admin-guide/init.rst for
>> > guidance.
>> 
>> For some reason I am not seeing this on my p5020ds?
>> 
>> I just checked, it's definitely booting:
>> 
>>   [0.00] Linux version 4.11.0-gcc5-g13e0988 (kerkins@alpine1-p1)
>> (gcc version 5.2.1 20151001 (GCC) ) #1 SMP Mon May 8 05:33:22 AEST 2017
>>   [0.00] Using CoreNet Generic machine description
>>   ...
>>   [3.216940] systemd[1]: Detected architecture ppc64.
>>   ...
>>   Debian GNU/Linux stretch/sid p5020ds ttyS0
>>   
>>   p5020ds login:
>> 
>> 
>> Anyway patch looks good, I'll pull it in with the rest of your tree.
>
> Are you using a 64-bit userspace?

Ah of course. It's debian so it's mostly 32-bit. I'd forgotten 32-bit
userspace was a thing! :)

I'll fix my tests to run some 64-bit binaries.

cheers


[PATCH] macintosh: move mac_hid driver to input/mouse.

2017-05-09 Thread Michal Suchanek
There is nothing mac-specific about this driver. Non-mac hardware with
suboptimal built-in pointer devices exists.

This makes it possible to use this emulation not only on x86 and ppc
notebooks but also on arm and mips.

Signed-off-by: Michal Suchanek 
---
 drivers/input/mouse/Kconfig  | 20 
 drivers/input/mouse/Makefile |  1 +
 drivers/{macintosh => input/mouse}/mac_hid.c |  0
 drivers/macintosh/Kconfig| 17 -
 drivers/macintosh/Makefile   |  1 -
 5 files changed, 21 insertions(+), 18 deletions(-)
 rename drivers/{macintosh => input/mouse}/mac_hid.c (100%)

diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index 89ebb8f39fee..5533fd3a113f 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -12,6 +12,26 @@ menuconfig INPUT_MOUSE
 
 if INPUT_MOUSE
 
+config MAC_EMUMOUSEBTN
+   tristate "Support for mouse button 2+3 emulation"
+   depends on SYSCTL && INPUT
+   help
+ This provides generic support for emulating the 2nd and 3rd mouse
+ button with keypresses.  If you say Y here, the emulation is still
+ disabled by default.  The emulation is controlled by these sysctl
+ entries:
+ /proc/sys/dev/mac_hid/mouse_button_emulation
+ /proc/sys/dev/mac_hid/mouse_button2_keycode
+ /proc/sys/dev/mac_hid/mouse_button3_keycode
+
+ If you have an Apple machine with a 1-button mouse, say Y here.
+
+ This emulation can be useful on notebooks with suboptimal touchpad
+ hardware as well.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mac_hid.
+
 config MOUSE_PS2
tristate "PS/2 mouse"
default y
diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile
index 56bf0ad877c6..dfaad1dd8857 100644
--- a/drivers/input/mouse/Makefile
+++ b/drivers/input/mouse/Makefile
@@ -4,6 +4,7 @@
 
 # Each configuration option enables a list of files.
 
+obj-$(CONFIG_MAC_EMUMOUSEBTN)  += mac_hid.o
 obj-$(CONFIG_MOUSE_AMIGA)  += amimouse.o
 obj-$(CONFIG_MOUSE_APPLETOUCH) += appletouch.o
 obj-$(CONFIG_MOUSE_ATARI)  += atarimouse.o
diff --git a/drivers/macintosh/mac_hid.c b/drivers/input/mouse/mac_hid.c
similarity index 100%
rename from drivers/macintosh/mac_hid.c
rename to drivers/input/mouse/mac_hid.c
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 97a420c11eed..011df09c5167 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -159,23 +159,6 @@ config INPUT_ADBHID
 
  If unsure, say Y.
 
-config MAC_EMUMOUSEBTN
-   tristate "Support for mouse button 2+3 emulation"
-   depends on SYSCTL && INPUT
-   help
- This provides generic support for emulating the 2nd and 3rd mouse
- button with keypresses.  If you say Y here, the emulation is still
- disabled by default.  The emulation is controlled by these sysctl
- entries:
- /proc/sys/dev/mac_hid/mouse_button_emulation
- /proc/sys/dev/mac_hid/mouse_button2_keycode
- /proc/sys/dev/mac_hid/mouse_button3_keycode
-
- If you have an Apple machine with a 1-button mouse, say Y here.
-
- To compile this driver as a module, choose M here: the
- module will be called mac_hid.
-
 config THERM_WINDTUNNEL
tristate "Support for thermal management on Windtunnel G4s"
depends on I2C && I2C_POWERMAC && PPC_PMAC && !PPC_PMAC64
diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
index 516eb65bcacc..ab8b1e74d160 100644
--- a/drivers/macintosh/Makefile
+++ b/drivers/macintosh/Makefile
@@ -7,7 +7,6 @@
 obj-$(CONFIG_PPC_PMAC) += macio_asic.o macio_sysfs.o
 
 obj-$(CONFIG_PMAC_MEDIABAY)+= mediabay.o
-obj-$(CONFIG_MAC_EMUMOUSEBTN)  += mac_hid.o
 obj-$(CONFIG_INPUT_ADBHID) += adbhid.o
 obj-$(CONFIG_ANSLCD)   += ans-lcd.o
 
-- 
2.10.2



Re: [v3 0/9] parallelized "struct page" zeroing

2017-05-09 Thread Pasha Tatashin

Hi Michal,


I like the idea of postponing the zeroing from the allocation to the
init time. To be honest the improvement looks much larger than I would
expect (Btw. this should be a part of the changelog rather than an
outside link).


The improvements look larger than expected because this time was never 
measured before, as Linux does not have early boot timestamps. I added them 
for x86 and SPARC to measure the performance. I am pushing those changes 
through separate patchsets.




The implementation just looks too large compared to what I would expect. E.g. do
we really need to add zero argument to the large part of the memblock
API? Wouldn't it be easier to simply export memblock_virt_alloc_internal
(or its tiny wrapper memblock_virt_alloc_core) and move the zeroing
outside to its 2 callers? A completely untested scratched version at the
end of the email.


I am OK with this change. But I do not really see a difference between:

memblock_virt_alloc_raw()
and
memblock_virt_alloc_core()

In both cases we use memblock_virt_alloc_internal(), but the only 
difference is that in my case we tell memblock_virt_alloc_internal() to 
zero the pages if needed, while in your case the other two callers do the 
zeroing. I like moving memblock_dbg() inside 
memblock_virt_alloc_internal().
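
For illustration, moving the zeroing out to a caller would look roughly like
the sketch below. The prototypes follow the scratch patch quoted later in the
thread; this is a sketch, not tested code:

void * __init memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
				phys_addr_t align, phys_addr_t min_addr,
				phys_addr_t max_addr, int nid)
{
	void *ptr;

	ptr = memblock_virt_alloc_internal(size, align, min_addr, max_addr,
					   nid, (void *)_RET_IP_);
	if (ptr)
		memset(ptr, 0, size);	/* zeroing now lives in the caller */
	return ptr;
}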




Also it seems that this is not 100% correct either as it only cares
about VMEMMAP while DEFERRED_STRUCT_PAGE_INIT might be enabled also for
SPARSEMEM. This would suggest that we would zero out pages twice,
right?


Thank you, I will check this combination before sending out the next patch.



A similar concern would go to the memory hotplug patch which will
fall back to the slab/page allocator IIRC. On the other hand
__init_single_page is shared with the hotplug code so again we would
initialize 2 times.


Correct: when memory is hotplugged, to gain the benefit of this fix, and 
also not to regress by actually double-zeroing "struct pages", we should 
not zero it out. However, I do not really have the means to test it.




So I suspect more changes are needed. I will have a closer look tomorrow.


Thank you for reviewing this work. I will wait for your comments before 
sending out updated patches.


Pasha


Re: [v3 0/9] parallelized "struct page" zeroing

2017-05-09 Thread Michal Hocko
On Fri 05-05-17 13:03:07, Pavel Tatashin wrote:
> Changelog:
>   v2 - v3
>   - Addressed David's comments about one change per patch:
>   * Splited changes to platforms into 4 patches
>   * Made "do not zero vmemmap_buf" as a separate patch
>   v1 - v2
>   - Per request, added s390 to deferred "struct page" zeroing
>   - Collected performance data on x86 which proves the importance of
> keeping memset() as a prefetch (see below).
> 
> When deferred struct page initialization feature is enabled, we get a
> performance gain of initializing vmemmap in parallel after other CPUs are
> started. However, we still zero the memory for vmemmap using one boot CPU.
> This patch-set fixes the memset-zeroing limitation by deferring it as well.

I like the idea of postponing the zeroing from the allocation to the
init time. To be honest the improvement looks much larger than I would
expect (Btw. this should be a part of the changelog rather than an
outside link).

The implementation just looks too large compared to what I would expect. E.g. do
we really need to add zero argument to the large part of the memblock
API? Wouldn't it be easier to simply export memblock_virt_alloc_internal
(or its tiny wrapper memblock_virt_alloc_core) and move the zeroing
outside to its 2 callers? A completely untested scratched version at the
end of the email.

Also it seems that this is not 100% correct either as it only cares
about VMEMMAP while DEFERRED_STRUCT_PAGE_INIT might be enabled also for
SPARSEMEM. This would suggest that we would zero out pages twice,
right?

A similar concern would go to the memory hotplug patch which will
fall back to the slab/page allocator IIRC. On the other hand
__init_single_page is shared with the hotplug code so again we would
initialize 2 times.

So I suspect more changes are needed. I will have a closer look tomorrow.

>  arch/powerpc/mm/init_64.c |4 +-
>  arch/s390/mm/vmem.c   |5 ++-
>  arch/sparc/mm/init_64.c   |   26 +++
>  arch/x86/mm/init_64.c |3 +-
>  include/linux/bootmem.h   |3 ++
>  include/linux/mm.h|   15 +++--
>  mm/memblock.c |   46 --
>  mm/page_alloc.c   |3 ++
>  mm/sparse-vmemmap.c   |   48 +---
>  9 files changed, 103 insertions(+), 50 deletions(-)


The bootmem API change mentioned above.

 include/linux/bootmem.h |  3 +++
 mm/memblock.c   | 41 ++---
 mm/sparse-vmemmap.c |  2 +-
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 962164d36506..c9a08463d9a8 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -160,6 +160,9 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0)
 
 /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void * memblock_virt_alloc_core(phys_addr_t size, phys_addr_t align,
+   phys_addr_t min_addr, phys_addr_t max_addr,
+   int nid);
 void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
phys_addr_t align, phys_addr_t min_addr,
phys_addr_t max_addr, int nid);
diff --git a/mm/memblock.c b/mm/memblock.c
index b049c9b2dba8..eab7da94f873 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1271,8 +1271,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  *
  * The memory block is aligned on SMP_CACHE_BYTES if @align == 0.
  *
- * The phys address of allocated boot memory block is converted to virtual and
- * allocated memory is reset to 0.
+ * The allocated memory block has to be zeroed out explicitly by the caller.
  *
  * In addition, function sets the min_count to 0 using kmemleak_alloc for
  * allocated boot memory block, so that it is never reported as leaks.
@@ -1280,15 +1279,18 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * RETURNS:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
-static void * __init memblock_virt_alloc_internal(
+static inline void * __init memblock_virt_alloc_internal(
phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
-   int nid)
+   int nid, void *caller)
 {
phys_addr_t alloc;
void *ptr;
ulong flags = choose_memblock_flags();
 
+	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
+		     __func__, (u64)size, (u64)align, nid, (u64)min_addr,
+		     (u64)max_addr, caller);
	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
nid = 

Re: [linux-next][bock] [bisected c20cfc27a] WARNING: CPU: 22 PID: 0 at block/blk-core.c:2655 .blk_update_request+0x4f8/0x500

2017-05-09 Thread Abdul Haleem
On Mon, 2017-05-08 at 08:00 -0600, Jens Axboe wrote:
> On 05/08/2017 01:13 AM, Abdul Haleem wrote:
> > On Fri, 2017-05-05 at 08:02 -0600, Jens Axboe wrote:
> >> On 05/05/2017 12:25 AM, Abdul Haleem wrote:
> >>> Hi,
> >>>
> >>> 4.11.0 Linus mainline booted with Warnings on PowerPC.
> >>>
> >>> We did not see this on next-20170407 but on next-20170410 and later.
> >>
> >> Have you tried current Linus -git? Both of the -next versions you list
> >> are rather old.
> >>
> > 
> > Hi Jens, 
> > 
> > Warning is still seen with next-20170505 and also with today's mainline.
> > 
> > It was first seen on next-20170410, so the last good was next-20170407.
> 
> The log between the known good and first bad version, condensed a bit for
> primary suspects, is below.
> 
> Christoph Hellwig (4):
>   sd: split sd_setup_discard_cmnd
>   sd: implement REQ_OP_WRITE_ZEROES
>   sd: implement unmapping Write Zeroes
>   block: remove the discard_zeroes_data flag
> 
> Martin K. Petersen (2):
>   scsi: sd: Separate zeroout and discard command choices
>   scsi: sd: Remove LBPRZ dependency for discards
> 
> Christoph Hellwig (7):
>   block: implement splitting of REQ_OP_WRITE_ZEROES bios
>   block: stop using blkdev_issue_write_same for zeroing
>   block: add a flags argument to (__)blkdev_issue_zeroout
>   block: add a REQ_NOUNMAP flag for REQ_OP_WRITE_ZEROES
>   block: add a new BLKDEV_ZERO_NOFALLBACK flag
>   block: stop using discards for zeroing
>   block: remove the discard_zeroes_data flag
> 
> Christoph, Martin - any ideas? Trace from Abdul below.

A bisection for the above suspects resulted in a bad commit:

c20cfc27a47307e811346f85959cf3cc07ae42f9 is the first bad commit
commit c20cfc27a47307e811346f85959cf3cc07ae42f9
Author: Christoph Hellwig 
Date:   Wed Apr 5 19:21:07 2017 +0200

block: stop using blkdev_issue_write_same for zeroing

We'll always use the WRITE ZEROES code for zeroing now.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Martin K. Petersen 
Reviewed-by: Hannes Reinecke 
Signed-off-by: Jens Axboe 


@Christoph FYI, the machine is configured with a 64K page size
> 
> WARNING: CPU: 12 PID: 0 at block/blk-core.c:2651 
> .blk_update_request+0x4cc/0x4e0
> Modules linked in: sg(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) 
> sunrpc(E) binfmt_misc(E) ip_tables(E) ext4(E) mbcache(E) jbd2(E) sd_mod(E) 
> ibmvscsi(E) scsi_transport_srp(E) ibmveth(E)
> CPU: 12 PID: 0 Comm: swapper/12 Tainted: GE   4.11.0-autotest #1
> task: c009f455ee80 task.stack: c009fb2e8000
> NIP: c050bd1c LR: c050b8ec CTR: c05114b0
> REGS: c013fff73740 TRAP: 0700   Tainted: GE
> (4.11.0-autotest)
> MSR: 80029032 
>   CR: 48042048  XER: 0001
> CFAR: c050bb34 SOFTE: 1 
> GPR00: c050b8ec c013fff739c0 c1389c00 c009eca9c800
> GPR04:   0001 0060 
> GPR08: 00067887  c009eca9c800 de5f7e30 
> GPR12: 88044044 ce9f6c00 c009fb2ebf90 00200042 
> GPR16: 9367 c013fff7  c0df4100 
> GPR20: c13c3b00 c0df4100  0005 
> GPR24: 2ee0 c17789f8   
> GPR28:  c38ba400  c009eca9c800 
> NIP [c050bd1c] .blk_update_request+0x4cc/0x4e0
> LR [c050b8ec] .blk_update_request+0x9c/0x4e0
> Call Trace:
> [c013fff739c0] [c050b8ec] .blk_update_request+0x9c/0x4e0 
> (unreliable)
> [c013fff73a60] [c06b06fc] .scsi_end_request+0x4c/0x240
> [c013fff73b10] [c06b4564] .scsi_io_completion+0x1d4/0x6c0
> [c013fff73be0] [c06a8cd0] .scsi_finish_command+0x100/0x1b0
> [c013fff73c70] [c06b3978] .scsi_softirq_done+0x188/0x1e0
> [c013fff73d00] [c0516b44] .blk_done_softirq+0xc4/0xf0
> [c013fff73d90] [c00daef8] .__do_softirq+0x158/0x3b0
> [c013fff73e90] [c00db5b8] .irq_exit+0x1a8/0x1c0
> [c013fff73f10] [c0014f84] .__do_irq+0x94/0x1f0
> [c013fff73f90] [c0026cbc] .call_do_irq+0x14/0x24
> [c009fb2eb7f0] [c001516c] .do_IRQ+0x8c/0x100
> [c009fb2eb890] [c0008bf4] hardware_interrupt_common+0x114/0x120
> --- interrupt: 501 at .plpar_hcall_norets+0x14/0x20
> LR = .check_and_cede_processor+0x24/0x40
> [c009fb2ebb80] [0002] 0x2 (unreliable)
> [c009fb2ebbf0] [c07c360c] .dedicated_cede_loop+0x4c/0x150
> [c009fb2ebc70] [c07c1040] .cpuidle_enter_state+0xb0/0x3b0
> [c009fb2ebd20] [c012d1bc] .call_cpuidle+0x3c/0x70
> [c009fb2ebd90] [c012d550] .do_idle+0x280/0x2e0
> [c009fb2ebe50] [c012d768] 

[PATCH] powerpc/mm: Simplify _PAGE_RO handling in page table dump

2017-05-09 Thread Christophe Leroy
Commit fd893fe56a130 ("powerpc/mm: Fix missing page attributes in
page table dump") added support for the _PAGE_RO attribute.

This patch makes the handling simpler: only one of _PAGE_RO and _PAGE_RW
is defined as a non-zero bit on any given platform, so both cases can be
covered by a single entry that masks on (_PAGE_RW | _PAGE_RO) and tests
for _PAGE_RW.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/dump_linuxpagetables.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index d659345a98d6..eeef51107cff 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -121,13 +121,8 @@ static const struct flag_info flag_array[] = {
.set= "user",
.clear  = "",
}, {
-#if _PAGE_RO == 0
-   .mask   = _PAGE_RW,
+   .mask   = _PAGE_RW | _PAGE_RO,
.val= _PAGE_RW,
-#else
-   .mask   = _PAGE_RO,
-   .val= 0,
-#endif
.set= "rw",
.clear  = "ro",
}, {
-- 
2.12.0



[PATCH 3/3] powerpc/64s: Support new device tree binding for discovering CPU features

2017-05-09 Thread Michael Ellerman
From: Nicholas Piggin 

The ibm,powerpc-cpu-features device tree binding describes CPU features with
ASCII names and extensible compatibility, privilege, and enablement metadata
that allows improved flexibility and compatibility with new hardware.

The interface is described in detail in ibm,powerpc-cpu-features.txt in this
patch.

Currently this code is not enabled by default, and there are no released
firmwares that provide the binding.

Signed-off-by: Nicholas Piggin 
Signed-off-by: Michael Ellerman 
---
 .../bindings/powerpc/ibm,powerpc-cpu-features.txt  |  248 +
 arch/powerpc/Kconfig   |   16 +
 arch/powerpc/include/asm/cpu_has_feature.h |6 +-
 arch/powerpc/include/asm/cputable.h|2 +
 arch/powerpc/include/asm/dt_cpu_ftrs.h |   26 +
 arch/powerpc/include/asm/reg.h |1 +
 arch/powerpc/include/uapi/asm/cputable.h   |7 +
 arch/powerpc/kernel/Makefile   |1 +
 arch/powerpc/kernel/cputable.c |   37 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c  | 1031 
 arch/powerpc/kernel/prom.c |   29 +-
 arch/powerpc/kernel/setup_64.c |   10 +-
 12 files changed, 1398 insertions(+), 16 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
 create mode 100644 arch/powerpc/include/asm/dt_cpu_ftrs.h
 create mode 100644 arch/powerpc/kernel/dt_cpu_ftrs.c

diff --git a/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
new file mode 100644
index ..5af426e13334
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt
@@ -0,0 +1,248 @@
+*** NOTE ***
+This document is copied from OPAL firmware
+(skiboot/doc/device-tree/ibm,powerpc-cpu-features/binding.txt)
+
+There is a more complete overview and documentation of features in that
+source tree.  All patches and modifications should go there.
+
+
+ibm,powerpc-cpu-features binding
+
+
+This device tree binding describes CPU features available to software, with
+enablement, privilege, and compatibility metadata.
+
+A more general description of the design and implementation of this binding is
+found in design.txt, which also points to documentation of specific features.
+
+
+/cpus/ibm,powerpc-cpu-features node binding
+---
+
+Node: ibm,powerpc-cpu-features
+
+Description: Container of CPU feature nodes.
+
+The node name must be "ibm,powerpc-cpu-features".
+
+It is implemented as a child of the node "/cpus", but this must not be
+assumed by parsers.
+
+The node is optional but should be provided by new OPAL firmware.
+
+Properties:
+
+- compatible
+  Usage: required
+  Value type: string
+  Definition: "ibm,powerpc-cpu-features"
+
+  This compatibility refers to backwards compatibility of the overall
+  design with parsers that behave according to these guidelines. This can
+  be extended in a backward compatible manner which would not warrant a
+  revision of the compatible property.
+
+- isa
+  Usage: required
+  Value type: <u32>
+  Definition:
+
+  isa that the CPU is currently running in. This provides instruction set
+  compatibility, less the individual feature nodes. For example, an ISA v3.0
+  implementation that lacks the "transactional-memory" cpufeature node
+  should not use transactional memory facilities.
+
+  Value corresponds to the "Power ISA Version" multiplied by 1000.
+  For example, <3000> corresponds to Version 3.0, <2070> to Version 2.07.
+  The minor digit is available for revisions.
+
+- display-name
+  Usage: optional
+  Value type: string
+  Definition:
+
+  A human readable name for the CPU.
+
+/cpus/ibm,powerpc-cpu-features/example-feature node bindings
+
+
+Each child node of cpu-features represents a CPU feature / capability.
+
+Node: A string describing an architected CPU feature, e.g., "floating-point".
+
+Description: A feature or capability supported by the CPUs.
+
+The name of the node is a human readable string that forms the interface
+used to describe features to software. Features are currently documented
+in the code where they are implemented in skiboot/core/cpufeatures.c
+
+Presence of the node indicates the feature is available.
+
+Properties:
+
+- isa
+  Usage: required
+  Value type: <u32>
+  Definition:
+
+  First level of the Power ISA that the feature appears in.
+  Software should filter out features when constraining the
+  environment to a particular ISA version.
+
+  Value is defined similarly to /cpus/features/isa
+
+- usable-privilege
+  Usage: required
+  Value type: <u32> bit mask
+  Definition:
+  Bit numbers are LSB0
+ 

[PATCH 2/3] powerpc: Don't print cpu_spec->cpu_name if it's NULL

2017-05-09 Thread Michael Ellerman
From: Nicholas Piggin 

Currently we assume that if the cpu_spec has a pvr_mask then it must also have a
cpu_name. But that will change in a subsequent commit when we do CPU feature
discovery via the device tree, so check explicitly if cpu_name is NULL.

Signed-off-by: Nicholas Piggin 
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/kernel/setup-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 5c10b5925ac2..b57df7fc9e5f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -256,7 +256,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "cpu\t\t: ");
 
-   if (cur_cpu_spec->pvr_mask)
+   if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
seq_printf(m, "%s", cur_cpu_spec->cpu_name);
else
seq_printf(m, "unknown (%08x)", pvr);
-- 
2.7.4



[PATCH 1/3] of/fdt: introduce of_scan_flat_dt_subnodes and of_get_flat_dt_phandle

2017-05-09 Thread Michael Ellerman
From: Nicholas Piggin 

Introduce primitives for FDT parsing. These will be used for powerpc
cpufeatures node scanning, which has quite a complex structure but should
be processed early.

Cc: devicet...@vger.kernel.org
Acked-by: Rob Herring 
Signed-off-by: Nicholas Piggin 
Signed-off-by: Michael Ellerman 
---
 drivers/of/fdt.c   | 38 ++
 include/linux/of_fdt.h |  6 ++
 2 files changed, 44 insertions(+)
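
As a usage illustration (not part of the patch), a caller could walk the
children of a node like this; the callback name and the counting logic are
hypothetical:

static int __init count_cpu_feature(unsigned long node, const char *uname,
				    void *data)
{
	int *count = data;

	(*count)++;		/* one child node per CPU feature */
	return 0;		/* returning non-zero stops the scan */
}

static int __init scan_cpu_features(unsigned long features_node)
{
	int count = 0;

	of_scan_flat_dt_subnodes(features_node, count_cpu_feature, &count);
	return count;
}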

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index e5ce4b59e162..961ca97072a9 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -754,6 +754,36 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 }
 
 /**
+ * of_scan_flat_dt_subnodes - scan sub-nodes of a node, calling a callback on each.
+ * @parent: parent node to scan
+ * @it: callback function
+ * @data: context data pointer
+ *
+ * This function is used to scan sub-nodes of a node.
+ */
+int __init of_scan_flat_dt_subnodes(unsigned long parent,
+   int (*it)(unsigned long node,
+ const char *uname,
+ void *data),
+   void *data)
+{
+   const void *blob = initial_boot_params;
+   int node;
+
+   fdt_for_each_subnode(node, blob, parent) {
+   const char *pathp;
+   int rc;
+
+   pathp = fdt_get_name(blob, node, NULL);
+   if (*pathp == '/')
+   pathp = kbasename(pathp);
+   rc = it(node, pathp, data);
+   if (rc)
+   return rc;
+   }
+   return 0;
+}
+
+/**
  * of_get_flat_dt_subnode_by_name - get the subnode by given name
  *
  * @node: the parent node
@@ -812,6 +842,14 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat)
return of_fdt_match(initial_boot_params, node, compat);
 }
 
+/**
+ * of_get_flat_dt_phandle - Given a node in the flat blob, return the phandle
+ */
+uint32_t __init of_get_flat_dt_phandle(unsigned long node)
+{
+   return fdt_get_phandle(initial_boot_params, node);
+}
+
 struct fdt_scan_status {
const char *name;
int namelen;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 271b3fdf0070..1dfbfd0d8040 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -54,6 +54,11 @@ extern char __dtb_end[];
 extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 int depth, void *data),
   void *data);
+extern int of_scan_flat_dt_subnodes(unsigned long node,
+   int (*it)(unsigned long node,
+ const char *uname,
+ void *data),
+   void *data);
 extern int of_get_flat_dt_subnode_by_name(unsigned long node,
  const char *uname);
 extern const void *of_get_flat_dt_prop(unsigned long node, const char *name,
@@ -62,6 +67,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
 extern int of_get_flat_dt_size(void);
+extern uint32_t of_get_flat_dt_phandle(unsigned long node);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 int depth, void *data);
-- 
2.7.4



Re: [PATCH v6 2/7] perf/x86/intel: Record branch type

2017-05-09 Thread Jiri Olsa
On Tue, May 09, 2017 at 07:57:11PM +0800, Jin, Yao wrote:

SNIP

> > > > > +
> > > > > + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
> > > > > + mask = ~(~0 << 1);
> > > > is that a fancy way to get 1 into the mask? what do I miss?
> > you did not comment on this one
> 
> Sorry, I mistakenly thought this comment and the next comment had the same
> meaning.
> 
> In the previous version, I used a switch/case to convert from X86_BR to
> PERF_BR. I got a comment from the community that it'd be better to use a
> lookup table for the conversion.
> 
> Since each bit in type represents an X86_BR type, I use a mask (0x1) to
> filter the bit. Yes, it looks like I can also directly set mask to 0x1.
> 
> I wrote "mask = ~(~0 << 1)" out of coding habit. If you think I should
> change it to "mask = 0x1", that's OK  :)

I'm ok with that.. was just wondering about the reason.
I guess the compiler will turn it into a single constant assignment anyway.

> 
> > > > > +
> > > > > + for (i = 0; i < X86_BR_TYPE_MAP_MAX; i++) {
> > > > > + if (type & mask)
> > > > > + return branch_map[i];
> > > > I wonder some bit search would be faster in here, but maybe not big deal
> > > > 
> > > > jirka
> > > I just think branch_map[] doesn't contain many entries (16 entries
> > > here), so checking 1 bit at a time should be acceptable. I just want to
> > > keep the code simple.
> > > 
> > > But if the number of entries were larger (e.g. 64), it might be better
> > > to check 2 or 4 bits at a time.
> > ook
> > 
> > jirka
> Sorry, what's the meaning of ook? Does it mean "OK"?

just means ok ;-)

thanks,
jirka


Re: [PATCH v6 2/7] perf/x86/intel: Record branch type

2017-05-09 Thread Jin, Yao



On 5/9/2017 4:26 PM, Jiri Olsa wrote:

On Mon, Apr 24, 2017 at 08:47:14AM +0800, Jin, Yao wrote:


On 4/23/2017 9:55 PM, Jiri Olsa wrote:

On Thu, Apr 20, 2017 at 08:07:50PM +0800, Jin Yao wrote:

SNIP


+#define X86_BR_TYPE_MAP_MAX16
+
+static int
+common_branch_type(int type)
+{
+   int i, mask;
+   const int branch_map[X86_BR_TYPE_MAP_MAX] = {
+   PERF_BR_CALL,   /* X86_BR_CALL */
+   PERF_BR_RET,/* X86_BR_RET */
+   PERF_BR_SYSCALL,/* X86_BR_SYSCALL */
+   PERF_BR_SYSRET, /* X86_BR_SYSRET */
+   PERF_BR_INT,/* X86_BR_INT */
+   PERF_BR_IRET,   /* X86_BR_IRET */
+   PERF_BR_JCC,/* X86_BR_JCC */
+   PERF_BR_JMP,/* X86_BR_JMP */
+   PERF_BR_IRQ,/* X86_BR_IRQ */
+   PERF_BR_IND_CALL,   /* X86_BR_IND_CALL */
+   PERF_BR_NONE,   /* X86_BR_ABORT */
+   PERF_BR_NONE,   /* X86_BR_IN_TX */
+   PERF_BR_NONE,   /* X86_BR_NO_TX */
+   PERF_BR_CALL,   /* X86_BR_ZERO_CALL */
+   PERF_BR_NONE,   /* X86_BR_CALL_STACK */
+   PERF_BR_IND_JMP,/* X86_BR_IND_JMP */
+   };
+
+   type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+   mask = ~(~0 << 1);

is that a fancy way to get 1 into the mask? what do I miss?

you did not comment on this one


Sorry, I mistakenly thought this comment and the next comment had the 
same meaning.


In the previous version, I used a switch/case to convert from X86_BR 
to PERF_BR. I got a comment from the community that it'd be better to use 
a lookup table for the conversion.


Since each bit in type represents an X86_BR type, I use a mask (0x1) to 
filter the bit. Yes, it looks like I can also directly set mask to 0x1.


I wrote "mask = ~(~0 << 1)" out of coding habit. If you think I should 
change it to "mask = 0x1", that's OK  :)



+
+   for (i = 0; i < X86_BR_TYPE_MAP_MAX; i++) {
+   if (type & mask)
+   return branch_map[i];

I wonder some bit search would be faster in here, but maybe not big deal

jirka

I just think branch_map[] doesn't contain many entries (16 entries
here), so checking 1 bit at a time should be acceptable. I just want to
keep the code simple.

But if the number of entries were larger (e.g. 64), it might be better to
check 2 or 4 bits at a time.

ook

jirka

Sorry, what's the meaning of ook? Does it mean "OK"?

Thanks
Jin Yao



Re: [PATCH v8 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support

2017-05-09 Thread Anju T Sudhakar

Hi Daniel,


On Monday 08 May 2017 07:42 PM, Daniel Axtens wrote:

Hi all,

I've had a look at the API as it was a big thing I didn't like in the
earlier version.

I am much happier with this one.

Some comments:

  - I'm no longer subscribed to skiboot but I've had a look at the
patches on that side:

 * in patch 9 should opal_imc_counters_init return something other
   than OPAL_SUCCESS in the case of invalid arguments? Maybe
   OPAL_PARAMETER? (I think you fix this in a later patch anyway?)

 * in start/stop, should there be some sort of write barrier to make
   sure the cb->imc_chip_command actually gets written out to memory
   at the time we expect?

The rest of my comments are in line.


Adds a cpumask attribute to be used by each IMC PMU. Only one CPU (any
online CPU) from each chip is designated to read the nest counters.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated CPUs; if so, the next online CPU from the same chip (for nest
units) is designated as the new CPU to read counters. For this purpose, we
introduce a new state: CPUHP_AP_PERF_POWERPC_NEST_ONLINE.
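
For readers unfamiliar with the hotplug state machine, registering such a
state typically looks like the rough sketch below; the callback bodies and the
state name string are illustrative, not the patch's actual code:

static int ppc_nest_imc_cpu_online(unsigned int cpu)
{
	/* e.g. make the first online CPU of a chip the designated reader */
	return 0;
}

static int ppc_nest_imc_cpu_offline(unsigned int cpu)
{
	/* if @cpu was designated, hand over to another CPU on the same chip */
	return 0;
}

static int nest_pmu_cpumask_init(void)
{
	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_ONLINE,
				 "perf/powerpc/nest:online",
				 ppc_nest_imc_cpu_online,
				 ppc_nest_imc_cpu_offline);
}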

Signed-off-by: Anju T Sudhakar 
Signed-off-by: Hemant Kumar 
Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/imc-pmu.h |   4 +
  arch/powerpc/include/asm/opal-api.h|  12 +-
  arch/powerpc/include/asm/opal.h|   4 +
  arch/powerpc/perf/imc-pmu.c| 248 -
  arch/powerpc/platforms/powernv/opal-wrappers.S |   3 +
  include/linux/cpuhotplug.h |   1 +

Who owns this? get_maintainer.pl doesn't give me anything helpful
here... Do we need an Ack from anyone?


  6 files changed, 266 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h
index 6bbe184..1478d0f 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -92,6 +92,10 @@ struct imc_pmu {
  #define IMC_DOMAIN_NEST   1
  #define IMC_DOMAIN_UNKNOWN-1
  
+#define IMC_COUNTER_ENABLE	1

+#define IMC_COUNTER_DISABLE	0

I'm not sure these constants are particularly useful any more, but I'll
have more to say on that later.


+
+
  extern struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
  extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
  extern int __init init_imc_pmu(struct imc_events *events,int idx, struct 
imc_pmu *pmu_ptr);
diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index a0aa285..ce863d9 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -168,7 +168,10 @@
  #define OPAL_INT_SET_MFRR 125
  #define OPAL_PCI_TCE_KILL 126
  #define OPAL_NMMU_SET_PTCR127
-#define OPAL_LAST  127
+#define OPAL_IMC_COUNTERS_INIT 149
+#define OPAL_IMC_COUNTERS_START	150
+#define OPAL_IMC_COUNTERS_STOP 151

Yay, this is heaps better!


+#define OPAL_LAST  151
  
  /* Device tree flags */
  
@@ -928,6 +931,13 @@ enum {

OPAL_PCI_TCE_KILL_ALL,
  };
  
+/* Argument to OPAL_IMC_COUNTERS_*  */

+enum {
+   OPAL_IMC_COUNTERS_NEST = 1,
+   OPAL_IMC_COUNTERS_CORE = 2,
+   OPAL_IMC_COUNTERS_THREAD = 3,
+};
+
  #endif /* __ASSEMBLY__ */
  
  #endif /* __OPAL_API_H */

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1ff03a6..9c16ec6 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -227,6 +227,10 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t 
kill_type,
  uint64_t dma_addr, uint32_t npages);
  int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
  
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address);

This isn't called anywhere in this patch... including (worryingly) in
the init function...


+int64_t opal_imc_counters_start(uint32_t type);
+int64_t opal_imc_counters_stop(uint32_t type);
+
  /* Internal functions */
  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
   int depth, void *data);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f09a37a..40792424 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -18,6 +18,11 @@
  
  struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];

  struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
+
+static atomic_t nest_events;
+/* Used to avoid races in calling enable/disable nest-pmu units*/

You need a space here between s and * ^


Sure. I will correct this.  :)



+static DEFINE_MUTEX(imc_nest_reserve);
  
  /* Needed for sanity 

Re: P.A. Semi: "PR" KVM configure error after KVM changes for 4.12

2017-05-09 Thread Christian Zigotzky

On 09 May 2017 at 11:08 AM, Paul Mackerras wrote:

On Tue, May 09, 2017 at 10:42:51AM +0200, Christian Zigotzky wrote:

Hi All,

After the first batch of KVM changes for 4.12 merge window I get the
following error message if I want to configure the Linux kernel with KVM PR
support on my P.A. Semi board.

warning: (KVM_BOOK3S_64) selects SPAPR_TCE_IOMMU which has unmet direct
dependencies (IOMMU_SUPPORT && (PPC_POWERNV || PPC_PSERIES))

Did you change the system requirements for KVM? Does it only work on IBM
pSeries machines now?

That probably came in with Alexey Kardashevskiy's patches to
accelerate the TCE (IOMMU) hypercalls.  There was no deliberate
intention to restrict PR KVM to IBM machines.  I'll fix it.

However, I expect that at least for now we will not support PCI
pass-through on machines other than IBM pSeries, because the KVM vfio
code needs some specific services from the iommu layer which are
platform-dependent and probably not implemented on other platforms
(maple, pasemi, powermac).  If you have been using PCI pass-through
successfully in the past with PR KVM then please let me know and we'll
look at what is needed to keep it working.

Paul.


Hi Paul,

Thanks a lot for your fast reply. I had to select POWERNV as well 
because without it, the kernel didn't compile. After that I tested 
Mac-on-Linux/KVM PR with the new kernel. It works without any problems.


Screenshot: https://plus.google.com/115515624056477014971/posts/fVrEXs3S3dU

It would be nice if you could fix it.

Thanks,

Christian



Re: [linux-next][bock] WARNING: CPU: 22 PID: 0 at block/blk-core.c:2655 .blk_update_request+0x4f8/0x500

2017-05-09 Thread Christoph Hellwig
On Mon, May 08, 2017 at 08:00:41AM -0600, Jens Axboe wrote:
> Christoph, Martin - any ideas? Trace from Abdul below.

Btw, what page size does the system have?

> 
> WARNING: CPU: 12 PID: 0 at block/blk-core.c:2651 
> .blk_update_request+0x4cc/0x4e0

Any knowledge from tracing or printk on what command is complete?
Both req_op type and SCSI command?


Re: P.A. Semi: "PR" KVM configure error after KVM changes for 4.12

2017-05-09 Thread Paul Mackerras
On Tue, May 09, 2017 at 10:42:51AM +0200, Christian Zigotzky wrote:
> Hi All,
> 
> After the first batch of KVM changes for 4.12 merge window I get the
> following error message if I want to configure the Linux kernel with KVM PR
> support on my P.A. Semi board.
> 
> warning: (KVM_BOOK3S_64) selects SPAPR_TCE_IOMMU which has unmet direct
> dependencies (IOMMU_SUPPORT && (PPC_POWERNV || PPC_PSERIES))
> 
> Did you change the system requirements for KVM? Does it only work on IBM
> pSeries machines now?

That probably came in with Alexey Kardashevskiy's patches to
accelerate the TCE (IOMMU) hypercalls.  There was no deliberate
intention to restrict PR KVM to IBM machines.  I'll fix it.

However, I expect that at least for now we will not support PCI
pass-through on machines other than IBM pSeries, because the KVM vfio
code needs some specific services from the iommu layer which are
platform-dependent and probably not implemented on other platforms
(maple, pasemi, powermac).  If you have been using PCI pass-through
successfully in the past with PR KVM then please let me know and we'll
look at what is needed to keep it working.

Paul.


P.A. Semi: "PR" KVM configure error after KVM changes for 4.12

2017-05-09 Thread Christian Zigotzky

Hi All,

After the first batch of KVM changes for 4.12 merge window I get the 
following error message if I want to configure the Linux kernel with KVM 
PR support on my P.A. Semi board.


warning: (KVM_BOOK3S_64) selects SPAPR_TCE_IOMMU which has unmet direct 
dependencies (IOMMU_SUPPORT && (PPC_POWERNV || PPC_PSERIES))


Did you change the system requirements for KVM? Does it only work on IBM 
pSeries machines now?


Before the changes, I successfully compiled the Git version of kernel 
4.12 with KVM PR support. I successfully tested it with Mac-on-Linux/KVM PR.


Cheers,

Christian


Re: [PATCH] powerpc/mm/book3s/64: Rework page table geometry for lower memory usage

2017-05-09 Thread Balbir Singh
On Tue, 2017-05-09 at 18:05 +1000, Michael Ellerman wrote:
> Recently in commit f6eedbba7a26 ("powerpc/mm/hash: Increase VA range to 
> 128TB")
> we increased the virtual address space for user processes to 128TB by default,
> and up to 512TB if user space opts in.
> 
> This obviously required expanding the range of the Linux page tables. For 
> Book3s
> 64-bit using hash and with PAGE_SIZE=64K, we increased the PGD to 2^15 
> entries.
> This meant we could cover the full address range, while still being able to
> insert a 16G hugepage at the PGD level and a 16M hugepage in the PMD.
> 
> The downside of that geometry is that it uses a lot of memory for the PGD, and
> in particular makes the PGD a 4-page allocation, which means it's much more
> likely to fail under memory pressure.
> 
> Instead we can make the PMD larger, so that a single PUD entry maps 16G,
> allowing the 16G hugepages to sit at that level in the tree. We're then able 
> to
> split the remaining bits between the PUG and PGD. We make the PGD slightly
> larger as that results in lower memory usage for typical programs.
> 
> When THP is enabled the PMD actually doubles in size, to 2^11 entries, or 2^14
> bytes, which is large but still < PAGE_SIZE.
> 
> Signed-off-by: Michael Ellerman 
> ---

Reviewed-by: Balbir Singh 


Re: [PATCH v6 2/7] perf/x86/intel: Record branch type

2017-05-09 Thread Jiri Olsa
On Mon, Apr 24, 2017 at 08:47:14AM +0800, Jin, Yao wrote:
> 
> 
> On 4/23/2017 9:55 PM, Jiri Olsa wrote:
> > On Thu, Apr 20, 2017 at 08:07:50PM +0800, Jin Yao wrote:
> > 
> > SNIP
> > 
> > > +#define X86_BR_TYPE_MAP_MAX  16
> > > +
> > > +static int
> > > +common_branch_type(int type)
> > > +{
> > > + int i, mask;
> > > + const int branch_map[X86_BR_TYPE_MAP_MAX] = {
> > > + PERF_BR_CALL,   /* X86_BR_CALL */
> > > + PERF_BR_RET,/* X86_BR_RET */
> > > + PERF_BR_SYSCALL,/* X86_BR_SYSCALL */
> > > + PERF_BR_SYSRET, /* X86_BR_SYSRET */
> > > + PERF_BR_INT,/* X86_BR_INT */
> > > + PERF_BR_IRET,   /* X86_BR_IRET */
> > > + PERF_BR_JCC,/* X86_BR_JCC */
> > > + PERF_BR_JMP,/* X86_BR_JMP */
> > > + PERF_BR_IRQ,/* X86_BR_IRQ */
> > > + PERF_BR_IND_CALL,   /* X86_BR_IND_CALL */
> > > + PERF_BR_NONE,   /* X86_BR_ABORT */
> > > + PERF_BR_NONE,   /* X86_BR_IN_TX */
> > > + PERF_BR_NONE,   /* X86_BR_NO_TX */
> > > + PERF_BR_CALL,   /* X86_BR_ZERO_CALL */
> > > + PERF_BR_NONE,   /* X86_BR_CALL_STACK */
> > > + PERF_BR_IND_JMP,/* X86_BR_IND_JMP */
> > > + };
> > > +
> > > + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
> > > + mask = ~(~0 << 1);
> > is that a fancy way to get 1 into the mask? what do I miss?

you did not comment on this one

> > 
> > > +
> > > + for (i = 0; i < X86_BR_TYPE_MAP_MAX; i++) {
> > > + if (type & mask)
> > > + return branch_map[i];
> > I wonder some bit search would be faster in here, but maybe not big deal
> > 
> > jirka
> 
> I just think branch_map[] doesn't contain many entries (16 entries
> here), so checking 1 bit at a time should be acceptable. I just want to
> keep the code simple.
> 
> But if the number of entries were larger (e.g. 64), it might be better to
> check 2 or 4 bits at a time.

ook

jirka


Re: [PATCH] powerpc/mm/book3s/64: Rework page table geometry for lower memory usage

2017-05-09 Thread Aneesh Kumar K.V



On Tuesday 09 May 2017 01:35 PM, Michael Ellerman wrote:

Recently in commit f6eedbba7a26 ("powerpc/mm/hash: Increase VA range to 128TB")
we increased the virtual address space for user processes to 128TB by default,
and up to 512TB if user space opts in.

This obviously required expanding the range of the Linux page tables. For Book3s
64-bit using hash and with PAGE_SIZE=64K, we increased the PGD to 2^15 entries.
This meant we could cover the full address range, while still being able to
insert a 16G hugepage at the PGD level and a 16M hugepage in the PMD.

The downside of that geometry is that it uses a lot of memory for the PGD, and
in particular makes the PGD a 4-page allocation, which means it's much more
likely to fail under memory pressure.

Instead we can make the PMD larger, so that a single PUD entry maps 16G,
allowing the 16G hugepages to sit at that level in the tree. We're then able to
split the remaining bits between the PUD and PGD. We make the PGD slightly
larger as that results in lower memory usage for typical programs.

When THP is enabled the PMD actually doubles in size, to 2^11 entries, or 2^14
bytes, which is large but still < PAGE_SIZE.



Reviewed-by: Aneesh Kumar K.V 


Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 214219dff87c..9732837aaae8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,9 +2,9 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H

 #define H_PTE_INDEX_SIZE  8
-#define H_PMD_INDEX_SIZE  5
-#define H_PUD_INDEX_SIZE  5
-#define H_PGD_INDEX_SIZE  15
+#define H_PMD_INDEX_SIZE  10
+#define H_PUD_INDEX_SIZE  7
+#define H_PGD_INDEX_SIZE  8

 /*
  * 64k aligned address free up few of the lower bits of RPN for us





[PATCH] powerpc/mm/book3s/64: Rework page table geometry for lower memory usage

2017-05-09 Thread Michael Ellerman
Recently in commit f6eedbba7a26 ("powerpc/mm/hash: Increase VA range to 128TB")
we increased the virtual address space for user processes to 128TB by default,
and up to 512TB if user space opts in.

This obviously required expanding the range of the Linux page tables. For Book3s
64-bit using hash and with PAGE_SIZE=64K, we increased the PGD to 2^15 entries.
This meant we could cover the full address range, while still being able to
insert a 16G hugepage at the PGD level and a 16M hugepage in the PMD.

The downside of that geometry is that it uses a lot of memory for the PGD, and
in particular makes the PGD a 4-page allocation, which means it's much more
likely to fail under memory pressure.

Instead we can make the PMD larger, so that a single PUD entry maps 16G,
allowing the 16G hugepages to sit at that level in the tree. We're then able to
split the remaining bits between the PUD and PGD. We make the PGD slightly
larger as that results in lower memory usage for typical programs.

When THP is enabled the PMD actually doubles in size, to 2^11 entries, or 2^14
bytes, which is large but still < PAGE_SIZE.
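
As a quick sanity check of the new geometry, the arithmetic works out as
follows (an illustrative calculation, assuming 64K pages, i.e. a 16-bit page
offset, and 8-byte page table entries):

/*
 * Address bits mapped at each level with the new index sizes:
 *
 *   one PMD entry: 16 (page) + 8 (PTE)        = 24 bits ->  16M hugepage
 *   one PUD entry: 16 + 8 + 10 (PMD)          = 34 bits ->  16G hugepage
 *   one PGD entry: 16 + 8 + 10 + 7 (PUD)      = 41 bits ->   2T
 *   full tree:     16 + 8 + 10 + 7 + 8 (PGD)  = 49 bits -> 512T
 *
 * The PGD itself shrinks from 2^15 * 8 = 256K (a 4-page allocation with
 * 64K pages) to 2^8 * 8 = 2K, which is the lower memory usage the patch
 * is after.
 */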

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 214219dff87c..9732837aaae8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,9 +2,9 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
 
 #define H_PTE_INDEX_SIZE  8
-#define H_PMD_INDEX_SIZE  5
-#define H_PUD_INDEX_SIZE  5
-#define H_PGD_INDEX_SIZE  15
+#define H_PMD_INDEX_SIZE  10
+#define H_PUD_INDEX_SIZE  7
+#define H_PGD_INDEX_SIZE  8
 
 /*
  * 64k aligned address free up few of the lower bits of RPN for us
-- 
2.7.4



Re: [PATCH 2/2] v1 powerpc/powernv: Enable removal of memory for in memory tracing

2017-05-09 Thread Rashmica Gupta

Sorry for the late reply, I somehow missed this.


On 03/05/17 21:56, Anshuman Khandual wrote:

On 05/03/2017 09:22 AM, Rashmica Gupta wrote:

On 28/04/17 19:52, Anshuman Khandual wrote:

On 04/28/2017 11:12 AM, Rashmica Gupta wrote:

Some powerpc hardware features may want to gain access to a chunk of

What kind of features ? Please add specifics.


undisturbed real memory.  This update provides a means to unplug said
memory

Undisturbed ? Meaning part of memblock and currently inside the buddy
allocator which we are trying to hot unplug out ?


from the kernel with a set of debugfs calls.  By writing an integer
containing
   the size of memory to be unplugged into

Does the size have some constraints, like being aligned to the memblock
section size? LMB size? page block size? etc. Please add the details.

Will do.


/sys/kernel/debug/powerpc/memtrace/enable, the code will remove that
much
memory from the end of each available chip's memory space (ie each
memory node).

 amount (I guess bytes in this case) of memory will be removed
from the end of the NUMA node? What's the guarantee that those pages will
be free at that time and not pinned by some process? If it's not
guaranteed to be freed, then the interface description should state that
clearly.

We start looking from the end of the NUMA node, but of course there is no
guarantee that we will always be able to find some memory there that we
are able to remove.


Okay. Do we have an interface for giving this memory back to the buddy
allocator again when we are done with HW tracing? If not, we need to
add one.


Not at the moment. Last time I spoke to Anton he said something along the
lines of it not being too important, as if you are getting the hardware
traces for debugging purposes you are probably not worried about a bit of
memory being out of action.


However I can't see why having an interface to online the memory would be
a bad thing, so I'll look into it.


In addition, the means to read out the contents of the unplugged
memory is also
provided by reading out the
/sys/kernel/debug/powerpc/memtrace//trace
file.

All of the debugfs file interfaces added here should be documented
somewhere in detail.


Signed-off-by: Anton Blanchard 
Signed-off-by: Rashmica Gupta 

---
This requires the 'Wire up hpte_removebolted for powernv' patch.

RFC -> v1: Added in two missing locks. Replaced the open-coded
flush_memory_region() with the existing
flush_inval_dcache_range(start, end).

memtrace_offline_pages() is open-coded because offline_pages is designed
to be called through the sysfs interface - not directly.

We could move the offlining of pages to userspace, which removes some of
this open-coding. This would then require passing info to the kernel such
that it can then remove the memory that has been offlined. This could be
done using notifiers, but this isn't simple due to locking (remove_memory
needs mem_hotplug_begin() which the sysfs interface already has). This
could also be done through the debugfs interface (similar to what is done
here). Either way, this would require the process that needs the memory
to have open-coded code which it shouldn't really be involved with.

As the current remove_memory() function requires the memory to already be
offlined, it makes sense to keep the offlining and removal of memory
functionality grouped together so that a process can simply make one
request to unplug some memory. Ideally there would be a kernel function
we could call that would offline the memory and then remove it.
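
A rough sketch of the kind of combined helper being wished for, built on the
existing offline_pages()/remove_memory() primitives; the wrapper itself and its
simplified locking are hypothetical (see the locking caveats above):

static int offline_and_remove(int nid, u64 start, u64 size)
{
	int ret;

	lock_device_hotplug();
	ret = offline_pages(PFN_DOWN(start), PFN_DOWN(size));
	if (!ret)
		remove_memory(nid, start, size);	/* expects offlined memory */
	unlock_device_hotplug();

	return ret;
}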


   arch/powerpc/platforms/powernv/memtrace.c | 276 ++
   1 file changed, 276 insertions(+)
   create mode 100644 arch/powerpc/platforms/powernv/memtrace.c

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
new file mode 100644
index 000..86184b1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -0,0 +1,276 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) IBM Corporation, 2014
+ *
+ * Author: Anton Blanchard 
+ */
+
+#define pr_fmt(fmt) "powernv-memtrace: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct memtrace_entry {
+	void *mem;
+	u64 start;
+	u64 size;
+	u32 nid;
+	struct dentry *dir;
+	char name[16];
+};

A little bit of description of the structure here would help.

Something like 
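For instance, a kernel-doc block along these lines would do; the field
descriptions below are my reading of how the fields are used in the patch,
not the reviewer's own suggestion:

/**
 * struct memtrace_entry - per-node state for one unplugged trace region
 * @mem:   kernel mapping of the removed range, backing the debugfs read
 * @start: physical start address of the removed range
 * @size:  size of the removed range in bytes
 * @nid:   NUMA node (chip) the range was taken from
 * @dir:   per-node debugfs directory holding the trace file
 * @name:  printed form of @nid, used as the directory name
 */
struct memtrace_entry {
	void *mem;
	u64 start;
	u64 size;
	u32 nid;
	struct dentry *dir;
	char name[16];
};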

Re: [kernel-hardening] [PATCH] add the option of fortified string.h functions

2017-05-09 Thread Andrew Donnellan

On 09/05/17 03:57, Daniel Axtens wrote:

(ppc people: this does some compile and run time bounds checking on
string functions. It's cool - currently it picks up a lot of random
things so it will require some more work across the tree, but hopefully
it will eventually hit mainline.)


Ooh, nice!



I've tested this on ppc with pseries_le_defconfig.

I needed a couple of the fixes from github
(https://github.com/thestinger/linux-hardened/commits/4.11) in order to
build, specifically
https://github.com/thestinger/linux-hardened/commit/c65d6a6f309b06703584a23ac2b2bda4bb363143
https://github.com/thestinger/linux-hardened/commit/adcec4756574a8c7f7cb5b6fa51ebeaeeae71aae

Once those were added, I needed to disable fortification in prom_init.c,
as we apparently can't have new symbols there. (I don't understand that
file so I haven't dug into it.)
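
For what it's worth, the mainline version of this feature ended up with a
per-file opt-out macro. Assuming this series keeps (or gains) the same
__NO_FORTIFY guard, opting prom_init.c out could be as simple as the snippet
below; whether that is how it was actually done here is my guess, not
something the mail states.

/* At the very top of arch/powerpc/kernel/prom_init.c, before anything
 * that pulls in <linux/string.h>.  Only works if the fortify checks are
 * keyed off a __NO_FORTIFY opt-out, as the mainline version later was. */
#define __NO_FORTIFY
#include <linux/string.h>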

We also have problems with the feature fixup tests leading to a panic on
boot. It relates to getting what I think are asm labels(?) and how we
address them. I have just disabled fortify here for now; I think the
code could be rewritten to take the labels as unsigned char *, but I
haven't dug into it.

With the following fixups, I can boot a LE buildroot initrd (per
https://github.com/linuxppc/linux/wiki/Booting-with-Qemu). Sadly I don't
have access to real hardware any more, so I can't say anything more than
that. (ajd - perhaps relevant to your interests?)


I'll test it on bare metal when I get the chance, and I'll see if I can
investigate the issues you've raised.


--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH v8 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support

2017-05-09 Thread Madhavan Srinivasan



On Monday 08 May 2017 07:42 PM, Daniel Axtens wrote:

Hi all,

I've had a look at the API as it was a big thing I didn't like in the
earlier version.

I am much happier with this one.


Thanks to mpe for suggesting this. :)



Some comments:

  - I'm no longer subscribed to skiboot but I've had a look at the
patches on that side:


Thanks a lot for the review comments.



 * in patch 9, should opal_imc_counters_init return something other
   than OPAL_SUCCESS in the case of invalid arguments? Maybe
   OPAL_PARAMETER? (I think you fix this in a later patch anyway?)


So, the init call will return OPAL_PARAMETER for unsupported domains
(core and nest are supported). And if the init operation fails for any
reason, it would return OPAL_HARDWARE. This is documented.
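
On the Linux side the caller would then map those codes onto errnos, along the
lines of the sketch below. The surrounding function name and where the call
actually ends up living are illustrative only (as noted further down, the init
call site isn't in this hunk yet).

/* Illustrative caller: initialise the nest counters for one chip and
 * translate the documented OPAL return codes into errnos. */
static int nest_imc_counters_init(u64 addr)
{
	s64 rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_NEST, addr);

	if (rc == OPAL_PARAMETER)	/* unsupported domain */
		return -EINVAL;
	if (rc != OPAL_SUCCESS)		/* e.g. OPAL_HARDWARE */
		return -ENODEV;

	return 0;
}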



 * in start/stop, should there be some sort of write barrier to make
   sure the cb->imc_chip_command actually gets written out to memory
   at the time we expect?


In the current implementation we make the OPAL call in the
*_event_stop and *_event_start functions. But we want to move the
OPAL call to the corresponding *_event_init(), which avoids an OPAL
call on each _event_start and _event_stop for this PMU. With this
change, we may not need the barrier.

Maddy
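
For anyone following along, the ordering concern is roughly the pattern below.
The control-block type here is a stand-in for the one defined earlier in the
series, and whether mb() (or anything at all) is the right fence is exactly
the open question.

#include <asm/barrier.h>
#include <asm/imc-pmu.h>	/* IMC_COUNTER_ENABLE */
#include <asm/opal.h>
#include <asm/opal-api.h>

/* Stand-in for the per-chip control block from earlier patches; only
 * the field mentioned in the review is shown. */
struct imc_chip_cb {
	u64 imc_chip_command;
};

static void imc_start_counters(struct imc_chip_cb *cb)
{
	cb->imc_chip_command = IMC_COUNTER_ENABLE;

	/* Without a fence here, nothing orders the store above against
	 * whatever the firmware/microcode does once it sees the start
	 * request below. */
	mb();

	opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST);
}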



The rest of my comments are in line.


Adds a cpumask attribute to be used by each IMC PMU. Only one CPU (any
online CPU) from each chip is designated to read counters for the nest PMUs.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated CPUs; if so, the next online CPU from the same chip (for nest
units) is designated as the new CPU to read counters. For this purpose, we
introduce a new state: CPUHP_AP_PERF_POWERPC_NEST_ONLINE.
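
That description maps onto the usual cpuhp/perf pattern, roughly as sketched
below. The callback, cpumask and PMU names are illustrative (the patch keeps
one designated reader CPU per chip and an array of nest PMUs), so treat this
as the shape of the code rather than the code itself.

#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/perf_event.h>
#include <linux/topology.h>
#include <asm/imc-pmu.h>

static cpumask_t nest_imc_cpumask;	/* one designated reader CPU per chip */
static struct imc_pmu *nest_pmu;	/* stand-in for per_nest_pmu_arr[]; set at init */

static int ppc_nest_imc_cpu_online(unsigned int cpu)
{
	const struct cpumask *l_cpumask = cpumask_of_node(cpu_to_node(cpu));

	/* If this chip has no designated reader yet, volunteer. */
	if (!cpumask_intersects(l_cpumask, &nest_imc_cpumask))
		cpumask_set_cpu(cpu, &nest_imc_cpumask);
	return 0;
}

static int ppc_nest_imc_cpu_offline(unsigned int cpu)
{
	cpumask_t tmp;
	unsigned int target;

	/* Nothing to do unless the dying CPU was a designated reader. */
	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
		return 0;

	/* Pick any other online CPU on the same node (chip)... */
	cpumask_and(&tmp, cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
	target = cpumask_any_but(&tmp, cpu);
	if (target >= nr_cpu_ids)
		return 0;	/* last CPU on the chip is going away */

	/* ...make it the new reader and hand any active events over. */
	cpumask_set_cpu(target, &nest_imc_cpumask);
	perf_pmu_migrate_context(&nest_pmu->pmu, cpu, target);
	return 0;
}

static int nest_imc_cpumask_init(void)
{
	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_ONLINE,
				 "perf/powerpc/imc:online",
				 ppc_nest_imc_cpu_online,
				 ppc_nest_imc_cpu_offline);
}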

Signed-off-by: Anju T Sudhakar 
Signed-off-by: Hemant Kumar 
Signed-off-by: Madhavan Srinivasan 
---
   arch/powerpc/include/asm/imc-pmu.h             |   4 +
   arch/powerpc/include/asm/opal-api.h            |  12 +-
   arch/powerpc/include/asm/opal.h                |   4 +
   arch/powerpc/perf/imc-pmu.c                    | 248 -
   arch/powerpc/platforms/powernv/opal-wrappers.S |   3 +
   include/linux/cpuhotplug.h                     |   1 +

Who owns this? get_maintainer.pl doesn't give me anything helpful
here... Do we need an Ack from anyone?


  6 files changed, 266 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 6bbe184..1478d0f 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -92,6 +92,10 @@ struct imc_pmu {
  #define IMC_DOMAIN_NEST   1
  #define IMC_DOMAIN_UNKNOWN -1
  
+#define IMC_COUNTER_ENABLE	1
+#define IMC_COUNTER_DISABLE	0

I'm not sure these constants are particularly useful any more, but I'll
have more to say on that later.


+
+
  extern struct perchip_nest_info nest_perchip_info[IMC_MAX_CHIPS];
  extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
  extern int __init init_imc_pmu(struct imc_events *events,int idx, struct 
imc_pmu *pmu_ptr);
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index a0aa285..ce863d9 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -168,7 +168,10 @@
  #define OPAL_INT_SET_MFRR 125
  #define OPAL_PCI_TCE_KILL 126
  #define OPAL_NMMU_SET_PTCR 127
-#define OPAL_LAST  127
+#define OPAL_IMC_COUNTERS_INIT 149
+#define OPAL_IMC_COUNTERS_START 150
+#define OPAL_IMC_COUNTERS_STOP 151

Yay, this is heaps better!


+#define OPAL_LAST  151
  
  /* Device tree flags */
  
@@ -928,6 +931,13 @@ enum {

OPAL_PCI_TCE_KILL_ALL,
  };
  
+/* Argument to OPAL_IMC_COUNTERS_*  */

+enum {
+   OPAL_IMC_COUNTERS_NEST = 1,
+   OPAL_IMC_COUNTERS_CORE = 2,
+   OPAL_IMC_COUNTERS_THREAD = 3,
+};
+
  #endif /* __ASSEMBLY__ */
  
  #endif /* __OPAL_API_H */

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1ff03a6..9c16ec6 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -227,6 +227,10 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
  uint64_t dma_addr, uint32_t npages);
  int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
  
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address);

This isn't called anywhere in this patch... including (worryingly) in
the init function...


+int64_t opal_imc_counters_start(uint32_t type);
+int64_t opal_imc_counters_stop(uint32_t type);
+
  /* Internal functions */
  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
   int depth, void *data);
diff --git