[PATCH][v5] tools/power turbostat: if --iterations, print for specific count of iterations

2018-04-13 Thread Yu Chen
From: Chen Yu 

There's a use case during testing to print only a specific number of iterations
when --iterations is specified. For example, with this patch applied:

turbostat -i 5 -r 4
will capture 4 samples with 5 seconds interval.

Cc: Len Brown 
Cc: Rafael J Wysocki 
Cc: Artem Bityutskiy 
Cc: Doug Smythies 
Cc: linux...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Chen Yu 
---
 tools/power/x86/turbostat/turbostat.8 |  2 ++
 tools/power/x86/turbostat/turbostat.c | 23 ++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 
b/tools/power/x86/turbostat/turbostat.8
index ccf2a69365cc..7452dc42273b 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -64,6 +64,8 @@ name as necessary to disambiguate it from others is 
necessary.  Note that option
 .PP
 \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
 .PP
+\fB--iterations count\fP number of the measurement iterations.
+.PP
 \fB--out output_file\fP turbostat output is written to the specified 
output_file.
 The file is truncated if it already exists, and it is created if it does not 
exist.
 .PP
diff --git a/tools/power/x86/turbostat/turbostat.c 
b/tools/power/x86/turbostat/turbostat.c
index bd9c6b31a504..ae9e3d08e3cf 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -48,6 +48,7 @@ char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
 struct timespec interval_ts = {5, 0};
+int iterations;
 unsigned int debug;
 unsigned int quiet;
 unsigned int sums_need_wide_columns;
@@ -470,6 +471,7 @@ void help(void)
"   {core | package | j,k,l..m,n-p }\n"
"--quietskip decoding system configuration header\n"
"--interval sec Override default 5-second measurement interval\n"
+   "--iterations count number of the measurement iterations\n"
"--help print this help message\n"
"--list list column headers only\n"
"--out file create or truncate \"file\" for all output\n"
@@ -2565,6 +2567,7 @@ void turbostat_loop()
 {
int retval;
int restarted = 0;
+   int done_iters = 0;
 
 restart:
restarted++;
@@ -2581,6 +2584,7 @@ void turbostat_loop()
goto restart;
}
restarted = 0;
+   done_iters = 0;
gettimeofday(_even, (struct timezone *)NULL);
 
while (1) {
@@ -2607,6 +2611,10 @@ void turbostat_loop()
compute_average(EVEN_COUNTERS);
format_all_counters(EVEN_COUNTERS);
flush_output_stdout();
+
+   if (iterations && (++done_iters >= iterations))
+   break;
+
nanosleep(_ts, NULL);
if (snapshot_proc_sysfs_files())
goto restart;
@@ -2626,6 +2634,9 @@ void turbostat_loop()
compute_average(ODD_COUNTERS);
format_all_counters(ODD_COUNTERS);
flush_output_stdout();
+
+   if (iterations && (++done_iters >= iterations))
+   break;
}
 }
 
@@ -4999,6 +5010,7 @@ void cmdline(int argc, char **argv)
{"Dump",no_argument,0, 'D'},
{"debug",   no_argument,0, 'd'},/* 
internal, not documented */
{"interval",required_argument,  0, 'i'},
+   {"iterations",  required_argument,  0, 'r'},
{"help",no_argument,0, 'h'},
{"hide",required_argument,  0, 'H'},// meh, 
-h taken by --help
{"Joules",  no_argument,0, 'J'},
@@ -5014,7 +5026,7 @@ void cmdline(int argc, char **argv)
 
progname = argv[0];
 
-   while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+   while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qr:ST:v",
long_options, _index)) != -1) {
switch (opt) {
case 'a':
@@ -5063,6 +5075,15 @@ void cmdline(int argc, char **argv)
case 'q':
quiet = 1;
break;
+   case 'r':
+   iterations = strtod(optarg, NULL);
+
+   if (iterations <= 0) {
+   fprintf(outf, "iterations %d should be positive 
number\n",
+   iterations);
+   exit(2);
+   }
+   break;
case 's':
parse_show_hide(optarg, SHOW_LIST);
break;
-- 
2.13.6



[PATCH][v5] tools/power turbostat: if --iterations, print for specific count of iterations

2018-04-13 Thread Yu Chen
From: Chen Yu 

There's a use case during testing to print only a specific number of iterations
when --iterations is specified. For example, with this patch applied:

turbostat -i 5 -r 4
will capture 4 samples with 5 seconds interval.

Cc: Len Brown 
Cc: Rafael J Wysocki 
Cc: Artem Bityutskiy 
Cc: Doug Smythies 
Cc: linux...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Chen Yu 
---
 tools/power/x86/turbostat/turbostat.8 |  2 ++
 tools/power/x86/turbostat/turbostat.c | 23 ++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 
b/tools/power/x86/turbostat/turbostat.8
index ccf2a69365cc..7452dc42273b 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -64,6 +64,8 @@ name as necessary to disambiguate it from others is 
necessary.  Note that option
 .PP
 \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
 .PP
+\fB--iterations count\fP number of the measurement iterations.
+.PP
 \fB--out output_file\fP turbostat output is written to the specified 
output_file.
 The file is truncated if it already exists, and it is created if it does not 
exist.
 .PP
diff --git a/tools/power/x86/turbostat/turbostat.c 
b/tools/power/x86/turbostat/turbostat.c
index bd9c6b31a504..ae9e3d08e3cf 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -48,6 +48,7 @@ char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
 struct timespec interval_ts = {5, 0};
+int iterations;
 unsigned int debug;
 unsigned int quiet;
 unsigned int sums_need_wide_columns;
@@ -470,6 +471,7 @@ void help(void)
"   {core | package | j,k,l..m,n-p }\n"
"--quietskip decoding system configuration header\n"
"--interval sec Override default 5-second measurement interval\n"
+   "--iterations count number of the measurement iterations\n"
"--help print this help message\n"
"--list list column headers only\n"
"--out file create or truncate \"file\" for all output\n"
@@ -2565,6 +2567,7 @@ void turbostat_loop()
 {
int retval;
int restarted = 0;
+   int done_iters = 0;
 
 restart:
restarted++;
@@ -2581,6 +2584,7 @@ void turbostat_loop()
goto restart;
}
restarted = 0;
+   done_iters = 0;
gettimeofday(_even, (struct timezone *)NULL);
 
while (1) {
@@ -2607,6 +2611,10 @@ void turbostat_loop()
compute_average(EVEN_COUNTERS);
format_all_counters(EVEN_COUNTERS);
flush_output_stdout();
+
+   if (iterations && (++done_iters >= iterations))
+   break;
+
nanosleep(_ts, NULL);
if (snapshot_proc_sysfs_files())
goto restart;
@@ -2626,6 +2634,9 @@ void turbostat_loop()
compute_average(ODD_COUNTERS);
format_all_counters(ODD_COUNTERS);
flush_output_stdout();
+
+   if (iterations && (++done_iters >= iterations))
+   break;
}
 }
 
@@ -4999,6 +5010,7 @@ void cmdline(int argc, char **argv)
{"Dump",no_argument,0, 'D'},
{"debug",   no_argument,0, 'd'},/* 
internal, not documented */
{"interval",required_argument,  0, 'i'},
+   {"iterations",  required_argument,  0, 'r'},
{"help",no_argument,0, 'h'},
{"hide",required_argument,  0, 'H'},// meh, 
-h taken by --help
{"Joules",  no_argument,0, 'J'},
@@ -5014,7 +5026,7 @@ void cmdline(int argc, char **argv)
 
progname = argv[0];
 
-   while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+   while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qr:ST:v",
long_options, _index)) != -1) {
switch (opt) {
case 'a':
@@ -5063,6 +5075,15 @@ void cmdline(int argc, char **argv)
case 'q':
quiet = 1;
break;
+   case 'r':
+   iterations = strtod(optarg, NULL);
+
+   if (iterations <= 0) {
+   fprintf(outf, "iterations %d should be positive 
number\n",
+   iterations);
+   exit(2);
+   }
+   break;
case 's':
parse_show_hide(optarg, SHOW_LIST);
break;
-- 
2.13.6



Re: NO_HZ_FULL and tick running within a reasonable amount of time

2018-04-13 Thread Jacek Tomaka
> On Tue, Apr 03, 2018 at 10:08:58AM -0700, Paul E. McKenney wrote:
> > On Tue, Apr 03, 2018 at 01:41:31PM +0200, Frederic Weisbecker wrote:
> > > On Mon, Apr 02, 2018 at 03:04:38PM -0700, Paul E. McKenney wrote:
> > > > Hello!
> > > >
> > > > I am hitting the following on today's mainline under rcutorture, but
> > > > only on scenarios built with CONFIG_NO_HZ_FULL=y:
> > > >
> > > > 
> > > >
> > > > WARNING: CPU: 0 PID: 7 at 
> > > > /home/paulmck/public_git/linux-rcu/kernel/sched/core.c:3124 
> > > > sched_tick_remote+0x113/0x120
> > > > Modules linked in:
> > > > CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 4.16.0+ #1
> > > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > > > Ubuntu-1.8.2-1ubuntu1 04/01/2014
> > > > Workqueue: events_unbound sched_tick_remote
> > > > RIP: 0010:sched_tick_remote+0x113/0x120
> > > > RSP: 0018:94d540103e20 EFLAGS: 00010002
> > > > RAX: 00012e9bb357 RBX: 8f95dfd21840 RCX: 001f
> > > > RDX: b2d05e00 RSI:  RDI: 8f95dfd21858
> > > > RBP: 94d540103e48 R08: f6499019 R09: f6499000
> > > > R10: b163d33b R11: a5c8c212 R12: 8f95dfd25518
> > > > R13: 8f95de9e4200 R14: 3402 R15: 8f95dfd21858
> > > > FS:  () GS:8f95dfc0() 
> > > > knlGS:
> > > > CS:  0010 DS:  ES:  CR0: 80050033
> > > > CR2: 0a015b40 CR3: 1de14000 CR4: 06f0
> > > > Call Trace:
> > > >  process_one_work+0x1d9/0x6a0
> > > >  worker_thread+0x42/0x420
> > > >  kthread+0xf3/0x130
> > > >  ? rescuer_thread+0x340/0x340
> > > >  ? kthread_delayed_work_timer_fn+0x80/0x80
> > > >  ret_from_fork+0x3a/0x50
> > > > Code: ff 48 8b 83 80 0b 00 00 48 85 c0 0f 85 41 ff ff ff e9 45 ff ff ff 
> > > > be ff ff ff ff 4c 89 ff e8 55 44 02 00 85 c0 75 87 0f 0b eb 83 <0f> 0b 
> > > > eb 97 66 0f 1f 84 00 00 00 00 00 41 57 41 56 41 55 41 54
> > > > ---[ end trace fbdcbe529a8ae799 ]--
> > > >
> > > > 
> > > >
> > > > The WARN_ON_ONCE() triggering is this guy:
> > > >
> > > > delta = rq_clock_task(rq) - curr->se.exec_start;
> > > > WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
> > >
> > > Weird. Can you try to print up those values and see how much they drift?
> > >
> > > if (WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3))
> > >printk_once("clock_task: %lld exec_start: %lld\n", 
> > > rq_clock_task(rq), curr-> >se.exec_start);
> >
> > Here you go!
> >
> > Thanx, Paul
> >
> > 
> >
> > WARNING: CPU: 0 PID: 7 at 
> > /home/paulmck/public_git/linux-rcu/kernel/sched/core.c:3124 
> > sched_tick_remote+0xdb/0x100
> > Modules linked in:
> > CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 4.16.0+ #1
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > Ubuntu-1.8.2-1ubuntu1 04/01/2014
> > Workqueue: events_unbound sched_tick_remote
> > RIP: 0010:sched_tick_remote+0xdb/0x100
> > RSP: 0018:a2c440103e60 EFLAGS: 00010006
> > RAX: b2d05e00 RBX: 96f0dfd20980 RCX: 00016a8de322
> > RDX: 0d33a301 RSI: 000177c18623 RDI: fffeb0bf0e53
> > RBP: 96f0dfd24328 R08:  R09: 0001
> > R10:  R11:  R12: 96f0de9d2640
> > R13:  R14: 096f0de81700 R15: 96f0de96f540
> > FS:  () GS:96f0dfc0() knlGS:
> > CS:  0010 DS:  ES:  CR0: 80050033
> > CR2: f7ec6ca3 CR3: 1e03e000 CR4: 06f0
> > Call Trace:
> >  process_one_work+0x139/0x3e0
> >  worker_thread+0x42/0x420
> >  kthread+0xf3/0x130
> >  ? create_worker+0x190/0x190
> >  ? kthread_destroy_worker+0x40/0x40
> >  ret_from_fork+0x35/0x40
> > Code: 89 8b 43 04 85 c0 0f 85 75 ff ff ff 48 8b 83 e0 0a 00 00 48 85 c0 0f 
> > 85 65 ff ff ff e9 69 ff ff ff 48 89 df e8 87 fe ff ff eb 8d <0f> 0b 80 3d 
> > 7d 57 2b 01 00 75 a8 48 c7 c7 e8 e1 fd a4 c6 05 6d
> > ---[ end trace f0c6a1afa55d130d ]---
> > clock_task: 6304138787 exec_start: 221487873
> >
>
> That's a 6 second delay, it's huge!
>
> Could it be because you use Qemu and the virtualized CPUs got interrupted for
> a long while?

Hello,
I am seeing the following message as well and I am running it on real
hardware (Intel Xeon Phi 7250):
My kernel parameters include:
nohz_full=1-271 noretpoline isolcpus=nohz

RCU threads are pinned to cpu zero
for i in `pgrep rcu[^c]` ; do
taskset -pc 0 $i
done

One thing I noticed, though, is that my date is not set properly:
-bash-4.2# date
Thu Jan  1 10:56:21 UTC 1970

Probably because of :
[0.00] NO_HZ: Full dynticks CPUs: 1-271.
[0.00]  Offload RCU callbacks from CPUs: 1-271.
[0.00] WARNING: Persistent clock returned invalid value!
[0.00]  

Re: NO_HZ_FULL and tick running within a reasonable amount of time

2018-04-13 Thread Jacek Tomaka
> On Tue, Apr 03, 2018 at 10:08:58AM -0700, Paul E. McKenney wrote:
> > On Tue, Apr 03, 2018 at 01:41:31PM +0200, Frederic Weisbecker wrote:
> > > On Mon, Apr 02, 2018 at 03:04:38PM -0700, Paul E. McKenney wrote:
> > > > Hello!
> > > >
> > > > I am hitting the following on today's mainline under rcutorture, but
> > > > only on scenarios built with CONFIG_NO_HZ_FULL=y:
> > > >
> > > > 
> > > >
> > > > WARNING: CPU: 0 PID: 7 at 
> > > > /home/paulmck/public_git/linux-rcu/kernel/sched/core.c:3124 
> > > > sched_tick_remote+0x113/0x120
> > > > Modules linked in:
> > > > CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 4.16.0+ #1
> > > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > > > Ubuntu-1.8.2-1ubuntu1 04/01/2014
> > > > Workqueue: events_unbound sched_tick_remote
> > > > RIP: 0010:sched_tick_remote+0x113/0x120
> > > > RSP: 0018:94d540103e20 EFLAGS: 00010002
> > > > RAX: 00012e9bb357 RBX: 8f95dfd21840 RCX: 001f
> > > > RDX: b2d05e00 RSI:  RDI: 8f95dfd21858
> > > > RBP: 94d540103e48 R08: f6499019 R09: f6499000
> > > > R10: b163d33b R11: a5c8c212 R12: 8f95dfd25518
> > > > R13: 8f95de9e4200 R14: 3402 R15: 8f95dfd21858
> > > > FS:  () GS:8f95dfc0() 
> > > > knlGS:
> > > > CS:  0010 DS:  ES:  CR0: 80050033
> > > > CR2: 0a015b40 CR3: 1de14000 CR4: 06f0
> > > > Call Trace:
> > > >  process_one_work+0x1d9/0x6a0
> > > >  worker_thread+0x42/0x420
> > > >  kthread+0xf3/0x130
> > > >  ? rescuer_thread+0x340/0x340
> > > >  ? kthread_delayed_work_timer_fn+0x80/0x80
> > > >  ret_from_fork+0x3a/0x50
> > > > Code: ff 48 8b 83 80 0b 00 00 48 85 c0 0f 85 41 ff ff ff e9 45 ff ff ff 
> > > > be ff ff ff ff 4c 89 ff e8 55 44 02 00 85 c0 75 87 0f 0b eb 83 <0f> 0b 
> > > > eb 97 66 0f 1f 84 00 00 00 00 00 41 57 41 56 41 55 41 54
> > > > ---[ end trace fbdcbe529a8ae799 ]--
> > > >
> > > > 
> > > >
> > > > The WARN_ON_ONCE() triggering is this guy:
> > > >
> > > > delta = rq_clock_task(rq) - curr->se.exec_start;
> > > > WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
> > >
> > > Weird. Can you try to print up those values and see how much they drift?
> > >
> > > if (WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3))
> > >printk_once("clock_task: %lld exec_start: %lld\n", 
> > > rq_clock_task(rq), curr-> >se.exec_start);
> >
> > Here you go!
> >
> > Thanx, Paul
> >
> > 
> >
> > WARNING: CPU: 0 PID: 7 at 
> > /home/paulmck/public_git/linux-rcu/kernel/sched/core.c:3124 
> > sched_tick_remote+0xdb/0x100
> > Modules linked in:
> > CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 4.16.0+ #1
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > Ubuntu-1.8.2-1ubuntu1 04/01/2014
> > Workqueue: events_unbound sched_tick_remote
> > RIP: 0010:sched_tick_remote+0xdb/0x100
> > RSP: 0018:a2c440103e60 EFLAGS: 00010006
> > RAX: b2d05e00 RBX: 96f0dfd20980 RCX: 00016a8de322
> > RDX: 0d33a301 RSI: 000177c18623 RDI: fffeb0bf0e53
> > RBP: 96f0dfd24328 R08:  R09: 0001
> > R10:  R11:  R12: 96f0de9d2640
> > R13:  R14: 096f0de81700 R15: 96f0de96f540
> > FS:  () GS:96f0dfc0() knlGS:
> > CS:  0010 DS:  ES:  CR0: 80050033
> > CR2: f7ec6ca3 CR3: 1e03e000 CR4: 06f0
> > Call Trace:
> >  process_one_work+0x139/0x3e0
> >  worker_thread+0x42/0x420
> >  kthread+0xf3/0x130
> >  ? create_worker+0x190/0x190
> >  ? kthread_destroy_worker+0x40/0x40
> >  ret_from_fork+0x35/0x40
> > Code: 89 8b 43 04 85 c0 0f 85 75 ff ff ff 48 8b 83 e0 0a 00 00 48 85 c0 0f 
> > 85 65 ff ff ff e9 69 ff ff ff 48 89 df e8 87 fe ff ff eb 8d <0f> 0b 80 3d 
> > 7d 57 2b 01 00 75 a8 48 c7 c7 e8 e1 fd a4 c6 05 6d
> > ---[ end trace f0c6a1afa55d130d ]---
> > clock_task: 6304138787 exec_start: 221487873
> >
>
> That's a 6 second delay, it's huge!
>
> Could it be because you use Qemu and the virtualized CPUs got interrupted for
> a long while?

Hello,
I am seeing the following message as well and I am running it on real
hardware (Intel Xeon Phi 7250):
My kernel parameters include:
nohz_full=1-271 noretpoline isolcpus=nohz

RCU threads are pinned to cpu zero
for i in `pgrep rcu[^c]` ; do
taskset -pc 0 $i
done

One thing I noticed, though, is that my date is not set properly:
-bash-4.2# date
Thu Jan  1 10:56:21 UTC 1970

Probably because of :
[0.00] NO_HZ: Full dynticks CPUs: 1-271.
[0.00]  Offload RCU callbacks from CPUs: 1-271.
[0.00] WARNING: Persistent clock returned invalid value!
[0.00]  

Re: [PATCH] x86/xen: Remove use of VLAs

2018-04-13 Thread Laura Abbott

On 04/13/2018 07:55 PM, David Brown wrote:

On Fri, Apr 13, 2018 at 03:11:46PM -0700, Laura Abbott wrote:


There's an ongoing effort to remove VLAs[1] from the kernel to eventually
turn on -Wvla. The few VLAs in use have an upper bound based on a size
of 64K. This doesn't produce an excessively large stack so just switch
the upper bound.

[1] https://lkml.org/lkml/2018/3/7/621


This comment is more in regards to many of these patches, and not as
much this one specifically.

How confident are we in the upper bounds we're setting, and how
obvious will it be in the resulting code if something later
changes so that these bounds are overflowed?

The danger here is that we're converting something a little easier to
detect (a stack overflow), with something harder to detect
(overflowing an array on the stack).



Several people have remarked on that and the solution has been to
put in some kind of WARN and/or error check to make it obvious something
needs to be adjusted.


I guess the question is twofold: how did you determine that 64K was
the largest 'size' value, and how should reviewers verify this as
well.  Perhaps this should at least be in the commit text so someone
tracking down something with this code can find it later.



It's not in the patch context but there's a large comment below:

/*
 * A GDT can be up to 64k in size, which corresponds to 8192
 * 8-byte entries, or 16 4k pages..
 */

BUG_ON(size > 65536);


Given that the number of frames is calculated from the size, that seemed
sufficient.


David


Thanks,
Laura


Re: [PATCH] x86/xen: Remove use of VLAs

2018-04-13 Thread Laura Abbott

On 04/13/2018 07:55 PM, David Brown wrote:

On Fri, Apr 13, 2018 at 03:11:46PM -0700, Laura Abbott wrote:


There's an ongoing effort to remove VLAs[1] from the kernel to eventually
turn on -Wvla. The few VLAs in use have an upper bound based on a size
of 64K. This doesn't produce an excessively large stack so just switch
the upper bound.

[1] https://lkml.org/lkml/2018/3/7/621


This comment is more in regards to many of these patches, and not as
much this one specifically.

How confident are we in the upper bounds we're setting, and how
obvious will it be in the resulting code if something later
changes so that these bounds are overflowed?

The danger here is that we're converting something a little easier to
detect (a stack overflow), with something harder to detect
(overflowing an array on the stack).



Several people have remarked on that and the solution has been to
put in some kind of WARN and/or error check to make it obvious something
needs to be adjusted.


I guess the question is twofold: how did you determine that 64K was
the largest 'size' value, and how should reviewers verify this as
well.  Perhaps this should at least be in the commit text so someone
tracking down something with this code can find it later.



It's not in the patch context but there's a large comment below:

/*
 * A GDT can be up to 64k in size, which corresponds to 8192
 * 8-byte entries, or 16 4k pages..
 */

BUG_ON(size > 65536);


Given that the number of frames is calculated from the size, that seemed
sufficient.


David


Thanks,
Laura


Re: [PATCH] staging fbtft: Fixed lines exceeding columns limit

2018-04-13 Thread Renato Soma
On Fri, Apr 13, 2018 at 03:57:47PM +0300, Dan Carpenter wrote:
> > -   ret = fbtft_write_buf_dc(par, par->buf, sizeof(data_type) + offset, 0); 
> > \
> 
> I feel like the original is basically OK but if we're going to change it
> then align it like this:
> 
>   ret = fbtft_write_buf_dc(par, par->buf, sizeof(data_type) + offset, 
> \
>0);
> \
> 
> 
> Etc.
> 

Alright, I'll fix this issue then send another email with the fixed patch.

Thanks!
renato


Re: [PATCH] staging fbtft: Fixed lines exceeding columns limit

2018-04-13 Thread Renato Soma
On Fri, Apr 13, 2018 at 03:57:47PM +0300, Dan Carpenter wrote:
> > -   ret = fbtft_write_buf_dc(par, par->buf, sizeof(data_type) + offset, 0); 
> > \
> 
> I feel like the original is basically OK but if we're going to change it
> then align it like this:
> 
>   ret = fbtft_write_buf_dc(par, par->buf, sizeof(data_type) + offset, 
> \
>0);
> \
> 
> 
> Etc.
> 

Alright, I'll fix this issue then send another email with the fixed patch.

Thanks!
renato


[PATCH 1/1] dts: qcom: db820c: Add gpio-line-names property

2018-04-13 Thread Manivannan Sadhasivam
Add gpio-line-names property for Dragonboard820c based on APQ8096 SoC.
There are 4 gpio-controllers present on this board, including the
APQ8096 SoC, PM8994 (GPIO and MPP) and PMI8994 (GPIO).

Line names are derived from 96Boards CE Specification 1.0, Appendix
"Expansion Connector Signal Description". Line names for PMI8994 MPP
pins are not added due to the absence of the gpio-controller support.

Signed-off-by: Manivannan Sadhasivam 
---
 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 240 +++
 1 file changed, 240 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi 
b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index 1c8f1b86472d..1c1deef031c6 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -19,6 +19,34 @@
 #include 
 #include 
 
+/*
+ * GPIO name legend: proper name = the GPIO line is used as GPIO
+ * NC  = not connected (pin out but not routed from the chip to
+ *   anything the board)
+ * "[PER]" = pin is muxed for [peripheral] (not GPIO)
+ * LSEC= Low Speed External Connector
+ * HSEC= High Speed External Connector
+ * P HSEC  = Primary High Speed External Connector
+ * S HSEC  = Secondary High Speed External Connector
+ * J14 = Camera Connector
+ * TP  = Test Points
+ *
+ * Line names are taken from the schematic "DragonBoard 820c",
+ * drawing no: LM25-P2751-1
+ *
+ * For the lines routed to the external connectors the
+ * lines are named after the 96Boards CE Specification 1.0,
+ * Appendix "Expansion Connector Signal Description".
+ *
+ * When the 96Board naming of a line and the schematic name of
+ * the same line are in conflict, the 96Board specification
+ * takes precedence, which means that the external UART on the
+ * LSEC is named UART0 while the schematic and SoC names this
+ * UART3. This is only for the informational lines i.e. "[FOO]",
+ * the GPIO named lines "GPIO-A" thru "GPIO-L" are the only
+ * ones actually used for GPIO.
+ */
+
 / {
aliases {
serial0 = _uart1;
@@ -90,6 +118,218 @@
status = "okay";
};
 
+   pinctrl@101 {
+   gpio-line-names =
+   "[SPI0_DOUT]", /* GPIO_0, LSEC pin 14 */
+   "[SPI0_DIN]", /* GPIO_1, LSEC pin 10 */
+   "[SPI0_CS]", /* GPIO_2, LSEC pin 12 */
+   "[SPI0_SCLK]", /* GPIO_3, LSEC pin 8 */
+   "[UART1_TxD]", /* GPIO_4, LSEC pin 11 */
+   "[UART1_RxD]", /* GPIO_5, LSEC pin 13 */
+   "[I2C1_SDA]", /* GPIO_6, LSEC pin 21 */
+   "[I2C1_SCL]", /* GPIO_7, LSEC pin 19 */
+   "GPIO-H", /* GPIO_8, LSEC pin 30 */
+   "TP93", /* GPIO_9 */
+   "GPIO-G", /* GPIO_10, LSEC pin 29 */
+   "[MDP_VSYNC_S]", /* GPIO_11, P HSEC pin 55 */
+   "NC", /* GPIO_12 */
+   "[CSI0_MCLK]", /* GPIO_13, P HSEC pin 15 */
+   "[CAM_MCLK1]", /* GPIO_14, J14 pin 11 */
+   "[CSI1_MCLK]", /* GPIO_15, P HSEC pin 17 */
+   "TP99", /* GPIO_16 */
+   "[I2C2_SDA]", /* GPIO_17, P HSEC pin 34 */
+   "[I2C2_SCL]", /* GPIO_18, P HSEC pin 32 */
+   "[CCI_I2C_SDA1]", /* GPIO_19, S HSEC pin 38 */
+   "[CCI_I2C_SCL1]", /* GPIO_20, S HSEC pin 36 */
+   "FLASH_STROBE_EN", /* GPIO_21, S HSEC pin 5 */
+   "FLASH_STROBE_TRIG", /* GPIO_22, S HSEC pin 1 */
+   "GPIO-K", /* GPIO_23, LSEC pin 33 */
+   "GPIO-D", /* GPIO_24, LSEC pin 26 */
+   "GPIO-I", /* GPIO_25, LSEC pin 31 */
+   "GPIO-J", /* GPIO_26, LSEC pin 32 */
+   "BLSP6_I2C_SDA", /* GPIO_27 */
+   "BLSP6_I2C_SCL", /* GPIO_28 */
+   "GPIO-B", /* GPIO_29, LSEC pin 24 */
+   "GPIO30", /* GPIO_30, S HSEC pin 4 */
+   "HDMI_CEC", /* GPIO_31 */
+   "HDMI_DDC_CLOCK", /* GPIO_32 */
+   "HDMI_DDC_DATA", /* GPIO_33 */
+   "HDMI_HOT_PLUG_DETECT", /* GPIO_34 */
+   "PCIE0_RST_N", /* GPIO_35 */
+   "PCIE0_CLKREQ_N", /* GPIO_36 */
+   "PCIE0_WAKE", /* GPIO_37 */
+ 

[PATCH 0/1] Add gpio-line-names property for Dragonboard820c

2018-04-13 Thread Manivannan Sadhasivam
Add gpio-line-names property for 96Boards Dragonboard820c development
board based on APQ8096 SoC. The lines are named after the 96Boards
CE Specification 1.0, Appendix "Expansion Connector Signal Description".
There are 4 gpio-controllers present on this board, including the
APQ8096 SoC, PM8994 (GPIO, MPP) and PMI8994 (GPIO).

Considering the regular 96Boards Consumer Edition boards, this board belongs
to the Extended Edition form factor, hence includes additional Primary,
Secondary High Speed Connectors and one Camera Connector.

Line names for PMI8994 MPP pins are not added due to the absence of the
gpio-controller support.

Manivannan Sadhasivam (1):
  dts: qcom: db820c: Add gpio-line-names property

 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 240 +++
 1 file changed, 240 insertions(+)

-- 
2.14.1



[PATCH 1/1] dts: qcom: db820c: Add gpio-line-names property

2018-04-13 Thread Manivannan Sadhasivam
Add gpio-line-names property for Dragonboard820c based on APQ8096 SoC.
There are 4 gpio-controllers present on this board, including the
APQ8096 SoC, PM8994 (GPIO and MPP) and PMI8994 (GPIO).

Line names are derived from 96Boards CE Specification 1.0, Appendix
"Expansion Connector Signal Description". Line names for PMI8994 MPP
pins are not added due to the absence of the gpio-controller support.

Signed-off-by: Manivannan Sadhasivam 
---
 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 240 +++
 1 file changed, 240 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi 
b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index 1c8f1b86472d..1c1deef031c6 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -19,6 +19,34 @@
 #include 
 #include 
 
+/*
+ * GPIO name legend: proper name = the GPIO line is used as GPIO
+ * NC  = not connected (pin out but not routed from the chip to
+ *   anything the board)
+ * "[PER]" = pin is muxed for [peripheral] (not GPIO)
+ * LSEC= Low Speed External Connector
+ * HSEC= High Speed External Connector
+ * P HSEC  = Primary High Speed External Connector
+ * S HSEC  = Secondary High Speed External Connector
+ * J14 = Camera Connector
+ * TP  = Test Points
+ *
+ * Line names are taken from the schematic "DragonBoard 820c",
+ * drawing no: LM25-P2751-1
+ *
+ * For the lines routed to the external connectors the
+ * lines are named after the 96Boards CE Specification 1.0,
+ * Appendix "Expansion Connector Signal Description".
+ *
+ * When the 96Board naming of a line and the schematic name of
+ * the same line are in conflict, the 96Board specification
+ * takes precedence, which means that the external UART on the
+ * LSEC is named UART0 while the schematic and SoC names this
+ * UART3. This is only for the informational lines i.e. "[FOO]",
+ * the GPIO named lines "GPIO-A" thru "GPIO-L" are the only
+ * ones actually used for GPIO.
+ */
+
 / {
aliases {
serial0 = _uart1;
@@ -90,6 +118,218 @@
status = "okay";
};
 
+   pinctrl@101 {
+   gpio-line-names =
+   "[SPI0_DOUT]", /* GPIO_0, LSEC pin 14 */
+   "[SPI0_DIN]", /* GPIO_1, LSEC pin 10 */
+   "[SPI0_CS]", /* GPIO_2, LSEC pin 12 */
+   "[SPI0_SCLK]", /* GPIO_3, LSEC pin 8 */
+   "[UART1_TxD]", /* GPIO_4, LSEC pin 11 */
+   "[UART1_RxD]", /* GPIO_5, LSEC pin 13 */
+   "[I2C1_SDA]", /* GPIO_6, LSEC pin 21 */
+   "[I2C1_SCL]", /* GPIO_7, LSEC pin 19 */
+   "GPIO-H", /* GPIO_8, LSEC pin 30 */
+   "TP93", /* GPIO_9 */
+   "GPIO-G", /* GPIO_10, LSEC pin 29 */
+   "[MDP_VSYNC_S]", /* GPIO_11, P HSEC pin 55 */
+   "NC", /* GPIO_12 */
+   "[CSI0_MCLK]", /* GPIO_13, P HSEC pin 15 */
+   "[CAM_MCLK1]", /* GPIO_14, J14 pin 11 */
+   "[CSI1_MCLK]", /* GPIO_15, P HSEC pin 17 */
+   "TP99", /* GPIO_16 */
+   "[I2C2_SDA]", /* GPIO_17, P HSEC pin 34 */
+   "[I2C2_SCL]", /* GPIO_18, P HSEC pin 32 */
+   "[CCI_I2C_SDA1]", /* GPIO_19, S HSEC pin 38 */
+   "[CCI_I2C_SCL1]", /* GPIO_20, S HSEC pin 36 */
+   "FLASH_STROBE_EN", /* GPIO_21, S HSEC pin 5 */
+   "FLASH_STROBE_TRIG", /* GPIO_22, S HSEC pin 1 */
+   "GPIO-K", /* GPIO_23, LSEC pin 33 */
+   "GPIO-D", /* GPIO_24, LSEC pin 26 */
+   "GPIO-I", /* GPIO_25, LSEC pin 31 */
+   "GPIO-J", /* GPIO_26, LSEC pin 32 */
+   "BLSP6_I2C_SDA", /* GPIO_27 */
+   "BLSP6_I2C_SCL", /* GPIO_28 */
+   "GPIO-B", /* GPIO_29, LSEC pin 24 */
+   "GPIO30", /* GPIO_30, S HSEC pin 4 */
+   "HDMI_CEC", /* GPIO_31 */
+   "HDMI_DDC_CLOCK", /* GPIO_32 */
+   "HDMI_DDC_DATA", /* GPIO_33 */
+   "HDMI_HOT_PLUG_DETECT", /* GPIO_34 */
+   "PCIE0_RST_N", /* GPIO_35 */
+   "PCIE0_CLKREQ_N", /* GPIO_36 */
+   "PCIE0_WAKE", /* GPIO_37 */
+   "SD_CARD_DET_N", /* GPIO_38 

[PATCH 0/1] Add gpio-line-names property for Dragonboard820c

2018-04-13 Thread Manivannan Sadhasivam
Add gpio-line-names property for 96Boards Dragonboard820c development
board based on APQ8096 SoC. The lines are named after the 96Boards
CE Specification 1.0, Appendix "Expansion Connector Signal Description".
There are 4 gpio-controllers present on this board, including the
APQ8096 SoC, PM8994 (GPIO, MPP) and PMI8994 (GPIO).

Considering the regular 96Boards Consumer Edition boards, this board belongs
to the Extended Edition form factor, hence includes additional Primary,
Secondary High Speed Connectors and one Camera Connector.

Line names for PMI8994 MPP pins are not added due to the absence of the
gpio-controller support.

Manivannan Sadhasivam (1):
  dts: qcom: db820c: Add gpio-line-names property

 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 240 +++
 1 file changed, 240 insertions(+)

-- 
2.14.1



Re: [PATCH 1/2] X86/KVM: Properly update 'tsc_offset' to represent the running guest

2018-04-13 Thread Raslan, KarimAllah
On Fri, 2018-04-13 at 18:04 +0200, Paolo Bonzini wrote:
> On 13/04/2018 18:02, Jim Mattson wrote:
> > 
> > On Fri, Apr 13, 2018 at 4:23 AM, Paolo Bonzini  wrote:
> > > 
> > > From: KarimAllah Ahmed 
> > > 
> > > Update 'tsc_offset' on vmentry/vmexit of L2 guests to ensure that it always
> > > captures the TSC_OFFSET of the running guest whether it is the L1 or L2
> > > guest.
> > > 
> > > Cc: Jim Mattson 
> > > Cc: Paolo Bonzini 
> > > Cc: Radim Krčmář 
> > > Cc: k...@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > Suggested-by: Paolo Bonzini 
> > > Signed-off-by: KarimAllah Ahmed 
> > > [AMD changes, fix update_ia32_tsc_adjust_msr. - Paolo]
> > > Signed-off-by: Paolo Bonzini 
> > 
> > > 
> > > @@ -11489,6 +11497,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, 
> > > bool launch)
> > > if (enable_shadow_vmcs)
> > > copy_shadow_to_vmcs12(vmx);
> > > 
> > > +   if (vmcs12->cpu_based_vm_exec_control & 
> > > CPU_BASED_USE_TSC_OFFSETING)
> > > +   vcpu->arch.tsc_offset += vmcs12->tsc_offset;
> > > +
> > 
> > This seems a little early, since we don't restore the L1 TSC offset on
> > the nested_vmx_failValid path.
> > 
> 
> Now this can be a nice one to introduce the VMX API tests. :)  I'll try
> to do it on Monday as punishment for not noticing the bug.  In the
> meanwhile, Karim, can you post a fixed version?

done

> 
> Paolo
> 
Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B


Re: [PATCH 1/2] X86/KVM: Properly update 'tsc_offset' to represent the running guest

2018-04-13 Thread Raslan, KarimAllah
On Fri, 2018-04-13 at 18:04 +0200, Paolo Bonzini wrote:
> On 13/04/2018 18:02, Jim Mattson wrote:
> > 
> > On Fri, Apr 13, 2018 at 4:23 AM, Paolo Bonzini  wrote:
> > > 
> > > From: KarimAllah Ahmed 
> > > 
> > > Update 'tsc_offset' on vmentry/vmexit of L2 guests to ensure that it always
> > > captures the TSC_OFFSET of the running guest whether it is the L1 or L2
> > > guest.
> > > 
> > > Cc: Jim Mattson 
> > > Cc: Paolo Bonzini 
> > > Cc: Radim Krčmář 
> > > Cc: k...@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > Suggested-by: Paolo Bonzini 
> > > Signed-off-by: KarimAllah Ahmed 
> > > [AMD changes, fix update_ia32_tsc_adjust_msr. - Paolo]
> > > Signed-off-by: Paolo Bonzini 
> > 
> > > 
> > > @@ -11489,6 +11497,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, 
> > > bool launch)
> > > if (enable_shadow_vmcs)
> > > copy_shadow_to_vmcs12(vmx);
> > > 
> > > +   if (vmcs12->cpu_based_vm_exec_control & 
> > > CPU_BASED_USE_TSC_OFFSETING)
> > > +   vcpu->arch.tsc_offset += vmcs12->tsc_offset;
> > > +
> > 
> > This seems a little early, since we don't restore the L1 TSC offset on
> > the nested_vmx_failValid path.
> > 
> 
> Now this can be a nice one to introduce the VMX API tests. :)  I'll try
> to do it on Monday as punishment for not noticing the bug.  In the
> meanwhile, Karim, can you post a fixed version?

done

> 
> Paolo
> 
Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B


[PATCH v4] X86/KVM: Properly update 'tsc_offset' to represent the running guest

2018-04-13 Thread KarimAllah Ahmed
Update 'tsc_offset' on vmentry/vmexit of L2 guests to ensure that it always
captures the TSC_OFFSET of the running guest whether it is the L1 or L2
guest.

Cc: Jim Mattson 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: k...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Paolo Bonzini 
Signed-off-by: KarimAllah Ahmed 
[AMD changes, fix update_ia32_tsc_adjust_msr. - Paolo]
Signed-off-by: Paolo Bonzini 

---
v3 -> v4:
- Restore L01 tsc_offset on enter_vmx_non_root_mode failures.
- Move tsc_offset update for L02 later in nested_vmx_run.

v2 -> v3:
- Add AMD bits as well.
- Fix update_ia32_tsc_adjust_msr.

v1 -> v2:
- Rewrote the patch to always update tsc_offset to represent the current
  guest (pbonzini@)
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c  | 17 -
 arch/x86/kvm/vmx.c  | 29 -
 arch/x86/kvm/x86.c  |  6 --
 4 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7a200f6..a40a32e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1016,6 +1016,7 @@ struct kvm_x86_ops {
 
bool (*has_wbinvd_exit)(void);
 
+   u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b58787d..1f00c18 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1423,12 +1423,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t 
type)
seg->base = 0;
 }
 
+static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (is_guest_mode(vcpu))
+   return svm->nested.hsave->control.tsc_offset;
+
+   return vcpu->arch.tsc_offset;
+}
+
 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
 
if (is_guest_mode(vcpu)) {
+   /* Write L1's TSC offset.  */
g_tsc_offset = svm->vmcb->control.tsc_offset -
   svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
@@ -3322,6 +3333,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
 
+   vcpu->arch.tsc_offset = svm->vmcb->control.tsc_offset;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -3482,10 +3494,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, 
u64 vmcb_gpa,
/* We don't want to see VMMCALLs from a nested guest */
clr_intercept(svm, INTERCEPT_VMMCALL);
 
+   vcpu->arch.tsc_offset += nested_vmcb->control.tsc_offset;
+   svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
+
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
-   svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -7102,6 +7116,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
.has_wbinvd_exit = svm_has_wbinvd_exit,
 
+   .read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
 
.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b6942de..05ba3c6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2885,6 +2885,17 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx_update_msr_bitmap(&vmx->vcpu);
 }
 
+static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+   struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+   if (is_guest_mode(vcpu) &&
+   (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+   return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
+
+   return vcpu->arch.tsc_offset;
+}
+
 /*
  * reads and returns guest's timestamp counter "register"
  * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
@@ -11112,11 +11123,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, 
struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
 
-   if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
-   vmcs_write64(TSC_OFFSET,
-   vcpu->arch.tsc_offset + vmcs12->tsc_offset);
-   else
-   

[PATCH v4] X86/KVM: Properly update 'tsc_offset' to represent the running guest

2018-04-13 Thread KarimAllah Ahmed
Update 'tsc_offset' on vmentry/vmexit of L2 guests to ensure that it always
captures the TSC_OFFSET of the running guest whether it is the L1 or L2
guest.

Cc: Jim Mattson 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: k...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Paolo Bonzini 
Signed-off-by: KarimAllah Ahmed 
[AMD changes, fix update_ia32_tsc_adjust_msr. - Paolo]
Signed-off-by: Paolo Bonzini 

---
v3 -> v4:
- Restore L01 tsc_offset on enter_vmx_non_root_mode failures.
- Move tsc_offset update for L02 later in nested_vmx_run.

v2 -> v3:
- Add AMD bits as well.
- Fix update_ia32_tsc_adjust_msr.

v1 -> v2:
- Rewrote the patch to always update tsc_offset to represent the current
  guest (pbonzini@)
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c  | 17 -
 arch/x86/kvm/vmx.c  | 29 -
 arch/x86/kvm/x86.c  |  6 --
 4 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7a200f6..a40a32e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1016,6 +1016,7 @@ struct kvm_x86_ops {
 
bool (*has_wbinvd_exit)(void);
 
+   u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b58787d..1f00c18 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1423,12 +1423,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t 
type)
seg->base = 0;
 }
 
+static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (is_guest_mode(vcpu))
+   return svm->nested.hsave->control.tsc_offset;
+
+   return vcpu->arch.tsc_offset;
+}
+
 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
 
if (is_guest_mode(vcpu)) {
+   /* Write L1's TSC offset.  */
g_tsc_offset = svm->vmcb->control.tsc_offset -
   svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
@@ -3322,6 +3333,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
 
+   vcpu->arch.tsc_offset = svm->vmcb->control.tsc_offset;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
 
@@ -3482,10 +3494,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, 
u64 vmcb_gpa,
/* We don't want to see VMMCALLs from a nested guest */
clr_intercept(svm, INTERCEPT_VMMCALL);
 
+   vcpu->arch.tsc_offset += nested_vmcb->control.tsc_offset;
+   svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
+
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
-   svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
 
@@ -7102,6 +7116,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
.has_wbinvd_exit = svm_has_wbinvd_exit,
 
+   .read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
 
.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b6942de..05ba3c6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2885,6 +2885,17 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx_update_msr_bitmap(&vmx->vcpu);
 }
 
+static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+   struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+   if (is_guest_mode(vcpu) &&
+   (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+   return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
+
+   return vcpu->arch.tsc_offset;
+}
+
 /*
  * reads and returns guest's timestamp counter "register"
  * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
@@ -11112,11 +11123,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, 
struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
 
-   if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
-   vmcs_write64(TSC_OFFSET,
-   vcpu->arch.tsc_offset + vmcs12->tsc_offset);
-   else
-   vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+   vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+
if 

[PATCH 2/2] printk: wake up klogd in vprintk_emit

2018-04-13 Thread Sergey Senozhatsky
We wake up klogd very late - only when current console_sem owner
is done pushing pending kernel messages to the serial/net consoles.
In some cases this results in lost syslog messages, because kernel
log buffer is a circular buffer and if we don't wake up syslog for long
enough there is a chance that the logbuf will simply wrap around.

The patch moves the klog wake up call to vprintk_emit(), which is
the only legit way for a kernel message to appear in the logbuf,
right before we attempt to grab the console_sem (possibly spinning
on it waiting for the hand off) and call console drivers.

Signed-off-by: Sergey Senozhatsky 
---
 kernel/printk/printk.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2f4af216bd6e..86f0b337cbf6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1888,6 +1888,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 
printed_len = log_output(facility, level, lflags, dict, dictlen, text, 
text_len);
 
+   wake_up_klogd();
logbuf_unlock_irqrestore(flags);
 
/* If called from the scheduler, we can not call up(). */
@@ -2289,9 +2290,7 @@ void console_unlock(void)
 {
static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[LOG_LINE_MAX + PREFIX_MAX];
-   static u64 seen_seq;
unsigned long flags;
-   bool wake_klogd = false;
bool do_cond_resched, retry;
 
if (console_suspended) {
@@ -2335,11 +2334,6 @@ void console_unlock(void)
 
printk_safe_enter_irqsave(flags);
raw_spin_lock(&logbuf_lock);
-   if (seen_seq != log_next_seq) {
-   wake_klogd = true;
-   seen_seq = log_next_seq;
-   }
-
if (console_seq < log_first_seq) {
len = sprintf(text, "** %u printk messages dropped 
**\n",
  (unsigned)(log_first_seq - console_seq));
@@ -2397,7 +2391,7 @@ void console_unlock(void)
 
if (console_lock_spinning_disable_and_check()) {
printk_safe_exit_irqrestore(flags);
-   goto out;
+   return;
}
 
printk_safe_exit_irqrestore(flags);
@@ -2429,10 +2423,6 @@ void console_unlock(void)
 
if (retry && console_trylock())
goto again;
-
-out:
-   if (wake_klogd)
-   wake_up_klogd();
 }
 EXPORT_SYMBOL(console_unlock);
 
-- 
2.17.0



[PATCH 2/2] printk: wake up klogd in vprintk_emit

2018-04-13 Thread Sergey Senozhatsky
We wake up klogd very late - only when current console_sem owner
is done pushing pending kernel messages to the serial/net consoles.
In some cases this results in lost syslog messages, because kernel
log buffer is a circular buffer and if we don't wake up syslog for long
enough there is a chance that the logbuf will simply wrap around.

The patch moves the klog wake up call to vprintk_emit(), which is
the only legit way for a kernel message to appear in the logbuf,
right before we attempt to grab the console_sem (possibly spinning
on it waiting for the hand off) and call console drivers.

Signed-off-by: Sergey Senozhatsky 
---
 kernel/printk/printk.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2f4af216bd6e..86f0b337cbf6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1888,6 +1888,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 
printed_len = log_output(facility, level, lflags, dict, dictlen, text, 
text_len);
 
+   wake_up_klogd();
logbuf_unlock_irqrestore(flags);
 
/* If called from the scheduler, we can not call up(). */
@@ -2289,9 +2290,7 @@ void console_unlock(void)
 {
static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[LOG_LINE_MAX + PREFIX_MAX];
-   static u64 seen_seq;
unsigned long flags;
-   bool wake_klogd = false;
bool do_cond_resched, retry;
 
if (console_suspended) {
@@ -2335,11 +2334,6 @@ void console_unlock(void)
 
printk_safe_enter_irqsave(flags);
raw_spin_lock(&logbuf_lock);
-   if (seen_seq != log_next_seq) {
-   wake_klogd = true;
-   seen_seq = log_next_seq;
-   }
-
if (console_seq < log_first_seq) {
len = sprintf(text, "** %u printk messages dropped 
**\n",
  (unsigned)(log_first_seq - console_seq));
@@ -2397,7 +2391,7 @@ void console_unlock(void)
 
if (console_lock_spinning_disable_and_check()) {
printk_safe_exit_irqrestore(flags);
-   goto out;
+   return;
}
 
printk_safe_exit_irqrestore(flags);
@@ -2429,10 +2423,6 @@ void console_unlock(void)
 
if (retry && console_trylock())
goto again;
-
-out:
-   if (wake_klogd)
-   wake_up_klogd();
 }
 EXPORT_SYMBOL(console_unlock);
 
-- 
2.17.0



[PATCH 1/2] vsprintf: Tweak pF/pf comment

2018-04-13 Thread Sergey Senozhatsky
Reflect changes that have happened to pf/pF (deprecation)
specifiers in pointer() comment section.

Signed-off-by: Sergey Senozhatsky 
---
 lib/vsprintf.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 30c0cb8cc9bc..dd18bb2a56ef 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1746,10 +1746,10 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, 
struct printf_spec spec)
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers with offset
- * - 'f' For simple symbolic function names without offset
- * - 'S' For symbolic direct pointers with offset
- * - 's' For symbolic direct pointers without offset
+ * - 'S' For symbolic direct pointers (or function descriptors) with offset
+ * - 's' For symbolic direct pointers (or function descriptors) without offset
+ * - 'F' Same as 'S'
+ * - 'f' Same as 's'
  * - '[FfSs]R' as above with __builtin_extract_return_addr() translation
  * - 'B' For backtraced symbolic direct pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
@@ -1846,10 +1846,6 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, 
struct printf_spec spec)
  * ** When making changes please also update:
  * Documentation/core-api/printk-formats.rst
  *
- * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
- * function pointers are really function descriptors, which contain a
- * pointer to the real address.
- *
  * Note: The default behaviour (unadorned %p) is to hash the address,
  * rendering it useful as a unique identifier.
  */
-- 
2.17.0



[PATCH 1/2] vsprintf: Tweak pF/pf comment

2018-04-13 Thread Sergey Senozhatsky
Reflect changes that have happened to pf/pF (deprecation)
specifiers in pointer() comment section.

Signed-off-by: Sergey Senozhatsky 
---
 lib/vsprintf.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 30c0cb8cc9bc..dd18bb2a56ef 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1746,10 +1746,10 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, 
struct printf_spec spec)
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers with offset
- * - 'f' For simple symbolic function names without offset
- * - 'S' For symbolic direct pointers with offset
- * - 's' For symbolic direct pointers without offset
+ * - 'S' For symbolic direct pointers (or function descriptors) with offset
+ * - 's' For symbolic direct pointers (or function descriptors) without offset
+ * - 'F' Same as 'S'
+ * - 'f' Same as 's'
  * - '[FfSs]R' as above with __builtin_extract_return_addr() translation
  * - 'B' For backtraced symbolic direct pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
@@ -1846,10 +1846,6 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, 
struct printf_spec spec)
  * ** When making changes please also update:
  * Documentation/core-api/printk-formats.rst
  *
- * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
- * function pointers are really function descriptors, which contain a
- * pointer to the real address.
- *
  * Note: The default behaviour (unadorned %p) is to hash the address,
  * rendering it useful as a unique identifier.
  */
-- 
2.17.0



Re: [PATCH] x86/xen: Remove use of VLAs

2018-04-13 Thread David Brown

On Fri, Apr 13, 2018 at 03:11:46PM -0700, Laura Abbott wrote:


There's an ongoing effort to remove VLAs[1] from the kernel to eventually
turn on -Wvla. The few VLAs in use have an upper bound based on a size
of 64K. This doesn't produce an excessively large stack so just switch
the upper bound.

[1] https://lkml.org/lkml/2018/3/7/621


This comment is more in regards to many of these patches, and not as
much this one specifically.

How confident are we in the upper bounds we're setting, and how
obvious is it in the resulting code, so that something doesn't later
change to overflow these bounds?

The danger here is that we're replacing something a little easier to
detect (a stack overflow) with something harder to detect
(overflowing an array on the stack).

I guess the question is twofold: how did you determine that 64K was
the largest 'size' value, and how should reviewers verify this as
well.  Perhaps this should at least be in the commit text so someone
tracking down something with this code can find it later.

David


Re: [PATCH] x86/xen: Remove use of VLAs

2018-04-13 Thread David Brown

On Fri, Apr 13, 2018 at 03:11:46PM -0700, Laura Abbott wrote:


There's an ongoing effort to remove VLAs[1] from the kernel to eventually
turn on -Wvla. The few VLAs in use have an upper bound based on a size
of 64K. This doesn't produce an excessively large stack so just switch
the upper bound.

[1] https://lkml.org/lkml/2018/3/7/621


This comment is more in regards to many of these patches, and not as
much this one specifically.

How confident are we in the upper bounds we're setting, and how
obvious is it in the resulting code, so that something doesn't later
change to overflow these bounds?

The danger here is that we're replacing something a little easier to
detect (a stack overflow) with something harder to detect
(overflowing an array on the stack).

I guess the question is twofold: how did you determine that 64K was
the largest 'size' value, and how should reviewers verify this as
well.  Perhaps this should at least be in the commit text so someone
tracking down something with this code can find it later.

David


[PATCH v2 1/2] regulator: dt-bindings: add QCOM RPMh regulator bindings

2018-04-13 Thread David Collins
Introduce bindings for RPMh regulator devices found on some
Qualcomm Technologies, Inc. SoCs.  These devices allow a given
processor within the SoC to make PMIC regulator requests which
are aggregated within the RPMh hardware block along with requests
from other processors in the SoC to determine the final PMIC
regulator hardware state.

Signed-off-by: David Collins 
---
 .../bindings/regulator/qcom,rpmh-regulator.txt | 207 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 
 2 files changed, 243 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

diff --git 
a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt 
b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
new file mode 100644
index 0000000..69748ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -0,0 +1,207 @@
+Qualcomm Technologies, Inc. RPMh Regulators
+
+rpmh-regulator devices support PMIC regulator management via the Voltage
+Regulator Manager (VRM) and Oscillator Buffer (XOB) RPMh accelerators.  The 
APPS
+processor communicates with these hardware blocks via a Resource State
+Coordinator (RSC) using command packets.  The VRM allows changing four
+parameters for a given regulator: enable state, output voltage, operating mode,
+and minimum headroom voltage.  The XOB allows changing only a single parameter
+for a given regulator: its enable state.  Despite its name, the XOB is capable
+of controlling the enable state of any PMIC peripheral.  It is used for clock
+buffers, low-voltage switches, and LDO/SMPS regulators which have a fixed
+voltage and mode.
+
+===
+Required Node Structure
+===
+
+RPMh regulators must be described in two levels of device nodes.  The first
+level describes the PMIC containing the regulators and must reside within an
+RPMh device node.  The second level describes each regulator within the PMIC
+which is to be used on the board.  Each of these regulators maps to a single
+RPMh resource.
+
+The names used for regulator nodes must match those supported by a given PMIC.
+Supported regulator node names:
+   PM8998: smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
+   PMI8998:bob
+   PM8005: smps1 - smps4
+
+
+First Level Nodes - PMIC
+
+
+- compatible
+   Usage:  required
+   Value type: 
+   Definition: Must be one of: "qcom,pm8998-rpmh-regulators",
+   "qcom,pmi8998-rpmh-regulators" or
+   "qcom,pm8005-rpmh-regulators".
+
+- qcom,pmic-id
+   Usage:  required
+   Value type: 
+   Definition: RPMh resource name suffix used for the regulators found on
+   this PMIC.  Typical values: "a", "b", "c", "d", "e", "f".
+
+- vdd_s1-supply
+- vdd_s2-supply
+- vdd_s3-supply
+- vdd_s4-supply
+- vdd_s5-supply
+- vdd_s6-supply
+- vdd_s7-supply
+- vdd_s8-supply
+- vdd_s9-supply
+- vdd_s10-supply
+- vdd_s11-supply
+- vdd_s12-supply
+- vdd_s13-supply
+- vdd_l1_l27-supply
+- vdd_l2_l8_l17-supply
+- vdd_l3_l11-supply
+- vdd_l4_l5-supply
+- vdd_l6-supply
+- vdd_l7_l12_l14_l15-supply
+- vdd_l9-supply
+- vdd_l10_l23_l25-supply
+- vdd_l13_l19_l21-supply
+- vdd_l16_l28-supply
+- vdd_l18_l22-supply
+- vdd_l20_l24-supply
+- vdd_l26-supply
+- vdd_lvs1_lvs2-supply
+- vdd_lvs1_lvs2-supply
+   Usage:  optional (PM8998 only)
+   Value type: 
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+- vdd_bob-supply
+   Usage:  optional (PMI8998 only)
+   Value type: 
+   Definition: BOB regulator parent supply phandle
+
+- vdd_s1-supply
+- vdd_s2-supply
+- vdd_s3-supply
+- vdd_s4-supply
+   Usage:  optional (PM8005 only)
+   Value type: 
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+===
+Second Level Nodes - Regulators
+===
+
+- qcom,regulator-initial-voltage
+   Usage:  optional; VRM regulators only
+   Value type: 
+   Definition: Specifies the initial voltage in microvolts to request for a
+   VRM regulator.
+
+- regulator-initial-mode
+   Usage:  optional; VRM regulators only
+   Value type: 
+   Definition: Specifies the initial mode to request for a VRM regulator.
+   Supported values are RPMH_REGULATOR_MODE_* which are defined
+   in [1] (i.e. 0 to 3).  This property may be specified even
+   if the regulator-allow-set-load property is not specified.
+
+- qcom,allowed-drms-modes
+   Usage:  required if regulator-allow-set-load is specified;
+

[PATCH v2 1/2] regulator: dt-bindings: add QCOM RPMh regulator bindings

2018-04-13 Thread David Collins
Introduce bindings for RPMh regulator devices found on some
Qualcomm Technologies, Inc. SoCs.  These devices allow a given
processor within the SoC to make PMIC regulator requests which
are aggregated within the RPMh hardware block along with requests
from other processors in the SoC to determine the final PMIC
regulator hardware state.

Signed-off-by: David Collins 
---
 .../bindings/regulator/qcom,rpmh-regulator.txt | 207 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 
 2 files changed, 243 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

diff --git 
a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt 
b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
new file mode 100644
index 0000000..69748ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -0,0 +1,207 @@
+Qualcomm Technologies, Inc. RPMh Regulators
+
+rpmh-regulator devices support PMIC regulator management via the Voltage
+Regulator Manager (VRM) and Oscillator Buffer (XOB) RPMh accelerators.  The 
APPS
+processor communicates with these hardware blocks via a Resource State
+Coordinator (RSC) using command packets.  The VRM allows changing four
+parameters for a given regulator: enable state, output voltage, operating mode,
+and minimum headroom voltage.  The XOB allows changing only a single parameter
+for a given regulator: its enable state.  Despite its name, the XOB is capable
+of controlling the enable state of any PMIC peripheral.  It is used for clock
+buffers, low-voltage switches, and LDO/SMPS regulators which have a fixed
+voltage and mode.
+
+===
+Required Node Structure
+===
+
+RPMh regulators must be described in two levels of device nodes.  The first
+level describes the PMIC containing the regulators and must reside within an
+RPMh device node.  The second level describes each regulator within the PMIC
+which is to be used on the board.  Each of these regulators maps to a single
+RPMh resource.
+
+The names used for regulator nodes must match those supported by a given PMIC.
+Supported regulator node names:
+   PM8998: smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
+   PMI8998:bob
+   PM8005: smps1 - smps4
+
+
+First Level Nodes - PMIC
+
+
+- compatible
+   Usage:  required
+   Value type: 
+   Definition: Must be one of: "qcom,pm8998-rpmh-regulators",
+   "qcom,pmi8998-rpmh-regulators" or
+   "qcom,pm8005-rpmh-regulators".
+
+- qcom,pmic-id
+   Usage:  required
+   Value type: 
+   Definition: RPMh resource name suffix used for the regulators found on
+   this PMIC.  Typical values: "a", "b", "c", "d", "e", "f".
+
+- vdd_s1-supply
+- vdd_s2-supply
+- vdd_s3-supply
+- vdd_s4-supply
+- vdd_s5-supply
+- vdd_s6-supply
+- vdd_s7-supply
+- vdd_s8-supply
+- vdd_s9-supply
+- vdd_s10-supply
+- vdd_s11-supply
+- vdd_s12-supply
+- vdd_s13-supply
+- vdd_l1_l27-supply
+- vdd_l2_l8_l17-supply
+- vdd_l3_l11-supply
+- vdd_l4_l5-supply
+- vdd_l6-supply
+- vdd_l7_l12_l14_l15-supply
+- vdd_l9-supply
+- vdd_l10_l23_l25-supply
+- vdd_l13_l19_l21-supply
+- vdd_l16_l28-supply
+- vdd_l18_l22-supply
+- vdd_l20_l24-supply
+- vdd_l26-supply
+- vdd_lvs1_lvs2-supply
+- vdd_lvs1_lvs2-supply
+   Usage:  optional (PM8998 only)
+   Value type: 
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+- vdd_bob-supply
+   Usage:  optional (PMI8998 only)
+   Value type: 
+   Definition: BOB regulator parent supply phandle
+
+- vdd_s1-supply
+- vdd_s2-supply
+- vdd_s3-supply
+- vdd_s4-supply
+   Usage:  optional (PM8005 only)
+   Value type: 
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+===
+Second Level Nodes - Regulators
+===
+
+- qcom,regulator-initial-voltage
+   Usage:  optional; VRM regulators only
+   Value type: 
+   Definition: Specifies the initial voltage in microvolts to request for a
+   VRM regulator.
+
+- regulator-initial-mode
+   Usage:  optional; VRM regulators only
+   Value type: 
+   Definition: Specifies the initial mode to request for a VRM regulator.
+   Supported values are RPMH_REGULATOR_MODE_* which are defined
+   in [1] (i.e. 0 to 3).  This property may be specified even
+   if the regulator-allow-set-load property is not specified.
+
+- qcom,allowed-drms-modes
+   Usage:  required if regulator-allow-set-load is specified;
+   VRM 

[PATCH v2 0/2] regulator: add QCOM RPMh regulator driver

2018-04-13 Thread David Collins
Hello,

This patch series adds a driver and device tree binding documentation for
PMIC regulator control via Resource Power Manager-hardened (RPMh) on some
Qualcomm Technologies, Inc. SoCs such as SDM845.  RPMh is a hardware block
which contains several accelerators which are used to manage various
hardware resources that are shared between the processors of the SoC.  The
final hardware state of a regulator is determined within RPMh by performing
max aggregation of the requests made by all of the processors.

The RPMh regulator driver depends upon the RPMh driver [1] and command DB
driver [2] which are both still undergoing review.

Changes since v1 [3]:
 - Addressed review feedback from Doug, Mark, and Stephen
 - Replaced set_voltage()/get_voltage() callbacks with set_voltage_sel()/
   get_voltage_sel()
 - Added set_bypass()/get_bypass() callbacks for BOB pass-through mode
   control
 - Removed top-level PMIC data structures
 - Removed initialization variables from structs and passed them as
   function parameters
 - Removed various comments and error messages
 - Simplified mode handling
 - Refactored per-PMIC rpmh-regulator data specification
 - Simplified probe function
 - Moved header into DT patch
 - Removed redundant property listings from DT binding documentation

Thanks,
David

[1]: https://lkml.org/lkml/2018/4/5/480
[2]: https://lkml.org/lkml/2018/4/10/714
[3]: https://lkml.org/lkml/2018/3/16/1431

David Collins (2):
  regulator: dt-bindings: add QCOM RPMh regulator bindings
  regulator: add QCOM RPMh regulator driver

 .../bindings/regulator/qcom,rpmh-regulator.txt | 207 +
 drivers/regulator/Kconfig  |   9 +
 drivers/regulator/Makefile |   1 +
 drivers/regulator/qcom_rpmh-regulator.c| 910 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 +
 5 files changed, 1163 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 drivers/regulator/qcom_rpmh-regulator.c
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 0/2] regulator: add QCOM RPMh regulator driver

2018-04-13 Thread David Collins
Hello,

This patch series adds a driver and device tree binding documentation for
PMIC regulator control via Resource Power Manager-hardened (RPMh) on some
Qualcomm Technologies, Inc. SoCs such as SDM845.  RPMh is a hardware block
which contains several accelerators which are used to manage various
hardware resources that are shared between the processors of the SoC.  The
final hardware state of a regulator is determined within RPMh by performing
max aggregation of the requests made by all of the processors.

The RPMh regulator driver depends upon the RPMh driver [1] and command DB
driver [2] which are both still undergoing review.

Changes since v1 [3]:
 - Addressed review feedback from Doug, Mark, and Stephen
 - Replaced set_voltage()/get_voltage() callbacks with set_voltage_sel()/
   get_voltage_sel()
 - Added set_bypass()/get_bypass() callbacks for BOB pass-through mode
   control
 - Removed top-level PMIC data structures
 - Removed initialization variables from structs and passed them as
   function parameters
 - Removed various comments and error messages
 - Simplified mode handling
 - Refactored per-PMIC rpmh-regulator data specification
 - Simplified probe function
 - Moved header into DT patch
 - Removed redundant property listings from DT binding documentation

Thanks,
David

[1]: https://lkml.org/lkml/2018/4/5/480
[2]: https://lkml.org/lkml/2018/4/10/714
[3]: https://lkml.org/lkml/2018/3/16/1431

David Collins (2):
  regulator: dt-bindings: add QCOM RPMh regulator bindings
  regulator: add QCOM RPMh regulator driver

 .../bindings/regulator/qcom,rpmh-regulator.txt | 207 +
 drivers/regulator/Kconfig  |   9 +
 drivers/regulator/Makefile |   1 +
 drivers/regulator/qcom_rpmh-regulator.c| 910 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 +
 5 files changed, 1163 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 drivers/regulator/qcom_rpmh-regulator.c
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 2/2] regulator: add QCOM RPMh regulator driver

2018-04-13 Thread David Collins
Add the QCOM RPMh regulator driver to manage PMIC regulators
which are controlled via RPMh on some Qualcomm Technologies, Inc.
SoCs.  RPMh is a hardware block which contains several
accelerators which are used to manage various hardware resources
that are shared between the processors of the SoC.  The final
hardware state of a regulator is determined within RPMh by
performing max aggregation of the requests made by all of the
processors.

Add support for PMIC regulator control via the voltage regulator
manager (VRM) and oscillator buffer (XOB) RPMh accelerators.  VRM
supports manipulation of enable state, voltage, mode, and
headroom voltage.  XOB supports manipulation of enable state.

Signed-off-by: David Collins 
---
 drivers/regulator/Kconfig   |   9 +
 drivers/regulator/Makefile  |   1 +
 drivers/regulator/qcom_rpmh-regulator.c | 910 
 3 files changed, 920 insertions(+)
 create mode 100644 drivers/regulator/qcom_rpmh-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 097f617..e0ecd0a 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -671,6 +671,15 @@ config REGULATOR_QCOM_RPM
  Qualcomm RPM as a module. The module will be named
  "qcom_rpm-regulator".
 
+config REGULATOR_QCOM_RPMH
+   tristate "Qualcomm Technologies, Inc. RPMh regulator driver"
+   depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+   help
+ This driver supports control of PMIC regulators via the RPMh hardware
+ block found on Qualcomm Technologies Inc. SoCs.  RPMh regulator
+ control allows for voting on regulator state between multiple
+ processors within the SoC.
+
 config REGULATOR_QCOM_SMD_RPM
tristate "Qualcomm SMD based RPM regulator driver"
depends on QCOM_SMD_RPM
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 590674f..c2274dd 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_REGULATOR_MT6323)+= mt6323-regulator.o
 obj-$(CONFIG_REGULATOR_MT6380) += mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397) += mt6397-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o
+obj-$(CONFIG_REGULATOR_QCOM_RPMH) += qcom_rpmh-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SMD_RPM) += qcom_smd-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SPMI) += qcom_spmi-regulator.o
 obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
diff --git a/drivers/regulator/qcom_rpmh-regulator.c 
b/drivers/regulator/qcom_rpmh-regulator.c
new file mode 100644
index 000..03b1301
--- /dev/null
+++ b/drivers/regulator/qcom_rpmh-regulator.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+
+/**
+ * enum rpmh_regulator_type - supported RPMh accelerator types
+ * %VRM:   RPMh VRM accelerator which supports voting on enable, voltage,
+ * mode, and headroom voltage of LDO, SMPS, and BOB type PMIC
+ * regulators.
+ * %XOB:   RPMh XOB accelerator which supports voting on the enable state
+ * of PMIC regulators.
+ */
+enum rpmh_regulator_type {
+   VRM,
+   XOB,
+};
+
+#define RPMH_VRM_HEADROOM_MAX_UV   511000
+
+#define RPMH_REGULATOR_REG_VRM_VOLTAGE 0x0
+#define RPMH_REGULATOR_REG_ENABLE  0x4
+#define RPMH_REGULATOR_REG_VRM_MODE 0x8
+#define RPMH_REGULATOR_REG_VRM_HEADROOM 0xC
+
+#define RPMH_REGULATOR_DISABLE 0x0
+#define RPMH_REGULATOR_ENABLE  0x1
+
+#define RPMH_REGULATOR_MODE_COUNT  4
+
+/**
+ * struct rpmh_vreg_hw_data - RPMh regulator hardware configurations
+ * @regulator_type: RPMh accelerator type used to manage this
+ * regulator
+ * @ops:   Pointer to regulator ops callback structure
+ * @voltage_range: The single range of voltages supported by this
+ * PMIC regulator type
+ * @n_voltages: The number of unique voltage set points defined
+ * by voltage_range
+ * @pmic_mode_map: Array indexed by regulator framework mode
+ * containing PMIC hardware modes.  Must be large
+ * enough to index all framework modes supported
+ * by this regulator hardware type.
+ * @of_map_mode:   Maps an RPMH_REGULATOR_MODE_* mode value defined
+ * in device tree to a regulator framework mode
+ * @bypass_mode:   VRM PMIC mode value corresponding 

[PATCH v2 2/2] regulator: add QCOM RPMh regulator driver

2018-04-13 Thread David Collins
Add the QCOM RPMh regulator driver to manage PMIC regulators
which are controlled via RPMh on some Qualcomm Technologies, Inc.
SoCs.  RPMh is a hardware block which contains several
accelerators which are used to manage various hardware resources
that are shared between the processors of the SoC.  The final
hardware state of a regulator is determined within RPMh by
performing max aggregation of the requests made by all of the
processors.

Add support for PMIC regulator control via the voltage regulator
manager (VRM) and oscillator buffer (XOB) RPMh accelerators.  VRM
supports manipulation of enable state, voltage, mode, and
headroom voltage.  XOB supports manipulation of enable state.

Signed-off-by: David Collins 
---
 drivers/regulator/Kconfig   |   9 +
 drivers/regulator/Makefile  |   1 +
 drivers/regulator/qcom_rpmh-regulator.c | 910 
 3 files changed, 920 insertions(+)
 create mode 100644 drivers/regulator/qcom_rpmh-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 097f617..e0ecd0a 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -671,6 +671,15 @@ config REGULATOR_QCOM_RPM
  Qualcomm RPM as a module. The module will be named
  "qcom_rpm-regulator".
 
+config REGULATOR_QCOM_RPMH
+   tristate "Qualcomm Technologies, Inc. RPMh regulator driver"
+   depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+   help
+ This driver supports control of PMIC regulators via the RPMh hardware
+ block found on Qualcomm Technologies Inc. SoCs.  RPMh regulator
+ control allows for voting on regulator state between multiple
+ processors within the SoC.
+
 config REGULATOR_QCOM_SMD_RPM
tristate "Qualcomm SMD based RPM regulator driver"
depends on QCOM_SMD_RPM
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 590674f..c2274dd 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_REGULATOR_MT6323)+= mt6323-regulator.o
 obj-$(CONFIG_REGULATOR_MT6380) += mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397) += mt6397-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o
+obj-$(CONFIG_REGULATOR_QCOM_RPMH) += qcom_rpmh-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SMD_RPM) += qcom_smd-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SPMI) += qcom_spmi-regulator.o
 obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
diff --git a/drivers/regulator/qcom_rpmh-regulator.c 
b/drivers/regulator/qcom_rpmh-regulator.c
new file mode 100644
index 000..03b1301
--- /dev/null
+++ b/drivers/regulator/qcom_rpmh-regulator.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+
+/**
+ * enum rpmh_regulator_type - supported RPMh accelerator types
+ * %VRM:   RPMh VRM accelerator which supports voting on enable, voltage,
+ * mode, and headroom voltage of LDO, SMPS, and BOB type PMIC
+ * regulators.
+ * %XOB:   RPMh XOB accelerator which supports voting on the enable state
+ * of PMIC regulators.
+ */
+enum rpmh_regulator_type {
+   VRM,
+   XOB,
+};
+
+#define RPMH_VRM_HEADROOM_MAX_UV   511000
+
+#define RPMH_REGULATOR_REG_VRM_VOLTAGE 0x0
+#define RPMH_REGULATOR_REG_ENABLE  0x4
+#define RPMH_REGULATOR_REG_VRM_MODE 0x8
+#define RPMH_REGULATOR_REG_VRM_HEADROOM 0xC
+
+#define RPMH_REGULATOR_DISABLE 0x0
+#define RPMH_REGULATOR_ENABLE  0x1
+
+#define RPMH_REGULATOR_MODE_COUNT  4
+
+/**
+ * struct rpmh_vreg_hw_data - RPMh regulator hardware configurations
+ * @regulator_type: RPMh accelerator type used to manage this
+ * regulator
+ * @ops:   Pointer to regulator ops callback structure
+ * @voltage_range: The single range of voltages supported by this
+ * PMIC regulator type
+ * @n_voltages: The number of unique voltage set points defined
+ * by voltage_range
+ * @pmic_mode_map: Array indexed by regulator framework mode
+ * containing PMIC hardware modes.  Must be large
+ * enough to index all framework modes supported
+ * by this regulator hardware type.
+ * @of_map_mode:   Maps an RPMH_REGULATOR_MODE_* mode value defined
+ * in device tree to a regulator framework mode
+ * @bypass_mode:   VRM PMIC mode value corresponding to bypass
+ *  

[PATCH] gpu/drm/amd/amdkfd: fix build, select MMU_NOTIFIER

2018-04-13 Thread Randy Dunlap
From: Randy Dunlap 

When CONFIG_MMU_NOTIFIER is not enabled, struct mmu_notifier has an
incomplete type definition, which causes build errors.

../drivers/gpu/drm/amd/amdkfd/kfd_priv.h:607:22: error: field 'mmu_notifier' 
has incomplete type
../include/linux/kernel.h:979:32: error: dereferencing pointer to incomplete 
type
../include/linux/kernel.h:980:18: error: dereferencing pointer to incomplete 
type
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:434:2: error: implicit declaration 
of function 'mmu_notifier_unregister_no_release' 
[-Werror=implicit-function-declaration]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:435:2: error: implicit declaration 
of function 'mmu_notifier_call_srcu' [-Werror=implicit-function-declaration]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:438:21: error: variable 
'kfd_process_mmu_notifier_ops' has initializer but incomplete type
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: error: unknown field 
'release' specified in initializer
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: warning: excess elements in 
struct initializer [enabled by default]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: warning: (near 
initialization for 'kfd_process_mmu_notifier_ops') [enabled by default]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:534:2: error: implicit declaration 
of function 'mmu_notifier_register' [-Werror=implicit-function-declaration]

Signed-off-by: Randy Dunlap 
Cc: Oded Gabbay 
Cc: dri-de...@lists.freedesktop.org
---
From linux-next, not mmotm, but found in mmotm builds.

 drivers/gpu/drm/amd/amdkfd/Kconfig |1 +
 1 file changed, 1 insertion(+)

--- mmotm-2018-0413-1728.orig/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ mmotm-2018-0413-1728/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -6,5 +6,6 @@ config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
+   select MMU_NOTIFIER
help
  Enable this if you want to use HSA features on AMD GPU devices.



[PATCH] gpu/drm/amd/amdkfd: fix build, select MMU_NOTIFIER

2018-04-13 Thread Randy Dunlap
From: Randy Dunlap 

When CONFIG_MMU_NOTIFIER is not enabled, struct mmu_notifier has an
incomplete type definition, which causes build errors.

../drivers/gpu/drm/amd/amdkfd/kfd_priv.h:607:22: error: field 'mmu_notifier' 
has incomplete type
../include/linux/kernel.h:979:32: error: dereferencing pointer to incomplete 
type
../include/linux/kernel.h:980:18: error: dereferencing pointer to incomplete 
type
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:434:2: error: implicit declaration 
of function 'mmu_notifier_unregister_no_release' 
[-Werror=implicit-function-declaration]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:435:2: error: implicit declaration 
of function 'mmu_notifier_call_srcu' [-Werror=implicit-function-declaration]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:438:21: error: variable 
'kfd_process_mmu_notifier_ops' has initializer but incomplete type
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: error: unknown field 
'release' specified in initializer
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: warning: excess elements in 
struct initializer [enabled by default]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:439:2: warning: (near 
initialization for 'kfd_process_mmu_notifier_ops') [enabled by default]
../drivers/gpu/drm/amd/amdkfd/kfd_process.c:534:2: error: implicit declaration 
of function 'mmu_notifier_register' [-Werror=implicit-function-declaration]

Signed-off-by: Randy Dunlap 
Cc: Oded Gabbay 
Cc: dri-de...@lists.freedesktop.org
---
From linux-next, not mmotm, but found in mmotm builds.

 drivers/gpu/drm/amd/amdkfd/Kconfig |1 +
 1 file changed, 1 insertion(+)

--- mmotm-2018-0413-1728.orig/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ mmotm-2018-0413-1728/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -6,5 +6,6 @@ config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && X86_64
imply AMD_IOMMU_V2
+   select MMU_NOTIFIER
help
  Enable this if you want to use HSA features on AMD GPU devices.



[PATCH v3 2/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das
Add the RPMh clock driver to control the RPMh managed clock resources on
some of the Qualcomm Technologies, Inc. SoCs.

Signed-off-by: David Collins 
Signed-off-by: Amit Nischal 
---
 drivers/clk/qcom/Kconfig|   9 ++
 drivers/clk/qcom/Makefile   |   1 +
 drivers/clk/qcom/clk-rpmh.c | 367 
 3 files changed, 377 insertions(+)
 create mode 100644 drivers/clk/qcom/clk-rpmh.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index fbf4532..63c3372 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -112,6 +112,15 @@ config IPQ_GCC_8074
  i2c, USB, SD/eMMC, etc. Select this for the root clock
  of ipq8074.

+config QCOM_CLK_RPMH
+   tristate "RPMh Clock Driver"
+   depends on COMMON_CLK_QCOM && QCOM_RPMH
+   help
+RPMh manages shared resources on some Qualcomm Technologies, Inc.
+SoCs. It accepts requests from other hardware subsystems via RSC.
+Say Y if you want to support the clocks exposed by RPMh on
+platforms such as sdm845.
+
 config MSM_GCC_8660
tristate "MSM8660 Global Clock Controller"
depends on COMMON_CLK_QCOM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 230332c..b2b5babf 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
 obj-$(CONFIG_QCOM_A53PLL) += a53-pll.o
 obj-$(CONFIG_QCOM_CLK_APCS_MSM8916) += apcs-msm8916.o
 obj-$(CONFIG_QCOM_CLK_RPM) += clk-rpm.o
+obj-$(CONFIG_QCOM_CLK_RPMH) += clk-rpmh.o
 obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o
 obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
new file mode 100644
index 000..fa61284
--- /dev/null
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define CLK_RPMH_ARC_EN_OFFSET 0
+#define CLK_RPMH_VRM_EN_OFFSET 4
+#define CLK_RPMH_APPS_RSC_AO_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+BIT(RPMH_ACTIVE_ONLY_STATE))
+#define CLK_RPMH_APPS_RSC_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+ BIT(RPMH_ACTIVE_ONLY_STATE) | \
+ BIT(RPMH_SLEEP_STATE))
+struct clk_rpmh {
+   struct clk_hw hw;
+   const char *res_name;
+   u32 res_addr;
+   u32 res_en_offset;
+   u32 res_on_val;
+   u32 res_off_val;
+   u32 state;
+   u32 aggr_state;
+   u32 last_sent_aggr_state;
+   u32 valid_state_mask;
+   struct rpmh_client *rpmh_client;
+   struct device *dev;
+   struct clk_rpmh *peer;
+   unsigned long rate;
+};
+
+struct rpmh_cc {
+   struct clk_onecell_data data;
+   struct clk *clks[];
+};
+
+struct clk_rpmh_desc {
+   struct clk_hw **clks;
+   size_t num_clks;
+};
+
+static DEFINE_MUTEX(rpmh_clk_lock);
+
+#define __DEFINE_CLK_RPMH(_platform, _name, _name_active, _res_name,   \
+ _res_en_offset, _res_on, _res_off, _rate) \
+   static struct clk_rpmh _platform##_##_name_active;\
+   static struct clk_rpmh _platform##_##_name = {\
+   .res_name = _res_name,\
+   .res_en_offset = _res_en_offset,  \
+   .res_on_val = _res_on,\
+   .res_off_val = _res_off,  \
+   .rate = _rate,\
+   .peer = &_platform##_##_name_active,  \
+   .valid_state_mask = CLK_RPMH_APPS_RSC_STATE_MASK, \
+   .hw.init = &(struct clk_init_data){   \
+   .ops = _rpmh_ops, \
+   .name = #_name,   \
+   },\
+   };\
+   static struct clk_rpmh _platform##_##_name_active = { \
+   .res_name = _res_name,\
+   .res_en_offset = _res_en_offset,  \
+   .res_on_val = _res_on,\
+   .res_off_val = _res_off,  \
+   .rate = _rate,\
+   .peer = &_platform##_##_name, 

[PATCH v3 2/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das
Add the RPMh clock driver to control the RPMh managed clock resources on
some of the Qualcomm Technologies, Inc. SoCs.

Signed-off-by: David Collins 
Signed-off-by: Amit Nischal 
---
 drivers/clk/qcom/Kconfig|   9 ++
 drivers/clk/qcom/Makefile   |   1 +
 drivers/clk/qcom/clk-rpmh.c | 367 
 3 files changed, 377 insertions(+)
 create mode 100644 drivers/clk/qcom/clk-rpmh.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index fbf4532..63c3372 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -112,6 +112,15 @@ config IPQ_GCC_8074
  i2c, USB, SD/eMMC, etc. Select this for the root clock
  of ipq8074.

+config QCOM_CLK_RPMH
+   tristate "RPMh Clock Driver"
+   depends on COMMON_CLK_QCOM && QCOM_RPMH
+   help
+RPMh manages shared resources on some Qualcomm Technologies, Inc.
+SoCs. It accepts requests from other hardware subsystems via RSC.
+Say Y if you want to support the clocks exposed by RPMh on
+platforms such as sdm845.
+
 config MSM_GCC_8660
tristate "MSM8660 Global Clock Controller"
depends on COMMON_CLK_QCOM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 230332c..b2b5babf 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
 obj-$(CONFIG_QCOM_A53PLL) += a53-pll.o
 obj-$(CONFIG_QCOM_CLK_APCS_MSM8916) += apcs-msm8916.o
 obj-$(CONFIG_QCOM_CLK_RPM) += clk-rpm.o
+obj-$(CONFIG_QCOM_CLK_RPMH) += clk-rpmh.o
 obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o
 obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
new file mode 100644
index 000..fa61284
--- /dev/null
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define CLK_RPMH_ARC_EN_OFFSET 0
+#define CLK_RPMH_VRM_EN_OFFSET 4
+#define CLK_RPMH_APPS_RSC_AO_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+BIT(RPMH_ACTIVE_ONLY_STATE))
+#define CLK_RPMH_APPS_RSC_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+ BIT(RPMH_ACTIVE_ONLY_STATE) | \
+ BIT(RPMH_SLEEP_STATE))
+struct clk_rpmh {
+   struct clk_hw hw;
+   const char *res_name;
+   u32 res_addr;
+   u32 res_en_offset;
+   u32 res_on_val;
+   u32 res_off_val;
+   u32 state;
+   u32 aggr_state;
+   u32 last_sent_aggr_state;
+   u32 valid_state_mask;
+   struct rpmh_client *rpmh_client;
+   struct device *dev;
+   struct clk_rpmh *peer;
+   unsigned long rate;
+};
+
+struct rpmh_cc {
+   struct clk_onecell_data data;
+   struct clk *clks[];
+};
+
+struct clk_rpmh_desc {
+   struct clk_hw **clks;
+   size_t num_clks;
+};
+
+static DEFINE_MUTEX(rpmh_clk_lock);
+
+#define __DEFINE_CLK_RPMH(_platform, _name, _name_active, _res_name,   \
+ _res_en_offset, _res_on, _res_off, _rate) \
+   static struct clk_rpmh _platform##_##_name_active;\
+   static struct clk_rpmh _platform##_##_name = {\
+   .res_name = _res_name,\
+   .res_en_offset = _res_en_offset,  \
+   .res_on_val = _res_on,\
+   .res_off_val = _res_off,  \
+   .rate = _rate,\
+   .peer = &_platform##_##_name_active,  \
+   .valid_state_mask = CLK_RPMH_APPS_RSC_STATE_MASK, \
+   .hw.init = &(struct clk_init_data){   \
+   .ops = _rpmh_ops, \
+   .name = #_name,   \
+   },\
+   };\
+   static struct clk_rpmh _platform##_##_name_active = { \
+   .res_name = _res_name,\
+   .res_en_offset = _res_en_offset,  \
+   .res_on_val = _res_on,\
+   .res_off_val = _res_off,  \
+   .rate = _rate,\
+   .peer = &_platform##_##_name, \
+   

[PATCH v3 0/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das
 [v3]
  * Addressed documentation & code review comments from Bjorn
  * Addressed bindings comments from Rob
  * Updated the patch series order for bindings

 [v2]
  * Addressed comments from Stephen
  * Addressed comments from Evan

This patch series adds a driver and device tree documentation binding
for the clock control via Resource Power Manager-hardened (RPMh) on some
Qualcomm Technologies, Inc, SoCs such as SDM845. The clock RPMh driver
would send requests for the RPMh managed clock resources.

The RPMh clock driver depends upon the RPMh driver [1] and command DB
driver [2] which are both still undergoing review.

Thanks,
Taniya

[1]: https://lkml.org/lkml/2018/3/9/979
[2]: https://lkml.org/lkml/2018/3/14/787


Taniya Das (2):
  dt-bindings: clock: Introduce QCOM RPMh clock bindings
  clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

 .../devicetree/bindings/clock/qcom,rpmh-clk.txt|  22 ++
 drivers/clk/qcom/Kconfig   |   9 +
 drivers/clk/qcom/Makefile  |   1 +
 drivers/clk/qcom/clk-rpmh.c| 367 +
 include/dt-bindings/clock/qcom,rpmh.h  |  24 ++
 5 files changed, 423 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
 create mode 100644 drivers/clk/qcom/clk-rpmh.c
 create mode 100644 include/dt-bindings/clock/qcom,rpmh.h

--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v3 1/2] dt-bindings: clock: Introduce QCOM RPMh clock bindings

2018-04-13 Thread Taniya Das
Add RPMh clock device bindings for Qualcomm Technologies, Inc. SoCs. These
devices would be used for communicating resource state requests to control
the clocks managed by RPMh.

Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
 .../devicetree/bindings/clock/qcom,rpmh-clk.txt| 22 
 include/dt-bindings/clock/qcom,rpmh.h  | 24 ++
 2 files changed, 46 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
 create mode 100644 include/dt-bindings/clock/qcom,rpmh.h

diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt 
b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
new file mode 100644
index 000..3c00765
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
@@ -0,0 +1,22 @@
+Qualcomm Technologies, Inc. RPMh Clocks
+---
+
+Resource Power Manager Hardened (RPMh) manages shared resources on
+some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
+other hardware subsystems via RSC to control clocks.
+
+Required properties :
+- compatible : shall contain "qcom,sdm845-rpmh-clk"
+
+- #clock-cells : must contain 1
+
+Example :
+
+#include <dt-bindings/clock/qcom,rpmh.h>
+
+   &apps_rsc {
+   rpmhcc: clock-controller {
+   compatible = "qcom,sdm845-rpmh-clk";
+   #clock-cells = <1>;
+   };
+   };
diff --git a/include/dt-bindings/clock/qcom,rpmh.h 
b/include/dt-bindings/clock/qcom,rpmh.h
new file mode 100644
index 000..36caab2
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,rpmh.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+
+#ifndef _DT_BINDINGS_CLK_MSM_RPMH_H
+#define _DT_BINDINGS_CLK_MSM_RPMH_H
+
+/* RPMh controlled clocks */
+#define RPMH_CXO_CLK   0
+#define RPMH_CXO_CLK_A 1
+#define RPMH_LN_BB_CLK2   2
+#define RPMH_LN_BB_CLK2_A  3
+#define RPMH_LN_BB_CLK3   4
+#define RPMH_LN_BB_CLK3_A  5
+#define RPMH_RF_CLK1   6
+#define RPMH_RF_CLK1_A 7
+#define RPMH_RF_CLK2   8
+#define RPMH_RF_CLK2_A 9
+#define RPMH_RF_CLK3   10
+#define RPMH_RF_CLK3_A 11
+#define RPMH_RF_CLK4   12
+#define RPMH_RF_CLK4_A 13
+
+#endif
--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v3 1/2] dt-bindings: clock: Introduce QCOM RPMh clock bindings

2018-04-13 Thread Taniya Das
Add RPMh clock device bindings for Qualcomm Technologies, Inc. SoCs. These
devices would be used for communicating resource state requests to control
the clocks managed by RPMh.

Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
 .../devicetree/bindings/clock/qcom,rpmh-clk.txt| 22 
 include/dt-bindings/clock/qcom,rpmh.h  | 24 ++
 2 files changed, 46 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
 create mode 100644 include/dt-bindings/clock/qcom,rpmh.h

diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt 
b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
new file mode 100644
index 000..3c00765
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
@@ -0,0 +1,22 @@
+Qualcomm Technologies, Inc. RPMh Clocks
+---
+
+Resource Power Manager Hardened (RPMh) manages shared resources on
+some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
+other hardware subsystems via RSC to control clocks.
+
+Required properties :
+- compatible : shall contain "qcom,sdm845-rpmh-clk"
+
+- #clock-cells : must contain 1
+
+Example :
+
+#include <dt-bindings/clock/qcom,rpmh.h>
+
+   &apps_rsc {
+   rpmhcc: clock-controller {
+   compatible = "qcom,sdm845-rpmh-clk";
+   #clock-cells = <1>;
+   };
+   };
diff --git a/include/dt-bindings/clock/qcom,rpmh.h 
b/include/dt-bindings/clock/qcom,rpmh.h
new file mode 100644
index 000..36caab2
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,rpmh.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+
+#ifndef _DT_BINDINGS_CLK_MSM_RPMH_H
+#define _DT_BINDINGS_CLK_MSM_RPMH_H
+
+/* RPMh controlled clocks */
+#define RPMH_CXO_CLK   0
+#define RPMH_CXO_CLK_A 1
+#define RPMH_LN_BB_CLK2   2
+#define RPMH_LN_BB_CLK2_A  3
+#define RPMH_LN_BB_CLK3   4
+#define RPMH_LN_BB_CLK3_A  5
+#define RPMH_RF_CLK1   6
+#define RPMH_RF_CLK1_A 7
+#define RPMH_RF_CLK2   8
+#define RPMH_RF_CLK2_A 9
+#define RPMH_RF_CLK3   10
+#define RPMH_RF_CLK3_A 11
+#define RPMH_RF_CLK4   12
+#define RPMH_RF_CLK4_A 13
+
+#endif
--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



[PATCH v3 0/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das
 [v3]
  * Addressed documentation & code review comments from Bjorn
  * Addressed bindings comments from Rob
  * Updated the patch series order for bindings

 [v2]
  * Addressed comments from Stephen
  * Addressed comments from Evan

This patch series adds a driver and device tree documentation binding
for the clock control via Resource Power Manager-hardened (RPMh) on some
Qualcomm Technologies, Inc, SoCs such as SDM845. The clock RPMh driver
would send requests for the RPMh managed clock resources.

The RPMh clock driver depends upon the RPMh driver [1] and command DB
driver [2] which are both still undergoing review.

Thanks,
Taniya

[1]: https://lkml.org/lkml/2018/3/9/979
[2]: https://lkml.org/lkml/2018/3/14/787


Taniya Das (2):
  dt-bindings: clock: Introduce QCOM RPMh clock bindings
  clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

 .../devicetree/bindings/clock/qcom,rpmh-clk.txt|  22 ++
 drivers/clk/qcom/Kconfig   |   9 +
 drivers/clk/qcom/Makefile  |   1 +
 drivers/clk/qcom/clk-rpmh.c| 367 +
 include/dt-bindings/clock/qcom,rpmh.h  |  24 ++
 5 files changed, 423 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
 create mode 100644 drivers/clk/qcom/clk-rpmh.c
 create mode 100644 include/dt-bindings/clock/qcom,rpmh.h

--
Qualcomm INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of the Code Aurora Forum, hosted by the Linux Foundation.



Re: [PATCH] printk: Ratelimit messages printed by console drivers

2018-04-13 Thread Sergey Senozhatsky
On (04/13/18 10:12), Steven Rostedt wrote:
> 
> > The interval is set to one hour. It is rather arbitrary selected time.
> > It is supposed to be a compromise between never print these messages,
> > do not lockup the machine, do not fill the entire buffer too quickly,
> > and get information if something changes over time.
> 
> 
> I think an hour is incredibly long. We only allow 100 lines per hour for
> printks happening inside another printk?
> 
> I think 5 minutes (at most) would probably be plenty. One minute may be
> good enough.

Besides, 100 lines is absolutely not enough for any real lockdep splat.
My call would be - up to 1000 lines in a 1 minute interval.

-ss


Re: [PATCH] printk: Ratelimit messages printed by console drivers

2018-04-13 Thread Sergey Senozhatsky
On (04/13/18 10:12), Steven Rostedt wrote:
> 
> > The interval is set to one hour. It is rather arbitrary selected time.
> > It is supposed to be a compromise between never print these messages,
> > do not lockup the machine, do not fill the entire buffer too quickly,
> > and get information if something changes over time.
> 
> 
> I think an hour is incredibly long. We only allow 100 lines per hour for
> printks happening inside another printk?
> 
> I think 5 minutes (at most) would probably be plenty. One minute may be
> good enough.

Besides, 100 lines is absolutely not enough for any real lockdep splat.
My call would be - up to 1000 lines in a 1 minute interval.

-ss


Hello Dear

2018-04-13 Thread Susan Williams
Hi,How are you? I must confess that you're a nice looking gentle man on your 
profile.Are you married?, Can we be friends? 
Susan 


Hello Dear

2018-04-13 Thread Susan Williams
Hi,How are you? I must confess that you're a nice looking gentle man on your 
profile.Are you married?, Can we be friends? 
Susan 


Re: [[PATCH v2] 1/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das

Hello Bjorn,

Thank you for the review comments.

On 4/10/2018 11:09 PM, Bjorn Andersson wrote:

On Sun 08 Apr 03:32 PDT 2018, Taniya Das wrote:


From: Amit Nischal 

Add the RPMh clock driver to control the RPMh managed clock resources on
some of the Qualcomm Technologies, Inc. SoCs.

Signed-off-by: David Collins 
Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
  drivers/clk/qcom/Kconfig  |   9 +
  drivers/clk/qcom/Makefile |   1 +
  drivers/clk/qcom/clk-rpmh.c   | 394 ++
  include/dt-bindings/clock/qcom,rpmh.h |  25 +++
  4 files changed, 429 insertions(+)
  create mode 100644 drivers/clk/qcom/clk-rpmh.c
  create mode 100644 include/dt-bindings/clock/qcom,rpmh.h

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index fbf4532..3697a6a 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -112,6 +112,15 @@ config IPQ_GCC_8074
  i2c, USB, SD/eMMC, etc. Select this for the root clock
  of ipq8074.
  
+config MSM_CLK_RPMH


QCOM_CLK_RPMH



Would update it to use "QCOM_CLK_RPMH".


+   tristate "RPMh Clock Driver"
+   depends on COMMON_CLK_QCOM && QCOM_RPMH
+   help
+RPMh manages shared resources on some Qualcomm Technologies, Inc.
+SoCs. It accepts requests from other hardware subsystems via RSC.
+Say Y if you want to support the clocks exposed by RPMh on
+platforms such as sdm845.
+
  config MSM_GCC_8660
tristate "MSM8660 Global Clock Controller"
depends on COMMON_CLK_QCOM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 230332c..b7da05b 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IPQ_GCC_8074) += gcc-ipq8074.o
  obj-$(CONFIG_IPQ_LCC_806X) += lcc-ipq806x.o
  obj-$(CONFIG_MDM_GCC_9615) += gcc-mdm9615.o
  obj-$(CONFIG_MDM_LCC_9615) += lcc-mdm9615.o
+obj-$(CONFIG_MSM_CLK_RPMH) += clk-rpmh.o
  obj-$(CONFIG_MSM_GCC_8660) += gcc-msm8660.o
  obj-$(CONFIG_MSM_GCC_8916) += gcc-msm8916.o
  obj-$(CONFIG_MSM_GCC_8960) += gcc-msm8960.o
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
new file mode 100644
index 000..763401f
--- /dev/null
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 


Unused


+#include 
+#include 
+
+#include 
+
+#include "common.h"
+#include "clk-regmap.h"


Unused



I would remove the unused header includes.


+
+#define CLK_RPMH_ARC_EN_OFFSET 0
+#define CLK_RPMH_VRM_EN_OFFSET 4
+#define CLK_RPMH_VRM_OFF_VAL 0
+#define CLK_RPMH_VRM_ON_VAL 1


Use a bool (true/false) rather than lugging around these two defines.


I would remove these in the next patch.




+#define CLK_RPMH_APPS_RSC_AO_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+BIT(RPMH_ACTIVE_ONLY_STATE))
+#define CLK_RPMH_APPS_RSC_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+ BIT(RPMH_ACTIVE_ONLY_STATE) | \
+ BIT(RPMH_SLEEP_STATE))
+struct clk_rpmh {
+   struct clk_hw hw;
+   const char *res_name;
+   u32 res_addr;
+   u32 res_en_offset;
+   u32 res_on_val;
+   u32 res_off_val;


res_off_val is 0 for all clocks.



They could change if required, which is the reason for keeping it.


+   u32 state;
+   u32 aggr_state;
+   u32 last_sent_aggr_state;


Through the use of some local variables you shouldn't have to lug around
aggr_state and last_sent_aggr_state.



We need to check the last sent state against the currently requested state;
see has_state_changed().



+   u32 valid_state_mask;
+   struct rpmh_client *rpmh_client;
+   unsigned long rate;
+   struct clk_rpmh *peer;
+};
+
+struct rpmh_cc {
+   struct clk_onecell_data data;
+   struct clk *clks[];
+};
+
+struct clk_rpmh_desc {
+   struct clk_hw **clks;
+   size_t num_clks;
+};
+
+static DEFINE_MUTEX(rpmh_clk_lock);
+
+#define __DEFINE_CLK_RPMH(_platform, _name, _name_active, _res_name, \
+ _res_en_offset, _res_on, _res_off, _rate, \
+ _state_mask, _state_on_mask)\
+   static struct clk_rpmh _platform##_##_name_active;\
+   static struct clk_rpmh _platform##_##_name = {\
+   .res_name = _res_name,\
+   .res_en_offset = _res_en_offset,  \
+   .res_on_val = _res_on,\
+   .res_off_val = _res_off,  \
+   

Re: [[PATCH v2] 1/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das

Hello Bjorn,

Thank you for the review comments.

On 4/10/2018 11:09 PM, Bjorn Andersson wrote:

On Sun 08 Apr 03:32 PDT 2018, Taniya Das wrote:


From: Amit Nischal 

Add the RPMh clock driver to control the RPMh managed clock resources on
some of the Qualcomm Technologies, Inc. SoCs.

Signed-off-by: David Collins 
Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
  drivers/clk/qcom/Kconfig  |   9 +
  drivers/clk/qcom/Makefile |   1 +
  drivers/clk/qcom/clk-rpmh.c   | 394 ++
  include/dt-bindings/clock/qcom,rpmh.h |  25 +++
  4 files changed, 429 insertions(+)
  create mode 100644 drivers/clk/qcom/clk-rpmh.c
  create mode 100644 include/dt-bindings/clock/qcom,rpmh.h

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index fbf4532..3697a6a 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -112,6 +112,15 @@ config IPQ_GCC_8074
  i2c, USB, SD/eMMC, etc. Select this for the root clock
  of ipq8074.
  
+config MSM_CLK_RPMH


QCOM_CLK_RPMH



Would update it to use "QCOM_CLK_RPMH".


+   tristate "RPMh Clock Driver"
+   depends on COMMON_CLK_QCOM && QCOM_RPMH
+   help
+RPMh manages shared resources on some Qualcomm Technologies, Inc.
+SoCs. It accepts requests from other hardware subsystems via RSC.
+Say Y if you want to support the clocks exposed by RPMh on
+platforms such as sdm845.
+
  config MSM_GCC_8660
tristate "MSM8660 Global Clock Controller"
depends on COMMON_CLK_QCOM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 230332c..b7da05b 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IPQ_GCC_8074) += gcc-ipq8074.o
  obj-$(CONFIG_IPQ_LCC_806X) += lcc-ipq806x.o
  obj-$(CONFIG_MDM_GCC_9615) += gcc-mdm9615.o
  obj-$(CONFIG_MDM_LCC_9615) += lcc-mdm9615.o
+obj-$(CONFIG_MSM_CLK_RPMH) += clk-rpmh.o
  obj-$(CONFIG_MSM_GCC_8660) += gcc-msm8660.o
  obj-$(CONFIG_MSM_GCC_8916) += gcc-msm8916.o
  obj-$(CONFIG_MSM_GCC_8960) += gcc-msm8960.o
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
new file mode 100644
index 000..763401f
--- /dev/null
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 


Unused


+#include 
+#include 
+
+#include 
+
+#include "common.h"
+#include "clk-regmap.h"


Unused



I would remove the unused header includes.


+
+#define CLK_RPMH_ARC_EN_OFFSET 0
+#define CLK_RPMH_VRM_EN_OFFSET 4
+#define CLK_RPMH_VRM_OFF_VAL 0
+#define CLK_RPMH_VRM_ON_VAL 1


Use a bool (true/false) rather than lugging around these two defines.


I would remove these in the next patch.




+#define CLK_RPMH_APPS_RSC_AO_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+BIT(RPMH_ACTIVE_ONLY_STATE))
+#define CLK_RPMH_APPS_RSC_STATE_MASK (BIT(RPMH_WAKE_ONLY_STATE) | \
+ BIT(RPMH_ACTIVE_ONLY_STATE) | \
+ BIT(RPMH_SLEEP_STATE))
+struct clk_rpmh {
+   struct clk_hw hw;
+   const char *res_name;
+   u32 res_addr;
+   u32 res_en_offset;
+   u32 res_on_val;
+   u32 res_off_val;


res_off_val is 0 for all clocks.



They could change if required, which is the reason for keeping it.


+   u32 state;
+   u32 aggr_state;
+   u32 last_sent_aggr_state;


Through the use of some local variables you shouldn't have to lug around
aggr_state and last_sent_aggr_state.



We need to check the last sent state against the currently requested state;
see has_state_changed().



+   u32 valid_state_mask;
+   struct rpmh_client *rpmh_client;
+   unsigned long rate;
+   struct clk_rpmh *peer;
+};
+
+struct rpmh_cc {
+   struct clk_onecell_data data;
+   struct clk *clks[];
+};
+
+struct clk_rpmh_desc {
+   struct clk_hw **clks;
+   size_t num_clks;
+};
+
+static DEFINE_MUTEX(rpmh_clk_lock);
+
+#define __DEFINE_CLK_RPMH(_platform, _name, _name_active, _res_name, \
+ _res_en_offset, _res_on, _res_off, _rate, \
+ _state_mask, _state_on_mask)\
+   static struct clk_rpmh _platform##_##_name_active;\
+   static struct clk_rpmh _platform##_##_name = {\
+   .res_name = _res_name,\
+   .res_en_offset = _res_en_offset,  \
+   .res_on_val = _res_on,\
+   .res_off_val = _res_off,  \
+   .rate = _rate,\
+   .peer = 

Re: [[PATCH v2] 2/2] dt-bindings: clock: Introduce QCOM RPMh clock bindings

2018-04-13 Thread Taniya Das

Hello Bjorn,

Thanks for your review comments.

On 4/10/2018 4:28 AM, Bjorn Andersson wrote:

On Sun 08 Apr 03:32 PDT 2018, Taniya Das wrote:


From: Amit Nischal 

Add RPMh clock device bindings for Qualcomm Technology Inc's SoCs. These
devices would be used for communicating resource state requests to control
the clocks managed by RPMh.

Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
  .../devicetree/bindings/clock/qcom,rpmh-clk.txt| 22 ++
  1 file changed, 22 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt

diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt 
b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
new file mode 100644
index 000..4ade82b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
@@ -0,0 +1,22 @@
+Qualcomm Technologies, Inc. RPMh Clocks
+---
+
+Resource Power Manager Hardened (RPMh) manages shared resources on
+some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
+other hardware subsystems via RSC to control clocks.
+
+Required properties :
+- compatible : shall contain "qcom,rpmh-clk-sdm845"


We have a chance to fix this to Rob's liking, so please make this
"qcom,sdm845-rpmh-clock" (or maybe "qcom,sdm845-rpmhcc"?)


I would fix it to "qcom,sdm845-rpmh-clk".




+
+- #clock-cells : must contain 1
+
+Example :
+
+#include <dt-bindings/clock/qcom,rpmh.h>
+
+   &apps_rsc {
+   rpmh: clock-controller {


I believe the clock-controller, the power-domain and the bus-scaler are
equal contenders of the label "rpmh", so please make it more specific.
How about "rpmhcc" to mimic previous platforms?



Would update it to "rpmhcc".


+   compatible = "qcom,rpmh-clk-sdm845";
+   #clock-cells = <1>;
+   };
+   };


Regards,
Bjorn



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [[PATCH v2] 1/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das

Hello Rob,

Thank you for the review comments.

On 4/13/2018 10:07 PM, Rob Herring wrote:

On Sun, Apr 08, 2018 at 04:02:12PM +0530, Taniya Das wrote:

From: Amit Nischal 

Add the RPMh clock driver to control the RPMh managed clock resources on
some of the Qualcomm Technologies, Inc. SoCs.

Signed-off-by: David Collins 
Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
  drivers/clk/qcom/Kconfig  |   9 +
  drivers/clk/qcom/Makefile |   1 +
  drivers/clk/qcom/clk-rpmh.c   | 394 ++



  include/dt-bindings/clock/qcom,rpmh.h |  25 +++


This goes with the binding patch which should come first in the series.



I would make the change in the next patch series.


  4 files changed, 429 insertions(+)
  create mode 100644 drivers/clk/qcom/clk-rpmh.c
  create mode 100644 include/dt-bindings/clock/qcom,rpmh.h


--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [[PATCH v2] 2/2] dt-bindings: clock: Introduce QCOM RPMh clock bindings

2018-04-13 Thread Taniya Das

Hello Bjorn,

Thanks for your review comments.

On 4/10/2018 4:28 AM, Bjorn Andersson wrote:

On Sun 08 Apr 03:32 PDT 2018, Taniya Das wrote:


From: Amit Nischal 

Add RPMh clock device bindings for Qualcomm Technology Inc's SoCs. These
devices would be used for communicating resource state requests to control
the clocks managed by RPMh.

Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
  .../devicetree/bindings/clock/qcom,rpmh-clk.txt| 22 ++
  1 file changed, 22 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt

diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt 
b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
new file mode 100644
index 000..4ade82b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmh-clk.txt
@@ -0,0 +1,22 @@
+Qualcomm Technologies, Inc. RPMh Clocks
+---
+
+Resource Power Manager Hardened (RPMh) manages shared resources on
+some Qualcomm Technologies Inc. SoCs. It accepts clock requests from
+other hardware subsystems via RSC to control clocks.
+
+Required properties :
+- compatible : shall contain "qcom,rpmh-clk-sdm845"


We have a chance to fix this to Rob's liking, so please make this
"qcom,sdm845-rpmh-clock" (or maybe "qcom,sdm845-rpmhcc"?)


I would fix it to "qcom,sdm845-rpmh-clk".




+
+- #clock-cells : must contain 1
+
+Example :
+
+#include <dt-bindings/clock/qcom,rpmh.h>
+
+   &apps_rsc {
+   rpmh: clock-controller {


I believe the clock-controller, the power-domain and the bus-scaler are
equal contenders of the label "rpmh", so please make it more specific.
How about "rpmhcc" to mimic previous platforms?



Would update it to "rpmhcc".


+   compatible = "qcom,rpmh-clk-sdm845";
+   #clock-cells = <1>;
+   };
+   };


Regards,
Bjorn



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


Re: [[PATCH v2] 1/2] clk: qcom: clk-rpmh: Add QCOM RPMh clock driver

2018-04-13 Thread Taniya Das

Hello Rob,

Thank you for the review comments.

On 4/13/2018 10:07 PM, Rob Herring wrote:

On Sun, Apr 08, 2018 at 04:02:12PM +0530, Taniya Das wrote:

From: Amit Nischal 

Add the RPMh clock driver to control the RPMh managed clock resources on
some of the Qualcomm Technologies, Inc. SoCs.

Signed-off-by: David Collins 
Signed-off-by: Amit Nischal 
Signed-off-by: Taniya Das 
---
  drivers/clk/qcom/Kconfig  |   9 +
  drivers/clk/qcom/Makefile |   1 +
  drivers/clk/qcom/clk-rpmh.c   | 394 ++



  include/dt-bindings/clock/qcom,rpmh.h |  25 +++


This goes with the binding patch which should come first in the series.



I would make the change in the next patch series.


  4 files changed, 429 insertions(+)
  create mode 100644 drivers/clk/qcom/clk-rpmh.c
  create mode 100644 include/dt-bindings/clock/qcom,rpmh.h


--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

--


issues with suspend on Dell XPS 13 2-in-1

2018-04-13 Thread Dennis Gilmore
Hi All,

I have a Dell XPS 13 2-in-1 (9365) that when I suspend gets warm and has
much shorter than expected battery life, it is about the same as if the
laptop just runs. I am currently running Fedora 28 with 4.16.2 kernel.

My laptop has NVMe for storage and is configured for AHCI mode in the
bios.

powertop by default shows
>> Bad   VM writeback timeout
   Bad   NMI watchdog should be turned off
   Bad   Autosuspend for unknown USB device 1-7 (138a:0091)
   Bad   Runtime PM for I2C Adapter i2c-0 (i915 gmbus dpc)
   Bad   Runtime PM for I2C Adapter i2c-1 (i915 gmbus dpb)
   Bad   Runtime PM for I2C Adapter i2c-2 (i915 gmbus dpd)
   Bad   Runtime PM for PCI Device Intel Corporation Wireless
8265 / 8275
   Bad   Runtime PM for PCI Device Intel Corporation Device
590c
   Bad   Runtime PM for PCI Device Realtek Semiconductor Co.,
Ltd. RTS525A PCI Express Card Reader
   Bad   Runtime PM for PCI Device Intel Corporation Device
9d3d
   Bad   Runtime PM for PCI Device Sandisk Corp WD Black NVMe
SSD
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #9
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Thermal subsystem
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP USB 3.0 xHCI Controller
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PMC
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP HD Audio
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #1
   Bad   Runtime PM for PCI Device Intel Corporation Device
9d4b
   Bad   Runtime PM for PCI Device Intel Corporation Xeon E3-
1200 v5/E3-1500 v5/6th Gen Core Processor Thermal Subsystem
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Integrated Sensor Hub
   Bad   Runtime PM for PCI Device Intel Corporation Device
591e
   Good  Bluetooth device interface status
   Good  Enable Audio codec power management
   Good  Runtime PM for I2C Adapter i2c-8 (Synopsys DesignWare
I2C adapter)
   Good  Autosuspend for USB device Integrated_Webcam_HD
[CNFGE16N092020028362]
   Good  Autosuspend for USB device xHCI Host Controller [usb1]
   Good  Autosuspend for USB device xHCI Host Controller [usb2]
   Good  Runtime PM for I2C Adapter i2c-7 (SMBus I801 adapter
at efa0)
   Good  Autosuspend for unknown USB device 1-2 (8087:0a2b)
   Good  Runtime PM for I2C Adapter i2c-6 (Synopsys DesignWare
I2C adapter)
   Good  I2C Device i2c-DLL077A:01 has no runtime power
management
   Good  I2C Device i2c-WCOM482F:00 has no runtime power
management
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #10
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP CSME HECI #1
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Serial IO I2C Controller #1
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP SMBus
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #5
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Serial IO I2C Controller #0
   Good  Wake-on-lan status for device virbr0-nic
   Good  Wake-on-lan status for device virbr0
   Good  Wake-on-lan status for device wlp60s0

Regards

Dennis


issues with suspend on Dell XPS 13 2-in-1

2018-04-13 Thread Dennis Gilmore
Hi All,

I have a Dell XPS 13 2-in-1 (9365) that when I suspend gets warm and has
much shorter than expected battery life, it is about the same as if the
laptop just runs. I am currently running Fedora 28 with 4.16.2 kernel.

My laptop has NVMe for storage and is configured for AHCI mode in the
bios.

powertop by default shows
>> Bad   VM writeback timeout
   Bad   NMI watchdog should be turned off
   Bad   Autosuspend for unknown USB device 1-7 (138a:0091)
   Bad   Runtime PM for I2C Adapter i2c-0 (i915 gmbus dpc)
   Bad   Runtime PM for I2C Adapter i2c-1 (i915 gmbus dpb)
   Bad   Runtime PM for I2C Adapter i2c-2 (i915 gmbus dpd)
   Bad   Runtime PM for PCI Device Intel Corporation Wireless
8265 / 8275
   Bad   Runtime PM for PCI Device Intel Corporation Device
590c
   Bad   Runtime PM for PCI Device Realtek Semiconductor Co.,
Ltd. RTS525A PCI Express Card Reader
   Bad   Runtime PM for PCI Device Intel Corporation Device
9d3d
   Bad   Runtime PM for PCI Device Sandisk Corp WD Black NVMe
SSD
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #9
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Thermal subsystem
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP USB 3.0 xHCI Controller
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PMC
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP HD Audio
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #1
   Bad   Runtime PM for PCI Device Intel Corporation Device
9d4b
   Bad   Runtime PM for PCI Device Intel Corporation Xeon E3-
1200 v5/E3-1500 v5/6th Gen Core Processor Thermal Subsystem
   Bad   Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Integrated Sensor Hub
   Bad   Runtime PM for PCI Device Intel Corporation Device
591e
   Good  Bluetooth device interface status
   Good  Enable Audio codec power management
   Good  Runtime PM for I2C Adapter i2c-8 (Synopsys DesignWare
I2C adapter)
   Good  Autosuspend for USB device Integrated_Webcam_HD
[CNFGE16N092020028362]
   Good  Autosuspend for USB device xHCI Host Controller [usb1]
   Good  Autosuspend for USB device xHCI Host Controller [usb2]
   Good  Runtime PM for I2C Adapter i2c-7 (SMBus I801 adapter
at efa0)
   Good  Autosuspend for unknown USB device 1-2 (8087:0a2b)
   Good  Runtime PM for I2C Adapter i2c-6 (Synopsys DesignWare
I2C adapter)
   Good  I2C Device i2c-DLL077A:01 has no runtime power
management
   Good  I2C Device i2c-WCOM482F:00 has no runtime power
management
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #10
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP CSME HECI #1
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Serial IO I2C Controller #1
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP SMBus
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP PCI Express Root Port #5
   Good  Runtime PM for PCI Device Intel Corporation Sunrise
Point-LP Serial IO I2C Controller #0
   Good  Wake-on-lan status for device virbr0-nic
   Good  Wake-on-lan status for device virbr0
   Good  Wake-on-lan status for device wlp60s0

Regards

Dennis


Re: [PATCH 1/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_add_mqh_4

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 10:53:13AM -0400, Dennis Dalessandro wrote:
> On 4/11/2018 3:32 AM, Jia-Ju Bai wrote:
> > i40iw_add_mqh_4() is never called in atomic context, because it
> > calls rtnl_lock() that can sleep.
> > 
> > Despite never getting called from atomic context,
> > i40iw_add_mqh_4() calls kzalloc() with GFP_ATOMIC,
> > which does not sleep for allocation.
> > GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> > which can sleep and improve the possibility of successful allocation.
> 
> Just a general comment. I don't know that this is the greatest idea. I can
> imagine instances where sleeping is OK as far as how the code is written,
> but for performance reasons you would rather fail than sleep.
> 
> As to whether that is the case here I'll let the i40iw folks comment.
>

In this case, the changes Jia made look safe and not in the perf. path. Thanks!

 
> > This is found by a static analysis tool named DCNS written by myself.
> > And I also manually check it.
> You should probably post a pointer to your tool.
> 
> -Denny


Re: [PATCH 1/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_add_mqh_4

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 10:53:13AM -0400, Dennis Dalessandro wrote:
> On 4/11/2018 3:32 AM, Jia-Ju Bai wrote:
> > i40iw_add_mqh_4() is never called in atomic context, because it
> > calls rtnl_lock() that can sleep.
> > 
> > Despite never getting called from atomic context,
> > i40iw_add_mqh_4() calls kzalloc() with GFP_ATOMIC,
> > which does not sleep for allocation.
> > GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> > which can sleep and improve the possibility of successful allocation.
> 
> Just a general comment. I don't know that this is the greatest idea. I can
> imagine instances where sleeping is OK as far as how the code is written,
> but for performance reasons you would rather fail than sleep.
> 
> As to whether that is the case here I'll let the i40iw folks comment.
>

In this case, the changes Jia made look safe and not in the perf. path. Thanks!

 
> > This is found by a static analysis tool named DCNS written by myself.
> > And I also manually check it.
> You should probably post a pointer to your tool.
> 
> -Denny


Re: [PATCH 3/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_l2param_change

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 03:33:06PM +0800, Jia-Ju Bai wrote:
> i40iw_l2param_change() is never called in atomic context.
> 
> i40iw_make_listen_node() is only set as ".l2_param_change" 
> in struct i40e_client_ops, and this function pointer is not called 
> in atomic context.
> 
> Despite never getting called from atomic context,
> i40iw_l2param_change() calls kzalloc() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of successful allocation.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai 
> ---
>

Acked-by: Shiraz Saleem  


Re: [PATCH 3/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_l2param_change

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 03:33:06PM +0800, Jia-Ju Bai wrote:
> i40iw_l2param_change() is never called in atomic context.
> 
> i40iw_make_listen_node() is only set as ".l2_param_change" 
> in struct i40e_client_ops, and this function pointer is not called 
> in atomic context.
> 
> Despite never getting called from atomic context,
> i40iw_l2param_change() calls kzalloc() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of successful allocation.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai 
> ---
>

Acked-by: Shiraz Saleem  


Re: [PATCH 1/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_add_mqh_4

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 03:32:25PM +0800, Jia-Ju Bai wrote:
> i40iw_add_mqh_4() is never called in atomic context, because it 
> calls rtnl_lock() that can sleep.
> 
> Despite never getting called from atomic context,
> i40iw_add_mqh_4() calls kzalloc() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of successful allocation.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai 
> ---

Acked-by: Shiraz Saleem 


Re: [PATCH 1/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_add_mqh_4

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 03:32:25PM +0800, Jia-Ju Bai wrote:
> i40iw_add_mqh_4() is never called in atomic context, because it 
> calls rtnl_lock() that can sleep.
> 
> Despite never getting called from atomic context,
> i40iw_add_mqh_4() calls kzalloc() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of successful allocation.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai 
> ---

Acked-by: Shiraz Saleem 


Re: [PATCH 2/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_make_listen_node

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 03:32:48PM +0800, Jia-Ju Bai wrote:
> i40iw_make_listen_node() is never called in atomic context.
> 
> i40iw_make_listen_node() is only called by i40iw_create_listen, which is 
> set as ".create_listen" in struct iw_cm_verbs.
> 
> Despite never getting called from atomic context,
> i40iw_make_listen_node() calls kzalloc() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of successful allocation.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai 
> ---
>

Acked-by: Shiraz Saleem  


Re: [PATCH 2/3] infiniband: i40iw: Replace GFP_ATOMIC with GFP_KERNEL in i40iw_make_listen_node

2018-04-13 Thread Shiraz Saleem
On Wed, Apr 11, 2018 at 03:32:48PM +0800, Jia-Ju Bai wrote:
> i40iw_make_listen_node() is never called in atomic context.
> 
> i40iw_make_listen_node() is only called by i40iw_create_listen, which is 
> set as ".create_listen" in struct iw_cm_verbs.
> 
> Despite never getting called from atomic context,
> i40iw_make_listen_node() calls kzalloc() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of successful allocation.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai 
> ---
>

Acked-by: Shiraz Saleem  


Re: general protection fault in vmx_vcpu_run

2018-04-13 Thread syzbot

syzbot has found reproducer for the following crash on upstream commit
1bad9ce155a7c010a9a5f3261ad12a6a8eccfb2c (Fri Apr 13 19:27:11 2018 +)
Merge tag 'sh-for-4.17' of git://git.libc.org/linux-sh
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=cc483201a3c6436d3550


So far this crash happened 4 times on upstream.
C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6257386297753600
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=4808329293463552
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=4943675322793984
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=-5947642240294114534

compiler: gcc (GCC) 8.0.1 20180413 (experimental)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+cc483201a3c6436d3...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed.

IPv6: ADDRCONF(NETDEV_CHANGE): veth1: link becomes ready
IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready
8021q: adding VLAN 0 to HW filter on device team0
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 6472 Comm: syzkaller667776 Not tainted 4.16.0+ #1
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:vmx_vcpu_run+0x95f/0x25f0 arch/x86/kvm/vmx.c:9746
RSP: 0018:8801c95bf368 EFLAGS: 00010002
RAX: 8801b44df6e8 RBX: 8801ada0ec40 RCX: 1100392b7e78
RDX:  RSI: 81467b15 RDI: 8801ada0ec50
RBP: 8801b44df790 R08: 8801c4efe780 R09: fbfff1141218
R10: fbfff1141218 R11: 88a090c3 R12: 8801b186aa90
R13: 8801ae61e000 R14: dc00 R15: 8801ae61e3e0
FS:  7fa147982700() GS:8801db00() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2:  CR3: 0001d780d000 CR4: 001426f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
Code: 8b a9 68 03 00 00 4c 8b b1 70 03 00 00 4c 8b b9 78 03 00 00 48 8b 89  
08 03 00 00 75 05 0f 01 c2 eb 03 0f 01 c3 48 89 4c 24 08 59 <0f> 96 81 88  
56 00 00 48 89 81 00 03 00 00 48 89 99 18 03 00 00

RIP: vmx_vcpu_run+0x95f/0x25f0 arch/x86/kvm/vmx.c:9746 RSP: 8801c95bf368
---[ end trace ffd91ebc3bb06b01 ]---
Kernel panic - not syncing: Fatal exception
Shutting down cpus with NMI
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..



Re: general protection fault in vmx_vcpu_run

2018-04-13 Thread syzbot

syzbot has found reproducer for the following crash on upstream commit
1bad9ce155a7c010a9a5f3261ad12a6a8eccfb2c (Fri Apr 13 19:27:11 2018 +)
Merge tag 'sh-for-4.17' of git://git.libc.org/linux-sh
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=cc483201a3c6436d3550


So far this crash happened 4 times on upstream.
C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6257386297753600
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=4808329293463552
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=4943675322793984
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=-5947642240294114534

compiler: gcc (GCC) 8.0.1 20180413 (experimental)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+cc483201a3c6436d3...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed.

IPv6: ADDRCONF(NETDEV_CHANGE): veth1: link becomes ready
IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready
8021q: adding VLAN 0 to HW filter on device team0
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 6472 Comm: syzkaller667776 Not tainted 4.16.0+ #1
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:vmx_vcpu_run+0x95f/0x25f0 arch/x86/kvm/vmx.c:9746
RSP: 0018:8801c95bf368 EFLAGS: 00010002
RAX: 8801b44df6e8 RBX: 8801ada0ec40 RCX: 1100392b7e78
RDX:  RSI: 81467b15 RDI: 8801ada0ec50
RBP: 8801b44df790 R08: 8801c4efe780 R09: fbfff1141218
R10: fbfff1141218 R11: 88a090c3 R12: 8801b186aa90
R13: 8801ae61e000 R14: dc00 R15: 8801ae61e3e0
FS:  7fa147982700() GS:8801db00() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2:  CR3: 0001d780d000 CR4: 001426f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
Code: 8b a9 68 03 00 00 4c 8b b1 70 03 00 00 4c 8b b9 78 03 00 00 48 8b 89  
08 03 00 00 75 05 0f 01 c2 eb 03 0f 01 c3 48 89 4c 24 08 59 <0f> 96 81 88  
56 00 00 48 89 81 00 03 00 00 48 89 99 18 03 00 00

RIP: vmx_vcpu_run+0x95f/0x25f0 arch/x86/kvm/vmx.c:9746 RSP: 8801c95bf368
---[ end trace ffd91ebc3bb06b01 ]---
Kernel panic - not syncing: Fatal exception
Shutting down cpus with NMI
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..



Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

2018-04-13 Thread Johannes Hirte
On 2018 Feb 01, Yazen Ghannam wrote:
> From: Yazen Ghannam 
> 
> The block address is saved after the block is initialized when
> threshold_init_device() is called.
> 
> Use the saved block address, if available, rather than trying to
> rediscover it.
> 
> We can avoid some *on_cpu() calls in the init path that will cause a
> call trace when resuming from suspend.
> 
> Cc:  # 4.14.x
> Signed-off-by: Yazen Ghannam 
> ---
>  arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index bf53b4549a17..8c4f8f30c779 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 
> current_addr, u32 low, u32 hi
>  {
>   u32 addr = 0, offset = 0;
>  
> + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
> + return addr;
> +
> + /* Get address from already initialized block. */
> + if (per_cpu(threshold_banks, cpu)) {
> + struct threshold_bank *bankp = per_cpu(threshold_banks, 
> cpu)[bank];
> +
> + if (bankp && bankp->blocks) {
> + struct threshold_block *blockp = &bankp->blocks[block];
> +
> + if (blockp)
> + return blockp->address;
> + }
> + }
> +
>   if (mce_flags.smca) {
>   if (smca_get_bank_type(bank) == SMCA_RESERVED)
>   return addr;
> -- 
> 2.14.1

I have a KASAN: slab-out-of-bounds, and git bisect points me to this
change:

Apr 13 00:40:32 probook kernel: 
==
Apr 13 00:40:32 probook kernel: BUG: KASAN: slab-out-of-bounds in 
get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel: Read of size 4 at addr 8803f165ddf4 by task 
swapper/0/1
Apr 13 00:40:32 probook kernel: 
Apr 13 00:40:32 probook kernel: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
4.16.0-10757-g4ca8ba4ccff9 #532
Apr 13 00:40:32 probook kernel: Hardware name: HP HP ProBook 645 G2/80FE, BIOS 
N77 Ver. 01.12 12/19/2017
Apr 13 00:40:32 probook kernel: Call Trace:
Apr 13 00:40:32 probook kernel:  dump_stack+0x5b/0x8b
Apr 13 00:40:32 probook kernel:  ? get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel:  print_address_description+0x65/0x270
Apr 13 00:40:32 probook kernel:  ? get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel:  kasan_report+0x232/0x350
Apr 13 00:40:32 probook kernel:  get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel:  ? kobject_init_and_add+0xde/0x130
Apr 13 00:40:32 probook kernel:  ? get_name+0x390/0x390
Apr 13 00:40:32 probook kernel:  ? kasan_unpoison_shadow+0x30/0x40
Apr 13 00:40:32 probook kernel:  ? kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel:  allocate_threshold_blocks+0x12c/0xc60
Apr 13 00:40:32 probook kernel:  ? kobject_add_internal+0x800/0x800
Apr 13 00:40:32 probook kernel:  ? get_block_address.isra.3+0x520/0x520
Apr 13 00:40:32 probook kernel:  ? kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel:  mce_threshold_create_device+0x35b/0x990
Apr 13 00:40:32 probook kernel:  ? init_special_inode+0x1d0/0x230
Apr 13 00:40:32 probook kernel:  threshold_init_device+0x98/0xa7
Apr 13 00:40:32 probook kernel:  ? mcheck_vendor_init_severity+0x43/0x43
Apr 13 00:40:32 probook kernel:  do_one_initcall+0x76/0x30c
Apr 13 00:40:32 probook kernel:  ? 
trace_event_raw_event_initcall_finish+0x190/0x190
Apr 13 00:40:32 probook kernel:  ? kasan_unpoison_shadow+0xb/0x40
Apr 13 00:40:32 probook kernel:  ? kasan_unpoison_shadow+0x30/0x40
Apr 13 00:40:32 probook kernel:  kernel_init_freeable+0x3d6/0x471
Apr 13 00:40:32 probook kernel:  ? rest_init+0xf0/0xf0
Apr 13 00:40:32 probook kernel:  kernel_init+0xa/0x120
Apr 13 00:40:32 probook kernel:  ? rest_init+0xf0/0xf0
Apr 13 00:40:32 probook kernel:  ret_from_fork+0x22/0x40
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: Allocated by task 1:
Apr 13 00:40:32 probook kernel:  kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel:  kmem_cache_alloc_trace+0xf3/0x1f0
Apr 13 00:40:32 probook kernel:  allocate_threshold_blocks+0x1bc/0xc60
Apr 13 00:40:32 probook kernel:  mce_threshold_create_device+0x35b/0x990
Apr 13 00:40:32 probook kernel:  threshold_init_device+0x98/0xa7
Apr 13 00:40:32 probook kernel:  do_one_initcall+0x76/0x30c
Apr 13 00:40:32 probook kernel:  kernel_init_freeable+0x3d6/0x471
Apr 13 00:40:32 probook kernel:  kernel_init+0xa/0x120
Apr 13 00:40:32 probook kernel:  ret_from_fork+0x22/0x40
Apr 13 00:40:32 probook kernel: 
Apr 13 00:40:32 probook kernel: Freed by task 0:
Apr 13 00:40:32 probook kernel: (stack is not available)
Apr 13 00:40:32 probook kernel: 
Apr 13 00:40:32 probook kernel: The buggy address belongs to the object at 
8803f165dd80
 

Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

2018-04-13 Thread Johannes Hirte
On 2018 Feb 01, Yazen Ghannam wrote:
> From: Yazen Ghannam 
> 
> The block address is saved after the block is initialized when
> threshold_init_device() is called.
> 
> Use the saved block address, if available, rather than trying to
> rediscover it.
> 
> We can avoid some *on_cpu() calls in the init path that will cause a
> call trace when resuming from suspend.
> 
> Cc:  # 4.14.x
> Signed-off-by: Yazen Ghannam 
> ---
>  arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index bf53b4549a17..8c4f8f30c779 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 
> current_addr, u32 low, u32 hi
>  {
>   u32 addr = 0, offset = 0;
>  
> + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
> + return addr;
> +
> + /* Get address from already initialized block. */
> + if (per_cpu(threshold_banks, cpu)) {
> + struct threshold_bank *bankp = per_cpu(threshold_banks, 
> cpu)[bank];
> +
> + if (bankp && bankp->blocks) {
> + struct threshold_block *blockp = &bankp->blocks[block];
> +
> + if (blockp)
> + return blockp->address;
> + }
> + }
> +
>   if (mce_flags.smca) {
>   if (smca_get_bank_type(bank) == SMCA_RESERVED)
>   return addr;
> -- 
> 2.14.1

I have a KASAN: slab-out-of-bounds, and git bisect points me to this
change:

Apr 13 00:40:32 probook kernel: 
==
Apr 13 00:40:32 probook kernel: BUG: KASAN: slab-out-of-bounds in 
get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel: Read of size 4 at addr 8803f165ddf4 by task 
swapper/0/1
Apr 13 00:40:32 probook kernel: 
Apr 13 00:40:32 probook kernel: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
4.16.0-10757-g4ca8ba4ccff9 #532
Apr 13 00:40:32 probook kernel: Hardware name: HP HP ProBook 645 G2/80FE, BIOS 
N77 Ver. 01.12 12/19/2017
Apr 13 00:40:32 probook kernel: Call Trace:
Apr 13 00:40:32 probook kernel:  dump_stack+0x5b/0x8b
Apr 13 00:40:32 probook kernel:  ? get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel:  print_address_description+0x65/0x270
Apr 13 00:40:32 probook kernel:  ? get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel:  kasan_report+0x232/0x350
Apr 13 00:40:32 probook kernel:  get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel:  ? kobject_init_and_add+0xde/0x130
Apr 13 00:40:32 probook kernel:  ? get_name+0x390/0x390
Apr 13 00:40:32 probook kernel:  ? kasan_unpoison_shadow+0x30/0x40
Apr 13 00:40:32 probook kernel:  ? kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel:  allocate_threshold_blocks+0x12c/0xc60
Apr 13 00:40:32 probook kernel:  ? kobject_add_internal+0x800/0x800
Apr 13 00:40:32 probook kernel:  ? get_block_address.isra.3+0x520/0x520
Apr 13 00:40:32 probook kernel:  ? kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel:  mce_threshold_create_device+0x35b/0x990
Apr 13 00:40:32 probook kernel:  ? init_special_inode+0x1d0/0x230
Apr 13 00:40:32 probook kernel:  threshold_init_device+0x98/0xa7
Apr 13 00:40:32 probook kernel:  ? mcheck_vendor_init_severity+0x43/0x43
Apr 13 00:40:32 probook kernel:  do_one_initcall+0x76/0x30c
Apr 13 00:40:32 probook kernel:  ? 
trace_event_raw_event_initcall_finish+0x190/0x190
Apr 13 00:40:32 probook kernel:  ? kasan_unpoison_shadow+0xb/0x40
Apr 13 00:40:32 probook kernel:  ? kasan_unpoison_shadow+0x30/0x40
Apr 13 00:40:32 probook kernel:  kernel_init_freeable+0x3d6/0x471
Apr 13 00:40:32 probook kernel:  ? rest_init+0xf0/0xf0
Apr 13 00:40:32 probook kernel:  kernel_init+0xa/0x120
Apr 13 00:40:32 probook kernel:  ? rest_init+0xf0/0xf0
Apr 13 00:40:32 probook kernel:  ret_from_fork+0x22/0x40
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: Allocated by task 1:
Apr 13 00:40:32 probook kernel:  kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel:  kmem_cache_alloc_trace+0xf3/0x1f0
Apr 13 00:40:32 probook kernel:  allocate_threshold_blocks+0x1bc/0xc60
Apr 13 00:40:32 probook kernel:  mce_threshold_create_device+0x35b/0x990
Apr 13 00:40:32 probook kernel:  threshold_init_device+0x98/0xa7
Apr 13 00:40:32 probook kernel:  do_one_initcall+0x76/0x30c
Apr 13 00:40:32 probook kernel:  kernel_init_freeable+0x3d6/0x471
Apr 13 00:40:32 probook kernel:  kernel_init+0xa/0x120
Apr 13 00:40:32 probook kernel:  ret_from_fork+0x22/0x40
Apr 13 00:40:32 probook kernel: 
Apr 13 00:40:32 probook kernel: Freed by task 0:
Apr 13 00:40:32 probook kernel: (stack is not available)
Apr 13 00:40:32 probook kernel: 
Apr 13 00:40:32 probook kernel: The buggy address belongs to the object at 
8803f165dd80
 which belongs to the cache kmalloc-128 of size 128
Apr 13 00:40:32 

Re: [PATCH v2 1/2] gpio: dwapb: Add support for 1 interrupt per port A GPIO

2018-04-13 Thread kbuild test robot
Hi Phil,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on gpio/for-next]
[also build test ERROR on v4.16 next-20180413]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Phil-Edworthy/gpio-dwapb-Add-support-for-1-interrupt-per-port-A-GPIO/20180414-042450
base:   https://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git 
for-next
config: x86_64-randconfig-ne0-04140637 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> ERROR: "of_irq_parse_one" [drivers/gpio/gpio-dwapb.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH v2 1/2] gpio: dwapb: Add support for 1 interrupt per port A GPIO

2018-04-13 Thread kbuild test robot
Hi Phil,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on gpio/for-next]
[also build test ERROR on v4.16 next-20180413]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Phil-Edworthy/gpio-dwapb-Add-support-for-1-interrupt-per-port-A-GPIO/20180414-042450
base:   https://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git 
for-next
config: x86_64-randconfig-ne0-04140637 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> ERROR: "of_irq_parse_one" [drivers/gpio/gpio-dwapb.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


.config.gz
Description: application/gzip


mmotm 2018-04-13-17-28 uploaded

2018-04-13 Thread akpm
The mm-of-the-moment snapshot 2018-04-13-17-28 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE-yyyy-mm-dd-hh-mm-ss.  Both contain the string yyyy-mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master  topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.16:
(patches marked "*" will be included in linux-next)

  origin.patch
  i-need-old-gcc.patch
* resource-fix-integer-overflow-at-reallocation-v1.patch
* mm-gup_benchmark-handle-gup-failures.patch
* gup-return-efault-on-access_ok-failure.patch
* mm-gup-document-return-value.patch
* mm-filemap-provide-dummy-filemap_page_mkwrite-for-nommu.patch
* ipc-shm-fix-use-after-free-of-shm-file-via-remap_file_pages.patch
* kexec-export-pg_swapbacked-to-vmcoreinfo.patch
* mm-slab-reschedule-cache_reap-on-the-same-cpu.patch
* proc-revalidate-misc-dentries.patch
* kexec_file-make-an-use-of-purgatory-optional.patch
* kexec_filex86powerpc-factor-out-kexec_file_ops-functions.patch
* x86-kexec_file-purge-system-ram-walking-from-prepare_elf64_headers.patch
* x86-kexec_file-remove-x86_64-dependency-from-prepare_elf64_headers.patch
* x86-kexec_file-lift-crash_max_ranges-limit-on-crash_mem-buffer.patch
* x86-kexec_file-clean-up-prepare_elf64_headers.patch
* kexec_file-x86-move-re-factored-code-to-generic-side.patch
* kexec_file-silence-compile-warnings.patch
* kexec_file-remove-checks-in-kexec_purgatory_load.patch
* kexec_file-make-purgatory_info-ehdr-const.patch
* kexec_file-search-symbols-in-read-only-kexec_purgatory.patch
* kexec_file-use-read-only-sections-in-arch_kexec_apply_relocations.patch
* kexec_file-split-up-__kexec_load_puragory.patch
* kexec_file-remove-unneeded-for-loop-in-kexec_purgatory_setup_sechdrs.patch
* kexec_file-remove-unneeded-variables-in-kexec_purgatory_setup_sechdrs.patch
* kexec_file-remove-mis-use-of-sh_offset-field-during-purgatory-load.patch
* kexec_file-allow-archs-to-set-purgatory-load-address.patch
* kexec_file-move-purgatories-sha256-to-common-code.patch
* mm-pagemap-fix-swap-offset-value-for-pmd-migration-entry.patch
* mm-pagemap-fix-swap-offset-value-for-pmd-migration-entry-fix.patch
* writeback-safer-lock-nesting.patch
* writeback-safer-lock-nesting-fix.patch
* writeback-safer-lock-nesting-v4.patch
* mm-enable-thp-migration-for-shmem-thp.patch
* memcg-remove-memcg_cgroup-id-from-idr-on-mem_cgroup_css_alloc-failure.patch
* rapidio-fix-rio_dma_transfer-error-handling.patch
* kasan-add-no_sanitize-attribute-for-clang-builds.patch
* maintainers-add-personal-addresses-for-sascha-and-me.patch
* fs-dcachec-re-add-cond_resched-in-shrink_dcache_parent.patch
* autofs-mount-point-create-should-honour-passed-in-mode.patch
* ocfs2-submit-another-bio-if-current-bio-is-full.patch
* proc-revalidate-kernel-thread-inodes-to-root-root.patch
* proc-fix-proc-loadavg-regression.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* 

mmotm 2018-04-13-17-28 uploaded

2018-04-13 Thread akpm
The mm-of-the-moment snapshot 2018-04-13-17-28 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE-yyyy-mm-dd-hh-mm-ss.  Both contain the string yyyy-mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master  topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.16:
(patches marked "*" will be included in linux-next)

  origin.patch
  i-need-old-gcc.patch
* resource-fix-integer-overflow-at-reallocation-v1.patch
* mm-gup_benchmark-handle-gup-failures.patch
* gup-return-efault-on-access_ok-failure.patch
* mm-gup-document-return-value.patch
* mm-filemap-provide-dummy-filemap_page_mkwrite-for-nommu.patch
* ipc-shm-fix-use-after-free-of-shm-file-via-remap_file_pages.patch
* kexec-export-pg_swapbacked-to-vmcoreinfo.patch
* mm-slab-reschedule-cache_reap-on-the-same-cpu.patch
* proc-revalidate-misc-dentries.patch
* kexec_file-make-an-use-of-purgatory-optional.patch
* kexec_filex86powerpc-factor-out-kexec_file_ops-functions.patch
* x86-kexec_file-purge-system-ram-walking-from-prepare_elf64_headers.patch
* x86-kexec_file-remove-x86_64-dependency-from-prepare_elf64_headers.patch
* x86-kexec_file-lift-crash_max_ranges-limit-on-crash_mem-buffer.patch
* x86-kexec_file-clean-up-prepare_elf64_headers.patch
* kexec_file-x86-move-re-factored-code-to-generic-side.patch
* kexec_file-silence-compile-warnings.patch
* kexec_file-remove-checks-in-kexec_purgatory_load.patch
* kexec_file-make-purgatory_info-ehdr-const.patch
* kexec_file-search-symbols-in-read-only-kexec_purgatory.patch
* kexec_file-use-read-only-sections-in-arch_kexec_apply_relocations.patch
* kexec_file-split-up-__kexec_load_puragory.patch
* kexec_file-remove-unneeded-for-loop-in-kexec_purgatory_setup_sechdrs.patch
* kexec_file-remove-unneeded-variables-in-kexec_purgatory_setup_sechdrs.patch
* kexec_file-remove-mis-use-of-sh_offset-field-during-purgatory-load.patch
* kexec_file-allow-archs-to-set-purgatory-load-address.patch
* kexec_file-move-purgatories-sha256-to-common-code.patch
* mm-pagemap-fix-swap-offset-value-for-pmd-migration-entry.patch
* mm-pagemap-fix-swap-offset-value-for-pmd-migration-entry-fix.patch
* writeback-safer-lock-nesting.patch
* writeback-safer-lock-nesting-fix.patch
* writeback-safer-lock-nesting-v4.patch
* mm-enable-thp-migration-for-shmem-thp.patch
* memcg-remove-memcg_cgroup-id-from-idr-on-mem_cgroup_css_alloc-failure.patch
* rapidio-fix-rio_dma_transfer-error-handling.patch
* kasan-add-no_sanitize-attribute-for-clang-builds.patch
* maintainers-add-personal-addresses-for-sascha-and-me.patch
* fs-dcachec-re-add-cond_resched-in-shrink_dcache_parent.patch
* autofs-mount-point-create-should-honour-passed-in-mode.patch
* ocfs2-submit-another-bio-if-current-bio-is-full.patch
* proc-revalidate-kernel-thread-inodes-to-root-root.patch
* proc-fix-proc-loadavg-regression.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* 

RE: [Resend Patch 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-04-13 Thread Long Li
> Subject: RE: [Resend Patch 3/3] Storvsc: Select channel based on available
> percentage of ring buffer to write
> 
> > -Original Message-
> > From: linux-kernel-ow...@vger.kernel.org
> >  On Behalf Of Long Li
> > Sent: Tuesday, March 27, 2018 5:49 PM
> > To: KY Srinivasan ; Haiyang Zhang
> > ; Stephen Hemminger
> ;
> > James E . J . Bottomley ; Martin K . Petersen
> > ; de...@linuxdriverproject.org; linux-
> > s...@vger.kernel.org; linux-kernel@vger.kernel.org;
> > net...@vger.kernel.org
> > Cc: Long Li 
> > Subject: [Resend Patch 3/3] Storvsc: Select channel based on available
> > percentage of ring buffer to write
> >
> > From: Long Li 
> >
> > This is a best effort for estimating on how busy the ring buffer is
> > for that channel, based on available buffer to write in percentage. It
> > is still possible that at the time of actual ring buffer write, the
> > space may not be available due to other processes may be writing at the
> time.
> >
> > Selecting a channel based on how full it is can reduce the possibility
> > that a ring buffer write will fail, and avoid the situation a channel
> > is over busy.
> >
> > Now it's possible that storvsc can use a smaller ring buffer size
> > (e.g. 40k bytes) to take advantage of cache locality.
> >
> > Signed-off-by: Long Li 
> > ---
> >  drivers/scsi/storvsc_drv.c | 62
> > +-
> >  1 file changed, 50 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index a2ec0bc9e9fa..b1a87072b3ab 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size,
> "Ring
> > buffer size (bytes)");
> >
> >  module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
> > MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs
> to
> > subchannels");
> > +
> > +static int ring_avail_percent_lowater = 10;
> 
> Reserving 10% of each ring buffer by default seems like more than is needed
> in the storvsc driver.  That would be about 4Kbytes for the 40K ring buffer
> you suggest, and even more for a ring buffer of 128K.  Each outgoing record is
> only about 344 bytes (I'd have to check exactly).  With the new channel
> selection algorithm below, the only time we use a channel that is already
> below the low water mark is when no channel could be found that is above
> the low water mark.   There could be a case of two or more threads deciding
> that a channel is above the low water mark at the same time and both
> choosing it, but that's likely to be rare.  So it seems like we could set the

It's not rare for two processes to check the same channel at the same time
when running a multi-process I/O workload. The CPU-to-channel mapping is not
1:1.

> default low water mark to 5 percent or even 3 percent, which will let more of
> the ring buffer be used, and let a channel be assigned according to the
> algorithm, rather than falling through to the default because all channels
> appear to be "full".

It seems it's not about how big the ring buffer is; e.g. even if you have a ring
buffer of infinite size, it won't help performance if it's getting queued
all the time while other ring buffers are nearly empty. It's more about how
multiple ring buffers are utilized in a reasonable and balanced way.
Testing shows 10 is a good choice, while 3 is prone to return BUSY and trigger
block layer retry.

> 
> > +module_param(ring_avail_percent_lowater, int, S_IRUGO);
> > +MODULE_PARM_DESC(ring_avail_percent_lowater,
> > +   "Select a channel if available ring size > this in percent");
> > +
> >  /*
> >   * Timeout in seconds for all devices managed by this driver.
> >   */
> > @@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device
> > *device,  {
> > struct storvsc_device *stor_device;
> > struct vstor_packet *vstor_packet;
> > -   struct vmbus_channel *outgoing_channel;
> > +   struct vmbus_channel *outgoing_channel, *channel;
> > int ret = 0;
> > -   struct cpumask alloced_mask;
> > +   struct cpumask alloced_mask, other_numa_mask;
> > int tgt_cpu;
> >
> > vstor_packet = &request->vstor_packet; @@ -1301,22 +1307,53 @@
> > static int storvsc_do_io(struct hv_device *device,
> > /*
> >  * Select an an appropriate channel to send the request out.
> >  */
> > -
> > if (stor_device->stor_chns[q_num] != NULL) {
> > outgoing_channel = stor_device->stor_chns[q_num];
> > -   if (outgoing_channel->target_cpu == smp_processor_id()) {
> > +   if (outgoing_channel->target_cpu == q_num) {
> > /*
> >  * Ideally, we want to pick a different channel if
> >  

RE: [Resend Patch 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-04-13 Thread Long Li
> Subject: RE: [Resend Patch 3/3] Storvsc: Select channel based on available
> percentage of ring buffer to write
> 
> > -Original Message-
> > From: linux-kernel-ow...@vger.kernel.org
> >  On Behalf Of Long Li
> > Sent: Tuesday, March 27, 2018 5:49 PM
> > To: KY Srinivasan ; Haiyang Zhang
> > ; Stephen Hemminger
> ;
> > James E . J . Bottomley ; Martin K . Petersen
> > ; de...@linuxdriverproject.org; linux-
> > s...@vger.kernel.org; linux-kernel@vger.kernel.org;
> > net...@vger.kernel.org
> > Cc: Long Li 
> > Subject: [Resend Patch 3/3] Storvsc: Select channel based on available
> > percentage of ring buffer to write
> >
> > From: Long Li 
> >
> > This is a best effort for estimating on how busy the ring buffer is
> > for that channel, based on available buffer to write in percentage. It
> > is still possible that at the time of actual ring buffer write, the
> > space may not be available due to other processes may be writing at the
> time.
> >
> > Selecting a channel based on how full it is can reduce the possibility
> > that a ring buffer write will fail, and avoid the situation a channel
> > is over busy.
> >
> > Now it's possible that storvsc can use a smaller ring buffer size
> > (e.g. 40k bytes) to take advantage of cache locality.
> >
> > Signed-off-by: Long Li 
> > ---
> >  drivers/scsi/storvsc_drv.c | 62
> > +-
> >  1 file changed, 50 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index a2ec0bc9e9fa..b1a87072b3ab 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size,
> "Ring
> > buffer size (bytes)");
> >
> >  module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
> > MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs
> to
> > subchannels");
> > +
> > +static int ring_avail_percent_lowater = 10;
> 
> Reserving 10% of each ring buffer by default seems like more than is needed
> in the storvsc driver.  That would be about 4Kbytes for the 40K ring buffer
> you suggest, and even more for a ring buffer of 128K.  Each outgoing record is
> only about 344 bytes (I'd have to check exactly).  With the new channel
> selection algorithm below, the only time we use a channel that is already
> below the low water mark is when no channel could be found that is above
> the low water mark.   There could be a case of two or more threads deciding
> that a channel is above the low water mark at the same time and both
> choosing it, but that's likely to be rare.  So it seems like we could set the

It's not rare for two processes to check the same channel at the same time
when running a multi-process I/O workload. The CPU-to-channel mapping is not
1:1.

> default low water mark to 5 percent or even 3 percent, which will let more of
> the ring buffer be used, and let a channel be assigned according to the
> algorithm, rather than falling through to the default because all channels
> appear to be "full".

It seems it's not about how big the ring buffer is; e.g. even if you have a ring
buffer of infinite size, it won't help performance if it's getting queued
all the time while other ring buffers are nearly empty. It's more about how
multiple ring buffers are utilized in a reasonable and balanced way.
Testing shows 10 is a good choice, while 3 is prone to return BUSY and trigger
block layer retry.

> 
> > +module_param(ring_avail_percent_lowater, int, S_IRUGO);
> > +MODULE_PARM_DESC(ring_avail_percent_lowater,
> > +   "Select a channel if available ring size > this in percent");
> > +
> >  /*
> >   * Timeout in seconds for all devices managed by this driver.
> >   */
> > @@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device
> > *device,  {
> > struct storvsc_device *stor_device;
> > struct vstor_packet *vstor_packet;
> > -   struct vmbus_channel *outgoing_channel;
> > +   struct vmbus_channel *outgoing_channel, *channel;
> > int ret = 0;
> > -   struct cpumask alloced_mask;
> > +   struct cpumask alloced_mask, other_numa_mask;
> > int tgt_cpu;
> >
> > vstor_packet = &request->vstor_packet; @@ -1301,22 +1307,53 @@
> > static int storvsc_do_io(struct hv_device *device,
> > /*
> >  * Select an an appropriate channel to send the request out.
> >  */
> > -
> > if (stor_device->stor_chns[q_num] != NULL) {
> > outgoing_channel = stor_device->stor_chns[q_num];
> > -   if (outgoing_channel->target_cpu == smp_processor_id()) {
> > +   if (outgoing_channel->target_cpu == q_num) {
> > /*
> >  * Ideally, we want to pick a different channel if
> >  * available on the same NUMA node.
> >  */
> > cpumask_and(&alloced_mask, &stor_device-
> >alloced_cpus,
> >
> cpumask_of_node(cpu_to_node(q_num)));
> > -   

Re: [PATCH v3 2/2] iommu/amd: Add basic debugfs infrastructure for AMD IOMMU

2018-04-13 Thread Mehta, Sohil
On Fri, 2018-04-06 at 08:17 -0500, Gary R Hook wrote:
> 
> +
> +void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
> +{
> + char name[MAX_NAME_LEN + 1];
> + struct dentry *d_top;
> +
> + if (!debugfs_initialized())

Probably not needed.

> + return;
> +
> + mutex_lock(&amd_iommu_debugfs_lock);
> + if (!amd_iommu_debugfs) {
> + d_top = iommu_debugfs_setup();
> + if (d_top)
> + amd_iommu_debugfs =
> debugfs_create_dir("amd", d_top);
> + }
> + mutex_unlock(&amd_iommu_debugfs_lock);


You can do the above only once if you iterate over the IOMMUs here
 instead of doing it in amd_iommu_init.

> + if (amd_iommu_debugfs) {
> + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu-
> >index);
> + iommu->debugfs = debugfs_create_dir(name,
> + amd_iommu_debugf
> s);
> + if (!iommu->debugfs) {
> + debugfs_remove_recursive(amd_iommu_debugfs);
> + amd_iommu_debugfs = NULL;
> + }
> + }
> +}

-Sohil

Re: [PATCH v3 2/2] iommu/amd: Add basic debugfs infrastructure for AMD IOMMU

2018-04-13 Thread Mehta, Sohil
On Fri, 2018-04-06 at 08:17 -0500, Gary R Hook wrote:
> 
> +
> +void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
> +{
> + char name[MAX_NAME_LEN + 1];
> + struct dentry *d_top;
> +
> + if (!debugfs_initialized())

Probably not needed.

> + return;
> +
> + mutex_lock(_iommu_debugfs_lock);
> + if (!amd_iommu_debugfs) {
> + d_top = iommu_debugfs_setup();
> + if (d_top)
> + amd_iommu_debugfs =
> debugfs_create_dir("amd", d_top);
> + }
> + mutex_unlock(_iommu_debugfs_lock);


You can do the above only once if you iterate over the IOMMUs here
 instead of doing it in amd_iommu_init.

> + if (amd_iommu_debugfs) {
> + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu-
> >index);
> + iommu->debugfs = debugfs_create_dir(name,
> + amd_iommu_debugf
> s);
> + if (!iommu->debugfs) {
> + debugfs_remove_recursive(amd_iommu_debugfs);
> + amd_iommu_debugfs = NULL;
> + }
> + }
> +}

-Sohil

Re: [RFC PATCH v2 3/6] sched: Add over-utilization/tipping point indicator

2018-04-13 Thread Joel Fernandes
Hi,

On Fri, Apr 6, 2018 at 8:36 AM, Dietmar Eggemann
 wrote:
> From: Thara Gopinath 
>
> Energy-aware scheduling should only operate when the system is not
> overutilized. There must be cpu time available to place tasks based on
> utilization in an energy-aware fashion, i.e. to pack tasks on
> energy-efficient cpus without harming the overall throughput.
>
> In case the system operates above this tipping point the tasks have to
> be placed based on task and cpu load in the classical way of spreading
> tasks across as many cpus as possible.
>
> The point in which a system switches from being not overutilized to
> being overutilized is called the tipping point.
>
> Such a tipping point indicator on a sched domain as the system
> boundary is introduced here. As soon as one cpu of a sched domain is
> overutilized the whole sched domain is declared overutilized as well.
> A cpu becomes overutilized when its utilization is higher than 80%
> (capacity_margin) of its capacity.
>
> The implementation takes advantage of the shared sched domain which is
> shared across all per-cpu views of a sched domain level. The new
> overutilized flag is placed in this shared sched domain.
>
> Load balancing is skipped in case the energy model is present and the
> sched domain is not overutilized because under this condition the
> predominantly load-per-capacity driven load-balancer should not
> interfere with the energy-aware wakeup placement based on utilization.
>
> In case the total utilization of a sched domain is greater than the
> total sched domain capacity the overutilized flag is set at the parent
> sched domain level to let other sched groups help getting rid of the
> overutilization of cpus.
>
> Signed-off-by: Thara Gopinath 
> Signed-off-by: Dietmar Eggemann 
> ---
>  include/linux/sched/topology.h |  1 +
>  kernel/sched/fair.c| 62 
> --
>  kernel/sched/sched.h   |  1 +
>  kernel/sched/topology.c| 12 +++-
>  4 files changed, 65 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index 26347741ba50..dd001c232646 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -72,6 +72,7 @@ struct sched_domain_shared {
> atomic_tref;
> atomic_tnr_busy_cpus;
> int has_idle_cores;
> +   int overutilized;
>  };
>
>  struct sched_domain {
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 0a76ad2ef022..6960e5ef3c14 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5345,6 +5345,28 @@ static inline void hrtick_update(struct rq *rq)
>  }
>  #endif
>
> +#ifdef CONFIG_SMP
> +static inline int cpu_overutilized(int cpu);
> +
> +static inline int sd_overutilized(struct sched_domain *sd)
> +{
> +   return READ_ONCE(sd->shared->overutilized);
> +}
> +
> +static inline void update_overutilized_status(struct rq *rq)
> +{
> +   struct sched_domain *sd;
> +
> +   rcu_read_lock();
> +   sd = rcu_dereference(rq->sd);
> +   if (sd && !sd_overutilized(sd) && cpu_overutilized(rq->cpu))
> +   WRITE_ONCE(sd->shared->overutilized, 1);
> +   rcu_read_unlock();
> +}
> +#else
> +static inline void update_overutilized_status(struct rq *rq) {}
> +#endif /* CONFIG_SMP */
> +
>  /*
>   * The enqueue_task method is called before nr_running is
>   * increased. Here we update the fair scheduling stats and
> @@ -5394,8 +5416,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct 
> *p, int flags)
> update_cfs_group(se);
> }
>
> -   if (!se)
> +   if (!se) {
> add_nr_running(rq, 1);
> +   update_overutilized_status(rq);
> +   }

I'm wondering whether it makes sense to also consider scenarios where
other classes cause CPUs in the domain to go above the tipping point.
In that case too, it makes sense not to do EAS in that domain
because of the overutilization.

I guess task_fits is using cpu_util, which is PELT only at the moment...
so this may require some other method, like aggregating CFS PELT with
RT-PELT and DL running bw or something.

thanks,

- Joel


Re: [RFC PATCH v2 3/6] sched: Add over-utilization/tipping point indicator

2018-04-13 Thread Joel Fernandes
Hi,

On Fri, Apr 6, 2018 at 8:36 AM, Dietmar Eggemann
 wrote:
> From: Thara Gopinath 
>
> Energy-aware scheduling should only operate when the system is not
> overutilized. There must be cpu time available to place tasks based on
> utilization in an energy-aware fashion, i.e. to pack tasks on
> energy-efficient cpus without harming the overall throughput.
>
> In case the system operates above this tipping point the tasks have to
> be placed based on task and cpu load in the classical way of spreading
> tasks across as many cpus as possible.
>
> The point in which a system switches from being not overutilized to
> being overutilized is called the tipping point.
>
> Such a tipping point indicator on a sched domain as the system
> boundary is introduced here. As soon as one cpu of a sched domain is
> overutilized the whole sched domain is declared overutilized as well.
> A cpu becomes overutilized when its utilization is higher than 80%
> (capacity_margin) of its capacity.
>
> The implementation takes advantage of the shared sched domain which is
> shared across all per-cpu views of a sched domain level. The new
> overutilized flag is placed in this shared sched domain.
>
> Load balancing is skipped in case the energy model is present and the
> sched domain is not overutilized because under this condition the
> predominantly load-per-capacity driven load-balancer should not
> interfere with the energy-aware wakeup placement based on utilization.
>
> In case the total utilization of a sched domain is greater than the
> total sched domain capacity the overutilized flag is set at the parent
> sched domain level to let other sched groups help getting rid of the
> overutilization of cpus.
>
> Signed-off-by: Thara Gopinath 
> Signed-off-by: Dietmar Eggemann 
> ---
>  include/linux/sched/topology.h |  1 +
>  kernel/sched/fair.c| 62 
> --
>  kernel/sched/sched.h   |  1 +
>  kernel/sched/topology.c| 12 +++-
>  4 files changed, 65 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index 26347741ba50..dd001c232646 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -72,6 +72,7 @@ struct sched_domain_shared {
> atomic_tref;
> atomic_tnr_busy_cpus;
> int has_idle_cores;
> +   int overutilized;
>  };
>
>  struct sched_domain {
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 0a76ad2ef022..6960e5ef3c14 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5345,6 +5345,28 @@ static inline void hrtick_update(struct rq *rq)
>  }
>  #endif
>
> +#ifdef CONFIG_SMP
> +static inline int cpu_overutilized(int cpu);
> +
> +static inline int sd_overutilized(struct sched_domain *sd)
> +{
> +   return READ_ONCE(sd->shared->overutilized);
> +}
> +
> +static inline void update_overutilized_status(struct rq *rq)
> +{
> +   struct sched_domain *sd;
> +
> +   rcu_read_lock();
> +   sd = rcu_dereference(rq->sd);
> +   if (sd && !sd_overutilized(sd) && cpu_overutilized(rq->cpu))
> +   WRITE_ONCE(sd->shared->overutilized, 1);
> +   rcu_read_unlock();
> +}
> +#else
> +static inline void update_overutilized_status(struct rq *rq) {}
> +#endif /* CONFIG_SMP */
> +
>  /*
>   * The enqueue_task method is called before nr_running is
>   * increased. Here we update the fair scheduling stats and
> @@ -5394,8 +5416,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct 
> *p, int flags)
> update_cfs_group(se);
> }
>
> -   if (!se)
> +   if (!se) {
> add_nr_running(rq, 1);
> +   update_overutilized_status(rq);
> +   }

I'm wondering whether it makes sense to also consider scenarios where
other classes cause CPUs in the domain to go above the tipping point.
In that case too, it makes sense not to do EAS in that domain
because of the overutilization.

I guess task_fits is using cpu_util, which is PELT only at the moment...
so this may require some other method, like aggregating CFS PELT with
RT-PELT and DL running bw or something.

thanks,

- Joel


Re: [PATCH v3 1/2] iommu - Enable debugfs exposure of the IOMMU

2018-04-13 Thread Mehta, Sohil
On Fri, 2018-04-06 at 08:17 -0500, Gary R Hook wrote:
> 
> 
> +struct dentry *iommu_debugfs_setup(void)
> +{
> + if (!debugfs_initialized())

This check is probably not needed.

> + return NULL;
> +
> + if (!iommu_debugfs_dir)
> + iommu_debugfs_dir = debugfs_create_dir("iommu",
> NULL);
> +
> + if (iommu_debugfs_dir)
> + pr_warn("WARNING: IOMMU DEBUGFS SUPPORT HAS BEEN
> ENABLED IN THIS KERNEL\n");
> +

As this gets called for each IOMMU, do you want to use pr_warn_once?

> + return iommu_debugfs_dir;
> +}
> +EXPORT_SYMBOL_GPL(iommu_debugfs_setup);

-Sohil

Re: [PATCH v3 1/2] iommu - Enable debugfs exposure of the IOMMU

2018-04-13 Thread Mehta, Sohil
On Fri, 2018-04-06 at 08:17 -0500, Gary R Hook wrote:
> 
> 
> +struct dentry *iommu_debugfs_setup(void)
> +{
> + if (!debugfs_initialized())

This check is probably not needed.

> + return NULL;
> +
> + if (!iommu_debugfs_dir)
> + iommu_debugfs_dir = debugfs_create_dir("iommu",
> NULL);
> +
> + if (iommu_debugfs_dir)
> + pr_warn("WARNING: IOMMU DEBUGFS SUPPORT HAS BEEN
> ENABLED IN THIS KERNEL\n");
> +

As this gets called for each IOMMU, do you want to use pr_warn_once?

> + return iommu_debugfs_dir;
> +}
> +EXPORT_SYMBOL_GPL(iommu_debugfs_setup);

-Sohil

Re: [PATCH] cifs: smb2ops: Fix NULL check in smb2_query_symlink

2018-04-13 Thread Pavel Shilovsky
2018-04-13 8:13 GMT-07:00 Gustavo A. R. Silva :
> The current code null checks variable err_buf, which is always null
> when it is checked, hence utf16_path is free'd and the function
> returns -ENOENT every time it is called, making it impossible for the
> execution path to reach the following code:
>
> err_buf = err_iov.iov_base;
>
> Fix this by null checking err_iov.iov_base instead of err_buf. Also,
> notice that err_buf no longer needs to be initialized to NULL.
>
> Addresses-Coverity-ID: 1467876 ("Logically dead code")
> Fixes: 2d636199e400 ("cifs: Change SMB2_open to return an iov for the error 
> parameter")
> Signed-off-by: Gustavo A. R. Silva 
> ---
>  fs/cifs/smb2ops.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
> index b4ae932..38ebf3f 100644
> --- a/fs/cifs/smb2ops.c
> +++ b/fs/cifs/smb2ops.c
> @@ -1452,7 +1452,7 @@ smb2_query_symlink(const unsigned int xid, struct 
> cifs_tcon *tcon,
> struct cifs_open_parms oparms;
> struct cifs_fid fid;
> struct kvec err_iov = {NULL, 0};
> -   struct smb2_err_rsp *err_buf = NULL;
> +   struct smb2_err_rsp *err_buf;
> struct smb2_symlink_err_rsp *symlink;
> unsigned int sub_len;
> unsigned int sub_offset;
> @@ -1476,7 +1476,7 @@ smb2_query_symlink(const unsigned int xid, struct 
> cifs_tcon *tcon,
>
> rc = SMB2_open(xid, , utf16_path, , NULL, _iov);
>
> -   if (!rc || !err_buf) {
> +   if (!rc || !err_iov.iov_base) {
> kfree(utf16_path);
> return -ENOENT;
> }
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Thanks!

Reviewed-by: Pavel Shilovsky 

--
Best regards,
Pavel Shilovsky


Re: [PATCH] cifs: smb2ops: Fix NULL check in smb2_query_symlink

2018-04-13 Thread Pavel Shilovsky
2018-04-13 8:13 GMT-07:00 Gustavo A. R. Silva :
> The current code null checks variable err_buf, which is always null
> when it is checked, hence utf16_path is free'd and the function
> returns -ENOENT every time it is called, making it impossible for the
> execution path to reach the following code:
>
> err_buf = err_iov.iov_base;
>
> Fix this by null checking err_iov.iov_base instead of err_buf. Also,
> notice that err_buf no longer needs to be initialized to NULL.
>
> Addresses-Coverity-ID: 1467876 ("Logically dead code")
> Fixes: 2d636199e400 ("cifs: Change SMB2_open to return an iov for the error 
> parameter")
> Signed-off-by: Gustavo A. R. Silva 
> ---
>  fs/cifs/smb2ops.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
> index b4ae932..38ebf3f 100644
> --- a/fs/cifs/smb2ops.c
> +++ b/fs/cifs/smb2ops.c
> @@ -1452,7 +1452,7 @@ smb2_query_symlink(const unsigned int xid, struct 
> cifs_tcon *tcon,
> struct cifs_open_parms oparms;
> struct cifs_fid fid;
> struct kvec err_iov = {NULL, 0};
> -   struct smb2_err_rsp *err_buf = NULL;
> +   struct smb2_err_rsp *err_buf;
> struct smb2_symlink_err_rsp *symlink;
> unsigned int sub_len;
> unsigned int sub_offset;
> @@ -1476,7 +1476,7 @@ smb2_query_symlink(const unsigned int xid, struct 
> cifs_tcon *tcon,
>
> rc = SMB2_open(xid, , utf16_path, , NULL, _iov);
>
> -   if (!rc || !err_buf) {
> +   if (!rc || !err_iov.iov_base) {
> kfree(utf16_path);
> return -ENOENT;
> }
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Thanks!

Reviewed-by: Pavel Shilovsky 

--
Best regards,
Pavel Shilovsky


Re: [PATCH v4 2/2] MIPS: io: add a barrier after register read in readX()

2018-04-13 Thread James Hogan
On Thu, Apr 12, 2018 at 10:33:42PM -0400, Sinan Kaya wrote:
> On 4/12/2018 10:30 PM, Sinan Kaya wrote:
> > +   /* prevent prefetching of coherent DMA dma prematurely */   \
> 
> I tried to write DMA data but my keyboard is not cooperating. I'll hold onto
> posting another version until I hear back from you for wmb().

No problem, I've applied both for 4.17 and tweaked the comment.

Thanks
James


signature.asc
Description: Digital signature


Re: [PATCH v4 2/2] MIPS: io: add a barrier after register read in readX()

2018-04-13 Thread James Hogan
On Thu, Apr 12, 2018 at 10:33:42PM -0400, Sinan Kaya wrote:
> On 4/12/2018 10:30 PM, Sinan Kaya wrote:
> > +   /* prevent prefetching of coherent DMA dma prematurely */   \
> 
> I tried to write DMA data but my keyboard is not cooperating. I'll hold onto
> posting another version until I hear back from you for wmb().

No problem, I've applied both for 4.17 and tweaked the comment.

Thanks
James


signature.asc
Description: Digital signature


Re: [PATCH] virtio_balloon: add array of stat names

2018-04-13 Thread Jonathan Helman



On 04/13/2018 03:07 PM, Michael S. Tsirkin wrote:

On Fri, Apr 13, 2018 at 11:53:31AM -0700, Jonathan Helman wrote:



On 04/13/2018 06:44 AM, Michael S. Tsirkin wrote:

Jason Wang points out that it's vary hard for users to build an array of


s/vary/very


stat names. The naive thing is to use VIRTIO_BALLOON_S_NR but that
breaks if we add more stats.

Let's add an array of reasonably readable names.


Thanks for doing this, this is goodness. Clients of this interface will now
only require a modification to their internal copy of the header file (if
they have one), rather than updating their string array as well.



Fixes: 6c64fe7f2 ("virtio_balloon: export hugetlb page allocation counts")


I think this is a tad bit confusing since the only way you'd know why this
patch "fixes" 6c64fe7f2 is by reading the LKML archives and finding Jason's
comment.


So

... add more stats, as recently done by commit 6c64fe7f2 ("virtio_balloon: export hugetlb
page allocation counts")




Sure. Thanks.

Jon





Cc: Jason Wang 
Cc: Jonathan Helman 
Signed-off-by: Michael S. Tsirkin 



Reviewed-by: Jonathan Helman 


---
   include/uapi/linux/virtio_balloon.h | 15 +++
   1 file changed, 15 insertions(+)

diff --git a/include/uapi/linux/virtio_balloon.h 
b/include/uapi/linux/virtio_balloon.h
index 9e02137..1477c17 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -64,6 +64,21 @@ struct virtio_balloon_config {
   #define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation 
failures */
   #define VIRTIO_BALLOON_S_NR   10
+#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
+   VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
+   VIRTIO_BALLOON_S_NAMES_prefix "swap-out", \
+   VIRTIO_BALLOON_S_NAMES_prefix "major-faults", \
+   VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", \
+   VIRTIO_BALLOON_S_NAMES_prefix "free-memory", \
+   VIRTIO_BALLOON_S_NAMES_prefix "total-memory", \
+   VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
+   VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
+   VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
+   VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
+}
+
+#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
+
   /*
* Memory statistics structure.
* Driver fills an array of these structures and passes to device.



Re: [PATCH] virtio_balloon: add array of stat names

2018-04-13 Thread Jonathan Helman



On 04/13/2018 03:07 PM, Michael S. Tsirkin wrote:

On Fri, Apr 13, 2018 at 11:53:31AM -0700, Jonathan Helman wrote:



On 04/13/2018 06:44 AM, Michael S. Tsirkin wrote:

Jason Wang points out that it's vary hard for users to build an array of


s/vary/very


stat names. The naive thing is to use VIRTIO_BALLOON_S_NR but that
breaks if we add more stats.

Let's add an array of reasonably readable names.


Thanks for doing this, this is goodness. Clients of this interface will now
only require a modification to their internal copy of the header file (if
they have one), rather than updating their string array as well.



Fixes: 6c64fe7f2 ("virtio_balloon: export hugetlb page allocation counts")


I think this is a tad bit confusing since the only way you'd know why this
patch "fixes" 6c64fe7f2 is by reading the LKML archives and finding Jason's
comment.


So

... add more stats, as recently done by commit 6c64fe7f2 ("virtio_balloon: export hugetlb
page allocation counts")




Sure. Thanks.

Jon





Cc: Jason Wang 
Cc: Jonathan Helman 
Signed-off-by: Michael S. Tsirkin 



Reviewed-by: Jonathan Helman 


---
   include/uapi/linux/virtio_balloon.h | 15 +++
   1 file changed, 15 insertions(+)

diff --git a/include/uapi/linux/virtio_balloon.h 
b/include/uapi/linux/virtio_balloon.h
index 9e02137..1477c17 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -64,6 +64,21 @@ struct virtio_balloon_config {
   #define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation 
failures */
   #define VIRTIO_BALLOON_S_NR   10
+#define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
+   VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
+   VIRTIO_BALLOON_S_NAMES_prefix "swap-out", \
+   VIRTIO_BALLOON_S_NAMES_prefix "major-faults", \
+   VIRTIO_BALLOON_S_NAMES_prefix "minor-faults", \
+   VIRTIO_BALLOON_S_NAMES_prefix "free-memory", \
+   VIRTIO_BALLOON_S_NAMES_prefix "total-memory", \
+   VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
+   VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
+   VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
+   VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
+}
+
+#define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
+
   /*
* Memory statistics structure.
* Driver fills an array of these structures and passes to device.



RE: [Resend Patch 2/3] Netvsc: Use the vmbus functiton to calculate ring buffer percentage

2018-04-13 Thread Michael Kelley (EOSG)
> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org  
> On Behalf
> Of Long Li
> Sent: Tuesday, March 27, 2018 5:49 PM
> To: KY Srinivasan ; Haiyang Zhang 
> ; Stephen
> Hemminger ; James E . J . Bottomley 
> ;
> Martin K . Petersen ; 
> de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-kernel@vger.kernel.org; net...@vger.kernel.org
> Cc: Long Li 
> Subject: [Resend Patch 2/3] Netvsc: Use the vmbus functiton to calculate ring 
> buffer
> percentage
> 
> From: Long Li 
> 
> In Vmbus, we have defined a function to calculate available ring buffer
> percentage to write.
> 
> Use that function and remove netvsc's private version.
> 
> Signed-off-by: Long Li 

Reviewed-by:  Michael Kelley 

> ---
>  drivers/net/hyperv/hyperv_net.h |  1 -
>  drivers/net/hyperv/netvsc.c | 17 +++--
>  drivers/net/hyperv/netvsc_drv.c |  3 ---
>  3 files changed, 3 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index cd538d5a7986..a0199ab13d67 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -189,7 +189,6 @@ struct netvsc_device;
>  struct net_device_context;
> 
>  extern u32 netvsc_ring_bytes;
> -extern struct reciprocal_value netvsc_ring_reciprocal;
> 
>  struct netvsc_device *netvsc_device_add(struct hv_device *device,
>   const struct netvsc_device_info *info);
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 0265d703eb03..8af0069e4d8c 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -31,7 +31,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
> 
>  #include 
> 
> @@ -590,17 +589,6 @@ void netvsc_device_remove(struct hv_device *device)
>  #define RING_AVAIL_PERCENT_HIWATER 20
>  #define RING_AVAIL_PERCENT_LOWATER 10
> 
> -/*
> - * Get the percentage of available bytes to write in the ring.
> - * The return value is in range from 0 to 100.
> - */
> -static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info 
> *ring_info)
> -{
> - u32 avail_write = hv_get_bytes_to_write(ring_info);
> -
> - return reciprocal_divide(avail_write  * 100, netvsc_ring_reciprocal);
> -}
> -
>  static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
>u32 index)
>  {
> @@ -649,7 +637,8 @@ static void netvsc_send_tx_complete(struct netvsc_device
> *net_device,
>   wake_up(_device->wait_drain);
> 
>   if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
> - (hv_ringbuf_avail_percent(>outbound) >
> RING_AVAIL_PERCENT_HIWATER ||
> + (hv_get_avail_to_write_percent(>outbound) >
> +  RING_AVAIL_PERCENT_HIWATER ||
>queue_sends < 1)) {
>   netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
>   ndev_ctx->eth_stats.wake_queue++;
> @@ -757,7 +746,7 @@ static inline int netvsc_send_pkt(
>   struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
>   u64 req_id;
>   int ret;
> - u32 ring_avail = hv_ringbuf_avail_percent(_channel->outbound);
> + u32 ring_avail = hv_get_avail_to_write_percent(_channel->outbound);
> 
>   nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
>   if (skb)
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index faea0be18924..b0b1c2fd2b7b 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -35,7 +35,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
> 
>  #include 
>  #include 
> @@ -55,7 +54,6 @@ static unsigned int ring_size __ro_after_init = 128;
>  module_param(ring_size, uint, S_IRUGO);
>  MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
>  unsigned int netvsc_ring_bytes __ro_after_init;
> -struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
> 
>  static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
>   NETIF_MSG_LINK | NETIF_MSG_IFUP |
> @@ -2186,7 +2184,6 @@ static int __init netvsc_drv_init(void)
>   ring_size);
>   }
>   netvsc_ring_bytes = ring_size * PAGE_SIZE;
> - netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes);
> 
>   ret = vmbus_driver_register(_drv);
>   if (ret)
> --
> 2.14.1



RE: [Resend Patch 2/3] Netvsc: Use the vmbus functiton to calculate ring buffer percentage

2018-04-13 Thread Michael Kelley (EOSG)
> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org  
> On Behalf
> Of Long Li
> Sent: Tuesday, March 27, 2018 5:49 PM
> To: KY Srinivasan ; Haiyang Zhang 
> ; Stephen
> Hemminger ; James E . J . Bottomley 
> ;
> Martin K . Petersen ; 
> de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-kernel@vger.kernel.org; net...@vger.kernel.org
> Cc: Long Li 
> Subject: [Resend Patch 2/3] Netvsc: Use the vmbus functiton to calculate ring 
> buffer
> percentage
> 
> From: Long Li 
> 
> In Vmbus, we have defined a function to calculate available ring buffer
> percentage to write.
> 
> Use that function and remove netvsc's private version.
> 
> Signed-off-by: Long Li 

Reviewed-by:  Michael Kelley 

> ---
>  drivers/net/hyperv/hyperv_net.h |  1 -
>  drivers/net/hyperv/netvsc.c | 17 +++--
>  drivers/net/hyperv/netvsc_drv.c |  3 ---
>  3 files changed, 3 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index cd538d5a7986..a0199ab13d67 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -189,7 +189,6 @@ struct netvsc_device;
>  struct net_device_context;
> 
>  extern u32 netvsc_ring_bytes;
> -extern struct reciprocal_value netvsc_ring_reciprocal;
> 
>  struct netvsc_device *netvsc_device_add(struct hv_device *device,
>   const struct netvsc_device_info *info);
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 0265d703eb03..8af0069e4d8c 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -31,7 +31,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
> 
>  #include 
> 
> @@ -590,17 +589,6 @@ void netvsc_device_remove(struct hv_device *device)
>  #define RING_AVAIL_PERCENT_HIWATER 20
>  #define RING_AVAIL_PERCENT_LOWATER 10
> 
> -/*
> - * Get the percentage of available bytes to write in the ring.
> - * The return value is in range from 0 to 100.
> - */
> -static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info 
> *ring_info)
> -{
> - u32 avail_write = hv_get_bytes_to_write(ring_info);
> -
> - return reciprocal_divide(avail_write  * 100, netvsc_ring_reciprocal);
> -}
> -
>  static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
>u32 index)
>  {
> @@ -649,7 +637,8 @@ static void netvsc_send_tx_complete(struct netvsc_device
> *net_device,
>   wake_up(_device->wait_drain);
> 
>   if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
> - (hv_ringbuf_avail_percent(>outbound) >
> RING_AVAIL_PERCENT_HIWATER ||
> + (hv_get_avail_to_write_percent(>outbound) >
> +  RING_AVAIL_PERCENT_HIWATER ||
>queue_sends < 1)) {
>   netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
>   ndev_ctx->eth_stats.wake_queue++;
> @@ -757,7 +746,7 @@ static inline int netvsc_send_pkt(
>   struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
>   u64 req_id;
>   int ret;
> - u32 ring_avail = hv_ringbuf_avail_percent(_channel->outbound);
> + u32 ring_avail = hv_get_avail_to_write_percent(_channel->outbound);
> 
>   nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
>   if (skb)
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index faea0be18924..b0b1c2fd2b7b 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -35,7 +35,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
> 
>  #include 
>  #include 
> @@ -55,7 +54,6 @@ static unsigned int ring_size __ro_after_init = 128;
>  module_param(ring_size, uint, S_IRUGO);
>  MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
>  unsigned int netvsc_ring_bytes __ro_after_init;
> -struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
> 
>  static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
>   NETIF_MSG_LINK | NETIF_MSG_IFUP |
> @@ -2186,7 +2184,6 @@ static int __init netvsc_drv_init(void)
>   ring_size);
>   }
>   netvsc_ring_bytes = ring_size * PAGE_SIZE;
> - netvsc_ring_reciprocal = reciprocal_value(netvsc_ring_bytes);
> 
>   ret = vmbus_driver_register(_drv);
>   if (ret)
> --
> 2.14.1



Re: [PATCH] Documentation/i2c: sync docs with current state of i2c-tools.

2018-04-13 Thread Sam Hansen
On Fri, Apr 13, 2018 at 11:49 AM, Jean Delvare  wrote:
> On Fri, 13 Apr 2018 09:02:03 -0700, Sam Hansen wrote:
>> On Fri, Apr 13, 2018 at 5:13 AM, Jean Delvare  wrote:
>> > On Fri, 13 Apr 2018 00:24:57 +0200, Wolfram Sang wrote:
>> >> On Thu, Apr 12, 2018 at 02:33:42PM -0700, Sam Hansen wrote:
>> >> > -  Not meant to be called  directly; instead, use the access functions
>> >> > -  below.
>> >> > +  If possible, use the provided i2c_smbus_* methods described below in 
>> >> > favor
>> >> > +  of issuing direct ioctls.
>> >>
>> >> Why this change?
>> >
>> > I'm also not sure if "in favor of" is right. "instead of" would sound
>> > better to me, but I'm no native English speaker, I could be wrong.
>>
>> Sounds good, I'll adopt "instead of".  Regarding Wolfram's earlier
>> comment, as an engineer, requiring an out-of-tree library to build
>> drivers felt a little off.  I can revert this section if you want,
>> just let me know.
>
> The i2c dev interface, and the overlaying library, are used by
> user-space applications. This has nothing to do with "building
> drivers", and makes your "out-of-tree" objection irrelevant. I doubt
> libi2c is the only user-space library building on top of a kernel
> interface.

Ok, sounds good.  I'll send a revised patch set reverting this block.

(also, did I send the v3 patch series threaded correctly?)

>
> --
> Jean Delvare
> SUSE L3 Support


Re: [PATCH] Documentation/i2c: sync docs with current state of i2c-tools.

2018-04-13 Thread Sam Hansen
On Fri, Apr 13, 2018 at 11:49 AM, Jean Delvare  wrote:
> On Fri, 13 Apr 2018 09:02:03 -0700, Sam Hansen wrote:
>> On Fri, Apr 13, 2018 at 5:13 AM, Jean Delvare  wrote:
>> > On Fri, 13 Apr 2018 00:24:57 +0200, Wolfram Sang wrote:
>> >> On Thu, Apr 12, 2018 at 02:33:42PM -0700, Sam Hansen wrote:
>> >> > -  Not meant to be called  directly; instead, use the access functions
>> >> > -  below.
>> >> > +  If possible, use the provided i2c_smbus_* methods described below in 
>> >> > favor
>> >> > +  of issuing direct ioctls.
>> >>
>> >> Why this change?
>> >
>> > I'm also not sure if "in favor of" is right. "instead of" would sound
>> > better to me, but I'm no native English speaker, I could be wrong.
>>
>> Sounds good, I'll adopt "instead of".  Regarding Wolfram's earlier
>> comment, as an engineer, requiring an out-of-tree library to build
>> drivers felt a little off.  I can revert this section if you want,
>> just let me know.
>
> The i2c dev interface, and the overlaying library, are used by
> user-space applications. This has nothing to do with "building
> drivers", and makes your "out-of-tree" objection irrelevant. I doubt
> libi2c is the only user-space library building on top of a kernel
> interface.

Ok, sounds good.  I'll send a revised patch set reverting this block.

(also, did I send the v3 patch series threaded correctly?)

>
> --
> Jean Delvare
> SUSE L3 Support


Re: [GIT PULL] chrome-platform updates for v4.17

2018-04-13 Thread Benson Leung
On Fri, Apr 13, 2018 at 04:26:41PM -0700, Linus Torvalds wrote:
> Please don't do this to me.
> 
> You're sending me a pull request in the second half of the second week
> of the merge window, and none of this has been in linux-next as far as
> I can tell.
> 
> I don't even check linux-next as of when the merge window opened - I
> check it as of "one week into the merge window".
> 
> Ok, ok, I do in fact find two of the commits in linux-next, but even
> they have been rebased for some reason.
> 
> The others? I can't find them even in some rebased form.
> 
> I'll take this for now, but next time I see this kind of "late in the
> merge window, absolutely zero exposure anywhere else", I'll just skip
> the whole pull request. Don't do it.

Sorry about that, Linus. Won't happen again.

-- 
Benson Leung
Staff Software Engineer
Chrome OS Kernel
Google Inc.
ble...@google.com
Chromium OS Project
ble...@chromium.org


signature.asc
Description: PGP signature


Re: [GIT PULL] chrome-platform updates for v4.17

2018-04-13 Thread Benson Leung
On Fri, Apr 13, 2018 at 04:26:41PM -0700, Linus Torvalds wrote:
> Please don't do this to me.
> 
> You're sending me a pull request in the second half of the second week
> of the merge window, and none of this has been in linux-next as far as
> I can tell.
> 
> I don't even check linux-next as of when the merge window opened - I
> check it as of "one week into the merge window".
> 
> Ok, ok, I do in fact find two of the commits in linux-next, but even
> they have been rebased for some reason.
> 
> The others? I can't find them even in some rebased form.
> 
> I'll take this for now, but next time I see this kind of "late in the
> merge window, absolutely zero exposure anywhere else", I'll just skip
> the whole pull request. Don't do it.

Sorry about that, Linus. Won't happen again.

-- 
Benson Leung
Staff Software Engineer
Chrome OS Kernel
Google Inc.
ble...@google.com
Chromium OS Project
ble...@chromium.org


signature.asc
Description: PGP signature


RE: [Resend Patch 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-04-13 Thread Michael Kelley (EOSG)
> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org  
> On Behalf
> Of Long Li
> Sent: Tuesday, March 27, 2018 5:49 PM
> To: KY Srinivasan ; Haiyang Zhang 
> ; Stephen
> Hemminger ; James E . J . Bottomley 
> ;
> Martin K . Petersen ; 
> de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-kernel@vger.kernel.org; net...@vger.kernel.org
> Cc: Long Li 
> Subject: [Resend Patch 3/3] Storvsc: Select channel based on available 
> percentage of ring
> buffer to write
> 
> From: Long Li 
> 
> This is a best effort for estimating on how busy the ring buffer is for
> that channel, based on available buffer to write in percentage. It is still
> possible that at the time of actual ring buffer write, the space may not be
> available due to other processes may be writing at the time.
> 
> Selecting a channel based on how full it is can reduce the possibility that
> a ring buffer write will fail, and avoid the situation a channel is over
> busy.
> 
> Now it's possible that storvsc can use a smaller ring buffer size
> (e.g. 40k bytes) to take advantage of cache locality.
> 
> Signed-off-by: Long Li 
> ---
>  drivers/scsi/storvsc_drv.c | 62 
> +-
>  1 file changed, 50 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index a2ec0bc9e9fa..b1a87072b3ab 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer 
> size
> (bytes)");
> 
>  module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
>  MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to 
> subchannels");
> +
> +static int ring_avail_percent_lowater = 10;

Reserving 10% of each ring buffer by default seems like more than is needed
in the storvsc driver.  That would be about 4Kbytes for the 40K ring buffer
you suggest, and even more for a ring buffer of 128K.  Each outgoing record
is only about 344 bytes (I'd have to check exactly).  With the new channel
selection algorithm below, the only time we use a channel that is already
below the low water mark is when no channel could be found that is above
the low water mark.   There could be a case of two or more threads deciding
that a channel is above the low water mark at the same time and both
choosing it, but that's likely to be rare.  So it seems like we could set the
default low water mark to 5 percent or even 3 percent, which will let more
of the ring buffer be used, and let a channel be assigned according to the
algorithm, rather than falling through to the default because all channels
appear to be "full".

> +module_param(ring_avail_percent_lowater, int, S_IRUGO);
> +MODULE_PARM_DESC(ring_avail_percent_lowater,
> + "Select a channel if available ring size > this in percent");
> +
>  /*
>   * Timeout in seconds for all devices managed by this driver.
>   */
> @@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device *device,
>  {
>   struct storvsc_device *stor_device;
>   struct vstor_packet *vstor_packet;
> - struct vmbus_channel *outgoing_channel;
> + struct vmbus_channel *outgoing_channel, *channel;
>   int ret = 0;
> - struct cpumask alloced_mask;
> + struct cpumask alloced_mask, other_numa_mask;
>   int tgt_cpu;
> 
>   vstor_packet = >vstor_packet;
> @@ -1301,22 +1307,53 @@ static int storvsc_do_io(struct hv_device *device,
>   /*
>* Select an an appropriate channel to send the request out.
>*/
> -
>   if (stor_device->stor_chns[q_num] != NULL) {
>   outgoing_channel = stor_device->stor_chns[q_num];
> - if (outgoing_channel->target_cpu == smp_processor_id()) {
> + if (outgoing_channel->target_cpu == q_num) {
>   /*
>* Ideally, we want to pick a different channel if
>* available on the same NUMA node.
>*/
>   cpumask_and(_mask, _device->alloced_cpus,
>   cpumask_of_node(cpu_to_node(q_num)));
> - for_each_cpu_wrap(tgt_cpu, _mask,
> - outgoing_channel->target_cpu + 1) {
> - if (tgt_cpu != outgoing_channel->target_cpu) {
> - outgoing_channel =
> - stor_device->stor_chns[tgt_cpu];
> - break;
> +
> + for_each_cpu_wrap(tgt_cpu, _mask, q_num + 1) {
> + if (tgt_cpu == q_num)
> + continue;
> + channel = 

RE: [Resend Patch 3/3] Storvsc: Select channel based on available percentage of ring buffer to write

2018-04-13 Thread Michael Kelley (EOSG)
> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org  
> On Behalf
> Of Long Li
> Sent: Tuesday, March 27, 2018 5:49 PM
> To: KY Srinivasan ; Haiyang Zhang 
> ; Stephen
> Hemminger ; James E . J . Bottomley 
> ;
> Martin K . Petersen ; 
> de...@linuxdriverproject.org; linux-
> s...@vger.kernel.org; linux-kernel@vger.kernel.org; net...@vger.kernel.org
> Cc: Long Li 
> Subject: [Resend Patch 3/3] Storvsc: Select channel based on available 
> percentage of ring
> buffer to write
> 
> From: Long Li 
> 
> This is a best effort for estimating on how busy the ring buffer is for
> that channel, based on available buffer to write in percentage. It is still
> possible that at the time of actual ring buffer write, the space may not be
> available due to other processes may be writing at the time.
> 
> Selecting a channel based on how full it is can reduce the possibility that
> a ring buffer write will fail, and avoid the situation a channel is over
> busy.
> 
> Now it's possible that storvsc can use a smaller ring buffer size
> (e.g. 40k bytes) to take advantage of cache locality.
> 
> Signed-off-by: Long Li 
> ---
>  drivers/scsi/storvsc_drv.c | 62 
> +-
>  1 file changed, 50 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index a2ec0bc9e9fa..b1a87072b3ab 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -395,6 +395,12 @@ MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer 
> size
> (bytes)");
> 
>  module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
>  MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to 
> subchannels");
> +
> +static int ring_avail_percent_lowater = 10;

Reserving 10% of each ring buffer by default seems like more than is needed
in the storvsc driver.  That would be about 4Kbytes for the 40K ring buffer
you suggest, and even more for a ring buffer of 128K.  Each outgoing record
is only about 344 bytes (I'd have to check exactly).  With the new channel
selection algorithm below, the only time we use a channel that is already
below the low water mark is when no channel could be found that is above
the low water mark.   There could be a case of two or more threads deciding
that a channel is above the low water mark at the same time and both
choosing it, but that's likely to be rare.  So it seems like we could set the
default low water mark to 5 percent or even 3 percent, which will let more
of the ring buffer be used, and let a channel be assigned according to the
algorithm, rather than falling through to the default because all channels
appear to be "full".

> +module_param(ring_avail_percent_lowater, int, S_IRUGO);
> +MODULE_PARM_DESC(ring_avail_percent_lowater,
> + "Select a channel if available ring size > this in percent");
> +
>  /*
>   * Timeout in seconds for all devices managed by this driver.
>   */
> @@ -1285,9 +1291,9 @@ static int storvsc_do_io(struct hv_device *device,
>  {
>   struct storvsc_device *stor_device;
>   struct vstor_packet *vstor_packet;
> - struct vmbus_channel *outgoing_channel;
> + struct vmbus_channel *outgoing_channel, *channel;
>   int ret = 0;
> - struct cpumask alloced_mask;
> + struct cpumask alloced_mask, other_numa_mask;
>   int tgt_cpu;
> 
>   vstor_packet = >vstor_packet;
> @@ -1301,22 +1307,53 @@ static int storvsc_do_io(struct hv_device *device,
>   /*
>* Select an an appropriate channel to send the request out.
>*/
> -
>   if (stor_device->stor_chns[q_num] != NULL) {
>   outgoing_channel = stor_device->stor_chns[q_num];
> - if (outgoing_channel->target_cpu == smp_processor_id()) {
> + if (outgoing_channel->target_cpu == q_num) {
>   /*
>* Ideally, we want to pick a different channel if
>* available on the same NUMA node.
>*/
>   cpumask_and(_mask, _device->alloced_cpus,
>   cpumask_of_node(cpu_to_node(q_num)));
> - for_each_cpu_wrap(tgt_cpu, _mask,
> - outgoing_channel->target_cpu + 1) {
> - if (tgt_cpu != outgoing_channel->target_cpu) {
> - outgoing_channel =
> - stor_device->stor_chns[tgt_cpu];
> - break;
> +
> + for_each_cpu_wrap(tgt_cpu, _mask, q_num + 1) {
> + if (tgt_cpu == q_num)
> + continue;
> + channel = stor_device->stor_chns[tgt_cpu];
> + if (hv_get_avail_to_write_percent(
> + >outbound)
> + > 

Re: [GIT PULL] chrome-platform updates for v4.17

2018-04-13 Thread Linus Torvalds
On Thu, Apr 12, 2018 at 3:21 PM, Benson Leung  wrote:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/bleung/chrome-platform.git 
> tags/chrome-platform-for-linus-4.17

Please don't do this to me.

You're sending me a pull request in the second half of the second week
of the merge window, and none of this has been in linux-next as far as
I can tell.

I don't even check linux-next as of when the merge window opened - I
check it as of "one week into the merge window".

Ok, ok, I do in fact find two of the commits in linux-next, but even
they have been rebased for some reason.

The others? I can't find them even in some rebased form.

I'll take this for now, but next time I see this kind of "late in the
merge window, absolutely zero exposure anywhere else", I'll just skip
the whole pull request. Don't do it.

 Linus


Re: [GIT PULL] chrome-platform updates for v4.17

2018-04-13 Thread Linus Torvalds
On Thu, Apr 12, 2018 at 3:21 PM, Benson Leung  wrote:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/bleung/chrome-platform.git 
> tags/chrome-platform-for-linus-4.17

Please don't do this to me.

You're sending me a pull request in the second half of the second week
of the merge window, and none of this has been in linux-next as far as
I can tell.

I don't even check linux-next as of when the merge window opened - I
check it as of "one week into the merge window".

Ok, ok, I do in fact find two of the commits in linux-next, but even
they have been rebased for some reason.

The others? I can't find them even in some rebased form.

I'll take this for now, but next time I see this kind of "late in the
merge window, absolutely zero exposure anywhere else", I'll just skip
the whole pull request. Don't do it.

 Linus


Re: [PATCH v4 2/2] drivers: soc: Add LLCC driver

2018-04-13 Thread rishabhb

On 2018-04-12 15:02, Evan Green wrote:

Hi Rishabh,

On Tue, Apr 10, 2018 at 1:09 PM Rishabh Bhatnagar 


wrote:


LLCC (Last Level Cache Controller) provides additional cache memory
in the system. LLCC is partitioned into multiple slices and each
slice gets its own priority, size, ID and other config parameters.
LLCC driver programs these parameters for each slice. Clients that
are assigned to use LLCC need to get information such as size & ID of the
slice they get and activate or deactivate the slice as needed. LLCC 
driver

provides API for the clients to perform these operations.



Signed-off-by: Channagoud Kadabi 
Signed-off-by: Rishabh Bhatnagar 
---
  drivers/soc/qcom/Kconfig   |  17 ++
  drivers/soc/qcom/Makefile  |   2 +
  drivers/soc/qcom/llcc-sdm845.c | 110 ++
  drivers/soc/qcom/llcc-slice.c  | 404

+

  include/linux/soc/qcom/llcc-qcom.h | 168 +++
  5 files changed, 701 insertions(+)
  create mode 100644 drivers/soc/qcom/llcc-sdm845.c
  create mode 100644 drivers/soc/qcom/llcc-slice.c
  create mode 100644 include/linux/soc/qcom/llcc-qcom.h


[...]

diff --git a/drivers/soc/qcom/llcc-sdm845.c

b/drivers/soc/qcom/llcc-sdm845.c

new file mode 100644
index 000..619b226
--- /dev/null
+++ b/drivers/soc/qcom/llcc-sdm845.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights 
reserved.

+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * SCT(System Cache Table) entry contains of the following parameters


contains the following members:

+ * name: Name of the client's use case for which the llcc slice is 
used

+ * uid: Unique id for the client's use case


s/uid/usecase_id/


+ * slice_id: llcc slice id for each client
+ * max_cap: The maximum capacity of the cache slice provided in KB
+ * priority: Priority of the client used to select victim line for

replacement

+ * fixed_size: Determine if the slice has a fixed capacity


"Boolean indicating if the slice has a fixed capacity" might be better

diff --git a/drivers/soc/qcom/llcc-slice.c 
b/drivers/soc/qcom/llcc-slice.c

new file mode 100644
index 000..67a81b0
--- /dev/null
+++ b/drivers/soc/qcom/llcc-slice.c

...

+static int llcc_update_act_ctrl(struct llcc_drv_data *drv, u32 sid,
+   u32 act_ctrl_reg_val, u32 status)
+{
+   u32 act_ctrl_reg;
+   u32 status_reg;
+   u32 slice_status;
+   int ret = 0;
+
+   act_ctrl_reg = drv->bcast_off + LLCC_TRP_ACT_CTRLn(sid);
+   status_reg = drv->bcast_off + LLCC_TRP_STATUSn(sid);
+
+   /*Set the ACTIVE trigger*/


Add spaces around /* */


+   act_ctrl_reg_val |= ACT_CTRL_ACT_TRIG;
+   ret = regmap_write(drv->regmap, act_ctrl_reg, 
act_ctrl_reg_val);

+   if (ret)
+   return ret;
+
+   /* Clear the ACTIVE trigger */
+   act_ctrl_reg_val &= ~ACT_CTRL_ACT_TRIG;
+   ret = regmap_write(drv->regmap, act_ctrl_reg, 
act_ctrl_reg_val);

+   if (ret)
+   return ret;
+
+   ret = regmap_read_poll_timeout(drv->regmap, status_reg,

slice_status,

+   !(slice_status & status), 0,

LLCC_STATUS_READ_DELAY);

+   return ret;
+}
+
+/**
+ * llcc_slice_activate - Activate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value zero will be returned on success and a negative errno will


a value of zero


+ * be returned in error cases
+ */
+int llcc_slice_activate(struct llcc_slice_desc *desc)
+{
+   int ret;
+   u32 act_ctrl_val;
+   struct llcc_drv_data *drv;
+
+   if (desc == NULL)
+   return -EINVAL;


I think we can remove this check, right?


+
+   drv = dev_get_drvdata(desc->dev);
+   if (!drv)
+   return -EINVAL;
+
+   mutex_lock(&drv->lock);
+   if (test_bit(desc->slice_id, drv->bitmap)) {
+   mutex_unlock(&drv->lock);
+   return 0;
+   }
+
+   act_ctrl_val = ACT_CTRL_OPCODE_ACTIVATE << 
ACT_CTRL_OPCODE_SHIFT;

+
+   ret = llcc_update_act_ctrl(drv, desc->slice_id, act_ctrl_val,
+ DEACTIVATE);
+
+   __set_bit(desc->slice_id, drv->bitmap);
+   mutex_unlock(&drv->lock);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_activate);
+
+/**
+ * llcc_slice_deactivate - Deactivate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value zero will be returned on success and a negative errno will
+ * be returned in error cases
+ */
+int llcc_slice_deactivate(struct llcc_slice_desc *desc)
+{
+   u32 act_ctrl_val;
+   int ret;
+   struct llcc_drv_data *drv;
+
+   if (desc == NULL)
+   return -EINVAL;
+
+   drv = dev_get_drvdata(desc->dev);
+   if (!drv)
+   return -EINVAL;
+
+   mutex_lock(&drv->lock);
+   if (!test_bit(desc->slice_id, 

Re: [PATCH v4 2/2] drivers: soc: Add LLCC driver

2018-04-13 Thread rishabhb

On 2018-04-12 15:02, Evan Green wrote:

Hi Rishabh,

On Tue, Apr 10, 2018 at 1:09 PM Rishabh Bhatnagar 


wrote:


LLCC (Last Level Cache Controller) provides additional cache memory
in the system. LLCC is partitioned into multiple slices and each
slice gets its own priority, size, ID and other config parameters.
LLCC driver programs these parameters for each slice. Clients that
are assigned to use LLCC need to get information such as size & ID of the
slice they get and activate or deactivate the slice as needed. LLCC 
driver

provides API for the clients to perform these operations.



Signed-off-by: Channagoud Kadabi 
Signed-off-by: Rishabh Bhatnagar 
---
  drivers/soc/qcom/Kconfig   |  17 ++
  drivers/soc/qcom/Makefile  |   2 +
  drivers/soc/qcom/llcc-sdm845.c | 110 ++
  drivers/soc/qcom/llcc-slice.c  | 404

+

  include/linux/soc/qcom/llcc-qcom.h | 168 +++
  5 files changed, 701 insertions(+)
  create mode 100644 drivers/soc/qcom/llcc-sdm845.c
  create mode 100644 drivers/soc/qcom/llcc-slice.c
  create mode 100644 include/linux/soc/qcom/llcc-qcom.h


[...]

diff --git a/drivers/soc/qcom/llcc-sdm845.c

b/drivers/soc/qcom/llcc-sdm845.c

new file mode 100644
index 000..619b226
--- /dev/null
+++ b/drivers/soc/qcom/llcc-sdm845.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights 
reserved.

+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * SCT(System Cache Table) entry contains of the following parameters


contains the following members:

+ * name: Name of the client's use case for which the llcc slice is 
used

+ * uid: Unique id for the client's use case


s/uid/usecase_id/


+ * slice_id: llcc slice id for each client
+ * max_cap: The maximum capacity of the cache slice provided in KB
+ * priority: Priority of the client used to select victim line for

replacement

+ * fixed_size: Determine if the slice has a fixed capacity


"Boolean indicating if the slice has a fixed capacity" might be better

diff --git a/drivers/soc/qcom/llcc-slice.c 
b/drivers/soc/qcom/llcc-slice.c

new file mode 100644
index 000..67a81b0
--- /dev/null
+++ b/drivers/soc/qcom/llcc-slice.c

...

+static int llcc_update_act_ctrl(struct llcc_drv_data *drv, u32 sid,
+   u32 act_ctrl_reg_val, u32 status)
+{
+   u32 act_ctrl_reg;
+   u32 status_reg;
+   u32 slice_status;
+   int ret = 0;
+
+   act_ctrl_reg = drv->bcast_off + LLCC_TRP_ACT_CTRLn(sid);
+   status_reg = drv->bcast_off + LLCC_TRP_STATUSn(sid);
+
+   /*Set the ACTIVE trigger*/


Add spaces around /* */


+   act_ctrl_reg_val |= ACT_CTRL_ACT_TRIG;
+   ret = regmap_write(drv->regmap, act_ctrl_reg, 
act_ctrl_reg_val);

+   if (ret)
+   return ret;
+
+   /* Clear the ACTIVE trigger */
+   act_ctrl_reg_val &= ~ACT_CTRL_ACT_TRIG;
+   ret = regmap_write(drv->regmap, act_ctrl_reg, 
act_ctrl_reg_val);

+   if (ret)
+   return ret;
+
+   ret = regmap_read_poll_timeout(drv->regmap, status_reg,

slice_status,

+   !(slice_status & status), 0,

LLCC_STATUS_READ_DELAY);

+   return ret;
+}
+
+/**
+ * llcc_slice_activate - Activate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value zero will be returned on success and a negative errno will


a value of zero


+ * be returned in error cases
+ */
+int llcc_slice_activate(struct llcc_slice_desc *desc)
+{
+   int ret;
+   u32 act_ctrl_val;
+   struct llcc_drv_data *drv;
+
+   if (desc == NULL)
+   return -EINVAL;


I think we can remove this check, right?


+
+   drv = dev_get_drvdata(desc->dev);
+   if (!drv)
+   return -EINVAL;
+
+   mutex_lock(&drv->lock);
+   if (test_bit(desc->slice_id, drv->bitmap)) {
+   mutex_unlock(&drv->lock);
+   return 0;
+   }
+
+   act_ctrl_val = ACT_CTRL_OPCODE_ACTIVATE << 
ACT_CTRL_OPCODE_SHIFT;

+
+   ret = llcc_update_act_ctrl(drv, desc->slice_id, act_ctrl_val,
+ DEACTIVATE);
+
+   __set_bit(desc->slice_id, drv->bitmap);
+   mutex_unlock(&drv->lock);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_activate);
+
+/**
+ * llcc_slice_deactivate - Deactivate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value zero will be returned on success and a negative errno will
+ * be returned in error cases
+ */
+int llcc_slice_deactivate(struct llcc_slice_desc *desc)
+{
+   u32 act_ctrl_val;
+   int ret;
+   struct llcc_drv_data *drv;
+
+   if (desc == NULL)
+   return -EINVAL;
+
+   drv = dev_get_drvdata(desc->dev);
+   if (!drv)
+   return -EINVAL;
+
+   mutex_lock(&drv->lock);
+   if (!test_bit(desc->slice_id, drv->bitmap)) {
+   mutex_unlock(&drv->lock);
+   return 0;
+   

Re: [GIT PULL] auxdisplay for v4.17-rc1

2018-04-13 Thread Linus Torvalds
On Thu, Apr 12, 2018 at 11:37 AM, Miguel Ojeda
 wrote:
>
> Please pull these fixes and cleanups for auxdisplay.

As far as I can tell, none of this has been in linux-next.

By the end of the merge window, I start getting a whole lot more anal
about what I pull. In particular, if people send me pull requests
late, I had better get the warm fuzzy feeling of "at least it's been
tested in linux-next".

These may be recently rebased, but there isn't even anything
*non-rebased* in linux-next as of the beginning of this week.

  Linus


Re: [GIT PULL] auxdisplay for v4.17-rc1

2018-04-13 Thread Linus Torvalds
On Thu, Apr 12, 2018 at 11:37 AM, Miguel Ojeda
 wrote:
>
> Please pull these fixes and cleanups for auxdisplay.

As far as I can tell, none of this has been in linux-next.

By the end of the merge window, I start getting a whole lot more anal
about what I pull. In particular, if people send me pull requests
late, I had better get the warm fuzzy feeling of "at least it's been
tested in linux-next".

These may be recently rebased, but there isn't even anything
*non-rebased* in linux-next as of the beginning of this week.

  Linus


Re: [PATCH v8 15/18] mm, fs, dax: handle layout changes to pinned dax mappings

2018-04-13 Thread Paul E. McKenney
On Fri, Apr 13, 2018 at 03:03:51PM -0700, Dan Williams wrote:
> On Mon, Apr 9, 2018 at 9:51 AM, Dan Williams  wrote:
> > On Mon, Apr 9, 2018 at 9:49 AM, Jan Kara  wrote:
> >> On Sat 07-04-18 12:38:24, Dan Williams wrote:
> > [..]
> >>> I wonder if this can be trivially solved by using srcu. I.e. we don't
> >>> need to wait for a global quiescent state, just a
> >>> get_user_pages_fast() quiescent state. ...or is that an abuse of the
> >>> srcu api?
> >>
> >> Well, I'd rather use the percpu rwsemaphore (linux/percpu-rwsem.h) than
> >> SRCU. It is a more-or-less standard locking mechanism rather than relying
> >> on implementation properties of SRCU which is a data structure protection
> >> method. And the overhead of percpu rwsemaphore for your use case should be
> >> about the same as that of SRCU.
> >
> > I was just about to ask that. Yes, it seems they would share similar
> > properties and it would be better to use the explicit implementation
> > rather than a side effect of srcu.
> 
> ...unfortunately:
> 
>  BUG: sleeping function called from invalid context at
> ./include/linux/percpu-rwsem.h:34
>  [..]
>  Call Trace:
>   dump_stack+0x85/0xcb
>   ___might_sleep+0x15b/0x240
>   dax_layout_lock+0x18/0x80
>   get_user_pages_fast+0xf8/0x140
> 
> ...and thinking about it more srcu is a better fit. We don't need the
> 100% exclusion provided by an rwsem we only need the guarantee that
> all cpus that might have been running get_user_pages_fast() have
> finished it at least once.
> 
> In my tests synchronize_srcu is a bit slower than unpatched for the
> trivial 100 truncate test, but certainly not the 200x latency you were
> seeing with synchronize_rcu.
> 
> Elapsed time:
> 0.006149178 unpatched
> 0.009426360 srcu

You might want to try synchronize_srcu_expedited().  Unlike plain RCU,
it does not send IPIs, so should be less controversial.  And it might
well more than make up the performance difference you are seeing above.

Thanx, Paul



Re: [PATCH v8 15/18] mm, fs, dax: handle layout changes to pinned dax mappings

2018-04-13 Thread Paul E. McKenney
On Fri, Apr 13, 2018 at 03:03:51PM -0700, Dan Williams wrote:
> On Mon, Apr 9, 2018 at 9:51 AM, Dan Williams  wrote:
> > On Mon, Apr 9, 2018 at 9:49 AM, Jan Kara  wrote:
> >> On Sat 07-04-18 12:38:24, Dan Williams wrote:
> > [..]
> >>> I wonder if this can be trivially solved by using srcu. I.e. we don't
> >>> need to wait for a global quiescent state, just a
> >>> get_user_pages_fast() quiescent state. ...or is that an abuse of the
> >>> srcu api?
> >>
> >> Well, I'd rather use the percpu rwsemaphore (linux/percpu-rwsem.h) than
> >> SRCU. It is a more-or-less standard locking mechanism rather than relying
> >> on implementation properties of SRCU which is a data structure protection
> >> method. And the overhead of percpu rwsemaphore for your use case should be
> >> about the same as that of SRCU.
> >
> > I was just about to ask that. Yes, it seems they would share similar
> > properties and it would be better to use the explicit implementation
> > rather than a side effect of srcu.
> 
> ...unfortunately:
> 
>  BUG: sleeping function called from invalid context at
> ./include/linux/percpu-rwsem.h:34
>  [..]
>  Call Trace:
>   dump_stack+0x85/0xcb
>   ___might_sleep+0x15b/0x240
>   dax_layout_lock+0x18/0x80
>   get_user_pages_fast+0xf8/0x140
> 
> ...and thinking about it more srcu is a better fit. We don't need the
> 100% exclusion provided by an rwsem we only need the guarantee that
> all cpus that might have been running get_user_pages_fast() have
> finished it at least once.
> 
> In my tests synchronize_srcu is a bit slower than unpatched for the
> trivial 100 truncate test, but certainly not the 200x latency you were
> seeing with synchronize_rcu.
> 
> Elapsed time:
> 0.006149178 unpatched
> 0.009426360 srcu

You might want to try synchronize_srcu_expedited().  Unlike plain RCU,
it does not send IPIs, so should be less controversial.  And it might
well more than make up the performance difference you are seeing above.

Thanx, Paul



PRINCE CHARLES CONSULTING/GOOD DAY

2018-04-13 Thread MR PRINCE CHARLES
We are Consultant to a private investor who is willing to invest over a $100 
million in any lucrative Business as long as he is convinced of return of 
investment. Get back to me on (princecharles...@yahoo.com) for onward process 
and partnership.

Regards,

PRINCE & CHARLES CONSULTING LTD
c/o PRINCE & CHARLES Consultant Ltd,
20 Towers Road, Pinner, Middlesex, HA5 4SJ



PRINCE CHARLES CONSULTING/GOOD DAY

2018-04-13 Thread MR PRINCE CHARLES
We are Consultant to a private investor who is willing to invest over a $100 
million in any lucrative Business as long as he is convinced of return of 
investment. Get back to me on (princecharles...@yahoo.com) for onward process 
and partnership.

Regards,

PRINCE & CHARLES CONSULTING LTD
c/o PRINCE & CHARLES Consultant Ltd,
20 Towers Road, Pinner, Middlesex, HA5 4SJ



Re: [PATCH] [RFC][WIP] namespace.c: Allow some unprivileged proc mounts when not fully visible

2018-04-13 Thread Djalal Harouni
On Wed, Apr 4, 2018 at 4:45 PM, Eric W. Biederman  wrote:
[...]
>
> The only option I have seen proposed that might qualify as something
> general purpose and simple is a new filesystem that is just the process
> directories of proc.  As there would in essence be no files that would
> need restrictions it would be safe to allow anyone to mount without
> restriction.
>
Eric, there is a series for this:
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1533642.html

patch on top for pids:
https://github.com/legionus/linux/commit/993a2a5b9af95b0ac901ff41d32124b72ed676e3

it was reviewed, and suggestions were integrated from Andy and Al Viro
feedback, thanks. It works on Debian, Ubuntu and others, not on Fedora
due to a bug with dracut+systemd.

I do not have time to work on it now, anyone can just pick them.

Thanks!


-- 
tixxdz


Re: [PATCH] [RFC][WIP] namespace.c: Allow some unprivileged proc mounts when not fully visible

2018-04-13 Thread Djalal Harouni
On Wed, Apr 4, 2018 at 4:45 PM, Eric W. Biederman  wrote:
[...]
>
> The only option I have seen proposed that might qualify as something
> general purpose and simple is a new filesystem that is just the process
> directories of proc.  As there would in essence be no files that would
> need restrictions it would be safe to allow anyone to mount without
> restriction.
>
Eric, there is a series for this:
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1533642.html

patch on top for pids:
https://github.com/legionus/linux/commit/993a2a5b9af95b0ac901ff41d32124b72ed676e3

it was reviewed, and suggestions were integrated from Andy and Al Viro
feedback, thanks. It works on Debian, Ubuntu and others, not on Fedora
due to a bug with dracut+systemd.

I do not have time to work on it now, anyone can just pick them.

Thanks!


-- 
tixxdz


Re: [PATCH v5 02/10] dt-bindings: introduce RPMH RSC bindings for Qualcomm SoCs

2018-04-13 Thread Stephen Boyd
Quoting Lina Iyer (2018-04-11 14:24:31)
> On Wed, Apr 11 2018 at 09:29 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2018-04-09 09:08:00)
> >> On Fri, Apr 06 2018 at 19:14 -0600, Stephen Boyd wrote:
> >> >Quoting Lina Iyer (2018-04-05 09:18:26)
> >> >> diff --git a/Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt 
> >> >> b/Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt
> >> >> new file mode 100644
> >> >> index ..dcf71a5b302f
> >> >> --- /dev/null
> >> >> +++ b/Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt
> >> >> @@ -0,0 +1,127 @@
> >> >> +
> >> >> +Example 1:
> >> >> +
> >> >> +For a TCS whose RSC base address is 0x179C0000 and is at a DRV id 
> >> >> of 2, the
> >> >> +register offsets for DRV2 start at 0D00, the register calculations are 
> >> >> like
> >> >> +this -
> >> >> +First tuple: 0x179C0000 + 0x10000 * 2 = 0x179E0000
> >> >> +Second tuple: 0x179E0000 + 0xD00 = 0x179E0D00
> >> >> +
> >> >> +   apps_rsc: rsc@179e000 {
> >> >> +   label = "apps_rsc";
> >> >> +   compatible = "qcom,rpmh-rsc";
> >> >> +   reg = <0x179e 0x1>, <0x179e0d00 0x3000>;
> >> >
> >> >The first reg property overlaps the second one. Does this second one
> >> >ever move around? I would hardcode it in the driver to be 0xd00 away
> >> >from the drv base instead of specifying it in DT if it's the same all
> >> >the time.
> >> >
> >> >Also, the example shows 0x179c which I guess is the actual beginning
> >> >of the RSC block. So the binding seems to be for one DRV inside of an
> >> >RSC. Can we get the full description of the RSC in the binding instead?
> >> >I imagine that means there's a DRV0,1,2 and those probably have an
> >> >interrupt per each DRV and then a different TCS config per each one too?
> >> >If the binding can describe all of the RSC then we can use different
> >> >DRVs by changing the qcom,drv-id property.
> >> >
> >> >   rsc@179c {
> >> >   compatible = "qcom,rpmh-rsc";
> >> >   reg = <0x179c 0x1>,
> >> > <0x179d 0x1>,
> >> > <0x179e 0x1>;
> >> >   qcom,tcs-offset = <0xd00>;
> >> >   qcom,drv-id = <0/1/2>;
> >> >   interrupts = ,
> >> >,
> >> >;
> >> >   }
> >> >
> >> >This is sort of what I imagine it would look like. I have no idea how
> >> >the tcs config would work unless each DRV has the same TCS config
> >> >though. Otherwise, if each node is for a drv, then I would expect the
> >> >node would be called 'drv' and we wouldn't need the drv-id property and
> >> >the compatible string would say drv instead of rsc?
> >> >
> >> >BTW, what are the other DRVs used for in the apps RSC?
> >> >
> >> The DRV is the voter for an execution environment (Linux, Hypervisor,
> >> ATF) in the RSC. The RSC has a lot of other registers that Linux is not
> >> privy to. They are access restricted.
> >
> >Alright. Well sometimes access restrictions aren't there, so this isn't
> >a good assumption to make.
> >
> >> The memory organization of the RSC
> >> mandates that we know the DRV id to access registers specific to the
> >> DRV.
> >
> >I think qcom,drv-id covers that, no?
> >
> >> Unfortunately, not all RSC have identical DRV configuration and the
> >> register space is also variable depending on the capability of the RSC.
> >> There are functionalities supported by other RSCs in the SoC that are
> >> not supported by the RSC associated with the application processor,
> >> while not many RSCs support multiple DRVs. Therefore it doesn't benefit
> >> describing the whole RSC as it is not usable from Linux (because of
> >> access restrictions).
> >
> >If we're not describing the whole RSC in the RSC binding then we're not
> >going to get very far. From what I can tell, this binding describes one
> >DRV inside of an RSC instead of the whole RSC. Yes we'll probably never
> >use the ATF part of the RSC in Linux, but we may use the hypervisor part
> >if we use KVM/Xen so the binding should be describing as much as it can
> >about this device in case some software needs to use it.
> >
> The RSC is pretty much this. A set of registers that are RSC specific at
> the address pointed to by the "rsc" reg and the TCS registers pointed to
> by the "tcs" reg. You do not want to clobber multiple DRVs into the same
> device node. It will be very confusing for the drivers to determine
> which DRV to vote.

Well it seems like an RSC contains many DRVs and those DRVs contain many
TCSes. This is what I get after talking with Bjorn on IRC.

+--+ (0x0)
|  |
|  DRV #0  |
|  |
|--  --| (tcs-offset 

Re: [PATCH v5 02/10] dt-bindings: introduce RPMH RSC bindings for Qualcomm SoCs

2018-04-13 Thread Stephen Boyd
Quoting Lina Iyer (2018-04-11 14:24:31)
> On Wed, Apr 11 2018 at 09:29 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2018-04-09 09:08:00)
> >> On Fri, Apr 06 2018 at 19:14 -0600, Stephen Boyd wrote:
> >> >Quoting Lina Iyer (2018-04-05 09:18:26)
> >> >> diff --git a/Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt 
> >> >> b/Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt
> >> >> new file mode 100644
> >> >> index ..dcf71a5b302f
> >> >> --- /dev/null
> >> >> +++ b/Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt
> >> >> @@ -0,0 +1,127 @@
> >> >> +
> >> >> +Example 1:
> >> >> +
> >> >> +For a TCS whose RSC base address is 0x179C0000 and is at a DRV id 
> >> >> of 2, the
> >> >> +register offsets for DRV2 start at 0D00, the register calculations are 
> >> >> like
> >> >> +this -
> >> >> +First tuple: 0x179C0000 + 0x10000 * 2 = 0x179E0000
> >> >> +Second tuple: 0x179E0000 + 0xD00 = 0x179E0D00
> >> >> +
> >> >> +   apps_rsc: rsc@179e000 {
> >> >> +   label = "apps_rsc";
> >> >> +   compatible = "qcom,rpmh-rsc";
> >> >> +   reg = <0x179e0000 0x10000>, <0x179e0d00 0x3000>;
> >> >
> >> >The first reg property overlaps the second one. Does this second one
> >> >ever move around? I would hardcode it in the driver to be 0xd00 away
> >> >from the drv base instead of specifying it in DT if it's the same all
> >> >the time.
> >> >
> >> >Also, the example shows 0x179c which I guess is the actual beginning
> >> >of the RSC block. So the binding seems to be for one DRV inside of an
> >> >RSC. Can we get the full description of the RSC in the binding instead?
> >> >I imagine that means there's a DRV0,1,2 and those probably have an
> >> >interrupt per each DRV and then a different TCS config per each one too?
> >> >If the binding can describe all of the RSC then we can use different
> >> >DRVs by changing the qcom,drv-id property.
> >> >
> >> >   rsc@179c {
> >> >   compatible = "qcom,rpmh-rsc";
> >> >   reg = <0x179c 0x1>,
> >> > <0x179d 0x1>,
> >> > <0x179e 0x1>;
> >> >   qcom,tcs-offset = <0xd00>;
> >> >   qcom,drv-id = <0/1/2>;
> >> >   interrupts = ,
> >> >,
> >> >;
> >> >   }
> >> >
> >> >This is sort of what I imagine it would look like. I have no idea how
> >> >the tcs config would work unless each DRV has the same TCS config
> >> >though. Otherwise, if each node is for a drv, then I would expect the
> >> >node would be called 'drv' and we wouldn't need the drv-id property and
> >> >the compatible string would say drv instead of rsc?
> >> >
> >> >BTW, what are the other DRVs used for in the apps RSC?
> >> >
> >> The DRV is the voter for an execution environment (Linux, Hypervisor,
> >> ATF) in the RSC. The RSC has a lot of other registers that Linux is not
> >> privy to. They are access restricted.
> >
> >Alright. Well sometimes access restrictions aren't there, so this isn't
> >a good assumption to make.
> >
> >> The memory organization of the RSC
> >> mandates that we know the DRV id to access registers specific to the
> >> DRV.
> >
> >I think qcom,drv-id covers that, no?
> >
> >> Unfortunately, not all RSC have identical DRV configuration and the
> >> register space is also variable depending on the capability of the RSC.
> >> There are functionalities supported by other RSCs in the SoC that are
> >> not supported by the RSC associated with the application processor,
> >> while not many RSCs support multiple DRVs. Therefore it doesn't benefit
> >> describing the whole RSC as it is not usable from Linux (because of
> >> access restrictions).
> >
> >If we're not describing the whole RSC in the RSC binding then we're not
> >going to get very far. From what I can tell, this binding describes one
> >DRV inside of an RSC instead of the whole RSC. Yes we'll probably never
> >use the ATF part of the RSC in Linux, but we may use the hypervisor part
> >if we use KVM/Xen so the binding should be describing as much as it can
> >about this device in case some software needs to use it.
> >
> The RSC is pretty much this. A set of registers that are RSC specific at
> the address pointed to by the "rsc" reg and the TCS registers pointed to
> by the "tcs" reg. You do not want to clobber multiple DRVs into the same
> device node. It will be very confusing for the drivers to determine
> which DRV to vote.

Well it seems like an RSC contains many DRVs and those DRVs contain many
TCSes. This is what I get after talking with Bjorn on IRC.

+--+ (0x0)
|  |
|  DRV #0  |
|  |
|--  --| (tcs-offset 

  1   2   3   4   5   6   7   8   9   10   >