[PATCH] f2fs: check cap_resource only for data blocks

2018-04-20 Thread Jaegeuk Kim
This patch changes the rule to check cap_resource for data blocks, not inode
or node blocks in order to avoid selinux denial.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/f2fs.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9f2bc78a9f5b..8f3ad9662d13 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1611,7 +1611,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
 }
 
 static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
-   struct inode *inode)
+   struct inode *inode, bool cap)
 {
if (!inode)
return true;
@@ -1624,7 +1624,7 @@ static inline bool __allow_reserved_blocks(struct 
f2fs_sb_info *sbi,
if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) &&
in_group_p(F2FS_OPTION(sbi).s_resgid))
return true;
-   if (capable(CAP_SYS_RESOURCE))
+   if (cap && capable(CAP_SYS_RESOURCE))
return true;
return false;
 }
@@ -1659,7 +1659,7 @@ static inline int inc_valid_block_count(struct 
f2fs_sb_info *sbi,
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
 
-   if (!__allow_reserved_blocks(sbi, inode))
+   if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
 
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
@@ -1866,7 +1866,7 @@ static inline int inc_valid_node_count(struct 
f2fs_sb_info *sbi,
valid_block_count = sbi->total_valid_block_count +
sbi->current_reserved_blocks + 1;
 
-   if (!__allow_reserved_blocks(sbi, inode))
+   if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
 
if (unlikely(valid_block_count > sbi->user_block_count)) {
-- 
2.17.0.484.g0c8726318c-goog



Re: general protection fault in smc_getsockopt

2018-04-20 Thread syzbot

syzbot has found reproducer for the following crash on upstream commit
83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +)
Merge branch 'fixes' of  
git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=28a2c86cf19c81d871fa


So far this crash happened 59 times on net-next, upstream.
C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6375334488309760
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=6112997885870080
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=5942131738804224
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=1808800213120130118

compiler: gcc (GCC) 8.0.1 20180413 (experimental)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+28a2c86cf19c81d87...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed.

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 4492 Comm: syzkaller771634 Not tainted 4.17.0-rc1+ #10
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:smc_getsockopt+0x8b/0x120 net/smc/af_smc.c:1298
RSP: 0018:8801d90b7cc0 EFLAGS: 00010206
RAX: dc00 RBX:  RCX: 2000
RDX: 0005 RSI: 873e3d16 RDI: 0028
RBP: 8801d90b7cf0 R08: 2040 R09: ed0036a05800
R10: ed0036a05800 R11: 8801b502c003 R12: 8801d90b7d40
R13:  R14: 0008 R15: 2000
FS:  02017880() GS:8801dae0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 2040 CR3: 0001ac552000 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 __sys_getsockopt+0x1a5/0x370 net/socket.c:1940
 __do_sys_getsockopt net/socket.c:1951 [inline]
 __se_sys_getsockopt net/socket.c:1948 [inline]
 __x64_sys_getsockopt+0xbe/0x150 net/socket.c:1948
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x43fcf9
RSP: 002b:7ffcb16b9c58 EFLAGS: 0217 ORIG_RAX: 0037
RAX: ffda RBX: 004002c8 RCX: 0043fcf9
RDX: 0008 RSI:  RDI: 0003
RBP: 006ca018 R08: 2040 R09: 004002c8
R10: 2000 R11: 0217 R12: 00401620
R13: 004016b0 R14:  R15: 
Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 93 00 00 00 48 8b 9b 50 04 00 00 48  
b8 00 00 00 00 00 fc ff df 48 8d 7b 28 48 89 fa 48 c1 ea 03 <80> 3c 02 00  
75 62 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 28 49

RIP: smc_getsockopt+0x8b/0x120 net/smc/af_smc.c:1298 RSP: 8801d90b7cc0
---[ end trace 7e67761582d7c7ee ]---



[PATCH] Revert "drm/sun4i: add lvds mode_valid function"

2018-04-20 Thread megous
From: Ondrej Jirman 

The reverted commit broke LVDS output on TBS A711 Tablet. That tablet
has simple-panel node that has fixed pixel clock-frequency that A83T
SoC used in the tablet can't generate exactly.

Requested rate is 5200 and rounded_rate is calculated as 51857142.
It's close enough for it to work in practice, but with strict check
in the reverted commit, the mode is rejected needlessly in this case.

DT allows to specify a range of values for simple-panel/clock-frequency,
but driver doesn't respect that ATM. Given that TBS A711 is the single
user of sun4i-lvds driver, let's revert that commit for now, until
a better solution for the problem is found.

Also see: https://patchwork.kernel.org/patch/9446385/ for relevant
discussion (or search for "[RFC] drm/sun4i: rgb: Add 5% tolerance
to dot clock frequency check").

Fixes: e4e4b7ad50cf ("drm/sun4i: add lvds mode_valid function")
Reported-by: Ondrej Jirman 
Signed-off-by: Ondrej Jirman 
---
 drivers/gpu/drm/sun4i/sun4i_lvds.c | 55 --
 1 file changed, 55 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c 
b/drivers/gpu/drm/sun4i/sun4i_lvds.c
index b4c9fbf5..be3f14d7746d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_lvds.c
+++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c
@@ -94,64 +94,9 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder 
*encoder)
}
 }
 
-static enum drm_mode_status sun4i_lvds_encoder_mode_valid(struct drm_encoder 
*crtc,
- const struct 
drm_display_mode *mode)
-{
-   struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(crtc);
-   struct sun4i_tcon *tcon = lvds->tcon;
-   u32 hsync = mode->hsync_end - mode->hsync_start;
-   u32 vsync = mode->vsync_end - mode->vsync_start;
-   unsigned long rate = mode->clock * 1000;
-   long rounded_rate;
-
-   DRM_DEBUG_DRIVER("Validating modes...\n");
-
-   if (hsync < 1)
-   return MODE_HSYNC_NARROW;
-
-   if (hsync > 0x3ff)
-   return MODE_HSYNC_WIDE;
-
-   if ((mode->hdisplay < 1) || (mode->htotal < 1))
-   return MODE_H_ILLEGAL;
-
-   if ((mode->hdisplay > 0x7ff) || (mode->htotal > 0xfff))
-   return MODE_BAD_HVALUE;
-
-   DRM_DEBUG_DRIVER("Horizontal parameters OK\n");
-
-   if (vsync < 1)
-   return MODE_VSYNC_NARROW;
-
-   if (vsync > 0x3ff)
-   return MODE_VSYNC_WIDE;
-
-   if ((mode->vdisplay < 1) || (mode->vtotal < 1))
-   return MODE_V_ILLEGAL;
-
-   if ((mode->vdisplay > 0x7ff) || (mode->vtotal > 0xfff))
-   return MODE_BAD_VVALUE;
-
-   DRM_DEBUG_DRIVER("Vertical parameters OK\n");
-
-   tcon->dclk_min_div = 7;
-   tcon->dclk_max_div = 7;
-   rounded_rate = clk_round_rate(tcon->dclk, rate);
-   if (rounded_rate < rate)
-   return MODE_CLOCK_LOW;
-
-   if (rounded_rate > rate)
-   return MODE_CLOCK_HIGH;
-
-   DRM_DEBUG_DRIVER("Clock rate OK\n");
-
-   return MODE_OK;
-}
-
 static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = {
.disable= sun4i_lvds_encoder_disable,
.enable = sun4i_lvds_encoder_enable,
-   .mode_valid = sun4i_lvds_encoder_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = {
-- 
2.17.0



[PATCH] Revert "f2fs: introduce f2fs_set_page_dirty_nobuffer"

2018-04-20 Thread Jaegeuk Kim
This patch reverts copied f2fs_set_page_dirty_nobuffer to use generic function
for stability.

This reverts commit fe76b796fc5194cc3d57265002e3a748566d073f.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/checkpoint.c |  2 +-
 fs/f2fs/data.c   | 33 +
 fs/f2fs/f2fs.h   |  1 -
 fs/f2fs/node.c   |  2 +-
 4 files changed, 3 insertions(+), 35 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 2e23b953d304..8fc55d42ba25 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -384,7 +384,7 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
if (!PageDirty(page)) {
-   f2fs_set_page_dirty_nobuffers(page);
+   __set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
SetPagePrivate(page);
f2fs_trace_pid(page);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d32bf363cb60..5477fc09c3cd 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -19,8 +19,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 
@@ -2471,35 +2469,6 @@ int f2fs_release_page(struct page *page, gfp_t wait)
return 1;
 }
 
-/*
- * This was copied from __set_page_dirty_buffers which gives higher performance
- * in very high speed storages. (e.g., pmem)
- */
-void f2fs_set_page_dirty_nobuffers(struct page *page)
-{
-   struct address_space *mapping = page->mapping;
-   unsigned long flags;
-
-   if (unlikely(!mapping))
-   return;
-
-   spin_lock(&mapping->private_lock);
-   lock_page_memcg(page);
-   SetPageDirty(page);
-   spin_unlock(&mapping->private_lock);
-
-   xa_lock_irqsave(&mapping->i_pages, flags);
-   WARN_ON_ONCE(!PageUptodate(page));
-   account_page_dirtied(page, mapping);
-   radix_tree_tag_set(&mapping->i_pages,
-   page_index(page), PAGECACHE_TAG_DIRTY);
-   xa_unlock_irqrestore(&mapping->i_pages, flags);
-   unlock_page_memcg(page);
-
-   __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-   return;
-}
-
 static int f2fs_set_data_page_dirty(struct page *page)
 {
struct address_space *mapping = page->mapping;
@@ -2523,7 +2492,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
}
 
if (!PageDirty(page)) {
-   f2fs_set_page_dirty_nobuffers(page);
+   __set_page_dirty_nobuffers(page);
update_dirty_page(inode, page);
return 1;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0c72908f3185..9f2bc78a9f5b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2939,7 +2939,6 @@ int f2fs_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
u64 start, u64 len);
 bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
 bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
-void f2fs_set_page_dirty_nobuffers(struct page *page);
 int __f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc,
enum iostat_type io_type);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f202398e20ea..ae83ca9d2d31 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1753,7 +1753,7 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
if (!PageDirty(page)) {
-   f2fs_set_page_dirty_nobuffers(page);
+   __set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
SetPagePrivate(page);
f2fs_trace_pid(page);
-- 
2.17.0.484.g0c8726318c-goog



Re: general protection fault in smc_setsockopt

2018-04-20 Thread syzbot

syzbot has found reproducer for the following crash on upstream commit
83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +)
Merge branch 'fixes' of  
git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=9045fc589fcd196ef522


So far this crash happened 124 times on net-next, upstream.
C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6522155797839872
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5566093930266624
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=6661555940753408
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=1808800213120130118

compiler: gcc (GCC) 8.0.1 20180413 (experimental)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+9045fc589fcd196ef...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed.

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 4520 Comm: syzkaller696326 Not tainted 4.17.0-rc1+ #10
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:smc_setsockopt+0x8b/0x120 net/smc/af_smc.c:1287
RSP: 0018:8801b433fcc8 EFLAGS: 00010206
RAX: dc00 RBX:  RCX: 2000
RDX: 0005 RSI: 873e3bf6 RDI: 0028
RBP: 8801b433fcf8 R08:  R09: ed00359a3780
R10: ed00359a3780 R11: 8801acd1bc03 R12: 8801b433fd40
R13: 0021 R14: 000d R15: 2000
FS:  00b01880() GS:8801daf0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 562e1fa410d0 CR3: 0001acf1e000 CR4: 001406e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 __sys_setsockopt+0x1bd/0x390 net/socket.c:1903
 __do_sys_setsockopt net/socket.c:1914 [inline]
 __se_sys_setsockopt net/socket.c:1911 [inline]
 __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x43fd19
RSP: 002b:7ffccc4960f8 EFLAGS: 0217 ORIG_RAX: 0036
RAX: ffda RBX: 004002c8 RCX: 0043fd19
RDX: 000d RSI: 0021 RDI: 0004
RBP: 006ca018 R08:  R09: 004002c8
R10: 2000 R11: 0217 R12: 00401640
R13: 004016d0 R14:  R15: 
Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 93 00 00 00 48 8b 9b 50 04 00 00 48  
b8 00 00 00 00 00 fc ff df 48 8d 7b 28 48 89 fa 48 c1 ea 03 <80> 3c 02 00  
75 62 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 28 49

RIP: smc_setsockopt+0x8b/0x120 net/smc/af_smc.c:1287 RSP: 8801b433fcc8
---[ end trace 3858d0cd9ce5e4d4 ]---



loan offer

2018-04-20 Thread info
DO YOU NEED FINANCIAL HELP? $5000 to $20,000,000.00 No credit check 

Repaid over 1 year to maximum of 30 years at a low interest rate of 3%. 

Approval in 15-30 minutes Open 7 days a week from 24/7 Service available 

nationwide E-MAIL  globalsolutionfinanc...@yahoo.com


[PATCH] staging: android: ion: remove duplicate buffer field initializes

2018-04-20 Thread Todd Poynor
As a result of various previous patches, ion_buffer_create() now
has two sets of identical statements for initializing two fields of
the buffer struct, next to each other.  Remove one set.

Move the initialization of these two fields together with the
statements that initialize the other two fields from the function
parameters, prior to the heap allocate() call, for consistency.

Signed-off-by: Todd Poynor 
---
 drivers/staging/android/ion/ion.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index e74db7902549..269a431646be 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -74,6 +74,8 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap 
*heap,
 
buffer->heap = heap;
buffer->flags = flags;
+   buffer->dev = dev;
+   buffer->size = len;
 
ret = heap->ops->allocate(heap, buffer, len, flags);
 
@@ -93,11 +95,7 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap 
*heap,
goto err1;
}
 
-   buffer->dev = dev;
-   buffer->size = len;
 
-   buffer->dev = dev;
-   buffer->size = len;
INIT_LIST_HEAD(&buffer->attachments);
mutex_init(&buffer->lock);
mutex_lock(&dev->buffer_lock);
-- 
2.17.0.484.g0c8726318c-goog



[no subject]

2018-04-20 Thread Maratovich Fridman


I Mikhail Fridman. has selected you specially as one of my beneficiaries
for my Charitable Donation, Just as I have declared on May 23, 2016 to give
my fortune as charity.

Check the link below for confirmation:

http://www.ibtimes.co.uk/russias-second-wealthiest-man-mikhail-fridman-plans-leaving-14-2bn-fortune-charity-1561604

Reply as soon as possible with further directives.

Best Regards,
Mikhail Fridman.


[no subject]

2018-04-20 Thread Fridman M. Maratovich


I Mikhail Fridman. has selected you specially as one of my beneficiaries
for my Charitable Donation, Just as I have declared on May 23, 2016 to give
my fortune as charity.

Check the link below for confirmation:

http://www.ibtimes.co.uk/russias-second-wealthiest-man-mikhail-fridman-plans-leaving-14-2bn-fortune-charity-1561604

Reply as soon as possible with further directives.

Best Regards,
Mikhail Fridman.


Re: [RFC PATCH] dt-bindings: add a jsonschema binding example

2018-04-20 Thread Rob Herring
On Fri, Apr 20, 2018 at 6:41 PM, Stephen Boyd  wrote:
> Quoting Rob Herring (2018-04-20 11:15:04)
>> On Fri, Apr 20, 2018 at 11:47 AM, Stephen Boyd  wrote:
>> > Quoting Rob Herring (2018-04-18 15:29:05)
>> >> diff --git a/Documentation/devicetree/bindings/example-schema.yaml 
>> >> b/Documentation/devicetree/bindings/example-schema.yaml
>> >> new file mode 100644
>> >> index ..fe0a3bd1668e
>> >> --- /dev/null
>> >> +++ b/Documentation/devicetree/bindings/example-schema.yaml
>> >> @@ -0,0 +1,149 @@
>> >> +
>> >> +  The end of the description is marked by indentation less than the 
>> >> first line
>> >> +  in the description.
>> >> +
>> >> +select: false
>> >> +  # 'select' is a schema applied to a DT node to determine if this 
>> >> binding
>> >> +  # schema should be applied to the node. It is optional and by default 
>> >> the
>> >> +  # possible compatible strings are extracted and used to match.
>> >
>> > Can we get a concrete example here?
>>
>> select: true
>>
>> :) Which is apply to every node.
>>
>> A better one is from the memory node schema ('$nodename' gets added :
>>
>> select:
>>   required: ["$nodename"]
>>   properties:
>> $nodename:
>>   oneOf:
>> - pattern: "^memory@[0-9a-f]*"
>> - const: "memory" # 'memory' only allowed for selecting
>>
>>
>> I expect the vast majority of device bindings will not use select at
>> all and rely on compatible string matching.
>
> Thanks! I was looking to see how the select syntax would work and this
> shows one example nicely. I suppose another way would be to show how a
> compatible string would be matched through select, even though it's
> redundant.
>
> Is there a way we can enforce node names through the schema too? For
> example to enforce that a clock controller is called 'clock-controller'
> or a spi master is called 'spi'.

Yes, that's something I'd like to do. I think the easiest is to just
treat node name as a property. We already generate a $nodename
property when parsing the yaml format DT.

>>
>> >> +
>> >> +properties:
>> > [...]
>> >> +
>> >> +  interrupts:
>> >> +# Either 1 or 2 interrupts can be present
>> >> +minItems: 1
>> >> +maxItems: 2
>> >> +items:
>> >> +  - description: tx or combined interrupt
>> >> +  - description: rx interrupt
>> >> +
>> >> +description: |
>> >
>> > The '|' is needed to make yaml happy?
>>
>> Yes, this is simply how you do literal text blocks in yaml.
>>
>> We don't really need for this one really, but for the top-level
>> 'description' we do. The long term intent is 'description' would be
>> written in sphinx/rst and can be extracted into the DT spec (for
>> common bindings). Grant has experimented with that some.
>
> Ok. That sounds cool. Then we could embed links to datasheets and SVGs
> too.
>
>>
>> >> +  A variable number of interrupts warrants a description of what 
>> >> conditions
>> >> +  affect the number of interrupts. Otherwise, descriptions on 
>> >> standard
>> >> +  properties are not necessary.
>> >> +
>> >> +  interrupt-names:
>> >> +# minItems must be specified here because the default would be 2
>> >> +minItems: 1
>> >> +items:
>> >> +  - const: "tx irq"
>> >> +  - const: "rx irq"
>> >> +
>> >> +  # Property names starting with '#' must be quoted
>> >> +  '#interrupt-cells':
>> >> +# A simple case where the value must always be '2'.
>> >> +# The core schema handles that this must be a single integer.
>> >> +const: 2
>> >> +
>> >> +  interrupt-controller: {}
>> >
>> > Does '{}' mean nothing to see here?
>>
>> Yes. It's just an empty schema that's always valid.
>
> Could we include another schema to indicate that this is an interrupt
> controller? I'm sort of asking for multi-schema inheritance.

Yes, but there's no need to do that here. Another schema can select on
"interrupt-controller" property and be applied independently. There's
already an example of that for the root node in my yaml-bindings repo.

Rob


Re: [RFC PATCH] dt-bindings: add a jsonschema binding example

2018-04-20 Thread Rob Herring
On Fri, Apr 20, 2018 at 4:00 PM, Frank Rowand  wrote:
> Hi Rob,
>
> Thanks for the example.  It was a good starting tutorial of sorts for me
> to understand the format a bit.
>
>
> On 04/18/18 15:29, Rob Herring wrote:
>> The current DT binding documentation format of freeform text is painful
>> to write, review, validate and maintain.
>>
>> This is just an example of what a binding in the schema format looks
>> like. It's using jsonschema vocabulary in a YAML encoded document. Using
>> jsonschema gives us access to existing tooling. A YAML encoding gives us
>> something easy to edit.
>>
>> This example is just the tip of the iceberg, but it the part most
>> developers writing bindings will interact with. Backing all this up
>> are meta-schema (to validate the binding schemas), some DT core schema,
>> YAML encoded DT output with dtc, and a small number of python scripts to
>> run validation. The gory details including how to run end-to-end
>> validation can be found here:
>>
>> https://www.spinics.net/lists/devicetree-spec/msg00649.html
>>
>> Signed-off-by: Rob Herring 
>> ---
>> Cc list,
>> You all review and/or write lots of binding documents. I'd like some feedback
>> on the format.
>>
>> Thanks,
>> Rob
>>
>>  .../devicetree/bindings/example-schema.yaml| 149 
>> +
>>  1 file changed, 149 insertions(+)
>>  create mode 100644 Documentation/devicetree/bindings/example-schema.yaml
>>
>> diff --git a/Documentation/devicetree/bindings/example-schema.yaml 
>> b/Documentation/devicetree/bindings/example-schema.yaml
>> new file mode 100644
>> index ..fe0a3bd1668e
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/example-schema.yaml
>
> I'm guessing by the path name that this is in the Linux kernel source tree.

Yes, well, my kernel tree. Most of the work still lives here:

https://github.com/robherring/yaml-bindings/

>> @@ -0,0 +1,149 @@
>> +# SPDX-License-Identifier: BSD-2-Clause
>
> If in the Linux kernel source tree, then allow gpl-v2 as a possible license.

Why? BSD is compatible. The license of the above repo is all BSD.

Of course there's all the existing docs which default to GPLv2 and
we'll probably have to maintain that.

>> +# Copyright 2018 Linaro Ltd.
>> +%YAML 1.2
>> +---
>> +# All the top-level keys are standard json-schema keywords except for
>> +# 'maintainers' and 'select'
>> +
>> +# $id is a unique idenifier based on the filename
>
>  ^  identifier
>
>> +$id: "http://devicetree.org/schemas/example-schema.yaml#";
>
> Does this imply that all schemas will be at devicetree.org instead
> of in the Linux kernel source tree?  This would be counter to my
> earlier guess about where this patch is applied.

They could be, but not necessarily. This is just convention in
jsonschema is the best I understand it.

I don't think you'd want validation to require an internet connection.
For the base meta-schema, for example, it does exist at
http://json-schema.org/draft-06/schema, but that's also distributed
with implementations of jsonschema validators.

A large part (not that any part is large) of the tools Grant and I
have written is doing the cross reference resolution of files which
uses the $id field.


>> +$schema: "http://devicetree.org/meta-schemas/core.yaml#";
>
> How is $schema used?

Tells the validator what meta-schema this schema follows. Typically
you see draft04 or draft06 here if you haven't written a meta-schema.

> Is it accessed across the network?

Could be, but generally no.


>> +
>> +# Only 1 version supported for now
>> +version: 1
>> +
>> +title: An example schema annotated with jsonschema details
>> +
>> +maintainers:
>> +  - Rob Herring 
>> +
>> +description: |
>> +  A more detailed multi-line description of the binding.
>> +
>> +  Details about the hardware device and any links to datasheets can go here.
>> +
>> +  The end of the description is marked by indentation less than the first 
>> line
>> +  in the description.
>> +
>> +select: false
>> +  # 'select' is a schema applied to a DT node to determine if this binding
>> +  # schema should be applied to the node. It is optional and by default the
>> +  # possible compatible strings are extracted and used to match.
>
> My first reaction was that 'node' should somehow be included in the name
> of 'select'.  But my second thought was that maybe 'node' is implied,
> because every schema file describes a single node???  This is where my
> lack of knowledge kicks in - I'll go read stuff in your yaml-bindings
> repo to get a better background...

Yes, schema are applied to nodes and most of the current uses of
select are based on node name. We walk the DT and apply all the schema
for a node that select (either in the doc or generated from
compatible) is valid.


>> +
>> +properties:
>> +  # A dictionary of DT properties for this binding schema
>> +  compatible:
>> +# More complicated schema can use oneOf (XOR), anyOf (OR), or allOf 
>> (AND)
>> +# to h

Re: [PATCH 5/5] x86, pti: filter at vma->vm_page_prot population

2018-04-20 Thread Nadav Amit
Dave Hansen  wrote:

> 
> From: Dave Hansen 
> 
> 0day reported warnings at boot on 32-bit systems without NX support:
> 
> [   12.349193] attempted to set unsupported pgprot: 8025 bits: 
> 8000 supported: 7fff
> [   12.350792] WARNING: CPU: 0 PID: 1 at arch/x86/include/asm/pgtable.h:540 
> handle_mm_fault+0xfc1/0xfe0:
>   check_pgprot at 
> arch/x86/include/asm/pgtable.h:535
>(inlined by) pfn_pte at 
> arch/x86/include/asm/pgtable.h:549
>(inlined by) do_anonymous_page 
> at mm/memory.c:3169
>(inlined by) handle_pte_fault 
> at mm/memory.c:3961
>(inlined by) __handle_mm_fault 
> at mm/memory.c:4087
>(inlined by) handle_mm_fault 
> at mm/memory.c:4124
> 
> The problem was that we stopped massaging page permissions at PTE creation
> time, so vma->vm_page_prot was passed unfiltered to PTE creation.
> 
> To fix it, filter the page protections before they are installed in
> vma->vm_page_prot.
> 
> Signed-off-by: Dave Hansen 
> Reported-by: Fengguang Wu 
> Fixes: fb43d6cb91 ("x86/mm: Do not auto-massage page protections")
> Cc: Andrea Arcangeli 
> Cc: Andy Lutomirski 
> Cc: Arjan van de Ven 
> Cc: Borislav Petkov 
> Cc: Dan Williams 
> Cc: David Woodhouse 
> Cc: Greg Kroah-Hartman 
> Cc: Hugh Dickins 
> Cc: Josh Poimboeuf 
> Cc: Juergen Gross 
> Cc: Kees Cook 
> Cc: Linus Torvalds 
> Cc: Nadav Amit 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: linux...@kvack.org
> Cc: Ingo Molnar 
> ---
> 
> b/arch/x86/Kconfig   |4 
> b/arch/x86/include/asm/pgtable.h |5 +
> b/mm/mmap.c  |   11 ++-
> 3 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff -puN arch/x86/include/asm/pgtable.h~pti-glb-protection_map 
> arch/x86/include/asm/pgtable.h
> --- a/arch/x86/include/asm/pgtable.h~pti-glb-protection_map   2018-04-20 
> 14:10:08.251749151 -0700
> +++ b/arch/x86/include/asm/pgtable.h  2018-04-20 14:10:08.260749151 -0700
> @@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgp
> 
> #define canon_pgprot(p) __pgprot(massage_pgprot(p))
> 
> +static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
> +{
> + return canon_pgprot(prot);
> +}
> +
> static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
>enum page_cache_mode pcm,
>enum page_cache_mode new_pcm)
> diff -puN arch/x86/Kconfig~pti-glb-protection_map arch/x86/Kconfig
> --- a/arch/x86/Kconfig~pti-glb-protection_map 2018-04-20 14:10:08.253749151 
> -0700
> +++ b/arch/x86/Kconfig2018-04-20 14:10:08.260749151 -0700
> @@ -52,6 +52,7 @@ config X86
>   select ARCH_HAS_DEVMEM_IS_ALLOWED
>   select ARCH_HAS_ELF_RANDOMIZE
>   select ARCH_HAS_FAST_MULTIPLIER
> + select ARCH_HAS_FILTER_PGPROT
>   select ARCH_HAS_FORTIFY_SOURCE
>   select ARCH_HAS_GCOV_PROFILE_ALL
>   select ARCH_HAS_KCOVif X86_64
> @@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX
> config ARCH_HAS_CACHE_LINE_SIZE
>   def_bool y
> 
> +config ARCH_HAS_FILTER_PGPROT
> + def_bool y
> +
> config HAVE_SETUP_PER_CPU_AREA
>   def_bool y
> 
> diff -puN mm/mmap.c~pti-glb-protection_map mm/mmap.c
> --- a/mm/mmap.c~pti-glb-protection_map2018-04-20 14:10:08.256749151 
> -0700
> +++ b/mm/mmap.c   2018-04-20 14:10:08.261749151 -0700
> @@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_i
>   __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
> };
> 
> +#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
> +static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
> +{
> + return prot;
> +}
> +#endif
> +
> pgprot_t vm_get_page_prot(unsigned long vm_flags)
> {
> - return __pgprot(pgprot_val(protection_map[vm_flags &
> + pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
>   (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
>   pgprot_val(arch_vm_get_page_prot(vm_flags)));
> +
> + return arch_filter_pgprot(ret);
> }
> EXPORT_SYMBOL(vm_get_page_prot);

Wouldn’t it be simpler or at least cleaner to change the protection map if
NX is not supported? I presume it can be done paging_init() similarly to the
way other archs (e.g., arm, mips) do.



RE: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE for Geminilake

2018-04-20 Thread Botello Ortega, Luis
Hi all:

We tested GLK DMC 1.04 FW in last week of September 2017, using the latest 
drm-tip version for that time (4.14.0-rc2) and according to our results we 
could declare this FW as acceptable and healthy to be used with kernel version 
4.14 . 
However, we cannot guarantee quality and healthy of this FW if it is used in 
top of current drm-tip kernel (4.17-rc0).

Best Regards
Luis Botello


-Original Message-
From: Srivatsa, Anusha 
Sent: Friday, April 20, 2018 1:30 PM
To: Vivi, Rodrigo ; Jani Nikula 
; Botello Ortega, Luis 
; Martinez Monroy, Elio 

Cc: Ian W MORRISON ; airl...@linux.ie; Greg KH 
; intel-...@lists.freedesktop.org; 
linux-kernel@vger.kernel.org; sta...@vger.kernel.org; 
dri-de...@lists.freedesktop.org; Wajdeczko, Michal 
Subject: RE: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE for Geminilake



>-Original Message-
>From: Vivi, Rodrigo
>Sent: Friday, April 20, 2018 11:04 AM
>To: Jani Nikula 
>Cc: Srivatsa, Anusha ; Ian W MORRISON 
>; airl...@linux.ie; Greg KH 
>; intel-...@lists.freedesktop.org; linux- 
>ker...@vger.kernel.org; sta...@vger.kernel.org; dri- 
>de...@lists.freedesktop.org; Wajdeczko, Michal 
>
>Subject: Re: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE for 
>Geminilake
>
>On Tue, Apr 17, 2018 at 12:02:52PM +0300, Jani Nikula wrote:
>> On Mon, 16 Apr 2018, "Srivatsa, Anusha"  wrote:
>> >>-Original Message-
>> >>From: Jani Nikula [mailto:jani.nik...@linux.intel.com]
>> >>Sent: Wednesday, April 11, 2018 5:27 AM
>> >>To: Ian W MORRISON 
>> >>Cc: Vivi, Rodrigo ; Srivatsa, Anusha 
>> >>; Wajdeczko, Michal 
>> >>; Greg KH ; 
>> >>airl...@linux.ie; joonas.lahti...@linux.intel.com; 
>> >>linux-kernel@vger.kernel.org; sta...@vger.kernel.org; 
>> >>intel-...@lists.freedesktop.org; dri- de...@lists.freedesktop.org
>> >>Subject: Re: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE 
>> >>for Geminilake
>> >>
>> >>On Wed, 11 Apr 2018, Ian W MORRISON  wrote:
>> >>> 
>> >>>
>> 
>>  NAK on indiscriminate Cc: stable. There are zero guarantees that 
>>  older kernels will work with whatever firmware you throw at them.
>> 
>> >>>
>> >>> I included 'Cc: stable' so the patch would get added to the v4.16 
>> >>> and
>> >>> v4.15 kernels which I have tested with the patch. I found that 
>> >>> earlier kernels didn't support the 'linux-firmware' package 
>> >>> required to get wifi working on Intel's new Gemini Lake NUC.
>> >>
>> >>You realize that this patch should have nothing to do with wifi?
>> >>
>> >>Rodrigo, Anusha, if you think Cc: stable is appropriate, please 
>> >>indicate the specific versions of stable it is appropriate for.
>> >
>> > Hi Jani,
>> >
>> > The stable kernel version is 4.12 and beyond.
>> > It is appropriate to add the CC: stable in my opinion
>>
>> Who tested the firmware with v4.12 and later? We only have the CI 
>> results against *current* drm-tip. We don't even know about v4.16.
>>
>
>I understand your concerns, but the problem was that our old process 
>was a bit
>(lot?) messed and there was the unreliable time until the firmware 
>really lands on linux-firmware.git. So MODULE_FIRMWARE call was only 
>added after firmware was really there on firmware repository but it wasn't 
>about the testing.
>
>In other words, the bump version patch was merged after tested, but 
>MODULE_FIRMWARE was left behind because firmware blob took a while to 
>get pulled into linux-firmware.git and we end up forgetting to add it there.
>
>In my opinion it should be safe to add the MODULE_FIRMWARE there based 
>on the tests from when the version was bumped.

Luis, Elio, can you guys confirm that this firmware is tested and healthy? And 
also, give a tested-by to this patch please?

Thanks,
Anusha 
>> I'm not going to ack and take responsibility for the stable backports 
>> unless someone actually comes forward with credible Tested-bys.
>>
>> BR,
>> Jani.
>>
>>
>> >
>> > Anusha
>> >>BR,
>> >>Jani.
>> >>
>> >>>
>> 
>>  PS. How is this a "RESEND"? I haven't seen this before.
>> 
>> >>>
>> >>> It is a 'RESEND' for that very reason. I initially sent the patch 
>> >>> to the same people as a similar patch
>> >>> (https://patchwork.kernel.org/patch/10143637/) however after 
>> >>> realising this omitted required addresses I added them and resent 
>> >>> the
>patch.
>> >>>
>> >>> Best regards,
>> >>> Ian
>> >>
>> >>--
>> >>Jani Nikula, Intel Open Source Technology Center
>>
>> --
>> Jani Nikula, Intel Open Source Technology Center 
>> ___
>> dri-devel mailing list
>> dri-de...@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 02/13] staging: iio: tsl2x7x: use GPL-2.0+ SPDX license identifier

2018-04-20 Thread Brian Masney
The summary text for the GPL is not needed since the SPDX identifier
is a legally binding shorthand that can be used instead.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 10 +-
 drivers/staging/iio/light/tsl2x7x.h | 14 +-
 2 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index eeccfbb0eb1f..9cdcc8c9e812 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -5,15 +5,7 @@
  * Copyright (c) 2012, TAOS Corporation.
  * Copyright (c) 2017-2018 Brian Masney 
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
+ * SPDX-License-Identifier: GPL-2.0+
  */
 
 #include 
diff --git a/drivers/staging/iio/light/tsl2x7x.h 
b/drivers/staging/iio/light/tsl2x7x.h
index d382cdbb976e..992ee2465609 100644
--- a/drivers/staging/iio/light/tsl2x7x.h
+++ b/drivers/staging/iio/light/tsl2x7x.h
@@ -4,19 +4,7 @@
  *
  * Copyright (c) 2012, TAOS Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * SPDX-License-Identifier: GPL-2.0+
  */
 
 #ifndef __TSL2X7X_H
-- 
2.14.3



[PATCH 03/13] staging: iio: tsl2x7x: don't return error in IRQ handler

2018-04-20 Thread Brian Masney
tsl2x7x_event_handler() could return an error and this could cause the
interrupt to remain masked. We shouldn't return an error in the
interrupt handler so this patch always returns IRQ_HANDLED. An error
will be logged if one occurs.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 9cdcc8c9e812..95a00b965c5e 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -1320,7 +1320,7 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void 
*private)
 
ret = tsl2x7x_read_status(chip);
if (ret < 0)
-   return ret;
+   return IRQ_HANDLED;
 
/* What type of interrupt do we need to process */
if (ret & TSL2X7X_STA_PRX_INTR) {
@@ -1341,9 +1341,7 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void 
*private)
   timestamp);
}
 
-   ret = tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR);
-   if (ret < 0)
-   return ret;
+   tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR);
 
return IRQ_HANDLED;
 }
-- 
2.14.3



[PATCH 04/13] staging: iio: tsl2x7x: simplify tsl2x7x_clear_interrupts function

2018-04-20 Thread Brian Masney
tsl2x7x_clear_interrupts() takes a reg argument but there are only
two callers to this function and both callers pass the same value.
Since this function was introduced, interrupts are now working
properly for this driver, and several unnecessary calls to
tsl2x7x_clear_interrupts() were removed. This patch removes the
tsl2x7x_clear_interrupts() function and replaces the two callers
with the i2c_smbus_write_byte() call instead.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 95a00b965c5e..f37fc74b8fbc 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -271,20 +271,6 @@ static const u8 device_channel_config[] = {
ALSPRX2
 };
 
-static int tsl2x7x_clear_interrupts(struct tsl2X7X_chip *chip, int reg)
-{
-   int ret;
-
-   ret = i2c_smbus_write_byte(chip->client,
-  TSL2X7X_CMD_REG | TSL2X7X_CMD_SPL_FN | reg);
-   if (ret < 0)
-   dev_err(&chip->client->dev,
-   "%s: failed to clear interrupt status %x: %d\n",
-   __func__, reg, ret);
-
-   return ret;
-}
-
 static int tsl2x7x_read_status(struct tsl2X7X_chip *chip)
 {
int ret;
@@ -714,9 +700,15 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev)
if (ret < 0)
return ret;
 
-   ret = tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR);
-   if (ret < 0)
+   ret = i2c_smbus_write_byte(chip->client,
+  TSL2X7X_CMD_REG | TSL2X7X_CMD_SPL_FN |
+  TSL2X7X_CMD_PROXALS_INT_CLR);
+   if (ret < 0) {
+   dev_err(&chip->client->dev,
+   "%s: failed to clear interrupt status: %d\n",
+   __func__, ret);
return ret;
+   }
 
chip->tsl2x7x_chip_status = TSL2X7X_CHIP_WORKING;
 
@@ -1341,7 +1333,13 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void 
*private)
   timestamp);
}
 
-   tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR);
+   ret = i2c_smbus_write_byte(chip->client,
+  TSL2X7X_CMD_REG | TSL2X7X_CMD_SPL_FN |
+  TSL2X7X_CMD_PROXALS_INT_CLR);
+   if (ret < 0)
+   dev_err(&chip->client->dev,
+   "%s: failed to clear interrupt status: %d\n",
+   __func__, ret);
 
return IRQ_HANDLED;
 }
-- 
2.14.3



[PATCH 05/13] staging: iio: tsl2x7x: remove unnecessary chip status checks in suspend/resume

2018-04-20 Thread Brian Masney
tsl2x7x_suspend() and tsl2x7x_resume() both check to see what the
current chip status is. These checks are not necessary so this patch
removes those checks.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 16 ++--
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index f37fc74b8fbc..8d8af0cf9768 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -1682,27 +1682,15 @@ static int tsl2x7x_probe(struct i2c_client *clientp,
 static int tsl2x7x_suspend(struct device *dev)
 {
struct iio_dev *indio_dev = dev_get_drvdata(dev);
-   struct tsl2X7X_chip *chip = iio_priv(indio_dev);
-   int ret = 0;
-
-   if (chip->tsl2x7x_chip_status == TSL2X7X_CHIP_WORKING) {
-   ret = tsl2x7x_chip_off(indio_dev);
-   chip->tsl2x7x_chip_status = TSL2X7X_CHIP_SUSPENDED;
-   }
 
-   return ret;
+   return tsl2x7x_chip_off(indio_dev);
 }
 
 static int tsl2x7x_resume(struct device *dev)
 {
struct iio_dev *indio_dev = dev_get_drvdata(dev);
-   struct tsl2X7X_chip *chip = iio_priv(indio_dev);
-   int ret = 0;
 
-   if (chip->tsl2x7x_chip_status == TSL2X7X_CHIP_SUSPENDED)
-   ret = tsl2x7x_chip_on(indio_dev);
-
-   return ret;
+   return tsl2x7x_chip_on(indio_dev);
 }
 
 static int tsl2x7x_remove(struct i2c_client *client)
-- 
2.14.3



[PATCH 08/13] staging: iio: tsl2x7x: add range checking to three sysfs attributes

2018-04-20 Thread Brian Masney
The sysfs attributes in_illuminance0_target_input,
in_illuminance0_calibrate, and in_proximity0_calibrate did not have
proper range checking in place so this patch adds the correct range
checks.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 56730baea927..15bc0af1bb6c 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -835,9 +835,10 @@ static ssize_t in_illuminance0_target_input_store(struct 
device *dev,
if (kstrtoul(buf, 0, &value))
return -EINVAL;
 
-   if (value)
-   chip->settings.als_cal_target = value;
+   if (value < 0 || value > 65535)
+   return -ERANGE;
 
+   chip->settings.als_cal_target = value;
ret = tsl2x7x_invoke_change(indio_dev);
if (ret < 0)
return ret;
@@ -853,14 +854,12 @@ static ssize_t in_illuminance0_calibrate_store(struct 
device *dev,
bool value;
int ret;
 
-   if (strtobool(buf, &value))
+   if (kstrtobool(buf, &value) || !value)
return -EINVAL;
 
-   if (value) {
-   ret = tsl2x7x_als_calibrate(indio_dev);
-   if (ret < 0)
-   return ret;
-   }
+   ret = tsl2x7x_als_calibrate(indio_dev);
+   if (ret < 0)
+   return ret;
 
ret = tsl2x7x_invoke_change(indio_dev);
if (ret < 0)
@@ -946,14 +945,12 @@ static ssize_t in_proximity0_calibrate_store(struct 
device *dev,
bool value;
int ret;
 
-   if (strtobool(buf, &value))
+   if (kstrtobool(buf, &value) || !value)
return -EINVAL;
 
-   if (value) {
-   ret = tsl2x7x_prox_cal(indio_dev);
-   if (ret < 0)
-   return ret;
-   }
+   ret = tsl2x7x_prox_cal(indio_dev);
+   if (ret < 0)
+   return ret;
 
ret = tsl2x7x_invoke_change(indio_dev);
if (ret < 0)
-- 
2.14.3



[PATCH 06/13] staging: iio: tsl2x7x: simplify tsl2x7x_write_interrupt_config return

2018-04-20 Thread Brian Masney
tsl2x7x_write_interrupt_config() has an unnecessary return value check
at the end of the function. This patch changes the function to just
return the value from the call to tsl2x7x_invoke_change().

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 8d8af0cf9768..d202bc7e1f4f 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -982,18 +982,13 @@ static int tsl2x7x_write_interrupt_config(struct iio_dev 
*indio_dev,
  int val)
 {
struct tsl2X7X_chip *chip = iio_priv(indio_dev);
-   int ret;
 
if (chan->type == IIO_INTENSITY)
chip->settings.als_interrupt_en = val ? true : false;
else
chip->settings.prox_interrupt_en = val ? true : false;
 
-   ret = tsl2x7x_invoke_change(indio_dev);
-   if (ret < 0)
-   return ret;
-
-   return 0;
+   return tsl2x7x_invoke_change(indio_dev);
 }
 
 static int tsl2x7x_write_event_value(struct iio_dev *indio_dev,
-- 
2.14.3



[PATCH 07/13] staging: iio: tsl2x7x: simplify device id verification

2018-04-20 Thread Brian Masney
This patch renames tsl2x7x_device_id() to tsl2x7x_device_id_verif(),
removes the unnecessary pointer on the id parameter, and only calls
the verification function once.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index d202bc7e1f4f..56730baea927 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -1277,22 +1277,22 @@ static DEVICE_ATTR_WO(in_proximity0_calibrate);
 static DEVICE_ATTR_RW(in_illuminance0_lux_table);
 
 /* Use the default register values to identify the Taos device */
-static int tsl2x7x_device_id(int *id, int target)
+static int tsl2x7x_device_id_verif(int id, int target)
 {
switch (target) {
case tsl2571:
case tsl2671:
case tsl2771:
-   return (*id & 0xf0) == TRITON_ID;
+   return (id & 0xf0) == TRITON_ID;
case tmd2671:
case tmd2771:
-   return (*id & 0xf0) == HALIBUT_ID;
+   return (id & 0xf0) == HALIBUT_ID;
case tsl2572:
case tsl2672:
case tmd2672:
case tsl2772:
case tmd2772:
-   return (*id & 0xf0) == SWORDFISH_ID;
+   return (id & 0xf0) == SWORDFISH_ID;
}
 
return -EINVAL;
@@ -1612,8 +1612,7 @@ static int tsl2x7x_probe(struct i2c_client *clientp,
if (ret < 0)
return ret;
 
-   if ((!tsl2x7x_device_id(&ret, id->driver_data)) ||
-   (tsl2x7x_device_id(&ret, id->driver_data) == -EINVAL)) {
+   if (tsl2x7x_device_id_verif(ret, id->driver_data) <= 0) {
dev_info(&chip->client->dev,
 "%s: i2c device found does not match expected id\n",
__func__);
-- 
2.14.3



[PATCH 09/13] staging: iio: tsl2x7x: move power and diode settings into header file

2018-04-20 Thread Brian Masney
The power and diode defines are needed for the platform data so this
patch moves the defines out of the .c file and into the header file. A
comment for the diode is also cleaned up while this code is touched.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 12 
 drivers/staging/iio/light/tsl2x7x.h | 12 
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 15bc0af1bb6c..87b99deef7a8 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -103,18 +103,6 @@
 #define TSL2X7X_CNTL_PROXPON_ENBL  0x0F
 #define TSL2X7X_CNTL_INTPROXPON_ENBL   0x2F
 
-/*Prox diode to use */
-#define TSL2X7X_DIODE0 0x01
-#define TSL2X7X_DIODE1 0x02
-#define TSL2X7X_DIODE_BOTH 0x03
-
-/* LED Power */
-#define TSL2X7X_100_mA 0x00
-#define TSL2X7X_50_mA  0x01
-#define TSL2X7X_25_mA  0x02
-#define TSL2X7X_13_mA  0x03
-#define TSL2X7X_MAX_TIMER_CNT  0xFF
-
 #define TSL2X7X_MIN_ITIME  3
 
 /* TAOS txx2x7x Device family members */
diff --git a/drivers/staging/iio/light/tsl2x7x.h 
b/drivers/staging/iio/light/tsl2x7x.h
index 992ee2465609..2c96f0b39b1e 100644
--- a/drivers/staging/iio/light/tsl2x7x.h
+++ b/drivers/staging/iio/light/tsl2x7x.h
@@ -23,6 +23,18 @@ struct tsl2x7x_lux {
 #define TSL2X7X_DEFAULT_TABLE_BYTES (sizeof(struct tsl2x7x_lux) * \
 TSL2X7X_DEF_LUX_TABLE_SZ)
 
+/* Proximity diode to use */
+#define TSL2X7X_DIODE0  0x01
+#define TSL2X7X_DIODE1  0x02
+#define TSL2X7X_DIODE_BOTH  0x03
+
+/* LED Power */
+#define TSL2X7X_100_mA  0x00
+#define TSL2X7X_50_mA   0x01
+#define TSL2X7X_25_mA   0x02
+#define TSL2X7X_13_mA   0x03
+#define TSL2X7X_MAX_TIMER_CNT   0xFF
+
 /**
  * struct tsl2x7x_default_settings - power on defaults unless
  *   overridden by platform data.
-- 
2.14.3



[PATCH 13/13] staging: iio: tsl2x7x: rename prox_config to als_prox_config

2018-04-20 Thread Brian Masney
The configuration register on the device is represented with the
prox_config member on the tsl2x7x_settings structure. According to the
TSL2772 data sheet, this register can hold: 1) the proximity drive
level, 2) ALS/Proximity long wait, and 3) the ALS gain level. This
patch renames prox_config to als_prox_config since ALS settings can
be stored here as well.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 7 ---
 drivers/staging/iio/light/tsl2x7x.h | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 05c0f3d5fac0..708b2c6bdf4b 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -56,7 +56,7 @@
 #define TSL2X7X_PRX_MAXTHRESHLO0X0A
 #define TSL2X7X_PRX_MAXTHRESHHI0X0B
 #define TSL2X7X_PERSISTENCE0x0C
-#define TSL2X7X_PRX_CONFIG 0x0D
+#define TSL2X7X_ALS_PRX_CONFIG 0x0D
 #define TSL2X7X_PRX_COUNT  0x0E
 #define TSL2X7X_GAIN   0x0F
 #define TSL2X7X_NOTUSED0x10
@@ -207,7 +207,7 @@ static const struct tsl2x7x_settings 
tsl2x7x_default_settings = {
.prox_time = 255, /* 2.73 ms */
.prox_gain = 0,
.wait_time = 255,
-   .prox_config = 0,
+   .als_prox_config = 0,
.als_gain_trim = 1000,
.als_cal_target = 150,
.als_persistence = 1,
@@ -594,7 +594,8 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev)
/* Non calculated parameters */
chip->tsl2x7x_config[TSL2X7X_PRX_TIME] = chip->settings.prox_time;
chip->tsl2x7x_config[TSL2X7X_WAIT_TIME] = chip->settings.wait_time;
-   chip->tsl2x7x_config[TSL2X7X_PRX_CONFIG] = chip->settings.prox_config;
+   chip->tsl2x7x_config[TSL2X7X_ALS_PRX_CONFIG] =
+   chip->settings.als_prox_config;
 
chip->tsl2x7x_config[TSL2X7X_ALS_MINTHRESHLO] =
(chip->settings.als_thresh_low) & 0xFF;
diff --git a/drivers/staging/iio/light/tsl2x7x.h 
b/drivers/staging/iio/light/tsl2x7x.h
index 85d8fe7a94c8..6e30e71a2127 100644
--- a/drivers/staging/iio/light/tsl2x7x.h
+++ b/drivers/staging/iio/light/tsl2x7x.h
@@ -48,7 +48,8 @@ struct tsl2x7x_lux {
  *  increments. Total integration time is
  *  (256 - prx_time) * 2.73.
  *  @prox_gain: Index into the tsl2x7x_prx_gain array.
- *  @prox_config:   Prox configuration filters.
+ *  @als_prox_config:   The value of the ALS / Proximity configuration
+ *  register.
  *  @als_cal_target:Known external ALS reading for calibration.
  *  @als_persistence:   H/W Filters, Number of 'out of limits' ALS 
readings.
  *  @als_interrupt_en:  Enable/Disable ALS interrupts
@@ -73,7 +74,7 @@ struct tsl2x7x_settings {
int wait_time;
int prox_time;
int prox_gain;
-   int prox_config;
+   int als_prox_config;
int als_cal_target;
u8 als_persistence;
bool als_interrupt_en;
-- 
2.14.3



[PATCH 01/13] staging: iio: tsl2x7x: move integration_time* attributes to IIO_INTENSITY channel

2018-04-20 Thread Brian Masney
The integration_time* attributes are currently associated with the
IIO_LIGHT channel but should be associated with the IIO_INTENSITY
channel. Directory listing of the sysfs attributes for a TSL2772
with this patch applied:

dev
events
in_illuminance0_calibrate
in_illuminance0_calibscale_available
in_illuminance0_input
in_illuminance0_lux_table
in_illuminance0_target_input
in_intensity0_calibbias
in_intensity0_calibscale
in_intensity0_integration_time
in_intensity0_integration_time_available
in_intensity0_raw
in_intensity1_raw
in_proximity0_calibrate
in_proximity0_calibscale
in_proximity0_calibscale_available
in_proximity0_raw
name
of_node
power
subsystem
uevent

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 9991b0483956..eeccfbb0eb1f 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -827,7 +827,7 @@ in_illuminance0_calibscale_available_show(struct device 
*dev,
 
 static IIO_CONST_ATTR(in_proximity0_calibscale_available, "1 2 4 8");
 
-static IIO_CONST_ATTR(in_illuminance0_integration_time_available,
+static IIO_CONST_ATTR(in_intensity0_integration_time_available,
".00272 - .696");
 
 static ssize_t in_illuminance0_target_input_show(struct device *dev,
@@ -1358,7 +1358,7 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void 
*private)
 
 static struct attribute *tsl2x7x_ALS_device_attrs[] = {
&dev_attr_in_illuminance0_calibscale_available.attr,
-   &iio_const_attr_in_illuminance0_integration_time_available
+   &iio_const_attr_in_intensity0_integration_time_available
.dev_attr.attr,
&dev_attr_in_illuminance0_target_input.attr,
&dev_attr_in_illuminance0_calibrate.attr,
@@ -1373,7 +1373,7 @@ static struct attribute *tsl2x7x_PRX_device_attrs[] = {
 
 static struct attribute *tsl2x7x_ALSPRX_device_attrs[] = {
&dev_attr_in_illuminance0_calibscale_available.attr,
-   &iio_const_attr_in_illuminance0_integration_time_available
+   &iio_const_attr_in_intensity0_integration_time_available
.dev_attr.attr,
&dev_attr_in_illuminance0_target_input.attr,
&dev_attr_in_illuminance0_calibrate.attr,
@@ -1389,7 +1389,7 @@ static struct attribute *tsl2x7x_PRX2_device_attrs[] = {
 
 static struct attribute *tsl2x7x_ALSPRX2_device_attrs[] = {
&dev_attr_in_illuminance0_calibscale_available.attr,
-   &iio_const_attr_in_illuminance0_integration_time_available
+   &iio_const_attr_in_intensity0_integration_time_available
.dev_attr.attr,
&dev_attr_in_illuminance0_target_input.attr,
&dev_attr_in_illuminance0_calibrate.attr,
@@ -1489,13 +1489,13 @@ static const struct tsl2x7x_chip_info 
tsl2x7x_chip_info_tbl[] = {
.type = IIO_LIGHT,
.indexed = 1,
.channel = 0,
-   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) |
- BIT(IIO_CHAN_INFO_INT_TIME),
+   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
}, {
.type = IIO_INTENSITY,
.indexed = 1,
.channel = 0,
.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+   BIT(IIO_CHAN_INFO_INT_TIME) |
BIT(IIO_CHAN_INFO_CALIBSCALE) |
BIT(IIO_CHAN_INFO_CALIBBIAS),
.event_spec = tsl2x7x_events,
@@ -1529,13 +1529,13 @@ static const struct tsl2x7x_chip_info 
tsl2x7x_chip_info_tbl[] = {
.type = IIO_LIGHT,
.indexed = 1,
.channel = 0,
-   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) |
- BIT(IIO_CHAN_INFO_INT_TIME),
+   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
}, {
.type = IIO_INTENSITY,
.indexed = 1,
.channel = 0,
.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+   BIT(IIO_CHAN_INFO_INT_TIME) |
BIT(IIO_CHAN_INFO_CALIBSCALE) |
BIT(IIO_CHAN_INFO_CALIBBIAS),
.event_spec = tsl2x7x_events,
@@ -1578,13 +1578,13 @@ static const struct tsl2x7x_chip_info 
tsl2x7x_chip_info_tbl[] = {
.type = IIO_LIGHT,
.indexed = 1,
.channel = 0,
-   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) |
- BIT(IIO_CHAN_INFO_IN

[PATCH 00/13] iio: tsl2x7x: staging cleanups

2018-04-20 Thread Brian Masney
Here is another round of staging cleanups for this driver mostly based
on Jonathon's feedback. We're very close to a staging graduation and I
only have a few items remaining on my todo list:
 
- Remove wildcards from the driver name. Jonathan suggested tsl2571.
- Don't make the events/ directory available to user space via sysfs if
  an interrupt pin is not configured.
- Go through the newer ALS part numbers on AMS's site and see if there
  are other part numbers that can be added to this driver.
- I found this week an issue with the integration_time sysfs attribute.

Hopefully I'll have time to wrap this up next week.

Brian Masney (13):
  staging: iio: tsl2x7x: move integration_time* attributes to
IIO_INTENSITY channel
  staging: iio: tsl2x7x: use GPL-2.0+ SPDX license identifier
  staging: iio: tsl2x7x: don't return error in IRQ handler
  staging: iio: tsl2x7x: simplify tsl2x7x_clear_interrupts function
  staging: iio: tsl2x7x: remove unnecessary chip status checks in
suspend/resume
  staging: iio: tsl2x7x: simplify tsl2x7x_write_interrupt_config return
  staging: iio: tsl2x7x: simplify device id verification
  staging: iio: tsl2x7x: add range checking to three sysfs attributes
  staging: iio: tsl2x7x: move power and diode settings into header file
  staging: iio: tsl2x7x: rename prx to prox for consistency
  staging: iio: tsl2x7x: use device defaults for als_time, prox_time and
wait_time
  staging: iio: tsl2x7x: various comment cleanups
  staging: iio: tsl2x7x: rename prox_config to als_prox_config

 drivers/staging/iio/light/tsl2x7x.c | 217 ++--
 drivers/staging/iio/light/tsl2x7x.h |  81 +++---
 2 files changed, 125 insertions(+), 173 deletions(-)

-- 
2.14.3



[PATCH 12/13] staging: iio: tsl2x7x: various comment cleanups

2018-04-20 Thread Brian Masney
This patch removes several unnecessary comments, changes some comments
so that the use as much of the allowable 80 characters as possible, adds
the proper whitespace, removes some structure members from the kernel
docs that are no longer present, and improves the existing kernel doc
information for some existing structure members.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 59 +
 drivers/staging/iio/light/tsl2x7x.h | 48 +++---
 2 files changed, 51 insertions(+), 56 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 293810ff11b9..05c0f3d5fac0 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -1,6 +1,6 @@
 /*
- * Device driver for monitoring ambient light intensity in (lux)
- * and proximity detection (prox) within the TAOS TSL2X7X family of devices.
+ * Device driver for monitoring ambient light intensity in (lux) and proximity
+ * detection (prox) within the TAOS TSL2X7X family of devices.
  *
  * Copyright (c) 2012, TAOS Corporation.
  * Copyright (c) 2017-2018 Brian Masney 
@@ -21,7 +21,7 @@
 #include 
 #include "tsl2x7x.h"
 
-/* Cal defs*/
+/* Cal defs */
 #define PROX_STAT_CAL  0
 #define PROX_STAT_SAMP 1
 #define MAX_SAMPLES_CAL200
@@ -34,10 +34,11 @@
 /* Lux calculation constants */
 #define TSL2X7X_LUX_CALC_OVER_FLOW 65535
 
-/* TAOS Register definitions - note:
- * depending on device, some of these register are not used and the
- * register address is benign.
+/*
+ * TAOS Register definitions - Note: depending on device, some of these 
register
+ * are not used and the register address is benign.
  */
+
 /* 2X7X register offsets */
 #define TSL2X7X_MAX_CONFIG_REG 16
 
@@ -342,15 +343,14 @@ static int tsl2x7x_read_autoinc_regs(struct tsl2X7X_chip 
*chip, int lower_reg,
  * @indio_dev: pointer to IIO device
  *
  * The raw ch0 and ch1 values of the ambient light sensed in the last
- * integration cycle are read from the device.
- * Time scale factor array values are adjusted based on the integration time.
- * The raw values are multiplied by a scale factor, and device gain is obtained
- * using gain index. Limit checks are done next, then the ratio of a multiple
- * of ch1 value, to the ch0 value, is calculated. Array tsl2x7x_device_lux[]
- * is then scanned to find the first ratio value that is just above the ratio
- * we just calculated. The ch0 and ch1 multiplier constants in the array are
- * then used along with the time scale factor array values, to calculate the
- * lux.
+ * integration cycle are read from the device. Time scale factor array values
+ * are adjusted based on the integration time. The raw values are multiplied
+ * by a scale factor, and device gain is obtained using gain index. Limit
+ * checks are done next, then the ratio of a multiple of ch1 value, to the
+ * ch0 value, is calculated. Array tsl2x7x_device_lux[] is then scanned to
+ * find the first ratio value that is just above the ratio we just calculated.
+ * The ch0 and ch1 multiplier constants in the array are then used along with
+ * the time scale factor array values, to calculate the lux.
  */
 static int tsl2x7x_get_lux(struct iio_dev *indio_dev)
 {
@@ -363,7 +363,6 @@ static int tsl2x7x_get_lux(struct iio_dev *indio_dev)
mutex_lock(&chip->als_mutex);
 
if (chip->tsl2x7x_chip_status != TSL2X7X_CHIP_WORKING) {
-   /* device is not enabled */
dev_err(&chip->client->dev, "%s: device is not enabled\n",
__func__);
ret = -EBUSY;
@@ -374,7 +373,6 @@ static int tsl2x7x_get_lux(struct iio_dev *indio_dev)
if (ret < 0)
goto out_unlock;
 
-   /* is data new & valid */
if (!(ret & TSL2X7X_STA_ADC_VALID)) {
dev_err(&chip->client->dev,
"%s: data not valid yet\n", __func__);
@@ -430,12 +428,12 @@ static int tsl2x7x_get_lux(struct iio_dev *indio_dev)
lux = (lux + (chip->als_time_scale >> 1)) /
chip->als_time_scale;
 
-   /* adjust for active gain scale
-* The tsl2x7x_device_lux tables have a factor of 256 built-in.
-* User-specified gain provides a multiplier.
+   /*
+* adjust for active gain scale. The tsl2x7x_device_lux tables have a
+* factor of 256 built-in. User-specified gain provides a multiplier.
 * Apply user-specified gain before shifting right to retain precision.
-* Use 64 bits to avoid overflow on multiplication.
-* Then go back to 32 bits before division to avoid using div_u64().
+* Use 64 bits to avoid overflow on multiplication. Then go back to
+* 32 bits before division to avoid using div_u64().
 */
 
lux64 = lux;
@@ -713,14 +711,13 @@ static int tsl2x7x_chip_off(struct ii

[PATCH 11/13] staging: iio: tsl2x7x: use device defaults for als_time, prox_time and wait_time

2018-04-20 Thread Brian Masney
This patch changes the defaults of the als_time, prox_time and
wait_time to match the defaults according to the TSL2772 datasheet.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index a7b4fcba7935..293810ff11b9 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -201,11 +201,11 @@ static const struct tsl2x7x_lux 
*tsl2x7x_default_lux_table_group[] = {
 };
 
 static const struct tsl2x7x_settings tsl2x7x_default_settings = {
-   .als_time = 219, /* 101 ms */
+   .als_time = 255, /* 2.73 ms */
.als_gain = 0,
-   .prox_time = 254, /* 5.4 ms */
+   .prox_time = 255, /* 2.73 ms */
.prox_gain = 0,
-   .wait_time = 245,
+   .wait_time = 255,
.prox_config = 0,
.als_gain_trim = 1000,
.als_cal_target = 150,
-- 
2.14.3



[PATCH 10/13] staging: iio: tsl2x7x: rename prx to prox for consistency

2018-04-20 Thread Brian Masney
The driver mostly uses the 'prox' naming convention for most of the
proximity settings, however prx_time and tsl2x7x_prx_gain was present.
This patch renames these to prox_time and tsl2x7x_prox_gain for
consistency with everything else in the driver.

The kernel documentation for prx_gain is corrected to prox_gain so that
it matches what is actually in the structure.

Signed-off-by: Brian Masney 
---
 drivers/staging/iio/light/tsl2x7x.c | 12 ++--
 drivers/staging/iio/light/tsl2x7x.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/iio/light/tsl2x7x.c 
b/drivers/staging/iio/light/tsl2x7x.c
index 87b99deef7a8..a7b4fcba7935 100644
--- a/drivers/staging/iio/light/tsl2x7x.c
+++ b/drivers/staging/iio/light/tsl2x7x.c
@@ -203,7 +203,7 @@ static const struct tsl2x7x_lux 
*tsl2x7x_default_lux_table_group[] = {
 static const struct tsl2x7x_settings tsl2x7x_default_settings = {
.als_time = 219, /* 101 ms */
.als_gain = 0,
-   .prx_time = 254, /* 5.4 ms */
+   .prox_time = 254, /* 5.4 ms */
.prox_gain = 0,
.wait_time = 245,
.prox_config = 0,
@@ -230,7 +230,7 @@ static const s16 tsl2x7x_als_gain[] = {
120
 };
 
-static const s16 tsl2x7x_prx_gain[] = {
+static const s16 tsl2x7x_prox_gain[] = {
1,
2,
4,
@@ -594,7 +594,7 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev)
u8 *dev_reg, reg_val;
 
/* Non calculated parameters */
-   chip->tsl2x7x_config[TSL2X7X_PRX_TIME] = chip->settings.prx_time;
+   chip->tsl2x7x_config[TSL2X7X_PRX_TIME] = chip->settings.prox_time;
chip->tsl2x7x_config[TSL2X7X_WAIT_TIME] = chip->settings.wait_time;
chip->tsl2x7x_config[TSL2X7X_PRX_CONFIG] = chip->settings.prox_config;
 
@@ -1021,7 +1021,7 @@ static int tsl2x7x_write_event_value(struct iio_dev 
*indio_dev,
if (chan->type == IIO_INTENSITY)
time = chip->settings.als_time;
else
-   time = chip->settings.prx_time;
+   time = chip->settings.prox_time;
 
y = (TSL2X7X_MAX_TIMER_CNT - time) + 1;
z = y * TSL2X7X_MIN_ITIME;
@@ -1090,7 +1090,7 @@ static int tsl2x7x_read_event_value(struct iio_dev 
*indio_dev,
time = chip->settings.als_time;
mult = chip->settings.als_persistence;
} else {
-   time = chip->settings.prx_time;
+   time = chip->settings.prox_time;
mult = chip->settings.prox_persistence;
}
 
@@ -1153,7 +1153,7 @@ static int tsl2x7x_read_raw(struct iio_dev *indio_dev,
if (chan->type == IIO_LIGHT)
*val = tsl2x7x_als_gain[chip->settings.als_gain];
else
-   *val = tsl2x7x_prx_gain[chip->settings.prox_gain];
+   *val = tsl2x7x_prox_gain[chip->settings.prox_gain];
ret = IIO_VAL_INT;
break;
case IIO_CHAN_INFO_CALIBBIAS:
diff --git a/drivers/staging/iio/light/tsl2x7x.h 
b/drivers/staging/iio/light/tsl2x7x.h
index 2c96f0b39b1e..408e5a89edb1 100644
--- a/drivers/staging/iio/light/tsl2x7x.h
+++ b/drivers/staging/iio/light/tsl2x7x.h
@@ -44,9 +44,9 @@ struct tsl2x7x_lux {
  *  aperture effects.
  *  @wait_time: Time between PRX and ALS cycles
  *  in 2.7 periods
- *  @prx_time:  5.2ms prox integration time -
+ *  @prox_time: 5.2ms prox integration time -
  *  decrease in 2.7ms periods
- *  @prx_gain:  Proximity gain index
+ *  @prox_gain: Proximity gain index
  *  @prox_config:   Prox configuration filters.
  *  @als_cal_target:Known external ALS reading for
  *  calibration.
@@ -68,7 +68,7 @@ struct tsl2x7x_settings {
int als_gain;
int als_gain_trim;
int wait_time;
-   int prx_time;
+   int prox_time;
int prox_gain;
int prox_config;
int als_cal_target;
-- 
2.14.3



Re: [PATCH v2] KVM: Extend MAX_IRQ_ROUTES to 4096 for all archs

2018-04-20 Thread Wanpeng Li
2018-04-20 22:21 GMT+08:00 Cornelia Huck :
> On Fri, 20 Apr 2018 21:51:13 +0800
> Wanpeng Li  wrote:
>
>> 2018-04-20 15:15 GMT+08:00 Cornelia Huck :
>> > On Thu, 19 Apr 2018 17:47:28 -0700
>> > Wanpeng Li  wrote:
>> >
>> >> From: Wanpeng Li 
>> >>
>> >> Our virtual machines make use of device assignment by configuring
>> >> 12 NVMe disks for high I/O performance. Each NVMe device has 129
>> >> MSI-X Table entries:
>> >> Capabilities: [50] MSI-X: Enable+ Count=129 Masked-Vector table: BAR=0 
>> >> offset=2000
>> >> The windows virtual machines fail to boot since they will map the number 
>> >> of
>> >> MSI-table entries that the NVMe hardware reported to the bus to msi 
>> >> routing
>> >> table, this will exceed the 1024. This patch extends MAX_IRQ_ROUTES to 
>> >> 4096
>> >> for all archs, in the future this might be extended again if needed.
>> >>
>> >> Cc: Paolo Bonzini 
>> >> Cc: Radim Krčmář 
>> >> Cc: Tonny Lu 
>> >> Cc: Cornelia Huck 
>> >> Signed-off-by: Wanpeng Li 
>> >> Signed-off-by: Tonny Lu 
>> >> ---
>> >> v1 -> v2:
>> >>  * extend MAX_IRQ_ROUTES to 4096 for all archs
>> >>
>> >>  include/linux/kvm_host.h | 6 --
>> >>  1 file changed, 6 deletions(-)
>> >>
>> >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> >> index 6930c63..0a5c299 100644
>> >> --- a/include/linux/kvm_host.h
>> >> +++ b/include/linux/kvm_host.h
>> >> @@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm 
>> >> *kvm, unsigned long mmu_seq)
>> >>
>> >>  #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
>> >>
>> >> -#ifdef CONFIG_S390
>> >>  #define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
>> >
>> > What about /* might need extension/rework in the future */ instead of
>> > the FIXME?
>>
>> Yeah, I guess the maintainers can help to fix it when applying. :)
>>
>> >
>> > As far as I understand, 4096 should cover most architectures and the
>> > sane end of s390 configurations, but will not be enough at the scarier
>> > end of s390. (I'm not sure how much it matters in practice.)
>> >
>> > Do we want to make this a tuneable in the future? Do some kind of
>> > dynamic allocation? Not sure whether it is worth the trouble.
>>
>> I think keep as it is currently.
>
> My main question here is how long this is enough... the number of
> virtqueues per device is up to 1K from the initial 64, which makes it
> possible to hit the 4K limit with fewer virtio devices than before (on
> s390, each virtqueue uses a routing table entry). OTOH, we don't want
> giant tables everywhere just to accommodate s390.

I suspect there is no real scenario to futher extend for s390 since no
guys report.

> If the s390 maintainers tell me that nobody is doing the really insane
> stuff, I'm happy as well :)

Christian, any thoughts?

Regards,
Wanpeng Li


Re: [PATCH] KVM: s390: reset crypto attributes for all vcpus

2018-04-20 Thread kbuild test robot
Hi Tony,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on s390/features]
[also build test ERROR on v4.17-rc1 next-20180420]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Tony-Krowiak/KVM-s390-reset-crypto-attributes-for-all-vcpus/20180421-050734
base:   https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
config: s390-allyesconfig (attached as .config)
compiler: s390x-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390 

All error/warnings (new ones prefixed by >>):

   arch/s390/kvm/kvm-s390.c: In function 'kvm_s390_vcpu_crypto_reset_all':
>> arch/s390/kvm/kvm-s390.c:800:10: error: implicit declaration of function 
>> 'kvm_s390_vcpu_crypto_setup'; did you mean 'kvm_s390_vcpu_crypto_reset_all'? 
>> [-Werror=implicit-function-declaration]
 kvm_s390_vcpu_crypto_setup(vcpu);
 ^~
 kvm_s390_vcpu_crypto_reset_all
   arch/s390/kvm/kvm-s390.c: At top level:
>> arch/s390/kvm/kvm-s390.c:805:13: warning: conflicting types for 
>> 'kvm_s390_vcpu_crypto_setup'
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
^~
>> arch/s390/kvm/kvm-s390.c:805:13: error: static declaration of 
>> 'kvm_s390_vcpu_crypto_setup' follows non-static declaration
   arch/s390/kvm/kvm-s390.c:800:10: note: previous implicit declaration of 
'kvm_s390_vcpu_crypto_setup' was here
 kvm_s390_vcpu_crypto_setup(vcpu);
 ^~
   arch/s390/kvm/kvm-s390.c: In function 'kvm_s390_vm_set_crypto':
   arch/s390/kvm/kvm-s390.c:810:6: warning: unused variable 'i' 
[-Wunused-variable]
 int i;
 ^
   arch/s390/kvm/kvm-s390.c:809:19: warning: unused variable 'vcpu' 
[-Wunused-variable]
 struct kvm_vcpu *vcpu;
  ^~~~
   cc1: some warnings being treated as errors

vim +800 arch/s390/kvm/kvm-s390.c

   791  
   792  void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
   793   {
   794  int i;
   795  struct kvm_vcpu *vcpu;
   796  
   797  kvm_s390_vcpu_block_all(kvm);
   798  
   799  kvm_for_each_vcpu(i, vcpu, kvm)
 > 800  kvm_s390_vcpu_crypto_setup(vcpu);
   801  
   802  kvm_s390_vcpu_unblock_all(kvm);
   803  }
   804  
 > 805  static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
   806  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 1/3] fs: move documentation for thaw_super() where appropriate

2018-04-20 Thread Randy Dunlap
On 04/20/18 17:07, Luis R. Rodriguez wrote:
> On Sat, Apr 21, 2018 at 12:01:31AM +, Bart Van Assche wrote:
>> On Fri, 2018-04-20 at 16:59 -0700, Luis R. Rodriguez wrote:
>>> +/**
>>> + * thaw_super -- unlock filesystem
>>> + * @sb: the super to thaw
>>> + *
>>> + * Unlocks the filesystem and marks it writeable again after 
>>> freeze_super().
>>> + */
>>
>> Have you verified the output generated by scripts/kernel-doc? Last
>> time I checked the output for headers containing a double dash looked
>> weird.
> 
> No, I just moved the block of kdoc crap. Randy would have this memorized I'm
> sure though so I should just fix the kdoc if its bad while at it.
> 
>   Luis
> 

"--" does sound odd.  I've never seen it used on purpose.
FWIW, I just go by what is in Documentation/doc-guide/kernel-doc.rst:

Function documentation
--

The general format of a function and function-like macro kernel-doc comment is::

  /**
   * function_name() - Brief description of function.
   * @arg1: Describe the first argument.
   * @arg2: Describe the second argument.
   *One can provide multiple line descriptions
   *for arguments.



-- 
~Randy


Re: [PATCH 1/3] fs: move documentation for thaw_super() where appropriate

2018-04-20 Thread Luis R. Rodriguez
On Sat, Apr 21, 2018 at 12:01:31AM +, Bart Van Assche wrote:
> On Fri, 2018-04-20 at 16:59 -0700, Luis R. Rodriguez wrote:
> > +/**
> > + * thaw_super -- unlock filesystem
> > + * @sb: the super to thaw
> > + *
> > + * Unlocks the filesystem and marks it writeable again after 
> > freeze_super().
> > + */
> 
> Have you verified the output generated by scripts/kernel-doc? Last
> time I checked the output for headers containing a double dash looked
> weird.

No, I just moved the block of kdoc crap. Randy would have this memorized I'm
sure though so I should just fix the kdoc if its bad while at it.

  Luis


Re: [PATCH 0/3] fs: minor fs thaw fixes and adjustments

2018-04-20 Thread Luis R. Rodriguez
On Fri, Apr 20, 2018 at 04:59:01PM -0700, Luis R. Rodriguez wrote:
> Here's a few minor fs thaw fixes and adjustments I ran accross
> as I started to refresh my development for to use freeze_fs on
> suspend/hibernation [0]. These are just prep commits for the real
> work, I'm still reviewing feedback and adjusting the code for
> that.

And I forgot to provide the URL, for those that missed the old series
I was working on:

[0] https://lkml.kernel.org/r/20171129232356.28296-1-mcg...@kernel.org

  Luis
> 
> I've tested the patches with generic/085 on xfs and found no regressions.
> 
> Luis R. Rodriguez (3):
>   fs: move documentation for thaw_super() where appropriate
>   fs: make thaw_super_locked() really just a helper
>   fs: fix corner case race on freeze_bdev() when sb disappears
> 
>  fs/block_dev.c |  4 +++-
>  fs/super.c | 39 ++-
>  2 files changed, 29 insertions(+), 14 deletions(-)
> 
> -- 
> 2.16.3
> 
> 

-- 
Do not panic


Re: [PATCH 1/3] fs: move documentation for thaw_super() where appropriate

2018-04-20 Thread Bart Van Assche
On Fri, 2018-04-20 at 16:59 -0700, Luis R. Rodriguez wrote:
> +/**
> + * thaw_super -- unlock filesystem
> + * @sb: the super to thaw
> + *
> + * Unlocks the filesystem and marks it writeable again after freeze_super().
> + */

Have you verified the output generated by scripts/kernel-doc? Last
time I checked the output for headers containing a double dash looked
weird.

Bart.





[PATCH 3/3] fs: fix corner case race on freeze_bdev() when sb disappears

2018-04-20 Thread Luis R. Rodriguez
freeze_bdev() will bail but leave the bd_fsfreeze_count incremented
if the get_active_super() does not find the superblock on our
super_blocks list to match.

This issue has been present since v2.6.29 during the introduction of the
ioctl_fsfreeze() and ioctl_fsthaw() via commit fcccf502540e3 ("filesystem
freeze: implement generic freeze feature").

I am not aware of any existing races which have triggered this
situation, however, if it does trigger it could mean leaving a
superblock with bd_fsfreeze_count always positive.

Fixes: fcccf502540e3 ("filesystem freeze: implement generic freeze feature")
Signed-off-by: Luis R. Rodriguez 
---
 fs/block_dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index b54966679833..7a532aa58c07 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -507,8 +507,10 @@ struct super_block *freeze_bdev(struct block_device *bdev)
}
 
sb = get_active_super(bdev);
-   if (!sb)
+   if (!sb) {
+   bdev->bd_fsfreeze_count--;
goto out;
+   }
if (sb->s_op->freeze_super)
error = sb->s_op->freeze_super(sb);
else
-- 
2.16.3



[PATCH 1/3] fs: move documentation for thaw_super() where appropriate

2018-04-20 Thread Luis R. Rodriguez
On commit 08fdc8a0138a ("buffer.c: call thaw_super during emergency thaw")
Mateusz added thaw_super_locked() and made thaw_super() use it, but
forgot to move the documentation.

Signed-off-by: Luis R. Rodriguez 
---
 fs/super.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 5fa9a8d8d865..9d0eb5e20a1f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1532,12 +1532,6 @@ int freeze_super(struct super_block *sb)
 }
 EXPORT_SYMBOL(freeze_super);
 
-/**
- * thaw_super -- unlock filesystem
- * @sb: the super to thaw
- *
- * Unlocks the filesystem and marks it writeable again after freeze_super().
- */
 static int thaw_super_locked(struct super_block *sb)
 {
int error;
@@ -1573,6 +1567,12 @@ static int thaw_super_locked(struct super_block *sb)
return 0;
 }
 
+/**
+ * thaw_super -- unlock filesystem
+ * @sb: the super to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super().
+ */
 int thaw_super(struct super_block *sb)
 {
down_write(&sb->s_umount);
-- 
2.16.3



[PATCH 2/3] fs: make thaw_super_locked() really just a helper

2018-04-20 Thread Luis R. Rodriguez
thaw_super_locked() was added via commit 08fdc8a0138a ("buffer.c: call
thaw_super during emergency thaw") merged on v4.17 to help with the
ability so that the caller can take charge of handling the s_umount lock,
however, it has left all* of the failure handling including unlocking
lock of s_umount inside thaw_super_locked().

This does not make thaw_super_locked() flexible. For instance we may
later want to use it with the abilty to handle unfolding of the locks
ourselves.

Change thaw_super_locked() to really just be a helper, and let the
callers deal with all the error handling.

This commit introeuces no functional changes.

Signed-off-by: Luis R. Rodriguez 
---
 fs/super.c | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 9d0eb5e20a1f..82bc74a16f06 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -937,10 +937,15 @@ void emergency_remount(void)
 
 static void do_thaw_all_callback(struct super_block *sb)
 {
+   int error;
+
down_write(&sb->s_umount);
if (sb->s_root && sb->s_flags & MS_BORN) {
emergency_thaw_bdev(sb);
-   thaw_super_locked(sb);
+   error = thaw_super_locked(sb);
+   if (error)
+   up_write(&sb->s_umount);
+   deactivate_locked_super(sb);
} else {
up_write(&sb->s_umount);
}
@@ -1532,14 +1537,13 @@ int freeze_super(struct super_block *sb)
 }
 EXPORT_SYMBOL(freeze_super);
 
+/* Caller takes the sb->s_umount rw_semaphore lock and handles active count */
 static int thaw_super_locked(struct super_block *sb)
 {
int error;
 
-   if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
-   up_write(&sb->s_umount);
+   if (sb->s_writers.frozen != SB_FREEZE_COMPLETE)
return -EINVAL;
-   }
 
if (sb_rdonly(sb)) {
sb->s_writers.frozen = SB_UNFROZEN;
@@ -1554,7 +1558,6 @@ static int thaw_super_locked(struct super_block *sb)
printk(KERN_ERR
"VFS:Filesystem thaw failed\n");
lockdep_sb_freeze_release(sb);
-   up_write(&sb->s_umount);
return error;
}
}
@@ -1563,7 +1566,6 @@ static int thaw_super_locked(struct super_block *sb)
sb_freeze_unlock(sb);
 out:
wake_up(&sb->s_writers.wait_unfrozen);
-   deactivate_locked_super(sb);
return 0;
 }
 
@@ -1575,7 +1577,18 @@ static int thaw_super_locked(struct super_block *sb)
  */
 int thaw_super(struct super_block *sb)
 {
+   int error;
+
down_write(&sb->s_umount);
-   return thaw_super_locked(sb);
+   error = thaw_super_locked(sb);
+   if (error) {
+   up_write(&sb->s_umount);
+   goto out;
+   }
+
+   deactivate_locked_super(sb);
+
+out:
+   return error;
 }
 EXPORT_SYMBOL(thaw_super);
-- 
2.16.3



Re: unregister_netdevice: waiting for DEV to become free

2018-04-20 Thread syzbot
syzbot has found reproducer for the following crash on  
https://github.com/google/kmsan.git/master commit

48c6a2b0ab1b752451cdc40b5392471ed1a2a329 (Mon Apr 16 08:42:26 2018 +)
mm/kmsan: fix origin calculation in kmsan_internal_check_memory
syzbot dashboard link:  
https://syzkaller.appspot.com/bug?extid=2dfb68e639f0621b19fb


So far this crash happened 180 times on  
https://github.com/google/kmsan.git/master, net-next, upstream.

C reproducer: https://syzkaller.appspot.com/x/repro.c?id=4936564132020224
syzkaller reproducer:  
https://syzkaller.appspot.com/x/repro.syz?id=5817131010621440
Raw console output:  
https://syzkaller.appspot.com/x/log.txt?id=6313498770407424
Kernel config:  
https://syzkaller.appspot.com/x/.config?id=6627248707860932248

compiler: clang version 7.0.0 (trunk 329391)

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+2dfb68e639f0621b1...@syzkaller.appspotmail.com
It will help syzbot understand when the bug is fixed.

IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready
device bridge_slave_1 left promiscuous mode
bridge0: port 2(bridge_slave_1) entered disabled state
device bridge_slave_0 left promiscuous mode
bridge0: port 1(bridge_slave_0) entered disabled state
unregister_netdevice: waiting for lo to become free. Usage count = 3



[PATCH 0/3] fs: minor fs thaw fixes and adjustments

2018-04-20 Thread Luis R. Rodriguez
Here's a few minor fs thaw fixes and adjustments I ran accross
as I started to refresh my development for to use freeze_fs on
suspend/hibernation [0]. These are just prep commits for the real
work, I'm still reviewing feedback and adjusting the code for
that.

I've tested the patches with generic/085 on xfs and found no regressions.

Luis R. Rodriguez (3):
  fs: move documentation for thaw_super() where appropriate
  fs: make thaw_super_locked() really just a helper
  fs: fix corner case race on freeze_bdev() when sb disappears

 fs/block_dev.c |  4 +++-
 fs/super.c | 39 ++-
 2 files changed, 29 insertions(+), 14 deletions(-)

-- 
2.16.3



Re: [RFC PATCH 00/79] Generic page write protection and a solution to page waitqueue

2018-04-20 Thread Tim Chen
On 04/20/2018 03:19 PM, Jerome Glisse wrote:
> On Fri, Apr 20, 2018 at 12:57:41PM -0700, Tim Chen wrote:
>> On 04/04/2018 12:17 PM, jgli...@redhat.com wrote:
>>
>>
>> Your approach seems useful if there are lots of locked pages sharing
>> the same wait queue.  
>>
>> That said, in the original workload from our customer with the long wait 
>> queue
>> problem, there was a single super hot page getting migrated, and it
>> is being accessed by all threads which caused the big log jam while they 
>> wait for
>> the migration to get completed.  
>> With your approach, we will still likely end up with a long queue 
>> in that workload even if we have per page wait queue.
>>
>> Thanks.
> 
> Ok so i re-read the thread, i was writting this cover letter from memory
> and i had bad recollection of your issue, so sorry.
> 
> First, do you have a way to reproduce the issue ? Something easy would
> be nice :)

Unfortunately it is a customer workload that they guard closely and wouldn't 
let us
look at the source code.  We have to profile and backtrace its behavior.

Mel made a quick attempt to reproduce the behavior with a hot page migration, 
but he wasn't quite able to duplicate the pathologic behavior.

> 
> So what i am proposing for per page wait queue would only marginaly help
> you (it might not even be mesurable in your workload). It would certainly
> make the code smaller and easier to understand i believe.

In certain cases if we have lots of pages sharing a page wait queue,
your solution would help, and we wouldn't be wasting time checking
waiters not waiting on the page that's being unlocked.  Though I
don't have a specific workload that has such behavior.

> 
> Now that i have look back at your issue i think there is 2 things we
> should do. First keep migration page map read only, this would at least
> avoid CPU read fault. In trace you captured i wasn't able to ascertain
> if this were read or write fault.
> 
> Second idea i have is about NUMA, everytime we NUMA migrate a page we
> could attach a temporary struct to the page (using page->mapping). So
> if we scan that page again we can inspect information about previous
> migration and see if we are not over migrating that page (ie bouncing
> it all over). If so we can mark the page (maybe with a page flag if we
> can find one) to protect it from further migration. That temporary
> struct would be remove after a while, ie autonuma would preallocate a
> bunch of those and keep an LRU of them and recycle the oldest when it
> needs a new one to migrate another page.

The goal to migrate a hot page with care, or avoid bouncing it around 
frequently makes sense.  If it is a hot page shared by many threads
running on different NUMA nodes, and moving it will only mildly improve NUMA
locality, we should avoid the migration.

Tim

> 
> 
> LSF/MM slots:
> 
> Michal can i get 2 slots to talk about this ? MM only discussion, one
> to talk about doing migration with page map read only but write
> protected while migration is happening. The other one to talk about
> attaching auto NUMA tracking struct to page.
> 
> Cheers,
> Jérôme
> 



Re: [PATCH 2/2] drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+

2018-04-20 Thread Eric Anholt
Daniel Vetter  writes:

> On Thu, Apr 19, 2018 at 12:20:35PM -0700, Eric Anholt wrote:
>> This driver will be used to support Mesa on the Broadcom 7268 and 7278
>> platforms.
>> 
>> V3D 3.3 introduces an MMU, which means we no longer need CMA or vc4's
>> complicated CL/shader validation scheme.  This massively changes the
>> GEM behavior, so I've forked off to a new driver.
>> 
>> Signed-off-by: Eric Anholt 
>
> Read through the entire thing, ignored the hw details, but dropped a few
> comments all over. With those addressed one way or another this has my
>
> Acked-by: Daniel Vetter 
>
> Can I call in a return favour for
> https://patchwork.freedesktop.org/series/41219/ ?

Done.

>> diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
>> index d3ab6abae838..f982558fc25d 100644
>> --- a/Documentation/gpu/drivers.rst
>> +++ b/Documentation/gpu/drivers.rst
>> @@ -10,6 +10,7 @@ GPU Driver Documentation
>> tegra
>> tinydrm
>> tve200
>> +   v3d
>> vc4
>> bridge/dw-hdmi
>> xen-front
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index bca3c32fb141..7314d66833fd 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -4795,6 +4795,15 @@ S:Maintained
>>  F:  drivers/gpu/drm/omapdrm/
>>  F:  Documentation/devicetree/bindings/display/ti/
>>  
>> +DRM DRIVERS FOR V3D
>> +M:  Eric Anholt 
>> +T:  git git://github.com/anholt/linux
>
> This one also official? If it's just for now I'd drop it ...

Sure.

>> +/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and 
>> maps
>> + * it for DMA.
>> + */
>> +static int
>> +v3d_bo_get_pages(struct v3d_bo *bo)
>> +{
>> +struct drm_gem_object *obj = &bo->base;
>> +struct drm_device *dev = obj->dev;
>> +int npages = obj->size >> PAGE_SHIFT;
>> +int ret = 0;
>> +
>> +mutex_lock(&bo->lock);
>> +if (bo->pages_refcount++ != 0)
>> +goto unlock;
>> +
>> +if (!obj->import_attach) {
>> +bo->pages = drm_gem_get_pages(obj);
>> +if (IS_ERR(bo->pages)) {
>> +ret = PTR_ERR(bo->pages);
>> +goto unlock;
>> +}
>> +
>> +bo->sgt = drm_prime_pages_to_sg(bo->pages, npages);
>> +if (IS_ERR(bo->sgt)) {
>> +ret = PTR_ERR(bo->sgt);
>> +goto put_pages;
>> +}
>> +} else {
>> +bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
>> +if (!bo->pages)
>> +goto put_pages;
>> +
>> +drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages,
>> + NULL, npages);
>> +}
>> +
>> +/* Map the pages for use by the GPU. */
>> +dma_map_sg(dev->dev, bo->sgt->sgl,
>> +   bo->sgt->nents, DMA_BIDIRECTIONAL);
>
> For dma-buf you already get a mapped sgt, and the idea at least is to not
> noodle around in internals like drm_prime_sg_to_page_addr_arrays does ...
> That was just a hack Dave did to avoid rewriting all of ttm, which imo
> shouldn't be copied all over the place (but it happens).
>
> Since you immediately convert the page list back into a mapped sg table
> it's a bit silly.
>
> I guess longer-term we could switch the gem helpers to just use sg tables
> directly, instead of going through pages arrays. But core mm folks just
> got nasty on us doing that, so I'm not sure which direction we should go
> here longer-term.

I moved the map/unmap to the !import case.  We still need the pages
array for v3d_gem_fault().  If we have an alternative for that that
isn't a linear walk of the sg at fault time, I'd be up for that.

>> +int v3d_gem_fault(struct vm_fault *vmf)
>> +{
>> +struct vm_area_struct *vma = vmf->vma;
>> +struct drm_gem_object *obj = vma->vm_private_data;
>> +struct v3d_bo *bo = to_v3d_bo(obj);
>> +unsigned long pfn;
>> +pgoff_t pgoff;
>> +int ret;
>> +
>> +/* We don't use vmf->pgoff since that has the fake offset: */
>> +pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
>> +pfn = page_to_pfn(bo->pages[pgoff]);
>
> Freaked out for a bit, then noticed you're pinning everything. That makes
> the bo->pages_refcount a bit confusing since totally not needed.
>
> I guess if you do have longer-term plans to roll out a shrinker I'd put at
> least a FIXME here.

Yep, long-term shrinker plans.  Added a note to the kerneldoc about why
we don't shrink yet.

>> +int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
>> +{
>> +int ret;
>> +
>> +ret = drm_gem_mmap(filp, vma);
>> +if (ret)
>> +return ret;
>> +
>> +v3d_set_mmap_vma_flags(vma);
>
> If it'd actually understand what these vma flag frobberies in most drivers
> do I might come up with an idea how we can avoid the copypaste. Oh well.
> Maybe a drm_gem_mmap_wc?

drm_gem_mmap seems to already be wc, just with different vma flags.  If
you ever figure out the flag frobbing, please let me know. :(

>> +
>> +return ret

Re: [RFC PATCH] dt-bindings: add a jsonschema binding example

2018-04-20 Thread Stephen Boyd
Quoting Rob Herring (2018-04-20 11:15:04)
> On Fri, Apr 20, 2018 at 11:47 AM, Stephen Boyd  wrote:
> > Quoting Rob Herring (2018-04-18 15:29:05)
> >> diff --git a/Documentation/devicetree/bindings/example-schema.yaml 
> >> b/Documentation/devicetree/bindings/example-schema.yaml
> >> new file mode 100644
> >> index ..fe0a3bd1668e
> >> --- /dev/null
> >> +++ b/Documentation/devicetree/bindings/example-schema.yaml
> >> @@ -0,0 +1,149 @@
> >> +
> >> +  The end of the description is marked by indentation less than the first 
> >> line
> >> +  in the description.
> >> +
> >> +select: false
> >> +  # 'select' is a schema applied to a DT node to determine if this binding
> >> +  # schema should be applied to the node. It is optional and by default 
> >> the
> >> +  # possible compatible strings are extracted and used to match.
> >
> > Can we get a concrete example here?
> 
> select: true
> 
> :) Which is apply to every node.
> 
> A better one is from the memory node schema ('$nodename' gets added :
> 
> select:
>   required: ["$nodename"]
>   properties:
> $nodename:
>   oneOf:
> - pattern: "^memory@[0-9a-f]*"
> - const: "memory" # 'memory' only allowed for selecting
> 
> 
> I expect the vast majority of device bindings will not use select at
> all and rely on compatible string matching.

Thanks! I was looking to see how the select syntax would work and this
shows one example nicely. I suppose another way would be to show how a
compatible string would be matched through select, even though it's
redundant.

Is there a way we can enforce node names through the schema too? For
example to enforce that a clock controller is called 'clock-controller'
or a spi master is called 'spi'.

> 
> >> +
> >> +properties:
> > [...]
> >> +
> >> +  interrupts:
> >> +# Either 1 or 2 interrupts can be present
> >> +minItems: 1
> >> +maxItems: 2
> >> +items:
> >> +  - description: tx or combined interrupt
> >> +  - description: rx interrupt
> >> +
> >> +description: |
> >
> > The '|' is needed to make yaml happy?
> 
> Yes, this is simply how you do literal text blocks in yaml.
> 
> We don't really need for this one really, but for the top-level
> 'description' we do. The long term intent is 'description' would be
> written in sphinx/rst and can be extracted into the DT spec (for
> common bindings). Grant has experimented with that some.

Ok. That sounds cool. Then we could embed links to datasheets and SVGs
too.

> 
> >> +  A variable number of interrupts warrants a description of what 
> >> conditions
> >> +  affect the number of interrupts. Otherwise, descriptions on standard
> >> +  properties are not necessary.
> >> +
> >> +  interrupt-names:
> >> +# minItems must be specified here because the default would be 2
> >> +minItems: 1
> >> +items:
> >> +  - const: "tx irq"
> >> +  - const: "rx irq"
> >> +
> >> +  # Property names starting with '#' must be quoted
> >> +  '#interrupt-cells':
> >> +# A simple case where the value must always be '2'.
> >> +# The core schema handles that this must be a single integer.
> >> +const: 2
> >> +
> >> +  interrupt-controller: {}
> >
> > Does '{}' mean nothing to see here?
> 
> Yes. It's just an empty schema that's always valid.

Could we include another schema to indicate that this is an interrupt
controller? I'm sort of asking for multi-schema inheritance.

> 
> >> +  foo-gpios:
> >> +maxItems: 1
> >> +description: A connection of the 'foo' gpio line.
> >> +
> >> +  vendor,int-property:
> >> +description: Vendor specific properties must have a description
> >> +type: integer # A type is also required
> >> +enum: [2, 4, 6, 8, 10]
> >> +
> >> +  vendor,bool-property:
> >> +description: Vendor specific properties must have a description
> >> +type: boolean
> >> +
> >> +required:
> >> +  - compatible
> >> +  - reg
> >> +  - interrupts
> >> +  - interrupt-controller
> >
> > Can the required or optional parts go under each property instead of
> > having a different section?
> 
> No, because then it is not json-schema language.
> 
> > Or does that make the schema parser
> > difficult to implement?
> 
> Yes, because then we have to implement a schema parser.

:/


Re: [PATCH v4 04/22] iommu/vt-d: add bind_pasid_table function

2018-04-20 Thread Jacob Pan
On Fri, 20 Apr 2018 19:25:34 +0100
Jean-Philippe Brucker  wrote:

> On Tue, Apr 17, 2018 at 08:10:47PM +0100, Alex Williamson wrote:
> [...]
> > > + /* Assign guest PASID table pointer and size order */
> > > + ctx_lo = (pasidt_binfo->base_ptr & VTD_PAGE_MASK) |
> > > + (pasidt_binfo->pasid_bits - MIN_NR_PASID_BITS);  
> > 
> > Where does this IOMMU API interface define that base_ptr is 4K
> > aligned or the format of the PASID table?  Are these all
> > standardized or do they vary by host IOMMU?  If they're standards,
> > maybe we could note that and the spec which defines them when we
> > declare base_ptr.  If they're IOMMU specific then I don't
> > understand how we'll match a user provided PASID table to the
> > requirements and format of the host IOMMU. Thanks,  
> 
> On SMMUv3 the minimum alignment for base_ptr is 64 bytes, so a guest
> under a vSMMU might pass a pointer that's not aligned on 4k.
> 
PASID table pointer for VT-d is 4K aligned. 
> Maybe this information could be part of the data passed to userspace
> about IOMMU table formats and features? They're not part of this
> series, but I think we wanted to communicate IOMMU-specific features
> via sysfs.
> 
Agreed, I believe Yi Liu is working on a sysfs interface such that QEMU
can match IOMMU model and features.


Re: [PATCH] [v2] scsi: ips: fix firmware timestamps for 32-bit

2018-04-20 Thread Martin K. Petersen

Arnd,

> do_gettimeofday() is deprecated since it will stop working in 2038 on
> 32-bit platforms, leading to incorrect times passed to the firmware.
> On 64-bit platforms the current code appears to be fine, as the
> calculation passes an 8-bit century number into the firmware that can
> represent times long in the future (possibly until 25599).
>
> Using ktime_get_real_seconds() to get a 64-bit seconds value and
> time64_to_tm() to convert it into the firmware format greatly
> simplifies the ips timekeeping code, makes 32-bit and 64-bit behave
> the same way here, and gets us closer to removing the deprecated
> interfaces.

Applied to 4.18/scsi-queue, thanks!

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] [RESEND 2] scsi: esas2r: use ktime_get_real_seconds()

2018-04-20 Thread Martin K. Petersen

Arnd,

> do_gettimeofday() is deprecated because of the y2038 overflow.  Here,
> we use the result to pass into a 32-bit field in the firmware, which
> still risks an overflow, but if the firmware is written to expect
> unsigned values, it can at least last until y2106, and there is not
> much we can do about it.
>
> This changes do_gettimeofday() to ktime_get_real_seconds(), which at
> least simplifies the code a bit, and avoids the deprecated
> interface. I'm adding a comment about the overflow to document what
> happens.

Applied to 4.18/scsi-queue. Thanks!

-- 
Martin K. Petersen  Oracle Linux Engineering


[PATCH] fscrypt: use unbound workqueue for decryption

2018-04-20 Thread Eric Biggers
Improve fscrypt read performance by switching the decryption workqueue
from bound to unbound.  With the bound workqueue, when multiple bios
completed on the same CPU, they were decrypted on that same CPU.  But
with the unbound queue, they are now decrypted in parallel on any CPU.

Although fscrypt read performance can be tough to measure due to the
many sources of variation, this change is most beneficial when
decryption is slow, e.g. on CPUs without AES instructions.  For example,
I timed tarring up encrypted directories on f2fs.  On x86 with AES-NI
instructions disabled, the unbound workqueue improved performance by
about 25-35%, using 1 to NUM_CPUs jobs with 4 or 8 CPUs available.  But
with AES-NI enabled, performance was unchanged to within ~2%.

I also did the same test on a quad-core ARM CPU using xts-speck128-neon
encryption.  There performance was usually about 10% better with the
unbound workqueue, bringing it closer to the unencrypted speed.

The unbound workqueue may be worse in some cases due to worse locality,
but I think it's still the better default.  dm-crypt uses an unbound
workqueue by default too, so this change makes fscrypt match.

Signed-off-by: Eric Biggers 
---
 fs/crypto/crypto.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index ce654526c0fb..984e190f9b89 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -427,8 +427,17 @@ int fscrypt_initialize(unsigned int cop_flags)
  */
 static int __init fscrypt_init(void)
 {
+   /*
+* Use an unbound workqueue to allow bios to be decrypted in parallel
+* even when they happen to complete on the same CPU.  This sacrifices
+* locality, but it's worthwhile since decryption is CPU-intensive.
+*
+* Also use a high-priority workqueue to prioritize decryption work,
+* which blocks reads from completing, over regular application tasks.
+*/
fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
-   WQ_HIGHPRI, 0);
+WQ_UNBOUND | WQ_HIGHPRI,
+num_online_cpus());
if (!fscrypt_read_workqueue)
goto fail;
 
-- 
2.17.0.484.g0c8726318c-goog



Re: [PATCH][V2] isci: Fix infinite loop in while loop

2018-04-20 Thread Martin K. Petersen

Colin,

> In the case when the phy_mask is bitwise anded with the phy_index bit
> is zero the continue statement currently jumps to the next iteration
> of the while loop and phy_index is never actually incremented,
> potentially causing an infinite loop if phy_index is less than
> SCI_MAX_PHS. Fix this by turning the while loop into a for loop.

Applied to 4.17/scsi-fixes. Thanks!

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] perf tools: set kernel end address properly

2018-04-20 Thread Kim Phillips
On Fri, 20 Apr 2018 08:59:15 +0900
Namhyung Kim  wrote:

> The map_groups__fixup_end() was called to set end addresses of kernel
> map and module maps.  But now machine__create_modules() is set the end
> address of modules properly, the only remaining piece is the kernel map.
> We can set it with adjacent module's address directly instead of calling
> the map_groups__fixup_end().  If there's no module after the kernel map,
> the end address will be ~0ULL.
> 
> Since it also changes the start address of the kernel map, it needs to
> re-insert the map to the kmaps in order to keep a correct ordering.  Kim
> reported that it caused problems on ARM64.
> 
> Reported-by: Kim Phillips 
> Signed-off-by: Namhyung Kim 
> ---

Acked-by: Kim Phillips 

Thanks,

Kim


Re: [PATCH v4 04/22] iommu/vt-d: add bind_pasid_table function

2018-04-20 Thread Jacob Pan
On Tue, 17 Apr 2018 13:10:47 -0600
Alex Williamson  wrote:

> On Mon, 16 Apr 2018 14:48:53 -0700
> Jacob Pan  wrote:
> 
> > Add Intel VT-d ops to the generic iommu_bind_pasid_table API
> > functions.
> > 
> > The primary use case is for direct assignment of SVM capable
> > device. Originated from emulated IOMMU in the guest, the request
> > goes through many layers (e.g. VFIO). Upon calling host IOMMU
> > driver, caller passes guest PASID table pointer (GPA) and size.
> > 
> > Device context table entry is modified by Intel IOMMU specific
> > bind_pasid_table function. This will turn on nesting mode and
> > matching translation type.
> > 
> > The unbind operation restores default context mapping.
> > 
> > Signed-off-by: Jacob Pan 
> > Signed-off-by: Liu, Yi L 
> > Signed-off-by: Ashok Raj 
> > ---
> >  drivers/iommu/intel-iommu.c   | 119
> > ++
> > include/linux/dma_remapping.h |   1 + 2 files changed, 120
> > insertions(+)
> > 
> > diff --git a/drivers/iommu/intel-iommu.c
> > b/drivers/iommu/intel-iommu.c index a0f81a4..d8058be 100644
> > --- a/drivers/iommu/intel-iommu.c
> > +++ b/drivers/iommu/intel-iommu.c
> > @@ -2409,6 +2409,7 @@ static struct dmar_domain
> > *dmar_insert_one_dev_info(struct intel_iommu *iommu,
> > info->ats_supported = info->pasid_supported = info->pri_supported =
> > 0; info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
> > info->ats_qdep = 0;
> > +   info->pasid_table_bound = 0;
> > info->dev = dev;
> > info->domain = domain;
> > info->iommu = iommu;
> > @@ -5132,6 +5133,7 @@ static void
> > intel_iommu_put_resv_regions(struct device *dev, 
> >  #ifdef CONFIG_INTEL_IOMMU_SVM
> >  #define MAX_NR_PASID_BITS (20)
> > +#define MIN_NR_PASID_BITS (5)
> >  static inline unsigned long intel_iommu_get_pts(struct intel_iommu
> > *iommu) {
> > /*
> > @@ -5258,6 +5260,119 @@ struct intel_iommu
> > *intel_svm_device_to_iommu(struct device *dev) 
> > return iommu;
> >  }
> > +
> > +static int intel_iommu_bind_pasid_table(struct iommu_domain
> > *domain,
> > +   struct device *dev, struct pasid_table_config
> > *pasidt_binfo) +{  
> 
> Never validates pasidt_binfo->{version,bytes}
> 
good catch, will do.
> > +   struct intel_iommu *iommu;
> > +   struct context_entry *context;
> > +   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > +   struct device_domain_info *info;
> > +   struct pci_dev *pdev;
> > +   u8 bus, devfn, host_table_pasid_bits;
> > +   u16 did, sid;
> > +   int ret = 0;
> > +   unsigned long flags;
> > +   u64 ctx_lo;
> > +
> > +   iommu = device_to_iommu(dev, &bus, &devfn);
> > +   if (!iommu)
> > +   return -ENODEV;
> > +   /* VT-d spec section 9.4 says pasid table size is encoded
> > as 2^(x+5) */
> > +   host_table_pasid_bits = intel_iommu_get_pts(iommu) +
> > MIN_NR_PASID_BITS;
> > +   if (!pasidt_binfo || pasidt_binfo->pasid_bits >
> > host_table_pasid_bits ||
> > +   pasidt_binfo->pasid_bits < MIN_NR_PASID_BITS) {
> > +   pr_err("Invalid gPASID bits %d, host range %d -
> > %d\n",
> > +   pasidt_binfo->pasid_bits,
> > +   MIN_NR_PASID_BITS, host_table_pasid_bits);
> > +   return -ERANGE;
> > +   }
> > +   if (!ecap_nest(iommu->ecap)) {
> > +   dev_err(dev, "Cannot bind PASID table, no nested
> > translation\n");
> > +   ret = -EINVAL;
> > +   goto out;
> > +   }  
> 
> Gratuitous use of pr_err, some of these look user reachable, for
> instance can vfio know in advance the supported widths or can the user
> trigger that pr_err at will?
Yes, the current IOMMU sysfs for vt-d does show the content of
capability registers so user could know in advance whether the nested
mode is supported. But I think we are in need of some generic interface
to enumerate IOMMU features. Here I am trying to prepare for the worst.
Are you concerned about security if user can trigger that error at
will? Sorry I didn't get the point.

>  Some of these errno values are also
> maybe not as descriptive as they could be.  For instance if the iommu
> doesn't support nesting, that's not a calling argument error, that's
> an unsupported device error, right?
> 
your are right, that is not invalid argument. You mean use ENODEV?

> > +   pdev = to_pci_dev(dev);
> > +   sid = PCI_DEVID(bus, devfn);
> > +   info = dev->archdata.iommu;
> > +
> > +   if (!info) {
> > +   dev_err(dev, "Invalid device domain info\n");
> > +   ret = -EINVAL;
> > +   goto out;
> > +   }
> > +   if (info->pasid_table_bound) {
> > +   dev_err(dev, "Device PASID table already bound\n");
> > +   ret = -EBUSY;
> > +   goto out;
> > +   }
> > +   if (!info->pasid_enabled) {
> > +   ret = pci_enable_pasid(pdev, info->pasid_supported
> > & ~1);
> > +   if (ret) {
> > +   dev_err(dev, "Failed to enable PASID\n");
> > +   goto out;
> > +   }
> > +   }
> > +   spi

Re: [PATCH v3 0/6] scsi: handle special return codes for ABORTED COMMAND

2018-04-20 Thread Martin K. Petersen

Martin,

> Here is another attempt to handle the special return codes for ABORTED
> COMMAND for certain SCSI devices. Following MKP's recommendation, I've
> created two new BLIST flags, simplifying the code in scsi_error.c
> compared to the previous versions. Rather than using "free" bits, I
> increased the length of blist_flag_t to 64 bit, and used previously
> unused bits. I also added checking for obsolete and unused bits.
>
> For the blist_flag_t size increase, I used sparse to try and avoid
> regressions; that necessitated fixing sparse's errors for the current
> code first.

Much better, thanks for reworking this. Applied to 4.18/scsi-queue.

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [patch V2 0/8] rslib: Cleanup and VLA removal

2018-04-20 Thread Kees Cook
On Thu, Apr 19, 2018 at 3:04 AM, Thomas Gleixner  wrote:
> Kees tried to get rid of the Variable Length Arrays in the Reed-Solomon
> library by replacing them with fixed length arrays on stack. Though they
> are rather large and Andrew did not fall in love with that solution.
>
> This series addresses that in a different way by splitting the rs control
> structure up, so that each invocation of rs_init() returns a new instance
> in which the decoder buffers are allocated. The polynom tables which build
> the base for the RS codecs are still shareable between the instances to
> avoid large allocations and initializations of the same data over and over.
>
> The usage sites have been audited and fixed up where necessary to
> accomodate the decoder change which forbids parallel decoder invocation for
> a particular rs control instance to prevent buffer corruption.
>
> While at it the patch set tidies up the code and converts the related files
> over to use SPDX license identifiers.
>
> Changes since V1:
>
>Simplify error path in the diskonchip code and use the proper
>function to free the decoder.
>
> As this spawns multiple subsystems it should either go through Andrews tree
> or Kees can route it with his other hardening stuff.

I can start collecting ignored VLA patches. I haven't done that yet as
most maintainers have been taking them. There are only a handful
unapplied. Should I start the tree with rslib? :)

Either way:

Reviewed-by: Kees Cook 

-Kees

-- 
Kees Cook
Pixel Security


[PATCH v4] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.

2018-04-20 Thread NeilBrown

Winbond spi-nor flash 32MB and larger have an 'Extended Address
Register' as one option for addressing beyond 16MB (Macronix
has the same concept, Spansion has EXTADD bits in the Bank Address
Register).

According to section
   8.2.7 Write Extended Address Register (C5h)

of the Winbond W25Q256FV data sheet (256M-BIT SPI flash)

   The Extended Address Register is only effective when the device is
   in the 3-Byte Address Mode.  When the device operates in the 4-Byte
   Address Mode (ADS=1), any command with address input of A31-A24
   will replace the Extended Address Register values. It is
   recommended to check and update the Extended Address Register if
   necessary when the device is switched from 4-Byte to 3-Byte Address
   Mode.

So the documentation suggests clearing the EAR after switching to
3-byte mode.  Experimentation shows that the EAR is *always* one after
the switch to 3-byte mode, so clearing the EAR is mandatory at
shutdown for a subsequent 3-byte-addressed reboot to work.

Note that some SOCs (e.g. MT7621) do not assert a reset line at normal
reboot, so we cannot rely on hardware reset.  The MT7621 does assert a
reset line at watchdog-reset.

Acked-by: Marek Vasut 
Signed-off-by: NeilBrown 
---

Changes since v3:
 Removed Fixes/stable tags. Added Acked-by from Marek.
Changes sinc3 v2:
  Fixed comment style.


 drivers/mtd/spi-nor/spi-nor.c | 14 ++
 include/linux/mtd/spi-nor.h   |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 5bfa36e95f35..42ae9a1529bb 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -284,6 +284,20 @@ static inline int set_4byte(struct spi_nor *nor, const 
struct flash_info *info,
if (need_wren)
write_disable(nor);
 
+   if (!status && !enable &&
+   JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+   /*
+* On Winbond W25Q256FV, leaving 4byte mode causes
+* the Extended Address Register to be set to 1, so all
+* 3-byte-address reads come from the second 16M.
+* We must clear the register to enable normal behavior.
+*/
+   write_enable(nor);
+   nor->cmd_buf[0] = 0;
+   nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1);
+   write_disable(nor);
+   }
+
return status;
default:
/* Spansion style */
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index de36969eb359..e60da0d34cc1 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -62,6 +62,8 @@
 #define SPINOR_OP_RDCR 0x35/* Read configuration register */
 #define SPINOR_OP_RDFSR0x70/* Read flag status register */
 #define SPINOR_OP_CLFSR0x50/* Clear flag status register */
+#define SPINOR_OP_RDEAR0xc8/* Read Extended Address 
Register */
+#define SPINOR_OP_WREAR0xc5/* Write Extended Address 
Register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
 #define SPINOR_OP_READ_4B  0x13/* Read data bytes (low frequency) */
-- 
2.14.0.rc0.dirty



signature.asc
Description: PGP signature


Re: Representative Needed.

2018-04-20 Thread PPMC OFFSHORE
Good day,

  I am seeking your concept with great gratitude to present you as a 
representative to carry out business transactions with a reasonable share upon 
your interest and cooperation to work with us in trust. If interested please 
get back.

Regards
Kingsley

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus



Re: [PATCH v2] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.

2018-04-20 Thread NeilBrown
On Sat, Apr 21 2018, Boris Brezillon wrote:

> On Sat, 21 Apr 2018 07:28:19 +1000
> NeilBrown  wrote:
>
>> On Fri, Apr 20 2018, Boris Brezillon wrote:
>> 
>> > Hi Neil,
>> >
>> > On Mon, 16 Apr 2018 09:42:30 +1000
>> > NeilBrown  wrote:
>> >  
>> >> Winbond spi-nor flash 32MB and larger have an 'Extended Address
>> >> Register' as one option for addressing beyond 16MB (Macronix
>> >> has the same concept, Spansion has EXTADD bits in the Bank Address
>> >> Register).
>> >> 
>> >> According to section
>> >>8.2.7 Write Extended Address Register (C5h)
>> >> 
>> >> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash)
>> >> 
>> >>The Extended Address Register is only effective when the device is
>> >>in the 3-Byte Address Mode.  When the device operates in the 4-Byte
>> >>Address Mode (ADS=1), any command with address input of A31-A24
>> >>will replace the Extended Address Register values. It is
>> >>recommended to check and update the Extended Address Register if
>> >>necessary when the device is switched from 4-Byte to 3-Byte Address
>> >>Mode.
>> >> 
>> >> So the documentation suggests clearing the EAR after switching to
>> >> 3-byte mode.  Experimentation shows that the EAR is *always* one after
>> >> the switch to 3-byte mode, so clearing the EAR is mandatory at
>> >> shutdown for a subsequent 3-byte-addressed reboot to work.
>> >> 
>> >> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal
>> >> reboot, so we cannot rely on hardware reset.  The MT7621 does assert a
>> >> reset line at watchdog-reset.
>> >> 
>> >> Signed-off-by: NeilBrown   
>> >
>> > We should probably backport the fix. Can you add a Fixes and Cc-stable
>> > tag?  
>> 
>> It's a bit weird having Fixes when this isn't a regression, but I guess
>> it doesn't hurt.
>
> Well, I thought you wanted this patch to be backported to stable
> releases, hence my suggestion. Of course it's not a regression, since
> the bug is here from the beginning, but nonetheless it's fixing a bug.

I have not interest in this patch going to stable.  I'll be perfectly
happy once it lands upstream.

>
>> I chose
>>   Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when 
>> exiting")
>> as this patch it useless without that one.
>
> Not sure that's how Fixes should be used. IIUC, it should point to the
> first commit where the bug was introduced, so I guess it's 0aa87b7563f1
> ("mtd: m25p80: add support for the windbond w25q256 chip") in this case.

That does make a certain sort of sense, but if you tried applying this
patch to that kernel, it wouldn't apply at all.  So it is hard to say
that it fixes anything there.

I think it is best to drop the fixes/stable tags.  It isn't a
regression, doesn't cause data corruption, doesn't make it possible to
crash the kernel, so it isn't really "stable" material in my mind.

Thanks,
NeilBrown


>
> Now, if you want to restrict the kernel releases this patch should be
> backported to, you can use the '# vX.Y+' suffix in your Cc-stable tag.
>
>> 
>> I also fixed the comment and have resent.
>
> Thanks.


signature.asc
Description: PGP signature


Re: [PATCH v3 0/7] MIPS: perf: MT fixes and improvements

2018-04-20 Thread Florian Fainelli
On 04/20/2018 03:23 AM, Matt Redfearn wrote:
> This series addresses a few issues with how the MIPS performance
> counters code supports the hardware multithreading MT ASE.
> 
> Firstly, implementations of the MT ASE may implement performance
> counters
> per core or per thread(TC). MIPS Techologies implementations signal this
> via a bit in the implmentation specific CONFIG7 register. Since this
> register is implementation specific, checking it should be guarded by a
> PRID check. This also replaces a bit defined by a magic number.
> 
> Secondly, the code currently uses vpe_id(), defined as
> smp_processor_id(), divided by 2, to share core performance counters
> between VPEs. This relies on a couple of assumptions of the hardware
> implementation to function correctly (always 2 VPEs per core, and the
> hardware reading only the least significant bit).
> 
> Finally, the method of sharing core performance counters between VPEs is
> suboptimal since it does not allow one process running on a VPE to use
> all of the performance counters available to it, because the kernel will
> reserve half of the coutners for the other VPE even if it may never use
> them. This reservation at counter probe is replaced with an allocation
> on use strategy.
> 
> Tested on a MIPS Creator CI40 (2C2T MIPS InterAptiv with per-TC
> counters, though for the purposes of testing the per-TC availability was
> hardcoded to allow testing both paths).
> 
> Series applies to v4.16

Sorry it took so long to get that tested.

Sounds like you need to build test this on a BMIPS5000 configuration
(bmips_stb_defconfig should provide that):

In file included from ./arch/mips/include/asm/mach-generic/spaces.h:15:0,
 from ./arch/mips/include/asm/mach-bmips/spaces.h:16,
 from ./arch/mips/include/asm/addrspace.h:13,
 from ./arch/mips/include/asm/barrier.h:11,
 from ./include/linux/compiler.h:245,
 from ./include/linux/kernel.h:10,
 from ./include/linux/cpumask.h:10,
 from arch/mips/kernel/perf_event_mipsxx.c:18:
arch/mips/kernel/perf_event_mipsxx.c: In function 'mipsxx_pmu_enable_event':
./arch/mips/include/asm/mipsregs.h:738:52: error: suggest parentheses
around '+' in operand of '&' [-Werror=parentheses]
 #define BRCM_PERFCTRL_VPEID(v) (_ULCAST_(1) << (12 + v))

arch/mips/kernel/perf_event_mipsxx.c:385:10: note: in expansion of macro
'BRCM_PERFCTRL_VPEID'
   ctrl = BRCM_PERFCTRL_VPEID(cpu & MIPS_CPUID_TO_COUNTER_MASK);
  ^~~
  CC  drivers/of/fdt_addres

after fixing that, I tried the following to see whether this would be a
good test case to exercise against:

perf record -a -C 0 taskset -c 1 /bin/true
perf record -a -C 1 taskset -c 0 /bin/true

and would not see anything related to /bin/true running in either case,
which seems like it does the right thing?

Tested-by: Florian Fainelli 

BTW, for some reason not specifying -a -C  does lead to lockups,
consistently and for pretty much any perf command, this could be BMIPS
specific, did not get a chance to cross test on a different machine.

> 
> 
> Changes in v3:
> New patch to detect feature presence in cpu-probe.c
> Use flag in cpu_data set by cpu_probe.c to indicate feature presence.
> - rebase on new feature detection
> 
> Changes in v2:
> Fix mipsxx_pmu_enable_event for !#ifdef CONFIG_MIPS_MT_SMP
> - Fix !#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS build
> - re-use cpuc variable in mipsxx_pmu_alloc_counter,
>   mipsxx_pmu_free_counter rather than having sibling_ version.
> Since BMIPS5000 does not implement per TC counters, we can remove the
> check on cpu_has_mipsmt_pertccounters.
> New patch to fix BMIPS5000 system mode perf.
> 
> Matt Redfearn (7):
>   MIPS: Probe for MIPS MT perf counters per TC
>   MIPS: perf: More robustly probe for the presence of per-tc counters
>   MIPS: perf: Use correct VPE ID when setting up VPE tracing
>   MIPS: perf: Fix perf with MT counting other threads
>   MIPS: perf: Allocate per-core counters on demand
>   MIPS: perf: Fold vpe_id() macro into it's one last usage
>   MIPS: perf: Fix BMIPS5000 system mode counting
> 
>  arch/mips/include/asm/cpu-features.h |   7 ++
>  arch/mips/include/asm/cpu.h  |   2 +
>  arch/mips/include/asm/mipsregs.h |   6 +
>  arch/mips/kernel/cpu-probe.c |  12 ++
>  arch/mips/kernel/perf_event_mipsxx.c | 232 
> +++
>  arch/mips/oprofile/op_model_mipsxx.c |   2 -
>  6 files changed, 150 insertions(+), 111 deletions(-)
> 


-- 
Florian


Re: [PATCH v6 01/16] initrd: Add weakly-linked generic free_initrd_mem.

2018-04-20 Thread Shea Levy
Hi Palmer,

Palmer Dabbelt  writes:

> On Wed, 18 Apr 2018 04:10:16 PDT (-0700), s...@shealevy.com wrote:
>> Hi all,
>>
>> Shea Levy  writes:
>>
>>> This function is effectively identical across 14 architectures, and
>>> the generic implementation is small enough to be negligible in the
>>> architectures that do override it. Many of the remaining divergent
>>> implementations can be included in the common code path in future,
>>> further reducing code duplication and sharing improvements between
>>> architectures.
>>>
>>> Series boot-tested on RISC-V (which now uses the generic
>>> implementation) and x86_64 (which doesn't).
>>>
>>> v6: Add information about build/run testing.
>>> v5: Add more complete commit messages.
>>> v4: Use weak symbols instead of Kconfig.
>>> v3: Make the generic path opt-out instead of opt-in.
>>> v2: Mark generic free_initrd_mem __init.
>>>
>>> Signed-off-by: Shea Levy 
>>> ---
>>>  init/initramfs.c | 5 +
>>>  1 file changed, 5 insertions(+)
>>>
>>> diff --git a/init/initramfs.c b/init/initramfs.c
>>> index 7e99a0038942..c8fe150f958a 100644
>>> --- a/init/initramfs.c
>>> +++ b/init/initramfs.c
>>> @@ -526,6 +526,11 @@ extern unsigned long __initramfs_size;
>>>  #include 
>>>  #include 
>>>  
>>> +void __init __weak free_initrd_mem(unsigned long start, unsigned long end)
>>> +{
>>> +   free_reserved_area((void *)start, (void *)end, -1, "initrd");
>>> +}
>>> +
>>>  static void __init free_initrd(void)
>>>  {
>>>  #ifdef CONFIG_KEXEC_CORE
>>> -- 
>>> 2.16.2
>>
>> This series has been quiet for a few weeks other than picking up some
>> arch-specific acks. What is the next step here?
>
> I'm not sure.  I don't really think it's sane for the RISC-V tree because it 
> touches so many architectures -- I haven't looked closely, though.

Yeah, I think that makes sense.

> IIRC 
> there's a slight behavior change to the RISC-V port, which I'd be OK taking 
> through my tree (and then obviously the RISC-V cleanup as well, unless it 
> goes 
> in as a whole patch set).
>

So currently the behavior for RISC-V is changed by simply deleting the
arch-specific free_initrd_mem, which was a noop. Would you like me to
first submit a patch to have the arch-specific free_initrd_mem and then
change that in this series?

>
> For the IRQ cleanup I currently have in flight
>
> * Add the generic support
> * Move every arch over (RISC-V is in, the rest aren't yet)
> * Clean up a bit now that everyone is generic
>
> That lets all the arch-specific patches go in parallel, but can be a bit of a 
> headache to manage.

With the current series, the first patch could go in on its own and all
of the arch-specific patches can go in in parallel if we wanted to, but
beyond the above-suggested implementation of the RISC-V free_initrd_mem
there's no real reordering meaningful here.

>
> I'm adding Arnd and Olof, as they know a lot more about Linux than I do.  
> Here's the top-level of the v4 patch set: https://lkml.org/lkml/2018/3/28/744

And here's the top-level of v6, the latest: https://lkml.org/lkml/2018/4/1/50


signature.asc
Description: PGP signature


Re: Representative Needed.

2018-04-20 Thread PPMC OFFSHORE
Good day,

  I am seeking your concept with great gratitude to present you as a 
representative to carry out business transactions with a reasonable share upon 
your interest and cooperation to work with us in trust. If interested please 
get back.

Regards
Kingsley

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus



Re: [PATCH 1/2] regulator: add QCOM RPMh regulator driver

2018-04-20 Thread Lina Iyer

On Fri, Apr 20 2018 at 13:07 -0600, David Collins wrote:

On 04/18/2018 10:55 PM, Stephen Boyd wrote:

Quoting David Collins (2018-03-22 18:30:06)

On 03/21/2018 12:07 PM, Stephen Boyd wrote:

Quoting David Collins (2018-03-16 18:09:10)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 097f617..e0ecd0a 100644



+   ret = cmd_db_ready();
+   if (ret < 0) {
+   if (ret != -EPROBE_DEFER)
+   dev_err(dev, "Command DB not available, ret=%d\n", ret);
+   return ret;
+   }


We should just make rpmh parent device call cmd_db_ready() so that these
devices aren't even populated until then and so that cmd_db_ready() is
only in one place. Lina?


Let's see if Lina has qualms about this plan.


Sounds like you're ok with it.


Sure, I'll remove this check if Lina agrees to add it in the rpmh driver.


We want to make the RSC nodes child of Command DB? That way we probe the
controllers only if the command DB is ready?

I could do that. Just so you know, there is are no strict directives to
use Command DB. If a driver knows the information it needs to pass to
the accelerator, it may choose to skip command DB completely.

Thanks,
Lina



Re: [PATCH] bsg referencing bus driver module

2018-04-20 Thread Anatoliy Glagolev
 
> This patch isn't applyable because your mailer has changed all the tabs
> to spaces.
> 
> I also think there's no need to do it this way.  I think what we need
> is for fc_bsg_remove() to wait until the bsg queue is drained.  It does
> look like the author thought this happened otherwise the code wouldn't
> have the note.  If we fix it that way we can do the same thing in all
> the other transport classes that use bsg (which all have a similar
> issue).
> 
> James
> 

Thanks, James. Sorry about the tabs; re-sending.

On fc_bsg_remove()...: are you suggesting to implement the whole fix
in scsi_transport_fc.c? That would be nice, but I do not see how that
is possible. Even with the queue drained bsg still holds a reference
to the Scsi_Host via bsg_class_device; bsg_class_device itself is
referenced on bsg_open and kept around while a user-mode process keeps
a handle to bsg.
Even if we somehow implement the waiting the call may be stuck
forever if the user-mode process keeps the handle.
I think handling it via a rererence to the module is more consistent
with the way things are done in Linux. You suggested the approach
youself back in "Waiting for scsi_host_template release" discussion.


>From df939b80d02bf37b21efaaef8ede86cfd39b0cb8 Mon Sep 17 00:00:00 2001
From: Anatoliy Glagolev 
Date: Thu, 19 Apr 2018 15:06:06 -0600
Subject: [PATCH] bsg referencing parent module

Fixing a bug when bsg layer holds the last reference to device
when the device's module has been unloaded. Upon dropping the
reference the device's release function may touch memory of
the unloaded module.
---
 block/bsg-lib.c  | 24 ++--
 block/bsg.c  | 29 ++---
 drivers/scsi/scsi_transport_fc.c |  8 ++--
 include/linux/bsg-lib.h  |  4 
 include/linux/bsg.h  |  5 +
 5 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fc2e5ff..90c28fd 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -309,6 +309,25 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
bsg_job_fn *job_fn, int dd_job_size,
void (*release)(struct device *))
 {
+   return bsg_setup_queue_ex(dev, name, job_fn, dd_job_size, release,
+   NULL);
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ * @release: @dev release function
+ * @dev_module: @dev's module
+ */
+struct request_queue *bsg_setup_queue_ex(struct device *dev, const char *name,
+   bsg_job_fn *job_fn, int dd_job_size,
+   void (*release)(struct device *),
+   struct module *dev_module)
+{
struct request_queue *q;
int ret;
 
@@ -331,7 +350,8 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-   ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
+   ret = bsg_register_queue_ex(q, dev, name, &bsg_transport_ops, release,
+   dev_module);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
   "initialize - register queue\n", dev->kobj.name);
@@ -343,4 +363,4 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
blk_cleanup_queue(q);
return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(bsg_setup_queue);
+EXPORT_SYMBOL_GPL(bsg_setup_queue_ex);
diff --git a/block/bsg.c b/block/bsg.c
index defa06c..6920c5b 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -750,7 +750,8 @@ static struct bsg_device *__bsg_get_device(int minor, 
struct request_queue *q)
return bd;
 }
 
-static struct bsg_device *bsg_get_device(struct inode *inode, struct file 
*file)
+static struct bsg_device *bsg_get_device(struct inode *inode, struct file 
*file,
+   struct bsg_class_device **pbcd)
 {
struct bsg_device *bd;
struct bsg_class_device *bcd;
@@ -766,6 +767,7 @@ static struct bsg_device *bsg_get_device(struct inode 
*inode, struct file *file)
 
if (!bcd)
return ERR_PTR(-ENODEV);
+   *pbcd = bcd;
 
bd = __bsg_get_device(iminor(inode), bcd->queue);
if (bd)
@@ -781,22 +783,34 @@ static struct bsg_device *bsg_get_device(struct inode 
*inode, struct file *file)
 static int bsg_open(struct inode *inode, struct file *file)
 {
struct bsg_device *bd;
+   struct bsg_class_device *bcd;
 
-   bd = bsg_get_device(inode, file);
+   bd = bsg_get_device(inode, file, &bcd);
 
if (IS_ERR(bd))
return PTR_ERR(bd);
 
file->private_data = bd;
+   if (bcd->parent_module) {

[PATCH 3/6] x86/intel_rdt/mba_sc: Add initialization support

2018-04-20 Thread Vikas Shivappa
When MBA software controller is enabled, we need a per domain storage
for user specified bandwidth in "MBps" and the "percentage" values which
are programmed into the IA32_MBA_THRTL_MSR. Add support for these data
structures and initialization.

The MBA percentage values have a default max value of 100 but however
the max value in MBps is not available from the hardware and we keep it
just at U32_MAX. This simply says that user can use all bandwidth by
default but does not say what is the actual max bandwidth available. The
actual bandwidth that is available may depend on lot of factors like QPI
link, number of memory channels, memory channel frequency, its width and
memory speed, how many channels are configured and also if memory
interleaving is enabled.

Signed-off-by: Vikas Shivappa 
---
 arch/x86/kernel/cpu/intel_rdt.c  | 37 +++-
 arch/x86/kernel/cpu/intel_rdt.h  |  3 +++
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  3 +++
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 2a0931f..07f6b3b 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -35,6 +35,7 @@
 
 #define MAX_MBA_BW 100u
 #define MBA_IS_LINEAR  0x4
+#define MBA_MAX_MBPS   U32_MAX
 
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
@@ -439,25 +440,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource 
*r, int id,
return NULL;
 }
 
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
+{
+   int i;
+
+   /*
+* Initialize the Control MSRs to having no control.
+* For Cache Allocation: Set all bits in cbm
+* For Memory Allocation: Set b/w requested to 100%
+* and the bandwidth in MBps to U32_MAX
+*/
+   for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+   *dc = r->default_ctrl;
+   *dm = MBA_MAX_MBPS;
+   }
+}
+
 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
 {
struct msr_param m;
-   u32 *dc;
-   int i;
+   u32 *dc, *dm;
 
dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
if (!dc)
return -ENOMEM;
 
-   d->ctrl_val = dc;
+   dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+   if (!dm) {
+   kfree(dc);
+   return -ENOMEM;
+   }
 
-   /*
-* Initialize the Control MSRs to having no control.
-* For Cache Allocation: Set all bits in cbm
-* For Memory Allocation: Set b/w requested to 100
-*/
-   for (i = 0; i < r->num_closid; i++, dc++)
-   *dc = r->default_ctrl;
+   d->ctrl_val = dc;
+   d->mbps_val = dm;
+   setup_default_ctrlval(r, dc, dm);
 
m.low = 0;
m.high = r->num_closid;
@@ -596,6 +612,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource 
*r)
}
 
kfree(d->ctrl_val);
+   kfree(d->mbps_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 74aee0f..91cc310 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -202,6 +202,7 @@ struct mbm_state {
  * @cqm_work_cpu:
  * worker cpu for CQM h/w counters
  * @ctrl_val:  array of cache or mem ctrl values (indexed by CLOSID)
+ * @mbps_val:  When mba_sc is enabled, this holds the bandwidth in MBps
  * @new_ctrl:  new ctrl value to be loaded
  * @have_new_ctrl: did user provide new_ctrl for this domain
  */
@@ -217,6 +218,7 @@ struct rdt_domain {
int mbm_work_cpu;
int cqm_work_cpu;
u32 *ctrl_val;
+   u32 *mbps_val;
u32 new_ctrl;
boolhave_new_ctrl;
 };
@@ -448,6 +450,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
 bool is_mba_sc(struct rdt_resource *r);
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b9ceb13..bfd707d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1055,12 +1055,15 @@ static int set_cache_qos_cfg(int level, bool enable)
 static int set_mba_sc(bool mba_sc)
 {
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+   struct rdt_domain

[PATCH V2 0/6] Memory bandwidth allocation software controller(mba_sc)

2018-04-20 Thread Vikas Shivappa
Sending the second version of MBA software controller which addresses
the feedback on V1. Thanks to the feedback from Thomas on the V1. Thomas
was unhappy about the bad structure and english in the documentation and
comments explaining the changes and also about duct taping of data
structure which saves the throttle MSRs. Also issues were pointed out in
the mounting and other init code.
This series also changed the counting
and feedback loop patches with some improvements to not do any division
and take care of hierarchy and some l2 -> l3 traffic scenarios.

The patches are based on 4.16.

Background:

Intel RDT memory bandwidth allocation (MBA) currently uses the resctrl
interface and uses the schemata file in each rdtgroup to specify the max
"bandwidth percentage" that is allowed to be used by the "threads" and
"cpus" in the rdtgroup. These values are specified "per package" in each
rdtgroup in the schemata file as below:

$ cat /sys/fs/resctrl/p1/schemata 
L3:0=7ff;1=7ff
MB:0=100;1=50

In the above example the MB is the memory bandwidth percentage and "0"
and "1" specify the package/socket ids. The threads in rdtgroup "p1"
would get 100% memory bandwidth on socket0 and 50% bandwidth on socket1.

Problem:

However there are confusions in specifying the MBA in "percentage":

1. In some scenarios, when user increases bandwidth percentage values he
   does not not see any raw bandwidth increase in "MBps" 
2. Same bandwidth "percentage values" may mean different raw bandwidth
   in "MBps".
3. This interface may also end up unnecessarily controlling the L2 <->
   L3 traffic which has no or very minimal L3 external traffic.

Proposed solution:

In short, we let user specify the bandwidth in "MBps" and we introduce
a software feedback loop which measures the bandwidth using MBM and
restricts the bandwidth "percentage" internally.

The fact that Memory bandwidth allocation(MBA) is a core specific
mechanism where as memory bandwidth monitoring(MBM) is done at the
package level is what leads to confusion when users try to apply control
via the MBA and then monitor the bandwidth to see if the controls are
effective. Below are details on such scenarios:

1. User may *not* see increase in actual bandwidth when bandwidth
   percentage values are increased:

This can occur when aggregate L2 external bandwidth is more than L3
external bandwidth. Consider an SKL SKU with 24 cores on a package and
where L2 external  is 10GBps (hence aggregate L2 external bandwidth is
240GBps) and L3 external bandwidth is 100GBps. Now a workload with '20
threads, having 50% bandwidth, each consuming 5GBps' consumes the max L3
bandwidth of 100GBps although the percentage value specified is only 50%
<< 100%. Hence increasing the bandwidth percentage will not yield any
more bandwidth. This is because although the L2 external bandwidth still
has capacity, the L3 external bandwidth is fully used. Also note that
this would be dependent on number of cores the benchmark is run on.

2. Same bandwidth percentage may mean different actual bandwidth
   depending on # of threads:

For the same SKU in #1, a 'single thread, with 10% bandwidth' and '4
thread, with 10% bandwidth' can consume upto 10GBps and 40GBps although
they have same percentage bandwidth of 10%. This is simply because as
threads start using more cores in an rdtgroup, the actual bandwidth may
increase or vary although user specified bandwidth percentage is same.

In order to mitigate this and make the interface more user friendly,
resctrl added support for specifying the bandwidth in "MBps" as well.
The kernel underneath would use a software feedback mechanism or
a "Software Controller" which reads the actual bandwidth using MBM
counters and adjust the memory bandwidth percentages to ensure

"actual bandwidth < user specified bandwidth".

By default, the schemata would take the bandwidth percentage values
where as user can switch to the "MBA software controller" mode using
a mount option 'mba_MBps'. The schemata format is specified in the
below.

To use the feature mount the file system using mba_MBps option:
 
$ mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl

If the MBA is specified in MBps then user can enter the max bandwidth in
MBps rather than the percentage values. The default value when mounted
is max_u32.

$ echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata
$ echo "L3:0=3;1=3\nMB:0=1024;1=500" > /sys/fs/resctrl/p1/schemata

In the above example the tasks in "p1" and "p0" rdtgroup
would use a max bandwidth of 1024MBps on socket0 and 500MBps on socket1.

Vikas Shivappa (6):
  x86/intel_rdt/mba_sc: Documentation for MBA software
controller(mba_sc)
  x86/intel_rdt/mba_sc: Enable/disable MBA software controller(mba_sc)
  x86/intel_rdt/mba_sc: Add initialization support
  x86/intel_rdt/mba_sc: Add schemata support
  x86/intel_rdt/mba_sc: Prepare for feedback loop
  x86/intel_rdt/mba_sc: Feedback loop to dynamically update mem
bandwidth

 Do

[PATCH 5/6] x86/intel_rdt/mba_sc: Prepare for feedback loop

2018-04-20 Thread Vikas Shivappa
This is a preparatory patch for the mba feedback loop. We add support to
measure the "bandwidth in MBps" and the "delta bandwidth". We measure it
reading the MBM IA32_QM_CTR MSRs and calculating the amount of "bytes"
moved. There is no user interface for this and will only be used by the
feedback loop patch.

Signed-off-by: Vikas Shivappa 
---
 arch/x86/kernel/cpu/intel_rdt.h | 10 
 arch/x86/kernel/cpu/intel_rdt_monitor.c | 45 -
 2 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 91cc310..66a0ba3 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -180,10 +180,20 @@ struct rftype {
  * struct mbm_state - status for each MBM counter in each domain
  * @chunks:Total data moved (multiply by rdt_group.mon_scale to get bytes)
  * @prev_msr   Value of IA32_QM_CTR for this RMID last time we read it
+ * @chunks_bw  Total local data moved. Used for bandwidth calculation
+ * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
+ * @prev_bwThe most recent bandwidth in MBps
+ * @delta_bw   Difference between the current and previous bandwidth
+ * @delta_comp Indicates whether to compute the delta_bw
  */
 struct mbm_state {
u64 chunks;
u64 prev_msr;
+   u64 chunks_bw;
+   u64 prev_bw_msr;
+   u32 prev_bw;
+   u32 delta_bw;
+   booldelta_comp;
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c 
b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 681450e..32c9e55 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -225,10 +225,18 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
 }
 
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+{
+   u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+
+   chunks = (cur_msr << shift) - (prev_msr << shift);
+   return chunks >>= shift;
+}
+
 static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 {
-   u64 chunks, shift, tval;
struct mbm_state *m;
+   u64 chunks, tval;
 
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
@@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read 
*rr)
}
 
if (rr->first) {
-   m->prev_msr = tval;
-   m->chunks = 0;
+   memset(m, 0, sizeof(struct mbm_state));
+   m->prev_bw_msr = m->prev_msr = tval;
return 0;
}
 
-   shift = 64 - MBM_CNTR_WIDTH;
-   chunks = (tval << shift) - (m->prev_msr << shift);
-   chunks >>= shift;
+   chunks = mbm_overflow_count(m->prev_msr, tval);
m->chunks += chunks;
m->prev_msr = tval;
 
@@ -270,6 +276,33 @@ static int __mon_event_count(u32 rmid, struct rmid_read 
*rr)
 }
 
 /*
+ * Supporting function to calculate the memory bandwidth
+ * and delta bandwidth in MBps.
+ */
+static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+{
+   struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+   struct mbm_state *m = &rr->d->mbm_local[rmid];
+   u64 tval, cur_bw, chunks;
+
+   tval = __rmid_read(rmid, rr->evtid);
+   if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
+   return;
+   }
+
+   chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+   m->chunks_bw += chunks;
+   m->chunks = m->chunks_bw;
+   cur_bw = (chunks * r->mon_scale) >> 20;
+
+   if (m->delta_comp)
+   m->delta_bw = abs(cur_bw - m->prev_bw);
+   m->delta_comp = false;
+   m->prev_bw = cur_bw;
+   m->prev_bw_msr = tval;
+}
+
+/*
  * This is called via IPI to read the CQM/MBM counters
  * on a domain.
  */
-- 
1.9.1



[PATCH 2/6] x86/intel_rdt/mba_sc: Enable/disable MBA software controller

2018-04-20 Thread Vikas Shivappa
Currently user does memory bandwidth allocation(MBA) by specifying the
bandwidth in percentage via the resctrl schemata file:
"/sys/fs/resctrl/schemata"

Add a new mount option "mba_MBps" to enable the user to specify MBA
in MBps:

$mount -t resctrl resctrl [-o cdp[,cdpl2][mba_MBps]] /sys/fs/resctrl

Signed-off-by: Vikas Shivappa 
---
 arch/x86/kernel/cpu/intel_rdt.c  |  8 
 arch/x86/kernel/cpu/intel_rdt.h  |  3 +++
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 30 ++
 3 files changed, 41 insertions(+)

diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 2b65601..2a0931f 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void)
rdt_alloc_capable = true;
 }
 
+bool is_mba_sc(struct rdt_resource *r)
+{
+   if (!r)
+   return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+
+   return r->membw.mba_sc;
+}
+
 /*
  * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
  * exposed to user interface and the h/w understandable delay values.
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3fd7a70..74aee0f 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -259,6 +259,7 @@ struct rdt_cache {
  * @min_bw:Minimum memory bandwidth percentage user can request
  * @bw_gran:   Granularity at which the memory bandwidth is allocated
  * @delay_linear:  True if memory B/W delay is in linear scale
+ * @mba_sc:True if MBA software controller(mba_sc) is enabled
  * @mb_map:Mapping of memory B/W percentage to memory B/W delay
  */
 struct rdt_membw {
@@ -266,6 +267,7 @@ struct rdt_membw {
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
+   boolmba_sc;
u32 *mb_map;
 };
 
@@ -445,6 +447,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain 
*d,
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
+bool is_mba_sc(struct rdt_resource *r);
 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index fca759d..b9ceb13 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg)
wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
 }
 
+static inline bool is_mba_linear(void)
+{
+   return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+}
+
 static int set_cache_qos_cfg(int level, bool enable)
 {
void (*update)(void *arg);
@@ -1041,6 +1046,25 @@ static int set_cache_qos_cfg(int level, bool enable)
return 0;
 }
 
+/*
+ * Enable or disable the MBA software controller
+ * which helps user specify bandwidth in MBps.
+ * MBA software controller is supported only if
+ * MBM is supported and MBA is in linear scale.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+   struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+
+   if (!is_mbm_enabled() || !is_mba_linear() ||
+   mba_sc == is_mba_sc(r))
+   return -1;
+
+   r->membw.mba_sc = mba_sc;
+
+   return 0;
+}
+
 static int cdp_enable(int level, int data_type, int code_type)
 {
struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
@@ -1123,6 +1147,10 @@ static int parse_rdtgroupfs_options(char *data)
ret = cdpl2_enable();
if (ret)
goto out;
+   } else if (!strcmp(token, "mba_MBps")) {
+   ret = set_mba_sc(true);
+   if (ret)
+   goto out;
} else {
ret = -EINVAL;
goto out;
@@ -1445,6 +1473,8 @@ static void rdt_kill_sb(struct super_block *sb)
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
 
+   set_mba_sc(false);
+
/*Put everything back to default values. */
for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);
-- 
1.9.1



[PATCH 4/6] x86/intel_rdt/mba_sc: Add schemata support

2018-04-20 Thread Vikas Shivappa
Currently when user updates the "schemata" with new MBA percentage
values, kernel writes the corresponding bandwidth percentage values to
the IA32_MBA_THRTL_MSR.

When MBA is expressed in MBps, the schemata format is changed to have
the per package memory bandwidth in MBps instead of being specified in
percentage. We do not write the IA32_MBA_THRTL_MSRs when the schemata
is updated as that is handled separately.

Signed-off-by: Vikas Shivappa 
---
 arch/x86/kernel/cpu/intel_rdt.c |  2 +-
 arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 24 +++-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 07f6b3b..85805d7 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -179,7 +179,7 @@ struct rdt_resource rdt_resources_all[] = {
.msr_update = mba_wrmsr,
.cache_level= 3,
.parse_ctrlval  = parse_bw,
-   .format_str = "%d=%*d",
+   .format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
 };
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c 
b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 23e1d5c..116d57b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, 
struct rdt_resource *r)
return false;
}
 
-   if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+   if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
+   !is_mba_sc(r)) {
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
r->membw.min_bw, r->default_ctrl);
return false;
@@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int 
closid)
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
+   bool mba_sc;
+   u32 *dc;
int cpu;
 
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int 
closid)
msr_param.high = msr_param.low + 1;
msr_param.res = r;
 
+   mba_sc = is_mba_sc(r);
list_for_each_entry(d, &r->domains, list) {
-   if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
+   dc = !mba_sc ? d->ctrl_val : d->mbps_val;
+   if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-   d->ctrl_val[closid] = d->new_ctrl;
+   dc[closid] = d->new_ctrl;
}
}
-   if (cpumask_empty(cpu_mask))
+
+   /*
+* Avoid writing the control msr with control values when
+* MBA software controller is enabled
+*/
+   if (cpumask_empty(cpu_mask) || mba_sc)
goto done;
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
@@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct 
rdt_resource *r, int closid)
 {
struct rdt_domain *dom;
bool sep = false;
+   u32 ctrl_val;
 
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
+
+   ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
+   dom->mbps_val[closid]);
seq_printf(s, r->format_str, dom->id, max_data_width,
-  dom->ctrl_val[closid]);
+  ctrl_val);
sep = true;
}
seq_puts(s, "\n");
-- 
1.9.1



[PATCH 1/6] x86/intel_rdt/mba_sc: Documentation for MBA software controller(mba_sc)

2018-04-20 Thread Vikas Shivappa
Add documentation about the feedback loop mechanism (MBA software
controller) which lets the user specify the memory bandwidth allocation
in MBps. This includes some changes to "schemata" formati with
examples.

Signed-off-by: Vikas Shivappa 
---
 Documentation/x86/intel_rdt_ui.txt | 75 ++
 1 file changed, 67 insertions(+), 8 deletions(-)

diff --git a/Documentation/x86/intel_rdt_ui.txt 
b/Documentation/x86/intel_rdt_ui.txt
index 71c3098..a16aa21 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -17,12 +17,14 @@ MBA (Memory Bandwidth Allocation) - "mba"
 
 To use the feature mount the file system:
 
- # mount -t resctrl resctrl [-o cdp[,cdpl2]] /sys/fs/resctrl
+ # mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps]] /sys/fs/resctrl
 
 mount options are:
 
 "cdp": Enable code/data prioritization in L3 cache allocations.
 "cdpl2": Enable code/data prioritization in L2 cache allocations.
+"mba_MBps": Enable the MBA Software Controller(mba_sc) to specify MBA
+ bandwidth in MBps
 
 L2 and L3 CDP are controlled seperately.
 
@@ -270,10 +272,11 @@ and 0xA are not.  On a system with a 20-bit mask each bit 
represents 5%
 of the capacity of the cache. You could partition the cache into four
 equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
 
-Memory bandwidth(b/w) percentage
-
-For Memory b/w resource, user controls the resource by indicating the
-percentage of total memory b/w.
+Memory bandwidth Allocation and monitoring
+--
+
+For Memory bandwidth resource, by default the user controls the resource
+by indicating the percentage of total memory bandwidth.
 
 The minimum bandwidth percentage value for each cpu model is predefined
 and can be looked up through "info/MB/min_bandwidth". The bandwidth
@@ -285,7 +288,47 @@ to the next control step available on the hardware.
 The bandwidth throttling is a core specific mechanism on some of Intel
 SKUs. Using a high bandwidth and a low bandwidth setting on two threads
 sharing a core will result in both threads being throttled to use the
-low bandwidth.
+low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
+specific mechanism where as memory bandwidth monitoring(MBM) is done at
+the package level may lead to confusion when users try to apply control
+via the MBA and then monitor the bandwidth to see if the controls are
+effective. Below are such scenarios:
+
+1. User may *not* see increase in actual bandwidth when percentage
+   values are increased:
+
+This can occur when aggregate L2 external bandwidth is more than L3
+external bandwidth. Consider an SKL SKU with 24 cores on a package and
+where L2 external  is 10GBps (hence aggregate L2 external bandwidth is
+240GBps) and L3 external bandwidth is 100GBps. Now a workload with '20
+threads, having 50% bandwidth, each consuming 5GBps' consumes the max L3
+bandwidth of 100GBps although the percentage value specified is only 50%
+<< 100%. Hence increasing the bandwidth percentage will not yeild any
+more bandwidth. This is because although the L2 external bandwidth still
+has capacity, the L3 external bandwidth is fully used. Also note that
+this would be dependent on number of cores the benchmark is run on.
+
+2. Same bandwidth percentage may mean different actual bandwidth
+   depending on # of threads:
+
+For the same SKU in #1, a 'single thread, with 10% bandwidth' and '4
+thread, with 10% bandwidth' can consume upto 10GBps and 40GBps although
+they have same percentage bandwidth of 10%. This is simply because as
+threads start using more cores in an rdtgroup, the actual bandwidth may
+increase or vary although user specified bandwidth percentage is same.
+
+In order to mitigate this and make the interface more user friendly,
+resctrl added support for specifying the bandwidth in MBps as well.  The
+kernel underneath would use a software feedback mechanism or a "Software
+Controller(mba_sc)" which reads the actual bandwidth using MBM counters
+and adjust the memowy bandwidth percentages to ensure
+
+   "actual bandwidth < user specified bandwidth".
+
+By default, the schemata would take the bandwidth percentage values
+where as user can switch to the "MBA software controller" mode using
+a mount option 'mba_MBps'. The schemata format is specified in the below
+sections.
 
 L3 schemata file details (code and data prioritization disabled)
 
@@ -308,13 +351,20 @@ schemata format is always:
 
L2:=;=;...
 
-Memory b/w Allocation details
--
+Memory bandwidth Allocation (default mode)
+--
 
 Memory b/w domain is L3 cache.
 
MB:=bandwidth0;=bandwidth1;...
 
+Memory bandwidth Allocation specified in MBps
+-
+
+Memory bandwidth domain is L3 cache.
+
+   MB:=bw_MB

[PATCH 6/6] x86/intel_rdt/mba_sc: Feedback loop to dynamically update mem bandwidth

2018-04-20 Thread Vikas Shivappa
mba_sc is a feedback loop where we periodically read MBM counters and
try to restrict the bandwidth below a max value so the below is always
true:

  "current bandwidth(cur_bw) < user specified bandwidth(user_bw)"

The frequency of these checks is currently 1s and we just tag along the
MBM overflow timer to do the updates. Doing it once in a second also
makes the calculation of bandwidth easy. The steps of increase or
decrease of bandwidth is the minimum granularity specified by the
hardware.

Although the MBA's goal is to restrict the bandwidth below a maximum,
there may be a need to even increase the bandwidth. Since MBA controls
the L2 external bandwidth where as MBM measures the L3 external
bandwidth, we may end up restricting some rdtgroups unnecessarily. This
may happen in the sequence where rdtgroup (set of jobs) had high
"L3 <-> memory traffic" in initial phases -> mba_sc kicks in and reduced
bandwidth percentage values -> but after some it has mostly "L2 <-> L3"
traffic. In this scenario mba_sc increases the bandwidth percentage when
there is lesser memory traffic.

Signed-off-by: Vikas Shivappa 
---
 arch/x86/kernel/cpu/intel_rdt.c |   3 +-
 arch/x86/kernel/cpu/intel_rdt.h |   2 +
 arch/x86/kernel/cpu/intel_rdt_monitor.c | 121 +++-
 3 files changed, 123 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 85805d7..6dcd93b 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -33,7 +33,6 @@
 #include 
 #include "intel_rdt.h"
 
-#define MAX_MBA_BW 100u
 #define MBA_IS_LINEAR  0x4
 #define MBA_MAX_MBPS   U32_MAX
 
@@ -350,7 +349,7 @@ static int get_cache_id(int cpu, int level)
  * that can be written to QOS_MSRs.
  * There are currently no SKUs which support non linear delay values.
  */
-static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
 {
if (r->membw.delay_linear)
return MAX_MBA_BW - bw;
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 66a0ba3..3975282 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -28,6 +28,7 @@
 
 #define MBM_CNTR_WIDTH 24
 #define MBM_OVERFLOW_INTERVAL  1000
+#define MAX_MBA_BW 100u
 
 #define RMID_VAL_ERROR BIT_ULL(63)
 #define RMID_VAL_UNAVAIL   BIT_ULL(62)
@@ -461,6 +462,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom,
 void mbm_handle_overflow(struct work_struct *work);
 bool is_mba_sc(struct rdt_resource *r);
 void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c 
b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 32c9e55..5a26c1c 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -330,6 +330,113 @@ void mon_event_count(void *info)
}
 }
 
+/*
+ * This implements feedback loop for MBA software controller(mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters
+ * and adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs
+ * so that:
+ * "current bandwdith(cur_bw) < user specified bandwidth(user_bw)".
+ * We can simply use the MBM counters to measure the bandwidth and
+ * use MBA throttle MSRs to control the bandwidth for a particular
+ * rdtgrp because we use the same resctrl rdtgroup for both monitoring
+ * and control now as suggested by Thomas Gleixner.
+ *
+ * The frequency of the checks is 1s and we just tag along the
+ * MBM overflow timer. Having 1s interval makes the calculation of
+ * bandwidth simpler.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum,
+ * there may be a need to increase the bandwidth to avoid
+ * uncecessarily restricting the L2 <-> L3 traffic.
+ * Since MBA controls the L2 external bandwidth where as
+ * MBM measures the L3 external bandwidth the following sequence
+ * could lead to such a situation.
+ * Consider an rdtgroup which had high L3 <-> memory traffic
+ * in initial phases -> mba_sc kicks in and reduced bandwidth
+ * percentage values -> but after some time rdtgroup has mostly
+ * L2 <-> L3 traffic.
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic
+ * as its throttle MSRs already have low percentage values.
+ * To avoid unnecessarily restricting such rdtgroups,we also
+ * increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+{
+   u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
+   struct mbm_state *pmbm_data, *cmbm_data;
+  

Re: [PATCH 1/2] scsi: st: Replace GFP_ATOMIC with GFP_KERNEL in st_probe

2018-04-20 Thread Martin K. Petersen

Jia-Ju,

> st_probe() is never called in atomic context.

Applied patches 1 and 2 to 4.18/scsi-queue. Thanks!

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH v4 09/22] iommu/vt-d: add svm/sva invalidate function

2018-04-20 Thread Jacob Pan
On Tue, 17 Apr 2018 13:10:45 -0600
Alex Williamson  wrote:

> On Mon, 16 Apr 2018 14:48:58 -0700
> Jacob Pan  wrote:
> 
> > When Shared Virtual Address (SVA) is enabled for a guest OS via
> > vIOMMU, we need to provide invalidation support at IOMMU API and
> > driver level. This patch adds Intel VT-d specific function to
> > implement iommu passdown invalidate API for shared virtual address.
> > 
> > The use case is for supporting caching structure invalidation
> > of assigned SVM capable devices. Emulated IOMMU exposes queue
> > invalidation capability and passes down all descriptors from the
> > guest to the physical IOMMU.
> > 
> > The assumption is that guest to host device ID mapping should be
> > resolved prior to calling IOMMU driver. Based on the device handle,
> > host IOMMU driver can replace certain fields before submit to the
> > invalidation queue.
> > 
> > Signed-off-by: Liu, Yi L 
> > Signed-off-by: Ashok Raj 
> > Signed-off-by: Jacob Pan 
> > ---
> >  drivers/iommu/intel-iommu.c | 170
> >  1 file changed, 170
> > insertions(+)
> > 
> > diff --git a/drivers/iommu/intel-iommu.c
> > b/drivers/iommu/intel-iommu.c index cae4042..c765448 100644
> > --- a/drivers/iommu/intel-iommu.c
> > +++ b/drivers/iommu/intel-iommu.c
> > @@ -4973,6 +4973,175 @@ static void
> > intel_iommu_detach_device(struct iommu_domain *domain,
> > dmar_remove_one_dev_info(to_dmar_domain(domain), dev); }
> >  
> > +/*
> > + * 3D array for converting IOMMU generic type-granularity to VT-d
> > granularity
> > + * X indexed by enum iommu_inv_type
> > + * Y indicates request without and with PASID
> > + * Z indexed by enum iommu_inv_granularity
> > + *
> > + * For an example, if we want to find the VT-d granularity
> > encoding for IOTLB
> > + * type, DMA request with PASID, and page selective. The look up
> > indices are:
> > + * [1][1][8], where
> > + * 1: IOMMU_INV_TYPE_TLB
> > + * 1: with PASID
> > + * 8: IOMMU_INV_GRANU_PAGE_PASID
> > + *
> > + * Granu_map array indicates validity of the table. 1: valid, 0:
> > invalid
> > + *
> > + */
> > +const static int
> > inv_type_granu_map[IOMMU_INV_NR_TYPE][2][IOMMU_INV_NR_GRANU] = {
> > +   /* extended dev IOTLBs, for dev-IOTLB, only global is
> > valid,
> > +  for dev-EXIOTLB, two valid granu */
> > +   {
> > +   {1},
> > +   {0, 0, 0, 0, 1, 1, 0, 0, 0}
> > +   },
> > +   /* IOTLB and EIOTLB */
> > +   {
> > +   {1, 1, 0, 1, 0, 0, 0, 0, 0},
> > +   {0, 0, 0, 0, 1, 0, 1, 1, 1}
> > +   },
> > +   /* PASID cache */
> > +   {
> > +   {0},
> > +   {0, 0, 0, 0, 1, 1, 0, 0, 0}
> > +   },
> > +   /* context cache */
> > +   {
> > +   {1, 1, 1}
> > +   }
> > +};
> > +
> > +const static u64
> > inv_type_granu_table[IOMMU_INV_NR_TYPE][2][IOMMU_INV_NR_GRANU] = {
> > +   /* extended dev IOTLBs, only global is valid */
> > +   {
> > +   {QI_DEV_IOTLB_GRAN_ALL},
> > +   {0, 0, 0, 0, QI_DEV_IOTLB_GRAN_ALL,
> > QI_DEV_IOTLB_GRAN_PASID_SEL, 0, 0, 0}
> > +   },
> > +   /* IOTLB and EIOTLB */
> > +   {
> > +   {DMA_TLB_GLOBAL_FLUSH, DMA_TLB_DSI_FLUSH, 0,
> > DMA_TLB_PSI_FLUSH},
> > +   {0, 0, 0, 0, QI_GRAN_ALL_ALL, 0, QI_GRAN_NONG_ALL,
> > QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}
> > +   },
> > +   /* PASID cache */
> > +   {
> > +   {0},
> > +   {0, 0, 0, 0, QI_PC_ALL_PASIDS, QI_PC_PASID_SEL}
> > +   },
> > +   /* context cache */
> > +   {
> > +   {DMA_CCMD_GLOBAL_INVL, DMA_CCMD_DOMAIN_INVL,
> > DMA_CCMD_DEVICE_INVL}
> > +   }
> > +};
> > +
> > +static inline int to_vtd_granularity(int type, int granu, int
> > with_pasid, u64 *vtd_granu) +{
> > +   if (type >= IOMMU_INV_NR_TYPE || granu >=
> > IOMMU_INV_NR_GRANU || with_pasid > 1)
> > +   return -EINVAL;
> > +
> > +   if (inv_type_granu_map[type][with_pasid][granu] == 0)
> > +   return -EINVAL;
> > +
> > +   *vtd_granu = inv_type_granu_table[type][with_pasid][granu];
> > +
> > +   return 0;
> > +}
> > +
> > +static int intel_iommu_sva_invalidate(struct iommu_domain *domain,
> > +   struct device *dev, struct tlb_invalidate_info
> > *inv_info)  
> 
> inv_info->hdr.version is never checked, why do we have these if
> they're not used?
> 
the version was added to leave room for future extension. you are
right, it should be checked.
> > +{
> > +   struct intel_iommu *iommu;
> > +   struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> > +   struct device_domain_info *info;
> > +   u16 did, sid;
> > +   u8 bus, devfn;
> > +   int ret = 0;
> > +   u64 granu;
> > +   unsigned long flags;
> > +
> > +   if (!inv_info || !dmar_domain)
> > +   return -EINVAL;
> > +
> > +   iommu = device_to_iommu(dev, &bus, &devfn);
> > +   if (!iommu)
> > +   return -ENODEV;
> > +
> > +   if (!dev || !dev_is_pci(dev))
> > +   return -ENODEV;
> > +
> > +   did = dmar_domain->iommu_did[iommu->seq_id];
> > +   sid = PCI_DEVID(bus, devfn);
> > +   ret = to_vtd_granularity

[PATCH 4/5] x86, pti: disallow global kernel text with RANDSTRUCT

2018-04-20 Thread Dave Hansen

I believe this was originally reported by the grsecurity team who
tweeted about it (link below).

RANDSTRUCT derives its hardening benefits from the attacker's lack of
knowledge about the layout of kernel data structures.  Keep the kernel
image non-global in cases where RANDSTRUCT is in use to help keep the
layout a secret.

Signed-off-by: Dave Hansen 
Reported-by: Kees Cook 
Link: https://twitter.com/grsecurity/status/985678720630476800
Fixes: 8c06c7740 (x86/pti: Leave kernel text global for !PCID)
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Hugh Dickins 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Nadav Amit 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: linux...@kvack.org
---

 b/arch/x86/mm/pti.c |   10 ++
 1 file changed, 10 insertions(+)

diff -puN arch/x86/mm/pti.c~pti-glb-disable-with-compile-options 
arch/x86/mm/pti.c
--- a/arch/x86/mm/pti.c~pti-glb-disable-with-compile-options2018-04-20 
14:10:02.702749165 -0700
+++ b/arch/x86/mm/pti.c 2018-04-20 14:10:02.706749165 -0700
@@ -421,6 +421,16 @@ static inline bool pti_kernel_image_glob
if (boot_cpu_has(X86_FEATURE_K8))
return false;
 
+   /*
+* RANDSTRUCT derives its hardening benefits from the
+* attacker's lack of knowledge about the layout of kernel
+* data structures.  Keep the kernel image non-global in
+* cases where RANDSTRUCT is in use to help keep the layout a
+* secret.
+*/
+   if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+   return false;
+
return true;
 }
 
_


[PATCH 0/5] x86, mm: PTI Global page fixes for 4.17

2018-04-20 Thread Dave Hansen
There have been a number of reports about issues with the patches that
restore the Global PTE bit when using KPIT.  This set resolves all of
the issues that have been reported.

These have been pushed out to a git tree where 0day should be chewing
on them.  Considering the troubles thus far, we should probably wait
for 0day to spend _some_ time on these fixes before these get merged.

Cc: Aaro Koskinen 
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: David Woodhouse 
Cc: Fengguang Wu 
Cc: Greg Kroah-Hartman 
Cc: Hugh Dickins 
Cc: Ingo Molnar 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Mariusz Ceier 
Cc: Nadav Amit 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Vlastimil Babka 
Cc: linux...@kvack.org


[PATCH 2/5] x86, pti: fix boot warning from Global-bit setting

2018-04-20 Thread Dave Hansen

From: Dave Hansen 

The pageattr.c code attempts to process "faults" when it goes looking
for PTEs to change and finds non-present entries.  It allows these
faults in the linear map which is "expected to have holes", but
WARN()s about them elsewhere, like when called on the kernel image.

However, we are now calling change_page_attr_clear() on the kernel
image in the process of trying to clear the Global bit.

This trips the warning in __cpa_process_fault() if a non-present PTE is
encountered in the kernel image.  The "holes" in the kernel image
result from free_init_pages()'s use of set_memory_np().  These holes
are totally fine, and result from normal operation, just as they would
be in the kernel linear map.

Just silence the warning when holes in the kernel image are encountered.

Signed-off-by: Dave Hansen 
Fixes: 39114b7a7 (x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image)
Reported-by: Mariusz Ceier 
Reported-by: Aaro Koskinen 
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Hugh Dickins 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Nadav Amit 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux...@kvack.org
---

 b/arch/x86/mm/pageattr.c |   41 +++--
 1 file changed, 31 insertions(+), 10 deletions(-)

diff -puN arch/x86/mm/pageattr.c~pti-glb-warning-inpageattr 
arch/x86/mm/pageattr.c
--- a/arch/x86/mm/pageattr.c~pti-glb-warning-inpageattr 2018-04-20 
14:10:01.619749168 -0700
+++ b/arch/x86/mm/pageattr.c2018-04-20 14:10:01.623749168 -0700
@@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file
 static inline void split_page_count(int level) { }
 #endif
 
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+   return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+   return addr >= start && addr <= end;
+}
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
@@ -106,20 +118,26 @@ static inline unsigned long highmap_end_
return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
 }
 
-#endif
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
 {
-   return addr >= start && addr < end;
+   /*
+* Kernel text has an alias mapping at a high address, known
+* here as "highmap".
+*/
+   return within_inclusive(pfn, highmap_start_pfn(),
+   highmap_end_pfn());
 }
 
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
 {
-   return addr >= start && addr <= end;
+   /* There is no highmap on 32-bit */
+   return false;
 }
 
+#endif
+
 /*
  * Flushing functions
  */
@@ -1183,6 +1201,10 @@ static int __cpa_process_fault(struct cp
cpa->numpages = 1;
cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
return 0;
+
+   } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+   /* Faults in the highmap are OK, so do not warn: */
+   return -EFAULT;
} else {
WARN(1, KERN_WARNING "CPA: called for zero pte. "
"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
@@ -1335,8 +1357,7 @@ static int cpa_process_alias(struct cpa_
 * to touch the high mapped kernel as well:
 */
if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-   within_inclusive(cpa->pfn, highmap_start_pfn(),
-highmap_end_pfn())) {
+   __cpa_pfn_in_highmap(cpa->pfn)) {
unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
   __START_KERNEL_map - phys_base;
alias_cpa = *cpa;
_


[PATCH 3/5] x86, pti: reduce amount of kernel text allowed to be Global

2018-04-20 Thread Dave Hansen

Kees reported to me that I made too much of the kernel image global.
It was far more than just text:

I think this is too much set global: _end is after data,
bss, and brk, and all kinds of other stuff that could
hold secrets. I think this should match what
mark_rodata_ro() is doing.

This does exactly that.  We use __end_rodata_hpage_align as our
marker both because it is huge-page-aligned and it does not contain
any sections we expect to hold secrets.

Kees's logic was that r/o data is in the kernel image anyway and,
in the case of traditional distributions, can be freely downloaded
from the web, so there's no reason to hide it.

Signed-off-by: Dave Hansen 
Reported-by: Kees Cook 
Fixes: 8c06c7740 (x86/pti: Leave kernel text global for !PCID)
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Hugh Dickins 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Nadav Amit 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux...@kvack.org
---

 b/arch/x86/mm/pti.c |   16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff -puN arch/x86/mm/pti.c~pti-glb-too-much-mapped arch/x86/mm/pti.c
--- a/arch/x86/mm/pti.c~pti-glb-too-much-mapped 2018-04-20 14:10:02.164749166 
-0700
+++ b/arch/x86/mm/pti.c 2018-04-20 14:10:02.168749166 -0700
@@ -430,12 +430,24 @@ static inline bool pti_kernel_image_glob
  */
 void pti_clone_kernel_text(void)
 {
+   /*
+* rodata is part of the kernel image and is normally
+* readable on the filesystem or on the web.  But, do not
+* clone the areas past rodata, they might contain secrets.
+*/
unsigned long start = PFN_ALIGN(_text);
-   unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+   unsigned long end = (unsigned long)__end_rodata_hpage_align;
 
if (!pti_kernel_image_global_ok())
return;
 
+   pr_debug("mapping partial kernel image into user address space\n");
+
+   /*
+* Note that this will undo _some_ of the work that
+* pti_set_kernel_image_nonglobal() did to clear the
+* global bit.
+*/
pti_clone_pmds(start, end, _PAGE_RW);
 }
 
@@ -458,8 +470,6 @@ void pti_set_kernel_image_nonglobal(void
if (pti_kernel_image_global_ok())
return;
 
-   pr_debug("set kernel image non-global\n");
-
set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
_


[PATCH 5/5] x86, pti: filter at vma->vm_page_prot population

2018-04-20 Thread Dave Hansen

From: Dave Hansen 

0day reported warnings at boot on 32-bit systems without NX support:

 [   12.349193] attempted to set unsupported pgprot: 8025 bits: 
8000 supported: 7fff
 [   12.350792] WARNING: CPU: 0 PID: 1 at arch/x86/include/asm/pgtable.h:540 
handle_mm_fault+0xfc1/0xfe0:
check_pgprot at 
arch/x86/include/asm/pgtable.h:535
 (inlined by) pfn_pte at 
arch/x86/include/asm/pgtable.h:549
 (inlined by) do_anonymous_page 
at mm/memory.c:3169
 (inlined by) handle_pte_fault 
at mm/memory.c:3961
 (inlined by) __handle_mm_fault 
at mm/memory.c:4087
 (inlined by) handle_mm_fault 
at mm/memory.c:4124

The problem was that we stopped massaging page permissions at PTE creation
time, so vma->vm_page_prot was passed unfiltered to PTE creation.

To fix it, filter the page protections before they are installed in
vma->vm_page_prot.

Signed-off-by: Dave Hansen 
Reported-by: Fengguang Wu 
Fixes: fb43d6cb91 ("x86/mm: Do not auto-massage page protections")
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Hugh Dickins 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Nadav Amit 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux...@kvack.org
Cc: Ingo Molnar 
---

 b/arch/x86/Kconfig   |4 
 b/arch/x86/include/asm/pgtable.h |5 +
 b/mm/mmap.c  |   11 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff -puN arch/x86/include/asm/pgtable.h~pti-glb-protection_map 
arch/x86/include/asm/pgtable.h
--- a/arch/x86/include/asm/pgtable.h~pti-glb-protection_map 2018-04-20 
14:10:08.251749151 -0700
+++ b/arch/x86/include/asm/pgtable.h2018-04-20 14:10:08.260749151 -0700
@@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgp
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+   return canon_pgprot(prot);
+}
+
 static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
 enum page_cache_mode pcm,
 enum page_cache_mode new_pcm)
diff -puN arch/x86/Kconfig~pti-glb-protection_map arch/x86/Kconfig
--- a/arch/x86/Kconfig~pti-glb-protection_map   2018-04-20 14:10:08.253749151 
-0700
+++ b/arch/x86/Kconfig  2018-04-20 14:10:08.260749151 -0700
@@ -52,6 +52,7 @@ config X86
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
+   select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOVif X86_64
@@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+   def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
def_bool y
 
diff -puN mm/mmap.c~pti-glb-protection_map mm/mmap.c
--- a/mm/mmap.c~pti-glb-protection_map  2018-04-20 14:10:08.256749151 -0700
+++ b/mm/mmap.c 2018-04-20 14:10:08.261749151 -0700
@@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_i
__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+   return prot;
+}
+#endif
+
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-   return __pgprot(pgprot_val(protection_map[vm_flags &
+   pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
pgprot_val(arch_vm_get_page_prot(vm_flags)));
+
+   return arch_filter_pgprot(ret);
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
_


[PATCH 1/5] x86, pti: fix boot problems from Global-bit setting

2018-04-20 Thread Dave Hansen

From: Dave Hansen 

Part of the global bit _setting_ patches also includes clearing the
Global bit when we do not want it.  That is done with
set_memory_nonglobal(), which uses change_page_attr_clear() in
pageattr.c under the covers.

The TLB flushing code inside pageattr.c has has checks like
BUG_ON(irqs_disabled()), looking for interrupt disabling that might
cause deadlocks.  But, these also trip in early boot on certain
preempt configurations.  Just copy the existing BUG_ON() sequence from
cpa_flush_range() to the other two sites and check for early boot.

Signed-off-by: Dave Hansen 
Fixes: 39114b7a7 (x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image)
Reported-by: Mariusz Ceier 
Reported-by: Aaro Koskinen 
Cc: Andrea Arcangeli 
Cc: Andy Lutomirski 
Cc: Arjan van de Ven 
Cc: Borislav Petkov 
Cc: Dan Williams 
Cc: David Woodhouse 
Cc: Greg Kroah-Hartman 
Cc: Hugh Dickins 
Cc: Josh Poimboeuf 
Cc: Juergen Gross 
Cc: Kees Cook 
Cc: Linus Torvalds 
Cc: Nadav Amit 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux...@kvack.org
---

 b/arch/x86/mm/pageattr.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff -puN arch/x86/mm/pageattr.c~pti-glb-boot-problem-fix arch/x86/mm/pageattr.c
--- a/arch/x86/mm/pageattr.c~pti-glb-boot-problem-fix   2018-04-20 
14:10:01.086749169 -0700
+++ b/arch/x86/mm/pageattr.c2018-04-20 14:10:01.090749169 -0700
@@ -172,7 +172,7 @@ static void __cpa_flush_all(void *arg)
 
 static void cpa_flush_all(unsigned long cache)
 {
-   BUG_ON(irqs_disabled());
+   BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
@@ -236,7 +236,7 @@ static void cpa_flush_array(unsigned lon
unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 #endif
 
-   BUG_ON(irqs_disabled());
+   BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
_


Re: [RFC PATCH 00/79] Generic page write protection and a solution to page waitqueue

2018-04-20 Thread Jerome Glisse
On Fri, Apr 20, 2018 at 12:57:41PM -0700, Tim Chen wrote:
> On 04/04/2018 12:17 PM, jgli...@redhat.com wrote:
> > From: Jérôme Glisse 
> > 
> > https://cgit.freedesktop.org/~glisse/linux/log/?h=generic-write-protection-rfc
> > 
> > This is an RFC for LSF/MM discussions. It impacts the file subsystem,
> > the block subsystem and the mm subsystem. Hence it would benefit from
> > a cross sub-system discussion.
> > 
> > Patchset is not fully bake so take it with a graint of salt. I use it
> > to illustrate the fact that it is doable and now that i did it once i
> > believe i have a better and cleaner plan in my head on how to do this.
> > I intend to share and discuss it at LSF/MM (i still need to write it
> > down). That plan lead to quite different individual steps than this
> > patchset takes and his also easier to split up in more manageable
> > pieces.
> > 
> > I also want to apologize for the size and number of patches (and i am
> > not even sending them all).
> > 
> > --
> > The Why ?
> > 
> > I have two objectives: duplicate memory read only accross nodes and or
> > devices and work around PCIE atomic limitations. More on each of those
> > objective below. I also want to put forward that it can solve the page
> > wait list issue ie having each page with its own wait list and thus
> > avoiding long wait list traversale latency recently reported [1].
> > 
> > It does allow KSM for file back pages (truely generic KSM even between
> > both anonymous and file back page). I am not sure how useful this can
> > be, this was not an objective i did pursue, this is just a for free
> > feature (see below).
> > 
> > [1] https://groups.google.com/forum/#!topic/linux.kernel/Iit1P5BNyX8
> > 
> > --
> > Per page wait list, so long page_waitqueue() !
> > 
> > Not implemented in this RFC but below is the logic and pseudo code
> > at bottom of this email.
> > 
> > When there is a contention on struct page lock bit, the caller which
> > is trying to lock the page will add itself to a waitqueue. The issues
> > here is that multiple pages share the same wait queue and on large
> > system with a lot of ram this means we can quickly get to a long list
> > of waiters for differents pages (or for the same page) on the same
> > list [1].
> 
> Your approach seems useful if there are lots of locked pages sharing
> the same wait queue.  
> 
> That said, in the original workload from our customer with the long wait queue
> problem, there was a single super hot page getting migrated, and it
> is being accessed by all threads which caused the big log jam while they wait 
> for
> the migration to get completed.  
> With your approach, we will still likely end up with a long queue 
> in that workload even if we have per page wait queue.
> 
> Thanks.

Ok so i re-read the thread, i was writting this cover letter from memory
and i had bad recollection of your issue, so sorry.

First, do you have a way to reproduce the issue ? Something easy would
be nice :)

So what i am proposing for per page wait queue would only marginaly help
you (it might not even be mesurable in your workload). It would certainly
make the code smaller and easier to understand i believe.

Now that i have look back at your issue i think there is 2 things we
should do. First keep migration page map read only, this would at least
avoid CPU read fault. In trace you captured i wasn't able to ascertain
if this were read or write fault.

Second idea i have is about NUMA, everytime we NUMA migrate a page we
could attach a temporary struct to the page (using page->mapping). So
if we scan that page again we can inspect information about previous
migration and see if we are not over migrating that page (ie bouncing
it all over). If so we can mark the page (maybe with a page flag if we
can find one) to protect it from further migration. That temporary
struct would be remove after a while, ie autonuma would preallocate a
bunch of those and keep an LRU of them and recycle the oldest when it
needs a new one to migrate another page.


LSF/MM slots:

Michal can i get 2 slots to talk about this ? MM only discussion, one
to talk about doing migration with page map read only but write
protected while migration is happening. The other one to talk about
attaching auto NUMA tracking struct to page.

Cheers,
Jérôme


Re: [Intel-gfx] [PATCH] gpu: drm: i915: Change return type to vm_fault_t

2018-04-20 Thread Rodrigo Vivi
On Wed, Apr 18, 2018 at 08:46:44AM +0300, Jani Nikula wrote:
> On Tue, 17 Apr 2018, Souptick Joarder  wrote:
> > On 17-Apr-2018 9:45 PM, "Matthew Wilcox"  wrote:
> >>
> >> On Tue, Apr 17, 2018 at 09:14:32PM +0530, Souptick Joarder wrote:
> >> > Not exactly. The plan for these patches is to introduce new vm_fault_t
> > type
> >> > in vm_operations_struct fault handlers. It's now available in 4.17-rc1.
> > We will
> >> > push all the required drivers/filesystem changes through different
> > maintainers
> >> > to linus tree. Once everything is converted into vm_fault_t type then
> > Changing
> >> > it from a signed to an unsigned int causes GCC to warn about an
> > assignment
> >> > from an incompatible type -- int foo(void) is incompatible with
> >> > unsigned int foo(void).
> >> >
> >> > Please refer 1c8f422059ae ("mm: change return type to vm_fault_t") in
> > 4.17-rc1.
> >>
> >> I think this patch would be clearer if you did
> >>
> >> -   int ret;
> >> +   int err;
> >> +   vm_fault_t ret;
> >>
> >> Then it would be clearer to the maintainer that you're splitting apart the
> >> VM_FAULT and errno codes.
> >>
> >> Sorry for not catching this during initial review.
> >
> > Ok, I will make required changes and send v2. Sorry, even I missed this :)
> 
> I'm afraid Daniel is closer to the truth.

+1.

> My bad, sorry for the noise.

I opened this thread to add exactly question/noise ;).

So my recommendation for some next time is to make the intention clear
on the commit message itself from the begin.

> 
> BR,
> Jani.
> 
> 
> 
> >>
> >> > On Tue, Apr 17, 2018 at 8:59 PM, Jani Nikula
> >> >  wrote:
> >> > > On Tue, 17 Apr 2018, Souptick Joarder  wrote:
> >> > >> Use new return type vm_fault_t for fault handler. For
> >> > >> now, this is just documenting that the function returns
> >> > >> a VM_FAULT value rather than an errno. Once all instances
> >> > >> are converted, vm_fault_t will become a distinct type.
> >> > >>
> >> > >> Reference id -> 1c8f422059ae ("mm: change return type to
> >> > >> vm_fault_t")
> >> > >>
> >> > >> Signed-off-by: Souptick Joarder 
> >> > >> ---
> >> > >>  drivers/gpu/drm/i915/i915_drv.h |  3 ++-
> >> > >>  drivers/gpu/drm/i915/i915_gem.c | 15 ---
> >> > >>  2 files changed, 10 insertions(+), 8 deletions(-)
> >> > >>
> >> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> >> > >> index a42deeb..95b0d50 100644
> >> > >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> > >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> > >> @@ -51,6 +51,7 @@
> >> > >>  #include 
> >> > >>  #include 
> >> > >>  #include 
> >> > >> +#include 
> >> > >>
> >> > >>  #include "i915_params.h"
> >> > >>  #include "i915_reg.h"
> >> > >> @@ -3363,7 +3364,7 @@ int i915_gem_wait_for_idle(struct
> > drm_i915_private *dev_priv,
> >> > >>  unsigned int flags);
> >> > >>  int __must_check i915_gem_suspend(struct drm_i915_private
> > *dev_priv);
> >> > >>  void i915_gem_resume(struct drm_i915_private *dev_priv);
> >> > >> -int i915_gem_fault(struct vm_fault *vmf);
> >> > >> +vm_fault_t i915_gem_fault(struct vm_fault *vmf);
> >> > >>  int i915_gem_object_wait(struct drm_i915_gem_object *obj,
> >> > >>unsigned int flags,
> >> > >>long timeout,
> >> > >> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> > b/drivers/gpu/drm/i915/i915_gem.c
> >> > >> index dd89abd..bdac690 100644
> >> > >> --- a/drivers/gpu/drm/i915/i915_gem.c
> >> > >> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >> > >> @@ -1882,7 +1882,7 @@ int i915_gem_mmap_gtt_version(void)
> >> > >>   * The current feature set supported by i915_gem_fault() and thus
> > GTT mmaps
> >> > >>   * is exposed via I915_PARAM_MMAP_GTT_VERSION (see
> > i915_gem_mmap_gtt_version).
> >> > >>   */
> >> > >> -int i915_gem_fault(struct vm_fault *vmf)
> >> > >> +vm_fault_t i915_gem_fault(struct vm_fault *vmf)
> >> > >>  {
> >> > >>  #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
> >> > >>   struct vm_area_struct *area = vmf->vma;
> >> > >> @@ -1895,6 +1895,7 @@ int i915_gem_fault(struct vm_fault *vmf)
> >> > >>   pgoff_t page_offset;
> >> > >>   unsigned int flags;
> >> > >>   int ret;
> >> > >> + vm_fault_t retval;
> >> > >
> >> > > What's the point of changing the name? An unnecessary change.
> >> > >
> >> > > BR,
> >> > > Jani.
> >> > >
> >> > >>
> >> > >>   /* We don't use vmf->pgoff since that has the fake offset */
> >> > >>   page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
> >> > >> @@ -2000,7 +2001,7 @@ int i915_gem_fault(struct vm_fault *vmf)
> >> > >>* and so needs to be reported.
> >> > >>*/
> >> > >>   if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
> >> > >> - ret = VM_FAULT_SIGBUS;
> >> > >> + retval = VM_FAULT_SIGBUS;
> >> > >>   break;
> >> > >>   }
> >> > >>   case -EAGAI

Re: [PATCH v2] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.

2018-04-20 Thread Boris Brezillon
On Sat, 21 Apr 2018 07:28:19 +1000
NeilBrown  wrote:

> On Fri, Apr 20 2018, Boris Brezillon wrote:
> 
> > Hi Neil,
> >
> > On Mon, 16 Apr 2018 09:42:30 +1000
> > NeilBrown  wrote:
> >  
> >> Winbond spi-nor flash 32MB and larger have an 'Extended Address
> >> Register' as one option for addressing beyond 16MB (Macronix
> >> has the same concept, Spansion has EXTADD bits in the Bank Address
> >> Register).
> >> 
> >> According to section
> >>8.2.7 Write Extended Address Register (C5h)
> >> 
> >> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash)
> >> 
> >>The Extended Address Register is only effective when the device is
> >>in the 3-Byte Address Mode.  When the device operates in the 4-Byte
> >>Address Mode (ADS=1), any command with address input of A31-A24
> >>will replace the Extended Address Register values. It is
> >>recommended to check and update the Extended Address Register if
> >>necessary when the device is switched from 4-Byte to 3-Byte Address
> >>Mode.
> >> 
> >> So the documentation suggests clearing the EAR after switching to
> >> 3-byte mode.  Experimentation shows that the EAR is *always* one after
> >> the switch to 3-byte mode, so clearing the EAR is mandatory at
> >> shutdown for a subsequent 3-byte-addressed reboot to work.
> >> 
> >> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal
> >> reboot, so we cannot rely on hardware reset.  The MT7621 does assert a
> >> reset line at watchdog-reset.
> >> 
> >> Signed-off-by: NeilBrown   
> >
> > We should probably backport the fix. Can you add a Fixes and Cc-stable
> > tag?  
> 
> It's a bit weird having Fixes when this isn't a regression, but I guess
> it doesn't hurt.

Well, I thought you wanted this patch to be backported to stable
releases, hence my suggestion. Of course it's not a regression, since
the bug is here from the beginning, but nonetheless it's fixing a bug.

> I chose
>   Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when 
> exiting")
> as this patch it useless without that one.

Not sure that's how Fixes should be used. IIUC, it should point to the
first commit where the bug was introduced, so I guess it's 0aa87b7563f1
("mtd: m25p80: add support for the windbond w25q256 chip") in this case.

Now, if you want to restrict the kernel releases this patch should be
backported to, you can use the '# vX.Y+' suffix in your Cc-stable tag.

> 
> I also fixed the comment and have resent.

Thanks.


Re: [pci PATCH v8 2/4] ena: Migrate over to unmanaged SR-IOV support

2018-04-20 Thread Gregory Rose

On 4/20/2018 9:30 AM, Alexander Duyck wrote:

Instead of implementing our own version of a SR-IOV configuration stub in
the ena driver we can just reuse the existing
pci_sriov_configure_simple function.

Signed-off-by: Alexander Duyck 
---

v5: Replaced call to pci_sriov_configure_unmanaged with
 pci_sriov_configure_simple
v6: Dropped "#ifdef" checks for IOV wrapping sriov_configure definition
v7: No change

  drivers/net/ethernet/amazon/ena/ena_netdev.c |   28 +-
  1 file changed, 1 insertion(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c 
b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index a822e70..f2af87d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3386,32 +3386,6 @@ static int ena_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent)
  }
  
  /*/

-static int ena_sriov_configure(struct pci_dev *dev, int numvfs)
-{
-   int rc;
-
-   if (numvfs > 0) {
-   rc = pci_enable_sriov(dev, numvfs);
-   if (rc != 0) {
-   dev_err(&dev->dev,
-   "pci_enable_sriov failed to enable: %d vfs with the 
error: %d\n",
-   numvfs, rc);
-   return rc;
-   }
-
-   return numvfs;
-   }
-
-   if (numvfs == 0) {
-   pci_disable_sriov(dev);
-   return 0;
-   }
-
-   return -EINVAL;
-}
-
-/*/
-/*/
  
  /* ena_remove - Device Removal Routine

   * @pdev: PCI device information struct
@@ -3526,7 +3500,7 @@ static int ena_resume(struct pci_dev *pdev)
.suspend= ena_suspend,
.resume = ena_resume,
  #endif
-   .sriov_configure = ena_sriov_configure,
+   .sriov_configure = pci_sriov_configure_simple,
  };
  
  static int __init ena_init(void)




Reviewed-by: Greg Rose 



Re: [PATCH v2 2/2] regulator: add QCOM RPMh regulator driver

2018-04-20 Thread David Collins
On 04/19/2018 09:16 AM, Doug Anderson wrote:
> On Wed, Apr 18, 2018 at 4:30 PM, David Collins  
> wrote:
 + * @drms_mode: Array of regulator framework modes which 
 can
 + * be configured dynamically for this 
 regulator
 + * via the set_load() callback.
>>>
>>> Using the singular for something that is an array is confusing.  Why
>>> not "drms_modes" or "drms_mode_arr"?  In the past review you said
>>> 'Perhaps something along the lines of "drms_modes"'.
>>
>> It seems awkward to me to use a plural for arrays as it leads to indexing
>> like this: vreg->drms_modes[i].  "mode i" seems better than "modes i".
>> However, I'm willing to change this to be drms_modes and drms_mode_max_uAs
>> if that style is preferred.
> 
> I'd very much like a plural here.

Ok.  I'll change this to be plural.


 +   prop = "qcom,regulator-initial-voltage";
 +   ret = of_property_read_u32(node, prop, &uV);
 +   if (!ret) {
 +   range = &vreg->hw_data->voltage_range;
 +   selector = DIV_ROUND_UP(uV - range->min_uV,
 +   range->uV_step) + range->min_sel;
 +   if (uV < range->min_uV || selector > 
 range->max_sel) {
 +   dev_err(dev, "%s: %s=%u is invalid\n",
 +   node->name, prop, uV);
 +   return -EINVAL;
 +   }
 +
 +   vreg->voltage_selector = selector;
 +
 +   cmd[cmd_count].addr
 +   = vreg->addr + 
 RPMH_REGULATOR_REG_VRM_VOLTAGE;
 +   cmd[cmd_count++].data
 +   = DIV_ROUND_UP(selector * range->uV_step
 +   + range->min_uV, 1000);
 +   }
>>>
>>> Seems like you want an "else { vreg->voltage_selector = -EINVAL; }".
>>> Otherwise "get_voltage_sel" will return selector 0 before the first
>>> set, right?
>>>
>>> Previously Mark said: "If the driver can't read values it should
>>> return an appropriate error code."
>>> ...and previously you said: "I'll try this out and see if the
>>> regulator framework complains during regulator registration."
>>
>> I tested out what happens when vreg->voltage_selector = -EINVAL is set
>> when qcom,regulator-initial-voltage is not present.  This results in
>> devm_regulator_register() failing and subsequently causing the
>> qcom_rpmh-regulator probe to fail.  The error happens in
>> machine_constraints_voltage() [1].
>>
>> This leaves two courses of action:
>> 1. (current patch set) allow voltage_selector to stay 0 if uninitialized
>> 2. Set voltage_selector = -EINVAL by default and specify in DT binding
>> documentation that qcom,regulator-initial-voltage is required for VRM
>> managed RPMh regulator resources which have regulator-min-microvolt and
>> regulator-max-microvolt specified.
>>
>> Are you ok with the DT implications of option #2?
> 
> You'd need to ask Mark if he's OK with it, but a option #3 is to add a
> patch to your series fix the regulator framework to try setting the
> voltage if _regulator_get_voltage() fails.  Presumably in
> machine_constraints_voltage() you'd now do something like:
> 
>   int target_min, target_max;
>   int current_uV = _regulator_get_voltage(rdev);
>   if (current_uV < 0) {
> /* Maybe this regulator's hardware can't be read and needs to be initted 
> */
> _regulator_do_set_voltage(
>   rdev, rdev->constraints->min_uV, rdev->constraints->min_uV);
> current_uV = _regulator_get_voltage(rdev);
>   }
>   if (current_uV < 0) {
> rdev_err(rdev,
>   "failed to get the current voltage(%d)\n",
>   current_uV);
>   return current_uV;
>   }
> 
> If Mark doesn't like that then I guess I'd be OK w/ initting it to 0
> but this needs to be documented _somewhere_ (unlike for bypass it's
> not obvious, so you need to find someplace to put it).  I'd rather not
> hack the DT to deal with our software limitations.

I'm not opposed to your option #3 though it does seem a little hacky and
tailored to the qcom_rpmh-regulator specific case.  Note that I think it
would be better to vote for min_uV to max_uV than min_uV to min_uV though.

Mark, what are your thoughts on the best way to handle this situation?


 +static unsigned int rpmh_regulator_pmic4_bob_of_map_mode(unsigned int 
 mode)
 +{
 +   static const unsigned int of_mode_map[RPMH_REGULATOR_MODE_COUNT] = 
 {
 +   [RPMH_REGULATOR_MODE_RET]  = -EINVAL,
 +   [RPMH_REGULATOR_MODE_LPM]  = REGULATOR_MODE_IDLE,
 +   [RPMH_REGULATOR_MODE_AUTO] = REGULATOR_MODE_NORMAL,
 +   [RPMH_REGULATOR_MODE_HPM]  = REGULATOR_MODE_FAST,
 +

Re: [pci PATCH v8 4/4] pci-pf-stub: Add PF driver stub for PFs that function only to enable VFs

2018-04-20 Thread Gregory Rose

On 4/20/2018 9:31 AM, Alexander Duyck wrote:

Add a new driver called "pci-pf-stub" to act as a "white-list" for PF
devices that provide no other functionality other then acting as a means of
allocating a set of VFs. For now I only have one example ID provided by
Amazon in terms of devices that require this functionality. The general
idea is that in the future we will see other devices added as vendors come
up with devices where the PF is more or less just a lightweight shim used
to allocate VFs.

Signed-off-by: Alexander Duyck 
---

v6: New driver to address concerns about Amazon devices left unsupported
v7: Dropped pci_id table explanation from pci-pf-stub driver

  drivers/pci/Kconfig   |   12 ++
  drivers/pci/Makefile  |2 ++
  drivers/pci/pci-pf-stub.c |   54 +
  include/linux/pci_ids.h   |2 ++
  4 files changed, 70 insertions(+)
  create mode 100644 drivers/pci/pci-pf-stub.c

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 34b56a8..cdef2a2 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -71,6 +71,18 @@ config PCI_STUB
  
  	  When in doubt, say N.
  
+config PCI_PF_STUB

+   tristate "PCI PF Stub driver"
+   depends on PCI
+   depends on PCI_IOV
+   help
+ Say Y or M here if you want to enable support for devices that
+ require SR-IOV support, while at the same time the PF itself is
+ not providing any actual services on the host itself such as
+ storage or networking.
+
+ When in doubt, say N.
+
  config XEN_PCIDEV_FRONTEND
  tristate "Xen PCI Frontend"
  depends on PCI && X86 && XEN
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 9419709..4e133d3 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -43,6 +43,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
  
  obj-$(CONFIG_PCI_STUB) += pci-stub.o
  
+obj-$(CONFIG_PCI_PF_STUB) += pci-pf-stub.o

+
  obj-$(CONFIG_PCI_ECAM) += ecam.o
  
  obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o

diff --git a/drivers/pci/pci-pf-stub.c b/drivers/pci/pci-pf-stub.c
new file mode 100644
index 000..9d5fdf2
--- /dev/null
+++ b/drivers/pci/pci-pf-stub.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci-pf-stub - simple stub driver for PCI SR-IOV PF device
+ *
+ * This driver is meant to act as a "white-list" for devices that provde
+ * SR-IOV functionality while at the same time not actually needing a
+ * driver of their own.
+ */
+
+#include 
+#include 
+
+/**
+ * pci_pf_stub_white_list - White list of devices to bind pci-pf-stub onto
+ *
+ * This table provides the list of IDs this driver is supposed to bind
+ * onto. You could think of this as a list of "quirked" devices where we
+ * are adding support for SR-IOV here since there are no other drivers
+ * that they would be running under.
+ */
+static const struct pci_device_id pci_pf_stub_white_list[] = {
+   { PCI_VDEVICE(AMAZON, 0x0053) },
+   /* required last entry */
+   { 0 }
+};
+MODULE_DEVICE_TABLE(pci, pci_pf_stub_white_list);
+
+static int pci_pf_stub_probe(struct pci_dev *dev,
+const struct pci_device_id *id)
+{
+   pci_info(dev, "claimed by pci-pf-stub\n");
+   return 0;
+}
+
+static struct pci_driver pf_stub_driver = {
+   .name   = "pci-pf-stub",
+   .id_table   = pci_pf_stub_white_list,
+   .probe  = pci_pf_stub_probe,
+   .sriov_configure= pci_sriov_configure_simple,
+};
+
+static int __init pci_pf_stub_init(void)
+{
+   return pci_register_driver(&pf_stub_driver);
+}
+
+static void __exit pci_pf_stub_exit(void)
+{
+   pci_unregister_driver(&pf_stub_driver);
+}
+
+module_init(pci_pf_stub_init);
+module_exit(pci_pf_stub_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a637a7d..62dab14 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2549,6 +2549,8 @@
  #define PCI_VENDOR_ID_CIRCUITCO   0x1cc8
  #define PCI_SUBSYSTEM_ID_CIRCUITCO_MINNOWBOARD0x0001
  
+#define PCI_VENDOR_ID_AMAZON		0x1d0f

+
  #define PCI_VENDOR_ID_TEKRAM  0x1de1
  #define PCI_DEVICE_ID_TEKRAM_DC2900xdc29
  



LGTM

Reviewed-by: Greg Rose 



Re: [pci PATCH v8 1/4] pci: Add pci_sriov_configure_simple for PFs that don't manage VF resources

2018-04-20 Thread Gregory Rose


On 4/20/2018 9:28 AM, Alexander Duyck wrote:

This patch adds a common configuration function called
pci_sriov_configure_simple that will allow for managing VFs on devices
where the PF is not capable of managing VF resources.

Signed-off-by: Alexander Duyck 
Tested-by: Mark Rustad 
---

v5: New patch replacing pci_sriov_configure_unmanaged with
   pci_sriov_configure_simple
 Dropped bits related to autoprobe changes
v6: Defined pci_sriov_configure_simple as NULL if IOV is disabled
v7: Updated pci_sriov_configure_simple to drop need for err value
 Fixed comment explaining why pci_sriov_configure_simple is NULL

  drivers/pci/iov.c   |   31 +++
  include/linux/pci.h |3 +++
  2 files changed, 34 insertions(+)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 677924a..3e0a7fd 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -807,3 +807,34 @@ int pci_sriov_get_totalvfs(struct pci_dev *dev)
return dev->sriov->total_VFs;
  }
  EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
+
+/**
+ * pci_sriov_configure_simple - helper to configure unmanaged SR-IOV
+ * @dev: the PCI device
+ * @nr_virtfn: number of virtual functions to enable, 0 to disable
+ *
+ * Used to provide generic enable/disable SR-IOV option for devices
+ * that do not manage the VFs generated by their driver. Return value
+ * is negative on error, or number of VFs allocated on success.
+ */
+int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
+{
+   might_sleep();
+
+   if (!dev->is_physfn)
+   return -ENODEV;
+
+   if (pci_vfs_assigned(dev)) {
+   pci_warn(dev,
+"Cannot modify SR-IOV while VFs are assigned\n");
+   return -EPERM;
+   }
+
+   if (!nr_virtfn) {
+   sriov_disable(dev);
+   return 0;
+   }
+
+   return sriov_enable(dev, nr_virtfn) ? : nr_virtfn;
+}
+EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ae42289..7d36e39 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1955,6 +1955,7 @@ static inline void pci_mmcfg_late_init(void) { }
  int pci_vfs_assigned(struct pci_dev *dev);
  int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
  int pci_sriov_get_totalvfs(struct pci_dev *dev);
+int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn);
  resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
  void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
  #else
@@ -1982,6 +1983,8 @@ static inline int pci_sriov_set_totalvfs(struct pci_dev 
*dev, u16 numvfs)
  { return 0; }
  static inline int pci_sriov_get_totalvfs(struct pci_dev *dev)
  { return 0; }
+/* this is expected to be used as a function pointer, just define as NULL */
+#define pci_sriov_configure_simple NULL
  static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int 
resno)
  { return 0; }
  static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) 
{ }



Thanks Alex!

Reviewed-by: Greg Rose 


Re: [RFC PATCH 3/4] acpi: apei: Do not panic() in NMI because of GHES messages

2018-04-20 Thread Alex G.


On 04/20/2018 02:27 AM, James Morse wrote:
> Hi Alex,
> 
> On 04/16/2018 10:59 PM, Alex G. wrote:
>> On 04/13/2018 11:38 AM, James Morse wrote:
>>> This assumes a cache-invalidate will clear the error, which I don't
> think we're
>>> guaranteed on arm.
>>> It also destroys any adjacent data, "everyone's happy" includes the
> thread that
>>> got a chunk of someone-else's stack frame, I don't think it will be
> happy for
>>> very long!
>>
>> Hmm, no cache-line (or page) invalidation on arm64? How does
>> dma_map/unmap_*() work then? You may not guarantee to fix the error, but
> 
> There are cache-invalidate instructions, but I don't think 'solving' a
> RAS error with them is the right thing to do.

You seem to be putting RAS on a pedestal in a very cloudy and foggy day.
I admit that I fail to see the specialness of RAS in comparison to other
errors.

>> I don't buy into the "let's crash without trying" argument.
> 
> Our 'cache writeback granule' may be as large as 2K, so we may have to
> invalidate up to 2K of data to convince the hardware this address is
> okay again.

Eureka! OS can invalidate the entire page. 1:1 mapping with the memory
management data.

> All we've done here is differently-corrupt the data so that it no longer
> generates a RAS fault, it just gives you the wrong data instead.
> Cache-invalidation is destructive.
> 
> I don't think there is a one-size-fits-all solution here.

Of course there isn't. That's not the issue.

A cache corruption is a special case of a memory access issue, and that,
we already know how to handle. Triple-fault and cpu-on-fire concerns
apply wrt returning to the context which triggered the problem. We've
already figured that out.

There is a lot of opportunity here for using well tested code paths and
not crashing on first go. Why let firmware make this a problem again?

>>> (this is a side issue for AER though)
>>
>> Somebody muddled up AER with these tables, so we now have to worry about
>> it. :)
> 
> Eh? I see there is a v2, maybe I'll understand this comment once I read it.

I meant that somebody (the spec writers) decided to put ominous errors
(PCIe) on the same severity scale with "cpu is on fire" errors.

 How does FFS handle race conditions that can occur when accessing HW
 concurrently with the OS? I'm told it's the main reasons why BIOS
 doesn't release unused cores from SMM early.
>>>
>>> This is firmware's problem, it depends on whether there is any
> hardware that is
>>> shared with the OS. Some hardware can be marked 'secure' in which
> case only
>>> firmware can access it, alternatively firmware can trap or just
> disable the OS's
>>> access to the shared hardware.
>>
>> It's everyone's problem. It's the firmware's responsibility.
> 
> It depends on the SoC design. If there is no hardware that the OS and
> firmware both need to access to handle an error then I don't think
> firmware needs to do this.
> 
> 
>>> For example, with the v8.2 RAS Extensions, there are some per-cpu error
>>> registers. Firmware can disable these for the OS, so that it always
> reads 0 from
>>> them. Instead firmware takes the error via FF, reads the registers from
>>> firmware, and dumps CPER records into the OS's memory.
>>>
>>> If there is a shared hardware resource that both the OS and firmware
> may be
>>> accessing, yes firmware needs to pull the other CPUs in, but this
> depends on the
>>> SoC design, it doesn't necessarily happen.
>>
>> The problem with shared resources is just a problem. I've seen systems
>> where all 100 cores are held up for 300+ ms. In latency-critical
>> applications reliability drops exponentially. Am I correct in assuming
>> your answer would be to "hide" more stuff from the OS?
> 
> No, I'm not a fan of firmware cycle stealing. If you can design the SoC or
> firmware so that the 'all CPUs' stuff doesn't need to happen, then you
> won't get
> these issues. (I don't design these things, I'm sure they're much more
> complicated
> than I think!)
> 
> Because the firmware is SoC-specific, so it only needs to do exactly
> what is necessary.

Irrespective of the hardware design, there's devicetree, ACPI methods,
and a few other ways to inform the OS of non-standard bits. They don't
have the resource sharing problem. I'm confused as to why FFS is used
when there are concerns about resource conflicts instead of race-free
alternatives.

 I think the idea of firmware-first is broken. But it's there, it's
 shipping in FW, so we have to accommodate it in SW.
>>>
>>> Part of our different-views here is firmware-first is taking
> something away from
>>> you, whereas for me its giving me information that would otherwise be in
>>> secret-soc-specific registers.
>>
>> Under this interpretation, FFS is a band-aid to the problem of "secret"
>> registers. "Secret" hardware doesn't really fit well into the idea of an
>> OS [1].
> 
> Sorry, I'm being sloppy with my terminology, by secret-soc-specific I
> mean either Linux can't access them 

Re: [PATCH v7 00/11] Sunxi: Add SMP support on A83T

2018-04-20 Thread Mylène Josserand
Hello,

On Fri, 20 Apr 2018 23:10:11 +0200
Mylène Josserand  wrote:

> Hello everyone,
> 
> This is a V7 of my series that adds SMP support for Allwinner sun8i-a83t.
> Based on sunxi's tree, sunxi/for-next branch.
> Depends on a patch from Doug Berger that allows to include the "cpu-type"
> header on assembly files:
> https://lkml.org/lkml/2018/2/23/1263
> (applied on Broadcom's tree: 
> https://github.com/Broadcom/stblinux/commits/soc/next)

And I forgot to say that I will have a look at the CPU0 hotplug but not
on this series.

Best regards,

Mylène

> 
> If you have any remarks/questions, let me know.
> Thank you in advance,
> Mylène
> 
> Changes from v6:
> - Correct the commit log on patch 07 according to Sergei Shtylyov's
> review.
> - Rename the field "is_sun8i" into "is_a83t".
> - Add all Tested-by and Reviewed-by from previous version.
> 
> Changes from v5:
> - Remove my patch 01 and use the patch of Doug Berger to be able to
> include the cpu-type header on assembly files.
> - Rename smp_init_cntvoff function into secure_cntvoff_init according
> to Marc Zyngier's review.
> - According to Chen-Yu and Maxime's reviews, remove the patch that was
> moving structures. Instead of using an index to retrieve which
> architecture we are having, use a global variable.
> - Merge the 2 patches that move assembly code from C to assembly file.
> - Use a sun8i field instead of sun9i to know on which architecture we
> are using because many modifications/additions of the code are for
> sun8i-a83t.
> - Rework the patch "add is_sun8i field" to add only this field in this
> patch. The part of the patch that was starting to handle the differences
> between sun8i-a83t and sun9i-a80 is merged in the patch that adds the
> support of sun8i-a83t.
> - Add a new patch that refactor the shmobile code to use the new function
> secure_cntvoff_init introduced in this series.
> 
> Changes from v4:
> - Rebased my series according to new Chen-Yu series:
>"ARM: sunxi: Clean and improvements for multi-cluster SMP"
>https://lkml.org/lkml/2018/3/8/886
> - Updated my series according to Marc Zyngier's reviews to add CNTVOFF
> initialization's function into ARM's common part. Thanks to that, other
> platforms such as Renesa can use this function.
> - For boot CPU, create a new machine to handle the CNTVOFF initialization
> using "init_early" callback.
> 
> Changes from v3:
> - Take into account Maxime's reviews:
>   - split the first patch into 4 new patches: add sun9i device tree
>   parsing, rename some variables, add a83t support and finally,
>   add hotplug support.
>   - Move the code of previous patch 07 (to disable CPU0 disabling)
>   into hotplug support patch (see patch 04)
>   - Remove the patch that added PRCM register because it is already
>   available. Because of that, update the device tree parsing to use
>   "sun8i-a83t-r-ccu".
>   - Use a variable to know which SoC we currently have
> - Take into account Chen-Yu's reviews: create two iounmap functions
> to release the resources of the device tree parsing.
> - Take into account Marc's review: Update the code to initialize CNTVOFF
> register. As there is already assembly code in the driver, I decided
> to create an assembly file not to mix assembly and C code.
> For that, I create 3 new patches: move the current assembly code that
> handles the cluster cache enabling into a file, move the cpu_resume entry
> in this file and finally, add a new assembly entry to initialize the timer
> offset for boot CPU and secondary CPUs.
> 
> Changes from v2:
> - Rebased my modifications according to new Chen Yu's patch series
> that adds SMP support for sun9i-a80 (without MCPM).
> - Split the device-tree patches into 3 patches for CPUCFG, R_CPUCFG
> and PRCM registers for more visibility.
> - The hotplug of CPU0 is currently not working (even after trying what
> Allwinner's code is doing) so remove the possibility of disabling
> this CPU. Created a new patch for it.
> 
> Changes from v1:
> - Add Chen Yu's patch in my series (see path 01)
> - Add new compatibles for prcm and cpucfg registers for sun8i-a83t.
> Create two functions to separate the DT parsing of sun9i-a80 and
> sun8i-a83t.
> - Thanks to Maxime's review: order device tree's nodes according
> to physical addresses, remove unused label and fix registers' sizes.
> Update the commit log and commit title of my last patch (see
> patch 05).
> 
> Mylène Josserand (11):
>   ARM: sunxi: smp: Move assembly code into a file
>   ARM: dts: sun8i: Add CPUCFG device node for A83T dtsi
>   ARM: dts: sun8i: Add R_CPUCFG device node for the A83T dtsi
>   ARM: dts: sun8i: a83t: Add CCI-400 node
>   ARM: smp: Add initialization of CNTVOFF
>   ARM: sunxi: Add initialization of CNTVOFF
>   AR

mmotm 2018-04-20-14-58 uploaded

2018-04-20 Thread akpm
The mm-of-the-moment snapshot 2018-04-20-14-58 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE--mm-dd-hh-mm-ss.  Both contain the string -mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master  topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.17-rc1:
(patches marked "*" will be included in linux-next)

  origin.patch
  i-need-old-gcc.patch
* fork-unconditionally-clear-stack-on-fork.patch
* mm-fix-do_pages_move-status-handling.patch
* mm-pagemap-fix-swap-offset-value-for-pmd-migration-entry.patch
* writeback-safer-lock-nesting.patch
* mm-enable-thp-migration-for-shmem-thp.patch
* rapidio-fix-rio_dma_transfer-error-handling.patch
* kasan-add-no_sanitize-attribute-for-clang-builds.patch
* maintainers-add-personal-addresses-for-sascha-and-me.patch
* autofs-mount-point-create-should-honour-passed-in-mode.patch
* proc-revalidate-kernel-thread-inodes-to-root-root.patch
* proc-fix-proc-loadavg-regression.patch
* kexec_file-do-not-add-extra-alignment-to-efi-memmap.patch
* fs-elf-dont-complain-map_fixed_noreplace-unless-eexist-error.patch
* mm-memcg-add-__gfp_nowarn-in-__memcg_schedule_kmem_cache_create.patch
* fix-null-pointer-in-page_cache_tree_insert.patch
* mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap.patch
* mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v2.patch
* kasan-prohibit-kasanstructleak-combination.patch
* lib-avoid-soft-lockup-in-test_find_first_bit.patch
* memcg-remove-memcg_cgroup-id-from-idr-on-mem_cgroup_css_alloc-failure.patch
* ocfs2-submit-another-bio-if-current-bio-is-full.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* prctl-add-pr_et_pdeathsig_proc.patch
* ocfs2-get-rid-of-ocfs2_is_o2cb_active-function.patch
* ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery.patch
* 
ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery-checkpatch-fixes.patch
* ocfs2-dont-put-and-assign-null-to-bh-allocated-outside.patch
* ocfs2-dont-use-iocb-when-eiocbqueued-returns.patch
* 
block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch
* dentry-fix-kmemcheck-splat-at-take_dentry_name_snapshot.patch
* namei-allow-restricted-o_creat-of-fifos-and-regular-files.patch
  mm.patch
* slab-__gfp_zero-is-incompatible-with-a-constructor.patch
* 
mm-introduce-arg_lock-to-protect-arg_startend-and-env_startend-in-mm_struct.patch
* 
mm-introduce-arg_lock-to-protect-arg_startend-and-env_startend-in-mm_struct-fix.patch
* mm-memcontrol-move-swap-charge-handling-into-get_swap_page.patch
* mm-memcontrol-implement-memoryswapevents.patch
* zram-correct-flag-name-of-zram_access.patch
* zram-mark-incompressible-page-as-zram_huge.patch
* zram-record-accessed-second.patch
* zram-introduce-zram-memory-tracking.patch
* zram-introduce-zram-memory-tracking-update.patch
* zram-introduce-zram-memory-tracking-update-fix.patch
* mm-shmem-add-__rcu-annot

Re: [PATCH v3] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.

2018-04-20 Thread Marek Vasut
On 04/20/2018 11:26 PM, NeilBrown wrote:
> 
> Winbond spi-nor flash 32MB and larger have an 'Extended Address
> Register' as one option for addressing beyond 16MB (Macronix
> has the same concept, Spansion has EXTADD bits in the Bank Address
> Register).
> 
> According to section
>8.2.7 Write Extended Address Register (C5h)
> 
> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash)
> 
>The Extended Address Register is only effective when the device is
>in the 3-Byte Address Mode.  When the device operates in the 4-Byte
>Address Mode (ADS=1), any command with address input of A31-A24
>will replace the Extended Address Register values. It is
>recommended to check and update the Extended Address Register if
>necessary when the device is switched from 4-Byte to 3-Byte Address
>Mode.
> 
> So the documentation suggests clearing the EAR after switching to
> 3-byte mode.  Experimentation shows that the EAR is *always* one after
> the switch to 3-byte mode, so clearing the EAR is mandatory at
> shutdown for a subsequent 3-byte-addressed reboot to work.
> 
> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal
> reboot, so we cannot rely on hardware reset.  The MT7621 does assert a
> reset line at watchdog-reset.
> 
> This problem is not a regression.  However the commit identified below
> added support for resetting an spi-nor chip at shutdown, but didn't
> achieve the goal for all spi-nor chips.  So this patch can be seen as
> fixing that one.
> 
> Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when 
> exiting")
> Cc: sta...@vger.kernel.org (v4.16)
> Signed-off-by: NeilBrown 

Acked-by: Marek Vasut 

> ---
>  drivers/mtd/spi-nor/spi-nor.c | 14 ++
>  include/linux/mtd/spi-nor.h   |  2 ++
>  2 files changed, 16 insertions(+)
> 
> diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
> index 5bfa36e95f35..42ae9a1529bb 100644
> --- a/drivers/mtd/spi-nor/spi-nor.c
> +++ b/drivers/mtd/spi-nor/spi-nor.c
> @@ -284,6 +284,20 @@ static inline int set_4byte(struct spi_nor *nor, const 
> struct flash_info *info,
>   if (need_wren)
>   write_disable(nor);
>  
> + if (!status && !enable &&
> + JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
> + /*
> +  * On Winbond W25Q256FV, leaving 4byte mode causes
> +  * the Extended Address Register to be set to 1, so all
> +  * 3-byte-address reads come from the second 16M.
> +  * We must clear the register to enable normal behavior.
> +  */
> + write_enable(nor);
> + nor->cmd_buf[0] = 0;
> + nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1);
> + write_disable(nor);
> + }
> +
>   return status;
>   default:
>   /* Spansion style */
> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
> index de36969eb359..e60da0d34cc1 100644
> --- a/include/linux/mtd/spi-nor.h
> +++ b/include/linux/mtd/spi-nor.h
> @@ -62,6 +62,8 @@
>  #define SPINOR_OP_RDCR   0x35/* Read configuration register 
> */
>  #define SPINOR_OP_RDFSR  0x70/* Read flag status register */
>  #define SPINOR_OP_CLFSR  0x50/* Clear flag status register */
> +#define SPINOR_OP_RDEAR  0xc8/* Read Extended Address 
> Register */
> +#define SPINOR_OP_WREAR  0xc5/* Write Extended Address 
> Register */
>  
>  /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
>  #define SPINOR_OP_READ_4B0x13/* Read data bytes (low frequency) */
> 


-- 
Best regards,
Marek Vasut


Re: [PATCH 4/4] sh: remove board_time_init() callback

2018-04-20 Thread Rich Felker
On Fri, Apr 20, 2018 at 11:51:18PM +0200, Arnd Bergmann wrote:
> On Fri, Apr 20, 2018 at 5:48 PM, Arnd Bergmann  wrote:
> 
> > @@ -41,8 +39,7 @@ static void __init sh_late_time_init(void)
> >
> >  void __init time_init(void)
> >  {
> > -   if (board_time_init)
> > -   board_time_init();
> > +   timer_init();
> 
> Testing revealed this to be broken, the fix is:
> 
> diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
> index a29eb989d81b..8a1c6c8ab4ec 100644
> --- a/arch/sh/kernel/time.c
> +++ b/arch/sh/kernel/time.c
> @@ -39,7 +39,7 @@ static void __init sh_late_time_init(void)
> 
>  void __init time_init(void)
>  {
> - timer_init();
> + timer_probe();
> 
>   clk_init();
> 
> Let me know if you'd like me to resend the series with that typo fixed.

If there are no other issues to correct, I can fix this when merging.

Rich


Re: [PATCH] platform/x86: Kconfig: Fix dell-laptop dependency chain.

2018-04-20 Thread Darren Hart
On Fri, Apr 20, 2018 at 05:55:28PM +, mario.limoncie...@dell.com wrote:
> 
> > -Original Message-
> > From: platform-driver-x86-ow...@vger.kernel.org [mailto:platform-driver-x86-
> > ow...@vger.kernel.org] On Behalf Of Randy Dunlap
> > Sent: Friday, April 20, 2018 12:53 PM
> > To: Limonciello, Mario; dvh...@infradead.org; Andy Shevchenko
> > Cc: LKML; platform-driver-...@vger.kernel.org
> > Subject: Re: [PATCH] platform/x86: Kconfig: Fix dell-laptop dependency 
> > chain.
> > 
> > On 04/20/18 10:42, Mario Limonciello wrote:
> > > As reported by Randy Dunlap:
> > >>> WARNING: unmet direct dependencies detected for DELL_SMBIOS
> > >>>   Depends on [m]: X86 [=y] && X86_PLATFORM_DEVICES [=y]
> > >>> && (DCDBAS [=m] ||
> > >>> DCDBAS [=m]=n) && (ACPI_WMI [=n] || ACPI_WMI [=n]=n)
> > >>>   Selected by [y]:
> > >>>   - DELL_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y]
> > >>> && DMI [=y]
> > >>> && BACKLIGHT_CLASS_DEVICE [=y] && (ACPI_VIDEO [=n] ||
> > >>> ACPI_VIDEO [=n]=n)
> > >>> && (RFKILL [=n] || RFKILL [=n]=n) && SERIO_I8042 [=y]
> > >>>
> > >
> > > Right now it's possible to set dell laptop to compile in but this
> > > causes dell-smbios to compile in which breaks if dcdbas is a module.
> > >
> > > Dell laptop shouldn't select dell-smbios anymore, but depend on it.
> > >
> > > Fixes: 32d7b19 (platform/x86: dell-smbios: Resolve dependency error on
> > DCDBAS)
> > 
> > meta-comment: the SHA-1 ID should be the first 12 characters of the commit 
> > ID
> > according to Documentation/process/submitting-patches.rst.
> 
> Thanks for letting me, I didn't realize that.
> 
> Darren, let me know if you want me to resubmit with this adjustment or if you
> can adjust when you queue for testing.


Will fix locally.

-- 
Darren Hart
VMware Open Source Technology Center


Re: [PATCH] platform/x86: Kconfig: Fix dell-laptop dependency chain.

2018-04-20 Thread Darren Hart
On Fri, Apr 20, 2018 at 12:42:11PM -0500, Mario Limonciello wrote:
> As reported by Randy Dunlap:
> >> WARNING: unmet direct dependencies detected for DELL_SMBIOS
> >>   Depends on [m]: X86 [=y] && X86_PLATFORM_DEVICES [=y]
> >>&& (DCDBAS [=m] ||
> >> DCDBAS [=m]=n) && (ACPI_WMI [=n] || ACPI_WMI [=n]=n)
> >>   Selected by [y]:
> >>   - DELL_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y]
> >> && DMI [=y]
> >> && BACKLIGHT_CLASS_DEVICE [=y] && (ACPI_VIDEO [=n] ||
> >>ACPI_VIDEO [=n]=n)
> >> && (RFKILL [=n] || RFKILL [=n]=n) && SERIO_I8042 [=y]
> >>
> 
> Right now it's possible to set dell laptop to compile in but this
> causes dell-smbios to compile in which breaks if dcdbas is a module.
> 
> Dell laptop shouldn't select dell-smbios anymore, but depend on it.

Ugh. Indeed.

One of the goals of the previous maelstrom of dell* kconfig depencency
changes was to make it so DELL_LAPTOP was visible without having to
toggle DELL_SMBIOS*.  But, having "select DELL_SMBIOS" avoids the
dependency check causing this failure (I'm surprised and dismayed this
made it through all the config permutation testing this saw).

I'll apply this patch, and I guess it should go to stable, and for the
next version I think it's time for a new "CONFIG DELL_EXTRAS" which will
allow us to default DELL_SMBIOS to y and make the menu more consistent.

Thank you for the catch. Grr :-(

-- 
Darren Hart
VMware Open Source Technology Center


Re: [PATCH 4/4] sh: remove board_time_init() callback

2018-04-20 Thread Arnd Bergmann
On Fri, Apr 20, 2018 at 5:48 PM, Arnd Bergmann  wrote:

> @@ -41,8 +39,7 @@ static void __init sh_late_time_init(void)
>
>  void __init time_init(void)
>  {
> -   if (board_time_init)
> -   board_time_init();
> +   timer_init();

Testing revealed this to be broken, the fix is:

diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index a29eb989d81b..8a1c6c8ab4ec 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -39,7 +39,7 @@ static void __init sh_late_time_init(void)

 void __init time_init(void)
 {
- timer_init();
+ timer_probe();

  clk_init();

Let me know if you'd like me to resend the series with that typo fixed.

  Arnd


Re: [RFC PATCH] dt-bindings: add a jsonschema binding example

2018-04-20 Thread Bjorn Andersson
On Wed 18 Apr 15:29 PDT 2018, Rob Herring wrote:

> The current DT binding documentation format of freeform text is painful
> to write, review, validate and maintain.
> 
> This is just an example of what a binding in the schema format looks
> like. It's using jsonschema vocabulary in a YAML encoded document. Using
> jsonschema gives us access to existing tooling. A YAML encoding gives us
> something easy to edit.
> 
> This example is just the tip of the iceberg, but it the part most
> developers writing bindings will interact with. Backing all this up
> are meta-schema (to validate the binding schemas), some DT core schema,
> YAML encoded DT output with dtc, and a small number of python scripts to
> run validation. The gory details including how to run end-to-end
> validation can be found here:
> 
> https://www.spinics.net/lists/devicetree-spec/msg00649.html
> 
> Signed-off-by: Rob Herring 
> ---
> Cc list,
> You all review and/or write lots of binding documents. I'd like some feedback
> on the format.
> 

I really like the idea of formalizing the binding document format and
the ability of validating a dtb is really nice.

> Thanks,
> Rob
> 
>  .../devicetree/bindings/example-schema.yaml| 149 
> +
>  1 file changed, 149 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/example-schema.yaml
> 
> diff --git a/Documentation/devicetree/bindings/example-schema.yaml 
> b/Documentation/devicetree/bindings/example-schema.yaml
[..]
> +  reg:
> +# The description of each element defines the order and implicitly 
> defines
> +# the number of reg entries
> +items:
> +  - description: core registers
> +  - description: aux registers
> +# minItems/maxItems equal to 2 is implied

I assume that a reg with variable number of entries would have
"description" for all of them and then a minItems that matches the
required ones and maxItems matching all of them?

> +
> +  reg-names:
> +# The core schema enforces this is a string array
> +items:
> +  - const: core
> +  - const: aux

I presume validation based on this should check that there's equal
number of entries in reg-names as there where in reg. Should this
relationship be described in the schema?

[..]
> +  interrupts:
> +# Either 1 or 2 interrupts can be present
> +minItems: 1
> +maxItems: 2
> +items:
> +  - description: tx or combined interrupt
> +  - description: rx interrupt
> +
> +description: |
> +  A variable number of interrupts warrants a description of what 
> conditions
> +  affect the number of interrupts. Otherwise, descriptions on standard
> +  properties are not necessary.

For validation purposes this could be interrupts with interrupt-parents
or a interrupts-extend, a fact that we probably don't want to duplicate
in every definition?

Perhaps we should just do like you did here and define the "interrupts"
and then in the validation tool - where we need to encode the logic
behind this anyways - we support the different variants.

> +
> +  interrupt-names:
> +# minItems must be specified here because the default would be 2
> +minItems: 1

As with reg-names, it would be good to have the validator warn if this
is not the same number of items as entries in "interrupts".

> +items:
> +  - const: "tx irq"
> +  - const: "rx irq"
> +
> +  # Property names starting with '#' must be quoted
> +  '#interrupt-cells':
> +# A simple case where the value must always be '2'.
> +# The core schema handles that this must be a single integer.
> +const: 2

If this is specified then interrupt-controller should also be given, or
vise versa. How would we describe that?

> +
> +  interrupt-controller: {}
> +# The core checks this is a boolean, so just have to list it here to be
> +# valid for this binding.
> +

Regards,
Bjorn


Re: [PATCH] prctl: Don't compile some of prctl functions when CRUI

2018-04-20 Thread kbuild test robot
Hi Sergey,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.17-rc1 next-20180420]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Sergey-Senozhatsky/prctl-Don-t-compile-some-of-prctl-functions-when-CRUI/20180421-040826
config: i386-randconfig-s1-201815 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/sys.c: In function 'prctl_set_mm':
>> kernel/sys.c:2108:10: error: implicit declaration of function 
>> 'prctl_set_mm_exe_file' [-Werror=implicit-function-declaration]
  return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 ^
>> kernel/sys.c:2174:10: error: implicit declaration of function 
>> 'validate_prctl_map' [-Werror=implicit-function-declaration]
 error = validate_prctl_map(&prctl_map);
 ^~
   cc1: some warnings being treated as errors

vim +/prctl_set_mm_exe_file +2108 kernel/sys.c

f606b77f1 Cyrill Gorcunov 2014-10-09  2103  
79f0713d4 Cyrill Gorcunov 2012-03-15  2104  if (!capable(CAP_SYS_RESOURCE))
028ee4be3 Cyrill Gorcunov 2012-01-12  2105  return -EPERM;
028ee4be3 Cyrill Gorcunov 2012-01-12  2106  
6e399cd14 Davidlohr Bueso 2015-04-16  2107  if (opt == PR_SET_MM_EXE_FILE)
6e399cd14 Davidlohr Bueso 2015-04-16 @2108  return 
prctl_set_mm_exe_file(mm, (unsigned int)addr);
b32dfe377 Cyrill Gorcunov 2012-05-31  2109  
4a00e9df2 Alexey Dobriyan 2015-06-25  2110  if (opt == PR_SET_MM_AUXV)
4a00e9df2 Alexey Dobriyan 2015-06-25  2111  return 
prctl_set_auxv(mm, addr, arg4);
4a00e9df2 Alexey Dobriyan 2015-06-25  2112  
1ad75b9e1 Cyrill Gorcunov 2012-06-07  2113  if (addr >= TASK_SIZE || addr < 
mmap_min_addr)
028ee4be3 Cyrill Gorcunov 2012-01-12  2114  return -EINVAL;
028ee4be3 Cyrill Gorcunov 2012-01-12  2115  
fe8c7f5cb Cyrill Gorcunov 2012-05-31  2116  error = -EINVAL;
fe8c7f5cb Cyrill Gorcunov 2012-05-31  2117  
ddf1d398e Mateusz Guzik   2016-01-20  2118  down_write(&mm->mmap_sem);
028ee4be3 Cyrill Gorcunov 2012-01-12  2119  vma = find_vma(mm, addr);
028ee4be3 Cyrill Gorcunov 2012-01-12  2120  
4a00e9df2 Alexey Dobriyan 2015-06-25  2121  prctl_map.start_code= 
mm->start_code;
4a00e9df2 Alexey Dobriyan 2015-06-25  2122  prctl_map.end_code  = 
mm->end_code;
4a00e9df2 Alexey Dobriyan 2015-06-25  2123  prctl_map.start_data= 
mm->start_data;
4a00e9df2 Alexey Dobriyan 2015-06-25  2124  prctl_map.end_data  = 
mm->end_data;
4a00e9df2 Alexey Dobriyan 2015-06-25  2125  prctl_map.start_brk = 
mm->start_brk;
4a00e9df2 Alexey Dobriyan 2015-06-25  2126  prctl_map.brk   = 
mm->brk;
4a00e9df2 Alexey Dobriyan 2015-06-25  2127  prctl_map.start_stack   = 
mm->start_stack;
4a00e9df2 Alexey Dobriyan 2015-06-25  2128  prctl_map.arg_start = 
mm->arg_start;
4a00e9df2 Alexey Dobriyan 2015-06-25  2129  prctl_map.arg_end   = 
mm->arg_end;
4a00e9df2 Alexey Dobriyan 2015-06-25  2130  prctl_map.env_start = 
mm->env_start;
4a00e9df2 Alexey Dobriyan 2015-06-25  2131  prctl_map.env_end   = 
mm->env_end;
4a00e9df2 Alexey Dobriyan 2015-06-25  2132  prctl_map.auxv  = NULL;
4a00e9df2 Alexey Dobriyan 2015-06-25  2133  prctl_map.auxv_size = 0;
4a00e9df2 Alexey Dobriyan 2015-06-25  2134  prctl_map.exe_fd= -1;
4a00e9df2 Alexey Dobriyan 2015-06-25  2135  
028ee4be3 Cyrill Gorcunov 2012-01-12  2136  switch (opt) {
028ee4be3 Cyrill Gorcunov 2012-01-12  2137  case PR_SET_MM_START_CODE:
4a00e9df2 Alexey Dobriyan 2015-06-25  2138  prctl_map.start_code = 
addr;
fe8c7f5cb Cyrill Gorcunov 2012-05-31  2139  break;
fe8c7f5cb Cyrill Gorcunov 2012-05-31  2140  case PR_SET_MM_END_CODE:
4a00e9df2 Alexey Dobriyan 2015-06-25  2141  prctl_map.end_code = 
addr;
028ee4be3 Cyrill Gorcunov 2012-01-12  2142  break;
028ee4be3 Cyrill Gorcunov 2012-01-12  2143  case PR_SET_MM_START_DATA:
4a00e9df2 Alexey Dobriyan 2015-06-25  2144  prctl_map.start_data = 
addr;
028ee4be3 Cyrill Gorcunov 2012-01-12  2145  break;
fe8c7f5cb Cyrill Gorcunov 2012-05-31  2146  case PR_SET_MM_END_DATA:
4a00e9df2 Alexey Dobriyan 2015-06-25  2147  prctl_map.end_data = 
addr;
4a00e9df2 Alexey Dobriyan 2015-06-25  2148  break;
4a00e9df2 Alexey Dobriyan 2015-06-25  2149  case PR_SET_MM_START_STACK:
4a00e9df2 Alexey Dobriyan 2015-06-25  2150  prctl_map.start_stack = 
addr;
028ee4be3 Cyrill Gorcunov 2012-01-12  2151  break;
028ee4be3 Cyrill Gorcunov 2012-01-12  2

[PATCH] platform/x86: acer-wmi: add another KEY_POWER keycode

2018-04-20 Thread Antonio Rosario Intilisano
Now that we have informed the firmware that the Power Button driver is
active, laptops such as the Acer Swift 3 will generate
a WMI key event with code 0x87 when the power button key is
pressed. 

Add this keycode to the table so that it is converted to an appropriate
input event.

Signed-off-by: Antonio Rosario Intilisano 
Acked-by: Gianfranco Costamagna 
Cc: Chris Chiu 
Cc: Daniel Drake 
Cc: Andy Shevchenko 

---
 drivers/platform/x86/acer-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 1be71f956d5c..8952173dd380 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -129,6 +129,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst 
= { {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} },
{KE_KEY, 0x85, {KEY_TOUCHPAD_TOGGLE} },
{KE_KEY, 0x86, {KEY_WLAN} },
+   {KE_KEY, 0x87, {KEY_POWER} },
{KE_END, 0}
 };
 
-- 
2.15.1



Re: [PATCH] sched/fair: Change sched_feat(x) in !CONFIG_SCHED_DEBUG case

2018-04-20 Thread Lukas Bulwahn

On Fri, 20 Apr 2018, Peter Zijlstra wrote:

> On Fri, Apr 20, 2018 at 06:29:07PM +0200, Philipp Klocke wrote:
> > The gain is stopping a warning that clutters the output log of clang.
> 
> Well, you should not be using clang anyway. It is known to miscompile
> the kernel.
> 

There are some advantages of having a second compiler that can compile
the kernel (https://lwn.net/Articles/734071/). Some people in the kernel 
community and LLVM community are trying to get that to work.

We also want a zero-warning policy for clang, similar to gcc. 
Hence, this motivates to have a look at those few clang warnings and come 
up with patches for them.

This does not imply to make changes at any cost, and we need to determine 
a proper patch to either change the source code, disable the warning in 
the build script or annotate the file with some clang-specific pragmas.

To us, a minor change in the source sounded most reasonable after looking
at all three possible patches. Philipp might need another iteration, but
it generally looks sound to me if we get the details flattened out. 

Lukas


Re: [PATCH v2] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.

2018-04-20 Thread NeilBrown
On Fri, Apr 20 2018, Boris Brezillon wrote:

> Hi Neil,
>
> On Mon, 16 Apr 2018 09:42:30 +1000
> NeilBrown  wrote:
>
>> Winbond spi-nor flash 32MB and larger have an 'Extended Address
>> Register' as one option for addressing beyond 16MB (Macronix
>> has the same concept, Spansion has EXTADD bits in the Bank Address
>> Register).
>> 
>> According to section
>>8.2.7 Write Extended Address Register (C5h)
>> 
>> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash)
>> 
>>The Extended Address Register is only effective when the device is
>>in the 3-Byte Address Mode.  When the device operates in the 4-Byte
>>Address Mode (ADS=1), any command with address input of A31-A24
>>will replace the Extended Address Register values. It is
>>recommended to check and update the Extended Address Register if
>>necessary when the device is switched from 4-Byte to 3-Byte Address
>>Mode.
>> 
>> So the documentation suggests clearing the EAR after switching to
>> 3-byte mode.  Experimentation shows that the EAR is *always* one after
>> the switch to 3-byte mode, so clearing the EAR is mandatory at
>> shutdown for a subsequent 3-byte-addressed reboot to work.
>> 
>> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal
>> reboot, so we cannot rely on hardware reset.  The MT7621 does assert a
>> reset line at watchdog-reset.
>> 
>> Signed-off-by: NeilBrown 
>
> We should probably backport the fix. Can you add a Fixes and Cc-stable
> tag?

It's a bit weird having Fixes when this isn't a regression, but I guess
it doesn't hurt.
I chose
  Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when 
exiting")
as this patch it useless without that one.

I also fixed the comment and have resent.

Thanks,
NeilBrown

>
>> ---
>> 
>> following a helpful discussion with Marek, I've revised the description
>> a little, and make the code change specific to winbond.
>> I've change the OP names to RDEAR and WREAR instead of RDXA and WRXA to
>> match names used in the Macronix documentation.  Winbond documentation
>> doesn't provide abbreviated OP names.
>> 
>> Thanks,
>> NeilBrown
>> 
>> 
>>  drivers/mtd/spi-nor/spi-nor.c | 13 +
>>  include/linux/mtd/spi-nor.h   |  2 ++
>>  2 files changed, 15 insertions(+)
>> 
>> diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
>> index d445a4d3b770..0d0af0acf8b9 100644
>> --- a/drivers/mtd/spi-nor/spi-nor.c
>> +++ b/drivers/mtd/spi-nor/spi-nor.c
>> @@ -284,6 +284,19 @@ static inline int set_4byte(struct spi_nor *nor, const 
>> struct flash_info *info,
>>  if (need_wren)
>>  write_disable(nor);
>>  
>> +if (!status && !enable &&
>> +JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
>> +/* On Winbond W25Q256FV, leaving 4byte mode causes
>
> We use regular kernel-comment style in MTD:
>
>   /*
>* blablabla
>*/
>
> Thanks,
>
> Boris
>
>> + * the Extended Address Register to be set to 1, so all
>> + * 3-byte-address reads come from the second 16M.
>> + * We must clear the register to enable normal behavior.
>> + */
>> +write_enable(nor);
>> +nor->cmd_buf[0] = 0;
>> +nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1);
>> +write_disable(nor);
>> +}
>> +
>>  return status;
>>  default:
>>  /* Spansion style */
>> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
>> index de36969eb359..e60da0d34cc1 100644
>> --- a/include/linux/mtd/spi-nor.h
>> +++ b/include/linux/mtd/spi-nor.h
>> @@ -62,6 +62,8 @@
>>  #define SPINOR_OP_RDCR  0x35/* Read configuration register 
>> */
>>  #define SPINOR_OP_RDFSR 0x70/* Read flag status register */
>>  #define SPINOR_OP_CLFSR 0x50/* Clear flag status register */
>> +#define SPINOR_OP_RDEAR 0xc8/* Read Extended Address 
>> Register */
>> +#define SPINOR_OP_WREAR 0xc5/* Write Extended Address 
>> Register */
>>  
>>  /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
>>  #define SPINOR_OP_READ_4B   0x13/* Read data bytes (low frequency) */


signature.asc
Description: PGP signature


[PATCH] platform/x86: acer-wmi: add another KEY_POWER keycode

2018-04-20 Thread Antonio Rosario Intilisano
Now that we have informed the firmware that the Power Button driver is
active, laptops such as the Acer Swift 3 will generate
a WMI key event with code 0x87 when the power button key is
pressed. 

Add this keycode to the table so that it is converted to an appropriate
input event.

Signed-off-by: Antonio Rosario Intilisano 
Acked-by: Gianfranco Costamagna 
Cc: Chris Chiu 
Cc: Daniel Drake 
Cc: Andy Shevchenko 

---
 drivers/platform/x86/acer-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 1be71f956d5c..8952173dd380 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -129,6 +129,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst 
= { {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} },
{KE_KEY, 0x85, {KEY_TOUCHPAD_TOGGLE} },
{KE_KEY, 0x86, {KEY_WLAN} },
+   {KE_KEY, 0x87, {KEY_POWER} },
{KE_END, 0}
 };
 
-- 
2.15.1



[PATCH v3] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.

2018-04-20 Thread NeilBrown

Winbond spi-nor flash 32MB and larger have an 'Extended Address
Register' as one option for addressing beyond 16MB (Macronix
has the same concept, Spansion has EXTADD bits in the Bank Address
Register).

According to section
   8.2.7 Write Extended Address Register (C5h)

of the Winbond W25Q256FV data sheet (256M-BIT SPI flash)

   The Extended Address Register is only effective when the device is
   in the 3-Byte Address Mode.  When the device operates in the 4-Byte
   Address Mode (ADS=1), any command with address input of A31-A24
   will replace the Extended Address Register values. It is
   recommended to check and update the Extended Address Register if
   necessary when the device is switched from 4-Byte to 3-Byte Address
   Mode.

So the documentation suggests clearing the EAR after switching to
3-byte mode.  Experimentation shows that the EAR is *always* one after
the switch to 3-byte mode, so clearing the EAR is mandatory at
shutdown for a subsequent 3-byte-addressed reboot to work.

Note that some SOCs (e.g. MT7621) do not assert a reset line at normal
reboot, so we cannot rely on hardware reset.  The MT7621 does assert a
reset line at watchdog-reset.

This problem is not a regression.  However the commit identified below
added support for resetting an spi-nor chip at shutdown, but didn't
achieve the goal for all spi-nor chips.  So this patch can be seen as
fixing that one.

Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when 
exiting")
Cc: sta...@vger.kernel.org (v4.16)
Signed-off-by: NeilBrown 
---
 drivers/mtd/spi-nor/spi-nor.c | 14 ++
 include/linux/mtd/spi-nor.h   |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 5bfa36e95f35..42ae9a1529bb 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -284,6 +284,20 @@ static inline int set_4byte(struct spi_nor *nor, const 
struct flash_info *info,
if (need_wren)
write_disable(nor);
 
+   if (!status && !enable &&
+   JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+   /*
+* On Winbond W25Q256FV, leaving 4byte mode causes
+* the Extended Address Register to be set to 1, so all
+* 3-byte-address reads come from the second 16M.
+* We must clear the register to enable normal behavior.
+*/
+   write_enable(nor);
+   nor->cmd_buf[0] = 0;
+   nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1);
+   write_disable(nor);
+   }
+
return status;
default:
/* Spansion style */
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index de36969eb359..e60da0d34cc1 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -62,6 +62,8 @@
 #define SPINOR_OP_RDCR 0x35/* Read configuration register */
 #define SPINOR_OP_RDFSR0x70/* Read flag status register */
 #define SPINOR_OP_CLFSR0x50/* Clear flag status register */
+#define SPINOR_OP_RDEAR0xc8/* Read Extended Address 
Register */
+#define SPINOR_OP_WREAR0xc5/* Write Extended Address 
Register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
 #define SPINOR_OP_READ_4B  0x13/* Read data bytes (low frequency) */
-- 
2.14.0.rc0.dirty



signature.asc
Description: PGP signature


Re: [PATCH] kvmalloc: always use vmalloc if CONFIG_DEBUG_VM

2018-04-20 Thread Mikulas Patocka


On Fri, 20 Apr 2018, Matthew Wilcox wrote:

> On Fri, Apr 20, 2018 at 04:54:53PM -0400, Mikulas Patocka wrote:
> > On Fri, 20 Apr 2018, Michal Hocko wrote:
> > > No way. This is just wrong! First of all, you will explode most likely
> > > on many allocations of small sizes. Second, CONFIG_DEBUG_VM tends to be
> > > enabled quite often.
> > 
> > You're an evil person who doesn't want to fix bugs.
> 
> Steady on.  There's no need for that.  Michal isn't evil.  Please
> apologise.

I see this attitude from Michal again and again.

He didn't want to fix vmalloc(GFP_NOIO), he didn't want to fix alloc_pages 
sleeping when __GFP_NORETRY is used. So what should I say? Fix them and 
you won't be evil :-)

(he could also fix the oom killer, so that it is triggered when 
free_memory+cache+free_swap goes beyond a threshold and not when you loop 
too long in the allocator)

> > You refused to fix vmalloc(GFP_NOIO) misbehavior a year ago (did you make 
> > some progress with it since that time?) and you refuse to fix kvmalloc 
> > misuses.
> 
> I understand you're frustrated, but this is not the way to get the problems
> fixed.
> 
> > I tried this patch on text-only virtual machine and /proc/vmallocinfo 
> > shows 614kB more memory. I tried it on a desktop machine with the chrome 
> > browser open and /proc/vmallocinfo space is increased by 7MB. So no - this 
> > won't exhaust memory and kill the machine.
> 
> This is good data, thank you for providing it.
> 
> > Arguing that this increases memory consumption is as bogus as arguing that 
> > CONFIG_LOCKDEP increses memory consumption. No one is forcing you to 
> > enable CONFIG_LOCKDEP and no one is forcing you to enable this kvmalloc 
> > test too.
> 
> I think there's a real problem which is that CONFIG_DEBUG_VM is too broad.
> It inserts code in a *lot* of places, some of which is quite expensive.
> We would do better to split it into more granular pieces ... although
> an explosion of configuration options isn't great either.  Maybe just
> CONFIG_DEBUG_VM and CONFIG_DEBUG_VM_EXPENSIVE.
> 
> Michal may be wrong, but he's not evil.

I already said that we can change it from CONFIG_DEBUG_VM to 
CONFIG_DEBUG_SG - or to whatever other option you may want, just to make 
sure that it is enabled in distro debug kernels by default.

Mikulas


[PATCH v3] clk: add duty cycle support

2018-04-20 Thread Jerome Brunet
Add the possibility to apply and query the clock signal duty cycle ratio.

This is useful when the duty cycle of the clock signal depends on some
other parameters controlled by the clock framework.

For example, the duty cycle of a divider may depends on the raw divider
setting (ratio = N / div) , which is controlled by the CCF. In such case,
going through the pwm framework to control the duty cycle ratio of this
clock would be a burden.

A clock provider is not required to implement the operation to set and get
the duty cycle. If it does not implement .get_duty_cycle(), the ratio is
assumed to be 50%.

This change also adds a new flag, CLK_DUTY_CYCLE_PARENT. This flag should
be used to indicate that a clock, such as gates and muxes, may inherit
the duty cycle ratio of its parent clock. If a clock does not provide a
get_duty_cycle() callback and has CLK_DUTY_CYCLE_PARENT, then the call
will be directly forwarded to its parent clock, if any. For
set_duty_cycle(), the clock should also have CLK_SET_RATE_PARENT for the
call to be forwarded

Signed-off-by: Jerome Brunet 
---

The series has been developed to handled the sample clocks provided by
audio clock controller of amlogic's A113 SoC. To support i2s modes, this
clock need to have a 50% duty cycle ratio, while it should be just one
pulse of the parent clock in dsp modes.

Changes since v2 [1]:
 - Fix kbuild robot issue with the trace file (imcomplete change related
   to clk_duty structure)

Changes since v1 [0]:
 - Use a structure to hold the duty cycle ratio
 - Change the way parent traversal is done, so the core framework is
   more aware of what is going on. Pass-through ops dropped as a result
 - Only one debugfs entry for the duty cycle ratio, instead of 2
 - Minor fixes as pointed out by Stephen

[0]: https://lkml.kernel.org/r/20180416175743.20826-1-jbru...@baylibre.com
[1]: https://lkml.kernel.org/r/20180420153431.13003-1-jbru...@baylibre.com


 drivers/clk/clk.c| 202 +--
 include/linux/clk-provider.h |  26 ++
 include/linux/clk.h  |  33 +++
 include/trace/events/clk.h   |  36 
 4 files changed, 292 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 7af555f0e60c..a1874f2932ee 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -68,6 +68,7 @@ struct clk_core {
unsigned long   max_rate;
unsigned long   accuracy;
int phase;
+   struct clk_duty duty;
struct hlist_head   children;
struct hlist_node   child_node;
struct hlist_head   clks;
@@ -2401,6 +2402,172 @@ int clk_get_phase(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_get_phase);
 
+static void clk_core_reset_duty_cycle_nolock(struct clk_core *core)
+{
+   /* Assume a default value of 50% */
+   core->duty.num = 1;
+   core->duty.den = 2;
+}
+
+static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core);
+
+static int clk_core_update_duty_cycle_nolock(struct clk_core *core)
+{
+   struct clk_duty *duty = &core->duty;
+   int ret = 0;
+
+   if (!core->ops->get_duty_cycle)
+   return clk_core_update_duty_cycle_parent_nolock(core);
+
+   ret = core->ops->get_duty_cycle(core->hw, duty);
+   if (ret)
+   goto reset;
+
+   /* Don't trust the clock provider too much */
+   if (duty->den == 0 || duty->num > duty->den) {
+   ret = -EINVAL;
+   goto reset;
+   }
+
+   return 0;
+
+reset:
+   clk_core_reset_duty_cycle_nolock(core);
+   return ret;
+}
+
+static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core)
+{
+   int ret = 0;
+
+   if (core->parent &&
+   core->flags & CLK_DUTY_CYCLE_PARENT) {
+   ret = clk_core_update_duty_cycle_nolock(core->parent);
+   memcpy(&core->duty, &core->parent->duty, sizeof(core->duty));
+   } else {
+   clk_core_reset_duty_cycle_nolock(core);
+   }
+
+   return ret;
+}
+
+static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core,
+struct clk_duty *duty);
+
+static int clk_core_set_duty_cycle_nolock(struct clk_core *core,
+ struct clk_duty *duty)
+{
+   int ret;
+
+   lockdep_assert_held(&prepare_lock);
+
+   if (clk_core_rate_is_protected(core))
+   return -EBUSY;
+
+   trace_clk_set_duty_cycle(core, duty);
+
+   if (!core->ops->set_duty_cycle)
+   return clk_core_set_duty_cycle_parent_nolock(core, duty);
+
+   ret = core->ops->set_duty_cycle(core->hw, duty);
+   if (!ret)
+   memcpy(&core->duty, duty, sizeof(*duty));
+
+   trace_clk_set_duty_cycle_complete(core, duty);
+
+   return ret;
+}
+
+static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core,
+   

[PATCH v7 05/11] ARM: smp: Add initialization of CNTVOFF

2018-04-20 Thread Mylène Josserand
The CNTVOFF register from arch timer is uninitialized.
It should be done by the bootloader but it is currently not the case,
even for boot CPU because this SoC is booting in secure mode.
It leads to an random offset value meaning that each CPU will have a
different time, which isn't working very well.

Add assembly code used for boot CPU and secondary CPU cores to make
sure that the CNTVOFF register is initialized. Because this code can
be used by different platforms, add this assembly file in ARM's common
folder.

Signed-off-by: Mylène Josserand 
Reviewed-by: Geert Uytterhoeven 
Tested-by: Geert Uytterhoeven 
---
 arch/arm/common/Makefile  |  1 +
 arch/arm/common/secure_cntvoff.S  | 31 +++
 arch/arm/include/asm/secure_cntvoff.h |  8 
 3 files changed, 40 insertions(+)
 create mode 100644 arch/arm/common/secure_cntvoff.S
 create mode 100644 arch/arm/include/asm/secure_cntvoff.h

diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 70b4a14ed993..1e9f7af8f70f 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_DMABOUNCE)   += dmabounce.o
 obj-$(CONFIG_SHARP_LOCOMO) += locomo.o
 obj-$(CONFIG_SHARP_PARAM)  += sharpsl_param.o
 obj-$(CONFIG_SHARP_SCOOP)  += scoop.o
+obj-$(CONFIG_SMP)  += secure_cntvoff.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o 
vlock.o
 CFLAGS_REMOVE_mcpm_entry.o = -pg
diff --git a/arch/arm/common/secure_cntvoff.S b/arch/arm/common/secure_cntvoff.S
new file mode 100644
index ..68a4a8344319
--- /dev/null
+++ b/arch/arm/common/secure_cntvoff.S
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2014 Renesas Electronics Corporation
+ *
+ * Initialization of CNTVOFF register from secure mode
+ *
+ */
+
+#include 
+#include 
+
+ENTRY(secure_cntvoff_init)
+   .arch   armv7-a
+   /*
+* CNTVOFF has to be initialized either from non-secure Hypervisor
+* mode or secure Monitor mode with SCR.NS==1. If TrustZone is enabled
+* then it should be handled by the secure code
+*/
+   cps #MON_MODE
+   mrc p15, 0, r1, c1, c1, 0   /* Get Secure Config */
+   orr r0, r1, #1
+   mcr p15, 0, r0, c1, c1, 0   /* Set Non Secure bit */
+   isb
+   mov r0, #0
+   mcrrp15, 4, r0, r0, c14 /* CNTVOFF = 0 */
+   isb
+   mcr p15, 0, r1, c1, c1, 0   /* Set Secure bit */
+   isb
+   cps #SVC_MODE
+   ret lr
+ENDPROC(secure_cntvoff_init)
diff --git a/arch/arm/include/asm/secure_cntvoff.h 
b/arch/arm/include/asm/secure_cntvoff.h
new file mode 100644
index ..1f93aee1f630
--- /dev/null
+++ b/arch/arm/include/asm/secure_cntvoff.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASMARM_ARCH_CNTVOFF_H
+#define __ASMARM_ARCH_CNTVOFF_H
+
+extern void secure_cntvoff_init(void);
+
+#endif
-- 
2.11.0



[PATCH v7 09/11] ARM: sun8i: smp: Add support for A83T

2018-04-20 Thread Mylène Josserand
Add the support for A83T.

A83T SoC has an additional register than A80 to handle CPU configurations:
R_CPUS_CFG. Information about the register comes from Allwinner's BSP
driver.
An important difference is the Power Off Gating register for clusters
which is BIT(4) in case of SUN9I-A80 and BIT(0) in case of SUN8I-A83T.
There is also a bit swap between sun8i-a83t and sun9i-a80 that must be
handled.

Signed-off-by: Mylène Josserand 
---
 arch/arm/mach-sunxi/Kconfig  |   2 +-
 arch/arm/mach-sunxi/mc_smp.c | 151 ++-
 2 files changed, 137 insertions(+), 16 deletions(-)

diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index ce53ceaf4cc5..d9c8ecf88ec6 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -51,7 +51,7 @@ config MACH_SUN9I
 config ARCH_SUNXI_MC_SMP
bool
depends on SMP
-   default MACH_SUN9I
+   default MACH_SUN9I || MACH_SUN8I
select ARM_CCI400_PORT_CTRL
select ARM_CPU_SUSPEND
 
diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c
index 48e5f4db64b6..4fd2267ecb04 100644
--- a/arch/arm/mach-sunxi/mc_smp.c
+++ b/arch/arm/mach-sunxi/mc_smp.c
@@ -55,22 +55,31 @@
 #define CPUCFG_CX_RST_CTRL_L2_RST  BIT(8)
 #define CPUCFG_CX_RST_CTRL_CX_RST(n)   BIT(4 + (n))
 #define CPUCFG_CX_RST_CTRL_CORE_RST(n) BIT(n)
+#define CPUCFG_CX_RST_CTRL_CORE_RST_ALL(0xf << 0)
 
 #define PRCM_CPU_PO_RST_CTRL(c)(0x4 + 0x4 * (c))
 #define PRCM_CPU_PO_RST_CTRL_CORE(n)   BIT(n)
 #define PRCM_CPU_PO_RST_CTRL_CORE_ALL  0xf
 #define PRCM_PWROFF_GATING_REG(c)  (0x100 + 0x4 * (c))
+/* The power off register for clusters are different from a80 and a83t */
+#define PRCM_PWROFF_GATING_REG_CLUSTER_SUN8I   BIT(0)
 #define PRCM_PWROFF_GATING_REG_CLUSTER_SUN9I   BIT(4)
 #define PRCM_PWROFF_GATING_REG_CORE(n) BIT(n)
 #define PRCM_PWR_SWITCH_REG(c, cpu)(0x140 + 0x10 * (c) + 0x4 * (cpu))
 #define PRCM_CPU_SOFT_ENTRY_REG0x164
 
+/* R_CPUCFG registers, specific to sun8i-a83t */
+#define R_CPUCFG_CLUSTER_PO_RST_CTRL(c)(0x30 + (c) * 0x4)
+#define R_CPUCFG_CLUSTER_PO_RST_CTRL_CORE(n)   BIT(n)
+#define R_CPUCFG_CPU_SOFT_ENTRY_REG0x01a4
+
 #define CPU0_SUPPORT_HOTPLUG_MAGIC00xFA50392F
 #define CPU0_SUPPORT_HOTPLUG_MAGIC10x790DCA3A
 
 static void __iomem *cpucfg_base;
 static void __iomem *prcm_base;
 static void __iomem *sram_b_smp_base;
+static void __iomem *r_cpucfg_base;
 
 extern void sunxi_mc_smp_secondary_startup(void);
 extern void sunxi_mc_smp_resume(void);
@@ -161,6 +170,16 @@ static int sunxi_cpu_powerup(unsigned int cpu, unsigned 
int cluster)
reg &= ~PRCM_CPU_PO_RST_CTRL_CORE(cpu);
writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
 
+   if (is_a83t) {
+   /* assert cpu power-on reset */
+   reg  = readl(r_cpucfg_base +
+R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster));
+   reg &= ~(R_CPUCFG_CLUSTER_PO_RST_CTRL_CORE(cpu));
+   writel(reg, r_cpucfg_base +
+  R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster));
+   udelay(10);
+   }
+
/* Cortex-A7: hold L1 reset disable signal low */
if (!sunxi_core_is_cortex_a15(cpu, cluster)) {
reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster));
@@ -184,17 +203,38 @@ static int sunxi_cpu_powerup(unsigned int cpu, unsigned 
int cluster)
/* open power switch */
sunxi_cpu_power_switch_set(cpu, cluster, true);
 
+   /* Handle A83T bit swap */
+   if (is_a83t) {
+   if (cpu == 0)
+   cpu = 4;
+   }
+
/* clear processor power gate */
reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
reg &= ~PRCM_PWROFF_GATING_REG_CORE(cpu);
writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
udelay(20);
 
+   /* Handle A83T bit swap */
+   if (is_a83t) {
+   if (cpu == 4)
+   cpu = 0;
+   }
+
/* de-assert processor power-on reset */
reg = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
reg |= PRCM_CPU_PO_RST_CTRL_CORE(cpu);
writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
 
+   if (is_a83t) {
+   reg  = readl(r_cpucfg_base +
+R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster));
+   reg |= R_CPUCFG_CLUSTER_PO_RST_CTRL_CORE(cpu);
+   writel(reg, r_cpucfg_base +
+  R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster));
+   udelay(10);
+   }
+
/* de-assert all processor resets */
reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
reg |= CPUCFG_CX_RST_CTRL_DBG_RST(cpu);
@@ -216,6 +256,14 @@ static int sunxi_cluster_powerup(unsigned int cluster)
if (cluster >= SUNXI_NR_CLUSTERS)
return -EINVAL;
 
+   /* For A83T, assert cluster cores resets

[PATCH v7 08/11] ARM: sun9i: smp: Add is_a83t field

2018-04-20 Thread Mylène Josserand
To prepare the support of sun8i-a83t, add a field in the smp_data
structure to know if we are on sun9i-a80 or sun8i-a83t.

Add also a global variable to retrieve which architecture we are
having.

Signed-off-by: Mylène Josserand 
---
 arch/arm/mach-sunxi/mc_smp.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c
index 03f021d0c73e..48e5f4db64b6 100644
--- a/arch/arm/mach-sunxi/mc_smp.c
+++ b/arch/arm/mach-sunxi/mc_smp.c
@@ -74,6 +74,7 @@ static void __iomem *sram_b_smp_base;
 
 extern void sunxi_mc_smp_secondary_startup(void);
 extern void sunxi_mc_smp_resume(void);
+static int is_a83t;
 
 static bool sunxi_core_is_cortex_a15(unsigned int core, unsigned int cluster)
 {
@@ -624,6 +625,7 @@ struct sunxi_mc_smp_nodes {
 struct sunxi_mc_smp_data {
const char *enable_method;
int (*get_smp_nodes)(struct sunxi_mc_smp_nodes *nodes);
+   int is_a83t;
 };
 
 static void __init sunxi_mc_smp_put_nodes(struct sunxi_mc_smp_nodes *nodes)
@@ -697,6 +699,8 @@ static int __init sunxi_mc_smp_init(void)
break;
}
 
+   is_a83t = sunxi_mc_smp_data[i].is_a83t;
+
of_node_put(node);
if (ret)
return -ENODEV;
-- 
2.11.0



  1   2   3   4   5   6   7   8   9   10   >