[PATCH] f2fs: check cap_resource only for data blocks
This patch changes the rule to check cap_resource for data blocks, not inode or node blocks in order to avoid selinux denial. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9f2bc78a9f5b..8f3ad9662d13 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1611,7 +1611,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) } static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, - struct inode *inode) + struct inode *inode, bool cap) { if (!inode) return true; @@ -1624,7 +1624,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && in_group_p(F2FS_OPTION(sbi).s_resgid)) return true; - if (capable(CAP_SYS_RESOURCE)) + if (cap && capable(CAP_SYS_RESOURCE)) return true; return false; } @@ -1659,7 +1659,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; - if (!__allow_reserved_blocks(sbi, inode)) + if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { @@ -1866,7 +1866,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, valid_block_count = sbi->total_valid_block_count + sbi->current_reserved_blocks + 1; - if (!__allow_reserved_blocks(sbi, inode)) + if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { -- 2.17.0.484.g0c8726318c-goog
Re: general protection fault in smc_getsockopt
syzbot has found reproducer for the following crash on upstream commit 83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +) Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=28a2c86cf19c81d871fa So far this crash happened 59 times on net-next, upstream. C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6375334488309760 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=6112997885870080 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=5942131738804224 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+28a2c86cf19c81d87...@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4492 Comm: syzkaller771634 Not tainted 4.17.0-rc1+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:smc_getsockopt+0x8b/0x120 net/smc/af_smc.c:1298 RSP: 0018:8801d90b7cc0 EFLAGS: 00010206 RAX: dc00 RBX: RCX: 2000 RDX: 0005 RSI: 873e3d16 RDI: 0028 RBP: 8801d90b7cf0 R08: 2040 R09: ed0036a05800 R10: ed0036a05800 R11: 8801b502c003 R12: 8801d90b7d40 R13: R14: 0008 R15: 2000 FS: 02017880() GS:8801dae0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 2040 CR3: 0001ac552000 CR4: 001406f0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: __sys_getsockopt+0x1a5/0x370 net/socket.c:1940 __do_sys_getsockopt net/socket.c:1951 [inline] __se_sys_getsockopt net/socket.c:1948 [inline] __x64_sys_getsockopt+0xbe/0x150 net/socket.c:1948 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x43fcf9 RSP: 002b:7ffcb16b9c58 EFLAGS: 0217 ORIG_RAX: 0037 RAX: ffda RBX: 004002c8 RCX: 0043fcf9 RDX: 0008 RSI: RDI: 0003 RBP: 006ca018 R08: 2040 R09: 004002c8 R10: 2000 R11: 0217 R12: 00401620 R13: 004016b0 R14: R15: Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 93 00 00 00 48 8b 9b 50 04 00 00 48 b8 00 00 00 00 00 fc ff df 48 8d 7b 28 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 62 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 28 49 RIP: smc_getsockopt+0x8b/0x120 net/smc/af_smc.c:1298 RSP: 8801d90b7cc0 ---[ end trace 7e67761582d7c7ee ]---
[PATCH] Revert "drm/sun4i: add lvds mode_valid function"
From: Ondrej Jirman The reverted commit broke LVDS output on TBS A711 Tablet. That tablet has simple-panel node that has fixed pixel clock-frequency that A83T SoC used in the tablet can't generate exactly. Requested rate is 5200 and rounded_rate is calculated as 51857142. It's close enough for it to work in practice, but with strict check in the reverted commit, the mode is rejected needlessly in this case. DT allows to specify a range of values for simple-panel/clock-frequency, but driver doesn't respect that ATM. Given that TBS A711 is the single user of sun4i-lvds driver, let's revert that commit for now, until a better solution for the problem is found. Also see: https://patchwork.kernel.org/patch/9446385/ for relevant discussion (or search for "[RFC] drm/sun4i: rgb: Add 5% tolerance to dot clock frequency check"). Fixes: e4e4b7ad50cf ("drm/sun4i: add lvds mode_valid function") Reported-by: Ondrej Jirman Signed-off-by: Ondrej Jirman --- drivers/gpu/drm/sun4i/sun4i_lvds.c | 55 -- 1 file changed, 55 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c index b4c9fbf5..be3f14d7746d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_lvds.c +++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c @@ -94,64 +94,9 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder) } } -static enum drm_mode_status sun4i_lvds_encoder_mode_valid(struct drm_encoder *crtc, - const struct drm_display_mode *mode) -{ - struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(crtc); - struct sun4i_tcon *tcon = lvds->tcon; - u32 hsync = mode->hsync_end - mode->hsync_start; - u32 vsync = mode->vsync_end - mode->vsync_start; - unsigned long rate = mode->clock * 1000; - long rounded_rate; - - DRM_DEBUG_DRIVER("Validating modes...\n"); - - if (hsync < 1) - return MODE_HSYNC_NARROW; - - if (hsync > 0x3ff) - return MODE_HSYNC_WIDE; - - if ((mode->hdisplay < 1) || (mode->htotal < 1)) - return MODE_H_ILLEGAL; - - if ((mode->hdisplay > 0x7ff) || (mode->htotal > 0xfff)) - return MODE_BAD_HVALUE; - - DRM_DEBUG_DRIVER("Horizontal parameters OK\n"); - - if (vsync < 1) - return MODE_VSYNC_NARROW; - - if (vsync > 0x3ff) - return MODE_VSYNC_WIDE; - - if ((mode->vdisplay < 1) || (mode->vtotal < 1)) - return MODE_V_ILLEGAL; - - if ((mode->vdisplay > 0x7ff) || (mode->vtotal > 0xfff)) - return MODE_BAD_VVALUE; - - DRM_DEBUG_DRIVER("Vertical parameters OK\n"); - - tcon->dclk_min_div = 7; - tcon->dclk_max_div = 7; - rounded_rate = clk_round_rate(tcon->dclk, rate); - if (rounded_rate < rate) - return MODE_CLOCK_LOW; - - if (rounded_rate > rate) - return MODE_CLOCK_HIGH; - - DRM_DEBUG_DRIVER("Clock rate OK\n"); - - return MODE_OK; -} - static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = { .disable= sun4i_lvds_encoder_disable, .enable = sun4i_lvds_encoder_enable, - .mode_valid = sun4i_lvds_encoder_mode_valid, }; static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = { -- 2.17.0
[PATCH] Revert "f2fs: introduce f2fs_set_page_dirty_nobuffer"
This patch reverts copied f2fs_set_page_dirty_nobuffer to use generic function for stability. This reverts commit fe76b796fc5194cc3d57265002e3a748566d073f. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 33 + fs/f2fs/f2fs.h | 1 - fs/f2fs/node.c | 2 +- 4 files changed, 3 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2e23b953d304..8fc55d42ba25 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -384,7 +384,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d32bf363cb60..5477fc09c3cd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,8 +19,6 @@ #include #include #include -#include -#include #include #include @@ -2471,35 +2469,6 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 1; } -/* - * This was copied from __set_page_dirty_buffers which gives higher performance - * in very high speed storages. (e.g., pmem) - */ -void f2fs_set_page_dirty_nobuffers(struct page *page) -{ - struct address_space *mapping = page->mapping; - unsigned long flags; - - if (unlikely(!mapping)) - return; - - spin_lock(&mapping->private_lock); - lock_page_memcg(page); - SetPageDirty(page); - spin_unlock(&mapping->private_lock); - - xa_lock_irqsave(&mapping->i_pages, flags); - WARN_ON_ONCE(!PageUptodate(page)); - account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->i_pages, - page_index(page), PAGECACHE_TAG_DIRTY); - xa_unlock_irqrestore(&mapping->i_pages, flags); - unlock_page_memcg(page); - - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return; -} - static int f2fs_set_data_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; @@ -2523,7 +2492,7 @@ static int f2fs_set_data_page_dirty(struct page *page) } if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); update_dirty_page(inode, page); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0c72908f3185..9f2bc78a9f5b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2939,7 +2939,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -void f2fs_set_page_dirty_nobuffers(struct page *page); int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f202398e20ea..ae83ca9d2d31 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1753,7 +1753,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); f2fs_trace_pid(page); -- 2.17.0.484.g0c8726318c-goog
Re: general protection fault in smc_setsockopt
syzbot has found reproducer for the following crash on upstream commit 83beed7b2b26f232d782127792dd0cd4362fdc41 (Fri Apr 20 17:56:32 2018 +) Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=9045fc589fcd196ef522 So far this crash happened 124 times on net-next, upstream. C reproducer: https://syzkaller.appspot.com/x/repro.c?id=6522155797839872 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=5566093930266624 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6661555940753408 Kernel config: https://syzkaller.appspot.com/x/.config?id=1808800213120130118 compiler: gcc (GCC) 8.0.1 20180413 (experimental) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+9045fc589fcd196ef...@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4520 Comm: syzkaller696326 Not tainted 4.17.0-rc1+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:smc_setsockopt+0x8b/0x120 net/smc/af_smc.c:1287 RSP: 0018:8801b433fcc8 EFLAGS: 00010206 RAX: dc00 RBX: RCX: 2000 RDX: 0005 RSI: 873e3bf6 RDI: 0028 RBP: 8801b433fcf8 R08: R09: ed00359a3780 R10: ed00359a3780 R11: 8801acd1bc03 R12: 8801b433fd40 R13: 0021 R14: 000d R15: 2000 FS: 00b01880() GS:8801daf0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 562e1fa410d0 CR3: 0001acf1e000 CR4: 001406e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: __sys_setsockopt+0x1bd/0x390 net/socket.c:1903 __do_sys_setsockopt net/socket.c:1914 [inline] __se_sys_setsockopt net/socket.c:1911 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x43fd19 RSP: 002b:7ffccc4960f8 EFLAGS: 0217 ORIG_RAX: 0036 RAX: ffda RBX: 004002c8 RCX: 0043fd19 RDX: 000d RSI: 0021 RDI: 0004 RBP: 006ca018 R08: R09: 004002c8 R10: 2000 R11: 0217 R12: 00401640 R13: 004016d0 R14: R15: Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 93 00 00 00 48 8b 9b 50 04 00 00 48 b8 00 00 00 00 00 fc ff df 48 8d 7b 28 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 62 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 28 49 RIP: smc_setsockopt+0x8b/0x120 net/smc/af_smc.c:1287 RSP: 8801b433fcc8 ---[ end trace 3858d0cd9ce5e4d4 ]---
loan offer
DO YOU NEED FINANCIAL HELP? $5000 to $20,000,000.00 No credit check Repaid over 1 year to maximum of 30 years at a low interest rate of 3%. Approval in 15-30 minutes Open 7 days a week from 24/7 Service available nationwide E-MAIL globalsolutionfinanc...@yahoo.com
[PATCH] staging: android: ion: remove duplicate buffer field initializes
As a result of various previous patches, ion_buffer_create() now has two sets of identical statements for initializing two fields of the buffer struct, next to each other. Remove one set. Move the initialization of these two fields together with the statements that initialize the other two fields from the function parameters, prior to the heap allocate() call, for consistency. Signed-off-by: Todd Poynor --- drivers/staging/android/ion/ion.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index e74db7902549..269a431646be 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -74,6 +74,8 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap, buffer->heap = heap; buffer->flags = flags; + buffer->dev = dev; + buffer->size = len; ret = heap->ops->allocate(heap, buffer, len, flags); @@ -93,11 +95,7 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap, goto err1; } - buffer->dev = dev; - buffer->size = len; - buffer->dev = dev; - buffer->size = len; INIT_LIST_HEAD(&buffer->attachments); mutex_init(&buffer->lock); mutex_lock(&dev->buffer_lock); -- 2.17.0.484.g0c8726318c-goog
[no subject]
I Mikhail Fridman. has selected you specially as one of my beneficiaries for my Charitable Donation, Just as I have declared on May 23, 2016 to give my fortune as charity. Check the link below for confirmation: http://www.ibtimes.co.uk/russias-second-wealthiest-man-mikhail-fridman-plans-leaving-14-2bn-fortune-charity-1561604 Reply as soon as possible with further directives. Best Regards, Mikhail Fridman.
[no subject]
I Mikhail Fridman. has selected you specially as one of my beneficiaries for my Charitable Donation, Just as I have declared on May 23, 2016 to give my fortune as charity. Check the link below for confirmation: http://www.ibtimes.co.uk/russias-second-wealthiest-man-mikhail-fridman-plans-leaving-14-2bn-fortune-charity-1561604 Reply as soon as possible with further directives. Best Regards, Mikhail Fridman.
Re: [RFC PATCH] dt-bindings: add a jsonschema binding example
On Fri, Apr 20, 2018 at 6:41 PM, Stephen Boyd wrote: > Quoting Rob Herring (2018-04-20 11:15:04) >> On Fri, Apr 20, 2018 at 11:47 AM, Stephen Boyd wrote: >> > Quoting Rob Herring (2018-04-18 15:29:05) >> >> diff --git a/Documentation/devicetree/bindings/example-schema.yaml >> >> b/Documentation/devicetree/bindings/example-schema.yaml >> >> new file mode 100644 >> >> index ..fe0a3bd1668e >> >> --- /dev/null >> >> +++ b/Documentation/devicetree/bindings/example-schema.yaml >> >> @@ -0,0 +1,149 @@ >> >> + >> >> + The end of the description is marked by indentation less than the >> >> first line >> >> + in the description. >> >> + >> >> +select: false >> >> + # 'select' is a schema applied to a DT node to determine if this >> >> binding >> >> + # schema should be applied to the node. It is optional and by default >> >> the >> >> + # possible compatible strings are extracted and used to match. >> > >> > Can we get a concrete example here? >> >> select: true >> >> :) Which is apply to every node. >> >> A better one is from the memory node schema ('$nodename' gets added : >> >> select: >> required: ["$nodename"] >> properties: >> $nodename: >> oneOf: >> - pattern: "^memory@[0-9a-f]*" >> - const: "memory" # 'memory' only allowed for selecting >> >> >> I expect the vast majority of device bindings will not use select at >> all and rely on compatible string matching. > > Thanks! I was looking to see how the select syntax would work and this > shows one example nicely. I suppose another way would be to show how a > compatible string would be matched through select, even though it's > redundant. > > Is there a way we can enforce node names through the schema too? For > example to enforce that a clock controller is called 'clock-controller' > or a spi master is called 'spi'. Yes, that's something I'd like to do. I think the easiest is to just treat node name as a property. We already generate a $nodename property when parsing the yaml format DT. >> >> >> + >> >> +properties: >> > [...] >> >> + >> >> + interrupts: >> >> +# Either 1 or 2 interrupts can be present >> >> +minItems: 1 >> >> +maxItems: 2 >> >> +items: >> >> + - description: tx or combined interrupt >> >> + - description: rx interrupt >> >> + >> >> +description: | >> > >> > The '|' is needed to make yaml happy? >> >> Yes, this is simply how you do literal text blocks in yaml. >> >> We don't really need for this one really, but for the top-level >> 'description' we do. The long term intent is 'description' would be >> written in sphinx/rst and can be extracted into the DT spec (for >> common bindings). Grant has experimented with that some. > > Ok. That sounds cool. Then we could embed links to datasheets and SVGs > too. > >> >> >> + A variable number of interrupts warrants a description of what >> >> conditions >> >> + affect the number of interrupts. Otherwise, descriptions on >> >> standard >> >> + properties are not necessary. >> >> + >> >> + interrupt-names: >> >> +# minItems must be specified here because the default would be 2 >> >> +minItems: 1 >> >> +items: >> >> + - const: "tx irq" >> >> + - const: "rx irq" >> >> + >> >> + # Property names starting with '#' must be quoted >> >> + '#interrupt-cells': >> >> +# A simple case where the value must always be '2'. >> >> +# The core schema handles that this must be a single integer. >> >> +const: 2 >> >> + >> >> + interrupt-controller: {} >> > >> > Does '{}' mean nothing to see here? >> >> Yes. It's just an empty schema that's always valid. > > Could we include another schema to indicate that this is an interrupt > controller? I'm sort of asking for multi-schema inheritance. Yes, but there's no need to do that here. Another schema can select on "interrupt-controller" property and be applied independently. There's already an example of that for the root node in my yaml-bindings repo. Rob
Re: [RFC PATCH] dt-bindings: add a jsonschema binding example
On Fri, Apr 20, 2018 at 4:00 PM, Frank Rowand wrote: > Hi Rob, > > Thanks for the example. It was a good starting tutorial of sorts for me > to understand the format a bit. > > > On 04/18/18 15:29, Rob Herring wrote: >> The current DT binding documentation format of freeform text is painful >> to write, review, validate and maintain. >> >> This is just an example of what a binding in the schema format looks >> like. It's using jsonschema vocabulary in a YAML encoded document. Using >> jsonschema gives us access to existing tooling. A YAML encoding gives us >> something easy to edit. >> >> This example is just the tip of the iceberg, but it the part most >> developers writing bindings will interact with. Backing all this up >> are meta-schema (to validate the binding schemas), some DT core schema, >> YAML encoded DT output with dtc, and a small number of python scripts to >> run validation. The gory details including how to run end-to-end >> validation can be found here: >> >> https://www.spinics.net/lists/devicetree-spec/msg00649.html >> >> Signed-off-by: Rob Herring >> --- >> Cc list, >> You all review and/or write lots of binding documents. I'd like some feedback >> on the format. >> >> Thanks, >> Rob >> >> .../devicetree/bindings/example-schema.yaml| 149 >> + >> 1 file changed, 149 insertions(+) >> create mode 100644 Documentation/devicetree/bindings/example-schema.yaml >> >> diff --git a/Documentation/devicetree/bindings/example-schema.yaml >> b/Documentation/devicetree/bindings/example-schema.yaml >> new file mode 100644 >> index ..fe0a3bd1668e >> --- /dev/null >> +++ b/Documentation/devicetree/bindings/example-schema.yaml > > I'm guessing by the path name that this is in the Linux kernel source tree. Yes, well, my kernel tree. Most of the work still lives here: https://github.com/robherring/yaml-bindings/ >> @@ -0,0 +1,149 @@ >> +# SPDX-License-Identifier: BSD-2-Clause > > If in the Linux kernel source tree, then allow gpl-v2 as a possible license. Why? BSD is compatible. The license of the above repo is all BSD. Of course there's all the existing docs which default to GPLv2 and we'll probably have to maintain that. >> +# Copyright 2018 Linaro Ltd. >> +%YAML 1.2 >> +--- >> +# All the top-level keys are standard json-schema keywords except for >> +# 'maintainers' and 'select' >> + >> +# $id is a unique idenifier based on the filename > > ^ identifier > >> +$id: "http://devicetree.org/schemas/example-schema.yaml#"; > > Does this imply that all schemas will be at devicetree.org instead > of in the Linux kernel source tree? This would be counter to my > earlier guess about where this patch is applied. They could be, but not necessarily. This is just convention in jsonschema is the best I understand it. I don't think you'd want validation to require an internet connection. For the base meta-schema, for example, it does exist at http://json-schema.org/draft-06/schema, but that's also distributed with implementations of jsonschema validators. A large part (not that any part is large) of the tools Grant and I have written is doing the cross reference resolution of files which uses the $id field. >> +$schema: "http://devicetree.org/meta-schemas/core.yaml#"; > > How is $schema used? Tells the validator what meta-schema this schema follows. Typically you see draft04 or draft06 here if you haven't written a meta-schema. > Is it accessed across the network? Could be, but generally no. >> + >> +# Only 1 version supported for now >> +version: 1 >> + >> +title: An example schema annotated with jsonschema details >> + >> +maintainers: >> + - Rob Herring >> + >> +description: | >> + A more detailed multi-line description of the binding. >> + >> + Details about the hardware device and any links to datasheets can go here. >> + >> + The end of the description is marked by indentation less than the first >> line >> + in the description. >> + >> +select: false >> + # 'select' is a schema applied to a DT node to determine if this binding >> + # schema should be applied to the node. It is optional and by default the >> + # possible compatible strings are extracted and used to match. > > My first reaction was that 'node' should somehow be included in the name > of 'select'. But my second thought was that maybe 'node' is implied, > because every schema file describes a single node??? This is where my > lack of knowledge kicks in - I'll go read stuff in your yaml-bindings > repo to get a better background... Yes, schema are applied to nodes and most of the current uses of select are based on node name. We walk the DT and apply all the schema for a node that select (either in the doc or generated from compatible) is valid. >> + >> +properties: >> + # A dictionary of DT properties for this binding schema >> + compatible: >> +# More complicated schema can use oneOf (XOR), anyOf (OR), or allOf >> (AND) >> +# to h
Re: [PATCH 5/5] x86, pti: filter at vma->vm_page_prot population
Dave Hansen wrote: > > From: Dave Hansen > > 0day reported warnings at boot on 32-bit systems without NX support: > > [ 12.349193] attempted to set unsupported pgprot: 8025 bits: > 8000 supported: 7fff > [ 12.350792] WARNING: CPU: 0 PID: 1 at arch/x86/include/asm/pgtable.h:540 > handle_mm_fault+0xfc1/0xfe0: > check_pgprot at > arch/x86/include/asm/pgtable.h:535 >(inlined by) pfn_pte at > arch/x86/include/asm/pgtable.h:549 >(inlined by) do_anonymous_page > at mm/memory.c:3169 >(inlined by) handle_pte_fault > at mm/memory.c:3961 >(inlined by) __handle_mm_fault > at mm/memory.c:4087 >(inlined by) handle_mm_fault > at mm/memory.c:4124 > > The problem was that we stopped massaging page permissions at PTE creation > time, so vma->vm_page_prot was passed unfiltered to PTE creation. > > To fix it, filter the page protections before they are installed in > vma->vm_page_prot. > > Signed-off-by: Dave Hansen > Reported-by: Fengguang Wu > Fixes: fb43d6cb91 ("x86/mm: Do not auto-massage page protections") > Cc: Andrea Arcangeli > Cc: Andy Lutomirski > Cc: Arjan van de Ven > Cc: Borislav Petkov > Cc: Dan Williams > Cc: David Woodhouse > Cc: Greg Kroah-Hartman > Cc: Hugh Dickins > Cc: Josh Poimboeuf > Cc: Juergen Gross > Cc: Kees Cook > Cc: Linus Torvalds > Cc: Nadav Amit > Cc: Peter Zijlstra > Cc: Thomas Gleixner > Cc: linux...@kvack.org > Cc: Ingo Molnar > --- > > b/arch/x86/Kconfig |4 > b/arch/x86/include/asm/pgtable.h |5 + > b/mm/mmap.c | 11 ++- > 3 files changed, 19 insertions(+), 1 deletion(-) > > diff -puN arch/x86/include/asm/pgtable.h~pti-glb-protection_map > arch/x86/include/asm/pgtable.h > --- a/arch/x86/include/asm/pgtable.h~pti-glb-protection_map 2018-04-20 > 14:10:08.251749151 -0700 > +++ b/arch/x86/include/asm/pgtable.h 2018-04-20 14:10:08.260749151 -0700 > @@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgp > > #define canon_pgprot(p) __pgprot(massage_pgprot(p)) > > +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) > +{ > + return canon_pgprot(prot); > +} > + > static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, >enum page_cache_mode pcm, >enum page_cache_mode new_pcm) > diff -puN arch/x86/Kconfig~pti-glb-protection_map arch/x86/Kconfig > --- a/arch/x86/Kconfig~pti-glb-protection_map 2018-04-20 14:10:08.253749151 > -0700 > +++ b/arch/x86/Kconfig2018-04-20 14:10:08.260749151 -0700 > @@ -52,6 +52,7 @@ config X86 > select ARCH_HAS_DEVMEM_IS_ALLOWED > select ARCH_HAS_ELF_RANDOMIZE > select ARCH_HAS_FAST_MULTIPLIER > + select ARCH_HAS_FILTER_PGPROT > select ARCH_HAS_FORTIFY_SOURCE > select ARCH_HAS_GCOV_PROFILE_ALL > select ARCH_HAS_KCOVif X86_64 > @@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX > config ARCH_HAS_CACHE_LINE_SIZE > def_bool y > > +config ARCH_HAS_FILTER_PGPROT > + def_bool y > + > config HAVE_SETUP_PER_CPU_AREA > def_bool y > > diff -puN mm/mmap.c~pti-glb-protection_map mm/mmap.c > --- a/mm/mmap.c~pti-glb-protection_map2018-04-20 14:10:08.256749151 > -0700 > +++ b/mm/mmap.c 2018-04-20 14:10:08.261749151 -0700 > @@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_i > __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 > }; > > +#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT > +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) > +{ > + return prot; > +} > +#endif > + > pgprot_t vm_get_page_prot(unsigned long vm_flags) > { > - return __pgprot(pgprot_val(protection_map[vm_flags & > + pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags & > (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | > pgprot_val(arch_vm_get_page_prot(vm_flags))); > + > + return arch_filter_pgprot(ret); > } > EXPORT_SYMBOL(vm_get_page_prot); Wouldn’t it be simpler or at least cleaner to change the protection map if NX is not supported? I presume it can be done paging_init() similarly to the way other archs (e.g., arm, mips) do.
RE: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE for Geminilake
Hi all: We tested GLK DMC 1.04 FW in last week of September 2017, using the latest drm-tip version for that time (4.14.0-rc2) and according to our results we could declare this FW as acceptable and healthy to be used with kernel version 4.14 . However, we cannot guarantee quality and healthy of this FW if it is used in top of current drm-tip kernel (4.17-rc0). Best Regards Luis Botello -Original Message- From: Srivatsa, Anusha Sent: Friday, April 20, 2018 1:30 PM To: Vivi, Rodrigo ; Jani Nikula ; Botello Ortega, Luis ; Martinez Monroy, Elio Cc: Ian W MORRISON ; airl...@linux.ie; Greg KH ; intel-...@lists.freedesktop.org; linux-kernel@vger.kernel.org; sta...@vger.kernel.org; dri-de...@lists.freedesktop.org; Wajdeczko, Michal Subject: RE: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE for Geminilake >-Original Message- >From: Vivi, Rodrigo >Sent: Friday, April 20, 2018 11:04 AM >To: Jani Nikula >Cc: Srivatsa, Anusha ; Ian W MORRISON >; airl...@linux.ie; Greg KH >; intel-...@lists.freedesktop.org; linux- >ker...@vger.kernel.org; sta...@vger.kernel.org; dri- >de...@lists.freedesktop.org; Wajdeczko, Michal > >Subject: Re: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE for >Geminilake > >On Tue, Apr 17, 2018 at 12:02:52PM +0300, Jani Nikula wrote: >> On Mon, 16 Apr 2018, "Srivatsa, Anusha" wrote: >> >>-Original Message- >> >>From: Jani Nikula [mailto:jani.nik...@linux.intel.com] >> >>Sent: Wednesday, April 11, 2018 5:27 AM >> >>To: Ian W MORRISON >> >>Cc: Vivi, Rodrigo ; Srivatsa, Anusha >> >>; Wajdeczko, Michal >> >>; Greg KH ; >> >>airl...@linux.ie; joonas.lahti...@linux.intel.com; >> >>linux-kernel@vger.kernel.org; sta...@vger.kernel.org; >> >>intel-...@lists.freedesktop.org; dri- de...@lists.freedesktop.org >> >>Subject: Re: [RESEND PATCH 1/1] drm/i915/glk: Add MODULE_FIRMWARE >> >>for Geminilake >> >> >> >>On Wed, 11 Apr 2018, Ian W MORRISON wrote: >> >>> >> >>> >> >> NAK on indiscriminate Cc: stable. There are zero guarantees that >> older kernels will work with whatever firmware you throw at them. >> >> >>> >> >>> I included 'Cc: stable' so the patch would get added to the v4.16 >> >>> and >> >>> v4.15 kernels which I have tested with the patch. I found that >> >>> earlier kernels didn't support the 'linux-firmware' package >> >>> required to get wifi working on Intel's new Gemini Lake NUC. >> >> >> >>You realize that this patch should have nothing to do with wifi? >> >> >> >>Rodrigo, Anusha, if you think Cc: stable is appropriate, please >> >>indicate the specific versions of stable it is appropriate for. >> > >> > Hi Jani, >> > >> > The stable kernel version is 4.12 and beyond. >> > It is appropriate to add the CC: stable in my opinion >> >> Who tested the firmware with v4.12 and later? We only have the CI >> results against *current* drm-tip. We don't even know about v4.16. >> > >I understand your concerns, but the problem was that our old process >was a bit >(lot?) messed and there was the unreliable time until the firmware >really lands on linux-firmware.git. So MODULE_FIRMWARE call was only >added after firmware was really there on firmware repository but it wasn't >about the testing. > >In other words, the bump version patch was merged after tested, but >MODULE_FIRMWARE was left behind because firmware blob took a while to >get pulled into linux-firmware.git and we end up forgetting to add it there. > >In my opinion it should be safe to add the MODULE_FIRMWARE there based >on the tests from when the version was bumped. Luis, Elio, can you guys confirm that this firmware is tested and healthy? And also, give a tested-by to this patch please? Thanks, Anusha >> I'm not going to ack and take responsibility for the stable backports >> unless someone actually comes forward with credible Tested-bys. >> >> BR, >> Jani. >> >> >> > >> > Anusha >> >>BR, >> >>Jani. >> >> >> >>> >> >> PS. How is this a "RESEND"? I haven't seen this before. >> >> >>> >> >>> It is a 'RESEND' for that very reason. I initially sent the patch >> >>> to the same people as a similar patch >> >>> (https://patchwork.kernel.org/patch/10143637/) however after >> >>> realising this omitted required addresses I added them and resent >> >>> the >patch. >> >>> >> >>> Best regards, >> >>> Ian >> >> >> >>-- >> >>Jani Nikula, Intel Open Source Technology Center >> >> -- >> Jani Nikula, Intel Open Source Technology Center >> ___ >> dri-devel mailing list >> dri-de...@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 02/13] staging: iio: tsl2x7x: use GPL-2.0+ SPDX license identifier
The summary text for the GPL is not needed since the SPDX identifier is a legally binding shorthand that can be used instead. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 10 +- drivers/staging/iio/light/tsl2x7x.h | 14 +- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index eeccfbb0eb1f..9cdcc8c9e812 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -5,15 +5,7 @@ * Copyright (c) 2012, TAOS Corporation. * Copyright (c) 2017-2018 Brian Masney * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. + * SPDX-License-Identifier: GPL-2.0+ */ #include diff --git a/drivers/staging/iio/light/tsl2x7x.h b/drivers/staging/iio/light/tsl2x7x.h index d382cdbb976e..992ee2465609 100644 --- a/drivers/staging/iio/light/tsl2x7x.h +++ b/drivers/staging/iio/light/tsl2x7x.h @@ -4,19 +4,7 @@ * * Copyright (c) 2012, TAOS Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * SPDX-License-Identifier: GPL-2.0+ */ #ifndef __TSL2X7X_H -- 2.14.3
[PATCH 03/13] staging: iio: tsl2x7x: don't return error in IRQ handler
tsl2x7x_event_handler() could return an error and this could cause the interrupt to remain masked. We shouldn't return an error in the interrupt handler so this patch always returns IRQ_HANDLED. An error will be logged if one occurs. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 9cdcc8c9e812..95a00b965c5e 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -1320,7 +1320,7 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void *private) ret = tsl2x7x_read_status(chip); if (ret < 0) - return ret; + return IRQ_HANDLED; /* What type of interrupt do we need to process */ if (ret & TSL2X7X_STA_PRX_INTR) { @@ -1341,9 +1341,7 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void *private) timestamp); } - ret = tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR); - if (ret < 0) - return ret; + tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR); return IRQ_HANDLED; } -- 2.14.3
[PATCH 04/13] staging: iio: tsl2x7x: simplify tsl2x7x_clear_interrupts function
tsl2x7x_clear_interrupts() takes a reg argument but there are only two callers to this function and both callers pass the same value. Since this function was introduced, interrupts are now working properly for this driver, and several unnecessary calls to tsl2x7x_clear_interrupts() were removed. This patch removes the tsl2x7x_clear_interrupts() function and replaces the two callers with the i2c_smbus_write_byte() call instead. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 95a00b965c5e..f37fc74b8fbc 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -271,20 +271,6 @@ static const u8 device_channel_config[] = { ALSPRX2 }; -static int tsl2x7x_clear_interrupts(struct tsl2X7X_chip *chip, int reg) -{ - int ret; - - ret = i2c_smbus_write_byte(chip->client, - TSL2X7X_CMD_REG | TSL2X7X_CMD_SPL_FN | reg); - if (ret < 0) - dev_err(&chip->client->dev, - "%s: failed to clear interrupt status %x: %d\n", - __func__, reg, ret); - - return ret; -} - static int tsl2x7x_read_status(struct tsl2X7X_chip *chip) { int ret; @@ -714,9 +700,15 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev) if (ret < 0) return ret; - ret = tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR); - if (ret < 0) + ret = i2c_smbus_write_byte(chip->client, + TSL2X7X_CMD_REG | TSL2X7X_CMD_SPL_FN | + TSL2X7X_CMD_PROXALS_INT_CLR); + if (ret < 0) { + dev_err(&chip->client->dev, + "%s: failed to clear interrupt status: %d\n", + __func__, ret); return ret; + } chip->tsl2x7x_chip_status = TSL2X7X_CHIP_WORKING; @@ -1341,7 +1333,13 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void *private) timestamp); } - tsl2x7x_clear_interrupts(chip, TSL2X7X_CMD_PROXALS_INT_CLR); + ret = i2c_smbus_write_byte(chip->client, + TSL2X7X_CMD_REG | TSL2X7X_CMD_SPL_FN | + TSL2X7X_CMD_PROXALS_INT_CLR); + if (ret < 0) + dev_err(&chip->client->dev, + "%s: failed to clear interrupt status: %d\n", + __func__, ret); return IRQ_HANDLED; } -- 2.14.3
[PATCH 05/13] staging: iio: tsl2x7x: remove unnecessary chip status checks in suspend/resume
tsl2x7x_suspend() and tsl2x7x_resume() both check to see what the current chip status is. These checks are not necessary so this patch removes those checks. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 16 ++-- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index f37fc74b8fbc..8d8af0cf9768 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -1682,27 +1682,15 @@ static int tsl2x7x_probe(struct i2c_client *clientp, static int tsl2x7x_suspend(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct tsl2X7X_chip *chip = iio_priv(indio_dev); - int ret = 0; - - if (chip->tsl2x7x_chip_status == TSL2X7X_CHIP_WORKING) { - ret = tsl2x7x_chip_off(indio_dev); - chip->tsl2x7x_chip_status = TSL2X7X_CHIP_SUSPENDED; - } - return ret; + return tsl2x7x_chip_off(indio_dev); } static int tsl2x7x_resume(struct device *dev) { struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct tsl2X7X_chip *chip = iio_priv(indio_dev); - int ret = 0; - if (chip->tsl2x7x_chip_status == TSL2X7X_CHIP_SUSPENDED) - ret = tsl2x7x_chip_on(indio_dev); - - return ret; + return tsl2x7x_chip_on(indio_dev); } static int tsl2x7x_remove(struct i2c_client *client) -- 2.14.3
[PATCH 08/13] staging: iio: tsl2x7x: add range checking to three sysfs attributes
The sysfs attributes in_illuminance0_target_input, in_illuminance0_calibrate, and in_proximity0_calibrate did not have proper range checking in place so this patch adds the correct range checks. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 25 +++-- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 56730baea927..15bc0af1bb6c 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -835,9 +835,10 @@ static ssize_t in_illuminance0_target_input_store(struct device *dev, if (kstrtoul(buf, 0, &value)) return -EINVAL; - if (value) - chip->settings.als_cal_target = value; + if (value < 0 || value > 65535) + return -ERANGE; + chip->settings.als_cal_target = value; ret = tsl2x7x_invoke_change(indio_dev); if (ret < 0) return ret; @@ -853,14 +854,12 @@ static ssize_t in_illuminance0_calibrate_store(struct device *dev, bool value; int ret; - if (strtobool(buf, &value)) + if (kstrtobool(buf, &value) || !value) return -EINVAL; - if (value) { - ret = tsl2x7x_als_calibrate(indio_dev); - if (ret < 0) - return ret; - } + ret = tsl2x7x_als_calibrate(indio_dev); + if (ret < 0) + return ret; ret = tsl2x7x_invoke_change(indio_dev); if (ret < 0) @@ -946,14 +945,12 @@ static ssize_t in_proximity0_calibrate_store(struct device *dev, bool value; int ret; - if (strtobool(buf, &value)) + if (kstrtobool(buf, &value) || !value) return -EINVAL; - if (value) { - ret = tsl2x7x_prox_cal(indio_dev); - if (ret < 0) - return ret; - } + ret = tsl2x7x_prox_cal(indio_dev); + if (ret < 0) + return ret; ret = tsl2x7x_invoke_change(indio_dev); if (ret < 0) -- 2.14.3
[PATCH 06/13] staging: iio: tsl2x7x: simplify tsl2x7x_write_interrupt_config return
tsl2x7x_write_interrupt_config() has an unnecessary return value check at the end of the function. This patch changes the function to just return the value from the call to tsl2x7x_invoke_change(). Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 8d8af0cf9768..d202bc7e1f4f 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -982,18 +982,13 @@ static int tsl2x7x_write_interrupt_config(struct iio_dev *indio_dev, int val) { struct tsl2X7X_chip *chip = iio_priv(indio_dev); - int ret; if (chan->type == IIO_INTENSITY) chip->settings.als_interrupt_en = val ? true : false; else chip->settings.prox_interrupt_en = val ? true : false; - ret = tsl2x7x_invoke_change(indio_dev); - if (ret < 0) - return ret; - - return 0; + return tsl2x7x_invoke_change(indio_dev); } static int tsl2x7x_write_event_value(struct iio_dev *indio_dev, -- 2.14.3
[PATCH 07/13] staging: iio: tsl2x7x: simplify device id verification
This patch renames tsl2x7x_device_id() to tsl2x7x_device_id_verif(), removes the unnecessary pointer on the id parameter, and only calls the verification function once. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index d202bc7e1f4f..56730baea927 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -1277,22 +1277,22 @@ static DEVICE_ATTR_WO(in_proximity0_calibrate); static DEVICE_ATTR_RW(in_illuminance0_lux_table); /* Use the default register values to identify the Taos device */ -static int tsl2x7x_device_id(int *id, int target) +static int tsl2x7x_device_id_verif(int id, int target) { switch (target) { case tsl2571: case tsl2671: case tsl2771: - return (*id & 0xf0) == TRITON_ID; + return (id & 0xf0) == TRITON_ID; case tmd2671: case tmd2771: - return (*id & 0xf0) == HALIBUT_ID; + return (id & 0xf0) == HALIBUT_ID; case tsl2572: case tsl2672: case tmd2672: case tsl2772: case tmd2772: - return (*id & 0xf0) == SWORDFISH_ID; + return (id & 0xf0) == SWORDFISH_ID; } return -EINVAL; @@ -1612,8 +1612,7 @@ static int tsl2x7x_probe(struct i2c_client *clientp, if (ret < 0) return ret; - if ((!tsl2x7x_device_id(&ret, id->driver_data)) || - (tsl2x7x_device_id(&ret, id->driver_data) == -EINVAL)) { + if (tsl2x7x_device_id_verif(ret, id->driver_data) <= 0) { dev_info(&chip->client->dev, "%s: i2c device found does not match expected id\n", __func__); -- 2.14.3
[PATCH 09/13] staging: iio: tsl2x7x: move power and diode settings into header file
The power and diode defines are needed for the platform data so this patch moves the defines out of the .c file and into the header file. A comment for the diode is also cleaned up while this code is touched. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 12 drivers/staging/iio/light/tsl2x7x.h | 12 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 15bc0af1bb6c..87b99deef7a8 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -103,18 +103,6 @@ #define TSL2X7X_CNTL_PROXPON_ENBL 0x0F #define TSL2X7X_CNTL_INTPROXPON_ENBL 0x2F -/*Prox diode to use */ -#define TSL2X7X_DIODE0 0x01 -#define TSL2X7X_DIODE1 0x02 -#define TSL2X7X_DIODE_BOTH 0x03 - -/* LED Power */ -#define TSL2X7X_100_mA 0x00 -#define TSL2X7X_50_mA 0x01 -#define TSL2X7X_25_mA 0x02 -#define TSL2X7X_13_mA 0x03 -#define TSL2X7X_MAX_TIMER_CNT 0xFF - #define TSL2X7X_MIN_ITIME 3 /* TAOS txx2x7x Device family members */ diff --git a/drivers/staging/iio/light/tsl2x7x.h b/drivers/staging/iio/light/tsl2x7x.h index 992ee2465609..2c96f0b39b1e 100644 --- a/drivers/staging/iio/light/tsl2x7x.h +++ b/drivers/staging/iio/light/tsl2x7x.h @@ -23,6 +23,18 @@ struct tsl2x7x_lux { #define TSL2X7X_DEFAULT_TABLE_BYTES (sizeof(struct tsl2x7x_lux) * \ TSL2X7X_DEF_LUX_TABLE_SZ) +/* Proximity diode to use */ +#define TSL2X7X_DIODE0 0x01 +#define TSL2X7X_DIODE1 0x02 +#define TSL2X7X_DIODE_BOTH 0x03 + +/* LED Power */ +#define TSL2X7X_100_mA 0x00 +#define TSL2X7X_50_mA 0x01 +#define TSL2X7X_25_mA 0x02 +#define TSL2X7X_13_mA 0x03 +#define TSL2X7X_MAX_TIMER_CNT 0xFF + /** * struct tsl2x7x_default_settings - power on defaults unless * overridden by platform data. -- 2.14.3
[PATCH 13/13] staging: iio: tsl2x7x: rename prox_config to als_prox_config
The configuration register on the device is represented with the prox_config member on the tsl2x7x_settings structure. According to the TSL2772 data sheet, this register can hold: 1) the proximity drive level, 2) ALS/Proximity long wait, and 3) the ALS gain level. This patch renames prox_config to als_prox_config since ALS settings can be stored here as well. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 7 --- drivers/staging/iio/light/tsl2x7x.h | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 05c0f3d5fac0..708b2c6bdf4b 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -56,7 +56,7 @@ #define TSL2X7X_PRX_MAXTHRESHLO0X0A #define TSL2X7X_PRX_MAXTHRESHHI0X0B #define TSL2X7X_PERSISTENCE0x0C -#define TSL2X7X_PRX_CONFIG 0x0D +#define TSL2X7X_ALS_PRX_CONFIG 0x0D #define TSL2X7X_PRX_COUNT 0x0E #define TSL2X7X_GAIN 0x0F #define TSL2X7X_NOTUSED0x10 @@ -207,7 +207,7 @@ static const struct tsl2x7x_settings tsl2x7x_default_settings = { .prox_time = 255, /* 2.73 ms */ .prox_gain = 0, .wait_time = 255, - .prox_config = 0, + .als_prox_config = 0, .als_gain_trim = 1000, .als_cal_target = 150, .als_persistence = 1, @@ -594,7 +594,8 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev) /* Non calculated parameters */ chip->tsl2x7x_config[TSL2X7X_PRX_TIME] = chip->settings.prox_time; chip->tsl2x7x_config[TSL2X7X_WAIT_TIME] = chip->settings.wait_time; - chip->tsl2x7x_config[TSL2X7X_PRX_CONFIG] = chip->settings.prox_config; + chip->tsl2x7x_config[TSL2X7X_ALS_PRX_CONFIG] = + chip->settings.als_prox_config; chip->tsl2x7x_config[TSL2X7X_ALS_MINTHRESHLO] = (chip->settings.als_thresh_low) & 0xFF; diff --git a/drivers/staging/iio/light/tsl2x7x.h b/drivers/staging/iio/light/tsl2x7x.h index 85d8fe7a94c8..6e30e71a2127 100644 --- a/drivers/staging/iio/light/tsl2x7x.h +++ b/drivers/staging/iio/light/tsl2x7x.h @@ -48,7 +48,8 @@ struct tsl2x7x_lux { * increments. Total integration time is * (256 - prx_time) * 2.73. * @prox_gain: Index into the tsl2x7x_prx_gain array. - * @prox_config: Prox configuration filters. + * @als_prox_config: The value of the ALS / Proximity configuration + * register. * @als_cal_target:Known external ALS reading for calibration. * @als_persistence: H/W Filters, Number of 'out of limits' ALS readings. * @als_interrupt_en: Enable/Disable ALS interrupts @@ -73,7 +74,7 @@ struct tsl2x7x_settings { int wait_time; int prox_time; int prox_gain; - int prox_config; + int als_prox_config; int als_cal_target; u8 als_persistence; bool als_interrupt_en; -- 2.14.3
[PATCH 01/13] staging: iio: tsl2x7x: move integration_time* attributes to IIO_INTENSITY channel
The integration_time* attributes are currently associated with the IIO_LIGHT channel but should be associated with the IIO_INTENSITY channel. Directory listing of the sysfs attributes for a TSL2772 with this patch applied: dev events in_illuminance0_calibrate in_illuminance0_calibscale_available in_illuminance0_input in_illuminance0_lux_table in_illuminance0_target_input in_intensity0_calibbias in_intensity0_calibscale in_intensity0_integration_time in_intensity0_integration_time_available in_intensity0_raw in_intensity1_raw in_proximity0_calibrate in_proximity0_calibscale in_proximity0_calibscale_available in_proximity0_raw name of_node power subsystem uevent Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 9991b0483956..eeccfbb0eb1f 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -827,7 +827,7 @@ in_illuminance0_calibscale_available_show(struct device *dev, static IIO_CONST_ATTR(in_proximity0_calibscale_available, "1 2 4 8"); -static IIO_CONST_ATTR(in_illuminance0_integration_time_available, +static IIO_CONST_ATTR(in_intensity0_integration_time_available, ".00272 - .696"); static ssize_t in_illuminance0_target_input_show(struct device *dev, @@ -1358,7 +1358,7 @@ static irqreturn_t tsl2x7x_event_handler(int irq, void *private) static struct attribute *tsl2x7x_ALS_device_attrs[] = { &dev_attr_in_illuminance0_calibscale_available.attr, - &iio_const_attr_in_illuminance0_integration_time_available + &iio_const_attr_in_intensity0_integration_time_available .dev_attr.attr, &dev_attr_in_illuminance0_target_input.attr, &dev_attr_in_illuminance0_calibrate.attr, @@ -1373,7 +1373,7 @@ static struct attribute *tsl2x7x_PRX_device_attrs[] = { static struct attribute *tsl2x7x_ALSPRX_device_attrs[] = { &dev_attr_in_illuminance0_calibscale_available.attr, - &iio_const_attr_in_illuminance0_integration_time_available + &iio_const_attr_in_intensity0_integration_time_available .dev_attr.attr, &dev_attr_in_illuminance0_target_input.attr, &dev_attr_in_illuminance0_calibrate.attr, @@ -1389,7 +1389,7 @@ static struct attribute *tsl2x7x_PRX2_device_attrs[] = { static struct attribute *tsl2x7x_ALSPRX2_device_attrs[] = { &dev_attr_in_illuminance0_calibscale_available.attr, - &iio_const_attr_in_illuminance0_integration_time_available + &iio_const_attr_in_intensity0_integration_time_available .dev_attr.attr, &dev_attr_in_illuminance0_target_input.attr, &dev_attr_in_illuminance0_calibrate.attr, @@ -1489,13 +1489,13 @@ static const struct tsl2x7x_chip_info tsl2x7x_chip_info_tbl[] = { .type = IIO_LIGHT, .indexed = 1, .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | - BIT(IIO_CHAN_INFO_INT_TIME), + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), }, { .type = IIO_INTENSITY, .indexed = 1, .channel = 0, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_INT_TIME) | BIT(IIO_CHAN_INFO_CALIBSCALE) | BIT(IIO_CHAN_INFO_CALIBBIAS), .event_spec = tsl2x7x_events, @@ -1529,13 +1529,13 @@ static const struct tsl2x7x_chip_info tsl2x7x_chip_info_tbl[] = { .type = IIO_LIGHT, .indexed = 1, .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | - BIT(IIO_CHAN_INFO_INT_TIME), + .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), }, { .type = IIO_INTENSITY, .indexed = 1, .channel = 0, .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_INT_TIME) | BIT(IIO_CHAN_INFO_CALIBSCALE) | BIT(IIO_CHAN_INFO_CALIBBIAS), .event_spec = tsl2x7x_events, @@ -1578,13 +1578,13 @@ static const struct tsl2x7x_chip_info tsl2x7x_chip_info_tbl[] = { .type = IIO_LIGHT, .indexed = 1, .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) | - BIT(IIO_CHAN_INFO_IN
[PATCH 00/13] iio: tsl2x7x: staging cleanups
Here is another round of staging cleanups for this driver mostly based on Jonathon's feedback. We're very close to a staging graduation and I only have a few items remaining on my todo list: - Remove wildcards from the driver name. Jonathan suggested tsl2571. - Don't make the events/ directory available to user space via sysfs if an interrupt pin is not configured. - Go through the newer ALS part numbers on AMS's site and see if there are other part numbers that can be added to this driver. - I found this week an issue with the integration_time sysfs attribute. Hopefully I'll have time to wrap this up next week. Brian Masney (13): staging: iio: tsl2x7x: move integration_time* attributes to IIO_INTENSITY channel staging: iio: tsl2x7x: use GPL-2.0+ SPDX license identifier staging: iio: tsl2x7x: don't return error in IRQ handler staging: iio: tsl2x7x: simplify tsl2x7x_clear_interrupts function staging: iio: tsl2x7x: remove unnecessary chip status checks in suspend/resume staging: iio: tsl2x7x: simplify tsl2x7x_write_interrupt_config return staging: iio: tsl2x7x: simplify device id verification staging: iio: tsl2x7x: add range checking to three sysfs attributes staging: iio: tsl2x7x: move power and diode settings into header file staging: iio: tsl2x7x: rename prx to prox for consistency staging: iio: tsl2x7x: use device defaults for als_time, prox_time and wait_time staging: iio: tsl2x7x: various comment cleanups staging: iio: tsl2x7x: rename prox_config to als_prox_config drivers/staging/iio/light/tsl2x7x.c | 217 ++-- drivers/staging/iio/light/tsl2x7x.h | 81 +++--- 2 files changed, 125 insertions(+), 173 deletions(-) -- 2.14.3
[PATCH 12/13] staging: iio: tsl2x7x: various comment cleanups
This patch removes several unnecessary comments, changes some comments so that the use as much of the allowable 80 characters as possible, adds the proper whitespace, removes some structure members from the kernel docs that are no longer present, and improves the existing kernel doc information for some existing structure members. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 59 + drivers/staging/iio/light/tsl2x7x.h | 48 +++--- 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 293810ff11b9..05c0f3d5fac0 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -1,6 +1,6 @@ /* - * Device driver for monitoring ambient light intensity in (lux) - * and proximity detection (prox) within the TAOS TSL2X7X family of devices. + * Device driver for monitoring ambient light intensity in (lux) and proximity + * detection (prox) within the TAOS TSL2X7X family of devices. * * Copyright (c) 2012, TAOS Corporation. * Copyright (c) 2017-2018 Brian Masney @@ -21,7 +21,7 @@ #include #include "tsl2x7x.h" -/* Cal defs*/ +/* Cal defs */ #define PROX_STAT_CAL 0 #define PROX_STAT_SAMP 1 #define MAX_SAMPLES_CAL200 @@ -34,10 +34,11 @@ /* Lux calculation constants */ #define TSL2X7X_LUX_CALC_OVER_FLOW 65535 -/* TAOS Register definitions - note: - * depending on device, some of these register are not used and the - * register address is benign. +/* + * TAOS Register definitions - Note: depending on device, some of these register + * are not used and the register address is benign. */ + /* 2X7X register offsets */ #define TSL2X7X_MAX_CONFIG_REG 16 @@ -342,15 +343,14 @@ static int tsl2x7x_read_autoinc_regs(struct tsl2X7X_chip *chip, int lower_reg, * @indio_dev: pointer to IIO device * * The raw ch0 and ch1 values of the ambient light sensed in the last - * integration cycle are read from the device. - * Time scale factor array values are adjusted based on the integration time. - * The raw values are multiplied by a scale factor, and device gain is obtained - * using gain index. Limit checks are done next, then the ratio of a multiple - * of ch1 value, to the ch0 value, is calculated. Array tsl2x7x_device_lux[] - * is then scanned to find the first ratio value that is just above the ratio - * we just calculated. The ch0 and ch1 multiplier constants in the array are - * then used along with the time scale factor array values, to calculate the - * lux. + * integration cycle are read from the device. Time scale factor array values + * are adjusted based on the integration time. The raw values are multiplied + * by a scale factor, and device gain is obtained using gain index. Limit + * checks are done next, then the ratio of a multiple of ch1 value, to the + * ch0 value, is calculated. Array tsl2x7x_device_lux[] is then scanned to + * find the first ratio value that is just above the ratio we just calculated. + * The ch0 and ch1 multiplier constants in the array are then used along with + * the time scale factor array values, to calculate the lux. */ static int tsl2x7x_get_lux(struct iio_dev *indio_dev) { @@ -363,7 +363,6 @@ static int tsl2x7x_get_lux(struct iio_dev *indio_dev) mutex_lock(&chip->als_mutex); if (chip->tsl2x7x_chip_status != TSL2X7X_CHIP_WORKING) { - /* device is not enabled */ dev_err(&chip->client->dev, "%s: device is not enabled\n", __func__); ret = -EBUSY; @@ -374,7 +373,6 @@ static int tsl2x7x_get_lux(struct iio_dev *indio_dev) if (ret < 0) goto out_unlock; - /* is data new & valid */ if (!(ret & TSL2X7X_STA_ADC_VALID)) { dev_err(&chip->client->dev, "%s: data not valid yet\n", __func__); @@ -430,12 +428,12 @@ static int tsl2x7x_get_lux(struct iio_dev *indio_dev) lux = (lux + (chip->als_time_scale >> 1)) / chip->als_time_scale; - /* adjust for active gain scale -* The tsl2x7x_device_lux tables have a factor of 256 built-in. -* User-specified gain provides a multiplier. + /* +* adjust for active gain scale. The tsl2x7x_device_lux tables have a +* factor of 256 built-in. User-specified gain provides a multiplier. * Apply user-specified gain before shifting right to retain precision. -* Use 64 bits to avoid overflow on multiplication. -* Then go back to 32 bits before division to avoid using div_u64(). +* Use 64 bits to avoid overflow on multiplication. Then go back to +* 32 bits before division to avoid using div_u64(). */ lux64 = lux; @@ -713,14 +711,13 @@ static int tsl2x7x_chip_off(struct ii
[PATCH 11/13] staging: iio: tsl2x7x: use device defaults for als_time, prox_time and wait_time
This patch changes the defaults of the als_time, prox_time and wait_time to match the defaults according to the TSL2772 datasheet. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index a7b4fcba7935..293810ff11b9 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -201,11 +201,11 @@ static const struct tsl2x7x_lux *tsl2x7x_default_lux_table_group[] = { }; static const struct tsl2x7x_settings tsl2x7x_default_settings = { - .als_time = 219, /* 101 ms */ + .als_time = 255, /* 2.73 ms */ .als_gain = 0, - .prox_time = 254, /* 5.4 ms */ + .prox_time = 255, /* 2.73 ms */ .prox_gain = 0, - .wait_time = 245, + .wait_time = 255, .prox_config = 0, .als_gain_trim = 1000, .als_cal_target = 150, -- 2.14.3
[PATCH 10/13] staging: iio: tsl2x7x: rename prx to prox for consistency
The driver mostly uses the 'prox' naming convention for most of the proximity settings, however prx_time and tsl2x7x_prx_gain was present. This patch renames these to prox_time and tsl2x7x_prox_gain for consistency with everything else in the driver. The kernel documentation for prx_gain is corrected to prox_gain so that it matches what is actually in the structure. Signed-off-by: Brian Masney --- drivers/staging/iio/light/tsl2x7x.c | 12 ++-- drivers/staging/iio/light/tsl2x7x.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/staging/iio/light/tsl2x7x.c b/drivers/staging/iio/light/tsl2x7x.c index 87b99deef7a8..a7b4fcba7935 100644 --- a/drivers/staging/iio/light/tsl2x7x.c +++ b/drivers/staging/iio/light/tsl2x7x.c @@ -203,7 +203,7 @@ static const struct tsl2x7x_lux *tsl2x7x_default_lux_table_group[] = { static const struct tsl2x7x_settings tsl2x7x_default_settings = { .als_time = 219, /* 101 ms */ .als_gain = 0, - .prx_time = 254, /* 5.4 ms */ + .prox_time = 254, /* 5.4 ms */ .prox_gain = 0, .wait_time = 245, .prox_config = 0, @@ -230,7 +230,7 @@ static const s16 tsl2x7x_als_gain[] = { 120 }; -static const s16 tsl2x7x_prx_gain[] = { +static const s16 tsl2x7x_prox_gain[] = { 1, 2, 4, @@ -594,7 +594,7 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev) u8 *dev_reg, reg_val; /* Non calculated parameters */ - chip->tsl2x7x_config[TSL2X7X_PRX_TIME] = chip->settings.prx_time; + chip->tsl2x7x_config[TSL2X7X_PRX_TIME] = chip->settings.prox_time; chip->tsl2x7x_config[TSL2X7X_WAIT_TIME] = chip->settings.wait_time; chip->tsl2x7x_config[TSL2X7X_PRX_CONFIG] = chip->settings.prox_config; @@ -1021,7 +1021,7 @@ static int tsl2x7x_write_event_value(struct iio_dev *indio_dev, if (chan->type == IIO_INTENSITY) time = chip->settings.als_time; else - time = chip->settings.prx_time; + time = chip->settings.prox_time; y = (TSL2X7X_MAX_TIMER_CNT - time) + 1; z = y * TSL2X7X_MIN_ITIME; @@ -1090,7 +1090,7 @@ static int tsl2x7x_read_event_value(struct iio_dev *indio_dev, time = chip->settings.als_time; mult = chip->settings.als_persistence; } else { - time = chip->settings.prx_time; + time = chip->settings.prox_time; mult = chip->settings.prox_persistence; } @@ -1153,7 +1153,7 @@ static int tsl2x7x_read_raw(struct iio_dev *indio_dev, if (chan->type == IIO_LIGHT) *val = tsl2x7x_als_gain[chip->settings.als_gain]; else - *val = tsl2x7x_prx_gain[chip->settings.prox_gain]; + *val = tsl2x7x_prox_gain[chip->settings.prox_gain]; ret = IIO_VAL_INT; break; case IIO_CHAN_INFO_CALIBBIAS: diff --git a/drivers/staging/iio/light/tsl2x7x.h b/drivers/staging/iio/light/tsl2x7x.h index 2c96f0b39b1e..408e5a89edb1 100644 --- a/drivers/staging/iio/light/tsl2x7x.h +++ b/drivers/staging/iio/light/tsl2x7x.h @@ -44,9 +44,9 @@ struct tsl2x7x_lux { * aperture effects. * @wait_time: Time between PRX and ALS cycles * in 2.7 periods - * @prx_time: 5.2ms prox integration time - + * @prox_time: 5.2ms prox integration time - * decrease in 2.7ms periods - * @prx_gain: Proximity gain index + * @prox_gain: Proximity gain index * @prox_config: Prox configuration filters. * @als_cal_target:Known external ALS reading for * calibration. @@ -68,7 +68,7 @@ struct tsl2x7x_settings { int als_gain; int als_gain_trim; int wait_time; - int prx_time; + int prox_time; int prox_gain; int prox_config; int als_cal_target; -- 2.14.3
Re: [PATCH v2] KVM: Extend MAX_IRQ_ROUTES to 4096 for all archs
2018-04-20 22:21 GMT+08:00 Cornelia Huck : > On Fri, 20 Apr 2018 21:51:13 +0800 > Wanpeng Li wrote: > >> 2018-04-20 15:15 GMT+08:00 Cornelia Huck : >> > On Thu, 19 Apr 2018 17:47:28 -0700 >> > Wanpeng Li wrote: >> > >> >> From: Wanpeng Li >> >> >> >> Our virtual machines make use of device assignment by configuring >> >> 12 NVMe disks for high I/O performance. Each NVMe device has 129 >> >> MSI-X Table entries: >> >> Capabilities: [50] MSI-X: Enable+ Count=129 Masked-Vector table: BAR=0 >> >> offset=2000 >> >> The windows virtual machines fail to boot since they will map the number >> >> of >> >> MSI-table entries that the NVMe hardware reported to the bus to msi >> >> routing >> >> table, this will exceed the 1024. This patch extends MAX_IRQ_ROUTES to >> >> 4096 >> >> for all archs, in the future this might be extended again if needed. >> >> >> >> Cc: Paolo Bonzini >> >> Cc: Radim Krčmář >> >> Cc: Tonny Lu >> >> Cc: Cornelia Huck >> >> Signed-off-by: Wanpeng Li >> >> Signed-off-by: Tonny Lu >> >> --- >> >> v1 -> v2: >> >> * extend MAX_IRQ_ROUTES to 4096 for all archs >> >> >> >> include/linux/kvm_host.h | 6 -- >> >> 1 file changed, 6 deletions(-) >> >> >> >> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h >> >> index 6930c63..0a5c299 100644 >> >> --- a/include/linux/kvm_host.h >> >> +++ b/include/linux/kvm_host.h >> >> @@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm >> >> *kvm, unsigned long mmu_seq) >> >> >> >> #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING >> >> >> >> -#ifdef CONFIG_S390 >> >> #define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that... >> > >> > What about /* might need extension/rework in the future */ instead of >> > the FIXME? >> >> Yeah, I guess the maintainers can help to fix it when applying. :) >> >> > >> > As far as I understand, 4096 should cover most architectures and the >> > sane end of s390 configurations, but will not be enough at the scarier >> > end of s390. (I'm not sure how much it matters in practice.) >> > >> > Do we want to make this a tuneable in the future? Do some kind of >> > dynamic allocation? Not sure whether it is worth the trouble. >> >> I think keep as it is currently. > > My main question here is how long this is enough... the number of > virtqueues per device is up to 1K from the initial 64, which makes it > possible to hit the 4K limit with fewer virtio devices than before (on > s390, each virtqueue uses a routing table entry). OTOH, we don't want > giant tables everywhere just to accommodate s390. I suspect there is no real scenario to futher extend for s390 since no guys report. > If the s390 maintainers tell me that nobody is doing the really insane > stuff, I'm happy as well :) Christian, any thoughts? Regards, Wanpeng Li
Re: [PATCH] KVM: s390: reset crypto attributes for all vcpus
Hi Tony, Thank you for the patch! Yet something to improve: [auto build test ERROR on s390/features] [also build test ERROR on v4.17-rc1 next-20180420] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Tony-Krowiak/KVM-s390-reset-crypto-attributes-for-all-vcpus/20180421-050734 base: https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features config: s390-allyesconfig (attached as .config) compiler: s390x-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=s390 All error/warnings (new ones prefixed by >>): arch/s390/kvm/kvm-s390.c: In function 'kvm_s390_vcpu_crypto_reset_all': >> arch/s390/kvm/kvm-s390.c:800:10: error: implicit declaration of function >> 'kvm_s390_vcpu_crypto_setup'; did you mean 'kvm_s390_vcpu_crypto_reset_all'? >> [-Werror=implicit-function-declaration] kvm_s390_vcpu_crypto_setup(vcpu); ^~ kvm_s390_vcpu_crypto_reset_all arch/s390/kvm/kvm-s390.c: At top level: >> arch/s390/kvm/kvm-s390.c:805:13: warning: conflicting types for >> 'kvm_s390_vcpu_crypto_setup' static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); ^~ >> arch/s390/kvm/kvm-s390.c:805:13: error: static declaration of >> 'kvm_s390_vcpu_crypto_setup' follows non-static declaration arch/s390/kvm/kvm-s390.c:800:10: note: previous implicit declaration of 'kvm_s390_vcpu_crypto_setup' was here kvm_s390_vcpu_crypto_setup(vcpu); ^~ arch/s390/kvm/kvm-s390.c: In function 'kvm_s390_vm_set_crypto': arch/s390/kvm/kvm-s390.c:810:6: warning: unused variable 'i' [-Wunused-variable] int i; ^ arch/s390/kvm/kvm-s390.c:809:19: warning: unused variable 'vcpu' [-Wunused-variable] struct kvm_vcpu *vcpu; ^~~~ cc1: some warnings being treated as errors vim +800 arch/s390/kvm/kvm-s390.c 791 792 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm) 793 { 794 int i; 795 struct kvm_vcpu *vcpu; 796 797 kvm_s390_vcpu_block_all(kvm); 798 799 kvm_for_each_vcpu(i, vcpu, kvm) > 800 kvm_s390_vcpu_crypto_setup(vcpu); 801 802 kvm_s390_vcpu_unblock_all(kvm); 803 } 804 > 805 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); 806 --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
Re: [PATCH 1/3] fs: move documentation for thaw_super() where appropriate
On 04/20/18 17:07, Luis R. Rodriguez wrote: > On Sat, Apr 21, 2018 at 12:01:31AM +, Bart Van Assche wrote: >> On Fri, 2018-04-20 at 16:59 -0700, Luis R. Rodriguez wrote: >>> +/** >>> + * thaw_super -- unlock filesystem >>> + * @sb: the super to thaw >>> + * >>> + * Unlocks the filesystem and marks it writeable again after >>> freeze_super(). >>> + */ >> >> Have you verified the output generated by scripts/kernel-doc? Last >> time I checked the output for headers containing a double dash looked >> weird. > > No, I just moved the block of kdoc crap. Randy would have this memorized I'm > sure though so I should just fix the kdoc if its bad while at it. > > Luis > "--" does sound odd. I've never seen it used on purpose. FWIW, I just go by what is in Documentation/doc-guide/kernel-doc.rst: Function documentation -- The general format of a function and function-like macro kernel-doc comment is:: /** * function_name() - Brief description of function. * @arg1: Describe the first argument. * @arg2: Describe the second argument. *One can provide multiple line descriptions *for arguments. -- ~Randy
Re: [PATCH 1/3] fs: move documentation for thaw_super() where appropriate
On Sat, Apr 21, 2018 at 12:01:31AM +, Bart Van Assche wrote: > On Fri, 2018-04-20 at 16:59 -0700, Luis R. Rodriguez wrote: > > +/** > > + * thaw_super -- unlock filesystem > > + * @sb: the super to thaw > > + * > > + * Unlocks the filesystem and marks it writeable again after > > freeze_super(). > > + */ > > Have you verified the output generated by scripts/kernel-doc? Last > time I checked the output for headers containing a double dash looked > weird. No, I just moved the block of kdoc crap. Randy would have this memorized I'm sure though so I should just fix the kdoc if its bad while at it. Luis
Re: [PATCH 0/3] fs: minor fs thaw fixes and adjustments
On Fri, Apr 20, 2018 at 04:59:01PM -0700, Luis R. Rodriguez wrote: > Here's a few minor fs thaw fixes and adjustments I ran accross > as I started to refresh my development for to use freeze_fs on > suspend/hibernation [0]. These are just prep commits for the real > work, I'm still reviewing feedback and adjusting the code for > that. And I forgot to provide the URL, for those that missed the old series I was working on: [0] https://lkml.kernel.org/r/20171129232356.28296-1-mcg...@kernel.org Luis > > I've tested the patches with generic/085 on xfs and found no regressions. > > Luis R. Rodriguez (3): > fs: move documentation for thaw_super() where appropriate > fs: make thaw_super_locked() really just a helper > fs: fix corner case race on freeze_bdev() when sb disappears > > fs/block_dev.c | 4 +++- > fs/super.c | 39 ++- > 2 files changed, 29 insertions(+), 14 deletions(-) > > -- > 2.16.3 > > -- Do not panic
Re: [PATCH 1/3] fs: move documentation for thaw_super() where appropriate
On Fri, 2018-04-20 at 16:59 -0700, Luis R. Rodriguez wrote: > +/** > + * thaw_super -- unlock filesystem > + * @sb: the super to thaw > + * > + * Unlocks the filesystem and marks it writeable again after freeze_super(). > + */ Have you verified the output generated by scripts/kernel-doc? Last time I checked the output for headers containing a double dash looked weird. Bart.
[PATCH 3/3] fs: fix corner case race on freeze_bdev() when sb disappears
freeze_bdev() will bail but leave the bd_fsfreeze_count incremented if the get_active_super() does not find the superblock on our super_blocks list to match. This issue has been present since v2.6.29 during the introduction of the ioctl_fsfreeze() and ioctl_fsthaw() via commit fcccf502540e3 ("filesystem freeze: implement generic freeze feature"). I am not aware of any existing races which have triggered this situation, however, if it does trigger it could mean leaving a superblock with bd_fsfreeze_count always positive. Fixes: fcccf502540e3 ("filesystem freeze: implement generic freeze feature") Signed-off-by: Luis R. Rodriguez --- fs/block_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index b54966679833..7a532aa58c07 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -507,8 +507,10 @@ struct super_block *freeze_bdev(struct block_device *bdev) } sb = get_active_super(bdev); - if (!sb) + if (!sb) { + bdev->bd_fsfreeze_count--; goto out; + } if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb); else -- 2.16.3
[PATCH 1/3] fs: move documentation for thaw_super() where appropriate
On commit 08fdc8a0138a ("buffer.c: call thaw_super during emergency thaw") Mateusz added thaw_super_locked() and made thaw_super() use it, but forgot to move the documentation. Signed-off-by: Luis R. Rodriguez --- fs/super.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/super.c b/fs/super.c index 5fa9a8d8d865..9d0eb5e20a1f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1532,12 +1532,6 @@ int freeze_super(struct super_block *sb) } EXPORT_SYMBOL(freeze_super); -/** - * thaw_super -- unlock filesystem - * @sb: the super to thaw - * - * Unlocks the filesystem and marks it writeable again after freeze_super(). - */ static int thaw_super_locked(struct super_block *sb) { int error; @@ -1573,6 +1567,12 @@ static int thaw_super_locked(struct super_block *sb) return 0; } +/** + * thaw_super -- unlock filesystem + * @sb: the super to thaw + * + * Unlocks the filesystem and marks it writeable again after freeze_super(). + */ int thaw_super(struct super_block *sb) { down_write(&sb->s_umount); -- 2.16.3
[PATCH 2/3] fs: make thaw_super_locked() really just a helper
thaw_super_locked() was added via commit 08fdc8a0138a ("buffer.c: call thaw_super during emergency thaw") merged on v4.17 to help with the ability so that the caller can take charge of handling the s_umount lock, however, it has left all* of the failure handling including unlocking lock of s_umount inside thaw_super_locked(). This does not make thaw_super_locked() flexible. For instance we may later want to use it with the abilty to handle unfolding of the locks ourselves. Change thaw_super_locked() to really just be a helper, and let the callers deal with all the error handling. This commit introeuces no functional changes. Signed-off-by: Luis R. Rodriguez --- fs/super.c | 27 --- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/super.c b/fs/super.c index 9d0eb5e20a1f..82bc74a16f06 100644 --- a/fs/super.c +++ b/fs/super.c @@ -937,10 +937,15 @@ void emergency_remount(void) static void do_thaw_all_callback(struct super_block *sb) { + int error; + down_write(&sb->s_umount); if (sb->s_root && sb->s_flags & MS_BORN) { emergency_thaw_bdev(sb); - thaw_super_locked(sb); + error = thaw_super_locked(sb); + if (error) + up_write(&sb->s_umount); + deactivate_locked_super(sb); } else { up_write(&sb->s_umount); } @@ -1532,14 +1537,13 @@ int freeze_super(struct super_block *sb) } EXPORT_SYMBOL(freeze_super); +/* Caller takes the sb->s_umount rw_semaphore lock and handles active count */ static int thaw_super_locked(struct super_block *sb) { int error; - if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) { - up_write(&sb->s_umount); + if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) return -EINVAL; - } if (sb_rdonly(sb)) { sb->s_writers.frozen = SB_UNFROZEN; @@ -1554,7 +1558,6 @@ static int thaw_super_locked(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem thaw failed\n"); lockdep_sb_freeze_release(sb); - up_write(&sb->s_umount); return error; } } @@ -1563,7 +1566,6 @@ static int thaw_super_locked(struct super_block *sb) sb_freeze_unlock(sb); out: wake_up(&sb->s_writers.wait_unfrozen); - deactivate_locked_super(sb); return 0; } @@ -1575,7 +1577,18 @@ static int thaw_super_locked(struct super_block *sb) */ int thaw_super(struct super_block *sb) { + int error; + down_write(&sb->s_umount); - return thaw_super_locked(sb); + error = thaw_super_locked(sb); + if (error) { + up_write(&sb->s_umount); + goto out; + } + + deactivate_locked_super(sb); + +out: + return error; } EXPORT_SYMBOL(thaw_super); -- 2.16.3
Re: unregister_netdevice: waiting for DEV to become free
syzbot has found reproducer for the following crash on https://github.com/google/kmsan.git/master commit 48c6a2b0ab1b752451cdc40b5392471ed1a2a329 (Mon Apr 16 08:42:26 2018 +) mm/kmsan: fix origin calculation in kmsan_internal_check_memory syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=2dfb68e639f0621b19fb So far this crash happened 180 times on https://github.com/google/kmsan.git/master, net-next, upstream. C reproducer: https://syzkaller.appspot.com/x/repro.c?id=4936564132020224 syzkaller reproducer: https://syzkaller.appspot.com/x/repro.syz?id=5817131010621440 Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6313498770407424 Kernel config: https://syzkaller.appspot.com/x/.config?id=6627248707860932248 compiler: clang version 7.0.0 (trunk 329391) IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+2dfb68e639f0621b1...@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready device bridge_slave_1 left promiscuous mode bridge0: port 2(bridge_slave_1) entered disabled state device bridge_slave_0 left promiscuous mode bridge0: port 1(bridge_slave_0) entered disabled state unregister_netdevice: waiting for lo to become free. Usage count = 3
[PATCH 0/3] fs: minor fs thaw fixes and adjustments
Here's a few minor fs thaw fixes and adjustments I ran accross as I started to refresh my development for to use freeze_fs on suspend/hibernation [0]. These are just prep commits for the real work, I'm still reviewing feedback and adjusting the code for that. I've tested the patches with generic/085 on xfs and found no regressions. Luis R. Rodriguez (3): fs: move documentation for thaw_super() where appropriate fs: make thaw_super_locked() really just a helper fs: fix corner case race on freeze_bdev() when sb disappears fs/block_dev.c | 4 +++- fs/super.c | 39 ++- 2 files changed, 29 insertions(+), 14 deletions(-) -- 2.16.3
Re: [RFC PATCH 00/79] Generic page write protection and a solution to page waitqueue
On 04/20/2018 03:19 PM, Jerome Glisse wrote: > On Fri, Apr 20, 2018 at 12:57:41PM -0700, Tim Chen wrote: >> On 04/04/2018 12:17 PM, jgli...@redhat.com wrote: >> >> >> Your approach seems useful if there are lots of locked pages sharing >> the same wait queue. >> >> That said, in the original workload from our customer with the long wait >> queue >> problem, there was a single super hot page getting migrated, and it >> is being accessed by all threads which caused the big log jam while they >> wait for >> the migration to get completed. >> With your approach, we will still likely end up with a long queue >> in that workload even if we have per page wait queue. >> >> Thanks. > > Ok so i re-read the thread, i was writting this cover letter from memory > and i had bad recollection of your issue, so sorry. > > First, do you have a way to reproduce the issue ? Something easy would > be nice :) Unfortunately it is a customer workload that they guard closely and wouldn't let us look at the source code. We have to profile and backtrace its behavior. Mel made a quick attempt to reproduce the behavior with a hot page migration, but he wasn't quite able to duplicate the pathologic behavior. > > So what i am proposing for per page wait queue would only marginaly help > you (it might not even be mesurable in your workload). It would certainly > make the code smaller and easier to understand i believe. In certain cases if we have lots of pages sharing a page wait queue, your solution would help, and we wouldn't be wasting time checking waiters not waiting on the page that's being unlocked. Though I don't have a specific workload that has such behavior. > > Now that i have look back at your issue i think there is 2 things we > should do. First keep migration page map read only, this would at least > avoid CPU read fault. In trace you captured i wasn't able to ascertain > if this were read or write fault. > > Second idea i have is about NUMA, everytime we NUMA migrate a page we > could attach a temporary struct to the page (using page->mapping). So > if we scan that page again we can inspect information about previous > migration and see if we are not over migrating that page (ie bouncing > it all over). If so we can mark the page (maybe with a page flag if we > can find one) to protect it from further migration. That temporary > struct would be remove after a while, ie autonuma would preallocate a > bunch of those and keep an LRU of them and recycle the oldest when it > needs a new one to migrate another page. The goal to migrate a hot page with care, or avoid bouncing it around frequently makes sense. If it is a hot page shared by many threads running on different NUMA nodes, and moving it will only mildly improve NUMA locality, we should avoid the migration. Tim > > > LSF/MM slots: > > Michal can i get 2 slots to talk about this ? MM only discussion, one > to talk about doing migration with page map read only but write > protected while migration is happening. The other one to talk about > attaching auto NUMA tracking struct to page. > > Cheers, > Jérôme >
Re: [PATCH 2/2] drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+
Daniel Vetter writes: > On Thu, Apr 19, 2018 at 12:20:35PM -0700, Eric Anholt wrote: >> This driver will be used to support Mesa on the Broadcom 7268 and 7278 >> platforms. >> >> V3D 3.3 introduces an MMU, which means we no longer need CMA or vc4's >> complicated CL/shader validation scheme. This massively changes the >> GEM behavior, so I've forked off to a new driver. >> >> Signed-off-by: Eric Anholt > > Read through the entire thing, ignored the hw details, but dropped a few > comments all over. With those addressed one way or another this has my > > Acked-by: Daniel Vetter > > Can I call in a return favour for > https://patchwork.freedesktop.org/series/41219/ ? Done. >> diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst >> index d3ab6abae838..f982558fc25d 100644 >> --- a/Documentation/gpu/drivers.rst >> +++ b/Documentation/gpu/drivers.rst >> @@ -10,6 +10,7 @@ GPU Driver Documentation >> tegra >> tinydrm >> tve200 >> + v3d >> vc4 >> bridge/dw-hdmi >> xen-front >> diff --git a/MAINTAINERS b/MAINTAINERS >> index bca3c32fb141..7314d66833fd 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -4795,6 +4795,15 @@ S:Maintained >> F: drivers/gpu/drm/omapdrm/ >> F: Documentation/devicetree/bindings/display/ti/ >> >> +DRM DRIVERS FOR V3D >> +M: Eric Anholt >> +T: git git://github.com/anholt/linux > > This one also official? If it's just for now I'd drop it ... Sure. >> +/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and >> maps >> + * it for DMA. >> + */ >> +static int >> +v3d_bo_get_pages(struct v3d_bo *bo) >> +{ >> +struct drm_gem_object *obj = &bo->base; >> +struct drm_device *dev = obj->dev; >> +int npages = obj->size >> PAGE_SHIFT; >> +int ret = 0; >> + >> +mutex_lock(&bo->lock); >> +if (bo->pages_refcount++ != 0) >> +goto unlock; >> + >> +if (!obj->import_attach) { >> +bo->pages = drm_gem_get_pages(obj); >> +if (IS_ERR(bo->pages)) { >> +ret = PTR_ERR(bo->pages); >> +goto unlock; >> +} >> + >> +bo->sgt = drm_prime_pages_to_sg(bo->pages, npages); >> +if (IS_ERR(bo->sgt)) { >> +ret = PTR_ERR(bo->sgt); >> +goto put_pages; >> +} >> +} else { >> +bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL); >> +if (!bo->pages) >> +goto put_pages; >> + >> +drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages, >> + NULL, npages); >> +} >> + >> +/* Map the pages for use by the GPU. */ >> +dma_map_sg(dev->dev, bo->sgt->sgl, >> + bo->sgt->nents, DMA_BIDIRECTIONAL); > > For dma-buf you already get a mapped sgt, and the idea at least is to not > noodle around in internals like drm_prime_sg_to_page_addr_arrays does ... > That was just a hack Dave did to avoid rewriting all of ttm, which imo > shouldn't be copied all over the place (but it happens). > > Since you immediately convert the page list back into a mapped sg table > it's a bit silly. > > I guess longer-term we could switch the gem helpers to just use sg tables > directly, instead of going through pages arrays. But core mm folks just > got nasty on us doing that, so I'm not sure which direction we should go > here longer-term. I moved the map/unmap to the !import case. We still need the pages array for v3d_gem_fault(). If we have an alternative for that that isn't a linear walk of the sg at fault time, I'd be up for that. >> +int v3d_gem_fault(struct vm_fault *vmf) >> +{ >> +struct vm_area_struct *vma = vmf->vma; >> +struct drm_gem_object *obj = vma->vm_private_data; >> +struct v3d_bo *bo = to_v3d_bo(obj); >> +unsigned long pfn; >> +pgoff_t pgoff; >> +int ret; >> + >> +/* We don't use vmf->pgoff since that has the fake offset: */ >> +pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; >> +pfn = page_to_pfn(bo->pages[pgoff]); > > Freaked out for a bit, then noticed you're pinning everything. That makes > the bo->pages_refcount a bit confusing since totally not needed. > > I guess if you do have longer-term plans to roll out a shrinker I'd put at > least a FIXME here. Yep, long-term shrinker plans. Added a note to the kerneldoc about why we don't shrink yet. >> +int v3d_mmap(struct file *filp, struct vm_area_struct *vma) >> +{ >> +int ret; >> + >> +ret = drm_gem_mmap(filp, vma); >> +if (ret) >> +return ret; >> + >> +v3d_set_mmap_vma_flags(vma); > > If it'd actually understand what these vma flag frobberies in most drivers > do I might come up with an idea how we can avoid the copypaste. Oh well. > Maybe a drm_gem_mmap_wc? drm_gem_mmap seems to already be wc, just with different vma flags. If you ever figure out the flag frobbing, please let me know. :( >> + >> +return ret
Re: [RFC PATCH] dt-bindings: add a jsonschema binding example
Quoting Rob Herring (2018-04-20 11:15:04) > On Fri, Apr 20, 2018 at 11:47 AM, Stephen Boyd wrote: > > Quoting Rob Herring (2018-04-18 15:29:05) > >> diff --git a/Documentation/devicetree/bindings/example-schema.yaml > >> b/Documentation/devicetree/bindings/example-schema.yaml > >> new file mode 100644 > >> index ..fe0a3bd1668e > >> --- /dev/null > >> +++ b/Documentation/devicetree/bindings/example-schema.yaml > >> @@ -0,0 +1,149 @@ > >> + > >> + The end of the description is marked by indentation less than the first > >> line > >> + in the description. > >> + > >> +select: false > >> + # 'select' is a schema applied to a DT node to determine if this binding > >> + # schema should be applied to the node. It is optional and by default > >> the > >> + # possible compatible strings are extracted and used to match. > > > > Can we get a concrete example here? > > select: true > > :) Which is apply to every node. > > A better one is from the memory node schema ('$nodename' gets added : > > select: > required: ["$nodename"] > properties: > $nodename: > oneOf: > - pattern: "^memory@[0-9a-f]*" > - const: "memory" # 'memory' only allowed for selecting > > > I expect the vast majority of device bindings will not use select at > all and rely on compatible string matching. Thanks! I was looking to see how the select syntax would work and this shows one example nicely. I suppose another way would be to show how a compatible string would be matched through select, even though it's redundant. Is there a way we can enforce node names through the schema too? For example to enforce that a clock controller is called 'clock-controller' or a spi master is called 'spi'. > > >> + > >> +properties: > > [...] > >> + > >> + interrupts: > >> +# Either 1 or 2 interrupts can be present > >> +minItems: 1 > >> +maxItems: 2 > >> +items: > >> + - description: tx or combined interrupt > >> + - description: rx interrupt > >> + > >> +description: | > > > > The '|' is needed to make yaml happy? > > Yes, this is simply how you do literal text blocks in yaml. > > We don't really need for this one really, but for the top-level > 'description' we do. The long term intent is 'description' would be > written in sphinx/rst and can be extracted into the DT spec (for > common bindings). Grant has experimented with that some. Ok. That sounds cool. Then we could embed links to datasheets and SVGs too. > > >> + A variable number of interrupts warrants a description of what > >> conditions > >> + affect the number of interrupts. Otherwise, descriptions on standard > >> + properties are not necessary. > >> + > >> + interrupt-names: > >> +# minItems must be specified here because the default would be 2 > >> +minItems: 1 > >> +items: > >> + - const: "tx irq" > >> + - const: "rx irq" > >> + > >> + # Property names starting with '#' must be quoted > >> + '#interrupt-cells': > >> +# A simple case where the value must always be '2'. > >> +# The core schema handles that this must be a single integer. > >> +const: 2 > >> + > >> + interrupt-controller: {} > > > > Does '{}' mean nothing to see here? > > Yes. It's just an empty schema that's always valid. Could we include another schema to indicate that this is an interrupt controller? I'm sort of asking for multi-schema inheritance. > > >> + foo-gpios: > >> +maxItems: 1 > >> +description: A connection of the 'foo' gpio line. > >> + > >> + vendor,int-property: > >> +description: Vendor specific properties must have a description > >> +type: integer # A type is also required > >> +enum: [2, 4, 6, 8, 10] > >> + > >> + vendor,bool-property: > >> +description: Vendor specific properties must have a description > >> +type: boolean > >> + > >> +required: > >> + - compatible > >> + - reg > >> + - interrupts > >> + - interrupt-controller > > > > Can the required or optional parts go under each property instead of > > having a different section? > > No, because then it is not json-schema language. > > > Or does that make the schema parser > > difficult to implement? > > Yes, because then we have to implement a schema parser. :/
Re: [PATCH v4 04/22] iommu/vt-d: add bind_pasid_table function
On Fri, 20 Apr 2018 19:25:34 +0100 Jean-Philippe Brucker wrote: > On Tue, Apr 17, 2018 at 08:10:47PM +0100, Alex Williamson wrote: > [...] > > > + /* Assign guest PASID table pointer and size order */ > > > + ctx_lo = (pasidt_binfo->base_ptr & VTD_PAGE_MASK) | > > > + (pasidt_binfo->pasid_bits - MIN_NR_PASID_BITS); > > > > Where does this IOMMU API interface define that base_ptr is 4K > > aligned or the format of the PASID table? Are these all > > standardized or do they vary by host IOMMU? If they're standards, > > maybe we could note that and the spec which defines them when we > > declare base_ptr. If they're IOMMU specific then I don't > > understand how we'll match a user provided PASID table to the > > requirements and format of the host IOMMU. Thanks, > > On SMMUv3 the minimum alignment for base_ptr is 64 bytes, so a guest > under a vSMMU might pass a pointer that's not aligned on 4k. > PASID table pointer for VT-d is 4K aligned. > Maybe this information could be part of the data passed to userspace > about IOMMU table formats and features? They're not part of this > series, but I think we wanted to communicate IOMMU-specific features > via sysfs. > Agreed, I believe Yi Liu is working on a sysfs interface such that QEMU can match IOMMU model and features.
Re: [PATCH] [v2] scsi: ips: fix firmware timestamps for 32-bit
Arnd, > do_gettimeofday() is deprecated since it will stop working in 2038 on > 32-bit platforms, leading to incorrect times passed to the firmware. > On 64-bit platforms the current code appears to be fine, as the > calculation passes an 8-bit century number into the firmware that can > represent times long in the future (possibly until 25599). > > Using ktime_get_real_seconds() to get a 64-bit seconds value and > time64_to_tm() to convert it into the firmware format greatly > simplifies the ips timekeeping code, makes 32-bit and 64-bit behave > the same way here, and gets us closer to removing the deprecated > interfaces. Applied to 4.18/scsi-queue, thanks! -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH] [RESEND 2] scsi: esas2r: use ktime_get_real_seconds()
Arnd, > do_gettimeofday() is deprecated because of the y2038 overflow. Here, > we use the result to pass into a 32-bit field in the firmware, which > still risks an overflow, but if the firmware is written to expect > unsigned values, it can at least last until y2106, and there is not > much we can do about it. > > This changes do_gettimeofday() to ktime_get_real_seconds(), which at > least simplifies the code a bit, and avoids the deprecated > interface. I'm adding a comment about the overflow to document what > happens. Applied to 4.18/scsi-queue. Thanks! -- Martin K. Petersen Oracle Linux Engineering
[PATCH] fscrypt: use unbound workqueue for decryption
Improve fscrypt read performance by switching the decryption workqueue from bound to unbound. With the bound workqueue, when multiple bios completed on the same CPU, they were decrypted on that same CPU. But with the unbound queue, they are now decrypted in parallel on any CPU. Although fscrypt read performance can be tough to measure due to the many sources of variation, this change is most beneficial when decryption is slow, e.g. on CPUs without AES instructions. For example, I timed tarring up encrypted directories on f2fs. On x86 with AES-NI instructions disabled, the unbound workqueue improved performance by about 25-35%, using 1 to NUM_CPUs jobs with 4 or 8 CPUs available. But with AES-NI enabled, performance was unchanged to within ~2%. I also did the same test on a quad-core ARM CPU using xts-speck128-neon encryption. There performance was usually about 10% better with the unbound workqueue, bringing it closer to the unencrypted speed. The unbound workqueue may be worse in some cases due to worse locality, but I think it's still the better default. dm-crypt uses an unbound workqueue by default too, so this change makes fscrypt match. Signed-off-by: Eric Biggers --- fs/crypto/crypto.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index ce654526c0fb..984e190f9b89 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -427,8 +427,17 @@ int fscrypt_initialize(unsigned int cop_flags) */ static int __init fscrypt_init(void) { + /* +* Use an unbound workqueue to allow bios to be decrypted in parallel +* even when they happen to complete on the same CPU. This sacrifices +* locality, but it's worthwhile since decryption is CPU-intensive. +* +* Also use a high-priority workqueue to prioritize decryption work, +* which blocks reads from completing, over regular application tasks. +*/ fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue", - WQ_HIGHPRI, 0); +WQ_UNBOUND | WQ_HIGHPRI, +num_online_cpus()); if (!fscrypt_read_workqueue) goto fail; -- 2.17.0.484.g0c8726318c-goog
Re: [PATCH][V2] isci: Fix infinite loop in while loop
Colin, > In the case when the phy_mask is bitwise anded with the phy_index bit > is zero the continue statement currently jumps to the next iteration > of the while loop and phy_index is never actually incremented, > potentially causing an infinite loop if phy_index is less than > SCI_MAX_PHS. Fix this by turning the while loop into a for loop. Applied to 4.17/scsi-fixes. Thanks! -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH] perf tools: set kernel end address properly
On Fri, 20 Apr 2018 08:59:15 +0900 Namhyung Kim wrote: > The map_groups__fixup_end() was called to set end addresses of kernel > map and module maps. But now machine__create_modules() is set the end > address of modules properly, the only remaining piece is the kernel map. > We can set it with adjacent module's address directly instead of calling > the map_groups__fixup_end(). If there's no module after the kernel map, > the end address will be ~0ULL. > > Since it also changes the start address of the kernel map, it needs to > re-insert the map to the kmaps in order to keep a correct ordering. Kim > reported that it caused problems on ARM64. > > Reported-by: Kim Phillips > Signed-off-by: Namhyung Kim > --- Acked-by: Kim Phillips Thanks, Kim
Re: [PATCH v4 04/22] iommu/vt-d: add bind_pasid_table function
On Tue, 17 Apr 2018 13:10:47 -0600 Alex Williamson wrote: > On Mon, 16 Apr 2018 14:48:53 -0700 > Jacob Pan wrote: > > > Add Intel VT-d ops to the generic iommu_bind_pasid_table API > > functions. > > > > The primary use case is for direct assignment of SVM capable > > device. Originated from emulated IOMMU in the guest, the request > > goes through many layers (e.g. VFIO). Upon calling host IOMMU > > driver, caller passes guest PASID table pointer (GPA) and size. > > > > Device context table entry is modified by Intel IOMMU specific > > bind_pasid_table function. This will turn on nesting mode and > > matching translation type. > > > > The unbind operation restores default context mapping. > > > > Signed-off-by: Jacob Pan > > Signed-off-by: Liu, Yi L > > Signed-off-by: Ashok Raj > > --- > > drivers/iommu/intel-iommu.c | 119 > > ++ > > include/linux/dma_remapping.h | 1 + 2 files changed, 120 > > insertions(+) > > > > diff --git a/drivers/iommu/intel-iommu.c > > b/drivers/iommu/intel-iommu.c index a0f81a4..d8058be 100644 > > --- a/drivers/iommu/intel-iommu.c > > +++ b/drivers/iommu/intel-iommu.c > > @@ -2409,6 +2409,7 @@ static struct dmar_domain > > *dmar_insert_one_dev_info(struct intel_iommu *iommu, > > info->ats_supported = info->pasid_supported = info->pri_supported = > > 0; info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; > > info->ats_qdep = 0; > > + info->pasid_table_bound = 0; > > info->dev = dev; > > info->domain = domain; > > info->iommu = iommu; > > @@ -5132,6 +5133,7 @@ static void > > intel_iommu_put_resv_regions(struct device *dev, > > #ifdef CONFIG_INTEL_IOMMU_SVM > > #define MAX_NR_PASID_BITS (20) > > +#define MIN_NR_PASID_BITS (5) > > static inline unsigned long intel_iommu_get_pts(struct intel_iommu > > *iommu) { > > /* > > @@ -5258,6 +5260,119 @@ struct intel_iommu > > *intel_svm_device_to_iommu(struct device *dev) > > return iommu; > > } > > + > > +static int intel_iommu_bind_pasid_table(struct iommu_domain > > *domain, > > + struct device *dev, struct pasid_table_config > > *pasidt_binfo) +{ > > Never validates pasidt_binfo->{version,bytes} > good catch, will do. > > + struct intel_iommu *iommu; > > + struct context_entry *context; > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain); > > + struct device_domain_info *info; > > + struct pci_dev *pdev; > > + u8 bus, devfn, host_table_pasid_bits; > > + u16 did, sid; > > + int ret = 0; > > + unsigned long flags; > > + u64 ctx_lo; > > + > > + iommu = device_to_iommu(dev, &bus, &devfn); > > + if (!iommu) > > + return -ENODEV; > > + /* VT-d spec section 9.4 says pasid table size is encoded > > as 2^(x+5) */ > > + host_table_pasid_bits = intel_iommu_get_pts(iommu) + > > MIN_NR_PASID_BITS; > > + if (!pasidt_binfo || pasidt_binfo->pasid_bits > > > host_table_pasid_bits || > > + pasidt_binfo->pasid_bits < MIN_NR_PASID_BITS) { > > + pr_err("Invalid gPASID bits %d, host range %d - > > %d\n", > > + pasidt_binfo->pasid_bits, > > + MIN_NR_PASID_BITS, host_table_pasid_bits); > > + return -ERANGE; > > + } > > + if (!ecap_nest(iommu->ecap)) { > > + dev_err(dev, "Cannot bind PASID table, no nested > > translation\n"); > > + ret = -EINVAL; > > + goto out; > > + } > > Gratuitous use of pr_err, some of these look user reachable, for > instance can vfio know in advance the supported widths or can the user > trigger that pr_err at will? Yes, the current IOMMU sysfs for vt-d does show the content of capability registers so user could know in advance whether the nested mode is supported. But I think we are in need of some generic interface to enumerate IOMMU features. Here I am trying to prepare for the worst. Are you concerned about security if user can trigger that error at will? Sorry I didn't get the point. > Some of these errno values are also > maybe not as descriptive as they could be. For instance if the iommu > doesn't support nesting, that's not a calling argument error, that's > an unsupported device error, right? > your are right, that is not invalid argument. You mean use ENODEV? > > + pdev = to_pci_dev(dev); > > + sid = PCI_DEVID(bus, devfn); > > + info = dev->archdata.iommu; > > + > > + if (!info) { > > + dev_err(dev, "Invalid device domain info\n"); > > + ret = -EINVAL; > > + goto out; > > + } > > + if (info->pasid_table_bound) { > > + dev_err(dev, "Device PASID table already bound\n"); > > + ret = -EBUSY; > > + goto out; > > + } > > + if (!info->pasid_enabled) { > > + ret = pci_enable_pasid(pdev, info->pasid_supported > > & ~1); > > + if (ret) { > > + dev_err(dev, "Failed to enable PASID\n"); > > + goto out; > > + } > > + } > > + spi
Re: [PATCH v3 0/6] scsi: handle special return codes for ABORTED COMMAND
Martin, > Here is another attempt to handle the special return codes for ABORTED > COMMAND for certain SCSI devices. Following MKP's recommendation, I've > created two new BLIST flags, simplifying the code in scsi_error.c > compared to the previous versions. Rather than using "free" bits, I > increased the length of blist_flag_t to 64 bit, and used previously > unused bits. I also added checking for obsolete and unused bits. > > For the blist_flag_t size increase, I used sparse to try and avoid > regressions; that necessitated fixing sparse's errors for the current > code first. Much better, thanks for reworking this. Applied to 4.18/scsi-queue. -- Martin K. Petersen Oracle Linux Engineering
Re: [patch V2 0/8] rslib: Cleanup and VLA removal
On Thu, Apr 19, 2018 at 3:04 AM, Thomas Gleixner wrote: > Kees tried to get rid of the Variable Length Arrays in the Reed-Solomon > library by replacing them with fixed length arrays on stack. Though they > are rather large and Andrew did not fall in love with that solution. > > This series addresses that in a different way by splitting the rs control > structure up, so that each invocation of rs_init() returns a new instance > in which the decoder buffers are allocated. The polynom tables which build > the base for the RS codecs are still shareable between the instances to > avoid large allocations and initializations of the same data over and over. > > The usage sites have been audited and fixed up where necessary to > accomodate the decoder change which forbids parallel decoder invocation for > a particular rs control instance to prevent buffer corruption. > > While at it the patch set tidies up the code and converts the related files > over to use SPDX license identifiers. > > Changes since V1: > >Simplify error path in the diskonchip code and use the proper >function to free the decoder. > > As this spawns multiple subsystems it should either go through Andrews tree > or Kees can route it with his other hardening stuff. I can start collecting ignored VLA patches. I haven't done that yet as most maintainers have been taking them. There are only a handful unapplied. Should I start the tree with rslib? :) Either way: Reviewed-by: Kees Cook -Kees -- Kees Cook Pixel Security
[PATCH v4] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.
Winbond spi-nor flash 32MB and larger have an 'Extended Address Register' as one option for addressing beyond 16MB (Macronix has the same concept, Spansion has EXTADD bits in the Bank Address Register). According to section 8.2.7 Write Extended Address Register (C5h) of the Winbond W25Q256FV data sheet (256M-BIT SPI flash) The Extended Address Register is only effective when the device is in the 3-Byte Address Mode. When the device operates in the 4-Byte Address Mode (ADS=1), any command with address input of A31-A24 will replace the Extended Address Register values. It is recommended to check and update the Extended Address Register if necessary when the device is switched from 4-Byte to 3-Byte Address Mode. So the documentation suggests clearing the EAR after switching to 3-byte mode. Experimentation shows that the EAR is *always* one after the switch to 3-byte mode, so clearing the EAR is mandatory at shutdown for a subsequent 3-byte-addressed reboot to work. Note that some SOCs (e.g. MT7621) do not assert a reset line at normal reboot, so we cannot rely on hardware reset. The MT7621 does assert a reset line at watchdog-reset. Acked-by: Marek Vasut Signed-off-by: NeilBrown --- Changes since v3: Removed Fixes/stable tags. Added Acked-by from Marek. Changes sinc3 v2: Fixed comment style. drivers/mtd/spi-nor/spi-nor.c | 14 ++ include/linux/mtd/spi-nor.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 5bfa36e95f35..42ae9a1529bb 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -284,6 +284,20 @@ static inline int set_4byte(struct spi_nor *nor, const struct flash_info *info, if (need_wren) write_disable(nor); + if (!status && !enable && + JEDEC_MFR(info) == SNOR_MFR_WINBOND) { + /* +* On Winbond W25Q256FV, leaving 4byte mode causes +* the Extended Address Register to be set to 1, so all +* 3-byte-address reads come from the second 16M. +* We must clear the register to enable normal behavior. +*/ + write_enable(nor); + nor->cmd_buf[0] = 0; + nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1); + write_disable(nor); + } + return status; default: /* Spansion style */ diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index de36969eb359..e60da0d34cc1 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -62,6 +62,8 @@ #define SPINOR_OP_RDCR 0x35/* Read configuration register */ #define SPINOR_OP_RDFSR0x70/* Read flag status register */ #define SPINOR_OP_CLFSR0x50/* Clear flag status register */ +#define SPINOR_OP_RDEAR0xc8/* Read Extended Address Register */ +#define SPINOR_OP_WREAR0xc5/* Write Extended Address Register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ_4B 0x13/* Read data bytes (low frequency) */ -- 2.14.0.rc0.dirty signature.asc Description: PGP signature
Re: Representative Needed.
Good day, I am seeking your concept with great gratitude to present you as a representative to carry out business transactions with a reasonable share upon your interest and cooperation to work with us in trust. If interested please get back. Regards Kingsley --- This email has been checked for viruses by Avast antivirus software. https://www.avast.com/antivirus
Re: [PATCH v2] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.
On Sat, Apr 21 2018, Boris Brezillon wrote: > On Sat, 21 Apr 2018 07:28:19 +1000 > NeilBrown wrote: > >> On Fri, Apr 20 2018, Boris Brezillon wrote: >> >> > Hi Neil, >> > >> > On Mon, 16 Apr 2018 09:42:30 +1000 >> > NeilBrown wrote: >> > >> >> Winbond spi-nor flash 32MB and larger have an 'Extended Address >> >> Register' as one option for addressing beyond 16MB (Macronix >> >> has the same concept, Spansion has EXTADD bits in the Bank Address >> >> Register). >> >> >> >> According to section >> >>8.2.7 Write Extended Address Register (C5h) >> >> >> >> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash) >> >> >> >>The Extended Address Register is only effective when the device is >> >>in the 3-Byte Address Mode. When the device operates in the 4-Byte >> >>Address Mode (ADS=1), any command with address input of A31-A24 >> >>will replace the Extended Address Register values. It is >> >>recommended to check and update the Extended Address Register if >> >>necessary when the device is switched from 4-Byte to 3-Byte Address >> >>Mode. >> >> >> >> So the documentation suggests clearing the EAR after switching to >> >> 3-byte mode. Experimentation shows that the EAR is *always* one after >> >> the switch to 3-byte mode, so clearing the EAR is mandatory at >> >> shutdown for a subsequent 3-byte-addressed reboot to work. >> >> >> >> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal >> >> reboot, so we cannot rely on hardware reset. The MT7621 does assert a >> >> reset line at watchdog-reset. >> >> >> >> Signed-off-by: NeilBrown >> > >> > We should probably backport the fix. Can you add a Fixes and Cc-stable >> > tag? >> >> It's a bit weird having Fixes when this isn't a regression, but I guess >> it doesn't hurt. > > Well, I thought you wanted this patch to be backported to stable > releases, hence my suggestion. Of course it's not a regression, since > the bug is here from the beginning, but nonetheless it's fixing a bug. I have not interest in this patch going to stable. I'll be perfectly happy once it lands upstream. > >> I chose >> Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when >> exiting") >> as this patch it useless without that one. > > Not sure that's how Fixes should be used. IIUC, it should point to the > first commit where the bug was introduced, so I guess it's 0aa87b7563f1 > ("mtd: m25p80: add support for the windbond w25q256 chip") in this case. That does make a certain sort of sense, but if you tried applying this patch to that kernel, it wouldn't apply at all. So it is hard to say that it fixes anything there. I think it is best to drop the fixes/stable tags. It isn't a regression, doesn't cause data corruption, doesn't make it possible to crash the kernel, so it isn't really "stable" material in my mind. Thanks, NeilBrown > > Now, if you want to restrict the kernel releases this patch should be > backported to, you can use the '# vX.Y+' suffix in your Cc-stable tag. > >> >> I also fixed the comment and have resent. > > Thanks. signature.asc Description: PGP signature
Re: [PATCH v3 0/7] MIPS: perf: MT fixes and improvements
On 04/20/2018 03:23 AM, Matt Redfearn wrote: > This series addresses a few issues with how the MIPS performance > counters code supports the hardware multithreading MT ASE. > > Firstly, implementations of the MT ASE may implement performance > counters > per core or per thread(TC). MIPS Techologies implementations signal this > via a bit in the implmentation specific CONFIG7 register. Since this > register is implementation specific, checking it should be guarded by a > PRID check. This also replaces a bit defined by a magic number. > > Secondly, the code currently uses vpe_id(), defined as > smp_processor_id(), divided by 2, to share core performance counters > between VPEs. This relies on a couple of assumptions of the hardware > implementation to function correctly (always 2 VPEs per core, and the > hardware reading only the least significant bit). > > Finally, the method of sharing core performance counters between VPEs is > suboptimal since it does not allow one process running on a VPE to use > all of the performance counters available to it, because the kernel will > reserve half of the coutners for the other VPE even if it may never use > them. This reservation at counter probe is replaced with an allocation > on use strategy. > > Tested on a MIPS Creator CI40 (2C2T MIPS InterAptiv with per-TC > counters, though for the purposes of testing the per-TC availability was > hardcoded to allow testing both paths). > > Series applies to v4.16 Sorry it took so long to get that tested. Sounds like you need to build test this on a BMIPS5000 configuration (bmips_stb_defconfig should provide that): In file included from ./arch/mips/include/asm/mach-generic/spaces.h:15:0, from ./arch/mips/include/asm/mach-bmips/spaces.h:16, from ./arch/mips/include/asm/addrspace.h:13, from ./arch/mips/include/asm/barrier.h:11, from ./include/linux/compiler.h:245, from ./include/linux/kernel.h:10, from ./include/linux/cpumask.h:10, from arch/mips/kernel/perf_event_mipsxx.c:18: arch/mips/kernel/perf_event_mipsxx.c: In function 'mipsxx_pmu_enable_event': ./arch/mips/include/asm/mipsregs.h:738:52: error: suggest parentheses around '+' in operand of '&' [-Werror=parentheses] #define BRCM_PERFCTRL_VPEID(v) (_ULCAST_(1) << (12 + v)) arch/mips/kernel/perf_event_mipsxx.c:385:10: note: in expansion of macro 'BRCM_PERFCTRL_VPEID' ctrl = BRCM_PERFCTRL_VPEID(cpu & MIPS_CPUID_TO_COUNTER_MASK); ^~~ CC drivers/of/fdt_addres after fixing that, I tried the following to see whether this would be a good test case to exercise against: perf record -a -C 0 taskset -c 1 /bin/true perf record -a -C 1 taskset -c 0 /bin/true and would not see anything related to /bin/true running in either case, which seems like it does the right thing? Tested-by: Florian Fainelli BTW, for some reason not specifying -a -C does lead to lockups, consistently and for pretty much any perf command, this could be BMIPS specific, did not get a chance to cross test on a different machine. > > > Changes in v3: > New patch to detect feature presence in cpu-probe.c > Use flag in cpu_data set by cpu_probe.c to indicate feature presence. > - rebase on new feature detection > > Changes in v2: > Fix mipsxx_pmu_enable_event for !#ifdef CONFIG_MIPS_MT_SMP > - Fix !#ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS build > - re-use cpuc variable in mipsxx_pmu_alloc_counter, > mipsxx_pmu_free_counter rather than having sibling_ version. > Since BMIPS5000 does not implement per TC counters, we can remove the > check on cpu_has_mipsmt_pertccounters. > New patch to fix BMIPS5000 system mode perf. > > Matt Redfearn (7): > MIPS: Probe for MIPS MT perf counters per TC > MIPS: perf: More robustly probe for the presence of per-tc counters > MIPS: perf: Use correct VPE ID when setting up VPE tracing > MIPS: perf: Fix perf with MT counting other threads > MIPS: perf: Allocate per-core counters on demand > MIPS: perf: Fold vpe_id() macro into it's one last usage > MIPS: perf: Fix BMIPS5000 system mode counting > > arch/mips/include/asm/cpu-features.h | 7 ++ > arch/mips/include/asm/cpu.h | 2 + > arch/mips/include/asm/mipsregs.h | 6 + > arch/mips/kernel/cpu-probe.c | 12 ++ > arch/mips/kernel/perf_event_mipsxx.c | 232 > +++ > arch/mips/oprofile/op_model_mipsxx.c | 2 - > 6 files changed, 150 insertions(+), 111 deletions(-) > -- Florian
Re: [PATCH v6 01/16] initrd: Add weakly-linked generic free_initrd_mem.
Hi Palmer, Palmer Dabbelt writes: > On Wed, 18 Apr 2018 04:10:16 PDT (-0700), s...@shealevy.com wrote: >> Hi all, >> >> Shea Levy writes: >> >>> This function is effectively identical across 14 architectures, and >>> the generic implementation is small enough to be negligible in the >>> architectures that do override it. Many of the remaining divergent >>> implementations can be included in the common code path in future, >>> further reducing code duplication and sharing improvements between >>> architectures. >>> >>> Series boot-tested on RISC-V (which now uses the generic >>> implementation) and x86_64 (which doesn't). >>> >>> v6: Add information about build/run testing. >>> v5: Add more complete commit messages. >>> v4: Use weak symbols instead of Kconfig. >>> v3: Make the generic path opt-out instead of opt-in. >>> v2: Mark generic free_initrd_mem __init. >>> >>> Signed-off-by: Shea Levy >>> --- >>> init/initramfs.c | 5 + >>> 1 file changed, 5 insertions(+) >>> >>> diff --git a/init/initramfs.c b/init/initramfs.c >>> index 7e99a0038942..c8fe150f958a 100644 >>> --- a/init/initramfs.c >>> +++ b/init/initramfs.c >>> @@ -526,6 +526,11 @@ extern unsigned long __initramfs_size; >>> #include >>> #include >>> >>> +void __init __weak free_initrd_mem(unsigned long start, unsigned long end) >>> +{ >>> + free_reserved_area((void *)start, (void *)end, -1, "initrd"); >>> +} >>> + >>> static void __init free_initrd(void) >>> { >>> #ifdef CONFIG_KEXEC_CORE >>> -- >>> 2.16.2 >> >> This series has been quiet for a few weeks other than picking up some >> arch-specific acks. What is the next step here? > > I'm not sure. I don't really think it's sane for the RISC-V tree because it > touches so many architectures -- I haven't looked closely, though. Yeah, I think that makes sense. > IIRC > there's a slight behavior change to the RISC-V port, which I'd be OK taking > through my tree (and then obviously the RISC-V cleanup as well, unless it > goes > in as a whole patch set). > So currently the behavior for RISC-V is changed by simply deleting the arch-specific free_initrd_mem, which was a noop. Would you like me to first submit a patch to have the arch-specific free_initrd_mem and then change that in this series? > > For the IRQ cleanup I currently have in flight > > * Add the generic support > * Move every arch over (RISC-V is in, the rest aren't yet) > * Clean up a bit now that everyone is generic > > That lets all the arch-specific patches go in parallel, but can be a bit of a > headache to manage. With the current series, the first patch could go in on its own and all of the arch-specific patches can go in in parallel if we wanted to, but beyond the above-suggested implementation of the RISC-V free_initrd_mem there's no real reordering meaningful here. > > I'm adding Arnd and Olof, as they know a lot more about Linux than I do. > Here's the top-level of the v4 patch set: https://lkml.org/lkml/2018/3/28/744 And here's the top-level of v6, the latest: https://lkml.org/lkml/2018/4/1/50 signature.asc Description: PGP signature
Re: Representative Needed.
Good day, I am seeking your concept with great gratitude to present you as a representative to carry out business transactions with a reasonable share upon your interest and cooperation to work with us in trust. If interested please get back. Regards Kingsley --- This email has been checked for viruses by Avast antivirus software. https://www.avast.com/antivirus
Re: [PATCH 1/2] regulator: add QCOM RPMh regulator driver
On Fri, Apr 20 2018 at 13:07 -0600, David Collins wrote: On 04/18/2018 10:55 PM, Stephen Boyd wrote: Quoting David Collins (2018-03-22 18:30:06) On 03/21/2018 12:07 PM, Stephen Boyd wrote: Quoting David Collins (2018-03-16 18:09:10) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 097f617..e0ecd0a 100644 + ret = cmd_db_ready(); + if (ret < 0) { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Command DB not available, ret=%d\n", ret); + return ret; + } We should just make rpmh parent device call cmd_db_ready() so that these devices aren't even populated until then and so that cmd_db_ready() is only in one place. Lina? Let's see if Lina has qualms about this plan. Sounds like you're ok with it. Sure, I'll remove this check if Lina agrees to add it in the rpmh driver. We want to make the RSC nodes child of Command DB? That way we probe the controllers only if the command DB is ready? I could do that. Just so you know, there is are no strict directives to use Command DB. If a driver knows the information it needs to pass to the accelerator, it may choose to skip command DB completely. Thanks, Lina
Re: [PATCH] bsg referencing bus driver module
> This patch isn't applyable because your mailer has changed all the tabs > to spaces. > > I also think there's no need to do it this way. I think what we need > is for fc_bsg_remove() to wait until the bsg queue is drained. It does > look like the author thought this happened otherwise the code wouldn't > have the note. If we fix it that way we can do the same thing in all > the other transport classes that use bsg (which all have a similar > issue). > > James > Thanks, James. Sorry about the tabs; re-sending. On fc_bsg_remove()...: are you suggesting to implement the whole fix in scsi_transport_fc.c? That would be nice, but I do not see how that is possible. Even with the queue drained bsg still holds a reference to the Scsi_Host via bsg_class_device; bsg_class_device itself is referenced on bsg_open and kept around while a user-mode process keeps a handle to bsg. Even if we somehow implement the waiting the call may be stuck forever if the user-mode process keeps the handle. I think handling it via a rererence to the module is more consistent with the way things are done in Linux. You suggested the approach youself back in "Waiting for scsi_host_template release" discussion. >From df939b80d02bf37b21efaaef8ede86cfd39b0cb8 Mon Sep 17 00:00:00 2001 From: Anatoliy Glagolev Date: Thu, 19 Apr 2018 15:06:06 -0600 Subject: [PATCH] bsg referencing parent module Fixing a bug when bsg layer holds the last reference to device when the device's module has been unloaded. Upon dropping the reference the device's release function may touch memory of the unloaded module. --- block/bsg-lib.c | 24 ++-- block/bsg.c | 29 ++--- drivers/scsi/scsi_transport_fc.c | 8 ++-- include/linux/bsg-lib.h | 4 include/linux/bsg.h | 5 + 5 files changed, 63 insertions(+), 7 deletions(-) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index fc2e5ff..90c28fd 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -309,6 +309,25 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, bsg_job_fn *job_fn, int dd_job_size, void (*release)(struct device *)) { + return bsg_setup_queue_ex(dev, name, job_fn, dd_job_size, release, + NULL); +} +EXPORT_SYMBOL_GPL(bsg_setup_queue); + +/** + * bsg_setup_queue - Create and add the bsg hooks so we can receive requests + * @dev: device to attach bsg device to + * @name: device to give bsg device + * @job_fn: bsg job handler + * @dd_job_size: size of LLD data needed for each job + * @release: @dev release function + * @dev_module: @dev's module + */ +struct request_queue *bsg_setup_queue_ex(struct device *dev, const char *name, + bsg_job_fn *job_fn, int dd_job_size, + void (*release)(struct device *), + struct module *dev_module) +{ struct request_queue *q; int ret; @@ -331,7 +350,8 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); - ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release); + ret = bsg_register_queue_ex(q, dev, name, &bsg_transport_ops, release, + dev_module); if (ret) { printk(KERN_ERR "%s: bsg interface failed to " "initialize - register queue\n", dev->kobj.name); @@ -343,4 +363,4 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, blk_cleanup_queue(q); return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(bsg_setup_queue); +EXPORT_SYMBOL_GPL(bsg_setup_queue_ex); diff --git a/block/bsg.c b/block/bsg.c index defa06c..6920c5b 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -750,7 +750,8 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) return bd; } -static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) +static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file, + struct bsg_class_device **pbcd) { struct bsg_device *bd; struct bsg_class_device *bcd; @@ -766,6 +767,7 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) if (!bcd) return ERR_PTR(-ENODEV); + *pbcd = bcd; bd = __bsg_get_device(iminor(inode), bcd->queue); if (bd) @@ -781,22 +783,34 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) static int bsg_open(struct inode *inode, struct file *file) { struct bsg_device *bd; + struct bsg_class_device *bcd; - bd = bsg_get_device(inode, file); + bd = bsg_get_device(inode, file, &bcd); if (IS_ERR(bd)) return PTR_ERR(bd); file->private_data = bd; + if (bcd->parent_module) {
[PATCH 3/6] x86/intel_rdt/mba_sc: Add initialization support
When MBA software controller is enabled, we need a per domain storage for user specified bandwidth in "MBps" and the "percentage" values which are programmed into the IA32_MBA_THRTL_MSR. Add support for these data structures and initialization. The MBA percentage values have a default max value of 100 but however the max value in MBps is not available from the hardware and we keep it just at U32_MAX. This simply says that user can use all bandwidth by default but does not say what is the actual max bandwidth available. The actual bandwidth that is available may depend on lot of factors like QPI link, number of memory channels, memory channel frequency, its width and memory speed, how many channels are configured and also if memory interleaving is enabled. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/intel_rdt.c | 37 +++- arch/x86/kernel/cpu/intel_rdt.h | 3 +++ arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 3 +++ 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 2a0931f..07f6b3b 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -35,6 +35,7 @@ #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 +#define MBA_MAX_MBPS U32_MAX /* Mutex to protect rdtgroup access. */ DEFINE_MUTEX(rdtgroup_mutex); @@ -439,25 +440,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } +void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) +{ + int i; + + /* +* Initialize the Control MSRs to having no control. +* For Cache Allocation: Set all bits in cbm +* For Memory Allocation: Set b/w requested to 100% +* and the bandwidth in MBps to U32_MAX +*/ + for (i = 0; i < r->num_closid; i++, dc++, dm++) { + *dc = r->default_ctrl; + *dm = MBA_MAX_MBPS; + } +} + static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) { struct msr_param m; - u32 *dc; - int i; + u32 *dc, *dm; dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL); if (!dc) return -ENOMEM; - d->ctrl_val = dc; + dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL); + if (!dm) { + kfree(dc); + return -ENOMEM; + } - /* -* Initialize the Control MSRs to having no control. -* For Cache Allocation: Set all bits in cbm -* For Memory Allocation: Set b/w requested to 100 -*/ - for (i = 0; i < r->num_closid; i++, dc++) - *dc = r->default_ctrl; + d->ctrl_val = dc; + d->mbps_val = dm; + setup_default_ctrlval(r, dc, dm); m.low = 0; m.high = r->num_closid; @@ -596,6 +612,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) } kfree(d->ctrl_val); + kfree(d->mbps_val); kfree(d->rmid_busy_llc); kfree(d->mbm_total); kfree(d->mbm_local); diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 74aee0f..91cc310 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -202,6 +202,7 @@ struct mbm_state { * @cqm_work_cpu: * worker cpu for CQM h/w counters * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps * @new_ctrl: new ctrl value to be loaded * @have_new_ctrl: did user provide new_ctrl for this domain */ @@ -217,6 +218,7 @@ struct rdt_domain { int mbm_work_cpu; int cqm_work_cpu; u32 *ctrl_val; + u32 *mbps_val; u32 new_ctrl; boolhave_new_ctrl; }; @@ -448,6 +450,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); bool is_mba_sc(struct rdt_resource *r); +void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm); void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index b9ceb13..bfd707d 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1055,12 +1055,15 @@ static int set_cache_qos_cfg(int level, bool enable) static int set_mba_sc(bool mba_sc) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; + struct rdt_domain
[PATCH V2 0/6] Memory bandwidth allocation software controller(mba_sc)
Sending the second version of MBA software controller which addresses the feedback on V1. Thanks to the feedback from Thomas on the V1. Thomas was unhappy about the bad structure and english in the documentation and comments explaining the changes and also about duct taping of data structure which saves the throttle MSRs. Also issues were pointed out in the mounting and other init code. This series also changed the counting and feedback loop patches with some improvements to not do any division and take care of hierarchy and some l2 -> l3 traffic scenarios. The patches are based on 4.16. Background: Intel RDT memory bandwidth allocation (MBA) currently uses the resctrl interface and uses the schemata file in each rdtgroup to specify the max "bandwidth percentage" that is allowed to be used by the "threads" and "cpus" in the rdtgroup. These values are specified "per package" in each rdtgroup in the schemata file as below: $ cat /sys/fs/resctrl/p1/schemata L3:0=7ff;1=7ff MB:0=100;1=50 In the above example the MB is the memory bandwidth percentage and "0" and "1" specify the package/socket ids. The threads in rdtgroup "p1" would get 100% memory bandwidth on socket0 and 50% bandwidth on socket1. Problem: However there are confusions in specifying the MBA in "percentage": 1. In some scenarios, when user increases bandwidth percentage values he does not not see any raw bandwidth increase in "MBps" 2. Same bandwidth "percentage values" may mean different raw bandwidth in "MBps". 3. This interface may also end up unnecessarily controlling the L2 <-> L3 traffic which has no or very minimal L3 external traffic. Proposed solution: In short, we let user specify the bandwidth in "MBps" and we introduce a software feedback loop which measures the bandwidth using MBM and restricts the bandwidth "percentage" internally. The fact that Memory bandwidth allocation(MBA) is a core specific mechanism where as memory bandwidth monitoring(MBM) is done at the package level is what leads to confusion when users try to apply control via the MBA and then monitor the bandwidth to see if the controls are effective. Below are details on such scenarios: 1. User may *not* see increase in actual bandwidth when bandwidth percentage values are increased: This can occur when aggregate L2 external bandwidth is more than L3 external bandwidth. Consider an SKL SKU with 24 cores on a package and where L2 external is 10GBps (hence aggregate L2 external bandwidth is 240GBps) and L3 external bandwidth is 100GBps. Now a workload with '20 threads, having 50% bandwidth, each consuming 5GBps' consumes the max L3 bandwidth of 100GBps although the percentage value specified is only 50% << 100%. Hence increasing the bandwidth percentage will not yield any more bandwidth. This is because although the L2 external bandwidth still has capacity, the L3 external bandwidth is fully used. Also note that this would be dependent on number of cores the benchmark is run on. 2. Same bandwidth percentage may mean different actual bandwidth depending on # of threads: For the same SKU in #1, a 'single thread, with 10% bandwidth' and '4 thread, with 10% bandwidth' can consume upto 10GBps and 40GBps although they have same percentage bandwidth of 10%. This is simply because as threads start using more cores in an rdtgroup, the actual bandwidth may increase or vary although user specified bandwidth percentage is same. In order to mitigate this and make the interface more user friendly, resctrl added support for specifying the bandwidth in "MBps" as well. The kernel underneath would use a software feedback mechanism or a "Software Controller" which reads the actual bandwidth using MBM counters and adjust the memory bandwidth percentages to ensure "actual bandwidth < user specified bandwidth". By default, the schemata would take the bandwidth percentage values where as user can switch to the "MBA software controller" mode using a mount option 'mba_MBps'. The schemata format is specified in the below. To use the feature mount the file system using mba_MBps option: $ mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl If the MBA is specified in MBps then user can enter the max bandwidth in MBps rather than the percentage values. The default value when mounted is max_u32. $ echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata $ echo "L3:0=3;1=3\nMB:0=1024;1=500" > /sys/fs/resctrl/p1/schemata In the above example the tasks in "p1" and "p0" rdtgroup would use a max bandwidth of 1024MBps on socket0 and 500MBps on socket1. Vikas Shivappa (6): x86/intel_rdt/mba_sc: Documentation for MBA software controller(mba_sc) x86/intel_rdt/mba_sc: Enable/disable MBA software controller(mba_sc) x86/intel_rdt/mba_sc: Add initialization support x86/intel_rdt/mba_sc: Add schemata support x86/intel_rdt/mba_sc: Prepare for feedback loop x86/intel_rdt/mba_sc: Feedback loop to dynamically update mem bandwidth Do
[PATCH 5/6] x86/intel_rdt/mba_sc: Prepare for feedback loop
This is a preparatory patch for the mba feedback loop. We add support to measure the "bandwidth in MBps" and the "delta bandwidth". We measure it reading the MBM IA32_QM_CTR MSRs and calculating the amount of "bytes" moved. There is no user interface for this and will only be used by the feedback loop patch. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/intel_rdt.h | 10 arch/x86/kernel/cpu/intel_rdt_monitor.c | 45 - 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 91cc310..66a0ba3 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -180,10 +180,20 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @chunks:Total data moved (multiply by rdt_group.mon_scale to get bytes) * @prev_msr Value of IA32_QM_CTR for this RMID last time we read it + * @chunks_bw Total local data moved. Used for bandwidth calculation + * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting + * @prev_bwThe most recent bandwidth in MBps + * @delta_bw Difference between the current and previous bandwidth + * @delta_comp Indicates whether to compute the delta_bw */ struct mbm_state { u64 chunks; u64 prev_msr; + u64 chunks_bw; + u64 prev_bw_msr; + u32 prev_bw; + u32 delta_bw; + booldelta_comp; }; /** diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c index 681450e..32c9e55 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c @@ -225,10 +225,18 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr) +{ + u64 shift = 64 - MBM_CNTR_WIDTH, chunks; + + chunks = (cur_msr << shift) - (prev_msr << shift); + return chunks >>= shift; +} + static int __mon_event_count(u32 rmid, struct rmid_read *rr) { - u64 chunks, shift, tval; struct mbm_state *m; + u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { @@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) } if (rr->first) { - m->prev_msr = tval; - m->chunks = 0; + memset(m, 0, sizeof(struct mbm_state)); + m->prev_bw_msr = m->prev_msr = tval; return 0; } - shift = 64 - MBM_CNTR_WIDTH; - chunks = (tval << shift) - (m->prev_msr << shift); - chunks >>= shift; + chunks = mbm_overflow_count(m->prev_msr, tval); m->chunks += chunks; m->prev_msr = tval; @@ -270,6 +276,33 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) } /* + * Supporting function to calculate the memory bandwidth + * and delta bandwidth in MBps. + */ +static void mbm_bw_count(u32 rmid, struct rmid_read *rr) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + struct mbm_state *m = &rr->d->mbm_local[rmid]; + u64 tval, cur_bw, chunks; + + tval = __rmid_read(rmid, rr->evtid); + if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { + return; + } + + chunks = mbm_overflow_count(m->prev_bw_msr, tval); + m->chunks_bw += chunks; + m->chunks = m->chunks_bw; + cur_bw = (chunks * r->mon_scale) >> 20; + + if (m->delta_comp) + m->delta_bw = abs(cur_bw - m->prev_bw); + m->delta_comp = false; + m->prev_bw = cur_bw; + m->prev_bw_msr = tval; +} + +/* * This is called via IPI to read the CQM/MBM counters * on a domain. */ -- 1.9.1
[PATCH 2/6] x86/intel_rdt/mba_sc: Enable/disable MBA software controller
Currently user does memory bandwidth allocation(MBA) by specifying the bandwidth in percentage via the resctrl schemata file: "/sys/fs/resctrl/schemata" Add a new mount option "mba_MBps" to enable the user to specify MBA in MBps: $mount -t resctrl resctrl [-o cdp[,cdpl2][mba_MBps]] /sys/fs/resctrl Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/intel_rdt.c | 8 arch/x86/kernel/cpu/intel_rdt.h | 3 +++ arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 30 ++ 3 files changed, 41 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 2b65601..2a0931f 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void) rdt_alloc_capable = true; } +bool is_mba_sc(struct rdt_resource *r) +{ + if (!r) + return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc; + + return r->membw.mba_sc; +} + /* * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values * exposed to user interface and the h/w understandable delay values. diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 3fd7a70..74aee0f 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -259,6 +259,7 @@ struct rdt_cache { * @min_bw:Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale + * @mba_sc:True if MBA software controller(mba_sc) is enabled * @mb_map:Mapping of memory B/W percentage to memory B/W delay */ struct rdt_membw { @@ -266,6 +267,7 @@ struct rdt_membw { u32 min_bw; u32 bw_gran; u32 delay_linear; + boolmba_sc; u32 *mb_map; }; @@ -445,6 +447,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); +bool is_mba_sc(struct rdt_resource *r); void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index fca759d..b9ceb13 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg) wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); } +static inline bool is_mba_linear(void) +{ + return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; +} + static int set_cache_qos_cfg(int level, bool enable) { void (*update)(void *arg); @@ -1041,6 +1046,25 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* + * Enable or disable the MBA software controller + * which helps user specify bandwidth in MBps. + * MBA software controller is supported only if + * MBM is supported and MBA is in linear scale. + */ +static int set_mba_sc(bool mba_sc) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; + + if (!is_mbm_enabled() || !is_mba_linear() || + mba_sc == is_mba_sc(r)) + return -1; + + r->membw.mba_sc = mba_sc; + + return 0; +} + static int cdp_enable(int level, int data_type, int code_type) { struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; @@ -1123,6 +1147,10 @@ static int parse_rdtgroupfs_options(char *data) ret = cdpl2_enable(); if (ret) goto out; + } else if (!strcmp(token, "mba_MBps")) { + ret = set_mba_sc(true); + if (ret) + goto out; } else { ret = -EINVAL; goto out; @@ -1445,6 +1473,8 @@ static void rdt_kill_sb(struct super_block *sb) cpus_read_lock(); mutex_lock(&rdtgroup_mutex); + set_mba_sc(false); + /*Put everything back to default values. */ for_each_alloc_enabled_rdt_resource(r) reset_all_ctrls(r); -- 1.9.1
[PATCH 4/6] x86/intel_rdt/mba_sc: Add schemata support
Currently when user updates the "schemata" with new MBA percentage values, kernel writes the corresponding bandwidth percentage values to the IA32_MBA_THRTL_MSR. When MBA is expressed in MBps, the schemata format is changed to have the per package memory bandwidth in MBps instead of being specified in percentage. We do not write the IA32_MBA_THRTL_MSRs when the schemata is updated as that is handled separately. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/intel_rdt.c | 2 +- arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 24 +++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 07f6b3b..85805d7 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -179,7 +179,7 @@ struct rdt_resource rdt_resources_all[] = { .msr_update = mba_wrmsr, .cache_level= 3, .parse_ctrlval = parse_bw, - .format_str = "%d=%*d", + .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, }; diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index 23e1d5c..116d57b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return false; } - if (bw < r->membw.min_bw || bw > r->default_ctrl) { + if ((bw < r->membw.min_bw || bw > r->default_ctrl) && + !is_mba_sc(r)) { rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, r->membw.min_bw, r->default_ctrl); return false; @@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int closid) struct msr_param msr_param; cpumask_var_t cpu_mask; struct rdt_domain *d; + bool mba_sc; + u32 *dc; int cpu; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) @@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int closid) msr_param.high = msr_param.low + 1; msr_param.res = r; + mba_sc = is_mba_sc(r); list_for_each_entry(d, &r->domains, list) { - if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) { + dc = !mba_sc ? d->ctrl_val : d->mbps_val; + if (d->have_new_ctrl && d->new_ctrl != dc[closid]) { cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - d->ctrl_val[closid] = d->new_ctrl; + dc[closid] = d->new_ctrl; } } - if (cpumask_empty(cpu_mask)) + + /* +* Avoid writing the control msr with control values when +* MBA software controller is enabled +*/ + if (cpumask_empty(cpu_mask) || mba_sc) goto done; cpu = get_cpu(); /* Update CBM on this cpu if it's in cpu_mask. */ @@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) { struct rdt_domain *dom; bool sep = false; + u32 ctrl_val; seq_printf(s, "%*s:", max_name_width, r->name); list_for_each_entry(dom, &r->domains, list) { if (sep) seq_puts(s, ";"); + + ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] : + dom->mbps_val[closid]); seq_printf(s, r->format_str, dom->id, max_data_width, - dom->ctrl_val[closid]); + ctrl_val); sep = true; } seq_puts(s, "\n"); -- 1.9.1
[PATCH 1/6] x86/intel_rdt/mba_sc: Documentation for MBA software controller(mba_sc)
Add documentation about the feedback loop mechanism (MBA software controller) which lets the user specify the memory bandwidth allocation in MBps. This includes some changes to "schemata" formati with examples. Signed-off-by: Vikas Shivappa --- Documentation/x86/intel_rdt_ui.txt | 75 ++ 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index 71c3098..a16aa21 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -17,12 +17,14 @@ MBA (Memory Bandwidth Allocation) - "mba" To use the feature mount the file system: - # mount -t resctrl resctrl [-o cdp[,cdpl2]] /sys/fs/resctrl + # mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps]] /sys/fs/resctrl mount options are: "cdp": Enable code/data prioritization in L3 cache allocations. "cdpl2": Enable code/data prioritization in L2 cache allocations. +"mba_MBps": Enable the MBA Software Controller(mba_sc) to specify MBA + bandwidth in MBps L2 and L3 CDP are controlled seperately. @@ -270,10 +272,11 @@ and 0xA are not. On a system with a 20-bit mask each bit represents 5% of the capacity of the cache. You could partition the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000. -Memory bandwidth(b/w) percentage - -For Memory b/w resource, user controls the resource by indicating the -percentage of total memory b/w. +Memory bandwidth Allocation and monitoring +-- + +For Memory bandwidth resource, by default the user controls the resource +by indicating the percentage of total memory bandwidth. The minimum bandwidth percentage value for each cpu model is predefined and can be looked up through "info/MB/min_bandwidth". The bandwidth @@ -285,7 +288,47 @@ to the next control step available on the hardware. The bandwidth throttling is a core specific mechanism on some of Intel SKUs. Using a high bandwidth and a low bandwidth setting on two threads sharing a core will result in both threads being throttled to use the -low bandwidth. +low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core +specific mechanism where as memory bandwidth monitoring(MBM) is done at +the package level may lead to confusion when users try to apply control +via the MBA and then monitor the bandwidth to see if the controls are +effective. Below are such scenarios: + +1. User may *not* see increase in actual bandwidth when percentage + values are increased: + +This can occur when aggregate L2 external bandwidth is more than L3 +external bandwidth. Consider an SKL SKU with 24 cores on a package and +where L2 external is 10GBps (hence aggregate L2 external bandwidth is +240GBps) and L3 external bandwidth is 100GBps. Now a workload with '20 +threads, having 50% bandwidth, each consuming 5GBps' consumes the max L3 +bandwidth of 100GBps although the percentage value specified is only 50% +<< 100%. Hence increasing the bandwidth percentage will not yeild any +more bandwidth. This is because although the L2 external bandwidth still +has capacity, the L3 external bandwidth is fully used. Also note that +this would be dependent on number of cores the benchmark is run on. + +2. Same bandwidth percentage may mean different actual bandwidth + depending on # of threads: + +For the same SKU in #1, a 'single thread, with 10% bandwidth' and '4 +thread, with 10% bandwidth' can consume upto 10GBps and 40GBps although +they have same percentage bandwidth of 10%. This is simply because as +threads start using more cores in an rdtgroup, the actual bandwidth may +increase or vary although user specified bandwidth percentage is same. + +In order to mitigate this and make the interface more user friendly, +resctrl added support for specifying the bandwidth in MBps as well. The +kernel underneath would use a software feedback mechanism or a "Software +Controller(mba_sc)" which reads the actual bandwidth using MBM counters +and adjust the memowy bandwidth percentages to ensure + + "actual bandwidth < user specified bandwidth". + +By default, the schemata would take the bandwidth percentage values +where as user can switch to the "MBA software controller" mode using +a mount option 'mba_MBps'. The schemata format is specified in the below +sections. L3 schemata file details (code and data prioritization disabled) @@ -308,13 +351,20 @@ schemata format is always: L2:=;=;... -Memory b/w Allocation details -- +Memory bandwidth Allocation (default mode) +-- Memory b/w domain is L3 cache. MB:=bandwidth0;=bandwidth1;... +Memory bandwidth Allocation specified in MBps +- + +Memory bandwidth domain is L3 cache. + + MB:=bw_MB
[PATCH 6/6] x86/intel_rdt/mba_sc: Feedback loop to dynamically update mem bandwidth
mba_sc is a feedback loop where we periodically read MBM counters and try to restrict the bandwidth below a max value so the below is always true: "current bandwidth(cur_bw) < user specified bandwidth(user_bw)" The frequency of these checks is currently 1s and we just tag along the MBM overflow timer to do the updates. Doing it once in a second also makes the calculation of bandwidth easy. The steps of increase or decrease of bandwidth is the minimum granularity specified by the hardware. Although the MBA's goal is to restrict the bandwidth below a maximum, there may be a need to even increase the bandwidth. Since MBA controls the L2 external bandwidth where as MBM measures the L3 external bandwidth, we may end up restricting some rdtgroups unnecessarily. This may happen in the sequence where rdtgroup (set of jobs) had high "L3 <-> memory traffic" in initial phases -> mba_sc kicks in and reduced bandwidth percentage values -> but after some it has mostly "L2 <-> L3" traffic. In this scenario mba_sc increases the bandwidth percentage when there is lesser memory traffic. Signed-off-by: Vikas Shivappa --- arch/x86/kernel/cpu/intel_rdt.c | 3 +- arch/x86/kernel/cpu/intel_rdt.h | 2 + arch/x86/kernel/cpu/intel_rdt_monitor.c | 121 +++- 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 85805d7..6dcd93b 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -33,7 +33,6 @@ #include #include "intel_rdt.h" -#define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 #define MBA_MAX_MBPS U32_MAX @@ -350,7 +349,7 @@ static int get_cache_id(int cpu, int level) * that can be written to QOS_MSRs. * There are currently no SKUs which support non linear delay values. */ -static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) +u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { if (r->membw.delay_linear) return MAX_MBA_BW - bw; diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 66a0ba3..3975282 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -28,6 +28,7 @@ #define MBM_CNTR_WIDTH 24 #define MBM_OVERFLOW_INTERVAL 1000 +#define MAX_MBA_BW 100u #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) @@ -461,6 +462,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, void mbm_handle_overflow(struct work_struct *work); bool is_mba_sc(struct rdt_resource *r); void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm); +u32 delay_bw_map(unsigned long bw, struct rdt_resource *r); void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c index 32c9e55..5a26c1c 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c @@ -330,6 +330,113 @@ void mon_event_count(void *info) } } +/* + * This implements feedback loop for MBA software controller(mba_sc) + * + * mba_sc is a feedback loop where we periodically read MBM counters + * and adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs + * so that: + * "current bandwdith(cur_bw) < user specified bandwidth(user_bw)". + * We can simply use the MBM counters to measure the bandwidth and + * use MBA throttle MSRs to control the bandwidth for a particular + * rdtgrp because we use the same resctrl rdtgroup for both monitoring + * and control now as suggested by Thomas Gleixner. + * + * The frequency of the checks is 1s and we just tag along the + * MBM overflow timer. Having 1s interval makes the calculation of + * bandwidth simpler. + * + * Although MBA's goal is to restrict the bandwidth to a maximum, + * there may be a need to increase the bandwidth to avoid + * uncecessarily restricting the L2 <-> L3 traffic. + * Since MBA controls the L2 external bandwidth where as + * MBM measures the L3 external bandwidth the following sequence + * could lead to such a situation. + * Consider an rdtgroup which had high L3 <-> memory traffic + * in initial phases -> mba_sc kicks in and reduced bandwidth + * percentage values -> but after some time rdtgroup has mostly + * L2 <-> L3 traffic. + * In this case we may restrict the rdtgroup's L2 <-> L3 traffic + * as its throttle MSRs already have low percentage values. + * To avoid unnecessarily restricting such rdtgroups,we also + * increase the bandwidth. + */ +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +{ + u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; + struct mbm_state *pmbm_data, *cmbm_data; +
Re: [PATCH 1/2] scsi: st: Replace GFP_ATOMIC with GFP_KERNEL in st_probe
Jia-Ju, > st_probe() is never called in atomic context. Applied patches 1 and 2 to 4.18/scsi-queue. Thanks! -- Martin K. Petersen Oracle Linux Engineering
Re: [PATCH v4 09/22] iommu/vt-d: add svm/sva invalidate function
On Tue, 17 Apr 2018 13:10:45 -0600 Alex Williamson wrote: > On Mon, 16 Apr 2018 14:48:58 -0700 > Jacob Pan wrote: > > > When Shared Virtual Address (SVA) is enabled for a guest OS via > > vIOMMU, we need to provide invalidation support at IOMMU API and > > driver level. This patch adds Intel VT-d specific function to > > implement iommu passdown invalidate API for shared virtual address. > > > > The use case is for supporting caching structure invalidation > > of assigned SVM capable devices. Emulated IOMMU exposes queue > > invalidation capability and passes down all descriptors from the > > guest to the physical IOMMU. > > > > The assumption is that guest to host device ID mapping should be > > resolved prior to calling IOMMU driver. Based on the device handle, > > host IOMMU driver can replace certain fields before submit to the > > invalidation queue. > > > > Signed-off-by: Liu, Yi L > > Signed-off-by: Ashok Raj > > Signed-off-by: Jacob Pan > > --- > > drivers/iommu/intel-iommu.c | 170 > > 1 file changed, 170 > > insertions(+) > > > > diff --git a/drivers/iommu/intel-iommu.c > > b/drivers/iommu/intel-iommu.c index cae4042..c765448 100644 > > --- a/drivers/iommu/intel-iommu.c > > +++ b/drivers/iommu/intel-iommu.c > > @@ -4973,6 +4973,175 @@ static void > > intel_iommu_detach_device(struct iommu_domain *domain, > > dmar_remove_one_dev_info(to_dmar_domain(domain), dev); } > > > > +/* > > + * 3D array for converting IOMMU generic type-granularity to VT-d > > granularity > > + * X indexed by enum iommu_inv_type > > + * Y indicates request without and with PASID > > + * Z indexed by enum iommu_inv_granularity > > + * > > + * For an example, if we want to find the VT-d granularity > > encoding for IOTLB > > + * type, DMA request with PASID, and page selective. The look up > > indices are: > > + * [1][1][8], where > > + * 1: IOMMU_INV_TYPE_TLB > > + * 1: with PASID > > + * 8: IOMMU_INV_GRANU_PAGE_PASID > > + * > > + * Granu_map array indicates validity of the table. 1: valid, 0: > > invalid > > + * > > + */ > > +const static int > > inv_type_granu_map[IOMMU_INV_NR_TYPE][2][IOMMU_INV_NR_GRANU] = { > > + /* extended dev IOTLBs, for dev-IOTLB, only global is > > valid, > > + for dev-EXIOTLB, two valid granu */ > > + { > > + {1}, > > + {0, 0, 0, 0, 1, 1, 0, 0, 0} > > + }, > > + /* IOTLB and EIOTLB */ > > + { > > + {1, 1, 0, 1, 0, 0, 0, 0, 0}, > > + {0, 0, 0, 0, 1, 0, 1, 1, 1} > > + }, > > + /* PASID cache */ > > + { > > + {0}, > > + {0, 0, 0, 0, 1, 1, 0, 0, 0} > > + }, > > + /* context cache */ > > + { > > + {1, 1, 1} > > + } > > +}; > > + > > +const static u64 > > inv_type_granu_table[IOMMU_INV_NR_TYPE][2][IOMMU_INV_NR_GRANU] = { > > + /* extended dev IOTLBs, only global is valid */ > > + { > > + {QI_DEV_IOTLB_GRAN_ALL}, > > + {0, 0, 0, 0, QI_DEV_IOTLB_GRAN_ALL, > > QI_DEV_IOTLB_GRAN_PASID_SEL, 0, 0, 0} > > + }, > > + /* IOTLB and EIOTLB */ > > + { > > + {DMA_TLB_GLOBAL_FLUSH, DMA_TLB_DSI_FLUSH, 0, > > DMA_TLB_PSI_FLUSH}, > > + {0, 0, 0, 0, QI_GRAN_ALL_ALL, 0, QI_GRAN_NONG_ALL, > > QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID} > > + }, > > + /* PASID cache */ > > + { > > + {0}, > > + {0, 0, 0, 0, QI_PC_ALL_PASIDS, QI_PC_PASID_SEL} > > + }, > > + /* context cache */ > > + { > > + {DMA_CCMD_GLOBAL_INVL, DMA_CCMD_DOMAIN_INVL, > > DMA_CCMD_DEVICE_INVL} > > + } > > +}; > > + > > +static inline int to_vtd_granularity(int type, int granu, int > > with_pasid, u64 *vtd_granu) +{ > > + if (type >= IOMMU_INV_NR_TYPE || granu >= > > IOMMU_INV_NR_GRANU || with_pasid > 1) > > + return -EINVAL; > > + > > + if (inv_type_granu_map[type][with_pasid][granu] == 0) > > + return -EINVAL; > > + > > + *vtd_granu = inv_type_granu_table[type][with_pasid][granu]; > > + > > + return 0; > > +} > > + > > +static int intel_iommu_sva_invalidate(struct iommu_domain *domain, > > + struct device *dev, struct tlb_invalidate_info > > *inv_info) > > inv_info->hdr.version is never checked, why do we have these if > they're not used? > the version was added to leave room for future extension. you are right, it should be checked. > > +{ > > + struct intel_iommu *iommu; > > + struct dmar_domain *dmar_domain = to_dmar_domain(domain); > > + struct device_domain_info *info; > > + u16 did, sid; > > + u8 bus, devfn; > > + int ret = 0; > > + u64 granu; > > + unsigned long flags; > > + > > + if (!inv_info || !dmar_domain) > > + return -EINVAL; > > + > > + iommu = device_to_iommu(dev, &bus, &devfn); > > + if (!iommu) > > + return -ENODEV; > > + > > + if (!dev || !dev_is_pci(dev)) > > + return -ENODEV; > > + > > + did = dmar_domain->iommu_did[iommu->seq_id]; > > + sid = PCI_DEVID(bus, devfn); > > + ret = to_vtd_granularity
[PATCH 4/5] x86, pti: disallow global kernel text with RANDSTRUCT
I believe this was originally reported by the grsecurity team who tweeted about it (link below). RANDSTRUCT derives its hardening benefits from the attacker's lack of knowledge about the layout of kernel data structures. Keep the kernel image non-global in cases where RANDSTRUCT is in use to help keep the layout a secret. Signed-off-by: Dave Hansen Reported-by: Kees Cook Link: https://twitter.com/grsecurity/status/985678720630476800 Fixes: 8c06c7740 (x86/pti: Leave kernel text global for !PCID) Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: linux...@kvack.org --- b/arch/x86/mm/pti.c | 10 ++ 1 file changed, 10 insertions(+) diff -puN arch/x86/mm/pti.c~pti-glb-disable-with-compile-options arch/x86/mm/pti.c --- a/arch/x86/mm/pti.c~pti-glb-disable-with-compile-options2018-04-20 14:10:02.702749165 -0700 +++ b/arch/x86/mm/pti.c 2018-04-20 14:10:02.706749165 -0700 @@ -421,6 +421,16 @@ static inline bool pti_kernel_image_glob if (boot_cpu_has(X86_FEATURE_K8)) return false; + /* +* RANDSTRUCT derives its hardening benefits from the +* attacker's lack of knowledge about the layout of kernel +* data structures. Keep the kernel image non-global in +* cases where RANDSTRUCT is in use to help keep the layout a +* secret. +*/ + if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT)) + return false; + return true; } _
[PATCH 0/5] x86, mm: PTI Global page fixes for 4.17
There have been a number of reports about issues with the patches that restore the Global PTE bit when using KPIT. This set resolves all of the issues that have been reported. These have been pushed out to a git tree where 0day should be chewing on them. Considering the troubles thus far, we should probably wait for 0day to spend _some_ time on these fixes before these get merged. Cc: Aaro Koskinen Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Fengguang Wu Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Mariusz Ceier Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: linux...@kvack.org
[PATCH 2/5] x86, pti: fix boot warning from Global-bit setting
From: Dave Hansen The pageattr.c code attempts to process "faults" when it goes looking for PTEs to change and finds non-present entries. It allows these faults in the linear map which is "expected to have holes", but WARN()s about them elsewhere, like when called on the kernel image. However, we are now calling change_page_attr_clear() on the kernel image in the process of trying to clear the Global bit. This trips the warning in __cpa_process_fault() if a non-present PTE is encountered in the kernel image. The "holes" in the kernel image result from free_init_pages()'s use of set_memory_np(). These holes are totally fine, and result from normal operation, just as they would be in the kernel linear map. Just silence the warning when holes in the kernel image are encountered. Signed-off-by: Dave Hansen Fixes: 39114b7a7 (x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image) Reported-by: Mariusz Ceier Reported-by: Aaro Koskinen Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux...@kvack.org --- b/arch/x86/mm/pageattr.c | 41 +++-- 1 file changed, 31 insertions(+), 10 deletions(-) diff -puN arch/x86/mm/pageattr.c~pti-glb-warning-inpageattr arch/x86/mm/pageattr.c --- a/arch/x86/mm/pageattr.c~pti-glb-warning-inpageattr 2018-04-20 14:10:01.619749168 -0700 +++ b/arch/x86/mm/pageattr.c2018-04-20 14:10:01.623749168 -0700 @@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file static inline void split_page_count(int level) { } #endif +static inline int +within(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr < end; +} + +static inline int +within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr <= end; +} + #ifdef CONFIG_X86_64 static inline unsigned long highmap_start_pfn(void) @@ -106,20 +118,26 @@ static inline unsigned long highmap_end_ return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; } -#endif - -static inline int -within(unsigned long addr, unsigned long start, unsigned long end) +static bool __cpa_pfn_in_highmap(unsigned long pfn) { - return addr >= start && addr < end; + /* +* Kernel text has an alias mapping at a high address, known +* here as "highmap". +*/ + return within_inclusive(pfn, highmap_start_pfn(), + highmap_end_pfn()); } -static inline int -within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +#else + +static bool __cpa_pfn_in_highmap(unsigned long pfn) { - return addr >= start && addr <= end; + /* There is no highmap on 32-bit */ + return false; } +#endif + /* * Flushing functions */ @@ -1183,6 +1201,10 @@ static int __cpa_process_fault(struct cp cpa->numpages = 1; cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; return 0; + + } else if (__cpa_pfn_in_highmap(cpa->pfn)) { + /* Faults in the highmap are OK, so do not warn: */ + return -EFAULT; } else { WARN(1, KERN_WARNING "CPA: called for zero pte. " "vaddr = %lx cpa->vaddr = %lx\n", vaddr, @@ -1335,8 +1357,7 @@ static int cpa_process_alias(struct cpa_ * to touch the high mapped kernel as well: */ if (!within(vaddr, (unsigned long)_text, _brk_end) && - within_inclusive(cpa->pfn, highmap_start_pfn(), -highmap_end_pfn())) { + __cpa_pfn_in_highmap(cpa->pfn)) { unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa = *cpa; _
[PATCH 3/5] x86, pti: reduce amount of kernel text allowed to be Global
Kees reported to me that I made too much of the kernel image global. It was far more than just text: I think this is too much set global: _end is after data, bss, and brk, and all kinds of other stuff that could hold secrets. I think this should match what mark_rodata_ro() is doing. This does exactly that. We use __end_rodata_hpage_align as our marker both because it is huge-page-aligned and it does not contain any sections we expect to hold secrets. Kees's logic was that r/o data is in the kernel image anyway and, in the case of traditional distributions, can be freely downloaded from the web, so there's no reason to hide it. Signed-off-by: Dave Hansen Reported-by: Kees Cook Fixes: 8c06c7740 (x86/pti: Leave kernel text global for !PCID) Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux...@kvack.org --- b/arch/x86/mm/pti.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff -puN arch/x86/mm/pti.c~pti-glb-too-much-mapped arch/x86/mm/pti.c --- a/arch/x86/mm/pti.c~pti-glb-too-much-mapped 2018-04-20 14:10:02.164749166 -0700 +++ b/arch/x86/mm/pti.c 2018-04-20 14:10:02.168749166 -0700 @@ -430,12 +430,24 @@ static inline bool pti_kernel_image_glob */ void pti_clone_kernel_text(void) { + /* +* rodata is part of the kernel image and is normally +* readable on the filesystem or on the web. But, do not +* clone the areas past rodata, they might contain secrets. +*/ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = (unsigned long)__end_rodata_hpage_align; if (!pti_kernel_image_global_ok()) return; + pr_debug("mapping partial kernel image into user address space\n"); + + /* +* Note that this will undo _some_ of the work that +* pti_set_kernel_image_nonglobal() did to clear the +* global bit. +*/ pti_clone_pmds(start, end, _PAGE_RW); } @@ -458,8 +470,6 @@ void pti_set_kernel_image_nonglobal(void if (pti_kernel_image_global_ok()) return; - pr_debug("set kernel image non-global\n"); - set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT); } _
[PATCH 5/5] x86, pti: filter at vma->vm_page_prot population
From: Dave Hansen 0day reported warnings at boot on 32-bit systems without NX support: [ 12.349193] attempted to set unsupported pgprot: 8025 bits: 8000 supported: 7fff [ 12.350792] WARNING: CPU: 0 PID: 1 at arch/x86/include/asm/pgtable.h:540 handle_mm_fault+0xfc1/0xfe0: check_pgprot at arch/x86/include/asm/pgtable.h:535 (inlined by) pfn_pte at arch/x86/include/asm/pgtable.h:549 (inlined by) do_anonymous_page at mm/memory.c:3169 (inlined by) handle_pte_fault at mm/memory.c:3961 (inlined by) __handle_mm_fault at mm/memory.c:4087 (inlined by) handle_mm_fault at mm/memory.c:4124 The problem was that we stopped massaging page permissions at PTE creation time, so vma->vm_page_prot was passed unfiltered to PTE creation. To fix it, filter the page protections before they are installed in vma->vm_page_prot. Signed-off-by: Dave Hansen Reported-by: Fengguang Wu Fixes: fb43d6cb91 ("x86/mm: Do not auto-massage page protections") Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux...@kvack.org Cc: Ingo Molnar --- b/arch/x86/Kconfig |4 b/arch/x86/include/asm/pgtable.h |5 + b/mm/mmap.c | 11 ++- 3 files changed, 19 insertions(+), 1 deletion(-) diff -puN arch/x86/include/asm/pgtable.h~pti-glb-protection_map arch/x86/include/asm/pgtable.h --- a/arch/x86/include/asm/pgtable.h~pti-glb-protection_map 2018-04-20 14:10:08.251749151 -0700 +++ b/arch/x86/include/asm/pgtable.h2018-04-20 14:10:08.260749151 -0700 @@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgp #define canon_pgprot(p) __pgprot(massage_pgprot(p)) +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + return canon_pgprot(prot); +} + static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) diff -puN arch/x86/Kconfig~pti-glb-protection_map arch/x86/Kconfig --- a/arch/x86/Kconfig~pti-glb-protection_map 2018-04-20 14:10:08.253749151 -0700 +++ b/arch/x86/Kconfig 2018-04-20 14:10:08.260749151 -0700 @@ -52,6 +52,7 @@ config X86 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER + select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOVif X86_64 @@ -273,6 +274,9 @@ config ARCH_HAS_CPU_RELAX config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_FILTER_PGPROT + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff -puN mm/mmap.c~pti-glb-protection_map mm/mmap.c --- a/mm/mmap.c~pti-glb-protection_map 2018-04-20 14:10:08.256749151 -0700 +++ b/mm/mmap.c 2018-04-20 14:10:08.261749151 -0700 @@ -100,11 +100,20 @@ pgprot_t protection_map[16] __ro_after_i __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 }; +#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + return prot; +} +#endif + pgprot_t vm_get_page_prot(unsigned long vm_flags) { - return __pgprot(pgprot_val(protection_map[vm_flags & + pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | pgprot_val(arch_vm_get_page_prot(vm_flags))); + + return arch_filter_pgprot(ret); } EXPORT_SYMBOL(vm_get_page_prot); _
[PATCH 1/5] x86, pti: fix boot problems from Global-bit setting
From: Dave Hansen Part of the global bit _setting_ patches also includes clearing the Global bit when we do not want it. That is done with set_memory_nonglobal(), which uses change_page_attr_clear() in pageattr.c under the covers. The TLB flushing code inside pageattr.c has has checks like BUG_ON(irqs_disabled()), looking for interrupt disabling that might cause deadlocks. But, these also trip in early boot on certain preempt configurations. Just copy the existing BUG_ON() sequence from cpa_flush_range() to the other two sites and check for early boot. Signed-off-by: Dave Hansen Fixes: 39114b7a7 (x86/pti: Never implicitly clear _PAGE_GLOBAL for kernel image) Reported-by: Mariusz Ceier Reported-by: Aaro Koskinen Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux...@kvack.org --- b/arch/x86/mm/pageattr.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff -puN arch/x86/mm/pageattr.c~pti-glb-boot-problem-fix arch/x86/mm/pageattr.c --- a/arch/x86/mm/pageattr.c~pti-glb-boot-problem-fix 2018-04-20 14:10:01.086749169 -0700 +++ b/arch/x86/mm/pageattr.c2018-04-20 14:10:01.090749169 -0700 @@ -172,7 +172,7 @@ static void __cpa_flush_all(void *arg) static void cpa_flush_all(unsigned long cache) { - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); on_each_cpu(__cpa_flush_all, (void *) cache, 1); } @@ -236,7 +236,7 @@ static void cpa_flush_array(unsigned lon unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ #endif - BUG_ON(irqs_disabled()); + BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); _
Re: [RFC PATCH 00/79] Generic page write protection and a solution to page waitqueue
On Fri, Apr 20, 2018 at 12:57:41PM -0700, Tim Chen wrote: > On 04/04/2018 12:17 PM, jgli...@redhat.com wrote: > > From: Jérôme Glisse > > > > https://cgit.freedesktop.org/~glisse/linux/log/?h=generic-write-protection-rfc > > > > This is an RFC for LSF/MM discussions. It impacts the file subsystem, > > the block subsystem and the mm subsystem. Hence it would benefit from > > a cross sub-system discussion. > > > > Patchset is not fully bake so take it with a graint of salt. I use it > > to illustrate the fact that it is doable and now that i did it once i > > believe i have a better and cleaner plan in my head on how to do this. > > I intend to share and discuss it at LSF/MM (i still need to write it > > down). That plan lead to quite different individual steps than this > > patchset takes and his also easier to split up in more manageable > > pieces. > > > > I also want to apologize for the size and number of patches (and i am > > not even sending them all). > > > > -- > > The Why ? > > > > I have two objectives: duplicate memory read only accross nodes and or > > devices and work around PCIE atomic limitations. More on each of those > > objective below. I also want to put forward that it can solve the page > > wait list issue ie having each page with its own wait list and thus > > avoiding long wait list traversale latency recently reported [1]. > > > > It does allow KSM for file back pages (truely generic KSM even between > > both anonymous and file back page). I am not sure how useful this can > > be, this was not an objective i did pursue, this is just a for free > > feature (see below). > > > > [1] https://groups.google.com/forum/#!topic/linux.kernel/Iit1P5BNyX8 > > > > -- > > Per page wait list, so long page_waitqueue() ! > > > > Not implemented in this RFC but below is the logic and pseudo code > > at bottom of this email. > > > > When there is a contention on struct page lock bit, the caller which > > is trying to lock the page will add itself to a waitqueue. The issues > > here is that multiple pages share the same wait queue and on large > > system with a lot of ram this means we can quickly get to a long list > > of waiters for differents pages (or for the same page) on the same > > list [1]. > > Your approach seems useful if there are lots of locked pages sharing > the same wait queue. > > That said, in the original workload from our customer with the long wait queue > problem, there was a single super hot page getting migrated, and it > is being accessed by all threads which caused the big log jam while they wait > for > the migration to get completed. > With your approach, we will still likely end up with a long queue > in that workload even if we have per page wait queue. > > Thanks. Ok so i re-read the thread, i was writting this cover letter from memory and i had bad recollection of your issue, so sorry. First, do you have a way to reproduce the issue ? Something easy would be nice :) So what i am proposing for per page wait queue would only marginaly help you (it might not even be mesurable in your workload). It would certainly make the code smaller and easier to understand i believe. Now that i have look back at your issue i think there is 2 things we should do. First keep migration page map read only, this would at least avoid CPU read fault. In trace you captured i wasn't able to ascertain if this were read or write fault. Second idea i have is about NUMA, everytime we NUMA migrate a page we could attach a temporary struct to the page (using page->mapping). So if we scan that page again we can inspect information about previous migration and see if we are not over migrating that page (ie bouncing it all over). If so we can mark the page (maybe with a page flag if we can find one) to protect it from further migration. That temporary struct would be remove after a while, ie autonuma would preallocate a bunch of those and keep an LRU of them and recycle the oldest when it needs a new one to migrate another page. LSF/MM slots: Michal can i get 2 slots to talk about this ? MM only discussion, one to talk about doing migration with page map read only but write protected while migration is happening. The other one to talk about attaching auto NUMA tracking struct to page. Cheers, Jérôme
Re: [Intel-gfx] [PATCH] gpu: drm: i915: Change return type to vm_fault_t
On Wed, Apr 18, 2018 at 08:46:44AM +0300, Jani Nikula wrote: > On Tue, 17 Apr 2018, Souptick Joarder wrote: > > On 17-Apr-2018 9:45 PM, "Matthew Wilcox" wrote: > >> > >> On Tue, Apr 17, 2018 at 09:14:32PM +0530, Souptick Joarder wrote: > >> > Not exactly. The plan for these patches is to introduce new vm_fault_t > > type > >> > in vm_operations_struct fault handlers. It's now available in 4.17-rc1. > > We will > >> > push all the required drivers/filesystem changes through different > > maintainers > >> > to linus tree. Once everything is converted into vm_fault_t type then > > Changing > >> > it from a signed to an unsigned int causes GCC to warn about an > > assignment > >> > from an incompatible type -- int foo(void) is incompatible with > >> > unsigned int foo(void). > >> > > >> > Please refer 1c8f422059ae ("mm: change return type to vm_fault_t") in > > 4.17-rc1. > >> > >> I think this patch would be clearer if you did > >> > >> - int ret; > >> + int err; > >> + vm_fault_t ret; > >> > >> Then it would be clearer to the maintainer that you're splitting apart the > >> VM_FAULT and errno codes. > >> > >> Sorry for not catching this during initial review. > > > > Ok, I will make required changes and send v2. Sorry, even I missed this :) > > I'm afraid Daniel is closer to the truth. +1. > My bad, sorry for the noise. I opened this thread to add exactly question/noise ;). So my recommendation for some next time is to make the intention clear on the commit message itself from the begin. > > BR, > Jani. > > > > >> > >> > On Tue, Apr 17, 2018 at 8:59 PM, Jani Nikula > >> > wrote: > >> > > On Tue, 17 Apr 2018, Souptick Joarder wrote: > >> > >> Use new return type vm_fault_t for fault handler. For > >> > >> now, this is just documenting that the function returns > >> > >> a VM_FAULT value rather than an errno. Once all instances > >> > >> are converted, vm_fault_t will become a distinct type. > >> > >> > >> > >> Reference id -> 1c8f422059ae ("mm: change return type to > >> > >> vm_fault_t") > >> > >> > >> > >> Signed-off-by: Souptick Joarder > >> > >> --- > >> > >> drivers/gpu/drm/i915/i915_drv.h | 3 ++- > >> > >> drivers/gpu/drm/i915/i915_gem.c | 15 --- > >> > >> 2 files changed, 10 insertions(+), 8 deletions(-) > >> > >> > >> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h > > b/drivers/gpu/drm/i915/i915_drv.h > >> > >> index a42deeb..95b0d50 100644 > >> > >> --- a/drivers/gpu/drm/i915/i915_drv.h > >> > >> +++ b/drivers/gpu/drm/i915/i915_drv.h > >> > >> @@ -51,6 +51,7 @@ > >> > >> #include > >> > >> #include > >> > >> #include > >> > >> +#include > >> > >> > >> > >> #include "i915_params.h" > >> > >> #include "i915_reg.h" > >> > >> @@ -3363,7 +3364,7 @@ int i915_gem_wait_for_idle(struct > > drm_i915_private *dev_priv, > >> > >> unsigned int flags); > >> > >> int __must_check i915_gem_suspend(struct drm_i915_private > > *dev_priv); > >> > >> void i915_gem_resume(struct drm_i915_private *dev_priv); > >> > >> -int i915_gem_fault(struct vm_fault *vmf); > >> > >> +vm_fault_t i915_gem_fault(struct vm_fault *vmf); > >> > >> int i915_gem_object_wait(struct drm_i915_gem_object *obj, > >> > >>unsigned int flags, > >> > >>long timeout, > >> > >> diff --git a/drivers/gpu/drm/i915/i915_gem.c > > b/drivers/gpu/drm/i915/i915_gem.c > >> > >> index dd89abd..bdac690 100644 > >> > >> --- a/drivers/gpu/drm/i915/i915_gem.c > >> > >> +++ b/drivers/gpu/drm/i915/i915_gem.c > >> > >> @@ -1882,7 +1882,7 @@ int i915_gem_mmap_gtt_version(void) > >> > >> * The current feature set supported by i915_gem_fault() and thus > > GTT mmaps > >> > >> * is exposed via I915_PARAM_MMAP_GTT_VERSION (see > > i915_gem_mmap_gtt_version). > >> > >> */ > >> > >> -int i915_gem_fault(struct vm_fault *vmf) > >> > >> +vm_fault_t i915_gem_fault(struct vm_fault *vmf) > >> > >> { > >> > >> #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ > >> > >> struct vm_area_struct *area = vmf->vma; > >> > >> @@ -1895,6 +1895,7 @@ int i915_gem_fault(struct vm_fault *vmf) > >> > >> pgoff_t page_offset; > >> > >> unsigned int flags; > >> > >> int ret; > >> > >> + vm_fault_t retval; > >> > > > >> > > What's the point of changing the name? An unnecessary change. > >> > > > >> > > BR, > >> > > Jani. > >> > > > >> > >> > >> > >> /* We don't use vmf->pgoff since that has the fake offset */ > >> > >> page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; > >> > >> @@ -2000,7 +2001,7 @@ int i915_gem_fault(struct vm_fault *vmf) > >> > >>* and so needs to be reported. > >> > >>*/ > >> > >> if (!i915_terminally_wedged(&dev_priv->gpu_error)) { > >> > >> - ret = VM_FAULT_SIGBUS; > >> > >> + retval = VM_FAULT_SIGBUS; > >> > >> break; > >> > >> } > >> > >> case -EAGAI
Re: [PATCH v2] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.
On Sat, 21 Apr 2018 07:28:19 +1000 NeilBrown wrote: > On Fri, Apr 20 2018, Boris Brezillon wrote: > > > Hi Neil, > > > > On Mon, 16 Apr 2018 09:42:30 +1000 > > NeilBrown wrote: > > > >> Winbond spi-nor flash 32MB and larger have an 'Extended Address > >> Register' as one option for addressing beyond 16MB (Macronix > >> has the same concept, Spansion has EXTADD bits in the Bank Address > >> Register). > >> > >> According to section > >>8.2.7 Write Extended Address Register (C5h) > >> > >> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash) > >> > >>The Extended Address Register is only effective when the device is > >>in the 3-Byte Address Mode. When the device operates in the 4-Byte > >>Address Mode (ADS=1), any command with address input of A31-A24 > >>will replace the Extended Address Register values. It is > >>recommended to check and update the Extended Address Register if > >>necessary when the device is switched from 4-Byte to 3-Byte Address > >>Mode. > >> > >> So the documentation suggests clearing the EAR after switching to > >> 3-byte mode. Experimentation shows that the EAR is *always* one after > >> the switch to 3-byte mode, so clearing the EAR is mandatory at > >> shutdown for a subsequent 3-byte-addressed reboot to work. > >> > >> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal > >> reboot, so we cannot rely on hardware reset. The MT7621 does assert a > >> reset line at watchdog-reset. > >> > >> Signed-off-by: NeilBrown > > > > We should probably backport the fix. Can you add a Fixes and Cc-stable > > tag? > > It's a bit weird having Fixes when this isn't a regression, but I guess > it doesn't hurt. Well, I thought you wanted this patch to be backported to stable releases, hence my suggestion. Of course it's not a regression, since the bug is here from the beginning, but nonetheless it's fixing a bug. > I chose > Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when > exiting") > as this patch it useless without that one. Not sure that's how Fixes should be used. IIUC, it should point to the first commit where the bug was introduced, so I guess it's 0aa87b7563f1 ("mtd: m25p80: add support for the windbond w25q256 chip") in this case. Now, if you want to restrict the kernel releases this patch should be backported to, you can use the '# vX.Y+' suffix in your Cc-stable tag. > > I also fixed the comment and have resent. Thanks.
Re: [pci PATCH v8 2/4] ena: Migrate over to unmanaged SR-IOV support
On 4/20/2018 9:30 AM, Alexander Duyck wrote: Instead of implementing our own version of a SR-IOV configuration stub in the ena driver we can just reuse the existing pci_sriov_configure_simple function. Signed-off-by: Alexander Duyck --- v5: Replaced call to pci_sriov_configure_unmanaged with pci_sriov_configure_simple v6: Dropped "#ifdef" checks for IOV wrapping sriov_configure definition v7: No change drivers/net/ethernet/amazon/ena/ena_netdev.c | 28 +- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a822e70..f2af87d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3386,32 +3386,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /*/ -static int ena_sriov_configure(struct pci_dev *dev, int numvfs) -{ - int rc; - - if (numvfs > 0) { - rc = pci_enable_sriov(dev, numvfs); - if (rc != 0) { - dev_err(&dev->dev, - "pci_enable_sriov failed to enable: %d vfs with the error: %d\n", - numvfs, rc); - return rc; - } - - return numvfs; - } - - if (numvfs == 0) { - pci_disable_sriov(dev); - return 0; - } - - return -EINVAL; -} - -/*/ -/*/ /* ena_remove - Device Removal Routine * @pdev: PCI device information struct @@ -3526,7 +3500,7 @@ static int ena_resume(struct pci_dev *pdev) .suspend= ena_suspend, .resume = ena_resume, #endif - .sriov_configure = ena_sriov_configure, + .sriov_configure = pci_sriov_configure_simple, }; static int __init ena_init(void) Reviewed-by: Greg Rose
Re: [PATCH v2 2/2] regulator: add QCOM RPMh regulator driver
On 04/19/2018 09:16 AM, Doug Anderson wrote: > On Wed, Apr 18, 2018 at 4:30 PM, David Collins > wrote: + * @drms_mode: Array of regulator framework modes which can + * be configured dynamically for this regulator + * via the set_load() callback. >>> >>> Using the singular for something that is an array is confusing. Why >>> not "drms_modes" or "drms_mode_arr"? In the past review you said >>> 'Perhaps something along the lines of "drms_modes"'. >> >> It seems awkward to me to use a plural for arrays as it leads to indexing >> like this: vreg->drms_modes[i]. "mode i" seems better than "modes i". >> However, I'm willing to change this to be drms_modes and drms_mode_max_uAs >> if that style is preferred. > > I'd very much like a plural here. Ok. I'll change this to be plural. + prop = "qcom,regulator-initial-voltage"; + ret = of_property_read_u32(node, prop, &uV); + if (!ret) { + range = &vreg->hw_data->voltage_range; + selector = DIV_ROUND_UP(uV - range->min_uV, + range->uV_step) + range->min_sel; + if (uV < range->min_uV || selector > range->max_sel) { + dev_err(dev, "%s: %s=%u is invalid\n", + node->name, prop, uV); + return -EINVAL; + } + + vreg->voltage_selector = selector; + + cmd[cmd_count].addr + = vreg->addr + RPMH_REGULATOR_REG_VRM_VOLTAGE; + cmd[cmd_count++].data + = DIV_ROUND_UP(selector * range->uV_step + + range->min_uV, 1000); + } >>> >>> Seems like you want an "else { vreg->voltage_selector = -EINVAL; }". >>> Otherwise "get_voltage_sel" will return selector 0 before the first >>> set, right? >>> >>> Previously Mark said: "If the driver can't read values it should >>> return an appropriate error code." >>> ...and previously you said: "I'll try this out and see if the >>> regulator framework complains during regulator registration." >> >> I tested out what happens when vreg->voltage_selector = -EINVAL is set >> when qcom,regulator-initial-voltage is not present. This results in >> devm_regulator_register() failing and subsequently causing the >> qcom_rpmh-regulator probe to fail. The error happens in >> machine_constraints_voltage() [1]. >> >> This leaves two courses of action: >> 1. (current patch set) allow voltage_selector to stay 0 if uninitialized >> 2. Set voltage_selector = -EINVAL by default and specify in DT binding >> documentation that qcom,regulator-initial-voltage is required for VRM >> managed RPMh regulator resources which have regulator-min-microvolt and >> regulator-max-microvolt specified. >> >> Are you ok with the DT implications of option #2? > > You'd need to ask Mark if he's OK with it, but a option #3 is to add a > patch to your series fix the regulator framework to try setting the > voltage if _regulator_get_voltage() fails. Presumably in > machine_constraints_voltage() you'd now do something like: > > int target_min, target_max; > int current_uV = _regulator_get_voltage(rdev); > if (current_uV < 0) { > /* Maybe this regulator's hardware can't be read and needs to be initted > */ > _regulator_do_set_voltage( > rdev, rdev->constraints->min_uV, rdev->constraints->min_uV); > current_uV = _regulator_get_voltage(rdev); > } > if (current_uV < 0) { > rdev_err(rdev, > "failed to get the current voltage(%d)\n", > current_uV); > return current_uV; > } > > If Mark doesn't like that then I guess I'd be OK w/ initting it to 0 > but this needs to be documented _somewhere_ (unlike for bypass it's > not obvious, so you need to find someplace to put it). I'd rather not > hack the DT to deal with our software limitations. I'm not opposed to your option #3 though it does seem a little hacky and tailored to the qcom_rpmh-regulator specific case. Note that I think it would be better to vote for min_uV to max_uV than min_uV to min_uV though. Mark, what are your thoughts on the best way to handle this situation? +static unsigned int rpmh_regulator_pmic4_bob_of_map_mode(unsigned int mode) +{ + static const unsigned int of_mode_map[RPMH_REGULATOR_MODE_COUNT] = { + [RPMH_REGULATOR_MODE_RET] = -EINVAL, + [RPMH_REGULATOR_MODE_LPM] = REGULATOR_MODE_IDLE, + [RPMH_REGULATOR_MODE_AUTO] = REGULATOR_MODE_NORMAL, + [RPMH_REGULATOR_MODE_HPM] = REGULATOR_MODE_FAST, +
Re: [pci PATCH v8 4/4] pci-pf-stub: Add PF driver stub for PFs that function only to enable VFs
On 4/20/2018 9:31 AM, Alexander Duyck wrote: Add a new driver called "pci-pf-stub" to act as a "white-list" for PF devices that provide no other functionality other then acting as a means of allocating a set of VFs. For now I only have one example ID provided by Amazon in terms of devices that require this functionality. The general idea is that in the future we will see other devices added as vendors come up with devices where the PF is more or less just a lightweight shim used to allocate VFs. Signed-off-by: Alexander Duyck --- v6: New driver to address concerns about Amazon devices left unsupported v7: Dropped pci_id table explanation from pci-pf-stub driver drivers/pci/Kconfig | 12 ++ drivers/pci/Makefile |2 ++ drivers/pci/pci-pf-stub.c | 54 + include/linux/pci_ids.h |2 ++ 4 files changed, 70 insertions(+) create mode 100644 drivers/pci/pci-pf-stub.c diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 34b56a8..cdef2a2 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -71,6 +71,18 @@ config PCI_STUB When in doubt, say N. +config PCI_PF_STUB + tristate "PCI PF Stub driver" + depends on PCI + depends on PCI_IOV + help + Say Y or M here if you want to enable support for devices that + require SR-IOV support, while at the same time the PF itself is + not providing any actual services on the host itself such as + storage or networking. + + When in doubt, say N. + config XEN_PCIDEV_FRONTEND tristate "Xen PCI Frontend" depends on PCI && X86 && XEN diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 9419709..4e133d3 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -43,6 +43,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o +obj-$(CONFIG_PCI_PF_STUB) += pci-pf-stub.o + obj-$(CONFIG_PCI_ECAM) += ecam.o obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o diff --git a/drivers/pci/pci-pf-stub.c b/drivers/pci/pci-pf-stub.c new file mode 100644 index 000..9d5fdf2 --- /dev/null +++ b/drivers/pci/pci-pf-stub.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* pci-pf-stub - simple stub driver for PCI SR-IOV PF device + * + * This driver is meant to act as a "white-list" for devices that provde + * SR-IOV functionality while at the same time not actually needing a + * driver of their own. + */ + +#include +#include + +/** + * pci_pf_stub_white_list - White list of devices to bind pci-pf-stub onto + * + * This table provides the list of IDs this driver is supposed to bind + * onto. You could think of this as a list of "quirked" devices where we + * are adding support for SR-IOV here since there are no other drivers + * that they would be running under. + */ +static const struct pci_device_id pci_pf_stub_white_list[] = { + { PCI_VDEVICE(AMAZON, 0x0053) }, + /* required last entry */ + { 0 } +}; +MODULE_DEVICE_TABLE(pci, pci_pf_stub_white_list); + +static int pci_pf_stub_probe(struct pci_dev *dev, +const struct pci_device_id *id) +{ + pci_info(dev, "claimed by pci-pf-stub\n"); + return 0; +} + +static struct pci_driver pf_stub_driver = { + .name = "pci-pf-stub", + .id_table = pci_pf_stub_white_list, + .probe = pci_pf_stub_probe, + .sriov_configure= pci_sriov_configure_simple, +}; + +static int __init pci_pf_stub_init(void) +{ + return pci_register_driver(&pf_stub_driver); +} + +static void __exit pci_pf_stub_exit(void) +{ + pci_unregister_driver(&pf_stub_driver); +} + +module_init(pci_pf_stub_init); +module_exit(pci_pf_stub_exit); + +MODULE_LICENSE("GPL"); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a637a7d..62dab14 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2549,6 +2549,8 @@ #define PCI_VENDOR_ID_CIRCUITCO 0x1cc8 #define PCI_SUBSYSTEM_ID_CIRCUITCO_MINNOWBOARD0x0001 +#define PCI_VENDOR_ID_AMAZON 0x1d0f + #define PCI_VENDOR_ID_TEKRAM 0x1de1 #define PCI_DEVICE_ID_TEKRAM_DC2900xdc29 LGTM Reviewed-by: Greg Rose
Re: [pci PATCH v8 1/4] pci: Add pci_sriov_configure_simple for PFs that don't manage VF resources
On 4/20/2018 9:28 AM, Alexander Duyck wrote: This patch adds a common configuration function called pci_sriov_configure_simple that will allow for managing VFs on devices where the PF is not capable of managing VF resources. Signed-off-by: Alexander Duyck Tested-by: Mark Rustad --- v5: New patch replacing pci_sriov_configure_unmanaged with pci_sriov_configure_simple Dropped bits related to autoprobe changes v6: Defined pci_sriov_configure_simple as NULL if IOV is disabled v7: Updated pci_sriov_configure_simple to drop need for err value Fixed comment explaining why pci_sriov_configure_simple is NULL drivers/pci/iov.c | 31 +++ include/linux/pci.h |3 +++ 2 files changed, 34 insertions(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 677924a..3e0a7fd 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -807,3 +807,34 @@ int pci_sriov_get_totalvfs(struct pci_dev *dev) return dev->sriov->total_VFs; } EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs); + +/** + * pci_sriov_configure_simple - helper to configure unmanaged SR-IOV + * @dev: the PCI device + * @nr_virtfn: number of virtual functions to enable, 0 to disable + * + * Used to provide generic enable/disable SR-IOV option for devices + * that do not manage the VFs generated by their driver. Return value + * is negative on error, or number of VFs allocated on success. + */ +int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn) +{ + might_sleep(); + + if (!dev->is_physfn) + return -ENODEV; + + if (pci_vfs_assigned(dev)) { + pci_warn(dev, +"Cannot modify SR-IOV while VFs are assigned\n"); + return -EPERM; + } + + if (!nr_virtfn) { + sriov_disable(dev); + return 0; + } + + return sriov_enable(dev, nr_virtfn) ? : nr_virtfn; +} +EXPORT_SYMBOL_GPL(pci_sriov_configure_simple); diff --git a/include/linux/pci.h b/include/linux/pci.h index ae42289..7d36e39 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1955,6 +1955,7 @@ static inline void pci_mmcfg_late_init(void) { } int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); +int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); #else @@ -1982,6 +1983,8 @@ static inline int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { return 0; } static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) { return 0; } +/* this is expected to be used as a function pointer, just define as NULL */ +#define pci_sriov_configure_simple NULL static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } Thanks Alex! Reviewed-by: Greg Rose
Re: [RFC PATCH 3/4] acpi: apei: Do not panic() in NMI because of GHES messages
On 04/20/2018 02:27 AM, James Morse wrote: > Hi Alex, > > On 04/16/2018 10:59 PM, Alex G. wrote: >> On 04/13/2018 11:38 AM, James Morse wrote: >>> This assumes a cache-invalidate will clear the error, which I don't > think we're >>> guaranteed on arm. >>> It also destroys any adjacent data, "everyone's happy" includes the > thread that >>> got a chunk of someone-else's stack frame, I don't think it will be > happy for >>> very long! >> >> Hmm, no cache-line (or page) invalidation on arm64? How does >> dma_map/unmap_*() work then? You may not guarantee to fix the error, but > > There are cache-invalidate instructions, but I don't think 'solving' a > RAS error with them is the right thing to do. You seem to be putting RAS on a pedestal in a very cloudy and foggy day. I admit that I fail to see the specialness of RAS in comparison to other errors. >> I don't buy into the "let's crash without trying" argument. > > Our 'cache writeback granule' may be as large as 2K, so we may have to > invalidate up to 2K of data to convince the hardware this address is > okay again. Eureka! OS can invalidate the entire page. 1:1 mapping with the memory management data. > All we've done here is differently-corrupt the data so that it no longer > generates a RAS fault, it just gives you the wrong data instead. > Cache-invalidation is destructive. > > I don't think there is a one-size-fits-all solution here. Of course there isn't. That's not the issue. A cache corruption is a special case of a memory access issue, and that, we already know how to handle. Triple-fault and cpu-on-fire concerns apply wrt returning to the context which triggered the problem. We've already figured that out. There is a lot of opportunity here for using well tested code paths and not crashing on first go. Why let firmware make this a problem again? >>> (this is a side issue for AER though) >> >> Somebody muddled up AER with these tables, so we now have to worry about >> it. :) > > Eh? I see there is a v2, maybe I'll understand this comment once I read it. I meant that somebody (the spec writers) decided to put ominous errors (PCIe) on the same severity scale with "cpu is on fire" errors. How does FFS handle race conditions that can occur when accessing HW concurrently with the OS? I'm told it's the main reasons why BIOS doesn't release unused cores from SMM early. >>> >>> This is firmware's problem, it depends on whether there is any > hardware that is >>> shared with the OS. Some hardware can be marked 'secure' in which > case only >>> firmware can access it, alternatively firmware can trap or just > disable the OS's >>> access to the shared hardware. >> >> It's everyone's problem. It's the firmware's responsibility. > > It depends on the SoC design. If there is no hardware that the OS and > firmware both need to access to handle an error then I don't think > firmware needs to do this. > > >>> For example, with the v8.2 RAS Extensions, there are some per-cpu error >>> registers. Firmware can disable these for the OS, so that it always > reads 0 from >>> them. Instead firmware takes the error via FF, reads the registers from >>> firmware, and dumps CPER records into the OS's memory. >>> >>> If there is a shared hardware resource that both the OS and firmware > may be >>> accessing, yes firmware needs to pull the other CPUs in, but this > depends on the >>> SoC design, it doesn't necessarily happen. >> >> The problem with shared resources is just a problem. I've seen systems >> where all 100 cores are held up for 300+ ms. In latency-critical >> applications reliability drops exponentially. Am I correct in assuming >> your answer would be to "hide" more stuff from the OS? > > No, I'm not a fan of firmware cycle stealing. If you can design the SoC or > firmware so that the 'all CPUs' stuff doesn't need to happen, then you > won't get > these issues. (I don't design these things, I'm sure they're much more > complicated > than I think!) > > Because the firmware is SoC-specific, so it only needs to do exactly > what is necessary. Irrespective of the hardware design, there's devicetree, ACPI methods, and a few other ways to inform the OS of non-standard bits. They don't have the resource sharing problem. I'm confused as to why FFS is used when there are concerns about resource conflicts instead of race-free alternatives. I think the idea of firmware-first is broken. But it's there, it's shipping in FW, so we have to accommodate it in SW. >>> >>> Part of our different-views here is firmware-first is taking > something away from >>> you, whereas for me its giving me information that would otherwise be in >>> secret-soc-specific registers. >> >> Under this interpretation, FFS is a band-aid to the problem of "secret" >> registers. "Secret" hardware doesn't really fit well into the idea of an >> OS [1]. > > Sorry, I'm being sloppy with my terminology, by secret-soc-specific I > mean either Linux can't access them
Re: [PATCH v7 00/11] Sunxi: Add SMP support on A83T
Hello, On Fri, 20 Apr 2018 23:10:11 +0200 Mylène Josserand wrote: > Hello everyone, > > This is a V7 of my series that adds SMP support for Allwinner sun8i-a83t. > Based on sunxi's tree, sunxi/for-next branch. > Depends on a patch from Doug Berger that allows to include the "cpu-type" > header on assembly files: > https://lkml.org/lkml/2018/2/23/1263 > (applied on Broadcom's tree: > https://github.com/Broadcom/stblinux/commits/soc/next) And I forgot to say that I will have a look at the CPU0 hotplug but not on this series. Best regards, Mylène > > If you have any remarks/questions, let me know. > Thank you in advance, > Mylène > > Changes from v6: > - Correct the commit log on patch 07 according to Sergei Shtylyov's > review. > - Rename the field "is_sun8i" into "is_a83t". > - Add all Tested-by and Reviewed-by from previous version. > > Changes from v5: > - Remove my patch 01 and use the patch of Doug Berger to be able to > include the cpu-type header on assembly files. > - Rename smp_init_cntvoff function into secure_cntvoff_init according > to Marc Zyngier's review. > - According to Chen-Yu and Maxime's reviews, remove the patch that was > moving structures. Instead of using an index to retrieve which > architecture we are having, use a global variable. > - Merge the 2 patches that move assembly code from C to assembly file. > - Use a sun8i field instead of sun9i to know on which architecture we > are using because many modifications/additions of the code are for > sun8i-a83t. > - Rework the patch "add is_sun8i field" to add only this field in this > patch. The part of the patch that was starting to handle the differences > between sun8i-a83t and sun9i-a80 is merged in the patch that adds the > support of sun8i-a83t. > - Add a new patch that refactor the shmobile code to use the new function > secure_cntvoff_init introduced in this series. > > Changes from v4: > - Rebased my series according to new Chen-Yu series: >"ARM: sunxi: Clean and improvements for multi-cluster SMP" >https://lkml.org/lkml/2018/3/8/886 > - Updated my series according to Marc Zyngier's reviews to add CNTVOFF > initialization's function into ARM's common part. Thanks to that, other > platforms such as Renesa can use this function. > - For boot CPU, create a new machine to handle the CNTVOFF initialization > using "init_early" callback. > > Changes from v3: > - Take into account Maxime's reviews: > - split the first patch into 4 new patches: add sun9i device tree > parsing, rename some variables, add a83t support and finally, > add hotplug support. > - Move the code of previous patch 07 (to disable CPU0 disabling) > into hotplug support patch (see patch 04) > - Remove the patch that added PRCM register because it is already > available. Because of that, update the device tree parsing to use > "sun8i-a83t-r-ccu". > - Use a variable to know which SoC we currently have > - Take into account Chen-Yu's reviews: create two iounmap functions > to release the resources of the device tree parsing. > - Take into account Marc's review: Update the code to initialize CNTVOFF > register. As there is already assembly code in the driver, I decided > to create an assembly file not to mix assembly and C code. > For that, I create 3 new patches: move the current assembly code that > handles the cluster cache enabling into a file, move the cpu_resume entry > in this file and finally, add a new assembly entry to initialize the timer > offset for boot CPU and secondary CPUs. > > Changes from v2: > - Rebased my modifications according to new Chen Yu's patch series > that adds SMP support for sun9i-a80 (without MCPM). > - Split the device-tree patches into 3 patches for CPUCFG, R_CPUCFG > and PRCM registers for more visibility. > - The hotplug of CPU0 is currently not working (even after trying what > Allwinner's code is doing) so remove the possibility of disabling > this CPU. Created a new patch for it. > > Changes from v1: > - Add Chen Yu's patch in my series (see path 01) > - Add new compatibles for prcm and cpucfg registers for sun8i-a83t. > Create two functions to separate the DT parsing of sun9i-a80 and > sun8i-a83t. > - Thanks to Maxime's review: order device tree's nodes according > to physical addresses, remove unused label and fix registers' sizes. > Update the commit log and commit title of my last patch (see > patch 05). > > Mylène Josserand (11): > ARM: sunxi: smp: Move assembly code into a file > ARM: dts: sun8i: Add CPUCFG device node for A83T dtsi > ARM: dts: sun8i: Add R_CPUCFG device node for the A83T dtsi > ARM: dts: sun8i: a83t: Add CCI-400 node > ARM: smp: Add initialization of CNTVOFF > ARM: sunxi: Add initialization of CNTVOFF > AR
mmotm 2018-04-20-14-58 uploaded
The mm-of-the-moment snapshot 2018-04-20-14-58 has been uploaded to http://www.ozlabs.org/~akpm/mmotm/ mmotm-readme.txt says README for mm-of-the-moment: http://www.ozlabs.org/~akpm/mmotm/ This is a snapshot of my -mm patch queue. Uploaded at random hopefully more than once a week. You will need quilt to apply these patches to the latest Linus release (4.x or 4.x-rcY). The series file is in broken-out.tar.gz and is duplicated in http://ozlabs.org/~akpm/mmotm/series The file broken-out.tar.gz contains two datestamp files: .DATE and .DATE--mm-dd-hh-mm-ss. Both contain the string -mm-dd-hh-mm-ss, followed by the base kernel version against which this patch series is to be applied. This tree is partially included in linux-next. To see which patches are included in linux-next, consult the `series' file. Only the patches within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in linux-next. A git tree which contains the memory management portion of this tree is maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git by Michal Hocko. It contains the patches which are between the "#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series file, http://www.ozlabs.org/~akpm/mmotm/series. A full copy of the full kernel tree with the linux-next and mmotm patches already applied is available through git within an hour of the mmotm release. Individual mmotm releases are tagged. The master branch always points to the latest release, so it's constantly rebasing. http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/ To develop on top of mmotm git: $ git remote add mmotm git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git $ git remote update mmotm $ git checkout -b topic mmotm/master $ git send-email mmotm/master.. [...] To rebase a branch with older patches to a new mmotm release: $ git remote update mmotm $ git rebase --onto mmotm/master topic The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second) contains daily snapshots of the -mm tree. It is updated more frequently than mmotm, and is untested. A git copy of this tree is available at http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/ and use of this tree is similar to http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above. This mmotm tree contains the following patches against 4.17-rc1: (patches marked "*" will be included in linux-next) origin.patch i-need-old-gcc.patch * fork-unconditionally-clear-stack-on-fork.patch * mm-fix-do_pages_move-status-handling.patch * mm-pagemap-fix-swap-offset-value-for-pmd-migration-entry.patch * writeback-safer-lock-nesting.patch * mm-enable-thp-migration-for-shmem-thp.patch * rapidio-fix-rio_dma_transfer-error-handling.patch * kasan-add-no_sanitize-attribute-for-clang-builds.patch * maintainers-add-personal-addresses-for-sascha-and-me.patch * autofs-mount-point-create-should-honour-passed-in-mode.patch * proc-revalidate-kernel-thread-inodes-to-root-root.patch * proc-fix-proc-loadavg-regression.patch * kexec_file-do-not-add-extra-alignment-to-efi-memmap.patch * fs-elf-dont-complain-map_fixed_noreplace-unless-eexist-error.patch * mm-memcg-add-__gfp_nowarn-in-__memcg_schedule_kmem_cache_create.patch * fix-null-pointer-in-page_cache_tree_insert.patch * mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap.patch * mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v2.patch * kasan-prohibit-kasanstructleak-combination.patch * lib-avoid-soft-lockup-in-test_find_first_bit.patch * memcg-remove-memcg_cgroup-id-from-idr-on-mem_cgroup_css_alloc-failure.patch * ocfs2-submit-another-bio-if-current-bio-is-full.patch * arm-arch-arm-include-asm-pageh-needs-personalityh.patch * prctl-add-pr_et_pdeathsig_proc.patch * ocfs2-get-rid-of-ocfs2_is_o2cb_active-function.patch * ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery.patch * ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery-checkpatch-fixes.patch * ocfs2-dont-put-and-assign-null-to-bh-allocated-outside.patch * ocfs2-dont-use-iocb-when-eiocbqueued-returns.patch * block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch * dentry-fix-kmemcheck-splat-at-take_dentry_name_snapshot.patch * namei-allow-restricted-o_creat-of-fifos-and-regular-files.patch mm.patch * slab-__gfp_zero-is-incompatible-with-a-constructor.patch * mm-introduce-arg_lock-to-protect-arg_startend-and-env_startend-in-mm_struct.patch * mm-introduce-arg_lock-to-protect-arg_startend-and-env_startend-in-mm_struct-fix.patch * mm-memcontrol-move-swap-charge-handling-into-get_swap_page.patch * mm-memcontrol-implement-memoryswapevents.patch * zram-correct-flag-name-of-zram_access.patch * zram-mark-incompressible-page-as-zram_huge.patch * zram-record-accessed-second.patch * zram-introduce-zram-memory-tracking.patch * zram-introduce-zram-memory-tracking-update.patch * zram-introduce-zram-memory-tracking-update-fix.patch * mm-shmem-add-__rcu-annot
Re: [PATCH v3] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.
On 04/20/2018 11:26 PM, NeilBrown wrote: > > Winbond spi-nor flash 32MB and larger have an 'Extended Address > Register' as one option for addressing beyond 16MB (Macronix > has the same concept, Spansion has EXTADD bits in the Bank Address > Register). > > According to section >8.2.7 Write Extended Address Register (C5h) > > of the Winbond W25Q256FV data sheet (256M-BIT SPI flash) > >The Extended Address Register is only effective when the device is >in the 3-Byte Address Mode. When the device operates in the 4-Byte >Address Mode (ADS=1), any command with address input of A31-A24 >will replace the Extended Address Register values. It is >recommended to check and update the Extended Address Register if >necessary when the device is switched from 4-Byte to 3-Byte Address >Mode. > > So the documentation suggests clearing the EAR after switching to > 3-byte mode. Experimentation shows that the EAR is *always* one after > the switch to 3-byte mode, so clearing the EAR is mandatory at > shutdown for a subsequent 3-byte-addressed reboot to work. > > Note that some SOCs (e.g. MT7621) do not assert a reset line at normal > reboot, so we cannot rely on hardware reset. The MT7621 does assert a > reset line at watchdog-reset. > > This problem is not a regression. However the commit identified below > added support for resetting an spi-nor chip at shutdown, but didn't > achieve the goal for all spi-nor chips. So this patch can be seen as > fixing that one. > > Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when > exiting") > Cc: sta...@vger.kernel.org (v4.16) > Signed-off-by: NeilBrown Acked-by: Marek Vasut > --- > drivers/mtd/spi-nor/spi-nor.c | 14 ++ > include/linux/mtd/spi-nor.h | 2 ++ > 2 files changed, 16 insertions(+) > > diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c > index 5bfa36e95f35..42ae9a1529bb 100644 > --- a/drivers/mtd/spi-nor/spi-nor.c > +++ b/drivers/mtd/spi-nor/spi-nor.c > @@ -284,6 +284,20 @@ static inline int set_4byte(struct spi_nor *nor, const > struct flash_info *info, > if (need_wren) > write_disable(nor); > > + if (!status && !enable && > + JEDEC_MFR(info) == SNOR_MFR_WINBOND) { > + /* > + * On Winbond W25Q256FV, leaving 4byte mode causes > + * the Extended Address Register to be set to 1, so all > + * 3-byte-address reads come from the second 16M. > + * We must clear the register to enable normal behavior. > + */ > + write_enable(nor); > + nor->cmd_buf[0] = 0; > + nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1); > + write_disable(nor); > + } > + > return status; > default: > /* Spansion style */ > diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h > index de36969eb359..e60da0d34cc1 100644 > --- a/include/linux/mtd/spi-nor.h > +++ b/include/linux/mtd/spi-nor.h > @@ -62,6 +62,8 @@ > #define SPINOR_OP_RDCR 0x35/* Read configuration register > */ > #define SPINOR_OP_RDFSR 0x70/* Read flag status register */ > #define SPINOR_OP_CLFSR 0x50/* Clear flag status register */ > +#define SPINOR_OP_RDEAR 0xc8/* Read Extended Address > Register */ > +#define SPINOR_OP_WREAR 0xc5/* Write Extended Address > Register */ > > /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ > #define SPINOR_OP_READ_4B0x13/* Read data bytes (low frequency) */ > -- Best regards, Marek Vasut
Re: [PATCH 4/4] sh: remove board_time_init() callback
On Fri, Apr 20, 2018 at 11:51:18PM +0200, Arnd Bergmann wrote: > On Fri, Apr 20, 2018 at 5:48 PM, Arnd Bergmann wrote: > > > @@ -41,8 +39,7 @@ static void __init sh_late_time_init(void) > > > > void __init time_init(void) > > { > > - if (board_time_init) > > - board_time_init(); > > + timer_init(); > > Testing revealed this to be broken, the fix is: > > diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c > index a29eb989d81b..8a1c6c8ab4ec 100644 > --- a/arch/sh/kernel/time.c > +++ b/arch/sh/kernel/time.c > @@ -39,7 +39,7 @@ static void __init sh_late_time_init(void) > > void __init time_init(void) > { > - timer_init(); > + timer_probe(); > > clk_init(); > > Let me know if you'd like me to resend the series with that typo fixed. If there are no other issues to correct, I can fix this when merging. Rich
Re: [PATCH] platform/x86: Kconfig: Fix dell-laptop dependency chain.
On Fri, Apr 20, 2018 at 05:55:28PM +, mario.limoncie...@dell.com wrote: > > > -Original Message- > > From: platform-driver-x86-ow...@vger.kernel.org [mailto:platform-driver-x86- > > ow...@vger.kernel.org] On Behalf Of Randy Dunlap > > Sent: Friday, April 20, 2018 12:53 PM > > To: Limonciello, Mario; dvh...@infradead.org; Andy Shevchenko > > Cc: LKML; platform-driver-...@vger.kernel.org > > Subject: Re: [PATCH] platform/x86: Kconfig: Fix dell-laptop dependency > > chain. > > > > On 04/20/18 10:42, Mario Limonciello wrote: > > > As reported by Randy Dunlap: > > >>> WARNING: unmet direct dependencies detected for DELL_SMBIOS > > >>> Depends on [m]: X86 [=y] && X86_PLATFORM_DEVICES [=y] > > >>> && (DCDBAS [=m] || > > >>> DCDBAS [=m]=n) && (ACPI_WMI [=n] || ACPI_WMI [=n]=n) > > >>> Selected by [y]: > > >>> - DELL_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] > > >>> && DMI [=y] > > >>> && BACKLIGHT_CLASS_DEVICE [=y] && (ACPI_VIDEO [=n] || > > >>> ACPI_VIDEO [=n]=n) > > >>> && (RFKILL [=n] || RFKILL [=n]=n) && SERIO_I8042 [=y] > > >>> > > > > > > Right now it's possible to set dell laptop to compile in but this > > > causes dell-smbios to compile in which breaks if dcdbas is a module. > > > > > > Dell laptop shouldn't select dell-smbios anymore, but depend on it. > > > > > > Fixes: 32d7b19 (platform/x86: dell-smbios: Resolve dependency error on > > DCDBAS) > > > > meta-comment: the SHA-1 ID should be the first 12 characters of the commit > > ID > > according to Documentation/process/submitting-patches.rst. > > Thanks for letting me, I didn't realize that. > > Darren, let me know if you want me to resubmit with this adjustment or if you > can adjust when you queue for testing. Will fix locally. -- Darren Hart VMware Open Source Technology Center
Re: [PATCH] platform/x86: Kconfig: Fix dell-laptop dependency chain.
On Fri, Apr 20, 2018 at 12:42:11PM -0500, Mario Limonciello wrote: > As reported by Randy Dunlap: > >> WARNING: unmet direct dependencies detected for DELL_SMBIOS > >> Depends on [m]: X86 [=y] && X86_PLATFORM_DEVICES [=y] > >>&& (DCDBAS [=m] || > >> DCDBAS [=m]=n) && (ACPI_WMI [=n] || ACPI_WMI [=n]=n) > >> Selected by [y]: > >> - DELL_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] > >> && DMI [=y] > >> && BACKLIGHT_CLASS_DEVICE [=y] && (ACPI_VIDEO [=n] || > >>ACPI_VIDEO [=n]=n) > >> && (RFKILL [=n] || RFKILL [=n]=n) && SERIO_I8042 [=y] > >> > > Right now it's possible to set dell laptop to compile in but this > causes dell-smbios to compile in which breaks if dcdbas is a module. > > Dell laptop shouldn't select dell-smbios anymore, but depend on it. Ugh. Indeed. One of the goals of the previous maelstrom of dell* kconfig depencency changes was to make it so DELL_LAPTOP was visible without having to toggle DELL_SMBIOS*. But, having "select DELL_SMBIOS" avoids the dependency check causing this failure (I'm surprised and dismayed this made it through all the config permutation testing this saw). I'll apply this patch, and I guess it should go to stable, and for the next version I think it's time for a new "CONFIG DELL_EXTRAS" which will allow us to default DELL_SMBIOS to y and make the menu more consistent. Thank you for the catch. Grr :-( -- Darren Hart VMware Open Source Technology Center
Re: [PATCH 4/4] sh: remove board_time_init() callback
On Fri, Apr 20, 2018 at 5:48 PM, Arnd Bergmann wrote: > @@ -41,8 +39,7 @@ static void __init sh_late_time_init(void) > > void __init time_init(void) > { > - if (board_time_init) > - board_time_init(); > + timer_init(); Testing revealed this to be broken, the fix is: diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index a29eb989d81b..8a1c6c8ab4ec 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,7 +39,7 @@ static void __init sh_late_time_init(void) void __init time_init(void) { - timer_init(); + timer_probe(); clk_init(); Let me know if you'd like me to resend the series with that typo fixed. Arnd
Re: [RFC PATCH] dt-bindings: add a jsonschema binding example
On Wed 18 Apr 15:29 PDT 2018, Rob Herring wrote: > The current DT binding documentation format of freeform text is painful > to write, review, validate and maintain. > > This is just an example of what a binding in the schema format looks > like. It's using jsonschema vocabulary in a YAML encoded document. Using > jsonschema gives us access to existing tooling. A YAML encoding gives us > something easy to edit. > > This example is just the tip of the iceberg, but it the part most > developers writing bindings will interact with. Backing all this up > are meta-schema (to validate the binding schemas), some DT core schema, > YAML encoded DT output with dtc, and a small number of python scripts to > run validation. The gory details including how to run end-to-end > validation can be found here: > > https://www.spinics.net/lists/devicetree-spec/msg00649.html > > Signed-off-by: Rob Herring > --- > Cc list, > You all review and/or write lots of binding documents. I'd like some feedback > on the format. > I really like the idea of formalizing the binding document format and the ability of validating a dtb is really nice. > Thanks, > Rob > > .../devicetree/bindings/example-schema.yaml| 149 > + > 1 file changed, 149 insertions(+) > create mode 100644 Documentation/devicetree/bindings/example-schema.yaml > > diff --git a/Documentation/devicetree/bindings/example-schema.yaml > b/Documentation/devicetree/bindings/example-schema.yaml [..] > + reg: > +# The description of each element defines the order and implicitly > defines > +# the number of reg entries > +items: > + - description: core registers > + - description: aux registers > +# minItems/maxItems equal to 2 is implied I assume that a reg with variable number of entries would have "description" for all of them and then a minItems that matches the required ones and maxItems matching all of them? > + > + reg-names: > +# The core schema enforces this is a string array > +items: > + - const: core > + - const: aux I presume validation based on this should check that there's equal number of entries in reg-names as there where in reg. Should this relationship be described in the schema? [..] > + interrupts: > +# Either 1 or 2 interrupts can be present > +minItems: 1 > +maxItems: 2 > +items: > + - description: tx or combined interrupt > + - description: rx interrupt > + > +description: | > + A variable number of interrupts warrants a description of what > conditions > + affect the number of interrupts. Otherwise, descriptions on standard > + properties are not necessary. For validation purposes this could be interrupts with interrupt-parents or a interrupts-extend, a fact that we probably don't want to duplicate in every definition? Perhaps we should just do like you did here and define the "interrupts" and then in the validation tool - where we need to encode the logic behind this anyways - we support the different variants. > + > + interrupt-names: > +# minItems must be specified here because the default would be 2 > +minItems: 1 As with reg-names, it would be good to have the validator warn if this is not the same number of items as entries in "interrupts". > +items: > + - const: "tx irq" > + - const: "rx irq" > + > + # Property names starting with '#' must be quoted > + '#interrupt-cells': > +# A simple case where the value must always be '2'. > +# The core schema handles that this must be a single integer. > +const: 2 If this is specified then interrupt-controller should also be given, or vise versa. How would we describe that? > + > + interrupt-controller: {} > +# The core checks this is a boolean, so just have to list it here to be > +# valid for this binding. > + Regards, Bjorn
Re: [PATCH] prctl: Don't compile some of prctl functions when CRUI
Hi Sergey, I love your patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.17-rc1 next-20180420] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Sergey-Senozhatsky/prctl-Don-t-compile-some-of-prctl-functions-when-CRUI/20180421-040826 config: i386-randconfig-s1-201815 (attached as .config) compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): kernel/sys.c: In function 'prctl_set_mm': >> kernel/sys.c:2108:10: error: implicit declaration of function >> 'prctl_set_mm_exe_file' [-Werror=implicit-function-declaration] return prctl_set_mm_exe_file(mm, (unsigned int)addr); ^ >> kernel/sys.c:2174:10: error: implicit declaration of function >> 'validate_prctl_map' [-Werror=implicit-function-declaration] error = validate_prctl_map(&prctl_map); ^~ cc1: some warnings being treated as errors vim +/prctl_set_mm_exe_file +2108 kernel/sys.c f606b77f1 Cyrill Gorcunov 2014-10-09 2103 79f0713d4 Cyrill Gorcunov 2012-03-15 2104 if (!capable(CAP_SYS_RESOURCE)) 028ee4be3 Cyrill Gorcunov 2012-01-12 2105 return -EPERM; 028ee4be3 Cyrill Gorcunov 2012-01-12 2106 6e399cd14 Davidlohr Bueso 2015-04-16 2107 if (opt == PR_SET_MM_EXE_FILE) 6e399cd14 Davidlohr Bueso 2015-04-16 @2108 return prctl_set_mm_exe_file(mm, (unsigned int)addr); b32dfe377 Cyrill Gorcunov 2012-05-31 2109 4a00e9df2 Alexey Dobriyan 2015-06-25 2110 if (opt == PR_SET_MM_AUXV) 4a00e9df2 Alexey Dobriyan 2015-06-25 2111 return prctl_set_auxv(mm, addr, arg4); 4a00e9df2 Alexey Dobriyan 2015-06-25 2112 1ad75b9e1 Cyrill Gorcunov 2012-06-07 2113 if (addr >= TASK_SIZE || addr < mmap_min_addr) 028ee4be3 Cyrill Gorcunov 2012-01-12 2114 return -EINVAL; 028ee4be3 Cyrill Gorcunov 2012-01-12 2115 fe8c7f5cb Cyrill Gorcunov 2012-05-31 2116 error = -EINVAL; fe8c7f5cb Cyrill Gorcunov 2012-05-31 2117 ddf1d398e Mateusz Guzik 2016-01-20 2118 down_write(&mm->mmap_sem); 028ee4be3 Cyrill Gorcunov 2012-01-12 2119 vma = find_vma(mm, addr); 028ee4be3 Cyrill Gorcunov 2012-01-12 2120 4a00e9df2 Alexey Dobriyan 2015-06-25 2121 prctl_map.start_code= mm->start_code; 4a00e9df2 Alexey Dobriyan 2015-06-25 2122 prctl_map.end_code = mm->end_code; 4a00e9df2 Alexey Dobriyan 2015-06-25 2123 prctl_map.start_data= mm->start_data; 4a00e9df2 Alexey Dobriyan 2015-06-25 2124 prctl_map.end_data = mm->end_data; 4a00e9df2 Alexey Dobriyan 2015-06-25 2125 prctl_map.start_brk = mm->start_brk; 4a00e9df2 Alexey Dobriyan 2015-06-25 2126 prctl_map.brk = mm->brk; 4a00e9df2 Alexey Dobriyan 2015-06-25 2127 prctl_map.start_stack = mm->start_stack; 4a00e9df2 Alexey Dobriyan 2015-06-25 2128 prctl_map.arg_start = mm->arg_start; 4a00e9df2 Alexey Dobriyan 2015-06-25 2129 prctl_map.arg_end = mm->arg_end; 4a00e9df2 Alexey Dobriyan 2015-06-25 2130 prctl_map.env_start = mm->env_start; 4a00e9df2 Alexey Dobriyan 2015-06-25 2131 prctl_map.env_end = mm->env_end; 4a00e9df2 Alexey Dobriyan 2015-06-25 2132 prctl_map.auxv = NULL; 4a00e9df2 Alexey Dobriyan 2015-06-25 2133 prctl_map.auxv_size = 0; 4a00e9df2 Alexey Dobriyan 2015-06-25 2134 prctl_map.exe_fd= -1; 4a00e9df2 Alexey Dobriyan 2015-06-25 2135 028ee4be3 Cyrill Gorcunov 2012-01-12 2136 switch (opt) { 028ee4be3 Cyrill Gorcunov 2012-01-12 2137 case PR_SET_MM_START_CODE: 4a00e9df2 Alexey Dobriyan 2015-06-25 2138 prctl_map.start_code = addr; fe8c7f5cb Cyrill Gorcunov 2012-05-31 2139 break; fe8c7f5cb Cyrill Gorcunov 2012-05-31 2140 case PR_SET_MM_END_CODE: 4a00e9df2 Alexey Dobriyan 2015-06-25 2141 prctl_map.end_code = addr; 028ee4be3 Cyrill Gorcunov 2012-01-12 2142 break; 028ee4be3 Cyrill Gorcunov 2012-01-12 2143 case PR_SET_MM_START_DATA: 4a00e9df2 Alexey Dobriyan 2015-06-25 2144 prctl_map.start_data = addr; 028ee4be3 Cyrill Gorcunov 2012-01-12 2145 break; fe8c7f5cb Cyrill Gorcunov 2012-05-31 2146 case PR_SET_MM_END_DATA: 4a00e9df2 Alexey Dobriyan 2015-06-25 2147 prctl_map.end_data = addr; 4a00e9df2 Alexey Dobriyan 2015-06-25 2148 break; 4a00e9df2 Alexey Dobriyan 2015-06-25 2149 case PR_SET_MM_START_STACK: 4a00e9df2 Alexey Dobriyan 2015-06-25 2150 prctl_map.start_stack = addr; 028ee4be3 Cyrill Gorcunov 2012-01-12 2151 break; 028ee4be3 Cyrill Gorcunov 2012-01-12 2
[PATCH] platform/x86: acer-wmi: add another KEY_POWER keycode
Now that we have informed the firmware that the Power Button driver is active, laptops such as the Acer Swift 3 will generate a WMI key event with code 0x87 when the power button key is pressed. Add this keycode to the table so that it is converted to an appropriate input event. Signed-off-by: Antonio Rosario Intilisano Acked-by: Gianfranco Costamagna Cc: Chris Chiu Cc: Daniel Drake Cc: Andy Shevchenko --- drivers/platform/x86/acer-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 1be71f956d5c..8952173dd380 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -129,6 +129,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = { {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} }, {KE_KEY, 0x85, {KEY_TOUCHPAD_TOGGLE} }, {KE_KEY, 0x86, {KEY_WLAN} }, + {KE_KEY, 0x87, {KEY_POWER} }, {KE_END, 0} }; -- 2.15.1
Re: [PATCH] sched/fair: Change sched_feat(x) in !CONFIG_SCHED_DEBUG case
On Fri, 20 Apr 2018, Peter Zijlstra wrote: > On Fri, Apr 20, 2018 at 06:29:07PM +0200, Philipp Klocke wrote: > > The gain is stopping a warning that clutters the output log of clang. > > Well, you should not be using clang anyway. It is known to miscompile > the kernel. > There are some advantages of having a second compiler that can compile the kernel (https://lwn.net/Articles/734071/). Some people in the kernel community and LLVM community are trying to get that to work. We also want a zero-warning policy for clang, similar to gcc. Hence, this motivates to have a look at those few clang warnings and come up with patches for them. This does not imply to make changes at any cost, and we need to determine a proper patch to either change the source code, disable the warning in the build script or annotate the file with some clang-specific pragmas. To us, a minor change in the source sounded most reasonable after looking at all three possible patches. Philipp might need another iteration, but it generally looks sound to me if we get the details flattened out. Lukas
Re: [PATCH v2] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.
On Fri, Apr 20 2018, Boris Brezillon wrote: > Hi Neil, > > On Mon, 16 Apr 2018 09:42:30 +1000 > NeilBrown wrote: > >> Winbond spi-nor flash 32MB and larger have an 'Extended Address >> Register' as one option for addressing beyond 16MB (Macronix >> has the same concept, Spansion has EXTADD bits in the Bank Address >> Register). >> >> According to section >>8.2.7 Write Extended Address Register (C5h) >> >> of the Winbond W25Q256FV data sheet (256M-BIT SPI flash) >> >>The Extended Address Register is only effective when the device is >>in the 3-Byte Address Mode. When the device operates in the 4-Byte >>Address Mode (ADS=1), any command with address input of A31-A24 >>will replace the Extended Address Register values. It is >>recommended to check and update the Extended Address Register if >>necessary when the device is switched from 4-Byte to 3-Byte Address >>Mode. >> >> So the documentation suggests clearing the EAR after switching to >> 3-byte mode. Experimentation shows that the EAR is *always* one after >> the switch to 3-byte mode, so clearing the EAR is mandatory at >> shutdown for a subsequent 3-byte-addressed reboot to work. >> >> Note that some SOCs (e.g. MT7621) do not assert a reset line at normal >> reboot, so we cannot rely on hardware reset. The MT7621 does assert a >> reset line at watchdog-reset. >> >> Signed-off-by: NeilBrown > > We should probably backport the fix. Can you add a Fixes and Cc-stable > tag? It's a bit weird having Fixes when this isn't a regression, but I guess it doesn't hurt. I chose Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when exiting") as this patch it useless without that one. I also fixed the comment and have resent. Thanks, NeilBrown > >> --- >> >> following a helpful discussion with Marek, I've revised the description >> a little, and make the code change specific to winbond. >> I've change the OP names to RDEAR and WREAR instead of RDXA and WRXA to >> match names used in the Macronix documentation. Winbond documentation >> doesn't provide abbreviated OP names. >> >> Thanks, >> NeilBrown >> >> >> drivers/mtd/spi-nor/spi-nor.c | 13 + >> include/linux/mtd/spi-nor.h | 2 ++ >> 2 files changed, 15 insertions(+) >> >> diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c >> index d445a4d3b770..0d0af0acf8b9 100644 >> --- a/drivers/mtd/spi-nor/spi-nor.c >> +++ b/drivers/mtd/spi-nor/spi-nor.c >> @@ -284,6 +284,19 @@ static inline int set_4byte(struct spi_nor *nor, const >> struct flash_info *info, >> if (need_wren) >> write_disable(nor); >> >> +if (!status && !enable && >> +JEDEC_MFR(info) == SNOR_MFR_WINBOND) { >> +/* On Winbond W25Q256FV, leaving 4byte mode causes > > We use regular kernel-comment style in MTD: > > /* >* blablabla >*/ > > Thanks, > > Boris > >> + * the Extended Address Register to be set to 1, so all >> + * 3-byte-address reads come from the second 16M. >> + * We must clear the register to enable normal behavior. >> + */ >> +write_enable(nor); >> +nor->cmd_buf[0] = 0; >> +nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1); >> +write_disable(nor); >> +} >> + >> return status; >> default: >> /* Spansion style */ >> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h >> index de36969eb359..e60da0d34cc1 100644 >> --- a/include/linux/mtd/spi-nor.h >> +++ b/include/linux/mtd/spi-nor.h >> @@ -62,6 +62,8 @@ >> #define SPINOR_OP_RDCR 0x35/* Read configuration register >> */ >> #define SPINOR_OP_RDFSR 0x70/* Read flag status register */ >> #define SPINOR_OP_CLFSR 0x50/* Clear flag status register */ >> +#define SPINOR_OP_RDEAR 0xc8/* Read Extended Address >> Register */ >> +#define SPINOR_OP_WREAR 0xc5/* Write Extended Address >> Register */ >> >> /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ >> #define SPINOR_OP_READ_4B 0x13/* Read data bytes (low frequency) */ signature.asc Description: PGP signature
[PATCH] platform/x86: acer-wmi: add another KEY_POWER keycode
Now that we have informed the firmware that the Power Button driver is active, laptops such as the Acer Swift 3 will generate a WMI key event with code 0x87 when the power button key is pressed. Add this keycode to the table so that it is converted to an appropriate input event. Signed-off-by: Antonio Rosario Intilisano Acked-by: Gianfranco Costamagna Cc: Chris Chiu Cc: Daniel Drake Cc: Andy Shevchenko --- drivers/platform/x86/acer-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 1be71f956d5c..8952173dd380 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -129,6 +129,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = { {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} }, {KE_KEY, 0x85, {KEY_TOUCHPAD_TOGGLE} }, {KE_KEY, 0x86, {KEY_WLAN} }, + {KE_KEY, 0x87, {KEY_POWER} }, {KE_END, 0} }; -- 2.15.1
[PATCH v3] mtd: spi-nor: clear Winbond Extended Address Reg on switch to 3-byte addressing.
Winbond spi-nor flash 32MB and larger have an 'Extended Address Register' as one option for addressing beyond 16MB (Macronix has the same concept, Spansion has EXTADD bits in the Bank Address Register). According to section 8.2.7 Write Extended Address Register (C5h) of the Winbond W25Q256FV data sheet (256M-BIT SPI flash) The Extended Address Register is only effective when the device is in the 3-Byte Address Mode. When the device operates in the 4-Byte Address Mode (ADS=1), any command with address input of A31-A24 will replace the Extended Address Register values. It is recommended to check and update the Extended Address Register if necessary when the device is switched from 4-Byte to 3-Byte Address Mode. So the documentation suggests clearing the EAR after switching to 3-byte mode. Experimentation shows that the EAR is *always* one after the switch to 3-byte mode, so clearing the EAR is mandatory at shutdown for a subsequent 3-byte-addressed reboot to work. Note that some SOCs (e.g. MT7621) do not assert a reset line at normal reboot, so we cannot rely on hardware reset. The MT7621 does assert a reset line at watchdog-reset. This problem is not a regression. However the commit identified below added support for resetting an spi-nor chip at shutdown, but didn't achieve the goal for all spi-nor chips. So this patch can be seen as fixing that one. Fixes: 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when exiting") Cc: sta...@vger.kernel.org (v4.16) Signed-off-by: NeilBrown --- drivers/mtd/spi-nor/spi-nor.c | 14 ++ include/linux/mtd/spi-nor.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 5bfa36e95f35..42ae9a1529bb 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -284,6 +284,20 @@ static inline int set_4byte(struct spi_nor *nor, const struct flash_info *info, if (need_wren) write_disable(nor); + if (!status && !enable && + JEDEC_MFR(info) == SNOR_MFR_WINBOND) { + /* +* On Winbond W25Q256FV, leaving 4byte mode causes +* the Extended Address Register to be set to 1, so all +* 3-byte-address reads come from the second 16M. +* We must clear the register to enable normal behavior. +*/ + write_enable(nor); + nor->cmd_buf[0] = 0; + nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1); + write_disable(nor); + } + return status; default: /* Spansion style */ diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index de36969eb359..e60da0d34cc1 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -62,6 +62,8 @@ #define SPINOR_OP_RDCR 0x35/* Read configuration register */ #define SPINOR_OP_RDFSR0x70/* Read flag status register */ #define SPINOR_OP_CLFSR0x50/* Clear flag status register */ +#define SPINOR_OP_RDEAR0xc8/* Read Extended Address Register */ +#define SPINOR_OP_WREAR0xc5/* Write Extended Address Register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ_4B 0x13/* Read data bytes (low frequency) */ -- 2.14.0.rc0.dirty signature.asc Description: PGP signature
Re: [PATCH] kvmalloc: always use vmalloc if CONFIG_DEBUG_VM
On Fri, 20 Apr 2018, Matthew Wilcox wrote: > On Fri, Apr 20, 2018 at 04:54:53PM -0400, Mikulas Patocka wrote: > > On Fri, 20 Apr 2018, Michal Hocko wrote: > > > No way. This is just wrong! First of all, you will explode most likely > > > on many allocations of small sizes. Second, CONFIG_DEBUG_VM tends to be > > > enabled quite often. > > > > You're an evil person who doesn't want to fix bugs. > > Steady on. There's no need for that. Michal isn't evil. Please > apologise. I see this attitude from Michal again and again. He didn't want to fix vmalloc(GFP_NOIO), he didn't want to fix alloc_pages sleeping when __GFP_NORETRY is used. So what should I say? Fix them and you won't be evil :-) (he could also fix the oom killer, so that it is triggered when free_memory+cache+free_swap goes beyond a threshold and not when you loop too long in the allocator) > > You refused to fix vmalloc(GFP_NOIO) misbehavior a year ago (did you make > > some progress with it since that time?) and you refuse to fix kvmalloc > > misuses. > > I understand you're frustrated, but this is not the way to get the problems > fixed. > > > I tried this patch on text-only virtual machine and /proc/vmallocinfo > > shows 614kB more memory. I tried it on a desktop machine with the chrome > > browser open and /proc/vmallocinfo space is increased by 7MB. So no - this > > won't exhaust memory and kill the machine. > > This is good data, thank you for providing it. > > > Arguing that this increases memory consumption is as bogus as arguing that > > CONFIG_LOCKDEP increses memory consumption. No one is forcing you to > > enable CONFIG_LOCKDEP and no one is forcing you to enable this kvmalloc > > test too. > > I think there's a real problem which is that CONFIG_DEBUG_VM is too broad. > It inserts code in a *lot* of places, some of which is quite expensive. > We would do better to split it into more granular pieces ... although > an explosion of configuration options isn't great either. Maybe just > CONFIG_DEBUG_VM and CONFIG_DEBUG_VM_EXPENSIVE. > > Michal may be wrong, but he's not evil. I already said that we can change it from CONFIG_DEBUG_VM to CONFIG_DEBUG_SG - or to whatever other option you may want, just to make sure that it is enabled in distro debug kernels by default. Mikulas
[PATCH v3] clk: add duty cycle support
Add the possibility to apply and query the clock signal duty cycle ratio. This is useful when the duty cycle of the clock signal depends on some other parameters controlled by the clock framework. For example, the duty cycle of a divider may depends on the raw divider setting (ratio = N / div) , which is controlled by the CCF. In such case, going through the pwm framework to control the duty cycle ratio of this clock would be a burden. A clock provider is not required to implement the operation to set and get the duty cycle. If it does not implement .get_duty_cycle(), the ratio is assumed to be 50%. This change also adds a new flag, CLK_DUTY_CYCLE_PARENT. This flag should be used to indicate that a clock, such as gates and muxes, may inherit the duty cycle ratio of its parent clock. If a clock does not provide a get_duty_cycle() callback and has CLK_DUTY_CYCLE_PARENT, then the call will be directly forwarded to its parent clock, if any. For set_duty_cycle(), the clock should also have CLK_SET_RATE_PARENT for the call to be forwarded Signed-off-by: Jerome Brunet --- The series has been developed to handled the sample clocks provided by audio clock controller of amlogic's A113 SoC. To support i2s modes, this clock need to have a 50% duty cycle ratio, while it should be just one pulse of the parent clock in dsp modes. Changes since v2 [1]: - Fix kbuild robot issue with the trace file (imcomplete change related to clk_duty structure) Changes since v1 [0]: - Use a structure to hold the duty cycle ratio - Change the way parent traversal is done, so the core framework is more aware of what is going on. Pass-through ops dropped as a result - Only one debugfs entry for the duty cycle ratio, instead of 2 - Minor fixes as pointed out by Stephen [0]: https://lkml.kernel.org/r/20180416175743.20826-1-jbru...@baylibre.com [1]: https://lkml.kernel.org/r/20180420153431.13003-1-jbru...@baylibre.com drivers/clk/clk.c| 202 +-- include/linux/clk-provider.h | 26 ++ include/linux/clk.h | 33 +++ include/trace/events/clk.h | 36 4 files changed, 292 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 7af555f0e60c..a1874f2932ee 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -68,6 +68,7 @@ struct clk_core { unsigned long max_rate; unsigned long accuracy; int phase; + struct clk_duty duty; struct hlist_head children; struct hlist_node child_node; struct hlist_head clks; @@ -2401,6 +2402,172 @@ int clk_get_phase(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_get_phase); +static void clk_core_reset_duty_cycle_nolock(struct clk_core *core) +{ + /* Assume a default value of 50% */ + core->duty.num = 1; + core->duty.den = 2; +} + +static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core); + +static int clk_core_update_duty_cycle_nolock(struct clk_core *core) +{ + struct clk_duty *duty = &core->duty; + int ret = 0; + + if (!core->ops->get_duty_cycle) + return clk_core_update_duty_cycle_parent_nolock(core); + + ret = core->ops->get_duty_cycle(core->hw, duty); + if (ret) + goto reset; + + /* Don't trust the clock provider too much */ + if (duty->den == 0 || duty->num > duty->den) { + ret = -EINVAL; + goto reset; + } + + return 0; + +reset: + clk_core_reset_duty_cycle_nolock(core); + return ret; +} + +static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core) +{ + int ret = 0; + + if (core->parent && + core->flags & CLK_DUTY_CYCLE_PARENT) { + ret = clk_core_update_duty_cycle_nolock(core->parent); + memcpy(&core->duty, &core->parent->duty, sizeof(core->duty)); + } else { + clk_core_reset_duty_cycle_nolock(core); + } + + return ret; +} + +static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core, +struct clk_duty *duty); + +static int clk_core_set_duty_cycle_nolock(struct clk_core *core, + struct clk_duty *duty) +{ + int ret; + + lockdep_assert_held(&prepare_lock); + + if (clk_core_rate_is_protected(core)) + return -EBUSY; + + trace_clk_set_duty_cycle(core, duty); + + if (!core->ops->set_duty_cycle) + return clk_core_set_duty_cycle_parent_nolock(core, duty); + + ret = core->ops->set_duty_cycle(core->hw, duty); + if (!ret) + memcpy(&core->duty, duty, sizeof(*duty)); + + trace_clk_set_duty_cycle_complete(core, duty); + + return ret; +} + +static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core, +
[PATCH v7 05/11] ARM: smp: Add initialization of CNTVOFF
The CNTVOFF register from arch timer is uninitialized. It should be done by the bootloader but it is currently not the case, even for boot CPU because this SoC is booting in secure mode. It leads to an random offset value meaning that each CPU will have a different time, which isn't working very well. Add assembly code used for boot CPU and secondary CPU cores to make sure that the CNTVOFF register is initialized. Because this code can be used by different platforms, add this assembly file in ARM's common folder. Signed-off-by: Mylène Josserand Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven --- arch/arm/common/Makefile | 1 + arch/arm/common/secure_cntvoff.S | 31 +++ arch/arm/include/asm/secure_cntvoff.h | 8 3 files changed, 40 insertions(+) create mode 100644 arch/arm/common/secure_cntvoff.S create mode 100644 arch/arm/include/asm/secure_cntvoff.h diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 70b4a14ed993..1e9f7af8f70f 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_DMABOUNCE) += dmabounce.o obj-$(CONFIG_SHARP_LOCOMO) += locomo.o obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o obj-$(CONFIG_SHARP_SCOOP) += scoop.o +obj-$(CONFIG_SMP) += secure_cntvoff.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg diff --git a/arch/arm/common/secure_cntvoff.S b/arch/arm/common/secure_cntvoff.S new file mode 100644 index ..68a4a8344319 --- /dev/null +++ b/arch/arm/common/secure_cntvoff.S @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2014 Renesas Electronics Corporation + * + * Initialization of CNTVOFF register from secure mode + * + */ + +#include +#include + +ENTRY(secure_cntvoff_init) + .arch armv7-a + /* +* CNTVOFF has to be initialized either from non-secure Hypervisor +* mode or secure Monitor mode with SCR.NS==1. If TrustZone is enabled +* then it should be handled by the secure code +*/ + cps #MON_MODE + mrc p15, 0, r1, c1, c1, 0 /* Get Secure Config */ + orr r0, r1, #1 + mcr p15, 0, r0, c1, c1, 0 /* Set Non Secure bit */ + isb + mov r0, #0 + mcrrp15, 4, r0, r0, c14 /* CNTVOFF = 0 */ + isb + mcr p15, 0, r1, c1, c1, 0 /* Set Secure bit */ + isb + cps #SVC_MODE + ret lr +ENDPROC(secure_cntvoff_init) diff --git a/arch/arm/include/asm/secure_cntvoff.h b/arch/arm/include/asm/secure_cntvoff.h new file mode 100644 index ..1f93aee1f630 --- /dev/null +++ b/arch/arm/include/asm/secure_cntvoff.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASMARM_ARCH_CNTVOFF_H +#define __ASMARM_ARCH_CNTVOFF_H + +extern void secure_cntvoff_init(void); + +#endif -- 2.11.0
[PATCH v7 09/11] ARM: sun8i: smp: Add support for A83T
Add the support for A83T. A83T SoC has an additional register than A80 to handle CPU configurations: R_CPUS_CFG. Information about the register comes from Allwinner's BSP driver. An important difference is the Power Off Gating register for clusters which is BIT(4) in case of SUN9I-A80 and BIT(0) in case of SUN8I-A83T. There is also a bit swap between sun8i-a83t and sun9i-a80 that must be handled. Signed-off-by: Mylène Josserand --- arch/arm/mach-sunxi/Kconfig | 2 +- arch/arm/mach-sunxi/mc_smp.c | 151 ++- 2 files changed, 137 insertions(+), 16 deletions(-) diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index ce53ceaf4cc5..d9c8ecf88ec6 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -51,7 +51,7 @@ config MACH_SUN9I config ARCH_SUNXI_MC_SMP bool depends on SMP - default MACH_SUN9I + default MACH_SUN9I || MACH_SUN8I select ARM_CCI400_PORT_CTRL select ARM_CPU_SUSPEND diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c index 48e5f4db64b6..4fd2267ecb04 100644 --- a/arch/arm/mach-sunxi/mc_smp.c +++ b/arch/arm/mach-sunxi/mc_smp.c @@ -55,22 +55,31 @@ #define CPUCFG_CX_RST_CTRL_L2_RST BIT(8) #define CPUCFG_CX_RST_CTRL_CX_RST(n) BIT(4 + (n)) #define CPUCFG_CX_RST_CTRL_CORE_RST(n) BIT(n) +#define CPUCFG_CX_RST_CTRL_CORE_RST_ALL(0xf << 0) #define PRCM_CPU_PO_RST_CTRL(c)(0x4 + 0x4 * (c)) #define PRCM_CPU_PO_RST_CTRL_CORE(n) BIT(n) #define PRCM_CPU_PO_RST_CTRL_CORE_ALL 0xf #define PRCM_PWROFF_GATING_REG(c) (0x100 + 0x4 * (c)) +/* The power off register for clusters are different from a80 and a83t */ +#define PRCM_PWROFF_GATING_REG_CLUSTER_SUN8I BIT(0) #define PRCM_PWROFF_GATING_REG_CLUSTER_SUN9I BIT(4) #define PRCM_PWROFF_GATING_REG_CORE(n) BIT(n) #define PRCM_PWR_SWITCH_REG(c, cpu)(0x140 + 0x10 * (c) + 0x4 * (cpu)) #define PRCM_CPU_SOFT_ENTRY_REG0x164 +/* R_CPUCFG registers, specific to sun8i-a83t */ +#define R_CPUCFG_CLUSTER_PO_RST_CTRL(c)(0x30 + (c) * 0x4) +#define R_CPUCFG_CLUSTER_PO_RST_CTRL_CORE(n) BIT(n) +#define R_CPUCFG_CPU_SOFT_ENTRY_REG0x01a4 + #define CPU0_SUPPORT_HOTPLUG_MAGIC00xFA50392F #define CPU0_SUPPORT_HOTPLUG_MAGIC10x790DCA3A static void __iomem *cpucfg_base; static void __iomem *prcm_base; static void __iomem *sram_b_smp_base; +static void __iomem *r_cpucfg_base; extern void sunxi_mc_smp_secondary_startup(void); extern void sunxi_mc_smp_resume(void); @@ -161,6 +170,16 @@ static int sunxi_cpu_powerup(unsigned int cpu, unsigned int cluster) reg &= ~PRCM_CPU_PO_RST_CTRL_CORE(cpu); writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster)); + if (is_a83t) { + /* assert cpu power-on reset */ + reg = readl(r_cpucfg_base + +R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster)); + reg &= ~(R_CPUCFG_CLUSTER_PO_RST_CTRL_CORE(cpu)); + writel(reg, r_cpucfg_base + + R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster)); + udelay(10); + } + /* Cortex-A7: hold L1 reset disable signal low */ if (!sunxi_core_is_cortex_a15(cpu, cluster)) { reg = readl(cpucfg_base + CPUCFG_CX_CTRL_REG0(cluster)); @@ -184,17 +203,38 @@ static int sunxi_cpu_powerup(unsigned int cpu, unsigned int cluster) /* open power switch */ sunxi_cpu_power_switch_set(cpu, cluster, true); + /* Handle A83T bit swap */ + if (is_a83t) { + if (cpu == 0) + cpu = 4; + } + /* clear processor power gate */ reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster)); reg &= ~PRCM_PWROFF_GATING_REG_CORE(cpu); writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster)); udelay(20); + /* Handle A83T bit swap */ + if (is_a83t) { + if (cpu == 4) + cpu = 0; + } + /* de-assert processor power-on reset */ reg = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster)); reg |= PRCM_CPU_PO_RST_CTRL_CORE(cpu); writel(reg, prcm_base + PRCM_CPU_PO_RST_CTRL(cluster)); + if (is_a83t) { + reg = readl(r_cpucfg_base + +R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster)); + reg |= R_CPUCFG_CLUSTER_PO_RST_CTRL_CORE(cpu); + writel(reg, r_cpucfg_base + + R_CPUCFG_CLUSTER_PO_RST_CTRL(cluster)); + udelay(10); + } + /* de-assert all processor resets */ reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster)); reg |= CPUCFG_CX_RST_CTRL_DBG_RST(cpu); @@ -216,6 +256,14 @@ static int sunxi_cluster_powerup(unsigned int cluster) if (cluster >= SUNXI_NR_CLUSTERS) return -EINVAL; + /* For A83T, assert cluster cores resets
[PATCH v7 08/11] ARM: sun9i: smp: Add is_a83t field
To prepare the support of sun8i-a83t, add a field in the smp_data structure to know if we are on sun9i-a80 or sun8i-a83t. Add also a global variable to retrieve which architecture we are having. Signed-off-by: Mylène Josserand --- arch/arm/mach-sunxi/mc_smp.c | 4 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c index 03f021d0c73e..48e5f4db64b6 100644 --- a/arch/arm/mach-sunxi/mc_smp.c +++ b/arch/arm/mach-sunxi/mc_smp.c @@ -74,6 +74,7 @@ static void __iomem *sram_b_smp_base; extern void sunxi_mc_smp_secondary_startup(void); extern void sunxi_mc_smp_resume(void); +static int is_a83t; static bool sunxi_core_is_cortex_a15(unsigned int core, unsigned int cluster) { @@ -624,6 +625,7 @@ struct sunxi_mc_smp_nodes { struct sunxi_mc_smp_data { const char *enable_method; int (*get_smp_nodes)(struct sunxi_mc_smp_nodes *nodes); + int is_a83t; }; static void __init sunxi_mc_smp_put_nodes(struct sunxi_mc_smp_nodes *nodes) @@ -697,6 +699,8 @@ static int __init sunxi_mc_smp_init(void) break; } + is_a83t = sunxi_mc_smp_data[i].is_a83t; + of_node_put(node); if (ret) return -ENODEV; -- 2.11.0