date:20190225

[PATCH v2] RDMA/cma: Make CM response timeout and # CM retries configurable

2019-02-25 Thread Håkon Bugge

During certain workloads, the default CM response timeout is too
short, leading to excessive retries. Hence, make it configurable
through sysctl. While at it, also make number of CM retries
configurable.

The defaults are not changed.

Signed-off-by: Håkon Bugge 

---
v1 -> v2:
   * Added unregister_net_sysctl_table() in cma_cleanup()
---
 drivers/infiniband/core/cma.c | 52 ++-
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 68c997be2429..50abce078ff1 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -68,13 +69,46 @@ MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
 MODULE_LICENSE("Dual BSD/GPL");
 
-#define CMA_CM_RESPONSE_TIMEOUT 20
 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
-#define CMA_MAX_CM_RETRIES 15
 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
 #define CMA_IBOE_PACKET_LIFETIME 18
 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
 
+#define CMA_DFLT_CM_RESPONSE_TIMEOUT 20
+static int cma_cm_response_timeout = CMA_DFLT_CM_RESPONSE_TIMEOUT;
+static int cma_cm_response_timeout_min = 8;
+static int cma_cm_response_timeout_max = 31;
+#undef CMA_DFLT_CM_RESPONSE_TIMEOUT
+
+#define CMA_DFLT_MAX_CM_RETRIES 15
+static int cma_max_cm_retries = CMA_DFLT_MAX_CM_RETRIES;
+static int cma_max_cm_retries_min = 1;
+static int cma_max_cm_retries_max = 100;
+#undef CMA_DFLT_MAX_CM_RETRIES
+
+static struct ctl_table_header *cma_ctl_table_hdr;
+static struct ctl_table cma_ctl_table[] = {
+   {
+   .procname   = "cma_cm_response_timeout",
+   .data   = _cm_response_timeout,
+   .maxlen = sizeof(cma_cm_response_timeout),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = _cm_response_timeout_min,
+   .extra2 = _cm_response_timeout_max,
+   },
+   {
+   .procname   = "cma_max_cm_retries",
+   .data   = _max_cm_retries,
+   .maxlen = sizeof(cma_max_cm_retries),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = _max_cm_retries_min,
+   .extra2 = _max_cm_retries_max,
+   },
+   { }
+};
+
 static const char * const cma_events[] = {
[RDMA_CM_EVENT_ADDR_RESOLVED]= "address resolved",
[RDMA_CM_EVENT_ADDR_ERROR]   = "address error",
@@ -3744,8 +3778,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private 
*id_priv,
req.path = id_priv->id.route.path_rec;
req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
req.service_id = rdma_get_service_id(_priv->id, 
cma_dst_addr(id_priv));
-   req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
-   req.max_cm_retries = CMA_MAX_CM_RETRIES;
+   req.timeout_ms = 1 << (cma_cm_response_timeout - 8);
+   req.max_cm_retries = cma_max_cm_retries;
 
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, );
if (ret) {
@@ -3815,9 +3849,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.flow_control = conn_param->flow_control;
req.retry_count = min_t(u8, 7, conn_param->retry_count);
req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
-   req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
-   req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
-   req.max_cm_retries = CMA_MAX_CM_RETRIES;
+   req.remote_cm_response_timeout = cma_cm_response_timeout;
+   req.local_cm_response_timeout = cma_cm_response_timeout;
+   req.max_cm_retries = cma_max_cm_retries;
req.srq = id_priv->srq ? 1 : 0;
 
ret = ib_send_cm_req(id_priv->cm_id.ib, );
@@ -4700,6 +4734,9 @@ static int __init cma_init(void)
goto err;
 
cma_configfs_init();
+   cma_ctl_table_hdr = register_net_sysctl(_net, "net/rdma_cm", 
cma_ctl_table);
+   if (!cma_ctl_table_hdr)
+   pr_warn("rdma_cm: couldn't register sysctl path, using default 
values\n");
 
return 0;
 
@@ -4713,6 +4750,7 @@ static int __init cma_init(void)
 
 static void __exit cma_cleanup(void)
 {
+   unregister_net_sysctl_table(cma_ctl_table_hdr);
cma_configfs_exit();
ib_unregister_client(_client);
unregister_netdevice_notifier(_nb);
-- 
2.20.1

[PATCH v3 3/4] dt-bindings: display: Add bindings for OSD101T2587-53TS panel

2019-02-25 Thread Peter Ujfalusi

This adds the device-tree bindings for the OSD101T2587-53TS 10.1"
1920x1200 panel from One Stop Displays.

Note: the panel is similar to OSD101T2045-53TS, but it needs an additional
MIPI_DSI_TURN_ON_PERIPHERAL message from the host.

Signed-off-by: Peter Ujfalusi 
Reviewed-by: Rob Herring 
---
 .../display/panel/osd,osd101t2587-53ts.txt | 14 ++
 1 file changed, 14 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/panel/osd,osd101t2587-53ts.txt

diff --git 
a/Documentation/devicetree/bindings/display/panel/osd,osd101t2587-53ts.txt 
b/Documentation/devicetree/bindings/display/panel/osd,osd101t2587-53ts.txt
new file mode 100644
index ..2082cae1a0e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/osd,osd101t2587-53ts.txt
@@ -0,0 +1,14 @@
+One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel
+
+The panel is similar to OSD101T2045-53TS, but it needs additional
+MIPI_DSI_TURN_ON_PERIPHERAL message from the host.
+
+Required properties:
+- compatible: should be "osd,osd101t2587-53ts"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
-- 
Peter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki

Re: KASAN: use-after-free Read in tls_tx_records

2019-02-25 Thread Eric Biggers

On Fri, Sep 28, 2018 at 06:09:03AM -0700, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:1042caa79e93 net-ipv4: remove 2 always zero parameters fro..
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=13fff71140
> kernel config:  https://syzkaller.appspot.com/x/.config?x=6da69433212d7e87
> dashboard link: https://syzkaller.appspot.com/bug?extid=c45f79b4e5e940da28a9
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
> 
> Unfortunately, I don't have any reproducer for this crash yet.
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+c45f79b4e5e940da2...@syzkaller.appspotmail.com
> 
> EXT4-fs (sda1): resizing filesystem from 524032 to 6 blocks
> EXT4-fs warning (device sda1): ext4_resize_fs:1930: can't shrink FS - resize
> aborted
> EXT4-fs (sda1): resizing filesystem from 524032 to 6 blocks
> EXT4-fs warning (device sda1): ext4_resize_fs:1930: can't shrink FS - resize
> aborted
> ==
> BUG: KASAN: use-after-free in tls_tx_records+0x8b0/0x980
> net/tls/tls_sw.c:365
> Read of size 8 at addr 8801ce46e040 by task syz-executor3/28575
> 
> CPU: 0 PID: 28575 Comm: syz-executor3 Not tainted 4.19.0-rc5+ #235
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
>  print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
>  kasan_report_error mm/kasan/report.c:354 [inline]
>  kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
>  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
>  tls_tx_records+0x8b0/0x980 net/tls/tls_sw.c:365
>  tls_sw_free_resources_tx+0x1ec/0xd20 net/tls/tls_sw.c:1552
>  tls_sk_proto_close+0x605/0x750 net/tls/tls_main.c:278
>  inet_release+0x104/0x1f0 net/ipv4/af_inet.c:428
>  inet6_release+0x50/0x70 net/ipv6/af_inet6.c:458
>  __sock_release+0xd7/0x250 net/socket.c:579
>  sock_close+0x19/0x20 net/socket.c:1141
>  __fput+0x385/0xa30 fs/file_table.c:278
>  fput+0x15/0x20 fs/file_table.c:309
>  task_work_run+0x1e8/0x2a0 kernel/task_work.c:113
>  tracehook_notify_resume include/linux/tracehook.h:193 [inline]
>  exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166
>  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
>  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x457579
> Code: 1d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff
> 0f 83 eb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7f2ccaa3bc78 EFLAGS: 0246 ORIG_RAX: 0003
> RAX:  RBX: 0001 RCX: 00457579
> RDX:  RSI:  RDI: 0003
> RBP: 0072bf00 R08:  R09: 
> R10:  R11: 0246 R12: 7f2ccaa3c6d4
> R13: 004ef912 R14: 004cc460 R15: 
> 
> Allocated by task 28575:
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
>  set_track mm/kasan/kasan.c:460 [inline]
>  kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553
>  __do_kmalloc mm/slab.c:3718 [inline]
>  __kmalloc+0x14e/0x760 mm/slab.c:3727
>  kmalloc include/linux/slab.h:518 [inline]
>  kzalloc include/linux/slab.h:707 [inline]
>  get_rec+0x147/0x630 net/tls/tls_sw.c:653
>  tls_sw_sendmsg+0x47e/0x17a0 net/tls/tls_sw.c:727
>  inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
>  sock_sendmsg_nosec net/socket.c:621 [inline]
>  sock_sendmsg+0xd5/0x120 net/socket.c:631
>  __sys_sendto+0x3d7/0x670 net/socket.c:1788
>  __do_sys_sendto net/socket.c:1800 [inline]
>  __se_sys_sendto net/socket.c:1796 [inline]
>  __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1796
>  do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> 
> Freed by task 23411:
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
>  set_track mm/kasan/kasan.c:460 [inline]
>  __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521
>  kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
>  __cache_free mm/slab.c:3498 [inline]
>  kfree+0xcf/0x230 mm/slab.c:3813
>  tls_encrypt_done+0x221/0x610 net/tls/tls_sw.c:417
>  aead_request_complete include/crypto/internal/aead.h:75 [inline]
>  pcrypt_aead_serial+0x7b/0xb0 crypto/pcrypt.c:123
>  padata_serial_worker+0x4c6/0x760 kernel/padata.c:349
>  process_one_work+0xc90/0x1b90 kernel/workqueue.c:2153
>  worker_thread+0x17f/0x1390 kernel/workqueue.c:2296
>  kthread+0x35a/0x420 kernel/kthread.c:246
>  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413
> 
> The buggy address belongs to the object at 8801ce46e040
>  which belongs to the cache kmalloc-2048 of size 2048
> The buggy address is

[PATCH v3 4/4] drm/panel: Add OSD101T2587-53TS driver

2019-02-25 Thread Peter Ujfalusi

The panel is similar to OSD101T2045-53TS (which is handled by panel-simple)
with one big difference: osd101t2587-53ts needs MIPI_DSI_TURN_ON_PERIPHERAL
message to be sent from the host to be operational and thus can not be
handled by panel-simple.

Signed-off-by: Peter Ujfalusi 
Reviewed-by: Sam Ravnborg 
---
 drivers/gpu/drm/panel/Kconfig |   9 +
 drivers/gpu/drm/panel/Makefile|   1 +
 .../drm/panel/panel-osd-osd101t2587-53ts.c| 254 ++
 3 files changed, 264 insertions(+)
 create mode 100644 drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c

diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 3e070153ef21..6b6790474c33 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -122,6 +122,15 @@ config DRM_PANEL_ORISETECH_OTM8009A
  Say Y here if you want to enable support for Orise Technology
  otm8009a 480x800 dsi 2dl panel.
 
+config DRM_PANEL_OSD_OSD101T2587_53TS
+   tristate "OSD OSD101T2587-53TS DSI 1920x1200 video mode panel"
+   depends on OF
+   depends on DRM_MIPI_DSI
+   depends on BACKLIGHT_CLASS_DEVICE
+   help
+ Say Y here if you want to enable support for One Stop Displays
+ OSD101T2587-53TS 10.1" 1920x1200 dsi panel.
+
 config DRM_PANEL_PANASONIC_VVX10F034N00
tristate "Panasonic VVX10F034N00 1920x1200 video mode panel"
depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index e7ab71968bbf..d9d99956db0c 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04) += 
panel-kingdisplay-kd097d04.o
 obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
 obj-$(CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO) += panel-olimex-lcd-olinuxino.o
 obj-$(CONFIG_DRM_PANEL_ORISETECH_OTM8009A) += panel-orisetech-otm8009a.o
+obj-$(CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS) += panel-osd-osd101t2587-53ts.o
 obj-$(CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00) += 
panel-panasonic-vvx10f034n00.o
 obj-$(CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN) += 
panel-raspberrypi-touchscreen.o
 obj-$(CONFIG_DRM_PANEL_RAYDIUM_RM68200) += panel-raydium-rm68200.o
diff --git a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c 
b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c
new file mode 100644
index ..55974e74aa0a
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi 
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+struct osd101t2587_panel {
+   struct drm_panel base;
+   struct mipi_dsi_device *dsi;
+
+   struct backlight_device *backlight;
+   struct regulator *supply;
+
+   bool prepared;
+   bool enabled;
+
+   const struct drm_display_mode *default_mode;
+};
+
+static inline struct osd101t2587_panel *ti_osd_panel(struct drm_panel *panel)
+{
+   return container_of(panel, struct osd101t2587_panel, base);
+}
+
+static int osd101t2587_panel_disable(struct drm_panel *panel)
+{
+   struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+   int ret;
+
+   if (!osd101t2587->enabled)
+   return 0;
+
+   backlight_disable(osd101t2587->backlight);
+
+   ret = mipi_dsi_shutdown_peripheral(osd101t2587->dsi);
+
+   osd101t2587->enabled = false;
+
+   return ret;
+}
+
+static int osd101t2587_panel_unprepare(struct drm_panel *panel)
+{
+   struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+
+   if (!osd101t2587->prepared)
+   return 0;
+
+   regulator_disable(osd101t2587->supply);
+   osd101t2587->prepared = false;
+
+   return 0;
+}
+
+static int osd101t2587_panel_prepare(struct drm_panel *panel)
+{
+   struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+   int ret;
+
+   if (osd101t2587->prepared)
+   return 0;
+
+   ret = regulator_enable(osd101t2587->supply);
+   if (!ret)
+   osd101t2587->prepared = true;
+
+   return ret;
+}
+
+static int osd101t2587_panel_enable(struct drm_panel *panel)
+{
+   struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+   int ret;
+
+   if (osd101t2587->enabled)
+   return 0;
+
+   ret = mipi_dsi_turn_on_peripheral(osd101t2587->dsi);
+   if (ret)
+   return ret;
+
+   backlight_enable(osd101t2587->backlight);
+
+   osd101t2587->enabled = true;
+
+   return ret;
+}
+
+static const struct drm_display_mode default_mode_osd101t2587 = {
+   .clock = 164400,
+   .hdisplay = 1920,
+   .hsync_start = 1920 + 152,
+   .hsync_end = 1920 + 152 + 52,
+   .htotal = 1920 + 152 + 52 + 20,
+   .vdisplay = 1200,
+   .vsync_start

Re: [PATCH v2 20/26] userfaultfd: wp: support write protection for userfault vma range

2019-02-25 Thread Peter Xu

On Tue, Feb 26, 2019 at 09:46:12AM +0200, Mike Rapoport wrote:

[...]

> > > > > > +int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long 
> > > > > > start,
> > > > > > +   unsigned long len, bool enable_wp, bool 
> > > > > > *mmap_changing)
> > > > > > +{
> > > > > > +   struct vm_area_struct *dst_vma;
> > > > > > +   pgprot_t newprot;
> > > > > > +   int err;
> > > > > > +
> > > > > > +   /*
> > > > > > +* Sanitize the command parameters:
> > > > > > +*/
> > > > > > +   BUG_ON(start & ~PAGE_MASK);
> > > > > > +   BUG_ON(len & ~PAGE_MASK);
> > > > > > +
> > > > > > +   /* Does the address range wrap, or is the span zero-sized? */
> > > > > > +   BUG_ON(start + len <= start);
> > > > > 
> > > > > I'd replace these BUG_ON()s with
> > > > > 
> > > > >   if (WARN_ON())
> > > > >return -EINVAL;
> > > > 
> > > > I believe BUG_ON() is used because these parameters should have been
> > > > checked in userfaultfd_writeprotect() already by the common
> > > > validate_range() even before calling mwriteprotect_range().  So I'm
> > > > fine with the WARN_ON() approach but I'd slightly prefer to simply
> > > > keep the patch as is to keep Jerome's r-b if you won't disagree. :)
> > > 
> > > Right, userfaultfd_writeprotect() should check these parameters and if it
> > > didn't it was a bug indeed. But still, it's not severe enough to crash the
> > > kernel.
> > > 
> > > I hope Jerome wouldn't mind to keep his r-b with s/BUG_ON/WARN_ON ;-)
> > > 
> > > With this change you can also add 
> > > 
> > > Reviewed-by: Mike Rapoport 
> > 
> > Thanks!  Though before I change anything... please note that the
> > BUG_ON()s are really what we've done in existing MISSING code.  One
> > example is userfaultfd_copy() which did validate_range() first, then
> > in __mcopy_atomic() we've used BUG_ON()s.  They make sense to me
> > because userspace should never be able to trigger it.  And if we
> > really want to change the BUG_ON()s in this patch, IMHO we probably
> > want to change the other BUG_ON()s as well, then that can be a
> > standalone patch or patchset to address another issue...
> 
> Yeah, we have quite a lot of them, so doing the replacement in a separate
> patch makes perfect sense.
>  
> > (and if we really want to use WARN_ON, I would prefer WARN_ON_ONCE, or
> >  directly return the errors to avoid DOS).
> 
> Agree.
> 
> > I'll see how you'd prefer to see how I should move on with this patch.
> 
> Let's keep this patch as is and make the replacement on top of the WP
> series. Feel free to add r-b.

Great!  I'll do.  Thanks,

-- 
Peter Xu

[PATCH v3 1/4] dt-bindings: display: Add bindings for OSD101T2045-53TS

2019-02-25 Thread Peter Ujfalusi

This adds the device-tree bindings for the OSD101T2045-53TS 10.1"
1920x1200 panel from One Stop Displays.

Signed-off-by: Peter Ujfalusi 
Reviewed-by: Rob Herring 
---
 .../bindings/display/panel/osd,osd101t2045-53ts.txt   | 11 +++
 1 file changed, 11 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/panel/osd,osd101t2045-53ts.txt

diff --git 
a/Documentation/devicetree/bindings/display/panel/osd,osd101t2045-53ts.txt 
b/Documentation/devicetree/bindings/display/panel/osd,osd101t2045-53ts.txt
new file mode 100644
index ..b3f6df59f7c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/osd,osd101t2045-53ts.txt
@@ -0,0 +1,11 @@
+One Stop Displays OSD101T2045-53TS 10.1" 1920x1200 panel
+
+Required properties:
+- compatible: should be "osd,osd101t2045-53ts"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
-- 
Peter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki

[PATCH v3 0/4] drm/panel: Support for OSD101T2045-53TS and OSD101T2587-53TS

2019-02-25 Thread Peter Ujfalusi

Hi,

Changes since v2:
- Added Reviewed-by from Rob to the binding patches
- Added help text to Kconfig (osd101t2587-53ts)
- Print the error values in dev_err/warn
- Added Reviewed-by from Sam to the osd101t2587-53ts patch

Changes since v1 (only panel-osd-osd101t2587-53ts changed):
- Removed unused members from struct osd101t2587_panel
- Use backlight_enable/backlight_disable
- Use devm_of_find_backlight()
- osd101t2587_of_match table standardized 
- osd101t2587_panel_unprepare() added to shutdown and remove callbacks to turn
  power off
- Fix probe in case mipi_dsi_attach() would fail

Add support for OSD101T2045-53TS and OSD101T2587-53TS from One Stop Displays.

The two panels are similar, with one big difference: OSD101T2587-53TS requires the
MIPI_DSI_TURN_ON_PERIPHERAL message and thus cannot be handled by panel-simple.

Regards,
Peter
---
Peter Ujfalusi (4):
  dt-bindings: display: Add bindings for OSD101T2045-53TS
  drm/panel: simple: Add support for OSD101T2045-53TS
  dt-bindings: display: Add bindings for OSD101T2587-53TS panel
  drm/panel: Add OSD101T2587-53TS driver

 .../display/panel/osd,osd101t2045-53ts.txt|  11 +
 .../display/panel/osd,osd101t2587-53ts.txt|  14 +
 drivers/gpu/drm/panel/Kconfig |   9 +
 drivers/gpu/drm/panel/Makefile|   1 +
 .../drm/panel/panel-osd-osd101t2587-53ts.c| 254 ++
 drivers/gpu/drm/panel/panel-simple.c  |  34 +++
 6 files changed, 323 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/display/panel/osd,osd101t2045-53ts.txt
 create mode 100644 
Documentation/devicetree/bindings/display/panel/osd,osd101t2587-53ts.txt
 create mode 100644 drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c

-- 
Peter

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki.
Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki

Re: [PATCH v2 26/26] userfaultfd: selftests: add write-protect test

2019-02-25 Thread Peter Xu

On Tue, Feb 26, 2019 at 08:58:36AM +0200, Mike Rapoport wrote:
> On Tue, Feb 12, 2019 at 10:56:32AM +0800, Peter Xu wrote:
> > This patch adds uffd tests for write protection.
> > 
> > Instead of introducing new tests for it, let's simply squash uffd-wp
> > tests into existing uffd-missing test cases.  Changes are:
> > 
> > (1) Bouncing tests
> > 
> >   We do the write-protection in two ways during the bouncing test:
> > 
> >   - By using UFFDIO_COPY_MODE_WP when resolving MISSING pages: then
> > we'll make sure for each bounce process every single page will
> > fault at least twice: once for MISSING, once for WP.
> > 
> >   - By directly calling UFFDIO_WRITEPROTECT on existing faulted memories:
> > To further torture the explicit page protection procedures of
> > uffd-wp, we split each bounce procedure into two halves (in the
> > background thread): the first half will be MISSING+WP for each
> > page as explained above.  After the first half, we write protect
> > the faulted region in the background thread to make sure at least
> > half of the pages will be write protected again which is the first
> > half to test the new UFFDIO_WRITEPROTECT call.  Then we continue
> > with the 2nd half, which will contain both MISSING and WP faulting
> > tests for the 2nd half and WP-only faults from the 1st half.
> > 
> > (2) Event/Signal test
> > 
> >   Mostly previous tests but will do MISSING+WP for each page.  For
> >   sigbus-mode test we'll need to provide standalone path to handle the
> >   write protection faults.
> > 
> > For all tests, do statistics as well for uffd-wp pages.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  tools/testing/selftests/vm/userfaultfd.c | 154 ++-
> >  1 file changed, 126 insertions(+), 28 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/vm/userfaultfd.c 
> > b/tools/testing/selftests/vm/userfaultfd.c
> > index e5d12c209e09..57b5ac02080a 100644
> > --- a/tools/testing/selftests/vm/userfaultfd.c
> > +++ b/tools/testing/selftests/vm/userfaultfd.c
> > @@ -56,6 +56,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > 
> >  #include "../kselftest.h"
> > 
> > @@ -78,6 +79,8 @@ static int test_type;
> >  #define ALARM_INTERVAL_SECS 10
> >  static volatile bool test_uffdio_copy_eexist = true;
> >  static volatile bool test_uffdio_zeropage_eexist = true;
> > +/* Whether to test uffd write-protection */
> > +static bool test_uffdio_wp = false;
> > 
> >  static bool map_shared;
> >  static int huge_fd;
> > @@ -92,6 +95,7 @@ pthread_attr_t attr;
> >  struct uffd_stats {
> > int cpu;
> > unsigned long missing_faults;
> > +   unsigned long wp_faults;
> >  };
> > 
> >  /* pthread_mutex_t starts at page offset 0 */
> > @@ -141,9 +145,29 @@ static void uffd_stats_reset(struct uffd_stats 
> > *uffd_stats,
> > for (i = 0; i < n_cpus; i++) {
> > uffd_stats[i].cpu = i;
> > uffd_stats[i].missing_faults = 0;
> > +   uffd_stats[i].wp_faults = 0;
> > }
> >  }
> > 
> > +static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
> > +{
> > +   int i;
> > +   unsigned long long miss_total = 0, wp_total = 0;
> > +
> > +   for (i = 0; i < n_cpus; i++) {
> > +   miss_total += stats[i].missing_faults;
> > +   wp_total += stats[i].wp_faults;
> > +   }
> > +
> > +   printf("userfaults: %llu missing (", miss_total);
> > +   for (i = 0; i < n_cpus; i++)
> > +   printf("%lu+", stats[i].missing_faults);
> > +   printf("\b), %llu wp (", wp_total);
> > +   for (i = 0; i < n_cpus; i++)
> > +   printf("%lu+", stats[i].wp_faults);
> > +   printf("\b)\n");
> > +}
> > +
> >  static int anon_release_pages(char *rel_area)
> >  {
> > int ret = 0;
> > @@ -264,19 +288,15 @@ struct uffd_test_ops {
> > void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
> >  };
> > 
> > -#define ANON_EXPECTED_IOCTLS   ((1 << _UFFDIO_WAKE) | \
> > -(1 << _UFFDIO_COPY) | \
> > -(1 << _UFFDIO_ZEROPAGE))
> > -
> >  static struct uffd_test_ops anon_uffd_test_ops = {
> > -   .expected_ioctls = ANON_EXPECTED_IOCTLS,
> > +   .expected_ioctls = UFFD_API_RANGE_IOCTLS,
> > .allocate_area  = anon_allocate_area,
> > .release_pages  = anon_release_pages,
> > .alias_mapping = noop_alias_mapping,
> >  };
> > 
> >  static struct uffd_test_ops shmem_uffd_test_ops = {
> > -   .expected_ioctls = ANON_EXPECTED_IOCTLS,
> > +   .expected_ioctls = UFFD_API_RANGE_IOCTLS,
> 
> Doesn't UFFD_API_RANGE_IOCTLS include UFFDIO_WP, which is not supported for
> shmem?

Yes it didn't fail the test case probably because the test case only
registers the shmem region with UFFDIO_REGISTER_MODE_MISSING, and for
now we'll simply blindly return the _UFFDIO_WRITEPROTECT capability if
the register ioctl succeeded.  However it'll still fail the
UFFDIO_REGISTER ioctl directly if someone requests with

Re: [PATCH net-next] can: kvaser_usb: Use struct_size() in alloc_candev()

2019-02-25 Thread Marc Kleine-Budde

On 2/8/19 4:10 AM, Gustavo A. R. Silva wrote:
> One of the more common cases of allocation size calculations is finding
> the size of a structure that has a zero-sized array at the end, along
> with memory for some number of elements for that array. For example:
> 
> struct foo {
> int stuff;
> void *entry[];
> };
> 
> instance = alloc(sizeof(struct foo) + count * sizeof(void *));
> 
> Instead of leaving these open-coded and prone to type mistakes, we can
> now use the new struct_size() helper:
> 
> instance = alloc(struct_size(instance, entry, count));
> 
> This code was detected with the help of Coccinelle.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied to linux-can-next.

Tnx,
Marc

-- 
Pengutronix e.K.  | Marc Kleine-Budde   |
Industrial Linux Solutions| Phone: +49-231-2826-924 |
Vertretung West/Dortmund  | Fax:   +49-5121-206917- |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |

VERY URGENT.

2019-02-25 Thread Karim Zakari

Dear  Friend,

  I know that this letter will come to you as surprise, I got your contact 
address while I search for foreign partner to assist me in  this business 
transaction that is present in our favor now, My name is Mr. KARIM  ZAKARI, I 
am the Bill and Exchange (assistant)  Manager (BOA) BANK OF AFRICA. I'm 
proposing to lift in your name (US$16.5 Million Dollars) that belong to our 
later customer, MR.  GORPUN VLADIMIR From Saratov Oblast Russia who died in 
Siber airline that crashed into sea  at Isreal on 4th October 2001.

I want to present you to my bank here as the beneficiary to this fund and I Am 
waiting for your response for more details, As you are  willing to execute this 
business appointunity with me.

Thanks,
Yours Sincerely,
Mr. Karim Zakari.

Re: [PATCH v2 20/26] userfaultfd: wp: support write protection for userfault vma range

2019-02-25 Thread Mike Rapoport

On Tue, Feb 26, 2019 at 03:20:28PM +0800, Peter Xu wrote:
> On Tue, Feb 26, 2019 at 08:43:47AM +0200, Mike Rapoport wrote:
> > On Tue, Feb 26, 2019 at 02:06:27PM +0800, Peter Xu wrote:
> > > On Mon, Feb 25, 2019 at 10:52:34PM +0200, Mike Rapoport wrote:
> > > > On Tue, Feb 12, 2019 at 10:56:26AM +0800, Peter Xu wrote:
> > > > > From: Shaohua Li 
> > > > > 
> > > > > Add API to enable/disable writeprotect a vma range. Unlike mprotect,
> > > > > this doesn't split/merge vmas.
> > > > > 
> > > > > Cc: Andrea Arcangeli 
> > > > > Cc: Rik van Riel 
> > > > > Cc: Kirill A. Shutemov 
> > > > > Cc: Mel Gorman 
> > > > > Cc: Hugh Dickins 
> > > > > Cc: Johannes Weiner 
> > > > > Signed-off-by: Shaohua Li 
> > > > > Signed-off-by: Andrea Arcangeli 
> > > > > [peterx:
> > > > >  - use the helper to find VMA;
> > > > >  - return -ENOENT if not found to match mcopy case;
> > > > >  - use the new MM_CP_UFFD_WP* flags for change_protection
> > > > >  - check against mmap_changing for failures]
> > > > > Signed-off-by: Peter Xu 
> > > > > ---
> > > > >  include/linux/userfaultfd_k.h |  3 ++
> > > > >  mm/userfaultfd.c  | 54 
> > > > > +++
> > > > >  2 files changed, 57 insertions(+)
> > > > > 
> > > > > diff --git a/include/linux/userfaultfd_k.h 
> > > > > b/include/linux/userfaultfd_k.h
> > > > > index 765ce884cec0..8f6e6ed544fb 100644
> > > > > --- a/include/linux/userfaultfd_k.h
> > > > > +++ b/include/linux/userfaultfd_k.h
> > > > > @@ -39,6 +39,9 @@ extern ssize_t mfill_zeropage(struct mm_struct 
> > > > > *dst_mm,
> > > > > unsigned long dst_start,
> > > > > unsigned long len,
> > > > > bool *mmap_changing);
> > > > > +extern int mwriteprotect_range(struct mm_struct *dst_mm,
> > > > > +unsigned long start, unsigned long len,
> > > > > +bool enable_wp, bool *mmap_changing);
> > > > > 
> > > > >  /* mm helpers */
> > > > >  static inline bool is_mergeable_vm_userfaultfd_ctx(struct 
> > > > > vm_area_struct *vma,
> > > > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > > > > index fefa81c301b7..529d180bb4d7 100644
> > > > > --- a/mm/userfaultfd.c
> > > > > +++ b/mm/userfaultfd.c
> > > > > @@ -639,3 +639,57 @@ ssize_t mfill_zeropage(struct mm_struct *dst_mm, 
> > > > > unsigned long start,
> > > > >  {
> > > > >   return __mcopy_atomic(dst_mm, start, 0, len, true, 
> > > > > mmap_changing, 0);
> > > > >  }
> > > > > +
> > > > > +int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long 
> > > > > start,
> > > > > + unsigned long len, bool enable_wp, bool 
> > > > > *mmap_changing)
> > > > > +{
> > > > > + struct vm_area_struct *dst_vma;
> > > > > + pgprot_t newprot;
> > > > > + int err;
> > > > > +
> > > > > + /*
> > > > > +  * Sanitize the command parameters:
> > > > > +  */
> > > > > + BUG_ON(start & ~PAGE_MASK);
> > > > > + BUG_ON(len & ~PAGE_MASK);
> > > > > +
> > > > > + /* Does the address range wrap, or is the span zero-sized? */
> > > > > + BUG_ON(start + len <= start);
> > > > 
> > > > I'd replace these BUG_ON()s with
> > > > 
> > > > if (WARN_ON())
> > > >  return -EINVAL;
> > > 
> > > I believe BUG_ON() is used because these parameters should have been
> > > checked in userfaultfd_writeprotect() already by the common
> > > validate_range() even before calling mwriteprotect_range().  So I'm
> > > fine with the WARN_ON() approach but I'd slightly prefer to simply
> > > keep the patch as is to keep Jerome's r-b if you won't disagree. :)
> > 
> > Right, userfaultfd_writeprotect() should check these parameters and if it
> > didn't it was a bug indeed. But still, it's not severe enough to crash the
> > kernel.
> > 
> > I hope Jerome wouldn't mind to keep his r-b with s/BUG_ON/WARN_ON ;-)
> > 
> > With this change you can also add 
> > 
> > Reviewed-by: Mike Rapoport 
> 
> Thanks!  Though before I change anything... please note that the
> BUG_ON()s are really what we've done in existing MISSING code.  One
> example is userfaultfd_copy() which did validate_range() first, then
> in __mcopy_atomic() we've used BUG_ON()s.  They make sense to me
> because userspace should never be able to trigger it.  And if we
> really want to change the BUG_ON()s in this patch, IMHO we probably
> want to change the other BUG_ON()s as well, then that can be a
> standalone patch or patchset to address another issue...

Yeah, we have quite a lot of them, so doing the replacement in a separate
patch makes perfect sense.
 
> (and if we really want to use WARN_ON, I would prefer WARN_ON_ONCE, or
>  directly return the errors to avoid DOS).

Agree.

> I'll see how you'd prefer to see how I should move on with this patch.

Let's keep this patch as is and make the replacement on top of the WP
series. Feel free to add r-b.
 
> Thanks,
> 
> -- 
> Peter Xu
> 

--

Re: [PATCH] huegtlbfs: fix races and page leaks during migration

2019-02-25 Thread Naoya Horiguchi

Hi Mike,

On Thu, Feb 21, 2019 at 11:11:06AM -0800, Mike Kravetz wrote:
> On 2/20/19 10:09 PM, Andrew Morton wrote:
> > On Tue, 12 Feb 2019 14:14:00 -0800 Mike Kravetz  
> > wrote:
> >> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> >> index a80832487981..f859e319e3eb 100644
> >> --- a/mm/hugetlb.c
> >> +++ b/mm/hugetlb.c

...

> >> @@ -3863,6 +3862,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct 
> >> *mm,
> >>}
> >>  
> >>spin_unlock(ptl);
> >> +
> >> +  /* May already be set if not newly allocated page */
> >> +  set_page_huge_active(page);
> >> +
> 
> This is wrong.  We need to only set_page_huge_active() for newly allocated
> pages.  Why?  We could have got the page from the pagecache, and it could
> be that the page is !page_huge_active() because it has been isolated for
> migration.  Therefore, we do not want to set it active here.
> 
> I have also found another race with migration when removing a page from
> a file.  When a huge page is removed from the pagecache, the page_mapping()
> field is cleared yet page_private continues to point to the subpool until
> the page is actually freed by free_huge_page().  free_huge_page is what
> adjusts the counts for the subpool.  A page could be migrated while in this
> state.  However, since page_mapping() is not set the hugetlbfs specific
> routine to transfer page_private is not called and we leak the page count
> in the filesystem.  To fix, check for this condition before migrating a huge
> page.  If the condition is detected, return EBUSY for the page.
> 
> Both issues are addressed in the updated patch below.
> 
> Sorry for the churn.  As I find and fix one issue I seem to discover another.
> There is still at least one more issue with private pages when COW comes into
> play.  I continue to work that.  I wanted to send this patch earlier as it
> is pretty easy to hit the bugs if you try.  If you would prefer another
> approach, let me know.
> 
> From: Mike Kravetz 
> Date: Thu, 21 Feb 2019 11:01:04 -0800
> Subject: [PATCH] huegtlbfs: fix races and page leaks during migration

Subject still contains a typo.

> 
> hugetlb pages should only be migrated if they are 'active'.  The routines
> set/clear_page_huge_active() modify the active state of hugetlb pages.
> When a new hugetlb page is allocated at fault time, set_page_huge_active
> is called before the page is locked.  Therefore, another thread could
> race and migrate the page while it is being added to page table by the
> fault code.  This race is somewhat hard to trigger, but can be seen by
> strategically adding udelay to simulate worst case scheduling behavior.
> Depending on 'how' the code races, various BUG()s could be triggered.
> 
> To address this issue, simply delay the set_page_huge_active call until
> after the page is successfully added to the page table.
> 
> Hugetlb pages can also be leaked at migration time if the pages are
> associated with a file in an explicitly mounted hugetlbfs filesystem.
> For example, consider a two node system with 4GB worth of huge pages
> available.  A program mmaps a 2G file in a hugetlbfs filesystem.  It
> then migrates the pages associated with the file from one node to
> another.  When the program exits, huge page counts are as follows:
> 
> node0
> 1024free_hugepages
> 1024nr_hugepages
> 
> node1
> 0   free_hugepages
> 1024nr_hugepages
> 
> Filesystem Size  Used Avail Use% Mounted on
> nodev  4.0G  2.0G  2.0G  50% /var/opt/hugepool
> 
> That is as expected.  2G of huge pages are taken from the free_hugepages
> counts, and 2G is the size of the file in the explicitly mounted filesystem.
> If the file is then removed, the counts become:
> 
> node0
> 1024free_hugepages
> 1024nr_hugepages
> 
> node1
> 1024free_hugepages
> 1024nr_hugepages
> 
> Filesystem Size  Used Avail Use% Mounted on
> nodev  4.0G  2.0G  2.0G  50% /var/opt/hugepool
> 
> Note that the filesystem still shows 2G of pages used, while there
> actually are no huge pages in use.  The only way to 'fix' the
> filesystem accounting is to unmount the filesystem.
> 
> If a hugetlb page is associated with an explicitly mounted filesystem,
> this information is contained in the page_private field.  At migration
> time, this information is not preserved.  To fix, simply transfer
> page_private from old to new page at migration time if necessary.
> 
> There is a related race with removing a huge page from a file migration.
> When a huge page is removed from the pagecache, the page_mapping() field
> is cleared yet page_private remains set until the page is actually freed
> by free_huge_page().  A page could be migrated while in this state.
> However, since page_mapping() is not set the hugetlbfs specific routine
> to transfer page_private is not called and we leak the page count in the
> filesystem.  To fix, check for this condition before migrating a huge
>

Re: [PATCH v2 24/26] userfaultfd: wp: UFFDIO_REGISTER_MODE_WP documentation update

2019-02-25 Thread Peter Xu

On Tue, Feb 26, 2019 at 09:04:25AM +0200, Mike Rapoport wrote:
> On Tue, Feb 26, 2019 at 02:53:42PM +0800, Peter Xu wrote:
> > On Mon, Feb 25, 2019 at 11:19:32PM +0200, Mike Rapoport wrote:
> > > On Tue, Feb 12, 2019 at 10:56:30AM +0800, Peter Xu wrote:
> > > > From: Martin Cracauer 
> > > > 
> > > > Adds documentation about the write protection support.
> > > > 
> > > > Signed-off-by: Andrea Arcangeli 
> > > > [peterx: rewrite in rst format; fixups here and there]
> > > > Signed-off-by: Peter Xu 
> > > 
> > > Reviewed-by: Mike Rapoport 
> > > 
> > > Peter, can you please also update the man pages (1, 2)?
> > > 
> > > [1] http://man7.org/linux/man-pages/man2/userfaultfd.2.html
> > > [2] http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html
> > 
> > Sure.  Should I post the man patches after the kernel part is merged?
> 
> Yep, once we know for sure what's the API kernel will expose.

I see, thanks.  Then I'll probably wait until the series got merged to
be safe since so far we still have discussion on the interfaces
(especially the DONTWAKE flags).

-- 
Peter Xu

Re: [PATCH v2 23/26] userfaultfd: wp: don't wake up when doing write protect

2019-02-25 Thread Peter Xu

On Tue, Feb 26, 2019 at 09:29:33AM +0200, Mike Rapoport wrote:
> On Tue, Feb 26, 2019 at 02:24:52PM +0800, Peter Xu wrote:
> > On Mon, Feb 25, 2019 at 11:09:35PM +0200, Mike Rapoport wrote:
> > > On Tue, Feb 12, 2019 at 10:56:29AM +0800, Peter Xu wrote:
> > > > It does not make sense to try to wake up any waiting thread when we're
> > > > write-protecting a memory region.  Only wake up when resolving a write
> > > > protected page fault.
> > > > 
> > > > Signed-off-by: Peter Xu 
> > > > ---
> > > >  fs/userfaultfd.c | 13 -
> > > >  1 file changed, 8 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > > > index 81962d62520c..f1f61a0278c2 100644
> > > > --- a/fs/userfaultfd.c
> > > > +++ b/fs/userfaultfd.c
> > > > @@ -1771,6 +1771,7 @@ static int userfaultfd_writeprotect(struct 
> > > > userfaultfd_ctx *ctx,
> > > > struct uffdio_writeprotect uffdio_wp;
> > > > struct uffdio_writeprotect __user *user_uffdio_wp;
> > > > struct userfaultfd_wake_range range;
> > > > +   bool mode_wp, mode_dontwake;
> > > > 
> > > > if (READ_ONCE(ctx->mmap_changing))
> > > > return -EAGAIN;
> > > > @@ -1789,18 +1790,20 @@ static int userfaultfd_writeprotect(struct 
> > > > userfaultfd_ctx *ctx,
> > > > if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
> > > >UFFDIO_WRITEPROTECT_MODE_WP))
> > > > return -EINVAL;
> > > > -   if ((uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP) &&
> > > > -(uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE))
> > > > +
> > > > +   mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
> > > > +   mode_dontwake = uffdio_wp.mode & 
> > > > UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
> > > > +
> > > > +   if (mode_wp && mode_dontwake)
> > > > return -EINVAL;
> > > 
> > > This actually means the opposite of the commit message text ;-)
> > > 
> > > Is any dependency of _WP and _DONTWAKE needed at all?
> > 
> > So this is indeed confusing at least, because both you and Jerome have
> > asked the same question... :)
> > 
> > My understanding is that we don't have any reason to wake up any
> > thread when we are write-protecting a range, in that sense the flag
> > UFFDIO_WRITEPROTECT_MODE_DONTWAKE is already meaningless in the
> > UFFDIO_WRITEPROTECT ioctl context.  So before everything here's how
> > these flags are defined:
> > 
> > struct uffdio_writeprotect {
> > struct uffdio_range range;
> > /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
> > #define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
> > #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE   ((__u64)1<<1)
> > __u64 mode;
> > };
> > 
> > To make it clear, we simply define it as "DONTWAKE is valid only with
> > !WP".  When with that, "mode_wp && mode_dontwake" is indeed a
> > meaningless flag combination.  Though please note that it does not
> > mean that the operation ("don't wake up the thread") is meaningless -
> > that's what we'll do no matter what when WP==1.  IMHO it's only about
> > the interface not the behavior.
> > 
> > I don't have a good way to make this clearer because firstly we'll
> > need the WP flag to mark whether we're protecting or unprotecting the
> > pages.  Later on, we need DONTWAKE for page fault handling case to
> > mark that we don't want to wake up the waiting thread now.  So both
> > the flags have their reason to stay so far.  Then with all these in
> > mind what I can think of is only to forbid using DONTWAKE in WP case,
> > and that's how above definition comes (I believe, because it was
> > defined that way even before I started to work on it and I think it
> > makes sense).
> 
> There's no argument how DONTWAKE can be used with !WP. The
> userfaultfd_writeprotect() is called in response of the uffd monitor to WP
> page fault, it asks to clear write protection to some range, but it does
> not want to wake the faulting thread yet but rather it will use uffd_wake()
> later.
> 
> Still, I can't grok the usage of DONTWAKE with WP=1. In my understanding,
> in this case userfaultfd_writeprotect() is called unrelated to page faults,
> and the monitored thread runs freely, so why it should be waked at all?

Exactly this is how I understand it.  And that's why I wrote this
patch to remove the extra wakeup() since I think it's unnecessary.

> 
> And what happens, if the thread is waiting on a missing page fault and we
> do userfaultfd_writeprotect(WP=1) at the same time?

Then IMHO the userfaultfd_writeprotect() will be a noop simply because
the page is still missing.  Here if with the old code (before this
patch) we'll probably even try to wake up this thread but this thread
should just fault again on the same address due to the fact that the
page is missing.  After this patch the monitored thread should
continue to wait on the missing page.

Thanks,

-- 
Peter Xu

Re: [PATCH 1/5] mm/resource: return real error codes from walk failures

2019-02-25 Thread Christophe Leroy





Le 25/02/2019 à 19:57, Dave Hansen a écrit :

From: Dave Hansen 

walk_system_ram_range() can return an error code either because
*it* failed, or because the 'func' that it calls returned an
error.  The memory hotplug does the following:

ret = walk_system_ram_range(..., func);
 if (ret)
return ret;

and 'ret' makes it out to userspace, eventually.  The problem
is, walk_system_ram_range() failures that result from *it* failing
(as opposed to 'func') return -1.  That leads to a very odd
-EPERM (-1) return code out to userspace.

Make walk_system_ram_range() return -EINVAL for internal
failures to keep userspace less confused.

This return code is compatible with all the callers that I
audited.

This changes both the generic mm/ and powerpc-specific
implementations to have the same return value.

Signed-off-by: Dave Hansen 
Reviewed-by: Bjorn Helgaas 
Acked-by: Michael Ellerman  (powerpc)
Cc: Dan Williams 
Cc: Dave Jiang 
Cc: Ross Zwisler 
Cc: Vishal Verma 
Cc: Tom Lendacky 
Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: linux-nvd...@lists.01.org
Cc: linux-kernel@vger.kernel.org
Cc: linux...@kvack.org
Cc: Huang Ying 
Cc: Fengguang Wu 
Cc: Borislav Petkov 
Cc: Yaowei Bai 
Cc: Takashi Iwai 
Cc: Jerome Glisse 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-...@lists.ozlabs.org
Cc: Keith Busch 
---

  b/arch/powerpc/mm/mem.c |2 +-


walk_system_ram_range() was dropped in commit 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=26b523356f49a0117c8f9e32ca98aa6d6e496e1a


Christophe


  b/kernel/resource.c |4 ++--
  2 files changed, 3 insertions(+), 3 deletions(-)

diff -puN 
arch/powerpc/mm/mem.c~memory-hotplug-walk_system_ram_range-returns-neg-1 
arch/powerpc/mm/mem.c
--- a/arch/powerpc/mm/mem.c~memory-hotplug-walk_system_ram_range-returns-neg-1  
2019-02-25 10:56:47.452908034 -0800
+++ b/arch/powerpc/mm/mem.c 2019-02-25 10:56:47.458908034 -0800
@@ -189,7 +189,7 @@ walk_system_ram_range(unsigned long star
struct memblock_region *reg;
unsigned long end_pfn = start_pfn + nr_pages;
unsigned long tstart, tend;
-   int ret = -1;
+   int ret = -EINVAL;
  
  	for_each_memblock(memory, reg) {

tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
diff -puN kernel/resource.c~memory-hotplug-walk_system_ram_range-returns-neg-1 
kernel/resource.c
--- a/kernel/resource.c~memory-hotplug-walk_system_ram_range-returns-neg-1  
2019-02-25 10:56:47.454908034 -0800
+++ b/kernel/resource.c 2019-02-25 10:56:47.459908034 -0800
@@ -382,7 +382,7 @@ static int __walk_iomem_res_desc(resourc
 int (*func)(struct resource *, void *))
  {
struct resource res;
-   int ret = -1;
+   int ret = -EINVAL;
  
  	while (start < end &&

   !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
@@ -462,7 +462,7 @@ int walk_system_ram_range(unsigned long
unsigned long flags;
struct resource res;
unsigned long pfn, end_pfn;
-   int ret = -1;
+   int ret = -EINVAL;
  
  	start = (u64) start_pfn << PAGE_SHIFT;

end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
_

Re: general protection fault in tls_push_sg

2019-02-25 Thread Eric Biggers

On Tue, Jun 19, 2018 at 10:34:01PM -0700, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:ba4dbdedd3ed Merge tag 'jfs-4.18' of git://github.com/klei..
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=112e9ce440
> kernel config:  https://syzkaller.appspot.com/x/.config?x=f390986c4f7cd566
> dashboard link: https://syzkaller.appspot.com/bug?extid=54bcc120da8da091d609
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
> 
> Unfortunately, I don't have any reproducer for this crash yet.
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+54bcc120da8da091d...@syzkaller.appspotmail.com
> 
> netlink: 8 bytes leftover after parsing attributes in process
> `syz-executor0'.
> kasan: CONFIG_KASAN_INLINE enabled
> kasan: GPF could be caused by NULL-ptr deref or user memory access
> general protection fault:  [#1] SMP KASAN
> CPU: 1 PID: 27979 Comm: syz-executor6 Not tainted 4.18.0-rc1+ #109
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline]
> RIP: 0010:compound_head include/linux/page-flags.h:142 [inline]
> RIP: 0010:put_page include/linux/mm.h:911 [inline]
> RIP: 0010:tls_push_sg+0x2a3/0x880 net/tls/tls_main.c:142
> Code: fa 4d 39 e5 75 a2 e8 bc 50 f1 fa 48 8b 85 08 ff ff ff 49 8d 7f 08 48
> b9 00 00 00 00 00 fc ff df c6 00 00 48 89 f8 48 c1 e8 03 <80> 3c 08 00 0f 85
> 50 05 00 00 48 8b 85 08 ff ff ff 49 8b 5f 08 80
> RSP: 0018:8801c5776d90 EFLAGS: 00010202
> RAX: 0001 RBX:  RCX: dc00
> RDX:  RSI: 868a59e4 RDI: 0008
> RBP: 8801c5776eb0 R08: 88018e4fc6c0 R09: 8801c5776668
> R10: 0003 R11: 0002 R12: 
> R13:  R14:  R15: 
> FS:  7f2d08c17700() GS:8801daf0() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 1cc0 CR3: 000188ce8000 CR4: 001406e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  tls_push_record+0xaec/0x1400 net/tls/tls_sw.c:264
>  tls_sw_push_pending_record+0x22/0x30 net/tls/tls_sw.c:276
>  tls_handle_open_record net/tls/tls_main.c:164 [inline]
>  tls_sk_proto_close+0x74c/0xae0 net/tls/tls_main.c:264
>  inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
>  inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
>  __sock_release+0xd7/0x260 net/socket.c:603
>  sock_close+0x19/0x20 net/socket.c:1186
>  __fput+0x35b/0x8b0 fs/file_table.c:209
>  fput+0x15/0x20 fs/file_table.c:243
>  task_work_run+0x1ec/0x2a0 kernel/task_work.c:113
>  exit_task_work include/linux/task_work.h:22 [inline]
>  do_exit+0x1b08/0x2750 kernel/exit.c:865
>  do_group_exit+0x177/0x440 kernel/exit.c:968
>  get_signal+0x88e/0x1970 kernel/signal.c:2468
>  do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816
>  exit_to_usermode_loop+0x2de/0x370 arch/x86/entry/common.c:162
>  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
>  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
>  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x455b29
> Code: 1d ba fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7
> 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff
> 0f 83 eb b9 fb ff c3 66 2e 0f 1f 84 00 00 00 00
> RSP: 002b:7f2d08c16ce8 EFLAGS: 0246 ORIG_RAX: 00ca
> RAX: fe00 RBX: 0072bec8 RCX: 00455b29
> RDX:  RSI:  RDI: 0072bec8
> RBP: 0072bec8 R08: 0033 R09: 0072bea0
> R10:  R11: 0246 R12: 
> R13: 00a3e81f R14: 7f2d08c179c0 R15: 
> Modules linked in:
> Dumping ftrace buffer:
>(ftrace buffer empty)
> ---[ end trace d9dfd7279b1a9c99 ]---
> RIP: 0010:__read_once_size include/linux/compiler.h:188 [inline]
> RIP: 0010:compound_head include/linux/page-flags.h:142 [inline]
> RIP: 0010:put_page include/linux/mm.h:911 [inline]
> RIP: 0010:tls_push_sg+0x2a3/0x880 net/tls/tls_main.c:142
> 
> 
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkal...@googlegroups.com.
> 
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with
> syzbot.
> 

(As with the other reports of this...)

AFAICS this was fixed by this commit:

commit d829e9c4112b52f4f00195900fd4c685f61365ab
Author: Daniel Borkmann 
Date:   Sat Oct 13 02:45:59 2018 +0200

tls: convert to generic

linux-next: build failure after merge of the akpm tree

2019-02-25 Thread Stephen Rothwell

Hi Andrew,

After merging the akpm tree, today's linux-next build (powerpc
allnoconfig) failed like this:

/home/sfr/next/next/arch/powerpc/kernel/setup_32.c:176:21: error: redefinition 
of 'alloc_stack'
 static void *__init alloc_stack(void)
 ^~~
/home/sfr/next/next/arch/powerpc/kernel/setup_32.c:165:21: note: previous 
definition of 'alloc_stack' was here
 static void *__init alloc_stack(void)
 ^~~

Caused by patch

  "powerpc: use memblock functions returning virtual address"

from the akpm tree interacting with commit

  c8e409a33cf8 ("powerpc/irq: use memblock functions returning virtual address")

from the powerpc tree.

Both patches added the alloc_stack() function and git resolved it by
adding both. :-(  I have added a patch to remove one of them.



-- 
Cheers,
Stephen Rothwell


pgpyFNi5KgXaz.pgp
Description: OpenPGP digital signature

Re: [PATCH 3/4] mb12x2.c: add mb12x2 ultrasonic distance iio sensor

2019-02-25 Thread Peter Meerwald-Stadler

On Sun, 24 Feb 2019, Andreas Klinger wrote:

comments below

> Add MaxSonar-I2CXL ultrasonic distance sensors of type family mb12x2
> using the i2c interface
> 
> Implemented functionality:
> - reading the distance via in_distance_raw
> - buffered mode with trigger
> - make use of status gpio to announce completion of ranging
> 
> Signed-off-by: Andreas Klinger 
> ---
>  drivers/iio/proximity/mb12x2.c | 283 
> +
>  1 file changed, 283 insertions(+)
>  create mode 100644 drivers/iio/proximity/mb12x2.c
> 
> diff --git a/drivers/iio/proximity/mb12x2.c b/drivers/iio/proximity/mb12x2.c
> new file mode 100644
> index ..0c052fde94b4
> --- /dev/null
> +++ b/drivers/iio/proximity/mb12x2.c
> @@ -0,0 +1,283 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * mb12x2.c - Support for I2CXL-MaxSonar-EZ series ultrasonic ranger with
> + *   i2c interface
> + * actually supported are mb12x2 types
> + *
> + * Copyright (c) 2019 Andreas Klinger 
> + *
> + * For details about the device see:
> + * https://www.maxbotix.com/documents/I2CXL-MaxSonar-EZ_Datasheet.pdf
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* registers of MaxSonar device */
> +#define MB12X2_RANGE_COMMAND 0x51/* Command for reading range */
> +#define MB12X2_ADDR_UNLOCK_1 0xAA/* Command 1 for changing address */
> +#define MB12X2_ADDR_UNLOCK_2 0xA5/* Command 2 for changing address */
> +
> +struct mb12x2_data {
> + struct i2c_client   *client;
> +
> + struct mutexlock;
> +
> + /*
> +  * optionally a gpio can be used to announce when ranging has
> +  * finished
> +  */
> + struct completion   ranging;
> + struct gpio_desc*gpiod_status;
> + int irqnr;
> +
> + /*
> +  * triggered buffer
> +  * 1x16-bit channel + 3x16 padding + 4x16 timestamp
> +  */
> + s16 buffer[8];
> +};
> +
> +static irqreturn_t mb12x2_handle_irq(int irq, void *dev_id)
> +{
> + struct iio_dev *indio_dev = dev_id;
> + struct mb12x2_data *data = iio_priv(indio_dev);
> +
> + /* double check to make sure data is now available */
> + if (!gpiod_get_value(data->gpiod_status))
> + complete(&data->ranging);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static int mb12x2_read_distance(struct mb12x2_data *data)
> +{
> + struct i2c_client *client = data->client;
> + int ret;
> + int distance;
> + unsigned char buf[2];

use __le16?

> +
> + mutex_lock(&data->lock);
> +
> + reinit_completion(&data->ranging);
> +
> + ret = i2c_smbus_write_byte(client, MB12X2_RANGE_COMMAND);
> + if (ret < 0) {
> + dev_err(&client->dev, "write command - err: %d\n", ret);
> + mutex_unlock(&data->lock);
> + return ret;
> + }
> +
> + if (data->gpiod_status) {
> + /* it cannot take more than 100 ms */
> + ret = wait_for_completion_killable_timeout(&data->ranging,
> + HZ/10);
> + if (ret < 0) {
> + mutex_unlock(&data->lock);
> + return ret;
> + } else if (ret == 0) {
> + mutex_unlock(&data->lock);
> + return -ETIMEDOUT;
> + }
> + } else {
> + /*
> +  * use simple sleep if gpio announce pin is not connected
> +  */
> + msleep(15);
> + }
> +
> +
> + ret = i2c_master_recv(client, buf, sizeof(buf));
> + if (ret < 0) {
> + dev_err(&client->dev, "i2c_master_recv: ret=%d\n", ret);
> + mutex_unlock(&data->lock);
> + return ret;
> + }
> +
> + distance = buf[0]<<8 | buf[1];

__le16_to_cpu()

> +
> + mutex_unlock(&data->lock);
> +
> + return distance;
> +}
> +
> +static irqreturn_t mb12x2_trigger_handler(int irq, void *p)
> +{
> + struct iio_poll_func *pf = p;
> + struct iio_dev *indio_dev = pf->indio_dev;
> + struct mb12x2_data *data = iio_priv(indio_dev);
> + s16 sensor_data;
> +
> + sensor_data = mb12x2_read_distance(data);

_read_distance() returns int, not s16
what if the distance is >= 0x8000?

> + if (sensor_data < 0)
> + goto err;
> +
> + mutex_lock(&data->lock);
> +
> + data->buffer[0] = sensor_data;
> + iio_push_to_buffers_with_timestamp(indio_dev,
> + data->buffer, pf->timestamp);
> +
> + mutex_unlock(&data->lock);
> +err:
> + iio_trigger_notify_done(indio_dev->trig);
> + return IRQ_HANDLED;
> +}
> +
> +static int mb12x2_read_raw(struct iio_dev *indio_dev,
> + struct iio_chan_spec const *channel, int *val,
> + int *val2, long mask)
> +{
> + struct mb12x2_data *data = iio_priv(indio_dev);
> + int ret;
> +
> + if (channel->type !=

Re: general protection fault in gcmaes_crypt_by_sg

2019-02-25 Thread Eric Biggers

On Wed, Feb 20, 2019 at 05:03:38PM +0100, 'Dmitry Vyukov' via syzkaller-bugs 
wrote:
> On Mon, Oct 8, 2018 at 12:06 PM Ard Biesheuvel
>  wrote:
> >
> > (add the TLS maintainers)
> >
> > On 6 October 2018 at 15:04, syzbot
> >  wrote:
> > > Hello,
> > >
> > > syzbot found the following crash on:
> > >
> > > HEAD commit:12ffaa1197f5 Add linux-next specific files for 20181005
> > > git tree:   linux-next
> > > console output: https://syzkaller.appspot.com/x/log.txt?x=16cb780640
> > > kernel config:  https://syzkaller.appspot.com/x/.config?x=d6b058a7232046f
> > > dashboard link: 
> > > https://syzkaller.appspot.com/bug?extid=c5048caf67d09ee24549
> > > compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
> > >
> > > Unfortunately, I don't have any reproducer for this crash yet.
> > >
> > > IMPORTANT: if you fix the bug, please add the following tag to the commit:
> > > Reported-by: syzbot+c5048caf67d09ee24...@syzkaller.appspotmail.com
> 
> 
> This last happened 4 months ago. Probably fixed by something?
> Candidate for closure as obsolete.
> 
> > > @ : renamed from ip6gre0
> > > kasan: CONFIG_KASAN_INLINE enabled
> > > kasan: GPF could be caused by NULL-ptr deref or user memory access
> > > general protection fault:  [#1] PREEMPT SMP KASAN
> > > CPU: 1 PID: 1510 Comm: syz-executor0 Not tainted 4.19.0-rc6-next-20181005+
> > > #88
> > > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> > > Google 01/01/2011
> > > RIP: 0010:scatterwalk_start include/crypto/scatterwalk.h:73 [inline]
> > > RIP: 0010:gcmaes_crypt_by_sg+0x56f/0x2110
> > > arch/x86/crypto/aesni-intel_glue.c:834
> > > Code: c1 e9 03 80 3c 11 00 0f 85 bf 18 00 00 48 8d 78 08 48 89 84 24 50 01
> > > 00 00 48 ba 00 00 00 00 00 fc ff df 48 89 f9 48 c1 e9 03 <0f> b6 14 11 84 
> > > d2
> > > 74 09 80 fa 03 0f 8e 6b 15 00 00 44 8b 60 08 48
> > > RSP: 0018:8801852bf120 EFLAGS: 00010202
> > > RAX:  RBX: 88019023c6b0 RCX: 0001
> > > RDX: dc00 RSI: 8359e06c RDI: 0008
> > > RBP: 8801852bf520 R08: 8801850c4300 R09: 8801befb0060
> > > R10: 8801852bf7b0 R11: 8801852bf7db R12: 000d
> > > R13: 000d R14: 8801852bf238 R15: 8801852bf7d0
> > > FS:  025d3940() GS:8801daf0() 
> > > knlGS:
> > > CS:  0010 DS:  ES:  CR0: 80050033
> > > CR2: 001b2d128000 CR3: 0001cd273000 CR4: 001406e0
> > > DR0: 2000 DR1:  DR2: 
> > > DR3:  DR6: fffe0ff0 DR7: 0600
> > > Call Trace:
> > >  gcmaes_encrypt.constprop.17+0x7d7/0x1190
> > > arch/x86/crypto/aesni-intel_glue.c:929
> > >  generic_gcmaes_encrypt+0x12d/0x186 
> > > arch/x86/crypto/aesni-intel_glue.c:1294
> > >  crypto_aead_encrypt include/crypto/aead.h:364 [inline]
> > >  gcmaes_wrapper_encrypt+0x162/0x200 
> > > arch/x86/crypto/aesni-intel_glue.c:1127
> > >  crypto_aead_encrypt include/crypto/aead.h:364 [inline]
> > >  tls_do_encryption net/tls/tls_sw.c:534 [inline]
> > >  tls_push_record+0xc12/0x17f0 net/tls/tls_sw.c:583
> > >  tls_sw_push_pending_record+0x22/0x30 net/tls/tls_sw.c:597
> > >  tls_handle_open_record net/tls/tls_main.c:155 [inline]
> > >  tls_sk_proto_close+0x439/0x750 net/tls/tls_main.c:272
> > >  inet_release+0x104/0x1f0 net/ipv4/af_inet.c:428
> > >  inet6_release+0x50/0x70 net/ipv6/af_inet6.c:458
> > >  __sock_release+0xd7/0x250 net/socket.c:580
> > >  sock_close+0x19/0x20 net/socket.c:1142
> > >  __fput+0x3bc/0xa70 fs/file_table.c:279
> > >  fput+0x15/0x20 fs/file_table.c:312
> > >  task_work_run+0x1e8/0x2a0 kernel/task_work.c:113
> > >  tracehook_notify_resume include/linux/tracehook.h:188 [inline]
> > >  exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166
> > >  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
> > >  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
> > >  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
> > >  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > > RIP: 0033:0x411051
> > > Code: 75 14 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 19 00 00 c3 48
> > > 83 ec 08 e8 0a fc ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 
> > > 89
> > > c2 e8 53 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01
> > > RSP: 002b:7fff40f9a8d0 EFLAGS: 0293 ORIG_RAX: 0003
> > > RAX:  RBX: 0004 RCX: 00411051
> > > RDX: 001b2da2 RSI:  RDI: 0003
> > > RBP:  R08: 49ccd1ef R09: 49ccd1f3
> > > R10: 7fff40f9a800 R11: 0293 R12: 
> > > R13: 0001 R14: 010c R15: 
> > > Modules linked in:
> > > ---[ end trace a8f523110d8ca375 ]---
> > > RIP: 0010:scatterwalk_start include/crypto/scatterwalk.h:73 [inline]
> > > RIP: 0010:gcmaes_crypt_by_sg+0x56f/0x2110
> > >

[PATCH] fw_cfg: use __ATTR_RO_MODE to define rev sysfs

2019-02-25 Thread Wei Yang

Leverage __ATTR_RO_MODE to define rev sysfs instead of using open code
to define the attribute.

Signed-off-by: Wei Yang 
---
 drivers/firmware/qemu_fw_cfg.c | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 039e0f91dba8..a1293cbd7adb 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -296,18 +296,13 @@ static int fw_cfg_do_platform_probe(struct 
platform_device *pdev)
return 0;
 }
 
-static ssize_t fw_cfg_showrev(struct kobject *k, struct attribute *a, char 
*buf)
+static ssize_t fw_cfg_rev_show(struct kobject *k, struct kobj_attribute *a,
+  char *buf)
 {
return sprintf(buf, "%u\n", fw_cfg_rev);
 }
-
-static const struct {
-   struct attribute attr;
-   ssize_t (*show)(struct kobject *k, struct attribute *a, char *buf);
-} fw_cfg_rev_attr = {
-   .attr = { .name = "rev", .mode = S_IRUSR },
-   .show = fw_cfg_showrev,
-};
+static const struct kobj_attribute fw_cfg_rev_attr =
+   __ATTR_RO_MODE(fw_cfg_rev, 0400);
 
 /* fw_cfg_sysfs_entry type */
 struct fw_cfg_sysfs_entry {
-- 
2.19.1

[bug report][stable] perf probe: failed to add events

2019-02-25 Thread Joseph Qi

Hi,

I'm using kernel v4.19.24 and have found that there is an issue when
using perf probe to define a new dynamic tracepoint.

$ perf probe -a handle_mm_fault
Failed to write event: Numerical result out of range
  Error: Failed to add events.

I've also tried kernel v4.20, and it can pass.

So I've bisected and finally found the first good commit is:
bf904d2762ee x86/pti/64: Remove the SYSCALL64 entry trampoline
which is based on another commit:
98f05b5138f0 Use the TSS sp2 slot for SYSCALL/SYSRET scratch space

Once I've backported these two commits into 4.19.24, the above case can
pass, though I'm not sure how it is fixed.
So is there any plan to let them go into stable as well?

Thanks,
Joseph

Re: KASAN: use-after-free Write in tls_push_record (2)

2019-02-25 Thread Eric Biggers

On Thu, Jul 12, 2018 at 06:44:55AM -0400, Boris Pismenny wrote:
> It seems to me that the crash here is due to write_space being called after
> the close system call. Maybe the correct solution is to move the TX software
> state to be released in sk_destruct. As we already do for the device state
> (see tls_device.c).
> 
> Is anyone looking into this one?
> 
> On 7/11/2018 8:49 PM, syzbot wrote:
> > Hello,
> > 
> > syzbot found the following crash on:
> > 
> > HEAD commit:    1e09177acae3 Merge tag 'mips_fixes_4.18_3' of
> > git://git.ke..
> > git tree:   upstream
> > console output: https://syzkaller.appspot.com/x/log.txt?x=128903b240
> > kernel config:  https://syzkaller.appspot.com/x/.config?x=25856fac4e580aa7
> > dashboard link:
> > https://syzkaller.appspot.com/bug?extid=6c4e6ecbf9a2797be67c
> > compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
> > syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=1231267840
> > C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=13ef76c240
> > 
> > IMPORTANT: if you fix the bug, please add the following tag to the commit:
> > Reported-by: syzbot+6c4e6ecbf9a2797be...@syzkaller.appspotmail.com
> > 
> > RDX: fdef RSI: 25c0 RDI: 0003
> > RBP: 006cb018 R08: 2000 R09: 001c
> > R10: 0040 R11: 0212 R12: 0005
> > R13:  R14:  R15: 
> > ==
> > BUG: KASAN: use-after-free in tls_fill_prepend include/net/tls.h:339
> > [inline]
> > BUG: KASAN: use-after-free in tls_push_record+0x1091/0x1400
> > net/tls/tls_sw.c:239
> > Write of size 1 at addr 8801ae43 by task syz-executor589/4567
> > 
> > CPU: 0 PID: 4567 Comm: syz-executor589 Not tainted 4.18.0-rc4+ #141
> > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> > Google 01/01/2011
> > Call Trace:
> >   __dump_stack lib/dump_stack.c:77 [inline]
> >   dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
> >   print_address_description+0x6c/0x20b mm/kasan/report.c:256
> >   kasan_report_error mm/kasan/report.c:354 [inline]
> >   kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
> >   __asan_report_store1_noabort+0x17/0x20 mm/kasan/report.c:435
> >   tls_fill_prepend include/net/tls.h:339 [inline]
> >   tls_push_record+0x1091/0x1400 net/tls/tls_sw.c:239
> >   tls_sw_push_pending_record+0x22/0x30 net/tls/tls_sw.c:276
> >   tls_handle_open_record net/tls/tls_main.c:164 [inline]
> >   tls_sk_proto_close+0x74c/0xae0 net/tls/tls_main.c:264
> >   inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
> >   inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
> >   __sock_release+0xd7/0x260 net/socket.c:599
> >   sock_close+0x19/0x20 net/socket.c:1150
> >   __fput+0x355/0x8b0 fs/file_table.c:209
> >   fput+0x15/0x20 fs/file_table.c:243
> >   task_work_run+0x1ec/0x2a0 kernel/task_work.c:113
> >   exit_task_work include/linux/task_work.h:22 [inline]
> >   do_exit+0x1b08/0x2750 kernel/exit.c:865
> >   do_group_exit+0x177/0x440 kernel/exit.c:968
> >   __do_sys_exit_group kernel/exit.c:979 [inline]
> >   __se_sys_exit_group kernel/exit.c:977 [inline]
> >   __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977
> >   do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
> >   entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > RIP: 0033:0x43f358
> > Code: Bad RIP value.
> > RSP: 002b:7fff51750198 EFLAGS: 0246 ORIG_RAX: 00e7
> > RAX: ffda RBX:  RCX: 0043f358
> > RDX:  RSI: 003c RDI: 
> > RBP: 004bf448 R08: 00e7 R09: ffd0
> > R10: 0040 R11: 0246 R12: 0001
> > R13: 006d1180 R14:  R15: 
> > 
> > The buggy address belongs to the page:
> > page:ea0006b90c00 count:0 mapcount:-128 mapping:
> > index:0x0
> > flags: 0x2fffc00()
> > raw: 02fffc00 ea0006b96408 88021fffac18 
> > raw:  0003 ff7f 
> > page dumped because: kasan: bad access detected
> > 
> > Memory state around the buggy address:
> >   8801ae42ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> >   8801ae42ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> > > 8801ae43: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> >     ^
> >   8801ae430080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> >   8801ae430100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> > ==
> > 
> > 
> > ---
> > This bug is generated by a bot. It may contain errors.
> > See https://goo.gl/tpsmEJ for more information about syzbot.
> > syzbot engineers can be reached at syzkal...@googlegroups.com.
> > 
> > syzbot will keep track of this bug

Re: [PATCH v2 23/26] userfaultfd: wp: don't wake up when doing write protect

2019-02-25 Thread Mike Rapoport

On Tue, Feb 26, 2019 at 02:24:52PM +0800, Peter Xu wrote:
> On Mon, Feb 25, 2019 at 11:09:35PM +0200, Mike Rapoport wrote:
> > On Tue, Feb 12, 2019 at 10:56:29AM +0800, Peter Xu wrote:
> > > It does not make sense to try to wake up any waiting thread when we're
> > > write-protecting a memory region.  Only wake up when resolving a write
> > > protected page fault.
> > > 
> > > Signed-off-by: Peter Xu 
> > > ---
> > >  fs/userfaultfd.c | 13 -
> > >  1 file changed, 8 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > > index 81962d62520c..f1f61a0278c2 100644
> > > --- a/fs/userfaultfd.c
> > > +++ b/fs/userfaultfd.c
> > > @@ -1771,6 +1771,7 @@ static int userfaultfd_writeprotect(struct 
> > > userfaultfd_ctx *ctx,
> > >   struct uffdio_writeprotect uffdio_wp;
> > >   struct uffdio_writeprotect __user *user_uffdio_wp;
> > >   struct userfaultfd_wake_range range;
> > > + bool mode_wp, mode_dontwake;
> > > 
> > >   if (READ_ONCE(ctx->mmap_changing))
> > >   return -EAGAIN;
> > > @@ -1789,18 +1790,20 @@ static int userfaultfd_writeprotect(struct 
> > > userfaultfd_ctx *ctx,
> > >   if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
> > >  UFFDIO_WRITEPROTECT_MODE_WP))
> > >   return -EINVAL;
> > > - if ((uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP) &&
> > > -  (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE))
> > > +
> > > + mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
> > > + mode_dontwake = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
> > > +
> > > + if (mode_wp && mode_dontwake)
> > >   return -EINVAL;
> > 
> > This actually means the opposite of the commit message text ;-)
> > 
> > Is any dependency of _WP and _DONTWAKE needed at all?
> 
> So this is indeed confusing at least, because both you and Jerome have
> asked the same question... :)
> 
> My understanding is that we don't have any reason to wake up any
> thread when we are write-protecting a range, in that sense the flag
> UFFDIO_WRITEPROTECT_MODE_DONTWAKE is already meaningless in the
> UFFDIO_WRITEPROTECT ioctl context.  So before everything here's how
> these flags are defined:
> 
> struct uffdio_writeprotect {
>   struct uffdio_range range;
>   /* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
> #define UFFDIO_WRITEPROTECT_MODE_WP   ((__u64)1<<0)
> #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
>   __u64 mode;
> };
> 
> To make it clear, we simply define it as "DONTWAKE is valid only with
> !WP".  When with that, "mode_wp && mode_dontwake" is indeed a
> meaningless flag combination.  Though please note that it does not
> mean that the operation ("don't wake up the thread") is meaningless -
> that's what we'll do no matter what when WP==1.  IMHO it's only about
> the interface not the behavior.
> 
> I don't have a good way to make this clearer because firstly we'll
> need the WP flag to mark whether we're protecting or unprotecting the
> pages.  Later on, we need DONTWAKE for page fault handling case to
> mark that we don't want to wake up the waiting thread now.  So both
> the flags have their reason to stay so far.  Then with all these in
> mind what I can think of is only to forbid using DONTWAKE in WP case,
> and that's how above definition comes (I believe, because it was
> defined that way even before I started to work on it and I think it
> makes sense).

There's no argument about how DONTWAKE can be used with !WP.
userfaultfd_writeprotect() is called by the uffd monitor in response to a WP
page fault; it asks to clear write protection on some range, but it does
not want to wake the faulting thread yet and will rather use uffd_wake()
later.

Still, I can't grok the usage of DONTWAKE with WP=1. In my understanding,
in this case userfaultfd_writeprotect() is called unrelated to page faults,
and the monitored thread runs freely, so why should it be woken at all?

And what happens, if the thread is waiting on a missing page fault and we
do userfaultfd_writeprotect(WP=1) at the same time?

> Thanks,
> 
> -- 
> Peter Xu
> 

-- 
Sincerely yours,
Mike.

[PATCH] arm64: defconfig: Enable SPI_SUN6I

2019-02-25 Thread Jagan Teki

Enable SUN6I SPI controller for Allwinner ARM64 SoC's.

This would be helpful for setting up SPI flash as another boot source.
Mark it as built-in since it is required during boot.

Signed-off-by: Jagan Teki 
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2d9c39033c1a..d45e032c4d98 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -372,6 +372,7 @@ CONFIG_SPI_ROCKCHIP=y
 CONFIG_SPI_QUP=y
 CONFIG_SPI_S3C64XX=y
 CONFIG_SPI_SPIDEV=m
+CONFIG_SPI_SUN6I=y
 CONFIG_SPI_NXP_FLEXSPI=y
 CONFIG_SPMI=y
 CONFIG_PINCTRL_SINGLE=y
-- 
2.18.0.321.gffc6fa0e3

Re: kernel BUG at include/linux/mm.h:LINE! (2)

2019-02-25 Thread Eric Biggers

On Fri, Jun 08, 2018 at 06:11:02AM -0700, syzbot wrote:
> Hello,
> 
> syzbot found the following crash on:
> 
> HEAD commit:7170e6045a6a strparser: Add __strp_unpause and use it in k..
> git tree:   net-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=114236af80
> kernel config:  https://syzkaller.appspot.com/x/.config?x=a601a80fec461d44
> dashboard link: https://syzkaller.appspot.com/bug?extid=3225ce21c0e9929bb9cf
> compiler:   gcc (GCC) 8.0.1 20180413 (experimental)
> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=10f44fdf80
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=110f636f80
> 
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+3225ce21c0e9929bb...@syzkaller.appspotmail.com
> 
> flags: 0x2fffc00()
> raw: 02fffc00   ff80
> raw: ea0006b29220 88021fffac18 0003 
> page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) <= 0)
> [ cut here ]
> kernel BUG at include/linux/mm.h:853!
> invalid opcode:  [#1] SMP KASAN
> Dumping ftrace buffer:
>(ftrace buffer empty)
> Modules linked in:
> CPU: 1 PID: 4545 Comm: syz-executor492 Not tainted 4.17.0-rc7+ #82
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> RIP: 0010:get_page include/linux/mm.h:853 [inline]
> RIP: 0010:do_tcp_sendpages+0x1879/0x1e60 net/ipv4/tcp.c:1002
> RSP: 0018:8801c2a06f88 EFLAGS: 00010203
> RAX:  RBX: 8801d972d580 RCX: 
> RDX:  RSI: 81a66c25 RDI: ed0038540de0
> RBP: 8801c2a071e8 R08: 8801b11d2480 R09: 0006
> R10: 8801b11d2480 R11:  R12: 301d
> R13: ea0006b2621c R14: 8801ae5a6040 R15: dc00
> FS:  () GS:8801daf0() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 20008000 CR3: 08c6a000 CR4: 001406e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  tls_push_sg+0x25b/0x860 net/tls/tls_main.c:126
>  tls_push_record+0xae5/0x13e0 net/tls/tls_sw.c:266
>  tls_sw_push_pending_record+0x22/0x30 net/tls/tls_sw.c:276
>  tls_handle_open_record net/tls/tls_main.c:164 [inline]
>  tls_sk_proto_close+0x734/0xad0 net/tls/tls_main.c:264
>  inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
>  inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
>  sock_release+0x96/0x1b0 net/socket.c:594
>  sock_close+0x16/0x20 net/socket.c:1149
>  __fput+0x34d/0x890 fs/file_table.c:209
>  fput+0x15/0x20 fs/file_table.c:243
>  task_work_run+0x1e4/0x290 kernel/task_work.c:113
>  exit_task_work include/linux/task_work.h:22 [inline]
>  do_exit+0x1aee/0x2730 kernel/exit.c:865
>  do_group_exit+0x16f/0x430 kernel/exit.c:968
>  __do_sys_exit_group kernel/exit.c:979 [inline]
>  __se_sys_exit_group kernel/exit.c:977 [inline]
>  __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977
>  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
>  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x43f368
> RSP: 002b:7ffd03500578 EFLAGS: 0246 ORIG_RAX: 00e7
> RAX: ffda RBX:  RCX: 0043f368
> RDX:  RSI: 003c RDI: 
> RBP: 004bf448 R08: 00e7 R09: ffd0
> R10:  R11: 0246 R12: 0001
> R13: 006d1180 R14:  R15: 
> Code: ff ff 41 89 86 cc 08 00 00 e8 e4 07 05 00 e9 2c eb ff ff e8 ca 4b 27
> fb 48 8b bd b8 fd ff ff 48 c7 c6 40 0c 54 88 e8 77 72 54 fb <0f> 0b 48 89 85
> b8 fd ff ff e8 a9 4b 27 fb 48 8b 85 b8 fd ff ff
> RIP: get_page include/linux/mm.h:853 [inline] RSP: 8801c2a06f88
> RIP: do_tcp_sendpages+0x1879/0x1e60 net/ipv4/tcp.c:1002 RSP:
> 8801c2a06f88
> ---[ end trace 500a6e4fab99629c ]---
> 
> 
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkal...@googlegroups.com.
> 
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with
> syzbot.
> syzbot can test patches for this bug, for details see:
> https://goo.gl/tpsmEJ#testing-patches
> 

AFAICS this was fixed by this commit:

commit d829e9c4112b52f4f00195900fd4c685f61365ab
Author: Daniel Borkmann 
Date:   Sat Oct 13 02:45:59 2018 +0200

tls: convert to generic sk_msg interface

So telling syzbot:

#syz fix: tls: convert to generic sk_msg interface

The issue was that described in this comment in tls_sw_sendmsg():

/* Open records defined only if successfully copied, otherwise
 * we would

Re: [PATCH] staging: wilc1000: Fix incorrent type in assignment

2019-02-25 Thread YU Bo


On Tue, Feb 26, 2019 at 06:39:28AM +, ajay.kat...@microchip.com wrote:



On 2/26/2019 8:58 AM, Bo YU wrote:

The patch fixes following sparse warning:

drivers/staging/wilc1000/host_interface.c:450:30: warning: incorrect type in 
assignment (different base types)
drivers/staging/wilc1000/host_interface.c:450:30:expected restricted __le16 
[usertype] beacon_period
drivers/staging/wilc1000/host_interface.c:450:30:got unsigned short 
[usertype] beacon_interval
drivers/staging/wilc1000/host_interface.c:451:25: warning: incorrect type in 
assignment (different base types)
drivers/staging/wilc1000/host_interface.c:451:25:expected restricted __le16 
[usertype] cap_info
drivers/staging/wilc1000/host_interface.c:451:25:got unsigned short 
[usertype] capability

Signed-off-by: Bo YU 
---
I have no hardware to test it and just to compile it


Thanks for submitting the patch.

The correct way to fix the above sparse warning is by using cpu_to_le16()
while filling in the information in ->beacon_period and ->cap_info because
the wilc1000 module expects the data in _le_ byte order.

Please change the lines below in host_interface.c and resubmit the patch.
param->beacon_period = bss->beacon_interval;
param->cap_info = bss->capability;
to
param->beacon_period = cpu_to_le16(bss->beacon_interval);
param->cap_info = cpu_to_le16(bss->capability);

Ok, done, thank you,
Bo



---
 drivers/staging/wilc1000/host_interface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/wilc1000/host_interface.c 
b/drivers/staging/wilc1000/host_interface.c
index 50dc2dd942f5..cdcb52aec779 100644
--- a/drivers/staging/wilc1000/host_interface.c
+++ b/drivers/staging/wilc1000/host_interface.c
@@ -106,10 +106,10 @@ struct wilc_join_bss_param {
u8 ssid_terminator;
u8 bss_type;
u8 ch;
-   __le16 cap_info;
+   u16 cap_info;
u8 sa[ETH_ALEN];
u8 bssid[ETH_ALEN];
-   __le16 beacon_period;
+   u16 beacon_period;
u8 dtim_period;
u8 supp_rates[WILC_MAX_RATES_SUPPORTED + 1];
u8 wmm_cap;



Regards,
Ajay

[PATCH V2] staging: wilc1000: fix incorrent type assignment

2019-02-25 Thread Bo YU

Fix sparse warning:

drivers/staging/wilc1000/host_interface.c:450:30: warning: incorrect type in 
assignment (different base types)
drivers/staging/wilc1000/host_interface.c:450:30:expected restricted __le16 
[usertype] beacon_period
drivers/staging/wilc1000/host_interface.c:450:30:got unsigned short 
[usertype] beacon_interval
drivers/staging/wilc1000/host_interface.c:451:25: warning: incorrect type in 
assignment (different base types)
drivers/staging/wilc1000/host_interface.c:451:25:expected restricted __le16 
[usertype] cap_info
drivers/staging/wilc1000/host_interface.c:451:25:got unsigned short 
[usertype] capability

Signed-off-by: Bo YU 
---
V2: use cpu_to_le16 assign valid type according to Ajay's suggestions
---
 drivers/staging/wilc1000/host_interface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/wilc1000/host_interface.c 
b/drivers/staging/wilc1000/host_interface.c
index 50dc2dd942f5..20349af2ed30 100644
--- a/drivers/staging/wilc1000/host_interface.c
+++ b/drivers/staging/wilc1000/host_interface.c
@@ -447,8 +447,8 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss,
if (!param)
return NULL;
 
-   param->beacon_period = bss->beacon_interval;
-   param->cap_info = bss->capability;
+   param->beacon_period = cpu_to_le16(bss->beacon_interval);
+   param->cap_info = cpu_to_le16(bss->capability);
param->bss_type = WILC_FW_BSS_TYPE_INFRA;
param->ch = ieee80211_frequency_to_channel(bss->channel->center_freq);
ether_addr_copy(param->bssid, bss->bssid);
-- 
2.11.0

Re: [PATCH 2/4] iio/proximity: add mb12x2 driver to Kconfig and Makefile

2019-02-25 Thread Peter Meerwald-Stadler

On Sun, 24 Feb 2019, Andreas Klinger wrote:

> Makefile and Kconfig: add configuration for mb12x2 ultrasonic proximity
> driver
> 

> diff --git a/drivers/iio/proximity/Makefile b/drivers/iio/proximity/Makefile
> index 6d031f903c4c..c56b72a8be87 100644
> --- a/drivers/iio/proximity/Makefile
> +++ b/drivers/iio/proximity/Makefile
> @@ -6,6 +6,7 @@
>  # When adding new entries keep the list in alphabetical order
>  obj-$(CONFIG_AS3935) += as3935.o
>  obj-$(CONFIG_ISL29501)   += isl29501.o
> +obj-$(CONFIG_MB12X2) += mb12x2.o

alphabetic order please

>  obj-$(CONFIG_LIDAR_LITE_V2)  += pulsedlight-lidar-lite-v2.o
>  obj-$(CONFIG_RFD77402)   += rfd77402.o
>  obj-$(CONFIG_SRF04)  += srf04.o
> 

-- 

Peter Meerwald-Stadler
Mobile: +43 664 24 44 418

Re: [PATCH v2 20/26] userfaultfd: wp: support write protection for userfault vma range

2019-02-25 Thread Peter Xu

On Tue, Feb 26, 2019 at 08:43:47AM +0200, Mike Rapoport wrote:
> On Tue, Feb 26, 2019 at 02:06:27PM +0800, Peter Xu wrote:
> > On Mon, Feb 25, 2019 at 10:52:34PM +0200, Mike Rapoport wrote:
> > > On Tue, Feb 12, 2019 at 10:56:26AM +0800, Peter Xu wrote:
> > > > From: Shaohua Li 
> > > > 
> > > > Add API to enable/disable writeprotect a vma range. Unlike mprotect,
> > > > this doesn't split/merge vmas.
> > > > 
> > > > Cc: Andrea Arcangeli 
> > > > Cc: Rik van Riel 
> > > > Cc: Kirill A. Shutemov 
> > > > Cc: Mel Gorman 
> > > > Cc: Hugh Dickins 
> > > > Cc: Johannes Weiner 
> > > > Signed-off-by: Shaohua Li 
> > > > Signed-off-by: Andrea Arcangeli 
> > > > [peterx:
> > > >  - use the helper to find VMA;
> > > >  - return -ENOENT if not found to match mcopy case;
> > > >  - use the new MM_CP_UFFD_WP* flags for change_protection
> > > >  - check against mmap_changing for failures]
> > > > Signed-off-by: Peter Xu 
> > > > ---
> > > >  include/linux/userfaultfd_k.h |  3 ++
> > > >  mm/userfaultfd.c  | 54 +++
> > > >  2 files changed, 57 insertions(+)
> > > > 
> > > > diff --git a/include/linux/userfaultfd_k.h 
> > > > b/include/linux/userfaultfd_k.h
> > > > index 765ce884cec0..8f6e6ed544fb 100644
> > > > --- a/include/linux/userfaultfd_k.h
> > > > +++ b/include/linux/userfaultfd_k.h
> > > > @@ -39,6 +39,9 @@ extern ssize_t mfill_zeropage(struct mm_struct 
> > > > *dst_mm,
> > > >   unsigned long dst_start,
> > > >   unsigned long len,
> > > >   bool *mmap_changing);
> > > > +extern int mwriteprotect_range(struct mm_struct *dst_mm,
> > > > +  unsigned long start, unsigned long len,
> > > > +  bool enable_wp, bool *mmap_changing);
> > > > 
> > > >  /* mm helpers */
> > > >  static inline bool is_mergeable_vm_userfaultfd_ctx(struct 
> > > > vm_area_struct *vma,
> > > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > > > index fefa81c301b7..529d180bb4d7 100644
> > > > --- a/mm/userfaultfd.c
> > > > +++ b/mm/userfaultfd.c
> > > > @@ -639,3 +639,57 @@ ssize_t mfill_zeropage(struct mm_struct *dst_mm, 
> > > > unsigned long start,
> > > >  {
> > > > return __mcopy_atomic(dst_mm, start, 0, len, true, 
> > > > mmap_changing, 0);
> > > >  }
> > > > +
> > > > +int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
> > > > +   unsigned long len, bool enable_wp, bool 
> > > > *mmap_changing)
> > > > +{
> > > > +   struct vm_area_struct *dst_vma;
> > > > +   pgprot_t newprot;
> > > > +   int err;
> > > > +
> > > > +   /*
> > > > +* Sanitize the command parameters:
> > > > +*/
> > > > +   BUG_ON(start & ~PAGE_MASK);
> > > > +   BUG_ON(len & ~PAGE_MASK);
> > > > +
> > > > +   /* Does the address range wrap, or is the span zero-sized? */
> > > > +   BUG_ON(start + len <= start);
> > > 
> > > I'd replace these BUG_ON()s with
> > > 
> > >   if (WARN_ON())
> > >return -EINVAL;
> > 
> > I believe BUG_ON() is used because these parameters should have been
> > checked in userfaultfd_writeprotect() already by the common
> > validate_range() even before calling mwriteprotect_range().  So I'm
> > fine with the WARN_ON() approach but I'd slightly prefer to simply
> > keep the patch as is to keep Jerome's r-b if you won't disagree. :)
> 
> Right, userfaultfd_writeprotect() should check these parameters and if it
> didn't it was a bug indeed. But still, it's not severe enough to crash the
> kernel.
> 
> I hope Jerome wouldn't mind to keep his r-b with s/BUG_ON/WARN_ON ;-)
> 
> With this change you can also add 
> 
> Reviewed-by: Mike Rapoport 

Thanks!  Though before I change anything... please note that the
BUG_ON()s are really what we've done in existing MISSING code.  One
example is userfaultfd_copy() which did validate_range() first, then
in __mcopy_atomic() we've used BUG_ON()s.  They make sense to me
because userspace should never be able to trigger it.  And if we
really want to change the BUG_ON()s in this patch, IMHO we probably
want to change the other BUG_ON()s as well, then that can be a
standalone patch or patchset to address another issue...

(and if we really want to use WARN_ON, I would prefer WARN_ON_ONCE, or
 directly return the errors to avoid DOS).

I'll wait to hear how you'd prefer me to move on with this patch.

Thanks,

-- 
Peter Xu

Re: [PATCH] lib/raid6: use vdupq_n_u8 to avoid endianness warnings

2019-02-25 Thread Ard Biesheuvel

On Tue, 26 Feb 2019 at 05:03,  wrote:
>
> Clang warns: vector initializers are not compatible with NEON intrinsics
> in big endian mode [-Wnonportable-vector-initialization]
>
> While this is usually the case, it's not an issue for this case since
> we're initializing the uint8x16_t (16x uint8_t's) with the same value.
>
> Instead, use vdupq_n_u8 which both compilers lower into a single movi
> instruction: https://godbolt.org/z/vBrgzt
>
> This avoids the static storage for a constant value.
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/214
> Suggested-by: Nathan Chancellor 
> Signed-off-by: Nick Desaulniers 

Much better, thanks,

Did you double check that the intrinsic exists on 32-bit ARM as well?
I assume it does, but please make sure if you haven't yet.

If so,

Reviewed-by: Ard Biesheuvel 

> ---
>  lib/raid6/neon.uc| 5 ++---
>  lib/raid6/recov_neon_inner.c | 7 ++-
>  2 files changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc
> index d5242f544551..b7c68030da4f 100644
> --- a/lib/raid6/neon.uc
> +++ b/lib/raid6/neon.uc
> @@ -28,7 +28,6 @@
>
>  typedef uint8x16_t unative_t;
>
> -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
>  #define NSIZE  sizeof(unative_t)
>
>  /*
> @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned 
> long bytes, void **ptrs)
> int d, z, z0;
>
> register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
> -   const unative_t x1d = NBYTES(0x1d);
> +   const unative_t x1d = vdupq_n_u8(0x1d);
>
> z0 = disks - 3; /* Highest data disk */
> p = dptr[z0+1]; /* XOR parity */
> @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, 
> int stop,
> int d, z, z0;
>
> register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
> -   const unative_t x1d = NBYTES(0x1d);
> +   const unative_t x1d = vdupq_n_u8(0x1d);
>
> z0 = stop;  /* P/Q right side optimization */
> p = dptr[disks-2];  /* XOR parity */
> diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c
> index 8cd20c9f834a..7d00c31a6547 100644
> --- a/lib/raid6/recov_neon_inner.c
> +++ b/lib/raid6/recov_neon_inner.c
> @@ -10,11 +10,6 @@
>
>  #include 
>
> -static const uint8x16_t x0f = {
> -   0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
> -   0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
> -};
> -
>  #ifdef CONFIG_ARM
>  /*
>   * AArch32 does not provide this intrinsic natively because it does not
> @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, 
> uint8_t *q, uint8_t *dp,
> uint8x16_t pm1 = vld1q_u8(pbmul + 16);
> uint8x16_t qm0 = vld1q_u8(qmul);
> uint8x16_t qm1 = vld1q_u8(qmul + 16);
> +   uint8x16_t x0f = vdupq_n_u8(0x0f);
>
> /*
>  * while ( bytes-- ) {
> @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, 
> uint8_t *q, uint8_t *dq,
>  {
> uint8x16_t qm0 = vld1q_u8(qmul);
> uint8x16_t qm1 = vld1q_u8(qmul + 16);
> +   uint8x16_t x0f = vdupq_n_u8(0x0f);
>
> /*
>  * while (bytes--) {
> --
> 2.21.0.rc2.261.ga7da99ff1b-goog
>

[PATCH v3 5/5] arm64: dts: imx8mq: Enable wm8524 codec

2019-02-25 Thread Daniel Baluta

This uses simple-audio-card machine driver adding 1 CPU DAI
and 1 Codec DAI.

Signed-off-by: Daniel Baluta 
---
 arch/arm64/boot/dts/freescale/imx8mq-evk.dts | 29 
 1 file changed, 29 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts 
b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
index 58de4a3d6029..77f590c13ee0 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
@@ -31,6 +31,35 @@
gpio = < 19 GPIO_ACTIVE_HIGH>;
enable-active-high;
};
+
+   wm8524: audio-codec-0 {
+   #sound-dai-cells = <0>;
+   compatible = "wlf,wm8524";
+   clocks = < IMX8MQ_CLK_SAI2_ROOT>;
+   clock-names = "mclk";
+   wlf,mute-gpios = < 8 GPIO_ACTIVE_LOW>;
+   };
+
+   sound-wm8524 {
+   compatible = "simple-audio-card";
+   simple-audio-card,name = "wm8524-audio";
+   simple-audio-card,format = "i2s";
+   simple-audio-card,frame-master = <>;
+   simple-audio-card,bitclock-master = <>;
+   simple-audio-card,widgets =
+   "Line", "Left Line Out Jack",
+   "Line", "Right Line Out Jack";
+   simple-audio-card,routing =
+   "Left Line Out Jack", "LINEVOUTL",
+   "Right Line Out Jack", "LINEVOUTR";
+   cpudai: simple-audio-card,cpu {
+   sound-dai = <>;
+   };
+   link_codec: simple-audio-card,codec {
+   sound-dai = <>;
+   clocks = < IMX8MQ_CLK_SAI2_ROOT>;
+   };
+   };
 };
 
  {
-- 
2.17.1

[PATCH v3 3/5] arm64: dts: imx8mq: Add SAI pinctrl configuration

2019-02-25 Thread Daniel Baluta

This sets the pin configuration for SAI pins BCLK/MCLK/FSYNC/DATA.
GPIO_01 is used for mute.

Signed-off-by: Daniel Baluta 
---
 arch/arm64/boot/dts/freescale/imx8mq-evk.dts | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts 
b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
index 54737bf1772f..d21ee2a5312c 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
@@ -223,6 +223,16 @@
>;
};
 
+   pinctrl_sai2: sai2grp {
+   fsl,pins = <
+   MX8MQ_IOMUXC_SAI2_TXFS_SAI2_TX_SYNC 0xd6
+   MX8MQ_IOMUXC_SAI2_TXC_SAI2_TX_BCLK  0xd6
+   MX8MQ_IOMUXC_SAI2_MCLK_SAI2_MCLK0xd6
+   MX8MQ_IOMUXC_SAI2_TXD0_SAI2_TX_DATA00xd6
+   MX8MQ_IOMUXC_GPIO1_IO08_GPIO1_IO8   0xd6
+   >;
+   };
+
pinctrl_i2c1: i2c1grp {
fsl,pins = <
MX8MQ_IOMUXC_I2C1_SCL_I2C1_SCL  
0x407f
-- 
2.17.1

[PATCH v3 2/5] arm64: dts: imx8mq: Add SAI2 node

2019-02-25 Thread Daniel Baluta

SAI2 is part of AIPS-3 memory region and it's the DAI through
which the wm8524 codec gets its data.

Signed-off-by: Daniel Baluta 
---
 arch/arm64/boot/dts/freescale/imx8mq.dtsi | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi 
b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 8cafec17726b..6fe5798afd2d 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -489,6 +489,22 @@
status = "disabled";
};
 
+   sai2: sai@308b {
+   #sound-dai-cells = <0>;
+   compatible = "fsl,imx8mq-sai",
+"fsl,imx6sx-sai";
+   reg = <0x308b 0x1>;
+   interrupts = ;
+   clocks = < IMX8MQ_CLK_SAI2_IPG>,
+< IMX8MQ_CLK_DUMMY>,
+< IMX8MQ_CLK_SAI2_ROOT>,
+< IMX8MQ_CLK_DUMMY>, < 
IMX8MQ_CLK_DUMMY>;
+   clock-names = "bus", "mclk0", "mclk1", "mclk2", 
"mclk3";
+   dmas = < 10 24 0>, < 11 24 0>;
+   dma-names = "rx", "tx";
+   status = "disabled";
+   };
+
i2c1: i2c@30a2 {
compatible = "fsl,imx8mq-i2c", "fsl,imx21-i2c";
reg = <0x30a2 0x1>;
-- 
2.17.1

[PATCH v3 4/5] arm64: dts: imx8mq: Enable SAI2 for wm8524 codec

2019-02-25 Thread Daniel Baluta

This enables SAI2 digital audio interface to be used with
wm8524 codec.

wm8524 works only in slave mode, so we make sure that IMX8MQ_CLK_SAI2
has an appropriate frequency in order to easily derive rates divisible
by 8000.

Signed-off-by: Daniel Baluta 
---
 arch/arm64/boot/dts/freescale/imx8mq-evk.dts | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts 
b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
index d21ee2a5312c..58de4a3d6029 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts
@@ -52,6 +52,15 @@
};
 };
 
+ {
+   pinctrl-names = "default";
+   pinctrl-0 = <_sai2>;
+   assigned-clocks = < IMX8MQ_CLK_SAI2>;
+   assigned-clock-parents = < IMX8MQ_AUDIO_PLL1_OUT>;
+   assigned-clock-rates = <24576000>;
+   status = "okay";
+};
+
  {
clock-frequency = <10>;
pinctrl-names = "default";
-- 
2.17.1

[PATCH v3 1/5] arm64: dts: imx8mq: Add SDMA nodes

2019-02-25 Thread Daniel Baluta

SDMA1 is part of AIPS-3 region and SDMA2 is part
of AIPS-1 region.

Signed-off-by: Anson Huang 
[initial submit in i.MX internal tree]
Signed-off-by: Daniel Baluta 
[adaptation for linux-next]
---
 arch/arm64/boot/dts/freescale/imx8mq.dtsi | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi 
b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 9155bd4784eb..8cafec17726b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -234,6 +234,17 @@
status = "disabled";
};
 
+   sdma2: sdma@302c {
+   compatible = "fsl,imx7d-sdma";
+   reg = <0x302c 0x1>;
+   interrupts = ;
+   clocks = < IMX8MQ_CLK_SDMA2_ROOT>,
+< IMX8MQ_CLK_SDMA2_ROOT>;
+   clock-names = "ipg", "ahb";
+   #dma-cells = <3>;
+   fsl,sdma-ram-script-name = 
"imx/sdma/sdma-imx7d.bin";
+   };
+
iomuxc: iomuxc@3033 {
compatible = "fsl,imx8mq-iomuxc";
reg = <0x3033 0x1>;
@@ -575,6 +586,17 @@
status = "disabled";
};
 
+   sdma1: sdma@30bd {
+   compatible = "fsl,imx7d-sdma";
+   reg = <0x30bd 0x1>;
+   interrupts = ;
+   clocks = < IMX8MQ_CLK_SDMA1_ROOT>,
+< IMX8MQ_CLK_SDMA1_ROOT>;
+   clock-names = "ipg", "ahb";
+   #dma-cells = <3>;
+   fsl,sdma-ram-script-name = 
"imx/sdma/sdma-imx7d.bin";
+   };
+
fec1: ethernet@30be {
compatible = "fsl,imx8mq-fec", "fsl,imx6sx-fec";
reg = <0x30be 0x1>;
-- 
2.17.1

[PATCH v3 0/5] Enable wm8524 on i.MX8MQ

2019-02-25 Thread Daniel Baluta

On i.MX8MQ we can start the party using the wm8524 codec
which gets its data through the SAI2 interface.

In order to make it work this patch series enables the SDMA nodes,
sets the correct pinctrl configuration and uses the simple card
machine driver to put everything together.

Changes since v2:
- s/QM/MQ after Chris comments

Changes since v1:
- added cover letter
- remove "fsl,imx8mq-sdma" compatible for sdma.

Daniel Baluta (5):
  arm64: dts: imx8mq: Add SDMA nodes
  arm64: dts: imx8mq: Add SAI2 node
  arm64: dts: imx8mq: Add SAI pinctrl configuration
  arm64: dts: imx8mq: Enable SAI2 for wm8524 codec
  arm64: dts: imx8mq: Enable wm8524 codec

 arch/arm64/boot/dts/freescale/imx8mq-evk.dts | 48 
 arch/arm64/boot/dts/freescale/imx8mq.dtsi| 38 
 2 files changed, 86 insertions(+)

-- 
2.17.1

Re: [PATCH v4 1/3] PCI: altera: Add Stratix 10 PCIe support

2019-02-25 Thread Ley Foon Tan

On Mon, Feb 25, 2019 at 5:35 PM Ley Foon Tan  wrote:
>
> On Tue, 2019-02-19 at 16:23 +, Lorenzo Pieralisi wrote:
> > On Thu, Feb 14, 2019 at 11:20:36PM +0800, Ley Foon Tan wrote:
> > >
> > > Add PCIe Root Port support for Stratix 10 device.
> > >
> > > Main differences:
> > Main differences with what ? We need to rewrite this commit log.
> Differences compared with Cyclone V and Arria 10 devices.
> I will rewrite this.
> >
> > >
> > > - HIP interface to access Root Port configuration register.
> > > - TLP programming flow:
> > >   - One REG0 register
> > >   - Don't need to check alignment
> > >
> > > Signed-off-by: Ley Foon Tan 
> > > ---
> > >  drivers/pci/controller/pcie-altera.c |  246
> > > ++
> > >  1 files changed, 222 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/drivers/pci/controller/pcie-altera.c
> > > b/drivers/pci/controller/pcie-altera.c
> > > index 7d05e51..76bb6a6 100644
> > > --- a/drivers/pci/controller/pcie-altera.c
> > > +++ b/drivers/pci/controller/pcie-altera.c
> > > @@ -11,6 +11,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > @@ -37,7 +38,12 @@
> > >  #define RP_LTSSM_MASK  0x1f
> > >  #define LTSSM_L0   0xf
> > >
> > > -#define PCIE_CAP_OFFSET0x80
> > > +#define S10_RP_TX_CNTRL0x2004
> > > +#define S10_RP_RXCPL_REG   0x2008
> > > +#define S10_RP_RXCPL_STATUS0x200C
> > > +#define S10_RP_CFG_ADDR(pcie, reg) \
> > > +   (((pcie)->hip_base) + (reg) + (1 << 20))
> > > +
> > >  /* TLP configuration type 0 and 1 */
> > >  #define TLP_FMTTYPE_CFGRD0 0x04/*
> > > Configuration Read Type 0 */
> > >  #define TLP_FMTTYPE_CFGWR0 0x44/*
> > > Configuration Write Type 0 */
> > > @@ -49,18 +55,19 @@
> > >  #define RP_DEVFN   0
> > >  #define TLP_REQ_ID(bus, devfn) (((bus) << 8) |
> > > (devfn))
> > >  #define TLP_CFGRD_DW0(pcie, bus)
> > > \
> > > -bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0
> > > \
> > > -   : TLP_FMTTYPE_CFGRD1) << 24) |
> > > \
> > > - TLP_PAYLOAD_SIZE)
> > > +   bus == pcie->root_bus_nr) ? pcie->pcie_data->cfgrd0
> > > \
> > > +   : pcie->pcie_data->cfgrd1) << 24)
> > > |   \
> > > +   TLP_PAYLOAD_SIZE)
> > >  #define TLP_CFGWR_DW0(pcie, bus)
> > > \
> > > -bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGWR0
> > > \
> > > -   : TLP_FMTTYPE_CFGWR1) << 24) |
> > > \
> > > - TLP_PAYLOAD_SIZE)
> > > +   bus == pcie->root_bus_nr) ? pcie->pcie_data->cfgwr0
> > > \
> > > +   : pcie->pcie_data->cfgwr1) << 24)
> > > |   \
> > > +   TLP_PAYLOAD_SIZE)
> > >  #define TLP_CFG_DW1(pcie, tag, be) \
> > > -(((TLP_REQ_ID(pcie->root_bus_nr,  RP_DEVFN)) << 16) | (tag <<
> > > 8) | (be))
> > > +   (((TLP_REQ_ID(pcie->root_bus_nr,  RP_DEVFN)) << 16) | (tag
> > > << 8) | (be))
> > >  #define TLP_CFG_DW2(bus, devfn, offset)\
> > > (((bus) << 24) | ((devfn) << 16) |
> > > (offset))
> > >  #define TLP_COMP_STATUS(s) (((s) >> 13) & 7)
> > > +#define TLP_BYTE_COUNT(s)  (((s) >> 0) & 0xfff)
> > >  #define TLP_HDR_SIZE   3
> > >  #define TLP_LOOP   500
> > >
> > > @@ -69,14 +76,43 @@
> > >
> > >  #define DWORD_MASK 3
> > >
> > > +#define S10_TLP_FMTTYPE_CFGRD0 0x05
> > > +#define S10_TLP_FMTTYPE_CFGRD1 0x04
> > > +#define S10_TLP_FMTTYPE_CFGWR0 0x45
> > > +#define S10_TLP_FMTTYPE_CFGWR1 0x44
> > > +
> > > +enum altera_pcie_version {
> > > +   ALTERA_PCIE_V1 = 0,
> > > +   ALTERA_PCIE_V2,
> > > +};
> > > +
> > >  struct altera_pcie {
> > > struct platform_device  *pdev;
> > > -   void __iomem*cra_base;  /* DT Cra */
> > > +   void __iomem*cra_base;
> > > +   void __iomem*hip_base;
> > > int irq;
> > > u8  root_bus_nr;
> > > struct irq_domain   *irq_domain;
> > > struct resource bus_range;
> > > struct list_headresources;
> > > +   const struct altera_pcie_data   *pcie_data;
> > > +};
> > > +
> > > +struct altera_pcie_data {
> > > +   int (*tlp_read_pkt)(struct altera_pcie *pcie, u32 *value);
> > > +   void (*tlp_write_pkt)(struct altera_pcie *pcie, u32
> > > *headers,
> > > + u32 data, bool align);
> > > +   bool (*get_link_status)(struct altera_pcie *pcie);
> > > +   int (*rp_read_cfg)(struct altera_pcie *pcie, int where,
> > > +  int size, u32 *value);
> > > +   int (*rp_write_cfg)(struct altera_pcie *pcie, u8 bus, int
> > > where,
> > > +   int size, u32 value);
> > > +   enum

[PATCH] arm64: dts: qcom: sdm845: Add GEN2 PCIe controller and PHY

2019-02-25 Thread Bjorn Andersson

SDM845 sports two PCIe controller/phy pairs; one GEN2 and one GEN3. Add
the nodes for the GEN2 pair.

Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 103 +++
 1 file changed, 103 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 5308f1671824..43848653babd 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -1024,6 +1024,109 @@
};
};
 
+   pcie0: pci@1c0 {
+   compatible = "qcom,pcie-sdm845", "snps,dw-pcie";
+   reg = <0 0x01c0 0 0x2000>,
+ <0 0x6000 0 0xf1d>,
+ <0 0x6f20 0 0xa8>,
+ <0 0x6010 0 0x10>;
+   reg-names = "parf", "dbi", "elbi", "config";
+   device_type = "pci";
+   linux,pci-domain = <0>;
+   bus-range = <0x00 0xff>;
+   num-lanes = <1>;
+
+   #address-cells = <3>;
+   #size-cells = <2>;
+
+   ranges = <0x0100 0x0 0x6020 0 0x6020 0x0 
0x10>,
+<0x0200 0x0 0x6030 0 0x6030 0x0 
0xd0>;
+
+   interrupts = ;
+   interrupt-names = "msi";
+   #interrupt-cells = <1>;
+   interrupt-map-mask = <0 0 0 0x7>;
+   interrupt-map = <0 0 0 1  0 149 
IRQ_TYPE_LEVEL_HIGH>, /* int_a */
+   <0 0 0 2  0 150 
IRQ_TYPE_LEVEL_HIGH>, /* int_b */
+   <0 0 0 3  0 151 
IRQ_TYPE_LEVEL_HIGH>, /* int_c */
+   <0 0 0 4  0 152 
IRQ_TYPE_LEVEL_HIGH>; /* int_d */
+
+   clocks = < GCC_PCIE_0_PIPE_CLK>,
+< GCC_PCIE_0_AUX_CLK>,
+< GCC_PCIE_0_CFG_AHB_CLK>,
+< GCC_PCIE_0_MSTR_AXI_CLK>,
+< GCC_PCIE_0_SLV_AXI_CLK>,
+< GCC_PCIE_0_SLV_Q2A_AXI_CLK>,
+< GCC_AGGRE_NOC_PCIE_TBU_CLK>;
+   clock-names = "pipe",
+ "aux",
+ "cfg",
+ "bus_master",
+ "bus_slave",
+ "slave_q2a",
+ "tbu";
+
+   iommu-map = <0x0 _smmu 0x1c10 0x1>,
+   <0x100 _smmu 0x1c11 0x1>,
+   <0x200 _smmu 0x1c12 0x1>,
+   <0x300 _smmu 0x1c13 0x1>,
+   <0x400 _smmu 0x1c14 0x1>,
+   <0x500 _smmu 0x1c15 0x1>,
+   <0x600 _smmu 0x1c16 0x1>,
+   <0x700 _smmu 0x1c17 0x1>,
+   <0x800 _smmu 0x1c18 0x1>,
+   <0x900 _smmu 0x1c19 0x1>,
+   <0xa00 _smmu 0x1c1a 0x1>,
+   <0xb00 _smmu 0x1c1b 0x1>,
+   <0xc00 _smmu 0x1c1c 0x1>,
+   <0xd00 _smmu 0x1c1d 0x1>,
+   <0xe00 _smmu 0x1c1e 0x1>,
+   <0xf00 _smmu 0x1c1f 0x1>;
+
+   resets = < GCC_PCIE_0_BCR>;
+   reset-names = "pci";
+
+   power-domains = < PCIE_0_GDSC>;
+
+   phys = <_lane>;
+   phy-names = "pciephy";
+
+   status = "disabled";
+   };
+
+   pcie0_phy: phy@1c06000 {
+   compatible = "qcom,sdm845-qmp-pcie-phy";
+   reg = <0 0x01c06000 0 0x18c>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+   clocks = < GCC_PCIE_PHY_AUX_CLK>,
+< GCC_PCIE_0_CFG_AHB_CLK>,
+< GCC_PCIE_0_CLKREF_CLK>,
+< GCC_PCIE_PHY_REFGEN_CLK>;
+   clock-names = "aux", "cfg_ahb", "ref", "refgen";
+
+   resets = < GCC_PCIE_0_PHY_BCR>;
+   reset-names = "phy";
+
+   assigned-clocks = < GCC_PCIE_PHY_REFGEN_CLK>;
+   assigned-clock-rates = <1>;
+
+   status = "disabled";
+
+   pcie0_lane: lanes@1c06200 {
+   reg = <0 0x01c06200 0

Re: [PATCH v2 24/26] userfaultfd: wp: UFFDIO_REGISTER_MODE_WP documentation update

2019-02-25 Thread Mike Rapoport

On Tue, Feb 26, 2019 at 02:53:42PM +0800, Peter Xu wrote:
> On Mon, Feb 25, 2019 at 11:19:32PM +0200, Mike Rapoport wrote:
> > On Tue, Feb 12, 2019 at 10:56:30AM +0800, Peter Xu wrote:
> > > From: Martin Cracauer 
> > > 
> > > Adds documentation about the write protection support.
> > > 
> > > Signed-off-by: Andrea Arcangeli 
> > > [peterx: rewrite in rst format; fixups here and there]
> > > Signed-off-by: Peter Xu 
> > 
> > Reviewed-by: Mike Rapoport 
> > 
> > Peter, can you please also update the man pages (1, 2)?
> > 
> > [1] http://man7.org/linux/man-pages/man2/userfaultfd.2.html
> > [2] http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html
> 
> Sure.  Should I post the man patches after the kernel part is merged?

Yep, once we know for sure what API the kernel will expose.
 
> Thanks,
> 
> -- 
> Peter Xu
> 

-- 
Sincerely yours,
Mike.

[PATCH] clk: qcom: gcc-sdm845: Define parent of PCIe PIPE clocks

2019-02-25 Thread Bjorn Andersson

The PCIe PIPE clock in the GCC is fed by the PIPE clock coming from the
PHY, describe this relationship.

Signed-off-by: Bjorn Andersson 
---
 drivers/clk/qcom/gcc-sdm845.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index 58fa5c247af1..7131dcf9b060 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -1703,6 +1703,9 @@ static struct clk_branch gcc_pcie_0_pipe_clk = {
.enable_mask = BIT(4),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_0_pipe_clk",
+   .parent_names = (const char *[]){ "pcie_0_pipe_clk" },
+   .num_parents = 1,
+   .flags = CLK_SET_RATE_PARENT,
.ops = _branch2_ops,
},
},
@@ -1802,6 +1805,8 @@ static struct clk_branch gcc_pcie_1_pipe_clk = {
.enable_mask = BIT(30),
.hw.init = &(struct clk_init_data){
.name = "gcc_pcie_1_pipe_clk",
+   .parent_names = (const char *[]){ "pcie_1_pipe_clk" },
+   .num_parents = 1,
.ops = _branch2_ops,
},
},
-- 
2.18.0

[PATCH v3 1/9] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines

2019-02-25 Thread Alexey Budankov



Implement libzstd feature check, NO_LIBZSTD and LIBZSTD_DIR defines
to override Zstd library sources or disable the feature from the
command line:

  $ make -C tools/perf LIBZSTD_DIR=/path/to/zstd/sources/ clean all
  $ make -C tools/perf NO_LIBZSTD=1 clean all

Auto detection feature status is reported just before compilation starts.
If your system has some version of the zstd library preinstalled then
the build system finds and uses it during the build.

If you still prefer to compile with some other version of the zstd library
that is not preinstalled, you can point the build at that version using
the LIBZSTD_DIR define.

Signed-off-by: Alexey Budankov 
---
 tools/build/Makefile.feature   |  6 --
 tools/build/feature/Makefile   |  6 +-
 tools/build/feature/test-all.c |  5 +
 tools/build/feature/test-libzstd.c | 12 
 tools/perf/Makefile.config | 20 
 tools/perf/Makefile.perf   |  3 +++
 6 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 tools/build/feature/test-libzstd.c

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 61e46d54a67c..adf791cbd726 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC :=  \
 sched_getcpu   \
 sdt\
 setns  \
-libaio
+libaio \
+libzstd
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
 # of all feature tests
@@ -118,7 +119,8 @@ FEATURE_DISPLAY ?=  \
  lzma   \
  get_cpuid  \
  bpf   \
- libaio
+ libaio\
+ libzstd
 
 # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
 # If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ceb4441b627..4b8244ee65ce 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -62,7 +62,8 @@ FILES=  \
  test-clang.bin\
  test-llvm.bin \
  test-llvm-version.bin \
- test-libaio.bin
+ test-libaio.bin   \
+ test-libzstd.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -301,6 +302,9 @@ $(OUTPUT)test-clang.bin:
 $(OUTPUT)test-libaio.bin:
$(BUILD) -lrt
 
+$(OUTPUT)test-libzstd.bin:
+   $(BUILD) -lzstd
+
 ###
 
 clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index e903b86b742f..b0dda7db2a17 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -178,6 +178,10 @@
 # include "test-reallocarray.c"
 #undef main
 
+#define main main_test_libzstd
+# include "test-libzstd.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
main_test_libpython();
@@ -219,6 +223,7 @@ int main(int argc, char *argv[])
main_test_setns();
main_test_libaio();
main_test_reallocarray();
+   main_test_libzstd();
 
return 0;
 }
diff --git a/tools/build/feature/test-libzstd.c 
b/tools/build/feature/test-libzstd.c
new file mode 100644
index ..55268c01b84d
--- /dev/null
+++ b/tools/build/feature/test-libzstd.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+
+int main(void)
+{
+   ZSTD_CStream*cstream;
+
+   cstream = ZSTD_createCStream();
+   ZSTD_freeCStream(cstream);
+
+   return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0f11d5891301..4949bdb16a66 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -152,6 +152,13 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) 
-lbabeltrace-ctf
 
+ifdef LIBZSTD_DIR
+  LIBZSTD_CFLAGS  := -I$(LIBZSTD_DIR)/lib
+  LIBZSTD_LDFLAGS := -L$(LIBZSTD_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS)
+
 FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
 # include ARCH specific config
 -include $(src-perf)/arch/$(SRCARCH)/Makefile
@@ -782,6 +789,19 @@ ifndef NO_LZMA
   endif
 endif
 
+ifndef NO_LIBZSTD
+  ifeq ($(feature-libzstd), 1)
+CFLAGS += -DHAVE_ZSTD_SUPPORT
+CFLAGS += $(LIBZSTD_CFLAGS)
+LDFLAGS += $(LIBZSTD_LDFLAGS)
+EXTLIBS += -lzstd
+$(call detected,CONFIG_ZSTD)
+  else
+msg := $(warning No libzstd found, disables trace compression, please 
install libzstd-dev[el] and/or set LIBZSTD_DIR);
+NO_LIBZSTD := 1
+  endif
+endif
+
 ifndef

[PATCH] pcie: qcom: Add support for sdm845 PCIe controller

2019-02-25 Thread Bjorn Andersson

The SDM845 has one Gen2 and one Gen3 controller, add support for these.

Due to lack of hardware only the Gen2 controller has been verified.

Signed-off-by: Bjorn Andersson 
---
 .../devicetree/bindings/pci/qcom,pcie.txt |  19 +++
 drivers/pci/controller/dwc/pcie-qcom.c| 146 ++
 2 files changed, 165 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.txt 
b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
index 1fd703bd73e0..2cf92ed39499 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie.txt
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie.txt
@@ -10,6 +10,7 @@
- "qcom,pcie-msm8996" for msm8996 or apq8096
- "qcom,pcie-ipq4019" for ipq4019
- "qcom,pcie-ipq8074" for ipq8074
+   - "qcom,pcie-sdm845" for sdm845
 
 - reg:
Usage: required
@@ -116,6 +117,18 @@
- "ahb" AHB clock
- "aux" Auxiliary clock
 
+- clock-names:
+   Usage: required for sdm845
+   Value type: 
+   Definition: Should contain the following entries
+   - "aux" Auxiliary clock
+   - "cfg" Configuration clock
+   - "bus_master"  Master AXI clock
+   - "bus_slave"   Slave AXI clock
+   - "slave_q2a"   Slave Q2A clock
+   - "tbu" PCIe TBU clock
+   - "pipe"PIPE clock
+
 - resets:
Usage: required
Value type: 
@@ -167,6 +180,12 @@
- "ahb" AHB Reset
- "axi_m_sticky"AXI Master Sticky reset
 
+- reset-names:
+   Usage: required for sdm845
+   Value type: 
+   Definition: Should contain the following entries
+   - "pci" PCIe core reset
+
 - power-domains:
Usage: required for apq8084 and msm8996/apq8096
Value type: 
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c 
b/drivers/pci/controller/dwc/pcie-qcom.c
index d185ea5fe996..5147454a6ae5 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -54,6 +54,7 @@
 #define PCIE20_PARF_LTSSM  0x1B0
 #define PCIE20_PARF_SID_OFFSET 0x234
 #define PCIE20_PARF_BDF_TRANSLATE_CFG  0x24C
+#define PCIE20_PARF_DEVICE_TYPE0x1000
 
 #define PCIE20_ELBI_SYS_CTRL   0x04
 #define PCIE20_ELBI_SYS_CTRL_LT_ENABLE BIT(0)
@@ -80,6 +81,8 @@
 #define PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE 0x358
 #define SLV_ADDR_SPACE_SZ  0x1000
 
+#define DEVICE_TYPE_RC 0x4
+
 #define QCOM_PCIE_2_1_0_MAX_SUPPLY 3
 struct qcom_pcie_resources_2_1_0 {
struct clk *iface_clk;
@@ -139,12 +142,21 @@ struct qcom_pcie_resources_2_3_3 {
struct reset_control *rst[7];
 };
 
+struct qcom_pcie_resources_2_7_0 {
+   struct clk_bulk_data clks[6];
+   struct regulator_bulk_data supplies[2];
+
+   struct reset_control *pci_reset;
+   struct clk *pipe_clk;
+};
+
 union qcom_pcie_resources {
struct qcom_pcie_resources_1_0_0 v1_0_0;
struct qcom_pcie_resources_2_1_0 v2_1_0;
struct qcom_pcie_resources_2_3_2 v2_3_2;
struct qcom_pcie_resources_2_3_3 v2_3_3;
struct qcom_pcie_resources_2_4_0 v2_4_0;
+   struct qcom_pcie_resources_2_7_0 v2_7_0;
 };
 
 struct qcom_pcie;
@@ -1076,6 +1088,129 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
return ret;
 }
 
+static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie)
+{
+   struct qcom_pcie_resources_2_7_0 *res = >res.v2_7_0;
+   struct dw_pcie *pci = pcie->pci;
+   struct device *dev = pci->dev;
+   int ret;
+
+   res->pci_reset = devm_reset_control_get_exclusive(dev, "pci");
+   if (IS_ERR(res->pci_reset))
+   return PTR_ERR(res->pci_reset);
+
+   res->supplies[0].supply = "vdda";
+   res->supplies[1].supply = "vddpe-3v3";
+   ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(res->supplies),
+ res->supplies);
+   if (ret)
+   return ret;
+
+   res->clks[0].id = "aux";
+   res->clks[1].id = "cfg";
+   res->clks[2].id = "bus_master";
+   res->clks[3].id = "bus_slave";
+   res->clks[4].id = "slave_q2a";
+   res->clks[5].id = "tbu";
+
+   ret = devm_clk_bulk_get(dev, ARRAY_SIZE(res->clks), res->clks);
+   if (ret < 0)
+   return ret;
+
+   res->pipe_clk = devm_clk_get(dev, "pipe");
+   return PTR_ERR_OR_ZERO(res->pipe_clk);
+}
+
+static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
+{
+   struct qcom_pcie_resources_2_7_0 *res = >res.v2_7_0;
+   struct dw_pcie *pci = pcie->pci;
+   struct device *dev = pci->dev;
+   u32

[PATCH v3 2/2] usb: dwc3: Add workaround for host mode VBUS glitch when boot

2019-02-25 Thread Ran Wang

When DWC3 is set to host mode by programming register DWC3_GCTL, VBUS
(or its control signal) will be turned on immediately on related Root Hub
ports. Then, the VBUS is turned off for a little while (15us) during the xhci
reset (conducted by the xhci driver) and finally returns to normal, so we can
observe a negative glitch on the related signal.

This VBUS glitch might cause enumeration of some USB devices to fail if the
kernel boots with them connected, e.g. on LS1012AFWRY/LS1043ARDB/LX2160AQDS
/LS1088ARDB with Kingston 16GB USB2.0/Kingston USB3.0/JetFlash Transcend
4GB USB2.0 drives. The failure cases include being enumerated as a full-speed
device or reporting a wrong device descriptor, etc.

One SW workaround which can fix this is to program all xhci PORTSC[PP]
bits to 0 to turn off VBUS immediately after setting host mode in the DWC3
driver (per signal measurement results, it would be too late to do it in
xhci-plat.c or xhci.c). Then, after the xhci reset completes in the xhci
driver, the PORTSC[PP] values go back to 1 automatically and VBUS turns on
at that time, so no glitch happens and the normal enumeration process is not
affected.

Signed-off-by: Ran Wang 
---
Changes in v3:
  - Add macro PORT_REGS_SIZE to replace 0x10
  - Change initial value of i to 0 for the for loop
  - Cosmetic changes

Changes in v2:
  - Rename related property to 'snps,host-vbus-glitches'
  - Rename related dwc member to 'host_vbus_glitches'
  - Add member 'host_vbus_glitches' description in 'dwc3'

 drivers/usb/dwc3/core.c |   48 +++
 drivers/usb/dwc3/core.h |   12 +++
 2 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index a1b126f..dd80e3d 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -100,6 +100,42 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc)
return 0;
 }
 
+/*
+ * dwc3_power_off_all_roothub_ports - Power off all Root hub ports
+ * @dwc: Pointer to our controller context structure
+ */
+static void dwc3_power_off_all_roothub_ports(struct dwc3 *dwc)
+{
+   int i, port_num;
+   u32 reg, op_regs_base, offset;
+   void __iomem*xhci_regs;
+
+   /* xhci regs are not mapped yet, so map them temporarily here */
+   if (dwc->xhci_resources[0].start) {
+   xhci_regs = ioremap(dwc->xhci_resources[0].start,
+   DWC3_XHCI_REGS_END);
+   if (IS_ERR(xhci_regs)) {
+   dev_err(dwc->dev, "Failed to ioremap xhci_regs\n");
+   return;
+   }
+
+   op_regs_base = HC_LENGTH(readl(xhci_regs));
+   reg = readl(xhci_regs + XHCI_HCSPARAMS1);
+   port_num = HCS_MAX_PORTS(reg);
+
+   for (i = 0; i < port_num; i++) {
+   offset = op_regs_base + XHCI_PORTSC_BASE +
+   PORT_REGS_SIZE * i;
+   reg = readl(xhci_regs + offset);
+   reg &= ~PORT_POWER;
+   writel(reg, xhci_regs + offset);
+   }
+
+   iounmap(xhci_regs);
+   } else
+   dev_err(dwc->dev, "xhci base reg invalid\n");
+}
+
 void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
 {
u32 reg;
@@ -109,6 +145,15 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
reg |= DWC3_GCTL_PRTCAPDIR(mode);
dwc3_writel(dwc->regs, DWC3_GCTL, reg);
 
+   /*
+* We have to power off all Root hub ports immediately after DWC3 is set
+* to host mode to avoid a VBUS glitch when xhci gets reset later.
+*/
+   if (dwc->host_vbus_glitches) {
+   if (mode == DWC3_GCTL_PRTCAP_HOST)
+   dwc3_power_off_all_roothub_ports(dwc);
+   }
+
dwc->current_dr_role = mode;
 }
 
@@ -1306,6 +1351,9 @@ static void dwc3_get_properties(struct dwc3 *dwc)
dwc->dis_metastability_quirk = device_property_read_bool(dev,
"snps,dis_metastability_quirk");
 
+   dwc->host_vbus_glitches = device_property_read_bool(dev,
+   "snps,host-vbus-glitches");
+
dwc->lpm_nyet_threshold = lpm_nyet_threshold;
dwc->tx_de_emphasis = tx_de_emphasis;
 
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index df87641..c2dee0b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -606,6 +606,15 @@
 #define DWC3_OSTS_VBUSVLD  BIT(1)
 #define DWC3_OSTS_CONIDSTS BIT(0)
 
+/* Partial XHCI Register and Bit fields for quirk */
+#define XHCI_HCSPARAMS10x4
+#define XHCI_PORTSC_BASE   0x400
+#define PORT_REGS_SIZE 0x10
+#define PORT_POWER (1 << 9)
+#define HCS_MAX_PORTS(p)   (((p) >> 24) & 0x7f)
+#define XHCI_HC_LENGTH(p)  (((p)>>00)&0x00ff)
+#define HC_LENGTH(p)   XHCI_HC_LENGTH(p)
+
 /* Structures */
 
 struct dwc3_trb;
@@ -1024,6 +1033,8 @@ struct dwc3_scratchpad_array {
  * 2

[PATCH v3 1/2] usb: dwc3: Add avoiding vbus glitch happen during xhci reset

2019-02-25 Thread Ran Wang

When DWC3 is set to host mode by programming register DWC3_GCTL, VBUS
(or its control signal) will turn on immediately on related Root Hub
ports. Then the VBUS will be de-asserted for a little while during xhci
reset (conducted by xhci driver) and then return to normal.

This VBUS glitch might cause enumeration of some USB devices to fail if the
kernel boots with them connected. One SW workaround which can fix this is to
program all PORTSC[PP] to 0 to turn off VBUS immediately after setting
host mode in DWC3 driver(per signal measurement result, it will be too
late to do it in xhci-plat.c or xhci.c).

Signed-off-by: Ran Wang 
---
Changes in v3:
  - None

Changes in v2:
  - Correct typos
  - Shorten the name to snps,host-vbus-glitches

 Documentation/devicetree/bindings/usb/dwc3.txt |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt 
b/Documentation/devicetree/bindings/usb/dwc3.txt
index 8e5265e..453f562 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -106,6 +106,9 @@ Optional properties:
When just one value, which means INCRX burst mode 
enabled. When
more than one value, which means undefined length INCR 
burst type
enabled. The values can be 1, 4, 8, 16, 32, 64, 128 and 
256.
+ - snps,host-vbus-glitches: Power off all Root Hub ports immediately after
+   setting host mode to avoid a (negative) vbus glitch 
happening in the later
+   xhci reset. The vbus will return to 5V automatically 
when the reset is done.
 
  - in addition all properties from usb-xhci.txt from the current directory are
supported as well
-- 
1.7.1

Re: [PATCH v2 26/26] userfaultfd: selftests: add write-protect test

2019-02-25 Thread Mike Rapoport

On Tue, Feb 12, 2019 at 10:56:32AM +0800, Peter Xu wrote:
> This patch adds uffd tests for write protection.
> 
> Instead of introducing new tests for it, let's simply squash uffd-wp
> tests into existing uffd-missing test cases.  Changes are:
> 
> (1) Bouncing tests
> 
>   We do the write-protection in two ways during the bouncing test:
> 
>   - By using UFFDIO_COPY_MODE_WP when resolving MISSING pages: then
> we'll make sure for each bounce process every single page will be
> at least fault twice: once for MISSING, once for WP.
> 
>   - By direct call UFFDIO_WRITEPROTECT on existing faulted memories:
> To further torture the explicit page protection procedures of
> uffd-wp, we split each bounce procedure into two halves (in the
> background thread): the first half will be MISSING+WP for each
> page as explained above.  After the first half, we write protect
> the faulted region in the background thread to make sure at least
> half of the pages will be write protected again which is the first
> half to test the new UFFDIO_WRITEPROTECT call.  Then we continue
> with the 2nd half, which will contain both MISSING and WP faulting
> tests for the 2nd half and WP-only faults from the 1st half.
> 
> (2) Event/Signal test
> 
>   Mostly previous tests but will do MISSING+WP for each page.  For
>   sigbus-mode test we'll need to provide standalone path to handle the
>   write protection faults.
> 
> For all tests, do statistics as well for uffd-wp pages.
> 
> Signed-off-by: Peter Xu 
> ---
>  tools/testing/selftests/vm/userfaultfd.c | 154 ++-
>  1 file changed, 126 insertions(+), 28 deletions(-)
> 
> diff --git a/tools/testing/selftests/vm/userfaultfd.c 
> b/tools/testing/selftests/vm/userfaultfd.c
> index e5d12c209e09..57b5ac02080a 100644
> --- a/tools/testing/selftests/vm/userfaultfd.c
> +++ b/tools/testing/selftests/vm/userfaultfd.c
> @@ -56,6 +56,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #include "../kselftest.h"
> 
> @@ -78,6 +79,8 @@ static int test_type;
>  #define ALARM_INTERVAL_SECS 10
>  static volatile bool test_uffdio_copy_eexist = true;
>  static volatile bool test_uffdio_zeropage_eexist = true;
> +/* Whether to test uffd write-protection */
> +static bool test_uffdio_wp = false;
> 
>  static bool map_shared;
>  static int huge_fd;
> @@ -92,6 +95,7 @@ pthread_attr_t attr;
>  struct uffd_stats {
>   int cpu;
>   unsigned long missing_faults;
> + unsigned long wp_faults;
>  };
> 
>  /* pthread_mutex_t starts at page offset 0 */
> @@ -141,9 +145,29 @@ static void uffd_stats_reset(struct uffd_stats 
> *uffd_stats,
>   for (i = 0; i < n_cpus; i++) {
>   uffd_stats[i].cpu = i;
>   uffd_stats[i].missing_faults = 0;
> + uffd_stats[i].wp_faults = 0;
>   }
>  }
> 
> +static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
> +{
> + int i;
> + unsigned long long miss_total = 0, wp_total = 0;
> +
> + for (i = 0; i < n_cpus; i++) {
> + miss_total += stats[i].missing_faults;
> + wp_total += stats[i].wp_faults;
> + }
> +
> + printf("userfaults: %llu missing (", miss_total);
> + for (i = 0; i < n_cpus; i++)
> + printf("%lu+", stats[i].missing_faults);
> + printf("\b), %llu wp (", wp_total);
> + for (i = 0; i < n_cpus; i++)
> + printf("%lu+", stats[i].wp_faults);
> + printf("\b)\n");
> +}
> +
>  static int anon_release_pages(char *rel_area)
>  {
>   int ret = 0;
> @@ -264,19 +288,15 @@ struct uffd_test_ops {
>   void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
>  };
> 
> -#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
> -  (1 << _UFFDIO_COPY) | \
> -  (1 << _UFFDIO_ZEROPAGE))
> -
>  static struct uffd_test_ops anon_uffd_test_ops = {
> - .expected_ioctls = ANON_EXPECTED_IOCTLS,
> + .expected_ioctls = UFFD_API_RANGE_IOCTLS,
>   .allocate_area  = anon_allocate_area,
>   .release_pages  = anon_release_pages,
>   .alias_mapping = noop_alias_mapping,
>  };
> 
>  static struct uffd_test_ops shmem_uffd_test_ops = {
> - .expected_ioctls = ANON_EXPECTED_IOCTLS,
> + .expected_ioctls = UFFD_API_RANGE_IOCTLS,

Doesn't UFFD_API_RANGE_IOCTLS include UFFDIO_WP, which is not supported for
shmem?

>   .allocate_area  = shmem_allocate_area,
>   .release_pages  = shmem_release_pages,
>   .alias_mapping = noop_alias_mapping,

...

-- 
Sincerely yours,
Mike.

[PATCH] phy: qcom: qmp: Add SDM845 PCIe QMP PHY support

2019-02-25 Thread Bjorn Andersson

qcom_qmp_phy_init() is extended to support the additional register
writes needed in PCS MISC and the appropriate sequences and resources
are defined for SDM845.

Signed-off-by: Bjorn Andersson 
---
 .../devicetree/bindings/phy/qcom-qmp-phy.txt  |   7 +
 drivers/phy/qualcomm/phy-qcom-qmp.c   | 160 ++
 drivers/phy/qualcomm/phy-qcom-qmp.h   |  12 ++
 3 files changed, 179 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt 
b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
index 5d181fc3cc18..dd2725a9d3f7 100644
--- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
@@ -11,6 +11,7 @@ Required properties:
   "qcom,msm8996-qmp-usb3-phy" for 14nm USB3 phy on msm8996,
   "qcom,msm8998-qmp-usb3-phy" for USB3 QMP V3 phy on msm8998,
   "qcom,msm8998-qmp-ufs-phy" for UFS QMP phy on msm8998,
+  "qcom,sdm845-qmp-pcie-phy" for PCIe phy on sdm845,
   "qcom,sdm845-qmp-usb3-phy" for USB3 QMP V3 phy on sdm845,
   "qcom,sdm845-qmp-usb3-uni-phy" for USB3 QMP V3 UNI phy on sdm845,
   "qcom,sdm845-qmp-ufs-phy" for UFS QMP phy on sdm845.
@@ -48,6 +49,10 @@ Required properties:
"aux", "cfg_ahb", "ref".
For "qcom,msm8998-qmp-ufs-phy" must contain:
"ref", "ref_aux".
+   For "qcom,sdm845-qmp-pcie-phy" must contain:
+   "aux", "cfg_ahb", "ref", "refgen".
+   For "qcom,sdm845-qmp-usb3-phy" must contain:
+   "aux", "cfg_ahb", "ref", "com_aux".
For "qcom,sdm845-qmp-usb3-phy" must contain:
"aux", "cfg_ahb", "ref", "com_aux".
For "qcom,sdm845-qmp-usb3-uni-phy" must contain:
@@ -70,6 +75,8 @@ Required properties:
For "qcom,msm8998-qmp-usb3-phy" must contain
"phy", "common".
For "qcom,msm8998-qmp-ufs-phy": no resets are listed.
+   For "qcom,sdm845-qmp-pcie-phy" must contain:
+   "phy".
For "qcom,sdm845-qmp-usb3-phy" must contain:
"phy", "common".
For "qcom,sdm845-qmp-usb3-uni-phy" must contain:
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c 
b/drivers/phy/qualcomm/phy-qcom-qmp.c
index 08d6f6f7f039..c5ca4a217439 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
@@ -159,6 +159,11 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[] = {
[QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170,
 };
 
+static const unsigned int sdm845_pciephy_regs_layout[] = {
+   [QPHY_START_CTRL]   = 0x08,
+   [QPHY_PCS_READY_STATUS] = 0x174,
+};
+
 static const unsigned int sdm845_ufsphy_regs_layout[] = {
[QPHY_START_CTRL]   = 0x00,
[QPHY_PCS_READY_STATUS] = 0x160,
@@ -392,6 +397,109 @@ static const struct qmp_phy_init_tbl 
ipq8074_pcie_pcs_tbl[] = {
QMP_PHY_INIT_CFG_L(QPHY_START_CTRL, 0x3),
 };
 
+static const struct qmp_phy_init_tbl sdm845_pcie_serdes_tbl[] = {
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x007),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_CONFIG, 0x06),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_EN, 0x01),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_RESETSM_CNTRL, 0x20),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_MAP, 0x00),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE2_MODE0, 0x01),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE1_MODE0, 0xc9),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_TIMER1, 0xff),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_TIMER2, 0x3f),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_SVS_MODE_CLK_SEL, 0x01),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORE_CLK_EN, 0x00),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORECLK_DIV_MODE0, 0x0a),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_EP_DIV, 0x19),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_ENABLE1, 0x90),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_DEC_START_MODE0, 0x82),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START3_MODE0, 0x02),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START2_MODE0, 0xea),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START1_MODE0, 0xab),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP3_MODE0, 0x00),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP2_MODE0, 0x0d),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP1_MODE0, 0x04),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_HSCLK_SEL, 0x00),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CP_CTRL_MODE0, 0x06),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_RCTRL_MODE0, 0x16),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_CCTRL_MODE0, 0x36),
+   QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_MODE, 0x01),
+

Re: [PATCH 08/11] powercap/intel_rapl: Support multi-die/package

2019-02-25 Thread Zhang Rui

On 一, 2019-02-25 at 23:41 -0500, Len Brown wrote:
> On Thu, Feb 21, 2019 at 12:44 AM Len Brown  wrote:
> > 
> > 
> > On Wed, Feb 20, 2019 at 6:02 AM Peter Zijlstra wrote:
> > 
> > > 
> > > > 
> > > >   list_for_each_entry(rp, &rapl_packages, plist) {
> > > > @@ -1457,7 +1457,7 @@ static void rapl_remove_package(struct
> > > > rapl_package *rp)
> > > >  /* called from CPU hotplug notifier, hotplug lock held */
> > > >  static struct rapl_package *rapl_add_package(int cpu)
> > > >  {
> > > > - int id = topology_physical_package_id(cpu);
> > > > + int id = topology_unique_die_id(cpu);
> > > >   struct rapl_package *rp;
> > > >   int ret;
> > > And now your new function names are misnomers.
> > That is fair.
> > 
> > Seems that a subsequent re-name-only patch is appropriate.
> I'm not sure that re-naming these functions is a good idea.
> 
> Fundamentally, the reason stems from the SDM being inconsistent.
> And the reason that the SDM is inconsistent is for compatibility.
> 
> ie. the PACKAGE MSRs in the SDM are still called PACKAGE MSRs,
> even though on a multi-die system, they are DIE scoped.
> There is no plan to re-name all of those MSRs.
> 
> And so what do you call a routine that parses a PACKAGE_RAPL domain?
> Well, it is still called PACKAGE MSR, even though the code is smart
> enough
> to know that on a multi-die system, its scope is die-scoped, not
> package-scoped.
> 
Agreed.

rapl_add_package() actually adds a package RAPL domain, and "package
RAPL domain" comes from SDM, which is used to describe the RAPL domain
that uses the package MSRs.

IMO, we can keep using "package RAPL domain" as the name of this
certain kind of RAPL domains, but just stop aligning it with the cpu
physical package.
Actually, my next patch fixes the places that had this assumption.
In short, "package domain foo" is okay, but "domain for package X"
should be avoided.

thanks,
rui

> And yes, just to confuse things, there WILL be PACKAGE scope MSRs
> in the future that span multiple die on multi-die systems.  No, it
> will not
> be a surprise when they appear -- by definition, they will be
> different
> and incompatible with previous PACKAGE MSRs.  We will need to update
> some software to be smart about handling them -- no blind assumptions
> on using the word "package" in this context.
> 
> So unless Rui disagrees, I'm inclined to leave these routine names
> alone.
> 
> thanks,
> Len Brown, Intel Open Source Technology Center

Re: [PATCH v2 24/26] userfaultfd: wp: UFFDIO_REGISTER_MODE_WP documentation update

2019-02-25 Thread Peter Xu

On Mon, Feb 25, 2019 at 11:19:32PM +0200, Mike Rapoport wrote:
> On Tue, Feb 12, 2019 at 10:56:30AM +0800, Peter Xu wrote:
> > From: Martin Cracauer 
> > 
> > Adds documentation about the write protection support.
> > 
> > Signed-off-by: Andrea Arcangeli 
> > [peterx: rewrite in rst format; fixups here and there]
> > Signed-off-by: Peter Xu 
> 
> Reviewed-by: Mike Rapoport 
> 
> Peter, can you please also update the man pages (1, 2)?
> 
> [1] http://man7.org/linux/man-pages/man2/userfaultfd.2.html
> [2] http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html

Sure.  Should I post the man patches after the kernel part is merged?

Thanks,

-- 
Peter Xu

Re: [PATCH v2 25/26] userfaultfd: selftests: refactor statistics

2019-02-25 Thread Mike Rapoport

On Tue, Feb 12, 2019 at 10:56:31AM +0800, Peter Xu wrote:
> Introduce uffd_stats structure for statistics of the self test, at the
> same time refactor the code to always pass in the uffd_stats for either
> read() or poll() typed fault handling threads instead of using two
> different ways to return the statistic results.  No functional change.
> 
> With the new structure, it's very easy to introduce new statistics.
> 
> Signed-off-by: Peter Xu 

Reviewed-by: Mike Rapoport 

> ---
>  tools/testing/selftests/vm/userfaultfd.c | 76 +++-
>  1 file changed, 49 insertions(+), 27 deletions(-)
> 
> diff --git a/tools/testing/selftests/vm/userfaultfd.c 
> b/tools/testing/selftests/vm/userfaultfd.c
> index 5d1db824f73a..e5d12c209e09 100644
> --- a/tools/testing/selftests/vm/userfaultfd.c
> +++ b/tools/testing/selftests/vm/userfaultfd.c
> @@ -88,6 +88,12 @@ static char *area_src, *area_src_alias, *area_dst, 
> *area_dst_alias;
>  static char *zeropage;
>  pthread_attr_t attr;
> 
> +/* Userfaultfd test statistics */
> +struct uffd_stats {
> + int cpu;
> + unsigned long missing_faults;
> +};
> +
>  /* pthread_mutex_t starts at page offset 0 */
>  #define area_mutex(___area, ___nr)   \
>   ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
> @@ -127,6 +133,17 @@ static void usage(void)
>   exit(1);
>  }
> 
> +static void uffd_stats_reset(struct uffd_stats *uffd_stats,
> +  unsigned long n_cpus)
> +{
> + int i;
> +
> + for (i = 0; i < n_cpus; i++) {
> + uffd_stats[i].cpu = i;
> + uffd_stats[i].missing_faults = 0;
> + }
> +}
> +
>  static int anon_release_pages(char *rel_area)
>  {
>   int ret = 0;
> @@ -469,8 +486,8 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
>   return 0;
>  }
> 
> -/* Return 1 if page fault handled by us; otherwise 0 */
> -static int uffd_handle_page_fault(struct uffd_msg *msg)
> +static void uffd_handle_page_fault(struct uffd_msg *msg,
> +struct uffd_stats *stats)
>  {
>   unsigned long offset;
> 
> @@ -485,18 +502,19 @@ static int uffd_handle_page_fault(struct uffd_msg *msg)
>   offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
>   offset &= ~(page_size-1);
> 
> - return copy_page(uffd, offset);
> + if (copy_page(uffd, offset))
> + stats->missing_faults++;
>  }
> 
>  static void *uffd_poll_thread(void *arg)
>  {
> - unsigned long cpu = (unsigned long) arg;
> + struct uffd_stats *stats = (struct uffd_stats *)arg;
> + unsigned long cpu = stats->cpu;
>   struct pollfd pollfd[2];
>   struct uffd_msg msg;
>   struct uffdio_register uffd_reg;
>   int ret;
>   char tmp_chr;
> - unsigned long userfaults = 0;
> 
>   pollfd[0].fd = uffd;
>   pollfd[0].events = POLLIN;
> @@ -526,7 +544,7 @@ static void *uffd_poll_thread(void *arg)
>   msg.event), exit(1);
>   break;
>   case UFFD_EVENT_PAGEFAULT:
> - userfaults += uffd_handle_page_fault(&msg);
> + uffd_handle_page_fault(&msg, stats);
>   break;
>   case UFFD_EVENT_FORK:
>   close(uffd);
> @@ -545,28 +563,27 @@ static void *uffd_poll_thread(void *arg)
>   break;
>   }
>   }
> - return (void *)userfaults;
> +
> + return NULL;
>  }
> 
>  pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
>  static void *uffd_read_thread(void *arg)
>  {
> - unsigned long *this_cpu_userfaults;
> + struct uffd_stats *stats = (struct uffd_stats *)arg;
>   struct uffd_msg msg;
> 
> - this_cpu_userfaults = (unsigned long *) arg;
> - *this_cpu_userfaults = 0;
> -
>   pthread_mutex_unlock(&uffd_read_mutex);
>   /* from here cancellation is ok */
> 
>   for (;;) {
>   if (uffd_read_msg(uffd, &msg))
>   continue;
> - (*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
> + uffd_handle_page_fault(&msg, stats);
>   }
> - return (void *)NULL;
> +
> + return NULL;
>  }
> 
>  static void *background_thread(void *arg)
> @@ -582,13 +599,12 @@ static void *background_thread(void *arg)
>   return NULL;
>  }
> 
> -static int stress(unsigned long *userfaults)
> +static int stress(struct uffd_stats *uffd_stats)
>  {
>   unsigned long cpu;
>   pthread_t locking_threads[nr_cpus];
>   pthread_t uffd_threads[nr_cpus];
>   pthread_t background_threads[nr_cpus];
> - void **_userfaults = (void **) userfaults;
> 
>   finished = 0;
>   for (cpu = 0; cpu < nr_cpus; cpu++) {
> @@ -597,12 +613,13 @@ static int stress(unsigned long *userfaults)
>   return 1;
>   if (bounces & BOUNCE_POLL) {
>   if (pthread_create(&uffd_threads[cpu], &attr,
> -

INFO: rcu detected stall in ext4_file_write_iter

2019-02-25 Thread syzbot


Hello,

syzbot found the following crash on:

HEAD commit:8a61716ff2ab Merge tag 'ceph-for-5.0-rc8' of git://github...
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=161b71d4c0
kernel config:  https://syzkaller.appspot.com/x/.config?x=7132344728e7ec3f
dashboard link: https://syzkaller.appspot.com/bug?extid=7d19c5fe6a3f1161abb7
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=103908f8c0
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=105e5cd0c0

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+7d19c5fe6a3f1161a...@syzkaller.appspotmail.com

audit: type=1400 audit(1550814986.750:36): avc:  denied  { map } for   
pid=8058 comm="syz-executor004" path="/root/syz-executor004991115"  
dev="sda1" ino=1426 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023  
tcontext=unconfined_u:object_r:user_home_t:s0 tclass=file permissive=1

hrtimer: interrupt took 42841 ns
rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
rcu:(detected by 1, t=10502 jiffies, g=5873, q=2)
rcu: All QSes seen, last rcu_preempt kthread activity 10502  
(4295059997-4295049495), jiffies_till_next_fqs=1, root ->qsmask 0x0

syz-executor004 R  running task26448  8069   8060 0x
Call Trace:
 
 sched_show_task kernel/sched/core.c:5306 [inline]
 sched_show_task.cold+0x292/0x30b kernel/sched/core.c:5281
 print_other_cpu_stall kernel/rcu/tree.c:1301 [inline]
 check_cpu_stall kernel/rcu/tree.c:1429 [inline]
 rcu_pending kernel/rcu/tree.c:3018 [inline]
 rcu_check_callbacks.cold+0xa10/0xa4a kernel/rcu/tree.c:2521
 update_process_times+0x32/0x80 kernel/time/timer.c:1635
 tick_sched_handle+0xa2/0x190 kernel/time/tick-sched.c:161
 tick_sched_timer+0x47/0x130 kernel/time/tick-sched.c:1271
 __run_hrtimer kernel/time/hrtimer.c:1389 [inline]
 __hrtimer_run_queues+0x33e/0xde0 kernel/time/hrtimer.c:1451
 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509
 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1035 [inline]
 smp_apic_timer_interrupt+0x120/0x570 arch/x86/kernel/apic/apic.c:1060
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:807
 
RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x50 kernel/kcov.c:94
Code: e5 08 41 bc f4 ff ff ff e8 11 9f ea ff 48 c7 05 12 fb e5 08 00 00 00  
00 e9 c8 e9 ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 48  
8b 75 08 65 48 8b 04 25 40 ee 01 00 65 8b 15 38 0c

RSP: 0018:888084ed72c0 EFLAGS: 0246 ORIG_RAX: ff13
RAX: dc00 RBX: 88809a2205c0 RCX: 81e7c36c
RDX: 111013444114 RSI: 81e7c37a RDI: 88809a2208a0
RBP: 888084ed7300 R08: 88808fa28240 R09: ed1015d25bd0
R10: ed1015d25bcf R11: 8880ae92de7b R12: 0002
R13:  R14:  R15: 0001
 __ext4_journal_start fs/ext4/ext4_jbd2.h:311 [inline]
 ext4_dirty_inode+0x64/0xc0 fs/ext4/inode.c:6081
 __mark_inode_dirty+0x919/0x1290 fs/fs-writeback.c:2155
 mark_inode_dirty include/linux/fs.h:2125 [inline]
 __generic_write_end+0x1b9/0x240 fs/buffer.c:2118
 generic_write_end+0x6c/0x90 fs/buffer.c:2163
 ext4_da_write_end+0x3c8/0xa50 fs/ext4/inode.c:3204
 generic_perform_write+0x2f0/0x530 mm/filemap.c:3219
 __generic_file_write_iter+0x25e/0x630 mm/filemap.c:
 ext4_file_write_iter+0x33f/0x1160 fs/ext4/file.c:266
 call_write_iter include/linux/fs.h:1863 [inline]
 do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680
 do_iter_write fs/read_write.c:956 [inline]
 do_iter_write+0x184/0x610 fs/read_write.c:937
 vfs_iter_write+0x77/0xb0 fs/read_write.c:969
 iter_file_splice_write+0x667/0xbe0 fs/splice.c:749
 do_splice_from fs/splice.c:851 [inline]
 direct_splice_actor+0x126/0x1a0 fs/splice.c:1023
 splice_direct_to_actor+0x369/0x970 fs/splice.c:978
 do_splice_direct+0x1da/0x2a0 fs/splice.c:1066
 do_sendfile+0x597/0xd00 fs/read_write.c:1436
 __do_sys_sendfile64 fs/read_write.c:1491 [inline]
 __se_sys_sendfile64 fs/read_write.c:1483 [inline]
 __x64_sys_sendfile64+0x15a/0x220 fs/read_write.c:1483
 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446a59
Code: e8 dc e6 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 4b 07 fc ff c3 66 2e 0f 1f 84 00 00 00 00

RSP: 002b:7fce454e5db8 EFLAGS: 0246 ORIG_RAX: 0028
RAX: ffda RBX: 006dcc28 RCX: 00446a59
RDX: 2000 RSI: 0003 RDI: 0003
RBP: 006dcc20 R08:  R09: 
R10: 8080fffe R11: 0246 R12: 006dcc2c
R13: 7ffd7bf04daf R14: 7fce454e69c0 R15: 20c49ba5e353f7cf
rcu: rcu_preempt kthread starved for 10502 jiffies! g5873 f0x2  
RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=1

rcu: RCU grace-period kthread stack dump:
rcu_preempt R

Re: [PATCH -mm -V8] mm, swap: fix race between swapoff and some swap operations

2019-02-25 Thread Huang, Ying

Hi, Daniel and Andrea,

"Huang, Ying"  writes:

> From: Huang Ying 
>
> When swapin is performed, after getting the swap entry information from
> the page table, system will swap in the swap entry, without any lock held
> to prevent the swap device from being swapoff.  This may cause the race
> like below,
>
> CPU 1 CPU 2
> - -
>   do_swap_page
> swapin_readahead
>   __read_swap_cache_async
> swapoff swapcache_prepare
>   p->swap_map = NULL  __swap_duplicate
> p->swap_map[?] /* !!! NULL pointer 
> access */
>
> Because swapoff is usually done only at system shutdown, the race may
> not hit many people in practice.  But it is still a race that needs to be fixed.
>
> To fix the race, get_swap_device() is added to check whether the specified
> swap entry is valid in its swap device.  If so, it will keep the swap
> entry valid via preventing the swap device from being swapoff, until
> put_swap_device() is called.
>
> Because swapoff() is very rare code path, to make the normal path runs
> as fast as possible, rcu_read_lock/unlock() and synchronize_rcu()
> instead of reference count is used to implement get/put_swap_device().
> From get_swap_device() to put_swap_device(), RCU reader side is
> locked, so synchronize_rcu() in swapoff() will wait until
> put_swap_device() is called.
>
> In addition to swap_map, cluster_info, etc. data structure in the struct
> swap_info_struct, the swap cache radix tree will be freed after swapoff,
> so this patch fixes the race between swap cache looking up and swapoff
> too.
>
> Races between some other swap cache usages and swapoff are fixed too
> via calling synchronize_rcu() between clearing PageSwapCache() and
> freeing swap cache data structure.
>
> Fixes: 235b62176712 ("mm/swap: add cluster lock")
> Signed-off-by: "Huang, Ying" 
> Not-Nacked-by: Hugh Dickins 
> Cc: Paul E. McKenney 
> Cc: Minchan Kim 
> Cc: Johannes Weiner 
> Cc: Tim Chen 
> Cc: Mel Gorman 
> Cc: Jérôme Glisse 
> Cc: Michal Hocko 
> Cc: Andrea Arcangeli 
> Cc: David Rientjes 
> Cc: Rik van Riel 
> Cc: Jan Kara 
> Cc: Dave Jiang 
> Cc: Aaron Lu 
> Cc: Daniel Jordan 
> Cc: Andrea Parri 
>
> Changelog:
>
> v8:
>
> - Use swp_swap_info() to cleanup the code per Daniel's comments
>
> - Use rcu_read_lock/unlock and synchronize_rcu() per Andrea
>   Arcangeli's comments
>
> - Added Fixes tag per Michal Hocko's comments

Do you have time to take a look at this patch?

Best Regards,
Huang, Ying

> v7:
>
> - Rebased on patch: "mm, swap: bounds check swap_info accesses to avoid NULL 
> derefs"
>
> v6:
>
> - Add more comments to get_swap_device() to make it more clear about
>   possible swapoff or swapoff+swapon.
>
> v5:
>
> - Replace RCU with stop_machine()
>
> v4:
>
> - Use synchronize_rcu() in enable_swap_info() to reduce overhead of
>   normal paths further.
>
> v3:
>
> - Re-implemented with RCU to reduce the overhead of normal paths
>
> v2:
>
> - Re-implemented with SRCU to reduce the overhead of normal paths.
>
> - Avoid to check whether the swap device has been swapoff in
>   get_swap_device().  Because we can check the origin of the swap
>   entry to make sure the swap device hasn't been swapoff.
> ---
>  include/linux/swap.h |  13 +++-
>  mm/memory.c  |   2 +-
>  mm/swap_state.c  |  16 -
>  mm/swapfile.c| 148 +--
>  4 files changed, 140 insertions(+), 39 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 649529be91f2..f2ddaf299e15 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -175,8 +175,9 @@ enum {
>   SWP_PAGE_DISCARD = (1 << 10),   /* freed swap page-cluster discards */
>   SWP_STABLE_WRITES = (1 << 11),  /* no overwrite PG_writeback pages */
>   SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */
> + SWP_VALID   = (1 << 13),/* swap is valid to be operated on? */
>   /* add others here before... */
> - SWP_SCANNING= (1 << 13),/* refcount in scan_swap_map */
> + SWP_SCANNING= (1 << 14),/* refcount in scan_swap_map */
>  };
>  
>  #define SWAP_CLUSTER_MAX 32UL
> @@ -460,7 +461,7 @@ extern unsigned int count_swap_pages(int, int);
>  extern sector_t map_swap_page(struct page *, struct block_device **);
>  extern sector_t swapdev_block(int, pgoff_t);
>  extern int page_swapcount(struct page *);
> -extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry);
> +extern int __swap_count(swp_entry_t entry);
>  extern int __swp_swapcount(swp_entry_t entry);
>  extern int swp_swapcount(swp_entry_t entry);
>  extern struct swap_info_struct *page_swap_info(struct page *);
> @@ -470,6 +471,12 @@ extern int try_to_free_swap(struct page *);
>  struct backing_dev_info;

Re: Linux-next 20190218: am57xx-evm: mmc1: ADMA error

2019-02-25 Thread Faiz Abbas

Hi Ming Lei,

On 26/02/19 7:11 AM, Ming Lei wrote:
> On Mon, Feb 25, 2019 at 9:14 PM Faiz Abbas  wrote:
>>
>> Hi Naresh,
>>
>> + Commit authors.
>>
>> On 19/02/19 6:38 PM, Faiz Abbas wrote:
>>> Hi Naresh,
>>>
>>> On 18/02/19 6:57 PM, Naresh Kamboju wrote:
 Do you see this error on am57xx-evm running Linux next 20190218 ?
 I have tested on multiple devices and found this error.
 Please find the full boot log [1].
 Am i missing any pre required configs [2] ?

 [5.620263] mmc1: ADMA error
 [5.623266] mmc1: sdhci:  SDHCI REGISTER DUMP ===
 [5.629740] mmc1: sdhci: Sys addr:  0x | Version:  0x3302
 [5.636215] mmc1: sdhci: Blk size:  0x0200 | Blk cnt:  0x
 [5.642690] mmc1: sdhci: Argument:  0x002cec70 | Trn mode: 0x0033
 [5.649162] mmc1: sdhci: Present:   0x01f0 | Host ctl: 0x0010
 [5.655634] mmc1: sdhci: Power: 0x000f | Blk gap:  0x
 [5.662108] mmc1: sdhci: Wake-up:   0x | Clock:0x0107
 [5.668582] mmc1: sdhci: Timeout:   0x000c | Int stat: 0x
 [5.675055] mmc1: sdhci: Int enab:  0x027f000b | Sig enab: 0x027f000b
 [5.681529] mmc1: sdhci: ACmd stat: 0x | Slot int: 0x
 [5.688002] mmc1: sdhci: Caps:  0x21e90080 | Caps_1:   0x0f77
 [5.694474] mmc1: sdhci: Cmd:   0x123a | Max curr: 0x
 [5.700949] mmc1: sdhci: Resp[0]:   0x0900 | Resp[1]:  0xffef
 [5.707423] mmc1: sdhci: Resp[2]:   0x0f5903ff | Resp[3]:  0xd04f0132
 [5.713896] mmc1: sdhci: Host ctl2: 0x0004
 [5.718364] mmc1: sdhci: ADMA Err:  0x0007 | ADMA Ptr: 0xab868218

>>>
>>> I see this as well on my setup. Trying to bisect now. Will keep you posted.
>>
>>
>> Reverting the following commit fixes this.
>> commit 07173c3ec276cbb18dc0e0687d37d310e98a1480
>> Author: Ming Lei 
>> Date:   Fri Feb 15 19:13:20 2019 +0800
>>
>> block: enable multipage bvecs
>>
>> This patch pulls the trigger for multi-page bvecs.
>>
>> Reviewed-by: Omar Sandoval 
>> Signed-off-by: Ming Lei 
>> Signed-off-by: Jens Axboe 
> 
> Hi,
> 
> Thanks for your report & bisect.
> 
> Could you test the following patch?
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git/commit/?h=for-5.1/block&id=8f4e80da764ec1ca44c83f3e17dbc9bf0209bccc
> 
> Or  simply run the latest -next?

That didn't fix it for me. Still see ADMA error.

[   13.126186] mmc0: ADMA error
[   13.129084] mmc0: sdhci:  SDHCI REGISTER DUMP ===
[   13.135552] mmc0: sdhci: Sys addr:  0x | Version:  0x3302
[   13.142019] mmc0: sdhci: Blk size:  0x0200 | Blk cnt:  0x
[   13.148485] mmc0: sdhci: Argument:  0x0089 | Trn mode: 0x0033
[   13.154952] mmc0: sdhci: Present:   0x | Host ctl: 0x0012
[   13.161418] mmc0: sdhci: Power: 0x000f | Blk gap:  0x
[   13.167885] mmc0: sdhci: Wake-up:   0x | Clock:0x0107
[   13.174351] mmc0: sdhci: Timeout:   0x000a | Int stat: 0x
[   13.180817] mmc0: sdhci: Int enab:  0x027f000b | Sig enab: 0x027f000b
[   13.187282] mmc0: sdhci: ACmd stat: 0x | Slot int: 0x
[   13.193748] mmc0: sdhci: Caps:  0x25e90080 | Caps_1:   0x0f77
[   13.200215] mmc0: sdhci: Cmd:   0x123a | Max curr: 0x
[   13.206682] mmc0: sdhci: Resp[0]:   0x0900 | Resp[1]:  0x3b377f80
[   13.213148] mmc0: sdhci: Resp[2]:   0x5b59 | Resp[3]:  0x400e0032
[   13.219613] mmc0: sdhci: Host ctl2: 0x
[   13.224073] mmc0: sdhci: ADMA Err:  0x0007 | ADMA Ptr: 0xae857288
[   13.230538] mmc0: sdhci: 

Full Log:

https://pastebin.ubuntu.com/p/4yGqgJCGZQ/

Thanks,
Faiz

Re: [PATCH v2 20/26] userfaultfd: wp: support write protection for userfault vma range

2019-02-25 Thread Mike Rapoport

On Tue, Feb 26, 2019 at 02:06:27PM +0800, Peter Xu wrote:
> On Mon, Feb 25, 2019 at 10:52:34PM +0200, Mike Rapoport wrote:
> > On Tue, Feb 12, 2019 at 10:56:26AM +0800, Peter Xu wrote:
> > > From: Shaohua Li 
> > > 
> > > Add API to enable/disable writeprotect a vma range. Unlike mprotect,
> > > this doesn't split/merge vmas.
> > > 
> > > Cc: Andrea Arcangeli 
> > > Cc: Rik van Riel 
> > > Cc: Kirill A. Shutemov 
> > > Cc: Mel Gorman 
> > > Cc: Hugh Dickins 
> > > Cc: Johannes Weiner 
> > > Signed-off-by: Shaohua Li 
> > > Signed-off-by: Andrea Arcangeli 
> > > [peterx:
> > >  - use the helper to find VMA;
> > >  - return -ENOENT if not found to match mcopy case;
> > >  - use the new MM_CP_UFFD_WP* flags for change_protection
> > >  - check against mmap_changing for failures]
> > > Signed-off-by: Peter Xu 
> > > ---
> > >  include/linux/userfaultfd_k.h |  3 ++
> > >  mm/userfaultfd.c  | 54 +++
> > >  2 files changed, 57 insertions(+)
> > > 
> > > diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> > > index 765ce884cec0..8f6e6ed544fb 100644
> > > --- a/include/linux/userfaultfd_k.h
> > > +++ b/include/linux/userfaultfd_k.h
> > > @@ -39,6 +39,9 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
> > > unsigned long dst_start,
> > > unsigned long len,
> > > bool *mmap_changing);
> > > +extern int mwriteprotect_range(struct mm_struct *dst_mm,
> > > +unsigned long start, unsigned long len,
> > > +bool enable_wp, bool *mmap_changing);
> > > 
> > >  /* mm helpers */
> > >  static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct 
> > > *vma,
> > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > > index fefa81c301b7..529d180bb4d7 100644
> > > --- a/mm/userfaultfd.c
> > > +++ b/mm/userfaultfd.c
> > > @@ -639,3 +639,57 @@ ssize_t mfill_zeropage(struct mm_struct *dst_mm, 
> > > unsigned long start,
> > >  {
> > >   return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
> > >  }
> > > +
> > > +int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
> > > + unsigned long len, bool enable_wp, bool *mmap_changing)
> > > +{
> > > + struct vm_area_struct *dst_vma;
> > > + pgprot_t newprot;
> > > + int err;
> > > +
> > > + /*
> > > +  * Sanitize the command parameters:
> > > +  */
> > > + BUG_ON(start & ~PAGE_MASK);
> > > + BUG_ON(len & ~PAGE_MASK);
> > > +
> > > + /* Does the address range wrap, or is the span zero-sized? */
> > > + BUG_ON(start + len <= start);
> > 
> > I'd replace these BUG_ON()s with
> > 
> > if (WARN_ON())
> >  return -EINVAL;
> 
> I believe BUG_ON() is used because these parameters should have been
> checked in userfaultfd_writeprotect() already by the common
> validate_range() even before calling mwriteprotect_range().  So I'm
> fine with the WARN_ON() approach but I'd slightly prefer to simply
> keep the patch as is to keep Jerome's r-b if you won't disagree. :)

Right, userfaultfd_writeprotect() should check these parameters and if it
didn't it was a bug indeed. But still, it's not severe enough to crash the
kernel.

I hope Jerome wouldn't mind to keep his r-b with s/BUG_ON/WARN_ON ;-)

With this change you can also add 

Reviewed-by: Mike Rapoport 
 
> Thanks,
> 
> -- 
> Peter Xu
> 

-- 
Sincerely yours,
Mike.

Re: [PATCH] staging: wilc1000: Fix incorrect type in assignment

2019-02-25 Thread Ajay.Kathat



On 2/26/2019 8:58 AM, Bo YU wrote:
> The patch fixes following sparse warning:
> 
> drivers/staging/wilc1000/host_interface.c:450:30: warning: incorrect type in 
> assignment (different base types)
> drivers/staging/wilc1000/host_interface.c:450:30:expected restricted 
> __le16 [usertype] beacon_period
> drivers/staging/wilc1000/host_interface.c:450:30:got unsigned short 
> [usertype] beacon_interval
> drivers/staging/wilc1000/host_interface.c:451:25: warning: incorrect type in 
> assignment (different base types)
> drivers/staging/wilc1000/host_interface.c:451:25:expected restricted 
> __le16 [usertype] cap_info
> drivers/staging/wilc1000/host_interface.c:451:25:got unsigned short 
> [usertype] capability
> 
> Signed-off-by: Bo YU 
> ---
> I have no hardware to test it and just to compile it

Thanks for submitting the patch.

The correct way to fix the above sparse warning is by using cpu_to_le16()
while filling the information in ->beacon_period and ->cap_info because
the wilc1000 module expects the data in _le_ byte order.

Please change the below lines in host_interface.c and resubmit the patch.
param->beacon_period = bss->beacon_interval;
param->cap_info = bss->capability;
to
param->beacon_period = cpu_to_le16(bss->beacon_interval);
param->cap_info = cpu_to_le16(bss->capability);

> ---
>  drivers/staging/wilc1000/host_interface.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/wilc1000/host_interface.c 
> b/drivers/staging/wilc1000/host_interface.c
> index 50dc2dd942f5..cdcb52aec779 100644
> --- a/drivers/staging/wilc1000/host_interface.c
> +++ b/drivers/staging/wilc1000/host_interface.c
> @@ -106,10 +106,10 @@ struct wilc_join_bss_param {
>   u8 ssid_terminator;
>   u8 bss_type;
>   u8 ch;
> - __le16 cap_info;
> + u16 cap_info;
>   u8 sa[ETH_ALEN];
>   u8 bssid[ETH_ALEN];
> - __le16 beacon_period;
> + u16 beacon_period;
>   u8 dtim_period;
>   u8 supp_rates[WILC_MAX_RATES_SUPPORTED + 1];
>   u8 wmm_cap;
> 

Regards,
Ajay

[RFC v10 2/4] pstore/blk: add blkoops for pstore_blk

2019-02-25 Thread liaoweixiong

blkoops is a sample for pstore/blk. It can only record oops, excluding
panics, as no read/write APIs for panic are registered. It supports settings
via Kconfig/module parameters. It can record the oops log even across a power
failure if "PSTORE_BLKOOPS_BLKDEV" in Kconfig or "blkdev" as a module parameter
is valid. Otherwise, it can only record data to a RAM buffer, which will
be dropped on reboot.

Signed-off-by: liaoweixiong 
---
 MAINTAINERS|   2 +-
 fs/pstore/Kconfig  | 114 ++
 fs/pstore/Makefile |   2 +
 fs/pstore/blkoops.c| 198 +
 include/linux/pstore_blk.h |  14 +++-
 5 files changed, 325 insertions(+), 5 deletions(-)
 create mode 100644 fs/pstore/blkoops.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 51029a4..4e9242a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12318,7 +12318,7 @@ F:  drivers/firmware/efi/efi-pstore.c
 F: drivers/acpi/apei/erst.c
 F: Documentation/admin-guide/ramoops.rst
 F: Documentation/devicetree/bindings/reserved-memory/ramoops.txt
-K: \b(pstore|ramoops)
+K: \b(pstore|ramoops|blkoops)
 
 PTP HARDWARE CLOCK SUPPORT
 M: Richard Cochran 
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index defcb75..7dfe00b 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -160,3 +160,117 @@ config PSTORE_BLK
help
  This enables panic and oops message to be logged to a block dev
  where it can be read back at some later point.
+
+config PSTORE_BLKOOPS
+   tristate "pstore block with oops logger"
+   depends on PSTORE_BLK
+   help
+ This is a sample for pstore block with oops logger.
+
+ It CANNOT record panic log as no read/write apis for panic registered.
+
+ It CAN record oops log even power failure if
+ "PSTORE_BLKOOPS_BLKDEV" on Kconfig or "block-device" on dts or
+ "blkdev" on module parameter is valid.
+
+ Otherwise, it can only record data to ram buffer, which will be
+ dropped when reboot.
+
+ NOTE that, there are three ways to set parameters of blkoops and
+ prioritize according to configuration flexibility. That is
+ Kconfig < device tree < module parameters. It means that the value can
+ be overwritten by higher priority settings.
+ 1. Kconfig
+It just sets a default value.
+ 2. device tree
+It is set on device tree, which will overwrites value from Kconfig,
+but can also be overwritten by module parameters.
+ 3. module parameters
+It is the first priority. Take care of that blkoops will take lower
+priority settings if higher priority one do not set.
+
+config PSTORE_BLKOOPS_DMESG_SIZE
+   int "dmesg size in kbytes for blkoops"
+   depends on PSTORE_BLKOOPS
+   default 64
+   help
+ This just sets size of dmesg (dmesg_size) for pstore/blk. The value
+ must be a multiple of 4096.
+
+ NOTE that, there are three ways to set parameters of blkoops and
+ prioritize according to configuration flexibility. That is
+ Kconfig < device tree < module parameters. It means that the value can
+ be overwritten by higher priority settings.
+ 1. Kconfig
+It just sets a default value.
+ 2. device tree
+It is set on device tree, which will overwrites value from Kconfig,
+but can also be overwritten by module parameters.
+ 3. module parameters
+It is the first priority. Take care of that blkoops will take lower
+priority settings if higher priority one do not set.
+
+config PSTORE_BLKOOPS_TOTAL_SIZE
+   int "total size in kbytes for blkoops"
+   depends on PSTORE_BLKOOPS
+   default 0
+   help
+ The total size in kbytes pstore/blk can use. It must be less than or
+ equal to size of block device if @blkdev valid. If @total_size is zero
+ with @blkdev, @total_size will be set to equal to size of @blkdev.
+ The value must be a multiple of 4096.
+
+ NOTE that, there are three ways to set parameters of blkoops and
+ prioritize according to configuration flexibility. That is
+ Kconfig < device tree < module parameters. It means that the value can
+ be overwritten by higher priority settings.
+ 1. Kconfig
+It just sets a default value.
+ 2. device tree
+It is set on device tree, which will overwrites value from Kconfig,
+but can also be overwritten by module parameters.
+ 3. module parameters
+It is the first priority. Take care of that blkoops will take lower
+priority settings if higher priority one do not set.
+
+config PSTORE_BLKOOPS_BLKDEV
+   string "block device for blkoops"
+   depends on PSTORE_BLKOOPS
+   default ""
+   help
+ This just sets

[PATCH v3 1/8] KVM:VMX: Define CET VMCS fields and bits

2019-02-25 Thread Yang Weijiang

CET (Control-flow Enforcement Technology) is used to
protect against return/jump oriented programming (ROP/JOP)
attacks. It provides the following capabilities to defend
against ROP/JOP style control-flow subversion attacks:
- Shadow Stack (SHSTK):
 A second stack for the program that is
 used exclusively for control transfer
 operations.
- Indirect Branch Tracking (IBT):
 Free branch protection to defend against
 Jump/Call Oriented Programming.

On processors that support CET, VMX saves/restores
the states of IA32_S_CET, SSP and IA32_INTR_SSP_TABL_ADDR MSR
to the VMCS area for Guest/Host unconditionally.

If VM_EXIT_LOAD_HOST_CET_STATE = 1, the host CET MSRs are
restored from VMCS host-state area at VM exit as follows:

- HOST_IA32_S_CET: Host supervisor mode IA32_S_CET MSR is loaded
   from this field.

- HOST_SSP :   Host SSP is loaded from this field.

- HOST_INTR_SSP_TABL_ADDR : Host IA32_INTR_SSP_TABL_ADDR
 MSR is loaded from this field.

If VM_ENTRY_LOAD_GUEST_CET_STATE = 1, the guest CET MSRs are loaded
from VMCS guest-state area at VM entry as follows:

- GUEST_IA32_S_CET : Guest supervisor mode IA32_S_CET MSR is loaded
 from this field.

- GUEST_SSP :Guest SSP is loaded from this field.

- GUEST_INTR_SSP_TABL_ADDR : Guest IA32_INTR_SSP_TABL_ADDR
 MSR is loaded from this field.

Additionally, to context switch guest and host CET states, the VMM
uses xsaves/xrstors instructions to save/restore the guest CET states
at VM exit/entry. The CET xsave area is within thread_struct.fpu area.
If OS execution flow changes during task switch/interrupt/exception etc.,
the OS also relies on xsaves/xrstors to switch CET states accordingly.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/include/asm/vmx.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index ade0f153947d..395c1f7e5938 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -98,6 +98,7 @@
 #define VM_EXIT_LOAD_IA32_EFER  0x0020
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER   0x0040
 #define VM_EXIT_CLEAR_BNDCFGS   0x0080
+#define VM_EXIT_LOAD_HOST_CET_STATE 0x1000
 
 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR  0x00036dff
 
@@ -109,6 +110,7 @@
 #define VM_ENTRY_LOAD_IA32_PAT 0x4000
 #define VM_ENTRY_LOAD_IA32_EFER 0x8000
 #define VM_ENTRY_LOAD_BNDCFGS   0x0001
+#define VM_ENTRY_LOAD_GUEST_CET_STATE   0x0010
 
 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x11ff
 
@@ -325,6 +327,9 @@ enum vmcs_field {
GUEST_PENDING_DBG_EXCEPTIONS= 0x6822,
GUEST_SYSENTER_ESP  = 0x6824,
GUEST_SYSENTER_EIP  = 0x6826,
+   GUEST_IA32_S_CET= 0x6828,
+   GUEST_SSP   = 0x682a,
+   GUEST_INTR_SSP_TABL_ADDR= 0x682c,
HOST_CR0= 0x6c00,
HOST_CR3= 0x6c02,
HOST_CR4= 0x6c04,
@@ -337,6 +342,9 @@ enum vmcs_field {
HOST_IA32_SYSENTER_EIP  = 0x6c12,
HOST_RSP= 0x6c14,
HOST_RIP= 0x6c16,
+   HOST_IA32_S_CET = 0x6c18,
+   HOST_SSP= 0x6c1a,
+   HOST_INTR_SSP_TABL_ADDR = 0x6c1c
 };
 
 /*
-- 
2.17.1

[RFC v10 0/4] pstore/block: new support logger for block devices

2019-02-25 Thread liaoweixiong

Why should we need pstore_block?
1. Most embedded intelligent equipment has no persistent ram, because it
increases costs. We prefer cheaper solutions, like block devices.
In fact, there is already a sample for block device logger in driver
MTD (drivers/mtd/mtdoops.c).
2. Not all equipment has a battery, which means that all data in general
ram is lost on power failure. Pstore can do little for such
equipment.

[PATCH v10]
Cancel DT support for blkoops temporarily.
On patch 1:
1. pstore/blk should unlink PSTORE_BLKDEV when unregister.
On patch 2:
1. cancel DT support temporarily. I will submit other patches to support DT
   when DT maintainers acked.
2. add spin lock to protect blkz_info when modify panic operations.
3. change default value of total size on Kconfig from 1024 to 0.

[PATCH v9]
On patch 1:
1. rename part_path/part_size, members of blkz_info, to blkdev/total_size
2. if total_size is zero, get size from @blkdev
3. support multiple variants for @blkdev, such as partuuid, major with minor,
   and /dev/. See details on Documentation.
4. get size from block device
5. add depends on CONFIG_BLOCK
On patch 2:
1. update document
On patch 3:
1. update codes for new blkzone. Blkoops support insmod without total_size.
   for example: "insmod ./blkoops.ko blkdev=93:6" (major:minor).
2. use late_initcalls rather than module_init, to avoid block device not ready.
3. support for block driver to add panic apis to blkoops. By this, block
   driver can do the least work that just provides panic operations.
On patch 5:
1. update document

[PATCH v8]
On patch 2:
1. move DT to /bindings/pstore
2. Delete details for kernel.

[PATCH v7]
On patch 1:
1. Fix line over 80 characters.
On patch 2:
1. Insert a separate patch for DT bindings.

[PATCH v6]
On patch 1:
1. Fix according to email from Kees Cook, including spelling mistakes,
   explicit overflow test, none of the zeroing etc.
2. Do not recover data but metadata of dmesg when panic.
3. No need to take recovery when do erase.
4. Do not use "blkoops" for blkzone any more because "blkoops" is used for
   other module now. (rename blkbuf to blkoops)
On patch 2:
1. Rename blkbuf to blkoops.
2. Add Kconfig/device tree/module parameters settings for blkoops.
3. Add document for device tree.
On patch 3:
1. Blkoops support pmsg.
2. Fix description for new version patch.
On patch 4:
1. Fix description for new version patch.

[PATCH v5]
On patch 1:
1. rename pstore/rom to pstore/blk
2. Do not allocate any memory in the write path of panic. So, use local
array instead in function romz_recover_dmesg_meta.
3. Add C header file "linux/fs.h" to fix implicit declaration of function
   'filp_open','kernel_read'...
On patch 3:
1. If panic, do not recover pmsg but flush if it is dirty.
2. Fix erase pmsg failed.
On patch 4:
1. Create a document for pstore/blk

[PATCH v4]
On patch 1:
1. Fix always true condition '(--i >= 0) => (0-u32max >= 0)' in function
   romz_init_zones by defining variable i as 'int' rather than
   'unsigned int'.
2. To make the code easier to read, we use macro READ_NEXT_ZONE for the
   return value of romz_dmesg_read if it needs to read the next zone.
   Moreover, we assign READ_NEXT_ZONE -1024 rather than 0.
3. Add 'FLUSH_META' to 'enum romz_flush_mode' and rename 'NOT_FLUSH' to
   'FLUSH_NONE'
4. Function romz_zone_write work badly with FLUSH_PART mode as badly
   address and offset to write.
On patch 3:
NEW SUPPORT pmsg for pstore_rom.

[PATCH v3]
On patch 1:
Fix build as module error for undefined 'vfs_read' and 'vfs_write'.
Neither 'vfs_read' nor 'vfs_write' has been exported yet, so we use
'kernel_read' and 'kernel_write' instead.

[PATCH v2]
On patch 1:
Fix build as module error for redefinition of 'romz_unregister' and
'romz_register'

[PATCH v1]
On patch 1:
Core codes of pstore_rom, which works well on allwinner(sunxi) platform.
On patch 2:
A sample for pstore_rom, using general ram rather than block device.

liaoweixiong (4):
  pstore/blk: new support logger for block devices
  pstore/blk: add blkoops for pstore_blk
  pstore/blk: support pmsg for pstore block
  Documentation: pstore/blk: create document for pstore_blk

 Documentation/admin-guide/pstore-block.rst |  233 ++
 MAINTAINERS|3 +-
 fs/pstore/Kconfig  |  147 
 fs/pstore/Makefile |5 +
 fs/pstore/blkoops.c|  206 +
 fs/pstore/blkzone.c| 1244 
 include/linux/pstore_blk.h |   87 ++
 7 files changed, 1924 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/admin-guide/pstore-block.rst
 create mode 100644 fs/pstore/blkoops.c
 create mode 100644 fs/pstore/blkzone.c
 create mode 100644 include/linux/pstore_blk.h

-- 
1.9.1

[PATCH v3 5/8] KVM:VMX: Pass through host CET related MSRs to Guest.

2019-02-25 Thread Yang Weijiang

The CET runtime settings, i.e., CET state control bits(IA32_U_CET/
IA32_S_CET), CET SSP(IA32_PL3_SSP/IA32_PL0_SSP) and SSP table address
(IA32_INTERRUPT_SSP_TABLE_ADDR) are task/thread specific, therefore,
OS needs to save/restore the states properly during context switch,
e.g., task/thread switching, interrupt/exception handling, it uses
xsaves/xrstors to achieve that.

The difference between VMCS CET area fields and xsave CET area, is that
the former is for state retention during Guest/Host context
switch while the latter is for state retention during OS execution.

Linux currently doesn't support CPL1 and CPL2, so SSPs for these levels
are skipped here.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/vmx.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7bbb8b26e901..89ee086e1729 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11769,6 +11769,7 @@ static void nested_vmx_entry_exit_ctls_update(struct 
kvm_vcpu *vcpu)
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
+   unsigned long *msr_bitmap;
 
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
@@ -11786,6 +11787,18 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
}
+
+   msr_bitmap = vmx->vmcs01.msr_bitmap;
+
+   if (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) |
+   guest_cpuid_has(vcpu, X86_FEATURE_IBT)) {
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, 
MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, 
MSR_TYPE_RW);
+   }
+
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
-- 
2.17.1

[RFC v10 4/4] Documentation: pstore/blk: create document for pstore_blk

2019-02-25 Thread liaoweixiong

The document, at Documentation/admin-guide/pstore-block.rst,
tells users how to use pstore_blk and what to pay attention to
regarding panic read/write.

Signed-off-by: liaoweixiong 
---
 Documentation/admin-guide/pstore-block.rst | 233 +
 MAINTAINERS|   1 +
 fs/pstore/Kconfig  |   4 +
 3 files changed, 238 insertions(+)
 create mode 100644 Documentation/admin-guide/pstore-block.rst

diff --git a/Documentation/admin-guide/pstore-block.rst 
b/Documentation/admin-guide/pstore-block.rst
new file mode 100644
index 000..a828274
--- /dev/null
+++ b/Documentation/admin-guide/pstore-block.rst
@@ -0,0 +1,233 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Pstore block oops/panic logger
+==
+
+Introduction
+
+
+Pstore block (pstore_blk) is an oops/panic logger that writes its logs to a
+block device before the system crashes. Pstore_blk needs the block device
+driver to register a partition path of the block device, like /dev/mmcblk0p7
+for the mmc driver, and read/write APIs for this partition when on panic.
+
+Pstore block concepts
+-
+
+Pstore block begins at function ``blkz_register``, by which block driver
+registers to pstore_blk. Note that, block driver should register to pstore_blk
+after block device has registered. Block driver transfers a structure
+``blkz_info`` which is defined in *linux/pstore_blk.h*.
+
+The following key members of ``struct blkz_info`` may be of interest to you.
+
+blkdev
+~~
+
+The block device to use. Most of the time, it is a partition of block device.
+It's ok to keep it as NULL if you are passing ``read`` and ``write`` in
+blkz_info, as ``blkdev`` is used by blkz_default_general_read/write. If all of
+``blkdev``, ``read`` and ``write`` are NULL, no block device is effective and
+the data will be saved in ddr buffer.
+
+It accepts the following variants:
+
+1. <hex_major><hex_minor> device number in hexadecimal represents itself,
+   no leading 0x, for example b302.
+#. /dev/<disk_name> represents the device number of disk
+#. /dev/<disk_name><decimal> represents the device number of partition -
+   device number of disk plus the partition number
+#. /dev/<disk_name>p<decimal> - same as the above, that form is used when
+   disk name of partitioned disk ends on a digit.
+#. PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the unique id of
+   a partition if the partition table provides it. The UUID may be either an
+   EFI/GPT UUID, or refer to an MSDOS partition using the format SSSSSSSS-PP,
+   where SSSSSSSS is a zero-filled hex representation of the 32-bit
+   "NT disk signature", and PP is a zero-filled hex representation of the
+   1-based partition number.
+#. PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to a
+   partition with a known unique id.
+#. <major>:<minor> major and minor number of the device separated by a colon.
+
+See more on section **read/write**.
+
+total_size
+~~
+
+The total size in bytes of block device used for pstore_blk. It **MUST** be
+less than or equal to size of block device if ``blkdev`` is valid. It **MUST**
+be a multiple of 4096. If ``total_size`` is zero with ``blkdev``,
+``total_size`` will be set equal to the size of ``blkdev``.
+
+The block device area is divided into many chunks, and each event writes a
+chunk of information.
+
+dmesg_size
+~~
+
+The chunk size in bytes for dmesg(oops/panic). It **MUST** be a multiple of
+SECTOR_SIZE (Most of the time, the SECTOR_SIZE is 512). If you don't need
+dmesg, you can safely set it to 0.
+
+NOTE that, the remaining space, except ``pmsg_size`` and others, belongs to
+dmesg. It means that there are multiple chunks for dmesg.
+
+Pstore_blk will log to dmesg chunks one by one, and always overwrite the oldest
+chunk if there is no free chunk.
+
+pmsg_size
+~
+
+The chunk size in bytes for pmsg. It **MUST** be a multiple of SECTOR_SIZE
+(Most of the time, the SECTOR_SIZE is 512). If you don't need pmsg, you can
+safely set it to 0.
+
+There is only one chunk for pmsg.
+
+Pmsg is a user space accessible pstore object. Writes to */dev/pmsg0* are
+appended to the chunk. On reboot the contents are available in
+/sys/fs/pstore/pmsg-pstore-blk-0.
+
+dump_oops
+~
+
+Dumping both oopses and panics can be done by setting 1 in the ``dump_oops``
+member while setting 0 in that variable dumps only the panics.
+
+read/write
+~~
+
+They are general ``read/write`` APIs. It is safe and recommended to ignore
+them, but set ``blkdev``.
+
+These general APIs are used all the time except panic. The ``read`` API is
+usually used to recover data from block device, and the ``write`` API is
+usually used to flush new data and erase to block device.
+
+Pstore_blk will temporarily hold all new data before block device is ready. If
+you ignore both of ``read/write`` and ``blkdev``, the old data will be lost.
+
+NOTE that, the general APIs must check whether the block device is ready if
+self-defined.
+
+panic_read/panic_write

[RFC v10 3/4] pstore/blk: support pmsg for pstore block

2019-02-25 Thread liaoweixiong

To enable pmsg, just set pmsg_size when the block device registers blkzone.

Signed-off-by: liaoweixiong 
---
 fs/pstore/Kconfig  |  21 
 fs/pstore/blkoops.c|  10 ++
 fs/pstore/blkzone.c| 253 +
 include/linux/pstore_blk.h |   1 +
 4 files changed, 264 insertions(+), 21 deletions(-)

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 7dfe00b..b417bf5 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -210,6 +210,27 @@ config PSTORE_BLKOOPS_DMESG_SIZE
 It is the first priority. Take care of that blkoops will take lower
 priority settings if higher priority one do not set.
 
+config PSTORE_BLKOOPS_PMSG_SIZE
+   int "pmsg size in kbytes for blkoops"
+   depends on PSTORE_BLKOOPS
+   default 64
+   help
+ This just sets size of pmsg (pmsg_size) for pstore/blk. The value must
+ be a multiple of 4096. Pmsg work only if "blkdev" is set.
+
+ NOTE that, there are three ways to set parameters of blkoops and
+ prioritize according to configuration flexibility. That is
+ Kconfig < device tree < module parameters. It means that the value can
+ be overwritten by higher priority settings.
+ 1. Kconfig
+It just sets a default value.
+ 2. device tree
+It is set on device tree, which will overwrites value from Kconfig,
+but can also be overwritten by module parameters.
+ 3. module parameters
+It is the first priority. Take care of that blkoops will take lower
+priority settings if higher priority one do not set.
+
 config PSTORE_BLKOOPS_TOTAL_SIZE
int "total size in kbytes for blkoops"
depends on PSTORE_BLKOOPS
diff --git a/fs/pstore/blkoops.c b/fs/pstore/blkoops.c
index 22c0c84..05140fd 100644
--- a/fs/pstore/blkoops.c
+++ b/fs/pstore/blkoops.c
@@ -30,6 +30,10 @@
 module_param(dmesg_size, long, 0400);
 MODULE_PARM_DESC(dmesg_size, "demsg size in kbytes");
 
+static long pmsg_size = -1;
+module_param(pmsg_size, long, 0400);
+MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes");
+
 static long total_size = -1;
 module_param(total_size, long, 0400);
 MODULE_PARM_DESC(total_size, "total size in kbytes");
@@ -47,11 +51,13 @@ struct blkz_info blkz_info = {
 
 struct blkoops_info {
unsigned long dmesg_size;
+   unsigned long pmsg_size;
unsigned long total_size;
const char *blkdev;
 };
 struct blkoops_info blkoops_info = {
.dmesg_size = CONFIG_PSTORE_BLKOOPS_DMESG_SIZE * 1024,
+   .pmsg_size = CONFIG_PSTORE_BLKOOPS_PMSG_SIZE * 1024,
.total_size = CONFIG_PSTORE_BLKOOPS_TOTAL_SIZE * 1024,
.blkdev = CONFIG_PSTORE_BLKOOPS_BLKDEV,
 };
@@ -104,6 +110,7 @@ static int blkoops_probe(struct platform_device *pdev)
 
check_size(total_size, 4096);
check_size(dmesg_size, 4096);
+   check_size(pmsg_size, 4096);
 
 #undef check_size
 
@@ -112,6 +119,7 @@ static int blkoops_probe(struct platform_device *pdev)
 * through /sys/module/blkoops/parameters/
 */
dmesg_size = blkz_info.dmesg_size;
+   pmsg_size = blkz_info.pmsg_size;
total_size = blkz_info.total_size;
if (blkz_info.blkdev)
strncpy(blkdev, blkz_info.blkdev, 80 - 1);
@@ -156,6 +164,8 @@ void blkoops_register_dummy(void)
info->blkdev = (const char *)blkdev;
if (dmesg_size >= 0)
info->dmesg_size = (unsigned long)dmesg_size * 1024;
+   if (pmsg_size >= 0)
+   info->pmsg_size = (unsigned long)pmsg_size * 1024;
} else if (info->total_size > 0 || strlen(info->blkdev)) {
pr_info("using kconfig value\n");
} else {
diff --git a/fs/pstore/blkzone.c b/fs/pstore/blkzone.c
index cba55b3..cd3d4ed 100644
--- a/fs/pstore/blkzone.c
+++ b/fs/pstore/blkzone.c
@@ -40,12 +40,14 @@
  *
  * @sig: signature to indicate header (BLK_SIG xor BLKZONE-type value)
  * @datalen: length of data in @data
+ * @start: offset into @data where the stored bytes begin
  * @data: zone data.
  */
 struct blkz_buffer {
 #define BLK_SIG (0x43474244) /* DBGC */
uint32_t sig;
atomic_t datalen;
+   atomic_t start;
uint8_t data[];
 };
 
@@ -78,6 +80,9 @@ struct blkz_dmesg_header {
  * frontent name for this zone
  * @buffer:
  * pointer to data buffer managed by this zone
+ * @oldbuf:
+ * pointer to old data buffer. It is used for single zone such as pmsg,
+ * saving the old buffer.
  * @buffer_size:
  * bytes in @buffer->data
  * @should_recover:
@@ -91,6 +96,7 @@ struct blkz_zone {
enum pstore_type_id type;
 
struct blkz_buffer *buffer;
+   struct blkz_buffer *oldbuf;
size_t buffer_size;
bool should_recover;
atomic_t dirty;
@@ -98,8 +104,10 @@ struct blkz_zone {
 
 struct blkz_context {
struct

[RFC v10 1/4] pstore/blk: new support logger for block devices

2019-02-25 Thread liaoweixiong

pstore_blk is similar to pstore_ram, but dump log to block devices
rather than persistent ram.

Why should we need pstore_blk?
1. Most embedded intelligent equipment has no persistent ram, because it
increases costs. We prefer cheaper solutions, like block devices.
In fact, there is already a sample for block device logger in driver
MTD (drivers/mtd/mtdoops.c).
2. Not all equipment has a battery, which means that all data in general
ram is lost on power failure. Pstore can do little for such
equipment.

pstore_blk can only dump Oops/Panic log to block devices. It only
supports dmesg now. To make pstore_blk work, the block driver should
provide the block device and the read/write apis when on panic.

pstore_blk begins at 'blkz_register', by which a block driver can register
a block device to pstore_blk. Then pstore_blk divides and manages the
block device as zones, which is similar to pstore_ram.

It is recommended that the block driver register to pstore_blk after the
block device is ready.

pstore_blk works well on allwinner(sunxi) platform.

Signed-off-by: liaoweixiong 
---
 fs/pstore/Kconfig  |8 +
 fs/pstore/Makefile |3 +
 fs/pstore/blkzone.c| 1031 
 include/linux/pstore_blk.h |   80 
 4 files changed, 1122 insertions(+)
 create mode 100644 fs/pstore/blkzone.c
 create mode 100644 include/linux/pstore_blk.h

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 8b3ba27..defcb75 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -152,3 +152,11 @@ config PSTORE_RAM
  "ramoops.ko".
 
  For more information, see Documentation/admin-guide/ramoops.rst.
+
+config PSTORE_BLK
+   tristate "Log panic/oops to a block device"
+   depends on PSTORE
+   depends on BLOCK
+   help
+ This enables panic and oops message to be logged to a block dev
+ where it can be read back at some later point.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 967b589..0ee2fc8 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -12,3 +12,6 @@ pstore-$(CONFIG_PSTORE_PMSG)  += pmsg.o
 
 ramoops-objs += ram.o ram_core.o
 obj-$(CONFIG_PSTORE_RAM)   += ramoops.o
+
+obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o
+pstore_blk-y += blkzone.o
diff --git a/fs/pstore/blkzone.c b/fs/pstore/blkzone.c
new file mode 100644
index 000..cba55b3
--- /dev/null
+++ b/fs/pstore/blkzone.c
@@ -0,0 +1,1031 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * blkzone.c: Block device Oops/Panic logger
+ *
+ * Copyright (C) 2019 liaoweixiong 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define MODNAME "pstore-blk"
+#define pr_fmt(fmt) MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PSTORE_BLKDEV "/dev/pstore-blk"
+
+/**
+ * struct blkz_buffer - header of zone to flush to storage
+ *
+ * @sig: signature to indicate header (BLK_SIG xor BLKZONE-type value)
+ * @datalen: length of data in @data
+ * @data: zone data.
+ */
+struct blkz_buffer {
+#define BLK_SIG (0x43474244) /* DBGC */
+   uint32_t sig;
+   atomic_t datalen;
+   uint8_t data[];
+};
+
+/**
+ * struct blkz_dmesg_header: dmesg information
+ *
+ * @magic: magic num for dmesg header
+ * @time: trigger time
+ * @compressed: whether compressed
+ * @counter: oops/panic counter
+ * @reason: identify oops or panic
+ */
+struct blkz_dmesg_header {
+#define DMESG_HEADER_MAGIC 0x4dfc3ae5
+   uint32_t magic;
+   struct timespec64 time;
+   bool compressed;
+   uint32_t counter;
+   enum kmsg_dump_reason reason;
+   uint8_t data[0];
+};
+
+/**
+ * struct blkz_zone - zone information
+ * @off:
+ * zone offset of block device
+ * @type:
+ * frontent type for this zone
+ * @name:
+ * frontent name for this zone
+ * @buffer:
+ * pointer to data buffer managed by this zone
+ * @buffer_size:
+ * bytes in @buffer->data
+ * @should_recover:
+ * should recover from storage
+ * @dirty:
+ * mark whether the data in @buffer are dirty (not flush to storage yet)
+ */
+struct blkz_zone {
+   unsigned long off;
+   const char *name;
+   enum pstore_type_id type;
+
+   struct blkz_buffer *buffer;
+   size_t buffer_size;
+   bool should_recover;
+   atomic_t dirty;
+};
+
+struct blkz_context {
+   struct blkz_zone **dbzs;/* dmesg block zones */
+   unsigned int dmesg_max_cnt;
+   unsigned int dmesg_read_cnt;
+   unsigned int dmesg_write_cnt;
+   /*
+*

[PATCH v3 0/8] This patch-set is to enable Guest CET support

2019-02-25 Thread Yang Weijiang

Control-flow Enforcement Technology (CET) provides protection against
return/jump-oriented programming (ROP) attacks. To make kvm Guest OS own
the capability, this patch-set is required. It enables CET related CPUID
report, xsaves/xrstors, vmx entry configuration etc. for Guest OS.

PATCH 1: Define CET VMCS fields and bits.
PATCH 2/3  : Report CET feature support in CPUID.
PATCH 4: Fix xsaves size calculation issue.
PATCH 5: Pass through CET MSRs to Guest.
PATCH 6: Set Guest CET state auto loading bit.
PATCH 7: Enable CET xsaves bits support in XSS.
PATCH 8: Add CET MSR user space access interface.

Changelog:
 v3:
 - Modified patches to make Guest CET independent to Host enabling.
 - Added patch 8 to add user space access for Guest CET MSR access.
 - Modified code comments and patch description to reflect changes.
  
 v2:
 - Re-ordered patch sequence, combined one patch.
 - Added more description for CET related VMCS fields.
 - Added Host CET capability check while enabling Guest CET loading bit.
 - Added Host CET capability check while reporting Guest CPUID(EAX=7,
   ECX=0).
 - Modified code in reporting Guest CPUID(EAX=D,ECX>=1), make it clearer.
 - Added Host and Guest XSS mask check while setting bits for Guest XSS.


Yang Weijiang (8):
  KVM:VMX: Define CET VMCS fields and bits
  KVM:CPUID: Define CET CPUID bits and CR4.CET master enable bit.
  KVM:CPUID: Add CPUID support for Guest CET
  KVM:CPUID: Fix xsaves area size calculation for CPUID.(EAX=0xD,ECX=1).
  KVM:VMX: Pass through host CET related MSRs to Guest.
  KVM:VMX: Load Guest CET via VMCS when CET is enabled in Guest
  KVM:X86: Add XSS bit 11 and 12 support for CET xsaves/xrstors.
  KVM:X86: Add user-space read/write interface for CET MSRs.

 arch/x86/include/asm/kvm_host.h |  3 +-
 arch/x86/include/asm/vmx.h  |  8 
 arch/x86/kvm/cpuid.c| 67 -
 arch/x86/kvm/vmx.c  | 53 --
 arch/x86/kvm/x86.c  | 46 --
 arch/x86/kvm/x86.h  |  4 ++
 6 files changed, 157 insertions(+), 24 deletions(-)

-- 
2.17.1

[PATCH v3 3/8] KVM:CPUID: Add CPUID support for Guest CET

2019-02-25 Thread Yang Weijiang

Guest CET SHSTK and IBT capability are reported via
CPUID.(EAX=7, ECX=0):ECX[bit 7] and EDX[bit 20] respectively.
Guest user mode and supervisor mode xsaves component size
is reported via CPUID.(EAX=0xD, ECX=1):ECX[bit 11] and ECX[bit 12]
respectively.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/cpuid.c | 60 +---
 arch/x86/kvm/x86.h   |  4 +++
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index cb1aece25b17..5e05756cc6db 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -65,6 +65,16 @@ u64 kvm_supported_xcr0(void)
return xcr0;
 }
 
+u64 kvm_supported_xss(void)
+{
+   u64 xss;
+
+   rdmsrl(MSR_IA32_XSS, xss);
+   xss &= KVM_SUPPORTED_XSS;
+   return xss;
+}
+EXPORT_SYMBOL(kvm_supported_xss);
+
 #define F(x) bit(X86_FEATURE_##x)
 
 /* For scattered features from cpufeatures.h; we currently expose none */
@@ -323,6 +333,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
 u32 index, int *nent, int maxnent)
 {
int r;
+   u32 eax, ebx, ecx, edx;
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
 #ifdef CONFIG_X86_64
unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
@@ -503,6 +514,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
 * if the host doesn't support it.
 */
entry->edx |= F(ARCH_CAPABILITIES);
+
+   /*
+* Guest OS CET enabling is designed independent to
+* host enabling, it only has dependency on Host HW
+* capability, if it has, report CET support to
+* Guest.
+*/
+   cpuid_count(7, 0, , , , );
+   if (ecx & F(SHSTK))
+   entry->ecx |= F(SHSTK);
+
+   if (edx & F(IBT))
+   entry->edx |= F(IBT);
+
} else {
entry->ebx = 0;
entry->ecx = 0;
@@ -564,14 +589,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
}
case 0xd: {
int idx, i;
-   u64 supported = kvm_supported_xcr0();
+   u64 u_supported = kvm_supported_xcr0();
+   u64 s_supported = kvm_supported_xss();
+   u64 supported;
+   int compacted;
 
-   entry->eax &= supported;
-   entry->ebx = xstate_required_size(supported, false);
+   entry->eax &= u_supported;
+   entry->ebx = xstate_required_size(u_supported, false);
entry->ecx = entry->ebx;
-   entry->edx &= supported >> 32;
+   entry->edx &= u_supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-   if (!supported)
+   if (!u_supported && !s_supported)
break;
 
for (idx = 1, i = 1; idx < 64; ++idx) {
@@ -583,19 +611,23 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
if (idx == 1) {
entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
cpuid_mask([i].eax, CPUID_D_1_EAX);
-   entry[i].ebx = 0;
-   if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
-   entry[i].ebx =
-   xstate_required_size(supported,
-true);
+   supported = u_supported | s_supported;
+   compacted = entry[i].eax &
+   (F(XSAVES) | F(XSAVEC));
+   entry[i].ebx = xstate_required_size(supported,
+   compacted);
+   entry[i].ecx &= s_supported;
+   entry[i].edx = 0;
} else {
+   supported = (entry[i].ecx & 1) ? s_supported :
+u_supported;
if (entry[i].eax == 0 || !(supported & mask))
continue;
-   if (WARN_ON_ONCE(entry[i].ecx & 1))
-   continue;
+   entry[i].ecx &= 1;
+   entry[i].edx = 0;
+   if (entry[i].ecx)
+   entry[i].ebx = 0;

[PATCH v3 2/8] KVM:CPUID: Define CET CPUID bits and CR4.CET master enable bit.

2019-02-25 Thread Yang Weijiang

Guest queries CET SHSTK and IBT support by CPUID.(EAX=0x7,ECX=0),
in return, ECX[bit 7] corresponds to SHSTK feature, and EDX[bit 20]
corresponds to IBT feature.
CR4.CET[bit 23] is CET master enable bit, it controls CET feature
availability in guest OS.

Note: Although SHSTK or IBT can be enabled independently,
  either of the features is controlled by CR4.CET.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/include/asm/kvm_host.h | 3 ++-
 arch/x86/kvm/cpuid.c| 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55e51ff7e421..df002936088f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -90,7 +90,8 @@
  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | 
X86_CR4_PCIDE \
  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_CET))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7bcfa61375c0..cb1aece25b17 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -406,12 +406,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-   F(CLDEMOTE);
+   F(CLDEMOTE) | F(SHSTK);
 
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
-   F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
+   F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(IBT);
 
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
-- 
2.17.1

[PATCH v3 4/8] KVM:CPUID: Fix xsaves area size calculation for CPUID.(EAX=0xD,ECX=1).

2019-02-25 Thread Yang Weijiang

According to the latest Software Development Manual vol.2/3.2,
CPUID.(EAX=0xD,ECX=1) should report the xsaves area size
containing all states enabled by XCR0|IA32_XSS.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/cpuid.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 5e05756cc6db..f71c3d8d6ec3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -131,7 +131,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
if (best && (best->eax & (F(XSAVES) | F(XSAVEC
-   best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+   best->ebx = xstate_required_size(vcpu->arch.xcr0 |
+   kvm_supported_xss(), true);
 
/*
 * The existing code assumes virtual address is 48-bit or 57-bit in the
-- 
2.17.1

[PATCH v3 8/8] KVM:X86: Add user-space read/write interface for CET MSRs.

2019-02-25 Thread Yang Weijiang

The Guest CET MSRs are stored in the fpu storage area and are
saved/restored by XSAVES/XRSTORS, so using kvm_load_guest_fpu
to restore them is a convenient way to let KVM access
them. After the operation finishes, the Host MSR contents
need to be restored by kvm_put_guest_fpu.

Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/x86.c | 46 +++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0f8b71b2132..a4bdbef3a712 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -75,6 +75,8 @@
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
+#define MAX_GUEST_CET_MSRS 5
+
 u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
 EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
 
@@ -214,6 +216,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 u64 __read_mostly host_xcr0;
 
 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
 {
@@ -2889,21 +2893,57 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 }
 EXPORT_SYMBOL_GPL(kvm_get_msr_common);
 
+static int do_cet_msrs(struct kvm_vcpu *vcpu, int entry_num,
+  struct kvm_msr_entry *entries, bool read)
+{
+   int i = entry_num;
+   int j = MAX_GUEST_CET_MSRS;
+   bool has_cet;
+
+   has_cet = guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) |
+ guest_cpuid_has(vcpu, X86_FEATURE_IBT);
+   /*
+* Guest CET MSRs are saved by XSAVES, so need to restore
+* them first, then read out or update the contents and
+* restore Host ones.
+*/
+   if (has_cet) {
+   kvm_load_guest_fpu(vcpu);
+
+   if (read) {
+   for (j = 0; j < MAX_GUEST_CET_MSRS; j++, i++)
+   rdmsrl(entries[i].index, entries[i].data);
+   } else {
+   for (j = 0; j < MAX_GUEST_CET_MSRS; j++, i++)
+   wrmsrl(entries[i].index, entries[i].data);
+   }
+
+   kvm_put_guest_fpu(vcpu);
+   }
+   return j;
+}
 /*
  * Read or write a bunch of msrs. All parameters are kernel addresses.
  *
  * @return number of msrs set successfully.
  */
 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
-   struct kvm_msr_entry *entries,
+   struct kvm_msr_entry *entries, bool read,
int (*do_msr)(struct kvm_vcpu *vcpu,
  unsigned index, u64 *data))
 {
int i;
 
-   for (i = 0; i < msrs->nmsrs; ++i)
+   for (i = 0; i < msrs->nmsrs; ++i) {
+   /* If it comes to CET related MSRs, read them together. */
+   if (entries[i].index == MSR_IA32_U_CET) {
+   i += do_cet_msrs(vcpu, i, entries, read) - 1;
+   continue;
+   }
+
if (do_msr(vcpu, entries[i].index, [i].data))
break;
+   }
 
return i;
 }
@@ -2938,7 +2978,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs 
__user *user_msrs,
goto out;
}
 
-   r = n = __msr_io(vcpu, , entries, do_msr);
+   r = n = __msr_io(vcpu, , entries, !!writeback, do_msr);
if (r < 0)
goto out_free;
 
-- 
2.17.1

[PATCH v3 6/8] KVM:VMX: Load Guest CET via VMCS when CET is enabled in Guest

2019-02-25 Thread Yang Weijiang

"Load Guest CET state" bit controls whether guest CET states
will be loaded at Guest entry. Before doing that, KVM needs
to check if CPU CET feature is available.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/vmx.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 89ee086e1729..d32cee9ee079 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -55,6 +55,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 #include "pmu.h"
@@ -4065,6 +4066,20 @@ static inline bool vmx_feature_control_msr_valid(struct 
kvm_vcpu *vcpu,
return !(val & ~valid_bits);
 }
 
+static int vmx_guest_cet_cap(struct kvm_vcpu *vcpu)
+{
+   u32 eax, ebx, ecx, edx;
+
+   /*
+* Guest CET can work as long as HW supports the feature, independent
+* to Host SW enabling status.
+*/
+   cpuid_count(7, 0, , , , );
+
+   return ((ecx & bit(X86_FEATURE_SHSTK)) |
+   (edx & bit(X86_FEATURE_IBT))) ? 1 : 0;
+}
+
 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 {
switch (msr->index) {
@@ -5409,6 +5424,23 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned 
long cr4)
return 1;
}
 
+   /*
+* To enable Guest CET, check whether CPU CET feature is
+* available, if it's there, set Guest CET state loading bit
+* per CR4.CET status, otherwise, return a fault to Guest.
+*/
+   if (vmx_guest_cet_cap(vcpu)) {
+   if (cr4 & X86_CR4_CET) {
+   vmcs_set_bits(VM_ENTRY_CONTROLS,
+ VM_ENTRY_LOAD_GUEST_CET_STATE);
+   } else {
+   vmcs_clear_bits(VM_ENTRY_CONTROLS,
+   VM_ENTRY_LOAD_GUEST_CET_STATE);
+   }
+   } else if (cr4 & X86_CR4_CET) {
+   return 1;
+   }
+
if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;
 
-- 
2.17.1

[PATCH v3 7/8] KVM:X86: Add XSS bit 11 and 12 support for CET xsaves/xrstors.

2019-02-25 Thread Yang Weijiang

For guest XSS, only bit 11 (user states) and bit 12 (supervisor
states) are supported right now. If other bits need to be set,
the KVM_SUPPORTED_XSS macro must be modified to support them.

Signed-off-by: Zhang Yi Z 
Signed-off-by: Yang Weijiang 
---
 arch/x86/kvm/vmx.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d32cee9ee079..68908ed7b151 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -4336,12 +4337,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
+
/*
-* The only supported bit as of Skylake is bit 8, but
-* it is not supported on KVM.
+* Check bits being set are supported in KVM.
 */
-   if (data != 0)
+   if (data & ~kvm_supported_xss())
return 1;
+
vcpu->arch.ia32_xss = data;
if (vcpu->arch.ia32_xss != host_xss)
add_atomic_switch_msr(vmx, MSR_IA32_XSS,
-- 
2.17.1

Re: [PATCH v2 21/26] userfaultfd: wp: add the writeprotect API to userfaultfd ioctl

2019-02-25 Thread Peter Xu

On Mon, Feb 25, 2019 at 11:03:51PM +0200, Mike Rapoport wrote:
> On Tue, Feb 12, 2019 at 10:56:27AM +0800, Peter Xu wrote:
> > From: Andrea Arcangeli 
> > 
> > v1: From: Shaohua Li 
> > 
> > v2: cleanups, remove a branch.
> > 
> > [peterx writes up the commit message, as below...]
> > 
> > This patch introduces the new uffd-wp APIs for userspace.
> > 
> > Firstly, we'll allow to do UFFDIO_REGISTER with write protection
> > tracking using the new UFFDIO_REGISTER_MODE_WP flag.  Note that this
> > flag can co-exist with the existing UFFDIO_REGISTER_MODE_MISSING, in
> > which case the userspace program can not only resolve missing page
> > faults, and at the same time tracking page data changes along the way.
> > 
> > Secondly, we introduced the new UFFDIO_WRITEPROTECT API to do page
> > level write protection tracking.  Note that we will need to register
> > the memory region with UFFDIO_REGISTER_MODE_WP before that.
> > 
> > Signed-off-by: Andrea Arcangeli 
> > [peterx: remove useless block, write commit message, check against
> >  VM_MAYWRITE rather than VM_WRITE when register]
> > Signed-off-by: Peter Xu 
> > ---
> >  fs/userfaultfd.c | 82 +---
> >  include/uapi/linux/userfaultfd.h | 11 +
> >  2 files changed, 77 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > index 3092885c9d2c..81962d62520c 100644
> > --- a/fs/userfaultfd.c
> > +++ b/fs/userfaultfd.c
> > @@ -304,8 +304,11 @@ static inline bool userfaultfd_must_wait(struct 
> > userfaultfd_ctx *ctx,
> > if (!pmd_present(_pmd))
> > goto out;
> > 
> > -   if (pmd_trans_huge(_pmd))
> > +   if (pmd_trans_huge(_pmd)) {
> > +   if (!pmd_write(_pmd) && (reason & VM_UFFD_WP))
> > +   ret = true;
> > goto out;
> > +   }
> > 
> > /*
> >  * the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
> > @@ -318,6 +321,8 @@ static inline bool userfaultfd_must_wait(struct 
> > userfaultfd_ctx *ctx,
> >  */
> > if (pte_none(*pte))
> > ret = true;
> > +   if (!pte_write(*pte) && (reason & VM_UFFD_WP))
> > +   ret = true;
> > pte_unmap(pte);
> > 
> >  out:
> > @@ -1251,10 +1256,13 @@ static __always_inline int validate_range(struct 
> > mm_struct *mm,
> > return 0;
> >  }
> > 
> > -static inline bool vma_can_userfault(struct vm_area_struct *vma)
> > +static inline bool vma_can_userfault(struct vm_area_struct *vma,
> > +unsigned long vm_flags)
> >  {
> > -   return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
> > -   vma_is_shmem(vma);
> > +   /* FIXME: add WP support to hugetlbfs and shmem */
> > +   return vma_is_anonymous(vma) ||
> > +   ((is_vm_hugetlb_page(vma) || vma_is_shmem(vma)) &&
> > +!(vm_flags & VM_UFFD_WP));
> >  }
> > 
> >  static int userfaultfd_register(struct userfaultfd_ctx *ctx,
> > @@ -1286,15 +1294,8 @@ static int userfaultfd_register(struct 
> > userfaultfd_ctx *ctx,
> > vm_flags = 0;
> > if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
> > vm_flags |= VM_UFFD_MISSING;
> > -   if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) {
> > +   if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)
> > vm_flags |= VM_UFFD_WP;
> > -   /*
> > -* FIXME: remove the below error constraint by
> > -* implementing the wprotect tracking mode.
> > -*/
> > -   ret = -EINVAL;
> > -   goto out;
> > -   }
> > 
> > ret = validate_range(mm, uffdio_register.range.start,
> >  uffdio_register.range.len);
> > @@ -1342,7 +1343,7 @@ static int userfaultfd_register(struct 
> > userfaultfd_ctx *ctx,
> > 
> > /* check not compatible vmas */
> > ret = -EINVAL;
> > -   if (!vma_can_userfault(cur))
> > +   if (!vma_can_userfault(cur, vm_flags))
> > goto out_unlock;
> > 
> > /*
> > @@ -1370,6 +1371,8 @@ static int userfaultfd_register(struct 
> > userfaultfd_ctx *ctx,
> > if (end & (vma_hpagesize - 1))
> > goto out_unlock;
> > }
> > +   if ((vm_flags & VM_UFFD_WP) && !(cur->vm_flags & VM_MAYWRITE))
> > +   goto out_unlock;
> > 
> > /*
> >  * Check that this vma isn't already owned by a
> > @@ -1399,7 +1402,7 @@ static int userfaultfd_register(struct 
> > userfaultfd_ctx *ctx,
> > do {
> > cond_resched();
> > 
> > -   BUG_ON(!vma_can_userfault(vma));
> > +   BUG_ON(!vma_can_userfault(vma, vm_flags));
> > BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
> >vma->vm_userfaultfd_ctx.ctx != ctx);
> > WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
> > @@ -1534,7 +1537,7 @@ static int userfaultfd_unregister(struct 
> > userfaultfd_ctx *ctx,
> >  * provides for more

[PATCH v3 9/9] perf inject: enable COMPRESSED records decompression

2019-02-25 Thread Alexey Budankov



Initialize the decompression API so that COMPRESSED records are
decompressed into the resulting output data file.

Signed-off-by: Alexey Budankov 
---
 tools/perf/builtin-inject.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 9bb1f35d5cb7..5a5bc4207766 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -839,6 +839,9 @@ int cmd_inject(int argc, const char **argv)
if (inject.session == NULL)
return -1;
 
+   if (zstd_init(&(inject.session->zstd_data), 0) < 0)
+   pr_warning("Decompression initialization failed.\n");
+
if (inject.build_ids) {
/*
 * to make sure the mmap records are ordered correctly
@@ -869,6 +872,7 @@ int cmd_inject(int argc, const char **argv)
ret = __cmd_inject();
 
 out_delete:
+   zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
return ret;
 }

[PATCH v3 8/9] perf report: implement record trace decompression

2019-02-25 Thread Alexey Budankov



Trace frames containing PERF_RECORD_COMPRESSED records are
decompressed using functions from zstd.c into a linked list
of mmaped memory regions of comp_mmap_len size (struct decomp).

After decompression of one COMPRESSED record its content is 
iterated and fetched for usual processing. The mmaped memory regions 
with decompressed events are kept till the tool process termination.

When dumping raw trace (e.g., perf report -D --header) file
offsets of events from compressed records are printed as zero.

Signed-off-by: Alexey Budankov 
---
 tools/perf/builtin-report.c |   5 +-
 tools/perf/util/compress.h  |   4 ++
 tools/perf/util/session.c   | 124 +++-
 tools/perf/util/session.h   |  10 +++
 tools/perf/util/tool.h  |   2 +
 tools/perf/util/zstd.c  |  48 ++
 6 files changed, 191 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2e8c74d6430c..5f4483b525ed 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1215,6 +1215,9 @@ int cmd_report(int argc, const char **argv)
if (session == NULL)
return -1;
 
+   if (zstd_init(&(session->zstd_data), 0) < 0)
+   pr_warning("Decompression initialization failed. Reported data 
may be incomplete.\n");
+
if (report.queue_size) {
ordered_events__set_alloc_size(>ordered_events,
   report.queue_size);
@@ -1427,7 +1430,7 @@ int cmd_report(int argc, const char **argv)
 
 error:
zfree(_range);
-
+   zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return ret;
 }
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index e0987616db94..4f6672770ebb 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -20,6 +20,7 @@ bool lzma_is_compressed(const char *input);
 struct zstd_data {
 #ifdef HAVE_ZSTD_SUPPORT
ZSTD_CStream*cstream;
+   ZSTD_DStream*dstream;
 #endif
 };
 
@@ -30,4 +31,7 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data,
void *dst, size_t dst_size, void *src, size_t src_size, size_t 
max_record_size,
size_t process_header(void *record, size_t increment));
 
+size_t zstd_decompress_stream(struct zstd_data *data,
+   void *src, size_t src_size, void *dst, size_t dst_size);
+
 #endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c764bbc91009..b1bf37c30461 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -29,6 +29,67 @@
 #include "stat.h"
 #include "arch/common.h"
 
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+   union perf_event *event, u64 
file_offset)
+{
+   void *src;
+   size_t decomp_size, src_size;
+   u64 decomp_last_rem = 0;
+   size_t decomp_len = session->header.env.comp_mmap_len;
+   struct decomp *decomp, *decomp_last = session->decomp_last;
+
+   decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, 
PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+   if (decomp == MAP_FAILED) {
+   pr_err("Couldn't allocate memory for decompression\n");
+   return -1;
+   }
+
+   decomp->file_pos = file_offset;
+   decomp->head = 0;
+
+   if (decomp_last) {
+   decomp_last_rem = decomp_last->size - decomp_last->head;
+   memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), 
decomp_last_rem);
+   decomp->size = decomp_last_rem;
+   }
+
+   src = (void *)event + sizeof(struct compressed_event);
+   src_size = event->pack.header.size - sizeof(struct compressed_event);
+
+   decomp_size = zstd_decompress_stream(&(session->zstd_data), src, 
src_size,
+   &(decomp->data[decomp_last_rem]), decomp_len - 
decomp_last_rem);
+   if (!decomp_size) {
+   munmap(decomp, sizeof(struct decomp) + decomp_len);
+   pr_err("Couldn't decompress data\n");
+   return -1;
+   }
+
+   decomp->size += decomp_size;
+
+   if (session->decomp == NULL) {
+   session->decomp = decomp;
+   session->decomp_last = decomp;
+   } else {
+   session->decomp_last->next = decomp;
+   session->decomp_last = decomp;
+   }
+
+   pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+
+   return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+static int perf_session__process_compressed_event(struct perf_session *session 
__maybe_unused,
+   union perf_event *event __maybe_unused,
+   u64 file_offset __maybe_unused)
+{
+   dump_printf(": unhandled!\n");
+   return 0;
+}
+#endif
+
 static int

Re: [PATCH v2 23/26] userfaultfd: wp: don't wake up when doing write protect

2019-02-25 Thread Peter Xu

On Mon, Feb 25, 2019 at 11:09:35PM +0200, Mike Rapoport wrote:
> On Tue, Feb 12, 2019 at 10:56:29AM +0800, Peter Xu wrote:
> > It does not make sense to try to wake up any waiting thread when we're
> > write-protecting a memory region.  Only wake up when resolving a write
> > protected page fault.
> > 
> > Signed-off-by: Peter Xu 
> > ---
> >  fs/userfaultfd.c | 13 -
> >  1 file changed, 8 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > index 81962d62520c..f1f61a0278c2 100644
> > --- a/fs/userfaultfd.c
> > +++ b/fs/userfaultfd.c
> > @@ -1771,6 +1771,7 @@ static int userfaultfd_writeprotect(struct 
> > userfaultfd_ctx *ctx,
> > struct uffdio_writeprotect uffdio_wp;
> > struct uffdio_writeprotect __user *user_uffdio_wp;
> > struct userfaultfd_wake_range range;
> > +   bool mode_wp, mode_dontwake;
> > 
> > if (READ_ONCE(ctx->mmap_changing))
> > return -EAGAIN;
> > @@ -1789,18 +1790,20 @@ static int userfaultfd_writeprotect(struct 
> > userfaultfd_ctx *ctx,
> > if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
> >UFFDIO_WRITEPROTECT_MODE_WP))
> > return -EINVAL;
> > -   if ((uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP) &&
> > -(uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE))
> > +
> > +   mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
> > +   mode_dontwake = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
> > +
> > +   if (mode_wp && mode_dontwake)
> > return -EINVAL;
> 
> This actually means the opposite of the commit message text ;-)
> 
> Is any dependency of _WP and _DONTWAKE needed at all?

So this is indeed confusing at least, because both you and Jerome have
asked the same question... :)

My understanding is that we don't have any reason to wake up any
thread when we are write-protecting a range, in that sense the flag
UFFDIO_WRITEPROTECT_MODE_DONTWAKE is already meaningless in the
UFFDIO_WRITEPROTECT ioctl context.  So before everything here's how
these flags are defined:

struct uffdio_writeprotect {
struct uffdio_range range;
/* !WP means undo writeprotect. DONTWAKE is valid only with !WP */
#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE   ((__u64)1<<1)
__u64 mode;
};

To make it clear, we simply define it as "DONTWAKE is valid only with
!WP".  With that, "mode_wp && mode_dontwake" is indeed a
meaningless flag combination.  Though please note that it does not
mean that the operation ("don't wake up the thread") is meaningless -
that's what we'll do no matter what when WP==1.  IMHO it's only about
the interface not the behavior.

I don't have a good way to make this clearer because firstly we'll
need the WP flag to mark whether we're protecting or unprotecting the
pages.  Later on, we need DONTWAKE for page fault handling case to
mark that we don't want to wake up the waiting thread now.  So both
the flags have their reason to stay so far.  Then with all these in
mind what I can think of is only to forbid using DONTWAKE in WP case,
and that's how above definition comes (I believe, because it was
defined that way even before I started to work on it and I think it
makes sense).

Thanks,

-- 
Peter Xu

Re: [PATCH v2 2/2] Drivers: hv: vmbus: Add a channel ring buffer mutex lock

2019-02-25 Thread Kimberly Brown

On Sun, Feb 24, 2019 at 04:53:03PM +, Michael Kelley wrote:
> From: Kimberly Brown  Sent: Thursday, February 21, 2019 
> 7:47 PM
> > 
> > The "_show" functions that access channel ring buffer data are
> > vulnerable to a race condition that can result in a NULL pointer
> > dereference. This problem was discussed here:
> > https://lkml.org/lkml/2018/10/18/779 
> >
> > To prevent this from occurring, add a new mutex lock,
> > "ring_buffer_mutex", to the vmbus_channel struct.
> > 
> > Acquire/release "ring_buffer_mutex" in the functions that can set the
> > ring buffer pointer to NULL: vmbus_free_ring() and __vmbus_open().
> > 
> > Acquire/release "ring_buffer_mutex" in the four channel-level "_show"
> > functions that access ring buffer data. Remove the "const" qualifier
> > from the "struct vmbus_channel *chan" parameter of the channel-level
> > "_show" functions so that "ring_buffer_mutex" can be acquired/released
> > in these functions.
> > 
> > Acquire/release "ring_buffer_mutex" in hv_ringbuffer_get_debuginfo().
> > Pass the channel pointer to hv_ringbuffer_get_debuginfo() so that
> > "ring_buffer_mutex" can be accessed in this function.
> > 
> > Signed-off-by: Kimberly Brown 
> 
> I've reviewed the code.  I believe it is correct and fixes the race
> condition.  Unfortunately, the code ended up being messier than I
> had hoped, and in particular, the need to pass the channel pointer
> into the ring buffer functions is distasteful.  An alternate idea is to
> put the new mutex into the hv_ring_buffer_info structure.  This results
> in two mutex's since there's a separate hv_ring_buffer_info structure for
> the "in" ring and the "out" ring.  But it makes the ring buffer functions
> more self-contained and able to operate without knowledge of the
> channel.   The mutex can be obtained in hv_ringbuffer_cleanup() instead
> of in the vmbus functions, and hv_ringbuffer_get_debuginfo() doesn't
> need the channel pointer.
> 
> The "const" still has to dropped from the channel pointer because
> the hv_ring_buffer_info structures are inline in the channel structure,
> but that's less objectionable.   The extra memory for two mutex's isn't
> really a problem, and none of the code paths are performance
> sensitive.
> 
> It's a tradeoff.  I think I slightly prefer moving the mutex to the
> hv_ring_buffer_info structure, but could also be persuaded to
> take it like it is.
> 

Thanks for the feedback! I don't have a compelling reason to keep the
lock in the vmbus_channel struct. I chose this approach because only one
lock would be required, rather than two. But, as you noted, using one
lock requires some tradeoffs.

I've looked through the changes that would be required to use two locks,
and I agree with you; I prefer using two locks. I'll submit a v3 for this
patch.

Thanks,
Kim



> Thoughts?
> 
> Michael
>

[PATCH 10/14] powercap/intel_rapl: update rapl domain name and debug messages

2019-02-25 Thread Len Brown

From: Zhang Rui 

The RAPL domain "name" attribute contains "Package-N",
which is ambiguous on multi-die per-package systems.

Update the name to "package-X-die-Y" on those systems.

No change on systems without multi-die.

Driver debug messages are also updated.

Signed-off-by: Zhang Rui 
Signed-off-by: Len Brown 
Acked-by: Rafael J. Wysocki 
Cc: linux...@vger.kernel.org
---
 drivers/powercap/intel_rapl.c | 57 ---
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 8723e9ae7436..47719c995f61 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -178,12 +178,15 @@ struct rapl_domain {
 #define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
 
+/* maximum rapl package domain name: package-%d-die-%d */
+#define PACKAGE_DOMAIN_NAME_LENGTH 30
 
-/* Each physical package contains multiple domains, these are the common
+
+/* Each rapl package contains multiple domains, these are the common
  * data across RAPL domains within a package.
  */
 struct rapl_package {
-   unsigned int id; /* physical package/socket id */
+   unsigned int id; /* logical die id, equals physical 1-die systems */
unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */
unsigned int power_unit;
@@ -198,6 +201,7 @@ struct rapl_package {
int lead_cpu; /* one active cpu per package for access */
/* Track active cpus */
struct cpumask cpumask;
+   char name[PACKAGE_DOMAIN_NAME_LENGTH];
 };
 
 struct rapl_defaults {
@@ -926,8 +930,8 @@ static int rapl_check_unit_core(struct rapl_package *rp, 
int cpu)
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 100 / (1 << value);
 
-   pr_debug("Core CPU package %d energy=%dpJ, time=%dus, power=%duW\n",
-   rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
+   pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
+   rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
 
return 0;
 }
@@ -951,8 +955,8 @@ static int rapl_check_unit_atom(struct rapl_package *rp, 
int cpu)
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 100 / (1 << value);
 
-   pr_debug("Atom package %d energy=%dpJ, time=%dus, power=%duW\n",
-   rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
+   pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
+   rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
 
return 0;
 }
@@ -1179,7 +1183,7 @@ static void rapl_update_domain_data(struct rapl_package 
*rp)
u64 val;
 
for (dmn = 0; dmn < rp->nr_domains; dmn++) {
-   pr_debug("update package %d domain %s data\n", rp->id,
+   pr_debug("update %s domain %s data\n", rp->name,
 rp->domains[dmn].name);
/* exclude non-raw primitives */
for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
@@ -1204,7 +1208,6 @@ static void rapl_unregister_powercap(void)
 static int rapl_package_register_powercap(struct rapl_package *rp)
 {
struct rapl_domain *rd;
-   char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/
struct powercap_zone *power_zone = NULL;
int nr_pl, ret;
 
@@ -1215,20 +1218,16 @@ static int rapl_package_register_powercap(struct 
rapl_package *rp)
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
if (rd->id == RAPL_DOMAIN_PACKAGE) {
nr_pl = find_nr_power_limit(rd);
-   pr_debug("register socket %d package domain %s\n",
-   rp->id, rd->name);
-   memset(dev_name, 0, sizeof(dev_name));
-   snprintf(dev_name, sizeof(dev_name), "%s-%d",
-   rd->name, rp->id);
+   pr_debug("register package domain %s\n", rp->name);
power_zone = powercap_register_zone(>power_zone,
control_type,
-   dev_name, NULL,
+   rp->name, NULL,
_ops[rd->id],
nr_pl,
_ops);
if (IS_ERR(power_zone)) {
-   pr_debug("failed to register package, %d\n",
-   rp->id);
+   pr_debug("failed to register power zone %s\n",
+   rp->name);
return PTR_ERR(power_zone);

[PATCH v3 7/9] perf record: implement -z,--compression_level=n option and compression

2019-02-25 Thread Alexey Budankov



Implemented -z,--compression_level=n option that enables compression
of mmaped kernel data buffers content in runtime during perf record
sampling collection.

Compression is implemented using the functions from zstd.c. As the
memory to operate on, the compression employs the mmap->data buffer
in case of serial trace writing and the mmap AIO buffers in case of
AIO trace writing. If the Zstd streaming compression API fails for
some reason, the data to be compressed are just copied into the
memory buffers using memcpy().

Compressed trace frame consists of an array of PERF_RECORD_COMPRESSED
records. Each element of the array is no longer than 64KiB because of
the u16 size limitation and consists of a perf_event_header followed by the
compressed chunk that is decompressed on the loading stage. --mmap-flush
option value can be used to avoid compression of every single byte of
data and possibly increase compression ratio.

Compression overhead has been measured for serial and AIO trace writing
when profiling matrix multiplication workload:

-
| SERIAL  | AIO-1   |
-
|-z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) |
|
| 0 | 1,00   | 1,000179,424   | 1,00   | 1,000187,527   |
| 1 | 1,04   | 8,427181,148   | 1,01   | 8,474188,562   |
| 2 | 1,07   | 8,055186,953   | 1,03   | 7,912191,773   |
| 3 | 1,04   | 8,283181,908   | 1,03   | 8,220191,078   |
| 5 | 1,09   | 8,101187,705   | 1,05   | 7,780190,065   |
| 8 | 1,05   | 9,217179,191   | 1,12   | 6,111193,024   |
-

OVH = (Execution time with -z N) / (Execution time with -z 0)

ratio - compression ratio
size  - number of bytes that was compressed

size ~= trace size x ratio

Signed-off-by: Alexey Budankov 
---
 tools/perf/Documentation/perf-record.txt |  5 ++
 tools/perf/builtin-record.c  | 87 
 tools/perf/util/mmap.c   | 31 ++---
 tools/perf/util/mmap.h   | 13 ++--
 tools/perf/util/session.h|  2 +
 5 files changed, 110 insertions(+), 28 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 8276d6517812..28c62a914c75 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -464,6 +464,11 @@ Set affinity mask of trace reading thread according to the 
policy defined by 'mo
 Minimal number of bytes accumulated in mmaped kernel buffer that is flushed to 
a storage (default: 1).
 Maximal allowed value is a quater of mmaped kernel buffer size.
 
+-z::
+--compression-level=n::
+Produce compressed trace using specified level n to save storage space (no 
compression: 0 - default,
+fastest compression: 1, smallest trace: 22)
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 71c67a87c713..fa50387334f2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -237,7 +237,7 @@ static int record__aio_sync(struct perf_mmap *md, bool 
sync_all)
} while (1);
 }
 
-static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t 
size, off_t off)
+static int record__aio_pushfn(void *to, void *bf, size_t size, off_t off, 
struct aiocb *cblock)
 {
struct record *rec = to;
int ret, trace_fd = rec->session->data->file.fd;
@@ -264,13 +264,15 @@ static void record__aio_set_pos(int trace_fd, off_t pos)
lseek(trace_fd, pos, SEEK_SET);
 }
 
+static int record__aio_enabled(struct record *rec);
+
 static void record__aio_mmap_read_sync(struct record *rec)
 {
int i;
struct perf_evlist *evlist = rec->evlist;
struct perf_mmap *maps = evlist->mmap;
 
-   if (!rec->opts.nr_cblocks)
+   if (!record__aio_enabled(rec))
return;
 
for (i = 0; i < evlist->nr_mmaps; i++) {
@@ -292,25 +294,28 @@ static int record__aio_parse(const struct option *opt,
 
if (unset) {
opts->nr_cblocks = 0;
-   } else {
-   if (str)
-   opts->nr_cblocks = strtol(str, NULL, 0);
-   if (!opts->nr_cblocks)
-   opts->nr_cblocks = nr_cblocks_default;
+   return 0;
}
 
+   if (str)
+   opts->nr_cblocks = strtol(str, NULL, 0);
+   if (!opts->nr_cblocks)
+   opts->nr_cblocks = nr_cblocks_default;
+
+   if (opts->nr_cblocks > nr_cblocks_max)
+   opts->nr_cblocks = nr_cblocks_max;
+
return 0;
 }
 #else /* HAVE_AIO_SUPPORT */

[PATCH 0/14] v2 multi-die/package topology support

2019-02-25 Thread Len Brown

This patch series does 4 things.

1. Parses the new CPUID.1F leaf to discover multi-die/package topology

2. Export multi-die topology inside the kernel

3. Update 3 places (coretemp, pkgtemp, rapl) that need to know
   the difference between die and package-scope MSR.

   (Note: Kan Liang has a patch series on top of this one to similarly
   make the uncore perf code multi-die/package aware.)

4. Export multi-die topology to user-space via sysfs

These changes should have 0 impact on cache topology,
NUMA topology, Linux scheduler, or system performance.

These topology changes primarily impact parts of the kernel
and some applications that care about package MSR scope.
Also, some software is licensed per package, and other tools,
such as benchmark reporting software sometimes cares about packages.

Changes since v1:

Responded to all syntax and style feedback.

Fixed a bug in the CPUID.1F parsing code
we were parsing the leaf properly, but in some configs
we were not updating the maps correctly

topology_logical_die_id() replaces topology_unique_die_id()

Suggested by Kan, whose uncore code uses
topology_logical_package_id().

Restored sysfs core_siblings, core_siblings_list

v1 proposed re-defining this existing attribute to
be the threads in a die, rather than in a package.

For compatibility, decided rather to keep this
attribute unchanged, for now, even though
its name makes little sense, and it makes
no sense in a multi-die system.

Added sysfs package_threads, package_threads_list

Added this attribute to show the thread siblings in a package.
Exactly the same as "core_siblings" above, a name now deprecated.
This attribute name and definition is immune to future
topology changes.

Suggested by Brice.

Added sysfs die_threads, die_threads_list

Added this attribute to show which threads are siblings in a die.
V1 had proposed putting this info into "core_siblings", but we
decided to leave that legacy attribute alone.
This attribute name and definition is immune to future
topology changes.

On a single die-package system this attribute has same contents
as "package_threads".

Suggested by Brice.

Added sysfs core_threads, core_threads_list

Added this attribute to show which threads are siblings in a core.
Exactly the same as "thread_siblings", a name now deprecated.
This attribute name and definition is immune to future
topology changes.

Suggested by Brice.


For compatibility, sysfs cpuX/topology core_siblings
and core_siblings_list are unchanged.  They retain
their legacy definition of listing which CPUs share
the same package.

Patch Summary:

Unchanged:

[PATCH 01/14] x86 topology: Fix doc typo
[PATCH 02/14] topolgy: Simplify cputopology.txt formatting and
[PATCH 03/14] x86 smpboot: Rename match_die() to match_pkg()
[PATCH 05/14] cpu topology: Export die_id
[PATCH 07/14] powercap/intel_rapl: Simplify rapl_find_package()
[PATCH 10/14] powercap/intel_rapl: update rapl domain name and debug

Bug Fixed:

[PATCH 04/14] x86 topology: Add CPUID.1F multi-die/package support

New since v1:

[PATCH 06/14] x86 topology: Define topology_logical_die_id()
[PATCH 12/14] topology: Create package_threads sysfs attribute
[PATCH 13/14] topology: Create core_threads sysfs attribute
[PATCH 14/14] topology: Create die_threads sysfs attribute

Updated (to use logical_die_id()):

[PATCH 08/14] powercap/intel_rapl: Support multi-die/package
[PATCH 09/14] thermal/x86_pkg_temp_thermal: Support multi-die/package
[PATCH 11/14] hwmon/coretemp: Support multi-die/package



 Documentation/cputopology.txt| 72 ++-
 Documentation/x86/topology.txt   |  6 +-
 arch/x86/include/asm/processor.h |  5 +-
 arch/x86/include/asm/smp.h   |  1 +
 arch/x86/include/asm/topology.h  |  5 ++
 arch/x86/kernel/cpu/topology.c   | 85 +---
 arch/x86/kernel/smpboot.c| 73 +++-
 arch/x86/xen/smp_pv.c|  1 +
 drivers/base/topology.c  | 22 +++
 drivers/hwmon/coretemp.c |  9 +--
 drivers/powercap/intel_rapl.c| 75 +---
 drivers/thermal/intel/x86_pkg_temp_thermal.c |  9 +--
 include/linux/topology.h |  6 ++
 13 files changed, 276 insertions(+), 93 deletions(-)

These patches are also available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git x86

[PATCH 02/14] topolgy: Simplify cputopology.txt formatting and wording

2019-02-25 Thread Len Brown

From: Len Brown 

Syntax only, no functional or semantic change.

Signed-off-by: Len Brown 
Cc: linux-...@vger.kernel.org
---
 Documentation/cputopology.txt | 46 +--
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index c6e7e9196a8b..cb61277e2308 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -3,79 +3,79 @@ How CPU topology info is exported via sysfs
 ===
 
 Export CPU topology info via sysfs. Items (attributes) are similar
-to /proc/cpuinfo output of some architectures:
+to /proc/cpuinfo output of some architectures.  They reside in
+/sys/devices/system/cpu/cpuX/topology/:
 
-1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
+physical_package_id:
 
physical package id of cpuX. Typically corresponds to a physical
socket number, but the actual value is architecture and platform
dependent.
 
-2) /sys/devices/system/cpu/cpuX/topology/core_id:
+core_id:
 
the CPU core ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's).  The actual value is
architecture and platform dependent.
 
-3) /sys/devices/system/cpu/cpuX/topology/book_id:
+book_id:
 
the book ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's).  The actual value is
architecture and platform dependent.
 
-4) /sys/devices/system/cpu/cpuX/topology/drawer_id:
+drawer_id:
 
the drawer ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's).  The actual value is
architecture and platform dependent.
 
-5) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
+thread_siblings:
 
internal kernel map of cpuX's hardware threads within the same
core as cpuX.
 
-6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
+thread_siblings_list:
 
human-readable list of cpuX's hardware threads within the same
core as cpuX.
 
-7) /sys/devices/system/cpu/cpuX/topology/core_siblings:
+core_siblings:
 
internal kernel map of cpuX's hardware threads within the same
physical_package_id.
 
-8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
+core_siblings_list:
 
human-readable list of cpuX's hardware threads within the same
physical_package_id.
 
-9) /sys/devices/system/cpu/cpuX/topology/book_siblings:
+book_siblings:
 
internal kernel map of cpuX's hardware threads within the same
book_id.
 
-10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
+book_siblings_list:
 
human-readable list of cpuX's hardware threads within the same
book_id.
 
-11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings:
+drawer_siblings:
 
internal kernel map of cpuX's hardware threads within the same
drawer_id.
 
-12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list:
+drawer_siblings_list:
 
human-readable list of cpuX's hardware threads within the same
drawer_id.
 
-To implement it in an architecture-neutral way, a new source file,
-drivers/base/topology.c, is to export the 6 to 12 attributes. The book
-and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK
-and CONFIG_SCHED_DRAWER are selected.
+Architecture-neutral, drivers/base/topology.c, exports these attributes.
+However, the book and drawer related sysfs files will only be created if
+CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively.
 
-CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where
-they reflect the cpu and cache hierarchy.
+CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390,
+where they reflect the cpu and cache hierarchy.
 
 For an architecture to support this feature, it must define some of
 these macros in include/asm-XXX/topology.h::
@@ -98,10 +98,10 @@ To be consistent on all architectures, 
include/linux/topology.h
 provides default definitions for any of the above macros that are
 not defined by include/asm-XXX/topology.h:
 
-1) physical_package_id: -1
-2) core_id: 0
-3) sibling_cpumask: just the given CPU
-4) core_cpumask: just the given CPU
+1) topology_physical_package_id: -1
+2) topology_core_id: 0
+3) topology_sibling_cpumask: just the given CPU
+4) topology_core_cpumask: just the given CPU
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
-- 
2.18.0-rc0

[PATCH 01/14] x86 topology: Fix doc typo

2019-02-25 Thread Len Brown

From: Len Brown 

Syntax only, no functional or semantic change.

reflect actual cpuinfo_x86 field name:

s/logical_id/logical_proc_id/

Signed-off-by: Len Brown 
Cc: linux-...@vger.kernel.org
---
 Documentation/x86/topology.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
index 2953e3ec9a02..06b3cdbc4048 100644
--- a/Documentation/x86/topology.txt
+++ b/Documentation/x86/topology.txt
@@ -51,7 +51,7 @@ The topology of a system is described in the units of:
 The physical ID of the package. This information is retrieved via CPUID
 and deduced from the APIC IDs of the cores in the package.
 
-  - cpuinfo_x86.logical_id:
+  - cpuinfo_x86.logical_proc_id:
 
 The logical ID of the package. As we do not trust BIOSes to enumerate the
 packages in a consistent way, we introduced the concept of logical package
-- 
2.18.0-rc0

Re: [PATCH v4] mm/hugetlb: Fix unsigned overflow in __nr_hugepages_store_common()

2019-02-25 Thread David Rientjes

On Tue, 26 Feb 2019, Jing Xiangfeng wrote:

> On 2019/2/26 3:17, David Rientjes wrote:
> > On Mon, 25 Feb 2019, Mike Kravetz wrote:
> > 
> >> Ok, what about just moving the calculation/check inside the lock as in the
> >> untested patch below?
> >>
> >> Signed-off-by: Mike Kravetz 
> >> ---
> >>  mm/hugetlb.c | 34 ++
> >>  1 file changed, 26 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> >> index 1c5219193b9e..5afa77dc7bc8 100644
> >> --- a/mm/hugetlb.c
> >> +++ b/mm/hugetlb.c
> >> @@ -2274,7 +2274,7 @@ static int adjust_pool_surplus(struct hstate *h,
> >> nodemask_t *nodes_allowed,
> >>  }
> >>
> >>  #define persistent_huge_pages(h) (h->nr_huge_pages - 
> >> h->surplus_huge_pages)
> >> -static int set_max_huge_pages(struct hstate *h, unsigned long count,
> >> +static int set_max_huge_pages(struct hstate *h, unsigned long count, int 
> >> nid,
> >>nodemask_t *nodes_allowed)
> >>  {
> >>unsigned long min_count, ret;
> >> @@ -2289,6 +2289,23 @@ static int set_max_huge_pages(struct hstate *h, 
> >> unsigned
> >> long count,
> >>goto decrease_pool;
> >>}
> >>
> >> +  spin_lock(_lock);
> >> +
> >> +  /*
> >> +   * Check for a node specific request.  Adjust global count, but
> >> +   * restrict alloc/free to the specified node.
> >> +   */
> >> +  if (nid != NUMA_NO_NODE) {
> >> +  unsigned long old_count = count;
> >> +  count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
> >> +  /*
> >> +   * If user specified count causes overflow, set to
> >> +   * largest possible value.
> >> +   */
> >> +  if (count < old_count)
> >> +  count = ULONG_MAX;
> >> +  }
> >> +
> >>/*
> >> * Increase the pool size
> >> * First take pages out of surplus state.  Then make up the
> >> @@ -2300,7 +2317,6 @@ static int set_max_huge_pages(struct hstate *h, 
> >> unsigned
> >> long count,
> >> * pool might be one hugepage larger than it needs to be, but
> >> * within all the constraints specified by the sysctls.
> >> */
> >> -  spin_lock(_lock);
> >>while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
> >>if (!adjust_pool_surplus(h, nodes_allowed, -1))
> >>break;
> >> @@ -2421,16 +2437,18 @@ static ssize_t __nr_hugepages_store_common(bool
> >> obey_mempolicy,
> >>nodes_allowed = _states[N_MEMORY];
> >>}
> >>} else if (nodes_allowed) {
> >> +  /* Node specific request */
> >> +  init_nodemask_of_node(nodes_allowed, nid);
> >> +  } else {
> >>/*
> >> -   * per node hstate attribute: adjust count to global,
> >> -   * but restrict alloc/free to the specified node.
> >> +   * Node specific request, but we could not allocate
> >> +   * node mask.  Pass in ALL nodes, and clear nid.
> >> */
> >> -  count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
> >> -  init_nodemask_of_node(nodes_allowed, nid);
> >> -  } else
> >> +  nid = NUMA_NO_NODE;
> >>nodes_allowed = _states[N_MEMORY];
> >> +  }
> >>
> >> -  err = set_max_huge_pages(h, count, nodes_allowed);
> >> +  err = set_max_huge_pages(h, count, nid, nodes_allowed);
> >>if (err)
> >>goto out;
> >>
> > 
> > Looks good; Jing, could you test that this fixes your case?
> 
> Yes, I have tested this patch, it can also fix my case.

Great!

Reported-by: Jing Xiangfeng 
Tested-by: Jing Xiangfeng 
Acked-by: David Rientjes

[PATCH 08/14] powercap/intel_rapl: Support multi-die/package

2019-02-25 Thread Len Brown

From: Zhang Rui 

On the new dual-die/package systems, the RAPL MSR becomes die-scope.
Thus instead of one powercap device per physical package, now there
should be one powercap device for each unique die on these systems.

This patch introduces intel_rapl driver support for new
dual-die/package systems.

On hardware that does not have multi-die, topology_logical_die_id()
equals topology_physical_package_id(), thus there is no functional change.

Signed-off-by: Zhang Rui 
Signed-off-by: Len Brown 
Acked-by: Rafael J. Wysocki 
Cc: linux...@vger.kernel.org
---
 drivers/powercap/intel_rapl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 6057d9695fed..8723e9ae7436 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -266,7 +266,7 @@ static struct rapl_domain *platform_rapl_domain; /* 
Platform (PSys) domain */
 /* caller to ensure CPU hotplug lock is held */
 static struct rapl_package *rapl_find_package(int cpu)
 {
-   int id = topology_physical_package_id(cpu);
+   int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
 
list_for_each_entry(rp, _packages, plist) {
@@ -1457,7 +1457,7 @@ static void rapl_remove_package(struct rapl_package *rp)
 /* called from CPU hotplug notifier, hotplug lock held */
 static struct rapl_package *rapl_add_package(int cpu)
 {
-   int id = topology_physical_package_id(cpu);
+   int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
int ret;
 
-- 
2.18.0-rc0

[PATCH 11/14] hwmon/coretemp: Support multi-die/package

2019-02-25 Thread Len Brown

From: Zhang Rui 

This patch introduces coretemp driver support
for new dual-die/package systems.

On the new dual-die/package systems, the package temperature MSRs become
die-scope. Thus instead of one hwmon device per physical package, now
there should be one hwmon device for each die on these systems.

On hardware that does not have multi-die support,
topology_logical_die_id() equals topology_physical_package_id(), thus the
only difference is that physical package id is used as the coretemp
platform device id, instead of logical package id on these systems.

Signed-off-by: Zhang Rui 
Signed-off-by: Len Brown 
Acked-by: Guenter Roeck 
Cc: linux...@vger.kernel.org
Cc: linux-hw...@vger.kernel.org
---
 drivers/hwmon/coretemp.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 5d34f7271e67..57f348d43819 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -435,7 +435,7 @@ static int chk_ucode_version(unsigned int cpu)
 
 static struct platform_device *coretemp_get_pdev(unsigned int cpu)
 {
-   int pkgid = topology_logical_package_id(cpu);
+   int pkgid = topology_logical_die_id(cpu);
 
if (pkgid >= 0 && pkgid < max_packages)
return pkg_devices[pkgid];
@@ -579,7 +579,7 @@ static struct platform_driver coretemp_driver = {
 
 static struct platform_device *coretemp_device_add(unsigned int cpu)
 {
-   int err, pkgid = topology_logical_package_id(cpu);
+   int err, pkgid = topology_logical_die_id(cpu);
struct platform_device *pdev;
 
if (pkgid < 0)
@@ -703,7 +703,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
 * the rest.
 */
if (cpumask_empty(>cpumask)) {
-   pkg_devices[topology_logical_package_id(cpu)] = NULL;
+   pkg_devices[topology_logical_die_id(cpu)] = NULL;
platform_device_unregister(pdev);
return 0;
}
@@ -732,6 +732,7 @@ static enum cpuhp_state coretemp_hp_online;
 static int __init coretemp_init(void)
 {
int err;
+   struct cpuinfo_x86 *c = _data(0);
 
/*
 * CPUID.06H.EAX[0] indicates whether the CPU has thermal
@@ -741,7 +742,7 @@ static int __init coretemp_init(void)
if (!x86_match_cpu(coretemp_ids))
return -ENODEV;
 
-   max_packages = topology_max_packages();
+   max_packages = topology_max_packages() * c->x86_max_dies;
pkg_devices = kcalloc(max_packages, sizeof(struct platform_device *),
  GFP_KERNEL);
if (!pkg_devices)
-- 
2.18.0-rc0

[PATCH 06/14] x86 topology: Define topology_logical_die_id()

2019-02-25 Thread Len Brown

From: Len Brown 

Define topology_logical_die_id() ala
existing topology_logical_package_id()

Signed-off-by: Len Brown 
---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/include/asm/topology.h  |  3 +++
 arch/x86/kernel/smpboot.c| 43 
 3 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f2856fe03715..ee34ff34889d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -119,6 +119,7 @@ struct cpuinfo_x86 {
/* Core id: */
u16 cpu_core_id;
u16 cpu_die_id;
+   u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 281be6bbc80d..88578f10ae22 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -106,6 +106,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #define topology_logical_package_id(cpu)   (cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)  (cpu_data(cpu).phys_proc_id)
+#define topology_logical_die_id(cpu)   (cpu_data(cpu).logical_die_id)
 #define topology_die_id(cpu)   (cpu_data(cpu).cpu_die_id)
 #define topology_core_id(cpu)  (cpu_data(cpu).cpu_core_id)
 
@@ -125,6 +126,7 @@ static inline int topology_max_smt_threads(void)
 
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_die(unsigned int die);
 bool topology_is_primary_thread(unsigned int cpu);
 bool topology_smt_supported(void);
 #else
@@ -132,6 +134,7 @@ bool topology_smt_supported(void);
 static inline int
 topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; 
}
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_phys_to_logical_die(unsigned int die) { return 0; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; 
}
 static inline bool topology_smt_supported(void) { return false; }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c70e547b18c2..e332d5e59652 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,6 +100,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
+static unsigned int logical_die __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __read_mostly __max_smt_threads = 1;
@@ -306,6 +307,24 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
 }
 EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+/**
+ * topology_phys_to_logical_die - Map a physical die id to logical
+ *
+ * Returns logical die id or -1 if not found
+ */
+int topology_phys_to_logical_die(unsigned int die_id)
+{
+   int cpu;
+
+   for_each_possible_cpu(cpu) {
+   struct cpuinfo_x86 *c = _data(cpu);
+
+   if (c->initialized && c->cpu_die_id == die_id)
+   return c->logical_proc_id;
+   }
+   return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_die);
 
 /**
  * topology_update_package_map - Update the physical to logical package map
@@ -330,6 +349,29 @@ int topology_update_package_map(unsigned int pkg, unsigned 
int cpu)
cpu_data(cpu).logical_proc_id = new;
return 0;
 }
+/**
+ * topology_update_die_map - Update the physical to logical die map
+ * @die:   The die id as retrieved via CPUID
+ * @cpu:   The cpu for which this is updated
+ */
+int topology_update_die_map(unsigned int die, unsigned int cpu)
+{
+   int new;
+
+   /* Already available somewhere? */
+   new = topology_phys_to_logical_pkg(die);
+   if (new >= 0)
+   goto found;
+
+   new = logical_die++;
+   if (new != die) {
+   pr_info("CPU %u Converting physical %u to logical die %u\n",
+   cpu, die, new);
+   }
+found:
+   cpu_data(cpu).logical_die_id = new;
+   return 0;
+}
 
 void __init smp_store_boot_cpu_info(void)
 {
@@ -339,6 +381,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
topology_update_package_map(c->phys_proc_id, id);
+   topology_update_die_map(c->cpu_die_id, id);
c->initialized = true;
 }
 
-- 
2.18.0-rc0

[PATCH 12/14] topology: Create package_threads sysfs attribute

2019-02-25 Thread Len Brown

From: Len Brown 

The sysfs cpu/topology/core_siblings (and core_siblings_list)
attributes are documented, implemented, and used by programs to
represent set of logical CPU threads sharing the same package.

This makes sense if the next topology level above a core
is always a package.  But on systems where there is a die
topology level between a core and a package, the name
no longer makes sense.

So without changing its function, add a name for this map
that describes what it actually is -- package threads --
the set of logical CPU threads that share the same package.

This new name will be immune to changes in topology, since
it describes threads at the current level, not siblings
at a contained level.

Signed-off-by: Len Brown 
Suggested-by: Brice Goglin 
---
 Documentation/cputopology.txt | 8 
 drivers/base/topology.c   | 6 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 4e6be7f68fd8..2794dbe8e559 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -46,15 +46,15 @@ thread_siblings_list:
human-readable list of cpuX's hardware threads within the same
core as cpuX.
 
-core_siblings:
+package_threads:
 
internal kernel map of cpuX's hardware threads within the same
-   physical_package_id.
+   physical_package_id. (deprecated name: "core_siblings")
 
-core_siblings_list:
+package_threads_list:
 
human-readable list of cpuX's hardware threads within the same
-   physical_package_id.
+   physical_package_id. (deprecated name: "core_siblings_list")
 
 book_siblings:
 
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 50352cf96f85..5f4405a08c6e 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -57,6 +57,10 @@ define_siblings_show_func(core_siblings, core_cpumask);
 static DEVICE_ATTR_RO(core_siblings);
 static DEVICE_ATTR_RO(core_siblings_list);
 
+define_siblings_show_func(package_threads, core_cpumask);
+static DEVICE_ATTR_RO(package_threads);
+static DEVICE_ATTR_RO(package_threads_list);
+
 #ifdef CONFIG_SCHED_BOOK
 define_id_show_func(book_id);
 static DEVICE_ATTR_RO(book_id);
@@ -81,6 +85,8 @@ static struct attribute *default_attrs[] = {
_attr_thread_siblings_list.attr,
_attr_core_siblings.attr,
_attr_core_siblings_list.attr,
+   _attr_package_threads.attr,
+   _attr_package_threads_list.attr,
 #ifdef CONFIG_SCHED_BOOK
_attr_book_id.attr,
_attr_book_siblings.attr,
-- 
2.18.0-rc0

[PATCH 09/14] thermal/x86_pkg_temp_thermal: Support multi-die/package

2019-02-25 Thread Len Brown

From: Zhang Rui 

On the new dual-die/package systems, the package temperature MSR becomes
die-scope. Thus instead of one thermal zone device per physical package,
now there should be one thermal_zone for each die on these systems.

This patch introduces x86_pkg_temp_thermal support for new
dual-die/package systems.

On hardware that does not have multi-die, topology_logical_die_id()
equals topology_physical_package_id(), thus there is no functional change.

Signed-off-by: Zhang Rui 
Signed-off-by: Len Brown 
---
 drivers/thermal/intel/x86_pkg_temp_thermal.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c 
b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 1ef937d799e4..1b03ab3ee20c 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -122,7 +122,7 @@ static int pkg_temp_debugfs_init(void)
  */
 static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
 {
-   int pkgid = topology_logical_package_id(cpu);
+   int pkgid = topology_logical_die_id(cpu);
 
if (pkgid >= 0 && pkgid < max_packages)
return packages[pkgid];
@@ -353,7 +353,7 @@ static int pkg_thermal_notify(u64 msr_val)
 
 static int pkg_temp_thermal_device_add(unsigned int cpu)
 {
-   int pkgid = topology_logical_package_id(cpu);
+   int pkgid = topology_logical_die_id(cpu);
u32 tj_max, eax, ebx, ecx, edx;
struct pkg_device *pkgdev;
int thres_count, err;
@@ -449,7 +449,7 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
 * worker will see the package anymore.
 */
if (lastcpu) {
-   packages[topology_logical_package_id(cpu)] = NULL;
+   packages[topology_logical_die_id(cpu)] = NULL;
/* After this point nothing touches the MSR anymore. */
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
  pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
@@ -511,11 +511,12 @@ MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
 static int __init pkg_temp_thermal_init(void)
 {
int ret;
+   struct cpuinfo_x86 *c = _data(0);
 
if (!x86_match_cpu(pkg_temp_thermal_ids))
return -ENODEV;
 
-   max_packages = topology_max_packages();
+   max_packages = topology_max_packages() * c->x86_max_dies;
packages = kcalloc(max_packages, sizeof(struct pkg_device *),
   GFP_KERNEL);
if (!packages)
-- 
2.18.0-rc0

[PATCH 04/14] x86 topology: Add CPUID.1F multi-die/package support

2019-02-25 Thread Len Brown

From: Len Brown 

Some new systems have multiple software-visible die within each package.

Update Linux parsing of the Intel CPUID "Extended Topology Leaf"
to handle either CPUID.B, or the new CPUID.1F.

Add cpuinfo_x86.cpu_die_id and cpuinfo_x86.x86_max_dies to store the result.

die_id will be non-zero only for multi-die/package systems.

Signed-off-by: Len Brown 
Cc: linux-...@vger.kernel.org
---
 Documentation/x86/topology.txt   |  4 ++
 arch/x86/include/asm/processor.h |  4 +-
 arch/x86/kernel/cpu/topology.c   | 85 +---
 arch/x86/kernel/smpboot.c|  2 +
 4 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
index 06b3cdbc4048..8107b6cfc9ea 100644
--- a/Documentation/x86/topology.txt
+++ b/Documentation/x86/topology.txt
@@ -46,6 +46,10 @@ The topology of a system is described in the units of:
 
 The number of cores in a package. This information is retrieved via CPUID.
 
+  - cpuinfo_x86.x86_max_dies:
+
+The number of dies in a package. This information is retrieved via CPUID.
+
   - cpuinfo_x86.phys_proc_id:
 
 The physical ID of the package. This information is retrieved via CPUID
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 33051436c864..f2856fe03715 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -105,7 +105,8 @@ struct cpuinfo_x86 {
int x86_power;
unsigned long   loops_per_jiffy;
/* cpuid returned max cores value: */
-   u16  x86_max_cores;
+   u16 x86_max_cores;
+   u16 x86_max_dies;
u16 apicid;
u16 initial_apicid;
u16 x86_clflush_size;
@@ -117,6 +118,7 @@ struct cpuinfo_x86 {
u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
+   u16 cpu_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8f6c784141d1..4d17e699657d 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -15,33 +15,63 @@
 /* leaf 0xb SMT level */
 #define SMT_LEVEL  0
 
-/* leaf 0xb sub-leaf types */
+/* extended topology sub-leaf types */
 #define INVALID_TYPE   0
 #define SMT_TYPE   1
 #define CORE_TYPE  2
+#define DIE_TYPE   5
 
 #define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
 #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
 #define LEVEL_MAX_SIBLINGS(ebx)((ebx) & 0x)
 
-int detect_extended_topology_early(struct cpuinfo_x86 *c)
-{
 #ifdef CONFIG_SMP
+/*
+ * Check if given CPUID extended toplogy "leaf" is implemented
+ */
+static int check_extended_topology_leaf(int leaf)
+{
unsigned int eax, ebx, ecx, edx;
 
-   if (c->cpuid_level < 0xb)
+   cpuid_count(leaf, SMT_LEVEL, , , , );
+
+   if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return -1;
 
-   cpuid_count(0xb, SMT_LEVEL, , , , );
+   return 0;
+}
+/*
+ * Return best CPUID Extended Toplogy Leaf supported
+ */
+static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
+{
+   if (c->cpuid_level >= 0x1f) {
+   if (check_extended_topology_leaf(0x1f) == 0)
+   return 0x1f;
+   }
 
-   /*
-* check if the cpuid leaf 0xb is actually implemented.
-*/
-   if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+   if (c->cpuid_level >= 0xb) {
+   if (check_extended_topology_leaf(0xb) == 0)
+   return 0xb;
+   }
+
+   return -1;
+}
+#endif
+
+int detect_extended_topology_early(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+   unsigned int eax, ebx, ecx, edx;
+   int leaf;
+
+   leaf = detect_extended_topology_leaf(c);
+   if (leaf < 0)
return -1;
 
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
 
+   cpuid_count(leaf, SMT_LEVEL, , , , );
/*
 * initial apic id, which also represents 32-bit extended x2apic id.
 */
@@ -52,7 +82,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
 }
 
 /*
- * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * Check for extended topology enumeration cpuid leaf, and if it
  * exists, use it for populating initial_apicid and cpu topology
  * detection.
  */
@@ -60,22 +90,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
-   unsigned int ht_mask_width, core_plus_mask_width;
+   unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
unsigned int core_select_mask,

[PATCH 13/14] topology: Create core_threads sysfs attribute

2019-02-25 Thread Len Brown

From: Len Brown 

Create CPU topology sysfs attributes:
"core_threads" and "core_threads_list"

These attributes represent all of the logical CPU threads that share the
same core.

These attributes are synonymous with the existing "thread_siblings" and
"thread_siblings_list" attributes, which will be deprecated.

Signed-off-by: Len Brown 
Suggested-by: Brice Goglin 
---
 Documentation/cputopology.txt | 8 
 drivers/base/topology.c   | 6 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 2794dbe8e559..e67915a8a512 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -36,15 +36,15 @@ drawer_id:
identifier (rather than the kernel's).  The actual value is
architecture and platform dependent.
 
-thread_siblings:
+core_threads:
 
internal kernel map of cpuX's hardware threads within the same
-   core as cpuX.
+   core as cpuX. (deprecated name: "thread_siblings")
 
-thread_siblings_list:
+core_threads_list:
 
human-readable list of cpuX's hardware threads within the same
-   core as cpuX.
+   core as cpuX. (deprecated name: "thread_siblings_list")
 
 package_threads:
 
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 5f4405a08c6e..73efadf5e6d4 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -53,6 +53,10 @@ define_siblings_show_func(thread_siblings, sibling_cpumask);
 static DEVICE_ATTR_RO(thread_siblings);
 static DEVICE_ATTR_RO(thread_siblings_list);
 
+define_siblings_show_func(core_threads, sibling_cpumask);
+static DEVICE_ATTR_RO(core_threads);
+static DEVICE_ATTR_RO(core_threads_list);
+
 define_siblings_show_func(core_siblings, core_cpumask);
 static DEVICE_ATTR_RO(core_siblings);
 static DEVICE_ATTR_RO(core_siblings_list);
@@ -83,6 +87,8 @@ static struct attribute *default_attrs[] = {
_attr_core_id.attr,
_attr_thread_siblings.attr,
_attr_thread_siblings_list.attr,
+   _attr_core_threads.attr,
+   _attr_core_threads_list.attr,
_attr_core_siblings.attr,
_attr_core_siblings_list.attr,
_attr_package_threads.attr,
-- 
2.18.0-rc0

[PATCH 05/14] cpu topology: Export die_id

2019-02-25 Thread Len Brown

From: Len Brown 

Export die_id in cpu topology, for the benefit of hardware that
has multiple-die/package.

Signed-off-by: Len Brown 
Cc: linux-...@vger.kernel.org
---
 Documentation/cputopology.txt   | 6 ++
 arch/x86/include/asm/topology.h | 1 +
 drivers/base/topology.c | 4 
 include/linux/topology.h| 3 +++
 4 files changed, 14 insertions(+)

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index cb61277e2308..4e6be7f68fd8 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -12,6 +12,12 @@ physical_package_id:
socket number, but the actual value is architecture and platform
dependent.
 
+die_id:
+
+   the CPU die ID of cpuX. Typically it is the hardware platform's
+   identifier (rather than the kernel's).  The actual value is
+   architecture and platform dependent.
+
 core_id:
 
the CPU core ID of cpuX. Typically it is the hardware platform's
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38a1c33..281be6bbc80d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -106,6 +106,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #define topology_logical_package_id(cpu)   (cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)  (cpu_data(cpu).phys_proc_id)
+#define topology_die_id(cpu)   (cpu_data(cpu).cpu_die_id)
 #define topology_core_id(cpu)  (cpu_data(cpu).cpu_core_id)
 
 #ifdef CONFIG_SMP
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 5fd9f167ecc1..50352cf96f85 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -43,6 +43,9 @@ static ssize_t name##_list_show(struct device *dev,   
\
 define_id_show_func(physical_package_id);
 static DEVICE_ATTR_RO(physical_package_id);
 
+define_id_show_func(die_id);
+static DEVICE_ATTR_RO(die_id);
+
 define_id_show_func(core_id);
 static DEVICE_ATTR_RO(core_id);
 
@@ -72,6 +75,7 @@ static DEVICE_ATTR_RO(drawer_siblings_list);
 
 static struct attribute *default_attrs[] = {
_attr_physical_package_id.attr,
+   _attr_die_id.attr,
_attr_core_id.attr,
_attr_thread_siblings.attr,
_attr_thread_siblings_list.attr,
diff --git a/include/linux/topology.h b/include/linux/topology.h
index cb0775e1ee4b..5cc8595dd0e4 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -184,6 +184,9 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_physical_package_id
 #define topology_physical_package_id(cpu)  ((void)(cpu), -1)
 #endif
+#ifndef topology_die_id
+#define topology_die_id(cpu)   ((void)(cpu), -1)
+#endif
 #ifndef topology_core_id
 #define topology_core_id(cpu)  ((void)(cpu), 0)
 #endif
-- 
2.18.0-rc0

[PATCH 14/14] topology: Create die_threads sysfs attribute

2019-02-25 Thread Len Brown

From: Len Brown 

The die_threads show all the logical CPUs that share the same die_id.

Signed-off-by: Len Brown 
Suggested-by: Brice Goglin 
---
 Documentation/cputopology.txt   | 12 
 arch/x86/include/asm/smp.h  |  1 +
 arch/x86/include/asm/topology.h |  1 +
 arch/x86/kernel/smpboot.c   | 22 ++
 arch/x86/xen/smp_pv.c   |  1 +
 drivers/base/topology.c |  6 ++
 include/linux/topology.h|  3 +++
 7 files changed, 46 insertions(+)

diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index e67915a8a512..6c25ce682c90 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -56,6 +56,16 @@ package_threads_list:
human-readable list of cpuX's hardware threads within the same
physical_package_id. (deprecated name: "core_siblings_list")
 
+die_threads:
+
+   internal kernel map of cpuX's hardware threads within the same
+   die_id.
+
+die_threads_list:
+
+   human-readable list of cpuX's hardware threads within the same
+   die_id.
+
 book_siblings:
 
internal kernel map of cpuX's hardware threads within the same
@@ -92,6 +102,7 @@ these macros in include/asm-XXX/topology.h::
#define topology_drawer_id(cpu)
#define topology_sibling_cpumask(cpu)
#define topology_core_cpumask(cpu)
+   #define topology_die_cpumask(cpu)
#define topology_book_cpumask(cpu)
#define topology_drawer_cpumask(cpu)
 
@@ -108,6 +119,7 @@ not defined by include/asm-XXX/topology.h:
 2) topology_core_id: 0
 3) topology_sibling_cpumask: just the given CPU
 4) topology_core_cpumask: just the given CPU
+5) topology_die_cpumask: just the given CPU
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2e95b6c1bca3..39266d193597 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,7 @@ extern unsigned int num_processors;
 
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
 /* cpus sharing the last level cache: */
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 88578f10ae22..c573b0a26e16 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -111,6 +111,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 #define topology_core_id(cpu)  (cpu_data(cpu).cpu_core_id)
 
 #ifdef CONFIG_SMP
+#define topology_die_cpumask(cpu)  (per_cpu(cpu_die_map, cpu))
 #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)  (per_cpu(cpu_sibling_map, cpu))
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e332d5e59652..d30fd42a3285 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -90,6 +90,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
+/* representing HT, core, and die siblings of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
+EXPORT_PER_CPU_SYMBOL(cpu_die_map);
+
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 
 /* Per CPU bogomips and other parameters */
@@ -511,6 +515,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct 
cpuinfo_x86 *o)
return false;
 }
 
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+   if ((c->phys_proc_id == o->phys_proc_id) &&
+   (c->cpu_die_id == o->cpu_die_id))
+   return true;
+   return false;
+}
+
+
 #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
 static inline int x86_sched_itmt_flags(void)
 {
@@ -573,6 +586,7 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
+   cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
c->booted_cores = 1;
return;
}
@@ -621,6 +635,9 @@ void set_cpu_sibling_map(int cpu)
}
if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
+
+   if ((i == cpu) || (has_mp && match_die(c, o)))
+   link_mask(topology_die_cpumask, cpu, i);
}
 
threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1216,6 +1233,7 @@ static __init void disable_smp(void)

[PATCH 03/14] x86 smpboot: Rename match_die() to match_pkg()

2019-02-25 Thread Len Brown

From: Len Brown 

Syntax only, no functional or semantic change.

This routine matches packages, not die, so name it thus.

Signed-off-by: Len Brown 
---
 arch/x86/kernel/smpboot.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ccd1f2a8e557..19a963890bbe 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -459,7 +459,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct 
cpuinfo_x86 *o)
  * multicore group inside a NUMA node.  If this happens, we will
  * discard the MC level of the topology later.
  */
-static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
if (c->phys_proc_id == o->phys_proc_id)
return true;
@@ -550,7 +550,7 @@ void set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = _data(i);
 
-   if ((i == cpu) || (has_mp && match_die(c, o))) {
+   if ((i == cpu) || (has_mp && match_pkg(c, o))) {
link_mask(topology_core_cpumask, cpu, i);
 
/*
@@ -574,7 +574,7 @@ void set_cpu_sibling_map(int cpu)
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
-   if (match_die(c, o) && !topology_same_node(c, o))
+   if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
}
 
-- 
2.18.0-rc0

[PATCH 07/14] powercap/intel_rapl: Simplify rapl_find_package()

2019-02-25 Thread Len Brown

From: Zhang Rui 

Syntax only, no functional or semantic change.

Simplify how the code to discover a package is called.
Rename find_package_by_id() to rapl_find_package()

Signed-off-by: Zhang Rui 
Signed-off-by: Len Brown 
Acked-by: Rafael J. Wysocki 
Cc: linux...@vger.kernel.org
---
 drivers/powercap/intel_rapl.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 6cdb2c14eee4..6057d9695fed 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -264,8 +264,9 @@ static struct powercap_control_type *control_type; /* 
PowerCap Controller */
 static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
 
 /* caller to ensure CPU hotplug lock is held */
-static struct rapl_package *find_package_by_id(int id)
+static struct rapl_package *rapl_find_package(int cpu)
 {
+   int id = topology_physical_package_id(cpu);
struct rapl_package *rp;
 
list_for_each_entry(rp, _packages, plist) {
@@ -1298,7 +1299,7 @@ static int __init rapl_register_psys(void)
rd->rpl[0].name = pl1_name;
rd->rpl[1].prim_id = PL2_ENABLE;
rd->rpl[1].name = pl2_name;
-   rd->rp = find_package_by_id(0);
+   rd->rp = rapl_find_package(0);
 
power_zone = powercap_register_zone(>power_zone, control_type,
"psys", NULL,
@@ -1454,8 +1455,9 @@ static void rapl_remove_package(struct rapl_package *rp)
 }
 
 /* called from CPU hotplug notifier, hotplug lock held */
-static struct rapl_package *rapl_add_package(int cpu, int pkgid)
+static struct rapl_package *rapl_add_package(int cpu)
 {
+   int id = topology_physical_package_id(cpu);
struct rapl_package *rp;
int ret;
 
@@ -1464,7 +1466,7 @@ static struct rapl_package *rapl_add_package(int cpu, int 
pkgid)
return ERR_PTR(-ENOMEM);
 
/* add the new package to the list */
-   rp->id = pkgid;
+   rp->id = id;
rp->lead_cpu = cpu;
 
/* check if the package contains valid domains */
@@ -1495,12 +1497,11 @@ static struct rapl_package *rapl_add_package(int cpu, 
int pkgid)
  */
 static int rapl_cpu_online(unsigned int cpu)
 {
-   int pkgid = topology_physical_package_id(cpu);
struct rapl_package *rp;
 
-   rp = find_package_by_id(pkgid);
+   rp = rapl_find_package(cpu);
if (!rp) {
-   rp = rapl_add_package(cpu, pkgid);
+   rp = rapl_add_package(cpu);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -1510,11 +1511,10 @@ static int rapl_cpu_online(unsigned int cpu)
 
 static int rapl_cpu_down_prep(unsigned int cpu)
 {
-   int pkgid = topology_physical_package_id(cpu);
struct rapl_package *rp;
int lead_cpu;
 
-   rp = find_package_by_id(pkgid);
+   rp = rapl_find_package(cpu);
if (!rp)
return 0;
 
-- 
2.18.0-rc0

Glückwunsch zum Gewinn

2019-02-25 Thread director . ict

Schönen Tag,
Mein Name ist Mavis Wanczyk, Gewinner des Powerball-Jackpots im August 2017 in 
Höhe von 758,7 Millionen US-Dollar. Ich und meine Familie haben beschlossen, 
jeweils fünf Millionen US-Dollar an einige glückliche Personen auf der ganzen 
Welt auszugeben. Kontaktieren Sie mich über
E-Mail: maviswanczyk12...@gmail.com für Info / Anspruch.

Weiterlesen:
http://Money.cnn.com/2017/08/23/News/Powerball-700-Million-Jackpot/Index.html

/

Good Day,
My name is Mavis Wanczyk, winner of the power ball jackpot $758.7 million in 
August 2017. I and my family have decided to give out $5 million each to a few 
lucky persons all over the world. Contact me via
Email: maviswanczyk12...@gmail.com for info/claim.

Continue reading:
http://Money.cnn.com/2017/08/23/News/Powerball-700-Million-Jackpot/Index.html

Re: [PATCH 1/2] dt-bindings: input: sitronix-st1232: document optional reset-gpios property

2019-02-25 Thread Martin Kepplinger

On 25.02.19 15:43, Rob Herring wrote:
> On Tue, Jan 29, 2019 at 11:23:46AM +0100, Martin Kepplinger wrote:
>> From: Martin Kepplinger 
>>
>> The st1232 driver reads this via gpiod.
> 
> What a driver does is not relevant to the binding. This breaks 
> compatibility so you need to mention that and why this is okay.
> 
> Either you need to keep 'gpios' as deprecated or you can drop it if 
> there aren't any dts files using it.
> 

Hi Rob,

The patch is outdated. Dmitry took the driver changes without breaking
the current DT bindings.

   martin


smime.p7s
Description: S/MIME cryptographic signature

Re: [PATCH v6 0/4] input: touchscreen: Add goodix GT5553 CTP support

2019-02-25 Thread Jagan Teki

Hi Dmitry,

On Tue, Feb 19, 2019 at 3:46 PM Jagan Teki  wrote:
>
> This is v6 patchset for supporting goodix GT5553 CTP. Here is the
> previous version[1]
>
> Changes for v5:
> - document bindings for required regulators, which are need during
>   power-on sequence
> - enable, disable required regulators as described in power-on sequence
>   using normal regulator calls
> - update the proper commi messages
> Changes for v4:
> - document AVDD22, DVDD12, VDDIO as optional properties
> - use regulator bulk calls, for get, enable and disable functionalities
> Changes for v4:
> - devm_add_action_or_reset for disabling regulator
> Changes for v3:
> - add cover-letter
> - s/ADVV28/AVDD28 on commit head
> - fix few typo
> Changes for v2:
> - Rename vcc-supply with AVDD28-supply
> - disable regulator in remove
> - fix to setup regulator in probe code
> - add chipdata
> - drop example node in dt-bindings
>
> [1] https://patchwork.kernel.org/cover/10816901/
>
> Jagan Teki (4):
>   dt-bindings: input: touchscreen: goodix: Document regulator properties
>   Input: goodix - Add regulators suppot
>   dt-bindings: input: touchscreen: goodix: Add GT5663 compatible
>   Input: goodix - Add GT5663 CTP support
>
>  .../bindings/input/touchscreen/goodix.txt |  3 +
>  drivers/input/touchscreen/goodix.c| 60 +++
>  2 files changed, 63 insertions(+)

Let me know if you have any further comments on regulator patch, fyi
Rob reviewed it already.

Re: [PATCH] dt-bindings: Add vendor prefix for feiyang

2019-02-25 Thread Jagan Teki

Hi Rob,

On Tue, Feb 12, 2019 at 5:22 PM Jagan Teki  wrote:
>
> Add vendor prefix for feiyang, known as
> Shenzhen Fly Young Technology Co.,LTD. a known producer for LCD modules.
>
> Signed-off-by: Jagan Teki 
> ---
> Note: notation about using 'feiyang' is based on the datasheet
> http://files.pine64.org/doc/datasheet/pine64/FY07024DI26A30-D_feiyang_LCD_panel.pdf
>
>  Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt 
> b/Documentation/devicetree/bindings/vendor-prefixes.txt
> index a5ad57eaee20..f42e12eb9d64 100644
> --- a/Documentation/devicetree/bindings/vendor-prefixes.txt
> +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
> @@ -140,6 +140,7 @@ fairphone   Fairphone B.V.
>  faradayFaraday Technology Corporation
>  fastraxFastrax Oy
>  fcsFairchild Semiconductor
> +feiyangShenzhen Fly Young Technology Co.,LTD.
>  fireflyFirefly
>  focaltech  FocalTech Systems Co.,Ltd
>  friendlyarmGuangzhou FriendlyARM Computer Tech Co., Ltd
> --
> 2.18.0.321.gffc6fa0e3
>

Any comments?

[PATCH v3 6/9] perf util: introduce Zstd based streaming compression API

2019-02-25 Thread Alexey Budankov



Implemented functions are based on Zstd streaming compression
API. The functions are used in runtime to compress data that
come from mmaped kernel buffer data and then stored into a trace.

Signed-off-by: Alexey Budankov 
---
 tools/perf/util/Build  |  2 +
 tools/perf/util/compress.h | 18 
 tools/perf/util/zstd.c | 95 ++
 3 files changed, 115 insertions(+)
 create mode 100644 tools/perf/util/zstd.c

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8dd3102301ea..920ee8bebd83 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,6 +145,8 @@ perf-y += scripting-engines/
 
 perf-$(CONFIG_ZLIB) += zlib.o
 perf-$(CONFIG_LZMA) += lzma.o
+perf-y += zstd.o
+
 perf-y += demangle-java.o
 perf-y += demangle-rust.o
 
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index 892e92e7e7fc..e0987616db94 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -2,6 +2,11 @@
 #ifndef PERF_COMPRESS_H
 #define PERF_COMPRESS_H
 
+#include 
+#ifdef HAVE_ZSTD_SUPPORT
+#include 
+#endif
+
 #ifdef HAVE_ZLIB_SUPPORT
 int gzip_decompress_to_file(const char *input, int output_fd);
 bool gzip_is_compressed(const char *input);
@@ -12,4 +17,17 @@ int lzma_decompress_to_file(const char *input, int 
output_fd);
 bool lzma_is_compressed(const char *input);
 #endif
 
+struct zstd_data {
+#ifdef HAVE_ZSTD_SUPPORT
+   ZSTD_CStream*cstream;
+#endif
+};
+
+int zstd_init(struct zstd_data *data, int level);
+int zstd_fini(struct zstd_data *data);
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data,
+   void *dst, size_t dst_size, void *src, size_t src_size, size_t 
max_record_size,
+   size_t process_header(void *record, size_t increment));
+
 #endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index ..686c3a347dcc
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+
+#include "util/compress.h"
+#include "util/debug.h"
+
+#ifdef HAVE_ZSTD_SUPPORT
+
+int zstd_init(struct zstd_data *data, int level)
+{
+   size_t ret;
+
+   data->cstream = ZSTD_createCStream();
+   if (data->cstream == NULL) {
+   pr_err("Couldn't create compression stream.\n");
+   return -1;
+   }
+
+   ret = ZSTD_initCStream(data->cstream, level);
+   if (ZSTD_isError(ret)) {
+   pr_err("Failed to initialize compression stream: %s\n", 
ZSTD_getErrorName(ret));
+   return -1;
+   }
+
+   return 0;
+}
+
+int zstd_fini(struct zstd_data *data)
+{
+   if (data->cstream) {
+   ZSTD_freeCStream(data->cstream);
+   data->cstream = NULL;
+   }
+
+   return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data,
+   void *dst, size_t dst_size, void *src, size_t src_size, size_t 
max_record_size,
+   size_t process_header(void *record, size_t increment))
+{
+   size_t ret, size, compressed = 0;
+   ZSTD_inBuffer input = { src, src_size, 0 };
+   ZSTD_outBuffer output;
+   void *record;
+
+   while (input.pos < input.size) {
+   record = dst;
+   size = process_header(record, 0);
+   compressed += size;
+   dst += size;
+   dst_size -= size;
+   output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
+   max_record_size : dst_size, 0 };
+   ret = ZSTD_compressStream(data->cstream, &output, &input);
+   ZSTD_flushStream(data->cstream, &output);
+   if (ZSTD_isError(ret)) {
+   pr_err("failed to compress %ld bytes: %s\n",
+   (long)src_size, ZSTD_getErrorName(ret));
+   memcpy(dst, src, src_size);
+   return src_size;
+   }
+   size = output.pos;
+   size = process_header(record, size);
+   compressed += size;
+   dst += size;
+   dst_size -= size;
+   }
+
+   return compressed;
+}
+
+#else /* !HAVE_ZSTD_SUPPORT */
+
+int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
+{
+   return 0;
+}
+
+int zstd_fini(struct zstd_data *data __maybe_unused)
+{
+   return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+   void *dst, size_t dst_size __maybe_unused,
+   void *src, size_t src_size, size_t max_record_size 
__maybe_unused,
+   size_t process_header(void *record, size_t increment) 
__maybe_unused)
+{
+   memcpy(dst, src, src_size);
+   return 0;
+}
+
+#endif

RE: [PATCH v3 2/2] drivers: mux: Add Generic regmap bitfield-based multiplexer in mmio-mux

2019-02-25 Thread Pankaj Bansal

Hi Peter,

> -Original Message-
> From: Peter Rosin [mailto:p...@axentia.se]
> Sent: Monday, 25 February, 2019 08:14 PM
> To: Pankaj Bansal ; Leo Li 
> Cc: linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v3 2/2] drivers: mux: Add Generic regmap bitfield-based
> multiplexer in mmio-mux
> 
> On 2019-02-24 09:27, Pankaj Bansal wrote:
> > Generic register bitfield-based multiplexer that controls the
> > multiplexer producer defined under a parent node.
> > The driver corresponding to parent node provides register read/write
> > capabilities.
> >
> > Signed-off-by: Pankaj Bansal 
> > ---
> >
> > Notes:
> > V3:
> > - Added the patch in series with device tree binding patch
> > - Added the NULL return handling for regmap
> > V2:
> > - removed seperate driver regmap.c and added the regmap function in
> mmio.c
> >   based on compatible field, the syscon or regmap function would be 
> > called
> > - Modified the KConfig as per Peter's comments
> >
> >  drivers/mux/Kconfig | 12 ++--  drivers/mux/mmio.c  | 10
> > +++---
> >  2 files changed, 13 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig index
> > 7659d6c5f718..e5c571fd232c 100644
> > --- a/drivers/mux/Kconfig
> > +++ b/drivers/mux/Kconfig
> > @@ -46,14 +46,14 @@ config MUX_GPIO
> >   be called mux-gpio.
> >
> >  config MUX_MMIO
> > -   tristate "MMIO register bitfield-controlled Multiplexer"
> > -   depends on (OF && MFD_SYSCON) || COMPILE_TEST
> > +   tristate "MMIO/Regmap register bitfield-controlled Multiplexer"
> > +   depends on OF || COMPILE_TEST
> > help
> > - MMIO register bitfield-controlled Multiplexer controller.
> > + MMIO/Regmap register bitfield-controlled Multiplexer controller.
> >
> > - The driver builds multiplexer controllers for bitfields in a syscon
> > - register. For N bit wide bitfields, there will be 2^N possible
> > - multiplexer states.
> > + The driver builds multiplexer controllers for bitfields in either
> > + a syscon register or a driver regmap register. For N bit wide
> > + bitfields, there will be 2^N possible multiplexer states.
> >
> >   To compile the driver as a module, choose M here: the module will
> >   be called mux-mmio.
> > diff --git a/drivers/mux/mmio.c b/drivers/mux/mmio.c index
> > 935ac44aa209..cc02155e4644 100644
> > --- a/drivers/mux/mmio.c
> > +++ b/drivers/mux/mmio.c
> > @@ -28,6 +28,7 @@ static const struct mux_control_ops mux_mmio_ops = {
> >
> >  static const struct of_device_id mux_mmio_dt_ids[] = {
> > { .compatible = "mmio-mux", },
> > +   { .compatible = "reg-mux", },
> > { /* sentinel */ }
> >  };
> >  MODULE_DEVICE_TABLE(of, mux_mmio_dt_ids); @@ -43,9 +44,12 @@
> static
> > int mux_mmio_probe(struct platform_device *pdev)
> > int ret;
> > int i;
> >
> > -   regmap = syscon_node_to_regmap(np->parent);
> > -   if (IS_ERR(regmap)) {
> > -   ret = PTR_ERR(regmap);
> > +   if (of_device_is_compatible(np, "mmio-mux"))
> > +   regmap = syscon_node_to_regmap(np->parent);
> > +   else
> > +   regmap = dev_get_regmap(dev->parent, NULL);
> > +   if (IS_ERR_OR_NULL(regmap)) {
> > +   ret = PTR_ERR_OR_ZERO(regmap) ? PTR_ERR(regmap) : -
> ENODEV;
> 
> The above is not correct, this should be better (untested):
> 
>   ret = PTR_ERR(regmap) ?: -ENODEV;

Omitting the second operand in ternary operator is not standard. 
https://stackoverflow.com/questions/34559705/ternary-operator-without-the-middle-expression

Although, it *has been* used in kernel in many places
https://livegrep.com/search/linux?q=file%3A%5C.c%24%20%5C%3F%5C%3A_case=auto=true=true


> 
> Cheers,
> Peter
> 
> > dev_err(dev, "failed to get regmap: %d\n", ret);
> > return ret;
> > }
> >

Re: [PATCH v2 20/26] userfaultfd: wp: support write protection for userfault vma range

2019-02-25 Thread Peter Xu

On Mon, Feb 25, 2019 at 10:52:34PM +0200, Mike Rapoport wrote:
> On Tue, Feb 12, 2019 at 10:56:26AM +0800, Peter Xu wrote:
> > From: Shaohua Li 
> > 
> > Add API to enable/disable writeprotect a vma range. Unlike mprotect,
> > this doesn't split/merge vmas.
> > 
> > Cc: Andrea Arcangeli 
> > Cc: Rik van Riel 
> > Cc: Kirill A. Shutemov 
> > Cc: Mel Gorman 
> > Cc: Hugh Dickins 
> > Cc: Johannes Weiner 
> > Signed-off-by: Shaohua Li 
> > Signed-off-by: Andrea Arcangeli 
> > [peterx:
> >  - use the helper to find VMA;
> >  - return -ENOENT if not found to match mcopy case;
> >  - use the new MM_CP_UFFD_WP* flags for change_protection
> >  - check against mmap_changing for failures]
> > Signed-off-by: Peter Xu 
> > ---
> >  include/linux/userfaultfd_k.h |  3 ++
> >  mm/userfaultfd.c  | 54 +++
> >  2 files changed, 57 insertions(+)
> > 
> > diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> > index 765ce884cec0..8f6e6ed544fb 100644
> > --- a/include/linux/userfaultfd_k.h
> > +++ b/include/linux/userfaultfd_k.h
> > @@ -39,6 +39,9 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
> >   unsigned long dst_start,
> >   unsigned long len,
> >   bool *mmap_changing);
> > +extern int mwriteprotect_range(struct mm_struct *dst_mm,
> > +  unsigned long start, unsigned long len,
> > +  bool enable_wp, bool *mmap_changing);
> > 
> >  /* mm helpers */
> >  static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct 
> > *vma,
> > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > index fefa81c301b7..529d180bb4d7 100644
> > --- a/mm/userfaultfd.c
> > +++ b/mm/userfaultfd.c
> > @@ -639,3 +639,57 @@ ssize_t mfill_zeropage(struct mm_struct *dst_mm, 
> > unsigned long start,
> >  {
> > return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
> >  }
> > +
> > +int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
> > +   unsigned long len, bool enable_wp, bool *mmap_changing)
> > +{
> > +   struct vm_area_struct *dst_vma;
> > +   pgprot_t newprot;
> > +   int err;
> > +
> > +   /*
> > +* Sanitize the command parameters:
> > +*/
> > +   BUG_ON(start & ~PAGE_MASK);
> > +   BUG_ON(len & ~PAGE_MASK);
> > +
> > +   /* Does the address range wrap, or is the span zero-sized? */
> > +   BUG_ON(start + len <= start);
> 
> I'd replace these BUG_ON()s with
> 
>   if (WARN_ON())
>return -EINVAL;

I believe BUG_ON() is used because these parameters should have been
checked in userfaultfd_writeprotect() already by the common
validate_range() even before calling mwriteprotect_range().  So I'm
fine with the WARN_ON() approach but I'd slightly prefer to simply
keep the patch as is to keep Jerome's r-b if you won't disagree. :)

Thanks,

-- 
Peter Xu

Re: [PATCH v2] dt-bindings: hwmon: Add missing documentation for lm75

2019-02-25 Thread Jagan Teki

Rob,

On Thu, Feb 21, 2019 at 7:32 PM Guenter Roeck  wrote:
>
> On 2/20/19 9:22 AM, Jagan Teki wrote:
> > On Tue, Feb 12, 2019 at 5:08 PM Jagan Teki  
> > wrote:
> >>
> >> Add missing dt-binding documentation for lm75 hwmon sensor.
> >>
> >> Signed-off-by: Jagan Teki 
> >> ---
> >> Changes for v2:
> >> -  Add all compatible nodes available in lm75.
> >>
> >>   .../devicetree/bindings/hwmon/lm75.txt| 37 +++
> >>   1 file changed, 37 insertions(+)
> >>   create mode 100644 Documentation/devicetree/bindings/hwmon/lm75.txt
> >
> > Any comments on this? It is blocking some dts changes to get merge
> > into Mainline.
> >
>
> Waiting for Rob's Ack.

Any comments?

[PATCH v3 5/9] perf mmap: implement dedicated memory buffer for data compression

2019-02-25 Thread Alexey Budankov



Implemented mmap data buffer that is used as the memory to operate
on when compressing sampling data in case of serial trace streaming.

In case of AIO trace streaming AIO buffers are used to implement
sampling data compression.

Signed-off-by: Alexey Budankov 
---
 tools/perf/builtin-record.c |  6 +-
 tools/perf/util/evlist.c|  8 +---
 tools/perf/util/evlist.h|  2 +-
 tools/perf/util/mmap.c  | 25 +
 tools/perf/util/mmap.h  |  4 +++-
 5 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 61017fa0ee1c..71c67a87c713 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -572,7 +572,7 @@ static int record__mmap_evlist(struct record *rec,
 opts->auxtrace_mmap_pages,
 opts->auxtrace_snapshot_mode,
 opts->nr_cblocks, opts->affinity,
-opts->mmap_flush) < 0) {
+opts->mmap_flush, opts->comp_level) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
   "Consider increasing "
@@ -2242,6 +2242,10 @@ int cmd_record(int argc, const char **argv)
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
 
+   if (rec->opts.comp_level > 22)
+   rec->opts.comp_level = 0;
+   pr_debug("comp level: %d\n", rec->opts.comp_level);
+
err = __cmd_record(, argc, argv);
 out:
perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 937039faac59..a13458b43dc1 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1022,7 +1022,8 @@ int perf_evlist__parse_mmap_pages(const struct option 
*opt, const char *str,
  */
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 unsigned int auxtrace_pages,
-bool auxtrace_overwrite, int nr_cblocks, int affinity, 
int flush)
+bool auxtrace_overwrite, int nr_cblocks, int affinity, 
int flush,
+int comp_level)
 {
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1032,7 +1033,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, 
unsigned int pages,
 * Its value is decided by evsel's write_backward.
     * So &mp should not be passed through const pointer.
 */
-   struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = 
affinity, .flush = flush };
+   struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = 
affinity, .flush = flush,
+ .comp_level = comp_level };
 
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1064,7 +1066,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, 
unsigned int pages,
 
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-   return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, 
PERF_AFFINITY_SYS, 1);
+   return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, 
PERF_AFFINITY_SYS, 1, 0);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index edf18811e39f..77c11dac4a63 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -166,7 +166,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 unsigned int auxtrace_pages,
 bool auxtrace_overwrite, int nr_cblocks,
-int affinity, int flush);
+int affinity, int flush, int comp_level);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ef3d79b2c90b..08fd846df604 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -159,6 +159,10 @@ void __weak auxtrace_mmap_params__set_idx(struct 
auxtrace_mmap_params *mp __mayb
 #ifdef HAVE_AIO_SUPPORT
 
 #ifdef HAVE_LIBNUMA_SUPPORT
+static int perf_mmap__aio_enabled(struct perf_mmap *map)
+{
+   return map->aio.nr_cblocks > 0;
+}
 static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
 {
map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), 
PROT_READ|PROT_WRITE,
@@ -199,6 +203,10 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int 
idx, int cpu, int affi
return 0;
 }
 #else
+static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
+{
+   return 0;
+}
 static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
 {

[PATCH 2/2] arm64: allwinner: a64: Add Oceanic A64-5inMFD initial support

2019-02-25 Thread Jagan Teki

Oceanic A64-5inMFD is a 5 inch Multi function display baseboard
designed to mount SoPine SOM.

Key features:
- Allwinner A64 Cortex-A53
- Mali-400MP2 GPU
- AXP803 PMIC
- 2GB DDR3 RAM
- SD Slot
- SPI-NOR flash
- EMAC, RTL8211E
- MCP2515 CAN
- MIPI-DSI
- Goodix 911 CTP
- USB Host
- 12V DC power supply

Signed-off-by: Jagan Teki 
---
 arch/arm64/boot/dts/allwinner/Makefile|  1 +
 .../allwinner/sun50i-a64-oceanic-5inmfd.dts   | 46 +++
 2 files changed, 47 insertions(+)
 create mode 100644 arch/arm64/boot/dts/allwinner/sun50i-a64-oceanic-5inmfd.dts

diff --git a/arch/arm64/boot/dts/allwinner/Makefile 
b/arch/arm64/boot/dts/allwinner/Makefile
index a5fb1eaa8acf..ec39fe856117 100644
--- a/arch/arm64/boot/dts/allwinner/Makefile
+++ b/arch/arm64/boot/dts/allwinner/Makefile
@@ -2,6 +2,7 @@
 dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-amarula-relic.dtb
 dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-bananapi-m64.dtb 
sun50i-a64-bananapi-m64-icn6211.dtb
 dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-nanopi-a64.dtb
+dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-oceanic-5inmfd.dtb
 dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-olinuxino.dtb
 dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-orangepi-win.dtb
 dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pine64-lts.dtb
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-oceanic-5inmfd.dts 
b/arch/arm64/boot/dts/allwinner/sun50i-a64-oceanic-5inmfd.dts
new file mode 100644
index 000000000000..d73d1f55acb9
--- /dev/null
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-oceanic-5inmfd.dts
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Oceanic Systems (UK) Ltd.
+ * Copyright (C) 2019 Amarula Solutions B.V.
+ * Author: Jagan Teki 
+ */
+
+/dts-v1/;
+
+#include "sun50i-a64-sopine.dtsi"
+
+/ {
+   model = "Oceanic A64-5inMFD";
+   compatible = "oceanic,a64-5inmfd", "allwinner,sun50i-a64";
+
+   aliases {
+   serial0 = &uart0;
+   };
+
+   chosen {
+   stdout-path = "serial0:115200n8";
+   };
+};
+
+&ehci0 {
+   status = "okay";
+};
+
+&ohci0 {
+   status = "okay";
+};
+
+&uart0 {
+   pinctrl-names = "default";
+   pinctrl-0 = <&uart0_pb_pins>;
+   status = "okay";
+};
+
+&usb_otg {
+   dr_mode = "host";
+   status = "okay";
+};
+
+&usbphy {
+   status = "okay";
+};
-- 
2.18.0.321.gffc6fa0e3

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1288 matches

Mail list logo