date:20190616

[PATCH] scsi: tcmu: Simplify 'tcmu_update_uio_info()'

2019-06-16 Thread Christophe JAILLET

Use 'kasprintf()' instead of:
   - snprintf(NULL, 0...
   - kmalloc(...
   - snprintf(...

This is less verbose and saves 7 bytes (i.e. the space for '/(null)') if
'udev->dev_config' is NULL.

Signed-off-by: Christophe JAILLET 
---
 drivers/target/target_core_user.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/target/target_core_user.c 
b/drivers/target/target_core_user.c
index b43d6385a1a0..04eda111920e 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1824,20 +1824,18 @@ static int tcmu_update_uio_info(struct tcmu_dev *udev)
 {
struct tcmu_hba *hba = udev->hba->hba_ptr;
struct uio_info *info;
-   size_t size, used;
char *str;
 
info = &udev->uio_info;
-   size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
-   udev->dev_config);
-   size += 1; /* for \0 */
-   str = kmalloc(size, GFP_KERNEL);
-   if (!str)
-   return -ENOMEM;
 
-   used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
if (udev->dev_config[0])
-   snprintf(str + used, size - used, "/%s", udev->dev_config);
+   str = kasprintf(GFP_KERNEL, "tcm-user/%u/%s/%s", hba->host_id,
+   udev->name, udev->dev_config);
+   else
+   str = kasprintf(GFP_KERNEL, "tcm-user/%u/%s", hba->host_id,
+   udev->name);
+   if (!str)
+   return -ENOMEM;
 
/* If the old string exists, free it */
kfree(info->name);
-- 
2.20.1

Re: [PATCH v3 1/2] staging: erofs: add requirements field in superblock

2019-06-16 Thread Gao Xiang

Hi Greg,

Sorry for bothering you... Could you help merge these two fixes? Thanks in advance...
The in-place decompression optimization needs these two patches. I will integrate
the erofs in-place decompression optimization later for linux-next 5.3, and then
start working on moving erofs to the fs/ directory for kernel 5.4...

Thanks,
Gao Xiang

On 2019/6/13 16:35, Gao Xiang wrote:
> There are some backward incompatible features pending
> for months, mainly due to on-disk format expansions.
> 
> However, we should ensure that it cannot be mounted with
> old kernels. Otherwise, it will cause unexpected behaviors.
> 
> Fixes: ba2b77a82022 ("staging: erofs: add super block operations")
> Cc:  # 4.19+
> Reviewed-by: Chao Yu 
> Signed-off-by: Gao Xiang 
> ---
> change log v3:
>  - record requirements in erofs_sb_info for runtime use as well;
> 
> change log v2:
>  - update printed message
> 
>  drivers/staging/erofs/erofs_fs.h | 13 ++---
>  drivers/staging/erofs/internal.h |  2 ++
>  drivers/staging/erofs/super.c| 19 +++
>  3 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/staging/erofs/erofs_fs.h 
> b/drivers/staging/erofs/erofs_fs.h
> index fa52898df006..8ddb2b3e7d39 100644
> --- a/drivers/staging/erofs/erofs_fs.h
> +++ b/drivers/staging/erofs/erofs_fs.h
> @@ -17,10 +17,16 @@
>  #define EROFS_SUPER_MAGIC_V10xE0F5E1E2
>  #define EROFS_SUPER_OFFSET  1024
>  
> +/*
> + * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be
> + * incompatible with this kernel version.
> + */
> +#define EROFS_ALL_REQUIREMENTS  0
> +
>  struct erofs_super_block {
>  /*  0 */__le32 magic;   /* in the little endian */
>  /*  4 */__le32 checksum;/* crc32c(super_block) */
> -/*  8 */__le32 features;
> +/*  8 */__le32 features;/* (aka. feature_compat) */
>  /* 12 */__u8 blkszbits; /* support block_size == PAGE_SIZE only */
>  /* 13 */__u8 reserved;
>  
> @@ -34,9 +40,10 @@ struct erofs_super_block {
>  /* 44 */__le32 xattr_blkaddr;
>  /* 48 */__u8 uuid[16];  /* 128-bit uuid for volume */
>  /* 64 */__u8 volume_name[16];   /* volume name */
> +/* 80 */__le32 requirements;/* (aka. feature_incompat) */
>  
> -/* 80 */__u8 reserved2[48]; /* 128 bytes */
> -} __packed;
> +/* 84 */__u8 reserved2[44];
> +} __packed; /* 128 bytes */
>  
>  /*
>   * erofs inode data mapping:
> diff --git a/drivers/staging/erofs/internal.h 
> b/drivers/staging/erofs/internal.h
> index 911333cdeef4..fc732c86ecd8 100644
> --- a/drivers/staging/erofs/internal.h
> +++ b/drivers/staging/erofs/internal.h
> @@ -115,6 +115,8 @@ struct erofs_sb_info {
>  
>   u8 uuid[16];/* 128-bit uuid for volume */
>   u8 volume_name[16]; /* volume name */
> + u32 requirements;
> +
>   char *dev_name;
>  
>   unsigned int mount_opt;
> diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
> index f580d4ef77a1..cadbcc11702a 100644
> --- a/drivers/staging/erofs/super.c
> +++ b/drivers/staging/erofs/super.c
> @@ -71,6 +71,22 @@ static void free_inode(struct inode *inode)
>   kmem_cache_free(erofs_inode_cachep, vi);
>  }
>  
> +static bool check_layout_compatibility(struct super_block *sb,
> +struct erofs_super_block *layout)
> +{
> + const unsigned int requirements = le32_to_cpu(layout->requirements);
> +
> + EROFS_SB(sb)->requirements = requirements;
> +
> + /* check if current kernel meets all mandatory requirements */
> + if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
> + errln("unidentified requirements %x, please upgrade kernel 
> version",
> +   requirements & ~EROFS_ALL_REQUIREMENTS);
> + return false;
> + }
> + return true;
> +}
> +
>  static int superblock_read(struct super_block *sb)
>  {
>   struct erofs_sb_info *sbi;
> @@ -104,6 +120,9 @@ static int superblock_read(struct super_block *sb)
>   goto out;
>   }
>  
> + if (!check_layout_compatibility(sb, layout))
> + goto out;
> +
>   sbi->blocks = le32_to_cpu(layout->blocks);
>   sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
>  #ifdef CONFIG_EROFS_FS_XATTR
>

[PATCH 4/6] net: macb: add support for c45 PHY

2019-06-16 Thread Parshuram Thombare

This patch modifies the MDIO read/write functions to support
communication with C45 PHYs.

Signed-off-by: Parshuram Thombare 
---
 drivers/net/ethernet/cadence/macb.h  | 15 --
 drivers/net/ethernet/cadence/macb_main.c | 61 +++-
 drivers/net/ethernet/cadence/macb_pci.c  | 60 +++
 3 files changed, 91 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h 
b/drivers/net/ethernet/cadence/macb.h
index 85c7e4cb1057..75f093bc52fe 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -667,10 +667,17 @@
 #define GEM_CLK_DIV96  5
 
 /* Constants for MAN register */
-#define MACB_MAN_SOF   1
-#define MACB_MAN_WRITE 1
-#define MACB_MAN_READ  2
-#define MACB_MAN_CODE  2
+#define MACB_MAN_C22_SOF1
+#define MACB_MAN_C22_WRITE  1
+#define MACB_MAN_C22_READ   2
+#define MACB_MAN_C22_CODE   2
+
+#define MACB_MAN_C45_SOF0
+#define MACB_MAN_C45_ADDR   0
+#define MACB_MAN_C45_WRITE  1
+#define MACB_MAN_C45_POST_READ_INCR 2
+#define MACB_MAN_C45_READ   3
+#define MACB_MAN_C45_CODE   2
 
 /* Capability mask bits */
 #define MACB_CAPS_ISR_CLEAR_ON_WRITE   BIT(0)
diff --git a/drivers/net/ethernet/cadence/macb_main.c 
b/drivers/net/ethernet/cadence/macb_main.c
index 5b3e7d9f4384..57ffc4e9d2b9 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -334,11 +334,30 @@ static int macb_mdio_read(struct mii_bus *bus, int 
mii_id, int regnum)
if (status < 0)
goto mdio_read_exit;
 
-   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF)
- | MACB_BF(RW, MACB_MAN_READ)
- | MACB_BF(PHYA, mii_id)
- | MACB_BF(REGA, regnum)
- | MACB_BF(CODE, MACB_MAN_CODE)));
+   if (regnum & MII_ADDR_C45) {
+   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+   | MACB_BF(RW, MACB_MAN_C45_ADDR)
+   | MACB_BF(PHYA, mii_id)
+   | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+   | MACB_BF(DATA, regnum & 0x)
+   | MACB_BF(CODE, MACB_MAN_C45_CODE)));
+
+   status = macb_mdio_wait_for_idle(bp);
+   if (status < 0)
+   goto mdio_read_exit;
+
+   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+   | MACB_BF(RW, MACB_MAN_C45_READ)
+   | MACB_BF(PHYA, mii_id)
+   | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+   | MACB_BF(CODE, MACB_MAN_C45_CODE)));
+   } else {
+   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF)
+   | MACB_BF(RW, MACB_MAN_C22_READ)
+   | MACB_BF(PHYA, mii_id)
+   | MACB_BF(REGA, regnum)
+   | MACB_BF(CODE, MACB_MAN_C22_CODE)));
+   }
 
status = macb_mdio_wait_for_idle(bp);
if (status < 0)
@@ -367,12 +386,32 @@ static int macb_mdio_write(struct mii_bus *bus, int 
mii_id, int regnum,
if (status < 0)
goto mdio_write_exit;
 
-   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF)
- | MACB_BF(RW, MACB_MAN_WRITE)
- | MACB_BF(PHYA, mii_id)
- | MACB_BF(REGA, regnum)
- | MACB_BF(CODE, MACB_MAN_CODE)
- | MACB_BF(DATA, value)));
+   if (regnum & MII_ADDR_C45) {
+   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+   | MACB_BF(RW, MACB_MAN_C45_ADDR)
+   | MACB_BF(PHYA, mii_id)
+   | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+   | MACB_BF(DATA, regnum & 0x)
+   | MACB_BF(CODE, MACB_MAN_C45_CODE)));
+
+   status = macb_mdio_wait_for_idle(bp);
+   if (status < 0)
+   goto mdio_write_exit;
+
+   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+   | MACB_BF(RW, MACB_MAN_C45_WRITE)
+   | MACB_BF(PHYA, mii_id)
+   | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+   | MACB_BF(CODE, MACB_MAN_C45_CODE)
+   | MACB_BF(DATA, value)));
+   } else {
+   macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF)
+   | MACB_BF(RW, MACB_

[PATCH 2/6] net: macb: add support for sgmii MAC-PHY interface

2019-06-16 Thread Parshuram Thombare

This is version 2 of the patch to add support for the SGMII interface and
2.5Gbps MAC in the Cadence ethernet controller driver.

Signed-off-by: Parshuram Thombare 
---
 drivers/net/ethernet/cadence/macb.h  |  76 +--
 drivers/net/ethernet/cadence/macb_main.c | 157 ---
 2 files changed, 202 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h 
b/drivers/net/ethernet/cadence/macb.h
index 35ed13236c8b..85c7e4cb1057 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -80,6 +80,7 @@
 #define MACB_RBQPH 0x04D4
 
 /* GEM register offsets. */
+#define GEM_NCR0x /* Network Control */
 #define GEM_NCFGR  0x0004 /* Network Config */
 #define GEM_USRIO  0x000c /* User IO */
 #define GEM_DMACFG 0x0010 /* DMA Configuration */
@@ -159,6 +160,9 @@
 #define GEM_PEFTN  0x01f4 /* PTP Peer Event Frame Tx Ns */
 #define GEM_PEFRSL 0x01f8 /* PTP Peer Event Frame Rx Sec Low */
 #define GEM_PEFRN  0x01fc /* PTP Peer Event Frame Rx Ns */
+#define GEM_PCS_CTRL   0x0200 /* PCS Control */
+#define GEM_PCS_STATUS  0x0204 /* PCS Status */
+#define GEM_PCS_AN_LP_BASE  0x0214 /* PCS AN LP BASE*/
 #define GEM_DCFG1  0x0280 /* Design Config 1 */
 #define GEM_DCFG2  0x0284 /* Design Config 2 */
 #define GEM_DCFG3  0x0288 /* Design Config 3 */
@@ -274,6 +278,10 @@
 #define MACB_IRXFCS_OFFSET 19
 #define MACB_IRXFCS_SIZE   1
 
+/* GEM specific NCR bitfields. */
+#define GEM_TWO_PT_FIVE_GIG_OFFSET 29
+#define GEM_TWO_PT_FIVE_GIG_SIZE   1
+
 /* GEM specific NCFGR bitfields. */
 #define GEM_GBE_OFFSET 10 /* Gigabit mode enable */
 #define GEM_GBE_SIZE   1
@@ -326,6 +334,9 @@
 #define MACB_MDIO_SIZE 1
 #define MACB_IDLE_OFFSET   2 /* The PHY management logic is idle */
 #define MACB_IDLE_SIZE 1
+#define MACB_DUPLEX_OFFSET  3
+#define MACB_DUPLEX_SIZE1
+
 
 /* Bitfields in TSR */
 #define MACB_UBR_OFFSET0 /* Used bit read */
@@ -459,11 +470,37 @@
 #define MACB_REV_OFFSET0
 #define MACB_REV_SIZE  16
 
+/* Bitfields in PCS_CONTROL. */
+#define GEM_PCS_CTRL_RST_OFFSET15
+#define GEM_PCS_CTRL_RST_SIZE  1
+#define GEM_PCS_CTRL_EN_AN_OFFSET  12
+#define GEM_PCS_CTRL_EN_AN_SIZE1
+#define GEM_PCS_CTRL_RESTART_AN_OFFSET 9
+#define GEM_PCS_CTRL_RESTART_AN_SIZE   1
+
+/* Bitfields in PCS_STATUS. */
+#define GEM_PCS_STATUS_AN_DONE_OFFSET   5
+#define GEM_PCS_STATUS_AN_DONE_SIZE 1
+#define GEM_PCS_STATUS_AN_SUPPORT_OFFSET3
+#define GEM_PCS_STATUS_AN_SUPPORT_SIZE  1
+#define GEM_PCS_STATUS_LINK_OFFSET  2
+#define GEM_PCS_STATUS_LINK_SIZE1
+
+/* Bitfield in PCS_AN_LP_BASE */
+#define GEM_PCS_AN_LP_BASE_LINK_OFFSET  15
+#define GEM_PCS_AN_LP_BASE_LINK_SIZE1
+#define GEM_PCS_AN_LP_BASE_DUPLEX_OFFSET12
+#define GEM_PCS_AN_LP_BASE_DUPLEX_SIZE  1
+#define GEM_PCS_AN_LP_BASE_SPEED_OFFSET 10
+#define GEM_PCS_AN_LP_BASE_SPEED_SIZE   2
+
 /* Bitfields in DCFG1. */
 #define GEM_IRQCOR_OFFSET  23
 #define GEM_IRQCOR_SIZE1
 #define GEM_DBWDEF_OFFSET  25
 #define GEM_DBWDEF_SIZE3
+#define GEM_NO_PCS_OFFSET  0
+#define GEM_NO_PCS_SIZE1
 
 /* Bitfields in DCFG2. */
 #define GEM_RX_PKT_BUFF_OFFSET 20
@@ -636,19 +673,32 @@
 #define MACB_MAN_CODE  2
 
 /* Capability mask bits */
-#define MACB_CAPS_ISR_CLEAR_ON_WRITE   0x0001
-#define MACB_CAPS_USRIO_HAS_CLKEN  0x0002
-#define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII0x0004
-#define MACB_CAPS_NO_GIGABIT_HALF  0x0008
-#define MACB_CAPS_USRIO_DISABLED   0x0010
-#define MACB_CAPS_JUMBO0x0020
-#define MACB_CAPS_GEM_HAS_PTP  0x0040
-#define MACB_CAPS_BD_RD_PREFETCH   0x0080
-#define MACB_CAPS_NEEDS_RSTONUBR   0x0100
-#define MACB_CAPS_FIFO_MODE0x1000
-#define MACB_CAPS_GIGABIT_MODE_AVAILABLE   0x2000
-#define MACB_CAPS_SG_DISABLED  0x4000
-#define MACB_CAPS_MACB_IS_GEM  0x8000
+#define MACB_CAPS_ISR_CLEAR_ON_WRITE   BIT(0)
+#define MACB_CAPS_USRIO_HAS_CLKEN  BIT(1)
+#define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMIIBIT(2)
+#define MACB_CAPS_NO_GIGABIT_HALF  BIT(3)
+#define MACB_CAPS_USRIO_DISABLED   BIT(4)
+#define MACB_CAPS_JUMBOBIT(5)
+#define MACB_CAPS_GEM_HAS

[PATCH 3/6] net: macb: add PHY configuration in MACB PCI wrapper

2019-06-16 Thread Parshuram Thombare

This patch adds TI PHY DP83867 configuration for the SGMII link in
the Cadence MACB PCI wrapper.

Signed-off-by: Parshuram Thombare 
---
 drivers/net/ethernet/cadence/macb_pci.c | 225 
 1 file changed, 225 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_pci.c 
b/drivers/net/ethernet/cadence/macb_pci.c
index 248a8fc45069..1001e03191a1 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "macb.h"
@@ -37,6 +38,224 @@
 #define GEM_PCLK_RATE 5000
 #define GEM_HCLK_RATE 5000
 
+#define TI_PHY_DP83867_ID  0x2000a231
+#define TI_PHY_DEVADDR 0x1f
+#define PHY_REGCR 0x0D
+#define PHY_ADDAR 0x0E
+
+#define MACB_MDIO_TIMEOUT  100 /* in usecs */
+
+#define MACB_REGCR_OP_OFFSET   14
+#define MACB_REGCR_OP_SIZE 2
+#define MACB_REGCR_DEVADDR_OFFSET  0
+#define MACB_REGCR_DEVADDR_SIZE5
+
+#define MACB_REGCR_OP_ADDR 0
+#define MACB_REGCR_OP_DATA 1
+
+static int macb_mdio_wait_for_idle(void __iomem *macb_base_addr)
+{
+   u32 val;
+
+   return readx_poll_timeout(readl, macb_base_addr + MACB_NSR, val,
+ val & MACB_BIT(IDLE), 1, MACB_MDIO_TIMEOUT);
+}
+
+static int macb_mdiobus_read(void __iomem *macb_base_addr,
+u32 phy_id,
+u32 regnum)
+{
+   u32 i;
+   int status;
+
+   if (regnum < 32) {
+   i = MACB_BF(SOF, MACB_MAN_SOF) |
+   MACB_BF(RW, MACB_MAN_READ) |
+   MACB_BF(PHYA, phy_id) |
+   MACB_BF(REGA, regnum) |
+   MACB_BF(CODE, MACB_MAN_CODE);
+
+   writel(i, macb_base_addr + MACB_MAN);
+   status = macb_mdio_wait_for_idle(macb_base_addr);
+   if (status < 0)
+   return status;
+   } else {
+   u16 reg;
+
+   reg = MACB_BF(REGCR_OP, MACB_REGCR_OP_ADDR) |
+   MACB_BF(REGCR_DEVADDR, TI_PHY_DEVADDR);
+   i = MACB_BF(SOF, MACB_MAN_SOF) |
+   MACB_BF(RW, MACB_MAN_WRITE) |
+   MACB_BF(PHYA, phy_id) |
+   MACB_BF(REGA, PHY_REGCR) |
+   MACB_BF(CODE, MACB_MAN_CODE) |
+   MACB_BF(DATA, reg);
+   writel(i, macb_base_addr + MACB_MAN);
+   status = macb_mdio_wait_for_idle(macb_base_addr);
+   if (status < 0)
+   return status;
+
+   i = MACB_BF(SOF, MACB_MAN_SOF) |
+   MACB_BF(RW, MACB_MAN_WRITE) |
+   MACB_BF(PHYA, phy_id) |
+   MACB_BF(REGA, PHY_ADDAR) |
+   MACB_BF(CODE, MACB_MAN_CODE) |
+   MACB_BF(DATA, regnum);
+   writel(i, macb_base_addr + MACB_MAN);
+   status = macb_mdio_wait_for_idle(macb_base_addr);
+   if (status < 0)
+   return status;
+
+   reg = MACB_BF(REGCR_OP, MACB_REGCR_OP_DATA) |
+   MACB_BF(REGCR_DEVADDR, TI_PHY_DEVADDR);
+   i = MACB_BF(SOF, MACB_MAN_SOF) |
+   MACB_BF(RW, MACB_MAN_WRITE) |
+   MACB_BF(PHYA, phy_id) |
+   MACB_BF(REGA, PHY_REGCR) |
+   MACB_BF(CODE, MACB_MAN_CODE) |
+   MACB_BF(DATA, reg);
+   writel(i, macb_base_addr + MACB_MAN);
+   status = macb_mdio_wait_for_idle(macb_base_addr);
+   if (status < 0)
+   return status;
+
+   i = MACB_BF(SOF, MACB_MAN_SOF) |
+   MACB_BF(RW, MACB_MAN_READ) |
+   MACB_BF(PHYA, phy_id) |
+   MACB_BF(REGA, PHY_ADDAR) |
+   MACB_BF(CODE, MACB_MAN_CODE);
+
+   writel(i, macb_base_addr + MACB_MAN);
+   status = macb_mdio_wait_for_idle(macb_base_addr);
+   if (status < 0)
+   return status;
+   }
+
+   return readl(macb_base_addr + MACB_MAN);
+}
+
+static int macb_mdiobus_write(void __iomem *macb_base_addr, u32 phy_id,
+ u32 regnum, u16 value)
+{
+   u32 i;
+   int status;
+
+   if (regnum < 32) {
+   i = MACB_BF(SOF, MACB_MAN_SOF) |
+   MACB_BF(RW, MACB_MAN_WRITE) |
+   MACB_BF(PHYA, phy_id) |
+   MACB_BF(REGA, regnum) |
+   MACB_BF(CODE, MACB_MAN_CODE) |
+   MACB_BF(DATA, value);
+
+   writel(i, macb_base_addr + MACB_MAN);
+   status = macb_mdio_wait_for_idle(macb_base_addr);
+   if (status < 0)
+   return status;
+   } else

[PATCH 5/6] net: macb: add support for high speed interface

2019-06-16 Thread Parshuram Thombare

This patch adds support for the high speed USXGMII PCS and 10G
speed in the Cadence ethernet controller driver.

Signed-off-by: Parshuram Thombare 
---
 drivers/net/ethernet/cadence/macb.h  |  42 +
 drivers/net/ethernet/cadence/macb_main.c | 215 +++
 2 files changed, 224 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h 
b/drivers/net/ethernet/cadence/macb.h
index 75f093bc52fe..e00b9f647757 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -85,6 +85,7 @@
 #define GEM_USRIO  0x000c /* User IO */
 #define GEM_DMACFG 0x0010 /* DMA Configuration */
 #define GEM_JML0x0048 /* Jumbo Max Length */
+#define GEM_HS_MAC_CONFIG  0x0050 /* GEM high speed config */
 #define GEM_HRB0x0080 /* Hash Bottom */
 #define GEM_HRT0x0084 /* Hash Top */
 #define GEM_SA1B   0x0088 /* Specific1 Bottom */
@@ -172,6 +173,9 @@
 #define GEM_DCFG7  0x0298 /* Design Config 7 */
 #define GEM_DCFG8  0x029C /* Design Config 8 */
 #define GEM_DCFG10 0x02A4 /* Design Config 10 */
+#define GEM_DCFG12 0x02AC /* Design Config 12 */
+#define GEM_USX_CONTROL0x0A80 /* USXGMII control register */
+#define GEM_USX_STATUS 0x0A88 /* USXGMII status register */
 
 #define GEM_TXBDCTRL   0x04cc /* TX Buffer Descriptor control register */
 #define GEM_RXBDCTRL   0x04d0 /* RX Buffer Descriptor control register */
@@ -279,6 +283,8 @@
 #define MACB_IRXFCS_SIZE   1
 
 /* GEM specific NCR bitfields. */
+#define GEM_ENABLE_HS_MAC_OFFSET   31
+#define GEM_ENABLE_HS_MAC_SIZE 1
 #define GEM_TWO_PT_FIVE_GIG_OFFSET 29
 #define GEM_TWO_PT_FIVE_GIG_SIZE   1
 
@@ -470,6 +476,10 @@
 #define MACB_REV_OFFSET0
 #define MACB_REV_SIZE  16
 
+/* Bitfield in HS_MAC_CONFIG */
+#define GEM_HS_MAC_SPEED_OFFSET0
+#define GEM_HS_MAC_SPEED_SIZE  3
+
 /* Bitfields in PCS_CONTROL. */
 #define GEM_PCS_CTRL_RST_OFFSET15
 #define GEM_PCS_CTRL_RST_SIZE  1
@@ -535,6 +545,34 @@
 #define GEM_RXBD_RDBUFF_OFFSET 8
 #define GEM_RXBD_RDBUFF_SIZE   4
 
+/* Bitfields in DCFG12. */
+#define GEM_HIGH_SPEED_OFFSET  26
+#define GEM_HIGH_SPEED_SIZE1
+
+/* Bitfields in USX_CONTROL. */
+#define GEM_USX_CTRL_SPEED_OFFSET  14
+#define GEM_USX_CTRL_SPEED_SIZE3
+#define GEM_SERDES_RATE_OFFSET 12
+#define GEM_SERDES_RATE_SIZE   2
+#define GEM_RX_SCR_BYPASS_OFFSET   9
+#define GEM_RX_SCR_BYPASS_SIZE 1
+#define GEM_TX_SCR_BYPASS_OFFSET   8
+#define GEM_TX_SCR_BYPASS_SIZE 1
+#define GEM_RX_SYNC_RESET_OFFSET   2
+#define GEM_RX_SYNC_RESET_SIZE 1
+#define GEM_TX_EN_OFFSET   1
+#define GEM_TX_EN_SIZE 1
+#define GEM_SIGNAL_OK_OFFSET   0
+#define GEM_SIGNAL_OK_SIZE 1
+
+/* Bitfields in USX_STATUS. */
+#define GEM_USX_TX_FAULT_OFFSET28
+#define GEM_USX_TX_FAULT_SIZE  1
+#define GEM_USX_RX_FAULT_OFFSET27
+#define GEM_USX_RX_FAULT_SIZE  1
+#define GEM_USX_BLOCK_LOCK_OFFSET  0
+#define GEM_USX_BLOCK_LOCK_SIZE1
+
 /* Bitfields in TISUBN */
 #define GEM_SUBNSINCR_OFFSET   0
 #define GEM_SUBNSINCR_SIZE 16
@@ -695,6 +733,7 @@
 #define MACB_CAPS_MACB_IS_GEM  BIT(31)
 #define MACB_CAPS_PCS  BIT(24)
 #define MACB_CAPS_MACB_IS_GEM_GXL  BIT(25)
+#define MACB_CAPS_HIGH_SPEED   BIT(26)
 
 #define MACB_GEM7010_IDNUM 0x009
 #define MACB_GEM7014_IDNUM 0x107
@@ -774,6 +813,7 @@
})
 
 #define MACB_READ_NSR(bp)  macb_readl(bp, NSR)
+#define GEM_READ_USX_STATUS(bp)gem_readl(bp, USX_STATUS)
 
 /* struct macb_dma_desc - Hardware DMA descriptor
  * @addr: DMA address of data buffer
@@ -1287,6 +1327,8 @@ struct macb {
struct macb_pm_data pm_data;
struct phylink *pl;
struct phylink_config pl_config;
+   u32 serdes_rate;
+   u32 fixed_speed;
 };
 
 #ifdef CONFIG_MACB_USE_HWSTAMP
diff --git a/drivers/net/ethernet/cadence/macb_main.c 
b/drivers/net/ethernet/cadence/macb_main.c
index 57ffc4e9d2b9..8739f815bcae 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -77,6 +77,20 @@
 #define MACB_WOL_HAS_MAGIC_PACKET  (0x1 << 0)
 #define MACB_WOL_ENABLED   (0x1 << 1)
 
+enum {
+   HS_MAC_SPEED_100M,
+   HS_MAC_SPEED_1000M,
+   HS_MAC_SPEED_2500M,

[PATCH 6/6] net: macb: parameter added to cadence ethernet controller DT binding

2019-06-16 Thread Parshuram Thombare

New parameters added to Cadence ethernet controller DT binding
for USXGMII interface.

Signed-off-by: Parshuram Thombare 
---
 Documentation/devicetree/bindings/net/macb.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/macb.txt 
b/Documentation/devicetree/bindings/net/macb.txt
index 9c5e94482b5f..cd79ec9dddfb 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -25,6 +25,10 @@ Required properties:
Optional elements: 'rx_clk' applies to cdns,zynqmp-gem
Optional elements: 'tsu_clk'
 - clocks: Phandles to input clocks.
+- serdes-rate External serdes rate. Mandatory for USXGMII mode.
+   0 - 5G
+   1 - 10G
+- fixed-speed Speed for fixed mode USXGMII interface based link
 
 The MAC address will be determined using the optional properties
 defined in ethernet.txt.
-- 
2.17.1

Re: [PATCH AUTOSEL 5.1 16/59] fpga: dfl: Add lockdep classes for pdata->lock

2019-06-16 Thread Greg Kroah-Hartman

On Sat, Jun 15, 2019 at 06:41:59PM -0400, Sasha Levin wrote:
> On Sat, Jun 15, 2019 at 07:47:39AM +0200, Greg Kroah-Hartman wrote:
> > On Fri, Jun 14, 2019 at 04:28:00PM -0400, Sasha Levin wrote:
> > > From: Scott Wood 
> > > 
> > > [ Upstream commit dfe3de8d397bf878b31864d4e489d41118ec475f ]
> > > 
> > > struct dfl_feature_platform_data (and its mutex) is used
> > > by both fme and port devices, and when lockdep is enabled it
> > > complains about nesting between these locks.  Tell lockdep about
> > > the difference so it can track each class separately.
> > > 
> > > Here's the lockdep complaint:
> > > [  409.680668] WARNING: possible recursive locking detected
> > > [  409.685983] 5.1.0-rc3.fpga+ #1 Tainted: GE
> > > [  409.691469] 
> > > [  409.696779] fpgaconf/9348 is trying to acquire lock:
> > > [  409.701746] a443fe2e (&pdata->lock){+.+.}, at: 
> > > port_enable_set+0x24/0x60 [dfl_afu]
> > > [  409.710006]
> > > [  409.710006] but task is already holding lock:
> > > [  409.715837] 63b78782 (&pdata->lock){+.+.}, at: 
> > > fme_pr_ioctl+0x21d/0x330 [dfl_fme]
> > > [  409.724012]
> > > [  409.724012] other info that might help us debug this:
> > > [  409.730535]  Possible unsafe locking scenario:
> > > [  409.730535]
> > > [  409.736457]CPU0
> > > [  409.738910]
> > > [  409.741360]   lock(&pdata->lock);
> > > [  409.744679]   lock(&pdata->lock);
> > > [  409.747999]
> > > [  409.747999]  *** DEADLOCK ***
> > > [  409.747999]
> > > [  409.753920]  May be due to missing lock nesting notation
> > > [  409.753920]
> > > [  409.760704] 4 locks held by fpgaconf/9348:
> > > [  409.764805]  #0: 63b78782 (&pdata->lock){+.+.}, at: 
> > > fme_pr_ioctl+0x21d/0x330 [dfl_fme]
> > > [  409.773408]  #1: 213c8a66 (&region->mutex){+.+.}, at: 
> > > fpga_region_program_fpga+0x24/0x200 [fpga_region]
> > > [  409.783489]  #2: fe63afb9 (&mgr->ref_mutex){+.+.}, at: 
> > > fpga_mgr_lock+0x15/0x40 [fpga_mgr]
> > > [  409.792354]  #3: 0b2285c5 (&bridge->mutex){+.+.}, at: 
> > > __fpga_bridge_get+0x26/0xa0 [fpga_bridge]
> > > [  409.801740]
> > > [  409.801740] stack backtrace:
> > > [  409.806102] CPU: 45 PID: 9348 Comm: fpgaconf Kdump: loaded Tainted: G  
> > >   E 5.1.0-rc3.fpga+ #1
> > > [  409.815658] Hardware name: Intel Corporation S2600BT/S2600BT, BIOS 
> > > SE5C620.86B.01.00.0763.022420181017 02/24/2018
> > > [  409.825911] Call Trace:
> > > [  409.828369]  dump_stack+0x5e/0x8b
> > > [  409.831686]  __lock_acquire+0xf3d/0x10e0
> > > [  409.835612]  ? find_held_lock+0x3c/0xa0
> > > [  409.839451]  lock_acquire+0xbc/0x1d0
> > > [  409.843030]  ? port_enable_set+0x24/0x60 [dfl_afu]
> > > [  409.847823]  ? port_enable_set+0x24/0x60 [dfl_afu]
> > > [  409.852616]  __mutex_lock+0x86/0x970
> > > [  409.856195]  ? port_enable_set+0x24/0x60 [dfl_afu]
> > > [  409.860989]  ? port_enable_set+0x24/0x60 [dfl_afu]
> > > [  409.865777]  ? __mutex_unlock_slowpath+0x4b/0x290
> > > [  409.870486]  port_enable_set+0x24/0x60 [dfl_afu]
> > > [  409.875106]  fpga_bridges_disable+0x36/0x50 [fpga_bridge]
> > > [  409.880502]  fpga_region_program_fpga+0xea/0x200 [fpga_region]
> > > [  409.886338]  fme_pr_ioctl+0x13e/0x330 [dfl_fme]
> > > [  409.890870]  fme_ioctl+0x66/0xe0 [dfl_fme]
> > > [  409.894973]  do_vfs_ioctl+0xa9/0x720
> > > [  409.898548]  ? lockdep_hardirqs_on+0xf0/0x1a0
> > > [  409.902907]  ksys_ioctl+0x60/0x90
> > > [  409.906225]  __x64_sys_ioctl+0x16/0x20
> > > [  409.909981]  do_syscall_64+0x5a/0x220
> > > [  409.913644]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
> > > [  409.918698] RIP: 0033:0x7f9d31b9b8d7
> > > [  409.922276] Code: 44 00 00 48 8b 05 b9 15 2d 00 64 c7 00 26 00 00 00 
> > > 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 
> > > 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 15 2d 00 f7 d8 64 89 01 48
> > > [  409.941020] RSP: 002b:7ffe4cae0d68 EFLAGS: 0202 ORIG_RAX: 
> > > 0010
> > > [  409.948588] RAX: ffda RBX: 7f9d32ade6a0 RCX: 
> > > 7f9d31b9b8d7
> > > [  409.955719] RDX: 7ffe4cae0df0 RSI: b680 RDI: 
> > > 0003
> > > [  409.962852] RBP: 0003 R08: 7f9d2b70a177 R09: 
> > > 7ffe4cae0e40
> > > [  409.969984] R10: 7ffe4cae0160 R11: 0202 R12: 
> > > 7ffe4cae0df0
> > > [  409.977115] R13: b680 R14:  R15: 
> > > 7ffe4cae0f60
> > > 
> > > Signed-off-by: Scott Wood 
> > > Acked-by: Wu Hao 
> > > Acked-by: Alan Tull 
> > > Signed-off-by: Greg Kroah-Hartman 
> > > Signed-off-by: Sasha Levin 
> > > ---
> > >  drivers/fpga/dfl.c | 16 +++-
> > >  1 file changed, 15 insertions(+), 1 deletion(-)
> > 
> > Adding lockdep stuff is not really needed for stable kernels, please
> > drop this from all trees.
> 
> For actual splats? Why? I treat them as compiler warnings. Keeping these
> around will just make them show up over a

Re: memory leak in tipc_buf_acquire

2019-06-16 Thread Ying Xue

On 6/10/19 2:44 AM, Xin Long wrote:
> Looks like we need to purge each member's deferredq list in tipc_group_delete():
> diff --git a/net/tipc/group.c b/net/tipc/group.c
> index 992be61..23823eb 100644
> --- a/net/tipc/group.c
> +++ b/net/tipc/group.c
> @@ -218,6 +218,7 @@ void tipc_group_delete(struct net *net, struct
> tipc_group *grp)
> 
>   rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
>   tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
> + __skb_queue_purge(&m->deferredq);
>   list_del(&m->list);
>   kfree(m);
>   }

Good catch! I agree with you.

Re: general protection fault in oom_unkillable_task

2019-06-16 Thread Tetsuo Handa

On 2019/06/16 6:33, Tetsuo Handa wrote:
> On 2019/06/16 3:50, Shakeel Butt wrote:
>>> While dump_tasks() traverses only each thread group, mem_cgroup_scan_tasks()
>>> traverses each thread.
>>
>> I think mem_cgroup_scan_tasks() traversing threads is not intentional
>> and css_task_iter_start in it should use CSS_TASK_ITER_PROCS as the
>> oom killer only cares about the processes or more specifically
>> mm_struct (though two different thread groups can have same mm_struct
>> but that is fine).
> 
> We can't use CSS_TASK_ITER_PROCS from mem_cgroup_scan_tasks(). I've tried
> CSS_TASK_ITER_PROCS in an attempt to evaluate only one thread from each
> thread group, but I found that CSS_TASK_ITER_PROCS causes skipping whole
> threads in a thread group (and trivially allowing "Out of memory and no
> killable processes...\n" flood) if thread group leader has already exited.

Seems that CSS_TASK_ITER_PROCS from mem_cgroup_scan_tasks() is now working.
Maybe I was confused because I tested without commit 7775face207922ea ("memcg: killed
threads should not invoke memcg OOM killer"). We can scan one thread from
each thread group, and remove

/* Prefer thread group leaders for display purposes */
if (points == oc->chosen_points && thread_group_leader(oc->chosen))
goto next;

check.

Re: [PATCH v9 10/12] mm/devm_memremap_pages: Enable sub-section remap

2019-06-16 Thread Aneesh Kumar K.V

Dan Williams  writes:

> Teach devm_memremap_pages() about the new sub-section capabilities of
> arch_{add,remove}_memory(). Effectively, just replace all usage of
> align_start, align_end, and align_size with res->start, res->end, and
> resource_size(res). The existing sanity check will still make sure that
> the two separate remap attempts do not collide within a sub-section (2MB
> on x86).
>
> Cc: Michal Hocko 
> Cc: Toshi Kani 
> Cc: Jérôme Glisse 
> Cc: Logan Gunthorpe 
> Cc: Oscar Salvador 
> Cc: Pavel Tatashin 
> Signed-off-by: Dan Williams 
> ---
>  kernel/memremap.c |   61 
> +
>  1 file changed, 24 insertions(+), 37 deletions(-)
>
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index 57980ed4e571..a0e5f6b91b04 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -58,7 +58,7 @@ static unsigned long pfn_first(struct dev_pagemap *pgmap)
>   struct vmem_altmap *altmap = &pgmap->altmap;
>   unsigned long pfn;
>  
> - pfn = res->start >> PAGE_SHIFT;
> + pfn = PHYS_PFN(res->start);
>   if (pgmap->altmap_valid)
>   pfn += vmem_altmap_offset(altmap);
>   return pfn;
> @@ -86,7 +86,6 @@ static void devm_memremap_pages_release(void *data)
>   struct dev_pagemap *pgmap = data;
>   struct device *dev = pgmap->dev;
>   struct resource *res = &pgmap->res;
> - resource_size_t align_start, align_size;
>   unsigned long pfn;
>   int nid;
>  
> @@ -96,25 +95,21 @@ static void devm_memremap_pages_release(void *data)
>   pgmap->cleanup(pgmap->ref);
>  
>   /* pages are dead and unused, undo the arch mapping */
> - align_start = res->start & ~(PA_SECTION_SIZE - 1);
> - align_size = ALIGN(res->start + resource_size(res), PA_SECTION_SIZE)
> - - align_start;
> -
> - nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT));
> + nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));

Why are we no longer required to align things to the subsection size?

-aneesh

Re: [PATCH v2] staging: rtl8723bs: Resolve checkpatch error "that open brace { should be on the previous line" in the rtl8723 driver

2019-06-16 Thread Joe Perches

On Sat, 2019-06-15 at 14:29 -0700, Shobhit Kukreti wrote:
> Cleaned up the code from the following files to get rid of
> check patch error "that open brace { should be on the previous line"

It's fine you are modifying brace styles, but:

> diff --git a/drivers/staging/rtl8723bs/os_dep/mlme_linux.c 
> b/drivers/staging/rtl8723bs/os_dep/mlme_linux.c
> index aa2499f..4631b68 100644
> --- a/drivers/staging/rtl8723bs/os_dep/mlme_linux.c
> +++ b/drivers/staging/rtl8723bs/os_dep/mlme_linux.c
> @@ -46,8 +46,7 @@ void rtw_os_indicate_connect(struct adapter *adapter)
>   struct mlme_priv *pmlmepriv = &(adapter->mlmepriv);
>  
>   if ((check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) == true) ||
> - (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) == true))
> - {
> + (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) == true)) {
>   rtw_cfg80211_ibss_indicate_connect(adapter);
>   }
>   else

the else should be on the same line as the close brace

> @@ -106,8 +105,9 @@ void rtw_reset_securitypriv(struct adapter *adapter)
>   adapter->securitypriv.ndisencryptstatus = Ndis802_11WEPDisabled;
>  
>   }
> - else /* reset values in securitypriv */
> - {
> + else {
> + /* reset values in securitypriv */
> +

and here.  etc.  Please change all instances appropriately.

Re: [PATCH] staging: rtl8723bs: os_dep: ioctl_linux: Make use rtw_zmalloc

2019-06-16 Thread Dan Carpenter

On Sun, Jun 16, 2019 at 11:02:50AM +0530, Hariprasad Kelam wrote:
> rtw_malloc with memset can be replaced with rtw_zmalloc.
> 
> Signed-off-by: Hariprasad Kelam 
> ---
>  drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 12 +++-
>  1 file changed, 3 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c 
> b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
> index fc3885d..c59e366 100644
> --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
> +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
> @@ -478,14 +478,12 @@ static int wpa_set_encryption(struct net_device *dev, 
> struct ieee_param *param,
>   if (wep_key_len > 0) {
>   wep_key_len = wep_key_len <= 5 ? 5 : 13;
>   wep_total_len = wep_key_len + FIELD_OFFSET(struct 
> ndis_802_11_wep, KeyMaterial);
> - pwep = rtw_malloc(wep_total_len);
> + pwep = rtw_zmalloc(wep_total_len);

We should not introduce new uses of rtw_malloc() or rtw_zmalloc().  They
are buggy garbage.  Use normal kmalloc() and kzalloc().

regards,
dan carpenter

Re: [PATCH v5 13/16] powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX

2019-06-16 Thread christophe leroy





Le 15/06/2019 à 13:23, Andreas Schwab a écrit :

This breaks suspend (or resume) on the iBook G4.  no_console_suspend
doesn't give any clues, the display just stays dark.



After a quick look at the suspend functions, I have the feeling that 
those functions only store and restore BATs 0 to 3.


Could you build your kernel with CONFIG_PPC_PTDUMP and see in file 
/sys/kernel/debug/powerpc/segment_registers how many IBATs registers are 
used.
If any of registers IBATs 4 to 7 are used, could you adjust 
CONFIG_ETEXT_SHIFT so that only IBATs 0 to 3 be used, and check if 
suspend/resume works when IBATs 4 to 7 are not used ?


Thanks
Christophe

---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
https://www.avast.com/antivirus

Re: [PATCH] powerpc/mm/32s: only use MMU to mark initmem NX if STRICT_KERNEL_RWX

2019-06-16 Thread christophe leroy





Le 15/06/2019 à 16:36, Andreas Schwab a écrit :

On Jun 15 2019, Christophe Leroy  wrote:


Andreas Schwab  a écrit :


If STRICT_KERNEL_RWX is disabled, never use the MMU to mark initmem
nonexecutable.


I don't understand, can you elaborate?


It breaks suspend.


Ok, but we need to explain why it breaks suspend, and again your patch 
is wrong anyway because that area of memory is mapped with BATs so you 
can't use change_page_attr()





This area is mapped with BATs so using change_page_attr() is pointless.


There must be a reason STRICT_KERNEL_RWX is not available with
HIBERNATE.


Yes, but HIBERNATE and suspend are not the same thing. I guess HIBERNATE 
is not available with STRICT_KERNEL_RWX because HIBERNATE has to write 
back saved state into read-only memory as well.


Christophe

---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
https://www.avast.com/antivirus

Re: [PATCH v5 13/16] powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX

2019-06-16 Thread christophe leroy





Le 15/06/2019 à 14:28, Andreas Schwab a écrit :

On Feb 21 2019, Christophe Leroy  wrote:


diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a000768a5cc9..6e56a6240bfa 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -353,7 +353,10 @@ void mark_initmem_nx(void)
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
 PFN_DOWN((unsigned long)_sinittext);
  
-	change_page_attr(page, numpages, PAGE_KERNEL);

+   if (v_block_mapped((unsigned long)_stext) + 1)


That is always true.



Did you boot with 'nobats' kernel parameter ?

If not, that's normal to be true, it means that memory is mapped with BATs.

When you boot with 'nobats' parameter, this should return false.

Christophe

---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
https://www.avast.com/antivirus

Re: [PATCH v7 18/18] x86/fsgsbase/64: Add documentation for FSGSBASE

2019-06-16 Thread Thomas Gleixner

On Sun, 16 Jun 2019, Bae, Chang Seok wrote:
> On Jun 14, 2019, at 13:07, Bae, Chang Seok 
> mailto:chang.seok@intel.com>> wrote:
> 
> 
> On Jun 13, 2019, at 23:54, Thomas Gleixner 
> mailto:t...@linutronix.de>> wrote:
> 
> +The GS segment has no common use and can be used freely by
> +applications. There is no storage class specifier similar to __thread which
> +would cause the compiler to use GS based addressing modes. Newer versions
> +of GCC and Clang support GS based addressing via address space identifiers.
> +
> 
> …
> 
> +
> +Clang does not provide these address space identifiers, but it provides
> +an attribute based mechanism:
> +
> 
> These two sentences seem to conflict with each other; Clang needs to be 
> clarified
> above.
> 
> Thanks for the write-up. Just preparing v8 right now. Will send out shortly.

Please don't. Send me a delta patch against the documentation. I have queued
all the other patches already internally. I did not push it out because I
wanted to have proper docs.

Thanks,

tglx

Re: [PATCH v4 1/3] stacktrace: Remove weak version of save_stack_trace_tsk_reliable()

2019-06-16 Thread Thomas Gleixner

On Tue, 11 Jun 2019, Miroslav Benes wrote:

> Recent rework of stack trace infrastructure introduced a new set of
> helpers for common stack trace operations (commit e9b98e162aa5
> ("stacktrace: Provide helpers for common stack trace operations") and
> related). As a result, save_stack_trace_tsk_reliable() is not directly
> called anywhere. Livepatch, currently the only user of the reliable
> stack trace feature, now calls stack_trace_save_tsk_reliable().
> 
> When CONFIG_HAVE_RELIABLE_STACKTRACE is set and depending on
> CONFIG_ARCH_STACKWALK, stack_trace_save_tsk_reliable() calls either
> arch_stack_walk_reliable() or mentioned save_stack_trace_tsk_reliable().
> x86_64 defines the former, ppc64le the latter. All other architectures
> do not have HAVE_RELIABLE_STACKTRACE and include/linux/stacktrace.h
> defines -ENOSYS returning version for them.
> 
> In short, stack_trace_save_tsk_reliable() returning -ENOSYS defined in
> include/linux/stacktrace.h serves the same purpose as the old weak
> version of save_stack_trace_tsk_reliable() which is therefore no longer
> needed.
> 
> Cc: Thomas Gleixner 
> Signed-off-by: Miroslav Benes 

Reviewed-by: Thomas Gleixner

Re: [PATCH v5 13/16] powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX

2019-06-16 Thread Andreas Schwab

On Jun 16 2019, christophe leroy  wrote:

> Le 15/06/2019 à 14:28, Andreas Schwab a écrit :
>> On Feb 21 2019, Christophe Leroy  wrote:
>>
>>> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
>>> index a000768a5cc9..6e56a6240bfa 100644
>>> --- a/arch/powerpc/mm/pgtable_32.c
>>> +++ b/arch/powerpc/mm/pgtable_32.c
>>> @@ -353,7 +353,10 @@ void mark_initmem_nx(void)
>>> unsigned long numpages = PFN_UP((unsigned long)_einittext) -
>>>  PFN_DOWN((unsigned long)_sinittext);
>>>   - change_page_attr(page, numpages, PAGE_KERNEL);
>>> +   if (v_block_mapped((unsigned long)_stext) + 1)
>>
>> That is always true.
>>
>
> Did you boot with 'nobats' kernel parameter ?
>
> If not, that's normal to be true, it means that memory is mapped with BATs.

bool + 1 is always true.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

[PATCH NOTFORMERGE 2/5] mm: revert madvise_inject_error line split

2019-06-16 Thread Oleksandr Natalenko

Just to highlight it after our conversation.

Signed-off-by: Oleksandr Natalenko 
---
 mm/madvise.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index edb7184f665c..70aeb54f3e1c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1041,8 +1041,7 @@ static int madvise_common(struct task_struct *task, 
struct mm_struct *mm,
 
 #ifdef CONFIG_MEMORY_FAILURE
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
-   return madvise_inject_error(behavior,
-   start, start + len_in);
+   return madvise_inject_error(behavior, start, start + len_in);
 #endif
 
write = madvise_need_mmap_write(behavior);
-- 
2.22.0

[PATCH NOTFORMERGE 3/5] mm: include uio.h to madvise.c

2019-06-16 Thread Oleksandr Natalenko

I couldn't compile it w/o this header.

Signed-off-by: Oleksandr Natalenko 
---
 mm/madvise.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 70aeb54f3e1c..9755340da157 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
-- 
2.22.0

[PATCH NOTFORMERGE 1/5] mm: rename madvise_core to madvise_common

2019-06-16 Thread Oleksandr Natalenko

"core" usually means something very different within the kernel land,
thus let's just follow the way it is handled in mutexes, rw_semaphores
etc and name common things as "_common".

Signed-off-by: Oleksandr Natalenko 
---
 mm/madvise.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 94d782097afd..edb7184f665c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -998,7 +998,7 @@ process_madvise_behavior_valid(int behavior)
 }
 
 /*
- * madvise_core - request behavior hint to address range of the target process
+ * madvise_common - request behavior hint to address range of the target 
process
  *
  * @task: task_struct got behavior hint, not giving the hint
  * @mm: mm_struct got behavior hint, not giving the hint
@@ -1009,7 +1009,7 @@ process_madvise_behavior_valid(int behavior)
  * @task could be a zombie leader if it calls sys_exit so accessing mm_struct
  * via task->mm is prohibited. Please use @mm insetad of task->mm.
  */
-static int madvise_core(struct task_struct *task, struct mm_struct *mm,
+static int madvise_common(struct task_struct *task, struct mm_struct *mm,
unsigned long start, size_t len_in, int behavior)
 {
unsigned long end, tmp;
@@ -1132,7 +1132,7 @@ static int pr_madvise_copy_param(struct pr_madvise_param 
__user *u_param,
return ret;
 }
 
-static int process_madvise_core(struct task_struct *tsk, struct mm_struct *mm,
+static int process_madvise_common(struct task_struct *tsk, struct mm_struct 
*mm,
int *behaviors,
struct iov_iter *iter,
const struct iovec *range_vec,
@@ -1144,7 +1144,7 @@ static int process_madvise_core(struct task_struct *tsk, 
struct mm_struct *mm,
for (i = 0; i < riovcnt && iov_iter_count(iter); i++) {
err = -EINVAL;
if (process_madvise_behavior_valid(behaviors[i]))
-   err = madvise_core(tsk, mm,
+   err = madvise_common(tsk, mm,
(unsigned long)range_vec[i].iov_base,
range_vec[i].iov_len, behaviors[i]);
 
@@ -1220,7 +1220,7 @@ static int process_madvise_core(struct task_struct *tsk, 
struct mm_struct *mm,
  */
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
-   return madvise_core(current, current->mm, start, len_in, behavior);
+   return madvise_common(current, current->mm, start, len_in, behavior);
 }
 
 
@@ -1252,7 +1252,7 @@ SYSCALL_DEFINE3(process_madvise, int, pidfd,
 
/*
 * We don't support cookie to gaurantee address space atomicity yet.
-* Once we implment cookie, process_madvise_core need to hold mmap_sme
+* Once we implment cookie, process_madvise_common need to hold mmap_sme
 * during entire operation to guarantee atomicity.
 */
if (params.cookie != 0)
@@ -1316,7 +1316,7 @@ SYSCALL_DEFINE3(process_madvise, int, pidfd,
goto release_task;
}
 
-   ret = process_madvise_core(task, mm, behaviors, &iter, iov_r, nr_elem);
+   ret = process_madvise_common(task, mm, behaviors, &iter, iov_r, 
nr_elem);
mmput(mm);
 release_task:
put_task_struct(task);
-- 
2.22.0

[PATCH NOTFORMERGE 5/5] mm/madvise: allow KSM hints for remote API

2019-06-16 Thread Oleksandr Natalenko

It all began with the fact that KSM works only on memory that is marked
by madvise(). And the only way to get around that is to either:

  * use LD_PRELOAD; or
  * patch the kernel with something like UKSM or PKSM.

(I skip the ptrace can of worms here intentionally)

To overcome this restriction, let's employ a new remote madvise API. This
can be used by some small userspace helper daemon that will do auto-KSM
job for us.

I think of two major consumers of remote KSM hints:

  * hosts, that run containers, especially similar ones and especially in
a trusted environment, sharing the same runtime like Node.js;

  * heavy applications, that can be run in multiple instances, not
limited to opensource ones like Firefox, but also those that cannot be
modified since they are binary-only and, maybe, statically linked.

Speaking of statistics, more numbers can be found in the very first
submission, that is related to this one [1]. For my current setup with
two Firefox instances I get 100 to 200 MiB saved for the second instance
depending on the amount of tabs.

1 FF instance with 15 tabs:

   $ echo "$(cat /sys/kernel/mm/ksm/pages_sharing) * 4 / 1024" | bc
   410

2 FF instances, second one has 12 tabs (all the tabs are different):

   $ echo "$(cat /sys/kernel/mm/ksm/pages_sharing) * 4 / 1024" | bc
   592

At the very moment I do not have specific numbers for containerised
workload, but those should be comparable in case the containers share
similar/same runtime.

[1] https://lore.kernel.org/patchwork/patch/1012142/

Signed-off-by: Oleksandr Natalenko 
---
 mm/madvise.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 84f899b1b6da..e8f9c49794a3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -991,6 +991,8 @@ process_madvise_behavior_valid(int behavior)
switch (behavior) {
case MADV_COLD:
case MADV_PAGEOUT:
+   case MADV_MERGEABLE:
+   case MADV_UNMERGEABLE:
return true;
 
default:
-- 
2.22.0

[PATCH NOTFORMERGE 0/5] Extend remote madvise API to KSM hints

2019-06-16 Thread Oleksandr Natalenko

Hi, Minchan.

This is a set of commits based on our discussion on your submission [1].

First 2 implement minor suggestions just for you to not forget to take
them into account.

uio.h inclusion was needed for me to be able to compile your series
successfully. Also please note I had to enable "Transparent Hugepage
Support" as well as "Enable idle page tracking" options, otherwise the
build failed. I guess this can be addressed by you better since the
errors are introduced with MADV_COLD introduction.

Last 2 commits are the actual KSM hints enablement. The first one
implements additional check for the case where the mmap_sem is taken for
write, and the second one just allows KSM hints to be used by the remote
interface.

I'm not Cc'ing anyone else except two mailing lists to not distract
people unnecessarily. If you are fine with this addition, please use it
for your next iteration of process_madvise(), and then you'll Cc all the
people needed.

Thanks.

[1] https://lore.kernel.org/lkml/20190531064313.193437-1-minc...@kernel.org/

Oleksandr Natalenko (5):
  mm: rename madvise_core to madvise_common
  mm: revert madvise_inject_error line split
  mm: include uio.h to madvise.c
  mm/madvise: employ mmget_still_valid for write lock
  mm/madvise: allow KSM hints for remote API

 mm/madvise.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

-- 
2.22.0

[PATCH NOTFORMERGE 4/5] mm/madvise: employ mmget_still_valid for write lock

2019-06-16 Thread Oleksandr Natalenko

Do the very same trick as we already do since 04f5866e41fb. KSM hints
will require locking mmap_sem for write since they modify vm_flags, so
for remote KSM hinting this additional check is needed.

Signed-off-by: Oleksandr Natalenko 
---
 mm/madvise.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 9755340da157..84f899b1b6da 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1049,6 +1049,8 @@ static int madvise_common(struct task_struct *task, 
struct mm_struct *mm,
if (write) {
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
+   if (current->mm != mm && !mmget_still_valid(mm))
+   goto skip_mm;
} else {
down_read(&mm->mmap_sem);
}
@@ -1099,6 +1101,7 @@ static int madvise_common(struct task_struct *task, 
struct mm_struct *mm,
}
 out:
blk_finish_plug(&plug);
+skip_mm:
if (write)
up_write(&mm->mmap_sem);
else
-- 
2.22.0

Re: [PATCH] i2c: mux-gpio: Use "mux" con_id to find channel GPIOs

2019-06-16 Thread Peter Rosin

On 2019-06-16 01:24, Serge Semin wrote:
> On Sat, Jun 15, 2019 at 02:43:09PM +0300, Andy Shevchenko wrote:
>> On Sat, Jun 15, 2019 at 12:51 AM Serge Semin  wrote:
>>>
>>> Recent patch - ("i2c: mux/i801: Switch to use descriptor passing")
>>> altered the i2c-mux-gpio driver to use the GPIO-descriptor
>>> based interface to find and request the GPIOs then being utilized
>>> to select and deselect the channels of GPIO-driven i2c-muxes. Even
>>> though the proposed modification was correct for the platform_data-based
>>> systems, it was invalid for the OF-based ones and caused the kernel
>>> to crash at the driver probe procedure. There were two problems with
>>> that modification. First of all the gpiod_count() and gpiod_get_index()
>>> were called with NULL con_id.
>>
>> I always thought that this means "count me all GPIOs for this device
>> despite their names" and "get me GPIO by index despite its name".
>> What went wrong?
>>
> 
> No. It's wrong as far as I can see for kernels 4.4, 4.9 and the modern
> 5.2.0-rcX. dt_gpio_count()/of_find_gpio() will always try to count/request
> either "<con_id>-gpio(s)" or "gpio(s)" GPIOs in the device of-node. While
> platform_gpio_count()/gpiod_find() will take into account GPIOs with matching
> <con_id>'s even if it is NULL.

Right, this is my reading too. For the OF case, gpiod_count calls dt_gpio_count
which has no way to find gpios unless they are explicitly named. And NULL
simply means unnamed (which is not the case here). The function simply does
not do any trawling for gpios it has not been told about.

Linus, care to squash this incremental into your patch and resend with proper
credit added?

Cheers,
Peter

Re: [PATCH v5 13/16] powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX

2019-06-16 Thread Andreas Schwab

On Jun 16 2019, christophe leroy  wrote:

> If any of registers IBATs 4 to 7 are used

Nope.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

Re: [PATCH 1/2] arm64: dts: rockchip: Fix multiple thermal zones conflict in rk3399.dtsi

2019-06-16 Thread Krzysztof Kozlowski

On Fri, Jun 14, 2019 at 04:30:13PM +0200, Daniel Lezcano wrote:
> On 14/06/2019 16:02, Robin Murphy wrote:
> > On 14/06/2019 14:03, Daniel Lezcano wrote:
> >> On 14/06/2019 11:35, Heiko Stuebner wrote:
> >>> Hi Daniel,
> >>>
> >>> Am Dienstag, 4. Juni 2019, 18:57:57 CEST schrieb Daniel Lezcano:
>  Currently the common thermal zones definitions for the rk3399 assumes
>  multiple thermal zones are supported by the governors. This is not the
>  case and each thermal zone has its own governor instance acting
>  individually without collaboration with other governors.
> 
>  As the cooling device for the CPU and the GPU thermal zones is the
>  same, each governors take different decisions for the same cooling
>  device leading to conflicting instructions and an erratic behavior.
> 
>  As the cooling-maps is about to become an optional property, let's
>  remove the cpu cooling device map from the GPU thermal zone.
> 
>  Signed-off-by: Daniel Lezcano 
>  ---
>    arch/arm64/boot/dts/rockchip/rk3399.dtsi | 9 -
>    1 file changed, 9 deletions(-)
> 
>  diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
>  b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
>  index 196ac9b78076..e1357e0f60f7 100644
>  --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
>  +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
>  @@ -821,15 +821,6 @@
>    type = "critical";
>    };
>    };
>  -
>  -    cooling-maps {
>  -    map0 {
>  -    trip = <&gpu_alert0>;
>  -    cooling-device =
>  -    <&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
>  -    <&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
>  -    };
>  -    };
>    };
>    };
> >>>
> >>> my knowledge of the thermal framework is not that big, but what about
> >>> the
> >>> rk3399-devices which further detail the cooling-maps like
> >>> rk3399-gru-kevin
> >>> and the rk3399-nanopc-t4 with its fan-handling in the cooling-maps?
> >>
> >> The rk3399-gru-kevin is correct.
> >>
> >> The rk3399-nanopc-t4 is not correct because the cpu and the gpu are
> >> sharing the same cooling device (the fan). There are different
> >> configurations:
> >>
> >> 1. The cpu cooling device for the CPU and the fan for the GPU
> >>
> >> 2. Different trip points on the CPU thermal zone, eg. one to for the CPU
> >> cooling device and another one for the fan.
> >>
> >> There are some variant for the above. If this board is not on battery,
> >> you may want to give priority to the throughput, so activate the fan
> >> first and then cool down the CPU. Or if you are on battery, you may want
> >> to invert the trip points.
> >>
> >> In any case, it is not possible to share the same cooling device for
> >> different thermal zones.
> > 
> > OK, thanks for the clarification. I'll get my board set up again to
> > figure out the best fix for rk3399-nanopc-t4 (FWIW most users are
> > probably just using passive cooling or a plain DC fan anyway). You might
> > want to raise this issue with the maintainers of
> > arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi, since the
> > everything-shared-by-everything approach in there was what I used as a
> > reference.
> 
> Cc'ed: Kukjin Kim and Krzysztof Kozlowski
> 
> Easy :)
>

Assuming that all trip-points are the same between thermal zones, I
understand that the solution could be to have one thermal zone with
multiple thermal sensors (some time ago bindings did not support it) and all
cooling devices? Then only one governor would be assigned?

Best regards,
Krzysztof

[PATCH v3 2/3] KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL

2019-06-16 Thread Tao Xu

UMWAIT and TPAUSE instructions use IA32_UMWAIT_CONTROL at MSR index E1H
to determine the maximum time in TSC-quanta that the processor can reside
in either C0.1 or C0.2.

This patch emulates MSR IA32_UMWAIT_CONTROL in guest and differentiates
IA32_UMWAIT_CONTROL between host and guest. The variable
umwait_control_cached in arch/x86/power/umwait.c caches the MSR value, so
this patch uses it to avoid frequent rdmsr of IA32_UMWAIT_CONTROL.

Co-developed-by: Jingqi Liu 
Signed-off-by: Jingqi Liu 
Signed-off-by: Tao Xu 
---
 arch/x86/kvm/vmx/vmx.c  | 36 
 arch/x86/kvm/vmx/vmx.h  |  3 +++
 arch/x86/power/umwait.c |  3 ++-
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b35bfac30a34..f33a25e82cb8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1679,6 +1679,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 #endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info);
+   case MSR_IA32_UMWAIT_CONTROL:
+   if (!vmx_waitpkg_supported())
+   return 1;
+
+   msr_info->data = vmx->msr_ia32_umwait_control;
+   break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -1841,6 +1847,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
+   case MSR_IA32_UMWAIT_CONTROL:
+   if (!vmx_waitpkg_supported())
+   return 1;
+
+   if (!data)
+   break;
+
+   vmx->msr_ia32_umwait_control = data;
+   break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -4126,6 +4141,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool 
init_event)
vmx->rmode.vm86_active = 0;
vmx->spec_ctrl = 0;
 
+   vmx->msr_ia32_umwait_control = 0;
+
vcpu->arch.microcode_version = 0x1ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -6339,6 +6356,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
 }
 
+static void atomic_switch_ia32_umwait_control(struct vcpu_vmx *vmx)
+{
+   u64 host_umwait_control;
+
+   if (!vmx_waitpkg_supported())
+   return;
+
+   host_umwait_control = umwait_control_cached;
+
+   if (vmx->msr_ia32_umwait_control != host_umwait_control)
+   add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
+ vmx->msr_ia32_umwait_control,
+ host_umwait_control, false);
+   else
+   clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
+}
+
 static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
 {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
@@ -6447,6 +6481,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
atomic_switch_perf_msrs(vmx);
 
+   atomic_switch_ia32_umwait_control(vmx);
+
vmx_update_hv_timer(vcpu);
 
/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 61128b48c503..8485bec7c38a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,6 +14,8 @@
 extern const u32 vmx_msr_index[];
 extern u64 host_efer;
 
+extern u32 umwait_control_cached;
+
 #define MSR_TYPE_R 1
 #define MSR_TYPE_W 2
 #define MSR_TYPE_RW3
@@ -194,6 +196,7 @@ struct vcpu_vmx {
 #endif
 
u64   spec_ctrl;
+   u64   msr_ia32_umwait_control;
 
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
diff --git a/arch/x86/power/umwait.c b/arch/x86/power/umwait.c
index 7fa381e3fd4e..2e6ce4cbccb3 100644
--- a/arch/x86/power/umwait.c
+++ b/arch/x86/power/umwait.c
@@ -9,7 +9,8 @@
  * MSR value. By default, umwait max time is 10 in TSC-quanta and C0.2
  * is enabled
  */
-static u32 umwait_control_cached = 10;
+u32 umwait_control_cached = 10;
+EXPORT_SYMBOL_GPL(umwait_control_cached);
 
 /*
  * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR
-- 
2.20.1

[PATCH v3 0/3] x86: Enable user wait instructions

2019-06-16 Thread Tao Xu

UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.

UMONITOR arms address monitoring hardware using an address. A store
to an address within the specified address range triggers the
monitoring hardware to wake up the processor waiting in umwait.

UMWAIT instructs the processor to enter an implementation-dependent
optimized state while monitoring a range of addresses. The optimized
state may be either a light-weight power/performance optimized state
(c0.1 state) or an improved power/performance optimized state
(c0.2 state).
TPAUSE instructs the processor to enter an implementation-dependent
optimized state c0.1 or c0.2 state and wake up when time-stamp counter
reaches specified timeout.

Availability of the user wait instructions is indicated by the presence
of the CPUID feature flag WAITPKG CPUID.0x07.0x0:ECX[5].

The patches enable the umonitor, umwait and tpause features in KVM.
Because umwait and tpause can put a (physical) CPU into a power saving
state, by default we don't expose it in kvm and provide a capability to
enable it. Use kvm capability to enable UMONITOR, UMWAIT and TPAUSE: when
QEMU uses "-overcommit cpu-pm=on", a VM can use UMONITOR, UMWAIT and TPAUSE
instructions. If the instruction causes a delay, the amount of time
delayed is called here the physical delay. The physical delay is first
computed by determining the virtual delay (the time to delay relative to
the VM’s timestamp counter). Otherwise, UMONITOR, UMWAIT and TPAUSE cause
an invalid-opcode exception(#UD).

The dependency KVM patch link:
https://lkml.org/lkml/2019/5/24/138

The release document ref below link:
https://software.intel.com/sites/default/files/\
managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf

Changelog:
v3:
Simplify the patches, expose user wait instructions when the guest
has CPUID (Paolo)
v2:
Separated from the series
https://www.mail-archive.com/qemu-devel@nongnu.org/msg549526.html
Use kvm capability to enable UMONITOR, UMWAIT and TPAUSE when
QEMU use "-overcommit cpu-pm=on"
v1:
Sent out with MOVDIRI/MOVDIR64B instructions patches

Tao Xu (3):
  KVM: x86: add support for user wait instructions
  KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL
  KVM: vmx: handle vm-exit for UMWAIT and TPAUSE

 arch/x86/include/asm/vmx.h  |  1 +
 arch/x86/include/uapi/asm/vmx.h |  6 +++-
 arch/x86/kvm/cpuid.c|  2 +-
 arch/x86/kvm/vmx/capabilities.h |  6 
 arch/x86/kvm/vmx/vmx.c  | 56 +
 arch/x86/kvm/vmx/vmx.h  |  3 ++
 arch/x86/power/umwait.c |  3 +-
 7 files changed, 74 insertions(+), 3 deletions(-)

-- 
2.20.1

[PATCH v3 1/3] KVM: x86: add support for user wait instructions

2019-06-16 Thread Tao Xu

UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
This patch adds support for user wait instructions in KVM. Availability
of the user wait instructions is indicated by the presence of the CPUID
feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
set the maximum time.

The behavior of user wait instructions in VMX non-root operation is
determined first by the setting of the "enable user wait and pause"
secondary processor-based VM-execution control bit 26.
If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
an invalid-opcode exception (#UD).
If the VM-execution control is 1, treatment is based on the
setting of the “RDTSC exiting” VM-execution control. Because KVM never
enables RDTSC exiting, if the instruction causes a delay, the amount of
time delayed is called here the physical delay. The physical delay is
first computed by determining the virtual delay. If
IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
EDX:EAX minus the value that RDTSC would return; if
IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).

Because umwait and tpause can put a (physical) CPU into a power saving
state, by default we don't expose it to kvm and enable it only when
guest CPUID has it.

Detailed information about user wait instructions can be found in the
latest Intel 64 and IA-32 Architectures Software Developer's Manual.

Co-developed-by: Jingqi Liu 
Signed-off-by: Jingqi Liu 
Signed-off-by: Tao Xu 
---
 arch/x86/include/asm/vmx.h  | 1 +
 arch/x86/kvm/cpuid.c| 2 +-
 arch/x86/kvm/vmx/capabilities.h | 6 ++
 arch/x86/kvm/vmx/vmx.c  | 4 
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a39136b0d509..8f00882664d3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PT_USE_GPA  0x0100
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x0040
 #define SECONDARY_EXEC_TSC_SCALING  0x0200
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE   0x0400
 
 #define PIN_BASED_EXT_INTR_MASK 0x0001
 #define PIN_BASED_NMI_EXITING   0x0008
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e18a9f9f65b5..48bd851a6ae5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-   F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+   F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d6664ee3d127..fd77e17651b4 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -253,6 +253,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
SECONDARY_EXEC_TSC_SCALING;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+   return vmcs_config.cpu_based_2nd_exec_ctrl &
+   SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_apicv(void)
 {
return cpu_has_vmx_apic_register_virt() &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b93e36ddee5e..b35bfac30a34 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2250,6 +2250,7 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf,
SECONDARY_EXEC_RDRAND_EXITING |
SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_TSC_SCALING |
+   SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_PT_USE_GPA |
SECONDARY_EXEC_PT_CONCEAL_VMX |
SECONDARY_EXEC_ENABLE_VMFUNC |
@@ -3987,6 +3988,9 @@ static void vmx_compute_secondary_exec_control(struct 
vcpu_vmx *vmx)
}
}
 
+   if (!guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG))
+   exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
vmx->secondary_exec_control = exec_control;
 }
 
-- 
2.20.1

[PATCH v3 3/3] KVM: vmx: handle vm-exit for UMWAIT and TPAUSE

2019-06-16 Thread Tao Xu

According to the latest Intel 64 and IA-32 Architectures Software
Developer's Manual, UMWAIT and TPAUSE instructions cause a VM exit if the
RDTSC exiting and enable user wait and pause VM-execution
controls are both 1.

This patch handles the VM exit for UMWAIT and TPAUSE, even though this
exit should never happen.

Co-developed-by: Jingqi Liu 
Signed-off-by: Jingqi Liu 
Signed-off-by: Tao Xu 
---
 arch/x86/include/uapi/asm/vmx.h |  6 +-
 arch/x86/kvm/vmx/vmx.c  | 16 
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d213ec5c3766..d88d7a68849b 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -85,6 +85,8 @@
 #define EXIT_REASON_PML_FULL62
 #define EXIT_REASON_XSAVES  63
 #define EXIT_REASON_XRSTORS 64
+#define EXIT_REASON_UMWAIT  67
+#define EXIT_REASON_TPAUSE  68
 
 #define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -142,7 +144,9 @@
{ EXIT_REASON_RDSEED,"RDSEED" }, \
{ EXIT_REASON_PML_FULL,  "PML_FULL" }, \
{ EXIT_REASON_XSAVES,"XSAVES" }, \
-   { EXIT_REASON_XRSTORS,   "XRSTORS" }
+   { EXIT_REASON_XRSTORS,   "XRSTORS" }, \
+   { EXIT_REASON_UMWAIT,"UMWAIT" }, \
+   { EXIT_REASON_TPAUSE,"TPAUSE" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL   2
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f33a25e82cb8..386bd68f8d0b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5335,6 +5335,20 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
return handle_nop(vcpu);
 }
 
+static int handle_umwait(struct kvm_vcpu *vcpu)
+{
+   kvm_skip_emulated_instruction(vcpu);
+   WARN(1, "this should never happen\n");
+   return 1;
+}
+
+static int handle_tpause(struct kvm_vcpu *vcpu)
+{
+   kvm_skip_emulated_instruction(vcpu);
+   WARN(1, "this should never happen\n");
+   return 1;
+}
+
 static int handle_invpcid(struct kvm_vcpu *vcpu)
 {
u32 vmx_instruction_info;
@@ -5545,6 +5559,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu 
*vcpu) = {
[EXIT_REASON_VMFUNC]  = handle_vmx_instruction,
[EXIT_REASON_PREEMPTION_TIMER]= handle_preemption_timer,
[EXIT_REASON_ENCLS]   = handle_encls,
+   [EXIT_REASON_UMWAIT]  = handle_umwait,
+   [EXIT_REASON_TPAUSE]  = handle_tpause,
 };
 
 static const int kvm_vmx_max_exit_handlers =
-- 
2.20.1

Re: [PATCH] perf: Don't hardcode host include path for libslang

2019-06-16 Thread Jiri Olsa

On Fri, Jun 14, 2019 at 11:39:47AM -0700, Florian Fainelli wrote:
> Hardcoding /usr/include/slang is fundamentally incompatible with cross
> compilation and will lead to the inability for a cross-compiled
> environment to properly detect whether slang is available or not.
> 
> If /usr/include/slang is necessary that is a distribution specific
> knowledge that could be solved with either a standard pkg-config .pc
> file (which slang has) or simply overriding CFLAGS accordingly, but the
> default perf Makefile should be clean of all of that.

fedora 30 is ok with this, I guess acme's distro test will
tell us about the rest ;-)

jirka

> 
> Signed-off-by: Florian Fainelli 
> ---
>  tools/build/feature/Makefile | 2 +-
>  tools/perf/Makefile.config   | 1 -
>  2 files changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
> index 4b8244ee65ce..f9432d21eff9 100644
> --- a/tools/build/feature/Makefile
> +++ b/tools/build/feature/Makefile
> @@ -181,7 +181,7 @@ $(OUTPUT)test-libaudit.bin:
>   $(BUILD) -laudit
>  
>  $(OUTPUT)test-libslang.bin:
> - $(BUILD) -I/usr/include/slang -lslang
> + $(BUILD) -lslang
>  
>  $(OUTPUT)test-libcrypto.bin:
>   $(BUILD) -lcrypto
> diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
> index 85fbcd265351..b11134fdf59f 100644
> --- a/tools/perf/Makefile.config
> +++ b/tools/perf/Makefile.config
> @@ -641,7 +641,6 @@ ifndef NO_SLANG
>  NO_SLANG := 1
>else
>  # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
> -CFLAGS += -I/usr/include/slang
>  CFLAGS += -DHAVE_SLANG_SUPPORT
>  EXTLIBS += -lslang
>  $(call detected,CONFIG_SLANG)
> -- 
> 2.17.1
>

Re: [PATCH v2 8/8] habanalabs: enable 64-bit DMA mask in POWER9

2019-06-16 Thread Christoph Hellwig

On Sat, Jun 15, 2019 at 03:12:36PM +0300, Oded Gabbay wrote:
> So after the dust has settled a bit, do you think it is reasonable to
> add this patch upstream ?

I'm not Greg, but the answer is a very clear no.  Drivers have absolutely
no business adding these hacks.

Re: [PATCH v2 2/5] perf pmu: Support more complex PMU event aliasing

2019-06-16 Thread Jiri Olsa

On Fri, Jun 14, 2019 at 10:08:00PM +0800, John Garry wrote:
> The jevent "Unit" field is used for uncore PMU alias definition.
> 
> The form uncore_pmu_example_X is supported, where "X" is a wildcard,
> to support multiple instances of the same PMU in a system.
> 
> Unfortunately this format is not suitable for all uncore PMUs; take the Hisi
> DDRC uncore PMU for example, where the name is in the form
> hisi_scclX_ddrcY.
> 
> For the current jevent parsing, we would be required to hardcode an uncore
> alias translation for each possible value of X. This is not scalable.
> 
> Instead, add support for "Unit" field in the form "hisi_sccl,ddrc", where
> we can match by hisi_scclX and ddrcY. Tokens in Unit field are 
> delimited by ','.
> 
> Signed-off-by: John Garry 
> ---
>  tools/perf/util/pmu.c | 39 ++-
>  1 file changed, 34 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index 7e7299fee550..bc71c60589b5 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -700,6 +700,39 @@ struct pmu_events_map *perf_pmu__find_map(struct 
> perf_pmu *pmu)
>   return map;
>  }
>  
> +static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
> +{
> + char *tmp, *tok, *str;
> + bool res;
> +
> + str = strdup(pmu_name);
> + if (!str)
> + return false;
> +
> + /*
> +  * uncore alias may be from different PMU with common
> +  * prefix or matching tokens.
> +  */
> + tok = strtok_r(str, ",", &tmp);
> + if (strncmp(pmu_name, tok, strlen(tok))) {

if tok is NULL in here we crash

> + res = false;
> + goto out;
> + }
> +
> + for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) {

why is name shifted in here?

jirka

> + name = strstr(name, tok);
> + if (!name) {
> + res = false;
> + goto out;
> + }
> + }
> +
> + res = true;
> +out:
> + free(str);
> + return res;
> +}
> +
>  /*
>   * From the pmu_events_map, find the table of PMU events that corresponds
>   * to the current running CPU. Then, add all PMU events from that table
> @@ -730,12 +763,8 @@ static void pmu_add_cpu_aliases(struct list_head *head, 
> struct perf_pmu *pmu)
>   break;
>   }
>  
> - /*
> -  * uncore alias may be from different PMU
> -  * with common prefix
> -  */
>   if (pmu_is_uncore(name) &&
> - !strncmp(pname, name, strlen(pname)))
> + pmu_uncore_alias_match(pname, name))
>   goto new_alias;
>  
>   if (strcmp(pname, name))
> -- 
> 2.17.1
>

[PATCH RESEND v3 1/3] KVM: x86: add support for user wait instructions

2019-06-16 Thread Tao Xu

UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
This patch adds support for user wait instructions in KVM. Availability
of the user wait instructions is indicated by the presence of the CPUID
feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
set the maximum time.

The behavior of user wait instructions in VMX non-root operation is
determined first by the setting of the "enable user wait and pause"
secondary processor-based VM-execution control bit 26.
If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
an invalid-opcode exception (#UD).
If the VM-execution control is 1, treatment is based on the
setting of the “RDTSC exiting” VM-execution control. Because KVM never
enables RDTSC exiting, if the instruction causes a delay, the amount of
time delayed is called here the physical delay. The physical delay is
first computed by determining the virtual delay. If
IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
EDX:EAX minus the value that RDTSC would return; if
IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).

Because umwait and tpause can put a (physical) CPU into a power saving
state, by default we don't expose it to kvm and enable it only when
guest CPUID has it.

Detailed information about user wait instructions can be found in the
latest Intel 64 and IA-32 Architectures Software Developer's Manual.

Co-developed-by: Jingqi Liu 
Signed-off-by: Jingqi Liu 
Signed-off-by: Tao Xu 
---
 arch/x86/include/asm/vmx.h  | 1 +
 arch/x86/kvm/cpuid.c| 2 +-
 arch/x86/kvm/vmx/capabilities.h | 6 ++
 arch/x86/kvm/vmx/vmx.c  | 4 
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a39136b0d509..8f00882664d3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PT_USE_GPA  0x0100
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x0040
 #define SECONDARY_EXEC_TSC_SCALING  0x0200
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE   0x0400
 
 #define PIN_BASED_EXT_INTR_MASK 0x0001
 #define PIN_BASED_NMI_EXITING   0x0008
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e18a9f9f65b5..48bd851a6ae5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-   F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+   F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d6664ee3d127..fd77e17651b4 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -253,6 +253,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
SECONDARY_EXEC_TSC_SCALING;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+   return vmcs_config.cpu_based_2nd_exec_ctrl &
+   SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_apicv(void)
 {
return cpu_has_vmx_apic_register_virt() &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b93e36ddee5e..b35bfac30a34 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2250,6 +2250,7 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf,
SECONDARY_EXEC_RDRAND_EXITING |
SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_TSC_SCALING |
+   SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_PT_USE_GPA |
SECONDARY_EXEC_PT_CONCEAL_VMX |
SECONDARY_EXEC_ENABLE_VMFUNC |
@@ -3987,6 +3988,9 @@ static void vmx_compute_secondary_exec_control(struct 
vcpu_vmx *vmx)
}
}
 
+   if (!guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG))
+   exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
vmx->secondary_exec_control = exec_control;
 }
 
-- 
2.20.1

[PATCH RESEND v3 2/3] KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL

2019-06-16 Thread Tao Xu

UMWAIT and TPAUSE instructions use IA32_UMWAIT_CONTROL at MSR index E1H
to determine the maximum time in TSC-quanta that the processor can reside
in either C0.1 or C0.2.

This patch emulates MSR IA32_UMWAIT_CONTROL in the guest and differentiates
IA32_UMWAIT_CONTROL between host and guest. The variable
umwait_control_cached in arch/x86/power/umwait.c caches the MSR value, so
this patch uses it to avoid frequent rdmsr of IA32_UMWAIT_CONTROL.

Co-developed-by: Jingqi Liu 
Signed-off-by: Jingqi Liu 
Signed-off-by: Tao Xu 
---
 arch/x86/kvm/vmx/vmx.c  | 36 
 arch/x86/kvm/vmx/vmx.h  |  3 +++
 arch/x86/power/umwait.c |  3 ++-
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b35bfac30a34..f33a25e82cb8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1679,6 +1679,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 #endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info);
+   case MSR_IA32_UMWAIT_CONTROL:
+   if (!vmx_waitpkg_supported())
+   return 1;
+
+   msr_info->data = vmx->msr_ia32_umwait_control;
+   break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -1841,6 +1847,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
+   case MSR_IA32_UMWAIT_CONTROL:
+   if (!vmx_waitpkg_supported())
+   return 1;
+
+   if (!data)
+   break;
+
+   vmx->msr_ia32_umwait_control = data;
+   break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -4126,6 +4141,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool 
init_event)
vmx->rmode.vm86_active = 0;
vmx->spec_ctrl = 0;
 
+   vmx->msr_ia32_umwait_control = 0;
+
vcpu->arch.microcode_version = 0x1ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -6339,6 +6356,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
 }
 
+static void atomic_switch_ia32_umwait_control(struct vcpu_vmx *vmx)
+{
+   u64 host_umwait_control;
+
+   if (!vmx_waitpkg_supported())
+   return;
+
+   host_umwait_control = umwait_control_cached;
+
+   if (vmx->msr_ia32_umwait_control != host_umwait_control)
+   add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
+ vmx->msr_ia32_umwait_control,
+ host_umwait_control, false);
+   else
+   clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
+}
+
 static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
 {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
@@ -6447,6 +6481,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
atomic_switch_perf_msrs(vmx);
 
+   atomic_switch_ia32_umwait_control(vmx);
+
vmx_update_hv_timer(vcpu);
 
/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 61128b48c503..8485bec7c38a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,6 +14,8 @@
 extern const u32 vmx_msr_index[];
 extern u64 host_efer;
 
+extern u32 umwait_control_cached;
+
 #define MSR_TYPE_R 1
 #define MSR_TYPE_W 2
 #define MSR_TYPE_RW3
@@ -194,6 +196,7 @@ struct vcpu_vmx {
 #endif
 
u64   spec_ctrl;
+   u64   msr_ia32_umwait_control;
 
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
diff --git a/arch/x86/power/umwait.c b/arch/x86/power/umwait.c
index 7fa381e3fd4e..2e6ce4cbccb3 100644
--- a/arch/x86/power/umwait.c
+++ b/arch/x86/power/umwait.c
@@ -9,7 +9,8 @@
  * MSR value. By default, umwait max time is 10 in TSC-quanta and C0.2
  * is enabled
  */
-static u32 umwait_control_cached = 10;
+u32 umwait_control_cached = 10;
+EXPORT_SYMBOL_GPL(umwait_control_cached);
 
 /*
  * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR
-- 
2.20.1

[PATCH RESEND v3 0/3] KVM: x86: Enable user wait instructions

2019-06-16 Thread Tao Xu

UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.

UMONITOR arms address monitoring hardware using an address. A store
to an address within the specified address range triggers the
monitoring hardware to wake up the processor waiting in umwait.

UMWAIT instructs the processor to enter an implementation-dependent
optimized state while monitoring a range of addresses. The optimized
state may be either a light-weight power/performance optimized state
(c0.1 state) or an improved power/performance optimized state
(c0.2 state).

TPAUSE instructs the processor to enter an implementation-dependent
optimized state c0.1 or c0.2 state and wake up when time-stamp counter
reaches specified timeout.

Availability of the user wait instructions is indicated by the presence
of the CPUID feature flag WAITPKG CPUID.0x07.0x0:ECX[5].

The patches enable the umonitor, umwait and tpause features in KVM.
Because umwait and tpause can put a (physical) CPU into a power saving
state, by default we don't expose it to kvm and enable it only when
guest CPUID has it. If the instruction causes a delay, the amount
of time delayed is called here the physical delay. The physical delay is
first computed by determining the virtual delay (the time to delay
relative to the VM’s timestamp counter). 

The released documentation is available at the link below:
Intel 64 and IA-32 Architectures Software Developer's Manual,
https://software.intel.com/sites/default/files/\
managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
This patch has a dependency on https://lkml.org/lkml/2019/6/7/1206

Changelog:
v3:
Simplify the patches, expose user wait instructions when the
guest has CPUID (Paolo)
Use umwait_control_cached to avoid frequent rdmsr of
IA32_UMWAIT_CONTROL (Paolo and Xiaoyao)
Handle vm-exit for UMWAIT and TPAUSE as "never happen" (Paolo)
v2:
Separated from the series https://lkml.org/lkml/2018/7/10/160
Provide a capability to enable UMONITOR, UMWAIT and TPAUSE
v1:
Sent out with MOVDIRI/MOVDIR64B instructions patches

Tao Xu (3):
  KVM: x86: add support for user wait instructions
  KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL
  KVM: vmx: handle vm-exit for UMWAIT and TPAUSE

 arch/x86/include/asm/vmx.h  |  1 +
 arch/x86/include/uapi/asm/vmx.h |  6 +++-
 arch/x86/kvm/cpuid.c|  2 +-
 arch/x86/kvm/vmx/capabilities.h |  6 
 arch/x86/kvm/vmx/vmx.c  | 56 +
 arch/x86/kvm/vmx/vmx.h  |  3 ++
 arch/x86/power/umwait.c |  3 +-
 7 files changed, 74 insertions(+), 3 deletions(-)

-- 
2.20.1

[PATCH RESEND v3 3/3] KVM: vmx: handle vm-exit for UMWAIT and TPAUSE

2019-06-16 Thread Tao Xu

According to the latest Intel 64 and IA-32 Architectures Software
Developer's Manual, the UMWAIT and TPAUSE instructions cause a VM exit if
the "RDTSC exiting" and "enable user wait and pause" VM-execution
controls are both 1.

This patch is to handle the vm-exit for UMWAIT and TPAUSE as this
should never happen.

Co-developed-by: Jingqi Liu 
Signed-off-by: Jingqi Liu 
Signed-off-by: Tao Xu 
---
 arch/x86/include/uapi/asm/vmx.h |  6 +-
 arch/x86/kvm/vmx/vmx.c  | 16 
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d213ec5c3766..d88d7a68849b 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -85,6 +85,8 @@
 #define EXIT_REASON_PML_FULL62
 #define EXIT_REASON_XSAVES  63
 #define EXIT_REASON_XRSTORS 64
+#define EXIT_REASON_UMWAIT  67
+#define EXIT_REASON_TPAUSE  68
 
 #define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -142,7 +144,9 @@
{ EXIT_REASON_RDSEED,"RDSEED" }, \
{ EXIT_REASON_PML_FULL,  "PML_FULL" }, \
{ EXIT_REASON_XSAVES,"XSAVES" }, \
-   { EXIT_REASON_XRSTORS,   "XRSTORS" }
+   { EXIT_REASON_XRSTORS,   "XRSTORS" }, \
+   { EXIT_REASON_UMWAIT,"UMWAIT" }, \
+   { EXIT_REASON_TPAUSE,"TPAUSE" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL   2
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f33a25e82cb8..386bd68f8d0b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5335,6 +5335,20 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
return handle_nop(vcpu);
 }
 
+static int handle_umwait(struct kvm_vcpu *vcpu)
+{
+   kvm_skip_emulated_instruction(vcpu);
+   WARN(1, "this should never happen\n");
+   return 1;
+}
+
+static int handle_tpause(struct kvm_vcpu *vcpu)
+{
+   kvm_skip_emulated_instruction(vcpu);
+   WARN(1, "this should never happen\n");
+   return 1;
+}
+
 static int handle_invpcid(struct kvm_vcpu *vcpu)
 {
u32 vmx_instruction_info;
@@ -5545,6 +5559,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu 
*vcpu) = {
[EXIT_REASON_VMFUNC]  = handle_vmx_instruction,
[EXIT_REASON_PREEMPTION_TIMER]= handle_preemption_timer,
[EXIT_REASON_ENCLS]   = handle_encls,
+   [EXIT_REASON_UMWAIT]  = handle_umwait,
+   [EXIT_REASON_TPAUSE]  = handle_tpause,
 };
 
 static const int kvm_vmx_max_exit_handlers =
-- 
2.20.1

Re: kernel BUG at mm/swap_state.c:170!

2019-06-16 Thread Mikhail Gavrilov

Hi,
I finished bisecting the kernel today.
The first bad commit for me was cd736d8b67fb22a85a68c1ee8020eb0d660615ec

Can you look into this?


$ git bisect log
git bisect start
# good: [a188339ca5a396acc588e5851ed7e19f66b0ebd9] Linux 5.2-rc1
git bisect good a188339ca5a396acc588e5851ed7e19f66b0ebd9
# good: [a188339ca5a396acc588e5851ed7e19f66b0ebd9] Linux 5.2-rc1
git bisect good a188339ca5a396acc588e5851ed7e19f66b0ebd9
# bad: [cd6c84d8f0cdc911df435bb075ba22ce3c605b07] Linux 5.2-rc2
git bisect bad cd6c84d8f0cdc911df435bb075ba22ce3c605b07
# bad: [060358de993f24562e884e265c4c57864a3a4141] treewide: Replace
GPLv2 boilerplate/reference with SPDX - rule 125
git bisect bad 060358de993f24562e884e265c4c57864a3a4141
# bad: [d53e860fd46f3d95c437bb67518f7374500de467] Merge branch 'linus'
of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
git bisect bad d53e860fd46f3d95c437bb67518f7374500de467
# bad: [34dcf6a1902ac214149a2742250ff03aa5346f3e] net: caif: fix the
value of size argument of snprintf
git bisect bad 34dcf6a1902ac214149a2742250ff03aa5346f3e
# bad: [c7d5ec26ea4adf450d9ab2b794e7735761a93af1] Merge
git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
git bisect bad c7d5ec26ea4adf450d9ab2b794e7735761a93af1
# good: [3d21b6525caeae45f08e2d3a07ddfdef64882b8b] selftests/bpf: add
prog detach to flow_dissector test
git bisect good 3d21b6525caeae45f08e2d3a07ddfdef64882b8b
# bad: [3ebe1bca58c85325c97a22d4fc3f5b5420752e6f] ppp: deflate: Fix
possible crash in deflate_init
git bisect bad 3ebe1bca58c85325c97a22d4fc3f5b5420752e6f
# bad: [d0a7e8cb3c9d7d4fa2bcdd557be19f0841e2a3be] NFC: Orphan the subsystem
git bisect bad d0a7e8cb3c9d7d4fa2bcdd557be19f0841e2a3be
# bad: [0fe9f173d6cda95874edeb413b1fa9907b5ae830] net: Always descend into dsa/
git bisect bad 0fe9f173d6cda95874edeb413b1fa9907b5ae830
# bad: [cd736d8b67fb22a85a68c1ee8020eb0d660615ec] tcp: fix retrans
timestamp on passive Fast Open
git bisect bad cd736d8b67fb22a85a68c1ee8020eb0d660615ec
# first bad commit: [cd736d8b67fb22a85a68c1ee8020eb0d660615ec] tcp:
fix retrans timestamp on passive Fast Open



--
Best Regards,
Mike Gavrilov.

On Tue, 11 Jun 2019 at 08:59, Mikhail Gavrilov
 wrote:
>
> On Wed, 29 May 2019 at 23:09, Michal Hocko  wrote:
> >
> >
> > Do you see the same with 5.2-rc1 resp. 5.1?
>
> I can say with 100% certainty that kernel tag 5.1 is not affected by this bug.
>
> It is very difficult to say anything about 5.2 rc1, because another
> problem occurs there that switches all file systems to read only mode.
>
> And I am sure that this issue has been occurring since 5.2 rc2.
>
> I also able recorded much more kernel logs with netconsole and option
> memblock=debug. (attached as file here)
>
> Please help me.

Re: [PATCH v5 13/16] powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX

2019-06-16 Thread Andreas Schwab

On Jun 16 2019, christophe leroy  wrote:

> If any of registers IBATs 4 to 7 are used, could you adjust
> CONFIG_ETEXT_SHIFT so that only IBATs 0 to 3 be used, and check if
> suspend/resume works when IBATs 4 to 7 are not used ?

I forgot to remove my patch.  With only 0-3 used, suspend/resume works.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

Re: [PATCH] staging: iio: ad7150: use ternary operating to ensure 0/1 value

2019-06-16 Thread Jonathan Cameron

On Fri, 14 Jun 2019 13:50:59 -0300
Melissa Wen  wrote:

> Remove the '!!' idiom and use the ternary operator to consistently yield a
> 0/1 value on variable declaration.
> 
> Signed-off-by: Melissa Wen 
Hi Melissa,

In general I would consider this unnecessary churn as, whilst
it's no longer a favoured idiom, it is extremely common in the
kernel.  However, as this is a staging cleanup, fair enough to
make it as 'nice as possible'! 

Applied to the togreg branch of iio.git and pushed out as testing
for the autobuilders to play with it.

Thanks,

Jonathan

> ---
>  drivers/staging/iio/cdc/ad7150.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/iio/cdc/ad7150.c 
> b/drivers/staging/iio/cdc/ad7150.c
> index 8234da4b8c65..25598bf124fb 100644
> --- a/drivers/staging/iio/cdc/ad7150.c
> +++ b/drivers/staging/iio/cdc/ad7150.c
> @@ -350,8 +350,8 @@ static ssize_t ad7150_show_timeout(struct device *dev,
>  
>   /* use the event code for consistency reasons */
>   int chan = IIO_EVENT_CODE_EXTRACT_CHAN(this_attr->address);
> - int rising = !!(IIO_EVENT_CODE_EXTRACT_DIR(this_attr->address)
> - == IIO_EV_DIR_RISING);
> + int rising = (IIO_EVENT_CODE_EXTRACT_DIR(this_attr->address)
> +   == IIO_EV_DIR_RISING) ? 1 : 0;
>  
>   switch (IIO_EVENT_CODE_EXTRACT_TYPE(this_attr->address)) {
>   case IIO_EV_TYPE_MAG_ADAPTIVE:

Re: [PATCH v2 1/3] staging: iio: ad7150: use FIELD_GET and GENMASK

2019-06-16 Thread Jonathan Cameron

On Fri, 14 Jun 2019 13:32:21 -0300
Melissa Wen  wrote:

> Use the bitfield macro FIELD_GET, and GENMASK to do the shift and mask in
> one go. This makes the code more readable than explicit masking followed
> by a shift.
> 
> Signed-off-by: Melissa Wen 
Applied to the togreg branch of iio.git and pushed out as testing for
the autobuilders to play with it.

Thanks,

Jonathan

> ---
>  drivers/staging/iio/cdc/ad7150.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/iio/cdc/ad7150.c 
> b/drivers/staging/iio/cdc/ad7150.c
> index 8234da4b8c65..091aa33589d7 100644
> --- a/drivers/staging/iio/cdc/ad7150.c
> +++ b/drivers/staging/iio/cdc/ad7150.c
> @@ -5,6 +5,7 @@
>   * Copyright 2010-2011 Analog Devices Inc.
>   */
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -45,6 +46,9 @@
>  #define AD7150_SN0 22
>  #define AD7150_ID  23
>  
> +/* AD7150 masks */
> +#define AD7150_THRESHTYPE_MSKGENMASK(6, 5)
> +
>  /**
>   * struct ad7150_chip_info - instance specific chip data
>   * @client: i2c client for this device
> @@ -137,7 +141,7 @@ static int ad7150_read_event_config(struct iio_dev 
> *indio_dev,
>   if (ret < 0)
>   return ret;
>  
> - threshtype = (ret >> 5) & 0x03;
> + threshtype = FIELD_GET(AD7150_THRESHTYPE_MSK, ret);
>   adaptive = !!(ret & 0x80);
>  
>   switch (type) {

Re: [PATCH v2 3/3] staging: iio: ad7150: clean up of comments

2019-06-16 Thread Jonathan Cameron

On Fri, 14 Jun 2019 13:33:19 -0300
Melissa Wen  wrote:

> General cleaning of comments to remove useless information or improve
> description.
> 
> Signed-off-by: Melissa Wen 
Applied,

Thanks,

Jonathan

> ---
>  drivers/staging/iio/cdc/ad7150.c | 11 ++-
>  1 file changed, 2 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/staging/iio/cdc/ad7150.c 
> b/drivers/staging/iio/cdc/ad7150.c
> index 7d56f10a19ed..51d6b52bce8b 100644
> --- a/drivers/staging/iio/cdc/ad7150.c
> +++ b/drivers/staging/iio/cdc/ad7150.c
> @@ -163,7 +163,8 @@ static int ad7150_read_event_config(struct iio_dev 
> *indio_dev,
>   return -EINVAL;
>  }
>  
> -/* lock should be held */
> +/* state_lock should be held to ensure consistent state*/
> +
>  static int ad7150_write_event_params(struct iio_dev *indio_dev,
>unsigned int chan,
>enum iio_event_type type,
> @@ -479,10 +480,6 @@ static const struct iio_chan_spec ad7150_channels[] = {
>   AD7150_CAPACITANCE_CHAN(1)
>  };
>  
> -/*
> - * threshold events
> - */
> -
>  static irqreturn_t ad7150_event_handler(int irq, void *private)
>  {
>   struct iio_dev *indio_dev = private;
> @@ -571,10 +568,6 @@ static const struct iio_info ad7150_info = {
>   .write_event_value = &ad7150_write_event_value,
>  };
>  
> -/*
> - * device probe and remove
> - */
> -
>  static int ad7150_probe(struct i2c_client *client,
>   const struct i2c_device_id *id)
>  {

Re: [PATCH v2 2/3] staging: iio: ad7150: simplify i2c SMBus return treatment

2019-06-16 Thread Jonathan Cameron

On Fri, 14 Jun 2019 13:32:54 -0300
Melissa Wen  wrote:

> Since i2c_smbus_write_byte_data returns a non-positive value, this commit
> makes the treatment of its return value less verbose.
> 
> Signed-off-by: Melissa Wen 
Applied to the togreg branch of iio.git and pushed out as testing for
the autobuilders to play with it.

Thanks,

Jonathan

> ---
>  drivers/staging/iio/cdc/ad7150.c | 9 ++---
>  1 file changed, 2 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/staging/iio/cdc/ad7150.c 
> b/drivers/staging/iio/cdc/ad7150.c
> index 091aa33589d7..7d56f10a19ed 100644
> --- a/drivers/staging/iio/cdc/ad7150.c
> +++ b/drivers/staging/iio/cdc/ad7150.c
> @@ -202,16 +202,11 @@ static int ad7150_write_event_params(struct iio_dev 
> *indio_dev,
>   ret = i2c_smbus_write_byte_data(chip->client,
>   ad7150_addresses[chan][4],
>   sens);
> - if (ret < 0)
> + if (ret)
>   return ret;
> -
> - ret = i2c_smbus_write_byte_data(chip->client,
> + return i2c_smbus_write_byte_data(chip->client,
>   ad7150_addresses[chan][5],
>   timeout);
> - if (ret < 0)
> - return ret;
> -
> - return 0;
>  }
>  
>  static int ad7150_write_event_config(struct iio_dev *indio_dev,

Re: [PATCH v2 bpf-next] bpf: sk_storage: Fix out of bounds memory access

2019-06-16 Thread Arthur Fabre

On Sat, Jun 15, 2019 at 10:45 PM Alexei Starovoitov
 wrote:
> It certainly should be in the bpf tree.
> It didn't apply directly, so I tweaked it a tiny bit,
> reduced verbosity of commit log and pushed to bpf tree.
> Thanks for the fix!

Thanks! I didn't realize this had already made it to the bpf tree.

Re: [PATCH] memcg: Ignore unprotected parent in mem_cgroup_protected()

2019-06-16 Thread Chris Down


Hi Xunlei,

Xunlei Pang writes:

docker and various types(different memory capacity) of containers
are managed by k8s, it's a burden for k8s to maintain those dynamic
figures, simply set "max" to key containers is always welcome.


Right, setting "max" is generally a fine way of going about it.


Set "max" to docker also protects docker cgroup memory(as docker
itself has tasks) unnecessarily.


That's not correct -- leaf memcgs have to _explicitly_ request memory 
protection. From the documentation:


   memory.low

   [...]

   Best-effort memory protection.  If the memory usages of a
   cgroup and all its ancestors are below their low boundaries,
   the cgroup's memory won't be reclaimed unless memory can be
   reclaimed from unprotected cgroups.

Note the part that the cgroup itself also must be within its low boundary, 
which is not implied simply by having ancestors that would permit propagation 
of protections.


In this case, Docker just shouldn't request it for those Docker-related tasks, 
and they won't get any. That seems a lot simpler and more intuitive than 
special casing "0" in ancestors.



This patch doesn't take effect on any intermediate layer with
positive memory.min set, it requires all the ancestors having
0 memory.min to work.

Nothing special change, but more flexible to business deployment...


Not so, this change is extremely "special". It violates the basic expectation 
that 0 means no possibility of propagation of protection, and I still don't see 
a compelling argument why Docker can't just set "max" in the intermediate 
cgroup and not accept any protection in leaf memcgs that it doesn't want 
protection for.

Re: [PATCH v2 8/8] habanalabs: enable 64-bit DMA mask in POWER9

2019-06-16 Thread Oded Gabbay

On Sun, Jun 16, 2019 at 12:55 PM Christoph Hellwig  wrote:
>
> On Sat, Jun 15, 2019 at 03:12:36PM +0300, Oded Gabbay wrote:
> > So after the dust has settled a bit, do you think it is reasonable to
> > add this patch upstream ?
>
> I'm not Greg, but the answer is a very clear no.  Drivers have absolutely
> no business adding these hacks.

So the alternative is that my device won't work on POWER9. Does that make sense?
What is the reason for this logic?
I'm not adding code that will be used by other drivers/users.
I'm just doing a special configuration to my device's H/W and I
condition it upon the PCI device ID of my parent PCI device.
What is the harm in that?

Thanks,
Oded

[GIT pull] timer fixes for 5.2

2019-06-16 Thread Thomas Gleixner

Linus,

please pull the latest timers-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
timers-urgent-for-linus

up to:  e3ff9c3678b4: timekeeping: Repair ktime_get_coarse*() granularity

A set of small fixes:

 - Repair the ktime_get_coarse() functions so they actually deliver what
   they are supposed to: tick granular time stamps. The current code failed
   to add the accumulated nanoseconds part of the timekeeper so the
   resulting granularity was 1 second.

 - Prevent the tracer from infinitely recursing into time getter functions
   in the ARM architected timer by marking these functions notrace.

 - Fix a trivial compiler warning caused by wrong qualifier ordering.

Thanks,

tglx

-->
Julien Thierry (1):
  clocksource/drivers/arm_arch_timer: Don't trace count reader functions

Philippe Mazenauer (1):
  clocksource/drivers/timer-ti-dm: Change to new style declaration

Thomas Gleixner (1):
  timekeeping: Repair ktime_get_coarse*() granularity


 drivers/clocksource/arm_arch_timer.c | 8 
 drivers/clocksource/timer-ti-dm.c| 2 +-
 kernel/time/timekeeping.c| 5 +++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index b2a951a798e2..5c69c9a9a6a4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -149,22 +149,22 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg 
reg,
return val;
 }
 
-static u64 arch_counter_get_cntpct_stable(void)
+static notrace u64 arch_counter_get_cntpct_stable(void)
 {
return __arch_counter_get_cntpct_stable();
 }
 
-static u64 arch_counter_get_cntpct(void)
+static notrace u64 arch_counter_get_cntpct(void)
 {
return __arch_counter_get_cntpct();
 }
 
-static u64 arch_counter_get_cntvct_stable(void)
+static notrace u64 arch_counter_get_cntvct_stable(void)
 {
return __arch_counter_get_cntvct_stable();
 }
 
-static u64 arch_counter_get_cntvct(void)
+static notrace u64 arch_counter_get_cntvct(void)
 {
return __arch_counter_get_cntvct();
 }
diff --git a/drivers/clocksource/timer-ti-dm.c 
b/drivers/clocksource/timer-ti-dm.c
index e40b55a7086f..5394d9dbdfbc 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -896,7 +896,7 @@ static int omap_dm_timer_remove(struct platform_device 
*pdev)
return ret;
 }
 
-const static struct omap_dm_timer_ops dmtimer_ops = {
+static const struct omap_dm_timer_ops dmtimer_ops = {
.request_by_node = omap_dm_timer_request_by_node,
.request_specific = omap_dm_timer_request_specific,
.request = omap_dm_timer_request,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 85f5912d8f70..44b726bab4bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -808,17 +808,18 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, *offset = offsets[offs];
+   u64 nsecs;
 
WARN_ON(timekeeping_suspended);
 
do {
seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr_mono.base, *offset);
+   nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
} while (read_seqcount_retry(&tk_core.seq, seq));
 
-   return base;
-
+   return base + nsecs;
 }
 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);

[GIT pull] x86 fixes for 5.2

2019-06-16 Thread Thomas Gleixner

Linus,

please pull the latest x86-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
x86-urgent-for-linus

up to:  78f4e932f776: x86/microcode, cpuhotplug: Add a microcode loader CPU 
hotplug callback

The accumulated fixes from this and last week:

 - Fix vmalloc TLB flush and map range calculations which lead to stale
   TLBs, spurious faults and other hard to diagnose issues.

 - Use fault_in_pages_writable() for prefaulting the user stack in the FPU
   code as it's less fragile than the current solution.

 - Use the PF_KTHREAD flag when checking for a kernel thread instead of
   current->mm as the latter can give the wrong answer due to use_mm()

 - Compute the vmemmap size correctly for KASLR and 5-Level paging. Otherwise
   this can end up with a way too small vmemmap area.

 - Make KASAN and 5-level paging work again by making sure that all invalid
   bits are masked out when computing the P4D offset. This worked before
   but got broken recently when the LDT remap area was moved.

 - Prevent a NULL pointer dereference in the resource control code which
   can be triggered with certain mount options when the requested resource
   is not available.

 - Enforce ordering of microcode loading vs. perf initialization on
   secondary CPUs. Otherwise perf tries to access a non-existing MSR as the
   boot CPU marked it as available.

 - Don't stop the resource control group walk early otherwise the control
   bitmaps are not updated correctly and become inconsistent.

 - Unbreak kgdb by returning 0 on success from kgdb_arch_set_breakpoint()
   instead of an error code.

 - Add more Icelake CPU model defines so depending changes can be queued in
   other trees

Thanks,

tglx

-->
Andrey Ryabinin (1):
  x86/kasan: Fix boot with 5-level paging and KASAN

Baoquan He (1):
  x86/mm/KASLR: Compute the size of the vmemmap section properly

Borislav Petkov (1):
  x86/microcode, cpuhotplug: Add a microcode loader CPU hotplug callback

Christoph Hellwig (1):
  x86/fpu: Don't use current->mm to check for a kthread

Hugh Dickins (1):
  x86/fpu: Use fault_in_pages_writeable() for pre-faulting

James Morse (1):
  x86/resctrl: Don't stop walking closids when a locksetup group is found

Kan Liang (1):
  x86/CPU: Add more Icelake model numbers

Matt Mullins (1):
  x86/kgdb: Return 0 from kgdb_arch_set_breakpoint()

Prarit Bhargava (1):
  x86/resctrl: Prevent NULL pointer dereference when local MBM is disabled

Rick Edgecombe (2):
  mm/vmalloc: Fix calculation of direct map addr range
  mm/vmalloc: Avoid rare case of flushing TLB with weird arguments

Sebastian Andrzej Siewior (1):
  x86/fpu: Update kernel's FPU state before using for the fsave header


 arch/x86/include/asm/fpu/internal.h|  6 +++---
 arch/x86/include/asm/intel-family.h|  3 +++
 arch/x86/kernel/cpu/microcode/core.c   |  2 +-
 arch/x86/kernel/cpu/resctrl/monitor.c  |  3 +++
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |  7 ++-
 arch/x86/kernel/fpu/core.c |  2 +-
 arch/x86/kernel/fpu/signal.c   | 16 +++-
 arch/x86/kernel/kgdb.c |  2 +-
 arch/x86/mm/kasan_init_64.c|  2 +-
 arch/x86/mm/kaslr.c| 11 ++-
 include/linux/cpuhotplug.h |  1 +
 mm/vmalloc.c   | 14 --
 12 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h 
b/arch/x86/include/asm/fpu/internal.h
index 9e27fa05a7ae..4c95c365058a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -536,7 +536,7 @@ static inline void __fpregs_load_activate(void)
struct fpu *fpu = &current->thread.fpu;
int cpu = smp_processor_id();
 
-   if (WARN_ON_ONCE(current->mm == NULL))
+   if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
return;
 
if (!fpregs_state_valid(fpu, cpu)) {
@@ -567,11 +567,11 @@ static inline void __fpregs_load_activate(void)
  * otherwise.
  *
  * The FPU context is only stored/restored for a user task and
- * ->mm is used to distinguish between kernel and user threads.
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
  */
 static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-   if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
+   if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
diff --git a/arch/x86/include/asm/intel-family.h 
b/arch/x86/include/asm/intel-family.h
index 9f15384c504a..310118805f57 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,6 +52,9 @@
 
 #define INTEL_FAM6_CANNONLAKE_MOBILE   0x66
 
+#define INTEL_FAM6_ICELAKE_X   0x6A
+#define INTEL_FAM6_ICELAKE_XEON_D

[GIT pull] ras fixes for 5.2

2019-06-16 Thread Thomas Gleixner

Linus,

please pull the latest ras-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
ras-urgent-for-linus

up to:  0ade0b6240c4: RAS/CEC: Convert the timer callback to a workqueue

Two small fixes for RAS:

 - Use a proper search algorithm to find the correct element in the CEC
   array. The replacement was a better choice than fixing the crash caused
   by the original search function with horrible duct tape.

 - Move the timer based decay function into thread context so it can
   actually acquire the mutex which protects the CEC array to prevent
   corruption.

Thanks,

tglx

-->
Borislav Petkov (1):
  RAS/CEC: Fix binary search function

Cong Wang (1):
  RAS/CEC: Convert the timer callback to a workqueue


 drivers/ras/cec.c | 80 +--
 1 file changed, 42 insertions(+), 38 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 88e4f3ff0cb8..673f8a128397 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -2,6 +2,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -123,16 +124,12 @@ static u64 dfs_pfn;
 /* Amount of errors after which we offline */
 static unsigned int count_threshold = COUNT_MASK;
 
-/*
- * The timer "decays" element count each timer_interval which is 24hrs by
- * default.
- */
-
-#define CEC_TIMER_DEFAULT_INTERVAL 24 * 60 * 60/* 24 hrs */
-#define CEC_TIMER_MIN_INTERVAL  1 * 60 * 60/* 1h */
-#define CEC_TIMER_MAX_INTERVAL30 * 24 * 60 * 60/* one month */
-static struct timer_list cec_timer;
-static u64 timer_interval = CEC_TIMER_DEFAULT_INTERVAL;
+/* Each element "decays" each decay_interval which is 24hrs by default. */
+#define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60/* 24 hrs */
+#define CEC_DECAY_MIN_INTERVAL  1 * 60 * 60/* 1h */
+#define CEC_DECAY_MAX_INTERVAL30 * 24 * 60 * 60/* one month */
+static struct delayed_work cec_work;
+static u64 decay_interval = CEC_DECAY_DEFAULT_INTERVAL;
 
 /*
  * Decrement decay value. We're using DECAY_BITS bits to denote decay of an
@@ -160,20 +157,21 @@ static void do_spring_cleaning(struct ce_array *ca)
 /*
  * @interval in seconds
  */
-static void cec_mod_timer(struct timer_list *t, unsigned long interval)
+static void cec_mod_work(unsigned long interval)
 {
unsigned long iv;
 
-   iv = interval * HZ + jiffies;
-
-   mod_timer(t, round_jiffies(iv));
+   iv = interval * HZ;
+   mod_delayed_work(system_wq, &cec_work, round_jiffies(iv));
 }
 
-static void cec_timer_fn(struct timer_list *unused)
+static void cec_work_fn(struct work_struct *work)
 {
+   mutex_lock(&ce_mutex);
do_spring_cleaning(&ce_arr);
+   mutex_unlock(&ce_mutex);
 
-   cec_mod_timer(&cec_timer, timer_interval);
+   cec_mod_work(decay_interval);
 }
 
 /*
@@ -183,32 +181,38 @@ static void cec_timer_fn(struct timer_list *unused)
  */
 static int __find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
 {
+   int min = 0, max = ca->n - 1;
u64 this_pfn;
-   int min = 0, max = ca->n;
 
-   while (min < max) {
-   int tmp = (max + min) >> 1;
+   while (min <= max) {
+   int i = (min + max) >> 1;
 
-   this_pfn = PFN(ca->array[tmp]);
+   this_pfn = PFN(ca->array[i]);
 
if (this_pfn < pfn)
-   min = tmp + 1;
+   min = i + 1;
else if (this_pfn > pfn)
-   max = tmp;
-   else {
-   min = tmp;
-   break;
+   max = i - 1;
+   else if (this_pfn == pfn) {
+   if (to)
+   *to = i;
+
+   return i;
}
}
 
+   /*
+* When the loop terminates without finding @pfn, min has the index of
+* the element slot where the new @pfn should be inserted. The loop
+* terminates when min > max, which means the min index points to the
+* bigger element while the max index to the smaller element, in-between
+* which the new @pfn belongs to.
+*
+* For more details, see exercise 1, Section 6.2.1 in TAOCP, vol. 3.
+*/
if (to)
*to = min;
 
-   this_pfn = PFN(ca->array[min]);
-
-   if (this_pfn == pfn)
-   return min;
-
return -ENOKEY;
 }
 
@@ -374,15 +378,15 @@ static int decay_interval_set(void *data, u64 val)
 {
*(u64 *)data = val;
 
-   if (val < CEC_TIMER_MIN_INTERVAL)
+   if (val < CEC_DECAY_MIN_INTERVAL)
return -EINVAL;
 
-   if (val > CEC_TIMER_MAX_INTERVAL)
+   if (val > CEC_DECAY_MAX_INTERVAL)
return -EINVAL;
 
-   timer_interval = val;
+   decay_interval = val;
 
-   cec_mod_timer(&cec_timer, timer_interval);
+   cec_mod_work

Re: [PATCH net-next 2/2 v5] netns: restrict uevents

2019-06-16 Thread Eric W. Biederman

Dmitry Torokhov  writes:

> Hi Christian,
>
> On Sun, Apr 29, 2018 at 3:45 AM Christian Brauner
>  wrote:
>>
>> commit 07e98962fa77 ("kobject: Send hotplug events in all network 
>> namespaces")
>>abhishe...@google.com
>> enabled sending hotplug events into all network namespaces back in 2010.
>> Over time the set of uevents that get sent into all network namespaces has
>> shrunk. We have now reached the point where hotplug events for all devices
>> that carry a namespace tag are filtered according to that namespace.
>> Specifically, they are filtered whenever the namespace tag of the kobject
>> does not match the namespace tag of the netlink socket.
>> Currently, only network devices carry namespace tags (i.e. network
>> namespace tags). Hence, uevents for network devices only show up in the
>> network namespace such devices are created in or moved to.
>>
>> However, any uevent for a kobject that does not have a namespace tag
>> associated with it will not be filtered and we will broadcast it into all
>> network namespaces. This behavior stopped making sense when user namespaces
>> were introduced.
>>
>> This patch simplifies and fixes couple of things:
>> - Split codepath for sending uevents by kobject namespace tags:
>>   1. Untagged kobjects - uevent_net_broadcast_untagged():
>>  Untagged kobjects will be broadcast into all uevent sockets recorded
>>  in uevent_sock_list, i.e. into all network namespaces owned by the
>>  initial user namespace.
>>   2. Tagged kobjects - uevent_net_broadcast_tagged():
>>  Tagged kobjects will only be broadcast into the network namespace they
>>  were tagged with.
>>   Handling of tagged kobjects in 2. does not cause any semantic changes.
>>   This is just splitting out the filtering logic that was handled by
>>   kobj_bcast_filter() before.
>>   Handling of untagged kobjects in 1. will cause a semantic change. The
>>   reasons why this is needed and ok have been discussed in [1]. Here is a
>>   short summary:
>>   - Userspace ignores uevents from network namespaces that are not owned by
>> the initial user namespace:
>> Uevents are filtered by userspace in a user namespace because the
>> received uid != 0. Instead the uid associated with the event will be
>> 65534 == "nobody" because the global root uid is not mapped.
>> This means we can safely and without introducing regressions modify the
>> kernel to not send uevents into all network namespaces whose owning
>> user namespace is not the initial user namespace because we know that
>> userspace will ignore the message because of the uid anyway.
>> I have a) verified that this is true for every udev implementation out
>> there b) that this behavior has been present in all udev
>> implementations from the very beginning.
>
> Unfortunately udev is not the only consumer of uevents, for example on
> Android there is healthd that also consumes uevents, and this
> particular change broke Android running in a container on Chrome OS.
> Can this be reverted? Or, if we want to keep this, how can containers
> that use separate user namespace still listen to uevents?

The code has been in the main tree for over a year so at a minimum
reverting this has the real chance of causing a regression for
folks like lxc.

I don't think Android running in a container on Chrome OS was even
available when this change was merged.  So I don't think this falls
under the ordinary no regression rules.

I may be wrong but I think this is a case of developing new code on an
old kernel and developing a dependence on a bug that had already been
fixed in newer kernels.  I know Christian did his best to reach out to
everyone when this change came through, so only getting a bug report
over a year after the code was merged is concerning.

That said, uevents should be completely useless in a user namespace
except as letting you know something happened.  Is that what healthd
is using them for?


One solution would be to tweak the container userspace on ChromeOS to
listen to the uevents outside the container and to relay them into the
Android container.

Eric

Re: [PATCH] memcg: Ignore unprotected parent in mem_cgroup_protected()

2019-06-16 Thread Xunlei Pang

Hi Chris,

On 2019/6/16 PM 6:37, Chris Down wrote:
> Hi Xunlei,
> 
> Xunlei Pang writes:
>> docker and various types(different memory capacity) of containers
>> are managed by k8s, it's a burden for k8s to maintain those dynamic
>> figures, simply set "max" to key containers is always welcome.
> 
> Right, setting "max" is generally a fine way of going about it.
> 
>> Set "max" to docker also protects docker cgroup memory(as docker
>> itself has tasks) unnecessarily.
> 
> That's not correct -- leaf memcgs have to _explicitly_ request memory
> protection. From the documentation:
> 
>    memory.low
> 
>    [...]
> 
>    Best-effort memory protection.  If the memory usages of a
>    cgroup and all its ancestors are below their low boundaries,
>    the cgroup's memory won't be reclaimed unless memory can be
>    reclaimed from unprotected cgroups.
> 
> Note the part that the cgroup itself also must be within its low
> boundary, which is not implied simply by having ancestors that would
> permit propagation of protections.
> 
> In this case, Docker just shouldn't request it for those Docker-related
> tasks, and they won't get any. That seems a lot simpler and more
> intuitive than special casing "0" in ancestors.
> 
>> This patch doesn't take effect on any intermediate layer with
>> positive memory.min set, it requires all the ancestors having
>> 0 memory.min to work.
>>
>> Nothing special change, but more flexible to business deployment...
> 
> Not so, this change is extremely "special". It violates the basic
> expectation that 0 means no possibility of propagation of protection,
> and I still don't see a compelling argument why Docker can't just set
> "max" in the intermediate cgroup and not accept any protection in leaf
> memcgs that it doesn't want protection for.

I found the reason: I'm using cgroup v1 (with a memory.min backport),
which permits tasks to exist in the "docker" cgroup.procs.

For cgroup v2, it's not a problem.

Thanks,
Xunlei

Re: [PATCH v2 1/2] x86/mm: Identify the end of the kernel area to be reserved

2019-06-16 Thread lijiang

After applying the patch series (v2), the kexec-d kernel and the kdump kernel
boot successfully.

Thanks.

Tested-by: Lianbo Jiang 

在 2019年06月15日 05:15, Lendacky, Thomas 写道:
> The memory occupied by the kernel is reserved using memblock_reserve()
> in setup_arch(). Currently, the area is from symbols _text to __bss_stop.
> Everything after __bss_stop must be specifically reserved otherwise it
> is discarded. This is not clearly documented.
> 
> Add a new symbol, __end_of_kernel_reserve, that more readily identifies
> what is reserved, along with comments that indicate what is reserved,
> what is discarded and what needs to be done to prevent a section from
> being discarded.
> 
> Cc: Baoquan He 
> Cc: Lianbo Jiang 
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/include/asm/sections.h | 2 ++
>  arch/x86/kernel/setup.c | 8 +++-
>  arch/x86/kernel/vmlinux.lds.S   | 9 -
>  3 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
> index 8ea1cfdbeabc..71b32f2570ab 100644
> --- a/arch/x86/include/asm/sections.h
> +++ b/arch/x86/include/asm/sections.h
> @@ -13,4 +13,6 @@ extern char __end_rodata_aligned[];
>  extern char __end_rodata_hpage_align[];
>  #endif
>  
> +extern char __end_of_kernel_reserve[];
> +
>  #endif   /* _ASM_X86_SECTIONS_H */
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 08a5f4a131f5..32eb70625b3b 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -827,8 +827,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned 
> long v, void *p)
>  
>  void __init setup_arch(char **cmdline_p)
>  {
> + /*
> +  * Reserve the memory occupied by the kernel between _text and
> +  * __end_of_kernel_reserve symbols. Any kernel sections after the
> +  * __end_of_kernel_reserve symbol must be explicity reserved with a
> +  * separate memblock_reserve() or it will be discarded.
> +  */
>   memblock_reserve(__pa_symbol(_text),
> -  (unsigned long)__bss_stop - (unsigned long)_text);
> +  (unsigned long)__end_of_kernel_reserve - (unsigned 
> long)_text);
>  
>   /*
>* Make sure page 0 is always reserved because on systems with
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index 0850b5149345..ca2252ca6ad7 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -368,6 +368,14 @@ SECTIONS
>   __bss_stop = .;
>   }
>  
> + /*
> +  * The memory occupied from _text to here, __end_of_kernel_reserve, is
> +  * automatically reserved in setup_arch(). Anything after here must be
> +  * explicitly reserved using memblock_reserve() or it will be discarded
> +  * and treated as available memory.
> +  */
> + __end_of_kernel_reserve = .;
> +
>   . = ALIGN(PAGE_SIZE);
>   .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
>   __brk_base = .;
> @@ -382,7 +390,6 @@ SECTIONS
>   STABS_DEBUG
>   DWARF_DEBUG
>  
> - /* Sections to be discarded */
>   DISCARDS
>   /DISCARD/ : {
>   *(.eh_frame)
>

Re: [PATCH] powerpc/32: fix build failure on book3e with KVM

2019-06-16 Thread Michael Ellerman

On Thu, 2019-05-23 at 08:39:27 UTC, Christophe Leroy wrote:
> Build failure was introduced by the commit identified below,
> due to missed macro expansion leading to wrong called function's name.
> 
> arch/powerpc/kernel/head_fsl_booke.o: In function `SystemCall':
> arch/powerpc/kernel/head_fsl_booke.S:416: undefined reference to 
> `kvmppc_handler_BOOKE_INTERRUPT_SYSCALL_SPRN_SRR1'
> Makefile:1052: recipe for target 'vmlinux' failed
> 
> The called function should be kvmppc_handler_8_0x01B(). This patch fixes it.
> 
> Reported-by: Paul Mackerras 
> Fixes: 1a4b739bbb4f ("powerpc/32: implement fast entry for syscalls on BOOKE")
> Signed-off-by: Christophe Leroy 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/82f6e266f8123d7938713c0e10c03aa655b3e68a

cheers

Re: [PATCH] powerpc/32s: fix initial setup of segment registers on secondary CPU

2019-06-16 Thread Michael Ellerman

On Tue, 2019-06-11 at 15:47:20 UTC, Christophe Leroy wrote:
> The patch referenced below moved the loading of segment registers
> out of load_up_mmu() in order to do it earlier in the boot sequence.
> However, the secondary CPU still needs it to be done when loading up
> the MMU.
> 
> Reported-by: Erhard F. 
> Fixes: 215b823707ce ("powerpc/32s: set up an early static hash table for 
> KASAN")
> Signed-off-by: Christophe Leroy 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/b7f8b440f3001cc1775c028f0a783786113c2ae3

cheers

Re: [PATCH] powerpc/booke: fix fast syscall entry on SMP

2019-06-16 Thread Michael Ellerman

On Thu, 2019-06-13 at 13:52:30 UTC, Christophe Leroy wrote:
> Use r10 instead of r9 to calculate CPU offset as r9 contains
> the value from SRR1 which is used later.
> 
> Fixes: 1a4b739bbb4f ("powerpc/32: implement fast entry for syscalls on BOOKE")
> Signed-off-by: Christophe Leroy 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/e8732ffa2e096d433c3f2349b871d43ed0d39f5c

cheers

Re: [PATCH v7 18/18] x86/fsgsbase/64: Add documentation for FSGSBASE

2019-06-16 Thread Thomas Gleixner

On Sun, 16 Jun 2019, Thomas Gleixner wrote:
> On Sun, 16 Jun 2019, Bae, Chang Seok wrote:
> > On Jun 14, 2019, at 13:07, Bae, Chang Seok 
> > mailto:chang.seok@intel.com>> wrote:
> > 
> > 
> > On Jun 13, 2019, at 23:54, Thomas Gleixner 
> > mailto:t...@linutronix.de>> wrote:
> > 
> > +The GS segment has no common use and can be used freely by
> > +applications. There is no storage class specifier similar to __thread which
> > +would cause the compiler to use GS based addressing modes. Newer versions
> > +of GCC and Clang support GS based addressing via address space identifiers.
> > +
> > +Clang does not provide these address space identifiers, but it provides
> > +an attribute based mechanism:
> > +
> > 
> > These two sentences seem to conflict with each other; Clang needs to be 
> > clarified
> > above.
> > 
> > Thanks for the write-up. Just preparing v8 right now. Will send out shortly.
> 
> Please don't. Send me a delta patch against the documentation. I have queued
> all the other patches already internally. I did not push it out because I
> wanted to have proper docs.

Fixed it up already. About to push it out.

Thanks,

tglx

Re: [PATCH v5 1/3] mtd: spi-nor: add support for is25wp256

2019-06-16 Thread Vignesh Raghavendra

Hi,

On 12-Jun-19 4:17 PM, Sagar Shrikant Kadam wrote:
[...]

> @@ -4129,7 +4137,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
>   if (ret)
>   return ret;
>  
> - if (nor->addr_width) {
> + if (nor->addr_width && JEDEC_MFR(info) != SNOR_MFR_ISSI) {
>   /* already configured from SFDP */

Hmm, why would you want to ignore addr_width that's read from SFDP table?

Regards
Vignesh


>   } else if (info->addr_width) {
>   nor->addr_width = info->addr_width;
> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
> index b3d360b..ff13297 100644
> --- a/include/linux/mtd/spi-nor.h
> +++ b/include/linux/mtd/spi-nor.h
> @@ -19,6 +19,7 @@
>  #define SNOR_MFR_ATMEL   CFI_MFR_ATMEL
>  #define SNOR_MFR_GIGADEVICE  0xc8
>  #define SNOR_MFR_INTEL   CFI_MFR_INTEL
> +#define SNOR_MFR_ISSI0x9d/* ISSI */
>  #define SNOR_MFR_ST  CFI_MFR_ST  /* ST Micro */
>  #define SNOR_MFR_MICRON  CFI_MFR_MICRON  /* Micron */
>  #define SNOR_MFR_MACRONIXCFI_MFR_MACRONIX
>

Re: [PATCH v5 2/3] mtd: spi-nor: add support to unlock flash device

2019-06-16 Thread Vignesh Raghavendra




On 12-Jun-19 4:17 PM, Sagar Shrikant Kadam wrote:
> Nor device (is25wp256 mounted on HiFive unleashed Rev A00 board) from ISSI
> have memory blocks guarded by block protection bits BP[0,1,2,3].
> 
> Clearing the block protection bits unlocks the flash memory regions.
> The unlock scheme is registered during the NOR scan.
> 
> Based on code developed by Wesley Terpstra 
> and/or Palmer Dabbelt .
> https://github.com/riscv/riscv-linux/commit/c94e267766d62bc9a669611c3d0c8ed5ea26569b
> 
> Signed-off-by: Sagar Shrikant Kadam 
> ---
>  drivers/mtd/spi-nor/spi-nor.c | 51 
> ++-
>  include/linux/mtd/spi-nor.h   |  1 +
>  2 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
> index 2d5a925..b7c6261 100644
> --- a/drivers/mtd/spi-nor/spi-nor.c
> +++ b/drivers/mtd/spi-nor/spi-nor.c
> @@ -1461,6 +1461,49 @@ static int macronix_quad_enable(struct spi_nor *nor)
>  }
>  
>  /**
> + * issi_unlock() - clear BP[0123] write-protection.
> + * @nor: pointer to a 'struct spi_nor'.
> + * @ofs: offset from which to unlock memory.
> + * @len: number of bytes to unlock.
> + *
> + * Bits [2345] of the Status Register are BP[0123].
> + * ISSI chips use a different block protection scheme than other chips.
> + * Just disable the write-protect unilaterally.
> + *
> + * Return: 0 on success, -errno otherwise.
> + */
> +static int issi_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
> +{
> + int ret, val;
> + u8 mask = SR_BP0 | SR_BP1 | SR_BP2 | SR_BP3;
> +
> + val = read_sr(nor);
> + if (val < 0)
> + return val;
> + if (!(val & mask))
> + return 0;
> +
> + write_enable(nor);
> +
> + write_sr(nor, val & ~mask);
> +
> + ret = spi_nor_wait_till_ready(nor);
> + if (ret)
> + return ret;
> +
> + ret = read_sr(nor);
> + if (ret > 0 && !(ret & mask)) {
> + dev_info(nor->dev,
> + "ISSI Block Protection Bits cleared SR=0x%x", ret);
> + ret = 0;
> + } else {
> + dev_err(nor->dev, "ISSI Block Protection Bits not cleared\n");
> + ret = -EINVAL;
> + }
> + return ret;
> +}
> +
> +/**
>   * spansion_quad_enable() - set QE bit in Configuraiton Register.
>   * @nor: pointer to a 'struct spi_nor'
>   *
> @@ -1836,7 +1879,7 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
>   SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
>   { "is25wp256", INFO(0x9d7019, 0, 64 * 1024, 1024,
>   SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
> - SPI_NOR_4B_OPCODES)
> + SPI_NOR_4B_OPCODES | SPI_NOR_HAS_LOCK)
>   },
>  
>   /* Macronix */
> @@ -4080,6 +4123,12 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
>   nor->flash_is_locked = stm_is_locked;
>   }
>  
> + /* NOR protection support for ISSI chips */
> + if (JEDEC_MFR(info) == SNOR_MFR_ISSI ||
> + info->flags & SPI_NOR_HAS_LOCK) {

This should be:

if (JEDEC_MFR(info) == SNOR_MFR_ISSI &&
info->flags & SPI_NOR_HAS_LOCK) {

Otherwise you would end up overriding nor->flash_unlock function for
other vendors too, right?

> + nor->flash_unlock = issi_unlock;
> +

No need for blank line here.
Please run ./scripts/checkpatch.pl --strict on all patches and address
all the issues reported by it.



> + }
>   if (nor->flash_lock && nor->flash_unlock && nor->flash_is_locked) {
>   mtd->_lock = spi_nor_lock;
>   mtd->_unlock = spi_nor_unlock;
> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
> index ff13297..9a7d719 100644
> --- a/include/linux/mtd/spi-nor.h
> +++ b/include/linux/mtd/spi-nor.h
> @@ -127,6 +127,7 @@
>  #define SR_BP0   BIT(2)  /* Block protect 0 */
>  #define SR_BP1   BIT(3)  /* Block protect 1 */
>  #define SR_BP2   BIT(4)  /* Block protect 2 */
> +#define SR_BP3   BIT(5)  /* Block protect 3 for ISSI 
> device*/

No need to mention ISSI here. I am sure there are devices from other
vendors with BP3

>  #define SR_TBBIT(5)  /* Top/Bottom protect */
>  #define SR_SRWD  BIT(7)  /* SR write protect */
>  /* Spansion/Cypress specific status bits */
> 

Regards
Vignesh

Re: [PATCH v9 01/12] mm/sparsemem: Introduce struct mem_section_usage

2019-06-16 Thread Wei Yang

On Wed, Jun 05, 2019 at 02:57:54PM -0700, Dan Williams wrote:
>Towards enabling memory hotplug to track partial population of a
>section, introduce 'struct mem_section_usage'.
>
>A pointer to a 'struct mem_section_usage' instance replaces the existing
>pointer to a 'pageblock_flags' bitmap. Effectively it adds one more
>'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to
>house a new 'subsection_map' bitmap.  The new bitmap enables the memory
>hot{plug,remove} implementation to act on incremental sub-divisions of a
>section.
>
>The default SUBSECTION_SHIFT is chosen to keep the 'subsection_map' no
>larger than a single 'unsigned long' on the major architectures.
>Alternatively an architecture can define ARCH_SUBSECTION_SHIFT to
>override the default PMD_SHIFT. Note that PowerPC needs to use
>ARCH_SUBSECTION_SHIFT to workaround PMD_SHIFT being a non-constant
>expression on PowerPC.
>
>The primary motivation for this functionality is to support platforms
>that mix "System RAM" and "Persistent Memory" within a single section,
>or multiple PMEM ranges with different mapping lifetimes within a single
>section. The section restriction for hotplug has caused an ongoing saga
>of hacks and bugs for devm_memremap_pages() users.
>
>Beyond the fixups to teach existing paths how to retrieve the 'usemap'
>from a section, and updates to usemap allocation path, there are no
>expected behavior changes.
>
>Cc: Michal Hocko 
>Cc: Vlastimil Babka 
>Cc: Logan Gunthorpe 
>Cc: Oscar Salvador 
>Cc: Pavel Tatashin 
>Cc: Benjamin Herrenschmidt 
>Cc: Paul Mackerras 
>Cc: Michael Ellerman 
>Signed-off-by: Dan Williams 
>---
> arch/powerpc/include/asm/sparsemem.h |3 +
> include/linux/mmzone.h   |   48 +++-
> mm/memory_hotplug.c  |   18 
> mm/page_alloc.c  |2 -
> mm/sparse.c  |   81 +-
> 5 files changed, 99 insertions(+), 53 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/sparsemem.h 
>b/arch/powerpc/include/asm/sparsemem.h
>index 3192d454a733..1aa3c9303bf8 100644
>--- a/arch/powerpc/include/asm/sparsemem.h
>+++ b/arch/powerpc/include/asm/sparsemem.h
>@@ -10,6 +10,9 @@
>  */
> #define SECTION_SIZE_BITS   24
> 
>+/* Reflect the largest possible PMD-size as the subsection-size constant */
>+#define ARCH_SUBSECTION_SHIFT 24
>+
> #endif /* CONFIG_SPARSEMEM */
> 
> #ifdef CONFIG_MEMORY_HOTPLUG
>diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>index 427b79c39b3c..ac163f2f274f 100644
>--- a/include/linux/mmzone.h
>+++ b/include/linux/mmzone.h
>@@ -1161,6 +1161,44 @@ static inline unsigned long section_nr_to_pfn(unsigned 
>long sec)
> #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & 
> PAGE_SECTION_MASK)
> #define SECTION_ALIGN_DOWN(pfn)   ((pfn) & PAGE_SECTION_MASK)
> 
>+/*
>+ * SUBSECTION_SHIFT must be constant since it is used to declare
>+ * subsection_map and related bitmaps without triggering the generation
>+ * of variable-length arrays. The most natural size for a subsection is
>+ * a PMD-page. For architectures that do not have a constant PMD-size
>+ * ARCH_SUBSECTION_SHIFT can be set to a constant max size, or otherwise
>+ * fallback to 2MB.
>+ */
>+#if defined(ARCH_SUBSECTION_SHIFT)
>+#define SUBSECTION_SHIFT (ARCH_SUBSECTION_SHIFT)
>+#elif defined(PMD_SHIFT)
>+#define SUBSECTION_SHIFT (PMD_SHIFT)
>+#else
>+/*
>+ * Memory hotplug enabled platforms avoid this default because they
>+ * either define ARCH_SUBSECTION_SHIFT, or PMD_SHIFT is a constant, but
>+ * this is kept as a backstop to allow compilation on
>+ * !ARCH_ENABLE_MEMORY_HOTPLUG archs.
>+ */
>+#define SUBSECTION_SHIFT 21
>+#endif
>+
>+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
>+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
>+#define PAGE_SUBSECTION_MASK ((~(PAGES_PER_SUBSECTION-1)))

One pair of brackets could be removed, IMHO.

>+
>+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
>+#error Subsection size exceeds section size
>+#else
>+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - 
>SUBSECTION_SHIFT))
>+#endif
>+
>+struct mem_section_usage {
>+  DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
>+  /* See declaration of similar field in struct zone */
>+  unsigned long pageblock_flags[0];
>+};
>+
> struct page;
> struct page_ext;
> struct mem_section {
>@@ -1178,8 +1216,7 @@ struct mem_section {
>*/
>   unsigned long section_mem_map;
> 
>-  /* See declaration of similar field in struct zone */
>-  unsigned long *pageblock_flags;
>+  struct mem_section_usage *usage;
> #ifdef CONFIG_PAGE_EXTENSION
>   /*
>* If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
>@@ -1210,6 +1247,11 @@ extern struct mem_section **mem_section;
> extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
> #endif
> 
>+static inline unsigned long *section_to_usemap(struct mem_section *ms)
>+

Re: [PATCH v2] staging: rtl8723bs: Resolve checkpatch error "that open brace { should be on the previous line" in the rtl8723 driver

2019-06-16 Thread Shobhit Kukreti

On Sun, Jun 16, 2019 at 01:13:11AM -0700, Joe Perches wrote:
> On Sat, 2019-06-15 at 14:29 -0700, Shobhit Kukreti wrote:
> > Cleaned up the code from the following files to get rid of
> > check patch error "that open brace { should be on the previous line"
> 
> It's fine you are modifying brace styles, but:
> 
> > diff --git a/drivers/staging/rtl8723bs/os_dep/mlme_linux.c 
> > b/drivers/staging/rtl8723bs/os_dep/mlme_linux.c
> > index aa2499f..4631b68 100644
> > --- a/drivers/staging/rtl8723bs/os_dep/mlme_linux.c
> > +++ b/drivers/staging/rtl8723bs/os_dep/mlme_linux.c
> > @@ -46,8 +46,7 @@ void rtw_os_indicate_connect(struct adapter *adapter)
> > struct mlme_priv *pmlmepriv = &(adapter->mlmepriv);
> >  
> > if ((check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE) == true) ||
> > -   (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) == true))
> > -   {
> > +   (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) == true)) {
> > rtw_cfg80211_ibss_indicate_connect(adapter);
> > }
> > else
> 
> the else should be on the same line as the close brace
> 
> > @@ -106,8 +105,9 @@ void rtw_reset_securitypriv(struct adapter *adapter)
> > adapter->securitypriv.ndisencryptstatus = Ndis802_11WEPDisabled;
> >  
> > }
> > -   else /* reset values in securitypriv */
> > -   {
> > +   else {
> > +   /* reset values in securitypriv */
> > +
> 
> and here.  etc.  Please change all instances appropriately.
Thank you for the feedback. I intended to do one kind of change in a
patch. This probably would need a patch set. Will edit appropriately.

Best,
Shobhit Kukreti
> 
>

Re: [PATCH] staging: iio: adt7316: Add missing include files

2019-06-16 Thread Jonathan Cameron

On Fri, 14 Jun 2019 23:28:46 +0800
YueHaibing  wrote:

> Fix build error:
> 
> drivers/staging/iio/addac/adt7316.c: In function adt7316_store_update_DAC:
> drivers/staging/iio/addac/adt7316.c:949:3: error: implicit declaration of
>  function gpiod_set_value; did you mean gpio_set_value? 
> [-Werror=implicit-function-declaration]
>gpiod_set_value(chip->ldac_pin, 0);
> 
> drivers/staging/iio/addac/adt7316.c: In function adt7316_setup_irq:
> drivers/staging/iio/addac/adt7316.c:1807:13: error: implicit declaration of
>  function irqd_get_trigger_type; did you mean devm_iio_trigger_free? 
> [-Werror=implicit-function-declaration]
>   irq_type = irqd_get_trigger_type(irq_get_irq_data(chip->bus.irq));
> 
> Reported-by: Hulk Robot 
> Fixes: 7f6b6d553df7 ("Staging: iio: adt7316: Add all irq related code in 
> adt7316_irq_setup()")
> Fixes: c63460c4298f ("Staging: iio: adt7316: Use device tree data to set 
> ldac_pin")
> Signed-off-by: YueHaibing 
Hi yuehaibing,

You were second to send a fix for this. I've had it in my
fixes branch since last week, but not done a pull request quite yet.
I'll probably send it out later today.

https://patchwork.kernel.org/patch/10978301/

Thanks,

Jonathan

> ---
>  drivers/staging/iio/addac/adt7316.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/staging/iio/addac/adt7316.c 
> b/drivers/staging/iio/addac/adt7316.c
> index 37ce563..9d3d159 100644
> --- a/drivers/staging/iio/addac/adt7316.c
> +++ b/drivers/staging/iio/addac/adt7316.c
> @@ -16,6 +16,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include 
>  #include

I Need Your Trust

2019-06-16 Thread Sgt John Anup

I am Sgt John Anup,US Army base in Afghanistan for peace keeping,I have a
monetary proposal for you, I found your contact detail in a address journal am 
seeking your assistance to evacuate the sum of $25,000,000 to you as long as I 
am assured that it will be safe in your care until I complete my service here 
in Afghanistan, I we be given you 25% from it.This is not stolen money and 
there are no  dangers involved, if
interested,please Reply me on my Private email Below for more details;
sgt.johnanupsenap...@gmail.com
---
This email and any attachments are confidential and may also be privileged. If 
you are not the addressee you may not copy, forward, disclose, circulate in any 
other way use or rely on the information contained in this email or any 
attachments or use any part of it. If you are not the intended recipient, 
please immediately delete it and all copies of it from your system, destroy any 
hard copies of it and notify the sender review, re-transmission, dissemination 
or other use of, or taking of any action in reliance upon such information is 
prohibited.
Any views, opinions, comments, statements of information contained in this 
E-mail, including any attachments (if any) are those of the author, their 
accuracy, completeness or correctness are not guaranteed.  shall not be liable 
or responsible for any of such contents, including damage or loss resulting 
from any virus transmitted or any attachments by this E-mail."
Emails cannot be guaranteed to be secure or error free as the message and any 
attachments could be intercepted, corrupted, lost, delayed, incomplete or 
amended. The sender does not accept liability for any errors or omissions.
---

[tip:WIP.x86/cpu 1/18] arch/x86//kernel/ptrace.c:400:3: error: too many arguments to function 'x86_fsbase_write_cpu'

2019-06-16 Thread kbuild test robot

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git WIP.x86/cpu
head:   a0e3bbdc2e84af76864e16772ef0099ce933cded
commit: 9a17639c0ad237666277861b65cf8fa80a4e9775 [1/18] x86/process/64: Fix 
ARCH_SET_FS/GS for a remote task
config: x86_64-randconfig-x019-201924 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
git checkout 9a17639c0ad237666277861b65cf8fa80a4e9775
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All error/warnings (new ones prefixed by >>):

   arch/x86//kernel/ptrace.c: In function 'putreg':
>> arch/x86//kernel/ptrace.c:400:24: warning: passing argument 1 of 
>> 'x86_fsbase_write_cpu' makes integer from pointer without a cast 
>> [-Wint-conversion]
  x86_fsbase_write_cpu(child, value);
   ^
   In file included from arch/x86/include/asm/elf.h:13:0,
from include/linux/elf.h:5,
from arch/x86//kernel/ptrace.c:18:
   arch/x86/include/asm/fsgsbase.h:42:20: note: expected 'long unsigned int' 
but argument is of type 'struct task_struct *'
static inline void x86_fsbase_write_cpu(unsigned long fsbase)
   ^~~~
>> arch/x86//kernel/ptrace.c:400:3: error: too many arguments to function 
>> 'x86_fsbase_write_cpu'
  x86_fsbase_write_cpu(child, value);
  ^~~~
   In file included from arch/x86/include/asm/elf.h:13:0,
from include/linux/elf.h:5,
from arch/x86//kernel/ptrace.c:18:
   arch/x86/include/asm/fsgsbase.h:42:20: note: declared here
static inline void x86_fsbase_write_cpu(unsigned long fsbase)
   ^~~~
>> arch/x86//kernel/ptrace.c:405:3: error: implicit declaration of function 
>> 'x86_gsbase_write_cpu'; did you mean 'x86_fsbase_write_cpu'? 
>> [-Werror=implicit-function-declaration]
  x86_gsbase_write_cpu(child, value);
  ^~~~
  x86_fsbase_write_cpu
   cc1: some warnings being treated as errors

vim +/x86_fsbase_write_cpu +400 arch/x86//kernel/ptrace.c

   380  
   381  static int putreg(struct task_struct *child,
   382unsigned long offset, unsigned long value)
   383  {
   384  switch (offset) {
   385  case offsetof(struct user_regs_struct, cs):
   386  case offsetof(struct user_regs_struct, ds):
   387  case offsetof(struct user_regs_struct, es):
   388  case offsetof(struct user_regs_struct, fs):
   389  case offsetof(struct user_regs_struct, gs):
   390  case offsetof(struct user_regs_struct, ss):
   391  return set_segment_reg(child, offset, value);
   392  
   393  case offsetof(struct user_regs_struct, flags):
   394  return set_flags(child, value);
   395  
   396  #ifdef CONFIG_X86_64
   397  case offsetof(struct user_regs_struct,fs_base):
   398  if (value >= TASK_SIZE_MAX)
   399  return -EIO;
 > 400  x86_fsbase_write_cpu(child, value);
   401  return 0;
   402  case offsetof(struct user_regs_struct,gs_base):
   403  if (value >= TASK_SIZE_MAX)
   404  return -EIO;
 > 405  x86_gsbase_write_cpu(child, value);
   406  return 0;
   407  #endif
   408  }
   409  
   410  *pt_regs_access(task_pt_regs(child), offset) = value;
   411  return 0;
   412  }
   413  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: linux-next: Fixes tag needs some work in the clockevents tree

2019-06-16 Thread Stephen Rothwell

Hi Daniel,

[Sorry for the slow response.]

On Thu, 13 Jun 2019 08:52:21 +0200 Daniel Lezcano  
wrote:
>
> actually it returns:
> 
> git log -1 --format='Fixes: %h ("%s")' 3be2a85a0b61
> 
> Fixes: 3be2a85a0b61 ("clocksource/drivers/tegra: Support per-CPU timers on 
> all Tegra's")

Indeed.

> Is it ok to shorten the subject?

I figure it is easier to just use the "git log" result and to give
anyone (or any script) that wants to use the Fixes tag as much
information as possible.

-- 
Cheers,
Stephen Rothwell


pgpf7LDWD5Cr1.pgp
Description: OpenPGP digital signature

Re: [v3 1/2] mtd: nand: Add Cadence NAND controller driver

2019-06-16 Thread Dmitry Osipenko

14.06.2019 18:09, Piotr Sroka пишет:

Commit description is mandatory.

> Signed-off-by: Piotr Sroka 
> ---

[snip]

> +
> +/* Cadnence NAND flash controller capabilities get from driver data. */
> +struct cadence_nand_dt_devdata {
> + /* Skew value of the output signals of the NAND Flash interface. */
> + u32 if_skew;
> + /* It informs if aging feature in the DLL PHY supported. */
> + u8 phy_dll_aging;
> + /*
> +  * It informs if per bit deskew for read and write path in
> +  * the PHY is supported.
> +  */
> + u8 phy_per_bit_deskew;
> + /* It informs if slave DMA interface is connected to DMA engine. */
> + u8 has_dma;

There is no need to dedicate 8 bits to a variable if you only care about a
single bit. You may write this as:

bool has_dma : 1;

[snip]

> +static struct
> +cdns_nand_chip *to_cdns_nand_chip(struct nand_chip *chip)
> +{
> + return container_of(chip, struct cdns_nand_chip, chip);
> +}
> +
> +static struct
> +cdns_nand_ctrl *to_cdns_nand_ctrl(struct nand_controller *controller)
> +{
> + return container_of(controller, struct cdns_nand_ctrl, controller);
> +}

It's better to inline explicitly such cases because they won't get inlined with 
some
kernel configurations, like enabled ftracing for example.

> +static bool
> +cadence_nand_dma_buf_ok(struct cdns_nand_ctrl *cdns_ctrl, const void *buf,
> + u32 buf_len)
> +{
> + u8 data_dma_width = cdns_ctrl->caps2.data_dma_width;
> +
> + return buf && virt_addr_valid(buf) &&
> + likely(IS_ALIGNED((uintptr_t)buf, data_dma_width)) &&
> + likely(IS_ALIGNED(buf_len, data_dma_width));
> +}
> +
> +static int cadence_nand_wait_for_value(struct cdns_nand_ctrl *cdns_ctrl,
> +u32 reg_offset, u32 timeout_us,
> +u32 mask, bool is_clear)
> +{
> + u32 val;
> + int ret = 0;
> +
> + ret = readl_poll_timeout(cdns_ctrl->reg + reg_offset,
> +  val, !(val & mask) == is_clear,
> +  10, timeout_us);

Apparently you don't care about having memory barrier here, hence
readl_relaxed_poll_timeout().

> + if (ret < 0) {
> + dev_err(cdns_ctrl->dev,
> + "Timeout while waiting for reg %x with mask %x is clear 
> %d\n",
> + reg_offset, mask, is_clear);
> + }
> +
> + return ret;
> +}
> +
> +static int cadence_nand_set_ecc_enable(struct cdns_nand_ctrl *cdns_ctrl,
> +bool enable)
> +{
> + u32 reg;
> +
> + if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
> + 100,
> + CTRL_STATUS_CTRL_BUSY, true))
> + return -ETIMEDOUT;
> +
> + reg = readl(cdns_ctrl->reg + ECC_CONFIG_0);
> +
> + if (enable)
> + reg |= ECC_CONFIG_0_ECC_EN;
> + else
> + reg &= ~ECC_CONFIG_0_ECC_EN;
> +
> + writel(reg, cdns_ctrl->reg + ECC_CONFIG_0);

And here.. looks like there is no need for the memory barriers, hence use the
relaxed versions of readl/writel. Same for the rest of the patch.

> + return 0;
> +}
> +
> +static void cadence_nand_set_ecc_strength(struct cdns_nand_ctrl *cdns_ctrl,
> +   u8 corr_str_idx)
> +{
> + u32 reg;
> +
> + if (cdns_ctrl->curr_corr_str_idx == corr_str_idx)
> + return;
> +
> + reg = readl(cdns_ctrl->reg + ECC_CONFIG_0);
> + reg &= ~ECC_CONFIG_0_CORR_STR;
> + reg |= FIELD_PREP(ECC_CONFIG_0_CORR_STR, corr_str_idx);
> + writel(reg, cdns_ctrl->reg + ECC_CONFIG_0);
> +
> + cdns_ctrl->curr_corr_str_idx = corr_str_idx;
> +}
> +
> +static u8 cadence_nand_get_ecc_strength_idx(struct cdns_nand_ctrl *cdns_ctrl,
> + u8 strength)
> +{
> + u8 i, corr_str_idx = 0;
> +
> + for (i = 0; i < BCH_MAX_NUM_CORR_CAPS; i++) {
> + if (cdns_ctrl->ecc_strengths[i] == strength) {
> + corr_str_idx = i;
> + break;
> + }
> + }

Is it an error case when i == BCH_MAX_NUM_CORR_CAPS?

> + return corr_str_idx;
> +}
> +
> +static int cadence_nand_set_skip_marker_val(struct cdns_nand_ctrl *cdns_ctrl,
> + u16 marker_value)
> +{
> + u32 reg = 0;
> +
> + if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
> + 100,
> + CTRL_STATUS_CTRL_BUSY, true))
> + return -ETIMEDOUT;
> +
> + reg = readl(cdns_ctrl->reg + SKIP_BYTES_CONF);
> + reg &= ~SKIP_BYTES_MARKER_VALUE;
> + reg |= FIELD_PREP(SKIP_BYTES_MARKER_VALUE,
> + marker_value);
> +
> + writel(reg, cdns_ctrl->reg + SKIP_BYTES_CONF);
> +
> + return 0;
> +}
> +
> +static int cadence_nand_set_skip_bytes_conf(struct cdns_nand_ctrl *cdns_c

[PATCH] habanalabs: Allow accessing host mapped addresses via debugfs

2019-06-16 Thread Tomer Tayar

Allows using the addr/data32 debugfs nodes to access a device VA of a
host mapped memory when the IOMMU is disabled.

Due to the possible large amount of a user host mapped memory, the
driver doesn't maintain a database with the host addresses per device VA.
When the IOMMU is disabled, this missing info is being overcome by
simply using phys_to_virt(). However, this is not useful when the IOMMU
is enabled, and thus the enforced limitation.

Signed-off-by: Tomer Tayar 
---
 .../ABI/testing/debugfs-driver-habanalabs | 11 --
 drivers/misc/habanalabs/debugfs.c | 35 ---
 drivers/misc/habanalabs/goya/goya.c   | 19 +++---
 3 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs 
b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 2f5b80be07a3..18191c2becab 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -3,7 +3,10 @@ Date:   Jan 2019
 KernelVersion:  5.1
 Contact:oded.gab...@gmail.com
 Description:Sets the device address to be used for read or write through
-PCI bar. The acceptable value is a string that starts with "0x"
+PCI bar, or the device VA of a host mapped memory to be read or
+written directly from the host. The latter option is allowed
+only when the IOMMU is disabled.
+The acceptable value is a string that starts with "0x"
 
 What:   /sys/kernel/debug/habanalabs/hl/command_buffers
 Date:   Jan 2019
@@ -33,10 +36,12 @@ Contact:oded.gab...@gmail.com
 Description:Allows the root user to read or write directly through the
 device's PCI bar. Writing to this file generates a write
 transaction while reading from the file generates a read
-transcation. This custom interface is needed (instead of using
+transaction. This custom interface is needed (instead of using
 the generic Linux user-space PCI mapping) because the DDR bar
 is very small compared to the DDR memory and only the driver 
can
-move the bar before and after the transaction
+move the bar before and after the transaction.
+If the IOMMU is disabled, it also allows the root user to read
+or write from the host a device VA of a host mapped memory
 
 What:   /sys/kernel/debug/habanalabs/hl/device
 Date:   Jan 2019
diff --git a/drivers/misc/habanalabs/debugfs.c 
b/drivers/misc/habanalabs/debugfs.c
index 886f8ea82499..17974919b760 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -500,6 +500,25 @@ static ssize_t mmu_write(struct file *file, const char 
__user *buf,
return -EINVAL;
 }
 
+static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
+{
+   struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+   if (!hdev->mmu_enable)
+   goto out;
+
+   if (hdev->dram_supports_virtual_memory &&
+   addr >= prop->va_space_dram_start_address &&
+   addr < prop->va_space_dram_end_address)
+   return true;
+
+   if (addr >= prop->va_space_host_start_address &&
+   addr < prop->va_space_host_end_address)
+   return true;
+out:
+   return false;
+}
+
 static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
 {
@@ -573,7 +592,6 @@ static ssize_t hl_data_read32(struct file *f, char __user 
*buf,
 {
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
-   struct asic_fixed_properties *prop = &hdev->asic_prop;
char tmp_buf[32];
u64 addr = entry->addr;
u32 val;
@@ -582,11 +600,8 @@ static ssize_t hl_data_read32(struct file *f, char __user 
*buf,
if (*ppos)
return 0;
 
-   if (addr >= prop->va_space_dram_start_address &&
-   addr < prop->va_space_dram_end_address &&
-   hdev->mmu_enable &&
-   hdev->dram_supports_virtual_memory) {
-   rc = device_va_to_pa(hdev, entry->addr, &addr);
+   if (hl_is_device_va(hdev, addr)) {
+   rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -607,7 +622,6 @@ static ssize_t hl_data_write32(struct file *f, const char 
__user *buf,
 {
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
-   struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 addr = entry->addr;
u32 value;
ssize_t rc;
@@ -616,11 +630,8 @@ static ssize_t hl_data_write32(struct file *f, const char 
__u

Re: [PATCH 1/1] MAINTAINERS: add counter/ftm-quaddec driver entry

2019-06-16 Thread Jonathan Cameron

On Thu, 13 Jun 2019 00:02:21 +0900
William Breathitt Gray  wrote:

> On Wed, Jun 12, 2019 at 04:52:23PM +0200, Patrick Havelange wrote:
> > Adding myself as maintainer for this driver
> > 
> > Signed-off-by: Patrick Havelange 
> > ---
> >  MAINTAINERS | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 57f496cff999..6671854098d6 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -6218,6 +6218,14 @@ M:   Philip Kelleher 
> >  S: Maintained
> >  F: drivers/block/rsxx/
> >  
> > +FLEXTIMER FTM-QUADDEC DRIVER
> > +M: Patrick Havelange 
> > +L: linux-...@vger.kernel.org
> > +S: Maintained
> > +F: Documentation/ABI/testing/sysfs-bus-counter-ftm-quadddec
> > +F: Documentation/devicetree/bindings/counter/ftm-quaddec.txt
> > +F: drivers/counter/ftm-quaddec.c
> > +
> >  FLOPPY DRIVER
> >  M: Jiri Kosina 
> >  T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/floppy.git
> > -- 
> > 2.19.1  
> 
> Thank you, this should take care of those files.
> 
> Signed-off-by: William Breathitt Gray 

Given I took the last patch related to this driver, I'll pick this
one up as well.   William, let me know on each series whether you want
me to.  I would imagine that sometimes you'll have enough going on
you'll want to do something more efficient, but whilst it's individual
patches this works fine.

Applied to the togreg branch of iio.git and pushed out as testing.

Thanks,

Jonathan

Re: [PATCH V3] i2c: busses: tegra: Add suspend-resume support

2019-06-16 Thread Dmitry Osipenko

15.06.2019 7:54, Wolfram Sang пишет:
> 
>>> Without a maintainer ack, this is an exception this time. Should we add
>>> Dmitry as another maintainer or reviewer at least?
>>>
>> I shall followup with Maintainer for ACK in future I2C tegra patches.
> 
> This comment was not directed at you, sorry if that was not clear. It
> was more for Laxman, Thierry, Jonathan, and Dmitry (if he is
> interested).
> 

I don't mind at all to review and test patches for the driver and can propose 
myself
as a reviewer if that helps and if there are no objections from the Tegra 
maintainers.
My primary interest is to have my devices working after next kernel update, but 
I also
like to review patches in general if they are touching area that I'm familiar 
with.

RE: [PATCH] kernel/notifier.c: remove notifier_chain_register

2019-06-16 Thread Nixiaoming

On Fri, 14 Jun 2019 03:38 AM Andrew Morton  wrote:
>On Thu, 13 Jun 2019 22:07:44 +0800 Xiaoming Ni  wrote:
>
>> Registering the same notifier to a hook repeatedly can cause the hook
>> list to form a ring or lose other members of the list.
>> .
>> 
>> diff --git a/kernel/notifier.c b/kernel/notifier.c
>> index d9f5081..56efd54 100644
>> --- a/kernel/notifier.c
>> +++ b/kernel/notifier.c
>> @@ -19,20 +19,6 @@
>>   *  are layered on top of these, with appropriate locking added.
>>   */
>>  
>> -static int notifier_chain_register(struct notifier_block **nl,
>> -struct notifier_block *n)
>> -{
>> -while ((*nl) != NULL) {
>> -WARN_ONCE(((*nl) == n), "double register detected");
>> -if (n->priority > (*nl)->priority)
>> -break;
>> -nl = &((*nl)->next);
>> -}
>> -n->next = *nl;
>> -rcu_assign_pointer(*nl, n);
>> -return 0;
>> -}
>
>Registering an already-registered notifier is a bug (except for in
>net/sunrpc/rpc_pipe.c, apparently).  The effect of this change is to
>remove the warning about the presence of the bug, so the bug is less
>likely to get fixed.
>
thanks for your guidance,

Should I modify this way 
   1 notifier_chain_cond_register() and notifier_chain_register() should be 
combined into one function.
   2 The warning information needs to be displayed while prohibiting duplicate 
registration.
@@ -23,7 +23,10 @@ static int notifier_chain_register(struct 
notifier_block **nl,
struct notifier_block *n)
 {
while ((*nl) != NULL) {
-   WARN_ONCE(((*nl) == n), "double register detected");
+ if (unlikely((*nl) == n)) {
+ WARN(1, "double register detected");
+ return 0;
+ }
if (n->priority > 
(*nl)->priority)
break;

>I think it would be better to remove notifier_chain_cond_register() and
>blocking_notifier_chain_cond_register() and to figure out why
>net/sunrpc/rpc_pipe.c is using it and to redo the rpc code so it no
>longer has that need.
>
thanks for your guidance,
I re-examine the submission record and analyze it as follows

notifier_chain_cond_register() was introduced by commit 6546bc4279241e8fa43
 ("ipc: re-enable msgmni automatic recomputing msgmni if set to negative")
From the patch description information, it should be done to avoid repeated 
registrations,
 but I don't know why not directly modify notifier_chain_cond_register().

notifier_chain_cond_register() is only called by 
blocking_notifier_chain_cond_register()
blocking_notifier_chain_cond_register() has less processing of the
SYSTEM_BOOTING state than blocking_notifier_chain_register().
may also be a bug.

ipc/ipcns_notifier.c and the call to blocking_notifier_chain_cond_register() 
are removed 
in commit 0050ee059f7fc86b1df252 ("ipc/msg: increase MSGMNI, remove scaling").

now blocking_notifier_chain_cond_register() is only used in 
net/sunrpc/rpc_pipe.c, 
commit 2d00131acc641b2cb6 ("SUNRPC: send notification events on pipefs sb 
creation and destruction")
Using blocking_notifier_chain_cond_register() may also be to avoid duplicate 
registrations??

thanks

Re: [PATCH v2 1/3] HID: apple-ibridge: Add Apple iBridge HID driver.

2019-06-16 Thread Jonathan Cameron

On Wed, 12 Jun 2019 01:33:58 -0700
Ronald Tschalär  wrote:

> The iBridge device provides access to several devices, including:
> - the Touch Bar
> - the iSight webcam
> - the light sensor
> - the fingerprint sensor
> 
> This driver provides the core support for managing the iBridge device
> and the access to the underlying devices. In particular, since the
> functionality for the touch bar and light sensor is exposed via USB HID
> interfaces, and the same HID device is used for multiple functions, this
> driver creates virtual HID devices, one per real HID device and
> sub-driver pair (for a total of 4 virtual HID devices). The sub-drivers
> then bind to these virtual HID devices.
> 
> This way the Touch Bar and ALS drivers can be kept in their own modules,
> while at the same time making them look very much like as if they were
> connected to the real HID devices; e.g. in particular the Touch Bar
> driver is aware of the fact that it is dealing with two HID devices that
> need to managed differently.
> 
> Signed-off-by: Ronald Tschalär 
Hi Ronald,

I'm far from a hid expert and was only reading this for background on
the ALS sensor driver...  Anyhow, there are some comments in here
that need some follow up or formatting into kernel comment style.

Nitpicks inline!

Jonathan
> ---
>  drivers/hid/Kconfig   |  14 +
>  drivers/hid/Makefile  |   1 +
>  drivers/hid/apple-ibridge.c   | 585 ++
>  drivers/hid/hid-ids.h |   1 +
>  include/linux/apple-ibridge.h |  23 ++
>  5 files changed, 624 insertions(+)
>  create mode 100644 drivers/hid/apple-ibridge.c
>  create mode 100644 include/linux/apple-ibridge.h
> 
> diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
> index 4ca0cdfa6b33..545d3691fc1c 100644
> --- a/drivers/hid/Kconfig
> +++ b/drivers/hid/Kconfig
> @@ -135,6 +135,20 @@ config HID_APPLE
>   Say Y here if you want support for keyboards of Apple iBooks, 
> PowerBooks,
>   MacBooks, MacBook Pros and Apple Aluminum.
>  
> +config HID_APPLE_IBRIDGE
> + tristate "Apple iBridge"
> + depends on ACPI
> + depends on USB_HID
> + depends on X86 || COMPILE_TEST
> + help
> +   This module provides the core support for the Apple T1 chip found
> +   on recent MacBookPro's, also known as the iBridge. The drivers for
> +   the Touch Bar (apple-ib-tb) and light sensor (apple-ib-als) need to
> +   be enabled separately.
> +
> +   To compile this driver as a module, choose M here: the
> +   module will be called apple-ibridge.
> +
>  config HID_APPLEIR
>   tristate "Apple infrared receiver"
>   depends on (USB_HID)
> diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
> index 170163b41303..a4da5663a541 100644
> --- a/drivers/hid/Makefile
> +++ b/drivers/hid/Makefile
> @@ -26,6 +26,7 @@ obj-$(CONFIG_HID_ACCUTOUCH) += hid-accutouch.o
>  obj-$(CONFIG_HID_ALPS)   += hid-alps.o
>  obj-$(CONFIG_HID_ACRUX)  += hid-axff.o
>  obj-$(CONFIG_HID_APPLE)  += hid-apple.o
> +obj-$(CONFIG_HID_APPLE_IBRIDGE)  += apple-ibridge.o
>  obj-$(CONFIG_HID_APPLEIR)+= hid-appleir.o
>  obj-$(CONFIG_HID_ASUS)   += hid-asus.o
>  obj-$(CONFIG_HID_AUREAL) += hid-aureal.o
> diff --git a/drivers/hid/apple-ibridge.c b/drivers/hid/apple-ibridge.c
> new file mode 100644
> index ..565f080a38d6
> --- /dev/null
> +++ b/drivers/hid/apple-ibridge.c
> @@ -0,0 +1,585 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Apple iBridge Driver
> + *
> + * Copyright (c) 2018 Ronald Tschalär
> + */
> +
> +/**
> + * DOC: Overview
> + *
> + * MacBookPro models with a Touch Bar (13,[23] and 14,[23]) have an Apple
> + * iBridge chip (also known as T1 chip) which exposes the touch bar,
> + * built-in webcam (iSight), ambient light sensor, and Secure Enclave
> + * Processor (SEP) for TouchID. It shows up in the system as a USB device
> + * with 3 configurations: 'Default iBridge Interfaces', 'Default iBridge
> + * Interfaces(OS X)', and 'Default iBridge Interfaces(Recovery)'. While
> + * the second one is used by MacOS to provide the fancy touch bar
> + * functionality with custom buttons etc, this driver just uses the first.
> + *
> + * In the first (default after boot) configuration, 4 usb interfaces are
> + * exposed: 2 related to the webcam, and 2 USB HID interfaces representing
> + * the touch bar and the ambient light sensor. The webcam interfaces are
> + * already handled by the uvcvideo driver; furthermore, the handling of the
> + * input reports when "keys" on the touch bar are pressed is already handled
> + * properly by the generic USB HID core. This leaves the management of the
> + * touch bar modes (e.g. switching between function and special keys when the
> + * FN key is pressed), the touch bar display (dimming and turning off), the
> + * key-remapping when the FN key is pressed, and handling of the light 
> sensor.
> + *
> + * This driver is implemented as a HID driver t

[PATCH 8/8] perf/x86/rapl: Get quirk state from new probe framework

2019-06-16 Thread Jiri Olsa

Getting the apply_quirk bool from new rapl_model_match array.

And because apply_quirk was the last remaining piece of data
in rapl_cpu_match, replacing it with rapl_model_match as device
table.

The switch to new perf_msr_probe detection API is done.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/rapl.c | 82 ++--
 1 file changed, 3 insertions(+), 79 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index e4c4ff870845..ddad45ef8757 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -671,75 +671,6 @@ static int __init init_rapl_pmus(void)
 #define X86_RAPL_MODEL_MATCH(model, init)  \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
 
-struct intel_rapl_init_fun {
-   bool apply_quirk;
-};
-
-static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
-   .apply_quirk = false,
-};
-
-static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
-   .apply_quirk = true,
-};
-
-static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
-   .apply_quirk = false,
-};
-
-static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
-   .apply_quirk = false,
-};
-
-static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
-   .apply_quirk = true,
-};
-
-static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
-   .apply_quirk = false,
-};
-
-static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,   snb_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,   snb_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,hsx_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,  hsw_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,  hsx_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,   hsx_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE,  skl_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE,  skl_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
-
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE,  skl_rapl_init),
-   X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, skl_rapl_init),
-   {},
-};
-
-MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
-
 static struct rapl_model model_snb = {
.events = BIT(PERF_RAPL_PP0) |
  BIT(PERF_RAPL_PKG) |
@@ -813,12 +744,12 @@ static const struct x86_cpu_id rapl_model_match[] 
__initconst = {
{},
 };
 
+MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
+
 static int __init rapl_pmu_init(void)
 {
const struct x86_cpu_id *id;
-   struct intel_rapl_init_fun *rapl_init;
struct rapl_model *rm;
-   bool apply_quirk;
int ret;
 
id = x86_match_cpu(rapl_model_match);
@@ -829,14 +760,7 @@ static int __init rapl_pmu_init(void)
rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
false, (void *) &rm->events);
 
-   id = x86_match_cpu(rapl_cpu_match);
-   if (!id)
-   return -ENODEV;
-
-   rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
-   apply_quirk = rapl_init->apply_quirk;
-
-   ret = rapl_check_hw_unit(apply_quirk);
+   ret = rapl_check_hw_unit(rm->apply_quirk);
if (ret)
return ret;
 
-- 
2.21.0

[PATCH 7/8] perf/x86/rapl: Get attributes from new probe framework

2019-06-16 Thread Jiri Olsa

We no longer need model specific attribute arrays,
because we get all this detected in rapl_events_attrs.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/rapl.c | 89 
 1 file changed, 89 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 709a749a4ada..e4c4ff870845 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -416,87 +416,6 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, 
"2.3283064365386962890
 RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, 
"2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, 
"2.3283064365386962890625e-10");
 
-static struct attribute *rapl_events_srv_attr[] = {
-   EVENT_PTR(rapl_cores),
-   EVENT_PTR(rapl_pkg),
-   EVENT_PTR(rapl_ram),
-
-   EVENT_PTR(rapl_cores_unit),
-   EVENT_PTR(rapl_pkg_unit),
-   EVENT_PTR(rapl_ram_unit),
-
-   EVENT_PTR(rapl_cores_scale),
-   EVENT_PTR(rapl_pkg_scale),
-   EVENT_PTR(rapl_ram_scale),
-   NULL,
-};
-
-static struct attribute *rapl_events_cln_attr[] = {
-   EVENT_PTR(rapl_cores),
-   EVENT_PTR(rapl_pkg),
-   EVENT_PTR(rapl_gpu),
-
-   EVENT_PTR(rapl_cores_unit),
-   EVENT_PTR(rapl_pkg_unit),
-   EVENT_PTR(rapl_gpu_unit),
-
-   EVENT_PTR(rapl_cores_scale),
-   EVENT_PTR(rapl_pkg_scale),
-   EVENT_PTR(rapl_gpu_scale),
-   NULL,
-};
-
-static struct attribute *rapl_events_hsw_attr[] = {
-   EVENT_PTR(rapl_cores),
-   EVENT_PTR(rapl_pkg),
-   EVENT_PTR(rapl_gpu),
-   EVENT_PTR(rapl_ram),
-
-   EVENT_PTR(rapl_cores_unit),
-   EVENT_PTR(rapl_pkg_unit),
-   EVENT_PTR(rapl_gpu_unit),
-   EVENT_PTR(rapl_ram_unit),
-
-   EVENT_PTR(rapl_cores_scale),
-   EVENT_PTR(rapl_pkg_scale),
-   EVENT_PTR(rapl_gpu_scale),
-   EVENT_PTR(rapl_ram_scale),
-   NULL,
-};
-
-static struct attribute *rapl_events_skl_attr[] = {
-   EVENT_PTR(rapl_cores),
-   EVENT_PTR(rapl_pkg),
-   EVENT_PTR(rapl_gpu),
-   EVENT_PTR(rapl_ram),
-   EVENT_PTR(rapl_psys),
-
-   EVENT_PTR(rapl_cores_unit),
-   EVENT_PTR(rapl_pkg_unit),
-   EVENT_PTR(rapl_gpu_unit),
-   EVENT_PTR(rapl_ram_unit),
-   EVENT_PTR(rapl_psys_unit),
-
-   EVENT_PTR(rapl_cores_scale),
-   EVENT_PTR(rapl_pkg_scale),
-   EVENT_PTR(rapl_gpu_scale),
-   EVENT_PTR(rapl_ram_scale),
-   EVENT_PTR(rapl_psys_scale),
-   NULL,
-};
-
-static struct attribute *rapl_events_knl_attr[] = {
-   EVENT_PTR(rapl_pkg),
-   EVENT_PTR(rapl_ram),
-
-   EVENT_PTR(rapl_pkg_unit),
-   EVENT_PTR(rapl_ram_unit),
-
-   EVENT_PTR(rapl_pkg_scale),
-   EVENT_PTR(rapl_ram_scale),
-   NULL,
-};
-
 /*
  * There are no default events, but we need to create
  * "events" group (with empty attrs) before updating
@@ -754,37 +673,30 @@ static int __init init_rapl_pmus(void)
 
 struct intel_rapl_init_fun {
bool apply_quirk;
-   struct attribute **attrs;
 };
 
 static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
.apply_quirk = false,
-   .attrs = rapl_events_cln_attr,
 };
 
 static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
.apply_quirk = true,
-   .attrs = rapl_events_srv_attr,
 };
 
 static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
.apply_quirk = false,
-   .attrs = rapl_events_hsw_attr,
 };
 
 static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
.apply_quirk = false,
-   .attrs = rapl_events_srv_attr,
 };
 
 static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
.apply_quirk = true,
-   .attrs = rapl_events_knl_attr,
 };
 
 static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
.apply_quirk = false,
-   .attrs = rapl_events_skl_attr,
 };
 
 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
@@ -923,7 +835,6 @@ static int __init rapl_pmu_init(void)
 
rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
apply_quirk = rapl_init->apply_quirk;
-   rapl_pmu_events_group.attrs = rapl_init->attrs;
 
ret = rapl_check_hw_unit(apply_quirk);
if (ret)
-- 
2.21.0

[PATCHv3 0/8] perf/x86: Rework msr probe interface

2019-06-16 Thread Jiri Olsa

hi,
following up on [1], [2] and [3], this patchset adds update
attribute groups to pmu, factors out the MSR probe code and
use it in msr,cstate* and rapl PMUs.

The functionality stays the same with one exception:
for rapl PMU: the event is not exported if the rdmsr returns zero
on event's msr; cstate* and msr pmu functionality stays unchanged.

And also: ;-)
> Somewhere along the line you lost the explanation of _why_ we're doing
> this; namely: virt sucks.

Also available in:
  git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
  perf/msr

Tested on snb and skylake servers.

v3 changes:
  - rebase on latest perf changes for Icelake models

v2 changes:
  - checking zero rdmsr only for rapl PMU events,
cstate* and msr pmu functionality stays unchanged

thanks,
jirka


[1] https://lore.kernel.org/lkml/20190301114250.GA23459@krava/
[2] https://lore.kernel.org/lkml/20190318182116.17388-1-jo...@kernel.org/
[3] https://lore.kernel.org/lkml/20190512155518.21468-1-jo...@kernel.org/
---
Jiri Olsa (8):
  perf/x86: Add msr probe interface
  perf/x86/msr: Use new probe function
  perf/x86/cstate: Use new probe function
  perf/x86/rapl: Use new msr detection interface
  perf/x86/rapl: Get rapl_cntr_mask from new probe framework
  perf/x86/rapl: Get msr values from new probe framework
  perf/x86/rapl: Get attributes from new probe framework
  perf/x86/rapl: Get quirk state from new probe framework

 arch/x86/events/Makefile   |   2 +-
 arch/x86/events/intel/cstate.c | 152 
+++-
 arch/x86/events/intel/rapl.c   | 380 
++--
 arch/x86/events/msr.c  | 110 +++
 arch/x86/events/probe.c|  45 +++
 arch/x86/events/probe.h|  29 ++
 6 files changed, 392 insertions(+), 326 deletions(-)
 create mode 100644 arch/x86/events/probe.c
 create mode 100644 arch/x86/events/probe.h

[PATCH 1/8] perf/x86: Add msr probe interface

2019-06-16 Thread Jiri Olsa

Adding perf_msr_probe function to provide interface for
checking up on MSR register and set the related attribute
group visibility.

User defines following struct for each MSR register:

  struct perf_msr {
   u64   msr;
   struct attribute_group   *grp;
   bool(*test)(int idx, void *data);
   bool  no_check;
  };

Where:
  msr  - is the MSR address
  grp  - is the attribute group kept visible if the check passed
  test - is test function pointer
  no_check - is bool that bypass the check and adds the
  attribute without any test

The array of struct perf_msr is passed into:

  perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)

Together with:
  cnt  - which is the number of struct msr array elements
  data - which is user pointer passed to the test function
  zero - allow counters that return zero on rdmsr

perf_msr_probe will execute the test code, read the MSR and,
unless 'zero' is set, check that the value is != 0. If all these
tests pass, the related attribute group is kept visible.

Also adding PMU_EVENT_GROUP macro helper to define attribute
group for single attribute. It will be used in following patches.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/Makefile |  2 +-
 arch/x86/events/probe.c  | 45 
 arch/x86/events/probe.h  | 29 ++
 3 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/events/probe.c
 create mode 100644 arch/x86/events/probe.h

diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9cbfd34042d5..9e07f554333f 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y  += core.o
+obj-y  += core.o probe.o
 obj-y  += amd/
 obj-$(CONFIG_X86_LOCAL_APIC)+= msr.o
 obj-$(CONFIG_CPU_SUP_INTEL)+= intel/
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
new file mode 100644
index ..c2ede2f3b277
--- /dev/null
+++ b/arch/x86/events/probe.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+#include "probe.h"
+
+static umode_t
+not_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+   return 0;
+}
+
+unsigned long
+perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
+{
+   unsigned long avail = 0;
+   unsigned int bit;
+   u64 val;
+
+   if (cnt >= BITS_PER_LONG)
+   return 0;
+
+   for (bit = 0; bit < cnt; bit++) {
+   if (!msr[bit].no_check) {
+   struct attribute_group *grp = msr[bit].grp;
+
+   grp->is_visible = not_visible;
+
+   if (msr[bit].test && !msr[bit].test(bit, data))
+   continue;
+   /* Virt sucks; you cannot tell if a R/O MSR is present 
:/ */
+   if (rdmsrl_safe(msr[bit].msr, &val))
+   continue;
+   /* Disable zero counters if requested. */
+   if (!zero && !val)
+   continue;
+
+   grp->is_visible = NULL;
+   }
+   avail |= BIT(bit);
+   }
+
+   return avail;
+}
+EXPORT_SYMBOL_GPL(perf_msr_probe);
diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h
new file mode 100644
index ..4c8e0afc5fb5
--- /dev/null
+++ b/arch/x86/events/probe.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_X86_EVENTS_PROBE_H__
+#define __ARCH_X86_EVENTS_PROBE_H__
+#include 
+
+struct perf_msr {
+   u64   msr;
+   struct attribute_group   *grp;
+   bool(*test)(int idx, void *data);
+   bool  no_check;
+};
+
+unsigned long
+perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data);
+
+#define __PMU_EVENT_GROUP(_name)   \
+static struct attribute *attrs_##_name[] = {   \
+   &attr_##_name.attr.attr,\
+   NULL,   \
+}
+
+#define PMU_EVENT_GROUP(_grp, _name)   \
+__PMU_EVENT_GROUP(_name);  \
+static struct attribute_group group_##_name = {\
+   .name  = #_grp, \
+   .attrs = attrs_##_name, \
+}
+
+#endif /* __ARCH_X86_EVENTS_PROBE_H__ */
-- 
2.21.0

[PATCH 3/8] perf/x86/cstate: Use new probe function

2019-06-16 Thread Jiri Olsa

Using perf_msr_probe function to probe for cstate events.

The functionality is the same, with one exception, that
perf_msr_probe checks for rdmsr to return value != 0 for
given MSR register.

Using the new attribute groups and adding the events via
pmu::attr_update.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/cstate.c | 152 +++--
 1 file changed, 87 insertions(+), 65 deletions(-)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e1caa0b49d63..688592b34564 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -96,6 +96,7 @@
 #include 
 #include 
 #include "../perf_event.h"
+#include "../probe.h"
 
 MODULE_LICENSE("GPL");
 
@@ -144,25 +145,42 @@ enum perf_cstate_core_events {
PERF_CSTATE_CORE_EVENT_MAX,
 };
 
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");
 
-static struct perf_cstate_msr core_msr[] = {
-   [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,  
&evattr_cstate_core_c1 },
-   [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,
&evattr_cstate_core_c3 },
-   [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,
&evattr_cstate_core_c6 },
-   [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,
&evattr_cstate_core_c7 },
+static unsigned long core_msr_mask;
+
+PMU_EVENT_GROUP(events, cstate_core_c1);
+PMU_EVENT_GROUP(events, cstate_core_c3);
+PMU_EVENT_GROUP(events, cstate_core_c6);
+PMU_EVENT_GROUP(events, cstate_core_c7);
+
+static bool test_msr(int idx, void *data)
+{
+   return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr core_msr[] = {
+   [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,  
&group_cstate_core_c1,  test_msr },
+   [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,
&group_cstate_core_c3,  test_msr },
+   [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,
&group_cstate_core_c6,  test_msr },
+   [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,
&group_cstate_core_c7,  test_msr },
 };
 
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+static struct attribute *attrs_empty[] = {
NULL,
 };
 
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
 static struct attribute_group core_events_attr_group = {
.name = "events",
-   .attrs = core_events_attrs,
+   .attrs = attrs_empty,
 };
 
 DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
@@ -211,31 +229,37 @@ enum perf_cstate_pkg_events {
PERF_CSTATE_PKG_EVENT_MAX,
 };
 
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
-   [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,  
&evattr_cstate_pkg_c2 },
-   [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,  
&evattr_cstate_pkg_c3 },
-   [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,  
&evattr_cstate_pkg_c6 },
-   [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,  
&evattr_cstate_pkg_c7 },
-   [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,  
&evattr_cstate_pkg_c8 },
-   [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,  
&evattr_cstate_pkg_c9 },
-   [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,
&evattr_cstate_pkg_c10 },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
-   NULL,
+PMU_EVENT_ATTR_STRING(c2-residency,  attr_cstate_pkg_c2,  "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency,  attr_cstate_pkg_c3,  "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency,  attr_cstate_pkg_c6,  "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency,  attr_cstate_pkg_c7,  "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency,  attr_cstate_pkg_c8,  "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency,  attr_cstate_pkg_c9,  "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "ev

[PATCH 4/8] perf/x86/rapl: Use new msr detection interface

2019-06-16 Thread Jiri Olsa

Using perf_msr_probe function to probe for rapl msrs.

Adding new rapl_model_match device table, that
gathers events info for given model, following
the msr and cstate module design.

It will replace the current rapl_cpu_match device
table and detection code in following patches.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/rapl.c | 192 ++-
 1 file changed, 191 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 798135419a62..fa6d8065db15 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -58,6 +58,7 @@
 #include 
 #include 
 #include "../perf_event.h"
+#include "../probe.h"
 
 MODULE_LICENSE("GPL");
 
@@ -76,6 +77,17 @@ MODULE_LICENSE("GPL");
 #define INTEL_RAPL_PSYS0x5 /* pseudo-encoding */
 
 #define NR_RAPL_DOMAINS 0x5
+
+enum perf_rapl_events {
+   PERF_RAPL_PP0 = 0,  /* all cores */
+   PERF_RAPL_PKG,  /* entire package */
+   PERF_RAPL_RAM,  /* DRAM */
+   PERF_RAPL_PP1,  /* gpu */
+   PERF_RAPL_PSYS, /* psys */
+
+   PERF_RAPL_MAX,
+};
+
 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
@@ -153,6 +165,11 @@ struct rapl_pmus {
struct rapl_pmu *pmus[];
 };
 
+struct rapl_model {
+   unsigned long   events;
+   boolapply_quirk;
+};
+
  /* 1/2^hw_unit Joule */
 static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
 static struct rapl_pmus *rapl_pmus;
@@ -538,9 +555,18 @@ static struct attribute *rapl_events_knl_attr[] = {
NULL,
 };
 
+/*
+ * There are no default events, but we need to create
+ * "events" group (with empty attrs) before updating
+ * it with detected events.
+ */
+static struct attribute *attrs_empty[] = {
+   NULL,
+};
+
 static struct attribute_group rapl_pmu_events_group = {
.name = "events",
-   .attrs = NULL, /* patched at runtime */
+   .attrs = attrs_empty,
 };
 
 DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
@@ -561,6 +587,79 @@ static const struct attribute_group *rapl_attr_groups[] = {
NULL,
 };
 
+static struct attribute *rapl_events_cores[] = {
+   EVENT_PTR(rapl_cores),
+   EVENT_PTR(rapl_cores_unit),
+   EVENT_PTR(rapl_cores_scale),
+   NULL,
+};
+
+static struct attribute_group rapl_events_cores_group = {
+   .name  = "events",
+   .attrs = rapl_events_cores,
+};
+
+static struct attribute *rapl_events_pkg[] = {
+   EVENT_PTR(rapl_pkg),
+   EVENT_PTR(rapl_pkg_unit),
+   EVENT_PTR(rapl_pkg_scale),
+   NULL,
+};
+
+static struct attribute_group rapl_events_pkg_group = {
+   .name  = "events",
+   .attrs = rapl_events_pkg,
+};
+
+static struct attribute *rapl_events_ram[] = {
+   EVENT_PTR(rapl_ram),
+   EVENT_PTR(rapl_ram_unit),
+   EVENT_PTR(rapl_ram_scale),
+   NULL,
+};
+
+static struct attribute_group rapl_events_ram_group = {
+   .name  = "events",
+   .attrs = rapl_events_ram,
+};
+
+static struct attribute *rapl_events_gpu[] = {
+   EVENT_PTR(rapl_gpu),
+   EVENT_PTR(rapl_gpu_unit),
+   EVENT_PTR(rapl_gpu_scale),
+   NULL,
+};
+
+static struct attribute_group rapl_events_gpu_group = {
+   .name  = "events",
+   .attrs = rapl_events_gpu,
+};
+
+static struct attribute *rapl_events_psys[] = {
+   EVENT_PTR(rapl_psys),
+   EVENT_PTR(rapl_psys_unit),
+   EVENT_PTR(rapl_psys_scale),
+   NULL,
+};
+
+static struct attribute_group rapl_events_psys_group = {
+   .name  = "events",
+   .attrs = rapl_events_psys,
+};
+
+static bool test_msr(int idx, void *data)
+{
+   return test_bit(idx, (unsigned long *) data);
+}
+
+static struct perf_msr rapl_msrs[] = {
+   [PERF_RAPL_PP0]  = { MSR_PP0_ENERGY_STATUS,  
&rapl_events_cores_group, test_msr },
+   [PERF_RAPL_PKG]  = { MSR_PKG_ENERGY_STATUS,  
&rapl_events_pkg_group,   test_msr },
+   [PERF_RAPL_RAM]  = { MSR_DRAM_ENERGY_STATUS, 
&rapl_events_ram_group,   test_msr },
+   [PERF_RAPL_PP1]  = { MSR_PP1_ENERGY_STATUS,  
&rapl_events_gpu_group,   test_msr },
+   [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, 
&rapl_events_psys_group,  test_msr },
+};
+
 static int rapl_cpu_offline(unsigned int cpu)
 {
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
@@ -674,6 +773,15 @@ static void cleanup_rapl_pmus(void)
kfree(rapl_pmus);
 }
 
+const struct attribute_group *rapl_attr_update[] = {
+   &rapl_events_cores_group,
+   &rapl_events_pkg_group,
+   &rapl_events_ram_group,
+   &rapl_events_gpu_group,
+   &rapl_events_psys_group,
+   NULL,
+};
+
 static int __init init_rapl_pmus(void)
 {
int maxdie = topology_max_packages() * topology_max_die_per_package();
@@ -686,6 +794,7 @@ static int __init init_rapl_pmus(void)
 
rapl_pmus->maxdie

[PATCH 5/8] perf/x86/rapl: Get rapl_cntr_mask from new probe framework

2019-06-16 Thread Jiri Olsa

We get rapl_cntr_mask from perf_msr_probe call, as a replacement
for current intel_rapl_init_fun::cntr_mask value for each model.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/rapl.c | 38 ++--
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index fa6d8065db15..417de3fdde61 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -96,33 +96,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] 
__initconst = {
"psys",
 };
 
-/* Clients have PP0, PKG */
-#define RAPL_IDX_CLN   (1driver_data;
-   perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, false, (void *) &rm->events);
+   rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
+   false, (void *) &rm->events);
 
id = x86_match_cpu(rapl_cpu_match);
if (!id)
@@ -987,7 +954,6 @@ static int __init rapl_pmu_init(void)
 
rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
apply_quirk = rapl_init->apply_quirk;
-   rapl_cntr_mask = rapl_init->cntr_mask;
rapl_pmu_events_group.attrs = rapl_init->attrs;
 
ret = rapl_check_hw_unit(apply_quirk);
-- 
2.21.0

[PATCH 6/8] perf/x86/rapl: Get msr values from new probe framework

2019-06-16 Thread Jiri Olsa

There's no need to have special code for getting
the bit and msr value for given event. We can
now easily get it from rapl_msrs array.

Also getting rid of RAPL_IDX_*, which is no longer
needed and replacing INTEL_RAPL* with PERF_RAPL*
enums.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/rapl.c | 53 
 1 file changed, 11 insertions(+), 42 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 417de3fdde61..709a749a4ada 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -55,6 +55,7 @@
 #include 
 #include 
 #include 
+#include <linux/nospec.h>
 #include 
 #include 
 #include "../perf_event.h"
@@ -65,19 +66,6 @@ MODULE_LICENSE("GPL");
 /*
  * RAPL energy status counters
  */
-#define RAPL_IDX_PP0_NRG_STAT  0   /* all cores */
-#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT  1   /* entire package */
-#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT  2   /* DRAM */
-#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT  3   /* gpu */
-#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
-#define RAPL_IDX_PSYS_NRG_STAT 4   /* psys */
-#define INTEL_RAPL_PSYS0x5 /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS 0x5
-
 enum perf_rapl_events {
PERF_RAPL_PP0 = 0,  /* all cores */
PERF_RAPL_PKG,  /* entire package */
@@ -86,6 +74,7 @@ enum perf_rapl_events {
PERF_RAPL_PSYS, /* psys */
 
PERF_RAPL_MAX,
+   NR_RAPL_DOMAINS = PERF_RAPL_MAX,
 };
 
 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
@@ -149,6 +138,7 @@ static struct rapl_pmus *rapl_pmus;
 static cpumask_t rapl_cpu_mask;
 static unsigned int rapl_cntr_mask;
 static u64 rapl_timer_ms;
+static struct perf_msr rapl_msrs[];
 
 static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
 {
@@ -340,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, 
int flags)
 static int rapl_pmu_event_init(struct perf_event *event)
 {
u64 cfg = event->attr.config & RAPL_EVENT_MASK;
-   int bit, msr, ret = 0;
+   int bit, ret = 0;
struct rapl_pmu *pmu;
 
/* only look at RAPL events */
@@ -356,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event)
 
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
 
-   /*
-* check event is known (determines counter)
-*/
-   switch (cfg) {
-   case INTEL_RAPL_PP0:
-   bit = RAPL_IDX_PP0_NRG_STAT;
-   msr = MSR_PP0_ENERGY_STATUS;
-   break;
-   case INTEL_RAPL_PKG:
-   bit = RAPL_IDX_PKG_NRG_STAT;
-   msr = MSR_PKG_ENERGY_STATUS;
-   break;
-   case INTEL_RAPL_RAM:
-   bit = RAPL_IDX_RAM_NRG_STAT;
-   msr = MSR_DRAM_ENERGY_STATUS;
-   break;
-   case INTEL_RAPL_PP1:
-   bit = RAPL_IDX_PP1_NRG_STAT;
-   msr = MSR_PP1_ENERGY_STATUS;
-   break;
-   case INTEL_RAPL_PSYS:
-   bit = RAPL_IDX_PSYS_NRG_STAT;
-   msr = MSR_PLATFORM_ENERGY_STATUS;
-   break;
-   default:
+   if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
return -EINVAL;
-   }
+
+   cfg = array_index_nospec(cfg, NR_RAPL_DOMAINS + 1);
+   bit = cfg - 1;
+
/* check event supported */
if (!(rapl_cntr_mask & (1 << bit)))
return -EINVAL;
@@ -397,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu;
-   event->hw.event_base = msr;
+   event->hw.event_base = rapl_msrs[bit].msr;
event->hw.config = cfg;
event->hw.idx = bit;
 
@@ -705,7 +674,7 @@ static int rapl_check_hw_unit(bool apply_quirk)
 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
 */
if (apply_quirk)
-   rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+   rapl_hw_unit[PERF_RAPL_RAM] = 16;
 
/*
 * Calculate the timer rate:
-- 
2.21.0

[PATCH 2/8] perf/x86/msr: Use new probe function

2019-06-16 Thread Jiri Olsa

Using perf_msr_probe function to probe for msr events.

The functionality is the same, with one exception, that
perf_msr_probe checks for rdmsr to return value != 0 for
given MSR register.

Using the new attribute groups and adding the events via
pmu::attr_update.

Signed-off-by: Jiri Olsa 
---
 arch/x86/events/msr.c | 110 +++---
 1 file changed, 60 insertions(+), 50 deletions(-)

diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index f3f4c2263501..9431447541e9 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
+#include 
 #include 
 #include 
+#include "probe.h"
 
 enum perf_msr_id {
PERF_MSR_TSC= 0,
@@ -12,32 +14,30 @@ enum perf_msr_id {
PERF_MSR_PTSC   = 5,
PERF_MSR_IRPERF = 6,
PERF_MSR_THERM  = 7,
-   PERF_MSR_THERM_SNAP = 8,
-   PERF_MSR_THERM_UNIT = 9,
PERF_MSR_EVENT_MAX,
 };
 
-static bool test_aperfmperf(int idx)
+static bool test_aperfmperf(int idx, void *data)
 {
return boot_cpu_has(X86_FEATURE_APERFMPERF);
 }
 
-static bool test_ptsc(int idx)
+static bool test_ptsc(int idx, void *data)
 {
return boot_cpu_has(X86_FEATURE_PTSC);
 }
 
-static bool test_irperf(int idx)
+static bool test_irperf(int idx, void *data)
 {
return boot_cpu_has(X86_FEATURE_IRPERF);
 }
 
-static bool test_therm_status(int idx)
+static bool test_therm_status(int idx, void *data)
 {
return boot_cpu_has(X86_FEATURE_DTHERM);
 }
 
-static bool test_intel(int idx)
+static bool test_intel(int idx, void *data)
 {
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)
@@ -98,37 +98,51 @@ static bool test_intel(int idx)
return false;
 }
 
-struct perf_msr {
-   u64 msr;
-   struct  perf_pmu_events_attr *attr;
-   bool(*test)(int idx);
+PMU_EVENT_ATTR_STRING(tsc, attr_tsc,   
"event=0x00");
+PMU_EVENT_ATTR_STRING(aperf,   attr_aperf, 
"event=0x01");
+PMU_EVENT_ATTR_STRING(mperf,   attr_mperf, 
"event=0x02");
+PMU_EVENT_ATTR_STRING(pperf,   attr_pperf, 
"event=0x03");
+PMU_EVENT_ATTR_STRING(smi, attr_smi,   
"event=0x04");
+PMU_EVENT_ATTR_STRING(ptsc,attr_ptsc,  
"event=0x05");
+PMU_EVENT_ATTR_STRING(irperf,  attr_irperf,
"event=0x06");
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin,  attr_therm, 
"event=0x07");
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap,
"1" );
+PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit,
"C" );
+
+static unsigned long msr_mask;
+
+PMU_EVENT_GROUP(events, aperf);
+PMU_EVENT_GROUP(events, mperf);
+PMU_EVENT_GROUP(events, pperf);
+PMU_EVENT_GROUP(events, smi);
+PMU_EVENT_GROUP(events, ptsc);
+PMU_EVENT_GROUP(events, irperf);
+
+static struct attribute *attrs_therm[] = {
+   &attr_therm.attr.attr,
+   &attr_therm_snap.attr.attr,
+   &attr_therm_unit.attr.attr,
+   NULL,
 };
 
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, 
"event=0x00");
-PMU_EVENT_ATTR_STRING(aperf,   evattr_aperf,   
"event=0x01");
-PMU_EVENT_ATTR_STRING(mperf,   evattr_mperf,   
"event=0x02");
-PMU_EVENT_ATTR_STRING(pperf,   evattr_pperf,   
"event=0x03");
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, 
"event=0x04");
-PMU_EVENT_ATTR_STRING(ptsc,evattr_ptsc,
"event=0x05");
-PMU_EVENT_ATTR_STRING(irperf,  evattr_irperf,  
"event=0x06");
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin,  evattr_therm,   
"event=0x07");
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap,  
"1" );
-PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit,  
"C" );
+static struct attribute_group group_therm = {
+   .name  = "events",
+   .attrs = attrs_therm,
+};
 
 static struct perf_msr msr[] = {
-   [PERF_MSR_TSC]  = { 0,  &evattr_tsc,
NULL,   },
-   [PERF_MSR_APERF]= { MSR_IA32_APERF, &evattr_aperf,  
test_aperfmperf,},
-   [PERF_MSR_MPERF]= { MSR_IA32_MPERF, &evattr_mperf,  
test_aperfmperf,},
-   [PERF_MSR_PPERF]= { MSR_PPERF,  &evattr_pperf,  
test_intel,

Dear friend

2019-06-16 Thread Umar Bello

-- 


Dear friend
I am contacting you on a business deal of $17.5 Million US Dollars,
ready for transfer into your account
if we make this claim, we will share it 60%/40%.
100% risk free and it will be legally backed up with government
approved If you are interested reply for more details.

Best regards,
Umar Bello
 +226 68874958

Re: linux-next: Fixes tag needs some work in the clockevents tree

2019-06-16 Thread Dmitry Osipenko

16.06.2019 16:24, Stephen Rothwell пишет:
> Hi Daniel,
> 
> [Sorry for the slow response.]
> 
> On Thu, 13 Jun 2019 08:52:21 +0200 Daniel Lezcano  
> wrote:
>>
>> actually it returns:
>>
>> git log -1 --format='Fixes: %h ("%s")' 3be2a85a0b61
>>
>> Fixes: 3be2a85a0b61 ("clocksource/drivers/tegra: Support per-CPU timers on 
>> all Tegra's")
> 
> Indeed.
> 
>> Is it ok to shorten the subject?
> 
> I figure it is easier to just use the "git log" result and to give
> anyone (or any script) that wants to use the Fixes tag as much
> information as possible.
> 

Daniel, I'd also recommend to shorten the common subsys prefix in general to 
something
like "clocksource: tegra:".

[PATCH] xtensa/PCI: Remove unused variable

2019-06-16 Thread Guenter Roeck

gcc reports:

arch/xtensa/kernel/pci.c:40:32: warning:
'pci_ctrl_tail' defined but not used

which is indeed the case.

Signed-off-by: Guenter Roeck 
---
 arch/xtensa/kernel/pci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 8b823f94e568..e0235e34e1ba 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -37,7 +37,6 @@
  */
 
 static struct pci_controller *pci_ctrl_head;
-static struct pci_controller **pci_ctrl_tail = &pci_ctrl_head;
 
 static int pci_bus_count;
 
-- 
2.7.4

[PATCH] perf/x86/intel: Disable check_msr for real hw

2019-06-16 Thread Jiri Olsa

On Fri, Jun 14, 2019 at 09:45:21AM -0400, Liang, Kan wrote:
> 
> 
> On 6/14/2019 7:28 AM, Jiri Olsa wrote:
> > hi,
> > the HPE server can do POST tracing and have enabled LBR
> > tracing during the boot, which makes check_msr fail falsely.
> > 
> > It looks like check_msr code was added only to check on guests
> > MSR access, would it be then ok to disable check_msr for real
> > hardware? (as in patch below)
> 
> Yes, the check_msr patch was to fix a bug report in guest.
> I didn't get similar bug report for real hardware.
> I think it should be OK to disable it for real hardware.
> 

thanks for confirmation, attaching the full patch

thanks,
jirka


---
Tom Vaden reported false failure of check_msr function, because
some servers can do POST tracing and enable LBR tracing during
the boot.

Kan confirmed that check_msr patch was to fix a bug report in
guest, so it's ok to disable it for real HW.

Cc: Kan Liang 
Reported-by: Tom Vaden 
Signed-off-by: Jiri Olsa 
---
 arch/x86/events/intel/core.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 71001f005bfe..1194ae7e1992 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "../perf_event.h"
 
@@ -4050,6 +4051,13 @@ static bool check_msr(unsigned long msr, u64 mask)
 {
u64 val_old, val_new, val_tmp;
 
+   /*
+* Disable the check for real HW, so we don't
+* mess up with potentially enabled regs.
+*/
+   if (hypervisor_is_type(X86_HYPER_NATIVE))
+   return true;
+
/*
 * Read the current value, change it and read it back to see if it
 * matches, this is needed to detect certain hardware emulators
-- 
2.21.0

Re: linux-next: Fixes tag needs some work in the clockevents tree

2019-06-16 Thread Daniel Lezcano

On 16/06/2019 16:08, Dmitry Osipenko wrote:
> 16.06.2019 16:24, Stephen Rothwell пишет:
>> Hi Daniel,
>>
>> [Sorry for the slow response.]
>>
>> On Thu, 13 Jun 2019 08:52:21 +0200 Daniel Lezcano 
>>  wrote:
>>>
>>> actually it returns:
>>>
>>> git log -1 --format='Fixes: %h ("%s")' 3be2a85a0b61
>>>
>>> Fixes: 3be2a85a0b61 ("clocksource/drivers/tegra: Support per-CPU timers on 
>>> all Tegra's")
>>
>> Indeed.
>>
>>> Is it ok to shorten the subject?
>>
>> I figure it is easier to just use the "git log" result and to give
>> anyone (or any script) the wants to use the Fixes tag as much
>> information as possible.
>>
> 
> Daniel, I'd also recommend to shorten the common subsys prefix in general to 
> something
> like "clocksource: tegra:".

Actually I can't, I have been asked by the tip team to follow this format.


-- 
  Linaro.org │ Open source software for ARM SoCs

Follow Linaro:   Facebook |
 Twitter |
 Blog

Re: [PATCH] arm64: dts: rockchip: Update DWC3 modules on RK3399 SoCs

2019-06-16 Thread Heiko Stübner

Am Donnerstag, 13. Juni 2019, 19:44:43 CEST schrieb Robin Murphy:
> On 13/06/2019 18:20, Enric Balletbo i Serra wrote:
> > Hi Robin,
> > 
> > On 13/6/19 18:56, Robin Murphy wrote:
> >> On 13/06/2019 17:27, Enric Balletbo i Serra wrote:
> >>> As per binding documentation [1], the DWC3 core should have the "ref",
> >>> "bus_early" and "suspend" clocks. As explained in the binding, those
> >>> clocks are required for new platforms but not for existing platforms
> >>> before commit fe8abf332b8f ("usb: dwc3: support clocks and resets for
> >>> DWC3 core").
> >>>
> >>> However, as those clocks are really treated as required, this ends with
> >>> having some annoying messages when the "rockchip,rk3399-dwc3" is used:
> >>>
> >>> [1.724107] dwc3 fe80.dwc3: Failed to get clk 'ref': -2
> >>> [1.731893] dwc3 fe90.dwc3: Failed to get clk 'ref': -2
> >>> [2.495937] dwc3 fe80.dwc3: Failed to get clk 'ref': -2
> >>> [2.647239] dwc3 fe90.dwc3: Failed to get clk 'ref': -2
> >>>
> >>> In order to remove those annoying messages, update the DWC3 hardware
> >>> module node and add all the required clocks. With this change, both, the
> >>> glue node and the DWC3 core node, have the clocks defined, but that's
> >>> not really a problem and there isn't a side effect on do this. So, we
> >>> can get rid of the annoying get clk error messages.
> >>
> >> Can we not just move these clocks entirely from the glue layer to the core
> >> layer? That didn't seem to break when I tried it, although I'll admit my
> >> 'testing' was no more than booting and mounting a USB 3.0 flash drive, no
> >> suspend or anything fancy.
> >>
> > 
> > AFAICT usb doesn't break, but we won't break backward compability then? (/me
> > still doesn't know when backward compability is really important or not)
> 
> Ah, fair point - I was imagining the glue layer's heavy-handed "turn 
> everything on" approach potentially interfering with the core layer's 
> ability to exert finer-grained control of the clocks it knows more 
> about, but the "old kernel, new DT" situation might indeed be a problem. 
> I guess that's Heiko's call in the end.

I really try to stay with the "backwards-compatible" philosophy
(at least with the old dt with new kernel variant) and so far that worked
surprisingly well ;-) .

Personally I don't believe in the other direction, old kernel with new dt,
as we're always adding features, so updating the devicetree without
updating the kernel doesn't look like it would make very much sense.

So if you just keep the old-dt+new-kernel variant intact, I'll be happy.


Heiko



> Plus I'm no CCF expert so my 
> concern might be unfounded anyway.
> 
> >> My own attempt to shut up these errors got sidetracked into c0c61471ef86 
> >> ("usb:
> >> dwc3: of-simple: Convert to bulk clk API"), then apparently stalled :)
> >>
> > 
> > There was any off the record discussion and stalled or simply you didn't get
> > feedback?
> 
> More that the 3399 board got put away in a freak "tidying the kitchen" 
> incident, and I've just had too many other things on the go since :)
> 
> Robin.
> 
> > 
> > I'll take a look.
> > 
> > Thanks,
> > ~ Enric
> > 
> >> Robin.
> >>
> >>>
> >>> [1] Documentation/devicetree/bindings/usb/dwc3.txt
> >>>
> >>> Signed-off-by: Enric Balletbo i Serra 
> >>> ---
> >>>
> >>>arch/arm64/boot/dts/rockchip/rk3399.dtsi | 6 ++
> >>>1 file changed, 6 insertions(+)
> >>>
> >>> diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
> >>> b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
> >>> index 196ac9b78076..a15348d185ce 100644
> >>> --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
> >>> +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
> >>> @@ -414,6 +414,9 @@
> >>>compatible = "snps,dwc3";
> >>>reg = <0x0 0xfe80 0x0 0x10>;
> >>>interrupts = ;
> >>> +clocks = <&cru SCLK_USB3OTG0_REF>, <&cru ACLK_USB3OTG0>,
> >>> + <&cru SCLK_USB3OTG0_SUSPEND>;
> >>> +clock-names = "ref", "bus_early", "suspend";
> >>>dr_mode = "otg";
> >>>phys = <&u2phy0_otg>, <&tcphy0_usb3>;
> >>>phy-names = "usb2-phy", "usb3-phy";
> >>> @@ -447,6 +450,9 @@
> >>>compatible = "snps,dwc3";
> >>>reg = <0x0 0xfe90 0x0 0x10>;
> >>>interrupts = ;
> >>> +clocks = <&cru SCLK_USB3OTG1_REF>, <&cru ACLK_USB3OTG1>,
> >>> + <&cru SCLK_USB3OTG1_SUSPEND>;
> >>> +clock-names = "ref", "bus_early", "suspend";
> >>>dr_mode = "otg";
> >>>phys = <&u2phy1_otg>, <&tcphy1_usb3>;
> >>>phy-names = "usb2-phy", "usb3-phy";
> >>>
> > 
> > ___
> > linux-arm-kernel mailing list
> > linux-arm-ker...@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> > 
>

Re: [PATCH v2 2/3] HID: apple-ib-tb: Add driver for the Touch Bar on MacBook Pro's.

2019-06-16 Thread Jonathan Cameron

On Wed, 12 Jun 2019 01:33:59 -0700
Ronald Tschalär  wrote:

> This driver enables basic touch bar functionality: enabling it, switching
> between modes on FN key press, and dimming and turning the display
> off/on when idle/active.
> 
> Signed-off-by: Ronald Tschalär 
A few minor comments inline from me, but as before well outside of my
areas of knowledge!

Jonathan

> ---
>  drivers/hid/Kconfig   |   10 +
>  drivers/hid/Makefile  |1 +
>  drivers/hid/apple-ib-tb.c | 1389 +
>  3 files changed, 1400 insertions(+)
>  create mode 100644 drivers/hid/apple-ib-tb.c
> 
> diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
> index 545d3691fc1c..7621c2500d71 100644
> --- a/drivers/hid/Kconfig
> +++ b/drivers/hid/Kconfig
> @@ -149,6 +149,16 @@ config HID_APPLE_IBRIDGE
> To compile this driver as a module, choose M here: the
> module will be called apple-ibridge.
>  
> +config HID_APPLE_IBRIDGE_TB
> + tristate "Apple iBridge Touch Bar"
> + depends on HID_APPLE_IBRIDGE
> + ---help---
> + Say Y here if you want support for the Touch Bar on recent
> + MacBook Pros.
> +
> + To compile this driver as a module, choose M here: the
> + module will be called apple-ib-tb.
> +
>  config HID_APPLEIR
>   tristate "Apple infrared receiver"
>   depends on (USB_HID)
> diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
> index a4da5663a541..0c46e5f70db1 100644
> --- a/drivers/hid/Makefile
> +++ b/drivers/hid/Makefile
> @@ -27,6 +27,7 @@ obj-$(CONFIG_HID_ALPS)  += hid-alps.o
>  obj-$(CONFIG_HID_ACRUX)  += hid-axff.o
>  obj-$(CONFIG_HID_APPLE)  += hid-apple.o
>  obj-$(CONFIG_HID_APPLE_IBRIDGE)  += apple-ibridge.o
> +obj-$(CONFIG_HID_APPLE_IBRIDGE_TB)   += apple-ib-tb.o
>  obj-$(CONFIG_HID_APPLEIR)+= hid-appleir.o
>  obj-$(CONFIG_HID_ASUS)   += hid-asus.o
>  obj-$(CONFIG_HID_AUREAL) += hid-aureal.o
> diff --git a/drivers/hid/apple-ib-tb.c b/drivers/hid/apple-ib-tb.c
> new file mode 100644
> index ..6daee80060ce
> --- /dev/null
> +++ b/drivers/hid/apple-ib-tb.c
> @@ -0,0 +1,1389 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Apple Touch Bar Driver
> + *
> + * Copyright (c) 2017-2018 Ronald Tschalär
> + */
> +
> +/*
> + * Recent MacBookPro models (13,[23] and 14,[23]) have a touch bar, which
> + * is exposed via several USB interfaces. MacOS supports a fancy mode
> + * where arbitrary buttons can be defined; this driver currently only
> + * supports the simple mode that consists of 3 predefined layouts
> + * (escape-only, esc + special keys, and esc + function keys).
> + *
> + * The first USB HID interface supports two reports, an input report that
> + * is used to report the key presses, and an output report which can be
> + * used to set the touch bar "mode": touch bar off (in which case no touches
> + * are reported at all), escape key only, escape + 12 function keys, and
> + * escape + several special keys (including brightness, audio volume,
> + * etc). The second interface supports several, complex reports, most of
> + * which are unknown at this time, but one of which has been determined to
> + * allow for controlling of the touch bar's brightness: off (though touches
> + * are still reported), dimmed, and full brightness. This driver makes
> + * use of these two reports.
> + */
> +
> +#define dev_fmt(fmt) "tb: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define HID_UP_APPLE 0xff12
> +#define HID_USAGE_MODE   (HID_UP_CUSTOM | 0x0004)
> +#define HID_USAGE_APPLE_APP  (HID_UP_APPLE  | 0x0001)
> +#define HID_USAGE_DISP   (HID_UP_APPLE  | 0x0021)
> +#define HID_USAGE_DISP_AUX1  (HID_UP_APPLE  | 0x0020)
> +
> +#define APPLETB_MAX_TB_KEYS  13  /* ESC, F1-F12 */
> +
> +#define APPLETB_CMD_MODE_ESC 0
> +#define APPLETB_CMD_MODE_FN  1
> +#define APPLETB_CMD_MODE_SPCL2
> +#define APPLETB_CMD_MODE_OFF 3
> +#define APPLETB_CMD_MODE_UPD 254
> +#define APPLETB_CMD_MODE_NONE255
> +
> +#define APPLETB_CMD_DISP_ON  1
> +#define APPLETB_CMD_DISP_DIM 2
> +#define APPLETB_CMD_DISP_OFF 4
> +#define APPLETB_CMD_DISP_UPD 254
> +#define APPLETB_CMD_DISP_NONE255
> +
> +#define APPLETB_FN_MODE_FKEYS0
> +#define APPLETB_FN_MODE_NORM 1
> +#define APPLETB_FN_MODE_INV  2
> +#define APPLETB_FN_MODE_SPCL 3
> +#define APPLETB_FN_MODE_MAX  APPLETB_FN_MODE_SPCL
> +
> +#define APPLETB_DEVID_KEYBOARD   1
> +#define APPLETB_DEVID_TOUCHPAD   2
> +
> +#define APPLETB_MAX_DIM_TIME 30
> +
> +static int appletb_tb_def_idle_timeout = 5 * 60;
> +module_param_named(idle_timeout, appletb_tb_def_idle_timeout, int, 0444);
> +MODULE_PARM_DESC(idle_timeout, "Default touch bar idle timeout:\n"
> +">0 - turn touch bar display off after no

Re: linux-next: Fixes tag needs some work in the clockevents tree

2019-06-16 Thread Dmitry Osipenko

16.06.2019 17:18, Daniel Lezcano пишет:
> On 16/06/2019 16:08, Dmitry Osipenko wrote:
>> 16.06.2019 16:24, Stephen Rothwell пишет:
>>> Hi Daniel,
>>>
>>> [Sorry for the slow response.]
>>>
>>> On Thu, 13 Jun 2019 08:52:21 +0200 Daniel Lezcano 
>>>  wrote:

 actually it returns:

 git log -1 --format='Fixes: %h ("%s")' 3be2a85a0b61

 Fixes: 3be2a85a0b61 ("clocksource/drivers/tegra: Support per-CPU timers on 
 all Tegra's")
>>>
>>> Indeed.
>>>
 Is it ok to shorten the subject?
>>>
>>> I figure it is easier to just use the "git log" result and to give
>>> anyone (or any script) the wants to use the Fixes tag as much
>>> information as possible.
>>>
>>
>> Daniel, I'd also recommend to shorten the common subsys prefix in general to 
>> something
>> like "clocksource: tegra:".
> 
> Actually I can't, I have been asked by the tip team to follow this format.

Oh well.

Also please feel free to fixup the offending commit's tag if you think that it's
necessary. I'll keep a full subject line in the future patches to avoid that 
situation.

Re: [PATCH v2 3/3] iio: light: apple-ib-als: Add driver for ALS on iBridge chip.

2019-06-16 Thread Jonathan Cameron

On Wed, 12 Jun 2019 01:34:00 -0700
Ronald Tschalär  wrote:

> On 2016/2017 MacBook Pro's with a Touch Bar the ALS is attached to,
> and exposed via the iBridge device. This provides the driver for that
> sensor.
> 
> Signed-off-by: Ronald Tschalär 
Hi Ronald,

One thing that we should perhaps document more clearly in IIO is that
it is acceptable to not have triggers if they don't make any sense.
In this particular case, you have one basically to give a way of saying
to move into a more continuous sampling mode from a polled one (I think).
For that just use the buffer enable callbacks.

It'll be much cleaner without the trigger.

A few other suggestions inline.  In particular, I'm not that keen on the
appleals_device having a pointer to the iio device which then has
a pointer back again.  I 'think' you can just reorder things a bit and
embed the appleals_device structure in the iio_dev private field directly
and avoid the dance between the different structures.

Thanks,

Jonathan

> ---
>  drivers/iio/light/Kconfig|  12 +
>  drivers/iio/light/Makefile   |   1 +
>  drivers/iio/light/apple-ib-als.c | 607 +++
>  3 files changed, 620 insertions(+)
>  create mode 100644 drivers/iio/light/apple-ib-als.c
> 
> diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
> index 5190eacfeb0a..b477aa5d2024 100644
> --- a/drivers/iio/light/Kconfig
> +++ b/drivers/iio/light/Kconfig
> @@ -64,6 +64,18 @@ config APDS9960
> To compile this driver as a module, choose M here: the
> module will be called apds9960
>  
> +config APPLE_IBRIDGE_ALS
> + tristate "Apple iBridge ambient light sensor"
> + select IIO_BUFFER
> + select IIO_TRIGGERED_BUFFER
> + depends on HID_APPLE_IBRIDGE
> + help
> +   Say Y here to build the driver for the Apple iBridge ALS
> +   sensor.
> +
> +   To compile this driver as a module, choose M here: the
> +   module will be called apple-ib-als.
> +
>  config BH1750
>   tristate "ROHM BH1750 ambient light sensor"
>   depends on I2C
> diff --git a/drivers/iio/light/Makefile b/drivers/iio/light/Makefile
> index e40794fbb435..cd6cd5ba6da5 100644
> --- a/drivers/iio/light/Makefile
> +++ b/drivers/iio/light/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_ADJD_S311)   += adjd_s311.o
>  obj-$(CONFIG_AL3320A)+= al3320a.o
>  obj-$(CONFIG_APDS9300)   += apds9300.o
>  obj-$(CONFIG_APDS9960)   += apds9960.o
> +obj-$(CONFIG_APPLE_IBRIDGE_ALS)  += apple-ib-als.o
>  obj-$(CONFIG_BH1750) += bh1750.o
>  obj-$(CONFIG_BH1780) += bh1780.o
>  obj-$(CONFIG_CM32181)+= cm32181.o
> diff --git a/drivers/iio/light/apple-ib-als.c 
> b/drivers/iio/light/apple-ib-als.c
> new file mode 100644
> index ..b84be0076e0f
> --- /dev/null
> +++ b/drivers/iio/light/apple-ib-als.c
> @@ -0,0 +1,607 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Apple Ambient Light Sensor Driver
> + *
> + * Copyright (c) 2017-2018 Ronald Tschalär
> + */
> +
> +/*
> + * MacBookPro models with an iBridge chip (13,[23] and 14,[23]) have an
> + * ambient light sensor that is exposed via one of the USB interfaces on
> + * the iBridge as a standard HID light sensor. However, we cannot use the
> + * existing hid-sensor-als driver, for two reasons:
> + *
> + * 1. The hid-sensor-als driver is part of the hid-sensor-hub which in turn
> + *is a hid driver, but you can't have more than one hid driver per hid
> + *device, which is a problem because the touch bar also needs to
> + *register as a driver for this hid device.
> + *
> + * 2. While the hid-sensors-als driver stores sensor readings received via
> + *interrupt in an iio buffer, reads on the sysfs
> + *.../iio:deviceX/in_illuminance_YYY attribute result in a get of the
> + *feature report; however, in the case of this sensor here the
> + *illuminance field of that report is always 0. Instead, the input
> + *report needs to be requested.
> + */
> +
> +#define dev_fmt(fmt) "als: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define APPLEALS_DYN_SENS0   /* our dynamic sensitivity */
> +#define APPLEALS_DEF_CHANGE_SENS APPLEALS_DYN_SENS
> +
> +struct appleals_device {
> + struct hid_device   *hid_dev;
> + struct hid_report   *cfg_report;
> + struct hid_field*illum_field;
> + struct iio_dev  *iio_dev;
> + int cur_sensitivity;
> + int cur_hysteresis;
> + boolevents_enabled;
> +};
> +
> +static struct hid_driver appleals_hid_driver;
> +
> +/*
> + * This is a primitive way to get a relative sensitivity, one where we get
> + * notified when the value changes by a certain percentage rather than some
> + * absolute value. MacOS somehow manages to config

[PATCH] e1000e: Make watchdog use delayed work

2019-06-16 Thread Detlev Casanova

Use delayed work instead of timers to run the watchdog of the e1000e
driver.

Simplify the code with one less middle function.

Signed-off-by: Detlev Casanova 
---
 drivers/net/ethernet/intel/e1000e/e1000.h  |  3 +-
 drivers/net/ethernet/intel/e1000e/netdev.c | 52 +++---
 2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
b/drivers/net/ethernet/intel/e1000e/e1000.h
index be13227f1697..942ab74030ca 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -186,12 +186,11 @@ struct e1000_phy_regs {
 
 /* board specific private data structure */
 struct e1000_adapter {
-   struct timer_list watchdog_timer;
struct timer_list phy_info_timer;
struct timer_list blink_timer;
 
struct work_struct reset_task;
-   struct work_struct watchdog_task;
+   struct delayed_work watchdog_task;
 
const struct e1000_info *ei;
 
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0e09bede42a2..d101671cdb9b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -39,6 +39,8 @@ static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
+struct workqueue_struct *e1000_workqueue;
+
 static const struct e1000_info *e1000_info_tbl[] = {
[board_82571]   = &e1000_82571_info,
[board_82572]   = &e1000_82572_info,
@@ -1780,7 +1782,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused 
irq, void *data)
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   queue_delayed_work(e1000_workqueue, 
&adapter->watchdog_task, 1);
}
 
/* Reset on uncorrectable ECC error */
@@ -1860,7 +1862,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, 
void *data)
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   queue_delayed_work(e1000_workqueue, 
&adapter->watchdog_task, 1);
}
 
/* Reset on uncorrectable ECC error */
@@ -1905,7 +1907,7 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   queue_delayed_work(e1000_workqueue, 
&adapter->watchdog_task, 0);
}
 
if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -4278,7 +4280,6 @@ void e1000e_down(struct e1000_adapter *adapter, bool 
reset)
 
napi_synchronize(&adapter->napi);
 
-   del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
 
spin_lock(&adapter->stats64_lock);
@@ -5150,25 +5151,11 @@ static void e1000e_check_82574_phy_workaround(struct 
e1000_adapter *adapter)
}
 }
 
-/**
- * e1000_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
- **/
-static void e1000_watchdog(struct timer_list *t)
-{
-   struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-
-   /* Do the rest outside of interrupt context */
-   schedule_work(&adapter->watchdog_task);
-
-   /* TODO: make this use queue_delayed_work() */
-}
-
 static void e1000_watchdog_task(struct work_struct *work)
 {
struct e1000_adapter *adapter = container_of(work,
 struct e1000_adapter,
-watchdog_task);
+watchdog_task.work);
struct net_device *netdev = adapter->netdev;
struct e1000_mac_info *mac = &adapter->hw.mac;
struct e1000_phy_info *phy = &adapter->hw.phy;
@@ -5395,8 +5382,8 @@ static void e1000_watchdog_task(struct work_struct *work)
 
/* Reset the timer */
if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer,
- round_jiffies(jiffies + 2 * HZ));
+   queue_delayed_work(e1000_workqueue, &adapter->watchdog_task,
+  round_jiffies(2 * HZ));
 }
 
 #define E1000_TX_FLAGS_CSUM0x0001
@@ -7251,11 +7238,21 @@ static int e1000_probe(struct pci_dev *pdev, const 
struct pci_device_id *ent)
goto err_eeprom;
}
 
-   timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+   e1000_workqueue

[PATCH] tty: serial_core: recover uport->cons->cflag on uart_close

2019-06-16 Thread kpark3469

From: Sahara 

Since uart_close was converted to use tty_port_close, uart_shutdown
also moved to uart_tty_port_shutdown, which means it does not backup
tty's termios to uart_port.console.cflag when console is closed and
uart_console is true.
By losing this value, serial console was not set correctly especially
after suspend/resume when there is no consumer of console device.
This problem resets console driver's configuration to an unwanted value
and may give a performance regression in the system eventually.
This patch fixes the bug introduced from v4.9 kernel.

Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use 
tty_port_close")
Reported-by: Jouni Linnamaa 
Signed-off-by: Sahara 
---
 drivers/tty/serial/serial_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 83f4dd0bfd74..a52afceb2f4e 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1533,6 +1533,7 @@ static void uart_set_termios(struct tty_struct *tty,
 static void uart_close(struct tty_struct *tty, struct file *filp)
 {
struct uart_state *state = tty->driver_data;
+   struct uart_port *uport = uart_port_check(state);
 
if (!state) {
struct uart_driver *drv = tty->driver->driver_state;
@@ -1548,6 +1549,8 @@ static void uart_close(struct tty_struct *tty, struct 
file *filp)
 
pr_debug("uart_close(%d) called\n", tty->index);
 
+   if (uport && uart_console(uport))
+   uport->cons->cflag = tty->termios.c_cflag;
tty_port_close(tty->port, tty, filp);
 }
 
-- 
2.17.1

Re: [PATCH 2/3] iio: adc: stm32-adc: add analog switches supply control

2019-06-16 Thread Jonathan Cameron

On Wed, 12 Jun 2019 09:24:35 +0200
Fabrice Gasnier  wrote:

> On stm32h7 and stm32mp1, the ADC inputs are multiplexed with analog
> switches which have reduced performances when their supply is below 2.7V
> (vdda by default):
> - vdd supply can be selected if above 2.7V by setting ANASWVDD syscfg bit
>   (STM32MP1 only).
> - Voltage booster can be used, to get full ADC performances by setting
>   BOOSTE/EN_BOOSTER syscfg bit (increases power consumption).
> 
> Make this optional, since this is a trade-off between analog performance
> and power consumption.
> 
> Note: STM32H7 syscfg has a set and clear register for "BOOSTE" control.
> STM32MP1 has separate set and clear registers pair to control EN_BOOSTER
> and ANASWVDD bits.
> 
> Signed-off-by: Fabrice Gasnier 

A few minor bits inline, but mostly seems fine to me.

Jonathan

> ---
>  drivers/iio/adc/stm32-adc-core.c | 232 
> ++-
>  1 file changed, 230 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iio/adc/stm32-adc-core.c 
> b/drivers/iio/adc/stm32-adc-core.c
> index 2327ec1..9d41b16 100644
> --- a/drivers/iio/adc/stm32-adc-core.c
> +++ b/drivers/iio/adc/stm32-adc-core.c
> @@ -14,9 +14,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -51,6 +53,20 @@
>  
>  #define STM32_ADC_CORE_SLEEP_DELAY_MS2000
>  
> +/* SYSCFG registers */
> +#define STM32H7_SYSCFG_PMCR  0x04
> +#define STM32MP1_SYSCFG_PMCSETR  0x04
> +#define STM32MP1_SYSCFG_PMCCLRR  0x44
> +
> +/* SYSCFG bit fields */
> +#define STM32H7_SYSCFG_BOOSTE_MASK   BIT(8)
> +#define STM32MP1_SYSCFG_ANASWVDD_MASKBIT(9)
> +
> +/* SYSCFG capability flags */
> +#define HAS_VBOOSTER BIT(0)
> +#define HAS_ANASWVDD BIT(1)
> +#define HAS_CLEAR_REGBIT(2)
> +
>  /**
>   * stm32_adc_common_regs - stm32 common registers, compatible dependent data
>   * @csr: common status register offset
> @@ -58,6 +74,11 @@
>   * @eoc1:adc1 end of conversion flag in @csr
>   * @eoc2:adc2 end of conversion flag in @csr
>   * @eoc3:adc3 end of conversion flag in @csr
> + * @has_syscfg: SYSCFG capability flags
> + * @pmcr:SYSCFG_PMCSETR/SYSCFG_PMCR register offset
> + * @pmcc:SYSCFG_PMCCLRR clear register offset
> + * @booste_msk:  SYSCFG BOOSTE / EN_BOOSTER bitmask in PMCR & PMCCLRR
> + * @anaswvdd_msk: SYSCFG ANASWVDD bitmask in PMCR & PMCCLRR
>   */
>  struct stm32_adc_common_regs {
>   u32 csr;
> @@ -65,6 +86,11 @@ struct stm32_adc_common_regs {
>   u32 eoc1_msk;
>   u32 eoc2_msk;
>   u32 eoc3_msk;
> + unsigned int has_syscfg;
> + u32 pmcr;
> + u32 pmcc;
> + u32 booste_msk;
> + u32 anaswvdd_msk;
>  };
>  
>  struct stm32_adc_priv;
> @@ -87,20 +113,26 @@ struct stm32_adc_priv_cfg {
>   * @domain:  irq domain reference
>   * @aclk:clock reference for the analog circuitry
>   * @bclk:bus clock common for all ADCs, depends on part used
> + * @vdd: vdd supply reference
> + * @vdda:vdda supply reference
>   * @vref:regulator reference
>   * @cfg: compatible configuration data
>   * @common:  common data for all ADC instances
>   * @ccr_bak: backup CCR in low power mode
> + * @syscfg:  reference to syscon, system control registers
>   */
>  struct stm32_adc_priv {
>   int irq[STM32_ADC_MAX_ADCS];
>   struct irq_domain   *domain;
>   struct clk  *aclk;
>   struct clk  *bclk;
> + struct regulator*vdd;
> + struct regulator*vdda;
>   struct regulator*vref;
>   const struct stm32_adc_priv_cfg *cfg;
>   struct stm32_adc_common common;
>   u32 ccr_bak;
> + struct regmap   *syscfg;
>  };
>  
>  static struct stm32_adc_priv *to_stm32_adc_priv(struct stm32_adc_common *com)
> @@ -284,6 +316,22 @@ static const struct stm32_adc_common_regs 
> stm32h7_adc_common_regs = {
>   .ccr = STM32H7_ADC_CCR,
>   .eoc1_msk = STM32H7_EOC_MST,
>   .eoc2_msk = STM32H7_EOC_SLV,
> + .has_syscfg = HAS_VBOOSTER,
> + .pmcr = STM32H7_SYSCFG_PMCR,
> + .booste_msk = STM32H7_SYSCFG_BOOSTE_MASK,
> +};
> +
> +/* STM32MP1 common registers definitions */
> +static const struct stm32_adc_common_regs stm32mp1_adc_common_regs = {
> + .csr = STM32H7_ADC_CSR,
> + .ccr = STM32H7_ADC_CCR,
> + .eoc1_msk = STM32H7_EOC_MST,
> + .eoc2_msk = STM32H7_EOC_SLV,
> + .has_syscfg =  HAS_VBOOSTER | HAS_ANASWVDD | HAS_CLEAR_REG,

Extra space after =


> + .pmcr = STM32MP1_SYSCFG_PMCSETR,
> + .pmcc = STM32MP1_SYSCFG_PMCCLRR,
> + .booste_msk = STM32H7_SYSCFG_BOOSTE_MASK,
> + .anaswvdd_msk = STM32MP1_SYSCFG_ANASWVDD_MASK,
>  };
>  
>  /* ADC

Re: [PATCH 1/3] dt-bindings: iio: adc: stm32: add analog switches supply control

2019-06-16 Thread Jonathan Cameron

On Wed, 12 Jun 2019 09:24:34 +0200
Fabrice Gasnier  wrote:

> On stm32h7 and stm32mp1, the ADC inputs are multiplexed with analog
> switches which have reduced performances when their supply is below 2.7V
> (vdda by default).
> 
> Add documentation for optional vdda-supply & vdd-supply that can be used
> to supply analog circuitry (controlled by syscfg bits).
> 
> Signed-off-by: Fabrice Gasnier 
> ---
>  Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt 
> b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
> index 8346bcb..3af48b9 100644
> --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
> +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt
> @@ -46,6 +46,12 @@ Required properties:
>  Optional properties:
>  - A pinctrl state named "default" for each ADC channel may be defined to set
>inX ADC pins in mode of operation for analog input on external pin.
> +- vdda-supply: Phandle to the vdda input voltage. It can be used to supply 
> ADC
> +  analog inputs switches on stm32h7 and stm32mp1.

input switches

> +- vdd-supply: Phandle to the vdd input voltage. It can be used to supply ADC
> +  analog inputs switches on stm32mp1.
> +- st,syscfg: Phandle to system configuration controller. It can be used to
> +  control the analog circuitry on stm32h7 and stm32mp1.
>  
>  Contents of a stm32 adc child node:
>  ---

Re: general protection fault in oom_unkillable_task

2019-06-16 Thread Tetsuo Handa

On 2019/06/16 16:37, Tetsuo Handa wrote:
> On 2019/06/16 6:33, Tetsuo Handa wrote:
>> On 2019/06/16 3:50, Shakeel Butt wrote:
 While dump_tasks() traverses only each thread group, 
 mem_cgroup_scan_tasks()
 traverses each thread.
>>>
>>> I think mem_cgroup_scan_tasks() traversing threads is not intentional
>>> and css_task_iter_start in it should use CSS_TASK_ITER_PROCS as the
>>> oom killer only cares about the processes or more specifically
>>> mm_struct (though two different thread groups can have same mm_struct
>>> but that is fine).
>>
>> We can't use CSS_TASK_ITER_PROCS from mem_cgroup_scan_tasks(). I've tried
>> CSS_TASK_ITER_PROCS in an attempt to evaluate only one thread from each
>> thread group, but I found that CSS_TASK_ITER_PROCS causes skipping whole
>> threads in a thread group (and trivially allowing "Out of memory and no
>> killable processes...\n" flood) if thread group leader has already exited.
> 
> Seems that CSS_TASK_ITER_PROCS from mem_cgroup_scan_tasks() is now working.


I found a reproducer and the commit.


#define _GNU_SOURCE
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/*
 * 200 MiB (1048576 * 200). Used as the mmap()/read() length in thread() and
 * as the value written to memory.limit_in_bytes in main().
 */
static const unsigned long size = 1048576 * 200;
/*
 * Entry point of the thread that main() creates with clone().
 *
 * Opens /dev/zero, maps `size` bytes of anonymous shared memory, sleeps for
 * one second, then read()s `size` bytes from /dev/zero into the mapping and
 * terminates through the raw exit syscall with status 0.
 *
 * @unused: ignored (main() passes NULL).
 *
 * Return values of open(), mmap() and read() are not checked.
 * NOTE(review): this looks deliberate for a minimal reproducer -- confirm
 * before adding error handling.
 */
static int thread(void *unused)
{
int fd = open("/dev/zero", O_RDONLY);
/* MAP_ANONYMOUS mapping; EOF is what gets passed as the fd argument. */
char *buf = mmap(NULL, size, PROT_WRITE | PROT_READ,
 MAP_ANONYMOUS | MAP_SHARED, EOF, 0);
/* NOTE(review): presumably lets main() exit before the read below -- confirm. */
sleep(1);
/*
 * Writes `size` bytes into the mapping, i.e. as many bytes as the cgroup
 * limit set in main(). NOTE(review): presumably meant to push the memory
 * cgroup into OOM -- confirm against the thread this reproducer was posted in.
 */
read(fd, buf, size);
/* Raw exit syscall rather than a plain return or libc exit(). */
return syscall(__NR_exit, 0);
}
/*
 * Reproducer driver.
 *
 * 1. Creates the memory cgroup directory /sys/fs/cgroup/memory/test1.
 * 2. Writes `size` to that cgroup's memory.limit_in_bytes.
 * 3. Writes its own pid to the cgroup's tasks file.
 * 4. Starts thread() via clone() with CLONE_SIGHAND | CLONE_THREAD | CLONE_VM.
 * 5. Terminates through the raw exit syscall with status 0.
 *
 * argc and argv are not used. No return value (mkdir, fopen, fprintf, fclose,
 * malloc, clone) is checked; a failed fopen() would hand NULL to fprintf().
 * NOTE(review): presumably requires root and a mounted cgroup-v1 memory
 * controller -- confirm.
 */
int main(int argc, char *argv[])
{
FILE *fp;
mkdir("/sys/fs/cgroup/memory/test1", 0755);
/* Limit the cgroup to `size` bytes. */
fp = fopen("/sys/fs/cgroup/memory/test1/memory.limit_in_bytes", "w");
fprintf(fp, "%lu\n", size);
fclose(fp);
/* Put the calling process into the cgroup. */
fp = fopen("/sys/fs/cgroup/memory/test1/tasks", "w");
fprintf(fp, "%u\n", getpid());
fclose(fp);
/*
 * The stack pointer handed to clone() is the midpoint of an 8192-byte
 * malloc() buffer; the buffer is never freed.
 */
clone(thread, malloc(8192) + 4096, CLONE_SIGHAND | CLONE_THREAD | 
CLONE_VM, NULL);
/*
 * Raw exit syscall instead of returning from main().
 * NOTE(review): presumably so that only this (leader) thread terminates
 * while the clone()d thread keeps running -- confirm against _exit(2).
 */
return syscall(__NR_exit, 0);
}


Here is a patch to use CSS_TASK_ITER_PROCS.

>From 415e52cf55bc4ad931e4f005421b827f0b02693d Mon Sep 17 00:00:00 2001
From: Tetsuo Handa 
Date: Mon, 17 Jun 2019 00:09:38 +0900
Subject: [PATCH] mm: memcontrol: Use CSS_TASK_ITER_PROCS at 
mem_cgroup_scan_tasks().

Since commit c03cd7738a83b137 ("cgroup: Include dying leaders with live
threads in PROCS iterations") corrected how CSS_TASK_ITER_PROCS works,
mem_cgroup_scan_tasks() can use CSS_TASK_ITER_PROCS in order to check
only one thread from each thread group.

Signed-off-by: Tetsuo Handa 
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ba9138a..b09ff45 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1163,7 +1163,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
struct css_task_iter it;
struct task_struct *task;
 
-   css_task_iter_start(&iter->css, 0, &it);
+   css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
while (!ret && (task = css_task_iter_next(&it)))
ret = fn(task, arg);
css_task_iter_end(&it);
-- 
1.8.3.1

Re: [PATCH] iio: humidity: Replace older GPIO APIs with GPIO Consumer APIs for the dht11 sensor

2019-06-16 Thread Jonathan Cameron

On Tue, 11 Jun 2019 13:30:42 +0200
Harald Geyer  wrote:

> Shobhit Kukreti writes:
> > The dht11 driver uses a single gpio to make measurements. It was
> > using the older global gpio numberspace. The patch replaces the
> > old gpio api with the new gpio descriptor based api.
> > 
> > Removed header files "linux/gpio.h" and "linux/of_gpio.h"
> >
> > Signed-off-by: Shobhit Kukreti   
> 
> Acked-by: Harald Geyer 

Applied to the togreg branch of iio.git and pushed out as testing
for the autobuilders to play with it.

thanks,

Jonathan

> 
> > ---
> >  drivers/iio/humidity/dht11.c | 28 ++--
> >  1 file changed, 10 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c
> > index c815920..f5128d8 100644
> > --- a/drivers/iio/humidity/dht11.c
> > +++ b/drivers/iio/humidity/dht11.c
> > @@ -22,8 +22,7 @@
> >  #include 
> >  #include 
> >  #include 
> > -#include 
> > -#include 
> > +#include 
> >  #include 
> >  
> >  #include 
> > @@ -72,7 +71,7 @@
> >  struct dht11 {
> > struct device   *dev;
> >  
> > -   int gpio;
> > +   struct gpio_desc*gpiod;
> > int irq;
> >  
> > struct completion   completion;
> > @@ -179,7 +178,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data)
> > if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
> > dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
> > dht11->edges[dht11->num_edges++].value =
> > -   gpio_get_value(dht11->gpio);
> > +   gpiod_get_value(dht11->gpiod);
> >  
> > if (dht11->num_edges >= DHT11_EDGES_PER_READ)
> > complete(&dht11->completion);
> > @@ -217,12 +216,12 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
> > reinit_completion(&dht11->completion);
> >  
> > dht11->num_edges = 0;
> > -   ret = gpio_direction_output(dht11->gpio, 0);
> > +   ret = gpiod_direction_output(dht11->gpiod, 0);
> > if (ret)
> > goto err;
> > usleep_range(DHT11_START_TRANSMISSION_MIN,
> >  DHT11_START_TRANSMISSION_MAX);
> > -   ret = gpio_direction_input(dht11->gpio);
> > +   ret = gpiod_direction_input(dht11->gpiod);
> > if (ret)
> > goto err;
> >  
> > @@ -294,10 +293,8 @@ MODULE_DEVICE_TABLE(of, dht11_dt_ids);
> >  static int dht11_probe(struct platform_device *pdev)
> >  {
> > struct device *dev = &pdev->dev;
> > -   struct device_node *node = dev->of_node;
> > struct dht11 *dht11;
> > struct iio_dev *iio;
> > -   int ret;
> >  
> > iio = devm_iio_device_alloc(dev, sizeof(*dht11));
> > if (!iio) {
> > @@ -307,18 +304,13 @@ static int dht11_probe(struct platform_device *pdev)
> >  
> > dht11 = iio_priv(iio);
> > dht11->dev = dev;
> > +   dht11->gpiod = devm_gpiod_get(dev, NULL, GPIOD_IN);
> > +   if (IS_ERR(dht11->gpiod))
> > +   return PTR_ERR(dht11->gpiod);
> >  
> > -   ret = of_get_gpio(node, 0);
> > -   if (ret < 0)
> > -   return ret;
> > -   dht11->gpio = ret;
> > -   ret = devm_gpio_request_one(dev, dht11->gpio, GPIOF_IN, pdev->name);
> > -   if (ret)
> > -   return ret;
> > -
> > -   dht11->irq = gpio_to_irq(dht11->gpio);
> > +   dht11->irq = gpiod_to_irq(dht11->gpiod);
> > if (dht11->irq < 0) {
> > -   dev_err(dev, "GPIO %d has no interrupt\n", dht11->gpio);
> > +   dev_err(dev, "GPIO %d has no interrupt\n", 
> > desc_to_gpio(dht11->gpiod));
> > return -EINVAL;
> > }
> >  
> > -- 
> > 2.7.4
> >   
>

Re: [PATCH v3 1/3] thermal: sun8i: add thermal driver for h6

2019-06-16 Thread Frank Lee

On Sat, Jun 15, 2019 at 7:08 AM Vasily Khoruzhick  wrote:
>
> On Wed, Jun 12, 2019 at 9:50 AM Frank Lee  wrote:
> >
> > > If you have a git tree I'll be happy to contribute A64 support. IIRC
> > > it was quite similar to H3.
> >
> > I built a ths branch and I will do some work later.
> >
> > https://github.com/TinyWindzz/linux/tree/ths
>
> Looks like you forgot to add your patches to this branch.

Frankly, I only started on the new version this weekend. A small part of
the work at hand is still unfinished, and I will push the patches to my
branch within the next two days.

MBR,
Yangtao

Re: [PATCH v7 18/18] x86/fsgsbase/64: Add documentation for FSGSBASE

2019-06-16 Thread Bae, Chang Seok



> On Jun 16, 2019, at 05:34, Thomas Gleixner  wrote:
> 
> On Sun, 16 Jun 2019, Thomas Gleixner wrote:
>> 
>> Please don't. Send me a delta patch against the documentation. I have queued
>> all the other patches already internally. I did not push it out because I
>> wanted to have proper docs.
> 
> Fixed it up already. About to push it out.
> 

Thanks. This is the diff though.

diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 22992c8377952..f667087792747 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -118,7 +118,7 @@ static __always_inline bool should_resched(int 
preempt_offset)
 
/* preempt count == 0 ? */
tmp &= ~PREEMPT_NEED_RESCHED;
-   if (tmp)
+   if (tmp != preempt_offset)
return false;
if (current_thread_info()->preempt_lazy_count)
return false;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c15583162a559..25bcf2f2714ba 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -92,6 +92,34 @@ static inline void softirq_clr_runner(unsigned int sirq)
sr->runner[sirq] = NULL;
 }
 
+static bool softirq_check_runner_tsk(struct task_struct *tsk,
+unsigned int *pending)
+{
+   bool ret = false;
+
+   if (!tsk)
+   return ret;
+
+   /*
+* The wakeup code in rtmutex.c wakes up the task
+* _before_ it sets pi_blocked_on to NULL under
+* tsk->pi_lock. So we need to check for both: state
+* and pi_blocked_on.
+* The test against UNINTERRUPTIBLE + ->sleeping_lock is in case the
+* task does cpu_chill().
+*/
+   raw_spin_lock(&tsk->pi_lock);
+   if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING ||
+   (tsk->state == TASK_UNINTERRUPTIBLE && tsk->sleeping_lock)) {
+   /* Clear all bits pending in that task */
+   *pending &= ~(tsk->softirqs_raised);
+   ret = true;
+   }
+   raw_spin_unlock(&tsk->pi_lock);
+
+   return ret;
+}
+
 /*
  * On preempt-rt a softirq running context might be blocked on a
  * lock. There might be no other runnable task on this CPU because the
@@ -104,6 +132,7 @@ static inline void softirq_clr_runner(unsigned int sirq)
  */
 void softirq_check_pending_idle(void)
 {
+   struct task_struct *tsk;
static int rate_limit;
struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
u32 warnpending;
@@ -113,24 +142,23 @@ void softirq_check_pending_idle(void)
return;
 
warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
+   if (!warnpending)
+   return;
for (i = 0; i < NR_SOFTIRQS; i++) {
-   struct task_struct *tsk = sr->runner[i];
+   tsk = sr->runner[i];
 
-   /*
-* The wakeup code in rtmutex.c wakes up the task
-* _before_ it sets pi_blocked_on to NULL under
-* tsk->pi_lock. So we need to check for both: state
-* and pi_blocked_on.
-*/
-   if (tsk) {
-   raw_spin_lock(&tsk->pi_lock);
-   if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
-   /* Clear all bits pending in that task */
-   warnpending &= ~(tsk->softirqs_raised);
-   warnpending &= ~(1 << i);
-   }
-   raw_spin_unlock(&tsk->pi_lock);
-   }
+   if (softirq_check_runner_tsk(tsk, &warnpending))
+   warnpending &= ~(1 << i);
+   }
+
+   if (warnpending) {
+   tsk = __this_cpu_read(ksoftirqd);
+   softirq_check_runner_tsk(tsk, &warnpending);
+   }
+
+   if (warnpending) {
+   tsk = __this_cpu_read(ktimer_softirqd);
+   softirq_check_runner_tsk(tsk, &warnpending);
}
 
if (warnpending) {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 851b2134e77f4..6f2736ec4b8ef 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1902,15 +1902,18 @@ void cpu_chill(void)
 {
ktime_t chill_time;
unsigned int freeze_flag = current->flags & PF_NOFREEZE;
+   long saved_state;
 
+   saved_state = current->state;
chill_time = ktime_set(0, NSEC_PER_MSEC);
-   set_current_state(TASK_UNINTERRUPTIBLE);
+   __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
current->flags |= PF_NOFREEZE;
sleeping_lock_inc();
schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL_HARD);
sleeping_lock_dec();
if (!freeze_flag)
current->flags &= ~PF_NOFREEZE;
+   __set_current_state_no_track(saved_state);
 }
 EXPORT_SYMBOL(cpu_chill);
 #endif
diff --git a/localversion-rt b/localversion-rt
index 9f7d0bdbffb18..08b3e75841adc 100644
--- a/l

Re: [PATCH v3 3/3] iio: Add PAT9125 optical tracker sensor

2019-06-16 Thread Jonathan Cameron

On Mon, 10 Jun 2019 11:29:45 +0200
Alexandre Mergnat  wrote:

> This adds support for PixArt Imaging’s miniature low power optical
> navigation chip using LASER light source enabling digital surface tracking.
> 
> Feature and datasheet: [0]
> 
> This IIO driver allows reading the delta or relative position on the X and Y axes:
>   - The position relative to where the system started can be taken through
>   punctual "read_raw" which will issue a read in the device registers to
>   get the delta between last/current read and return the addition of all
>   the deltas.
>   - The delta can be retrieved using triggered buffer subscription
>   (i.e. iio_readdev). The buffer payload is:
> |32 bits delta X|32 bits delta Y|timestamp|.
> 
> The possible I2C addresses are 0x73, 0x75 and 0x79.
> 
> X and Y axis CPI resolution can be get/set independently through IIO_SCALE.
> The range value is 0-255 which means:
>   - 0 to ~1,275 Counts Per Inch on flat surface.
>   - 0 to ~630 Counts Per Rev on 1.0mm diameter STS shaft at 1.0mm distance.
> More details on the datasheet.
> 
> The "position" directory is added to contain drivers which can provide
> position data.
> 
> Signed-off-by: Alexandre Mergnat 
> 
> [0]: http://www.pixart.com/products-detail/72/PAT9125EL-TKIT___TKMT
> 
> Signed-off-by: Alexandre Mergnat 

Hi Alexandre,

Getting close but a few more bits and pieces inline.

I'm a little confused on why the buffered reads are giving the delta
values whilst we have fixed up the sysfs reads to give the more useful
position values (and hence not need to know when last read was).
We should be consistent and give positions from the buffered path as well.

Thanks,

Jonathan

> ---
>  drivers/iio/Kconfig|   1 +
>  drivers/iio/Makefile   |   1 +
>  drivers/iio/position/Kconfig   |  18 ++
>  drivers/iio/position/Makefile  |   6 +
>  drivers/iio/position/pat9125.c | 499 +
>  5 files changed, 525 insertions(+)
>  create mode 100644 drivers/iio/position/Kconfig
>  create mode 100644 drivers/iio/position/Makefile
>  create mode 100644 drivers/iio/position/pat9125.c
> 
> diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
> index 1d736a4952ab..23d9780640e7 100644
> --- a/drivers/iio/Kconfig
> +++ b/drivers/iio/Kconfig
> @@ -85,6 +85,7 @@ source "drivers/iio/light/Kconfig"
>  source "drivers/iio/magnetometer/Kconfig"
>  source "drivers/iio/multiplexer/Kconfig"
>  source "drivers/iio/orientation/Kconfig"
> +source "drivers/iio/position/Kconfig"
>  if IIO_TRIGGER
> source "drivers/iio/trigger/Kconfig"
>  endif #IIO_TRIGGER
> diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile
> index bff682ad1cfb..1712011c0f4a 100644
> --- a/drivers/iio/Makefile
> +++ b/drivers/iio/Makefile
> @@ -31,6 +31,7 @@ obj-y += light/
>  obj-y += magnetometer/
>  obj-y += multiplexer/
>  obj-y += orientation/
> +obj-y += position/
>  obj-y += potentiometer/
>  obj-y += potentiostat/
>  obj-y += pressure/
> diff --git a/drivers/iio/position/Kconfig b/drivers/iio/position/Kconfig
> new file mode 100644
> index ..1cf28896511c
> --- /dev/null
> +++ b/drivers/iio/position/Kconfig
> @@ -0,0 +1,18 @@
> +#
> +# Optical tracker sensors
> +#
> +# When adding new entries keep the list in alphabetical order
> +
> +menu "Optical tracker sensors"
> +
> +config PAT9125
> + tristate "Optical tracker PAT9125 I2C driver"
> + depends on I2C
> + select IIO_BUFFER
> + help
> +   Say yes here to build support for PAT9125 optical tracker
> +   sensors.
> +
> +  To compile this driver as a module, say M here: the module will
> +  be called pat9125.
> +endmenu
> diff --git a/drivers/iio/position/Makefile b/drivers/iio/position/Makefile
> new file mode 100644
> index ..cf294917ae2c
> --- /dev/null
> +++ b/drivers/iio/position/Makefile
> @@ -0,0 +1,6 @@
> +#
> +# Makefile for industrial I/O Optical tracker sensor drivers
> +#
> +
> +# When adding new entries keep the list in alphabetical order
> +obj-$(CONFIG_PAT9125) += pat9125.o
> diff --git a/drivers/iio/position/pat9125.c b/drivers/iio/position/pat9125.c
> new file mode 100644
> index ..22bf729bec9b
> --- /dev/null
> +++ b/drivers/iio/position/pat9125.c
> @@ -0,0 +1,499 @@
> +// SPDX-License-Identifier: (GPL-2.0)
> +/*
> + * Copyright (C) 2019 BayLibre, SAS
> + * Author: Alexandre Mergnat 
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* I2C Address function to ID pin*/
> +#define PAT9125_I2C_ADDR_HI  0x73
> +#define PAT9125_I2C_ADDR_LO  0x75
> +#define PAT9125_I2C_ADDR_NC  0x79
> +
> +/* Registers */
> +#define PAT9125_PRD_ID1_REG  0x00
> +#define PAT9125_PRD_ID2_REG  0x01
> +#define PAT9125_MOTION_STATUS_REG0x02
> +#define PAT9125_DELTA_X_LO_REG   0x03
> +#define

Re: [PATCH] Revert "ARM: dts: rockchip: set PWM delay backlight settings for Minnie"

2019-06-16 Thread Pavel Machek

Hi!

> This reverts commit 288ceb85b505c19abe1895df068dda5ed20cf482.
> 
> According to the commit message the AUO B101EAN01 panel on minnie
> requires a PWM delay of 200 ms, however this is not what the
> datasheet says. The datasheet mentions a *max* delay of 200 ms
> for T2 ("delay from LCDVDD to black video generation") and T3
> ("delay from LCDVDD to HPD high"), which aren't related to the
> PWM. The backlight power sequence does not specify min/max
> constraints for T15 (time from PWM on to BL enable) or T16
> (time from BL disable to PWM off).
> 
> Signed-off-by: Matthias Kaehlcke 
> ---
> Enric, if you think I misinterpreted the datasheet please holler!

Was this tested? Was previous patch tested?

Does patch being reverted actually break anything? If so, cc stable?

Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

1 2 3 >

1 - 100 of 299 matches

Mail list logo