date:20170703

[v2] wlcore: add missing nvs file name info for wilink8

2017-07-03 Thread Reizer, Eyal

When working with wl18xx, the nvs file is used to define an alternate
mac address that overrides the default mac address stored inside
the wl18xx chip.

The following commits:
c815fde wlcore: spi: Populate config firmware data
d776fc8 wlcore: sdio: Populate config firmware data

Populated the nvs entry for wilink6 and wilink7 only while it is 
still needed for wilink8 as well. 
This broke user space backward compatibility when upgrading from older 
kernels, as the alternate mac address would not be read from the nvs that is 
already present in the file system 
(lib/firmware/ti-connectivity/wl1271-nvs.bin) 
causing mac address change of the wlan interface.

This patch fixes this by updating the structure field with the same default
nvs file name that was used before.

Cc: stable 
Signed-off-by: Eyal Reizer 
---
 drivers/net/wireless/ti/wlcore/sdio.c | 1 +
 drivers/net/wireless/ti/wlcore/spi.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ti/wlcore/sdio.c 
b/drivers/net/wireless/ti/wlcore/sdio.c
index 2fb3871..f8a1fea 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -230,6 +230,7 @@ static const struct wilink_family_data wl128x_data = {
 static const struct wilink_family_data wl18xx_data = {
.name = "wl18xx",
.cfg_name = "ti-connectivity/wl18xx-conf.bin",
+   .nvs_name = "ti-connectivity/wl1271-nvs.bin",
 };
 
 static const struct of_device_id wlcore_sdio_of_match_table[] = {
diff --git a/drivers/net/wireless/ti/wlcore/spi.c 
b/drivers/net/wireless/ti/wlcore/spi.c
index fdabb92..62ce54a 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -92,6 +92,7 @@ static const struct wilink_family_data wl128x_data = {
 static const struct wilink_family_data wl18xx_data = {
.name = "wl18xx",
.cfg_name = "ti-connectivity/wl18xx-conf.bin",
+   .nvs_name = "ti-connectivity/wl1271-nvs.bin",
 };
 
 struct wl12xx_spi_glue {
-- 
2.7.4

Re: [PATCH v6 06/18] xen/pvcalls: handle commands from the frontend

2017-07-03 Thread Juergen Gross

On 03/07/17 23:08, Stefano Stabellini wrote:
> When the other end notifies us that there are commands to be read
> (pvcalls_back_event), wake up the backend thread to parse the command.
> 
> The command ring works like most other Xen rings, so use the usual
> ring macros to read and write to it. The functions implementing the
> commands are empty stubs for now.
> 
> Signed-off-by: Stefano Stabellini 
> CC: boris.ostrov...@oracle.com
> CC: jgr...@suse.com
> ---
>  drivers/xen/pvcalls-back.c | 144 
> -
>  1 file changed, 129 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
> index e4c2e46..9e00971 100644
> --- a/drivers/xen/pvcalls-back.c
> +++ b/drivers/xen/pvcalls-back.c
> @@ -47,16 +47,135 @@ struct pvcalls_fedata {
>   struct list_head socket_mappings;
>   struct radix_tree_root socketpass_mappings;
>   struct semaphore socket_lock;
> - struct workqueue_struct *wq;
> - struct work_struct register_work;
>  };
>  
> -static void pvcalls_back_work(struct work_struct *work)
> +static int pvcalls_back_socket(struct xenbus_device *dev,
> + struct xen_pvcalls_request *req)
>  {
> + return 0;
> +}
> +
> +static int pvcalls_back_connect(struct xenbus_device *dev,
> + struct xen_pvcalls_request *req)
> +{
> + return 0;
> +}
> +
> +static int pvcalls_back_release(struct xenbus_device *dev,
> + struct xen_pvcalls_request *req)
> +{
> + return 0;
> +}
> +
> +static int pvcalls_back_bind(struct xenbus_device *dev,
> +  struct xen_pvcalls_request *req)
> +{
> + return 0;
> +}
> +
> +static int pvcalls_back_listen(struct xenbus_device *dev,
> +struct xen_pvcalls_request *req)
> +{
> + return 0;
> +}
> +
> +static int pvcalls_back_accept(struct xenbus_device *dev,
> +struct xen_pvcalls_request *req)
> +{
> + return 0;
> +}
> +
> +static int pvcalls_back_poll(struct xenbus_device *dev,
> +  struct xen_pvcalls_request *req)
> +{
> + return 0;
> +}
> +
> +static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
> +struct xen_pvcalls_request *req)
> +{
> + int ret = 0;
> +
> + switch (req->cmd) {
> + case PVCALLS_SOCKET:
> + ret = pvcalls_back_socket(dev, req);
> + break;
> + case PVCALLS_CONNECT:
> + ret = pvcalls_back_connect(dev, req);
> + break;
> + case PVCALLS_RELEASE:
> + ret = pvcalls_back_release(dev, req);
> + break;
> + case PVCALLS_BIND:
> + ret = pvcalls_back_bind(dev, req);
> + break;
> + case PVCALLS_LISTEN:
> + ret = pvcalls_back_listen(dev, req);
> + break;
> + case PVCALLS_ACCEPT:
> + ret = pvcalls_back_accept(dev, req);
> + break;
> + case PVCALLS_POLL:
> + ret = pvcalls_back_poll(dev, req);
> + break;
> + default:
> + {
> + struct pvcalls_fedata *fedata;
> + struct xen_pvcalls_response *rsp;
> +
> + fedata = dev_get_drvdata(&dev->dev);
> + rsp = RING_GET_RESPONSE(
> + &fedata->ring, fedata->ring.rsp_prod_pvt++);
> + rsp->req_id = req->req_id;
> + rsp->cmd = req->cmd;
> + rsp->ret = -ENOTSUPP;
> + break;
> + }
> + }
> + return ret;
> +}
> +
> +static void pvcalls_back_work(struct pvcalls_fedata *fedata)
> +{
> + int notify, notify_all = 0, more = 1;
> + struct xen_pvcalls_request req;
> + struct xenbus_device *dev = fedata->dev;
> +
> + while (more) {
> + while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
> + RING_COPY_REQUEST(&fedata->ring,
> +   fedata->ring.req_cons++,
> +   &req);
> +
> + if (!pvcalls_back_handle_cmd(dev, &req)) {
> + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
> + &fedata->ring, notify);
> + notify_all += notify;
> + }
> + }
> +
> + if (notify_all) {
> + notify_remote_via_irq(fedata->irq);
> + notify_all = 0;
> + }
> +
> + RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
> + }
>  }
>  
>  static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
>  {
> + struct xenbus_device *dev = dev_id;
> + struct pvcalls_fedata *fedata = NULL;
> +
> + if (dev == NULL)
> + return IRQ_HANDLED;
> +
> + fedata = dev_get_drvdata(&dev->dev);
> + if (fedata == NULL)
> + return IRQ_HANDLED;
> +
> + pvcalls_back_work(fedata);
>

Re: [PATCH 08/14] qcom: mtd: nand: Add support for additional CSRs

2017-07-03 Thread Archit Taneja




On 06/29/2017 12:46 PM, Abhishek Sahu wrote:

1. NAND_READ_LOCATION: provides the offset in page for
reading in BAM DMA mode

2. NAND_ERASED_CW_DETECT_CFG: contains the status for erased
code words

3. NAND_BUFFER_STATUS: contains the status for ECC

Signed-off-by: Abhishek Sahu 
---
  drivers/mtd/nand/qcom_nandc.c | 67 ++-
  1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index 65c9059..8e7dc9e 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -54,6 +54,8 @@
  #define   NAND_VERSION0xf08
  #define   NAND_READ_LOCATION_00xf20
  #define   NAND_READ_LOCATION_10xf24
+#defineNAND_READ_LOCATION_20xf28
+#defineNAND_READ_LOCATION_30xf2c
  
  /* dummy register offsets, used by write_reg_dma */

  #define   NAND_DEV_CMD1_RESTORE   0xdead
@@ -132,6 +134,11 @@
  #define   ERASED_PAGE (PAGE_ALL_ERASED | PAGE_ERASED)
  #define   ERASED_CW   (CODEWORD_ALL_ERASED | 
CODEWORD_ERASED)
  
+/* NAND_READ_LOCATION_n bits */

+#define READ_LOCATION_OFFSET   0
+#define READ_LOCATION_SIZE 16
+#define READ_LOCATION_LAST 31
+
  /* Version Mask */
  #define   NAND_VERSION_MAJOR_MASK 0xf000
  #define   NAND_VERSION_MAJOR_SHIFT28
@@ -177,6 +184,11 @@
  #define NAND_BAM_NWD  (0x0002)
  /* Finish writing in the current sgl and start writing in another sgl */
  #define NAND_BAM_NEXT_SGL (0x0004)
+/*
+ * Erased codeword status is being used two times in single transfer so this
+ * flag will determine the current value of erased codeword status register
+ */
+#define NAND_ERASED_CW_SET (0x0008)
  
  #define QPIC_PER_CW_MAX_CMD_ELEMENTS	(32)

  #define QPIC_PER_CW_MAX_CMD_SGL   (32)
@@ -258,6 +270,13 @@ struct nandc_regs {
__le32 orig_vld;
  
  	__le32 ecc_buf_cfg;

+   __le32 read_location0;
+   __le32 read_location1;
+   __le32 read_location2;
+   __le32 read_location3;
+
+   __le32 erased_cw_detect_cfg_clr;
+   __le32 erased_cw_detect_cfg_set;
  };
  
  /*

@@ -504,6 +523,16 @@ static __le32 *offset_to_nandc_reg(struct nandc_regs 
*regs, int offset)
return &regs->orig_vld;
case NAND_EBI2_ECC_BUF_CFG:
return &regs->ecc_buf_cfg;
+   case NAND_BUFFER_STATUS:
+   return &regs->clrreadstatus;
+   case NAND_READ_LOCATION_0:
+   return &regs->read_location0;
+   case NAND_READ_LOCATION_1:
+   return &regs->read_location1;
+   case NAND_READ_LOCATION_2:
+   return &regs->read_location2;
+   case NAND_READ_LOCATION_3:
+   return &regs->read_location3;
default:
return NULL;
}
@@ -545,7 +574,7 @@ static void update_rw_regs(struct qcom_nand_host *host, int 
num_cw, bool read)
  {
struct nand_chip *chip = &host->chip;
struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
-   u32 cmd, cfg0, cfg1, ecc_bch_cfg;
+   u32 cmd, cfg0, cfg1, ecc_bch_cfg, read_location0;
  
  	if (read) {

if (host->use_ecc)
@@ -562,12 +591,20 @@ static void update_rw_regs(struct qcom_nand_host *host, 
int num_cw, bool read)
  
  		cfg1 = host->cfg1;

ecc_bch_cfg = host->ecc_bch_cfg;
+   if (read)
+   read_location0 = (0 << READ_LOCATION_OFFSET) |
+   (host->cw_data << READ_LOCATION_SIZE) |
+   (1 << READ_LOCATION_LAST);
} else {
cfg0 = (host->cfg0_raw & ~(7U << CW_PER_PAGE)) |
(num_cw - 1) << CW_PER_PAGE;
  
  		cfg1 = host->cfg1_raw;

ecc_bch_cfg = 1 << ECC_CFG_ECC_DISABLE;
+   if (read)
+   read_location0 = (0 << READ_LOCATION_OFFSET) |
+   (host->cw_size << READ_LOCATION_SIZE) |
+   (1 << READ_LOCATION_LAST);
}
  
  	nandc_set_reg(nandc, NAND_FLASH_CMD, cmd);

@@ -578,6 +615,9 @@ static void update_rw_regs(struct qcom_nand_host *host, int 
num_cw, bool read)
nandc_set_reg(nandc, NAND_FLASH_STATUS, host->clrflashstatus);
nandc_set_reg(nandc, NAND_READ_STATUS, host->clrreadstatus);
nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+   if (read)
+   nandc_set_reg(nandc, NAND_READ_LOCATION_0, read_location0);
  }
  
  /*

@@ -756,6 +796,13 @@ static int write_reg_dma(struct qcom_nand_controller 
*nandc, int first,
if (first == NAND_FLASH_CMD)
flow_control = true;
  
+	if (first == NAND_ERASED_CW_DETECT_CFG) {

+   if (flags & NAND_ERASED_CW_SET)
+   vaddr = &regs->erased_cw_detect_cfg_set;
+

Re: [RFC][PATCHv3 2/5] printk: introduce printing kernel thread

2017-07-03 Thread Sergey Senozhatsky

On (07/04/17 14:26), Sergey Senozhatsky wrote:
[..]
> not sure if we can properly throttle printk in all of the cases.
> we know that console_sem is locked, but we don't know what for.
> is CPU that owns the console_sem is now in console_unlock() or
> somewhere in fbcon, or anywhere else. we probably need not to
> throttle printk() if we know that console_sem is already locked
> by this_cpu and we simply call printk either from IRQ that
> preempted console_unlock() on this_cpu or recursive printk from
> console_unlock()... and so on.

which is hard to do, given that console_unlock() can schedule with
console_sem locked. so CPU number won't do the trick. unless we will
forbid preemption in console_unlock()... we sort of need to do it.

-ss

Re: [PATCH mm] introduce reverse buddy concept to reduce buddy fragment

2017-07-03 Thread Michal Hocko

On Tue 04-07-17 09:21:00, zhouxianrong wrote:
> the test was done as follows:
> 
> 1. the environment is android 7.0 and kernel is 4.1 and managed memory is 
> 3.5GB

There have been many changes in the compaction proper since then. Do you
see the same problem with the current upstream kernel?

> 2. every 4s startup one apk, total 100 more apks need to startup
> 3. after finishing step 2, sample buddyinfo once and get the result

How stable are those results?
-- 
Michal Hocko
SUSE Labs

Re: "mm: use early_pfn_to_nid in page_ext_init" broken on some configurations?

2017-07-03 Thread Michal Hocko

On Tue 04-07-17 14:11:41, Joonsoo Kim wrote:
> On Fri, Jun 30, 2017 at 05:44:16PM +0200, Michal Hocko wrote:
> > On Fri 30-06-17 17:42:24, Michal Hocko wrote:
> > [...]
> > > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > > index 16532fa0bb64..894697c1e6f5 100644
> > > --- a/include/linux/mmzone.h
> > > +++ b/include/linux/mmzone.h
> > > @@ -1055,6 +1055,7 @@ static inline struct zoneref 
> > > *first_zones_zonelist(struct zonelist *zonelist,
> > >   !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
> > >  static inline unsigned long early_pfn_to_nid(unsigned long pfn)
> > >  {
> > > + BUILD_BUG_ON(!IS_ENABLED(CONFIG_NUMA));
> > 
> > Err, this should read BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)) of course
> 
> Agreed.
> 
> However, AFAIK, ARM can set CONFIG_NUMA but it doesn't have
> CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID and CONFIG_HAVE_MEMBLOCK_NODE_MAP.

$ git grep "config NUMA\|select NUMA" arch/arm
$

Did you mean arm64? If yes, this one looks ok
$ git grep "HAVE_MEMBLOCK_NODE_MAP\|HAVE_ARCH_EARLY_PFN_TO_NID" arch/arm64/
arch/arm64/Kconfig: select HAVE_MEMBLOCK_NODE_MAP if NUMA

> If page_ext uses early_pfn_to_nid(), it will cause build error in ARM.

Which would be intentional if it doesn't provide a proper implementation
of the function.
 
> Therefore, I suggest following change.
> CONFIG_DEFERRED_STRUCT_PAGE_INIT depends on proper early_pfn_to_nid().
> So, following code will always work as long as
> CONFIG_DEFERRED_STRUCT_PAGE_INIT works.

I haven't checked all other callers of early_pfn_to_nid yet but I have
run my original patch (with !IS_ENABLED...) just to see whether anybody
actually uses this function from an invalid context and it hasn't blown
up. So I suspect that all current users simply use the function from the
proper context. So if nobody objects I would just post the patch for
inclusion. If the compilation breaks we can think of a proper
implementation.

> 
> Thanks.
> 
> --->8---
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 88ccc044..e3db259 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -384,6 +384,7 @@ void __init page_ext_init(void)
>  
> for_each_node_state(nid, N_MEMORY) {
> unsigned long start_pfn, end_pfn;
> +   int page_nid;
>  
> start_pfn = node_start_pfn(nid);
> end_pfn = node_end_pfn(nid);
> @@ -405,8 +406,15 @@ void __init page_ext_init(void)
>  *
>  * Take into account DEFERRED_STRUCT_PAGE_INIT.
>  */
> -   if (early_pfn_to_nid(pfn) != nid)
> +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> +   page_nid = early_pfn_to_nid(pfn);
> +#else
> +   page_nid = pfn_to_nid(pfn);
> +#endif

I cannot say I would be happy about this ifdefery. Especially when there
is no existing user which would need it. 

> +
> +   if (page_nid != nid)
> continue;
> +
> if (init_section_page_ext(pfn, nid))
> goto oom;
> }

-- 
Michal Hocko
SUSE Labs

Re: [PATCH 07/14] qcom: mtd: nand: support for passing flags in transfer functions

2017-07-03 Thread Archit Taneja




On 06/29/2017 12:45 PM, Abhishek Sahu wrote:

The BAM has multiple flags to control the transfer. This patch
adds flags parameter in register and data transfer functions and
modifies all these function call with appropriate flags.

Signed-off-by: Abhishek Sahu 
---
  drivers/mtd/nand/qcom_nandc.c | 114 --
  1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index 7042a65..65c9059 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -170,6 +170,14 @@
  #define   ECC_BCH_4BITBIT(2)
  #define   ECC_BCH_8BITBIT(3)
  
+/* Flags used for BAM DMA desc preparation*/

+/* Don't set the EOT in current tx sgl */
+#define NAND_BAM_NO_EOT(0x0001)
+/* Set the NWD flag in current sgl */
+#define NAND_BAM_NWD   (0x0002)
+/* Finish writing in the current sgl and start writing in another sgl */
+#define NAND_BAM_NEXT_SGL  (0x0004)
+
  #define QPIC_PER_CW_MAX_CMD_ELEMENTS  (32)
  #define QPIC_PER_CW_MAX_CMD_SGL   (32)
  #define QPIC_PER_CW_MAX_DATA_SGL  (8)
@@ -712,7 +720,7 @@ static int prep_dma_desc(struct qcom_nand_controller 
*nandc, bool read,
   * @num_regs: number of registers to read
   */
  static int read_reg_dma(struct qcom_nand_controller *nandc, int first,
-   int num_regs)
+   int num_regs, unsigned int flags)
  {
bool flow_control = false;
void *vaddr;
@@ -736,7 +744,7 @@ static int read_reg_dma(struct qcom_nand_controller *nandc, 
int first,
   * @num_regs: number of registers to write
   */
  static int write_reg_dma(struct qcom_nand_controller *nandc, int first,
-int num_regs)
+int num_regs, unsigned int flags)


Adding flags to read_reg_dma and write_reg_dma is making things a bit messy.
I can't think of a better way to share the code either, though.

One thing we could consider doing is something like below. I don't know if it
would make things more legible.

union nand_dma_props {
bool adm_flow_control;
unsigned int bam_flags;
};

config_cw_read()
{
union nand_dma_props dma_props;
...
...

if (is_bam)
dma_props.bam_flags = NAND_BAM_NWD;
else
dma_props.adm_flow_control = false;

write_reg_dma(nandc, NAND_EXEC_CMD, 1, &dma_props);
...
...
}

Thanks,
Archit


  {
bool flow_control = false;
struct nandc_regs *regs = nandc->regs;
@@ -748,6 +756,9 @@ static int write_reg_dma(struct qcom_nand_controller 
*nandc, int first,
if (first == NAND_FLASH_CMD)
flow_control = true;
  
+	if (first == NAND_EXEC_CMD)

+   flags |= NAND_BAM_NWD;
+
if (first == NAND_DEV_CMD1_RESTORE)
first = NAND_DEV_CMD1;
  
@@ -768,7 +779,7 @@ static int write_reg_dma(struct qcom_nand_controller *nandc, int first,

   * @size: DMA transaction size in bytes
   */
  static int read_data_dma(struct qcom_nand_controller *nandc, int reg_off,
-const u8 *vaddr, int size)
+const u8 *vaddr, int size, unsigned int flags)
  {
return prep_dma_desc(nandc, true, reg_off, vaddr, size, false);
  }
@@ -782,7 +793,7 @@ static int read_data_dma(struct qcom_nand_controller 
*nandc, int reg_off,
   * @size: DMA transaction size in bytes
   */
  static int write_data_dma(struct qcom_nand_controller *nandc, int reg_off,
- const u8 *vaddr, int size)
+ const u8 *vaddr, int size, unsigned int flags)
  {
return prep_dma_desc(nandc, false, reg_off, vaddr, size, false);
  }
@@ -793,14 +804,16 @@ static int write_data_dma(struct qcom_nand_controller 
*nandc, int reg_off,
   */
  static void config_cw_read(struct qcom_nand_controller *nandc)
  {
-   write_reg_dma(nandc, NAND_FLASH_CMD, 3);
-   write_reg_dma(nandc, NAND_DEV0_CFG0, 3);
-   write_reg_dma(nandc, NAND_EBI2_ECC_BUF_CFG, 1);
+   write_reg_dma(nandc, NAND_FLASH_CMD, 3, 0);
+   write_reg_dma(nandc, NAND_DEV0_CFG0, 3, 0);
+   write_reg_dma(nandc, NAND_EBI2_ECC_BUF_CFG, 1, 0);
  
-	write_reg_dma(nandc, NAND_EXEC_CMD, 1);

+   write_reg_dma(nandc, NAND_EXEC_CMD, 1,
+ NAND_BAM_NWD | NAND_BAM_NEXT_SGL);
  
-	read_reg_dma(nandc, NAND_FLASH_STATUS, 2);

-   read_reg_dma(nandc, NAND_ERASED_CW_DETECT_STATUS, 1);
+   read_reg_dma(nandc, NAND_FLASH_STATUS, 2, 0);
+   read_reg_dma(nandc, NAND_ERASED_CW_DETECT_STATUS, 1,
+NAND_BAM_NEXT_SGL);
  }
  
  /*

@@ -809,19 +822,20 @@ static void config_cw_read(struct qcom_nand_controller 
*nandc)
   */
  static void config_cw_write_pre(struct qcom_nand_controller *nandc)
  {
-   write_reg_dma(nandc, NAND_FLASH_CMD, 3);
-   write_reg_

Re: [PATCH 3/5] pwm: rockchip: Move the configuration of polarity from rockchip_pwm_set_enable() to rockchip_pwm_config()

2017-07-03 Thread David.Wu


Hi Boris,

在 2017/7/4 2:36, Boris Brezillon 写道:

Hm, maybe it's time to drop these custom hooks and implement
pwm_apply_v1 and pwm_apply_v2 instead.


Okay, I will drop the enable and config hooks and use only the apply hook
instead.

Re: [RFC 0/5] drivers: Add boot constraints core

2017-07-03 Thread Viresh Kumar

On 03-07-17, 16:07, Mark Brown wrote:
> On Mon, Jul 03, 2017 at 11:45:52AM +0530, Viresh Kumar wrote:
> > The above regulator-min/max-microvolt values I mentioned were for the 
> > regulator
> > device and not what the consumers would request. Yes, DMA will request 
> > something
> 
> If you're putting the maximum possible range that the physical regulator
> can supply into machine constraints then you really haven't understood
> what machine constraints are at all.

I wasn't referring to the limits of the physical regulators but the min/max that
the consumers can set on a particular platform.

> No, it really shouldn't.  Please read what I wrote.

Sorry about that. Understood it now.

-- 
viresh

Re: centos 7.2，I got some oops from my production line

2017-07-03 Thread Xishi Qiu

On 2017/6/29 16:22, Xishi Qiu wrote:

> centos 7.2，I got some oops from my production line,
> Has anybody seen these errors before?
> 

Here is another one

[  703.025737] BUG: unable to handle kernel NULL pointer dereference at 
0d68
[  703.026008] IP: [] mlx4_en_QUERY_PORT+0xa2/0x190 [mlx4_en]
[  703.026008] PGD 377f2a067 PUD 379df4067 PMD 0 
[  703.026008] Oops: 0002 [#1] SMP 
[  703.033019] Modules linked in: sch_htb haek(OVE) squashfs loop binfmt_misc 
phram mtdblock mtd_blkdevs mtd zlib_deflate nf_log_ipv4 nf_log_common xt_LOG 
ipmi_watchdog ipmi_devintf ipmi_si ipmi_msghandler vfat fat bonding tipc 
kboxdriver(O) kbox(O) ipt_REJECT iptable_filter signo_catch(O) mlx4_ib(OVE) 
ib_sa(OVE) ib_mad(OVE) ib_core(OVE) mlx4_en(OVE) ib_addr(OVE) ib_netlink(OVE) 
vxlan ip6_udp_tunnel udp_tunnel ptp pps_core mlx4_core(OVE) compat(OVE) isofs 
crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper 
ablk_helper cryptd ppdev dm_mod parport_pc sg parport pcspkr i2c_piix4 i2c_core 
ip_tables ext3 mbcache jbd sr_mod cdrom ata_generic pata_acpi virtio_blk(OVE) 
virtio_console(OVE) kvm_ivshmem(OVE) crct10dif_pclmul crct10dif_common ata_piix 
crc32c_intel serio_raw libata pv_channel(OVE)
[  703.055064] mlx4_core :00:07.0: mlx4_dec_port_macs removed mac, port: 1, 
now: 0
[  703.033019]  virtio_pci(OVE) virtio_ring(OVE) virtio(OVE) floppy 
monitor_netdev(OE)
[  703.033019] CPU: 3 PID: 3038 Comm: kworker/3:2 Tainted: GW  OE  
V---   3.10.0-327.49.58.52.x86_64 #1
[  703.033019] Hardware name: OpenStack Foundation OpenStack Nova, BIOS 
rel-1.8.1-0-g4adadbd-2016_105425-HGH108200 04/01/2014
[  703.033019] Workqueue: events linkwatch_event
[  703.033019] task: 88041a9bf300 ti: 880412134000 task.ti: 
880412134000
[  703.066565] RIP: 0010:[]  [] 
mlx4_en_QUERY_PORT+0xa2/0x190 [mlx4_en]
[  703.066565] RSP: 0018:880412137bd0  EFLAGS: 00010a03
[  703.066565] RAX: 8800ba5bc000 RBX: 880410cd1000 RCX: 0038
[  703.066565] RDX: 0001 RSI: 0246 RDI: 88041472046c
[  703.074752] RBP: 880412137c10 R08: 81668be0 R09: 81dc63c0
[  703.074752] R10: 0400 R11: 0017 R12: 8803773eea20
[  703.074752] R13:  R14:  R15: 88041b5eb000
[  703.074752] FS:  () GS:880434ac() 
knlGS:
[  703.074752] CS:  0010 DS:  ES:  CR0: 80050033
[  703.074752] CR2: 0d68 CR3: 00036ff69000 CR4: 001407e0
[  703.086770] DR0:  DR1:  DR2: 
[  703.086770] DR3:  DR6: 0ff0 DR7: 0400
[  703.086770] Stack:
[  703.086770]  88040043 ea60 8804 
ba5bc000
[  703.086770]  880410ce 880412137cac 88041b5eb8c0 
880410ce08c0
[  703.086770]  880412137c78 a02a29a4 dead00200200 
4ff29e7e
[  703.086770] Call Trace:
[  703.086770]  [] mlx4_en_get_settings+0x34/0x540 [mlx4_en]
[  703.086770]  [] __ethtool_get_settings+0x86/0x140
[  703.104149]  [] bond_update_speed_duplex+0x3d/0x90 
[bonding]
[  703.104149]  [] bond_netdev_event+0x137/0x360 [bonding]
[  703.104149]  [] notifier_call_chain+0x4c/0x70
[  703.104149]  [] raw_notifier_call_chain+0x16/0x20
[  703.104149]  [] call_netdevice_notifiers+0x2d/0x60
[  703.104149]  [] netdev_state_change+0x23/0x40
[  703.104149]  [] linkwatch_do_dev+0x40/0x60
[  703.104149]  [] __linkwatch_run_queue+0xef/0x200
[  703.104149]  [] linkwatch_event+0x25/0x30
[  703.104149]  [] process_one_work+0x17b/0x470
[  703.104149]  [] worker_thread+0x11b/0x400
[  703.104149]  [] ? rescuer_thread+0x400/0x400
[  703.104149]  [] kthread+0xcf/0xe0
[  703.104149]  [] ? kthread_create_on_node+0x140/0x140
[  703.104149]  [] ret_from_fork+0x58/0x90
[  703.104149]  [] ? kthread_create_on_node+0x140/0x140
[  703.134274] Code: 48 8b 3b 4c 89 e6 e8 7e 7e f4 ff 48 83 c4 20 44 89 f0 5b 
41 5c 41 5d 41 5e 5d c3 66 0f 1f 44 00 00 49 8b 04 24 0f be 10 c1 ea 1f <41> 89 
95 68 0d 00 00 0f b6 50 05 83 e2 6f 80 fa 40 0f 87 b7 00 
[  703.134274] RIP  [] mlx4_en_QUERY_PORT+0xa2/0x190 [mlx4_en]
[  703.134274]  RSP 
[  703.134274] CR2: 0d68
[  703.134274] ---[ end trace 76a7da47a517c30b ]---
[  703.134274] Kernel panic - not syncing: Fatal exception


> 
> 1)
> 2017-06-28T02:18:16.461384+08:00[880983.488036] do nothing after die!
> 2017-06-28T02:18:16.462068+08:00[880983.488723] Modules linked in: fuse 
> iptable_filter sha512_generic icp_qa_al_vf(OVE) vfat fat isofs ext4 jbd2 xfs 
> libcrc32c kboxdriver(O) ipmi_devintf ipmi_si ipmi_msghandler kbox(O) 
> signo_catch(O) mlx4_core(OVE) compat(OVE) ppdev crc32_pclmul 
> ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd 
> pcspkr parport_pc i2c_piix4 parport i2c_core ip_tables ext3 mbcache jbd 
> ata_generic pata_acpi virtio_console(OVE) virtio_balloon(OVE) virtio_blk

[PATCH 4/4] selftests: ftrace: Output only to console with "--logdir -"

2017-07-03 Thread Masami Hiramatsu

Output logs only to console if "-" is given to --logdir
option. In this case, ftracetest doesn't record any log
on the disk, and all logs are shown immediately (including
all command logs). Since there is no "tee" between the
command and the console, the log is output without delay.

This option is useful only when the console is logged.

Signed-off-by: Masami Hiramatsu 
---
 tools/testing/selftests/ftrace/ftracetest |   29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/ftrace/ftracetest 
b/tools/testing/selftests/ftrace/ftracetest
index 892ca4e..25792ee 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -19,6 +19,7 @@ echo "-vvv   Alias of -v -v -v (Show all 
commands immediately)"
 echo " --fail-unsupported Treat UNSUPPORTED as a failure"
 echo " -d|--debug Debug mode (trace all shell commands)"
 echo " -l|--logdir <dir> Save logs on the <dir>"
+echo " If <dir> is -, all logs output in console only"
 exit $1
 }
 
@@ -127,14 +128,20 @@ if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
 fi
 
 # Preparing logs
-LOG_FILE=$LOG_DIR/ftracetest.log
-mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
-date > $LOG_FILE
+if [ "x$LOG_DIR" = "x-" ]; then
+  LOG_FILE=
+  date
+else
+  LOG_FILE=$LOG_DIR/ftracetest.log
+  mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+  date > $LOG_FILE
+fi
+
 prlog() { # messages
-  echo "$@" | tee -a $LOG_FILE
+  [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
 }
 catlog() { #file
-  cat $1 | tee -a $LOG_FILE
+  [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
 }
 prlog "=== Ftrace unit tests ==="
 
@@ -255,12 +262,18 @@ __run_test() { # testfile
 # Run one test case
 run_test() { # testfile
   local testname=`basename $1`
-  local testlog=`mktemp $LOG_DIR/${testname}-log.XX`
+  if [ "$LOG_FILE" ] ; then
+local testlog=`mktemp $LOG_DIR/${testname}-log.XX`
+  else
+local testlog=`/proc/self/fd/1`
+  fi
   export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XX`
   testcase $1
   echo "execute$INSTANCE: "$1 > $testlog
   SIG_RESULT=0
-  if [ $VERBOSE -ge 3 ]; then
+  if [ -z "$LOG_FILE" ]; then
+__run_test $1 2>&1
+  elif [ $VERBOSE -ge 3 ]; then
 __run_test $1 | tee -a $testlog 2>&1
   elif [ $VERBOSE -eq 2 ]; then
 __run_test $1 2>> $testlog | tee -a $testlog
@@ -270,7 +283,7 @@ run_test() { # testfile
   eval_result $SIG_RESULT
   if [ $? -eq 0 ]; then
 # Remove test log if the test was done as it was expected.
-[ $KEEP_LOG -eq 0 ] && rm $testlog
+[ $KEEP_LOG -eq 0 -a "$LOG_FILE" ] && rm $testlog
   else
 [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
 TOTAL_RESULT=1

[PATCH 3/4] selftests: ftrace: Add more verbosity for immediate log

2017-07-03 Thread Masami Hiramatsu

Add 3-level verbosity for showing traced command log
on console immediately. Since some test cases can cause
kernel panic if there is a problem (like regression etc.),
we can not know which command caused the problem without
traced command log. This verbosity (-vvv) solves that
because it shows the log on console immediately. User
can get continuous command/error log.

Note that this is a kind of kernel debug mode, if you
don't see any kernel related issue, you don't need this
verbosity.

Signed-off-by: Masami Hiramatsu 
---
  Changes in v2:
- Do not show failure log on console again.
---
 tools/testing/selftests/ftrace/ftracetest |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/ftrace/ftracetest 
b/tools/testing/selftests/ftrace/ftracetest
index e033f54..892ca4e 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -15,6 +15,7 @@ echo "-h|--help  Show help message"
 echo " -k|--keep  Keep passed test logs"
 echo " -v|--verbose Increase verbosity of test messages"
 echo " -vvAlias of -v -v (Show all results in stdout)"
+echo " -vvv   Alias of -v -v -v (Show all commands immediately)"
 echo " --fail-unsupported Treat UNSUPPORTED as a failure"
 echo " -d|--debug Debug mode (trace all shell commands)"
 echo " -l|--logdir <dir> Save logs on the <dir>"
@@ -57,9 +58,10 @@ parse_opts() { # opts
   KEEP_LOG=1
   shift 1
 ;;
---verbose|-v|-vv)
+--verbose|-v|-vv|-vvv)
   VERBOSE=$((VERBOSE + 1))
   [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
+  [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
   shift 1
 ;;
 --debug|-d)
@@ -258,7 +260,9 @@ run_test() { # testfile
   testcase $1
   echo "execute$INSTANCE: "$1 > $testlog
   SIG_RESULT=0
-  if [ $VERBOSE -ge 2 ]; then
+  if [ $VERBOSE -ge 3 ]; then
+__run_test $1 | tee -a $testlog 2>&1
+  elif [ $VERBOSE -eq 2 ]; then
 __run_test $1 2>> $testlog | tee -a $testlog
   else
 __run_test $1 >> $testlog 2>&1
@@ -268,7 +272,7 @@ run_test() { # testfile
 # Remove test log if the test was done as it was expected.
 [ $KEEP_LOG -eq 0 ] && rm $testlog
   else
-[ $VERBOSE -ge 1 ] && catlog $testlog
+[ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
 TOTAL_RESULT=1
   fi
   rm -rf $TMPDIR

Re: [PATCH v4 2/3] arm64: kvm: route synchronous external abort exceptions to el2

2017-07-03 Thread gengdongjiu

Hi Christoffer,

On 2017/7/3 16:23, Christoffer Dall wrote:
> On Tue, Jun 27, 2017 at 08:15:49PM +0800, gengdongjiu wrote:
>> correct the commit message:
>>
>>  In the firmware-first RAS solution, OS receives an synchronous
>>  external abort, then trapped to EL3 by SCR_EL3.EA. Firmware inspects
>>  the HCR_EL2.TEA and chooses the target to send APEI's SEA notification.
>>  If the SCR_EL3.EA is set, delegates the error exception to the hypervisor,
>>  otherwise it delegates to the host OS kernel
> 
> This commit text has nothing (directly) to do with the content of the
> patch.  Whether or not setting these bits are used by firmware to emulate
> injecting an exception or by the CPU raising an exception is not the
> core of the issue.
> 
> Please describe your change, then provide rationale.

(1) The two hcr_el2 fields below, TEA and TERR, are introduced by the ARMv8.2 RAS extension.

TEA, bit [37]
Route synchronous External Abort exceptions to EL2. The possible values 
of this bit are:
0 Do not route synchronous External Abort exceptions from Non-secure 
EL0 and EL1 to EL2.
1 Route synchronous External Abort exceptions from Non-secure EL0 and 
EL1 to EL2, if not routed
to EL3.
This bit is RES0 if the RAS extension is not implemented.
TERR, bit [36]
Trap Error record accesses. The possible values of this bit are:
0 Do not trap accesses to error record registers from Non-secure EL1 to 
EL2.
1 Accesses to the ER* registers from Non-secure EL1 generate a Trap 
exception to EL2.
This bit is RES0 if the RAS extension is not implemented.

(2) When the OS takes a synchronous External Abort (SEA), it traps to the EL3 firmware.
The firmware then needs to deliver it by faking an exception entry either to the
hypervisor at EL2 or to EL1.
So if hcr_el2.TEA is set, the firmware will eret to EL2; otherwise it will eret to EL1.
hcr_el2.TEA is only set for the guest OS; it is not set for the host OS.

(3) Setting hcr_el2.HCR_TERR is intended to trap EL1 error record accesses to EL2.

> 
> Thanks,
> -Christoffer
> 
> 
>>
>>
>> On 2017/6/26 20:45, Dongjiu Geng wrote:
>>> In the firmware-first RAS solution, guest OS receives an synchronous
>>> external abort, then trapped to EL3 by SCR_EL3.EA. Firmware inspects
>>> the HCR_EL2.TEA and chooses the target to send APEI's SEA notification.
>>> If the SCR_EL3.EA is set, delegates the error exception to the hypervisor,
>>> otherwise it delegates to the guest OS kernel
>>>
>>> Signed-off-by: Dongjiu Geng 
>>> ---
>>>  arch/arm64/include/asm/kvm_arm.h | 2 ++
>>>  arch/arm64/include/asm/kvm_emulate.h | 7 +++
>>>  2 files changed, 9 insertions(+)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_arm.h 
>>> b/arch/arm64/include/asm/kvm_arm.h
>>> index 61d694c..1188272 100644
>>> --- a/arch/arm64/include/asm/kvm_arm.h
>>> +++ b/arch/arm64/include/asm/kvm_arm.h
>>> @@ -23,6 +23,8 @@
>>>  #include 
>>>  
>>>  /* Hyp Configuration Register (HCR) bits */
>>> +#define HCR_TEA(UL(1) << 37)
>>> +#define HCR_TERR   (UL(1) << 36)
>>>  #define HCR_E2H(UL(1) << 34)
>>>  #define HCR_ID (UL(1) << 33)
>>>  #define HCR_CD (UL(1) << 32)
>>> diff --git a/arch/arm64/include/asm/kvm_emulate.h 
>>> b/arch/arm64/include/asm/kvm_emulate.h
>>> index f5ea0ba..5f64ab2 100644
>>> --- a/arch/arm64/include/asm/kvm_emulate.h
>>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>>> @@ -47,6 +47,13 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
>>> vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
>>> if (is_kernel_in_hyp_mode())
>>> vcpu->arch.hcr_el2 |= HCR_E2H;
>>> +   if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
>>> +   /* route synchronous external abort exceptions to EL2 */
>>> +   vcpu->arch.hcr_el2 |= HCR_TEA;
>>> +   /* trap error record accesses */
>>> +   vcpu->arch.hcr_el2 |= HCR_TERR;
>>> +   }
>>> +
>>> if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
>>> vcpu->arch.hcr_el2 &= ~HCR_RW;
>>>  }
>>>
>>
> 
> .
>

[PATCH 3/9] net, ipv6: convert inet6_ifaddr.refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/addrconf.h | 6 +++---
 include/net/if_inet6.h | 2 +-
 net/ipv6/addrconf.c| 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 620bd9a..6df79e9 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -350,18 +350,18 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
 
 static inline void in6_ifa_put(struct inet6_ifaddr *ifp)
 {
-   if (atomic_dec_and_test(&ifp->refcnt))
+   if (refcount_dec_and_test(&ifp->refcnt))
inet6_ifa_finish_destroy(ifp);
 }
 
 static inline void __in6_ifa_put(struct inet6_ifaddr *ifp)
 {
-   atomic_dec(&ifp->refcnt);
+   refcount_dec(&ifp->refcnt);
 }
 
 static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
 {
-   atomic_inc(&ifp->refcnt);
+   refcount_inc(&ifp->refcnt);
 }
 
 
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index e7a17b2..2b41cb8 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -46,7 +46,7 @@ struct inet6_ifaddr {
/* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */
__u32   valid_lft;
__u32   prefered_lft;
-   atomic_trefcnt;
+   refcount_t  refcnt;
spinlock_t  lock;
 
int state;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2365f12..3c46e95 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1050,7 +1050,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct 
in6_addr *addr,
 
ifa->idev = idev;
/* For caller */
-   in6_ifa_hold(ifa);
+   refcount_set(&ifa->refcnt, 1);
 
/* Add to big hash table */
hash = inet6_addr_hash(addr);
-- 
2.7.4

[PATCH 2/4] selftests: ftrace: Add --fail-unsupported option

2017-07-03 Thread Masami Hiramatsu

Add --fail-unsupported option to fail the test result if
ftracetest gets UNSUPPORTED result. UNSUPPORTED usually
happens when the kernel is old (e.g. stable tree) or some
kernel feature is disabled.

However, if a newer kernel has a bug or regression, it
can also make tests result in UNSUPPORTED. This option
can detect such kernel regressions.

Signed-off-by: Masami Hiramatsu 
---
 tools/testing/selftests/ftrace/ftracetest |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/ftracetest 
b/tools/testing/selftests/ftrace/ftracetest
index 290cd42..e033f54 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -15,6 +15,7 @@ echo "-h|--help  Show help message"
 echo " -k|--keep  Keep passed test logs"
 echo " -v|--verbose Increase verbosity of test messages"
 echo " -vvAlias of -v -v (Show all results in stdout)"
+echo " --fail-unsupported Treat UNSUPPORTED as a failure"
 echo " -d|--debug Debug mode (trace all shell commands)"
 echo " -l|--logdir  Save logs on the "
 exit $1
@@ -65,6 +66,10 @@ parse_opts() { # opts
   DEBUG=1
   shift 1
 ;;
+--fail-unsupported)
+  UNSUPPORTED_RESULT=1
+  shift 1
+;;
 --logdir|-l)
   LOG_DIR=$2
   shift 2
@@ -108,6 +113,7 @@ LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
 KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
+UNSUPPORTED_RESULT=0
 # Parse command-line options
 parse_opts $*
 
@@ -187,7 +193,7 @@ eval_result() { # sigval
 $UNSUPPORTED)
   prlog "  [UNSUPPORTED]"
   UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
-  return 0 # this is not a bug.
+  return $UNSUPPORTED_RESULT # depends on use case
 ;;
 $XFAIL)
   prlog "  [XFAIL]"

[PATCH 4/9] net, ipv6: convert ifmcaddr6.mca_refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/if_inet6.h |  2 +-
 net/ipv6/mcast.c   | 18 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 2b41cb8..4bb52ce 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -127,7 +127,7 @@ struct ifmcaddr6 {
struct timer_list   mca_timer;
unsigned intmca_flags;
int mca_users;
-   atomic_tmca_refcnt;
+   refcount_t  mca_refcnt;
spinlock_t  mca_lock;
unsigned long   mca_cstamp;
unsigned long   mca_tstamp;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e222113..12b7c27 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -701,7 +701,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 
spin_lock_bh(&mc->mca_lock);
if (del_timer(&mc->mca_timer))
-   atomic_dec(&mc->mca_refcnt);
+   refcount_dec(&mc->mca_refcnt);
spin_unlock_bh(&mc->mca_lock);
 }
 
@@ -819,12 +819,12 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 
 static void mca_get(struct ifmcaddr6 *mc)
 {
-   atomic_inc(&mc->mca_refcnt);
+   refcount_inc(&mc->mca_refcnt);
 }
 
 static void ma_put(struct ifmcaddr6 *mc)
 {
-   if (atomic_dec_and_test(&mc->mca_refcnt)) {
+   if (refcount_dec_and_test(&mc->mca_refcnt)) {
in6_dev_put(mc->idev);
kfree(mc);
}
@@ -846,7 +846,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
mc->mca_users = 1;
/* mca_stamp should be updated upon changes */
mc->mca_cstamp = mc->mca_tstamp = jiffies;
-   atomic_set(&mc->mca_refcnt, 1);
+   refcount_set(&mc->mca_refcnt, 1);
spin_lock_init(&mc->mca_lock);
 
/* initial mode is (EX, empty) */
@@ -1065,7 +1065,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, 
unsigned long resptime)
return;
 
if (del_timer(&ma->mca_timer)) {
-   atomic_dec(&ma->mca_refcnt);
+   refcount_dec(&ma->mca_refcnt);
delay = ma->mca_timer.expires - jiffies;
}
 
@@ -1074,7 +1074,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, 
unsigned long resptime)
 
ma->mca_timer.expires = jiffies + delay;
if (!mod_timer(&ma->mca_timer, jiffies + delay))
-   atomic_inc(&ma->mca_refcnt);
+   refcount_inc(&ma->mca_refcnt);
ma->mca_flags |= MAF_TIMER_RUNNING;
 }
 
@@ -1469,7 +1469,7 @@ int igmp6_event_report(struct sk_buff *skb)
if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
spin_lock(&ma->mca_lock);
if (del_timer(&ma->mca_timer))
-   atomic_dec(&ma->mca_refcnt);
+   refcount_dec(&ma->mca_refcnt);
ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
spin_unlock(&ma->mca_lock);
break;
@@ -2391,12 +2391,12 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 
spin_lock_bh(&ma->mca_lock);
if (del_timer(&ma->mca_timer)) {
-   atomic_dec(&ma->mca_refcnt);
+   refcount_dec(&ma->mca_refcnt);
delay = ma->mca_timer.expires - jiffies;
}
 
if (!mod_timer(&ma->mca_timer, jiffies + delay))
-   atomic_inc(&ma->mca_refcnt);
+   refcount_inc(&ma->mca_refcnt);
ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
spin_unlock_bh(&ma->mca_lock);
 }
-- 
2.7.4

[PATCH 8/9] net, ipv4: convert cipso_v4_doi.refcount from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/cipso_ipv4.h |  3 ++-
 net/ipv4/cipso_ipv4.c| 12 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index a34b141..880adb2 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /* known doi values */
@@ -85,7 +86,7 @@ struct cipso_v4_doi {
} map;
u8 tags[CIPSO_V4_TAG_MAXCNT];
 
-   atomic_t refcount;
+   refcount_t refcount;
struct list_head list;
struct rcu_head rcu;
 };
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c204477..c4c6e19 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -375,7 +375,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
struct cipso_v4_doi *iter;
 
list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-   if (iter->doi == doi && atomic_read(&iter->refcount))
+   if (iter->doi == doi && refcount_read(&iter->refcount))
return iter;
return NULL;
 }
@@ -429,7 +429,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def,
}
}
 
-   atomic_set(&doi_def->refcount, 1);
+   refcount_set(&doi_def->refcount, 1);
 
spin_lock(&cipso_v4_doi_list_lock);
if (cipso_v4_doi_search(doi_def->doi)) {
@@ -533,7 +533,7 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit 
*audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
-   if (!atomic_dec_and_test(&doi_def->refcount)) {
+   if (!refcount_dec_and_test(&doi_def->refcount)) {
spin_unlock(&cipso_v4_doi_list_lock);
ret_val = -EBUSY;
goto doi_remove_return;
@@ -576,7 +576,7 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
doi_def = cipso_v4_doi_search(doi);
if (!doi_def)
goto doi_getdef_return;
-   if (!atomic_inc_not_zero(&doi_def->refcount))
+   if (!refcount_inc_not_zero(&doi_def->refcount))
doi_def = NULL;
 
 doi_getdef_return:
@@ -597,7 +597,7 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
if (!doi_def)
return;
 
-   if (!atomic_dec_and_test(&doi_def->refcount))
+   if (!refcount_dec_and_test(&doi_def->refcount))
return;
spin_lock(&cipso_v4_doi_list_lock);
list_del_rcu(&doi_def->list);
@@ -630,7 +630,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt,
 
rcu_read_lock();
list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
-   if (atomic_read(&iter_doi->refcount) > 0) {
+   if (refcount_read(&iter_doi->refcount) > 0) {
if (doi_cnt++ < *skip_cnt)
continue;
ret_val = callback(iter_doi, cb_arg);
-- 
2.7.4

[PATCH 0/4] selftests: ftrace: ftracetest improvements

2017-07-03 Thread Masami Hiramatsu

Hello,

Here is v2 of ftracetest improvements, including test
return code change and immediate logging features.

The first version is here.
https://patchwork.kernel.org/patch/9821943/
https://patchwork.kernel.org/patch/9821945/

This version adds 2 patches according to discussions on
the previous version. [2/4] adds the --fail-unsupported option,
which treats an UNSUPPORTED result as a failure. [4/4] adds
"--logdir -", which logs all results to the console but
not to a file.

Changes in v2:
 - [2/4]: (new) adds --fail-unsupported option
 - [3/4]: Fix not to show failure log twice
 - [4/4]: (new) adds --logdir "-" option so that
  all log goes to console directly.

Thank you,

---

Masami Hiramatsu (4):
  selftests: ftrace: Do not failure if there is unsupported tests
  selftests: ftrace: Add --fail-unsupported option
  selftests: ftrace: Add more verbosity for immediate log
  selftests: ftrace: Output only to console with "--logdir -"


 tools/testing/selftests/ftrace/ftracetest |   45 ++---
 1 file changed, 34 insertions(+), 11 deletions(-)

--
Masami Hiramatsu (Linaro)

[PATCH 1/4] selftests: ftrace: Do not failure if there is unsupported tests

2017-07-03 Thread Masami Hiramatsu

Do not return failure exit code (1) for unsupported testcases,
since it is expected for stable kernels.

Previously, ftracetest was expected to run only on the current
release to avoid regressions. However, nowadays we also run
it on stable kernels. This means some test cases must return
an unsupported result. In such a case, we should NOT exit
ftracetest with an error status for unsupported results, so that
kselftest (the upper test wrapper) shows that it passed correctly.

Note that we continue to treat unresolved results as failures;
if test writers would like to notify users that the test result
should be reviewed, they can use exit_unresolved.

Signed-off-by: Masami Hiramatsu 
---
 tools/testing/selftests/ftrace/ftracetest |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/ftracetest 
b/tools/testing/selftests/ftrace/ftracetest
index 14a03ea..290cd42 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -187,7 +187,7 @@ eval_result() { # sigval
 $UNSUPPORTED)
   prlog "  [UNSUPPORTED]"
   UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
-  return 1 # this is not a bug, but the result should be reported.
+  return 0 # this is not a bug.
 ;;
 $XFAIL)
   prlog "  [XFAIL]"

[PATCH 7/9] net, ipv6: convert ip6addrlbl_entry.refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 net/ipv6/addrlabel.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 07cd7d2..7a428f6 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #if 0
 #define ADDRLABEL(x...) printk(x)
@@ -36,7 +37,7 @@ struct ip6addrlbl_entry {
int addrtype;
u32 label;
struct hlist_node list;
-   atomic_t refcnt;
+   refcount_t refcnt;
struct rcu_head rcu;
 };
 
@@ -137,12 +138,12 @@ static void ip6addrlbl_free_rcu(struct rcu_head *h)
 
 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
 {
-   return atomic_inc_not_zero(&p->refcnt);
+   return refcount_inc_not_zero(&p->refcnt);
 }
 
 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
 {
-   if (atomic_dec_and_test(&p->refcnt))
+   if (refcount_dec_and_test(&p->refcnt))
call_rcu(&p->rcu, ip6addrlbl_free_rcu);
 }
 
@@ -236,7 +237,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net 
*net,
newp->label = label;
INIT_HLIST_NODE(&newp->list);
write_pnet(&newp->lbl_net, net);
-   atomic_set(&newp->refcnt, 1);
+   refcount_set(&newp->refcnt, 1);
return newp;
 }
 
-- 
2.7.4

[PATCH 9/9] net, ipv4: convert fib_info.fib_clntref from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/ip_fib.h | 7 ---
 net/ipv4/fib_semantics.c | 2 +-
 net/ipv4/fib_trie.c  | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3dbfd5e..41d580c 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct fib_config {
u8  fc_dst_len;
@@ -105,7 +106,7 @@ struct fib_info {
struct hlist_node   fib_lhash;
struct net  *fib_net;
int fib_treeref;
-   atomic_tfib_clntref;
+   refcount_t  fib_clntref;
unsigned intfib_flags;
unsigned char   fib_dead;
unsigned char   fib_protocol;
@@ -430,12 +431,12 @@ void free_fib_info(struct fib_info *fi);
 
 static inline void fib_info_hold(struct fib_info *fi)
 {
-   atomic_inc(&fi->fib_clntref);
+   refcount_inc(&fi->fib_clntref);
 }
 
 static inline void fib_info_put(struct fib_info *fi)
 {
-   if (atomic_dec_and_test(&fi->fib_clntref))
+   if (refcount_dec_and_test(&fi->fib_clntref))
free_fib_info(fi);
 }
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ff47ea1..22210010 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1253,7 +1253,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
 
fi->fib_treeref++;
-   atomic_inc(&fi->fib_clntref);
+   refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
   &fib_info_hash[fib_info_hashfn(fi)]);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d56659e..64668c6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1463,7 +1463,7 @@ int fib_table_lookup(struct fib_table *tb, const struct 
flowi4 *flp,
}
 
if (!(fib_flags & FIB_LOOKUP_NOREF))
-   atomic_inc(&fi->fib_clntref);
+   refcount_inc(&fi->fib_clntref);
 
res->prefix = htonl(n->key);
res->prefixlen = KEYLENGTH - fa->fa_slen;
-- 
2.7.4

[PATCH 6/9] net, ipv6: convert xfrm6_tunnel_spi.refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 net/ipv6/xfrm6_tunnel.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index d7b731a..4e438bc 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -59,7 +59,7 @@ struct xfrm6_tunnel_spi {
struct hlist_node   list_byspi;
xfrm_address_t  addr;
u32 spi;
-   atomic_trefcnt;
+   refcount_t  refcnt;
struct rcu_head rcu_head;
 };
 
@@ -160,7 +160,7 @@ static u32 __xfrm6_tunnel_alloc_spi(struct net *net, 
xfrm_address_t *saddr)
 
memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
x6spi->spi = spi;
-   atomic_set(&x6spi->refcnt, 1);
+   refcount_set(&x6spi->refcnt, 1);
 
hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]);
 
@@ -178,7 +178,7 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, 
xfrm_address_t *saddr)
spin_lock_bh(&xfrm6_tunnel_spi_lock);
x6spi = __xfrm6_tunnel_spi_lookup(net, saddr);
if (x6spi) {
-   atomic_inc(&x6spi->refcnt);
+   refcount_inc(&x6spi->refcnt);
spi = x6spi->spi;
} else
spi = __xfrm6_tunnel_alloc_spi(net, saddr);
@@ -207,7 +207,7 @@ static void xfrm6_tunnel_free_spi(struct net *net, 
xfrm_address_t *saddr)
  list_byaddr)
{
if (xfrm6_addr_equal(&x6spi->addr, saddr)) {
-   if (atomic_dec_and_test(&x6spi->refcnt)) {
+   if (refcount_dec_and_test(&x6spi->refcnt)) {
hlist_del_rcu(&x6spi->list_byaddr);
hlist_del_rcu(&x6spi->list_byspi);
call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu);
-- 
2.7.4

[PATCH 0/9] v2 ipv4/ipv6 refcount conversions

2017-07-03 Thread Elena Reshetova

Changes in v2:
 * rebase on top of net-next
 * currently by default refcount_t = atomic_t (*) and uses all 
   atomic standard operations unless CONFIG_REFCOUNT_FULL is enabled.
   This is a compromise for the systems that are critical on
   performance (such as net) and cannot accept even slight delay
   on the refcounter operations.

This series, for ipv4/ipv6 network components, replaces atomic_t reference
counters with the new refcount_t type and API (see include/linux/refcount.h).
By doing this we prevent intentional or accidental
underflows or overflows that can lead to use-after-free vulnerabilities.

The patches are fully independent and can be cherry-picked separately.
In order to try with refcount functionality enabled in run-time,
CONFIG_REFCOUNT_FULL must be enabled.

NOTE: the automatic kernel builder for some reason doesn't like all my
network branches and regularly times out the builds on these branches.
The suggestion of "waiting a day for a good coverage" doesn't work, as
we have seen with the generic network conversions. So please wait for the
full report from the kernel test robot before merging further up.
This has been compile-tested in 116 configs, but 71 timed out (including
all s390-related configs again). I am trying to see if they can fix
build coverage for me in the meantime.

* The respective change is currently merged into -next as
  "locking/refcount: Create unchecked atomic_t implementation".

Elena Reshetova (9):
  net, ipv6: convert ipv6_txoptions.refcnt from atomic_t to refcount_t
  net, ipv6: convert inet6_dev.refcnt from atomic_t to refcount_t
  net, ipv6: convert inet6_ifaddr.refcnt from atomic_t to refcount_t
  net, ipv6: convert ifmcaddr6.mca_refcnt from atomic_t to refcount_t
  net, ipv6: convert ifacaddr6.aca_refcnt from atomic_t to refcount_t
  net, ipv6: convert xfrm6_tunnel_spi.refcnt from atomic_t to refcount_t
  net, ipv6: convert ip6addrlbl_entry.refcnt from atomic_t to refcount_t
  net, ipv4: convert cipso_v4_doi.refcount from atomic_t to refcount_t
  net, ipv4: convert fib_info.fib_clntref from atomic_t to refcount_t

 include/net/addrconf.h   | 14 +++---
 include/net/cipso_ipv4.h |  3 ++-
 include/net/if_inet6.h   |  9 +
 include/net/ip_fib.h |  7 ---
 include/net/ipv6.h   |  7 ---
 net/ipv4/cipso_ipv4.c| 12 ++--
 net/ipv4/fib_semantics.c |  2 +-
 net/ipv4/fib_trie.c  |  2 +-
 net/ipv6/addrconf.c  |  4 ++--
 net/ipv6/addrlabel.c |  9 +
 net/ipv6/anycast.c   |  6 +++---
 net/ipv6/exthdrs.c   |  4 ++--
 net/ipv6/ipv6_sockglue.c |  2 +-
 net/ipv6/mcast.c | 18 +-
 net/ipv6/xfrm6_tunnel.c  |  8 
 15 files changed, 56 insertions(+), 51 deletions(-)

-- 
2.7.4

[PATCH 5/9] net, ipv6: convert ifacaddr6.aca_refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/if_inet6.h | 2 +-
 net/ipv6/anycast.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 4bb52ce..d4088d1 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -147,7 +147,7 @@ struct ifacaddr6 {
struct rt6_info *aca_rt;
struct ifacaddr6*aca_next;
int aca_users;
-   atomic_taca_refcnt;
+   refcount_t  aca_refcnt;
unsigned long   aca_cstamp;
unsigned long   aca_tstamp;
 };
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 514ac25..0bbab8a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -203,12 +203,12 @@ void ipv6_sock_ac_close(struct sock *sk)
 
 static void aca_get(struct ifacaddr6 *aca)
 {
-   atomic_inc(&aca->aca_refcnt);
+   refcount_inc(&aca->aca_refcnt);
 }
 
 static void aca_put(struct ifacaddr6 *ac)
 {
-   if (atomic_dec_and_test(&ac->aca_refcnt)) {
+   if (refcount_dec_and_test(&ac->aca_refcnt)) {
in6_dev_put(ac->aca_idev);
dst_release(&ac->aca_rt->dst);
kfree(ac);
@@ -232,7 +232,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
-   atomic_set(&aca->aca_refcnt, 1);
+   refcount_set(&aca->aca_refcnt, 1);
 
return aca;
 }
-- 
2.7.4

[PATCH 1/9] net, ipv6: convert ipv6_txoptions.refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/ipv6.h   | 7 ---
 net/ipv6/exthdrs.c   | 4 ++--
 net/ipv6/ipv6_sockglue.c | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3e505bb..6eac5cf 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -203,7 +204,7 @@ extern rwlock_t ip6_ra_lock;
  */
 
 struct ipv6_txoptions {
-   atomic_trefcnt;
+   refcount_t  refcnt;
/* Length of this structure */
int tot_len;
 
@@ -265,7 +266,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct 
ipv6_pinfo *np)
rcu_read_lock();
opt = rcu_dereference(np->opt);
if (opt) {
-   if (!atomic_inc_not_zero(&opt->refcnt))
+   if (!refcount_inc_not_zero(&opt->refcnt))
opt = NULL;
else
opt = rcu_pointer_handoff(opt);
@@ -276,7 +277,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct 
ipv6_pinfo *np)
 
 static inline void txopt_put(struct ipv6_txoptions *opt)
 {
-   if (opt && atomic_dec_and_test(&opt->refcnt))
+   if (opt && refcount_dec_and_test(&opt->refcnt))
kfree_rcu(opt, rcu);
 }
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 0460af22..4996d73 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -971,7 +971,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions 
*opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
-   atomic_set(&opt2->refcnt, 1);
+   refcount_set(&opt2->refcnt, 1);
}
return opt2;
 }
@@ -1056,7 +1056,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions 
*opt,
return ERR_PTR(-ENOBUFS);
 
memset(opt2, 0, tot_len);
-   atomic_set(&opt2->refcnt, 1);
+   refcount_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a531ba0..85404e7 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -505,7 +505,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, 
int optname,
break;
 
memset(opt, 0, sizeof(*opt));
-   atomic_set(&opt->refcnt, 1);
+   refcount_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
-- 
2.7.4

[PATCH 2/9] net, ipv6: convert inet6_dev.refcnt from atomic_t to refcount_t

2017-07-03 Thread Elena Reshetova

refcount_t type and corresponding API should be
used instead of atomic_t when the variable is used as
a reference counter. This helps avoid accidental
refcounter overflows that might lead to use-after-free
situations.

Signed-off-by: Elena Reshetova 
Signed-off-by: Hans Liljestrand 
Signed-off-by: Kees Cook 
Signed-off-by: David Windsor 
---
 include/net/addrconf.h | 8 
 include/net/if_inet6.h | 3 ++-
 net/ipv6/addrconf.c| 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index d0889cb..620bd9a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -316,7 +316,7 @@ static inline struct inet6_dev *in6_dev_get(const struct 
net_device *dev)
rcu_read_lock();
idev = rcu_dereference(dev->ip6_ptr);
if (idev)
-   atomic_inc(&idev->refcnt);
+   refcount_inc(&idev->refcnt);
rcu_read_unlock();
return idev;
 }
@@ -332,18 +332,18 @@ void in6_dev_finish_destroy(struct inet6_dev *idev);
 
 static inline void in6_dev_put(struct inet6_dev *idev)
 {
-   if (atomic_dec_and_test(&idev->refcnt))
+   if (refcount_dec_and_test(&idev->refcnt))
in6_dev_finish_destroy(idev);
 }
 
 static inline void __in6_dev_put(struct inet6_dev *idev)
 {
-   atomic_dec(&idev->refcnt);
+   refcount_dec(&idev->refcnt);
 }
 
 static inline void in6_dev_hold(struct inet6_dev *idev)
 {
-   atomic_inc(&idev->refcnt);
+   refcount_inc(&idev->refcnt);
 }
 
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index f656f90..e7a17b2 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -17,6 +17,7 @@
 
 #include 
 #include 
+#include 
 
 /* inet6_dev.if_flags */
 
@@ -187,7 +188,7 @@ struct inet6_dev {
 
struct ifacaddr6*ac_list;
rwlock_tlock;
-   atomic_trefcnt;
+   refcount_t  refcnt;
__u32   if_flags;
int dead;
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 114fb64..2365f12 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -426,7 +426,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device 
*dev)
}
 
/* One reference from device. */
-   in6_dev_hold(ndev);
+   refcount_set(&ndev->refcnt, 1);
 
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
ndev->cnf.accept_dad = -1;
-- 
2.7.4

Re: [PATCH v3] acpi: configfs: Unload SSDT on configfs entry removal

2017-07-03 Thread Jan Kiszka

On 2017-06-09 20:36, Jan Kiszka wrote:
> Call directly into acpica to load a table to obtain its index on return.
> We choose the direct call of acpica internal functions to avoid having
> to modify its API which is used outside of Linux as well.
> 
> Use that index to unload the table again when the corresponding
> directory in configfs gets removed. This allows to change SSDTs without
> rebooting the system. It also allows to destroy devices again that a
> dynamically loaded SSDT created.
> 
> This is widely similar to the DT overlay behavior.
> 
> Signed-off-by: Jan Kiszka 
> ---
> 
> Change in v3:
>  - fix breakage if acpi_configfs is modular
> 
>  drivers/acpi/acpi_configfs.c | 20 +++-
>  drivers/acpi/acpica/tbdata.c |  4 
>  2 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c
> index 146a77fb762d..853bc7fc673f 100644
> --- a/drivers/acpi/acpi_configfs.c
> +++ b/drivers/acpi/acpi_configfs.c
> @@ -15,11 +15,15 @@
>  #include 
>  #include 
>  
> +#include "acpica/accommon.h"
> +#include "acpica/actables.h"
> +
>  static struct config_group *acpi_table_group;
>  
>  struct acpi_table {
>   struct config_item cfg;
>   struct acpi_table_header *header;
> + u32 index;
>  };
>  
>  static ssize_t acpi_table_aml_write(struct config_item *cfg,
> @@ -52,7 +56,11 @@ static ssize_t acpi_table_aml_write(struct config_item 
> *cfg,
>   if (!table->header)
>   return -ENOMEM;
>  
> - ret = acpi_load_table(table->header);
> + ACPI_INFO(("Host-directed Dynamic ACPI Table Load:"));
> + ret = acpi_tb_install_and_load_table(
> + ACPI_PTR_TO_PHYSADDR(table->header),
> + ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL, FALSE,
> + &table->index);
>   if (ret) {
>   kfree(table->header);
>   table->header = NULL;
> @@ -215,8 +223,18 @@ static struct config_item *acpi_table_make_item(struct 
> config_group *group,
>   return &table->cfg;
>  }
>  
> +static void acpi_table_drop_item(struct config_group *group,
> +  struct config_item *cfg)
> +{
> + struct acpi_table *table = container_of(cfg, struct acpi_table, cfg);
> +
> + ACPI_INFO(("Host-directed Dynamic ACPI Table Unload"));
> + acpi_tb_unload_table(table->index);
> +}
> +
>  struct configfs_group_operations acpi_table_group_ops = {
>   .make_item = acpi_table_make_item,
> + .drop_item = acpi_table_drop_item,
>  };
>  
>  static struct config_item_type acpi_tables_type = {
> diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
> index 27c5c27d4818..c9d6fa6d7cc6 100644
> --- a/drivers/acpi/acpica/tbdata.c
> +++ b/drivers/acpi/acpica/tbdata.c
> @@ -867,6 +867,8 @@ acpi_tb_install_and_load_table(acpi_physical_address 
> address,
>   return_ACPI_STATUS(status);
>  }
>  
> +ACPI_EXPORT_SYMBOL(acpi_tb_install_and_load_table)
> +
>  
> /***
>   *
>   * FUNCTION:acpi_tb_unload_table
> @@ -914,3 +916,5 @@ acpi_status acpi_tb_unload_table(u32 table_index)
>   acpi_tb_set_table_loaded_flag(table_index, FALSE);
>   return_ACPI_STATUS(status);
>  }
> +
> +ACPI_EXPORT_SYMBOL(acpi_tb_unload_table)
> 

Ping for this patch.

Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux

Re: [PATCH 06/14] qcom: mtd: nand: add bam dma descriptor handling

2017-07-03 Thread Archit Taneja




On 06/29/2017 12:45 PM, Abhishek Sahu wrote:

1. prepare_bam_async_desc is the function which will call
all the DMA APIs. It will fetch the outstanding scatter gather
list for passed channel and will do the DMA descriptor formation.
The DMA flag is dependent upon the type of channel.

2. For ADM DMA, the descriptor is being formed for every DMA
request so its sgl count will be always 1 while in BAM DMA, the
clubbing of descriptor is being done to increase throughput.

3. ADM uses only one channel while in BAM, data descriptors
will be submitted to tx channel (for write) or rx channel
(for read) and all the registers read/write descriptors in
command channel.

Signed-off-by: Abhishek Sahu 
---
  drivers/mtd/nand/qcom_nandc.c | 119 --
  1 file changed, 114 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index f8d0bde..7042a65 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -206,14 +206,22 @@ struct bam_transaction {
   * This data type corresponds to the nand dma descriptor
   * @list - list for desc_info
   * @dir - DMA transfer direction
- * @sgl - sgl which will be used for single sgl dma descriptor
+ * @sgl - sgl which will be used for single sgl dma descriptor. Only used by 
ADM
+ * @bam_sgl - sgl which will be used for dma descriptor. Only used by BAM
+ * @sgl_cnt - number of SGL in bam_sgl. Only used by BAM
   * @dma_desc - low level dma engine descriptor
   */
  struct desc_info {
struct list_head node;
  
  	enum dma_data_direction dir;

-   struct scatterlist sgl;
+   union {
+   struct scatterlist sgl;


Can you make this adm_sgl instead for consistency? Also, please use only
two tabs instead of one here for indentation.


+   struct {
+   struct scatterlist *bam_sgl;
+   int sgl_cnt;
+   };
+   };
struct dma_async_tx_descriptor *dma_desc;
  };
  
@@ -564,6 +572,68 @@ static void update_rw_regs(struct qcom_nand_host *host, int num_cw, bool read)

nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
  }
  
+/*

+ * Maps the scatter gather list for DMA transfer and forms the DMA descriptor
+ * for BAM. This descriptor will be added in the NAND DMA descriptor queue
+ * which will be submitted to DMA engine.
+ */
+static int prepare_bam_async_desc(struct qcom_nand_controller *nandc,
+ struct dma_chan *chan,
+ unsigned long flags)


From what I gathered in patch #10, this would be called by
prep_dma_desc_data_bam() and prep_dma_desc_command(). Can you rename these
two to something like prep_bam_dma_desc_data() and prep_bam_dma_desc_cmd()?



+{
+   struct desc_info *desc;
+   struct scatterlist *sgl;
+   unsigned int sgl_cnt;
+   struct bam_transaction *bam_txn = nandc->bam_txn;
+   enum dma_transfer_direction dir_eng;
+   struct dma_async_tx_descriptor *dma_desc;
+
+   desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+   if (!desc)
+   return -ENOMEM;
+
+   if (chan == nandc->cmd_chan) {
+   sgl = &bam_txn->cmd_sgl[bam_txn->cmd_sgl_start];
+   sgl_cnt = bam_txn->cmd_sgl_pos - bam_txn->cmd_sgl_start;
+   bam_txn->cmd_sgl_start = bam_txn->cmd_sgl_pos;
+   dir_eng = DMA_MEM_TO_DEV;
+   desc->dir = DMA_TO_DEVICE;
+   } else if (chan == nandc->tx_chan) {
+   sgl = &bam_txn->data_sg[bam_txn->tx_sgl_start];
+   sgl_cnt = bam_txn->tx_sgl_pos - bam_txn->tx_sgl_start;
+   bam_txn->tx_sgl_start = bam_txn->tx_sgl_pos;
+   dir_eng = DMA_MEM_TO_DEV;
+   desc->dir = DMA_TO_DEVICE;
+   } else {
+   sgl = &bam_txn->data_sg[bam_txn->rx_sgl_start];
+   sgl_cnt = bam_txn->rx_sgl_pos - bam_txn->rx_sgl_start;
+   bam_txn->rx_sgl_start = bam_txn->rx_sgl_pos;
+   desc->dir = DMA_FROM_DEVICE;
+   dir_eng = DMA_DEV_TO_MEM;
+   }
+
+   sg_mark_end(sgl + sgl_cnt - 1);
+   dma_map_sg(nandc->dev, sgl, sgl_cnt, desc->dir);


Is it safe to assume here that dma_map_sg won't return an error?


+
+   desc->sgl_cnt = sgl_cnt;
+   desc->bam_sgl = sgl;
+
+   dma_desc = dmaengine_prep_slave_sg(chan, sgl, sgl_cnt, dir_eng,
+  flags);
+
+   if (!dma_desc) {
+   dev_err(nandc->dev, "failure in prep desc\n");
+   kfree(desc);
+   return -EINVAL;
+   }
+
+   desc->dma_desc = dma_desc;
+
+   list_add_tail(&desc->node, &nandc->desc_list);
+
+   return 0;
+}
+




  static int prep_dma_desc(struct qcom_nand_controller *nandc, bool read,
 int reg_off, const void *vaddr, int size,
 bool flow_cont

[PATCH 2/6] Staging: rtl8712 : os_intfs.c: use octal permission representation

2017-07-03 Thread Jaya Durga

Fix checkpatch.pl Warning: Symbolic permissions 'S_IRUGO | S_IWUSR' are not 
preferred.
Consider using octal permissions '0644'.

Signed-off-by: Jaya Durga 
---
 drivers/staging/rtl8712/os_intfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8712/os_intfs.c 
b/drivers/staging/rtl8712/os_intfs.c
index 8836b31..e698f6e 100644
--- a/drivers/staging/rtl8712/os_intfs.c
+++ b/drivers/staging/rtl8712/os_intfs.c
@@ -93,7 +93,7 @@
  */
 static int wifi_test;
 
-module_param_string(ifname, ifname, sizeof(ifname), S_IRUGO | S_IWUSR);
+module_param_string(ifname, ifname, sizeof(ifname), 0644);
 module_param(wifi_test, int, 0644);
 module_param(initmac, charp, 0644);
 module_param(video_mode, int, 0644);
-- 
1.9.1

[PATCH] drm/i915: Fix an error handling path in 'i915_gem_do_execbuffer'

2017-07-03 Thread Christophe JAILLET

If 'eb_create()' fails, we must release some resources as done in all other
error handling paths of this function.

Signed-off-by: Christophe JAILLET 
---
This patch is just a guess based on surrounding gotos and function names.
(i.e. 'get_unused_fd_flags()' and 'put_unused_fd()')
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9337446f1068..0746b352f820 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2180,8 +2180,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
 
-   if (eb_create(&eb))
-   return -ENOMEM;
+   err = eb_create(&eb);
+   if (err)
+   goto err_put_unused_fd;
 
/*
 * Take a local wakeref for preparing to dispatch the execbuf as
@@ -2340,6 +2341,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_rpm:
intel_runtime_pm_put(eb.i915);
eb_destroy(&eb);
+err_put_unused_fd:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
 err_in_fence:
-- 
2.11.0

[PATCH] mux: remove the Kconfig question for the subsystem

2017-07-03 Thread Peter Rosin

The MULTIPLEXER question in the Kconfig might be confusing and is
of dubious value. Remove it. This makes consumers responsible for
selecting MULTIPLEXER, which they already do.

Signed-off-by: Peter Rosin 
---
 drivers/mux/Kconfig | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

Hi Linus!

My thinking was that I wanted it to be possible to select
mux drivers before any mux consumer was selected. I also
wanted to avoid one question for each of the mux drivers
when the whole thing is not needed most of the time...

But ok, is something like this what you wanted?

Cheers,
peda

diff --git a/drivers/mux/Kconfig b/drivers/mux/Kconfig
index 7c754a0..96d364e 100644
--- a/drivers/mux/Kconfig
+++ b/drivers/mux/Kconfig
@@ -2,18 +2,7 @@
 # Multiplexer devices
 #
 
-menuconfig MULTIPLEXER
-   tristate "Multiplexer subsystem"
-   help
- Multiplexer controller subsystem. Multiplexers are used in a
- variety of settings, and this subsystem abstracts their use
- so that the rest of the kernel sees a common interface. When
- multiple parallel multiplexers are controlled by one single
- multiplexer controller, this subsystem also coordinates the
- multiplexer accesses.
-
- To compile the subsystem as a module, choose M here: the module will
- be called mux-core.
+menu "Multiplexer support"
 
 if MULTIPLEXER
 
@@ -57,3 +46,5 @@ config MUX_MMIO
  be called mux-mmio.
 
 endif
+
+endmenu
-- 
2.1.4

Re: [PATCH v2] ext4: have ext4_xattr_set_handle() allocate journal credits

2017-07-03 Thread Theodore Ts'o

On Fri, Jun 30, 2017 at 12:36:51PM -0700, Tahsin Erdogan wrote:
> > One problem with this approach is that restarting the transaction handle 
> > will
> > make the xattr update non-atomic, which could be a real problem for some
> > workloads.  For example, ACLs or SELinux or fscrypt xattrs being added in
> > a separate transaction from file creation, or being modified (delete in a
> > separate transaction from add) and then lost completely if the system 
> > crashes
> > before the second transaction is committed.
> 
> Agreed.

I really don't like this patch for this reason.

In fact, it doesn't work because in your example code path:

> An example code path is this:
> 
> ext4_mkdir()
>   ext4_new_inode_start_handle()
> __ext4_new_inode()   <<== transaction handle is started here
>   ext4_init_acl()
> __ext4_set_acl()
>   ext4_xattr_set_handle()
> 
> In this case, __ext4_new_inode() needs to figure out all journal
> credits needed including the ones for ext4_xattr_set_handle(). This is
> a few levels deep so reaching out to ext4_xattr_set_credits() with the
> right parameters is where the complexity lies.

If we need to restart a transaction in ext4_init_acl(), we will end up
breaking up a transaction into two pieces.  Which means if we crash,
we could very easily end up with a corrupt file system because the
inode might be allocated, but not yet linked into the directory
hierarchy.

Worse, it doesn't really solve the problem because
ext4_xattr_ensure_credits() merely makes sure there are enough credits
for the xattr operation.  If setting the xattr ACL chews up credits
needed to insert the name of the newly created file into the
directory, you're still going to end up running into problems.

The way we've historically handled this is to simplify things by
making worst-case estimates when the transaction handle is started.  So
for example, we assume the worst case that we'll split a directory
hash tree, even though we might not know whether or not this will be
necessary.  That's because if we over-estimate the number of credits
needed for a handle, it's really not a disaster.  Most handles are
active for a very short time, and when we close the handle, we can
give back any unused credits.

I understand that for ext4_new_inode it can be quite tricky, since in
theory we might need to add an SE Linux label, plus an ACL, plus an
encryption context.

The one piece of good news is that with the xattr inode deduplication
feature, ext4_init_acl as called from ext4_new_inode should always
require just bumping a refcount, since the ACL will be inherited from
the directory's default ACL.

The bad news is that in general, we don't know what
security_inode_init_security() will do.  In theory, it could try to
set an arbitrarily large label, although in practice we know the SE
Linux label tends not to be too terribly large.

Are you aware of other cases where we're likely to run into problems
besides ext4_new_inode()?

- Ted

linux-next: Tree for Jul 4

2017-07-03 Thread Stephen Rothwell

Hi all,

Please do not add any v4.14 material to your linux-next included branches
until after v4.13-rc1 has been released.

Changes since 20170703:

The sound-asoc tree lost its build failure.

The spi tree lost its build failure.

Non-merge commits (relative to Linus' tree): 10374
 9398 files changed, 767022 insertions(+), 194071 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig (with
CONFIG_BUILD_DOCSRC=n) for x86_64, a multi_v7_defconfig for arm and a
native build of tools/perf. After the final fixups (if any), I do an
x86_64 modules_install followed by builds for x86_64 allnoconfig,
powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig
and pseries_le_defconfig and i386, sparc and sparc64 defconfig. And
finally, a simple boot test of the powerpc pseries_le_defconfig kernel
in qemu.

Below is a summary of the state of the merge.

I am currently merging 266 trees (counting Linus' and 41 trees of bug
fix patches pending for the current merge release).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell

$ git checkout master
$ git reset --hard stable
Merging origin/master (c6b1e36c8fa0 Merge branch 'for-4.13/block' of 
git://git.kernel.dk/linux-block)
Merging fixes/master (97da3854c526 Linux 4.11-rc3)
Merging kbuild-current/fixes (ad8181060788 kconfig: fix sparse warnings in 
nconfig)
Merging arc-current/for-curr (c0bc126f97fb Linux 4.12-rc7)
Merging arm-current/fixes (9e25ebfe56ec ARM: 8685/1: ensure memblock-limit is 
pmd-aligned)
Merging m68k-current/for-linus (204a2be30a7a m68k: Remove ptrace_signal_deliver)
Merging metag-fixes/fixes (b884a190afce metag/usercopy: Add missing fixups)
Merging powerpc-fixes/fixes (d6bd8194e286 powerpc/32: Avoid miscompilation 
w/GCC 4.6.3 - don't inline copy_to/from_user())
Merging sparc/master (dbd2667a4fb9 sparc64: Fix gup_huge_pmd)
Merging fscrypt-current/for-stable (42d97eb0ade3 fscrypt: fix renaming and 
linking special files)
Merging net/master (ea23b42739a2 Merge tag 'mlx5-fixes-2017-06-28' of 
git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux)
Merging ipsec/master (ca3a1b856636 esp6_offload: Fix IP6CB(skb)->nhoff for ESP 
GRO)
Merging netfilter/master (91af6ba7ff16 netfilter: ebt_nflog: fix unexpected 
truncated packet)
Merging ipvs/master (3c5ab3f395d6 ipvs: SNAT packet replies only for NATed 
connections)
Merging wireless-drivers/master (35abcd4f9f30 brcmfmac: fix uninitialized 
warning in brcmf_usb_probe_phase2())
Merging mac80211/master (4b153ca989a9 Merge tag 'mac80211-for-davem-2017-06-16' 
of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211)
Merging sound-current/for-linus (a178232ddab5 ALSA: hda/realtek - Remove 
GPIO_MASK)
Merging pci-current/for-linus (98dbf5af4fdd PCI: endpoint: Select CRC32 to fix 
test build error)
Merging driver-core.current/driver-core-linus (08332893e37a Linux 4.12-rc2)
Merging tty.current/tty-linus (3c2993b8c614 Linux 4.12-rc4)
Merging usb.current/usb-linus (dec08194ffec xhci: Limit USB2 port wake support 
for AMD Promontory hosts)
Merging usb-gadget-fixes/fixes (f50b878fed33 USB: gadget: fix GPF in gadgetfs)
Merging usb-serial-fixes/usb-linus (996fab55d864 USB: serial: qcserial: new 
Sierra Wireless EM7305 device ID)
Merging usb-chipidea-fixes/ci-for-usb-stable (cbb22ebcfb99 usb: chipidea: core: 
check before accessing ci_role in ci_role_show)
Merging phy/fixes (9605bc46433d phy: qualcomm: phy-qcom-qmp: fix application of 
sizeof to pointer)
Merging staging.current/staging-linus (41f1830f5a7a Linux 4.12-rc6)
Merging char-misc.current/char-misc-linus (32c1431eea48 Linux 4.12-rc5)
Merging input-current/for-linus (9768935264c4 Input: synaptics-rmi4 - only read 
the F54 query registers which are used)
Merging crypto-current/master (019d62db5401 crypto: caam - fix gfp allocation 
flags (part II))
Merging ide/master (acfead32f3f9 ide: don't c

Re: [PATCH v2 09/19] media: camss: Add core files

2017-07-03 Thread Sakari Ailus

Hi Todor,

On Mon, Jul 03, 2017 at 05:03:40PM +0300, Todor Tomov wrote:
> >> +  unsigned int i;
> >> +
> >> +  v4l2_of_parse_endpoint(node, &vep);
> >> +
> >> +  csd->interface.csiphy_id = vep.base.port;
> >> +
> >> +  mipi_csi2 = &vep.bus.mipi_csi2;
> >> +  lncfg->clk.pos = mipi_csi2->clock_lane;
> >> +  lncfg->clk.pol = mipi_csi2->lane_polarities[0];
> >> +  lncfg->num_data = mipi_csi2->num_data_lanes;
> >> +
> >> +  lncfg->data = devm_kzalloc(dev, lncfg->num_data * sizeof(*lncfg->data),
> >> + GFP_KERNEL);
> >> +  if (!lncfg->data)
> >> +  return -ENOMEM;
> >> +
> >> +  for (i = 0; i < lncfg->num_data; i++) {
> >> +  lncfg->data[i].pos = mipi_csi2->data_lanes[i];
> >> +  lncfg->data[i].pol = mipi_csi2->lane_polarities[i + 1];
> >> +  }
> >> +
> >> +  of_property_read_u32(node, "qcom,settle-cnt", settle_cnt);
> > 
> > Isn't this something that depends on the CSI-2 bus speed, for instance?
> > Could you calculate it instead of putting it to DT?
> 
> Actually, after some digging into this, yes, I can calculate it. I can
> calculate the CSI-2 bus speed based on the sensor's output pixel clock
> and then calculate the settle time and this settle count value.
> So I already have the code to get the sensor's pixel clock using the
> standard v4l2 control V4L2_CID_PIXEL_RATE.

What we have currently in documentation on this is here:

<https://www.linuxtv.org/downloads/v4l-dvb-apis/kapi/csi2.html>

I.e. both should be implemented. The link frequency is rather more relevant
for CSI-2 albeit you can derive one from the other in case of CSI-2. The
pixel rate documentation should probably be rather elsewhere.

> Now the question is what to do if the sensor driver doesn't support this
> control? Just return an error and refuse to work with this "limited"
> sensor driver?

If the sensor driver does not provide enough information to work with a
receiver, it's only fair not to proceed with streaming. That said, it might
be possible to manage with some sensible defaults in some cases but then again
you could have only some units working with this configuration. It'd be
much safer to require the information: not doing so hides the error and
makes it (more) difficult to debug.

...

> >> +struct camss {
> >> +  struct v4l2_device v4l2_dev;
> >> +  struct v4l2_async_notifier notifier;
> >> +  struct media_device media_dev;
> >> +  struct device *dev;
> >> +  struct csiphy_device csiphy[CAMSS_CSIPHY_NUM];
> >> +  struct csid_device csid[CAMSS_CSID_NUM];
> >> +  struct ispif_device ispif;
> >> +  struct vfe_device vfe;
> >> +  atomic_t ref_count;

If this is refcount, then you should use refcount_t instead.

-- 
Regards,

Sakari Ailus
e-mail: sakari.ai...@iki.fi XMPP: sai...@retiisi.org.uk

Re: [Xen-devel] [PATCH] xen/balloon: don't online new memory initially

2017-07-03 Thread Juergen Gross

On 03/07/17 20:44, Igor Druzhinin wrote:
> On 03/07/17 16:40, Juergen Gross wrote:
>> When setting up the Xenstore watch for the memory target size the new
>> watch will fire at once. Don't try to reach the configured target size
>> by onlining new memory in this case, as the current memory size will
>> be smaller in almost all cases due to e.g. BIOS reserved pages.
>>
>> Onlining new memory will lead to more problems e.g. undesired conflicts
>> with NVMe devices meant to be operated as block devices.
>>
>> Instead remember the difference between target size and current size
>> when the watch fires for the first time and apply it to any further
>> size changes, too.
>>
>> In order to avoid races between balloon.c and xen-balloon.c init calls
>> do the xen-balloon.c initialization from balloon.c.
>>
>> Signed-off-by: Juergen Gross 
>> ---
>>  drivers/xen/balloon.c |  3 +++
>>  drivers/xen/xen-balloon.c | 20 
>>  include/xen/balloon.h |  8 
>>  3 files changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
>> index 50dcb68d8070..ab609255a0f3 100644
>> --- a/drivers/xen/balloon.c
>> +++ b/drivers/xen/balloon.c
>> @@ -780,6 +780,9 @@ static int __init balloon_init(void)
>>  }
>>  #endif
>>  
>> +/* Init the xen-balloon driver. */
>> +xen_balloon_init();
>> +
>>  return 0;
>>  }
>>  subsys_initcall(balloon_init);
>> diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
>> index e7715cb62eef..66ec519c825c 100644
>> --- a/drivers/xen/xen-balloon.c
>> +++ b/drivers/xen/xen-balloon.c
>> @@ -59,6 +59,8 @@ static void watch_target(struct xenbus_watch *watch,
>>  {
>>  unsigned long long new_target;
>>  int err;
>> +static bool watch_fired;
>> +static unsigned long target_diff;
>>  
>>  err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
>>  if (err != 1) {
>> @@ -69,7 +71,14 @@ static void watch_target(struct xenbus_watch *watch,
>>  /* The given memory/target value is in KiB, so it needs converting to
>>   * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
>>   */
>> -balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
>> +new_target >>= PAGE_SHIFT - 10;
>> +if (watch_fired) {
>> +balloon_set_new_target(new_target - target_diff);
>> +return;
>> +}
>> +
>> +watch_fired = true;
>> +target_diff = new_target - balloon_stats.target_pages;
>>  }
>>  static struct xenbus_watch target_watch = {
>>  .node = "memory/target",
>> @@ -94,13 +103,8 @@ static struct notifier_block xenstore_notifier = {
>>  .notifier_call = balloon_init_watcher,
>>  };
>>  
>> -static int __init balloon_init(void)
>> +void __init xen_balloon_init(void)
>>  {
>> -if (!xen_domain())
>> -return -ENODEV;
>> -
>> -pr_info("Initialising balloon driver\n");
>> -
>>  register_balloon(&balloon_dev);
>>  
>>  register_xen_selfballooning(&balloon_dev);
>> @@ -109,7 +113,7 @@ static int __init balloon_init(void)
>>  
>>  return 0;
>>  }
>> -subsys_initcall(balloon_init);
>> +EXPORT_SYMBOL_GPL(xen_balloon_init);
>>  
>>  #define BALLOON_SHOW(name, format, args...) \
>>  static ssize_t show_##name(struct device *dev,  \
>> diff --git a/include/xen/balloon.h b/include/xen/balloon.h
>> index d1767dfb0d95..8906361bb50c 100644
>> --- a/include/xen/balloon.h
>> +++ b/include/xen/balloon.h
>> @@ -35,3 +35,11 @@ static inline int register_xen_selfballooning(struct 
>> device *dev)
>>  return -ENOSYS;
>>  }
>>  #endif
>> +
>> +#ifdef CONFIG_XEN_BALLOON
>> +void xen_balloon_init(void);
>> +#else
>> +static inline void xen_balloon_init(void)
>> +{
>> +}
>> +#endif
>>
> 
> We came across the same issue just recently. The problem was that for
> some kernel versions DMA buffers for emulated devices are allocated in
> this recently hotplugged area. This area is not properly described for
> QEMU so when a DMA request comes in QEMU treats it as "unassigned" and
> skips by default. This eventually leads to cryptic failures of system
> loading.
> 
> Internally we developed a workaround for QEMU with which we try to
> satisfy all the "unassigned" requests. But it doesn't solve the problem
> in a proper way IMHO.
> 
> I haven't completely understood your use-case but we might try to come
> up with a general solution for both of the problems because they are
> obviously related.
> 
>> Onlining new memory will lead to more problems e.g. undesired conflicts
>> with NVMe devices meant to be operated as block devices.
> 
> Could you explain this in more detail?

Please see

https://lists.xen.org/archives/html/xen-devel/2017-03/msg03020.html

for a more detailed discussion.


Juergen

Re: 'skb' buffer address information leakage

2017-07-03 Thread Jakub Kicinski

On Tue, 4 Jul 2017 13:12:18 +0800, Dison River wrote:
> drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c:167
>  seq_printf(file, " frag=%p", skb);

FWIW that's actually not a skb pointer.  The structure is defined like
this:

struct nfp_net_tx_buf {
union { 
struct sk_buff *skb;
void *frag;
};
dma_addr_t dma_addr;
short int fidx;
u16 pkt_cnt;
u32 real_len;
};

So the line in question is actually reading the frag pointer, I just
reused the skb variable, because this has to be read via READ_ONCE()
and NULL-checked so I thought that doing it separately for skb and
frag is a waste of LOC especially in debug code.  I will queue up a
clean up for after the merge window.

Thanks!

Re: [RFC][PATCHv3 2/5] printk: introduce printing kernel thread

2017-07-03 Thread Sergey Senozhatsky

On (07/03/17 15:34), Steven Rostedt wrote:
> > +#define PRINTK_FLOOD_DEFAULT_DELAY 10
> > +
> >  int printk_delay_msec __read_mostly;
> >  
> > +static inline void __printk_delay(int m)
> > +{
> > +   while (m--) {
> > +   mdelay(1);
> > +   touch_nmi_watchdog();
> > +   }
> > +}
> > +
> >  static inline void printk_delay(void)
> >  {
> > -   if (unlikely(printk_delay_msec)) {
> > -   int m = printk_delay_msec;
> > +   unsigned long flags;
> > +   u64 console_seen = 0, console_to_see;
> >  
> > -   while (m--) {
> > -   mdelay(1);
> > -   touch_nmi_watchdog();
> > -   }
> > +   if (printk_delay_msec) {
> > +   __printk_delay(printk_delay_msec);
> > +   return;
> > +   }
> > +
> 
> This had better be an option, and not default.

yes.

> And what happens if the printk caller happens to preempt the one
> doing the writes to consoles?

in short - we just burn CPU cycles. that case is broken.

that's mostly the reason behind PRINTK_FLOOD_DEFAULT_DELAY being quite
small.

one can simply do

console_lock();
printk();
printk();

printk();
console_unlock();

and trigger a useless throttling. a needed one in general case,
but useless in the given circumstances.

not sure if we can properly throttle printk in all of the cases.
we know that console_sem is locked, but we don't know what for.
is CPU that owns the console_sem is now in console_unlock() or
somewhere in fbcon, or anywhere else. we probably need not to
throttle printk() if we know that console_sem is already locked
by this_cpu and we simply call printk either from IRQ that
preempted console_unlock() on this_cpu or recursive printk from
console_unlock()... and so on.

-ss

Re: [PATCH] media: vb2 dma-sg: Constify dma_buf_ops structures.

2017-07-03 Thread Marek Szyprowski


Hi Arvind,

On 2017-07-01 14:18, Arvind Yadav wrote:

dma_buf_ops are not supposed to change at runtime. All functions
working with dma_buf_ops provided by <linux/dma-buf.h> work with
const dma_buf_ops. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
   5238     112       4    5354    14ea drivers/media/v4l2-core/videobuf2-dma-sg.o

File size After adding 'const':
   text    data     bss     dec     hex filename
   5358       0       4    5362    14f2 drivers/media/v4l2-core/videobuf2-dma-sg.o

Signed-off-by: Arvind Yadav 


Acked-by: Marek Szyprowski 


---
  drivers/media/v4l2-core/videobuf2-dma-sg.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c 
b/drivers/media/v4l2-core/videobuf2-dma-sg.c
index 8e8798a..f8b4643 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -500,7 +500,7 @@ static int vb2_dma_sg_dmabuf_ops_mmap(struct dma_buf *dbuf,
return vb2_dma_sg_mmap(dbuf->priv, vma);
  }
  
-static struct dma_buf_ops vb2_dma_sg_dmabuf_ops = {

+static const struct dma_buf_ops vb2_dma_sg_dmabuf_ops = {
.attach = vb2_dma_sg_dmabuf_ops_attach,
.detach = vb2_dma_sg_dmabuf_ops_detach,
.map_dma_buf = vb2_dma_sg_dmabuf_ops_map,


Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland

Re: "mm: use early_pfn_to_nid in page_ext_init" broken on some configurations?

2017-07-03 Thread Joonsoo Kim

On Mon, Jul 03, 2017 at 04:18:01PM +0200, Vlastimil Babka wrote:
> On 07/03/2017 01:48 PM, Vlastimil Babka wrote:
> > On 06/30/2017 04:18 PM, Michal Hocko wrote:
> >> fe53ca54270a ("mm: use early_pfn_to_nid in page_ext_init") seem
> >> to silently depend on CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID resp.
> >> CONFIG_HAVE_MEMBLOCK_NODE_MAP. early_pfn_to_nid is returning zero with
> >> !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && 
> >> !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
> >> I am not sure how widely is this used but such a code is tricky. I see
> >> how catching early allocations during deferred initialization might be
> >> useful but a subtly broken code sounds like a problem to me.  So is
> >> fe53ca54270a worth this or we should revert it?
> > 
> > There might be more issues with fe53ca54270a, I think. This I've
> > observed on our 4.4-based kernel, which has deferred page struct init,
> > but doesn't have b8f1a75d61d8 ("mm: call page_ext_init() after all
> > struct pages are initialized") nor aforementioned fe53ca54270a:
> > 
> > [0.00] allocated 421003264 bytes of page_ext
> > [0.00] Node 0, zone  DMA: page owner found early allocated 0 
> > pages
> > [0.00] Node 0, zoneDMA32: page owner found early allocated 33 
> > pages
> > [0.00] Node 0, zone   Normal: page owner found early allocated 
> > 2842622 pages
> > [0.00] BUG: unable to handle kernel NULL pointer dereference at 
> >   (null)
> > [0.00] IP: [] init_page_owner+0x12a/0x240
> > [0.00] PGD 0 
> > [0.00] Oops:  [#1] SMP 
> > [0.00] Modules linked in:
> > [0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.74+ #7
> > [0.00] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > 1.0.0-prebuilt.qemu-project.org 04/01/2014
> > [0.00] task: 81e104c0 ti: 81e0 task.ti: 
> > 81e0
> > [0.00] RIP: 0010:[]  [] 
> > init_page_owner+0x12a/0x240
> > [0.00] RSP: :81e03ed0  EFLAGS: 00010046
> > [0.00] RAX:  RBX: 88083ffe0210 RCX: 
> > ea001300
> > [0.00] RDX: 0300 RSI: 81f57437 RDI: 
> > 004c
> > [0.00] RBP: 81e03f20 R08: 81e03e90 R09: 
> > 
> > [0.00] R10: 004c0200 R11:  R12: 
> > ea00
> > [0.00] R13: 004c0200 R14: 004c R15: 
> > 0084
> > [0.00] FS:  () GS:88042fc0() 
> > knlGS:
> > [0.00] CS:  0010 DS:  ES:  CR0: 80050033
> > [0.00] CR2:  CR3: 01e0b000 CR4: 
> > 000406b0
> > [0.00] Stack:
> > [0.00]  0206 88083ffe0f90 88083ffdf000 
> > 3181
> > [0.00]  ea001300 0040 ea00 
> > 0084
> > [0.00]  0084 8e10 81e03f50 
> > 81f84145
> > [0.00] Call Trace:
> > [0.00]  [] page_ext_init+0x15e/0x167
> > [0.00]  [] start_kernel+0x351/0x418
> > [0.00]  [] ? early_idt_handler_array+0x120/0x120
> > [0.00]  [] x86_64_start_reservations+0x2a/0x2c
> > [0.00]  [] x86_64_start_kernel+0x12c/0x13b
> > [0.00] Code: 81 e2 00 fe ff ff 4d 39 fa 4d 0f 47 d7 4d 39 f2 4d 89 
> > d5 77 34 eb 5e 48 8b 01 f6 c4 04 75 21 48 89 cf 48 89 4d d0 e8 b6 35 00 00 
> > <48> 8b 00 a8 04 75 0e 48 8b 4d d0 e9 c2 00 00 00 48 83 45 c8 01 
> > [0.00] RIP  [] init_page_owner+0x12a/0x240
> > [0.00]  RSP 
> > [0.00] CR2: 
> > [0.00] ---[ end trace 19e05592f03a690f ]---
> > 
> > Note that this is different backtrace than in b8f1a75d61d8 log.
> > 
> > Still, backporting b8f1a75d61d8 fixes this:
> > 
> > [1.538379] allocated 738197504 bytes of page_ext
> > [1.539340] Node 0, zone  DMA: page owner found early allocated 0 
> > pages
> > [1.540179] Node 0, zoneDMA32: page owner found early allocated 33 
> > pages
> > [1.611173] Node 0, zone   Normal: page owner found early allocated 
> > 96755 pages
> > [1.683167] Node 1, zone   Normal: page owner found early allocated 
> > 96575 pages
> > 
> > No panic, notice how it allocated more for page_ext, and found smaller 
> > number of
> > early allocated pages.
> > 
> > Now backporting fe53ca54270a on top:
> > 
> > [0.00] allocated 738197504 bytes of page_ext
> > [0.00] Node 0, zone  DMA: page owner found early allocated 0 
> > pages
> > [0.00] Node 0, zoneDMA32: page owner found early allocated 33 
> > pages
> > [0.00] Node 0, zone   Normal: page owner found early allocated 
> > 2842622 pages
> > [0.00] Node 1, zone   Normal: page owner found early allocated 
> > 3694362 pages
> > 
> > Again no panic, and same amount of page_ext usage. But the "early 
>

Re: [PATCH] media: vb2 vmalloc: Constify dma_buf_ops structures.

2017-07-03 Thread Marek Szyprowski


Hi Arvind,

On 2017-07-01 13:37, Arvind Yadav wrote:

dma_buf_ops are not supposed to change at runtime. All functions
working with dma_buf_ops provided by <linux/dma-buf.h> work with
const dma_buf_ops. So mark the non-const structs as const.

File size before:
text   data bss dec hex filename
3171192   03363 d23 
drivers/media/v4l2-core/videobuf2-vmalloc.o

File size After adding 'const':
text   data bss dec hex filename
3291 80   03371 d2b 
drivers/media/v4l2-core/videobuf2-vmalloc.o

Signed-off-by: Arvind Yadav 


Acked-by: Marek Szyprowski 


---
  drivers/media/v4l2-core/videobuf2-vmalloc.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c 
b/drivers/media/v4l2-core/videobuf2-vmalloc.c
index b337d78..6bc130f 100644
--- a/drivers/media/v4l2-core/videobuf2-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c
@@ -338,7 +338,7 @@ static int vb2_vmalloc_dmabuf_ops_mmap(struct dma_buf *dbuf,
return vb2_vmalloc_mmap(dbuf->priv, vma);
  }
  
-static struct dma_buf_ops vb2_vmalloc_dmabuf_ops = {

+static const struct dma_buf_ops vb2_vmalloc_dmabuf_ops = {
.attach = vb2_vmalloc_dmabuf_ops_attach,
.detach = vb2_vmalloc_dmabuf_ops_detach,
.map_dma_buf = vb2_vmalloc_dmabuf_ops_map,


Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland

Re: [PATCH] media: vb2 dma-contig: Constify dma_buf_ops structures.

2017-07-03 Thread Marek Szyprowski


Hi Arvind,

On 2017-07-01 13:27, Arvind Yadav wrote:

dma_buf_ops are not supposed to change at runtime. All functions
working with dma_buf_ops provided by <linux/dma-buf.h> work with
const dma_buf_ops. So mark the non-const structs as const.

File size before:
text   data bss dec hex filename
6035272   0630718a3 
drivers/media/v4l2-core/videobuf2-dma-contig.o

File size After adding 'const':
text   data bss dec hex filename
6155160   0631518ab 
drivers/media/v4l2-core/videobuf2-dma-contig.o

Signed-off-by: Arvind Yadav 


Thanks!
Acked-by: Marek Szyprowski 


---
  drivers/media/v4l2-core/videobuf2-dma-contig.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c 
b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 4f246d1..5b90a66 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -352,7 +352,7 @@ static int vb2_dc_dmabuf_ops_mmap(struct dma_buf *dbuf,
return vb2_dc_mmap(dbuf->priv, vma);
  }
  
-static struct dma_buf_ops vb2_dc_dmabuf_ops = {

+static const struct dma_buf_ops vb2_dc_dmabuf_ops = {
.attach = vb2_dc_dmabuf_ops_attach,
.detach = vb2_dc_dmabuf_ops_detach,
.map_dma_buf = vb2_dc_dmabuf_ops_map,


Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland

Re: "mm: use early_pfn_to_nid in page_ext_init" broken on some configurations?

2017-07-03 Thread Joonsoo Kim

On Mon, Jul 03, 2017 at 01:48:05PM +0200, Vlastimil Babka wrote:
> On 06/30/2017 04:18 PM, Michal Hocko wrote:
> > fe53ca54270a ("mm: use early_pfn_to_nid in page_ext_init") seem
> > to silently depend on CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID resp.
> > CONFIG_HAVE_MEMBLOCK_NODE_MAP. early_pfn_to_nid is returning zero with
> > !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && 
> > !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
> > I am not sure how widely is this used but such a code is tricky. I see
> > how catching early allocations during deferred initialization might be
> > useful but a subtly broken code sounds like a problem to me.  So is
> > fe53ca54270a worth this or we should revert it?
> 
> There might be more issues with fe53ca54270a, I think. This I've
> observed on our 4.4-based kernel, which has deferred page struct init,
> but doesn't have b8f1a75d61d8 ("mm: call page_ext_init() after all
> struct pages are initialized") nor aforementioned fe53ca54270a:
> 
> [0.00] allocated 421003264 bytes of page_ext
> [0.00] Node 0, zone  DMA: page owner found early allocated 0 pages
> [0.00] Node 0, zoneDMA32: page owner found early allocated 33 
> pages
> [0.00] Node 0, zone   Normal: page owner found early allocated 
> 2842622 pages
> [0.00] BUG: unable to handle kernel NULL pointer dereference at   
> (null)
> [0.00] IP: [] init_page_owner+0x12a/0x240
> [0.00] PGD 0 
> [0.00] Oops:  [#1] SMP 
> [0.00] Modules linked in:
> [0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.74+ #7
> [0.00] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.0.0-prebuilt.qemu-project.org 04/01/2014
> [0.00] task: 81e104c0 ti: 81e0 task.ti: 
> 81e0
> [0.00] RIP: 0010:[]  [] 
> init_page_owner+0x12a/0x240
> [0.00] RSP: :81e03ed0  EFLAGS: 00010046
> [0.00] RAX:  RBX: 88083ffe0210 RCX: 
> ea001300
> [0.00] RDX: 0300 RSI: 81f57437 RDI: 
> 004c
> [0.00] RBP: 81e03f20 R08: 81e03e90 R09: 
> 
> [0.00] R10: 004c0200 R11:  R12: 
> ea00
> [0.00] R13: 004c0200 R14: 004c R15: 
> 0084
> [0.00] FS:  () GS:88042fc0() 
> knlGS:
> [0.00] CS:  0010 DS:  ES:  CR0: 80050033
> [0.00] CR2:  CR3: 01e0b000 CR4: 
> 000406b0
> [0.00] Stack:
> [0.00]  0206 88083ffe0f90 88083ffdf000 
> 3181
> [0.00]  ea001300 0040 ea00 
> 0084
> [0.00]  0084 8e10 81e03f50 
> 81f84145
> [0.00] Call Trace:
> [0.00]  [] page_ext_init+0x15e/0x167
> [0.00]  [] start_kernel+0x351/0x418
> [0.00]  [] ? early_idt_handler_array+0x120/0x120
> [0.00]  [] x86_64_start_reservations+0x2a/0x2c
> [0.00]  [] x86_64_start_kernel+0x12c/0x13b
> [0.00] Code: 81 e2 00 fe ff ff 4d 39 fa 4d 0f 47 d7 4d 39 f2 4d 89 d5 
> 77 34 eb 5e 48 8b 01 f6 c4 04 75 21 48 89 cf 48 89 4d d0 e8 b6 35 00 00 <48> 
> 8b 00 a8 04 75 0e 48 8b 4d d0 e9 c2 00 00 00 48 83 45 c8 01 
> [0.00] RIP  [] init_page_owner+0x12a/0x240
> [0.00]  RSP 
> [0.00] CR2: 
> [0.00] ---[ end trace 19e05592f03a690f ]---
> 
> Note that this is different backtrace than in b8f1a75d61d8 log.
> 
> Still, backporting b8f1a75d61d8 fixes this:
> 
> [1.538379] allocated 738197504 bytes of page_ext
> [1.539340] Node 0, zone  DMA: page owner found early allocated 0 pages
> [1.540179] Node 0, zoneDMA32: page owner found early allocated 33 
> pages
> [1.611173] Node 0, zone   Normal: page owner found early allocated 96755 
> pages
> [1.683167] Node 1, zone   Normal: page owner found early allocated 96575 
> pages
> 
> No panic, notice how it allocated more for page_ext, and found smaller number 
> of
> early allocated pages.
> 
> Now backporting fe53ca54270a on top:
> 
> [0.00] allocated 738197504 bytes of page_ext
> [0.00] Node 0, zone  DMA: page owner found early allocated 0 pages
> [0.00] Node 0, zoneDMA32: page owner found early allocated 33 
> pages
> [0.00] Node 0, zone   Normal: page owner found early allocated 
> 2842622 pages
> [0.00] Node 1, zone   Normal: page owner found early allocated 
> 3694362 pages
> 
> Again no panic, and same amount of page_ext usage. But the "early allocated" 
> numbers
> seem bogus to me. I think it's because init_pages_in_zone() is running and 
> inspecting
> struct pages that have not been yet initialized. It doesn't end up crashing, 
> but
> still doesn't seem correct?

Numbers looks sane to me. fe53

Re: "mm: use early_pfn_to_nid in page_ext_init" broken on some configurations?

2017-07-03 Thread Joonsoo Kim

On Fri, Jun 30, 2017 at 05:44:16PM +0200, Michal Hocko wrote:
> On Fri 30-06-17 17:42:24, Michal Hocko wrote:
> [...]
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 16532fa0bb64..894697c1e6f5 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -1055,6 +1055,7 @@ static inline struct zoneref 
> > *first_zones_zonelist(struct zonelist *zonelist,
> > !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
> >  static inline unsigned long early_pfn_to_nid(unsigned long pfn)
> >  {
> > +   BUILD_BUG_ON(!IS_ENABLED(CONFIG_NUMA));
> 
> Err, this should read BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)) of course

Agreed.

However, AFAIK, ARM can set CONFIG_NUMA but it doesn't have
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID and CONFIG_HAVE_MEMBLOCK_NODE_MAP.

If page_ext uses early_pfn_to_nid(), it will cause build error in ARM.

Therefore, I suggest following change.
CONFIG_DEFERRED_STRUCT_PAGE_INIT depends on proper early_pfn_to_nid().
So, following code will always work as long as
CONFIG_DEFERRED_STRUCT_PAGE_INIT works.

Thanks.

--->8---
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 88ccc044..e3db259 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -384,6 +384,7 @@ void __init page_ext_init(void)
 
for_each_node_state(nid, N_MEMORY) {
unsigned long start_pfn, end_pfn;
+   int page_nid;
 
start_pfn = node_start_pfn(nid);
end_pfn = node_end_pfn(nid);
@@ -405,8 +406,15 @@ void __init page_ext_init(void)
 *
 * Take into account DEFERRED_STRUCT_PAGE_INIT.
 */
-   if (early_pfn_to_nid(pfn) != nid)
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+   page_nid = early_pfn_to_nid(pfn);
+#else
+   page_nid = pfn_to_nid(pfn);
+#endif
+
+   if (page_nid != nid)
continue;
+
if (init_section_page_ext(pfn, nid))
goto oom;
}

'skb' buffer address information leakage

2017-07-03 Thread Dison River

Hi all:
    I've found several address leaks of the "skb" buffer. When I have an
arbitrary address write vulnerability in the kernel (with kASLR enabled), I can
use the skb's address to find sk_destruct's address and overwrite it.
Then, invoking the close(sock_fd) function can trigger the
shellcode (sk_destruct func).

In kernel 4.12-rc7
drivers/net/irda/vlsi_ir.c:326   seq_printf(seq, "skb=%p
data=%p hw=%p\n", rd->skb, rd->buf, rd->hw);
drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c:167
 seq_printf(file, " frag=%p", skb);
drivers/net/wireless/ath/wil6210/debugfs.c:926   seq_printf(s,
"  SKB = 0x%p\n", skb);

Thanks.

Re: [GIT PULL] Char/Misc driver patches for 4.13-rc1

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 8:00 AM, Greg KH  wrote:
>
> Peter Rosin (11):
>   mux: minimal mux subsystem

Hmm. The MULTIPLEXER question in the Kconfig strikes me as confusing
and of dubious value.

Can we please get rid of questions that people have basically an
impossible time answering, and instead have the drivers that want to
use CONFIG_MULTIPLEXER just add a "select" statement.

In fact it looks like we actually *have* the select statements, but
then we still ask the user about something they can't answer?

Most people will not have any kind of idea that whatever other driver
they are using wants to use the mux-core functionality..

  Linus

Re: [PATCH v4 3/3] arm64: kvm: inject SError with user space specified syndrome

2017-07-03 Thread gengdongjiu

Hi Christoffer,
  thanks for the review.

On 2017/7/3 16:39, Christoffer Dall wrote:
> Hi Dongjiu,
> 
> On Mon, Jun 26, 2017 at 08:46:39PM +0800, Dongjiu Geng wrote:
>> when SError happen, kvm notifies user space to record the CPER,
>> user space specifies and passes the contents of ESR_EL1 on taking
>> a virtual SError interrupt to KVM, KVM enables virtual system
>> error or asynchronous abort with this specifies syndrome. This
>> patch modify the world-switch to restore VSESR_EL2, VSESR_EL2
>> saves the virtual SError syndrome, it becomes the ESR_EL1 value when
>> HCR_EL2.VSE injects an SError. This register is added by the
>> RAS Extensions.
> 
> This commit message is confusing and doesn't help me understand the
> patch.
(1) what is the rationale for the guest OS SError interrupt (SEI) handling in 
the RAS solution?
  you can refer to the document: "RAS_Extension_PRD03-PRDC-010953-32-0, 6.5.3 
Example software sequences"
  a). In the firmware-first RAS solution, when an SError interrupt (SEI) 
occurs in the guest OS, it first traps to EL3 (SCR_EL3.EA = 1);
  b). The firmware logs, triages, and delegates the error exception to the 
hypervisor. As the error came from guest OS EL1, firmware
  does this by faking an SError interrupt exception entry to EL2.
  c). Control transfers to the hypervisor's delegated error recovery 
agent. Because HCR_EL2.AMO is set to 1, the hypervisor can use a
  Virtual SError interrupt to delegate an asynchronous abort to EL1, by 
setting HCR_EL2.VSE to 1 and using VSESR_EL2 to pass the syndrome.

(2) what does this patch mainly do?
  As mentioned above, the hypervisor needs to enable virtual SError and pass 
the virtual syndrome to the guest OS.

  a). when control transfers to the hypervisor from firmware via a faked 
SError interrupt, the hypervisor delivers the syndrome_info (esr_el2) and
  the host VA address (Qemu translates this VA address to the virtual machine 
physical address (IPA)) using the newly added "serror_intr" struct below.
/* KVM_EXIT_SERROR_INTR */
struct {
__u32 syndrome_info;
__u64 address;
} serror_intr;

  b). Qemu gets the address (host VA) delivered by KVM, translates this host VA 
address to a virtual machine physical address (IPA), and at runtime records this 
virtual
 machine physical address (IPA) in the guest OS's APEI table.

  c). Qemu gets the syndrome_info delivered by KVM and refers to this syndrome 
value (though it may choose a different one) to specify the virtual SError interrupt's 
syndrome by setting VSESR_EL2.

VSESR_EL2 is an ARMv8.2 register; its explanation can be found in 
"RAS_Extension_PRD03-PRDC-010953-33-0, 5.6.18 VSESR_EL2, Virtual SError 
Exception Syndrome Register"

>>The VSESR_EL2 characteristics are:
>>Purpose:
>>Provides the syndrome value reported to software on taking a virtual 
SError interrupt exception:
>>  — If the virtual SError interrupt is taken to EL1 using AArch64 
then VSESR_EL2 provides the
>>syndrome value reported in ESR_EL1.
>>  — If the virtual SError interrupt is taken to EL1 using AArch32 
then VSESR_EL2 provides the
>>syndrome values reported in DFSR.{AET, ExT} and the remainder 
of the DFSR is set as
>>   defined by VMSAv8-32.

 so in KVM, I added a new IOCTL (#define KVM_ARM_SEI  _IO(KVMIO,  0xb8)) 
to pass the virtual SError syndrome value specified by Qemu and enable a 
virtual System Error.

 d). when the world switches to the guest OS, the guest OS takes a virtual SError (this 
virtual SError cannot be routed to EL3 firmware); the guest OS uses the specified 
syndrome value to do the recovery and
 parses the guest OS CPER, which is dynamically recorded by Qemu in the 
APEI table.

> 
> I think this patch is trying to do too many things.  I suggest you split
> the patch into (at least) one patch that captures exception information
> from the world-switch path, one patch that deals with the new exit
> reason, and finally a patch with the new ioctl.  That way you can write
> a commit message for each patch describing first what the patch does,
> and then why this is a good idea.
  Ok, thanks for the good suggestion.

> 
> Nevertheless, I added some random comments below.
> 
>>
>> Changes since v3:
>> (1) Move restore VSESR_EL2 value logic to a helper method
>> (2) In the world-switch, not save VSESR_EL2, because no one cares the
>> old VSESR_EL2 value
>> (3) Add a new KVM_ARM_SEI ioctl to set the VSESR_EL2 value and pend
>> a virtual system error
>>
>> Signed-off-by: Dongjiu Geng 
>> Signed-off-by: Quanming Wu 
>> ---
>>  Documentation/virtual/kvm/api.txt| 10 ++
>>  arch/arm/include/asm/kvm_host.h  |  1 +
>>  arch/arm/kvm/arm.c   |  7 +++
>>  arch/arm/kvm/guest.c |  5 +
>>  arch/arm64/include/asm/esr.h |  2 ++
>>  arch/arm64/include/asm/kvm_emulate.h | 10 ++
>

Re: [PULL] Docs for 4.13

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 6:20 AM, Jonathan Corbet  wrote:
>You'll also encounter more than the usual number of conflicts, which
>is saying something.

Hmm. I fixed the ones that were actual data conflicts, but I think
there ends up being several things that are just stale or didn't get
updated by other pulls.

Eg things like

  Error: Cannot open file ./kernel/rcu/srcu.c
  Error: Cannot open file ./kernel/rcu/srcu.c

happen simply because that file no longer exists, and the docs never
got updated.

So my merge didn't even try to fix those kinds of things at all.  I
literally just looked at the conflicts and moved those over to the rst
files, and that was it. There's a lot of other changes that never
cause conflicts for the simple reason that those changes never caused
documentation changes to begin with.

Now, this is obviously not new, but it does strike me that if checking
for these kinds of things was easier and part of "make allmodconfig",
then we might have less of it happen.

At the same time, lots of people run a lot of builds, and while I'd
love to see warnings about docs failures, I am *not* willing to slow
down my usual build enormously. I run "make allmodconfig" builds
between every single pull during the merge window, and while it's
often parallel with me looking at the problems, I don't really want to
slow the build down too much. And the doc building is still *slow*.

Is there some fast "just basic sanity checks" that would be more reasonable?

Because one thing that the switch to sphinx has done is that the doc
build environment seems saner (tool-wise). So now that kind of thing
would at least be _possible_ to do in ways I don't think was
reasonable with docbook.

And now docbook is finally gone. But sphinx isn't exactly a speed demon either.

 Linus

Commit edf064e7c (btrfs: nowait aio support) breaks shells

2017-07-03 Thread Markus Trippelsdorf

commit edf064e7c6fec3646b06c944a8e35d1a3de5c2c3 (HEAD, refs/bisect/bad)
Author: Goldwyn Rodrigues 
Date:   Tue Jun 20 07:05:49 2017 -0500

btrfs: nowait aio support

apparently breaks several shell related features on my system.
In zsh history stopped working, because no new entries are added
anymore.
I first noticed the issue when I tried to build mplayer. It uses a shell
script to generate a help_mp.h file:

  % help/help_create.sh help/help_mp-en.h UTF-8

This file gets corrupted:

--- help_mp_good.h  2017-07-04 05:38:33.161640826 +0200
+++ help_mp_bad.h   2017-07-04 05:51:00.650730726 +0200
@@ -1,14 +1,8 @@
-/* WARNING! This is a generated file, do NOT edit.
- * See the help/ subdirectory for the editable files. */
 
 -#ifndef MPLAYER_HELP_MP_H
 -#define MPLAYER_HELP_MP_H
 -
 -#include 
 -#include "config.h"
 +#endif /* MPLAYER_HELP_MP_H */
 +he English master file */
  
  -// $Revision: 37846 $
  -// MASTER FILE. Use this file as base for translations.
  + for translations.
...
(I have attached the testcase.)

/dev/sdc3 on / type btrfs 
(rw,noatime,lazytime,compress=lzo,ssd,noacl,space_cache=v2,subvolid=5,subvol=/) 
 # cat /sys/block/sdc/queue/scheduler
[none] mq-deadline 

-- 
Markus


test.tar.bz2
Description: Binary data

Re: [PATCH] ext4: fix __ext4_xattr_set_credits()

2017-07-03 Thread Theodore Ts'o

On Wed, Jun 28, 2017 at 02:47:50PM -0700, Tahsin Erdogan wrote:
> __ext4_xattr_set_credits() calculates journal credits needed for a
> set xattr operation. Currently, credits needed for quota updates are
> added only if ea_inode feature is enabled which is wrong. Fix this by
> moving quota related additions to above ea_inode feature check.
> 
> Fixes: 74c5bfa651af ("ext4: xattr inode deduplication")
> 
> Signed-off-by: Tahsin Erdogan 

Thanks, I've folded this into the xattr_inode_deduplication patch.

  - Ted

[PATCH] char: ipmi: eliminate misleading print info when being probed via ACPI

2017-07-03 Thread Hanjun Guo

From: Hanjun Guo 

When ipmi is probed via ACPI, the boot log shows

[   17.945139] ipmi_si IPI0001:00: probing via device tree
[   17.950369] ipmi_si IPI0001:00: ipmi_si: probing via ACPI
[   17.955795] ipmi_si IPI0001:00: [io  0x00e4-0x3fff] regsize 1 spacing 1 irq 0
[   17.962932] ipmi_si: Adding ACPI-specified bt state machine

in which "ipmi_si IPI0001:00: probing via device tree" is misleading:
the device has an ACPI HID "IPI0001", yet the log claims probing via DT.

Eliminate this misleading print info by checking whether of_node is
valid before calling of_ipmi_probe().

Signed-off-by: Hanjun Guo 
---
 drivers/char/ipmi/ipmi_si_intf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 59ee93e..159950d 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2810,7 +2810,7 @@ static int acpi_ipmi_probe(struct platform_device *dev)
 
 static int ipmi_probe(struct platform_device *dev)
 {
-   if (of_ipmi_probe(dev) == 0)
+   if (dev->dev.of_node && of_ipmi_probe(dev) == 0)
return 0;
 
return acpi_ipmi_probe(dev);
-- 
1.7.12.4

Re: [PATCH] ext4: fast symlink test should not rely on i_blocks

2017-07-03 Thread Theodore Ts'o

On Wed, Jun 28, 2017 at 10:53:31AM -0600, Andreas Dilger wrote:
> 
> > On Jun 27, 2017, at 6:34 PM, Tahsin Erdogan  wrote:
> > 
> > ext4_inode_info->i_data is the storage area for 4 types of data:
> > 
> >  a) Extents data
> >  b) Inline data
> >  c) Block map
> >  d) Fast symlink data (symlink length < 60)
> > 
> > Extents data case is positively identified by EXT4_INODE_EXTENTS flag.
> > Inline data case is also obvious because of EXT4_INODE_INLINE_DATA
> > flag.
> > 
> > Distinguishing c) and d) however requires additional logic. This
> > currently relies on i_blocks count. After subtracting external xattr
> > block from i_blocks, if it is greater than 0 then we know that some
> > data blocks exist, so there must be a block map.
> > 
> > This logic got broken after ea_inode feature was added. That feature
> > charges the data blocks of external xattr inodes to the referencing
> > inode and so adds them to the i_blocks. To fix this, we could subtract
> > ea_inode blocks by iterating through all xattr entries and then check
> > whether remaining i_blocks count is zero. Besides being complicated,
> > this won't change the fact that the current way of distinguishing
> > between c) and d) is fragile.
> > 
> > The alternative solution is to test whether i_size is less than 60 to
> > determine fast symlink case. ext4_symlink() uses the same test to decide
> > whether to store the symlink in i_data. There is one caveat to address
> > before this can work though.
> > 
> > If an inode's i_nlink is zero during eviction, its i_size is set to
> > zero and its data is truncated. If system crashes before inode is removed
> > from the orphan list, next boot orphan cleanup may find the inode with
> > zero i_size. So, a symlink that had its data stored in a block may now
> > appear to be a fast symlink. The solution used in this patch is to treat
> > i_size = 0 as a non-fast symlink case. A zero sized symlink is not legal
> > so the only time this can happen is the mentioned scenario. This is also
> > logically correct because a i_size = 0 symlink has no data stored in
> > i_data.
> > 
> > Fixes: 74c5bfa651af ("ext4: xattr inode deduplication")
> > 
> > Suggested-by: Andreas Dilger 
> > Signed-off-by: Tahsin Erdogan 
> 
> The unfortunate bit is that this makes the inode impossible to undelete, but
> I don't think that is a huge concern for symlinks.
> 
> Reviewed-by: Andreas Dilger 

Thanks, applied.

- Ted

Re: [PATCH] fs: ext4: inode->i_generation not assigned 0.

2017-07-03 Thread Darrick J. Wong

On Thu, Jun 29, 2017 at 02:50:22PM -0400, J. Bruce Fields wrote:
> On Thu, Jun 29, 2017 at 02:30:53PM -0400, J. Bruce Fields wrote:
> > On Thu, Jun 29, 2017 at 10:25:28AM -0700, Darrick J. Wong wrote:
> > > Was there ever a version of NFS (or more generally callers of the
> > > exportfs code) that couldn't deal with i_generation in the file handle,
> > > and therefore we invented this generation hack to work around the loss
> > > of the generation information?
> > > 
> > > There's a comment in xfs_fs_encode_fh about not supporting 64bit inodes
> > > with subtree_check (which seems to require one ino/gen pair for the file
> > > and a second pair for the file's parent) on NFSv2 because v2 doesn't
> > > provide enough space for all the file handle information, but that's the
> > > furthest I got with lazy-mining the git history. :)
> > 
> > There's a comment in fs/ext4/super.c:ext4_nfs_get_inode
> > 
> > * Currently we don't know the generation for parent directory, so
> > * a generation of 0 means "accept any"
> > 
> > But I don't see that used.
> > 
> > It was used once upon a time; I see it actually used in old 2.5 code in
> > nfsd_get_dentry.  Hm.
> 
> Oh, maybe it's here in fs/libfs.c:generic_fh_to_parent:
> 
>   switch (fh_type) {
>   case FILEID_INO32_GEN_PARENT:
>   inode = get_inode(sb, fid->i32.parent_ino,
> (fh_len > 3 ? fid->i32.parent_gen : 0));
>   break;
>   }
> 
> I'm not sure under what conditions that filehandle encoding is used.

The best guess I can come up with is the old nfs_fhbase_old style handles,
which (afaict) do not carry parent i_generation?

--D

> 
> --b.

[PATCH] gpio: drop unnecessary includes from include/linux/gpio/driver.h

2017-07-03 Thread Masahiro Yamada

Some of the include directives in include/linux/gpio/driver.h are
unneeded because the header does not need to know the content of
struct device, irq_chip, etc.  Just forward-declare them as structures.

On the other hand,  and 
turned out to be necessary for irq_flow_handler_t and spinlock_t,
respectively.

Each driver should include what it needs without relying on what is
implicitly included from <linux/gpio/driver.h>.  This will cut down
unnecessary header parsing.

Signed-off-by: Masahiro Yamada 
---

 drivers/gpio/gpio-104-dio-48e.c  |  1 +
 drivers/gpio/gpio-104-idi-48.c   |  1 +
 drivers/gpio/gpio-104-idio-16.c  |  1 +
 drivers/gpio/gpio-altera-a10sr.c |  2 ++
 drivers/gpio/gpio-altera.c   |  3 +++
 drivers/gpio/gpio-aspeed.c   |  5 +
 drivers/gpio/gpio-ath79.c|  2 ++
 drivers/gpio/gpio-bcm-kona.c |  1 +
 drivers/gpio/gpio-clps711x.c |  1 +
 drivers/gpio/gpio-crystalcove.c  |  2 ++
 drivers/gpio/gpio-dln2.c |  1 +
 drivers/gpio/gpio-dwapb.c|  1 +
 drivers/gpio/gpio-etraxfs.c  |  1 +
 drivers/gpio/gpio-f7188x.c   |  1 +
 drivers/gpio/gpio-ftgpio010.c|  3 +++
 drivers/gpio/gpio-ingenic.c  |  1 +
 drivers/gpio/gpio-intel-mid.c|  2 ++
 drivers/gpio/gpio-lp873x.c   |  1 +
 drivers/gpio/gpio-lynxpoint.c|  2 ++
 drivers/gpio/gpio-max732x.c  |  1 +
 drivers/gpio/gpio-max77620.c |  1 +
 drivers/gpio/gpio-menz127.c  |  1 +
 drivers/gpio/gpio-merrifield.c   |  4 
 drivers/gpio/gpio-omap.c |  3 +++
 drivers/gpio/gpio-pca953x.c  |  1 +
 drivers/gpio/gpio-pci-idio-16.c  |  2 ++
 drivers/gpio/gpio-pisosr.c   |  1 +
 drivers/gpio/gpio-pl061.c|  1 +
 drivers/gpio/gpio-rcar.c |  1 +
 drivers/gpio/gpio-stmpe.c|  2 ++
 drivers/gpio/gpio-tc3589x.c  |  3 +++
 drivers/gpio/gpio-tegra.c|  1 +
 drivers/gpio/gpio-tps65218.c |  1 +
 drivers/gpio/gpio-vf610.c|  3 +++
 drivers/gpio/gpio-vx855.c|  1 +
 drivers/gpio/gpio-wcove.c|  2 ++
 drivers/gpio/gpio-wm831x.c   |  1 +
 drivers/gpio/gpio-wm8994.c   |  1 +
 drivers/gpio/gpio-ws16c48.c  |  1 +
 drivers/gpio/gpio-xgene-sb.c |  2 ++
 drivers/gpio/gpio-xlp.c  |  1 +
 drivers/gpio/gpio-zx.c   |  1 +
 drivers/gpio/gpio-zynq.c |  3 +++
 drivers/gpio/gpiolib-acpi.c  |  1 +
 drivers/gpio/gpiolib.c   |  2 ++
 include/linux/gpio/driver.h  | 11 ++-
 46 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 598e209efa2d..bdc52be7902a 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 51f046e29ff7..7bbb0e8573d1 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index ec2ce34ff473..7e3fc0bf3398 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c
index 16a8951b2bed..4cdca9332043 100644
--- a/drivers/gpio/gpio-altera-a10sr.c
+++ b/drivers/gpio/gpio-altera-a10sr.c
@@ -21,6 +21,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /**
  * struct altr_a10sr_gpio - Altera Max5 GPIO device private data structure
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index 17485dc20384..40b26274acaf 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -17,6 +17,9 @@
  */
 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 4ca436e66bdb..a9d575e215da 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -15,9 +15,14 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index f33d4a5fe671..1249ab0f8f4f 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -17,6 +17,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #define AR71XX_GPIO_REG_OE 0x00
 #define AR71XX_GPIO_REG_IN 0x04
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index dfcf56ee3c61..937d646d184b 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define BCM_GPIO_PASSWD0x00a5a501
 #define GPIO_PER_BANK

[PATCH] gpio: add COMPILE_TEST to several drivers

2017-07-03 Thread Masahiro Yamada

These drivers are actually platform-agnostic.  Add COMPILE_TEST for
the compilation test coverage.

Signed-off-by: Masahiro Yamada 
---

 drivers/gpio/Kconfig | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index f235eae04c16..da82cee24980 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -153,7 +153,7 @@ config GPIO_CLPS711X
 config GPIO_DAVINCI
bool "TI Davinci/Keystone GPIO support"
default y if ARCH_DAVINCI
-   depends on ARM && (ARCH_DAVINCI || ARCH_KEYSTONE)
+   depends on ARM && (ARCH_DAVINCI || ARCH_KEYSTONE || COMPILE_TEST)
help
  Say yes here to enable GPIO support for TI Davinci/Keystone SoCs.
 
@@ -279,7 +279,7 @@ config GPIO_LPC18XX
 
 config GPIO_LYNXPOINT
tristate "Intel Lynxpoint GPIO support"
-   depends on ACPI && X86
+   depends on ACPI && (X86 || COMPILE_TEST)
select GPIOLIB_IRQCHIP
help
  driver for GPIO functionality on Intel Lynxpoint PCH chipset
@@ -337,20 +337,20 @@ config GPIO_MPC8XXX
 
 config GPIO_MVEBU
def_bool y
-   depends on PLAT_ORION || ARCH_MVEBU
+   depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST
depends on OF_GPIO
select GENERIC_IRQ_CHIP
select REGMAP_MMIO
 
 config GPIO_MXC
def_bool y
-   depends on ARCH_MXC
+   depends on ARCH_MXC || COMPILE_TEST
select GPIO_GENERIC
select GENERIC_IRQ_CHIP
 
 config GPIO_MXS
def_bool y
-   depends on ARCH_MXS
+   depends on ARCH_MXS || COMPILE_TEST
select GPIO_GENERIC
select GENERIC_IRQ_CHIP
 
@@ -381,7 +381,7 @@ config GPIO_PL061
 
 config GPIO_PXA
bool "PXA GPIO support"
-   depends on ARCH_PXA || ARCH_MMP
+   depends on ARCH_PXA || ARCH_MMP || COMPILE_TEST
help
  Say yes here to support the PXA GPIO device
 
@@ -467,7 +467,7 @@ config GPIO_TZ1090_PDC
 
 config GPIO_VF610
def_bool y
-   depends on ARCH_MXC && SOC_VF610
+   depends on (ARCH_MXC && SOC_VF610) || COMPILE_TEST
select GPIOLIB_IRQCHIP
help
  Say yes here to support Vybrid vf610 GPIOs.
@@ -492,7 +492,7 @@ config GPIO_VX855
 
 config GPIO_XGENE
bool "APM X-Gene GPIO controller support"
-   depends on ARM64 && OF_GPIO
+   depends on (ARM64 || COMPILE_TEST) && OF_GPIO
help
  This driver is to support the GPIO block within the APM X-Gene SoC
  platform's generic flash controller. The GPIO pins are muxed with
@@ -543,7 +543,7 @@ config GPIO_ZEVIO
 
 config GPIO_ZYNQ
tristate "Xilinx Zynq GPIO support"
-   depends on ARCH_ZYNQ || ARCH_ZYNQMP
+   depends on ARCH_ZYNQ || ARCH_ZYNQMP || COMPILE_TEST
select GPIOLIB_IRQCHIP
help
  Say yes here to support Xilinx Zynq GPIO controller.
@@ -1154,14 +1154,14 @@ config GPIO_BT8XX
 
 config GPIO_INTEL_MID
bool "Intel MID GPIO support"
-   depends on X86_INTEL_MID
+   depends on X86_INTEL_MID || COMPILE_TEST
select GPIOLIB_IRQCHIP
help
  Say Y here to support Intel MID GPIO.
 
 config GPIO_MERRIFIELD
tristate "Intel Merrifield GPIO support"
-   depends on X86_INTEL_MID
+   depends on X86_INTEL_MID || COMPILE_TEST
select GPIOLIB_IRQCHIP
help
  Say Y here to support Intel Merrifield GPIO.
-- 
2.7.4

Re: [PATCH] x86/platform/uv/BAU: minor cleanup, make some local functions static

2017-07-03 Thread Dou Liyang


Hi Colin,

At 07/03/2017 10:22 PM, Colin King wrote:

From: Colin Ian King 

Functions normal_busy, handle_uv2_busy, uv_flush_send_and_wait and
find_another_by_swack are local to the source, so make them static

Fixes various smatch warnings, such as:
"symbol 'find_another_by_swack' was not declared. Should it be static?"
"symbol 'handle_uv2_busy' was not declared. Should it be static?"

Signed-off-by: Colin Ian King 
---
 arch/x86/platform/uv/tlb_uv.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 2983faab5b18..730b47dce402 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -595,7 +595,7 @@ static unsigned long uv2_3_read_status(unsigned long 
offset, int rshft, int desc
  * The bit provided by the activation_status_2 register is irrelevant to
  * the status if it is only being tested for busy or not busy.
  */
-int normal_busy(struct bau_control *bcp)
+static int normal_busy(struct bau_control *bcp)


In my opinion, there is no need to mark *normal_busy* static; just remove it
directly.

Commit c5d35d399e68 ("x86/UV2: Work around BAU bug") added it to
handle_uv2_busy(), but handle_uv2_busy() has since been rewritten, so
normal_busy() is now unused and can be removed.

By the way, there is also another function, uv_bau_message_interrupt(),
that can be removed.


Thanks,

dou.


 {
int cpu = bcp->uvhub_cpu;
int mmr_offset;
@@ -612,7 +612,7 @@ int normal_busy(struct bau_control *bcp)
  * of a hardware bug.
  * Workaround the bug.
  */
-int handle_uv2_busy(struct bau_control *bcp)
+static int handle_uv2_busy(struct bau_control *bcp)
 {
struct ptc_stats *stat = bcp->statp;

@@ -917,7 +917,8 @@ static void handle_cmplt(int completion_status, struct 
bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
+static int uv_flush_send_and_wait(struct cpumask *flush_mask,
+   struct bau_control *bcp,
struct bau_desc *bau_desc)
 {
int seq_number = 0;
@@ -1212,8 +1213,8 @@ const struct cpumask *uv_flush_tlb_others(const struct 
cpumask *cpumask,
  * Search the message queue for any 'other' unprocessed message with the
  * same software acknowledge resource bit vector as the 'msg' message.
  */
-struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-  struct bau_control *bcp)
+static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+ struct bau_control *bcp)
 {
struct bau_pq_entry *msg_next = msg + 1;
unsigned char swack_vec = msg->swack_vec;

Re: [PATCH v1 1/1] usb:xhci: update condition to select bus->sysdev from parent device

2017-07-03 Thread Thang Q. Nguyen

On Tue, Jun 6, 2017 at 2:11 PM, Thang Q. Nguyen  wrote:
> For commit 4c39d4b949d3 ("usb: xhci: use bus->sysdev for DMA
> configuration"), sysdev points to devices known to the system firmware
> or hardware for DMA parameters.
> However, the parent of the system firmware/hardware device checking
> logic does not work in ACPI boot mode. This patch updates the formulation
> to check this case in both DT and ACPI.
>
> Signed-off-by: Tung Nguyen 
> Signed-off-by: Thang Q. Nguyen 
> ---
>  drivers/usb/host/xhci-plat.c |4 +++-
>  1 files changed, 3 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
> index c04144b..e3e342a 100644
> --- a/drivers/usb/host/xhci-plat.c
> +++ b/drivers/usb/host/xhci-plat.c
> @@ -187,7 +187,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
>  * 3. xhci_plat is grandchild of a pci device (dwc3-pci)
>  */
> sysdev = &pdev->dev;
> -   if (sysdev->parent && !sysdev->of_node && sysdev->parent->of_node)
> +   if (sysdev->parent && (sysdev->fwnode->type == FWNODE_PDATA) &&
> +   (is_of_node(sysdev->parent->fwnode) ||
> +   is_acpi_device_node(sysdev->parent->fwnode)))
> sysdev = sysdev->parent;
>  #ifdef CONFIG_PCI
> else if (sysdev->parent && sysdev->parent->parent &&
> --
> 1.7.1
>
Hi,
Do you have any comment on this patch?

Regards,
Thang Q. Nguyen

Re: [PATCH v5 3/5] mtd: handle partitioning on devices with 0 erasesize

2017-07-03 Thread Chris Packham

Hi,

On 02/06/17 15:21, Chris Packham wrote:
> erasesize is meaningful for flash devices but for SRAM there is no
> concept of an erase block so erasesize is set to 0. When partitioning
> these devices instead of ensuring partitions fall on erasesize
> boundaries we ensure they fall on writesize boundaries.
> 
> Helped-by: Boris Brezillon 
> Signed-off-by: Chris Packham 

I had someone mention to me in passing that mtdinfo was failing for them 
(crashing with some floating point error). I'm wondering if we've 
created a divide-by-zero problem by reporting 0 erase size in /proc/mtd. 
I don't have any other info and right now I don't have access to the 
system I had with the mchp23lcv1024 sram.

Andrew, do you still have access to your device?

Re: [PATCH v2 0/7] KVM: MMU: fast write protect

2017-07-03 Thread Xiao Guangrong




On 07/03/2017 11:47 PM, Paolo Bonzini wrote:



On 03/07/2017 16:39, Xiao Guangrong wrote:



On 06/20/2017 05:15 PM, guangrong.x...@gmail.com wrote:

From: Xiao Guangrong 

Changelog in v2:
thanks to Paolo's review, this version disables write-protect-all if
PML is supported


Hi Paolo,

Do you have time to have a look at this new version? ;)
Or I should wait until the patchset of dirty ring-buffer is merged?


I will look at it soon, but I still plan to merge dirty ring buffer first.

Thanks for your understanding,


Sure, I fully understand; thank you for bearing with my push. :)

[PATCH] net: ethernet: mediatek: fixed deadlock captured by lockdep

2017-07-03 Thread sean.wang

From: Sean Wang 

Lockdep found an inconsistent lock state when mtk_get_stats64 is called
in user context while NAPI updates MAC statistics in softirq.

Use spin_trylock_bh/spin_unlock_bh to fix the following lockdep warning.

[   81.321030] WARNING: inconsistent lock state
[   81.325266] 4.12.0-rc1-00035-gd9dda65 #32 Not tainted
[   81.330273] 
[   81.334505] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[   81.340464] ksoftirqd/0/7 [HC0[0]:SC1[1]:HE1:SE0] takes:
[   81.345731]  (&syncp->seq#2){+.?...}, at: [] 
mtk_handle_status_irq.part.6+0x70/0x84
[   81.354219] {SOFTIRQ-ON-W} state was registered at:
[   81.359062]   lock_acquire+0xfc/0x2b0
[   81.362696]   mtk_stats_update_mac+0x60/0x2c0
[   81.367017]   mtk_get_stats64+0x17c/0x18c
[   81.370995]   dev_get_stats+0x48/0xbc
[   81.374628]   rtnl_fill_stats+0x48/0x128
[   81.378520]   rtnl_fill_ifinfo+0x4ac/0xd1c
[   81.382584]   rtmsg_ifinfo_build_skb+0x7c/0xe0
[   81.386991]   rtmsg_ifinfo.part.5+0x24/0x54
[   81.391139]   rtmsg_ifinfo+0x24/0x28
[   81.394685]   __dev_notify_flags+0xa4/0xac
[   81.398749]   dev_change_flags+0x50/0x58
[   81.402640]   devinet_ioctl+0x768/0x85c
[   81.406444]   inet_ioctl+0x1a4/0x1d0
[   81.409990]   sock_ioctl+0x16c/0x33c
[   81.413538]   do_vfs_ioctl+0xb4/0xa34
[   81.417169]   SyS_ioctl+0x44/0x6c
[   81.420458]   ret_fast_syscall+0x0/0x1c
[   81.424260] irq event stamp: 3354692
[   81.427806] hardirqs last  enabled at (3354692): [] 
net_rx_action+0xc0/0x504
[   81.435660] hardirqs last disabled at (3354691): [] 
net_rx_action+0x8c/0x504
[   81.443515] softirqs last  enabled at (3354106): [] 
__do_softirq+0x4b4/0x614
[   81.451370] softirqs last disabled at (3354109): [] 
run_ksoftirqd+0x44/0x80
[   81.459134]
[   81.459134] other info that might help us debug this:
[   81.465608]  Possible unsafe locking scenario:
[   81.465608]
[   81.471478]CPU0
[   81.473900]
[   81.476321]   lock(&syncp->seq#2);
[   81.479701]   
[   81.482294] lock(&syncp->seq#2);
[   81.485847]
[   81.485847]  *** DEADLOCK ***
[   81.485847]
[   81.491720] 1 lock held by ksoftirqd/0/7:
[   81.495693]  #0:  (&(&mac->hw_stats->stats_lock)->rlock){+.+...}, at: 
[] mtk_handle_status_irq.part.6+0x48/0x84
[   81.506579]
[   81.506579] stack backtrace:
[   81.510904] CPU: 0 PID: 7 Comm: ksoftirqd/0 Not tainted 
4.12.0-rc1-00035-gd9dda65 #32
[   81.518668] Hardware name: Mediatek Cortex-A7 (Device Tree)
[   81.524208] [] (unwind_backtrace) from [] 
(show_stack+0x20/0x24)
[   81.531899] [] (show_stack) from [] 
(dump_stack+0xb4/0xe0)
[   81.539072] [] (dump_stack) from [] 
(print_usage_bug+0x234/0x2e0)
[   81.546846] [] (print_usage_bug) from [] 
(mark_lock+0x63c/0x7bc)
[   81.554532] [] (mark_lock) from [] 
(__lock_acquire+0x654/0x1bfc)
[   81.562217] [] (__lock_acquire) from [] 
(lock_acquire+0xfc/0x2b0)
[   81.569990] [] (lock_acquire) from [] 
(mtk_stats_update_mac+0x60/0x2c0)
[   81.578283] [] (mtk_stats_update_mac) from [] 
(mtk_handle_status_irq.part.6+0x70/0x84)
[   81.587865] [] (mtk_handle_status_irq.part.6) from [] 
(mtk_napi_tx+0x358/0x37c)
[   81.596845] [] (mtk_napi_tx) from [] 
(net_rx_action+0x244/0x504)
[   81.604533] [] (net_rx_action) from [] 
(__do_softirq+0x134/0x614)
[   81.612306] [] (__do_softirq) from [] 
(run_ksoftirqd+0x44/0x80)
[   81.619907] [] (run_ksoftirqd) from [] 
(smpboot_thread_fn+0x14c/0x25c)
[   81.628110] [] (smpboot_thread_fn) from [] 
(kthread+0x150/0x180)
[   81.635798] [] (kthread) from [] 
(ret_from_fork+0x14/0x24)

Signed-off-by: Sean Wang 
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c 
b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 16f9755..8a2acb8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -470,9 +470,9 @@ static void mtk_get_stats64(struct net_device *dev,
unsigned int start;
 
if (netif_running(dev) && netif_device_present(dev)) {
-   if (spin_trylock(&hw_stats->stats_lock)) {
+   if (spin_trylock_bh(&hw_stats->stats_lock)) {
mtk_stats_update_mac(mac);
-   spin_unlock(&hw_stats->stats_lock);
+   spin_unlock_bh(&hw_stats->stats_lock);
}
}
 
@@ -2156,9 +2156,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev,
return;
 
if (netif_running(dev) && netif_device_present(dev)) {
-   if (spin_trylock(&hwstats->stats_lock)) {
+   if (spin_trylock_bh(&hwstats->stats_lock)) {
mtk_stats_update_mac(mac);
-   spin_unlock(&hwstats->stats_lock);
+   spin_unlock_bh(&hwstats->stats_lock);
}
}
 
-- 
2.7.4

Re: [1/3] cpuidle: powerpc: cpuidle set polling before enabling irqs

2017-07-03 Thread Michael Ellerman

"Rafael J. Wysocki"  writes:

> On Fri, Jun 30, 2017 at 5:45 AM, Michael Ellerman  wrote:
>> "Rafael J. Wysocki"  writes:
>>
>>> On Thu, Jun 29, 2017 at 2:21 PM, Michael Ellerman
>>>  wrote:
 On Wed, 2017-06-14 at 13:02:39 UTC, Nicholas Piggin wrote:
> local_irq_enable can cause interrupts to be taken which could
> take significant amount of processing time. The idle process
> should set its polling flag before this, so another process that
> wakes it during this time will not have to send an IPI.
>
> Expand the TIF_POLLING_NRFLAG coverage to as large as possible.
>
> Reviewed-by: Gautham R. Shenoy 
> Signed-off-by: Nicholas Piggin 

 Series applied to powerpc next, thanks.

 https://git.kernel.org/powerpc/c/3fc5ee927ff4ffed6aa2fcd44d2fbf
>>>
>>> OK
>>>
>>> I've applied it too, so I guess I should drop it?
>>
>> Erk sorry. I hadn't heard anything so I picked it up.
>>
>> If you can drop it that would be good, but if not git will probably work
>> it out mostly :)
>
> I've dropped it, no problem.

Thanks.

cheers

Re: [PATCH 2/2] x86/idle: use dynamic halt poll

2017-07-03 Thread Yang Zhang


On 2017/7/3 18:06, Thomas Gleixner wrote:

On Mon, 3 Jul 2017, Yang Zhang wrote:

The background is that we (Alibaba Cloud) get more and more complaints from
our customers on both KVM and Xen compared to bare-metal. After investigation,
the root cause is known to us: the high cost of message-passing workloads
(David showed it at KVM Forum 2015).

A typical message workload like below:
vcpu 0                             vcpu 1
1. send ipi                        2.  doing hlt
3. go into idle                    4.  receive ipi and wake up from hlt
5. write APIC time twice           6.  write APIC time twice to
   to stop sched timer                 reprogram sched timer
7. doing hlt                       8.  handle task and send ipi to
                                       vcpu 0
9. same to 4.                      10. same to 3

One transaction will introduce about 12 vmexits (2 hlt and 10 MSR writes). The
cost of such vmexits degrades performance severely. The Linux kernel already
provides idle=poll to mitigate this, but it only eliminates the IPI and hlt
vmexits; it does nothing about starting/stopping the sched timer. A compromise
would be to turn off NOHZ in the kernel, but that is not the default config
for new distributions.


You can still turn it off on the kernel command line via nohz=off


You are right. Experienced users will turn it off manually, but that only
solves the sched timer part; they still have the IPI/hlt problem. Another point
is that we release the distribution image to customers without any extra
configuration, to avoid mismatches between VM and bare-metal. Changing such
configuration requires a reboot, but some customers' business cannot be
interrupted after they start the service (like online gaming). It would be
better if we could provide a sysctl interface to allow run-time modification.
By the way, idle=poll seems too heavy to use.






Thanks,

tglx




--
Yang
Alibaba Cloud Computing

[PATCH] MAINTAINERS:add maintainer for kirin pcie

2017-07-03 Thread Xiaowei Song

The Kirin PCIe driver does not have a maintainer at present.

Add maintainers for the Kirin PCIe driver and its documentation, i.e. the
following two files:
Documentation/devicetree/bindings/pci/pcie-kirin.txt
drivers/pci/dwc/pcie-kirin.c

Signed-off-by: Xiaowei Song 
Cc: Guodong Xu 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5ee3125f8341..c64ff79587c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9851,6 +9851,14 @@ S:   Maintained
 F: Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
 F: drivers/pci/dwc/pcie-hisi.c
 
+PCIE DRIVER FOR Kirin
+M: Xiaowei Song 
+M: Binghui Wang 
+L: linux-...@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pci/pcie-kirin.txt
+F: drivers/pci/dwc/pcie-kirin.c
+
 PCIE DRIVER FOR ROCKCHIP
 M: Shawn Lin 
 M: Wenrui Li 
-- 
2.11.GIT

[PATCH] MAINTAINERS:add maintainer for kirin pcie

2017-07-03 Thread Xiaowei Song

The Kirin PCIe driver does not have a maintainer at present.

Add maintainers for the Kirin PCIe driver and its documentation, i.e. the
following two files:
Documentation/devicetree/bindings/pci/pcie-kirin.txt
drivers/pci/dwc/pcie-kirin.c

Signed-off-by: Xiaowei Song 
Cc: Guodong Xu 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5ee3125f8341..c64ff79587c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9851,6 +9851,14 @@ S:   Maintained
 F: Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
 F: drivers/pci/dwc/pcie-hisi.c
 
+PCIE DRIVER FOR Kirin
+M: Xiaowei Song 
+M: Binghui Wang 
+L: linux-...@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pci/pcie-kirin.txt
+F: drivers/pci/dwc/pcie-kirin.c
+
 PCIE DRIVER FOR ROCKCHIP
 M: Shawn Lin 
 M: Wenrui Li 
-- 
2.11.GIT

Re: [PATCH v11 0/3]add PCIe driver for Kirin PCIe

2017-07-03 Thread Guodong Xu

On Mon, Jul 3, 2017 at 9:32 PM, Wei Xu  wrote:
> Hi Guodong,
>
> On 2017/7/3 14:04, Guodong Xu wrote:
>> Hi, Xu Wei
>>
>>
>>
>> On Mon, Jul 3, 2017 at 6:47 PM, Will Deacon  wrote:
>>> On Sun, Jul 02, 2017 at 06:36:57PM -0500, Bjorn Helgaas wrote:
 [+cc Catalin, Will, linux-arm-kernel]

 Applied patches 2 & 3 to pci/host-kirin for v4.13.

 I would like a MAINTAINERS update, too.  If you send me that, I'll
 squash it into the driver patch.

 Catalin, Will, how do you want to handle the
 arch/arm64/configs/defconfig change (patch 3)?  It's currently on my
 branch, but I'm happy to drop it if another route is better.
>>>
>>> defconfig updates usually go through arm-soc, via the relevant platform
>>> maintainer, so it would be best to follow that route here too otherwise
>>> you'll probably see conflicts in -next.
>>>
>>
>> Is it ok for you to pick up the arch/arm64/configs/defconfig change (patch 
>> 3)?
>
> Since it is already 4.12-rc7, I will pick up it and queue for the v4.14.
> Are you fine about that?
> Thanks!

Sure.

>
> Best Regards,
> Wei
>
>>
>> -Guodong
>>
>>> Will
>>
>> .
>>
>

Re: [PATCH v4] PCI: Workaround wrong flags completions for IDT switch

2017-07-03 Thread Ethan Zhao

James,

On Tue, Jul 4, 2017 at 2:17 AM, james puthukattukaran
 wrote:
>
> Ethan -
>
>
> On 7/2/2017 9:55 PM, Ethan Zhao wrote:
>>
>> James,
>>
>> On Wed, Jun 28, 2017 at 5:42 AM, James Puthukattukaran
>>  wrote:
>>>
>>> From: James Puthukattukaran 
>>>
>>> The IDT switch incorrectly flags an ACS source violation on a read config
>>> request to an end point device on the completion (IDT 89H32H8G3-YC,
>>> errata #36) even though the PCI Express spec states that completions are
>>> never affected by ACS source violation (PCI Spec 3.1, Section 6.12.1.1).
>>>
>>> The suggested workaround by IDT is to issue a configuration write to the
>>> downstream device before issuing the first config read. This allows the
>>> downstream device to capture its bus number, thus avoiding the ACS
>>> violation on the completion.
>>>
>>> The patch does the following -
>>>
>>> 1. Disable ACS source violation if enabled
>>> 2. Wait for config space access to become available by reading vendor id
>>> 3. Do a config write to the end point (errata workaround)
>>> 4. Enable ACS source validation (if it was enabled to begin with)
>>>
>>> -v2: move workaround to pci_bus_read_dev_vendor_id() from
>>> pci_bus_check_dev()
>>>   and move enable_acs_sv to drivers/pci/pci.c -- by Yinghai
>>> -v3: add bus->self check for root bus and virtual bus for sriov vfs.
>>> -v4: only do workaround for IDT switches
>>>
>>> Signed-off-by: James Puthukattukaran 
>>> Signed-off-by: Yinghai Lu 
>>>
>>> --
>>>
>>>   drivers/pci/pci.c   | 33 +
>>>   drivers/pci/pci.h   |  1 +
>>>   drivers/pci/probe.c | 38 --
>>>   3 files changed, 70 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>>> index 563901c..a7a2e2b 100644
>>> --- a/drivers/pci/pci.c
>>> +++ b/drivers/pci/pci.c
>>> @@ -2835,6 +2835,39 @@ static bool pci_acs_flags_enabled(struct pci_dev
>>> *pdev, u16 acs_flags)
>>>   }
>>>
>>>   /**
>>> + *  pci_std_enable_acs_sv - enable/disable ACS source validation if
>>> supported by the switch
>>> + *  @dev - pcie switch/RP
>>> + *  @enable - enable (1) or disable (0) source validation
>>> + *
>>> + *  Returns : < 0 on failure
>>
>> You didn't define the meaning of 0 and >0, but you check it later against
>> >0,
>> Then what does it mean 0 and >0 ?
>
> see below..
>>
>>
>>> + *   previous acs_sv state
>
>
> It returns the previous acs_sv state (0 or 1).

You didn't clarify the meaning of the previous acs_sv state or its possible
values, and checking it later against >0 adds to the confusion.


>>>
>>> + */
>>> +int pci_std_enable_acs_sv(struct pci_dev *dev, bool enable)
>>> +{
>>> +   int pos;
>>> +   u16 cap;
>>> +   u16 ctrl;
>>> +   int retval;
>>> +
>>> +   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS);
>>> +   if (!pos)
>>> +   return -ENODEV;
>>> +
>>> +   pci_read_config_word(dev, pos + PCI_ACS_CAP, &cap);
>>> +   pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
>>> +
>>> +   retval = !!(ctrl & cap & PCI_ACS_SV);
>>
>> If the device's ACS SV( ACS Source Validation) capability wasn't
>> implemented, the return value of this function will still tell us the
>> operation of enabling is successful ? though it might be rare case.
>
> If the ACS capability is implemented, then all bits are expected to have
> meaning and are valid. If SV is not implemented by the switch, the control
> bit for it should return zero (no source validation done). This is the PCI
> specification.  The onus is on the switch designer to keep it so.

The PCI spec doesn't say SV must be implemented in every device even if it
has the ACS capability; see also:

"6.12.1.2. ACS Functions in Multi-Function Devices
This section applies to multi-Function device ACS Functions, with the
exception of Downstream Port Functions, which are covered in the preceding
section.
 - ACS Source Validation: must not be implemented.
 - ACS Translation Blocking: must not be implemented.
 - ACS P2P Request Redirect: must be implemented by Functions that support
   peer-to-peer traffic with other Functions.
"
Here pci_std_enable_acs_sv() is a common function; once implemented, it could
be used by other code to enable ACS besides this workaround.

Then what happens if it is called with a multi-function device?

Thanks,
Ethan

>
> thanks,
> James
>
>
>>> +   if (enable)
>>> +   ctrl |= (cap & PCI_ACS_SV);
>>> +   else
>>> +   ctrl &= ~(cap & PCI_ACS_SV);
>>> +
>>> +   pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl);
>>> +
>>> +   return retval;
>>> +}
>>> +
>>> +/**
>>>* pci_acs_enabled - test ACS against required flags for a given device
>>>* @pdev: device to test
>>>* @acs_flags: required PCI ACS flags
>>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>>> index f8113e5..3960c2a 100644
>>> --- a/drivers/pci/pci.h
>>> +++ b/drivers/pci/pci.h
>>> @@ -343,6 +343,7 @@ static inline resource_size_t
>>> pci_resource_alignment(st

Re: [PATCH v3 1/2] acpi: thermal: initialize tz_enabled to 1

2017-07-03 Thread Zhang Rui

On Mon, 2017-07-03 at 23:08 +0200, Rafael J. Wysocki wrote:
> On Mon, Jul 3, 2017 at 10:00 AM, Enric Balletbo i Serra
>  wrote:
> > 
> > From: Sameer Nanda 
> > 
> > In the acpi_thermal_add path, acpi_thermal_get_info gets called
> > before
> > acpi_thermal_register_thermal_zone.  Since tz_enabled was getting
> > set to
> > 1 only in acpi_thermal_register_thermal_zone, acpi_thermal_get_info
> > ended up disabling thermal polling.
> > 
> > Moved setting of tz_enabled to 1 into acpi_thermal_add itself.
> > 
> > Signed-off-by: Sameer Nanda 
> > Signed-off-by: Enric Balletbo i Serra  > >
> > ---
> > Changes since v2:
> >  - Zhang Rui:
> >    - Make sure tz->tz_enabled is set properly before registering
> > the zone.
> > 
> > Changes since v1:
> >  - This patch is new from v1 [1]
> > 
> >  [1] https://patchwork.kernel.org/patch/9804229/
> > 
> >  drivers/acpi/thermal.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
> > index 1d0417b..cd0fe92 100644
> > --- a/drivers/acpi/thermal.c
> > +++ b/drivers/acpi/thermal.c
> > @@ -930,8 +930,6 @@ static int
> > acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
> > if (ACPI_FAILURE(status))
> > return -ENODEV;
> > 
> > -   tz->tz_enabled = 1;
> > -
> > dev_info(&tz->device->dev, "registered as
> > thermal_zone%d\n",
> >  tz->thermal_zone->id);
> > return 0;
> > @@ -1088,6 +1086,7 @@ static int acpi_thermal_add(struct
> > acpi_device *device)
> > return -ENOMEM;
> > 
> > tz->device = device;
> > +   tz->tz_enabled = 1;
> > strcpy(tz->name, device->pnp.bus_id);
> > strcpy(acpi_device_name(device), ACPI_THERMAL_DEVICE_NAME);
> > strcpy(acpi_device_class(device), ACPI_THERMAL_CLASS);
> > --
> Rui,
> 
> Can I just apply this, or do I need to work for a thermal core
> update?
>  In the latter case, can you take care of this one too, please?
> 
Yes, I will take both of the patches.

thanks,
rui

> Thanks,
> Rafael

Re: [GIT PULL] Char/Misc driver patches for 4.13-rc1

2017-07-03 Thread Stephen Rothwell

Hi all,

On Mon, 3 Jul 2017 17:00:30 +0200 Greg KH  wrote:
>
> All of these have been in linux-next for a while with the only reported
> issue being a merge problem with this tree and the jc-docs tree in the
> w1 documentation area.  The fix should be obvious for what to do when it
> happens, if not, we can send a follow-up patch for it afterward.

I have been using this (after deleting Documentation/DocBook/w1.tmpl):

From: Stephen Rothwell 
Date: Wed, 14 Jun 2017 14:01:14 +1000
Subject: [PATCH] docs-rst: merge fix for "DocBook: w1: Update W1 file
 locations and names in DocBook"

Signed-off-by: Stephen Rothwell 
---
 Documentation/driver-api/w1.rst | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/Documentation/driver-api/w1.rst b/Documentation/driver-api/w1.rst
index c1da8f0cb476..64826ceba2de 100644
--- a/Documentation/driver-api/w1.rst
+++ b/Documentation/driver-api/w1.rst
@@ -10,12 +10,12 @@ W1 API internal to the kernel
 W1 API internal to the kernel
 -
 
-drivers/w1/w1.h
-~~~
+include/linux/w1.h
+~~
 
-W1 core functions.
+W1 kernel API functions.
 
-.. kernel-doc:: drivers/w1/w1.h
+.. kernel-doc:: include/linux/w1.h
:internal:
 
 drivers/w1/w1.c
@@ -26,20 +26,20 @@ W1 core functions.
 .. kernel-doc:: drivers/w1/w1.c
:internal:
 
-drivers/w1/w1_family.h
+drivers/w1/w1_family.c
 ~~~
 
 Allows registering device family operations.
 
-.. kernel-doc:: drivers/w1/w1_family.h
-   :internal:
+.. kernel-doc:: drivers/w1/w1_family.c
+   :export:
 
-drivers/w1/w1_family.c
-~~~
+drivers/w1/w1_internal.h
+
 
-Allows registering device family operations.
+W1 internal initialization for master devices.
 
-.. kernel-doc:: drivers/w1/w1_family.c
+.. kernel-doc:: drivers/w1/w1_internal.h
:export:
 
 drivers/w1/w1_int.c
-- 
2.11.0

-- 
Cheers,
Stephen Rothwell

Re: [GIT PULL] USB/PHY patches for 4.13-rc1

2017-07-03 Thread Stephen Rothwell

Hi all,

On Mon, 3 Jul 2017 16:58:47 +0200 Greg KH  wrote:
>
> All of these have been in linux-next for a while with no reported
> issues.

I have been carrying the following merge fix patch for the merge
against the uuid tree (now in Linus' tree):

From: Stephen Rothwell 
Date: Thu, 29 Jun 2017 14:36:10 +1000
Subject: [PATCH] usb: typec: fix for "ACPI: Switch to use generic guid_t in
 acpi_evaluate_dsm()"

Signed-off-by: Stephen Rothwell 
---
 drivers/usb/typec/ucsi/ucsi_acpi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c 
b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 3fb2e48e1c91..7b7c9373a9b6 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -23,14 +23,14 @@ struct ucsi_acpi {
struct device *dev;
struct ucsi *ucsi;
struct ucsi_ppm ppm;
-   uuid_le uuid;
+   guid_t uuid;
 };
 
 static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
 {
union acpi_object *obj;
 
-   obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), ua->uuid.b, 1, func,
+   obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), &ua->uuid, 1, func,
NULL);
if (!obj) {
dev_err(ua->dev, "%s: failed to evaluate _DSM %d\n",
-- 
2.11.0

-- 
Cheers,
Stephen Rothwell

RE: [PATCH v1] vfio: ABI for mdev display dma-buf operation

2017-07-03 Thread Zhang, Tina



> -Original Message-
> From: Zhang, Tina
> Sent: Tuesday, July 4, 2017 9:04 AM
> To: alex.william...@redhat.com; kra...@redhat.com; ch...@chris-wilson.co.uk;
> zhen...@linux.intel.com; Lv, Zhiyuan ; Wang, Zhi A
> ; Tian, Kevin ; dan...@ffwll.ch;
> kwankh...@nvidia.com
> Cc: Zhang, Tina ; intel-...@lists.freedesktop.org; 
> intel-
> gvt-...@lists.freedesktop.org; linux-kernel@vger.kernel.org
> Subject: [PATCH v1] vfio: ABI for mdev display dma-buf operation
> 
> Add VFIO_DEVICE_QUERY_GFX_PLANE ioctl command to let user mode query
> and get the plane and its related information.
> 
> The dma-buf's life cycle is handled by user mode and tracked by kernel.
> The returned fd in struct vfio_device_query_gfx_plane can be a new fd or an
> old fd of a re-exported dma-buf. Host user mode can check the value of fd
> to see if it needs to create a new resource according to the new fd or just
> use the existing resource related to the old fd.
> 
> Signed-off-by: Tina Zhang 
> 
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index
> ae46105..c92bc69 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -502,6 +502,36 @@ struct vfio_pci_hot_reset {
> 
>  #define VFIO_DEVICE_PCI_HOT_RESET  _IO(VFIO_TYPE, VFIO_BASE + 13)
> 
> +/**
> + * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14,
> + *   struct vfio_device_query_gfx_plane)
> + * Return: 0 on success, -errno on failure.
> + */
> +
> +struct vfio_device_gfx_plane_info {
> + __u64 start;
> + __u64 drm_format_mod;
> + __u32 drm_format;
> + __u32 width;
> + __u32 height;
> + __u32 stride;
> + __u32 size;
> + __u32 x_pos;
> + __u32 y_pos;
> +};
In this version, we don't rely on user mode to compare and find out whether a 
new dmabuf needs to be created. So, some of fields can be removed if user mode 
thinks they are not interesting any more. 

> +
> +struct vfio_device_query_gfx_plane {
> + __u32 argsz;
> + __u32 flags;
> + struct vfio_device_gfx_plane_info plane_info;
> + __u32 plane_type;
> + __s32 fd; /* dma-buf fd */
> + __u32 plane_id;
> +};
Still cannot figure out what the plane_id stands for in dmabuf case. As it 
might be needed by region usage, just add it here.

> +
> +#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14)
> +
> +
>  /*  API for Type1 VFIO IOMMU  */
> 
>  /**
> --
> 2.7.4

Re: [PATCH mm] introduce reverse buddy concept to reduce buddy fragment

2017-07-03 Thread zhouxianrong


the test was done as follows:

1. the environment is android 7.0 and kernel is 4.1 and managed memory is 3.5GB
2. every 4s startup one apk, total 100 more apks need to startup
3. after finishing step 2, sample buddyinfo once and get the result

On 2017/7/3 23:33, Michal Hocko wrote:

On Mon 03-07-17 20:02:16, zhouxianrong wrote:
[...]

from above i think after applying the patch the result is better.


You haven't described your testing methodology, nor the workload that was
tested. As such this data is completely meaningless.

[PATCH v1] vfio: ABI for mdev display dma-buf operation

2017-07-03 Thread Tina Zhang

Add VFIO_DEVICE_QUERY_GFX_PLANE ioctl command to let user mode query and
get the plane and its related information.

The dma-buf's life cycle is handled by user mode and tracked by kernel.
The returned fd in struct vfio_device_query_gfx_plane can be a new
fd or an old fd of a re-exported dma-buf. Host user mode can check the
value of fd to see if it needs to create a new resource according to the
new fd or just use the existing resource related to the old fd.

Signed-off-by: Tina Zhang 

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ae46105..c92bc69 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -502,6 +502,36 @@ struct vfio_pci_hot_reset {
 
 #define VFIO_DEVICE_PCI_HOT_RESET  _IO(VFIO_TYPE, VFIO_BASE + 13)
 
+/**
+ * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14,
+ *   struct vfio_device_query_gfx_plane)
+ * Return: 0 on success, -errno on failure.
+ */
+
+struct vfio_device_gfx_plane_info {
+   __u64 start;
+   __u64 drm_format_mod;
+   __u32 drm_format;
+   __u32 width;
+   __u32 height;
+   __u32 stride;
+   __u32 size;
+   __u32 x_pos;
+   __u32 y_pos;
+};
+
+struct vfio_device_query_gfx_plane {
+   __u32 argsz;
+   __u32 flags;
+   struct vfio_device_gfx_plane_info plane_info;
+   __u32 plane_type;
+   __s32 fd; /* dma-buf fd */
+   __u32 plane_id;
+};
+
+#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+
 /*  API for Type1 VFIO IOMMU  */
 
 /**
-- 
2.7.4

Re: [GIT pull] x86/timers updates for 4.13

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 1:20 AM, Thomas Gleixner  wrote:
>
> This update contains:
>
>- The final solution for the TSC deadline timer borkage, which is caused
>  by a hardware problem in the TSC_ADJUST/TSC_DEADLINE_TIMER logic.
>
>  The problem is documented now fixed with a microcode update, so we can
>  remove the workaround and check for the microcode version. If the
>  microcode is not up to date, then the TSC deadline timer is
>  enabled. If the borkage is fixed by the proper micro code version,
>  then the deadline timer can be used. In both cases the restrictions to
>  the range of the TSC_ADJUST value, which were added as workarounds,
>  are removed.
>
>   - A few simple fixes and updates to the timer related x86 code.

That deadline timer explanation made no sense.

I edited it to what I think you wanted to say in the commit message.

   Linus

Re: [PATCH RFC 08/26] locking: Remove spin_unlock_wait() generic definitions

2017-07-03 Thread Paul E. McKenney

On Mon, Jul 03, 2017 at 05:39:36PM -0700, Paul E. McKenney wrote:
> On Mon, Jul 03, 2017 at 03:49:42PM -0700, Linus Torvalds wrote:
> > On Mon, Jul 3, 2017 at 3:30 PM, Paul E. McKenney
> >  wrote:
> > >
> > > That certainly is one interesting function, isn't it?  I wonder what
> > > happens if you replace the raw_spin_is_locked() calls with an
> > > unlock under a trylock check?  ;-)
> > 
> > Deadlock due to interrupts again?
> 
> Unless I am missing something subtle, the kgdb_cpu_enter() function in
> question has a local_irq_save() over the "interesting" portion of its
> workings, so interrupt-handler self-deadlock should not happen.
> 
> > Didn't your spin_unlock_wait() patches teach you anything? Checking
> > state is fundamentally different from taking the lock. Even a trylock.
> 
> That was an embarrassing bug, no two ways about it.  :-/
> 
> > I guess you could try with the irqsave versions. But no, we're not doing 
> > that.
> 
> Again, no need in this case.
> 
> But I agree with Will's assessment of this function...
> 
> The raw_spin_is_locked() looks to be asking if -any- CPU holds the
> dbg_slave_lock, and the answer could of course change immediately
> on return from raw_spin_is_locked().  Perhaps the theory is that
> if other CPU holds the lock, this CPU is supposed to be subjected to
> kgdb_roundup_cpus().  Except that the CPU that held dbg_slave_lock might
> be just about to release that lock.  Odd.
> 
> Seems like there should be a get_online_cpus() somewhere, but maybe
> that constraint is to be manually enforced.

Except that invoking get_online_cpus() from an exception handler would
of course be a spectacularly bad idea.  I would feel better if the
num_online_cpus() was under the local_irq_save(), but perhaps this code
is relying on the stop_machine().  Except that it appears we could
deadlock with offline waiting for stop_machine() to complete and kgdb
waiting for all CPUs to report, including those in stop_machine().

Looks like the current situation is "Don't use kgdb if there is any
possibility of CPU-hotplug operations."  Not necessarily an unreasonable
restriction.

But I need to let my eyes heal a bit before looking at this more.

Thanx, Paul

Re: [PATCH v2 2/2] thermal: uniphier: add UniPhier thermal driver

2017-07-03 Thread Kunihiko Hayashi

Hi Eduardo,
Thank you for your comment.

On Fri, 30 Jun 2017 20:16:33 -0700  wrote:

> Hey,
> 
> On Wed, Jun 28, 2017 at 07:11:59PM +0900, Kunihiko Hayashi wrote:
> > Add a thermal driver for on-chip PVT (Process, Voltage and Temperature)
> > monitoring unit implemented on UniPhier SoCs. This driver supports
> > temperature monitoring and alert function.
> > 
> > Signed-off-by: Kunihiko Hayashi 
> > ---
> >  drivers/thermal/Kconfig|   8 +
> >  drivers/thermal/Makefile   |   1 +
> >  drivers/thermal/uniphier_thermal.c | 391 
> > +
> >  3 files changed, 400 insertions(+)
> >  create mode 100644 drivers/thermal/uniphier_thermal.c

(snip)

> > +static void uniphier_tm_enable_sensor(struct uniphier_tm_dev *tdev)
> > +{
> > +   struct regmap *map = tdev->regmap;
> > +   int i;
> > +   u32 bits = 0;
> > +
> > +   for (i = 0; i < ALERT_CH_NUM; i++)
> > +   if (tdev->alert_en[i])
> > +   bits |= PMALERTINTCTL_EN(i);
> > +
> > +   /* enable alert interrupt */
> > +   regmap_write_bits(map, tdev->data->map_base + PMALERTINTCTL,
> > + PMALERTINTCTL_MASK, bits);
> > +
> > +   /* start PVT */
> > +   regmap_write_bits(map, tdev->data->block_base + PVTCTLEN,
> > + PVTCTLEN_EN, PVTCTLEN_EN);
> 
> Do we need to wait some time after starting PVT and before reading the
> first temperature?

Thanks for your pointing out.

According to the spec sheet, we can read first temperature
with waiting 700us after starting PVT. And after disabling PVT,
we must wait 1ms until next access.

I'll add "nsleep" after accessing PVTCTLEN in
uniphier_tm_{enable,disable}_sensor().

> > +}
> > +
> > +static void uniphier_tm_disable_sensor(struct uniphier_tm_dev *tdev)
> > +{
> > +   struct regmap *map = tdev->regmap;
> > +
> > +   /* disable alert interrupt */
> > +   regmap_write_bits(map, tdev->data->map_base + PMALERTINTCTL,
> > + PMALERTINTCTL_MASK, 0);
> > +
> > +   /* stop PVT */
> > +   regmap_write_bits(map, tdev->data->block_base + PVTCTLEN,
> > + PVTCTLEN_EN, 0);
> > +}
> > +
> > +static int uniphier_tm_get_temp(void *data, int *out_temp)
> > +{
> > +   struct uniphier_tm_dev *tdev = data;
> > +   struct regmap *map = tdev->regmap;
> > +   int ret;
> > +   u32 temp;
> > +
> > +   ret = regmap_read(map, tdev->data->map_base + TMOD, &temp);
> > +   if (ret)
> > +   return ret;
> > +
> > +   /*
> > +* Since MSB of TMOD_MASK in TMOD represents signed bit,
> > +* if the register value is bigger than or equal to
> > +* ((TMOD_MASK + 1) / 2), it represents a negative value
> > +* of temperature.
> > +*/
> > +   temp &= TMOD_MASK;
> > +   if (temp >= ((TMOD_MASK + 1) / 2))
> > +   *out_temp = (temp - (TMOD_MASK + 1)) * 1000;
> 
> But, why do you mask negative values? Are you considering them invalid?
> should this be reported? Why simply silently transforming into positive?

My explanation comment is insufficient.
The whole TMOD register doesn't represent temperature value.

TMOD[31:9] has always 0 as reserved bits.
TMOD[8:0] has 2's complement value of temperature (Celsius)
represented by 9bits.
For example, when we read 0x1ff from the TMOD, it means -1.

Then according to linux/bitops.h,
it can be replaced with "*out_temp = sign_extend32(temp, 8)" simply
(the second argument is the 0-based index of the sign bit, i.e. bit 8).

Best Regards,
Kunihiko Hayashi

RE: [Intel-gfx] [PATCH v9 5/7] vfio: Define vfio based dma-buf operations

2017-07-03 Thread Zhang, Tina


> -Original Message-
> From: intel-gvt-dev [mailto:intel-gvt-dev-boun...@lists.freedesktop.org] On
> Behalf Of Daniel Vetter
> Sent: Thursday, June 29, 2017 4:39 PM
> To: Gerd Hoffmann 
> Cc: Wang, Zhenyu Z ; intel-
> g...@lists.freedesktop.org; linux-kernel@vger.kernel.org; Chen, Xiaoguang
> ; Zhang, Tina ; Alex
> Williamson ; Lv, Zhiyuan
> ; Kirti Wankhede ; intel-gvt-
> d...@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH v9 5/7] vfio: Define vfio based dma-buf
> operations
> 
> On Thu, Jun 29, 2017 at 08:41:53AM +0200, Gerd Hoffmann wrote:
> >   Hi,
> >
> > > > Does gvt track the live cycle of all dma-bufs it has handed out?
> > >
> > > The V9 implementation does track the dma-bufs' live cycle. The
> > > original idea was that leaving the dma-bufs' live cycle management
> > > to user mode.
> >
> > That is still the case, user space decides which dma-bufs it'll go
> > keep cached.  But kernel space can see what user space is doing, so
> > there is no need to explicitly tell the kernel whenever a cached
> > dma-buf exists or not.
> 
> We do the same trick in drm_prime.c, keeping a cache of exported dma-buf
> around for re-exporting. Since for prime sharing the use-case is almost always
> re-importing as a drm gem buffer again we can then on re-import also tell
> userspace whether it already has that buffer in its userspace buffer manager,
> but that's an additional optimization. With plain dma-buf we could achieve the
> same by wiring up a real stat() implementation with unique inode numbers (atm
> they all share the anon_inode singleton). But thus far no one asked for that.

Thanks. I'm going to submit the v10 version of ABI interface.

> 
> btw I'm lost a bit in the discussion (was on vacation), but I think all the 
> concerns
> I've noticed with the initial rfc have been raised already, so things look 
> good. I'll
> check the next rfc once that shows up.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
> ___
> intel-gvt-dev mailing list
> intel-gvt-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev

Re: [PATCH 2/3] mtd: spi-nor: core code for the Altera Quadspi Flash Controller v2

2017-07-03 Thread Cyrille Pitchen

Hi Matthew,


Le 26/06/2017 à 18:13, matthew.gerl...@linux.intel.com a écrit :
> From: Matthew Gerlach 
> 
> Signed-off-by: Matthew Gerlach 
> ---
>  MAINTAINERS  |   7 +
>  drivers/mtd/spi-nor/Kconfig  |   5 +
>  drivers/mtd/spi-nor/Makefile |   4 +-
>  drivers/mtd/spi-nor/altera-quadspi.c | 676 
> +++
>  include/linux/mtd/altera-quadspi.h   |  28 ++
>  5 files changed, 719 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/mtd/spi-nor/altera-quadspi.c
>  create mode 100644 include/linux/mtd/altera-quadspi.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 6b4395c..ae33fa6 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -648,6 +648,13 @@ L:   linux-g...@vger.kernel.org
>  S:   Maintained
>  F:   drivers/gpio/gpio-altera.c
>  
> +ALTERA QUADSPI FLASH DRIVER
> +M:   Matthew Gerlach 
> +L:   linux-...@lists.infradead.org
> +S:   Maintained
> +F:   drivers/mtd/spi-nor/altera-quadspi.c
> +F:   include/linux/mtd/altera-quadspi.h
> +
>  ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT
>  M:   Thor Thayer 
>  S:   Maintained
> diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
> index 293c8a4..89fe425 100644
> --- a/drivers/mtd/spi-nor/Kconfig
> +++ b/drivers/mtd/spi-nor/Kconfig
> @@ -113,4 +113,9 @@ config SPI_STM32_QUADSPI
> This enables support for the STM32 Quad SPI controller.
> We only connect the NOR to this controller.
>  
> +config SPI_ALTERA_QUADSPI
> +tristate "Altera Quad SPI Flash Controller II"
> +help
> +  Enable support for version 2 of Altera Quad SPI Flash Controller.
> +
>  endif # MTD_SPI_NOR
> diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
> index 285aab8..024c6ac 100644
> --- a/drivers/mtd/spi-nor/Makefile
> +++ b/drivers/mtd/spi-nor/Makefile
> @@ -8,4 +8,6 @@ obj-$(CONFIG_MTD_MT81xx_NOR)+= mtk-quadspi.o
>  obj-$(CONFIG_SPI_NXP_SPIFI)  += nxp-spifi.o
>  obj-$(CONFIG_SPI_INTEL_SPI)  += intel-spi.o
>  obj-$(CONFIG_SPI_INTEL_SPI_PLATFORM) += intel-spi-platform.o
> -obj-$(CONFIG_SPI_STM32_QUADSPI)  += stm32-quadspi.o
> \ No newline at end of file
> +obj-$(CONFIG_SPI_STM32_QUADSPI)  += stm32-quadspi.o
> +obj-$(CONFIG_SPI_ALTERA_QUADSPI) += altera-quadspi.o
> +
> diff --git a/drivers/mtd/spi-nor/altera-quadspi.c 
> b/drivers/mtd/spi-nor/altera-quadspi.c
> new file mode 100644
> index 000..de65453
> --- /dev/null
> +++ b/drivers/mtd/spi-nor/altera-quadspi.c
> @@ -0,0 +1,676 @@
> +/*
> + * Copyright (C) 2014 Altera Corporation. All rights reserved.
> + * Copyright (C) 2017 Intel Corporation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along 
> with
> + * this program.  If not, see .
> + */
> +
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#define ALTERA_QUADSPI_RESOURCE_NAME "altera_quadspi"
> +
> +#define EPCS_OPCODE_ID   1
> +#define NON_EPCS_OPCODE_ID   2
> +
> +#define WRITE_CHECK  1
> +#define ERASE_CHECK  0
> +
> +#define QUADSPI_SR_REG   0x0
> +#define QUADSPI_SR_MASK  0x000F
> +
> +/* defines for device id register */
> +#define QUADSPI_SID_REG  0x4
> +#define QUADSPI_RDID_REG 0x8
> +#define QUADSPI_ID_MASK  0x00FF
> +
> +/*
> + * QUADSPI_MEM_OP register offset
> + *
> + * The QUADSPI_MEM_OP register is used to do memory protect and erase 
> operations
> + *
> + */
> +#define QUADSPI_MEM_OP_REG   0xC
> +
> +#define QUADSPI_MEM_OP_CMD_MASK  0x0003
> +#define QUADSPI_MEM_OP_BULK_ERASE_CMD0x0001
> +#define QUADSPI_MEM_OP_SECTOR_ERASE_CMD  0x0002
> +#define QUADSPI_MEM_OP_SECTOR_PROTECT_CMD0x0003
> +#define QUADSPI_MEM_OP_SECTOR_WRITE_ENABLE_CMD   0x0004
> +#define QUADSPI_MEM_OP_SECTOR_VALUE_MASK 0x0003FF00
> +
> +#define QUADSPI_MEM_OP_SECTOR_PROTECT_SHIFT  8
> +#define QUADSPI_MEM_OP_SECTOR_PROTECT_VALUE_MASK 0x1F00
> +/*
> + * QUADSPI_ISR register offset
> + *
> + * The QUADSPI_ISR register is used to determine whether an invalid write or
>

Re: [PATCH RFC 08/26] locking: Remove spin_unlock_wait() generic definitions

2017-07-03 Thread Paul E. McKenney

On Mon, Jul 03, 2017 at 03:49:42PM -0700, Linus Torvalds wrote:
> On Mon, Jul 3, 2017 at 3:30 PM, Paul E. McKenney
>  wrote:
> >
> > That certainly is one interesting function, isn't it?  I wonder what
> > happens if you replace the raw_spin_is_locked() calls with an
> > unlock under a trylock check?  ;-)
> 
> Deadlock due to interrupts again?

Unless I am missing something subtle, the kgdb_cpu_enter() function in
question has a local_irq_save() over the "interesting" portion of its
workings, so interrupt-handler self-deadlock should not happen.

> Didn't your spin_unlock_wait() patches teach you anything? Checking
> state is fundamentally different from taking the lock. Even a trylock.

That was an embarrassing bug, no two ways about it.  :-/

> I guess you could try with the irqsave versions. But no, we're not doing that.

Again, no need in this case.

But I agree with Will's assessment of this function...

The raw_spin_is_locked() looks to be asking if -any- CPU holds the
dbg_slave_lock, and the answer could of course change immediately
on return from raw_spin_is_locked().  Perhaps the theory is that
if other CPU holds the lock, this CPU is supposed to be subjected to
kgdb_roundup_cpus().  Except that the CPU that held dbg_slave_lock might
be just about to release that lock.  Odd.

Seems like there should be a get_online_cpus() somewhere, but maybe
that constraint is to be manually enforced.

Thanx, Paul

Re: [PATCH 1/2] selftests: ftrace: Do not failure if there is unsupported tests

2017-07-03 Thread Masami Hiramatsu

On Mon, 3 Jul 2017 11:59:06 -0400
Steven Rostedt  wrote:

> On Tue, 4 Jul 2017 00:52:32 +0900
> Masami Hiramatsu  wrote:
> > 
> > > 
> > > Can we add an option in kselftest, or to ftracetest that decides if
> > > unsupported is a failure or not? Otherwise I can not ack this patch.  
> > 
> > I would rather like to add an option to ftracetest instead of
> > kselftest, because whether the tested feature should be supported
> > or not is hard to decide from testing framework. It should be
> > checked by manual.
> 
> Can we do both? That is, add an option to have ftracetest not fail on
> "unsupported" but have it fail by default. We can have kselftest just
> pass in a parameter to ftracetest that has unsupported not fail?

I rather like to treat unsupported as success (or XFAIL) by default
and add "--fail-unsupported" option.

Thank you,

> But if that is too difficult, then I can live with modifying my test
> case to add the option.
> 
> -- Steve


-- 
Masami Hiramatsu

Re: [PATCH] aic7xxx: fix firmware build with O=path

2017-07-03 Thread Jakub Kicinski

On Wed, 28 Jun 2017 21:17:16 -0700, Jakub Kicinski wrote:
> Building firmware with O=path was apparently broken in aic7 for ever.
> Message of the previous commit to the Makefile (from 2008) mentions
> this unfortunate state of affairs already.  Fix this, mostly to make
> randconfig builds more reliable.
> 
> Signed-off-by: Jakub Kicinski 

Did anyone have a chance to look at this one?  It would be nice if it
could make its way into 4.13 :)

Re: [PATCH] mm: larger stack guard gap, between vmas

2017-07-03 Thread Andy Lutomirski

On Mon, Jul 3, 2017 at 4:55 PM, Ben Hutchings  wrote:
> On Wed, 2017-06-21 at 11:47 +0100, Ben Hutchings wrote:
>> On Wed, 2017-06-21 at 11:24 +0200, Michal Hocko wrote:
>> > On Wed 21-06-17 02:38:21, Ben Hutchings wrote:
>> > > On Mon, 2017-06-19 at 16:23 +0200, Willy Tarreau wrote:
>> > > > On Mon, Jun 19, 2017 at 08:44:24PM +0800, Linus Torvalds wrote:
>> > > > > The distros are in a different situation and don't have that
>> > > > > two-week
>> > > > > window until a release, and presumably would not want to cut
>> > > > > over to
>> > > > > something new and fairly untested on such short notice.
>> > > > >
>> > > > > The timing for this all sucks, but if somebody has some final
>> > > > > comments, please speak up now..
>> > > >
>> > > > What do you suggest the stable maintainers do here ? I've just
>> > > > backported
>> > > > this patch back to 3.10 and could boot it on i386 where it
>> > > > apparently
>> > > > works. But we may need more tests. On the other hand we benefit
>> > > > from the
>> > > > automated tests on tens of platforms when we push the queues so
>> > > > at least
>> > > > we'll quickly know if it builds and boots. I just don't feel
>> > > > confident in
>> > > > my work just because it builds and boots, you know.
>> > > >
>> > > > I'm appending the patches I currently have if anyone wants to
>> > > > have a
>> > > > glance. Ben, 3.2 requires much more changes than 3.10 and I'm
>> > > > pretty
>> > > > sure you won't change your patches at the last minute so I gave
>> > > > up.
>> > >
>> > > Well I'm now dealing with fall-out from the Debian stable updates,
>> > > which used a backport of Michal's patch series.  That unfortunately
>> > > seems to break programs running Java code in the main thread (the
>> > > 'java' command doesn't do this, but e.g. 'jsvc' does).
>> >
>> > Could you share more details please?
>>
>> https://bugs.debian.org/865303
>> https://bugs.debian.org/865311
>> https://bugs.debian.org/865343
>
> Unfortunately these regressions have not been completely fixed by
> switching to Hugh's fix.
>
> Firstly, some Rust programs are crashing on ppc64el with 64 KiB pages.
> Apparently Rust maps its own guard page at the lower limit of the stack
> (determined using pthread_getattr_np() and pthread_attr_getstack()).  I
> don't think this ever actually worked for the main thread stack, but it
> now also blocks expansion as the default stack size of 8 MiB is smaller
> than the stack gap of 16 MiB.  Would it make sense to skip over
> PROT_NONE mappings when checking whether it's safe to expand?

That change makes sense to me.

Re: [PATCH] mm: larger stack guard gap, between vmas

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 4:55 PM, Ben Hutchings  wrote:
>
> Firstly, some Rust programs are crashing on ppc64el with 64 KiB pages.
> Apparently Rust maps its own guard page at the lower limit of the stack
> (determined using pthread_getattr_np() and pthread_attr_getstack()).  I
> don't think this ever actually worked for the main thread stack, but it
> now also blocks expansion as the default stack size of 8 MiB is smaller
> than the stack gap of 16 MiB.  Would it make sense to skip over
> PROT_NONE mappings when checking whether it's safe to expand?

Hmm. Maybe.

Also, the whole notion that the gap should be relative to the page
size never made sense to me. So I think we could/should just make the
default gap size be one megabyte, not that "256 pages" abortion.

> Secondly, LibreOffice is crashing on i386 when running components
> implemented in Java.  I don't have a diagnosis for this yet.

Ugh. Nobody seeing this inside SuSe/Red Hat? I don't think I've heard
about this..

Linus

Re: [GIT pull] irq updates for 4.13

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 12:42 AM, Thomas Gleixner  wrote:
>
> please pull the latest irq-core-for-linus git tree from:
>
>git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
> irq-core-for-linus

Ugh, this caused conflicts with the block tree, with commits

 - fe631457ff3e: "blk-mq: map all HWQ also in hyperthreaded system"

 - 5f042e7cbd9e "blk-mq: Include all present CPUs in the default queue mapping"

clashing.

I'm not at all understanding why that second commit came in through
the irq tree at all, in fact. Very annoying. Why was that not sent
through the block tree? It doesn't seem to have anything fundamentally
to do with irqs, really: it's a driver CPU choice for irq choice.

Anyway, I absolutely detested that code, and the obvious resolution
was too disgusting to live. So I did an evil merge and moved some
things around in the merge to make it at least not cause me to dig my
eyes out.

But I'd like people to look at that - not so much due to the evil
merge itself (but check that too, by any means), but just because the
code seems fundamentally broken for the hotplug case. We end up
picking a possible metric shit-ton of CPU's for queue 0, if they were
"possible but not online".

If they ever do come online, does that get fixed? I don't know.
Somebody should check.

Linus

Re: [PATCH] mm: larger stack guard gap, between vmas

2017-07-03 Thread Ben Hutchings

On Wed, 2017-06-21 at 11:47 +0100, Ben Hutchings wrote:
> On Wed, 2017-06-21 at 11:24 +0200, Michal Hocko wrote:
> > On Wed 21-06-17 02:38:21, Ben Hutchings wrote:
> > > On Mon, 2017-06-19 at 16:23 +0200, Willy Tarreau wrote:
> > > > On Mon, Jun 19, 2017 at 08:44:24PM +0800, Linus Torvalds wrote:
> > > > > The distros are in a different situation and don't have that
> > > > > two-week
> > > > > window until a release, and presumably would not want to cut
> > > > > over to
> > > > > something new and fairly untested on such short notice.
> > > > > 
> > > > > The timing for this all sucks, but if somebody has some final
> > > > > comments, please speak up now..
> > > > 
> > > > What do you suggest the stable maintainers do here ? I've just
> > > > backported
> > > > this patch back to 3.10 and could boot it on i386 where it
> > > > apparently
> > > > works. But we may need more tests. On the other hand we benefit
> > > > from the
> > > > automated tests on tens of platforms when we push the queues so
> > > > at least
> > > > we'll quickly know if it builds and boots. I just don't feel
> > > > confident in
> > > > my work just because it builds and boots, you know.
> > > > 
> > > > I'm appending the patches I currently have if anyone wants to
> > > > have a
> > > > glance. Ben, 3.2 requires much more changes than 3.10 and I'm
> > > > pretty
> > > > sure you won't change your patches at the last minute so I gave
> > > > up.
> > > 
> > > Well I'm now dealing with fall-out from the Debian stable updates,
> > > which used a backport of Michal's patch series.  That unfortunately
> > > seems to break programs running Java code in the main thread (the
> > > 'java' command doesn't do this, but e.g. 'jsvc' does).
> > 
> > Could you share more details please?
> 
> https://bugs.debian.org/865303
> https://bugs.debian.org/865311
> https://bugs.debian.org/865343

Unfortunately these regressions have not been completely fixed by
switching to Hugh's fix.

Firstly, some Rust programs are crashing on ppc64el with 64 KiB pages. 
Apparently Rust maps its own guard page at the lower limit of the stack
(determined using pthread_getattr_np() and pthread_attr_getstack()).  I
don't think this ever actually worked for the main thread stack, but it
now also blocks expansion as the default stack size of 8 MiB is smaller
than the stack gap of 16 MiB.  Would it make sense to skip over
PROT_NONE mappings when checking whether it's safe to expand?

Secondly, LibreOffice is crashing on i386 when running components
implemented in Java.  I don't have a diagnosis for this yet.

Ben.

-- 
Ben Hutchings
The world is coming to an end.  Please log off.



signature.asc
Description: This is a digitally signed message part

Re: [PATCH 1/5] mm/persistent-memory: match IORES_DESC name and enum memory_type one

2017-07-03 Thread Dan Williams

On Mon, Jul 3, 2017 at 2:14 PM, Jérôme Glisse  wrote:
> Use consistent name between IORES_DESC and enum memory_type, rename
> MEMORY_DEVICE_PUBLIC to MEMORY_DEVICE_PERSISTENT. This is to free up
> the public name for CDM (cache coherent device memory) for which the
> term public is a better match.
>
> Signed-off-by: Jérôme Glisse 
> Cc: Dan Williams 
> Cc: Ross Zwisler 
> ---
>  include/linux/memremap.h | 4 ++--
>  kernel/memremap.c| 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 57546a07a558..2299cc2d387d 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -41,7 +41,7 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned 
> long memmap_start)
>   * Specialize ZONE_DEVICE memory into multiple types each having differents
>   * usage.
>   *
> - * MEMORY_DEVICE_PUBLIC:
> + * MEMORY_DEVICE_PERSISTENT:
>   * Persistent device memory (pmem): struct page might be allocated in 
> different
>   * memory and architecture might want to perform special actions. It is 
> similar
>   * to regular memory, in that the CPU can access it transparently. However,
> @@ -59,7 +59,7 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned 
> long memmap_start)
>   * include/linux/hmm.h and Documentation/vm/hmm.txt.
>   */
>  enum memory_type {
> -   MEMORY_DEVICE_PUBLIC = 0,
> +   MEMORY_DEVICE_PERSISTENT = 0,
> MEMORY_DEVICE_PRIVATE,
>  };
>
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index b9baa6c07918..e82456c39a6a 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -350,7 +350,7 @@ void *devm_memremap_pages(struct device *dev, struct 
> resource *res,
> }
> pgmap->ref = ref;
> pgmap->res = &page_map->res;
> -   pgmap->type = MEMORY_DEVICE_PUBLIC;
> +   pgmap->type = MEMORY_DEVICE_PERSISTENT;
> pgmap->page_fault = NULL;
> pgmap->page_free = NULL;
> pgmap->data = NULL;

I think we need a different name. There's nothing "persistent" about
the devm_memremap_pages() path. Why can't they share name, is the only
difference coherence? I'm thinking something like:

MEMORY_DEVICE_PRIVATE
MEMORY_DEVICE_COHERENT /* persistent memory and coherent devices */
MEMORY_DEVICE_IO /* "public", but not coherent */

linux-next: btrfs merge resolution

2017-07-03 Thread Stephen Rothwell

Hi all,

Since the block tree has been merged into Linus' tree, here is the
merge resolution from my merge of the btrfs-kdave tree today:

36c639d65d6a6e596d73ccf285adf9fffc9097b8
diff --cc fs/btrfs/disk-io.c
index 6036d15b47b8,b6758892874f..7065201bedcf
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -87,9 -87,8 +87,8 @@@ struct btrfs_end_io_wq 
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
 -  int error;
 +  blk_status_t status;
enum btrfs_wq_endio_type metadata;
-   struct list_head list;
struct btrfs_work work;
  };
  
@@@ -868,10 -867,10 +867,10 @@@ unsigned long btrfs_async_submit_limit(
  static void run_one_async_start(struct btrfs_work *work)
  {
struct async_submit_bio *async;
 -  int ret;
 +  blk_status_t ret;
  
async = container_of(work, struct  async_submit_bio, work);
-   ret = async->submit_bio_start(async->inode, async->bio,
+   ret = async->submit_bio_start(async->private_data, async->bio,
  async->mirror_num, async->bio_flags,
  async->bio_offset);
if (ret)
@@@ -916,19 -915,20 +915,20 @@@ static void run_one_async_free(struct b
kfree(async);
  }
  
- blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
-   struct inode *inode, struct bio *bio, int mirror_num,
-   unsigned long bio_flags, u64 bio_offset,
-   extent_submit_bio_hook_t *submit_bio_start,
-   extent_submit_bio_hook_t *submit_bio_done)
 -int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
++blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio 
*bio,
+   int mirror_num, unsigned long bio_flags,
+   u64 bio_offset, void *private_data,
+   extent_submit_bio_hook_t *submit_bio_start,
+   extent_submit_bio_hook_t *submit_bio_done)
  {
struct async_submit_bio *async;
  
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
 -  return -ENOMEM;
 +  return BLK_STS_RESOURCE;
  
-   async->inode = inode;
+   async->private_data = private_data;
+   async->fs_info = fs_info;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
@@@ -971,12 -971,12 +971,12 @@@ static blk_status_t btree_csum_one_bio(
break;
}
  
 -  return ret;
 +  return errno_to_blk_status(ret);
  }
  
- static blk_status_t __btree_submit_bio_start(struct inode *inode,
-   struct bio *bio, int mirror_num, unsigned long bio_flags,
-   u64 bio_offset)
 -static int __btree_submit_bio_start(void *private_data, struct bio *bio,
++static blk_status_t __btree_submit_bio_start(void *private_data, struct bio 
*bio,
+   int mirror_num, unsigned long bio_flags,
+   u64 bio_offset)
  {
/*
 * when we're called for a write, we're already in the async
@@@ -985,11 -985,12 +985,12 @@@
return btree_csum_one_bio(bio);
  }
  
- static blk_status_t __btree_submit_bio_done(struct inode *inode,
-   struct bio *bio, int mirror_num, unsigned long bio_flags,
-   u64 bio_offset)
 -static int __btree_submit_bio_done(void *private_data, struct bio *bio,
++static blk_status_t __btree_submit_bio_done(void *private_data, struct bio 
*bio,
+int mirror_num, unsigned long bio_flags,
+u64 bio_offset)
  {
+   struct inode *inode = private_data;
 -  int ret;
 +  blk_status_t ret;
  
/*
 * when we're called for a write, we're already in the async
@@@ -1014,13 -1015,14 +1015,14 @@@ static int check_async_write(unsigned l
return 1;
  }
  
- static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio 
*bio,
 -static int btree_submit_bio_hook(void *private_data, struct bio *bio,
++static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
 int mirror_num, unsigned long bio_flags,
 u64 bio_offset)
  {
+   struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(bio_flags);
 -  int ret;
 +  blk_status_t ret;
  
if (bio_op(bio) != REQ_OP_WRITE) {
/*
@@@ -3490,59 -3485,57 +3485,57 @@@ static void btrfs_end_empty_barrier(str
  }
  
  /*
-  * trigger flushes for one the devices.  If you pass wait == 0, the flushes 
are
-  * sent down.  With wait == 1, it waits for the previous flush.
-  *
-  * any device where the flush fails with eopnotsupp are flagged as not-barrier
-  * capable
+  * Submit a flush request to the device if it supports it. Error

Re: [PATCH] KVM: arm/arm64: Handle hva aging while destroying the vm

2017-07-03 Thread Andrea Arcangeli

Hello,

On Mon, Jul 03, 2017 at 10:48:03AM +0200, Alexander Graf wrote:
> On 07/03/2017 10:03 AM, Christoffer Dall wrote:
> > Hi Alex,
> >
> > On Fri, Jun 23, 2017 at 05:21:59PM +0200, Alexander Graf wrote:
> >> If we want to age an HVA while the VM is getting destroyed, we have a
> >> tiny race window during which we may end up dereferencing an invalid
> >> kvm->arch.pgd value.
> >>
> >> CPU0   CPU1
> >>
> >> kvm_age_hva()
> >>kvm_mmu_notifier_release()
> >>kvm_arch_flush_shadow_all()
> >>kvm_free_stage2_pgd()
> >>
> >> stage2_get_pmd()
> >> 
> >>set kvm->arch.pgd = 0
> >>
> >> 
> >> stage2_get_pud()
> >> arch.pgd>
> >> 
> > I don't think this sequence, can happen, but I think kvm_age_hva() can
> > be called with the mmu_lock held and kvm->pgd already being NULL.
> >
> > Is that possible for the mmu notifiers to be calling clear(_flush)_young
> > while also calling notifier_release?
> 
> I *think* the aging happens completely orthogonally to release. But 
> let's ask Andrea - I'm sure he knows :).

I think the sequence can happen. All mmu notifier methods are flushed
out of CPUs only through synchronize_srcu() which is called as the
last step in __mmu_notifier_release/unregister. Only after _unregister
returns you're sure kvm_age_hva cannot run anymore, until that point
it can still run. Even during exit_mmap->mmu_notifier_release it can
still run if invoked through rmap walks.

So while the ->release method runs, all other mmu notifier methods
could be still invoked concurrently.

mmu notifier methods are only protected by srcu to prevent the mmu
notifier structure from being freed from under them, but there's no
additional locking to serialize them (except for the synchronize_srcu
that happens as the last step of mmu_notifier_release/unregister, well
after they may have called the ->release method).

There's also a comment about it in __mmu_notifier_release:

 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with

And in the mmu_notifier_unregister too:

 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returned we're guaranteed
 * that ->release or any other method can't run anymore.

Even ->release could in theory run concurrently against itself if
mmu_notifier_unregister runs concurrently with mmu_notifier_release,
but that's a purely theoretical possibility.

Thanks,
Andrea

[GIT PULL] security subsystem updates for v4.13

2017-07-03 Thread James Morris

Hi Linus,

- This update includes a major update for AppArmor. From JJ: 

" * several bug fixes and cleanups
  * the patch to add symlink support to securityfs that was floated on
the list earlier and the apparmorfs changes that make use of
securityfs symlinks
  * it introduces the domain labeling base code that Ubuntu has been
carrying for several years, with several cleanups applied. And it
converts the current mediation over to using the domain labeling
base, which brings domain stacking support with it. This finally
will bring the base upstream code in line with Ubuntu and provide a
base to upstream the new feature work that Ubuntu carries.

  This request does not contain any of the newer apparmor mediation
  features/controls (mount, signals, network, keys, ...) that Ubuntu is
  currently carrying, all of which will be RFC'd on top of this.  "

- Notable also is the Infiniband work in SELinux, and the new file:map 
permission.  From Paul:

" While we're down to 21 patches for v4.13 (it was 31 for v4.12), the
  diffstat jumps up tremendously with over 2k of line changes.  Almost  
  all of these changes are the SELinux/IB work done by Daniel Jurgens;
  some other noteworthy changes include a NFS v4.2 labeling fix, a new
  file:map permission, and reporting of policy capabilities on policy
  load.  "

There's also now genfscon labeling support for tracefs, which was lost in 
v4.1 with the separation from debugfs.

- Smack incorporates a safer socket check in file_receive, and adds a 
cap_capable call in privilege check.

- TPM as usual has a bunch of fixes and enhancements.

- Multiple calls to security_add_hooks() can now be made for the same LSM, 
to allow LSMs to have hook declarations across multiple files.

- IMA now supports different "ima_appraise=" modes (eg. log, fix) from the 
  boot command line.

Please pull!


---

The following changes since commit e0f3e8f14da868047c524a0cf11e08b95fd1b008:

  Merge branch 'for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux (2017-07-03 15:39:36 
-0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git next

Andy Shevchenko (1):
  tpm/st33zp24: Switch to devm_acpi_dev_add_driver_gpios()

Ben Hutchings (1):
  IMA: Correct Kconfig dependencies for hash selection

Bryan Freed (1):
  tpm: Apply a sane minimum adapterlimit value for retransmission.

Casey Schaufler (2):
  Smack: Safer check for a socket in file_receive
  Smack: Use cap_capable in privilege check

Dan Carpenter (1):
  apparmor: Fix error cod in __aa_fs_profile_mkdir()

Daniel Jurgens (9):
  IB/core: IB cache enhancements to support Infiniband security
  IB/core: Enforce PKey security on QPs
  selinux lsm IB/core: Implement LSM notification system
  IB/core: Enforce security on management datagrams
  selinux: Create policydb version for Infiniband support
  selinux: Allocate and free infiniband security hooks
  selinux: Implement Infiniband PKey "Access" access vector
  selinux: Add IB Port SMP access vector
  selinux: Add a cache for quicker retreival of PKey SIDs

Eric Richter (1):
  IMA: update IMA policy documentation to include pcr= option

Florian Westphal (2):
  smack: use pernet operations for hook registration
  selinux: use pernet operations for hook registration

Geert Uytterhoeven (1):
  security: Grammar s/allocates/allocated/

Geliang Tang (1):
  ima: use memdup_user_nul

Gustavo A. R. Silva (1):
  tpm/tpm_atmel: remove unnecessary NULL check

James Morris (4):
  Sync to mainline for security submaintainers to work against
  Merge branch 'smack-for-4.13' of git://github.com/cschaufler/smack-next 
into next
  Merge branch 'stable-4.13' of 
git://git.infradead.org/users/pcmoore/selinux into next
  Merge tag 'seccomp-next' of git://git.kernel.org/.../kees/linux into next

Jarkko Sakkinen (3):
  tpm: fix byte order related arithmetic inconsistency in tpm_getcap()
  tpm, tpm_infineon: remove useless snprintf() calls
  tpm: remove struct tpm_pcrextend_in

Jason Gunthorpe (3):
  tpm_tis: Fix IRQ autoprobing when using platform_device
  tpm_tis: Use platform_get_irq
  tpm_tis: Consolidate the platform and acpi probe flow

Jeff Vander Stoep (1):
  selinux: enable genfscon labeling for tracefs

John Johansen (59):
  apparmor: move file context into file.h
  apparmor: make internal lib fn skipn_spaces available to the rest of 
apparmor
  apparmor: allow profiles to provide info to disconnected paths
  apparmor: Move path lookup to using preallocated buffers
  securityfs: add the ability to support symlinks
  apparmor: move to per loaddata files, instead of replicating in profiles
  apparmor: use macro template to simplify profile seq_files
  apparmor: use macro template to simplify namespace seq_files
  apparmor: add cu

[PATCH] staging: comedi: Use offset_in_page macro

2017-07-03 Thread Amitoj Kaur Chawla

Use offset_in_page macro instead of (var & ~PAGE_MASK)

The Coccinelle semantic patch used to make this change is as follows:
// 
@@
unsigned long p;
@@
- p & ~PAGE_MASK
+ offset_in_page(p)
// 

Signed-off-by: Amitoj Kaur Chawla 
---
 drivers/staging/comedi/comedi_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/comedi_buf.c 
b/drivers/staging/comedi/comedi_buf.c
index 8e9b30b..b455ff6 100644
--- a/drivers/staging/comedi/comedi_buf.c
+++ b/drivers/staging/comedi/comedi_buf.c
@@ -165,7 +165,7 @@ int comedi_buf_map_put(struct comedi_buf_map *bm)
 int comedi_buf_map_access(struct comedi_buf_map *bm, unsigned long offset,
  void *buf, int len, int write)
 {
-   unsigned int pgoff = offset & ~PAGE_MASK;
+   unsigned int pgoff = offset_in_page(offset);
unsigned long pg = offset >> PAGE_SHIFT;
int done = 0;
 
-- 
2.7.4

[PATCH] f2fs: avoid migratepage for atomic written page

2017-07-03 Thread Jaegeuk Kim

In order to avoid lock contention for atomic written pages, we'd better return
EAGAIN in f2fs_migrate_page. We expect it to be released soon, once the
transaction commits.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c | 35 ++-
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d58b81213a86..1458e3a6d630 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2197,41 +2197,26 @@ static sector_t f2fs_bmap(struct address_space 
*mapping, sector_t block)
 int f2fs_migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page, enum migrate_mode mode)
 {
-   int rc, extra_count;
-   struct f2fs_inode_info *fi = F2FS_I(mapping->host);
-   bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
+   int rc;
 
-   BUG_ON(PageWriteback(page));
-
-   /* migrating an atomic written page is safe with the inmem_lock hold */
-   if (atomic_written && !mutex_trylock(&fi->inmem_lock))
+   /*
+* We'd better return EAGAIN for atomic pages, which will be committed
+* sooner or later. Don't botter transactions with inmem_lock.
+*/
+   if (IS_ATOMIC_WRITTEN_PAGE(page))
return -EAGAIN;
 
+   BUG_ON(PageWriteback(page));/* Writeback must be complete */
+
/*
 * A reference is expected if PagePrivate set when move mapping,
 * however F2FS breaks this for maintaining dirty page counts when
 * truncating pages. So here adjusting the 'extra_count' make it work.
 */
-   extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
rc = migrate_page_move_mapping(mapping, newpage,
-   page, NULL, mode, extra_count);
-   if (rc != MIGRATEPAGE_SUCCESS) {
-   if (atomic_written)
-   mutex_unlock(&fi->inmem_lock);
+   page, NULL, mode, (page_has_private(page) ? -1 : 0));
+   if (rc != MIGRATEPAGE_SUCCESS)
return rc;
-   }
-
-   if (atomic_written) {
-   struct inmem_pages *cur;
-   list_for_each_entry(cur, &fi->inmem_pages, list)
-   if (cur->page == page) {
-   cur->page = newpage;
-   break;
-   }
-   mutex_unlock(&fi->inmem_lock);
-   put_page(page);
-   get_page(newpage);
-   }
 
if (PagePrivate(page))
SetPagePrivate(newpage);
-- 
2.13.0.rc1.294.g07d810a77f-goog

Re: [PATCH 8/9] RISC-V: User-facing API

2017-07-03 Thread James Hogan

On Thu, Jun 29, 2017 at 02:42:38PM -0700, Palmer Dabbelt wrote:
> On Wed, 28 Jun 2017 15:42:37 PDT (-0700), james.ho...@imgtec.com wrote:
> > On Wed, Jun 28, 2017 at 11:55:37AM -0700, Palmer Dabbelt wrote:
> >> diff --git a/arch/riscv/include/uapi/asm/ucontext.h 
> >> b/arch/riscv/include/uapi/asm/ucontext.h
> >> new file mode 100644
> >> index ..52eff9febcfd
> >> --- /dev/null
> >> +++ b/arch/riscv/include/uapi/asm/ucontext.h
> > ...
> >> +struct ucontext {
> >> +  unsigned long uc_flags;
> >> +  struct ucontext  *uc_link;
> >> +  stack_t   uc_stack;
> >> +  sigset_t  uc_sigmask;
> >> +  /* glibc uses a 1024-bit sigset_t */
> >> +  __u8  __unused[1024 / 8 - sizeof(sigset_t)];
> >> +  /* last for future expansion */
> >> +  struct sigcontext uc_mcontext;
> >> +};
> >
> > Any particular reason not to use the asm-generic ucontext?
> 
> In the generic ucontext, 'uc_sigmask' is at the end of the structure so it can
> be expanded.  Since we want our mcontext to be expandable as well, we
> pre-allocate some expandable space for sigmask and then put mcontext at the
> end.
> 
> We stole this idea from arm64.

Curious. __unused seems like overkill to be honest given that expanding
the number of signals up to 128 causes other issues (as discovered on
MIPS e.g. the waitpid() status, with stopsig not fitting below the exit
code (shift 8) and core dump flag (bit 7)), but perhaps it could be
carefully expanded by splitting the stopsig field.

Looks harmless here I suppose so I defer to others. If it is the
preferred approach does it make sense to make it the "default" for new
architectures at some point?

Cheers
James


signature.asc
Description: Digital signature

Re: [PATCH] vmalloc: respect the GFP_NOIO and GFP_NOFS flags

2017-07-03 Thread Mikulas Patocka



On Mon, 3 Jul 2017, Michal Hocko wrote:

> We can add a warning (or move it from kvmalloc) and hope that the
> respective maintainers will fix those places properly. The reason I
> didn't add the warning to vmalloc and kept it in kvmalloc was to catch
> only new users rather than suddenly splat on existing ones. Note that
> there are users with panic_on_warn enabled.
> 
> Considering how many NOFS users we have in tree I would rather work with
> maintainers to fix them.

So - do you want this patch?

I still believe that the previous patch that pushes 
memalloc_noio/nofs_save into __vmalloc is better than this.

Currently there are 28 __vmalloc callers that use GFP_NOIO or GFP_NOFS, 
three of them already use memalloc_noio_save, 25 don't.

Mikulas

---
 drivers/block/drbd/drbd_bitmap.c|8 +---
 drivers/infiniband/hw/mlx4/qp.c |   21 +
 drivers/infiniband/sw/rdmavt/qp.c   |   19 +--
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |7 +--
 drivers/md/dm-bufio.c   |2 +-
 drivers/mtd/ubi/io.c|   11 +--
 fs/btrfs/free-space-tree.c  |7 ++-
 fs/ext4/super.c |   21 +
 fs/gfs2/dir.c   |   29 +
 fs/gfs2/quota.c |8 ++--
 fs/nfs/blocklayout/extent_tree.c|7 ++-
 fs/ntfs/malloc.h|   11 +--
 fs/ubifs/debug.c|5 -
 fs/ubifs/lprops.c   |5 -
 fs/ubifs/lpt_commit.c   |   10 --
 fs/ubifs/orphan.c   |5 -
 fs/ubifs/ubifs.h|1 +
 fs/xfs/kmem.c   |2 +-
 mm/page_alloc.c |2 +-
 mm/vmalloc.c|6 ++
 net/ceph/ceph_common.c  |   14 --
 21 files changed, 156 insertions(+), 45 deletions(-)

Index: linux-2.6/drivers/block/drbd/drbd_bitmap.c
===
--- linux-2.6.orig/drivers/block/drbd/drbd_bitmap.c
+++ linux-2.6/drivers/block/drbd/drbd_bitmap.c
@@ -26,6 +26,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -408,9 +409,10 @@ static struct page **bm_realloc_pages(st
bytes = sizeof(struct page *)*want;
new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
if (!new_pages) {
-   new_pages = __vmalloc(bytes,
-   GFP_NOIO | __GFP_ZERO,
-   PAGE_KERNEL);
+   unsigned noio;
+   noio = memalloc_noio_save();
+   new_pages = vmalloc(bytes);
+   memalloc_noio_restore(noio);
if (!new_pages)
return NULL;
}
Index: linux-2.6/drivers/infiniband/hw/mlx4/qp.c
===
--- linux-2.6.orig/drivers/infiniband/hw/mlx4/qp.c
+++ linux-2.6/drivers/infiniband/hw/mlx4/qp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -814,14 +815,26 @@ static int create_qp_common(struct mlx4_
 
qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
gfp | __GFP_NOWARN);
-   if (!qp->sq.wrid)
+   if (!qp->sq.wrid) {
+   unsigned noio;
+   if (!(gfp & __GFP_IO))
+   noio = memalloc_noio_save();
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
-   gfp, PAGE_KERNEL);
+   gfp | __GFP_FS | __GFP_IO, 
PAGE_KERNEL);
+   if (!(gfp & __GFP_IO))
+   memalloc_noio_restore(noio);
+   }
qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
gfp | __GFP_NOWARN);
-   if (!qp->rq.wrid)
+   if (!qp->rq.wrid) {
+   unsigned noio;
+   if (!(gfp & __GFP_IO))
+   noio = memalloc_noio_save();
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
-   gfp, PAGE_KERNEL);
+   gfp | __GFP_FS | __GFP_IO, 
PAGE_KERNEL);
+   if (!(gfp & __GFP_IO))
+   memalloc_noio_restore(noio);
+   }
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
Index: linux-2.6/drivers/infiniband/sw/rdmavt/qp.c
===
--- linux-2.6.orig/driver

Re: [PATCH BUGFIX] block, bfq: fix bug causing crashes

2017-07-03 Thread Jens Axboe

On 07/03/2017 02:00 AM, Paolo Valente wrote:
> Hi Jens,
> I'm writing this short cover letter to hopefully help you decide what
> to do with this patch, in this late phase of the development
> cycle. This patch fixes a bug causing kernel crashes for at least
> one year. Crashes apparently affect only a minority of users, but are
> systematic for them (a crash every few tens of minutes for some).

By the time you wrote that email, 4.12 had already been released for hours.
So there's really no choice this time but to queue it up for 4.13.

-- 
Jens Axboe

Re: [PATCH RFC 08/26] locking: Remove spin_unlock_wait() generic definitions

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 3:30 PM, Paul E. McKenney
 wrote:
>
> That certainly is one interesting function, isn't it?  I wonder what
> happens if you replace the raw_spin_is_locked() calls with an
> unlock under a trylock check?  ;-)

Deadlock due to interrupts again?

Didn't your spin_unlock_wait() patches teach you anything? Checking
state is fundamentally different from taking the lock. Even a trylock.

I guess you could try with the irqsave versions. But no, we're not doing that.

Linus

Re: [GIT PULL] s390 patches for 4.13 merge window

2017-07-03 Thread Linus Torvalds

On Mon, Jul 3, 2017 at 2:01 AM, Martin Schwidefsky
 wrote:
>
> please pull from the 'for-linus' branch of
>
> git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git for-linus

So my conflict resolution looks different from the one Stephen posted,
which may be due to various reasons, ranging from "linux-next has
other things that conflict" to just "I didn't notice some semantic
conflict since unlike linux-next I don't build for s390".

Regardless, you should check my current -git tree just to verify, and
send me a patch if I screwed something up.

  Linus

Dear Beloved

2017-07-03 Thread Mrs marios




--
Dear Beloved Friend,

I am Mrs Nicole Benoite Marois and I have been suffering from ovarian 
cancer disease and the doctor says that i have just few weeks to leave. 
I am from (Paris) France but based in Benin republic since eleven years 
ago as a business woman dealing with gold exportation before the death 
of my husband many years ago.


I have $4.5 Million US Dollars at Eco-Bank here in Benin republic and I 
instructed the bank to transfer the fund to you as foreigner that will 
apply to the bank after I have gone, that they should release the fund 
to him/her, but you will assure me that you will take 50% of the fund 
and give 50% to the orphanages home in your country for my heart to 
rest.


Yours fairly friend,
Mrs. Nicole Benoite Marois

Re: [PATCH RFC 08/26] locking: Remove spin_unlock_wait() generic definitions

2017-07-03 Thread Paul E. McKenney

On Mon, Jul 03, 2017 at 06:13:38PM +0100, Will Deacon wrote:
> On Mon, Jul 03, 2017 at 09:40:22AM -0700, Linus Torvalds wrote:
> > On Mon, Jul 3, 2017 at 9:18 AM, Paul E. McKenney
> >  wrote:
> > >
> > > Agreed, and my next step is to look at spin_lock() followed by
> > > spin_is_locked(), not necessarily the same lock.
> > 
> > Hmm. Most (all?) "spin_is_locked()" really should be about the same
> > thread that took the lock (ie it's about asserts and lock debugging).
> > 
> > The optimistic ABBA avoidance pattern for spinlocks *should* be
> > 
> > spin_lock(inner)
> > ...
> > if (!try_lock(outer)) {
> >spin_unlock(inner);
> >.. do them in the right order ..
> > 
> > so I don't think spin_is_locked() should have any memory barriers.
> > 
> > In fact, the core function for spin_is_locked() is arguably
> > arch_spin_value_unlocked() which doesn't even do the access itself.
> 
> Yeah, but there's some spaced-out stuff going on in kgdb_cpu_enter where
> it looks to me like raw_spin_is_locked is used for synchronization. My
> eyes are hurting looking at it, though.

That certainly is one interesting function, isn't it?  I wonder what
happens if you replace the raw_spin_is_locked() calls with an
unlock under a trylock check?  ;-)

Thanx, Paul

[GIT PULL] Audit patches for v4.13

2017-07-03 Thread Paul Moore

Hi Linus,

Things are relatively quiet on the audit front for v4.13, just five
patches for a total diffstat of 102 lines.  There are two patches from
Richard to consistently record the POSIX capabilities and add the
ambient capability information as well.  I also chipped in two patches
to fix a race condition with the auditd tracking code and ensure we
don't skip sending any records to the audit multicast group.  Finally
a single style fix that I accepted because I must have been in a good
mood that day.

Everything passes our test suite, and should be relatively harmless,
please merge for v4.13.

Thanks,
-Paul

---
The following changes since commit 48d0e023af9799cd7220335baf8e3ba61eeafbeb:

 audit: fix the RCU locking for the auditd_connection structure
   (2017-05-02 10:16:05 -0400)

are available in the git repository at:

 git://git.infradead.org/users/pcmoore/audit stable-4.13

for you to fetch changes up to cd33f5f2cbfaadc21270f3ddac7c3c33e0a1a28c:

 audit: make sure we never skip the multicast broadcast
   (2017-06-16 11:51:00 -0400)


Derek Robson (1):
 audit: style fix

Paul Moore (2):
 audit: fix a race condition with the auditd tracking code
 audit: make sure we never skip the multicast broadcast

Richard Guy Briggs (2):
 audit: unswing cap_* fields in PATH records
 audit: add ambient capabilities to CAPSET and BPRM_FCAPS records

kernel/audit.c   | 61 +---
kernel/audit.h   | 29 ++-
kernel/auditsc.c | 12 ---
3 files changed, 53 insertions(+), 49 deletions(-)

-- 
paul moore
www.paul-moore.com

Re: [RFC/RFT PATCH 2/4] net: ethernat: ti: cpts: enable irq

2017-07-03 Thread Ivan Khoronzhuk

On Mon, Jul 03, 2017 at 02:31:06PM -0500, Grygorii Strashko wrote:
> 
> 
> On 06/30/2017 08:31 PM, Ivan Khoronzhuk wrote:
> > On Tue, Jun 13, 2017 at 06:16:21PM -0500, Grygorii Strashko wrote:
> >> There are two reasons for this change:
> >> 1) enabling of HW_TS_PUSH events as suggested by Richard Cochran and
> >> discussed in [1]
> >> 2) fixing an TX timestamping miss issue which happens with low speed
> >> ethernet connections and was reproduced on am57xx and am335x boards.
> >> Issue description: With the low Ethernet connection speed CPDMA 
> >> notification
> >> about packet processing can be received before CPTS TX timestamp event,
> >> which is sent when packet actually left CPSW while cpdma notification is
> >> sent when packet pushed in CPSW fifo.  As result, when connection is slow
> >> and CPU is fast enough TX timestamp can be missed and not working properly.
> >>
> >> This patch converts CPTS driver to use IRQ instead of polling in the
> >> following way:
> >>
> >>   - CPTS_EV_PUSH: CPTS_EV_PUSH is used to get current CPTS counter value 
> >> and
> >> triggered from PTP callbacks and cpts_overflow_check() work. With this
> >> change current CPTS counter value will be read in IRQ handler and saved in
> >> CPTS context "cur_timestamp" field. The completion event will be signalled 
> >> to the
> >> requestor. The timecounter->read() will just read saved value. Access to
> >> the "cur_timestamp" is protected by mutex "ptp_clk_mutex".
> >>
> >> cpts_get_time:
> >>reinit_completion(&cpts->ts_push_complete);
> >>cpts_write32(cpts, TS_PUSH, ts_push);
> >>wait_for_completion_interruptible_timeout(&cpts->ts_push_complete, HZ);
> >>ns = timecounter_read(&cpts->tc);
> >>
> >> cpts_irq:
> >>case CPTS_EV_PUSH:
> >>cpts->cur_timestamp = lo;
> >>complete(&cpts->ts_push_complete);
> >>
> >> - CPTS_EV_TX: signals when CPTS timestamp is ready for valid TX PTP
> >> packets. The TX timestamp is requested from cpts_tx_timestamp() which is
> >> called for each transmitted packet from NAPI cpsw_tx_poll() callback. With
> >> this change, CPTS event queue will be checked for existing CPTS_EV_TX
> >> event, corresponding to the current TX packet, and if event is not found - 
> >> packet
> >> will be placed in CPTS TX packet queue for later processing. CPTS TX packet
> >> queue will be processed from hi-priority cpts_ts_work() work which is 
> >> scheduled
> >> as from cpts_tx_timestamp() as from CPTS IRQ handler when CPTS_EV_TX event
> >> is received.
> >>
> >> cpts_tx_timestamp:
> >>   check if packet is PTP packet
> >>   try to find corresponding CPTS_EV_TX event
> >> if found: report timestamp
> >> if not found: put packet in TX queue, schedule cpts_ts_work()
> > I've not read patch itself yet, but why schedule is needed if timestamp is 
> > not
> > found? Anyway it is scheduled with irq when timestamp arrives. It's rather 
> > should
> > be scheduled if timestamp is found,
> 
> CPTS IRQ, cpts_ts_work and Net SoftIRQ processing might happen on
> different CPUs, as result - CPTS IRQ will detect TX event and schedule 
> cpts_ts_work on
> one CPU and this work might race with SKB processing in Net SoftIRQ on 
> another, so
> both SKB and CPTS TX event might be queued, but no cpts_ts_work scheduled 
> until
> next CPTS event is received (worst case for cpts_overflow_check period).

Wouldn't it be better to put the packet in the TX/RX queue under cpts->lock?
Then, probably, there is no need to schedule work in rx/tx timestamping, and
cpts_ts_work() will not potentially be scheduled twice. I know it makes the IRQ
handler wait a little, but it waits anyway while NetSoftIRQ retrieves the ts.

> 
> Situation became even more complex on RT kernel where everything is
> executed in kthread contexts.
> 
> > 
> >>
> >> cpts_irq:
> >>   case CPTS_EV_TX:
> >>   put event in CPTS event queue
> >>   schedule cpts_ts_work()
> >>
> >> cpts_ts_work:
> >> for each packet in  CPTS TX packet queue
> >> try to find corresponding CPTS_EV_TX event
> >> if found: report timestamp
> >> if timeout: drop packet
> >>
> >> - CPTS_EV_RX: signals when CPTS timestamp is ready for valid RX PTP
> >> packets. The RX timestamp is requested from cpts_rx_timestamp() which is
> >> called for each received packet from NAPI cpsw_rx_poll() callback. With
> >> this change, CPTS event queue will be checked for existing CPTS_EV_RX
> >> event, corresponding to the current RX packet, and if event is not found - 
> >> packet
> >> will be placed in CPTS RX packet queue for later processing. CPTS RX packet
> >> queue will be processed from hi-priority cpts_ts_work() work which is 
> >> scheduled
> >> as from cpts_rx_timestamp() as from CPTS IRQ handler when CPTS_EV_RX event
> >> is received. cpts_rx_timestamp() has been updated to return failure in case
> >> of RX timestamp processing delaying and, in such cases, caller of
> >> cpts_rx_timestamp() should not call netif_receive_skb().
> > It's much similar to tx path, but

Re: [GIT PULL] x86/asm changes for v4.13

2017-07-03 Thread Ingo Molnar


* Linus Torvalds  wrote:

> On Mon, Jul 3, 2017 at 1:50 AM, Ingo Molnar  wrote:
> >
> > A single commit micro-optimizing short string copies on certain Intel CPUs.
> 
> That's slightly misleading - it's not string copies, it's just regular
> user copies using the "fast string" interface.

Indeed!

> Fixing that up in my merge commit message.

Thanks!

Ingo

[GIT PULL] ACPI updates for v4.13-rc1

2017-07-03 Thread Rafael J. Wysocki

Hi Linus,

Please pull from the tag

 git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
 acpi-4.13-rc1

with top-most commit 03471c06d02f80f9338a5df45f2a92ef38ca1233

 Merge branches 'acpi-ec' and 'acpi-video'

on top of commit c0bc126f97fb929b3ae02c1c62322645d70eb408

 Linux 4.12-rc7

to receive ACPI updates for v4.13-rc1.

These mostly update the ACPICA code in the kernel to upstream
revision 20170531 which covers all of the new material from ACPI 6.2,
including new tables (WSMT, HMAT, PPTT), new subtables and definition
changes for some existing tables (BGRT, HEST, SRAT, TPM2, PCCT),
new resource descriptor macros for pin control, support for new
predefined methods (_LSI, _LSR, _LSW, _HMA), fixes and cleanups.

On top of that, an additional ACPICA change from Kees (which also
is upstream already) switches all of the definitions of function
pointer structures in ACPICA to use designated initializers so as
to make the structure layout randomization GCC plugin work with it.

The rest is a few fixes and cleanups in the EC driver, an xpower PMIC
driver update, a new backlight blacklist entry, an update of the
tables configfs interface and a message formatting cleanup.

Specifics:

 - Update the ACPICA code in the kernel to upstream revision
   20170531 (which covers all of the new material from
   ACPI 6.2) including:
   * Support for the PinFunction(), PinConfig(), PinGroup(),
 PinGroupFunction(), and PinGroupConfig() resource descriptors
 (Mika Westerberg).
   * Support for new subtables in HEST and SRAT, new notify value
 for HEST, header support for TPM2 table changes, and BGRT
 Status field update (Bob Moore).
   * Support for new PCCT subtables (David Box).
   * Support for _LSI, _LSR, _LSW, and _HMA as predefined methods
 (Erik Schmauss).
   * Support for the new WSMT, HMAT, and PPTT tables (Lv Zheng).
   * New UUID values for Processor Properties (Bob Moore).
   * New notify values for memory attributes and graceful shutdown
 (Bob Moore).
   * Fix related to the PCAT_COMPAT MADT flag (Janosch Hildebrand).
   * Resource to AML conversion fix for resources containing GPIOs
 (Mika Westerberg).
   * Disassembler-related updates (Bob Moore, David Box, Erik
 Schmauss).
   * Assorted fixes and cleanups (Bob Moore, Erik Schmauss, Lv Zheng,
 Cao Jin).

 - Modify ACPICA to always use designated initializers for function
   pointer structures to make the structure layout randomization GCC
   plugin work with it (Kees Cook).

 - Update the tables configfs interface to unload SSDTs on configfs
   entry removal (Jan Kiszka).

 - Add support for the GPI1 regulator to the xpower PMIC Operation
   Region handler (Hans de Goede).

 - Fix ACPI EC issues related to conflicting EC definitions in the
   ECDT and in the ACPI namespace (Lv Zheng, Carlo Caione, Chris
   Chiu).

 - Fix an interrupt storm issue in the EC driver and make its debug
   output work with dynamic debug as expected (Lv Zheng).

 - Add ACPI backlight quirk for Dell Precision 7510 (Shih-Yuan Lee).

 - Fix whitespace in pr_fmt() to align log entries properly in some
   places in the ACPI subsystem (Vincent Legoll).

Thanks!


---

Bob Moore (25):
  ACPICA: Add new notify value for memory attributes update
  ACPICA: Added two new UUID values
  ACPICA: Utilities: Make a notify value reserved
  ACPICA: Update Status field for BGRT table
  ACPICA: Add new notify value for HEST table
  ACPICA: Add new flags to HEST subtables
  ACPICA: Add support for new HEST subtable
  ACPICA: Add support for new SRAT subtable
  ACPICA: Add header support for TPM2 table changes
  ACPICA: Fix a type value overlap in the AML support file
  ACPICA: Update error message for field beyond buffer case
  ACPICA: Debugger/acpiexec: Cleanup error messages
  ACPICA: Disassembler: Abort on an invalid/unknown AML opcode
  ACPICA: Export the public mutex interfaces
  ACPICA: Remove extraneous status check
  ACPICA: Split resource descriptor decode strings to a new file
  ACPICA: Comment update: spelling/format. No functional change
  ACPICA: Fix for Device/Thermal objects with ObjectType and DerefOf
  ACPICA: Update two error messages to emit control method name
  ACPICA: Improvements for debug output only
  ACPICA: Unix application OSL: Correctly handle control-c (EINTR)
  ACPICA: Simplify output for the ACPI Debug Object
  ACPICA: acpiexec: enhance local signal handler
  ACPICA: Update a couple of debug output messages
  ACPICA: Update version to 20170531

Cao Jin (1):
  ACPICA: Update comments, no functional change

Carlo Caione (1):
  ACPI / EC: Add quirk for GL720VMK

Chris Chiu (1):
  ACPI / EC: Fix media keys not working problem on some Asus laptops

David E. Box (2):
  ACPICA: disassembler: improve Switch support
  ACPICA: Add support for new PCCT subtables

Erik Schmauss (11):
  AC

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1033 matches

Mail list logo