RE: [RESEND PATCH] tee: add kernel internal client interface

2018-08-13 Thread Zengtao (B)
Hi jens:

Actually, we have already used the kernel client api in our product(poplar 
board).
Thank you for the upstream.

Tested-by: Zeng Tao 

Regards
Zengtao 

>-Original Message-
>From: Jens Wiklander [mailto:jens.wiklan...@linaro.org]
>Sent: Monday, July 09, 2018 2:16 PM
>To: linux-kernel@vger.kernel.org; linux-arm-ker...@lists.infradead.org;
>tee-...@lists.linaro.org
>Cc: Zengtao (B) ; Victor Chong
>; Jerome Forissier ;
>Jens Wiklander 
>Subject: [RESEND PATCH] tee: add kernel internal client interface
>
>Adds a kernel internal TEE client interface to be used by other drivers.
>
>Signed-off-by: Jens Wiklander 
>---
> drivers/tee/tee_core.c  | 113 +---
> include/linux/tee_drv.h |  73 ++
> 2 files changed, 179 insertions(+), 7 deletions(-)
>
>diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index
>dd46b758852a..7b2bb4c50058 100644
>--- a/drivers/tee/tee_core.c
>+++ b/drivers/tee/tee_core.c
>@@ -38,15 +38,13 @@ static DEFINE_SPINLOCK(driver_lock);  static struct
>class *tee_class;  static dev_t tee_devt;
>
>-static int tee_open(struct inode *inode, struct file *filp)
>+static struct tee_context *teedev_open(struct tee_device *teedev)
> {
>   int rc;
>-  struct tee_device *teedev;
>   struct tee_context *ctx;
>
>-  teedev = container_of(inode->i_cdev, struct tee_device, cdev);
>   if (!tee_device_get(teedev))
>-  return -EINVAL;
>+  return ERR_PTR(-EINVAL);
>
>   ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>   if (!ctx) {
>@@ -57,16 +55,16 @@ static int tee_open(struct inode *inode, struct file *filp)
>   kref_init(>refcount);
>   ctx->teedev = teedev;
>   INIT_LIST_HEAD(>list_shm);
>-  filp->private_data = ctx;
>   rc = teedev->desc->ops->open(ctx);
>   if (rc)
>   goto err;
>
>-  return 0;
>+  return ctx;
> err:
>   kfree(ctx);
>   tee_device_put(teedev);
>-  return rc;
>+  return ERR_PTR(rc);
>+
> }
>
> void teedev_ctx_get(struct tee_context *ctx) @@ -100,6 +98,18 @@ static
>void teedev_close_context(struct tee_context *ctx)
>   teedev_ctx_put(ctx);
> }
>
>+static int tee_open(struct inode *inode, struct file *filp) {
>+  struct tee_context *ctx;
>+
>+  ctx = teedev_open(container_of(inode->i_cdev, struct tee_device, cdev));
>+  if (IS_ERR(ctx))
>+  return PTR_ERR(ctx);
>+
>+  filp->private_data = ctx;
>+  return 0;
>+}
>+
> static int tee_release(struct inode *inode, struct file *filp)  {
>   teedev_close_context(filp->private_data);
>@@ -928,6 +938,95 @@ void *tee_get_drvdata(struct tee_device *teedev)  }
>EXPORT_SYMBOL_GPL(tee_get_drvdata);
>
>+struct match_dev_data {
>+  struct tee_ioctl_version_data *vers;
>+  const void *data;
>+  int (*match)(struct tee_ioctl_version_data *, const void *); };
>+
>+static int match_dev(struct device *dev, const void *data) {
>+  const struct match_dev_data *match_data = data;
>+  struct tee_device *teedev = container_of(dev, struct tee_device, dev);
>+
>+  teedev->desc->ops->get_version(teedev, match_data->vers);
>+  return match_data->match(match_data->vers, match_data->data); }
>+
>+struct tee_context *
>+tee_client_open_context(struct tee_context *start,
>+  int (*match)(struct tee_ioctl_version_data *,
>+   const void *),
>+  const void *data, struct tee_ioctl_version_data *vers) {
>+  struct device *dev = NULL;
>+  struct device *put_dev = NULL;
>+  struct tee_context *ctx = NULL;
>+  struct tee_ioctl_version_data v;
>+  struct match_dev_data match_data = { vers ? vers : , data, match };
>+
>+  if (start)
>+  dev = >teedev->dev;
>+
>+  do {
>+  dev = class_find_device(tee_class, dev, _data, match_dev);
>+  if (!dev) {
>+  ctx = ERR_PTR(-ENOENT);
>+  break;
>+  }
>+
>+  put_device(put_dev);
>+  put_dev = dev;
>+
>+  ctx = teedev_open(container_of(dev, struct tee_device, dev));
>+  } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM);
>+
>+  put_device(put_dev);
>+  return ctx;
>+}
>+EXPORT_SYMBOL_GPL(tee_client_open_context);
>+
>+void tee_client_close_context(struct tee_context *ctx) {
>+  teedev_close_context(ctx);
>+}
>+EXPORT_SYMBOL_GPL(tee_client_close_context);
>+
>+void tee_client_get_version(struct tee_context *ctx,
>+  struct tee_ioctl_version_data *vers) {
>+  ctx->teedev->desc->ops->get_version(ctx->teedev, vers); }
>+EXPORT_SYMBOL_GPL(tee_client_get_version);
>+
>+int tee_client_open_session(struct tee_context *ctx,
>+  struct tee_ioctl_open_session_arg *arg,
>+  struct tee_param *param)
>+{
>+  if (!ctx->teedev->desc->ops->open_session)
>+  return -EINVAL;
>+  return 

RE: [RESEND PATCH] tee: add kernel internal client interface

2018-08-13 Thread Zengtao (B)
Hi jens:

Actually, we have already used the kernel client api in our product(poplar 
board).
Thank you for the upstream.

Tested-by: Zeng Tao 

Regards
Zengtao 

>-Original Message-
>From: Jens Wiklander [mailto:jens.wiklan...@linaro.org]
>Sent: Monday, July 09, 2018 2:16 PM
>To: linux-kernel@vger.kernel.org; linux-arm-ker...@lists.infradead.org;
>tee-...@lists.linaro.org
>Cc: Zengtao (B) ; Victor Chong
>; Jerome Forissier ;
>Jens Wiklander 
>Subject: [RESEND PATCH] tee: add kernel internal client interface
>
>Adds a kernel internal TEE client interface to be used by other drivers.
>
>Signed-off-by: Jens Wiklander 
>---
> drivers/tee/tee_core.c  | 113 +---
> include/linux/tee_drv.h |  73 ++
> 2 files changed, 179 insertions(+), 7 deletions(-)
>
>diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index
>dd46b758852a..7b2bb4c50058 100644
>--- a/drivers/tee/tee_core.c
>+++ b/drivers/tee/tee_core.c
>@@ -38,15 +38,13 @@ static DEFINE_SPINLOCK(driver_lock);  static struct
>class *tee_class;  static dev_t tee_devt;
>
>-static int tee_open(struct inode *inode, struct file *filp)
>+static struct tee_context *teedev_open(struct tee_device *teedev)
> {
>   int rc;
>-  struct tee_device *teedev;
>   struct tee_context *ctx;
>
>-  teedev = container_of(inode->i_cdev, struct tee_device, cdev);
>   if (!tee_device_get(teedev))
>-  return -EINVAL;
>+  return ERR_PTR(-EINVAL);
>
>   ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>   if (!ctx) {
>@@ -57,16 +55,16 @@ static int tee_open(struct inode *inode, struct file *filp)
>   kref_init(>refcount);
>   ctx->teedev = teedev;
>   INIT_LIST_HEAD(>list_shm);
>-  filp->private_data = ctx;
>   rc = teedev->desc->ops->open(ctx);
>   if (rc)
>   goto err;
>
>-  return 0;
>+  return ctx;
> err:
>   kfree(ctx);
>   tee_device_put(teedev);
>-  return rc;
>+  return ERR_PTR(rc);
>+
> }
>
> void teedev_ctx_get(struct tee_context *ctx) @@ -100,6 +98,18 @@ static
>void teedev_close_context(struct tee_context *ctx)
>   teedev_ctx_put(ctx);
> }
>
>+static int tee_open(struct inode *inode, struct file *filp) {
>+  struct tee_context *ctx;
>+
>+  ctx = teedev_open(container_of(inode->i_cdev, struct tee_device, cdev));
>+  if (IS_ERR(ctx))
>+  return PTR_ERR(ctx);
>+
>+  filp->private_data = ctx;
>+  return 0;
>+}
>+
> static int tee_release(struct inode *inode, struct file *filp)  {
>   teedev_close_context(filp->private_data);
>@@ -928,6 +938,95 @@ void *tee_get_drvdata(struct tee_device *teedev)  }
>EXPORT_SYMBOL_GPL(tee_get_drvdata);
>
>+struct match_dev_data {
>+  struct tee_ioctl_version_data *vers;
>+  const void *data;
>+  int (*match)(struct tee_ioctl_version_data *, const void *); };
>+
>+static int match_dev(struct device *dev, const void *data) {
>+  const struct match_dev_data *match_data = data;
>+  struct tee_device *teedev = container_of(dev, struct tee_device, dev);
>+
>+  teedev->desc->ops->get_version(teedev, match_data->vers);
>+  return match_data->match(match_data->vers, match_data->data); }
>+
>+struct tee_context *
>+tee_client_open_context(struct tee_context *start,
>+  int (*match)(struct tee_ioctl_version_data *,
>+   const void *),
>+  const void *data, struct tee_ioctl_version_data *vers) {
>+  struct device *dev = NULL;
>+  struct device *put_dev = NULL;
>+  struct tee_context *ctx = NULL;
>+  struct tee_ioctl_version_data v;
>+  struct match_dev_data match_data = { vers ? vers : , data, match };
>+
>+  if (start)
>+  dev = >teedev->dev;
>+
>+  do {
>+  dev = class_find_device(tee_class, dev, _data, match_dev);
>+  if (!dev) {
>+  ctx = ERR_PTR(-ENOENT);
>+  break;
>+  }
>+
>+  put_device(put_dev);
>+  put_dev = dev;
>+
>+  ctx = teedev_open(container_of(dev, struct tee_device, dev));
>+  } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM);
>+
>+  put_device(put_dev);
>+  return ctx;
>+}
>+EXPORT_SYMBOL_GPL(tee_client_open_context);
>+
>+void tee_client_close_context(struct tee_context *ctx) {
>+  teedev_close_context(ctx);
>+}
>+EXPORT_SYMBOL_GPL(tee_client_close_context);
>+
>+void tee_client_get_version(struct tee_context *ctx,
>+  struct tee_ioctl_version_data *vers) {
>+  ctx->teedev->desc->ops->get_version(ctx->teedev, vers); }
>+EXPORT_SYMBOL_GPL(tee_client_get_version);
>+
>+int tee_client_open_session(struct tee_context *ctx,
>+  struct tee_ioctl_open_session_arg *arg,
>+  struct tee_param *param)
>+{
>+  if (!ctx->teedev->desc->ops->open_session)
>+  return -EINVAL;
>+  return 

[PATCH RFC] Documentation/scheduler/sched-stats.txt: correct jiffies to ns

2018-08-13 Thread Weiping Zhang
The unit of run_delay and rq_cpu_time is ns instead of jiffies.

Signed-off-by: Weiping Zhang 
---
 Documentation/scheduler/sched-stats.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/scheduler/sched-stats.txt 
b/Documentation/scheduler/sched-stats.txt
index 8259b34a66ae..e7d1f614a1f1 100644
--- a/Documentation/scheduler/sched-stats.txt
+++ b/Documentation/scheduler/sched-stats.txt
@@ -48,9 +48,8 @@ Next two are try_to_wake_up() statistics:
  6) # of times try_to_wake_up() was called to wake up the local cpu
 
 Next three are statistics describing scheduling latency:
- 7) sum of all time spent running by tasks on this processor (in jiffies)
- 8) sum of all time spent waiting to run by tasks on this processor (in
-jiffies)
+ 7) sum of all time spent running by tasks on this processor (in ns)
+ 8) sum of all time spent waiting to run by tasks on this processor (in ns)
  9) # of timeslices run on this cpu
 
 
-- 
2.14.1



[PATCH RFC] Documentation/scheduler/sched-stats.txt: correct jiffies to ns

2018-08-13 Thread Weiping Zhang
The unit of run_delay and rq_cpu_time is ns instead of jiffies.

Signed-off-by: Weiping Zhang 
---
 Documentation/scheduler/sched-stats.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/scheduler/sched-stats.txt 
b/Documentation/scheduler/sched-stats.txt
index 8259b34a66ae..e7d1f614a1f1 100644
--- a/Documentation/scheduler/sched-stats.txt
+++ b/Documentation/scheduler/sched-stats.txt
@@ -48,9 +48,8 @@ Next two are try_to_wake_up() statistics:
  6) # of times try_to_wake_up() was called to wake up the local cpu
 
 Next three are statistics describing scheduling latency:
- 7) sum of all time spent running by tasks on this processor (in jiffies)
- 8) sum of all time spent waiting to run by tasks on this processor (in
-jiffies)
+ 7) sum of all time spent running by tasks on this processor (in ns)
+ 8) sum of all time spent waiting to run by tasks on this processor (in ns)
  9) # of timeslices run on this cpu
 
 
-- 
2.14.1



Re: [PATCH] remoteproc/davinci: Mark error recovery as disabled

2018-08-13 Thread Sekhar Nori
Hi Bjorn,

On Tuesday 14 August 2018 12:22 AM, Bjorn Andersson wrote:
> On Mon 13 Aug 08:11 PDT 2018, Suman Anna wrote:
> 
>> Hi Bjorn,
>>
>> On 07/23/2018 06:27 PM, Suman Anna wrote:
>>> The Davinci remoteproc driver does not support error recovery at
>>> present, so mark the corresponding remoteproc flag appropriately
>>> so that the debugfs flag shows the value as 'disabled' by default.
>>>
>>> Signed-off-by: Suman Anna 
>>
>> Can you pick up this minor patch for 4.19 please, I do not see this in
>> your rproc-next branch?
>>
> 
> Of course, the patch is applied now, will send my pull request in a few
> days.

Can you also please pick patch 1/4 here:

https://patchwork.kernel.org/patch/10479365/

Thanks,
Sekhar


Re: [PATCH] remoteproc/davinci: Mark error recovery as disabled

2018-08-13 Thread Sekhar Nori
Hi Bjorn,

On Tuesday 14 August 2018 12:22 AM, Bjorn Andersson wrote:
> On Mon 13 Aug 08:11 PDT 2018, Suman Anna wrote:
> 
>> Hi Bjorn,
>>
>> On 07/23/2018 06:27 PM, Suman Anna wrote:
>>> The Davinci remoteproc driver does not support error recovery at
>>> present, so mark the corresponding remoteproc flag appropriately
>>> so that the debugfs flag shows the value as 'disabled' by default.
>>>
>>> Signed-off-by: Suman Anna 
>>
>> Can you pick up this minor patch for 4.19 please, I do not see this in
>> your rproc-next branch?
>>
> 
> Of course, the patch is applied now, will send my pull request in a few
> days.

Can you also please pick patch 1/4 here:

https://patchwork.kernel.org/patch/10479365/

Thanks,
Sekhar


Re: [PATCH v1 10/10] MAINTAINERS: Add entry for Qualcomm TSENS thermal drivers

2018-08-13 Thread Rajendra Nayak



On 8/9/2018 6:02 PM, Amit Kucheria wrote:

Create an entry for the TSENS drivers and mark them as maintained

Signed-off-by: Amit Kucheria 


Thanks Amit for signing up to maintain this driver.

Acked-by: Rajendra Nayak 


---
  MAINTAINERS | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 68b4ff8ed205..ca6183d6d545 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11862,6 +11862,13 @@ L: linux-arm-...@vger.kernel.org
  S:Maintained
  F:drivers/iommu/qcom_iommu.c
  
+QUALCOMM TSENS THERMAL DRIVER

+M: Amit Kucheria 
+L: linux...@vger.kernel.org
+L: linux-arm-...@vger.kernel.org
+S: Maintained
+F: drivers/thermal/qcom/
+
  QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
  M:Stanimir Varbanov 
  L:linux-me...@vger.kernel.org



Re: [PATCH v1 10/10] MAINTAINERS: Add entry for Qualcomm TSENS thermal drivers

2018-08-13 Thread Rajendra Nayak



On 8/9/2018 6:02 PM, Amit Kucheria wrote:

Create an entry for the TSENS drivers and mark them as maintained

Signed-off-by: Amit Kucheria 


Thanks Amit for signing up to maintain this driver.

Acked-by: Rajendra Nayak 


---
  MAINTAINERS | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 68b4ff8ed205..ca6183d6d545 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11862,6 +11862,13 @@ L: linux-arm-...@vger.kernel.org
  S:Maintained
  F:drivers/iommu/qcom_iommu.c
  
+QUALCOMM TSENS THERMAL DRIVER

+M: Amit Kucheria 
+L: linux...@vger.kernel.org
+L: linux-arm-...@vger.kernel.org
+S: Maintained
+F: drivers/thermal/qcom/
+
  QUALCOMM VENUS VIDEO ACCELERATOR DRIVER
  M:Stanimir Varbanov 
  L:linux-me...@vger.kernel.org



Re: [PATCH v3 07/17] x86/pci: add Hygon PCI vendor and northbridge support

2018-08-13 Thread Pu Wen

On 2018/8/14 6:14, Bjorn Helgaas wrote:

On Sat, Aug 11, 2018 at 09:27:42PM +0800, Pu Wen wrote:

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2950223..d0e98a9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -511,6 +511,8 @@
  #define PCI_DEVICE_ID_AMI_MEGARAID0x9010
  #define PCI_DEVICE_ID_AMI_MEGARAID2   0x9060
  
+#define PCI_VENDOR_ID_HYGON		0x1d94


Please add this entry so pci_ids.h remains sorted by Vendor ID, then Device
ID, as the comment at the top suggests.


Thanks for the suggestion. Will place PCI_VENDOR_ID_HYGON between 
PCI_VENDOR_ID_AMAZON(0x1d0f) and PCI_VENDOR_ID_TEKRAM(0x1de1) in next

patch set.

Pu Wen



With that changed,

Acked-by: Bjorn Helgaas  # pci_ids.h


  #define PCI_VENDOR_ID_AMD 0x1022
  #define PCI_DEVICE_ID_AMD_K8_NB   0x1100
  #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP   0x1101
--
2.7.4





Re: [PATCH v3 07/17] x86/pci: add Hygon PCI vendor and northbridge support

2018-08-13 Thread Pu Wen

On 2018/8/14 6:14, Bjorn Helgaas wrote:

On Sat, Aug 11, 2018 at 09:27:42PM +0800, Pu Wen wrote:

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2950223..d0e98a9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -511,6 +511,8 @@
  #define PCI_DEVICE_ID_AMI_MEGARAID0x9010
  #define PCI_DEVICE_ID_AMI_MEGARAID2   0x9060
  
+#define PCI_VENDOR_ID_HYGON		0x1d94


Please add this entry so pci_ids.h remains sorted by Vendor ID, then Device
ID, as the comment at the top suggests.


Thanks for the suggestion. Will place PCI_VENDOR_ID_HYGON between 
PCI_VENDOR_ID_AMAZON(0x1d0f) and PCI_VENDOR_ID_TEKRAM(0x1de1) in next

patch set.

Pu Wen



With that changed,

Acked-by: Bjorn Helgaas  # pci_ids.h


  #define PCI_VENDOR_ID_AMD 0x1022
  #define PCI_DEVICE_ID_AMD_K8_NB   0x1100
  #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP   0x1101
--
2.7.4





Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 20:47 -0700, Roman Kiryanov wrote:
> Hi,
> 
> thank you for reviewing my patches. I decided to put BUILD_BUG_ON
> close to places where it is important that these structs fit into a
> memory page to give some context.

And you make the reader figure out what type dev->buffers is
unnecessarily when sizeof(struct goldfish_pipe_dev_buffers)
is pretty obvious.

> Regards,
> Roman.

cheers, Joe



Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 20:47 -0700, Roman Kiryanov wrote:
> Hi,
> 
> thank you for reviewing my patches. I decided to put BUILD_BUG_ON
> close to places where it is important that these structs fit into a
> memory page to give some context.

And you make the reader figure out what type dev->buffers is
unnecessarily when sizeof(struct goldfish_pipe_dev_buffers)
is pretty obvious.

> Regards,
> Roman.

cheers, Joe



Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Ravi Bangoria
Hi Song,

On 08/13/2018 10:42 PM, Song Liu wrote:
> On Mon, Aug 13, 2018 at 6:17 AM, Oleg Nesterov  wrote:
>> On 08/13, Ravi Bangoria wrote:
>>>
 But damn, process creation (exec) is trivial. We could add a new 
 uprobe_exec()
 hook and avoid delayed_uprobe_install() in uprobe_mmap().
>>>
>>> I'm sorry. I didn't get this.
>>
>> Sorry for confusion...
>>
>> I meant, if only exec*( could race with _register(), we could add another 
>> uprobe
>> hook which updates all (delayed) counters before return to user-mode.
>>
 Afaics, the really problematic case is dlopen() which can race with 
 _register()
 too, right?
>>>
>>> dlopen() should internally use mmap() right? So what is the problem here? 
>>> Can
>>> you please elaborate.
>>
>> What I tried to say is that we can't avoid 
>> uprobe_mmap()->delayed_uprobe_install()
>> because dlopen() can race with _register() too, just like exec.
>>
>> Oleg.
>>
> 
> How about we do delayed_uprobe_install() per file? Say we keep a list
> of delayed_uprobe
> in load_elf_binary(). Then we can install delayed_uprobe after loading
> all sections of the
> file.

I'm not sure if I totally understood the idea. But how this approach can
solve dlopen() race with _register()?

Rather, making delayed_uprobe_list an mm field seems simple and effective
idea to me. The only overhead will be list_empty(mm->delayed_list) check.

Please let me know if I misunderstood you.

Thanks,
Ravi



Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Ravi Bangoria
Hi Song,

On 08/13/2018 10:42 PM, Song Liu wrote:
> On Mon, Aug 13, 2018 at 6:17 AM, Oleg Nesterov  wrote:
>> On 08/13, Ravi Bangoria wrote:
>>>
 But damn, process creation (exec) is trivial. We could add a new 
 uprobe_exec()
 hook and avoid delayed_uprobe_install() in uprobe_mmap().
>>>
>>> I'm sorry. I didn't get this.
>>
>> Sorry for confusion...
>>
>> I meant, if only exec*( could race with _register(), we could add another 
>> uprobe
>> hook which updates all (delayed) counters before return to user-mode.
>>
 Afaics, the really problematic case is dlopen() which can race with 
 _register()
 too, right?
>>>
>>> dlopen() should internally use mmap() right? So what is the problem here? 
>>> Can
>>> you please elaborate.
>>
>> What I tried to say is that we can't avoid 
>> uprobe_mmap()->delayed_uprobe_install()
>> because dlopen() can race with _register() too, just like exec.
>>
>> Oleg.
>>
> 
> How about we do delayed_uprobe_install() per file? Say we keep a list
> of delayed_uprobe
> in load_elf_binary(). Then we can install delayed_uprobe after loading
> all sections of the
> file.

I'm not sure if I totally understood the idea. But how this approach can
solve dlopen() race with _register()?

Rather, making delayed_uprobe_list an mm field seems simple and effective
idea to me. The only overhead will be list_empty(mm->delayed_list) check.

Please let me know if I misunderstood you.

Thanks,
Ravi



Re: [PATCH 2/2] rtc:rtc-ds1347: Use PTR_ERR_OR_ZERO to replace the open code

2018-08-13 Thread Baruch Siach
Hi zhong,

On Mon, Aug 13, 2018 at 07:31:25PM +0800, zhong jiang wrote:
> PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So
> just replace them rather than duplicating its implement.
> 
> Signed-off-by: zhong jiang 

Acked-by: Baruch Siach 

Thanks,
baruch

> ---
>  drivers/rtc/rtc-ds1347.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c
> index 938512c..b97b67f 100644
> --- a/drivers/rtc/rtc-ds1347.c
> +++ b/drivers/rtc/rtc-ds1347.c
> @@ -155,10 +155,7 @@ static int ds1347_probe(struct spi_device *spi)
>   rtc = devm_rtc_device_register(>dev, "ds1347",
>   _rtc_ops, THIS_MODULE);
>  
> - if (IS_ERR(rtc))
> - return PTR_ERR(rtc);
> -
> - return 0;
> + return PTR_ERR_OR_ZERO(rtc);
>  }
>  
>  static struct spi_driver ds1347_driver = {

-- 
 http://baruch.siach.name/blog/  ~. .~   Tk Open Systems
=}ooO--U--Ooo{=
   - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -


Re: [PATCH 2/2] f2fs: tune discard speed with storage usage rate

2018-08-13 Thread Jaegeuk Kim
On 08/10, Chao Yu wrote:
> Previously, discard speed was fixed mostly, and in high usage rate
> device, we will speed up issuing discard, but it doesn't make sense
> that in a non-full filesystem, we still issue discard with slow speed.

Could you please elaborate the problem in more detail? The speed depends
on how many candidates?

Thanks,

> 
> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> and causing high lifetime overhead.
> 
> Let's tune discard speed as below:
> 
> a. adjust default issue interval:
>   originalafter
> min_interval: 50ms100ms
> mid_interval: 500ms   1000ms
> max_interval: 6ms 1ms
> 
> b. if last time we stop issuing discard due to IO interruption of user,
> let's reset all {min,mid,max}_interval to default one.
> 
> c. tune {min,mid,max}_interval with below calculation method:
> 
> base_interval = default_interval / 10;
> total_interval = default_interval - base_interval;
> interval = base_interval + total_interval * (100 - dev_util) / 100;
> 
> For example:
> min_interval (:100ms)
> dev_util (%)  interval (ms)
> 0 100
> 1091
> 2082
> 3073
> ...
> 8028
> 9019
> 100   10
> 
> Signed-off-by: Chao Yu 
> ---
>  fs/f2fs/f2fs.h| 11 
>  fs/f2fs/segment.c | 64 +--
>  fs/f2fs/segment.h |  9 +++
>  fs/f2fs/super.c   |  2 +-
>  4 files changed, 67 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 273ffdaf4891..a1dd2e1c3cb9 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -185,10 +185,9 @@ enum {
>  
>  #define MAX_DISCARD_BLOCKS(sbi)  BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST  8   /* issue 8 discards per 
> round */
> -#define DEF_MIN_DISCARD_ISSUE_TIME   50  /* 50 ms, if exists */
> -#define DEF_MID_DISCARD_ISSUE_TIME   500 /* 500 ms, if device busy */
> -#define DEF_MAX_DISCARD_ISSUE_TIME   6   /* 60 s, if no candidates */
> -#define DEF_DISCARD_URGENT_UTIL  80  /* do more discard over 
> 80% */
> +#define DEF_MIN_DISCARD_ISSUE_TIME   100 /* 100 ms, if exists */
> +#define DEF_MID_DISCARD_ISSUE_TIME   1000/* 1000 ms, if device busy */
> +#define DEF_MAX_DISCARD_ISSUE_TIME   1   /* 1 ms, if no candidates */
>  #define DEF_CP_INTERVAL  60  /* 60 secs */
>  #define DEF_IDLE_INTERVAL5   /* 5 secs */
>  
> @@ -248,7 +247,8 @@ struct discard_entry {
>  };
>  
>  /* default discard granularity of inner discard thread, unit: block count */
> -#define DEFAULT_DISCARD_GRANULARITY  1
> +#define MID_DISCARD_GRANULARITY  16
> +#define MIN_DISCARD_GRANULARITY  1
>  
>  /* max discard pend list number */
>  #define MAX_PLIST_NUM512
> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>   atomic_t discard_cmd_cnt;   /* # of cached cmd count */
>   struct rb_root root;/* root of discard rb-tree */
>   bool rbtree_check;  /* config for consistence check 
> */
> + bool io_interrupted;/* last state of io interrupted 
> */
>  };
>  
>  /* for the list of fsync inodes, used only during recovery */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8b52e8dfb12f..9564aaf1f27b 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>  #endif
>  }
>  
> +static void __adjust_discard_speed(unsigned int *interval,
> + unsigned int def_interval, int dev_util)
> +{
> + unsigned int base_interval, total_interval;
> +
> + base_interval = def_interval / 10;
> + total_interval = def_interval - base_interval;
> +
> + /*
> +  * if def_interval = 100, adjusted interval should be in range of
> +  * [10, 100].
> +  */
> + *interval = base_interval + total_interval * (100 - dev_util) / 100;
> +}
> +
> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> + struct discard_policy *dpolicy)
> +{
> + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> + int dev_util;
> +
> + if (dcc->io_interrupted) {
> + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> + return;
> + }
> +
> + dev_util = dev_utilization(sbi);
> +
> + __adjust_discard_speed(>min_interval,
> + DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> + __adjust_discard_speed(>mid_interval,
> + DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> + __adjust_discard_speed(>max_interval,
> + 

Re: [PATCH 2/2] rtc:rtc-ds1347: Use PTR_ERR_OR_ZERO to replace the open code

2018-08-13 Thread Baruch Siach
Hi zhong,

On Mon, Aug 13, 2018 at 07:31:25PM +0800, zhong jiang wrote:
> PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So
> just replace them rather than duplicating its implement.
> 
> Signed-off-by: zhong jiang 

Acked-by: Baruch Siach 

Thanks,
baruch

> ---
>  drivers/rtc/rtc-ds1347.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c
> index 938512c..b97b67f 100644
> --- a/drivers/rtc/rtc-ds1347.c
> +++ b/drivers/rtc/rtc-ds1347.c
> @@ -155,10 +155,7 @@ static int ds1347_probe(struct spi_device *spi)
>   rtc = devm_rtc_device_register(>dev, "ds1347",
>   _rtc_ops, THIS_MODULE);
>  
> - if (IS_ERR(rtc))
> - return PTR_ERR(rtc);
> -
> - return 0;
> + return PTR_ERR_OR_ZERO(rtc);
>  }
>  
>  static struct spi_driver ds1347_driver = {

-- 
 http://baruch.siach.name/blog/  ~. .~   Tk Open Systems
=}ooO--U--Ooo{=
   - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -


Re: [PATCH 2/2] f2fs: tune discard speed with storage usage rate

2018-08-13 Thread Jaegeuk Kim
On 08/10, Chao Yu wrote:
> Previously, discard speed was fixed mostly, and in high usage rate
> device, we will speed up issuing discard, but it doesn't make sense
> that in a non-full filesystem, we still issue discard with slow speed.

Could you please elaborate the problem in more detail? The speed depends
on how many candidates?

Thanks,

> 
> Anyway, it comes out undiscarded block makes FTL GC be lower efficient
> and causing high lifetime overhead.
> 
> Let's tune discard speed as below:
> 
> a. adjust default issue interval:
>   originalafter
> min_interval: 50ms100ms
> mid_interval: 500ms   1000ms
> max_interval: 6ms 1ms
> 
> b. if last time we stop issuing discard due to IO interruption of user,
> let's reset all {min,mid,max}_interval to default one.
> 
> c. tune {min,mid,max}_interval with below calculation method:
> 
> base_interval = default_interval / 10;
> total_interval = default_interval - base_interval;
> interval = base_interval + total_interval * (100 - dev_util) / 100;
> 
> For example:
> min_interval (:100ms)
> dev_util (%)  interval (ms)
> 0 100
> 1091
> 2082
> 3073
> ...
> 8028
> 9019
> 100   10
> 
> Signed-off-by: Chao Yu 
> ---
>  fs/f2fs/f2fs.h| 11 
>  fs/f2fs/segment.c | 64 +--
>  fs/f2fs/segment.h |  9 +++
>  fs/f2fs/super.c   |  2 +-
>  4 files changed, 67 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 273ffdaf4891..a1dd2e1c3cb9 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -185,10 +185,9 @@ enum {
>  
>  #define MAX_DISCARD_BLOCKS(sbi)  BLKS_PER_SEC(sbi)
>  #define DEF_MAX_DISCARD_REQUEST  8   /* issue 8 discards per 
> round */
> -#define DEF_MIN_DISCARD_ISSUE_TIME   50  /* 50 ms, if exists */
> -#define DEF_MID_DISCARD_ISSUE_TIME   500 /* 500 ms, if device busy */
> -#define DEF_MAX_DISCARD_ISSUE_TIME   6   /* 60 s, if no candidates */
> -#define DEF_DISCARD_URGENT_UTIL  80  /* do more discard over 
> 80% */
> +#define DEF_MIN_DISCARD_ISSUE_TIME   100 /* 100 ms, if exists */
> +#define DEF_MID_DISCARD_ISSUE_TIME   1000/* 1000 ms, if device busy */
> +#define DEF_MAX_DISCARD_ISSUE_TIME   1   /* 1 ms, if no candidates */
>  #define DEF_CP_INTERVAL  60  /* 60 secs */
>  #define DEF_IDLE_INTERVAL5   /* 5 secs */
>  
> @@ -248,7 +247,8 @@ struct discard_entry {
>  };
>  
>  /* default discard granularity of inner discard thread, unit: block count */
> -#define DEFAULT_DISCARD_GRANULARITY  1
> +#define MID_DISCARD_GRANULARITY  16
> +#define MIN_DISCARD_GRANULARITY  1
>  
>  /* max discard pend list number */
>  #define MAX_PLIST_NUM512
> @@ -330,6 +330,7 @@ struct discard_cmd_control {
>   atomic_t discard_cmd_cnt;   /* # of cached cmd count */
>   struct rb_root root;/* root of discard rb-tree */
>   bool rbtree_check;  /* config for consistence check 
> */
> + bool io_interrupted;/* last state of io interrupted 
> */
>  };
>  
>  /* for the list of fsync inodes, used only during recovery */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8b52e8dfb12f..9564aaf1f27b 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
>  #endif
>  }
>  
> +static void __adjust_discard_speed(unsigned int *interval,
> + unsigned int def_interval, int dev_util)
> +{
> + unsigned int base_interval, total_interval;
> +
> + base_interval = def_interval / 10;
> + total_interval = def_interval - base_interval;
> +
> + /*
> +  * if def_interval = 100, adjusted interval should be in range of
> +  * [10, 100].
> +  */
> + *interval = base_interval + total_interval * (100 - dev_util) / 100;
> +}
> +
> +static void __tune_discard_policy(struct f2fs_sb_info *sbi,
> + struct discard_policy *dpolicy)
> +{
> + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
> + int dev_util;
> +
> + if (dcc->io_interrupted) {
> + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
> + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
> + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
> + return;
> + }
> +
> + dev_util = dev_utilization(sbi);
> +
> + __adjust_discard_speed(>min_interval,
> + DEF_MIN_DISCARD_ISSUE_TIME, dev_util);
> + __adjust_discard_speed(>mid_interval,
> + DEF_MID_DISCARD_ISSUE_TIME, dev_util);
> + __adjust_discard_speed(>max_interval,
> + 

Re: [PATCH 1/2] rtc:rtc-digicolor: Use PTR_ERR_OR_ZERO to replace the open code

2018-08-13 Thread Baruch Siach
Hi zhong,

On Mon, Aug 13, 2018 at 07:31:24PM +0800, zhong jiang wrote:
> PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So
> just replace them rather than duplicating its implement.
> 
> Signed-off-by: zhong jiang 

Acked-by: Baruch Siach 

Thanks,
baruch

> ---
>  drivers/rtc/rtc-digicolor.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/drivers/rtc/rtc-digicolor.c b/drivers/rtc/rtc-digicolor.c
> index b253bf1..fd6850c 100644
> --- a/drivers/rtc/rtc-digicolor.c
> +++ b/drivers/rtc/rtc-digicolor.c
> @@ -202,10 +202,8 @@ static int __init dc_rtc_probe(struct platform_device 
> *pdev)
>   platform_set_drvdata(pdev, rtc);
>   rtc->rtc_dev = devm_rtc_device_register(>dev, pdev->name,
>   _rtc_ops, THIS_MODULE);
> - if (IS_ERR(rtc->rtc_dev))
> - return PTR_ERR(rtc->rtc_dev);
>  
> - return 0;
> + return PTR_ERR_OR_ZERO(rtc->rtc_dev);
>  }
>  
>  static const struct of_device_id dc_dt_ids[] = {

-- 
 http://baruch.siach.name/blog/  ~. .~   Tk Open Systems
=}ooO--U--Ooo{=
   - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -


Re: [PATCH 1/2] rtc:rtc-digicolor: Use PTR_ERR_OR_ZERO to replace the open code

2018-08-13 Thread Baruch Siach
Hi zhong,

On Mon, Aug 13, 2018 at 07:31:24PM +0800, zhong jiang wrote:
> PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So
> just replace them rather than duplicating its implement.
> 
> Signed-off-by: zhong jiang 

Acked-by: Baruch Siach 

Thanks,
baruch

> ---
>  drivers/rtc/rtc-digicolor.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/drivers/rtc/rtc-digicolor.c b/drivers/rtc/rtc-digicolor.c
> index b253bf1..fd6850c 100644
> --- a/drivers/rtc/rtc-digicolor.c
> +++ b/drivers/rtc/rtc-digicolor.c
> @@ -202,10 +202,8 @@ static int __init dc_rtc_probe(struct platform_device 
> *pdev)
>   platform_set_drvdata(pdev, rtc);
>   rtc->rtc_dev = devm_rtc_device_register(>dev, pdev->name,
>   _rtc_ops, THIS_MODULE);
> - if (IS_ERR(rtc->rtc_dev))
> - return PTR_ERR(rtc->rtc_dev);
>  
> - return 0;
> + return PTR_ERR_OR_ZERO(rtc->rtc_dev);
>  }
>  
>  static const struct of_device_id dc_dt_ids[] = {

-- 
 http://baruch.siach.name/blog/  ~. .~   Tk Open Systems
=}ooO--U--Ooo{=
   - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -


[PATCH 3/5] fs/locks: allow a lock request to block other requests.

2018-08-13 Thread NeilBrown
Currently, a lock can block pending requests, but all pending
requests are equal.  If lots of pending requests are
mutually exclusive, this means they will all be woken up
and all but one will fail.  This can hurt performance.

So we will allow pending requests to block other requests.
Only the first request will be woken, and it will wake the others.

This patch doesn't implement this fully, but prepares the way.

- It acknowledges that a request might be blocking other requests,
  and when the request is converted to a lock, those blocked
  requests are moved across.
- When a request is requeued or discarded, all blocked requests are
  woken.
- When deadlock-detection looks for the lock which blocks a
  given request, we follow the chain of ->fl_blocker all
  the way to the top.

Signed-off-by: NeilBrown 
---
 fs/locks.c |   43 +++
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index de0b9276f23d..9439eebd4eb9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -408,9 +408,19 @@ void locks_copy_lock(struct file_lock *new, struct 
file_lock *fl)
fl->fl_ops->fl_copy_lock(new, fl);
}
 }
-
 EXPORT_SYMBOL(locks_copy_lock);
 
+static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
+{
+   struct file_lock *f;
+
+   spin_lock(_lock_lock);
+   list_splice_init(>fl_blocked, >fl_blocked);
+   list_for_each_entry(f, >fl_blocked, fl_block)
+   f->fl_blocker = new;
+   spin_unlock(_lock_lock);
+}
+
 static inline int flock_translate_cmd(int cmd) {
if (cmd & LOCK_MAND)
return cmd & (LOCK_MAND | LOCK_RW);
@@ -683,11 +693,15 @@ static void __locks_delete_block(struct file_lock *waiter)
 
 static void __locks_wake_up_blocks(struct file_lock *blocker)
 {
+   /* Wake up all requests in blocker->fl_blocked, including
+* requests blocked by those requests.
+*/
while (!list_empty(>fl_blocked)) {
struct file_lock *waiter;
 
waiter = list_first_entry(>fl_blocked,
  struct file_lock, fl_block);
+   list_splice_init(>fl_blocked, >fl_blocked);
__locks_delete_block(waiter);
if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
waiter->fl_lmops->lm_notify(waiter);
@@ -699,6 +713,7 @@ static void __locks_wake_up_blocks(struct file_lock 
*blocker)
 static void locks_delete_block(struct file_lock *waiter)
 {
spin_lock(_lock_lock);
+   __locks_wake_up_blocks(waiter);
__locks_delete_block(waiter);
spin_unlock(_lock_lock);
 }
@@ -714,13 +729,19 @@ static void locks_delete_block(struct file_lock *waiter)
  * blocked_lock_lock in some cases when we see that the fl_blocked list is 
empty.
  */
 static void __locks_insert_block(struct file_lock *blocker,
-   struct file_lock *waiter)
+struct file_lock *waiter)
 {
BUG_ON(!list_empty(>fl_block));
waiter->fl_blocker = blocker;
list_add_tail(>fl_block, >fl_blocked);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
locks_insert_global_blocked(waiter);
+
+   /* The requests in waiter->fl_blocked are known to conflict with
+* waiter, but might not conflict with blocker, or the requests
+* and lock which block it.  So they all need to be woken.
+*/
+   __locks_wake_up_blocks(waiter);
 }
 
 /* Must be called with flc_lock held. */
@@ -893,8 +914,11 @@ static struct file_lock *what_owner_is_waiting_for(struct 
file_lock *block_fl)
struct file_lock *fl;
 
hash_for_each_possible(blocked_hash, fl, fl_link, 
posix_owner_key(block_fl)) {
-   if (posix_same_owner(fl, block_fl))
-   return fl->fl_blocker;
+   if (posix_same_owner(fl, block_fl)) {
+   while (fl->fl_blocker)
+   fl = fl->fl_blocker;
+   return fl;
+   }
}
return NULL;
 }
@@ -987,6 +1011,7 @@ static int flock_lock_inode(struct inode *inode, struct 
file_lock *request)
if (request->fl_flags & FL_ACCESS)
goto out;
locks_copy_lock(new_fl, request);
+   locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(new_fl, >flc_flock);
new_fl = NULL;
error = 0;
@@ -1179,6 +1204,7 @@ static int posix_lock_inode(struct inode *inode, struct 
file_lock *request,
goto out;
}
locks_copy_lock(new_fl, request);
+   locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(new_fl, >fl_list);
fl = new_fl;
new_fl = NULL;
@@ -2587,13 +2613,14 @@ void locks_remove_file(struct file *filp)
 int
 posix_unblock_lock(struct file_lock 

[PATCH 5/5] fs/locks: create a tree of dependent requests.

2018-08-13 Thread NeilBrown
When we find an existing lock which conflicts with a request,
and the request wants to wait, we currently add the request
to a list.  When the lock is removed, the whole list is woken.
This can cause the thundering-herd problem.
To reduce the problem, we make use of the (new) fact that
a pending request can itself have a list of blocked requests.
When we find a conflict, we look through the existing blocked requests.
If any one of them blocks the new request, the new request is attached
below that request, otherwise it is added to the list of blocked
requests, which are now known to be mutually non-conflicting.

This way, when the lock is released, only a set of non-conflicting
locks will be woken, the rest can stay asleep.
If the lock request cannot be granted and the request needs to be
requeued, all the other requests it blocks will then be woken.

Reported-and-tested-by: Martin Wilck 
Signed-off-by: NeilBrown 
---
 fs/locks.c |   29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index c7a372cebff1..af250afceff4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -727,11 +727,25 @@ static void locks_delete_block(struct file_lock *waiter)
  * fl_blocked list itself is protected by the blocked_lock_lock, but by 
ensuring
  * that the flc_lock is also held on insertions we can avoid taking the
  * blocked_lock_lock in some cases when we see that the fl_blocked list is 
empty.
+ *
+ * Rather than just adding to the list, we check for conflicts with any 
existing
+ * waiters, and add beneath any waiter that blocks the new waiter.
+ * Thus wakeups don't happen until needed.
  */
 static void __locks_insert_block(struct file_lock *blocker,
-struct file_lock *waiter)
+struct file_lock *waiter,
+bool conflict(struct file_lock *,
+  struct file_lock *))
 {
+   struct file_lock *fl;
BUG_ON(!list_empty(>fl_block));
+
+new_blocker:
+   list_for_each_entry(fl, >fl_blocked, fl_block)
+   if (conflict(fl, waiter)) {
+   blocker =  fl;
+   goto new_blocker;
+   }
waiter->fl_blocker = blocker;
list_add_tail(>fl_block, >fl_blocked);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
@@ -746,10 +760,12 @@ static void __locks_insert_block(struct file_lock 
*blocker,
 
 /* Must be called with flc_lock held. */
 static void locks_insert_block(struct file_lock *blocker,
-   struct file_lock *waiter)
+  struct file_lock *waiter,
+  bool conflict(struct file_lock *,
+struct file_lock *))
 {
spin_lock(_lock_lock);
-   __locks_insert_block(blocker, waiter);
+   __locks_insert_block(blocker, waiter, conflict);
spin_unlock(_lock_lock);
 }
 
@@ -1008,7 +1024,7 @@ static int flock_lock_inode(struct inode *inode, struct 
file_lock *request)
if (!(request->fl_flags & FL_SLEEP))
goto out;
error = FILE_LOCK_DEFERRED;
-   locks_insert_block(fl, request);
+   locks_insert_block(fl, request, flock_locks_conflict);
goto out;
}
if (request->fl_flags & FL_ACCESS)
@@ -1082,7 +1098,8 @@ static int posix_lock_inode(struct inode *inode, struct 
file_lock *request,
spin_lock(_lock_lock);
if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
-   __locks_insert_block(fl, request);
+   __locks_insert_block(fl, request,
+posix_locks_conflict);
}
spin_unlock(_lock_lock);
goto out;
@@ -1555,7 +1572,7 @@ int __break_lease(struct inode *inode, unsigned int mode, 
unsigned int type)
break_time -= jiffies;
if (break_time == 0)
break_time++;
-   locks_insert_block(fl, new_fl);
+   locks_insert_block(fl, new_fl, leases_conflict);
trace_break_lease_block(inode, new_fl);
spin_unlock(>flc_lock);
percpu_up_read_preempt_enable(_rwsem);




[PATCH 4/5] fs/locks: change all *_conflict() functions to return bool.

2018-08-13 Thread NeilBrown
posix_locks_conflict() and flock_locks_conflict() both return int.
leases_conflict() returns bool.

This inconsistency will cause problems for the next patch if not
fixed.

So change posix_locks_conflict() and flock_locks_conflict() to return
bool.
Also change the locks_conflict() helper.

And convert some
   return (foo);
to
   return foo;

Signed-off-by: NeilBrown 
---
 fs/locks.c |   27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 9439eebd4eb9..c7a372cebff1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -803,47 +803,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct 
list_head *dispose)
 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
  * checks for shared/exclusive status of overlapping locks.
  */
-static int locks_conflict(struct file_lock *caller_fl, struct file_lock 
*sys_fl)
+static bool locks_conflict(struct file_lock *caller_fl,
+  struct file_lock *sys_fl)
 {
if (sys_fl->fl_type == F_WRLCK)
-   return 1;
+   return true;
if (caller_fl->fl_type == F_WRLCK)
-   return 1;
-   return 0;
+   return true;
+   return false;
 }
 
 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
  * checking before calling the locks_conflict().
  */
-static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock 
*sys_fl)
+static bool posix_locks_conflict(struct file_lock *caller_fl,
+struct file_lock *sys_fl)
 {
/* POSIX locks owned by the same process do not conflict with
 * each other.
 */
if (posix_same_owner(caller_fl, sys_fl))
-   return (0);
+   return false;
 
/* Check whether they overlap */
if (!locks_overlap(caller_fl, sys_fl))
-   return 0;
+   return false;
 
-   return (locks_conflict(caller_fl, sys_fl));
+   return locks_conflict(caller_fl, sys_fl);
 }
 
 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
  * checking before calling the locks_conflict().
  */
-static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock 
*sys_fl)
+static bool flock_locks_conflict(struct file_lock *caller_fl,
+struct file_lock *sys_fl)
 {
/* FLOCK locks referring to the same filp do not conflict with
 * each other.
 */
if (caller_fl->fl_file == sys_fl->fl_file)
-   return (0);
+   return false;
if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
-   return 0;
+   return false;
 
-   return (locks_conflict(caller_fl, sys_fl));
+   return locks_conflict(caller_fl, sys_fl);
 }
 
 void




[PATCH 3/5] fs/locks: allow a lock request to block other requests.

2018-08-13 Thread NeilBrown
Currently, a lock can block pending requests, but all pending
requests are equal.  If lots of pending requests are
mutually exclusive, this means they will all be woken up
and all but one will fail.  This can hurt performance.

So we will allow pending requests to block other requests.
Only the first request will be woken, and it will wake the others.

This patch doesn't implement this fully, but prepares the way.

- It acknowledges that a request might be blocking other requests,
  and when the request is converted to a lock, those blocked
  requests are moved across.
- When a request is requeued or discarded, all blocked requests are
  woken.
- When deadlock-detection looks for the lock which blocks a
  given request, we follow the chain of ->fl_blocker all
  the way to the top.

Signed-off-by: NeilBrown 
---
 fs/locks.c |   43 +++
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index de0b9276f23d..9439eebd4eb9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -408,9 +408,19 @@ void locks_copy_lock(struct file_lock *new, struct 
file_lock *fl)
fl->fl_ops->fl_copy_lock(new, fl);
}
 }
-
 EXPORT_SYMBOL(locks_copy_lock);
 
+static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
+{
+   struct file_lock *f;
+
+   spin_lock(_lock_lock);
+   list_splice_init(>fl_blocked, >fl_blocked);
+   list_for_each_entry(f, >fl_blocked, fl_block)
+   f->fl_blocker = new;
+   spin_unlock(_lock_lock);
+}
+
 static inline int flock_translate_cmd(int cmd) {
if (cmd & LOCK_MAND)
return cmd & (LOCK_MAND | LOCK_RW);
@@ -683,11 +693,15 @@ static void __locks_delete_block(struct file_lock *waiter)
 
 static void __locks_wake_up_blocks(struct file_lock *blocker)
 {
+   /* Wake up all requests in blocker->fl_blocked, including
+* requests blocked by those requests.
+*/
while (!list_empty(>fl_blocked)) {
struct file_lock *waiter;
 
waiter = list_first_entry(>fl_blocked,
  struct file_lock, fl_block);
+   list_splice_init(>fl_blocked, >fl_blocked);
__locks_delete_block(waiter);
if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
waiter->fl_lmops->lm_notify(waiter);
@@ -699,6 +713,7 @@ static void __locks_wake_up_blocks(struct file_lock 
*blocker)
 static void locks_delete_block(struct file_lock *waiter)
 {
spin_lock(_lock_lock);
+   __locks_wake_up_blocks(waiter);
__locks_delete_block(waiter);
spin_unlock(_lock_lock);
 }
@@ -714,13 +729,19 @@ static void locks_delete_block(struct file_lock *waiter)
  * blocked_lock_lock in some cases when we see that the fl_blocked list is 
empty.
  */
 static void __locks_insert_block(struct file_lock *blocker,
-   struct file_lock *waiter)
+struct file_lock *waiter)
 {
BUG_ON(!list_empty(>fl_block));
waiter->fl_blocker = blocker;
list_add_tail(>fl_block, >fl_blocked);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
locks_insert_global_blocked(waiter);
+
+   /* The requests in waiter->fl_blocked are known to conflict with
+* waiter, but might not conflict with blocker, or the requests
+* and lock which block it.  So they all need to be woken.
+*/
+   __locks_wake_up_blocks(waiter);
 }
 
 /* Must be called with flc_lock held. */
@@ -893,8 +914,11 @@ static struct file_lock *what_owner_is_waiting_for(struct 
file_lock *block_fl)
struct file_lock *fl;
 
hash_for_each_possible(blocked_hash, fl, fl_link, 
posix_owner_key(block_fl)) {
-   if (posix_same_owner(fl, block_fl))
-   return fl->fl_blocker;
+   if (posix_same_owner(fl, block_fl)) {
+   while (fl->fl_blocker)
+   fl = fl->fl_blocker;
+   return fl;
+   }
}
return NULL;
 }
@@ -987,6 +1011,7 @@ static int flock_lock_inode(struct inode *inode, struct 
file_lock *request)
if (request->fl_flags & FL_ACCESS)
goto out;
locks_copy_lock(new_fl, request);
+   locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(new_fl, >flc_flock);
new_fl = NULL;
error = 0;
@@ -1179,6 +1204,7 @@ static int posix_lock_inode(struct inode *inode, struct 
file_lock *request,
goto out;
}
locks_copy_lock(new_fl, request);
+   locks_move_blocks(new_fl, request);
locks_insert_lock_ctx(new_fl, >fl_list);
fl = new_fl;
new_fl = NULL;
@@ -2587,13 +2613,14 @@ void locks_remove_file(struct file *filp)
 int
 posix_unblock_lock(struct file_lock 

[PATCH 5/5] fs/locks: create a tree of dependent requests.

2018-08-13 Thread NeilBrown
When we find an existing lock which conflicts with a request,
and the request wants to wait, we currently add the request
to a list.  When the lock is removed, the whole list is woken.
This can cause the thundering-herd problem.
To reduce the problem, we make use of the (new) fact that
a pending request can itself have a list of blocked requests.
When we find a conflict, we look through the existing blocked requests.
If any one of them blocks the new request, the new request is attached
below that request, otherwise it is added to the list of blocked
requests, which are now known to be mutually non-conflicting.

This way, when the lock is released, only a set of non-conflicting
locks will be woken, the rest can stay asleep.
If the lock request cannot be granted and the request needs to be
requeued, all the other requests it blocks will then be woken.

Reported-and-tested-by: Martin Wilck 
Signed-off-by: NeilBrown 
---
 fs/locks.c |   29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index c7a372cebff1..af250afceff4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -727,11 +727,25 @@ static void locks_delete_block(struct file_lock *waiter)
  * fl_blocked list itself is protected by the blocked_lock_lock, but by 
ensuring
  * that the flc_lock is also held on insertions we can avoid taking the
  * blocked_lock_lock in some cases when we see that the fl_blocked list is 
empty.
+ *
+ * Rather than just adding to the list, we check for conflicts with any 
existing
+ * waiters, and add beneath any waiter that blocks the new waiter.
+ * Thus wakeups don't happen until needed.
  */
 static void __locks_insert_block(struct file_lock *blocker,
-struct file_lock *waiter)
+struct file_lock *waiter,
+bool conflict(struct file_lock *,
+  struct file_lock *))
 {
+   struct file_lock *fl;
BUG_ON(!list_empty(>fl_block));
+
+new_blocker:
+   list_for_each_entry(fl, >fl_blocked, fl_block)
+   if (conflict(fl, waiter)) {
+   blocker =  fl;
+   goto new_blocker;
+   }
waiter->fl_blocker = blocker;
list_add_tail(>fl_block, >fl_blocked);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
@@ -746,10 +760,12 @@ static void __locks_insert_block(struct file_lock 
*blocker,
 
 /* Must be called with flc_lock held. */
 static void locks_insert_block(struct file_lock *blocker,
-   struct file_lock *waiter)
+  struct file_lock *waiter,
+  bool conflict(struct file_lock *,
+struct file_lock *))
 {
spin_lock(_lock_lock);
-   __locks_insert_block(blocker, waiter);
+   __locks_insert_block(blocker, waiter, conflict);
spin_unlock(_lock_lock);
 }
 
@@ -1008,7 +1024,7 @@ static int flock_lock_inode(struct inode *inode, struct 
file_lock *request)
if (!(request->fl_flags & FL_SLEEP))
goto out;
error = FILE_LOCK_DEFERRED;
-   locks_insert_block(fl, request);
+   locks_insert_block(fl, request, flock_locks_conflict);
goto out;
}
if (request->fl_flags & FL_ACCESS)
@@ -1082,7 +1098,8 @@ static int posix_lock_inode(struct inode *inode, struct 
file_lock *request,
spin_lock(_lock_lock);
if (likely(!posix_locks_deadlock(request, fl))) {
error = FILE_LOCK_DEFERRED;
-   __locks_insert_block(fl, request);
+   __locks_insert_block(fl, request,
+posix_locks_conflict);
}
spin_unlock(_lock_lock);
goto out;
@@ -1555,7 +1572,7 @@ int __break_lease(struct inode *inode, unsigned int mode, 
unsigned int type)
break_time -= jiffies;
if (break_time == 0)
break_time++;
-   locks_insert_block(fl, new_fl);
+   locks_insert_block(fl, new_fl, leases_conflict);
trace_break_lease_block(inode, new_fl);
spin_unlock(>flc_lock);
percpu_up_read_preempt_enable(_rwsem);




[PATCH 4/5] fs/locks: change all *_conflict() functions to return bool.

2018-08-13 Thread NeilBrown
posix_locks_conflict() and flock_locks_conflict() both return int.
leases_conflict() returns bool.

This inconsistency will cause problems for the next patch if not
fixed.

So change posix_locks_conflict() and flock_locks_conflict() to return
bool.
Also change the locks_conflict() helper.

And convert some
   return (foo);
to
   return foo;

Signed-off-by: NeilBrown 
---
 fs/locks.c |   27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 9439eebd4eb9..c7a372cebff1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -803,47 +803,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct 
list_head *dispose)
 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
  * checks for shared/exclusive status of overlapping locks.
  */
-static int locks_conflict(struct file_lock *caller_fl, struct file_lock 
*sys_fl)
+static bool locks_conflict(struct file_lock *caller_fl,
+  struct file_lock *sys_fl)
 {
if (sys_fl->fl_type == F_WRLCK)
-   return 1;
+   return true;
if (caller_fl->fl_type == F_WRLCK)
-   return 1;
-   return 0;
+   return true;
+   return false;
 }
 
 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
  * checking before calling the locks_conflict().
  */
-static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock 
*sys_fl)
+static bool posix_locks_conflict(struct file_lock *caller_fl,
+struct file_lock *sys_fl)
 {
/* POSIX locks owned by the same process do not conflict with
 * each other.
 */
if (posix_same_owner(caller_fl, sys_fl))
-   return (0);
+   return false;
 
/* Check whether they overlap */
if (!locks_overlap(caller_fl, sys_fl))
-   return 0;
+   return false;
 
-   return (locks_conflict(caller_fl, sys_fl));
+   return locks_conflict(caller_fl, sys_fl);
 }
 
 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
  * checking before calling the locks_conflict().
  */
-static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock 
*sys_fl)
+static bool flock_locks_conflict(struct file_lock *caller_fl,
+struct file_lock *sys_fl)
 {
/* FLOCK locks referring to the same filp do not conflict with
 * each other.
 */
if (caller_fl->fl_file == sys_fl->fl_file)
-   return (0);
+   return false;
if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
-   return 0;
+   return false;
 
-   return (locks_conflict(caller_fl, sys_fl));
+   return locks_conflict(caller_fl, sys_fl);
 }
 
 void




[PATCH 0/5 v2] locks: avoid thundering-herd wake-ups

2018-08-13 Thread NeilBrown


V2, which added wake_non_conflicts() was more broken than V1 - as
Bruce explained there is no transitivity in the blocking relation
between locks.
So this series takes a simpler approach.
It still attached waiters between other waiters as necessary to ensure
that:
  - a waiter is blocked by its parent (fl->blocker) and all further
ancestors, and
  - the list of waiters on fl_blocked are mutually non-conflicting.

When a lock (the root of a tree of requests) is released, only its
immediate children (fl_blocked) are woken.
When any lock is woken (either because its fl_blocker was released
or due to a signal or similar) it will either:
 - be granted
 - be aborted
 - be re-queued beneath some other lock.

In the first case the tree of blocked locks is moved across to the newly
created lock, and the invariants still hold.
In the other two cases, the tree of blocked waiters are all detached
and woken.

Note that this series has not received much testing yet.

Original description:
If you have a many-core machine, and have many threads all wanting to
briefly lock a give file (udev is known to do this), you can get quite
poor performance.

When one thread releases a lock, it wakes up all other threads that
are waiting (classic thundering-herd) - one will get the lock and the
others go to sleep.
When you have few cores, this is not very noticeable: by the time the
4th or 5th thread gets enough CPU time to try to claim the lock, the
earlier threads have claimed it, done what was needed, and released.
With 50+ cores, the contention can easily be measured.

This patchset creates a tree of pending lock requests in which siblings
don't conflict and each lock request does conflict with its parent.
When a lock is released, only requests which don't conflict with each
other are woken.

Testing shows that lock-acquisitions-per-second is now fairly stable even
as number of contending process goes to 1000.  Without this patch,
locks-per-second drops off steeply after a few 10s of processes.

There is a small cost to this extra complexity.
At 20 processes running a particular test on 72 cores, the lock
acquisitions per second drops from 1.8 million to 1.4 million with
this patch.  For 100 processes, this patch still provides 1.4 million
while without this patch there are about 700,000.

NeilBrown

---

NeilBrown (5):
  fs/locks: rename some lists and pointers.
  fs/locks: split out __locks_wake_up_blocks().
  fs/locks: allow a lock request to block other requests.
  fs/locks: change all *_conflict() functions to return bool.
  fs/locks: create a tree of dependent requests.


 fs/cifs/file.c  |2 -
 fs/locks.c  |  156 ++-
 include/linux/fs.h  |7 +-
 include/trace/events/filelock.h |   16 ++--
 4 files changed, 119 insertions(+), 62 deletions(-)

--
Signature



[PATCH 0/5 v2] locks: avoid thundering-herd wake-ups

2018-08-13 Thread NeilBrown


V2, which added wake_non_conflicts() was more broken than V1 - as
Bruce explained there is no transitivity in the blocking relation
between locks.
So this series takes a simpler approach.
It still attached waiters between other waiters as necessary to ensure
that:
  - a waiter is blocked by its parent (fl->blocker) and all further
ancestors, and
  - the list of waiters on fl_blocked are mutually non-conflicting.

When a lock (the root of a tree of requests) is released, only its
immediate children (fl_blocked) are woken.
When any lock is woken (either because its fl_blocker was released
or due to a signal or similar) it will either:
 - be granted
 - be aborted
 - be re-queued beneath some other lock.

In the first case the tree of blocked locks is moved across to the newly
created lock, and the invariants still hold.
In the other two cases, the tree of blocked waiters are all detached
and woken.

Note that this series has not received much testing yet.

Original description:
If you have a many-core machine, and have many threads all wanting to
briefly lock a given file (udev is known to do this), you can get quite
poor performance.

When one thread releases a lock, it wakes up all other threads that
are waiting (classic thundering-herd) - one will get the lock and the
others go to sleep.
When you have few cores, this is not very noticeable: by the time the
4th or 5th thread gets enough CPU time to try to claim the lock, the
earlier threads have claimed it, done what was needed, and released.
With 50+ cores, the contention can easily be measured.

This patchset creates a tree of pending lock requests in which siblings
don't conflict and each lock request does conflict with its parent.
When a lock is released, only requests which don't conflict with each
other are woken.

Testing shows that lock-acquisitions-per-second is now fairly stable even
as number of contending process goes to 1000.  Without this patch,
locks-per-second drops off steeply after a few 10s of processes.

There is a small cost to this extra complexity.
At 20 processes running a particular test on 72 cores, the lock
acquisitions per second drops from 1.8 million to 1.4 million with
this patch.  For 100 processes, this patch still provides 1.4 million
while without this patch there are about 700,000.

NeilBrown

---

NeilBrown (5):
  fs/locks: rename some lists and pointers.
  fs/locks: split out __locks_wake_up_blocks().
  fs/locks: allow a lock request to block other requests.
  fs/locks: change all *_conflict() functions to return bool.
  fs/locks: create a tree of dependent requests.


 fs/cifs/file.c  |2 -
 fs/locks.c  |  156 ++-
 include/linux/fs.h  |7 +-
 include/trace/events/filelock.h |   16 ++--
 4 files changed, 119 insertions(+), 62 deletions(-)

--
Signature



[PATCH 2/5] fs/locks: split out __locks_wake_up_blocks().

2018-08-13 Thread NeilBrown
This functionality will be useful in future patches, so
split it out from locks_wake_up_blocks().

Signed-off-by: NeilBrown 
---
 fs/locks.c |   27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 322491e70e41..de0b9276f23d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -681,6 +681,21 @@ static void __locks_delete_block(struct file_lock *waiter)
waiter->fl_blocker = NULL;
 }
 
+static void __locks_wake_up_blocks(struct file_lock *blocker)
+{
+   while (!list_empty(>fl_blocked)) {
+   struct file_lock *waiter;
+
+   waiter = list_first_entry(>fl_blocked,
+ struct file_lock, fl_block);
+   __locks_delete_block(waiter);
+   if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
+   waiter->fl_lmops->lm_notify(waiter);
+   else
+   wake_up(>fl_wait);
+   }
+}
+
 static void locks_delete_block(struct file_lock *waiter)
 {
spin_lock(_lock_lock);
@@ -735,17 +750,7 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
return;
 
spin_lock(_lock_lock);
-   while (!list_empty(>fl_blocked)) {
-   struct file_lock *waiter;
-
-   waiter = list_first_entry(>fl_blocked,
-   struct file_lock, fl_block);
-   __locks_delete_block(waiter);
-   if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
-   waiter->fl_lmops->lm_notify(waiter);
-   else
-   wake_up(>fl_wait);
-   }
+   __locks_wake_up_blocks(blocker);
spin_unlock(_lock_lock);
 }
 




[PATCH 1/5] fs/locks: rename some lists and pointers.

2018-08-13 Thread NeilBrown
struct file_lock contains an 'fl_next' pointer which
is used to point to the lock that this request is blocked
waiting for.  So rename it to fl_blocker.

The fl_blocked list_head in an active lock is the head of a list of
blocked requests.  In a request it is a node in that list.
These are two distinct uses, so replace with two list_heads
with different names.
fl_blocked is the head of a list of blocked requests
fl_block is a node on that list.

The two different list_heads are never used at the same time, but that
will change in a future patch.

Note that a tracepoint is changed to report fl_blocker instead
of fl_next.

Signed-off-by: NeilBrown 
---
 fs/cifs/file.c  |2 +-
 fs/locks.c  |   40 ---
 include/linux/fs.h  |7 +--
 include/trace/events/filelock.h |   16 
 4 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8d41ca7bfcf1..066ed2e4ba96 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1092,7 +1092,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock 
*flock)
rc = posix_lock_file(file, flock, NULL);
up_write(>lock_sem);
if (rc == FILE_LOCK_DEFERRED) {
-   rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
+   rc = wait_event_interruptible(flock->fl_wait, 
!flock->fl_blocker);
if (!rc)
goto try_again;
posix_unblock_lock(flock);
diff --git a/fs/locks.c b/fs/locks.c
index db7b6917d9c5..322491e70e41 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -194,7 +194,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
  * This lock protects the blocked_hash. Generally, if you're accessing it, you
  * want to be holding this lock.
  *
- * In addition, it also protects the fl->fl_block list, and the fl->fl_next
+ * In addition, it also protects the fl->fl_blocked list, and the 
fl->fl_blocker
  * pointer for file_lock structures that are acting as lock requests (in
  * contrast to those that are acting as records of acquired locks).
  *
@@ -203,7 +203,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
  * protected by this lock, we can skip acquiring it iff we already hold the
  * flc_lock.
  *
- * In particular, adding an entry to the fl_block list requires that you hold
+ * In particular, adding an entry to the fl_blocked list requires that you hold
  * both the flc_lock and the blocked_lock_lock (acquired in that order).
  * Deleting an entry from the list however only requires the file_lock_lock.
  */
@@ -302,6 +302,7 @@ static void locks_init_lock_heads(struct file_lock *fl)
 {
INIT_HLIST_NODE(>fl_link);
INIT_LIST_HEAD(>fl_list);
+   INIT_LIST_HEAD(>fl_blocked);
INIT_LIST_HEAD(>fl_block);
init_waitqueue_head(>fl_wait);
 }
@@ -341,6 +342,7 @@ void locks_free_lock(struct file_lock *fl)
 {
BUG_ON(waitqueue_active(>fl_wait));
BUG_ON(!list_empty(>fl_list));
+   BUG_ON(!list_empty(>fl_blocked));
BUG_ON(!list_empty(>fl_block));
BUG_ON(!hlist_unhashed(>fl_link));
 
@@ -676,7 +678,7 @@ static void __locks_delete_block(struct file_lock *waiter)
 {
locks_delete_global_blocked(waiter);
list_del_init(>fl_block);
-   waiter->fl_next = NULL;
+   waiter->fl_blocker = NULL;
 }
 
 static void locks_delete_block(struct file_lock *waiter)
@@ -692,16 +694,16 @@ static void locks_delete_block(struct file_lock *waiter)
  * it seems like the reasonable thing to do.
  *
  * Must be called with both the flc_lock and blocked_lock_lock held. The
- * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
+ * fl_blocked list itself is protected by the blocked_lock_lock, but by 
ensuring
  * that the flc_lock is also held on insertions we can avoid taking the
- * blocked_lock_lock in some cases when we see that the fl_block list is empty.
+ * blocked_lock_lock in some cases when we see that the fl_blocked list is 
empty.
  */
 static void __locks_insert_block(struct file_lock *blocker,
struct file_lock *waiter)
 {
BUG_ON(!list_empty(>fl_block));
-   waiter->fl_next = blocker;
-   list_add_tail(>fl_block, >fl_block);
+   waiter->fl_blocker = blocker;
+   list_add_tail(>fl_block, >fl_blocked);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
locks_insert_global_blocked(waiter);
 }
@@ -725,18 +727,18 @@ static void locks_wake_up_blocks(struct file_lock 
*blocker)
/*
 * Avoid taking global lock if list is empty. This is safe since new
 * blocked requests are only added to the list under the flc_lock, and
-* the flc_lock is always held here. Note that removal from the fl_block
+* the flc_lock is always held here. Note that removal from the 
fl_blocked
 * list does not require the flc_lock, so we 

[PATCH 2/5] fs/locks: split out __locks_wake_up_blocks().

2018-08-13 Thread NeilBrown
This functionality will be useful in future patches, so
split it out from locks_wake_up_blocks().

Signed-off-by: NeilBrown 
---
 fs/locks.c |   27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 322491e70e41..de0b9276f23d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -681,6 +681,21 @@ static void __locks_delete_block(struct file_lock *waiter)
waiter->fl_blocker = NULL;
 }
 
+static void __locks_wake_up_blocks(struct file_lock *blocker)
+{
+   while (!list_empty(>fl_blocked)) {
+   struct file_lock *waiter;
+
+   waiter = list_first_entry(>fl_blocked,
+ struct file_lock, fl_block);
+   __locks_delete_block(waiter);
+   if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
+   waiter->fl_lmops->lm_notify(waiter);
+   else
+   wake_up(>fl_wait);
+   }
+}
+
 static void locks_delete_block(struct file_lock *waiter)
 {
spin_lock(_lock_lock);
@@ -735,17 +750,7 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
return;
 
spin_lock(_lock_lock);
-   while (!list_empty(>fl_blocked)) {
-   struct file_lock *waiter;
-
-   waiter = list_first_entry(>fl_blocked,
-   struct file_lock, fl_block);
-   __locks_delete_block(waiter);
-   if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
-   waiter->fl_lmops->lm_notify(waiter);
-   else
-   wake_up(>fl_wait);
-   }
+   __locks_wake_up_blocks(blocker);
spin_unlock(_lock_lock);
 }
 




[PATCH 1/5] fs/locks: rename some lists and pointers.

2018-08-13 Thread NeilBrown
struct file lock contains an 'fl_next' pointer which
is used to point to the lock that this request is blocked
waiting for.  So rename it to fl_blocker.

The fl_blocked list_head in an active lock is the head of a list of
blocked requests.  In a request it is a node in that list.
These are two distinct uses, so replace with two list_heads
with different names.
fl_blocked is the head of a list of blocked requests
fl_block is a node on that list.

The two different list_heads are never used at the same time, but that
will change in a future patch.

Note that a tracepoint is changed to report fl_blocker instead
of fl_next.

Signed-off-by: NeilBrown 
---
 fs/cifs/file.c  |2 +-
 fs/locks.c  |   40 ---
 include/linux/fs.h  |7 +--
 include/trace/events/filelock.h |   16 
 4 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8d41ca7bfcf1..066ed2e4ba96 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1092,7 +1092,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
rc = posix_lock_file(file, flock, NULL);
up_write(>lock_sem);
if (rc == FILE_LOCK_DEFERRED) {
-   rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
-   rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
if (!rc)
goto try_again;
posix_unblock_lock(flock);
diff --git a/fs/locks.c b/fs/locks.c
index db7b6917d9c5..322491e70e41 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -194,7 +194,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
  * This lock protects the blocked_hash. Generally, if you're accessing it, you
  * want to be holding this lock.
  *
- * In addition, it also protects the fl->fl_block list, and the fl->fl_next
+ * In addition, it also protects the fl->fl_blocked list, and the fl->fl_blocker
  * pointer for file_lock structures that are acting as lock requests (in
  * contrast to those that are acting as records of acquired locks).
  *
@@ -203,7 +203,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
  * protected by this lock, we can skip acquiring it iff we already hold the
  * flc_lock.
  *
- * In particular, adding an entry to the fl_block list requires that you hold
+ * In particular, adding an entry to the fl_blocked list requires that you hold
  * both the flc_lock and the blocked_lock_lock (acquired in that order).
  * Deleting an entry from the list however only requires the file_lock_lock.
  */
@@ -302,6 +302,7 @@ static void locks_init_lock_heads(struct file_lock *fl)
 {
INIT_HLIST_NODE(>fl_link);
INIT_LIST_HEAD(>fl_list);
+   INIT_LIST_HEAD(>fl_blocked);
INIT_LIST_HEAD(>fl_block);
init_waitqueue_head(>fl_wait);
 }
@@ -341,6 +342,7 @@ void locks_free_lock(struct file_lock *fl)
 {
BUG_ON(waitqueue_active(>fl_wait));
BUG_ON(!list_empty(>fl_list));
+   BUG_ON(!list_empty(>fl_blocked));
BUG_ON(!list_empty(>fl_block));
BUG_ON(!hlist_unhashed(>fl_link));
 
@@ -676,7 +678,7 @@ static void __locks_delete_block(struct file_lock *waiter)
 {
locks_delete_global_blocked(waiter);
list_del_init(>fl_block);
-   waiter->fl_next = NULL;
+   waiter->fl_blocker = NULL;
 }
 
 static void locks_delete_block(struct file_lock *waiter)
@@ -692,16 +694,16 @@ static void locks_delete_block(struct file_lock *waiter)
  * it seems like the reasonable thing to do.
  *
  * Must be called with both the flc_lock and blocked_lock_lock held. The
- * fl_block list itself is protected by the blocked_lock_lock, but by ensuring
+ * fl_blocked list itself is protected by the blocked_lock_lock, but by ensuring
  * that the flc_lock is also held on insertions we can avoid taking the
- * blocked_lock_lock in some cases when we see that the fl_block list is empty.
+ * blocked_lock_lock in some cases when we see that the fl_blocked list is empty.
  */
 static void __locks_insert_block(struct file_lock *blocker,
struct file_lock *waiter)
 {
BUG_ON(!list_empty(>fl_block));
-   waiter->fl_next = blocker;
-   list_add_tail(>fl_block, >fl_block);
+   waiter->fl_blocker = blocker;
+   list_add_tail(>fl_block, >fl_blocked);
if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
locks_insert_global_blocked(waiter);
 }
@@ -725,18 +727,18 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
/*
 * Avoid taking global lock if list is empty. This is safe since new
 * blocked requests are only added to the list under the flc_lock, and
-* the flc_lock is always held here. Note that removal from the fl_block
-* the flc_lock is always held here. Note that removal from the fl_blocked
 * list does not require the flc_lock, so we 

Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large

2018-08-13 Thread Roman Kiryanov
Hi,

thank you for reviewing my patches. I decided to put BUILD_BUG_ON
close to places where it is important that these structs fit into a
memory page to give some context.

Regards,
Roman.
On Mon, Aug 13, 2018 at 6:48 PM Joe Perches  wrote:
>
> On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> > From: Roman Kiryanov 
> >
> > Since the driver provides no workaround prevent in cases if structs do
> > no fit into a memory page, it is better to fail complation to find about
> > the issue earlt instead of returning errors at runtime.
>
> Minor earlt/early typo
>
> > diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> > b/drivers/platform/goldfish/goldfish_pipe.c
> []
> > @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct 
> > platform_device *pdev)
> >* needs to be contained in a single physical page. The easiest choice
> >* is to just allocate a page and place the buffers in it.
> >*/
> > - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
> > - return -ENOMEM;
> > -
> > + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE);
> >   page = (char *)__get_free_page(GFP_KERNEL);
> >   if (!page) {
> >   kfree(dev->pipes);
> > @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device 
> > *pdev)
> >   struct resource *r;
> >   struct goldfish_pipe_dev *dev = pipe_dev;
> >
> > - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
> > - return -ENOMEM;
> > + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
> >
> >   /* not thread safe, but this should not happen */
> >   WARN_ON(dev->base != NULL);
>
> Why separate these BUILD_BUG_ONs into 2 different functions?
>
> Why not just
> BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
> BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);
>


Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large

2018-08-13 Thread Roman Kiryanov
Hi,

thank you for reviewing my patches. I decided to put BUILD_BUG_ON
close to places where it is important that these structs fit into a
memory page to give some context.

Regards,
Roman.
On Mon, Aug 13, 2018 at 6:48 PM Joe Perches  wrote:
>
> On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> > From: Roman Kiryanov 
> >
> > Since the driver provides no workaround prevent in cases if structs do
> > no fit into a memory page, it is better to fail complation to find about
> > the issue earlt instead of returning errors at runtime.
>
> Minor earlt/early typo
>
> > diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> > b/drivers/platform/goldfish/goldfish_pipe.c
> []
> > @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct 
> > platform_device *pdev)
> >* needs to be contained in a single physical page. The easiest choice
> >* is to just allocate a page and place the buffers in it.
> >*/
> > - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
> > - return -ENOMEM;
> > -
> > + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE);
> >   page = (char *)__get_free_page(GFP_KERNEL);
> >   if (!page) {
> >   kfree(dev->pipes);
> > @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device 
> > *pdev)
> >   struct resource *r;
> >   struct goldfish_pipe_dev *dev = pipe_dev;
> >
> > - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
> > - return -ENOMEM;
> > + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
> >
> >   /* not thread safe, but this should not happen */
> >   WARN_ON(dev->base != NULL);
>
> Why separate these BUILD_BUG_ONs into 2 different functions?
>
> Why not just
> BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
> BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);
>


Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging

2018-08-13 Thread kbuild test robot
Hi Roman,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18 next-20180813]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/rkir-google-com/platform-goldfish-pipe-Fix-comments-to-fit-80-columns/20180814-104717
config: i386-randconfig-x008-201832 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/platform//goldfish/goldfish_pipe.c: In function 
'goldfish_pipe_read_write':
>> drivers/platform//goldfish/goldfish_pipe.c:405:22: error: 'struct 
>> goldfish_pipe_dev' has no member named 'pdev_dev'
 pdev_dev = pipe->dev->pdev_dev;
 ^~

vim +405 drivers/platform//goldfish/goldfish_pipe.c

   379  
   380  static ssize_t goldfish_pipe_read_write(struct file *filp,
   381  char __user *buffer, size_t bufflen, int is_write)
   382  {
   383  struct goldfish_pipe *pipe = filp->private_data;
   384  int count = 0, ret = -EINVAL;
   385  unsigned long address, address_end, last_page;
   386  unsigned int last_page_size;
   387  struct device *pdev_dev;
   388  
   389  /* If the emulator already closed the pipe, no need to go 
further */
   390  if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags)))
   391  return -EIO;
   392  /* Null reads or writes succeeds */
   393  if (unlikely(bufflen == 0))
   394  return 0;
   395  /* Check the buffer range for access */
   396  if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
   397  buffer, bufflen)))
   398  return -EFAULT;
   399  
   400  address = (unsigned long)buffer;
   401  address_end = address + bufflen;
   402  last_page = (address_end - 1) & PAGE_MASK;
   403  last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
   404  
 > 405  pdev_dev = pipe->dev->pdev_dev;
   406  
   407  while (address < address_end) {
   408  s32 consumed_size;
   409  int status;
   410  
   411  ret = transfer_max_buffers(pipe, address, address_end, 
is_write,
   412  last_page, last_page_size, 
_size,
   413  );
   414  if (ret < 0)
   415  break;
   416  
   417  if (consumed_size > 0) {
   418  /* No matter what's the status, we've 
transferred
   419   * something.
   420   */
   421  count += consumed_size;
   422  address += consumed_size;
   423  }
   424  if (status > 0)
   425  continue;
   426  if (status == 0) {
   427  /* EOF */
   428  ret = 0;
   429  break;
   430  }
   431  if (count > 0) {
   432  /*
   433   * An error occurred, but we already transferred
   434   * something on one of the previous iterations.
   435   * Just return what we already copied and log 
this
   436   * err.
   437   */
   438  if (status != PIPE_ERROR_AGAIN)
   439  dev_err_ratelimited(pdev_dev,
   440  "goldfish_pipe: backend error 
%d on %s\n",
   441  status, is_write ? "write" : 
"read");
   442  break;
   443  }
   444  
   445  /*
   446   * If the error is not PIPE_ERROR_AGAIN, or if we are in
   447   * non-blocking mode, just return the error code.
   448   */
   449  if (status != PIPE_ERROR_AGAIN ||
   450  (filp->f_flags & O_NONBLOCK) != 0) {
   451  ret = goldfish_pipe_error_convert(status);
   452  break;
   453  }
   454  
   455  status = wait_for_host_signal(pipe, is_write);
   456  if (status < 0)
   457  return status;
   458  }
   459  
   460  if (count > 0)
   461  return count;
   462  return ret;
   463  }
   464  

---
0-DAY kernel test infrastructure  

Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging

2018-08-13 Thread kbuild test robot
Hi Roman,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18 next-20180813]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/rkir-google-com/platform-goldfish-pipe-Fix-comments-to-fit-80-columns/20180814-104717
config: i386-randconfig-x008-201832 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/platform//goldfish/goldfish_pipe.c: In function 
'goldfish_pipe_read_write':
>> drivers/platform//goldfish/goldfish_pipe.c:405:22: error: 'struct 
>> goldfish_pipe_dev' has no member named 'pdev_dev'
 pdev_dev = pipe->dev->pdev_dev;
 ^~

vim +405 drivers/platform//goldfish/goldfish_pipe.c

   379  
   380  static ssize_t goldfish_pipe_read_write(struct file *filp,
   381  char __user *buffer, size_t bufflen, int is_write)
   382  {
   383  struct goldfish_pipe *pipe = filp->private_data;
   384  int count = 0, ret = -EINVAL;
   385  unsigned long address, address_end, last_page;
   386  unsigned int last_page_size;
   387  struct device *pdev_dev;
   388  
   389  /* If the emulator already closed the pipe, no need to go 
further */
   390  if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags)))
   391  return -EIO;
   392  /* Null reads or writes succeeds */
   393  if (unlikely(bufflen == 0))
   394  return 0;
   395  /* Check the buffer range for access */
   396  if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
   397  buffer, bufflen)))
   398  return -EFAULT;
   399  
   400  address = (unsigned long)buffer;
   401  address_end = address + bufflen;
   402  last_page = (address_end - 1) & PAGE_MASK;
   403  last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
   404  
 > 405  pdev_dev = pipe->dev->pdev_dev;
   406  
   407  while (address < address_end) {
   408  s32 consumed_size;
   409  int status;
   410  
   411  ret = transfer_max_buffers(pipe, address, address_end, 
is_write,
   412  last_page, last_page_size, 
_size,
   413  );
   414  if (ret < 0)
   415  break;
   416  
   417  if (consumed_size > 0) {
   418  /* No matter what's the status, we've 
transferred
   419   * something.
   420   */
   421  count += consumed_size;
   422  address += consumed_size;
   423  }
   424  if (status > 0)
   425  continue;
   426  if (status == 0) {
   427  /* EOF */
   428  ret = 0;
   429  break;
   430  }
   431  if (count > 0) {
   432  /*
   433   * An error occurred, but we already transferred
   434   * something on one of the previous iterations.
   435   * Just return what we already copied and log 
this
   436   * err.
   437   */
   438  if (status != PIPE_ERROR_AGAIN)
   439  dev_err_ratelimited(pdev_dev,
   440  "goldfish_pipe: backend error 
%d on %s\n",
   441  status, is_write ? "write" : 
"read");
   442  break;
   443  }
   444  
   445  /*
   446   * If the error is not PIPE_ERROR_AGAIN, or if we are in
   447   * non-blocking mode, just return the error code.
   448   */
   449  if (status != PIPE_ERROR_AGAIN ||
   450  (filp->f_flags & O_NONBLOCK) != 0) {
   451  ret = goldfish_pipe_error_convert(status);
   452  break;
   453  }
   454  
   455  status = wait_for_host_signal(pipe, is_write);
   456  if (status < 0)
   457  return status;
   458  }
   459  
   460  if (count > 0)
   461  return count;
   462  return ret;
   463  }
   464  

---
0-DAY kernel test infrastructure  

Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation

2018-08-13 Thread Srinath Mannam
Hi JC,


On Tue, Aug 14, 2018 at 7:24 AM, Jayachandran C
 wrote:
> Hi Srinath, Ray,
>
> On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote:
>> Add DT binding document for stingray usb phy driver.
>>
>> Signed-off-by: Srinath Mannam 
>> Reviewed-by: Ray Jui 
>> Acked-by: Rob Herring 
>
> The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY
> from what I understand. Our hardware team reports that there are some
> failures seen without a custom PHY driver.
>
> Do you require this PHY driver on regular operation on Stingray? Or is
> this for some specific functionality? I am asking since the issue
> reported here is on switching between a USB 3.0 and USB 2.0 devices.
We need it for regular operation in stingray.
>
> Any plans on submitting this driver again?
I will submit it ASAP.

Regards,
Srinath.
>
> Thanks,
> JC


Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation

2018-08-13 Thread Srinath Mannam
Hi JC,


On Tue, Aug 14, 2018 at 7:24 AM, Jayachandran C
 wrote:
> Hi Srinath, Ray,
>
> On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote:
>> Add DT binding document for stingray usb phy driver.
>>
>> Signed-off-by: Srinath Mannam 
>> Reviewed-by: Ray Jui 
>> Acked-by: Rob Herring 
>
> The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY
> from what I understand. Our hardware team reports that there are some
> failures seen without a custom PHY driver.
>
> Do you require this PHY driver on regular operation on Stingray? Or is
> this for some specific functionality? I am asking since the issue
> reported here is on switching between a USB 3.0 and USB 2.0 devices.
We need it for regular operation in stingray.
>
> Any plans on submitting this driver again?
I will submit it ASAP.

Regards,
Srinath.
>
> Thanks,
> JC


Re: [PATCH] kconfig: fix the rule of mainmenu_stmt symbol

2018-08-13 Thread Masahiro Yamada
2018-08-09 15:47 GMT+09:00 Masahiro Yamada :
> The rule of mainmenu_stmt does not have debug print of zconf_lineno(),
> but if it had, it would print a wrong line number for the same reason
> as commit b2d00d7c61c8 ("kconfig: fix line numbers for if-entries in
> menu tree").
>
> The mainmenu_stmt does not need to eat following empty lines because
> they are reduced to common_stmt.
>
> Signed-off-by: Masahiro Yamada 
> ---


Applied to linux-kbuild/kconfig.

>  scripts/kconfig/zconf.y | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
> index 4b68272..95a4fd3 100644
> --- a/scripts/kconfig/zconf.y
> +++ b/scripts/kconfig/zconf.y
> @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE];
>  static struct menu *current_menu, *current_entry;
>
>  %}
> -%expect 31
> +%expect 30
>
>  %union
>  {
> @@ -117,7 +117,7 @@ start: mainmenu_stmt stmt_list | stmt_list;
>
>  /* mainmenu entry */
>
> -mainmenu_stmt: T_MAINMENU prompt nl
> +mainmenu_stmt: T_MAINMENU prompt T_EOL
>  {
> menu_add_prompt(P_MENU, $2, NULL);
>  };
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada


Re: [PATCH 1/2] kconfig: remove unused sym_get_env_prop() function

2018-08-13 Thread Masahiro Yamada
2018-08-14 2:20 GMT+09:00 Sam Ravnborg :
> On Tue, Aug 14, 2018 at 01:48:38AM +0900, Masahiro Yamada wrote:
>> This function is unused since commit 104daea149c4 ("kconfig: reference
>> environment variables directly and remove 'option env='").
>>
>> Signed-off-by: Masahiro Yamada 
>
> Both patches are obviously correct.
> Feel free to add:
> Acked-by: Sam Ravnborg 
>
> and apply to kbuild so we  can get them in this cycle.
>
> Sam


Applied to linux-kbuild/kconfig with Sam's Ack.



-- 
Best Regards
Masahiro Yamada


Re: [PATCH 1/2] kconfig: remove unused sym_get_env_prop() function

2018-08-13 Thread Masahiro Yamada
2018-08-14 2:20 GMT+09:00 Sam Ravnborg :
> On Tue, Aug 14, 2018 at 01:48:38AM +0900, Masahiro Yamada wrote:
>> This function is unused since commit 104daea149c4 ("kconfig: reference
>> environment variables directly and remove 'option env='").
>>
>> Signed-off-by: Masahiro Yamada 
>
> Both patches are obviously correct.
> Feel free to add:
> Acked-by: Sam Ravnborg 
>
> and apply to kbuild so we  can get them in this cycle.
>
> Sam


Applied to linux-kbuild/kconfig with Sam's Ack.



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] kconfig: fix the rule of mainmenu_stmt symbol

2018-08-13 Thread Masahiro Yamada
2018-08-09 15:47 GMT+09:00 Masahiro Yamada :
> The rule of mainmenu_stmt does not have debug print of zconf_lineno(),
> but if it had, it would print a wrong line number for the same reason
> as commit b2d00d7c61c8 ("kconfig: fix line numbers for if-entries in
> menu tree").
>
> The mainmenu_stmt does not need to eat following empty lines because
> they are reduced to common_stmt.
>
> Signed-off-by: Masahiro Yamada 
> ---


Applied to linux-kbuild/kconfig.

>  scripts/kconfig/zconf.y | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
> index 4b68272..95a4fd3 100644
> --- a/scripts/kconfig/zconf.y
> +++ b/scripts/kconfig/zconf.y
> @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE];
>  static struct menu *current_menu, *current_entry;
>
>  %}
> -%expect 31
> +%expect 30
>
>  %union
>  {
> @@ -117,7 +117,7 @@ start: mainmenu_stmt stmt_list | stmt_list;
>
>  /* mainmenu entry */
>
> -mainmenu_stmt: T_MAINMENU prompt nl
> +mainmenu_stmt: T_MAINMENU prompt T_EOL
>  {
> menu_add_prompt(P_MENU, $2, NULL);
>  };
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Best Regards
Masahiro Yamada


Re: linux-next: Signed-off-by missing for commit in the kbuild tree

2018-08-13 Thread Masahiro Yamada
Stephen,

2018-08-14 6:33 GMT+09:00 Stephen Rothwell :
> Hi Masahiro,
>
> Commit
>
>   bd714f5f14e0 ("Coccinelle: doubletest: reduce side effect false positives")
>
> is missing a Signed-off-by from its committer.

Thanks for catching this.
I will fix it.




> --
> Cheers,
> Stephen Rothwell



-- 
Best Regards
Masahiro Yamada


Re: linux-next: Signed-off-by missing for commit in the kbuild tree

2018-08-13 Thread Masahiro Yamada
Stephen,

2018-08-14 6:33 GMT+09:00 Stephen Rothwell :
> Hi Masahiro,
>
> Commit
>
>   bd714f5f14e0 ("Coccinelle: doubletest: reduce side effect false positives")
>
> is missing a Signed-off-by from its committer.

Thanks for catching this.
I will fix it.




> --
> Cheers,
> Stephen Rothwell



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] Support mksh as /bin/sh.

2018-08-13 Thread Masahiro Yamada
2018-08-14 7:32 GMT+09:00 Kees Cook :
> On Mon, Aug 13, 2018 at 3:14 AM,   wrote:
>> From: Arkadiusz Miśkiewicz 
>>
>> mksh needs space between ! and ( to work properly. Otherwise this
>> happens:
>>
>> + make oldconfig
>> scripts/kconfig/conf  --oldconfig Kconfig
>> ./scripts/clang-version.sh[18]: COPYING: not found
>> printf: ‘__clang_major__’: expected a numeric value
>> printf: ‘__clang_minor__’: expected a numeric value
>> printf: ‘__clang_patchlevel__’: expected a numeric value
>> init/Kconfig:24:warning: 'CLANG_VERSION': number is invalid
>>
>> Signed-off-by: Arkadiusz Miśkiewicz 
>
> Reviewed-by: Kees Cook 
>
> -Kees
>

I could not find this patch in ML.


Arkadiusz,

Please resend this patch to

linux-kbu...@vger.kernel.org



>> ---
>>  scripts/clang-version.sh | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh
>> index dbf0a31eb111..e65fbc3079d4 100755
>> --- a/scripts/clang-version.sh
>> +++ b/scripts/clang-version.sh
>> @@ -12,7 +12,7 @@
>>
>>  compiler="$*"
>>
>> -if !( $compiler --version | grep -q clang) ; then
>> +if ! ( $compiler --version | grep -q clang) ; then
>> echo 0
>> exit 1
>>  fi
>> --
>> 2.18.0
>>
>
>
>
> --
> Kees Cook
> Pixel Security



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] Support mksh as /bin/sh.

2018-08-13 Thread Masahiro Yamada
2018-08-14 7:32 GMT+09:00 Kees Cook :
> On Mon, Aug 13, 2018 at 3:14 AM,   wrote:
>> From: Arkadiusz Miśkiewicz 
>>
>> mksh needs space between ! and ( to work properly. Otherwise this
>> happens:
>>
>> + make oldconfig
>> scripts/kconfig/conf  --oldconfig Kconfig
>> ./scripts/clang-version.sh[18]: COPYING: not found
>> printf: ‘__clang_major__’: expected a numeric value
>> printf: ‘__clang_minor__’: expected a numeric value
>> printf: ‘__clang_patchlevel__’: expected a numeric value
>> init/Kconfig:24:warning: 'CLANG_VERSION': number is invalid
>>
>> Signed-off-by: Arkadiusz Miśkiewicz 
>
> Reviewed-by: Kees Cook 
>
> -Kees
>

I could not find this patch in ML.


Arkadiusz,

Please resend this patch to

linux-kbu...@vger.kernel.org



>> ---
>>  scripts/clang-version.sh | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh
>> index dbf0a31eb111..e65fbc3079d4 100755
>> --- a/scripts/clang-version.sh
>> +++ b/scripts/clang-version.sh
>> @@ -12,7 +12,7 @@
>>
>>  compiler="$*"
>>
>> -if !( $compiler --version | grep -q clang) ; then
>> +if ! ( $compiler --version | grep -q clang) ; then
>> echo 0
>> exit 1
>>  fi
>> --
>> 2.18.0
>>
>
>
>
> --
> Kees Cook
> Pixel Security



-- 
Best Regards
Masahiro Yamada


Re: [PATCH v2 2/4] dt-bindings: soc: amlogic: add meson-canvas documentation

2018-08-13 Thread Maxime Jourdan
2018-08-13 21:07 GMT+02:00 Rob Herring :
> On Wed, Aug 08, 2018 at 12:00:09AM +0200, Maxime Jourdan wrote:
>> DT bindings doc for amlogic,meson-canvas
>>
>> Signed-off-by: Maxime Jourdan 
>> ---
>>  .../soc/amlogic/amlogic,meson-canvas.txt  | 36 +++
>>  1 file changed, 36 insertions(+)
>>  create mode 100644 
>> Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt
>>
>> diff --git 
>> a/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt 
>> b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt
>> new file mode 100644
>> index ..5f0351717bee
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt
>> @@ -0,0 +1,36 @@
>> +Amlogic Canvas
>> +
>> +
>> +A canvas is a collection of metadata that describes a pixel buffer.
>> +Those metadata include: width, height, phyaddr, wrapping, block mode
>> +and endianness.
>> +
>> +Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data
>> +rather than use the phy addresses directly. For instance, this is the case 
>> for
>> +the video decoders and the display.
>> +
>> +Amlogic SoCs have 256 canvas.
>> +
>> +Device Tree Bindings:
>> +-
>> +
>> +Canvas Provider
>> +--
>> +
>> +Required properties:
>> +- compatible: "amlogic,canvas"
>> +
>> +Parent node should have the following properties :
>> +- compatible: "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd"
>
> Is this documented somewhere? One child function is not a reason for an
> MFD and child nodes. And child nodes like this with no resources are
> unnecessary.
>

Hi Rob, this was done to follow the same path as other buses on the
platform that have sysctrls in order to provide regmaps to the
devices.

I can see how it's not really necessary here though, would it be okay
with you if I turned "canvas" into a simple bus subnode, using __iomem
in the device ?

>> +- reg: base address and size of the DMC system control register space.
>> +
>> +Example:
>> +
>> +sysctrl_DMC: system-controller@0 {
>> + compatible = "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd";
>> + reg = <0x0 0x0 0x0 0x1000>;
>> +
>> + canvas: canvas-provider@0 {
>> + compatible = "amlogic,canvas";
>> + };
>> +};
>> --
>> 2.18.0
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe devicetree" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/4] dt-bindings: soc: amlogic: add meson-canvas documentation

2018-08-13 Thread Maxime Jourdan
2018-08-13 21:07 GMT+02:00 Rob Herring :
> On Wed, Aug 08, 2018 at 12:00:09AM +0200, Maxime Jourdan wrote:
>> DT bindings doc for amlogic,meson-canvas
>>
>> Signed-off-by: Maxime Jourdan 
>> ---
>>  .../soc/amlogic/amlogic,meson-canvas.txt  | 36 +++
>>  1 file changed, 36 insertions(+)
>>  create mode 100644 
>> Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt
>>
>> diff --git 
>> a/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt 
>> b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt
>> new file mode 100644
>> index ..5f0351717bee
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt
>> @@ -0,0 +1,36 @@
>> +Amlogic Canvas
>> +
>> +
>> +A canvas is a collection of metadata that describes a pixel buffer.
>> +Those metadata include: width, height, phyaddr, wrapping, block mode
>> +and endianness.
>> +
>> +Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data
>> +rather than use the phy addresses directly. For instance, this is the case 
>> for
>> +the video decoders and the display.
>> +
>> +Amlogic SoCs have 256 canvas.
>> +
>> +Device Tree Bindings:
>> +-
>> +
>> +Canvas Provider
>> +--
>> +
>> +Required properties:
>> +- compatible: "amlogic,canvas"
>> +
>> +Parent node should have the following properties :
>> +- compatible: "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd"
>
> Is this documented somewhere? One child function is not a reason for an
> MFD and child nodes. And child nodes like this with no resources are
> unnecessary.
>

Hi Rob, this was done to follow the same path as other buses on the
platform that have sysctrls in order to provide regmaps to the
devices.

I can see how it's not really necessary here though, would it be okay
with you if I turned "canvas" into a simple bus subnode, using __iomem
in the device ?

>> +- reg: base address and size of the DMC system control register space.
>> +
>> +Example:
>> +
>> +sysctrl_DMC: system-controller@0 {
>> + compatible = "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd";
>> + reg = <0x0 0x0 0x0 0x1000>;
>> +
>> + canvas: canvas-provider@0 {
>> + compatible = "amlogic,canvas";
>> + };
>> +};
>> --
>> 2.18.0
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe devicetree" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] gcc-plugins: require GCC

2018-08-13 Thread Masahiro Yamada
2018-08-14 6:29 GMT+09:00 Stefan Agner :
> On 13.08.2018 22:18, Kees Cook wrote:
>> On Mon, Aug 13, 2018 at 1:10 PM, Kees Cook  wrote:
>>> On Mon, Aug 13, 2018 at 12:38 AM, Masahiro Yamada
>>>  wrote:
 2018-08-11 18:48 GMT+09:00 Stefan Agner :
> Unsurprisingly GCC plugins require GCC as a compiler. This avoids
> GCC plugins being selectable when using clang.
>
> Signed-off-by: Stefan Agner 
> ---
>  arch/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 1aa59063f1fd..8c693a837ed7 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -422,7 +422,7 @@ config HAVE_GCC_PLUGINS
>
>  menuconfig GCC_PLUGINS
> bool "GCC plugins"
> -   depends on HAVE_GCC_PLUGINS
> +   depends on HAVE_GCC_PLUGINS && CC_IS_GCC
> depends on PLUGIN_HOSTCC != ""
> help
>   GCC plugins are loadable modules that provide extra features to 
> the
> --
> 2.18.0
>

 I guess the more correct way is to fix
 scripts/gcc-plugin.sh

 This shell script should exit 0
 only when GCC plugin is supported.
>>>
>>> I'm of two minds: we already have the gcc test in Kconfig so we might
>>> want to avoid redundant checks, but maybe the script should be a
>>> "complete" test. I guess the latter. I will send a patch.
>>
>> Actually, how about we do the test in Kconfig, but ahead of the
>> script? That will reduce redundancy:
>>
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index 1aa59063f1fd..18f518624e41 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -409,7 +409,7 @@ preferred-plugin-hostcc := $(if-success,[
>> $(gcc-version) -ge 40800 ],$(HOSTCXX),
>>
>>  config PLUGIN_HOSTCC
>> string
>> -   default "$(shell,$(srctree)/scripts/gcc-plugin.sh
>> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")"
>> +   default "$(shell,$(srctree)/scripts/gcc-plugin.sh
>> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")" if CC_IS_GCC
>> help
>>   Host compiler used to build GCC plugins.  This can be $(HOSTCXX),
>>   $(HOSTCC), or a null string if GCC plugin is unsupported.
>
> I like this better. A script which is called gcc-plugin.sh should only
> be called if gcc is used...
>

This does not work like that.

$(shell,$(srctree)/scripts/gcc-plugin.sh "$(preferred-plugin-hostcc)"
"$(HOSTCXX)" "$(CC)")

is expanded in the parse stage regardless of CC_IS_GCC.




If nobody comes up with a better way, I am fine with this.

Reviewed-by: Masahiro Yamada 




> --
> Stefan
>
>>
>>
>> -Kees



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] gcc-plugins: require GCC

2018-08-13 Thread Masahiro Yamada
2018-08-14 6:29 GMT+09:00 Stefan Agner :
> On 13.08.2018 22:18, Kees Cook wrote:
>> On Mon, Aug 13, 2018 at 1:10 PM, Kees Cook  wrote:
>>> On Mon, Aug 13, 2018 at 12:38 AM, Masahiro Yamada
>>>  wrote:
 2018-08-11 18:48 GMT+09:00 Stefan Agner :
> Unsurprisingly GCC plugins require GCC as a compiler. This avoids
> GCC plugins being selectable when using clang.
>
> Signed-off-by: Stefan Agner 
> ---
>  arch/Kconfig | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 1aa59063f1fd..8c693a837ed7 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -422,7 +422,7 @@ config HAVE_GCC_PLUGINS
>
>  menuconfig GCC_PLUGINS
> bool "GCC plugins"
> -   depends on HAVE_GCC_PLUGINS
> +   depends on HAVE_GCC_PLUGINS && CC_IS_GCC
> depends on PLUGIN_HOSTCC != ""
> help
>   GCC plugins are loadable modules that provide extra features to 
> the
> --
> 2.18.0
>

 I guess the more correct way is to fix
 scripts/gcc-plugin.sh

 This shell script should exit 0
 only when GCC plugin is supported.
>>>
>>> I'm of two minds: we already have the gcc test in Kconfig so we might
>>> want to avoid redundant checks, but maybe the script should be a
>>> "complete" test. I guess the latter. I will send a patch.
>>
>> Actually, how about we do the test in Kconfig, but ahead of the
>> script? That will reduce redundancy:
>>
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index 1aa59063f1fd..18f518624e41 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -409,7 +409,7 @@ preferred-plugin-hostcc := $(if-success,[
>> $(gcc-version) -ge 40800 ],$(HOSTCXX),
>>
>>  config PLUGIN_HOSTCC
>> string
>> -   default "$(shell,$(srctree)/scripts/gcc-plugin.sh
>> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")"
>> +   default "$(shell,$(srctree)/scripts/gcc-plugin.sh
>> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")" if CC_IS_GCC
>> help
>>   Host compiler used to build GCC plugins.  This can be $(HOSTCXX),
>>   $(HOSTCC), or a null string if GCC plugin is unsupported.
>
> I like this better. A script which is called gcc-plugin.sh should only
> be called if gcc is used...
>

This does not work like that.

$(shell,$(srctree)/scripts/gcc-plugin.sh "$(preferred-plugin-hostcc)"
"$(HOSTCXX)" "$(CC)")

is expanded in the parse stage regardless of CC_IS_GCC.




If nobody comes up with a better way, I am fine with this.

Reviewed-by: Masahiro Yamada 




> --
> Stefan
>
>>
>>
>> -Kees



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] md/bcache: Remove NULL check.

2018-08-13 Thread Sean Fu
On Mon, Aug 13, 2018 at 06:00:11PM +0800, Coly Li wrote:
> On 2018/8/13 5:38 PM, Sean Fu wrote:
> > Remove unnecessary NULL check before kmem_cache_destroy() in
> > drivers/md/bcache/request.c
> > 
> > Signed-off-by: Sean Fu 
> 
> Hi Sean,
> 
> A same change is posted in my previous checkpatch fixes series. You may
> find it from,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/?h=for-next
> 
> The identical patch is,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/commit/?h=for-next=26b39d0732955ecb839d0afe8d6211d70f213384
>
Thank you for your reply.

Sean Fu
> Thanks.
> 
> Coly Li
> 
> > ---
> >  drivers/md/bcache/request.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> > index ae67f5f..0ddee35 100644
> > --- a/drivers/md/bcache/request.c
> > +++ b/drivers/md/bcache/request.c
> > @@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device 
> > *d)
> >  
> >  void bch_request_exit(void)
> >  {
> > -   if (bch_search_cache)
> > -   kmem_cache_destroy(bch_search_cache);
> > +   kmem_cache_destroy(bch_search_cache);
> >  }
> >  
> >  int __init bch_request_init(void)
> > 
> 


Re: [PATCH] md/bcache: Remove NULL check.

2018-08-13 Thread Sean Fu
On Mon, Aug 13, 2018 at 06:00:11PM +0800, Coly Li wrote:
> On 2018/8/13 5:38 PM, Sean Fu wrote:
> > Remove unnecessary NULL check before kmem_cache_destroy() in
> > drivers/md/bcache/request.c
> > 
> > Signed-off-by: Sean Fu 
> 
> Hi Sean,
> 
> A same change is posted in my previous checkpatch fixes series. You may
> find it from,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/?h=for-next
> 
> The identical patch is,
> https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/commit/?h=for-next=26b39d0732955ecb839d0afe8d6211d70f213384
>
Thank you for your reply.

Sean Fu
> Thanks.
> 
> Coly Li
> 
> > ---
> >  drivers/md/bcache/request.c | 3 +--
> >  1 file changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> > index ae67f5f..0ddee35 100644
> > --- a/drivers/md/bcache/request.c
> > +++ b/drivers/md/bcache/request.c
> > @@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device 
> > *d)
> >  
> >  void bch_request_exit(void)
> >  {
> > -   if (bch_search_cache)
> > -   kmem_cache_destroy(bch_search_cache);
> > +   kmem_cache_destroy(bch_search_cache);
> >  }
> >  
> >  int __init bch_request_init(void)
> > 
> 


[PATCH] arm/mach-at91/pm: Do not double put the device node

2018-08-13 Thread zhong jiang
Device node iterators put the previous value of the index variable,
so an explicit put causes a double put.

I detect the issue with the help of Coccinelle.

Signed-off-by: zhong jiang 
---
 arch/arm/mach-at91/pm.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 32fae4d..a5ec35f 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -143,15 +143,12 @@ static int at91_pm_config_ws(unsigned int pm_mode, bool 
set)
 
/* Check if enabled on SHDWC. */
if (wsi->shdwc_mr_bit && !(val & wsi->shdwc_mr_bit))
-   goto put_node;
+   continue;
 
mode |= wsi->pmc_fsmr_bit;
if (wsi->set_polarity)
polarity |= wsi->pmc_fsmr_bit;
}
-
-put_node:
-   of_node_put(np);
}
 
if (mode) {
-- 
1.7.12.4



[PATCH] arm/mach-at91/pm: Do not double put the device node

2018-08-13 Thread zhong jiang
Device node iterators put the previous value of the index variable,
so an explicit put causes a double put.

I detect the issue with the help of Coccinelle.

Signed-off-by: zhong jiang 
---
 arch/arm/mach-at91/pm.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 32fae4d..a5ec35f 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -143,15 +143,12 @@ static int at91_pm_config_ws(unsigned int pm_mode, bool 
set)
 
/* Check if enabled on SHDWC. */
if (wsi->shdwc_mr_bit && !(val & wsi->shdwc_mr_bit))
-   goto put_node;
+   continue;
 
mode |= wsi->pmc_fsmr_bit;
if (wsi->set_polarity)
polarity |= wsi->pmc_fsmr_bit;
}
-
-put_node:
-   of_node_put(np);
}
 
if (mode) {
-- 
1.7.12.4



Re: [PATCH 1/3] arm64: implement ftrace with regs

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 11:54:06 +0100
Julien Thierry  wrote:

> > --- a/arch/arm64/Makefile
> > +++ b/arch/arm64/Makefile
> > @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
> >   KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
> >   endif
> >   
> > +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > +  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> > +  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> > +  ifeq ($(call cc-option,-fpatchable-function-entry=2),)
> > +$(warning Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \
> > + -fpatchable-function-entry not supported by compiler)  
> 
> Shouldn't this be an error? The option -fpatchable-function-entry has 
> been added to the CC_FLAGS_FTRACE, so any call to the compiler is gonna 
> break anyway. Or am I missing something?

I'm guessing this adds a more informative message on that error. One
will know why -fpatchable-function-entry was added to the CFLAGS. I'm
for more informative error messages, having been a victim of poor error
messages that caused me to dig deep into the guts of the build
infrastructure to figure out simple issues.

-- Steve


Re: [PATCH 1/3] arm64: implement ftrace with regs

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 11:54:06 +0100
Julien Thierry  wrote:

> > --- a/arch/arm64/Makefile
> > +++ b/arch/arm64/Makefile
> > @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
> >   KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
> >   endif
> >   
> > +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > +  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
> > +  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
> > +  ifeq ($(call cc-option,-fpatchable-function-entry=2),)
> > +$(warning Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \
> > + -fpatchable-function-entry not supported by compiler)  
> 
> Shouldn't this be an error? The option -fpatchable-function-entry has 
> been added to the CC_FLAGS_FTRACE, so any call to the compiler is gonna 
> break anyway. Or am I missing something?

I'm guessing this adds a more informative message on that error. One
will know why -fpatchable-function-entry was added to the CFLAGS. I'm
for more informative error messages, having been a victim of poor error
messages that caused me to dig deep into the guts of the build
infrastructure to figure out simple issues.

-- Steve


Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation

2018-08-13 Thread Jayachandran C
Hi Srinath, Ray,

On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote:
> Add DT binding document for stingray usb phy driver.
> 
> Signed-off-by: Srinath Mannam 
> Reviewed-by: Ray Jui 
> Acked-by: Rob Herring 

The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY
from what I understand. Our hardware team reports that there are some
failures seen without a custom PHY driver.

Do you require this PHY driver on regular operation on Stingray? Or is
this for some specific functionality? I am asking since the issue
reported here is on switching between USB 3.0 and USB 2.0 devices.

Any plans on submitting this driver again?

Thanks,
JC


Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation

2018-08-13 Thread Jayachandran C
Hi Srinath, Ray,

On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote:
> Add DT binding document for stingray usb phy driver.
> 
> Signed-off-by: Srinath Mannam 
> Reviewed-by: Ray Jui 
> Acked-by: Rob Herring 

The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY
from what I understand. Our hardware team reports that there are some
failures seen without a custom PHY driver.

Do you require this PHY driver on regular operation on Stingray? Or is
this for some specific functionality? I am asking since the issue
reported here is on switching between USB 3.0 and USB 2.0 devices.

Any plans on submitting this driver again?

Thanks,
JC


Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> From: Roman Kiryanov 
> 
> Since the driver provides no workaround in cases where structs do
> not fit into a memory page, it is better to fail compilation to find out about
> the issue earlt instead of returning errors at runtime.

Minor earlt/early typo

> diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> b/drivers/platform/goldfish/goldfish_pipe.c
[]
> @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct 
> platform_device *pdev)
>* needs to be contained in a single physical page. The easiest choice
>* is to just allocate a page and place the buffers in it.
>*/
> - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
> - return -ENOMEM;
> -
> + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE);
>   page = (char *)__get_free_page(GFP_KERNEL);
>   if (!page) {
>   kfree(dev->pipes);
> @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device 
> *pdev)
>   struct resource *r;
>   struct goldfish_pipe_dev *dev = pipe_dev;
>  
> - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
> - return -ENOMEM;
> + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
>  
>   /* not thread safe, but this should not happen */
>   WARN_ON(dev->base != NULL);

Why separate these BUILD_BUG_ONs into 2 different functions?

Why not just
BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);



Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> From: Roman Kiryanov 
> 
> Since the driver provides no workaround in cases where structs do
> not fit into a memory page, it is better to fail compilation to find out about
> the issue earlt instead of returning errors at runtime.

Minor earlt/early typo

> diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> b/drivers/platform/goldfish/goldfish_pipe.c
[]
> @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct 
> platform_device *pdev)
>* needs to be contained in a single physical page. The easiest choice
>* is to just allocate a page and place the buffers in it.
>*/
> - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
> - return -ENOMEM;
> -
> + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE);
>   page = (char *)__get_free_page(GFP_KERNEL);
>   if (!page) {
>   kfree(dev->pipes);
> @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device 
> *pdev)
>   struct resource *r;
>   struct goldfish_pipe_dev *dev = pipe_dev;
>  
> - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
> - return -ENOMEM;
> + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
>  
>   /* not thread safe, but this should not happen */
>   WARN_ON(dev->base != NULL);

Why separate these BUILD_BUG_ONs into 2 different functions?

Why not just
BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE);
BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);



Re: [PATCH 1/2] perf tools: Make check-headers.sh check based on kernel dir

2018-08-13 Thread Michael Ellerman
Jiri Olsa  writes:
> diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
> index ea48aa6f8d19..9d466e853aec 100755
> --- a/tools/perf/check-headers.sh
> +++ b/tools/perf/check-headers.sh
> @@ -88,6 +88,8 @@ check () {
>  # differences.
>  test -d ../../include || exit 0
>  
> +pushd ../.. > /dev/null
> +
>  # simple diff check
>  for i in $HEADERS; do
>check $i -B

This breaks the build when sh is not bash:

  ./check-headers.sh: 91: ./check-headers.sh: pushd: not found
  ./check-headers.sh: 107: ./check-headers.sh: popd: not found
  Makefile.perf:205: recipe for target 'sub-make' failed

cheers


Re: [PATCH 1/2] perf tools: Make check-headers.sh check based on kernel dir

2018-08-13 Thread Michael Ellerman
Jiri Olsa  writes:
> diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
> index ea48aa6f8d19..9d466e853aec 100755
> --- a/tools/perf/check-headers.sh
> +++ b/tools/perf/check-headers.sh
> @@ -88,6 +88,8 @@ check () {
>  # differences.
>  test -d ../../include || exit 0
>  
> +pushd ../.. > /dev/null
> +
>  # simple diff check
>  for i in $HEADERS; do
>check $i -B

This breaks the build when sh is not bash:

  ./check-headers.sh: 91: ./check-headers.sh: pushd: not found
  ./check-headers.sh: 107: ./check-headers.sh: popd: not found
  Makefile.perf:205: recipe for target 'sub-make' failed

cheers


Re: [PATCH 1/9] platform: goldfish: pipe: Fix comments to fit 80 columns

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> From: Roman Kiryanov 
> 
> Some comment lines are longer than 80 symbols.
[]
> diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> b/drivers/platform/goldfish/goldfish_pipe.c
[]
> @@ -84,7 +84,10 @@ enum PipePollFlags {
>   PIPE_POLL_HUP   = 1 << 2
>  };
>  
> -/* Possible status values used to signal errors - see 
> goldfish_pipe_error_convert */
> +/*
> + * Possible status values used to signal errors - see
> + * goldfish_pipe_error_convert

If this is to be wrapped at all, and this really doesn't
need to be wrapped, it would probably be more sensible as:

/*
 * Possible status values used to signal errors
 * see: goldfish_pipe_error_convert
 */



Re: [PATCH 1/9] platform: goldfish: pipe: Fix comments to fit 80 columns

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> From: Roman Kiryanov 
> 
> Some comment lines are longer than 80 symbols.
[]
> diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> b/drivers/platform/goldfish/goldfish_pipe.c
[]
> @@ -84,7 +84,10 @@ enum PipePollFlags {
>   PIPE_POLL_HUP   = 1 << 2
>  };
>  
> -/* Possible status values used to signal errors - see 
> goldfish_pipe_error_convert */
> +/*
> + * Possible status values used to signal errors - see
> + * goldfish_pipe_error_convert

If this is to be wrapped at all, and this really doesn't
need to be wrapped, it would probably be more sensible as:

/*
 * Possible status values used to signal errors
 * see: goldfish_pipe_error_convert
 */



Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> dev_ is preferred if struct device is available.
[]
> diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> b/drivers/platform/goldfish/goldfish_pipe.c
[]
> @@ -384,6 +384,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
>   int count = 0, ret = -EINVAL;
>   unsigned long address, address_end, last_page;
>   unsigned int last_page_size;
> + struct device *pdev_dev;
>  
>   /* If the emulator already closed the pipe, no need to go further */
>   if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags)))
> @@ -401,6 +402,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
>   last_page = (address_end - 1) & PAGE_MASK;
>   last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
>  
> + pdev_dev = pipe->dev->pdev_dev;
> +
>   while (address < address_end) {
>   s32 consumed_size;
>   int status;
> @@ -433,7 +436,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
>* err.
>*/
>   if (status != PIPE_ERROR_AGAIN)
> - pr_info_ratelimited("goldfish_pipe: backend 
> error %d on %s\n",
> + dev_err_ratelimited(pdev_dev,
> + "goldfish_pipe: backend error %d on 
> %s\n",
>   status, is_write ? "write" : "read");
>   break;
>   }

Wouldn't it be simpler to use pipe->dev->pdev_dev here instead
of creating and assigning a probably unused use-once pointer?

What does the output look like now?

Is the "pipe->dev->pdev_dev->name" then "goldfish_pipe: " output
useful?



Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging

2018-08-13 Thread Joe Perches
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote:
> dev_ is preferred if struct device is available.
[]
> diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
> b/drivers/platform/goldfish/goldfish_pipe.c
[]
> @@ -384,6 +384,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
>   int count = 0, ret = -EINVAL;
>   unsigned long address, address_end, last_page;
>   unsigned int last_page_size;
> + struct device *pdev_dev;
>  
>   /* If the emulator already closed the pipe, no need to go further */
>   if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags)))
> @@ -401,6 +402,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
>   last_page = (address_end - 1) & PAGE_MASK;
>   last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
>  
> + pdev_dev = pipe->dev->pdev_dev;
> +
>   while (address < address_end) {
>   s32 consumed_size;
>   int status;
> @@ -433,7 +436,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp,
>* err.
>*/
>   if (status != PIPE_ERROR_AGAIN)
> - pr_info_ratelimited("goldfish_pipe: backend 
> error %d on %s\n",
> + dev_err_ratelimited(pdev_dev,
> + "goldfish_pipe: backend error %d on 
> %s\n",
>   status, is_write ? "write" : "read");
>   break;
>   }

Wouldn't it be simpler to use pipe->dev->pdev_dev here instead
of creating and assigning a probably unused use-once pointer?

What does the output look like now?

Is the "pipe->dev->pdev_dev->name" then "goldfish_pipe: " output
useful?



[PATCH] get_maintainer: Allow option --mpath to read all files in

2018-08-13 Thread Joe Perches
There is an external use case for multiple private MAINTAINER
style files in a separate directory.  Allow it.

--mpath has a default of "./MAINTAINERS".

The value entered can be either a file or a directory.

The behaviors are now:

--mpath   Read only the specific file as  file
--mpath  Read all files in  as  files
--mpath  --find-maintainer-files
Recurse through  and read all files named 
MAINTAINERS

Signed-off-by: Joe Perches 
---
 scripts/get_maintainer.pl | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 0ebdefe74d5b..c1c088ef1420 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -396,7 +396,12 @@ sub read_all_maintainer_files {
 
 if (-d $path) {
$path .= '/' if ($path !~ m@/$@);
-   if ($path eq "${lk_path}MAINTAINERS/") {
+   if ($find_maintainer_files) {
+   find( { wanted => \_is_maintainer_file,
+   preprocess => \_ignore_git,
+   no_chdir => 1,
+   }, "$path");
+   } else {
opendir(DIR, "$path") or die $!;
my @files = readdir(DIR);
closedir(DIR);
@@ -404,12 +409,6 @@ sub read_all_maintainer_files {
push(@mfiles, "$path$file") if ($file !~ /^\./);
}
}
-   if ($find_maintainer_files) {
-   find( { wanted => \_is_maintainer_file,
-   preprocess => \_ignore_git,
-   no_chdir => 1,
-   }, "$path");
-   }
 } elsif (-f "$path") {
push(@mfiles, "$path");
 } else {



[PATCH] get_maintainer: Allow option --mpath to read all files in

2018-08-13 Thread Joe Perches
There is an external use case for multiple private MAINTAINER
style files in a separate directory.  Allow it.

--mpath has a default of "./MAINTAINERS".

The value entered can be either a file or a directory.

The behaviors are now:

--mpath   Read only the specific file as  file
--mpath  Read all files in  as  files
--mpath  --find-maintainer-files
Recurse through  and read all files named 
MAINTAINERS

Signed-off-by: Joe Perches 
---
 scripts/get_maintainer.pl | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 0ebdefe74d5b..c1c088ef1420 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -396,7 +396,12 @@ sub read_all_maintainer_files {
 
 if (-d $path) {
$path .= '/' if ($path !~ m@/$@);
-   if ($path eq "${lk_path}MAINTAINERS/") {
+   if ($find_maintainer_files) {
+   find( { wanted => \_is_maintainer_file,
+   preprocess => \_ignore_git,
+   no_chdir => 1,
+   }, "$path");
+   } else {
opendir(DIR, "$path") or die $!;
my @files = readdir(DIR);
closedir(DIR);
@@ -404,12 +409,6 @@ sub read_all_maintainer_files {
push(@mfiles, "$path$file") if ($file !~ /^\./);
}
}
-   if ($find_maintainer_files) {
-   find( { wanted => \_is_maintainer_file,
-   preprocess => \_ignore_git,
-   no_chdir => 1,
-   }, "$path");
-   }
 } elsif (-f "$path") {
push(@mfiles, "$path");
 } else {



Re: [PATCH] zsmalloc: fix linking bug in init_zspage

2018-08-13 Thread Sergey Senozhatsky
Hi Minchan,

On (08/14/18 09:24), Minchan Kim wrote:
> > Any thoughts?
>
> If we want a refactoring, I'm not against but description said it triggered
> BUG_ON on zs_map_object rarely. That means it should be stable material
> and need more description to understand. Please be more specific with
> some example.

I don't have any BUG_ON on hands. Would be great if zhouxianrong could
post some backtraces or more info/explanation.

> The reason I'm hesitating is zsmalloc moves ZS_FULL group
> when the zspage->inuse is equal to class->objs_per_zspage so I thought
> it shouldn't allocate last partial object.

Maybe, allocating last partial object does look a bit hacky - it's not a
valid object anyway, but I'm not suggesting that we need to change it.
Let's hear from zhouxianrong.

-ss


Re: [PATCH] zsmalloc: fix linking bug in init_zspage

2018-08-13 Thread Sergey Senozhatsky
Hi Minchan,

On (08/14/18 09:24), Minchan Kim wrote:
> > Any thoughts?
>
> If we want a refactoring, I'm not against but description said it triggered
> BUG_ON on zs_map_object rarely. That means it should be stable material
> and need more description to understand. Please be more specific with
> some example.

I don't have any BUG_ON on hands. Would be great if zhouxianrong could
post some backtraces or more info/explanation.

> The reason I'm hesitating is zsmalloc moves ZS_FULL group
> when the zspage->inuse is equal to class->objs_per_zspage so I thought
> it shouldn't allocate last partial object.

Maybe, allocating last partial object does look a bit hacky - it's not a
valid object anyway, but I'm not suggesting that we need to change it.
Let's hear from zhouxianrong.

-ss


Re: [PATCH] fsi: ast: select GENERIC_ALLOCATOR

2018-08-13 Thread Benjamin Herrenschmidt
On Tue, 2018-08-14 at 00:37 +0200, Arnd Bergmann wrote:
> In randconfig builds without CONFIG_GENERIC_ALLOCATOR, this driver
> fails to link:
> 
> ERROR: "gen_pool_alloc_algo" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> ERROR: "gen_pool_fixed_alloc" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> ERROR: "of_gen_pool_get" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> ERROR: "gen_pool_free" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> 
> Select the dependency as all other users do.
> 
> Fixes: 6a794a27daca ("fsi: master-ast-cf: Add new FSI master using Aspeed 
> ColdFire")
> Signed-off-by: Arnd Bergmann 
> ---

Thanks, I'll pick it up and send to Greg with a slightly different
subject

Cheers,
Ben.

>  drivers/fsi/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
> index af3a20dd5aa4..99c99a5d57fe 100644
> --- a/drivers/fsi/Kconfig
> +++ b/drivers/fsi/Kconfig
> @@ -46,6 +46,7 @@ config FSI_MASTER_AST_CF
>   tristate "FSI master based on Aspeed ColdFire coprocessor"
>   depends on GPIOLIB
>   depends on GPIO_ASPEED
> + select GENERIC_ALLOCATOR
>   ---help---
>   This option enables a FSI master using the AST2400 and AST2500 GPIO
>   lines driven by the internal ColdFire coprocessor. This requires



Re: [PATCH] fsi: ast: select GENERIC_ALLOCATOR

2018-08-13 Thread Benjamin Herrenschmidt
On Tue, 2018-08-14 at 00:37 +0200, Arnd Bergmann wrote:
> In randconfig builds without CONFIG_GENERIC_ALLOCATOR, this driver
> fails to link:
> 
> ERROR: "gen_pool_alloc_algo" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> ERROR: "gen_pool_fixed_alloc" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> ERROR: "of_gen_pool_get" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> ERROR: "gen_pool_free" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
> 
> Select the dependency as all other users do.
> 
> Fixes: 6a794a27daca ("fsi: master-ast-cf: Add new FSI master using Aspeed 
> ColdFire")
> Signed-off-by: Arnd Bergmann 
> ---

Thanks, I'll pick it up and send to Greg with a slightly different
subject

Cheers,
Ben.

>  drivers/fsi/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
> index af3a20dd5aa4..99c99a5d57fe 100644
> --- a/drivers/fsi/Kconfig
> +++ b/drivers/fsi/Kconfig
> @@ -46,6 +46,7 @@ config FSI_MASTER_AST_CF
>   tristate "FSI master based on Aspeed ColdFire coprocessor"
>   depends on GPIOLIB
>   depends on GPIO_ASPEED
> + select GENERIC_ALLOCATOR
>   ---help---
>   This option enables a FSI master using the AST2400 and AST2500 GPIO
>   lines driven by the internal ColdFire coprocessor. This requires



[PATCH v2] mm: migration: fix migration of huge PMD shared pages

2018-08-13 Thread Mike Kravetz
The page migration code employs try_to_unmap() to try and unmap the
source page.  This is accomplished by using rmap_walk to find all
vmas where the page is mapped.  This search stops when page mapcount
is zero.  For shared PMD huge pages, the page map count is always 1
no matter the number of mappings.  Shared mappings are tracked via
the reference count of the PMD page.  Therefore, try_to_unmap stops
prematurely and does not completely unmap all mappings of the source
page.

This problem can result in data corruption as writes to the original
source page can happen after contents of the page are copied to the
target page.  Hence, data is lost.

This problem was originally seen as DB corruption of shared global
areas after a huge page was soft offlined due to ECC memory errors.
DB developers noticed they could reproduce the issue by (hotplug)
offlining memory used to back huge pages.  A simple testcase can
reproduce the problem by creating a shared PMD mapping (note that
this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on
x86)), and using migrate_pages() to migrate process pages between
nodes while continually writing to the huge pages being migrated.

To fix, have the try_to_unmap_one routine check for huge PMD sharing
by calling huge_pmd_unshare for hugetlbfs huge pages.  If it is a
shared mapping it will be 'unshared' which removes the page table
entry and drops the reference on the PMD page.  After this, flush
caches and TLB.

Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz 
---
v2: Fixed build issue for !CONFIG_HUGETLB_PAGE and typos in comment

 include/linux/hugetlb.h |  6 ++
 mm/rmap.c   | 21 +
 2 files changed, 27 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 36fa6a2a82e3..7524663028ec 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -170,6 +170,12 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
 }
 
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+   pte_t *ptep)
+{
+   return 0;
+}
+
 #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)({ BUG(); 0; })
 #define follow_huge_addr(mm, addr, write)  ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/mm/rmap.c b/mm/rmap.c
index 09a799c9aebd..cf2340adad10 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1409,6 +1409,27 @@ static bool try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
 
+   /*
+* PMDs for hugetlbfs pages could be shared.  In this case,
+* pages with shared PMDs will have a mapcount of 1 no matter
+* how many times they are actually mapped.  Map counting for
+* PMD sharing is mostly done via the reference count on the
+* PMD page itself.  If the page we are trying to unmap is a
+* hugetlbfs page, attempt to 'unshare' at the PMD level.
+* huge_pmd_unshare clears the PUD and adjusts reference
+* counting on the PMD page which effectively unmaps the page.
+* Take care of flushing cache and TLB for page in this
+* specific mapping here.
+*/
+   if (PageHuge(page) &&
+   huge_pmd_unshare(mm, , pvmw.pte)) {
+   unsigned long end_add = address + vma_mmu_pagesize(vma);
+
+   flush_cache_range(vma, address, end_add);
+   flush_tlb_range(vma, address, end_add);
+   mmu_notifier_invalidate_range(mm, address, end_add);
+   continue;
+   }
 
if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
-- 
2.17.1



[PATCH v2] mm: migration: fix migration of huge PMD shared pages

2018-08-13 Thread Mike Kravetz
The page migration code employs try_to_unmap() to try and unmap the
source page.  This is accomplished by using rmap_walk to find all
vmas where the page is mapped.  This search stops when page mapcount
is zero.  For shared PMD huge pages, the page map count is always 1
no matter the number of mappings.  Shared mappings are tracked via
the reference count of the PMD page.  Therefore, try_to_unmap stops
prematurely and does not completely unmap all mappings of the source
page.

This problem can result in data corruption as writes to the original
source page can happen after contents of the page are copied to the
target page.  Hence, data is lost.

This problem was originally seen as DB corruption of shared global
areas after a huge page was soft offlined due to ECC memory errors.
DB developers noticed they could reproduce the issue by (hotplug)
offlining memory used to back huge pages.  A simple testcase can
reproduce the problem by creating a shared PMD mapping (note that
this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on
x86)), and using migrate_pages() to migrate process pages between
nodes while continually writing to the huge pages being migrated.

To fix, have the try_to_unmap_one routine check for huge PMD sharing
by calling huge_pmd_unshare for hugetlbfs huge pages.  If it is a
shared mapping it will be 'unshared' which removes the page table
entry and drops the reference on the PMD page.  After this, flush
caches and TLB.

Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz 
---
v2: Fixed build issue for !CONFIG_HUGETLB_PAGE and typos in comment

 include/linux/hugetlb.h |  6 ++
 mm/rmap.c   | 21 +
 2 files changed, 27 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 36fa6a2a82e3..7524663028ec 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -170,6 +170,12 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
 }
 
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+   pte_t *ptep)
+{
+   return 0;
+}
+
 #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)({ BUG(); 0; })
 #define follow_huge_addr(mm, addr, write)  ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/mm/rmap.c b/mm/rmap.c
index 09a799c9aebd..cf2340adad10 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1409,6 +1409,27 @@ static bool try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
 
+   /*
+* PMDs for hugetlbfs pages could be shared.  In this case,
+* pages with shared PMDs will have a mapcount of 1 no matter
+* how many times they are actually mapped.  Map counting for
+* PMD sharing is mostly done via the reference count on the
+* PMD page itself.  If the page we are trying to unmap is a
+* hugetlbfs page, attempt to 'unshare' at the PMD level.
+* huge_pmd_unshare clears the PUD and adjusts reference
+* counting on the PMD page which effectively unmaps the page.
+* Take care of flushing cache and TLB for page in this
+* specific mapping here.
+*/
+   if (PageHuge(page) &&
+   huge_pmd_unshare(mm, , pvmw.pte)) {
+   unsigned long end_add = address + vma_mmu_pagesize(vma);
+
+   flush_cache_range(vma, address, end_add);
+   flush_tlb_range(vma, address, end_add);
+   mmu_notifier_invalidate_range(mm, address, end_add);
+   continue;
+   }
 
if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
-- 
2.17.1



[PATCH] android: binder: no outgoing transaction when thread todo has transaction

2018-08-13 Thread Sherry Yang
When a process dies, failed reply is sent to the sender of any transaction
queued on a dead thread's todo list. The sender asserts that the
received failed reply corresponds to the head of the transaction stack.
This assert can fail if the dead thread is allowed to send outgoing
transactions when there is already a transaction on its todo list,
because this new transaction can end up on the transaction stack of the
original sender. The following steps illustrate how this assertion can
fail.

1. Thread1 sends txn19 to Thread2
   (T1->transaction_stack=txn19, T2->todo+=txn19)
2. Without processing todo list, Thread2 sends txn20 to Thread1
   (T1->todo+=txn20, T2->transaction_stack=txn20)
3. T1 processes txn20 on its todo list
   (T1->transaction_stack=txn20->txn19, T1->todo=)
4. T2 dies, T2->todo cleanup attempts to send failed reply for txn19, but
   T1->transaction_stack points to txn20 -- assertion fails

Step 2. is the incorrect behavior. When there is a transaction on a
thread's todo list, this thread should not be able to send any outgoing
synchronous transactions. Only the head of the todo list needs to be
checked because only threads that are waiting for proc work can directly
receive work from another thread, and no work is allowed to be queued
on such a thread without waking up the thread. This patch also enforces
that a thread is not waiting for proc work when a work is directly
enqueued to its todo list.

Acked-by: Arve Hjønnevåg 
Signed-off-by: Sherry Yang 
---
 drivers/android/binder.c | 44 +---
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d58763b6b009..f2081934eb8b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -822,6 +822,7 @@ static void
 binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread,
struct binder_work *work)
 {
+   WARN_ON(!list_empty(>waiting_thread_node));
binder_enqueue_work_ilocked(work, >todo);
 }
 
@@ -839,6 +840,7 @@ static void
 binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
   struct binder_work *work)
 {
+   WARN_ON(!list_empty(>waiting_thread_node));
binder_enqueue_work_ilocked(work, >todo);
thread->process_todo = true;
 }
@@ -1270,19 +1272,12 @@ static int binder_inc_node_nilocked(struct binder_node 
*node, int strong,
} else
node->local_strong_refs++;
if (!node->has_strong_ref && target_list) {
+   struct binder_thread *thread = container_of(target_list,
+   struct binder_thread, todo);
binder_dequeue_work_ilocked(>work);
-   /*
-* Note: this function is the only place where we queue
-* directly to a thread->todo without using the
-* corresponding binder_enqueue_thread_work() helper
-* functions; in this case it's ok to not set the
-* process_todo flag, since we know this node work will
-* always be followed by other work that starts queue
-* processing: in case of synchronous transactions, a
-* BR_REPLY or BR_ERROR; in case of oneway
-* transactions, a BR_TRANSACTION_COMPLETE.
-*/
-   binder_enqueue_work_ilocked(>work, target_list);
+   BUG_ON(>todo != target_list);
+   binder_enqueue_deferred_thread_work_ilocked(thread,
+  >work);
}
} else {
if (!internal)
@@ -2723,6 +2718,7 @@ static void binder_transaction(struct binder_proc *proc,
 {
int ret;
struct binder_transaction *t;
+   struct binder_work *w;
struct binder_work *tcomplete;
binder_size_t *offp, *off_end, *off_start;
binder_size_t off_min;
@@ -2864,6 +2860,29 @@ static void binder_transaction(struct binder_proc *proc,
goto err_invalid_target_handle;
}
binder_inner_proc_lock(proc);
+
+   w = list_first_entry_or_null(>todo,
+struct binder_work, entry);
+   if (!(tr->flags & TF_ONE_WAY) && w &&
+   w->type == BINDER_WORK_TRANSACTION) {
+   /*
+* Do not allow new outgoing transaction from a
+* thread that has a transaction at the head of
+* its todo list. Only need to check the head
+* because binder_select_thread_ilocked picks a
+* thread from 

[PATCH] android: binder: no outgoing transaction when thread todo has transaction

2018-08-13 Thread Sherry Yang
When a process dies, failed reply is sent to the sender of any transaction
queued on a dead thread's todo list. The sender asserts that the
received failed reply corresponds to the head of the transaction stack.
This assert can fail if the dead thread is allowed to send outgoing
transactions when there is already a transaction on its todo list,
because this new transaction can end up on the transaction stack of the
original sender. The following steps illustrate how this assertion can
fail.

1. Thread1 sends txn19 to Thread2
   (T1->transaction_stack=txn19, T2->todo+=txn19)
2. Without processing todo list, Thread2 sends txn20 to Thread1
   (T1->todo+=txn20, T2->transaction_stack=txn20)
3. T1 processes txn20 on its todo list
   (T1->transaction_stack=txn20->txn19, T1->todo=)
4. T2 dies, T2->todo cleanup attempts to send failed reply for txn19, but
   T1->transaction_stack points to txn20 -- assertion fails

Step 2. is the incorrect behavior. When there is a transaction on a
thread's todo list, this thread should not be able to send any outgoing
synchronous transactions. Only the head of the todo list needs to be
checked because only threads that are waiting for proc work can directly
receive work from another thread, and no work is allowed to be queued
on such a thread without waking up the thread. This patch also enforces
that a thread is not waiting for proc work when a work is directly
enqueued to its todo list.

Acked-by: Arve Hjønnevåg 
Signed-off-by: Sherry Yang 
---
 drivers/android/binder.c | 44 +---
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d58763b6b009..f2081934eb8b 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -822,6 +822,7 @@ static void
 binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread,
struct binder_work *work)
 {
+   WARN_ON(!list_empty(>waiting_thread_node));
binder_enqueue_work_ilocked(work, >todo);
 }
 
@@ -839,6 +840,7 @@ static void
 binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
   struct binder_work *work)
 {
+   WARN_ON(!list_empty(>waiting_thread_node));
binder_enqueue_work_ilocked(work, >todo);
thread->process_todo = true;
 }
@@ -1270,19 +1272,12 @@ static int binder_inc_node_nilocked(struct binder_node 
*node, int strong,
} else
node->local_strong_refs++;
if (!node->has_strong_ref && target_list) {
+   struct binder_thread *thread = container_of(target_list,
+   struct binder_thread, todo);
binder_dequeue_work_ilocked(>work);
-   /*
-* Note: this function is the only place where we queue
-* directly to a thread->todo without using the
-* corresponding binder_enqueue_thread_work() helper
-* functions; in this case it's ok to not set the
-* process_todo flag, since we know this node work will
-* always be followed by other work that starts queue
-* processing: in case of synchronous transactions, a
-* BR_REPLY or BR_ERROR; in case of oneway
-* transactions, a BR_TRANSACTION_COMPLETE.
-*/
-   binder_enqueue_work_ilocked(>work, target_list);
+   BUG_ON(>todo != target_list);
+   binder_enqueue_deferred_thread_work_ilocked(thread,
+  >work);
}
} else {
if (!internal)
@@ -2723,6 +2718,7 @@ static void binder_transaction(struct binder_proc *proc,
 {
int ret;
struct binder_transaction *t;
+   struct binder_work *w;
struct binder_work *tcomplete;
binder_size_t *offp, *off_end, *off_start;
binder_size_t off_min;
@@ -2864,6 +2860,29 @@ static void binder_transaction(struct binder_proc *proc,
goto err_invalid_target_handle;
}
binder_inner_proc_lock(proc);
+
+   w = list_first_entry_or_null(>todo,
+struct binder_work, entry);
+   if (!(tr->flags & TF_ONE_WAY) && w &&
+   w->type == BINDER_WORK_TRANSACTION) {
+   /*
+* Do not allow new outgoing transaction from a
+* thread that has a transaction at the head of
+* its todo list. Only need to check the head
+* because binder_select_thread_ilocked picks a
+* thread from 

Re: [PATCH] zsmalloc: fix linking bug in init_zspage

2018-08-13 Thread Minchan Kim
Hi Sergey,

On Mon, Aug 13, 2018 at 07:55:36PM +0900, Sergey Senozhatsky wrote:
> On (08/13/18 15:05), Minchan Kim wrote:
> > > From: zhouxianrong 
> > > 
> > > The last partial object in last subpage of zspage should not be linked
> > > in allocation list. Otherwise it could trigger BUG_ON explicitly at
> > > function zs_map_object. But it happened rarely.
> > 
> > Could you be more specific? What case did you see the problem?
> > Is it a real problem or one founded by review?
> [..]
> > > Signed-off-by: zhouxianrong 
> > > ---
> > >  mm/zsmalloc.c | 2 ++
> > >  1 file changed, 2 insertions(+)
> > > 
> > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > > index 8d87e973a4f5..24dd8da0aa59 100644
> > > --- a/mm/zsmalloc.c
> > > +++ b/mm/zsmalloc.c
> > > @@ -1040,6 +1040,8 @@ static void init_zspage(struct size_class *class, 
> > > struct zspage *zspage)
> > >* Reset OBJ_TAG_BITS bit to last link to tell
> > >* whether it's allocated object or not.
> > >*/
> > > + if (off > PAGE_SIZE)
> > > + link -= class->size / sizeof(*link);
> > >   link->next = -1UL << OBJ_TAG_BITS;
> > >   }
> > >   kunmap_atomic(vaddr);
> 
> Hmm. This can be a real issue. Unless I'm missing something.
> 
> So... I might be wrong, but the way I see the bug report is:
> 
> When we link objects during zspage init, we do the following:
> 
>   while ((off += class->size) < PAGE_SIZE) {
>   link->next = freeobj++ << OBJ_TAG_BITS;
>   link += class->size / sizeof(*link);
>   }
> 
> Note that we increment the link first, link += class->size / sizeof(*link),
> and check for the offset only afterwards. So by the time we break out of
> the while-loop the link *might* point to the partial object which starts at
> the last page of zspage, but *never* ends, because we don't have next_page
> in current zspage. So that's why that object should not be linked in,
> because it's not a valid allocated object - we simply don't have space
> for it anymore.
> 
> zspage [  page 1 ][  page 2  ]
> ...link
>  [..###]
> 
> therefore the last object must be "link - 1" for such cases.
> 
> I think, the following change can also do the trick:
> 
>   while ((off + class->size) < PAGE_SIZE) {
>   link->next = freeobj++ << OBJ_TAG_BITS;
>   link += class->size / sizeof(*link);
>   off += class->size;
>   }
> 
> Once again, I might be wrong on this.
> Any thoughts?

If we want a refactoring, I'm not against but description said it triggered
BUG_ON on zs_map_object rarely. That means it should be stable material
and need more description to understand. Please be more specific with
some example. The reason I'm hesitating is zsmalloc moves ZS_FULL group
when the zspage->inuse is equal to class->objs_per_zspage so I thought
it shouldn't allocate last partial object.

Thanks.


Re: [PATCH] zsmalloc: fix linking bug in init_zspage

2018-08-13 Thread Minchan Kim
Hi Sergey,

On Mon, Aug 13, 2018 at 07:55:36PM +0900, Sergey Senozhatsky wrote:
> On (08/13/18 15:05), Minchan Kim wrote:
> > > From: zhouxianrong 
> > > 
> > > The last partial object in last subpage of zspage should not be linked
> > > in allocation list. Otherwise it could trigger BUG_ON explicitly at
> > > function zs_map_object. But it happened rarely.
> > 
> > Could you be more specific? What case did you see the problem?
> > Is it a real problem or one founded by review?
> [..]
> > > Signed-off-by: zhouxianrong 
> > > ---
> > >  mm/zsmalloc.c | 2 ++
> > >  1 file changed, 2 insertions(+)
> > > 
> > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> > > index 8d87e973a4f5..24dd8da0aa59 100644
> > > --- a/mm/zsmalloc.c
> > > +++ b/mm/zsmalloc.c
> > > @@ -1040,6 +1040,8 @@ static void init_zspage(struct size_class *class, 
> > > struct zspage *zspage)
> > >* Reset OBJ_TAG_BITS bit to last link to tell
> > >* whether it's allocated object or not.
> > >*/
> > > + if (off > PAGE_SIZE)
> > > + link -= class->size / sizeof(*link);
> > >   link->next = -1UL << OBJ_TAG_BITS;
> > >   }
> > >   kunmap_atomic(vaddr);
> 
> Hmm. This can be a real issue. Unless I'm missing something.
> 
> So... I might be wrong, but the way I see the bug report is:
> 
> When we link objects during zspage init, we do the following:
> 
>   while ((off += class->size) < PAGE_SIZE) {
>   link->next = freeobj++ << OBJ_TAG_BITS;
>   link += class->size / sizeof(*link);
>   }
> 
> Note that we increment the link first, link += class->size / sizeof(*link),
> and check for the offset only afterwards. So by the time we break out of
> the while-loop the link *might* point to the partial object which starts at
> the last page of zspage, but *never* ends, because we don't have next_page
> in current zspage. So that's why that object should not be linked in,
> because it's not a valid allocated object - we simply don't have space
> for it anymore.
> 
> zspage [  page 1 ][  page 2  ]
> ...link
>  [..###]
> 
> therefore the last object must be "link - 1" for such cases.
> 
> I think, the following change can also do the trick:
> 
>   while ((off + class->size) < PAGE_SIZE) {
>   link->next = freeobj++ << OBJ_TAG_BITS;
>   link += class->size / sizeof(*link);
>   off += class->size;
>   }
> 
> Once again, I might be wrong on this.
> Any thoughts?

If we want a refactoring, I'm not against but description said it triggered
BUG_ON on zs_map_object rarely. That means it should be stable material
and need more description to understand. Please be more specific with
some example. The reason I'm hesitating is zsmalloc moves ZS_FULL group
when the zspage->inuse is equal to class->objs_per_zspage so I thought
it shouldn't allocate last partial object.

Thanks.


Re: [PATCH v8 1/6] Uprobes: Simplify uprobe_register() body

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 01:56:12 -0700
Srikar Dronamraju  wrote:

> * Ravi Bangoria  [2018-08-09 09:48:51]:
> 
> > Simplify uprobe_register() function body and let __uprobe_register()
> > handle everything. Also move dependency functions around to fix build
> > failures.
> >   
> 
> One nit:
> s/to fix build failures/to avoid build failures/

I pulled in this patch with the above update to the change log.

> 
> 
> 
> > Signed-off-by: Ravi Bangoria 
> > ---  
> 
> 
> Acked-by: Srikar Dronamraju 

Thanks, I added your ack and Song's Reviewed-by tags.

-- Steve


Re: [PATCH v8 1/6] Uprobes: Simplify uprobe_register() body

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 01:56:12 -0700
Srikar Dronamraju  wrote:

> * Ravi Bangoria  [2018-08-09 09:48:51]:
> 
> > Simplify uprobe_register() function body and let __uprobe_register()
> > handle everything. Also move dependency functions around to fix build
> > failures.
> >   
> 
> One nit:
> s/to fix build failures/to avoid build failures/

I pulled in this patch with the above update to the change log.

> 
> 
> 
> > Signed-off-by: Ravi Bangoria 
> > ---  
> 
> 
> Acked-by: Srikar Dronamraju 

Thanks, I added your ack and Song's Reviewed-by tags.

-- Steve


Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 20:01:41 -0400
Steven Rostedt  wrote:

> On Fri, 10 Aug 2018 23:14:01 -0700
> Song Liu  wrote:
> 
> > On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt  
> > wrote:  
> > > On Thu, 9 Aug 2018 16:38:28 +0200
> > > Oleg Nesterov  wrote:
> > >
> > >> I need to read this (hopefully final) version carefully. I'll try to do
> > >> this before next Monday.
> > >>
> > >
> > > Monday may be the opening of the merge window (more likely Sunday). Do
> > > you think this is good enough for pushing it in this late in the game,
> > > or do you think we should wait another cycle?
> > >
> > > -- Steve
> > 
> > We (Facebook) have use cases in production that would benefit from this 
> > work, so
> > I would rather see this gets in sooner than later. After a brief
> > review (I will more
> > careful review before Monday), I think this set is not likely to break
> > existing uprobes
> > (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle.
> >   
> 
> It's still going under review, and the merge window is now open. I'd
> prefer that this waits till the next merge window.
> 
> 

But this said, patch 1&2 look simple enough to include this merge
window. I'll pull them in and start testing.

-- Steve


Re: [RFC] vruntime updated incorrectly when rt_mutex boots prio?

2018-08-13 Thread Steve Muckle

On 08/07/2018 10:40 AM, 'Todd Kjos' via kernel-team wrote:

This issue was discovered on a 4.9-based android device, but the
relevant mainline code appears to be the same. The symptom is that
over time the some workloads become sluggish resulting in missed
frames or sluggishness. It appears to be the same issue described in
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-March/567836.html.

Here is the scenario: A task is deactivated while still in the fair
class. The task is then boosted to RT, so rt_mutex_setprio() is
called. This changes the task to RT and calls check_class_changed(),
which eventually calls detach_task_cfs_rq(), which is where
vruntime_normalized() sees that the task's state is TASK_WAKING, which
results in skipping the subtraction of the rq's min_vruntime from the
task's vruntime. Later, when the prio is deboosted and the task is
moved back to the fair class, the fair rq's min_vruntime is added to
the task's vruntime, resulting in vruntime inflation.


This was reproduced for me on tip of mainline by using the program at 
the end of this mail. It was run in a 2 CPU virtualbox VM. Relevant 
annotated bits of the trace:


low-prio thread vruntime is 752ms
 pi-vruntime-tes-598   [001] d...   520.572459: sched_stat_runtime: 
comm=pi-vruntime-tes pid=598 runtime=29953 [ns] vruntime=752888705 [ns]


low-prio thread waits on a_sem
 pi-vruntime-tes-598   [001] d...   520.572465: sched_switch: 
prev_comm=pi-vruntime-tes prev_pid=598 prev_prio=120 prev_state=D ==> 
next_comm=swapper/1 next_pid=0 next_prio=120


high prio thread finishes wakeup, then sleeps for 1ms
  -0 [000] dNh.   520.572483: sched_wakeup: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000
  -0 [000] d...   520.572486: sched_switch: 
prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> 
next_comm=pi-vruntime-tes next_pid=597 next_prio=19
 pi-vruntime-tes-597   [000] d...   520.572498: sched_switch: 
prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> 
next_comm=swapper/0 next_pid=0 next_prio=120


high prio thread wakes up after 1ms sleep, posts a_sem which starts to 
wake low-prio thread, then tries to grab pi_mutex, which low-prio thread has
  -0 [000] d.h.   520.573876: sched_waking: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000
  -0 [000] dNh.   520.573879: sched_wakeup: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000
  -0 [000] d...   520.573887: sched_switch: 
prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> 
next_comm=pi-vruntime-tes next_pid=597 next_prio=19
 pi-vruntime-tes-597   [000] d...   520.573895: sched_waking: 
comm=pi-vruntime-tes pid=598 prio=120 target_cpu=001


low-prio thread pid 598 gets pi_mutex priority inheritance, this happens 
while low-prio thread is in waking state
 pi-vruntime-tes-597   [000] d...   520.573911: sched_pi_setprio: 
comm=pi-vruntime-tes pid=598 oldprio=120 newprio=19


high-prio thread sleeps on pi_mutex
 pi-vruntime-tes-597   [000] d...   520.573919: sched_switch: 
prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> 
next_comm=swapper/0 next_pid=0 next_prio=120


low-prio thread finishes wakeup
  -0 [001] dNh.   520.573932: sched_wakeup: 
comm=pi-vruntime-tes pid=598 prio=19 target_cpu=001
  -0 [001] d...   520.573936: sched_switch: 
prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=S ==> 
next_comm=pi-vruntime-tes next_pid=598 next_prio=19


low-prio thread releases pi-mutex, loses pi boost, high-prio thread 
wakes for pi-mutex
 pi-vruntime-tes-598   [001] d...   520.573946: sched_pi_setprio: 
comm=pi-vruntime-tes pid=598 oldprio=19 newprio=120
 pi-vruntime-tes-598   [001] dN..   520.573954: sched_waking: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000


low-prio thread vruntime is 1505ms
 pi-vruntime-tes-598   [001] dN..   520.573966: sched_stat_runtime: 
comm=pi-vruntime-tes pid=598 runtime=20150 [ns] vruntime=1505797560 [ns]


The program:

/*
 * Test case for vruntime management during rtmutex priority inheritance
 * promotion and demotion.
 *
 * build with -lpthread
 */

#define _GNU_SOURCE
#include 
#include 
#include 
#include 
#include 

#define ERROR_CHECK(x) \
if (x) \
fprintf(stderr, "Error at line %d", __LINE__);

pthread_mutex_t pi_mutex;
sem_t a_sem;
sem_t b_sem;

void *rt_thread_func(void *arg) {
int policy;
int i = 0;
cpu_set_t cpuset;

CPU_ZERO();
CPU_SET(0, );
ERROR_CHECK(pthread_setaffinity_np(pthread_self(), 
sizeof(cpu_set_t),

   ));

while(i++ < 100) {
sem_wait(_sem);
usleep(1000);
sem_post(_sem);
pthread_mutex_lock(_mutex);
pthread_mutex_unlock(_mutex);
}
}

void *low_prio_thread_func(void *arg) {
int i = 0;
cpu_set_t cpuset;

CPU_ZERO();

Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 20:01:41 -0400
Steven Rostedt  wrote:

> On Fri, 10 Aug 2018 23:14:01 -0700
> Song Liu  wrote:
> 
> > On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt  
> > wrote:  
> > > On Thu, 9 Aug 2018 16:38:28 +0200
> > > Oleg Nesterov  wrote:
> > >
> > >> I need to read this (hopefully final) version carefully. I'll try to do
> > >> this before next Monday.
> > >>
> > >
> > > Monday may be the opening of the merge window (more likely Sunday). Do
> > > you think this is good enough for pushing it in this late in the game,
> > > or do you think we should wait another cycle?
> > >
> > > -- Steve
> > 
> > We (Facebook) have use cases in production that would benefit from this 
> > work, so
> > I would rather see this gets in sooner than later. After a brief
> > review (I will more
> > careful review before Monday), I think this set is not likely to break
> > existing uprobes
> > (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle.
> >   
> 
> It's still going under review, and the merge window is now open. I'd
> prefer that this waits till the next merge window.
> 
> 

But this said, patch 1&2 look simple enough to include this merge
window. I'll pull them in and start testing.

-- Steve


Re: [RFC] vruntime updated incorrectly when rt_mutex boots prio?

2018-08-13 Thread Steve Muckle

On 08/07/2018 10:40 AM, 'Todd Kjos' via kernel-team wrote:

This issue was discovered on a 4.9-based android device, but the
relevant mainline code appears to be the same. The symptom is that
over time some workloads become sluggish resulting in missed
frames or sluggishness. It appears to be the same issue described in
http://lists.infradead.org/pipermail/linux-arm-kernel/2018-March/567836.html.

Here is the scenario: A task is deactivated while still in the fair
class. The task is then boosted to RT, so rt_mutex_setprio() is
called. This changes the task to RT and calls check_class_changed(),
which eventually calls detach_task_cfs_rq(), which is where
vruntime_normalized() sees that the task's state is TASK_WAKING, which
results in skipping the subtraction of the rq's min_vruntime from the
task's vruntime. Later, when the prio is deboosted and the task is
moved back to the fair class, the fair rq's min_vruntime is added to
the task's vruntime, resulting in vruntime inflation.


This was reproduced for me on tip of mainline by using the program at 
the end of this mail. It was run in a 2 CPU virtualbox VM. Relevant 
annotated bits of the trace:


low-prio thread vruntime is 752ms
 pi-vruntime-tes-598   [001] d...   520.572459: sched_stat_runtime: 
comm=pi-vruntime-tes pid=598 runtime=29953 [ns] vruntime=752888705 [ns]


low-prio thread waits on a_sem
 pi-vruntime-tes-598   [001] d...   520.572465: sched_switch: 
prev_comm=pi-vruntime-tes prev_pid=598 prev_prio=120 prev_state=D ==> 
next_comm=swapper/1 next_pid=0 next_prio=120


high prio thread finishes wakeup, then sleeps for 1ms
  -0 [000] dNh.   520.572483: sched_wakeup: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000
  -0 [000] d...   520.572486: sched_switch: 
prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> 
next_comm=pi-vruntime-tes next_pid=597 next_prio=19
 pi-vruntime-tes-597   [000] d...   520.572498: sched_switch: 
prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> 
next_comm=swapper/0 next_pid=0 next_prio=120


high prio thread wakes up after 1ms sleep, posts a_sem which starts to 
wake low-prio thread, then tries to grab pi_mutex, which low-prio thread has
  -0 [000] d.h.   520.573876: sched_waking: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000
  -0 [000] dNh.   520.573879: sched_wakeup: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000
  -0 [000] d...   520.573887: sched_switch: 
prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> 
next_comm=pi-vruntime-tes next_pid=597 next_prio=19
 pi-vruntime-tes-597   [000] d...   520.573895: sched_waking: 
comm=pi-vruntime-tes pid=598 prio=120 target_cpu=001


low-prio thread pid 598 gets pi_mutex priority inheritance, this happens 
while low-prio thread is in waking state
 pi-vruntime-tes-597   [000] d...   520.573911: sched_pi_setprio: 
comm=pi-vruntime-tes pid=598 oldprio=120 newprio=19


high-prio thread sleeps on pi_mutex
 pi-vruntime-tes-597   [000] d...   520.573919: sched_switch: 
prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> 
next_comm=swapper/0 next_pid=0 next_prio=120


low-prio thread finishes wakeup
  -0 [001] dNh.   520.573932: sched_wakeup: 
comm=pi-vruntime-tes pid=598 prio=19 target_cpu=001
  -0 [001] d...   520.573936: sched_switch: 
prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=S ==> 
next_comm=pi-vruntime-tes next_pid=598 next_prio=19


low-prio thread releases pi-mutex, loses pi boost, high-prio thread 
wakes for pi-mutex
 pi-vruntime-tes-598   [001] d...   520.573946: sched_pi_setprio: 
comm=pi-vruntime-tes pid=598 oldprio=19 newprio=120
 pi-vruntime-tes-598   [001] dN..   520.573954: sched_waking: 
comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000


low-prio thread vruntime is 1505ms
 pi-vruntime-tes-598   [001] dN..   520.573966: sched_stat_runtime: 
comm=pi-vruntime-tes pid=598 runtime=20150 [ns] vruntime=1505797560 [ns]


The program:

/*
 * Test case for vruntime management during rtmutex priority inheritance
 * promotion and demotion.
 *
 * build with -lpthread
 */

#define _GNU_SOURCE
#include 
#include 
#include 
#include 
#include 

#define ERROR_CHECK(x) \
if (x) \
fprintf(stderr, "Error at line %d", __LINE__);

pthread_mutex_t pi_mutex;
sem_t a_sem;
sem_t b_sem;

void *rt_thread_func(void *arg) {
int policy;
int i = 0;
cpu_set_t cpuset;

CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
ERROR_CHECK(pthread_setaffinity_np(pthread_self(),
sizeof(cpu_set_t),
   &cpuset));

while(i++ < 100) {
sem_wait(&b_sem);
usleep(1000);
sem_post(&a_sem);
pthread_mutex_lock(&pi_mutex);
pthread_mutex_unlock(&pi_mutex);
}
}

void *low_prio_thread_func(void *arg) {
int i = 0;
cpu_set_t cpuset;

CPU_ZERO(&cpuset);

Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 13:50:19 +0200
Oleg Nesterov  wrote:

> On 08/13, Ravi Bangoria wrote:
> >
> > On 08/11/2018 01:27 PM, Song Liu wrote:  
> > >> +
> > >> +static void delayed_uprobe_delete(struct delayed_uprobe *du)
> > >> +{
> > >> +   if (!du)
> > >> +   return;  
> > > Do we really need this check?  
> >
> > Not necessary though, but I would still like to keep it for a safety.  
> 
> Heh. I tried to ignore all minor problems in this version, but now that Song
> mentioned this unnecessary check...
> 
> Personally I really dislike the checks like this one.
> 
>   - It can confuse the reader who will try to understand the purpose
> 
>   - it can hide a bug if delayed_uprobe_delete(du) is actually called
> with du == NULL.
> 
> IMO, you should either remove it and let the kernel crash (to notice the
> problem), or turn it into
> 
>   if (WARN_ON(!du))
>   return;

I'd prefer the more robust WARN_ON(!du) above instead of removing it.

-- Steve



Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Steven Rostedt
On Mon, 13 Aug 2018 13:50:19 +0200
Oleg Nesterov  wrote:

> On 08/13, Ravi Bangoria wrote:
> >
> > On 08/11/2018 01:27 PM, Song Liu wrote:  
> > >> +
> > >> +static void delayed_uprobe_delete(struct delayed_uprobe *du)
> > >> +{
> > >> +   if (!du)
> > >> +   return;  
> > > Do we really need this check?  
> >
> > Not necessary though, but I would still like to keep it for a safety.  
> 
> Heh. I tried to ignore all minor problems in this version, but now that Song
> mentioned this unnecessary check...
> 
> Personally I really dislike the checks like this one.
> 
>   - It can confuse the reader who will try to understand the purpose
> 
>   - it can hide a bug if delayed_uprobe_delete(du) is actually called
> with du == NULL.
> 
> IMO, you should either remove it and let the kernel crash (to notice the
> problem), or turn it into
> 
>   if (WARN_ON(!du))
>   return;

I'd prefer the more robust WARN_ON(!du) above instead of removing it.

-- Steve



Re: [PATCH] mm, slub: restore the original intention of prefetch_freepointer()

2018-08-13 Thread Kees Cook
On Thu, Aug 9, 2018 at 1:52 AM, Vlastimil Babka  wrote:
> In SLUB, prefetch_freepointer() is used when allocating an object from cache's
> freelist, to make sure the next object in the list is cache-hot, since it's
> probable it will be allocated soon.
>
> Commit 2482ddec670f ("mm: add SLUB free list pointer obfuscation") has
> unintentionally changed the prefetch in a way where the prefetch is turned to 
> a
> real fetch, and only the next->next pointer is prefetched. In case there is 
> not
> a stream of allocations that would benefit from prefetching, the extra real
> fetch might add a useless cache miss to the allocation. Restore the previous
> behavior.
>
> Signed-off-by: Vlastimil Babka 
> Cc: Kees Cook 
> Cc: Daniel Micay 
> Cc: Eric Dumazet 
> Cc: Christoph Lameter 
> Cc: Pekka Enberg 
> Cc: David Rientjes 
> Cc: Joonsoo Kim 
> ---
> While I don't expect this to be causing the bug at hand, it's worth fixing.
> For the bug it might mean that the page fault moves elsewhere.
>
>  mm/slub.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 51258eff4178..ce2b9e5cea77 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -271,8 +271,7 @@ static inline void *get_freepointer(struct kmem_cache *s, 
> void *object)
>
>  static void prefetch_freepointer(const struct kmem_cache *s, void *object)
>  {
> -   if (object)
> -   prefetch(freelist_dereference(s, object + s->offset));
> +   prefetch(object + s->offset);

Ah -- gotcha. I think I misunderstood the purpose here. You're not
prefetching what is being pointed at, you're literally prefetching
what is stored there. That wouldn't require dereferencing the freelist
pointer, no.

Thanks!

Acked-by: Kees Cook 

>  }
>
>  static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
> --
> 2.18.0
>



-- 
Kees Cook
Pixel Security


Re: [PATCH] mm, slub: restore the original intention of prefetch_freepointer()

2018-08-13 Thread Kees Cook
On Thu, Aug 9, 2018 at 1:52 AM, Vlastimil Babka  wrote:
> In SLUB, prefetch_freepointer() is used when allocating an object from cache's
> freelist, to make sure the next object in the list is cache-hot, since it's
> probable it will be allocated soon.
>
> Commit 2482ddec670f ("mm: add SLUB free list pointer obfuscation") has
> unintentionally changed the prefetch in a way where the prefetch is turned to 
> a
> real fetch, and only the next->next pointer is prefetched. In case there is 
> not
> a stream of allocations that would benefit from prefetching, the extra real
> fetch might add a useless cache miss to the allocation. Restore the previous
> behavior.
>
> Signed-off-by: Vlastimil Babka 
> Cc: Kees Cook 
> Cc: Daniel Micay 
> Cc: Eric Dumazet 
> Cc: Christoph Lameter 
> Cc: Pekka Enberg 
> Cc: David Rientjes 
> Cc: Joonsoo Kim 
> ---
> While I don't expect this to be causing the bug at hand, it's worth fixing.
> For the bug it might mean that the page fault moves elsewhere.
>
>  mm/slub.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 51258eff4178..ce2b9e5cea77 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -271,8 +271,7 @@ static inline void *get_freepointer(struct kmem_cache *s, 
> void *object)
>
>  static void prefetch_freepointer(const struct kmem_cache *s, void *object)
>  {
> -   if (object)
> -   prefetch(freelist_dereference(s, object + s->offset));
> +   prefetch(object + s->offset);

Ah -- gotcha. I think I misunderstood the purpose here. You're not
prefetching what is being pointed at, you're literally prefetching
what is stored there. That wouldn't require dereferencing the freelist
pointer, no.

Thanks!

Acked-by: Kees Cook 

>  }
>
>  static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
> --
> 2.18.0
>



-- 
Kees Cook
Pixel Security


Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Steven Rostedt
On Fri, 10 Aug 2018 23:14:01 -0700
Song Liu  wrote:

> On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt  wrote:
> > On Thu, 9 Aug 2018 16:38:28 +0200
> > Oleg Nesterov  wrote:
> >  
> >> I need to read this (hopefully final) version carefully. I'll try to do
> >> this before next Monday.
> >>  
> >
> > Monday may be the opening of the merge window (more likely Sunday). Do
> > you think this is good enough for pushing it in this late in the game,
> > or do you think we should wait another cycle?
> >
> > -- Steve  
> 
> We (Facebook) have use cases in production that would benefit from this work, 
> so
> I would rather see this gets in sooner than later. After a brief
> review (I will more
> careful review before Monday), I think this set is not likely to break
> existing uprobes
> (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle.
> 

It's still going under review, and the merge window is now open. I'd
prefer that this waits till the next merge window.

-- Steve


Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)

2018-08-13 Thread Steven Rostedt
On Fri, 10 Aug 2018 23:14:01 -0700
Song Liu  wrote:

> On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt  wrote:
> > On Thu, 9 Aug 2018 16:38:28 +0200
> > Oleg Nesterov  wrote:
> >  
> >> I need to read this (hopefully final) version carefully. I'll try to do
> >> this before next Monday.
> >>  
> >
> > Monday may be the opening of the merge window (more likely Sunday). Do
> > you think this is good enough for pushing it in this late in the game,
> > or do you think we should wait another cycle?
> >
> > -- Steve  
> 
> We (Facebook) have use cases in production that would benefit from this work, 
> so
> I would rather see this gets in sooner than later. After a brief
> review (I will more
> careful review before Monday), I think this set is not likely to break
> existing uprobes
> (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle.
> 

It's still going under review, and the merge window is now open. I'd
prefer that this waits till the next merge window.

-- Steve


Re: [PATCH] bitfield: avoid gcc-8 -Wint-in-bool-context warning

2018-08-13 Thread Masahiro Yamada
2018-08-14 7:09 GMT+09:00 Arnd Bergmann :
> Passing an enum into FIELD_GET() produces a long but harmless warning on
> newer compilers:
>
>  from include/linux/linkage.h:7,
>  from include/linux/kernel.h:7,
>  from include/linux/skbuff.h:17,
>  from include/linux/if_ether.h:23,
>  from include/linux/etherdevice.h:25,
>  from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:63:
> drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function 
> 'iwl_mvm_rx_mpdu_mq':
> include/linux/bitfield.h:56:20: error: enum constant in boolean context 
> [-Werror=int-in-bool-context]
>BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \
> ^
> ...
> include/linux/bitfield.h:103:3: note: in expansion of macro '__BF_FIELD_CHECK'
>__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
>^~~~
> drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:1025:21: note: in expansion of 
> macro 'FIELD_GET'
> le16_encode_bits(FIELD_GET(IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK,


How about fixing the root cause
in drivers/net/wireless/intel/iwlwifi/fw/api/rx.h ?


#define IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK   0x1eULL


enum iwl_rx_he_phy looks really strange.



Passing enum to FIELD_GET is odd,
so I prefer keeping this warned.



> The problem here is that the caller has no idea how the macro gets
> expanding, leading to a false-positive. It can be trivially avoided
> by doing a comparison against zero.
>
> This only recently started appearing as the iwlwifi driver was patched
> to use FIELD_GET.
>
> Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax")
> Signed-off-by: Arnd Bergmann 
> ---
>  include/linux/bitfield.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
> index 65a6981eef7b..3f1ef4450a7c 100644
> --- a/include/linux/bitfield.h
> +++ b/include/linux/bitfield.h
> @@ -53,7 +53,7 @@
> ({  \
> BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),  \
>  _pfx "mask is not constant");  \
> -   BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");\
> +   BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");\
> BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?   \
>  ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
>  _pfx "value too large for the field"); \
> --
> 2.18.0
>



-- 
Best Regards
Masahiro Yamada


Re: [PATCH] bitfield: avoid gcc-8 -Wint-in-bool-context warning

2018-08-13 Thread Masahiro Yamada
2018-08-14 7:09 GMT+09:00 Arnd Bergmann :
> Passing an enum into FIELD_GET() produces a long but harmless warning on
> newer compilers:
>
>  from include/linux/linkage.h:7,
>  from include/linux/kernel.h:7,
>  from include/linux/skbuff.h:17,
>  from include/linux/if_ether.h:23,
>  from include/linux/etherdevice.h:25,
>  from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:63:
> drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function 
> 'iwl_mvm_rx_mpdu_mq':
> include/linux/bitfield.h:56:20: error: enum constant in boolean context 
> [-Werror=int-in-bool-context]
>BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \
> ^
> ...
> include/linux/bitfield.h:103:3: note: in expansion of macro '__BF_FIELD_CHECK'
>__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
>^~~~
> drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:1025:21: note: in expansion of 
> macro 'FIELD_GET'
> le16_encode_bits(FIELD_GET(IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK,


How about fixing the root cause
in drivers/net/wireless/intel/iwlwifi/fw/api/rx.h ?


#define IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK   0x1eULL


enum iwl_rx_he_phy looks really strange.



Passing enum to FIELD_GET is odd,
so I prefer keeping this warned.



> The problem here is that the caller has no idea how the macro gets
> expanding, leading to a false-positive. It can be trivially avoided
> by doing a comparison against zero.
>
> This only recently started appearing as the iwlwifi driver was patched
> to use FIELD_GET.
>
> Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax")
> Signed-off-by: Arnd Bergmann 
> ---
>  include/linux/bitfield.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
> index 65a6981eef7b..3f1ef4450a7c 100644
> --- a/include/linux/bitfield.h
> +++ b/include/linux/bitfield.h
> @@ -53,7 +53,7 @@
> ({  \
> BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),  \
>  _pfx "mask is not constant");  \
> -   BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");\
> +   BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");\
> BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?   \
>  ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
>  _pfx "value too large for the field"); \
> --
> 2.18.0
>



-- 
Best Regards
Masahiro Yamada


Re: [PATCH v2 1/3] pinctrl: msm: Really mask level interrupts to prevent latching

2018-08-13 Thread Doug Anderson
Hi,

On Wed, Jul 25, 2018 at 3:28 PM, Stephen Boyd  wrote:
> The interrupt controller hardware in this pin controller has two status
> enable bits. The first "normal" status enable bit enables or disables
> the summary interrupt line being raised when a gpio interrupt triggers
> and the "raw" status enable bit allows or prevents the hardware from
> latching an interrupt into the status register for a gpio interrupt.
> Currently we just toggle the "normal" status enable bit in the mask and
> unmask ops so that the summary irq interrupt going to the CPU's
> interrupt controller doesn't trigger for the masked gpio interrupt.
>
> For a level triggered interrupt, the flow would be as follows: the pin
> controller sees the interrupt, latches the status into the status
> register, raises the summary irq to the CPU, summary irq handler runs
> and calls handle_level_irq(), handle_level_irq() masks and acks the gpio
> interrupt, the interrupt handler runs, and finally unmask the interrupt.
> When the interrupt handler completes, we expect that the interrupt line
> level will go back to the deasserted state so the genirq code can unmask
> the interrupt without it triggering again.
>
> If we only mask the interrupt by clearing the "normal" status enable bit
> then we'll ack the interrupt but it will continue to show up as pending
> in the status register because the raw status bit is enabled, the
> hardware hasn't deasserted the line, and thus the asserted state latches
> into the status register again. When the hardware deasserts the
> interrupt the pin controller still thinks there is a pending unserviced
> level interrupt because it latched it earlier. This behavior causes
> software to see an extra interrupt for level type interrupts each time
> the interrupt is handled.
>
> Let's fix this by clearing the raw status enable bit for level type
> interrupts so that the hardware stops latching the status of the
> interrupt after we ack it. We don't do this for edge type interrupts
> because it seems that toggling the raw status enable bit for edge type
> interrupts causes spurious edge interrupts.
>
> Cc: Bjorn Andersson 
> Cc: Doug Anderson 
> Signed-off-by: Stephen Boyd 
> ---
>
> Changes from v1:
>  - Squashed raw_status_bit write into same write on unmask (Doug
>Andersson)
>
>  drivers/pinctrl/qcom/pinctrl-msm.c | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c 
> b/drivers/pinctrl/qcom/pinctrl-msm.c
> index 2155a30c282b..3970dc599092 100644
> --- a/drivers/pinctrl/qcom/pinctrl-msm.c
> +++ b/drivers/pinctrl/qcom/pinctrl-msm.c
> @@ -634,6 +634,19 @@ static void msm_gpio_irq_mask(struct irq_data *d)
> raw_spin_lock_irqsave(>lock, flags);
>
> val = readl(pctrl->regs + g->intr_cfg_reg);
> +   /*
> +* Leaving the RAW_STATUS_EN bit enabled causes level interrupts that
> +* are still asserted to re-latch after we ack them. Clear the raw
> +* status enable bit too so the interrupt can't even latch into the
> +* hardware while it's masked, but only do this for level interrupts
> +* because edge interrupts have a problem with the raw status bit
> +* toggling and causing spurious interrupts.

This whole "spurious interrupts" explanation still seems dodgy.  Have
you experienced it yourself, or is this looking through some previous
commits?  As per my comments in v1, I'd still rather the comment state
the reason as: it's important to _not_ lose edge interrupts when
masked.


> +*/
> +   if (irqd_get_trigger_type(d) & IRQ_TYPE_LEVEL_MASK) {
> +   val &= ~BIT(g->intr_raw_status_bit);
> +   writel(val, pctrl->regs + g->intr_cfg_reg);
> +   }

In v1 you claimed you were going to combine this with the next write
(you said you'd combine things in both mask and unmask).  ...is there
a reason why you didn't?  As per my comments in v1 I believe it's
safer from a correctness point of view to combine them.


-Doug


Re: [PATCH v2 1/3] pinctrl: msm: Really mask level interrupts to prevent latching

2018-08-13 Thread Doug Anderson
Hi,

On Wed, Jul 25, 2018 at 3:28 PM, Stephen Boyd  wrote:
> The interrupt controller hardware in this pin controller has two status
> enable bits. The first "normal" status enable bit enables or disables
> the summary interrupt line being raised when a gpio interrupt triggers
> and the "raw" status enable bit allows or prevents the hardware from
> latching an interrupt into the status register for a gpio interrupt.
> Currently we just toggle the "normal" status enable bit in the mask and
> unmask ops so that the summary irq interrupt going to the CPU's
> interrupt controller doesn't trigger for the masked gpio interrupt.
>
> For a level triggered interrupt, the flow would be as follows: the pin
> controller sees the interrupt, latches the status into the status
> register, raises the summary irq to the CPU, summary irq handler runs
> and calls handle_level_irq(), handle_level_irq() masks and acks the gpio
> interrupt, the interrupt handler runs, and finally unmask the interrupt.
> When the interrupt handler completes, we expect that the interrupt line
> level will go back to the deasserted state so the genirq code can unmask
> the interrupt without it triggering again.
>
> If we only mask the interrupt by clearing the "normal" status enable bit
> then we'll ack the interrupt but it will continue to show up as pending
> in the status register because the raw status bit is enabled, the
> hardware hasn't deasserted the line, and thus the asserted state latches
> into the status register again. When the hardware deasserts the
> interrupt the pin controller still thinks there is a pending unserviced
> level interrupt because it latched it earlier. This behavior causes
> software to see an extra interrupt for level type interrupts each time
> the interrupt is handled.
>
> Let's fix this by clearing the raw status enable bit for level type
> interrupts so that the hardware stops latching the status of the
> interrupt after we ack it. We don't do this for edge type interrupts
> because it seems that toggling the raw status enable bit for edge type
> interrupts causes spurious edge interrupts.
>
> Cc: Bjorn Andersson 
> Cc: Doug Anderson 
> Signed-off-by: Stephen Boyd 
> ---
>
> Changes from v1:
>  - Squashed raw_status_bit write into same write on unmask (Doug
>Andersson)
>
>  drivers/pinctrl/qcom/pinctrl-msm.c | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c 
> b/drivers/pinctrl/qcom/pinctrl-msm.c
> index 2155a30c282b..3970dc599092 100644
> --- a/drivers/pinctrl/qcom/pinctrl-msm.c
> +++ b/drivers/pinctrl/qcom/pinctrl-msm.c
> @@ -634,6 +634,19 @@ static void msm_gpio_irq_mask(struct irq_data *d)
> raw_spin_lock_irqsave(>lock, flags);
>
> val = readl(pctrl->regs + g->intr_cfg_reg);
> +   /*
> +* Leaving the RAW_STATUS_EN bit enabled causes level interrupts that
> +* are still asserted to re-latch after we ack them. Clear the raw
> +* status enable bit too so the interrupt can't even latch into the
> +* hardware while it's masked, but only do this for level interrupts
> +* because edge interrupts have a problem with the raw status bit
> +* toggling and causing spurious interrupts.

This whole "spurious interrupts" explanation still seems dodgy.  Have
you experienced it yourself, or is this looking through some previous
commits?  As per my comments in v1, I'd still rather the comment state
the reason as: it's important to _not_ lose edge interrupts when
masked.


> +*/
> +   if (irqd_get_trigger_type(d) & IRQ_TYPE_LEVEL_MASK) {
> +   val &= ~BIT(g->intr_raw_status_bit);
> +   writel(val, pctrl->regs + g->intr_cfg_reg);
> +   }

In v1 you claimed you were going to combine this with the next write
(you said you'd combine things in both mask and unmask).  ...is there
a reason why you didn't?  As per my comments in v1 I believe it's
safer from a correctness point of view to combine them.


-Doug


[PATCH 9/9] platform: goldfish: pipe: Add a blank line to separate struct members

2018-08-13 Thread rkir
From: Roman Kiryanov 

To improve readability and to be consistent with other
struct members.

Signed-off-by: Roman Kiryanov 
---
 drivers/platform/goldfish/goldfish_pipe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
b/drivers/platform/goldfish/goldfish_pipe.c
index 3f8e440d62ae..d0621da2c334 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -157,6 +157,7 @@ struct goldfish_pipe {
 
/* A wake queue for sleeping until host signals an event */
wait_queue_head_t wake_queue;
+
/* Pointer to the parent goldfish_pipe_dev instance */
struct goldfish_pipe_dev *dev;
 };
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH 9/9] platform: goldfish: pipe: Add a blank line to separate struct members

2018-08-13 Thread rkir
From: Roman Kiryanov 

To improve readability and to be consistent with other
struct members.

Signed-off-by: Roman Kiryanov 
---
 drivers/platform/goldfish/goldfish_pipe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
b/drivers/platform/goldfish/goldfish_pipe.c
index 3f8e440d62ae..d0621da2c334 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -157,6 +157,7 @@ struct goldfish_pipe {
 
/* A wake queue for sleeping until host signals an event */
wait_queue_head_t wake_queue;
+
/* Pointer to the parent goldfish_pipe_dev instance */
struct goldfish_pipe_dev *dev;
 };
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH 4/9] platform: goldfish: pipe: Separate the host interface to a separate header

2018-08-13 Thread rkir
From: Roman Kiryanov 

These are several enums that must be kept in sync with the host side.
This change explicitly separates them into a dedicated header file.

Signed-off-by: Roman Kiryanov 
---
 drivers/platform/goldfish/goldfish_pipe.c |  69 +--
 .../platform/goldfish/goldfish_pipe_qemu.h| 112 ++
 2 files changed, 113 insertions(+), 68 deletions(-)
 create mode 100644 drivers/platform/goldfish/goldfish_pipe_qemu.h

diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
b/drivers/platform/goldfish/goldfish_pipe.c
index 0607897c6a59..8f9580454154 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -63,6 +63,7 @@
 #include 
 #include 
 #include 
+#include "goldfish_pipe_qemu.h"
 
 /*
  * Update this when something changes in the driver's behavior so the host
@@ -73,74 +74,6 @@ enum {
PIPE_CURRENT_DEVICE_VERSION = 2
 };
 
-/*
- * IMPORTANT: The following constants must match the ones used and defined
- * in external/qemu/hw/goldfish_pipe.c in the Android source tree.
- */
-
-/* List of bitflags returned in status of CMD_POLL command */
-enum PipePollFlags {
-   PIPE_POLL_IN= 1 << 0,
-   PIPE_POLL_OUT   = 1 << 1,
-   PIPE_POLL_HUP   = 1 << 2
-};
-
-/*
- * Possible status values used to signal errors - see
- * goldfish_pipe_error_convert
- */
-enum PipeErrors {
-   PIPE_ERROR_INVAL  = -1,
-   PIPE_ERROR_AGAIN  = -2,
-   PIPE_ERROR_NOMEM  = -3,
-   PIPE_ERROR_IO = -4
-};
-
-/* Bit-flags used to signal events from the emulator */
-enum PipeWakeFlags {
-   PIPE_WAKE_CLOSED = 1 << 0,  /* emulator closed pipe */
-   PIPE_WAKE_READ   = 1 << 1,  /* pipe can now be read from */
-   PIPE_WAKE_WRITE  = 1 << 2  /* pipe can now be written to */
-};
-
-/* Bit flags for the 'flags' field */
-enum PipeFlagsBits {
-   BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
-   BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
-   BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
-};
-
-enum PipeRegs {
-   PIPE_REG_CMD = 0,
-
-   PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
-   PIPE_REG_SIGNAL_BUFFER = 8,
-   PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
-
-   PIPE_REG_OPEN_BUFFER_HIGH = 20,
-   PIPE_REG_OPEN_BUFFER = 24,
-
-   PIPE_REG_VERSION = 36,
-
-   PIPE_REG_GET_SIGNALLED = 48,
-};
-
-enum PipeCmdCode {
-   PIPE_CMD_OPEN = 1,  /* to be used by the pipe device itself */
-   PIPE_CMD_CLOSE,
-   PIPE_CMD_POLL,
-   PIPE_CMD_WRITE,
-   PIPE_CMD_WAKE_ON_WRITE,
-   PIPE_CMD_READ,
-   PIPE_CMD_WAKE_ON_READ,
-
-   /*
-* TODO(zyy): implement a deferred read/write execution to allow
-* parallel processing of pipe operations on the host.
-*/
-   PIPE_CMD_WAKE_ON_DONE_IO,
-};
-
 enum {
MAX_BUFFERS_PER_COMMAND = 336,
MAX_SIGNALLED_PIPES = 64,
diff --git a/drivers/platform/goldfish/goldfish_pipe_qemu.h 
b/drivers/platform/goldfish/goldfish_pipe_qemu.h
new file mode 100644
index ..51f3d9f82af6
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe_qemu.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/*
+ * IMPORTANT: The following constants must match the ones used and defined in
+ * external/qemu/include/hw/misc/goldfish_pipe.h
+ */
+
+#ifndef ANDROID_PIPE_COMMON_H
+#define ANDROID_PIPE_COMMON_H
+
+/* List of bitflags returned in status of CMD_POLL command */
+enum PipePollFlags {
+   PIPE_POLL_IN= 1 << 0,
+   PIPE_POLL_OUT   = 1 << 1,
+   PIPE_POLL_HUP   = 1 << 2
+};
+
+/* Possible status values used to signal errors */
+enum PipeErrors {
+   PIPE_ERROR_INVAL= -1,
+   PIPE_ERROR_AGAIN= -2,
+   PIPE_ERROR_NOMEM= -3,
+   PIPE_ERROR_IO   = -4
+};
+
+/* Bit-flags used to signal events from the emulator */
+enum PipeWakeFlags {
+   /* emulator closed pipe */
+   PIPE_WAKE_CLOSED= 1 << 0,
+
+   /* pipe can now be read from */
+   PIPE_WAKE_READ  = 1 << 1,
+
+   /* pipe can now be written to */
+   PIPE_WAKE_WRITE = 1 << 2,
+
+   /* unlock this pipe's DMA buffer */
+   PIPE_WAKE_UNLOCK_DMA= 1 << 3,
+
+   /* unlock DMA buffer of the pipe shared to this pipe */
+   PIPE_WAKE_UNLOCK_DMA_SHARED = 1 << 4,
+};
+
+/* Possible pipe closing reasons */
+enum PipeCloseReason {
+   /* guest sent a 

[PATCH 5/9] platform: goldfish: pipe: Update the comment for GFP_ATOMIC

2018-08-13 Thread rkir
From: Roman Kiryanov 

Provide an explanation why GFP_ATOMIC is needed to prevent changing it to
other values.

Signed-off-by: Roman Kiryanov 
---
 drivers/platform/goldfish/goldfish_pipe.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
b/drivers/platform/goldfish/goldfish_pipe.c
index 8f9580454154..a4db4d12b09c 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -639,7 +639,10 @@ static int get_free_pipe_id_locked(struct 
goldfish_pipe_dev *dev)
return id;
 
{
-   /* Reallocate the array */
+   /* Reallocate the array.
+* Since get_free_pipe_id_locked runs with interrupts disabled,
+* we don't want to make calls that could lead to sleep.
+*/
u32 new_capacity = 2 * dev->pipes_capacity;
struct goldfish_pipe **pipes =
kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC);
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH 4/9] platform: goldfish: pipe: Separate the host interface to a separate header

2018-08-13 Thread rkir
From: Roman Kiryanov 

These are several enums that must be kept in sync with the host side.
This change explicitly separates them into a dedicated header file.

Signed-off-by: Roman Kiryanov 
---
 drivers/platform/goldfish/goldfish_pipe.c |  69 +--
 .../platform/goldfish/goldfish_pipe_qemu.h| 112 ++
 2 files changed, 113 insertions(+), 68 deletions(-)
 create mode 100644 drivers/platform/goldfish/goldfish_pipe_qemu.h

diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
b/drivers/platform/goldfish/goldfish_pipe.c
index 0607897c6a59..8f9580454154 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -63,6 +63,7 @@
 #include 
 #include 
 #include 
+#include "goldfish_pipe_qemu.h"
 
 /*
  * Update this when something changes in the driver's behavior so the host
@@ -73,74 +74,6 @@ enum {
PIPE_CURRENT_DEVICE_VERSION = 2
 };
 
-/*
- * IMPORTANT: The following constants must match the ones used and defined
- * in external/qemu/hw/goldfish_pipe.c in the Android source tree.
- */
-
-/* List of bitflags returned in status of CMD_POLL command */
-enum PipePollFlags {
-   PIPE_POLL_IN= 1 << 0,
-   PIPE_POLL_OUT   = 1 << 1,
-   PIPE_POLL_HUP   = 1 << 2
-};
-
-/*
- * Possible status values used to signal errors - see
- * goldfish_pipe_error_convert
- */
-enum PipeErrors {
-   PIPE_ERROR_INVAL  = -1,
-   PIPE_ERROR_AGAIN  = -2,
-   PIPE_ERROR_NOMEM  = -3,
-   PIPE_ERROR_IO = -4
-};
-
-/* Bit-flags used to signal events from the emulator */
-enum PipeWakeFlags {
-   PIPE_WAKE_CLOSED = 1 << 0,  /* emulator closed pipe */
-   PIPE_WAKE_READ   = 1 << 1,  /* pipe can now be read from */
-   PIPE_WAKE_WRITE  = 1 << 2  /* pipe can now be written to */
-};
-
-/* Bit flags for the 'flags' field */
-enum PipeFlagsBits {
-   BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
-   BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
-   BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
-};
-
-enum PipeRegs {
-   PIPE_REG_CMD = 0,
-
-   PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
-   PIPE_REG_SIGNAL_BUFFER = 8,
-   PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
-
-   PIPE_REG_OPEN_BUFFER_HIGH = 20,
-   PIPE_REG_OPEN_BUFFER = 24,
-
-   PIPE_REG_VERSION = 36,
-
-   PIPE_REG_GET_SIGNALLED = 48,
-};
-
-enum PipeCmdCode {
-   PIPE_CMD_OPEN = 1,  /* to be used by the pipe device itself */
-   PIPE_CMD_CLOSE,
-   PIPE_CMD_POLL,
-   PIPE_CMD_WRITE,
-   PIPE_CMD_WAKE_ON_WRITE,
-   PIPE_CMD_READ,
-   PIPE_CMD_WAKE_ON_READ,
-
-   /*
-* TODO(zyy): implement a deferred read/write execution to allow
-* parallel processing of pipe operations on the host.
-*/
-   PIPE_CMD_WAKE_ON_DONE_IO,
-};
-
 enum {
MAX_BUFFERS_PER_COMMAND = 336,
MAX_SIGNALLED_PIPES = 64,
diff --git a/drivers/platform/goldfish/goldfish_pipe_qemu.h 
b/drivers/platform/goldfish/goldfish_pipe_qemu.h
new file mode 100644
index ..51f3d9f82af6
--- /dev/null
+++ b/drivers/platform/goldfish/goldfish_pipe_qemu.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/*
+ * IMPORTANT: The following constants must match the ones used and defined in
+ * external/qemu/include/hw/misc/goldfish_pipe.h
+ */
+
+#ifndef ANDROID_PIPE_COMMON_H
+#define ANDROID_PIPE_COMMON_H
+
+/* List of bitflags returned in status of CMD_POLL command */
+enum PipePollFlags {
+   PIPE_POLL_IN= 1 << 0,
+   PIPE_POLL_OUT   = 1 << 1,
+   PIPE_POLL_HUP   = 1 << 2
+};
+
+/* Possible status values used to signal errors */
+enum PipeErrors {
+   PIPE_ERROR_INVAL= -1,
+   PIPE_ERROR_AGAIN= -2,
+   PIPE_ERROR_NOMEM= -3,
+   PIPE_ERROR_IO   = -4
+};
+
+/* Bit-flags used to signal events from the emulator */
+enum PipeWakeFlags {
+   /* emulator closed pipe */
+   PIPE_WAKE_CLOSED= 1 << 0,
+
+   /* pipe can now be read from */
+   PIPE_WAKE_READ  = 1 << 1,
+
+   /* pipe can now be written to */
+   PIPE_WAKE_WRITE = 1 << 2,
+
+   /* unlock this pipe's DMA buffer */
+   PIPE_WAKE_UNLOCK_DMA= 1 << 3,
+
+   /* unlock DMA buffer of the pipe shared to this pipe */
+   PIPE_WAKE_UNLOCK_DMA_SHARED = 1 << 4,
+};
+
+/* Possible pipe closing reasons */
+enum PipeCloseReason {
+   /* guest sent a 

[PATCH 5/9] platform: goldfish: pipe: Update the comment for GFP_ATOMIC

2018-08-13 Thread rkir
From: Roman Kiryanov 

Provide an explanation of why GFP_ATOMIC is needed, to prevent it from
being changed to other values.

Signed-off-by: Roman Kiryanov 
---
 drivers/platform/goldfish/goldfish_pipe.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/goldfish/goldfish_pipe.c 
b/drivers/platform/goldfish/goldfish_pipe.c
index 8f9580454154..a4db4d12b09c 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -639,7 +639,10 @@ static int get_free_pipe_id_locked(struct 
goldfish_pipe_dev *dev)
return id;
 
{
-   /* Reallocate the array */
+   /* Reallocate the array.
+* Since get_free_pipe_id_locked runs with interrupts disabled,
+* we don't want to make calls that could lead to sleep.
+*/
u32 new_capacity = 2 * dev->pipes_capacity;
struct goldfish_pipe **pipes =
kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC);
-- 
2.18.0.865.gffc8e1a3cd6-goog



  1   2   3   4   5   6   7   8   >