RE: [RESEND PATCH] tee: add kernel internal client interface
Hi jens: Actually, we have already used the kernel client api in our product(poplar board). Thank you for the upstream. Tested-by: Zeng Tao Regards Zengtao >-Original Message- >From: Jens Wiklander [mailto:jens.wiklan...@linaro.org] >Sent: Monday, July 09, 2018 2:16 PM >To: linux-kernel@vger.kernel.org; linux-arm-ker...@lists.infradead.org; >tee-...@lists.linaro.org >Cc: Zengtao (B) ; Victor Chong >; Jerome Forissier ; >Jens Wiklander >Subject: [RESEND PATCH] tee: add kernel internal client interface > >Adds a kernel internal TEE client interface to be used by other drivers. > >Signed-off-by: Jens Wiklander >--- > drivers/tee/tee_core.c | 113 +--- > include/linux/tee_drv.h | 73 ++ > 2 files changed, 179 insertions(+), 7 deletions(-) > >diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index >dd46b758852a..7b2bb4c50058 100644 >--- a/drivers/tee/tee_core.c >+++ b/drivers/tee/tee_core.c >@@ -38,15 +38,13 @@ static DEFINE_SPINLOCK(driver_lock); static struct >class *tee_class; static dev_t tee_devt; > >-static int tee_open(struct inode *inode, struct file *filp) >+static struct tee_context *teedev_open(struct tee_device *teedev) > { > int rc; >- struct tee_device *teedev; > struct tee_context *ctx; > >- teedev = container_of(inode->i_cdev, struct tee_device, cdev); > if (!tee_device_get(teedev)) >- return -EINVAL; >+ return ERR_PTR(-EINVAL); > > ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); > if (!ctx) { >@@ -57,16 +55,16 @@ static int tee_open(struct inode *inode, struct file *filp) > kref_init(>refcount); > ctx->teedev = teedev; > INIT_LIST_HEAD(>list_shm); >- filp->private_data = ctx; > rc = teedev->desc->ops->open(ctx); > if (rc) > goto err; > >- return 0; >+ return ctx; > err: > kfree(ctx); > tee_device_put(teedev); >- return rc; >+ return ERR_PTR(rc); >+ > } > > void teedev_ctx_get(struct tee_context *ctx) @@ -100,6 +98,18 @@ static >void teedev_close_context(struct tee_context *ctx) > teedev_ctx_put(ctx); > } > >+static int tee_open(struct inode *inode, 
struct file *filp) { >+ struct tee_context *ctx; >+ >+ ctx = teedev_open(container_of(inode->i_cdev, struct tee_device, cdev)); >+ if (IS_ERR(ctx)) >+ return PTR_ERR(ctx); >+ >+ filp->private_data = ctx; >+ return 0; >+} >+ > static int tee_release(struct inode *inode, struct file *filp) { > teedev_close_context(filp->private_data); >@@ -928,6 +938,95 @@ void *tee_get_drvdata(struct tee_device *teedev) } >EXPORT_SYMBOL_GPL(tee_get_drvdata); > >+struct match_dev_data { >+ struct tee_ioctl_version_data *vers; >+ const void *data; >+ int (*match)(struct tee_ioctl_version_data *, const void *); }; >+ >+static int match_dev(struct device *dev, const void *data) { >+ const struct match_dev_data *match_data = data; >+ struct tee_device *teedev = container_of(dev, struct tee_device, dev); >+ >+ teedev->desc->ops->get_version(teedev, match_data->vers); >+ return match_data->match(match_data->vers, match_data->data); } >+ >+struct tee_context * >+tee_client_open_context(struct tee_context *start, >+ int (*match)(struct tee_ioctl_version_data *, >+ const void *), >+ const void *data, struct tee_ioctl_version_data *vers) { >+ struct device *dev = NULL; >+ struct device *put_dev = NULL; >+ struct tee_context *ctx = NULL; >+ struct tee_ioctl_version_data v; >+ struct match_dev_data match_data = { vers ? 
vers : , data, match }; >+ >+ if (start) >+ dev = >teedev->dev; >+ >+ do { >+ dev = class_find_device(tee_class, dev, _data, match_dev); >+ if (!dev) { >+ ctx = ERR_PTR(-ENOENT); >+ break; >+ } >+ >+ put_device(put_dev); >+ put_dev = dev; >+ >+ ctx = teedev_open(container_of(dev, struct tee_device, dev)); >+ } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM); >+ >+ put_device(put_dev); >+ return ctx; >+} >+EXPORT_SYMBOL_GPL(tee_client_open_context); >+ >+void tee_client_close_context(struct tee_context *ctx) { >+ teedev_close_context(ctx); >+} >+EXPORT_SYMBOL_GPL(tee_client_close_context); >+ >+void tee_client_get_version(struct tee_context *ctx, >+ struct tee_ioctl_version_data *vers) { >+ ctx->teedev->desc->ops->get_version(ctx->teedev, vers); } >+EXPORT_SYMBOL_GPL(tee_client_get_version); >+ >+int tee_client_open_session(struct tee_context *ctx, >+ struct tee_ioctl_open_session_arg *arg, >+ struct tee_param *param) >+{ >+ if (!ctx->teedev->desc->ops->open_session) >+ return -EINVAL; >+ return
RE: [RESEND PATCH] tee: add kernel internal client interface
Hi jens: Actually, we have already used the kernel client api in our product(poplar board). Thank you for the upstream. Tested-by: Zeng Tao Regards Zengtao >-Original Message- >From: Jens Wiklander [mailto:jens.wiklan...@linaro.org] >Sent: Monday, July 09, 2018 2:16 PM >To: linux-kernel@vger.kernel.org; linux-arm-ker...@lists.infradead.org; >tee-...@lists.linaro.org >Cc: Zengtao (B) ; Victor Chong >; Jerome Forissier ; >Jens Wiklander >Subject: [RESEND PATCH] tee: add kernel internal client interface > >Adds a kernel internal TEE client interface to be used by other drivers. > >Signed-off-by: Jens Wiklander >--- > drivers/tee/tee_core.c | 113 +--- > include/linux/tee_drv.h | 73 ++ > 2 files changed, 179 insertions(+), 7 deletions(-) > >diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index >dd46b758852a..7b2bb4c50058 100644 >--- a/drivers/tee/tee_core.c >+++ b/drivers/tee/tee_core.c >@@ -38,15 +38,13 @@ static DEFINE_SPINLOCK(driver_lock); static struct >class *tee_class; static dev_t tee_devt; > >-static int tee_open(struct inode *inode, struct file *filp) >+static struct tee_context *teedev_open(struct tee_device *teedev) > { > int rc; >- struct tee_device *teedev; > struct tee_context *ctx; > >- teedev = container_of(inode->i_cdev, struct tee_device, cdev); > if (!tee_device_get(teedev)) >- return -EINVAL; >+ return ERR_PTR(-EINVAL); > > ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); > if (!ctx) { >@@ -57,16 +55,16 @@ static int tee_open(struct inode *inode, struct file *filp) > kref_init(>refcount); > ctx->teedev = teedev; > INIT_LIST_HEAD(>list_shm); >- filp->private_data = ctx; > rc = teedev->desc->ops->open(ctx); > if (rc) > goto err; > >- return 0; >+ return ctx; > err: > kfree(ctx); > tee_device_put(teedev); >- return rc; >+ return ERR_PTR(rc); >+ > } > > void teedev_ctx_get(struct tee_context *ctx) @@ -100,6 +98,18 @@ static >void teedev_close_context(struct tee_context *ctx) > teedev_ctx_put(ctx); > } > >+static int tee_open(struct inode *inode, 
struct file *filp) { >+ struct tee_context *ctx; >+ >+ ctx = teedev_open(container_of(inode->i_cdev, struct tee_device, cdev)); >+ if (IS_ERR(ctx)) >+ return PTR_ERR(ctx); >+ >+ filp->private_data = ctx; >+ return 0; >+} >+ > static int tee_release(struct inode *inode, struct file *filp) { > teedev_close_context(filp->private_data); >@@ -928,6 +938,95 @@ void *tee_get_drvdata(struct tee_device *teedev) } >EXPORT_SYMBOL_GPL(tee_get_drvdata); > >+struct match_dev_data { >+ struct tee_ioctl_version_data *vers; >+ const void *data; >+ int (*match)(struct tee_ioctl_version_data *, const void *); }; >+ >+static int match_dev(struct device *dev, const void *data) { >+ const struct match_dev_data *match_data = data; >+ struct tee_device *teedev = container_of(dev, struct tee_device, dev); >+ >+ teedev->desc->ops->get_version(teedev, match_data->vers); >+ return match_data->match(match_data->vers, match_data->data); } >+ >+struct tee_context * >+tee_client_open_context(struct tee_context *start, >+ int (*match)(struct tee_ioctl_version_data *, >+ const void *), >+ const void *data, struct tee_ioctl_version_data *vers) { >+ struct device *dev = NULL; >+ struct device *put_dev = NULL; >+ struct tee_context *ctx = NULL; >+ struct tee_ioctl_version_data v; >+ struct match_dev_data match_data = { vers ? 
vers : , data, match }; >+ >+ if (start) >+ dev = >teedev->dev; >+ >+ do { >+ dev = class_find_device(tee_class, dev, _data, match_dev); >+ if (!dev) { >+ ctx = ERR_PTR(-ENOENT); >+ break; >+ } >+ >+ put_device(put_dev); >+ put_dev = dev; >+ >+ ctx = teedev_open(container_of(dev, struct tee_device, dev)); >+ } while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM); >+ >+ put_device(put_dev); >+ return ctx; >+} >+EXPORT_SYMBOL_GPL(tee_client_open_context); >+ >+void tee_client_close_context(struct tee_context *ctx) { >+ teedev_close_context(ctx); >+} >+EXPORT_SYMBOL_GPL(tee_client_close_context); >+ >+void tee_client_get_version(struct tee_context *ctx, >+ struct tee_ioctl_version_data *vers) { >+ ctx->teedev->desc->ops->get_version(ctx->teedev, vers); } >+EXPORT_SYMBOL_GPL(tee_client_get_version); >+ >+int tee_client_open_session(struct tee_context *ctx, >+ struct tee_ioctl_open_session_arg *arg, >+ struct tee_param *param) >+{ >+ if (!ctx->teedev->desc->ops->open_session) >+ return -EINVAL; >+ return
[PATCH RFC] Documentation/scheduler/sched-stats.txt: correct jiffies to ns
The unit of run_delay and rq_cpu_time is ns instead of jiffies. Signed-off-by: Weiping Zhang --- Documentation/scheduler/sched-stats.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt index 8259b34a66ae..e7d1f614a1f1 100644 --- a/Documentation/scheduler/sched-stats.txt +++ b/Documentation/scheduler/sched-stats.txt @@ -48,9 +48,8 @@ Next two are try_to_wake_up() statistics: 6) # of times try_to_wake_up() was called to wake up the local cpu Next three are statistics describing scheduling latency: - 7) sum of all time spent running by tasks on this processor (in jiffies) - 8) sum of all time spent waiting to run by tasks on this processor (in -jiffies) + 7) sum of all time spent running by tasks on this processor (in ns) + 8) sum of all time spent waiting to run by tasks on this processor (in ns) 9) # of timeslices run on this cpu -- 2.14.1
[PATCH RFC] Documentation/scheduler/sched-stats.txt: correct jiffies to ns
The unit of run_delay and rq_cpu_time is ns instead of jiffies. Signed-off-by: Weiping Zhang --- Documentation/scheduler/sched-stats.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt index 8259b34a66ae..e7d1f614a1f1 100644 --- a/Documentation/scheduler/sched-stats.txt +++ b/Documentation/scheduler/sched-stats.txt @@ -48,9 +48,8 @@ Next two are try_to_wake_up() statistics: 6) # of times try_to_wake_up() was called to wake up the local cpu Next three are statistics describing scheduling latency: - 7) sum of all time spent running by tasks on this processor (in jiffies) - 8) sum of all time spent waiting to run by tasks on this processor (in -jiffies) + 7) sum of all time spent running by tasks on this processor (in ns) + 8) sum of all time spent waiting to run by tasks on this processor (in ns) 9) # of timeslices run on this cpu -- 2.14.1
Re: [PATCH] remoteproc/davinci: Mark error recovery as disabled
Hi Bjorn, On Tuesday 14 August 2018 12:22 AM, Bjorn Andersson wrote: > On Mon 13 Aug 08:11 PDT 2018, Suman Anna wrote: > >> Hi Bjorn, >> >> On 07/23/2018 06:27 PM, Suman Anna wrote: >>> The Davinci remoteproc driver does not support error recovery at >>> present, so mark the corresponding remoteproc flag appropriately >>> so that the debugfs flag shows the value as 'disabled' by default. >>> >>> Signed-off-by: Suman Anna >> >> Can you pick up this minor patch for 4.19 please, I do not see this in >> your rproc-next branch? >> > > Of course, the patch is applied now, will send my pull request in a few > days. Can you also please pick patch 1/4 here: https://patchwork.kernel.org/patch/10479365/ Thanks, Sekhar
Re: [PATCH] remoteproc/davinci: Mark error recovery as disabled
Hi Bjorn, On Tuesday 14 August 2018 12:22 AM, Bjorn Andersson wrote: > On Mon 13 Aug 08:11 PDT 2018, Suman Anna wrote: > >> Hi Bjorn, >> >> On 07/23/2018 06:27 PM, Suman Anna wrote: >>> The Davinci remoteproc driver does not support error recovery at >>> present, so mark the corresponding remoteproc flag appropriately >>> so that the debugfs flag shows the value as 'disabled' by default. >>> >>> Signed-off-by: Suman Anna >> >> Can you pick up this minor patch for 4.19 please, I do not see this in >> your rproc-next branch? >> > > Of course, the patch is applied now, will send my pull request in a few > days. Can you also please pick patch 1/4 here: https://patchwork.kernel.org/patch/10479365/ Thanks, Sekhar
Re: [PATCH v1 10/10] MAINTAINERS: Add entry for Qualcomm TSENS thermal drivers
On 8/9/2018 6:02 PM, Amit Kucheria wrote: Create an entry for the TSENS drivers and mark them as maintained Signed-off-by: Amit Kucheria Thanks Amit for signing up to maintain this driver. Acked-by: Rajendra Nayak --- MAINTAINERS | 7 +++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 68b4ff8ed205..ca6183d6d545 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11862,6 +11862,13 @@ L: linux-arm-...@vger.kernel.org S:Maintained F:drivers/iommu/qcom_iommu.c +QUALCOMM TSENS THERMAL DRIVER +M: Amit Kucheria +L: linux...@vger.kernel.org +L: linux-arm-...@vger.kernel.org +S: Maintained +F: drivers/thermal/qcom/ + QUALCOMM VENUS VIDEO ACCELERATOR DRIVER M:Stanimir Varbanov L:linux-me...@vger.kernel.org
Re: [PATCH v1 10/10] MAINTAINERS: Add entry for Qualcomm TSENS thermal drivers
On 8/9/2018 6:02 PM, Amit Kucheria wrote: Create an entry for the TSENS drivers and mark them as maintained Signed-off-by: Amit Kucheria Thanks Amit for signing up to maintain this driver. Acked-by: Rajendra Nayak --- MAINTAINERS | 7 +++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 68b4ff8ed205..ca6183d6d545 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11862,6 +11862,13 @@ L: linux-arm-...@vger.kernel.org S:Maintained F:drivers/iommu/qcom_iommu.c +QUALCOMM TSENS THERMAL DRIVER +M: Amit Kucheria +L: linux...@vger.kernel.org +L: linux-arm-...@vger.kernel.org +S: Maintained +F: drivers/thermal/qcom/ + QUALCOMM VENUS VIDEO ACCELERATOR DRIVER M:Stanimir Varbanov L:linux-me...@vger.kernel.org
Re: [PATCH v3 07/17] x86/pci: add Hygon PCI vendor and northbridge support
On 2018/8/14 6:14, Bjorn Helgaas wrote: On Sat, Aug 11, 2018 at 09:27:42PM +0800, Pu Wen wrote: diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2950223..d0e98a9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -511,6 +511,8 @@ #define PCI_DEVICE_ID_AMI_MEGARAID0x9010 #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 +#define PCI_VENDOR_ID_HYGON 0x1d94 Please add this entry so pci_ids.h remains sorted by Vendor ID, then Device ID, as the comment at the top suggests. Thanks for the suggestion. Will place PCI_VENDOR_ID_HYGON between PCI_VENDOR_ID_AMAZON(0x1d0f) and PCI_VENDOR_ID_TEKRAM(0x1de1) in next patch set. Pu Wen With that changed, Acked-by: Bjorn Helgaas # pci_ids.h #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD_K8_NB 0x1100 #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 -- 2.7.4
Re: [PATCH v3 07/17] x86/pci: add Hygon PCI vendor and northbridge support
On 2018/8/14 6:14, Bjorn Helgaas wrote: On Sat, Aug 11, 2018 at 09:27:42PM +0800, Pu Wen wrote: diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2950223..d0e98a9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -511,6 +511,8 @@ #define PCI_DEVICE_ID_AMI_MEGARAID0x9010 #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 +#define PCI_VENDOR_ID_HYGON 0x1d94 Please add this entry so pci_ids.h remains sorted by Vendor ID, then Device ID, as the comment at the top suggests. Thanks for the suggestion. Will place PCI_VENDOR_ID_HYGON between PCI_VENDOR_ID_AMAZON(0x1d0f) and PCI_VENDOR_ID_TEKRAM(0x1de1) in next patch set. Pu Wen With that changed, Acked-by: Bjorn Helgaas # pci_ids.h #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD_K8_NB 0x1100 #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 -- 2.7.4
Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large
On Mon, 2018-08-13 at 20:47 -0700, Roman Kiryanov wrote: > Hi, > > thank you for reviewing my patches. I decided to put BUILD_BUG_ON > close to places where it is important that these structs fit into a > memory page to give some context. And you make the reader figure out what type dev->buffers is unnecessarily when sizeof(struct goldfish_pipe_dev_buffers) is pretty obvious. > Regards, > Roman. cheers, Joe
Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large
On Mon, 2018-08-13 at 20:47 -0700, Roman Kiryanov wrote: > Hi, > > thank you for reviewing my patches. I decided to put BUILD_BUG_ON > close to places where it is important that these structs fit into a > memory page to give some context. And you make the reader figure out what type dev->buffers is unnecessarily when sizeof(struct goldfish_pipe_dev_buffers) is pretty obvious. > Regards, > Roman. cheers, Joe
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
Hi Song, On 08/13/2018 10:42 PM, Song Liu wrote: > On Mon, Aug 13, 2018 at 6:17 AM, Oleg Nesterov wrote: >> On 08/13, Ravi Bangoria wrote: >>> But damn, process creation (exec) is trivial. We could add a new uprobe_exec() hook and avoid delayed_uprobe_install() in uprobe_mmap(). >>> >>> I'm sorry. I didn't get this. >> >> Sorry for confusion... >> >> I meant, if only exec*( could race with _register(), we could add another >> uprobe >> hook which updates all (delayed) counters before return to user-mode. >> Afaics, the really problematic case is dlopen() which can race with _register() too, right? >>> >>> dlopen() should internally use mmap() right? So what is the problem here? >>> Can >>> you please elaborate. >> >> What I tried to say is that we can't avoid >> uprobe_mmap()->delayed_uprobe_install() >> because dlopen() can race with _register() too, just like exec. >> >> Oleg. >> > > How about we do delayed_uprobe_install() per file? Say we keep a list > of delayed_uprobe > in load_elf_binary(). Then we can install delayed_uprobe after loading > all sections of the > file. I'm not sure if I totally understood the idea. But how this approach can solve dlopen() race with _register()? Rather, making delayed_uprobe_list an mm field seems simple and effective idea to me. The only overhead will be list_empty(mm->delayed_list) check. Please let me know if I misunderstood you. Thanks, Ravi
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
Hi Song, On 08/13/2018 10:42 PM, Song Liu wrote: > On Mon, Aug 13, 2018 at 6:17 AM, Oleg Nesterov wrote: >> On 08/13, Ravi Bangoria wrote: >>> But damn, process creation (exec) is trivial. We could add a new uprobe_exec() hook and avoid delayed_uprobe_install() in uprobe_mmap(). >>> >>> I'm sorry. I didn't get this. >> >> Sorry for confusion... >> >> I meant, if only exec*( could race with _register(), we could add another >> uprobe >> hook which updates all (delayed) counters before return to user-mode. >> Afaics, the really problematic case is dlopen() which can race with _register() too, right? >>> >>> dlopen() should internally use mmap() right? So what is the problem here? >>> Can >>> you please elaborate. >> >> What I tried to say is that we can't avoid >> uprobe_mmap()->delayed_uprobe_install() >> because dlopen() can race with _register() too, just like exec. >> >> Oleg. >> > > How about we do delayed_uprobe_install() per file? Say we keep a list > of delayed_uprobe > in load_elf_binary(). Then we can install delayed_uprobe after loading > all sections of the > file. I'm not sure if I totally understood the idea. But how this approach can solve dlopen() race with _register()? Rather, making delayed_uprobe_list an mm field seems simple and effective idea to me. The only overhead will be list_empty(mm->delayed_list) check. Please let me know if I misunderstood you. Thanks, Ravi
Re: [PATCH 2/2] rtc:rtc-ds1347: Use PTR_ERR_OR_ZERO to replace the open code
Hi zhong, On Mon, Aug 13, 2018 at 07:31:25PM +0800, zhong jiang wrote: > PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So > just replace them rather than duplicating its implement. > > Signed-off-by: zhong jiang Acked-by: Baruch Siach Thanks, baruch > --- > drivers/rtc/rtc-ds1347.c | 5 + > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c > index 938512c..b97b67f 100644 > --- a/drivers/rtc/rtc-ds1347.c > +++ b/drivers/rtc/rtc-ds1347.c > @@ -155,10 +155,7 @@ static int ds1347_probe(struct spi_device *spi) > rtc = devm_rtc_device_register(>dev, "ds1347", > _rtc_ops, THIS_MODULE); > > - if (IS_ERR(rtc)) > - return PTR_ERR(rtc); > - > - return 0; > + return PTR_ERR_OR_ZERO(rtc); > } > > static struct spi_driver ds1347_driver = { -- http://baruch.siach.name/blog/ ~. .~ Tk Open Systems =}ooO--U--Ooo{= - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -
Re: [PATCH 2/2] f2fs: tune discard speed with storage usage rate
On 08/10, Chao Yu wrote: > Previously, discard speed was fixed mostly, and in high usage rate > device, we will speed up issuing discard, but it doesn't make sense > that in a non-full filesystem, we still issue discard with slow speed. Could you please elaborate the problem in more detail? The speed depends on how many candidates? Thanks, > > Anyway, it comes out undiscarded block makes FTL GC be lower efficient > and causing high lifetime overhead. > > Let's tune discard speed as below: > > a. adjust default issue interval: > originalafter > min_interval: 50ms100ms > mid_interval: 500ms 1000ms > max_interval: 6ms 1ms > > b. if last time we stop issuing discard due to IO interruption of user, > let's reset all {min,mid,max}_interval to default one. > > c. tune {min,mid,max}_interval with below calculation method: > > base_interval = default_interval / 10; > total_interval = default_interval - base_interval; > interval = base_interval + total_interval * (100 - dev_util) / 100; > > For example: > min_interval (:100ms) > dev_util (%) interval (ms) > 0 100 > 1091 > 2082 > 3073 > ... 
> 8028 > 9019 > 100 10 > > Signed-off-by: Chao Yu > --- > fs/f2fs/f2fs.h| 11 > fs/f2fs/segment.c | 64 +-- > fs/f2fs/segment.h | 9 +++ > fs/f2fs/super.c | 2 +- > 4 files changed, 67 insertions(+), 19 deletions(-) > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 273ffdaf4891..a1dd2e1c3cb9 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -185,10 +185,9 @@ enum { > > #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) > #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per > round */ > -#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ > -#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ > -#define DEF_MAX_DISCARD_ISSUE_TIME 6 /* 60 s, if no candidates */ > -#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over > 80% */ > +#define DEF_MIN_DISCARD_ISSUE_TIME 100 /* 100 ms, if exists */ > +#define DEF_MID_DISCARD_ISSUE_TIME 1000/* 1000 ms, if device busy */ > +#define DEF_MAX_DISCARD_ISSUE_TIME 1 /* 1 ms, if no candidates */ > #define DEF_CP_INTERVAL 60 /* 60 secs */ > #define DEF_IDLE_INTERVAL5 /* 5 secs */ > > @@ -248,7 +247,8 @@ struct discard_entry { > }; > > /* default discard granularity of inner discard thread, unit: block count */ > -#define DEFAULT_DISCARD_GRANULARITY 1 > +#define MID_DISCARD_GRANULARITY 16 > +#define MIN_DISCARD_GRANULARITY 1 > > /* max discard pend list number */ > #define MAX_PLIST_NUM512 > @@ -330,6 +330,7 @@ struct discard_cmd_control { > atomic_t discard_cmd_cnt; /* # of cached cmd count */ > struct rb_root root;/* root of discard rb-tree */ > bool rbtree_check; /* config for consistence check > */ > + bool io_interrupted;/* last state of io interrupted > */ > }; > > /* for the list of fsync inodes, used only during recovery */ > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 8b52e8dfb12f..9564aaf1f27b 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, > #endif > } > > +static void 
__adjust_discard_speed(unsigned int *interval, > + unsigned int def_interval, int dev_util) > +{ > + unsigned int base_interval, total_interval; > + > + base_interval = def_interval / 10; > + total_interval = def_interval - base_interval; > + > + /* > + * if def_interval = 100, adjusted interval should be in range of > + * [10, 100]. > + */ > + *interval = base_interval + total_interval * (100 - dev_util) / 100; > +} > + > +static void __tune_discard_policy(struct f2fs_sb_info *sbi, > + struct discard_policy *dpolicy) > +{ > + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; > + int dev_util; > + > + if (dcc->io_interrupted) { > + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; > + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; > + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; > + return; > + } > + > + dev_util = dev_utilization(sbi); > + > + __adjust_discard_speed(>min_interval, > + DEF_MIN_DISCARD_ISSUE_TIME, dev_util); > + __adjust_discard_speed(>mid_interval, > + DEF_MID_DISCARD_ISSUE_TIME, dev_util); > + __adjust_discard_speed(>max_interval, > +
Re: [PATCH 2/2] rtc:rtc-ds1347: Use PTR_ERR_OR_ZERO to replace the open code
Hi zhong, On Mon, Aug 13, 2018 at 07:31:25PM +0800, zhong jiang wrote: > PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So > just replace them rather than duplicating its implement. > > Signed-off-by: zhong jiang Acked-by: Baruch Siach Thanks, baruch > --- > drivers/rtc/rtc-ds1347.c | 5 + > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c > index 938512c..b97b67f 100644 > --- a/drivers/rtc/rtc-ds1347.c > +++ b/drivers/rtc/rtc-ds1347.c > @@ -155,10 +155,7 @@ static int ds1347_probe(struct spi_device *spi) > rtc = devm_rtc_device_register(>dev, "ds1347", > _rtc_ops, THIS_MODULE); > > - if (IS_ERR(rtc)) > - return PTR_ERR(rtc); > - > - return 0; > + return PTR_ERR_OR_ZERO(rtc); > } > > static struct spi_driver ds1347_driver = { -- http://baruch.siach.name/blog/ ~. .~ Tk Open Systems =}ooO--U--Ooo{= - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -
Re: [PATCH 2/2] f2fs: tune discard speed with storage usage rate
On 08/10, Chao Yu wrote: > Previously, discard speed was fixed mostly, and in high usage rate > device, we will speed up issuing discard, but it doesn't make sense > that in a non-full filesystem, we still issue discard with slow speed. Could you please elaborate the problem in more detail? The speed depends on how many candidates? Thanks, > > Anyway, it comes out undiscarded block makes FTL GC be lower efficient > and causing high lifetime overhead. > > Let's tune discard speed as below: > > a. adjust default issue interval: > originalafter > min_interval: 50ms100ms > mid_interval: 500ms 1000ms > max_interval: 6ms 1ms > > b. if last time we stop issuing discard due to IO interruption of user, > let's reset all {min,mid,max}_interval to default one. > > c. tune {min,mid,max}_interval with below calculation method: > > base_interval = default_interval / 10; > total_interval = default_interval - base_interval; > interval = base_interval + total_interval * (100 - dev_util) / 100; > > For example: > min_interval (:100ms) > dev_util (%) interval (ms) > 0 100 > 1091 > 2082 > 3073 > ... 
> 8028 > 9019 > 100 10 > > Signed-off-by: Chao Yu > --- > fs/f2fs/f2fs.h| 11 > fs/f2fs/segment.c | 64 +-- > fs/f2fs/segment.h | 9 +++ > fs/f2fs/super.c | 2 +- > 4 files changed, 67 insertions(+), 19 deletions(-) > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 273ffdaf4891..a1dd2e1c3cb9 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -185,10 +185,9 @@ enum { > > #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) > #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per > round */ > -#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ > -#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */ > -#define DEF_MAX_DISCARD_ISSUE_TIME 6 /* 60 s, if no candidates */ > -#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over > 80% */ > +#define DEF_MIN_DISCARD_ISSUE_TIME 100 /* 100 ms, if exists */ > +#define DEF_MID_DISCARD_ISSUE_TIME 1000/* 1000 ms, if device busy */ > +#define DEF_MAX_DISCARD_ISSUE_TIME 1 /* 1 ms, if no candidates */ > #define DEF_CP_INTERVAL 60 /* 60 secs */ > #define DEF_IDLE_INTERVAL5 /* 5 secs */ > > @@ -248,7 +247,8 @@ struct discard_entry { > }; > > /* default discard granularity of inner discard thread, unit: block count */ > -#define DEFAULT_DISCARD_GRANULARITY 1 > +#define MID_DISCARD_GRANULARITY 16 > +#define MIN_DISCARD_GRANULARITY 1 > > /* max discard pend list number */ > #define MAX_PLIST_NUM512 > @@ -330,6 +330,7 @@ struct discard_cmd_control { > atomic_t discard_cmd_cnt; /* # of cached cmd count */ > struct rb_root root;/* root of discard rb-tree */ > bool rbtree_check; /* config for consistence check > */ > + bool io_interrupted;/* last state of io interrupted > */ > }; > > /* for the list of fsync inodes, used only during recovery */ > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 8b52e8dfb12f..9564aaf1f27b 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -968,6 +968,44 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, > #endif > } > > +static void 
__adjust_discard_speed(unsigned int *interval, > + unsigned int def_interval, int dev_util) > +{ > + unsigned int base_interval, total_interval; > + > + base_interval = def_interval / 10; > + total_interval = def_interval - base_interval; > + > + /* > + * if def_interval = 100, adjusted interval should be in range of > + * [10, 100]. > + */ > + *interval = base_interval + total_interval * (100 - dev_util) / 100; > +} > + > +static void __tune_discard_policy(struct f2fs_sb_info *sbi, > + struct discard_policy *dpolicy) > +{ > + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; > + int dev_util; > + > + if (dcc->io_interrupted) { > + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; > + dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; > + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; > + return; > + } > + > + dev_util = dev_utilization(sbi); > + > + __adjust_discard_speed(>min_interval, > + DEF_MIN_DISCARD_ISSUE_TIME, dev_util); > + __adjust_discard_speed(>mid_interval, > + DEF_MID_DISCARD_ISSUE_TIME, dev_util); > + __adjust_discard_speed(>max_interval, > +
Re: [PATCH 1/2] rtc:rtc-digicolor: Use PTR_ERR_OR_ZERO to replace the open code
Hi zhong, On Mon, Aug 13, 2018 at 07:31:24PM +0800, zhong jiang wrote: > PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So > just replace them rather than duplicating its implement. > > Signed-off-by: zhong jiang Acked-by: Baruch Siach Thanks, baruch > --- > drivers/rtc/rtc-digicolor.c | 4 +--- > 1 file changed, 1 insertion(+), 3 deletions(-) > > diff --git a/drivers/rtc/rtc-digicolor.c b/drivers/rtc/rtc-digicolor.c > index b253bf1..fd6850c 100644 > --- a/drivers/rtc/rtc-digicolor.c > +++ b/drivers/rtc/rtc-digicolor.c > @@ -202,10 +202,8 @@ static int __init dc_rtc_probe(struct platform_device > *pdev) > platform_set_drvdata(pdev, rtc); > rtc->rtc_dev = devm_rtc_device_register(>dev, pdev->name, > _rtc_ops, THIS_MODULE); > - if (IS_ERR(rtc->rtc_dev)) > - return PTR_ERR(rtc->rtc_dev); > > - return 0; > + return PTR_ERR_OR_ZERO(rtc->rtc_dev); > } > > static const struct of_device_id dc_dt_ids[] = { -- http://baruch.siach.name/blog/ ~. .~ Tk Open Systems =}ooO--U--Ooo{= - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -
Re: [PATCH 1/2] rtc:rtc-digicolor: Use PTR_ERR_OR_ZERO to replace the open code
Hi zhong, On Mon, Aug 13, 2018 at 07:31:24PM +0800, zhong jiang wrote: > PTR_ERR_OR_ZERO has implemented the if(IS_ERR(...)) + PTR_ERR, So > just replace them rather than duplicating its implement. > > Signed-off-by: zhong jiang Acked-by: Baruch Siach Thanks, baruch > --- > drivers/rtc/rtc-digicolor.c | 4 +--- > 1 file changed, 1 insertion(+), 3 deletions(-) > > diff --git a/drivers/rtc/rtc-digicolor.c b/drivers/rtc/rtc-digicolor.c > index b253bf1..fd6850c 100644 > --- a/drivers/rtc/rtc-digicolor.c > +++ b/drivers/rtc/rtc-digicolor.c > @@ -202,10 +202,8 @@ static int __init dc_rtc_probe(struct platform_device > *pdev) > platform_set_drvdata(pdev, rtc); > rtc->rtc_dev = devm_rtc_device_register(>dev, pdev->name, > _rtc_ops, THIS_MODULE); > - if (IS_ERR(rtc->rtc_dev)) > - return PTR_ERR(rtc->rtc_dev); > > - return 0; > + return PTR_ERR_OR_ZERO(rtc->rtc_dev); > } > > static const struct of_device_id dc_dt_ids[] = { -- http://baruch.siach.name/blog/ ~. .~ Tk Open Systems =}ooO--U--Ooo{= - bar...@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -
[PATCH 3/5] fs/locks: allow a lock request to block other requests.
Currently, a lock can block pending requests, but all pending requests are equal. If lots of pending requests are mutually exclusive, this means they will all be woken up and all but one will fail. This can hurt performance. So we will allow pending requests to block other requests. Only the first request will be woken, and it will wake the others. This patch doesn't implement this fully, but prepares the way. - It acknowledges that a request might be blocking other requests, and when the request is converted to a lock, those blocked requests are moved across. - When a request is requeued or discarded, all blocked requests are woken. - When deadlock-detection looks for the lock which blocks a given request, we follow the chain of ->fl_blocker all the way to the top. Signed-off-by: NeilBrown --- fs/locks.c | 43 +++ 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index de0b9276f23d..9439eebd4eb9 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -408,9 +408,19 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); } } - EXPORT_SYMBOL(locks_copy_lock); +static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) +{ + struct file_lock *f; + + spin_lock(_lock_lock); + list_splice_init(>fl_blocked, >fl_blocked); + list_for_each_entry(f, >fl_blocked, fl_block) + f->fl_blocker = new; + spin_unlock(_lock_lock); +} + static inline int flock_translate_cmd(int cmd) { if (cmd & LOCK_MAND) return cmd & (LOCK_MAND | LOCK_RW); @@ -683,11 +693,15 @@ static void __locks_delete_block(struct file_lock *waiter) static void __locks_wake_up_blocks(struct file_lock *blocker) { + /* Wake up all requests in blocker->fl_blocked, including +* requests blocked by those requests. 
+*/ while (!list_empty(>fl_blocked)) { struct file_lock *waiter; waiter = list_first_entry(>fl_blocked, struct file_lock, fl_block); + list_splice_init(>fl_blocked, >fl_blocked); __locks_delete_block(waiter); if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) waiter->fl_lmops->lm_notify(waiter); @@ -699,6 +713,7 @@ static void __locks_wake_up_blocks(struct file_lock *blocker) static void locks_delete_block(struct file_lock *waiter) { spin_lock(_lock_lock); + __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); spin_unlock(_lock_lock); } @@ -714,13 +729,19 @@ static void locks_delete_block(struct file_lock *waiter) * blocked_lock_lock in some cases when we see that the fl_blocked list is empty. */ static void __locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) +struct file_lock *waiter) { BUG_ON(!list_empty(>fl_block)); waiter->fl_blocker = blocker; list_add_tail(>fl_block, >fl_blocked); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); + + /* The requests in waiter->fl_blocked are known to conflict with +* waiter, but might not conflict with blocker, or the requests +* and lock which block it. So they all need to be woken. +*/ + __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. 
*/ @@ -893,8 +914,11 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) struct file_lock *fl; hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { - if (posix_same_owner(fl, block_fl)) - return fl->fl_blocker; + if (posix_same_owner(fl, block_fl)) { + while (fl->fl_blocker) + fl = fl->fl_blocker; + return fl; + } } return NULL; } @@ -987,6 +1011,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, >flc_flock); new_fl = NULL; error = 0; @@ -1179,6 +1204,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, goto out; } locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, >fl_list); fl = new_fl; new_fl = NULL; @@ -2587,13 +2613,14 @@ void locks_remove_file(struct file *filp) int posix_unblock_lock(struct file_lock
[PATCH 5/5] fs/locks: create a tree of dependent requests.
When we find an existing lock which conflicts with a request, and the request wants to wait, we currently add the request to a list. When the lock is removed, the whole list is woken. This can cause the thundering-herd problem. To reduce the problem, we make use of the (new) fact that a pending request can itself have a list of blocked requests. When we find a conflict, we look through the existing blocked requests. If any one of them blocks the new request, the new request is attached below that request, otherwise it is added to the list of blocked requests, which are now known to be mutually non-conflicting. This way, when the lock is released, only a set of non-conflicting locks will be woken, the rest can stay asleep. If the lock request cannot be granted and the request needs to be requeued, all the other requests it blocks will then be woken Reported-and-tested-by: Martin Wilck Signed-off-by: NeilBrown --- fs/locks.c | 29 +++-- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index c7a372cebff1..af250afceff4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -727,11 +727,25 @@ static void locks_delete_block(struct file_lock *waiter) * fl_blocked list itself is protected by the blocked_lock_lock, but by ensuring * that the flc_lock is also held on insertions we can avoid taking the * blocked_lock_lock in some cases when we see that the fl_blocked list is empty. + * + * Rather than just adding to the list, we check for conflicts with any existing + * waiters, and add beneath any waiter that blocks the new waiter. + * Thus wakeups don't happen until needed. 
*/ static void __locks_insert_block(struct file_lock *blocker, -struct file_lock *waiter) +struct file_lock *waiter, +bool conflict(struct file_lock *, + struct file_lock *)) { + struct file_lock *fl; BUG_ON(!list_empty(>fl_block)); + +new_blocker: + list_for_each_entry(fl, >fl_blocked, fl_block) + if (conflict(fl, waiter)) { + blocker = fl; + goto new_blocker; + } waiter->fl_blocker = blocker; list_add_tail(>fl_block, >fl_blocked); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) @@ -746,10 +760,12 @@ static void __locks_insert_block(struct file_lock *blocker, /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) + struct file_lock *waiter, + bool conflict(struct file_lock *, +struct file_lock *)) { spin_lock(_lock_lock); - __locks_insert_block(blocker, waiter); + __locks_insert_block(blocker, waiter, conflict); spin_unlock(_lock_lock); } @@ -1008,7 +1024,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) if (!(request->fl_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; - locks_insert_block(fl, request); + locks_insert_block(fl, request, flock_locks_conflict); goto out; } if (request->fl_flags & FL_ACCESS) @@ -1082,7 +1098,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, spin_lock(_lock_lock); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; - __locks_insert_block(fl, request); + __locks_insert_block(fl, request, +posix_locks_conflict); } spin_unlock(_lock_lock); goto out; @@ -1555,7 +1572,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) break_time -= jiffies; if (break_time == 0) break_time++; - locks_insert_block(fl, new_fl); + locks_insert_block(fl, new_fl, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(>flc_lock); percpu_up_read_preempt_enable(_rwsem);
[PATCH 4/5] fs/locks: change all *_conflict() functions to return bool.
posix_locks_conflict() and flock_locks_conflict() both return int. leases_conflict() returns bool. This inconsistency will cause problems for the next patch if not fixed. So change posix_locks_conflict() and flock_locks_conflict() to return bool. Also change the locks_conflict() helper. And convert some return (foo); to return foo; Signed-off-by: NeilBrown --- fs/locks.c | 27 +++ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 9439eebd4eb9..c7a372cebff1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -803,47 +803,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose) /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { if (sys_fl->fl_type == F_WRLCK) - return 1; + return true; if (caller_fl->fl_type == F_WRLCK) - return 1; - return 0; + return true; + return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ -static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool posix_locks_conflict(struct file_lock *caller_fl, +struct file_lock *sys_fl) { /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_fl, sys_fl)) - return (0); + return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). 
*/ -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool flock_locks_conflict(struct file_lock *caller_fl, +struct file_lock *sys_fl) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_fl->fl_file == sys_fl->fl_file) - return (0); + return false; if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } void
[PATCH 3/5] fs/locks: allow a lock request to block other requests.
Currently, a lock can block pending requests, but all pending requests are equal. If lots of pending requests are mutually exclusive, this means they will all be woken up and all but one will fail. This can hurt performance. So we will allow pending requests to block other requests. Only the first request will be woken, and it will wake the others. This patch doesn't implement this fully, but prepares the way. - It acknowledges that a request might be blocking other requests, and when the request is converted to a lock, those blocked requests are moved across. - When a request is requeued or discarded, all blocked requests are woken. - When deadlock-detection looks for the lock which blocks a given request, we follow the chain of ->fl_blocker all the way to the top. Signed-off-by: NeilBrown --- fs/locks.c | 43 +++ 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index de0b9276f23d..9439eebd4eb9 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -408,9 +408,19 @@ void locks_copy_lock(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); } } - EXPORT_SYMBOL(locks_copy_lock); +static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) +{ + struct file_lock *f; + + spin_lock(_lock_lock); + list_splice_init(>fl_blocked, >fl_blocked); + list_for_each_entry(f, >fl_blocked, fl_block) + f->fl_blocker = new; + spin_unlock(_lock_lock); +} + static inline int flock_translate_cmd(int cmd) { if (cmd & LOCK_MAND) return cmd & (LOCK_MAND | LOCK_RW); @@ -683,11 +693,15 @@ static void __locks_delete_block(struct file_lock *waiter) static void __locks_wake_up_blocks(struct file_lock *blocker) { + /* Wake up all requests in blocker->fl_blocked, including +* requests blocked by those requests. 
+*/ while (!list_empty(>fl_blocked)) { struct file_lock *waiter; waiter = list_first_entry(>fl_blocked, struct file_lock, fl_block); + list_splice_init(>fl_blocked, >fl_blocked); __locks_delete_block(waiter); if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) waiter->fl_lmops->lm_notify(waiter); @@ -699,6 +713,7 @@ static void __locks_wake_up_blocks(struct file_lock *blocker) static void locks_delete_block(struct file_lock *waiter) { spin_lock(_lock_lock); + __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); spin_unlock(_lock_lock); } @@ -714,13 +729,19 @@ static void locks_delete_block(struct file_lock *waiter) * blocked_lock_lock in some cases when we see that the fl_blocked list is empty. */ static void __locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) +struct file_lock *waiter) { BUG_ON(!list_empty(>fl_block)); waiter->fl_blocker = blocker; list_add_tail(>fl_block, >fl_blocked); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); + + /* The requests in waiter->fl_blocked are known to conflict with +* waiter, but might not conflict with blocker, or the requests +* and lock which block it. So they all need to be woken. +*/ + __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. 
*/ @@ -893,8 +914,11 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) struct file_lock *fl; hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { - if (posix_same_owner(fl, block_fl)) - return fl->fl_blocker; + if (posix_same_owner(fl, block_fl)) { + while (fl->fl_blocker) + fl = fl->fl_blocker; + return fl; + } } return NULL; } @@ -987,6 +1011,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, >flc_flock); new_fl = NULL; error = 0; @@ -1179,6 +1204,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, goto out; } locks_copy_lock(new_fl, request); + locks_move_blocks(new_fl, request); locks_insert_lock_ctx(new_fl, >fl_list); fl = new_fl; new_fl = NULL; @@ -2587,13 +2613,14 @@ void locks_remove_file(struct file *filp) int posix_unblock_lock(struct file_lock
[PATCH 5/5] fs/locks: create a tree of dependent requests.
When we find an existing lock which conflicts with a request, and the request wants to wait, we currently add the request to a list. When the lock is removed, the whole list is woken. This can cause the thundering-herd problem. To reduce the problem, we make use of the (new) fact that a pending request can itself have a list of blocked requests. When we find a conflict, we look through the existing blocked requests. If any one of them blocks the new request, the new request is attached below that request, otherwise it is added to the list of blocked requests, which are now known to be mutually non-conflicting. This way, when the lock is released, only a set of non-conflicting locks will be woken, the rest can stay asleep. If the lock request cannot be granted and the request needs to be requeued, all the other requests it blocks will then be woken Reported-and-tested-by: Martin Wilck Signed-off-by: NeilBrown --- fs/locks.c | 29 +++-- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index c7a372cebff1..af250afceff4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -727,11 +727,25 @@ static void locks_delete_block(struct file_lock *waiter) * fl_blocked list itself is protected by the blocked_lock_lock, but by ensuring * that the flc_lock is also held on insertions we can avoid taking the * blocked_lock_lock in some cases when we see that the fl_blocked list is empty. + * + * Rather than just adding to the list, we check for conflicts with any existing + * waiters, and add beneath any waiter that blocks the new waiter. + * Thus wakeups don't happen until needed. 
*/ static void __locks_insert_block(struct file_lock *blocker, -struct file_lock *waiter) +struct file_lock *waiter, +bool conflict(struct file_lock *, + struct file_lock *)) { + struct file_lock *fl; BUG_ON(!list_empty(>fl_block)); + +new_blocker: + list_for_each_entry(fl, >fl_blocked, fl_block) + if (conflict(fl, waiter)) { + blocker = fl; + goto new_blocker; + } waiter->fl_blocker = blocker; list_add_tail(>fl_block, >fl_blocked); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) @@ -746,10 +760,12 @@ static void __locks_insert_block(struct file_lock *blocker, /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, - struct file_lock *waiter) + struct file_lock *waiter, + bool conflict(struct file_lock *, +struct file_lock *)) { spin_lock(_lock_lock); - __locks_insert_block(blocker, waiter); + __locks_insert_block(blocker, waiter, conflict); spin_unlock(_lock_lock); } @@ -1008,7 +1024,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) if (!(request->fl_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; - locks_insert_block(fl, request); + locks_insert_block(fl, request, flock_locks_conflict); goto out; } if (request->fl_flags & FL_ACCESS) @@ -1082,7 +1098,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, spin_lock(_lock_lock); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; - __locks_insert_block(fl, request); + __locks_insert_block(fl, request, +posix_locks_conflict); } spin_unlock(_lock_lock); goto out; @@ -1555,7 +1572,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) break_time -= jiffies; if (break_time == 0) break_time++; - locks_insert_block(fl, new_fl); + locks_insert_block(fl, new_fl, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(>flc_lock); percpu_up_read_preempt_enable(_rwsem);
[PATCH 4/5] fs/locks: change all *_conflict() functions to return bool.
posix_locks_conflict() and flock_locks_conflict() both return int. leases_conflict() returns bool. This inconsistency will cause problems for the next patch if not fixed. So change posix_locks_conflict() and flock_locks_conflict() to return bool. Also change the locks_conflict() helper. And convert some return (foo); to return foo; Signed-off-by: NeilBrown --- fs/locks.c | 27 +++ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 9439eebd4eb9..c7a372cebff1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -803,47 +803,50 @@ locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose) /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ -static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool locks_conflict(struct file_lock *caller_fl, + struct file_lock *sys_fl) { if (sys_fl->fl_type == F_WRLCK) - return 1; + return true; if (caller_fl->fl_type == F_WRLCK) - return 1; - return 0; + return true; + return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ -static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool posix_locks_conflict(struct file_lock *caller_fl, +struct file_lock *sys_fl) { /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_fl, sys_fl)) - return (0); + return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). 
*/ -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) +static bool flock_locks_conflict(struct file_lock *caller_fl, +struct file_lock *sys_fl) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_fl->fl_file == sys_fl->fl_file) - return (0); + return false; if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) - return 0; + return false; - return (locks_conflict(caller_fl, sys_fl)); + return locks_conflict(caller_fl, sys_fl); } void
[PATCH 0/5 v2] locks: avoid thundering-herd wake-ups
V2, which added wake_non_conflicts(), was more broken than V1 - as Bruce explained there is no transitivity in the blocking relation between locks. So this series takes a simpler approach. It still attaches waiters between other waiters as necessary to ensure that: - a waiter is blocked by its parent (fl->blocker) and all further ancestors, and - the list of waiters on fl_blocked are mutually non-conflicting. When a lock (the root of a tree of requests) is released, only its immediate children (fl_blocked) are woken. When any lock is woken (either because its fl_blocker was released, or due to a signal or similar) it will either: - be granted - be aborted - be re-queued beneath some other lock. In the first case, the tree of blocked locks is moved across to the newly created lock, and the invariants still hold. In the other two cases, the tree of blocked waiters are all detached and woken. Note that this series has not received much testing yet. Original description: If you have a many-core machine, and have many threads all wanting to briefly lock a given file (udev is known to do this), you can get quite poor performance. When one thread releases a lock, it wakes up all other threads that are waiting (classic thundering-herd) - one will get the lock and the others go to sleep. When you have few cores, this is not very noticeable: by the time the 4th or 5th thread gets enough CPU time to try to claim the lock, the earlier threads have claimed it, done what was needed, and released. With 50+ cores, the contention can easily be measured. This patchset creates a tree of pending lock requests in which siblings don't conflict and each lock request does conflict with its parent. When a lock is released, only requests which don't conflict with each other are woken. Testing shows that lock-acquisitions-per-second is now fairly stable even as the number of contending processes goes to 1000. Without this patch, locks-per-second drops off steeply after a few 10s of processes. 
There is a small cost to this extra complexity. At 20 processes running a particular test on 72 cores, the lock acquisitions per second drops from 1.8 million to 1.4 million with this patch. For 100 processes, this patch still provides 1.4 million while without this patch there are about 700,000. NeilBrown --- NeilBrown (5): fs/locks: rename some lists and pointers. fs/locks: split out __locks_wake_up_blocks(). fs/locks: allow a lock request to block other requests. fs/locks: change all *_conflict() functions to return bool. fs/locks: create a tree of dependent requests. fs/cifs/file.c |2 - fs/locks.c | 156 ++- include/linux/fs.h |7 +- include/trace/events/filelock.h | 16 ++-- 4 files changed, 119 insertions(+), 62 deletions(-) -- Signature
[PATCH 0/5 v2] locks: avoid thundering-herd wake-ups
V2, which added wake_non_conflicts(), was more broken than V1 - as Bruce explained there is no transitivity in the blocking relation between locks. So this series takes a simpler approach. It still attaches waiters between other waiters as necessary to ensure that: - a waiter is blocked by its parent (fl->blocker) and all further ancestors, and - the list of waiters on fl_blocked are mutually non-conflicting. When a lock (the root of a tree of requests) is released, only its immediate children (fl_blocked) are woken. When any lock is woken (either because its fl_blocker was released, or due to a signal or similar) it will either: - be granted - be aborted - be re-queued beneath some other lock. In the first case, the tree of blocked locks is moved across to the newly created lock, and the invariants still hold. In the other two cases, the tree of blocked waiters are all detached and woken. Note that this series has not received much testing yet. Original description: If you have a many-core machine, and have many threads all wanting to briefly lock a given file (udev is known to do this), you can get quite poor performance. When one thread releases a lock, it wakes up all other threads that are waiting (classic thundering-herd) - one will get the lock and the others go to sleep. When you have few cores, this is not very noticeable: by the time the 4th or 5th thread gets enough CPU time to try to claim the lock, the earlier threads have claimed it, done what was needed, and released. With 50+ cores, the contention can easily be measured. This patchset creates a tree of pending lock requests in which siblings don't conflict and each lock request does conflict with its parent. When a lock is released, only requests which don't conflict with each other are woken. Testing shows that lock-acquisitions-per-second is now fairly stable even as the number of contending processes goes to 1000. Without this patch, locks-per-second drops off steeply after a few 10s of processes. 
There is a small cost to this extra complexity. At 20 processes running a particular test on 72 cores, the lock acquisitions per second drops from 1.8 million to 1.4 million with this patch. For 100 processes, this patch still provides 1.4 million while without this patch there are about 700,000. NeilBrown --- NeilBrown (5): fs/locks: rename some lists and pointers. fs/locks: split out __locks_wake_up_blocks(). fs/locks: allow a lock request to block other requests. fs/locks: change all *_conflict() functions to return bool. fs/locks: create a tree of dependent requests. fs/cifs/file.c |2 - fs/locks.c | 156 ++- include/linux/fs.h |7 +- include/trace/events/filelock.h | 16 ++-- 4 files changed, 119 insertions(+), 62 deletions(-) -- Signature
[PATCH 2/5] fs/locks: split out __locks_wake_up_blocks().
This functionality will be useful in future patches, so split it out from locks_wake_up_blocks(). Signed-off-by: NeilBrown --- fs/locks.c | 27 --- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 322491e70e41..de0b9276f23d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -681,6 +681,21 @@ static void __locks_delete_block(struct file_lock *waiter) waiter->fl_blocker = NULL; } +static void __locks_wake_up_blocks(struct file_lock *blocker) +{ + while (!list_empty(>fl_blocked)) { + struct file_lock *waiter; + + waiter = list_first_entry(>fl_blocked, + struct file_lock, fl_block); + __locks_delete_block(waiter); + if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) + waiter->fl_lmops->lm_notify(waiter); + else + wake_up(>fl_wait); + } +} + static void locks_delete_block(struct file_lock *waiter) { spin_lock(_lock_lock); @@ -735,17 +750,7 @@ static void locks_wake_up_blocks(struct file_lock *blocker) return; spin_lock(_lock_lock); - while (!list_empty(>fl_blocked)) { - struct file_lock *waiter; - - waiter = list_first_entry(>fl_blocked, - struct file_lock, fl_block); - __locks_delete_block(waiter); - if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) - waiter->fl_lmops->lm_notify(waiter); - else - wake_up(>fl_wait); - } + __locks_wake_up_blocks(blocker); spin_unlock(_lock_lock); }
[PATCH 1/5] fs/locks: rename some lists and pointers.
struct file lock contains an 'fl_next' pointer which is used to point to the lock that this request is blocked waiting for. So rename it to fl_blocker. The fl_blocked list_head in an active lock is the head of a list of blocked requests. In a request it is a node in that list. These are two distinct uses, so replace with two list_heads with different names. fl_blocked is the head of a list of blocked requests fl_block is a node on that list. The two different list_heads are never used at the same time, but that will change in a future patch. Note that a tracepoint is changed to report fl_blocker instead of fl_next. Signed-off-by: NeilBrown --- fs/cifs/file.c |2 +- fs/locks.c | 40 --- include/linux/fs.h |7 +-- include/trace/events/filelock.h | 16 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8d41ca7bfcf1..066ed2e4ba96 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1092,7 +1092,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) rc = posix_lock_file(file, flock, NULL); up_write(>lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); + rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); if (!rc) goto try_again; posix_unblock_lock(flock); diff --git a/fs/locks.c b/fs/locks.c index db7b6917d9c5..322491e70e41 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -194,7 +194,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * - * In addition, it also protects the fl->fl_block list, and the fl->fl_next + * In addition, it also protects the fl->fl_blocked list, and the fl->fl_blocker * pointer for file_lock structures that are acting as lock requests (in * contrast to those that are acting as records of acquired locks). 
* @@ -203,7 +203,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. * - * In particular, adding an entry to the fl_block list requires that you hold + * In particular, adding an entry to the fl_blocked list requires that you hold * both the flc_lock and the blocked_lock_lock (acquired in that order). * Deleting an entry from the list however only requires the file_lock_lock. */ @@ -302,6 +302,7 @@ static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(>fl_link); INIT_LIST_HEAD(>fl_list); + INIT_LIST_HEAD(>fl_blocked); INIT_LIST_HEAD(>fl_block); init_waitqueue_head(>fl_wait); } @@ -341,6 +342,7 @@ void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(>fl_wait)); BUG_ON(!list_empty(>fl_list)); + BUG_ON(!list_empty(>fl_blocked)); BUG_ON(!list_empty(>fl_block)); BUG_ON(!hlist_unhashed(>fl_link)); @@ -676,7 +678,7 @@ static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); list_del_init(>fl_block); - waiter->fl_next = NULL; + waiter->fl_blocker = NULL; } static void locks_delete_block(struct file_lock *waiter) @@ -692,16 +694,16 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The - * fl_block list itself is protected by the blocked_lock_lock, but by ensuring + * fl_blocked list itself is protected by the blocked_lock_lock, but by ensuring * that the flc_lock is also held on insertions we can avoid taking the - * blocked_lock_lock in some cases when we see that the fl_block list is empty. + * blocked_lock_lock in some cases when we see that the fl_blocked list is empty. 
*/ static void __locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { BUG_ON(!list_empty(>fl_block)); - waiter->fl_next = blocker; - list_add_tail(>fl_block, >fl_block); + waiter->fl_blocker = blocker; + list_add_tail(>fl_block, >fl_blocked); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); } @@ -725,18 +727,18 @@ static void locks_wake_up_blocks(struct file_lock *blocker) /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and -* the flc_lock is always held here. Note that removal from the fl_block +* the flc_lock is always held here. Note that removal from the fl_blocked * list does not require the flc_lock, so we
[PATCH 2/5] fs/locks: split out __locks_wake_up_blocks().
This functionality will be useful in future patches, so split it out from locks_wake_up_blocks(). Signed-off-by: NeilBrown --- fs/locks.c | 27 --- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 322491e70e41..de0b9276f23d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -681,6 +681,21 @@ static void __locks_delete_block(struct file_lock *waiter) waiter->fl_blocker = NULL; } +static void __locks_wake_up_blocks(struct file_lock *blocker) +{ + while (!list_empty(>fl_blocked)) { + struct file_lock *waiter; + + waiter = list_first_entry(>fl_blocked, + struct file_lock, fl_block); + __locks_delete_block(waiter); + if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) + waiter->fl_lmops->lm_notify(waiter); + else + wake_up(>fl_wait); + } +} + static void locks_delete_block(struct file_lock *waiter) { spin_lock(_lock_lock); @@ -735,17 +750,7 @@ static void locks_wake_up_blocks(struct file_lock *blocker) return; spin_lock(_lock_lock); - while (!list_empty(>fl_blocked)) { - struct file_lock *waiter; - - waiter = list_first_entry(>fl_blocked, - struct file_lock, fl_block); - __locks_delete_block(waiter); - if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) - waiter->fl_lmops->lm_notify(waiter); - else - wake_up(>fl_wait); - } + __locks_wake_up_blocks(blocker); spin_unlock(_lock_lock); }
[PATCH 1/5] fs/locks: rename some lists and pointers.
struct file lock contains an 'fl_next' pointer which is used to point to the lock that this request is blocked waiting for. So rename it to fl_blocker. The fl_blocked list_head in an active lock is the head of a list of blocked requests. In a request it is a node in that list. These are two distinct uses, so replace with two list_heads with different names. fl_blocked is the head of a list of blocked requests fl_block is a node on that list. The two different list_heads are never used at the same time, but that will change in a future patch. Note that a tracepoint is changed to report fl_blocker instead of fl_next. Signed-off-by: NeilBrown --- fs/cifs/file.c |2 +- fs/locks.c | 40 --- include/linux/fs.h |7 +-- include/trace/events/filelock.h | 16 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8d41ca7bfcf1..066ed2e4ba96 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1092,7 +1092,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) rc = posix_lock_file(file, flock, NULL); up_write(>lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); + rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); if (!rc) goto try_again; posix_unblock_lock(flock); diff --git a/fs/locks.c b/fs/locks.c index db7b6917d9c5..322491e70e41 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -194,7 +194,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * - * In addition, it also protects the fl->fl_block list, and the fl->fl_next + * In addition, it also protects the fl->fl_blocked list, and the fl->fl_blocker * pointer for file_lock structures that are acting as lock requests (in * contrast to those that are acting as records of acquired locks). 
* @@ -203,7 +203,7 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. * - * In particular, adding an entry to the fl_block list requires that you hold + * In particular, adding an entry to the fl_blocked list requires that you hold * both the flc_lock and the blocked_lock_lock (acquired in that order). * Deleting an entry from the list however only requires the file_lock_lock. */ @@ -302,6 +302,7 @@ static void locks_init_lock_heads(struct file_lock *fl) { INIT_HLIST_NODE(>fl_link); INIT_LIST_HEAD(>fl_list); + INIT_LIST_HEAD(>fl_blocked); INIT_LIST_HEAD(>fl_block); init_waitqueue_head(>fl_wait); } @@ -341,6 +342,7 @@ void locks_free_lock(struct file_lock *fl) { BUG_ON(waitqueue_active(>fl_wait)); BUG_ON(!list_empty(>fl_list)); + BUG_ON(!list_empty(>fl_blocked)); BUG_ON(!list_empty(>fl_block)); BUG_ON(!hlist_unhashed(>fl_link)); @@ -676,7 +678,7 @@ static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); list_del_init(>fl_block); - waiter->fl_next = NULL; + waiter->fl_blocker = NULL; } static void locks_delete_block(struct file_lock *waiter) @@ -692,16 +694,16 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The - * fl_block list itself is protected by the blocked_lock_lock, but by ensuring + * fl_blocked list itself is protected by the blocked_lock_lock, but by ensuring * that the flc_lock is also held on insertions we can avoid taking the - * blocked_lock_lock in some cases when we see that the fl_block list is empty. + * blocked_lock_lock in some cases when we see that the fl_blocked list is empty. 
*/ static void __locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { BUG_ON(!list_empty(>fl_block)); - waiter->fl_next = blocker; - list_add_tail(>fl_block, >fl_block); + waiter->fl_blocker = blocker; + list_add_tail(>fl_block, >fl_blocked); if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); } @@ -725,18 +727,18 @@ static void locks_wake_up_blocks(struct file_lock *blocker) /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and -* the flc_lock is always held here. Note that removal from the fl_block +* the flc_lock is always held here. Note that removal from the fl_blocked * list does not require the flc_lock, so we
Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large
Hi, thank you for reviewing my patches. I decided to put BUILD_BUG_ON close to places where it is important that these structs fit into a memory page to give some context. Regards, Roman. On Mon, Aug 13, 2018 at 6:48 PM Joe Perches wrote: > > On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > > From: Roman Kiryanov > > > > Since the driver provides no workaround prevent in cases if structs do > > no fit into a memory page, it is better to fail complation to find about > > the issue earlt instead of returning errors at runtime. > > Minor earlt/early typo > > > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > > b/drivers/platform/goldfish/goldfish_pipe.c > [] > > @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct > > platform_device *pdev) > >* needs to be contained in a single physical page. The easiest choice > >* is to just allocate a page and place the buffers in it. > >*/ > > - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE)) > > - return -ENOMEM; > > - > > + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); > > page = (char *)__get_free_page(GFP_KERNEL); > > if (!page) { > > kfree(dev->pipes); > > @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device > > *pdev) > > struct resource *r; > > struct goldfish_pipe_dev *dev = pipe_dev; > > > > - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE)) > > - return -ENOMEM; > > + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); > > > > /* not thread safe, but this should not happen */ > > WARN_ON(dev->base != NULL); > > Why separate these BUILD_BUG_ONs into 2 different functions? > > Why not just > BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); > BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE); >
Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large
Hi, thank you for reviewing my patches. I decided to put BUILD_BUG_ON close to places where it is important that these structs fit into a memory page to give some context. Regards, Roman. On Mon, Aug 13, 2018 at 6:48 PM Joe Perches wrote: > > On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > > From: Roman Kiryanov > > > > Since the driver provides no workaround prevent in cases if structs do > > no fit into a memory page, it is better to fail complation to find about > > the issue earlt instead of returning errors at runtime. > > Minor earlt/early typo > > > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > > b/drivers/platform/goldfish/goldfish_pipe.c > [] > > @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct > > platform_device *pdev) > >* needs to be contained in a single physical page. The easiest choice > >* is to just allocate a page and place the buffers in it. > >*/ > > - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE)) > > - return -ENOMEM; > > - > > + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); > > page = (char *)__get_free_page(GFP_KERNEL); > > if (!page) { > > kfree(dev->pipes); > > @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device > > *pdev) > > struct resource *r; > > struct goldfish_pipe_dev *dev = pipe_dev; > > > > - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE)) > > - return -ENOMEM; > > + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); > > > > /* not thread safe, but this should not happen */ > > WARN_ON(dev->base != NULL); > > Why separate these BUILD_BUG_ONs into 2 different functions? > > Why not just > BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); > BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE); >
Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging
Hi Roman, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.18 next-20180813] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/rkir-google-com/platform-goldfish-pipe-Fix-comments-to-fit-80-columns/20180814-104717 config: i386-randconfig-x008-201832 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): drivers/platform//goldfish/goldfish_pipe.c: In function 'goldfish_pipe_read_write': >> drivers/platform//goldfish/goldfish_pipe.c:405:22: error: 'struct >> goldfish_pipe_dev' has no member named 'pdev_dev' pdev_dev = pipe->dev->pdev_dev; ^~ vim +405 drivers/platform//goldfish/goldfish_pipe.c 379 380 static ssize_t goldfish_pipe_read_write(struct file *filp, 381 char __user *buffer, size_t bufflen, int is_write) 382 { 383 struct goldfish_pipe *pipe = filp->private_data; 384 int count = 0, ret = -EINVAL; 385 unsigned long address, address_end, last_page; 386 unsigned int last_page_size; 387 struct device *pdev_dev; 388 389 /* If the emulator already closed the pipe, no need to go further */ 390 if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags))) 391 return -EIO; 392 /* Null reads or writes succeeds */ 393 if (unlikely(bufflen == 0)) 394 return 0; 395 /* Check the buffer range for access */ 396 if (unlikely(!access_ok(is_write ? 
VERIFY_WRITE : VERIFY_READ, 397 buffer, bufflen))) 398 return -EFAULT; 399 400 address = (unsigned long)buffer; 401 address_end = address + bufflen; 402 last_page = (address_end - 1) & PAGE_MASK; 403 last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; 404 > 405 pdev_dev = pipe->dev->pdev_dev; 406 407 while (address < address_end) { 408 s32 consumed_size; 409 int status; 410 411 ret = transfer_max_buffers(pipe, address, address_end, is_write, 412 last_page, last_page_size, _size, 413 ); 414 if (ret < 0) 415 break; 416 417 if (consumed_size > 0) { 418 /* No matter what's the status, we've transferred 419 * something. 420 */ 421 count += consumed_size; 422 address += consumed_size; 423 } 424 if (status > 0) 425 continue; 426 if (status == 0) { 427 /* EOF */ 428 ret = 0; 429 break; 430 } 431 if (count > 0) { 432 /* 433 * An error occurred, but we already transferred 434 * something on one of the previous iterations. 435 * Just return what we already copied and log this 436 * err. 437 */ 438 if (status != PIPE_ERROR_AGAIN) 439 dev_err_ratelimited(pdev_dev, 440 "goldfish_pipe: backend error %d on %s\n", 441 status, is_write ? "write" : "read"); 442 break; 443 } 444 445 /* 446 * If the error is not PIPE_ERROR_AGAIN, or if we are in 447 * non-blocking mode, just return the error code. 448 */ 449 if (status != PIPE_ERROR_AGAIN || 450 (filp->f_flags & O_NONBLOCK) != 0) { 451 ret = goldfish_pipe_error_convert(status); 452 break; 453 } 454 455 status = wait_for_host_signal(pipe, is_write); 456 if (status < 0) 457 return status; 458 } 459 460 if (count > 0) 461 return count; 462 return ret; 463 } 464 --- 0-DAY kernel test infrastructure
Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging
Hi Roman, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.18 next-20180813] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/rkir-google-com/platform-goldfish-pipe-Fix-comments-to-fit-80-columns/20180814-104717 config: i386-randconfig-x008-201832 (attached as .config) compiler: gcc-7 (Debian 7.3.0-16) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): drivers/platform//goldfish/goldfish_pipe.c: In function 'goldfish_pipe_read_write': >> drivers/platform//goldfish/goldfish_pipe.c:405:22: error: 'struct >> goldfish_pipe_dev' has no member named 'pdev_dev' pdev_dev = pipe->dev->pdev_dev; ^~ vim +405 drivers/platform//goldfish/goldfish_pipe.c 379 380 static ssize_t goldfish_pipe_read_write(struct file *filp, 381 char __user *buffer, size_t bufflen, int is_write) 382 { 383 struct goldfish_pipe *pipe = filp->private_data; 384 int count = 0, ret = -EINVAL; 385 unsigned long address, address_end, last_page; 386 unsigned int last_page_size; 387 struct device *pdev_dev; 388 389 /* If the emulator already closed the pipe, no need to go further */ 390 if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags))) 391 return -EIO; 392 /* Null reads or writes succeeds */ 393 if (unlikely(bufflen == 0)) 394 return 0; 395 /* Check the buffer range for access */ 396 if (unlikely(!access_ok(is_write ? 
VERIFY_WRITE : VERIFY_READ, 397 buffer, bufflen))) 398 return -EFAULT; 399 400 address = (unsigned long)buffer; 401 address_end = address + bufflen; 402 last_page = (address_end - 1) & PAGE_MASK; 403 last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; 404 > 405 pdev_dev = pipe->dev->pdev_dev; 406 407 while (address < address_end) { 408 s32 consumed_size; 409 int status; 410 411 ret = transfer_max_buffers(pipe, address, address_end, is_write, 412 last_page, last_page_size, _size, 413 ); 414 if (ret < 0) 415 break; 416 417 if (consumed_size > 0) { 418 /* No matter what's the status, we've transferred 419 * something. 420 */ 421 count += consumed_size; 422 address += consumed_size; 423 } 424 if (status > 0) 425 continue; 426 if (status == 0) { 427 /* EOF */ 428 ret = 0; 429 break; 430 } 431 if (count > 0) { 432 /* 433 * An error occurred, but we already transferred 434 * something on one of the previous iterations. 435 * Just return what we already copied and log this 436 * err. 437 */ 438 if (status != PIPE_ERROR_AGAIN) 439 dev_err_ratelimited(pdev_dev, 440 "goldfish_pipe: backend error %d on %s\n", 441 status, is_write ? "write" : "read"); 442 break; 443 } 444 445 /* 446 * If the error is not PIPE_ERROR_AGAIN, or if we are in 447 * non-blocking mode, just return the error code. 448 */ 449 if (status != PIPE_ERROR_AGAIN || 450 (filp->f_flags & O_NONBLOCK) != 0) { 451 ret = goldfish_pipe_error_convert(status); 452 break; 453 } 454 455 status = wait_for_host_signal(pipe, is_write); 456 if (status < 0) 457 return status; 458 } 459 460 if (count > 0) 461 return count; 462 return ret; 463 } 464 --- 0-DAY kernel test infrastructure
Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation
Hi JC, On Tue, Aug 14, 2018 at 7:24 AM, Jayachandran C wrote: > Hi Srinath, Ray, > > On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote: >> Add DT binding document for stingray usb phy driver. >> >> Signed-off-by: Srinath Mannam >> Reviewed-by: Ray Jui >> Acked-by: Rob Herring > > The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY > from what I understand. Our hardware team reports that there are some > failures seen without a custom PHY driver. > > Do you require this PHY driver on regular operation on Stingray? Or is > this for some specific functionality? I am asking since the issue > reported here is on switching between a USB 3.0 and USB 2.0 devices. We need it for regular operation in stingray. > > Any plans on submitting this driver again? I will submit it ASAP. Regards, Srinath. > > Thanks, > JC
Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation
Hi JC, On Tue, Aug 14, 2018 at 7:24 AM, Jayachandran C wrote: > Hi Srinath, Ray, > > On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote: >> Add DT binding document for stingray usb phy driver. >> >> Signed-off-by: Srinath Mannam >> Reviewed-by: Ray Jui >> Acked-by: Rob Herring > > The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY > from what I understand. Our hardware team reports that there are some > failures seen without a custom PHY driver. > > Do you require this PHY driver on regular operation on Stingray? Or is > this for some specific functionality? I am asking since the issue > reported here is on switching between a USB 3.0 and USB 2.0 devices. We need it for regular operation in stingray. > > Any plans on submitting this driver again? I will submit it ASAP. Regards, Srinath. > > Thanks, > JC
Re: [PATCH] kconfig: fix the rule of mainmenu_stmt symbol
2018-08-09 15:47 GMT+09:00 Masahiro Yamada : > The rule of mainmenu_stmt does not have debug print of zconf_lineno(), > but if it had, it would print a wrong line number for the same reason > as commit b2d00d7c61c8 ("kconfig: fix line numbers for if-entries in > menu tree"). > > The mainmenu_stmt does not need to eat following empty lines because > they are reduced to common_stmt. > > Signed-off-by: Masahiro Yamada > --- Applied to linux-kbuild/kconfig. > scripts/kconfig/zconf.y | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y > index 4b68272..95a4fd3 100644 > --- a/scripts/kconfig/zconf.y > +++ b/scripts/kconfig/zconf.y > @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE]; > static struct menu *current_menu, *current_entry; > > %} > -%expect 31 > +%expect 30 > > %union > { > @@ -117,7 +117,7 @@ start: mainmenu_stmt stmt_list | stmt_list; > > /* mainmenu entry */ > > -mainmenu_stmt: T_MAINMENU prompt nl > +mainmenu_stmt: T_MAINMENU prompt T_EOL > { > menu_add_prompt(P_MENU, $2, NULL); > }; > -- > 2.7.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Best Regards Masahiro Yamada
Re: [PATCH 1/2] kconfig: remove unused sym_get_env_prop() function
2018-08-14 2:20 GMT+09:00 Sam Ravnborg : > On Tue, Aug 14, 2018 at 01:48:38AM +0900, Masahiro Yamada wrote: >> This function is unused since commit 104daea149c4 ("kconfig: reference >> environment variables directly and remove 'option env='"). >> >> Signed-off-by: Masahiro Yamada > > Both patches are obviously correct. > Feel free to add: > Acked-by: Sam Ravnborg > > and apply to kbuild so we can get them in this cycle. > > Sam Applied to linux-kbuild/kconfig with Sam's Ack. -- Best Regards Masahiro Yamada
Re: [PATCH 1/2] kconfig: remove unused sym_get_env_prop() function
2018-08-14 2:20 GMT+09:00 Sam Ravnborg : > On Tue, Aug 14, 2018 at 01:48:38AM +0900, Masahiro Yamada wrote: >> This function is unused since commit 104daea149c4 ("kconfig: reference >> environment variables directly and remove 'option env='"). >> >> Signed-off-by: Masahiro Yamada > > Both patches are obviously correct. > Feel free to add: > Acked-by: Sam Ravnborg > > and apply to kbuild so we can get them in this cycle. > > Sam Applied to linux-kbuild/kconfig with Sam's Ack. -- Best Regards Masahiro Yamada
Re: [PATCH] kconfig: fix the rule of mainmenu_stmt symbol
2018-08-09 15:47 GMT+09:00 Masahiro Yamada : > The rule of mainmenu_stmt does not have debug print of zconf_lineno(), > but if it had, it would print a wrong line number for the same reason > as commit b2d00d7c61c8 ("kconfig: fix line numbers for if-entries in > menu tree"). > > The mainmenu_stmt does not need to eat following empty lines because > they are reduced to common_stmt. > > Signed-off-by: Masahiro Yamada > --- Applied to linux-kbuild/kconfig. > scripts/kconfig/zconf.y | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y > index 4b68272..95a4fd3 100644 > --- a/scripts/kconfig/zconf.y > +++ b/scripts/kconfig/zconf.y > @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE]; > static struct menu *current_menu, *current_entry; > > %} > -%expect 31 > +%expect 30 > > %union > { > @@ -117,7 +117,7 @@ start: mainmenu_stmt stmt_list | stmt_list; > > /* mainmenu entry */ > > -mainmenu_stmt: T_MAINMENU prompt nl > +mainmenu_stmt: T_MAINMENU prompt T_EOL > { > menu_add_prompt(P_MENU, $2, NULL); > }; > -- > 2.7.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kbuild" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Best Regards Masahiro Yamada
Re: linux-next: Signed-off-by missing for commit in the kbuild tree
Stephen, 2018-08-14 6:33 GMT+09:00 Stephen Rothwell : > Hi Masahiro, > > Commit > > bd714f5f14e0 ("Coccinelle: doubletest: reduce side effect false positives") > > is missing a Signed-off-by from its committer. Thanks for catching this. I will fix it. > -- > Cheers, > Stephen Rothwell -- Best Regards Masahiro Yamada
Re: linux-next: Signed-off-by missing for commit in the kbuild tree
Stephen, 2018-08-14 6:33 GMT+09:00 Stephen Rothwell : > Hi Masahiro, > > Commit > > bd714f5f14e0 ("Coccinelle: doubletest: reduce side effect false positives") > > is missing a Signed-off-by from its committer. Thanks for catching this. I will fix it. > -- > Cheers, > Stephen Rothwell -- Best Regards Masahiro Yamada
Re: [PATCH] Support mksh as /bin/sh.
2018-08-14 7:32 GMT+09:00 Kees Cook : > On Mon, Aug 13, 2018 at 3:14 AM, wrote: >> From: Arkadiusz Miśkiewicz >> >> mksh needs space between ! and ( to work properly. Otherwise this >> happens: >> >> + make oldconfig >> scripts/kconfig/conf --oldconfig Kconfig >> ./scripts/clang-version.sh[18]: COPYING: not found >> printf: ‘__clang_major__’: expected a numeric value >> printf: ‘__clang_minor__’: expected a numeric value >> printf: ‘__clang_patchlevel__’: expected a numeric value >> init/Kconfig:24:warning: 'CLANG_VERSION': number is invalid >> >> Signed-off-by: Arkadiusz Miśkiewicz > > Reviewed-by: Kees Cook > > -Kees > I could not find this patch in ML. Arkadiusz, Please resend this patch to linux-kbu...@vger.kernel.org >> --- >> scripts/clang-version.sh | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh >> index dbf0a31eb111..e65fbc3079d4 100755 >> --- a/scripts/clang-version.sh >> +++ b/scripts/clang-version.sh >> @@ -12,7 +12,7 @@ >> >> compiler="$*" >> >> -if !( $compiler --version | grep -q clang) ; then >> +if ! ( $compiler --version | grep -q clang) ; then >> echo 0 >> exit 1 >> fi >> -- >> 2.18.0 >> > > > > -- > Kees Cook > Pixel Security -- Best Regards Masahiro Yamada
Re: [PATCH] Support mksh as /bin/sh.
2018-08-14 7:32 GMT+09:00 Kees Cook : > On Mon, Aug 13, 2018 at 3:14 AM, wrote: >> From: Arkadiusz Miśkiewicz >> >> mksh needs space between ! and ( to work properly. Otherwise this >> happens: >> >> + make oldconfig >> scripts/kconfig/conf --oldconfig Kconfig >> ./scripts/clang-version.sh[18]: COPYING: not found >> printf: ‘__clang_major__’: expected a numeric value >> printf: ‘__clang_minor__’: expected a numeric value >> printf: ‘__clang_patchlevel__’: expected a numeric value >> init/Kconfig:24:warning: 'CLANG_VERSION': number is invalid >> >> Signed-off-by: Arkadiusz Miśkiewicz > > Reviewed-by: Kees Cook > > -Kees > I could not find this patch in ML. Arkadiusz, Please resend this patch to linux-kbu...@vger.kernel.org >> --- >> scripts/clang-version.sh | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh >> index dbf0a31eb111..e65fbc3079d4 100755 >> --- a/scripts/clang-version.sh >> +++ b/scripts/clang-version.sh >> @@ -12,7 +12,7 @@ >> >> compiler="$*" >> >> -if !( $compiler --version | grep -q clang) ; then >> +if ! ( $compiler --version | grep -q clang) ; then >> echo 0 >> exit 1 >> fi >> -- >> 2.18.0 >> > > > > -- > Kees Cook > Pixel Security -- Best Regards Masahiro Yamada
Re: [PATCH v2 2/4] dt-bindings: soc: amlogic: add meson-canvas documentation
2018-08-13 21:07 GMT+02:00 Rob Herring : > On Wed, Aug 08, 2018 at 12:00:09AM +0200, Maxime Jourdan wrote: >> DT bindings doc for amlogic,meson-canvas >> >> Signed-off-by: Maxime Jourdan >> --- >> .../soc/amlogic/amlogic,meson-canvas.txt | 36 +++ >> 1 file changed, 36 insertions(+) >> create mode 100644 >> Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> >> diff --git >> a/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> new file mode 100644 >> index ..5f0351717bee >> --- /dev/null >> +++ b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> @@ -0,0 +1,36 @@ >> +Amlogic Canvas >> + >> + >> +A canvas is a collection of metadata that describes a pixel buffer. >> +Those metadata include: width, height, phyaddr, wrapping, block mode >> +and endianness. >> + >> +Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data >> +rather than use the phy addresses directly. For instance, this is the case >> for >> +the video decoders and the display. >> + >> +Amlogic SoCs have 256 canvas. >> + >> +Device Tree Bindings: >> +- >> + >> +Canvas Provider >> +-- >> + >> +Required properties: >> +- compatible: "amlogic,canvas" >> + >> +Parent node should have the following properties : >> +- compatible: "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd" > > Is this documented somewhere? One child function is not a reason for an > MFD and child nodes. And child nodes like this with no resources are > unnecessary. > Hi Rob, this was done to follow the same path as other buses on the platform that have sysctrls in order to provide regmaps to the devices. I can see how it's not really necessary here though, would it be okay with you if I turned "canvas" into a simple bus subnode, using __iomem in the device ? >> +- reg: base address and size of the DMC system control register space. 
>> + >> +Example: >> + >> +sysctrl_DMC: system-controller@0 { >> + compatible = "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd"; >> + reg = <0x0 0x0 0x0 0x1000>; >> + >> + canvas: canvas-provider@0 { >> + compatible = "amlogic,canvas"; >> + }; >> +}; >> -- >> 2.18.0 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe devicetree" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 2/4] dt-bindings: soc: amlogic: add meson-canvas documentation
2018-08-13 21:07 GMT+02:00 Rob Herring : > On Wed, Aug 08, 2018 at 12:00:09AM +0200, Maxime Jourdan wrote: >> DT bindings doc for amlogic,meson-canvas >> >> Signed-off-by: Maxime Jourdan >> --- >> .../soc/amlogic/amlogic,meson-canvas.txt | 36 +++ >> 1 file changed, 36 insertions(+) >> create mode 100644 >> Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> >> diff --git >> a/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> new file mode 100644 >> index ..5f0351717bee >> --- /dev/null >> +++ b/Documentation/devicetree/bindings/soc/amlogic/amlogic,meson-canvas.txt >> @@ -0,0 +1,36 @@ >> +Amlogic Canvas >> + >> + >> +A canvas is a collection of metadata that describes a pixel buffer. >> +Those metadata include: width, height, phyaddr, wrapping, block mode >> +and endianness. >> + >> +Many IPs within Amlogic SoCs rely on canvas indexes to read/write pixel data >> +rather than use the phy addresses directly. For instance, this is the case >> for >> +the video decoders and the display. >> + >> +Amlogic SoCs have 256 canvas. >> + >> +Device Tree Bindings: >> +- >> + >> +Canvas Provider >> +-- >> + >> +Required properties: >> +- compatible: "amlogic,canvas" >> + >> +Parent node should have the following properties : >> +- compatible: "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd" > > Is this documented somewhere? One child function is not a reason for an > MFD and child nodes. And child nodes like this with no resources are > unnecessary. > Hi Rob, this was done to follow the same path as other buses on the platform that have sysctrls in order to provide regmaps to the devices. I can see how it's not really necessary here though, would it be okay with you if I turned "canvas" into a simple bus subnode, using __iomem in the device ? >> +- reg: base address and size of the DMC system control register space. 
>> + >> +Example: >> + >> +sysctrl_DMC: system-controller@0 { >> + compatible = "amlogic,gx-dmc-sysctrl", "syscon", "simple-mfd"; >> + reg = <0x0 0x0 0x0 0x1000>; >> + >> + canvas: canvas-provider@0 { >> + compatible = "amlogic,canvas"; >> + }; >> +}; >> -- >> 2.18.0 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe devicetree" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] gcc-plugins: require GCC
2018-08-14 6:29 GMT+09:00 Stefan Agner : > On 13.08.2018 22:18, Kees Cook wrote: >> On Mon, Aug 13, 2018 at 1:10 PM, Kees Cook wrote: >>> On Mon, Aug 13, 2018 at 12:38 AM, Masahiro Yamada >>> wrote: 2018-08-11 18:48 GMT+09:00 Stefan Agner : > Unsurprisingly GCC plugins require GCC as a compiler. This avoids > GCC plugins being selectable when using clang. > > Signed-off-by: Stefan Agner > --- > arch/Kconfig | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/Kconfig b/arch/Kconfig > index 1aa59063f1fd..8c693a837ed7 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -422,7 +422,7 @@ config HAVE_GCC_PLUGINS > > menuconfig GCC_PLUGINS > bool "GCC plugins" > - depends on HAVE_GCC_PLUGINS > + depends on HAVE_GCC_PLUGINS && CC_IS_GCC > depends on PLUGIN_HOSTCC != "" > help > GCC plugins are loadable modules that provide extra features to > the > -- > 2.18.0 > I guess the more correct way is to fix scripts/gcc-plugin.sh This shell script should exit 0 only when GCC plugin is supported. >>> >>> I'm of two minds: we already have the gcc test in Kconfig so we might >>> want to avoid redundant checks, but maybe the script should be a >>> "complete" test. I guess the latter. I will send a patch. >> >> Actually, how about we do the test in Kconfig, but ahead of the >> script? That will reduce redundancy: >> >> diff --git a/arch/Kconfig b/arch/Kconfig >> index 1aa59063f1fd..18f518624e41 100644 >> --- a/arch/Kconfig >> +++ b/arch/Kconfig >> @@ -409,7 +409,7 @@ preferred-plugin-hostcc := $(if-success,[ >> $(gcc-version) -ge 40800 ],$(HOSTCXX), >> >> config PLUGIN_HOSTCC >> string >> - default "$(shell,$(srctree)/scripts/gcc-plugin.sh >> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")" >> + default "$(shell,$(srctree)/scripts/gcc-plugin.sh >> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")" if CC_IS_GCC >> help >> Host compiler used to build GCC plugins. This can be $(HOSTCXX), >> $(HOSTCC), or a null string if GCC plugin is unsupported. 
> > I like this better. A script which is called gcc-plugin.sh should only > be called if gcc is used... > This does not work like that. $(shell,$(srctree)/scripts/gcc-plugin.sh "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)") is expanded in the parse stage regardless of CC_IS_GCC. If nobody comes up with a better way, I am fine with this. Reviewed-by: Masahiro Yamada > -- > Stefan > >> >> >> -Kees -- Best Regards Masahiro Yamada
Re: [PATCH] gcc-plugins: require GCC
2018-08-14 6:29 GMT+09:00 Stefan Agner : > On 13.08.2018 22:18, Kees Cook wrote: >> On Mon, Aug 13, 2018 at 1:10 PM, Kees Cook wrote: >>> On Mon, Aug 13, 2018 at 12:38 AM, Masahiro Yamada >>> wrote: 2018-08-11 18:48 GMT+09:00 Stefan Agner : > Unsurprisingly GCC plugins require GCC as a compiler. This avoids > GCC plugins being selectable when using clang. > > Signed-off-by: Stefan Agner > --- > arch/Kconfig | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/Kconfig b/arch/Kconfig > index 1aa59063f1fd..8c693a837ed7 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -422,7 +422,7 @@ config HAVE_GCC_PLUGINS > > menuconfig GCC_PLUGINS > bool "GCC plugins" > - depends on HAVE_GCC_PLUGINS > + depends on HAVE_GCC_PLUGINS && CC_IS_GCC > depends on PLUGIN_HOSTCC != "" > help > GCC plugins are loadable modules that provide extra features to > the > -- > 2.18.0 > I guess the more correct way is to fix scripts/gcc-plugin.sh This shell script should exit 0 only when GCC plugin is supported. >>> >>> I'm of two minds: we already have the gcc test in Kconfig so we might >>> want to avoid redundant checks, but maybe the script should be a >>> "complete" test. I guess the latter. I will send a patch. >> >> Actually, how about we do the test in Kconfig, but ahead of the >> script? That will reduce redundancy: >> >> diff --git a/arch/Kconfig b/arch/Kconfig >> index 1aa59063f1fd..18f518624e41 100644 >> --- a/arch/Kconfig >> +++ b/arch/Kconfig >> @@ -409,7 +409,7 @@ preferred-plugin-hostcc := $(if-success,[ >> $(gcc-version) -ge 40800 ],$(HOSTCXX), >> >> config PLUGIN_HOSTCC >> string >> - default "$(shell,$(srctree)/scripts/gcc-plugin.sh >> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")" >> + default "$(shell,$(srctree)/scripts/gcc-plugin.sh >> "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)")" if CC_IS_GCC >> help >> Host compiler used to build GCC plugins. This can be $(HOSTCXX), >> $(HOSTCC), or a null string if GCC plugin is unsupported. 
> > I like this better. A script which is called gcc-plugin.sh should only > be called if gcc is used... > This does not work like that. $(shell,$(srctree)/scripts/gcc-plugin.sh "$(preferred-plugin-hostcc)" "$(HOSTCXX)" "$(CC)") is expanded in the parse stage regardless of CC_IS_GCC. If nobody comes up with a better way, I am fine with this. Reviewed-by: Masahiro Yamada > -- > Stefan > >> >> >> -Kees -- Best Regards Masahiro Yamada
Re: [PATCH] md/bcache: Remove NULL check.
On Mon, Aug 13, 2018 at 06:00:11PM +0800, Coly Li wrote: > On 2018/8/13 5:38 PM, Sean Fu wrote: > > Remove unnessesary NULL check before kmem_cache_destroy() in > > drivers/md/bcache/request.c > > > > Signed-off-by: Sean Fu > > Hi Sean, > > A same change is posted in my previous checkpatch fixes series. You may > find it from, > https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/?h=for-next > > The identical patch is, > https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/commit/?h=for-next=26b39d0732955ecb839d0afe8d6211d70f213384 > Thank you for your reply. Sean Fu > Thanks. > > Coly Li > > > --- > > drivers/md/bcache/request.c | 3 +-- > > 1 file changed, 1 insertion(+), 2 deletions(-) > > > > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c > > index ae67f5f..0ddee35 100644 > > --- a/drivers/md/bcache/request.c > > +++ b/drivers/md/bcache/request.c > > @@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device > > *d) > > > > void bch_request_exit(void) > > { > > - if (bch_search_cache) > > - kmem_cache_destroy(bch_search_cache); > > + kmem_cache_destroy(bch_search_cache); > > } > > > > int __init bch_request_init(void) > > >
Re: [PATCH] md/bcache: Remove NULL check.
On Mon, Aug 13, 2018 at 06:00:11PM +0800, Coly Li wrote: > On 2018/8/13 5:38 PM, Sean Fu wrote: > > Remove unnessesary NULL check before kmem_cache_destroy() in > > drivers/md/bcache/request.c > > > > Signed-off-by: Sean Fu > > Hi Sean, > > A same change is posted in my previous checkpatch fixes series. You may > find it from, > https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/?h=for-next > > The identical patch is, > https://git.kernel.org/pub/scm/linux/kernel/git/colyli/linux-bcache.git/commit/?h=for-next=26b39d0732955ecb839d0afe8d6211d70f213384 > Thank you for your reply. Sean Fu > Thanks. > > Coly Li > > > --- > > drivers/md/bcache/request.c | 3 +-- > > 1 file changed, 1 insertion(+), 2 deletions(-) > > > > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c > > index ae67f5f..0ddee35 100644 > > --- a/drivers/md/bcache/request.c > > +++ b/drivers/md/bcache/request.c > > @@ -1306,8 +1306,7 @@ void bch_flash_dev_request_init(struct bcache_device > > *d) > > > > void bch_request_exit(void) > > { > > - if (bch_search_cache) > > - kmem_cache_destroy(bch_search_cache); > > + kmem_cache_destroy(bch_search_cache); > > } > > > > int __init bch_request_init(void) > > >
[PATCH] arm/mach-at91/pm: Do not double put the device node
Device node iterators put the previous value of the index variable, so an explicit put causes a double put. I detect the issue with the help of Coccinelle. Signed-off-by: zhong jiang --- arch/arm/mach-at91/pm.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 32fae4d..a5ec35f 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -143,15 +143,12 @@ static int at91_pm_config_ws(unsigned int pm_mode, bool set) /* Check if enabled on SHDWC. */ if (wsi->shdwc_mr_bit && !(val & wsi->shdwc_mr_bit)) - goto put_node; + continue; mode |= wsi->pmc_fsmr_bit; if (wsi->set_polarity) polarity |= wsi->pmc_fsmr_bit; } - -put_node: - of_node_put(np); } if (mode) { -- 1.7.12.4
[PATCH] arm/mach-at91/pm: Do not double put the device node
Device node iterators put the previous value of the index variable, so an explicit put causes a double put. I detect the issue with the help of Coccinelle. Signed-off-by: zhong jiang --- arch/arm/mach-at91/pm.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 32fae4d..a5ec35f 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -143,15 +143,12 @@ static int at91_pm_config_ws(unsigned int pm_mode, bool set) /* Check if enabled on SHDWC. */ if (wsi->shdwc_mr_bit && !(val & wsi->shdwc_mr_bit)) - goto put_node; + continue; mode |= wsi->pmc_fsmr_bit; if (wsi->set_polarity) polarity |= wsi->pmc_fsmr_bit; } - -put_node: - of_node_put(np); } if (mode) { -- 1.7.12.4
Re: [PATCH 1/3] arm64: implement ftrace with regs
On Mon, 13 Aug 2018 11:54:06 +0100 Julien Thierry wrote: > > --- a/arch/arm64/Makefile > > +++ b/arch/arm64/Makefile > > @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) > > KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds > > endif > > > > +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 > > + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY > > + ifeq ($(call cc-option,-fpatchable-function-entry=2),) > > +$(warning Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \ > > + -fpatchable-function-entry not supported by compiler) > > Shouldn't this be an error? The option -fpatchable-function-entry has > been added to the CC_FLAGS_FTRACE, so any call to the compiler is gonna > break anyway. Or am I missing something? I'm guessing this adds a more informative message on that error. One will know why -fpatchable-function-entry was added to the CFLAGS. I'm for more informative error messages being a victim of poor error messages causing me to dig deep into the guts of the build infrastructure to figure out simple issues. -- Steve
Re: [PATCH 1/3] arm64: implement ftrace with regs
On Mon, 13 Aug 2018 11:54:06 +0100 Julien Thierry wrote: > > --- a/arch/arm64/Makefile > > +++ b/arch/arm64/Makefile > > @@ -78,6 +78,15 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) > > KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds > > endif > > > > +ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS > > + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 > > + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY > > + ifeq ($(call cc-option,-fpatchable-function-entry=2),) > > +$(warning Cannot use CONFIG_DYNAMIC_FTRACE_WITH_REGS: \ > > + -fpatchable-function-entry not supported by compiler) > > Shouldn't this be an error? The option -fpatchable-function-entry has > been added to the CC_FLAGS_FTRACE, so any call to the compiler is gonna > break anyway. Or am I missing something? I'm guessing this adds a more informative message on that error. One will know why -fpatchable-function-entry was added to the CFLAGS. I'm for more informative error messages being a victim of poor error messages causing me to dig deep into the guts of the build infrastructure to figure out simple issues. -- Steve
Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation
Hi Srinath, Ray, On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote: > Add DT binding document for stingray usb phy driver. > > Signed-off-by: Srinath Mannam > Reviewed-by: Ray Jui > Acked-by: Rob Herring The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY from what I understand. Our hardware team reports that there are some failures seen without a custom PHY driver. Do you require this PHY driver on regular operation on Stingray? Or is this for some specific functionality? I am asking since the issue reported here is on switching between USB 3.0 and USB 2.0 devices. Any plans on submitting this driver again? Thanks, JC
Re: [1/2] dt-bindings: phy: Add stingray usb phy documentation
Hi Srinath, Ray, On Fri, Jul 07, 2017 at 06:37:04PM +0530, Srinath Mannam wrote: > Add DT binding document for stingray usb phy driver. > > Signed-off-by: Srinath Mannam > Reviewed-by: Ray Jui > Acked-by: Rob Herring The Broadcom Vulcan chip (now Cavium ThunderX2) uses the same USB PHY from what I understand. Our hardware team reports that there are some failures seen without a custom PHY driver. Do you require this PHY driver on regular operation on Stingray? Or is this for some specific functionality? I am asking since the issue reported here is on switching between USB 3.0 and USB 2.0 devices. Any plans on submitting this driver again? Thanks, JC
Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > From: Roman Kiryanov > > Since the driver provides no workaround prevent in cases if structs do > no fit into a memory page, it is better to fail complation to find about > the issue earlt instead of returning errors at runtime. Minor earlt/early typo > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > b/drivers/platform/goldfish/goldfish_pipe.c [] > @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct > platform_device *pdev) >* needs to be contained in a single physical page. The easiest choice >* is to just allocate a page and place the buffers in it. >*/ > - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE)) > - return -ENOMEM; > - > + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); > page = (char *)__get_free_page(GFP_KERNEL); > if (!page) { > kfree(dev->pipes); > @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device > *pdev) > struct resource *r; > struct goldfish_pipe_dev *dev = pipe_dev; > > - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE)) > - return -ENOMEM; > + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); > > /* not thread safe, but this should not happen */ > WARN_ON(dev->base != NULL); Why separate these BUILD_BUG_ONs into 2 different functions? Why not just BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);
Re: [PATCH 6/9] platform: goldfish: pipe: Fail compilation if structs are too large
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > From: Roman Kiryanov > > Since the driver provides no workaround prevent in cases if structs do > no fit into a memory page, it is better to fail complation to find about > the issue earlt instead of returning errors at runtime. Minor earlt/early typo > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > b/drivers/platform/goldfish/goldfish_pipe.c [] > @@ -797,9 +798,7 @@ static int goldfish_pipe_device_init(struct > platform_device *pdev) >* needs to be contained in a single physical page. The easiest choice >* is to just allocate a page and place the buffers in it. >*/ > - if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE)) > - return -ENOMEM; > - > + BUILD_BUG_ON(sizeof(*dev->buffers) > PAGE_SIZE); > page = (char *)__get_free_page(GFP_KERNEL); > if (!page) { > kfree(dev->pipes); > @@ -842,8 +841,7 @@ static int goldfish_pipe_probe(struct platform_device > *pdev) > struct resource *r; > struct goldfish_pipe_dev *dev = pipe_dev; > > - if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE)) > - return -ENOMEM; > + BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); > > /* not thread safe, but this should not happen */ > WARN_ON(dev->base != NULL); Why separate these BUILD_BUG_ONs into 2 different functions? Why not just BUILD_BUG_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE); BUILD_BUG_ON(sizeof(struct goldfish_pipe_dev_buffers) > PAGE_SIZE);
Re: [PATCH 1/2] perf tools: Make check-headers.sh check based on kernel dir
Jiri Olsa writes: > diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh > index ea48aa6f8d19..9d466e853aec 100755 > --- a/tools/perf/check-headers.sh > +++ b/tools/perf/check-headers.sh > @@ -88,6 +88,8 @@ check () { > # differences. > test -d ../../include || exit 0 > > +pushd ../.. > /dev/null > + > # simple diff check > for i in $HEADERS; do >check $i -B This breaks the build when sh is not bash: ./check-headers.sh: 91: ./check-headers.sh: pushd: not found ./check-headers.sh: 107: ./check-headers.sh: popd: not found Makefile.perf:205: recipe for target 'sub-make' failed cheers
Re: [PATCH 1/2] perf tools: Make check-headers.sh check based on kernel dir
Jiri Olsa writes: > diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh > index ea48aa6f8d19..9d466e853aec 100755 > --- a/tools/perf/check-headers.sh > +++ b/tools/perf/check-headers.sh > @@ -88,6 +88,8 @@ check () { > # differences. > test -d ../../include || exit 0 > > +pushd ../.. > /dev/null > + > # simple diff check > for i in $HEADERS; do >check $i -B This breaks the build when sh is not bash: ./check-headers.sh: 91: ./check-headers.sh: pushd: not found ./check-headers.sh: 107: ./check-headers.sh: popd: not found Makefile.perf:205: recipe for target 'sub-make' failed cheers
Re: [PATCH 1/9] platform: goldfish: pipe: Fix comments to fit 80 columns
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > From: Roman Kiryanov > > Some comment lines are longer than 80 symbols. [] > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > b/drivers/platform/goldfish/goldfish_pipe.c [] > @@ -84,7 +84,10 @@ enum PipePollFlags { > PIPE_POLL_HUP = 1 << 2 > }; > > -/* Possible status values used to signal errors - see > goldfish_pipe_error_convert */ > +/* > + * Possible status values used to signal errors - see > + * goldfish_pipe_error_convert If this is to be wrapped at all, and this really doesn't need to be wrapped, it would probably be more sensible as: /* * Possible status values used to signal errors * see: goldfish_pipe_error_convert */
Re: [PATCH 1/9] platform: goldfish: pipe: Fix comments to fit 80 columns
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > From: Roman Kiryanov > > Some comment lines are longer than 80 symbols. [] > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > b/drivers/platform/goldfish/goldfish_pipe.c [] > @@ -84,7 +84,10 @@ enum PipePollFlags { > PIPE_POLL_HUP = 1 << 2 > }; > > -/* Possible status values used to signal errors - see > goldfish_pipe_error_convert */ > +/* > + * Possible status values used to signal errors - see > + * goldfish_pipe_error_convert If this is to be wrapped at all, and this really doesn't need to be wrapped, it would probably be more sensible as: /* * Possible status values used to signal errors * see: goldfish_pipe_error_convert */
Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > dev_ is preferred if struct device is available. [] > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > b/drivers/platform/goldfish/goldfish_pipe.c [] > @@ -384,6 +384,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, > int count = 0, ret = -EINVAL; > unsigned long address, address_end, last_page; > unsigned int last_page_size; > + struct device *pdev_dev; > > /* If the emulator already closed the pipe, no need to go further */ > if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags))) > @@ -401,6 +402,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, > last_page = (address_end - 1) & PAGE_MASK; > last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; > > + pdev_dev = pipe->dev->pdev_dev; > + > while (address < address_end) { > s32 consumed_size; > int status; > @@ -433,7 +436,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, >* err. >*/ > if (status != PIPE_ERROR_AGAIN) > - pr_info_ratelimited("goldfish_pipe: backend > error %d on %s\n", > + dev_err_ratelimited(pdev_dev, > + "goldfish_pipe: backend error %d on > %s\n", > status, is_write ? "write" : "read"); > break; > } Wouldn't it be simpler to use pipe->dev->pdev_dev here instead of creating and assigning a probably unused use-once pointer? What does the output look like now? Is the "pipe->dev->pdev_dev->name" then "goldfish_pipe: " output useful?
Re: [PATCH 8/9] platform: goldfish: pipe: Replace pr_ with dev_ for logging
On Mon, 2018-08-13 at 16:38 -0700, r...@google.com wrote: > dev_ is preferred if struct device is available. [] > diff --git a/drivers/platform/goldfish/goldfish_pipe.c > b/drivers/platform/goldfish/goldfish_pipe.c [] > @@ -384,6 +384,7 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, > int count = 0, ret = -EINVAL; > unsigned long address, address_end, last_page; > unsigned int last_page_size; > + struct device *pdev_dev; > > /* If the emulator already closed the pipe, no need to go further */ > if (unlikely(test_bit(BIT_CLOSED_ON_HOST, >flags))) > @@ -401,6 +402,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, > last_page = (address_end - 1) & PAGE_MASK; > last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1; > > + pdev_dev = pipe->dev->pdev_dev; > + > while (address < address_end) { > s32 consumed_size; > int status; > @@ -433,7 +436,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, >* err. >*/ > if (status != PIPE_ERROR_AGAIN) > - pr_info_ratelimited("goldfish_pipe: backend > error %d on %s\n", > + dev_err_ratelimited(pdev_dev, > + "goldfish_pipe: backend error %d on > %s\n", > status, is_write ? "write" : "read"); > break; > } Wouldn't it be simpler to use pipe->dev->pdev_dev here instead of creating and assigning a probably unused use-once pointer? What does the output look like now? Is the "pipe->dev->pdev_dev->name" then "goldfish_pipe: " output useful?
[PATCH] get_maintainer: Allow option --mpath to read all files in
There is an external use case for multiple private MAINTAINER style files in a separate directory. Allow it. --mpath has a default of "./MAINTAINERS". The value entered can be either a file or a directory. The behaviors are now: --mpath Read only the specific file as file --mpath Read all files in as files --mpath --find-maintainer-files Recurse through and read all files named MAINTAINERS Signed-off-by: Joe Perches --- scripts/get_maintainer.pl | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 0ebdefe74d5b..c1c088ef1420 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -396,7 +396,12 @@ sub read_all_maintainer_files { if (-d $path) { $path .= '/' if ($path !~ m@/$@); - if ($path eq "${lk_path}MAINTAINERS/") { + if ($find_maintainer_files) { + find( { wanted => \_is_maintainer_file, + preprocess => \_ignore_git, + no_chdir => 1, + }, "$path"); + } else { opendir(DIR, "$path") or die $!; my @files = readdir(DIR); closedir(DIR); @@ -404,12 +409,6 @@ sub read_all_maintainer_files { push(@mfiles, "$path$file") if ($file !~ /^\./); } } - if ($find_maintainer_files) { - find( { wanted => \_is_maintainer_file, - preprocess => \_ignore_git, - no_chdir => 1, - }, "$path"); - } } elsif (-f "$path") { push(@mfiles, "$path"); } else {
[PATCH] get_maintainer: Allow option --mpath to read all files in
There is an external use case for multiple private MAINTAINER style files in a separate directory. Allow it. --mpath has a default of "./MAINTAINERS". The value entered can be either a file or a directory. The behaviors are now: --mpath Read only the specific file as file --mpath Read all files in as files --mpath --find-maintainer-files Recurse through and read all files named MAINTAINERS Signed-off-by: Joe Perches --- scripts/get_maintainer.pl | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 0ebdefe74d5b..c1c088ef1420 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -396,7 +396,12 @@ sub read_all_maintainer_files { if (-d $path) { $path .= '/' if ($path !~ m@/$@); - if ($path eq "${lk_path}MAINTAINERS/") { + if ($find_maintainer_files) { + find( { wanted => \_is_maintainer_file, + preprocess => \_ignore_git, + no_chdir => 1, + }, "$path"); + } else { opendir(DIR, "$path") or die $!; my @files = readdir(DIR); closedir(DIR); @@ -404,12 +409,6 @@ sub read_all_maintainer_files { push(@mfiles, "$path$file") if ($file !~ /^\./); } } - if ($find_maintainer_files) { - find( { wanted => \_is_maintainer_file, - preprocess => \_ignore_git, - no_chdir => 1, - }, "$path"); - } } elsif (-f "$path") { push(@mfiles, "$path"); } else {
Re: [PATCH] zsmalloc: fix linking bug in init_zspage
Hi Minchan, On (08/14/18 09:24), Minchan Kim wrote: > > Any thoughts? > > If we want a refactoring, I'm not against but description said it triggered > BUG_ON on zs_map_object rarely. That means it should be stable material > and need more description to understand. Please be more specific with > some example. I don't have any BUG_ON on hands. Would be great if zhouxianrong could post some backtraces or more info/explanation. > The reason I'm hesitating is zsmalloc moves ZS_FULL group > when the zspage->inuse is equal to class->objs_per_zspage so I thought > it shouldn't allocate last partial object. Maybe, allocating last partial object does look a bit hacky - it's not a valid object anyway, but I'm not suggesting that we need to change it. Let's hear from zhouxianrong. -ss
Re: [PATCH] zsmalloc: fix linking bug in init_zspage
Hi Minchan, On (08/14/18 09:24), Minchan Kim wrote: > > Any thoughts? > > If we want a refactoring, I'm not against but description said it triggered > BUG_ON on zs_map_object rarely. That means it should be stable material > and need more description to understand. Please be more specific with > some example. I don't have any BUG_ON on hands. Would be great if zhouxianrong could post some backtraces or more info/explanation. > The reason I'm hesitating is zsmalloc moves ZS_FULL group > when the zspage->inuse is equal to class->objs_per_zspage so I thought > it shouldn't allocate last partial object. Maybe, allocating last partial object does look a bit hacky - it's not a valid object anyway, but I'm not suggesting that we need to change it. Let's hear from zhouxianrong. -ss
Re: [PATCH] fsi: ast: select GENERIC_ALLOCATOR
On Tue, 2018-08-14 at 00:37 +0200, Arnd Bergmann wrote: > In randconfig builds without CONFIG_GENERIC_ALLOCATOR, this driver > fails to link: > > ERROR: "gen_pool_alloc_algo" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > ERROR: "gen_pool_fixed_alloc" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > ERROR: "of_gen_pool_get" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > ERROR: "gen_pool_free" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > > Select the dependency as all other users do. > > Fixes: 6a794a27daca ("fsi: master-ast-cf: Add new FSI master using Aspeed > ColdFire") > Signed-off-by: Arnd Bergmann > --- Thanks, I'll pick it up and send to Greg with a slightly different subject Cheers, Ben. > drivers/fsi/Kconfig | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig > index af3a20dd5aa4..99c99a5d57fe 100644 > --- a/drivers/fsi/Kconfig > +++ b/drivers/fsi/Kconfig > @@ -46,6 +46,7 @@ config FSI_MASTER_AST_CF > tristate "FSI master based on Aspeed ColdFire coprocessor" > depends on GPIOLIB > depends on GPIO_ASPEED > + select GENERIC_ALLOCATOR > ---help--- > This option enables a FSI master using the AST2400 and AST2500 GPIO > lines driven by the internal ColdFire coprocessor. This requires
Re: [PATCH] fsi: ast: select GENERIC_ALLOCATOR
On Tue, 2018-08-14 at 00:37 +0200, Arnd Bergmann wrote: > In randconfig builds without CONFIG_GENERIC_ALLOCATOR, this driver > fails to link: > > ERROR: "gen_pool_alloc_algo" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > ERROR: "gen_pool_fixed_alloc" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > ERROR: "of_gen_pool_get" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > ERROR: "gen_pool_free" [drivers/fsi/fsi-master-ast-cf.ko] undefined! > > Select the dependency as all other users do. > > Fixes: 6a794a27daca ("fsi: master-ast-cf: Add new FSI master using Aspeed > ColdFire") > Signed-off-by: Arnd Bergmann > --- Thanks, I'll pick it up and send to Greg with a slightly different subject Cheers, Ben. > drivers/fsi/Kconfig | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig > index af3a20dd5aa4..99c99a5d57fe 100644 > --- a/drivers/fsi/Kconfig > +++ b/drivers/fsi/Kconfig > @@ -46,6 +46,7 @@ config FSI_MASTER_AST_CF > tristate "FSI master based on Aspeed ColdFire coprocessor" > depends on GPIOLIB > depends on GPIO_ASPEED > + select GENERIC_ALLOCATOR > ---help--- > This option enables a FSI master using the AST2400 and AST2500 GPIO > lines driven by the internal ColdFire coprocessor. This requires
[PATCH v2] mm: migration: fix migration of huge PMD shared pages
The page migration code employs try_to_unmap() to try and unmap the source page. This is accomplished by using rmap_walk to find all vmas where the page is mapped. This search stops when page mapcount is zero. For shared PMD huge pages, the page map count is always 1 no matter the number of mappings. Shared mappings are tracked via the reference count of the PMD page. Therefore, try_to_unmap stops prematurely and does not completely unmap all mappings of the source page. This problem can result in data corruption as writes to the original source page can happen after contents of the page are copied to the target page. Hence, data is lost. This problem was originally seen as DB corruption of shared global areas after a huge page was soft offlined due to ECC memory errors. DB developers noticed they could reproduce the issue by (hotplug) offlining memory used to back huge pages. A simple testcase can reproduce the problem by creating a shared PMD mapping (note that this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using migrate_pages() to migrate process pages between nodes while continually writing to the huge pages being migrated. To fix, have the try_to_unmap_one routine check for huge PMD sharing by calling huge_pmd_unshare for hugetlbfs huge pages. If it is a shared mapping it will be 'unshared' which removes the page table entry and drops the reference on the PMD page. After this, flush caches and TLB. 
Fixes: 39dde65c9940 ("shared page table for hugetlb page") Signed-off-by: Mike Kravetz --- v2: Fixed build issue for !CONFIG_HUGETLB_PAGE and typos in comment include/linux/hugetlb.h | 6 ++ mm/rmap.c | 21 + 2 files changed, 27 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 36fa6a2a82e3..7524663028ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -170,6 +170,12 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, + pte_t *ptep) +{ + return 0; +} + #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) diff --git a/mm/rmap.c b/mm/rmap.c index 09a799c9aebd..cf2340adad10 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1409,6 +1409,27 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; + /* +* PMDs for hugetlbfs pages could be shared. In this case, +* pages with shared PMDs will have a mapcount of 1 no matter +* how many times they are actually mapped. Map counting for +* PMD sharing is mostly done via the reference count on the +* PMD page itself. If the page we are trying to unmap is a +* hugetlbfs page, attempt to 'unshare' at the PMD level. +* huge_pmd_unshare clears the PUD and adjusts reference +* counting on the PMD page which effectively unmaps the page. +* Take care of flushing cache and TLB for page in this +* specific mapping here. 
+*/ + if (PageHuge(page) && + huge_pmd_unshare(mm, , pvmw.pte)) { + unsigned long end_add = address + vma_mmu_pagesize(vma); + + flush_cache_range(vma, address, end_add); + flush_tlb_range(vma, address, end_add); + mmu_notifier_invalidate_range(mm, address, end_add); + continue; + } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && -- 2.17.1
[PATCH v2] mm: migration: fix migration of huge PMD shared pages
The page migration code employs try_to_unmap() to try and unmap the source page. This is accomplished by using rmap_walk to find all vmas where the page is mapped. This search stops when page mapcount is zero. For shared PMD huge pages, the page map count is always 1 no matter the number of mappings. Shared mappings are tracked via the reference count of the PMD page. Therefore, try_to_unmap stops prematurely and does not completely unmap all mappings of the source page. This problem can result in data corruption as writes to the original source page can happen after contents of the page are copied to the target page. Hence, data is lost. This problem was originally seen as DB corruption of shared global areas after a huge page was soft offlined due to ECC memory errors. DB developers noticed they could reproduce the issue by (hotplug) offlining memory used to back huge pages. A simple testcase can reproduce the problem by creating a shared PMD mapping (note that this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using migrate_pages() to migrate process pages between nodes while continually writing to the huge pages being migrated. To fix, have the try_to_unmap_one routine check for huge PMD sharing by calling huge_pmd_unshare for hugetlbfs huge pages. If it is a shared mapping it will be 'unshared' which removes the page table entry and drops the reference on the PMD page. After this, flush caches and TLB. 
Fixes: 39dde65c9940 ("shared page table for hugetlb page") Signed-off-by: Mike Kravetz --- v2: Fixed build issue for !CONFIG_HUGETLB_PAGE and typos in comment include/linux/hugetlb.h | 6 ++ mm/rmap.c | 21 + 2 files changed, 27 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 36fa6a2a82e3..7524663028ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -170,6 +170,12 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, + pte_t *ptep) +{ + return 0; +} + #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) diff --git a/mm/rmap.c b/mm/rmap.c index 09a799c9aebd..cf2340adad10 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1409,6 +1409,27 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; + /* +* PMDs for hugetlbfs pages could be shared. In this case, +* pages with shared PMDs will have a mapcount of 1 no matter +* how many times they are actually mapped. Map counting for +* PMD sharing is mostly done via the reference count on the +* PMD page itself. If the page we are trying to unmap is a +* hugetlbfs page, attempt to 'unshare' at the PMD level. +* huge_pmd_unshare clears the PUD and adjusts reference +* counting on the PMD page which effectively unmaps the page. +* Take care of flushing cache and TLB for page in this +* specific mapping here. 
+*/ + if (PageHuge(page) && + huge_pmd_unshare(mm, , pvmw.pte)) { + unsigned long end_add = address + vma_mmu_pagesize(vma); + + flush_cache_range(vma, address, end_add); + flush_tlb_range(vma, address, end_add); + mmu_notifier_invalidate_range(mm, address, end_add); + continue; + } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && -- 2.17.1
[PATCH] android: binder: no outgoing transaction when thread todo has transaction
When a process dies, failed reply is sent to the sender of any transaction queued on a dead thread's todo list. The sender asserts that the received failed reply corresponds to the head of the transaction stack. This assert can fail if the dead thread is allowed to send outgoing transactions when there is already a transaction on its todo list, because this new transaction can end up on the transaction stack of the original sender. The following steps illustrate how this assertion can fail. 1. Thread1 sends txn19 to Thread2 (T1->transaction_stack=txn19, T2->todo+=txn19) 2. Without processing todo list, Thread2 sends txn20 to Thread1 (T1->todo+=txn20, T2->transaction_stack=txn20) 3. T1 processes txn20 on its todo list (T1->transaction_stack=txn20->txn19, T1->todo=) 4. T2 dies, T2->todo cleanup attempts to send failed reply for txn19, but T1->transaction_stack points to txn20 -- assertion fails Step 2. is the incorrect behavior. When there is a transaction on a thread's todo list, this thread should not be able to send any outgoing synchronous transactions. Only the head of the todo list needs to be checked because only threads that are waiting for proc work can directly receive work from another thread, and no work is allowed to be queued on such a thread without waking up the thread. This patch also enforces that a thread is not waiting for proc work when a work is directly enqueued to its todo list. 
Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang --- drivers/android/binder.c | 44 +--- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b009..f2081934eb8b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -822,6 +822,7 @@ static void binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread, struct binder_work *work) { + WARN_ON(!list_empty(>waiting_thread_node)); binder_enqueue_work_ilocked(work, >todo); } @@ -839,6 +840,7 @@ static void binder_enqueue_thread_work_ilocked(struct binder_thread *thread, struct binder_work *work) { + WARN_ON(!list_empty(>waiting_thread_node)); binder_enqueue_work_ilocked(work, >todo); thread->process_todo = true; } @@ -1270,19 +1272,12 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, } else node->local_strong_refs++; if (!node->has_strong_ref && target_list) { + struct binder_thread *thread = container_of(target_list, + struct binder_thread, todo); binder_dequeue_work_ilocked(>work); - /* -* Note: this function is the only place where we queue -* directly to a thread->todo without using the -* corresponding binder_enqueue_thread_work() helper -* functions; in this case it's ok to not set the -* process_todo flag, since we know this node work will -* always be followed by other work that starts queue -* processing: in case of synchronous transactions, a -* BR_REPLY or BR_ERROR; in case of oneway -* transactions, a BR_TRANSACTION_COMPLETE. 
-*/ - binder_enqueue_work_ilocked(>work, target_list); + BUG_ON(>todo != target_list); + binder_enqueue_deferred_thread_work_ilocked(thread, + >work); } } else { if (!internal) @@ -2723,6 +2718,7 @@ static void binder_transaction(struct binder_proc *proc, { int ret; struct binder_transaction *t; + struct binder_work *w; struct binder_work *tcomplete; binder_size_t *offp, *off_end, *off_start; binder_size_t off_min; @@ -2864,6 +2860,29 @@ static void binder_transaction(struct binder_proc *proc, goto err_invalid_target_handle; } binder_inner_proc_lock(proc); + + w = list_first_entry_or_null(>todo, +struct binder_work, entry); + if (!(tr->flags & TF_ONE_WAY) && w && + w->type == BINDER_WORK_TRANSACTION) { + /* +* Do not allow new outgoing transaction from a +* thread that has a transaction at the head of +* its todo list. Only need to check the head +* because binder_select_thread_ilocked picks a +* thread from
[PATCH] android: binder: no outgoing transaction when thread todo has transaction
When a process dies, failed reply is sent to the sender of any transaction queued on a dead thread's todo list. The sender asserts that the received failed reply corresponds to the head of the transaction stack. This assert can fail if the dead thread is allowed to send outgoing transactions when there is already a transaction on its todo list, because this new transaction can end up on the transaction stack of the original sender. The following steps illustrate how this assertion can fail. 1. Thread1 sends txn19 to Thread2 (T1->transaction_stack=txn19, T2->todo+=txn19) 2. Without processing todo list, Thread2 sends txn20 to Thread1 (T1->todo+=txn20, T2->transaction_stack=txn20) 3. T1 processes txn20 on its todo list (T1->transaction_stack=txn20->txn19, T1->todo=) 4. T2 dies, T2->todo cleanup attempts to send failed reply for txn19, but T1->transaction_stack points to txn20 -- assertion fails Step 2. is the incorrect behavior. When there is a transaction on a thread's todo list, this thread should not be able to send any outgoing synchronous transactions. Only the head of the todo list needs to be checked because only threads that are waiting for proc work can directly receive work from another thread, and no work is allowed to be queued on such a thread without waking up the thread. This patch also enforces that a thread is not waiting for proc work when a work is directly enqueued to its todo list. 
Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang --- drivers/android/binder.c | 44 +--- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d58763b6b009..f2081934eb8b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -822,6 +822,7 @@ static void binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread, struct binder_work *work) { + WARN_ON(!list_empty(>waiting_thread_node)); binder_enqueue_work_ilocked(work, >todo); } @@ -839,6 +840,7 @@ static void binder_enqueue_thread_work_ilocked(struct binder_thread *thread, struct binder_work *work) { + WARN_ON(!list_empty(>waiting_thread_node)); binder_enqueue_work_ilocked(work, >todo); thread->process_todo = true; } @@ -1270,19 +1272,12 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, } else node->local_strong_refs++; if (!node->has_strong_ref && target_list) { + struct binder_thread *thread = container_of(target_list, + struct binder_thread, todo); binder_dequeue_work_ilocked(>work); - /* -* Note: this function is the only place where we queue -* directly to a thread->todo without using the -* corresponding binder_enqueue_thread_work() helper -* functions; in this case it's ok to not set the -* process_todo flag, since we know this node work will -* always be followed by other work that starts queue -* processing: in case of synchronous transactions, a -* BR_REPLY or BR_ERROR; in case of oneway -* transactions, a BR_TRANSACTION_COMPLETE. 
-*/ - binder_enqueue_work_ilocked(>work, target_list); + BUG_ON(>todo != target_list); + binder_enqueue_deferred_thread_work_ilocked(thread, + >work); } } else { if (!internal) @@ -2723,6 +2718,7 @@ static void binder_transaction(struct binder_proc *proc, { int ret; struct binder_transaction *t; + struct binder_work *w; struct binder_work *tcomplete; binder_size_t *offp, *off_end, *off_start; binder_size_t off_min; @@ -2864,6 +2860,29 @@ static void binder_transaction(struct binder_proc *proc, goto err_invalid_target_handle; } binder_inner_proc_lock(proc); + + w = list_first_entry_or_null(>todo, +struct binder_work, entry); + if (!(tr->flags & TF_ONE_WAY) && w && + w->type == BINDER_WORK_TRANSACTION) { + /* +* Do not allow new outgoing transaction from a +* thread that has a transaction at the head of +* its todo list. Only need to check the head +* because binder_select_thread_ilocked picks a +* thread from
Re: [PATCH] zsmalloc: fix linking bug in init_zspage
Hi Sergey, On Mon, Aug 13, 2018 at 07:55:36PM +0900, Sergey Senozhatsky wrote: > On (08/13/18 15:05), Minchan Kim wrote: > > > From: zhouxianrong > > > > > > The last partial object in last subpage of zspage should not be linked > > > in allocation list. Otherwise it could trigger BUG_ON explicitly at > > > function zs_map_object. But it happened rarely. > > > > Could you be more specific? What case did you see the problem? > > Is it a real problem or one founded by review? > [..] > > > Signed-off-by: zhouxianrong > > > --- > > > mm/zsmalloc.c | 2 ++ > > > 1 file changed, 2 insertions(+) > > > > > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c > > > index 8d87e973a4f5..24dd8da0aa59 100644 > > > --- a/mm/zsmalloc.c > > > +++ b/mm/zsmalloc.c > > > @@ -1040,6 +1040,8 @@ static void init_zspage(struct size_class *class, > > > struct zspage *zspage) > > >* Reset OBJ_TAG_BITS bit to last link to tell > > >* whether it's allocated object or not. > > >*/ > > > + if (off > PAGE_SIZE) > > > + link -= class->size / sizeof(*link); > > > link->next = -1UL << OBJ_TAG_BITS; > > > } > > > kunmap_atomic(vaddr); > > Hmm. This can be a real issue. Unless I'm missing something. > > So... I might be wrong, but the way I see the bug report is: > > When we link objects during zspage init, we do the following: > > while ((off += class->size) < PAGE_SIZE) { > link->next = freeobj++ << OBJ_TAG_BITS; > link += class->size / sizeof(*link); > } > > Note that we increment the link first, link += class->size / sizeof(*link), > and check for the offset only afterwards. So by the time we break out of > the while-loop the link *might* point to the partial object which starts at > the last page of zspage, but *never* ends, because we don't have next_page > in current zspage. So that's why that object should not be linked in, > because it's not a valid allocates object - we simply don't have space > for it anymore. 
> > zspage [ page 1 ][ page 2 ] > ...link > [..###] > > therefore the last object must be "link - 1" for such cases. > > I think, the following change can also do the trick: > > while ((off + class->size) < PAGE_SIZE) { > link->next = freeobj++ << OBJ_TAG_BITS; > link += class->size / sizeof(*link); > off += class->size; > } > > Once again, I might be wrong on this. > Any thoughts? If we want a refactoring, I'm not against but description said it triggered BUG_ON on zs_map_object rarely. That means it should be stable material and need more description to understand. Please be more specific with some example. The reason I'm hesitating is zsmalloc moves ZS_FULL group when the zspage->inuse is equal to class->objs_per_zspage so I thought it shouldn't allocate last partial object. Thanks.
Re: [PATCH] zsmalloc: fix linking bug in init_zspage
Hi Sergey, On Mon, Aug 13, 2018 at 07:55:36PM +0900, Sergey Senozhatsky wrote: > On (08/13/18 15:05), Minchan Kim wrote: > > > From: zhouxianrong > > > > > > The last partial object in last subpage of zspage should not be linked > > > in allocation list. Otherwise it could trigger BUG_ON explicitly at > > > function zs_map_object. But it happened rarely. > > > > Could you be more specific? What case did you see the problem? > > Is it a real problem or one founded by review? > [..] > > > Signed-off-by: zhouxianrong > > > --- > > > mm/zsmalloc.c | 2 ++ > > > 1 file changed, 2 insertions(+) > > > > > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c > > > index 8d87e973a4f5..24dd8da0aa59 100644 > > > --- a/mm/zsmalloc.c > > > +++ b/mm/zsmalloc.c > > > @@ -1040,6 +1040,8 @@ static void init_zspage(struct size_class *class, > > > struct zspage *zspage) > > >* Reset OBJ_TAG_BITS bit to last link to tell > > >* whether it's allocated object or not. > > >*/ > > > + if (off > PAGE_SIZE) > > > + link -= class->size / sizeof(*link); > > > link->next = -1UL << OBJ_TAG_BITS; > > > } > > > kunmap_atomic(vaddr); > > Hmm. This can be a real issue. Unless I'm missing something. > > So... I might be wrong, but the way I see the bug report is: > > When we link objects during zspage init, we do the following: > > while ((off += class->size) < PAGE_SIZE) { > link->next = freeobj++ << OBJ_TAG_BITS; > link += class->size / sizeof(*link); > } > > Note that we increment the link first, link += class->size / sizeof(*link), > and check for the offset only afterwards. So by the time we break out of > the while-loop the link *might* point to the partial object which starts at > the last page of zspage, but *never* ends, because we don't have next_page > in current zspage. So that's why that object should not be linked in, > because it's not a valid allocates object - we simply don't have space > for it anymore. 
> > zspage [ page 1 ][ page 2 ] > ...link > [..###] > > therefore the last object must be "link - 1" for such cases. > > I think, the following change can also do the trick: > > while ((off + class->size) < PAGE_SIZE) { > link->next = freeobj++ << OBJ_TAG_BITS; > link += class->size / sizeof(*link); > off += class->size; > } > > Once again, I might be wrong on this. > Any thoughts? If we want a refactoring, I'm not against but description said it triggered BUG_ON on zs_map_object rarely. That means it should be stable material and need more description to understand. Please be more specific with some example. The reason I'm hesitating is zsmalloc moves ZS_FULL group when the zspage->inuse is equal to class->objs_per_zspage so I thought it shouldn't allocate last partial object. Thanks.
Re: [PATCH v8 1/6] Uprobes: Simplify uprobe_register() body
On Mon, 13 Aug 2018 01:56:12 -0700 Srikar Dronamraju wrote: > * Ravi Bangoria [2018-08-09 09:48:51]: > > > Simplify uprobe_register() function body and let __uprobe_register() > > handle everything. Also move dependency functions around to fix build > > failures. > > > > One nit: > s/to fix build failures/to avoid build failures/ I pulled in this patch with the above update to the change log. > > > > > Signed-off-by: Ravi Bangoria > > --- > > > Acked-by: Srikar Dronamraju Thanks, I added your ack and Song's Reviewed-by tags. -- Steve
Re: [PATCH v8 1/6] Uprobes: Simplify uprobe_register() body
On Mon, 13 Aug 2018 01:56:12 -0700 Srikar Dronamraju wrote: > * Ravi Bangoria [2018-08-09 09:48:51]: > > > Simplify uprobe_register() function body and let __uprobe_register() > > handle everything. Also move dependency functions around to fix build > > failures. > > > > One nit: > s/to fix build failures/to avoid build failures/ I pulled in this patch with the above update to the change log. > > > > > Signed-off-by: Ravi Bangoria > > --- > > > Acked-by: Srikar Dronamraju Thanks, I added your ack and Song's Reviewed-by tags. -- Steve
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
On Mon, 13 Aug 2018 20:01:41 -0400 Steven Rostedt wrote: > On Fri, 10 Aug 2018 23:14:01 -0700 > Song Liu wrote: > > > On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt > > wrote: > > > On Thu, 9 Aug 2018 16:38:28 +0200 > > > Oleg Nesterov wrote: > > > > > >> I need to read this (hopefully final) version carefully. I'll try to do > > >> this before next Monday. > > >> > > > > > > Monday may be the opening of the merge window (more likely Sunday). Do > > > you think this is good enough for pushing it in this late in the game, > > > or do you think we should wait another cycle? > > > > > > -- Steve > > > > We (Facebook) have use cases in production that would benefit from this > > work, so > > I would rather see this gets in sooner than later. After a brief > > review (I will more > > careful review before Monday), I think this set is not likely to break > > existing uprobes > > (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle. > > > > It's still going under review, and the merge window is now open. I'd > prefer that this waits till the next merge window. > > But this said, patch 1&2 look simple enough to include this merge window. I'll pull them in and start testing. -- Steve
Re: [RFC] vruntime updated incorrectly when rt_mutex boots prio?
On 08/07/2018 10:40 AM, 'Todd Kjos' via kernel-team wrote: This issue was discovered on a 4.9-based android device, but the relevant mainline code appears to be the same. The symptom is that over time the some workloads become sluggish resulting in missed frames or sluggishness. It appears to be the same issue described in http://lists.infradead.org/pipermail/linux-arm-kernel/2018-March/567836.html. Here is the scenario: A task is deactivated while still in the fair class. The task is then boosted to RT, so rt_mutex_setprio() is called. This changes the task to RT and calls check_class_changed(), which eventually calls detach_task_cfs_rq(), which is where vruntime_normalized() sees that the task's state is TASK_WAKING, which results in skipping the subtraction of the rq's min_vruntime from the task's vruntime. Later, when the prio is deboosted and the task is moved back to the fair class, the fair rq's min_vruntime is added to the task's vruntime, resulting in vruntime inflation. This was reproduced for me on tip of mainline by using the program at the end of this mail. It was run in a 2 CPU virtualbox VM. Relevant annotated bits of the trace: low-prio thread vruntime is 752ms pi-vruntime-tes-598 [001] d... 520.572459: sched_stat_runtime: comm=pi-vruntime-tes pid=598 runtime=29953 [ns] vruntime=752888705 [ns] low-prio thread waits on a_sem pi-vruntime-tes-598 [001] d... 520.572465: sched_switch: prev_comm=pi-vruntime-tes prev_pid=598 prev_prio=120 prev_state=D ==> next_comm=swapper/1 next_pid=0 next_prio=120 high prio thread finishes wakeup, then sleeps for 1ms -0 [000] dNh. 520.572483: sched_wakeup: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 -0 [000] d... 520.572486: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> next_comm=pi-vruntime-tes next_pid=597 next_prio=19 pi-vruntime-tes-597 [000] d... 
520.572498: sched_switch: prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> next_comm=swapper/0 next_pid=0 next_prio=120 high prio thread wakes up after 1ms sleep, posts a_sem which starts to wake low-prio thread, then tries to grab pi_mutex, which low-prio thread has -0 [000] d.h. 520.573876: sched_waking: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 -0 [000] dNh. 520.573879: sched_wakeup: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 -0 [000] d... 520.573887: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> next_comm=pi-vruntime-tes next_pid=597 next_prio=19 pi-vruntime-tes-597 [000] d... 520.573895: sched_waking: comm=pi-vruntime-tes pid=598 prio=120 target_cpu=001 low-prio thread pid 598 gets pi_mutex priority inheritance, this happens while low-prio thread is in waking state pi-vruntime-tes-597 [000] d... 520.573911: sched_pi_setprio: comm=pi-vruntime-tes pid=598 oldprio=120 newprio=19 high-prio thread sleeps on pi_mutex pi-vruntime-tes-597 [000] d... 520.573919: sched_switch: prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> next_comm=swapper/0 next_pid=0 next_prio=120 low-prio thread finishes wakeup -0 [001] dNh. 520.573932: sched_wakeup: comm=pi-vruntime-tes pid=598 prio=19 target_cpu=001 -0 [001] d... 520.573936: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=S ==> next_comm=pi-vruntime-tes next_pid=598 next_prio=19 low-prio thread releases pi-mutex, loses pi boost, high-prio thread wakes for pi-mutex pi-vruntime-tes-598 [001] d... 520.573946: sched_pi_setprio: comm=pi-vruntime-tes pid=598 oldprio=19 newprio=120 pi-vruntime-tes-598 [001] dN.. 520.573954: sched_waking: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 low-prio thread vruntime is 1505ms pi-vruntime-tes-598 [001] dN.. 
520.573966: sched_stat_runtime: comm=pi-vruntime-tes pid=598 runtime=20150 [ns] vruntime=1505797560 [ns] The program: /* * Test case for vruntime management during rtmutex priority inheritance * promotion and demotion. * * build with -lpthread */ #define _GNU_SOURCE #include #include #include #include #include #define ERROR_CHECK(x) \ if (x) \ fprintf(stderr, "Error at line %d", __LINE__); pthread_mutex_t pi_mutex; sem_t a_sem; sem_t b_sem; void *rt_thread_func(void *arg) { int policy; int i = 0; cpu_set_t cpuset; CPU_ZERO(); CPU_SET(0, ); ERROR_CHECK(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), )); while(i++ < 100) { sem_wait(_sem); usleep(1000); sem_post(_sem); pthread_mutex_lock(_mutex); pthread_mutex_unlock(_mutex); } } void *low_prio_thread_func(void *arg) { int i = 0; cpu_set_t cpuset; CPU_ZERO();
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
On Mon, 13 Aug 2018 20:01:41 -0400 Steven Rostedt wrote: > On Fri, 10 Aug 2018 23:14:01 -0700 > Song Liu wrote: > > > On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt > > wrote: > > > On Thu, 9 Aug 2018 16:38:28 +0200 > > > Oleg Nesterov wrote: > > > > > >> I need to read this (hopefully final) version carefully. I'll try to do > > >> this before next Monday. > > >> > > > > > > Monday may be the opening of the merge window (more likely Sunday). Do > > > you think this is good enough for pushing it in this late in the game, > > > or do you think we should wait another cycle? > > > > > > -- Steve > > > > We (Facebook) have use cases in production that would benefit from this > > work, so > > I would rather see this gets in sooner than later. After a brief > > review (I will more > > careful review before Monday), I think this set is not likely to break > > existing uprobes > > (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle. > > > > It's still going under review, and the merge window is now open. I'd > prefer that this waits till the next merge window. > > But this said, patch 1&2 look simple enough to include this merge window. I'll pull them in and start testing. -- Steve
Re: [RFC] vruntime updated incorrectly when rt_mutex boots prio?
On 08/07/2018 10:40 AM, 'Todd Kjos' via kernel-team wrote: This issue was discovered on a 4.9-based android device, but the relevant mainline code appears to be the same. The symptom is that over time the some workloads become sluggish resulting in missed frames or sluggishness. It appears to be the same issue described in http://lists.infradead.org/pipermail/linux-arm-kernel/2018-March/567836.html. Here is the scenario: A task is deactivated while still in the fair class. The task is then boosted to RT, so rt_mutex_setprio() is called. This changes the task to RT and calls check_class_changed(), which eventually calls detach_task_cfs_rq(), which is where vruntime_normalized() sees that the task's state is TASK_WAKING, which results in skipping the subtraction of the rq's min_vruntime from the task's vruntime. Later, when the prio is deboosted and the task is moved back to the fair class, the fair rq's min_vruntime is added to the task's vruntime, resulting in vruntime inflation. This was reproduced for me on tip of mainline by using the program at the end of this mail. It was run in a 2 CPU virtualbox VM. Relevant annotated bits of the trace: low-prio thread vruntime is 752ms pi-vruntime-tes-598 [001] d... 520.572459: sched_stat_runtime: comm=pi-vruntime-tes pid=598 runtime=29953 [ns] vruntime=752888705 [ns] low-prio thread waits on a_sem pi-vruntime-tes-598 [001] d... 520.572465: sched_switch: prev_comm=pi-vruntime-tes prev_pid=598 prev_prio=120 prev_state=D ==> next_comm=swapper/1 next_pid=0 next_prio=120 high prio thread finishes wakeup, then sleeps for 1ms -0 [000] dNh. 520.572483: sched_wakeup: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 -0 [000] d... 520.572486: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> next_comm=pi-vruntime-tes next_pid=597 next_prio=19 pi-vruntime-tes-597 [000] d... 
520.572498: sched_switch: prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> next_comm=swapper/0 next_pid=0 next_prio=120 high prio thread wakes up after 1ms sleep, posts a_sem which starts to wake low-prio thread, then tries to grab pi_mutex, which low-prio thread has -0 [000] d.h. 520.573876: sched_waking: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 -0 [000] dNh. 520.573879: sched_wakeup: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 -0 [000] d... 520.573887: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=S ==> next_comm=pi-vruntime-tes next_pid=597 next_prio=19 pi-vruntime-tes-597 [000] d... 520.573895: sched_waking: comm=pi-vruntime-tes pid=598 prio=120 target_cpu=001 low-prio thread pid 598 gets pi_mutex priority inheritance, this happens while low-prio thread is in waking state pi-vruntime-tes-597 [000] d... 520.573911: sched_pi_setprio: comm=pi-vruntime-tes pid=598 oldprio=120 newprio=19 high-prio thread sleeps on pi_mutex pi-vruntime-tes-597 [000] d... 520.573919: sched_switch: prev_comm=pi-vruntime-tes prev_pid=597 prev_prio=19 prev_state=D ==> next_comm=swapper/0 next_pid=0 next_prio=120 low-prio thread finishes wakeup -0 [001] dNh. 520.573932: sched_wakeup: comm=pi-vruntime-tes pid=598 prio=19 target_cpu=001 -0 [001] d... 520.573936: sched_switch: prev_comm=swapper/1 prev_pid=0 prev_prio=120 prev_state=S ==> next_comm=pi-vruntime-tes next_pid=598 next_prio=19 low-prio thread releases pi-mutex, loses pi boost, high-prio thread wakes for pi-mutex pi-vruntime-tes-598 [001] d... 520.573946: sched_pi_setprio: comm=pi-vruntime-tes pid=598 oldprio=19 newprio=120 pi-vruntime-tes-598 [001] dN.. 520.573954: sched_waking: comm=pi-vruntime-tes pid=597 prio=19 target_cpu=000 low-prio thread vruntime is 1505ms pi-vruntime-tes-598 [001] dN.. 
520.573966: sched_stat_runtime: comm=pi-vruntime-tes pid=598 runtime=20150 [ns] vruntime=1505797560 [ns] The program: /* * Test case for vruntime management during rtmutex priority inheritance * promotion and demotion. * * build with -lpthread */ #define _GNU_SOURCE #include #include #include #include #include #define ERROR_CHECK(x) \ if (x) \ fprintf(stderr, "Error at line %d", __LINE__); pthread_mutex_t pi_mutex; sem_t a_sem; sem_t b_sem; void *rt_thread_func(void *arg) { int policy; int i = 0; cpu_set_t cpuset; CPU_ZERO(); CPU_SET(0, ); ERROR_CHECK(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), )); while(i++ < 100) { sem_wait(_sem); usleep(1000); sem_post(_sem); pthread_mutex_lock(_mutex); pthread_mutex_unlock(_mutex); } } void *low_prio_thread_func(void *arg) { int i = 0; cpu_set_t cpuset; CPU_ZERO();
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
On Mon, 13 Aug 2018 13:50:19 +0200 Oleg Nesterov wrote: > On 08/13, Ravi Bangoria wrote: > > > > On 08/11/2018 01:27 PM, Song Liu wrote: > > >> + > > >> +static void delayed_uprobe_delete(struct delayed_uprobe *du) > > >> +{ > > >> + if (!du) > > >> + return; > > > Do we really need this check? > > > > Not necessary though, but I would still like to keep it for a safety. > > Heh. I tried to ignore all minor problems in this version, but now that Song > mentioned this unnecessary check... > > Personally I really dislike the checks like this one. > > - It can confuse the reader who will try to understand the purpose > > - it can hide a bug if delayed_uprobe_delete(du) is actually called > with du == NULL. > > IMO, you should either remove it and let the kernel crash (to notice the > problem), or turn it into > > if (WARN_ON(!du)) > return; I'd prefer the more robust WARN_ON(!du) above instead of removing it. -- Steve
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
On Mon, 13 Aug 2018 13:50:19 +0200 Oleg Nesterov wrote: > On 08/13, Ravi Bangoria wrote: > > > > On 08/11/2018 01:27 PM, Song Liu wrote: > > >> + > > >> +static void delayed_uprobe_delete(struct delayed_uprobe *du) > > >> +{ > > >> + if (!du) > > >> + return; > > > Do we really need this check? > > > > Not necessary though, but I would still like to keep it for a safety. > > Heh. I tried to ignore all minor problems in this version, but now that Song > mentioned this unnecessary check... > > Personally I really dislike the checks like this one. > > - It can confuse the reader who will try to understand the purpose > > - it can hide a bug if delayed_uprobe_delete(du) is actually called > with du == NULL. > > IMO, you should either remove it and let the kernel crash (to notice the > problem), or turn it into > > if (WARN_ON(!du)) > return; I'd prefer the more robust WARN_ON(!du) above instead of removing it. -- Steve
Re: [PATCH] mm, slub: restore the original intention of prefetch_freepointer()
On Thu, Aug 9, 2018 at 1:52 AM, Vlastimil Babka wrote: > In SLUB, prefetch_freepointer() is used when allocating an object from cache's > freelist, to make sure the next object in the list is cache-hot, since it's > probable it will be allocated soon. > > Commit 2482ddec670f ("mm: add SLUB free list pointer obfuscation") has > unintentionally changed the prefetch in a way where the prefetch is turned to > a > real fetch, and only the next->next pointer is prefetched. In case there is > not > a stream of allocations that would benefit from prefetching, the extra real > fetch might add a useless cache miss to the allocation. Restore the previous > behavior. > > Signed-off-by: Vlastimil Babka > Cc: Kees Cook > Cc: Daniel Micay > Cc: Eric Dumazet > Cc: Christoph Lameter > Cc: Pekka Enberg > Cc: David Rientjes > Cc: Joonsoo Kim > --- > While I don't expect this to be causing the bug at hand, it's worth fixing. > For the bug it might mean that the page fault moves elsewhere. > > mm/slub.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/mm/slub.c b/mm/slub.c > index 51258eff4178..ce2b9e5cea77 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -271,8 +271,7 @@ static inline void *get_freepointer(struct kmem_cache *s, > void *object) > > static void prefetch_freepointer(const struct kmem_cache *s, void *object) > { > - if (object) > - prefetch(freelist_dereference(s, object + s->offset)); > + prefetch(object + s->offset); Ah -- gotcha. I think I misunderstood the purpose here. You're not prefetching what is being pointed at, you're literally prefetching what is stored there. That wouldn't require dereferencing the freelist pointer, no. Thanks! Acked-by: Kees Cook > } > > static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) > -- > 2.18.0 > -- Kees Cook Pixel Security
Re: [PATCH] mm, slub: restore the original intention of prefetch_freepointer()
On Thu, Aug 9, 2018 at 1:52 AM, Vlastimil Babka wrote: > In SLUB, prefetch_freepointer() is used when allocating an object from cache's > freelist, to make sure the next object in the list is cache-hot, since it's > probable it will be allocated soon. > > Commit 2482ddec670f ("mm: add SLUB free list pointer obfuscation") has > unintentionally changed the prefetch in a way where the prefetch is turned to > a > real fetch, and only the next->next pointer is prefetched. In case there is > not > a stream of allocations that would benefit from prefetching, the extra real > fetch might add a useless cache miss to the allocation. Restore the previous > behavior. > > Signed-off-by: Vlastimil Babka > Cc: Kees Cook > Cc: Daniel Micay > Cc: Eric Dumazet > Cc: Christoph Lameter > Cc: Pekka Enberg > Cc: David Rientjes > Cc: Joonsoo Kim > --- > While I don't expect this to be causing the bug at hand, it's worth fixing. > For the bug it might mean that the page fault moves elsewhere. > > mm/slub.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/mm/slub.c b/mm/slub.c > index 51258eff4178..ce2b9e5cea77 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -271,8 +271,7 @@ static inline void *get_freepointer(struct kmem_cache *s, > void *object) > > static void prefetch_freepointer(const struct kmem_cache *s, void *object) > { > - if (object) > - prefetch(freelist_dereference(s, object + s->offset)); > + prefetch(object + s->offset); Ah -- gotcha. I think I misunderstood the purpose here. You're not prefetching what is being pointed at, you're literally prefetching what is stored there. That wouldn't require dereferencing the freelist pointer, no. Thanks! Acked-by: Kees Cook > } > > static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) > -- > 2.18.0 > -- Kees Cook Pixel Security
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
On Fri, 10 Aug 2018 23:14:01 -0700 Song Liu wrote: > On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt wrote: > > On Thu, 9 Aug 2018 16:38:28 +0200 > > Oleg Nesterov wrote: > > > >> I need to read this (hopefully final) version carefully. I'll try to do > >> this before next Monday. > >> > > > > Monday may be the opening of the merge window (more likely Sunday). Do > > you think this is good enough for pushing it in this late in the game, > > or do you think we should wait another cycle? > > > > -- Steve > > We (Facebook) have use cases in production that would benefit from this work, > so > I would rather see this gets in sooner than later. After a brief > review (I will more > careful review before Monday), I think this set is not likely to break > existing uprobes > (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle. > It's still going under review, and the merge window is now open. I'd prefer that this waits till the next merge window. -- Steve
Re: [PATCH v8 3/6] Uprobes: Support SDT markers having reference count (semaphore)
On Fri, 10 Aug 2018 23:14:01 -0700 Song Liu wrote: > On Fri, Aug 10, 2018 at 12:58 PM, Steven Rostedt wrote: > > On Thu, 9 Aug 2018 16:38:28 +0200 > > Oleg Nesterov wrote: > > > >> I need to read this (hopefully final) version carefully. I'll try to do > >> this before next Monday. > >> > > > > Monday may be the opening of the merge window (more likely Sunday). Do > > you think this is good enough for pushing it in this late in the game, > > or do you think we should wait another cycle? > > > > -- Steve > > We (Facebook) have use cases in production that would benefit from this work, > so > I would rather see this gets in sooner than later. After a brief > review (I will more > careful review before Monday), I think this set is not likely to break > existing uprobes > (those w/o ref_ctr). Therefore, I think it is safe to put it in this cycle. > It's still going under review, and the merge window is now open. I'd prefer that this waits till the next merge window. -- Steve
Re: [PATCH] bitfield: avoid gcc-8 -Wint-in-bool-context warning
2018-08-14 7:09 GMT+09:00 Arnd Bergmann : > Passing an enum into FIELD_GET() produces a long but harmless warning on > newer compilers: > > from include/linux/linkage.h:7, > from include/linux/kernel.h:7, > from include/linux/skbuff.h:17, > from include/linux/if_ether.h:23, > from include/linux/etherdevice.h:25, > from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:63: > drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function > 'iwl_mvm_rx_mpdu_mq': > include/linux/bitfield.h:56:20: error: enum constant in boolean context > [-Werror=int-in-bool-context] >BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \ > ^ > ... > include/linux/bitfield.h:103:3: note: in expansion of macro '__BF_FIELD_CHECK' >__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ >^~~~ > drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:1025:21: note: in expansion of > macro 'FIELD_GET' > le16_encode_bits(FIELD_GET(IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK, How about fixing the root cause in drivers/net/wireless/intel/iwlwifi/fw/api/rx.h ? #define IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK 0x1eULL enum iwl_rx_he_phy looks really strange. Passing enum to FIELD_GET is odd, so I prefer keeping this warned. > The problem here is that the caller has no idea how the macro gets > expanding, leading to a false-positive. It can be trivially avoided > by doing a comparison against zero. > > This only recently started appearing as the iwlwifi driver was patched > to use FIELD_GET. 
> > Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax") > Signed-off-by: Arnd Bergmann > --- > include/linux/bitfield.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h > index 65a6981eef7b..3f1ef4450a7c 100644 > --- a/include/linux/bitfield.h > +++ b/include/linux/bitfield.h > @@ -53,7 +53,7 @@ > ({ \ > BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ > _pfx "mask is not constant"); \ > - BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");\ > + BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");\ > BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ > ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ > _pfx "value too large for the field"); \ > -- > 2.18.0 > -- Best Regards Masahiro Yamada
Re: [PATCH] bitfield: avoid gcc-8 -Wint-in-bool-context warning
2018-08-14 7:09 GMT+09:00 Arnd Bergmann : > Passing an enum into FIELD_GET() produces a long but harmless warning on > newer compilers: > > from include/linux/linkage.h:7, > from include/linux/kernel.h:7, > from include/linux/skbuff.h:17, > from include/linux/if_ether.h:23, > from include/linux/etherdevice.h:25, > from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:63: > drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function > 'iwl_mvm_rx_mpdu_mq': > include/linux/bitfield.h:56:20: error: enum constant in boolean context > [-Werror=int-in-bool-context] >BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \ > ^ > ... > include/linux/bitfield.h:103:3: note: in expansion of macro '__BF_FIELD_CHECK' >__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ >^~~~ > drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:1025:21: note: in expansion of > macro 'FIELD_GET' > le16_encode_bits(FIELD_GET(IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK, How about fixing the root cause in drivers/net/wireless/intel/iwlwifi/fw/api/rx.h ? #define IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK 0x1eULL enum iwl_rx_he_phy looks really strange. Passing enum to FIELD_GET is odd, so I prefer keeping this warned. > The problem here is that the caller has no idea how the macro gets > expanding, leading to a false-positive. It can be trivially avoided > by doing a comparison against zero. > > This only recently started appearing as the iwlwifi driver was patched > to use FIELD_GET. 
> > Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax") > Signed-off-by: Arnd Bergmann > --- > include/linux/bitfield.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h > index 65a6981eef7b..3f1ef4450a7c 100644 > --- a/include/linux/bitfield.h > +++ b/include/linux/bitfield.h > @@ -53,7 +53,7 @@ > ({ \ > BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ > _pfx "mask is not constant"); \ > - BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");\ > + BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");\ > BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ > ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ > _pfx "value too large for the field"); \ > -- > 2.18.0 > -- Best Regards Masahiro Yamada
Re: [PATCH v2 1/3] pinctrl: msm: Really mask level interrupts to prevent latching
Hi, On Wed, Jul 25, 2018 at 3:28 PM, Stephen Boyd wrote: > The interrupt controller hardware in this pin controller has two status > enable bits. The first "normal" status enable bit enables or disables > the summary interrupt line being raised when a gpio interrupt triggers > and the "raw" status enable bit allows or prevents the hardware from > latching an interrupt into the status register for a gpio interrupt. > Currently we just toggle the "normal" status enable bit in the mask and > unmask ops so that the summary irq interrupt going to the CPU's > interrupt controller doesn't trigger for the masked gpio interrupt. > > For a level triggered interrupt, the flow would be as follows: the pin > controller sees the interrupt, latches the status into the status > register, raises the summary irq to the CPU, summary irq handler runs > and calls handle_level_irq(), handle_level_irq() masks and acks the gpio > interrupt, the interrupt handler runs, and finally unmask the interrupt. > When the interrupt handler completes, we expect that the interrupt line > level will go back to the deasserted state so the genirq code can unmask > the interrupt without it triggering again. > > If we only mask the interrupt by clearing the "normal" status enable bit > then we'll ack the interrupt but it will continue to show up as pending > in the status register because the raw status bit is enabled, the > hardware hasn't deasserted the line, and thus the asserted state latches > into the status register again. When the hardware deasserts the > interrupt the pin controller still thinks there is a pending unserviced > level interrupt because it latched it earlier. This behavior causes > software to see an extra interrupt for level type interrupts each time > the interrupt is handled. > > Let's fix this by clearing the raw status enable bit for level type > interrupts so that the hardware stops latching the status of the > interrupt after we ack it. 
We don't do this for edge type interrupts > because it seems that toggling the raw status enable bit for edge type > interrupts causes spurious edge interrupts. > > Cc: Bjorn Andersson > Cc: Doug Anderson > Signed-off-by: Stephen Boyd > --- > > Changes from v1: > - Squashed raw_status_bit write into same write on unmask (Doug >Andersson) > > drivers/pinctrl/qcom/pinctrl-msm.c | 14 ++ > 1 file changed, 14 insertions(+) > > diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c > b/drivers/pinctrl/qcom/pinctrl-msm.c > index 2155a30c282b..3970dc599092 100644 > --- a/drivers/pinctrl/qcom/pinctrl-msm.c > +++ b/drivers/pinctrl/qcom/pinctrl-msm.c > @@ -634,6 +634,19 @@ static void msm_gpio_irq_mask(struct irq_data *d) > raw_spin_lock_irqsave(>lock, flags); > > val = readl(pctrl->regs + g->intr_cfg_reg); > + /* > +* Leaving the RAW_STATUS_EN bit enabled causes level interrupts that > +* are still asserted to re-latch after we ack them. Clear the raw > +* status enable bit too so the interrupt can't even latch into the > +* hardware while it's masked, but only do this for level interrupts > +* because edge interrupts have a problem with the raw status bit > +* toggling and causing spurious interrupts. This whole "spurious interrupts" explanation still seems dodgy. Have you experienced it yourself, or is this looking through some previous commits? As per my comments in v1, I'd still rather the comment state the reason as: it's important to _not_ lose edge interrupts when masked. > +*/ > + if (irqd_get_trigger_type(d) & IRQ_TYPE_LEVEL_MASK) { > + val &= ~BIT(g->intr_raw_status_bit); > + writel(val, pctrl->regs + g->intr_cfg_reg); > + } In v1 you claimed you were going to combine this with the next write (you said you'd combine things in both mask and unmask). ...is there a reason why you didn't? As per my comments in v1 I believe it's safer from a correctness point of view to combine them. -Doug
Re: [PATCH v2 1/3] pinctrl: msm: Really mask level interrupts to prevent latching
Hi, On Wed, Jul 25, 2018 at 3:28 PM, Stephen Boyd wrote: > The interrupt controller hardware in this pin controller has two status > enable bits. The first "normal" status enable bit enables or disables > the summary interrupt line being raised when a gpio interrupt triggers > and the "raw" status enable bit allows or prevents the hardware from > latching an interrupt into the status register for a gpio interrupt. > Currently we just toggle the "normal" status enable bit in the mask and > unmask ops so that the summary irq interrupt going to the CPU's > interrupt controller doesn't trigger for the masked gpio interrupt. > > For a level triggered interrupt, the flow would be as follows: the pin > controller sees the interrupt, latches the status into the status > register, raises the summary irq to the CPU, summary irq handler runs > and calls handle_level_irq(), handle_level_irq() masks and acks the gpio > interrupt, the interrupt handler runs, and finally unmask the interrupt. > When the interrupt handler completes, we expect that the interrupt line > level will go back to the deasserted state so the genirq code can unmask > the interrupt without it triggering again. > > If we only mask the interrupt by clearing the "normal" status enable bit > then we'll ack the interrupt but it will continue to show up as pending > in the status register because the raw status bit is enabled, the > hardware hasn't deasserted the line, and thus the asserted state latches > into the status register again. When the hardware deasserts the > interrupt the pin controller still thinks there is a pending unserviced > level interrupt because it latched it earlier. This behavior causes > software to see an extra interrupt for level type interrupts each time > the interrupt is handled. > > Let's fix this by clearing the raw status enable bit for level type > interrupts so that the hardware stops latching the status of the > interrupt after we ack it. 
We don't do this for edge type interrupts > because it seems that toggling the raw status enable bit for edge type > interrupts causes spurious edge interrupts. > > Cc: Bjorn Andersson > Cc: Doug Anderson > Signed-off-by: Stephen Boyd > --- > > Changes from v1: > - Squashed raw_status_bit write into same write on unmask (Doug >Andersson) > > drivers/pinctrl/qcom/pinctrl-msm.c | 14 ++ > 1 file changed, 14 insertions(+) > > diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c > b/drivers/pinctrl/qcom/pinctrl-msm.c > index 2155a30c282b..3970dc599092 100644 > --- a/drivers/pinctrl/qcom/pinctrl-msm.c > +++ b/drivers/pinctrl/qcom/pinctrl-msm.c > @@ -634,6 +634,19 @@ static void msm_gpio_irq_mask(struct irq_data *d) > raw_spin_lock_irqsave(>lock, flags); > > val = readl(pctrl->regs + g->intr_cfg_reg); > + /* > +* Leaving the RAW_STATUS_EN bit enabled causes level interrupts that > +* are still asserted to re-latch after we ack them. Clear the raw > +* status enable bit too so the interrupt can't even latch into the > +* hardware while it's masked, but only do this for level interrupts > +* because edge interrupts have a problem with the raw status bit > +* toggling and causing spurious interrupts. This whole "spurious interrupts" explanation still seems dodgy. Have you experienced it yourself, or is this looking through some previous commits? As per my comments in v1, I'd still rather the comment state the reason as: it's important to _not_ lose edge interrupts when masked. > +*/ > + if (irqd_get_trigger_type(d) & IRQ_TYPE_LEVEL_MASK) { > + val &= ~BIT(g->intr_raw_status_bit); > + writel(val, pctrl->regs + g->intr_cfg_reg); > + } In v1 you claimed you were going to combine this with the next write (you said you'd combine things in both mask and unmask). ...is there a reason why you didn't? As per my comments in v1 I believe it's safer from a correctness point of view to combine them. -Doug
[PATCH 9/9] platform: goldfish: pipe: Add a blank line to separate struct members
From: Roman Kiryanov To improve readability and to be consistent with other struct members. Signed-off-by: Roman Kiryanov --- drivers/platform/goldfish/goldfish_pipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 3f8e440d62ae..d0621da2c334 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -157,6 +157,7 @@ struct goldfish_pipe { /* A wake queue for sleeping until host signals an event */ wait_queue_head_t wake_queue; + /* Pointer to the parent goldfish_pipe_dev instance */ struct goldfish_pipe_dev *dev; }; -- 2.18.0.865.gffc8e1a3cd6-goog
[PATCH 9/9] platform: goldfish: pipe: Add a blank line to separate struct members
From: Roman Kiryanov To improve readability and to be consistent with other struct members. Signed-off-by: Roman Kiryanov --- drivers/platform/goldfish/goldfish_pipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 3f8e440d62ae..d0621da2c334 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -157,6 +157,7 @@ struct goldfish_pipe { /* A wake queue for sleeping until host signals an event */ wait_queue_head_t wake_queue; + /* Pointer to the parent goldfish_pipe_dev instance */ struct goldfish_pipe_dev *dev; }; -- 2.18.0.865.gffc8e1a3cd6-goog
[PATCH 4/9] platform: goldfish: pipe: Separate the host interface to a separate header
From: Roman Kiryanov These are several enums that must be kept in sync with the host side. This change explicitly separates them into a dedicated header file. Signed-off-by: Roman Kiryanov --- drivers/platform/goldfish/goldfish_pipe.c | 69 +-- .../platform/goldfish/goldfish_pipe_qemu.h| 112 ++ 2 files changed, 113 insertions(+), 68 deletions(-) create mode 100644 drivers/platform/goldfish/goldfish_pipe_qemu.h diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 0607897c6a59..8f9580454154 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -63,6 +63,7 @@ #include #include #include +#include "goldfish_pipe_qemu.h" /* * Update this when something changes in the driver's behavior so the host @@ -73,74 +74,6 @@ enum { PIPE_CURRENT_DEVICE_VERSION = 2 }; -/* - * IMPORTANT: The following constants must match the ones used and defined - * in external/qemu/hw/goldfish_pipe.c in the Android source tree. 
- */ - -/* List of bitflags returned in status of CMD_POLL command */ -enum PipePollFlags { - PIPE_POLL_IN= 1 << 0, - PIPE_POLL_OUT = 1 << 1, - PIPE_POLL_HUP = 1 << 2 -}; - -/* - * Possible status values used to signal errors - see - * goldfish_pipe_error_convert - */ -enum PipeErrors { - PIPE_ERROR_INVAL = -1, - PIPE_ERROR_AGAIN = -2, - PIPE_ERROR_NOMEM = -3, - PIPE_ERROR_IO = -4 -}; - -/* Bit-flags used to signal events from the emulator */ -enum PipeWakeFlags { - PIPE_WAKE_CLOSED = 1 << 0, /* emulator closed pipe */ - PIPE_WAKE_READ = 1 << 1, /* pipe can now be read from */ - PIPE_WAKE_WRITE = 1 << 2 /* pipe can now be written to */ -}; - -/* Bit flags for the 'flags' field */ -enum PipeFlagsBits { - BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ - BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */ - BIT_WAKE_ON_READ = 2, /* want to be woken on reads */ -}; - -enum PipeRegs { - PIPE_REG_CMD = 0, - - PIPE_REG_SIGNAL_BUFFER_HIGH = 4, - PIPE_REG_SIGNAL_BUFFER = 8, - PIPE_REG_SIGNAL_BUFFER_COUNT = 12, - - PIPE_REG_OPEN_BUFFER_HIGH = 20, - PIPE_REG_OPEN_BUFFER = 24, - - PIPE_REG_VERSION = 36, - - PIPE_REG_GET_SIGNALLED = 48, -}; - -enum PipeCmdCode { - PIPE_CMD_OPEN = 1, /* to be used by the pipe device itself */ - PIPE_CMD_CLOSE, - PIPE_CMD_POLL, - PIPE_CMD_WRITE, - PIPE_CMD_WAKE_ON_WRITE, - PIPE_CMD_READ, - PIPE_CMD_WAKE_ON_READ, - - /* -* TODO(zyy): implement a deferred read/write execution to allow -* parallel processing of pipe operations on the host. -*/ - PIPE_CMD_WAKE_ON_DONE_IO, -}; - enum { MAX_BUFFERS_PER_COMMAND = 336, MAX_SIGNALLED_PIPES = 64, diff --git a/drivers/platform/goldfish/goldfish_pipe_qemu.h b/drivers/platform/goldfish/goldfish_pipe_qemu.h new file mode 100644 index ..51f3d9f82af6 --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe_qemu.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Google, Inc. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* + * IMPORTANT: The following constants must match the ones used and defined in + * external/qemu/include/hw/misc/goldfish_pipe.h + */ + +#ifndef ANDROID_PIPE_COMMON_H +#define ANDROID_PIPE_COMMON_H + +/* List of bitflags returned in status of CMD_POLL command */ +enum PipePollFlags { + PIPE_POLL_IN= 1 << 0, + PIPE_POLL_OUT = 1 << 1, + PIPE_POLL_HUP = 1 << 2 +}; + +/* Possible status values used to signal errors */ +enum PipeErrors { + PIPE_ERROR_INVAL= -1, + PIPE_ERROR_AGAIN= -2, + PIPE_ERROR_NOMEM= -3, + PIPE_ERROR_IO = -4 +}; + +/* Bit-flags used to signal events from the emulator */ +enum PipeWakeFlags { + /* emulator closed pipe */ + PIPE_WAKE_CLOSED= 1 << 0, + + /* pipe can now be read from */ + PIPE_WAKE_READ = 1 << 1, + + /* pipe can now be written to */ + PIPE_WAKE_WRITE = 1 << 2, + + /* unlock this pipe's DMA buffer */ + PIPE_WAKE_UNLOCK_DMA= 1 << 3, + + /* unlock DMA buffer of the pipe shared to this pipe */ + PIPE_WAKE_UNLOCK_DMA_SHARED = 1 << 4, +}; + +/* Possible pipe closing reasons */ +enum PipeCloseReason { + /* guest sent a
[PATCH 5/9] platform: goldfish: pipe: Update the comment for GFP_ATOMIC
From: Roman Kiryanov Provide an explanation why GFP_ATOMIC is needed to prevent changing it to other values. Signed-off-by: Roman Kiryanov --- drivers/platform/goldfish/goldfish_pipe.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 8f9580454154..a4db4d12b09c 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -639,7 +639,10 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) return id; { - /* Reallocate the array */ + /* Reallocate the array. +* Since get_free_pipe_id_locked runs with interrupts disabled, +* we don't want to make calls that could lead to sleep. +*/ u32 new_capacity = 2 * dev->pipes_capacity; struct goldfish_pipe **pipes = kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC); -- 2.18.0.865.gffc8e1a3cd6-goog
[PATCH 4/9] platform: goldfish: pipe: Separate the host interface to a separate header
From: Roman Kiryanov These are several enums that must be kept in sync with the host side. This change explicitly separates them into a dedicated header file. Signed-off-by: Roman Kiryanov --- drivers/platform/goldfish/goldfish_pipe.c | 69 +-- .../platform/goldfish/goldfish_pipe_qemu.h| 112 ++ 2 files changed, 113 insertions(+), 68 deletions(-) create mode 100644 drivers/platform/goldfish/goldfish_pipe_qemu.h diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 0607897c6a59..8f9580454154 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -63,6 +63,7 @@ #include #include #include +#include "goldfish_pipe_qemu.h" /* * Update this when something changes in the driver's behavior so the host @@ -73,74 +74,6 @@ enum { PIPE_CURRENT_DEVICE_VERSION = 2 }; -/* - * IMPORTANT: The following constants must match the ones used and defined - * in external/qemu/hw/goldfish_pipe.c in the Android source tree. 
- */ - -/* List of bitflags returned in status of CMD_POLL command */ -enum PipePollFlags { - PIPE_POLL_IN= 1 << 0, - PIPE_POLL_OUT = 1 << 1, - PIPE_POLL_HUP = 1 << 2 -}; - -/* - * Possible status values used to signal errors - see - * goldfish_pipe_error_convert - */ -enum PipeErrors { - PIPE_ERROR_INVAL = -1, - PIPE_ERROR_AGAIN = -2, - PIPE_ERROR_NOMEM = -3, - PIPE_ERROR_IO = -4 -}; - -/* Bit-flags used to signal events from the emulator */ -enum PipeWakeFlags { - PIPE_WAKE_CLOSED = 1 << 0, /* emulator closed pipe */ - PIPE_WAKE_READ = 1 << 1, /* pipe can now be read from */ - PIPE_WAKE_WRITE = 1 << 2 /* pipe can now be written to */ -}; - -/* Bit flags for the 'flags' field */ -enum PipeFlagsBits { - BIT_CLOSED_ON_HOST = 0, /* pipe closed by host */ - BIT_WAKE_ON_WRITE = 1, /* want to be woken on writes */ - BIT_WAKE_ON_READ = 2, /* want to be woken on reads */ -}; - -enum PipeRegs { - PIPE_REG_CMD = 0, - - PIPE_REG_SIGNAL_BUFFER_HIGH = 4, - PIPE_REG_SIGNAL_BUFFER = 8, - PIPE_REG_SIGNAL_BUFFER_COUNT = 12, - - PIPE_REG_OPEN_BUFFER_HIGH = 20, - PIPE_REG_OPEN_BUFFER = 24, - - PIPE_REG_VERSION = 36, - - PIPE_REG_GET_SIGNALLED = 48, -}; - -enum PipeCmdCode { - PIPE_CMD_OPEN = 1, /* to be used by the pipe device itself */ - PIPE_CMD_CLOSE, - PIPE_CMD_POLL, - PIPE_CMD_WRITE, - PIPE_CMD_WAKE_ON_WRITE, - PIPE_CMD_READ, - PIPE_CMD_WAKE_ON_READ, - - /* -* TODO(zyy): implement a deferred read/write execution to allow -* parallel processing of pipe operations on the host. -*/ - PIPE_CMD_WAKE_ON_DONE_IO, -}; - enum { MAX_BUFFERS_PER_COMMAND = 336, MAX_SIGNALLED_PIPES = 64, diff --git a/drivers/platform/goldfish/goldfish_pipe_qemu.h b/drivers/platform/goldfish/goldfish_pipe_qemu.h new file mode 100644 index ..51f3d9f82af6 --- /dev/null +++ b/drivers/platform/goldfish/goldfish_pipe_qemu.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Google, Inc. 
+ * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* + * IMPORTANT: The following constants must match the ones used and defined in + * external/qemu/include/hw/misc/goldfish_pipe.h + */ + +#ifndef ANDROID_PIPE_COMMON_H +#define ANDROID_PIPE_COMMON_H + +/* List of bitflags returned in status of CMD_POLL command */ +enum PipePollFlags { + PIPE_POLL_IN= 1 << 0, + PIPE_POLL_OUT = 1 << 1, + PIPE_POLL_HUP = 1 << 2 +}; + +/* Possible status values used to signal errors */ +enum PipeErrors { + PIPE_ERROR_INVAL= -1, + PIPE_ERROR_AGAIN= -2, + PIPE_ERROR_NOMEM= -3, + PIPE_ERROR_IO = -4 +}; + +/* Bit-flags used to signal events from the emulator */ +enum PipeWakeFlags { + /* emulator closed pipe */ + PIPE_WAKE_CLOSED= 1 << 0, + + /* pipe can now be read from */ + PIPE_WAKE_READ = 1 << 1, + + /* pipe can now be written to */ + PIPE_WAKE_WRITE = 1 << 2, + + /* unlock this pipe's DMA buffer */ + PIPE_WAKE_UNLOCK_DMA= 1 << 3, + + /* unlock DMA buffer of the pipe shared to this pipe */ + PIPE_WAKE_UNLOCK_DMA_SHARED = 1 << 4, +}; + +/* Possible pipe closing reasons */ +enum PipeCloseReason { + /* guest sent a
[PATCH 5/9] platform: goldfish: pipe: Update the comment for GFP_ATOMIC
From: Roman Kiryanov Provide an explanation why GFP_ATOMIC is needed to prevent changing it to other values. Signed-off-by: Roman Kiryanov --- drivers/platform/goldfish/goldfish_pipe.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 8f9580454154..a4db4d12b09c 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -639,7 +639,10 @@ static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev) return id; { - /* Reallocate the array */ + /* Reallocate the array. +* Since get_free_pipe_id_locked runs with interrupts disabled, +* we don't want to make calls that could lead to sleep. +*/ u32 new_capacity = 2 * dev->pipes_capacity; struct goldfish_pipe **pipes = kcalloc(new_capacity, sizeof(*pipes), GFP_ATOMIC); -- 2.18.0.865.gffc8e1a3cd6-goog