date:20170103

[PATCH v2] soc: ti: Drop wait from wkup_m3_rproc_boot_thread

2017-01-03 Thread Sarangdhar Joshi

The function wkup_m3_rproc_boot_thread waits for
asynchronous firmware loading to parse the resource table
before calling rproc_boot(). However, as the resource table
parsing has been moved to rproc_boot(), there's no need to
wait for the asynchronous firmware loading completion.
So, drop this.

CC: Dave Gerlach 
CC: Suman Anna 
CC: Bjorn Andersson 
Signed-off-by: Sarangdhar Joshi 
---

This patch seems to be doing an independent clean up now. Hence
removing it from the series.

 drivers/soc/ti/wkup_m3_ipc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
index 8823cc8..8bfa44b 100644
--- a/drivers/soc/ti/wkup_m3_ipc.c
+++ b/drivers/soc/ti/wkup_m3_ipc.c
@@ -370,8 +370,6 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc 
*m3_ipc)
struct device *dev = m3_ipc->dev;
int ret;
 
-   wait_for_completion(&m3_ipc->rproc->firmware_loading_complete);
-
init_completion(&m3_ipc->sync_complete);
 
ret = rproc_boot(m3_ipc->rproc);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH 3/4] watchdog: iTCO_wdt: Use pdev for platform device and pci_dev for pci device

2017-01-03 Thread Andy Shevchenko

On Wed, Jan 4, 2017 at 1:40 AM, Guenter Roeck  wrote:
> On Wed, Jan 04, 2017 at 12:39:59AM +0200, Andy Shevchenko wrote:
>> On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
>> > Use pdev for struct platform_device, pcidev for struct pci_dev, and dev
>> > for struct device variables to improve consistency.
>> >
>> > Remove 'struct platform_device *dev;' from struct iTCO_wdt_private since
>> > it was unused.
>>
>> Would pci_dev work?
>>
> Sure, or maybe just 'pci'. Any preference ?

Just slightly prefer pci_dev over others (pcidev, pci), but do not
insist. Up to you.

P.S. Matt is not working for Intel anymore. I would recommend Cc'ing
Mika instead.

>
> Thanks,
> Guenter
>
>> In any case
>> Reviewed-by: Andy Shevchenko 
>>
>> >
>> > Signed-off-by: Guenter Roeck 
>> > ---
>> >  drivers/watchdog/iTCO_wdt.c | 53 
>> > ++---
>> >  1 file changed, 26 insertions(+), 27 deletions(-)
>> >
>> > diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
>> > index eed1dee6de19..ad29ae03a30b 100644
>> > --- a/drivers/watchdog/iTCO_wdt.c
>> > +++ b/drivers/watchdog/iTCO_wdt.c
>> > @@ -102,9 +102,8 @@ struct iTCO_wdt_private {
>> > unsigned long __iomem *gcs_pmc;
>> > /* the lock for io operations */
>> > spinlock_t io_lock;
>> > -   struct platform_device *dev;
>> > /* the PCI-device */
>> > -   struct pci_dev *pdev;
>> > +   struct pci_dev *pcidev;
>> > /* whether or not the watchdog has been suspended */
>> > bool suspended;
>> >  };
>> > @@ -181,9 +180,9 @@ static void iTCO_wdt_set_NO_REBOOT_bit(struct 
>> > iTCO_wdt_private *p)
>> > val32 |= no_reboot_bit(p);
>> > writel(val32, p->gcs_pmc);
>> > } else if (p->iTCO_version == 1) {
>> > -   pci_read_config_dword(p->pdev, 0xd4, &val32);
>> > +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
>> > val32 |= no_reboot_bit(p);
>> > -   pci_write_config_dword(p->pdev, 0xd4, val32);
>> > +   pci_write_config_dword(p->pcidev, 0xd4, val32);
>> > }
>> >  }
>> >
>> > @@ -200,11 +199,11 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(struct 
>> > iTCO_wdt_private *p)
>> >
>> > val32 = readl(p->gcs_pmc);
>> > } else if (p->iTCO_version == 1) {
>> > -   pci_read_config_dword(p->pdev, 0xd4, &val32);
>> > +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
>> > val32 &= ~enable_bit;
>> > -   pci_write_config_dword(p->pdev, 0xd4, val32);
>> > +   pci_write_config_dword(p->pcidev, 0xd4, val32);
>> >
>> > -   pci_read_config_dword(p->pdev, 0xd4, &val32);
>> > +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
>> > }
>> >
>> > if (val32 & enable_bit)
>> > @@ -401,9 +400,10 @@ static const struct watchdog_ops iTCO_wdt_ops = {
>> >   * Init & exit routines
>> >   */
>> >
>> > -static int iTCO_wdt_probe(struct platform_device *dev)
>> > +static int iTCO_wdt_probe(struct platform_device *pdev)
>> >  {
>> > -   struct itco_wdt_platform_data *pdata = dev_get_platdata(&dev->dev);
>> > +   struct device *dev = &pdev->dev;
>> > +   struct itco_wdt_platform_data *pdata = dev_get_platdata(dev);
>> > struct iTCO_wdt_private *p;
>> > unsigned long val32;
>> > int ret;
>> > @@ -411,33 +411,32 @@ static int iTCO_wdt_probe(struct platform_device 
>> > *dev)
>> > if (!pdata)
>> > return -ENODEV;
>> >
>> > -   p = devm_kzalloc(&dev->dev, sizeof(*p), GFP_KERNEL);
>> > +   p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
>> > if (!p)
>> > return -ENOMEM;
>> >
>> > spin_lock_init(&p->io_lock);
>> >
>> > -   p->tco_res = platform_get_resource(dev, IORESOURCE_IO, 
>> > ICH_RES_IO_TCO);
>> > +   p->tco_res = platform_get_resource(pdev, IORESOURCE_IO, 
>> > ICH_RES_IO_TCO);
>> > if (!p->tco_res)
>> > return -ENODEV;
>> >
>> > -   p->smi_res = platform_get_resource(dev, IORESOURCE_IO, 
>> > ICH_RES_IO_SMI);
>> > +   p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, 
>> > ICH_RES_IO_SMI);
>> > if (!p->smi_res)
>> > return -ENODEV;
>> >
>> > p->iTCO_version = pdata->version;
>> > -   p->dev = dev;
>> > -   p->pdev = to_pci_dev(dev->dev.parent);
>> > +   p->pcidev = to_pci_dev(dev->parent);
>> >
>> > /*
>> >  * Get the Memory-Mapped GCS or PMC register, we need it for the
>> >  * NO_REBOOT flag (TCO v2 and v3).
>> >  */
>> > if (p->iTCO_version >= 2) {
>> > -   p->gcs_pmc_res = platform_get_resource(dev,
>> > +   p->gcs_pmc_res = platform_get_resource(pdev,
>> >IORESOURCE_MEM,
>> >
>> > ICH_RES_MEM_GCS_PM

Re: Designated initializers, struct randomization and addressing?

2017-01-03 Thread Kees Cook

On Tue, Dec 20, 2016 at 9:29 AM, Joe Perches  wrote:
> On Fri, 2016-12-16 at 17:00 -0800, Kees Cook wrote:
>> Prepare to mark sensitive kernel structures for randomization by making
>> sure they're using designated initializers.
>
> About the designated initializer patches,
> which by themselves are fine of course,
> and the fundamental randomization plugin,
> c guarantees that struct member ordering
> is as specified.
>
> how is the code to be verified so that
> any use of things like offsetof and any
> address/indexing is not impacted?

AIUI, offsetof() works correctly in the face of this plugin, since the
ordering happens before the pass that handles offsetof(). Anything
that _does not_ use offsetof(), however, needs fixing. Based on the
work done in grsecurity, I don't see any added offsetof() uses that
are specific to the randomization plugin.

(Note that the randomization plugin is only on function pointer
structures, where using an offsetof() should be rare to none, and on
hand-selected structures, where missing offsetof() should be easy to
audit.)

-Kees

-- 
Kees Cook
Nexus Security

Re: [PATCH] Allow userspace control of runtime disabling/enabling of driver probing

2017-01-03 Thread Kees Cook

On Tue, Jan 3, 2017 at 3:34 PM, Rafael J. Wysocki  wrote:
> On Tue, Jan 3, 2017 at 11:58 PM, Kees Cook  wrote:
>> From: Matthew Garrett 
>>
>> Various attacks are made possible due to the large attack surface of
>> kernel drivers and the easy availability of hotpluggable hardware that can
>> be programmed to mimic arbitrary devices. This allows attackers to find a
>> single vulnerable driver and then produce a device that can exploit it by
>> plugging into a hotpluggable bus (such as PCI or USB). This violates user
>> assumptions about unattended systems being secure as long as the screen
>> is locked.
>>
>> The kernel already has support for deferring driver binding in order
>> to avoid problems over suspend/resume. By exposing this to userspace we
>> can disable probing when the screen is locked and simply reenable it on
>> unlock.
>>
>> This is not a complete solution - since this still permits device
>> creation and simply blocks driver binding, it won't stop userspace
>> drivers from attaching to devices and it won't protect against any kernel
>> vulnerabilities in the core bus code. However, it should be sufficient to
>> block attacks like Poisontap (https://samy.pl/poisontap/).
>
> It also looks like this may be worked around by tricking the user into
> unlocking the screen while the malicious device is still attached to the
> system.

It certainly changes the temporal aspect of the attack (i.e. there is
a delay and must be "silent" in that the local user cannot notice it).

> If that really is the case, I wonder if it's worth the extra complexity.

I think so, since it's not that much more complexity (it uses the
existing deferral mechanism).

-Kees

-- 
Kees Cook
Nexus Security

Re: [RFC] memcpy_nocache() and memcpy_writethrough()

2017-01-03 Thread Linus Torvalds

On Tue, Jan 3, 2017 at 3:22 PM, Al Viro  wrote:
>
> 1) memcpy_to_pmem() seems to rely upon the __copy_from_user_nocache()
> having only used movnt; it does not attempt clwb at all.
>
> 2) __copy_from_user_nocache() for short copies does not use movnt at all.
> In that case neither sfence nor clwb is issued.

Quite frankly, the whole "memcpy_nocache()" idea or (ab-)using
copy_user_nocache() just needs to die. It's idiotic.

As you point out, it's also fundamentally buggy crap.

Throw it away. There is no possible way this is ever valid or
portable. We're not going to lie and claim that it is.

If some driver ends up using "movnt" by hand, that is up to that
*driver*. But no way in hell should we care about this one whit in the
sense of <linux/uaccess.h>. Get rid of that shit.

So Al - just ignore this whole issue. It's not your headache. Any code
that tries to depend on some non-caching memcpy is terminally buggy,
and those code paths need to fix themselves, not ask others to fix
their braindamage for them.

 Linus

Re: [PATCH 3/4] watchdog: iTCO_wdt: Use pdev for platform device and pci_dev for pci device

2017-01-03 Thread Guenter Roeck

On Wed, Jan 04, 2017 at 12:39:59AM +0200, Andy Shevchenko wrote:
> On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
> > Use pdev for struct platform_device, pcidev for struct pci_dev, and dev
> > for struct device variables to improve consistency.
> >
> > Remove 'struct platform_device *dev;' from struct iTCO_wdt_private since
> > it was unused.
> 
> Would pci_dev work?
> 
Sure, or maybe just 'pci'. Any preference ?

Thanks,
Guenter

> In any case
> Reviewed-by: Andy Shevchenko 
> 
> >
> > Signed-off-by: Guenter Roeck 
> > ---
> >  drivers/watchdog/iTCO_wdt.c | 53 
> > ++---
> >  1 file changed, 26 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
> > index eed1dee6de19..ad29ae03a30b 100644
> > --- a/drivers/watchdog/iTCO_wdt.c
> > +++ b/drivers/watchdog/iTCO_wdt.c
> > @@ -102,9 +102,8 @@ struct iTCO_wdt_private {
> > unsigned long __iomem *gcs_pmc;
> > /* the lock for io operations */
> > spinlock_t io_lock;
> > -   struct platform_device *dev;
> > /* the PCI-device */
> > -   struct pci_dev *pdev;
> > +   struct pci_dev *pcidev;
> > /* whether or not the watchdog has been suspended */
> > bool suspended;
> >  };
> > @@ -181,9 +180,9 @@ static void iTCO_wdt_set_NO_REBOOT_bit(struct 
> > iTCO_wdt_private *p)
> > val32 |= no_reboot_bit(p);
> > writel(val32, p->gcs_pmc);
> > } else if (p->iTCO_version == 1) {
> > -   pci_read_config_dword(p->pdev, 0xd4, &val32);
> > +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
> > val32 |= no_reboot_bit(p);
> > -   pci_write_config_dword(p->pdev, 0xd4, val32);
> > +   pci_write_config_dword(p->pcidev, 0xd4, val32);
> > }
> >  }
> >
> > @@ -200,11 +199,11 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(struct 
> > iTCO_wdt_private *p)
> >
> > val32 = readl(p->gcs_pmc);
> > } else if (p->iTCO_version == 1) {
> > -   pci_read_config_dword(p->pdev, 0xd4, &val32);
> > +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
> > val32 &= ~enable_bit;
> > -   pci_write_config_dword(p->pdev, 0xd4, val32);
> > +   pci_write_config_dword(p->pcidev, 0xd4, val32);
> >
> > -   pci_read_config_dword(p->pdev, 0xd4, &val32);
> > +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
> > }
> >
> > if (val32 & enable_bit)
> > @@ -401,9 +400,10 @@ static const struct watchdog_ops iTCO_wdt_ops = {
> >   * Init & exit routines
> >   */
> >
> > -static int iTCO_wdt_probe(struct platform_device *dev)
> > +static int iTCO_wdt_probe(struct platform_device *pdev)
> >  {
> > -   struct itco_wdt_platform_data *pdata = dev_get_platdata(&dev->dev);
> > +   struct device *dev = &pdev->dev;
> > +   struct itco_wdt_platform_data *pdata = dev_get_platdata(dev);
> > struct iTCO_wdt_private *p;
> > unsigned long val32;
> > int ret;
> > @@ -411,33 +411,32 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> > if (!pdata)
> > return -ENODEV;
> >
> > -   p = devm_kzalloc(&dev->dev, sizeof(*p), GFP_KERNEL);
> > +   p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
> > if (!p)
> > return -ENOMEM;
> >
> > spin_lock_init(&p->io_lock);
> >
> > -   p->tco_res = platform_get_resource(dev, IORESOURCE_IO, 
> > ICH_RES_IO_TCO);
> > +   p->tco_res = platform_get_resource(pdev, IORESOURCE_IO, 
> > ICH_RES_IO_TCO);
> > if (!p->tco_res)
> > return -ENODEV;
> >
> > -   p->smi_res = platform_get_resource(dev, IORESOURCE_IO, 
> > ICH_RES_IO_SMI);
> > +   p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, 
> > ICH_RES_IO_SMI);
> > if (!p->smi_res)
> > return -ENODEV;
> >
> > p->iTCO_version = pdata->version;
> > -   p->dev = dev;
> > -   p->pdev = to_pci_dev(dev->dev.parent);
> > +   p->pcidev = to_pci_dev(dev->parent);
> >
> > /*
> >  * Get the Memory-Mapped GCS or PMC register, we need it for the
> >  * NO_REBOOT flag (TCO v2 and v3).
> >  */
> > if (p->iTCO_version >= 2) {
> > -   p->gcs_pmc_res = platform_get_resource(dev,
> > +   p->gcs_pmc_res = platform_get_resource(pdev,
> >IORESOURCE_MEM,
> >ICH_RES_MEM_GCS_PMC);
> > -   p->gcs_pmc = devm_ioremap_resource(&dev->dev, 
> > p->gcs_pmc_res);
> > +   p->gcs_pmc = devm_ioremap_resource(dev, p->gcs_pmc_res);
> > if (IS_ERR(p->gcs_pmc))
> > return PTR_ERR(p->gcs_pmc);
> > }
> > @@ -453,9 +452,9 @@ static int iTCO_wdt_probe(struct platform_device *dev)

Re: [PATCH 2/4] watchdog: iTCO_wdt: Use device managed resources

2017-01-03 Thread Guenter Roeck

On Wed, Jan 04, 2017 at 12:41:56AM +0200, Andy Shevchenko wrote:
> On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
> > Using device managed resources simplifies error handling and cleanup,
> > and reduces the likelihood of errors.
> >
> > Signed-off-by: Guenter Roeck 
> 
> Reviewed-by: Andy Shevchenko 
> 
> Does it make sense to convert to dev_err() at some point?
> 
Sounds like an idea. Let me play with it.

Thanks,
Guenter

> > ---
> >  drivers/watchdog/iTCO_wdt.c | 80 
> > ++---
> >  1 file changed, 17 insertions(+), 63 deletions(-)
> >
> > diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
> > index a35a9164ccd0..eed1dee6de19 100644
> > --- a/drivers/watchdog/iTCO_wdt.c
> > +++ b/drivers/watchdog/iTCO_wdt.c
> > @@ -401,27 +401,6 @@ static const struct watchdog_ops iTCO_wdt_ops = {
> >   * Init & exit routines
> >   */
> >
> > -static void iTCO_wdt_cleanup(struct iTCO_wdt_private *p)
> > -{
> > -   /* Stop the timer before we leave */
> > -   if (!nowayout)
> > -   iTCO_wdt_stop(&p->wddev);
> > -
> > -   /* Deregister */
> > -   watchdog_unregister_device(&p->wddev);
> > -
> > -   /* release resources */
> > -   release_region(p->tco_res->start,
> > -   resource_size(p->tco_res));
> > -   release_region(p->smi_res->start,
> > -   resource_size(p->smi_res));
> > -   if (p->iTCO_version >= 2) {
> > -   iounmap(p->gcs_pmc);
> > -   release_mem_region(p->gcs_pmc_res->start,
> > -   resource_size(p->gcs_pmc_res));
> > -   }
> > -}
> > -
> >  static int iTCO_wdt_probe(struct platform_device *dev)
> >  {
> > struct itco_wdt_platform_data *pdata = dev_get_platdata(&dev->dev);
> > @@ -458,41 +437,28 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> > p->gcs_pmc_res = platform_get_resource(dev,
> >IORESOURCE_MEM,
> >ICH_RES_MEM_GCS_PMC);
> > -
> > -   if (!p->gcs_pmc_res)
> > -   return -ENODEV;
> > -
> > -   if (!request_mem_region(p->gcs_pmc_res->start,
> > -   resource_size(p->gcs_pmc_res),
> > -   dev->name))
> > -   return -EBUSY;
> > -
> > -   p->gcs_pmc = ioremap(p->gcs_pmc_res->start,
> > -resource_size(p->gcs_pmc_res));
> > -   if (!p->gcs_pmc) {
> > -   ret = -EIO;
> > -   goto unreg_gcs_pmc;
> > -   }
> > +   p->gcs_pmc = devm_ioremap_resource(&dev->dev, 
> > p->gcs_pmc_res);
> > +   if (IS_ERR(p->gcs_pmc))
> > +   return PTR_ERR(p->gcs_pmc);
> > }
> >
> > /* Check chipset's NO_REBOOT bit */
> > if (iTCO_wdt_unset_NO_REBOOT_bit(p) &&
> > iTCO_vendor_check_noreboot_on()) {
> > pr_info("unable to reset NO_REBOOT flag, device disabled by 
> > hardware/BIOS\n");
> > -   ret = -ENODEV;  /* Cannot reset NO_REBOOT bit */
> > -   goto unmap_gcs_pmc;
> > +   return -ENODEV; /* Cannot reset NO_REBOOT bit */
> > }
> >
> > /* Set the NO_REBOOT bit to prevent later reboots, just for sure */
> > iTCO_wdt_set_NO_REBOOT_bit(p);
> >
> > /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
> > -   if (!request_region(p->smi_res->start,
> > -   resource_size(p->smi_res), dev->name)) {
> > +   if (!devm_request_region(&dev->dev, p->smi_res->start,
> > +resource_size(p->smi_res),
> > +dev->name)) {
> > pr_err("I/O address 0x%04llx already in use, device 
> > disabled\n",
> >(u64)SMI_EN(p));
> > -   ret = -EBUSY;
> > -   goto unmap_gcs_pmc;
> > +   return -EBUSY;
> > }
> > if (turn_SMI_watchdog_clear_off >= p->iTCO_version) {
> > /*
> > @@ -504,12 +470,12 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> > outl(val32, SMI_EN(p));
> > }
> >
> > -   if (!request_region(p->tco_res->start,
> > -   resource_size(p->tco_res), dev->name)) {
> > +   if (!devm_request_region(&dev->dev, p->tco_res->start,
> > +resource_size(p->tco_res),
> > +dev->name)) {
> > pr_err("I/O address 0x%04llx already in use, device 
> > disabled\n",
> >(u64)TCOBASE(p));
> > -   ret = -EBUSY;
> > -   goto unreg_smi;
> > +   return -EBUSY;
> > }
> >
> > pr_info("Found a %s T

Re: [PATCH] gpio: pca953x: Add optional reset gpio control

2017-01-03 Thread Andy Shevchenko

On Mon, Jan 2, 2017 at 11:07 PM, Steve Longerbeam  wrote:
> Add optional reset-gpios pin control. If present, de-assert the
> specified reset gpio pin to bring the chip out of reset.

> --- a/drivers/gpio/gpio-pca953x.c
> +++ b/drivers/gpio/gpio-pca953x.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 

> +#include 

Please try to keep the includes alphabetically ordered (yes, I see they
are not in general, but try to squeeze it into the longest part which is
ordered).

>
>  #define PCA953X_INPUT  0
>  #define PCA953X_OUTPUT 1
> @@ -754,8 +755,18 @@ static int pca953x_probe(struct i2c_client *client,
> invert = pdata->invert;
> chip->names = pdata->names;
> } else {
> +   struct gpio_desc *reset_gpio;
> +
> chip->gpio_start = -1;
> irq_base = 0;
> +
> +   /* see if we need to de-assert a reset pin */

see -> See

> +   reset_gpio = devm_gpiod_get_optional(&client->dev, "reset",
> +GPIOD_OUT_LOW);

Shouldn't it be _optional_exclusive?
See this recent discussion https://patchwork.ozlabs.org/patch/706002/

> +   if (IS_ERR(reset_gpio)) {
> +   dev_err(&client->dev, "request for reset pin 
> failed\n");
> +   return PTR_ERR(reset_gpio);
> +   }
> }

-- 
With Best Regards,
Andy Shevchenko

Re: [PATCH v2 0/7] async requests support for 9pfs

2017-01-03 Thread Stefano Stabellini

Ping

On Thu, 15 Dec 2016, Stefano Stabellini wrote:
> Hi all,
> 
> This patch series introduces async requests for read and write
> operations. If the read, or the write, is an async operation to begin
> with (aio), we can avoid waiting for the server response.
> 
> This is my first contribution to 9p, so feedback and suggestions are
> welcome!
> 
> 
> Changes in v2:
> - replace callback with work_struct
> - handle large aio read/write requests
> - clear pagevec
> - rename offset to page_offset
> - add file_offset
> - add fid
> - add completed and tot_size
> 
> 
> Stefano Stabellini (7):
>   9p: add iocb parameter to p9_client_read and p9_client_write
>   9p: store req details and workqueue in struct p9_req_t
>   9p: introduce p9_client_get_req
>   9p: introduce async read requests
>   9p: introduce async write requests
>   9p: handle large aio read requests
>   9p: handle large aio write requests
> 
>  fs/9p/vfs_addr.c|   8 +-
>  fs/9p/vfs_dir.c |   2 +-
>  fs/9p/vfs_file.c|   4 +-
>  fs/9p/xattr.c   |   4 +-
>  include/net/9p/client.h |  19 ++-
>  net/9p/client.c | 324 
> ++--
>  6 files changed, 343 insertions(+), 18 deletions(-)
>

Re: 9pfs hangs since 4.7

2017-01-03 Thread Tuomas Tynkkynen

On Mon, 2 Jan 2017 16:23:09 +
Al Viro  wrote:


> 
> What I'd like to see is a log of 9p traffic in those; to hell with the
> payloads, just the type and tag of each message [...]

Thanks for the suggestions. With the following patch to QEMU:

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index aea7e9d..8a6b426 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -662,6 +662,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t 
len)
 
 /* fill out the header */
 pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag);
+fprintf(stderr, "complete %d %04x\n", id, pdu->tag);
 
 /* keep these in sync */
 pdu->size = len;
@@ -2347,6 +2348,7 @@ static void v9fs_flush(void *opaque)
 return;
 }
 trace_v9fs_flush(pdu->tag, pdu->id, tag);
+fprintf(stderr, "flush %04x %04x\n", tag, pdu->tag);
 
 QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
 if (cancel_pdu->tag == tag) {
diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 1782e4a..6a5ac04 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -76,6 +76,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue 
*vq)
 
 pdu->id = out.id;
 pdu->tag = le16_to_cpu(out.tag_le);
+fprintf(stderr, "out %d %04x\n", pdu->id, pdu->tag);
 
 qemu_co_queue_init(&pdu->complete);
 pdu_submit(pdu);

I got these logs from the server & client with 9p tracepoints enabled:

https://gist.githubusercontent.com/dezgeg/02447100b3182167403099fe7de4d941/raw/3772e408ddf586fb662ac9148fc10943529a6b99/dmesg%2520with%25209p%2520trace
https://gist.githubusercontent.com/dezgeg/e1e0c7f354042e1d9bdf7e9135934a65/raw/3a0e3b4f7a5229fd0be032c6839b578d47a21ce4/qemu.log

Re: [PATCH V7 1/4] Documentation/devicetree/bindings: b850v3_lvds_dp

2017-01-03 Thread Peter Senna Tschudin

 Hi Rob,

Thank you for the review.

On 03 January, 2017 23:51 CET, Rob Herring  wrote: 
 
> On Sun, Jan 01, 2017 at 09:24:29PM +0100, Peter Senna Tschudin wrote:
> > Devicetree bindings documentation for the GE B850v3 LVDS/DP++
> > display bridge.
> > 
> > Cc: Martyn Welch 
> > Cc: Martin Donnelly 
> > Cc: Javier Martinez Canillas 
> > Cc: Enric Balletbo i Serra 
> > Cc: Philipp Zabel 
> > Cc: Rob Herring 
> > Cc: Fabio Estevam 
> > Signed-off-by: Peter Senna Tschudin 
> > ---
> > There was an Acked-by from Rob Herring  for V6, but I 
> > changed
> > the bindings to use i2c_new_secondary_device() so I removed it from the 
> > commit
> > message.
> > 
> >  .../devicetree/bindings/ge/b850v3-lvds-dp.txt  | 39 
> > ++
> 
> Generally, bindings are not organized by vendor. Put in 
> bindings/display/bridge/... instead.

Will change that.

> 
> >  1 file changed, 39 insertions(+)
> >  create mode 100644 Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt
> > 
> > diff --git a/Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt 
> > b/Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt
> > new file mode 100644
> > index 000..1bc6ebf
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt
> > @@ -0,0 +1,39 @@
> > +Driver for GE B850v3 LVDS/DP++ display bridge
> > +
> > +Required properties:
> > +  - compatible : should be "ge,b850v3-lvds-dp".
> 
> Isn't '-lvds-dp' redundant? The part# should be enough.

b850v3 is the name of the product, which is why I proposed this name. What
about b850v3-dp2, with dp2 indicating the second DP output?

> 
> > +  - reg : should contain the main address which is used to ack the
> > +interrupts and address for edid.
> > +  - reg-names : comma separeted list of register names. Valid values
> 
> s/separeted/separated/

argh, sorry for this. Will fix it.

> 
> > +are "main", and "edid".
> > +  - interrupt-parent : phandle of the interrupt controller that services
> > +interrupts to the device
> > +  - interrupts : one interrupt should be described here, as in
> > +<0 IRQ_TYPE_LEVEL_HIGH>.
> > +  - port : should describe the video signal connection between the host
> > +and the bridge.
> > +
> > +Example:
> > +
> > +&mux2_i2c2 {
> > +   status = "okay";
> > +   clock-frequency = <10>;
> > +
> > +   b850v3-lvds-dp-bridge@73  {
> > +   compatible = "ge,b850v3-lvds-dp";
> > +   #address-cells = <1>;
> > +   #size-cells = <0>;
> > +
> > +   reg = <0x73 0x72>;
> > +   reg-names = "main", "edid";
> > +
> > +   interrupt-parent = <&gpio2>;
> > +   interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
> > +
> > +   port {
> > +   b850v3_dp_bridge_in: endpoint {
> > +   remote-endpoint = <&lvds0_out>;
> > +   };
> > +   };
> > +   };
> > +};
> > -- 
> > 2.5.5
> >

Re: [PATCHv6 00/11] CONFIG_DEBUG_VIRTUAL for arm64

2017-01-03 Thread Laura Abbott

On 01/03/2017 02:56 PM, Florian Fainelli wrote:
> On 01/03/2017 09:21 AM, Laura Abbott wrote:
>> Happy New Year!
>>
>> This is a very minor rebase from v5. It only moves a few headers around.
>> I think this series should be ready to be queued up for 4.11.
> 
> FWIW:
> 
> Tested-by: Florian Fainelli 
> 

Thanks!

> How do we get this series included? I would like to get the ARM 32-bit
> counterpart included as well (will resubmit rebased shortly), but I have
> no clue which tree this should be going through.
> 

I was assuming this would go through the arm64 tree unless Catalin/Will
have an objection to that.

> Thanks!
> 
>>
>> Thanks,
>> Laura
>>
>> Laura Abbott (11):
>>   lib/Kconfig.debug: Add ARCH_HAS_DEBUG_VIRTUAL
>>   mm/cma: Cleanup highmem check
>>   arm64: Move some macros under #ifndef __ASSEMBLY__
>>   arm64: Add cast for virt_to_pfn
>>   mm: Introduce lm_alias
>>   arm64: Use __pa_symbol for kernel symbols
>>   drivers: firmware: psci: Use __pa_symbol for kernel symbol
>>   kexec: Switch to __pa_symbol
>>   mm/kasan: Switch to using __pa_symbol and lm_alias
>>   mm/usercopy: Switch to using lm_alias
>>   arm64: Add support for CONFIG_DEBUG_VIRTUAL
>>
>>  arch/arm64/Kconfig|  1 +
>>  arch/arm64/include/asm/kvm_mmu.h  |  4 +-
>>  arch/arm64/include/asm/memory.h   | 66 
>> +--
>>  arch/arm64/include/asm/mmu_context.h  |  6 +--
>>  arch/arm64/include/asm/pgtable.h  |  2 +-
>>  arch/arm64/kernel/acpi_parking_protocol.c |  3 +-
>>  arch/arm64/kernel/cpu-reset.h |  2 +-
>>  arch/arm64/kernel/cpufeature.c|  3 +-
>>  arch/arm64/kernel/hibernate.c | 20 +++---
>>  arch/arm64/kernel/insn.c  |  2 +-
>>  arch/arm64/kernel/psci.c  |  3 +-
>>  arch/arm64/kernel/setup.c |  9 +++--
>>  arch/arm64/kernel/smp_spin_table.c|  3 +-
>>  arch/arm64/kernel/vdso.c  |  8 +++-
>>  arch/arm64/mm/Makefile|  2 +
>>  arch/arm64/mm/init.c  | 12 +++---
>>  arch/arm64/mm/kasan_init.c| 22 +++
>>  arch/arm64/mm/mmu.c   | 33 ++--
>>  arch/arm64/mm/physaddr.c  | 30 ++
>>  arch/x86/Kconfig  |  1 +
>>  drivers/firmware/psci.c   |  2 +-
>>  include/linux/mm.h|  4 ++
>>  kernel/kexec_core.c   |  2 +-
>>  lib/Kconfig.debug |  5 ++-
>>  mm/cma.c  | 15 +++
>>  mm/kasan/kasan_init.c | 15 +++
>>  mm/usercopy.c |  4 +-
>>  27 files changed, 180 insertions(+), 99 deletions(-)
>>  create mode 100644 arch/arm64/mm/physaddr.c
>>
> 
>

Re: [PATCH] scsi/bfa: use designated initializers

2017-01-03 Thread Kees Cook

On Wed, Dec 21, 2016 at 12:33 AM, Christoph Hellwig  wrote:
> On Fri, Dec 16, 2016 at 05:05:15PM -0800, Kees Cook wrote:
>> Prepare to mark sensitive kernel structures for randomization by making
>> sure they're using designated initializers. These were identified during
>> allyesconfig builds of x86, arm, and arm64, with most initializer fixes
>> extracted from grsecurity.
>
> Instead of further bloating the idiotic dispatch table just kill it off
> entirely:

Sounds fine to me! Is this going via your tree?

Thanks!

-Kees

-- 
Kees Cook
Nexus Security

Re: [PATCH v3 RESEND 07/11] pwm: imx: Provide atomic PWM support for i.MX PWMv2

2017-01-03 Thread Boris Brezillon

On Tue, 3 Jan 2017 23:46:58 +0100
Lukasz Majewski  wrote:

> > > > > >> > > > Same goes for the regression introduced in patch 2: I
> > > > > >> > > > think it's better to keep things bisectable on all
> > > > > >> > > > platforms (even if it appeared to work by chance on
> > > > > >> > > > imx7, it did work before this change).  
> > > > > >> > >
> > > > > >> > > Could you be more specific about your idea to solve this
> > > > > >> > > problem?  
> > > > > >> >
> > > > > >> > Stefan already provided a patch, I just think it should be
> > > > > >> > fixed before patch 2 to avoid breaking bisectibility.  
> > > > > >>
> > > > > >> My idea is as follows:
> > > > > >>
> > > > > >> I will drop patch 2 (prepared by Sascha) and then squash
> > > > > >> Stefan's patch [1] to patch 7/11. The "old" ipg enable code
> > > > > >> will be removed with other not needed code during
> > > > > >> conversion.  
> > > > > > 
> > > > > > How about keeping patch 2 but enabling/disabling the periph
> > > > > > clk in imx_pwm_config() instead of completely dropping the
> > > > > > enable/disable clk sequence.
> > > > > > 
> > > > > > In patch 7 you just add the logic we talked about earlier:
> > > > > > unconditionally enable the periph clk when entering the
> > > > > > imx_pwm_apply_v2() function and disable it before leaving the
> > > > > > function.
> > > > > > 
> > > > > > This way you can preserve bisectibility and still get rid of
> > > > > > the ipg clk.
> > > > > > 
> > > > > > Stefan, what's your opinion?  
> > > > > 
> > > > > We will get rid of the ipg clocks anyway in patch 8 (which
> > > > > removes those functions completely).
> > > > > 
> > > > > > So I think Lukasz's approach should be fine, just drop patch 2 and
> > > > > squash my patch into patch 7.
> > > > 
> > > > > Well, the end result will be the same (ipg_clk will be gone after
> > > > patch 8), but then it's hard to track why this clock suddenly
> > > > disappeared. I still think it's worth adding an extra commit
> > > > explaining that enabling the per_clk before accessing IP
> > > > registers is needed on some platforms (imx7), and that IPG clk is
> > > > actually not required until we start using it as a source for the
> > > > PWM signal generation.
> > > > 
> > > > Maybe I'm the only one to think so. In this case, feel free to
> > > > drop patch 2.
> > > 
> > > If you feel really bad about this issue, then we can drop patch 2
> > > and:
> > > 
> > > reorganize patch 7/11 to 
> > >  - keep code, which adds imx_pwm_apply_v2() function code (just
> > > moves it as is) 
> > >  - remove .apply = imx_pwm_apply_v2 entry from pwm_ops structure.
> > > 
> > > 
> > > On top of it add patch to enable/disable unconditionally the
> > > imx->clk_per clock to avoid problems on imx7 (and state them in
> > > commit message).
> > > 
> > > Then we add separate patch with 
> > > .apply = imx_pwm_apply_v2 to pwm_ops structure to enable "new"
> > > atomic approach.
> > > 
> > > And at last we apply patch 8/11, which removes the code for old (non
> > > atomic) behaviour.
> > > 
> > > > All the issues are documented in this way at the cost of having
> > > "dead" (I mean not used) imx_pwm_apply_v2() for two commits.
> > >   
> > 
> > This looks even more complicated.
> > Sorry, but I don't see the problem with modifying patch 2 to enable
> > per_clk instead of ipg_clk. Can you explain what's bothering you?  
> 
> But in patch 2:
> "pwm: imx: remove ipg clock"
> 
>  we _remove_ the clk_ipg from imx_pwm_config() and imx_pwm_probe(), so
>  I'm quite puzzled with your above statement.

See my reworked version below.

> 
> > 
> > If you really want to do the change after patch 7, fine, but in this
> > case, keep the existing logic: enable/disable ipg_clk in
> > imx_pwm_apply_v2() until you drop the ipg_clk and replace the ipg_clk
> > enable/disable sequence by the equivalent enable/disable per_clk one.
> >   
> 
> Frankly, I do agree with Stefan here - we should drop patch 2, squash
> all changes (including imx7 clock issues) to patch 7 (including verbose
> commit message) and remove the non-atomic code in patch 8.

Hm, it's not like I'm asking something impossible here (see the
following patch).

--->8---
>From c79bb872a40b8e322fd13f33f374fb1ba085e7a9 Mon Sep 17 00:00:00 2001
From: Sascha Hauer 
Date: Mon, 26 Dec 2016 23:55:52 +0100
Subject: [PATCH v4] pwm: imx: remove ipg clock and enable per clock when 
required

The use of the ipg clock was introduced with commit 7b27c160c681
("pwm: i.MX: fix clock lookup").
In the commit message it was claimed that the ipg clock is enabled for
register accesses. This is true for the ->config() callback, but not
for the ->set_enable() callback. Given that the ipg clock is not
consistently enabled for all register accesses we can assume that either
it is not required at all or that the current code does not work.
Remove the ipg clock code for now so that it's no longer in the way of
refactoring the driver.

On the other hand,

Re: [PATCH] Allow userspace control of runtime disabling/enabling of driver probing

2017-01-03 Thread Rafael J. Wysocki

On Tue, Jan 3, 2017 at 11:58 PM, Kees Cook  wrote:
> From: Matthew Garrett 
>
> Various attacks are made possible due to the large attack surface of
> kernel drivers and the easy availability of hotpluggable hardware that can
> be programmed to mimic arbitrary devices. This allows attackers to find a
> single vulnerable driver and then produce a device that can exploit it by
> plugging into a hotpluggable bus (such as PCI or USB). This violates user
> assumptions about unattended systems being secure as long as the screen
> is locked.
>
> The kernel already has support for deferring driver binding in order
> to avoid problems over suspend/resume. By exposing this to userspace we
> can disable probing when the screen is locked and simply reenable it on
> unlock.
>
> This is not a complete solution - since this still permits device
> creation and simply blocks driver binding, it won't stop userspace
> drivers from attaching to devices and it won't protect against any kernel
> vulnerabilities in the core bus code. However, it should be sufficient to
> block attacks like Poisontap (https://samy.pl/poisontap/).

It also looks like this may be worked around by tricking the user into
unlocking the screen while the malicious device is still attached to the
system.

If that really is the case, I wonder if it's worth the extra complexity.

Thanks,
Rafael

[RFC PATCH] ACPICA: sprintf should use ACPI_SIZE_MAX

2017-01-03 Thread Vivien Didelot

sprintf() should use the architecture independent ACPI_SIZE_MAX value
instead of the ACPI_UINT32_MAX 32-bit variant. Change this.

Signed-off-by: Vivien Didelot 
---
 drivers/acpi/acpica/utprint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index 40eba804d49c..5001043351e8 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -627,7 +627,7 @@ int sprintf(char *string, const char *format, ...)
int length;
 
va_start(args, format);
-   length = vsnprintf(string, ACPI_UINT32_MAX, format, args);
+   length = vsnprintf(string, ACPI_SIZE_MAX, format, args);
va_end(args);
 
return (length);
-- 
2.11.0

[PATCH v2] video: fbdev: matroxfb: use designated initializers

2017-01-03 Thread Kees Cook

Prepare to mark sensitive kernel structures for randomization by making
sure they're using designated initializers. These were identified during
allyesconfig builds of x86, arm, and arm64, with most initializer fixes
extracted from grsecurity.

Signed-off-by: Kees Cook 
---
 drivers/video/fbdev/matrox/matroxfb_DAC1064.c | 10 --
 drivers/video/fbdev/matrox/matroxfb_Ti3026.c  |  5 -
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/matrox/matroxfb_DAC1064.c 
b/drivers/video/fbdev/matrox/matroxfb_DAC1064.c
index a01147fdf270..b380a393cbc3 100644
--- a/drivers/video/fbdev/matrox/matroxfb_DAC1064.c
+++ b/drivers/video/fbdev/matrox/matroxfb_DAC1064.c
@@ -1088,14 +1088,20 @@ static void MGAG100_restore(struct matrox_fb_info 
*minfo)
 
 #ifdef CONFIG_FB_MATROX_MYSTIQUE
 struct matrox_switch matrox_mystique = {
-   MGA1064_preinit, MGA1064_reset, MGA1064_init, MGA1064_restore,
+   .preinit= MGA1064_preinit,
+   .reset  = MGA1064_reset,
+   .init   = MGA1064_init,
+   .restore= MGA1064_restore,
 };
 EXPORT_SYMBOL(matrox_mystique);
 #endif
 
 #ifdef CONFIG_FB_MATROX_G
 struct matrox_switch matrox_G100 = {
-   MGAG100_preinit, MGAG100_reset, MGAG100_init, MGAG100_restore,
+   .preinit= MGAG100_preinit,
+   .reset  = MGAG100_reset,
+   .init   = MGAG100_init,
+   .restore= MGAG100_restore,
 };
 EXPORT_SYMBOL(matrox_G100);
 #endif
diff --git a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c 
b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c
index 68fa037d8cbc..9ff9be85759e 100644
--- a/drivers/video/fbdev/matrox/matroxfb_Ti3026.c
+++ b/drivers/video/fbdev/matrox/matroxfb_Ti3026.c
@@ -738,7 +738,10 @@ static int Ti3026_preinit(struct matrox_fb_info *minfo)
 }
 
 struct matrox_switch matrox_millennium = {
-   Ti3026_preinit, Ti3026_reset, Ti3026_init, Ti3026_restore
+   .preinit= Ti3026_preinit,
+   .reset  = Ti3026_reset,
+   .init   = Ti3026_init,
+   .restore= Ti3026_restore
 };
 EXPORT_SYMBOL(matrox_millennium);
 #endif
-- 
2.7.4


-- 
Kees Cook
Nexus Security

Re: [PATCH 01/22] dt-bindings: iio: adc: add AXP20X/AXP22X ADC DT binding

2017-01-03 Thread Rob Herring

On Mon, Jan 02, 2017 at 05:37:01PM +0100, Quentin Schulz wrote:
> The X-Powers AXP20X and AXP22X PMICs have multiple ADCs. They expose the
> battery voltage, battery charge and discharge currents, AC-in and VBUS
> voltages and currents, 2 GPIOs muxable in ADC mode and PMIC temperature.
> 
> This adds the device tree binding documentation for the X-Powers AXP20X
> and AXP22X PMICs ADCs.
> 
> Signed-off-by: Quentin Schulz 
> ---
>  .../devicetree/bindings/iio/adc/axp20x_adc.txt | 24 
> ++
>  1 file changed, 24 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/iio/adc/axp20x_adc.txt
> 
> diff --git a/Documentation/devicetree/bindings/iio/adc/axp20x_adc.txt 
> b/Documentation/devicetree/bindings/iio/adc/axp20x_adc.txt
> new file mode 100644
> index 000..1b60065
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/iio/adc/axp20x_adc.txt
> @@ -0,0 +1,24 @@
> +X-Powers AXP20X and AXP22X PMIC Analog to Digital Converter (ADC)
> +
> +The X-Powers AXP20X and AXP22X PMICs have multiple ADCs. They expose the
> +battery voltage, battery charge and discharge currents, AC-in and VBUS
> +voltages and currents, 2 GPIOs muxable in ADC mode and PMIC temperature.
> +
> +The AXP22X PMICs do not have all ADCs of the AXP20X though.
> +
> +Required properties:
> + - compatible, one of:
> + "x-powers,axp209-adc"
> + "x-powers,axp221-adc"
> + - #io-channel-cells = <1>;
> +
> +This is a subnode of the AXP20X PMIC.
> +
> +Example:
> +
> +&axp209 {
> + axp209_adc: axp209_adc {

Use 'adc' for node name:

With that,

Acked-by: Rob Herring 

> + compatible = "x-powers,axp209-adc";
> + #io-channel-cells = <1>;
> + };
> +};
> -- 
> 2.9.3
>

Re: [RFC] memcpy_nocache() and memcpy_writethrough()

2017-01-03 Thread Al Viro

On Tue, Jan 03, 2017 at 01:14:11PM -0800, Dan Williams wrote:

> Robert was describing the overall flow / mechanics, but I think it is
> easier to visualize the sfence as a flush command sent to a disk
> device with a volatile cache. In fact, that's how we implemented it in
> the pmem block device driver. The pmem block device registers itself
> as requiring REQ_FLUSH to be sent to persist writes. The driver issues
> sfence on the assumption that all writes to pmem have either bypassed
> the cache with movnt, or are scheduled for write-back via one of the
> flush instructions (clflush, clwb, or clflushopt).

*blink*

1) memcpy_to_pmem() seems to rely upon the __copy_from_user_nocache()
having only used movnt; it does not attempt clwb at all.

2) __copy_from_user_nocache() for short copies does not use movnt at all.
In that case neither sfence nor clwb is issued.

3) it uses movnt only for part of copying in case of misaligned copy;
No clwb is issued, but sfence *is* - at the very end in 64bit case,
between movnt and copying the tail - in 32bit one.  Incidentally,
while 64bit case takes care to align the destination for movnt part,
32bit one does not.

How much of the above is broken and what do the callers rely upon?  In
particular, is that sfence the right thing for pmem usecases?

Re: [tpmdd-devel] [PATCH RFC 0/4] RFC: in-kernel resource manager

2017-01-03 Thread Jason Gunthorpe

On Tue, Jan 03, 2017 at 05:21:28PM -0500, Ken Goldman wrote:
> On 1/3/2017 4:47 PM, Jason Gunthorpe wrote:
> >
> > I think we should also consider TPM 1.2 support in all of this, it is
> > still a very popular piece of hardware and it is equally able to
> > support a RM.
> 
> I suspect that TPM 2.0 and TPM 1.2 are so different that there may be 
> little or no code in common.

Sure, but the uapi should make sense for both versions, ie, I don't want
to see a tpm 2.0 specific char dev.

Jason

Re: [PATCH 1/2] sched: Introduce rcuwait machinery

2017-01-03 Thread Davidlohr Bueso


On Fri, 23 Dec 2016, kbuild test robot wrote:

kernel/exit.c:285:29: warning: 'struct rcuwait' declared inside parameter list 
will not be visible outside of this definition or declaration

   void rcuwait_trywake(struct rcuwait *w)
   ^~~


Ah, I'm missing a linux/rcuwait.h include there. Here's v2, thanks.

-8<
From: Davidlohr Bueso 
Subject: [PATCH v2 1/2] sched: Introduce rcuwait machinery

rcuwait provides support for (single) rcu-safe task wait/wake functionality,
with the caveat that it must not be called after exit_notify(), such that
we avoid racing with rcu delayed_put_task_struct callbacks, task_struct
being rcu unaware in this context -- for which we similarly have
task_rcu_dereference() magic, but with different return semantics, which
can conflict with the wakeup side.

The interfaces are quite straightforward:

rcuwait_wait_event()
rcuwait_trywake()

More details are in the comments, but it's perhaps worth mentioning at least,
that users must provide proper serialization when waiting on a condition, and
avoid corrupting a concurrent waiter. Also care must be taken between the task
and the condition for when calling the wakeup -- we cannot miss wakeups. When
porting users, this is for example, a given when using waitqueues in that
everything is done under the q->lock.

Signed-off-by: Davidlohr Bueso 
---
include/linux/rcuwait.h | 63 +
kernel/exit.c   | 30 +++
2 files changed, 93 insertions(+)
create mode 100644 include/linux/rcuwait.h

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
new file mode 100644
index ..3e07beb14c1f
--- /dev/null
+++ b/include/linux/rcuwait.h
@@ -0,0 +1,63 @@
+#ifndef _LINUX_RCUWAIT_H_
+#define _LINUX_RCUWAIT_H_
+
+#include 
+
+/*
+ * rcuwait provides a way of blocking and waking up a single
+ * task in an rcu-safe manner; where it is forbidden to use
+ * after exit_notify(). task_struct is not properly rcu protected,
+ * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ *
+ * Alternatively we have task_rcu_dereference(), but the return
+ * semantics have different implications which would break the
+ * wakeup side. The only time @task is non-nil is when a user is
+ * blocked (or checking if it needs to) on a condition, and reset
+ * as soon as we know that the condition has succeeded and are
+ * awoken.
+ */
+struct rcuwait {
+   struct task_struct *task;
+};
+
+#define __RCUWAIT_INITIALIZER(name)\
+   { .task = NULL, }
+
+static inline void rcuwait_init(struct rcuwait *w)
+{
+   w->task = NULL;
+}
+
+extern void rcuwait_trywake(struct rcuwait *w);
+
+/*
+ * The caller is responsible for locking around rcuwait_wait_event(),
+ * such that writes to @task are properly serialized.
+ */
+#define rcuwait_wait_event(w, condition)   \
+({ \
+   /*  \
+* Complain if we are called after do_exit()/exit_notify(), \
+* as we cannot rely on the rcu critical region for the \
+* wakeup side. \
+*/ \
+   WARN_ON(current->exit_state);   \
+   \
+   rcu_assign_pointer((w)->task, current);  \
+   for (;;) {  \
+   /*  \
+* Implicit barrier (A) pairs with (B) in   \
+* rcuwait_trywake().   \
+*/ \
+   set_current_state(TASK_UNINTERRUPTIBLE);\
+   if (condition)  \
+   break;  \
+   \
+   schedule(); \
+   }   \
+   \
+   WRITE_ONCE((w)->task, NULL); \
+   __set_current_state(TASK_RUNNING);  \
+})
+
+#endif /* _LINUX_RCUWAIT_H_ */
diff --git a/kernel/exit.c b/kernel/exit.c
index 8f14b866f9f6..e579b30a35a7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -55,6 +55,7 @@
#include 
#include 
#include 
+#include <linux/rcuwait.h>

#include 
#include 
@@ -282,6 +283,35 @@ struct task_struct *task_rcu_dereference(struct 
task_struct **ptask)

Re: linux-next: failure while fetching the mvebu tree

2017-01-03 Thread Gregory CLEMENT

Le 3 janvier 2017 23:28:25 GMT+01:00, Stephen Rothwell  
a écrit :
>Hi all,
>
>Fetching the mvebu tree produces this error:
>
>fatal: Couldn't find remote ref refs/heads/for-next

It should be fixed now.

Thanks,

Gregory

>
>-- 
>Cheers,
>Stephen Rothwell

Hi,
-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

Re: [PATCH] sgi-xp: use designated initializers

2017-01-03 Thread Kees Cook

On Wed, Dec 21, 2016 at 8:24 AM, Robin Holt  wrote:
> On Fri, Dec 16, 2016 at 7:01 PM, Kees Cook  wrote:
>> Prepare to mark sensitive kernel structures for randomization by making
>> sure they're using designated initializers. These were identified during
>> allyesconfig builds of x86, arm, and arm64, with most initializer fixes
>> extracted from grsecurity.
>
> I guess I don't understand the context enough here to give you a
> Signed-off-by.  Can you give us more background on this randomization?

Sure thing! The randomization is on the order of function pointers in
all-pointer structures (like struct xpc_interface). As long as the
memory containing the structure isn't shared externally, this
randomization should have no operational effect. The reason explicit
no-op functions were added was to avoid ugly casts, etc.

> From what I see in the code here, I can see you are providing
> equivalent functionality and I would give it a signed-off-by, but I am
> not sure this randomization of which you speak is not going to cause
> problems for XP, XPC, XPNET, and XPMEM (out of tree GPL kernel
> module).

Ah, hm, does this module share the structure without being built
against the kernel? (If built with the kernel, the randomization
plugin will keep things in the right order.)

-Kees

-- 
Kees Cook
Nexus Security

Re: [PATCH 1/2] arm64: dma_mapping: allow PCI host driver to limit DMA mask

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 6:44:44 PM CET Will Deacon wrote:
> > @@ -347,6 +348,16 @@ static int __swiotlb_get_sgtable(struct device *dev, 
> > struct sg_table *sgt,
> >  
> >  static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
> >  {
> > +#ifdef CONFIG_PCI
> > + if (dev_is_pci(hwdev)) {
> > + struct pci_dev *pdev = to_pci_dev(hwdev);
> > + struct pci_host_bridge *br = pci_find_host_bridge(pdev->bus);
> > +
> > + if (br->dev.dma_mask && (*br->dev.dma_mask) &&
> > + (mask & (*br->dev.dma_mask)) != mask)
> > + return 0;
> > + }
> > +#endif
> 
> Hmm, but this makes it look like the problem is both arm64 and swiotlb
> specific, when in reality it's not. Perhaps another hack you could try
> would be to register a PCI bus notifier in the host bridge looking for
> BUS_NOTIFY_BIND_DRIVER, then you could proxy the DMA ops for each child
> device before the driver has probed, but adding a dma_set_mask callback
> to limit the mask to what you need?
> 
> I agree that it would be better if dma_set_mask handled all of this
> transparently, but it's all based on the underlying ops rather than the
> bus type.

This is what I prototyped a long time ago when this first came up.
I still think this needs to be solved properly for all of arm64, not
with a PCI specific hack, and in particular not using notifiers.

Arnd

commit 9a57d58d116800a535510053136c6dd7a9c26e25
Author: Arnd Bergmann 
Date:   Tue Nov 17 14:06:55 2015 +0100

[EXPERIMENTAL] ARM64: check implement dma_set_mask

Needs work for coherent mask

Signed-off-by: Arnd Bergmann 

diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index 243ef256b8c9..a57e7bb10e71 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -22,6 +22,7 @@ struct dev_archdata {
void *iommu;/* private IOMMU data */
 #endif
bool dma_coherent;
+   u64 parent_dma_mask;
 };
 
 struct pdev_archdata {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 290a84f3351f..aa65875c611b 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -352,6 +352,31 @@ static int __swiotlb_dma_supported(struct device *hwdev, 
u64 mask)
return 1;
 }
 
+static int __swiotlb_set_dma_mask(struct device *dev, u64 mask)
+{
+   /* device is not DMA capable */
+   if (!dev->dma_mask)
+   return -EIO;
+
+   /* mask is below swiotlb bounce buffer, so fail */
+   if (!swiotlb_dma_supported(dev, mask))
+   return -EIO;
+
+   /*
+* because of the swiotlb, we can return success for
+* larger masks, but need to ensure that bounce buffers
+* are used above parent_dma_mask, so set that as
+* the effective mask.
+*/
+   if (mask > dev->archdata.parent_dma_mask)
+   mask = dev->archdata.parent_dma_mask;
+
+
+   *dev->dma_mask = mask;
+
+   return 0;
+}
+
 static struct dma_map_ops swiotlb_dma_ops = {
.alloc = __dma_alloc,
.free = __dma_free,
@@ -367,6 +392,7 @@ static struct dma_map_ops swiotlb_dma_ops = {
.sync_sg_for_device = __swiotlb_sync_sg_for_device,
.dma_supported = __swiotlb_dma_supported,
.mapping_error = swiotlb_dma_mapping_error,
+   .set_dma_mask = __swiotlb_set_dma_mask,
 };
 
 static int __init atomic_pool_init(void)
@@ -957,6 +983,18 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, 
u64 size,
if (!dev->archdata.dma_ops)
dev->archdata.dma_ops = &swiotlb_dma_ops;
 
+   /*
+* we don't yet support buses that have a non-zero mapping.
+*  Let's hope we won't need it
+*/
+   WARN_ON(dma_base != 0);
+
+   /*
+* Whatever the parent bus can set. A device must not set
+* a DMA mask larger than this.
+*/
+   dev->archdata.parent_dma_mask = size;
+
dev->archdata.dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
 }

Re: [PATCH] PCI: acpiphp_ibm: add __ro_after_init to ibm_apci_table_attr

2017-01-03 Thread Kees Cook

On Mon, Dec 26, 2016 at 7:36 AM, Bhumika Goyal  wrote:
> The object ibm_apci_table_attr of type bin_attribute structure is not
> modified after getting initialized by ibm_acpiphp_init. Apart from
> getting referenced in init it is also passed as an argument to the functions
> sysfs_{remove/create}_bin_file but both the arguments are of type const
> struct bin_attribute *. Therefore add __ro_after_init to its declaration.
>
> Signed-off-by: Bhumika Goyal 
> ---
>  drivers/pci/hotplug/acpiphp_ibm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/pci/hotplug/acpiphp_ibm.c 
> b/drivers/pci/hotplug/acpiphp_ibm.c
> index f6221d7..188cdfa 100644
> --- a/drivers/pci/hotplug/acpiphp_ibm.c
> +++ b/drivers/pci/hotplug/acpiphp_ibm.c
> @@ -107,7 +107,7 @@ static acpi_status __init 
> ibm_find_acpi_device(acpi_handle handle,
>
>  static acpi_handle ibm_acpi_handle;
>  static struct notification ibm_note;
> -static struct bin_attribute ibm_apci_table_attr = {
> +static struct bin_attribute ibm_apci_table_attr __ro_after_init = {
> .attr = {
> .name = "apci_table",
> .mode = S_IRUGO,

Thanks!

Reviewed-by: Kees Cook 

-Kees

-- 
Kees Cook
Nexus Security

Re: [PATCH] drop_monitor: consider inserted data in genlmsg_end

2017-01-03 Thread Wolfgang Reiter


Yes, genlmsg_end changes nlmsg_len field dependent on skb->tail.
After allocation in reset_per_cpu_data skb->tail is modified in
trace_drop_common via __nla_reserve_nohdr.

Best place for setting nlmsg_len to its final value is after being
swapped out in reset_per_cpu_data.

Neil Horman  writes:

> On Tue, Jan 03, 2017 at 09:54:19AM -0500, David Miller wrote:
>> From: Reiter Wolfgang 
>> Date: Tue,  3 Jan 2017 01:39:10 +0100
>> 
>> > Final nlmsg_len field update must reflect inserted net_dm_drop_point
>> > data.
>> > 
>> > This patch depends on previous patch:
>> > "drop_monitor: add missing call to genlmsg_end"
>> > 
>> > Signed-off-by: Reiter Wolfgang 
>> 
>> I don't understand why the current code doesn't work properly.
>> 
>> All over the tree, the pattern is:
>> 
>>  x = genlmsg_put(skb, ...);
>>  ...
>>  genlmsg_end(skb, x);
>> 
>> And that is exactly what the code is doing right now.
>> 
>
> Because reset_per_cpu_data should close the use of the established skb
> that was being written to.  Without this patch we add the END tlv to the skb
> that is just getting started for use in the drop monitor, rather than for the
> skb that is getting returned for use in sending up to user space listeners.
>
> Or am I missing something?

[PATCH] Allow userspace to request device probing even if defer_all_probes is true

2017-01-03 Thread Kees Cook

From: Matthew Garrett 

Userspace may wish to make a policy decision to allow certain devices
to be attached, such as keyboards. Add a force_probe sysfs node to each
device, which if written will trigger a probe even if defer_all_probes is
currently true.

Signed-off-by: Matthew Garrett 
Signed-off-by: Kees Cook 
---
 .../ABI/testing/sysfs-devices-force_probe  | 10 +
 drivers/base/base.h|  4 +-
 drivers/base/bus.c |  2 +-
 drivers/base/core.c|  7 ++-
 drivers/base/dd.c  | 51 ++
 5 files changed, 62 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-force_probe

diff --git a/Documentation/ABI/testing/sysfs-devices-force_probe 
b/Documentation/ABI/testing/sysfs-devices-force_probe
new file mode 100644
index ..3a69b9e3b86b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-force_probe
@@ -0,0 +1,10 @@
+What:  /sys/devices/.../force_probe
+Date:  December 2016
+KernelVersion: 4.11
+Contact:   Matthew Garrett 
+Description:
+   The /sys/devices/.../force_probe attribute is
+   present for all devices. If deferred probing is globally
+   enabled and the device has no driver bound, a write to this
+   node will trigger probing. This attribute reads as 1 if the
+   device currently has a driver bound, and 0 otherwise.
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 7bee2e4e38ce..787ab5b9a16f 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -112,7 +112,8 @@ extern void device_release_driver_internal(struct device 
*dev,
   struct device *parent);
 
 extern void driver_detach(struct device_driver *drv);
-extern int driver_probe_device(struct device_driver *drv, struct device *dev);
+extern int driver_probe_device(struct device_driver *drv, struct device *dev,
+  bool force);
 extern void driver_deferred_probe_del(struct device *dev);
 static inline int driver_match_device(struct device_driver *drv,
  struct device *dev)
@@ -140,6 +141,7 @@ extern struct kset *devices_kset;
 extern void devices_kset_move_last(struct device *dev);
 
 extern struct device_attribute dev_attr_deferred_probe;
+extern struct device_attribute dev_attr_force_probe;
 
 #if defined(CONFIG_MODULES) && defined(CONFIG_SYSFS)
 extern void module_add_driver(struct module *mod, struct device_driver *drv);
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 6470eb8088f4..0d4a771abdd9 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -216,7 +216,7 @@ static ssize_t bind_store(struct device_driver *drv, const 
char *buf,
if (dev->parent)/* Needed for USB */
device_lock(dev->parent);
device_lock(dev);
-   err = driver_probe_device(drv, dev);
+   err = driver_probe_device(drv, dev, true);
device_unlock(dev);
if (dev->parent)
device_unlock(dev->parent);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 020ea7f05520..0c6469c57de6 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1064,8 +1064,13 @@ static int device_add_attrs(struct device *dev)
if (error)
goto err_remove_online;
 
-   return 0;
+   error = device_create_file(dev, &dev_attr_force_probe);
+   if (error)
+   goto err_remove_deferred_probe;
 
+   return 0;
+ err_remove_deferred_probe:
+   device_remove_file(dev, &dev_attr_deferred_probe);
  err_remove_online:
device_remove_file(dev, &dev_attr_online);
  err_remove_dev_groups:
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 4d70fa41132c..8270348b9dc7 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -344,14 +344,15 @@ EXPORT_SYMBOL_GPL(device_bind_driver);
 static atomic_t probe_count = ATOMIC_INIT(0);
 static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue);
 
-static int really_probe(struct device *dev, struct device_driver *drv)
+static int really_probe(struct device *dev, struct device_driver *drv,
+   bool force)
 {
int ret = -EPROBE_DEFER;
int local_trigger_count = atomic_read(&deferred_trigger_count);
bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) &&
   !drv->suppress_bind_attrs;
 
-   if (defer_all_probes) {
+   if (defer_all_probes && !force) {
/*
 * Value of defer_all_probes can be set only by
 * device_defer_all_probes_enable() which, in turn, will call
@@ -527,7 +528,8 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe);
  *
  * If the device has a parent, runtime-resume the parent before driver probing.
  */
-int driver_probe_

Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 11:57:36 PM CET Paul Bolle wrote:
> On Tue, 2017-01-03 at 23:25 +0100, Arnd Bergmann wrote:
> > As far as I'm concerned, we are totally fine as long as there exists a
> > longterm supported kernel that has i4l in drivers/staging.
> 
> Or in drivers/isdn, right?

Right, I was assuming that we would first move it to staging and then
delete it, both at future points in time that we can debate. With the
existing longterm kernels that have i4l in drivers/isdn, the few remaining
users still have access to a supported kernel release until at least
2020.

Arnd

Re: [PATCH] extcon: Add documentation for EXTCON_CHG_USB_SLOW/FAST

2017-01-03 Thread NeilBrown

On Tue, Jan 03 2017, Chanwoo Choi wrote:

> Hi Baolin,
>
> 2017-01-03 14:50 GMT+09:00 Baolin Wang :
>> Currently there are no documentation for EXTCON_CHG_USB_SLOW/FAST
>> charger connector. These names don't mean much and no guide to tell
>> users how to use it, thus try to add documentation to make them clear.
>>
>> Suggested-by: NeilBrown 
>> Signed-off-by: Baolin Wang 
>> ---
>>  include/linux/extcon.h |4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/include/linux/extcon.h b/include/linux/extcon.h
>> index 0020123..ceec1f0 100644
>> --- a/include/linux/extcon.h
>> +++ b/include/linux/extcon.h
>> @@ -53,6 +53,10 @@
>>   * the USB connector, which means EXTCON_CHG_USB_SDP should always
>>   * appear together with EXTCON_USB. The same as ACA charger connector,
>>   * EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST.
>> + *
>> + * A cable of type EXTCON_CHG_USB_SLOW can provide at least 500mA of
>> + * current at 5V. A cable of type EXTCON_CHG_USB_FAST can provide at
>> + * least 1A of current at 5V.
>
> I agree with the minimum current values (mA) for EXTCON_CHG_USB_SLOW/FAST.
> But I'm worried about fixing the voltage at 5V.
>
> I have a plan to support the 'current' and 'voltage' property for
> charger connector as following:
> - EXTCON_PROP_CHG_CURRENT
> - EXTCON_PROP_CHG_VOLTAGE
>
> So, I don't like to fix the voltage for EXTCON_CHG_USB_SLOW/FAST.
> How about this?

I think you should be very cautious about pursuing this approach.  As it
stands, it implies that a given cable has a fixed voltage and a fixed
available current.  This is not true of many cables.

The USB battery charging spec defines a range of possible current limits
that each cable type can provide.  There is a defined protocol for the
slave to detect how much current is available within that range.  So
setting a single "CHG_CURRENT" property would be incorrect.

For SDP ports, the current limit is negotiated using the USB protocol
and it is completely unknown to the cable (and so to EXTCON).  For
USB-C, there is a greater range of voltages and currents.  I don't know
the details, but I do know that a single-valued "PROP_CHG_VOLTAGE" would
not be appropriate.

I think that EXTCON should stick with identifying the type of cable, and
leave it to the client to determine what other properties are implied by
each particular cable type.

Thanks,
NeilBrown

signature.asc
Description: PGP signature

Re: [PATCH v3 03/10] dt-bindings: perf: hisi: Add Devicetree bindings for Hisilicon SoC PMU

2017-01-03 Thread Rob Herring

On Mon, Jan 02, 2017 at 01:49:21AM -0500, Anurup M wrote:
> 1) Device tree bindings for Hisilicon SoC PMU.
> 2) Add example for Hisilicon L3 cache and MN PMU.
> 3) Add child nodes of L3C and MN in djtag bindings example.
> 
> Signed-off-by: Anurup M 
> Signed-off-by: Shaokun Zhang 
> ---
>  .../devicetree/bindings/arm/hisilicon/djtag.txt|  25 ++
>  .../devicetree/bindings/arm/hisilicon/pmu.txt  | 100 
> +
>  2 files changed, 125 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/arm/hisilicon/pmu.txt
> 
> diff --git a/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt 
> b/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt
> index bbe8b45..653fdb7 100644
> --- a/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt
> +++ b/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt
> @@ -27,6 +27,31 @@ Example 1: Djtag for CPU die
>   hisi-scl-id = <0x02>;
>  
>   /* All connecting components will appear as child nodes */
> +
> + pmul3c0 {
> + compatible = "hisilicon,hip05-pmu-l3c-v1";
> + hisi-module-id = <0x04 0x02>;
> + };
> +
> + pmul3c1 {
> + compatible = "hisilicon,hip05-pmu-l3c-v1";
> + hisi-module-id = <0x04 0x04>;
> + };
> +
> + pmul3c2 {
> + compatible = "hisilicon,hip05-pmu-l3c-v1";
> + hisi-module-id = <0x04 0x01>;
> + };
> +
> + pmul3c3 {
> + compatible = "hisilicon,hip05-pmu-l3c-v1";
> + hisi-module-id = <0x04 0x08>;
> + };
> +
> + pmumn0 {
> + compatible = "hisilicon,hip05-pmu-mn-v1";
> + hisi-module-id = <0x0b>;
> + };
>   };
>  
>  Example 2: Djtag for IO die
> diff --git a/Documentation/devicetree/bindings/arm/hisilicon/pmu.txt 
> b/Documentation/devicetree/bindings/arm/hisilicon/pmu.txt
> new file mode 100644
> index 000..fceef8d
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/hisilicon/pmu.txt
> @@ -0,0 +1,100 @@
> +Hisilicon SoC HiP05/06/07 ARMv8 PMU
> +===
> +
> +The Hisilicon SoC chips like HiP05/06/07 etc. consist of various independent
> +system device PMUs such as L3 cache (L3C) and Miscellaneous Nodes(MN). These
> +PMU devices are independent and have hardware logic to gather statistics and
> +performance information.
> +
> +HiSilicon SoC chip is encapsulated by multiple CPU and IO dies. The CPU die
> +is called as Super CPU cluster (SCCL) which includes 16 cpu-cores. Every SCCL
> +in HiP05/06/07 chips are further grouped as CPU clusters (CCL) which includes
> +4 cpu-cores each.
> +e.g. In the case of HiP05/06/07, each SCCL has 1 L3 cache and 1 MN PMU 
> device.
> +The L3 cache is further grouped as 4 L3 cache banks in a SCCL.
> +
> +The Hisilicon SoC PMU DT node bindings for uncore PMU devices are as below.
> +For PMU devices like L3 cache. MN etc. which are accessed using the djtag,
> +the parent node will be the djtag node of the corresponding CPU die (SCCL).
> +
> +L3 cache
> +-
> +The L3 cache is dedicated for each SCCL. Each SCCL in HiP05/06/07 chips have 
> 4
> +L3 cache banks. Each L3 cache bank have separate DT nodes.
> +
> +Required properties:
> +
> + - compatible : This value should be as follows
> + (a) "hisilicon,hip05-pmu-l3c-v1" for v1 hw in HiP05 chipset
> + (b) "hisilicon,hip06-pmu-l3c-v1" for v1 hw in HiP06 chipset
> + (c) "hisilicon,hip07-pmu-l3c-v2" for v2 hw in HiP07 chipset
> +
> + - hisi-module-id : This property is a combination of two values in the 
> below order.

Vendor prefix: hisilicon,module-id

> +   a) Module ID: The module identifier for djtag.
> +   b) Instance or Bank ID: This will identify the L3 cache 
> bank
> +  or instance.
> +
> +Optional properties:
> +
> + - interrupt-parent : A phandle indicating which interrupt controller
> + this PMU signals interrupts to.
> +
> + - interrupts : Interrupt line used by this L3 cache bank.
> +
> + *The counter overflow IRQ is not supported in v1 hardware (HiP05/06).
> +
> +Miscellaneous Node
> +--
> +The MN is dedicated for each SCCL and hence there are separate DT nodes for 
> MN
> +for each SCCL.
> +
> +Required properties:
> +
> + - compatible : This value should be as follows
> + (a) "hisilicon,hip05-pmu-mn-v1" for v1 hw in HiP05 chipset
> + (b) "hisilicon,hip06-pmu-mn-v1" for v1 hw in HiP06 chipset
> + (c) "hisilicon,hip07-pmu-mn-v2" for v2 hw in HiP07 chipset
> +
> + - hisi-module-id : Module ID to input for djtag.

ditto

> +
> +Optional properties:
> +
> + - interrupt-parent : A phandle indicating which interrupt controller
> + this PMU s

[PATCH] Allow userspace control of runtime disabling/enabling of driver probing

2017-01-03 Thread Kees Cook

From: Matthew Garrett 

Various attacks are made possible due to the large attack surface of
kernel drivers and the easy availability of hotpluggable hardware that can
be programmed to mimic arbitrary devices. This allows attackers to find a
single vulnerable driver and then produce a device that can exploit it by
plugging into a hotpluggable bus (such as PCI or USB). This violates user
assumptions about unattended systems being secure as long as the screen
is locked.

The kernel already has support for deferring driver binding in order
to avoid problems over suspend/resume. By exposing this to userspace we
can disable probing when the screen is locked and simply reenable it on
unlock.

This is not a complete solution - since this still permits device
creation and simply blocks driver binding, it won't stop userspace
drivers from attaching to devices and it won't protect against any kernel
vulnerabilities in the core bus code. However, it should be sufficient to
block attacks like Poisontap (https://samy.pl/poisontap/).

Signed-off-by: Matthew Garrett 
Signed-off-by: Kees Cook 
---
 .../ABI/testing/sysfs-kernel-disable-device-probe  | 10 
 drivers/base/base.h|  2 --
 drivers/base/dd.c  | 10 
 drivers/base/power/main.c  | 16 ++---
 include/linux/device.h |  4 
 kernel/ksysfs.c| 28 ++
 6 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-disable-device-probe

diff --git a/Documentation/ABI/testing/sysfs-kernel-disable-device-probe 
b/Documentation/ABI/testing/sysfs-kernel-disable-device-probe
new file mode 100644
index ..1ca6c2d11d8b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-disable-device-probe
@@ -0,0 +1,10 @@
+What:  /sys/kernel/disable_device_probe
+Date:  December 2016
+KernelVersion: 4.11
+Contact:   Matthew Garrett 
+Description:
+   Disables automatic driver probing of any newly added devices.
+   If "1", driver probing is disabled - any newly added devices
+   will not have a driver bound to them. If "0", newly added
+   devices will be probed, along with any devices connected while
+   "1" was set.
diff --git a/drivers/base/base.h b/drivers/base/base.h
index ada9dce34e6d..7bee2e4e38ce 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -134,8 +134,6 @@ extern void device_remove_groups(struct device *dev,
 extern char *make_class_name(const char *name, struct kobject *kobj);
 
 extern int devres_release_all(struct device *dev);
-extern void device_block_probing(void);
-extern void device_unblock_probing(void);
 
 /* /sys/devices directory */
 extern struct kset *devices_kset;
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a8b258e5407b..4d70fa41132c 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -191,6 +191,16 @@ static void driver_deferred_probe_trigger(void)
 }
 
 /**
+ * device_probing_deferred() - Get the current state of device probing
+ *
+ * Returns whether or not device probing is currently deferred
+ */
+bool device_probing_deferred(void)
+{
+   return defer_all_probes;
+}
+
+/**
  * device_block_probing() - Block/defere device's probes
  *
  * It will disable probing of devices and defer their probes instead.
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 249e0304597f..b566e7a6140c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -59,6 +59,8 @@ struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+static bool probing_deferred;
+
 static int async_error;
 
 static char *pm_verb(int event)
@@ -1024,8 +1026,12 @@ void dpm_complete(pm_message_t state)
list_splice(&list, &dpm_list);
mutex_unlock(&dpm_list_mtx);
 
-   /* Allow device probing and trigger re-probing of deferred devices */
-   device_unblock_probing();
+   /*
+* Allow device probing and trigger re-probing of deferred devices
+* unless userspace has explicitly disabled probing
+*/
+   if (!probing_deferred)
+   device_unblock_probing();
trace_suspend_resume(TPS("dpm_complete"), state.event, false);
 }
 
@@ -1714,8 +1720,12 @@ int dpm_prepare(pm_message_t state)
 * hibernation and system behavior will be unpredictable in this case.
 * So, let's prohibit device's probing here and defer their probes
 * instead. The normal behavior will be restored in dpm_complete().
+* Skip this if probing is already deferred, otherwise we'll override
+* explicitly configured state.
 */
-   device_block_probing();
+   probing_deferred = device_probing_deferred();
+   if (!probing_deferred)
+   device_bl

Re: [PATCHv6 00/11] CONFIG_DEBUG_VIRTUAL for arm64

2017-01-03 Thread Florian Fainelli

On 01/03/2017 09:21 AM, Laura Abbott wrote:
> Happy New Year!
> 
> This is a very minor rebase from v5. It only moves a few headers around.
> I think this series should be ready to be queued up for 4.11.

FWIW:

Tested-by: Florian Fainelli 

How do we get this series included? I would like to get the ARM 32-bit
counterpart included as well (will resubmit rebased shortly), but I have
no clue which tree this should be going through.

Thanks!

> 
> Thanks,
> Laura
> 
> Laura Abbott (11):
>   lib/Kconfig.debug: Add ARCH_HAS_DEBUG_VIRTUAL
>   mm/cma: Cleanup highmem check
>   arm64: Move some macros under #ifndef __ASSEMBLY__
>   arm64: Add cast for virt_to_pfn
>   mm: Introduce lm_alias
>   arm64: Use __pa_symbol for kernel symbols
>   drivers: firmware: psci: Use __pa_symbol for kernel symbol
>   kexec: Switch to __pa_symbol
>   mm/kasan: Switch to using __pa_symbol and lm_alias
>   mm/usercopy: Switch to using lm_alias
>   arm64: Add support for CONFIG_DEBUG_VIRTUAL
> 
>  arch/arm64/Kconfig|  1 +
>  arch/arm64/include/asm/kvm_mmu.h  |  4 +-
>  arch/arm64/include/asm/memory.h   | 66 
> +--
>  arch/arm64/include/asm/mmu_context.h  |  6 +--
>  arch/arm64/include/asm/pgtable.h  |  2 +-
>  arch/arm64/kernel/acpi_parking_protocol.c |  3 +-
>  arch/arm64/kernel/cpu-reset.h |  2 +-
>  arch/arm64/kernel/cpufeature.c|  3 +-
>  arch/arm64/kernel/hibernate.c | 20 +++---
>  arch/arm64/kernel/insn.c  |  2 +-
>  arch/arm64/kernel/psci.c  |  3 +-
>  arch/arm64/kernel/setup.c |  9 +++--
>  arch/arm64/kernel/smp_spin_table.c|  3 +-
>  arch/arm64/kernel/vdso.c  |  8 +++-
>  arch/arm64/mm/Makefile|  2 +
>  arch/arm64/mm/init.c  | 12 +++---
>  arch/arm64/mm/kasan_init.c| 22 +++
>  arch/arm64/mm/mmu.c   | 33 ++--
>  arch/arm64/mm/physaddr.c  | 30 ++
>  arch/x86/Kconfig  |  1 +
>  drivers/firmware/psci.c   |  2 +-
>  include/linux/mm.h|  4 ++
>  kernel/kexec_core.c   |  2 +-
>  lib/Kconfig.debug |  5 ++-
>  mm/cma.c  | 15 +++
>  mm/kasan/kasan_init.c | 15 +++
>  mm/usercopy.c |  4 +-
>  27 files changed, 180 insertions(+), 99 deletions(-)
>  create mode 100644 arch/arm64/mm/physaddr.c
> 


-- 
Florian

Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2017-01-03 Thread Paul Bolle

On Tue, 2017-01-03 at 23:25 +0100, Arnd Bergmann wrote:
> As far as I'm concerned, we are totally fine as long as there exists a
> longterm supported kernel that has i4l in drivers/staging.

Or in drivers/isdn, right?


Paul Bolle

Re: [PATCH v3 02/10] dt-bindings: hisi: Add Hisilicon HiP05/06/07 Djtag dts bindings

2017-01-03 Thread Rob Herring

On Mon, Jan 02, 2017 at 01:49:03AM -0500, Anurup M wrote:
> From: Tan Xiaojun 
> 
> Add Hisilicon HiP05/06/07 Djtag dts bindings for CPU and IO Die
> 
> Signed-off-by: Tan Xiaojun 
> Signed-off-by: Anurup M 
> ---
>  .../devicetree/bindings/arm/hisilicon/djtag.txt| 41 
> ++
>  1 file changed, 41 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/arm/hisilicon/djtag.txt
> 
> diff --git a/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt 
> b/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt
> new file mode 100644
> index 000..bbe8b45
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/arm/hisilicon/djtag.txt
> @@ -0,0 +1,41 @@
> +The Hisilicon Djtag is an independent component which connects with some 
> other
> +components in the SoC by Debug Bus. The djtag is available in CPU and IO dies
> +in the chip. The djtag controls access to connecting modules of CPU and IO
> +dies.
> +The various connecting components in CPU die (like L3 cache, L3 cache PMU 
> etc.)
> +are accessed by djtag during real time debugging. In IO die there are 
> connecting
> +components like RSA. These components appear as devices attached to djtag 
> bus.
> +
> +Hisilicon HiP05/06/07 djtag for CPU and IO die
> +Required properties:
> +  - compatible : The value should be as follows
> + (a) "hisilicon,hip05-djtag-v1" for CPU and IO die which use v1 hw in
> + HiP05 chipset.

You don't need to distinguish the CPU and IO blocks?

> + (b) "hisilicon,hip06-djtag-v1" for CPU die which use v1 hw in HiP06 
> chipset.
> + (c) "hisilicon,hip06-djtag-v2" for IO die which use v2 hw in HiP06 
> chipset.
> + (d) "hisilicon,hip07-djtag-v2" for CPU and IO die which use v2 hw in
> + HiP07 chipset.
> +  - reg : Register address and size
> +  - hisi-scl-id : The Super Cluster ID for CPU or IO die

Still needs a vendor prefix. i.e. hisilicon,scl-id

> +
> +Example 1: Djtag for CPU die
> +
> + /* for Hisilicon HiP05 djtag for CPU Die */
> + djtag0: djtag@8001 {
> + compatible = "hisilicon,hip05-djtag-v1";
> + reg = <0x0 0x8001 0x0 0x1>;
> + hisi-scl-id = <0x02>;
> +
> + /* All connecting components will appear as child nodes */
> + };
> +
> +Example 2: Djtag for IO die
> +
> + /* for Hisilicon HiP05 djtag for IO Die */
> + djtag1: djtag@d000 {
> + compatible = "hisilicon,hip05-djtag-v1";
> + reg = <0x0 0xd000 0x0 0x1>;
> + hisi-scl-id = <0x01>;
> +
> + /* All connecting components will appear as child nodes */
> + };
> -- 
> 2.1.4
>

Re: [PATCH V7 1/4] Documentation/devicetree/bindings: b850v3_lvds_dp

2017-01-03 Thread Rob Herring

On Sun, Jan 01, 2017 at 09:24:29PM +0100, Peter Senna Tschudin wrote:
> Devicetree bindings documentation for the GE B850v3 LVDS/DP++
> display bridge.
> 
> Cc: Martyn Welch 
> Cc: Martin Donnelly 
> Cc: Javier Martinez Canillas 
> Cc: Enric Balletbo i Serra 
> Cc: Philipp Zabel 
> Cc: Rob Herring 
> Cc: Fabio Estevam 
> Signed-off-by: Peter Senna Tschudin 
> ---
> There was an Acked-by from Rob Herring  for V6, but I changed
> the bindings to use i2c_new_secondary_device() so I removed it from the commit
> message.
> 
>  .../devicetree/bindings/ge/b850v3-lvds-dp.txt  | 39 
> ++

Generally, bindings are not organized by vendor. Put it in 
bindings/display/bridge/... instead.

>  1 file changed, 39 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt
> 
> diff --git a/Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt 
> b/Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt
> new file mode 100644
> index 000..1bc6ebf
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/ge/b850v3-lvds-dp.txt
> @@ -0,0 +1,39 @@
> +Driver for GE B850v3 LVDS/DP++ display bridge
> +
> +Required properties:
> +  - compatible : should be "ge,b850v3-lvds-dp".

Isn't '-lvds-dp' redundant? The part# should be enough.

> +  - reg : should contain the main address which is used to ack the
> +interrupts and address for edid.
> +  - reg-names : comma separeted list of register names. Valid values

s/separeted/separated/

> +are "main", and "edid".
> +  - interrupt-parent : phandle of the interrupt controller that services
> +interrupts to the device
> +  - interrupts : one interrupt should be described here, as in
> +<0 IRQ_TYPE_LEVEL_HIGH>.
> +  - port : should describe the video signal connection between the host
> +and the bridge.
> +
> +Example:
> +
> +&mux2_i2c2 {
> + status = "okay";
> + clock-frequency = <10>;
> +
> + b850v3-lvds-dp-bridge@73  {
> + compatible = "ge,b850v3-lvds-dp";
> + #address-cells = <1>;
> + #size-cells = <0>;
> +
> + reg = <0x73 0x72>;
> + reg-names = "main", "edid";
> +
> + interrupt-parent = <&gpio2>;
> + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
> +
> + port {
> + b850v3_dp_bridge_in: endpoint {
> + remote-endpoint = <&lvds0_out>;
> + };
> + };
> + };
> +};
> -- 
> 2.5.5
>

Re: [PATCH v3 RESEND 07/11] pwm: imx: Provide atomic PWM support for i.MX PWMv2

2017-01-03 Thread Lukasz Majewski

Hi Boris,

> On Tue, 3 Jan 2017 23:01:11 +0100
> Lukasz Majewski  wrote:
> 
> > Hi Boris, Stefan,
> > 
> > > On Tue, 03 Jan 2017 09:29:40 -0800
> > > Stefan Agner  wrote:
> > >   
> > > > On 2017-01-03 04:46, Boris Brezillon wrote:
> > > >   
> > > > >> > Well, regarding the imx_pwm_apply_v2() suggested by
> > > > >> > Stefan, I think we both agreed that most of the code was
> > > > >> > unneeded when all we want to do is disable the PWM.
> > > > >>
> > > > >> So for the PATCH 7/11 we fix the issue with recalculating
> > > > >> clocks when we want to disable PWM.
> > > > >>
> > > > >> if (state->enabled) {
> > > > >>  c = clk_get_rate(imx->clk_per);
> > > > >>  c *= state->period;
> > > > >>
> > > > >>  do_div(c, 10);
> > > > >>  period_cycles = c;
> > > > >>
> > > > >>  prescale = period_cycles / 0x1 + 1;
> > > > >>
> > > > >>  period_cycles /= prescale;
> > > > >>  c = (unsigned long long)period_cycles *
> > > > >>  state->duty_cycle;
> > > > >>  do_div(c, state->period);
> > > > >>  duty_cycles = c;
> > > > >>
> > > > >>  /*
> > > > >>   * According to imx pwm RM, the real period
> > > > >> value
> > > > >>   * should be PERIOD value in PWMPR plus 2.
> > > > >>   */
> > > > >>  if (period_cycles > 2)
> > > > >>  period_cycles -= 2;
> > > > >>  else
> > > > >>  period_cycles = 0;
> > > > >>
> > > > >>  /*
> > > > >>   * Enable the clock if the PWM is not already
> > > > >>   * enabled.
> > > > >>   */
> > > > >>  if (!cstate.enabled) {
> > > > >>  ret =
> > > > >> clk_prepare_enable(imx->clk_per); if (ret)
> > > > >>  return ret;
> > > > >>  }
> > > > >>
> > > > >>  /*
> > > > >>   * Wait for a free FIFO slot if the PWM is
> > > > >> already
> > > > >>   * enabled, and flush the FIFO if the PWM was
> > > > >> disabled
> > > > >>   * and is about to be enabled.
> > > > >>   */
> > > > >>  if (cstate.enabled)
> > > > >>  imx_pwm_wait_fifo_slot(chip, pwm);
> > > > >>  else
> > > > >>  imx_pwm_sw_reset(chip);
> > > > >>
> > > > >>  writel(duty_cycles, imx->mmio_base +
> > > > >> MX3_PWMSAR); writel(period_cycles, imx->mmio_base +
> > > > >> MX3_PWMPR);
> > > > >>
> > > > >>  writel(MX3_PWMCR_PRESCALER(prescale) |
> > > > >> MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
> > > > >> MX3_PWMCR_DBGEN |
> > > > >> MX3_PWMCR_CLKSRC_IPG_HIGH | MX3_PWMCR_EN,
> > > > >> imx->mmio_base + MX3_PWMCR);
> > > > >>  } else {
> > > > >>
> > > > >>  writel(0, imx->mmio_base + MX3_PWMCR);
> > > > >>
> > > > >>  /* Disable the clock if the PWM is currently
> > > > >> enabled. */ if (cstate.enabled)
> > > > >>  clk_disable_unprepare(imx->clk_per);
> > > > >>  }
> > > > >>
> > > > >>
> > > > > 
> > > > > Yep.
> > > > > 
> > > > 
> > > > This looks like a good transformation of the current Patch 7,
> > > > but once you merge my patch, it will look slightly
> > > > different...  
> > > 
> > > Yes. I think we should just unconditionally enable/disable the
> > > per_clk at function entry/exit. The prepare_enable() call is
> > > almost free when the clk is already enabled, so it's not like
> > > we're adding a huge overhead by doing that.  
> > 
> > So in the above snippet we should replace:
> > 
> > if (!cstate.enabled) {
> > ret = clk_prepare_enable(imx->clk_per);
> > if (ret)
> > return ret;
> > }
> > 
> > with
> > ret = clk_prepare_enable(imx->clk_per);
> > if (ret)
> > return ret;
> > 
> > And 
> > 
> > if (cstate.enabled)
> > clk_disable_unprepare(imx->clk_per);
> > 
> > with 
> > clk_disable_unprepare(imx->clk_per);
> 
> That's what I had in mind.

OK.

> 
> > 
> > >   
> > > >   
> > > > >>
> > > > >> >
> > > > >> > My concern was more about the way PWM changes are applied
> > > > >> > (->apply() returns before the change is actually applied),
> > > > >> > but I agreed that it could be fixed later on (if other
> > > > >> > people think it's really needed), since the existing code
> > > > >> > already handles it this way.
> > > > >>
> > > > >> This is the issue with FIFO setting - but for now we do not
> > > > >> deal with it.
> > > > > 
> > > > > Exactly.
> > > > > 
> > > > >>
> > > > >> >
> > > > >> > > No clear decision what to change until today when Stefan
> > > > >> > > prepared separate (concise) patch (now I see what is the
> > > > >> > > problem).   
> > > > >> >
> > > > >> > The patch proposed by Stefan is addressing a different
> > >

Re: [PATCH 11/13] fs: fix unsigned enum warning with gcc-4.2

2017-01-03 Thread Brendan Gregg

On Fri, Dec 16, 2016 at 2:56 AM, Arnd Bergmann  wrote:
>
> With arm-linux-gcc-4.2, almost every file we build in the kernel ends
> up with this warning:
>
> include/linux/fs.h:2648: warning: comparison of unsigned expression < 0 is 
> always false
>

Thanks, I'd like to see this fixed as a similar warning gets printed
whenever running many of the bcc/BPF tools, which gets annoying and is
user-visible. eg:

# /usr/share/bcc/tools/xfsslower 1
In file included from /virtual/main.c:3:
/lib/modules/4.8.6-300.fc25.x86_64/build/include/linux/fs.h:2677:9:
warning: comparison of unsigned enum expression < 0 is always false
[-Wtautological-compare]
if (id < 0 || id >= READING_MAX_ID)
~~ ^ ~
1 warning generated.
Tracing XFS operations slower than 1 ms
TIME COMM   PIDT BYTES   OFF_KB   LAT(ms) FILENAME
14:44:27 cksum  4414   R 65536   0   1.02 chcon
14:44:27 cksum  4414   R 65536   0   1.20 cpio
14:44:27 cksum  4414   R 65536   0   1.01 diff
14:44:27 cksum  4414   R 65536   0   1.15 dir
[...]

This patch fixes the warning.

Brendan

>
> Later versions don't have this problem, but it's easy enough to
> work around.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  include/linux/fs.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 398cf20a706d..782c2a292fd7 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2645,7 +2645,7 @@ static const char * const kernel_read_file_str[] = {
>
>  static inline const char *kernel_read_file_id_str(enum kernel_read_file_id 
> id)
>  {
> -   if (id < 0 || id >= READING_MAX_ID)
> +   if ((unsigned)id >= READING_MAX_ID)
> return kernel_read_file_str[READING_UNKNOWN];
>
> return kernel_read_file_str[id];
> --
> 2.9.0
>

Re: [PATCH linux 5/6] hwmon: occ: Add hwmon implementation for the P8 OCC

2017-01-03 Thread Rob Herring

On Fri, Dec 30, 2016 at 11:56:07AM -0600, eajames@gmail.com wrote:
> From: "Edward A. James" 
> 
> Add code to tie the hwmon sysfs code and the POWER8 OCC code together, as
> well as probe the entire driver from the I2C bus. I2C is the communication
> method between the BMC and the P8 OCC.
> 
> Signed-off-by: Edward A. James 
> Signed-off-by: Andrew Jeffery 
> Reviewed-by: Andrew Jeffery 
> ---
>  .../devicetree/bindings/i2c/i2c-ibm-occ.txt|  13 ++

bindings/i2c/ is generally for host controllers. bindings/hwmon perhaps.

>  drivers/hwmon/occ/Kconfig  |  14 ++
>  drivers/hwmon/occ/Makefile |   1 +
>  drivers/hwmon/occ/p8_occ_i2c.c | 141 
> +
>  4 files changed, 169 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/i2c/i2c-ibm-occ.txt
>  create mode 100644 drivers/hwmon/occ/p8_occ_i2c.c
> 
> diff --git a/Documentation/devicetree/bindings/i2c/i2c-ibm-occ.txt 
> b/Documentation/devicetree/bindings/i2c/i2c-ibm-occ.txt
> new file mode 100644
> index 000..b0d2b36
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/i2c/i2c-ibm-occ.txt
> @@ -0,0 +1,13 @@
> +HWMON I2C driver for IBM POWER CPU OCC (On Chip Controller)
> +
> +Required properties:
> + - compatible: must be "ibm,p8-occ-i2c"
> + - reg: physical address
> +
> +Example:
> +i2c3: i2c-bus@100 {
> + occ@50 {
> + compatible = "ibm,p8-occ-i2c";
> + reg = <0x50>;
> + };
> +};
> diff --git a/drivers/hwmon/occ/Kconfig b/drivers/hwmon/occ/Kconfig
> index cdb64a7..3a5188f 100644
> --- a/drivers/hwmon/occ/Kconfig
> +++ b/drivers/hwmon/occ/Kconfig
> @@ -13,3 +13,17 @@ menuconfig SENSORS_PPC_OCC
>  
> This driver can also be built as a module. If so, the module
> will be called occ.
> +
> +if SENSORS_PPC_OCC
> +
> +config SENSORS_PPC_OCC_P8_I2C
> + tristate "POWER8 OCC hwmon support"
> + depends on I2C
> + help
> +  Provide a hwmon sysfs interface for the POWER8 On-Chip Controller,
> +  exposing temperature, frequency and power measurements.
> +
> +  This driver can also be built as a module. If so, the module will be
> +  called p8-occ-i2c.
> +
> +endif
> diff --git a/drivers/hwmon/occ/Makefile b/drivers/hwmon/occ/Makefile
> index a6881f9..9294b58 100644
> --- a/drivers/hwmon/occ/Makefile
> +++ b/drivers/hwmon/occ/Makefile
> @@ -1 +1,2 @@
>  obj-$(CONFIG_SENSORS_PPC_OCC) += occ.o occ_sysfs.o
> +obj-$(CONFIG_SENSORS_PPC_OCC_P8_I2C) += occ_scom_i2c.o occ_p8.o p8_occ_i2c.o
> diff --git a/drivers/hwmon/occ/p8_occ_i2c.c b/drivers/hwmon/occ/p8_occ_i2c.c
> new file mode 100644
> index 000..0c65894
> --- /dev/null
> +++ b/drivers/hwmon/occ/p8_occ_i2c.c
> @@ -0,0 +1,141 @@
> +/*
> + * p8_occ_i2c.c - hwmon OCC driver
> + *
> + * This file contains the i2c layer for accessing the P8 OCC over i2c bus.
> + *
> + * Copyright 2016 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "scom.h"
> +#include "occ_scom_i2c.h"
> +#include "occ_p8.h"
> +#include "occ_sysfs.h"
> +
> +#define P8_OCC_I2C_NAME  "p8-occ-i2c"
> +
> +static char *caps_sensor_names[] = {
> + "curr_powercap",
> + "curr_powerreading",
> + "norm_powercap",
> + "max_powercap",
> + "min_powercap",
> + "user_powerlimit"
> +};
> +
> +int p8_i2c_getscom(void *bus, u32 address, u64 *data)
> +{
> + /* P8 i2c slave requires address to be shifted by 1 */
> + address = address << 1;
> +
> + return occ_i2c_getscom(bus, address, data);
> +}
> +
> +int p8_i2c_putscom(void *bus, u32 address, u32 data0, u32 data1)
> +{
> + /* P8 i2c slave requires address to be shifted by 1 */
> + address = address << 1;
> +
> + return occ_i2c_putscom(bus, address, data0, data1);
> +}
> +
> +static struct occ_bus_ops p8_bus_ops = {
> + .getscom = p8_i2c_getscom,
> + .putscom = p8_i2c_putscom,
> +};
> +
> +static struct occ_sysfs_config p8_sysfs_config = {
> + .num_caps_fields = ARRAY_SIZE(caps_sensor_names),
> + .caps_names = caps_sensor_names,
> +};
> +
> +static int p8_occ_probe(struct i2c_client *client,
> + const struct i2c_device_id *id)
> +{
> + struct occ *occ;
> + struct occ_sysfs *hwmon;
> +
> + occ = p8_occ_start(&client->dev, client, &p8_bus_ops);
> + if (IS_ERR(occ))
> + return PTR_ERR(occ);
>

Re: [PATCH 1/4] watchdog: iTCO_wdt: Use allocated data structures

2017-01-03 Thread Andy Shevchenko

On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
> Allocate private data and the watchdog device to to avoid having

'too to' ?

> to clear it on remove and to enable subsequent simplifications.

I doubt there will be more than one device per platform, but the change is
good by itself to reduce the amount of global module variables and such.

Reviewed-by: Andy Shevchenko 

>
> Signed-off-by: Guenter Roeck 
> ---
>  drivers/watchdog/iTCO_wdt.c | 402 
> ++--
>  1 file changed, 205 insertions(+), 197 deletions(-)
>
> diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
> index 06fcb6c8c917..a35a9164ccd0 100644
> --- a/drivers/watchdog/iTCO_wdt.c
> +++ b/drivers/watchdog/iTCO_wdt.c
> @@ -72,22 +72,24 @@
>
>  /* Address definitions for the TCO */
>  /* TCO base address */
> -#define TCOBASE(iTCO_wdt_private.tco_res->start)
> +#define TCOBASE(p) ((p)->tco_res->start)
>  /* SMI Control and Enable Register */
> -#define SMI_EN (iTCO_wdt_private.smi_res->start)
> -
> -#define TCO_RLD(TCOBASE + 0x00) /* TCO Timer Reload and 
> Curr. Value */
> -#define TCOv1_TMR  (TCOBASE + 0x01) /* TCOv1 Timer Initial Value   */
> -#define TCO_DAT_IN (TCOBASE + 0x02) /* TCO Data In Register*/
> -#define TCO_DAT_OUT(TCOBASE + 0x03) /* TCO Data Out Register   */
> -#define TCO1_STS   (TCOBASE + 0x04) /* TCO1 Status Register*/
> -#define TCO2_STS   (TCOBASE + 0x06) /* TCO2 Status Register*/
> -#define TCO1_CNT   (TCOBASE + 0x08) /* TCO1 Control Register   */
> -#define TCO2_CNT   (TCOBASE + 0x0a) /* TCO2 Control Register   */
> -#define TCOv2_TMR  (TCOBASE + 0x12) /* TCOv2 Timer Initial Value   */
> +#define SMI_EN(p)  ((p)->smi_res->start)
> +
> +#define TCO_RLD(p) (TCOBASE(p) + 0x00) /* TCO Timer Reload/Curr. Value */
> +#define TCOv1_TMR(p)   (TCOBASE(p) + 0x01) /* TCOv1 Timer Initial Value*/
> +#define TCO_DAT_IN(p)  (TCOBASE(p) + 0x02) /* TCO Data In Register */
> +#define TCO_DAT_OUT(p) (TCOBASE(p) + 0x03) /* TCO Data Out Register*/
> +#define TCO1_STS(p)(TCOBASE(p) + 0x04) /* TCO1 Status Register */
> +#define TCO2_STS(p)(TCOBASE(p) + 0x06) /* TCO2 Status Register */
> +#define TCO1_CNT(p)(TCOBASE(p) + 0x08) /* TCO1 Control Register*/
> +#define TCO2_CNT(p)(TCOBASE(p) + 0x0a) /* TCO2 Control Register*/
> +#define TCOv2_TMR(p)   (TCOBASE(p) + 0x12) /* TCOv2 Timer Initial Value*/
>
>  /* internal variables */
> -static struct {/* this is private data for the iTCO_wdt 
> device */
> +struct iTCO_wdt_private {
> +   struct watchdog_device wddev;
> +
> /* TCO version/generation */
> unsigned int iTCO_version;
> struct resource *tco_res;
> @@ -105,7 +107,7 @@ static struct { /* this is private data for 
> the iTCO_wdt device */
> struct pci_dev *pdev;
> /* whether or not the watchdog has been suspended */
> bool suspended;
> -} iTCO_wdt_private;
> +};
>
>  /* module parameters */
>  #define WATCHDOG_TIMEOUT 30/* 30 sec default heartbeat */
> @@ -135,21 +137,23 @@ MODULE_PARM_DESC(turn_SMI_watchdog_clear_off,
>   * every 0.6 seconds.  v3's internal timer is stored as seconds (some
>   * datasheets incorrectly state 0.6 seconds).
>   */
> -static inline unsigned int seconds_to_ticks(int secs)
> +static inline unsigned int seconds_to_ticks(struct iTCO_wdt_private *p,
> +   int secs)
>  {
> -   return iTCO_wdt_private.iTCO_version == 3 ? secs : (secs * 10) / 6;
> +   return p->iTCO_version == 3 ? secs : (secs * 10) / 6;
>  }
>
> -static inline unsigned int ticks_to_seconds(int ticks)
> +static inline unsigned int ticks_to_seconds(struct iTCO_wdt_private *p,
> +   int ticks)
>  {
> -   return iTCO_wdt_private.iTCO_version == 3 ? ticks : (ticks * 6) / 10;
> +   return p->iTCO_version == 3 ? ticks : (ticks * 6) / 10;
>  }
>
> -static inline u32 no_reboot_bit(void)
> +static inline u32 no_reboot_bit(struct iTCO_wdt_private *p)
>  {
> u32 enable_bit;
>
> -   switch (iTCO_wdt_private.iTCO_version) {
> +   switch (p->iTCO_version) {
> case 5:
> case 3:
> enable_bit = 0x0010;
> @@ -167,40 +171,40 @@ static inline u32 no_reboot_bit(void)
> return enable_bit;
>  }
>
> -static void iTCO_wdt_set_NO_REBOOT_bit(void)
> +static void iTCO_wdt_set_NO_REBOOT_bit(struct iTCO_wdt_private *p)
>  {
> u32 val32;
>
> /* Set the NO_REBOOT bit: this disables reboots */
> -   if (iTCO_wdt_private.iTCO_version >= 2) {
> -   val32 = readl(iTCO_wdt_private.gcs_pmc);
> -   val32 |= no_reboot_bit();
> -   writel(val32, iTCO_wdt_private.gcs_pmc);
> -   } else if (iTCO_wdt_private.iTCO_version == 1) {
> -   pci_read_config_dword(iTCO_wdt_private.pdev, 0xd

Re: [patch] mm, thp: always direct reclaim for MADV_HUGEPAGE even when deferred

2017-01-03 Thread David Rientjes

On Mon, 2 Jan 2017, Vlastimil Babka wrote:

> I'm late to the thread (I did read it fully though), so instead of
> multiple responses, I'll just list my observations here:
> 
> - "defer", e.g. background kswapd+compaction is not a silver bullet, it
> will also affect the system. Mel already mentioned extra reclaim.
> Compaction also has CPU costs, just hides the accounting to a kernel
> thread so it's not visible as latency. It also increases zone/node
> lru_lock and lock pressure.
> 
> For the same reasons, admin might want to limit direct compaction for
> THP, even for madvise() apps. It's also likely that "defer" might have
> lower system overhead than "madvise", as with "defer",
> reclaim/compaction is done by one per-node thread at a time, but there
> might be multiple madvise() threads. So there might be sense in not
> allowing madvise() apps to do direct reclaim/compaction on "defer".
> 

Hmm, is there a significant benefit to setting "defer" rather than "never" 
if you can rely on khugepaged to trigger compaction when it tries to 
allocate?  I suppose if there is nothing to collapse that this won't do 
compaction, but is this not intended for users who always want to defer 
when not immediately available?

"Defer" in its current setting is useless, in my opinion, other than 
providing it as a simple workaround to users when their applications are 
doing MADV_HUGEPAGE without allowing them to configure it.  We would love 
to use "defer" if it didn't completely break MADV_HUGEPAGE, though.

> - for overriding specific apps such as QEMU (including their madvise()
> usage, AFAICS), we have PR_SET_THP_DISABLE prctl(), so no need to
> LD_PRELOAD stuff IMO.
> 

Very good point, and I think it's also worthwhile to allow users to 
suppress the MADV_HUGEPAGE when allocating a translation buffer in qemu if 
they choose to do so; it's a very trivial patch to qemu to allow this to 
be configurable.  I haven't proposed it because I don't personally have a 
need for it, and haven't been pointed to anyone who has a need for it.

> - I have wondered about exactly the issue here when Mel proposed the
> defer option [1]. Mel responded that it doesn't seem needed at that
> point. Now it seems it is. Too bad you didn't raise it then, but to be
> fair you were not CC'd.
> 

My understanding is that the defer option is available to users who cannot 
modify their binary to suppress an madvise(MADV_HUGEPAGE) and are unaware 
that PR_SET_THP_DISABLE exists.  The prctl was added specifically when you 
cannot control your binary.

> So would something like this be possible?
> 
> > echo "defer madvise" > /sys/kernel/mm/transparent_hugepage/defrag
> > cat /sys/kernel/mm/transparent_hugepage/defrag
> always [defer] [madvise] never
> 
> I'm not sure about the analogous kernel boot option though, I guess
> those can't use spaces, so maybe comma-separated?
> 
> If that's not acceptable, then I would probably rather be for changing
> "madvise" to include "defer", than the other way around. When we augment
> kcompactd to be more proactive, it might easily be that it will
> effectively act as "defer", even when defrag=none is set, anyway.
> 

The concern I have with changing the behavior of "madvise" is that it 
changes long standing behavior that people have correctly implemented 
userspace applications with.  I suggest doing this only with "defer" since 
it's an option that is new, nobody appears to be deploying with, and makes 
it much more powerful.  I think we could make the kernel default as 
"defer" later as well and not break userspace that has been setting 
"madvise" ever since the 2.6 kernel.

My position is this: userspace that does MADV_HUGEPAGE knows what it's 
doing.  Let it stall if it wants to stall.  If users don't want it to be 
done, allow them to configure it.  If a binary has forced you into using 
it, use the prctl.  Otherwise, I think "defer" doing background compaction 
for everybody and direct compaction for users who really want hugepages is 
appropriate and is precisely what I need.

Re: [PATCH 2/4] watchdog: iTCO_wdt: Use device managed resources

2017-01-03 Thread Andy Shevchenko

On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
> Using device managed resources simplifies error handling and cleanup,
> and reduces the likelihood of errors.
>
> Signed-off-by: Guenter Roeck 

Reviewed-by: Andy Shevchenko 

Does it make sense to convert to dev_err() at some point?

> ---
>  drivers/watchdog/iTCO_wdt.c | 80 
> ++---
>  1 file changed, 17 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
> index a35a9164ccd0..eed1dee6de19 100644
> --- a/drivers/watchdog/iTCO_wdt.c
> +++ b/drivers/watchdog/iTCO_wdt.c
> @@ -401,27 +401,6 @@ static const struct watchdog_ops iTCO_wdt_ops = {
>   * Init & exit routines
>   */
>
> -static void iTCO_wdt_cleanup(struct iTCO_wdt_private *p)
> -{
> -   /* Stop the timer before we leave */
> -   if (!nowayout)
> -   iTCO_wdt_stop(&p->wddev);
> -
> -   /* Deregister */
> -   watchdog_unregister_device(&p->wddev);
> -
> -   /* release resources */
> -   release_region(p->tco_res->start,
> -   resource_size(p->tco_res));
> -   release_region(p->smi_res->start,
> -   resource_size(p->smi_res));
> -   if (p->iTCO_version >= 2) {
> -   iounmap(p->gcs_pmc);
> -   release_mem_region(p->gcs_pmc_res->start,
> -   resource_size(p->gcs_pmc_res));
> -   }
> -}
> -
>  static int iTCO_wdt_probe(struct platform_device *dev)
>  {
> struct itco_wdt_platform_data *pdata = dev_get_platdata(&dev->dev);
> @@ -458,41 +437,28 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> p->gcs_pmc_res = platform_get_resource(dev,
>IORESOURCE_MEM,
>ICH_RES_MEM_GCS_PMC);
> -
> -   if (!p->gcs_pmc_res)
> -   return -ENODEV;
> -
> -   if (!request_mem_region(p->gcs_pmc_res->start,
> -   resource_size(p->gcs_pmc_res),
> -   dev->name))
> -   return -EBUSY;
> -
> -   p->gcs_pmc = ioremap(p->gcs_pmc_res->start,
> -resource_size(p->gcs_pmc_res));
> -   if (!p->gcs_pmc) {
> -   ret = -EIO;
> -   goto unreg_gcs_pmc;
> -   }
> +   p->gcs_pmc = devm_ioremap_resource(&dev->dev, p->gcs_pmc_res);
> +   if (IS_ERR(p->gcs_pmc))
> +   return PTR_ERR(p->gcs_pmc);
> }
>
> /* Check chipset's NO_REBOOT bit */
> if (iTCO_wdt_unset_NO_REBOOT_bit(p) &&
> iTCO_vendor_check_noreboot_on()) {
> pr_info("unable to reset NO_REBOOT flag, device disabled by 
> hardware/BIOS\n");
> -   ret = -ENODEV;  /* Cannot reset NO_REBOOT bit */
> -   goto unmap_gcs_pmc;
> +   return -ENODEV; /* Cannot reset NO_REBOOT bit */
> }
>
> /* Set the NO_REBOOT bit to prevent later reboots, just for sure */
> iTCO_wdt_set_NO_REBOOT_bit(p);
>
> /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
> -   if (!request_region(p->smi_res->start,
> -   resource_size(p->smi_res), dev->name)) {
> +   if (!devm_request_region(&dev->dev, p->smi_res->start,
> +resource_size(p->smi_res),
> +dev->name)) {
> pr_err("I/O address 0x%04llx already in use, device 
> disabled\n",
>(u64)SMI_EN(p));
> -   ret = -EBUSY;
> -   goto unmap_gcs_pmc;
> +   return -EBUSY;
> }
> if (turn_SMI_watchdog_clear_off >= p->iTCO_version) {
> /*
> @@ -504,12 +470,12 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> outl(val32, SMI_EN(p));
> }
>
> -   if (!request_region(p->tco_res->start,
> -   resource_size(p->tco_res), dev->name)) {
> +   if (!devm_request_region(&dev->dev, p->tco_res->start,
> +resource_size(p->tco_res),
> +dev->name)) {
> pr_err("I/O address 0x%04llx already in use, device 
> disabled\n",
>(u64)TCOBASE(p));
> -   ret = -EBUSY;
> -   goto unreg_smi;
> +   return -EBUSY;
> }
>
> pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n",
> @@ -555,37 +521,25 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> WATCHDOG_TIMEOUT);
> }
>
> -   ret = watchdog_register_device(&p->wddev);
> +   ret = devm_watchdog_register_device(&dev->dev, &p->wddev);
> if (ret != 0) {
> pr_err("cannot

Re: [PATCH 3/4] watchdog: iTCO_wdt: Use pdev for platform device and pci_dev for pci device

2017-01-03 Thread Andy Shevchenko

On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
> Use pdev for struct platform_device, pcidev for struct pci_dev, and dev
> for struct device variables to improve consistency.
>
> Remove 'struct platform_device *dev;' from struct iTCO_wdt_private since
> it was unused.

Would pci_dev work?

In any case
Reviewed-by: Andy Shevchenko 

>
> Signed-off-by: Guenter Roeck 
> ---
>  drivers/watchdog/iTCO_wdt.c | 53 
> ++---
>  1 file changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
> index eed1dee6de19..ad29ae03a30b 100644
> --- a/drivers/watchdog/iTCO_wdt.c
> +++ b/drivers/watchdog/iTCO_wdt.c
> @@ -102,9 +102,8 @@ struct iTCO_wdt_private {
> unsigned long __iomem *gcs_pmc;
> /* the lock for io operations */
> spinlock_t io_lock;
> -   struct platform_device *dev;
> /* the PCI-device */
> -   struct pci_dev *pdev;
> +   struct pci_dev *pcidev;
> /* whether or not the watchdog has been suspended */
> bool suspended;
>  };
> @@ -181,9 +180,9 @@ static void iTCO_wdt_set_NO_REBOOT_bit(struct 
> iTCO_wdt_private *p)
> val32 |= no_reboot_bit(p);
> writel(val32, p->gcs_pmc);
> } else if (p->iTCO_version == 1) {
> -   pci_read_config_dword(p->pdev, 0xd4, &val32);
> +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
> val32 |= no_reboot_bit(p);
> -   pci_write_config_dword(p->pdev, 0xd4, val32);
> +   pci_write_config_dword(p->pcidev, 0xd4, val32);
> }
>  }
>
> @@ -200,11 +199,11 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(struct 
> iTCO_wdt_private *p)
>
> val32 = readl(p->gcs_pmc);
> } else if (p->iTCO_version == 1) {
> -   pci_read_config_dword(p->pdev, 0xd4, &val32);
> +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
> val32 &= ~enable_bit;
> -   pci_write_config_dword(p->pdev, 0xd4, val32);
> +   pci_write_config_dword(p->pcidev, 0xd4, val32);
>
> -   pci_read_config_dword(p->pdev, 0xd4, &val32);
> +   pci_read_config_dword(p->pcidev, 0xd4, &val32);
> }
>
> if (val32 & enable_bit)
> @@ -401,9 +400,10 @@ static const struct watchdog_ops iTCO_wdt_ops = {
>   * Init & exit routines
>   */
>
> -static int iTCO_wdt_probe(struct platform_device *dev)
> +static int iTCO_wdt_probe(struct platform_device *pdev)
>  {
> -   struct itco_wdt_platform_data *pdata = dev_get_platdata(&dev->dev);
> +   struct device *dev = &pdev->dev;
> +   struct itco_wdt_platform_data *pdata = dev_get_platdata(dev);
> struct iTCO_wdt_private *p;
> unsigned long val32;
> int ret;
> @@ -411,33 +411,32 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> if (!pdata)
> return -ENODEV;
>
> -   p = devm_kzalloc(&dev->dev, sizeof(*p), GFP_KERNEL);
> +   p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
> if (!p)
> return -ENOMEM;
>
> spin_lock_init(&p->io_lock);
>
> -   p->tco_res = platform_get_resource(dev, IORESOURCE_IO, 
> ICH_RES_IO_TCO);
> +   p->tco_res = platform_get_resource(pdev, IORESOURCE_IO, 
> ICH_RES_IO_TCO);
> if (!p->tco_res)
> return -ENODEV;
>
> -   p->smi_res = platform_get_resource(dev, IORESOURCE_IO, 
> ICH_RES_IO_SMI);
> +   p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, 
> ICH_RES_IO_SMI);
> if (!p->smi_res)
> return -ENODEV;
>
> p->iTCO_version = pdata->version;
> -   p->dev = dev;
> -   p->pdev = to_pci_dev(dev->dev.parent);
> +   p->pcidev = to_pci_dev(dev->parent);
>
> /*
>  * Get the Memory-Mapped GCS or PMC register, we need it for the
>  * NO_REBOOT flag (TCO v2 and v3).
>  */
> if (p->iTCO_version >= 2) {
> -   p->gcs_pmc_res = platform_get_resource(dev,
> +   p->gcs_pmc_res = platform_get_resource(pdev,
>IORESOURCE_MEM,
>ICH_RES_MEM_GCS_PMC);
> -   p->gcs_pmc = devm_ioremap_resource(&dev->dev, p->gcs_pmc_res);
> +   p->gcs_pmc = devm_ioremap_resource(dev, p->gcs_pmc_res);
> if (IS_ERR(p->gcs_pmc))
> return PTR_ERR(p->gcs_pmc);
> }
> @@ -453,9 +452,9 @@ static int iTCO_wdt_probe(struct platform_device *dev)
> iTCO_wdt_set_NO_REBOOT_bit(p);
>
> /* The TCO logic uses the TCO_EN bit in the SMI_EN register */
> -   if (!devm_request_region(&dev->dev, p->smi_res->start,
> +   if (!devm_request_region(dev, p->smi_res->start,
>  resource_size(p->smi_res),
> -dev->name)) {
> +

Re: [tpmdd-devel] [PATCH RFC 0/4] RFC: in-kernel resource manager

2017-01-03 Thread James Bottomley

On Tue, 2017-01-03 at 14:47 -0700, Jason Gunthorpe wrote:
> On Tue, Jan 03, 2017 at 08:36:10AM -0800, James Bottomley wrote:
> 
> > > I'm not sure about this. Why you couldn't have a very thin daemon
> > > that prepares the file descriptor and sends it through UDS socket 
> > > to a client.
> > 
> > So I'm a bit soured on daemons from the trousers experience: tcsd
> > crashed regularly and when it did it took all the TPM connections 
> > down irrecoverably.  I'm not saying we can't write a stateless 
> > daemon to fix most of the trousers issues, but I think it's 
> > valuable first to ask the question, "can we manage without a daemon 
> > at all?"  I actually think the answer is "yes", so I'm interested 
> > in seeing how far that line of research gets us.
> 
> There is clearly no need for a daemon to be involved when working on
> simple tasks like key load and key sign/enc/dec actions, adding such 
> a thing only increases the complexity.
> 
> If we discover a reason to have a daemon down the road then it should
> work in some way where the user space can call out to the daemon over
> a different path than the kernel. (eg dbus or something)

Agreed ... I think the only reason I can currently see for needing a
daemon is if we need it to sort out access security (which I'm hoping
we don't).

> > Do you have a link to the presentation?  The Plumbers etherpad 
> > doesn't contain it.  I've been trying to work out whether a 
> > properly set up TPM actually does need any protections at all.  As 
> > far as I can tell, once you've set all the hierarchy authorities 
> > and the lockout one, you're pretty well protected.
> 
> I think we should also consider TPM 1.2 support in all of this, it is
> still a very popular piece of hardware and it is equally able to
> support a RM.

I've been running with the openssl and gnome-keyring patches in 1.2 for
months now.  The thing about 1.2 is that the volatile store is much
larger, so there's a lot less of a need for a RM.  It's only a
requirement in 2.0 because most shipping TPMs only seem to have room
for about 3 objects.

> So, in general, I'd prefer to see the unprivileged char dev hard
> prevented by the kernel from doing certain things:
> 
> - Wipe the TPM
> - Manipulate the SRK, nvram, tpm flags, change passwords etc
> - Read back the EK

These are all things that the TPM itself is capable of enforcing a
policy for.  I think we should aim for correct setup of the TPM in the
first place so it enforces the policy in a standard manner rather than
having an artificial policy enforcement in the kernel.

> - Write to PCRs

The design of a TPM is mostly that it's up to user space to deal with
this.  Userspace can, of course, kill the TPM ability to quote and seal
to PCRs by inappropriately extending them.  However, there are a lot of
responsible applications that want to use PCRs in userspace; for
instance cloud boot and attestation.  We don't really want to restrict
their ability arbitrarily.

> - etc.

> 
> Even if TPM 2 has a stronger password based model, I still think the
> kernel should hard prevent those sorts of actions even if the user
> knows the TPM password.

That would make us different from TPM1.2: there, if you know the owner
authorisation, trousers will pretty much let you do anything.

> Realistically people in less sensitive environments will want to use
> the well known TPM passwords and still have reasonable safety in 
> their unprivileged accounts.

Can we not do most of this with localities?  In theory locality 0 is
supposed to be only the bios and the boot manager and the OS gets to
access 1-3.  We could reserve one for the internal kernel and still
have a couple for userspace (I'll have to go back and check numbers; I
seem to remember there were odd restrictions on which PCR you can reset
and extend in which locality).  If we have two devices (one for each
locality) we could define a UNIX ACL on the devices to achieve what you
want.

James

Re: [PATCH 4/4] watchdog: iTCO_wdt: Simplify module init function

2017-01-03 Thread Andy Shevchenko

On Tue, Jan 3, 2017 at 4:39 PM, Guenter Roeck  wrote:
> The 'ret' variable in iTCO_wdt_init_module() does not add any value;
> drop it.

Perhaps 'err', otherwise:
Reviewed-by: Andy Shevchenko 

>
> Signed-off-by: Guenter Roeck 
> ---
>  drivers/watchdog/iTCO_wdt.c | 8 +---
>  1 file changed, 1 insertion(+), 7 deletions(-)
>
> diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
> index ad29ae03a30b..fc7712112412 100644
> --- a/drivers/watchdog/iTCO_wdt.c
> +++ b/drivers/watchdog/iTCO_wdt.c
> @@ -612,15 +612,9 @@ static struct platform_driver iTCO_wdt_driver = {
>
>  static int __init iTCO_wdt_init_module(void)
>  {
> -   int err;
> -
> pr_info("Intel TCO WatchDog Timer Driver v%s\n", DRV_VERSION);
>
> -   err = platform_driver_register(&iTCO_wdt_driver);
> -   if (err)
> -   return err;
> -
> -   return 0;
> +   return platform_driver_register(&iTCO_wdt_driver);
>  }
>
>  static void __exit iTCO_wdt_cleanup_module(void)
> --
> 2.7.4
>



-- 
With Best Regards,
Andy Shevchenko

Re: [PATCH] Documentation: simple-card: add full path to widgets.txt

2017-01-03 Thread Rob Herring

On Fri, Dec 30, 2016 at 09:00:41AM +0100, yegorsli...@googlemail.com wrote:
> From: Yegor Yefremov 

Why? With this change, the path is wrong in the filtered DT 
tree[1].

Rob

[1] 
https://git.kernel.org/cgit/linux/kernel/git/devicetree/devicetree-rebasing.git/tree/

Re: [PATCH net-next 2/3] net: dsa: move HWMON support to its own file

2017-01-03 Thread kbuild test robot

Hi Vivien,

[auto build test WARNING on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Vivien-Didelot/net-dsa-restore-HWMON-support-in-dsa2/20170104-055351
config: i386-randconfig-x005-201701 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from net/dsa/dsa.c:26:0:
>> net/dsa/dsa_priv.h:61:5: warning: "CONFIG_NET_DSA_HWMON" is not defined 
>> [-Wundef]
#if CONFIG_NET_DSA_HWMON
^~~~

vim +/CONFIG_NET_DSA_HWMON +61 net/dsa/dsa_priv.h

45  struct net_device   *bridge_dev;
46  #ifdef CONFIG_NET_POLL_CONTROLLER
47  struct netpoll  *netpoll;
48  #endif
49  };
50  
51  /* dsa.c */
52  extern char dsa_driver_version[];
53  int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
54struct device_node *port_dn, int port);
55  void dsa_cpu_dsa_destroy(struct device_node *port_dn);
56  const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
57  int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds);
58  void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
59  
60  /* hwmon.c */
  > 61  #if CONFIG_NET_DSA_HWMON
62  void dsa_hwmon_register(struct dsa_switch *ds);
63  void dsa_hwmon_unregister(struct dsa_switch *ds);
64  #else
65  static inline void dsa_hwmon_register(struct dsa_switch *ds) { }
66  static inline void dsa_hwmon_unregister(struct dsa_switch *ds) { }
67  #endif
68  
69  /* slave.c */

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: linux-next: failure fetching the sunxi tree

2017-01-03 Thread Stephen Rothwell

Hi Maxime,

On Wed, 21 Dec 2016 09:24:13 +1100 Stephen Rothwell  
wrote:
>
> Trying to fetch the sunxi tree
> (git://git.kernel.org/pub/scm/linux/kernel/git/mripard/linux.git#sunxi/for-next),
> produces this error:
> 
> fatal: Couldn't find remote ref refs/heads/sunxi/for-next

I am still getting this error.

-- 
Cheers,
Stephen Rothwell

Re: [PATCH 3/6 linux-next] fs/affs: make affs exportable

2017-01-03 Thread Al Viro

On Tue, Jan 03, 2017 at 10:30:39PM +0100, Fabian Frederick wrote:
> Add standard functions making AFFS work with NFS.
> 
> Functions based on ext4 implementation.
> Tested on loop device.

How the hell is that supposed to work with cold dcache?  You don't have
->get_parent() there at all...

There *IS* a reference to parent directory in those suckers - not the same
kind as in normal unix filesystems (".." is not a directory entry there -
it's all fake), but it's doable.  be32_to_cpu(AFFS_TAIL(sb, bh)->parent) 
would be the inumber you need, where bh is the inode block of directory.

So it can be done, but not in this form.  NAK for the time being...

linux-next: failure while fetching the mvebu tree

2017-01-03 Thread Stephen Rothwell

Hi all,

Fetching the mvebu tree produces this error:

fatal: Couldn't find remote ref refs/heads/for-next

-- 
Cheers,
Stephen Rothwell

Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 10:54:19 PM CET Paul Bolle wrote:
> On Tue, 2017-01-03 at 22:19 +0100, Arnd Bergmann wrote:
> > isdn: move isdnhdlc out of i4l
> > isdn: i4l: move hisax driver to staging
> > isdn: move i4l to staging
> > 
> > I can post those as well, at least I think the first two are helpful
> > for untangling i4l from the rest of ISDN.  I also still think that
> > moving hisax and i4l to staging is reasonable given the state of
> > that code, even if there are a couple of users today.
> 
> There are? And even if there are: is there any reason to expect that moving
> the rest of i4l to staging will result in anything other than a stream of
> checkpatch cleanups?

To clarify: Karsten's concern was about the loss of features that are
present in i4l but not in mISDN. There were active users of those features
last year, so I assumed that there are still a few this year. However,
whether any of those users would ever need to move to a 4.11 kernel or
newer is an entirely different question.

As far as I'm concerned, we are totally fine as long as there exists a
longterm supported kernel that has i4l in drivers/staging. If we move
i4l to staging for v4.11 with the intention of removing it after the
2018 longterm release (i.e. after Deutsche Telekom turns off their
ISDN network), that gives us at least until 2020. I assume there will
be at least one older kernel with a longer end-of-support date.

> How often did a bunch of drivers re-enter the tree after being sent to
> staging?

Greg can probably answer that. I'm sure it's either never or very rare.
The only case of removed code coming back later is arch/h8300, which
was removed in 2013 and replaced with a much nicer implementation
in 2015.

Arnd

Re: [PATCH v2] locking/pvqspinlock: Relax cmpxchg's to improve performance on some archs

2017-01-03 Thread Waiman Long

On 12/26/2016 12:50 AM, Boqun Feng wrote:
> Hi Waiman,
>
> On Sun, Dec 25, 2016 at 03:26:01PM -0500, Waiman Long wrote:
>> A number of cmpxchg calls in qspinlock_paravirt.h were replaced by more
>> relaxed versions to improve performance on architectures that use LL/SC.
>>
>> All the locking related cmpxchg's are replaced with the _acquire
>> variants:
>>  - pv_queued_spin_steal_lock()
>>  - trylock_clear_pending()
>>
>> The cmpxchg's related to hashing are replaced by either the _release
>> or the _relaxed variants. See the inline comment for details.
>>
>> Signed-off-by: Waiman Long 
>>
>>  v1->v2:
>>   - Add comments in changelog and code for the rationale of the change.
>>
>> ---
>>  kernel/locking/qspinlock_paravirt.h | 50 
>> -
>>  1 file changed, 33 insertions(+), 17 deletions(-)
>>
>> diff --git a/kernel/locking/qspinlock_paravirt.h 
>> b/kernel/locking/qspinlock_paravirt.h
>> index e3b5520..c31d1ab 100644
>> --- a/kernel/locking/qspinlock_paravirt.h
>> +++ b/kernel/locking/qspinlock_paravirt.h
>> @@ -72,7 +72,7 @@ static inline bool pv_queued_spin_steal_lock(struct 
>> qspinlock *lock)
>>  struct __qspinlock *l = (void *)lock;
>>  
>>  if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
>> -(cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
>> +(cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
>>  qstat_inc(qstat_pv_lock_stealing, true);
>>  return true;
>>  }
>> @@ -101,16 +101,16 @@ static __always_inline void clear_pending(struct 
>> qspinlock *lock)
>>  
>>  /*
>>   * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
>> - * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock
>> - * just to be sure that it will get it.
>> + * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
>> + * lock to provide the proper memory barrier.
>>   */
>>  static __always_inline int trylock_clear_pending(struct qspinlock *lock)
>>  {
>>  struct __qspinlock *l = (void *)lock;
>>  
>>  return !READ_ONCE(l->locked) &&
>> -   (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL)
>> -== _Q_PENDING_VAL);
>> +   (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
>> +_Q_LOCKED_VAL) == _Q_PENDING_VAL);
>>  }
>>  #else /* _Q_PENDING_BITS == 8 */
>>  static __always_inline void set_pending(struct qspinlock *lock)
>> @@ -138,7 +138,7 @@ static __always_inline int trylock_clear_pending(struct 
>> qspinlock *lock)
>>   */
>>  old = val;
>>  new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
>> -val = atomic_cmpxchg(&lock->val, old, new);
>> +val = atomic_cmpxchg_acquire(&lock->val, old, new);
>>  
>>  if (val == old)
>>  return 1;
>> @@ -209,9 +209,15 @@ static struct qspinlock **pv_hash(struct qspinlock 
>> *lock, struct pv_node *node)
>>  struct pv_hash_entry *he;
>>  int hopcnt = 0;
>>  
>> +/*
>> + * Synchronizing with the node state variable will control who does
>> + * the hashing - the lock holder or lock waiter. The control
>> + * dependency will ensure that node value is written after the lock
>> + * value. So we don't need other ordering guarantee.
>> + */
> By this comment, you mean that
>   
>   cmpxchg_relaxed(&he->lock, NULL, lock);
> r1 = ll he->lock;
> 
> sc he->lock, lock // successed
>
>   if (r1)
>   WRITE_ONCE(he->node, node);
>
>
> the sc and WRITE_ONCE() can not be reordered because of the control
> dependency? I don't think this is true. Yes the sc must execute before
> the WRITE_ONCE(), but the memory/cache effects may be reordered. IOW,
> the following may happen
>
>
>   CPU 0   CPU 1
>   === ===
>   {x = 0, y = 0}  if (!cmpxchg_relaxed(&y, 0, 1))
>   WRITE_ONCE(x, 1);
>   r1 = READ_ONCE(x);
>
>   smp_rmb();
>
>   r2 = READ_ONCE(y);
>
> The following result is possible:
>
>   y = 1 && r1 = 1 && r2 = 0
>
> Or I'm missing your point here? ;-) 
>
> Regards,
> Boqun
>
You are probably right. I know the code is somewhat risky. That is why I
am waiting for an expert like you to see if this is really the case. Now it
seems that it may not be the case. I will revise the patch to take that out.

Cheers,
Longman

Re: [PATCH v3 RESEND 07/11] pwm: imx: Provide atomic PWM support for i.MX PWMv2

2017-01-03 Thread Boris Brezillon

On Tue, 3 Jan 2017 23:01:11 +0100
Lukasz Majewski  wrote:

> Hi Boris, Stefan,
> 
> > On Tue, 03 Jan 2017 09:29:40 -0800
> > Stefan Agner  wrote:
> >   
> > > On 2017-01-03 04:46, Boris Brezillon wrote:
> > >   
> > > >> > Well, regarding the imx_pwm_apply_v2() suggested by Stefan, I
> > > >> > think we both agreed that most of the code was unneeded when
> > > >> > all we want to do is disable the PWM.
> > > >>
> > > >> So for the PATCH 7/11 we fix the issue with recalculating clocks
> > > >> when we want to disable PWM.
> > > >>
> > > >> if (state->enabled) {
> > > >>c = clk_get_rate(imx->clk_per);
> > > >>c *= state->period;
> > > >>
> > > >>    do_div(c, 1000000000);
> > > >>period_cycles = c;
> > > >>
> > > >>    prescale = period_cycles / 0x10000 + 1;
> > > >>
> > > >>period_cycles /= prescale;
> > > >>c = (unsigned long long)period_cycles *
> > > >>state->duty_cycle;
> > > >>do_div(c, state->period);
> > > >>duty_cycles = c;
> > > >>
> > > >>/*
> > > >> * According to imx pwm RM, the real period value
> > > >> * should be PERIOD value in PWMPR plus 2.
> > > >> */
> > > >>if (period_cycles > 2)
> > > >>period_cycles -= 2;
> > > >>else
> > > >>period_cycles = 0;
> > > >>
> > > >>/*
> > > >> * Enable the clock if the PWM is not already
> > > >> * enabled.
> > > >> */
> > > >>if (!cstate.enabled) {
> > > >>ret = clk_prepare_enable(imx->clk_per);
> > > >>if (ret)
> > > >>return ret;
> > > >>}
> > > >>
> > > >>/*
> > > >> * Wait for a free FIFO slot if the PWM is
> > > >> already
> > > >> * enabled, and flush the FIFO if the PWM was
> > > >> disabled
> > > >> * and is about to be enabled.
> > > >> */
> > > >>if (cstate.enabled)
> > > >>imx_pwm_wait_fifo_slot(chip, pwm);
> > > >>else
> > > >>imx_pwm_sw_reset(chip);
> > > >>
> > > >>writel(duty_cycles, imx->mmio_base + MX3_PWMSAR);
> > > >>writel(period_cycles, imx->mmio_base +
> > > >> MX3_PWMPR);
> > > >>
> > > >>writel(MX3_PWMCR_PRESCALER(prescale) |
> > > >>   MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
> > > >>   MX3_PWMCR_DBGEN |
> > > >> MX3_PWMCR_CLKSRC_IPG_HIGH | MX3_PWMCR_EN,
> > > >>   imx->mmio_base + MX3_PWMCR);
> > > >>} else {
> > > >>
> > > >>writel(0, imx->mmio_base + MX3_PWMCR);
> > > >>
> > > >>/* Disable the clock if the PWM is currently
> > > >> enabled. */ if (cstate.enabled)
> > > >>clk_disable_unprepare(imx->clk_per);
> > > >>}
> > > >>
> > > >>
> > > > 
> > > > Yep.
> > > > 
> > > 
> > > This looks like a good transformation of the current Patch 7, but
> > > once you merge my patch, it will look slightly different...  
> > 
> > Yes. I think we should just unconditionally enable/disable the per_clk
> > at function entry/exit. The prepare_enable() call is almost free
> > when the clk is already enabled, so it's not like we're adding a huge
> > overhead by doing that.  
> 
> So in the above snippet we should replace:
> 
> if (!cstate.enabled) {
>   ret = clk_prepare_enable(imx->clk_per);
>   if (ret)
>   return ret;
> }
> 
> with
>   ret = clk_prepare_enable(imx->clk_per);
>   if (ret)
>   return ret;
> 
> And 
> 
> if (cstate.enabled)
>   clk_disable_unprepare(imx->clk_per);
> 
> with 
>   clk_disable_unprepare(imx->clk_per);

That's what I had in mind.

> 
> >   
> > >   
> > > >>
> > > >> >
> > > >> > My concern was more about the way PWM changes are applied
> > > >> > (->apply() returns before the change is actually applied), but
> > > >> > I agreed that it could be fixed later on (if other people
> > > >> > think it's really needed), since the existing code already
> > > >> > handles it this way.
> > > >>
> > > >> This is the issue with FIFO setting - but for now we do not deal
> > > >> with it.
> > > > 
> > > > Exactly.
> > > > 
> > > >>
> > > >> >
> > > >> > > No clear decision what to change until today when Stefan
> > > >> > > prepared separate (concise) patch (now I see what is the
> > > >> > > problem).   
> > > >> >
> > > >> > The patch proposed by Stefan is addressing a different
> > > >> > problem: the periph clock has to be enabled before accessing
> > > >> > registers.
> > > >>
> > > >> So for this reason Stefan's patch [1] always enable the clock

Re: [PATCH 3/3] ext4: Find desired extent in ext4_ext_shift_extents() using binsearch

2017-01-03 Thread Theodore Ts'o

On Tue, Jan 03, 2017 at 09:44:15PM +0100, Roman Penyaev wrote:
> 
> (I had to say that right now I am testing on 4.4.28 kernel and testing
>  on latest sources taken from linux-next will require some time, but of
>  course I will retest and send up-to-date results)
> 
> ---
> Failures: ext4/302 ext4/303 ext4/304 generic/061 generic/063
> generic/075 generic/079 generic/091 generic/112 generic/127
> generic/252 generic/263
> Failed 12 of 200 tests

You didn't say what file system configuration you're using, but I'm
assuming it's a default ext4 4k configuration?  One of the things
about my kvm-xfstests and gce-xfstests setup is that I test a range of
file system configurations, and there are specific exclude files to
skip certain known failures.  Currently we are skipping the following
tests globally (from kvm-xfstests/test-appliances/files/root/fs/ext4/exclude):

# generic/223 tests file alignment, which works on ext4 only by
# accident because we're not RAID stripe aware yet, and works at all
# because we have bias towards aligning on power-of-two block numbers.
# It is a flaky test for some configurations, so skip it.
generic/223

# ext4/304 fails for all configurations, and this appears to be a
# test or fio bug.
#
ext4/304

A recent run of v4.10-rc2 shows that on the 4k configuration, we're only
failing one test:

BEGIN TEST 4k: Ext4 4k block Tue Jan  3 00:20:39 EST 2017
Failures: generic/389

In somewhat older test runs (late in the 4.9 development cycle, before
4.9 final was finally shipped), I saw test failures of generic/082 and
generic/095 as well.

> 1. What is the optimal size for $TEST_DEV ? I see these seconds numbers on a
> small Vm (disk is 16gb):
> 
> ext4/007 22s

With kvm-xfstests and gce-xfstests I normally use a 5G device for
test_dev and scratch_dev except for the bigalloc test configuration
where I use a 20GB device.

> 2.  I see many of the tests are ignored.  Do we have some perfect run,
> reference, Standard, to see how many tests run on up-to-date kernel?
> E.g. I see generic/038 has this output:
>"[not run] This test requires at least 10GB free".

I currently don't publish one.  What I normally do for myself is run
gce-xfstests at the beginning of the development cycle (e.g., versus
4.10-rc2) and then look for regressions.  The problem is that
regressions are sometimes caused by new tests being added, or changes
pulled in via other trees.

In general the goal is to reduce the number of test failures down to
zero, or else, just suppress them using exclude files, but I haven't
had time to look at a lot of the more recent test failures.  Some of
them, especially for the 1k test case, seem to be quota related, and
may very well be test bugs where the golden output of the tests assumes a 4k
block size.  This is definitely true for the bigalloc test cases,
where a number of the failures are known test bugs that we haven't had
time to address.

> Increasing size of $SCRATCH partition will bring the test to live.
> So would be nice to have some reference numbers that those tests
> are expected to run and to pass.

Sure; on my to do list is to update the published kvm-xfstests and
gce-xfstests images to something newer, and when I do that, I can
publish "official" reference test outputs using gce-xfstests.  I've
attached a gce-xfstests output.  It has a huge number of failures
in the encryption test case because the kernel which was used
for this test run was missing a fix which only landed in Linus's tree
today (see commit fe4f6c801c03b: "fscrypt: fix the
test_dummy_encryption mount option").

You'll see that modulo the encryption failures and some failures on
the 1k config case which I need to track down and iron out, we're in
pretty good shape.  Below please find the summary; attached please
find the full log file.

CMDLINE: full
FSTESTIMG: gce-xfstests/xfstests-201612072310
FSTESTVER: e2fsprogsv1.43.3-30-g8df85fb (Sun, 4 Sep 2016 21:32:35 -0400)
FSTESTVER: fio  fio-2.14-45-g43f248c (Mon, 24 Oct 2016 20:48:43 -0600)
FSTESTVER: quota81aca5c (Tue, 12 Jul 2016 16:15:45 +0200)
FSTESTVER: xfsprogs v4.8.0-rc3 (Mon, 3 Oct 2016 14:25:45 +1100)
FSTESTVER: xfstests-bld 71223ea (Wed, 7 Dec 2016 22:20:14 -0500)
FSTESTVER: xfstests linux-v3.8-1266-g8d57865 (Wed, 7 Dec 2016 22:56:52 
-0500)
FSTESTVER: kernel   4.10.0-rc2-ext4-1-geb590c0248f4 #176 SMP Tue Jan 3 
00:17:59 EST 2017 x86_64
FSTESTCFG: "all"
FSTESTSET: "-g auto"
FSTESTEXC: ""
FSTESTOPT: "aex"
MNTOPTS: ""
CPUS: "2"
MEM: "7477.96"
MEM: 7680 MB (Max capacity)
BEGIN TEST 4k: Ext4 4k block Tue Jan  3 00:20:39 EST 2017
Failures: generic/389
BEGIN TEST 1k: Ext4 1k block Tue Jan  3 01:17:12 EST 2017
Failures: ext4/307 generic/018 generic/076 generic/077 generic/117 generic/233 
generic/256 generic/269 generic/270 generic/273 generic/299 generic/300 
generic/361 generic/389
BEGIN TEST ext3: Ext4 4k block emulating ext3 Tue Jan  3 02:18:13 EST 2017
Failures: generic/382 generic/389
BEGIN

Re: [PATCH 4/7] mm, vmscan: show LRU name in mm_vmscan_lru_isolate tracepoint

2017-01-03 Thread Michal Hocko

On Tue 03-01-17 21:52:44, Michal Hocko wrote:
> On Tue 03-01-17 21:47:45, Michal Hocko wrote:
> > On Tue 03-01-17 18:08:58, Vlastimil Babka wrote:
> > > On 12/28/2016 04:30 PM, Michal Hocko wrote:
> > > > From: Michal Hocko 
> > > > 
> > > > mm_vmscan_lru_isolate currently prints only whether the LRU we isolate
> > > > from is file or anonymous but we do not know which LRU this is. It is
> > > > useful to know whether the list is active or inactive as well. Change
> > > > the tracepoint to show symbolic names of the lru instead.
> > > > 
> > > > Signed-off-by: Michal Hocko 
> > > > ---
> > > >  include/trace/events/vmscan.h | 20 ++--
> > > >  mm/vmscan.c   |  2 +-
> > > >  2 files changed, 15 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/include/trace/events/vmscan.h 
> > > > b/include/trace/events/vmscan.h
> > > > index 6af4dae46db2..cc0b4c456c78 100644
> > > > --- a/include/trace/events/vmscan.h
> > > > +++ b/include/trace/events/vmscan.h
> > > > @@ -36,6 +36,14 @@
> > > > (RECLAIM_WB_ASYNC) \
> > > > )
> > > > 
> > > > +#define show_lru_name(lru) \
> > > > +   __print_symbolic(lru, \
> > > > +   {LRU_INACTIVE_ANON, "LRU_INACTIVE_ANON"}, \
> > > > +   {LRU_ACTIVE_ANON, "LRU_ACTIVE_ANON"}, \
> > > > +   {LRU_INACTIVE_FILE, "LRU_INACTIVE_FILE"}, \
> > > > +   {LRU_ACTIVE_FILE, "LRU_ACTIVE_FILE"}, \
> > > > +   {LRU_UNEVICTABLE, "LRU_UNEVICTABLE"})
> > > > +
> > > 
> > > Does this work with external tools such as trace-cmd, i.e. does it export
> > > the correct format file?
> > 
> > How do I find out?
> 
> Well, I've just checked the format file and it says
> print fmt: "isolate_mode=%d classzone=%d order=%d nr_requested=%lu 
> nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", REC->isolate_mode, 
> REC->classzone_idx, REC->order, REC->nr_requested, REC->nr_scanned, 
> REC->nr_skipped, REC->nr_taken, __print_symbolic(REC->lru, 
> {LRU_INACTIVE_ANON, "LRU_INACTIVE_ANON"}, {LRU_ACTIVE_ANON, 
> "LRU_ACTIVE_ANON"}, {LRU_INACTIVE_FILE, "LRU_INACTIVE_FILE"}, 
> {LRU_ACTIVE_FILE, "LRU_ACTIVE_FILE"}, {LRU_UNEVICTABLE, "LRU_UNEVICTABLE"})
> 
> So the tool should be OK as long as it can find values for LRU_*
> constants. Is this what is the problem?

OK, I got it. We need enum->value translation and all the EM stuff to do
that, right?

I will rework the patch and move the definition to the rest of the EM
family...
-- 
Michal Hocko
SUSE Labs

Re: [RFC, PATCHv2 29/29] mm, x86: introduce RLIMIT_VADDR

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 10:29:33 AM CET Andy Lutomirski wrote:
> 
> Hmm.  What if we approached this a bit differently?  We could add a
> single new personality bit ADDR_LIMIT_EXPLICIT.  Setting this bit
> cause PER_LINUX32_3GB etc to be automatically cleared.

Both the ADDR_LIMIT_32BIT and ADDR_LIMIT_3GB flags I guess?

> When
> ADDR_LIMIT_EXPLICIT is in effect, prctl can set a 64-bit numeric
> limit.  If ADDR_LIMIT_EXPLICIT is cleared, the prctl value stops being
> settable and reading it via prctl returns whatever is implied by the
> other personality bits.

I don't see anything wrong with it, but I'm a bit confused now
what this would be good for, compared to using just prctl.

Is this about setuid clearing the personality but not the prctl,
or something else?

Arnd

Re: [PATCH v11 0/5] Enabling Ring 3 MONITOR/MWAIT feature for Knights Landing

2017-01-03 Thread Pavel Machek

On Tue 2016-12-20 14:48:41, Grzegorz Andrejczuk wrote:
> Following patches enable the use of the feature that allows
> the Intel Xeon Phi x200 devices to use MONITOR/MWAIT instructions
> outside ring 0. It allows userspace applications to use
> more efficient synchronization operations, which improves performance
> and energy efficiency.

What kind of security holes does it bring us?

rdseed can be used for two processes to communicate when they should
not (serious problem for android). Can this be used for something similar?

Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


signature.asc
Description: Digital signature

Re: [RFC, PATCHv2 29/29] mm, x86: introduce RLIMIT_VADDR

2017-01-03 Thread Andy Lutomirski

On Tue, Jan 3, 2017 at 2:07 PM, Arnd Bergmann  wrote:
> On Tuesday, January 3, 2017 10:29:33 AM CET Andy Lutomirski wrote:
>>
>> Hmm.  What if we approached this a bit differently?  We could add a
>> single new personality bit ADDR_LIMIT_EXPLICIT.  Setting this bit
>> cause PER_LINUX32_3GB etc to be automatically cleared.
>
> Both the ADDR_LIMIT_32BIT and ADDR_LIMIT_3GB flags I guess?

Yes.

>
>> When
>> ADDR_LIMIT_EXPLICIT is in effect, prctl can set a 64-bit numeric
>> limit.  If ADDR_LIMIT_EXPLICIT is cleared, the prctl value stops being
>> settable and reading it via prctl returns whatever is implied by the
>> other personality bits.
>
> I don't see anything wrong with it, but I'm a bit confused now
> what this would be good for, compared to using just prctl.
>
> Is this about setuid clearing the personality but not the prctl,
> or something else?

It's to avoid ambiguity as to what happens if you set ADDR_LIMIT_32BIT
and use the prctl.  ISTM it would be nice for the semantics to be
fully defined in all cases.

--Andy

Re: [PATCH v2] locking/pvqspinlock: Relax cmpxchg's to improve performance on some archs

2017-01-03 Thread Waiman Long

On 01/03/2017 11:18 AM, Peter Zijlstra wrote:
> On Sun, Dec 25, 2016 at 03:26:01PM -0500, Waiman Long wrote:
>> A number of cmpxchg calls in qspinlock_paravirt.h were replaced by more
>> relaxed versions to improve performance on architectures that use LL/SC.
> Claim without numbers ;-)

Well it is hard to produce actual numbers here as I don't have the setup
to gather data.
 
>> All the locking related cmpxchg's are replaced with the _acquire
>> variants:
>>  - pv_queued_spin_steal_lock()
>>  - trylock_clear_pending()
> So these seem to make sense in that they're in 'fast' paths..
>
>> The cmpxchg's related to hashing are replaced by either the _release
>> or the _relaxed variants. See the inline comment for details.
>
> But these not so much, we're going to put the vcpu to sleep, why does it
> make sense to 'optimize' the wait/kick stuff?

I haven't thought too much about fast/slow paths when I was making the
patch. You are right that we probably don't need to do that for the
slowpath cases. I can modify the patch to do just the fast path change.

Cheers,
Longman

Re: [PATCH] perf/x86: Reject non sampling events with precise_ip

2017-01-03 Thread Vince Weaver

On Tue, 3 Jan 2017, Jiri Olsa wrote:
> On Tue, Jan 03, 2017 at 10:40:59AM +0100, Peter Zijlstra wrote:
> > 
> > I think we should reject non sampling pebs events, as you say they make
> > no sense what so ever.
> 
> ook, attached
> 

you can use the PEBS events to gather aggregate stats though and they 
seem roughly right.  Are they truly meaningless?

I had misremembered that they might not have the determinism problems of 
regular events (turns out that's wrong).  They oddly seem to be worse in 
some limited tests I did.

So I guess nothing will be lost if they're disabled.

Vince

Re: [PATCH] KVM: ioapic: fix NULL deref ioapic->lock

2017-01-03 Thread Wanpeng Li

2017-01-04 1:23 GMT+08:00 Paolo Bonzini :
>
>
> On 03/01/2017 13:06, David Hildenbrand wrote:
>>>
>>>  switch (cap->cap) {
>>>  case KVM_CAP_HYPERV_SYNIC:
>>> -return kvm_hv_activate_synic(vcpu);
>>> +if (!irqchip_in_kernel(vcpu->kvm))
>>> +return -EINVAL;
>>> +else
>>
>> You can simply drop the else and return directly.
>>
>> Can't really say if this is the right fix, my first thought was that
>> a request has been set although it should never have been set for
>> that VCPU. Maybe that is an effect of synic being activated
>> (because synic code unconditionally later on sets the request).
>>
>> Fixing the cause of the request seems better than fixing up the result.
>
> Yes, I agree.  Wanpeng's second patch is fine.

Thanks Paolo, I will send out a formal one soon.

Regards,
Wanpeng Li

Re: [tpmdd-devel] [PATCH RFC 0/4] RFC: in-kernel resource manager

2017-01-03 Thread James Bottomley

On Tue, 2017-01-03 at 14:32 -0700, Jason Gunthorpe wrote:
> On Mon, Jan 02, 2017 at 08:36:20AM -0800, James Bottomley wrote:
> > On Mon, 2017-01-02 at 15:22 +0200, Jarkko Sakkinen wrote:
> > > This patch set adds support for TPM spaces that provide a context
> > > for isolating and swapping transient objects. This patch set does
> > > not yet include support for isolating policy and HMAC sessions 
> > > but it is trivial to add once the basic approach is settled (and
> > > that's why I created an RFC patch set).
> > 
> > The approach looks fine to me.  The only basic query I have is 
> > about the default: shouldn't it be with resource manager on rather 
> > than off?  I can't really think of a use case that wants the RM off 
> > (even if you're running your own, having another doesn't hurt 
> > anything, and it's still required to share with in-kernel uses).
> 
> I haven't looked too closely at TPM 2.0 stuff, but at least for 1.2 
> we should have a kernel white-list of allowed commands within a RM
> context, so having the RM on by default would break all of the user
> space.
> 
> I really think the only way forward here is a new char dev that is
> safe for unprivileged/concurrent use and migrate the user space stack
> to use it instead.

That's effectively what /dev/tpms0 would be, with /dev/tpm0 giving full
fledged access.

> > And with that, I've TPM 2 enabled both gnome-keyring and openssl:
> > 
> > https://build.opensuse.org/package/show/home:jejb1:Tumbleweed/gnome
> > -keyring
> > https://build.opensuse.org/package/show/home:jejb1:Tumbleweed/opens
> > sl_tpm_engine
>  
> > I'm running them in production on my day to day laptop and so far
> > everything's working nicely (better than 1.2, in fact, since tcsd
> > periodically crashes necessitating a restart of everything).
> 
> You granted your unprivileged user access to /dev/tpm0 then? FYI I
> think that is a dangerous idea..

No, I granted access to the resource manager device (I'm running with a
combination of Jarkko's and my patches).

James

Re: [PATCH v3 RESEND 07/11] pwm: imx: Provide atomic PWM support for i.MX PWMv2

2017-01-03 Thread Lukasz Majewski

Hi Boris, Stefan,

> On Tue, 03 Jan 2017 09:29:40 -0800
> Stefan Agner  wrote:
> 
> > On 2017-01-03 04:46, Boris Brezillon wrote:
> > 
> > >> > Well, regarding the imx_pwm_apply_v2() suggested by Stefan, I
> > >> > think we both agreed that most of the code was unneeded when
> > >> > all we want to do is disable the PWM.  
> > >>
> > >> So for the PATCH 7/11 we fix the issue with recalculating clocks
> > >> when we want to disable PWM.
> > >>
> > >> if (state->enabled) {
> > >>  c = clk_get_rate(imx->clk_per);
> > >>  c *= state->period;
> > >>
> > >>  do_div(c, 1000000000);
> > >>  period_cycles = c;
> > >>
> > >>  prescale = period_cycles / 0x10000 + 1;
> > >>
> > >>  period_cycles /= prescale;
> > >>  c = (unsigned long long)period_cycles *
> > >>  state->duty_cycle;
> > >>  do_div(c, state->period);
> > >>  duty_cycles = c;
> > >>
> > >>  /*
> > >>   * According to imx pwm RM, the real period value
> > >>   * should be PERIOD value in PWMPR plus 2.
> > >>   */
> > >>  if (period_cycles > 2)
> > >>  period_cycles -= 2;
> > >>  else
> > >>  period_cycles = 0;
> > >>
> > >>  /*
> > >>   * Enable the clock if the PWM is not already
> > >>   * enabled.
> > >>   */
> > >>  if (!cstate.enabled) {
> > >>  ret = clk_prepare_enable(imx->clk_per);
> > >>  if (ret)
> > >>  return ret;
> > >>  }
> > >>
> > >>  /*
> > >>   * Wait for a free FIFO slot if the PWM is
> > >> already
> > >>   * enabled, and flush the FIFO if the PWM was
> > >> disabled
> > >>   * and is about to be enabled.
> > >>   */
> > >>  if (cstate.enabled)
> > >>  imx_pwm_wait_fifo_slot(chip, pwm);
> > >>  else
> > >>  imx_pwm_sw_reset(chip);
> > >>
> > >>  writel(duty_cycles, imx->mmio_base + MX3_PWMSAR);
> > >>  writel(period_cycles, imx->mmio_base +
> > >> MX3_PWMPR);
> > >>
> > >>  writel(MX3_PWMCR_PRESCALER(prescale) |
> > >> MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
> > >> MX3_PWMCR_DBGEN |
> > >> MX3_PWMCR_CLKSRC_IPG_HIGH | MX3_PWMCR_EN,
> > >> imx->mmio_base + MX3_PWMCR);
> > >>  } else {
> > >>
> > >>  writel(0, imx->mmio_base + MX3_PWMCR);
> > >>
> > >>  /* Disable the clock if the PWM is currently
> > >> enabled. */ if (cstate.enabled)
> > >>  clk_disable_unprepare(imx->clk_per);
> > >>  }
> > >>
> > >>  
> > > 
> > > Yep.
> > >   
> > 
> > This looks like a good transformation of the current Patch 7, but
> > once you merge my patch, it will look slightly different...
> 
> Yes. I think we should just unconditionally enable/disable the per_clk
> at function entry/exit. The prepare_enable() call is almost free
> when the clk is already enabled, so it's not like we're adding a huge
> overhead by doing that.

So in the above snippet we should replace:

if (!cstate.enabled) {
ret = clk_prepare_enable(imx->clk_per);
if (ret)
return ret;
}

with
ret = clk_prepare_enable(imx->clk_per);
if (ret)
return ret;

And 

if (cstate.enabled)
clk_disable_unprepare(imx->clk_per);

with 
clk_disable_unprepare(imx->clk_per);

> 
> > 
> > >>  
> > >> >
> > >> > My concern was more about the way PWM changes are applied
> > >> > (->apply() returns before the change is actually applied), but
> > >> > I agreed that it could be fixed later on (if other people
> > >> > think it's really needed), since the existing code already
> > >> > handles it this way.  
> > >>
> > >> This is the issue with FIFO setting - but for now we do not deal
> > >> with it.  
> > > 
> > > Exactly.
> > >   
> > >>  
> > >> >  
> > >> > > No clear decision what to change until today when Stefan
> > >> > > prepared separate (concise) patch (now I see what is the
> > >> > > problem). 
> > >> >
> > >> > The patch proposed by Stefan is addressing a different
> > >> > problem: the periph clock has to be enabled before accessing
> > >> > registers.  
> > >>
> > >> So for this reason Stefan's patch [1] always enables the clock no
> > >> matter if PWM clock is generated or not.  
> > > 
> > > Yes.
> > >   
> > >>  
> > >> >  
> > >> > >  
> > >> > > >
> > >> > > > Same goes for the regression introduced in patch 2: I
> > >> > > > think it's better to keep things bisectable on all
> > >> > > > platforms (even if it appeared to work by chance on imx7,
> > >> > > > it did work before this change).  
> > >> > >
> > >> > > Could you be more specific about your idea to solve this
> > >> > > problem?  
> > >> >
> > >> > Stefan already provided a patch, I just think it should be
> > >> > fixed before patch 2 to avoid breaking bisectibility.  
> > >>
> > >> My idea is as follows:
> > >>
>

Re: [PATCH 2/2] Fix warning during compilation

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 10:23:29 AM CET ivan.stoya...@amk-drives.bg wrote:
> From: amk 
> 
> drivers/dma/ipu/ipu_irq.c: In function 'ipu_irq_fn':
> drivers/dma/ipu/ipu_irq.c:342:4: warning: 'irq' may be used uninitialized in 
> this function [-Wmaybe-uninitialized]
> 
> Signed-off-by: amk 
> ---

This looks like my patch 86c7e6836479 ("dmaengine: ipu: remove bogus NO_IRQ 
reference")
that was applied in September, but it seems to be written for an older kernel
prior to v4.3.

Which kernel version were you testing on?

Arnd

Re: [PATCH v2 0/3] PM / devfreq: Fix the bug and add reviewer for devfreq support

2017-01-03 Thread Rafael J. Wysocki

On Tue, Jan 3, 2017 at 12:54 PM, Chanwoo Choi  wrote:
> Dear Myungjoo,
>
> Thanks for your review for patch1.
> But, patch2/3 is not yet reviewed. Could you please review these patches?

I queued them up as 4.10 fixes in the meantime.

Thanks,
Rafael

Re: [patch] mm, thp: always direct reclaim for MADV_HUGEPAGE even when deferred

2017-01-03 Thread David Rientjes

On Tue, 3 Jan 2017, Mel Gorman wrote:

> > I sympathize with that, I've dealt with a number of issues that we have 
> > encountered where thp defrag was either at fault or wasn't, and there were 
> > also suggestions to set defrag to "madvise" to rule it out and that 
> > impacted other users.
> > 
> > I'm curious if you could show examples where there were severe stalls 
> > being encountered by applications that did madvise(MADV_HUGEPAGE)
> 
> I do not have a bug report that is specific to MADV_HUGEPAGE. Until very
> recently they would have been masked by THP fault overhead in general.

I parse this, the masking of thp fault overhead in general, as an 
indication that the qemu user was using defrag set to "always" rather than 
the new kernel default of "madvise".

I wholeheartedly agree that we don't want defrag to be set to "always" by 
default, but that's not really a huge concern: we can easily set it to 
anything else by initscripts.

Qemu, when they added the MADV_HUGEPAGE, obviously wanted to try to 
allocate hugepages at fault using the available means when defrag was set 
to "madvise": https://patchwork.ozlabs.org/patch/177695

So now qemu notices no difference that the kernel default has changed, but 
you later reference qemu in your email about bugs concerning "slow start 
times."  It's puzzling unless you're offering a defrag setting of "defer" 
to workaround this potential bug report, which affects the whole machine 
and now qemu users have _no_ option to try to get thp at fault because the 
admin thinks he knows better, essentially making MADV_HUGEPAGE a no-op 
with no alternative provided.  That's specifically what I'm arguing 
against.

Qemu can be fixed, and I'll do it myself if necessary, when allocating a 
new RAMBlock or translation buffer to suppress the MADV_HUGEPAGE if 
configured.  It's a very trivial change, and I can do that if you'll 
kindly point me to the initial bug report so I can propose it to the 
appropriate user.

As Vlastimil also correctly brings up, there is already a 
prctl(PR_SET_THP_DISABLE) option available to prevent hugepages at fault 
and simply requires you to fork the process in the correct context to 
inherit the vma setting, see commit 1e1836e84f87.

> The current defer logic isn't in the field long enough to generate bugs
> that are detailed enough to catch something like this.
> 

Let us consider this email as generating a bug that we, the users of 
MADV_HUGEPAGE that are using the madvise(2) correctly and add flags to 
suppress it when desired correctly, have no option to allow background 
compaction for everybody when we cannot allocate thp immediately but also 
allow users of our library to accept the cost of direct compaction at 
fault because they really want their .text segment remapped and backed by 
hugepages.

> > The problem with the current option set is that we don't have the ability 
> > to trigger background compaction for everybody, which only very minimally 
> > impacts their page fault latency since it just wakes up kcompactd, and 
> > allow MADV_HUGEPAGE users to accept that up-front cost by doing direct 
> > compaction.  My usecase, remapping .text segment and faulting thp memory 
> > at startup, demands that ability.  Setting defrag=madvise gets that 
> > behavior, but nobody else triggers background compaction when thp memory 
> > fails and we _want_ that behavior so work is being done to defrag.  
> > Setting defrag=defer makes MADV_HUGEPAGE a no-op for page fault, and I 
> > argue that's the wrong behavior.
> > 
> 
> Again, I accept your reasoning and I don't have direct evidence that it'll be
> a problem. In an emergency, it could also be worked around using LD_PRELOAD
> or a systemtap script until a kernel fix could be applied. Unfortunately it
> could also be years before a patch like this would hit enough users for me
> to spot the problem in the field. That's not enough to Nak the patch but
> it was enough to suggest an alternative that would side-step the problem
> ever occurring.
> 

Or simply forking the application after doing prctl(PR_SET_THP_DISABLE)?  
What exactly are you working around with a LD_PRELOAD that isn't addressed 
by this?

Btw, is there a qemu bug filed that makes doing the MADV_HUGEPAGE 
configurable?  I don't find it at https://bugs.launchpad.net/qemu.

> > If you want a fifth option added to sysfs for thp defrag, that's fine, we 
> > can easily do that.  I'm slightly concerned with more and more options 
> > added that we will eventually approach the 2^4 option count that I 
> > mentioned earlier and nobody will know what to select.  I'm fine with the 
> > kernel default remaining as "madvise,"
> 
> I find it hard to believe this one *can* explode. There are a limited
> number of user-triggable actions that can trigger stalls.
> 

I'm confused as to whether you support the addition of a fifth option that 
users will have to learn what they want, or whether you are open to 
changing the behavior of "de

Re: [PATCH V9 0/3] irqchip: qcom: Add IRQ combiner driver

2017-01-03 Thread Rafael J. Wysocki

On Tue, Jan 3, 2017 at 4:19 PM, Agustin Vega-Frias
 wrote:
> Hi,
>
> Is there any more feedback on this beyond Lorenzo's suggestion to drop
> the conditional check on the first patch?
> How can we move forward on this series?

Essentially, I need to convince myself that patches [1-2/3] are fine
which hasn't happened yet.

Thanks,
Rafael

Re: [tpmdd-devel] [PATCH RFC 0/4] RFC: in-kernel resource manager

2017-01-03 Thread Jason Gunthorpe

On Mon, Jan 02, 2017 at 09:26:58PM -0800, James Bottomley wrote:

> OK, so I put a patch together that does this (see below). It all works
> nicely (with a udev script that sets the resource manager device to
> 0666):
> 
> jejb@jarvis:~> ls -l /dev/tpm*
> crw------- 1 root root  10,   224 Jan  2 20:54 /dev/tpm0
> crw-rw-rw- 1 root root 246, 65536 Jan  2 20:54 /dev/tpm0rm
> 
> I've modified the tss to connect to /dev/tpm0rm by default and it all
> seems to work.
> 
> The patch applies on top of your tabrm branch, by the way.

If we are making a new /dev/ node we should think more carefully about
the design.

- Do we need a cdev node for every chip? What about just '/dev/tpm' and
  we encode the chip number in the message. Since the exclusive
  locking is gone this is very doable.
- Should we get rid of the read/write protocol and use ioctl instead?
  As I understand it ioctl is more usable with seccomp and related
  schemes? I could see passing a TPM FD into a sandbox and wanting the
  sandbox only able to do decrypt/encrypt operations, for instance.
- Something to identify tpm chips and help match key data with the
  proper chip.

Jason

Re: [PATCH] rtc: armada38x: add __ro_after_init to armada38x_rtc_ops

2017-01-03 Thread Russell King - ARM Linux

On Tue, Jan 03, 2017 at 09:31:18PM +0000, Russell King - ARM Linux wrote:
> On Tue, Jan 03, 2017 at 01:18:29PM -0800, Kees Cook wrote:
> > On Mon, Jan 2, 2017 at 6:06 AM, Russell King - ARM Linux
> >  wrote:
> > > On Mon, Dec 26, 2016 at 05:01:02PM +0530, Bhumika Goyal wrote:
> > >> The object armada38x_rtc_ops of type rtc_class_ops structure is not
> > >> modified after getting initialized by armada38x_rtc_probe. Apart from
> > >> getting referenced in init it is also passed as an argument to the 
> > >> function
> > >> devm_rtc_device_register but this argument is of type const struct
> > >> rtc_class_ops *. Therefore add __ro_after_init to its declaration.
> > >
> > > What I'd prefer here is for the structure to be duplicated, with one
> > > copy having the alarm methods and one which does not.  Both can then
> > > be made "const" (so placed into the read-only section at link time)
> > > and the probe function select between the two.
> > >
> > > I think that's a cleaner and better solution, even though it's
> > > slightly larger.
> > >
> > > I'm not a fan of __ro_after_init being used where other solutions are
> > > possible.
> > 
> > Can the pointer that points to the struct rtc_class_ops be made 
> > ro_after_init?
> 
> It's passed into the RTC core code, and probably stored in some dynamically
> allocated object, so probably no.  It's the same class of problem as every
> file_operations pointer in the kernel, or the thousand other operations
> structure pointers that a running kernel has.

For the elimination of doubt, this is what I meant in my original email.
As you can see, there's nothing to be marked as __ro_after_init anymore.

 drivers/rtc/rtc-armada38x.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c
index 9a3f2a6f512e..a4166ccfce36 100644
--- a/drivers/rtc/rtc-armada38x.c
+++ b/drivers/rtc/rtc-armada38x.c
@@ -202,7 +202,7 @@ static irqreturn_t armada38x_rtc_alarm_irq(int irq, void 
*data)
return IRQ_HANDLED;
 }
 
-static struct rtc_class_ops armada38x_rtc_ops = {
+static const struct rtc_class_ops armada38x_rtc_ops = {
.read_time = armada38x_rtc_read_time,
.set_time = armada38x_rtc_set_time,
.read_alarm = armada38x_rtc_read_alarm,
@@ -210,8 +210,15 @@ static struct rtc_class_ops armada38x_rtc_ops = {
.alarm_irq_enable = armada38x_rtc_alarm_irq_enable,
 };
 
+static const struct rtc_class_ops armada38x_rtc_ops_noirq = {
+   .read_time = armada38x_rtc_read_time,
+   .set_time = armada38x_rtc_set_time,
+   .read_alarm = armada38x_rtc_read_alarm,
+};
+
 static __init int armada38x_rtc_probe(struct platform_device *pdev)
 {
+   const struct rtc_class_ops *ops;
struct resource *res;
struct armada38x_rtc *rtc;
int ret;
@@ -242,19 +249,22 @@ static __init int armada38x_rtc_probe(struct 
platform_device *pdev)
0, pdev->name, rtc) < 0) {
dev_warn(&pdev->dev, "Interrupt not available.\n");
rtc->irq = -1;
+   }
+   platform_set_drvdata(pdev, rtc);
+
+   if (rtc->irq != -1) {
+   device_init_wakeup(&pdev->dev, 1);
+   ops = &armada38x_rtc_ops;
+   } else {
/*
 * If there is no interrupt available then we can't
 * use the alarm
 */
-   armada38x_rtc_ops.set_alarm = NULL;
-   armada38x_rtc_ops.alarm_irq_enable = NULL;
+   ops = &armada38x_rtc_ops_noirq;
}
-   platform_set_drvdata(pdev, rtc);
-   if (rtc->irq != -1)
-   device_init_wakeup(&pdev->dev, 1);
 
rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name,
-   &armada38x_rtc_ops, THIS_MODULE);
+   ops, THIS_MODULE);
if (IS_ERR(rtc->rtc_dev)) {
ret = PTR_ERR(rtc->rtc_dev);
dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret);

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2017-01-03 Thread Paul Bolle

On Tue, 2017-01-03 at 22:19 +0100, Arnd Bergmann wrote:
> Sounds good to me. My original series contained four more patches that
> I did not post again after there was some concern[1] that we did not
> come to a conclusion on:
> 
> isdn: gigaset: remove i4l code

Let me repeat that I'm fine with a patch that does that.

> isdn: move isdnhdlc out of i4l
> isdn: i4l: move hisax driver to staging
> isdn: move i4l to staging
> 
> I can post those as well, at least I think the first two are helpful
> for untangling i4l from the rest of ISDN.  I also still think that
> moving hisax and i4l to staging is reasonable given the state of
> that code, even if there are a couple of users today.

There are? And even if there are: is there any reason to expect that moving
the rest of i4l to staging will result in anything other than a stream of
checkpatch cleanups?

How often did a bunch of drivers re-enter the tree after being sent to
staging?

Paul Bolle

Re: [PATCH 4/7] mm, vmscan: show LRU name in mm_vmscan_lru_isolate tracepoint

2017-01-03 Thread Michal Hocko

On Tue 03-01-17 22:40:23, Vlastimil Babka wrote:
> On 01/03/2017 10:24 PM, Michal Hocko wrote:
[...]
> > > So the tool should be OK as long as it can find values for LRU_*
> > > constants. Is this what is the problem?
> 
> Exactly.

So this should make it work (it compiles it has to be correct, right?).
---
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index aa4caa6914a9..6172afa2fd82 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -240,6 +240,13 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" )   
\
IFDEF_ZONE_HIGHMEM( EM (ZONE_HIGHMEM,"HighMem"))\
EMe(ZONE_MOVABLE,"Movable")
 
+#define LRU_NAMES  \
+   EM (LRU_INACTIVE_ANON, "inactive_anon") \
+   EM (LRU_ACTIVE_ANON, "active_anon") \
+   EM (LRU_INACTIVE_FILE, "inactive_file") \
+   EM (LRU_ACTIVE_FILE, "active_file") \
+   EMe(LRU_UNEVICTABLE, "unevictable")
+
 /*
  * First define the enums in the above macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
@@ -253,6 +260,7 @@ COMPACTION_STATUS
 COMPACTION_PRIORITY
 COMPACTION_FEEDBACK
 ZONE_TYPE
+LRU_NAMES
 
 /*
  * Now redefine the EM() and EMe() macros to map the enums to the strings
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 8e7c4c56499a..3c38d9315b43 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -36,14 +36,6 @@
(RECLAIM_WB_ASYNC) \
)
 
-#define show_lru_name(lru) \
-   __print_symbolic(lru, \
-   {LRU_INACTIVE_ANON, "inactive_anon"}, \
-   {LRU_ACTIVE_ANON, "active_anon"}, \
-   {LRU_INACTIVE_FILE, "inactive_file"}, \
-   {LRU_ACTIVE_FILE, "active_file"}, \
-   {LRU_UNEVICTABLE, "unevictable"})
-
 TRACE_EVENT(mm_vmscan_kswapd_sleep,
 
TP_PROTO(int nid),
@@ -319,7 +311,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate,
__entry->nr_scanned,
__entry->nr_skipped,
__entry->nr_taken,
-   show_lru_name(__entry->lru))
+   __print_symbolic(__entry->lru, LRU_NAMES))
 );
 
 TRACE_EVENT(mm_vmscan_writepage,

-- 
Michal Hocko
SUSE Labs

Re: [PATCH v3 2/3] USB3/DWC3: Add property "snps, incr-burst-type-adjustment" for INCR burst type

2017-01-03 Thread Rob Herring

On Thu, Dec 22, 2016 at 8:52 PM, Jerry Huang  wrote:
> Hi, Rob,
>> -Original Message-
>> From: Rob Herring [mailto:r...@kernel.org]
>> Sent: Friday, December 23, 2016 2:45 AM
>> To: Jerry Huang 
>> Cc: ba...@kernel.org; mark.rutl...@arm.com; catalin.mari...@arm.com;
>> will.dea...@arm.com; li...@armlinux.org.uk; devicet...@vger.kernel.org;
>> linux-...@vger.kernel.org; linux-kernel@vger.kernel.org; linux-arm-
>> ker...@lists.infradead.org
>> Subject: Re: [PATCH v3 2/3] USB3/DWC3: Add property "snps, incr-burst-
>> type-adjustment" for INCR burst type
>>
>> On Mon, Dec 19, 2016 at 05:25:53PM +0800, Changming Huang wrote:
>> > New property "snps,incr-burst-type-adjustment = <x>, <y>" for USB3.0
>> DWC3.
>> > Field "x": 1/0 - undefined length INCR burst type enable or not; Field
>> > "y": INCR4/INCR8/INCR16/INCR32/INCR64/INCR128/INCR256 burst type.
>> >
>> > While enabling undefined length INCR burst type and INCR16 burst type,
>> > get better write performance on NXP Layerscape platform:
>> > around 3% improvement (from 364MB/s to 375MB/s).
>> >
>> > Signed-off-by: Changming Huang 
>> > ---
>> > Changes in v3:
>> >   - add new property for INCR burst in usb node.
>> >
>> >  Documentation/devicetree/bindings/usb/dwc3.txt |5 +
>> >  arch/arm/boot/dts/ls1021a.dtsi |1 +
>> >  arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi |3 +++
>> >  arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi |2 ++
>> >  4 files changed, 11 insertions(+)
>> >
>> > diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt
>> > b/Documentation/devicetree/bindings/usb/dwc3.txt
>> > index e3e6983..8c405a3 100644
>> > --- a/Documentation/devicetree/bindings/usb/dwc3.txt
>> > +++ b/Documentation/devicetree/bindings/usb/dwc3.txt
>> > @@ -55,6 +55,10 @@ Optional properties:
>> > fladj_30mhz_sdbnd signal is invalid or incorrect.
>> >
>> >   -  tx-fifo-resize: determines if the FIFO *has* to be
>> reallocated.
>> > + - snps,incr-burst-type-adjustment: Value for INCR burst type of
>> GSBUSCFG0
>> > +   register, undefined length INCR burst type enable and INCRx type.
>> > +   First field is for undefined length INCR burst type enable or not.
>> > +   Second field is for largest INCRx type enabled.
>>
>> Why do you need the first field? Is the 2nd field used if the 1st is 0?
>> If not, then just use the presence of the property to enable or not.
> The first field is one switch.
> When it is 1, means undefined length INCR burst type enabled, we can use any 
> length less than or equal to the largest-enabled burst length of 
> INCR4/8/16/32/64/128/256.
> When it is zero, means INCRx burst mode enabled, we can use one fixed burst 
> length of 1/4/8/16/32/64/128/256 byte.
> So, the 2nd field is used if the 1st is 0, we need to select one largest 
> burst length the USB controller can support.
> If we don't want to change the value of this register (use the default 
> value), we don't need to add this property to usb node.

Just make this a single value with 0 meaning INCR and 4/8/16/etc being INCRx.

Rob

Re: sg_io HARDENED_USERCOPY_PAGESPAN trace

2017-01-03 Thread Kees Cook

On Fri, Dec 30, 2016 at 7:10 AM, Christoph Hellwig  wrote:
> On Fri, Dec 30, 2016 at 10:01:39AM -0500, Dave Jones wrote:
>> I threw this debug printk into the pagespan code to see what exactly
>> it was complaining about..
>>
>> ptr:88042614cff8 end:88042614d003 n:c
>>
>> so it was copying 12 bytes that spanned two pages.
>> From my reading of the config option help text, this thing is
>> complaining that wasn't allocated with __GFP_COMP maybe ?

There are a lot of cases of "missing" __GFP_COMP, which is why
HARDENED_USERCOPY_PAGESPAN defaults to "n".

> If this is on a device using blk-mq the block core will use high
> order allocations (as high as possible) to allocate the requests
> for each queue, so struct request could very well span multiple
> pages.  But I don't see what __GFP_COMP would have to do with
> user copy annotations.  As all requests for a queue are freed
> together again there is no point in setting __GFP_COMP for the
> request allocations.

Does it hurt anything to mark these pages as allocated "together" via
__GFP_COMP?

-Kees

-- 
Kees Cook
Nexus Security

Re: [tpmdd-devel] [PATCH RFC 0/4] RFC: in-kernel resource manager

2017-01-03 Thread Jason Gunthorpe

On Tue, Jan 03, 2017 at 08:36:10AM -0800, James Bottomley wrote:

> > I'm not sure about this. Why you couldn't have a very thin daemon 
> > that prepares the file descriptor and sends it through UDS socket to 
> > a client.
> 
> So I'm a bit soured on daemons from the trousers experience: tcsd
> crashed regularly and when it did it took all the TPM connections down
> irrecoverably.  I'm not saying we can't write a stateless daemon to fix
> most of the trousers issues, but I think it's valuable first to ask the
> question, "can we manage without a daemon at all?"  I actually think
> the answer is "yes", so I'm interested in seeing how far that line of
> research gets us.

There is clearly no need for a daemon to be involved when working on
simple tasks like key load and key sign/enc/dec actions, adding such a
thing only increases the complexity.

If we discover a reason to have a daemon down the road then it should
work in some way where the user space can call out to the daemon over
a different path than the kernel. (eg dbus or something)

> Do you have a link to the presentation?  The Plumbers etherpad doesn't
> contain it.  I've been trying to work out whether a properly set up TPM
> actually does need any protections at all.  As far as I can tell, once
> you've set all the hierarchy authorities and the lockout one, you're
> pretty well protected.

I think we should also consider TPM 1.2 support in all of this, it is
still a very popular piece of hardware and it is equally able to
support a RM.

So, in general, I'd prefer to see the unprivileged char dev hard
prevented by the kernel from doing certain things:

- Wipe the TPM
- Manipulate the SRK, nvram, tpm flags, change passwords etc
- Read back the EK
- Write to PCRs
- etc.

Even if TPM 2 has a stronger password based model, I still think the
kernel should hard prevent those sorts of actions even if the user
knows the TPM password.

Realistically people in less sensitive environments will want to use
the well known TPM passwords and still have reasonable safety in their
unprivileged accounts.

Jason

Re: [PATCH 0/2] Begin auditing SECCOMP_RET_ERRNO return actions

2017-01-03 Thread Kees Cook

On Tue, Jan 3, 2017 at 1:31 PM, Paul Moore  wrote:
> On Tue, Jan 3, 2017 at 4:21 PM, Kees Cook  wrote:
>> On Tue, Jan 3, 2017 at 1:13 PM, Paul Moore  wrote:
>>> On Tue, Jan 3, 2017 at 4:03 PM, Kees Cook  wrote:
>>>> On Tue, Jan 3, 2017 at 12:54 PM, Paul Moore  wrote:
>>>>> On Tue, Jan 3, 2017 at 3:44 PM, Kees Cook  wrote:
>>>>>> I still wonder, though, isn't there a way to use auditctl to get all
>>>>>> the seccomp messages you need?
>>>>>
>>>>> Not all of the seccomp actions are currently logged, that's one of the
>>>>> problems (and the biggest at the moment).
>>>>
>>>> Well... sort of. It all gets passed around, but the logic isn't very
>>>> obvious (or at least I always have to go look it up).
>>>
>>> Last time I checked SECCOMP_RET_ALLOW wasn't logged (as well as at
>>> least one other action, but I can't remember which off the top of my
>>> head)?
>>
>> Sure, but if you're using audit, you don't need RET_ALLOW to be logged
>> because you'll get a full syscall log entry. Logging RET_ALLOW is
>> redundant and provides no new information, it seems to me.
>
> I only bring this up as it might be a way to help solve the
> SECCOMP_RET_AUDIT problem that Tyler mentioned.

So, I guess I want to understand why something like this doesn't work,
with no changes at all to the kernel:

Imaginary "seccomp-audit.c":

...
pid = fork();
if (pid) {
char cmd[80];

sprintf(cmd, "auditctl -a always,exit -S all -F pid=%d", pid);
system(cmd);
release...
 } else {
wait for release...
execv(argv[1], argv + 1);
 }
...

This should dump all syscalls (both RET_ALLOW and RET_ERRNO), as well
as all seccomp actions of any kind. (Down side is the need for root to
launch auditctl...)

Perhaps an improvement to this could be enabling audit when seccomp
syscall is seen? I can't tell if auditctl already has something to do
this ("start auditing this process and all children when syscall X is
performed").

-Kees

-- 
Kees Cook
Nexus Security

[PATCH 2/4] drivers/tty: Compute current directly

2017-01-03 Thread Davidlohr Bueso

This patch effectively replaces the tsk pointer dereference
(which is obviously == current), to directly use get_current()
macro. This is to make the removal of setting foreign task
states smoother and painfully obvious. Performance win on some
archs such as x86-64 and ppc64 -- arm64 is no longer an issue:

https://lkml.org/lkml/2016/12/30/230

Cc: Greg Kroah-Hartman 
Signed-off-by: Davidlohr Bueso 
---
 drivers/tty/tty_ldsem.c | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c
index 1bf8ed13f827..3e6722954f29 100644
--- a/drivers/tty/tty_ldsem.c
+++ b/drivers/tty/tty_ldsem.c
@@ -200,7 +200,6 @@ static struct ld_semaphore __sched *
 down_read_failed(struct ld_semaphore *sem, long count, long timeout)
 {
struct ldsem_waiter waiter;
-   struct task_struct *tsk = current;
long adjust = -LDSEM_ACTIVE_BIAS + LDSEM_WAIT_BIAS;
 
/* set up my own style of waitqueue */
@@ -221,8 +220,8 @@ down_read_failed(struct ld_semaphore *sem, long count, long 
timeout)
list_add_tail(&waiter.list, &sem->read_wait);
sem->wait_readers++;
 
-   waiter.task = tsk;
-   get_task_struct(tsk);
+   waiter.task = current;
+   get_task_struct(current);
 
/* if there are no active locks, wake the new lock owner(s) */
if ((count & LDSEM_ACTIVE_MASK) == 0)
@@ -232,7 +231,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long 
timeout)
 
/* wait to be given the lock */
for (;;) {
-   set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+   set_task_state(current, TASK_UNINTERRUPTIBLE);
 
if (!waiter.task)
break;
@@ -241,7 +240,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long 
timeout)
timeout = schedule_timeout(timeout);
}
 
-   __set_task_state(tsk, TASK_RUNNING);
+   __set_task_state(current, TASK_RUNNING);
 
if (!timeout) {
/* lock timed out but check if this task was just
@@ -268,7 +267,6 @@ static struct ld_semaphore __sched *
 down_write_failed(struct ld_semaphore *sem, long count, long timeout)
 {
struct ldsem_waiter waiter;
-   struct task_struct *tsk = current;
long adjust = -LDSEM_ACTIVE_BIAS;
int locked = 0;
 
@@ -289,16 +287,16 @@ down_write_failed(struct ld_semaphore *sem, long count, 
long timeout)
 
list_add_tail(&waiter.list, &sem->write_wait);
 
-   waiter.task = tsk;
+   waiter.task = current;
 
-   set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+   set_task_state(current, TASK_UNINTERRUPTIBLE);
for (;;) {
if (!timeout)
break;
raw_spin_unlock_irq(&sem->wait_lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->wait_lock);
-   set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+   set_task_state(current, TASK_UNINTERRUPTIBLE);
locked = writer_trylock(sem);
if (locked)
break;
@@ -309,7 +307,7 @@ down_write_failed(struct ld_semaphore *sem, long count, 
long timeout)
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
 
-   __set_task_state(tsk, TASK_RUNNING);
+   __set_task_state(current, TASK_RUNNING);
 
/* lock wait may have timed out */
if (!locked)
-- 
2.6.6

[PATCH 3/4] kernel/locking: Compute current directly

2017-01-03 Thread Davidlohr Bueso

This patch effectively replaces the tsk pointer dereference
(which is obviously == current), to directly use get_current()
macro. This is to make the removal of setting foreign task
states smoother and painfully obvious. Performance win on some
archs such as x86-64 and ppc64. On a microbenchmark that calls
set_task_state() vs set_current_state() and an inode rwsem
pounding benchmark doing unlink:

== 1. x86-64 ==

Avg runtime set_task_state():601 msecs
Avg runtime set_current_state(): 552 msecs

vanilla dirty
Hmeanunlink1-processes-2  36089.26 (  0.00%)38977.33 (  8.00%)
Hmeanunlink1-processes-5  28555.01 (  0.00%)29832.55 (  4.28%)
Hmeanunlink1-processes-8  37323.75 (  0.00%)44974.57 ( 20.50%)
Hmeanunlink1-processes-12 43571.88 (  0.00%)44283.01 (  1.63%)
Hmeanunlink1-processes-21 34431.52 (  0.00%)38284.45 ( 11.19%)
Hmeanunlink1-processes-30 34813.26 (  0.00%)37975.17 (  9.08%)
Hmeanunlink1-processes-48 37048.90 (  0.00%)39862.78 (  7.59%)
Hmeanunlink1-processes-79 35630.01 (  0.00%)36855.30 (  3.44%)
Hmeanunlink1-processes-11036115.85 (  0.00%)39843.91 ( 10.32%)
Hmeanunlink1-processes-14132546.96 (  0.00%)35418.52 (  8.82%)
Hmeanunlink1-processes-17234674.79 (  0.00%)36899.21 (  6.42%)
Hmeanunlink1-processes-20337303.11 (  0.00%)36393.04 ( -2.44%)
Hmeanunlink1-processes-22435712.13 (  0.00%)36685.96 (  2.73%)

== 2. ppc64le ==

Avg runtime set_task_state():  938 msecs
Avg runtime set_current_state: 940 msecs

vanilla dirty
Hmeanunlink1-processes-2  19269.19 (  0.00%)30704.50 ( 59.35%)
Hmeanunlink1-processes-5  20106.15 (  0.00%)21804.15 (  8.45%)
Hmeanunlink1-processes-8  17496.97 (  0.00%)17243.28 ( -1.45%)
Hmeanunlink1-processes-12 14224.15 (  0.00%)17240.21 ( 21.20%)
Hmeanunlink1-processes-21 14155.66 (  0.00%)15681.23 ( 10.78%)
Hmeanunlink1-processes-30 14450.70 (  0.00%)15995.83 ( 10.69%)
Hmeanunlink1-processes-48 16945.57 (  0.00%)16370.42 ( -3.39%)
Hmeanunlink1-processes-79 15788.39 (  0.00%)14639.27 ( -7.28%)
Hmeanunlink1-processes-11014268.48 (  0.00%)14377.40 (  0.76%)
Hmeanunlink1-processes-14114023.65 (  0.00%)16271.69 ( 16.03%)
Hmeanunlink1-processes-17213417.62 (  0.00%)16067.55 ( 19.75%)
Hmeanunlink1-processes-20315293.08 (  0.00%)15440.40 (  0.96%)
Hmeanunlink1-processes-23413719.32 (  0.00%)16190.74 ( 18.01%)
Hmeanunlink1-processes-26516400.97 (  0.00%)16115.22 ( -1.74%)
Hmeanunlink1-processes-29614388.60 (  0.00%)16216.13 ( 12.70%)
Hmeanunlink1-processes-32015771.85 (  0.00%)15905.96 (  0.85%)

Signed-off-by: Davidlohr Bueso 
---
XXX: things like semaphore.c are only compile tested.

 kernel/locking/mutex.c  | 19 +--
 kernel/locking/rwsem-spinlock.c | 18 +++---
 kernel/locking/rwsem-xadd.c |  7 +++
 kernel/locking/semaphore.c  |  7 +++
 4 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 9b349619f431..4c7d04362c95 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -622,7 +622,6 @@ __mutex_lock_common(struct mutex *lock, long state, 
unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
 {
-   struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
bool first = false;
@@ -656,18 +655,18 @@ __mutex_lock_common(struct mutex *lock, long state, 
unsigned int subclass,
goto skip_wait;
 
debug_mutex_lock_common(lock, &waiter);
-   debug_mutex_add_waiter(lock, &waiter, task);
+   debug_mutex_add_waiter(lock, &waiter, current);
 
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
-   waiter.task = task;
+   waiter.task = current;
 
if (__mutex_waiter_is_first(lock, &waiter))
__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
 
lock_contended(&lock->dep_map, ip);
 
-   set_task_state(task, state);
+   set_task_state(current, state);
for (;;) {
/*
 * Once we hold wait_lock, we're serialized against
@@ -683,7 +682,7 @@ __mutex_lock_common(struct mutex *lock, long state, 
unsigned int subclass,
 * wait_lock. This ensures the lock cancellation is ordered
 * against mutex_unlock() and wake-ups do not go missing.
 */
-   if (unlikely(signal_pending_state(state, task))) {
+   if (unlikely

[PATCH 4/4] sched: Remove set_task_state()

2017-01-03 Thread Davidlohr Bueso

This is a nasty interface and setting the state of a foreign task
must not be done. As of be628be0956 (bcache: Make gc wakeup sane,
remove set_task_state()) everyone in the kernel calls
set_task_state() with current, allowing the helper to be removed.
However, as the comment indicates, it is still around for those
archs where computing current is more expensive than using a pointer,
at least in theory. An important arch that is affected is arm64[1],
however this has been addressed now[2] and performance is up to par
making no difference with either calls.

Of all the callers, if any, it's the locking bits that would care
most about this -- ie: we end up passing a tsk pointer to a lot of
the lock slowpath, and setting ->state on that. The following numbers
are based on two tests: a custom ad-hoc microbenchmark that just
measures latencies (for ~65 million calls) between set_task_state()
vs set_current_state().

Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close, unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is
quite unrealistic, it does contend a lot on the inode mutex or now
rwsem.

[1] https://lkml.org/lkml/2016/12/30/230
[2] 
http://lists.infradead.org/pipermail/linux-arm-kernel/2017-January/476461.html

== 1. x86-64 ==

Avg runtime set_task_state():601 msecs
Avg runtime set_current_state(): 552 msecs

vanilla dirty
Hmeanunlink1-processes-2  36089.26 (  0.00%)38977.33 (  8.00%)
Hmeanunlink1-processes-5  28555.01 (  0.00%)29832.55 (  4.28%)
Hmeanunlink1-processes-8  37323.75 (  0.00%)44974.57 ( 20.50%)
Hmeanunlink1-processes-12 43571.88 (  0.00%)44283.01 (  1.63%)
Hmeanunlink1-processes-21 34431.52 (  0.00%)38284.45 ( 11.19%)
Hmeanunlink1-processes-30 34813.26 (  0.00%)37975.17 (  9.08%)
Hmeanunlink1-processes-48 37048.90 (  0.00%)39862.78 (  7.59%)
Hmeanunlink1-processes-79 35630.01 (  0.00%)36855.30 (  3.44%)
Hmeanunlink1-processes-11036115.85 (  0.00%)39843.91 ( 10.32%)
Hmeanunlink1-processes-14132546.96 (  0.00%)35418.52 (  8.82%)
Hmeanunlink1-processes-17234674.79 (  0.00%)36899.21 (  6.42%)
Hmeanunlink1-processes-20337303.11 (  0.00%)36393.04 ( -2.44%)
Hmeanunlink1-processes-22435712.13 (  0.00%)36685.96 (  2.73%)

== 2. ppc64le ==

Avg runtime set_task_state():  938 msecs
Avg runtime set_current_state: 940 msecs

vanilla dirty
Hmeanunlink1-processes-2  19269.19 (  0.00%)30704.50 ( 59.35%)
Hmeanunlink1-processes-5  20106.15 (  0.00%)21804.15 (  8.45%)
Hmeanunlink1-processes-8  17496.97 (  0.00%)17243.28 ( -1.45%)
Hmeanunlink1-processes-12 14224.15 (  0.00%)17240.21 ( 21.20%)
Hmeanunlink1-processes-21 14155.66 (  0.00%)15681.23 ( 10.78%)
Hmeanunlink1-processes-30 14450.70 (  0.00%)15995.83 ( 10.69%)
Hmeanunlink1-processes-48 16945.57 (  0.00%)16370.42 ( -3.39%)
Hmeanunlink1-processes-79 15788.39 (  0.00%)14639.27 ( -7.28%)
Hmeanunlink1-processes-11014268.48 (  0.00%)14377.40 (  0.76%)
Hmeanunlink1-processes-14114023.65 (  0.00%)16271.69 ( 16.03%)
Hmeanunlink1-processes-17213417.62 (  0.00%)16067.55 ( 19.75%)
Hmeanunlink1-processes-20315293.08 (  0.00%)15440.40 (  0.96%)
Hmeanunlink1-processes-23413719.32 (  0.00%)16190.74 ( 18.01%)
Hmeanunlink1-processes-26516400.97 (  0.00%)16115.22 ( -1.74%)
Hmeanunlink1-processes-29614388.60 (  0.00%)16216.13 ( 12.70%)
Hmeanunlink1-processes-32015771.85 (  0.00%)15905.96 (  0.85%)

x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs (but always within a 20 to 50ms increase); ppc was more constant.

Signed-off-by: Davidlohr Bueso 
---
 arch/um/drivers/random.c   |  2 +-
 drivers/md/dm-bufio.c  |  2 +-
 drivers/md/dm-crypt.c  |  4 ++--
 drivers/md/persistent-data/dm-block-manager.c  |  4 ++--
 .../staging/lustre/lnet/libcfs/linux/linux-debug.c |  2 +-
 drivers/tty/tty_ldsem.c| 10 
 include/linux/sched.h  | 27 +-
 kernel/exit.c  |  4 ++--
 kernel/locking/mutex.c |  8 +++
 kernel/locking/rwsem-spinlock.c|  8 +++
 kernel/locking/rwsem-xadd.c|  4 ++--
 kernel/locking/semaphore.c

[PATCH 1/4] kernel/exit: Compute current directly

2017-01-03 Thread Davidlohr Bueso

This patch effectively replaces the tsk pointer dereference
(which is obviously == current), to directly use get_current()
macro. In this case, do_exit() always passes current to exit_mm(),
hence we can simply get rid of the arg. This is also a performance
win on some archs such as x86-64 and ppc64 -- arm64 is no longer
an issue:

https://lkml.org/lkml/2016/12/30/230

Signed-off-by: Davidlohr Bueso 
---
XXX: do_exit() could further be cleaned up and we'd end up getting
rid of tsk for a lot of the exit_*() calls.

 kernel/exit.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 8f14b866f9f6..2385d434a46e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -468,12 +468,12 @@ void mm_update_next_owner(struct mm_struct *mm)
  * Turn us into a lazy TLB process if we
  * aren't already..
  */
-static void exit_mm(struct task_struct *tsk)
+static void exit_mm(void)
 {
-   struct mm_struct *mm = tsk->mm;
+   struct mm_struct *mm = current->mm;
struct core_state *core_state;
 
-   mm_release(tsk, mm);
+   mm_release(current, mm);
if (!mm)
return;
sync_mm_rss(mm);
@@ -491,7 +491,7 @@ static void exit_mm(struct task_struct *tsk)
 
up_read(&mm->mmap_sem);
 
-   self.task = tsk;
+   self.task = current;
self.next = xchg(&core_state->dumper.next, &self);
/*
 * Implies mb(), the result of xchg() must be visible
@@ -501,22 +501,22 @@ static void exit_mm(struct task_struct *tsk)
complete(&core_state->startup);
 
for (;;) {
-   set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+   set_task_state(current, TASK_UNINTERRUPTIBLE);
if (!self.task) /* see coredump_finish() */
break;
freezable_schedule();
}
-   __set_task_state(tsk, TASK_RUNNING);
+   __set_task_state(current, TASK_RUNNING);
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
-   BUG_ON(mm != tsk->active_mm);
+   BUG_ON(mm != current->active_mm);
/* more a memory barrier than a real lock */
-   task_lock(tsk);
-   tsk->mm = NULL;
+   task_lock(current);
+   current->mm = NULL;
up_read(&mm->mmap_sem);
enter_lazy_tlb(mm, current);
-   task_unlock(tsk);
+   task_unlock(current);
mm_update_next_owner(mm);
mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
@@ -823,7 +823,7 @@ void __noreturn do_exit(long code)
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
 
-   exit_mm(tsk);
+   exit_mm();
 
if (group_dead)
acct_process();
-- 
2.6.6

[PATCH 0/4] current vs ptr to current dereferencing

2017-01-03 Thread Davidlohr Bueso

Hi,

This is a re-spin of the earlier rfc[1] wrt deleting the set_task_state()
interfaces. In order for us not taking a hit on arm64, these patches
depend on Mark's fix to get rid of read_sysreg():

http://lists.infradead.org/pipermail/linux-arm-kernel/2017-January/476461.html

First three patches get rid of 'tsk = current' based assignments to
simply use current/get_current macro directly now that we have some
actual numbers -- in which the conversion does improve some performance
numbers, especially the locking bits. This is only around what directly
is touched by patch 4, making it very obvious that we are indeed calling
upon the current task. There are other users left with this pattern that
could be cleaned up later.

Applies against v4.10-rc2.

[1] https://lkml.org/lkml/2016/12/30/230

Thanks.

Davidlohr Bueso (4):
  kernel/exit: Compute current directly
  drivers/tty: Compute current directly
  kernel/locking: Compute current directly
  sched: Remove set_task_state()

 arch/um/drivers/random.c   |  2 +-
 drivers/md/dm-bufio.c  |  2 +-
 drivers/md/dm-crypt.c  |  4 ++--
 drivers/md/persistent-data/dm-block-manager.c  |  4 ++--
 .../staging/lustre/lnet/libcfs/linux/linux-debug.c |  2 +-
 drivers/tty/tty_ldsem.c| 18 +++
 include/linux/sched.h  | 27 +-
 kernel/exit.c  | 22 +-
 kernel/locking/mutex.c | 19 ---
 kernel/locking/rwsem-spinlock.c| 18 ++-
 kernel/locking/rwsem-xadd.c|  7 +++---
 kernel/locking/semaphore.c |  7 +++---
 12 files changed, 49 insertions(+), 83 deletions(-)

-- 
2.6.6

Re: [PATCH 4/7] mm, vmscan: show LRU name in mm_vmscan_lru_isolate tracepoint

2017-01-03 Thread Vlastimil Babka


On 01/03/2017 10:24 PM, Michal Hocko wrote:

On Tue 03-01-17 21:52:44, Michal Hocko wrote:

On Tue 03-01-17 21:47:45, Michal Hocko wrote:
> On Tue 03-01-17 18:08:58, Vlastimil Babka wrote:
> > On 12/28/2016 04:30 PM, Michal Hocko wrote:
> > > From: Michal Hocko 
> > >
> > > mm_vmscan_lru_isolate currently prints only whether the LRU we isolate
> > > from is file or anonymous but we do not know which LRU this is. It is
> > > useful to know whether the list is active or inactive as well. Change
> > > the tracepoint to show symbolic names of the lru rather.
> > >
> > > Signed-off-by: Michal Hocko 
> > > ---
> > >  include/trace/events/vmscan.h | 20 ++--
> > >  mm/vmscan.c   |  2 +-
> > >  2 files changed, 15 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
> > > index 6af4dae46db2..cc0b4c456c78 100644
> > > --- a/include/trace/events/vmscan.h
> > > +++ b/include/trace/events/vmscan.h
> > > @@ -36,6 +36,14 @@
> > >  (RECLAIM_WB_ASYNC) \
> > >  )
> > >
> > > +#define show_lru_name(lru) \
> > > +__print_symbolic(lru, \
> > > +{LRU_INACTIVE_ANON, "LRU_INACTIVE_ANON"}, \
> > > +{LRU_ACTIVE_ANON, "LRU_ACTIVE_ANON"}, \
> > > +{LRU_INACTIVE_FILE, "LRU_INACTIVE_FILE"}, \
> > > +{LRU_ACTIVE_FILE, "LRU_ACTIVE_FILE"}, \
> > > +{LRU_UNEVICTABLE, "LRU_UNEVICTABLE"})
> > > +
> >
> > Does this work with external tools such as trace-cmd, i.e. does it export
> > the correct format file?
>
> How do I find out?


You did :) Another way to verify is to use trace-cmd tool instead of manual 
sysfs operations and see if the output looks as expected. The tool gets the raw 
records from kernel and does the printing in userspace, unlike "cat trace_pipe".



Well, I've just checked the format file and it says
print fmt: "isolate_mode=%d classzone=%d order=%d nr_requested=%lu nr_scanned=%lu nr_skipped=%lu nr_taken=%lu lru=%s", REC->isolate_mode, 
REC->classzone_idx, REC->order, REC->nr_requested, REC->nr_scanned, REC->nr_skipped, REC->nr_taken, __print_symbolic(REC->lru, 
{LRU_INACTIVE_ANON, "LRU_INACTIVE_ANON"}, {LRU_ACTIVE_ANON, "LRU_ACTIVE_ANON"}, {LRU_INACTIVE_FILE, "LRU_INACTIVE_FILE"}, 
{LRU_ACTIVE_FILE, "LRU_ACTIVE_FILE"}, {LRU_UNEVICTABLE, "LRU_UNEVICTABLE"})

So the tool should be OK as long as it can find values for LRU_*
constants. Is this what is the problem?


Exactly.


OK, I got it. We need enum->value translation and all the EM stuff to do
that, right?


Yep.


I will rework the patch and move the definition to the rest of the EM
family...


Thanks!

Re: [PATCH] uapi: use wildcards to list files

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 3:35:44 PM CET Nicolas Dichtel wrote:
> Regularly, when a new header is created in include/uapi/, the developer
> forgets to add it in the corresponding Kbuild file. This error is usually
> detected after the release is out.
> 
> In fact, all headers under include/uapi/ should be exported, so let's
> use wildcards.

I think the idea makes a lot of sense: if a header is in uapi, we should
really export it. However, using a wildcard expression seems a bit
backwards here, I think we should make this implicit and not have the
Kbuild file at all.

The "header-y" syntax was originally added back when the uapi headers
were mixed with the internal headers in the same directory. After
David Howells introduced the separate directory for uapi, it has
become a bit redundant.

Can you try to modify scripts/Makefile.headersinst instead so we
can simply remove the Kbuild files entirely?

Arnd

[PATCH] dax: fix deadlock with DAX 4k holes

2017-01-03 Thread Ross Zwisler

Currently in DAX if we have three read faults on the same hole address we
can end up with the following:

Thread 0        Thread 1        Thread 2

dax_iomap_fault
 grab_mapping_entry
  lock_slot
   

dax_iomap_fault
 grab_mapping_entry
  get_unlocked_mapping_entry
   

dax_iomap_fault
 grab_mapping_entry
  get_unlocked_mapping_entry
   
  dax_load_hole
   find_or_create_page
   ...
page_cache_tree_insert
 dax_wake_mapping_entry_waiter
  
 __radix_tree_replace
  


get_page
lock_page
...
put_locked_mapping_entry
unlock_page
put_page



The crux of the problem is that once we insert a 4k zero page, all locking
from then on is done in terms of that 4k zero page and any additional
threads sleeping on the empty DAX entry will never be woken.  Fix this by
waking all sleepers when we replace the DAX radix tree entry with a 4k zero
page.  This will allow all sleeping threads to successfully transition from
locking based on the DAX empty entry to locking on the 4k zero page.

With the test case reported by Xiong this happens very regularly in my test
setup, with some runs resulting in 9+ threads in this deadlocked state.
With this fix I've been able to run that same test dozens of times in a
loop without issue.

Signed-off-by: Ross Zwisler 
Reported-by: Xiong Zhou 
Fixes: commit ac401cc78242 ("dax: New fault locking")
Cc: Jan Kara 
Cc: sta...@vger.kernel.org # 4.7+
---

This issue exists as far back as v4.7, and I was easily able to reproduce it
with v4.7 using the same test.

Unfortunately this patch won't apply cleanly to the stable trees, but the
change is very simple and should be easy to replicate by hand.  Please ping
me if you'd like patches that apply cleanly to the v4.9 and v4.8.15 trees.

---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index d0e4d10..b772a33 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -138,7 +138,7 @@ static int page_cache_tree_insert(struct address_space 
*mapping,
dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
/* Wakeup waiters for exceptional entry lock */
dax_wake_mapping_entry_waiter(mapping, page->index, p,
- false);
+ true);
}
}
__radix_tree_replace(&mapping->page_tree, node, slot, page,
-- 
2.7.4

Re: [PATCH] drivers/virt: use get_user_pages_unlocked()

2017-01-03 Thread Michal Hocko

On Tue 03-01-17 21:14:20, Lorenzo Stoakes wrote:
> Just a gentle ping on this :) I think this might be a slightly
> abandoned corner of the kernel so not sure who else to ping to get
> this moving.

Maybe Andrew can pick it up?
http://lkml.kernel.org/r/20161101194332.23961-1-lstoa...@gmail.com

> On 1 November 2016 at 19:43, Lorenzo Stoakes  wrote:
> > Moving from get_user_pages() to get_user_pages_unlocked() simplifies the 
> > code
> > and takes advantage of VM_FAULT_RETRY functionality when faulting in pages.
> >
> > Signed-off-by: Lorenzo Stoakes 
> > ---
> >  drivers/virt/fsl_hypervisor.c | 7 ++-
> >  1 file changed, 2 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
> > index 150ce2a..d3eca87 100644
> > --- a/drivers/virt/fsl_hypervisor.c
> > +++ b/drivers/virt/fsl_hypervisor.c
> > @@ -243,11 +243,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy 
> > __user *p)
> > sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list));
> >
> > /* Get the physical addresses of the source buffer */
> > -   down_read(&current->mm->mmap_sem);
> > -   num_pinned = get_user_pages(param.local_vaddr - lb_offset,
> > -   num_pages, (param.source == -1) ? 0 : FOLL_WRITE,
> > -   pages, NULL);
> > -   up_read(&current->mm->mmap_sem);
> > +   num_pinned = get_user_pages_unlocked(param.local_vaddr - lb_offset,
> > +   num_pages, pages, (param.source == -1) ? 0 : FOLL_WRITE);
> >
> > if (num_pinned != num_pages) {
> > /* get_user_pages() failed */
> > --
> > 2.10.2
> >
> 
> 
> 
> -- 
> Lorenzo Stoakes
> https://ljs.io

-- 
Michal Hocko
SUSE Labs

Re: [tpmdd-devel] [PATCH RFC 0/4] RFC: in-kernel resource manager

2017-01-03 Thread Jason Gunthorpe

On Mon, Jan 02, 2017 at 08:36:20AM -0800, James Bottomley wrote:
> On Mon, 2017-01-02 at 15:22 +0200, Jarkko Sakkinen wrote:
> > This patch set adds support for TPM spaces that provide a context
> > for isolating and swapping transient objects. This patch set does
> > not yet include support for isolating policy and HMAC sessions but
> > it is trivial to add once the basic approach is settled (and that's
> > why I created an RFC patch set).
> 
> The approach looks fine to me.  The only basic query I have is about
> the default: shouldn't it be with resource manager on rather than off? 
>  I can't really think of a use case that wants the RM off (even if
> you're running your own, having another doesn't hurt anything, and it's
> still required to share with in-kernel uses).

I haven't looked too closely at TPM 2.0 stuff, but at least for 1.2 we
should have a kernel white-list of allowed commands within a RM
context, so having the RM on by default would break all of the user
space.

I really think the only way forward here is a new char dev that is
safe for unprivileged/concurrent use and migrate the user space stack
to use it instead.

> And with that, I've TPM 2 enabled both gnome-keyring and openssl:
> 
> https://build.opensuse.org/package/show/home:jejb1:Tumbleweed/gnome-keyring
> https://build.opensuse.org/package/show/home:jejb1:Tumbleweed/openssl_tpm_engine
 
> I'm running them in production on my day to day laptop and so far
> everything's working nicely (better than 1.2, in fact, since tcsd
> periodically crashes necessitating a restart of everything).

You granted your unprivileged user access to /dev/tpm0 then? FYI I
think that is a dangerous idea..

Jason

Re: [RFC v4 0/6] CPU reclaiming for SCHED_DEADLINE

2017-01-03 Thread luca abeni

Hi Daniel,
(sorry for the previous html email; I replied from my phone and I did
not realise how the email client was configured)

On Tue, 3 Jan 2017 19:58:38 +0100
Daniel Bristot de Oliveira  wrote:

[...]
> > The implemented CPU reclaiming algorithm is based on tracking the
> > utilization U_act of active tasks (first 2 patches), and modifying
> > the runtime accounting rule (see patch 0004). The original GRUB
> > algorithm is modified as described in [2] to support multiple CPUs
> > (the original algorithm only considered one single CPU, this one
> > tracks U_act per runqueue) and to leave an "unreclaimable" fraction
> > of CPU time to non SCHED_DEADLINE tasks (see patch 0005: the
> > original algorithm can consume 100% of the CPU time, starving all
> > the other tasks). Patch 0003 uses the newly introduced "inactive
> > timer" (introduced in patch 0002) to fix dl_overflow() and
> > __setparam_dl(). Patch 0006 allows to enable CPU reclaiming only on
> > selected tasks.  
> 
> Hi,
> 
> Today I did some tests in this patch set. Unfortunately, it seems that
> there is a problem :-(.
[...]
I reproduced this issue; thanks for the report. It seems to be due to
the fact that the reclaiming tasks are more than the CPU cores and the
load is very high (near to the utilisation limit).

I am investigating it, and will hopefully post an update in the next
days.



Thanks,
Luca


> 
> In a four core box, if I dispatch 11 tasks [1] with setup:
> 
>   period = 30 ms
>   runtime = 10 ms
>   flags = 0 (GRUB disabled)
> 
> I see this:
> --- HTOP
>  1
> [|92.5%]   Tasks: 128, 259 thr; 14 running 2
> [|91.0%]   Load average: 4.65 4.66 4.81 3
> [|92.5%]   Uptime: 05:12:43 4
> [|92.5%] Mem[|||1.13G/3.78G]
>   Swp[  0K/3.90G]
> 
>   PID USER  PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> 16247 root  -101   0  4204   632   564 R 32.4  0.0  2:10.35 d
> 16249 root-101   0  4204   624   556 R 32.4  0.0  2:09.80 d
> 16250 root-101   0  4204   728   660 R 32.4  0.0  2:09.58 d
> 16252 root-101   0  4204   676   608 R 32.4  0.0  2:09.08 d
> 16253 root-101   0  4204   636   568 R 32.4  0.0  2:08.85 d
> 16254 root  -101   0  4204   732   664 R 32.4  0.0  2:08.62 d
> 16255 root-101   0  4204   620   556 R 32.4  0.0  2:08.40 d
> 16257 root-101   0  4204   708   640 R 32.4  0.0  2:07.98 d
> 16256 root-101   0  4204   624   560 R 32.4  0.0  2:08.18 d
> 16248 root-101   0  4204   680   612 R 33.0  0.0  2:10.15 d
> 16251 root-101   0  4204   676   608 R 33.0  0.0  2:09.34 d
> 16259 root   20   0  124M  4692  3120 R  1.1  0.1  0:02.82 htop
>  2191 bristot20   0  649M 41312 32048 S  0.0  1.0  0:28.77
> gnome-ter --- HTOP
> 
> 
> All tasks are using +- the same amount of CPU time, a little bit more
> than 30%, as expected. However, if I enable GRUB in the same task set
> I get this:
> 
> --- HTOP
>  1
> [|93.8%]   Tasks: 128, 260 thr; 15 running 2
> [|95.2%]   Load average: 5.13 5.01 4.98 3
> [|93.3%]   Uptime: 05:01:02 4
> [|96.4%] Mem[|||1.13G/3.78G]
>   Swp[  0K/3.90G]
> 
>   PID USER  PRI  NI  VIRT   RES   SHR S CPU% MEM%   TIME+  Command
> 14967 root  -101   0  4204   628   564 R 45.8  0.0  1h07:49 g
> 14962 root-101   0  4204   728   660 R 45.8  0.0  1h05:06 g
> 14959 root-101   0  4204   680   612 R 45.2  0.0  1h07:29 g
> 14927 root-101   0  4204   624   556 R 44.6  0.0  1h04:30 g
> 14928 root-101   0  4204   656   588 R 31.1  0.0 47:37.21 g
> 14961 root-101   0  4204   684   616 R 31.1  0.0 47:19.75 g
> 14968 root-101   0  4204   636   568 R 31.1  0.0 46:27.36 g
> 14960 root-101   0  4204   684   616 R 23.8  0.0 37:31.06 g
> 14969 root-101   0  4204   684   616 R 23.8  0.0 38:11.50 g
> 14925 root-101   0  4204   636   568 R 23.8  0.0 37:34.88 g
> 14926 root-101   0  4204   684   616 R 23.8  0.0 38:27.37 g
> 16182 root 20   0  124M  3972  3212 R  0.6  0.1  0:00.23 htop
>   862 root   20   0  264M  5668  4832 S  0.6  0.1  0:03.30
> iio-sensor 2191 bristot20   0  649M 41312 32048 S  0.0  1.0
> 0:27.62 gnome-term 588 root   20   0  257M  121M  120M S  0.0
> 3.1  0:13.53 systemd-jo --- HTOP
> 
> 
> Some tasks start to use more CPU time, while others seem to use less
> CPU than it was reserved for them. See the task 14926, it is using
> only 23.8 % of the CPU, which is less than its 10/30 reservation.
> 
> I traced this task activation and noticed this:
> 
>  swapper 0 [003] 14968.332

Re: [PATCH 0/2] Begin auditing SECCOMP_RET_ERRNO return actions

2017-01-03 Thread Paul Moore

On Tue, Jan 3, 2017 at 4:21 PM, Kees Cook  wrote:
> On Tue, Jan 3, 2017 at 1:13 PM, Paul Moore  wrote:
>> On Tue, Jan 3, 2017 at 4:03 PM, Kees Cook  wrote:
>>> On Tue, Jan 3, 2017 at 12:54 PM, Paul Moore  wrote:
>>>> On Tue, Jan 3, 2017 at 3:44 PM, Kees Cook  wrote:
>>>>> I still wonder, though, isn't there a way to use auditctl to get all
>>>>> the seccomp messages you need?
>>>>
>>>> Not all of the seccomp actions are currently logged, that's one of the
>>>> problems (and the biggest at the moment).
>>>
>>> Well... sort of. It all gets passed around, but the logic isn't very
>>> obvious (or at least I always have to go look it up).
>>
>> Last time I checked SECCOMP_RET_ALLOW wasn't logged (as well as at
>> least one other action, but I can't remember which off the top of my
>> head)?
>
> Sure, but if you're using audit, you don't need RET_ALLOW to be logged
> because you'll get a full syscall log entry. Logging RET_ALLOW is
> redundant and provides no new information, it seems to me.

I only bring this up as it might be a way to help solve the
SECCOMP_RET_AUDIT problem that Tyler mentioned.

-- 
paul moore
www.paul-moore.com

[PATCH 6/6 linux-next] fs/affs/namei.c: forward declarations clean-up

2017-01-03 Thread Fabian Frederick

move dentry_operations structures and remove
forward declarations

Signed-off-by: Fabian Frederick 
---
 fs/affs/namei.c | 30 ++
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 906ff5b..fb88446 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,26 +13,6 @@
 
 typedef int (*toupper_t)(int);
 
-static int  affs_toupper(int ch);
-static int  affs_hash_dentry(const struct dentry *, struct qstr *);
-static int   affs_compare_dentry(const struct dentry *dentry,
-   unsigned int len, const char *str, const struct qstr *name);
-static int  affs_intl_toupper(int ch);
-static int  affs_intl_hash_dentry(const struct dentry *, struct qstr *);
-static int   affs_intl_compare_dentry(const struct dentry *dentry,
-   unsigned int len, const char *str, const struct qstr *name);
-
-const struct dentry_operations affs_dentry_operations = {
-   .d_hash = affs_hash_dentry,
-   .d_compare  = affs_compare_dentry,
-};
-
-const struct dentry_operations affs_intl_dentry_operations = {
-   .d_hash = affs_intl_hash_dentry,
-   .d_compare  = affs_intl_compare_dentry,
-};
-
-
 /* Simple toupper() for DOS\1 */
 
 static int
@@ -505,3 +485,13 @@ const struct export_operations affs_export_ops = {
.fh_to_dentry = affs_fh_to_dentry,
.fh_to_parent = affs_fh_to_parent,
 };
+
+const struct dentry_operations affs_dentry_operations = {
+   .d_hash = affs_hash_dentry,
+   .d_compare  = affs_compare_dentry,
+};
+
+const struct dentry_operations affs_intl_dentry_operations = {
+   .d_hash = affs_intl_hash_dentry,
+   .d_compare  = affs_intl_compare_dentry,
+};
-- 
2.7.4

[PATCH 5/6 linux-next] fs/affs: add prefix to some functions

2017-01-03 Thread Fabian Frederick

secs_to_datestamp(time64_t secs, struct affs_date *ds);
prot_to_mode(u32 prot);
mode_to_prot(struct inode *inode);

were declared without affs_ prefix

Signed-off-by: Fabian Frederick 
---
 fs/affs/affs.h | 6 +++---
 fs/affs/amigaffs.c | 6 +++---
 fs/affs/inode.c| 9 +
 fs/affs/namei.c| 6 +++---
 fs/affs/super.c| 2 +-
 5 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1b55428..2f8bab3 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -138,9 +138,9 @@ extern int  affs_remove_hash(struct inode *dir, struct 
buffer_head *rem_bh);
 extern int affs_remove_header(struct dentry *dentry);
 extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head 
*bh);
 extern voidaffs_fix_checksum(struct super_block *sb, struct buffer_head 
*bh);
-extern voidsecs_to_datestamp(time64_t secs, struct affs_date *ds);
-extern umode_t prot_to_mode(u32 prot);
-extern voidmode_to_prot(struct inode *inode);
+extern voidaffs_secs_to_datestamp(time64_t secs, struct affs_date *ds);
+extern umode_t affs_prot_to_mode(u32 prot);
+extern voidaffs_mode_to_prot(struct inode *inode);
 __printf(3, 4)
 extern voidaffs_error(struct super_block *sb, const char *function,
   const char *fmt, ...);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index fd7a754..b573c3b 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct 
buffer_head *bh)
 }
 
 void
-secs_to_datestamp(time64_t secs, struct affs_date *ds)
+affs_secs_to_datestamp(time64_t secs, struct affs_date *ds)
 {
u32  days;
u32  minute;
@@ -386,7 +386,7 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds)
 }
 
 umode_t
-prot_to_mode(u32 prot)
+affs_prot_to_mode(u32 prot)
 {
umode_t mode = 0;
 
@@ -413,7 +413,7 @@ prot_to_mode(u32 prot)
 }
 
 void
-mode_to_prot(struct inode *inode)
+affs_mode_to_prot(struct inode *inode)
 {
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index fe4e129..a5e6097 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long 
ino)
if (affs_test_opt(sbi->s_flags, SF_SETMODE))
inode->i_mode = sbi->s_mode;
else
-   inode->i_mode = prot_to_mode(prot);
+   inode->i_mode = affs_prot_to_mode(prot);
 
id = be16_to_cpu(tail->uid);
if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
@@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct 
writeback_control *wbc)
}
tail = AFFS_TAIL(sb, bh);
if (tail->stype == cpu_to_be32(ST_ROOT)) {
-   secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, 
bh)->root_change);
+   affs_secs_to_datestamp(inode->i_mtime.tv_sec,
+  &AFFS_ROOT_TAIL(sb, bh)->root_change);
} else {
tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect);
tail->size = cpu_to_be32(inode->i_size);
-   secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
+   affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change);
if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
uid = i_uid_read(inode);
gid = i_gid_read(inode);
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr 
*attr)
mark_inode_dirty(inode);
 
if (attr->ia_valid & ATTR_MODE)
-   mode_to_prot(inode);
+   affs_mode_to_prot(inode);
 out:
return error;
 }
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 04c3156f..906ff5b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -272,7 +272,7 @@ affs_create(struct inode *dir, struct dentry *dentry, 
umode_t mode, bool excl)
return -ENOSPC;
 
inode->i_mode = mode;
-   mode_to_prot(inode);
+   affs_mode_to_prot(inode);
mark_inode_dirty(inode);
 
inode->i_op = &affs_file_inode_operations;
@@ -302,7 +302,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, 
umode_t mode)
return -ENOSPC;
 
inode->i_mode = S_IFDIR | mode;
-   mode_to_prot(inode);
+   affs_mode_to_prot(inode);
 
inode->i_op = &affs_dir_inode_operations;
inode->i_fop = &affs_dir_operations;
@@ -348,7 +348,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, 
const char *symname)
inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
-   mode_to_prot(inode);
+   affs_mode_to_prot(inode);
 
error = -EIO;
bh = affs_bread(sb, inode->i_ino);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 98bd952..37532538 100644
--- a/fs/affs/super.c
+++ b

Re: [PATCH] rtc: armada38x: add __ro_after_init to armada38x_rtc_ops

2017-01-03 Thread Russell King - ARM Linux

On Tue, Jan 03, 2017 at 01:18:29PM -0800, Kees Cook wrote:
> On Mon, Jan 2, 2017 at 6:06 AM, Russell King - ARM Linux
>  wrote:
> > On Mon, Dec 26, 2016 at 05:01:02PM +0530, Bhumika Goyal wrote:
> >> The object armada38x_rtc_ops of type rtc_class_ops structure is not
> >> modified after getting initialized by armada38x_rtc_probe. Apart from
> >> getting referenced in init it is also passed as an argument to the function
> >> devm_rtc_device_register but this argument is of type const struct
> >> rtc_class_ops *. Therefore add __ro_after_init to its declaration.
> >
> > What I'd prefer here is for the structure to be duplicated, with one
> > copy having the alarm methods and one which does not.  Both can then
> > be made "const" (so placed into the read-only section at link time)
> > and the probe function select between the two.
> >
> > I think that's a cleaner and better solution, even though it's
> > slightly larger.
> >
> > I'm not a fan of __ro_after_init being used where other solutions are
> > possible.
> 
> Can the pointer that points to the struct rtc_class_ops be made ro_after_init?

It's passed into the RTC core code, and probably stored in some dynamically
allocated object, so probably no.  It's the same class of problem as every
file_operations pointer in the kernel, or the thousand other operations
structure pointers that a running kernel has.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

[PATCH 2/6 linux-next] fs/affs: add validation block function

2017-01-03 Thread Fabian Frederick

Avoid repeating the same calculation four times.

Signed-off-by: Fabian Frederick 
---
 fs/affs/affs.h | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 899256b..efe6839 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -212,6 +212,12 @@ extern const struct address_space_operations
affs_aops_ofs;
 extern const struct dentry_operations   affs_dentry_operations;
 extern const struct dentry_operations   affs_intl_dentry_operations;
 
+static inline bool affs_validblock(struct super_block *sb, int block)
+{
+   return(block >= AFFS_SB(sb)->s_reserved &&
+  block < AFFS_SB(sb)->s_partition_size);
+}
+
 static inline void
 affs_set_blocksize(struct super_block *sb, int size)
 {
@@ -221,7 +227,7 @@ static inline struct buffer_head *
 affs_bread(struct super_block *sb, int block)
 {
pr_debug("%s: %d\n", __func__, block);
-   if (block >= AFFS_SB(sb)->s_reserved && block < 
AFFS_SB(sb)->s_partition_size)
+   if (affs_validblock(sb, block))
return sb_bread(sb, block);
return NULL;
 }
@@ -229,7 +235,7 @@ static inline struct buffer_head *
 affs_getblk(struct super_block *sb, int block)
 {
pr_debug("%s: %d\n", __func__, block);
-   if (block >= AFFS_SB(sb)->s_reserved && block < 
AFFS_SB(sb)->s_partition_size)
+   if (affs_validblock(sb, block))
return sb_getblk(sb, block);
return NULL;
 }
@@ -238,7 +244,7 @@ affs_getzeroblk(struct super_block *sb, int block)
 {
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
-   if (block >= AFFS_SB(sb)->s_reserved && block < 
AFFS_SB(sb)->s_partition_size) {
+   if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
lock_buffer(bh);
memset(bh->b_data, 0 , sb->s_blocksize);
@@ -253,7 +259,7 @@ affs_getemptyblk(struct super_block *sb, int block)
 {
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
-   if (block >= AFFS_SB(sb)->s_reserved && block < 
AFFS_SB(sb)->s_partition_size) {
+   if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
wait_on_buffer(bh);
set_buffer_uptodate(bh);
-- 
2.7.4

[PATCH 3/6 linux-next] fs/affs: make affs exportable

2017-01-03 Thread Fabian Frederick

Add standard functions making AFFS work with NFS.

Functions based on ext4 implementation.
Tested on loop device.

Signed-off-by: Fabian Frederick 
---
 fs/affs/affs.h  |  1 +
 fs/affs/namei.c | 40 
 fs/affs/super.c |  1 +
 3 files changed, 42 insertions(+)

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index efe6839..1b55428 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb);
 
 /* namei.c */
 
+extern const struct export_operations affs_export_ops;
 extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned 
int len);
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, 
unsigned int);
 extern int affs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 29186d2..04c3156f 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -9,6 +9,7 @@
  */
 
 #include "affs.h"
+#include <linux/exportfs.h>
 
 typedef int (*toupper_t)(int);
 
@@ -465,3 +466,42 @@ affs_rename(struct inode *old_dir, struct dentry 
*old_dentry,
affs_brelse(bh);
return retval;
 }
+
+static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino,
+   u32 generation)
+{
+   struct inode *inode;
+
+   if (!affs_validblock(sb, ino))
+   return ERR_PTR(-ESTALE);
+
+   inode = affs_iget(sb, ino);
+   if (IS_ERR(inode))
+   return ERR_CAST(inode);
+
+   if (generation && inode->i_generation != generation) {
+   iput(inode);
+   return ERR_PTR(-ESTALE);
+   }
+
+   return inode;
+}
+
+static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid 
*fid,
+   int fh_len, int fh_type)
+{
+   return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+   affs_nfs_get_inode);
+}
+
+static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid 
*fid,
+   int fh_len, int fh_type)
+{
+   return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+   affs_nfs_get_inode);
+}
+
+const struct export_operations affs_export_ops = {
+   .fh_to_dentry = affs_fh_to_dentry,
+   .fh_to_parent = affs_fh_to_parent,
+};
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d638486..98bd952 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -507,6 +507,7 @@ static int affs_fill_super(struct super_block *sb, void 
*data, int silent)
return -ENOMEM;
}
 
+   sb->s_export_op = &affs_export_ops;
pr_debug("s_flags=%lX\n", sb->s_flags);
return 0;
 }
-- 
2.7.4

[PATCH 4/6 linux-next] fs/affs: use octal for permissions

2017-01-03 Thread Fabian Frederick

According to commit f90774e1fd27
("checkpatch: look for symbolic permissions and suggest octal instead")

Signed-off-by: Fabian Frederick 
---
 fs/affs/amigaffs.c | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 0ec65c1..fd7a754 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -391,23 +391,23 @@ prot_to_mode(u32 prot)
umode_t mode = 0;
 
if (!(prot & FIBF_NOWRITE))
-   mode |= S_IWUSR;
+   mode |= 0200;
if (!(prot & FIBF_NOREAD))
-   mode |= S_IRUSR;
+   mode |= 0400;
if (!(prot & FIBF_NOEXECUTE))
-   mode |= S_IXUSR;
+   mode |= 0100;
if (prot & FIBF_GRP_WRITE)
-   mode |= S_IWGRP;
+   mode |= 0020;
if (prot & FIBF_GRP_READ)
-   mode |= S_IRGRP;
+   mode |= 0040;
if (prot & FIBF_GRP_EXECUTE)
-   mode |= S_IXGRP;
+   mode |= 0010;
if (prot & FIBF_OTR_WRITE)
-   mode |= S_IWOTH;
+   mode |= 0002;
if (prot & FIBF_OTR_READ)
-   mode |= S_IROTH;
+   mode |= 0004;
if (prot & FIBF_OTR_EXECUTE)
-   mode |= S_IXOTH;
+   mode |= 0001;
 
return mode;
 }
@@ -418,23 +418,23 @@ mode_to_prot(struct inode *inode)
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
 
-   if (!(mode & S_IXUSR))
+   if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
-   if (!(mode & S_IRUSR))
+   if (!(mode & 0400))
prot |= FIBF_NOREAD;
-   if (!(mode & S_IWUSR))
+   if (!(mode & 0200))
prot |= FIBF_NOWRITE;
-   if (mode & S_IXGRP)
+   if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
-   if (mode & S_IRGRP)
+   if (mode & 0040)
prot |= FIBF_GRP_READ;
-   if (mode & S_IWGRP)
+   if (mode & 0020)
prot |= FIBF_GRP_WRITE;
-   if (mode & S_IXOTH)
+   if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
-   if (mode & S_IROTH)
+   if (mode & 0004)
prot |= FIBF_OTR_READ;
-   if (mode & S_IWOTH)
+   if (mode & 0002)
prot |= FIBF_OTR_WRITE;
 
AFFS_I(inode)->i_protect = prot;
-- 
2.7.4

[PATCH 1/6 linux-next] fs/affs: remove reference to affs_parent_ino()

2017-01-03 Thread Fabian Frederick

That function was removed a long time ago.

Signed-off-by: Fabian Frederick 
---
 fs/affs/affs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 2f08877..899256b 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -178,7 +178,6 @@ extern int  affs_rename(struct inode *old_dir, struct 
dentry *old_dentry,
 
 /* inode.c */
 
-extern unsigned longaffs_parent_ino(struct inode *dir);
 extern struct inode*affs_new_inode(struct inode *dir);
 extern int  affs_notify_change(struct dentry *dentry, 
struct iattr *attr);
 extern void affs_evict_inode(struct inode *inode);
-- 
2.7.4

[PATCH 0/6 linux-next] affs: make FS exportable plus some clean-up

2017-01-03 Thread Fabian Frederick

This small patchset makes AFFS work with NFS for standard operations.

Fabian Frederick (6):
  fs/affs: remove reference to affs_parent_ino()
  fs/affs: add validation block function
  fs/affs: make affs exportable
  fs/affs: use octal for permissions
  fs/affs: add prefix to some functions
  fs/affs/namei.c: forward declarations clean-up

 fs/affs/affs.h | 22 ++--
 fs/affs/amigaffs.c | 42 +++---
 fs/affs/inode.c|  9 ---
 fs/affs/namei.c| 76 +-
 fs/affs/super.c|  3 ++-
 5 files changed, 95 insertions(+), 57 deletions(-)

-- 
2.7.4

Re: [PATCH] i2c: i801: Register optional lis3lv02d i2c device on Dell machines

2017-01-03 Thread Benjamin Tissoires

On Jan 03 2017 or thereabouts, Dmitry Torokhov wrote:
> On Tue, Jan 03, 2017 at 07:50:17PM +0100, Pali Rohár wrote:
> > On Tuesday 03 January 2017 19:38:43 Dmitry Torokhov wrote:
> > > On Tue, Jan 03, 2017 at 10:06:41AM +0100, Benjamin Tissoires wrote:
> > > > On Dec 29 2016 or thereabouts, Pali Rohár wrote:
> > > > > On Thursday 29 December 2016 22:09:32 Michał Kępień wrote:
> > > > > > > On Thursday 29 December 2016 14:47:19 Michał Kępień wrote:
> > > > > > > > > On Thursday 29 December 2016 09:29:36 Michał Kępień wrote:
> > > > > > > > > > > Dell platform team told us that some (DMI
> > > > > > > > > > > whitelisted) Dell Latitude machines have ST
> > > > > > > > > > > microelectronics accelerometer at i2c address 0x29.
> > > > > > > > > > > That i2c address is not specified in DMI or ACPI, so
> > > > > > > > > > > runtime detection without whitelist which is below
> > > > > > > > > > > is not possible.
> > > > > > > > > > > 
> > > > > > > > > > > Presence of that ST microelectronics accelerometer is
> > > > > > > > > > > verified by existence of SMO88xx ACPI device which
> > > > > > > > > > > represent that accelerometer. Unfortunately without
> > > > > > > > > > > i2c address.
> > > > > > > > > > 
> > > > > > > > > > This part of the commit message sounded a bit confusing
> > > > > > > > > > to me at first because there is already an ACPI driver
> > > > > > > > > > which handles SMO88xx
> > > > > > > > > > 
> > > > > > > > > > devices (dell-smo8800).  My understanding is that:
> > > > > > > > > >   * the purpose of this patch is to expose a richer
> > > > > > > > > >   interface (as
> > > > > > > > > >   
> > > > > > > > > > provided by lis3lv02d) to these devices on some
> > > > > > > > > > machines,
> > > > > > > > > >   
> > > > > > > > > >   * on whitelisted machines, dell-smo8800 and lis3lv02d
> > > > > > > > > >   can work
> > > > > > > > > >   
> > > > > > > > > > simultaneously (even though dell-smo8800
> > > > > > > > > > effectively duplicates the work that lis3lv02d
> > > > > > > > > > does).
> > > > > > > > > 
> > > > > > > > > No. dell-smo8800 reads from ACPI irq number and exports
> > > > > > > > > /dev/freefall device which notify userspace about falls.
> > > > > > > > > lis3lv02d is i2c driver which exports axes of
> > > > > > > > > > accelerometer. Additionally lis3lv02d can export also
> > > > > > > > > /dev/freefall if registerer of i2c device provides irq
> > > > > > > > > number -- which is not case of this patch.
> > > > > > > > > 
> > > > > > > > > So both drivers are doing different things and both are
> > > > > > > > > useful.
> > > > > > > > > 
> > > > > > > > > IIRC both dell-smo8800 and lis3lv02d represent one HW
> > > > > > > > > device (that ST microelectronics accelerometer) but due
> > > > > > > > > to complicated HW abstraction and layers on Dell laptops
> > > > > > > > > it is handled by two drivers, one ACPI and one i2c.
> > > > > > > > > 
> > > > > > > > > Yes, in ideal world irq number should be passed to
> > > > > > > > > lis3lv02d driver and that would export whole device
> > > > > > > > > (with /dev/freefall too), but due to HW abstraction it
> > > > > > > > > is too much complicated...
> > > > > > > > 
> > > > > > > > Why?  AFAICT, all that is required to pass that IRQ number
> > > > > > > > all the way down to lis3lv02d is to set the irq field of
> > > > > > > > the struct i2c_board_info you are passing to
> > > > > > > > i2c_new_device().  And you can extract that IRQ number
> > > > > > > > e.g. in check_acpi_smo88xx_device(). However, you would
> > > > > > > > then need to make sure dell-smo8800 does not attempt to
> > > > > > > > request the same IRQ on whitelisted machines.  This got me
> > > > > > > > thinking about a way to somehow incorporate your changes
> > > > > > > > into dell-smo8800 using Wolfram's bus_notifier suggestion,
> > > > > > > > but I do not have a working solution for now.  What is
> > > > > > > > tempting about this approach is that you would not have to
> > > > > > > > scan the ACPI namespace in search of SMO88xx devices,
> > > > > > > > because smo8800_add() is automatically called for them. 
> > > > > > > > However, I fear that the resulting solution may be more
> > > > > > > > complicated than the one you submitted.
> > > > > > > 
> > > > > > > > Then we need to deal with a lot of problems. Order of loading
> > > > > > > .ko modules is undefined. Binding devices to drivers
> > > > > > > registered by .ko module is also in "random" order. At any
> > > > > > > time any of those .ko module can be unloaded or at least
> > > > > > > device unbind (via sysfs) from driver... And there can be
> > > > > > > some pathological situation (thanks to adding ACPI layer as
> > > > > > > Andy pointed) that there will be more SMO88xx devices in
> > > > > > > ACPI. Plus you can compile kernel with and without those
> > > > > > > modules and also you can blacklist loading them (so compile
> > > > > > > time check is not enough). And still

RE: [PATCH 7/8] staging: fsl-dpaa2/eth: Add TODO file

2017-01-03 Thread Stuart Yoder


> -Original Message-
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Tuesday, January 03, 2017 10:48 AM
> To: Stuart Yoder 
> Cc: de...@driverdev.osuosl.org; a...@arndb.de; Roy Pledge 
> ; Alexandru Marginean
> ; linux-kernel@vger.kernel.org; ag...@suse.de; 
> Bogdan Hamciuc
> ; Laurentiu Tudor 
> Subject: Re: [PATCH 7/8] staging: fsl-dpaa2/eth: Add TODO file
> 
> On Tue, Dec 06, 2016 at 06:10:38PM +, Stuart Yoder wrote:
> >
> >
> > > -Original Message-
> > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > Sent: Tuesday, December 06, 2016 11:56 AM
> > > To: Stuart Yoder 
> > > Cc: Ruxandra Ioana Radulescu ; 
> > > de...@driverdev.osuosl.org; linux-
> > > ker...@vger.kernel.org; ag...@suse.de; a...@arndb.de; Alexandru Marginean
> ;
> > > Bogdan Hamciuc ; Roy Pledge ; 
> > > Laurentiu Tudor
> > > 
> > > Subject: Re: [PATCH 7/8] staging: fsl-dpaa2/eth: Add TODO file
> > >
> > > On Tue, Dec 06, 2016 at 12:59:59PM +, Stuart Yoder wrote:
> > > >
> > > >
> > > > > -Original Message-
> > > > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > > > Sent: Tuesday, December 06, 2016 4:20 AM
> > > > > To: Ruxandra Ioana Radulescu 
> > > > > Cc: de...@driverdev.osuosl.org; linux-kernel@vger.kernel.org; 
> > > > > ag...@suse.de; a...@arndb.de;
> > > Alexandru
> > > > > Marginean ; Bogdan Hamciuc 
> > > > > ; Stuart Yoder
> > > > > ; Roy Pledge ; Laurentiu 
> > > > > Tudor
> 
> > > > > Subject: Re: [PATCH 7/8] staging: fsl-dpaa2/eth: Add TODO file
> > > > >
> > > > > On Tue, Dec 06, 2016 at 10:06:25AM +, Ruxandra Ioana Radulescu 
> > > > > wrote:
> > > > > > > -Original Message-
> > > > > > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > > > > > Sent: Tuesday, December 06, 2016 11:58 AM
> > > > > > > To: Ruxandra Ioana Radulescu 
> > > > > > > Cc: de...@driverdev.osuosl.org; linux-kernel@vger.kernel.org;
> > > > > > > ag...@suse.de; a...@arndb.de; Alexandru Marginean
> > > > > > > ; Bogdan Hamciuc
> > > > > > > ; Stuart Yoder ; Roy
> > > > > > > Pledge ; Laurentiu Tudor
> > > > > > > 
> > > > > > > Subject: Re: [PATCH 7/8] staging: fsl-dpaa2/eth: Add TODO file
> > > > > > >
> > > > > > > On Tue, Dec 06, 2016 at 03:34:41AM -0600, Ioana Radulescu wrote:
> > > > > > > > Add a list of TODO items for the Ethernet driver
> > > > > > > >
> > > > > > > > Signed-off-by: Ioana Radulescu 
> > > > > > > > ---
> > > > > > > >  drivers/staging/fsl-dpaa2/ethernet/TODO |9 +
> > > > > > > >  1 files changed, 9 insertions(+), 0 deletions(-)
> > > > > > > >  create mode 100644 drivers/staging/fsl-dpaa2/ethernet/TODO
> > > > > > > >
> > > > > > > > diff --git a/drivers/staging/fsl-dpaa2/ethernet/TODO 
> > > > > > > > b/drivers/staging/fsl-
> > > > > > > dpaa2/ethernet/TODO
> > > > > > > > new file mode 100644
> > > > > > > > index 000..833265b
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/drivers/staging/fsl-dpaa2/ethernet/TODO
> > > > > > > > @@ -0,0 +1,9 @@
> > > > > > > > +* Add a DPAA2 MAC kernel driver in order to allow PHY 
> > > > > > > > management;
> > > > > > > currently
> > > > > > > > +  the DPMAC objects and their link to DPNIs are handled by MC 
> > > > > > > > internally
> > > > > > > > +  and all PHYs are seen as fixed-link
> > > > > > > > +* add more debug support: decide how to expose detailed debug
> > > > > > > statistics,
> > > > > > > > +  add ingress error queue support
> > > > > > > > +* MC firmware uprev; the DPAA2 objects used by the Ethernet 
> > > > > > > > driver
> > > > > > > need to
> > > > > > > > +  be kept in sync with binary interface changes in MC
> > > > > > > > +* refine README file
> > > > > > > > +* cleanup
> > > > > > >
> > > > > > > These seem like very minor things, why not just spend a week and 
> > > > > > > do this
> > > > > > > work and get it merged to the "correct" portion of the kernel 
> > > > > > > tree?  Why
> > > > > > > does this have to go into staging?
> > > > > >
> > > > > > Actually the first bullet is not minor at all and requires some 
> > > > > > design
> > > > > > choices that we aren't yet completely clear with, and which in turn 
> > > > > > may
> > > > > > affect parts of the Ethernet driver. We figured it would be best to 
> > > > > > try
> > > > > > adding this in staging first (and also provide this way an example 
> > > > > > of using
> > > > > > the fsl-mc bus and dpio driver) than wait until all MAC development
> > > > > > questions are ironed-out.
> > > > >
> > > > > Ok, that makes sense.
> > > > >
> > > > > > I can remove the other bullets from the TODO list if you think 
> > > > > > they're
> > > > > > not worth mentioning.
> > > > >
> > > > > No, they should be mentioned, I just didn't think they are all that 
> > > > > much
> > > > > work, and if you didn't have major things needed to get done, you 
> > > > > could
> > > > > just knock it all out in a week of local development.
> > > > >
> > > > > I'll look into taking this into the tree later today...
> > > >
> >

Re: [PATCH v3 3/3] nfc: trf7970a: Prevent repeated polling from crashing the kernel

2017-01-03 Thread Mark Greer

On Tue, Jan 03, 2017 at 01:35:18PM -0500, Geoff Lansberry wrote:
> On Tue, Jan 3, 2017 at 11:33 AM, Mark Greer  wrote:
> > On Tue, Dec 27, 2016 at 09:18:32AM -0500, Geoff Lansberry wrote:

> >> In the meantime - here is some more info about how we use it.
> >>
> >> We do use NFC structures.  I did find an interesting clue in that
> >> there are certain bottles that cause neard to segfault; I'm not sure
> >> what is different about them.  We write a string, like
> >> "coppola_chardonnay_2015" to the bottles.
> >
> > Off the top of my head, it could be the length of the text.
> > It would be useful to compare the data that works to the data
> > that doesn't work.  Can you install NXP's 'TagInfo' app on a
> > smartphone and scan tags with working & non-working data?
> > You can email the data from the app to yourself, edit out
> > the cruft, and share here.
> 
> The data is always the same - and the tags are all the same.  Only
> difference is that the tag is physically different, and perhaps
> orientation; distance from antenna to tag is fixed.

Interesting...  They're all type 2 tags, right?

> I can't even
> write the tags at all, so reading them will show blank.   Also a minor
> but significant detail, is that the tags are embedded in such a way
> that the phone cannot get close enough to them to connect.

This section had me completely confused for a couple minutes until I realized
that you mean that you can read & write the tags using the trf7970a with
an attached antenna but not with your phone.  Is that correct?

If so, try a tag that isn't embedded in something else and move it around
the back of the phone.  Try to find where it works best.  The phone
manufacturers are notorius for paying little attention to the NFC antenna
they put on their products.  For example, I have a Samsung S5 next to me
and it seems to work best around the center of the phone.  I've used others
where I had to use the upper-left or upper-right corner of the phone.

Mark
--

Re: [PATCH 0/2] Begin auditing SECCOMP_RET_ERRNO return actions

2017-01-03 Thread Kees Cook

On Tue, Jan 3, 2017 at 1:13 PM, Paul Moore  wrote:
> On Tue, Jan 3, 2017 at 4:03 PM, Kees Cook  wrote:
>> On Tue, Jan 3, 2017 at 12:54 PM, Paul Moore  wrote:
>>> On Tue, Jan 3, 2017 at 3:44 PM, Kees Cook  wrote:
>>>> I still wonder, though, isn't there a way to use auditctl to get all
>>>> the seccomp messages you need?
>>>
>>> Not all of the seccomp actions are currently logged, that's one of the
>>> problems (and the biggest at the moment).
>>
>> Well... sort of. It all gets passed around, but the logic isn't very
>> obvious (or at least I always have to go look it up).
>
> Last time I checked SECCOMP_RET_ALLOW wasn't logged (as well as at
> least one other action, but I can't remember which off the top of my
> head)?

Sure, but if you're using audit, you don't need RET_ALLOW to be logged
because you'll get a full syscall log entry. Logging RET_ALLOW is
redundant and provides no new information, it seems to me.

-Kees

-- 
Kees Cook
Nexus Security

Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2017-01-03 Thread Arnd Bergmann

On Tuesday, January 3, 2017 4:24:36 PM CET Greg Kroah-Hartman wrote:
> On Wed, Mar 02, 2016 at 08:06:46PM +0100, Arnd Bergmann wrote:
> > The icn, act2000 and pcbit drivers are all for very old hardware,
> > and it is highly unlikely that anyone is actually still using them
> > on modern kernels, if at all.
> > 
> > All three drivers apparently are for hardware that predates PCI
> > being the common connector, as they are ISA-only and active
> > PCI ISDN cards were widely available in the 1990s.
> > 
> > Looking through the git logs, I cannot find any indication of a
> > patch to any of these drivers that has been tested on real hardware,
> > only cleanups or global API changes.
> > 
> > Signed-off-by: Arnd Bergmann 
> > Acked-by: Karsten Keil 
> 
> This patch got added in the 4.6 kernel release.  As I am now taking
> patches for 4.11-rc1, I figure it is time to just delete the
> drivers/staging/i4l/ directory now, given that no one has really done
> anything with it.  If people show up that wish to maintain it, I'll be
> glad to revert it, or if someone really screams in the next week.
> Otherwise it's time to just move on 

Sounds good to me. My original series contained four more patches that
I did not post again after there was some concern[1] that we did not
come to a conclusion on:

isdn: gigaset: remove i4l code
isdn: move isdnhdlc out of i4l
isdn: i4l: move hisax driver to staging
isdn: move i4l to staging

I can post those as well, at least I think the first two are helpful
for untangling i4l from the rest of ISDN.  I also still think that
moving hisax and i4l to staging is reasonable given the state of
that code, even if there are a couple of users today.

Arnd

[1] https://lkml.org/lkml/2016/3/4/762

Re: [PATCH] kernel/watchdog.c: Do not hardcode CPU 0 as the initial thread

2017-01-03 Thread Prarit Bhargava



On 12/01/2016 03:06 PM, Don Zickus wrote:
> On Tue, Nov 29, 2016 at 08:15:21AM -0500, Prarit Bhargava wrote:
>> When CONFIG_BOOTPARAM_HOTPLUG_CPU0 is enabled, the socket containing the
>> boot cpu can be replaced.  During the hot add event, the message
>>
>> NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
>>
>> is output implying that the NMI watchdog was disabled at some point.  This
>> is not the case and the message has caused confusion for users of systems
>> that support the removal of the boot cpu socket.
>>
>> The watchdog code is coded to assume that cpu 0 is always the first cpu to
>> initialize the watchdog, and the last to stop its watchdog thread.  That
>> is not the case for initializing if cpu 0 has been removed and added.  The
>> removal case has never been correct because the smpboot code will remove
>> the watchdog threads starting with the lowest cpu number.
>>
>> This patch adds watchdog_cpus to track the number of cpus with active NMI
>> watchdog threads so that the first and last thread can be used to set and
>> clear the value of firstcpu_err.  firstcpu_err is set when the first
>> watchdog thread is enabled, and cleared when the last watchdog thread is
>> disabled.
>>
>> This patch is based on top of linux-next akpm-base.
> 
> It passed my tests.  Thanks!
> 
> Acked-by: Don Zickus 

Just re-pinging on this.  I haven't seen it picked up by anyone.

P.

> 
> 
>>
>> Signed-off-by: Prarit Bhargava 
>> Cc: Borislav Petkov 
>> Cc: Tejun Heo 
>> Cc: Don Zickus 
>> Cc: Hidehiro Kawai 
>> Cc: Thomas Gleixner 
>> Cc: Andi Kleen 
>> Cc: Joshua Hunt 
>> Cc: Ingo Molnar 
>> Cc: Babu Moger 
>> ---
>>  kernel/watchdog_hld.c |   25 +++--
>>  1 file changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
>> index 84016c8aee6b..30761f7504ef 100644
>> --- a/kernel/watchdog_hld.c
>> +++ b/kernel/watchdog_hld.c
>> @@ -134,12 +134,14 @@ static void watchdog_overflow_callback(struct perf_event *event,
>>   * Reduce the watchdog noise by only printing messages
>>   * that are different from what cpu0 displayed.
>>   */
>> -static unsigned long cpu0_err;
>> +static unsigned long firstcpu_err;
>> +static atomic_t watchdog_cpus;
>>  
>>  int watchdog_nmi_enable(unsigned int cpu)
>>  {
>>  struct perf_event_attr *wd_attr;
>>  struct perf_event *event = per_cpu(watchdog_ev, cpu);
>> +int firstcpu = 0;
>>  
>>  /* nothing to do if the hard lockup detector is disabled */
>>  if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
>> @@ -153,19 +155,22 @@ int watchdog_nmi_enable(unsigned int cpu)
>>  if (event != NULL)
>>  goto out_enable;
>>  
>> +if (atomic_inc_return(&watchdog_cpus) == 1)
>> +firstcpu = 1;
>> +
>>  wd_attr = &wd_hw_attr;
>>  wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
>>  
>>  /* Try to register using hardware perf events */
>>  event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
>>  
>> -/* save cpu0 error for future comparision */
>> -if (cpu == 0 && IS_ERR(event))
>> -cpu0_err = PTR_ERR(event);
>> +/* save the first cpu's error for future comparision */
>> +if (firstcpu && IS_ERR(event))
>> +firstcpu_err = PTR_ERR(event);
>>  
>>  if (!IS_ERR(event)) {
>> -/* only print for cpu0 or different than cpu0 */
>> -if (cpu == 0 || cpu0_err)
>> +/* only print for the first cpu initialized */
>> +if (firstcpu || firstcpu_err)
>>  pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
>>  goto out_save;
>>  }
>> @@ -183,7 +188,7 @@ int watchdog_nmi_enable(unsigned int cpu)
>>  smp_mb__after_atomic();
>>  
>>  /* skip displaying the same error again */
>> -if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
>> +if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
>>  return PTR_ERR(event);
>>  
>>  /* vary the KERN level based on the returned errno */
>> @@ -219,9 +224,9 @@ void watchdog_nmi_disable(unsigned int cpu)
>>  
>>  /* should be in cleanup, but blocks oprofile */
>>  perf_event_release_kernel(event);
>> -}
>> -if (cpu == 0) {
>> +
>>  /* watchdog_nmi_enable() expects this to be zero initially. */
>> -cpu0_err = 0;
>> +if (atomic_dec_and_test(&watchdog_cpus))
>> +firstcpu_err = 0;
>>  }
>>  }
>> -- 
>> 1.7.9.3
>>

< 1 2 3 4 5 6 7 8 9 10 >

201 - 300 of 934 matches

Mail list logo