Re: [PATCH] staging:iio:adc:ad7816: Backward resources cleanups in probe

2018-06-18 Thread Lars-Peter Clausen
On 06/19/2018 01:38 AM, Karim Eshapa wrote:
> Backward cleanups for all resources allocated in probing
> in case of failure at any regestering or allocation step.

Hi,

Thanks for the patch.

Resources that are allocated with devm_ are freed automatically in case of
an error, so this patch should not be necessary.

> 
> Signed-off-by: Karim Eshapa 
> ---
>  drivers/staging/iio/adc/ad7816.c | 22 +-
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/staging/iio/adc/ad7816.c 
> b/drivers/staging/iio/adc/ad7816.c
> index bf76a8620bdb..5ff14c830451 100644
> --- a/drivers/staging/iio/adc/ad7816.c
> +++ b/drivers/staging/iio/adc/ad7816.c
> @@ -373,7 +373,7 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   if (ret) {
>   dev_err(_dev->dev, "Fail to request rdwr gpio PIN %d.\n",
>   chip->rdwr_pin);
> - return ret;
> + goto device_free;
>   }
>   gpio_direction_input(chip->rdwr_pin);
>   ret = devm_gpio_request(_dev->dev, chip->convert_pin,
> @@ -381,7 +381,7 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   if (ret) {
>   dev_err(_dev->dev, "Fail to request convert gpio PIN %d.\n",
>   chip->convert_pin);
> - return ret;
> + goto free_rdwr_pin;
>   }
>   gpio_direction_input(chip->convert_pin);
>   ret = devm_gpio_request(_dev->dev, chip->busy_pin,
> @@ -389,7 +389,7 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   if (ret) {
>   dev_err(_dev->dev, "Fail to request busy gpio PIN %d.\n",
>   chip->busy_pin);
> - return ret;
> + goto free_convert_pin;
>   }
>   gpio_direction_input(chip->busy_pin);
>  
> @@ -407,17 +407,29 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   indio_dev->name,
>   indio_dev);
>   if (ret)
> - return ret;
> + goto free_busy_pin;
>   }
>  
>   ret = devm_iio_device_register(_dev->dev, indio_dev);
>   if (ret)
> - return ret;
> + goto free_dev_irq;
>  
>   dev_info(_dev->dev, "%s temperature sensor and ADC registered.\n",
>indio_dev->name);
>  
>   return 0;
> +free_dev_irq:
> + devm_free_irq(_dev->dev, spi_dev->irq, indio_dev);
> +free_busy_pin:
> + devm_gpio_free(_dev->dev, chip->busy_pin);
> +free_convert_pin:
> + devm_gpio_free(_dev->dev, chip->convert_pin);
> +free_rdwr_pin:
> + devm_gpio_free(_dev->dev, chip->rdwr_pin);
> +device_free:
> + devm_iio_device_free(_dev->dev, indio_dev);
> +
> + return ret;
>  }
>  
>  static const struct spi_device_id ad7816_id[] = {
> 



Re: [PATCH] staging:iio:adc:ad7816: Backward resources cleanups in probe

2018-06-18 Thread Lars-Peter Clausen
On 06/19/2018 01:38 AM, Karim Eshapa wrote:
> Backward cleanups for all resources allocated in probing
> in case of failure at any regestering or allocation step.

Hi,

Thanks for the patch.

Resources that are allocated with devm_ are freed automatically in case of
an error, so this patch should not be necessary.

> 
> Signed-off-by: Karim Eshapa 
> ---
>  drivers/staging/iio/adc/ad7816.c | 22 +-
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/staging/iio/adc/ad7816.c 
> b/drivers/staging/iio/adc/ad7816.c
> index bf76a8620bdb..5ff14c830451 100644
> --- a/drivers/staging/iio/adc/ad7816.c
> +++ b/drivers/staging/iio/adc/ad7816.c
> @@ -373,7 +373,7 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   if (ret) {
>   dev_err(_dev->dev, "Fail to request rdwr gpio PIN %d.\n",
>   chip->rdwr_pin);
> - return ret;
> + goto device_free;
>   }
>   gpio_direction_input(chip->rdwr_pin);
>   ret = devm_gpio_request(_dev->dev, chip->convert_pin,
> @@ -381,7 +381,7 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   if (ret) {
>   dev_err(_dev->dev, "Fail to request convert gpio PIN %d.\n",
>   chip->convert_pin);
> - return ret;
> + goto free_rdwr_pin;
>   }
>   gpio_direction_input(chip->convert_pin);
>   ret = devm_gpio_request(_dev->dev, chip->busy_pin,
> @@ -389,7 +389,7 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   if (ret) {
>   dev_err(_dev->dev, "Fail to request busy gpio PIN %d.\n",
>   chip->busy_pin);
> - return ret;
> + goto free_convert_pin;
>   }
>   gpio_direction_input(chip->busy_pin);
>  
> @@ -407,17 +407,29 @@ static int ad7816_probe(struct spi_device *spi_dev)
>   indio_dev->name,
>   indio_dev);
>   if (ret)
> - return ret;
> + goto free_busy_pin;
>   }
>  
>   ret = devm_iio_device_register(_dev->dev, indio_dev);
>   if (ret)
> - return ret;
> + goto free_dev_irq;
>  
>   dev_info(_dev->dev, "%s temperature sensor and ADC registered.\n",
>indio_dev->name);
>  
>   return 0;
> +free_dev_irq:
> + devm_free_irq(_dev->dev, spi_dev->irq, indio_dev);
> +free_busy_pin:
> + devm_gpio_free(_dev->dev, chip->busy_pin);
> +free_convert_pin:
> + devm_gpio_free(_dev->dev, chip->convert_pin);
> +free_rdwr_pin:
> + devm_gpio_free(_dev->dev, chip->rdwr_pin);
> +device_free:
> + devm_iio_device_free(_dev->dev, indio_dev);
> +
> + return ret;
>  }
>  
>  static const struct spi_device_id ad7816_id[] = {
> 



Re: [RFC PATCH v2 1/6] mtd: rawnand: marvell: Handle on-die ECC

2018-06-18 Thread Boris Brezillon
On Tue, 19 Jun 2018 17:31:20 +1200
Chris Packham  wrote:

> From the controllers point of view this is the same as no or
> software only ECC.
> 
> Signed-off-by: Chris Packham 

Reviewed-by: Boris Brezillon 

> ---
> Changes in v2:
> - New
> 
>  drivers/mtd/nand/raw/marvell_nand.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/mtd/nand/raw/marvell_nand.c 
> b/drivers/mtd/nand/raw/marvell_nand.c
> index ebb1d141b900..ba6889bbe802 100644
> --- a/drivers/mtd/nand/raw/marvell_nand.c
> +++ b/drivers/mtd/nand/raw/marvell_nand.c
> @@ -2157,6 +2157,7 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd,
>   break;
>   case NAND_ECC_NONE:
>   case NAND_ECC_SOFT:
> + case NAND_ECC_ON_DIE:
>   if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 &&
>   mtd->writesize != SZ_2K) {
>   dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n",



Re: [RFC PATCH v2 1/6] mtd: rawnand: marvell: Handle on-die ECC

2018-06-18 Thread Boris Brezillon
On Tue, 19 Jun 2018 17:31:20 +1200
Chris Packham  wrote:

> From the controllers point of view this is the same as no or
> software only ECC.
> 
> Signed-off-by: Chris Packham 

Reviewed-by: Boris Brezillon 

> ---
> Changes in v2:
> - New
> 
>  drivers/mtd/nand/raw/marvell_nand.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/mtd/nand/raw/marvell_nand.c 
> b/drivers/mtd/nand/raw/marvell_nand.c
> index ebb1d141b900..ba6889bbe802 100644
> --- a/drivers/mtd/nand/raw/marvell_nand.c
> +++ b/drivers/mtd/nand/raw/marvell_nand.c
> @@ -2157,6 +2157,7 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd,
>   break;
>   case NAND_ECC_NONE:
>   case NAND_ECC_SOFT:
> + case NAND_ECC_ON_DIE:
>   if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 &&
>   mtd->writesize != SZ_2K) {
>   dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n",



Re: [PATCH 4.14 000/189] 4.14.51-stable review

2018-06-18 Thread Naresh Kamboju
On 18 June 2018 at 13:41, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.14.51 release.
> There are 189 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Wed Jun 20 08:11:38 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.51-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

Summary


kernel: 4.14.51-rc1
git repo: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.14.y
git commit: 3e67e2023733ff0796077205a09178330bdac357
git describe: v4.14.50-189-g3e67e2023733
Test details: 
https://qa-reports.linaro.org/lkft/linux-stable-rc-4.14-oe/build/v4.14.50-189-g3e67e2023733


No regressions (compared to build v4.14.50-190-gc758a48da7f5)


Ran 11416 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* ltp-io-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org


Re: [PATCH 4.14 000/189] 4.14.51-stable review

2018-06-18 Thread Naresh Kamboju
On 18 June 2018 at 13:41, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.14.51 release.
> There are 189 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Wed Jun 20 08:11:38 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.51-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

Summary


kernel: 4.14.51-rc1
git repo: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.14.y
git commit: 3e67e2023733ff0796077205a09178330bdac357
git describe: v4.14.50-189-g3e67e2023733
Test details: 
https://qa-reports.linaro.org/lkft/linux-stable-rc-4.14-oe/build/v4.14.50-189-g3e67e2023733


No regressions (compared to build v4.14.50-190-gc758a48da7f5)


Ran 11416 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* ltp-io-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org


Re: [PATCH v3] mtd: rawnand: mxc: set spare area size register explicitly

2018-06-18 Thread Boris Brezillon
On Mon, 18 Jun 2018 22:41:03 +0200
Martin Kaiser  wrote:

> The v21 version of the NAND flash controller contains a Spare Area Size
> Register (SPAS) at offset 0x10. Its setting defaults to the maximum
> spare area size of 218 bytes. The size that is set in this register is
> used by the controller when it calculates the ECC bytes internally in
> hardware.
> 
> Usually, this register is updated from settings in the IIM fuses when
> the system is booting from NAND flash. For other boot media, however,
> the SPAS register remains at the default setting, which may not work for
> the particular flash chip on the board. The same goes for flash chips
> whose configuration cannot be set in the IIM fuses (e.g. chips with 2k
> sector size and 128 bytes spare area size can't be configured in the IIM
> fuses on imx25 systems).
> 
> Set the SPAS register explicitly during the preset operation. Derive the
> register value from mtd->oobsize that was detected during probe by
> decoding the flash chip's ID bytes.
> 
> While at it, rename the define for the spare area register's offset to
> NFC_V21_RSLTSPARE_AREA. The register at offset 0x10 on v1 controllers is
> different from the register on v21 controllers.
> 
> Signed-off-by: Martin Kaiser 
> Cc: sta...@vger.kernel.org
> Fixes: d484018 ("mtd: mxc_nand: set NFC registers after reset")

You could have kept Sacha's R-b, it's not like the patch completely
changed between v1 and v3. Also, just nitpicking, but I prefer when
Fixes and Cc-stable tags are placed before author's SoB. No need
to send a new version for that, I'll fix it when applying.

Thanks,

Boris

> ---
> changes in v3
>- add a Fixes tag pointing to the commit that introduced the
>  preset() operation
> 
> changes in v2
>- fix the commit message
>- use '/ 2' instead of shift operator for division
> 
>  drivers/mtd/nand/raw/mxc_nand.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
> index 45786e7..26cef21 100644
> --- a/drivers/mtd/nand/raw/mxc_nand.c
> +++ b/drivers/mtd/nand/raw/mxc_nand.c
> @@ -48,7 +48,7 @@
>  #define NFC_V1_V2_CONFIG (host->regs + 0x0a)
>  #define NFC_V1_V2_ECC_STATUS_RESULT  (host->regs + 0x0c)
>  #define NFC_V1_V2_RSLTMAIN_AREA  (host->regs + 0x0e)
> -#define NFC_V1_V2_RSLTSPARE_AREA (host->regs + 0x10)
> +#define NFC_V21_RSLTSPARE_AREA   (host->regs + 0x10)
>  #define NFC_V1_V2_WRPROT (host->regs + 0x12)
>  #define NFC_V1_UNLOCKSTART_BLKADDR   (host->regs + 0x14)
>  #define NFC_V1_UNLOCKEND_BLKADDR (host->regs + 0x16)
> @@ -1274,6 +1274,9 @@ static void preset_v2(struct mtd_info *mtd)
>   writew(config1, NFC_V1_V2_CONFIG1);
>   /* preset operation */
>  
> + /* spare area size in 16-bit half-words */
> + writew(mtd->oobsize / 2, NFC_V21_RSLTSPARE_AREA);
> +
>   /* Unlock the internal RAM Buffer */
>   writew(0x2, NFC_V1_V2_CONFIG);
>  



Re: [PATCH v3] mtd: rawnand: mxc: set spare area size register explicitly

2018-06-18 Thread Boris Brezillon
On Mon, 18 Jun 2018 22:41:03 +0200
Martin Kaiser  wrote:

> The v21 version of the NAND flash controller contains a Spare Area Size
> Register (SPAS) at offset 0x10. Its setting defaults to the maximum
> spare area size of 218 bytes. The size that is set in this register is
> used by the controller when it calculates the ECC bytes internally in
> hardware.
> 
> Usually, this register is updated from settings in the IIM fuses when
> the system is booting from NAND flash. For other boot media, however,
> the SPAS register remains at the default setting, which may not work for
> the particular flash chip on the board. The same goes for flash chips
> whose configuration cannot be set in the IIM fuses (e.g. chips with 2k
> sector size and 128 bytes spare area size can't be configured in the IIM
> fuses on imx25 systems).
> 
> Set the SPAS register explicitly during the preset operation. Derive the
> register value from mtd->oobsize that was detected during probe by
> decoding the flash chip's ID bytes.
> 
> While at it, rename the define for the spare area register's offset to
> NFC_V21_RSLTSPARE_AREA. The register at offset 0x10 on v1 controllers is
> different from the register on v21 controllers.
> 
> Signed-off-by: Martin Kaiser 
> Cc: sta...@vger.kernel.org
> Fixes: d484018 ("mtd: mxc_nand: set NFC registers after reset")

You could have kept Sacha's R-b, it's not like the patch completely
changed between v1 and v3. Also, just nitpicking, but I prefer when
Fixes and Cc-stable tags are placed before author's SoB. No need
to send a new version for that, I'll fix it when applying.

Thanks,

Boris

> ---
> changes in v3
>- add a Fixes tag pointing to the commit that introduced the
>  preset() operation
> 
> changes in v2
>- fix the commit message
>- use '/ 2' instead of shift operator for division
> 
>  drivers/mtd/nand/raw/mxc_nand.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
> index 45786e7..26cef21 100644
> --- a/drivers/mtd/nand/raw/mxc_nand.c
> +++ b/drivers/mtd/nand/raw/mxc_nand.c
> @@ -48,7 +48,7 @@
>  #define NFC_V1_V2_CONFIG (host->regs + 0x0a)
>  #define NFC_V1_V2_ECC_STATUS_RESULT  (host->regs + 0x0c)
>  #define NFC_V1_V2_RSLTMAIN_AREA  (host->regs + 0x0e)
> -#define NFC_V1_V2_RSLTSPARE_AREA (host->regs + 0x10)
> +#define NFC_V21_RSLTSPARE_AREA   (host->regs + 0x10)
>  #define NFC_V1_V2_WRPROT (host->regs + 0x12)
>  #define NFC_V1_UNLOCKSTART_BLKADDR   (host->regs + 0x14)
>  #define NFC_V1_UNLOCKEND_BLKADDR (host->regs + 0x16)
> @@ -1274,6 +1274,9 @@ static void preset_v2(struct mtd_info *mtd)
>   writew(config1, NFC_V1_V2_CONFIG1);
>   /* preset operation */
>  
> + /* spare area size in 16-bit half-words */
> + writew(mtd->oobsize / 2, NFC_V21_RSLTSPARE_AREA);
> +
>   /* Unlock the internal RAM Buffer */
>   writew(0x2, NFC_V1_V2_CONFIG);
>  



Re: [PATCH V7] powercap/drivers/idle_injection: Add an idle injection framework

2018-06-18 Thread Viresh Kumar
On 18-06-18, 13:28, Daniel Lezcano wrote:
> for this specific case, we can use the park() callback to set should_run
> to false, no ?

Yep, that can be one option. Or just iterate through all the CPUs in the mask.

-- 
viresh


Re: [PATCH V7] powercap/drivers/idle_injection: Add an idle injection framework

2018-06-18 Thread Viresh Kumar
On 18-06-18, 13:28, Daniel Lezcano wrote:
> for this specific case, we can use the park() callback to set should_run
> to false, no ?

Yep, that can be one option. Or just iterate through all the CPUs in the mask.

-- 
viresh


Re: [PATCH 2/2] sched/fair: Advance global expiration when period timer is restarted

2018-06-18 Thread Xunlei Pang
On 6/19/18 12:36 PM, Cong Wang wrote:
> On Mon, Jun 18, 2018 at 2:16 AM, Xunlei Pang  wrote:
>> I noticed the group frequently got throttled even it consumed
>> low cpu usage, this caused some jitters on the response time
>> to some of our business containers enabling cpu quota.
>>
>> It's very easy to reproduce:
>> mkdir /sys/fs/cgroup/cpu/test
>> cd /sys/fs/cgroup/cpu/test
>> echo 10 > cpu.cfs_quota_us
>> echo $$ > tasks
>> then repeat:
>> cat cpu.stat |grep nr_throttled  // nr_throttled will increase
>>
>> After some analysis, we found that cfs_rq::runtime_remaining will
>> be cleared by expire_cfs_rq_runtime() due to two equal but stale
>> "cfs_{b|q}->runtime_expires" after period timer is re-armed.
>>
>> The global expiration should be advanced accordingly when the
>> bandwidth period timer is restarted.
>>
> 
> I observed the same problem and already sent some patches:
> 
> https://lkml.org/lkml/2018/5/22/37
> https://lkml.org/lkml/2018/5/22/38
> https://lkml.org/lkml/2018/5/22/35
> 

Looks they are related to large slice setting and unused slack
under large number of cpus, the issue I described is a little
different.


Re: [PATCH 2/2] sched/fair: Advance global expiration when period timer is restarted

2018-06-18 Thread Xunlei Pang
On 6/19/18 12:36 PM, Cong Wang wrote:
> On Mon, Jun 18, 2018 at 2:16 AM, Xunlei Pang  wrote:
>> I noticed the group frequently got throttled even it consumed
>> low cpu usage, this caused some jitters on the response time
>> to some of our business containers enabling cpu quota.
>>
>> It's very easy to reproduce:
>> mkdir /sys/fs/cgroup/cpu/test
>> cd /sys/fs/cgroup/cpu/test
>> echo 10 > cpu.cfs_quota_us
>> echo $$ > tasks
>> then repeat:
>> cat cpu.stat |grep nr_throttled  // nr_throttled will increase
>>
>> After some analysis, we found that cfs_rq::runtime_remaining will
>> be cleared by expire_cfs_rq_runtime() due to two equal but stale
>> "cfs_{b|q}->runtime_expires" after period timer is re-armed.
>>
>> The global expiration should be advanced accordingly when the
>> bandwidth period timer is restarted.
>>
> 
> I observed the same problem and already sent some patches:
> 
> https://lkml.org/lkml/2018/5/22/37
> https://lkml.org/lkml/2018/5/22/38
> https://lkml.org/lkml/2018/5/22/35
> 

Looks they are related to large slice setting and unused slack
under large number of cpus, the issue I described is a little
different.


Re: [Tee-dev] [PATCH] optee: allow to work without static shared memory

2018-06-18 Thread Rouven Czerwinski


Volodymyr Babchuk  writes:

> From: Volodymyr Babchuk 
>
> On virtualized systems it is possible that OP-TEE will provide
> only dynamic shared memory support. So it is fine to boot
> without static SHM enabled if dymanic one is supported.
>
> Signed-off-by: Volodymyr Babchuk 
> ---
>  drivers/tee/optee/core.c | 83 
> 
>  1 file changed, 49 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
> index 4a2c420..d80da29 100644
> --- a/drivers/tee/optee/core.c
> +++ b/drivers/tee/optee/core.c
> @@ -420,9 +420,35 @@ static bool 
> optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn,
>   return true;
>  }
>  
> +static struct tee_shm_pool *optee_config_dyn_shm(void)
> +{
> + struct tee_shm_pool_mgr *priv_mgr;
> + struct tee_shm_pool_mgr *dmabuf_mgr;
> + void *rc;
> +
> + rc = optee_shm_pool_alloc_pages();
> + if (IS_ERR(rc))
> + return rc;
> + priv_mgr = rc;
> +
> + rc = optee_shm_pool_alloc_pages();
> + if (IS_ERR(rc)) {
> + tee_shm_pool_mgr_destroy(priv_mgr);
> + return rc;
> + }
> + dmabuf_mgr = rc;
> +
> + rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr);
> + if (IS_ERR(rc)) {
> + tee_shm_pool_mgr_destroy(priv_mgr);
> + tee_shm_pool_mgr_destroy(dmabuf_mgr);
> + }
> +
> + return rc;
> +}
> +
>  static struct tee_shm_pool *
> -optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm,
> -   u32 sec_caps)
> +optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm)
>  {
>   union {
>   struct arm_smccc_res smccc;
> @@ -437,10 +463,11 @@ optee_config_shm_memremap(optee_invoke_fn *invoke_fn, 
> void **memremaped_shm,
>   struct tee_shm_pool_mgr *priv_mgr;
>   struct tee_shm_pool_mgr *dmabuf_mgr;
>   void *rc;
> + const int sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
>  
>   invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, );
>   if (res.result.status != OPTEE_SMC_RETURN_OK) {
> - pr_info("shm service not available\n");
> + pr_err("static shm service not available\n");
>   return ERR_PTR(-ENOENT);
>   }
>  
> @@ -466,28 +493,15 @@ optee_config_shm_memremap(optee_invoke_fn *invoke_fn, 
> void **memremaped_shm,
>   }
>   vaddr = (unsigned long)va;
>  
> - /*
> -  * If OP-TEE can work with unregistered SHM, we will use own pool
> -  * for private shm
> -  */
> - if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) {
> - rc = optee_shm_pool_alloc_pages();
> - if (IS_ERR(rc))
> - goto err_memunmap;
> - priv_mgr = rc;
> - } else {
> - const size_t sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
> -
> - rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz,
> - 3 /* 8 bytes aligned */);
> - if (IS_ERR(rc))
> - goto err_memunmap;
> - priv_mgr = rc;
> -
> - vaddr += sz;
> - paddr += sz;
> - size -= sz;
> - }
> + rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz,
> + 3 /* 8 bytes aligned */);
> + if (IS_ERR(rc))
> + goto err_memunmap;
> + priv_mgr = rc;
> +
> + vaddr += sz;
> + paddr += sz;
> + size -= sz;
>  
>   rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, size, PAGE_SHIFT);
>   if (IS_ERR(rc))
> @@ -553,7 +567,7 @@ static optee_invoke_fn *get_invoke_func(struct 
> device_node *np)
>  static struct optee *optee_probe(struct device_node *np)
>  {
>   optee_invoke_fn *invoke_fn;
> - struct tee_shm_pool *pool;
> + struct tee_shm_pool *pool = ERR_PTR(-EINVAL);
>   struct optee *optee = NULL;
>   void *memremaped_shm = NULL;
>   struct tee_device *teedev;
> @@ -582,13 +596,17 @@ static struct optee *optee_probe(struct device_node *np)
>   }
>  
>   /*
> -  * We have no other option for shared memory, if secure world
> -  * doesn't have any reserved memory we can use we can't continue.
> +  * Try to use dynamic shared memory if possible
>*/
> - if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM))
> - return ERR_PTR(-EINVAL);
> + if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)
> + pool = optee_config_dyn_shm();
> +
> + /*
> +  * If dynamic shared memory is not available or failed - try static one
> +  */
> + if (IS_ERR(pool) && (sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM))
> + pool = optee_config_shm_memremap(invoke_fn, _shm);
>  
> - pool = optee_config_shm_memremap(invoke_fn, _shm, sec_caps);
>   if (IS_ERR(pool))
>   return (void *)pool;
>  

-- BEGIN --
> @@ -632,9 +650,6 @@ static struct optee *optee_probe(struct 

Re: [Tee-dev] [PATCH] optee: allow to work without static shared memory

2018-06-18 Thread Rouven Czerwinski


Volodymyr Babchuk  writes:

> From: Volodymyr Babchuk 
>
> On virtualized systems it is possible that OP-TEE will provide
> only dynamic shared memory support. So it is fine to boot
> without static SHM enabled if dymanic one is supported.
>
> Signed-off-by: Volodymyr Babchuk 
> ---
>  drivers/tee/optee/core.c | 83 
> 
>  1 file changed, 49 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
> index 4a2c420..d80da29 100644
> --- a/drivers/tee/optee/core.c
> +++ b/drivers/tee/optee/core.c
> @@ -420,9 +420,35 @@ static bool 
> optee_msg_exchange_capabilities(optee_invoke_fn *invoke_fn,
>   return true;
>  }
>  
> +static struct tee_shm_pool *optee_config_dyn_shm(void)
> +{
> + struct tee_shm_pool_mgr *priv_mgr;
> + struct tee_shm_pool_mgr *dmabuf_mgr;
> + void *rc;
> +
> + rc = optee_shm_pool_alloc_pages();
> + if (IS_ERR(rc))
> + return rc;
> + priv_mgr = rc;
> +
> + rc = optee_shm_pool_alloc_pages();
> + if (IS_ERR(rc)) {
> + tee_shm_pool_mgr_destroy(priv_mgr);
> + return rc;
> + }
> + dmabuf_mgr = rc;
> +
> + rc = tee_shm_pool_alloc(priv_mgr, dmabuf_mgr);
> + if (IS_ERR(rc)) {
> + tee_shm_pool_mgr_destroy(priv_mgr);
> + tee_shm_pool_mgr_destroy(dmabuf_mgr);
> + }
> +
> + return rc;
> +}
> +
>  static struct tee_shm_pool *
> -optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm,
> -   u32 sec_caps)
> +optee_config_shm_memremap(optee_invoke_fn *invoke_fn, void **memremaped_shm)
>  {
>   union {
>   struct arm_smccc_res smccc;
> @@ -437,10 +463,11 @@ optee_config_shm_memremap(optee_invoke_fn *invoke_fn, 
> void **memremaped_shm,
>   struct tee_shm_pool_mgr *priv_mgr;
>   struct tee_shm_pool_mgr *dmabuf_mgr;
>   void *rc;
> + const int sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
>  
>   invoke_fn(OPTEE_SMC_GET_SHM_CONFIG, 0, 0, 0, 0, 0, 0, 0, );
>   if (res.result.status != OPTEE_SMC_RETURN_OK) {
> - pr_info("shm service not available\n");
> + pr_err("static shm service not available\n");
>   return ERR_PTR(-ENOENT);
>   }
>  
> @@ -466,28 +493,15 @@ optee_config_shm_memremap(optee_invoke_fn *invoke_fn, 
> void **memremaped_shm,
>   }
>   vaddr = (unsigned long)va;
>  
> - /*
> -  * If OP-TEE can work with unregistered SHM, we will use own pool
> -  * for private shm
> -  */
> - if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM) {
> - rc = optee_shm_pool_alloc_pages();
> - if (IS_ERR(rc))
> - goto err_memunmap;
> - priv_mgr = rc;
> - } else {
> - const size_t sz = OPTEE_SHM_NUM_PRIV_PAGES * PAGE_SIZE;
> -
> - rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz,
> - 3 /* 8 bytes aligned */);
> - if (IS_ERR(rc))
> - goto err_memunmap;
> - priv_mgr = rc;
> -
> - vaddr += sz;
> - paddr += sz;
> - size -= sz;
> - }
> + rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, sz,
> + 3 /* 8 bytes aligned */);
> + if (IS_ERR(rc))
> + goto err_memunmap;
> + priv_mgr = rc;
> +
> + vaddr += sz;
> + paddr += sz;
> + size -= sz;
>  
>   rc = tee_shm_pool_mgr_alloc_res_mem(vaddr, paddr, size, PAGE_SHIFT);
>   if (IS_ERR(rc))
> @@ -553,7 +567,7 @@ static optee_invoke_fn *get_invoke_func(struct 
> device_node *np)
>  static struct optee *optee_probe(struct device_node *np)
>  {
>   optee_invoke_fn *invoke_fn;
> - struct tee_shm_pool *pool;
> + struct tee_shm_pool *pool = ERR_PTR(-EINVAL);
>   struct optee *optee = NULL;
>   void *memremaped_shm = NULL;
>   struct tee_device *teedev;
> @@ -582,13 +596,17 @@ static struct optee *optee_probe(struct device_node *np)
>   }
>  
>   /*
> -  * We have no other option for shared memory, if secure world
> -  * doesn't have any reserved memory we can use we can't continue.
> +  * Try to use dynamic shared memory if possible
>*/
> - if (!(sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM))
> - return ERR_PTR(-EINVAL);
> + if (sec_caps & OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)
> + pool = optee_config_dyn_shm();
> +
> + /*
> +  * If dynamic shared memory is not available or failed - try static one
> +  */
> + if (IS_ERR(pool) && (sec_caps & OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM))
> + pool = optee_config_shm_memremap(invoke_fn, _shm);
>  
> - pool = optee_config_shm_memremap(invoke_fn, _shm, sec_caps);
>   if (IS_ERR(pool))
>   return (void *)pool;
>  

-- BEGIN --
> @@ -632,9 +650,6 @@ static struct optee *optee_probe(struct 

Re: [RFC PATCH v2 5/6] mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported features

2018-06-18 Thread Boris Brezillon
On Tue, 19 Jun 2018 17:31:24 +1200
Chris Packham  wrote:

> Add ONFI_FEATURE_ON_DIE_ECC to the set/get features list for Micron
> NAND flash.
>

Fixes: 789157e41a06 ("mtd: rawnand: allow vendors to declare (un)supported 
features")
Cc: 

No need to send a new version, I'll add that when queuing the patch.

Miquel, if you're okay, I'm gonna take this patch in the mtd/fixes branch
and let you deal with other patches in this series.

> Signed-off-by: Chris Packham 
> ---
> Changes in v2:
> - New
> 
>  drivers/mtd/nand/raw/nand_micron.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/mtd/nand/raw/nand_micron.c 
> b/drivers/mtd/nand/raw/nand_micron.c
> index e582c9e61715..d1e8f57544a0 100644
> --- a/drivers/mtd/nand/raw/nand_micron.c
> +++ b/drivers/mtd/nand/raw/nand_micron.c
> @@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
>  
>   if (p->supports_set_get_features) {
>   set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list);
> + set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list);
>   set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list);
> + set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list);
>   }
>  
>   return 0;



[PATCH V3 4/4] mmc: host: Register changes for sdcc V5

2018-06-18 Thread Vijay Viswanath
From: Sayali Lokhande 

Add support to use the new compatible string "qcom,sdhci-msm-v5".

Based on the msm variant, pick the relevant variant data and
use it for register read/write to msm specific registers.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Reviewed-by: Evan Green 
---
 drivers/mmc/host/sdhci-msm.c | 347 +++
 1 file changed, 221 insertions(+), 126 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 3d01bc2..418dbb0 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -33,16 +33,11 @@
 #define CORE_MCI_GENERICS  0x70
 #define SWITCHABLE_SIGNALING_VOLTAGE   BIT(29)
 
-#define CORE_HC_MODE   0x78
 #define HC_MODE_EN 0x1
 #define CORE_POWER 0x0
 #define CORE_SW_RSTBIT(7)
 #define FF_CLK_SW_RST_DIS  BIT(13)
 
-#define CORE_PWRCTL_STATUS 0xdc
-#define CORE_PWRCTL_MASK   0xe0
-#define CORE_PWRCTL_CLEAR  0xe4
-#define CORE_PWRCTL_CTL0xe8
 #define CORE_PWRCTL_BUS_OFFBIT(0)
 #define CORE_PWRCTL_BUS_ON BIT(1)
 #define CORE_PWRCTL_IO_LOW BIT(2)
@@ -63,17 +58,13 @@
 #define CORE_CDR_EXT_ENBIT(19)
 #define CORE_DLL_PDN   BIT(29)
 #define CORE_DLL_RST   BIT(30)
-#define CORE_DLL_CONFIG0x100
 #define CORE_CMD_DAT_TRACK_SEL BIT(0)
-#define CORE_DLL_STATUS0x108
 
-#define CORE_DLL_CONFIG_2  0x1b4
 #define CORE_DDR_CAL_ENBIT(0)
 #define CORE_FLL_CYCLE_CNT BIT(18)
 #define CORE_DLL_CLOCK_DISABLE BIT(21)
 
-#define CORE_VENDOR_SPEC   0x10c
-#define CORE_VENDOR_SPEC_POR_VAL   0xa1c
+#define CORE_VENDOR_SPEC_POR_VAL 0xa1c
 #define CORE_CLK_PWRSAVE   BIT(1)
 #define CORE_HC_MCLK_SEL_DFLT  (2 << 8)
 #define CORE_HC_MCLK_SEL_HS400 (3 << 8)
@@ -111,17 +102,14 @@
 #define CORE_CDC_SWITCH_BYPASS_OFF BIT(0)
 #define CORE_CDC_SWITCH_RC_EN  BIT(1)
 
-#define CORE_DDR_200_CFG   0x184
 #define CORE_CDC_T4_DLY_SELBIT(0)
 #define CORE_CMDIN_RCLK_EN BIT(1)
 #define CORE_START_CDC_TRAFFIC BIT(6)
-#define CORE_VENDOR_SPEC3  0x1b0
+
 #define CORE_PWRSAVE_DLL   BIT(3)
 
-#define CORE_DDR_CONFIG0x1b8
 #define DDR_CONFIG_POR_VAL 0x80040853
 
-#define CORE_VENDOR_SPEC_CAPABILITIES0 0x11c
 
 #define INVALID_TUNING_PHASE   -1
 #define SDHCI_MSM_MIN_CLOCK40
@@ -137,6 +125,12 @@
 /* Timeout value to avoid infinite waiting for pwr_irq */
 #define MSM_PWR_IRQ_TIMEOUT_MS 5000
 
+#define msm_host_readl(msm_host, host, offset) \
+   msm_host->var_ops->msm_readl_relaxed(host, offset)
+
+#define msm_host_writel(msm_host, val, host, offset) \
+   msm_host->var_ops->msm_writel_relaxed(val, host, offset)
+
 struct sdhci_msm_offset {
u32 core_hc_mode;
u32 core_mci_data_cnt;
@@ -266,6 +260,14 @@ struct sdhci_msm_host {
const struct sdhci_msm_offset *offset;
 };
 
+static const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host 
*host)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return msm_host->offset;
+}
+
 /*
  * APIs to read/write to vendor specific registers which were there in the
  * core_mem region before MCI was removed.
@@ -347,10 +349,12 @@ static inline int msm_dll_poll_ck_out_en(struct 
sdhci_host *host, u8 poll)
u32 wait_cnt = 50;
u8 ck_out_en;
struct mmc_host *mmc = host->mmc;
+   const struct sdhci_msm_offset *msm_offset =
+   sdhci_priv_msm_offset(host);
 
/* Poll for CK_OUT_EN bit.  max. poll time = 50us */
-   ck_out_en = !!(readl_relaxed(host->ioaddr + CORE_DLL_CONFIG) &
-   CORE_CK_OUT_EN);
+   ck_out_en = !!(readl_relaxed(host->ioaddr +
+   msm_offset->core_dll_config) & CORE_CK_OUT_EN);
 
while (ck_out_en != poll) {
if (--wait_cnt == 0) {
@@ -360,8 +364,8 @@ static inline int msm_dll_poll_ck_out_en(struct sdhci_host 
*host, u8 poll)
}
udelay(1);
 
-   ck_out_en = !!(readl_relaxed(host->ioaddr + CORE_DLL_CONFIG) &
-   CORE_CK_OUT_EN);
+   ck_out_en = !!(readl_relaxed(host->ioaddr +
+   msm_offset->core_dll_config) & CORE_CK_OUT_EN);
}
 
return 0;
@@ -377,16 +381,18 @@ static int msm_config_cm_dll_phase(struct sdhci_host 
*host, u8 phase)
unsigned long flags;
u32 config;
struct mmc_host *mmc = host->mmc;
+   const struct sdhci_msm_offset *msm_offset =
+   sdhci_priv_msm_offset(host);
 
if (phase > 0xf)
return -EINVAL;
 
spin_lock_irqsave(>lock, flags);
 
-   config = readl_relaxed(host->ioaddr + CORE_DLL_CONFIG);

Re: [RFC PATCH v2 5/6] mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported features

2018-06-18 Thread Boris Brezillon
On Tue, 19 Jun 2018 17:31:24 +1200
Chris Packham  wrote:

> Add ONFI_FEATURE_ON_DIE_ECC to the set/get features list for Micron
> NAND flash.
>

Fixes: 789157e41a06 ("mtd: rawnand: allow vendors to declare (un)supported 
features")
Cc: 

No need to send a new version, I'll add that when queuing the patch.

Miquel, if you're okay, I'm gonna take this patch in the mtd/fixes branch
and let you deal with other patches in this series.

> Signed-off-by: Chris Packham 
> ---
> Changes in v2:
> - New
> 
>  drivers/mtd/nand/raw/nand_micron.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/mtd/nand/raw/nand_micron.c 
> b/drivers/mtd/nand/raw/nand_micron.c
> index e582c9e61715..d1e8f57544a0 100644
> --- a/drivers/mtd/nand/raw/nand_micron.c
> +++ b/drivers/mtd/nand/raw/nand_micron.c
> @@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
>  
>   if (p->supports_set_get_features) {
>   set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list);
> + set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list);
>   set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list);
> + set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list);
>   }
>  
>   return 0;



[PATCH V3 4/4] mmc: host: Register changes for sdcc V5

2018-06-18 Thread Vijay Viswanath
From: Sayali Lokhande 

Add support to use the new compatible string "qcom,sdhci-msm-v5".

Based on the msm variant, pick the relevant variant data and
use it for register read/write to msm specific registers.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Reviewed-by: Evan Green 
---
 drivers/mmc/host/sdhci-msm.c | 347 +++
 1 file changed, 221 insertions(+), 126 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 3d01bc2..418dbb0 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -33,16 +33,11 @@
 #define CORE_MCI_GENERICS  0x70
 #define SWITCHABLE_SIGNALING_VOLTAGE   BIT(29)
 
-#define CORE_HC_MODE   0x78
 #define HC_MODE_EN 0x1
 #define CORE_POWER 0x0
 #define CORE_SW_RSTBIT(7)
 #define FF_CLK_SW_RST_DIS  BIT(13)
 
-#define CORE_PWRCTL_STATUS 0xdc
-#define CORE_PWRCTL_MASK   0xe0
-#define CORE_PWRCTL_CLEAR  0xe4
-#define CORE_PWRCTL_CTL0xe8
 #define CORE_PWRCTL_BUS_OFFBIT(0)
 #define CORE_PWRCTL_BUS_ON BIT(1)
 #define CORE_PWRCTL_IO_LOW BIT(2)
@@ -63,17 +58,13 @@
 #define CORE_CDR_EXT_ENBIT(19)
 #define CORE_DLL_PDN   BIT(29)
 #define CORE_DLL_RST   BIT(30)
-#define CORE_DLL_CONFIG0x100
 #define CORE_CMD_DAT_TRACK_SEL BIT(0)
-#define CORE_DLL_STATUS0x108
 
-#define CORE_DLL_CONFIG_2  0x1b4
 #define CORE_DDR_CAL_ENBIT(0)
 #define CORE_FLL_CYCLE_CNT BIT(18)
 #define CORE_DLL_CLOCK_DISABLE BIT(21)
 
-#define CORE_VENDOR_SPEC   0x10c
-#define CORE_VENDOR_SPEC_POR_VAL   0xa1c
+#define CORE_VENDOR_SPEC_POR_VAL 0xa1c
 #define CORE_CLK_PWRSAVE   BIT(1)
 #define CORE_HC_MCLK_SEL_DFLT  (2 << 8)
 #define CORE_HC_MCLK_SEL_HS400 (3 << 8)
@@ -111,17 +102,14 @@
 #define CORE_CDC_SWITCH_BYPASS_OFF BIT(0)
 #define CORE_CDC_SWITCH_RC_EN  BIT(1)
 
-#define CORE_DDR_200_CFG   0x184
 #define CORE_CDC_T4_DLY_SELBIT(0)
 #define CORE_CMDIN_RCLK_EN BIT(1)
 #define CORE_START_CDC_TRAFFIC BIT(6)
-#define CORE_VENDOR_SPEC3  0x1b0
+
 #define CORE_PWRSAVE_DLL   BIT(3)
 
-#define CORE_DDR_CONFIG0x1b8
 #define DDR_CONFIG_POR_VAL 0x80040853
 
-#define CORE_VENDOR_SPEC_CAPABILITIES0 0x11c
 
 #define INVALID_TUNING_PHASE   -1
 #define SDHCI_MSM_MIN_CLOCK40
@@ -137,6 +125,12 @@
 /* Timeout value to avoid infinite waiting for pwr_irq */
 #define MSM_PWR_IRQ_TIMEOUT_MS 5000
 
+#define msm_host_readl(msm_host, host, offset) \
+   msm_host->var_ops->msm_readl_relaxed(host, offset)
+
+#define msm_host_writel(msm_host, val, host, offset) \
+   msm_host->var_ops->msm_writel_relaxed(val, host, offset)
+
 struct sdhci_msm_offset {
u32 core_hc_mode;
u32 core_mci_data_cnt;
@@ -266,6 +260,14 @@ struct sdhci_msm_host {
const struct sdhci_msm_offset *offset;
 };
 
+static const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host 
*host)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return msm_host->offset;
+}
+
 /*
  * APIs to read/write to vendor specific registers which were there in the
  * core_mem region before MCI was removed.
@@ -347,10 +349,12 @@ static inline int msm_dll_poll_ck_out_en(struct 
sdhci_host *host, u8 poll)
u32 wait_cnt = 50;
u8 ck_out_en;
struct mmc_host *mmc = host->mmc;
+   const struct sdhci_msm_offset *msm_offset =
+   sdhci_priv_msm_offset(host);
 
/* Poll for CK_OUT_EN bit.  max. poll time = 50us */
-   ck_out_en = !!(readl_relaxed(host->ioaddr + CORE_DLL_CONFIG) &
-   CORE_CK_OUT_EN);
+   ck_out_en = !!(readl_relaxed(host->ioaddr +
+   msm_offset->core_dll_config) & CORE_CK_OUT_EN);
 
while (ck_out_en != poll) {
if (--wait_cnt == 0) {
@@ -360,8 +364,8 @@ static inline int msm_dll_poll_ck_out_en(struct sdhci_host 
*host, u8 poll)
}
udelay(1);
 
-   ck_out_en = !!(readl_relaxed(host->ioaddr + CORE_DLL_CONFIG) &
-   CORE_CK_OUT_EN);
+   ck_out_en = !!(readl_relaxed(host->ioaddr +
+   msm_offset->core_dll_config) & CORE_CK_OUT_EN);
}
 
return 0;
@@ -377,16 +381,18 @@ static int msm_config_cm_dll_phase(struct sdhci_host 
*host, u8 phase)
unsigned long flags;
u32 config;
struct mmc_host *mmc = host->mmc;
+   const struct sdhci_msm_offset *msm_offset =
+   sdhci_priv_msm_offset(host);
 
if (phase > 0xf)
return -EINVAL;
 
spin_lock_irqsave(>lock, flags);
 
-   config = readl_relaxed(host->ioaddr + CORE_DLL_CONFIG);

[PATCH V3 3/4] Documentation: sdhci-msm: Add new compatible string for SDCC v5

2018-06-18 Thread Vijay Viswanath
From: Sayali Lokhande 

For SDCC version 5.0.0 and higher, new compatible string
"qcom,sdhci-msm-v5" is added.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/mmc/sdhci-msm.txt | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt 
b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index bfdcdc4..502b3b8 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -4,7 +4,12 @@ This file documents differences between the core properties in 
mmc.txt
 and the properties used by the sdhci-msm driver.
 
 Required properties:
-- compatible: Should contain "qcom,sdhci-msm-v4".
+- compatible: Should contain:
+   "qcom,sdhci-msm-v4" for sdcc versions less than 5.0
+   "qcom,sdhci-msm-v5" for sdcc versions >= 5.0
+   For SDCC version 5.0.0, MCI registers are removed from SDCC
+   interface and some registers are moved to HC. New compatible
+   string is added to support this change - "qcom,sdhci-msm-v5".
 - reg: Base address and length of the register in the following order:
- Host controller register map (required)
- SD Core register map (required)
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 3/4] Documentation: sdhci-msm: Add new compatible string for SDCC v5

2018-06-18 Thread Vijay Viswanath
From: Sayali Lokhande 

For SDCC version 5.0.0 and higher, new compatible string
"qcom,sdhci-msm-v5" is added.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/mmc/sdhci-msm.txt | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt 
b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index bfdcdc4..502b3b8 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -4,7 +4,12 @@ This file documents differences between the core properties in 
mmc.txt
 and the properties used by the sdhci-msm driver.
 
 Required properties:
-- compatible: Should contain "qcom,sdhci-msm-v4".
+- compatible: Should contain:
+   "qcom,sdhci-msm-v4" for sdcc versions less than 5.0
+   "qcom,sdhci-msm-v5" for sdcc versions >= 5.0
+   For SDCC version 5.0.0, MCI registers are removed from SDCC
+   interface and some registers are moved to HC. New compatible
+   string is added to support this change - "qcom,sdhci-msm-v5".
 - reg: Base address and length of the register in the following order:
- Host controller register map (required)
- SD Core register map (required)
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 2/4] mmc: sdhci-msm: Add msm version specific ops and data structures

2018-06-18 Thread Vijay Viswanath
In addition to offsets of certain registers changing, the registers in
core_mem have been shifted to HC mem as well. To access these
registers, define msm version specific functions. These functions can
be loaded into the function pointers at the time of probe based on
the msm version detected.

Also defind new data structure to hold version specific Ops and
register addresses.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Reviewed-by: Evan Green 
---
 drivers/mmc/host/sdhci-msm.c | 75 
 1 file changed, 75 insertions(+)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 4050c99..3d01bc2 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -226,6 +226,22 @@ struct sdhci_msm_offset {
.core_ddr_config_2 = 0x1bc,
 };
 
+struct sdhci_msm_variant_ops {
+   u32 (*msm_readl_relaxed)(struct sdhci_host *host, u32 offset);
+   void (*msm_writel_relaxed)(u32 val, struct sdhci_host *host,
+   u32 offset);
+};
+
+/*
+ * From V5, register spaces have changed. Wrap this info in a structure
+ * and choose the data_structure based on version info mentioned in DT.
+ */
+struct sdhci_msm_variant_info {
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
+};
+
 struct sdhci_msm_host {
struct platform_device *pdev;
void __iomem *core_mem; /* MSM SDCC mapped address */
@@ -245,8 +261,45 @@ struct sdhci_msm_host {
wait_queue_head_t pwr_irq_wait;
bool pwr_irq_flag;
u32 caps_0;
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
 };
 
+/*
+ * APIs to read/write to vendor specific registers which were there in the
+ * core_mem region before MCI was removed.
+ */
+static u32 sdhci_msm_mci_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return readl_relaxed(msm_host->core_mem + offset);
+}
+
+static u32 sdhci_msm_v5_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   return readl_relaxed(host->ioaddr + offset);
+}
+
+static void sdhci_msm_mci_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   writel_relaxed(val, msm_host->core_mem + offset);
+}
+
+static void sdhci_msm_v5_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   writel_relaxed(val, host->ioaddr + offset);
+}
+
 static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
unsigned int clock)
 {
@@ -1481,6 +1534,28 @@ static void sdhci_msm_set_regulator_caps(struct 
sdhci_msm_host *msm_host)
pr_debug("%s: supported caps: 0x%08x\n", mmc_hostname(mmc), caps);
 }
 
+static const struct sdhci_msm_variant_ops mci_var_ops = {
+   .msm_readl_relaxed = sdhci_msm_mci_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_mci_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_ops v5_var_ops = {
+   .msm_readl_relaxed = sdhci_msm_v5_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_v5_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_mci_var = {
+   .mci_removed = false,
+   .var_ops = _var_ops,
+   .offset = _msm_mci_offset,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_v5_var = {
+   .mci_removed = true,
+   .var_ops = _var_ops,
+   .offset = _msm_v5_offset,
+};
+
 static const struct of_device_id sdhci_msm_dt_match[] = {
{ .compatible = "qcom,sdhci-msm-v4" },
{},
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 2/4] mmc: sdhci-msm: Add msm version specific ops and data structures

2018-06-18 Thread Vijay Viswanath
In addition to offsets of certain registers changing, the registers in
core_mem have been shifted to HC mem as well. To access these
registers, define msm version specific functions. These functions can
be loaded into the function pointers at the time of probe based on
the msm version detected.

Also defind new data structure to hold version specific Ops and
register addresses.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Reviewed-by: Evan Green 
---
 drivers/mmc/host/sdhci-msm.c | 75 
 1 file changed, 75 insertions(+)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 4050c99..3d01bc2 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -226,6 +226,22 @@ struct sdhci_msm_offset {
.core_ddr_config_2 = 0x1bc,
 };
 
+struct sdhci_msm_variant_ops {
+   u32 (*msm_readl_relaxed)(struct sdhci_host *host, u32 offset);
+   void (*msm_writel_relaxed)(u32 val, struct sdhci_host *host,
+   u32 offset);
+};
+
+/*
+ * From V5, register spaces have changed. Wrap this info in a structure
+ * and choose the data_structure based on version info mentioned in DT.
+ */
+struct sdhci_msm_variant_info {
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
+};
+
 struct sdhci_msm_host {
struct platform_device *pdev;
void __iomem *core_mem; /* MSM SDCC mapped address */
@@ -245,8 +261,45 @@ struct sdhci_msm_host {
wait_queue_head_t pwr_irq_wait;
bool pwr_irq_flag;
u32 caps_0;
+   bool mci_removed;
+   const struct sdhci_msm_variant_ops *var_ops;
+   const struct sdhci_msm_offset *offset;
 };
 
+/*
+ * APIs to read/write to vendor specific registers which were there in the
+ * core_mem region before MCI was removed.
+ */
+static u32 sdhci_msm_mci_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   return readl_relaxed(msm_host->core_mem + offset);
+}
+
+static u32 sdhci_msm_v5_variant_readl_relaxed(struct sdhci_host *host,
+   u32 offset)
+{
+   return readl_relaxed(host->ioaddr + offset);
+}
+
+static void sdhci_msm_mci_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   writel_relaxed(val, msm_host->core_mem + offset);
+}
+
+static void sdhci_msm_v5_variant_writel_relaxed(u32 val,
+   struct sdhci_host *host, u32 offset)
+{
+   writel_relaxed(val, host->ioaddr + offset);
+}
+
 static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
unsigned int clock)
 {
@@ -1481,6 +1534,28 @@ static void sdhci_msm_set_regulator_caps(struct 
sdhci_msm_host *msm_host)
pr_debug("%s: supported caps: 0x%08x\n", mmc_hostname(mmc), caps);
 }
 
+static const struct sdhci_msm_variant_ops mci_var_ops = {
+   .msm_readl_relaxed = sdhci_msm_mci_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_mci_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_ops v5_var_ops = {
+   .msm_readl_relaxed = sdhci_msm_v5_variant_readl_relaxed,
+   .msm_writel_relaxed = sdhci_msm_v5_variant_writel_relaxed,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_mci_var = {
+   .mci_removed = false,
+   .var_ops = _var_ops,
+   .offset = _msm_mci_offset,
+};
+
+static const struct sdhci_msm_variant_info sdhci_msm_v5_var = {
+   .mci_removed = true,
+   .var_ops = _var_ops,
+   .offset = _msm_v5_offset,
+};
+
 static const struct of_device_id sdhci_msm_dt_match[] = {
{ .compatible = "qcom,sdhci-msm-v4" },
{},
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 1/4] mmc: sdhci-msm: Define new Register address map

2018-06-18 Thread Vijay Viswanath
From: Sayali Lokhande 

For SDCC version 5.0.0, MCI registers are removed from SDCC
interface and some registers are moved to HC.
Define a new data structure where we can statically define
the address offsets for the registers in different SDCC versions.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Reviewed-by: Evan Green 
Acked-by: Adrian Hunter 
---
 drivers/mmc/host/sdhci-msm.c | 89 
 1 file changed, 89 insertions(+)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index bb11916..4050c99 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -137,6 +137,95 @@
 /* Timeout value to avoid infinite waiting for pwr_irq */
 #define MSM_PWR_IRQ_TIMEOUT_MS 5000
 
+struct sdhci_msm_offset {
+   u32 core_hc_mode;
+   u32 core_mci_data_cnt;
+   u32 core_mci_status;
+   u32 core_mci_fifo_cnt;
+   u32 core_mci_version;
+   u32 core_generics;
+   u32 core_testbus_config;
+   u32 core_testbus_sel2_bit;
+   u32 core_testbus_ena;
+   u32 core_testbus_sel2;
+   u32 core_pwrctl_status;
+   u32 core_pwrctl_mask;
+   u32 core_pwrctl_clear;
+   u32 core_pwrctl_ctl;
+   u32 core_sdcc_debug_reg;
+   u32 core_dll_config;
+   u32 core_dll_status;
+   u32 core_vendor_spec;
+   u32 core_vendor_spec_adma_err_addr0;
+   u32 core_vendor_spec_adma_err_addr1;
+   u32 core_vendor_spec_func2;
+   u32 core_vendor_spec_capabilities0;
+   u32 core_ddr_200_cfg;
+   u32 core_vendor_spec3;
+   u32 core_dll_config_2;
+   u32 core_ddr_config;
+   u32 core_ddr_config_2;
+};
+
+static const struct sdhci_msm_offset sdhci_msm_v5_offset = {
+   .core_mci_data_cnt = 0x35c,
+   .core_mci_status = 0x324,
+   .core_mci_fifo_cnt = 0x308,
+   .core_mci_version = 0x318,
+   .core_generics = 0x320,
+   .core_testbus_config = 0x32c,
+   .core_testbus_sel2_bit = 3,
+   .core_testbus_ena = (1 << 31),
+   .core_testbus_sel2 = (1 << 3),
+   .core_pwrctl_status = 0x240,
+   .core_pwrctl_mask = 0x244,
+   .core_pwrctl_clear = 0x248,
+   .core_pwrctl_ctl = 0x24c,
+   .core_sdcc_debug_reg = 0x358,
+   .core_dll_config = 0x200,
+   .core_dll_status = 0x208,
+   .core_vendor_spec = 0x20c,
+   .core_vendor_spec_adma_err_addr0 = 0x214,
+   .core_vendor_spec_adma_err_addr1 = 0x218,
+   .core_vendor_spec_func2 = 0x210,
+   .core_vendor_spec_capabilities0 = 0x21c,
+   .core_ddr_200_cfg = 0x224,
+   .core_vendor_spec3 = 0x250,
+   .core_dll_config_2 = 0x254,
+   .core_ddr_config = 0x258,
+   .core_ddr_config_2 = 0x25c,
+};
+
+static const struct sdhci_msm_offset sdhci_msm_mci_offset = {
+   .core_hc_mode = 0x78,
+   .core_mci_data_cnt = 0x30,
+   .core_mci_status = 0x34,
+   .core_mci_fifo_cnt = 0x44,
+   .core_mci_version = 0x050,
+   .core_generics = 0x70,
+   .core_testbus_config = 0x0cc,
+   .core_testbus_sel2_bit = 4,
+   .core_testbus_ena = (1 << 3),
+   .core_testbus_sel2 = (1 << 4),
+   .core_pwrctl_status = 0xdc,
+   .core_pwrctl_mask = 0xe0,
+   .core_pwrctl_clear = 0xe4,
+   .core_pwrctl_ctl = 0xe8,
+   .core_sdcc_debug_reg = 0x124,
+   .core_dll_config = 0x100,
+   .core_dll_status = 0x108,
+   .core_vendor_spec = 0x10c,
+   .core_vendor_spec_adma_err_addr0 = 0x114,
+   .core_vendor_spec_adma_err_addr1 = 0x118,
+   .core_vendor_spec_func2 = 0x110,
+   .core_vendor_spec_capabilities0 = 0x11c,
+   .core_ddr_200_cfg = 0x184,
+   .core_vendor_spec3 = 0x1b0,
+   .core_dll_config_2 = 0x1b4,
+   .core_ddr_config = 0x1b8,
+   .core_ddr_config_2 = 0x1bc,
+};
+
 struct sdhci_msm_host {
struct platform_device *pdev;
void __iomem *core_mem; /* MSM SDCC mapped address */
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 0/4] Changes for SDCC5 version

2018-06-18 Thread Vijay Viswanath
With SDCC5, the MCI register space got removed and the offset/order of
several registers have changed. Based on SDCC version used and the register,
we need to pick the base address and offset.

Depends on patch series: "[PATCH V5 0/2] mmc: sdhci-msm: Configuring IO_PAD 
support for sdhci-msm"

Changes since RFC:
Dropped voltage regulator changes in sdhci-msm
Split the "Register changes for sdcc V5" patch
Instead of checking mci removal for deciding which base addr to use,
new function pointers are defined for the 2 variants of sdcc: 
1) MCI present
2) V5 (mci removed)
Instead of string comparing with the compatible string from DT file,
the sdhci_msm_probe will now pick the data associated with the
compatible entry and use it to load variant specific address offsets
and msm variant specific read/write ops.

Changes since V1:
Removed unused msm_reab & msm_writeb APIs
Changed certain register addresses from uppercase to lowercase hex
letters
Removed extra lines and spaces
Split "[PATCH V1 0/3] Changes for SDCC5 version" patch into two,
one for Documentation and other for the driver changes.

Changes since V2:
Used lower case for macro function defenitions
Removed unused function pointers for msm_readb & msm_writeb


Sayali Lokhande (3):
  mmc: sdhci-msm: Define new Register address map
  Documentation: sdhci-msm: Add new compatible string for SDCC v5
  mmc: host: Register changes for sdcc V5

Vijay Viswanath (1):
  mmc: sdhci-msm: Add msm version specific ops and data structures

 .../devicetree/bindings/mmc/sdhci-msm.txt  |   7 +-
 drivers/mmc/host/sdhci-msm.c   | 511 -
 2 files changed, 391 insertions(+), 127 deletions(-)

-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 1/4] mmc: sdhci-msm: Define new Register address map

2018-06-18 Thread Vijay Viswanath
From: Sayali Lokhande 

For SDCC version 5.0.0, MCI registers are removed from SDCC
interface and some registers are moved to HC.
Define a new data structure where we can statically define
the address offsets for the registers in different SDCC versions.

Signed-off-by: Sayali Lokhande 
Signed-off-by: Vijay Viswanath 
Reviewed-by: Evan Green 
Acked-by: Adrian Hunter 
---
 drivers/mmc/host/sdhci-msm.c | 89 
 1 file changed, 89 insertions(+)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index bb11916..4050c99 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -137,6 +137,95 @@
 /* Timeout value to avoid infinite waiting for pwr_irq */
 #define MSM_PWR_IRQ_TIMEOUT_MS 5000
 
+struct sdhci_msm_offset {
+   u32 core_hc_mode;
+   u32 core_mci_data_cnt;
+   u32 core_mci_status;
+   u32 core_mci_fifo_cnt;
+   u32 core_mci_version;
+   u32 core_generics;
+   u32 core_testbus_config;
+   u32 core_testbus_sel2_bit;
+   u32 core_testbus_ena;
+   u32 core_testbus_sel2;
+   u32 core_pwrctl_status;
+   u32 core_pwrctl_mask;
+   u32 core_pwrctl_clear;
+   u32 core_pwrctl_ctl;
+   u32 core_sdcc_debug_reg;
+   u32 core_dll_config;
+   u32 core_dll_status;
+   u32 core_vendor_spec;
+   u32 core_vendor_spec_adma_err_addr0;
+   u32 core_vendor_spec_adma_err_addr1;
+   u32 core_vendor_spec_func2;
+   u32 core_vendor_spec_capabilities0;
+   u32 core_ddr_200_cfg;
+   u32 core_vendor_spec3;
+   u32 core_dll_config_2;
+   u32 core_ddr_config;
+   u32 core_ddr_config_2;
+};
+
+static const struct sdhci_msm_offset sdhci_msm_v5_offset = {
+   .core_mci_data_cnt = 0x35c,
+   .core_mci_status = 0x324,
+   .core_mci_fifo_cnt = 0x308,
+   .core_mci_version = 0x318,
+   .core_generics = 0x320,
+   .core_testbus_config = 0x32c,
+   .core_testbus_sel2_bit = 3,
+   .core_testbus_ena = (1 << 31),
+   .core_testbus_sel2 = (1 << 3),
+   .core_pwrctl_status = 0x240,
+   .core_pwrctl_mask = 0x244,
+   .core_pwrctl_clear = 0x248,
+   .core_pwrctl_ctl = 0x24c,
+   .core_sdcc_debug_reg = 0x358,
+   .core_dll_config = 0x200,
+   .core_dll_status = 0x208,
+   .core_vendor_spec = 0x20c,
+   .core_vendor_spec_adma_err_addr0 = 0x214,
+   .core_vendor_spec_adma_err_addr1 = 0x218,
+   .core_vendor_spec_func2 = 0x210,
+   .core_vendor_spec_capabilities0 = 0x21c,
+   .core_ddr_200_cfg = 0x224,
+   .core_vendor_spec3 = 0x250,
+   .core_dll_config_2 = 0x254,
+   .core_ddr_config = 0x258,
+   .core_ddr_config_2 = 0x25c,
+};
+
+static const struct sdhci_msm_offset sdhci_msm_mci_offset = {
+   .core_hc_mode = 0x78,
+   .core_mci_data_cnt = 0x30,
+   .core_mci_status = 0x34,
+   .core_mci_fifo_cnt = 0x44,
+   .core_mci_version = 0x050,
+   .core_generics = 0x70,
+   .core_testbus_config = 0x0cc,
+   .core_testbus_sel2_bit = 4,
+   .core_testbus_ena = (1 << 3),
+   .core_testbus_sel2 = (1 << 4),
+   .core_pwrctl_status = 0xdc,
+   .core_pwrctl_mask = 0xe0,
+   .core_pwrctl_clear = 0xe4,
+   .core_pwrctl_ctl = 0xe8,
+   .core_sdcc_debug_reg = 0x124,
+   .core_dll_config = 0x100,
+   .core_dll_status = 0x108,
+   .core_vendor_spec = 0x10c,
+   .core_vendor_spec_adma_err_addr0 = 0x114,
+   .core_vendor_spec_adma_err_addr1 = 0x118,
+   .core_vendor_spec_func2 = 0x110,
+   .core_vendor_spec_capabilities0 = 0x11c,
+   .core_ddr_200_cfg = 0x184,
+   .core_vendor_spec3 = 0x1b0,
+   .core_dll_config_2 = 0x1b4,
+   .core_ddr_config = 0x1b8,
+   .core_ddr_config_2 = 0x1bc,
+};
+
 struct sdhci_msm_host {
struct platform_device *pdev;
void __iomem *core_mem; /* MSM SDCC mapped address */
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH V3 0/4] Changes for SDCC5 version

2018-06-18 Thread Vijay Viswanath
With SDCC5, the MCI register space got removed and the offset/order of
several registers have changed. Based on SDCC version used and the register,
we need to pick the base address and offset.

Depends on patch series: "[PATCH V5 0/2] mmc: sdhci-msm: Configuring IO_PAD 
support for sdhci-msm"

Changes since RFC:
Dropped voltage regulator changes in sdhci-msm
Split the "Register changes for sdcc V5" patch
Instead of checking mci removal for deciding which base addr to use,
new function pointers are defined for the 2 variants of sdcc: 
1) MCI present
2) V5 (mci removed)
Instead of string comparing with the compatible string from DT file,
the sdhci_msm_probe will now pick the data associated with the
compatible entry and use it to load variant specific address offsets
and msm variant specific read/write ops.

Changes since V1:
Removed unused msm_reab & msm_writeb APIs
Changed certain register addresses from uppercase to lowercase hex
letters
Removed extra lines and spaces
Split "[PATCH V1 0/3] Changes for SDCC5 version" patch into two,
one for Documentation and other for the driver changes.

Changes since V2:
Used lower case for macro function defenitions
Removed unused function pointers for msm_readb & msm_writeb


Sayali Lokhande (3):
  mmc: sdhci-msm: Define new Register address map
  Documentation: sdhci-msm: Add new compatible string for SDCC v5
  mmc: host: Register changes for sdcc V5

Vijay Viswanath (1):
  mmc: sdhci-msm: Add msm version specific ops and data structures

 .../devicetree/bindings/mmc/sdhci-msm.txt  |   7 +-
 drivers/mmc/host/sdhci-msm.c   | 511 -
 2 files changed, 391 insertions(+), 127 deletions(-)

-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[RFC PATCH v2 2/6] mtd: rawnand: add manufacturer fixup for ONFI parameter page

2018-06-18 Thread Chris Packham
This is called after the ONFI parameter page checksum is verified
and allows us to override the contents of the parameter page.

Suggested-by: Boris Brezillon 
Signed-off-by: Chris Packham 
---
Changes in v2:
- New

 drivers/mtd/nand/raw/nand_base.c | 4 
 include/linux/mtd/rawnand.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0cd3e216b95c..65250308c82d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5172,6 +5172,10 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
}
}
 
+   if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+   chip->manufacturer.desc->ops->fixup_onfi_param_page)
+   chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p);
+
/* Check version */
val = le16_to_cpu(p->revision);
if (val & (1 << 5))
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 3e8ec3b8a39c..6db42091ee5e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -783,6 +783,7 @@ struct nand_manufacturer_ops {
void (*detect)(struct nand_chip *chip);
int (*init)(struct nand_chip *chip);
void (*cleanup)(struct nand_chip *chip);
+   void (*fixup_onfi_param_page)(struct nand_chip *chip, struct 
nand_onfi_params *p);
 };
 
 /**
-- 
2.17.1



[RFC PATCH v2 2/6] mtd: rawnand: add manufacturer fixup for ONFI parameter page

2018-06-18 Thread Chris Packham
This is called after the ONFI parameter page checksum is verified
and allows us to override the contents of the parameter page.

Suggested-by: Boris Brezillon 
Signed-off-by: Chris Packham 
---
Changes in v2:
- New

 drivers/mtd/nand/raw/nand_base.c | 4 
 include/linux/mtd/rawnand.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0cd3e216b95c..65250308c82d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5172,6 +5172,10 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
}
}
 
+   if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+   chip->manufacturer.desc->ops->fixup_onfi_param_page)
+   chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p);
+
/* Check version */
val = le16_to_cpu(p->revision);
if (val & (1 << 5))
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 3e8ec3b8a39c..6db42091ee5e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -783,6 +783,7 @@ struct nand_manufacturer_ops {
void (*detect)(struct nand_chip *chip);
int (*init)(struct nand_chip *chip);
void (*cleanup)(struct nand_chip *chip);
+   void (*fixup_onfi_param_page)(struct nand_chip *chip, struct 
nand_onfi_params *p);
 };
 
 /**
-- 
2.17.1



[RFC PATCH v2 1/6] mtd: rawnand: marvell: Handle on-die ECC

2018-06-18 Thread Chris Packham
>From the controllers point of view this is the same as no or
software only ECC.

Signed-off-by: Chris Packham 
---
Changes in v2:
- New

 drivers/mtd/nand/raw/marvell_nand.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/raw/marvell_nand.c 
b/drivers/mtd/nand/raw/marvell_nand.c
index ebb1d141b900..ba6889bbe802 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2157,6 +2157,7 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd,
break;
case NAND_ECC_NONE:
case NAND_ECC_SOFT:
+   case NAND_ECC_ON_DIE:
if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 &&
mtd->writesize != SZ_2K) {
dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n",
-- 
2.17.1



[RFC PATCH v2 3/6] mtd: rawnand: micron: add fixup for ONFI revision

2018-06-18 Thread Chris Packham
Some Micron NAND chips (MT29F1G08ABAFAWP-ITE:F) report 00 00 for the
revision number field of the ONFI parameter page. Rather than rejecting
these outright assume ONFI version 1.0 if the revision number is 00 00.

Signed-off-by: Chris Packham 
---
This is now qualified on vendor == MICRON. I haven't qualified this
based on specific chips the ABAFA (id=d1) and ABBFA (id=a1) variants are
documented to have this behaviour.

Changes in v2:
- use fixup_onfi_param_page

 drivers/mtd/nand/raw/nand_micron.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/mtd/nand/raw/nand_micron.c 
b/drivers/mtd/nand/raw/nand_micron.c
index 0af45b134c0c..e582c9e61715 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -287,6 +287,14 @@ static int micron_nand_init(struct nand_chip *chip)
return 0;
 }
 
+static void micron_fixup_onfi_param_page(struct nand_chip *chip,
+struct nand_onfi_params *p)
+{
+   if (le16_to_cpu(p->revision) == 0)
+   p->revision = cpu_to_le16(1 << 1);
+}
+
 const struct nand_manufacturer_ops micron_nand_manuf_ops = {
.init = micron_nand_init,
+   .fixup_onfi_param_page = micron_fixup_onfi_param_page,
 };
-- 
2.17.1



[RFC PATCH v2 1/6] mtd: rawnand: marvell: Handle on-die ECC

2018-06-18 Thread Chris Packham
>From the controllers point of view this is the same as no or
software only ECC.

Signed-off-by: Chris Packham 
---
Changes in v2:
- New

 drivers/mtd/nand/raw/marvell_nand.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/raw/marvell_nand.c 
b/drivers/mtd/nand/raw/marvell_nand.c
index ebb1d141b900..ba6889bbe802 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2157,6 +2157,7 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd,
break;
case NAND_ECC_NONE:
case NAND_ECC_SOFT:
+   case NAND_ECC_ON_DIE:
if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 &&
mtd->writesize != SZ_2K) {
dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n",
-- 
2.17.1



[RFC PATCH v2 3/6] mtd: rawnand: micron: add fixup for ONFI revision

2018-06-18 Thread Chris Packham
Some Micron NAND chips (MT29F1G08ABAFAWP-ITE:F) report 00 00 for the
revision number field of the ONFI parameter page. Rather than rejecting
these outright assume ONFI version 1.0 if the revision number is 00 00.

Signed-off-by: Chris Packham 
---
This is now qualified on vendor == MICRON. I haven't qualified this
based on specific chips the ABAFA (id=d1) and ABBFA (id=a1) variants are
documented to have this behaviour.

Changes in v2:
- use fixup_onfi_param_page

 drivers/mtd/nand/raw/nand_micron.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/mtd/nand/raw/nand_micron.c 
b/drivers/mtd/nand/raw/nand_micron.c
index 0af45b134c0c..e582c9e61715 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -287,6 +287,14 @@ static int micron_nand_init(struct nand_chip *chip)
return 0;
 }
 
+static void micron_fixup_onfi_param_page(struct nand_chip *chip,
+struct nand_onfi_params *p)
+{
+   if (le16_to_cpu(p->revision) == 0)
+   p->revision = cpu_to_le16(1 << 1);
+}
+
 const struct nand_manufacturer_ops micron_nand_manuf_ops = {
.init = micron_nand_init,
+   .fixup_onfi_param_page = micron_fixup_onfi_param_page,
 };
-- 
2.17.1



[RFC PATCH v2 0/6] mtd: rawnand: support MT29F1G08ABAFAWP-ITE:F

2018-06-18 Thread Chris Packham
Hi,

I'm looking at adding support for the Micron MT29F1G08ABAFAWP-ITE:F chip
to one of our boards which uses the Marvell NFCv2 controller.

This particular chip is a bit odd in that the datasheet states support
for ONFI 1.0 but the revision number field is 00 00. It also is marked
ABAFA but reports internally as ABAGA. Finally it has internal 8-bit ECC
which cannot be disabled.

The existing test in micron_supports_on_die_ecc() determines that on-die
ECC is supported but not mandatory but I know for this chip it is
mandatory despite what set_features returns.

In order for this to work I need to set nand-ecc-mode = "on-die" in my
dts. Ideally I'd like it to be automatic based on what the hardware can
support but that may be asking too much at the moment.

I think I need to do some re-testing on other Micron parts so this
series is still RFC. Although the first 3 patches are probably good to
go.

Here's a dump of the parameter page from the chip I have

: 4f 4e 46 49 00 00 18 00 3f 00 00 00 00 00 00 00 ONFI?...
0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
0020: 4d 49 43 52 4f 4e 20 20 20 20 20 20 4d 54 32 39  MICRON MT29
0030: 46 31 47 30 38 41 42 41 47 41 57 50 20 20 20 20  F1G08ABAGAWP
0040: 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ,...
0050: 00 08 00 00 80 00 00 02 00 00 20 00 40 00 00 00  ..  .@...
0060: 00 04 00 00 01 22 01 14 00 01 05 08 00 00 04 00 ."..
0070: 08 01 0e 00 00 00 00 00 00 00 00 00 00 00 00 00 
0080: 08 3f 00 3f 00 58 02 10 27 46 00 64 00 00 00 00 .?.?.X..'F.d
0090: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00a0: 00 00 00 00 01 00 00 00 00 02 04 80 01 81 04 03 
00b0: 02 01 1e 90 00 00 00 00 00 00 00 00 00 00 00 00 
00c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 85 a6 

Chris Packham (6):
  mtd: rawnand: marvell: Handle on-die ECC
  mtd: rawnand: add manufacturer fixup for ONFI parameter page
  mtd: rawnand: micron: add fixup for ONFI revision
  mtd: rawnand: marvell: Support page size of 2048 with 8-bit ECC
  mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported
features
  mtd: rawnand: micron: support 8/512 on-die ECC

 drivers/mtd/nand/raw/marvell_nand.c |  2 ++
 drivers/mtd/nand/raw/nand_base.c|  4 
 drivers/mtd/nand/raw/nand_micron.c  | 18 ++
 include/linux/mtd/rawnand.h |  1 +
 4 files changed, 21 insertions(+), 4 deletions(-)

-- 
2.17.1



[RFC PATCH v2 0/6] mtd: rawnand: support MT29F1G08ABAFAWP-ITE:F

2018-06-18 Thread Chris Packham
Hi,

I'm looking at adding support for the Micron MT29F1G08ABAFAWP-ITE:F chip
to one of our boards which uses the Marvell NFCv2 controller.

This particular chip is a bit odd in that the datasheet states support
for ONFI 1.0 but the revision number field is 00 00. It also is marked
ABAFA but reports internally as ABAGA. Finally it has internal 8-bit ECC
which cannot be disabled.

The existing test in micron_supports_on_die_ecc() determines that on-die
ECC is supported but not mandatory but I know for this chip it is
mandatory despite what set_features returns.

In order for this to work I need to set nand-ecc-mode = "on-die" in my
dts. Ideally I'd like it to be automatic based on what the hardware can
support but that may be asking too much at the moment.

I think I need to do some re-testing on other Micron parts so this
series is still RFC. Although the first 3 patches are probably good to
go.

Here's a dump of the parameter page from the chip I have

: 4f 4e 46 49 00 00 18 00 3f 00 00 00 00 00 00 00 ONFI?...
0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
0020: 4d 49 43 52 4f 4e 20 20 20 20 20 20 4d 54 32 39  MICRON MT29
0030: 46 31 47 30 38 41 42 41 47 41 57 50 20 20 20 20  F1G08ABAGAWP
0040: 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ,...
0050: 00 08 00 00 80 00 00 02 00 00 20 00 40 00 00 00  ..  .@...
0060: 00 04 00 00 01 22 01 14 00 01 05 08 00 00 04 00 ."..
0070: 08 01 0e 00 00 00 00 00 00 00 00 00 00 00 00 00 
0080: 08 3f 00 3f 00 58 02 10 27 46 00 64 00 00 00 00 .?.?.X..'F.d
0090: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00a0: 00 00 00 00 01 00 00 00 00 02 04 80 01 81 04 03 
00b0: 02 01 1e 90 00 00 00 00 00 00 00 00 00 00 00 00 
00c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 
00f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 85 a6 

Chris Packham (6):
  mtd: rawnand: marvell: Handle on-die ECC
  mtd: rawnand: add manufacturer fixup for ONFI parameter page
  mtd: rawnand: micron: add fixup for ONFI revision
  mtd: rawnand: marvell: Support page size of 2048 with 8-bit ECC
  mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported
features
  mtd: rawnand: micron: support 8/512 on-die ECC

 drivers/mtd/nand/raw/marvell_nand.c |  2 ++
 drivers/mtd/nand/raw/nand_base.c|  4 
 drivers/mtd/nand/raw/nand_micron.c  | 18 ++
 include/linux/mtd/rawnand.h |  1 +
 4 files changed, 21 insertions(+), 4 deletions(-)

-- 
2.17.1



[RFC PATCH v2 6/6] mtd: rawnand: micron: support 8/512 on-die ECC

2018-06-18 Thread Chris Packham
Micron MT29F1G08ABAFAWP-ITE:F supports an on-die ECC with 8 bits
per 512 bytes. Add support for this combination.

Signed-off-by: Chris Packham 
---
This seems deceptively easy so I've probably missed something. I have
tested with running some of the ubifs stress tests from mtd-utils and
things seem OK.

Changes in v2:
- New

 drivers/mtd/nand/raw/nand_micron.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/raw/nand_micron.c 
b/drivers/mtd/nand/raw/nand_micron.c
index d1e8f57544a0..2164dd112f5c 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -240,9 +240,9 @@ static int micron_supports_on_die_ecc(struct nand_chip 
*chip)
 
/*
 * Some Micron NANDs have an on-die ECC of 4/512, some other
-* 8/512. We only support the former.
+* 8/512.
 */
-   if (chip->ecc_strength_ds != 4)
+   if (chip->ecc_strength_ds != 4 && chip->ecc_strength_ds != 8)
return MICRON_ON_DIE_UNSUPPORTED;
 
return MICRON_ON_DIE_SUPPORTED;
@@ -274,9 +274,9 @@ static int micron_nand_init(struct nand_chip *chip)
return -EINVAL;
}
 
-   chip->ecc.bytes = 8;
+   chip->ecc.bytes = chip->ecc_strength_ds * 2;
chip->ecc.size = 512;
-   chip->ecc.strength = 4;
+   chip->ecc.strength = chip->ecc_strength_ds;
chip->ecc.algo = NAND_ECC_BCH;
chip->ecc.read_page = micron_nand_read_page_on_die_ecc;
chip->ecc.write_page = micron_nand_write_page_on_die_ecc;
-- 
2.17.1



[RFC PATCH v2 6/6] mtd: rawnand: micron: support 8/512 on-die ECC

2018-06-18 Thread Chris Packham
Micron MT29F1G08ABAFAWP-ITE:F supports an on-die ECC with 8 bits
per 512 bytes. Add support for this combination.

Signed-off-by: Chris Packham 
---
This seems deceptively easy so I've probably missed something. I have
tested with running some of the ubifs stress tests from mtd-utils and
things seem OK.

Changes in v2:
- New

 drivers/mtd/nand/raw/nand_micron.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/raw/nand_micron.c 
b/drivers/mtd/nand/raw/nand_micron.c
index d1e8f57544a0..2164dd112f5c 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -240,9 +240,9 @@ static int micron_supports_on_die_ecc(struct nand_chip 
*chip)
 
/*
 * Some Micron NANDs have an on-die ECC of 4/512, some other
-* 8/512. We only support the former.
+* 8/512.
 */
-   if (chip->ecc_strength_ds != 4)
+   if (chip->ecc_strength_ds != 4 && chip->ecc_strength_ds != 8)
return MICRON_ON_DIE_UNSUPPORTED;
 
return MICRON_ON_DIE_SUPPORTED;
@@ -274,9 +274,9 @@ static int micron_nand_init(struct nand_chip *chip)
return -EINVAL;
}
 
-   chip->ecc.bytes = 8;
+   chip->ecc.bytes = chip->ecc_strength_ds * 2;
chip->ecc.size = 512;
-   chip->ecc.strength = 4;
+   chip->ecc.strength = chip->ecc_strength_ds;
chip->ecc.algo = NAND_ECC_BCH;
chip->ecc.read_page = micron_nand_read_page_on_die_ecc;
chip->ecc.write_page = micron_nand_write_page_on_die_ecc;
-- 
2.17.1



[RFC PATCH v2 4/6] mtd: rawnand: marvell: Support page size of 2048 with 8-bit ECC

2018-06-18 Thread Chris Packham
The MT29F1G08ABAFAWP-ITE:F chip has 2048 byte pages and requires a
minimum ECC strength of 8-bits. Allow for this combination of
requirements using the marvell_nand controller.

Signed-off-by: Chris Packham 
---
I've tried to follow the recommended AN-379 from Marvell. They do seem
to have information that covers this particular set of chip
requirements.

As discussed I don't think my particular configuration will be supported
with this change due to the conflict with the on-die ECC. But this may
be useful for others so I've left it in.

Changes in v2:
- update as suggested by miquel

 drivers/mtd/nand/raw/marvell_nand.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/raw/marvell_nand.c 
b/drivers/mtd/nand/raw/marvell_nand.c
index ba6889bbe802..96aee8ffd408 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -217,6 +217,7 @@ static const struct marvell_hw_ecc_layout 
marvell_nfc_layouts[] = {
MARVELL_LAYOUT(  512,   512,  1,  1,  1,  512,  8,  8,  0,  0,  0),
MARVELL_LAYOUT( 2048,   512,  1,  1,  1, 2048, 40, 24,  0,  0,  0),
MARVELL_LAYOUT( 2048,   512,  4,  1,  1, 2048, 32, 30,  0,  0,  0),
+   MARVELL_LAYOUT( 2048,   512,  8,  2,  1, 1024, 0, 30,  1024,  32,  30),
MARVELL_LAYOUT( 4096,   512,  4,  2,  2, 2048, 32, 30,  0,  0,  0),
MARVELL_LAYOUT( 4096,   512,  8,  5,  4, 1024,  0, 30,  0, 64, 30),
 };
-- 
2.17.1



[RFC PATCH v2 4/6] mtd: rawnand: marvell: Support page size of 2048 with 8-bit ECC

2018-06-18 Thread Chris Packham
The MT29F1G08ABAFAWP-ITE:F chip has 2048 byte pages and requires a
minimum ECC strength of 8-bits. Allow for this combination of
requirements using the marvell_nand controller.

Signed-off-by: Chris Packham 
---
I've tried to follow the recommended AN-379 from Marvell. They do seem
to have information that covers this particular set of chip
requirements.

As discussed I don't think my particular configuration will be supported
with this change due to the conflict with the on-die ECC. But this may
be useful for others so I've left it in.

Changes in v2:
- update as suggested by miquel

 drivers/mtd/nand/raw/marvell_nand.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/raw/marvell_nand.c 
b/drivers/mtd/nand/raw/marvell_nand.c
index ba6889bbe802..96aee8ffd408 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -217,6 +217,7 @@ static const struct marvell_hw_ecc_layout 
marvell_nfc_layouts[] = {
MARVELL_LAYOUT(  512,   512,  1,  1,  1,  512,  8,  8,  0,  0,  0),
MARVELL_LAYOUT( 2048,   512,  1,  1,  1, 2048, 40, 24,  0,  0,  0),
MARVELL_LAYOUT( 2048,   512,  4,  1,  1, 2048, 32, 30,  0,  0,  0),
+   MARVELL_LAYOUT( 2048,   512,  8,  2,  1, 1024, 0, 30,  1024,  32,  30),
MARVELL_LAYOUT( 4096,   512,  4,  2,  2, 2048, 32, 30,  0,  0,  0),
MARVELL_LAYOUT( 4096,   512,  8,  5,  4, 1024,  0, 30,  0, 64, 30),
 };
-- 
2.17.1



[RFC PATCH v2 5/6] mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported features

2018-06-18 Thread Chris Packham
Add ONFI_FEATURE_ON_DIE_ECC to the set/get features list for Micron
NAND flash.

Signed-off-by: Chris Packham 
---
Changes in v2:
- New

 drivers/mtd/nand/raw/nand_micron.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/nand/raw/nand_micron.c 
b/drivers/mtd/nand/raw/nand_micron.c
index e582c9e61715..d1e8f57544a0 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
 
if (p->supports_set_get_features) {
set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list);
+   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list);
set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list);
+   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list);
}
 
return 0;
-- 
2.17.1



[RFC PATCH v2 5/6] mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported features

2018-06-18 Thread Chris Packham
Add ONFI_FEATURE_ON_DIE_ECC to the set/get features list for Micron
NAND flash.

Signed-off-by: Chris Packham 
---
Changes in v2:
- New

 drivers/mtd/nand/raw/nand_micron.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mtd/nand/raw/nand_micron.c 
b/drivers/mtd/nand/raw/nand_micron.c
index e582c9e61715..d1e8f57544a0 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
 
if (p->supports_set_get_features) {
set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list);
+   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list);
set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list);
+   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list);
}
 
return 0;
-- 
2.17.1



Re: [RESEND PATCH v12 2/2] sched/rt: Add support for SD_PREFER_SIBLING on find_lowest_rq()

2018-06-18 Thread Byungchul Park
On Tue, Jun 19, 2018 at 6:42 AM, Steven Rostedt  wrote:
> On Mon, 18 Jun 2018 13:58:09 +0900
> Byungchul Park  wrote:
>
>> Hello Steven,
>>
>> I've changed the code a little bit to avoid a compile warning caused by
>> 'const' args of find_cpu(). Can I keep your Reviewed-by?
>>
>> BEFORE:
>> static int find_cpu(const struct cpumask *mask,
>>   const struct sched_domain *sd,
>>   const struct sched_domain *prefer)
>>
>> AFTER:
>> static int find_cpu(const struct cpumask *mask,
>>   struct sched_domain *sd,
>>   struct sched_domain *prefer)
>>
>> (I temporarily removed the Reviewed-by you gave me.)
>> Reviewed-by: Steven Rostedt (VMware) 
>
> I would fix sched_domain_span() to take a constant and keep the
> previous patch.

Right. I also considered it like you and asked it here:

   https://lkml.org/lkml/2018/1/11/106

But I didn't get any answer so tried to keep sched_domain_span()
unchanged conservatively.

Peterz, what's your opinion?

>
> -- Steve

-- 
Thanks,
Byungchul


Re: [PATCH v11 03/10] drivers: qcom: rpmh-rsc: log RPMH requests in FTRACE

2018-06-18 Thread Bjorn Andersson
On Mon 18 Jun 06:37 PDT 2018, Raju P L S S S N wrote:
> diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
> index 39d3a05..cb6300f 100644
> --- a/drivers/soc/qcom/Makefile
> +++ b/drivers/soc/qcom/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0
> +CFLAGS_rpmh-rsc.o := -I$(src)

Please rebase v12 onto v4.18-rc1, to make sure that the series apply
cleanly.

Regards,
Bjorn


Re: [RESEND PATCH v12 2/2] sched/rt: Add support for SD_PREFER_SIBLING on find_lowest_rq()

2018-06-18 Thread Byungchul Park
On Tue, Jun 19, 2018 at 6:42 AM, Steven Rostedt  wrote:
> On Mon, 18 Jun 2018 13:58:09 +0900
> Byungchul Park  wrote:
>
>> Hello Steven,
>>
>> I've changed the code a little bit to avoid a compile warning caused by
>> 'const' args of find_cpu(). Can I keep your Reviewed-by?
>>
>> BEFORE:
>> static int find_cpu(const struct cpumask *mask,
>>   const struct sched_domain *sd,
>>   const struct sched_domain *prefer)
>>
>> AFTER:
>> static int find_cpu(const struct cpumask *mask,
>>   struct sched_domain *sd,
>>   struct sched_domain *prefer)
>>
>> (I temporarily removed the Reviewed-by you gave me.)
>> Reviewed-by: Steven Rostedt (VMware) 
>
> I would fix sched_domain_span() to take a constant and keep the
> previous patch.

Right. I also considered it like you and asked it here:

   https://lkml.org/lkml/2018/1/11/106

But I didn't get any answer so tried to keep sched_domain_span()
unchanged conservatively.

Peterz, what's your opinion?

>
> -- Steve

-- 
Thanks,
Byungchul


Re: [PATCH v11 03/10] drivers: qcom: rpmh-rsc: log RPMH requests in FTRACE

2018-06-18 Thread Bjorn Andersson
On Mon 18 Jun 06:37 PDT 2018, Raju P L S S S N wrote:
> diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
> index 39d3a05..cb6300f 100644
> --- a/drivers/soc/qcom/Makefile
> +++ b/drivers/soc/qcom/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0
> +CFLAGS_rpmh-rsc.o := -I$(src)

Please rebase v12 onto v4.18-rc1, to make sure that the series apply
cleanly.

Regards,
Bjorn


[PATCH 2/3] fs: fsnotify: account fsnotify metadata to kmemcg

2018-06-18 Thread Shakeel Butt
A lot of memory can be consumed by the events generated for the huge or
unlimited queues if there is either no or slow listener.  This can cause
system level memory pressure or OOMs.  So, it's better to account the
fsnotify kmem caches to the memcg of the listener.

There are seven fsnotify kmem caches and among them allocations from
dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and
inotify_inode_mark_cachep happens in the context of syscall from the
listener.  So, SLAB_ACCOUNT is enough for these caches.

The objects from fsnotify_mark_connector_cachep are not accounted as they
are small compared to the notification mark or events and it is unclear
whom to account connector to since it is shared by all events attached to
the inode.

The allocations from the event caches happen in the context of the event
producer.  For such caches we will need to remote charge the allocations
to the listener's memcg.  Thus we save the memcg reference in the
fsnotify_group structure of the listener.

This patch has also moved the members of fsnotify_group to keep the size
same, at least for 64 bit build, even with additional member by filling
the holes.

Signed-off-by: Shakeel Butt 
Acked-by: Jan Kara 
Cc: Michal Hocko 
Cc: Amir Goldstein 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: David Rientjes 
Cc: Joonsoo Kim 
Cc: Greg Thelen 
Cc: Johannes Weiner 
Cc: Vladimir Davydov 
Cc: Mel Gorman 
Cc: Vlastimil Babka 
Cc: Alexander Viro 
Cc: Andrew Morton 
---
Changelog since v5:
- None

Changelog since v4:
- Fixed the build for CONFIG_MEMCG=n

Changelog since v3:
- Rebased over Jan's patches.
- Some cleanup based on Amir's comments.

Changelog since v2:
- None

Changelog since v1:
- no more charging fsnotify_mark_connector objects
- Fixed the build for SLOB

 fs/notify/dnotify/dnotify.c  |  5 +++--
 fs/notify/fanotify/fanotify.c|  6 --
 fs/notify/fanotify/fanotify_user.c   |  5 -
 fs/notify/group.c|  6 ++
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c |  5 -
 include/linux/fsnotify_backend.h | 12 
 include/linux/memcontrol.h   |  7 +++
 mm/memcontrol.c  | 15 +--
 9 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e2bea2ac5dfb..a6365e6bc047 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -384,8 +384,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned 
long arg)
 
 static int __init dnotify_init(void)
 {
-   dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
+   dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
+ SLAB_PANIC|SLAB_ACCOUNT);
+   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
 
dnotify_group = fsnotify_alloc_group(_fsnotify_ops);
if (IS_ERR(dnotify_group))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..c8d6e37a4855 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -154,14 +154,16 @@ struct fanotify_event_info *fanotify_alloc_event(struct 
fsnotify_group *group,
if (fanotify_is_perm_event(mask)) {
struct fanotify_perm_event_info *pevent;
 
-   pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
+   pevent = kmem_cache_alloc_memcg(fanotify_perm_event_cachep, gfp,
+   group->memcg);
if (!pevent)
return NULL;
event = >fae;
pevent->response = 0;
goto init;
}
-   event = kmem_cache_alloc(fanotify_event_cachep, gfp);
+   event = kmem_cache_alloc_memcg(fanotify_event_cachep, gfp,
+  group->memcg);
if (!event)
return NULL;
 init: __maybe_unused
diff --git a/fs/notify/fanotify/fanotify_user.c 
b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..0cf45041dc32 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -756,6 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, 
unsigned int, event_f_flags)
 
group->fanotify_data.user = user;
atomic_inc(>fanotify_listeners);
+   group->memcg = get_mem_cgroup_from_mm(current->mm);
 
oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
if (unlikely(!oevent)) {
@@ -957,7 +959,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-   fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+   fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+

[PATCH 2/3] fs: fsnotify: account fsnotify metadata to kmemcg

2018-06-18 Thread Shakeel Butt
A lot of memory can be consumed by the events generated for the huge or
unlimited queues if there is either no or slow listener.  This can cause
system level memory pressure or OOMs.  So, it's better to account the
fsnotify kmem caches to the memcg of the listener.

There are seven fsnotify kmem caches and among them allocations from
dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and
inotify_inode_mark_cachep happens in the context of syscall from the
listener.  So, SLAB_ACCOUNT is enough for these caches.

The objects from fsnotify_mark_connector_cachep are not accounted as they
are small compared to the notification mark or events and it is unclear
whom to account connector to since it is shared by all events attached to
the inode.

The allocations from the event caches happen in the context of the event
producer.  For such caches we will need to remote charge the allocations
to the listener's memcg.  Thus we save the memcg reference in the
fsnotify_group structure of the listener.

This patch has also moved the members of fsnotify_group to keep the size
same, at least for 64 bit build, even with additional member by filling
the holes.

Signed-off-by: Shakeel Butt 
Acked-by: Jan Kara 
Cc: Michal Hocko 
Cc: Amir Goldstein 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: David Rientjes 
Cc: Joonsoo Kim 
Cc: Greg Thelen 
Cc: Johannes Weiner 
Cc: Vladimir Davydov 
Cc: Mel Gorman 
Cc: Vlastimil Babka 
Cc: Alexander Viro 
Cc: Andrew Morton 
---
Changelog since v5:
- None

Changelog since v4:
- Fixed the build for CONFIG_MEMCG=n

Changelog since v3:
- Rebased over Jan's patches.
- Some cleanup based on Amir's comments.

Changelog since v2:
- None

Changelog since v1:
- no more charging fsnotify_mark_connector objects
- Fixed the build for SLOB

 fs/notify/dnotify/dnotify.c  |  5 +++--
 fs/notify/fanotify/fanotify.c|  6 --
 fs/notify/fanotify/fanotify_user.c   |  5 -
 fs/notify/group.c|  6 ++
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c |  5 -
 include/linux/fsnotify_backend.h | 12 
 include/linux/memcontrol.h   |  7 +++
 mm/memcontrol.c  | 15 +--
 9 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e2bea2ac5dfb..a6365e6bc047 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -384,8 +384,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned 
long arg)
 
 static int __init dnotify_init(void)
 {
-   dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
+   dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
+ SLAB_PANIC|SLAB_ACCOUNT);
+   dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
 
dnotify_group = fsnotify_alloc_group(_fsnotify_ops);
if (IS_ERR(dnotify_group))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..c8d6e37a4855 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -154,14 +154,16 @@ struct fanotify_event_info *fanotify_alloc_event(struct 
fsnotify_group *group,
if (fanotify_is_perm_event(mask)) {
struct fanotify_perm_event_info *pevent;
 
-   pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
+   pevent = kmem_cache_alloc_memcg(fanotify_perm_event_cachep, gfp,
+   group->memcg);
if (!pevent)
return NULL;
event = >fae;
pevent->response = 0;
goto init;
}
-   event = kmem_cache_alloc(fanotify_event_cachep, gfp);
+   event = kmem_cache_alloc_memcg(fanotify_event_cachep, gfp,
+  group->memcg);
if (!event)
return NULL;
 init: __maybe_unused
diff --git a/fs/notify/fanotify/fanotify_user.c 
b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..0cf45041dc32 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -756,6 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, 
unsigned int, event_f_flags)
 
group->fanotify_data.user = user;
atomic_inc(>fanotify_listeners);
+   group->memcg = get_mem_cgroup_from_mm(current->mm);
 
oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
if (unlikely(!oevent)) {
@@ -957,7 +959,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-   fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+   fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+

[PATCH 3/3] fs, mm: account buffer_head to kmemcg

2018-06-18 Thread Shakeel Butt
The buffer_head can consume a significant amount of system memory and
is directly related to the amount of page cache. In our production
environment we have observed that a lot of machines are spending a
significant amount of memory as buffer_head and can not be left as
system memory overhead.

Charging buffer_head is not as simple as adding __GFP_ACCOUNT to the
allocation. The buffer_heads can be allocated in a memcg different from
the memcg of the page for which buffer_heads are being allocated. One
concrete example is memory reclaim. The reclaim can trigger I/O of pages
of any memcg on the system. So, the right way to charge buffer_head is
to extract the memcg from the page for which buffer_heads are being
allocated and then use targeted memcg charging API.

Signed-off-by: Shakeel Butt 
Cc: Jan Kara 
Cc: Greg Thelen 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Vladimir Davydov 
Cc: Alexander Viro 
Cc: Andrew Morton 
---
 fs/buffer.c| 14 +-
 include/linux/memcontrol.h |  7 +++
 mm/memcontrol.c| 21 +
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 8194e3049fc5..26389b7a3cab 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -815,10 +815,17 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
struct buffer_head *bh, *head;
gfp_t gfp = GFP_NOFS;
long offset;
+   struct mem_cgroup *old_memcg;
+   struct mem_cgroup *memcg = get_mem_cgroup_from_page(page);
 
if (retry)
gfp |= __GFP_NOFAIL;
 
+   if (memcg) {
+   gfp |= __GFP_ACCOUNT;
+   old_memcg = memalloc_memcg_save(memcg);
+   }
+
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
@@ -835,6 +842,11 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+out:
+   if (memcg) {
+   memalloc_memcg_restore(old_memcg);
+#ifdef CONFIG_MEMCG
+   css_put(>css);
+#endif
+   }
return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -848,7 +860,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
} while (head);
}
 
-   return NULL;
+   goto out;
 }
 EXPORT_SYMBOL_GPL(alloc_page_buffers);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c857be8a9b7..d53609978eb7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -380,6 +380,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct 
*p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -864,6 +866,11 @@ static inline struct mem_cgroup 
*get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
 }
 
+static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+   return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c481e661e051..f9a9a79117b9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -712,6 +712,27 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct 
*mm)
return memcg;
 }
 
+/**
+ * get_mem_cgroup_from_page: Obtain a reference on given page's memcg.
+ * @page: page from which memcg should be extracted.
+ *
+ * Obtain a reference on page->memcg and returns it if successful. Otherwise
+ * NULL is returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+   struct mem_cgroup *memcg = page->mem_cgroup;
+
+   if (mem_cgroup_disabled() || !memcg)
+   return NULL;
+
+   rcu_read_lock();
+   if (!css_tryget_online(>css))
+   memcg = NULL;
+   rcu_read_unlock();
+   return memcg;
+}
+
 static __always_inline struct mem_cgroup *get_mem_cgroup(
struct mem_cgroup *memcg, struct mm_struct *mm)
 {
-- 
2.18.0.rc1.244.gcf134e6275-goog



[PATCH 1/3] mm: memcg: remote memcg charging for kmem allocations

2018-06-18 Thread Shakeel Butt
Introduce the memcg variant for kmalloc[_node] and
kmem_cache_alloc[_node]. For kmem_cache_alloc, the kernel switches the
root kmem cache with the memcg specific kmem cache for __GFP_ACCOUNT
allocations to charge those allocations to the memcg. However, the memcg
to charge is extracted from the current task_struct. This patch
introduces the variant of kmem cache allocation functions where the memcg
can be provided explicitly by the caller instead of deducing the memcg
from the current task.

The kmalloc allocations are underlying served using the kmem caches unless
the size of the allocation request is larger than KMALLOC_MAX_CACHE_SIZE,
in which case, the kmem caches are bypassed and the request is routed
directly to page allocator. So, for __GFP_ACCOUNT kmalloc allocations,
the memcg of current task is charged. This patch introduces memcg variant
of kmalloc functions to allow callers to provide memcg for charging.

These functions are useful for use-cases where the allocations should be
charged to the memcg different from the memcg of the caller. One such
concrete use-case is the allocations for fsnotify event objects where the
objects should be charged to the listener instead of the producer.

One requirement to call these functions is that the caller must have the
reference to the memcg provided to these functions. If reference
acquition on the given memcg is failed (it can fail if memcg is offline)
then the current's memcg is tried. These functions implicitly assumes
that the caller wants a __GFP_ACCOUNT allocation.

This patch also introduces scope API for targeted memcg charging. All
the __GFP_ACCOUNT allocations between memalloc_memcg_save(target_memcg)
and memalloc_memcg_restore(old_memcg) will be charged to target_memcg.

Traditionally kmem charging is skipped for allocations by kthreads and
allocations during interrupts. The reason is that the current's memcg
might not be the right owner for such allocations. However targeted
memcg charging does not have such limitation and can work even for
allocations by kthreads and for allocations during interrupts. For now
due to lack of actual use-case, targeted memcg charging for such
allocations is not added. Though this might change in future.

Signed-off-by: Shakeel Butt 
Cc: Michal Hocko 
Cc: Jan Kara 
Cc: Amir Goldstein 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: David Rientjes 
Cc: Joonsoo Kim 
Cc: Greg Thelen 
Cc: Johannes Weiner 
Cc: Vladimir Davydov 
Cc: Mel Gorman 
Cc: Vlastimil Babka 
Cc: Alexander Viro 
Cc: Andrew Morton 
---
Changelog sinve v5:
- Added more explanation in commit message.
- Added handling of NULL memcg for targeted memcg allocation functions.

Changelog since v4:
- Removed branch from hot path of memory charging.

Changelog since v3:
- Added node variant of directed kmem allocation functions.

Changelog since v2:
- Merge the kmalloc_memcg patch into this patch.
- Instead of plumbing memcg throughout, use field in task_struct to pass
  the target_memcg.

Changelog since v1:
- Fixed build for SLOB

 include/linux/sched.h|  3 ++
 include/linux/sched/mm.h | 24 
 include/linux/slab.h | 83 
 kernel/fork.c|  3 ++
 mm/memcontrol.c  | 18 -
 5 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d93a27..cbd0def60fd4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1149,6 +1149,9 @@ struct task_struct {
 
/* Number of pages to reclaim on returning to userland: */
unsigned intmemcg_nr_pages_over_high;
+
+   /* Used by memcontrol for targeted memcg charge: */
+   struct mem_cgroup   *target_memcg;
 #endif
 
 #ifdef CONFIG_UPROBES
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..2ffb194f3f32 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -248,6 +248,30 @@ static inline void memalloc_noreclaim_restore(unsigned int 
flags)
current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *memalloc_memcg_save(struct mem_cgroup *memcg)
+{
+   struct mem_cgroup *old_memcg = current->target_memcg;
+
+   current->target_memcg = memcg;
+   return old_memcg;
+}
+
+static inline void memalloc_memcg_restore(struct mem_cgroup *memcg)
+{
+   current->target_memcg = memcg;
+}
+#else
+static inline struct mem_cgroup *memalloc_memcg_save(struct mem_cgroup *memcg)
+{
+   return NULL;
+}
+
+static inline void memalloc_memcg_restore(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG */
+
 #ifdef CONFIG_MEMBARRIER
 enum {
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY= (1U << 0),
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 14e3fe4bd6a1..2f6319fa0d3d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -16,6 +16,7 @@
 #include 
 #include 
 

[PATCH v6 0/3] Directed kmem charging

2018-06-18 Thread Shakeel Butt
This patchset introduces memcg variant memory allocation functions.  The
caller can explicitly pass the memcg to charge for kmem allocations.
Currently the kernel, for __GFP_ACCOUNT memory allocation requests,
extract the memcg of the current task to charge for the kmem allocation.
This patch series introduces kmem allocation functions where the caller
can pass the pointer to the remote memcg.  The remote memcg will be
charged for the allocation instead of the memcg of the caller.  However
the caller must have a reference to the remote memcg.  This patch series
also introduces scope API for targeted memcg charging. So, all the
__GFP_ACCOUNT alloctions within the specified scope will be charged to
the given target memcg.

Shakeel Butt (3):
  mm: memcg: remote memcg charging for kmem allocations
  fs: fsnotify: account fsnotify metadata to kmemcg
  fs, mm: account buffer_head to kmemcg

 fs/buffer.c  | 14 -
 fs/notify/dnotify/dnotify.c  |  5 +-
 fs/notify/fanotify/fanotify.c|  6 +-
 fs/notify/fanotify/fanotify_user.c   |  5 +-
 fs/notify/group.c|  6 ++
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c |  5 +-
 include/linux/fsnotify_backend.h | 12 ++--
 include/linux/memcontrol.h   | 14 +
 include/linux/sched.h|  3 +
 include/linux/sched/mm.h | 24 
 include/linux/slab.h | 83 
 kernel/fork.c|  3 +
 mm/memcontrol.c  | 54 --
 14 files changed, 220 insertions(+), 16 deletions(-)

-- 
2.18.0.rc1.244.gcf134e6275-goog



[PATCH 3/3] fs, mm: account buffer_head to kmemcg

2018-06-18 Thread Shakeel Butt
The buffer_head can consume a significant amount of system memory and
is directly related to the amount of page cache. In our production
environment we have observed that a lot of machines are spending a
significant amount of memory as buffer_head and can not be left as
system memory overhead.

Charging buffer_head is not as simple as adding __GFP_ACCOUNT to the
allocation. The buffer_heads can be allocated in a memcg different from
the memcg of the page for which buffer_heads are being allocated. One
concrete example is memory reclaim. The reclaim can trigger I/O of pages
of any memcg on the system. So, the right way to charge buffer_head is
to extract the memcg from the page for which buffer_heads are being
allocated and then use targeted memcg charging API.

Signed-off-by: Shakeel Butt 
Cc: Jan Kara 
Cc: Greg Thelen 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Vladimir Davydov 
Cc: Alexander Viro 
Cc: Andrew Morton 
---
 fs/buffer.c| 14 +-
 include/linux/memcontrol.h |  7 +++
 mm/memcontrol.c| 21 +
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 8194e3049fc5..26389b7a3cab 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -815,10 +815,17 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
struct buffer_head *bh, *head;
gfp_t gfp = GFP_NOFS;
long offset;
+   struct mem_cgroup *old_memcg;
+   struct mem_cgroup *memcg = get_mem_cgroup_from_page(page);
 
if (retry)
gfp |= __GFP_NOFAIL;
 
+   if (memcg) {
+   gfp |= __GFP_ACCOUNT;
+   old_memcg = memalloc_memcg_save(memcg);
+   }
+
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
@@ -835,6 +842,11 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
}
+out:
+   if (memcg) {
+   memalloc_memcg_restore(old_memcg);
+#ifdef CONFIG_MEMCG
+   css_put(>css);
+#endif
+   }
return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -848,7 +860,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
unsigned long size,
} while (head);
}
 
-   return NULL;
+   goto out;
 }
 EXPORT_SYMBOL_GPL(alloc_page_buffers);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c857be8a9b7..d53609978eb7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -380,6 +380,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct 
*p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -864,6 +866,11 @@ static inline struct mem_cgroup 
*get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
 }
 
+static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+   return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c481e661e051..f9a9a79117b9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -712,6 +712,27 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct 
*mm)
return memcg;
 }
 
+/**
+ * get_mem_cgroup_from_page: Obtain a reference on given page's memcg.
+ * @page: page from which memcg should be extracted.
+ *
+ * Obtain a reference on page->memcg and returns it if successful. Otherwise
+ * NULL is returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+   struct mem_cgroup *memcg = page->mem_cgroup;
+
+   if (mem_cgroup_disabled() || !memcg)
+   return NULL;
+
+   rcu_read_lock();
+   if (!css_tryget_online(>css))
+   memcg = NULL;
+   rcu_read_unlock();
+   return memcg;
+}
+
 static __always_inline struct mem_cgroup *get_mem_cgroup(
struct mem_cgroup *memcg, struct mm_struct *mm)
 {
-- 
2.18.0.rc1.244.gcf134e6275-goog



[PATCH 1/3] mm: memcg: remote memcg charging for kmem allocations

2018-06-18 Thread Shakeel Butt
Introduce the memcg variant for kmalloc[_node] and
kmem_cache_alloc[_node]. For kmem_cache_alloc, the kernel switches the
root kmem cache with the memcg specific kmem cache for __GFP_ACCOUNT
allocations to charge those allocations to the memcg. However, the memcg
to charge is extracted from the current task_struct. This patch
introduces the variant of kmem cache allocation functions where the memcg
can be provided explicitly by the caller instead of deducing the memcg
from the current task.

The kmalloc allocations are underlying served using the kmem caches unless
the size of the allocation request is larger than KMALLOC_MAX_CACHE_SIZE,
in which case, the kmem caches are bypassed and the request is routed
directly to page allocator. So, for __GFP_ACCOUNT kmalloc allocations,
the memcg of current task is charged. This patch introduces memcg variant
of kmalloc functions to allow callers to provide memcg for charging.

These functions are useful for use-cases where the allocations should be
charged to the memcg different from the memcg of the caller. One such
concrete use-case is the allocations for fsnotify event objects where the
objects should be charged to the listener instead of the producer.

One requirement to call these functions is that the caller must have the
reference to the memcg provided to these functions. If reference
acquition on the given memcg is failed (it can fail if memcg is offline)
then the current's memcg is tried. These functions implicitly assumes
that the caller wants a __GFP_ACCOUNT allocation.

This patch also introduces scope API for targeted memcg charging. All
the __GFP_ACCOUNT allocations between memalloc_memcg_save(target_memcg)
and memalloc_memcg_restore(old_memcg) will be charged to target_memcg.

Traditionally kmem charging is skipped for allocations by kthreads and
allocations during interrupts. The reason is that the current's memcg
might not be the right owner for such allocations. However targeted
memcg charging does not have such limitation and can work even for
allocations by kthreads and for allocations during interrupts. For now
due to lack of actual use-case, targeted memcg charging for such
allocations is not added. Though this might change in future.

Signed-off-by: Shakeel Butt 
Cc: Michal Hocko 
Cc: Jan Kara 
Cc: Amir Goldstein 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: David Rientjes 
Cc: Joonsoo Kim 
Cc: Greg Thelen 
Cc: Johannes Weiner 
Cc: Vladimir Davydov 
Cc: Mel Gorman 
Cc: Vlastimil Babka 
Cc: Alexander Viro 
Cc: Andrew Morton 
---
Changelog sinve v5:
- Added more explanation in commit message.
- Added handling of NULL memcg for targeted memcg allocation functions.

Changelog since v4:
- Removed branch from hot path of memory charging.

Changelog since v3:
- Added node variant of directed kmem allocation functions.

Changelog since v2:
- Merge the kmalloc_memcg patch into this patch.
- Instead of plumbing memcg throughout, use field in task_struct to pass
  the target_memcg.

Changelog since v1:
- Fixed build for SLOB

 include/linux/sched.h|  3 ++
 include/linux/sched/mm.h | 24 
 include/linux/slab.h | 83 
 kernel/fork.c|  3 ++
 mm/memcontrol.c  | 18 -
 5 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d93a27..cbd0def60fd4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1149,6 +1149,9 @@ struct task_struct {
 
/* Number of pages to reclaim on returning to userland: */
unsigned intmemcg_nr_pages_over_high;
+
+   /* Used by memcontrol for targeted memcg charge: */
+   struct mem_cgroup   *target_memcg;
 #endif
 
 #ifdef CONFIG_UPROBES
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..2ffb194f3f32 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -248,6 +248,30 @@ static inline void memalloc_noreclaim_restore(unsigned int 
flags)
current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *memalloc_memcg_save(struct mem_cgroup *memcg)
+{
+   struct mem_cgroup *old_memcg = current->target_memcg;
+
+   current->target_memcg = memcg;
+   return old_memcg;
+}
+
+static inline void memalloc_memcg_restore(struct mem_cgroup *memcg)
+{
+   current->target_memcg = memcg;
+}
+#else
+static inline struct mem_cgroup *memalloc_memcg_save(struct mem_cgroup *memcg)
+{
+   return NULL;
+}
+
+static inline void memalloc_memcg_restore(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG */
+
 #ifdef CONFIG_MEMBARRIER
 enum {
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY= (1U << 0),
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 14e3fe4bd6a1..2f6319fa0d3d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -16,6 +16,7 @@
 #include 
 #include 
 

[PATCH v6 0/3] Directed kmem charging

2018-06-18 Thread Shakeel Butt
This patchset introduces memcg variant memory allocation functions.  The
caller can explicitly pass the memcg to charge for kmem allocations.
Currently the kernel, for __GFP_ACCOUNT memory allocation requests,
extract the memcg of the current task to charge for the kmem allocation.
This patch series introduces kmem allocation functions where the caller
can pass the pointer to the remote memcg.  The remote memcg will be
charged for the allocation instead of the memcg of the caller.  However
the caller must have a reference to the remote memcg.  This patch series
also introduces scope API for targeted memcg charging. So, all the
__GFP_ACCOUNT alloctions within the specified scope will be charged to
the given target memcg.

Shakeel Butt (3):
  mm: memcg: remote memcg charging for kmem allocations
  fs: fsnotify: account fsnotify metadata to kmemcg
  fs, mm: account buffer_head to kmemcg

 fs/buffer.c  | 14 -
 fs/notify/dnotify/dnotify.c  |  5 +-
 fs/notify/fanotify/fanotify.c|  6 +-
 fs/notify/fanotify/fanotify_user.c   |  5 +-
 fs/notify/group.c|  6 ++
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c |  5 +-
 include/linux/fsnotify_backend.h | 12 ++--
 include/linux/memcontrol.h   | 14 +
 include/linux/sched.h|  3 +
 include/linux/sched/mm.h | 24 
 include/linux/slab.h | 83 
 kernel/fork.c|  3 +
 mm/memcontrol.c  | 54 --
 14 files changed, 220 insertions(+), 16 deletions(-)

-- 
2.18.0.rc1.244.gcf134e6275-goog



Re: [PATCH] configfs: Use kvasprintf() instead of open-coding it

2018-06-18 Thread Christoph Hellwig
Thanks Bart,

applied to the configfs tree for 4.19.


Re: [PATCH] configfs: Use kvasprintf() instead of open-coding it

2018-06-18 Thread Christoph Hellwig
Thanks Bart,

applied to the configfs tree for 4.19.


Re: Possible regression in "slab, slub: skip unnecessary kasan_cache_shutdown()"

2018-06-18 Thread Shakeel Butt
On Mon, Jun 18, 2018 at 9:08 PM Jason A. Donenfeld  wrote:
>
> On Tue, Jun 19, 2018 at 5:59 AM Shakeel Butt  wrote:
> > Hi Jason, yes please do send me the test suite with the kernel config.
>
> $ git clone https://git.zx2c4.com/WireGuard
> $ cd WireGuard/src
> $ [[ $(gcc -v 2>&1) =~ gcc\ version\ 8\.1\.0 ]] || echo crash needs 8.1
> $ export DEBUG_KERNEL=yes
> $ export KERNEL_VERSION=4.18-rc1
> $ make test-qemu -j$(nproc)
>
> This will build a kernel and a minimal userland and load it in qemu,
> which must be installed.
>
> This code is what causes the crash:
> The self test that's executed:
> https://git.zx2c4.com/WireGuard/tree/src/selftest/ratelimiter.h
> Which exercises this code:
> https://git.zx2c4.com/WireGuard/tree/src/ratelimiter.c
>
> The problem occurs after gc_entries(NULL) frees things (line 124 in
> ratelimiter.h above), and then line 133 reallocates those objects.
> Sometime after that happens, elsewhere in the kernel invokes this
> kasan issue in the kasan cache cleanup.
>

I will try to repro with your test suite sometime later this week.
However from high level code inspection, I see that the code is
creating a 'entry_cache' kmem_cache which is destroyed by
ratelimiter_uninit on last reference drop. Currently refcnt in your
code can underflow, through it does not seem like the selftest will
cause the underflow but still you should fix it.

>From high level your code seems fine. Does the issue occur on first
try of selftest? Basically I wanted to ask if kmem_cache_destroy of
your entry_cache was ever executed and have you tried to run this
selftest multiple time while the system was up.

As Dmitry already asked, are you using SLAB or SLUB?

> I realize it's disappointing that the test case here is in WireGuard,
> which isn't (yet!) upstream. That's why in my original message I
> wrote:
>
> > Rather, it looks like this
> > commit introduces a performance optimization, rather than a
> > correctness fix, so it seems that whatever test case is failing is
> > likely an incorrect failure. Does that seem like an accurate
> > possibility to you?
>
> I was hoping to only point you toward my own code after establishing
> the possibility that the bug is not my own. If you still think there's
> a chance this is due to my own correctness issue, and your commit has
> simply unearthed it, let me know and I'll happily keep debugging on my
> own before pinging you further.
>

Sorry, I can not really give a definitive answer.

Shakeel


Re: Possible regression in "slab, slub: skip unnecessary kasan_cache_shutdown()"

2018-06-18 Thread Shakeel Butt
On Mon, Jun 18, 2018 at 9:08 PM Jason A. Donenfeld  wrote:
>
> On Tue, Jun 19, 2018 at 5:59 AM Shakeel Butt  wrote:
> > Hi Jason, yes please do send me the test suite with the kernel config.
>
> $ git clone https://git.zx2c4.com/WireGuard
> $ cd WireGuard/src
> $ [[ $(gcc -v 2>&1) =~ gcc\ version\ 8\.1\.0 ]] || echo crash needs 8.1
> $ export DEBUG_KERNEL=yes
> $ export KERNEL_VERSION=4.18-rc1
> $ make test-qemu -j$(nproc)
>
> This will build a kernel and a minimal userland and load it in qemu,
> which must be installed.
>
> This code is what causes the crash:
> The self test that's executed:
> https://git.zx2c4.com/WireGuard/tree/src/selftest/ratelimiter.h
> Which exercises this code:
> https://git.zx2c4.com/WireGuard/tree/src/ratelimiter.c
>
> The problem occurs after gc_entries(NULL) frees things (line 124 in
> ratelimiter.h above), and then line 133 reallocates those objects.
> Sometime after that happens, elsewhere in the kernel invokes this
> kasan issue in the kasan cache cleanup.
>

I will try to repro with your test suite sometime later this week.
However from high level code inspection, I see that the code is
creating a 'entry_cache' kmem_cache which is destroyed by
ratelimiter_uninit on last reference drop. Currently refcnt in your
code can underflow, through it does not seem like the selftest will
cause the underflow but still you should fix it.

>From high level your code seems fine. Does the issue occur on first
try of selftest? Basically I wanted to ask if kmem_cache_destroy of
your entry_cache was ever executed and have you tried to run this
selftest multiple time while the system was up.

As Dmitry already asked, are you using SLAB or SLUB?

> I realize it's disappointing that the test case here is in WireGuard,
> which isn't (yet!) upstream. That's why in my original message I
> wrote:
>
> > Rather, it looks like this
> > commit introduces a performance optimization, rather than a
> > correctness fix, so it seems that whatever test case is failing is
> > likely an incorrect failure. Does that seem like an accurate
> > possibility to you?
>
> I was hoping to only point you toward my own code after establishing
> the possibility that the bug is not my own. If you still think there's
> a chance this is due to my own correctness issue, and your commit has
> simply unearthed it, let me know and I'll happily keep debugging on my
> own before pinging you further.
>

Sorry, I can not really give a definitive answer.

Shakeel


[RFC PATCH] ARM: Use logical or instead of addition for badr address calculation

2018-06-18 Thread Guenter Roeck
Modern assemblers may take the ISA into account when resolving local
symbols. This can result in bad address calculations when using badr
in the wrong location since the offset + 1 may be added twice, resulting
in an even address target for THUMB instructions. This in turn results
in an exception at (destination address + 2).

Unhandled exception: IPSR = 0006 LR = fff1
CPU: 0 PID: 1 Comm: init Not tainted 4.18.0-rc1-00026-gf773e5bdf0c9 #15
Hardware name: MPS2 (Device Tree Support)
PC is at ret_fast_syscall+0x2/0x58
LR is at tty_ioctl+0x2a5/0x528
pc : [<21009002>]lr : [<210d1535>]psr: 400b
sp : 21825fa8  ip : 001c  fp : 21a95892
r10:   r9 : 21824000  r8 : 210091c0
r7 : 0036  r6 : 21ae1ee0  r5 :   r4 : 21ae1f3c
r3 :   r2 : 3d9adc25  r1 :   r0 : 
xPSR: 400b
CPU: 0 PID: 1 Comm: init Not tainted 4.18.0-rc1-00026-gf773e5bdf0c9 #15
Hardware name: MPS2 (Device Tree Support)
[<2100bd8d>] (unwind_backtrace) from [<2100b13b>] (show_stack+0xb/0xc)
[<2100b13b>] (show_stack) from [<2100b87b>] (__invalid_entry+0x4b/0x4c)

Fix the problem by using a logical or instead of an addition. This is
less efficient but guaranteed to work.

Signed-off-by: Guenter Roeck 
---
RFC: I don't really like this, but my ARM assembler knowledge is quite
limited. Just dropping the "+ 1" from badr doesn't work because some
other code needs it (the image hangs completely if I try that).
Ultimately I don't even know if the invoke_syscall macro should just
have used adr instead of badr (but then how did this ever work ?).

Seen with various toolchains based on gcc 7.x and binutils 2.30 when
building and testing MPS2 images.

 arch/arm/include/asm/assembler.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 0cd4dccbae78..24c87ff2060f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -195,7 +195,8 @@
.irpc,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro  badr\c, rd, sym
 #ifdef CONFIG_THUMB2_KERNEL
-   adr\c   \rd, \sym + 1
+   adr\c   \rd, \sym
+   orr \rd, #1
 #else
adr\c   \rd, \sym
 #endif
-- 
2.7.4



[RFC PATCH] ARM: Use logical or instead of addition for badr address calculation

2018-06-18 Thread Guenter Roeck
Modern assemblers may take the ISA into account when resolving local
symbols. This can result in bad address calculations when using badr
in the wrong location since the offset + 1 may be added twice, resulting
in an even address target for THUMB instructions. This in turn results
in an exception at (destination address + 2).

Unhandled exception: IPSR = 0006 LR = fff1
CPU: 0 PID: 1 Comm: init Not tainted 4.18.0-rc1-00026-gf773e5bdf0c9 #15
Hardware name: MPS2 (Device Tree Support)
PC is at ret_fast_syscall+0x2/0x58
LR is at tty_ioctl+0x2a5/0x528
pc : [<21009002>]lr : [<210d1535>]psr: 400b
sp : 21825fa8  ip : 001c  fp : 21a95892
r10:   r9 : 21824000  r8 : 210091c0
r7 : 0036  r6 : 21ae1ee0  r5 :   r4 : 21ae1f3c
r3 :   r2 : 3d9adc25  r1 :   r0 : 
xPSR: 400b
CPU: 0 PID: 1 Comm: init Not tainted 4.18.0-rc1-00026-gf773e5bdf0c9 #15
Hardware name: MPS2 (Device Tree Support)
[<2100bd8d>] (unwind_backtrace) from [<2100b13b>] (show_stack+0xb/0xc)
[<2100b13b>] (show_stack) from [<2100b87b>] (__invalid_entry+0x4b/0x4c)

Fix the problem by using a logical or instead of an addition. This is
less efficient but guaranteed to work.

Signed-off-by: Guenter Roeck 
---
RFC: I don't really like this, but my ARM assembler knowledge is quite
limited. Just dropping the "+ 1" from badr doesn't work because some
other code needs it (the image hangs completely if I try that).
Ultimately I don't even know if the invoke_syscall macro should just
have used adr instead of badr (but then how did this ever work ?).

Seen with various toolchains based on gcc 7.x and binutils 2.30 when
building and testing MPS2 images.

 arch/arm/include/asm/assembler.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 0cd4dccbae78..24c87ff2060f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -195,7 +195,8 @@
.irpc,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro  badr\c, rd, sym
 #ifdef CONFIG_THUMB2_KERNEL
-   adr\c   \rd, \sym + 1
+   adr\c   \rd, \sym
+   orr \rd, #1
 #else
adr\c   \rd, \sym
 #endif
-- 
2.7.4



[PATCH 3/3] powernv/cpuidle: Use parsed device tree values for cpuidle_init

2018-06-18 Thread Akshay Adiga
Export pnv_idle_states and nr_pnv_idle_states so that its accessible to
cpuidle driver. Use properties from pnv_idle_states structure for powernv
cpuidle_init.

Signed-off-by: Akshay Adiga 
---
 arch/powerpc/include/asm/cpuidle.h |  2 ++
 drivers/cpuidle/cpuidle-powernv.c  | 49 +++---
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index 55ee7e3..1446747 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -93,6 +93,8 @@ struct pnv_idle_states_t {
u32 flags;
 };
 
+extern struct pnv_idle_states_t *pnv_idle_states;
+extern int nr_pnv_idle_states;
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
 
diff --git a/drivers/cpuidle/cpuidle-powernv.c 
b/drivers/cpuidle/cpuidle-powernv.c
index d29e4f0..de8ba26 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -285,6 +285,11 @@ static int powernv_add_idle_states(void)
goto out;
}
 
+   if (nr_pnv_idle_states <= 0) {
+   pr_warn("opal: No idle states found\n");
+   goto out;
+   }
+
/* Read values of any property to determine the num of idle states */
dt_idle_states = of_property_count_u32_elems(power_mgt, 
"ibm,cpu-idle-state-flags");
if (dt_idle_states < 0) {
@@ -338,7 +343,7 @@ static int powernv_add_idle_states(void)
 * If the idle states use stop instruction, probe for psscr values
 * and psscr mask which are necessary to specify required stop level.
 */
-   has_stop_states = (flags[0] &
+   has_stop_states = (pnv_idle_states[0].flags &
   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
if (has_stop_states) {
count = of_property_count_u64_elems(power_mgt,
@@ -387,51 +392,55 @@ static int powernv_add_idle_states(void)
residency_ns, dt_idle_states);
}
 
-   for (i = 0; i < dt_idle_states; i++) {
+   for (i = 0; i < nr_pnv_idle_states; i++) {
unsigned int exit_latency, target_residency;
bool stops_timebase = false;
+   struct pnv_idle_states_t *state = _idle_states[i];
 
/*
 * Skip the platform idle state whose flag isn't in
-* the supported_cpuidle_states flag mask.
+* the supported_pnv_idle_states flag mask.
 */
-   if ((flags[i] & supported_flags) != flags[i])
+   if ((state->flags & supported_flags) !=
+   state->flags)
continue;
/*
 * If an idle state has exit latency beyond
 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
 * in cpu-idle.
 */
-   if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
+   if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS)
continue;
/*
 * Firmware passes residency and latency values in ns.
 * cpuidle expects it in us.
 */
-   exit_latency = DIV_ROUND_UP(latency_ns[i], 1000);
+   exit_latency = DIV_ROUND_UP(state->latency_ns, 1000);
if (!rc)
-   target_residency = DIV_ROUND_UP(residency_ns[i], 1000);
+   target_residency = DIV_ROUND_UP(state->residency_ns, 
1000);
else
target_residency = 0;
 
if (has_stop_states) {
-   int err = validate_psscr_val_mask(_val[i],
- _mask[i],
- flags[i]);
+   int err;
+   err = validate_psscr_val_mask(>pm_ctrl_reg_val,
+ >pm_ctrl_reg_mask,
+ state->flags);
if (err) {
-   report_invalid_psscr_val(psscr_val[i], err);
+   report_invalid_psscr_val(state->pm_ctrl_reg_val,
+err);
continue;
}
}
 
-   if (flags[i] & OPAL_PM_TIMEBASE_STOP)
+   if (state->flags & OPAL_PM_TIMEBASE_STOP)
stops_timebase = true;
 
/*
 * For nap and fastsleep, use default target_residency
 * values if f/w does not expose it.
 */
-   if (flags[i] & OPAL_PM_NAP_ENABLED) {
+   if (state->flags & OPAL_PM_NAP_ENABLED) {
   

[PATCH 3/3] powernv/cpuidle: Use parsed device tree values for cpuidle_init

2018-06-18 Thread Akshay Adiga
Export pnv_idle_states and nr_pnv_idle_states so that its accessible to
cpuidle driver. Use properties from pnv_idle_states structure for powernv
cpuidle_init.

Signed-off-by: Akshay Adiga 
---
 arch/powerpc/include/asm/cpuidle.h |  2 ++
 drivers/cpuidle/cpuidle-powernv.c  | 49 +++---
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index 55ee7e3..1446747 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -93,6 +93,8 @@ struct pnv_idle_states_t {
u32 flags;
 };
 
+extern struct pnv_idle_states_t *pnv_idle_states;
+extern int nr_pnv_idle_states;
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
 
diff --git a/drivers/cpuidle/cpuidle-powernv.c 
b/drivers/cpuidle/cpuidle-powernv.c
index d29e4f0..de8ba26 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -285,6 +285,11 @@ static int powernv_add_idle_states(void)
goto out;
}
 
+   if (nr_pnv_idle_states <= 0) {
+   pr_warn("opal: No idle states found\n");
+   goto out;
+   }
+
/* Read values of any property to determine the num of idle states */
dt_idle_states = of_property_count_u32_elems(power_mgt, 
"ibm,cpu-idle-state-flags");
if (dt_idle_states < 0) {
@@ -338,7 +343,7 @@ static int powernv_add_idle_states(void)
 * If the idle states use stop instruction, probe for psscr values
 * and psscr mask which are necessary to specify required stop level.
 */
-   has_stop_states = (flags[0] &
+   has_stop_states = (pnv_idle_states[0].flags &
   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
if (has_stop_states) {
count = of_property_count_u64_elems(power_mgt,
@@ -387,51 +392,55 @@ static int powernv_add_idle_states(void)
residency_ns, dt_idle_states);
}
 
-   for (i = 0; i < dt_idle_states; i++) {
+   for (i = 0; i < nr_pnv_idle_states; i++) {
unsigned int exit_latency, target_residency;
bool stops_timebase = false;
+   struct pnv_idle_states_t *state = _idle_states[i];
 
/*
 * Skip the platform idle state whose flag isn't in
-* the supported_cpuidle_states flag mask.
+* the supported_pnv_idle_states flag mask.
 */
-   if ((flags[i] & supported_flags) != flags[i])
+   if ((state->flags & supported_flags) !=
+   state->flags)
continue;
/*
 * If an idle state has exit latency beyond
 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
 * in cpu-idle.
 */
-   if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
+   if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS)
continue;
/*
 * Firmware passes residency and latency values in ns.
 * cpuidle expects it in us.
 */
-   exit_latency = DIV_ROUND_UP(latency_ns[i], 1000);
+   exit_latency = DIV_ROUND_UP(state->latency_ns, 1000);
if (!rc)
-   target_residency = DIV_ROUND_UP(residency_ns[i], 1000);
+   target_residency = DIV_ROUND_UP(state->residency_ns, 
1000);
else
target_residency = 0;
 
if (has_stop_states) {
-   int err = validate_psscr_val_mask(_val[i],
- _mask[i],
- flags[i]);
+   int err;
+   err = validate_psscr_val_mask(>pm_ctrl_reg_val,
+ >pm_ctrl_reg_mask,
+ state->flags);
if (err) {
-   report_invalid_psscr_val(psscr_val[i], err);
+   report_invalid_psscr_val(state->pm_ctrl_reg_val,
+err);
continue;
}
}
 
-   if (flags[i] & OPAL_PM_TIMEBASE_STOP)
+   if (state->flags & OPAL_PM_TIMEBASE_STOP)
stops_timebase = true;
 
/*
 * For nap and fastsleep, use default target_residency
 * values if f/w does not expose it.
 */
-   if (flags[i] & OPAL_PM_NAP_ENABLED) {
+   if (state->flags & OPAL_PM_NAP_ENABLED) {
   

Re: [PATCH v1] Revert "eventfd: only return events requested in poll_mask()"

2018-06-18 Thread Christoph Hellwig
On Sun, Jun 17, 2018 at 11:31:41AM +0300, Avi Kivity wrote:
> This reverts commit 4d572d9f46507be8cfe326aa5bc3698babcbdfa7. It is
> superceded by the more general
> 2739b807b0885a09996659be82f813af219c7360 ("aio: only return events
> requested in poll_mask() for IOCB_CMD_POLL"). Unfortunately, hch
> nacked it on the bug report rather than on the patch itself, so it
> was picked up.

Note that even with that patch this patch is harmless, although indeed
not actually needed.


[PATCH 1/3] powernv/cpuidle: Parse dt idle properties into global structure

2018-06-18 Thread Akshay Adiga
Device-tree parsing happens in twice, once while deciding idle state to
be used for hotplug and once during cpuidle init. Hence, parsing the
device tree and caching it will reduce code duplication. Parsing code
has been moved to pnv_parse_cpuidle_dt() from pnv_probe_idle_states().

Setting up things so that number of available idle states can be
accessible to cpuidle-powernv driver. Hence adding nr_pnv_idle_states to
track number of idle states.

Signed-off-by: Akshay Adiga 
---
 arch/powerpc/include/asm/cpuidle.h|  14 +++
 arch/powerpc/platforms/powernv/idle.c | 197 --
 2 files changed, 152 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index e210a83..55ee7e3 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -79,6 +79,20 @@ struct stop_sprs {
u64 mmcra;
 };
 
+#define PNV_IDLE_NAME_LEN16
+struct pnv_idle_states_t {
+   char name[PNV_IDLE_NAME_LEN];
+   u32 latency_ns;
+   u32 residency_ns;
+   /*
+* Register value/mask used to select different idle states.
+* PMICR in POWER8 and PSSCR in POWER9
+*/
+   u64 pm_ctrl_reg_val;
+   u64 pm_ctrl_reg_mask;
+   u32 flags;
+};
+
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
 
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index 1c5d067..07be984 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -36,6 +36,8 @@
 #define P9_STOP_SPR_PSSCR  855
 
 static u32 supported_cpuidle_states;
+struct pnv_idle_states_t *pnv_idle_states;
+int nr_pnv_idle_states;
 
 /*
  * The default stop state that will be used by ppc_md.power_save
@@ -625,45 +627,8 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 
*psscr_mask, u32 flags)
 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
int dt_idle_states)
 {
-   u64 *psscr_val = NULL;
-   u64 *psscr_mask = NULL;
-   u32 *residency_ns = NULL;
u64 max_residency_ns = 0;
-   int rc = 0, i;
-
-   psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
-   psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
-   residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
-  GFP_KERNEL);
-
-   if (!psscr_val || !psscr_mask || !residency_ns) {
-   rc = -1;
-   goto out;
-   }
-
-   if (of_property_read_u64_array(np,
-   "ibm,cpu-idle-state-psscr",
-   psscr_val, dt_idle_states)) {
-   pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in 
DT\n");
-   rc = -1;
-   goto out;
-   }
-
-   if (of_property_read_u64_array(np,
-  "ibm,cpu-idle-state-psscr-mask",
-  psscr_mask, dt_idle_states)) {
-   pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask 
in DT\n");
-   rc = -1;
-   goto out;
-   }
-
-   if (of_property_read_u32_array(np,
-  "ibm,cpu-idle-state-residency-ns",
-   residency_ns, dt_idle_states)) {
-   pr_warn("cpuidle-powernv: missing 
ibm,cpu-idle-state-residency-ns in DT\n");
-   rc = -1;
-   goto out;
-   }
+   int i;
 
/*
 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
@@ -681,31 +646,33 @@ static int __init pnv_power9_idle_init(struct device_node 
*np, u32 *flags,
pnv_first_deep_stop_state = MAX_STOP_STATE;
for (i = 0; i < dt_idle_states; i++) {
int err;
-   u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
+   struct pnv_idle_states_t *state = _idle_states[i];
+   u64 psscr_rl = state->pm_ctrl_reg_val & PSSCR_RL_MASK;
 
-   if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
-(pnv_first_deep_stop_state > psscr_rl))
+   if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+   (pnv_first_deep_stop_state > psscr_rl))
pnv_first_deep_stop_state = psscr_rl;
 
-   err = validate_psscr_val_mask(_val[i], _mask[i],
- flags[i]);
+   err = validate_psscr_val_mask(>pm_ctrl_reg_val,
+ >pm_ctrl_reg_mask,
+ state->flags);
if (err) {
-   report_invalid_psscr_val(psscr_val[i], err);
+   report_invalid_psscr_val(state->pm_ctrl_reg_val, err);
continue;
}
 
-   if 

Re: [alsa-devel] [PATCH] ASoC: qcom: add sdm845 sound card support

2018-06-18 Thread Vinod
On 18-06-18, 16:46, Rohit kumar wrote:

> +struct sdm845_snd_data {
> + struct snd_soc_card *card;
> + struct regulator *vdd_supply;
> + struct snd_soc_dai_link dai_link[];
> +};
> +
> +static struct mutex pri_mi2s_res_lock;
> +static struct mutex quat_tdm_res_lock;

any reason why the locks can't be part of sdm845_snd_data?
Also why do we need two locks ?

> +static atomic_t pri_mi2s_clk_count;
> +static atomic_t quat_tdm_clk_count;

Any specific reason for using atomic variables?

> +static unsigned int tdm_slot_offset[8] = {0, 4, 8, 12, 16, 20, 24, 28};
> +
> +static int sdm845_tdm_snd_hw_params(struct snd_pcm_substream *substream,
> + struct snd_pcm_hw_params *params)
> +{
> + struct snd_soc_pcm_runtime *rtd = substream->private_data;
> + struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
> + int ret = 0;
> + int channels, slot_width;
> +
> + channels = params_channels(params);
> + if (channels < 1 || channels > 8) {

I though ch = 0 would be caught by framework and IIRC ASoC doesn't
support more than 8 channels

> + pr_err("%s: invalid param channels %d\n",
> + __func__, channels);
> + return -EINVAL;
> + }
> +
> + switch (params_format(params)) {
> + case SNDRV_PCM_FORMAT_S32_LE:
> + case SNDRV_PCM_FORMAT_S24_LE:
> + case SNDRV_PCM_FORMAT_S16_LE:
> + slot_width = 32;
> + break;
> + default:
> + pr_err("%s: invalid param format 0x%x\n",
> + __func__, params_format(params));

why not use dev_err, bonus you get device name printer with the logs :)

> +static int sdm845_snd_startup(struct snd_pcm_substream *substream)
> +{
> + unsigned int fmt = SND_SOC_DAIFMT_CBS_CFS;
> + struct snd_soc_pcm_runtime *rtd = substream->private_data;
> + struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
> +
> + pr_debug("%s: dai_id: 0x%x\n", __func__, cpu_dai->id);

It is good for debug but not very useful here, so removing it would be
good

> + switch (cpu_dai->id) {
> + case PRIMARY_MI2S_RX:
> + case PRIMARY_MI2S_TX:
> + mutex_lock(_mi2s_res_lock);
> + if (atomic_inc_return(_mi2s_clk_count) == 1) {
> + snd_soc_dai_set_sysclk(cpu_dai,
> + Q6AFE_LPASS_CLK_ID_MCLK_1,
> + DEFAULT_MCLK_RATE, SNDRV_PCM_STREAM_PLAYBACK);
> + snd_soc_dai_set_sysclk(cpu_dai,
> + Q6AFE_LPASS_CLK_ID_PRI_MI2S_IBIT,
> + DEFAULT_BCLK_RATE, SNDRV_PCM_STREAM_PLAYBACK);
> + }
> + mutex_unlock(_mi2s_res_lock);

why do we need locking here? Can you please explain that.

> + snd_soc_dai_set_fmt(cpu_dai, fmt);
> + break;

empty line after break helps in readability

> +static int sdm845_sbc_parse_of(struct snd_soc_card *card)
> +{
> + struct device *dev = card->dev;
> + struct snd_soc_dai_link *link;
> + struct device_node *np, *codec, *platform, *cpu, *node;
> + int ret, num_links;
> + struct sdm845_snd_data *data;
> +
> + ret = snd_soc_of_parse_card_name(card, "qcom,model");
> + if (ret) {
> + dev_err(dev, "Error parsing card name: %d\n", ret);
> + return ret;
> + }
> +
> + node = dev->of_node;
> +
> + /* DAPM routes */
> + if (of_property_read_bool(node, "qcom,audio-routing")) {
> + ret = snd_soc_of_parse_audio_routing(card,
> + "qcom,audio-routing");
> + if (ret)
> + return ret;
> + }

so if we dont find audio-routing, then? we seems to continue..

-- 
~Vinod


Re: [RFC PATCH v3 09/10] sched/fair: Select an energy-efficient CPU on task wake-up

2018-06-18 Thread Pavan Kondeti
On Mon, May 21, 2018 at 03:25:04PM +0100, Quentin Perret wrote:



> + if (cpumask_test_cpu(prev_cpu, >cpus_allowed))
> + prev_energy = best_energy = compute_energy(p, prev_cpu);
> + else
> + prev_energy = best_energy = ULONG_MAX;
> +
> + for_each_freq_domain(sfd) {
> + unsigned long spare_cap, max_spare_cap = 0;
> + int max_spare_cap_cpu = -1;
> + unsigned long util;
> +
> + /* Find the CPU with the max spare cap in the freq. dom. */
> + for_each_cpu_and(cpu, freq_domain_span(sfd), 
> sched_domain_span(sd)) {
> + if (!cpumask_test_cpu(cpu, >cpus_allowed))
> + continue;
> +
> + if (cpu == prev_cpu)
> + continue;
> +
> + /* Skip CPUs that will be overutilized */
> + util = cpu_util_wake(cpu, p) + task_util;
> + cpu_cap = capacity_of(cpu);
> + if (cpu_cap * 1024 < util * capacity_margin)
> + continue;
> +
> + spare_cap = cpu_cap - util;
> + if (spare_cap > max_spare_cap) {
> + max_spare_cap = spare_cap;
> + max_spare_cap_cpu = cpu;
> + }
> + }
> +
> + /* Evaluate the energy impact of using this CPU. */
> + if (max_spare_cap_cpu >= 0) {
> + cur_energy = compute_energy(p, max_spare_cap_cpu);
> + if (cur_energy < best_energy) {
> + best_energy = cur_energy;
> + best_energy_cpu = max_spare_cap_cpu;
> + }
> + }
> + }
> +
> + /*
> +  * We pick the best CPU only if it saves at least 1.5% of the
> +  * energy used by prev_cpu.
> +  */
> + if ((prev_energy - best_energy) > (prev_energy >> 6))
> + return best_energy_cpu;
> +
> + return prev_cpu;
> +}

We are comparing the best_energy_cpu against prev_cpu even when prev_cpu
can't accommodate the waking task. Is this intentional? Should not we
discard the prev_cpu if it can't accommodate the task.

This can potentially make a BIG task run on a lower capacity CPU until
load balancer kicks in and corrects the situation.

Thanks,
Pavan
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [PATCH v1] Revert "eventfd: only return events requested in poll_mask()"

2018-06-18 Thread Christoph Hellwig
On Sun, Jun 17, 2018 at 11:31:41AM +0300, Avi Kivity wrote:
> This reverts commit 4d572d9f46507be8cfe326aa5bc3698babcbdfa7. It is
> superceded by the more general
> 2739b807b0885a09996659be82f813af219c7360 ("aio: only return events
> requested in poll_mask() for IOCB_CMD_POLL"). Unfortunately, hch
> nacked it on the bug report rather than on the patch itself, so it
> was picked up.

Note that even with that patch this patch is harmless, although indeed
not actually needed.


[PATCH 1/3] powernv/cpuidle: Parse dt idle properties into global structure

2018-06-18 Thread Akshay Adiga
Device-tree parsing happens in twice, once while deciding idle state to
be used for hotplug and once during cpuidle init. Hence, parsing the
device tree and caching it will reduce code duplication. Parsing code
has been moved to pnv_parse_cpuidle_dt() from pnv_probe_idle_states().

Setting up things so that number of available idle states can be
accessible to cpuidle-powernv driver. Hence adding nr_pnv_idle_states to
track number of idle states.

Signed-off-by: Akshay Adiga 
---
 arch/powerpc/include/asm/cpuidle.h|  14 +++
 arch/powerpc/platforms/powernv/idle.c | 197 --
 2 files changed, 152 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/include/asm/cpuidle.h 
b/arch/powerpc/include/asm/cpuidle.h
index e210a83..55ee7e3 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -79,6 +79,20 @@ struct stop_sprs {
u64 mmcra;
 };
 
+#define PNV_IDLE_NAME_LEN16
+struct pnv_idle_states_t {
+   char name[PNV_IDLE_NAME_LEN];
+   u32 latency_ns;
+   u32 residency_ns;
+   /*
+* Register value/mask used to select different idle states.
+* PMICR in POWER8 and PSSCR in POWER9
+*/
+   u64 pm_ctrl_reg_val;
+   u64 pm_ctrl_reg_mask;
+   u32 flags;
+};
+
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
 
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index 1c5d067..07be984 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -36,6 +36,8 @@
 #define P9_STOP_SPR_PSSCR  855
 
 static u32 supported_cpuidle_states;
+struct pnv_idle_states_t *pnv_idle_states;
+int nr_pnv_idle_states;
 
 /*
  * The default stop state that will be used by ppc_md.power_save
@@ -625,45 +627,8 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 
*psscr_mask, u32 flags)
 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
int dt_idle_states)
 {
-   u64 *psscr_val = NULL;
-   u64 *psscr_mask = NULL;
-   u32 *residency_ns = NULL;
u64 max_residency_ns = 0;
-   int rc = 0, i;
-
-   psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
-   psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
-   residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
-  GFP_KERNEL);
-
-   if (!psscr_val || !psscr_mask || !residency_ns) {
-   rc = -1;
-   goto out;
-   }
-
-   if (of_property_read_u64_array(np,
-   "ibm,cpu-idle-state-psscr",
-   psscr_val, dt_idle_states)) {
-   pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in 
DT\n");
-   rc = -1;
-   goto out;
-   }
-
-   if (of_property_read_u64_array(np,
-  "ibm,cpu-idle-state-psscr-mask",
-  psscr_mask, dt_idle_states)) {
-   pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask 
in DT\n");
-   rc = -1;
-   goto out;
-   }
-
-   if (of_property_read_u32_array(np,
-  "ibm,cpu-idle-state-residency-ns",
-   residency_ns, dt_idle_states)) {
-   pr_warn("cpuidle-powernv: missing 
ibm,cpu-idle-state-residency-ns in DT\n");
-   rc = -1;
-   goto out;
-   }
+   int i;
 
/*
 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
@@ -681,31 +646,33 @@ static int __init pnv_power9_idle_init(struct device_node 
*np, u32 *flags,
pnv_first_deep_stop_state = MAX_STOP_STATE;
for (i = 0; i < dt_idle_states; i++) {
int err;
-   u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
+   struct pnv_idle_states_t *state = _idle_states[i];
+   u64 psscr_rl = state->pm_ctrl_reg_val & PSSCR_RL_MASK;
 
-   if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
-(pnv_first_deep_stop_state > psscr_rl))
+   if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+   (pnv_first_deep_stop_state > psscr_rl))
pnv_first_deep_stop_state = psscr_rl;
 
-   err = validate_psscr_val_mask(_val[i], _mask[i],
- flags[i]);
+   err = validate_psscr_val_mask(>pm_ctrl_reg_val,
+ >pm_ctrl_reg_mask,
+ state->flags);
if (err) {
-   report_invalid_psscr_val(psscr_val[i], err);
+   report_invalid_psscr_val(state->pm_ctrl_reg_val, err);
continue;
}
 
-   if 

Re: [alsa-devel] [PATCH] ASoC: qcom: add sdm845 sound card support

2018-06-18 Thread Vinod
On 18-06-18, 16:46, Rohit kumar wrote:

> +struct sdm845_snd_data {
> + struct snd_soc_card *card;
> + struct regulator *vdd_supply;
> + struct snd_soc_dai_link dai_link[];
> +};
> +
> +static struct mutex pri_mi2s_res_lock;
> +static struct mutex quat_tdm_res_lock;

any reason why the locks can't be part of sdm845_snd_data?
Also why do we need two locks ?

> +static atomic_t pri_mi2s_clk_count;
> +static atomic_t quat_tdm_clk_count;

Any specific reason for using atomic variables?

> +static unsigned int tdm_slot_offset[8] = {0, 4, 8, 12, 16, 20, 24, 28};
> +
> +static int sdm845_tdm_snd_hw_params(struct snd_pcm_substream *substream,
> + struct snd_pcm_hw_params *params)
> +{
> + struct snd_soc_pcm_runtime *rtd = substream->private_data;
> + struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
> + int ret = 0;
> + int channels, slot_width;
> +
> + channels = params_channels(params);
> + if (channels < 1 || channels > 8) {

I though ch = 0 would be caught by framework and IIRC ASoC doesn't
support more than 8 channels

> + pr_err("%s: invalid param channels %d\n",
> + __func__, channels);
> + return -EINVAL;
> + }
> +
> + switch (params_format(params)) {
> + case SNDRV_PCM_FORMAT_S32_LE:
> + case SNDRV_PCM_FORMAT_S24_LE:
> + case SNDRV_PCM_FORMAT_S16_LE:
> + slot_width = 32;
> + break;
> + default:
> + pr_err("%s: invalid param format 0x%x\n",
> + __func__, params_format(params));

why not use dev_err, bonus you get device name printer with the logs :)

> +static int sdm845_snd_startup(struct snd_pcm_substream *substream)
> +{
> + unsigned int fmt = SND_SOC_DAIFMT_CBS_CFS;
> + struct snd_soc_pcm_runtime *rtd = substream->private_data;
> + struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
> +
> + pr_debug("%s: dai_id: 0x%x\n", __func__, cpu_dai->id);

It is good for debug but not very useful here, so removing it would be
good

> + switch (cpu_dai->id) {
> + case PRIMARY_MI2S_RX:
> + case PRIMARY_MI2S_TX:
> + mutex_lock(_mi2s_res_lock);
> + if (atomic_inc_return(_mi2s_clk_count) == 1) {
> + snd_soc_dai_set_sysclk(cpu_dai,
> + Q6AFE_LPASS_CLK_ID_MCLK_1,
> + DEFAULT_MCLK_RATE, SNDRV_PCM_STREAM_PLAYBACK);
> + snd_soc_dai_set_sysclk(cpu_dai,
> + Q6AFE_LPASS_CLK_ID_PRI_MI2S_IBIT,
> + DEFAULT_BCLK_RATE, SNDRV_PCM_STREAM_PLAYBACK);
> + }
> + mutex_unlock(_mi2s_res_lock);

why do we need locking here? Can you please explain that.

> + snd_soc_dai_set_fmt(cpu_dai, fmt);
> + break;

empty line after break helps in readability

> +static int sdm845_sbc_parse_of(struct snd_soc_card *card)
> +{
> + struct device *dev = card->dev;
> + struct snd_soc_dai_link *link;
> + struct device_node *np, *codec, *platform, *cpu, *node;
> + int ret, num_links;
> + struct sdm845_snd_data *data;
> +
> + ret = snd_soc_of_parse_card_name(card, "qcom,model");
> + if (ret) {
> + dev_err(dev, "Error parsing card name: %d\n", ret);
> + return ret;
> + }
> +
> + node = dev->of_node;
> +
> + /* DAPM routes */
> + if (of_property_read_bool(node, "qcom,audio-routing")) {
> + ret = snd_soc_of_parse_audio_routing(card,
> + "qcom,audio-routing");
> + if (ret)
> + return ret;
> + }

so if we dont find audio-routing, then? we seems to continue..

-- 
~Vinod


Re: [RFC PATCH v3 09/10] sched/fair: Select an energy-efficient CPU on task wake-up

2018-06-18 Thread Pavan Kondeti
On Mon, May 21, 2018 at 03:25:04PM +0100, Quentin Perret wrote:



> + if (cpumask_test_cpu(prev_cpu, >cpus_allowed))
> + prev_energy = best_energy = compute_energy(p, prev_cpu);
> + else
> + prev_energy = best_energy = ULONG_MAX;
> +
> + for_each_freq_domain(sfd) {
> + unsigned long spare_cap, max_spare_cap = 0;
> + int max_spare_cap_cpu = -1;
> + unsigned long util;
> +
> + /* Find the CPU with the max spare cap in the freq. dom. */
> + for_each_cpu_and(cpu, freq_domain_span(sfd), 
> sched_domain_span(sd)) {
> + if (!cpumask_test_cpu(cpu, >cpus_allowed))
> + continue;
> +
> + if (cpu == prev_cpu)
> + continue;
> +
> + /* Skip CPUs that will be overutilized */
> + util = cpu_util_wake(cpu, p) + task_util;
> + cpu_cap = capacity_of(cpu);
> + if (cpu_cap * 1024 < util * capacity_margin)
> + continue;
> +
> + spare_cap = cpu_cap - util;
> + if (spare_cap > max_spare_cap) {
> + max_spare_cap = spare_cap;
> + max_spare_cap_cpu = cpu;
> + }
> + }
> +
> + /* Evaluate the energy impact of using this CPU. */
> + if (max_spare_cap_cpu >= 0) {
> + cur_energy = compute_energy(p, max_spare_cap_cpu);
> + if (cur_energy < best_energy) {
> + best_energy = cur_energy;
> + best_energy_cpu = max_spare_cap_cpu;
> + }
> + }
> + }
> +
> + /*
> +  * We pick the best CPU only if it saves at least 1.5% of the
> +  * energy used by prev_cpu.
> +  */
> + if ((prev_energy - best_energy) > (prev_energy >> 6))
> + return best_energy_cpu;
> +
> + return prev_cpu;
> +}

We are comparing the best_energy_cpu against prev_cpu even when prev_cpu
can't accommodate the waking task. Is this intentional? Should not we
discard the prev_cpu if it can't accommodate the task.

This can potentially make a BIG task run on a lower capacity CPU until
load balancer kicks in and corrects the situation.

Thanks,
Pavan
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [RFC PATCH 0/5] kbuild: build modules from code in multiple directories.

2018-06-18 Thread Darrick J. Wong
On Mon, Jun 18, 2018 at 09:47:48PM -0700, Christoph Hellwig wrote:
> On Tue, Jun 19, 2018 at 02:05:23PM +1000, NeilBrown wrote:
> > From: NeilBrown 
> > Date: Tue, 19 Jun 2018 13:59:16 +1000
> > Subject: [PATCH] kbuild/xfs: example modobj-m conversion
> > 
> > This is a demonstration patch to show how
> > xfs can be changed to make use of the proposed modobj-m=
> > functionality, should the xfs developers want that.
> 
> Well, IFF we go with this new functionality I think everyone should
> be using it instead of the current hacks.
> 
> So text like the above should be in the series cover letter, and this
> patch should have an actual description..
> 
> I see no argument against these changes, but I've also added the XFS
> list.

Yes, please send the entire series to the xfs list in the future.

It looks like a reasonable reorganization of makefile goop. :)

(Will dig for the rest of the series on lkml tomorrow I guess.)

> > 
> > Signed-off-by: NeilBrown 
> > ---
> >  fs/xfs/Makefile| 78 
> > ++
> >  fs/xfs/libxfs/Makefile | 43 
> >  fs/xfs/scrub/Makefile  | 29 +++
> >  3 files changed, 74 insertions(+), 76 deletions(-)
> >  create mode 100644 fs/xfs/libxfs/Makefile
> >  create mode 100644 fs/xfs/scrub/Makefile
> > 
> > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> > index 2f3f75a7f180..0ba854045fe9 100644
> > --- a/fs/xfs/Makefile
> > +++ b/fs/xfs/Makefile
> > @@ -15,47 +15,7 @@ obj-$(CONFIG_XFS_FS) += xfs.o
> >  xfs-y  += xfs_trace.o
> >  
> >  # build the libxfs code first
> > -xfs-y  += $(addprefix libxfs/, \
> > -  xfs_ag.o \
> > -  xfs_alloc.o \
> > -  xfs_alloc_btree.o \
> > -  xfs_attr.o \
> > -  xfs_attr_leaf.o \
> > -  xfs_attr_remote.o \
> > -  xfs_bit.o \
> > -  xfs_bmap.o \
> > -  xfs_bmap_btree.o \
> > -  xfs_btree.o \
> > -  xfs_da_btree.o \
> > -  xfs_da_format.o \
> > -  xfs_defer.o \
> > -  xfs_dir2.o \
> > -  xfs_dir2_block.o \
> > -  xfs_dir2_data.o \
> > -  xfs_dir2_leaf.o \
> > -  xfs_dir2_node.o \
> > -  xfs_dir2_sf.o \
> > -  xfs_dquot_buf.o \
> > -  xfs_ialloc.o \
> > -  xfs_ialloc_btree.o \
> > -  xfs_iext_tree.o \
> > -  xfs_inode_fork.o \
> > -  xfs_inode_buf.o \
> > -  xfs_log_rlimit.o \
> > -  xfs_ag_resv.o \
> > -  xfs_rmap.o \
> > -  xfs_rmap_btree.o \
> > -  xfs_refcount.o \
> > -  xfs_refcount_btree.o \
> > -  xfs_sb.o \
> > -  xfs_symlink_remote.o \
> > -  xfs_trans_resv.o \
> > -  xfs_types.o \
> > -  )
> > -# xfs_rtbitmap is shared with libxfs
> > -xfs-$(CONFIG_XFS_RT)   += $(addprefix libxfs/, \
> > -  xfs_rtbitmap.o \
> > -  )
> > +xfs-y  += libxfs/
> >  
> >  # highlevel code
> >  xfs-y  += xfs_aops.o \
> > @@ -127,38 +87,4 @@ xfs-$(CONFIG_SYSCTL)+= xfs_sysctl.o
> >  xfs-$(CONFIG_COMPAT)   += xfs_ioctl32.o
> >  xfs-$(CONFIG_EXPORTFS_BLOCK_OPS)   += xfs_pnfs.o
> >  
> > -# online scrub/repair
> > -ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> > -
> > -# Tracepoints like to blow up, so build that before everything else
> > -
> > -xfs-y  += $(addprefix scrub/, \
> > -  trace.o \
> > -  agheader.o \
> > -  alloc.o \
> > -  attr.o \
> > -  bmap.o \
> > -  btree.o \
> > -  common.o \
> > -  dabtree.o \
> > -  dir.o \
> > -  ialloc.o \
> > -  inode.o \
> > -  parent.o \
> > -  refcount.o \
> > -  rmap.o \
> > -  scrub.o \
> > -  

Re: [RFC PATCH 0/5] kbuild: build modules from code in multiple directories.

2018-06-18 Thread Darrick J. Wong
On Mon, Jun 18, 2018 at 09:47:48PM -0700, Christoph Hellwig wrote:
> On Tue, Jun 19, 2018 at 02:05:23PM +1000, NeilBrown wrote:
> > From: NeilBrown 
> > Date: Tue, 19 Jun 2018 13:59:16 +1000
> > Subject: [PATCH] kbuild/xfs: example modobj-m conversion
> > 
> > This is a demonstration patch to show how
> > xfs can be changed to make use of the proposed modobj-m=
> > functionality, should the xfs developers want that.
> 
> Well, IFF we go with this new functionality I think everyone should
> be using it instead of the current hacks.
> 
> So text like the above should be in the series cover letter, and this
> patch should have an actual description..
> 
> I see no argument against these changes, but I've also added the XFS
> list.

Yes, please send the entire series to the xfs list in the future.

It looks like a reasonable reorganization of makefile goop. :)

(Will dig for the rest of the series on lkml tomorrow I guess.)

> > 
> > Signed-off-by: NeilBrown 
> > ---
> >  fs/xfs/Makefile| 78 
> > ++
> >  fs/xfs/libxfs/Makefile | 43 
> >  fs/xfs/scrub/Makefile  | 29 +++
> >  3 files changed, 74 insertions(+), 76 deletions(-)
> >  create mode 100644 fs/xfs/libxfs/Makefile
> >  create mode 100644 fs/xfs/scrub/Makefile
> > 
> > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> > index 2f3f75a7f180..0ba854045fe9 100644
> > --- a/fs/xfs/Makefile
> > +++ b/fs/xfs/Makefile
> > @@ -15,47 +15,7 @@ obj-$(CONFIG_XFS_FS) += xfs.o
> >  xfs-y  += xfs_trace.o
> >  
> >  # build the libxfs code first
> > -xfs-y  += $(addprefix libxfs/, \
> > -  xfs_ag.o \
> > -  xfs_alloc.o \
> > -  xfs_alloc_btree.o \
> > -  xfs_attr.o \
> > -  xfs_attr_leaf.o \
> > -  xfs_attr_remote.o \
> > -  xfs_bit.o \
> > -  xfs_bmap.o \
> > -  xfs_bmap_btree.o \
> > -  xfs_btree.o \
> > -  xfs_da_btree.o \
> > -  xfs_da_format.o \
> > -  xfs_defer.o \
> > -  xfs_dir2.o \
> > -  xfs_dir2_block.o \
> > -  xfs_dir2_data.o \
> > -  xfs_dir2_leaf.o \
> > -  xfs_dir2_node.o \
> > -  xfs_dir2_sf.o \
> > -  xfs_dquot_buf.o \
> > -  xfs_ialloc.o \
> > -  xfs_ialloc_btree.o \
> > -  xfs_iext_tree.o \
> > -  xfs_inode_fork.o \
> > -  xfs_inode_buf.o \
> > -  xfs_log_rlimit.o \
> > -  xfs_ag_resv.o \
> > -  xfs_rmap.o \
> > -  xfs_rmap_btree.o \
> > -  xfs_refcount.o \
> > -  xfs_refcount_btree.o \
> > -  xfs_sb.o \
> > -  xfs_symlink_remote.o \
> > -  xfs_trans_resv.o \
> > -  xfs_types.o \
> > -  )
> > -# xfs_rtbitmap is shared with libxfs
> > -xfs-$(CONFIG_XFS_RT)   += $(addprefix libxfs/, \
> > -  xfs_rtbitmap.o \
> > -  )
> > +xfs-y  += libxfs/
> >  
> >  # highlevel code
> >  xfs-y  += xfs_aops.o \
> > @@ -127,38 +87,4 @@ xfs-$(CONFIG_SYSCTL)+= xfs_sysctl.o
> >  xfs-$(CONFIG_COMPAT)   += xfs_ioctl32.o
> >  xfs-$(CONFIG_EXPORTFS_BLOCK_OPS)   += xfs_pnfs.o
> >  
> > -# online scrub/repair
> > -ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> > -
> > -# Tracepoints like to blow up, so build that before everything else
> > -
> > -xfs-y  += $(addprefix scrub/, \
> > -  trace.o \
> > -  agheader.o \
> > -  alloc.o \
> > -  attr.o \
> > -  bmap.o \
> > -  btree.o \
> > -  common.o \
> > -  dabtree.o \
> > -  dir.o \
> > -  ialloc.o \
> > -  inode.o \
> > -  parent.o \
> > -  refcount.o \
> > -  rmap.o \
> > -  scrub.o \
> > -  

Re: [PATCH] MAINTAINERS: Add me as an x86 entry code maintainer

2018-06-18 Thread Tony Luck
So the TIP tree was named for “Thomas, Ingo, Peter”. Need to add an “A” for 
Andy. Maybe it should be the PITA tree :-)

-Tony

> On Jun 18, 2018, at 14:41, Andy Lutomirski  wrote:
> 
> And update my email address.
> 
> Cc: Ingo Molnar 
> Cc: Thomas Gleixner 
> Cc: "H. Peter Anvin" 
> Cc: Linus Torvalds 
> Signed-off-by: Andy Lutomirski 
> ---
> 
> Linus has been saying that I might as well maintain this stuff for years.
> What do you all think?
> 
> MAINTAINERS | 9 -
> 1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9d5eeff51b5f..624c3fd11d04 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15575,6 +15575,13 @@ S:Maintained
> F:Documentation/x86/
> F:arch/x86/
> 
> +X86 ENTRY CODE
> +M:Andy Lutomirski 
> +L:linux-kernel@vger.kernel.org
> +T:git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm
> +S:Maintained
> +F:arch/x86/entry/
> +
> X86 MCE INFRASTRUCTURE
> M:Tony Luck 
> M:Borislav Petkov 
> @@ -15597,7 +15604,7 @@ F:drivers/platform/x86/
> F:drivers/platform/olpc/
> 
> X86 VDSO
> -M:Andy Lutomirski 
> +M:Andy Lutomirski 
> L:linux-kernel@vger.kernel.org
> T:git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
> S:Maintained
> -- 
> 2.17.1
> 


Re: [PATCH] MAINTAINERS: Add me as an x86 entry code maintainer

2018-06-18 Thread Tony Luck
So the TIP tree was named for “Thomas, Ingo, Peter”. Need to add an “A” for 
Andy. Maybe it should be the PITA tree :-)

-Tony

> On Jun 18, 2018, at 14:41, Andy Lutomirski  wrote:
> 
> And update my email address.
> 
> Cc: Ingo Molnar 
> Cc: Thomas Gleixner 
> Cc: "H. Peter Anvin" 
> Cc: Linus Torvalds 
> Signed-off-by: Andy Lutomirski 
> ---
> 
> Linus has been saying that I might as well maintain this stuff for years.
> What do you all think?
> 
> MAINTAINERS | 9 -
> 1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9d5eeff51b5f..624c3fd11d04 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15575,6 +15575,13 @@ S:Maintained
> F:Documentation/x86/
> F:arch/x86/
> 
> +X86 ENTRY CODE
> +M:Andy Lutomirski 
> +L:linux-kernel@vger.kernel.org
> +T:git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm
> +S:Maintained
> +F:arch/x86/entry/
> +
> X86 MCE INFRASTRUCTURE
> M:Tony Luck 
> M:Borislav Petkov 
> @@ -15597,7 +15604,7 @@ F:drivers/platform/x86/
> F:drivers/platform/olpc/
> 
> X86 VDSO
> -M:Andy Lutomirski 
> +M:Andy Lutomirski 
> L:linux-kernel@vger.kernel.org
> T:git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
> S:Maintained
> -- 
> 2.17.1
> 


Re: [PATCH v3] mm: Change return type int to vm_fault_t for fault handlers

2018-06-18 Thread Souptick Joarder
On Mon, Jun 4, 2018 at 10:47 PM, Souptick Joarder  wrote:
> Use new return type vm_fault_t for fault handler. For
> now, this is just documenting that the function returns
> a VM_FAULT value rather than an errno. Once all instances
> are converted, vm_fault_t will become a distinct type.
>
> Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t")
>
> The aim is to change the return type of finish_fault()
> and handle_mm_fault() to vm_fault_t type. As part of
> that clean up return type of all other recursively called
> functions have been changed to vm_fault_t type.
>
> The places from where handle_mm_fault() is getting invoked
> will be change to vm_fault_t type but in a separate patch.
>
> vmf_error() is the newly introduce inline function
> in 4.17-rc6.
>
> Signed-off-by: Souptick Joarder 
> Reviewed-by: Matthew Wilcox 
> ---
> v2: Change patch title and fixed sparse warning.
>
> v3: Reviewed by Matthew Wilcox
>
>  fs/userfaultfd.c  |  6 +--
>  include/linux/huge_mm.h   | 10 +++--
>  include/linux/hugetlb.h   |  2 +-
>  include/linux/mm.h| 14 +++
>  include/linux/oom.h   |  2 +-
>  include/linux/swapops.h   |  5 ++-
>  include/linux/userfaultfd_k.h |  5 ++-
>  kernel/memremap.c |  2 +-
>  mm/gup.c  |  4 +-
>  mm/huge_memory.c  | 25 ++--
>  mm/hugetlb.c  | 29 +++---
>  mm/internal.h |  2 +-
>  mm/khugepaged.c   |  3 +-
>  mm/memory.c   | 90 
> ++-
>  mm/shmem.c| 17 
>  15 files changed, 110 insertions(+), 106 deletions(-)
>
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index cec550c..bf60c6a 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -336,17 +336,15 @@ static inline bool userfaultfd_must_wait(struct 
> userfaultfd_ctx *ctx,
>   * fatal_signal_pending()s, and the mmap_sem must be released before
>   * returning it.
>   */
> -int handle_userfault(struct vm_fault *vmf, unsigned long reason)
> +vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
>  {
> struct mm_struct *mm = vmf->vma->vm_mm;
> struct userfaultfd_ctx *ctx;
> struct userfaultfd_wait_queue uwq;
> -   int ret;
> +   vm_fault_t ret = VM_FAULT_SIGBUS;
> bool must_wait, return_to_userland;
> long blocking_state;
>
> -   ret = VM_FAULT_SIGBUS;
> -
> /*
>  * We don't do userfault handling for the final child pid update.
>  *
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index a8a1262..8c4f0a6 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -3,10 +3,11 @@
>  #define _LINUX_HUGE_MM_H
>
>  #include 
> +#include 
>
>  #include  /* only for vma_is_dax() */
>
> -extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
> +extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
>  extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
>  struct vm_area_struct *vma);
> @@ -23,7 +24,7 @@ static inline void huge_pud_set_accessed(struct vm_fault 
> *vmf, pud_t orig_pud)
>  }
>  #endif
>
> -extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
> +extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
>  extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
>   unsigned long addr,
>   pmd_t *pmd,
> @@ -216,7 +217,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct 
> *vma, unsigned long addr,
>  struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long 
> addr,
> pud_t *pud, int flags);
>
> -extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
> +extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t 
> orig_pmd);
>
>  extern struct page *huge_zero_page;
>
> @@ -321,7 +322,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
> return NULL;
>  }
>
> -static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
> +static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
> +   pmd_t orig_pmd)
>  {
> return 0;
>  }
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 36fa6a2..c779b2f 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -105,7 +105,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, 
> struct vm_area_struct *vma,
>  int hugetlb_report_node_meminfo(int, char *);
>  void hugetlb_show_meminfo(void);
>  unsigned long hugetlb_total_pages(void);
> -int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> +vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> 

Re: Possible regression in "slab, slub: skip unnecessary kasan_cache_shutdown()"

2018-06-18 Thread Dmitry Vyukov
On Tue, Jun 19, 2018 at 6:08 AM, Jason A. Donenfeld  wrote:
> On Tue, Jun 19, 2018 at 5:59 AM Shakeel Butt  wrote:
>> Hi Jason, yes please do send me the test suite with the kernel config.
>
> $ git clone https://git.zx2c4.com/WireGuard
> $ cd WireGuard/src
> $ [[ $(gcc -v 2>&1) =~ gcc\ version\ 8\.1\.0 ]] || echo crash needs 8.1
> $ export DEBUG_KERNEL=yes
> $ export KERNEL_VERSION=4.18-rc1
> $ make test-qemu -j$(nproc)
>
> This will build a kernel and a minimal userland and load it in qemu,
> which must be installed.
>
> This code is what causes the crash:
> The self test that's executed:
> https://git.zx2c4.com/WireGuard/tree/src/selftest/ratelimiter.h
> Which exercises this code:
> https://git.zx2c4.com/WireGuard/tree/src/ratelimiter.c
>
> The problem occurs after gc_entries(NULL) frees things (line 124 in
> ratelimiter.h above), and then line 133 reallocates those objects.
> Sometime after that happens, elsewhere in the kernel invokes this
> kasan issue in the kasan cache cleanup.
>
> I realize it's disappointing that the test case here is in WireGuard,
> which isn't (yet!) upstream. That's why in my original message I
> wrote:
>
>> Rather, it looks like this
>> commit introduces a performance optimization, rather than a
>> correctness fix, so it seems that whatever test case is failing is
>> likely an incorrect failure. Does that seem like an accurate
>> possibility to you?
>
> I was hoping to only point you toward my own code after establishing
> the possibility that the bug is not my own. If you still think there's
> a chance this is due to my own correctness issue, and your commit has
> simply unearthed it, let me know and I'll happily keep debugging on my
> own before pinging you further.


Hi Jason,

Your code frees all entries before freeing the cache, right? If you
add total_entries check before freeing the cache, it does not fire,
right?
Are you using SLAB or SLUB? We stress kernel pretty heavily, but with
SLAB, and I suspect Shakeel may also be using SLAB. So if you are
using SLUB, there is significant chance that it's a bug in the SLUB
part of the change.


Re: [PATCH v3] mm: Change return type int to vm_fault_t for fault handlers

2018-06-18 Thread Souptick Joarder
On Mon, Jun 4, 2018 at 10:47 PM, Souptick Joarder  wrote:
> Use new return type vm_fault_t for fault handler. For
> now, this is just documenting that the function returns
> a VM_FAULT value rather than an errno. Once all instances
> are converted, vm_fault_t will become a distinct type.
>
> Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t")
>
> The aim is to change the return type of finish_fault()
> and handle_mm_fault() to vm_fault_t type. As part of
> that clean up return type of all other recursively called
> functions have been changed to vm_fault_t type.
>
> The places from where handle_mm_fault() is getting invoked
> will be change to vm_fault_t type but in a separate patch.
>
> vmf_error() is the newly introduce inline function
> in 4.17-rc6.
>
> Signed-off-by: Souptick Joarder 
> Reviewed-by: Matthew Wilcox 
> ---
> v2: Change patch title and fixed sparse warning.
>
> v3: Reviewed by Matthew Wilcox
>
>  fs/userfaultfd.c  |  6 +--
>  include/linux/huge_mm.h   | 10 +++--
>  include/linux/hugetlb.h   |  2 +-
>  include/linux/mm.h| 14 +++
>  include/linux/oom.h   |  2 +-
>  include/linux/swapops.h   |  5 ++-
>  include/linux/userfaultfd_k.h |  5 ++-
>  kernel/memremap.c |  2 +-
>  mm/gup.c  |  4 +-
>  mm/huge_memory.c  | 25 ++--
>  mm/hugetlb.c  | 29 +++---
>  mm/internal.h |  2 +-
>  mm/khugepaged.c   |  3 +-
>  mm/memory.c   | 90 
> ++-
>  mm/shmem.c| 17 
>  15 files changed, 110 insertions(+), 106 deletions(-)
>
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index cec550c..bf60c6a 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -336,17 +336,15 @@ static inline bool userfaultfd_must_wait(struct 
> userfaultfd_ctx *ctx,
>   * fatal_signal_pending()s, and the mmap_sem must be released before
>   * returning it.
>   */
> -int handle_userfault(struct vm_fault *vmf, unsigned long reason)
> +vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
>  {
> struct mm_struct *mm = vmf->vma->vm_mm;
> struct userfaultfd_ctx *ctx;
> struct userfaultfd_wait_queue uwq;
> -   int ret;
> +   vm_fault_t ret = VM_FAULT_SIGBUS;
> bool must_wait, return_to_userland;
> long blocking_state;
>
> -   ret = VM_FAULT_SIGBUS;
> -
> /*
>  * We don't do userfault handling for the final child pid update.
>  *
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index a8a1262..8c4f0a6 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -3,10 +3,11 @@
>  #define _LINUX_HUGE_MM_H
>
>  #include 
> +#include 
>
>  #include  /* only for vma_is_dax() */
>
> -extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
> +extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
>  extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
>  struct vm_area_struct *vma);
> @@ -23,7 +24,7 @@ static inline void huge_pud_set_accessed(struct vm_fault 
> *vmf, pud_t orig_pud)
>  }
>  #endif
>
> -extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
> +extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
>  extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
>   unsigned long addr,
>   pmd_t *pmd,
> @@ -216,7 +217,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct 
> *vma, unsigned long addr,
>  struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long 
> addr,
> pud_t *pud, int flags);
>
> -extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
> +extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t 
> orig_pmd);
>
>  extern struct page *huge_zero_page;
>
> @@ -321,7 +322,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
> return NULL;
>  }
>
> -static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
> +static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
> +   pmd_t orig_pmd)
>  {
> return 0;
>  }
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 36fa6a2..c779b2f 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -105,7 +105,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, 
> struct vm_area_struct *vma,
>  int hugetlb_report_node_meminfo(int, char *);
>  void hugetlb_show_meminfo(void);
>  unsigned long hugetlb_total_pages(void);
> -int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> +vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> 

Re: Possible regression in "slab, slub: skip unnecessary kasan_cache_shutdown()"

2018-06-18 Thread Dmitry Vyukov
On Tue, Jun 19, 2018 at 6:08 AM, Jason A. Donenfeld  wrote:
> On Tue, Jun 19, 2018 at 5:59 AM Shakeel Butt  wrote:
>> Hi Jason, yes please do send me the test suite with the kernel config.
>
> $ git clone https://git.zx2c4.com/WireGuard
> $ cd WireGuard/src
> $ [[ $(gcc -v 2>&1) =~ gcc\ version\ 8\.1\.0 ]] || echo crash needs 8.1
> $ export DEBUG_KERNEL=yes
> $ export KERNEL_VERSION=4.18-rc1
> $ make test-qemu -j$(nproc)
>
> This will build a kernel and a minimal userland and load it in qemu,
> which must be installed.
>
> This code is what causes the crash:
> The self test that's executed:
> https://git.zx2c4.com/WireGuard/tree/src/selftest/ratelimiter.h
> Which exercises this code:
> https://git.zx2c4.com/WireGuard/tree/src/ratelimiter.c
>
> The problem occurs after gc_entries(NULL) frees things (line 124 in
> ratelimiter.h above), and then line 133 reallocates those objects.
> Sometime after that happens, elsewhere in the kernel invokes this
> kasan issue in the kasan cache cleanup.
>
> I realize it's disappointing that the test case here is in WireGuard,
> which isn't (yet!) upstream. That's why in my original message I
> wrote:
>
>> Rather, it looks like this
>> commit introduces a performance optimization, rather than a
>> correctness fix, so it seems that whatever test case is failing is
>> likely an incorrect failure. Does that seem like an accurate
>> possibility to you?
>
> I was hoping to only point you toward my own code after establishing
> the possibility that the bug is not my own. If you still think there's
> a chance this is due to my own correctness issue, and your commit has
> simply unearthed it, let me know and I'll happily keep debugging on my
> own before pinging you further.


Hi Jason,

Your code frees all entries before freeing the cache, right? If you
add total_entries check before freeing the cache, it does not fire,
right?
Are you using SLAB or SLUB? We stress kernel pretty heavily, but with
SLAB, and I suspect Shakeel may also be using SLAB. So if you are
using SLUB, there is significant chance that it's a bug in the SLUB
part of the change.


Re: [RFC PATCH 0/2] mtd: rawnand: support MT29F1G08ABAFAWP-ITE:F

2018-06-18 Thread Boris Brezillon
Hi Chris,

On Tue, 19 Jun 2018 01:44:24 +
Chris Packham  wrote:

> On 19/06/18 12:35, Chris Packham wrote:
> > On 19/06/18 01:15, Miquel Raynal wrote:  
> >> Hi Chris,
> >>
> >> On Mon, 18 Jun 2018 16:52:53 +1200, Chris Packham
> >>  wrote:
> >>  
> >>> Hi,
> >>>
> >>> I'm looking at adding support for the Micron MT29F1G08ABAFAWP-ITE:F chip
> >>> to one of our boards which uses the Marvell NFCv2 controller.
> >>>
> >>> This particular chip is a bit odd in that the datasheet states support
> >>> for ONFI 1.0 but the revision number field is 00 00. It also is marked
> >>> ABAFA but reports internally as ABAGA. Finally it has internal 8-bit ECC
> >>> which cannot be disabled.  
> >>
> >> Boris and I agree that in this case, the chip should not be probed if
> >> ecc->type != ON_DIE (and eventually NONE).
> >>
> >> This should be handled in the Micron driver.
> >>
> >> Also, what is the returned value of micron_supports_on_die_ecc() (with
> >> patch 1/2)?  
> > 
> > micron_supports_on_die_ecc() returns MICRON_ON_DIE_UNSUPPORTED.
> > Technically this chip should be MICRON_ON_DIE_MANDATORY since it can't
> > be disabled but that wouldn't be much help since that would still result
> > in -EINVAL. I'll dig into micron_supports_on_die_ecc() and see if I can
> > find something in the datasheet to use.
> >   
> 
> Some further debugging. Nothing (in 4.17) calls 
> set_bit(ONFI_FEATURE_ON_DIE_ECC) so I don't think 
> micron_supports_on_die_ecc() can return anything other than 
> MICRON_ON_DIE_UNSUPPORTED, unless I'm missing something for how the 
> {get,set}_feature_list is populated.

Nope you're not. Looks like we broke Micron on-die ECC in 4.17.

> 
> With the onfi.version fix and the following
> 
> --- a/drivers/mtd/nand/raw/nand_micron.c
> +++ b/drivers/mtd/nand/raw/nand_micron.c
> @@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
> 
>  if (p->supports_set_get_features) {
>  set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list);
> +   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list);
>  set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list);
> +   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list);
>  }

Can you send a patch containing only the above changes with the
Cc-stable and Fixes tags?

> @@ -240,7 +246,7 @@ static int micron_supports_on_die_ecc(struct 
> nand_chip *chip)
>   * Some Micron NANDs have an on-die ECC of 4/512, some other
> - * 8/512. We only support the former.
> + * 8/512.
>   */
> -   if (chip->ecc_strength_ds != 4)
> +   if (chip->ecc_strength_ds != 4 && chip->ecc_strength_ds != 8)
>  return MICRON_ON_DIE_UNSUPPORTED;
>

This should be done in a separate patch.
 
> I can get micron_supports_on_die_ecc() to return MICRON_ON_DIE_SUPPORTED.
> 

That's weird. You should have MICRON_ON_DIE_MANDATORY here. Could it be
that the ONFI_FEATURE_ON_DIE_ECC_EN bit does not really reflect the ECC
engine state? If that's the case, we'll have to change the way we
detect if on-die ECC is supported/mandatory/not-supported (based on the
model name stored in the ONFI param page?).

> Then I run into a problem with the marvell_nand.c which currently 
> doesn't handle NAND_ECC_ON_DIE which is easily fixed.

Yep, adding that to the new driver should be pretty easy.

> 
> But then I have the issue that I need to handle systems with either type 
> of ECC scheme ("on-die" or "hw") which I'm not sure is even possible 
> within the dts.

You mean having the same dts for both setup. Indeed, that's not
supported right now.

> 
> I'll re-base against 4.18-rc1 and send what I have so-far.

Cool. I'm particularly interested by the SET/GET_FEATURE(ECC) fix.

Thanks,

Boris


[PATCH] serial: mps2-uart: Initialize early console

2018-06-18 Thread Guenter Roeck
The early console code for mps2-uart assumes that the serial hardware is
enabled for transmit when the system boots. However, this is not the case
after reset. This results in a hang in mps2_early_putchar() if the serial
transmitter is not enabled by a boot loader or ROM monitor.

Signed-off-by: Guenter Roeck 
---
 drivers/tty/serial/mps2-uart.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/tty/serial/mps2-uart.c b/drivers/tty/serial/mps2-uart.c
index 9f8f63719126..0743a0551ce1 100644
--- a/drivers/tty/serial/mps2-uart.c
+++ b/drivers/tty/serial/mps2-uart.c
@@ -448,6 +448,14 @@ static struct console mps2_uart_console = {
 
 #define MPS2_SERIAL_CONSOLE (_uart_console)
 
+static void mps2_early_init(struct uart_port *port)
+{
+   u8 control = readb(port->membase + UARTn_CTRL);
+
+   control |= UARTn_CTRL_TX_ENABLE;
+   writeb(control, port->membase + UARTn_CTRL);
+}
+
 static void mps2_early_putchar(struct uart_port *port, int ch)
 {
while (readb(port->membase + UARTn_STATE) & UARTn_STATE_TX_FULL)
@@ -469,6 +477,7 @@ static int __init mps2_early_console_setup(struct 
earlycon_device *device,
if (!device->port.membase)
return -ENODEV;
 
+   mps2_early_init(>port);
device->con->write = mps2_early_write;
 
return 0;
-- 
2.7.4



Re: [RFC PATCH 0/2] mtd: rawnand: support MT29F1G08ABAFAWP-ITE:F

2018-06-18 Thread Boris Brezillon
Hi Chris,

On Tue, 19 Jun 2018 01:44:24 +
Chris Packham  wrote:

> On 19/06/18 12:35, Chris Packham wrote:
> > On 19/06/18 01:15, Miquel Raynal wrote:  
> >> Hi Chris,
> >>
> >> On Mon, 18 Jun 2018 16:52:53 +1200, Chris Packham
> >>  wrote:
> >>  
> >>> Hi,
> >>>
> >>> I'm looking at adding support for the Micron MT29F1G08ABAFAWP-ITE:F chip
> >>> to one of our boards which uses the Marvell NFCv2 controller.
> >>>
> >>> This particular chip is a bit odd in that the datasheet states support
> >>> for ONFI 1.0 but the revision number field is 00 00. It also is marked
> >>> ABAFA but reports internally as ABAGA. Finally it has internal 8-bit ECC
> >>> which cannot be disabled.  
> >>
> >> Boris and I agree that in this case, the chip should not be probed if
> >> ecc->type != ON_DIE (and eventually NONE).
> >>
> >> This should be handled in the Micron driver.
> >>
> >> Also, what is the returned value of micron_supports_on_die_ecc() (with
> >> patch 1/2)?  
> > 
> > micron_supports_on_die_ecc() returns MICRON_ON_DIE_UNSUPPORTED.
> > Technically this chip should be MICRON_ON_DIE_MANDATORY since it can't
> > be disabled but that wouldn't be much help since that would still result
> > in -EINVAL. I'll dig into micron_supports_on_die_ecc() and see if I can
> > find something in the datasheet to use.
> >   
> 
> Some further debugging. Nothing (in 4.17) calls 
> set_bit(ONFI_FEATURE_ON_DIE_ECC) so I don't think 
> micron_supports_on_die_ecc() can return anything other than 
> MICRON_ON_DIE_UNSUPPORTED, unless I'm missing something for how the 
> {get,set}_feature_list is populated.

Nope you're not. Looks like we broke Micron on-die ECC in 4.17.

> 
> With the onfi.version fix and the following
> 
> --- a/drivers/mtd/nand/raw/nand_micron.c
> +++ b/drivers/mtd/nand/raw/nand_micron.c
> @@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
> 
>  if (p->supports_set_get_features) {
>  set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list);
> +   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list);
>  set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list);
> +   set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list);
>  }

Can you send a patch containing only the above changes with the
Cc-stable and Fixes tags?

> @@ -240,7 +246,7 @@ static int micron_supports_on_die_ecc(struct 
> nand_chip *chip)
>   * Some Micron NANDs have an on-die ECC of 4/512, some other
> - * 8/512. We only support the former.
> + * 8/512.
>   */
> -   if (chip->ecc_strength_ds != 4)
> +   if (chip->ecc_strength_ds != 4 && chip->ecc_strength_ds != 8)
>  return MICRON_ON_DIE_UNSUPPORTED;
>

This should be done in a separate patch.
 
> I can get micron_supports_on_die_ecc() to return MICRON_ON_DIE_SUPPORTED.
> 

That's weird. You should have MICRON_ON_DIE_MANDATORY here. Could it be
that the ONFI_FEATURE_ON_DIE_ECC_EN bit does not really reflect the ECC
engine state? If that's the case, we'll have to change the way we
detect if on-die ECC is supported/mandatory/not-supported (based on the
model name stored in the ONFI param page?).

> Then I run into a problem with the marvell_nand.c which currently 
> doesn't handle NAND_ECC_ON_DIE which is easily fixed.

Yep, adding that to the new driver should be pretty easy.

> 
> But then I have the issue that I need to handle systems with either type 
> of ECC scheme ("on-die" or "hw") which I'm not sure is even possible 
> within the dts.

You mean having the same dts for both setup. Indeed, that's not
supported right now.

> 
> I'll re-base against 4.18-rc1 and send what I have so-far.

Cool. I'm particularly interested by the SET/GET_FEATURE(ECC) fix.

Thanks,

Boris


[PATCH] serial: mps2-uart: Initialize early console

2018-06-18 Thread Guenter Roeck
The early console code for mps2-uart assumes that the serial hardware is
enabled for transmit when the system boots. However, this is not the case
after reset. This results in a hang in mps2_early_putchar() if the serial
transmitter is not enabled by a boot loader or ROM monitor.

Signed-off-by: Guenter Roeck 
---
 drivers/tty/serial/mps2-uart.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/tty/serial/mps2-uart.c b/drivers/tty/serial/mps2-uart.c
index 9f8f63719126..0743a0551ce1 100644
--- a/drivers/tty/serial/mps2-uart.c
+++ b/drivers/tty/serial/mps2-uart.c
@@ -448,6 +448,14 @@ static struct console mps2_uart_console = {
 
 #define MPS2_SERIAL_CONSOLE (_uart_console)
 
+static void mps2_early_init(struct uart_port *port)
+{
+   u8 control = readb(port->membase + UARTn_CTRL);
+
+   control |= UARTn_CTRL_TX_ENABLE;
+   writeb(control, port->membase + UARTn_CTRL);
+}
+
 static void mps2_early_putchar(struct uart_port *port, int ch)
 {
while (readb(port->membase + UARTn_STATE) & UARTn_STATE_TX_FULL)
@@ -469,6 +477,7 @@ static int __init mps2_early_console_setup(struct 
earlycon_device *device,
if (!device->port.membase)
return -ENODEV;
 
+   mps2_early_init(>port);
device->con->write = mps2_early_write;
 
return 0;
-- 
2.7.4



linux-next: manual merge of the userns tree with the vfs tree

2018-06-18 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the userns tree got conflicts in:

  fs/proc/inode.c
  fs/proc/root.c

between commits:

  0223e0999be2 ("procfs: Move proc_fill_super() to fs/proc/root.c")
  83cd45075c36 ("proc: Add fs_context support to procfs")

from the vfs tree and commit:

  cc8cda3af2ba ("proc: Simplify and fix proc by removing the kernel mount")

from the userns tree.

I don't know how to fix this up, so I just dropped the userns tree for
today (since it only contained that one commit).

-- 
Cheers,
Stephen Rothwell


pgpmfPoU8dNZR.pgp
Description: OpenPGP digital signature


linux-next: manual merge of the userns tree with the vfs tree

2018-06-18 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the userns tree got conflicts in:

  fs/proc/inode.c
  fs/proc/root.c

between commits:

  0223e0999be2 ("procfs: Move proc_fill_super() to fs/proc/root.c")
  83cd45075c36 ("proc: Add fs_context support to procfs")

from the vfs tree and commit:

  cc8cda3af2ba ("proc: Simplify and fix proc by removing the kernel mount")

from the userns tree.

I don't know how to fix this up, so I just dropped the userns tree for
today (since it only contained that one commit).

-- 
Cheers,
Stephen Rothwell


pgpmfPoU8dNZR.pgp
Description: OpenPGP digital signature


Re: [Letux-kernel] BUG: drivers/pinctrl/core: races in pinctrl_groups and deferred probing

2018-06-18 Thread H. Nikolaus Schaller
Hi Tony,

> Am 19.06.2018 um 06:34 schrieb Tony Lindgren :
> 
> * H. Nikolaus Schaller  [180618 18:33]:
 So code just needs group cleanup on failed probing and fixing the mutex 
 around pinctrl_generic_add_group().
 
 I think we need the mutex because a race still can happen when 
 create_pinctrl() is calling pcs_dt_node_to_map()
 and pinctrl_generic_add_group() w/o being locked on pinctrl_maps_mutex.
 
 The race I suspect is that two drivers are trying to insert the same name 
 and may come
 both to the conclusion that it does not yet exist. And both insert into 
 the radix tree.
 
 The window of risk is small though... It is in pinctrl_generic_add_group() 
 between calling
 pinctrl_generic_group_name_to_selector() and radix_tree_insert() so we 
 probably won't
 see it in real hardware tests.
>>> 
>>> Hmm but that race should be already fixed with mutex held
>>> by the pin controller drivers with these fixes? Or am I
>>> missing something still?
>> 
>> Hm. Maybe we refer to a different mutex?
> 
> Yes I think that's the case, you're talking about a different
> mutex here :)
> 
>> I had seen the call sequence
>> 
>> create_pinctrl()-> pinctrl_dt_to_map() -> pcs_dt_node_to_map() -> 
>> pinctrl_generic_add_group()
>> 
>> w/o any lock inside.
>> 
>> There is a mutex_lock(_maps_mutex); in create_pinctrl(), but locked 
>> after that.
>> 
>> Or is there a lock outside of create_pinctrl()?
>> 
>> If I look into the stack dumps, call nesting is
>> 
>> driver_probe_device() -> pinctrl_bind_pins() -> devm_pinctrl_get() -> 
>> create_pinctrl()
>> 
>> They all do no locking.
>> 
>> Maybe I am missing something.
> 
> Can you please post a patch for that as you already have it
> debugged? That's easier to understand than reading a verbal
> patch :)

I have no patch for it and all tests were without, but I can suggest a change 
which IMHO
could solve it:

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index eb2b217f5e1e..7d125f9a7804 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1037,15 +1037,16 @@ static struct pinctrl *create_pinctrl(struct device 
*dev,
INIT_LIST_HEAD(>states);
INIT_LIST_HEAD(>dt_maps);
 
+   mutex_lock(_maps_mutex);
ret = pinctrl_dt_to_map(p, pctldev);
if (ret < 0) {
kfree(p);
+   mutex_unlock(_maps_mutex);
return ERR_PTR(ret);
}
 
devname = dev_name(dev);
 
-   mutex_lock(_maps_mutex);
/* Iterate over the pin control maps to locate the right ones */
for_each_maps(maps_node, i, map) {
/* Map must be for this device */

Description: we should also protect pinctrl_dt_to_map(), which calls 
pinctrl_generic_add_group()
and the calls inside pinctrl_generic_add_group() to 
pinctrl_generic_group_name_to_selector()
and radix_tree_insert() with a mutex.

But I haven't tested this case (because it is unlikely to happen and be 
testable).

BR,
Nikolaus



Re: [PATCH v2 4/4] vt: coherence validation code for the unicode screen buffer

2018-06-18 Thread Joe Perches
On Mon, 2018-06-18 at 21:50 -0400, Nicolas Pitre wrote:
> On Tue, 19 Jun 2018, Andy Shevchenko wrote:
[]
> > > +   /*
> > > +* Make sure our unicode screen translates into the same glyphs
> > > +* as the actual screen. This is brutal indeed.
> > > +*/
> > > +   p = (unsigned short *)vc->vc_origin;
> > > +   mask = vc->vc_hi_font_mask | 0xff;
> > > +   for (y = 0; y < vc->vc_rows; y++) {
> > > +   char32_t *line = uniscr->lines[y];
> > > +   for (x = 0; x < vc->vc_cols; x++) {
> > > +   u16 glyph = scr_readw(p++) & mask;
> > > +   char32_t uc = line[x];
> > > +   int tc = conv_uni_to_pc(vc, uc);
> > > +   if (tc == -4)
> > > +   tc = conv_uni_to_pc(vc, 0xfffd);
> > > +   if (tc == -4)
> > > +   tc = conv_uni_to_pc(vc, '?');
> > > +   if (tc != glyph)
> > > +   pr_notice("%s: mismatch at %d,%d: "
> > > + "glyph=%#x tc=%#x\n", __func__,
> > > + x, y, glyph, tc);
> > 
> > Don't split format string in printk(). checkpatch will not warn on longer 
> > lines.
> 
> I didn't do it like that for checkpatch but to keep the code readable.
> I don't particularly care either ways though.

If one glyph is off, then perhaps others are off too.
Perhaps this message should be ratelimited.



Re: [Letux-kernel] BUG: drivers/pinctrl/core: races in pinctrl_groups and deferred probing

2018-06-18 Thread H. Nikolaus Schaller
Hi Tony,

> Am 19.06.2018 um 06:34 schrieb Tony Lindgren :
> 
> * H. Nikolaus Schaller  [180618 18:33]:
 So code just needs group cleanup on failed probing and fixing the mutex 
 around pinctrl_generic_add_group().
 
 I think we need the mutex because a race still can happen when 
 create_pinctrl() is calling pcs_dt_node_to_map()
 and pinctrl_generic_add_group() w/o being locked on pinctrl_maps_mutex.
 
 The race I suspect is that two drivers are trying to insert the same name 
 and may come
 both to the conclusion that it does not yet exist. And both insert into 
 the radix tree.
 
 The window of risk is small though... It is in pinctrl_generic_add_group() 
 between calling
 pinctrl_generic_group_name_to_selector() and radix_tree_insert() so we 
 probably won't
 see it in real hardware tests.
>>> 
>>> Hmm but that race should be already fixed with mutex held
>>> by the pin controller drivers with these fixes? Or am I
>>> missing something still?
>> 
>> Hm. Maybe we refer to a different mutex?
> 
> Yes I think that's the case, you're talking about a different
> mutex here :)
> 
>> I had seen the call sequence
>> 
>> create_pinctrl()-> pinctrl_dt_to_map() -> pcs_dt_node_to_map() -> 
>> pinctrl_generic_add_group()
>> 
>> w/o any lock inside.
>> 
>> There is a mutex_lock(_maps_mutex); in create_pinctrl(), but locked 
>> after that.
>> 
>> Or is there a lock outside of create_pinctrl()?
>> 
>> If I look into the stack dumps, call nesting is
>> 
>> driver_probe_device() -> pinctrl_bind_pins() -> devm_pinctrl_get() -> 
>> create_pinctrl()
>> 
>> They all do no locking.
>> 
>> Maybe I am missing something.
> 
> Can you please post a patch for that as you already have it
> debugged? That's easier to understand than reading a verbal
> patch :)

I have no patch for it and all tests were without, but I can suggest a change 
which IMHO
could solve it:

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index eb2b217f5e1e..7d125f9a7804 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1037,15 +1037,16 @@ static struct pinctrl *create_pinctrl(struct device 
*dev,
INIT_LIST_HEAD(>states);
INIT_LIST_HEAD(>dt_maps);
 
+   mutex_lock(_maps_mutex);
ret = pinctrl_dt_to_map(p, pctldev);
if (ret < 0) {
kfree(p);
+   mutex_unlock(_maps_mutex);
return ERR_PTR(ret);
}
 
devname = dev_name(dev);
 
-   mutex_lock(_maps_mutex);
/* Iterate over the pin control maps to locate the right ones */
for_each_maps(maps_node, i, map) {
/* Map must be for this device */

Description: we should also protect pinctrl_dt_to_map(), which calls 
pinctrl_generic_add_group()
and the calls inside pinctrl_generic_add_group() to 
pinctrl_generic_group_name_to_selector()
and radix_tree_insert() with a mutex.

But I haven't tested this case (because it is unlikely to happen and be 
testable).

BR,
Nikolaus



Re: [PATCH v2 4/4] vt: coherence validation code for the unicode screen buffer

2018-06-18 Thread Joe Perches
On Mon, 2018-06-18 at 21:50 -0400, Nicolas Pitre wrote:
> On Tue, 19 Jun 2018, Andy Shevchenko wrote:
[]
> > > +   /*
> > > +* Make sure our unicode screen translates into the same glyphs
> > > +* as the actual screen. This is brutal indeed.
> > > +*/
> > > +   p = (unsigned short *)vc->vc_origin;
> > > +   mask = vc->vc_hi_font_mask | 0xff;
> > > +   for (y = 0; y < vc->vc_rows; y++) {
> > > +   char32_t *line = uniscr->lines[y];
> > > +   for (x = 0; x < vc->vc_cols; x++) {
> > > +   u16 glyph = scr_readw(p++) & mask;
> > > +   char32_t uc = line[x];
> > > +   int tc = conv_uni_to_pc(vc, uc);
> > > +   if (tc == -4)
> > > +   tc = conv_uni_to_pc(vc, 0xfffd);
> > > +   if (tc == -4)
> > > +   tc = conv_uni_to_pc(vc, '?');
> > > +   if (tc != glyph)
> > > +   pr_notice("%s: mismatch at %d,%d: "
> > > + "glyph=%#x tc=%#x\n", __func__,
> > > + x, y, glyph, tc);
> > 
> > Don't split format string in printk(). checkpatch will not warn on longer 
> > lines.
> 
> I didn't do it like that for checkpatch but to keep the code readable.
> I don't particularly care either ways though.

If one glyph is off, then perhaps others are off too.
Perhaps this message should be ratelimited.



Re: [PATCH] x86/microcode/intel: Ensure new microcode processor flags match with cpu's pf

2018-06-18 Thread Zhenzhong Duan

On 2018/6/19 3:56, Borislav Petkov wrote:

On Mon, Jun 04, 2018 at 08:16:51AM +, Zhenzhong Duan wrote:

Intel spec says: 'The processor flags in the 48-byte header and the
processor flags field associated with the extended processor signature
structures may have multiple bits set.'

Make sure processor flags of the new microcode intersect with current
cpu's. Comparing with old microcode's pf can't guarantee this.

Signed-off-by: Zhenzhong Duan 
---
  arch/x86/kernel/cpu/microcode/intel.c |8 +++-
  1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c 
b/arch/x86/kernel/cpu/microcode/intel.c
index 461e315..54f4014 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -371,12 +371,10 @@ static int microcode_sanity_check(void *mc, int print_err)
goto next;
  
  		} else {

-   struct microcode_header_intel *phdr = >hdr;
-
if (!has_newer_microcode(data,
-phdr->sig,
-phdr->pf,
-phdr->rev))
+uci->cpu_sig.sig,
+uci->cpu_sig.pf,
+patch->hdr.rev))
goto next;
}
  
--


So I'm scratching my head over this and have no clue what you're trying
to achieve. Is this a fix for a bug you're seeing or what? You'd need to
be a lot more verbose when explaining what this patch is trying to do...
Imagine kernel already found a microcode blob A with extended sig/pf 
matching current cpu, then another microcode B is checked which doesn't 
match current cpu but matches the sig/pf of microcode A, then microcode 
B will replaced A, but it's not suitable for current cpu.


I didn't see same issue in our system. When fixing another bug and 
reading upstream microcode code, I found this potential issue, feel free 
to correct me if it's never possible in reality.


Thanks
Zhenzhong


Re: [PATCH] x86/microcode/intel: Ensure new microcode processor flags match with cpu's pf

2018-06-18 Thread Zhenzhong Duan

On 2018/6/19 3:56, Borislav Petkov wrote:

On Mon, Jun 04, 2018 at 08:16:51AM +, Zhenzhong Duan wrote:

Intel spec says: 'The processor flags in the 48-byte header and the
processor flags field associated with the extended processor signature
structures may have multiple bits set.'

Make sure processor flags of the new microcode intersect with current
cpu's. Comparing with old microcode's pf can't guarantee this.

Signed-off-by: Zhenzhong Duan 
---
  arch/x86/kernel/cpu/microcode/intel.c |8 +++-
  1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c 
b/arch/x86/kernel/cpu/microcode/intel.c
index 461e315..54f4014 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -371,12 +371,10 @@ static int microcode_sanity_check(void *mc, int print_err)
goto next;
  
  		} else {

-   struct microcode_header_intel *phdr = >hdr;
-
if (!has_newer_microcode(data,
-phdr->sig,
-phdr->pf,
-phdr->rev))
+uci->cpu_sig.sig,
+uci->cpu_sig.pf,
+patch->hdr.rev))
goto next;
}
  
--


So I'm scratching my head over this and have no clue what you're trying
to achieve. Is this a fix for a bug you're seeing or what? You'd need to
be a lot more verbose when explaining what this patch is trying to do...
Imagine kernel already found a microcode blob A with extended sig/pf 
matching current cpu, then another microcode B is checked which doesn't 
match current cpu but matches the sig/pf of microcode A, then microcode 
B will replaced A, but it's not suitable for current cpu.


I didn't see same issue in our system. When fixing another bug and 
reading upstream microcode code, I found this potential issue, feel free 
to correct me if it's never possible in reality.


Thanks
Zhenzhong


Re: [RFC PATCH 0/5] kbuild: build modules from code in multiple directories.

2018-06-18 Thread Christoph Hellwig
On Tue, Jun 19, 2018 at 02:05:23PM +1000, NeilBrown wrote:
> From: NeilBrown 
> Date: Tue, 19 Jun 2018 13:59:16 +1000
> Subject: [PATCH] kbuild/xfs: example modobj-m conversion
> 
> This is a demonstration patch to show how
> xfs can be changed to make use of the proposed modobj-m=
> functionality, should the xfs developers want that.

Well, IFF we go with this new functionality I think everyone should
be using it instead of the current hacks.

So text like the above should be in the series cover letter, and this
patch should have an actual description..

I see no argument against these changes, but I've also added the XFS
list.

> 
> Signed-off-by: NeilBrown 
> ---
>  fs/xfs/Makefile| 78 
> ++
>  fs/xfs/libxfs/Makefile | 43 
>  fs/xfs/scrub/Makefile  | 29 +++
>  3 files changed, 74 insertions(+), 76 deletions(-)
>  create mode 100644 fs/xfs/libxfs/Makefile
>  create mode 100644 fs/xfs/scrub/Makefile
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 2f3f75a7f180..0ba854045fe9 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -15,47 +15,7 @@ obj-$(CONFIG_XFS_FS)   += xfs.o
>  xfs-y+= xfs_trace.o
>  
>  # build the libxfs code first
> -xfs-y+= $(addprefix libxfs/, \
> -xfs_ag.o \
> -xfs_alloc.o \
> -xfs_alloc_btree.o \
> -xfs_attr.o \
> -xfs_attr_leaf.o \
> -xfs_attr_remote.o \
> -xfs_bit.o \
> -xfs_bmap.o \
> -xfs_bmap_btree.o \
> -xfs_btree.o \
> -xfs_da_btree.o \
> -xfs_da_format.o \
> -xfs_defer.o \
> -xfs_dir2.o \
> -xfs_dir2_block.o \
> -xfs_dir2_data.o \
> -xfs_dir2_leaf.o \
> -xfs_dir2_node.o \
> -xfs_dir2_sf.o \
> -xfs_dquot_buf.o \
> -xfs_ialloc.o \
> -xfs_ialloc_btree.o \
> -xfs_iext_tree.o \
> -xfs_inode_fork.o \
> -xfs_inode_buf.o \
> -xfs_log_rlimit.o \
> -xfs_ag_resv.o \
> -xfs_rmap.o \
> -xfs_rmap_btree.o \
> -xfs_refcount.o \
> -xfs_refcount_btree.o \
> -xfs_sb.o \
> -xfs_symlink_remote.o \
> -xfs_trans_resv.o \
> -xfs_types.o \
> -)
> -# xfs_rtbitmap is shared with libxfs
> -xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
> -xfs_rtbitmap.o \
> -)
> +xfs-y+= libxfs/
>  
>  # highlevel code
>  xfs-y+= xfs_aops.o \
> @@ -127,38 +87,4 @@ xfs-$(CONFIG_SYSCTL)  += xfs_sysctl.o
>  xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
>  xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
>  
> -# online scrub/repair
> -ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> -
> -# Tracepoints like to blow up, so build that before everything else
> -
> -xfs-y+= $(addprefix scrub/, \
> -trace.o \
> -agheader.o \
> -alloc.o \
> -attr.o \
> -bmap.o \
> -btree.o \
> -common.o \
> -dabtree.o \
> -dir.o \
> -ialloc.o \
> -inode.o \
> -parent.o \
> -refcount.o \
> -rmap.o \
> -scrub.o \
> -symlink.o \
> -)
> -
> -xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o
> -xfs-$(CONFIG_XFS_QUOTA)  += scrub/quota.o
> -
> -# online repair
> -ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
> -xfs-y+= $(addprefix scrub/, \
> -agheader_repair.o \
> - 

Re: [RFC PATCH 0/5] kbuild: build modules from code in multiple directories.

2018-06-18 Thread Christoph Hellwig
On Tue, Jun 19, 2018 at 02:05:23PM +1000, NeilBrown wrote:
> From: NeilBrown 
> Date: Tue, 19 Jun 2018 13:59:16 +1000
> Subject: [PATCH] kbuild/xfs: example modobj-m conversion
> 
> This is a demonstration patch to show how
> xfs can be changed to make use of the proposed modobj-m=
> functionality, should the xfs developers want that.

Well, IFF we go with this new functionality I think everyone should
be using it instead of the current hacks.

So text like the above should be in the series cover letter, and this
patch should have an actual description..

I see no argument against these changes, but I've also added the XFS
list.

> 
> Signed-off-by: NeilBrown 
> ---
>  fs/xfs/Makefile| 78 
> ++
>  fs/xfs/libxfs/Makefile | 43 
>  fs/xfs/scrub/Makefile  | 29 +++
>  3 files changed, 74 insertions(+), 76 deletions(-)
>  create mode 100644 fs/xfs/libxfs/Makefile
>  create mode 100644 fs/xfs/scrub/Makefile
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 2f3f75a7f180..0ba854045fe9 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -15,47 +15,7 @@ obj-$(CONFIG_XFS_FS)   += xfs.o
>  xfs-y+= xfs_trace.o
>  
>  # build the libxfs code first
> -xfs-y+= $(addprefix libxfs/, \
> -xfs_ag.o \
> -xfs_alloc.o \
> -xfs_alloc_btree.o \
> -xfs_attr.o \
> -xfs_attr_leaf.o \
> -xfs_attr_remote.o \
> -xfs_bit.o \
> -xfs_bmap.o \
> -xfs_bmap_btree.o \
> -xfs_btree.o \
> -xfs_da_btree.o \
> -xfs_da_format.o \
> -xfs_defer.o \
> -xfs_dir2.o \
> -xfs_dir2_block.o \
> -xfs_dir2_data.o \
> -xfs_dir2_leaf.o \
> -xfs_dir2_node.o \
> -xfs_dir2_sf.o \
> -xfs_dquot_buf.o \
> -xfs_ialloc.o \
> -xfs_ialloc_btree.o \
> -xfs_iext_tree.o \
> -xfs_inode_fork.o \
> -xfs_inode_buf.o \
> -xfs_log_rlimit.o \
> -xfs_ag_resv.o \
> -xfs_rmap.o \
> -xfs_rmap_btree.o \
> -xfs_refcount.o \
> -xfs_refcount_btree.o \
> -xfs_sb.o \
> -xfs_symlink_remote.o \
> -xfs_trans_resv.o \
> -xfs_types.o \
> -)
> -# xfs_rtbitmap is shared with libxfs
> -xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
> -xfs_rtbitmap.o \
> -)
> +xfs-y+= libxfs/
>  
>  # highlevel code
>  xfs-y+= xfs_aops.o \
> @@ -127,38 +87,4 @@ xfs-$(CONFIG_SYSCTL)  += xfs_sysctl.o
>  xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
>  xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
>  
> -# online scrub/repair
> -ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
> -
> -# Tracepoints like to blow up, so build that before everything else
> -
> -xfs-y+= $(addprefix scrub/, \
> -trace.o \
> -agheader.o \
> -alloc.o \
> -attr.o \
> -bmap.o \
> -btree.o \
> -common.o \
> -dabtree.o \
> -dir.o \
> -ialloc.o \
> -inode.o \
> -parent.o \
> -refcount.o \
> -rmap.o \
> -scrub.o \
> -symlink.o \
> -)
> -
> -xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o
> -xfs-$(CONFIG_XFS_QUOTA)  += scrub/quota.o
> -
> -# online repair
> -ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
> -xfs-y+= $(addprefix scrub/, \
> -agheader_repair.o \
> - 

Re: [PATCH 2/2] sched/fair: Advance global expiration when period timer is restarted

2018-06-18 Thread Cong Wang
On Mon, Jun 18, 2018 at 2:16 AM, Xunlei Pang  wrote:
> I noticed the group frequently got throttled even it consumed
> low cpu usage, this caused some jitters on the response time
> to some of our business containers enabling cpu quota.
>
> It's very easy to reproduce:
> mkdir /sys/fs/cgroup/cpu/test
> cd /sys/fs/cgroup/cpu/test
> echo 10 > cpu.cfs_quota_us
> echo $$ > tasks
> then repeat:
> cat cpu.stat |grep nr_throttled  // nr_throttled will increase
>
> After some analysis, we found that cfs_rq::runtime_remaining will
> be cleared by expire_cfs_rq_runtime() due to two equal but stale
> "cfs_{b|q}->runtime_expires" after period timer is re-armed.
>
> The global expiration should be advanced accordingly when the
> bandwidth period timer is restarted.
>

I observed the same problem and already sent some patches:

https://lkml.org/lkml/2018/5/22/37
https://lkml.org/lkml/2018/5/22/38
https://lkml.org/lkml/2018/5/22/35


Re: [PATCH 2/2] sched/fair: Advance global expiration when period timer is restarted

2018-06-18 Thread Cong Wang
On Mon, Jun 18, 2018 at 2:16 AM, Xunlei Pang  wrote:
> I noticed the group frequently got throttled even it consumed
> low cpu usage, this caused some jitters on the response time
> to some of our business containers enabling cpu quota.
>
> It's very easy to reproduce:
> mkdir /sys/fs/cgroup/cpu/test
> cd /sys/fs/cgroup/cpu/test
> echo 10 > cpu.cfs_quota_us
> echo $$ > tasks
> then repeat:
> cat cpu.stat |grep nr_throttled  // nr_throttled will increase
>
> After some analysis, we found that cfs_rq::runtime_remaining will
> be cleared by expire_cfs_rq_runtime() due to two equal but stale
> "cfs_{b|q}->runtime_expires" after period timer is re-armed.
>
> The global expiration should be advanced accordingly when the
> bandwidth period timer is restarted.
>

I observed the same problem and already sent some patches:

https://lkml.org/lkml/2018/5/22/37
https://lkml.org/lkml/2018/5/22/38
https://lkml.org/lkml/2018/5/22/35


Re: [PATCH 3/3] rpmsg: smd: fix kerneldoc warnings

2018-06-18 Thread Vinod
On 18-06-18, 13:33, Srinivas Kandagatla wrote:
> This patch fixes below kerneldoc warnings
> 
> qcom_smd.c:141: warning: Function parameter or member 'dev' not described in 
> 'qcom_smd_edge'
> qcom_smd.c:141: warning: Function parameter or member 'name' not described in 
> 'qcom_smd_edge'
> qcom_smd.c:141: warning: Function parameter or member 'new_channel_event' not 
> described in 'qcom_smd_edge'
> qcom_smd.c:222: warning: Function parameter or member 'qsept' not described 
> in 'qcom_smd_channel'
> qcom_smd.c:222: warning: Function parameter or member 'registered' not 
> described in 'qcom_smd_channel'
> qcom_smd.c:222: warning: Function parameter or member 'state_change_event' 
> not described in 'qcom_smd_channel'
> qcom_smd.c:222: warning: Function parameter or member 'drvdata' not described 
> in 'qcom_smd_channel'
> qcom_smd.c:737: warning: Function parameter or member 'wait' not described in 
> '__qcom_smd_send'
> 
> Signed-off-by: Srinivas Kandagatla 
> 
> qcom_smd.c

this one looks out of place to me :)

> ---
>  drivers/rpmsg/qcom_smd.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
> index 6437bbeebc91..8c6a142050f8 100644
> --- a/drivers/rpmsg/qcom_smd.c
> +++ b/drivers/rpmsg/qcom_smd.c
> @@ -93,6 +93,8 @@ static const struct {
>  
>  /**
>   * struct qcom_smd_edge - representing a remote processor
> + * @dev: device associated with this edge
> + * @name:name of this edge
>   * @of_node: of_node handle for information related to this edge
>   * @edge_id: identifier of this edge
>   * @remote_pid:  identifier of remote processor
> @@ -106,6 +108,7 @@ static const struct {
>   * @channels_lock:   guard for modifications of @channels
>   * @allocated:   array of bitmaps representing already allocated 
> channels
>   * @smem_available:  last available amount of smem triggering a channel scan
> + * @new_channel_event:   wait queue for new channel events
>   * @scan_work:   work item for discovering new channels
>   * @state_work:  work item for edge state changes
>   */
> @@ -172,10 +175,12 @@ struct qcom_smd_endpoint {
>  /**
>   * struct qcom_smd_channel - smd channel struct
>   * @edge:qcom_smd_edge this channel is living on
> - * @qsdev:   reference to a associated smd client device
> + * @qsept:   reference to a associated smd endpoint
> + * @registered:  flag to indicate if the channel is registered
>   * @name:name of the channel
>   * @state:   local state of the channel
>   * @remote_state:remote state of the channel
> + * @state_change_event:  state change event
>   * @info:byte aligned outgoing/incoming channel info
>   * @info_word:   word aligned outgoing/incoming channel info
>   * @tx_lock: lock to make writes to the channel mutually exclusive
> @@ -187,6 +192,7 @@ struct qcom_smd_endpoint {
>   * @cb:  callback function registered for this channel
>   * @recv_lock:   guard for rx info modifications and cb pointer
>   * @pkt_size:size of the currently handled packet
> + * @drvdata: driver private data
>   * @list:lite entry for @channels in qcom_smd_edge
>   */
>  struct qcom_smd_channel {
> @@ -726,6 +732,7 @@ static int qcom_smd_write_fifo(struct qcom_smd_channel 
> *channel,
>   * @channel: channel handle
>   * @data:buffer of data to write
>   * @len: number of bytes to write
> + * @wait:flag to indicate if write has ca wait
>   *
>   * This is a blocking write of len bytes into the channel's tx ring buffer 
> and
>   * signal the remote end. It will sleep until there is enough space available
> -- 
> 2.16.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
~Vinod


Re: [PATCH 3/3] rpmsg: smd: fix kerneldoc warnings

2018-06-18 Thread Vinod
On 18-06-18, 13:33, Srinivas Kandagatla wrote:
> This patch fixes below kerneldoc warnings
> 
> qcom_smd.c:141: warning: Function parameter or member 'dev' not described in 
> 'qcom_smd_edge'
> qcom_smd.c:141: warning: Function parameter or member 'name' not described in 
> 'qcom_smd_edge'
> qcom_smd.c:141: warning: Function parameter or member 'new_channel_event' not 
> described in 'qcom_smd_edge'
> qcom_smd.c:222: warning: Function parameter or member 'qsept' not described 
> in 'qcom_smd_channel'
> qcom_smd.c:222: warning: Function parameter or member 'registered' not 
> described in 'qcom_smd_channel'
> qcom_smd.c:222: warning: Function parameter or member 'state_change_event' 
> not described in 'qcom_smd_channel'
> qcom_smd.c:222: warning: Function parameter or member 'drvdata' not described 
> in 'qcom_smd_channel'
> qcom_smd.c:737: warning: Function parameter or member 'wait' not described in 
> '__qcom_smd_send'
> 
> Signed-off-by: Srinivas Kandagatla 
> 
> qcom_smd.c

this one looks out of place to me :)

> ---
>  drivers/rpmsg/qcom_smd.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
> index 6437bbeebc91..8c6a142050f8 100644
> --- a/drivers/rpmsg/qcom_smd.c
> +++ b/drivers/rpmsg/qcom_smd.c
> @@ -93,6 +93,8 @@ static const struct {
>  
>  /**
>   * struct qcom_smd_edge - representing a remote processor
> + * @dev: device associated with this edge
> + * @name:name of this edge
>   * @of_node: of_node handle for information related to this edge
>   * @edge_id: identifier of this edge
>   * @remote_pid:  identifier of remote processor
> @@ -106,6 +108,7 @@ static const struct {
>   * @channels_lock:   guard for modifications of @channels
>   * @allocated:   array of bitmaps representing already allocated 
> channels
>   * @smem_available:  last available amount of smem triggering a channel scan
> + * @new_channel_event:   wait queue for new channel events
>   * @scan_work:   work item for discovering new channels
>   * @state_work:  work item for edge state changes
>   */
> @@ -172,10 +175,12 @@ struct qcom_smd_endpoint {
>  /**
>   * struct qcom_smd_channel - smd channel struct
>   * @edge:qcom_smd_edge this channel is living on
> - * @qsdev:   reference to a associated smd client device
> + * @qsept:   reference to a associated smd endpoint
> + * @registered:  flag to indicate if the channel is registered
>   * @name:name of the channel
>   * @state:   local state of the channel
>   * @remote_state:remote state of the channel
> + * @state_change_event:  state change event
>   * @info:byte aligned outgoing/incoming channel info
>   * @info_word:   word aligned outgoing/incoming channel info
>   * @tx_lock: lock to make writes to the channel mutually exclusive
> @@ -187,6 +192,7 @@ struct qcom_smd_endpoint {
>   * @cb:  callback function registered for this channel
>   * @recv_lock:   guard for rx info modifications and cb pointer
>   * @pkt_size:size of the currently handled packet
> + * @drvdata: driver private data
>   * @list:lite entry for @channels in qcom_smd_edge
>   */
>  struct qcom_smd_channel {
> @@ -726,6 +732,7 @@ static int qcom_smd_write_fifo(struct qcom_smd_channel 
> *channel,
>   * @channel: channel handle
>   * @data:buffer of data to write
>   * @len: number of bytes to write
> + * @wait:flag to indicate if write has ca wait
>   *
>   * This is a blocking write of len bytes into the channel's tx ring buffer 
> and
>   * signal the remote end. It will sleep until there is enough space available
> -- 
> 2.16.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
~Vinod


Re: [Letux-kernel] BUG: drivers/pinctrl/core: races in pinctrl_groups and deferred probing

2018-06-18 Thread Tony Lindgren
* H. Nikolaus Schaller  [180618 18:33]:
> >> So code just needs group cleanup on failed probing and fixing the mutex 
> >> around pinctrl_generic_add_group().
> >> 
> >> I think we need the mutex because a race still can happen when 
> >> create_pinctrl() is calling pcs_dt_node_to_map()
> >> and pinctrl_generic_add_group() w/o being locked on pinctrl_maps_mutex.
> >> 
> >> The race I suspect is that two drivers are trying to insert the same name 
> >> and may come
> >> both to the conclusion that it does not yet exist. And both insert into 
> >> the radix tree.
> >> 
> >> The window of risk is small though... It is in pinctrl_generic_add_group() 
> >> between calling
> >> pinctrl_generic_group_name_to_selector() and radix_tree_insert() so we 
> >> probably won't
> >> see it in real hardware tests.
> > 
> > Hmm but that race should be already fixed with mutex held
> > by the pin controller drivers with these fixes? Or am I
> > missing something still?
> 
> Hm. Maybe we refer to a different mutex?

Yes I think that's the case, you're talking about a different
mutex here :)

> I had seen the call sequence
> 
> create_pinctrl()-> pinctrl_dt_to_map() -> pcs_dt_node_to_map() -> 
> pinctrl_generic_add_group()
> 
> w/o any lock inside.
> 
> There is a mutex_lock(_maps_mutex); in create_pinctrl(), but locked 
> after that.
> 
> Or is there a lock outside of create_pinctrl()?
> 
> If I look into the stack dumps, call nesting is
> 
> driver_probe_device() -> pinctrl_bind_pins() -> devm_pinctrl_get() -> 
> create_pinctrl()
> 
> They all do no locking.
> 
> Maybe I am missing something.

Can you please post a patch for that as you already have it
debugged? That's easier to understand than reading a verbal
patch :)

Regards,

Tony


Re: [Letux-kernel] BUG: drivers/pinctrl/core: races in pinctrl_groups and deferred probing

2018-06-18 Thread Tony Lindgren
* H. Nikolaus Schaller  [180618 18:33]:
> >> So code just needs group cleanup on failed probing and fixing the mutex 
> >> around pinctrl_generic_add_group().
> >> 
> >> I think we need the mutex because a race still can happen when 
> >> create_pinctrl() is calling pcs_dt_node_to_map()
> >> and pinctrl_generic_add_group() w/o being locked on pinctrl_maps_mutex.
> >> 
> >> The race I suspect is that two drivers are trying to insert the same name 
> >> and may come
> >> both to the conclusion that it does not yet exist. And both insert into 
> >> the radix tree.
> >> 
> >> The window of risk is small though... It is in pinctrl_generic_add_group() 
> >> between calling
> >> pinctrl_generic_group_name_to_selector() and radix_tree_insert() so we 
> >> probably won't
> >> see it in real hardware tests.
> > 
> > Hmm but that race should be already fixed with mutex held
> > by the pin controller drivers with these fixes? Or am I
> > missing something still?
> 
> Hm. Maybe we refer to a different mutex?

Yes I think that's the case, you're talking about a different
mutex here :)

> I had seen the call sequence
> 
> create_pinctrl()-> pinctrl_dt_to_map() -> pcs_dt_node_to_map() -> 
> pinctrl_generic_add_group()
> 
> w/o any lock inside.
> 
> There is a mutex_lock(_maps_mutex); in create_pinctrl(), but locked 
> after that.
> 
> Or is there a lock outside of create_pinctrl()?
> 
> If I look into the stack dumps, call nesting is
> 
> driver_probe_device() -> pinctrl_bind_pins() -> devm_pinctrl_get() -> 
> create_pinctrl()
> 
> They all do no locking.
> 
> Maybe I am missing something.

Can you please post a patch for that as you already have it
debugged? That's easier to understand than reading a verbal
patch :)

Regards,

Tony


Re: [PATCH] MAINTAINERS: Update email-id of Sinan Kaya

2018-06-18 Thread Vinod
On 14-06-18, 09:37, Sinan Kaya wrote:
> I'm no longer with QCOM. I am still interested in maintaining or reviewing
> PCI/DMA engine patches. Update email-id to an active one.

Applied, thanks

-- 
~Vinod


Re: [PATCH] MAINTAINERS: Update email-id of Sinan Kaya

2018-06-18 Thread Vinod
On 14-06-18, 09:37, Sinan Kaya wrote:
> I'm no longer with QCOM. I am still interested in maintaining or reviewing
> PCI/DMA engine patches. Update email-id to an active one.

Applied, thanks

-- 
~Vinod


[PATCH 1/4] clk: clk: Add functions to save/restore clock context en-masse

2018-06-18 Thread Keerthy
From: Russ Dill 

Deep enough power saving mode can result into losing context of the clock
registers also, and they need to be restored once coming back from the power
saving mode. Hence add functions to save/restore clock context.

Signed-off-by: Keerthy 
Signed-off-by: Russ Dill 
---
 drivers/clk/clk.c| 74 
 include/linux/clk-provider.h |  7 +
 include/linux/clk.h  | 25 +++
 3 files changed, 106 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index a24a6af..7347e06 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -910,6 +910,80 @@ static int clk_core_enable_lock(struct clk_core *core)
return ret;
 }
 
+static int _clk_save_context(struct clk_core *clk)
+{
+   struct clk_core *child;
+   int ret = 0;
+
+   hlist_for_each_entry(child, >children, child_node) {
+   ret = _clk_save_context(child);
+   if (ret < 0)
+   return ret;
+   }
+
+   if (clk->ops && clk->ops->save_context)
+   ret = clk->ops->save_context(clk->hw);
+
+   return ret;
+}
+
+static void _clk_restore_context(struct clk_core *clk)
+{
+   struct clk_core *child;
+
+   if (clk->ops && clk->ops->restore_context)
+   clk->ops->restore_context(clk->hw);
+
+   hlist_for_each_entry(child, >children, child_node)
+   _clk_restore_context(child);
+}
+
+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code.  Returns 0 on success.
+ */
+int clk_save_context(void)
+{
+   struct clk_core *clk;
+   int ret;
+
+   hlist_for_each_entry(clk, _root_list, child_node) {
+   ret = _clk_save_context(clk);
+   if (ret < 0)
+   return ret;
+   }
+
+   hlist_for_each_entry(clk, _orphan_list, child_node) {
+   ret = _clk_save_context(clk);
+   if (ret < 0)
+   return ret;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(clk_save_context);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * Restore the saved clock context upon resume.
+ *
+ */
+void clk_restore_context(void)
+{
+   struct clk_core *clk;
+
+   hlist_for_each_entry(clk, _root_list, child_node)
+   _clk_restore_context(clk);
+
+   hlist_for_each_entry(clk, _orphan_list, child_node)
+   _clk_restore_context(clk);
+}
+EXPORT_SYMBOL_GPL(clk_restore_context);
+
 /**
  * clk_enable - ungate a clock
  * @clk: the clk being ungated
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index b7cfa03..7f30d62 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -106,6 +106,11 @@ struct clk_rate_request {
  * Called with enable_lock held.  This function must not
  * sleep.
  *
+ * @save_context: Save the context of the clock in prepration for poweroff.
+ *
+ * @restore_context: Restore the context of the clock after a restoration
+ * of power.
+ *
  * @recalc_rateRecalculate the rate of this clock, by querying 
hardware. The
  * parent rate is an input parameter.  It is up to the caller to
  * ensure that the prepare_mutex is held across this call.
@@ -201,6 +206,8 @@ struct clk_ops {
void(*disable)(struct clk_hw *hw);
int (*is_enabled)(struct clk_hw *hw);
void(*disable_unused)(struct clk_hw *hw);
+   int (*save_context)(struct clk_hw *hw);
+   void(*restore_context)(struct clk_hw *hw);
unsigned long   (*recalc_rate)(struct clk_hw *hw,
unsigned long parent_rate);
long(*round_rate)(struct clk_hw *hw, unsigned long rate,
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 0dbd088..70a2662 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -596,6 +596,23 @@ int __must_check clk_bulk_enable(int num_clks,
  */
 struct clk *clk_get_sys(const char *dev_id, const char *con_id);
 
+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code so locking is not necessary.
+ */
+int clk_save_context(void);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * This occurs with all clocks enabled. Occurs deep within the resume code
+ * so locking is not necessary.
+ */
+void clk_restore_context(void);
+
 #else /* !CONFIG_HAVE_CLK */
 
 static inline struct clk *clk_get(struct device *dev, const char *id)
@@ -695,6 +712,14 @@ static inline struct clk *clk_get_sys(const char *dev_id, 
const char 

[PATCH 1/4] clk: clk: Add functions to save/restore clock context en-masse

2018-06-18 Thread Keerthy
From: Russ Dill 

Deep enough power saving mode can result into losing context of the clock
registers also, and they need to be restored once coming back from the power
saving mode. Hence add functions to save/restore clock context.

Signed-off-by: Keerthy 
Signed-off-by: Russ Dill 
---
 drivers/clk/clk.c| 74 
 include/linux/clk-provider.h |  7 +
 include/linux/clk.h  | 25 +++
 3 files changed, 106 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index a24a6af..7347e06 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -910,6 +910,80 @@ static int clk_core_enable_lock(struct clk_core *core)
return ret;
 }
 
+static int _clk_save_context(struct clk_core *clk)
+{
+   struct clk_core *child;
+   int ret = 0;
+
+   hlist_for_each_entry(child, >children, child_node) {
+   ret = _clk_save_context(child);
+   if (ret < 0)
+   return ret;
+   }
+
+   if (clk->ops && clk->ops->save_context)
+   ret = clk->ops->save_context(clk->hw);
+
+   return ret;
+}
+
+static void _clk_restore_context(struct clk_core *clk)
+{
+   struct clk_core *child;
+
+   if (clk->ops && clk->ops->restore_context)
+   clk->ops->restore_context(clk->hw);
+
+   hlist_for_each_entry(child, >children, child_node)
+   _clk_restore_context(child);
+}
+
+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code.  Returns 0 on success.
+ */
+int clk_save_context(void)
+{
+   struct clk_core *clk;
+   int ret;
+
+   hlist_for_each_entry(clk, _root_list, child_node) {
+   ret = _clk_save_context(clk);
+   if (ret < 0)
+   return ret;
+   }
+
+   hlist_for_each_entry(clk, _orphan_list, child_node) {
+   ret = _clk_save_context(clk);
+   if (ret < 0)
+   return ret;
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(clk_save_context);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * Restore the saved clock context upon resume.
+ *
+ */
+void clk_restore_context(void)
+{
+   struct clk_core *clk;
+
+   hlist_for_each_entry(clk, _root_list, child_node)
+   _clk_restore_context(clk);
+
+   hlist_for_each_entry(clk, _orphan_list, child_node)
+   _clk_restore_context(clk);
+}
+EXPORT_SYMBOL_GPL(clk_restore_context);
+
 /**
  * clk_enable - ungate a clock
  * @clk: the clk being ungated
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index b7cfa03..7f30d62 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -106,6 +106,11 @@ struct clk_rate_request {
  * Called with enable_lock held.  This function must not
  * sleep.
  *
+ * @save_context: Save the context of the clock in prepration for poweroff.
+ *
+ * @restore_context: Restore the context of the clock after a restoration
+ * of power.
+ *
  * @recalc_rateRecalculate the rate of this clock, by querying 
hardware. The
  * parent rate is an input parameter.  It is up to the caller to
  * ensure that the prepare_mutex is held across this call.
@@ -201,6 +206,8 @@ struct clk_ops {
void(*disable)(struct clk_hw *hw);
int (*is_enabled)(struct clk_hw *hw);
void(*disable_unused)(struct clk_hw *hw);
+   int (*save_context)(struct clk_hw *hw);
+   void(*restore_context)(struct clk_hw *hw);
unsigned long   (*recalc_rate)(struct clk_hw *hw,
unsigned long parent_rate);
long(*round_rate)(struct clk_hw *hw, unsigned long rate,
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 0dbd088..70a2662 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -596,6 +596,23 @@ int __must_check clk_bulk_enable(int num_clks,
  */
 struct clk *clk_get_sys(const char *dev_id, const char *con_id);
 
+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code so locking is not necessary.
+ */
+int clk_save_context(void);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * This occurs with all clocks enabled. Occurs deep within the resume code
+ * so locking is not necessary.
+ */
+void clk_restore_context(void);
+
 #else /* !CONFIG_HAVE_CLK */
 
 static inline struct clk *clk_get(struct device *dev, const char *id)
@@ -695,6 +712,14 @@ static inline struct clk *clk_get_sys(const char *dev_id, 
const char 

  1   2   3   4   5   6   7   8   9   10   >