Re: [PATCH] mtd: nand: Add support for reading ooblayout from device tree

2018-05-11 Thread Boris Brezillon
Hi Paul,

On Fri, 11 May 2018 23:29:12 +0200
Paul Cercueil  wrote:

> By specifying the properties "mtd-oob-ecc" and "mtd-oob-free", it is
> now possible to specify from devicetree where the ECC data is located
> inside the OOB region.

Why would we want to do that? I mean, ECC/free regions are ECC
controller dependent (and NAND chip dependent for the OOB size part),
so there's no reason to describe it in the DT. And more importantly,
people are likely to get it wrong.

I'm curious, why do you need that?

Regards,

Boris

> 
> Signed-off-by: Paul Cercueil 
> ---
>  Documentation/devicetree/bindings/mtd/nand.txt |  7 +
>  drivers/mtd/nand/raw/nand_base.c   | 42 
> ++
>  2 files changed, 49 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/mtd/nand.txt 
> b/Documentation/devicetree/bindings/mtd/nand.txt
> index 8bb11d809429..118ea92787cb 100644
> --- a/Documentation/devicetree/bindings/mtd/nand.txt
> +++ b/Documentation/devicetree/bindings/mtd/nand.txt
> @@ -45,6 +45,13 @@ Optional NAND chip properties:
>as reliable as possible.
>  - nand-rb: shall contain the native Ready/Busy ids.
>  
> +- nand-oob-ecc:  couples of integers, specifying the offset
> +  and length of the ECC data in the OOB region. There can be 
> more
> +  than one couple.
> +- nand-oob-free:  couples of integers, specifying the offset
> +  and length of a free-to-use area in the OOB region. There 
> can be
> +  more than one couple.
> +
>  The ECC strength and ECC step size properties define the correction 
> capability
>  of a controller. Together, they say a controller can correct "{strength} bit
>  errors per {size} bytes".
> diff --git a/drivers/mtd/nand/raw/nand_base.c 
> b/drivers/mtd/nand/raw/nand_base.c
> index 72f3a89da513..c905531effb0 100644
> --- a/drivers/mtd/nand/raw/nand_base.c
> +++ b/drivers/mtd/nand/raw/nand_base.c
> @@ -213,6 +213,43 @@ static const struct mtd_ooblayout_ops 
> nand_ooblayout_lp_hamming_ops = {
>   .free = nand_ooblayout_free_lp_hamming,
>  };
>  
> +static int nand_oob_of(struct device_node *np, int section,
> +struct mtd_oob_region *oobregion, const char *prop)
> +{
> + int ret = of_property_read_u32_index(np, prop,
> + section * 2, &oobregion->offset);
> + if (ret == -EOVERFLOW)
> + return -ERANGE; /* We're done */
> + if (ret)
> + return ret;
> +
> + ret = of_property_read_u32_index(np, prop,
> + section * 2 + 1, &oobregion->length);
> + if (ret == -EOVERFLOW)
> + return -EINVAL; /* We must have an even number of integers */
> +
> + return ret;
> +}
> +
> +static int nand_ooblayout_ecc_of(struct mtd_info *mtd, int section,
> +  struct mtd_oob_region *oobregion)
> +{
> + return nand_oob_of(mtd->dev.of_node, section,
> + oobregion, "nand-oob-ecc");
> +}
> +
> +static int nand_ooblayout_free_of(struct mtd_info *mtd, int section,
> +  struct mtd_oob_region *oobregion)
> +{
> + return nand_oob_of(mtd->dev.of_node, section,
> + oobregion, "nand-oob-free");
> +}
> +
> +static const struct mtd_ooblayout_ops nand_ooblayout_of_ops = {
> + .ecc = nand_ooblayout_ecc_of,
> + .free = nand_ooblayout_free_of,
> +};
> +
>  static int check_offs_len(struct mtd_info *mtd,
>   loff_t ofs, uint64_t len)
>  {
> @@ -5843,6 +5880,11 @@ static int nand_dt_init(struct nand_chip *chip)
>   if (of_property_read_bool(dn, "nand-ecc-maximize"))
>   chip->ecc.options |= NAND_ECC_MAXIMIZE;
>  
> + if (!chip->mtd.ooblayout &&
> + of_property_read_bool(dn, "nand-oob-ecc") &&
> + of_property_read_bool(dn, "nand-oob-free"))
> + chip->mtd.ooblayout = &nand_ooblayout_of_ops;
> +
>   return 0;
>  }
>  



Re: [PATCH] mtd: nand: Add support for reading ooblayout from device tree

2018-05-11 Thread Boris Brezillon
Hi Paul,

On Fri, 11 May 2018 23:29:12 +0200
Paul Cercueil  wrote:

> By specifying the properties "mtd-oob-ecc" and "mtd-oob-free", it is
> now possible to specify from devicetree where the ECC data is located
> inside the OOB region.

Why would we want to do that? I mean, ECC/free regions are ECC
controller dependent (and NAND chip dependent for the OOB size part),
so there's no reason to describe it in the DT. And more importantly,
people are likely to get it wrong.

I'm curious, why do you need that?

Regards,

Boris

> 
> Signed-off-by: Paul Cercueil 
> ---
>  Documentation/devicetree/bindings/mtd/nand.txt |  7 +
>  drivers/mtd/nand/raw/nand_base.c   | 42 
> ++
>  2 files changed, 49 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/mtd/nand.txt 
> b/Documentation/devicetree/bindings/mtd/nand.txt
> index 8bb11d809429..118ea92787cb 100644
> --- a/Documentation/devicetree/bindings/mtd/nand.txt
> +++ b/Documentation/devicetree/bindings/mtd/nand.txt
> @@ -45,6 +45,13 @@ Optional NAND chip properties:
>as reliable as possible.
>  - nand-rb: shall contain the native Ready/Busy ids.
>  
> +- nand-oob-ecc:  couples of integers, specifying the offset
> +  and length of the ECC data in the OOB region. There can be 
> more
> +  than one couple.
> +- nand-oob-free:  couples of integers, specifying the offset
> +  and length of a free-to-use area in the OOB region. There 
> can be
> +  more than one couple.
> +
>  The ECC strength and ECC step size properties define the correction 
> capability
>  of a controller. Together, they say a controller can correct "{strength} bit
>  errors per {size} bytes".
> diff --git a/drivers/mtd/nand/raw/nand_base.c 
> b/drivers/mtd/nand/raw/nand_base.c
> index 72f3a89da513..c905531effb0 100644
> --- a/drivers/mtd/nand/raw/nand_base.c
> +++ b/drivers/mtd/nand/raw/nand_base.c
> @@ -213,6 +213,43 @@ static const struct mtd_ooblayout_ops 
> nand_ooblayout_lp_hamming_ops = {
>   .free = nand_ooblayout_free_lp_hamming,
>  };
>  
> +static int nand_oob_of(struct device_node *np, int section,
> +struct mtd_oob_region *oobregion, const char *prop)
> +{
> + int ret = of_property_read_u32_index(np, prop,
> + section * 2, &oobregion->offset);
> + if (ret == -EOVERFLOW)
> + return -ERANGE; /* We're done */
> + if (ret)
> + return ret;
> +
> + ret = of_property_read_u32_index(np, prop,
> + section * 2 + 1, &oobregion->length);
> + if (ret == -EOVERFLOW)
> + return -EINVAL; /* We must have an even number of integers */
> +
> + return ret;
> +}
> +
> +static int nand_ooblayout_ecc_of(struct mtd_info *mtd, int section,
> +  struct mtd_oob_region *oobregion)
> +{
> + return nand_oob_of(mtd->dev.of_node, section,
> + oobregion, "nand-oob-ecc");
> +}
> +
> +static int nand_ooblayout_free_of(struct mtd_info *mtd, int section,
> +  struct mtd_oob_region *oobregion)
> +{
> + return nand_oob_of(mtd->dev.of_node, section,
> + oobregion, "nand-oob-free");
> +}
> +
> +static const struct mtd_ooblayout_ops nand_ooblayout_of_ops = {
> + .ecc = nand_ooblayout_ecc_of,
> + .free = nand_ooblayout_free_of,
> +};
> +
>  static int check_offs_len(struct mtd_info *mtd,
>   loff_t ofs, uint64_t len)
>  {
> @@ -5843,6 +5880,11 @@ static int nand_dt_init(struct nand_chip *chip)
>   if (of_property_read_bool(dn, "nand-ecc-maximize"))
>   chip->ecc.options |= NAND_ECC_MAXIMIZE;
>  
> + if (!chip->mtd.ooblayout &&
> + of_property_read_bool(dn, "nand-oob-ecc") &&
> + of_property_read_bool(dn, "nand-oob-free"))
> + chip->mtd.ooblayout = &nand_ooblayout_of_ops;
> +
>   return 0;
>  }
>  



Re: [PATCH v2 03/12] arm: dts: mt7623: fix invalid memory node being generated

2018-05-11 Thread Sean Wang
On Fri, 2018-05-11 at 17:03 +0200, Matthias Brugger wrote:
> 
> On 04/11/2018 10:53 AM, sean.w...@mediatek.com wrote:
> > From: Sean Wang 
> > 
> > Below two wrong nodes in existing DTS files would cause a fail boot since
> > in fact the address 0 is not the correct place the memory device locates
> > at.
> > 
> > memory {
> > device_type = "memory";
> > reg = <0x0 0x0 0x0 0x0>;
> > };
> > 
> > memory@8000 {
> > reg = <0x0 0x8000 0x0 0x4000>;
> > };
> > 
> > In order to avoid having a memory node starting at address 0, we can't
> > include file skeleton64.dtsi and instead need to explicitly manually
> > define a few of properties the DTS relies on such as #address-cells
> > and #size-cells in root node and device_type in the node memory@8000.
> > 
> > Cc: sta...@vger.kernel.org
> > Fixes: 31ac0d69a1d4 ("ARM: dts: mediatek: add MT7623 basic support")
> > Signed-off-by: Sean Wang 
> > Cc: Rob Herring 
> > ---
> >  arch/arm/boot/dts/mt7623.dtsi | 3 ++-
> >  arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts | 1 +
> >  arch/arm/boot/dts/mt7623n-rfb.dtsi| 1 +
> >  3 files changed, 4 insertions(+), 1 deletion(-)
> > 
> 
> merged. We would need this at least for mt2701 as well, correct?
> Would you mind to provide a patch.
> 
> Regards,
> Matthias
> 

Thanks! I totally think the same problem could happen on mt2701, so I'm 
happy to come up with a patch for that.

Sean

> > diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi
> > index fec4715..406a9f3 100644
> > --- a/arch/arm/boot/dts/mt7623.dtsi
> > +++ b/arch/arm/boot/dts/mt7623.dtsi
> > @@ -15,11 +15,12 @@
> >  #include 
> >  #include 
> >  #include 
> > -#include "skeleton64.dtsi"
> >  
> >  / {
> > compatible = "mediatek,mt7623";
> > interrupt-parent = <>;
> > +   #address-cells = <2>;
> > +   #size-cells = <2>;
> >  
> > cpu_opp_table: opp-table {
> > compatible = "operating-points-v2";
> > diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts 
> > b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts
> > index bbf56f8..5938e4c 100644
> > --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts
> > +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts
> > @@ -109,6 +109,7 @@
> > };
> >  
> > memory@8000 {
> > +   device_type = "memory";
> > reg = <0 0x8000 0 0x4000>;
> > };
> >  };
> > diff --git a/arch/arm/boot/dts/mt7623n-rfb.dtsi 
> > b/arch/arm/boot/dts/mt7623n-rfb.dtsi
> > index a199ae7..343e8ef 100644
> > --- a/arch/arm/boot/dts/mt7623n-rfb.dtsi
> > +++ b/arch/arm/boot/dts/mt7623n-rfb.dtsi
> > @@ -40,6 +40,7 @@
> > };
> >  
> > memory@8000 {
> > +   device_type = "memory";
> > reg = <0 0x8000 0 0x4000>;
> > };
> >  
> > 




Re: [PATCH v2 03/12] arm: dts: mt7623: fix invalid memory node being generated

2018-05-11 Thread Sean Wang
On Fri, 2018-05-11 at 17:03 +0200, Matthias Brugger wrote:
> 
> On 04/11/2018 10:53 AM, sean.w...@mediatek.com wrote:
> > From: Sean Wang 
> > 
> > Below two wrong nodes in existing DTS files would cause a fail boot since
> > in fact the address 0 is not the correct place the memory device locates
> > at.
> > 
> > memory {
> > device_type = "memory";
> > reg = <0x0 0x0 0x0 0x0>;
> > };
> > 
> > memory@8000 {
> > reg = <0x0 0x8000 0x0 0x4000>;
> > };
> > 
> > In order to avoid having a memory node starting at address 0, we can't
> > include file skeleton64.dtsi and instead need to explicitly manually
> > define a few of properties the DTS relies on such as #address-cells
> > and #size-cells in root node and device_type in the node memory@8000.
> > 
> > Cc: sta...@vger.kernel.org
> > Fixes: 31ac0d69a1d4 ("ARM: dts: mediatek: add MT7623 basic support")
> > Signed-off-by: Sean Wang 
> > Cc: Rob Herring 
> > ---
> >  arch/arm/boot/dts/mt7623.dtsi | 3 ++-
> >  arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts | 1 +
> >  arch/arm/boot/dts/mt7623n-rfb.dtsi| 1 +
> >  3 files changed, 4 insertions(+), 1 deletion(-)
> > 
> 
> merged. We would need this at least for mt2701 as well, correct?
> Would you mind to provide a patch.
> 
> Regards,
> Matthias
> 

Thanks! I totally think the same problem could happen on mt2701, so I'm 
happy to come up with a patch for that.

Sean

> > diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi
> > index fec4715..406a9f3 100644
> > --- a/arch/arm/boot/dts/mt7623.dtsi
> > +++ b/arch/arm/boot/dts/mt7623.dtsi
> > @@ -15,11 +15,12 @@
> >  #include 
> >  #include 
> >  #include 
> > -#include "skeleton64.dtsi"
> >  
> >  / {
> > compatible = "mediatek,mt7623";
> > interrupt-parent = <>;
> > +   #address-cells = <2>;
> > +   #size-cells = <2>;
> >  
> > cpu_opp_table: opp-table {
> > compatible = "operating-points-v2";
> > diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts 
> > b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts
> > index bbf56f8..5938e4c 100644
> > --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts
> > +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts
> > @@ -109,6 +109,7 @@
> > };
> >  
> > memory@8000 {
> > +   device_type = "memory";
> > reg = <0 0x8000 0 0x4000>;
> > };
> >  };
> > diff --git a/arch/arm/boot/dts/mt7623n-rfb.dtsi 
> > b/arch/arm/boot/dts/mt7623n-rfb.dtsi
> > index a199ae7..343e8ef 100644
> > --- a/arch/arm/boot/dts/mt7623n-rfb.dtsi
> > +++ b/arch/arm/boot/dts/mt7623n-rfb.dtsi
> > @@ -40,6 +40,7 @@
> > };
> >  
> > memory@8000 {
> > +   device_type = "memory";
> > reg = <0 0x8000 0 0x4000>;
> > };
> >  
> > 




Re: [PATCH] rcu: Report a quiescent state when it's exactly in the state

2018-05-11 Thread Paul E. McKenney
On Fri, May 11, 2018 at 03:41:38PM -0700, Joel Fernandes wrote:
> On Fri, May 11, 2018 at 09:17:46AM -0700, Paul E. McKenney wrote:
> > On Fri, May 11, 2018 at 09:57:54PM +0900, Byungchul Park wrote:
> > > Hello folks,
> > > 
> > > I think I wrote the title in a misleading way.
> > > 
> > > Please change the title to something else such as,
> > > "rcu: Report a quiescent state when it's in the state" or,
> > > "rcu: Add points reporting quiescent states where proper" or so on.
> > > 
> > > On 2018-05-11 오후 5:30, Byungchul Park wrote:
> > > >We expect a quiescent state of TASKS_RCU when cond_resched_tasks_rcu_qs()
> > > >is called, no matter whether it actually be scheduled or not. However,
> > > >it currently doesn't report the quiescent state when the task enters
> > > >into __schedule() as it's called with preempt = true. So make it report
> > > >the quiescent state unconditionally when cond_resched_tasks_rcu_qs() is
> > > >called.
> > > >
> > > >And in TINY_RCU, even though the quiescent state of rcu_bh also should
> > > >be reported when the tick interrupt comes from user, it doesn't. So make
> > > >it reported.
> > > >
> > > >Lastly in TREE_RCU, rcu_note_voluntary_context_switch() should be
> > > >reported when the tick interrupt comes from not only user but also idle,
> > > >as an extended quiescent state.
> > > >
> > > >Signed-off-by: Byungchul Park 
> > > >---
> > > >  include/linux/rcupdate.h | 4 ++--
> > > >  kernel/rcu/tiny.c| 6 +++---
> > > >  kernel/rcu/tree.c| 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > >diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > > >index ee8cf5fc..7432261 100644
> > > >--- a/include/linux/rcupdate.h
> > > >+++ b/include/linux/rcupdate.h
> > > >@@ -195,8 +195,8 @@ static inline void exit_tasks_rcu_finish(void) { }
> > > >   */
> > > >  #define cond_resched_tasks_rcu_qs() \
> > > >  do { \
> > > >-if (!cond_resched()) \
> > > >-rcu_note_voluntary_context_switch_lite(current); \
> > > >+rcu_note_voluntary_context_switch_lite(current); \
> > > >+cond_resched(); \
> > 
> > Ah, good point.
> > 
> > Peter, I have to ask...  Why is "cond_resched()" considered a preemption
> > while "schedule()" is not?
> 
> In fact, something interesting I inferred from the __schedule loop related to
> your question:
> 
> switch_count can either be set to prev->invcsw or prev->nvcsw. If we can
> assume that switch_count reflects whether the context switch is involuntary
> or voluntary,
>   
> task-running-statepreempt switch_count
> 0 (running)   1   involuntary
> 0 0   involuntary
> 1 0   voluntary
> 1 1   involuntary
> 
> According to the above table, both the task's running state and the preempt
> parameter to __schedule should be used together to determine if the switch is
> a voluntary one or not.
> 
> So this code in rcu_note_context_switch should really be:
> if (!preempt && !(current->state & TASK_RUNNING))
>   rcu_note_voluntary_context_switch_lite(current);
> 
> According to the above table, cond_resched always classifies as an
> involuntary switch which makes sense to me. Even though cond_resched is
> explicitly called, it's still sort of involuntary in the sense that it's not
> called into the scheduler for sleeping, but rather for seeing if something
> else can run instead (a preemption point). In fact, none of the task
> deactivation in the __schedule loop will run if cond_resched is used.
> 
> I agree that if schedule was called directly but with TASK_RUNNING=1, then
> that could probably be classified an involuntary switch too...
> 
> Also since we're deciding to call rcu_note_voluntary_context_switch_lite
> unconditionally, then IMO this comment on that macro:
> 
> /*
>  * Note a voluntary context switch for RCU-tasks benefit.  This is a
>  * macro rather than an inline function to avoid #include hell.
>  */
>  #ifdef CONFIG_TASKS_RCU
>  #define rcu_note_voluntary_context_switch_lite(t)
> 
> Should be changed to:
> 
> /*
>  * Note an attempt to perform a voluntary context switch for RCU-tasks
>  * benefit.  This is called even in situations where a context switch
>  * didn't really happen even though it was requested. This is a
>  * macro rather than an inline function to avoid #include hell.
>  */
>  #ifdef CONFIG_TASKS_RCU
>  #define rcu_note_voluntary_context_switch_lite(t)
> 
> Right?
> 
> Correct me if I'm wrong about anything, thanks,

The starting point for me is that Tasks RCU is a special-purpose mechanism
for freeing trampolines in PREEMPT=y kernels.  The approach is to arrange
for the trampoline to be inaccessible to future execution, wait for a
tasks-RCU grace period, then free the trampoline.  So a tasks-RCU grace
period must wait until all tasks have spent at least some time outside
of a 

Re: [PATCH] rcu: Report a quiescent state when it's exactly in the state

2018-05-11 Thread Paul E. McKenney
On Fri, May 11, 2018 at 03:41:38PM -0700, Joel Fernandes wrote:
> On Fri, May 11, 2018 at 09:17:46AM -0700, Paul E. McKenney wrote:
> > On Fri, May 11, 2018 at 09:57:54PM +0900, Byungchul Park wrote:
> > > Hello folks,
> > > 
> > > I think I wrote the title in a misleading way.
> > > 
> > > Please change the title to something else such as,
> > > "rcu: Report a quiescent state when it's in the state" or,
> > > "rcu: Add points reporting quiescent states where proper" or so on.
> > > 
> > > On 2018-05-11 오후 5:30, Byungchul Park wrote:
> > > >We expect a quiescent state of TASKS_RCU when cond_resched_tasks_rcu_qs()
> > > >is called, no matter whether it actually be scheduled or not. However,
> > > >it currently doesn't report the quiescent state when the task enters
> > > >into __schedule() as it's called with preempt = true. So make it report
> > > >the quiescent state unconditionally when cond_resched_tasks_rcu_qs() is
> > > >called.
> > > >
> > > >And in TINY_RCU, even though the quiescent state of rcu_bh also should
> > > >be reported when the tick interrupt comes from user, it doesn't. So make
> > > >it reported.
> > > >
> > > >Lastly in TREE_RCU, rcu_note_voluntary_context_switch() should be
> > > >reported when the tick interrupt comes from not only user but also idle,
> > > >as an extended quiescent state.
> > > >
> > > >Signed-off-by: Byungchul Park 
> > > >---
> > > >  include/linux/rcupdate.h | 4 ++--
> > > >  kernel/rcu/tiny.c| 6 +++---
> > > >  kernel/rcu/tree.c| 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > >diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > > >index ee8cf5fc..7432261 100644
> > > >--- a/include/linux/rcupdate.h
> > > >+++ b/include/linux/rcupdate.h
> > > >@@ -195,8 +195,8 @@ static inline void exit_tasks_rcu_finish(void) { }
> > > >   */
> > > >  #define cond_resched_tasks_rcu_qs() \
> > > >  do { \
> > > >-if (!cond_resched()) \
> > > >-rcu_note_voluntary_context_switch_lite(current); \
> > > >+rcu_note_voluntary_context_switch_lite(current); \
> > > >+cond_resched(); \
> > 
> > Ah, good point.
> > 
> > Peter, I have to ask...  Why is "cond_resched()" considered a preemption
> > while "schedule()" is not?
> 
> In fact, something interesting I inferred from the __schedule loop related to
> your question:
> 
> switch_count can either be set to prev->invcsw or prev->nvcsw. If we can
> assume that switch_count reflects whether the context switch is involuntary
> or voluntary,
>   
> task-running-statepreempt switch_count
> 0 (running)   1   involuntary
> 0 0   involuntary
> 1 0   voluntary
> 1 1   involuntary
> 
> According to the above table, both the task's running state and the preempt
> parameter to __schedule should be used together to determine if the switch is
> a voluntary one or not.
> 
> So this code in rcu_note_context_switch should really be:
> if (!preempt && !(current->state & TASK_RUNNING))
>   rcu_note_voluntary_context_switch_lite(current);
> 
> According to the above table, cond_resched always classifies as an
> involuntary switch which makes sense to me. Even though cond_resched is
> explicitly called, it's still sort of involuntary in the sense that it's not
> called into the scheduler for sleeping, but rather for seeing if something
> else can run instead (a preemption point). In fact, none of the task
> deactivation in the __schedule loop will run if cond_resched is used.
> 
> I agree that if schedule was called directly but with TASK_RUNNING=1, then
> that could probably be classified an involuntary switch too...
> 
> Also since we're deciding to call rcu_note_voluntary_context_switch_lite
> unconditionally, then IMO this comment on that macro:
> 
> /*
>  * Note a voluntary context switch for RCU-tasks benefit.  This is a
>  * macro rather than an inline function to avoid #include hell.
>  */
>  #ifdef CONFIG_TASKS_RCU
>  #define rcu_note_voluntary_context_switch_lite(t)
> 
> Should be changed to:
> 
> /*
>  * Note an attempt to perform a voluntary context switch for RCU-tasks
>  * benefit.  This is called even in situations where a context switch
>  * didn't really happen even though it was requested. This is a
>  * macro rather than an inline function to avoid #include hell.
>  */
>  #ifdef CONFIG_TASKS_RCU
>  #define rcu_note_voluntary_context_switch_lite(t)
> 
> Right?
> 
> Correct me if I'm wrong about anything, thanks,

The starting point for me is that Tasks RCU is a special-purpose mechanism
for freeing trampolines in PREEMPT=y kernels.  The approach is to arrange
for the trampoline to be inaccessible to future execution, wait for a
tasks-RCU grace period, then free the trampoline.  So a tasks-RCU grace
period must wait until all tasks have spent at least some time outside
of a trampoline.  My 

[PATCH 2/6] lustre: Use long long type to print inode time

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.

Convert these print formats to use long long types to
avoid warnings and errors on conversion.

Signed-off-by: Deepa Dinamani 
CC: andreas.dil...@intel.com
---
 drivers/staging/lustre/lustre/llite/llite_lib.c | 12 +++-
 drivers/staging/lustre/lustre/lmv/lmv_obd.c |  7 ---
 drivers/staging/lustre/lustre/mdc/mdc_reint.c   |  6 +++---
 drivers/staging/lustre/lustre/obdclass/obdo.c   |  6 +++---
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c 
b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 60dbe888e336..dc31966bbf3c 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -1482,8 +1482,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr 
*attr, bool hsm_import)
}
 
if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
-   CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n",
-  LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
+   CDEBUG(D_INODE, "setting mtime %llu, ctime %llu, now = %llu\n",
+  (unsigned long long)LTIME_S(attr->ia_mtime),
+  (unsigned long long)LTIME_S(attr->ia_ctime),
   (s64)ktime_get_real_seconds());
 
if (S_ISREG(inode->i_mode))
@@ -1760,9 +1761,10 @@ int ll_update_inode(struct inode *inode, struct 
lustre_md *md)
if (body->mbo_valid & OBD_MD_FLMTIME) {
if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
CDEBUG(D_INODE,
-  "setting ino %lu mtime from %lu to %llu\n",
-  inode->i_ino, LTIME_S(inode->i_mtime),
-  body->mbo_mtime);
+  "setting ino %lu mtime from %llu to %llu\n",
+  inode->i_ino,
+  (unsigned long long)LTIME_S(inode->i_mtime),
+  (unsigned long long)body->mbo_mtime);
LTIME_S(inode->i_mtime) = body->mbo_mtime;
}
lli->lli_mtime = body->mbo_mtime;
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c 
b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 7198a6384028..88e05a53716e 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -3029,11 +3029,12 @@ static int lmv_merge_attr(struct obd_export *exp,
for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root;
 
-   CDEBUG(D_INFO, "" DFID " size %llu, blocks %llu nlink %u, atime 
%lu ctime %lu, mtime %lu.\n",
+   CDEBUG(D_INFO, "" DFID " size %llu, blocks %llu nlink %u, atime 
%llu ctime %llu, mtime %llu.\n",
   PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
   i_size_read(inode), (unsigned long long)inode->i_blocks,
-  inode->i_nlink, LTIME_S(inode->i_atime),
-  LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime));
+  inode->i_nlink, (unsigned long 
long)LTIME_S(inode->i_atime),
+  (unsigned long long)LTIME_S(inode->i_ctime),
+  (unsigned long long)LTIME_S(inode->i_mtime));
 
/* for slave stripe, it needs to subtract nlink for . and .. */
if (i)
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c 
b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index 488b98007558..f1ccf8d26ddc 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -129,9 +129,9 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data 
*op_data,
}
 
if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
-   CDEBUG(D_INODE, "setting mtime %ld, ctime %ld\n",
-  LTIME_S(op_data->op_attr.ia_mtime),
-  LTIME_S(op_data->op_attr.ia_ctime));
+   CDEBUG(D_INODE, "setting mtime %lld, ctime %lld\n",
+  (long long)LTIME_S(op_data->op_attr.ia_mtime),
+  (long long)LTIME_S(op_data->op_attr.ia_ctime));
mdc_setattr_pack(req, op_data, ea, ealen);
 
ptlrpc_request_set_replen(req);
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c 
b/drivers/staging/lustre/lustre/obdclass/obdo.c
index c4503bc36591..8f4054aa970b 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -60,9 +60,9 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, u32 
valid)
u32 newvalid = 0;
 
if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
-   CDEBUG(D_INODE, 

[PATCH 5/6] udf: Simplify calls to udf_disk_stamp_to_time

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.

commit fd3cfad374d4 ("udf: Convert udf_disk_stamp_to_time() to use mktime64()")
eliminated the NULL return condition from udf_disk_stamp_to_time().
udf_time_to_disk_time() is always called with a valid dest pointer and
the return value is ignored.
Further, caller can as well check the dest pointer being passed in rather
than return argument.
Make both the functions return void.

This will make the inode timestamp conversion simpler.

Signed-off-by: Deepa Dinamani 
Cc: j...@suse.com
---
 fs/udf/inode.c   | 28 +++-
 fs/udf/super.c   | 16 +---
 fs/udf/udfdecl.h |  4 ++--
 fs/udf/udftime.c |  9 ++---
 4 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c80765d62f7e..df2378d6ebb4 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1443,15 +1443,9 @@ static int udf_read_inode(struct inode *inode, bool 
hidden_inode)
inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
(inode->i_sb->s_blocksize_bits - 9);
 
-   if (!udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime))
-   inode->i_atime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(&inode->i_mtime,
-   fe->modificationTime))
-   inode->i_mtime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime))
-   inode->i_ctime = sbi->s_record_time;
+   udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime);
+   udf_disk_stamp_to_time(&inode->i_mtime, fe->modificationTime);
+   udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime);
 
iinfo->i_unique = le64_to_cpu(fe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
@@ -1461,18 +1455,10 @@ static int udf_read_inode(struct inode *inode, bool 
hidden_inode)
inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
(inode->i_sb->s_blocksize_bits - 9);
 
-   if (!udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime))
-   inode->i_atime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(&inode->i_mtime,
-   efe->modificationTime))
-   inode->i_mtime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime))
-   iinfo->i_crtime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime))
-   inode->i_ctime = sbi->s_record_time;
+   udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime);
+   udf_disk_stamp_to_time(&inode->i_mtime, efe->modificationTime);
+   udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime);
+   udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime);
 
iinfo->i_unique = le64_to_cpu(efe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0d27d41f5c6e..bd0ae64bc31c 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -862,6 +862,9 @@ static int udf_load_pvoldesc(struct super_block *sb, 
sector_t block)
struct buffer_head *bh;
uint16_t ident;
int ret = -ENOMEM;
+#ifdef UDFFS_DEBUG
+   struct timestamp *ts;
+#endif
 
outstr = kmalloc(128, GFP_NOFS);
if (!outstr)
@@ -880,15 +883,14 @@ static int udf_load_pvoldesc(struct super_block *sb, 
sector_t block)
 
pvoldesc = (struct primaryVolDesc *)bh->b_data;
 
-   if (udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
- pvoldesc->recordingDateAndTime)) {
+   udf_disk_stamp_to_time(&UDF_SB(sb)->s_record_time,
+ pvoldesc->recordingDateAndTime);
 #ifdef UDFFS_DEBUG
-   struct timestamp *ts = &pvoldesc->recordingDateAndTime;
-   udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
- le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
- ts->minute, le16_to_cpu(ts->typeAndTimezone));
+   *ts = &pvoldesc->recordingDateAndTime;
+   udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
+ le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
+ ts->minute, le16_to_cpu(ts->typeAndTimezone));
 #endif
-   }
 
ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32);
if (ret < 0)
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index fc8d1b3384d2..bae311b59400 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -253,8 +253,8 @@ extern struct long_ad *udf_get_filelongad(uint8_t *, int, 
uint32_t *, int);
 extern struct short_ad 

[PATCH 6/6] vfs: change inode times to use struct timespec64

2018-05-11 Thread Deepa Dinamani
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.

The change was made with the help of the following Coccinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.

The script can be a little shorter by combining different cases.
But, this version was sufficient for my usecase.

virtual patch

@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
  current_time ( ... )
  {
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
  ...
- return timespec_trunc(
+ return timespec64_trunc(
  ... );
  }

@ depends on patch @
identifier xtime;
@@
 struct \( iattr \| inode \| kstat \) {
 ...
-   struct timespec xtime;
+   struct timespec64 xtime;
 ...
 }

@ depends on patch @
identifier t;
@@
 struct inode_operations {
 ...
int (*update_time) (...,
-   struct timespec t,
+   struct timespec64 t,
...);
 ...
 }

@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
 fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
 ...) { ... }

@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
  ) { ... }

@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)

<+... when != ts
(
- timespec_equal(_node->i_xtime, )
+ timespec64_equal(_node->i_xtime, )
|
- timespec_equal(, _node->i_xtime)
+ timespec64_equal(, _node->i_xtime)
|
- timespec_compare(_node->i_xtime, )
+ timespec64_compare(_node->i_xtime, )
|
- timespec_compare(, _node->i_xtime)
+ timespec64_compare(, _node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., ,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts()
+ ktime_get_real_ts64()
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(>i_xtime1, >i_xtime2)
+ timespec64_equal(>i_xtime2, >i_xtime2)
|
- timespec_equal(>i_xtime1, >ia_xtime2)
+ timespec64_equal(>i_xtime2, >ia_xtime2)
|
- timespec_compare(>i_xtime1, >i_xtime2)
+ timespec64_compare(>i_xtime1, >i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 =  timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(>ia_xtime1)
+ ktime_get_real_ts64(>ia_xtime1)
|
- ktime_get_real_ts(_xtime1)
+ ktime_get_real_ts64(_xtime1)
)

@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
 fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)

@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- >i_xtime,
+ ,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- >ia_xtime,
+ ,
...);
)
...+>
}

@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- >i_xtime,
+ ,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- >i_xtime);
+ );
|
+ ts = 

[PATCH 2/6] lustre: Use long long type to print inode time

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.

Convert these print formats to use long long types to
avoid warnings and errors on conversion.

Signed-off-by: Deepa Dinamani 
CC: andreas.dil...@intel.com
---
 drivers/staging/lustre/lustre/llite/llite_lib.c | 12 +++-
 drivers/staging/lustre/lustre/lmv/lmv_obd.c |  7 ---
 drivers/staging/lustre/lustre/mdc/mdc_reint.c   |  6 +++---
 drivers/staging/lustre/lustre/obdclass/obdo.c   |  6 +++---
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c 
b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 60dbe888e336..dc31966bbf3c 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -1482,8 +1482,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr 
*attr, bool hsm_import)
}
 
if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
-   CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n",
-  LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
+   CDEBUG(D_INODE, "setting mtime %llu, ctime %llu, now = %llu\n",
+  (unsigned long long)LTIME_S(attr->ia_mtime),
+  (unsigned long long)LTIME_S(attr->ia_ctime),
   (s64)ktime_get_real_seconds());
 
if (S_ISREG(inode->i_mode))
@@ -1760,9 +1761,10 @@ int ll_update_inode(struct inode *inode, struct 
lustre_md *md)
if (body->mbo_valid & OBD_MD_FLMTIME) {
if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
CDEBUG(D_INODE,
-  "setting ino %lu mtime from %lu to %llu\n",
-  inode->i_ino, LTIME_S(inode->i_mtime),
-  body->mbo_mtime);
+  "setting ino %lu mtime from %llu to %llu\n",
+  inode->i_ino,
+  (unsigned long long)LTIME_S(inode->i_mtime),
+  (unsigned long long)body->mbo_mtime);
LTIME_S(inode->i_mtime) = body->mbo_mtime;
}
lli->lli_mtime = body->mbo_mtime;
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c 
b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 7198a6384028..88e05a53716e 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -3029,11 +3029,12 @@ static int lmv_merge_attr(struct obd_export *exp,
for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root;
 
-   CDEBUG(D_INFO, "" DFID " size %llu, blocks %llu nlink %u, atime 
%lu ctime %lu, mtime %lu.\n",
+   CDEBUG(D_INFO, "" DFID " size %llu, blocks %llu nlink %u, atime 
%llu ctime %llu, mtime %llu.\n",
   PFID(>lsm_md_oinfo[i].lmo_fid),
   i_size_read(inode), (unsigned long long)inode->i_blocks,
-  inode->i_nlink, LTIME_S(inode->i_atime),
-  LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime));
+  inode->i_nlink, (unsigned long 
long)LTIME_S(inode->i_atime),
+  (unsigned long long)LTIME_S(inode->i_ctime),
+  (unsigned long long)LTIME_S(inode->i_mtime));
 
/* for slave stripe, it needs to subtract nlink for . and .. */
if (i)
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c 
b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index 488b98007558..f1ccf8d26ddc 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -129,9 +129,9 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data 
*op_data,
}
 
if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
-   CDEBUG(D_INODE, "setting mtime %ld, ctime %ld\n",
-  LTIME_S(op_data->op_attr.ia_mtime),
-  LTIME_S(op_data->op_attr.ia_ctime));
+   CDEBUG(D_INODE, "setting mtime %lld, ctime %lld\n",
+  (long long)LTIME_S(op_data->op_attr.ia_mtime),
+  (long long)LTIME_S(op_data->op_attr.ia_ctime));
mdc_setattr_pack(req, op_data, ea, ealen);
 
ptlrpc_request_set_replen(req);
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c 
b/drivers/staging/lustre/lustre/obdclass/obdo.c
index c4503bc36591..8f4054aa970b 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -60,9 +60,9 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, u32 
valid)
u32 newvalid = 0;
 
if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
-   CDEBUG(D_INODE, "valid %x, new time 

[PATCH 5/6] udf: Simplify calls to udf_disk_stamp_to_time

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.

commit fd3cfad374d4 ("udf: Convert udf_disk_stamp_to_time() to use mktime64()")
eliminated the NULL return condition from udf_disk_stamp_to_time().
udf_time_to_disk_time() is always called with a valid dest pointer and
the return value is ignored.
Further, the caller can just as well check the dest pointer being passed in
rather than the return value.
Make both functions return void.

This will make the inode timestamp conversion simpler.

Signed-off-by: Deepa Dinamani 
Cc: j...@suse.com
---
 fs/udf/inode.c   | 28 +++-
 fs/udf/super.c   | 16 +---
 fs/udf/udfdecl.h |  4 ++--
 fs/udf/udftime.c |  9 ++---
 4 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c80765d62f7e..df2378d6ebb4 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1443,15 +1443,9 @@ static int udf_read_inode(struct inode *inode, bool 
hidden_inode)
inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) <<
(inode->i_sb->s_blocksize_bits - 9);
 
-   if (!udf_disk_stamp_to_time(>i_atime, fe->accessTime))
-   inode->i_atime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(>i_mtime,
-   fe->modificationTime))
-   inode->i_mtime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(>i_ctime, fe->attrTime))
-   inode->i_ctime = sbi->s_record_time;
+   udf_disk_stamp_to_time(>i_atime, fe->accessTime);
+   udf_disk_stamp_to_time(>i_mtime, fe->modificationTime);
+   udf_disk_stamp_to_time(>i_ctime, fe->attrTime);
 
iinfo->i_unique = le64_to_cpu(fe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
@@ -1461,18 +1455,10 @@ static int udf_read_inode(struct inode *inode, bool 
hidden_inode)
inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
(inode->i_sb->s_blocksize_bits - 9);
 
-   if (!udf_disk_stamp_to_time(>i_atime, efe->accessTime))
-   inode->i_atime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(>i_mtime,
-   efe->modificationTime))
-   inode->i_mtime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(>i_crtime, efe->createTime))
-   iinfo->i_crtime = sbi->s_record_time;
-
-   if (!udf_disk_stamp_to_time(>i_ctime, efe->attrTime))
-   inode->i_ctime = sbi->s_record_time;
+   udf_disk_stamp_to_time(>i_atime, efe->accessTime);
+   udf_disk_stamp_to_time(>i_mtime, efe->modificationTime);
+   udf_disk_stamp_to_time(>i_crtime, efe->createTime);
+   udf_disk_stamp_to_time(>i_ctime, efe->attrTime);
 
iinfo->i_unique = le64_to_cpu(efe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0d27d41f5c6e..bd0ae64bc31c 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -862,6 +862,9 @@ static int udf_load_pvoldesc(struct super_block *sb, 
sector_t block)
struct buffer_head *bh;
uint16_t ident;
int ret = -ENOMEM;
+#ifdef UDFFS_DEBUG
+   struct timestamp *ts;
+#endif
 
outstr = kmalloc(128, GFP_NOFS);
if (!outstr)
@@ -880,15 +883,14 @@ static int udf_load_pvoldesc(struct super_block *sb, 
sector_t block)
 
pvoldesc = (struct primaryVolDesc *)bh->b_data;
 
-   if (udf_disk_stamp_to_time(_SB(sb)->s_record_time,
- pvoldesc->recordingDateAndTime)) {
+   udf_disk_stamp_to_time(_SB(sb)->s_record_time,
+ pvoldesc->recordingDateAndTime);
 #ifdef UDFFS_DEBUG
-   struct timestamp *ts = >recordingDateAndTime;
-   udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
- le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
- ts->minute, le16_to_cpu(ts->typeAndTimezone));
+   *ts = >recordingDateAndTime;
+   udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
+ le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
+ ts->minute, le16_to_cpu(ts->typeAndTimezone));
 #endif
-   }
 
ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32);
if (ret < 0)
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index fc8d1b3384d2..bae311b59400 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -253,8 +253,8 @@ extern struct long_ad *udf_get_filelongad(uint8_t *, int, 
uint32_t *, int);
 extern struct short_ad *udf_get_fileshortad(uint8_t *, int, uint32_t 

[PATCH 6/6] vfs: change inode times to use struct timespec64

2018-05-11 Thread Deepa Dinamani
struct timespec is not y2038 safe. Transition vfs to use
y2038 safe struct timespec64 instead.

The change was made with the help of the following Coccinelle
script. This catches about 80% of the changes.
All the header file and logic changes are included in the
first 5 rules. The rest are trivial substitutions.
I avoid changing any of the function signatures or any other
filesystem specific data structures to keep the patch simple
for review.

The script could be a little shorter if different cases were combined,
but this version was sufficient for my use case.

virtual patch

@ depends on patch @
identifier now;
@@
- struct timespec
+ struct timespec64
  current_time ( ... )
  {
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
  ...
- return timespec_trunc(
+ return timespec64_trunc(
  ... );
  }

@ depends on patch @
identifier xtime;
@@
 struct \( iattr \| inode \| kstat \) {
 ...
-   struct timespec xtime;
+   struct timespec64 xtime;
 ...
 }

@ depends on patch @
identifier t;
@@
 struct inode_operations {
 ...
int (*update_time) (...,
-   struct timespec t,
+   struct timespec64 t,
...);
 ...
 }

@ depends on patch @
identifier t;
identifier fn_update_time =~ "update_time$";
@@
 fn_update_time (...,
- struct timespec *t,
+ struct timespec64 *t,
 ...) { ... }

@ depends on patch @
identifier t;
@@
lease_get_mtime( ... ,
- struct timespec *t
+ struct timespec64 *t
  ) { ... }

@te depends on patch forall@
identifier ts;
local idexpression struct inode *inode_node;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn_update_time =~ "update_time$";
identifier fn;
expression e, E3;
local idexpression struct inode *node1;
local idexpression struct inode *node2;
local idexpression struct iattr *attr1;
local idexpression struct iattr *attr2;
local idexpression struct iattr attr;
identifier i_xtime1 =~ "^i_[acm]time$";
identifier i_xtime2 =~ "^i_[acm]time$";
identifier ia_xtime1 =~ "^ia_[acm]time$";
identifier ia_xtime2 =~ "^ia_[acm]time$";
@@
(
(
- struct timespec ts;
+ struct timespec64 ts;
|
- struct timespec ts = current_time(inode_node);
+ struct timespec64 ts = current_time(inode_node);
)

<+... when != ts
(
- timespec_equal(_node->i_xtime, )
+ timespec64_equal(_node->i_xtime, )
|
- timespec_equal(, _node->i_xtime)
+ timespec64_equal(, _node->i_xtime)
|
- timespec_compare(_node->i_xtime, )
+ timespec64_compare(_node->i_xtime, )
|
- timespec_compare(, _node->i_xtime)
+ timespec64_compare(, _node->i_xtime)
|
ts = current_time(e)
|
fn_update_time(..., ,...)
|
inode_node->i_xtime = ts
|
node1->i_xtime = ts
|
ts = inode_node->i_xtime
|
<+... attr1->ia_xtime ...+> = ts
|
ts = attr1->ia_xtime
|
ts.tv_sec
|
ts.tv_nsec
|
btrfs_set_stack_timespec_sec(..., ts.tv_sec)
|
btrfs_set_stack_timespec_nsec(..., ts.tv_nsec)
|
- ts = timespec64_to_timespec(
+ ts =
...
-)
|
- ts = ktime_to_timespec(
+ ts = ktime_to_timespec64(
...)
|
- ts = E3
+ ts = timespec_to_timespec64(E3)
|
- ktime_get_real_ts()
+ ktime_get_real_ts64()
|
fn(...,
- ts
+ timespec64_to_timespec(ts)
,...)
)
...+>
(
<... when != ts
- return ts;
+ return timespec64_to_timespec(ts);
...>
)
|
- timespec_equal(>i_xtime1, >i_xtime2)
+ timespec64_equal(>i_xtime2, >i_xtime2)
|
- timespec_equal(>i_xtime1, >ia_xtime2)
+ timespec64_equal(>i_xtime2, >ia_xtime2)
|
- timespec_compare(>i_xtime1, >i_xtime2)
+ timespec64_compare(>i_xtime1, >i_xtime2)
|
node1->i_xtime1 =
- timespec_trunc(attr1->ia_xtime1,
+ timespec64_trunc(attr1->ia_xtime1,
...)
|
- attr1->ia_xtime1 = timespec_trunc(attr2->ia_xtime2,
+ attr1->ia_xtime1 =  timespec64_trunc(attr2->ia_xtime2,
...)
|
- ktime_get_real_ts(>ia_xtime1)
+ ktime_get_real_ts64(>ia_xtime1)
|
- ktime_get_real_ts(_xtime1)
+ ktime_get_real_ts64(_xtime1)
)

@ depends on patch @
struct inode *node;
struct iattr *attr;
identifier fn;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
expression e;
@@
(
- fn(node->i_xtime);
+ fn(timespec64_to_timespec(node->i_xtime));
|
 fn(...,
- node->i_xtime);
+ timespec64_to_timespec(node->i_xtime));
|
- e = fn(attr->ia_xtime);
+ e = fn(timespec64_to_timespec(attr->ia_xtime));
)

@ depends on patch forall @
struct inode *node;
struct iattr *attr;
identifier i_xtime =~ "^i_[acm]time$";
identifier ia_xtime =~ "^ia_[acm]time$";
identifier fn;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
fn (...,
- >i_xtime,
+ ,
...);
|
+ ts = timespec64_to_timespec(attr->ia_xtime);
fn (...,
- >ia_xtime,
+ ,
...);
)
...+>
}

@ depends on patch forall @
struct inode *node;
struct iattr *attr;
struct kstat *stat;
identifier ia_xtime =~ "^ia_[acm]time$";
identifier i_xtime =~ "^i_[acm]time$";
identifier xtime =~ "^[acm]time$";
identifier fn, ret;
@@
{
+ struct timespec ts;
<+...
(
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- >i_xtime,
+ ,
...);
|
+ ts = timespec64_to_timespec(node->i_xtime);
ret = fn (...,
- >i_xtime);
+ );
|
+ ts = 

[PATCH 3/6] ceph: make inode time prints to be long long

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.

Convert these print formats to use long long types to
avoid warnings and errors on conversion.

Signed-off-by: Deepa Dinamani 
Cc: z...@redhat.com
Cc: ceph-de...@vger.kernel.org
---
 fs/ceph/inode.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ae056927080d..676065a1ea62 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -671,18 +671,18 @@ void ceph_fill_file_time(struct inode *inode, int issued,
  CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
timespec_compare(ctime, >i_ctime) > 0) {
-   dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
-inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-ctime->tv_sec, ctime->tv_nsec);
+   dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
+(long long)inode->i_ctime.tv_sec, 
inode->i_ctime.tv_nsec,
+(long long)ctime->tv_sec, ctime->tv_nsec);
inode->i_ctime = *ctime;
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */
-   dout("mtime %ld.%09ld -> %ld.%09ld "
+   dout("mtime %lld.%09ld -> %lld.%09ld "
 "tw %d -> %d\n",
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-mtime->tv_sec, mtime->tv_nsec,
+(long long)inode->i_mtime.tv_sec, 
inode->i_mtime.tv_nsec,
+(long long)mtime->tv_sec, mtime->tv_nsec,
 ci->i_time_warp_seq, (int)time_warp_seq);
 
inode->i_mtime = *mtime;
@@ -691,17 +691,17 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else if (time_warp_seq == ci->i_time_warp_seq) {
/* nobody did utimes(); take the max */
if (timespec_compare(mtime, >i_mtime) > 0) {
-   dout("mtime %ld.%09ld -> %ld.%09ld inc\n",
-inode->i_mtime.tv_sec,
+   dout("mtime %lld.%09ld -> %lld.%09ld inc\n",
+(long long)inode->i_mtime.tv_sec,
 inode->i_mtime.tv_nsec,
-mtime->tv_sec, mtime->tv_nsec);
+(long long)mtime->tv_sec, mtime->tv_nsec);
inode->i_mtime = *mtime;
}
if (timespec_compare(atime, >i_atime) > 0) {
-   dout("atime %ld.%09ld -> %ld.%09ld inc\n",
-inode->i_atime.tv_sec,
+   dout("atime %lld.%09ld -> %lld.%09ld inc\n",
+(long long)inode->i_atime.tv_sec,
 inode->i_atime.tv_nsec,
-atime->tv_sec, atime->tv_nsec);
+(long long)atime->tv_sec, atime->tv_nsec);
inode->i_atime = *atime;
}
} else if (issued & CEPH_CAP_FILE_EXCL) {
@@ -2015,9 +2015,9 @@ int __ceph_setattr(struct inode *inode, struct iattr 
*attr)
}
 
if (ia_valid & ATTR_ATIME) {
-   dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode,
-inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
-attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+   dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
+(long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
+(long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_atime = attr->ia_atime;
@@ -2037,9 +2037,9 @@ int __ceph_setattr(struct inode *inode, struct iattr 
*attr)
}
}
if (ia_valid & ATTR_MTIME) {
-   dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode,
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+   dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
+(long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+(long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & 

[PATCH 1/6] fs: add timespec64_truncate()

2018-05-11 Thread Deepa Dinamani
As vfs moves to using struct timespec64 to represent times,
update the argument to timespec_trunc() to use
struct timespec64. Also rename the function to timespec64_trunc().
The rest of the implementation logic is the same.

Move this to fs/inode.c instead of kernel/time/time.c as all the
users of this api are filesystems.

Signed-off-by: Deepa Dinamani 
Cc: 
---
 fs/inode.c | 24 
 include/linux/fs.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/fs/inode.c b/fs/inode.c
index 13ceb98c3bd3..93af998ee290 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2110,6 +2110,30 @@ void inode_nohighmem(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_nohighmem);
 
+/**
+ * timespec64_trunc - Truncate timespec64 to a granularity
+ * @t: Timespec64
+ * @gran: Granularity in ns.
+ *
+ * Truncate a timespec64 to a granularity. Always rounds down. gran must
+ * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
+ */
+struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran)
+{
+   /* Avoid division in the common cases 1 ns and 1 s. */
+   if (gran == 1) {
+   /* nothing */
+   } else if (gran == NSEC_PER_SEC) {
+   t.tv_nsec = 0;
+   } else if (gran > 1 && gran < NSEC_PER_SEC) {
+   t.tv_nsec -= t.tv_nsec % gran;
+   } else {
+   WARN(1, "illegal file time granularity: %u", gran);
+   }
+   return t;
+}
+EXPORT_SYMBOL(timespec64_trunc);
+
 /**
  * current_time - Return FS time
  * @inode: inode.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0eedf745667b..381c77a37404 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1477,6 +1477,7 @@ static inline void i_gid_write(struct inode *inode, gid_t 
gid)
inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
+extern struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran);
 extern struct timespec current_time(struct inode *inode);
 
 /*
-- 
2.17.0



[PATCH 3/6] ceph: make inode time prints to be long long

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.

Convert these print formats to use long long types to
avoid warnings and errors on conversion.

Signed-off-by: Deepa Dinamani 
Cc: z...@redhat.com
Cc: ceph-de...@vger.kernel.org
---
 fs/ceph/inode.c | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ae056927080d..676065a1ea62 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -671,18 +671,18 @@ void ceph_fill_file_time(struct inode *inode, int issued,
  CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
timespec_compare(ctime, >i_ctime) > 0) {
-   dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
-inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-ctime->tv_sec, ctime->tv_nsec);
+   dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
+(long long)inode->i_ctime.tv_sec, 
inode->i_ctime.tv_nsec,
+(long long)ctime->tv_sec, ctime->tv_nsec);
inode->i_ctime = *ctime;
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
/* the MDS did a utimes() */
-   dout("mtime %ld.%09ld -> %ld.%09ld "
+   dout("mtime %lld.%09ld -> %lld.%09ld "
 "tw %d -> %d\n",
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-mtime->tv_sec, mtime->tv_nsec,
+(long long)inode->i_mtime.tv_sec, 
inode->i_mtime.tv_nsec,
+(long long)mtime->tv_sec, mtime->tv_nsec,
 ci->i_time_warp_seq, (int)time_warp_seq);
 
inode->i_mtime = *mtime;
@@ -691,17 +691,17 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else if (time_warp_seq == ci->i_time_warp_seq) {
/* nobody did utimes(); take the max */
if (timespec_compare(mtime, >i_mtime) > 0) {
-   dout("mtime %ld.%09ld -> %ld.%09ld inc\n",
-inode->i_mtime.tv_sec,
+   dout("mtime %lld.%09ld -> %lld.%09ld inc\n",
+(long long)inode->i_mtime.tv_sec,
 inode->i_mtime.tv_nsec,
-mtime->tv_sec, mtime->tv_nsec);
+(long long)mtime->tv_sec, mtime->tv_nsec);
inode->i_mtime = *mtime;
}
if (timespec_compare(atime, >i_atime) > 0) {
-   dout("atime %ld.%09ld -> %ld.%09ld inc\n",
-inode->i_atime.tv_sec,
+   dout("atime %lld.%09ld -> %lld.%09ld inc\n",
+(long long)inode->i_atime.tv_sec,
 inode->i_atime.tv_nsec,
-atime->tv_sec, atime->tv_nsec);
+(long long)atime->tv_sec, atime->tv_nsec);
inode->i_atime = *atime;
}
} else if (issued & CEPH_CAP_FILE_EXCL) {
@@ -2015,9 +2015,9 @@ int __ceph_setattr(struct inode *inode, struct iattr 
*attr)
}
 
if (ia_valid & ATTR_ATIME) {
-   dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode,
-inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
-attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
+   dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode,
+(long long)inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
+(long long)attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
ci->i_time_warp_seq++;
inode->i_atime = attr->ia_atime;
@@ -2037,9 +2037,9 @@ int __ceph_setattr(struct inode *inode, struct iattr 
*attr)
}
}
if (ia_valid & ATTR_MTIME) {
-   dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode,
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
+   dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode,
+(long long)inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+(long long)attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
if (issued & CEPH_CAP_FILE_EXCL) {
  

[PATCH 1/6] fs: add timespec64_truncate()

2018-05-11 Thread Deepa Dinamani
As vfs moves to using struct timespec64 to represent times,
update the argument to timespec_trunc() to use
struct timespec64. Also rename the function to timespec64_trunc().
The rest of the implementation logic is the same.

Move this to fs/inode.c instead of kernel/time/time.c as all the
users of this api are filesystems.

Signed-off-by: Deepa Dinamani 
Cc: 
---
 fs/inode.c | 24 
 include/linux/fs.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/fs/inode.c b/fs/inode.c
index 13ceb98c3bd3..93af998ee290 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2110,6 +2110,30 @@ void inode_nohighmem(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_nohighmem);
 
+/**
+ * timespec64_trunc - Truncate timespec64 to a granularity
+ * @t: Timespec64
+ * @gran: Granularity in ns.
+ *
+ * Truncate a timespec64 to a granularity. Always rounds down. gran must
+ * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
+ */
+struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran)
+{
+   /* Avoid division in the common cases 1 ns and 1 s. */
+   if (gran == 1) {
+   /* nothing */
+   } else if (gran == NSEC_PER_SEC) {
+   t.tv_nsec = 0;
+   } else if (gran > 1 && gran < NSEC_PER_SEC) {
+   t.tv_nsec -= t.tv_nsec % gran;
+   } else {
+   WARN(1, "illegal file time granularity: %u", gran);
+   }
+   return t;
+}
+EXPORT_SYMBOL(timespec64_trunc);
+
 /**
  * current_time - Return FS time
  * @inode: inode.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0eedf745667b..381c77a37404 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1477,6 +1477,7 @@ static inline void i_gid_write(struct inode *inode, gid_t 
gid)
inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
+extern struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran);
 extern struct timespec current_time(struct inode *inode);
 
 /*
-- 
2.17.0



[PATCH 4/6] fs: nfs: get rid of memcpys for inode times

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.
This will lead to type mismatch for memcpys.
Use regular assignments instead.

Signed-off-by: Deepa Dinamani 
Cc: trond.mykleb...@primarydata.com
---
 fs/nfs/inode.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd15d0b57626..55b62254dd7c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1315,13 +1315,13 @@ static void nfs_wcc_update_inode(struct inode *inode, 
struct nfs_fattr *fattr)
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
&& timespec_equal(>i_ctime, >pre_ctime)) {
-   memcpy(>i_ctime, >ctime, sizeof(inode->i_ctime));
+   inode->i_ctime = fattr->ctime;
}
 
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
&& timespec_equal(>i_mtime, >pre_mtime)) {
-   memcpy(>i_mtime, >mtime, sizeof(inode->i_mtime));
+   inode->i_mtime = fattr->mtime;
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
@@ -1667,12 +1667,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct 
inode *inode, struct nfs_fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
-   memcpy(>pre_ctime, >i_ctime, 
sizeof(fattr->pre_ctime));
+   fattr->pre_ctime = inode->i_ctime;
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
-   memcpy(>pre_mtime, >i_mtime, 
sizeof(fattr->pre_mtime));
+   fattr->pre_mtime = inode->i_mtime;
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
@@ -1829,7 +1829,7 @@ static int nfs_update_inode(struct inode *inode, struct 
nfs_fattr *fattr)
}
 
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
-   memcpy(>i_mtime, >mtime, sizeof(inode->i_mtime));
+   inode->i_mtime = fattr->mtime;
} else if (server->caps & NFS_CAP_MTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_MTIME
@@ -1838,7 +1838,7 @@ static int nfs_update_inode(struct inode *inode, struct 
nfs_fattr *fattr)
}
 
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
-   memcpy(>i_ctime, >ctime, sizeof(inode->i_ctime));
+   inode->i_ctime = fattr->ctime;
} else if (server->caps & NFS_CAP_CTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_CTIME
@@ -1875,7 +1875,7 @@ static int nfs_update_inode(struct inode *inode, struct 
nfs_fattr *fattr)
 
 
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
-   memcpy(>i_atime, >atime, sizeof(inode->i_atime));
+   inode->i_atime = fattr->atime;
else if (server->caps & NFS_CAP_ATIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATIME
-- 
2.17.0



[PATCH 4/6] fs: nfs: get rid of memcpys for inode times

2018-05-11 Thread Deepa Dinamani
Subsequent patches in the series convert inode timestamps
to use struct timespec64 instead of struct timespec as
part of solving the y2038 problem.
This will lead to type mismatch for memcpys.
Use regular assignments instead.

Signed-off-by: Deepa Dinamani 
Cc: trond.mykleb...@primarydata.com
---
 fs/nfs/inode.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd15d0b57626..55b62254dd7c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1315,13 +1315,13 @@ static void nfs_wcc_update_inode(struct inode *inode, 
struct nfs_fattr *fattr)
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
&& timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
-   memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+   inode->i_ctime = fattr->ctime;
}
 
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
-   memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+   inode->i_mtime = fattr->mtime;
if (S_ISDIR(inode->i_mode))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
}
@@ -1667,12 +1667,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct 
inode *inode, struct nfs_fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
-   memcpy(&fattr->pre_ctime, &inode->i_ctime, 
sizeof(fattr->pre_ctime));
+   fattr->pre_ctime = inode->i_ctime;
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
-   memcpy(&fattr->pre_mtime, &inode->i_mtime, 
sizeof(fattr->pre_mtime));
+   fattr->pre_mtime = inode->i_mtime;
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
@@ -1829,7 +1829,7 @@ static int nfs_update_inode(struct inode *inode, struct 
nfs_fattr *fattr)
}
 
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
-   memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+   inode->i_mtime = fattr->mtime;
} else if (server->caps & NFS_CAP_MTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_MTIME
@@ -1838,7 +1838,7 @@ static int nfs_update_inode(struct inode *inode, struct 
nfs_fattr *fattr)
}
 
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
-   memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+   inode->i_ctime = fattr->ctime;
} else if (server->caps & NFS_CAP_CTIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_CTIME
@@ -1875,7 +1875,7 @@ static int nfs_update_inode(struct inode *inode, struct 
nfs_fattr *fattr)
 
 
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
-   memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+   inode->i_atime = fattr->atime;
else if (server->caps & NFS_CAP_ATIME) {
nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATIME
-- 
2.17.0



[PATCH 0/6] Transition vfs to 64-bit timestamps

2018-05-11 Thread Deepa Dinamani
The series aims to switch vfs timestamps to use
struct timespec64. Currently vfs uses struct timespec,
which is not y2038 safe.

The series involves the following:
1. Add vfs helper functions for supporting struct timespec64 timestamps.
2. Cast prints of vfs timestamps to avoid warnings after the switch.
3. Simplify code using vfs timestamps so that the actual
   replacement becomes easy.
4. Convert vfs timestamps to use struct timespec64 using a script.
   This is a flag day patch.

I've tried to keep the conversions with the script simple, to
aid in the reviews. I've kept all the internal filesystem data
structures and function signatures the same.

Next steps:
1. Convert APIs that can handle timespec64, instead of converting
   timestamps at the boundaries.
2. Update internal data structures to avoid timestamp conversions.

Deepa Dinamani (6):
  fs: add timespec64_truncate()
  lustre: Use long long type to print inode time
  ceph: make inode time prints to be long long
  fs: nfs: get rid of memcpys for inode times
  udf: Simplify calls to udf_disk_stamp_to_time
  vfs: change inode times to use struct timespec64

 .../staging/lustre/lustre/llite/llite_lib.c   | 12 +--
 drivers/staging/lustre/lustre/lmv/lmv_obd.c   |  7 +-
 drivers/staging/lustre/lustre/mdc/mdc_reint.c |  6 +-
 drivers/staging/lustre/lustre/obdclass/obdo.c |  6 +-
 drivers/tty/tty_io.c  | 15 +++-
 drivers/usb/gadget/function/f_fs.c|  2 +-
 fs/adfs/inode.c   |  7 +-
 fs/afs/fsclient.c |  2 +-
 fs/attr.c | 14 +--
 fs/bad_inode.c|  2 +-
 fs/btrfs/file.c   |  6 +-
 fs/btrfs/inode.c  |  8 +-
 fs/btrfs/ioctl.c  |  4 +-
 fs/btrfs/root-tree.c  |  4 +-
 fs/btrfs/transaction.c|  2 +-
 fs/ceph/addr.c| 12 +--
 fs/ceph/cache.c   |  4 +-
 fs/ceph/caps.c|  6 +-
 fs/ceph/file.c|  6 +-
 fs/ceph/inode.c   | 86 ++-
 fs/ceph/mds_client.c  |  7 +-
 fs/ceph/snap.c|  6 +-
 fs/cifs/cache.c   |  4 +-
 fs/cifs/fscache.c |  8 +-
 fs/cifs/inode.c   | 26 +++---
 fs/coda/coda_linux.c  | 12 +--
 fs/configfs/inode.c   | 12 +--
 fs/cramfs/inode.c |  2 +-
 fs/ext4/ext4.h| 34 +---
 fs/ext4/ialloc.c  |  4 +-
 fs/ext4/namei.c   |  2 +-
 fs/f2fs/f2fs.h| 10 ++-
 fs/f2fs/file.c| 12 +--
 fs/f2fs/inode.c   | 12 +--
 fs/f2fs/namei.c   |  4 +-
 fs/fat/inode.c| 20 +++--
 fs/fat/namei_msdos.c  | 21 +++--
 fs/fat/namei_vfat.c   | 22 +++--
 fs/fuse/inode.c   |  2 +-
 fs/gfs2/dir.c |  6 +-
 fs/gfs2/glops.c   |  4 +-
 fs/hfs/inode.c|  4 +-
 fs/hfsplus/inode.c| 12 +--
 fs/hostfs/hostfs_kern.c   |  6 +-
 fs/inode.c| 58 +
 fs/jffs2/dir.c| 18 ++--
 fs/jffs2/file.c   |  2 +-
 fs/jffs2/fs.c | 12 +--
 fs/kernfs/dir.c   |  4 +-
 fs/kernfs/inode.c |  8 +-
 fs/locks.c|  2 +-
 fs/nfs/callback_proc.c|  4 +-
 fs/nfs/fscache-index.c|  4 +-
 fs/nfs/fscache.c  | 12 +--
 fs/nfs/inode.c| 39 +
 fs/nfs/nfs2xdr.c  | 25 +++---
 fs/nfs/nfs3xdr.c  |  8 +-
 fs/nfs/nfs4xdr.c  |  7 +-
 fs/nfsd/blocklayout.c |  8 +-
 fs/nfsd/nfs3xdr.c | 14 +--
 fs/nfsd/nfs4xdr.c |  7 +-
 fs/nfsd/nfsxdr.c  |  2 +-
 fs/ntfs/inode.c   | 30 +++
 fs/ocfs2/dlmglue.c| 20 +++--
 fs/ocfs2/file.c   |  6 +-
 fs/orangefs/inode.c   |  2 +-
 fs/orangefs/orangefs-kernel.h |  2 +-
 fs/overlayfs/inode.c  |  2 +-
 fs/overlayfs/overlayfs.h

[PATCH 0/6] Transition vfs to 64-bit timestamps

2018-05-11 Thread Deepa Dinamani
The series aims to switch vfs timestamps to use
struct timespec64. Currently vfs uses struct timespec,
which is not y2038 safe.

The series involves the following:
1. Add vfs helper functions for supporting struct timespec64 timestamps.
2. Cast prints of vfs timestamps to avoid warnings after the switch.
3. Simplify code using vfs timestamps so that the actual
   replacement becomes easy.
4. Convert vfs timestamps to use struct timespec64 using a script.
   This is a flag day patch.

I've tried to keep the conversions with the script simple, to
aid in the reviews. I've kept all the internal filesystem data
structures and function signatures the same.

Next steps:
1. Convert APIs that can handle timespec64, instead of converting
   timestamps at the boundaries.
2. Update internal data structures to avoid timestamp conversions.

Deepa Dinamani (6):
  fs: add timespec64_truncate()
  lustre: Use long long type to print inode time
  ceph: make inode time prints to be long long
  fs: nfs: get rid of memcpys for inode times
  udf: Simplify calls to udf_disk_stamp_to_time
  vfs: change inode times to use struct timespec64

 .../staging/lustre/lustre/llite/llite_lib.c   | 12 +--
 drivers/staging/lustre/lustre/lmv/lmv_obd.c   |  7 +-
 drivers/staging/lustre/lustre/mdc/mdc_reint.c |  6 +-
 drivers/staging/lustre/lustre/obdclass/obdo.c |  6 +-
 drivers/tty/tty_io.c  | 15 +++-
 drivers/usb/gadget/function/f_fs.c|  2 +-
 fs/adfs/inode.c   |  7 +-
 fs/afs/fsclient.c |  2 +-
 fs/attr.c | 14 +--
 fs/bad_inode.c|  2 +-
 fs/btrfs/file.c   |  6 +-
 fs/btrfs/inode.c  |  8 +-
 fs/btrfs/ioctl.c  |  4 +-
 fs/btrfs/root-tree.c  |  4 +-
 fs/btrfs/transaction.c|  2 +-
 fs/ceph/addr.c| 12 +--
 fs/ceph/cache.c   |  4 +-
 fs/ceph/caps.c|  6 +-
 fs/ceph/file.c|  6 +-
 fs/ceph/inode.c   | 86 ++-
 fs/ceph/mds_client.c  |  7 +-
 fs/ceph/snap.c|  6 +-
 fs/cifs/cache.c   |  4 +-
 fs/cifs/fscache.c |  8 +-
 fs/cifs/inode.c   | 26 +++---
 fs/coda/coda_linux.c  | 12 +--
 fs/configfs/inode.c   | 12 +--
 fs/cramfs/inode.c |  2 +-
 fs/ext4/ext4.h| 34 +---
 fs/ext4/ialloc.c  |  4 +-
 fs/ext4/namei.c   |  2 +-
 fs/f2fs/f2fs.h| 10 ++-
 fs/f2fs/file.c| 12 +--
 fs/f2fs/inode.c   | 12 +--
 fs/f2fs/namei.c   |  4 +-
 fs/fat/inode.c| 20 +++--
 fs/fat/namei_msdos.c  | 21 +++--
 fs/fat/namei_vfat.c   | 22 +++--
 fs/fuse/inode.c   |  2 +-
 fs/gfs2/dir.c |  6 +-
 fs/gfs2/glops.c   |  4 +-
 fs/hfs/inode.c|  4 +-
 fs/hfsplus/inode.c| 12 +--
 fs/hostfs/hostfs_kern.c   |  6 +-
 fs/inode.c| 58 +
 fs/jffs2/dir.c| 18 ++--
 fs/jffs2/file.c   |  2 +-
 fs/jffs2/fs.c | 12 +--
 fs/kernfs/dir.c   |  4 +-
 fs/kernfs/inode.c |  8 +-
 fs/locks.c|  2 +-
 fs/nfs/callback_proc.c|  4 +-
 fs/nfs/fscache-index.c|  4 +-
 fs/nfs/fscache.c  | 12 +--
 fs/nfs/inode.c| 39 +
 fs/nfs/nfs2xdr.c  | 25 +++---
 fs/nfs/nfs3xdr.c  |  8 +-
 fs/nfs/nfs4xdr.c  |  7 +-
 fs/nfsd/blocklayout.c |  8 +-
 fs/nfsd/nfs3xdr.c | 14 +--
 fs/nfsd/nfs4xdr.c |  7 +-
 fs/nfsd/nfsxdr.c  |  2 +-
 fs/ntfs/inode.c   | 30 +++
 fs/ocfs2/dlmglue.c| 20 +++--
 fs/ocfs2/file.c   |  6 +-
 fs/orangefs/inode.c   |  2 +-
 fs/orangefs/orangefs-kernel.h |  2 +-
 fs/overlayfs/inode.c  |  2 +-
 fs/overlayfs/overlayfs.h

Re: [Ksummit-discuss] bug-introducing patches

2018-05-11 Thread Stephen Rothwell
Hi all,

On Wed, 9 May 2018 20:47:27 +1000 Stephen Rothwell  
wrote:
>
> On Wed, 9 May 2018 18:03:46 +0900 Mark Brown  wrote:
> >
> > On Wed, May 09, 2018 at 10:47:57AM +0200, Daniel Vetter wrote:  
> > > On Wed, May 9, 2018 at 10:44 AM, Mark Brown  wrote:   
> > >  
> >   
> > > > I think this is an excellent idea, copying in Stephen for his input.
> > > > I'm currently on holiday but unless someone convinces me it's a terrible
> > > > idea I'm willing to at least give it a go on a trial basis once I'm back
> > > > home.
> >   
> > > Since Stephen merges all -fixes branches first, before merging all the
> > > -next branches, he already generates that as part of linux-next. All
> > > he'd need to do is push that intermediate state out to some
> > > linux-fixes branch for consumption by test bots.  
> 
> Good idea ... I will see what I can do.

See my announcement of a pending-fixes branch in linux-next (on LKML
and others)

> I currently have 44 such fixes branches.  More welcome!

We are up to 55.

-- 
Cheers,
Stephen Rothwell


pgpjVG8LUNEnK.pgp
Description: OpenPGP digital signature


Re: [Ksummit-discuss] bug-introducing patches

2018-05-11 Thread Stephen Rothwell
Hi all,

On Wed, 9 May 2018 20:47:27 +1000 Stephen Rothwell  
wrote:
>
> On Wed, 9 May 2018 18:03:46 +0900 Mark Brown  wrote:
> >
> > On Wed, May 09, 2018 at 10:47:57AM +0200, Daniel Vetter wrote:  
> > > On Wed, May 9, 2018 at 10:44 AM, Mark Brown  wrote:   
> > >  
> >   
> > > > I think this is an excellent idea, copying in Stephen for his input.
> > > > I'm currently on holiday but unless someone convinces me it's a terrible
> > > > idea I'm willing to at least give it a go on a trial basis once I'm back
> > > > home.
> >   
> > > Since Stephen merges all -fixes branches first, before merging all the
> > > -next branches, he already generates that as part of linux-next. All
> > > he'd need to do is push that intermediate state out to some
> > > linux-fixes branch for consumption by test bots.  
> 
> Good idea ... I will see what I can do.

See my announcement of a pending-fixes branch in linux-next (on LKML
and others)

> I currently have 44 such fixes branches.  More welcome!

We are up to 55.

-- 
Cheers,
Stephen Rothwell


pgpjVG8LUNEnK.pgp
Description: OpenPGP digital signature


linux-next: a new pending-fixes branch

2018-05-11 Thread Stephen Rothwell
Hi all,

As an outcome of some discussion, I have added a pending-fixes branch
to linux-next.  This branch contains Linus' tree merged with branches
containing only fixes pending for the current release.  The branch is a
strict subset of linux-next each day (and so rebases like linux-next
does).

It would be good if this branch could be tested by the 0-Day service
and any other testing that people do - in the hope of sending fewer
"fixes causing bugs" patches to Linus.

There is no intention that bug fixes for Linus' tree should
necessarily be tested in linux-next before being forwarded, but
(especially for slightly less urgent bugs, at least) it seems like a
good idea.

I currently have 55 branches of bug fixes included.  As of yesterday
the branch contains 165 commits and looks like this:


 .mailmap   |   3 +
 .../devicetree/bindings/net/can/rcar_canfd.txt |   4 +-
 MAINTAINERS|   8 +-
 arch/arm/boot/compressed/Makefile  |   8 +-
 arch/arm/boot/compressed/head.S|  20 +-
 arch/arm/boot/dts/dm8148-evm.dts   |   2 +-
 arch/arm/boot/dts/dm8148-t410.dts  |   2 +-
 arch/arm/boot/dts/dm8168-evm.dts   |   2 +-
 arch/arm/boot/dts/dra62x-j5eco-evm.dts |   2 +-
 arch/arm/boot/dts/imx35.dtsi   |   4 +-
 arch/arm/boot/dts/imx53.dtsi   |   4 +-
 arch/arm/boot/dts/logicpd-som-lv.dtsi  |  11 +-
 arch/arm/kernel/machine_kexec.c|  36 ++--
 arch/arm/mach-omap1/ams-delta-fiq.c|  28 +--
 arch/arm/mach-omap2/powerdomain.c  |   4 +-
 arch/powerpc/include/asm/ftrace.h  |  29 ++-
 arch/powerpc/include/asm/paca.h|   1 -
 arch/powerpc/include/asm/topology.h|  13 +-
 drivers/atm/firestream.c   |   2 +-
 drivers/atm/zatm.c |   3 +
 drivers/bluetooth/btusb.c  |  19 +-
 drivers/dma/pl330.c|  28 ---
 drivers/gpu/drm/bridge/Kconfig |   1 +
 drivers/gpu/drm/drm_atomic.c   |   8 +
 drivers/gpu/drm/i915/intel_cdclk.c |  41 -
 drivers/gpu/drm/i915/intel_display.c   |   2 +
 drivers/gpu/drm/i915/intel_dp.c|  20 --
 drivers/gpu/drm/i915/intel_lvds.c  |   3 +-
 drivers/gpu/drm/omapdrm/dss/dispc.c|  20 +-
 drivers/gpu/drm/omapdrm/dss/hdmi4.c|   2 +-
 drivers/gpu/drm/omapdrm/dss/hdmi4_core.c   |   7 +-
 drivers/gpu/drm/omapdrm/dss/hdmi5.c|   2 +-
 drivers/gpu/drm/omapdrm/omap_connector.c   |  10 +
 drivers/gpu/drm/omapdrm/omap_dmm_tiler.c   |   6 +-
 drivers/gpu/drm/omapdrm/tcm-sita.c |   2 +-
 drivers/gpu/drm/vc4/vc4_dpi.c  |  25 ++-
 drivers/gpu/drm/vc4/vc4_plane.c|   2 +-
 drivers/hwmon/k10temp.c|  40 +++-
 drivers/iio/adc/Kconfig|   1 +
 drivers/iio/adc/ad7793.c   |  75 +++-
 drivers/iio/adc/at91-sama5d2_adc.c |  41 -
 drivers/iio/adc/stm32-dfsdm-adc.c  |  17 +-
 drivers/iio/buffer/industrialio-buffer-dma.c   |   2 +-
 drivers/iio/buffer/kfifo_buf.c |  11 +-
 .../iio/common/hid-sensors/hid-sensor-trigger.c|   8 +-
 drivers/media/usb/uvc/uvc_ctrl.c   |  17 +-
 drivers/mtd/nand/onenand/omap2.c   | 105 ---
 drivers/mtd/nand/raw/marvell_nand.c|  12 +-
 drivers/mtd/nand/raw/nand_base.c   |   5 +
 drivers/net/can/dev.c  |   2 +-
 drivers/net/can/flexcan.c  |  26 +--
 drivers/net/can/spi/hi311x.c   |  11 +-
 drivers/net/can/usb/kvaser_usb.c   |   2 +-
 drivers/net/dsa/mv88e6xxx/chip.c   |  26 +++
 drivers/net/dsa/mv88e6xxx/chip.h   |   1 +
 drivers/net/dsa/mv88e6xxx/global2.c|   2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c|   3 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h|   1 +
 .../net/ethernet/aquantia/atlantic/aq_pci_func.c   |  20 +-
 drivers/net/ethernet/broadcom/tg3.c|   9 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c|   7 +-
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c|  16 ++
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   8 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |   7 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.c  |   2 +-
 drivers/net/ethernet/netronome/nfp/flower/main.c   |  19 --
 drivers/net/ethernet/ni/nixge.c|  10 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.c   |   6 +-
 

linux-next: a new pending-fixes branch

2018-05-11 Thread Stephen Rothwell
Hi all,

As an outcome of some discussion, I have added a pending-fixes branch
to linux-next.  This branch contains Linus' tree merged with branches
containing only fixes pending for the current release.  The branch is a
strict subset of linux-next each day (and so rebases like linux-next
does).

It would be good if this branch could be tested by the 0-Day service
and any other testing that people do - in the hope of sending fewer
"fixes causing bugs" patches to Linus.

There is no intention that bug fixes for Linus' tree should
necessarily be tested in linux-next before being forwarded, but
(especially for slightly less urgent bugs, at least) it seems like a
good idea.

I currently have 55 branches of bug fixes included.  As of yesterday
the branch contains 165 commits and looks like this:


 .mailmap   |   3 +
 .../devicetree/bindings/net/can/rcar_canfd.txt |   4 +-
 MAINTAINERS|   8 +-
 arch/arm/boot/compressed/Makefile  |   8 +-
 arch/arm/boot/compressed/head.S|  20 +-
 arch/arm/boot/dts/dm8148-evm.dts   |   2 +-
 arch/arm/boot/dts/dm8148-t410.dts  |   2 +-
 arch/arm/boot/dts/dm8168-evm.dts   |   2 +-
 arch/arm/boot/dts/dra62x-j5eco-evm.dts |   2 +-
 arch/arm/boot/dts/imx35.dtsi   |   4 +-
 arch/arm/boot/dts/imx53.dtsi   |   4 +-
 arch/arm/boot/dts/logicpd-som-lv.dtsi  |  11 +-
 arch/arm/kernel/machine_kexec.c|  36 ++--
 arch/arm/mach-omap1/ams-delta-fiq.c|  28 +--
 arch/arm/mach-omap2/powerdomain.c  |   4 +-
 arch/powerpc/include/asm/ftrace.h  |  29 ++-
 arch/powerpc/include/asm/paca.h|   1 -
 arch/powerpc/include/asm/topology.h|  13 +-
 drivers/atm/firestream.c   |   2 +-
 drivers/atm/zatm.c |   3 +
 drivers/bluetooth/btusb.c  |  19 +-
 drivers/dma/pl330.c|  28 ---
 drivers/gpu/drm/bridge/Kconfig |   1 +
 drivers/gpu/drm/drm_atomic.c   |   8 +
 drivers/gpu/drm/i915/intel_cdclk.c |  41 -
 drivers/gpu/drm/i915/intel_display.c   |   2 +
 drivers/gpu/drm/i915/intel_dp.c|  20 --
 drivers/gpu/drm/i915/intel_lvds.c  |   3 +-
 drivers/gpu/drm/omapdrm/dss/dispc.c|  20 +-
 drivers/gpu/drm/omapdrm/dss/hdmi4.c|   2 +-
 drivers/gpu/drm/omapdrm/dss/hdmi4_core.c   |   7 +-
 drivers/gpu/drm/omapdrm/dss/hdmi5.c|   2 +-
 drivers/gpu/drm/omapdrm/omap_connector.c   |  10 +
 drivers/gpu/drm/omapdrm/omap_dmm_tiler.c   |   6 +-
 drivers/gpu/drm/omapdrm/tcm-sita.c |   2 +-
 drivers/gpu/drm/vc4/vc4_dpi.c  |  25 ++-
 drivers/gpu/drm/vc4/vc4_plane.c|   2 +-
 drivers/hwmon/k10temp.c|  40 +++-
 drivers/iio/adc/Kconfig|   1 +
 drivers/iio/adc/ad7793.c   |  75 +++-
 drivers/iio/adc/at91-sama5d2_adc.c |  41 -
 drivers/iio/adc/stm32-dfsdm-adc.c  |  17 +-
 drivers/iio/buffer/industrialio-buffer-dma.c   |   2 +-
 drivers/iio/buffer/kfifo_buf.c |  11 +-
 .../iio/common/hid-sensors/hid-sensor-trigger.c|   8 +-
 drivers/media/usb/uvc/uvc_ctrl.c   |  17 +-
 drivers/mtd/nand/onenand/omap2.c   | 105 ---
 drivers/mtd/nand/raw/marvell_nand.c|  12 +-
 drivers/mtd/nand/raw/nand_base.c   |   5 +
 drivers/net/can/dev.c  |   2 +-
 drivers/net/can/flexcan.c  |  26 +--
 drivers/net/can/spi/hi311x.c   |  11 +-
 drivers/net/can/usb/kvaser_usb.c   |   2 +-
 drivers/net/dsa/mv88e6xxx/chip.c   |  26 +++
 drivers/net/dsa/mv88e6xxx/chip.h   |   1 +
 drivers/net/dsa/mv88e6xxx/global2.c|   2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c|   3 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h|   1 +
 .../net/ethernet/aquantia/atlantic/aq_pci_func.c   |  20 +-
 drivers/net/ethernet/broadcom/tg3.c|   9 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c|   7 +-
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c|  16 ++
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   8 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |   7 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.c  |   2 +-
 drivers/net/ethernet/netronome/nfp/flower/main.c   |  19 --
 drivers/net/ethernet/ni/nixge.c|  10 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.c   |   6 +-
 

Re: [BUGFIX PATCH v3 0/4] arm: kprobes: Fix to prohibit probing on unsafe functions

2018-05-11 Thread Greg KH
On Sat, May 12, 2018 at 09:42:21AM +0900, Masami Hiramatsu wrote:
> Hi Greg,
> 
> Could you pick this series to stable?



This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read:
https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.



thanks,

greg k-h


Re: [BUGFIX PATCH v3 0/4] arm: kprobes: Fix to prohibit probing on unsafe functions

2018-05-11 Thread Greg KH
On Sat, May 12, 2018 at 09:42:21AM +0900, Masami Hiramatsu wrote:
> Hi Greg,
> 
> Could you pick this series to stable?



This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read:
https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.



thanks,

greg k-h


linux-next: added the vfs-fixes tree

2018-05-11 Thread Stephen Rothwell
Hi Al,

As requested I have added the vfs-fixes tree
(git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git#fixes) to
linux-next from Monday.

Thanks for adding your subsystem tree as a participant of linux-next.  As
you may know, this is not a judgement of your code.  The purpose of
linux-next is for integration testing and to lower the impact of
conflicts between subsystems in the next merge window. 

You will need to ensure that the patches/commits in your tree/series have
been:
 * submitted under GPL v2 (or later) and include the Contributor's
Signed-off-by,
 * posted to the relevant mailing list,
 * reviewed by you (or another maintainer of your subsystem tree),
 * successfully unit tested, and 
 * destined for the current or next Linux merge window.

Basically, this should be just what you would send to Linus (or ask him
to fetch).  It is allowed to be rebased if you deem it necessary.

-- 
Cheers,
Stephen Rothwell 
s...@canb.auug.org.au


pgpn_ZADpWsuU.pgp
Description: OpenPGP digital signature


linux-next: added the vfs-fixes tree

2018-05-11 Thread Stephen Rothwell
Hi Al,

As requested I have added the vfs-fixes tree
(git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git#fixes) to
linux-next from Monday.

Thanks for adding your subsystem tree as a participant of linux-next.  As
you may know, this is not a judgement of your code.  The purpose of
linux-next is for integration testing and to lower the impact of
conflicts between subsystems in the next merge window. 

You will need to ensure that the patches/commits in your tree/series have
been:
 * submitted under GPL v2 (or later) and include the Contributor's
Signed-off-by,
 * posted to the relevant mailing list,
 * reviewed by you (or another maintainer of your subsystem tree),
 * successfully unit tested, and 
 * destined for the current or next Linux merge window.

Basically, this should be just what you would send to Linus (or ask him
to fetch).  It is allowed to be rebased if you deem it necessary.

-- 
Cheers,
Stephen Rothwell 
s...@canb.auug.org.au


pgpn_ZADpWsuU.pgp
Description: OpenPGP digital signature


Re: [Ksummit-discuss] bug-introducing patches

2018-05-11 Thread Stephen Rothwell
Hi David,

On Fri, 11 May 2018 10:47:01 +0200 David Sterba  wrote:
>
> Please add
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git next-fixes

Added from Monday (as btrfs-fixes).

Thanks for adding your subsystem tree as a participant of linux-next.  As
you may know, this is not a judgement of your code.  The purpose of
linux-next is for integration testing and to lower the impact of
conflicts between subsystems in the next merge window. 

You will need to ensure that the patches/commits in your tree/series have
been:
 * submitted under GPL v2 (or later) and include the Contributor's
Signed-off-by,
 * posted to the relevant mailing list,
 * reviewed by you (or another maintainer of your subsystem tree),
 * successfully unit tested, and 
 * destined for the current or next Linux merge window.

Basically, this should be just what you would send to Linus (or ask him
to fetch).  It is allowed to be rebased if you deem it necessary.

-- 
Cheers,
Stephen Rothwell 
s...@canb.auug.org.au


pgpRAzEmZx0E1.pgp
Description: OpenPGP digital signature


Re: [Ksummit-discuss] bug-introducing patches

2018-05-11 Thread Stephen Rothwell
Hi David,

On Fri, 11 May 2018 10:47:01 +0200 David Sterba  wrote:
>
> Please add
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git next-fixes

Added from Monday (as btrfs-fixes).

Thanks for adding your subsystem tree as a participant of linux-next.  As
you may know, this is not a judgement of your code.  The purpose of
linux-next is for integration testing and to lower the impact of
conflicts between subsystems in the next merge window. 

You will need to ensure that the patches/commits in your tree/series have
been:
 * submitted under GPL v2 (or later) and include the Contributor's
Signed-off-by,
 * posted to the relevant mailing list,
 * reviewed by you (or another maintainer of your subsystem tree),
 * successfully unit tested, and 
 * destined for the current or next Linux merge window.

Basically, this should be just what you would send to Linus (or ask him
to fetch).  It is allowed to be rebased if you deem it necessary.

-- 
Cheers,
Stephen Rothwell 
s...@canb.auug.org.au


pgpRAzEmZx0E1.pgp
Description: OpenPGP digital signature


Re: linux-next: build warning after merge of the mac80211-next tree

2018-05-11 Thread Stephen Rothwell
Hi all,

Just cc'ing the wireless list at Kalle's suggestion.

On Wed, 9 May 2018 14:56:24 +1000 Stephen Rothwell  
wrote:
>
> Hi Johannes,
> 
> After merging the mac80211-next tree, today's linux-next build (arm
> multi_v7_defconfig) produced this warning:
> 
> drivers/net/wireless/marvell/mwifiex/uap_event.c: In function 
> 'mwifiex_process_uap_event':
> drivers/net/wireless/marvell/mwifiex/uap_event.c:333:1: warning: the frame 
> size of 1680 bytes is larger than 1024 bytes [-Wframe-larger-than=]
>  }
>  ^
> drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c: In function 
> 'brcmf_notify_connect_status_ap':
> drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:5530:1: warning: 
> the frame size of 1680 bytes is larger than 1024 bytes [-Wframe-larger-than=]
>  }
>  ^
> 
> Maybe introduced by commit
> 
>   52539ca89f36 ("cfg80211: Expose TXQ stats and parameters to userspace")

-- 
Cheers,
Stephen Rothwell


pgpd0oZ1oEbcI.pgp
Description: OpenPGP digital signature


Re: linux-next: build warning after merge of the mac80211-next tree

2018-05-11 Thread Stephen Rothwell
Hi all,

Just cc'ing the wireless list at Kalle's suggestion.

On Wed, 9 May 2018 14:56:24 +1000 Stephen Rothwell  
wrote:
>
> Hi Johannes,
> 
> After merging the mac80211-next tree, today's linux-next build (arm
> multi_v7_defconfig) produced this warning:
> 
> drivers/net/wireless/marvell/mwifiex/uap_event.c: In function 
> 'mwifiex_process_uap_event':
> drivers/net/wireless/marvell/mwifiex/uap_event.c:333:1: warning: the frame 
> size of 1680 bytes is larger than 1024 bytes [-Wframe-larger-than=]
>  }
>  ^
> drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c: In function 
> 'brcmf_notify_connect_status_ap':
> drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:5530:1: warning: 
> the frame size of 1680 bytes is larger than 1024 bytes [-Wframe-larger-than=]
>  }
>  ^
> 
> Maybe introduced by commit
> 
>   52539ca89f36 ("cfg80211: Expose TXQ stats and parameters to userspace")

-- 
Cheers,
Stephen Rothwell


pgpd0oZ1oEbcI.pgp
Description: OpenPGP digital signature


Re: linux-next: build warning after merge of the mac80211-next tree

2018-05-11 Thread Stephen Rothwell
Hi Kalle,

On Fri, 11 May 2018 15:20:23 +0300 Kalle Valo  wrote:
>
> Btw Stephen for mac80211 reports it would be a good idea to also cc
> linux-wireless list, in case Johannes is not around etc.

Thanks for the suggestion.  Done.

-- 
Cheers,
Stephen Rothwell


pgpWmdiByugpz.pgp
Description: OpenPGP digital signature


Re: linux-next: build warning after merge of the mac80211-next tree

2018-05-11 Thread Stephen Rothwell
Hi Kalle,

On Fri, 11 May 2018 15:20:23 +0300 Kalle Valo  wrote:
>
> Btw Stephen for mac80211 reports it would be a good idea to also cc
> linux-wireless list, in case Johannes is not around etc.

Thanks for the suggestion.  Done.

-- 
Cheers,
Stephen Rothwell


pgpWmdiByugpz.pgp
Description: OpenPGP digital signature


Fwd: KASAN: use-after-free Write in write_mem

2018-05-11 Thread Kyungtae Kim
-- Forwarded message --
From: Kyungtae Kim 
Date: Fri, May 11, 2018 at 11:38 AM
Subject: KASAN: use-after-free Write in write_mem
To: a...@arndb.de, gre...@linuxfoundation.org, linux-kernel@vger.kernel.org
Cc: Byoungyoung Lee , DaeLyong Jeong



We report the crash:
"KASAN: use-after-free Write in write_mem"

This crash was found in v4.17-rc3. Specifically, memory access (write
operation) is invalid, and it is detected by KASAN.

C repro code:
 https://kiwi.cs.purdue.edu/static/alexkkid-fuzzer/repro-3c6e1.c
kernel config:
 https://kiwi.cs.purdue.edu/static/alexkkid-fuzzer/kernel-config-v4.17-rc3

Crash log:

Write of size 4096 at addr 8801 by task syz-executor1/3358

CPU: 0 PID: 3358 Comm: syz-executor1 Not tainted 4.17.0-rc3 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xc7/0x138 lib/dump_stack.c:113
 print_address_description+0x6a/0x280 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x22f/0x350 mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13b/0x1a0 mm/kasan/kasan.c:267
 kasan_check_write+0x14/0x20 mm/kasan/kasan.c:278
 _copy_from_user+0xb7/0x100 lib/usercopy.c:12
 copy_from_user include/linux/uaccess.h:147 [inline]
 write_mem+0x8f/0x190 drivers/char/mem.c:240
 __vfs_write+0x10d/0x610 fs/read_write.c:485
 vfs_write+0x187/0x500 fs/read_write.c:549
 ksys_write+0xd4/0x1a0 fs/read_write.c:598
 __do_sys_write fs/read_write.c:610 [inline]
 __se_sys_write fs/read_write.c:607 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:607
 do_syscall_64+0xa4/0x460 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4497b9
RSP: 002b:7f6f7f254c68 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 7f6f7f2556cc RCX: 004497b9
RDX: ffad RSI: 2000 RDI: 0013
RBP: 0071bea0 R08:  R09: 
R10:  R11: 0246 R12: 
R13: 9ee8 R14: 006f0f88 R15: 7f6f7f255700

The buggy address belongs to the page:
page:ea000400 count:0 mapcount:-127 mapping: index:0x0
flags: 0x0()
raw:    ff80
raw: 88013fff91e0 ea002020 0004 
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8800ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 8800ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>8801: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ^
 88010080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 88010100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff


Thanks,
Kyungtae Kim


Fwd: KASAN: use-after-free Write in write_mem

2018-05-11 Thread Kyungtae Kim
-- Forwarded message --
From: Kyungtae Kim 
Date: Fri, May 11, 2018 at 11:38 AM
Subject: KASAN: use-after-free Write in write_mem
To: a...@arndb.de, gre...@linuxfoundation.org, linux-kernel@vger.kernel.org
Cc: Byoungyoung Lee , DaeLyong Jeong



We report the crash:
"KASAN: use-after-free Write in write_mem"

This crash was found in v4.17-rc3. Specifically, memory access (write
operation) is invalid, and it is detected by KASAN.

C repro code:
 https://kiwi.cs.purdue.edu/static/alexkkid-fuzzer/repro-3c6e1.c
kernel config:
 https://kiwi.cs.purdue.edu/static/alexkkid-fuzzer/kernel-config-v4.17-rc3

Crash log:

Write of size 4096 at addr 8801 by task syz-executor1/3358

CPU: 0 PID: 3358 Comm: syz-executor1 Not tainted 4.17.0-rc3 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xc7/0x138 lib/dump_stack.c:113
 print_address_description+0x6a/0x280 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x22f/0x350 mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13b/0x1a0 mm/kasan/kasan.c:267
 kasan_check_write+0x14/0x20 mm/kasan/kasan.c:278
 _copy_from_user+0xb7/0x100 lib/usercopy.c:12
 copy_from_user include/linux/uaccess.h:147 [inline]
 write_mem+0x8f/0x190 drivers/char/mem.c:240
 __vfs_write+0x10d/0x610 fs/read_write.c:485
 vfs_write+0x187/0x500 fs/read_write.c:549
 ksys_write+0xd4/0x1a0 fs/read_write.c:598
 __do_sys_write fs/read_write.c:610 [inline]
 __se_sys_write fs/read_write.c:607 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:607
 do_syscall_64+0xa4/0x460 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4497b9
RSP: 002b:7f6f7f254c68 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 7f6f7f2556cc RCX: 004497b9
RDX: ffad RSI: 2000 RDI: 0013
RBP: 0071bea0 R08:  R09: 
R10:  R11: 0246 R12: 
R13: 9ee8 R14: 006f0f88 R15: 7f6f7f255700

The buggy address belongs to the page:
page:ea000400 count:0 mapcount:-127 mapping: index:0x0
flags: 0x0()
raw:    ff80
raw: 88013fff91e0 ea002020 0004 
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8800ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 8800ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>8801: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ^
 88010080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
 88010100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff


Thanks,
Kyungtae Kim


nios2 build: empty_zero_page ?

2018-05-11 Thread Randy Dunlap
Hi,

I bet this has come up before, but my search foo didn't find anything.

When (cross) building nios2 allmodconfig, I am seeing this build error:

ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined!
ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined!
ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined!
ERROR: "empty_zero_page" [fs/cramfs/cramfs.ko] undefined!
ERROR: "empty_zero_page" [drivers/usb/wusbcore/wusbcore.ko] undefined!
ERROR: "flush_icache_range" [drivers/misc/lkdtm/lkdtm.ko] undefined!
ERROR: "empty_zero_page" [drivers/md/dm-mod.ko] undefined!

and arch/nios2/mm/init.c references empty_zero_page, but I don't see
anywhere that it is defined.

Help?

There are plenty of other build issues also, but they all are about arithmetic
that is sometimes provided by libc functions, e.g.:

ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined!
ERROR: "__ashrdi3" [drivers/mtd/nand/onenand/onenand.ko] undefined!
ERROR: "__ashldi3" [fs/btrfs/btrfs.ko] undefined!
ERROR: "__lshrdi3" [drivers/mtd/tests/mtd_nandbiterrs.ko] undefined!

thanks,
-- 
~Randy


nios2 build: empty_zero_page ?

2018-05-11 Thread Randy Dunlap
Hi,

I bet this has come up before, but my search foo didn't find anything.

When (cross) building nios2 allmodconfig, I am seeing this build error:

ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined!
ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined!
ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined!
ERROR: "empty_zero_page" [fs/cramfs/cramfs.ko] undefined!
ERROR: "empty_zero_page" [drivers/usb/wusbcore/wusbcore.ko] undefined!
ERROR: "flush_icache_range" [drivers/misc/lkdtm/lkdtm.ko] undefined!
ERROR: "empty_zero_page" [drivers/md/dm-mod.ko] undefined!

and arch/nios2/mm/init.c references empty_zero_page, but I don't see
anywhere that it is defined.

Help?

There are plenty of other build issues also, but they all are about arithmetic
that is sometimes provided by libc functions, e.g.:

ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined!
ERROR: "__ashrdi3" [drivers/mtd/nand/onenand/onenand.ko] undefined!
ERROR: "__ashldi3" [fs/btrfs/btrfs.ko] undefined!
ERROR: "__lshrdi3" [drivers/mtd/tests/mtd_nandbiterrs.ko] undefined!

thanks,
-- 
~Randy


Re: [PATCH] rcu: trace: Remove Startedleaf from trace events comment

2018-05-11 Thread Paul E. McKenney
On Fri, May 11, 2018 at 06:29:57PM -0700, Joel Fernandes wrote:
> On Fri, May 11, 2018 at 6:29 PM, Joel Fernandes (Google)
>  wrote:
> >
> > As part of the gp_seq clean up, the Startleaf condition doesn't occur
> > anymore. Remove it from the comment in the trace event file.
> 
> Sorry, I meant here Startedleaf. Let me know if you want me to resend the 
> patch.

Please do, as it saves me making another typo when attempting to fix it.

Thanx, Paul



Re: [PATCH] rcu: trace: Remove Startedleaf from trace events comment

2018-05-11 Thread Paul E. McKenney
On Fri, May 11, 2018 at 06:29:57PM -0700, Joel Fernandes wrote:
> On Fri, May 11, 2018 at 6:29 PM, Joel Fernandes (Google)
>  wrote:
> >
> > As part of the gp_seq clean up, the Startleaf condition doesn't occur
> > anymore. Remove it from the comment in the trace event file.
> 
> Sorry, I meant here Startedleaf. Let me know if you want me to resend the 
> patch.

Please do, as it saves me making another typo when attempting to fix it.

Thanx, Paul



Re: [PATCH v2] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Randy Dunlap
On 05/11/2018 07:20 PM, Joel Fernandes (Google) wrote:
> rcu_seq_snap may be tricky for someone looking at it for the first time.
> Lets document how it works with an example to make it easier.
> 
> Signed-off-by: Joel Fernandes (Google) 
> ---
> v2 changes: Corrections as suggested by Randy.
> 
>  kernel/rcu/rcu.h | 24 +++-
>  1 file changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index 003671825d62..533bc1087371 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -91,7 +91,29 @@ static inline void rcu_seq_end(unsigned long *sp)
>   WRITE_ONCE(*sp, rcu_seq_endval(sp));
>  }
>  
> -/* Take a snapshot of the update side's sequence number. */
> +/*
> + * Take a snapshot of the update side's sequence number.
> + *
> + * This function predicts what the grace period number will be the next
> + * time an RCU callback will be executed, given the current grace period's
> + * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
> + * already in progress.
> + *
> + * We do this with a single addition and masking.
> + * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) of
> + * the seq is used to track if a GP is in progress or not, its sufficient if we

  it's

> + * add (2+1) and mask with ~1. Lets see why with an example:

  Let's

I.e., Let's not be so casual with (dropping) apostrophes.
But v3 can wait for other comments. :)

> + *
> + * Say the current seq is 6 which is 0b110 (gp is 3 and state bit is 0).
> + * To get the next GP number, we have to at least add 0b10 to this (0x1 << 1)
> + * to account for the state bit. However, if the current seq is 7 (gp is 3 and
> + * state bit is 1), then it means the current grace period is already in
> + * progress so the next time the callback will run is at the end of grace
> + * period number gp+2. To account for the extra +1, we just overflow the LSB by
> + * adding another 0x1 and masking with ~0x1. In case no GP was in progress (RCU
> + * is idle), then the addition of the extra 0x1 and masking will have no
> + * effect. This is calculated as below.
> + */
>  static inline unsigned long rcu_seq_snap(unsigned long *sp)
>  {
>   unsigned long s;
> 


-- 
~Randy


Re: [PATCH v2] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Randy Dunlap
On 05/11/2018 07:20 PM, Joel Fernandes (Google) wrote:
> rcu_seq_snap may be tricky for someone looking at it for the first time.
> Lets document how it works with an example to make it easier.
> 
> Signed-off-by: Joel Fernandes (Google) 
> ---
> v2 changes: Corrections as suggested by Randy.
> 
>  kernel/rcu/rcu.h | 24 +++-
>  1 file changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index 003671825d62..533bc1087371 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -91,7 +91,29 @@ static inline void rcu_seq_end(unsigned long *sp)
>   WRITE_ONCE(*sp, rcu_seq_endval(sp));
>  }
>  
> -/* Take a snapshot of the update side's sequence number. */
> +/*
> + * Take a snapshot of the update side's sequence number.
> + *
> + * This function predicts what the grace period number will be the next
> + * time an RCU callback will be executed, given the current grace period's
> + * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
> + * already in progress.
> + *
> + * We do this with a single addition and masking.
> + * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) of
> + * the seq is used to track if a GP is in progress or not, its sufficient if we

  it's

> + * add (2+1) and mask with ~1. Lets see why with an example:

  Let's

I.e., Let's not be so casual with (dropping) apostrophes.
But v3 can wait for other comments. :)

> + *
> + * Say the current seq is 6 which is 0b110 (gp is 3 and state bit is 0).
> + * To get the next GP number, we have to at least add 0b10 to this (0x1 << 1)
> + * to account for the state bit. However, if the current seq is 7 (gp is 3 and
> + * state bit is 1), then it means the current grace period is already in
> + * progress so the next time the callback will run is at the end of grace
> + * period number gp+2. To account for the extra +1, we just overflow the LSB by
> + * adding another 0x1 and masking with ~0x1. In case no GP was in progress (RCU
> + * is idle), then the addition of the extra 0x1 and masking will have no
> + * effect. This is calculated as below.
> + */
>  static inline unsigned long rcu_seq_snap(unsigned long *sp)
>  {
>   unsigned long s;
> 


-- 
~Randy


Re: Another NVMe failure, this time with AER info

2018-05-11 Thread Ming Lei
On Sat, May 12, 2018 at 12:57 AM, Bjorn Helgaas  wrote:
> Andrew wrote:
>> A friend of mine has a brand new LG laptop that has intermittent NVMe
>> failures.  They mostly happen during a suspend/resume cycle
>> (apparently during suspend, not resume).  Unlike the earlier
>> Dell/Samsung issue, the NVMe device isn't completely gone -- MMIO
>> reads fail, but PCI configuration space is apparently still there:
>
>> nvme nvme0: controller is down; will reset: CSTS=0x, PCI_STATUS=0x10
>
>> and it comes with a nice AER dump:
>
>> [12720.894411] pcieport :00:1c.0: AER: Multiple Corrected error 
>> received: id=00e0
>> [12720.909747] pcieport :00:1c.0: PCIe Bus Error: severity=Corrected, 
>> type=Physical Layer, id=00e0(Transmitter ID)
>> [12720.909751] pcieport :00:1c.0:   device [8086:9d14] error 
>> status/mask=1001/2000
>> [12720.909754] pcieport :00:1c.0:[ 0] Receiver Error (First)
>> [12720.909756] pcieport :00:1c.0:[12] Replay Timer Timeout
>
> I opened this bugzilla and attached the dmesg and lspci -vv output to
> it: https://bugzilla.kernel.org/show_bug.cgi?id=199695
>
> The root port at 00:1c.0 leads to the NVMe device at 01:00.0 (this is
> nvme0):
>
>   00:1c.0 PCI bridge: Intel Corporation Sunrise Point-LP PCI Express Root 
> Port #5 (rev f1) (prog-if 00 [Normal decode])
> Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
>   01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd NVMe SSD 
> Controller SM961/PM961 (prog-if 02 [NVM Express])
> Subsystem: Samsung Electronics Co Ltd Device a801
>
> We reported several corrected errors before the nvme timeout:
>
>   [12750.281158] nvme nvme0: controller is down; will reset: CSTS=0x, 
> PCI_STATUS=0x10
>   [12750.297594] nvme nvme0: I/O 455 QID 2 timeout, disable controller
>   [12750.305196] nvme :01:00.0: enabling device ( -> 0002)
>   [12750.305465] nvme nvme0: Removing after probe failure status: -19
>   [12750.313188] nvme nvme0: I/O 456 QID 2 timeout, disable controller
>   [12750.329152] nvme nvme0: I/O 457 QID 2 timeout, disable controller
>
> The corrected errors are supposedly recovered in hardware without
> software intervention, and AER logs them for informational purposes.
>
> But it seems very likely that these corrected errors are related to
> the nvme timeout: the first corrected errors were logged at
> 12720.894411, nvme_io_timeout defaults to 30 seconds, and the nvme
> timeout was at 12750.281158.

The following patchset might help this issue:

https://marc.info/?l=linux-block&m=152604179903505&w=2

--
Ming Lei


Re: Another NVMe failure, this time with AER info

2018-05-11 Thread Ming Lei
On Sat, May 12, 2018 at 12:57 AM, Bjorn Helgaas  wrote:
> Andrew wrote:
>> A friend of mine has a brand new LG laptop that has intermittent NVMe
>> failures.  They mostly happen during a suspend/resume cycle
>> (apparently during suspend, not resume).  Unlike the earlier
>> Dell/Samsung issue, the NVMe device isn't completely gone -- MMIO
>> reads fail, but PCI configuration space is apparently still there:
>
>> nvme nvme0: controller is down; will reset: CSTS=0x, PCI_STATUS=0x10
>
>> and it comes with a nice AER dump:
>
>> [12720.894411] pcieport :00:1c.0: AER: Multiple Corrected error 
>> received: id=00e0
>> [12720.909747] pcieport :00:1c.0: PCIe Bus Error: severity=Corrected, 
>> type=Physical Layer, id=00e0(Transmitter ID)
>> [12720.909751] pcieport :00:1c.0:   device [8086:9d14] error 
>> status/mask=1001/2000
>> [12720.909754] pcieport :00:1c.0:[ 0] Receiver Error (First)
>> [12720.909756] pcieport :00:1c.0:[12] Replay Timer Timeout
>
> I opened this bugzilla and attached the dmesg and lspci -vv output to
> it: https://bugzilla.kernel.org/show_bug.cgi?id=199695
>
> The root port at 00:1c.0 leads to the NVMe device at 01:00.0 (this is
> nvme0):
>
>   00:1c.0 PCI bridge: Intel Corporation Sunrise Point-LP PCI Express Root 
> Port #5 (rev f1) (prog-if 00 [Normal decode])
> Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
>   01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd NVMe SSD 
> Controller SM961/PM961 (prog-if 02 [NVM Express])
> Subsystem: Samsung Electronics Co Ltd Device a801
>
> We reported several corrected errors before the nvme timeout:
>
>   [12750.281158] nvme nvme0: controller is down; will reset: CSTS=0x, 
> PCI_STATUS=0x10
>   [12750.297594] nvme nvme0: I/O 455 QID 2 timeout, disable controller
>   [12750.305196] nvme :01:00.0: enabling device ( -> 0002)
>   [12750.305465] nvme nvme0: Removing after probe failure status: -19
>   [12750.313188] nvme nvme0: I/O 456 QID 2 timeout, disable controller
>   [12750.329152] nvme nvme0: I/O 457 QID 2 timeout, disable controller
>
> The corrected errors are supposedly recovered in hardware without
> software intervention, and AER logs them for informational purposes.
>
> But it seems very likely that these corrected errors are related to
> the nvme timeout: the first corrected errors were logged at
> 12720.894411, nvme_io_timeout defaults to 30 seconds, and the nvme
> timeout was at 12750.281158.

The following patchset might help this issue:

https://marc.info/?l=linux-block&m=152604179903505&w=2

--
Ming Lei


[PATCH v3 0/2] regulator: add QCOM RPMh regulator driver

2018-05-11 Thread David Collins
This patch series adds a driver and device tree binding documentation for
PMIC regulator control via Resource Power Manager-hardened (RPMh) on some
Qualcomm Technologies, Inc. SoCs such as SDM845.  RPMh is a hardware block
which contains several accelerators which are used to manage various
hardware resources that are shared between the processors of the SoC.  The
final hardware state of a regulator is determined within RPMh by performing
max aggregation of the requests made by all of the processors.

The RPMh regulator driver depends upon the RPMh driver [1] and command DB
driver [2] which are both still undergoing review.  It also depends upon
two recent of_regulator changes: [3] and [4].

Changes since v2 [5]:
 - Replaced '_' with '-' in device tree supply property names
 - Renamed qcom_rpmh-regulator.c to be qcom-rpmh-regulator.c
 - Updated various DT property names to use "microvolt" and "microamp"
 - Moved allowed modes constraint specification out of the driver [4]
 - Replaced rpmh_client with device pointer to match new RPMh API [1]
 - Corrected drms mode threshold checking
 - Initialized voltage_selector to -EINVAL when not specified in DT
 - Added constants for PMIC regulator hardware modes
 - Corrected type sign of mode mapping tables
 - Made variable names for mode arrays plural
 - Simplified Kconfig depends on
 - Removed unnecessary constants and struct fields
 - Added some descriptive comments

Changes since v1 [6]:
 - Addressed review feedback from Doug, Mark, and Stephen
 - Replaced set_voltage()/get_voltage() callbacks with set_voltage_sel()/
   get_voltage_sel()
 - Added set_bypass()/get_bypass() callbacks for BOB pass-through mode
   control
 - Removed top-level PMIC data structures
 - Removed initialization variables from structs and passed them as
   function parameters
 - Removed various comments and error messages
 - Simplified mode handling
 - Refactored per-PMIC rpmh-regulator data specification
 - Simplified probe function
 - Moved header into DT patch
 - Removed redundant property listings from DT binding documentation

[1]: https://lkml.org/lkml/2018/5/9/729
[2]: https://lkml.org/lkml/2018/4/10/714
[3]: https://patchwork.kernel.org/patch/10348629
[4]: https://lkml.org/lkml/2018/5/11/696
[5]: https://lkml.org/lkml/2018/4/13/687
[6]: https://lkml.org/lkml/2018/3/16/1431

David Collins (2):
  regulator: dt-bindings: add QCOM RPMh regulator bindings
  regulator: add QCOM RPMh regulator driver

 .../bindings/regulator/qcom,rpmh-regulator.txt | 208 +
 drivers/regulator/Kconfig  |   9 +
 drivers/regulator/Makefile |   1 +
 drivers/regulator/qcom-rpmh-regulator.c| 925 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 +
 5 files changed, 1179 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 drivers/regulator/qcom-rpmh-regulator.c
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v3 2/2] regulator: add QCOM RPMh regulator driver

2018-05-11 Thread David Collins
Add the QCOM RPMh regulator driver to manage PMIC regulators
which are controlled via RPMh on some Qualcomm Technologies, Inc.
SoCs.  RPMh is a hardware block which contains several
accelerators which are used to manage various hardware resources
that are shared between the processors of the SoC.  The final
hardware state of a regulator is determined within RPMh by
performing max aggregation of the requests made by all of the
processors.

Add support for PMIC regulator control via the voltage regulator
manager (VRM) and oscillator buffer (XOB) RPMh accelerators.  VRM
supports manipulation of enable state, voltage, mode, and
headroom voltage.  XOB supports manipulation of enable state.

Signed-off-by: David Collins 
---
 drivers/regulator/Kconfig   |   9 +
 drivers/regulator/Makefile  |   1 +
 drivers/regulator/qcom-rpmh-regulator.c | 925 
 3 files changed, 935 insertions(+)
 create mode 100644 drivers/regulator/qcom-rpmh-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 4efae3b..1a69bdc 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -671,6 +671,15 @@ config REGULATOR_QCOM_RPM
  Qualcomm RPM as a module. The module will be named
  "qcom_rpm-regulator".
 
+config REGULATOR_QCOM_RPMH
+   tristate "Qualcomm Technologies, Inc. RPMh regulator driver"
+   depends on QCOM_RPMH || COMPILE_TEST
+   help
+ This driver supports control of PMIC regulators via the RPMh hardware
+ block found on Qualcomm Technologies Inc. SoCs.  RPMh regulator
+ control allows for voting on regulator state between multiple
+ processors within the SoC.
+
 config REGULATOR_QCOM_SMD_RPM
tristate "Qualcomm SMD based RPM regulator driver"
depends on QCOM_SMD_RPM
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index d81fb02..906f048 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_REGULATOR_MT6323)+= mt6323-regulator.o
 obj-$(CONFIG_REGULATOR_MT6380) += mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397) += mt6397-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o
+obj-$(CONFIG_REGULATOR_QCOM_RPMH) += qcom-rpmh-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SMD_RPM) += qcom_smd-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SPMI) += qcom_spmi-regulator.o
 obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
diff --git a/drivers/regulator/qcom-rpmh-regulator.c 
b/drivers/regulator/qcom-rpmh-regulator.c
new file mode 100644
index 000..991ecc1
--- /dev/null
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -0,0 +1,925 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+
+/**
+ * enum rpmh_regulator_type - supported RPMh accelerator types
+ * %VRM:   RPMh VRM accelerator which supports voting on enable, voltage,
+ * mode, and headroom voltage of LDO, SMPS, and BOB type PMIC
+ * regulators.
+ * %XOB:   RPMh XOB accelerator which supports voting on the enable state
+ * of PMIC regulators.
+ */
+enum rpmh_regulator_type {
+   VRM,
+   XOB,
+};
+
+#define RPMH_VRM_HEADROOM_MAX_UV   511000
+
+#define RPMH_REGULATOR_REG_VRM_VOLTAGE 0x0
+#define RPMH_REGULATOR_REG_ENABLE  0x4
+#define RPMH_REGULATOR_REG_VRM_MODE0x8
+#define RPMH_REGULATOR_REG_VRM_HEADROOM0xC
+
+#define RPMH_REGULATOR_MODE_COUNT  4
+
+#define PMIC4_LDO_MODE_RETENTION   4
+#define PMIC4_LDO_MODE_LPM 5
+#define PMIC4_LDO_MODE_HPM 7
+
+#define PMIC4_SMPS_MODE_RETENTION  4
+#define PMIC4_SMPS_MODE_PFM5
+#define PMIC4_SMPS_MODE_AUTO   6
+#define PMIC4_SMPS_MODE_PWM7
+
+#define PMIC4_BOB_MODE_PASS0
+#define PMIC4_BOB_MODE_PFM 1
+#define PMIC4_BOB_MODE_AUTO2
+#define PMIC4_BOB_MODE_PWM 3
+
+/**
+ * struct rpmh_vreg_hw_data - RPMh regulator hardware configurations
+ * @regulator_type:RPMh accelerator type used to manage this
+ * regulator
+ * @ops:   Pointer to regulator ops callback structure
+ * @voltage_range: The single range of voltages supported by this
+ * PMIC regulator type
+ * @n_voltages:The number of unique voltage set points 
defined
+ * by voltage_range
+ * @pmic_mode_map: Array indexed by regulator framework mode
+ *  

[PATCH v3 0/2] regulator: add QCOM RPMh regulator driver

2018-05-11 Thread David Collins
This patch series adds a driver and device tree binding documentation for
PMIC regulator control via Resource Power Manager-hardened (RPMh) on some
Qualcomm Technologies, Inc. SoCs such as SDM845.  RPMh is a hardware block
which contains several accelerators which are used to manage various
hardware resources that are shared between the processors of the SoC.  The
final hardware state of a regulator is determined within RPMh by performing
max aggregation of the requests made by all of the processors.

The RPMh regulator driver depends upon the RPMh driver [1] and command DB
driver [2] which are both still undergoing review.  It also depends upon
two recent of_regulator changes: [3] and [4].

Changes since v2 [5]:
 - Replaced '_' with '-' in device tree supply property names
 - Renamed qcom_rpmh-regulator.c to be qcom-rpmh-regulator.c
 - Updated various DT property names to use "microvolt" and "microamp"
 - Moved allowed modes constraint specification out of the driver [4]
 - Replaced rpmh_client with device pointer to match new RPMh API [1]
 - Corrected drms mode threshold checking
 - Initialized voltage_selector to -EINVAL when not specified in DT
 - Added constants for PMIC regulator hardware modes
 - Corrected type sign of mode mapping tables
 - Made variable names for mode arrays plural
 - Simplified Kconfig depends on
 - Removed unnecessary constants and struct fields
 - Added some descriptive comments

Changes since v1 [6]:
 - Addressed review feedback from Doug, Mark, and Stephen
 - Replaced set_voltage()/get_voltage() callbacks with set_voltage_sel()/
   get_voltage_sel()
 - Added set_bypass()/get_bypass() callbacks for BOB pass-through mode
   control
 - Removed top-level PMIC data structures
 - Removed initialization variables from structs and passed them as
   function parameters
 - Removed various comments and error messages
 - Simplified mode handling
 - Refactored per-PMIC rpmh-regulator data specification
 - Simplified probe function
 - Moved header into DT patch
 - Removed redundant property listings from DT binding documentation

[1]: https://lkml.org/lkml/2018/5/9/729
[2]: https://lkml.org/lkml/2018/4/10/714
[3]: https://patchwork.kernel.org/patch/10348629
[4]: https://lkml.org/lkml/2018/5/11/696
[5]: https://lkml.org/lkml/2018/4/13/687
[6]: https://lkml.org/lkml/2018/3/16/1431

David Collins (2):
  regulator: dt-bindings: add QCOM RPMh regulator bindings
  regulator: add QCOM RPMh regulator driver

 .../bindings/regulator/qcom,rpmh-regulator.txt | 208 +
 drivers/regulator/Kconfig  |   9 +
 drivers/regulator/Makefile |   1 +
 drivers/regulator/qcom-rpmh-regulator.c| 925 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 +
 5 files changed, 1179 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 drivers/regulator/qcom-rpmh-regulator.c
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v3 2/2] regulator: add QCOM RPMh regulator driver

2018-05-11 Thread David Collins
Add the QCOM RPMh regulator driver to manage PMIC regulators
which are controlled via RPMh on some Qualcomm Technologies, Inc.
SoCs.  RPMh is a hardware block which contains several
accelerators which are used to manage various hardware resources
that are shared between the processors of the SoC.  The final
hardware state of a regulator is determined within RPMh by
performing max aggregation of the requests made by all of the
processors.

Add support for PMIC regulator control via the voltage regulator
manager (VRM) and oscillator buffer (XOB) RPMh accelerators.  VRM
supports manipulation of enable state, voltage, mode, and
headroom voltage.  XOB supports manipulation of enable state.

Signed-off-by: David Collins 
---
 drivers/regulator/Kconfig   |   9 +
 drivers/regulator/Makefile  |   1 +
 drivers/regulator/qcom-rpmh-regulator.c | 925 
 3 files changed, 935 insertions(+)
 create mode 100644 drivers/regulator/qcom-rpmh-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 4efae3b..1a69bdc 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -671,6 +671,15 @@ config REGULATOR_QCOM_RPM
  Qualcomm RPM as a module. The module will be named
  "qcom_rpm-regulator".
 
+config REGULATOR_QCOM_RPMH
+   tristate "Qualcomm Technologies, Inc. RPMh regulator driver"
+   depends on QCOM_RPMH || COMPILE_TEST
+   help
+ This driver supports control of PMIC regulators via the RPMh hardware
+ block found on Qualcomm Technologies Inc. SoCs.  RPMh regulator
+ control allows for voting on regulator state between multiple
+ processors within the SoC.
+
 config REGULATOR_QCOM_SMD_RPM
tristate "Qualcomm SMD based RPM regulator driver"
depends on QCOM_SMD_RPM
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index d81fb02..906f048 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_REGULATOR_MT6323)+= mt6323-regulator.o
 obj-$(CONFIG_REGULATOR_MT6380) += mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397) += mt6397-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o
+obj-$(CONFIG_REGULATOR_QCOM_RPMH) += qcom-rpmh-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SMD_RPM) += qcom_smd-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_SPMI) += qcom_spmi-regulator.o
 obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
diff --git a/drivers/regulator/qcom-rpmh-regulator.c 
b/drivers/regulator/qcom-rpmh-regulator.c
new file mode 100644
index 000..991ecc1
--- /dev/null
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -0,0 +1,925 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include 
+
+/**
+ * enum rpmh_regulator_type - supported RPMh accelerator types
+ * %VRM:   RPMh VRM accelerator which supports voting on enable, voltage,
+ * mode, and headroom voltage of LDO, SMPS, and BOB type PMIC
+ * regulators.
+ * %XOB:   RPMh XOB accelerator which supports voting on the enable state
+ * of PMIC regulators.
+ */
+enum rpmh_regulator_type {
+   VRM,
+   XOB,
+};
+
+#define RPMH_VRM_HEADROOM_MAX_UV   511000
+
+#define RPMH_REGULATOR_REG_VRM_VOLTAGE 0x0
+#define RPMH_REGULATOR_REG_ENABLE  0x4
+#define RPMH_REGULATOR_REG_VRM_MODE0x8
+#define RPMH_REGULATOR_REG_VRM_HEADROOM0xC
+
+#define RPMH_REGULATOR_MODE_COUNT  4
+
+#define PMIC4_LDO_MODE_RETENTION   4
+#define PMIC4_LDO_MODE_LPM 5
+#define PMIC4_LDO_MODE_HPM 7
+
+#define PMIC4_SMPS_MODE_RETENTION  4
+#define PMIC4_SMPS_MODE_PFM5
+#define PMIC4_SMPS_MODE_AUTO   6
+#define PMIC4_SMPS_MODE_PWM7
+
+#define PMIC4_BOB_MODE_PASS0
+#define PMIC4_BOB_MODE_PFM 1
+#define PMIC4_BOB_MODE_AUTO2
+#define PMIC4_BOB_MODE_PWM 3
+
+/**
+ * struct rpmh_vreg_hw_data - RPMh regulator hardware configurations
+ * @regulator_type:RPMh accelerator type used to manage this
+ * regulator
+ * @ops:   Pointer to regulator ops callback structure
+ * @voltage_range: The single range of voltages supported by this
+ * PMIC regulator type
+ * @n_voltages:The number of unique voltage set points 
defined
+ * by voltage_range
+ * @pmic_mode_map: Array indexed by regulator framework mode
+ * containing PMIC 

[PATCH v3 1/2] regulator: dt-bindings: add QCOM RPMh regulator bindings

2018-05-11 Thread David Collins
Introduce bindings for RPMh regulator devices found on some
Qualcomm Technologies, Inc. SoCs.  These devices allow a given
processor within the SoC to make PMIC regulator requests which
are aggregated within the RPMh hardware block along with requests
from other processors in the SoC to determine the final PMIC
regulator hardware state.

Signed-off-by: David Collins 
---
 .../bindings/regulator/qcom,rpmh-regulator.txt | 208 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 
 2 files changed, 244 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

diff --git 
a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt 
b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
new file mode 100644
index 000..ad2185e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -0,0 +1,208 @@
+Qualcomm Technologies, Inc. RPMh Regulators
+
+rpmh-regulator devices support PMIC regulator management via the Voltage
+Regulator Manager (VRM) and Oscillator Buffer (XOB) RPMh accelerators.  The 
APPS
+processor communicates with these hardware blocks via a Resource State
+Coordinator (RSC) using command packets.  The VRM allows changing four
+parameters for a given regulator: enable state, output voltage, operating mode,
+and minimum headroom voltage.  The XOB allows changing only a single parameter
+for a given regulator: its enable state.  Despite its name, the XOB is capable
+of controlling the enable state of any PMIC peripheral.  It is used for clock
+buffers, low-voltage switches, and LDO/SMPS regulators which have a fixed
+voltage and mode.
+
+===
+Required Node Structure
+===
+
+RPMh regulators must be described in two levels of device nodes.  The first
+level describes the PMIC containing the regulators and must reside within an
+RPMh device node.  The second level describes each regulator within the PMIC
+which is to be used on the board.  Each of these regulators maps to a single
+RPMh resource.
+
+The names used for regulator nodes must match those supported by a given PMIC.
+Supported regulator node names:
+   PM8998: smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
+   PMI8998: bob
+   PM8005: smps1 - smps4
+
+
+First Level Nodes - PMIC
+
+
+- compatible
+   Usage:  required
+   Value type: <string>
+   Definition: Must be one of: "qcom,pm8998-rpmh-regulators",
+   "qcom,pmi8998-rpmh-regulators" or
+   "qcom,pm8005-rpmh-regulators".
+
+- qcom,pmic-id
+   Usage:  required
+   Value type: <string>
+   Definition: RPMh resource name suffix used for the regulators found on
+   this PMIC.  Typical values: "a", "b", "c", "d", "e", "f".
+
+- vdd-s1-supply
+- vdd-s2-supply
+- vdd-s3-supply
+- vdd-s4-supply
+   Usage:  optional (PM8998 and PM8005 only)
+   Value type: <phandle>
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+- vdd-s5-supply
+- vdd-s6-supply
+- vdd-s7-supply
+- vdd-s8-supply
+- vdd-s9-supply
+- vdd-s10-supply
+- vdd-s11-supply
+- vdd-s12-supply
+- vdd-s13-supply
+- vdd-l1-l27-supply
+- vdd-l2-l8-l17-supply
+- vdd-l3-l11-supply
+- vdd-l4-l5-supply
+- vdd-l6-supply
+- vdd-l7-l12-l14-l15-supply
+- vdd-l9-supply
+- vdd-l10-l23-l25-supply
+- vdd-l13-l19-l21-supply
+- vdd-l16-l28-supply
+- vdd-l18-l22-supply
+- vdd-l20-l24-supply
+- vdd-l26-supply
+- vin-lvs-1-2-supply
+   Usage:  optional (PM8998 only)
+   Value type: <phandle>
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+- vdd-bob-supply
+   Usage:  optional (PMI8998 only)
+   Value type: <phandle>
+   Definition: BOB regulator parent supply phandle
+
+===
+Second Level Nodes - Regulators
+===
+
+- qcom,regulator-initial-microvolt
+   Usage:  optional; VRM regulators only
+   Value type: <u32>
+   Definition: Specifies the initial voltage in microvolts to request for a
+   VRM regulator.
+
+- regulator-initial-mode
+   Usage:  optional; VRM regulators only
+   Value type: <u32>
+   Definition: Specifies the initial mode to request for a VRM regulator.
+   Supported values are RPMH_REGULATOR_MODE_* which are defined
+   in [1] (i.e. 0 to 3).  This property may be specified even
+   if the regulator-allow-set-load property is not specified.
+
+- qcom,allowed-drms-modes
+   Usage:  required if regulator-allow-set-load is specified;
+   VRM regulators only
+   Value type: 
+   Definition: A 

[PATCH v3 1/2] regulator: dt-bindings: add QCOM RPMh regulator bindings

2018-05-11 Thread David Collins
Introduce bindings for RPMh regulator devices found on some
Qualcomm Technologies, Inc. SoCs.  These devices allow a given
processor within the SoC to make PMIC regulator requests which
are aggregated within the RPMh hardware block along with requests
from other processors in the SoC to determine the final PMIC
regulator hardware state.

Signed-off-by: David Collins 
---
 .../bindings/regulator/qcom,rpmh-regulator.txt | 208 +
 .../dt-bindings/regulator/qcom,rpmh-regulator.h|  36 
 2 files changed, 244 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

diff --git 
a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt 
b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
new file mode 100644
index 000..ad2185e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -0,0 +1,208 @@
+Qualcomm Technologies, Inc. RPMh Regulators
+
+rpmh-regulator devices support PMIC regulator management via the Voltage
+Regulator Manager (VRM) and Oscillator Buffer (XOB) RPMh accelerators.  The 
APPS
+processor communicates with these hardware blocks via a Resource State
+Coordinator (RSC) using command packets.  The VRM allows changing four
+parameters for a given regulator: enable state, output voltage, operating mode,
+and minimum headroom voltage.  The XOB allows changing only a single parameter
+for a given regulator: its enable state.  Despite its name, the XOB is capable
+of controlling the enable state of any PMIC peripheral.  It is used for clock
+buffers, low-voltage switches, and LDO/SMPS regulators which have a fixed
+voltage and mode.
+
+===
+Required Node Structure
+===
+
+RPMh regulators must be described in two levels of device nodes.  The first
+level describes the PMIC containing the regulators and must reside within an
+RPMh device node.  The second level describes each regulator within the PMIC
+which is to be used on the board.  Each of these regulators maps to a single
+RPMh resource.
+
+The names used for regulator nodes must match those supported by a given PMIC.
+Supported regulator node names:
+   PM8998: smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
+   PMI8998: bob
+   PM8005: smps1 - smps4
+
+
+First Level Nodes - PMIC
+
+
+- compatible
+   Usage:  required
+   Value type: <string>
+   Definition: Must be one of: "qcom,pm8998-rpmh-regulators",
+   "qcom,pmi8998-rpmh-regulators" or
+   "qcom,pm8005-rpmh-regulators".
+
+- qcom,pmic-id
+   Usage:  required
+   Value type: <string>
+   Definition: RPMh resource name suffix used for the regulators found on
+   this PMIC.  Typical values: "a", "b", "c", "d", "e", "f".
+
+- vdd-s1-supply
+- vdd-s2-supply
+- vdd-s3-supply
+- vdd-s4-supply
+   Usage:  optional (PM8998 and PM8005 only)
+   Value type: <phandle>
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+- vdd-s5-supply
+- vdd-s6-supply
+- vdd-s7-supply
+- vdd-s8-supply
+- vdd-s9-supply
+- vdd-s10-supply
+- vdd-s11-supply
+- vdd-s12-supply
+- vdd-s13-supply
+- vdd-l1-l27-supply
+- vdd-l2-l8-l17-supply
+- vdd-l3-l11-supply
+- vdd-l4-l5-supply
+- vdd-l6-supply
+- vdd-l7-l12-l14-l15-supply
+- vdd-l9-supply
+- vdd-l10-l23-l25-supply
+- vdd-l13-l19-l21-supply
+- vdd-l16-l28-supply
+- vdd-l18-l22-supply
+- vdd-l20-l24-supply
+- vdd-l26-supply
+- vin-lvs-1-2-supply
+   Usage:  optional (PM8998 only)
+   Value type: <phandle>
+   Definition: phandle of the parent supply regulator of one or more of the
+   regulators for this PMIC.
+
+- vdd-bob-supply
+   Usage:  optional (PMI8998 only)
+   Value type: <phandle>
+   Definition: BOB regulator parent supply phandle
+
+===
+Second Level Nodes - Regulators
+===
+
+- qcom,regulator-initial-microvolt
+   Usage:  optional; VRM regulators only
+   Value type: <u32>
+   Definition: Specifies the initial voltage in microvolts to request for a
+   VRM regulator.
+
+- regulator-initial-mode
+   Usage:  optional; VRM regulators only
+   Value type: <u32>
+   Definition: Specifies the initial mode to request for a VRM regulator.
+   Supported values are RPMH_REGULATOR_MODE_* which are defined
+   in [1] (i.e. 0 to 3).  This property may be specified even
+   if the regulator-allow-set-load property is not specified.
+
+- qcom,allowed-drms-modes
+   Usage:  required if regulator-allow-set-load is specified;
+   VRM regulators only
+   Value type: 
+   Definition: A list of integers specifying 

[PATCH v2] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Joel Fernandes (Google)
rcu_seq_snap may be tricky for someone looking at it for the first time.
Let's document how it works with an example to make it easier.

Signed-off-by: Joel Fernandes (Google) 
---
v2 changes: Corrections as suggested by Randy.

 kernel/rcu/rcu.h | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 003671825d62..533bc1087371 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -91,7 +91,29 @@ static inline void rcu_seq_end(unsigned long *sp)
WRITE_ONCE(*sp, rcu_seq_endval(sp));
 }
 
-/* Take a snapshot of the update side's sequence number. */
+/*
+ * Take a snapshot of the update side's sequence number.
+ *
+ * This function predicts what the grace period number will be the next
+ * time an RCU callback will be executed, given the current grace period's
+ * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
+ * already in progress.
+ *
+ * We do this with a single addition and masking.
+ * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) of
+ * the seq is used to track if a GP is in progress or not, its sufficient if we
+ * add (2+1) and mask with ~1. Lets see why with an example:
+ *
+ * Say the current seq is 6 which is 0b110 (gp is 3 and state bit is 0).
+ * To get the next GP number, we have to at least add 0b10 to this (0x1 << 1)
+ * to account for the state bit. However, if the current seq is 7 (gp is 3 and
+ * state bit is 1), then it means the current grace period is already in
+ * progress so the next time the callback will run is at the end of grace
+ * period number gp+2. To account for the extra +1, we just overflow the LSB by
+ * adding another 0x1 and masking with ~0x1. In case no GP was in progress (RCU
+ * is idle), then the addition of the extra 0x1 and masking will have no
+ * effect. This is calculated as below.
+ */
 static inline unsigned long rcu_seq_snap(unsigned long *sp)
 {
unsigned long s;
-- 
2.17.0.441.gb46fe60e1d-goog


[PATCH v2] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Joel Fernandes (Google)
rcu_seq_snap may be tricky for someone looking at it for the first time.
Let's document how it works with an example to make it easier.

Signed-off-by: Joel Fernandes (Google) 
---
v2 changes: Corrections as suggested by Randy.

 kernel/rcu/rcu.h | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 003671825d62..533bc1087371 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -91,7 +91,29 @@ static inline void rcu_seq_end(unsigned long *sp)
WRITE_ONCE(*sp, rcu_seq_endval(sp));
 }
 
-/* Take a snapshot of the update side's sequence number. */
+/*
+ * Take a snapshot of the update side's sequence number.
+ *
+ * This function predicts what the grace period number will be the next
+ * time an RCU callback will be executed, given the current grace period's
+ * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
+ * already in progress.
+ *
+ * We do this with a single addition and masking.
+ * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) of
+ * the seq is used to track if a GP is in progress or not, its sufficient if we
+ * add (2+1) and mask with ~1. Lets see why with an example:
+ *
+ * Say the current seq is 6 which is 0b110 (gp is 3 and state bit is 0).
+ * To get the next GP number, we have to at least add 0b10 to this (0x1 << 1)
+ * to account for the state bit. However, if the current seq is 7 (gp is 3 and
+ * state bit is 1), then it means the current grace period is already in
+ * progress so the next time the callback will run is at the end of grace
+ * period number gp+2. To account for the extra +1, we just overflow the LSB by
+ * adding another 0x1 and masking with ~0x1. In case no GP was in progress (RCU
+ * is idle), then the addition of the extra 0x1 and masking will have no
+ * effect. This is calculated as below.
+ */
 static inline unsigned long rcu_seq_snap(unsigned long *sp)
 {
unsigned long s;
-- 
2.17.0.441.gb46fe60e1d-goog


[PATCH] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Randy Dunlap
On 05/11/2018 05:33 PM, Joel Fernandes (Google) wrote:
> rcu_seq_snap may be tricky for someone looking at it for the first time.
> Lets document how it works with an example to make it easier.
> 
> Signed-off-by: Joel Fernandes (Google) 
> ---
>  kernel/rcu/rcu.h | 23 ++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index 003671825d62..004ace3d22c2 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -91,7 +91,28 @@ static inline void rcu_seq_end(unsigned long *sp)
>   WRITE_ONCE(*sp, rcu_seq_endval(sp));
>  }
>  
> -/* Take a snapshot of the update side's sequence number. */
> +/*
> + * Take a snapshot of the update side's sequence number.
> + *
> + * This function predicts what the grace period number will be the next
> + * time an RCU callback will be executed, given the current grace period's
> + * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
> + * already in progress.
> + *
> + * We do this with a single addition and masking.
> + * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) 
> of
> + * the seq is used to track if a GP is in progress or not, its sufficient if 
> we
> + * add (2+1) and mask with ~1. Lets see why with an example:
> + *
> + * Say the current seq is 6 which is 0x110 (gp is 3 and state bit is 0).

0b110
   or   0x6

> + * To get the next GP number, we have to atleast add 0x10 to this (0x1 << 1) 
> to

at least add 0b10

> + * account for the state bit. However, if the current seq is 7 (GP num is 3
> + * and state bit is 1), then it means the current grace period is already
> + * in progress so the next the callback will run is at gp+2. To account for

  so the next time? the callback will run

> + * the extra +1, we just overflow the LSB by adding another 0x1 and masking
> + * with ~0x1. Incase no GP was in progress (RCU is idle), then the adding

 In case

> + * by 0x1 and masking will have no effect. This is calculated as below.
> + */
>  static inline unsigned long rcu_seq_snap(unsigned long *sp)
>  {
>   unsigned long s;
> 


-- 
~Randy


[PATCH] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Randy Dunlap
On 05/11/2018 05:33 PM, Joel Fernandes (Google) wrote:
> rcu_seq_snap may be tricky for someone looking at it for the first time.
> Lets document how it works with an example to make it easier.
> 
> Signed-off-by: Joel Fernandes (Google) 
> ---
>  kernel/rcu/rcu.h | 23 ++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
> index 003671825d62..004ace3d22c2 100644
> --- a/kernel/rcu/rcu.h
> +++ b/kernel/rcu/rcu.h
> @@ -91,7 +91,28 @@ static inline void rcu_seq_end(unsigned long *sp)
>   WRITE_ONCE(*sp, rcu_seq_endval(sp));
>  }
>  
> -/* Take a snapshot of the update side's sequence number. */
> +/*
> + * Take a snapshot of the update side's sequence number.
> + *
> + * This function predicts what the grace period number will be the next
> + * time an RCU callback will be executed, given the current grace period's
> + * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
> + * already in progress.
> + *
> + * We do this with a single addition and masking.
> + * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) 
> of
> + * the seq is used to track if a GP is in progress or not, its sufficient if 
> we
> + * add (2+1) and mask with ~1. Lets see why with an example:
> + *
> + * Say the current seq is 6 which is 0x110 (gp is 3 and state bit is 0).

0b110
   or   0x6

> + * To get the next GP number, we have to atleast add 0x10 to this (0x1 << 1) 
> to

at least add 0b10

> + * account for the state bit. However, if the current seq is 7 (GP num is 3
> + * and state bit is 1), then it means the current grace period is already
> + * in progress so the next the callback will run is at gp+2. To account for

  so the next time? the callback will run

> + * the extra +1, we just overflow the LSB by adding another 0x1 and masking
> + * with ~0x1. Incase no GP was in progress (RCU is idle), then the adding

 In case

> + * by 0x1 and masking will have no effect. This is calculated as below.
> + */
>  static inline unsigned long rcu_seq_snap(unsigned long *sp)
>  {
>   unsigned long s;
> 


-- 
~Randy


[PATCH 1/2] regulator: of: add property for allowed modes specification

2018-05-11 Thread David Collins
Add a common device tree property for regulator nodes to support
the specification of allowed operating modes.

Signed-off-by: David Collins 
---
 Documentation/devicetree/bindings/regulator/regulator.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt 
b/Documentation/devicetree/bindings/regulator/regulator.txt
index 2babe15b..c627aa0 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -59,6 +59,11 @@ Optional properties:
 - regulator-initial-mode: initial operating mode. The set of possible operating
   modes depends on the capabilities of every hardware so each device binding
   documentation explains which values the regulator supports.
+- regulator-allowed-modes: list of operating modes that software is allowed to
+  configure for the regulator at run-time.  Elements may be specified in any
+  order.  The set of possible operating modes depends on the capabilities of
+  every hardware so each device binding document explains which values the
+  regulator supports.
 - regulator-system-load: Load in uA present on regulator that is not captured 
by
   any consumer request.
 - regulator-pull-down: Enable pull down resistor when the regulator is 
disabled.
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/2] regulator: of: add property for allowed modes specification

2018-05-11 Thread David Collins
Add a common device tree property for regulator nodes to support
the specification of allowed operating modes.

Signed-off-by: David Collins 
---
 Documentation/devicetree/bindings/regulator/regulator.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt 
b/Documentation/devicetree/bindings/regulator/regulator.txt
index 2babe15b..c627aa0 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -59,6 +59,11 @@ Optional properties:
 - regulator-initial-mode: initial operating mode. The set of possible operating
   modes depends on the capabilities of every hardware so each device binding
   documentation explains which values the regulator supports.
+- regulator-allowed-modes: list of operating modes that software is allowed to
+  configure for the regulator at run-time.  Elements may be specified in any
+  order.  The set of possible operating modes depends on the capabilities of
+  every hardware so each device binding document explains which values the
+  regulator supports.
 - regulator-system-load: Load in uA present on regulator that is not captured 
by
   any consumer request.
 - regulator-pull-down: Enable pull down resistor when the regulator is 
disabled.
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/2] regulator: of: add support for allowed modes configuration

2018-05-11 Thread David Collins
Add support for configuring the machine constraints
valid_modes_mask element based on a list of allowed modes
specified via a device tree property.

Signed-off-by: David Collins 
---
 drivers/regulator/of_regulator.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 0d3f73e..d61fed2 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -32,7 +32,7 @@ static void of_get_regulation_constraints(struct device_node 
*np,
struct regulator_state *suspend_state;
struct device_node *suspend_np;
unsigned int mode;
-   int ret, i;
+   int ret, i, len;
u32 pval;
 
constraints->name = of_get_property(np, "regulator-name", NULL);
@@ -136,6 +136,33 @@ static void of_get_regulation_constraints(struct 
device_node *np,
}
}
 
+   len = of_property_count_elems_of_size(np, "regulator-allowed-modes",
+   sizeof(u32));
+   if (len > 0) {
+   if (desc && desc->of_map_mode) {
+   for (i = 0; i < len; i++) {
+   ret = of_property_read_u32_index(np,
+   "regulator-allowed-modes", i, &pval);
+   if (ret) {
+   pr_err("%s: couldn't read allowed modes index %d, ret=%d\n",
+   np->name, i, ret);
+   break;
+   }
+   mode = desc->of_map_mode(pval);
+   if (mode == REGULATOR_MODE_INVALID)
+   pr_err("%s: invalid regulator-allowed-modes element %u\n",
+   np->name, pval);
+   else
+   constraints->valid_modes_mask |= mode;
+   }
+   if (constraints->valid_modes_mask)
+   constraints->valid_ops_mask
+   |= REGULATOR_CHANGE_MODE;
+   } else {
+   pr_warn("%s: mode mapping not defined\n", np->name);
+   }
+   }
+
if (!of_property_read_u32(np, "regulator-system-load", &pval))
constraints->system_load = pval;
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 0/2] regulator: of: add device tree property for allowed modes

2018-05-11 Thread David Collins
There is currently no accepted way to configure constraints->valid_modes_mask
for regulators defined in device tree.  This patch series defines a new
common regulator device tree property, regulator-allowed-modes, which can be
used to specify the set of modes that the regulator is allowed to use.
It also implements parsing for this new property inside of the
of_get_regulation_constraints() function.

David Collins (2):
  regulator: of: add property for allowed modes specification
  regulator: of: add support for allowed modes configuration

 .../devicetree/bindings/regulator/regulator.txt|  5 
 drivers/regulator/of_regulator.c   | 29 +-
 2 files changed, 33 insertions(+), 1 deletion(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/2] regulator: of: add support for allowed modes configuration

2018-05-11 Thread David Collins
Add support for configuring the machine constraints
valid_modes_mask element based on a list of allowed modes
specified via a device tree property.

Signed-off-by: David Collins 
---
 drivers/regulator/of_regulator.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 0d3f73e..d61fed2 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -32,7 +32,7 @@ static void of_get_regulation_constraints(struct device_node 
*np,
struct regulator_state *suspend_state;
struct device_node *suspend_np;
unsigned int mode;
-   int ret, i;
+   int ret, i, len;
u32 pval;
 
constraints->name = of_get_property(np, "regulator-name", NULL);
@@ -136,6 +136,33 @@ static void of_get_regulation_constraints(struct 
device_node *np,
}
}
 
+   len = of_property_count_elems_of_size(np, "regulator-allowed-modes",
+   sizeof(u32));
+   if (len > 0) {
+   if (desc && desc->of_map_mode) {
+   for (i = 0; i < len; i++) {
+   ret = of_property_read_u32_index(np,
+   "regulator-allowed-modes", i, &pval);
+   if (ret) {
+   pr_err("%s: couldn't read allowed modes index %d, ret=%d\n",
+   np->name, i, ret);
+   break;
+   }
+   mode = desc->of_map_mode(pval);
+   if (mode == REGULATOR_MODE_INVALID)
+   pr_err("%s: invalid regulator-allowed-modes element %u\n",
+   np->name, pval);
+   else
+   constraints->valid_modes_mask |= mode;
+   }
+   if (constraints->valid_modes_mask)
+   constraints->valid_ops_mask
+   |= REGULATOR_CHANGE_MODE;
+   } else {
+   pr_warn("%s: mode mapping not defined\n", np->name);
+   }
+   }
+
if (!of_property_read_u32(np, "regulator-system-load", &pval))
constraints->system_load = pval;
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 0/2] regulator: of: add device tree property for allowed modes

2018-05-11 Thread David Collins
There is currently no accepted way to configure constraints->valid_modes_mask
for regulators defined in device tree.  This patch series defines a new
common regulator device tree property, regulator-allowed-modes, which can be
used to specify the set of modes that the regulator is allowed to use.
It also implements parsing for this new property inside of the
of_get_regulation_constraints() function.

David Collins (2):
  regulator: of: add property for allowed modes specification
  regulator: of: add support for allowed modes configuration

 .../devicetree/bindings/regulator/regulator.txt|  5 
 drivers/regulator/of_regulator.c   | 29 +-
 2 files changed, 33 insertions(+), 1 deletion(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [PATCH] rcu: trace: Remove Startedleaf from trace events comment

2018-05-11 Thread Joel Fernandes
On Fri, May 11, 2018 at 6:29 PM, Joel Fernandes (Google)
 wrote:
>
> As part of the gp_seq clean up, the Startleaf condition doesn't occur
> anymore. Remove it from the comment in the trace event file.

Sorry, I meant here Startedleaf. Let me know if you want me to resend the patch.

thanks,

- Joel


Re: [PATCH] rcu: trace: Remove Startedleaf from trace events comment

2018-05-11 Thread Joel Fernandes
On Fri, May 11, 2018 at 6:29 PM, Joel Fernandes (Google)
 wrote:
>
> As part of the gp_seq clean up, the Startleaf condition doesn't occur
> anymore. Remove it from the comment in the trace event file.

Sorry, I meant here Startedleaf. Let me know if you want me to resend the patch.

thanks,

- Joel


[PATCH] rcu: trace: Remove Startedleaf from trace events comment

2018-05-11 Thread Joel Fernandes (Google)
As part of the gp_seq clean up, the Startleaf condition doesn't occur
anymore. Remove it from the comment in the trace event file.

Signed-off-by: Joel Fernandes (Google) 
---
 include/trace/events/rcu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ce9d1a1cac78..6d8dd04912d2 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -91,7 +91,6 @@ TRACE_EVENT(rcu_grace_period,
  *
  * "Startleaf": Request a grace period based on leaf-node data.
  * "Prestarted": Someone beat us to the request
- * "Startedleaf": Leaf-node start proved sufficient.
  * "Startedleafroot": Leaf-node start proved sufficient after checking root.
  * "Startedroot": Requested a nocb grace period based on root-node data.
  * "NoGPkthread": The RCU grace-period kthread has not yet started.
-- 
2.17.0.441.gb46fe60e1d-goog


[PATCH] rcu: trace: Remove Startedleaf from trace events comment

2018-05-11 Thread Joel Fernandes (Google)
As part of the gp_seq clean up, the Startleaf condition doesn't occur
anymore. Remove it from the comment in the trace event file.

Signed-off-by: Joel Fernandes (Google) 
---
 include/trace/events/rcu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ce9d1a1cac78..6d8dd04912d2 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -91,7 +91,6 @@ TRACE_EVENT(rcu_grace_period,
  *
  * "Startleaf": Request a grace period based on leaf-node data.
  * "Prestarted": Someone beat us to the request
- * "Startedleaf": Leaf-node start proved sufficient.
  * "Startedleafroot": Leaf-node start proved sufficient after checking root.
  * "Startedroot": Requested a nocb grace period based on root-node data.
  * "NoGPkthread": The RCU grace-period kthread has not yet started.
-- 
2.17.0.441.gb46fe60e1d-goog


[RFC] crypto: Remove mcryptd

2018-05-11 Thread Megha Dey
This patch attempts to remove the mcryptd interface and expose the
sha1 multibuffer algorithm as a proper ahash to the inner algorithm.

1. Host the flusher helper in sha1_mb.c instead of mcryptd.c (need to
change the names of these functions)
2. Remove unnecessary mcryptd structure mcryptd_hash_ctx
(combine sha_mb_ctx and mcryptd_hash_ctx)
3. Introduce a new simd_ahash_create_compat() similar to the
simd_skcipher_create_compat() in simd.c. This registers the outer
algorithm. Remove existing outer algorithm.
4. In the outer layer(simd wrapper), pass the right pointers to the
inner algorithm.(will shift 3 and 4 to simd.c later)
5. Remove mcryptd.c
6. Update the name, driver name and priority of inner algorithm.

Herbert,
I would like to know if the above approach is what you are suggesting.
The problem with this approach is there is no async workqueue context
which issues completions. Instead everything runs in a single thread of
execution. You had suggested that the SIMD wrapper will defer the job to
the Kthread context, but I am not sure that will be done.

Please let me know what you think.

Signed-off-by: Megha Dey 
---
 arch/x86/crypto/sha1-mb/sha1_mb.c | 312 +++--
 crypto/Makefile   |   1 -
 crypto/mcryptd.c  | 702 --
 include/crypto/mcryptd.h  |   5 -
 4 files changed, 200 insertions(+), 820 deletions(-)
 delete mode 100644 crypto/mcryptd.c

diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c 
b/arch/x86/crypto/sha1-mb/sha1_mb.c
index acf9fdf..b8c03ce 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -71,10 +71,62 @@
 
 #define FLUSH_INTERVAL 1000 /* in usec */
 
+static struct crypto_ahash *tfm_compact;
+
+struct mcryptd_flush_list {
+   struct list_head list;
+   struct mutex lock;
+};
+
+static struct mcryptd_flush_list __percpu *mcryptd_flist;
+
+void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long 
delay)
+{
+   struct mcryptd_flush_list *flist;
+
+   if (!cstate->flusher_engaged) {
+   /* put the flusher on the flush list */
+   flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
+   mutex_lock(&flist->lock);
+   list_add_tail(&cstate->flush_list, &flist->list);
+   cstate->flusher_engaged = true;
+   cstate->next_flush = jiffies + delay;
+   queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
+   &cstate->flush, delay);
+   mutex_unlock(&flist->lock);
+   }
+}
+
+void mcryptd_flusher(struct work_struct *__work)
+{
+   struct  mcryptd_alg_cstate  *alg_cpu_state;
+   struct  mcryptd_alg_state   *alg_state;
+   struct  mcryptd_flush_list  *flist;
+   int cpu;
+
+   cpu = smp_processor_id();
+   alg_cpu_state = container_of(to_delayed_work(__work),
+   struct mcryptd_alg_cstate, flush);
+   alg_state = alg_cpu_state->alg_state;
+   if (alg_cpu_state->cpu != cpu)
+   pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
+   cpu, alg_cpu_state->cpu);
+
+   if (alg_cpu_state->flusher_engaged) {
+   flist = per_cpu_ptr(mcryptd_flist, cpu);
+   mutex_lock(&flist->lock);
+   list_del(&alg_cpu_state->flush_list);
+   alg_cpu_state->flusher_engaged = false;
+   mutex_unlock(&flist->lock);
+   alg_state->flusher(alg_cpu_state);
+   }
+}
+
 static struct mcryptd_alg_state sha1_mb_alg_state;
 
 struct sha1_mb_ctx {
-   struct mcryptd_ahash *mcryptd_tfm;
+   struct crypto_ahash *child;
+   struct mcryptd_alg_state *alg_state;
 };
 
 static inline struct mcryptd_hash_request_ctx
@@ -530,7 +582,6 @@ static int sha1_mb_update(struct ahash_request *areq)
struct sha1_hash_ctx *sha_ctx;
int ret = 0, nbytes;
 
-
/* sanity check */
if (rctx->tag.cpu != smp_processor_id()) {
pr_err("mcryptd error: cpu clash\n");
@@ -667,7 +718,6 @@ static int sha1_mb_final(struct ahash_request *areq)
sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
HASH_LAST);
kernel_fpu_end();
-
/* check if anything is returned */
if (!sha_ctx)
return -EINPROGRESS;
@@ -707,21 +757,12 @@ static int sha1_mb_import(struct ahash_request *areq, 
const void *in)
 
 static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
 {
-   struct mcryptd_ahash *mcryptd_tfm;
struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-   struct mcryptd_hash_ctx *mctx;
 
-   mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
-   CRYPTO_ALG_INTERNAL,
-   CRYPTO_ALG_INTERNAL);
-   if (IS_ERR(mcryptd_tfm))
-   

[RFC] crypto: Remove mcryptd

2018-05-11 Thread Megha Dey
This patch attempts to remove the mcryptd interface and expose the
sha1 multibuffer algorithm as a proper ahash to the inner algorithm.

1. Host the flusher helper in sha1_mb.c instead of mcryptd.c (need to
change the names of these functions)
2. Remove unnecessary mcryptd structure mcryptd_hash_ctx
(combine sha_mb_ctx and mcryptd_hash_ctx)
3. Introduce a new simd_ahash_create_compat() similar to the
simd_skcipher_create_compat() in simd.c. This registers the outer
algorithm. Remove existing outer algorithm.
4. In the outer layer(simd wrapper), pass the right pointers to the
inner algorithm.(will shift 3 and 4 to simd.c later)
5. Remove mcryptd.c
6. Update the name, driver name and priority of inner algorithm.

Herbert,
I would like to know if the above approach is what you are suggesting.
The problem with this approach is there is no async workqueue context
which issues completions. Instead everything runs in a single thread of
execution. You had suggested that the SIMD wrapper will defer the job to
the Kthread context, but I am not sure that will be done.

Please let me know what you think.

Signed-off-by: Megha Dey 
---
 arch/x86/crypto/sha1-mb/sha1_mb.c | 312 +++--
 crypto/Makefile   |   1 -
 crypto/mcryptd.c  | 702 --
 include/crypto/mcryptd.h  |   5 -
 4 files changed, 200 insertions(+), 820 deletions(-)
 delete mode 100644 crypto/mcryptd.c

diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c 
b/arch/x86/crypto/sha1-mb/sha1_mb.c
index acf9fdf..b8c03ce 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -71,10 +71,62 @@
 
 #define FLUSH_INTERVAL 1000 /* in usec */
 
+static struct crypto_ahash *tfm_compact;
+
+struct mcryptd_flush_list {
+   struct list_head list;
+   struct mutex lock;
+};
+
+static struct mcryptd_flush_list __percpu *mcryptd_flist;
+
+void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long 
delay)
+{
+   struct mcryptd_flush_list *flist;
+
+   if (!cstate->flusher_engaged) {
+   /* put the flusher on the flush list */
+   flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
+   mutex_lock(&flist->lock);
+   list_add_tail(&cstate->flush_list, &flist->list);
+   cstate->flusher_engaged = true;
+   cstate->next_flush = jiffies + delay;
+   queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
+   &cstate->flush, delay);
+   mutex_unlock(&flist->lock);
+   }
+}
+
+void mcryptd_flusher(struct work_struct *__work)
+{
+   struct  mcryptd_alg_cstate  *alg_cpu_state;
+   struct  mcryptd_alg_state   *alg_state;
+   struct  mcryptd_flush_list  *flist;
+   int cpu;
+
+   cpu = smp_processor_id();
+   alg_cpu_state = container_of(to_delayed_work(__work),
+   struct mcryptd_alg_cstate, flush);
+   alg_state = alg_cpu_state->alg_state;
+   if (alg_cpu_state->cpu != cpu)
+   pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
+   cpu, alg_cpu_state->cpu);
+
+   if (alg_cpu_state->flusher_engaged) {
+   flist = per_cpu_ptr(mcryptd_flist, cpu);
+   mutex_lock(&flist->lock);
+   list_del(&alg_cpu_state->flush_list);
+   alg_cpu_state->flusher_engaged = false;
+   mutex_unlock(&flist->lock);
+   alg_state->flusher(alg_cpu_state);
+   }
+}
+
 static struct mcryptd_alg_state sha1_mb_alg_state;
 
 struct sha1_mb_ctx {
-   struct mcryptd_ahash *mcryptd_tfm;
+   struct crypto_ahash *child;
+   struct mcryptd_alg_state *alg_state;
 };
 
 static inline struct mcryptd_hash_request_ctx
@@ -530,7 +582,6 @@ static int sha1_mb_update(struct ahash_request *areq)
struct sha1_hash_ctx *sha_ctx;
int ret = 0, nbytes;
 
-
/* sanity check */
if (rctx->tag.cpu != smp_processor_id()) {
pr_err("mcryptd error: cpu clash\n");
@@ -667,7 +718,6 @@ static int sha1_mb_final(struct ahash_request *areq)
sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
HASH_LAST);
kernel_fpu_end();
-
/* check if anything is returned */
if (!sha_ctx)
return -EINPROGRESS;
@@ -707,21 +757,12 @@ static int sha1_mb_import(struct ahash_request *areq, 
const void *in)
 
 static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
 {
-   struct mcryptd_ahash *mcryptd_tfm;
struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-   struct mcryptd_hash_ctx *mctx;
 
-   mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
-   CRYPTO_ALG_INTERNAL,
-   CRYPTO_ALG_INTERNAL);
-   if (IS_ERR(mcryptd_tfm))
-   return PTR_ERR(mcryptd_tfm);
-  

RE: [PATCH V8 1/5] crypto: Multi-buffer encryption infrastructure support

2018-05-11 Thread Dey, Megha


>-Original Message-
>From: Herbert Xu [mailto:herb...@gondor.apana.org.au]
>Sent: Thursday, May 10, 2018 9:46 PM
>To: Dey, Megha 
>Cc: linux-kernel@vger.kernel.org; linux-cry...@vger.kernel.org;
>da...@davemloft.net
>Subject: Re: [PATCH V8 1/5] crypto: Multi-buffer encryption infrastructure
>support
>
>On Fri, May 11, 2018 at 01:24:42AM +, Dey, Megha wrote:
>>
>> Are you suggesting that the SIMD wrapper, will do what is currently being
>done by the ' mcryptd_queue_worker ' function (assuming FPU is not disabled)
>i.e dispatching the job to the inner algorithm?
>>
>> I have got rid of the mcryptd layer( have an inner layer, outer SIMD layer,
>handled the pointers and completions accordingly), but still facing some issues
>after removing the per cpu mcryptd_cpu_queue.
>
>Why don't you post what you've got and we can work it out together?

Hi Herbert,

Sure, I will post an RFC patch. (crypto: Remove mcryptd). 

>
>Thanks,
>--
>Email: Herbert Xu  Home Page:
>http://gondor.apana.org.au/~herbert/
>PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


RE: [PATCH V8 1/5] crypto: Multi-buffer encryption infrastructure support

2018-05-11 Thread Dey, Megha


>-Original Message-
>From: Herbert Xu [mailto:herb...@gondor.apana.org.au]
>Sent: Thursday, May 10, 2018 9:46 PM
>To: Dey, Megha 
>Cc: linux-kernel@vger.kernel.org; linux-cry...@vger.kernel.org;
>da...@davemloft.net
>Subject: Re: [PATCH V8 1/5] crypto: Multi-buffer encryption infrastructure
>support
>
>On Fri, May 11, 2018 at 01:24:42AM +, Dey, Megha wrote:
>>
>> Are you suggesting that the SIMD wrapper, will do what is currently being
>done by the ' mcryptd_queue_worker ' function (assuming FPU is not disabled)
>i.e dispatching the job to the inner algorithm?
>>
>> I have got rid of the mcryptd layer( have an inner layer, outer SIMD layer,
>handled the pointers and completions accordingly), but still facing some issues
>after removing the per cpu mcryptd_cpu_queue.
>
>Why don't you post what you've got and we can work it out together?

Hi Herbert,

Sure, I will post an RFC patch. (crypto: Remove mcryptd). 

>
>Thanks,
>--
>Email: Herbert Xu  Home Page:
>http://gondor.apana.org.au/~herbert/
>PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: WARNING: bad unlock balance in xfs_iunlock

2018-05-11 Thread Dave Chinner
On Fri, May 11, 2018 at 10:59:53AM +0200, Dmitry Vyukov wrote:
> On Thu, May 10, 2018 at 1:22 AM, Dave Chinner  wrote:
> > On Wed, May 09, 2018 at 10:43:05AM +0200, Dmitry Vyukov wrote:
> >> Does "xfstests fuzzing infrastructure" use coverage-guidance?
> >
> > It's guided manually to fuzz a substantial proportion of the fields
> > in the on-disk format that are susceptible to fuzzing-based
> > attacks. It's not complete coverage yet, but it's getting better and
> > better, and we're finding more problems from it than random bit
> > based fuzzing has ever uncovered.
> >
> > Also, the xfstests fuzzing defeats the CRC protection now built into
> > the metadata, which means it can exercise all the new filesystem
> > features that random bit fuzzers cannot exercise. That's the problem
> > with fuzzers like syzbot - they can only usefully fuzz the legacy
> > filesystem format which doesn't have CRC validation, nor many of the
> > other protections that the current filesystem format has to detect
> > corruption. This will also allow us to test things like online
> > repair of fuzzed structures
> 
> syzkaller has 2 techniques to deal with checksums, if you are
> interested I can go into more detail.

You can if you want, but I'm betting it basically comes down to
teaching syzkaller about parts of the on-disk format, similar to
AFL. And, like AFL, I doubt any XFS developer has the time to
add such support to syzbot.

> > Given the results we're getting from our own fuzzers, I don't see
> > much point in (XFS developers) investing huge amounts of effort to
> > make some other fuzzer equivalent to what we already have. If
> > someone else starts fuzzing the current format (v5) XFS filesystems
> > and finding problems we haven't, then I'm going to be interested in
> > their fuzzing tools.  But (guided) random bit perturbation fuzzing
> > of legacy filesystem formats is really not that useful or
> > interesting to us right now.
> 
> Just asked.
> 
> Note that coverage-guidance does not necessary mean bit flipping.
> syzkaller combines coverage-guidance with grammar-awareness and other
> smartness.

Yup, I assumed that this would be the case - those sorts of
"directed fuzzing" techniques were pioneered by the Samba guys for
reverse engineering the SMB protocol used by MS servers all those
years ago. But at its most basic level, it's still using bit
flipping techniques to perturb the input and provoke responses.

> Based on our experience with network testing, main advantage of
> syzkaller over just feeding blobs as network packets (even if these
> blobs are built in a very smart way) is the following. syzkaller can
> build complex interactions between syscalls, external inputs and
> blobs.

Yup, nothing new there - that's what every other filesystem fuzzer
infrastructure does, too.  The problem with this is that it doesn't
pin-point the actual operation that tripped over the on-disk
corruption. It's catching downstream symptoms of an unknown,
undetected on-disk format corruption. i.e. it's a poor substitute
for explicit testing of structure bounds and data relationships of a
known format.

That's the fundamental premise of fuzz testing - most software does
not have robust validation of its inputs and so fuzzing those
inputs finds problems. We've moved on from the old "trust and don't
validate" model of filesystem structure architecture.  The on-disk
format is very well defined, it is constrained in most cases, and we
can validate most individual structures at runtime with relatively
little cost.

Hence the "structure bounds" exploits that fuzzers tend to exercise
are pretty much taken out of the picture, and that leaves us with
"data relationships" between structures as the main vector for
undetected corruptions. These are mostly detectable, and many are
correctable as the current on-disk format has a lot of redundant
information. So the space for fuzzers to detect problems is getting
smaller and smaller all the time.

IOWs, filesystem image fuzzers have their place, but if you want us
to take your fuzzing seriously then your fuzzer needs to understand
all the mechanisms we now use to detect corruptions to show us where
they are deficient. If your fuzzing doesn't expose flaws in our
current validation techniques, then it's really not useful to us.

> For example, handling of external network packets depend on if
> there is an open socket on that port, what setsockopts were called, if
> there is a pending receive, what flags were passed to that receive,
> were some data sent the other way, etc. For filesystems that would be
> various filesystem syscalls executed against the mounted image,
> concurrent umount, rebind, switch to read-only mode, etc.
> But maybe xfstests do this too, I don't know. Do they?

Generally there is no need to do this because we know exactly what
syscalls will trigger access and/or modification to on-disk
structures. Access to the on-disk structures triggers the built 

Re: WARNING: bad unlock balance in xfs_iunlock

2018-05-11 Thread Dave Chinner
On Fri, May 11, 2018 at 10:59:53AM +0200, Dmitry Vyukov wrote:
> On Thu, May 10, 2018 at 1:22 AM, Dave Chinner  wrote:
> > On Wed, May 09, 2018 at 10:43:05AM +0200, Dmitry Vyukov wrote:
> >> Does "xfstests fuzzing infrastructure" use coverage-guidance?
> >
> > It's guided manually to fuzz a substantial proportion of the fields
> > in the on-disk format that are susceptible to fuzzing-based
> > attacks. It's not complete coverage yet, but it's getting better and
> > better, and we're finding more problems from it than random bit
> > based fuzzing has ever uncovered.
> >
> > Also, the xfstests fuzzing defeats the CRC protection now built into
> > the metadata, which means it can exercise all the new filesystem
> > features that random bit fuzzers cannot exercise. That's the problem
> > with fuzzers like syzbot - they can only usefully fuzz the legacy
> > filesystem format which doesn't have CRC validation, nor many of the
> > other protections that the current filesystem format has to detect
> > corruption. This will also allow us to test things like online
> > repair of fuzzed structures
> 
> syzkaller has 2 techniques to deal with checksums, if you are
> interested I can go into more detail.

You can if you want, but I'm betting it basically comes down to
teaching syzkaller about parts of the on-disk format, similar to
AFL. And, like AFL, I doubt any XFS developer has the time to
add such support to syzbot.

> > Given the results we're getting from our own fuzzers, I don't see
> > much point in (XFS developers) investing huge amounts of effort to
> > make some other fuzzer equivalent to what we already have. If
> > someone else starts fuzzing the current format (v5) XFS filesystems
> > and finding problems we haven't, then I'm going to be interested in
> > their fuzzing tools.  But (guided) random bit perturbation fuzzing
> > of legacy filesystem formats is really not that useful or
> > interesting to us right now.
> 
> Just asked.
> 
> Note that coverage-guidance does not necessary mean bit flipping.
> syzkaller combines coverage-guidance with grammar-awareness and other
> smartness.

Yup, I assumed that this would be the case - those sorts of
"directed fuzzing" techniques were pioneered by the Samba guys for
reverse engineering the SMB protocol used by MS servers all those
years ago. But at its most basic level, it's still using bit
flipping techniques to perturb the input and provoke responses.

> Based on our experience with network testing, main advantage of
> syzkaller over just feeding blobs as network packets (even if these
> blobs are built in a very smart way) is the following. syzkaller can
> build complex interactions between syscalls, external inputs and
> blobs.

Yup, nothing new there - that's what every other filesystem fuzzer
infrastructure does, too.  The problem with this is that it doesn't
pin-point the actual operation that tripped over the on-disk
corruption. It's catching downstream symptoms of an unknown,
undetected on-disk format corruption. i.e. it's a poor substitute
for explicit testing of structure bounds and data relationships of a
known format.

That's the fundamental premise of fuzz testing - most software does
not have robust validation of its inputs and so fuzzing those
inputs finds problems. We've moved on from the old "trust and don't
validate" model of filesystem structure architecture.  The on-disk
format is very well defined, it is constrained in most cases, and we
can validate most individual structures at runtime with relatively
little cost.

Hence the "structure bounds" exploits that fuzzers tend to exercise
are pretty much taken out of the picture, and that leaves us with
"data relationships" between structures as the main vector for
undetected corruptions. These are mostly detectable, and many are
correctable as the current on-disk format has a lot of redundant
information. So the space for fuzzers to detect problems is getting
smaller and smaller all the time.

IOWs, filesystem image fuzzers have their place, but if you want us
to take your fuzzing seriously then your fuzzer needs to understand
all the mechanisms we now use to detect corruptions to show us where
they are deficient. If your fuzzing doesn't expose flaws in our
current validation techniques, then it's really not useful to us.

> For example, handling of external network packets depend on if
> there is an open socket on that port, what setsockopts were called, if
> there is a pending receive, what flags were passed to that receive,
> were some data sent the other way, etc. For filesystems that would be
> various filesystem syscalls executed against the mounted image,
> concurrent umount, rebind, switch to read-only mode, etc.
> But maybe xfstests do this too, I don't know. Do they?

Generally there is no need to do this because we know exactly what
syscalls will trigger access and/or modification to on-disk
structures. Access to the on-disk structures triggers the built in
verifier 

Re: [PATCH 0/3] KVM: VMX: Allow to disable ioport intercept per-VM by userspace

2018-05-11 Thread Wanpeng Li
2018-05-11 23:40 GMT+08:00 Konrad Rzeszutek Wilk :
> On Mon, Apr 16, 2018 at 10:45:59PM -0700, Wanpeng Li wrote:
>> Tim Shearer reported that "There is a guest which is running a packet
>> forwarding app based on the DPDK (dpdk.org). The packet receive routine
>> writes to 0xc070 using glibc's "outw_p" function which does an additional
>> write to I/O port 0x80. It does this write for every packet that's
>> received, causing a flood of KVM userspace context switches". He uses
>> mpstat to observe a CPU performing L2 packet forwarding on a pinned
>> guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
>> bypass, however, it is 65.78 percent when I/O port 0x80 bypass is
>> disabled.
>>
>> This patchset introduces per-VM I/O permission bitmaps, the userspace
>> can disable the ioport intercept when they are more concerned about
>> performance than security.
>
> Could you kindly also add:
>
> Suggested-by: Konrad Rzeszutek Wilk 

Yeah, both you and Liran give the original idea. :) Tim and Liran, any
review for the patchset?

Regards,
Wanpeng Li


Re: [PATCH 0/3] KVM: VMX: Allow to disable ioport intercept per-VM by userspace

2018-05-11 Thread Wanpeng Li
2018-05-11 23:40 GMT+08:00 Konrad Rzeszutek Wilk :
> On Mon, Apr 16, 2018 at 10:45:59PM -0700, Wanpeng Li wrote:
>> Tim Shearer reported that "There is a guest which is running a packet
>> forwarding app based on the DPDK (dpdk.org). The packet receive routine
>> writes to 0xc070 using glibc's "outw_p" function which does an additional
>> write to I/O port 0x80. It does this write for every packet that's
>> received, causing a flood of KVM userspace context switches". He uses
>> mpstat to observe a CPU performing L2 packet forwarding on a pinned
>> guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
>> bypass, however, it is 65.78 percent when I/O port 0x80 bypass is
>> disabled.
>>
>> This patchset introduces per-VM I/O permission bitmaps, the userspace
>> can disable the ioport intercept when they are more concerned about
>> performance than security.
>
> Could you kindly also add:
>
> Suggested-by: Konrad Rzeszutek Wilk 

Yeah, both you and Liran give the original idea. :) Tim and Liran, any
review for the patchset?

Regards,
Wanpeng Li


Re: KASAN: null-ptr-deref Read in rds_ib_get_mr

2018-05-11 Thread Yanjun Zhu



On 2018/5/12 0:58, Santosh Shilimkar wrote:

On 5/11/2018 12:48 AM, Yanjun Zhu wrote:



On 2018/5/11 13:20, DaeRyong Jeong wrote:

We report the crash: KASAN: null-ptr-deref Read in rds_ib_get_mr

Note that this bug was previously reported by syzkaller.
https://syzkaller.appspot.com/bug?id=0bb56a5a48b000b52aa2b0d8dd20b1f545214d91

Nonetheless, this bug has not been fixed yet, and we hope that this report
and our analysis, which is aided by RaceFuzzer's features, will be helpful
in fixing the crash.

This crash has been found in v4.17-rc1 using RaceFuzzer (a modified
version of Syzkaller), which we describe more at the end of this
report. Our analysis shows that the race occurs when invoking two
syscalls concurrently, bind$rds and setsockopt$RDS_GET_MR.


Analysis:
We think the concurrent execution of __rds_rdma_map() and rds_bind()
causes the problem. __rds_rdma_map() checks whether rs->rs_bound_addr is 0
or not. But the concurrent execution with rds_bind() can bypass this
check. Therefore, __rds_rdma_map() calls rs->rs_transport->get_mr() and
rds_ib_get_mr() causes the null deref at ib_rdma.c:544 in v4.17-rc1, when
dereferencing rs_conn.


Thread interleaving:
CPU0 (__rds_rdma_map)    CPU1 (rds_bind)
    // rds_add_bound() sets rs->bound_addr 
as none 0
    ret = rds_add_bound(rs, 
sin->sin_addr.s_addr, &sin->sin_port);

if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
    if (rs->rs_transport) { /* previously 
bound */

    trans = rs->rs_transport;
    if 
(trans->laddr_check(sock_net(sock->sk),

sin->sin_addr.s_addr) != 0) {
    ret = -ENOPROTOOPT;
    // rds_remove_bound() sets 
rs->bound_addr as 0

    rds_remove_bound(rs);
...
trans_private = rs->rs_transport->get_mr(sg, nents, rs,
 &mr->r_key);
(in rds_ib_get_mr())
struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;


Call sequence (v4.17-rc1):
CPU0
rds_setsockopt
rds_get_mr
    __rds_rdma_map
    rds_ib_get_mr


CPU1
rds_bind
rds_add_bound
...
rds_remove_bound


Crash log:
==
BUG: KASAN: null-ptr-deref in rds_ib_get_mr+0x3a/0x150 
net/rds/ib_rdma.c:544

Read of size 8 at addr 0068 by task syz-executor0/32067

CPU: 0 PID: 32067 Comm: syz-executor0 Not tainted 4.17.0-rc1 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014

Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x166/0x21c lib/dump_stack.c:113
  kasan_report_error mm/kasan/report.c:352 [inline]
  kasan_report+0x140/0x360 mm/kasan/report.c:412
  check_memory_region_inline mm/kasan/kasan.c:260 [inline]
  __asan_load8+0x54/0x90 mm/kasan/kasan.c:699
  rds_ib_get_mr+0x3a/0x150 net/rds/ib_rdma.c:544
  __rds_rdma_map+0x521/0x9d0 net/rds/rdma.c:271
  rds_get_mr+0xad/0xf0 net/rds/rdma.c:333
  rds_setsockopt+0x57f/0x720 net/rds/af_rds.c:347
  __sys_setsockopt+0x147/0x230 net/socket.c:1903
  __do_sys_setsockopt net/socket.c:1914 [inline]
  __se_sys_setsockopt net/socket.c:1911 [inline]
  __x64_sys_setsockopt+0x67/0x80 net/socket.c:1911
  do_syscall_64+0x15f/0x4a0 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4563f9
RSP: 002b:7f6a2b3c2b28 EFLAGS: 0246 ORIG_RAX: 0036
RAX: ffda RBX: 0072bee0 RCX: 004563f9
RDX: 0002 RSI: 0114 RDI: 0015
RBP: 0575 R08: 0020 R09: 
R10: 2140 R11: 0246 R12: 7f6a2b3c36d4
R13:  R14: 006fd398 R15: 
==

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e678699..2228b50 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -539,11 +539,17 @@ void rds_ib_flush_mrs(void)
  void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 struct rds_sock *rs, u32 *key_ret)
  {
-   struct rds_ib_device *rds_ibdev;
+   struct rds_ib_device *rds_ibdev = NULL;
 struct rds_ib_mr *ibmr = NULL;
-   struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
+   struct rds_ib_connection *ic = NULL;
 int ret;

+   if (rs->rs_bound_addr == 0) {
+   ret = -EPERM;
+   goto out;
+   }
+

No you can't return such error for this API and the
socket related checks needs to be done at core layer.
I remember fixing this race but probably never pushed
fix upstream.

OK. Wait for your patch. :-)


The MR code is due for update with optimized FRWR code
which now stable enough. We will address this issue as

Re: KASAN: null-ptr-deref Read in rds_ib_get_mr

2018-05-11 Thread Yanjun Zhu



On 2018/5/12 0:58, Santosh Shilimkar wrote:

On 5/11/2018 12:48 AM, Yanjun Zhu wrote:



On 2018/5/11 13:20, DaeRyong Jeong wrote:

We report the crash: KASAN: null-ptr-deref Read in rds_ib_get_mr

Note that this bug was previously reported by syzkaller.
https://syzkaller.appspot.com/bug?id=0bb56a5a48b000b52aa2b0d8dd20b1f545214d91

Nonetheless, this bug has not been fixed yet, and we hope that this report
and our analysis, which is aided by RaceFuzzer's features, will be helpful
in fixing the crash.

This crash has been found in v4.17-rc1 using RaceFuzzer (a modified
version of Syzkaller), which we describe more at the end of this
report. Our analysis shows that the race occurs when invoking two
syscalls concurrently, bind$rds and setsockopt$RDS_GET_MR.


Analysis:
We think the concurrent execution of __rds_rdma_map() and rds_bind()
causes the problem. __rds_rdma_map() checks whether rs->rs_bound_addr is 0
or not. But the concurrent execution with rds_bind() can bypass this
check. Therefore, __rds_rdma_map() calls rs->rs_transport->get_mr() and
rds_ib_get_mr() causes the null deref at ib_rdma.c:544 in v4.17-rc1, when
dereferencing rs_conn.


Thread interleaving:
CPU0 (__rds_rdma_map)    CPU1 (rds_bind)
    // rds_add_bound() sets rs->bound_addr 
as none 0
    ret = rds_add_bound(rs, 
sin->sin_addr.s_addr, &sin->sin_port);

if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
    if (rs->rs_transport) { /* previously 
bound */

    trans = rs->rs_transport;
    if 
(trans->laddr_check(sock_net(sock->sk),

sin->sin_addr.s_addr) != 0) {
    ret = -ENOPROTOOPT;
    // rds_remove_bound() sets 
rs->bound_addr as 0

    rds_remove_bound(rs);
...
trans_private = rs->rs_transport->get_mr(sg, nents, rs,
 &mr->r_key);
(in rds_ib_get_mr())
struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;


Call sequence (v4.17-rc1):
CPU0
rds_setsockopt
rds_get_mr
    __rds_rdma_map
    rds_ib_get_mr


CPU1
rds_bind
rds_add_bound
...
rds_remove_bound


Crash log:
==
BUG: KASAN: null-ptr-deref in rds_ib_get_mr+0x3a/0x150 
net/rds/ib_rdma.c:544

Read of size 8 at addr 0068 by task syz-executor0/32067

CPU: 0 PID: 32067 Comm: syz-executor0 Not tainted 4.17.0-rc1 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014

Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x166/0x21c lib/dump_stack.c:113
  kasan_report_error mm/kasan/report.c:352 [inline]
  kasan_report+0x140/0x360 mm/kasan/report.c:412
  check_memory_region_inline mm/kasan/kasan.c:260 [inline]
  __asan_load8+0x54/0x90 mm/kasan/kasan.c:699
  rds_ib_get_mr+0x3a/0x150 net/rds/ib_rdma.c:544
  __rds_rdma_map+0x521/0x9d0 net/rds/rdma.c:271
  rds_get_mr+0xad/0xf0 net/rds/rdma.c:333
  rds_setsockopt+0x57f/0x720 net/rds/af_rds.c:347
  __sys_setsockopt+0x147/0x230 net/socket.c:1903
  __do_sys_setsockopt net/socket.c:1914 [inline]
  __se_sys_setsockopt net/socket.c:1911 [inline]
  __x64_sys_setsockopt+0x67/0x80 net/socket.c:1911
  do_syscall_64+0x15f/0x4a0 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4563f9
RSP: 002b:7f6a2b3c2b28 EFLAGS: 0246 ORIG_RAX: 0036
RAX: ffda RBX: 0072bee0 RCX: 004563f9
RDX: 0002 RSI: 0114 RDI: 0015
RBP: 0575 R08: 0020 R09: 
R10: 2140 R11: 0246 R12: 7f6a2b3c36d4
R13:  R14: 006fd398 R15: 
==

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e678699..2228b50 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -539,11 +539,17 @@ void rds_ib_flush_mrs(void)
  void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 struct rds_sock *rs, u32 *key_ret)
  {
-   struct rds_ib_device *rds_ibdev;
+   struct rds_ib_device *rds_ibdev = NULL;
 struct rds_ib_mr *ibmr = NULL;
-   struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
+   struct rds_ib_connection *ic = NULL;
 int ret;

+   if (rs->rs_bound_addr == 0) {
+   ret = -EPERM;
+   goto out;
+   }
+

No you can't return such error for this API and the
socket related checks needs to be done at core layer.
I remember fixing this race but probably never pushed
fix upstream.

OK. Wait for your patch. :-)


The MR code is due for update with optimized FRWR code
which now stable enough. We will address this issue as

Re: [BUGFIX PATCH v3 0/4] arm: kprobes: Fix to prohibit probing on unsafe functions

2018-05-11 Thread Masami Hiramatsu
Hi Greg,

Could you pick this series to stable?

Thank you,

On Tue, 8 May 2018 12:25:03 +0100
Russell King - ARM Linux  wrote:

> On Fri, May 04, 2018 at 01:14:31PM +0900, Masami Hiramatsu wrote:
> > Hi,
> > 
> > This is the 3rd version of bugfix series for kprobes on arm.
> > This series fixes 4 different issues which I found.
> > 
> >  - Fix to use smp_processor_id() after disabling preemption.
> >  - Prohibit probing on optimized_callback() for avoiding
> >recursive probe.
> >  - Prohibit kprobes on do_undefinstr() by same reason.
> >  - Prohibit kprobes on get_user() by same reason.
> > 
> > From v2, I included another 2 bugfixes (1/4 and 2/4)
> > which are not merged yet, and added "Cc: sta...@vger.kernel.org",
> > since there are obvious bugs.
> 
> Please submit them to the patch system, thanks.
> 
> > 
> > Thanks,
> > 
> > ---
> > 
> > Masami Hiramatsu (4):
> >   arm: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed
> >   arm: kprobes: Prohibit probing on optimized_callback
> >   arm: kprobes: Prohibit kprobes on do_undefinstr
> >   arm: kprobes: Prohibit kprobes on get_user functions
> > 
> > 
> >  arch/arm/include/asm/assembler.h  |   10 ++
> >  arch/arm/kernel/traps.c   |5 -
> >  arch/arm/lib/getuser.S|   10 ++
> >  arch/arm/probes/kprobes/opt-arm.c |4 +++-
> >  4 files changed, 27 insertions(+), 2 deletions(-)
> > 
> > --
> > Masami Hiramatsu (Linaro) 
> 
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
> According to speedtest.net: 8.21Mbps down 510kbps up


-- 
Masami Hiramatsu 


Re: [BUGFIX PATCH v3 0/4] arm: kprobes: Fix to prohibit probing on unsafe functions

2018-05-11 Thread Masami Hiramatsu
Hi Greg,

Could you pick this series to stable?

Thank you,

On Tue, 8 May 2018 12:25:03 +0100
Russell King - ARM Linux  wrote:

> On Fri, May 04, 2018 at 01:14:31PM +0900, Masami Hiramatsu wrote:
> > Hi,
> > 
> > This is the 3rd version of bugfix series for kprobes on arm.
> > This series fixes 4 different issues which I found.
> > 
> >  - Fix to use smp_processor_id() after disabling preemption.
> >  - Prohibit probing on optimized_callback() for avoiding
> >recursive probe.
> >  - Prohibit kprobes on do_undefinstr() by same reason.
> >  - Prohibit kprobes on get_user() by same reason.
> > 
> > From v2, I included another 2 bugfixes (1/4 and 2/4)
> > which are not merged yet, and added "Cc: sta...@vger.kernel.org",
> > since there are obvious bugs.
> 
> Please submit them to the patch system, thanks.
> 
> > 
> > Thanks,
> > 
> > ---
> > 
> > Masami Hiramatsu (4):
> >   arm: kprobes: Fix to use get_kprobe_ctlblk after irq-disabed
> >   arm: kprobes: Prohibit probing on optimized_callback
> >   arm: kprobes: Prohibit kprobes on do_undefinstr
> >   arm: kprobes: Prohibit kprobes on get_user functions
> > 
> > 
> >  arch/arm/include/asm/assembler.h  |   10 ++
> >  arch/arm/kernel/traps.c   |5 -
> >  arch/arm/lib/getuser.S|   10 ++
> >  arch/arm/probes/kprobes/opt-arm.c |4 +++-
> >  4 files changed, 27 insertions(+), 2 deletions(-)
> > 
> > --
> > Masami Hiramatsu (Linaro) 
> 
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
> According to speedtest.net: 8.21Mbps down 510kbps up


-- 
Masami Hiramatsu 


[PATCH] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Joel Fernandes (Google)
rcu_seq_snap may be tricky for someone looking at it for the first time.
Let's document how it works with an example to make it easier.

Signed-off-by: Joel Fernandes (Google) 
---
 kernel/rcu/rcu.h | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 003671825d62..004ace3d22c2 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -91,7 +91,28 @@ static inline void rcu_seq_end(unsigned long *sp)
WRITE_ONCE(*sp, rcu_seq_endval(sp));
 }
 
-/* Take a snapshot of the update side's sequence number. */
+/*
+ * Take a snapshot of the update side's sequence number.
+ *
+ * This function predicts what the grace period number will be the next
+ * time an RCU callback will be executed, given the current grace period's
+ * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
+ * already in progress.
+ *
+ * We do this with a single addition and masking.
+ * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) of
+ * the seq is used to track if a GP is in progress or not, it's sufficient if we
+ * add (2+1) and mask with ~1. Let's see why with an example:
+ *
+ * Say the current seq is 6 which is 0b110 (gp is 3 and state bit is 0).
+ * To get the next GP number, we have to at least add 0b10 to this (0x1 << 1) to
+ * account for the state bit. However, if the current seq is 7 (GP num is 3
+ * and state bit is 1), then it means the current grace period is already
+ * in progress so the next time the callback will run is at gp+2. To account for
+ * the extra +1, we just overflow the LSB by adding another 0x1 and masking
+ * with ~0x1. In case no GP was in progress (RCU is idle), then adding
+ * 0x1 and masking will have no effect. This is calculated as below.
+ */
 static inline unsigned long rcu_seq_snap(unsigned long *sp)
 {
unsigned long s;
-- 
2.17.0.441.gb46fe60e1d-goog


[PATCH] rcu: Add comment documenting how rcu_seq_snap works

2018-05-11 Thread Joel Fernandes (Google)
rcu_seq_snap may be tricky for someone looking at it for the first time.
Let's document how it works with an example to make it easier.

Signed-off-by: Joel Fernandes (Google) 
---
 kernel/rcu/rcu.h | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 003671825d62..004ace3d22c2 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -91,7 +91,28 @@ static inline void rcu_seq_end(unsigned long *sp)
WRITE_ONCE(*sp, rcu_seq_endval(sp));
 }
 
-/* Take a snapshot of the update side's sequence number. */
+/*
+ * Take a snapshot of the update side's sequence number.
+ *
+ * This function predicts what the grace period number will be the next
+ * time an RCU callback will be executed, given the current grace period's
+ * number. This can be gp+1 if RCU is idle, or gp+2 if a grace period is
+ * already in progress.
+ *
+ * We do this with a single addition and masking.
+ * For example, if RCU_SEQ_STATE_MASK=1 and the least significant bit (LSB) of
+ * the seq is used to track if a GP is in progress or not, it's sufficient if we
+ * add (2+1) and mask with ~1. Let's see why with an example:
+ *
+ * Say the current seq is 6 which is 0b110 (gp is 3 and state bit is 0).
+ * To get the next GP number, we have to at least add 0b10 to this (0x1 << 1) to
+ * account for the state bit. However, if the current seq is 7 (GP num is 3
+ * and state bit is 1), then it means the current grace period is already
+ * in progress so the next time the callback will run is at gp+2. To account for
+ * the extra +1, we just overflow the LSB by adding another 0x1 and masking
+ * with ~0x1. In case no GP was in progress (RCU is idle), then adding
+ * 0x1 and masking will have no effect. This is calculated as below.
+ */
 static inline unsigned long rcu_seq_snap(unsigned long *sp)
 {
unsigned long s;
-- 
2.17.0.441.gb46fe60e1d-goog


Re: [GIT] Networking

2018-05-11 Thread Linus Torvalds
On Fri, May 11, 2018 at 5:10 PM David Miller  wrote:

> I guess this is my reward for trying to break the monotony of
> pull requests :-)

I actually went back and checked a few older pull requests to see if this
had been going on for a while and I just hadn't noticed.

It just took me by surprise :^p

   Linus


Re: [GIT] Networking

2018-05-11 Thread Linus Torvalds
On Fri, May 11, 2018 at 5:10 PM David Miller  wrote:

> I guess this is my reward for trying to break the monotony of
> pull requests :-)

I actually went back and checked a few older pull requests to see if this
had been going on for a while and I just hadn't noticed.

It just took me by surprise :^p

   Linus


Re: [GIT] Networking

2018-05-11 Thread David Miller
From: Linus Torvalds 
Date: Fri, 11 May 2018 14:25:59 -0700

> David, is there something you want to tell us?
> 
> Drugs are bad, m'kay..

I guess this is my reward for trying to break the monotony of
pull requests :-)


Re: [GIT] Networking

2018-05-11 Thread David Miller
From: Linus Torvalds 
Date: Fri, 11 May 2018 14:25:59 -0700

> David, is there something you want to tell us?
> 
> Drugs are bad, m'kay..

I guess this is my reward for trying to break the monotony of
pull requests :-)


Re: [PATCH net] net: dsa: bcm_sf2: Fix RX_CLS_LOC_ANY overwrite for last rule

2018-05-11 Thread David Miller
From: Florian Fainelli 
Date: Fri, 11 May 2018 16:38:02 -0700

> David, please discard that for now, the IPv4 part is correct, but I am
> not fixing the bug correctly for the IPv6 part. v2 coming some time next
> week. Thank you!

Ok.


Re: [PATCH net] net: dsa: bcm_sf2: Fix RX_CLS_LOC_ANY overwrite for last rule

2018-05-11 Thread David Miller
From: Florian Fainelli 
Date: Fri, 11 May 2018 16:38:02 -0700

> David, please discard that for now, the IPv4 part is correct, but I am
> not fixing the bug correctly for the IPv6 part. v2 coming some time next
> week. Thank you!

Ok.


[PATCH v9 02/12] drivers: base: cacheinfo: setup DT cache properties early

2018-05-11 Thread Jeremy Linton
The original intent in cacheinfo was that an architecture
specific populate_cache_leaves() would probe the hardware
and then cache_shared_cpu_map_setup() and
cache_override_properties() would provide firmware help to
extend/expand upon what was probed. Arm64 was really
the only architecture that was working this way, and
with the removal of most of the hardware probing logic it
became clear that it was possible to simplify the logic a bit.

This patch combines the walk of the DT nodes with the
code updating the cache size/line_size and nr_sets.
cache_override_properties() (which was DT specific) is
then removed. The result is that cacheinfo.of_node is
no longer used as a temporary place to hold DT references
for future calls that update cache properties. That change
helps to clarify its one remaining use (matching
cacheinfo nodes that represent shared caches) which
will be used by the ACPI/PPTT code in the following patches.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 arch/riscv/kernel/cacheinfo.c |  1 -
 drivers/base/cacheinfo.c  | 65 +++
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 10ed2749e246..0bc86e5f8f3f 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 struct device_node *node,
 enum cache_type type, unsigned int level)
 {
-   this_leaf->of_node = node;
this_leaf->level = level;
this_leaf->type = type;
/* not a sector cache */
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 09ccef7ddc99..a872523e8951 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -71,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type)
return type;
 }
 
-static void cache_size(struct cacheinfo *this_leaf)
+static void cache_size(struct cacheinfo *this_leaf, struct device_node *np)
 {
const char *propname;
const __be32 *cache_size;
@@ -80,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf)
ct_idx = get_cacheinfo_idx(this_leaf->type);
propname = cache_type_info[ct_idx].size_prop;
 
-   cache_size = of_get_property(this_leaf->of_node, propname, NULL);
+   cache_size = of_get_property(np, propname, NULL);
if (cache_size)
this_leaf->size = of_read_number(cache_size, 1);
 }
 
 /* not cache_line_size() because that's a macro in include/linux/cache.h */
-static void cache_get_line_size(struct cacheinfo *this_leaf)
+static void cache_get_line_size(struct cacheinfo *this_leaf,
+   struct device_node *np)
 {
const __be32 *line_size;
int i, lim, ct_idx;
@@ -98,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
const char *propname;
 
propname = cache_type_info[ct_idx].line_size_props[i];
-   line_size = of_get_property(this_leaf->of_node, propname, NULL);
+   line_size = of_get_property(np, propname, NULL);
if (line_size)
break;
}
@@ -107,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
this_leaf->coherency_line_size = of_read_number(line_size, 1);
 }
 
-static void cache_nr_sets(struct cacheinfo *this_leaf)
+static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np)
 {
const char *propname;
const __be32 *nr_sets;
@@ -116,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf)
ct_idx = get_cacheinfo_idx(this_leaf->type);
propname = cache_type_info[ct_idx].nr_sets_prop;
 
-   nr_sets = of_get_property(this_leaf->of_node, propname, NULL);
+   nr_sets = of_get_property(np, propname, NULL);
if (nr_sets)
this_leaf->number_of_sets = of_read_number(nr_sets, 1);
 }
@@ -135,32 +136,27 @@ static void cache_associativity(struct cacheinfo 
*this_leaf)
this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
-static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+static bool cache_node_is_unified(struct cacheinfo *this_leaf,
+ struct device_node *np)
 {
-   return of_property_read_bool(this_leaf->of_node, "cache-unified");
+   return of_property_read_bool(np, "cache-unified");
 }
 
-static void cache_of_override_properties(unsigned int cpu)
+static void cache_of_set_props(struct cacheinfo *this_leaf,
+ 

[PATCH v9 02/12] drivers: base: cacheinfo: setup DT cache properties early

2018-05-11 Thread Jeremy Linton
The original intent in cacheinfo was that an architecture
specific populate_cache_leaves() would probe the hardware
and then cache_shared_cpu_map_setup() and
cache_override_properties() would provide firmware help to
extend/expand upon what was probed. Arm64 was really
the only architecture that was working this way, and
with the removal of most of the hardware probing logic it
became clear that it was possible to simplify the logic a bit.

This patch combines the walk of the DT nodes with the
code updating the cache size/line_size and nr_sets.
cache_override_properties() (which was DT specific) is
then removed. The result is that cacheinfo.of_node is
no longer used as a temporary place to hold DT references
for future calls that update cache properties. That change
helps to clarify its one remaining use (matching
cacheinfo nodes that represent shared caches) which
will be used by the ACPI/PPTT code in the following patches.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 arch/riscv/kernel/cacheinfo.c |  1 -
 drivers/base/cacheinfo.c  | 65 +++
 2 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 10ed2749e246..0bc86e5f8f3f 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 struct device_node *node,
 enum cache_type type, unsigned int level)
 {
-   this_leaf->of_node = node;
this_leaf->level = level;
this_leaf->type = type;
/* not a sector cache */
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 09ccef7ddc99..a872523e8951 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -71,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type)
return type;
 }
 
-static void cache_size(struct cacheinfo *this_leaf)
+static void cache_size(struct cacheinfo *this_leaf, struct device_node *np)
 {
const char *propname;
const __be32 *cache_size;
@@ -80,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf)
ct_idx = get_cacheinfo_idx(this_leaf->type);
propname = cache_type_info[ct_idx].size_prop;
 
-   cache_size = of_get_property(this_leaf->of_node, propname, NULL);
+   cache_size = of_get_property(np, propname, NULL);
if (cache_size)
this_leaf->size = of_read_number(cache_size, 1);
 }
 
 /* not cache_line_size() because that's a macro in include/linux/cache.h */
-static void cache_get_line_size(struct cacheinfo *this_leaf)
+static void cache_get_line_size(struct cacheinfo *this_leaf,
+   struct device_node *np)
 {
const __be32 *line_size;
int i, lim, ct_idx;
@@ -98,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
const char *propname;
 
propname = cache_type_info[ct_idx].line_size_props[i];
-   line_size = of_get_property(this_leaf->of_node, propname, NULL);
+   line_size = of_get_property(np, propname, NULL);
if (line_size)
break;
}
@@ -107,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf)
this_leaf->coherency_line_size = of_read_number(line_size, 1);
 }
 
-static void cache_nr_sets(struct cacheinfo *this_leaf)
+static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np)
 {
const char *propname;
const __be32 *nr_sets;
@@ -116,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf)
ct_idx = get_cacheinfo_idx(this_leaf->type);
propname = cache_type_info[ct_idx].nr_sets_prop;
 
-   nr_sets = of_get_property(this_leaf->of_node, propname, NULL);
+   nr_sets = of_get_property(np, propname, NULL);
if (nr_sets)
this_leaf->number_of_sets = of_read_number(nr_sets, 1);
 }
@@ -135,32 +136,27 @@ static void cache_associativity(struct cacheinfo 
*this_leaf)
this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
-static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+static bool cache_node_is_unified(struct cacheinfo *this_leaf,
+ struct device_node *np)
 {
-   return of_property_read_bool(this_leaf->of_node, "cache-unified");
+   return of_property_read_bool(np, "cache-unified");
 }
 
-static void cache_of_override_properties(unsigned int cpu)
+static void cache_of_set_props(struct cacheinfo *this_leaf,
+  struct device_node *np)
 {
-   int index;
-   struct cacheinfo *this_leaf;
-   struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-
-  

[PATCH v9 04/12] arm64/acpi: Create arch specific cpu to acpi id helper

2018-05-11 Thread Jeremy Linton
It's helpful to be able to look up the acpi_processor_id associated
with a logical cpu. Provide an arm64 helper to do this.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Ard Biesheuvel 
---
 arch/arm64/include/asm/acpi.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 32f465a80e4e..0db62a4cbce2 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void)
 }
 
 struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);
+static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
+{
+   return  acpi_cpu_get_madt_gicc(cpu)->uid;
+}
 
 static inline void arch_fix_phys_package_id(int num, u32 slot) { }
 void __init acpi_init_cpus(void);
-- 
2.13.6



[PATCH v9 04/12] arm64/acpi: Create arch specific cpu to acpi id helper

2018-05-11 Thread Jeremy Linton
It's helpful to be able to look up the acpi_processor_id associated
with a logical cpu. Provide an arm64 helper to do this.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Ard Biesheuvel 
---
 arch/arm64/include/asm/acpi.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 32f465a80e4e..0db62a4cbce2 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void)
 }
 
 struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);
+static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
+{
+   return  acpi_cpu_get_madt_gicc(cpu)->uid;
+}
 
 static inline void arch_fix_phys_package_id(int num, u32 slot) { }
 void __init acpi_init_cpus(void);
-- 
2.13.6



[PATCH v9 01/12] drivers: base: cacheinfo: move cache_setup_of_node()

2018-05-11 Thread Jeremy Linton
In preparation for the next patch, and to aid in
review of that patch, let's move cache_setup_of_node()
further down in the module without any changes.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Reviewed-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 drivers/base/cacheinfo.c | 80 
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index edf726267282..09ccef7ddc99 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -32,46 +32,6 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 }
 
 #ifdef CONFIG_OF
-static int cache_setup_of_node(unsigned int cpu)
-{
-   struct device_node *np;
-   struct cacheinfo *this_leaf;
-   struct device *cpu_dev = get_cpu_device(cpu);
-   struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-   unsigned int index = 0;
-
-   /* skip if of_node is already populated */
-   if (this_cpu_ci->info_list->of_node)
-   return 0;
-
-   if (!cpu_dev) {
-   pr_err("No cpu device for CPU %d\n", cpu);
-   return -ENODEV;
-   }
-   np = cpu_dev->of_node;
-   if (!np) {
-   pr_err("Failed to find cpu%d device node\n", cpu);
-   return -ENOENT;
-   }
-
-   while (index < cache_leaves(cpu)) {
-   this_leaf = this_cpu_ci->info_list + index;
-   if (this_leaf->level != 1)
-   np = of_find_next_cache_node(np);
-   else
-   np = of_node_get(np);/* cpu node itself */
-   if (!np)
-   break;
-   this_leaf->of_node = np;
-   index++;
-   }
-
-   if (index != cache_leaves(cpu)) /* not all OF nodes populated */
-   return -ENOENT;
-
-   return 0;
-}
-
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
   struct cacheinfo *sib_leaf)
 {
@@ -202,6 +162,46 @@ static void cache_of_override_properties(unsigned int cpu)
cache_associativity(this_leaf);
}
 }
+
+static int cache_setup_of_node(unsigned int cpu)
+{
+   struct device_node *np;
+   struct cacheinfo *this_leaf;
+   struct device *cpu_dev = get_cpu_device(cpu);
+   struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+   unsigned int index = 0;
+
+   /* skip if of_node is already populated */
+   if (this_cpu_ci->info_list->of_node)
+   return 0;
+
+   if (!cpu_dev) {
+   pr_err("No cpu device for CPU %d\n", cpu);
+   return -ENODEV;
+   }
+   np = cpu_dev->of_node;
+   if (!np) {
+   pr_err("Failed to find cpu%d device node\n", cpu);
+   return -ENOENT;
+   }
+
+   while (index < cache_leaves(cpu)) {
+   this_leaf = this_cpu_ci->info_list + index;
+   if (this_leaf->level != 1)
+   np = of_find_next_cache_node(np);
+   else
+   np = of_node_get(np);/* cpu node itself */
+   if (!np)
+   break;
+   this_leaf->of_node = np;
+   index++;
+   }
+
+   if (index != cache_leaves(cpu)) /* not all OF nodes populated */
+   return -ENOENT;
+
+   return 0;
+}
 #else
 static void cache_of_override_properties(unsigned int cpu) { }
 static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
-- 
2.13.6



[PATCH v9 01/12] drivers: base: cacheinfo: move cache_setup_of_node()

2018-05-11 Thread Jeremy Linton
In preparation for the next patch, and to aid in
review of that patch, let's move cache_setup_of_node()
further down in the module without any changes.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Reviewed-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 drivers/base/cacheinfo.c | 80 
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index edf726267282..09ccef7ddc99 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -32,46 +32,6 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
 }
 
 #ifdef CONFIG_OF
-static int cache_setup_of_node(unsigned int cpu)
-{
-   struct device_node *np;
-   struct cacheinfo *this_leaf;
-   struct device *cpu_dev = get_cpu_device(cpu);
-   struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-   unsigned int index = 0;
-
-   /* skip if of_node is already populated */
-   if (this_cpu_ci->info_list->of_node)
-   return 0;
-
-   if (!cpu_dev) {
-   pr_err("No cpu device for CPU %d\n", cpu);
-   return -ENODEV;
-   }
-   np = cpu_dev->of_node;
-   if (!np) {
-   pr_err("Failed to find cpu%d device node\n", cpu);
-   return -ENOENT;
-   }
-
-   while (index < cache_leaves(cpu)) {
-   this_leaf = this_cpu_ci->info_list + index;
-   if (this_leaf->level != 1)
-   np = of_find_next_cache_node(np);
-   else
-   np = of_node_get(np);/* cpu node itself */
-   if (!np)
-   break;
-   this_leaf->of_node = np;
-   index++;
-   }
-
-   if (index != cache_leaves(cpu)) /* not all OF nodes populated */
-   return -ENOENT;
-
-   return 0;
-}
-
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
   struct cacheinfo *sib_leaf)
 {
@@ -202,6 +162,46 @@ static void cache_of_override_properties(unsigned int cpu)
cache_associativity(this_leaf);
}
 }
+
+static int cache_setup_of_node(unsigned int cpu)
+{
+   struct device_node *np;
+   struct cacheinfo *this_leaf;
+   struct device *cpu_dev = get_cpu_device(cpu);
+   struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+   unsigned int index = 0;
+
+   /* skip if of_node is already populated */
+   if (this_cpu_ci->info_list->of_node)
+   return 0;
+
+   if (!cpu_dev) {
+   pr_err("No cpu device for CPU %d\n", cpu);
+   return -ENODEV;
+   }
+   np = cpu_dev->of_node;
+   if (!np) {
+   pr_err("Failed to find cpu%d device node\n", cpu);
+   return -ENOENT;
+   }
+
+   while (index < cache_leaves(cpu)) {
+   this_leaf = this_cpu_ci->info_list + index;
+   if (this_leaf->level != 1)
+   np = of_find_next_cache_node(np);
+   else
+   np = of_node_get(np);/* cpu node itself */
+   if (!np)
+   break;
+   this_leaf->of_node = np;
+   index++;
+   }
+
+   if (index != cache_leaves(cpu)) /* not all OF nodes populated */
+   return -ENOENT;
+
+   return 0;
+}
 #else
 static void cache_of_override_properties(unsigned int cpu) { }
 static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
-- 
2.13.6



[PATCH v9 05/12] ACPI/PPTT: Add Processor Properties Topology Table parsing

2018-05-11 Thread Jeremy Linton
ACPI 6.2 adds a new table, which describes how processing units
are related to each other in a tree-like fashion. Caches are
also sprinkled throughout the tree and describe the properties
of the caches in relation to other caches and processing units.

Add the code to parse the cache hierarchy and report the total
number of levels of cache for a given core using
acpi_find_last_cache_level() as well as fill out the individual
core's cache information with cache_setup_acpi() once the
cpu_cacheinfo structure has been populated by the arch-specific
code.

An additional patch later in the set adds the ability to report
peers in the topology using find_acpi_cpu_topology()
to report a unique ID for each processing unit at a given level
in the tree. These unique IDs can then be used to match related
processing units which exist as threads, within a given
package, etc.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 drivers/acpi/pptt.c  | 655 +++
 include/linux/acpi.h |   4 +
 2 files changed, 659 insertions(+)
 create mode 100644 drivers/acpi/pptt.c

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
new file mode 100644
index ..e5ea1974d1e3
--- /dev/null
+++ b/drivers/acpi/pptt.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pptt.c - parsing of Processor Properties Topology Table (PPTT)
+ *
+ * Copyright (C) 2018, ARM
+ *
+ * This file implements parsing of the Processor Properties Topology Table
+ * which is optionally used to describe the processor and cache topology.
+ * Due to the relative pointers used throughout the table, this doesn't
+ * leverage the existing subtable parsing in the kernel.
+ *
+ * The PPTT structure is an inverted tree, with each node potentially
+ * holding one or two inverted tree data structures describing
+ * the caches available at that level. Each cache structure optionally
+ * contains properties describing the cache at a given level which can be
+ * used to override hardware probed values.
+ */
+#define pr_fmt(fmt) "ACPI PPTT: " fmt
+
+#include 
+#include 
+#include 
+
+static struct acpi_subtable_header *fetch_pptt_subtable(struct 
acpi_table_header *table_hdr,
+   u32 pptt_ref)
+{
+   struct acpi_subtable_header *entry;
+
+   /* there isn't a subtable at reference 0 */
+   if (pptt_ref < sizeof(struct acpi_subtable_header))
+   return NULL;
+
+   if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
+   return NULL;
+
+   entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
+
+   if (entry->length == 0)
+   return NULL;
+
+   if (pptt_ref + entry->length > table_hdr->length)
+   return NULL;
+
+   return entry;
+}
+
+static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header 
*table_hdr,
+  u32 pptt_ref)
+{
+   return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, 
pptt_ref);
+}
+
+static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header 
*table_hdr,
+   u32 pptt_ref)
+{
+   return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, 
pptt_ref);
+}
+
+static struct acpi_subtable_header *acpi_get_pptt_resource(struct 
acpi_table_header *table_hdr,
+  struct 
acpi_pptt_processor *node,
+  int resource)
+{
+   u32 *ref;
+
+   if (resource >= node->number_of_priv_resources)
+   return NULL;
+
+   ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
+   ref += resource;
+
+   return fetch_pptt_subtable(table_hdr, *ref);
+}
+
+static inline bool acpi_pptt_match_type(int table_type, int type)
+{
+   return ((table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type ||
+   table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
+}
+
+/**
+ * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @local_level: passed res reflects this cache level
+ * @res: cache resource in the PPTT we want to walk
+ * @found: returns a pointer to the requested level if found
+ * @level: the requested cache level
+ * @type: the requested cache type
+ *
+ * Attempt to find a given cache level, while counting the max number
+ * of cache levels for the cache node.
+ *
+ * Given a pptt resource, verify that it is a cache node, then walk
+ * down each level of caches, 

[PATCH v9 05/12] ACPI/PPTT: Add Processor Properties Topology Table parsing

2018-05-11 Thread Jeremy Linton
ACPI 6.2 adds a new table, which describes how processing units
are related to each other in a tree-like fashion. Caches are
also sprinkled throughout the tree and describe the properties
of the caches in relation to other caches and processing units.

Add the code to parse the cache hierarchy and report the total
number of levels of cache for a given core using
acpi_find_last_cache_level() as well as fill out the individual
core's cache information with cache_setup_acpi() once the
cpu_cacheinfo structure has been populated by the arch-specific
code.

An additional patch later in the set adds the ability to report
peers in the topology using find_acpi_cpu_topology()
to report a unique ID for each processing unit at a given level
in the tree. These unique IDs can then be used to match related
processing units which exist as threads, within a given
package, etc.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 drivers/acpi/pptt.c  | 655 +++
 include/linux/acpi.h |   4 +
 2 files changed, 659 insertions(+)
 create mode 100644 drivers/acpi/pptt.c

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
new file mode 100644
index ..e5ea1974d1e3
--- /dev/null
+++ b/drivers/acpi/pptt.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pptt.c - parsing of Processor Properties Topology Table (PPTT)
+ *
+ * Copyright (C) 2018, ARM
+ *
+ * This file implements parsing of the Processor Properties Topology Table
+ * which is optionally used to describe the processor and cache topology.
+ * Due to the relative pointers used throughout the table, this doesn't
+ * leverage the existing subtable parsing in the kernel.
+ *
+ * The PPTT structure is an inverted tree, with each node potentially
+ * holding one or two inverted tree data structures describing
+ * the caches available at that level. Each cache structure optionally
+ * contains properties describing the cache at a given level which can be
+ * used to override hardware probed values.
+ */
+#define pr_fmt(fmt) "ACPI PPTT: " fmt
+
+#include 
+#include 
+#include 
+
+static struct acpi_subtable_header *fetch_pptt_subtable(struct 
acpi_table_header *table_hdr,
+   u32 pptt_ref)
+{
+   struct acpi_subtable_header *entry;
+
+   /* there isn't a subtable at reference 0 */
+   if (pptt_ref < sizeof(struct acpi_subtable_header))
+   return NULL;
+
+   if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
+   return NULL;
+
+   entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
+
+   if (entry->length == 0)
+   return NULL;
+
+   if (pptt_ref + entry->length > table_hdr->length)
+   return NULL;
+
+   return entry;
+}
+
+static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header 
*table_hdr,
+  u32 pptt_ref)
+{
+   return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, 
pptt_ref);
+}
+
+static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header 
*table_hdr,
+   u32 pptt_ref)
+{
+   return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, 
pptt_ref);
+}
+
+static struct acpi_subtable_header *acpi_get_pptt_resource(struct 
acpi_table_header *table_hdr,
+  struct 
acpi_pptt_processor *node,
+  int resource)
+{
+   u32 *ref;
+
+   if (resource >= node->number_of_priv_resources)
+   return NULL;
+
+   ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
+   ref += resource;
+
+   return fetch_pptt_subtable(table_hdr, *ref);
+}
+
+static inline bool acpi_pptt_match_type(int table_type, int type)
+{
+   return ((table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type ||
+   table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
+}
+
+/**
+ * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @local_level: passed res reflects this cache level
+ * @res: cache resource in the PPTT we want to walk
+ * @found: returns a pointer to the requested level if found
+ * @level: the requested cache level
+ * @type: the requested cache type
+ *
+ * Attempt to find a given cache level, while counting the max number
+ * of cache levels for the cache node.
+ *
+ * Given a pptt resource, verify that it is a cache node, then walk
+ * down each level of caches, counting how many levels are found
+ * as well as checking the cache type (icache, dcache, unified). If a
+ * level & type match, then we set found, and continue the search.
+ * 

[PATCH v9 07/12] drivers: base cacheinfo: Add support for ACPI based firmware tables

2018-05-11 Thread Jeremy Linton
Call ACPI cache parsing routines from base cacheinfo code if ACPI
is enabled. Also stub out cache_setup_acpi and acpi_find_last_cache_level
so that individual architectures can enable ACPI topology parsing.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 drivers/base/cacheinfo.c  | 14 ++
 include/linux/cacheinfo.h | 17 +
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 597aacb233fc..2880e2ab01f5 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -206,7 +206,7 @@ static inline bool cache_leaves_are_shared(struct cacheinfo 
*this_leaf,
   struct cacheinfo *sib_leaf)
 {
/*
-* For non-DT systems, assume unique level 1 cache, system-wide
+* For non-DT/ACPI systems, assume unique level 1 caches, system-wide
 * shared caches for all other levels. This will be used only if
 * arch specific code has not populated shared_cpu_map
 */
@@ -214,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct 
cacheinfo *this_leaf,
 }
 #endif
 
+int __weak cache_setup_acpi(unsigned int cpu)
+{
+   return -ENOTSUPP;
+}
+
 static int cache_shared_cpu_map_setup(unsigned int cpu)
 {
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -227,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (of_have_populated_dt())
ret = cache_setup_of_node(cpu);
else if (!acpi_disabled)
-   /* No cache property/hierarchy support yet in ACPI */
-   ret = -ENOTSUPP;
+   ret = cache_setup_acpi(cpu);
+
if (ret)
return ret;
 
@@ -279,7 +284,8 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
}
-   of_node_put(this_leaf->fw_token);
+   if (of_have_populated_dt())
+   of_node_put(this_leaf->fw_token);
}
 }
 
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 0c6f658054d2..89397e30e269 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -97,6 +97,23 @@ int func(unsigned int cpu)   
\
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
 int init_cache_level(unsigned int cpu);
 int populate_cache_leaves(unsigned int cpu);
+int cache_setup_acpi(unsigned int cpu);
+#ifndef CONFIG_ACPI
+/*
+ * acpi_find_last_cache_level is only called on ACPI enabled
+ * platforms using the PPTT for topology. This means that if
+ * the platform supports other firmware configuration methods
+ * we need to stub out the call when ACPI is disabled.
+ * ACPI enabled platforms not using PPTT won't be making calls
+ * to this function so we need not worry about them.
+ */
+static inline int acpi_find_last_cache_level(unsigned int cpu)
+{
+   return 0;
+}
+#else
+int acpi_find_last_cache_level(unsigned int cpu);
+#endif
 
 const struct attribute_group *cache_get_priv_group(struct cacheinfo 
*this_leaf);
 
-- 
2.13.6



[PATCH v9 07/12] drivers: base cacheinfo: Add support for ACPI based firmware tables

2018-05-11 Thread Jeremy Linton
Call ACPI cache parsing routines from base cacheinfo code if ACPI
is enabled. Also stub out cache_setup_acpi and acpi_find_last_cache_level
so that individual architectures can enable ACPI topology parsing.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 drivers/base/cacheinfo.c  | 14 ++
 include/linux/cacheinfo.h | 17 +
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 597aacb233fc..2880e2ab01f5 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -206,7 +206,7 @@ static inline bool cache_leaves_are_shared(struct cacheinfo 
*this_leaf,
   struct cacheinfo *sib_leaf)
 {
/*
-* For non-DT systems, assume unique level 1 cache, system-wide
+* For non-DT/ACPI systems, assume unique level 1 caches, system-wide
 * shared caches for all other levels. This will be used only if
 * arch specific code has not populated shared_cpu_map
 */
@@ -214,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct 
cacheinfo *this_leaf,
 }
 #endif
 
+int __weak cache_setup_acpi(unsigned int cpu)
+{
+   return -ENOTSUPP;
+}
+
 static int cache_shared_cpu_map_setup(unsigned int cpu)
 {
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -227,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (of_have_populated_dt())
ret = cache_setup_of_node(cpu);
else if (!acpi_disabled)
-   /* No cache property/hierarchy support yet in ACPI */
-   ret = -ENOTSUPP;
+   ret = cache_setup_acpi(cpu);
+
if (ret)
return ret;
 
@@ -279,7 +284,8 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
}
-   of_node_put(this_leaf->fw_token);
+   if (of_have_populated_dt())
+   of_node_put(this_leaf->fw_token);
}
 }
 
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 0c6f658054d2..89397e30e269 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -97,6 +97,23 @@ int func(unsigned int cpu)   
\
 struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
 int init_cache_level(unsigned int cpu);
 int populate_cache_leaves(unsigned int cpu);
+int cache_setup_acpi(unsigned int cpu);
+#ifndef CONFIG_ACPI
+/*
+ * acpi_find_last_cache_level is only called on ACPI enabled
+ * platforms using the PPTT for topology. This means that if
+ * the platform supports other firmware configuration methods
+ * we need to stub out the call when ACPI is disabled.
+ * ACPI enabled platforms not using PPTT won't be making calls
+ * to this function so we need not worry about them.
+ */
+static inline int acpi_find_last_cache_level(unsigned int cpu)
+{
+   return 0;
+}
+#else
+int acpi_find_last_cache_level(unsigned int cpu);
+#endif
 
 const struct attribute_group *cache_get_priv_group(struct cacheinfo 
*this_leaf);
 
-- 
2.13.6



[PATCH v9 09/12] arm64: topology: rename cluster_id

2018-05-11 Thread Jeremy Linton
The cluster concept isn't architecturally defined for arm64.
Let's match the name of the arm64 topology field to the kernel macro
that uses it.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
Acked-by: Morten Rasmussen 
---
 arch/arm64/include/asm/topology.h |  4 ++--
 arch/arm64/kernel/topology.c  | 26 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/topology.h 
b/arch/arm64/include/asm/topology.h
index c4f2d50491eb..6b10459e6905 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -7,14 +7,14 @@
 struct cpu_topology {
int thread_id;
int core_id;
-   int cluster_id;
+   int package_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
 };
 
 extern struct cpu_topology cpu_topology[NR_CPUS];
 
-#define topology_physical_package_id(cpu)  (cpu_topology[cpu].cluster_id)
+#define topology_physical_package_id(cpu)  (cpu_topology[cpu].package_id)
 #define topology_core_id(cpu)  (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
 #define topology_sibling_cpumask(cpu)  (&cpu_topology[cpu].thread_sibling)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 21868530018e..dc18b1e53194 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -47,7 +47,7 @@ static int __init get_cpu_for_node(struct device_node *node)
return cpu;
 }
 
-static int __init parse_core(struct device_node *core, int cluster_id,
+static int __init parse_core(struct device_node *core, int package_id,
 int core_id)
 {
char name[10];
@@ -63,7 +63,7 @@ static int __init parse_core(struct device_node *core, int 
cluster_id,
leaf = false;
cpu = get_cpu_for_node(t);
if (cpu >= 0) {
-   cpu_topology[cpu].cluster_id = cluster_id;
+   cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else {
@@ -85,7 +85,7 @@ static int __init parse_core(struct device_node *core, int 
cluster_id,
return -EINVAL;
}
 
-   cpu_topology[cpu].cluster_id = cluster_id;
+   cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf) {
pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -101,7 +101,7 @@ static int __init parse_cluster(struct device_node 
*cluster, int depth)
bool leaf = true;
bool has_cores = false;
struct device_node *c;
-   static int cluster_id __initdata;
+   static int package_id __initdata;
int core_id = 0;
int i, ret;
 
@@ -140,7 +140,7 @@ static int __init parse_cluster(struct device_node 
*cluster, int depth)
}
 
if (leaf) {
-   ret = parse_core(c, cluster_id, core_id++);
+   ret = parse_core(c, package_id, core_id++);
} else {
pr_err("%pOF: Non-leaf cluster with core %s\n",
   cluster, name);
@@ -158,7 +158,7 @@ static int __init parse_cluster(struct device_node 
*cluster, int depth)
pr_warn("%pOF: empty cluster\n", cluster);
 
if (leaf)
-   cluster_id++;
+   package_id++;
 
return 0;
 }
@@ -194,7 +194,7 @@ static int __init parse_dt_topology(void)
 * only mark cores described in the DT as possible.
 */
for_each_possible_cpu(cpu)
-   if (cpu_topology[cpu].cluster_id == -1)
+   if (cpu_topology[cpu].package_id == -1)
ret = -EINVAL;
 
 out_map:
@@ -224,7 +224,7 @@ static void update_siblings_masks(unsigned int cpuid)
for_each_possible_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
 
-   if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+   if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
 
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -245,7 +245,7 @@ void store_cpu_topology(unsigned int cpuid)
struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
u64 mpidr;
 
-   if (cpuid_topo->cluster_id != -1)
+   if 

[PATCH v9 09/12] arm64: topology: rename cluster_id

2018-05-11 Thread Jeremy Linton
The cluster concept isn't architecturally defined for arm64.
Let's match the name of the arm64 topology field to the kernel macro
that uses it.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
Acked-by: Morten Rasmussen 
---
 arch/arm64/include/asm/topology.h |  4 ++--
 arch/arm64/kernel/topology.c  | 26 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/topology.h 
b/arch/arm64/include/asm/topology.h
index c4f2d50491eb..6b10459e6905 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -7,14 +7,14 @@
 struct cpu_topology {
int thread_id;
int core_id;
-   int cluster_id;
+   int package_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
 };
 
 extern struct cpu_topology cpu_topology[NR_CPUS];
 
-#define topology_physical_package_id(cpu)  (cpu_topology[cpu].cluster_id)
+#define topology_physical_package_id(cpu)  (cpu_topology[cpu].package_id)
 #define topology_core_id(cpu)  (cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
 #define topology_sibling_cpumask(cpu)  (&cpu_topology[cpu].thread_sibling)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 21868530018e..dc18b1e53194 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -47,7 +47,7 @@ static int __init get_cpu_for_node(struct device_node *node)
return cpu;
 }
 
-static int __init parse_core(struct device_node *core, int cluster_id,
+static int __init parse_core(struct device_node *core, int package_id,
 int core_id)
 {
char name[10];
@@ -63,7 +63,7 @@ static int __init parse_core(struct device_node *core, int 
cluster_id,
leaf = false;
cpu = get_cpu_for_node(t);
if (cpu >= 0) {
-   cpu_topology[cpu].cluster_id = cluster_id;
+   cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else {
@@ -85,7 +85,7 @@ static int __init parse_core(struct device_node *core, int 
cluster_id,
return -EINVAL;
}
 
-   cpu_topology[cpu].cluster_id = cluster_id;
+   cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf) {
pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -101,7 +101,7 @@ static int __init parse_cluster(struct device_node 
*cluster, int depth)
bool leaf = true;
bool has_cores = false;
struct device_node *c;
-   static int cluster_id __initdata;
+   static int package_id __initdata;
int core_id = 0;
int i, ret;
 
@@ -140,7 +140,7 @@ static int __init parse_cluster(struct device_node 
*cluster, int depth)
}
 
if (leaf) {
-   ret = parse_core(c, cluster_id, core_id++);
+   ret = parse_core(c, package_id, core_id++);
} else {
pr_err("%pOF: Non-leaf cluster with core %s\n",
   cluster, name);
@@ -158,7 +158,7 @@ static int __init parse_cluster(struct device_node 
*cluster, int depth)
pr_warn("%pOF: empty cluster\n", cluster);
 
if (leaf)
-   cluster_id++;
+   package_id++;
 
return 0;
 }
@@ -194,7 +194,7 @@ static int __init parse_dt_topology(void)
 * only mark cores described in the DT as possible.
 */
for_each_possible_cpu(cpu)
-   if (cpu_topology[cpu].cluster_id == -1)
+   if (cpu_topology[cpu].package_id == -1)
ret = -EINVAL;
 
 out_map:
@@ -224,7 +224,7 @@ static void update_siblings_masks(unsigned int cpuid)
for_each_possible_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
 
-   if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+   if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
 
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
@@ -245,7 +245,7 @@ void store_cpu_topology(unsigned int cpuid)
struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
u64 mpidr;
 
-   if (cpuid_topo->cluster_id != -1)
+   if (cpuid_topo->package_id != -1)
goto topology_populated;
 
mpidr = read_cpuid_mpidr();
@@ -259,19 +259,19 @@ void store_cpu_topology(unsigned int cpuid)
/* Multiprocessor system : 

[PATCH v9 08/12] arm64: Add support for ACPI based firmware tables

2018-05-11 Thread Jeremy Linton
The /sys cache entries should support ACPI/PPTT generated cache
topology information.  For arm64, if ACPI is enabled, determine
the max number of cache levels and populate them using the PPTT
table if one is available.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Reviewed-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 arch/arm64/kernel/cacheinfo.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 380f2e2fbed5..0bf0a835122f 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include 
 #include 
 #include 
 
@@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 
 static int __init_cache_level(unsigned int cpu)
 {
-   unsigned int ctype, level, leaves, of_level;
+   unsigned int ctype, level, leaves, fw_level;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
@@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu)
leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
}
 
-   of_level = of_find_last_cache_level(cpu);
-   if (level < of_level) {
+   if (acpi_disabled)
+   fw_level = of_find_last_cache_level(cpu);
+   else
+   fw_level = acpi_find_last_cache_level(cpu);
+
+   if (level < fw_level) {
/*
 * some external caches not specified in CLIDR_EL1
 * the information may be available in the device tree
 * only unified external caches are considered here
 */
-   leaves += (of_level - level);
-   level = of_level;
+   leaves += (fw_level - level);
+   level = fw_level;
}
 
this_cpu_ci->num_levels = level;
-- 
2.13.6



[PATCH v9 08/12] arm64: Add support for ACPI based firmware tables

2018-05-11 Thread Jeremy Linton
The /sys cache entries should support ACPI/PPTT generated cache
topology information.  For arm64, if ACPI is enabled, determine
the max number of cache levels and populate them using the PPTT
table if one is available.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Reviewed-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
---
 arch/arm64/kernel/cacheinfo.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 380f2e2fbed5..0bf0a835122f 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include 
 #include 
 #include 
 
@@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 
 static int __init_cache_level(unsigned int cpu)
 {
-   unsigned int ctype, level, leaves, of_level;
+   unsigned int ctype, level, leaves, fw_level;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
@@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu)
leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
}
 
-   of_level = of_find_last_cache_level(cpu);
-   if (level < of_level) {
+   if (acpi_disabled)
+   fw_level = of_find_last_cache_level(cpu);
+   else
+   fw_level = acpi_find_last_cache_level(cpu);
+
+   if (level < fw_level) {
/*
 * some external caches not specified in CLIDR_EL1
 * the information may be available in the device tree
 * only unified external caches are considered here
 */
-   leaves += (of_level - level);
-   level = of_level;
+   leaves += (fw_level - level);
+   level = fw_level;
}
 
this_cpu_ci->num_levels = level;
-- 
2.13.6



[PATCH v9 10/12] arm64: topology: enable ACPI/PPTT based CPU topology

2018-05-11 Thread Jeremy Linton
Propagate the topology information from the PPTT tree to the
cpu_topology array. We can get the thread id and core_id by assuming
certain levels of the PPTT tree correspond to those concepts.
The package_id is flagged in the tree and can be found by calling
find_acpi_cpu_topology_package() which terminates
its search when it finds an ACPI node flagged as the physical
package. If the tree doesn't contain enough levels to represent
all of the requested levels then the root node will be returned
for all subsequent levels.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
Acked-by: Morten Rasmussen 
---
 arch/arm64/kernel/topology.c | 45 +++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index dc18b1e53194..047d98e68502 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,6 +11,7 @@
  * for more details.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -22,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -296,6 +298,45 @@ static void __init reset_cpu_topology(void)
}
 }
 
+#ifdef CONFIG_ACPI
+/*
+ * Propagate the topology information of the processor_topology_node tree to the
+ * cpu_topology array.
+ */
+static int __init parse_acpi_topology(void)
+{
+   bool is_threaded;
+   int cpu, topology_id;
+
+   is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
+
+   for_each_possible_cpu(cpu) {
+   topology_id = find_acpi_cpu_topology(cpu, 0);
+   if (topology_id < 0)
+   return topology_id;
+
+   if (is_threaded) {
+   cpu_topology[cpu].thread_id = topology_id;
+   topology_id = find_acpi_cpu_topology(cpu, 1);
+   cpu_topology[cpu].core_id   = topology_id;
+   } else {
+   cpu_topology[cpu].thread_id  = -1;
+   cpu_topology[cpu].core_id= topology_id;
+   }
+   topology_id = find_acpi_cpu_topology_package(cpu);
+   cpu_topology[cpu].package_id = topology_id;
+   }
+
+   return 0;
+}
+
+#else
+static inline int __init parse_acpi_topology(void)
+{
+   return -EINVAL;
+}
+#endif
+
 void __init init_cpu_topology(void)
 {
reset_cpu_topology();
@@ -304,6 +345,8 @@ void __init init_cpu_topology(void)
 * Discard anything that was parsed if we hit an error so we
 * don't use partial information.
 */
-   if (of_have_populated_dt() && parse_dt_topology())
+   if (!acpi_disabled && parse_acpi_topology())
+   reset_cpu_topology();
+   else if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
 }
-- 
2.13.6



[PATCH v9 10/12] arm64: topology: enable ACPI/PPTT based CPU topology

2018-05-11 Thread Jeremy Linton
Propagate the topology information from the PPTT tree to the
cpu_topology array. We can get the thread id and core_id by assuming
certain levels of the PPTT tree correspond to those concepts.
The package_id is flagged in the tree and can be found by calling
find_acpi_cpu_topology_package() which terminates
its search when it finds an ACPI node flagged as the physical
package. If the tree doesn't contain enough levels to represent
all of the requested levels then the root node will be returned
for all subsequent levels.

Signed-off-by: Jeremy Linton 
Tested-by: Ard Biesheuvel 
Tested-by: Vijaya Kumar K 
Tested-by: Xiongfeng Wang 
Tested-by: Tomasz Nowicki 
Acked-by: Sudeep Holla 
Acked-by: Ard Biesheuvel 
Acked-by: Morten Rasmussen 
---
 arch/arm64/kernel/topology.c | 45 +++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index dc18b1e53194..047d98e68502 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,6 +11,7 @@
  * for more details.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -22,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -296,6 +298,45 @@ static void __init reset_cpu_topology(void)
}
 }
 
+#ifdef CONFIG_ACPI
+/*
+ * Propagate the topology information of the processor_topology_node tree to the
+ * cpu_topology array.
+ */
+static int __init parse_acpi_topology(void)
+{
+   bool is_threaded;
+   int cpu, topology_id;
+
+   is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
+
+   for_each_possible_cpu(cpu) {
+   topology_id = find_acpi_cpu_topology(cpu, 0);
+   if (topology_id < 0)
+   return topology_id;
+
+   if (is_threaded) {
+   cpu_topology[cpu].thread_id = topology_id;
+   topology_id = find_acpi_cpu_topology(cpu, 1);
+   cpu_topology[cpu].core_id   = topology_id;
+   } else {
+   cpu_topology[cpu].thread_id  = -1;
+   cpu_topology[cpu].core_id= topology_id;
+   }
+   topology_id = find_acpi_cpu_topology_package(cpu);
+   cpu_topology[cpu].package_id = topology_id;
+   }
+
+   return 0;
+}
+
+#else
+static inline int __init parse_acpi_topology(void)
+{
+   return -EINVAL;
+}
+#endif
+
 void __init init_cpu_topology(void)
 {
reset_cpu_topology();
@@ -304,6 +345,8 @@ void __init init_cpu_topology(void)
 * Discard anything that was parsed if we hit an error so we
 * don't use partial information.
 */
-   if (of_have_populated_dt() && parse_dt_topology())
+   if (!acpi_disabled && parse_acpi_topology())
+   reset_cpu_topology();
+   else if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
 }
-- 
2.13.6



  1   2   3   4   5   6   7   8   9   10   >