Re: [v4] clk: qoriq: Add support for the FMan clock

2015-05-06 Thread Stephen Boyd
On 04/16, Igal.Liberman wrote:
> From: Igal Liberman 
> 
> This patch depends on the following patches:
>   https://patchwork.ozlabs.org/patch/461151/
>   https://patchwork.ozlabs.org/patch/461155/
> 
> This patch is described by the following binding document update:
>   https://patchwork.ozlabs.org/patch/461166/
> 
> v4:   - Replaced "fsl,b4-device-config" with
> "fsl,b4860/b4420-device-config"
>   - Updated error messages
> 
> v3:   Updated commit message
> 
> v2:   - Added clock maintainers
>   - Cached FMan clock parent during initialization
>   - Register the clock after checking if the hardware exists
>   - updated error messages
> 
> Signed-off-by: Igal Liberman 
> ---
>  drivers/clk/clk-qoriq.c |  213 
> +++

If I try to compile this on ARM (the Kconfig for this file shows
that ARM is possible) then it fails with this error message:

  CC  drivers/clk/clk-qoriq.o
  drivers/clk/clk-qoriq.c:22:26:
  fatal error: asm/fsl_guts.h: No such file or directory
  compilation terminated.

>  1 file changed, 213 insertions(+)
> 
> diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
> index cda90a9..871c6df 100644
> --- a/drivers/clk/clk-qoriq.c
> +++ b/drivers/clk/clk-qoriq.c
> +
> +static u8 get_fm_clk_parent(struct clk_hw *hw)
> +{
> + return hw->init->flags;
> +}

This is very confusing. How is flags the parent index? Please
don't abuse framework data structures. I'm actually thinking we
should replace hw->init with NULL during clk_register() to avoid
this kind of abuse...

> +
> +static const struct clk_ops fm_clk_ops = {
> + .get_parent = get_fm_clk_parent,
> +};
> +
> +static int get_fm_clk_idx(int fm_id, int *fm_clk_idx)
> +{
> + struct ccsr_guts __iomem *guts_regs = NULL;

Unnecessary initialization to NULL. Also, marking a structure as
__iomem is odd. Why do we need to use a struct to figure out
offsets for registers? Why not just use #defines? That would
probably also make it easy to avoid the asm include here.

> + struct device_node *guts;
> + uint32_t reg = 0;

s/uint32_t/u32/

Also unnecessary initialization.

> + int clk_src = 0;
> +
> + guts = of_find_matching_node(NULL, guts_device_ids);
> + if (!guts) {
> + pr_err("%s(): could not find GUTS node\n", __func__);
> + return -ENODEV;
> + }
> +
> + guts_regs = of_iomap(guts, 0);
> + of_node_put(guts);
> + if (!guts_regs) {
> + pr_err("%s(): ioremap of GUTS node failed\n", __func__);
> + return -ENODEV;
> + }
[...]
> +
> +static void __init fm_mux_init(struct device_node *np)
> +{
> + struct clk_init_data *init;
> + struct clk_hw *hw;
> + int count, i, ret, fm_id = 0, fm_clk_idx;
> + struct clk *clk;
> +
> + init = kmalloc((sizeof(struct clk_init_data)), GFP_KERNEL);

Please remove extra parens and do sizeof(*init) so that we don't
have to care about the type matching.

> + if (!init)
> + return;
> +
> + /* get the input clock source count */
> + count = of_property_count_strings(np, "clock-names");
> + if (count < 0) {
> + pr_err("%s(): %s: get clock count error\n",
> +__func__, np->name);
> + goto err_init;
> + }
> +
> + init->parent_names = kmalloc((sizeof(char *) * count), GFP_KERNEL);

Use kcalloc please

> + if (!init->parent_names)
> + goto err_init;
> +
> + for (i = 0; i < count; i++)
> + init->parent_names[i] = of_clk_get_parent_name(np, i);
> +
> + hw = kzalloc(sizeof(*hw), GFP_KERNEL);
> + if (!hw)
> + goto err_name;
> +
> + ret = of_property_read_string_index(np, "clock-output-names", 0,
> + &init->name);
> + if (ret) {
> + pr_err("%s(): %s: read clock names error\n",
> +__func__, np->name);
> + goto err_clk_hw;
> + }
> +
> + if (!strcmp(np->name, "fm1-clk-mux"))
> + fm_id = 1;
> +
> + ret = get_fm_clk_idx(fm_id, &fm_clk_idx);
> + if (ret)
> + goto err_clk_hw;
> +
> + init->ops = &fm_clk_ops;
> + init->num_parents = count;
> + /* Save clock source index */
> + init->flags = fm_clk_idx;

Don't do this.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/corenet: enable eSDHC

2015-05-06 Thread Yangbo Lu
Signed-off-by: Yangbo Lu 
---
 arch/powerpc/configs/corenet32_smp_defconfig | 2 ++
 arch/powerpc/configs/corenet64_smp_defconfig | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/configs/corenet32_smp_defconfig 
b/arch/powerpc/configs/corenet32_smp_defconfig
index ca7957b..17fb2e0 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -130,6 +130,8 @@ CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ESDHC=y
 CONFIG_EDAC=y
 CONFIG_EDAC_MM_EDAC=y
 CONFIG_EDAC_MPC85XX=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig 
b/arch/powerpc/configs/corenet64_smp_defconfig
index 04737aa..4605a55 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -112,6 +112,8 @@ CONFIG_USB_EHCI_FSL=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ESDHC=y
 CONFIG_EDAC=y
 CONFIG_EDAC_MM_EDAC=y
 CONFIG_RTC_CLASS=y
-- 
2.1.0.27.g96db324

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Arnd Bergmann
On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
> >> @@ -533,6 +567,16 @@
> >> interrupts = <0x0 0x4f 0x4>;
> >> };
> >>
> >> +   sdhc0: sdhc@1c00 {
> >> +   device_type = "sdhc";
> >
> > device_type generally should not be used (there are a few exceptions).
> 
> Okay !!
> 

While we're at it, please change sdhc@1c00 to mmc@1c00.
Even though Linux does not care, we try to use the standard device
names for consistency.

Arnd
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v4] clk: qoriq: Add support for the FMan clock

2015-05-06 Thread Scott Wood
On Wed, 2015-05-06 at 00:02 -0700, Stephen Boyd wrote:
> On 04/16, Igal.Liberman wrote:
> > +static int get_fm_clk_idx(int fm_id, int *fm_clk_idx)
> > +{
> > +   struct ccsr_guts __iomem *guts_regs = NULL;
> 
> Unnecessary initialization to NULL. Also, marking a structure as
> __iomem is odd. Why do we need to use a struct to figure out
> offsets for registers? Why not just use #defines? That would
> probably also make it easy to avoid the asm include here.

Using a struct for registers is quite common:
scott@snotra:~/fsl/git/linux/upstream$ git grep struct|grep __iomem|wc -l
3005

It provides type-safety, and makes accessing the registers more natural.

> > +   struct device_node *guts;
> > +   uint32_t reg = 0;
> 
> s/uint32_t/u32/

Why?

> Also unnecessary initialization.

Given the if/else if/else if/... nature of how reg is initialized, this
seems like a useful and harmless way of making behavior predictable if
there is a bug.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Michal Simek
On 05/06/2015 09:31 AM, Arnd Bergmann wrote:
> On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
>>>> @@ -533,6 +567,16 @@
>>>> interrupts = <0x0 0x4f 0x4>;
>>>> };
>>>>
>>>> +   sdhc0: sdhc@1c00 {
>>>> +   device_type = "sdhc";
>>>
>>> device_type generally should not be used (there are a few exceptions).
>>
>> Okay !!
>>
> 
> While we're at it, please change sdhc@1c00 to mmc@1c00.
> Even though Linux does not care, we try to use the standard device
> names for consistency.

Do we have a list of these names somewhere?
Normally I use the ePAPR generic names recommendation, but mmc and sdhci
are not listed there.
Both combinations, mmc@ and sdhci@, are used in the kernel.

On zynq and zynqmp we do use sdhci@.

Thanks,
Michal



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Arnd Bergmann
On Wednesday 06 May 2015 09:45:15 Michal Simek wrote:
> On 05/06/2015 09:31 AM, Arnd Bergmann wrote:
> > On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
> >>>> @@ -533,6 +567,16 @@
> >>>> interrupts = <0x0 0x4f 0x4>;
> >>>> };
> >>>>
> >>>> +   sdhc0: sdhc@1c00 {
> >>>> +   device_type = "sdhc";
> >>>
> >>> device_type generally should not be used (there are a few exceptions).
> >>
> >> Okay !!
> >>
> > 
> > While we're at it, please change sdhc@1c00 to mmc@1c00.
> > Even though Linux does not care, we try to use the standard device
> > names for consistency.
> 
> Do we have a list of these names somewhere?
> Normally I use the ePAPR generic names recommendation, but mmc and sdhci
> are not listed there.
> Both combinations, mmc@ and sdhci@, are used in the kernel.
> 
> On zynq and zynqmp we do use sdhci@.
> 

Ah, I thought ePAPR listed mmc already. Using "sdhci" is a little too
specific here, since a lot of mmc hosts are not sdhci compliant, and
"sdhc" is completely wrong, because that identifies a specific card
type, but a host that supports SDHC cards will generally also work
with SD (less than 4GB) or SDXC (more than 48GB) cards.

Arnd
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH] mm/thp: Use new function to clear pmd before THP splitting

2015-05-06 Thread Aneesh Kumar K.V
"Kirill A. Shutemov"  writes:

> On Mon, May 04, 2015 at 10:59:16PM +0530, Aneesh Kumar K.V wrote:
>> Archs like ppc64 require pte_t * to remain stable in some code path.
>> They use local_irq_disable to prevent a parallel split. Generic code
>> clear pmd instead of marking it _PAGE_SPLITTING in code path
>> where we can afford to mark pmd none before splitting. Use a
>> variant of pmdp_splitting_clear_notify that arch can override.
>> 
>> Signed-off-by: Aneesh Kumar K.V 
>
> Sorry, I still try wrap my head around this problem.
>
> So, Power has __find_linux_pte_or_hugepte() which does lock-less lookup in
> page tables with local interrupts disabled. For huge pages it casts pmd_t
> to pte_t. Since format of pte_t is different from pmd_t we want to prevent
> transition from pmd pointing to page table to pmd pointing to huge page (and
> back) while interrupts are disabled.
>
> The complication for Power is that it doesn't do implicit IPI on tlb
> flush.
>

s/doesn't do/doesn't need to do/


> Is it correct?

that is correct. I will add more info to the commit message of the patch
I will end up doing.

>
> For THP, split_huge_page() and collapse sides are covered. This patch
> should address two cases of splitting PMD, but not compound page in
> current upstream.
>
> But I think there's still *big* problem for Power -- zap_huge_pmd().
>
> For instance: other CPU can shoot out a THP PMD with MADV_DONTNEED and
> fault in small pages instead. IIUC, for __find_linux_pte_or_hugepte(),
> it's equivalent of splitting.
>
> I don't see how this can be fixed without kick_all_cpus_sync() in all
> pmdp_clear_flush() on Power.
>


Yes, we could run into an issue with that. Thanks for catching this. Now I
am not sure whether we want to do the kick_all_cpus_sync in
pmdp_get_and_clear. We do use that function while updating a huge pte. The
one I am looking at is change_huge_pmd. We don't need an IPI there
and we would really like to avoid the IPI. Any idea why we follow
the sequence of pmd_clear and set_pmd, instead of pmd_update, there?

I looked at the code paths where we clear the pmd but would not
require an IPI. Listing them:

move_huge_pmd
do_huge_pmd_wp_page
migrate_misplaced_transhuge_page
change_huge_pmd.

Of these, IIUC, change_huge_pmd may be called more frequently, and hence we
may want to avoid doing kick_all_cpus_sync there?

One way to fix that would be to switch change_huge_pmd to pmd_update and
then we could do a kick_all_cpus_sync in pmdp_get_and_clear.

-aneesh

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2] mm: vmscan: do not throttle based on pfmemalloc reserves if node has no reclaimable pages

2015-05-06 Thread Vlastimil Babka

On 05/06/2015 12:09 AM, Nishanth Aravamudan wrote:

On 03.04.2015 [10:45:56 -0700], Nishanth Aravamudan wrote:

What I find somewhat worrying though is that we could potentially
break the pfmemalloc_watermark_ok() test in situations where
zone_reclaimable_pages(zone) == 0 is a transient situation (and not
a permanently allocated hugepage). In that case, the throttling is
supposed to help system recover, and we might be breaking that
ability with this patch, no?


Well, if it's transient, we'll skip it this time through, and once there
are reclaimable pages, we should notice it again.

I'm not familiar enough with this logic, so I'll read through the code
again soon to see if your concern is valid, as best I can.


In reviewing the code, I think that transiently unreclaimable zones will
lead to some higher direct reclaim rates and possible contention, but
shouldn't cause any major harm. The likelihood of that situation, as
well, in a non-reserved memory setup like the one I described, seems
exceedingly low.


OK, I guess when a reasonably configured system has nothing to reclaim, 
it's already busted and throttling won't change much.


Consider the patch Acked-by: Vlastimil Babka 


Thanks,
Nish



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/dts: Add 1588 timer node for eTSEC

2015-05-06 Thread Yangbo Lu
Add 1588 timer node in files:
arch/powerpc/boot/dts/bsc9131rdb.dtsi
arch/powerpc/boot/dts/bsc9132qds.dtsi
arch/powerpc/boot/dts/p1010rdb.dtsi
arch/powerpc/boot/dts/p1020rdb-pd.dts
arch/powerpc/boot/dts/p1021rdb-pc.dtsi
arch/powerpc/boot/dts/p1022ds.dtsi
arch/powerpc/boot/dts/p1025twr.dtsi
arch/powerpc/boot/dts/p2020rdb-pc.dtsi

Signed-off-by: Yangbo Lu 
---
 arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 12 
 arch/powerpc/boot/dts/bsc9132qds.dtsi  | 12 
 arch/powerpc/boot/dts/p1010rdb.dtsi| 12 
 arch/powerpc/boot/dts/p1020rdb-pd.dts  | 12 
 arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 12 
 arch/powerpc/boot/dts/p1022ds.dtsi | 12 
 arch/powerpc/boot/dts/p1025twr.dtsi| 12 
 arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 15 +--
 8 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi 
b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
index 45efcba..629cc03 100644
--- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
+++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
@@ -80,6 +80,18 @@
status = "disabled";
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <5>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <0xcccd>;
+   fsl,tmr-fiper1  = <0x3b9ac9fb>;
+   fsl,tmr-fiper2  = <0x00018696>;
+   fsl,max-adj = <24999>;
+   };
+
enet0: ethernet@b {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/bsc9132qds.dtsi 
b/arch/powerpc/boot/dts/bsc9132qds.dtsi
index af8e888..9d8d466 100644
--- a/arch/powerpc/boot/dts/bsc9132qds.dtsi
+++ b/arch/powerpc/boot/dts/bsc9132qds.dtsi
@@ -87,6 +87,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <5>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <0xcccd>;
+   fsl,tmr-fiper1  = <0x3b9ac9fb>;
+   fsl,tmr-fiper2  = <0x00018696>;
+   fsl,max-adj = <24999>;
+   };
+
enet0: ethernet@b {
phy-handle = <&phy0>;
tbi-handle = <&tbi0>;
diff --git a/arch/powerpc/boot/dts/p1010rdb.dtsi 
b/arch/powerpc/boot/dts/p1010rdb.dtsi
index ea534ef..a454cfe 100644
--- a/arch/powerpc/boot/dts/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/p1010rdb.dtsi
@@ -186,6 +186,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <10>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <0x8016>;
+   fsl,tmr-fiper1  = <0x3b9ac9f6>;
+   fsl,tmr-fiper2  = <0x00018696>;
+   fsl,max-adj = <1>;
+   };
+
enet0: ethernet@b {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1020rdb-pd.dts 
b/arch/powerpc/boot/dts/p1020rdb-pd.dts
index 987017e..2d45195 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pd.dts
+++ b/arch/powerpc/boot/dts/p1020rdb-pd.dts
@@ -225,6 +225,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <10>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <0x8016>;
+   fsl,tmr-fiper1  = <0x3b9ac9f6>;
+   fsl,tmr-fiper2  = <0x00018696>;
+   fsl,max-adj = <1>;
+   };
+
enet0: ethernet@b {
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1021rdb-pc.dtsi 
b/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
index d6274c5..3a2c12c 100644
--- a/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
+++ b/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
@@ -224,6 +224,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <10>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <0x8016>;
+   fsl,tmr-fiper1  = <0x3b9ac9f6>;
+   fsl,tmr-fiper2  = <0x00018696>;
+   fsl,max-adj = <1999

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Ulf Hansson
On 5 May 2015 at 12:34, Ulf Hansson  wrote:
> On 5 May 2015 at 11:17, Suman Tripathi  wrote:
>> This patch adds the arasan sdhc nodes to reuse the of-arasan
>> driver for APM X-Gene SoC.
>>
>> Signed-off-by: Suman Tripathi 
>
> I consider this one acked by Arnd, due to:
> http://www.spinics.net/lists/arm-kernel/msg415634.html
>
> Thus applied to my mmc tree.

According to follow up comments, I have dropped this patch from my tree for now.

Kind regards
Uffe
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] kvm/x86: report guest steal time in host

2015-05-06 Thread Naveen N. Rao
Report guest steal time in host task statistics. On x86, this is just
the scheduler run_delay.

Signed-off-by: Naveen N. Rao 
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0ee725f..737b0e4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2094,6 +2094,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 
vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
vcpu->arch.st.steal.version += 2;
+   current->gstime += vcpu->arch.st.accum_steal;
vcpu->arch.st.accum_steal = 0;
 
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
-- 
2.3.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] kvm/powerpc: report guest steal time in host

2015-05-06 Thread Naveen N. Rao
On powerpc, kvm tracks both the guest steal time as well as the time
when guest was idle and this gets sent in to the guest through DTL. The
guest accounts these entries as either steal time or idle time based on
the last running task. Since the true guest idle status is not visible
to the host, we can't accurately expose the guest steal time in the
host.

However, tracking the guest vcpu cede status can get us a reasonable
(within 5% variation) vcpu steal time since guest vcpus cede the
processor on entering the idle task. To do this, we introduce a new
field ceded_st in kvm_vcpu_arch structure to accurately track the guest
vcpu cede status (this is needed since the existing ceded field is
modified before we can use it). During DTL entry creation, we check this
flag and account the time as stolen if the guest vcpu had not ceded.

Signed-off-by: Naveen N. Rao 
---
Tests show that the steal time being reported in the host with this approach is
around 5% higher than the steal time shown in guest. I'd be interested to know
if there are ways to achieve better accounting of the guest steal time in host.

 arch/powerpc/include/asm/kvm_host.h | 1 +
 arch/powerpc/kernel/asm-offsets.c   | 1 +
 arch/powerpc/kvm/book3s_hv.c| 2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 8ef0512..7db48c4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -655,6 +655,7 @@ struct kvm_vcpu_arch {
u64 busy_preempt;
 
u32 emul_inst;
+   u8 ceded_st;
 #endif
 };
 
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 4717859..765c7c4 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -521,6 +521,7 @@ int main(void)
DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, 
arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+   DEFINE(VCPU_CEDED_ST, offsetof(struct kvm_vcpu, arch.ceded_st));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de74756..ad7c0e3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -545,6 +545,8 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
spin_lock_irq(&vcpu->arch.tbacct_lock);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
+   if (!vcpu->arch.ceded_st && stolen)
+   (pid_task(vcpu->pid, PIDTYPE_PID))->gstime += stolen;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
if (!dt || !vpa)
return;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6cbf163..28f304e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -873,6 +873,7 @@ deliver_guest_interrupt:
 fast_guest_return:
li  r0,0
stb r0,VCPU_CEDED(r4)   /* cancel cede */
+   stb r0,VCPU_CEDED_ST(r4)/* cancel cede */
mtspr   SPRN_HSRR0,r10
mtspr   SPRN_HSRR1,r11
 
@@ -1889,6 +1890,7 @@ _GLOBAL(kvmppc_h_cede)
std r11,VCPU_MSR(r3)
li  r0,1
stb r0,VCPU_CEDED(r3)
+   stb r0,VCPU_CEDED_ST(r3)
sync/* order setting ceded vs. testing prodded */
lbz r5,VCPU_PRODDED(r3)
cmpwi   r5,0
@@ -2052,6 +2054,7 @@ kvm_cede_prodded:
stb r0,VCPU_PRODDED(r3)
sync/* order testing prodded vs. clearing ceded */
stb r0,VCPU_CEDED(r3)
+   stb r0,VCPU_CEDED_ST(r3)
li  r3,H_SUCCESS
blr
 
-- 
2.3.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/3] Report guest steal time in host

2015-05-06 Thread Naveen N. Rao
Steal time accounts the time duration during which a guest vcpu was ready to
run, but was not scheduled to run by the hypervisor. This is particularly
relevant in cloud environment where customers would want to use this as an
indicator that their guests are being throttled. However, as it stands today,
guest steal time information is not visible from the hypervisor.

For cloud service providers, this is problematic since they would want to
overcommit cpu resources to achieve optimum resource utilization while at the
same time ensuring guests are not throttled. It is useful for service providers
to have access to the guest steal time data so that they can base their
overcommit/guest packing decisions on this. Higher guest steal time can be used
as a trigger to change how the guests are scheduled, or even migrate guests out
of a system.

This patchset attempts to make the guest steal times available in the host.
This is achieved by introducing a new field in per-task statistics
(/proc/<pid>/stat and /proc/<pid>/task/<tid>/stat) to accumulate per-vcpu steal
time. Programs (such as pidstat) can then be enhanced to report this
information on a per-thread basis.

This should also work for nested virtualization: steal time information for the
guest is readable via /proc/stat, while steal time information for guests
hosted on this hypervisor is readable via /proc/<pid>/task/*/stat.

Also, mpstat always shows steal time information for current (self) guest on a
per-cpu basis. And pidstat can be enhanced to report the same for the hosted
guests on a per-vcpu basis.

As an example:

Guest (self) steal time information using mpstat:


mpstat is run from within the guest.

[root@rhel7-img ~]# mpstat -P ALL 1
Linux 3.19.0nnr (rhel7-img) 04/15/2015  _ppc64_ (4 CPU)

03:13:23 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
03:13:24 PM  all   12.25    0.00    1.25    0.00    1.00    2.25   13.75    0.00    0.00   69.50
03:13:24 PM    0   46.53    0.00    0.00    0.00    0.00    4.95   45.54    0.00    0.00    2.97
03:13:24 PM    1    0.00    0.00    0.00    0.00    0.00    4.04    3.03    0.00    0.00   92.93
03:13:24 PM    2    0.00    0.00    0.00    0.00    3.96    0.99    2.97    0.00    0.00   92.08
03:13:24 PM    3    3.00    0.00    4.00    0.00    0.00    0.00    4.00    0.00    0.00   89.00

03:13:24 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
03:13:25 PM  all   12.59    0.00    0.00    0.00    0.00    0.25   12.35    0.00    0.00   74.81
03:13:25 PM    0   50.00    0.00    0.00    0.00    0.00    0.98   49.02    0.00    0.00    0.00
03:13:25 PM    1    0.98    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00   99.02
03:13:25 PM    2    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
03:13:25 PM    3    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00

03:13:25 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
03:13:26 PM  all   12.99    0.00    0.00    0.00    0.25    0.00   12.75    0.00    0.00   74.02
03:13:26 PM    0   51.96    0.00    0.00    0.00    0.00    0.00   48.04    0.00    0.00    0.00
03:13:26 PM    1    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
03:13:26 PM    2    0.00    0.00    0.00    0.00    0.98    0.00    2.94    0.00    0.00   96.08
03:13:26 PM    3    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00

03:13:26 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
03:13:27 PM  all   12.53    0.00    1.00    0.25    0.00    0.25   12.03    0.00    0.00   73.93
03:13:27 PM    0   51.02    0.00    0.00    0.00    0.00    0.00   48.98    0.00    0.00    0.00
03:13:27 PM    1    0.00    0.00    4.04    0.00    0.00    0.00    0.00    0.00    0.00   95.96
03:13:27 PM    2    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00
03:13:27 PM    3    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00  100.00

Average:     CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
Average:     all   12.91    0.00    0.54    0.01    0.04    0.12   12.39    0.00    0.00   74.00
Average:       0   51.36    0.00    0.03    0.00    0.03    0.26   48.27    0.00    0.00    0.05
Average:       1    0.02    0.00    1.54    0.02    0.02    0.15    0.36    0.00    0.00   97.89
Average:       2    0.00    0.00    0.52    0.00    0.09    0.02    0.36    0.00    0.00   99.02
Average:       3    0.05    0.00    0.07    0.00    0.02    0.09    0.34    0.00    0.00   99.43

Steal time information for hosted guests in host using (locally modified) 
pidstat:
-

pidstat is being run in the host.

[naveen@xx sysstat]$ ./pid

[PATCH 1/3] procfs: add guest steal time in /proc/<pid>/stat

2015-05-06 Thread Naveen N. Rao
Introduce a field in /proc/<pid>/stat to expose guest steal time.

Signed-off-by: Naveen N. Rao 
---
 fs/proc/array.c   | 6 ++
 include/linux/sched.h | 7 +++
 kernel/fork.c | 2 +-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1295a00..d86f00e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -363,6 +363,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
+   cputime_t gstime;
 
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -382,6 +383,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
sigemptyset(&sigcatch);
cutime = cstime = utime = stime = 0;
cgtime = gtime = 0;
+   gstime = 0;
 
if (lock_task_sighand(task, &flags)) {
struct signal_struct *sig = task->signal;
@@ -410,6 +412,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime += task_gtime(t);
+   gstime += task_gstime(t);
} while_each_thread(task, t);
 
min_flt += sig->min_flt;
@@ -432,6 +435,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
maj_flt = task->maj_flt;
task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
+   gstime = task_gstime(task);
}
 
/* scale priority and nice values from timeslices to -20..20 */
@@ -505,6 +509,8 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
else
seq_put_decimal_ll(m, ' ', 0);
 
+   seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gstime));
+
seq_putc(m, '\n');
if (mm)
mmput(mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0eabab9..cb57954 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1429,6 +1429,7 @@ struct task_struct {
 
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
+   cputime_t gstime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
 #endif
@@ -1955,6 +1956,12 @@ static inline cputime_t task_gtime(struct task_struct *t)
return t->gtime;
 }
 #endif
+
+static inline cputime_t task_gstime(struct task_struct *t)
+{
+   return t->gstime;
+}
+
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, 
cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t 
*ut, cputime_t *st);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..529ebe5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1293,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 
init_sigpending(&p->pending);
 
-   p->utime = p->stime = p->gtime = 0;
+   p->utime = p->stime = p->gtime = p->gstime = 0;
p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
-- 
2.3.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 0/3] Report guest steal time in host

2015-05-06 Thread Naveen N. Rao
Arrgh! Sorry about the headers. Please ignore this set. Will repost in a 
separate thread.


- Naveen


On 2015/05/06 04:28PM, Naveen N Rao wrote:
> Steal time accounts the time duration during which a guest vcpu was ready to
> run, but was not scheduled to run by the hypervisor. This is particularly
> relevant in cloud environment where customers would want to use this as an
> indicator that their guests are being throttled. However, as it stands today,
> guest steal time information is not visible from the hypervisor.
> 
> For cloud service providers, this is problematic since they would want to
> overcommit cpu resources to achieve optimum resource utilization while at the
> same time ensuring guests are not throttled. It is useful for service 
> providers
> to have access to the guest steal time data so that they can base their
> overcommit/guest packing decisions on this. Higher guest steal time can be 
> used
> as a trigger to change how the guests are scheduled, or even migrate guests 
> out
> of a system.
> 
> This patchset attempts to make the guest steal times available in the host.
> This is achieved by introducing a new field in per-task statistics
> (/proc/<pid>/stat and /proc/<pid>/task/<tid>/stat) to accumulate per-vcpu 
> steal
> time. Programs (such as pidstat) can then be enhanced to report this
> information on a per-thread basis.
> 
> This should also work for nested virtualization: steal time information for 
> the
> guest is readable via /proc/stat, while steal time information for guests
> hosted on this hypervisor is readable via /proc//task/*/stat.
> 
> Also, mpstat always shows steal time information for current (self) guest on a
> per-cpu basis. And pidstat can be enhanced to report the same for the hosted
> guests on a per-vcpu basis.
> 
> As an example:
> 
> Guest (self) steal time information using mpstat:
> 
> 
> mpstat is run from within the guest.
> 
> [root@rhel7-img ~]# mpstat -P ALL 1
> Linux 3.19.0nnr (rhel7-img)   04/15/2015  _ppc64_ (4 CPU)
> 
> 03:13:23 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
> %guest  %gnice   %idle
> 03:13:24 PM  all   12.250.001.250.001.002.25   13.75
> 0.000.00   69.50
> 03:13:24 PM0   46.530.000.000.000.004.95   45.54
> 0.000.002.97
> 03:13:24 PM10.000.000.000.000.004.043.03
> 0.000.00   92.93
> 03:13:24 PM20.000.000.000.003.960.992.97
> 0.000.00   92.08
> 03:13:24 PM33.000.004.000.000.000.004.00
> 0.000.00   89.00
> 
> 03:13:24 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
> %guest  %gnice   %idle
> 03:13:25 PM  all   12.590.000.000.000.000.25   12.35
> 0.000.00   74.81
> 03:13:25 PM0   50.000.000.000.000.000.98   49.02
> 0.000.000.00
> 03:13:25 PM10.980.000.000.000.000.000.00
> 0.000.00   99.02
> 03:13:25 PM20.000.000.000.000.000.000.00
> 0.000.00  100.00
> 03:13:25 PM30.000.000.000.000.000.000.00
> 0.000.00  100.00
> 
> 03:13:25 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
> %guest  %gnice   %idle
> 03:13:26 PM  all   12.990.000.000.000.250.00   12.75
> 0.000.00   74.02
> 03:13:26 PM0   51.960.000.000.000.000.00   48.04
> 0.000.000.00
> 03:13:26 PM10.000.000.000.000.000.000.00
> 0.000.00  100.00
> 03:13:26 PM20.000.000.000.000.980.002.94
> 0.000.00   96.08
> 03:13:26 PM30.000.000.000.000.000.000.00
> 0.000.00  100.00
> 
> 03:13:26 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
> %guest  %gnice   %idle
> 03:13:27 PM  all   12.530.001.000.250.000.25   12.03
> 0.000.00   73.93
> 03:13:27 PM0   51.020.000.000.000.000.00   48.98
> 0.000.000.00
> 03:13:27 PM10.000.004.040.000.000.000.00
> 0.000.00   95.96
> 03:13:27 PM20.000.000.000.000.000.000.00
> 0.000.00  100.00
> 03:13:27 PM30.000.000.000.000.000.000.00
> 0.000.00  100.00
> 
> Average: CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
> %guest  %gnice   %idle
> Average: all   12.910.000.540.010.040.12   12.39
> 0.000.00   74.00
> Average:   0   51.360.000.030.000.030.26   48.27
> 0.000.000.05
> Average:   10.020.001.540.020.020.150.36
> 0.000.00   97.89
> Average:   20.000.000.520.000.090.020.36   

[PATCH 2/3] kvm/x86: report guest steal time in host

2015-05-06 Thread Naveen N. Rao
Report guest steal time in host task statistics. On x86, this is just
the scheduler run_delay.

Signed-off-by: Naveen N. Rao 
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c73efcd..7107b7d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2128,6 +2128,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 
vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
vcpu->arch.st.steal.version += 2;
+   current->gstime += vcpu->arch.st.accum_steal;
vcpu->arch.st.accum_steal = 0;
 
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
-- 
2.3.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] kvm/powerpc: report guest steal time in host

2015-05-06 Thread Naveen N. Rao
On powerpc, kvm tracks both the guest steal time as well as the time
when guest was idle and this gets sent in to the guest through DTL. The
guest accounts these entries as either steal time or idle time based on
the last running task. Since the true guest idle status is not visible
to the host, we can't accurately expose the guest steal time in the
host.

However, tracking the guest vcpu cede status can get us a reasonable
(within 5% variation) vcpu steal time since guest vcpus cede the
processor on entering the idle task. To do this, we introduce a new
field ceded_st in kvm_vcpu_arch structure to accurately track the guest
vcpu cede status (this is needed since the existing ceded field is
modified before we can use it). During DTL entry creation, we check this
flag and account the time as stolen if the guest vcpu had not ceded.

Signed-off-by: Naveen N. Rao 
---
Tests show that the steal time being reported in the host with this approach is
around 5% higher than the steal time shown in guest. I'd be interested to know
if there are ways to achieve better accounting of the guest steal time in host.

Thanks!
- Naveen

 arch/powerpc/include/asm/kvm_host.h | 1 +
 arch/powerpc/kernel/asm-offsets.c   | 1 +
 arch/powerpc/kvm/book3s_hv.c| 2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index a193a13..48cafd6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -661,6 +661,7 @@ struct kvm_vcpu_arch {
u64 busy_preempt;
 
u32 emul_inst;
+   u8 ceded_st;
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 0034b6b..7c11c84 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -534,6 +534,7 @@ int main(void)
DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, 
arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+   DEFINE(VCPU_CEDED_ST, offsetof(struct kvm_vcpu, arch.ceded_st));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d..7a7e3ab 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -565,6 +565,8 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
spin_lock_irq(&vcpu->arch.tbacct_lock);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
+   if (!vcpu->arch.ceded_st && stolen)
+   (pid_task(vcpu->pid, PIDTYPE_PID))->gstime += stolen;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
if (!dt || !vpa)
return;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4d70df2..80efc31 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -924,6 +924,7 @@ deliver_guest_interrupt:
 fast_guest_return:
li  r0,0
stb r0,VCPU_CEDED(r4)   /* cancel cede */
+   stb r0,VCPU_CEDED_ST(r4)/* cancel cede */
mtspr   SPRN_HSRR0,r10
mtspr   SPRN_HSRR1,r11
 
@@ -2059,6 +2060,7 @@ _GLOBAL(kvmppc_h_cede)/* r3 = vcpu pointer, 
r11 = msr, r13 = paca */
std r11,VCPU_MSR(r3)
li  r0,1
stb r0,VCPU_CEDED(r3)
+   stb r0,VCPU_CEDED_ST(r3)
sync/* order setting ceded vs. testing prodded */
lbz r5,VCPU_PRODDED(r3)
cmpwi   r5,0
@@ -2266,6 +2268,7 @@ kvm_cede_prodded:
stb r0,VCPU_PRODDED(r3)
sync/* order testing prodded vs. clearing ceded */
stb r0,VCPU_CEDED(r3)
+   stb r0,VCPU_CEDED_ST(r3)
li  r3,H_SUCCESS
blr
 
-- 
2.3.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] procfs: add guest steal time in /proc/<pid>/stat

2015-05-06 Thread Naveen N. Rao
Introduce a field in /proc/<pid>/stat to expose guest steal time.

Signed-off-by: Naveen N. Rao 
---
 fs/proc/array.c   | 6 ++
 include/linux/sched.h | 7 +++
 kernel/fork.c | 2 +-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index fd02a9e..ad8e616 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,6 +381,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
+   cputime_t gstime;
 
state = *get_task_state(task);
vsize = eip = esp = 0;
@@ -400,6 +401,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
sigemptyset(&sigcatch);
cutime = cstime = utime = stime = 0;
cgtime = gtime = 0;
+   gstime = 0;
 
if (lock_task_sighand(task, &flags)) {
struct signal_struct *sig = task->signal;
@@ -428,6 +430,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime += task_gtime(t);
+   gstime += task_gstime(t);
} while_each_thread(task, t);
 
min_flt += sig->min_flt;
@@ -450,6 +453,7 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
maj_flt = task->maj_flt;
task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
+   gstime = task_gstime(task);
}
 
/* scale priority and nice values from timeslices to -20..20 */
@@ -523,6 +527,8 @@ static int do_task_stat(struct seq_file *m, struct 
pid_namespace *ns,
else
seq_put_decimal_ll(m, ' ', 0);
 
+   seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gstime));
+
seq_putc(m, '\n');
if (mm)
mmput(mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e61..e28f869 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1430,6 +1430,7 @@ struct task_struct {
 
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
+   cputime_t gstime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
 #endif
@@ -1956,6 +1957,12 @@ static inline cputime_t task_gtime(struct task_struct *t)
return t->gtime;
 }
 #endif
+
+static inline cputime_t task_gstime(struct task_struct *t)
+{
+   return t->gstime;
+}
+
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, 
cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t 
*ut, cputime_t *st);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 03c1eaa..edf4ffb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1339,7 +1339,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 
init_sigpending(&p->pending);
 
-   p->utime = p->stime = p->gtime = 0;
+   p->utime = p->stime = p->gtime = p->gstime = 0;
p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
-- 
2.3.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/3] Report guest steal time in host

2015-05-06 Thread Naveen N. Rao
Steal time accounts the time duration during which a guest vcpu was ready to
run, but was not scheduled to run by the hypervisor. This is particularly
relevant in cloud environment where customers would want to use this as an
indicator that their guests are being throttled. However, as it stands today,
guest steal time information is not visible from the hypervisor.

For cloud service providers, this is problematic since they would want to
overcommit cpu resources to achieve optimum resource utilization while at the
same time ensuring guests are not throttled. It is useful for service providers
to have access to the guest steal time data so that they can base their
overcommit/guest packing decisions on this. Higher guest steal time can be used
as a trigger to change how the guests are scheduled, or even migrate guests out
of a system.

This patchset attempts to make the guest steal times available in the host.
This is achieved by introducing a new field in per-task statistics
(/proc/<pid>/stat and /proc/<pid>/task/<tid>/stat) to accumulate per-vcpu steal
time. Programs (such as pidstat) can then be enhanced to report this
information on a per-thread basis.

This should also work for nested virtualization: steal time information for the
guest is readable via /proc/stat, while steal time information for guests
hosted on this hypervisor is readable via /proc/<pid>/task/*/stat.

Also, mpstat always shows steal time information for current (self) guest on a
per-cpu basis. And pidstat can be enhanced to report the same for the hosted
guests on a per-vcpu basis.

As an example:

Guest (self) steal time information using mpstat:


mpstat is run from within the guest.

[root@rhel7-img ~]# mpstat -P ALL 1
Linux 3.19.0nnr (rhel7-img) 04/15/2015  _ppc64_ (4 CPU)

03:13:23 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
%guest  %gnice   %idle
03:13:24 PM  all   12.250.001.250.001.002.25   13.75
0.000.00   69.50
03:13:24 PM0   46.530.000.000.000.004.95   45.54
0.000.002.97
03:13:24 PM10.000.000.000.000.004.043.03
0.000.00   92.93
03:13:24 PM20.000.000.000.003.960.992.97
0.000.00   92.08
03:13:24 PM33.000.004.000.000.000.004.00
0.000.00   89.00

03:13:24 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
%guest  %gnice   %idle
03:13:25 PM  all   12.590.000.000.000.000.25   12.35
0.000.00   74.81
03:13:25 PM0   50.000.000.000.000.000.98   49.02
0.000.000.00
03:13:25 PM10.980.000.000.000.000.000.00
0.000.00   99.02
03:13:25 PM20.000.000.000.000.000.000.00
0.000.00  100.00
03:13:25 PM30.000.000.000.000.000.000.00
0.000.00  100.00

03:13:25 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
%guest  %gnice   %idle
03:13:26 PM  all   12.990.000.000.000.250.00   12.75
0.000.00   74.02
03:13:26 PM0   51.960.000.000.000.000.00   48.04
0.000.000.00
03:13:26 PM10.000.000.000.000.000.000.00
0.000.00  100.00
03:13:26 PM20.000.000.000.000.980.002.94
0.000.00   96.08
03:13:26 PM30.000.000.000.000.000.000.00
0.000.00  100.00

03:13:26 PM  CPU%usr   %nice%sys %iowait%irq   %soft  %steal  
%guest  %gnice   %idle
03:13:27 PM  all   12.530.001.000.250.000.25   12.03
0.000.00   73.93
03:13:27 PM0   51.020.000.000.000.000.00   48.98
0.000.000.00
03:13:27 PM10.000.004.040.000.000.000.00
0.000.00   95.96
03:13:27 PM20.000.000.000.000.000.000.00
0.000.00  100.00
03:13:27 PM30.000.000.000.000.000.000.00
0.000.00  100.00

Average:     CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
Average:     all   12.91    0.00    0.54    0.01    0.04    0.12   12.39    0.00    0.00   74.00
Average:       0   51.36    0.00    0.03    0.00    0.03    0.26   48.27    0.00    0.00    0.05
Average:       1    0.02    0.00    1.54    0.02    0.02    0.15    0.36    0.00    0.00   97.89
Average:       2    0.00    0.00    0.52    0.00    0.09    0.02    0.36    0.00    0.00   99.02
Average:       3    0.05    0.00    0.07    0.00    0.02    0.09    0.34    0.00    0.00   99.43

Steal time information for hosted guests in host using (locally modified) 
pidstat:
-

pidstat is being run in the host.

[naveen@xx sysstat]$ ./pid

Re: No HDMI Audio with Radeon HD7750 on Acube Sam460ex AMCC powerpc 460ex board

2015-05-06 Thread Julian Margetson

git bisect good
38aef1549b18539eaecd804383a6ccb6588a9ce1 is the first bad commit
commit 38aef1549b18539eaecd804383a6ccb6588a9ce1
Author: Alex Deucher 
Date:   Tue Apr 7 10:20:49 2015 -0400

drm/radeon: only enable audio streams if the monitor supports it

Selectively enable which packets we send based on monitor caps.

Signed-off-by: Alex Deucher 

Cc: sta...@vger.kernel.org

:04 04 e27f98a4a15d2f30baf3ea92420112874b0b1c34 
17af34604734b63ce30cfa3637fa1fa1fa1a38f4 M  drivers


Problems with HDMI audio connection continue.
Kernel 4.0.0 will not boot with HDMI connected only DVI. HD6670 & HD7750 
tested .

Kernel 4.1.0-rc1 same. HD7750 tested .
Kernel 4.1.0-rc2 will only boot if monitor connected with DVI to VGA 
adapter . HD7750 tested.


U-Boot 2010.06.05a (Jan 30 2012 - 16:06:56)

CPU:   AMCC PowerPC 460EX Rev. B at 1155 MHz (PLB=231 OPB=115 EBC=115)
   No Security/Kasumi support
   Bootstrap Option H - Boot ROM Location I2C (Addr 0x52)
   Internal PCI arbiter enabled
   32 kB I-Cache 32 kB D-Cache
Board: Sam460ex, PCIe 4x + PCIe 1x
I2C:   ready
DRAM:  2 GiB (ECC not enabled, 462 MHz, CL4)
PCI:   Bus Dev VenId DevId Class Int
00  04  1095  3512  0104  00
00  06  126f  0501  0380  00
PCIE0: successfully set as root-complex
03  00  1412  1724  0401  ff
02  00  1b21  1080  0604  00
PCIE1: successfully set as root-complex
05  00  1002  683f  0300  ff
Net:   ppc_4xx_eth0
FPGA:  Revision 03 (2010-10-07)
SM502: found
VGA:   1
VESA:  OK

[2.752985] Generic non-volatile memory driver v1.1
[2.758430] Linux agpgart interface v0.103
[2.762867] [drm] Initialized drm 1.1.0 20060810
[2.767774] [drm] radeon kernel modesetting enabled.
[2.773908] [drm] initializing kernel modesetting (VERDE 0x1002:0x683F 
0x1545:0x7750).
[2.782084] [drm] register mmio base: 0xe9000
[2.786859] [drm] register mmio size: 262144
[3.123010] ATOM BIOS: C44501
[3.126284] radeon 0001:81:00.0: VRAM: 1024M 0x - 
0x3FFF (1024M used)
[3.135213] radeon 0001:81:00.0: GTT: 1024M 0x4000 - 
0x7FFF
[3.142897] [drm] Detected VRAM RAM=1024M, BAR=256M
[3.147791] [drm] RAM width 128bits DDR
[3.151836] [TTM] Zone  kernel: Available graphics memory: 379234 kiB
[3.158325] [TTM] Zone highmem: Available graphics memory: 1034594 kiB
[3.164877] [TTM] Initializing pool allocator
[3.169271] [TTM] Initializing DMA pool allocator
[3.174101] [drm] radeon: 1024M of VRAM memory ready
[3.179107] [drm] radeon: 1024M of GTT memory ready.
[3.184146] [drm] Loading verde Microcode
[3.188210] [drm] Internal thermal controller with fan control
[3.194379] [drm] probing gen 2 caps for device aaa1:bed1 = 18cc41/0
[3.248227] [drm] radeon: dpm initialized
[3.252458] [drm] GART: num cpu pages 262144, num gpu pages 262144
[3.264041] [drm] probing gen 2 caps for device aaa1:bed1 = 18cc41/0
[3.298301] [drm] PCIE GART of 1024M enabled (table at 0x00277000).
[3.305645] radeon 0001:81:00.0: WB enabled
[3.309885] radeon 0001:81:00.0: fence driver on ring 0 use gpu addr 
0x4c00 and cpu addr 0xffc01c00
[3.320010] radeon 0001:81:00.0: fence driver on ring 1 use gpu addr 
0x4c04 and cpu addr 0xffc01c04
[3.330130] radeon 0001:81:00.0: fence driver on ring 2 use gpu addr 
0x4c08 and cpu addr 0xffc01c08
[3.340252] radeon 0001:81:00.0: fence driver on ring 3 use gpu addr 
0x4c0c and cpu addr 0xffc01c0c
[3.350374] radeon 0001:81:00.0: fence driver on ring 4 use gpu addr 
0x4c10 and cpu addr 0xffc01c10
[3.382285] radeon 0001:81:00.0: fence driver on ring 5 use gpu addr 
0x00075a18 and cpu addr 0xf90b5a18
[3.392430] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[3.399069] [drm] Driver supports precise vblank timestamp query.
[3.405190] radeon 0001:81:00.0: radeon: MSI limited to 32-bit
[3.411071] ppc4xx_setup_msi_irqs: fail allocating msi interrupt
[3.417195] [drm] radeon: irq initialized.
[4.175889] [drm:r600_ring_test] *ERROR* radeon: ring 0 test failed 
(scratch(0x850C)=0xCAFEDEAD)
[4.184750] radeon 0001:81:00.0: disabling GPU acceleration
[4.388914] [drm] Radeon Display Connectors
[4.393501] [drm] Connector 0:
[4.396624] [drm]   HDMI-A-1
[4.399626] [drm]   HPD4
[4.402211] [drm]   DDC: 0x6570 0x6570 0x6574 0x6574 0x6578 0x6578 0x657c 
0x657c
[4.409664] [drm]   Encoders:
[4.412683] [drm] DFP1: INTERNAL_UNIPHY2
[4.416983] [drm] Connector 1:
[4.420056] [drm]   DVI-I-1
[4.422868] [drm]   HPD2
[4.425423] [drm]   DDC: 0x6560 0x6560 0x6564 0x6564 0x6568 0x6568 0x656c 
0x656c
[4.432841] [drm]   Encoders:
[4.435828] [drm] DFP2: INTERNAL_UNIPHY
[4.440029] [drm] CRT1: INTERNAL_KLDSCP_DAC1
[4.560536] [drm] fb mappable at 0x80478000
[4.564756] [drm] vra

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Suman Tripathi
On Wed, May 6, 2015 at 2:10 PM, Arnd Bergmann  wrote:

> On Wednesday 06 May 2015 09:45:15 Michal Simek wrote:
> > On 05/06/2015 09:31 AM, Arnd Bergmann wrote:
> > > On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
> >  @@ -533,6 +567,16 @@
> >  interrupts = <0x0 0x4f 0x4>;
> >  };
> > 
> >  +   sdhc0: sdhc@1c00 {
> >  +   device_type = "sdhc";
> > >>>
> > >>> device_type generally should not be used (there are a few
> exceptions).
> > >>
> > >> Okay !!
> > >>
> > >
> > > While we're at it, please change sdhc@1c00 to mmc@1c00.
> > > Even though Linux does not care, we try to use the standard device
> > > names for consistency.
> >
> > Do we have a list of these names somewhere?
> > Normally I do use ePARP - generic names recommendation but mmc or sdhci
> > are not listed there.
> > Both combination mmc@ or sdhci@ are used in the kernel.
> >
> > On zynq and zynqmp we do use shdci@.
> >
>
> Ah, I thought ePAPR listed mmc already. Using "sdhci" is a little too
> specific here, since a lot of mmc hosts are not sdhci compliant, and
> "sdhc" is completely wrong, because that identifies a specific card
> type, but a host that supports SDHC cards will generally also work
> with SD (less than 4GB) or SDXC (more than 48GB) cards.
>

Agree on this . Will change it.

>
> Arnd
>



-- 
Thanks,
with regards,
Suman Tripathi
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Suman Tripathi
Hi Arnd,

On Wed, May 6, 2015 at 5:27 PM, Suman Tripathi  wrote:

>
>
> On Wed, May 6, 2015 at 2:10 PM, Arnd Bergmann  wrote:
>
>> On Wednesday 06 May 2015 09:45:15 Michal Simek wrote:
>> > On 05/06/2015 09:31 AM, Arnd Bergmann wrote:
>> > > On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
>> >  @@ -533,6 +567,16 @@
>> >  interrupts = <0x0 0x4f 0x4>;
>> >  };
>> > 
>> >  +   sdhc0: sdhc@1c00 {
>> >  +   device_type = "sdhc";
>> > >>>
>> > >>> device_type generally should not be used (there are a few
>> exceptions).
>> > >>
>> > >> Okay !!
>> > >>
>> > >
>> > > While we're at it, please change sdhc@1c00 to mmc@1c00.
>> > > Even though Linux does not care, we try to use the standard device
>> > > names for consistency.
>> >
>> > Do we have a list of these names somewhere?
>> > Normally I do use ePARP - generic names recommendation but mmc or sdhci
>> > are not listed there.
>> > Both combination mmc@ or sdhci@ are used in the kernel.
>> >
>> > On zynq and zynqmp we do use shdci@.
>> >
>>
>> Ah, I thought ePAPR listed mmc already. Using "sdhci" is a little too
>> specific here, since a lot of mmc hosts are not sdhci compliant, and
>> "sdhc" is completely wrong, because that identifies a specific card
>> type, but a host that supports SDHC cards will generally also work
>> with SD (less than 4GB) or SDXC (more than 48GB) cards.
>>
>
> Agree on this . Will change it.
>

One more point: as we are reusing the arasan driver, is it compulsory to
use the name used in the binding info
for arasan? It is sdhci for arasan.


>
>> Arnd
>>
>
>
>
> --
> Thanks,
> with regards,
> Suman Tripathi
>



-- 
Thanks,
with regards,
Suman Tripathi
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/3] kvm/powerpc: report guest steal time in host

2015-05-06 Thread Christian Borntraeger
Am 06.05.2015 um 13:56 schrieb Naveen N. Rao:
> On powerpc, kvm tracks both the guest steal time as well as the time
> when guest was idle and this gets sent in to the guest through DTL. The
> guest accounts these entries as either steal time or idle time based on
> the last running task. Since the true guest idle status is not visible
> to the host, we can't accurately expose the guest steal time in the
> host.
> 
> However, tracking the guest vcpu cede status can get us a reasonable
> (within 5% variation) vcpu steal time since guest vcpus cede the
> processor on entering the idle task. To do this, we introduce a new
> field ceded_st in kvm_vcpu_arch structure to accurately track the guest
> vcpu cede status (this is needed since the existing ceded field is
> modified before we can use it). During DTL entry creation, we check this
> flag and account the time as stolen if the guest vcpu had not ceded.

I think this is more or less a question about the semantic:

What would happen if you use  current->sched_info.run_delay like x86 also
on power? How far are the numbers away? My feeling is, that the semantics
of "steal time" inside the guest is somewhat different on each platform. 

This brings me to a 2nd question:
Do you need to match the host view of guest steal time with the guest view
or do we want to have a host view that translates as "this is the time that
the guest was runnable but we were too busy to schedule him"?
For the former x86 has the best solution, as the host tells the guest its
understanding of steal - so both match. For the latter we actually try to
give guest steal a meaning in the host context  - the overload.
Would /proc/<pid>/schedstat value 2 (time spent waiting on a runqueue)
meet your requirements from the cover-letter?

Christian


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Michal Simek
On 05/06/2015 10:40 AM, Arnd Bergmann wrote:
> On Wednesday 06 May 2015 09:45:15 Michal Simek wrote:
>> On 05/06/2015 09:31 AM, Arnd Bergmann wrote:
>>> On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
>> @@ -533,6 +567,16 @@
>> interrupts = <0x0 0x4f 0x4>;
>> };
>>
>> +   sdhc0: sdhc@1c00 {
>> +   device_type = "sdhc";
>
> device_type generally should not be used (there are a few exceptions).

 Okay !!

>>>
>>> While we're at it, please change sdhc@1c00 to mmc@1c00.
>>> Even though Linux does not care, we try to use the standard device
>>> names for consistency.
>>
>> Do we have a list of these names somewhere?
>> Normally I do use ePARP - generic names recommendation but mmc or sdhci
>> are not listed there.
>> Both combination mmc@ or sdhci@ are used in the kernel.
>>
>> On zynq and zynqmp we do use shdci@.
>>
> 
> Ah, I thought ePAPR listed mmc already. Using "sdhci" is a little too
> specific here, since a lot of mmc hosts are not sdhci compliant, and
> "sdhc" is completely wrong, because that identifies a specific card
> type, but a host that supports SDHC cards will generally also work
> with SD (less than 4GB) or SDXC (more than 48GB) cards.

Yes "sdhc" is completely wrong.

Based on our datasheet(also version used on Zynq and ZynqMP) this IP is
compliant with SD HC 3.00, SDIO 3.0, SD MC 3.01 SD MCS 1.01, MMC 4.51.
Not sure about the version which they use.
Also not sure which spec the IP should have to be able to say that we
can use sdhci name. Do you have exact SPEC name?

Thanks,
Michal

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6 0/2] Add SDHCI support for APM X-Gene SoC using ARASAN SDHCI controller.

2015-05-06 Thread Suman Tripathi
This patch adds the SDHCI support for APM X-Gene SoC using ARASAN SDHCI 
controller.

v1 change:
 * Use the CONFIG_ARM64_DMA_HAS_IOMMU for dma-mapping.

v2 change:
 * Drop the IOMMU support and switch to PIO mode for the arasan
   controller integrated inside APM X-Gene SoC.

v3 change:
 * Change the sdhci-of-arasan.c to support arasan4.9a.
 * Add quirks for arasan4.9a.

v4 change:
 * Cleanup the Documentation and dts.

v5 change:
 * Rebase the dts files.
 * Drop patch 2 and 3 as it is applied.

v6 change:
 * Clean the unrequired properties from dts.
 * Rename sdhc to sdhci.
 * support to disable timing using capability register read.

Signed-off-by: Suman Tripathi 
---

Suman Tripathi (2):
  arm64: dts: Add the arasan sdhci nodes in apm-storm.dtsi.
  mmc: sdhci: Add support to disable SDR104/SDR50/DDR50 based on
capability register 0.

 arch/arm64/boot/dts/apm/apm-mustang.dts |  4 +++
 arch/arm64/boot/dts/apm/apm-storm.dtsi  | 43 +
 drivers/mmc/host/sdhci.c|  3 ++-
 3 files changed, 49 insertions(+), 1 deletion(-)

--
1.8.2.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6 1/2] arm64: dts: Add the arasan sdhci nodes in apm-storm.dtsi.

2015-05-06 Thread Suman Tripathi
This patch adds the arasan sdhci nodes to reuse the of-arasan
driver for APM X-Gene SoC.

Signed-off-by: Suman Tripathi 
---
 arch/arm64/boot/dts/apm/apm-mustang.dts |  4 +++
 arch/arm64/boot/dts/apm/apm-storm.dtsi  | 43 +
 2 files changed, 47 insertions(+)

diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts 
b/arch/arm64/boot/dts/apm/apm-mustang.dts
index 83578e7..7ccd517 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -52,3 +52,7 @@
 &xgenet {
status = "ok";
 };
+
+&sdhci0 {
+   status = "ok";
+};
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi 
b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index c8d3e0e..b5d2698 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -145,6 +145,40 @@
clock-output-names = "socplldiv2";
};

+   ahbclk: ahbclk@1f2ac000 {
+   compatible = "apm,xgene-device-clock";
+   #clock-cells = <1>;
+   clocks = <&socplldiv2 0>;
+   reg = <0x0 0x1f2ac000 0x0 0x1000
+   0x0 0x1700 0x0 0x2000>;
+   reg-names = "csr-reg", "div-reg";
+   csr-offset = <0x0>;
+   csr-mask = <0x1>;
+   enable-offset = <0x8>;
+   enable-mask = <0x1>;
+   divider-offset = <0x164>;
+   divider-width = <0x5>;
+   divider-shift = <0x0>;
+   clock-output-names = "ahbclk";
+   };
+
+   sdioclk: sdioclk@1f2ac000 {
+   compatible = "apm,xgene-device-clock";
+   #clock-cells = <1>;
+   clocks = <&socplldiv2 0>;
+   reg = <0x0 0x1f2ac000 0x0 0x1000
+   0x0 0x1700 0x0 0x2000>;
+   reg-names = "csr-reg", "div-reg";
+   csr-offset = <0x0>;
+   csr-mask = <0x2>;
+   enable-offset = <0x8>;
+   enable-mask = <0x2>;
+   divider-offset = <0x178>;
+   divider-width = <0x8>;
+   divider-shift = <0x0>;
+   clock-output-names = "sdioclk";
+   };
+
qmlclk: qmlclk {
compatible = "apm,xgene-device-clock";
#clock-cells = <1>;
@@ -533,6 +567,15 @@
interrupts = <0x0 0x4f 0x4>;
};

+   sdhci0: sdhci@1c00 {
+   compatible = "arasan,sdhci-4.9a";
+   reg = <0x0 0x1c00 0x0 0x100>;
+   interrupts = <0x0 0x49 0x4>;
+   dma-coherent;
+   clock-names = "clk_xin", "clk_ahb";
+   clocks = <&sdioclk 0>, <&ahbclk 0>;
+   };
+
phy1: phy@1f21a000 {
compatible = "apm,xgene-phy";
reg = <0x0 0x1f21a000 0x0 0x100>;
--
1.8.2.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v6 2/2] mmc: sdhci: Add support to disable SDR104/SDR50/DDR50 based on capability register 0.

2015-05-06 Thread Suman Tripathi
The sdhci framework disables SDR104/SDR50/DDR50 based only on a quirk.
This patch adds the support to disable SDR104/SDR50/DDR50 based on
reading the capability register 0.

Signed-off-by: Suman Tripathi 
---
 drivers/mmc/host/sdhci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index c80287a..e024c64 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -3199,7 +3199,8 @@ int sdhci_add_host(struct sdhci_host *host)
}
}

-   if (host->quirks2 & SDHCI_QUIRK2_NO_1_8_V)
+   if (host->quirks2 & SDHCI_QUIRK2_NO_1_8_V ||
+   !(caps[0] & SDHCI_CAN_VDD_180))
caps[1] &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 |
   SDHCI_SUPPORT_DDR50);

--
1.8.2.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 02/12] KVM: define common __KVM_GUESTDBG_USE_SW/HW_BP values

2015-05-06 Thread Alex Bennée
Currently x86, powerpc and soon arm64 use the same two architecture
specific bits for guest debug support for software and hardware
breakpoints. This makes the shared values explicit while leaving the
gate open for another architecture to use some other value if they
really really want to.

Signed-off-by: Alex Bennée 
Reviewed-by: Andrew Jones 

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index ab4d473..1731569 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -310,8 +310,8 @@ struct kvm_guest_debug_arch {
  * and upper 16 bits are architecture specific. Architecture specific defines
  * that ioctl is for setting hardware breakpoint or software breakpoint.
  */
-#define KVM_GUESTDBG_USE_SW_BP 0x0001
-#define KVM_GUESTDBG_USE_HW_BP 0x0002
+#define KVM_GUESTDBG_USE_SW_BP __KVM_GUESTDBG_USE_SW_BP
+#define KVM_GUESTDBG_USE_HW_BP __KVM_GUESTDBG_USE_HW_BP
 
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d7dcef5..1438202 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -250,8 +250,8 @@ struct kvm_debug_exit_arch {
__u64 dr7;
 };
 
-#define KVM_GUESTDBG_USE_SW_BP 0x0001
-#define KVM_GUESTDBG_USE_HW_BP 0x0002
+#define KVM_GUESTDBG_USE_SW_BP __KVM_GUESTDBG_USE_SW_BP
+#define KVM_GUESTDBG_USE_HW_BP __KVM_GUESTDBG_USE_HW_BP
 #define KVM_GUESTDBG_INJECT_DB 0x0004
 #define KVM_GUESTDBG_INJECT_BP 0x0008
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 70ac641..3b6252e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -570,8 +570,16 @@ struct kvm_s390_irq_state {
 
 /* for KVM_SET_GUEST_DEBUG */
 
-#define KVM_GUESTDBG_ENABLE0x0001
-#define KVM_GUESTDBG_SINGLESTEP0x0002
+#define KVM_GUESTDBG_ENABLE(1 << 0)
+#define KVM_GUESTDBG_SINGLESTEP(1 << 1)
+
+/*
+ * Architecture specific stuff uses the top 16 bits of the field,
+ * however there is some shared commonality for the common cases
+ */
+#define __KVM_GUESTDBG_USE_SW_BP   (1 << 16)
+#define __KVM_GUESTDBG_USE_HW_BP   (1 << 17)
+
 
 struct kvm_guest_debug {
__u32 control;
-- 
2.3.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/3] kvm/powerpc: report guest steal time in host

2015-05-06 Thread Naveen N. Rao
On 2015/05/06 02:46PM, Christian Borntraeger wrote:
> Am 06.05.2015 um 13:56 schrieb Naveen N. Rao:
> > On powerpc, kvm tracks both the guest steal time as well as the time
> > when guest was idle and this gets sent in to the guest through DTL. The
> > guest accounts these entries as either steal time or idle time based on
> > the last running task. Since the true guest idle status is not visible
> > to the host, we can't accurately expose the guest steal time in the
> > host.
> > 
> > However, tracking the guest vcpu cede status can get us a reasonable
> > (within 5% variation) vcpu steal time since guest vcpus cede the
> > processor on entering the idle task. To do this, we introduce a new
> > field ceded_st in kvm_vcpu_arch structure to accurately track the guest
> > vcpu cede status (this is needed since the existing ceded field is
> > modified before we can use it). During DTL entry creation, we check this
> > flag and account the time as stolen if the guest vcpu had not ceded.
> 
> I think this is more or less a question about the semantic:
> 
> What would happen if you use  current->sched_info.run_delay like x86 also
> on power? How far are the numbers away?

The numbers were quite off and didn't quite make sense.

> My feeling is, that the semantics
> of "steal time" inside the guest is somewhat different on each platform. 
> 
> This brings me to a 2nd question:
> Do you need to match the host view of guest steal time with the guest view
> or do we want to have a host view that translates as "this is the time that
> the guest was runnable but we were too busy to schedule him"?

Very good point. This is probably good enough for our purpose and I'd 
like to think my current patchset does something similar for powerpc. We 
don't report the exact steal time as seen from within the guest, but a 
close approximation of it. We count all time that a vcpu was not-idle as 
steal. This includes time we were doing something in the host on behalf 
of the vcpu as well as time when we were just doing something else. I 
don't know if we can separate these two or if that would be desirable.  
The scheduler statistics don't seem to accurately reflect this on ppc.

> For the former x86 has the best solution, as the host tells the guest its
> understanding of steal - so both match. For the latter we actually try to
> give guest steal a meaning in the host context  - the overload.
> Would /proc/<pid>/schedstat value 2 (time spent waiting on a runqueue)
> meet your requirements from the cover-letter?

This looks to be the same as sched_info.run_delay, which doesn't seem to 
reflect the wait on the runqueue. I will recheck this on ppc tomorrow.

As an aside, do you happen to know if /proc/<pid>/schedstat accurately 
reports the "overload" on s390?


Thanks!
- Naveen

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCHv2 00/10] add 842 hw compression for PowerNV platform

2015-05-06 Thread Dan Streetman
IBM PowerPC processors starting at version P7+ contain a NX coprocessor
that provides various hw-accelerated functions, one of which is memory
compression to the IBM "842" compression format.  This NX-842 coprocessor
is already supported on the pSeries platform, by the nx-842.c driver and
the crypto compression interface at crypto/842.c.  This patch set adds
support for NX-842 on the PowerNV (Non-Virtualized) platform, as well as
adding a full software 842 compression/decompression implementation.

Quick summary of changes: the current 842 crypto compression interface uses
only the 842 hardware on pSeries platforms, and can handle only page-sized
and page-aligned uncompressed buffers.  These patches add a full software
842 implementation, change the crypto/ directory 842 interface to a
software only implementation, add a 842 hardware crypto compression
interface that can handle any size and alignment buffers, add a
driver for 842 hardware on PowerNV platforms, and create a common
interface for both 842 hardware platform drivers.

The existing pSeries platform NX-842 driver could not be re-used for the
PowerNV platform driver, as there are fundamentally different interfaces;
on pSeries the system hypervisor (pHyp) provides the interface and manages
communication with the coprocessor, while on PowerNV the kernel talks directly
to the coprocessor using the ICSWX instruction.  The data structures used to
describe each compression or decompression request to the coprocessor are
also different between pHyp's interface and direct communication with ICSWX.
So, different drivers for pSeries and PowerNV are required.  Adding the new
PowerNV driver but keeping the interface to the drivers the same required
adding a new common frontend interface, to which only one of the platform
drivers will connect (based on what platform the kernel is currently running
on), and moving some functionality out of the existing pSeries driver into a
more common location.

The existing crypto/842.c interface is in the wrong place, since crypto/
should only contain software implementations; so lib/842/ is added
containing a reference (i.e. rather slow) implementation in software
of both 842 compression and 842 decompression.  The crypto/842.c interface
is changed to use only that software implementation.

The hardware 842 crypto compression interface is moved to
drivers/crypto/nx/nx-842-crypto.c.  It is also modified to be able to
handle any alignment/length input or output buffer; currently it is only
able to handle page-size and page-aligned (uncompressed) buffers, due to
restrictions in the pSeries 842 hardware driver.

Note that several of these patches have changed significantly since the
last patch series; I didn't list specific differences since there are
so many.

Dan Streetman (10):
  powerpc: export of_get_ibm_chip_id function
  powerpc: Add ICSWX instruction
  lib: add software 842 compression/decompression
  crypto: change 842 alg to use software
  drivers/crypto/nx: rename nx-842.c to nx-842-pseries.c
  drivers/crypto/nx: add NX-842 platform frontend driver
  drivers/crypto/nx: add nx842 constraints
  drivers/crypto/nx: add PowerNV platform NX-842 driver
  drivers/crypto/nx: simplify pSeries nx842 driver
  drivers/crypto/nx: add hardware 842 crypto comp alg

 MAINTAINERS   |5 +-
 arch/powerpc/include/asm/icswx.h  |  184 
 arch/powerpc/include/asm/ppc-opcode.h |   13 +
 arch/powerpc/kernel/prom.c|1 +
 crypto/842.c  |  175 +---
 crypto/Kconfig|7 +-
 drivers/crypto/Kconfig|   10 +-
 drivers/crypto/nx/Kconfig |   55 +-
 drivers/crypto/nx/Makefile|6 +
 drivers/crypto/nx/nx-842-crypto.c |  603 
 drivers/crypto/nx/nx-842-powernv.c|  625 +
 drivers/crypto/nx/nx-842-pseries.c| 1128 +++
 drivers/crypto/nx/nx-842.c| 1623 +++--
 drivers/crypto/nx/nx-842.h|  131 +++
 include/linux/nx842.h |   21 +-
 include/linux/sw842.h |   12 +
 lib/842/842.h |  127 +++
 lib/842/842_compress.c|  626 +
 lib/842/842_debugfs.h |   52 ++
 lib/842/842_decompress.c  |  405 
 lib/842/Makefile  |2 +
 lib/Kconfig   |6 +
 lib/Makefile  |2 +
 23 files changed, 4138 insertions(+), 1681 deletions(-)
 create mode 100644 arch/powerpc/include/asm/icswx.h
 create mode 100644 drivers/crypto/nx/nx-842-crypto.c
 create mode 100644 drivers/crypto/nx/nx-842-powernv.c
 create mode 100644 drivers/crypto/nx/nx-842-pseries.c
 create mode 100644 drivers/crypto/nx/nx-842.h
 create mode 100644 include/linux/sw842.h
 create mode 100644 lib/842/842.h
 create mode 100644 lib/842/842_compress.c
 create mode 100644 lib/842/842_debugfs.h
 cr

[PATCH 01/10] powerpc: export of_get_ibm_chip_id function

2015-05-06 Thread Dan Streetman
Export the of_get_ibm_chip_id() function.  This will be used by the
PowerNV NX-842 driver.

Signed-off-by: Dan Streetman 
---
 arch/powerpc/kernel/prom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 308c5e1..ea2cea7 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -800,6 +800,7 @@ int of_get_ibm_chip_id(struct device_node *np)
}
return -1;
 }
+EXPORT_SYMBOL(of_get_ibm_chip_id);
 
 /**
  * cpu_to_chip_id - Return the cpus chip-id
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 02/10] powerpc: Add ICSWX instruction

2015-05-06 Thread Dan Streetman
Add the asm ICSWX and ICSWEPX opcodes.  Add definitions for the
Coprocessor Request structures needed to use the icswx calls to
coprocessors.  Add icswx() function to perform the ICSWX asm
using the provided Coprocessor Command Word value and
Coprocessor Request Block structure.

This is required for communication with the NX-842 coprocessor on
a PowerNV system.

Signed-off-by: Dan Streetman 
---
 arch/powerpc/include/asm/icswx.h  | 184 ++
 arch/powerpc/include/asm/ppc-opcode.h |  13 +++
 2 files changed, 197 insertions(+)
 create mode 100644 arch/powerpc/include/asm/icswx.h

diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
new file mode 100644
index 000..9f8402b
--- /dev/null
+++ b/arch/powerpc/include/asm/icswx.h
@@ -0,0 +1,184 @@
+/*
+ * ICSWX api
+ *
+ * Copyright (C) 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This provides the Initiate Coprocessor Store Word Indexed (ICSWX)
+ * instruction.  This instruction is used to communicate with PowerPC
+ * coprocessors.  This also provides definitions of the structures used
+ * to communicate with the coprocessor.
+ *
+ * The RFC02130: Coprocessor Architecture document is the reference for
+ * everything in this file unless otherwise noted.
+ */
+#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+
+#include  /* for PPC_ICSWX */
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE  (0x3fff)
+#define CCB_ADDRESS(0xfff8)
+#define CCB_CM (0x0007)
+#define CCB_CM0(0x0004)
+#define CCB_CM12   (0x0003)
+
+#define CCB_CM0_ALL_COMPLETIONS(0x0)
+#define CCB_CM0_LAST_IN_CHAIN  (0x4)
+#define CCB_CM12_STORE (0x0)
+#define CCB_CM12_INTERRUPT (0x1)
+
+#define CCB_SIZE   (0x10)
+#define CCB_ALIGN  CCB_SIZE
+
+struct coprocessor_completion_block {
+   __be64 value;
+   __be64 address;
+} __packed __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V  (0x80)
+#define CSB_F  (0x04)
+#define CSB_CH (0x03)
+#define CSB_CE_INCOMPLETE  (0x80)
+#define CSB_CE_TERMINATION (0x40)
+#define CSB_CE_TPBC(0x20)
+
+#define CSB_CC_SUCCESS (0)
+#define CSB_CC_INVALID_ALIGN   (1)
+#define CSB_CC_OPERAND_OVERLAP (2)
+#define CSB_CC_DATA_LENGTH (3)
+#define CSB_CC_TRANSLATION (5)
+#define CSB_CC_PROTECTION  (6)
+#define CSB_CC_RD_EXTERNAL (7)
+#define CSB_CC_INVALID_OPERAND (8)
+#define CSB_CC_PRIVILEGE   (9)
+#define CSB_CC_INTERNAL(10)
+#define CSB_CC_WR_EXTERNAL (12)
+#define CSB_CC_NOSPC   (13)
+#define CSB_CC_EXCESSIVE_DDE   (14)
+#define CSB_CC_WR_TRANSLATION  (15)
+#define CSB_CC_WR_PROTECTION   (16)
+#define CSB_CC_UNKNOWN_CODE(17)
+#define CSB_CC_ABORT   (18)
+#define CSB_CC_TRANSPORT   (20)
+#define CSB_CC_SEGMENTED_DDL   (31)
+#define CSB_CC_PROGRESS_POINT  (32)
+#define CSB_CC_DDE_OVERFLOW(33)
+#define CSB_CC_SESSION (34)
+#define CSB_CC_PROVISION   (36)
+#define CSB_CC_CHAIN   (37)
+#define CSB_CC_SEQUENCE(38)
+#define CSB_CC_HW  (39)
+
+#define CSB_SIZE   (0x10)
+#define CSB_ALIGN  CSB_SIZE
+
+struct coprocessor_status_block {
+   u8 flags;
+   u8 cs;
+   u8 cc;
+   u8 ce;
+   __be32 count;
+   __be64 address;
+} __packed __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P  (0x8000)
+
+#define DDE_SIZE   (0x10)
+#define DDE_ALIGN  DDE_SIZE
+
+struct data_descriptor_entry {
+   __be16 flags;
+   u8 count;
+   u8 index;
+   __be32 length;
+   __be64 address;
+} __packed __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE   (0x80)
+#define CRB_ALIGN  (0x100) /* Errata: requires 256 alignment */
+
+/* Coprocessor Status Block field
+ *   ADDRESS   address of CSB
+ *   C CCB is valid
+ *   AT0 = addrs are virtual, 1 = addrs are phys
+ *   M enable perf monitor
+ */
+#define CRB_CSB_ADDRESS(0xfff0)
+#define CRB_CSB_C  (0x0008)
+#define CRB_CSB_AT (0x0002)
+#define CRB_CSB_M  (0x0001)
+
+struct coprocessor_request_block {
+   __be32 ccw;
+   __be32 flags;
+   __be64 csb_addr;
+
+   

[PATCH 03/10] lib: add software 842 compression/decompression

2015-05-06 Thread Dan Streetman
Add 842-format software compression and decompression functions.
Update the MAINTAINERS 842 section to include the new files.

The 842 compression function can compress any input data into the 842
compression format.  The 842 decompression function can decompress any
standard-format 842 compressed data - specifically, either a compressed
data buffer created by the 842 software compression function, or a
compressed data buffer created by the 842 hardware compressor (located
in PowerPC coprocessors).

The 842 compressed data format is explained in the header comments.

This is used in a later patch to provide a full software 842 compression
and decompression crypto interface.

Signed-off-by: Dan Streetman 
---
 MAINTAINERS  |   2 +
 include/linux/sw842.h|  12 +
 lib/842/842.h| 127 ++
 lib/842/842_compress.c   | 626 +++
 lib/842/842_debugfs.h|  52 
 lib/842/842_decompress.c | 405 ++
 lib/842/Makefile |   2 +
 lib/Kconfig  |   6 +
 lib/Makefile |   2 +
 9 files changed, 1234 insertions(+)
 create mode 100644 include/linux/sw842.h
 create mode 100644 lib/842/842.h
 create mode 100644 lib/842/842_compress.c
 create mode 100644 lib/842/842_debugfs.h
 create mode 100644 lib/842/842_decompress.c
 create mode 100644 lib/842/Makefile

diff --git a/MAINTAINERS b/MAINTAINERS
index 781e099..116af01 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4872,6 +4872,8 @@ M:Dan Streetman 
 S: Supported
 F: drivers/crypto/nx/nx-842.c
 F: include/linux/nx842.h
+F: include/linux/sw842.h
+F: lib/842/
 
 IBM Power Linux RAID adapter
 M: Brian King 
diff --git a/include/linux/sw842.h b/include/linux/sw842.h
new file mode 100644
index 000..109ba04
--- /dev/null
+++ b/include/linux/sw842.h
@@ -0,0 +1,12 @@
+#ifndef __SW842_H__
+#define __SW842_H__
+
+#define SW842_MEM_COMPRESS (0xf000)
+
+int sw842_compress(const u8 *src, unsigned int srclen,
+  u8 *dst, unsigned int *destlen, void *wmem);
+
+int sw842_decompress(const u8 *src, unsigned int srclen,
+u8 *dst, unsigned int *destlen);
+
+#endif
diff --git a/lib/842/842.h b/lib/842/842.h
new file mode 100644
index 000..7c20003
--- /dev/null
+++ b/lib/842/842.h
@@ -0,0 +1,127 @@
+
+#ifndef __842_H__
+#define __842_H__
+
+/* The 842 compressed format is made up of multiple blocks, each of
+ * which have the format:
+ *
+ * [arg1][arg2][arg3][arg4]
+ *
+ * where there are between 0 and 4 template args, depending on the specific
+ * template operation.  For normal operations, each arg is either a specific
+ * number of data bytes to add to the output buffer, or an index pointing
+ * to a previously-written number of data bytes to copy to the output buffer.
+ *
+ * The template code is a 5-bit value.  This code indicates what to do with
+ * the following data.  Template codes from 0 to 0x19 should use the template
+ * table, the static "decomp_ops" table used in decompress.  For each template
+ * (table row), there are between 1 and 4 actions; each action corresponds to
+ * an arg following the template code bits.  Each action is either a "data"
+ * type action, or a "index" type action, and each action results in 2, 4, or 8
+ * bytes being written to the output buffer.  Each template (i.e. all actions
+ * in the table row) will add up to 8 bytes being written to the output buffer.
+ * Any row with less than 4 actions is padded with noop actions, indicated by
+ * N0 (for which there is no corresponding arg in the compressed data buffer).
+ *
+ * "Data" actions, indicated in the table by D2, D4, and D8, mean that the
+ * corresponding arg is 2, 4, or 8 bytes, respectively, in the compressed data
+ * buffer should be copied directly to the output buffer.
+ *
+ * "Index" actions, indicated in the table by I2, I4, and I8, mean the
+ * corresponding arg is an index parameter that points to, respectively, a 2,
+ * 4, or 8 byte value already in the output buffer, that should be copied to
+ * the end of the output buffer.  Essentially, the index points to a position
+ * in a ring buffer that contains the last N bytes of output buffer data.
+ * The number of bits for each index's arg are: 8 bits for I2, 9 bits for I4,
+ * and 8 bits for I8.  Since each index points to a 2, 4, or 8 byte section,
+ * this means that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4
+ * can reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference 2048
+ * bytes ((2^8 = 256) * 8 bytes).  Think of it as a kind-of ring buffer for
+ * each of I2, I4, and I8 that are updated for each byte written to the output
+ * buffer.  In this implementation, the output buffer is directly used for each
+ * index; there is no additional memory required.  Note that the index is into
+ * a ring buffer, not a sliding window; for example, if there have been 260
+ * bytes written to the output buffer, an 

[PATCH 04/10] crypto: change 842 alg to use software

2015-05-06 Thread Dan Streetman
Change the crypto 842 compression alg to use the software 842 compression
and decompression library.  Change the name of this crypto alg to "sw842".
Remove the fallback to LZO compression.

Previously, this crypto compression alg attempted 842 compression using
PowerPC hardware, and fell back to LZO compression and decompression if
the 842 PowerPC hardware was unavailable or failed.  This should not
fall back to any other compression method, however; users of this crypto
compression alg can fallback if desired, and transparent fallback tricks
callers into thinking they are getting 842 compression when they actually
get LZO compression - the failure of the 842 hardware should not be
transparent to the caller.

The crypto compression alg for a hardware device also should not be located
in crypto/ so this is now a software-only implementation that uses the 842
software compression/decompression library.  Since users of the "842" alg
expected hardware compression, the name of this software-only alg is
changed to "sw842"; the new hardware 842 crypto compression alg will be
aliased to "842" in a later patch.

Signed-off-by: Dan Streetman 
---
 MAINTAINERS|   1 +
 crypto/842.c   | 175 -
 crypto/Kconfig |   7 +--
 3 files changed, 41 insertions(+), 142 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 116af01..5a5c1dc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4873,6 +4873,7 @@ S:Supported
 F: drivers/crypto/nx/nx-842.c
 F: include/linux/nx842.h
 F: include/linux/sw842.h
+F: crypto/842.c
 F: lib/842/
 
 IBM Power Linux RAID adapter
diff --git a/crypto/842.c b/crypto/842.c
index b48f4f1..c43b157 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -1,5 +1,5 @@
 /*
- * Cryptographic API for the 842 compression algorithm.
+ * Cryptographic API for the 842 software compression algorithm.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -11,173 +11,72 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * Copyright (C) IBM Corporation, 2011-2015
  *
- * Copyright (C) IBM Corporation, 2011
+ * Original Authors: Robert Jennings 
+ *   Seth Jennings 
  *
- * Authors: Robert Jennings 
- *  Seth Jennings 
+ * Rewrite: Dan Streetman 
+ *
+ * This is the software implementation of compression and decompression using
+ * the 842 format.  This uses the software 842 library at lib/842/ which is
+ * only a reference implementation, and is very, very slow as compared to other
+ * software compressors.  You probably do not want to use this software
+ * compression.  If you have access to the PowerPC 842 compression hardware, 
you
+ * want to use the 842 hardware compression interface, which is at:
+ * drivers/crypto/nx/nx-842-crypto.c
  */
 
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
-
-static int nx842_uselzo;
-
-struct nx842_ctx {
-   void *nx842_wmem; /* working memory for 842/lzo */
-};
+#include 
 
-enum nx842_crypto_type {
-   NX842_CRYPTO_TYPE_842,
-   NX842_CRYPTO_TYPE_LZO
-};
-
-#define NX842_SENTINEL 0xdeadbeef
+#define CRYPTO842_NAME "sw842"
 
-struct nx842_crypto_header {
-   unsigned int sentinel; /* debug */
-   enum nx842_crypto_type type;
+struct crypto842_ctx {
+   char wmem[SW842_MEM_COMPRESS];  /* working memory for compress */
 };
 
-static int nx842_init(struct crypto_tfm *tfm)
-{
-   struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-   int wmemsize;
-
-   wmemsize = max_t(int, nx842_get_workmem_size(), LZO1X_MEM_COMPRESS);
-   ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS);
-   if (!ctx->nx842_wmem)
-   return -ENOMEM;
-
-   return 0;
-}
-
-static void nx842_exit(struct crypto_tfm *tfm)
+static int crypto842_compress(struct crypto_tfm *tfm,
+ const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen)
 {
-   struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
+   struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
 
-   kfree(ctx->nx842_wmem);
+   return sw842_compress(src, slen, dst, dlen, ctx->wmem);
 }
 
-static void nx842_reset_uselzo(unsigned long data)
+static int crypto842_decompress(struct crypto_tfm *tfm,
+   const u8 *src, unsigned int slen,
+   u8 *dst, unsigned int *dlen)
 {
-   nx842_uselzo = 0;
-}
-
-static DEFINE_TIMER(failover_timer, nx842_reset_uselzo, 0, 0);
-
-static int nx842_crypto_compress(struct crypto_tfm *tfm, const u8 *src,
-   unsigned int slen, u8 *ds

[PATCH 05/10] drivers/crypto/nx: rename nx-842.c to nx-842-pseries.c

2015-05-06 Thread Dan Streetman
Move the entire NX-842 driver for the pSeries platform from the file
nx-842.c to nx-842-pseries.c.  This is required by later patches that
add NX-842 support for the PowerNV platform.

This patch does not alter the content of the pSeries NX-842 driver at
all, it only changes the filename.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/Makefile |2 +-
 drivers/crypto/nx/nx-842-pseries.c | 1603 
 drivers/crypto/nx/nx-842.c | 1603 
 3 files changed, 1604 insertions(+), 1604 deletions(-)
 create mode 100644 drivers/crypto/nx/nx-842-pseries.c
 delete mode 100644 drivers/crypto/nx/nx-842.c

diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index bb770ea..8669ffa 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -11,4 +11,4 @@ nx-crypto-objs := nx.o \
  nx-sha512.o
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
-nx-compress-objs := nx-842.o
+nx-compress-objs := nx-842-pseries.o
diff --git a/drivers/crypto/nx/nx-842-pseries.c 
b/drivers/crypto/nx/nx-842-pseries.c
new file mode 100644
index 000..887196e
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -0,0 +1,1603 @@
+/*
+ * Driver for IBM Power 842 compression accelerator
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Authors: Robert Jennings 
+ *  Seth Jennings 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "nx_csbcpb.h" /* struct nx_csbcpb */
+
+#define MODULE_NAME "nx-compress"
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Jennings ");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+
+#define SHIFT_4K 12
+#define SHIFT_64K 16
+#define SIZE_4K (1UL << SHIFT_4K)
+#define SIZE_64K (1UL << SHIFT_64K)
+
+/* IO buffer must be 128 byte aligned */
+#define IO_BUFFER_ALIGN 128
+
+struct nx842_header {
+   int blocks_nr; /* number of compressed blocks */
+   int offset; /* offset of the first block (from beginning of header) */
+   int sizes[0]; /* size of compressed blocks */
+};
+
+static inline int nx842_header_size(const struct nx842_header *hdr)
+{
+   return sizeof(struct nx842_header) +
+   hdr->blocks_nr * sizeof(hdr->sizes[0]);
+}
+
+/* Macros for fields within nx_csbcpb */
+/* Check the valid bit within the csbcpb valid field */
+#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
+
+/* CE macros operate on the completion_extension field bits in the csbcpb.
+ * CE0 0=full completion, 1=partial completion
+ * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
+ * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
+#define NX842_CSBCPB_CE0(x)(x & BIT_MASK(7))
+#define NX842_CSBCPB_CE1(x)(x & BIT_MASK(6))
+#define NX842_CSBCPB_CE2(x)(x & BIT_MASK(5))
+
+/* The NX unit accepts data only on 4K page boundaries */
+#define NX842_HW_PAGE_SHIFTSHIFT_4K
+#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
+#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
+
+enum nx842_status {
+   UNAVAILABLE,
+   AVAILABLE
+};
+
+struct ibm_nx842_counters {
+   atomic64_t comp_complete;
+   atomic64_t comp_failed;
+   atomic64_t decomp_complete;
+   atomic64_t decomp_failed;
+   atomic64_t swdecomp;
+   atomic64_t comp_times[32];
+   atomic64_t decomp_times[32];
+};
+
+static struct nx842_devdata {
+   struct vio_dev *vdev;
+   struct device *dev;
+   struct ibm_nx842_counters *counters;
+   unsigned int max_sg_len;
+   unsigned int max_sync_size;
+   unsigned int max_sync_sg;
+   enum nx842_status status;
+} __rcu *devdata;
+static DEFINE_SPINLOCK(devdata_mutex);
+
+#define NX842_COUNTER_INC(_x) \
+static inline void nx842_inc_##_x( \
+   const struct nx842_devdata *dev) { \
+   if (dev) \
+   atomic64_inc(&dev->counters->_x); \
+}
+NX842_COUNTER_INC(comp_complete);
+NX842_COUNTER_INC(comp_failed);
+NX842_COUNTER_INC(decomp_complete);
+NX842_COUNTER_INC(decomp_failed);
+NX842_COUNTER_INC(swdecomp);
+
+#define NX842_HIST_SLOTS 16
+
+static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int t

[PATCH 07/10] drivers/crypto/nx: add nx842 constraints

2015-05-06 Thread Dan Streetman
Add "constraints" for the NX-842 driver.  The constraints are used to
indicate what the current NX-842 platform driver is capable of.  The
constraints tell the NX-842 user what alignment, min and max length, and
length multiple each provided buffer should conform to.  These are
required because the 842 hardware requires buffers to meet specific
constraints that vary based on platform - for example, the pSeries
max length is much lower than the PowerNV max length.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/nx-842-pseries.c | 10 ++
 drivers/crypto/nx/nx-842.c | 38 ++
 drivers/crypto/nx/nx-842.h |  2 ++
 include/linux/nx842.h  |  9 +
 4 files changed, 59 insertions(+)

diff --git a/drivers/crypto/nx/nx-842-pseries.c 
b/drivers/crypto/nx/nx-842-pseries.c
index 9b83c9e..cb481d8 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -40,6 +40,13 @@ MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power 
processors");
 /* IO buffer must be 128 byte aligned */
 #define IO_BUFFER_ALIGN 128
 
+static struct nx842_constraints nx842_pseries_constraints = {
+   .alignment =IO_BUFFER_ALIGN,
+   .multiple = DDE_BUFFER_LAST_MULT,
+   .minimum =  IO_BUFFER_ALIGN,
+   .maximum =  PAGE_SIZE, /* dynamic, max_sync_size */
+};
+
 struct nx842_header {
int blocks_nr; /* number of compressed blocks */
int offset; /* offset of the first block (from beginning of header) */
@@ -842,6 +849,8 @@ static int nx842_OF_upd_maxsyncop(struct nx842_devdata 
*devdata,
goto out;
}
 
+   nx842_pseries_constraints.maximum = devdata->max_sync_size;
+
devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
maxsynccop->decomp_sg_limit);
if (devdata->max_sync_sg < 1) {
@@ -1115,6 +1124,7 @@ static struct attribute_group nx842_attribute_group = {
 
 static struct nx842_driver nx842_pseries_driver = {
.owner =THIS_MODULE,
+   .constraints =  &nx842_pseries_constraints,
.compress = nx842_pseries_compress,
.decompress =   nx842_pseries_decompress,
 };
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index f1f378e..160fe2d 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -86,6 +86,44 @@ static void put_driver(struct nx842_driver *driver)
module_put(driver->owner);
 }
 
+/**
+ * nx842_constraints
+ *
+ * This provides the driver's constraints.  Different nx842 implementations
+ * may have varying requirements.  The constraints are:
+ *   @alignment:   All buffers should be aligned to this
+ *   @multiple:    All buffer lengths should be a multiple of this
+ *   @minimum: Buffer lengths must not be less than this amount
+ *   @maximum: Buffer lengths must not be more than this amount
+ *
+ * The constraints apply to all buffers and lengths, both input and output,
+ * for both compression and decompression, except for the minimum which
+ * only applies to compression input and decompression output; the
+ * compressed data can be less than the minimum constraint.  It can be
+ * assumed that compressed data will always adhere to the multiple
+ * constraint.
+ *
+ * The driver may succeed even if these constraints are violated;
+ * however the driver can return failure or suffer reduced performance
+ * if any constraint is not met.
+ */
+int nx842_constraints(struct nx842_constraints *c)
+{
+   struct nx842_driver *driver = get_driver();
+   int ret = 0;
+
+   if (!driver)
+   return -ENODEV;
+
+   BUG_ON(!c);
+   memcpy(c, driver->constraints, sizeof(*c));
+
+   put_driver(driver);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_constraints);
+
 int nx842_compress(const unsigned char *in, unsigned int in_len,
   unsigned char *out, unsigned int *out_len,
   void *wrkmem)
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index 2a5d4e1..c6ceb0f 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -12,6 +12,8 @@
 struct nx842_driver {
struct module *owner;
 
+   struct nx842_constraints *constraints;
+
int (*compress)(const unsigned char *in, unsigned int in_len,
unsigned char *out, unsigned int *out_len,
void *wrkmem);
diff --git a/include/linux/nx842.h b/include/linux/nx842.h
index d919c22..aa1a97e9 100644
--- a/include/linux/nx842.h
+++ b/include/linux/nx842.h
@@ -5,6 +5,15 @@
 
 #define NX842_MEM_COMPRESS __NX842_PSERIES_MEM_COMPRESS
 
+struct nx842_constraints {
+   int alignment;
+   int multiple;
+   int minimum;
+   int maximum;
+};
+
+int nx842_constraints(struct nx842_constraints *constraints);
+
 int nx842_compress(const unsigned char *in, unsigned 

[PATCH 08/10] drivers/crypto/nx: add PowerNV platform NX-842 driver

2015-05-06 Thread Dan Streetman
Add driver for NX-842 hardware on the PowerNV platform.

This allows the use of the 842 compression hardware coprocessor on
the PowerNV platform.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/Kconfig  |  10 +
 drivers/crypto/nx/Makefile |   2 +
 drivers/crypto/nx/nx-842-powernv.c | 625 +
 drivers/crypto/nx/nx-842-pseries.c |   9 -
 drivers/crypto/nx/nx-842.c |   4 +-
 drivers/crypto/nx/nx-842.h |  97 ++
 include/linux/nx842.h  |   6 +-
 7 files changed, 741 insertions(+), 12 deletions(-)
 create mode 100644 drivers/crypto/nx/nx-842-powernv.c

diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index 34013f7..ee9e259 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -40,4 +40,14 @@ config CRYPTO_DEV_NX_COMPRESS_PSERIES
  algorithm.  This supports NX hardware on the pSeries platform.
  If you choose 'M' here, this module will be called 
nx_compress_pseries.
 
+config CRYPTO_DEV_NX_COMPRESS_POWERNV
+   tristate "Compression acceleration support on PowerNV platform"
+   depends on PPC_POWERNV
+   default y
+   help
+ Support for PowerPC Nest (NX) compression acceleration. This
+ module supports acceleration for compressing memory with the 842
+ algorithm.  This supports NX hardware on the PowerNV platform.
+ If you choose 'M' here, this module will be called 
nx_compress_powernv.
+
 endif
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index 5d9f4bc..6619787 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -12,5 +12,7 @@ nx-crypto-objs := nx.o \
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o
 nx-compress-objs := nx-842.o
 nx-compress-pseries-objs := nx-842-pseries.o
+nx-compress-powernv-objs := nx-842-powernv.o
diff --git a/drivers/crypto/nx/nx-842-powernv.c 
b/drivers/crypto/nx/nx-842-powernv.c
new file mode 100644
index 000..6a9fb8b
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -0,0 +1,625 @@
+/*
+ * Driver for IBM PowerNV 842 compression accelerator
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "nx-842.h"
+
+#include 
+
+#include 
+#include 
+
+#define MODULE_NAME NX842_POWERNV_MODULE_NAME
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman ");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
+
+#define WORKMEM_ALIGN  (CRB_ALIGN)
+#define CSB_WAIT_MAX   (5000) /* ms */
+
+struct nx842_workmem {
+   /* Below fields must be properly aligned */
+   struct coprocessor_request_block crb; /* CRB_ALIGN align */
+   struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
+   struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
+   /* Above fields must be properly aligned */
+
+   ktime_t start;
+
+   char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
+} __packed __aligned(WORKMEM_ALIGN);
+
+struct nx842_coproc {
+   unsigned int chip_id;
+   unsigned int ct;
+   unsigned int ci;
+   struct list_head list;
+};
+
+/* no cpu hotplug on powernv, so this list never changes after init */
+static LIST_HEAD(nx842_coprocs);
+static unsigned int nx842_ct;
+
+/**
+ * setup_indirect_dde - Setup an indirect DDE
+ *
+ * The DDE is set up with the DDE count, byte count, and address of
+ * first direct DDE in the list.
+ */
+static void setup_indirect_dde(struct data_descriptor_entry *dde,
+  struct data_descriptor_entry *ddl,
+  unsigned int dde_count, unsigned int byte_count)
+{
+   dde->flags = 0;
+   dde->count = dde_count;
+   dde->index = 0;
+   dde->length = cpu_to_be32(byte_count);
+   dde->address = cpu_to_be64(nx842_get_pa(ddl));
+}
+
+/**
+ * setup_direct_dde - Setup single DDE from buffer
+ *
+ * The DDE is set up with the buffer and length.  The buffer must be properly
+ * aligned.  The used length is returned.
+ * Returns:
+ *   N    Successfully set up DDE with N bytes
+ */
+static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
+unsigned long pa, unsigned int len)
+{
+   unsigned int l = 

[PATCH 06/10] drivers/crypto/nx: add NX-842 platform frontend driver

2015-05-06 Thread Dan Streetman
Add NX-842 frontend that allows using either the pSeries platform or
PowerNV platform driver (to be added by later patch) for the NX-842
hardware.  Update the MAINTAINERS file to include the new filenames.
Update Kconfig files to clarify titles and descriptions, and correct
dependencies.

Signed-off-by: Dan Streetman 
---
 MAINTAINERS|   2 +-
 drivers/crypto/Kconfig |  10 +--
 drivers/crypto/nx/Kconfig  |  35 ++---
 drivers/crypto/nx/Makefile |   4 +-
 drivers/crypto/nx/nx-842-pseries.c |  57 +++
 drivers/crypto/nx/nx-842.c | 144 +
 drivers/crypto/nx/nx-842.h |  32 +
 include/linux/nx842.h  |  10 +--
 8 files changed, 245 insertions(+), 49 deletions(-)
 create mode 100644 drivers/crypto/nx/nx-842.c
 create mode 100644 drivers/crypto/nx/nx-842.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 5a5c1dc..e71855f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4870,7 +4870,7 @@ F:drivers/crypto/nx/
 IBM Power 842 compression accelerator
 M: Dan Streetman 
 S: Supported
-F: drivers/crypto/nx/nx-842.c
+F: drivers/crypto/nx/nx-842*
 F: include/linux/nx842.h
 F: include/linux/sw842.h
 F: crypto/842.c
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 033c0c8..872de26 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -312,11 +312,13 @@ config CRYPTO_DEV_S5P
  algorithms execution.
 
 config CRYPTO_DEV_NX
-   bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
-   depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN
-   default n
+   bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration"
+   depends on PPC64
help
- Support for Power7+ in-Nest cryptographic acceleration.
+ This enables support for the NX hardware cryptographic accelerator
+ coprocessor that is in IBM PowerPC P7+ or later processors.  This
+ does not actually enable any drivers, it only allows you to select
+ which acceleration type (encryption and/or compression) to enable.
 
 if CRYPTO_DEV_NX
source "drivers/crypto/nx/Kconfig"
diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index f826166..34013f7 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -1,7 +1,9 @@
+
 config CRYPTO_DEV_NX_ENCRYPT
-   tristate "Encryption acceleration support"
-   depends on PPC64 && IBMVIO
+   tristate "Encryption acceleration support on pSeries platform"
+   depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
default y
+   select CRYPTO_ALGAPI
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_ECB
@@ -12,15 +14,30 @@ config CRYPTO_DEV_NX_ENCRYPT
select CRYPTO_SHA256
select CRYPTO_SHA512
help
- Support for Power7+ in-Nest encryption acceleration. This
- module supports acceleration for AES and SHA2 algorithms. If you
- choose 'M' here, this module will be called nx_crypto.
+ Support for PowerPC Nest (NX) encryption acceleration. This
+ module supports acceleration for AES and SHA2 algorithms on
+ the pSeries platform.  If you choose 'M' here, this module
+ will be called nx_crypto.
 
 config CRYPTO_DEV_NX_COMPRESS
tristate "Compression acceleration support"
-   depends on PPC64 && IBMVIO
default y
help
- Support for Power7+ in-Nest compression acceleration. This
- module supports acceleration for AES and SHA2 algorithms. If you
- choose 'M' here, this module will be called nx_compress.
+ Support for PowerPC Nest (NX) compression acceleration. This
+ module supports acceleration for compressing memory with the 842
+ algorithm.  One of the platform drivers must be selected also.
+ If you choose 'M' here, this module will be called nx_compress.
+
+if CRYPTO_DEV_NX_COMPRESS
+
+config CRYPTO_DEV_NX_COMPRESS_PSERIES
+   tristate "Compression acceleration support on pSeries platform"
+   depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
+   default y
+   help
+ Support for PowerPC Nest (NX) compression acceleration. This
+ module supports acceleration for compressing memory with the 842
+ algorithm.  This supports NX hardware on the pSeries platform.
+ If you choose 'M' here, this module will be called 
nx_compress_pseries.
+
+endif
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index 8669ffa..5d9f4bc 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -11,4 +11,6 @@ nx-crypto-objs := nx.o \
  nx-sha512.o
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
-nx-compress-objs := nx-842-pseries.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o
+nx-compress-objs := nx-842.o
+nx-co

[PATCH 09/10] drivers/crypto/nx: simplify pSeries nx842 driver

2015-05-06 Thread Dan Streetman
Simplify the pSeries NX-842 driver: do not expect incoming buffers to be
exactly page-sized; do not break up input buffers to compress smaller
blocks; do not use any internal headers in the compressed data blocks;
remove the software decompression implementation; implement the pSeries
nx842_constraints.

This changes the pSeries NX-842 driver to perform constraints-based
compression so that it only needs to compress one entire input block at a
time.  This removes the need for it to split input data blocks into
multiple compressed data sections in the output buffer, and removes the
need for any extra header info in the compressed data; all that is moved
(in a later patch) into the main crypto 842 driver.  Additionally, the
842 software decompression implementation is no longer needed here, as
the crypto 842 driver will use the generic software 842 decompression
function as a fallback if any hardware 842 driver fails.

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/nx-842-pseries.c | 779 -
 1 file changed, 153 insertions(+), 626 deletions(-)

diff --git a/drivers/crypto/nx/nx-842-pseries.c 
b/drivers/crypto/nx/nx-842-pseries.c
index 6db9992..85837e9 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -21,7 +21,6 @@
  *  Seth Jennings 
  */
 
-#include 
 #include 
 
 #include "nx-842.h"
@@ -32,11 +31,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Robert Jennings ");
 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
 
-#define SHIFT_4K 12
-#define SHIFT_64K 16
-#define SIZE_4K (1UL << SHIFT_4K)
-#define SIZE_64K (1UL << SHIFT_64K)
-
 /* IO buffer must be 128 byte aligned */
 #define IO_BUFFER_ALIGN 128
 
@@ -47,18 +41,52 @@ static struct nx842_constraints nx842_pseries_constraints = 
{
.maximum =  PAGE_SIZE, /* dynamic, max_sync_size */
 };
 
-struct nx842_header {
-   int blocks_nr; /* number of compressed blocks */
-   int offset; /* offset of the first block (from beginning of header) */
-   int sizes[0]; /* size of compressed blocks */
-};
-
-static inline int nx842_header_size(const struct nx842_header *hdr)
+static int check_constraints(unsigned long buf, unsigned int *len, bool in)
 {
-   return sizeof(struct nx842_header) +
-   hdr->blocks_nr * sizeof(hdr->sizes[0]);
+   if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) {
+   pr_debug("%s buffer 0x%lx not aligned to 0x%x\n",
+in ? "input" : "output", buf,
+nx842_pseries_constraints.alignment);
+   return -EINVAL;
+   }
+   if (*len % nx842_pseries_constraints.multiple) {
+   pr_debug("%s buffer len 0x%x not multiple of 0x%x\n",
+in ? "input" : "output", *len,
+nx842_pseries_constraints.multiple);
+   if (in)
+   return -EINVAL;
+   *len = round_down(*len, nx842_pseries_constraints.multiple);
+   }
+   if (*len < nx842_pseries_constraints.minimum) {
+   pr_debug("%s buffer len 0x%x under minimum 0x%x\n",
+in ? "input" : "output", *len,
+nx842_pseries_constraints.minimum);
+   return -EINVAL;
+   }
+   if (*len > nx842_pseries_constraints.maximum) {
+   pr_debug("%s buffer len 0x%x over maximum 0x%x\n",
+in ? "input" : "output", *len,
+nx842_pseries_constraints.maximum);
+   if (in)
+   return -EINVAL;
+   *len = nx842_pseries_constraints.maximum;
+   }
+   return 0;
 }
 
+/* I assume we need to align the CSB? */
+#define WORKMEM_ALIGN  (256)
+
+struct nx842_workmem {
+   /* scatterlist */
+   char slin[4096];
+   char slout[4096];
+   /* coprocessor status/parameter block */
+   struct nx_csbcpb csbcpb;
+
+   char padding[WORKMEM_ALIGN];
+} __aligned(WORKMEM_ALIGN);
+
 /* Macros for fields within nx_csbcpb */
 /* Check the valid bit within the csbcpb valid field */
 #define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
@@ -72,8 +100,7 @@ static inline int nx842_header_size(const struct 
nx842_header *hdr)
 #define NX842_CSBCPB_CE2(x)(x & BIT_MASK(5))
 
 /* The NX unit accepts data only on 4K page boundaries */
-#define NX842_HW_PAGE_SHIFTSHIFT_4K
-#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
+#define NX842_HW_PAGE_SIZE (4096)
 #define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1))
 
 enum nx842_status {
@@ -194,41 +221,6 @@ static int nx842_build_scatterlist(unsigned long buf, int 
len,
return 0;
 }
 
-/*
- * Working memory for software decompression
- */
-struct sw842_fifo {
-   union {
-   char f8[256][8];
-   char f4[512][4];
-   };
-   char f2[256][2];
-   unsigned char f84_full;
-   unsigned

[PATCH 10/10] drivers/crypto/nx: add hardware 842 crypto comp alg

2015-05-06 Thread Dan Streetman
Add crypto compression alg for 842 hardware compression and decompression.

This crypto compression alg is named "nx842" to indicate it uses hardware
to perform the compression and decompression, while the software 842
compression alg is named "sw842".  However, since before this split there
was only one 842 compression alg named "842" which only used hardware,
this is also aliased "842" for backwards compatibility.

This uses only the PowerPC coprocessor hardware for 842 compression.  It
also uses the hardware for decompression, but if the hardware fails it will
fall back to the 842 software decompression library, so that decompression
never fails (for valid 842 compressed buffers).  A header must be used in
most cases, due to the hardware's restrictions on the buffers being
specifically aligned and sized.

Due to the header this driver adds, compressed buffers it creates cannot be
directly passed to the 842 software library for decompression.  However,
compressed buffers created by the software 842 library can be passed to
this driver for hardware 842 decompression (with the exception of buffers
containing the "short data" template, as lib/842/842.h explains).

Signed-off-by: Dan Streetman 
---
 drivers/crypto/nx/Kconfig |  10 +
 drivers/crypto/nx/Makefile|   2 +
 drivers/crypto/nx/nx-842-crypto.c | 603 ++
 3 files changed, 615 insertions(+)
 create mode 100644 drivers/crypto/nx/nx-842-crypto.c

diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index ee9e259..3e621ad 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -50,4 +50,14 @@ config CRYPTO_DEV_NX_COMPRESS_POWERNV
  algorithm.  This supports NX hardware on the PowerNV platform.
  If you choose 'M' here, this module will be called 
nx_compress_powernv.
 
+config CRYPTO_DEV_NX_COMPRESS_CRYPTO
+   tristate "Compression acceleration cryptographic interface"
+   select CRYPTO_ALGAPI
+   select 842_DECOMPRESS
+   default y
+   help
+ Support for PowerPC Nest (NX) accelerators using the cryptographic
+ API.  If you choose 'M' here, this module will be called
+ nx_compress_crypto.
+
 endif
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index 6619787..868b5e6 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -13,6 +13,8 @@ nx-crypto-objs := nx.o \
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_CRYPTO) += nx-compress-crypto.o
 nx-compress-objs := nx-842.o
 nx-compress-pseries-objs := nx-842-pseries.o
 nx-compress-powernv-objs := nx-842-powernv.o
+nx-compress-crypto-objs := nx-842-crypto.o
diff --git a/drivers/crypto/nx/nx-842-crypto.c 
b/drivers/crypto/nx/nx-842-crypto.c
new file mode 100644
index 000..42d0da8
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-crypto.c
@@ -0,0 +1,603 @@
+/*
+ * Cryptographic API for the NX-842 hardware compression.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) IBM Corporation, 2011-2015
+ *
+ * Original Authors: Robert Jennings 
+ *   Seth Jennings 
+ *
+ * Rewrite: Dan Streetman 
+ *
+ * This is an interface to the NX-842 compression hardware in PowerPC
+ * processors.  Most of the complexity of this driver is due to the fact that
+ * the NX-842 compression hardware requires the input and output data buffers
+ * to be specifically aligned, to be a specific multiple in length, and within
+ * specific minimum and maximum lengths.  Those restrictions, provided by the
+ * nx-842 driver via nx842_constraints, mean this driver must use bounce
+ * buffers and headers to correct misaligned in or out buffers, and to split
+ * input buffers that are too large.
+ *
+ * This driver will fall back to software decompression if the hardware
+ * decompression fails, so this driver's decompression should never fail as
+ * long as the provided compressed buffer is valid.  Any compressed buffer
+ * created by this driver will have a header (except ones where the input
+ * perfectly matches the constraints); so users of this driver cannot simply
+ * pass a compressed buffer created by this driver over to the 842 software
+ * decompression library.  Instead, users must use this driver to decompress;
+ * if the hardware fails or is unavailable, the compressed buffer will 

Re: [PATCH v5 1/1] arm64: dts: Add the arasan sdhc nodes in apm-storm.dtsi.

2015-05-06 Thread Suman Tripathi
On Wed, May 6, 2015 at 6:11 PM, Michal Simek 
wrote:

> On 05/06/2015 10:40 AM, Arnd Bergmann wrote:
> > On Wednesday 06 May 2015 09:45:15 Michal Simek wrote:
> >> On 05/06/2015 09:31 AM, Arnd Bergmann wrote:
> >>> On Wednesday 06 May 2015 10:41:07 Suman Tripathi wrote:
> >> @@ -533,6 +567,16 @@
> >> interrupts = <0x0 0x4f 0x4>;
> >> };
> >>
> >> +   sdhc0: sdhc@1c00 {
> >> +   device_type = "sdhc";
> >
> > device_type generally should not be used (there are a few
> exceptions).
> 
>  Okay !!
> 
> >>>
> >>> While we're at it, please change sdhc@1c00 to mmc@1c00.
> >>> Even though Linux does not care, we try to use the standard device
> >>> names for consistency.
> >>
> >> Do we have a list of these names somewhere?
> >> Normally I do use ePARP - generic names recommendation but mmc or sdhci
> >> are not listed there.
> >> Both combination mmc@ or sdhci@ are used in the kernel.
> >>
> >> On zynq and zynqmp we do use sdhci@.
> >>
> >
> > Ah, I thought ePAPR listed mmc already. Using "sdhci" is a little too
> > specific here, since a lot of mmc hosts are not sdhci compliant, and
> > "sdhc" is completely wrong, because that identifies a specific card
> > type, but a host that supports SDHC cards will generally also work
> > with SD (less than 4GB) or SDXC (more than 48GB) cards.
>
> Yes "sdhc" is completely wrong.
>

But searching for the spec name in search engines gives SDHC 3.0 as the general term.

>
> Based on our datasheet(also version used on Zynq and ZynqMP) this IP is
> compliant with SD HC 3.00, SDIO 3.0, SD MC 3.01 SD MCS 1.01, MMC 4.51.
> Not sure about the version which they use.
> Also not sure which spec the IP should have to be able to say that we
> can use sdhci name. Do you have exact SPEC name?
>

I also think sdhci because the binding is sdhci written by Arasan. Anyway I
will change to sdhci.

>
> Thanks,
> Michal
>
>


-- 
Thanks,
with regards,
Suman Tripathi
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC

2015-05-06 Thread Scott Wood
On Wed, 2015-05-06 at 17:40 +0800, Yangbo Lu wrote:
> Add 1588 timer node in files:
> arch/powerpc/boot/dts/bsc9131rdb.dtsi
> arch/powerpc/boot/dts/bsc9132qds.dtsi
> arch/powerpc/boot/dts/p1010rdb.dtsi
> arch/powerpc/boot/dts/p1020rdb-pd.dts
> arch/powerpc/boot/dts/p1021rdb-pc.dtsi
> arch/powerpc/boot/dts/p1022ds.dtsi
> arch/powerpc/boot/dts/p1025twr.dtsi
> arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> 
> Signed-off-by: Yangbo Lu 
> ---
>  arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 12 
>  arch/powerpc/boot/dts/bsc9132qds.dtsi  | 12 
>  arch/powerpc/boot/dts/p1010rdb.dtsi| 12 
>  arch/powerpc/boot/dts/p1020rdb-pd.dts  | 12 
>  arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 12 
>  arch/powerpc/boot/dts/p1022ds.dtsi | 12 
>  arch/powerpc/boot/dts/p1025twr.dtsi| 12 
>  arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 15 +--
>  8 files changed, 93 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi 
> b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> index 45efcba..629cc03 100644
> --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> @@ -80,6 +80,18 @@
>   status = "disabled";
>   };
>  
> + ptp_clock@b0e00 {
> + compatible = "fsl,etsec-ptp";
> + reg = <0xb0e00 0xb0>;
> + interrupts = <68 2 0 0 69 2 0 0>;
> + fsl,tclk-period = <5>;
> + fsl,tmr-prsc= <2>;
> + fsl,tmr-add = <0xcccd>;
> + fsl,tmr-fiper1  = <0x3b9ac9fb>;
> + fsl,tmr-fiper2  = <0x00018696>;
> + fsl,max-adj = <24999>;

Please don't use hex for numbers that make more sense as decimal.

> --- a/arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> +++ b/arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> @@ -215,12 +215,15 @@
>   };
>  
>   ptp_clock@24e00 {
> - fsl,tclk-period = <5>;
> - fsl,tmr-prsc = <200>;
> - fsl,tmr-add = <0xCCCD>;
> - fsl,tmr-fiper1 = <0x3B9AC9FB>;
> - fsl,tmr-fiper2 = <0x0001869B>;
> - fsl,max-adj = <24999>;
> + compatible = "fsl,etsec-ptp";
> + reg = <0x24e00 0xb0>;
> + interrupts = <68 2 0 0 69 2 0 0>;
> + fsl,tclk-period = <5>;
> + fsl,tmr-prsc= <2>;
> + fsl,tmr-add = <0xaaab>;
> + fsl,tmr-fiper1  = <0x3b9ac9fb>;
> + fsl,tmr-fiper2  = <0x00018696>;
> + fsl,max-adj = <2>;
>   };

This isn't adding a node -- it's changing values.  If the old ones were
wrong, explain that in the changelog.

Also, p2020si-post.dtsi already adds interrupts to this node (and it
contains one more interrupt than the above), and it includes
pq3-etsec1-timer-0.dtsi which contains the compatible and reg (and
interrupts with two specifiers).  Probably all of these should be using
pq3-etsec1-timer-0.dtsi and only specifying the board-specific values.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v4] clk: qoriq: Add support for the FMan clock

2015-05-06 Thread Stephen Boyd
On 05/06, Scott Wood wrote:
> On Wed, 2015-05-06 at 00:02 -0700, Stephen Boyd wrote:
> > On 04/16, Igal.Liberman wrote:
> > > +static int get_fm_clk_idx(int fm_id, int *fm_clk_idx)
> > > +{
> > > + struct ccsr_guts __iomem *guts_regs = NULL;
> > 
> > Unnecessary initialization to NULL. Also, marking a structure as
> > __iomem is odd. Why do we need to use a struct to figure out
> > offsets for registers? Why not just use #defines? That would
> > probably also make it easy to avoid the asm include here.
> 
> Using a struct for registers is quite common:
> scott@snotra:~/fsl/git/linux/upstream$ git grep struct|grep __iomem|wc -l
> 3005

$ git grep -E 'struct \w+ __iomem' | wc -l
2212

That's slightly inflated, but ok.

Within drivers/clk there aren't any though, hence my apprehension

$ git grep -E 'struct \w+ __iomem' -- drivers/clk/ | wc -l
0

> 
> It provides type-safety, and makes accessing the registers more natural.

Sure, we can leave the struct as is, but to make this compile on
ARM we need to figure something out. Move the struct definition
into include/linux/platform_data/ perhaps?

> 
> > > + struct device_node *guts;
> > > + uint32_t reg = 0;
> > 
> > s/uint32_t/u32/
> 
> Why?

This matches the rest of the file except for one instance of
uint32_t. I googled it and found [1], perhaps that will help.

> 
> > Also unnecessary initialization.
> 
> Given the if/else if/else if/... nature of how reg is initialized, this
> seems like a useful and harmless way of making behavior predictable if
> there is a bug.
> 

If there's a possibility of a bug due to missed initialization
perhaps it's a sign the code is too complicated and should be
broken down into smaller functions. For example, this function
could be rewritten to have a match table with function pointers
that return the fm_clk_idx.

[1] http://lkml.iu.edu/hypermail/linux/kernel/1101.3/02176.html

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v4] clk: qoriq: Add support for the FMan clock

2015-05-06 Thread Scott Wood
On Wed, 2015-05-06 at 15:25 -0700, Stephen Boyd wrote:
> On 05/06, Scott Wood wrote:
> > On Wed, 2015-05-06 at 00:02 -0700, Stephen Boyd wrote:
> > > On 04/16, Igal.Liberman wrote:
> > > > +static int get_fm_clk_idx(int fm_id, int *fm_clk_idx)
> > > > +{
> > > > +   struct ccsr_guts __iomem *guts_regs = NULL;
> > > 
> > > Unnecessary initialization to NULL. Also, marking a structure as
> > > __iomem is odd. Why do we need to use a struct to figure out
> > > offsets for registers? Why not just use #defines? That would
> > > probably also make it easy to avoid the asm include here.
> > 
> > Using a struct for registers is quite common:
> > scott@snotra:~/fsl/git/linux/upstream$ git grep struct|grep __iomem|wc -l
> > 3005
> 
> $ git grep -E 'struct \w+ __iomem' | wc -l
> 2212
> 
> That's slightly inflated, but ok.
> 
> Within drivers/clk there aren't any though, hence my apprehension
> 
> $ git grep -E 'struct \w+ __iomem' -- drivers/clk/ | wc -l
> 0

I'm not sure why clk should be special.  Plus, this is a struct that's
been used by other parts of the kernel since before git history began,
rather than something defined specifically for drivers/clk.

> > It provides type-safety, and makes accessing the registers more natural.
> 
> Sure, we can leave the struct as is, but to make this compile on
> ARM we need to figure something out. Move the struct definition
> into include/linux/platform_data/ perhaps?

It's register definition rather than platform data, but yes, it should
go somewhere in include/linux.  Or I suppose we could put #ifdef
CONFIG_PPC around the fman stuff.

> > > Also unnecessary initialization.
> > 
> > Given the if/else if/else if/... nature of how reg is initialized, this
> > seems like a useful and harmless way of making behavior predictable if
> > there is a bug.
> > 
> 
> If there's a possibility of a bug due to missed initialization
> perhaps it's a sign the code is too complicated and should be
> broken down into smaller functions.

Well, there's always a possibility. :-)

Though rereading this function, reg is only used in the locations where
it's set -- not after the if/else stuff -- so I no longer think this is
a particularly high risk situation.  Plus, GCC's gotten pretty
aggressive about warning about such possibilities.

>  For example, this function could be rewritten to have a match table
> with function pointers that return the fm_clk_idx.

Yes, that'd be nice.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/6] powerpc: Add cpu name to dump stack arch description

2015-05-06 Thread Michael Neuling
On Tue, 2015-05-05 at 21:12 +1000, Michael Ellerman wrote:
> As soon as we know the name of the cpu we're on, add it to the dump
> stack arch description, which is printed in case of an oops.
> 
> Signed-off-by: Michael Ellerman 
> ---
>  arch/powerpc/kernel/cputable.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index 60262fdf35ba..cf5e0c9b80cb 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -15,6 +15,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -2174,6 +2175,8 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned 
> long offset,
>   }
>  #endif /* CONFIG_PPC64 || CONFIG_BOOKE */
>  
> + dump_stack_add_arch_desc(t->cpu_name);
> +

Can we make this the PVR instead of the name?  It gives us more fidelity
on what the hardware revision is.

Mikey

>   return t;
>  }
>  

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC

2015-05-06 Thread Scott Wood
On Wed, 2015-05-06 at 21:26 -0500, Lu Yangbo-B47093 wrote:
> Thanks.
> Pls see my comments below.
> 
> -Original Message-
> From: Wood Scott-B07421 
> Sent: Thursday, May 07, 2015 4:44 AM
> To: Lu Yangbo-B47093
> Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC
> 
> On Wed, 2015-05-06 at 17:40 +0800, Yangbo Lu wrote:
> > Add 1588 timer node in files:
> > arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > arch/powerpc/boot/dts/bsc9132qds.dtsi
> > arch/powerpc/boot/dts/p1010rdb.dtsi
> > arch/powerpc/boot/dts/p1020rdb-pd.dts
> > arch/powerpc/boot/dts/p1021rdb-pc.dtsi
> > arch/powerpc/boot/dts/p1022ds.dtsi
> > arch/powerpc/boot/dts/p1025twr.dtsi
> > arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> > 
> > Signed-off-by: Yangbo Lu 
> > ---
> >  arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 12   
> > arch/powerpc/boot/dts/bsc9132qds.dtsi  | 12 
> >  arch/powerpc/boot/dts/p1010rdb.dtsi| 12 
> >  arch/powerpc/boot/dts/p1020rdb-pd.dts  | 12   
> > arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 12 
> >  arch/powerpc/boot/dts/p1022ds.dtsi | 12 
> >  arch/powerpc/boot/dts/p1025twr.dtsi| 12 
> >  arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 15 +--
> >  8 files changed, 93 insertions(+), 6 deletions(-)
> > 
> > diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi 
> > b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > index 45efcba..629cc03 100644
> > --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > @@ -80,6 +80,18 @@
> > status = "disabled";
> > };
> >  
> > +   ptp_clock@b0e00 {
> > +   compatible = "fsl,etsec-ptp";
> > +   reg = <0xb0e00 0xb0>;
> > +   interrupts = <68 2 0 0 69 2 0 0>;
> > +   fsl,tclk-period = <5>;
> > +   fsl,tmr-prsc= <2>;
> > +   fsl,tmr-add = <0xcccd>;
> > +   fsl,tmr-fiper1  = <0x3b9ac9fb>;
> > +   fsl,tmr-fiper2  = <0x00018696>;
> > +   fsl,max-adj = <24999>;
> 
> Please don't use hex for numbers that make more sense as decimal.
> [Lu Yangbo-B47093] The hex value is register value, I think it's better to 
> use hex.

Whether it goes into a register doesn't matter.  Hex values are useful
for values which are subdivided into various bitfields, or whose hex
representation is simpler than decimal.  I'm not familiar with the
details of this hardware, but I doubt the former is the case for
0x3b9ac9fb == 999999995 or 0x18696 == 99990.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

RE: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC

2015-05-06 Thread yangbo...@freescale.com


-Original Message-
From: Wood Scott-B07421 
Sent: Thursday, May 07, 2015 10:35 AM
To: Lu Yangbo-B47093
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: Re: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC

On Wed, 2015-05-06 at 21:26 -0500, Lu Yangbo-B47093 wrote:
> Thanks.
> Pls see my comments below.
> 
> -Original Message-
> From: Wood Scott-B07421
> Sent: Thursday, May 07, 2015 4:44 AM
> To: Lu Yangbo-B47093
> Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
> Subject: Re: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC
> 
> On Wed, 2015-05-06 at 17:40 +0800, Yangbo Lu wrote:
> > Add 1588 timer node in files:
> > arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > arch/powerpc/boot/dts/bsc9132qds.dtsi
> > arch/powerpc/boot/dts/p1010rdb.dtsi
> > arch/powerpc/boot/dts/p1020rdb-pd.dts
> > arch/powerpc/boot/dts/p1021rdb-pc.dtsi
> > arch/powerpc/boot/dts/p1022ds.dtsi
> > arch/powerpc/boot/dts/p1025twr.dtsi
> > arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> > 
> > Signed-off-by: Yangbo Lu 
> > ---
> >  arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 12  
> > arch/powerpc/boot/dts/bsc9132qds.dtsi  | 12 
> >  arch/powerpc/boot/dts/p1010rdb.dtsi| 12 
> >  arch/powerpc/boot/dts/p1020rdb-pd.dts  | 12  
> > arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 12 
> >  arch/powerpc/boot/dts/p1022ds.dtsi | 12 
> >  arch/powerpc/boot/dts/p1025twr.dtsi| 12 
> >  arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 15 +--
> >  8 files changed, 93 insertions(+), 6 deletions(-)
> > 
> > diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > index 45efcba..629cc03 100644
> > --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> > @@ -80,6 +80,18 @@
> > status = "disabled";
> > };
> >  
> > +   ptp_clock@b0e00 {
> > +   compatible = "fsl,etsec-ptp";
> > +   reg = <0xb0e00 0xb0>;
> > +   interrupts = <68 2 0 0 69 2 0 0>;
> > +   fsl,tclk-period = <5>;
> > +   fsl,tmr-prsc= <2>;
> > +   fsl,tmr-add = <0xcccd>;
> > +   fsl,tmr-fiper1  = <0x3b9ac9fb>;
> > +   fsl,tmr-fiper2  = <0x00018696>;
> > +   fsl,max-adj = <24999>;
> 
> Please don't use hex for numbers that make more sense as decimal.
> [Lu Yangbo-B47093] The hex value is register value, I think it's better to 
> use hex.

Whether it goes into a register doesn't matter.  Hex values are useful for 
values which are subdivided into various bitfields, or whose hex representation 
is simpler than decimal.  I'm not familiar with the details of this hardware, 
but I doubt the former is the case for 0x3b9ac9fb == 999999995 or 0x18696 == 
99990.
[Lu Yangbo-B47093] Thanks Scott. I got it. The hex value here is not for 
various bitfields but a value calculated manually. I will change it to decimal.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

RE: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC

2015-05-06 Thread yangbo...@freescale.com
Thanks.
Pls see my comments below.

-Original Message-
From: Wood Scott-B07421 
Sent: Thursday, May 07, 2015 4:44 AM
To: Lu Yangbo-B47093
Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org
Subject: Re: [PATCH] powerpc/dts: Add 1588 timer node for eTSEC

On Wed, 2015-05-06 at 17:40 +0800, Yangbo Lu wrote:
> Add 1588 timer node in files:
> arch/powerpc/boot/dts/bsc9131rdb.dtsi
> arch/powerpc/boot/dts/bsc9132qds.dtsi
> arch/powerpc/boot/dts/p1010rdb.dtsi
> arch/powerpc/boot/dts/p1020rdb-pd.dts
> arch/powerpc/boot/dts/p1021rdb-pc.dtsi
> arch/powerpc/boot/dts/p1022ds.dtsi
> arch/powerpc/boot/dts/p1025twr.dtsi
> arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> 
> Signed-off-by: Yangbo Lu 
> ---
>  arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 12   
> arch/powerpc/boot/dts/bsc9132qds.dtsi  | 12 
>  arch/powerpc/boot/dts/p1010rdb.dtsi| 12 
>  arch/powerpc/boot/dts/p1020rdb-pd.dts  | 12   
> arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 12 
>  arch/powerpc/boot/dts/p1022ds.dtsi | 12 
>  arch/powerpc/boot/dts/p1025twr.dtsi| 12 
>  arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 15 +--
>  8 files changed, 93 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi 
> b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> index 45efcba..629cc03 100644
> --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
> @@ -80,6 +80,18 @@
>   status = "disabled";
>   };
>  
> + ptp_clock@b0e00 {
> + compatible = "fsl,etsec-ptp";
> + reg = <0xb0e00 0xb0>;
> + interrupts = <68 2 0 0 69 2 0 0>;
> + fsl,tclk-period = <5>;
> + fsl,tmr-prsc= <2>;
> + fsl,tmr-add = <0xcccd>;
> + fsl,tmr-fiper1  = <0x3b9ac9fb>;
> + fsl,tmr-fiper2  = <0x00018696>;
> + fsl,max-adj = <24999>;

Please don't use hex for numbers that make more sense as decimal.
[Lu Yangbo-B47093] The hex value is register value, I think it's better to use 
hex.

> --- a/arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> +++ b/arch/powerpc/boot/dts/p2020rdb-pc.dtsi
> @@ -215,12 +215,15 @@
>   };
>  
>   ptp_clock@24e00 {
> - fsl,tclk-period = <5>;
> - fsl,tmr-prsc = <200>;
> - fsl,tmr-add = <0xCCCD>;
> - fsl,tmr-fiper1 = <0x3B9AC9FB>;
> - fsl,tmr-fiper2 = <0x0001869B>;
> - fsl,max-adj = <24999>;
> + compatible = "fsl,etsec-ptp";
> + reg = <0x24e00 0xb0>;
> + interrupts = <68 2 0 0 69 2 0 0>;
> + fsl,tclk-period = <5>;
> + fsl,tmr-prsc= <2>;
> + fsl,tmr-add = <0xaaab>;
> + fsl,tmr-fiper1  = <0x3b9ac9fb>;
> + fsl,tmr-fiper2  = <0x00018696>;
> + fsl,max-adj = <2>;
>   };

This isn't adding a node -- it's changing values.  If the old ones were wrong, 
explain that in the changelog.

Also, p2020si-post.dtsi already adds interrupts to this node (and it contains 
one more interrupt than the above), and it includes pq3-etsec1-timer-0.dtsi 
which contains the compatible and reg (and interrupts with two specifiers).  
Probably all of these should be using pq3-etsec1-timer-0.dtsi and only 
specifying the board-specific values.

[Lu Yangbo-B47093] I will modify this according to your comments here.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 04/10] crypto: change 842 alg to use software

2015-05-06 Thread Herbert Xu
On Wed, May 06, 2015 at 12:51:00PM -0400, Dan Streetman wrote:
> Change the crypto 842 compression alg to use the software 842 compression
> and decompression library.  Change the name of this crypto alg to "sw842".
> Remove the fallback to LZO compression.

That's not how the name works.  All implementations of 842 should
bear that name.  They should differentiate themselves based on
cra_driver_name.  For example, we generally call the software
implementation of foo "foo-generic".

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 10/10] drivers/crypto/nx: add hardware 842 crypto comp alg

2015-05-06 Thread Herbert Xu
On Wed, May 06, 2015 at 12:51:06PM -0400, Dan Streetman wrote:
> Add crypto compression alg for 842 hardware compression and decompression.
> 
> This crypto compression alg is named "nx842" to indicate it uses hardware
> to perform the compression and decompression, while the software 842
> compression alg is named "sw842".  However, since before this split there
> was only one 842 compression alg named "842" which only used hardware,
> this is also aliased "842" for backwards compatibility.

This should still be called 842.  You can set the driver name to
nx842 or 842-nx.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 4/8] powernv/eeh: Update the EEH code to use the opal irq domain

2015-05-06 Thread Alistair Popple
The eeh code currently uses the old notifier method to get eeh events
from OPAL. It also contains some logic to filter opal events which has
been moved into the virtual irqchip. This patch converts the eeh code
to the new event interface which simplifies event handling.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 58 +++-
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
b/arch/powerpc/platforms/powernv/eeh-powernv.c
index ce738ab..ca825ec 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -40,6 +41,7 @@
 #include "pci.h"
 
 static bool pnv_eeh_nb_init = false;
+static int eeh_event_irq = -EINVAL;
 
 /**
  * pnv_eeh_init - EEH platform dependent initialization
@@ -88,34 +90,22 @@ static int pnv_eeh_init(void)
return 0;
 }
 
-static int pnv_eeh_event(struct notifier_block *nb,
-unsigned long events, void *change)
+static irqreturn_t pnv_eeh_event(int irq, void *data)
 {
-   uint64_t changed_evts = (uint64_t)change;
-
/*
-* We simply send special EEH event if EEH has
-* been enabled, or clear pending events in
-* case that we enable EEH soon
+* We simply send a special EEH event if EEH has been
+* enabled. We don't care about EEH events until we've
+* finished processing the outstanding ones. Event processing
+* gets unmasked in next_error() if EEH is enabled.
 */
-   if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
-   !(events & OPAL_EVENT_PCI_ERROR))
-   return 0;
+   disable_irq_nosync(irq);
 
if (eeh_enabled())
eeh_send_failure_event(NULL);
-   else
-   opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
-   return 0;
+   return IRQ_HANDLED;
 }
 
-static struct notifier_block pnv_eeh_nb = {
-   .notifier_call  = pnv_eeh_event,
-   .next   = NULL,
-   .priority   = 0
-};
-
 #ifdef CONFIG_DEBUG_FS
 static ssize_t pnv_eeh_ei_write(struct file *filp,
const char __user *user_buf,
@@ -237,16 +227,28 @@ static int pnv_eeh_post_init(void)
 
/* Register OPAL event notifier */
if (!pnv_eeh_nb_init) {
-   ret = opal_notifier_register(&pnv_eeh_nb);
-   if (ret) {
-   pr_warn("%s: Can't register OPAL event notifier (%d)\n",
-   __func__, ret);
+   eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+   if (eeh_event_irq < 0) {
+   pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+  __func__, eeh_event_irq);
+   return eeh_event_irq;
+   }
+
+   ret = request_irq(eeh_event_irq, pnv_eeh_event,
+   IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+   if (ret < 0) {
+   irq_dispose_mapping(eeh_event_irq);
+   pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+  __func__, eeh_event_irq);
return ret;
}
 
pnv_eeh_nb_init = true;
}
 
+   if (!eeh_enabled())
+   disable_irq(eeh_event_irq);
+
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
 
@@ -1303,12 +1305,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
int state, ret = EEH_NEXT_ERR_NONE;
 
/*
-* While running here, it's safe to purge the event queue.
-* And we should keep the cached OPAL notifier event sychronized
-* between the kernel and firmware.
+* While running here, it's safe to purge the event queue. The
+* event should still be masked.
 */
eeh_remove_event(NULL, false);
-   opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
 
list_for_each_entry(hose, &hose_list, list_node) {
/*
@@ -1477,6 +1477,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
break;
}
 
+   /* Unmask the event */
+   if (eeh_enabled())
+   enable_irq(eeh_event_irq);
+
return ret;
 }
 
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 2/8] ipmi/powernv: Convert to irq event interface

2015-05-06 Thread Alistair Popple
Convert the opal ipmi driver to use the new irq interface for events.

Signed-off-by: Alistair Popple 
Cc: Corey Minyard 
Cc: openipmi-develo...@lists.sourceforge.net
---

Corey,

If this looks ok can you please ack it? Michael Ellerman will then take
the whole series via the powerpc tree. Thanks.

 drivers/char/ipmi/ipmi_powernv.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c
index 8753b0f..9b409c0 100644
--- a/drivers/char/ipmi/ipmi_powernv.c
+++ b/drivers/char/ipmi/ipmi_powernv.c
@@ -15,6 +15,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 #include 

@@ -23,8 +25,7 @@ struct ipmi_smi_powernv {
u64 interface_id;
struct ipmi_device_id   ipmi_id;
ipmi_smi_t  intf;
-   u64 event;
-   struct notifier_block   event_nb;
+   unsigned intirq;

/**
 * We assume that there can only be one outstanding request, so
@@ -197,15 +198,12 @@ static struct ipmi_smi_handlers ipmi_powernv_smi_handlers 
= {
.poll   = ipmi_powernv_poll,
 };

-static int ipmi_opal_event(struct notifier_block *nb,
- unsigned long events, void *change)
+static irqreturn_t ipmi_opal_event(int irq, void *data)
 {
-   struct ipmi_smi_powernv *smi = container_of(nb,
-   struct ipmi_smi_powernv, event_nb);
+   struct ipmi_smi_powernv *smi = data;

-   if (events & smi->event)
-   ipmi_powernv_recv(smi);
-   return 0;
+   ipmi_powernv_recv(smi);
+   return IRQ_HANDLED;
 }

 static int ipmi_powernv_probe(struct platform_device *pdev)
@@ -240,13 +238,16 @@ static int ipmi_powernv_probe(struct platform_device 
*pdev)
goto err_free;
}

-   ipmi->event = 1ull << prop;
-   ipmi->event_nb.notifier_call = ipmi_opal_event;
+   ipmi->irq = irq_of_parse_and_map(dev->of_node, 0);
+   if (!ipmi->irq) {
+   dev_info(dev, "Unable to map irq from device tree\n");
+   ipmi->irq = opal_event_request(prop);
+   }

-   rc = opal_notifier_register(&ipmi->event_nb);
-   if (rc) {
-   dev_warn(dev, "OPAL notifier registration failed (%d)\n", rc);
-   goto err_free;
+   if (request_irq(ipmi->irq, ipmi_opal_event, IRQ_TYPE_LEVEL_HIGH,
+   "opal-ipmi", ipmi)) {
+   dev_warn(dev, "Unable to request irq\n");
+   goto err_dispose;
}

ipmi->opal_msg = devm_kmalloc(dev,
@@ -271,7 +272,9 @@ static int ipmi_powernv_probe(struct platform_device *pdev)
 err_free_msg:
devm_kfree(dev, ipmi->opal_msg);
 err_unregister:
-   opal_notifier_unregister(&ipmi->event_nb);
+   free_irq(ipmi->irq, ipmi);
+err_dispose:
+   irq_dispose_mapping(ipmi->irq);
 err_free:
devm_kfree(dev, ipmi);
return rc;
@@ -282,7 +285,9 @@ static int ipmi_powernv_remove(struct platform_device *pdev)
struct ipmi_smi_powernv *smi = dev_get_drvdata(&pdev->dev);

ipmi_unregister_smi(smi->intf);
-   opal_notifier_unregister(&smi->event_nb);
+   free_irq(smi->irq, smi);
+   irq_dispose_mapping(smi->irq);
+
return 0;
 }

--
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 3/8] hvc: Convert to using interrupts instead of opal events

2015-05-06 Thread Alistair Popple
Convert the opal hvc driver to use the new irqchip to register for
opal events. As older firmware versions may not have device tree
bindings for the interrupt parent we just use a hardcoded hwirq based
on the event number.

Signed-off-by: Alistair Popple 
---
 drivers/tty/hvc/hvc_opal.c | 33 ++---
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index 543b234..47b54c6 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -61,7 +62,6 @@ static struct hvc_opal_priv 
*hvc_opal_privs[MAX_NR_HVC_CONSOLES];
 /* For early boot console */
 static struct hvc_opal_priv hvc_opal_boot_priv;
 static u32 hvc_opal_boot_termno;
-static bool hvc_opal_event_registered;
 
 static const struct hv_ops hvc_opal_raw_ops = {
.get_chars = opal_get_chars,
@@ -162,28 +162,15 @@ static const struct hv_ops hvc_opal_hvsi_ops = {
.tiocmset = hvc_opal_hvsi_tiocmset,
 };
 
-static int hvc_opal_console_event(struct notifier_block *nb,
- unsigned long events, void *change)
-{
-   if (events & OPAL_EVENT_CONSOLE_INPUT)
-   hvc_kick();
-   return 0;
-}
-
-static struct notifier_block hvc_opal_console_nb = {
-   .notifier_call  = hvc_opal_console_event,
-};
-
 static int hvc_opal_probe(struct platform_device *dev)
 {
const struct hv_ops *ops;
struct hvc_struct *hp;
struct hvc_opal_priv *pv;
hv_protocol_t proto;
-   unsigned int termno, boot = 0;
+   unsigned int termno, irq, boot = 0;
const __be32 *reg;
 
-
if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) {
proto = HV_PROTOCOL_RAW;
ops = &hvc_opal_raw_ops;
@@ -227,18 +214,18 @@ static int hvc_opal_probe(struct platform_device *dev)
dev->dev.of_node->full_name,
boot ? " (boot console)" : "");
 
-   /* We don't do IRQ ... */
-   hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS);
+   irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT));
+   if (!irq) {
+   pr_err("hvc_opal: Unable to map interrupt for device %s\n",
+   dev->dev.of_node->full_name);
+   return irq;
+   }
+
+   hp = hvc_alloc(termno, irq, ops, MAX_VIO_PUT_CHARS);
if (IS_ERR(hp))
return PTR_ERR(hp);
dev_set_drvdata(&dev->dev, hp);
 
-   /* ...  but we use OPAL event to kick the console */
-   if (!hvc_opal_event_registered) {
-   opal_notifier_register(&hvc_opal_console_nb);
-   hvc_opal_event_registered = true;
-   }
-
return 0;
 }
 
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/8] powerpc/powernv: Add a virtual irqchip for opal events

2015-05-06 Thread Alistair Popple
Whenever an interrupt is received for opal the linux kernel gets a
bitfield indicating certain events that have occurred and need handling
by the various device drivers. Currently this is handled using a
notifier interface where we call every device driver that has
registered to receive opal events.

This approach has several drawbacks. For example each driver has to do
its own checking to see if the event is relevant as well as event
masking. There is also no easy method of recording the number of times
we receive particular events.

This patch solves these issues by exposing opal events via the
standard interrupt APIs by adding a new interrupt chip and
domain. Drivers can then register for the appropriate events using
standard kernel calls such as irq_of_parse_and_map().

Signed-off-by: Alistair Popple 
---

Changes from v2:
 - Addressed comments by Neelesh Gupta
 - Fixed soft-lockup bug reported by Neelesh in the opal-dump driver
 - Rebased on v4.1-rc1

 arch/powerpc/include/asm/opal.h   |   2 +
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/opal-irqchip.c | 248 ++
 arch/powerpc/platforms/powernv/opal.c |  70 +---
 arch/powerpc/platforms/powernv/powernv.h  |   4 +
 5 files changed, 260 insertions(+), 66 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-irqchip.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 042af1a..9ffd113 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -250,6 +250,8 @@ extern int opal_resync_timebase(void);

 extern void opal_lpc_init(void);

+extern int opal_event_request(unsigned int opal_event_nr);
+
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
 unsigned long vmalloc_size);
 void opal_free_sg_list(struct opal_sg_list *sg);
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index 33e44f3..f1d7de2 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,7 +1,7 @@
 obj-y  += setup.o opal-wrappers.o opal.o opal-async.o
 obj-y  += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
-obj-y  += opal-msglog.o opal-hmi.o opal-power.o
+obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o

 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c 
b/arch/powerpc/platforms/powernv/opal-irqchip.c
new file mode 100644
index 000..4b6f951
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -0,0 +1,248 @@
+/*
+ * This file implements an irqchip for OPAL events. Whenever there is
+ * an interrupt that is handled by OPAL we get passed a list of events
+ * that Linux needs to do something about. These basically look like
+ * interrupts to Linux so we implement an irqchip to handle them.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "powernv.h"
+
+/* Maximum number of events supported by OPAL firmware */
+#define MAX_NUM_EVENTS 64
+
+struct opal_event_irqchip {
+   struct irq_chip irqchip;
+   struct irq_domain *domain;
+   unsigned long mask;
+};
+static struct opal_event_irqchip opal_event_irqchip;
+
+static unsigned int opal_irq_count;
+static unsigned int *opal_irqs;
+
+static void opal_handle_irq_work(struct irq_work *work);
+static __be64 last_outstanding_events;
+static struct irq_work opal_event_irq_work = {
+   .func = opal_handle_irq_work,
+};
+
+static void opal_event_mask(struct irq_data *d)
+{
+   clear_bit(d->hwirq, &opal_event_irqchip.mask);
+}
+
+static void opal_event_unmask(struct irq_data *d)
+{
+   set_bit(d->hwirq, &opal_event_irqchip.mask);
+
+   opal_poll_events(&last_outstanding_events);
+   if (last_outstanding_events & opal_event_irqchip.mask)
+   /* Need to retrigger the interrupt */
+   irq_work_queue(&opal_event_irq_work);
+}
+
+static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
+{
+   /*
+* For now we only support level triggered events. The irq
+* handler will be called continuously until the event has
+* been cleared in OPAL.
+*/
+   if (flow_type != IRQ_TYPE_LEVEL_HIGH)
+  

[PATCH v3 5/8] powernv/opal: Convert opal message events to opal irq domain

2015-05-06 Thread Alistair Popple
This patch converts the opal message event to use the new opal irq
domain.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/platforms/powernv/opal.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 4399ff2..0196220 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -362,33 +362,34 @@ static void opal_handle_message(void)
opal_message_do_notify(type, (void *)&msg);
 }
 
-static int opal_message_notify(struct notifier_block *nb,
- unsigned long events, void *change)
+static irqreturn_t opal_message_notify(int irq, void *data)
 {
-   if (events & OPAL_EVENT_MSG_PENDING)
-   opal_handle_message();
-   return 0;
+   opal_handle_message();
+   return IRQ_HANDLED;
 }
 
-static struct notifier_block opal_message_nb = {
-   .notifier_call  = opal_message_notify,
-   .next   = NULL,
-   .priority   = 0,
-};
-
 static int __init opal_message_init(void)
 {
-   int ret, i;
+   int ret, i, irq;
 
for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
 
-   ret = opal_notifier_register(&opal_message_nb);
+   irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
+   if (!irq) {
+   pr_err("%s: Can't register OPAL event irq (%d)\n",
+  __func__, irq);
+   return irq;
+   }
+
+   ret = request_irq(irq, opal_message_notify,
+   IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
if (ret) {
-   pr_err("%s: Can't register OPAL event notifier (%d)\n",
+   pr_err("%s: Can't request OPAL event irq (%d)\n",
   __func__, ret);
return ret;
}
+
return 0;
 }
 machine_early_initcall(powernv, opal_message_init);
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 7/8] powernv/opal-dump: Convert to irq domain

2015-05-06 Thread Alistair Popple
Convert the opal dump driver to the new opal irq domain.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/platforms/powernv/opal-dump.c | 56 +-
 1 file changed, 17 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-dump.c 
b/arch/powerpc/platforms/powernv/opal-dump.c
index 5aa9c1c..2ee9643 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -60,7 +61,7 @@ static ssize_t dump_type_show(struct dump_obj *dump_obj,
  struct dump_attribute *attr,
  char *buf)
 {
-   
+
return sprintf(buf, "0x%x %s\n", dump_obj->type,
   dump_type_to_string(dump_obj->type));
 }
@@ -363,7 +364,7 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t 
size,
return dump;
 }
 
-static int process_dump(void)
+static irqreturn_t process_dump(int irq, void *data)
 {
int rc;
uint32_t dump_id, dump_size, dump_type;
@@ -387,45 +388,13 @@ static int process_dump(void)
if (!dump)
return -1;
 
-   return 0;
-}
-
-static void dump_work_fn(struct work_struct *work)
-{
-   process_dump();
+   return IRQ_HANDLED;
 }
 
-static DECLARE_WORK(dump_work, dump_work_fn);
-
-static void schedule_process_dump(void)
-{
-   schedule_work(&dump_work);
-}
-
-/*
- * New dump available notification
- *
- * Once we get notification, we add sysfs entries for it.
- * We only fetch the dump on demand, and create sysfs asynchronously.
- */
-static int dump_event(struct notifier_block *nb,
- unsigned long events, void *change)
-{
-   if (events & OPAL_EVENT_DUMP_AVAIL)
-   schedule_process_dump();
-
-   return 0;
-}
-
-static struct notifier_block dump_nb = {
-   .notifier_call  = dump_event,
-   .next   = NULL,
-   .priority   = 0
-};
-
 void __init opal_platform_dump_init(void)
 {
int rc;
+   int dump_irq;
 
/* ELOG not supported by firmware */
if (!opal_check_token(OPAL_DUMP_READ))
@@ -445,10 +414,19 @@ void __init opal_platform_dump_init(void)
return;
}
 
-   rc = opal_notifier_register(&dump_nb);
+   dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL));
+   if (!dump_irq) {
+   pr_err("%s: Can't register OPAL event irq (%d)\n",
+  __func__, dump_irq);
+   return;
+   }
+
+   rc = request_threaded_irq(dump_irq, NULL, process_dump,
+   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+   "opal-dump", NULL);
if (rc) {
-   pr_warn("%s: Can't register OPAL event notifier (%d)\n",
-   __func__, rc);
+   pr_err("%s: Can't request OPAL event irq (%d)\n",
+  __func__, rc);
return;
}
 
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 6/8] powernv/elog: Convert elog to opal irq domain

2015-05-06 Thread Alistair Popple
This patch converts the elog code to use the opal irq domain instead
of notifier events.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/platforms/powernv/opal-elog.c | 32 +++---
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-elog.c 
b/arch/powerpc/platforms/powernv/opal-elog.c
index 38ce757..4949ef0 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -10,6 +10,7 @@
  */
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -276,24 +277,15 @@ static void elog_work_fn(struct work_struct *work)
 
 static DECLARE_WORK(elog_work, elog_work_fn);
 
-static int elog_event(struct notifier_block *nb,
-   unsigned long events, void *change)
+static irqreturn_t elog_event(int irq, void *data)
 {
-   /* check for error log event */
-   if (events & OPAL_EVENT_ERROR_LOG_AVAIL)
-   schedule_work(&elog_work);
-   return 0;
+   schedule_work(&elog_work);
+   return IRQ_HANDLED;
 }
 
-static struct notifier_block elog_nb = {
-   .notifier_call  = elog_event,
-   .next   = NULL,
-   .priority   = 0
-};
-
 int __init opal_elog_init(void)
 {
-   int rc = 0;
+   int rc = 0, irq;
 
/* ELOG not supported by firmware */
if (!opal_check_token(OPAL_ELOG_READ))
@@ -305,10 +297,18 @@ int __init opal_elog_init(void)
return -1;
}
 
-   rc = opal_notifier_register(&elog_nb);
+   irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL));
+   if (!irq) {
+   pr_err("%s: Can't register OPAL event irq (%d)\n",
+  __func__, irq);
+   return irq;
+   }
+
+   rc = request_irq(irq, elog_event,
+   IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL);
if (rc) {
-   pr_err("%s: Can't register OPAL event notifier (%d)\n",
-   __func__, rc);
+   pr_err("%s: Can't request OPAL event irq (%d)\n",
+  __func__, rc);
return rc;
}
 
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 8/8] opal: Remove events notifier

2015-05-06 Thread Alistair Popple
All users of the old opal events notifier have been converted over to
the irq domain, so remove the event notifier functions.

Signed-off-by: Alistair Popple 
---
 arch/powerpc/platforms/powernv/opal-irqchip.c | 16 ++---
 arch/powerpc/platforms/powernv/opal.c | 84 +--
 arch/powerpc/platforms/powernv/powernv.h  |  1 -
 arch/powerpc/platforms/powernv/setup.c|  2 +-
 4 files changed, 8 insertions(+), 95 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c 
b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 4b6f951..8c17d81 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -100,21 +100,17 @@ void opal_handle_events(uint64_t events)
 {
int virq, hwirq = 0;
u64 mask = opal_event_irqchip.mask;
-   u64 notifier_mask = 0;
 
-   while (events) {
+   while (events & mask) {
hwirq = fls64(events) - 1;
-   virq = irq_find_mapping(opal_event_irqchip.domain,
-   hwirq);
-   if (virq) {
-   if (BIT_ULL(hwirq) & mask)
+   if (BIT_ULL(hwirq) & mask) {
+   virq = irq_find_mapping(opal_event_irqchip.domain,
+   hwirq);
+   if (virq)
generic_handle_irq(virq);
-   } else
-   notifier_mask |= BIT_ULL(hwirq);
+   }
events &= ~BIT_ULL(hwirq);
}
-
-   opal_do_notifier(notifier_mask);
 }
 
 static irqreturn_t opal_interrupt(int irq, void *data)
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 0196220..a5e48cd 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -53,11 +53,7 @@ static int mc_recoverable_range_len;
 
 struct device_node *opal_node;
 static DEFINE_SPINLOCK(opal_write_lock);
-static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
 static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
-static DEFINE_SPINLOCK(opal_notifier_lock);
-static uint64_t last_notified_mask = 0x0ul;
-static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
 static uint32_t opal_heartbeat;
 
 static void opal_reinit_cores(void)
@@ -223,82 +219,6 @@ static int __init opal_register_exception_handlers(void)
 }
 machine_early_initcall(powernv, opal_register_exception_handlers);
 
-int opal_notifier_register(struct notifier_block *nb)
-{
-   if (!nb) {
-   pr_warning("%s: Invalid argument (%p)\n",
-  __func__, nb);
-   return -EINVAL;
-   }
-
-   atomic_notifier_chain_register(&opal_notifier_head, nb);
-   return 0;
-}
-EXPORT_SYMBOL_GPL(opal_notifier_register);
-
-int opal_notifier_unregister(struct notifier_block *nb)
-{
-   if (!nb) {
-   pr_warning("%s: Invalid argument (%p)\n",
-  __func__, nb);
-   return -EINVAL;
-   }
-
-   atomic_notifier_chain_unregister(&opal_notifier_head, nb);
-   return 0;
-}
-EXPORT_SYMBOL_GPL(opal_notifier_unregister);
-
-void opal_do_notifier(uint64_t events)
-{
-   unsigned long flags;
-   uint64_t changed_mask;
-
-   if (atomic_read(&opal_notifier_hold))
-   return;
-
-   spin_lock_irqsave(&opal_notifier_lock, flags);
-   changed_mask = last_notified_mask ^ events;
-   last_notified_mask = events;
-   spin_unlock_irqrestore(&opal_notifier_lock, flags);
-
-   /*
-* We feed with the event bits and changed bits for
-* enough information to the callback.
-*/
-   atomic_notifier_call_chain(&opal_notifier_head,
-  events, (void *)changed_mask);
-}
-
-void opal_notifier_update_evt(uint64_t evt_mask,
- uint64_t evt_val)
-{
-   unsigned long flags;
-
-   spin_lock_irqsave(&opal_notifier_lock, flags);
-   last_notified_mask &= ~evt_mask;
-   last_notified_mask |= evt_val;
-   spin_unlock_irqrestore(&opal_notifier_lock, flags);
-}
-
-void opal_notifier_enable(void)
-{
-   int64_t rc;
-   __be64 evt = 0;
-
-   atomic_set(&opal_notifier_hold, 0);
-
-   /* Process pending events */
-   rc = opal_poll_events(&evt);
-   if (rc == OPAL_SUCCESS && evt)
-   opal_do_notifier(be64_to_cpu(evt));
-}
-
-void opal_notifier_disable(void)
-{
-   atomic_set(&opal_notifier_hold, 1);
-}
-
 /*
  * Opal message notifier based on message type. Allow subscribers to get
  * notified for specific messgae type.
@@ -571,10 +491,8 @@ int opal_handle_hmi_exception(struct pt_regs *regs)
 
local_paca->hmi_event_available = 0;
rc = opal_poll_events(&evt);
-   if (rc == OPAL_SUCCESS && evt) {
-   opal_do_notifier(be64_to_cpu(evt));
+   if (rc == OPAL_SUCCESS &&

Re: [PATCH 1/1] powerpc: mpc85xx: Add board support for ucp1020

2015-05-06 Thread Scott Wood
On Tue, 2015-05-05 at 11:52 -0400, Oleksandr G Zhadan wrote:
> New QorIQ p1020 based board support from Arcturus Networks Inc.
> http://www.arcturusnetworks.com/products/ucp1020/
> 
> Signed-off-by: Michael Durrant 
> Signed-off-by: Oleksandr G Zhadan 
> ---
>  Documentation/devicetree/bindings/pci/fsl,pci.txt  |2 +-
>  .../devicetree/bindings/powerpc/arcturus/board.txt |  149 ++
>  .../devicetree/bindings/powerpc/arcturus/ecm.txt   |   64 +
>  Documentation/devicetree/bindings/usb/fsl-usb.txt  |2 +-
>  .../devicetree/bindings/vendor-prefixes.txt|1 +
>  arch/powerpc/boot/dts/fsl/ucp1020som-post.dtsi |  179 ++
>  arch/powerpc/boot/dts/fsl/ucp1020som-pre.dtsi  |   70 +
>  arch/powerpc/boot/dts/ucp1020_32b.dts  |   88 +
>  arch/powerpc/boot/dts/ucp1020_32b.dtsi |  174 ++
>  arch/powerpc/configs/ucp1020_defconfig | 2731 
> 
>  arch/powerpc/platforms/85xx/Kconfig|7 +
>  arch/powerpc/platforms/85xx/Makefile   |1 +
>  arch/powerpc/platforms/85xx/ucp1020_som.c  |  100 +
>  13 files changed, 3566 insertions(+), 2 deletions(-)
>  create mode 100644 
> Documentation/devicetree/bindings/powerpc/arcturus/board.txt
>  create mode 100644 Documentation/devicetree/bindings/powerpc/arcturus/ecm.txt
>  create mode 100644 arch/powerpc/boot/dts/fsl/ucp1020som-post.dtsi
>  create mode 100644 arch/powerpc/boot/dts/fsl/ucp1020som-pre.dtsi
>  create mode 100644 arch/powerpc/boot/dts/ucp1020_32b.dts
>  create mode 100644 arch/powerpc/boot/dts/ucp1020_32b.dtsi
>  create mode 100644 arch/powerpc/configs/ucp1020_defconfig
>  create mode 100644 arch/powerpc/platforms/85xx/ucp1020_som.c
> 
> diff --git a/Documentation/devicetree/bindings/pci/fsl,pci.txt 
> b/Documentation/devicetree/bindings/pci/fsl,pci.txt
> index d8ac4a7..298a5e6 100644
> --- a/Documentation/devicetree/bindings/pci/fsl,pci.txt
> +++ b/Documentation/devicetree/bindings/pci/fsl,pci.txt
> @@ -20,7 +20,7 @@ Example:
>   #interrupt-cells = <1>;
>   #size-cells = <2>;
>   #address-cells = <3>;
> - compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
> + compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci", 
> "fsl,mpc8548-pcie";
>   device_type = "pci";
>   ...
>   ...
> diff --git a/Documentation/devicetree/bindings/powerpc/arcturus/board.txt 
> b/Documentation/devicetree/bindings/powerpc/arcturus/board.txt
> new file mode 100644
> index 000..54e9765
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/powerpc/arcturus/board.txt
> @@ -0,0 +1,149 @@
> +UCP1020 module Tree Bindings
> +
> +
> +Copyright 2013-2015 Arcturus Networks, Inc.
> +
> +QorIQ p1020 based board
> +http://www.arcturusnetworks.com/products/ucp1020/
> +-
> +
> +Root Module
> +
> +Properties:
> +- model: "arcturus,uCP1020"
> +- compatible:"arcturus,uCP1020"
> +- SN:"1234567890-1234"
> +
> +/ {
> + model = "arcturus,uCP1020";
> + compatible = "arcturus,uCP1020", "fsl,P1020";
> + SN = "1234567890-1234";
> + ...
> +  }

Drop the "fsl,P1020" compatible.  Top-level compatible strings describe
the whole board.

SN is a bad property name.  Call it something like "arcturus,serial#",
and define what it actually means rather than just giving an example.

> +-
> +
> +P1020 SPI controller
> +
> +Properties:
> +- compatible:"spansion,s25fl008k", "winbond,w25q80bl"
> +
> +Example:
> + spi@7000 {
> + flash@0 {
> + #address-cells = <1>;
> + #size-cells = <1>;
> + compatible = "spansion,s25fl008k", "winbond,w25q80bl";
> + reg = <0>;
> + spi-max-frequency = <4000>; /* input clock */
> + ...
> + };

This isn't describing the controller, but rather a SPI chip attached to
the controller.  This also doesn't seem like the right place for random
SPI chips.

If all you're specifying is the compatible, maybe create a
spi/trivial-devices.txt similar to i2c/trivial-devices.txt?  Or
something specific to SPI flash chips to describe the partition
specification, though I generally recommend against describing
partitions in the device tree -- especially if this is a developer board
rather than something fixed-purpose where the partitioning is not going
to change based on user requirements.


> +-
> +
> +Chipselect/Local Bus
> +
> +Properties:
> +- #address-cells:<2>.
> +- #size-cells:   <1>.
> +- compatible:"fsl,p1020-elbc", "fsl,elbc", 
> "simple-bus","fsl,p1020-immr"
> +- interrupts:interrupts to report localbus events.
> +
> +Example:
> +
> +&lbc {
> + #addre

[PATCH v2] powerpc/dts: Add and fix 1588 timer node for eTSEC

2015-05-06 Thread Yangbo Lu
Add 1588 timer node in files:
arch/powerpc/boot/dts/bsc9131rdb.dtsi
arch/powerpc/boot/dts/bsc9132qds.dtsi
arch/powerpc/boot/dts/p1010rdb.dtsi
arch/powerpc/boot/dts/p1020rdb-pd.dts
arch/powerpc/boot/dts/p1021rdb-pc.dtsi
arch/powerpc/boot/dts/p1022ds.dtsi
arch/powerpc/boot/dts/p1025twr.dtsi
Fix 1588 timer node in file:
arch/powerpc/boot/dts/p2020rdb-pc.dtsi

Signed-off-by: Yangbo Lu 
---
Changes for v2:
- Changed hex value to decimal value in dts
- Modified commit message
- Modified 1588 node in p2020rdb-pc.dtsi
---
 arch/powerpc/boot/dts/bsc9131rdb.dtsi  | 12 
 arch/powerpc/boot/dts/bsc9132qds.dtsi  | 12 
 arch/powerpc/boot/dts/p1010rdb.dtsi| 12 
 arch/powerpc/boot/dts/p1020rdb-pd.dts  | 12 
 arch/powerpc/boot/dts/p1021rdb-pc.dtsi | 12 
 arch/powerpc/boot/dts/p1022ds.dtsi | 12 
 arch/powerpc/boot/dts/p1025twr.dtsi| 12 
 arch/powerpc/boot/dts/p2020rdb-pc.dtsi | 12 ++--
 8 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi 
b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
index 45efcba..a6d533e 100644
--- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
+++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi
@@ -80,6 +80,18 @@
status = "disabled";
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <5>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <3435973837>;
+   fsl,tmr-fiper1  = <5>;
+   fsl,tmr-fiper2  = <0>;
+   fsl,max-adj = <24999>;
+   };
+
enet0: ethernet@b {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/bsc9132qds.dtsi 
b/arch/powerpc/boot/dts/bsc9132qds.dtsi
index af8e888..ef75804 100644
--- a/arch/powerpc/boot/dts/bsc9132qds.dtsi
+++ b/arch/powerpc/boot/dts/bsc9132qds.dtsi
@@ -87,6 +87,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <5>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <3435973837>;
+   fsl,tmr-fiper1  = <5>;
+   fsl,tmr-fiper2  = <0>;
+   fsl,max-adj = <24999>;
+   };
+
enet0: ethernet@b {
phy-handle = <&phy0>;
tbi-handle = <&tbi0>;
diff --git a/arch/powerpc/boot/dts/p1010rdb.dtsi 
b/arch/powerpc/boot/dts/p1010rdb.dtsi
index ea534ef..1613678 100644
--- a/arch/powerpc/boot/dts/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/p1010rdb.dtsi
@@ -186,6 +186,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <10>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <2147483670>;
+   fsl,tmr-fiper1  = <0>;
+   fsl,tmr-fiper2  = <0>;
+   fsl,max-adj = <1>;
+   };
+
enet0: ethernet@b {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1020rdb-pd.dts 
b/arch/powerpc/boot/dts/p1020rdb-pd.dts
index 987017e..52e8fe8 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pd.dts
+++ b/arch/powerpc/boot/dts/p1020rdb-pd.dts
@@ -225,6 +225,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <10>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-add = <2147483670>;
+   fsl,tmr-fiper1  = <0>;
+   fsl,tmr-fiper2  = <0>;
+   fsl,max-adj = <1>;
+   };
+
enet0: ethernet@b {
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1021rdb-pc.dtsi 
b/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
index d6274c5..a29c84a 100644
--- a/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
+++ b/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
@@ -224,6 +224,18 @@
};
};
 
+   ptp_clock@b0e00 {
+   compatible = "fsl,etsec-ptp";
+   reg = <0xb0e00 0xb0>;
+   interrupts = <68 2 0 0 69 2 0 0>;
+   fsl,tclk-period = <10>;
+   fsl,tmr-prsc= <2>;
+   fsl,tmr-ad

[PATCH] cpuidle: Handle tick_broadcast_enter() failure gracefully

2015-05-06 Thread Preeti U Murthy
When a CPU has to enter an idle state in which the tick stops, it makes
a call to tick_broadcast_enter(). The call will fail if this CPU is the
broadcast CPU. Today, under such a circumstance, the arch cpuidle code
handles this CPU.  This is unsatisfactory because not only are we not
aware of what the arch cpuidle code does, but we also do not account for
the idle state residency time and usage of such a CPU.

This scenario can be handled better by simply asking the cpuidle
governor to choose an idle state in which the tick does not stop. To
accommodate this change, move the setting of the runqueue idle state
from the core to the cpuidle driver; otherwise rq->idle_state will be
set incorrectly.

Signed-off-by: Preeti U Murthy 
---
Based on linux-pm/bleeding-edge

 drivers/cpuidle/cpuidle.c  |   21 +
 drivers/cpuidle/governors/ladder.c |   13 ++---
 drivers/cpuidle/governors/menu.c   |6 +-
 include/linux/cpuidle.h|6 +++---
 include/linux/sched.h  |   16 
 kernel/sched/core.c|   17 +
 kernel/sched/fair.c|2 +-
 kernel/sched/idle.c|8 +---
 kernel/sched/sched.h   |   24 
 9 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 61c417b..8f5657e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "cpuidle.h"
@@ -167,8 +168,15 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct 
cpuidle_driver *drv,
 * local timer will be shut down.  If a local timer is used from another
 * CPU as a broadcast timer, this call may fail if it is not available.
 */
-   if (broadcast && tick_broadcast_enter())
-   return -EBUSY;
+   if (broadcast && tick_broadcast_enter()) {
+   index = cpuidle_select(drv, dev, !broadcast);
+   if (index < 0)
+   return -EBUSY;
+   target_state = &drv->states[index];
+   }
+
+   /* Take note of the planned idle state. */
+   idle_set_state(smp_processor_id(), target_state);
 
trace_cpu_idle_rcuidle(index, dev->cpu);
time_start = ktime_get();
@@ -178,6 +186,9 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct 
cpuidle_driver *drv,
time_end = ktime_get();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
+   /* The cpu is no longer idle or about to enter idle. */
+   idle_set_state(smp_processor_id(), NULL);
+
if (broadcast) {
if (WARN_ON_ONCE(!irqs_disabled()))
local_irq_disable();
@@ -213,12 +224,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, 
struct cpuidle_driver *drv,
  *
  * @drv: the cpuidle driver
  * @dev: the cpuidle device
+ * @timer_stop_valid: allow selection of idle state where tick stops
  *
  * Returns the index of the idle state.
  */
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv,
+   struct cpuidle_device *dev, int timer_stop_valid)
 {
-   return cpuidle_curr_governor->select(drv, dev);
+   return cpuidle_curr_governor->select(drv, dev, timer_stop_valid);
 }
 
 /**
diff --git a/drivers/cpuidle/governors/ladder.c 
b/drivers/cpuidle/governors/ladder.c
index 401c010..c437322 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -62,9 +62,10 @@ static inline void ladder_do_selection(struct ladder_device 
*ldev,
  * ladder_select_state - selects the next state to enter
  * @drv: cpuidle driver
  * @dev: the CPU
+ * @timer_stop_valid: allow selection of idle state where tick stops
  */
 static int ladder_select_state(struct cpuidle_driver *drv,
-   struct cpuidle_device *dev)
+   struct cpuidle_device *dev, int 
timer_stop_valid)
 {
struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
struct ladder_device_state *last_state;
@@ -86,6 +87,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
!drv->states[last_idx + 1].disabled &&
!dev->states_usage[last_idx + 1].disable &&
last_residency > last_state->threshold.promotion_time &&
+   !(!timer_stop_valid && (drv->states[last_idx + 1].flags & 
CPUIDLE_FLAG_TIMER_STOP)) &&
drv->states[last_idx + 1].exit_latency <= latency_req) {
last_state->stats.promotion_count++;
last_state->stats.demotion_count = 0;
@@ -99,11 +101,14 @@ static int ladder_select_state(struct cpuidle_driver *drv,
if (last_idx > CPUIDLE_DRIVER_STATE_START &&
(drv->states[last_idx].disabled ||
dev->states_usage[last_idx].disable ||
+   (!timer_stop_valid && (d