Re: [v6 1/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-08-09 Thread Matt Brown
On Wed, Aug 9, 2017 at 11:26 PM, Michael Ellerman <m...@ellerman.id.au> wrote:
> Matt Brown <matthew.brown@gmail.com> writes:
>
>> This patch uses the vpermxor instruction to optimise the raid6 Q syndrome.
>> This instruction was made available with POWER8, ISA version 2.07.
>> It allows for both vperm and vxor instructions to be done in a single
>> instruction. This has been tested for correctness on a ppc64le vm with a
>> basic RAID6 setup containing 5 drives.
>>
>> The performance benchmarks are from the raid6test in the /lib/raid6/test
>> directory. These results are from an IBM Firestone machine with ppc64le
>> architecture. The benchmark results show a 35% speed increase over the best
>> existing algorithm for powerpc (altivec). The raid6test has also been run
>> on a big-endian ppc64 vm to ensure it also works for big-endian
>> architectures.
>>
>> Performance benchmarks:
>>   raid6: altivecx4 gen() 18773 MB/s
>>   raid6: altivecx8 gen() 19438 MB/s
>>
>>   raid6: vpermxor4 gen() 25112 MB/s
>>   raid6: vpermxor8 gen() 26279 MB/s
>>
>> Signed-off-by: Matt Brown <matthew.brown@gmail.com>
>> Reviewed-by: Daniel Axtens <d...@axtens.net>
>> ---
>> v6:
>>   - added vpermxor files to .gitignore
>>   - fixup whitespace
>>   - added vpermxor objs to test/Makefile
>> v5:
>>   - moved altivec.uc fix into other patch in series
>> ---
>>  include/linux/raid/pq.h |   4 ++
>>  lib/raid6/.gitignore|   1 +
>>  lib/raid6/Makefile  |  27 -
>>  lib/raid6/algos.c   |   4 ++
>>  lib/raid6/test/Makefile |  17 +++-
>>  lib/raid6/vpermxor.uc   | 104 
>> 
>>  6 files changed, 154 insertions(+), 3 deletions(-)
>>  create mode 100644 lib/raid6/vpermxor.uc
>
> This version at least is not Cc'ed to any of the folks that
> get_maintainer.pl identifies for these files:
>
> $ ./scripts/get_maintainer.pl -f lib/raid6
> s...@fb.com
> gayatri.kamm...@intel.com
> fenghua...@intel.com
> megha@linux.intel.com
> schwidef...@de.ibm.com
> anup.pa...@broadcom.com
> linux-ker...@vger.kernel.org
>
>
> This seems like mostly a list of random folks who've touched this code,
> but maybe some of them would have comments?
>

Ah my bad. I've CC'ed them into this email chain.
Apologies for not including you guys in the original email.
Here is a link to the patchworks patch:
http://patchwork.ozlabs.org/patch/797576/

Thanks,
Matt Brown


Re: [PATCH v3] powerpc/powernv: Use darn instr for random_seed on p9

2017-08-06 Thread Matt Brown
On Sat, Aug 5, 2017 at 3:06 AM, Tyrel Datwyler
<tyr...@linux.vnet.ibm.com> wrote:
> On 08/03/2017 06:12 PM, Matt Brown wrote:
>> This adds the powernv_get_random_darn function which utilises the darn
>> instruction, introduced in POWER9. The powernv_get_random_darn function
>> is used as the ppc_md.get_random_seed on P9.
>>
>> The DARN instruction can potentially throw an error, so we attempt to
>> register the powernv_get_random_darn function up to 10 times before
>> failing.
>>
>> Signed-off-by: Matt Brown <matthew.brown@gmail.com>
>> ---
>> v3:
>>   - add repeat attempts to register the ppc_md.get_random_seed
>>   - fixed the PPC_DARN macro
>>   - move DARN_ERR definition
>>   - fixed commit message
>> v2:
>>   - remove repeat darn attempts
>>   - move hook to rng_init
>> ---
>>  arch/powerpc/include/asm/ppc-opcode.h |  4 
>>  arch/powerpc/platforms/powernv/rng.c  | 35 
>> ++-
>>  2 files changed, 38 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
>> b/arch/powerpc/include/asm/ppc-opcode.h
>> index c4ced1d..aabd150 100644
>> --- a/arch/powerpc/include/asm/ppc-opcode.h
>> +++ b/arch/powerpc/include/asm/ppc-opcode.h
>> @@ -134,6 +134,7 @@
>>  #define PPC_INST_COPY        0x7c00060c
>>  #define PPC_INST_COPY_FIRST  0x7c20060c
>>  #define PPC_INST_CP_ABORT    0x7c00068c
>> +#define PPC_INST_DARN        0x7c0005e6
>>  #define PPC_INST_DCBA        0x7c0005ec
>>  #define PPC_INST_DCBA_MASK   0xfc0007fe
>>  #define PPC_INST_DCBAL       0x7c2005ec
>> @@ -325,6 +326,9 @@
>>
>>  /* Deal with instructions that older assemblers aren't aware of */
>>  #define  PPC_CP_ABORT    stringify_in_c(.long PPC_INST_CP_ABORT)
>> +#define PPC_DARN(t, l)   stringify_in_c(.long PPC_INST_DARN |  \
>> + ___PPC_RT(t)   |  \
>> + (((l) & 0x3) << 16))
>>  #define  PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
>>   __PPC_RA(a) | __PPC_RB(b))
>>  #define  PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
>> diff --git a/arch/powerpc/platforms/powernv/rng.c 
>> b/arch/powerpc/platforms/powernv/rng.c
>> index 5dcbdea..83b925c 100644
>> --- a/arch/powerpc/platforms/powernv/rng.c
>> +++ b/arch/powerpc/platforms/powernv/rng.c
>> @@ -16,11 +16,13 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>>  #include 
>>
>> +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
>>
>>  struct powernv_rng {
>>   void __iomem *regs;
>> @@ -67,6 +69,21 @@ int powernv_get_random_real_mode(unsigned long *v)
>>   return 1;
>>  }
>>
>> +int powernv_get_random_darn(unsigned long *v)
>> +{
>> + unsigned long val;
>> +
>> + /* Using DARN with L=1 - 64-bit conditioned random number */
>> + asm volatile(PPC_DARN(%0, 1) : "=r"(val));
>> +
>> + if (val == DARN_ERR)
>> + return 0;
>> +
>> + *v = val;
>> +
>> + return 1;
>> +}
>> +
>>  int powernv_get_random_long(unsigned long *v)
>>  {
>>   struct powernv_rng *rng;
>> @@ -135,8 +152,9 @@ static __init int rng_create(struct device_node *dn)
>>
>>  static __init int rng_init(void)
>>  {
>> + unsigned long darn_test;
>>   struct device_node *dn;
>> - int rc;
>> + int rc, i;
>>
>>   for_each_compatible_node(dn, NULL, "ibm,power-rng") {
>>   rc = rng_create(dn);
>> @@ -150,6 +168,21 @@ static __init int rng_init(void)
>>   of_platform_device_create(dn, NULL, NULL);
>>   }
>>
>> + if (cpu_has_feature(CPU_FTR_ARCH_300)) {
>> + for (i = 0; i < 10; i++) {
>> + if (powernv_get_random_darn(&darn_test)) {
>> + ppc_md.get_random_seed =
>> + powernv_get_random_darn;
>> + break;
>
> If you return directly here you can avoid the (i == 9) conditional every 
> iteration of the
> loop by moving the pr_warn to just outside the loop.

That's true, although it is very unlikely for the
powernv_get_random_darn to fail. So in practice we should never reach
the (i == 9) conditional.
The loop is more of a backup in the rare case that it does fail.

Thanks,
Matt


>
> -Tyrel
>
>> + }
>> +
>> + if (i == 9) {
>> + pr_warn("Failed to use 
>> powernv_get_random_darn"\
>> + "as get_random_seed");
>> + }
>> + }
>> + }
>> +
>>   return 0;
>>  }
>>  machine_subsys_initcall(powernv, rng_init);
>>
>


[v6 2/2] lib/raid6: Build proper raid6test files on powerpc

2017-08-03 Thread Matt Brown
Previously the raid6 test Makefile did not build the POWER specific files
(altivec and vpermxor).
This patch fixes the bug, so that all appropriate files for powerpc are built.

This patch also fixes the missing and mismatched ifdef statements to allow the
altivec.uc file to be built correctly.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v6:
- remove vpermxor objs from this patch
v5:
- moved altivec.uc fix into this patch
---
 lib/raid6/altivec.uc| 3 +++
 lib/raid6/test/Makefile | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include <linux/raid/pq.h>
 
+#ifdef CONFIG_ALTIVEC
+
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index a14be53..b64a267 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,9 +44,10 @@ else ifeq ($(HAS_NEON),yes)
 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 ifeq ($(HAS_ALTIVEC),yes)
+CFLAGS += -I../../../arch/powerpc/include
+CFLAGS += -DCONFIG_ALTIVEC
 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 endif
-- 
2.9.3



[v6 1/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-08-03 Thread Matt Brown
This patch uses the vpermxor instruction to optimise the raid6 Q syndrome.
This instruction was made available with POWER8, ISA version 2.07.
It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s

raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
Reviewed-by: Daniel Axtens <d...@axtens.net>
---
v6:
- added vpermxor files to .gitignore
- fixup whitespace
- added vpermxor objs to test/Makefile
v5:
- moved altivec.uc fix into other patch in series
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/.gitignore|   1 +
 lib/raid6/Makefile  |  27 -
 lib/raid6/algos.c   |   4 ++
 lib/raid6/test/Makefile |  17 +++-
 lib/raid6/vpermxor.uc   | 104 
 6 files changed, 154 insertions(+), 3 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index f01b1cb..3de0d89 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -4,3 +4,4 @@ int*.c
 tables.c
 neon?.c
 s390vx?.c
+vpermxor*.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..db095a7 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y  += algos.o recov.o tables.o int1.o int2.o 
int4.o \
   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o 
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+  vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
    &raid6_altivec2,
    &raid6_altivec4,
    &raid6_altivec8,
+   &raid6_vpermxor1,
+   &raid6_vpermxor2,
+   &raid6_vpermxor4,
+   &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
_tilegx8,
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..a14be53 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -47,7 +47,8 @@ else
  gcc -c -x c - >&/dev/null && \
  rm ./-.o && echo yes)
 ifeq ($(HAS_ALTIVEC),yes)
-OBJS 

Re: [v5 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-08-03 Thread Matt Brown
On Wed, Aug 2, 2017 at 10:20 AM, Daniel Axtens  wrote:
> Oh, one final thing - I just realised there's a .gitignore file in
> lib/raid6/.gitignore that needs to be updated to include the vpermxor
> generated files. That should be part of this patch.
>

Oh, I managed to miss that!
I'll add that and fix up that comment spacing.

Thanks,
Matt



> Regards,
> Daniel


Re: [v5 1/2] lib/raid6: Build proper files on corresponding arch

2017-08-03 Thread Matt Brown
On Wed, Aug 2, 2017 at 12:00 PM, Michael Ellerman  wrote:
> Daniel Axtens  writes:
>
>> Hi Matt,
>>
>>> --- a/lib/raid6/test/Makefile
>>> +++ b/lib/raid6/test/Makefile
>>> @@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
>>>  CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
>>>  else
>>>  HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
>>> - gcc -c -x c - >&/dev/null && \
>>> - rm ./-.o && echo yes)
>>> + gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
>>
>> From memory the change here (s/>&/>/) was necessary to get the build to
>> succeed - did we ever figure out why that was? I'm not enough of a shell
>> guru to grok the difference.
>
> Using >& redirects stdout and stderr, whereas > only redirects stdout.
>
> So possibly it doesn't fix anything, but rather lets you see any error
> emitted by the compiler rather than swallowing it?
>

Just had to double-check what the problem was.
The bug was that none of the ppc specific files were being built.
I'm not entirely sure how, but this fixes it so the altivec and
vpermxor files are built.

I'll fix up the commit message and move the vpermxor make defs into
the other patch.

Thanks,
Matt

> cheers


[PATCH v3] powerpc/powernv: Use darn instr for random_seed on p9

2017-08-03 Thread Matt Brown
This adds the powernv_get_random_darn function which utilises the darn
instruction, introduced in POWER9. The powernv_get_random_darn function
is used as the ppc_md.get_random_seed on P9.

The DARN instruction can potentially throw an error, so we attempt to
register the powernv_get_random_darn function up to 10 times before
failing.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v3:
- add repeat attempts to register the ppc_md.get_random_seed
- fixed the PPC_DARN macro
- move DARN_ERR definition
- fixed commit message
v2:
- remove repeat darn attempts
- move hook to rng_init
---
 arch/powerpc/include/asm/ppc-opcode.h |  4 
 arch/powerpc/platforms/powernv/rng.c  | 35 ++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index c4ced1d..aabd150 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -134,6 +134,7 @@
 #define PPC_INST_COPY  0x7c00060c
 #define PPC_INST_COPY_FIRST0x7c20060c
 #define PPC_INST_CP_ABORT  0x7c00068c
+#define PPC_INST_DARN  0x7c0005e6
 #define PPC_INST_DCBA  0x7c0005ec
 #define PPC_INST_DCBA_MASK 0xfc0007fe
 #define PPC_INST_DCBAL 0x7c2005ec
@@ -325,6 +326,9 @@
 
 /* Deal with instructions that older assemblers aren't aware of */
 #define PPC_CP_ABORT    stringify_in_c(.long PPC_INST_CP_ABORT)
+#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN |  \
+   ___PPC_RT(t)   |  \
+   (((l) & 0x3) << 16))
 #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
 #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
diff --git a/arch/powerpc/platforms/powernv/rng.c 
b/arch/powerpc/platforms/powernv/rng.c
index 5dcbdea..83b925c 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -16,11 +16,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
 
 struct powernv_rng {
void __iomem *regs;
@@ -67,6 +69,21 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
 }
 
+int powernv_get_random_darn(unsigned long *v)
+{
+   unsigned long val;
+
+   /* Using DARN with L=1 - 64-bit conditioned random number */
+   asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+   if (val == DARN_ERR)
+   return 0;
+
+   *v = val;
+
+   return 1;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
struct powernv_rng *rng;
@@ -135,8 +152,9 @@ static __init int rng_create(struct device_node *dn)
 
 static __init int rng_init(void)
 {
+   unsigned long darn_test;
struct device_node *dn;
-   int rc;
+   int rc, i;
 
for_each_compatible_node(dn, NULL, "ibm,power-rng") {
rc = rng_create(dn);
@@ -150,6 +168,21 @@ static __init int rng_init(void)
of_platform_device_create(dn, NULL, NULL);
}
 
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   for (i = 0; i < 10; i++) {
+   if (powernv_get_random_darn(&darn_test)) {
+   ppc_md.get_random_seed =
+   powernv_get_random_darn;
+   break;
+   }
+
+   if (i == 9) {
+   pr_warn("Failed to use powernv_get_random_darn"\
+   "as get_random_seed");
+   }
+   }
+   }
+
return 0;
 }
 machine_subsys_initcall(powernv, rng_init);
-- 
2.9.3



Re: [PATCH v2] powerpc/powernv: Use darn instr for random_seed on p9

2017-08-03 Thread Matt Brown
On Tue, Aug 1, 2017 at 10:57 PM, Segher Boessenkool
 wrote:
> On Mon, Jul 31, 2017 at 07:10:15PM +1000, Michael Ellerman wrote:
>> And ___PPC_RA() is not quite right. The L field is only 2 bits wide, not
>> the 5 that ___PPC_RA() allows.
>>
>> We don't have a __PPC_L() macro, because L fields vary in size and
>> location. So I think you're best off open coding it, eg:
>>
>> +#define PPC_DARN(t, l)   stringify_in_c(.long PPC_INST_DARN |  \
>> + __PPC_RT(t)|  \
>> + (((l) & 0x3) << 16))
>
> It would be better if you could do a compile-time error if the L value
> is out of range.  Hrm, nothing else does such checking either?
>

Yeah currently the only checks are whether the register value is
valid, using the __PPC_R{A,B,S,T} macros.
However, we can't use these macros for inline asm because we're
passing a variable into it
so the pre-processor attempts to look for register %0 which breaks it.
(Have to use triple underscore versions)

We could add more checking to validate the L value, but I don't know
how much of an issue it currently is.
A question for mpe I guess.

Thanks,
Matt


>
> Segher


Re: [PATCH v4 1/5] powerpc/lib/sstep: Add cmpb instruction emulation

2017-08-01 Thread Matt Brown
On Tue, Aug 1, 2017 at 10:44 PM, Segher Boessenkool
<seg...@kernel.crashing.org> wrote:
> Hi!
>
> On Mon, Jul 31, 2017 at 10:58:22AM +1000, Matt Brown wrote:
>> @@ -1049,6 +1065,10 @@ int analyse_instr(struct instruction_op *op, struct 
>> pt_regs *regs,
>>   do_cmp_unsigned(regs, val, val2, rd >> 2);
>>   goto instr_done;
>>
>> + case 508: /* cmpb */
>> + do_cmpb(regs, regs->gpr[rd], regs->gpr[rb], ra);
>> + goto instr_done;
>
> Should this then be under an ifdef for 64-bit?

I don't think so, the cmpb instruction should be 32 and 64-bit.
It isn't listed under the '64-bit Fixed-point Logical Instructions'
section in the ISA either.

Thanks,
Matt
>
>
> Segher


[PATCH v4 5/5] powerpc/lib/sstep: Add isel instruction emulation

2017-07-30 Thread Matt Brown
This adds emulation for the isel instruction.
Tested for correctness against the isel instruction and its extended
mnemonics (lt, gt, eq) on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v4:
- simplify if statement to ternary op
(same as isel emulation in kernel/traps.c)
v2:
- fixed opcode
- fixed definition to include the 'if RA=0, a=0' clause
- fixed ccr bitshifting error
---
 arch/powerpc/lib/sstep.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index af4eef9..473bab5 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1240,6 +1240,14 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
 /*
  * Logical instructions
  */
+   case 15:/* isel */
+   mb = (instr >> 6) & 0x1f; /* bc */
+   val = (regs->ccr >> (31 - mb)) & 1;
+   val2 = (ra) ? regs->gpr[ra] : 0;
+
+   regs->gpr[rd] = (val) ? val2 : regs->gpr[rb];
+   goto logical_done;
+
case 26:/* cntlzw */
asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
"r" (regs->gpr[rd]));
-- 
2.9.3



[PATCH v4 4/5] powerpc/lib/sstep: Add prty instruction emulation

2017-07-30 Thread Matt Brown
This adds emulation for the prtyw and prtyd instructions.
Tested for logical correctness against the prtyw and prtyd instructions
on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v4:
- use simpler xor method
v3:
- optimised using the Giles-Miller method of side-ways addition
v2:
- fixed opcodes
- fixed bitshifting and typecast errors
- merged do_prtyw and do_prtyd into single function
---
 arch/powerpc/lib/sstep.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c9fd613..af4eef9 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -657,6 +657,24 @@ static nokprobe_inline void do_bpermd(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = perm;
 }
 #endif /* CONFIG_PPC64 */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(struct pt_regs *regs, unsigned long v,
+   int size, int ra)
+{
+   unsigned long long res = v ^ (v >> 8);
+
+   res ^= res >> 16;
+   if (size == 32) {   /* prtyw */
+   regs->gpr[ra] = res & 0x0000000100000001;
+   return;
+   }
+
+   res ^= res >> 32;
+   regs->gpr[ra] = res & 1;/*prtyd */
+}
 
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
@@ -1247,6 +1265,14 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
+
+   case 154:   /* prtyw */
+   do_prty(regs, regs->gpr[rd], 32, ra);
+   goto logical_done;
+
+   case 186:   /* prtyd */
+   do_prty(regs, regs->gpr[rd], 64, ra);
+   goto logical_done;
 #ifdef CONFIG_PPC64
case 252:   /* bpermd */
do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra);
-- 
2.9.3



[PATCH v4 3/5] powerpc/lib/sstep: Add bpermd instruction emulation

2017-07-30 Thread Matt Brown
This adds emulation for the bpermd instruction.
Tested for correctness against the bpermd instruction on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v4:
- change ifdef macro from __powerpc64__ to CONFIG_PPC64
v2:
- fixed opcode
- added ifdef tags to do_bpermd func
- fixed bitshifting errors
---
 arch/powerpc/lib/sstep.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 2fd7377..c9fd613 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -640,6 +640,24 @@ static nokprobe_inline void do_popcnt(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = out;/* popcntd */
 }
 
+#ifdef CONFIG_PPC64
+static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int ra)
+{
+   unsigned char perm, idx;
+   unsigned int i;
+
+   perm = 0;
+   for (i = 0; i < 8; i++) {
+   idx = (v1 >> (i * 8)) & 0xff;
+   if (idx < 64)
+   if (v2 & PPC_BIT(idx))
+   perm |= 1 << i;
+   }
+   regs->gpr[ra] = perm;
+}
+#endif /* CONFIG_PPC64 */
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1229,7 +1247,11 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
-
+#ifdef CONFIG_PPC64
+   case 252:   /* bpermd */
+   do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra);
+   goto logical_done;
+#endif
case 284:   /* xor */
regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
goto logical_done;
-- 
2.9.3



[PATCH v4 2/5] powerpc/lib/sstep: Add popcnt instruction emulation

2017-07-30 Thread Matt Brown
This adds emulations for the popcntb, popcntw, and popcntd instructions.
Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v4:
- change ifdef macro from __powerpc64__ to CONFIG_PPC64
- slight optimisations 
(now identical to the popcntb implementation in kernel/traps.c)
v3:
- optimised using the Giles-Miller method of side-ways addition
v2:
- fixed opcodes
- fixed typecasting
- fixed bitshifting error for both 32 and 64bit arch
---
 arch/powerpc/lib/sstep.c | 42 +-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 87d277f..2fd7377 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -612,6 +612,34 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, 
unsigned long v1,
regs->gpr[rd] = out_val;
 }
 
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
+   int size, int ra)
+{
+   unsigned long long out = v1;
+
+   out -= (out >> 1) & 0x5555555555555555;
+   out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
+   out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f;
+
+   if (size == 8) {/* popcntb */
+   regs->gpr[ra] = out;
+   return;
+   }
+   out += out >> 8;
+   out += out >> 16;
+   if (size == 32) {   /* popcntw */
+   regs->gpr[ra] = out & 0x0000003f0000003f;
+   return;
+   }
+
+   out = (out + (out >> 32)) & 0x7f;
+   regs->gpr[ra] = out;/* popcntd */
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1194,6 +1222,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
goto logical_done;
 
+   case 122:   /* popcntb */
+   do_popcnt(regs, regs->gpr[rd], 8, ra);
+   goto logical_done;
+
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
@@ -1206,6 +1238,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
goto logical_done;
 
+   case 378:   /* popcntw */
+   do_popcnt(regs, regs->gpr[rd], 32, ra);
+   goto logical_done;
+
case 412:   /* orc */
regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
goto logical_done;
@@ -1217,7 +1253,11 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 476:   /* nand */
regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
goto logical_done;
-
+#ifdef CONFIG_PPC64
+   case 506:   /* popcntd */
+   do_popcnt(regs, regs->gpr[rd], 64, ra);
+   goto logical_done;
+#endif
case 922:   /* extsh */
regs->gpr[ra] = (signed short) regs->gpr[rd];
goto logical_done;
-- 
2.9.3



[PATCH v4 1/5] powerpc/lib/sstep: Add cmpb instruction emulation

2017-07-30 Thread Matt Brown
This patch adds emulation of the cmpb instruction, enabling xmon to
emulate this instruction.
Tested for correctness against the cmpb asm instruction on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2: 
- fixed opcode
- fixed mask typecasting
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8..87d277f 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -596,6 +596,22 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs 
*regs, unsigned long
regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
 }
 
+static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int rd)
+{
+   unsigned long long out_val, mask;
+   int i;
+
+   out_val = 0;
+   for (i = 0; i < 8; i++) {
+   mask = 0xffUL << (i * 8);
+   if ((v1 & mask) == (v2 & mask))
+   out_val |= mask;
+   }
+
+   regs->gpr[rd] = out_val;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1049,6 +1065,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
do_cmp_unsigned(regs, val, val2, rd >> 2);
goto instr_done;
 
+   case 508: /* cmpb */
+   do_cmpb(regs, regs->gpr[rd], regs->gpr[rb], ra);
+   goto instr_done;
+
 /*
  * Arithmetic instructions
  */
-- 
2.9.3



Re: [PATCH v3 4/5] powerpc/lib/sstep: Add prty instruction emulation

2017-07-27 Thread Matt Brown
On Thu, Jul 27, 2017 at 11:26 AM, Michael Ellerman  wrote:
> Segher Boessenkool  writes:
>
>> On Wed, Jul 26, 2017 at 08:03:30PM +1000, Michael Ellerman wrote:
>>> Segher Boessenkool  writes:
>>> > A general question about these patches: some things are inside #ifdef
>>> > __powerpc64__, some are not.  It seems it is the wrong macro, and it
>>> > should be used (or not used) consistently?
>>>
>>> Why is it the wrong macro? Because we tend to use CONFIG_PPC64 you mean?
>>
>> Yeah.  But I see sstep.c already mixes those two at will (or if there
>> is a distinction, I'm not seeing it :-) )
>
> Yeah OK. In practice they're equivalent, if CONFIG_PPC64=y then the
> kernel is built 64-bit and therefore __powerpc64__ is defined.
>
> But I agree it's a mess, we should use CONFIG_PPC64 exclusively unless
> there's some reason not to (which I don't think there ever is).
>
>>> I thought the reason some are #ifdef'ed is that some are 64-bit only.
>>> ie. bpermd is 64-bit only ?
>>
>> 64-bit only, in what way?  It's not clear what the rules are.
>
> Instructions that have "d" in the name? :P
>
>> It's not instructions that can only run in 64-bit mode.
>> It's not instructions that only give a usable result with 64-bit regs
>> implemented.
>> It's not instructions only implemented on 64-bit CPUs.
>
> I think it's trying to be that ^
>
> If you build a 32-bit kernel then instructions that are only defined on
> 64-bit CPUs should be treated as illegal, so the easiest way to achieve
> that is to #ifdef off the code for those instructions.
>

I'll fix this up to use the xor implementation, and change the
series to use CONFIG_PPC64 for the ifdef.

Thanks,
Matt
> cheers


[PATCH v3 5/5] powerpc/lib/sstep: Add isel instruction emulation

2017-07-24 Thread Matt Brown
This adds emulation for the isel instruction.
Tested for correctness against the isel instruction and its extended
mnemonics (lt, gt, eq) on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- fixed opcode
- fixed definition to include the 'if RA=0, a=0' clause
- fixed ccr bitshifting error
---
 arch/powerpc/lib/sstep.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 0bcf631..de3d558 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1239,6 +1239,17 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
 /*
  * Logical instructions
  */
+   case 15:/* isel */
+   mb = (instr >> 6) & 0x1f; /* bc */
+   val = (regs->ccr >> (31 - mb)) & 1;
+   val2 = (ra) ? regs->gpr[ra] : 0;
+
+   if (val)
+   regs->gpr[rd] = val2;
+   else
+   regs->gpr[rd] = regs->gpr[rb];
+   goto logical_done;
+
case 26:/* cntlzw */
asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
"r" (regs->gpr[rd]));
-- 
2.9.3



[PATCH v3 4/5] powerpc/lib/sstep: Add prty instruction emulation

2017-07-24 Thread Matt Brown
This adds emulation for the prtyw and prtyd instructions.
Tested for logical correctness against the prtyw and prtyd instructions
on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v3:
- optimised using the Giles-Miller method of side-ways addition
v2:
- fixed opcodes
- fixed bitshifting and typecast errors
- merged do_prtyw and do_prtyd into single function
---
 arch/powerpc/lib/sstep.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 6a79618..0bcf631 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -655,6 +655,25 @@ static nokprobe_inline void do_bpermd(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = perm;
 }
 #endif /* __powerpc64__ */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(struct pt_regs *regs, unsigned long v,
+   int size, int ra)
+{
+   unsigned long long res = v;
+
+   res = (0x0001000100010001 & res) + (0x0001000100010001 & (res >> 8));
+   res = (0x0000000700000007 & res) + (0x0000000700000007 & (res >> 16));
+   if (size == 32) {   /* prtyw */
+   regs->gpr[ra] = (0x0000000100000001 & res);
+   return;
+   }
+
+   res = (0x000000000000000f & res) + (0x000000000000000f & (res >> 32));
+   regs->gpr[ra] = res & 1;/*prtyd */
+}
 
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
@@ -1245,6 +1264,14 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
+
+   case 154:   /* prtyw */
+   do_prty(regs, regs->gpr[rd], 32, ra);
+   goto logical_done;
+
+   case 186:   /* prtyd */
+   do_prty(regs, regs->gpr[rd], 64, ra);
+   goto logical_done;
 #ifdef __powerpc64__
case 252:   /* bpermd */
do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra);
-- 
2.9.3



[PATCH v3 3/5] powerpc/lib/sstep: Add bpermd instruction emulation

2017-07-24 Thread Matt Brown
This adds emulation for the bpermd instruction.
Tested for correctness against the bpermd instruction on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- fixed opcode
- added ifdef tags to do_bpermd func
- fixed bitshifting errors
---
 arch/powerpc/lib/sstep.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c1f9cdb..6a79618 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -638,6 +638,24 @@ static nokprobe_inline void do_popcnt(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = out;/* popcntd */
 }
 
+#ifdef __powerpc64__
+static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int ra)
+{
+   unsigned char perm, idx;
+   unsigned int i;
+
+   perm = 0;
+   for (i = 0; i < 8; i++) {
+   idx = (v1 >> (i * 8)) & 0xff;
+   if (idx < 64)
+   if (v2 & PPC_BIT(idx))
+   perm |= 1 << i;
+   }
+   regs->gpr[ra] = perm;
+}
+#endif /* __powerpc64__ */
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1227,7 +1245,11 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
-
+#ifdef __powerpc64__
+   case 252:   /* bpermd */
+   do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra);
+   goto logical_done;
+#endif
case 284:   /* xor */
regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
goto logical_done;
-- 
2.9.3



[PATCH v3 2/5] powerpc/lib/sstep: Add popcnt instruction emulation

2017-07-24 Thread Matt Brown
This adds emulations for the popcntb, popcntw, and popcntd instructions.
Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v3:
- optimised using the Gillies-Miller method of sideways addition
v2:
- fixed opcodes
- fixed typecasting
- fixed bitshifting error for both 32 and 64bit arch
---
 arch/powerpc/lib/sstep.c | 40 +++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 87d277f..c1f9cdb 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -612,6 +612,32 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, 
unsigned long v1,
regs->gpr[rd] = out_val;
 }
 
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
+   int size, int ra)
+{
+   unsigned long long out = v1;
+
+   out = (0x5555555555555555 & out) + (0x5555555555555555 & (out >> 1));
+   out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
+   out = (0x0f0f0f0f0f0f0f0f & out) + (0x0f0f0f0f0f0f0f0f & (out >> 4));
+   if (size == 8) {/* popcntb */
+   regs->gpr[ra] = out;
+   return;
+   }
+   out = (0x001f001f001f001f & out) + (0x001f001f001f001f & (out >> 8));
+   out = (0x0000003f0000003f & out) + (0x0000003f0000003f & (out >> 16));
+   if (size == 32) {   /* popcntw */
+   regs->gpr[ra] = out;
+   return;
+   }
+   out = (0x000000000000007f & out) + (0x000000000000007f & (out >> 32));
+   regs->gpr[ra] = out;/* popcntd */
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1194,6 +1220,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
goto logical_done;
 
+   case 122:   /* popcntb */
+   do_popcnt(regs, regs->gpr[rd], 8, ra);
+   goto logical_done;
+
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
@@ -1206,6 +1236,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
goto logical_done;
 
+   case 378:   /* popcntw */
+   do_popcnt(regs, regs->gpr[rd], 32, ra);
+   goto logical_done;
+
case 412:   /* orc */
regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
goto logical_done;
@@ -1217,7 +1251,11 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 476:   /* nand */
regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
goto logical_done;
-
+#ifdef __powerpc64__
+   case 506:   /* popcntd */
+   do_popcnt(regs, regs->gpr[rd], 64, ra);
+   goto logical_done;
+#endif
case 922:   /* extsh */
regs->gpr[ra] = (signed short) regs->gpr[rd];
goto logical_done;
-- 
2.9.3



[PATCH v3 1/5] powerpc/lib/sstep: Add cmpb instruction emulation

2017-07-24 Thread Matt Brown
This patch adds emulation of the cmpb instruction, enabling xmon to
emulate this instruction.
Tested for correctness against the cmpb asm instruction on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2: 
- fixed opcode
- fixed mask typecasting
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8..87d277f 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -596,6 +596,22 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs 
*regs, unsigned long
regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
 }
 
+static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int rd)
+{
+   unsigned long long out_val, mask;
+   int i;
+
+   out_val = 0;
+   for (i = 0; i < 8; i++) {
+   mask = 0xffUL << (i * 8);
+   if ((v1 & mask) == (v2 & mask))
+   out_val |= mask;
+   }
+
+   regs->gpr[rd] = out_val;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1049,6 +1065,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
do_cmp_unsigned(regs, val, val2, rd >> 2);
goto instr_done;
 
+   case 508: /* cmpb */
+   do_cmpb(regs, regs->gpr[rd], regs->gpr[rb], ra);
+   goto instr_done;
+
 /*
  * Arithmetic instructions
  */
-- 
2.9.3



Re: [PATCH v2 2/5] powerpc/lib/sstep: Add popcnt instruction emulation

2017-07-24 Thread Matt Brown
On Mon, Jul 24, 2017 at 8:28 PM, Balbir Singh <bsinghar...@gmail.com> wrote:
> On Mon, Jul 24, 2017 at 11:01 AM, Matt Brown
> <matthew.brown@gmail.com> wrote:
>> This adds emulations for the popcntb, popcntw, and popcntd instructions.
>> Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.
>>
>> Signed-off-by: Matt Brown <matthew.brown@gmail.com>
>> ---
>> v2:
>> - fixed opcodes
>> - fixed typecasting
>> - fixed bitshifting error for both 32 and 64bit arch
>> ---
>>  arch/powerpc/lib/sstep.c | 43 ++-
>>  1 file changed, 42 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
>> index 87d277f..e6a16a3 100644
>> --- a/arch/powerpc/lib/sstep.c
>> +++ b/arch/powerpc/lib/sstep.c
>> @@ -612,6 +612,35 @@ static nokprobe_inline void do_cmpb(struct pt_regs 
>> *regs, unsigned long v1,
>> regs->gpr[rd] = out_val;
>>  }
>>
>> +/*
>> + * The size parameter is used to adjust the equivalent popcnt instruction.
>> + * popcntb = 8, popcntw = 32, popcntd = 64
>> + */
>> +static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long 
>> v1,
>> +   int size, int ra)
>> +{
>> +   unsigned long long high, low, mask;
>> +   unsigned int n;
>> +   int i, j;
>> +
>> +   high = 0;
>> +   low = 0;
>> +
>> +   for (i = 0; i < (64 / size); i++) {
>> +   n = 0;
>> +   for (j = 0; j < size; j++) {
>> +   mask = 1UL << (j + (i * size));
>> +   if (v1 & mask)
>> +   n++;
>> +   }
>> +   if ((i * size) < 32)
>> +   low |= n << (i * size);
>> +   else
>> +   high |= n << ((i * size) - 32);
>> +   }
>> +   regs->gpr[ra] = (high << 32) | low;
>> +}
>
> There's a very efficient way to do it via the Gillies-Miller
> method of sideways addition
>
> Please see
>
> http://opensourceforu.com/2012/06/power-programming-bitwise-tips-tricks/
> and lib/hweight.c, you can reuse the code from lib/hweight.c

Oh that's a really cool technique.
We could use that for the parity instructions too.

>
> Balbir Singh


Re: [PATCH] powerpc/asm/cacheflush: Cleanup cacheflush function params

2017-07-23 Thread Matt Brown
I've realised that changing the arguments for the cacheflush functions
is much more work than it's worth, due to other archs using these
functions.
The next patch will just translate the asm cacheflush functions to c,
keeping the existing parameters.
So this won't have any effect on the drivers.

Thanks,
Matt Brown

On Thu, Jul 20, 2017 at 11:01 PM, Michael Ellerman <m...@ellerman.id.au> wrote:
> Geert Uytterhoeven <ge...@linux-m68k.org> writes:
>
>> On Thu, Jul 20, 2017 at 1:43 PM, Michael Ellerman <m...@ellerman.id.au> 
>> wrote:
>>> Matt Brown <matthew.brown@gmail.com> writes:
>>>> The cacheflush prototypes currently use start and stop values and each
>>>> call requires typecasting the address to an unsigned long.
>>>> This patch changes the cacheflush prototypes to follow the x86 style of
>>>> using a base and size values, with base being a void pointer.
>>>>
>>>> All callers of the cacheflush functions, including drivers, have been
>>>> modified to conform to the new prototypes.
>>>>
>>>> The 64 bit cacheflush functions which were implemented in assembly code
>>>> (flush_dcache_range, flush_inval_dcache_range) have been translated into
>>>> C for readability and coherence.
>>
>>>> --- a/arch/powerpc/include/asm/cacheflush.h
>>>> +++ b/arch/powerpc/include/asm/cacheflush.h
>>>> @@ -51,13 +51,13 @@ static inline void __flush_dcache_icache_phys(unsigned 
>>>> long physaddr)
>>>>   * Write any modified data cache blocks out to memory and invalidate them.
>>>>   * Does not invalidate the corresponding instruction cache blocks.
>>>>   */
>>>> -static inline void flush_dcache_range(unsigned long start, unsigned long 
>>>> stop)
>>>> +static inline void flush_dcache_range(void *start, unsigned long size)
>>>>  {
>>>> - void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
>>>> - unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 
>>>> 1);
>>>> + void *addr = (void *)((u32)start & ~(L1_CACHE_BYTES - 1));
>>>
>>> unsigned long would be nicer than u32.
>>
>> Indeed. That would make this work on ppc64, too.
>> After which ppc64 has an identical copy (u64 = unsigned long on ppc64) below?
>
> That was Matt's homework to notice that ;)
>
> cheers


[PATCH v2 5/5] powerpc/lib/sstep: Add isel instruction emulation

2017-07-23 Thread Matt Brown
This adds emulation for the isel instruction.
Tested for correctness against the isel instruction and its extended
mnemonics (lt, gt, eq) on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- fixed opcode
- fixed definition to include the 'if RA=0, a=0' clause
- fixed ccr bitshifting error
---
 arch/powerpc/lib/sstep.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index b08eb96..e1f4ec6 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1246,6 +1246,17 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
 /*
  * Logical instructions
  */
+   case 15:/* isel */
+   mb = (instr >> 6) & 0x1f; /* bc */
+   val = (regs->ccr >> (31 - mb)) & 1;
+   val2 = (ra) ? regs->gpr[ra] : 0;
+
+   if (val)
+   regs->gpr[rd] = val2;
+   else
+   regs->gpr[rd] = regs->gpr[rb];
+   goto logical_done;
+
case 26:/* cntlzw */
asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
"r" (regs->gpr[rd]));
-- 
2.9.3



[PATCH 4/5] powerpc/lib/sstep: Add prty instruction emulation

2017-07-23 Thread Matt Brown
This adds emulation for the prtyw and prtyd instructions.
Tested for logical correctness against the prtyw and prtyd instructions
on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- fixed opcodes
- fixed bitshifting and typecast errors
- merged do_prtyw and do_prtyd into single function
---
 arch/powerpc/lib/sstep.c | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a756f44..1820fd6 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -658,6 +658,29 @@ static nokprobe_inline void do_bpermd(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = perm;
 }
 #endif /* __powerpc64__ */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(struct pt_regs *regs, unsigned long v,
+   int size, int ra)
+{
+   unsigned long long high, low;
+   unsigned int i;
+
+   high = 0;
+   low = 0;
+
+   for (i = 0; i < 8; i++) {
+   if (v & (1UL << (i * 8)))
+   (i < 4) ? (low++) : (high++);
+   }
+
+   if (size == 32)
+   regs->gpr[ra] = ((high & 1) << 32) | (low & 1);
+   else
+   regs->gpr[ra] = (high + low) & 1;
+}
 
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
@@ -1248,6 +1271,14 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
+
+   case 154:   /* prtyw */
+   do_prty(regs, regs->gpr[rd], 32, ra);
+   goto logical_done;
+
+   case 186:   /* prtyd */
+   do_prty(regs, regs->gpr[rd], 64, ra);
+   goto logical_done;
 #ifdef __powerpc64__
case 252:   /* bpermd */
do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra);
-- 
2.9.3



[PATCH v2 3/5] powerpc/lib/sstep: Add bpermd instruction emulation

2017-07-23 Thread Matt Brown
This adds emulation for the bpermd instruction.
Tested for correctness against the bpermd instruction on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- fixed opcode
- added ifdef tags to do_bpermd func
- fixed bitshifting errors
---
 arch/powerpc/lib/sstep.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e6a16a3..a756f44 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -641,6 +641,24 @@ static nokprobe_inline void do_popcnt(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = (high << 32) | low;
 }
 
+#ifdef __powerpc64__
+static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int ra)
+{
+   unsigned char perm, idx;
+   unsigned int i;
+
+   perm = 0;
+   for (i = 0; i < 8; i++) {
+   idx = (v1 >> (i * 8)) & 0xff;
+   if (idx < 64)
+   if (v2 & PPC_BIT(idx))
+   perm |= 1 << i;
+   }
+   regs->gpr[ra] = perm;
+}
+#endif /* __powerpc64__ */
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1230,7 +1248,11 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
-
+#ifdef __powerpc64__
+   case 252:   /* bpermd */
+   do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra);
+   goto logical_done;
+#endif
case 284:   /* xor */
regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
goto logical_done;
-- 
2.9.3



[PATCH v2 2/5] powerpc/lib/sstep: Add popcnt instruction emulation

2017-07-23 Thread Matt Brown
This adds emulations for the popcntb, popcntw, and popcntd instructions.
Tested for correctness against the popcnt{b,w,d} instructions on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- fixed opcodes
- fixed typecasting
- fixed bitshifting error for both 32 and 64bit arch
---
 arch/powerpc/lib/sstep.c | 43 ++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 87d277f..e6a16a3 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -612,6 +612,35 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, 
unsigned long v1,
regs->gpr[rd] = out_val;
 }
 
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
+   int size, int ra)
+{
+   unsigned long long high, low, mask;
+   unsigned int n;
+   int i, j;
+
+   high = 0;
+   low = 0;
+
+   for (i = 0; i < (64 / size); i++) {
+   n = 0;
+   for (j = 0; j < size; j++) {
+   mask = 1UL << (j + (i * size));
+   if (v1 & mask)
+   n++;
+   }
+   if ((i * size) < 32)
+   low |= n << (i * size);
+   else
+   high |= n << ((i * size) - 32);
+   }
+   regs->gpr[ra] = (high << 32) | low;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1194,6 +1223,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
goto logical_done;
 
+   case 122:   /* popcntb */
+   do_popcnt(regs, regs->gpr[rd], 8, ra);
+   goto logical_done;
+
case 124:   /* nor */
regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
@@ -1206,6 +1239,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
goto logical_done;
 
+   case 378:   /* popcntw */
+   do_popcnt(regs, regs->gpr[rd], 32, ra);
+   goto logical_done;
+
case 412:   /* orc */
regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
goto logical_done;
@@ -1217,7 +1254,11 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 476:   /* nand */
regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
goto logical_done;
-
+#ifdef __powerpc64__
+   case 506:   /* popcntd */
+   do_popcnt(regs, regs->gpr[rd], 64, ra);
+   goto logical_done;
+#endif
case 922:   /* extsh */
regs->gpr[ra] = (signed short) regs->gpr[rd];
goto logical_done;
-- 
2.9.3



[PATCH v2 1/5] powerpc/lib/sstep: Add cmpb instruction emulation

2017-07-23 Thread Matt Brown
This patch adds emulation of the cmpb instruction, enabling xmon to
emulate this instruction.
Tested for correctness against the cmpb asm instruction on ppc64le.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2: 
- fixed opcode
- fixed mask typecasting
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8..87d277f 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -596,6 +596,22 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs 
*regs, unsigned long
regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
 }
 
+static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int rd)
+{
+   unsigned long long out_val, mask;
+   int i;
+
+   out_val = 0;
+   for (i = 0; i < 8; i++) {
+   mask = 0xffUL << (i * 8);
+   if ((v1 & mask) == (v2 & mask))
+   out_val |= mask;
+   }
+
+   regs->gpr[rd] = out_val;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1049,6 +1065,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
do_cmp_unsigned(regs, val, val2, rd >> 2);
goto instr_done;
 
+   case 508: /* cmpb */
+   do_cmpb(regs, regs->gpr[rd], regs->gpr[rb], ra);
+   goto instr_done;
+
 /*
  * Arithmetic instructions
  */
-- 
2.9.3



[PATCH] powerpc/asm/cacheflush: Cleanup cacheflush function params

2017-07-20 Thread Matt Brown
The cacheflush prototypes currently use start and stop values and each
call requires typecasting the address to an unsigned long.
This patch changes the cacheflush prototypes to follow the x86 style of
using a base and size values, with base being a void pointer.

All callers of the cacheflush functions, including drivers, have been
modified to conform to the new prototypes.

The 64 bit cacheflush functions which were implemented in assembly code
(flush_dcache_range, flush_inval_dcache_range) have been translated into
C for readability and coherence.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/include/asm/cacheflush.h| 47 +
 arch/powerpc/kernel/misc_64.S| 52 
 arch/powerpc/mm/dma-noncoherent.c| 15 
 arch/powerpc/platforms/512x/mpc512x_shared.c | 10 +++---
 arch/powerpc/platforms/85xx/smp.c|  6 ++--
 arch/powerpc/sysdev/dart_iommu.c |  5 +--
 drivers/ata/pata_bf54x.c |  3 +-
 drivers/char/agp/uninorth-agp.c  |  6 ++--
 drivers/gpu/drm/drm_cache.c  |  3 +-
 drivers/macintosh/smu.c  | 15 
 drivers/mmc/host/bfin_sdh.c  |  3 +-
 drivers/mtd/nand/bf5xx_nand.c|  6 ++--
 drivers/soc/fsl/qbman/dpaa_sys.h |  2 +-
 drivers/soc/fsl/qbman/qman_ccsr.c|  3 +-
 drivers/spi/spi-bfin5xx.c| 10 +++---
 drivers/tty/serial/mpsc.c| 46 
 drivers/usb/musb/blackfin.c  |  6 ++--
 17 files changed, 86 insertions(+), 152 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index 11843e3..b8f04c3 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -51,13 +51,13 @@ static inline void __flush_dcache_icache_phys(unsigned long 
physaddr)
  * Write any modified data cache blocks out to memory and invalidate them.
  * Does not invalidate the corresponding instruction cache blocks.
  */
-static inline void flush_dcache_range(unsigned long start, unsigned long stop)
+static inline void flush_dcache_range(void *start, unsigned long size)
 {
-   void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-   unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+   void *addr = (void *)((u32)start & ~(L1_CACHE_BYTES - 1));
+   unsigned long len = size + (L1_CACHE_BYTES - 1);
unsigned long i;
 
-   for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+   for (i = 0; i < len >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
dcbf(addr);
mb();   /* sync */
 }
@@ -67,13 +67,13 @@ static inline void flush_dcache_range(unsigned long start, 
unsigned long stop)
  * Does not invalidate the corresponding cache lines (especially for
  * any corresponding instruction cache).
  */
-static inline void clean_dcache_range(unsigned long start, unsigned long stop)
+static inline void clean_dcache_range(void *start, unsigned long size)
 {
-   void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-   unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+   void *addr = (void *)((u32)start & ~(L1_CACHE_BYTES - 1));
+   unsigned long len = size + (L1_CACHE_BYTES  - 1);
unsigned long i;
 
-   for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+   for (i = 0; i < len >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
dcbst(addr);
mb();   /* sync */
 }
@@ -83,22 +83,39 @@ static inline void clean_dcache_range(unsigned long start, 
unsigned long stop)
  * to invalidate the cache so the PPC core doesn't get stale data
  * from the CPM (no cache snooping here :-).
  */
-static inline void invalidate_dcache_range(unsigned long start,
-  unsigned long stop)
+static inline void invalidate_dcache_range(void *start, unsigned long size)
 {
-   void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1));
-   unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1);
+   void *addr = (void *)((u32)start & ~(L1_CACHE_BYTES - 1));
+   unsigned long len = size + (L1_CACHE_SHIFT - 1);
unsigned long i;
 
-   for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
+   for (i = 0; i < len >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES)
dcbi(addr);
mb();   /* sync */
 }
 
 #endif /* CONFIG_PPC32 */
 #ifdef CONFIG_PPC64
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
+static inline void flush_dcache_range(void *start, unsigned long size)
+{
+   void *a

[PATCH] powerpc/include/asm: Remove unused 64bit cacheflush function

2017-07-20 Thread Matt Brown
The flush_dcache_phys_range function is no longer used in the kernel.
This patch removes and cleans up the function.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/include/asm/cacheflush.h |  1 -
 arch/powerpc/kernel/misc_64.S | 38 ---
 2 files changed, 39 deletions(-)

diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index b77f036..11843e3 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long 
start,
 #ifdef CONFIG_PPC64
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
 extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
 #endif
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index c119044..0ed5c55 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range)
blr
 EXPORT_SYMBOL(flush_dcache_range)
 
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory 
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- *flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
-   ld  r10,PPC64_CACHES@toc(r2)
-   lwz r7,DCACHEL1BLOCKSIZE(r10)   /* Get dcache block size */
-   addir5,r7,-1
-   andcr6,r3,r5/* round low to line bdy */
-   subfr8,r6,r4/* compute length */
-   add r8,r8,r5/* ensure we get enough */
-   lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block 
size */
-   srw.r8,r8,r9/* compute line count */
-   beqlr   /* nothing to do? */
-   mfmsr   r5  /* Disable MMU Data Relocation */
-   ori r0,r5,MSR_DR
-   xorir0,r0,MSR_DR
-   sync
-   mtmsr   r0
-   sync
-   isync
-   mtctr   r8
-0: dcbst   0,r6
-   add r6,r6,r7
-   bdnz0b
-   sync
-   isync
-   mtmsr   r5  /* Re-enable MMU Data Relocation */
-   sync
-   isync
-   blr
-
 _GLOBAL(flush_inval_dcache_range)
ld  r10,PPC64_CACHES@toc(r2)
lwz r7,DCACHEL1BLOCKSIZE(r10)   /* Get dcache block size */
-- 
2.9.3



Re: [PATCH 3/5] powerpc/lib/sstep: Add bpermd instruction emulation

2017-07-13 Thread Matt Brown
On Thu, Jul 13, 2017 at 5:28 PM, Segher Boessenkool
<seg...@kernel.crashing.org> wrote:
> On Thu, Jul 13, 2017 at 01:25:46PM +1000, Matt Brown wrote:
>> +static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long 
>> v1,
>> + unsigned long v2, int ra)
>> +{
>> + unsigned int idx, i;
>> + unsigned char perm;
>> +
>> + perm = 0x0;
>> + for (i = 0; i < 8; i++) {
>> + idx = (v1 >> (i * 8)) & 0xff;
>> + if (idx < 64)
>> + perm |= (v2 & (1 << idx)) >> (idx - i);
>
> That doesn't work I think, the bit numbers ("idx") are big-endian?

Why would it be big-endian? Wouldn't it be in the same endian form as the arch?
>
>> + }
>> + regs->gpr[ra] = 0 | perm;
>
> And that is just silly :-)
>

Yep that is silly.

Thanks,
Matt

>> +}
>
>
> Segher


Re: [PATCH 5/5] powerpc/lib/sstep: Add isel instruction emulation

2017-07-13 Thread Matt Brown
On Thu, Jul 13, 2017 at 5:47 PM, Segher Boessenkool
<seg...@kernel.crashing.org> wrote:
> On Thu, Jul 13, 2017 at 01:25:48PM +1000, Matt Brown wrote:
>> + case 585:   /* isel */
>
> The secondary opcode for isel is only 5 bits, not 10 like most other
> insns have.

Yet another conversion mistake, I'll get there one day!

>
>> + mb = (instr >> 6) & 0x1f; /* bc */
>> + val = (regs->ccr >> (mb + 32)) & 1;
>
> regs->ccr >> (31 - mb)  ?
>
>> +
>> + if (val)
>> + regs->gpr[rd] = regs->gpr[ra];
>
> You need to treat ra=0 separately (as 0, not reg 0).
>
Ah I missed that. The wording in the ISA doesn't make that completely obvious.

Thanks,
Matt

>> + else
>> + regs->gpr[rd] = regs->gpr[rb];
>> + goto logical_done;
>
>
> Segher


Re: [PATCH 4/5] powerpc/lib/sstep: Add prty instruction emulation

2017-07-13 Thread Matt Brown
On Thu, Jul 13, 2017 at 5:37 PM, Segher Boessenkool
<seg...@kernel.crashing.org> wrote:
> On Thu, Jul 13, 2017 at 01:25:47PM +1000, Matt Brown wrote:
>> +static nokprobe_inline void do_prtyw(struct pt_regs *regs, unsigned long v,
>> + int ra)
>> +{
>> + unsigned long low, high, out;
>> + unsigned int i;
>> +
>> + high = 0;
>> + low = 0;
>> + out = 0;
>> +
>> + for (i = 0; i < 8; i++) {
>> + if (v & (1 << (i * 8)))
>
> 1UL
>
>> + (i < 4) ? (low++) : (high++);
>> + }
>> +
>> + if (low % 2)
>> + out |= low;
>> + if (high % 2)
>> + out |= (high << 32);
>
> Only the low bit of each word of the output can be set.  Something
> like
>
>   out = ((high & 1) << 32) | (low & 1);
>
Ah, I wasn't aware. That way is much more concise too :)

Thanks,
Matt
>
> Segher


[PATCH 5/5] powerpc/lib/sstep: Add isel instruction emulation

2017-07-12 Thread Matt Brown
This adds emulation for the isel instruction.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/lib/sstep.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 3228783..bb0e301 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1297,6 +1297,16 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
goto logical_done;
 
+   case 585:   /* isel */
+   mb = (instr >> 6) & 0x1f; /* bc */
+   val = (regs->ccr >> (mb + 32)) & 1;
+
+   if (val)
+   regs->gpr[rd] = regs->gpr[ra];
+   else
+   regs->gpr[rd] = regs->gpr[rb];
+   goto logical_done;
+
case 922:   /* extsh */
regs->gpr[ra] = (signed short) regs->gpr[rd];
goto logical_done;
-- 
2.9.3



[PATCH 4/5] powerpc/lib/sstep: Add prty instruction emulation

2017-07-12 Thread Matt Brown
This adds emulation for the prtyw and prtyd instructions.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/lib/sstep.c | 58 +++-
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 603654d..3228783 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -652,6 +652,42 @@ static nokprobe_inline void do_bpermd(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = 0 | perm;
 }
 
+static nokprobe_inline void do_prtyw(struct pt_regs *regs, unsigned long v,
+   int ra)
+{
+   unsigned long low, high, out;
+   unsigned int i;
+
+   high = 0;
+   low = 0;
+   out = 0;
+
+   for (i = 0; i < 8; i++) {
+   if (v & (1 << (i * 8)))
+   (i < 4) ? (low++) : (high++);
+   }
+
+   if (low % 2)
+   out |= low;
+   if (high % 2)
+   out |= (high << 32);
+
+   regs->gpr[ra] = out;
+}
+
+static nokprobe_inline void do_prtyd(struct pt_regs *regs, unsigned long v,
+   int ra)
+{
+   unsigned int count, i;
+
+   count = 0;
+   for (i = 0; i < 8; i++) {
+   if (v & (1 << (i * 8)))
+   count++;
+   }
+   regs->gpr[ra] = count % 2;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1278,16 +1314,15 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
do_popcnt(regs, val, 8, ra);
goto logical_done;
 
-   case 17076744:  /* popcntw */
+   case 2101768:   /* prtyw */
val = regs->gpr[rd];
-   do_popcnt(regs, val, 32, ra);
+   do_prtyw(regs, val, ra);
goto logical_done;
-#ifdef __powerpc64__
-   case 19173896:  /* popcntd */
+
+   case 2134536:   /* prtyd */
val = regs->gpr[rd];
-   do_popcnt(regs, val, 64, ra);
+   do_prtyd(regs, val, ra);
goto logical_done;
-#endif
 
 #ifdef __powerpc64__
case 2396736:   /* bpermd */
@@ -1297,6 +1332,17 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
goto logical_done;
 #endif
 
+   case 17076744:  /* popcntw */
+   val = regs->gpr[rd];
+   do_popcnt(regs, val, 32, ra);
+   goto logical_done;
+#ifdef __powerpc64__
+   case 19173896:  /* popcntd */
+   val = regs->gpr[rd];
+   do_popcnt(regs, val, 64, ra);
+   goto logical_done;
+#endif
+
 /*
  * Shift instructions
  */
-- 
2.9.3



[PATCH 3/5] powerpc/lib/sstep: Add bpermd instruction emulation

2017-07-12 Thread Matt Brown
This adds emulation for the bpermd instruction.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/lib/sstep.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index cf69987..603654d 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -637,6 +637,21 @@ static nokprobe_inline void do_popcnt(struct pt_regs 
*regs, unsigned long v1,
regs->gpr[ra] = out_val;
 }
 
+static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int ra)
+{
+   unsigned int idx, i;
+   unsigned char perm;
+
+   perm = 0x0;
+   for (i = 0; i < 8; i++) {
+   idx = (v1 >> (i * 8)) & 0xff;
+   if (idx < 64)
+   perm |= (v2 & (1 << idx)) >> (idx - i);
+   }
+   regs->gpr[ra] = 0 | perm;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1274,6 +1289,14 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
goto logical_done;
 #endif
 
+#ifdef __powerpc64__
+   case 2396736:   /* bpermd */
+   val = regs->gpr[rd];
+   val2 = regs->gpr[rb];
+   do_bpermd(regs, val, val2, ra);
+   goto logical_done;
+#endif
+
 /*
  * Shift instructions
  */
-- 
2.9.3



[PATCH 2/5] powerpc/lib/sstep: Add popcnt instruction emulation

2017-07-12 Thread Matt Brown
This adds emulations for the popcntb, popcntw, and popcntd instructions.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/lib/sstep.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index f3e9ba8..cf69987 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -613,6 +613,30 @@ static nokprobe_inline void do_cmpb(struct pt_regs *regs, 
unsigned long v1,
regs->gpr[rd] = out_val;
 }
 
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1,
+   int size, int ra)
+{
+   unsigned int out_val, mask, n;
+   int i, j;
+
+   out_val = 0;
+
+   for (i = 0; i < (64 / size); i++) {
+   n = 0;
+   for (j = 0; j < size; j++) {
+   mask = 1 << (j + (i * size));
+   if (v1 & mask)
+   n++;
+   }
+   out_val |= n << (i * size);
+   }
+   regs->gpr[ra] = out_val;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1234,6 +1258,21 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
regs->gpr[ra] = (signed int) regs->gpr[rd];
goto logical_done;
 #endif
+   case 299528:/* popcntb */
+   val = regs->gpr[rd];
+   do_popcnt(regs, val, 8, ra);
+   goto logical_done;
+
+   case 17076744:  /* popcntw */
+   val = regs->gpr[rd];
+   do_popcnt(regs, val, 32, ra);
+   goto logical_done;
+#ifdef __powerpc64__
+   case 19173896:  /* popcntd */
+   val = regs->gpr[rd];
+   do_popcnt(regs, val, 64, ra);
+   goto logical_done;
+#endif
 
 /*
  * Shift instructions
-- 
2.9.3



[PATCH 1/5] powerpc/lib/sstep: Add cmpb instruction emulation

2017-07-12 Thread Matt Brown
This patch adds emulation of the cmpb instruction, enabling xmon to
emulate this instruction.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/lib/sstep.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8..f3e9ba8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -596,6 +596,23 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs 
*regs, unsigned long
regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
 }
 
+static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1,
+   unsigned long v2, int rd)
+{
+   unsigned long out_val, mask;
+   int i;
+
+   out_val = 0;
+   for (i = 0; i < 8; i++) {
+   mask = 0xff << (i * 8);
+
+   if ((v1 & mask) == (v2 & mask))
+   out_val |= mask;
+   }
+
+   regs->gpr[rd] = out_val;
+}
+
 static nokprobe_inline int trap_compare(long v1, long v2)
 {
int ret = 0;
@@ -1049,6 +1066,13 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
do_cmp_unsigned(regs, val, val2, rd >> 2);
goto instr_done;
 
+   case 19173952: /* cmpb */
+   val = regs->gpr[rd];
+   val2 = regs->gpr[rb];
+
+   do_cmpb(regs, val, val2, ra);
+   goto instr_done;
+
 /*
  * Arithmetic instructions
  */
-- 
2.9.3



Re: [PATCH v2] powerpc/powernv: Use darn instr for random_seed on p9

2017-07-12 Thread Matt Brown
On Tue, Jul 11, 2017 at 7:34 PM, Daniel Axtens  wrote:
> Hi Matt,
>
>> Currently ppc_md.get_random_seed uses the powernv_get_random_long function.
>> A guest calling this function would have to go through the hypervisor. The
>> 'darn' instruction, introduced in POWER9, allows us to bypass this by
>> directly obtaining a value from the mmio region.
>>
>> This patch adds a function for ppc_md.get_random_seed on p9,
>> utilising the darn instruction.
>
> This patch looks pretty good - I'm not set up to test it but I have one
> code-style nit:
>
>> diff --git a/arch/powerpc/platforms/powernv/rng.c 
>> b/arch/powerpc/platforms/powernv/rng.c
>> index 5dcbdea..ab6f411 100644
>> --- a/arch/powerpc/platforms/powernv/rng.c
>> +++ b/arch/powerpc/platforms/powernv/rng.c
>> @@ -8,6 +8,7 @@
>>   */
>>
>>  #define pr_fmt(fmt)  "powernv-rng: " fmt
>> +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
>>
>>  #include 
>>  #include 
>> @@ -16,6 +17,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -67,6 +69,21 @@ int powernv_get_random_real_mode(unsigned long *v)
>>   return 1;
>>  }
>>
>> +int powernv_get_random_darn(unsigned long *v)
>
> This is only referenced in this file so it should probably be labelled
> as 'static'.
>

That's true, my thinking was to then use the get_random_darn where the
get_random_long was being used.
Looks like there is only one other caller of it at the moment,
kvmppc_h_random (in arch/powerpc/kvm/book3s_hv_builtin.c).
We may want to change that to get_random_darn.

>> +{
>> + unsigned long val;
>> +
>> + /* Using DARN with L=1 - conditioned random number */
>> + asm (PPC_DARN(%0, 1)"\n" : "=r"(val) :);
>> +
>> + if (val == DARN_ERR)
>> + return 0;
>> +
>> + *v = val;
>> +
>> + return 1;
>
> I was a bit confused to see 1 representing success - I think I have been
> in userspace too long. But I checked against pseries_get_random_long and
> it is in fact correct, so good for you!
>
> An excellent followup patch would be changing the type of this function
> to be bool rather than int, but no pressure :)

That's probably a good idea!

Thanks,
Matt

>
> Regards,
> Daniel
>
>> +}
>> +
>>  int powernv_get_random_long(unsigned long *v)
>>  {
>>   struct powernv_rng *rng;
>> @@ -136,6 +153,7 @@ static __init int rng_create(struct device_node *dn)
>>  static __init int rng_init(void)
>>  {
>>   struct device_node *dn;
>> + unsigned long drn_test;
>>   int rc;
>>
>>   for_each_compatible_node(dn, NULL, "ibm,power-rng") {
>> @@ -150,6 +168,10 @@ static __init int rng_init(void)
>>   of_platform_device_create(dn, NULL, NULL);
>>   }
>>
>> + if (cpu_has_feature(CPU_FTR_ARCH_300) &&
>> + powernv_get_random_darn(&drn_test))
>> + ppc_md.get_random_seed = powernv_get_random_darn;
>> +
>>   return 0;
>>  }
>>  machine_subsys_initcall(powernv, rng_init);
>> --
>> 2.9.3


[PATCH v2] powerpc/powernv: Use darn instr for random_seed on p9

2017-07-07 Thread Matt Brown
Currently ppc_md.get_random_seed uses the powernv_get_random_long function.
A guest calling this function would have to go through the hypervisor. The
'darn' instruction, introduced in POWER9, allows us to bypass this by
directly obtaining a value from the mmio region.

This patch adds a function for ppc_md.get_random_seed on p9,
utilising the darn instruction.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
v2:
- remove repeat darn attempts
- move hook to rng_init
---
 arch/powerpc/include/asm/ppc-opcode.h |  4 
 arch/powerpc/platforms/powernv/rng.c  | 22 ++
 2 files changed, 26 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index c4ced1d..d5f7082 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -134,6 +134,7 @@
 #define PPC_INST_COPY  0x7c00060c
 #define PPC_INST_COPY_FIRST0x7c20060c
 #define PPC_INST_CP_ABORT  0x7c00068c
+#define PPC_INST_DARN  0x7c0005e6
 #define PPC_INST_DCBA  0x7c0005ec
 #define PPC_INST_DCBA_MASK 0xfc0007fe
 #define PPC_INST_DCBAL 0x7c2005ec
@@ -325,6 +326,9 @@
 
 /* Deal with instructions that older assemblers aren't aware of */
 #definePPC_CP_ABORTstringify_in_c(.long PPC_INST_CP_ABORT)
+#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN |  \
+   ___PPC_RT(t)   |  \
+   ___PPC_RA(l))
 #definePPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
 #definePPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
diff --git a/arch/powerpc/platforms/powernv/rng.c 
b/arch/powerpc/platforms/powernv/rng.c
index 5dcbdea..ab6f411 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -8,6 +8,7 @@
  */
 
 #define pr_fmt(fmt)"powernv-rng: " fmt
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
 
 #include 
 #include 
@@ -16,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -67,6 +69,21 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
 }
 
+int powernv_get_random_darn(unsigned long *v)
+{
+   unsigned long val;
+
+   /* Using DARN with L=1 - conditioned random number */
+   asm (PPC_DARN(%0, 1)"\n" : "=r"(val) :);
+
+   if (val == DARN_ERR)
+   return 0;
+
+   *v = val;
+
+   return 1;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
struct powernv_rng *rng;
@@ -136,6 +153,7 @@ static __init int rng_create(struct device_node *dn)
 static __init int rng_init(void)
 {
struct device_node *dn;
+   unsigned long drn_test;
int rc;
 
for_each_compatible_node(dn, NULL, "ibm,power-rng") {
@@ -150,6 +168,10 @@ static __init int rng_init(void)
of_platform_device_create(dn, NULL, NULL);
}
 
+   if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+   powernv_get_random_darn(&drn_test))
+   ppc_md.get_random_seed = powernv_get_random_darn;
+
return 0;
 }
 machine_subsys_initcall(powernv, rng_init);
-- 
2.9.3



Re: [PATCH] powerpc/lib: Split xor_vmx file to guarantee instruction ordering

2017-05-24 Thread Matt Brown
On Wed, May 24, 2017 at 11:36 PM, Paul Clarke <p...@us.ibm.com> wrote:
> On 05/23/2017 06:45 PM, Matt Brown wrote:
>> The xor_vmx.c file is used for the RAID5 xor operations. In these functions
>> altivec is enabled to run the operation and then disabled. However due to
>> compiler instruction reordering, altivec instructions are being run before
>> enable_altivec() and after disable_altivec().
>
> If altivec instructions can be reordered after disable_altivec(), then 
> disable_altivec() is broken, I'd think.
>
> Could it be because the isync in mtmsr_isync() is after the mtmsr?
>
> disable_kernel_altivec
> - msr_check_and_clear
>   - __msr_check_and_clear
> - mtmsr_isync

So it turns out the enable / disable functions don't actually enable
or disable the use of vector instructions.
If we have marked the file to be compiled with altivec the compiler
has free rein to reorder the vector instructions wherever it likes.
Including reordering it before or after the enable/disable_altivec
commands.

The enable_kernel_altivec and disable_kernel_altivec functions are
mainly there to empty and restore the vector registers which could
have been used in user-space. So those functions work as intended,
although this is not particularly intuitive.

Splitting the files and only compiling the xor_vmx.c file with altivec
will guarantee that there are no vector instructions in the
xor_vmx_glue.c file, and that no vector instructions are outside of
the enable/disable block.


- Matt Brown

>
> PC
>


[PATCH] powerpc/lib: Split xor_vmx file to guarantee instruction ordering

2017-05-23 Thread Matt Brown
The xor_vmx.c file is used for the RAID5 xor operations. In these functions
altivec is enabled to run the operation and then disabled. However due to
compiler instruction reordering, altivec instructions are being run before
enable_altivec() and after disable_altivec().

This patch splits the non-altivec code into xor_vmx_glue.c which calls the
altivec functions in xor_vmx.c. By compiling xor_vmx_glue.c without
-maltivec we can guarantee that altivec instructions will not be reordered
outside of the enable/disable block.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/lib/Makefile   |  2 +-
 arch/powerpc/lib/xor_vmx.c  | 53 ---
 arch/powerpc/lib/xor_vmx.h  | 20 +
 arch/powerpc/lib/xor_vmx_glue.c | 62 +
 4 files changed, 94 insertions(+), 43 deletions(-)
 create mode 100644 arch/powerpc/lib/xor_vmx.h
 create mode 100644 arch/powerpc/lib/xor_vmx_glue.c

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e8..a448464 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
 
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
 
-obj-$(CONFIG_ALTIVEC)  += xor_vmx.o
+obj-$(CONFIG_ALTIVEC)  += xor_vmx.o xor_vmx_glue.o
 CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index f9de69a..4df240a 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -29,10 +29,7 @@
 #define vector __attribute__((vector_size(16)))
 #endif
 
-#include 
-#include 
-#include 
-#include 
+#include "xor_vmx.h"
 
 typedef vector signed char unative_t;
 
@@ -64,16 +61,13 @@ typedef vector signed char unative_t;
V1##_3 = vec_xor(V1##_3, V2##_3);   \
} while (0)
 
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-  unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+unsigned long *v2_in)
 {
DEFINE(v1);
DEFINE(v2);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
 
-   preempt_disable();
-   enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -83,23 +77,16 @@ void xor_altivec_2(unsigned long bytes, unsigned long 
*v1_in,
v1 += 4;
v2 += 4;
} while (--lines > 0);
-
-   disable_kernel_altivec();
-   preempt_enable();
 }
-EXPORT_SYMBOL(xor_altivec_2);
 
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-  unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+unsigned long *v2_in, unsigned long *v3_in)
 {
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
 
-   preempt_disable();
-   enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -112,15 +99,11 @@ void xor_altivec_3(unsigned long bytes, unsigned long 
*v1_in,
v2 += 4;
v3 += 4;
} while (--lines > 0);
-
-   disable_kernel_altivec();
-   preempt_enable();
 }
-EXPORT_SYMBOL(xor_altivec_3);
 
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-  unsigned long *v2_in, unsigned long *v3_in,
-  unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+unsigned long *v2_in, unsigned long *v3_in,
+unsigned long *v4_in)
 {
DEFINE(v1);
DEFINE(v2);
@@ -128,9 +111,6 @@ void xor_altivec_4(unsigned long bytes, unsigned long 
*v1_in,
DEFINE(v4);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
 
-   preempt_disable();
-   enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -146,15 +126,11 @@ void xor_altivec_4(unsigned long bytes, unsigned long 
*v1_in,
v3 += 4;
v4 += 4;
} while (--lines > 0);
-
-   disable_kernel_altivec();
-   preempt_enable();
 }
-EXPORT_SYMBOL(xor_altivec_4);
 
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-  unsigned long *v2_in, unsigned long *v3_in,
-  unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+unsigned long *v2_in, unsigned long *v3_in,
+unsigned long *v4_in, unsigned long *v5_in)
 {
DEFINE(v1);
DEFINE(v2);
@@ -163,9 +139,6 @@ void xor_altivec_5(unsigned long bytes, unsigned long 
*v1_in,
DEFINE(v5);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;

[v5 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-04-28 Thread Matt Brown
The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s

raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog
v5
- moved altivec.uc fix into other patch in series
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile  |  27 -
 lib/raid6/algos.c   |   4 ++
 lib/raid6/test/Makefile |  14 ++-
 lib/raid6/vpermxor.uc   | 104 
 5 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..db095a7 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y  += algos.o recov.o tables.o int1.o int2.o 
int4.o \
   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o 
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+  vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+   &raid6_vpermxor1,
+   &raid6_vpermxor2,
+   &raid6_vpermxor4,
+   &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
&raid6_tilegx8,
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+   $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+   $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+   $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+   $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
   

[v5 1/2] lib/raid6: Build proper files on corresponding arch

2017-04-27 Thread Matt Brown
Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.
This patch also fixes the missing and mismatched ifdef statements to allow the
altivec.uc file to be built correctly.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog
v5
- moved altivec.uc fix into this patch
- updated commit message
---
 lib/raid6/altivec.uc| 3 +++
 lib/raid6/test/Makefile | 8 +---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include 
 
+#ifdef CONFIG_ALTIVEC
+
 #include 
 #ifdef __KERNEL__
 # include 
 # include 
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..b64a267 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' 
|\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 ifeq ($(HAS_ALTIVEC),yes)
-OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+CFLAGS += -I../../../arch/powerpc/include
+CFLAGS += -DCONFIG_ALTIVEC
+OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 endif
 endif
 ifeq ($(ARCH),tilegx)
-- 
2.9.3



[v4 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-04-12 Thread Matt Brown
The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s

raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s

Note: Fixed minor bug in altivec.uc regarding missing and mismatched ifdef
statements.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile  |  27 -
 lib/raid6/algos.c   |   4 ++
 lib/raid6/altivec.uc|   3 ++
 lib/raid6/test/Makefile |  14 ++-
 lib/raid6/vpermxor.uc   | 104 
 6 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..db095a7 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y  += algos.o recov.o tables.o int1.o int2.o 
int4.o \
   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o 
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+  vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+   &raid6_vpermxor1,
+   &raid6_vpermxor2,
+   &raid6_vpermxor4,
+   &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
&raid6_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include 
 
+#ifdef CONFIG_ALTIVEC
+
 #include 
 #ifdef __KERNEL__
 # include 
 # include 
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+   $(AWK) ../unro

[v4 1/2] lib/raid6: Build proper files on corresponding arch

2017-04-12 Thread Matt Brown
Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog
v2 - v4
- fixup whitespace
- change versioning to match other patch
---
 lib/raid6/test/Makefile | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..b64a267 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' 
|\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+ gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 ifeq ($(HAS_ALTIVEC),yes)
-OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+CFLAGS += -I../../../arch/powerpc/include
+CFLAGS += -DCONFIG_ALTIVEC
+OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 endif
 endif
 ifeq ($(ARCH),tilegx)
-- 
2.9.3



[PATCH v3 2/2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-04-11 Thread Matt Brown
The raid6 Q syndrome check has been optimised using the vpermxor
instruction. This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:
raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s

raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s

Note: Fixed minor bug in altivec.uc regarding missing and mismatched ifdef
statements.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog
v2
- Change CONFIG_ALTIVEC to CPU_FTR_ALTIVEC_COMP
- Separate bug fix into a different patch
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile  |  27 -
 lib/raid6/algos.c   |   4 ++
 lib/raid6/altivec.uc|   3 ++
 lib/raid6/test/Makefile |  14 ++-
 lib/raid6/vpermxor.uc   | 104 
 6 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y  += algos.o recov.o tables.o int1.o int2.o 
int4.o \
   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o 
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+   vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+   &raid6_vpermxor1,
+   &raid6_vpermxor2,
+   &raid6_vpermxor4,
+   &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include 
 
+#ifdef CONFIG_ALTIVEC
+
 #include 
 #ifdef __KERNEL__
 # include 
 # include 
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..9c333e9 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -97,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unro

[PATCH 1/2] lib/raid6: Build proper files on corresponding arch

2017-04-11 Thread Matt Brown
Previously the raid6 test Makefile did not correctly build the files for
testing on PowerPC. This patch fixes the bug, so that all appropriate files
for PowerPC are built.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 lib/raid6/test/Makefile | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 9c333e9..62b26d1 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -44,10 +44,12 @@ else ifeq ($(HAS_NEON),yes)
 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
 HAS_ALTIVEC := $(shell printf '\#include \nvector int a;\n' 
|\
- gcc -c -x c - >&/dev/null && \
- rm ./-.o && echo yes)
+gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 ifeq ($(HAS_ALTIVEC),yes)
-OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+   CFLAGS += -I../../../arch/powerpc/include
+   CFLAGS += -DCONFIG_ALTIVEC
+   OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+   vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 endif
 endif
 ifeq ($(ARCH),tilegx)
-- 
2.9.3



[v2] raid6/altivec: Add vpermxor implementation for raid6 Q syndrome

2017-04-05 Thread Matt Brown
The raid6 Q syndrome check has been optimised using the vpermxor
instruction.  This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:

raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s

raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s

Bugs fixed:
- A small bug in pq.h regarding a missing and mismatched
  ifdef statement
- Fixed test/Makefile to correctly build test on ppc

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
mpe I assume you are ok to take this patch, most of the other ppc raid patches 
have gone through you.

Changelog:
v2
- added reference to raid6 paper
- shortened asm lines
- removed redundant Makefile line
- fixed test/Makefile bug
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile  |  27 -
 lib/raid6/algos.c   |   4 ++
 lib/raid6/altivec.uc|   3 ++
 lib/raid6/test/Makefile |  26 +---
 lib/raid6/vpermxor.uc   | 104 
 6 files changed, 161 insertions(+), 7 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y  += algos.o recov.o tables.o int1.o int2.o 
int4.o \
   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o 
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+   vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+   &raid6_vpermxor1,
+   &raid6_vpermxor2,
+   &raid6_vpermxor4,
+   &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include 
 
+#ifdef CONFIG_ALTIVEC
+
 #include 
 #ifdef __KERNEL__
 # include 
 # include 
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid

Re: [PATCH] raid6/altivec: adding vpermxor implementation for raid6 Q syndrome

2017-04-05 Thread Matt Brown
Hi Daniel,

Just to respond to your comments,

The inline asm line cannot be formatted over multiple lines due to the
unrolling process, but we can take out the volatile.

The pagefault_disable() also seems to be an old method of disabling
preemption, but no longer actually works to disable preemption.
Preempt_disable should be used instead now. So the use of
pagefault_disable() in crc32d-vpmsum_glue.c is actually a bug.

Thanks,
Matt

On Tue, Apr 4, 2017 at 11:51 AM, Daniel Axtens  wrote:
> Hi Matt,
>
>> Woops, totally missed that big chunk of makefile in the commit.
>> I had a chat with Oliver last week about the backwards compatibility stuff.
>> This will work for all versions >= 207S.
>>
>> From what I can tell there is almost no difference between
>> pagefault_disable() and preempt_disable(), but I'll follow that up
>> when I'm in the office next.
>
> Cool, good to know.
>
> See you when you're next in!
>
> Regards,
> Daniel
>
>>
>> Thanks for the review,
>>
>> Matt
>>
>> On Tue, Apr 4, 2017 at 7:44 AM, Daniel Axtens  wrote:
 In that function, the flow is:
  pagefault_disable();
  enable_kernel_altivec();
  
  pagefault_enable();

 There are a few things that it would be nice (but by no means essential)
 to find out:
  - what is the difference between pagefault and preempt enable/disable
  - is it required to disable altivec after the end of the function or
can we leave that enabled?
>>>
>>> Answering my own question here, dc4fbba11e46 ("powerpc: Create
>>> disable_kernel_{fp,altivec,vsx,spe}()") adds the disable_ function, and
>>> it's a no-op except under debug conditions. So it should stay.
>>>
>>> Regards,
>>> Daniel
>>>
>>>

> +
> +int raid6_have_altivec_vpermxor(void);
> +#if $# == 1
> +int raid6_have_altivec_vpermxor(void)
> +{
> +/* Check if CPU has both altivec and the vpermxor instruction*/
 Please add a space: s|ion*/|ion */|
> +# ifdef __KERNEL__
> +return (cpu_has_feature(CONFIG_ALTIVEC) &&
> +cpu_has_feature(CPU_FTR_ARCH_207S));
 I assume this is future-proof - an ISA 3.00 cpu will advertise 2.07S
 compat?

> +# else
> +return 1;
> +#endif
> +
> +}
> +#endif
> +
> +const struct raid6_calls raid6_vpermxor$# = {
> +raid6_vpermxor$#_gen_syndrome,
> +NULL,
> +raid6_have_altivec_vpermxor,
> +"vpermxor$#",
> +0
> +};
> +#endif
> --
> 2.9.3

 Apart from that this patch looks good and I expect I will be able to
 formally Review v2.

 Regards,
 Daniel


[PATCH] powerpc/powernv: utilising darn instruction for get_random_seed on p9

2017-03-30 Thread Matt Brown
Currently ppc_md.get_random_seed uses the powernv_get_random_long function.
A guest calling this function would have to go through the hypervisor. The
'darn' instruction, introduced in power 9, allows us to bypass this by
directly obtaining a value from the mmio region.

This patch adds an alternative function for ppc_md.get_random_seed on p9,
utilising the darn instruction.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/include/asm/ppc-opcode.h |  4 
 arch/powerpc/platforms/powernv/rng.c  | 23 ++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index c4ced1d..d5f7082 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -134,6 +134,7 @@
 #define PPC_INST_COPY  0x7c00060c
 #define PPC_INST_COPY_FIRST0x7c20060c
 #define PPC_INST_CP_ABORT  0x7c00068c
+#define PPC_INST_DARN  0x7c0005e6
 #define PPC_INST_DCBA  0x7c0005ec
 #define PPC_INST_DCBA_MASK 0xfc0007fe
 #define PPC_INST_DCBAL 0x7c2005ec
@@ -325,6 +326,9 @@
 
 /* Deal with instructions that older assemblers aren't aware of */
 #definePPC_CP_ABORTstringify_in_c(.long PPC_INST_CP_ABORT)
+#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN |  \
+   ___PPC_RT(t)   |  \
+   ___PPC_RA(l))
 #definePPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
 #definePPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
diff --git a/arch/powerpc/platforms/powernv/rng.c 
b/arch/powerpc/platforms/powernv/rng.c
index 5dcbdea..db34f32 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -8,6 +8,7 @@
  */
 
 #define pr_fmt(fmt)"powernv-rng: " fmt
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
 
 #include 
 #include 
@@ -16,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -67,6 +69,22 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
 }
 
+int powernv_get_random_darn(unsigned long *v)
+{
+   int n = 0;
+   unsigned long val;
+
+   do {
+   /* Using DARN with L=1 - conditioned random number */
+   asm (PPC_DARN(%0, 1)"\n" : "=r"(val) :);
+   n++;
+   } while (val == DARN_ERR && n < 10);
+
+   *v = val;
+
+   return (val == DARN_ERR) ? 0 : 1;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
struct powernv_rng *rng;
@@ -128,7 +146,10 @@ static __init int rng_create(struct device_node *dn)
 
pr_info_once("Registering arch random hook.\n");
 
-   ppc_md.get_random_seed = powernv_get_random_long;
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   ppc_md.get_random_seed = powernv_get_random_darn;
+   else
+   ppc_md.get_random_seed = powernv_get_random_long;
 
return 0;
 }
-- 
2.9.3



[PATCH] raid6/altivec: adding vpermxor implementation for raid6 Q syndrome

2017-03-29 Thread Matt Brown
The raid6 Q syndrome check has been optimised using the vpermxor
instruction.  This instruction was made available with POWER8, ISA version
2.07. It allows for both vperm and vxor instructions to be done in a single
instruction. This has been tested for correctness on a ppc64le vm with a
basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test
directory. These results are from an IBM Firestone machine with ppc64le
architecture. The benchmark results show a 35% speed increase over the best
existing algorithm for powerpc (altivec). The raid6test has also been run
on a big-endian ppc64 vm to ensure it also works for big-endian
architectures.

Performance benchmarks:

raid6: altivecx4 gen() 18773 MB/s
raid6: altivecx8 gen() 19438 MB/s

raid6: vpermxor4 gen() 25112 MB/s
raid6: vpermxor8 gen() 26279 MB/s

Note: Also fixed a small bug in pq.h regarding a missing and mismatched
ifdef statement

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 include/linux/raid/pq.h |   4 ++
 lib/raid6/Makefile  |  27 -
 lib/raid6/algos.c   |   4 ++
 lib/raid6/altivec.uc|   3 ++
 lib/raid6/test/Makefile |  28 -
 lib/raid6/vpermxor.uc   | 102 
 6 files changed, 165 insertions(+), 3 deletions(-)
 create mode 100644 lib/raid6/vpermxor.uc

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 4d57bba..3df9aa6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_tilegx8;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3057011..7775aad 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -4,7 +4,8 @@ raid6_pq-y  += algos.o recov.o tables.o int1.o int2.o 
int4.o \
   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o 
avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+   vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -88,6 +89,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+   $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 7857049..edd4f69 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_altivec2,
&raid6_altivec4,
&raid6_altivec8,
+   &raid6_vpermxor1,
+   &raid6_vpermxor2,
+   &raid6_vpermxor4,
+   &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_TILEGX)
_tilegx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8..d20ed0d 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include 
 
+#ifdef CONFIG_ALTIVEC
+
 #include 
 #ifdef __KERNEL__
 # include 
 # include 
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 2c7b60e..29ebb39 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -47,13 +47,25 @@ else
  gcc -c -x c - >&/dev/null && \
  rm ./-.o && echo yes)
 ifeq ($(HAS_ALTIVEC),yes)
-OBJS += altivec1.o alt

[v8] powerpc/powernv: add 'firmware/exports' attributes to sysfs

2017-03-29 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree. In
some cases we would like to look directly at the HDAT. This is not possible
through /dev/mem as it is reserved memory which is stopped by the /dev/mem
filter. There are also other memory areas which are reserved but could be
useful to view for debugging purposes.

This patch adds sysfs nodes to allow specified memory areas to be viewed.
sysfs nodes are created for each property in the device-tree under
/ibm,opal/firmware/exports/, and adds them to /sys/firmware/opal/exports/
with root read-only permissions.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog
v8
- fixed error handling
- added dynamic allocation of attributes
- using of_property_read_u64_array for reading attr vals
- reordered vars
- renaming vars
---
 arch/powerpc/platforms/powernv/opal.c | 81 +++
 1 file changed, 81 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..232f94e 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -604,6 +604,84 @@ static void opal_export_symmap(void)
pr_warn("Error %d creating OPAL symbols file\n", rc);
 }
 
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+struct bin_attribute *bin_attr, char *buf,
+loff_t off, size_t count)
+{
+   return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+  bin_attr->size);
+}
+
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created under /opal/exports/.
+ * This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+   struct bin_attribute *attr_tmp;
+   struct device_node *np;
+   struct property *prop;
+   struct kobject *kobj;
+   u64 vals[2];
+   int rc, n;
+
+   /* Create new 'exports' directory - /sys/firmware/opal/exports */
+   kobj = kobject_create_and_add("exports", opal_kobj);
+   if (!kobj) {
+   pr_warn("kobject_create_and_add exports failed\n");
+   return;
+   }
+
+   np = of_find_node_by_path("/ibm,opal/firmware/exports");
+   if (!np)
+   return;
+
+   n = 0;
+   for (prop = np->properties; prop != NULL; prop = prop->next)
+   n++;
+
+   if (n < 2)
+   goto cleanup;
+
+   for_each_property_of_node(np, prop) {
+   if (!strcmp(prop->name, "name") ||
+   !strcmp(prop->name, "phandle"))
+   continue;
+
+   if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
+   continue;
+
+   attr_tmp = kmalloc(sizeof(*attr_tmp), GFP_KERNEL);
+
+   if (attr_tmp == NULL) {
+   pr_warn("Failed kmalloc for bin_attribute attr_tmp");
+   continue;
+   }
+
+   attr_tmp->attr.name = kstrdup(prop->name, GFP_KERNEL);
+   attr_tmp->attr.mode = 0400;
+   attr_tmp->read = export_attr_read;
+   attr_tmp->private = __va(vals[0]);
+   attr_tmp->size = vals[1];
+
+   if (attr_tmp->attr.name == NULL) {
+   pr_warn("Failed kstrdup for bin_attribute attr.name");
+   kfree(attr_tmp);
+   continue;
+   }
+   rc = sysfs_create_bin_file(kobj, attr_tmp);
+   if (rc)
+   pr_warn("Error %d creating OPAL sysfs exports/%s 
file\n",
+ rc, prop->name);
+   }
+
+cleanup:
+   of_node_put(np);
+}
+
 static void __init opal_dump_region_init(void)
 {
void *addr;
@@ -742,6 +820,9 @@ static int __init opal_init(void)
opal_msglog_sysfs_init();
}
 
+   /* Export all properties */
+   opal_export_attrs();
+
/* Initialize platform devices: IPMI backend, PRD & flash interface */
opal_pdev_init("ibm,opal-ipmi");
opal_pdev_init("ibm,opal-flash");
-- 
2.9.3



[v7] powerpc/powernv: add hdat attribute to sysfs

2017-03-22 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog:

v7: 
- moved exported_attrs and attr_name into opal_export_attrs
---
 arch/powerpc/platforms/powernv/opal.c | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..b8f057f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -604,6 +604,87 @@ static void opal_export_symmap(void)
pr_warn("Error %d creating OPAL symbols file\n", rc);
 }
 
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+struct bin_attribute *bin_attr, char *buf,
+loff_t off, size_t count)
+{
+   return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+  bin_attr->size);
+}
+
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created under /opal/exports/.
+ * This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+   /* /sys/firmware/opal/exports */
+   struct kobject *opal_export_kobj;
+   struct bin_attribute *exported_attrs;
+   char **attr_name;
+
+   struct bin_attribute *attr_tmp;
+   const __be64 *syms;
+   unsigned int size;
+   struct device_node *fw;
+   struct property *prop;
+   int rc;
+   int attr_count = 0;
+   int n = 0;
+
+   /* Create new 'exports' directory */
+   opal_export_kobj = kobject_create_and_add("exports", opal_kobj);
+   if (!opal_export_kobj) {
+   pr_warn("kobject_create_and_add opal_exports failed\n");
+   return;
+   }
+
+   fw = of_find_node_by_path("/ibm,opal/firmware/exports");
+   if (!fw)
+   return;
+
+   for (prop = fw->properties; prop != NULL; prop = prop->next)
+   attr_count++;
+
+   if (attr_count > 2) {
+   exported_attrs = kzalloc(sizeof(exported_attrs)*(attr_count-2),
+   GFP_KERNEL);
+   attr_name = kzalloc(sizeof(char *)*(attr_count-2), GFP_KERNEL);
+   }
+
+   for_each_property_of_node(fw, prop) {
+
+   attr_name[n] = kstrdup(prop->name, GFP_KERNEL);
+   syms = of_get_property(fw, attr_name[n], &size);
+
+   if (!strcmp(attr_name[n], "name") ||
+   !strcmp(attr_name[n], "phandle"))
+   continue;
+
+   if (!syms || size != 2 * sizeof(__be64))
+   continue;
+
+   attr_tmp = &exported_attrs[n];
+   attr_tmp->attr.name = attr_name[n];
+   attr_tmp->attr.mode = 0400;
+   attr_tmp->read = export_attr_read;
+   attr_tmp->private = __va(be64_to_cpu(syms[0]));
+   attr_tmp->size = be64_to_cpu(syms[1]);
+
+   rc = sysfs_create_bin_file(opal_export_kobj, attr_tmp);
+   if (rc)
+   pr_warn("Error %d creating OPAL sysfs exports/%s 
file\n",
+   rc, attr_name[n]);
+   n++;
+   }
+
+   of_node_put(fw);
+
+}
+
 static void __init opal_dump_region_init(void)
 {
void *addr;
@@ -742,6 +823,9 @@ static int __init opal_init(void)
opal_msglog_sysfs_init();
}
 
+   /* Export all properties */
+   opal_export_attrs();
+
/* Initialize platform devices: IPMI backend, PRD & flash interface */
opal_pdev_init("ibm,opal-ipmi");
opal_pdev_init("ibm,opal-flash");
-- 
2.9.3



[v6] powerpc/powernv: add hdat attribute to sysfs

2017-03-21 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changelog

v6
- attribute names are stored locally, removing potential null pointer 
errors
- added of_node_put for the corresponding of_find_node
- folded exports node creation into opal_export_attr()
- fixed kzalloc flags to GFP_KERNEL
- fixed struct array indexing
- fixed error message
---
 arch/powerpc/platforms/powernv/opal.c | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..953537e 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -604,6 +604,87 @@ static void opal_export_symmap(void)
pr_warn("Error %d creating OPAL symbols file\n", rc);
 }
 
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+struct bin_attribute *bin_attr, char *buf,
+loff_t off, size_t count)
+{
+   return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+  bin_attr->size);
+}
+
+static struct bin_attribute *exported_attrs;
+static char **attr_name;
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created under /opal/exports/.
+ * This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+   /* /sys/firmware/opal/exports */
+   struct kobject *opal_export_kobj;
+
+   struct bin_attribute *attr_tmp;
+   const __be64 *syms;
+   unsigned int size;
+   struct device_node *fw;
+   struct property *prop;
+   int rc;
+   int attr_count = 0;
+   int n = 0;
+   
+   /* Create new 'exports' directory */
+   opal_export_kobj = kobject_create_and_add("exports", opal_kobj);
+   if (!opal_export_kobj) {
+   pr_warn("kobject_create_and_add opal_exports failed\n");
+   return;
+   }
+
+   fw = of_find_node_by_path("/ibm,opal/firmware/exports");
+   if (!fw)
+   return;
+
+   for (prop = fw->properties; prop != NULL; prop = prop->next)
+   attr_count++;
+
+   if (attr_count > 2) {
+   exported_attrs = kzalloc(sizeof(exported_attrs)*(attr_count-2),
+   GFP_KERNEL);
+   attr_name = kzalloc(sizeof(char *)*(attr_count-2), GFP_KERNEL);
+   }
+
+   for_each_property_of_node(fw, prop) {
+   
+   attr_name[n] = kstrdup(prop->name, GFP_KERNEL);
+   syms = of_get_property(fw, attr_name[n], &size);
+
+   if (!strcmp(attr_name[n], "name") ||
+   !strcmp(attr_name[n], "phandle"))
+   continue;
+
+   if (!syms || size != 2 * sizeof(__be64))
+   continue;
+
+   attr_tmp = &exported_attrs[n];
+   attr_tmp->attr.name = attr_name[n];
+   attr_tmp->attr.mode = 0400;
+   attr_tmp->read = export_attr_read;
+   attr_tmp->private = __va(be64_to_cpu(syms[0]));
+   attr_tmp->size = be64_to_cpu(syms[1]);
+
+   rc = sysfs_create_bin_file(opal_export_kobj, attr_tmp);
+   if (rc)
+   pr_warn("Error %d creating OPAL sysfs exports/%s 
file\n",
+   rc, attr_name[n]);
+   n++;
+   }
+
+   of_node_put(fw);
+
+}
+
 static void __init opal_dump_region_init(void)
 {
void *addr;
@@ -742,6 +823,9 @@ static int __init opal_init(void)
opal_msglog_sysfs_init();
}
 
+   /* Export all properties */
+   opal_export_attrs();
+
/* Initialize platform devices: IPMI backend, PRD & flash interface */
opal_pdev_init("ibm,opal-ipmi");
opal_pdev_init("ibm,opal-flash");
-- 
2.9.3



[Patch v5] powerpc/powernv: add hdat attribute to sysfs

2017-03-01 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.
This patch also adds sysfs nodes for all properties in the device-tree 
under /ibm,opal/firmware/exports.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changes between v4 and v5:
- all properties under /ibm,opal/firmware/exports in the device-tree
  are now added as new sysfs nodes
- the new sysfs nodes are now placed under /opal/exports
- added a generic read function for all exported attributes
---
 arch/powerpc/platforms/powernv/opal.c | 84 +++
 1 file changed, 84 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..fbb8264 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -36,6 +36,9 @@
 /* /sys/firmware/opal */
 struct kobject *opal_kobj;
 
+/* /sys/firmware/opal/exports */
+struct kobject *opal_export_kobj;
+
 struct opal {
u64 base;
u64 entry;
@@ -604,6 +607,82 @@ static void opal_export_symmap(void)
pr_warn("Error %d creating OPAL symbols file\n", rc);
 }
 
+
+static int opal_exports_sysfs_init(void)
+{
+   opal_export_kobj = kobject_create_and_add("exports", opal_kobj);
+   if (!opal_export_kobj) {
+   pr_warn("kobject_create_and_add opal_exports failed\n");
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+struct bin_attribute *bin_attr, char *buf,
+loff_t off, size_t count)
+{
+   return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+  bin_attr->size);
+}
+
+static struct bin_attribute *exported_attrs;
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created under /opal/exports/.
+ * This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+   const __be64 *syms;
+   unsigned int size;
+   struct device_node *fw;
+   struct property *prop;
+   int rc;
+   int attr_count = 0;
+   int n = 0;
+
+   fw = of_find_node_by_path("/ibm,opal/firmware/exports");
+   if (!fw)
+   return;
+
+   for (prop = fw->properties; prop != NULL; prop = prop->next)
+   attr_count++;
+
+   if (attr_count > 2)
+   exported_attrs = kmalloc(sizeof(exported_attrs)*(attr_count-2),
+   __GFP_IO | __GFP_FS);
+
+
+   for_each_property_of_node(fw, prop) {
+
+   syms = of_get_property(fw, prop->name, &size);
+
+   if (!strcmp(prop->name, "name") ||
+   !strcmp(prop->name, "phandle"))
+   continue;
+
+   if (!syms || size != 2 * sizeof(__be64))
+   continue;
+
+   (exported_attrs+n)->attr.name = prop->name;
+   (exported_attrs+n)->attr.mode = 0400;
+   (exported_attrs+n)->read = export_attr_read;
+   (exported_attrs+n)->private = __va(be64_to_cpu(syms[0]));
+   (exported_attrs+n)->size = be64_to_cpu(syms[1]);
+
+   rc = sysfs_create_bin_file(opal_export_kobj, exported_attrs+n);
+   if (rc)
+   pr_warn("Error %d creating OPAL %s file\n", rc,
+   prop->name);
+   n++;
+   }
+
+}
+
 static void __init opal_dump_region_init(void)
 {
void *addr;
@@ -742,6 +821,11 @@ static int __init opal_init(void)
opal_msglog_sysfs_init();
}
 
+   rc = opal_exports_sysfs_init();
+   if (rc == 0) {
+   /* Export all properties */
+   opal_export_attrs();
+   }
/* Initialize platform devices: IPMI backend, PRD & flash interface */
opal_pdev_init("ibm,opal-ipmi");
opal_pdev_init("ibm,opal-flash");
-- 
2.9.3



[Patch v4] powerpc/powernv: add hdat attribute to sysfs

2017-02-27 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
Changes between v3 and v4:
- changed sysfs attribute permissions from 0444 to 0400
- fixed makefile to be on same line
- fixed authorship/copyright info
- re-ordered includes
- changed hdat_info struct to a static struct

---
 arch/powerpc/include/asm/opal.h|  1 +
 arch/powerpc/platforms/powernv/Makefile|  2 +-
 arch/powerpc/platforms/powernv/opal-hdat.c | 60 ++
 arch/powerpc/platforms/powernv/opal.c  |  2 +
 4 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-hdat.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5c7db0f..b26944e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -277,6 +277,7 @@ extern int opal_async_comp_init(void);
 extern int opal_sensor_init(void);
 extern int opal_hmi_handler_init(void);
 extern int opal_event_init(void);
+extern void opal_hdat_sysfs_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..3826b70 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y   += setup.o opal-wrappers.o opal.o 
opal-async.o idle.o
 obj-y  += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
 obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y  += opal-kmsg.o
+obj-y  += opal-kmsg.o opal-hdat.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-hdat.c 
b/arch/powerpc/platforms/powernv/opal-hdat.c
new file mode 100644
index 0000000..19647fd
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-hdat.c
@@ -0,0 +1,60 @@
+/*
+ * PowerNV OPAL HDAT interface
+ *
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Location of the HDAT region served through sysfs. Being static, it is
+ * zero-initialised, so base stays NULL until opal_hdat_sysfs_init() has
+ * read the "hdat-map" property; hdat_read() checks for that.
+ */
+static struct {
+	/* Kernel virtual address of the region (phys_to_virt of hdat-map[0]) */
+   char *base;
+	/* Length of the region, taken from hdat-map[1] */
+   u64 size;
+} hdat_info;
+
+/*
+ * Read function for HDAT attribute in sysfs.
+ *
+ * Copies up to @count bytes of the HDAT region, starting at offset @pos,
+ * into @to. Returns -ENODEV if the HDAT base address was never set up,
+ * otherwise whatever memory_read_from_buffer() returns.
+ */
+static ssize_t hdat_read(struct file *file, struct kobject *kobj,
+			 struct bin_attribute *bin_attr, char *to,
+			 loff_t pos, size_t count)
+{
+	if (!hdat_info.base)
+		return -ENODEV;
+
+	/* pos is a by-value parameter, so letting the helper advance it is harmless. */
+	return memory_read_from_buffer(to, count, &pos, hdat_info.base,
+				       hdat_info.size);
+}
+
+/* Binary sysfs attribute "hdat": owner-read-only (0400), served by hdat_read() */
+static struct bin_attribute hdat_attr = {
+	.attr = {
+		.name = "hdat",
+		.mode = 0400,
+	},
+	.read = hdat_read,
+};
+
+/*
+ * Expose the HDAT region as the binary sysfs file "hdat" under opal_kobj.
+ *
+ * The "hdat-map" property of opal_node supplies two u64 values, the
+ * physical base address and the size. If the property cannot be read,
+ * no file is created and hdat_info is left untouched.
+ */
+void __init opal_hdat_sysfs_init(void)
+{
+	u64 hdat_addr[2];
+
+	/* Check for the hdat-map prop in device-tree */
+	if (of_property_read_u64_array(opal_node, "hdat-map", hdat_addr, 2)) {
+		pr_debug("OPAL: Property hdat-map not found.\n");
+		return;
+	}
+
+	/* Print out hdat-map values. [0]: base, [1]: size */
+	pr_debug("OPAL: HDAT Base address: %#llx\n", hdat_addr[0]);
+	pr_debug("OPAL: HDAT Size: %#llx\n", hdat_addr[1]);
+
+	hdat_info.base = phys_to_virt(hdat_addr[0]);
+	hdat_info.size = hdat_addr[1];
+
+	/* Terminate the message with a newline so it is not merged with the next one. */
+	if (sysfs_create_bin_file(opal_kobj, &hdat_attr))
+		pr_debug("OPAL: sysfs file creation for HDAT failed\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..cae3745 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -740,6 +740,8 @@ static int __init opal_init(void)
opal_sys_param_init();
/* Setup message log sysfs interface. */
opal_msglog_sysfs_init();
+   /* Create hdat object under sys/firmware/opal */
+   opal_hdat_sysfs_init();
}
 
/* Initialize platform devices: IPMI backend, PRD & flash interface */
-- 
2.9.3



[PATCH v3] powerpc/powernv: add hdat attribute to sysfs

2017-02-23 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---

Changes between v2 and v3:
- fixed header comments
- simplified if statement

---
 arch/powerpc/include/asm/opal.h|  1 +
 arch/powerpc/platforms/powernv/Makefile|  1 +
 arch/powerpc/platforms/powernv/opal-hdat.c | 65 ++
 arch/powerpc/platforms/powernv/opal.c  |  2 +
 4 files changed, 69 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/opal-hdat.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5c7db0f..b26944e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -277,6 +277,7 @@ extern int opal_async_comp_init(void);
 extern int opal_sensor_init(void);
 extern int opal_hmi_handler_init(void);
 extern int opal_event_init(void);
+extern void opal_hdat_sysfs_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..9a0c9d6 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,6 +3,7 @@ obj-y   += opal-rtc.o opal-nvram.o opal-lpc.o 
opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
 obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y  += opal-kmsg.o
+obj-y  += opal-hdat.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-hdat.c 
b/arch/powerpc/platforms/powernv/opal-hdat.c
new file mode 100644
index 0000000..3315dd3
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-hdat.c
@@ -0,0 +1,65 @@
+/*
+ * PowerNV OPAL HDAT interface
+ *
+ * Author: Matt Brown <matthew.brown@gmail.com>
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+/* Describes the HDAT region that hdat_read() serves through sysfs */
+struct hdat_info {
+	/* Kernel virtual address of the region (phys_to_virt of hdat-map[0]) */
+   char *base;
+	/* Length of the region, taken from hdat-map[1] */
+   u64 size;
+};
+
+/* Static, hence zero-initialised: base is NULL until opal_hdat_sysfs_init() sets it */
+static struct hdat_info hdat_inf;
+
+/*
+ * Read function for HDAT attribute in sysfs.
+ *
+ * Copies up to @count bytes of the HDAT region, starting at offset @pos,
+ * into @to. Returns -ENODEV if the HDAT base address was never set up,
+ * otherwise whatever memory_read_from_buffer() returns.
+ */
+static ssize_t hdat_read(struct file *file, struct kobject *kobj,
+			 struct bin_attribute *bin_attr, char *to,
+			 loff_t pos, size_t count)
+{
+	if (!hdat_inf.base)
+		return -ENODEV;
+
+	/* pos is a by-value parameter, so letting the helper advance it is harmless. */
+	return memory_read_from_buffer(to, count, &pos, hdat_inf.base,
+				       hdat_inf.size);
+}
+
+
+/* Binary sysfs attribute "hdat": readable by everyone (0444), served by hdat_read() */
+static struct bin_attribute hdat_attr = {
+	.attr = {
+		.name = "hdat",
+		.mode = 0444,
+	},
+	.read = hdat_read,
+};
+
+/*
+ * Expose the HDAT region as the binary sysfs file "hdat" under opal_kobj.
+ *
+ * The "hdat-map" property of opal_node supplies two u64 values, the
+ * physical base address and the size. If the property cannot be read,
+ * no file is created and hdat_inf is left untouched.
+ */
+void __init opal_hdat_sysfs_init(void)
+{
+	u64 hdat_addr[2];
+
+	/* Check for the hdat-map prop in device-tree */
+	if (of_property_read_u64_array(opal_node, "hdat-map", hdat_addr, 2)) {
+		pr_debug("OPAL: Property hdat-map not found.\n");
+		return;
+	}
+
+	/* Print out hdat-map values. [0]: base, [1]: size */
+	pr_debug("OPAL: HDAT Base address: %#llx\n", hdat_addr[0]);
+	pr_debug("OPAL: HDAT Size: %#llx\n", hdat_addr[1]);
+
+	hdat_inf.base = phys_to_virt(hdat_addr[0]);
+	hdat_inf.size = hdat_addr[1];
+
+	/* Terminate the message with a newline so it is not merged with the next one. */
+	if (sysfs_create_bin_file(opal_kobj, &hdat_attr))
+		pr_debug("OPAL: sysfs file creation for HDAT failed\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..cae3745 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -740,6 +740,8 @@ static int __init opal_init(void)
opal_sys_param_init();
/* Setup message log sysfs interface. */
opal_msglog_sysfs_init();
+   /* Create hdat object under sys/firmware/opal */
+   opal_hdat_sysfs_init();
}
 
/* Initialize platform devices: IPMI backend, PRD & flash interface */
-- 
2.9.3



[PATCH v2] powerpc/powernv: add hdat attribute to sysfs

2017-02-23 Thread Matt Brown
The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---

Between v1 and v2 of the patch the following changes were made.
Changelog:
- moved hdat code into opal-hdat.c
- added opal-hdat to the makefile
- changed struct and variable names from camelcase
---
 arch/powerpc/include/asm/opal.h|  1 +
 arch/powerpc/platforms/powernv/Makefile|  1 +
 arch/powerpc/platforms/powernv/opal-hdat.c | 63 ++
 arch/powerpc/platforms/powernv/opal.c  |  2 +
 4 files changed, 67 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/opal-hdat.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5c7db0f..b26944e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -277,6 +277,7 @@ extern int opal_async_comp_init(void);
 extern int opal_sensor_init(void);
 extern int opal_hmi_handler_init(void);
 extern int opal_event_init(void);
+extern void opal_hdat_sysfs_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb..9a0c9d6 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,6 +3,7 @@ obj-y   += opal-rtc.o opal-nvram.o opal-lpc.o 
opal-flash.o
 obj-y  += rng.o opal-elog.o opal-dump.o opal-sysparam.o 
opal-sensor.o
 obj-y  += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y  += opal-kmsg.o
+obj-y  += opal-hdat.o
 
 obj-$(CONFIG_SMP)  += smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)  += pci.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-hdat.c 
b/arch/powerpc/platforms/powernv/opal-hdat.c
new file mode 100644
index 0000000..bd305e0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-hdat.c
@@ -0,0 +1,63 @@
+/*
+ * PowerNV OPAL in-memory console interface
+ *
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+/* Describes the HDAT region that hdat_read() serves through sysfs */
+struct hdat_info {
+	/* Kernel virtual address of the region (phys_to_virt of hdat-map[0]) */
+   char *base;
+	/* Length of the region, taken from hdat-map[1] */
+   u64 size;
+};
+
+/* Static, hence zero-initialised: base is NULL until opal_hdat_sysfs_init() sets it */
+static struct hdat_info hdat_inf;
+
+/*
+ * Read function for HDAT attribute in sysfs.
+ *
+ * Copies up to @count bytes of the HDAT region, starting at offset @pos,
+ * into @to. Returns -ENODEV if the HDAT base address was never set up,
+ * otherwise whatever memory_read_from_buffer() returns.
+ */
+static ssize_t hdat_read(struct file *file, struct kobject *kobj,
+			 struct bin_attribute *bin_attr, char *to,
+			 loff_t pos, size_t count)
+{
+	if (!hdat_inf.base)
+		return -ENODEV;
+
+	/* pos is a by-value parameter, so letting the helper advance it is harmless. */
+	return memory_read_from_buffer(to, count, &pos, hdat_inf.base,
+				       hdat_inf.size);
+}
+
+
+/* Binary sysfs attribute "hdat": readable by everyone (0444), served by hdat_read() */
+static struct bin_attribute hdat_attr = {
+	.attr = {
+		.name = "hdat",
+		.mode = 0444,
+	},
+	.read = hdat_read,
+};
+
+/*
+ * Expose the HDAT region as the binary sysfs file "hdat" under opal_kobj.
+ *
+ * The "hdat-map" property of opal_node supplies two u64 values, the
+ * physical base address and the size. If the property cannot be read,
+ * no file is created and hdat_inf is left untouched.
+ */
+void __init opal_hdat_sysfs_init(void)
+{
+	u64 hdat_addr[2];
+
+	/* Check for the hdat-map prop in device-tree */
+	if (of_property_read_u64_array(opal_node, "hdat-map", hdat_addr, 2)) {
+		pr_debug("OPAL: Property hdat-map not found.\n");
+		return;
+	}
+
+	/* Print out hdat-map values. [0]: base, [1]: size */
+	pr_debug("OPAL: HDAT Base address: %#llx\n", hdat_addr[0]);
+	pr_debug("OPAL: HDAT Size: %#llx\n", hdat_addr[1]);
+
+	hdat_inf.base = phys_to_virt(hdat_addr[0]);
+	hdat_inf.size = hdat_addr[1];
+
+	/* Terminate the message with a newline so it is not merged with the next one. */
+	if (sysfs_create_bin_file(opal_kobj, &hdat_attr) != 0)
+		pr_debug("OPAL: sysfs file creation for HDAT failed\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..cae3745 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -740,6 +740,8 @@ static int __init opal_init(void)
opal_sys_param_init();
/* Setup message log sysfs interface. */
opal_msglog_sysfs_init();
+   /* Create hdat object under sys/firmware/opal */
+   opal_hdat_sysfs_init();
}
 
/* Initialize platform devices: IPMI backend, PRD & flash interface */
-- 
2.9.3



[PATCH] powerpc/powernv: add hdat attribute to sysfs

2017-02-22 Thread Matt Brown
From: Matt Brown <brownmatt1...@gmail.com>

The HDAT data area is consumed by skiboot and turned into a device-tree.
In some cases we would like to look directly at the HDAT, so this patch
adds a sysfs node to allow it to be viewed.  This is not possible through
/dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

Signed-off-by: Matt Brown <matthew.brown@gmail.com>
---
 arch/powerpc/include/asm/opal.h  |  1 +
 arch/powerpc/platforms/powernv/opal-msglog.c | 49 
 arch/powerpc/platforms/powernv/opal.c|  2 ++
 3 files changed, 52 insertions(+)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5c7db0f..b26944e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -277,6 +277,7 @@ extern int opal_async_comp_init(void);
 extern int opal_sensor_init(void);
 extern int opal_hmi_handler_init(void);
 extern int opal_event_init(void);
+extern void opal_hdat_sysfs_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c 
b/arch/powerpc/platforms/powernv/opal-msglog.c
index 39d6ff9..a637055 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -31,7 +31,13 @@ struct memcons {
__be32 in_cons;
 };
 
+/* Describes the HDAT region that hdat_read() serves through sysfs */
+struct hdatInfo {
+	/* Kernel virtual address of the region (phys_to_virt of hdat-map[0]) */
+   char *base;
+	/* Length of the region, taken from hdat-map[1] */
+   u64 size;
+};
+
 static struct memcons *opal_memcons = NULL;
+static struct hdatInfo hdat_inf;
 
 ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
 {
@@ -136,3 +142,46 @@ void __init opal_msglog_sysfs_init(void)
if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0)
pr_warn("OPAL: sysfs file creation failed\n");
 }
+
+
+
+/*
+ * Read function for HDAT attribute in sysfs.
+ *
+ * Copies up to @count bytes of the HDAT region, starting at offset @pos,
+ * into @to. Returns -ENODEV if the HDAT base address was never set up,
+ * otherwise whatever memory_read_from_buffer() returns.
+ */
+static ssize_t hdat_read(struct file *file, struct kobject *kobj,
+			 struct bin_attribute *bin_attr, char *to,
+			 loff_t pos, size_t count)
+{
+	if (!hdat_inf.base)
+		return -ENODEV;
+
+	/* pos is a by-value parameter, so letting the helper advance it is harmless. */
+	return memory_read_from_buffer(to, count, &pos, hdat_inf.base,
+				       hdat_inf.size);
+}
+
+
+/* Binary sysfs attribute "hdat": readable by everyone (0444), served by hdat_read() */
+static struct bin_attribute hdat_attr = {
+	.attr = {
+		.name = "hdat",
+		.mode = 0444,
+	},
+	.read = hdat_read,
+};
+
+/*
+ * Expose the HDAT region as the binary sysfs file "hdat" under opal_kobj.
+ *
+ * The "hdat-map" property of opal_node supplies two u64 values, the
+ * physical base address and the size. If the property cannot be read,
+ * no file is created and hdat_inf is left untouched.
+ */
+void __init opal_hdat_sysfs_init(void)
+{
+	u64 hdatAddr[2];
+
+	/* Check for the hdat-map prop in device-tree */
+	if (of_property_read_u64_array(opal_node, "hdat-map", hdatAddr, 2)) {
+		pr_debug("OPAL: Property hdat-map not found.\n");
+		return;
+	}
+
+	/* Print out hdat-map values. [0]: base, [1]: size */
+	pr_debug("HDAT Base address: %#llx\n", hdatAddr[0]);
+	pr_debug("HDAT Size: %#llx\n", hdatAddr[1]);
+
+	hdat_inf.base = phys_to_virt(hdatAddr[0]);
+	hdat_inf.size = hdatAddr[1];
+
+	/* Terminate the message with a newline so it is not merged with the next one. */
+	if (sysfs_create_bin_file(opal_kobj, &hdat_attr) != 0)
+		pr_debug("OPAL: sysfs file creation for HDAT failed\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 2822935..cae3745 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -740,6 +740,8 @@ static int __init opal_init(void)
opal_sys_param_init();
/* Setup message log sysfs interface. */
opal_msglog_sysfs_init();
+   /* Create hdat object under sys/firmware/opal */
+   opal_hdat_sysfs_init();
}
 
/* Initialize platform devices: IPMI backend, PRD & flash interface */
-- 
2.9.3