From: Zhaoxiu Zeng <zhaoxiu.z...@gmail.com>

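Add x86 parity helpers. The 32- and 64-bit variants use the alternatives
mechanism (POPCNT when available, otherwise a call to __sw_parityXX), an
approach lifted from arch_hweight.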

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.z...@gmail.com>
---
 arch/x86/include/asm/arch_hweight.h |   5 ++
 arch/x86/include/asm/arch_parity.h  | 102 ++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/bitops.h       |   4 +-
 arch/x86/lib/Makefile               |   8 +++
 arch/x86/lib/parity.c               |  32 ++++++++++++
 5 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/arch_parity.h
 create mode 100644 arch/x86/lib/parity.c

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799f..c79d50d 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -63,4 +63,9 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
 }
 #endif /* CONFIG_X86_32 */
 
+#undef POPCNT32
+#undef POPCNT64
+#undef REG_IN
+#undef REG_OUT
+
 #endif
diff --git a/arch/x86/include/asm/arch_parity.h b/arch/x86/include/asm/arch_parity.h
new file mode 100644
index 0000000..09463fd
--- /dev/null
+++ b/arch/x86/include/asm/arch_parity.h
@@ -0,0 +1,102 @@
+#ifndef _ASM_X86_PARITY_H
+#define _ASM_X86_PARITY_H
+
+#include <asm/cpufeatures.h>
+
+#ifdef CONFIG_64BIT
+/* popcnt %edi, %eax -- redundant REX prefix for alignment */
+#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %rdi, %rax */
+#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
+#define REG_IN "D"     /* asm input constraint: di register, the source operand encoded above */
+#define REG_OUT "a"    /* asm output constraint: a register, the destination operand encoded above */
+#else /* !CONFIG_64BIT */
+/* popcnt %eax, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
+#define REG_IN "a"     /* asm input constraint: a register, source of the encoded popcnt */
+#define REG_OUT "a"    /* asm output constraint: a register, destination of the encoded popcnt */
+#endif /* CONFIG_64BIT */
+
+/*
+ * __sw_parityXX are called from within the alternatives below
+ * and callee-clobbered registers need to be taken care of. See
+ * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
+ * compiler switches.
+ */
+unsigned int __sw_parity32(unsigned int w);    /* call target of the non-POPCNT alternative in __arch_parity32() */
+#ifndef CONFIG_X86_32
+unsigned int __sw_parity64(__u64 w);   /* call target of the non-POPCNT alternative in __arch_parity64() */
+#endif /* !CONFIG_X86_32 */
+
+static inline unsigned int __arch_parity4(unsigned int w)
+{
+       unsigned int res = 0;   /* result: 1 if the low 4 bits of @w hold an odd number of set bits, else 0; pre-zeroed because setpo writes only one byte */
+
+       asm("test $0xf, %1; setpo %b0"  /* PF is computed from (w & 0xf); setpo stores 1 when PF is clear */
+               : "+q" (res)            /* read-write so the pre-zeroed upper bytes survive; "q" = byte-addressable register */
+               : "r" (w)
+               : "cc");                /* test rewrites the flags */
+
+       return res;
+}
+
+static inline unsigned int __arch_parity8(unsigned int w)
+{
+       unsigned int res = 0;   /* result: 1 if the low 8 bits of @w hold an odd number of set bits, else 0; pre-zeroed because setpo writes only one byte */
+
+       asm("test %1, %1; setpo %b0"    /* PF reflects only the low byte of the test result; setpo stores 1 when PF is clear */
+               : "+q" (res)            /* read-write so the pre-zeroed upper bytes survive; "q" = byte-addressable register */
+               : "r" (w)
+               : "cc");                /* test rewrites the flags */
+
+       return res;
+}
+
+static inline unsigned int __arch_parity16(unsigned int w)
+{
+       unsigned int res = 0;   /* result: 1 if the low 16 bits of @w hold an odd number of set bits, else 0; pre-zeroed because setpo writes only one byte */
+
+       asm("xor %h1, %b1; setpo %b0"   /* fold bits 15:8 into 7:0 so PF covers all 16 bits; setpo stores 1 when PF is clear */
+               : "+q" (res), "+Q" (w)  /* "Q" = a/b/c/d only: %h1 needs a register with a high-byte half ("q" is any register on 64-bit); w is modified */
+               : : "cc");
+
+       return res;
+}
+
+static __always_inline unsigned int __arch_parity32(unsigned int w)
+{
+       unsigned int res;       /* result: 1 if @w holds an odd number of set bits, else 0 */
+
+       asm(ALTERNATIVE("call __sw_parity32", POPCNT32 "; and $1, %0", X86_FEATURE_POPCNT)
+               : "="REG_OUT (res)      /* output register is fixed by REG_OUT for both alternatives */
+               : REG_IN (w)            /* argument register is fixed by REG_IN for both alternatives */
+               : "cc");                /* popcnt/and rewrite the flags */
+
+       return res;
+}
+
+#ifdef CONFIG_X86_32
+static inline unsigned long __arch_parity64(__u64 w)
+{
+       return __arch_parity32((u32)w ^ (u32)(w >> 32));        /* XOR the two halves: the fold has the same parity as @w */
+}
+#else
+static __always_inline unsigned long __arch_parity64(__u64 w)
+{
+       unsigned long res;      /* NOTE(review): 64-bit result, but __sw_parity64() is declared to return unsigned int -- confirm the upper half is zero on the call path */
+
+       asm(ALTERNATIVE("call __sw_parity64", POPCNT64 "; and $1, %0", X86_FEATURE_POPCNT)
+               : "="REG_OUT (res)      /* output register is fixed by REG_OUT for both alternatives */
+               : REG_IN (w)            /* argument register is fixed by REG_IN for both alternatives */
+               : "cc");                /* popcnt/and rewrite the flags */
+
+       return res;
+}
+#endif /* CONFIG_X86_32 */
+
+#undef POPCNT32        /* the helper macros defined at the top of this header are dropped again here */
+#undef POPCNT64
+#undef REG_IN
+#undef REG_OUT
+
+#endif /* _ASM_X86_PARITY_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 7766d1c..f5b0122 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -498,9 +498,11 @@ static __always_inline int fls64(__u64 x)
 #include <asm-generic/bitops/sched.h>
 
 #include <asm/arch_hweight.h>
-
 #include <asm-generic/bitops/const_hweight.h>
 
+#include <asm/arch_parity.h>
+#include <asm-generic/bitops/const_parity.h>
+
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a5767..5716295 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,6 +4,9 @@
 
 # Produces uninteresting flaky coverage.
 KCOV_INSTRUMENT_delay.o        := n
+# Kernel does not boot if we instrument this file as it uses custom calling
+# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
+KCOV_INSTRUMENT_parity.o := n
 
 inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
 inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
@@ -45,3 +48,8 @@ else
         lib-y += copy_user_64.o
        lib-y += cmpxchg16b_emu.o
 endif
+
+GCOV_PROFILE_parity.o := n
+CFLAGS_parity.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+obj-y  += parity.o
+
diff --git a/arch/x86/lib/parity.c b/arch/x86/lib/parity.c
new file mode 100644
index 0000000..762117b
--- /dev/null
+++ b/arch/x86/lib/parity.c
@@ -0,0 +1,32 @@
+#include <linux/export.h>
+#include <linux/bitops.h>
+
+unsigned int __sw_parity32(unsigned int w)
+{
+       unsigned int res;       /* result: 1 if @w holds an odd number of set bits, else 0 */
+       w ^= w >> 16;           /* fold the upper 16 bits into the lower 16 */
+       asm("xor        %%ah, %%al              \n"     /* fold bits 15:8 into 7:0; PF now reflects %al */
+               "mov    $0, %%eax               \n"     /* zero the result register; mov leaves the flags untouched */
+               "setpo  %%al                    \n"     /* %al = 1 when PF is clear, i.e. odd parity */
+               : "=a" (res)
+               : "a" (w)
+               : "cc");
+       return res;
+}
+EXPORT_SYMBOL(__sw_parity32);
+
+#ifndef CONFIG_X86_32
+unsigned int __sw_parity64(__u64 w)
+{
+       unsigned int res = (unsigned int)w ^ (unsigned int)(w >> 32);   /* fold the upper 32 bits of @w into the lower 32 */
+       res ^= res >> 16;       /* fold again: 32 bits down to 16 */
+       asm("xor        %%ah, %%al              \n"     /* fold bits 15:8 into 7:0; PF now reflects %al */
+               "mov    $0, %%eax               \n"     /* zero the result register; mov leaves the flags untouched */
+               "setpo  %%al                    \n"     /* %al = 1 when PF is clear, i.e. odd parity */
+               : "=a" (res)
+               : "a" (res)
+               : "cc");
+       return res;
+}
+EXPORT_SYMBOL(__sw_parity64);
+#endif /* !CONFIG_X86_32 */
-- 
2.5.0


Reply via email to