Use MOVBE if it is available.

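Internally, MOVBE is probably decoded into MOV + BSWAP anyway, so a
speedup is not guaranteed.

Do it regardless because the change is trivial: each MOVL + BSWAPL pair
in the DES3 block load/store paths becomes a single MOVBE, and
verify_cpu checks CPUID.1:ECX bit 22 at boot when the option is enabled.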

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---
 arch/x86/crypto/des3_ede-asm_64.S | 28 ++++++++++++++++++++++++++++
 arch/x86/kernel/verify_cpu.S      |  7 +++++++
 scripts/kconfig/cpuid.c           |  5 +++++
 scripts/march-native.sh           |  1 +
 4 files changed, 41 insertions(+)

diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index 7fca43099a5f..2fd310e98b0b 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -150,6 +150,15 @@
 
 #define dummy2(a, b) /*_*/
 
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+#define read_block(io, left, right) \
+       movbe    (io), left##d; \
+       movbe   4(io), right##d;
+
+#define write_block(io, left, right) \
+       movbe   left##d,   (io); \
+       movbe   right##d, 4(io);
+#else
 #define read_block(io, left, right) \
        movl    (io), left##d; \
        movl   4(io), right##d; \
@@ -161,6 +170,7 @@
        bswapl right##d; \
        movl   left##d,   (io); \
        movl   right##d, 4(io);
+#endif
 
 ENTRY(des3_ede_x86_64_crypt_blk)
        /* input:
@@ -434,6 +444,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
        pushq %rsi /* dst */
 
        /* load input */
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+       movbe 0 * 4(%rdx), RL0d;
+       movbe 1 * 4(%rdx), RR0d;
+       movbe 2 * 4(%rdx), RL1d;
+       movbe 3 * 4(%rdx), RR1d;
+       movbe 4 * 4(%rdx), RL2d;
+       movbe 5 * 4(%rdx), RR2d;
+#else
        movl 0 * 4(%rdx), RL0d;
        movl 1 * 4(%rdx), RR0d;
        movl 2 * 4(%rdx), RL1d;
@@ -447,6 +465,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
        bswapl RR1d;
        bswapl RL2d;
        bswapl RR2d;
+#endif
 
        initial_permutation3(RL, RR);
 
@@ -507,6 +526,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 
        final_permutation3(RR, RL);
 
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+       movbe RR0d, 0 * 4(%rsi);
+       movbe RL0d, 1 * 4(%rsi);
+       movbe RR1d, 2 * 4(%rsi);
+       movbe RL1d, 3 * 4(%rsi);
+       movbe RR2d, 4 * 4(%rsi);
+       movbe RL2d, 5 * 4(%rsi);
+#else
        bswapl RR0d;
        bswapl RL0d;
        bswapl RR1d;
@@ -521,6 +548,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
        movl RL1d, 3 * 4(%rsi);
        movl RR2d, 4 * 4(%rsi);
        movl RL2d, 5 * 4(%rsi);
+#endif
 
        popq %r15;
        popq %r14;
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index d3f3370e7dab..f8ff130edfb3 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -142,6 +142,13 @@ ENTRY(verify_cpu)
        jnc     .Lverify_cpu_no_longmode
 #endif
 
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+       mov     $1, %eax
+       cpuid
+       bt      $22, %ecx
+       jnc     .Lverify_cpu_no_longmode
+#endif
+
 #if defined(CONFIG_MARCH_NATIVE_REP_MOVSB) || defined(CONFIG_MARCH_NATIVE_REP_STOSB)
        xor     %eax, %eax
        cpuid
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index 58d09bda61e5..0da1142a59da 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -43,6 +43,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
        );
 }
 
+static bool movbe      = false;
 static bool popcnt     = false;
 static bool rep_movsb  = false;
 static bool rep_stosb  = false;
@@ -57,6 +58,9 @@ static void intel(void)
                cpuid(1, &eax, &ecx, &edx, &ebx);
 //             printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
 
+               if (ecx & (1 << 22)) {
+                       movbe = true;
+               }
                if (ecx & (1 << 23)) {
                        popcnt = true;
                }
@@ -89,6 +93,7 @@ int main(int argc, char *argv[])
        }
 
 #define _(x)   if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+       _(movbe);
        _(popcnt);
        _(rep_movsb);
        _(rep_stosb);
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index a41a15a64df4..530bac22fa07 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -41,6 +41,7 @@ COLLECT_GCC_OPTIONS=$(
 )
 echo "-march=native: $COLLECT_GCC_OPTIONS"
 
+"$CPUID" movbe         && option "CONFIG_MARCH_NATIVE_MOVBE"
 "$CPUID" popcnt                && option "CONFIG_MARCH_NATIVE_POPCNT"
 "$CPUID" rep_movsb     && option "CONFIG_MARCH_NATIVE_REP_MOVSB"
 "$CPUID" rep_stosb     && option "CONFIG_MARCH_NATIVE_REP_STOSB"
-- 
2.21.0

Reply via email to