Use MOVBE if it is available. This doesn't save code size, as MOVBE seems to be as long as MOV+BSWAP. It is not clear whether it saves uops; maybe it will in the future.
Do it because it is easy, I guess. --- arch/x86/crypto/des3_ede-asm_64.S | 28 ++++++++++++++++++++++++++++ arch/x86/net/bpf_jit.S | 12 ++++++++++++ scripts/kconfig/cpuid.c | 4 ++++ scripts/march-native.sh | 3 ++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 8e49ce117494..007319ea1f62 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -159,6 +159,15 @@ #define dummy2(a, b) /*_*/ +#ifdef CONFIG_MARCH_NATIVE_MOVBE +#define read_block(io, left, right) \ + movbe (io), left##d; \ + movbe 4(io), right##d; + +#define write_block(io, left, right) \ + movbe left##d, (io); \ + movbe right##d, 4(io); +#else #define read_block(io, left, right) \ movl (io), left##d; \ movl 4(io), right##d; \ @@ -170,6 +179,7 @@ bswapl right##d; \ movl left##d, (io); \ movl right##d, 4(io); +#endif ENTRY(des3_ede_x86_64_crypt_blk) /* input: @@ -443,6 +453,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) pushq %rsi /* dst */ /* load input */ +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe 0 * 4(%rdx), RL0d; + movbe 1 * 4(%rdx), RR0d; + movbe 2 * 4(%rdx), RL1d; + movbe 3 * 4(%rdx), RR1d; + movbe 4 * 4(%rdx), RL2d; + movbe 5 * 4(%rdx), RR2d; +#else movl 0 * 4(%rdx), RL0d; movl 1 * 4(%rdx), RR0d; movl 2 * 4(%rdx), RL1d; @@ -456,6 +474,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) bswapl RR1d; bswapl RL2d; bswapl RR2d; +#endif initial_permutation3(RL, RR); @@ -516,6 +535,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) final_permutation3(RR, RL); +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe RR0d, 0 * 4(%rsi); + movbe RL0d, 1 * 4(%rsi); + movbe RR1d, 2 * 4(%rsi); + movbe RL1d, 3 * 4(%rsi); + movbe RR2d, 4 * 4(%rsi); + movbe RL2d, 5 * 4(%rsi); +#else bswapl RR0d; bswapl RL0d; bswapl RR1d; @@ -530,6 +557,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) movl RL1d, 3 * 4(%rsi); movl RR2d, 4 * 4(%rsi); movl RL2d, 5 * 4(%rsi); +#endif popq %r15; popq %r14; diff --git a/arch/x86/net/bpf_jit.S 
b/arch/x86/net/bpf_jit.S index b33093f84528..17fe33750298 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -34,8 +34,12 @@ FUNC(sk_load_word_positive_offset) sub %esi,%eax # hlen - offset cmp $3,%eax jle bpf_slow_path_word +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe (SKBDATA,%rsi),%eax +#else mov (SKBDATA,%rsi),%eax bswap %eax /* ntohl() */ +#endif ret FUNC(sk_load_half) @@ -80,8 +84,12 @@ FUNC(sk_load_byte_positive_offset) bpf_slow_path_word: bpf_slow_path_common(4) js bpf_error +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe 32(%rbp),%eax +#else mov 32(%rbp),%eax bswap %eax +#endif ret bpf_slow_path_half: @@ -118,8 +126,12 @@ bpf_slow_path_word_neg: FUNC(sk_load_word_negative_offset) sk_negative_common(4) +#ifdef CONFIG_MARCH_NATIVE_MOVBE + movbe (%rax), %eax +#else mov (%rax), %eax bswap %eax +#endif ret bpf_slow_path_half_neg: diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c index ecb285183581..2c23c8699ae6 100644 --- a/scripts/kconfig/cpuid.c +++ b/scripts/kconfig/cpuid.c @@ -42,6 +42,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t ); } +static bool movbe = false; static bool popcnt = false; static bool rep_movsb = false; static bool rep_stosb = false; @@ -56,6 +57,8 @@ static void intel(void) cpuid(1, &eax, &ecx, &edx, &ebx); // printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx); + if (ecx & (1 << 22)) + movbe = true; if (ecx & (1 << 23)) popcnt = true; } @@ -86,6 +89,7 @@ int main(int argc, char *argv[]) intel(); #define _(x) if (streq(opt, #x)) return x ? 
EXIT_SUCCESS : EXIT_FAILURE + _(movbe); _(popcnt); _(rep_movsb); _(rep_stosb); diff --git a/scripts/march-native.sh b/scripts/march-native.sh index d3adf0edb2be..93f6a9bd4a6c 100755 --- a/scripts/march-native.sh +++ b/scripts/march-native.sh @@ -30,6 +30,7 @@ option() { } if test -x "$CPUID"; then + "$CPUID" movbe && option "CONFIG_MARCH_NATIVE_MOVBE" "$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT" "$CPUID" rep_movsb && option "CONFIG_MARCH_NATIVE_REP_MOVSB" "$CPUID" rep_stosb && option "CONFIG_MARCH_NATIVE_REP_STOSB" @@ -76,7 +77,7 @@ for i in $COLLECT_GCC_OPTIONS; do -mhle) option "CONFIG_MARCH_NATIVE_HLE" ;; -mlzcnt) option "CONFIG_MARCH_NATIVE_LZCNT" ;; -mmmx) option "CONFIG_MARCH_NATIVE_MMX" ;; - -mmovbe) option "CONFIG_MARCH_NATIVE_MOVBE" ;; + -mmovbe);; -mpclmul) option "CONFIG_MARCH_NATIVE_PCLMUL" ;; -mpopcnt);; -mprfchw) option "CONFIG_MARCH_NATIVE_PREFETCHW" ;; -- 2.13.6