On 06/25/2018 03:25 AM, Jan Beulich wrote:
> Some Intel CPUs don't recognize 64-bit XORs as zeroing idioms - use
> 32-bit ones instead.

Hmph. Is that considered a bug (errata)? URL/references?

Are these changes really only zeroing the lower 32 bits of the
register, and is that all that the code cares about?
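For reference, my own understanding of the Intel SDM is that in 64-bit
mode any write to a 32-bit GPR zero-extends into bits 63:32 of the full
register, so "xor %eax, %eax" really does clear all of %rax, and it
saves the REX.W prefix byte on top (31 C0 is 2 bytes vs. 48 31 C0 at 3;
for %r8..%r15 a REX prefix is needed either way, so there it is purely a
question of which CPUs recognize the instruction as a dependency-breaking
zeroing idiom). A minimal user-space check - this little test program is
mine, not part of the patch, and assumes gcc's "%k" inline-asm operand
modifier:

/* zerotest.c - hypothetical demo, not from the patch: show that the
 * 32-bit XOR idiom clears a full 64-bit register. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t v;

	asm ("mov %1, %0\n\t"	/* fill all 64 bits of v's register */
	     "xor %k0, %k0"	/* %k0 = its 32-bit half, e.g. %eax */
	     : "=&r" (v)
	     : "r" (0xdeadbeefcafebabeULL));

	/* prints "0", i.e. bits 63:32 were cleared as well */
	printf("after 32-bit xor: %#llx\n", (unsigned long long)v);
	return 0;
}

If I remember Agner Fog's tables correctly, the Silvermont family is one
that recognizes the idiom only at 32-bit operand size, which is
presumably what the commit message alludes to - but a concrete reference
in the commit message would still be good to have.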
thanks.

> Signed-off-by: Jan Beulich <jbeul...@suse.com>
> ---
>  arch/x86/crypto/aegis128-aesni-asm.S     |    2 +-
>  arch/x86/crypto/aegis128l-aesni-asm.S    |    2 +-
>  arch/x86/crypto/aegis256-aesni-asm.S     |    2 +-
>  arch/x86/crypto/aesni-intel_asm.S        |    8 ++++----
>  arch/x86/crypto/aesni-intel_avx-x86_64.S |    4 ++--
>  arch/x86/crypto/morus1280-avx2-asm.S     |    2 +-
>  arch/x86/crypto/morus1280-sse2-asm.S     |    2 +-
>  arch/x86/crypto/morus640-sse2-asm.S      |    2 +-
>  arch/x86/crypto/sha1_ssse3_asm.S         |    2 +-
>  arch/x86/kernel/head_64.S                |    2 +-
>  arch/x86/kernel/paravirt_patch_64.c      |    2 +-
>  arch/x86/lib/memcpy_64.S                 |    2 +-
>  arch/x86/power/hibernate_asm_64.S        |    2 +-
>  13 files changed, 17 insertions(+), 17 deletions(-)
>
> --- 4.18-rc2/arch/x86/crypto/aegis128-aesni-asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aegis128-aesni-asm.S
> @@ -75,7 +75,7 @@
>   * %r9
>   */
>  __load_partial:
> -	xor %r9, %r9
> +	xor %r9d, %r9d
>  	pxor MSG, MSG
>
>  	mov LEN, %r8
> --- 4.18-rc2/arch/x86/crypto/aegis128l-aesni-asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aegis128l-aesni-asm.S
> @@ -66,7 +66,7 @@
>   * %r9
>   */
>  __load_partial:
> -	xor %r9, %r9
> +	xor %r9d, %r9d
>  	pxor MSG0, MSG0
>  	pxor MSG1, MSG1
>
> --- 4.18-rc2/arch/x86/crypto/aegis256-aesni-asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aegis256-aesni-asm.S
> @@ -59,7 +59,7 @@
>   * %r9
>   */
>  __load_partial:
> -	xor %r9, %r9
> +	xor %r9d, %r9d
>  	pxor MSG, MSG
>
>  	mov LEN, %r8
> --- 4.18-rc2/arch/x86/crypto/aesni-intel_asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aesni-intel_asm.S
> @@ -258,7 +258,7 @@ ALL_F:      .octa 0xffffffffffffffffffff
>  .macro GCM_INIT Iv SUBKEY AAD AADLEN
>  	mov \AADLEN, %r11
>  	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
> -	xor %r11, %r11
> +	xor %r11d, %r11d
>  	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
>  	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
>  	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
> @@ -286,7 +286,7 @@ ALL_F:      .octa 0xffffffffffffffffffff
>  	movdqu HashKey(%arg2), %xmm13
>  	add %arg5, InLen(%arg2)
>
> -	xor %r11, %r11 # initialise the data pointer offset as zero
> +	xor %r11d, %r11d # initialise the data pointer offset as zero
>  	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
>
>  	sub %r11, %arg5 # sub partial block data used
> @@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
>
>  	# GHASH computation for the last <16 Byte block
>  	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
> -	xor %rax,%rax
> +	xor %eax, %eax
>
>  	mov %rax, PBlockLen(%arg2)
>  	jmp _dec_done_\@
> @@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
>
>  	# GHASH computation for the last <16 Byte block
>  	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
> -	xor %rax,%rax
> +	xor %eax, %eax
>
>  	mov %rax, PBlockLen(%arg2)
>  	jmp _encode_done_\@
> --- 4.18-rc2/arch/x86/crypto/aesni-intel_avx-x86_64.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aesni-intel_avx-x86_64.S
> @@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
>
>  _get_AAD_done\@:
>  	# initialize the data pointer offset as zero
> -	xor %r11, %r11
> +	xor %r11d, %r11d
>
>  	# start AES for num_initial_blocks blocks
>  	mov arg5, %rax                               # rax = *Y0
> @@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
>
>  _get_AAD_done\@:
>  	# initialize the data pointer offset as zero
> -	xor %r11, %r11
> +	xor %r11d, %r11d
>
>  	# start AES for num_initial_blocks blocks
>  	mov arg5, %rax                               # rax = *Y0
> --- 4.18-rc2/arch/x86/crypto/morus1280-avx2-asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/morus1280-avx2-asm.S
> @@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
>   * %r9
>   */
>  __load_partial:
> -	xor %r9, %r9
> +	xor %r9d, %r9d
>  	vpxor MSG, MSG, MSG
>
>  	mov %rcx, %r8
> --- 4.18-rc2/arch/x86/crypto/morus1280-sse2-asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/morus1280-sse2-asm.S
> @@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
>   * %r9
>   */
>  __load_partial:
> -	xor %r9, %r9
> +	xor %r9d, %r9d
>  	pxor MSG_LO, MSG_LO
>  	pxor MSG_HI, MSG_HI
>
> --- 4.18-rc2/arch/x86/crypto/morus640-sse2-asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/morus640-sse2-asm.S
> @@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
>   * %r9
>   */
>  __load_partial:
> -	xor %r9, %r9
> +	xor %r9d, %r9d
>  	pxor MSG, MSG
>
>  	mov %rcx, %r8
> --- 4.18-rc2/arch/x86/crypto/sha1_ssse3_asm.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/sha1_ssse3_asm.S
> @@ -96,7 +96,7 @@
>  	# cleanup workspace
>  	mov $8, %ecx
>  	mov %rsp, %rdi
> -	xor %rax, %rax
> +	xor %eax, %eax
>  	rep stosq
>
>  	mov %rbp, %rsp # deallocate workspace
> --- 4.18-rc2/arch/x86/kernel/head_64.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/kernel/head_64.S
> @@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
>  	 * address given in m16:64.
>  	 */
>  	pushq	$.Lafter_lret	# put return address on stack for unwinder
> -	xorq	%rbp, %rbp	# clear frame pointer
> +	xorl	%ebp, %ebp	# clear frame pointer
>  	movq	initial_code(%rip), %rax
>  	pushq	$__KERNEL_CS	# set correct cs
>  	pushq	%rax		# target address in negative space
> --- 4.18-rc2/arch/x86/kernel/paravirt_patch_64.c
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/kernel/paravirt_patch_64.c
> @@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
>
>  #if defined(CONFIG_PARAVIRT_SPINLOCKS)
>  DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
> -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
> +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
>  #endif
>
>  unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
> --- 4.18-rc2/arch/x86/lib/memcpy_64.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/lib/memcpy_64.S
> @@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
>
>  	/* Copy successful. Return zero */
>  .L_done_memcpy_trap:
> -	xorq %rax, %rax
> +	xorl %eax, %eax
>  	ret
>  ENDPROC(__memcpy_mcsafe)
>  EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
> --- 4.18-rc2/arch/x86/power/hibernate_asm_64.S
> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/power/hibernate_asm_64.S
> @@ -137,7 +137,7 @@ ENTRY(restore_registers)
>  	/* Saved in save_processor_state. */
>  	lgdt saved_context_gdt_desc(%rax)
>
> -	xorq	%rax, %rax
> +	xorl	%eax, %eax
>
>  	/* tell the hibernation core that we've just restored the memory */
>  	movq	%rax, in_suspend(%rip)
>
>

-- 
~Randy