On June 25, 2018 9:33:35 AM PDT, Randy Dunlap <rdun...@infradead.org> wrote:
>On 06/25/2018 03:25 AM, Jan Beulich wrote:
>> Some Intel CPUs don't recognize 64-bit XORs as zeroing idioms - use
>> 32-bit ones instead.
>
>Hmph. Is that considered a bug (errata)?
>
>URL/references?
>
>Are these changes really only zeroing the lower 32 bits of the
>register?
>and that's all that the code cares about?
>
>thanks.
>
>> Signed-off-by: Jan Beulich <jbeul...@suse.com>
>> ---
>>  arch/x86/crypto/aegis128-aesni-asm.S     | 2 +-
>>  arch/x86/crypto/aegis128l-aesni-asm.S    | 2 +-
>>  arch/x86/crypto/aegis256-aesni-asm.S     | 2 +-
>>  arch/x86/crypto/aesni-intel_asm.S        | 8 ++++----
>>  arch/x86/crypto/aesni-intel_avx-x86_64.S | 4 ++--
>>  arch/x86/crypto/morus1280-avx2-asm.S     | 2 +-
>>  arch/x86/crypto/morus1280-sse2-asm.S     | 2 +-
>>  arch/x86/crypto/morus640-sse2-asm.S      | 2 +-
>>  arch/x86/crypto/sha1_ssse3_asm.S         | 2 +-
>>  arch/x86/kernel/head_64.S                | 2 +-
>>  arch/x86/kernel/paravirt_patch_64.c      | 2 +-
>>  arch/x86/lib/memcpy_64.S                 | 2 +-
>>  arch/x86/power/hibernate_asm_64.S        | 2 +-
>>  13 files changed, 17 insertions(+), 17 deletions(-)
>>
>> --- 4.18-rc2/arch/x86/crypto/aegis128-aesni-asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aegis128-aesni-asm.S
>> @@ -75,7 +75,7 @@
>>   * %r9
>>   */
>>  __load_partial:
>> - xor %r9, %r9
>> + xor %r9d, %r9d
>>   pxor MSG, MSG
>>
>>   mov LEN, %r8
>> --- 4.18-rc2/arch/x86/crypto/aegis128l-aesni-asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aegis128l-aesni-asm.S
>> @@ -66,7 +66,7 @@
>>   * %r9
>>   */
>>  __load_partial:
>> - xor %r9, %r9
>> + xor %r9d, %r9d
>>   pxor MSG0, MSG0
>>   pxor MSG1, MSG1
>>
>> --- 4.18-rc2/arch/x86/crypto/aegis256-aesni-asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aegis256-aesni-asm.S
>> @@ -59,7 +59,7 @@
>>   * %r9
>>   */
>>  __load_partial:
>> - xor %r9, %r9
>> + xor %r9d, %r9d
>>   pxor MSG, MSG
>>
>>   mov LEN, %r8
>> --- 4.18-rc2/arch/x86/crypto/aesni-intel_asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aesni-intel_asm.S
>> @@ -258,7 +258,7 @@ ALL_F: .octa 0xffffffffffffffffffff
>>  .macro GCM_INIT Iv SUBKEY AAD AADLEN
>>   mov \AADLEN, %r11
>>   mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
>> - xor %r11, %r11
>> + xor %r11d, %r11d
>>   mov %r11, InLen(%arg2) # ctx_data.in_length = 0
>>   mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
>>   mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
>> @@ -286,7 +286,7 @@ ALL_F: .octa 0xffffffffffffffffffff
>>   movdqu HashKey(%arg2), %xmm13
>>   add %arg5, InLen(%arg2)
>>
>> - xor %r11, %r11 # initialise the data pointer offset as zero
>> + xor %r11d, %r11d # initialise the data pointer offset as zero
>>   PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
>>
>>   sub %r11, %arg5 # sub partial block data used
>> @@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
>>
>>   # GHASH computation for the last <16 Byte block
>>   GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
>> - xor %rax,%rax
>> + xor %eax, %eax
>>
>>   mov %rax, PBlockLen(%arg2)
>>   jmp _dec_done_\@
>> @@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
>>
>>   # GHASH computation for the last <16 Byte block
>>   GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
>> - xor %rax,%rax
>> + xor %eax, %eax
>>
>>   mov %rax, PBlockLen(%arg2)
>>   jmp _encode_done_\@
>> --- 4.18-rc2/arch/x86/crypto/aesni-intel_avx-x86_64.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/aesni-intel_avx-x86_64.S
>> @@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
>>
>>  _get_AAD_done\@:
>>   # initialize the data pointer offset as zero
>> - xor %r11, %r11
>> + xor %r11d, %r11d
>>
>>   # start AES for num_initial_blocks blocks
>>   mov arg5, %rax # rax = *Y0
>> @@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
>>
>>  _get_AAD_done\@:
>>   # initialize the data pointer offset as zero
>> - xor %r11, %r11
>> + xor %r11d, %r11d
>>
>>   # start AES for num_initial_blocks blocks
>>   mov arg5, %rax # rax = *Y0
>> --- 4.18-rc2/arch/x86/crypto/morus1280-avx2-asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/morus1280-avx2-asm.S
>> @@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
>>   * %r9
>>   */
>>  __load_partial:
>> - xor %r9, %r9
>> + xor %r9d, %r9d
>>   vpxor MSG, MSG, MSG
>>
>>   mov %rcx, %r8
>> --- 4.18-rc2/arch/x86/crypto/morus1280-sse2-asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/morus1280-sse2-asm.S
>> @@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
>>   * %r9
>>   */
>>  __load_partial:
>> - xor %r9, %r9
>> + xor %r9d, %r9d
>>   pxor MSG_LO, MSG_LO
>>   pxor MSG_HI, MSG_HI
>>
>> --- 4.18-rc2/arch/x86/crypto/morus640-sse2-asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/morus640-sse2-asm.S
>> @@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
>>   * %r9
>>   */
>>  __load_partial:
>> - xor %r9, %r9
>> + xor %r9d, %r9d
>>   pxor MSG, MSG
>>
>>   mov %rcx, %r8
>> --- 4.18-rc2/arch/x86/crypto/sha1_ssse3_asm.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/crypto/sha1_ssse3_asm.S
>> @@ -96,7 +96,7 @@
>>   # cleanup workspace
>>   mov $8, %ecx
>>   mov %rsp, %rdi
>> - xor %rax, %rax
>> + xor %eax, %eax
>>   rep stosq
>>
>>   mov %rbp, %rsp # deallocate workspace
>> --- 4.18-rc2/arch/x86/kernel/head_64.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/kernel/head_64.S
>> @@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
>>    * address given in m16:64.
>>    */
>>   pushq $.Lafter_lret # put return address on stack for unwinder
>> - xorq %rbp, %rbp # clear frame pointer
>> + xorl %ebp, %ebp # clear frame pointer
>>   movq initial_code(%rip), %rax
>>   pushq $__KERNEL_CS # set correct cs
>>   pushq %rax # target address in negative space
>> --- 4.18-rc2/arch/x86/kernel/paravirt_patch_64.c
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/kernel/paravirt_patch_64.c
>> @@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
>>
>>  #if defined(CONFIG_PARAVIRT_SPINLOCKS)
>>  DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
>> -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
>> +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
>>  #endif
>>
>>  unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
>> --- 4.18-rc2/arch/x86/lib/memcpy_64.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/lib/memcpy_64.S
>> @@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
>>
>>   /* Copy successful. Return zero */
>>  .L_done_memcpy_trap:
>> - xorq %rax, %rax
>> + xorl %eax, %eax
>>   ret
>>  ENDPROC(__memcpy_mcsafe)
>>  EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
>> --- 4.18-rc2/arch/x86/power/hibernate_asm_64.S
>> +++ 4.18-rc2-x86_64-32bit-XOR/arch/x86/power/hibernate_asm_64.S
>> @@ -137,7 +137,7 @@ ENTRY(restore_registers)
>>   /* Saved in save_processor_state. */
>>   lgdt saved_context_gdt_desc(%rax)
>>
>> - xorq %rax, %rax
>> + xorl %eax, %eax
>>
>>   /* tell the hibernation core that we've just restored the memory */
>>   movq %rax, in_suspend(%rip)
>>
>>
Writing the low 32 bits zero-extends the result to 64 bits anyway.
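
For anyone who wants to see the zero-extension in action, below is a minimal user-space sketch (my own illustration, not part of Jan's patch; the file name and the use of GCC extended inline asm are assumptions). It sets %r9 to all ones, clears it with the 32-bit form the patch switches to, and reads the full 64-bit register back as zero, because writing any 32-bit GPR clears bits 63:32 of the containing 64-bit register:

/* zeroext_demo.c - hypothetical stand-alone check, not from the patch.
 * Build and run on x86-64:
 *   gcc -O2 -o zeroext_demo zeroext_demo.c && ./zeroext_demo
 */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        uint64_t r9val;

        asm("movq $-1, %%r9\n\t"        /* %r9 = 0xffffffffffffffff */
            "xor %%r9d, %%r9d\n\t"      /* 32-bit xor, as in the patch */
            "movq %%r9, %0"             /* read back the full 64-bit register */
            : "=r" (r9val) : : "r9");

        /* prints 0000000000000000 - the upper half was cleared as well */
        printf("%016" PRIx64 "\n", r9val);
        return 0;
}

So the patched code still zeroes the whole register; the point of the change, as the commit message says, is that some Intel CPUs only recognize the 32-bit xor as a dependency-breaking zeroing idiom. For the low eight registers the 32-bit form is also one byte shorter because it needs no REX.W prefix (for %r8-%r15 a REX prefix is required either way).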