Module Name:	src
Committed By:	riastradh
Date:		Sat Jul 25 22:11:05 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/x86: aes_ni_64.S

Log Message:
Invert some loops to save a jmp instruction on each iteration.

No semantic change intended.

To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3 --- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 Tue Jun 30 21:41:04 2020 +++ src/sys/crypto/aes/arch/x86/aes_ni_64.S Sat Jul 25 22:11:05 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $ */ +/* $NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -522,14 +522,14 @@ ENTRY(aesni_enctodec) shl $4,%edx /* rdx := byte offset of last round key */ movdqa (%rdi,%rdx),%xmm0 /* load last round key */ movdqa %xmm0,(%rsi) /* store last round key verbatim */ -1: sub $0x10,%rdx /* advance to next round key */ - lea 0x10(%rsi),%rsi - jz 2f /* stop if this is the last one */ - movdqa (%rdi,%rdx),%xmm0 /* load round key */ + jmp 2f +1: movdqa (%rdi,%rdx),%xmm0 /* load round key */ aesimc %xmm0,%xmm0 /* convert encryption to decryption */ movdqa %xmm0,(%rsi) /* store round key */ - jmp 1b -2: movdqa (%rdi),%xmm0 /* load first round key */ +2: sub $0x10,%rdx /* advance to next round key */ + lea 0x10(%rsi),%rsi + jnz 1b /* repeat if more rounds */ + movdqa (%rdi),%xmm0 /* load first round key */ movdqa %xmm0,(%rsi) /* store first round key verbatim */ ret END(aesni_enctodec) @@ -614,16 +614,16 @@ ENTRY(aesni_cbc_dec1) mov %rcx,%r10 /* r10 := nbytes */ movdqu -0x10(%rsi,%r10),%xmm0 /* xmm0 := last ciphertext block */ movdqu %xmm0,(%r8) /* update iv */ -1: mov %r9d,%ecx /* ecx := nrounds */ - call aesni_dec1 /* xmm0 := cv ^ ptxt */ - sub $0x10,%r10 - jz 2f /* first block if r10 is now zero */ - movdqu -0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */ + jmp 2f +1: movdqu -0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */ pxor %xmm8,%xmm0 /* xmm0 := ptxt */ movdqu %xmm0,(%rdx,%r10) /* store plaintext block */ movdqa %xmm8,%xmm0 /* move cv = ciphertext block */ - jmp 1b -2: pxor (%rsp),%xmm0 /* xmm0 := ptxt */ +2: mov 
%r9d,%ecx /* ecx := nrounds */ + call aesni_dec1 /* xmm0 := cv ^ ptxt */ + sub $0x10,%r10 + jnz 1b /* repeat if more blocks */ + pxor (%rsp),%xmm0 /* xmm0 := ptxt */ movdqu %xmm0,(%rdx) /* store first plaintext block */ leave ret @@ -649,7 +649,11 @@ ENTRY(aesni_cbc_dec8) mov %rcx,%r10 /* r10 := nbytes */ movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := ciphertext block[n-1] */ movdqu %xmm7,(%r8) /* update iv */ -1: movdqu -0x20(%rsi,%r10),%xmm6 /* xmm6 := ciphertext block[n-2] */ + jmp 2f +1: movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := cv[0] */ + pxor %xmm7,%xmm0 /* xmm0 := ptxt[0] */ + movdqu %xmm0,(%rdx,%r10) /* store plaintext block */ +2: movdqu -0x20(%rsi,%r10),%xmm6 /* xmm6 := ciphertext block[n-2] */ movdqu -0x30(%rsi,%r10),%xmm5 /* xmm5 := ciphertext block[n-3] */ movdqu -0x40(%rsi,%r10),%xmm4 /* xmm4 := ciphertext block[n-4] */ movdqu -0x50(%rsi,%r10),%xmm3 /* xmm3 := ciphertext block[n-5] */ @@ -680,12 +684,8 @@ ENTRY(aesni_cbc_dec8) movdqu %xmm2,-0x60(%rdx,%r10) movdqu %xmm1,-0x70(%rdx,%r10) sub $0x80,%r10 - jz 2f /* first block if r10 is now zero */ - movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := cv[0] */ - pxor %xmm7,%xmm0 /* xmm0 := ptxt[0] */ - movdqu %xmm0,(%rdx,%r10) /* store plaintext block */ - jmp 1b -2: pxor (%rsp),%xmm0 /* xmm0 := ptxt[0] */ + jnz 1b /* repeat if more blocks */ + pxor (%rsp),%xmm0 /* xmm0 := ptxt[0] */ movdqu %xmm0,(%rdx) /* store first plaintext block */ leave ret @@ -966,12 +966,12 @@ aesni_enc1: shl $4,%ecx /* ecx := total byte size of round keys */ lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */ neg %rcx /* rcx := byte offset of round key from end */ -1: movdqa (%rax,%rcx),%xmm8 /* load round key */ + jmp 2f +1: aesenc %xmm8,%xmm0 +2: movdqa (%rax,%rcx),%xmm8 /* load round key */ add $0x10,%rcx - jz 2f /* stop if this is the last one */ - aesenc %xmm8,%xmm0 - jmp 1b -2: aesenclast %xmm8,%xmm0 + jnz 1b /* repeat if more rounds */ + aesenclast %xmm8,%xmm0 ret END(aesni_enc1) @@ -999,10 +999,8 @@ aesni_enc8: shl $4,%ecx /* ecx 
:= total byte size of round keys */ lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */ neg %rcx /* rcx := byte offset of round key from end */ -1: movdqa (%rax,%rcx),%xmm8 /* load round key */ - add $0x10,%rcx - jz 2f /* stop if this is the last one */ - aesenc %xmm8,%xmm0 + jmp 2f +1: aesenc %xmm8,%xmm0 aesenc %xmm8,%xmm1 aesenc %xmm8,%xmm2 aesenc %xmm8,%xmm3 @@ -1010,8 +1008,10 @@ aesni_enc8: aesenc %xmm8,%xmm5 aesenc %xmm8,%xmm6 aesenc %xmm8,%xmm7 - jmp 1b -2: aesenclast %xmm8,%xmm0 +2: movdqa (%rax,%rcx),%xmm8 /* load round key */ + add $0x10,%rcx + jnz 1b /* repeat if more rounds */ + aesenclast %xmm8,%xmm0 aesenclast %xmm8,%xmm1 aesenclast %xmm8,%xmm2 aesenclast %xmm8,%xmm3 @@ -1038,12 +1038,12 @@ aesni_dec1: shl $4,%ecx /* ecx := byte offset of round key */ lea 0x10(%rdi,%rcx),%rax /* rax := pointer to round key */ neg %rcx /* rcx := byte offset of round key from end */ -1: movdqa (%rax,%rcx),%xmm8 /* load round key */ + jmp 2f +1: aesdec %xmm8,%xmm0 +2: movdqa (%rax,%rcx),%xmm8 /* load round key */ add $0x10,%rcx - jz 2f /* stop if this is the last one */ - aesdec %xmm8,%xmm0 - jmp 1b -2: aesdeclast %xmm8,%xmm0 + jnz 1b /* repeat if more rounds */ + aesdeclast %xmm8,%xmm0 ret END(aesni_dec1) @@ -1071,10 +1071,8 @@ aesni_dec8: shl $4,%ecx /* ecx := byte offset of round key */ lea 0x10(%rdi,%rcx),%rax /* rax := pointer to round key */ neg %rcx /* rcx := byte offset of round key from end */ -1: movdqa (%rax,%rcx),%xmm8 /* load round key */ - add $0x10,%rcx - jz 2f /* stop if this is the last one */ - aesdec %xmm8,%xmm0 + jmp 2f +1: aesdec %xmm8,%xmm0 aesdec %xmm8,%xmm1 aesdec %xmm8,%xmm2 aesdec %xmm8,%xmm3 @@ -1082,8 +1080,10 @@ aesni_dec8: aesdec %xmm8,%xmm5 aesdec %xmm8,%xmm6 aesdec %xmm8,%xmm7 - jmp 1b -2: aesdeclast %xmm8,%xmm0 +2: movdqa (%rax,%rcx),%xmm8 /* load round key */ + add $0x10,%rcx + jnz 1b /* repeat if more rounds */ + aesdeclast %xmm8,%xmm0 aesdeclast %xmm8,%xmm1 aesdeclast %xmm8,%xmm2 aesdeclast %xmm8,%xmm3