Module Name:    src
Committed By:   riastradh
Date:           Sat Jul 25 22:11:05 UTC 2020

Modified Files:
        src/sys/crypto/aes/arch/x86: aes_ni_64.S

Log Message:
Invert some loops to save a jmp instruction on each iteration.

No semantic change intended.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2	Tue Jun 30 21:41:04 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Sat Jul 25 22:11:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -522,14 +522,14 @@ ENTRY(aesni_enctodec)
 	shl	$4,%edx		/* rdx := byte offset of last round key */
 	movdqa	(%rdi,%rdx),%xmm0	/* load last round key */
 	movdqa	%xmm0,(%rsi)	/* store last round key verbatim */
-1:	sub	$0x10,%rdx	/* advance to next round key */
-	lea	0x10(%rsi),%rsi
-	jz	2f		/* stop if this is the last one */
-	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
+	jmp	2f
+1:	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
 	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
 	movdqa	%xmm0,(%rsi)	/* store round key */
-	jmp	1b
-2:	movdqa	(%rdi),%xmm0	/* load first round key */
+2:	sub	$0x10,%rdx	/* advance to next round key */
+	lea	0x10(%rsi),%rsi
+	jnz	1b		/* repeat if more rounds */
+	movdqa	(%rdi),%xmm0	/* load first round key */
 	movdqa	%xmm0,(%rsi)	/* store first round key verbatim */
 	ret
 END(aesni_enctodec)
@@ -614,16 +614,16 @@ ENTRY(aesni_cbc_dec1)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	-0x10(%rsi,%r10),%xmm0	/* xmm0 := last ciphertext block */
 	movdqu	%xmm0,(%r8)		/* update iv */
-1:	mov	%r9d,%ecx		/* ecx := nrounds */
-	call	aesni_dec1		/* xmm0 := cv ^ ptxt */
-	sub	$0x10,%r10
-	jz	2f			/* first block if r10 is now zero */
-	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
+	jmp	2f
+1:	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
 	pxor	%xmm8,%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
 	movdqa	%xmm8,%xmm0		/* move cv = ciphertext block */
-	jmp	1b
-2:	pxor	(%rsp),%xmm0		/* xmm0 := ptxt */
+2:	mov	%r9d,%ecx		/* ecx := nrounds */
+	call	aesni_dec1		/* xmm0 := cv ^ ptxt */
+	sub	$0x10,%r10
+	jnz	1b			/* repeat if more blocks */
+	pxor	(%rsp),%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx)		/* store first plaintext block */
 	leave
 	ret
@@ -649,7 +649,11 @@ ENTRY(aesni_cbc_dec8)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := ciphertext block[n-1] */
 	movdqu	%xmm7,(%r8)		/* update iv */
-1:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
+	jmp	2f
+1:	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
+	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
+	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
+2:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
 	movdqu	-0x30(%rsi,%r10),%xmm5	/* xmm5 := ciphertext block[n-3] */
 	movdqu	-0x40(%rsi,%r10),%xmm4	/* xmm4 := ciphertext block[n-4] */
 	movdqu	-0x50(%rsi,%r10),%xmm3	/* xmm3 := ciphertext block[n-5] */
@@ -680,12 +684,8 @@ ENTRY(aesni_cbc_dec8)
 	movdqu	%xmm2,-0x60(%rdx,%r10)
 	movdqu	%xmm1,-0x70(%rdx,%r10)
 	sub	$0x80,%r10
-	jz	2f			/* first block if r10 is now zero */
-	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
-	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
-	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
-	jmp	1b
-2:	pxor	(%rsp),%xmm0		/* xmm0 := ptxt[0] */
+	jnz	1b			/* repeat if more blocks */
+	pxor	(%rsp),%xmm0		/* xmm0 := ptxt[0] */
 	movdqu	%xmm0,(%rdx)		/* store first plaintext block */
 	leave
 	ret
@@ -966,12 +966,12 @@ aesni_enc1:
 	shl	$4,%ecx		/* ecx := total byte size of round keys */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
+	jmp	2f
+1:	aesenc	%xmm8,%xmm0
+2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
 	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesenc	%xmm8,%xmm0
-	jmp	1b
-2:	aesenclast %xmm8,%xmm0
+	jnz	1b		/* repeat if more rounds */
+	aesenclast %xmm8,%xmm0
 	ret
 END(aesni_enc1)
 
@@ -999,10 +999,8 @@ aesni_enc8:
 	shl	$4,%ecx		/* ecx := total byte size of round keys */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
-	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesenc	%xmm8,%xmm0
+	jmp	2f
+1:	aesenc	%xmm8,%xmm0
 	aesenc	%xmm8,%xmm1
 	aesenc	%xmm8,%xmm2
 	aesenc	%xmm8,%xmm3
@@ -1010,8 +1008,10 @@ aesni_enc8:
 	aesenc	%xmm8,%xmm5
 	aesenc	%xmm8,%xmm6
 	aesenc	%xmm8,%xmm7
-	jmp	1b
-2:	aesenclast %xmm8,%xmm0
+2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
+	add	$0x10,%rcx
+	jnz	1b		/* repeat if more rounds */
+	aesenclast %xmm8,%xmm0
 	aesenclast %xmm8,%xmm1
 	aesenclast %xmm8,%xmm2
 	aesenclast %xmm8,%xmm3
@@ -1038,12 +1038,12 @@ aesni_dec1:
 	shl	$4,%ecx		/* ecx := byte offset of round key */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := pointer to round key */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
+	jmp	2f
+1:	aesdec	%xmm8,%xmm0
+2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
 	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesdec	%xmm8,%xmm0
-	jmp	1b
-2:	aesdeclast %xmm8,%xmm0
+	jnz	1b		/* repeat if more rounds */
+	aesdeclast %xmm8,%xmm0
 	ret
 END(aesni_dec1)
 
@@ -1071,10 +1071,8 @@ aesni_dec8:
 	shl	$4,%ecx		/* ecx := byte offset of round key */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := pointer to round key */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
-	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesdec	%xmm8,%xmm0
+	jmp	2f
+1:	aesdec	%xmm8,%xmm0
 	aesdec	%xmm8,%xmm1
 	aesdec	%xmm8,%xmm2
 	aesdec	%xmm8,%xmm3
@@ -1082,8 +1080,10 @@ aesni_dec8:
 	aesdec	%xmm8,%xmm5
 	aesdec	%xmm8,%xmm6
 	aesdec	%xmm8,%xmm7
-	jmp	1b
-2:	aesdeclast %xmm8,%xmm0
+2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
+	add	$0x10,%rcx
+	jnz	1b		/* repeat if more rounds */
+	aesdeclast %xmm8,%xmm0
 	aesdeclast %xmm8,%xmm1
 	aesdeclast %xmm8,%xmm2
 	aesdeclast %xmm8,%xmm3

Reply via email to