CVS commit: src/sys/crypto/aes/arch/arm

2020-11-21 Thread Rin Okuyama
Module Name:    src
Committed By:   rin
Date:   Sat Nov 21 08:09:21 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c

Log Message:
Fix build with clang for earmv7hf; loadroundkey() is used only for __aarch64__.
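
The likely failure mode, as a minimal illustrative sketch (hypothetical
helper names, not the committed code): clang, unlike GCC, warns about a
static inline function that is defined in a .c file but never referenced,
and a -Werror build turns the warning into a build break, so a helper
whose only callers are #ifdef __aarch64__ must have its definition
guarded the same way:

#include <stdint.h>

#ifdef __aarch64__
static inline uint32_t
helper(uint32_t x)			/* used only by aarch64 callers */
{
	return x ^ 0x0f0f0f0f;		/* stand-in for the real body */
}
#endif

uint32_t
caller(uint32_t x)
{
#ifdef __aarch64__
	return helper(x);
#else
	return x;		/* 32-bit path never references helper */
#endif
}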


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon.c:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.5	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Sat Nov 21 08:09:21 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $");
 
 #include 
 
@@ -196,11 +196,13 @@ inv	= VQ_N_U8(0x80,0x01,0x08,0x0D,0x0F,0
 inva	= VQ_N_U8(0x80,0x07,0x0B,0x0F,0x06,0x0A,0x04,0x01,
 	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03);
 
+#ifdef __aarch64__
 static inline uint8x16_t
 loadroundkey(const void *rkp)
 {
 	return vld1q_u8(rkp);
 }
+#endif
 
 static inline void
 storeroundkey(void *rkp, uint8x16_t rk)



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Thu Sep 10 11:31:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.
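
The layout change, sketched in C (hypothetical types, not the committed
code): interleaving each round's forward and backward mix-columns
permutations into one 32-byte record lets a single 256-bit load fetch
the pair, and indexing becomes base + (rmod4 << 5), the lsl #5 visible
in the diff below:

#include <stdint.h>
#include <string.h>

struct mc_pair {
	uint8_t forward[16];		/* was mc_forward[i] */
	uint8_t backward[16];		/* was mc_backward[i] */
};

/* one 32-byte record per round index, 32-byte aligned in the real table */
static const struct mc_pair mc[4];

void
load_pair(unsigned rmod4, uint8_t f[16], uint8_t b[16])
{
	/* stands in for the single vld1.8 {q12-q13}, [r6 :256] */
	memcpy(f, mc[rmod4].forward, 16);
	memcpy(b, mc[rmod4].backward, 16);
}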


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.11
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10	Thu Sep 10 11:30:28 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:31:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -54,36 +54,26 @@ inva:
 	.byte	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
 END(inva)
 
-	.type	mc_forward,_ASM_TYPE_OBJECT
-mc_forward:
-	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 */
+	.type	mc,_ASM_TYPE_OBJECT
+mc:
+	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 forward */
 	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
-
-	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 backward */
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
+	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 forward */
 	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
-
-	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 */
+	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 backward */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
+	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 forward */
 	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
-
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 backward */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
 .Lmc_forward_3:
-	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 */
+	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 forward */
 	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
-END(mc_forward)
-
-	.type	mc_backward,_ASM_TYPE_OBJECT
-mc_backward:
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 */
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
-
-	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 */
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
-
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 */
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
-
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 backward */
 	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
-END(mc_backward)
+END(mc)
 
 	.type	sr,_ASM_TYPE_OBJECT
 sr:
@@ -210,8 +200,7 @@ ENTRY(aes_neon_enc1)
 
 	/*
 	 * r3: rmod4
-	 * r4: mc_forward
-	 * r5: mc_backward
+	 * r4: mc
 	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
@@ -225,8 +214,8 @@ ENTRY(aes_neon_enc1)
 	 * q9={d18-d19}: sb2[1]
 	 * q10={d20-d21}: inv
 	 * q11={d22-d23}: inva
-	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4]
-	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4]
+	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward
+	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward
 	 * q14={d28-d29}: rk/A2/A2_B_D
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
@@ -254,9 +243,8 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
-	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, ip, #(mc_forward - .Lconstants)
-	add	r5, ip, #(mc_backward - .Lconstants)
+	/* r4 := mc */
+	add	r4, ip, #(mc - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -291,13 +279,11 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d25, {q8}, d5
 	vtbl.8	d26, {q9}, d6
 	vtbl.8	d27, {q9}, d7
+	add	r6, r4, r3, lsl #5	/* r6 := &mc[rmod4] */
 	veor	q14, q12, q13
 
-	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
-	add	r6, r4, r3, lsl #4
-	add	r8, r5, r3, lsl #4
-	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r8 :128]
+	/* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
+	vld1.8	{q12-q13}, [r6 :256]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -474,7 +460,7 @@ ENTRY(aes_neon_dec1)
 	add	r8, ip, #(.Lmc_forward_3 - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 := dsbb[0], q7 := dsbb[1] */
 	vld1.8	{q10-q11}, [r6 :256]	/* q10 := inv, q11 := inva */
-	vld1.8	{q15}, [r8 :128]	/* q15 := mc_forward[3] */
+	vld1.8	{q15}, [r8 :128]	/* q15 := mc[3].forward */
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Thu Sep 10 11:30:28 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Hoist dsbd/dsbe address calculation out of loop.
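
The transformation, as a plain-C sketch (hypothetical names, not the
committed code): both table addresses are loop-invariant, so computing
them once ahead of the round loop leaves only the loads inside it:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
load256(uint8_t dst[32], const uint8_t *src)	/* like vld1.8 {q8-q9} */
{
	memcpy(dst, src, 32);
}

void
dec_rounds(const uint8_t *base, size_t dsbd_off, size_t dsbe_off,
    unsigned nrounds)
{
	uint8_t q8q9[32];
	const uint8_t *dsbd_p = base + dsbd_off;	/* hoisted */
	const uint8_t *dsbe_p = base + dsbe_off;	/* hoisted */

	while (nrounds--) {
		load256(q8q9, dsbd_p);	/* was: address recomputed here */
		/* ... dsbd half of the round ... */
		load256(q8q9, dsbe_p);	/* was: address recomputed here */
		/* ... dsbe half of the round ... */
	}
}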


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9	Thu Sep 10 11:30:08 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:30:28 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
 
 	.fpu	neon
 
@@ -431,6 +431,9 @@ ENTRY(aes_neon_dec1)
 
 	/*
 	 * r3: 3 & ~(nrounds - 1)
+	 * r4: dsbd
+	 * r5: dsbe
+	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -488,6 +491,10 @@ ENTRY(aes_neon_dec1)
 	add	r4, ip, #(dsb9 - .Lconstants)
 	vld1.8	{q4-q5}, [r4 :256]	/* q4 := dsb9[0], q5 := dsb9[1] */
 
+	/* r4 := dsbd, r5 := dsbe */
+	add	r4, ip, #(dsbd - .Lconstants)
+	add	r5, ip, #(dsbe - .Lconstants)
+
 	/* q0 := rk[0] + diptlo(lo) + dipthi(hi) */
 	veor	q0, q14, q2
 	veor	q0, q0, q3
@@ -496,7 +503,6 @@ ENTRY(aes_neon_dec1)
 
 	_ALIGN_TEXT
 1:	/* load dsbd */
-	add	r4, ip, #(dsbd - .Lconstants)
 	vld1.8	{q8-q9}, [r4 :256]	/* q8 := dsbd[0], q9 := dsbd[1] */
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
@@ -522,8 +528,7 @@ ENTRY(aes_neon_dec1)
 	veor	q0, q0, q13
 
 	/* load dsbe */
-	add	r4, ip, #(dsbe - .Lconstants)
-	vld1.8	{q8-q9}, [r4 :256]!	/* q8 := dsbe[0], q9 := dsbe[1] */
+	vld1.8	{q8-q9}, [r5 :256]	/* q8 := dsbe[0], q9 := dsbe[1] */
 
 	/* q0 := x(mc) + dsbb_0(io) + dsbb_1(jo) */
 	vtbl.8	d28, {q0}, d30



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Thu Sep 10 11:30:08 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Tweak register usage.

- Call r12 by its usual name, ip.
- No need for r7 or r11=fp at the moment.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8	Thu Sep 10 11:29:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:30:08 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $")
 
 	.fpu	neon
 
@@ -205,14 +205,14 @@ ENTRY(aes_neon_enc1)
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
-	push	{r4, r5, r6, r7, r8, r10, r11, lr}
+	push	{r4, r5, r6, r8, r10, lr}
 	vpush	{d8-d15}
 
 	/*
 	 * r3: rmod4
 	 * r4: mc_forward
 	 * r5: mc_backward
-	 * r6,r7,r8,r10,r11,r12: temporaries
+	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -231,32 +231,32 @@ ENTRY(aes_neon_enc1)
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
 
-	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
-	ldr	r12, .Lconstants_addr
-	adr	r11, .Lconstants_addr
+	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
+	ldr	ip, .Lconstants_addr
+	adr	r10, .Lconstants_addr
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 	movw	r3, #0
 	vmov.i8	q1, #0x0f
 
-	/* r12 := .Lconstants */
-	add	r12, r12, r11
+	/* ip := .Lconstants */
+	add	ip, ip, r10
 
 	/* (q4, q5) := (iptlo, ipthi) */
-	add	r6, r12, #(ipt - .Lconstants)
+	add	r6, ip, #(ipt - .Lconstants)
 	vld1.8	{q4-q5}, [r6 :256]
 
 	/* load the rest of the constants */
-	add	r4, r12, #(sb1 - .Lconstants)
-	add	r6, r12, #(sb2 - .Lconstants)
-	add	r8, r12, #(.Linv_inva - .Lconstants)
+	add	r4, ip, #(sb1 - .Lconstants)
+	add	r6, ip, #(sb2 - .Lconstants)
+	add	r8, ip, #(.Linv_inva - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 = sb1[0], q7 = sb1[1] */
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
 	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, r12, #(mc_forward - .Lconstants)
-	add	r5, r12, #(mc_backward - .Lconstants)
+	add	r4, ip, #(mc_forward - .Lconstants)
+	add	r5, ip, #(mc_backward - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -295,9 +295,9 @@ ENTRY(aes_neon_enc1)
 
 	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
 	add	r6, r4, r3, lsl #4
-	add	r7, r5, r3, lsl #4
+	add	r8, r5, r3, lsl #4
 	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r7 :128]
+	vld1.8	{q13}, [r8 :128]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -365,8 +365,8 @@ ENTRY(aes_neon_enc1)
 	bne	1b
 
 	/* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */
-	add	r8, r12, #(sr - .Lconstants)
-	add	r6, r12, #(sbo - .Lconstants)
+	add	r8, ip, #(sr - .Lconstants)
+	add	r6, ip, #(sbo - .Lconstants)
 	add	r8, r8, r3, lsl #4
 	vld1.8	{q6-q7}, [r6 :256]
 	vld1.8	{q15}, [r8 :128]
@@ -388,7 +388,7 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d1, {q2}, d31
 
 	vpop	{d8-d15}
-	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
+	pop	{r4, r5, r6, r8, r10, lr}
 #ifdef __SOFTFP__
 #ifdef __ARM_BIG_ENDIAN
 	vmov	r1, r0, d0
@@ -426,7 +426,7 @@ ENTRY(aes_neon_dec1)
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
-	push	{r4, r5, r6, r7, r8, r10, r11, lr}
+	push	{r4, r5, r6, r8, r10, lr}
 	vpush	{d8-d15}
 
 	/*
@@ -449,26 +449,26 @@ ENTRY(aes_neon_dec1)
 	 * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)]
 	 */
 
-	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
-	ldr	r12, .Lconstants_addr
-	adr	r11, .Lconstants_addr
+	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
+	ldr	ip, .Lconstants_addr
+	adr	r10, .Lconstants_addr
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 	rsb	r3, r1, #0		/* r3 := ~(x - 1) = -x */
 	vmov.i8	q1, #0x0f
 	and	r3, r3, #3		/* r3 := 3 & ~(x - 1) */
 
-	/* r12 := .Lconstants */
-	add	r12, r12, r11
+	/* ip := .Lconstants */
+	add	ip, ip, r10
 
 	/* (q4, q5) := (diptlo, dipthi) */
-	add	r6, r12, #(dipt - .Lconstants)
+	add	r6, ip, #(dipt - .Lconstants)
 	vld1.8	{q4-q5}, [r6 :256]
 
 	/* load the rest of the constants */
-	add	r4, r12, #(dsbb - .Lconstants)
-	add	r6, r12, #(.Linv_inva - .Lconstants)
-	add	r8, r12, #(.Lmc_forward_3 -

CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Thu Sep 10 11:29:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Write vtbl with {qN} rather than {d(2N)-d(2N+1)}.

Cosmetic; no functional change.


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7	Thu Sep 10 11:29:02 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:29:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $")
 
 	.fpu	neon
 
@@ -264,10 +264,10 @@ ENTRY(aes_neon_enc1)
 	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* (q2, q3) := (iptlo(lo), ipthi(hi)) */
-	vtbl.8	d4, {d8-d9}, d4
-	vtbl.8	d5, {d8-d9}, d5
-	vtbl.8	d6, {d10-d11}, d6
-	vtbl.8	d7, {d10-d11}, d7
+	vtbl.8	d4, {q4}, d4
+	vtbl.8	d5, {q4}, d5
+	vtbl.8	d6, {q5}, d6
+	vtbl.8	d7, {q5}, d7
 
 	/* q0 := rk[0] + iptlo(lo) + ipthi(hi) */
 	veor	q0, q14, q2
@@ -279,18 +279,18 @@ ENTRY(aes_neon_enc1)
 1:	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
-	vtbl.8	d24, {d12-d13}, d4
-	vtbl.8	d25, {d12-d13}, d5
-	vtbl.8	d26, {d14-d15}, d6
-	vtbl.8	d27, {d14-d15}, d7
+	vtbl.8	d24, {q6}, d4
+	vtbl.8	d25, {q6}, d5
+	vtbl.8	d26, {q7}, d6
+	vtbl.8	d27, {q7}, d7
 	veor	q0, q14, q12
 	veor	q0, q0, q13
 
 	/* q14 := A2 = sb2_0[io] + sb2_1[jo] */
-	vtbl.8	d24, {d16-d17}, d4
-	vtbl.8	d25, {d16-d17}, d5
-	vtbl.8	d26, {d18-d19}, d6
-	vtbl.8	d27, {d18-d19}, d7
+	vtbl.8	d24, {q8}, d4
+	vtbl.8	d25, {q8}, d5
+	vtbl.8	d26, {q9}, d6
+	vtbl.8	d27, {q9}, d7
 	veor	q14, q12, q13
 
 	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
@@ -300,18 +300,18 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q13}, [r7 :128]
 
 	/* q15 := A2_B = A2 + A(mcf) */
-	vtbl.8	d30, {d0-d1}, d24
-	vtbl.8	d31, {d0-d1}, d25
+	vtbl.8	d30, {q0}, d24
+	vtbl.8	d31, {q0}, d25
 	veor	q15, q15, q14
 
 	/* q14 := A2_B_D = A2_B + A(mcb) */
-	vtbl.8	d28, {d0-d1}, d26
-	vtbl.8	d29, {d0-d1}, d27
+	vtbl.8	d28, {q0}, d26
+	vtbl.8	d29, {q0}, d27
 	veor	q14, q14, q15
 
 	/* q0 := x = A2_B_D + A2_B(mcf) */
-	vtbl.8	d0, {d30-d31}, d24
-	vtbl.8	d1, {d30-d31}, d25
+	vtbl.8	d0, {q15}, d24
+	vtbl.8	d1, {q15}, d25
 	veor	q0, q0, q14
 
 2:	/*
@@ -324,19 +324,19 @@ ENTRY(aes_neon_enc1)
 	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* q0 := a/k */
-	vtbl.8	d0, {d22-d23}, d4
-	vtbl.8	d1, {d22-d23}, d5
+	vtbl.8	d0, {q11}, d4
+	vtbl.8	d1, {q11}, d5
 
 	/* q2 := j = i + k */
 	veor	q2, q3, q2
 
 	/* q12 := ir = 1/i */
-	vtbl.8	d24, {d20-d21}, d6
-	vtbl.8	d25, {d20-d21}, d7
+	vtbl.8	d24, {q10}, d6
+	vtbl.8	d25, {q10}, d7
 
 	/* q13 := jr = 1/j */
-	vtbl.8	d26, {d20-d21}, d4
-	vtbl.8	d27, {d20-d21}, d5
+	vtbl.8	d26, {q10}, d4
+	vtbl.8	d27, {q10}, d5
 
 	/* q12 := iak = 1/i + a/k */
 	veor	q12, q12, q0
@@ -345,12 +345,12 @@ ENTRY(aes_neon_enc1)
 	veor	q13, q13, q0
 
 	/* q12 := iakr = 1/(1/i + a/k) */
-	vtbl.8	d24, {d20-d21}, d24
-	vtbl.8	d25, {d20-d21}, d25
+	vtbl.8	d24, {q10}, d24
+	vtbl.8	d25, {q10}, d25
 
 	/* q13 := jakr = 1/(1/j + a/k) */
-	vtbl.8	d26, {d20-d21}, d26
-	vtbl.8	d27, {d20-d21}, d27
+	vtbl.8	d26, {q10}, d26
+	vtbl.8	d27, {q10}, d27
 
 	/* q2 := io = j + 1/(1/i + a/k) */
 	veor	q2, q2, q12
@@ -374,18 +374,18 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* (q2, q3) := (sbo_0(io), sbo_1(jo)) */
-	vtbl.8	d4, {d12-d13}, d4
-	vtbl.8	d5, {d12-d13}, d5
-	vtbl.8	d6, {d14-d15}, d6
-	vtbl.8	d7, {d14-d15}, d7
+	vtbl.8	d4, {q6}, d4
+	vtbl.8	d5, {q6}, d5
+	vtbl.8	d6, {q7}, d6
+	vtbl.8	d7, {q7}, d7
 
 	/* q2 := x = rk[nr] + sbo_0(io) + sbo_1(jo) */
 	veor	q2, q2, q14
 	veor	q2, q2, q3
 
 	/* q0 := x(sr[rmod4]) */
-	vtbl.8	d0, {d4-d5}, d30
-	vtbl.8	d1, {d4-d5}, d31
+	vtbl.8	d0, {q2}, d30
+	vtbl.8	d1, {q2}, d31
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
@@ -479,10 +479,10 @@ ENTRY(aes_neon_dec1)
 	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* (q2, q3) := (diptlo(lo), dipthi(hi)) */
-	vtbl.8	d4, {d8-d9}, d4
-	vtbl.8	d5, {d8-d9}, d5
-	vtbl.8	d6, {d10-d11}, d6
-	vtbl.8	d7, {d10-d11}, d7
+	vtbl.8	d4, {q4}, d4
+	vtbl.8	d5, {q4}, d5
+	vtbl.8	d6, {q5}, d6
+	vtbl.8	d7, {q5}, d7
 
 	/* load dsb9 */
 	add	r4, r12, #(dsb9 - .Lconstants)
@@ -502,22 +502,22 @@ ENTRY(aes_neon_dec1)
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := rk[i] + dsb9_0(io) + dsb9_1(jo) 

CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Thu Sep 10 11:29:02 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Issue 256-bit loads rather than pairs of 128-bit loads.

Not sure why I didn't realize you could do this before!

Saves some temporary registers that can now be allocated to shave off
a few cycles.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6	Sun Aug 16 18:02:03 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:29:02 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $")
 
 	.fpu	neon
 
@@ -38,9 +38,10 @@ RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020
 	.long	.Lconstants - .
 
 	.section .rodata
-	.p2align 4
+	.p2align 5
 .Lconstants:
 
+.Linv_inva:	/* inv and inva must be consecutive */
 	.type	inv,_ASM_TYPE_OBJECT
 inv:
 	.byte	0x80,0x01,0x08,0x0D,0x0F,0x06,0x05,0x0E
@@ -99,125 +100,85 @@ sr:
 	.byte	0x08,0x05,0x02,0x0F,0x0C,0x09,0x06,0x03
 END(sr)
 
-	.type	iptlo,_ASM_TYPE_OBJECT
-iptlo:
-	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2
+	.type	ipt,_ASM_TYPE_OBJECT
+ipt:
+	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2	/* lo */
 	.byte	0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA
-END(iptlo)
-
-	.type	ipthi,_ASM_TYPE_OBJECT
-ipthi:
-	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C
+	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C /* hi */
 	.byte	0x81,0xCC,0xFD,0xB0,0xFC,0xB1,0x80,0xCD
-END(ipthi)
+END(ipt)
 
-	.type	sb1_0,_ASM_TYPE_OBJECT
-sb1_0:
-	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1
+	.type	sb1,_ASM_TYPE_OBJECT
+sb1:
+	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1 /* 0 */
 	.byte	0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5
-END(sb1_0)
-
-	.type	sb1_1,_ASM_TYPE_OBJECT
-sb1_1:
-	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36
+	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36 /* 1 */
 	.byte	0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B
-END(sb1_1)
+END(sb1)
 
-	.type	sb2_0,_ASM_TYPE_OBJECT
-sb2_0:
-	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2
+	.type	sb2,_ASM_TYPE_OBJECT
+sb2:
+	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2 /* 0 */
 	.byte	0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E
-END(sb2_0)
-
-	.type	sb2_1,_ASM_TYPE_OBJECT
-sb2_1:
-	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69
+	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69 /* 1 */
 	.byte	0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2
-END(sb2_1)
+END(sb2)
 
-	.type	sbo_0,_ASM_TYPE_OBJECT
-sbo_0:
-	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0
+	.type	sbo,_ASM_TYPE_OBJECT
+sbo:
+	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0 /* 0 */
 	.byte	0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15
-END(sbo_0)
-
-	.type	sbo_1,_ASM_TYPE_OBJECT
-sbo_1:
-	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF
+	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF /* 1 */
 	.byte	0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E
-END(sbo_1)
+END(sbo)
 
-	.type	diptlo,_ASM_TYPE_OBJECT
-diptlo:
-	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F
+	.type	dipt,_ASM_TYPE_OBJECT
+dipt:
+	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F	/* lo */
 	.byte	0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15
-END(diptlo)
-
-	.type	dipthi,_ASM_TYPE_OBJECT
-dipthi:
-	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86
+	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86	/* hi */
 	.byte	0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12
-END(dipthi)
+END(dipt)
 
-	.type	dsb9_0,_ASM_TYPE_OBJECT
-dsb9_0:
-	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85
+	.type	dsb9,_ASM_TYPE_OBJECT
+dsb9:
+	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85	/* 0 */
 	.byte	0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA
-END(dsb9_0)
-
-	.type	dsb9_1,_ASM_TYPE_OBJECT
-dsb9_1:
-	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0
+	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0	/* 1 */
 	.byte	0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72
-END(dsb9_1)
+END(dsb9)
 
-	.type	dsbd_0,_ASM_TYPE_OBJECT
-dsbd_0:
-	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D
+	.type	dsbd,_ASM_TYPE_OBJECT
+dsbd:
+	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D	/* 0 */
 	.byte	0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5
-END(dsbd_0)
-
-	.type	dsbd_1,_ASM_TYPE_OBJECT
-dsbd_1:
-	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C
+	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C	/* 1 */
 	.byte	0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29
-END(dsbd_1)
+END(dsbd)
 
-	.type	dsbb_0,_ASM_TYPE_OBJECT
-dsbb_0:
-	.byte	0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0

CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Sep  8 23:58:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Reallocate registers to shave off unnecessary MOV.


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.15
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14	Tue Sep  8 23:57:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep  8 23:58:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -917,13 +917,12 @@ END(aesarmv8_cbcmac_update1)
 ENTRY(aesarmv8_ccm_enc1)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ld1	{v0.16b, v1.16b}, [x4]	/* q0 := auth, q2 := ctr (be) */
-	mov	v2.16b, v1.16b
+	ld1	{v0.16b-v1.16b}, [x4]	/* q0 := auth, q1 := ctr (be) */
 	adrl	x11, ctr32_inc		/* x11 := &ctr32_inc */
 	ld1	{v5.4s}, [x11]		/* q5 := (0,0,0,1) (host-endian) */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
+	rev32	v2.16b, v1.16b		/* q2 := ctr (host-endian) */
 	_ALIGN_TEXT
 1:	ld1	{v3.16b}, [x1], #0x10	/* q3 := plaintext block */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
@@ -937,9 +936,8 @@ ENTRY(aesarmv8_ccm_enc1)
 	subs	x10, x10, #0x10		/* count down bytes */
 	st1	{v3.16b}, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b			/* repeat if more blocks */
-	rev32	v2.16b, v2.16b		/* q2 := ctr (big-endian) */
-	mov	v1.16b, v2.16b		/* store updated auth/ctr */
-	st1	{v0.16b-v1.16b}, [x4]
+	rev32	v1.16b, v2.16b		/* q1 := ctr (big-endian) */
+	st1	{v0.16b-v1.16b}, [x4]	/* store updated auth/ctr */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_ccm_enc1)



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Sep  8 23:57:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Issue two 4-register ld/st, not four 2-register ld/st.


To generate a diff of this commit:
cvs rdiff -u -r1.13 -r1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13	Tue Sep  8 23:57:13 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep  8 23:57:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -693,10 +693,8 @@ ENTRY(aesarmv8_xts_enc8)
 	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	/* q31 := tweak[7] */
-	ld1	{v0.16b,v1.16b}, [x1], #0x20	/* q[i] := ptxt[i] */
-	ld1	{v2.16b,v3.16b}, [x1], #0x20
-	ld1	{v4.16b,v5.16b}, [x1], #0x20
-	ld1	{v6.16b,v7.16b}, [x1], #0x20
+	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ptxt[i] */
+	ld1	{v4.16b-v7.16b}, [x1], #0x40
 	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
 	eor	v1.16b, v1.16b, v25.16b
 	eor	v2.16b, v2.16b, v26.16b
@@ -716,10 +714,8 @@ ENTRY(aesarmv8_xts_enc8)
 	eor	v5.16b, v5.16b, v29.16b
 	eor	v6.16b, v6.16b, v30.16b
 	eor	v7.16b, v7.16b, v31.16b
-	st1	{v0.16b,v1.16b}, [x2], #0x20	/* store ciphertext blocks */
-	st1	{v2.16b,v3.16b}, [x2], #0x20
-	st1	{v4.16b,v5.16b}, [x2], #0x20
-	st1	{v6.16b,v7.16b}, [x2], #0x20
+	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store ciphertext blocks */
+	st1	{v4.16b-v7.16b}, [x2], #0x40
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x80		/* count down nbytes */
 	b.ne	1b			/* repeat if more block groups */



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Sep  8 23:57:13 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Adapt aes_armv8_64.S to big-endian.

Patch mainly from (and tested by) jakllsch@ with minor tweaks by me.
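
Why ld1/st1 rather than ldr/str here, shown with a small host-endianness
demo in plain C (illustrative only, not the committed code): reading
memory byte-by-byte is endian-neutral, while reinterpreting the same
bytes as a wider integer depends on host byte order -- the same
distinction as ld1 {v0.16b} (per-byte lanes in memory order) versus
ldr q0 (one wide quantity) on a big-endian AArch64 kernel:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	uint8_t block[16];
	uint64_t w;
	unsigned i;

	for (i = 0; i < 16; i++)
		block[i] = i;	/* bytes 00 01 02 ... 0f in memory */

	memcpy(&w, block, 8);	/* like ldr: value depends on endianness */
	printf("as uint64: 0x%016llx\n", (unsigned long long)w);

	printf("as bytes:  ");	/* like ld1 {v.16b}: order never changes */
	for (i = 0; i < 16; i++)
		printf("%02x", block[i]);
	printf("\n");
	return 0;
}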


To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep  8 23:57:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -114,11 +114,11 @@ END(unshiftrows_rotword_3)
  *	Standard ABI calling convention.
  */
 ENTRY(aesarmv8_setenckey128)
-	ldr	q1, [x1]	/* q1 := master key */
+	ld1	{v1.16b}, [x1]	/* q1 := master key */
 
 	adrl	x4, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 table */
 
 	str	q1, [x0], #0x10	/* store master key as first round key */
 	mov	x2, #10		/* round count */
@@ -171,14 +171,14 @@ END(aesarmv8_setenckey128)
  *	Standard ABI calling convention.
  */
 ENTRY(aesarmv8_setenckey192)
-	ldr	q1, [x1], #0x10	/* q1 := master key[0:128) */
-	ldr	d2, [x1]	/* d2 := master key[128:192) */
+	ld1	{v1.16b}, [x1], #0x10	/* q1 := master key[0:128) */
+	ld1	{v2.8b}, [x1]	/* d2 := master key[128:192) */
 
 	adrl	x4, unshiftrows_rotword_1
 	adrl	x5, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
-	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_1 */
+	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_rotword_3 */
 
 	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
 	mov	x2, #12		/* round count */
@@ -351,13 +351,13 @@ END(aesarmv8_setenckey192)
  */
 ENTRY(aesarmv8_setenckey256)
 	/* q1 := key[0:128), q2 := key[128:256) */
-	ldp	q1, q2, [x1], #0x20
+	ld1	{v1.16b-v2.16b}, [x1], #0x20
 
 	adrl	x4, unshiftrows_rotword_3
 	adrl	x5, unshiftrows_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
-	ldr	q17, [x5]	/* q17 := unshiftrows_3 */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 */
+	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_3 */
 
 	/* store master key as first two round keys */
 	stp	q1, q2, [x0], #0x20
@@ -461,9 +461,9 @@ END(aesarmv8_enctodec)
 ENTRY(aesarmv8_enc)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := ptxt */
+	ld1	{v0.16b}, [x1]	/* q0 := ptxt */
 	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
-	str	q0, [x2]	/* store ctxt */
+	st1	{v0.16b}, [x2]	/* store ctxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_enc)
@@ -479,9 +479,9 @@ END(aesarmv8_enc)
 ENTRY(aesarmv8_dec)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := ctxt */
+	ld1	{v0.16b}, [x1]	/* q0 := ctxt */
 	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
-	str	q0, [x2]	/* store ptxt */
+	st1	{v0.16b}, [x2]	/* store ptxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_dec)
@@ -503,17 +503,17 @@ ENTRY(aesarmv8_cbc_enc)
 	mov	fp, sp
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-	ldr	q0, [x4]		/* q0 := chaining value */
+	ld1	{v0.16b}, [x4]		/* q0 := chaining value */
 	_ALIGN_TEXT
-1:	ldr	q1, [x1], #0x10		/* q1 := plaintext block */
+1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
 	bl	aesarmv8_enc1		/* q0 := ctxt; trash x0/x3/q16 */
 	subs	x10, x10, #0x10		/* count down nbytes */
-	str	q0, [x2], #0x10		/* store ciphertext block */
+	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b			/* repeat if x10 is nonzero */
-	str	q0, [x4]		/* store chaining value */
+	st1	{v0.16b}, [x4]		/* store chaining value */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 2:	ret
 END(aesarmv8_cbc_enc)
@@ -533,18 +533,21 @@ END(aesarmv8_cbc_enc)
 ENTRY(aesarmv8_cbc_dec1)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q24, [x4]		/* q24 := iv */
+	ld1	{v24.16b}, [x4]		/* q24 := iv */
 	mov	x9, x0			/* x9 := enckey */
 	mov

CVS commit: src/sys/crypto/aes

2020-09-08 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Sep  8 22:48:24 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_selftest.c
src/sys/crypto/aes/arch/x86: aes_sse2_subr.c

Log Message:
aes(9): Fix edge case in bitsliced SSE2 AES-CBC decryption.

Make sure self-tests exercise this edge case.

Discovered through confusion while inspecting jak's adaptation of
aes_armv8_64.S for big-endian.
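
The strengthened selftest walks every split point of the incremental
API; the shape of that property check, with a hypothetical cbc_enc()
interface standing in for the kernel's, is:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* assumed interface: encrypt nbytes (multiple of 16), updating iv */
extern void cbc_enc(const void *key, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t iv[16]);

int
check_all_splits(const void *key, const uint8_t in[144],
    const uint8_t expected[144], const uint8_t iv0[16])
{
	uint8_t out[144], iv[16];
	size_t j;

	for (j = 0; j < 144; j += 16) {
		memcpy(iv, iv0, 16);
		cbc_enc(key, in, out, j, iv);		/* first j bytes */
		cbc_enc(key, in + j, out + j, 144 - j, iv); /* the rest */
		if (memcmp(out, expected, 144) != 0)
			return -1;	/* split at j exposed the bug */
	}
	return 0;
}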


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_selftest.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_selftest.c
diff -u src/sys/crypto/aes/aes_selftest.c:1.5 src/sys/crypto/aes/aes_selftest.c:1.6
--- src/sys/crypto/aes/aes_selftest.c:1.5	Sat Jul 25 22:36:42 2020
+++ src/sys/crypto/aes/aes_selftest.c	Tue Sep  8 22:48:24 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $	*/
+/*	$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $");
 
 #ifdef _KERNEL
 
@@ -210,7 +210,7 @@ aes_selftest_encdec_cbc(const struct aes
 	uint8_t in[144];
 	uint8_t outbuf[146] = { [0] = 0x1a, [145] = 0x1a }, *out = outbuf + 1;
 	uint8_t iv0[16], iv[16];
-	unsigned i;
+	unsigned i, j;
 
 	for (i = 0; i < 32; i++)
 		key[i] = i;
@@ -237,21 +237,26 @@ aes_selftest_encdec_cbc(const struct aes
 			"AES-%u-CBC dec", aes_keybits[i]);
 
 		/* Try incrementally, with IV update.  */
-		memcpy(iv, iv0, 16);
-		impl->ai_cbc_enc(&enc, in, out, 16, iv, aes_nrounds[i]);
-		impl->ai_cbc_enc(&enc, in + 16, out + 16, 128, iv,
-		aes_nrounds[i]);
-		if (memcmp(out, expected[i], 144))
-			return aes_selftest_fail(impl, out, expected[i], 144,
-			"AES-%u-CBC enc incremental", aes_keybits[i]);
-
-		memcpy(iv, iv0, 16);
-		impl->ai_cbc_dec(&dec, out, out, 128, iv, aes_nrounds[i]);
-		impl->ai_cbc_dec(&dec, out + 128, out + 128, 16, iv,
-		aes_nrounds[i]);
-		if (memcmp(out, in, 144))
-			return aes_selftest_fail(impl, out, in, 144,
-			"AES-%u-CBC dec incremental", aes_keybits[i]);
+		for (j = 0; j < 144; j += 16) {
+			memcpy(iv, iv0, 16);
+			impl->ai_cbc_enc(&enc, in, out, j, iv, aes_nrounds[i]);
+			impl->ai_cbc_enc(&enc, in + j, out + j, 144 - j, iv,
+			aes_nrounds[i]);
+			if (memcmp(out, expected[i], 144))
+				return aes_selftest_fail(impl, out,
+				    expected[i], 144, "AES-%u-CBC enc inc %u",
+				    aes_keybits[i], j);
+
+			memcpy(iv, iv0, 16);
+			impl->ai_cbc_dec(&dec, out, out, j, iv,
+			aes_nrounds[i]);
+			impl->ai_cbc_dec(&dec, out + j, out + j, 144 - j, iv,
+			aes_nrounds[i]);
+			if (memcmp(out, in, 144))
+				return aes_selftest_fail(impl, out,
+				    in, 144, "AES-%u-CBC dec inc %u",
+				    aes_keybits[i], j);
+		}
 	}
 
 	if (outbuf[0] != 0x1a)

Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.4
--- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3	Sat Jul 25 22:29:56 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c	Tue Sep  8 22:48:24 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -200,11 +200,13 @@ aes_sse2_cbc_dec(const struct aesdec *de
 		case 48:
 			w = _mm_loadu_epi8(in + nbytes - 32);
 			q[1] = aes_sse2_interleave_in(w);
-			/*FALLTHROUGH*/
-		case 32:
 			w = _mm_loadu_epi8(in + nbytes - 48);
 			q[0] = aes_sse2_interleave_in(w);
-			/*FALLTHROUGH*/
+			break;
+		case 32:
+			w = _mm_loadu_epi8(in + nbytes - 32);
+			q[0] = aes_sse2_interleave_in(w);
+			break;
 		case 16:
 			break;
 		}



CVS commit: src/sys/crypto/aes/arch/arm

2020-08-16 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sun Aug 16 18:02:03 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S files.aesneon

Log Message:
Fix AES NEON code for big-endian softfp ARM.

...which is how the kernel runs.  Switch to using __SOFTFP__ for
consistency with how it gets exposed to C, although I'm not sure how
to get it defined automagically in the toolchain for .S files so
that's set manually in files.aesneon for now.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/files.aesneon

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Sun Aug 16 18:02:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -228,15 +228,19 @@ END(dsbo_1)
  * aes_neon_enc1(const struct aesenc *enc@r0, uint8x16_t x@q0,
  * unsigned nrounds@r1)
  *
- *	With -mfloat-abi=soft(fp) (here spelled `#ifdef _KERNEL'):
+ *	With -mfloat-abi=soft(fp) (i.e., __SOFTFP__):
  *
  * uint8x16_t@(r0,r1,r2,r3)
  * aes_neon_enc1(const struct aesenc *enc@r0,
  * uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
  */
 ENTRY(aes_neon_enc1)
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	d0, r3, r2		/* d0 := x lo */
+#else
 	vmov	d0, r2, r3		/* d0 := x lo */
+#endif
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
@@ -434,10 +438,15 @@ ENTRY(aes_neon_enc1)
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	r1, r0, d0
+	vmov	r3, r2, d1
+#else
 	vmov	r0, r1, d0
 	vmov	r2, r3, d1
 #endif
+#endif
 	bx	lr
 END(aes_neon_enc1)
 
@@ -457,8 +466,12 @@ END(aes_neon_enc1)
  * uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
  */
 ENTRY(aes_neon_dec1)
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	d0, r3, r2		/* d0 := x lo */
+#else
 	vmov	d0, r2, r3		/* d0 := x lo */
+#endif
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
@@ -669,9 +682,14 @@ ENTRY(aes_neon_dec1)
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	r1, r0, d0
+	vmov	r3, r2, d1
+#else
 	vmov	r0, r1, d0
 	vmov	r2, r3, d1
 #endif
+#endif
 	bx	lr
 END(aes_neon_dec1)

Index: src/sys/crypto/aes/arch/arm/files.aesneon
diff -u src/sys/crypto/aes/arch/arm/files.aesneon:1.3 src/sys/crypto/aes/arch/arm/files.aesneon:1.4
--- src/sys/crypto/aes/arch/arm/files.aesneon:1.3	Tue Jun 30 17:03:13 2020
+++ src/sys/crypto/aes/arch/arm/files.aesneon	Sun Aug 16 18:02:03 2020
@@ -1,4 +1,4 @@
-#	$NetBSD: files.aesneon,v 1.3 2020/06/30 17:03:13 riastradh Exp $
+#	$NetBSD: files.aesneon,v 1.4 2020/08/16 18:02:03 riastradh Exp $
 
 ifdef aarch64
 makeoptions	aes	"COPTS.aes_neon.c"+="-march=armv8-a"
@@ -8,6 +8,8 @@ makeoptions	aes	"COPTS.aes_neon.c"+="-mf
 makeoptions	aes	"COPTS.aes_neon_subr.c"+="-mfloat-abi=softfp -mfpu=neon"
 endif
 
+makeoptions	aes	"AOPTS.aes_neon_32.S"+="-D__SOFTFP__"
+
 file	crypto/aes/arch/arm/aes_neon.c		aes & (cpu_cortex | aarch64)
 file	crypto/aes/arch/arm/aes_neon_impl.c	aes & (cpu_cortex | aarch64)
 file	crypto/aes/arch/arm/aes_neon_subr.c	aes & (cpu_cortex | aarch64)



CVS commit: src/sys/crypto/aes

2020-08-09 Thread Rin Okuyama
Module Name:    src
Committed By:   rin
Date:   Mon Aug 10 06:27:29 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c

Log Message:
Add hack to compile aes_ccm_tag() with -O0 for m68k for GCC8.

GCC 8 miscompiles aes_ccm_tag() for m68k with optimization level -O[12],
which results in failure in aes_ccm_selftest():

| aes_ccm_selftest: tag 0: 8 bytes @ 0x4d3e38
| 03 80 5f 08 22 6f cb fe  | .._."o..
| aes_ccm_selftest: verify 0 failed
| ...
| WARNING: module error: built-in module aes_ccm failed its MODULE_CMD_INIT, error 5

This is observed for amiga (A1200, 68060), mac68k (Quadra 840AV, 68040),
and luna68k (nono, 68030 emulator). However, it is not observed for sun3
(TME, 68020 emulator) or sun2 (TME, 68010 emulator). At the moment, it is
unclear whether this is due to differences between 68010-20 and 68030-60,
or to something wrong with TME.
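
The workaround pattern, as a self-contained sketch (hypothetical
function and macro names, not the committed code): GCC's optimize
attribute disables optimization for just the one miscompiled function,
and a compiler-version guard keeps the penalty off every other compiler
and release:

#include <stdint.h>

#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 8
#define	NO_OPT	__attribute__((__optimize__("O0")))
#else
#define	NO_OPT
#endif

uint32_t NO_OPT
fragile(uint32_t x)
{
	return x * 2654435761u;	/* stand-in for the miscompiled body */
}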


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_ccm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.4 src/sys/crypto/aes/aes_ccm.c:1.5
--- src/sys/crypto/aes/aes_ccm.c:1.4	Mon Jul 27 20:44:30 2020
+++ src/sys/crypto/aes/aes_ccm.c	Mon Aug 10 06:27:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $");
 
 #include 
 #include 
@@ -301,6 +301,9 @@ aes_ccm_dec(struct aes_ccm *C, const voi
 }
 
 void
+#if defined(__m68k__) && __GNUC_PREREQ__(8, 0)
+__attribute__((__optimize__("O0")))
+#endif
 aes_ccm_tag(struct aes_ccm *C, void *out)
 {
 	uint8_t *auth = C->authctr;



CVS commit: src/sys/crypto/aes/arch/arm

2020-08-08 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sun Aug  9 02:00:57 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c

Log Message:
Nix outdated comment.

I implemented this parallelism a couple weeks ago.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c	Sun Aug  9 02:00:57 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -287,12 +287,6 @@ aes_neon_cbcmac_update1(const struct aes
 	storeblock(auth0, auth);
 }
 
-/*
- * XXX On aarch64, we have enough registers that we should be able to
- * pipeline two simultaneous vpaes computations in an `aes_neon_enc2'
- * function, which should substantially improve CCM throughput.
- */
-
 void
 aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Jul 28 20:11:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c aes_neon_impl.h aes_neon_subr.c
arm_neon.h

Log Message:
Draft 2x vectorized neon vpaes for aarch64.

Gives a modest speed boost on rk3399 (Cortex-A53/A72), around 20% in
cgd tests, for parallelizable operations like CBC decryption; the same
improvement should probably carry over to the rpi4 CPU, which lacks
ARMv8.0-AES.
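
The technique, reduced to a scalar C sketch (a stand-in round function,
not vpaes itself): running two independent blocks through one loop
interleaves two dependency chains, so their latencies overlap instead
of serializing -- which is why only parallelizable modes like CBC
decryption benefit:

#include <stdint.h>

static inline uint32_t
round1(uint32_t x, uint32_t rk)	/* stand-in for one vpaes round */
{
	return (x << 1 | x >> 31) ^ rk;
}

void
enc2(uint32_t *x0, uint32_t *x1, const uint32_t *rk, unsigned nrounds)
{
	uint32_t a = *x0, b = *x1;
	unsigned i;

	for (i = 0; i < nrounds; i++) {
		a = round1(a, rk[i]);
		b = round1(b, rk[i]);	/* independent of a's chain */
	}
	*x0 = a;
	*x1 = b;
}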


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon.c \
src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.3 src/sys/crypto/aes/arch/arm/aes_neon.c:1.4
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.3	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Tue Jul 28 20:11:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $");
 
 #include 
 
@@ -589,6 +589,59 @@ aes_neon_enc1(const struct aesenc *enc, 
 	return vqtbl1q_u8(x, sr[rmod4]);
 }
 
+uint8x16x2_t
+aes_neon_enc2(const struct aesenc *enc, uint8x16x2_t x, unsigned nrounds)
+{
+	const uint32_t *rk32 = enc->aese_aes.aes_rk;
+	uint8x16_t inv_ = *(const volatile uint8x16_t *)&inv;
+	uint8x16_t inva_ = *(const volatile uint8x16_t *)&inva;
+	uint8x16_t sb1_0 = ((const volatile uint8x16_t *)sb1)[0];
+	uint8x16_t sb1_1 = ((const volatile uint8x16_t *)sb1)[1];
+	uint8x16_t sb2_0 = ((const volatile uint8x16_t *)sb2)[0];
+	uint8x16_t sb2_1 = ((const volatile uint8x16_t *)sb2)[1];
+	uint8x16_t x0 = x.val[0], x1 = x.val[1];
+	uint8x16_t io0, jo0, io1, jo1;
+	unsigned rmod4 = 0;
+
+	x0 = aes_schedule_transform(x0, ipt);
+	x1 = aes_schedule_transform(x1, ipt);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	for (;;) {
+		uint8x16_t A_0, A2_0, A2_B_0, A2_B_D_0;
+		uint8x16_t A_1, A2_1, A2_B_1, A2_B_D_1;
+
+		subbytes(&io0, &jo0, x0, inv_, inva_);
+		subbytes(&io1, &jo1, x1, inv_, inva_);
+
+		rk32 += 4;
+		rmod4 = (rmod4 + 1) % 4;
+		if (--nrounds == 0)
+			break;
+
+		A_0 = vqtbl1q_u8(sb1_0, io0) ^ vqtbl1q_u8(sb1_1, jo0);
+		A_1 = vqtbl1q_u8(sb1_0, io1) ^ vqtbl1q_u8(sb1_1, jo1);
+		A_0 ^= loadroundkey(rk32);
+		A_1 ^= loadroundkey(rk32);
+		A2_0 = vqtbl1q_u8(sb2_0, io0) ^ vqtbl1q_u8(sb2_1, jo0);
+		A2_1 = vqtbl1q_u8(sb2_0, io1) ^ vqtbl1q_u8(sb2_1, jo1);
+		A2_B_0 = A2_0 ^ vqtbl1q_u8(A_0, mc_forward[rmod4]);
+		A2_B_1 = A2_1 ^ vqtbl1q_u8(A_1, mc_forward[rmod4]);
+		A2_B_D_0 = A2_B_0 ^ vqtbl1q_u8(A_0, mc_backward[rmod4]);
+		A2_B_D_1 = A2_B_1 ^ vqtbl1q_u8(A_1, mc_backward[rmod4]);
+		x0 = A2_B_D_0 ^ vqtbl1q_u8(A2_B_0, mc_forward[rmod4]);
+		x1 = A2_B_D_1 ^ vqtbl1q_u8(A2_B_1, mc_forward[rmod4]);
+	}
+	x0 = vqtbl1q_u8(sbo[0], io0) ^ vqtbl1q_u8(sbo[1], jo0);
+	x1 = vqtbl1q_u8(sbo[0], io1) ^ vqtbl1q_u8(sbo[1], jo1);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	return (uint8x16x2_t) { .val = {
+		[0] = vqtbl1q_u8(x0, sr[rmod4]),
+		[1] = vqtbl1q_u8(x1, sr[rmod4]),
+	} };
+}
+
 uint8x16_t
 aes_neon_dec1(const struct aesdec *dec, uint8x16_t x, unsigned nrounds)
 {
@@ -628,4 +681,60 @@ aes_neon_dec1(const struct aesdec *dec, 
 	return vqtbl1q_u8(x, sr[i]);
 }
 
+uint8x16x2_t
+aes_neon_dec2(const struct aesdec *dec, uint8x16x2_t x, unsigned nrounds)
+{
+	const uint32_t *rk32 = dec->aesd_aes.aes_rk;
+	unsigned i = 3 & ~(nrounds - 1);
+	uint8x16_t inv_ = *(const volatile uint8x16_t *)&inv;
+	uint8x16_t inva_ = *(const volatile uint8x16_t *)&inva;
+	uint8x16_t x0 = x.val[0], x1 = x.val[1];
+	uint8x16_t io0, jo0, io1, jo1, mc;
+
+	x0 = aes_schedule_transform(x0, dipt);
+	x1 = aes_schedule_transform(x1, dipt);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	rk32 += 4;
+
+	mc = mc_forward[3];
+	for (;;) {
+		subbytes(&io0, &jo0, x0, inv_, inva_);
+		subbytes(&io1, &jo1, x1, inv_, inva_);
+		if (--nrounds == 0)
+			break;
+
+		x0 = vqtbl1q_u8(dsb9[0], io0) ^ vqtbl1q_u8(dsb9[1], jo0);
+		x1 = vqtbl1q_u8(dsb9[0], io1) ^ vqtbl1q_u8(dsb9[1], jo1);
+		x0 ^= loadroundkey(rk32);
+		x1 ^= loadroundkey(rk32);
+		rk32 += 4;/* next round key */
+
+		x0 = vqtbl1q_u8(x0, mc);
+		x1 = vqtbl1q_u8(x1, mc);
+		x0 ^= vqtbl1q_u8(dsbd[0], io0) ^ vqtbl1q_u8(dsbd[1], jo0);
+		x1 ^= vqtbl1q_u8(dsbd[0], io1) ^ vqtbl1q_u8(dsbd[1], jo1);
+
+		x0 = vqtbl1q_u8(x0, mc);
+		x1 = vqtbl1q_u8(x1, mc);
+		x0 ^= vqtbl1q_u8(dsbb[0], io0) ^ v

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-28 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Jul 28 14:01:35 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Initialize authctr in both branches.

I guess I didn't test the unaligned case, weird.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.5 src/sys/crypto/aes/arch/x86/aes_via.c:1.6
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.5	Sat Jul 25 22:31:32 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Tue Jul 28 14:01:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -739,6 +739,7 @@ aesvia_ccm_enc1(const struct aesenc *enc
 		authctr = authctrbuf;
 		ccmenc_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmenc_aligned_evcnt.ev_count++;
 	}
 	c0 = le32dec(authctr0 + 16 + 4*0);
@@ -812,6 +813,7 @@ aesvia_ccm_dec1(const struct aesenc *enc
 		le32enc(authctr + 16 + 4*2, c2);
 		ccmdec_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmdec_aligned_evcnt.ev_count++;
 	}
 



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Mon Jul 27 20:54:12 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Issue aese/aesmc and aesd/aesimc in pairs.

Advised by the aarch64 optimization guide; increases cgd throughput
by about 10%.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9	Mon Jul 27 20:53:22 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Mon Jul 27 20:54:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -1041,15 +1041,18 @@ END(ctr32_inc)
 	.type	aesarmv8_enc1,@function
 aesarmv8_enc1:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q0 := MixColumns(q0) */
+1:	/* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0 */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
 	aese	v0.16b, v16.16b
-	ldr	q16, [x0], #0x10		/* load next round key */
-	b.ne	1b
+	ldr	q16, [x0]		/* load last round key */
+	/* q0 := AddRoundKey_q16(q0) */
 	eor	v0.16b, v0.16b, v16.16b
 	ret
 END(aesarmv8_enc1)
@@ -1067,17 +1070,21 @@ END(aesarmv8_enc1)
 	.type	aesarmv8_enc2,@function
 aesarmv8_enc2:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := MixColumns(q[i]) */
+1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i] */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v16.16b
 	aesmc	v1.16b, v1.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
 	aese	v0.16b, v16.16b
 	aese	v1.16b, v16.16b
-	ldr	q16, [x0], #0x10		/* load next round key */
-	b.ne	1b
+	ldr	q16, [x0]		/* load last round key */
+	/* q[i] := AddRoundKey_q16(q[i]) */
 	eor	v0.16b, v0.16b, v16.16b
 	eor	v1.16b, v1.16b, v16.16b
 	ret
@@ -1097,18 +1104,28 @@ END(aesarmv8_enc2)
 	.type	aesarmv8_enc8,@function
 aesarmv8_enc8:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := MixColumns(q[i]) */
+1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i] */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v16.16b
 	aesmc	v1.16b, v1.16b
+	aese	v2.16b, v16.16b
 	aesmc	v2.16b, v2.16b
+	aese	v3.16b, v16.16b
 	aesmc	v3.16b, v3.16b
+	aese	v4.16b, v16.16b
 	aesmc	v4.16b, v4.16b
+	aese	v5.16b, v16.16b
 	aesmc	v5.16b, v5.16b
+	aese	v6.16b, v16.16b
 	aesmc	v6.16b, v6.16b
+	aese	v7.16b, v16.16b
 	aesmc	v7.16b, v7.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
 	aese	v0.16b, v16.16b
 	aese	v1.16b, v16.16b
@@ -1118,9 +1135,9 @@ aesarmv8_enc8:
 	aese	v5.16b, v16.16b
 	aese	v6.16b, v16.16b
 	aese	v7.16b, v16.16b
-	ldr	q16, [x0], #0x10	/* load next round key */
-	b.ne	1b
-	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
+	ldr	q16, [x0]		/* load last round key */
+	/* q[i] := AddRoundKey_q16(q[i]) */
+	eor	v0.16b, v0.16b, v16.16b
 	eor	v1.16b, v1.16b, v16.16b
 	eor	v2.16b, v2.16b, v16.16b
 	eor	v3.16b, v3.16b, v16.16b
@@ -1144,15 +1161,19 @@ END(aesarmv8_enc8)
 	.type	aesarmv8_dec1,@function
 aesarmv8_dec1:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q0 := InMixColumns(q0) */
-	aesimc	v0.16b, v0.16b
-2:	subs	x3, x3, #1
-	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+1:	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
 	aesd	v0.16b, v16.16b
+	/* q0 := InMixColumns(q0) */
+	aesimc	v0.16b, v0.16b
 	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
 	b.ne	1b
+	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+	aesd	v0.16b, v16.16b
+	ldr	q16, [x0]		/* load last round key */
+	/* q0 := AddRoundKey_q16(q0) */
 	eor	v0.16b, v0.16b, v16.16b
 	ret
 END(aesarmv8_dec1)
@@ -1171,18 +1192,29 @@ END(aesarmv8_dec1)
 	.type	aesarmv8_dec8,@function
 aesarmv8_dec8:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := InMixColumns(q[i]) */
+1:	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
+	aesd	v0.16b, v16.16b
+	/* q[i] := InMixColumns(q[i]) */
 	aesimc	v0.16b, v0.16b
+	aesd	v1.16b, v16.16b
 	aesimc	v1.16b, v1.16b
+	aesd	v2.16b, v16.16b
 	aesimc	v2.16b, v2.16b
+	aesd	v3.16b, v16.16b
 	aesimc	v3.16b, v3.16b

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:52:11 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
PIC for aes_neon_32.S.

Without this, tests/sys/crypto/aes/t_aes fails to start on armv7
because of R_ARM_ABS32 relocations in a nonwritable text segment for
a PIE -- which atf quietly ignores in the final report!  Yikes.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.2
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1	Mon Jun 29 23:57:56 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Mon Jul 27 20:52:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.1 2020/06/29 23:57:56 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -30,8 +30,14 @@
 
 	.fpu	neon
 
+	.text
+	.p2align 2
+.Lconstants_addr:
+	.long	.Lconstants - .
+
 	.section .rodata
 	.p2align 4
+.Lconstants:
 
 	.type	inv,_ASM_TYPE_OBJECT
 inv:
@@ -239,7 +245,7 @@ ENTRY(aes_neon_enc1)
 	 * r3: rmod4
 	 * r4: mc_forward
 	 * r5: mc_backward
-	 * r6,r7,r8,r10,r11: temporaries
+	 * r6,r7,r8,r10,r11,r12: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -258,23 +264,30 @@ ENTRY(aes_neon_enc1)
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
 
+	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
+	ldr	r12, .Lconstants_addr
+	adr	r11, .Lconstants_addr
+
 	vld1.64	{d28-d29}, [r0 :128]!	/* q14 = *rk++ */
 	movw	r3, #0
 	vmov.i8	q1, #0x0f
 
+	/* r12 := .Lconstants */
+	add	r12, r12, r11
+
 	/* (q4, q5) := (iptlo, ipthi) */
-	ldr	r6, =iptlo
-	ldr	r7, =ipthi
+	add	r6, r12, #(iptlo - .Lconstants)
+	add	r7, r12, #(ipthi - .Lconstants)
 	vld1.64	{d8-d9}, [r6 :128]
 	vld1.64	{d10-d11}, [r7 :128]
 
 	/* load the rest of the constants */
-	ldr	r4, =sb1_0
-	ldr	r5, =sb1_1
-	ldr	r6, =sb2_0
-	ldr	r7, =sb2_1
-	ldr	r8, =inv
-	ldr	r10, =inva
+	add	r4, r12, #(sb1_0 - .Lconstants)
+	add	r5, r12, #(sb1_1 - .Lconstants)
+	add	r6, r12, #(sb2_0 - .Lconstants)
+	add	r7, r12, #(sb2_1 - .Lconstants)
+	add	r8, r12, #(inv - .Lconstants)
+	add	r10, r12, #(inva - .Lconstants)
 	vld1.64	{d12-d13}, [r4 :128]	/* q6 = sb1[0] */
 	vld1.64	{d14-d15}, [r5 :128]	/* q7 = sb1[1] */
 	vld1.64	{d16-d17}, [r6 :128]	/* q8 = sb2[0] */
@@ -283,8 +296,8 @@ ENTRY(aes_neon_enc1)
 	vld1.64	{d22-d23}, [r10 :128]	/* q11 = inva */
 
 	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	ldr	r4, =mc_forward
-	ldr	r5, =mc_backward
+	add	r4, r12, #(mc_forward - .Lconstants)
+	add	r5, r12, #(mc_backward - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -392,9 +405,9 @@ ENTRY(aes_neon_enc1)
 	bne	1b
 
 	/* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */
-	ldr	r8, =sr
-	ldr	r6, =sbo_0
-	ldr	r7, =sbo_1
+	add	r8, r12, #(sr - .Lconstants)
+	add	r6, r12, #(sbo_0 - .Lconstants)
+	add	r7, r12, #(sbo_1 - .Lconstants)
 	add	r8, r8, r3, lsl #4
 	vld1.64	{d12-d13}, [r6 :128]
 	vld1.64	{d14-d15}, [r7 :128]
@@ -469,23 +482,30 @@ ENTRY(aes_neon_dec1)
 	 * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)]
 	 */
 
+	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
+	ldr	r12, .Lconstants_addr
+	adr	r11, .Lconstants_addr
+
 	vld1.64	{d28-d29}, [r0 :128]!	/* q14 = *rk++ */
 	rsb	r3, r1, #0		/* r3 := ~(x - 1) = -x */
 	vmov.i8	q1, #0x0f
 	and	r3, r3, #3		/* r3 := 3 & ~(x - 1) */
 
+	/* r12 := .Lconstants */
+	add	r12, r12, r11
+
 	/* (q4, q5) := (diptlo, dipthi) */
-	ldr	r6, =diptlo
-	ldr	r7, =dipthi
+	add	r6, r12, #(diptlo - .Lconstants)
+	add	r7, r12, #(dipthi - .Lconstants)
 	vld1.64	{d8-d9}, [r6 :128]
 	vld1.64	{d10-d11}, [r7 :128]
 
 	/* load the rest of the constants */
-	ldr	r4, =dsbb_0
-	ldr	r5, =dsbb_1
-	ldr	r6, =inv
-	ldr	r7, =inva
-	ldr	r8, =.Lmc_forward_3
+	add	r4, r12, #(dsbb_0 - .Lconstants)
+	add	r5, r12, #(dsbb_1 - .Lconstants)
+	add	r6, r12, #(inv - .Lconstants)
+	add	r7, r12, #(inva - .Lconstants)
+	add	r8, r12, #(.Lmc_forward_3 - .Lconstants)
 	vld1.64	{d12-d13}, [r4 :128]	/* q6 := dsbb[0] */
 	vld1.64	{d14-d15}, [r5 :128]	/* q7 := dsbb[1] */
 	vld1.64	{d20-d21}, [r6 :128]	/* q10 := inv */
@@ -504,8 +524,8 @@ ENTRY(aes_neon_dec1)
 	vtbl.8	d7, {d10-d11}, d7
 
 	/* load dsb9 */
-	ldr	r4, =dsb9_0
-	ldr	r5, =dsb9_1
+	add	r4, r12, #(dsb9_0 - .Lconstants)
+	add	r5, r12, #(dsb9_1 - .Lconstants)
 	vld1.64	{d8-d9}, [r4 :128]	/* q4 := dsb9[0] */
 	vld1.64	{d10-d11}, [r5 :128]	/* q5 := dsb9[1] */
 
@@ -516,7 +536,7 @@ ENTRY(aes_neon_dec1)
 	b	2f
 
 1:	/* load dsbd */
-	ldr	r4, =dsbd_0
+	add	r4, r12, #(dsbd_0 - .Lconstants)
 	vld1.64	{d16-d17}, [r4 :128]!	/* q8 := dsbd[0] */
 	vld1.64	{d18-d19}, [r4 :128]	/* q9 := dsbd[1] */
 
@@ -543,7 +

CVS commit: src/sys/crypto/aes

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:44:30 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c aes_ccm.h

Log Message:
Gather auth[16] and ctr[16] into one authctr[32].

Should appease clang.
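
For context, a minimal sketch of the new layout (hypothetical struct
and helper names, not the committed aes_ccm.h): the CCM code wants one
contiguous 32-byte buffer, with the CBC-MAC authenticator in bytes
0-15 and the AES-CTR input block in bytes 16-31, which two separate
16-byte members could not legally promise.

#include <stdint.h>

struct ccm_sketch {
	uint8_t authctr[32];	/* [0..15] auth, [16..31] ctr */
};

static inline uint8_t *
ccm_auth(struct ccm_sketch *c)
{
	return c->authctr;	/* CBC-MAC half */
}

static inline uint8_t *
ccm_ctr(struct ccm_sketch *c)
{
	return c->authctr + 16;	/* AES-CTR half */
}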


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/aes_ccm.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/aes_ccm.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.3 src/sys/crypto/aes/aes_ccm.c:1.4
--- src/sys/crypto/aes/aes_ccm.c:1.3	Sun Jul 26 04:44:47 2020
+++ src/sys/crypto/aes/aes_ccm.c	Mon Jul 27 20:44:30 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $");
 
 #include 
 #include 
@@ -66,18 +66,20 @@ xor(uint8_t *x, const uint8_t *a, const 
 static void
 aes_ccm_inc(struct aes_ccm *C)
 {
+	uint8_t *ctr = C->authctr + 16;
 
 	KASSERT(C->L == 2);
-	if (++C->in[15] == 0 && ++C->in[14] == 0)
+	if (++ctr[15] == 0 && ++ctr[14] == 0)
 		panic("AES-CCM overflow");
 }
 
 static void
 aes_ccm_zero_ctr(struct aes_ccm *C)
 {
+	uint8_t *ctr = C->authctr + 16;
 
 	KASSERT(C->L == 2);
-	C->in[14] = C->in[15] = 0;
+	ctr[14] = ctr[15] = 0;
 }
 
 void
@@ -87,6 +89,8 @@ aes_ccm_init(struct aes_ccm *C, unsigned
 size_t mlen)
 {
 	const uint8_t *adp = ad;
+	uint8_t *auth = C->authctr;
+	uint8_t *ctr = C->authctr + 16;
 	unsigned i;
 
 	KASSERT(L == 2);
@@ -102,58 +106,58 @@ aes_ccm_init(struct aes_ccm *C, unsigned
 	C->mlen = C->mleft = mlen;
 
 	/* Encode B0, the initial authenticated data block.  */
-	C->auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA);
-	C->auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M);
-	C->auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L);
-	memcpy(C->auth + 1, nonce, noncelen);
+	auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA);
+	auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M);
+	auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L);
+	memcpy(auth + 1, nonce, noncelen);
 	for (i = 0; i < L; i++, mlen >>= 8) {
 		KASSERT(i < 16 - 1 - noncelen);
-		C->auth[16 - i - 1] = mlen & 0xff;
+		auth[16 - i - 1] = mlen & 0xff;
 	}
-	aes_enc(enc, C->auth, C->auth, C->nr);
+	aes_enc(enc, auth, auth, C->nr);
 
 	/* Process additional authenticated data, if any.  */
 	if (adlen) {
 		/* Encode the length according to the table on p. 4.  */
 		if (adlen < 0xff00) {
-			C->auth[0] ^= adlen >> 8;
-			C->auth[1] ^= adlen;
+			auth[0] ^= adlen >> 8;
+			auth[1] ^= adlen;
 			i = 2;
		} else if (adlen < 0xffffffff) {
-			C->auth[0] ^= 0xff;
-			C->auth[1] ^= 0xfe;
-			C->auth[2] ^= adlen >> 24;
-			C->auth[3] ^= adlen >> 16;
-			C->auth[4] ^= adlen >> 8;
-			C->auth[5] ^= adlen;
+			auth[0] ^= 0xff;
+			auth[1] ^= 0xfe;
+			auth[2] ^= adlen >> 24;
+			auth[3] ^= adlen >> 16;
+			auth[4] ^= adlen >> 8;
+			auth[5] ^= adlen;
 			i = 6;
 #if SIZE_MAX > 0xffffffffU
 		} else {
			CTASSERT(SIZE_MAX <= 0xffffffffffffffff);
-			C->auth[0] ^= 0xff;
-			C->auth[1] ^= 0xff;
-			C->auth[2] ^= adlen >> 56;
-			C->auth[3] ^= adlen >> 48;
-			C->auth[4] ^= adlen >> 40;
-			C->auth[5] ^= adlen >> 32;
-			C->auth[6] ^= adlen >> 24;
-			C->auth[7] ^= adlen >> 16;
-			C->auth[8] ^= adlen >> 8;
-			C->auth[9] ^= adlen;
+			auth[0] ^= 0xff;
+			auth[1] ^= 0xff;
+			auth[2] ^= adlen >> 56;
+			auth[3] ^= adlen >> 48;
+			auth[4] ^= adlen >> 40;
+			auth[5] ^= adlen >> 32;
+			auth[6] ^= adlen >> 24;
+			auth[7] ^= adlen >> 16;
+			auth[8] ^= adlen >> 8;
+			auth[9] ^= adlen;
 			i = 10;
 #endif
 		}
 
 		/* Fill out the partial block if we can, and encrypt.  */
-		xor(C->auth + i, C->auth + i, adp, MIN(adlen, 16 - i));
+		xor(auth + i, auth + i, adp, MIN(adlen, 16 - i));
 		adp += MIN(adlen, 16 - i);
 		adlen -= MIN(adlen, 16 - i);
-		aes_enc(enc, C->auth, C->auth, C->nr);
+		aes_enc(enc, auth, auth, C->nr);
 
 		/* If there was anything more, process 16 bytes at a time.  */
 		if (adlen - (adlen % 16)) {
 			aes_cbcmac_update1(enc, adp, adlen - (adlen % 16),
-			C->auth, C->nr);
+			auth, C->nr);
 			adlen %= 16;
 		}
 
@@ -162,15 +166,15 @@ aes_ccm_init(struct aes_ccm *C, unsigned
 		 * with zeros, which is a no-op) and process it.
 		 */
 		if (adlen) {
-			xor(C->auth, C->auth, adp, adlen);
-			aes_enc(enc, C->auth, C->auth, C->nr);
+			xor(auth, auth, adp, adlen);
+			aes_enc(enc, auth, auth, C->nr);
 		}
 	}
 
 	/* Set up the AES input for AES-CTR encryption.  */
-	C->in[0] = __SHIFTIN(L - 1, CCM_EFLAGS_L);
-	memcpy(C->in + 1, nonce, noncelen);
-	memset(C->in + 1 + noncelen, 0, 16 - 1 - noncelen);
+	ctr[0] = __SHIFTIN(L - 1, CCM_EFLAGS_L);
+	m

CVS commit: src/sys/crypto/aes

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Jul 26 04:44:47 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c

Log Message:
Ensure aes_ccm module init runs after aes module init.

Otherwise the AES implementation might not be selected early enough.
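
As a usage note, the dependency is expressed through the third
argument of the module(9) MODULE() macro, a comma-separated list of
required modules, as the diff below shows; schematically:

#include <sys/module.h>

/*
 * Naming "aes" forces the aes module's init -- which runs the
 * implementation selection -- to complete before aes_ccm's init.
 */
MODULE(MODULE_CLASS_MISC, aes_ccm, "aes");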


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/aes_ccm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.2 src/sys/crypto/aes/aes_ccm.c:1.3
--- src/sys/crypto/aes/aes_ccm.c:1.2	Sat Jul 25 22:27:53 2020
+++ src/sys/crypto/aes/aes_ccm.c	Sun Jul 26 04:44:47 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $");
 
 #include 
 #include 
@@ -588,7 +588,7 @@ aes_ccm_selftest(void)
 /* XXX provisional hack */
 #include <sys/module.h>
 
-MODULE(MODULE_CLASS_MISC, aes_ccm, NULL);
+MODULE(MODULE_CLASS_MISC, aes_ccm, "aes");
 
 static int
 aes_ccm_modcmd(modcmd_t cmd, void *opaque)



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:45:10 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: immintrin.h

Log Message:
Add some Intel intrinsics for ChaCha.

_mm_load1_ps
_mm_loadu_si128
_mm_movelh_ps
_mm_slli_epi32
_mm_storeu_si128
_mm_unpackhi_epi32
_mm_unpacklo_epi32
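
For a rough picture of why these particular intrinsics matter for
ChaCha, here is a hedged sketch of the core quarter-round written
against the standard SSE2 API -- not the committed NetBSD ChaCha
code; it also assumes the standard _mm_or_si128/_mm_xor_si128:

#include <emmintrin.h>

/* Rotate each 32-bit lane left by c bits: ChaCha's only nontrivial
 * primitive, built from the two shift-by-immediate intrinsics. */
#define	ROL32X4(x, c)						      \
	_mm_or_si128(_mm_slli_epi32((x), (c)),			      \
	    _mm_srli_epi32((x), 32 - (c)))

/* One ChaCha quarter-round over four parallel 32-bit words. */
static inline void
chacha_qr(__m128i *a, __m128i *b, __m128i *c, __m128i *d)
{
	*a = _mm_add_epi32(*a, *b);
	*d = ROL32X4(_mm_xor_si128(*d, *a), 16);
	*c = _mm_add_epi32(*c, *d);
	*b = ROL32X4(_mm_xor_si128(*b, *c), 12);
	*a = _mm_add_epi32(*a, *b);
	*d = ROL32X4(_mm_xor_si128(*d, *a), 8);
	*c = _mm_add_epi32(*c, *d);
	*b = ROL32X4(_mm_xor_si128(*b, *c), 7);
}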


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/immintrin.h
diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.4 src/sys/crypto/aes/arch/x86/immintrin.h:1.5
--- src/sys/crypto/aes/arch/x86/immintrin.h:1.4	Sat Jul 25 22:44:32 2020
+++ src/sys/crypto/aes/arch/x86/immintrin.h	Sat Jul 25 22:45:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $	*/
+/*	$NetBSD: immintrin.h,v 1.5 2020/07/25 22:45:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -103,6 +103,20 @@ _mm_add_epi32(__m128i __a, __m128i __b)
 #endif
 
 _INTRINSATTR
+static __inline __m128
+_mm_load1_ps(const float *__p)
+{
+	return __extension__ (__m128)(__v4sf) { *__p, *__p, *__p, *__p };
+}
+
+_INTRINSATTR
+static __inline __m128i
+_mm_loadu_si128(const __m128i_u *__p)
+{
+	return ((const struct { __m128i_u __v; } _PACKALIAS *)__p)->__v;
+}
+
+_INTRINSATTR
 static __inline __m128i
 _mm_loadu_si32(const void *__p)
 {
@@ -132,8 +146,18 @@ _mm_movehl_ps(__m128 __v0, __m128 __v1)
 #if defined(__GNUC__) && !defined(__clang__)
 	return (__m128)__builtin_ia32_movhlps((__v4sf)__v0, (__v4sf)__v1);
 #elif defined(__clang__)
-	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1,
-	6, 7, 2, 3);
+	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 6,7,2,3);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128
+_mm_movelh_ps(__m128 __v0, __m128 __v1)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (__m128)__builtin_ia32_movlhps((__v4sf)__v0, (__v4sf)__v1);
+#elif defined(__clang__)
+	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 0,1,4,5);
 #endif
 }
 
@@ -205,6 +229,13 @@ _mm_shuffle_epi8(__m128i __vtbl, __m128i
 
 _INTRINSATTR
 static __inline __m128i
+_mm_slli_epi32(__m128i __v, uint8_t __bits)
+{
+	return (__m128i)__builtin_ia32_pslldi128((__v4si)__v, (int)__bits);
+}
+
+_INTRINSATTR
+static __inline __m128i
 _mm_slli_epi64(__m128i __v, uint8_t __bits)
 {
 	return (__m128i)__builtin_ia32_psllqi128((__v2di)__v, (int)__bits);
@@ -245,6 +276,13 @@ _mm_srli_epi64(__m128i __v, uint8_t __bi
 
 _INTRINSATTR
 static __inline void
+_mm_storeu_si128(__m128i_u *__p, __m128i __v)
+{
+	((struct { __m128i_u __v; } _PACKALIAS *)__p)->__v = __v;
+}
+
+_INTRINSATTR
+static __inline void
 _mm_storeu_si32(void *__p, __m128i __v)
 {
 	((struct { int32_t __v; } _PACKALIAS *)__p)->__v = ((__v4si)__v)[0];
@@ -273,6 +311,32 @@ _mm_sub_epi64(__m128i __x, __m128i __y)
 
 _INTRINSATTR
 static __inline __m128i
+_mm_unpackhi_epi32(__m128i __lo, __m128i __hi)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (__m128i)__builtin_ia32_punpckhdq128((__v4si)__lo,
+	(__v4si)__hi);
+#elif defined(__clang__)
+	return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
+	2,6,3,7);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128i
+_mm_unpacklo_epi32(__m128i __lo, __m128i __hi)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (__m128i)__builtin_ia32_punpckldq128((__v4si)__lo,
+	(__v4si)__hi);
+#elif defined(__clang__)
+	return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
+	0,4,1,5);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128i
 _mm_unpacklo_epi64(__m128i __lo, __m128i __hi)
 {
 #if defined(__GNUC__) && !defined(__clang__)



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:44:32 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: immintrin.h

Log Message:
Fix target attribute on _mm_movehl_ps, fix clang _mm_unpacklo_epi64.

- _mm_movehl_ps is available in SSE2, no need for SSSE3.
- _mm_unpacklo_epi64 operates on v2di, not v4si; fix.
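
For reference, scalar renderings of the two behaviors (illustrative
stand-ins, not the committed code): the old 0,4,1,5 shuffle over
32-bit lanes computed the epi32 interleave, whereas epi64 must take
the low 64-bit lane of each operand.

#include <stdint.h>

/* What the buggy v4si shuffle computed: _mm_unpacklo_epi32. */
static void
unpacklo_epi32_ref(uint32_t out[4],
    const uint32_t lo[4], const uint32_t hi[4])
{
	out[0] = lo[0]; out[1] = hi[0];
	out[2] = lo[1]; out[3] = hi[1];
}

/* What _mm_unpacklo_epi64 must compute: low 64-bit halves only. */
static void
unpacklo_epi64_ref(uint64_t out[2],
    const uint64_t lo[2], const uint64_t hi[2])
{
	out[0] = lo[0];
	out[1] = hi[0];
}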


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/immintrin.h
diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.3 src/sys/crypto/aes/arch/x86/immintrin.h:1.4
--- src/sys/crypto/aes/arch/x86/immintrin.h:1.3	Sat Jul 25 22:31:04 2020
+++ src/sys/crypto/aes/arch/x86/immintrin.h	Sat Jul 25 22:44:32 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: immintrin.h,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
+/*	$NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -125,7 +125,7 @@ _mm_load_si128(const __m128i *__p)
 	return *__p;
 }
 
-_INTRINSATTR _SSSE3_ATTR
+_INTRINSATTR
 static __inline __m128
 _mm_movehl_ps(__m128 __v0, __m128 __v1)
 {
@@ -279,8 +279,8 @@ _mm_unpacklo_epi64(__m128i __lo, __m128i
 	return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)__lo,
 	(__v2di)__hi);
 #elif defined(__clang__)
-	return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
-	0, 4, 1, 5);
+	return (__m128i)__builtin_shufflevector((__v2di)__lo, (__v2di)__hi,
+	0,2);
 #endif
 }
 



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:42:31 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h

Log Message:
Fix missing clang big-endian case.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.4 src/sys/crypto/aes/arch/arm/arm_neon.h:1.5
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.4	Sat Jul 25 22:36:06 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Sat Jul 25 22:42:31 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.4 2020/07/25 22:36:06 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.5 2020/07/25 22:42:31 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -237,7 +237,12 @@ vld1q_u8(const uint8_t *__p8)
 	return (uint8x16_t)__builtin_neon_vld1v16qi(__p);
 #endif
 #elif defined(__clang__)
-	return (uint8x16_t)__builtin_neon_vld1q_v(__p8, 48);
+	uint8x16_t __v = (uint8x16_t)__builtin_neon_vld1q_v(__p8, 48);
+#ifndef __LITTLE_ENDIAN__
+	__v = __builtin_shufflevector(__v, __v,
+	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
+#endif
+	return __v;
 #endif
 }
 
@@ -442,7 +447,7 @@ vst1q_u8(uint8_t *__p8, uint8x16_t __v)
 #elif defined(__clang__)
 #ifndef __LITTLE_ENDIAN__
 	__v = __builtin_shufflevector(__v, __v,
-	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
 #endif
 	__builtin_neon_vst1q_v(__p8, __v, 48);
 #endif



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:43:01 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h

Log Message:
Add 32-bit load, store, and shift intrinsics.

vld1q_u32
vst1q_u32
vshlq_n_u32
vshrq_n_u32
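
A hedged usage sketch, in the ChaCha shape these exist for (the
shift-by-immediate forms require compile-time constant counts;
veorq_u32 and vorrq_u32 are standard <arm_neon.h> operations assumed
available):

#include <arm_neon.h>
#include <stdint.h>

/* Load four words, rotate each lane left by 16, XOR with a second
 * vector, and store -- one slice of a ChaCha double-round. */
static void
mix16(uint32_t *x, const uint32_t *y)
{
	uint32x4_t v = vld1q_u32(x);
	uint32x4_t r = vorrq_u32(vshlq_n_u32(v, 16), vshrq_n_u32(v, 16));

	vst1q_u32(x, veorq_u32(r, vld1q_u32(y)));
}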


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.5 src/sys/crypto/aes/arch/arm/arm_neon.h:1.6
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.5	Sat Jul 25 22:42:31 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Sat Jul 25 22:43:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.5 2020/07/25 22:42:31 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.6 2020/07/25 22:43:01 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -222,6 +222,30 @@ vgetq_lane_u32(uint32x4_t __v, uint8_t _
 #endif
 
 _INTRINSATTR
+static __inline uint32x4_t
+vld1q_u32(const uint32_t *__p32)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+#ifdef __aarch64__
+	const __builtin_aarch64_simd_si *__p =
+	(const __builtin_aarch64_simd_si *)__p32;
+
+	return (uint32x4_t)__builtin_aarch64_ld1v4si(__p);
+#else
+	const __builtin_neon_si *__p = (const __builtin_neon_si *)__p32;
+
+	return (uint32x4_t)__builtin_neon_vld1v4si(__p);
+#endif
+#elif defined(__clang__)
+	uint32x4_t __v = (uint32x4_t)__builtin_neon_vld1q_v(__p32, 50);
+#ifndef __LITTLE_ENDIAN__
+	__v = __builtin_shufflevector(__v, __v, 3,2,1,0);
+#endif
+	return __v;
+#endif
+}
+
+_INTRINSATTR
 static __inline uint8x16_t
 vld1q_u8(const uint8_t *__p8)
 {
@@ -383,6 +407,38 @@ vsetq_lane_u64(uint64_t __x, uint64x2_t 
 
 #if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
+static __inline uint32x4_t
+vshlq_n_u32(uint32x4_t __v, uint8_t __bits)
+{
+#ifdef __aarch64__
+	return (uint32x4_t)__builtin_aarch64_ashlv4si((int32x4_t)__v, __bits);
+#else
+	return (uint32x4_t)__builtin_neon_vshl_nv4si((int32x4_t)__v, __bits);
+#endif
+}
+#elif defined(__clang__)
+#define	vshlq_n_u32(__v, __bits)	  \
+	(uint32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 50)
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
+static __inline uint32x4_t
+vshrq_n_u32(uint32x4_t __v, uint8_t __bits)
+{
+#ifdef __aarch64__
+	return (uint32x4_t)__builtin_aarch64_lshrv4si((int32x4_t)__v, __bits);
+#else
+	return (uint32x4_t)__builtin_neon_vshru_nv4si((int32x4_t)__v, __bits);
+#endif
+}
+#elif defined(__clang__)
+#define	vshrq_n_u32(__v, __bits)	  \
+	(uint32x4_t)__builtin_neon_vshrq_n_v((int32x4_t)(__v), (__bits), 50)
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
 static __inline uint8x16_t
 vshrq_n_u8(uint8x16_t __v, uint8_t __bits)
 {
@@ -432,6 +488,28 @@ vsliq_n_s32(int32x4_t __vins, int32x4_t 
 
 _INTRINSATTR
 static __inline void
+vst1q_u32(uint32_t *__p32, uint32x4_t __v)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+#ifdef __aarch64__
+	__builtin_aarch64_simd_si *__p = (__builtin_aarch64_simd_si *)__p32;
+
+	__builtin_aarch64_st1v4si(__p, (int32x4_t)__v);
+#else
+	__builtin_neon_si *__p = (__builtin_neon_si *)__p32;
+
+	__builtin_neon_vst1v4si(__p, (int32x4_t)__v);
+#endif
+#elif defined(__clang__)
+#ifndef __LITTLE_ENDIAN__
+	__v = __builtin_shufflevector(__v, __v, 3,2,1,0);
+#endif
+	__builtin_neon_vst1q_v(__p32, __v, 50);
+#endif
+}
+
+_INTRINSATTR
+static __inline void
 vst1q_u8(uint8_t *__p8, uint8x16_t __v)
 {
 #if defined(__GNUC__) && !defined(__clang__)



CVS commit: src/sys/crypto/aes

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:42:03 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_impl.c

Log Message:
Make aes boot message verbose-only.


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/aes_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_impl.c
diff -u src/sys/crypto/aes/aes_impl.c:1.7 src/sys/crypto/aes/aes_impl.c:1.8
--- src/sys/crypto/aes/aes_impl.c:1.7	Sat Jul 25 22:36:42 2020
+++ src/sys/crypto/aes/aes_impl.c	Sat Jul 25 22:42:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $	*/
+/*	$NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $");
 
 #include 
 #include 
@@ -111,7 +111,7 @@ aes_select(void)
 	if (aes_impl == NULL)
 		panic("AES self-tests failed");
 
-	aprint_normal("aes: %s\n", aes_impl->ai_name);
+	aprint_verbose("aes: %s\n", aes_impl->ai_name);
 	return 0;
 }
 



CVS commit: src/sys/crypto/aes

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:36:42 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_impl.c aes_selftest.c

Log Message:
Remove now-needless AES-CCM fallback logic.

These paths are no longer exercised because all of the aes_impls now
do the AES-CCM operations.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/aes_impl.c
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_selftest.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_impl.c
diff -u src/sys/crypto/aes/aes_impl.c:1.6 src/sys/crypto/aes/aes_impl.c:1.7
--- src/sys/crypto/aes/aes_impl.c:1.6	Sat Jul 25 22:27:53 2020
+++ src/sys/crypto/aes/aes_impl.c	Sat Jul 25 22:36:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_impl.c,v 1.6 2020/07/25 22:27:53 riastradh Exp $	*/
+/*	$NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.6 2020/07/25 22:27:53 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $");
 
 #include 
 #include 
@@ -288,16 +288,6 @@ aes_xts_dec(struct aesdec *dec, const ui
 	aes_impl->ai_xts_dec(dec, in, out, nbytes, tweak, nrounds);
 }
 
-static void
-xor16(uint8_t *x, const uint8_t *a, const uint8_t *b)
-{
-
-	le32enc(x + 4*0, le32dec(a + 4*0) ^ le32dec(b + 4*0));
-	le32enc(x + 4*1, le32dec(a + 4*1) ^ le32dec(b + 4*1));
-	le32enc(x + 4*2, le32dec(a + 4*2) ^ le32dec(b + 4*2));
-	le32enc(x + 4*3, le32dec(a + 4*3) ^ le32dec(b + 4*3));
-}
-
 void
 aes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
 size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
@@ -307,15 +297,7 @@ aes_cbcmac_update1(const struct aesenc *
 	KASSERT(nbytes % 16 == 0);
 
 	aes_guarantee_selected();
-	if (aes_impl->ai_cbcmac_update1) {
-		aes_impl->ai_cbcmac_update1(enc, in, nbytes, auth, nrounds);
-		return;
-	}
-
-	for (; nbytes; in += 16, nbytes -= 16) {
-		xor16(auth, auth, in);
-		aes_enc(enc, auth, auth, nrounds);
-	}
+	aes_impl->ai_cbcmac_update1(enc, in, nbytes, auth, nrounds);
 }
 
 void
@@ -323,26 +305,12 @@ aes_ccm_enc1(const struct aesenc *enc, c
 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
 uint32_t nrounds)
 {
-	uint8_t *auth = authctr;
-	uint8_t *ctr = authctr + 16;
 
 	KASSERT(nbytes);
 	KASSERT(nbytes % 16 == 0);
 
 	aes_guarantee_selected();
-	if (aes_impl->ai_ccm_enc1) {
-		aes_impl->ai_ccm_enc1(enc, in, out, nbytes, auth, nrounds);
-		return;
-	}
-
-	for (; nbytes; in += 16, out += 16, nbytes -= 16) {
-		xor16(auth, auth, in);
-		aes_enc(enc, auth, auth, nrounds);
-
-		be32enc(ctr + 12, 1 + be32dec(ctr + 12));
-		aes_enc(enc, ctr, out, nrounds);
-		xor16(out, out, in);
-	}
+	aes_impl->ai_ccm_enc1(enc, in, out, nbytes, authctr, nrounds);
 }
 
 void
@@ -350,26 +318,12 @@ aes_ccm_dec1(const struct aesenc *enc, c
 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
 uint32_t nrounds)
 {
-	uint8_t *auth = authctr;
-	uint8_t *ctr = authctr + 16;
 
 	KASSERT(nbytes);
 	KASSERT(nbytes % 16 == 0);
 
 	aes_guarantee_selected();
-	if (aes_impl->ai_ccm_dec1) {
-		aes_impl->ai_ccm_dec1(enc, in, out, nbytes, auth, nrounds);
-		return;
-	}
-
-	for (; nbytes >= 16; in += 16, out += 16, nbytes -= 16) {
-		be32enc(ctr + 12, 1 + be32dec(ctr + 12));
-		aes_enc(enc, ctr, out, nrounds);
-		xor16(out, out, in);
-
-		xor16(auth, auth, out);
-		aes_enc(enc, auth, auth, nrounds);
-	}
+	aes_impl->ai_ccm_dec1(enc, in, out, nbytes, authctr, nrounds);
 }
 
 /*

Index: src/sys/crypto/aes/aes_selftest.c
diff -u src/sys/crypto/aes/aes_selftest.c:1.4 src/sys/crypto/aes/aes_selftest.c:1.5
--- src/sys/crypto/aes/aes_selftest.c:1.4	Sat Jul 25 22:27:53 2020
+++ src/sys/crypto/aes/aes_selftest.c	Sat Jul 25 22:36:42 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_selftest.c,v 1.4 2020/07/25 22:27:53 riastradh Exp $	*/
+/*	$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.4 2020/07/25 22:27:53 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $");
 
 #ifdef _KERNEL
 
@@ -424,9 +424,6 @@ aes_selftest_cbcmac(const struct aes_imp
 	uint8_t auth[16];
 	const unsigned nr = AES_128_NROUNDS;
 
-	if (impl->ai_cbcmac_update1 == NULL)
-		return 0;
-
 	memset(auth, 0, sizeof auth);
 
 	impl->ai_setenckey(&enc, key, nr);
@@ -500,9 +497,6 @@ aes_selftest_ccm(const struct aes_impl *
 	const unsigned nr = AES_128_NROUNDS;
 	int result = 0;
 
-	if (impl->ai_ccm_enc1 == NULL)
-		return 0;
-
 	impl->ai_setenckey(&enc, key, nr);
 
 	memset(authctr, 0, 16);
@@ -521,9 +515,6 @@ aes_selftest_ccm(const struct aes_impl *
 		result |= aes_selftest_fail(impl

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:36:06 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.h aes_neon_impl.c aes_neon_subr.c
arm_neon.h

Log Message:
Implement AES-CCM with NEON.
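
Behind the NEON code, one combined CCM step has this portable shape
(a hedged scalar sketch with a caller-supplied block cipher E, not
the committed code): the CBC-MAC half absorbs the plaintext block,
and the CTR half increments the trailing 32-bit big-endian counter
word before encrypting it into a keystream pad.

#include <stdint.h>

typedef void (*block_fn)(const void *, const uint8_t[16], uint8_t[16]);

static void
ccm_enc_block(block_fn E, const void *key, const uint8_t in[16],
    uint8_t out[16], uint8_t authctr[32])
{
	uint8_t *auth = authctr, *ctr = authctr + 16;
	uint8_t pad[16];
	uint32_t c;
	unsigned i;

	/* CBC-MAC half: auth' = E(auth ^ ptxt) */
	for (i = 0; i < 16; i++)
		auth[i] ^= in[i];
	E(key, auth, auth);

	/* CTR half: increment the big-endian counter word... */
	c = ((uint32_t)ctr[12] << 24) | ((uint32_t)ctr[13] << 16) |
	    ((uint32_t)ctr[14] << 8) | (uint32_t)ctr[15];
	c++;
	ctr[12] = c >> 24; ctr[13] = c >> 16; ctr[14] = c >> 8;
	ctr[15] = c;

	/* ...and use E(ctr) as the keystream pad. */
	E(key, ctr, pad);
	for (i = 0; i < 16; i++)
		out[i] = in[i] ^ pad[i];
}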


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon.h \
src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.c \
src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.h
diff -u src/sys/crypto/aes/arch/arm/aes_neon.h:1.2 src/sys/crypto/aes/arch/arm/aes_neon.h:1.3
--- src/sys/crypto/aes/arch/arm/aes_neon.h:1.2	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.h	Sat Jul 25 22:36:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.h,v 1.2 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.h,v 1.3 2020/07/25 22:36:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -59,6 +59,12 @@ void aes_neon_xts_enc(const struct aesen
 uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
 void aes_neon_xts_dec(const struct aesdec *, const uint8_t[static 16],
 uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
+void aes_neon_cbcmac_update1(const struct aesenc *, const uint8_t[static 16],
+size_t, uint8_t[static 16], uint32_t);
+void aes_neon_ccm_enc1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
+void aes_neon_ccm_dec1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
 
 int aes_neon_selftest(void);
 
Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.2 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.3
--- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.2	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c	Sat Jul 25 22:36:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_subr.c,v 1.3 2020/07/25 22:36:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,9 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.3 2020/07/25 22:36:06 riastradh Exp $");
+
+#include 
 
 #ifdef _KERNEL
 #include 
@@ -213,6 +215,89 @@ aes_neon_xts_dec(const struct aesdec *de
 	storeblock(tweak, t);
 }
 
+void
+aes_neon_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
+{
+	uint8x16_t auth;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(auth0);
+	for (; nbytes; nbytes -= 16, in += 16)
+		auth = aes_neon_enc1(enc, auth ^ loadblock(in), nrounds);
+	storeblock(auth0, auth);
+}
+
+/*
+ * XXX On aarch64, we have enough registers that we should be able to
+ * pipeline two simultaneous vpaes computations in an `aes_neon_enc2'
+ * function, which should substantially improve CCM throughput.
+ */
+
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+#define	vbetoh32q_u8	vrev32q_u8
+#define	vhtobe32q_u8	vrev32q_u8
+#elif _BYTE_ORDER == _BIG_ENDIAN
+#define	vbetoh32q_u8(x)	(x)
+#define	vhtobe32q_u8(x)	(x)
+#else
+#error what kind of endian are you anyway
+#endif
+
+void
+aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+	const uint32x4_t ctr32_inc = {0, 0, 0, 1};
+	uint8x16_t auth, ptxt, ctr_be;
+	uint32x4_t ctr;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(authctr);
+	ctr_be = loadblock(authctr + 16);
+	ctr = vreinterpretq_u32_u8(vbetoh32q_u8(ctr_be));
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		ptxt = loadblock(in);
+		auth = aes_neon_enc1(enc, auth ^ ptxt, nrounds);
+		ctr = vaddq_u32(ctr, ctr32_inc);
+		ctr_be = vhtobe32q_u8(vreinterpretq_u8_u32(ctr));
+		storeblock(out, ptxt ^ aes_neon_enc1(enc, ctr_be, nrounds));
+	}
+	storeblock(authctr, auth);
+	storeblock(authctr + 16, ctr_be);
+}
+
+void
+aes_neon_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+	const uint32x4_t ctr32_inc = {0, 0, 0, 1};
+	uint8x16_t auth, ctr_be, ptxt;
+	uint32x4_t ctr;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(authctr);
+	ctr_be = loadblock(authctr + 16);
+	ctr = vreinterpretq_u32_u8(vbetoh32q_u8(ctr_be));
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		ctr = vaddq_u32(ctr, ctr32_inc);
+		ctr_be = vhtobe32q_u8(vreinterpretq_u8_u32(ctr));
+		ptxt = loadblock(in) ^ aes_neon_enc1(enc, ctr

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:32:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Invert some loops to save a branch instruction on every iteration.
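
In C terms the transformation looks like this (schematic sketch; the
multiply is a stand-in for one AES round): the top-tested loop pays a
conditional exit plus an unconditional back-branch every round, while
the inverted do/while pays a single conditional branch, at the price
of requiring at least one iteration -- which the AES round counts
guarantee.

/* Before: conditional exit + unconditional back-branch per round. */
static void
rounds_top(unsigned n, unsigned *state)
{
	unsigned i;

	for (i = 0; i < n; i++)
		*state = *state * 2654435761u + 1;	/* stand-in round */
}

/* After: one conditional branch per round; requires n >= 1. */
static void
rounds_inverted(unsigned n, unsigned *state)
{
	do
		*state = *state * 2654435761u + 1;	/* stand-in round */
	while (--n != 0);
}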


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.6 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.7
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.6	Wed Jul 22 06:15:21 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Sat Jul 25 22:32:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.6 2020/07/22 06:15:21 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.7 2020/07/25 22:32:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -437,13 +437,13 @@ END(aesarmv8_setenckey256)
  */
 ENTRY(aesarmv8_enctodec)
 	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
-1:	str	q0, [x1], #0x10	/* store round key */
+	b	2f
+1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
+2:	str	q0, [x1], #0x10	/* store round key */
 	subs	x2, x2, #1	/* count down round */
 	ldr	q0, [x0, x2, lsl #4]	/* load previous round key */
-	b.eq	2f		/* stop if this is the last one */
-	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
-	b	1b
-2:	str	q0, [x1]	/* store first round key verbatim */
+	b.ne	1b		/* repeat if there's more */
+	str	q0, [x1]	/* store first round key verbatim */
 	ret
 END(aesarmv8_enctodec)
 
@@ -536,17 +536,17 @@ ENTRY(aesarmv8_cbc_dec1)
 	add	x2, x2, x3		/* x2 := pointer past end of out */
 	ldr	q0, [x1, #-0x10]!	/* q0 := last ciphertext block */
 	str	q0, [x4]		/* update iv */
-1:	mov	x0, x9			/* x0 := enckey */
-	mov	x3, x5			/* x3 := nrounds */
-	bl	aesarmv8_dec1		/* q0 := cv ^ ptxt; trash x0/x3/q16 */
-	subs	x10, x10, #0x10		/* count down nbytes */
-	b.eq	2f			/* stop if this is the first block */
-	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
+	b	2f
+1:	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
 	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
 	str	q0, [x2, #-0x10]!	/* store plaintext block */
 	mov	v0.16b, v31.16b		/* move cv = ciphertext block */
-	b	1b
-2:	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
+2:	mov	x0, x9			/* x0 := enckey */
+	mov	x3, x5			/* x3 := nrounds */
+	bl	aesarmv8_dec1		/* q0 := cv ^ ptxt; trash x0/x3/q16 */
+	subs	x10, x10, #0x10		/* count down nbytes */
+	b.ne	1b			/* repeat if more blocks */
+	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
 	str	q0, [x2, #-0x10]!	/* store first plaintext block */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
@@ -573,7 +573,11 @@ ENTRY(aesarmv8_cbc_dec8)
 	add	x2, x2, x3		/* x2 := pointer past end of out */
 	ldp	q6, q7, [x1, #-0x20]!	/* q6, q7 := last ciphertext blocks */
 	str	q7, [x4]		/* update iv */
-1:	ldp	q4, q5, [x1, #-0x20]!
+	b	2f
+1:	ldp	q6, q7, [x1, #-0x20]!
+	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
+	stp	q0, q1, [x2, #-0x20]!
+2:	ldp	q4, q5, [x1, #-0x20]!
 	ldp	q2, q3, [x1, #-0x20]!
 	ldp	q0, q1, [x1, #-0x20]!
 	mov	v31.16b, v6.16b		/* q[24+i] := cv[i], 0

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:31:32 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Implement AES-CCM with VIA ACE.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.4 src/sys/crypto/aes/arch/x86/aes_via.c:1.5
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.4	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Sat Jul 25 22:31:32 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -674,6 +674,176 @@ aesvia_xts_dec(const struct aesdec *dec,
 	explicit_memset(t, 0, sizeof t);
 }
 
+static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "cbcmac aligned");
+EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
+static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "cbcmac unaligned");
+EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);
+
+static void
+aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
+{
+	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
+	uint8_t authbuf[16] __aligned(16);
+	uint8_t *auth = auth0;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	if ((uintptr_t)auth0 & 0xf) {
+		memcpy(authbuf, auth0, 16);
+		auth = authbuf;
+		cbcmac_unaligned_evcnt.ev_count++;
+	} else {
+		cbcmac_aligned_evcnt.ev_count++;
+	}
+
+	fpu_kern_enter();
+	aesvia_reload_keys();
+	for (; nbytes; nbytes -= 16, in += 16) {
+		xor128(auth, auth, in);
+		aesvia_encN(enc, auth, auth, 1, cw0);
+	}
+	fpu_kern_leave();
+
+	if ((uintptr_t)auth0 & 0xf) {
+		memcpy(auth0, authbuf, 16);
+		explicit_memset(authbuf, 0, sizeof authbuf);
+	}
+}
+
+static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmenc aligned");
+EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
+static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmenc unaligned");
+EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);
+
+static void
+aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
+uint32_t nrounds)
+{
+	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
+	uint8_t authctrbuf[32] __aligned(16);
+	uint8_t *authctr;
+	uint32_t c0, c1, c2, c3;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	if ((uintptr_t)authctr0 & 0xf) {
+		memcpy(authctrbuf, authctr0, 16);
+		authctr = authctrbuf;
+		ccmenc_unaligned_evcnt.ev_count++;
+	} else {
+		ccmenc_aligned_evcnt.ev_count++;
+	}
+	c0 = le32dec(authctr0 + 16 + 4*0);
+	c1 = le32dec(authctr0 + 16 + 4*1);
+	c2 = le32dec(authctr0 + 16 + 4*2);
+	c3 = be32dec(authctr0 + 16 + 4*3);
+
+	/*
+	 * In principle we could use REP XCRYPTCTR here, but that
+	 * doesn't help to compute the CBC-MAC step, and certain VIA
+	 * CPUs have some weird errata with REP XCRYPTCTR that make it
+	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
+	 * simultaneously compute the CBC-MAC step and the CTR step.
+	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
+	 * who knows...)
+	 */
+	fpu_kern_enter();
+	aesvia_reload_keys();
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		xor128(authctr, authctr, in);
+		le32enc(authctr + 16 + 4*0, c0);
+		le32enc(authctr + 16 + 4*1, c1);
+		le32enc(authctr + 16 + 4*2, c2);
+		be32enc(authctr + 16 + 4*3, ++c3);
+		aesvia_encN(enc, authctr, authctr, 2, cw0);
+		xor128(out, in, authctr + 16);
+	}
+	fpu_kern_leave();
+
+	if ((uintptr_t)authctr0 & 0xf) {
+		memcpy(authctr0, authctrbuf, 16);
+		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
+	}
+
+	le32enc(authctr0 + 16 + 4*0, c0);
+	le32enc(authctr0 + 16 + 4*1, c1);
+	le32enc(authctr0 + 16 + 4*2, c2);
+	be32enc(authctr0 + 16 + 4*3, c3);
+}
+
+static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmdec aligned");
+EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
+static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmdec unaligned");
+EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);
+
+static void
+aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
+   

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:31:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_ssse3.h aes_ssse3_impl.c
aes_ssse3_subr.c immintrin.h

Log Message:
Implement AES-CCM with SSSE3.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ssse3.h \
src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c \
src/sys/crypto/aes/arch/x86/immintrin.h
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ssse3.h
diff -u src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.2 src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.3
--- src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.2	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_ssse3.h	Sat Jul 25 22:31:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ssse3.h,v 1.2 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_ssse3.h,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -59,6 +59,12 @@ void aes_ssse3_xts_enc(const struct aese
 uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
 void aes_ssse3_xts_dec(const struct aesdec *, const uint8_t[static 16],
 uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
+void aes_ssse3_cbcmac_update1(const struct aesenc *, const uint8_t[static 16],
+size_t, uint8_t[static 16], uint32_t);
+void aes_ssse3_ccm_enc1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
+void aes_ssse3_ccm_dec1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
 
 int aes_ssse3_selftest(void);
 
Index: src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.3
--- src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.2	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c	Sat Jul 25 22:31:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ssse3_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -208,6 +208,75 @@ aes_ssse3_xts_dec(const struct aesdec *d
 	storeblock(tweak, t);
 }
 
+void
+aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
+{
+	__m128i auth;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(auth0);
+	for (; nbytes; nbytes -= 16, in += 16)
+		auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds);
+	storeblock(auth0, auth);
+}
+
+void
+aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
+	const __m128i bs32 =
+	_mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
+	__m128i auth, ctr_be, ctr, ptxt;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(authctr);
+	ctr_be = loadblock(authctr + 16);
+	ctr = _mm_shuffle_epi8(ctr_be, bs32);
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		ptxt = loadblock(in);
+		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
+		ctr = _mm_add_epi32(ctr, ctr32_inc);
+		ctr_be = _mm_shuffle_epi8(ctr, bs32);
+		storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds));
+	}
+	storeblock(authctr, auth);
+	storeblock(authctr + 16, ctr_be);
+}
+
+void
+aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
+	const __m128i bs32 =
+	_mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
+	__m128i auth, ctr_be, ctr, ptxt;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(authctr);
+	ctr_be = loadblock(authctr + 16);
+	ctr = _mm_shuffle_epi8(ctr_be, bs32);
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		ctr = _mm_add_epi32(ctr, ctr32_inc);
+		ctr_be = _mm_shuffle_epi8(ctr, bs32);
+		ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds);
+		storeblock(out, ptxt);
+		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
+	}
+	storeblock(authctr, auth);
+	storeblock(authctr + 16, ctr_be);
+}
+
 int
 aes_ssse3_selftest(void)
 {
Index: src/sys/crypto/aes/arch/x86/immintrin.h
diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.2 src/sys/crypto/ae

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:29:56 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_sse2.h aes_sse2_impl.c aes_sse2_subr.c

Log Message:
Implement AES-CCM with SSE2.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2.h
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_sse2.h
diff -u src/sys/crypto/aes/arch/x86/aes_sse2.h:1.3 src/sys/crypto/aes/arch/x86/aes_sse2.h:1.4
--- src/sys/crypto/aes/arch/x86/aes_sse2.h:1.3	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2.h	Sat Jul 25 22:29:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2.h,v 1.3 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2.h,v 1.4 2020/07/25 22:29:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -58,6 +58,12 @@ void aes_sse2_xts_enc(const struct aesen
 uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
 void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16],
 uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_cbcmac_update1(const struct aesenc *, const uint8_t[static 16],
+size_t, uint8_t[static 16], uint32_t);
+void aes_sse2_ccm_enc1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
+void aes_sse2_ccm_dec1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
 
 int aes_sse2_selftest(void);
 

Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.4 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.5
--- src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.4	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c	Sat Jul 25 22:29:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_impl.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_impl.c,v 1.5 2020/07/25 22:29:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.5 2020/07/25 22:29:56 riastradh Exp $");
 
 #include 
 #include 
@@ -143,6 +143,39 @@ aes_sse2_xts_dec_impl(const struct aesde
 	fpu_kern_leave();
 }
 
+static void
+aes_sse2_cbcmac_update1_impl(const struct aesenc *enc,
+const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16],
+uint32_t nrounds)
+{
+
+	fpu_kern_enter();
+	aes_sse2_cbcmac_update1(enc, in, nbytes, auth, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aes_sse2_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	fpu_kern_enter();
+	aes_sse2_ccm_enc1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aes_sse2_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	fpu_kern_enter();
+	aes_sse2_ccm_dec1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
 static int
 aes_sse2_probe(void)
 {
@@ -182,4 +215,7 @@ struct aes_impl aes_sse2_impl = {
 	.ai_cbc_dec = aes_sse2_cbc_dec_impl,
 	.ai_xts_enc = aes_sse2_xts_enc_impl,
 	.ai_xts_dec = aes_sse2_xts_dec_impl,
+	.ai_cbcmac_update1 = aes_sse2_cbcmac_update1_impl,
+	.ai_ccm_enc1 = aes_sse2_ccm_enc1_impl,
+	.ai_ccm_dec1 = aes_sse2_ccm_dec1_impl,
 };

Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.2 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3
--- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.2	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c	Sat Jul 25 22:29:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -518,6 +518,180 @@ out:	/* Store the updated tweak.  */
 	explicit_memset(t, 0, sizeof t);
 }
 
+void
+aes_sse2_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
+{
+	uint64_t sk_exp[120];
+	__m128i q[4];
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	/* Expand r

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:29:06 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_ni.c aes_ni.h aes_ni_64.S

Log Message:
Implement AES-CCM with x86 AES-NI.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_ni.c \
src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ni.c
diff -u src/sys/crypto/aes/arch/x86/aes_ni.c:1.3 src/sys/crypto/aes/arch/x86/aes_ni.c:1.4
--- src/sys/crypto/aes/arch/x86/aes_ni.c:1.3	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni.c	Sat Jul 25 22:29:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_ni.c,v 1.4 2020/07/25 22:29:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.4 2020/07/25 22:29:06 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -204,6 +204,48 @@ aesni_xts_dec_impl(const struct aesdec *
 	fpu_kern_leave();
 }
 
+static void
+aesni_cbcmac_update1_impl(const struct aesenc *enc,
+const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16],
+uint32_t nrounds)
+{
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	fpu_kern_enter();
+	aesni_cbcmac_update1(enc, in, nbytes, auth, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aesni_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	fpu_kern_enter();
+	aesni_ccm_enc1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aesni_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	fpu_kern_enter();
+	aesni_ccm_dec1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
 static int
 aesni_xts_update_selftest(void)
 {
@@ -273,4 +315,7 @@ struct aes_impl aes_ni_impl = {
 	.ai_cbc_dec = aesni_cbc_dec_impl,
 	.ai_xts_enc = aesni_xts_enc_impl,
 	.ai_xts_dec = aesni_xts_dec_impl,
+	.ai_cbcmac_update1 = aesni_cbcmac_update1_impl,
+	.ai_ccm_enc1 = aesni_ccm_enc1_impl,
+	.ai_ccm_dec1 = aesni_ccm_dec1_impl,
 };
Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.4
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3	Sat Jul 25 22:11:05 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Sat Jul 25 22:29:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -951,6 +951,142 @@ ENTRY(aesni_xts_update)
 END(aesni_xts_update)
 
 /*
+ * aesni_cbcmac_update1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
+ * size_t nbytes@rdx, uint8_t auth[16] @rcx, uint32_t nrounds@r8d)
+ *
+ *	Update CBC-MAC.
+ *
+ *	nbytes must be a positive integral multiple of 16.
+ *
+ *	Standard ABI calling convention.
+ */
+ENTRY(aesni_cbcmac_update1)
+	movdqu	(%rcx),%xmm0		/* xmm0 := auth */
+	mov	%rdx,%r10		/* r10 := nbytes */
+	mov	%rcx,%rdx		/* rdx := &auth */
+1:	pxor	(%rsi),%xmm0		/* xmm0 ^= plaintext block */
+	lea	0x10(%rsi),%rsi
+	mov	%r8d,%ecx		/* ecx := nrounds */
+	call	aesni_enc1		/* xmm0 := auth'; trash rax,rcx,xmm8 */
+	sub	$0x10,%r10
+	jnz	1b
+	movdqu	%xmm0,(%rdx)		/* store auth' */
+	ret
+END(aesni_cbcmac_update1)
+
+/*
+ * aesni_ccm_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
+ * uint8_t *out@rdx, size_t nbytes@rcx,
+ * uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
+ *
+ *	Update CCM encryption.
+ *
+ *	nbytes must be a positive integral multiple of 16.
+ *
+ *	Standard ABI calling convention.
+ */
+ENTRY(aesni_ccm_enc1)
+	mov	%rcx,%r10		/* r10 := nbytes */
+	movdqu	0x10(%r8),%xmm2		/* xmm2 := ctr (be) */
+	movdqa	bswap32(%rip),%xmm4	/* xmm4 := bswap32 table */
+	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
+	movdqu	(%r8),%xmm0		/* xmm0 := auth */
+	pshufb	%xmm4,%xmm2		/* xmm2 := ctr (le) */
+1:	movdqu	(%rsi),%xmm3		/* xmm3 := plaintext block */
+	paddd	%xmm5,%xmm2		/* increment ctr (32-bit) */
+	lea	0x10(%rsi),%rsi
+	movdqa	%xmm2,%xmm1		/* xmm1 := ctr (le) */
+	mov	%r9d,%ecx		/* ecx := nrounds */
+	pshufb	%xmm4,%xmm1		/* xmm1 := ctr (be) */
+	pxor	%xmm3,%xmm0		/* xmm0 := auth ^ ptxt */
+	call	aesni_enc2		/* trash rax/rcx/xmm8 */
+	pxor	%xmm1,%xmm3		/* xmm3 := ciphertext block */
+	su

CVS commit: src/sys/crypto/aes

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:28:27 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_bear.c

Log Message:
Implement AES-CCM with BearSSL's bitsliced 32-bit aes_ct.
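
The trick in the CCM routines below is the lane layout: br_aes_ct_ortho() bitslices q[] as two interleaved 128-bit blocks, even words carrying block 0 and odd words block 1, so a single bitsliced encryption advances the CBC-MAC state and the CTR block together. A minimal sketch of that packing, inferred from the diff below rather than from BearSSL documentation:

#include <stdint.h>

/*
 * Pack the authenticator a[4] into lane 0 and the counter block b[4]
 * into lane 1 of the interleaved q[8] that br_aes_ct_ortho() bitslices.
 */
static void
pack_two_lanes(uint32_t q[8], const uint32_t a[4], const uint32_t b[4])
{
	unsigned i;

	for (i = 0; i < 4; i++) {
		q[2*i] = a[i];		/* even words: CBC-MAC state */
		q[2*i + 1] = b[i];	/* odd words: CTR keystream input */
	}
}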


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/aes_bear.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_bear.c
diff -u src/sys/crypto/aes/aes_bear.c:1.3 src/sys/crypto/aes/aes_bear.c:1.4
--- src/sys/crypto/aes/aes_bear.c:1.3	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/aes_bear.c	Sat Jul 25 22:28:27 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_bear.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $");
 
 #include 
 #include 
@@ -597,6 +597,220 @@ out:	/* Store the updated tweak.  */
 	explicit_memset(q, 0, sizeof q);
 }
 
+static void
+aesbear_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
+{
+	uint32_t sk_exp[120];
+	uint32_t q[8];
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	/* Expand round keys for bitslicing.  */
+	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
+
+	/* Initialize garbage block.  */
+	q[1] = q[3] = q[5] = q[7] = 0;
+
+	/* Load initial authenticator.  */
+	q[2*0] = le32dec(auth + 4*0);
+	q[2*1] = le32dec(auth + 4*1);
+	q[2*2] = le32dec(auth + 4*2);
+	q[2*3] = le32dec(auth + 4*3);
+
+	for (; nbytes; nbytes -= 16, in += 16) {
+		/* Combine input block.  */
+		q[2*0] ^= le32dec(in + 4*0);
+		q[2*1] ^= le32dec(in + 4*1);
+		q[2*2] ^= le32dec(in + 4*2);
+		q[2*3] ^= le32dec(in + 4*3);
+
+		/* Transform to bitslice, encrypt, transform from bitslice.  */
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+	}
+
+	/* Store updated authenticator.  */
+	le32enc(auth + 4*0, q[2*0]);
+	le32enc(auth + 4*1, q[2*1]);
+	le32enc(auth + 4*2, q[2*2]);
+	le32enc(auth + 4*3, q[2*3]);
+
+	/* Paranoia: Zero temporary buffers.  */
+	explicit_memset(sk_exp, 0, sizeof sk_exp);
+	explicit_memset(q, 0, sizeof q);
+}
+
+static void
+aesbear_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
+size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
+{
+	uint32_t sk_exp[120];
+	uint32_t q[8];
+	uint32_t c0, c1, c2, c3;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	/* Expand round keys for bitslicing.  */
+	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
+
+	/* Set first block to authenticator.  */
+	q[2*0] = le32dec(authctr + 4*0);
+	q[2*1] = le32dec(authctr + 4*1);
+	q[2*2] = le32dec(authctr + 4*2);
+	q[2*3] = le32dec(authctr + 4*3);
+
+	/* Load initial counter block, big-endian so we can increment it.  */
+	c0 = le32dec(authctr + 16 + 4*0);
+	c1 = le32dec(authctr + 16 + 4*1);
+	c2 = le32dec(authctr + 16 + 4*2);
+	c3 = be32dec(authctr + 16 + 4*3);
+
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		/* Update authenticator.  */
+		q[2*0] ^= le32dec(in + 4*0);
+		q[2*1] ^= le32dec(in + 4*1);
+		q[2*2] ^= le32dec(in + 4*2);
+		q[2*3] ^= le32dec(in + 4*3);
+
+		/* Increment 32-bit counter.  */
+		q[2*0 + 1] = c0;
+		q[2*1 + 1] = c1;
+		q[2*2 + 1] = c2;
+		q[2*3 + 1] = bswap32(++c3);
+
+		/* Encrypt authenticator and counter.  */
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		/* Encrypt with CTR output.  */
+		le32enc(out + 4*0, le32dec(in + 4*0) ^ q[2*0 + 1]);
+		le32enc(out + 4*1, le32dec(in + 4*1) ^ q[2*1 + 1]);
+		le32enc(out + 4*2, le32dec(in + 4*2) ^ q[2*2 + 1]);
+		le32enc(out + 4*3, le32dec(in + 4*3) ^ q[2*3 + 1]);
+	}
+
+	/* Update authenticator.  */
+	le32enc(authctr + 4*0, q[2*0]);
+	le32enc(authctr + 4*1, q[2*1]);
+	le32enc(authctr + 4*2, q[2*2]);
+	le32enc(authctr + 4*3, q[2*3]);
+
+	/* Update counter.  */
+	be32enc(authctr + 16 + 4*3, c3);
+
+	/* Paranoia: Zero temporary buffers.  */
+	explicit_memset(sk_exp, 0, sizeof sk_exp);
+	explicit_memset(q, 0, sizeof q);
+}
+
+static void
+aesbear_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
+size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
+{
+	uint32_t sk_exp[120];
+	uint32_t q[8];
+	uint32_t c0, c1, c2, c3;
+	uint32_t b0, b1, b2, b3;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	/* Expand round keys for bitslicing.  */
+	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
+
+	/* Load initial counter block, big-endian so we can increment it.  */
+	c0 = le32dec(authctr + 16 + 4*0);
+	c1 = le32dec(authctr + 16 + 4*1);
+	c2 = le32dec(authctr + 16 + 4*2);
+	c3 = be32dec(authctr + 16 + 4*3);
+

CVS commit: src/sys/crypto/aes

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:27:53 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c aes_impl.c aes_impl.h aes_selftest.c

Log Message:
Push CBC-MAC and CCM block updates into the aes_impl API.

This should help reduce the setup and teardown overhead (enabling and
disabling fpu, or expanding bitsliced keys) for CCM, as used in
802.11 WPA2 CCMP.  But all the fiddly formatting details remain in
aes_ccm.c to reduce the effort of implementing it -- at the cost of a
handful of additional setups and teardowns per message.

Not yet implemented by any of the aes_impls, so leave a fallback that
just calls aes_enc for now.  This should be removed when all of the
aes_impls provide CBC-MAC and CCM block updates.
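
A sketch of the dispatch shape this implies -- the hook signature is taken from the aes_impl.h diff below, but the dispatcher body is assumed for illustration and the real one in aes_impl.c may differ in detail:

void
aes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
{
	unsigned i;

	if (aes_impl->ai_cbcmac_update1 != NULL) {	/* accelerated path */
		aes_impl->ai_cbcmac_update1(enc, in, nbytes, auth, nrounds);
		return;
	}
	for (; nbytes; nbytes -= 16, in += 16) {	/* generic fallback */
		for (i = 0; i < 16; i++)
			auth[i] ^= in[i];		/* auth ^= block */
		aes_enc(enc, auth, auth, nrounds);	/* auth := E_K(auth) */
	}
}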


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/aes_ccm.c \
src/sys/crypto/aes/aes_impl.h
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_impl.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/aes_selftest.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.1 src/sys/crypto/aes/aes_ccm.c:1.2
--- src/sys/crypto/aes/aes_ccm.c:1.1	Sat Jul 25 22:15:55 2020
+++ src/sys/crypto/aes/aes_ccm.c	Sat Jul 25 22:27:53 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $");
 
 #include 
 #include 
@@ -45,6 +45,7 @@ __KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 
 
 #include 
 #include 
+#include 
 
 static inline void
 xor(uint8_t *x, const uint8_t *a, const uint8_t *b, size_t n)
@@ -54,13 +55,6 @@ xor(uint8_t *x, const uint8_t *a, const 
 		*x++ = *a++ ^ *b++;
 }
 
-static inline void
-xor16(uint8_t *x, const uint8_t *a, const uint8_t *b)
-{
-
-	xor(x, a, b, 16);
-}
-
 /* RFC 3610, §2.2 Authentication */
 #define	CCM_AFLAGS_ADATA	__BIT(6)
 #define	CCM_AFLAGS_M		__BITS(5,3)
@@ -157,9 +151,10 @@ aes_ccm_init(struct aes_ccm *C, unsigned
 		aes_enc(enc, C->auth, C->auth, C->nr);
 
 		/* If there was anything more, process 16 bytes at a time.  */
-		for (; adlen >= 16; adp += 16, adlen -= 16) {
-			xor16(C->auth, C->auth, adp);
-			aes_enc(enc, C->auth, C->auth, C->nr);
+		if (adlen - (adlen % 16)) {
+			aes_cbcmac_update1(enc, adp, adlen - (adlen % 16),
+			C->auth, C->nr);
+			adlen %= 16;
 		}
 
 		/*
@@ -217,15 +212,12 @@ aes_ccm_enc(struct aes_ccm *C, const voi
 	}
 
 	/* Process 16 bytes at a time.  */
-	for (; nbytes >= 16; p += 16, q += 16, nbytes -= 16) {
-		/* authenticate */
-		xor16(C->auth, C->auth, p);
-		aes_enc(C->enc, C->auth, C->auth, C->nr);
-
-		/* encrypt */
-		aes_ccm_inc(C);
-		aes_enc(C->enc, C->in, C->out, C->nr);
-		xor16(q, C->out, p);
+	if (nbytes - (nbytes % 16)) {
+		aes_ccm_enc1(C->enc, p, q, nbytes - (nbytes % 16), C->auth,
+		C->nr);
+		p += nbytes - (nbytes % 16);
+		q += nbytes - (nbytes % 16);
+		nbytes %= 16;
 	}
 
 	/* Incorporate any <16-byte unit as a partial block.  */
@@ -278,15 +270,12 @@ aes_ccm_dec(struct aes_ccm *C, const voi
 	}
 
 	/* Process 16 bytes at a time.  */
-	for (; nbytes >= 16; p += 16, q += 16, nbytes -= 16) {
-		/* decrypt */
-		aes_ccm_inc(C);
-		aes_enc(C->enc, C->in, C->out, C->nr);
-		xor16(q, C->out, p);
-
-		/* authenticate */
-		xor16(C->auth, C->auth, q);
-		aes_enc(C->enc, C->auth, C->auth, C->nr);
+	if (nbytes - (nbytes % 16)) {
+		aes_ccm_dec1(C->enc, p, q, nbytes - (nbytes % 16), C->auth,
+		C->nr);
+		p += nbytes - (nbytes % 16);
+		q += nbytes - (nbytes % 16);
+		nbytes %= 16;
 	}
 
 	/* Incorporate any <16-byte unit as a partial block.  */
Index: src/sys/crypto/aes/aes_impl.h
diff -u src/sys/crypto/aes/aes_impl.h:1.1 src/sys/crypto/aes/aes_impl.h:1.2
--- src/sys/crypto/aes/aes_impl.h:1.1	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/aes_impl.h	Sat Jul 25 22:27:53 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_impl.h,v 1.1 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_impl.h,v 1.2 2020/07/25 22:27:53 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -51,10 +51,27 @@ struct aes_impl {
 		uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
 	void	(*ai_xts_dec)(const struct aesdec *, const uint8_t[static 16],
 		uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
+	void	(*ai_cbcmac_update1)(const struct aesenc *,
+		const uint8_t[static 16], size_t, uint8_t[static 16],
+		uint32_t);
+	void	(*ai_ccm_enc1)(const struct aesenc *,
+		const uint8_t[static 16], uint8_t[static 16],
+		size_t, uint8_t[static 32], uint32_t);
+	void	(*ai_ccm_dec1)(const struct aesenc *,
+		const uint8_t[stati

CVS commit: src/sys/crypto/aes

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:15:55 UTC 2020

Modified Files:
src/sys/crypto/aes: files.aes
Added Files:
src/sys/crypto/aes: aes_ccm.c aes_ccm.h aes_ccm_mbuf.c aes_ccm_mbuf.h

Log Message:
New aes_ccm API.

Intended for use in net80211 for WPA2 CCMP.
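
A rough usage sketch follows. Only aes_ccm_init's signature appears verbatim in the new file; the shape of aes_ccm_enc and the tag-finishing call are assumptions for illustration:

/* Assumes <crypto/aes/aes_ccm.h>; aes_ccm_enc's argument order and the
 * aes_ccm_tag finisher are hypothetical. */
static void
ccm_seal_sketch(const struct aesenc *enc, uint32_t nrounds,
    const uint8_t nonce[13], const void *ad, size_t adlen,
    const void *ptxt, void *ctxt, size_t mlen, uint8_t tag[16])
{
	struct aes_ccm C;

	/* L = 2 (16-bit length field), M = 16 (tag bytes),
	 * noncelen = 15 - L = 13, matching aes_ccm_init's KASSERTs. */
	aes_ccm_init(&C, nrounds, enc, 2, 16, nonce, 13, ad, adlen, mlen);
	aes_ccm_enc(&C, ptxt, ctxt, mlen);	/* argument order assumed */
	aes_ccm_tag(&C, tag);			/* hypothetical finisher */
}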


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/aes_ccm.c \
src/sys/crypto/aes/aes_ccm.h src/sys/crypto/aes/aes_ccm_mbuf.c \
src/sys/crypto/aes/aes_ccm_mbuf.h
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/files.aes

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/files.aes
diff -u src/sys/crypto/aes/files.aes:1.1 src/sys/crypto/aes/files.aes:1.2
--- src/sys/crypto/aes/files.aes:1.1	Mon Jun 29 23:27:52 2020
+++ src/sys/crypto/aes/files.aes	Sat Jul 25 22:15:55 2020
@@ -1,9 +1,11 @@
-#	$NetBSD: files.aes,v 1.1 2020/06/29 23:27:52 riastradh Exp $
+#	$NetBSD: files.aes,v 1.2 2020/07/25 22:15:55 riastradh Exp $
 
 define	aes
 define	rijndael: aes	# legacy Rijndael API
 
 file	crypto/aes/aes_bear.c			aes
+file	crypto/aes/aes_ccm.c			aes
+file	crypto/aes/aes_ccm_mbuf.c		aes
 file	crypto/aes/aes_ct.c			aes
 file	crypto/aes/aes_ct_dec.c			aes
 file	crypto/aes/aes_ct_enc.c			aes

Added files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u /dev/null src/sys/crypto/aes/aes_ccm.c:1.1
--- /dev/null	Sat Jul 25 22:15:55 2020
+++ src/sys/crypto/aes/aes_ccm.c	Sat Jul 25 22:15:55 2020
@@ -0,0 +1,619 @@
+/*	$NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * AES-CCM, as defined in:
+ *
+ *	D. Whiting, R. Housley, and N. Ferguson, `Counter with CBC-MAC
+ *	(CCM)', IETF RFC 3610, September 2003.
+ *	https://tools.ietf.org/html/rfc3610
+ */
+
+#include 
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $");
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+
+static inline void
+xor(uint8_t *x, const uint8_t *a, const uint8_t *b, size_t n)
+{
+
+	while (n --> 0)
+		*x++ = *a++ ^ *b++;
+}
+
+static inline void
+xor16(uint8_t *x, const uint8_t *a, const uint8_t *b)
+{
+
+	xor(x, a, b, 16);
+}
+
+/* RFC 3610, §2.2 Authentication */
+#define	CCM_AFLAGS_ADATA	__BIT(6)
+#define	CCM_AFLAGS_M		__BITS(5,3)
+#define	CCM_AFLAGS_L		__BITS(2,0)
+
+/* RFC 3610, §2.3 Encryption */
+#define	CCM_EFLAGS_L		__BITS(2,0)
+
+static void
+aes_ccm_inc(struct aes_ccm *C)
+{
+
+	KASSERT(C->L == 2);
+	if (++C->in[15] == 0 && ++C->in[14] == 0)
+		panic("AES-CCM overflow");
+}
+
+static void
+aes_ccm_zero_ctr(struct aes_ccm *C)
+{
+
+	KASSERT(C->L == 2);
+	C->in[14] = C->in[15] = 0;
+}
+
+void
+aes_ccm_init(struct aes_ccm *C, unsigned nr, const struct aesenc *enc,
+unsigned L, unsigned M,
+const uint8_t *nonce, unsigned noncelen, const void *ad, size_t adlen,
+size_t mlen)
+{
+	const uint8_t *adp = ad;
+	unsigned i;
+
+	KASSERT(L == 2);
+	KASSERT(M % 2 == 0);
+	KASSERT(M >= 4);
+	KASSERT(M <= 16);
+	KASSERT(noncelen == 15 - L);
+
+	C->enc = enc;
+	C->nr = nr;
+	C->L = L;
+	C->M = M;
+	C->mlen = C->mleft = mlen;
+
+	/* Encode B0, the initial authenticated data block.  */
+	C->auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA);
+	C->auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M);
+	C->auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L);
+	memcpy(C->auth + 1, nonce, noncelen);
+	for (i = 0; i < L; i++, mlen >>= 8) {
+		KASSERT(i < 16 - 1 - noncelen);
+		C->auth[16 - i - 1] = mlen & 0xff;
+	}
+	aes_enc(enc, C->auth, C->auth, C->nr);
+
+	/* Process

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Jul 25 22:11:05 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_ni_64.S

Log Message:
Invert some loops to save a jmp instruction on each iteration.

No semantic change intended.
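
The transformation is classic loop inversion: move the termination test to the bottom of the loop so each iteration pays a single conditional branch instead of a conditional branch plus an unconditional jmp. A schematic in C (the assembly below additionally rotates the body so the entry jump lands mid-loop):

static void step(size_t i);	/* hypothetical per-iteration work */

/* Test-at-top: the compiler emits a conditional branch at the head
 * and an unconditional jmp back to it at the end of every iteration. */
static void
run_plain(size_t n)
{
	size_t i;

	for (i = 0; i < n; i++)
		step(i);
}

/* Inverted: one entry test, then a single conditional branch at the
 * bottom of each iteration -- the jmp is gone. */
static void
run_inverted(size_t n)
{
	size_t i;

	if (n == 0)
		return;
	i = 0;
	do
		step(i);
	while (++i < n);
}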


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2	Tue Jun 30 21:41:04 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Sat Jul 25 22:11:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -522,14 +522,14 @@ ENTRY(aesni_enctodec)
 	shl	$4,%edx		/* rdx := byte offset of last round key */
 	movdqa	(%rdi,%rdx),%xmm0	/* load last round key */
 	movdqa	%xmm0,(%rsi)	/* store last round key verbatim */
-1:	sub	$0x10,%rdx	/* advance to next round key */
-	lea	0x10(%rsi),%rsi
-	jz	2f		/* stop if this is the last one */
-	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
+	jmp	2f
+1:	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
 	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
 	movdqa	%xmm0,(%rsi)	/* store round key */
-	jmp	1b
-2:	movdqa	(%rdi),%xmm0	/* load first round key */
+2:	sub	$0x10,%rdx	/* advance to next round key */
+	lea	0x10(%rsi),%rsi
+	jnz	1b		/* repeat if more rounds */
+	movdqa	(%rdi),%xmm0	/* load first round key */
 	movdqa	%xmm0,(%rsi)	/* store first round key verbatim */
 	ret
 END(aesni_enctodec)
@@ -614,16 +614,16 @@ ENTRY(aesni_cbc_dec1)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	-0x10(%rsi,%r10),%xmm0	/* xmm0 := last ciphertext block */
 	movdqu	%xmm0,(%r8)		/* update iv */
-1:	mov	%r9d,%ecx		/* ecx := nrounds */
-	call	aesni_dec1		/* xmm0 := cv ^ ptxt */
-	sub	$0x10,%r10
-	jz	2f			/* first block if r10 is now zero */
-	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
+	jmp	2f
+1:	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
 	pxor	%xmm8,%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
 	movdqa	%xmm8,%xmm0		/* move cv = ciphertext block */
-	jmp	1b
-2:	pxor	(%rsp),%xmm0		/* xmm0 := ptxt */
+2:	mov	%r9d,%ecx		/* ecx := nrounds */
+	call	aesni_dec1		/* xmm0 := cv ^ ptxt */
+	sub	$0x10,%r10
+	jnz	1b			/* repeat if more blocks */
+	pxor	(%rsp),%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx)		/* store first plaintext block */
 	leave
 	ret
@@ -649,7 +649,11 @@ ENTRY(aesni_cbc_dec8)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := ciphertext block[n-1] */
 	movdqu	%xmm7,(%r8)		/* update iv */
-1:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
+	jmp	2f
+1:	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
+	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
+	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
+2:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
 	movdqu	-0x30(%rsi,%r10),%xmm5	/* xmm5 := ciphertext block[n-3] */
 	movdqu	-0x40(%rsi,%r10),%xmm4	/* xmm4 := ciphertext block[n-4] */
 	movdqu	-0x50(%rsi,%r10),%xmm3	/* xmm3 := ciphertext block[n-5] */
@@ -680,12 +684,8 @@ ENTRY(aesni_cbc_dec8)
 	movdqu	%xmm2,-0x60(%rdx,%r10)
 	movdqu	%xmm1,-0x70(%rdx,%r10)
 	sub	$0x80,%r10
-	jz	2f			/* first block if r10 is now zero */
-	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
-	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
-	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
-	jmp	1b
-2:	pxor	(%rsp),%xmm0		/* xmm0 := ptxt[0] */
+	jnz	1b			/* repeat if more blocks */
+	pxor	(%rsp),%xmm0		/* xmm0 := ptxt[0] */
 	movdqu	%xmm0,(%rdx)		/* store first plaintext block */
 	leave
 	ret
@@ -966,12 +966,12 @@ aesni_enc1:
 	shl	$4,%ecx		/* ecx := total byte size of round keys */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
+	jmp	2f
+1:	aesenc	%xmm8,%xmm0
+2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
 	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesenc	%xmm8,%xmm0
-	jmp	1b
-2:	aesenclast %xmm8,%xmm0
+	jnz	1b		/* repeat if more rounds */
+	aesenclast %xmm8,%xmm0
 	ret
 END(aesni_enc1)
 
@@ -999,10 +999,8 @@ aesni_enc8:
 	shl	$4,%ecx		/* ecx := total byte size of round keys */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
-	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesenc	%xmm8,%xmm0
+	jmp	2f
+1:	aesenc	%xmm8,%xmm0
 	aesenc	%xmm8,%xmm1
 	aesenc	%xmm8,%xmm2
 	aesenc	%xmm8,%xmm3
@@ -1010,8 +1008,10 @@ aesni_en

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-23 Thread Ryo Shimizu
Module Name:src
Committed By:   ryo
Date:   Thu Jul 23 11:33:01 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h

Log Message:
fix build with llvm/clang.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.2 src/sys/crypto/aes/arch/arm/arm_neon.h:1.3
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.2	Tue Jun 30 21:24:00 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Thu Jul 23 11:33:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.2 2020/06/30 21:24:00 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.3 2020/07/23 11:33:01 ryo Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@ typedef struct { uint8x8_t val[2]; } uin
 #elif defined(__clang__)
 
 #define	_INTRINSATTR			  \
-	__attribute__((__always_inline__, __nodebug))
+	__attribute__((__always_inline__, __nodebug__))
 
 typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
 typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-21 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Wed Jul 22 06:15:21 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Fix register name in comment.

Some time ago I reallocated the registers to avoid inadvertently
clobbering the callee-saves v9, but neglected to update the comment.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.5 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.6
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.5	Sun Jul 19 07:32:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Wed Jul 22 06:15:21 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.5 2020/07/19 07:32:43 ryo Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.6 2020/07/22 06:15:21 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -827,7 +827,7 @@ aesarmv8_xts_mulx:
 	 * carried into x^128 = x^7 + x^2 + x + 1.
 	 */
 	adrl	x0, xtscarry
-	cmlt	v1.2d, v31.2d, #0 /* v1.2d[i] := -1 if v9.2d[i] < 0, else 0 */
+	cmlt	v1.2d, v31.2d, #0 /* v1.2d[i] := -1 if v31.2d[i] < 0, else 0 */
 	ldr	q0, [x0]		/* q0 := xtscarry */
 	ext	v1.16b, v1.16b, v1.16b, #8 /* swap halves of q1 */
 	shl	v31.2d, v31.2d, #1	/* shift */



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-19 Thread Ryo Shimizu
Module Name:src
Committed By:   ryo
Date:   Sun Jul 19 07:32:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
fix build with clang/llvm.

clang's aarch64 assembler doesn't accept the optional number of lanes of a
vector register.
(but the ARM ARM says that an assembler must accept it)


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.4 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.5
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.4	Tue Jun 30 23:06:02 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Sun Jul 19 07:32:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.4 2020/06/30 23:06:02 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.5 2020/07/19 07:32:43 ryo Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -238,8 +238,8 @@ ENTRY(aesarmv8_setenckey192)
 	 */
 
 	/* v1.4s := (nrk[0], nrk[1], nrk[1], nrk[1]) */
-	dup	v1.4s, v5.4s[3]
-	mov	v1.4s[0], v5.4s[2]
+	dup	v1.4s, v5.s[3]
+	mov	v1.s[0], v5.s[2]
 
 	/*
 	 * v6.4s := (0, 0, rklo[0], rklo[1])
@@ -257,7 +257,7 @@ ENTRY(aesarmv8_setenckey192)
 	 * and v5.4s = (rk[2], rk[3], xxx, xxx).  Set
 	 * v2.4s := (rk[0], rk[1], rk[2], rk[3])
 	 */
-	mov	v2.2d[1], v5.2d[0]
+	mov	v2.d[1], v5.d[0]
 
 	/* store two round keys */
 	stp	q2, q3, [x0], #0x20
@@ -325,7 +325,7 @@ ENTRY(aesarmv8_setenckey192)
 	ext	v5.16b, v0.16b, v4.16b, #12
 
 	/* v2.4s := (nnrk[3], nnrk[3], xxx, xxx) */
-	dup	v2.4s, v1.4s[3]
+	dup	v2.4s, v1.s[3]
 
 	/*
 	 * v2.4s := (nnnrklo[0] = nnrk[3] ^ nrk[2],



CVS commit: src/sys/crypto/aes/arch/arm

2020-06-30 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jun 30 23:06:02 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Reallocate registers to avoid abusing callee-saves registers, v8-v15.

Forgot to consult the AAPCS before committing this -- oops!

While here, take advantage of the 32 aarch64 simd registers to avoid
all stack spills.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.3 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.4
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.3	Tue Jun 30 21:53:39 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Jun 30 23:06:02 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.3 2020/06/30 21:53:39 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.4 2020/06/30 23:06:02 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -116,7 +116,7 @@ ENTRY(aesarmv8_setenckey128)
 
 	adrl	x4, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q8, [x4]	/* q8 := unshiftrows_rotword_3 table */
+	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */
 
 	str	q1, [x0], #0x10	/* store master key as first round key */
 	mov	x2, #10		/* round count */
@@ -136,7 +136,7 @@ ENTRY(aesarmv8_setenckey128)
 
 	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v3.16b, {v3.16b}, v8.16b
+	tbl	v3.16b, {v3.16b}, v16.16b
 	eor	v3.16b, v3.16b, v4.16b
 
 	/*
@@ -175,8 +175,8 @@ ENTRY(aesarmv8_setenckey192)
 	adrl	x4, unshiftrows_rotword_1
 	adrl	x5, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q8, [x4]	/* q8 := unshiftrows_rotword_1 */
-	ldr	q9, [x5]	/* q9 := unshiftrows_rotword_3 */
+	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
+	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */
 
 	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
 	mov	x2, #12		/* round count */
@@ -197,7 +197,7 @@ ENTRY(aesarmv8_setenckey192)
 
 	/* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v3.16b, {v3.16b}, v8.16b
+	tbl	v3.16b, {v3.16b}, v16.16b
 	eor	v3.16b, v3.16b, v4.16b
 
 	/*
@@ -269,8 +269,8 @@ ENTRY(aesarmv8_setenckey192)
 	 *	q2 = rk
 	 *	q3 = nrk
 	 *	v5.4s = (rk[2], rk[3], nrk[0], nrk[1])
-	 *	q8 = unshiftrows_rotword_1
-	 *	q9 = unshiftrows_rotword_3
+	 *	q16 = unshiftrows_rotword_1
+	 *	q17 = unshiftrows_rotword_3
 	 *
 	 * We have to compute, in q1:
 	 *
@@ -294,7 +294,7 @@ ENTRY(aesarmv8_setenckey192)
 
 	/* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v1.16b, {v1.16b}, v9.16b
+	tbl	v1.16b, {v1.16b}, v17.16b
 	eor	v1.16b, v1.16b, v4.16b
 
 	/*
@@ -354,8 +354,8 @@ ENTRY(aesarmv8_setenckey256)
 	adrl	x4, unshiftrows_rotword_3
 	adrl	x5, unshiftrows_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q8, [x4]	/* q8 := unshiftrows_rotword_3 */
-	ldr	q9, [x5]	/* q9 := unshiftrows_3 */
+	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
+	ldr	q17, [x5]	/* q17 := unshiftrows_3 */
 
 	/* store master key as first two round keys */
 	stp	q1, q2, [x0], #0x20
@@ -376,7 +376,7 @@ ENTRY(aesarmv8_setenckey256)
 
 	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v3.16b, {v3.16b}, v8.16b
+	tbl	v3.16b, {v3.16b}, v16.16b
 	eor	v3.16b, v3.16b, v4.16b
 
 	/*
@@ -402,7 +402,7 @@ ENTRY(aesarmv8_setenckey256)
 	aese	v3.16b, v0.16b
 
 	/* v3.4s[i] := SubBytes(rk[3]) */
-	tbl	v3.16b, {v3.16b}, v9.16b
+	tbl	v3.16b, {v3.16b}, v17.16b
 
 	/*
 	 * v5.4s := (0,prk[0],prk[1],prk[2])
@@ -458,9 +458,9 @@ END(aesarmv8_enctodec)
 ENTRY(aesarmv8_enc)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := block */
-	bl	aesarmv8_enc1
-	str	q0, [x2]	/* store block */
+	ldr	q0, [x1]	/* q0 := ptxt */
+	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
+	str	q0, [x2]	/* store ctxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_enc)
@@ -476,9 +476,9 @@ END(aesarmv8_enc)
 ENTRY(aesarmv8_dec)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := block */
-	bl	aesarmv8_dec1
-	str	q0, [x2]	/* store block */
+	ldr	q0, [x1]	/* q0 := ctxt */
+	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
+	str	q0, [x2]	/* store ptxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_dec)
@@ -505,7 +505,7 @@ ENTRY(aesarmv8_cbc_enc)
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
-	bl	aesarmv8_enc1		/* q0 := ciphertext block */
+	bl	aesarmv8_enc1		/* q0 := ctxt; trash x0/x3/q16 */
 	subs	x10, x10, #0x10		/* count down nbytes */
 	str	q0, [x2], #0x10		/* store ciphertext block */
 	b.ne	1b			/* repeat if x10 is nonzero */
@@ -52

CVS commit: src/sys/crypto/aes/arch/arm

2020-06-30 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jun 30 21:53:39 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Use `.arch_extension aes' for aese/aesmc/aesd/aesimc.

Unlike `.arch_extension crypto', this works with clang; both work
with gas, so we'll go with this.

Clang still can't handle aes_armv8_64.S yet -- it gets confused by
dup and mov on lanes, but this makes progress.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.2 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.3
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.2	Tue Jun 30 21:41:03 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Jun 30 21:53:39 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.2 2020/06/30 21:41:03 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.3 2020/06/30 21:53:39 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-	.arch_extension	crypto
+	.arch_extension	aes
 
 /*
  * uint32_t rcon[10]



CVS commit: src/sys/crypto/aes/arch

2020-06-30 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jun 30 21:41:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S
src/sys/crypto/aes/arch/x86: aes_ni_64.S

Log Message:
Use .p2align rather than .align.

Apparently on arm, .align is actually an alias for .p2align, taking a
power of two rather than a number of bytes, so aes_armv8_64.o was
bloated to 32KB with obscene alignment when it only needed to be
barely past 4KB.

Do the same for the x86 aes_ni_64.S -- even though .align takes a
number of bytes rather than a power of two on x86, let's just stay
away from the temptations of the evil .align directive.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.1 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.2
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.1	Mon Jun 29 23:31:41 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Jun 30 21:41:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.1 2020/06/29 23:31:41 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.2 2020/06/30 21:41:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -41,7 +41,7 @@
  *	secret.
  */
 	.section .rodata
-	.align	4
+	.p2align 2
 	.type	rcon,@object
 rcon:
 	.long	0x01
@@ -63,7 +63,7 @@ END(rcon)
  *	RotWord on word 1, and then copy it into all the other words.
  */
 	.section .rodata
-	.align	16
+	.p2align 4
 	.type	unshiftrows_rotword_1,@object
 unshiftrows_rotword_1:
 	.byte	0x01,0x0e,0x0b,0x04
@@ -79,7 +79,7 @@ END(unshiftrows_rotword_1)
  *	3 into all the other words.
  */
 	.section .rodata
-	.align	16
+	.p2align 4
 	.type	unshiftrows_3,@object
 unshiftrows_3:
 	.byte	0x0c,0x09,0x06,0x03
@@ -95,7 +95,7 @@ END(unshiftrows_3)
  *	RotWord on word 3, and then copy it into all the other words.
  */
 	.section .rodata
-	.align	16
+	.p2align 4
 	.type	unshiftrows_rotword_3,@object
 unshiftrows_rotword_3:
 	.byte	0x09,0x06,0x03,0x0c
@@ -846,7 +846,7 @@ aesarmv8_xts_mulx:
 END(aesarmv8_xts_mulx)
 
 	.section .rodata
-	.align	16
+	.p2align 4
 	.type	xtscarry,@object
 xtscarry:
 	.byte	0x87,0,0,0, 0,0,0,0,  1,0,0,0, 0,0,0,0

Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.1 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.1	Mon Jun 29 23:29:40 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Tue Jun 30 21:41:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.1 2020/06/29 23:29:40 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -930,7 +930,7 @@ aesni_xts_mulx:
 END(aesni_xts_mulx)
 
 	.section .rodata
-	.align 16
+	.p2align 4
 	.type	xtscarry,@object
 xtscarry:
 	.byte	0x87,0,0,0, 0,0,0,0,  1,0,0,0, 0,0,0,0



CVS commit: src/sys/crypto/aes/arch/arm

2020-06-30 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jun 30 21:24:00 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h

Log Message:
Tweak clang neon intrinsics so they build.

(this file is still a kludge)


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.1 src/sys/crypto/aes/arch/arm/arm_neon.h:1.2
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.1	Mon Jun 29 23:56:31 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Tue Jun 30 21:24:00 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.1 2020/06/29 23:56:31 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.2 2020/06/30 21:24:00 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -73,6 +73,8 @@ typedef __attribute__((neon_vector_type(
 typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
+
+typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
 typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
 
 #ifdef __LITTLE_ENDIAN__
@@ -118,11 +120,11 @@ vdupq_n_u8(uint8_t __x)
 	};
 }
 
+#if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
 static __inline uint32x4_t
 vextq_u32(uint32x4_t __lo, uint32x4_t __hi, uint8_t __i)
 {
-#if defined(__GNUC__) && !defined(__clang__)
 #if defined(__AARCH64EB__) || defined(__ARM_BIG_ENDIAN)
 	return __builtin_shuffle(__hi, __lo,
 	(uint32x4_t) { 4 - __i, 5 - __i, 6 - __i, 7 - __i });
@@ -130,25 +132,31 @@ vextq_u32(uint32x4_t __lo, uint32x4_t __
 	return __builtin_shuffle(__lo, __hi,
 	(uint32x4_t) { __i + 0, __i + 1, __i + 2, __i + 3 });
 #endif
+}
 #elif defined(__clang__)
 #ifdef __LITTLE_ENDIAN__
-	return __builtin_neon_vextq_v((int8x16_t)__lo, (int8x16_t)__hi, __i,
-	50);
-#else
-	uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo, 3, 2, 1, 0);
-	uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi, 3, 2, 1, 0);
-	uint32x4_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r,
-	(int8x16_t)__hi_r, __i, 50);
-	return __builtin_shufflevector(__r, __r, 3, 2, 1, 0);
-#endif
+#define	vextq_u32(__lo, __hi, __i)	  \
+	(uint32x4_t)__builtin_neon_vextq_v((int8x16_t)(__lo),		  \
+	(int8x16_t)(__hi), (__i), 50)
+#else
+#define	vextq_u32(__lo, __hi, __i) (	  \
+{	  \
+	uint32x4_t __tlo = (__lo);	  \
+	uint32x4_t __thi = (__hi);	  \
+	uint32x4_t __lo_r = __builtin_shufflevector(__tlo, __tlo, 3,2,1,0);   \
+	uint32x4_t __hi_r = __builtin_shufflevector(__thi, __thi, 3,2,1,0);   \
+	uint32x4_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r,	  \
+	(int8x16_t)__hi_r, __i, 50);  \
+	__builtin_shufflevector(__r, __r, 3,2,1,0);			  \
+})
+#endif	/* __LITTLE_ENDIAN__ */
 #endif
-}
 
+#if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
 static __inline uint8x16_t
 vextq_u8(uint8x16_t __lo, uint8x16_t __hi, uint8_t __i)
 {
-#if defined(__GNUC__) && !defined(__clang__)
 #if defined(__AARCH64EB__) || defined(__ARM_BIG_ENDIAN)
 	return __builtin_shuffle(__hi, __lo,
 	(uint8x16_t) {
@@ -166,38 +174,45 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h
 		__i + 12, __i + 13, __i + 14, __i + 15,
 	});
 #endif
+}
 #elif defined(__clang__)
 #ifdef __LITTLE_ENDIAN__
-	return __builtin_neon_vextq_v((int8x16_t)__lo, (int8x16_t)__hi, __i,
-	48);
-#else
-	uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo,
-	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-	uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi,
-	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-	uint32x4_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r,
-	(int8x16_t)__hi_r, __i, 50);
-	return __builtin_shufflevector(__r, __r,
-	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-#endif
+#define	vextq_u8(__lo, __hi, __i)	  \
+	(uint8x16_t)__builtin_neon_vextq_v((int8x16_t)(__lo),		  \
+	(int8x16_t)(__hi), (__i), 48)
+#else
+#define	vextq_u8(__lo, __hi, __i) (	  \
+{	  \
+	uint8x16_t __tlo = (__lo);	  \
+	uint8x16_t __thi = (__hi);	  \
+	uint8x16_t __lo_r = __builtin_shufflevector(__tlo, __tlo,	  \
+	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);			  \
+	uint8x16_t __hi_r = __builtin_shufflevector(__thi, __thi,	  \
+	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);			  \
+	uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r,	  \
+	(int8x16_t)__hi_r, (__i), 48);  \
+	return __builtin_shufflevector(__r, __r,			  \
+	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);			  \
+})
+#endif	/* __LITTLE_ENDIAN */
 #endif
-}
 
+#if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
 static __inline uint

CVS commit: src/sys/crypto/aes/arch/arm

2020-06-30 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jun 30 17:03:14 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: files.aesneon

Log Message:
Limit aes_neon to cpu_cortex | aarch64.

We won't use it on any other systems, and it doesn't build without
NEON anyway.  Verified earmv7hf GENERIC, aarch64 GENERIC64, and
earmv6 RPI2 all build with this.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/files.aesneon

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/files.aesneon
diff -u src/sys/crypto/aes/arch/arm/files.aesneon:1.2 src/sys/crypto/aes/arch/arm/files.aesneon:1.3
--- src/sys/crypto/aes/arch/arm/files.aesneon:1.2	Mon Jun 29 23:57:56 2020
+++ src/sys/crypto/aes/arch/arm/files.aesneon	Tue Jun 30 17:03:13 2020
@@ -1,4 +1,4 @@
-#	$NetBSD: files.aesneon,v 1.2 2020/06/29 23:57:56 riastradh Exp $
+#	$NetBSD: files.aesneon,v 1.3 2020/06/30 17:03:13 riastradh Exp $
 
 ifdef aarch64
 makeoptions	aes	"COPTS.aes_neon.c"+="-march=armv8-a"
@@ -8,10 +8,8 @@ makeoptions	aes	"COPTS.aes_neon.c"+="-mf
 makeoptions	aes	"COPTS.aes_neon_subr.c"+="-mfloat-abi=softfp -mfpu=neon"
 endif
 
-file	crypto/aes/arch/arm/aes_neon.c		aes
-file	crypto/aes/arch/arm/aes_neon_impl.c	aes
-file	crypto/aes/arch/arm/aes_neon_subr.c	aes
+file	crypto/aes/arch/arm/aes_neon.c		aes & (cpu_cortex | aarch64)
+file	crypto/aes/arch/arm/aes_neon_impl.c	aes & (cpu_cortex | aarch64)
+file	crypto/aes/arch/arm/aes_neon_subr.c	aes & (cpu_cortex | aarch64)
 
-ifndef aarch64
-file	crypto/aes/arch/arm/aes_neon_32.S	aes
-endif
+file	crypto/aes/arch/arm/aes_neon_32.S	aes & cpu_cortex & !aarch64



CVS commit: src/sys/crypto/aes

2020-06-30 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jun 30 16:21:17 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_impl.c

Log Message:
New sysctl node hw.aes_impl for selected AES implementation.
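
Querying the node from userland looks like this (the implementation name shown is illustrative; the string reported is whatever ai_name the selected implementation carries):

$ sysctl hw.aes_impl
hw.aes_impl = AES-NI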


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/aes_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_impl.c
diff -u src/sys/crypto/aes/aes_impl.c:1.2 src/sys/crypto/aes/aes_impl.c:1.3
--- src/sys/crypto/aes/aes_impl.c:1.2	Mon Jun 29 23:36:59 2020
+++ src/sys/crypto/aes/aes_impl.c	Tue Jun 30 16:21:17 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_impl.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $	*/
+/*	$NetBSD: aes_impl.c,v 1.3 2020/06/30 16:21:17 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,12 +27,13 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.3 2020/06/30 16:21:17 riastradh Exp $");
 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -43,6 +44,30 @@ static int aes_selftest_stdkeysched(void
 static const struct aes_impl	*aes_md_impl	__read_mostly;
 static const struct aes_impl	*aes_impl	__read_mostly;
 
+static int
+sysctl_hw_aes_impl(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+
+	KASSERTMSG(aes_impl != NULL,
+	"sysctl ran before AES implementation was selected");
+
+	node = *rnode;
+	node.sysctl_data = __UNCONST(aes_impl->ai_name);
+	node.sysctl_size = strlen(aes_impl->ai_name) + 1;
+	return sysctl_lookup(SYSCTLFN_CALL(&node));
+}
+
+SYSCTL_SETUP(sysctl_hw_aes_setup, "sysctl hw.aes_impl setup")
+{
+
+	sysctl_createv(clog, 0, NULL, NULL,
+	CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "aes_impl",
+	SYSCTL_DESCR("Selected AES implementation"),
+	sysctl_hw_aes_impl, 0, NULL, 0,
+	CTL_HW, CTL_CREATE, CTL_EOL);
+}
+
 /*
  * The timing of AES implementation selection is finicky:
  *



CVS commit: src/sys/crypto/aes/arch/arm

2020-06-29 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jun 29 23:57:56 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c files.aesneon
Added Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
Provide hand-written AES NEON assembly for arm32.

gcc does a lousy job at compiling 128-bit NEON intrinsics on arm32;
hand-writing it made it about 12x faster, by avoiding a zillion loads
and stores to spill everything and the kitchen sink onto the stack.
(But gcc does fine on aarch64, presumably because it has twice as
many registers and doesn't have to deal with q2=d4/d5 overlapping.)


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon.c \
src/sys/crypto/aes/arch/arm/files.aesneon
cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.1 src/sys/crypto/aes/arch/arm/aes_neon.c:1.2
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.1	Mon Jun 29 23:56:31 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Mon Jun 29 23:57:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.1 2020/06/29 23:56:31 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.2 2020/06/29 23:57:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.1 2020/06/29 23:56:31 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.2 2020/06/29 23:57:56 riastradh Exp $");
 
 #include 
 
@@ -47,6 +47,12 @@ __KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v
 
 #include "aes_neon_impl.h"
 
+#ifdef __aarch64__
+#define	__aarch64_used
+#else
+#define	__aarch64_used	__unused
+#endif
+
 static const uint8x16_t
 mc_forward[4] = {
 	{0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04,
@@ -58,7 +64,7 @@ mc_forward[4] = {
 	{0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00,
 	 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08},
 },
-mc_backward[4] = {
+mc_backward[4] __aarch64_used = {
 	{0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06,
 	 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E},
 	{0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02,
@@ -68,7 +74,7 @@ mc_backward[4] = {
 	{0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A,
 	 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02},
 },
-ipt[2] = {
+ipt[2] __aarch64_used = {
 	{0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2,
 	 0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA},
 	{0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C,
@@ -80,55 +86,55 @@ opt[2] = {
 	{0x00,0xEC,0xBC,0x50,0x51,0xBD,0xED,0x01,
 	 0xE0,0x0C,0x5C,0xB0,0xB1,0x5D,0x0D,0xE1},
 },
-dipt[2] = {
+dipt[2] __aarch64_used = {
 	{0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F,
 	 0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15},
 	{0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86,
 	 0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12},
 },
-sb1[2] = {
+sb1[2] __aarch64_used = {
 	{0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1,
 	 0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5},
 	{0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36,
 	 0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B},
 },
-sb2[2] = {
+sb2[2] __aarch64_used = {
 	{0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2,
 	 0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E},
 	{0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69,
 	 0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2},
 },
-sbo[2] = {
+sbo[2] __aarch64_used = {
 	{0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0,
 	 0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15},
 	{0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF,
 	 0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E},
 },
-dsb9[2] = {
+dsb9[2] __aarch64_used = {
 	{0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85,
 	 0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA},
 	{0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0,
 	 0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72},
 },
-dsbd[2] = {
+dsbd[2] __aarch64_used = {
 	{0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D,
 	 0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5},
 	{0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C,
 	 0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29},
 },
-dsbb[2] = {
+dsbb[2] __aarch64_used = {
 	{0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0,
 	 0x04,0xD4,0xF2,0xB0,0xF6,0x46,0x26,0x60},
 	{0x00,0x67,0x59,0xCD,0xA6,0x98,0x94,0xC1,
 	 0x6B,0xAA,0x55,0x32,0x3E,0x0C,0xFF,0xF3},
 },
-dsbe[2] = {
+dsbe[2] __aarch64_used = {
 	{0x00,0xD0,0xD4,0x26,0x96,0x92,0xF2,0x46,
 	 0xB0,0xF6,0xB4,0x64,0x04,0x60,0x42,0x22},
 	{0x00,0xC1,0xAA,0xFF,0xCD,0xA6,0x55,0x0C,
 	 0x32,0x3E,0x59,0x98,0x6B,0xF3,0x67,0x94},
 },
-dsbo[2] = {
+dsbo[2] __aarch64_used = {
 	{0x00,0x40,0xF9,0x7E,0x53,0xEA,0x87,0x13,
 	 0x2D,0x3E,0x94,0xD4,0xB9,0x6D,0xAA,0xC7},
 	{0x00,0x1D,0x44,0x93,0x0F,0x56,0xD7,0x12,
@@ -164,7 +170,7 @@ deskew[2] = {
 	{0x00,0x69,0xEA,0x83,0xDC,0xB5,0x36,0x5F,
 	 0x77,0x1E,0x9D,0xF4,0xAB,0xC2,0x41,0x28},
 },
-sr[4] = {
+sr[4] __aarch64_used = {
 	{0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 	 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F},
 	{0x00,0x05,0x0A,0x0F,0x04,0x09,0x0E,0x03,
@@ -533,6 +539,14 @@ aes_neon_setdeck

CVS commit: src/sys/crypto/aes/arch/x86

2020-06-29 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jun 29 23:50:05 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_sse2.h aes_sse2_impl.c aes_sse2_impl.h
files.aessse2
Added Files:
src/sys/crypto/aes/arch/x86: aes_sse2_subr.c

Log Message:
Split SSE2 logic into separate units.

Ensure that there are no paths into files compiled with -msse -msse2
at all except via fpu_kern_enter.

I didn't run into a practical problem with this, but let's not leave
a ticking time bomb for subsequent toolchain changes in case the mere
declaration of local __m128i variables causes trouble.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_sse2.h \
src/sys/crypto/aes/arch/x86/aes_sse2_impl.c \
src/sys/crypto/aes/arch/x86/aes_sse2_impl.h \
src/sys/crypto/aes/arch/x86/files.aessse2
cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_sse2.h
diff -u src/sys/crypto/aes/arch/x86/aes_sse2.h:1.1 src/sys/crypto/aes/arch/x86/aes_sse2.h:1.2
--- src/sys/crypto/aes/arch/x86/aes_sse2.h:1.1	Mon Jun 29 23:47:54 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2.h	Mon Jun 29 23:50:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2.h,v 1.1 2020/06/29 23:47:54 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -31,6 +31,31 @@
 
 #include 
 
+/*
+ * These functions MUST NOT use any vector registers for parameters or
+ * results -- the caller is compiled with -mno-sse &c. in the kernel,
+ * and dynamically turns on the vector unit just before calling them.
+ * Internal subroutines that use the vector unit for parameters are
+ * declared in aes_sse2_impl.h instead.
+ */
+
+void aes_sse2_setkey(uint64_t[static 30], const void *, uint32_t);
+
+void aes_sse2_enc(const struct aesenc *, const uint8_t in[static 16],
+uint8_t[static 16], uint32_t);
+void aes_sse2_dec(const struct aesdec *, const uint8_t in[static 16],
+uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_enc(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_dec(const struct aesdec *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_enc(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+
+int aes_sse2_selftest(void);
+
 extern struct aes_impl aes_sse2_impl;
 
 #endif	/* _CRYPTO_AES_ARCH_X86_AES_SSE2_H */
Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.1 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.2
--- src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.1	Mon Jun 29 23:47:54 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c	Mon Jun 29 23:50:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,11 +27,10 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $");
 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -41,532 +40,99 @@ __KERNEL_RCSID(1, "$NetBSD: aes_sse2_imp
 #include 
 #include 
 
-#include "aes_sse2_impl.h"
-
 static void
-aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
+aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key,
+uint32_t nrounds)
 {
-	size_t key_len;
-
-	switch (nrounds) {
-	case 10:
-		key_len = 16;
-		break;
-	case 12:
-		key_len = 24;
-		break;
-	case 14:
-		key_len = 32;
-		break;
-	default:
-		panic("invalid AES nrounds: %u", nrounds);
-	}
 
 	fpu_kern_enter();
-	aes_sse2_keysched(rk, key, key_len);
-	fpu_kern_leave();
-}
-
-static void
-aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
-{
-
 	aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
+	fpu_kern_leave();
 }
 
 static void
-aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
+aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
+uint32_t nrounds)
 {
 
+	fpu_kern_enter();
 	/*
 	 * BearSSL computes InvMixColumns on the fly -- no need for
 	 * distinct decryption round keys.
 	 */
 	aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
+	fpu_kern_leave();
 }
 
 static void
-aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16],
+aes_sse2_enc_impl(const s

CVS commit: src/sys/crypto/aes/arch/x86

2020-06-29 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jun 29 23:41:35 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
VIA AES: Batch AES-XTS computation into eight blocks at a time.

Experimental -- performance improvement is not clearly worth the
complexity.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.1 src/sys/crypto/aes/arch/x86/aes_via.c:1.2
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.1	Mon Jun 29 23:39:30 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Mon Jun 29 23:41:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.1 2020/06/29 23:39:30 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.1 2020/06/29 23:39:30 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $");
 
 #include 
 #include 
@@ -119,8 +119,8 @@ aesvia_setdeckey(struct aesdec *dec, con
 }
 
 static inline void
-aesvia_enc1(const struct aesenc *enc, const uint8_t in[static 16],
-uint8_t out[static 16], uint32_t cw0)
+aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nblocks, uint32_t cw0)
 {
 	const uint32_t cw[4] __aligned(16) = {
 		[0] = (cw0
@@ -128,7 +128,6 @@ aesvia_enc1(const struct aesenc *enc, co
 		| C3_CRYPT_CWLO_ENCRYPT
 		| C3_CRYPT_CWLO_NORMAL),
 	};
-	size_t nblocks = 1;
 
 	KASSERT(((uintptr_t)enc & 0xf) == 0);
 	KASSERT(((uintptr_t)in & 0xf) == 0);
@@ -141,8 +140,8 @@ aesvia_enc1(const struct aesenc *enc, co
 }
 
 static inline void
-aesvia_dec1(const struct aesdec *dec, const uint8_t in[static 16],
-uint8_t out[static 16], uint32_t cw0)
+aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nblocks, uint32_t cw0)
 {
 	const uint32_t cw[4] __aligned(16) = {
 		[0] = (cw0
@@ -150,7 +149,6 @@ aesvia_dec1(const struct aesdec *dec, co
 		| C3_CRYPT_CWLO_DECRYPT
 		| C3_CRYPT_CWLO_NORMAL),
 	};
-	size_t nblocks = 1;
 
 	KASSERT(((uintptr_t)dec & 0xf) == 0);
 	KASSERT(((uintptr_t)in & 0xf) == 0);
@@ -180,7 +178,7 @@ aesvia_enc(const struct aesenc *enc, con
 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
 	((uintptr_t)in & 0xff0) != 0xff0) {
 		enc_aligned_evcnt.ev_count++;
-		aesvia_enc1(enc, in, out, cw0);
+		aesvia_encN(enc, in, out, 1, cw0);
 	} else {
 		enc_unaligned_evcnt.ev_count++;
 		/*
@@ -194,7 +192,7 @@ aesvia_enc(const struct aesenc *enc, con
 		uint8_t outbuf[16] __aligned(16);
 
 		memcpy(inbuf, in, 16);
-		aesvia_enc1(enc, inbuf, outbuf, cw0);
+		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
 		memcpy(out, outbuf, 16);
 
 		explicit_memset(inbuf, 0, sizeof inbuf);
@@ -221,7 +219,7 @@ aesvia_dec(const struct aesdec *dec, con
 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
 	((uintptr_t)in & 0xff0) != 0xff0) {
 		dec_aligned_evcnt.ev_count++;
-		aesvia_dec1(dec, in, out, cw0);
+		aesvia_decN(dec, in, out, 1, cw0);
 	} else {
 		dec_unaligned_evcnt.ev_count++;
 		/*
@@ -235,7 +233,7 @@ aesvia_dec(const struct aesdec *dec, con
 		uint8_t outbuf[16] __aligned(16);
 
 		memcpy(inbuf, in, 16);
-		aesvia_dec1(dec, inbuf, outbuf, cw0);
+		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
 		memcpy(out, outbuf, 16);
 
 		explicit_memset(inbuf, 0, sizeof inbuf);
@@ -245,7 +243,7 @@ aesvia_dec(const struct aesdec *dec, con
 }
 
 static inline void
-aesvia_cbc_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
 uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
 {
 	const uint32_t cw[4] __aligned(16) = {
@@ -274,7 +272,7 @@ aesvia_cbc_enc1(const struct aesenc *enc
 }
 
 static inline void
-aesvia_cbc_dec1(const struct aesdec *dec, const uint8_t in[static 16],
+aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
 uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
 uint32_t cw0)
 {
@@ -340,7 +338,7 @@ aesvia_cbc_enc(const struct aesenc *enc,
 	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
 		cbcenc_aligned_evcnt.ev_count++;
 		uint8_t *ivp = iv;
-		aesvia_cbc_enc1(enc, in, out, nbytes/16, &ivp, cw0);
+		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
 		memcpy(iv, ivp, 16);
 	} else {
 		cbcenc_unaligned_evcnt.ev_count++;
@@ -351,7 +349,7 @@ aesvia_cbc_enc(const struct aesenc *enc,
 		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 			memcpy(tmp, in, 16);
 			xor128(tmp, tmp, cv);
-			aesvia_enc1(enc, tmp, cv, cw0);
+			aesvia_encN(enc, tmp, cv, 1, cw0);
 			memcpy(out, cv, 16);
 		}

CVS commit: src/sys/crypto/aes

2020-06-29 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jun 29 23:36:59 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_bear.h aes_ct.c aes_ct_dec.c aes_impl.c

Log Message:
Provide the standard AES key schedule.

Different AES implementations prefer different variations on it, but
some of them -- notably VIA -- require the standard key schedule to
be available and don't provide hardware support for computing it
themselves.  So adapt BearSSL's logic to generate the standard key
schedule (and decryption keys, with InvMixColumns), rather than the
bitsliced key schedule that BearSSL uses natively.
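
The new entry points are br_aes_ct_keysched_stdenc and
br_aes_ct_keysched_stddec (prototypes in the aes_bear.h diff below);
each returns the round count, or 0 on an invalid key length.  A
minimal usage sketch, with a hypothetical caller name:

	static void
	stdenc_example(const uint8_t *key, size_t keylen)
	{
		uint32_t rk[4*(14 + 1)];	/* worst case: AES-256 */
		unsigned nrounds;

		nrounds = br_aes_ct_keysched_stdenc(rk, key, keylen);
		if (nrounds == 0)	/* keylen was not 16, 24, or 32 */
			panic("invalid AES key length: %zu", keylen);
		/* rk[0 .. 4*(nrounds + 1)) is the FIPS 197 schedule */
	}

As the aes_ct.c diff shows, the stddec variant runs the same expansion,
then reverses the round-key order and applies InvMixColumns to every
round key except the outermost two -- the equivalent inverse cipher of
FIPS 197, the form an implementation like VIA's consumes directly.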


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/aes_bear.h \
src/sys/crypto/aes/aes_ct.c src/sys/crypto/aes/aes_ct_dec.c \
src/sys/crypto/aes/aes_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_bear.h
diff -u src/sys/crypto/aes/aes_bear.h:1.1 src/sys/crypto/aes/aes_bear.h:1.2
--- src/sys/crypto/aes/aes_bear.h:1.1	Mon Jun 29 23:27:52 2020
+++ src/sys/crypto/aes/aes_bear.h	Mon Jun 29 23:36:59 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_bear.h,v 1.1 2020/06/29 23:27:52 riastradh Exp $	*/
+/*	$NetBSD: aes_bear.h,v 1.2 2020/06/29 23:36:59 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -45,6 +45,12 @@ void	br_aes_ct_skey_expand(uint32_t *, u
 void	br_aes_ct_bitslice_encrypt(unsigned, const uint32_t *, uint32_t *);
 void	br_aes_ct_bitslice_decrypt(unsigned, const uint32_t *, uint32_t *);
 
+/* NetBSD additions */
+
+void	br_aes_ct_inv_mix_columns(uint32_t *);
+u_int	br_aes_ct_keysched_stdenc(uint32_t *, const void *, size_t);
+u_int	br_aes_ct_keysched_stddec(uint32_t *, const void *, size_t);
+
 extern struct aes_impl	aes_bear_impl;
 
 #endif	/* _CRYPTO_AES_AES_BEAR_H */
Index: src/sys/crypto/aes/aes_ct.c
diff -u src/sys/crypto/aes/aes_ct.c:1.1 src/sys/crypto/aes/aes_ct.c:1.2
--- src/sys/crypto/aes/aes_ct.c:1.1	Mon Jun 29 23:27:52 2020
+++ src/sys/crypto/aes/aes_ct.c	Mon Jun 29 23:36:59 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ct.c,v 1.1 2020/06/29 23:27:52 riastradh Exp $	*/
+/*	$NetBSD: aes_ct.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
@@ -25,10 +25,12 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ct.c,v 1.1 2020/06/29 23:27:52 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ct.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $");
 
 #include <sys/types.h>
 
+#include <lib/libkern/libkern.h>
+
 #include <crypto/aes/aes_bear.h>
 
 /* see inner.h */
@@ -333,3 +335,92 @@ br_aes_ct_skey_expand(uint32_t *skey,
 		skey[v + 1] = y | (y >> 1);
 	}
 }
+
+/* NetBSD additions, for computing the standard AES key schedule */
+
+unsigned
+br_aes_ct_keysched_stdenc(uint32_t *skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf;
+	uint32_t tmp;
+
+	switch (key_len) {
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0;
+	}
+	nk = (int)(key_len >> 2);
+	nkf = (int)((num_rounds + 1) << 2);
+	tmp = 0;
+	for (i = 0; i < nk; i ++) {
+		tmp = br_dec32le((const unsigned char *)key + (i << 2));
+		skey[i] = tmp;
+	}
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) {
+		if (j == 0) {
+			tmp = (tmp << 24) | (tmp >> 8);
+			tmp = sub_word(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) {
+			tmp = sub_word(tmp);
+		}
+		tmp ^= skey[i - nk];
+		skey[i] = tmp;
+		if (++ j == nk) {
+			j = 0;
+			k ++;
+		}
+	}
+	return num_rounds;
+}
+
+unsigned
+br_aes_ct_keysched_stddec(uint32_t *skey, const void *key, size_t key_len)
+{
+	uint32_t tkey[60];
+	uint32_t q[8];
+	unsigned num_rounds;
+	unsigned i;
+
+	num_rounds = br_aes_ct_keysched_stdenc(skey, key, key_len);
+	if (num_rounds == 0)
+		return 0;
+
+	tkey[0] = skey[4*num_rounds + 0];
+	tkey[1] = skey[4*num_rounds + 1];
+	tkey[2] = skey[4*num_rounds + 2];
+	tkey[3] = skey[4*num_rounds + 3];
+	for (i = 1; i < num_rounds; i++) {
+		q[2*0] = skey[4*i + 0];
+		q[2*1] = skey[4*i + 1];
+		q[2*2] = skey[4*i + 2];
+		q[2*3] = skey[4*i + 3];
+		q[1] = q[3] = q[5] = q[7] = 0;
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_inv_mix_columns(q);
+		br_aes_ct_ortho(q);
+
+		tkey[4*(num_rounds - i) + 0] = q[2*0];
+		tkey[4*(num_rounds - i) + 1] = q[2*1];
+		tkey[4*(num_rounds - i) + 2] = q[2*2];
+		tkey[4*(num_rounds - i) + 3] = q[2*3];
+	}
+	tkey[4*num_rounds + 0] = skey[0];
+	tkey[4*num_rounds + 1] = skey[1];
+	tkey[4*num_rounds + 2] = skey[2];
+	tkey[4*num_rounds + 3] = skey[3];
+
+	memcpy(skey, tkey, 4*(num_rounds + 1)*sizeof(uint32_t));
+	explicit_memset(tkey, 0, 4*(num_rounds + 1)*sizeof(uint32_t));
+	return num_rounds;
+}
Index: src/sys/crypto/aes/aes_ct_dec.c
diff -u src/sys/crypto/aes/aes_ct_dec.c:1.1 src/sys/crypto/aes/aes_ct_dec.c:1.2
--- src/sys/crypto/aes/aes_ct_dec.c:1.1	Mon Jun 29 23:27:52 2020
+++ src/sys/crypto/aes/aes_ct_dec.c	Mon Jun 29 23:36:59 2020
@@ -1,4 +1,4 @@
-/*	$Ne