CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	rin
Date:		Sat Nov 21 08:09:21 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon.c

Log Message:
Fix build with clang for earmv7hf; loadroundkey() is used only for
__aarch64__.

To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon.c:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.5	Sat Aug 8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Sat Nov 21 08:09:21 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $");
 
 #include
 
@@ -196,11 +196,13 @@ inv = VQ_N_U8(0x80,0x01,0x08,0x0D,0x0F,0
 	inva = VQ_N_U8(0x80,0x07,0x0B,0x0F,0x06,0x0A,0x04,0x01,
 	    0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03);
 
+#ifdef __aarch64__
 static inline uint8x16_t
 loadroundkey(const void *rkp)
 {
 	return vld1q_u8(rkp);
 }
+#endif
 
 static inline void
 storeroundkey(void *rkp, uint8x16_t rk)
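For readers following along, a minimal C sketch (mine, not part of the commit)
of the pattern the fix applies: a static helper that is referenced only under
__aarch64__ has to be compiled out entirely on 32-bit ARM, since a never-called
static function can trip clang's diagnostics in a -Werror build. Only the
function body comes from the diff; the surrounding file is hypothetical.

    /* sketch: arch-guarded NEON helper; the guard keeps clang happy on earmv7hf */
    #include <arm_neon.h>

    #ifdef __aarch64__
    static inline uint8x16_t
    loadroundkey(const void *rkp)
    {
	    return vld1q_u8(rkp);	/* one 128-bit round-key load */
    }
    #endif	/* __aarch64__ */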
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Thu Sep 10 11:31:04 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.

To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.11
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10	Thu Sep 10 11:30:28 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:31:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -54,36 +54,26 @@ inva:
 	.byte	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
 END(inva)
 
-	.type	mc_forward,_ASM_TYPE_OBJECT
-mc_forward:
-	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 */
+	.type	mc,_ASM_TYPE_OBJECT
+mc:
+	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 forward */
 	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
-
-	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 backward */
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
+	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 forward */
 	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
-
-	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 */
+	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 backward */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
+	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 forward */
 	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
-
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 backward */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
 .Lmc_forward_3:
-	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 */
+	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 forward */
 	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
-END(mc_forward)
-
-	.type	mc_backward,_ASM_TYPE_OBJECT
-mc_backward:
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 */
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
-
-	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 */
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
-
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 */
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
-
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 backward */
 	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
-END(mc_backward)
+END(mc)
 
 	.type	sr,_ASM_TYPE_OBJECT
 sr:
@@ -210,8 +200,7 @@ ENTRY(aes_neon_enc1)
 	/*
 	 * r3: rmod4
-	 * r4: mc_forward
-	 * r5: mc_backward
+	 * r4: mc
 	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
@@ -225,8 +214,8 @@ ENTRY(aes_neon_enc1)
 	 * q9={d18-d19}: sb2[1]
 	 * q10={d20-d21}: inv
 	 * q11={d22-d23}: inva
-	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4]
-	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4]
+	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward
+	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward
 	 * q14={d28-d29}: rk/A2/A2_B_D
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
@@ -254,9 +243,8 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
-	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, ip, #(mc_forward - .Lconstants)
-	add	r5, ip, #(mc_backward - .Lconstants)
+	/* r4 := mc */
+	add	r4, ip, #(mc - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -291,13 +279,11 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d25, {q8}, d5
 	vtbl.8	d26, {q9}, d6
 	vtbl.8	d27, {q9}, d7
+	add	r6, r4, r3, lsl #5	/* r6 := &mc[rmod4] */
 	veor	q14, q12, q13
 
-	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
-	add	r6, r4, r3, lsl #4
-	add	r8, r5, r3, lsl #4
-	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r8 :128]
+	/* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
+	vld1.8	{q12-q13}, [r6 :256]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -474,7 +460,7 @@ ENTRY(aes_neon_dec1)
 	add	r8, ip, #(.Lmc_forward_3 - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 := dsbb[0], q7 := dsbb[1] */
 	vld1.8	{q10-q11}, [r6 :256]	/* q10 := inv, q11 := inva */
-	vld1.8	{q15}, [r8 :128]	/* q15 := mc_forward[3] */
+	vld1.8	{q15}, [r8 :128]	/* q15 := mc[3].forward */
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
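The layout change is easier to see in C. A hypothetical sketch of the idea
(mine, not the kernel source): pairing each round's forward and backward
MixColumns permutations in one 32-byte record turns two independent 128-bit
table loads into a single 256-bit one, and the lsl #5 index in the diff scales
the round number by that 32-byte stride. Round 0's bytes are taken from the
table above; the rest are elided.

    #include <stdint.h>

    struct mc_pair {
	    uint8_t forward[16];	/* mc[i].forward */
	    uint8_t backward[16];	/* mc[i].backward, adjacent on purpose */
    };

    static const struct mc_pair mc[4] = {
	    [0] = {
		    .forward = { 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04,
			0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C },
		    .backward = { 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06,
			0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E },
	    },
	    /* rounds 1-3 omitted here; see the .byte table in the diff */
    };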
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Thu Sep 10 11:30:28 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Hoist dsbd/dsbe address calculation out of loop.

To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9	Thu Sep 10 11:30:08 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:30:28 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
 
 	.fpu	neon
 
@@ -431,6 +431,9 @@ ENTRY(aes_neon_dec1)
 	/*
 	 * r3: 3 & ~(nrounds - 1)
+	 * r4: dsbd
+	 * r5: dsbe
+	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -488,6 +491,10 @@ ENTRY(aes_neon_dec1)
 	add	r4, ip, #(dsb9 - .Lconstants)
 	vld1.8	{q4-q5}, [r4 :256]	/* q4 := dsb9[0], q5 := dsb9[1] */
 
+	/* r4 := dsbd, r5 := dsbe */
+	add	r4, ip, #(dsbd - .Lconstants)
+	add	r5, ip, #(dsbe - .Lconstants)
+
 	/* q0 := rk[0] + diptlo(lo) + dipthi(hi) */
 	veor	q0, q14, q2
 	veor	q0, q0, q3
@@ -496,7 +503,6 @@ ENTRY(aes_neon_dec1)
 	_ALIGN_TEXT
 1:	/* load dsbd */
-	add	r4, ip, #(dsbd - .Lconstants)
 	vld1.8	{q8-q9}, [r4 :256]	/* q8 := dsbd[0], q9 := dsbd[1] */
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
@@ -522,8 +528,7 @@ ENTRY(aes_neon_dec1)
 	veor	q0, q0, q13
 
 	/* load dsbe */
-	add	r4, ip, #(dsbe - .Lconstants)
-	vld1.8	{q8-q9}, [r4 :256]!	/* q8 := dsbe[0], q9 := dsbe[1] */
+	vld1.8	{q8-q9}, [r5 :256]	/* q8 := dsbe[0], q9 := dsbe[1] */
 
 	/* q0 := x(mc) + dsbb_0(io) + dsbb_1(jo) */
 	vtbl.8	d28, {q0}, d30
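The same transformation in C terms, as an illustrative sketch (hypothetical
names and offsets, not the kernel code): the table addresses depend only on
the constants base, so computing them once before the round loop removes the
per-iteration address arithmetic.

    #include <stdint.h>

    void
    dec_rounds(const uint8_t *constants, uint8_t *x, unsigned nrounds)
    {
	    /* hoisted: loop-invariant table pointers, computed once */
	    const uint8_t *dsbd = constants + 0x40;	/* offsets are made up */
	    const uint8_t *dsbe = constants + 0x60;

	    while (nrounds--) {
		    /* use dsbd/dsbe here without re-deriving them */
		    *x ^= dsbd[*x & 15] ^ dsbe[*x >> 4];
	    }
    }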
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Thu Sep 10 11:30:08 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Tweak register usage.

- Call r12 by its usual name, ip.
- No need for r7 or r11=fp at the moment.

To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8	Thu Sep 10 11:29:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:30:08 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $")
 
 	.fpu	neon
 
@@ -205,14 +205,14 @@ ENTRY(aes_neon_enc1)
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
-	push	{r4, r5, r6, r7, r8, r10, r11, lr}
+	push	{r4, r5, r6, r8, r10, lr}
 	vpush	{d8-d15}
 
 	/*
 	 * r3: rmod4
 	 * r4: mc_forward
 	 * r5: mc_backward
-	 * r6,r7,r8,r10,r11,r12: temporaries
+	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -231,32 +231,32 @@ ENTRY(aes_neon_enc1)
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
 
-	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
-	ldr	r12, .Lconstants_addr
-	adr	r11, .Lconstants_addr
+	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
+	ldr	ip, .Lconstants_addr
+	adr	r10, .Lconstants_addr
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 	movw	r3, #0
 	vmov.i8	q1, #0x0f
 
-	/* r12 := .Lconstants */
-	add	r12, r12, r11
+	/* ip := .Lconstants */
+	add	ip, ip, r10
 
 	/* (q4, q5) := (iptlo, ipthi) */
-	add	r6, r12, #(ipt - .Lconstants)
+	add	r6, ip, #(ipt - .Lconstants)
 	vld1.8	{q4-q5}, [r6 :256]
 
 	/* load the rest of the constants */
-	add	r4, r12, #(sb1 - .Lconstants)
-	add	r6, r12, #(sb2 - .Lconstants)
-	add	r8, r12, #(.Linv_inva - .Lconstants)
+	add	r4, ip, #(sb1 - .Lconstants)
+	add	r6, ip, #(sb2 - .Lconstants)
+	add	r8, ip, #(.Linv_inva - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 = sb1[0], q7 = sb1[1] */
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
 	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, r12, #(mc_forward - .Lconstants)
-	add	r5, r12, #(mc_backward - .Lconstants)
+	add	r4, ip, #(mc_forward - .Lconstants)
+	add	r5, ip, #(mc_backward - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -295,9 +295,9 @@ ENTRY(aes_neon_enc1)
 
 	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
 	add	r6, r4, r3, lsl #4
-	add	r7, r5, r3, lsl #4
+	add	r8, r5, r3, lsl #4
 	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r7 :128]
+	vld1.8	{q13}, [r8 :128]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -365,8 +365,8 @@ ENTRY(aes_neon_enc1)
 	bne	1b
 
 	/* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */
-	add	r8, r12, #(sr - .Lconstants)
-	add	r6, r12, #(sbo - .Lconstants)
+	add	r8, ip, #(sr - .Lconstants)
+	add	r6, ip, #(sbo - .Lconstants)
 	add	r8, r8, r3, lsl #4
 	vld1.8	{q6-q7}, [r6 :256]
 	vld1.8	{q15}, [r8 :128]
@@ -388,7 +388,7 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d1, {q2}, d31
 
 	vpop	{d8-d15}
-	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
+	pop	{r4, r5, r6, r8, r10, lr}
 #ifdef __SOFTFP__
 #ifdef __ARM_BIG_ENDIAN
 	vmov	r1, r0, d0
@@ -426,7 +426,7 @@ ENTRY(aes_neon_dec1)
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
-	push	{r4, r5, r6, r7, r8, r10, r11, lr}
+	push	{r4, r5, r6, r8, r10, lr}
 	vpush	{d8-d15}
 
 	/*
@@ -449,26 +449,26 @@ ENTRY(aes_neon_dec1)
 	 * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)]
 	 */
 
-	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
-	ldr	r12, .Lconstants_addr
-	adr	r11, .Lconstants_addr
+	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
+	ldr	ip, .Lconstants_addr
+	adr	r10, .Lconstants_addr
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 	rsb	r3, r1, #0		/* r3 := ~(x - 1) = -x */
 	vmov.i8	q1, #0x0f
 	and	r3, r3, #3		/* r3 := 3 & ~(x - 1) */
 
-	/* r12 := .Lconstants */
-	add	r12, r12, r11
+	/* ip := .Lconstants */
+	add	ip, ip, r10
 
 	/* (q4, q5) := (diptlo, dipthi) */
-	add	r6, r12, #(dipt - .Lconstants)
+	add	r6, ip, #(dipt - .Lconstants)
 	vld1.8	{q4-q5}, [r6 :256]
 
 	/* load the rest of the constants */
-	add	r4, r12, #(dsbb - .Lconstants)
-	add	r6, r12, #(.Linv_inva - .Lconstants)
-	add	r8, r12, #(.Lmc_forward_3 -
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Thu Sep 10 11:29:43 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Write vtbl with {qN} rather than {d(2N)-d(2N+1)}.

Cosmetic; no functional change.

To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7	Thu Sep 10 11:29:02 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:29:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $")
 
 	.fpu	neon
 
@@ -264,10 +264,10 @@ ENTRY(aes_neon_enc1)
 	vand	q3, q3, q1	/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* (q2, q3) := (iptlo(lo), ipthi(hi)) */
-	vtbl.8	d4, {d8-d9}, d4
-	vtbl.8	d5, {d8-d9}, d5
-	vtbl.8	d6, {d10-d11}, d6
-	vtbl.8	d7, {d10-d11}, d7
+	vtbl.8	d4, {q4}, d4
+	vtbl.8	d5, {q4}, d5
+	vtbl.8	d6, {q5}, d6
+	vtbl.8	d7, {q5}, d7
 
 	/* q0 := rk[0] + iptlo(lo) + ipthi(hi) */
 	veor	q0, q14, q2
@@ -279,18 +279,18 @@ ENTRY(aes_neon_enc1)
 1:	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
-	vtbl.8	d24, {d12-d13}, d4
-	vtbl.8	d25, {d12-d13}, d5
-	vtbl.8	d26, {d14-d15}, d6
-	vtbl.8	d27, {d14-d15}, d7
+	vtbl.8	d24, {q6}, d4
+	vtbl.8	d25, {q6}, d5
+	vtbl.8	d26, {q7}, d6
+	vtbl.8	d27, {q7}, d7
 	veor	q0, q14, q12
 	veor	q0, q0, q13
 
 	/* q14 := A2 = sb2_0[io] + sb2_1[jo] */
-	vtbl.8	d24, {d16-d17}, d4
-	vtbl.8	d25, {d16-d17}, d5
-	vtbl.8	d26, {d18-d19}, d6
-	vtbl.8	d27, {d18-d19}, d7
+	vtbl.8	d24, {q8}, d4
+	vtbl.8	d25, {q8}, d5
+	vtbl.8	d26, {q9}, d6
+	vtbl.8	d27, {q9}, d7
 	veor	q14, q12, q13
 
 	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
@@ -300,18 +300,18 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q13}, [r7 :128]
 
 	/* q15 := A2_B = A2 + A(mcf) */
-	vtbl.8	d30, {d0-d1}, d24
-	vtbl.8	d31, {d0-d1}, d25
+	vtbl.8	d30, {q0}, d24
+	vtbl.8	d31, {q0}, d25
 	veor	q15, q15, q14
 
 	/* q14 := A2_B_D = A2_B + A(mcb) */
-	vtbl.8	d28, {d0-d1}, d26
-	vtbl.8	d29, {d0-d1}, d27
+	vtbl.8	d28, {q0}, d26
+	vtbl.8	d29, {q0}, d27
 	veor	q14, q14, q15
 
 	/* q0 := x = A2_B_D + A2_B(mcf) */
-	vtbl.8	d0, {d30-d31}, d24
-	vtbl.8	d1, {d30-d31}, d25
+	vtbl.8	d0, {q15}, d24
+	vtbl.8	d1, {q15}, d25
 	veor	q0, q0, q14
 
 2:	/*
@@ -324,19 +324,19 @@ ENTRY(aes_neon_enc1)
 	vand	q3, q3, q1	/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* q0 := a/k */
-	vtbl.8	d0, {d22-d23}, d4
-	vtbl.8	d1, {d22-d23}, d5
+	vtbl.8	d0, {q11}, d4
+	vtbl.8	d1, {q11}, d5
 
 	/* q2 := j = i + k */
 	veor	q2, q3, q2
 
 	/* q12 := ir = 1/i */
-	vtbl.8	d24, {d20-d21}, d6
-	vtbl.8	d25, {d20-d21}, d7
+	vtbl.8	d24, {q10}, d6
+	vtbl.8	d25, {q10}, d7
 
 	/* q13 := jr = 1/j */
-	vtbl.8	d26, {d20-d21}, d4
-	vtbl.8	d27, {d20-d21}, d5
+	vtbl.8	d26, {q10}, d4
+	vtbl.8	d27, {q10}, d5
 
 	/* q12 := iak = 1/i + a/k */
 	veor	q12, q12, q0
@@ -345,12 +345,12 @@ ENTRY(aes_neon_enc1)
 	veor	q13, q13, q0
 
 	/* q12 := iakr = 1/(1/i + a/k) */
-	vtbl.8	d24, {d20-d21}, d24
-	vtbl.8	d25, {d20-d21}, d25
+	vtbl.8	d24, {q10}, d24
+	vtbl.8	d25, {q10}, d25
 
 	/* q13 := jakr = 1/(1/j + a/k) */
-	vtbl.8	d26, {d20-d21}, d26
-	vtbl.8	d27, {d20-d21}, d27
+	vtbl.8	d26, {q10}, d26
+	vtbl.8	d27, {q10}, d27
 
 	/* q2 := io = j + 1/(1/i + a/k) */
 	veor	q2, q2, q12
@@ -374,18 +374,18 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* (q2, q3) := (sbo_0(io), sbo_1(jo)) */
-	vtbl.8	d4, {d12-d13}, d4
-	vtbl.8	d5, {d12-d13}, d5
-	vtbl.8	d6, {d14-d15}, d6
-	vtbl.8	d7, {d14-d15}, d7
+	vtbl.8	d4, {q6}, d4
+	vtbl.8	d5, {q6}, d5
+	vtbl.8	d6, {q7}, d6
+	vtbl.8	d7, {q7}, d7
 
 	/* q2 := x = rk[nr] + sbo_0(io) + sbo_1(jo) */
 	veor	q2, q2, q14
 	veor	q2, q2, q3
 
 	/* q0 := x(sr[rmod4]) */
-	vtbl.8	d0, {d4-d5}, d30
-	vtbl.8	d1, {d4-d5}, d31
+	vtbl.8	d0, {q2}, d30
+	vtbl.8	d1, {q2}, d31
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
@@ -479,10 +479,10 @@ ENTRY(aes_neon_dec1)
 	vand	q3, q3, q1	/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* (q2, q3) := (diptlo(lo), dipthi(hi)) */
-	vtbl.8	d4, {d8-d9}, d4
-	vtbl.8	d5, {d8-d9}, d5
-	vtbl.8	d6, {d10-d11}, d6
-	vtbl.8	d7, {d10-d11}, d7
+	vtbl.8	d4, {q4}, d4
+	vtbl.8	d5, {q4}, d5
+	vtbl.8	d6, {q5}, d6
+	vtbl.8	d7, {q5}, d7
 
 	/* load dsb9 */
 	add	r4, r12, #(dsb9 - .Lconstants)
@@ -502,22 +502,22 @@ ENTRY(aes_neon_dec1)
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := rk[i] + dsb9_0(io) + dsb9_1(jo)
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Thu Sep 10 11:29:02 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Issue 256-bit loads rather than pairs of 128-bit loads.

Not sure why I didn't realize you could do this before!

Saves some temporary registers that can now be allocated to shave off
a few cycles.

To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6	Sun Aug 16 18:02:03 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:29:02 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $")
 
 	.fpu	neon
 
@@ -38,9 +38,10 @@ RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020
 	.long	.Lconstants - .
 
 	.section .rodata
-	.p2align 4
+	.p2align 5
 .Lconstants:
 
+.Linv_inva:	/* inv and inva must be consecutive */
 	.type	inv,_ASM_TYPE_OBJECT
 inv:
 	.byte	0x80,0x01,0x08,0x0D,0x0F,0x06,0x05,0x0E
@@ -99,125 +100,85 @@ sr:
 	.byte	0x08,0x05,0x02,0x0F,0x0C,0x09,0x06,0x03
 END(sr)
 
-	.type	iptlo,_ASM_TYPE_OBJECT
-iptlo:
-	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2
+	.type	ipt,_ASM_TYPE_OBJECT
+ipt:
+	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2	/* lo */
 	.byte	0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA
-END(iptlo)
-
-	.type	ipthi,_ASM_TYPE_OBJECT
-ipthi:
-	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C
+	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C	/* hi */
 	.byte	0x81,0xCC,0xFD,0xB0,0xFC,0xB1,0x80,0xCD
-END(ipthi)
+END(ipt)
 
-	.type	sb1_0,_ASM_TYPE_OBJECT
-sb1_0:
-	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1
+	.type	sb1,_ASM_TYPE_OBJECT
+sb1:
+	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1	/* 0 */
 	.byte	0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5
-END(sb1_0)
-
-	.type	sb1_1,_ASM_TYPE_OBJECT
-sb1_1:
-	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36
+	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36	/* 1 */
 	.byte	0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B
-END(sb1_1)
+END(sb1)
 
-	.type	sb2_0,_ASM_TYPE_OBJECT
-sb2_0:
-	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2
+	.type	sb2,_ASM_TYPE_OBJECT
+sb2:
+	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2	/* 0 */
 	.byte	0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E
-END(sb2_0)
-
-	.type	sb2_1,_ASM_TYPE_OBJECT
-sb2_1:
-	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69
+	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69	/* 1 */
 	.byte	0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2
-END(sb2_1)
+END(sb2)
 
-	.type	sbo_0,_ASM_TYPE_OBJECT
-sbo_0:
-	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0
+	.type	sbo,_ASM_TYPE_OBJECT
+sbo:
+	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0	/* 0 */
 	.byte	0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15
-END(sbo_0)
-
-	.type	sbo_1,_ASM_TYPE_OBJECT
-sbo_1:
-	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF
+	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF	/* 1 */
 	.byte	0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E
-END(sbo_1)
+END(sbo)
 
-	.type	diptlo,_ASM_TYPE_OBJECT
-diptlo:
-	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F
+	.type	dipt,_ASM_TYPE_OBJECT
+dipt:
+	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F	/* lo */
 	.byte	0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15
-END(diptlo)
-
-	.type	dipthi,_ASM_TYPE_OBJECT
-dipthi:
-	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86
+	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86	/* hi */
 	.byte	0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12
-END(dipthi)
+END(dipt)
 
-	.type	dsb9_0,_ASM_TYPE_OBJECT
-dsb9_0:
-	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85
+	.type	dsb9,_ASM_TYPE_OBJECT
+dsb9:
+	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85	/* 0 */
 	.byte	0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA
-END(dsb9_0)
-
-	.type	dsb9_1,_ASM_TYPE_OBJECT
-dsb9_1:
-	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0
+	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0	/* 1 */
 	.byte	0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72
-END(dsb9_1)
+END(dsb9)
 
-	.type	dsbd_0,_ASM_TYPE_OBJECT
-dsbd_0:
-	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D
+	.type	dsbd,_ASM_TYPE_OBJECT
+dsbd:
+	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D	/* 0 */
 	.byte	0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5
-END(dsbd_0)
-
-	.type	dsbd_1,_ASM_TYPE_OBJECT
-dsbd_1:
-	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C
+	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C	/* 1 */
 	.byte	0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29
-END(dsbd_1)
+END(dsbd)
 
-	.type	dsbb_0,_ASM_TYPE_OBJECT
-dsbb_0:
-	.byte	0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0
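An intrinsics-level sketch of the same trick (mine, and only where the
toolchain provides the ACLE _x2 forms; the real code is the assembly above):
once the two 16-byte halves of a table sit side by side, one vld1q_u8_x2 —
the C spelling of vld1.8 {q,q} — fetches both halves in a single transfer.

    #include <arm_neon.h>

    /* paired table halves: bytes 0-15 are half 0, bytes 16-31 are half 1 */
    static const uint8_t sb1[32] = {
	    0x00, /* ... remaining 31 bytes from the sb1 table above ... */
    };

    uint8x16x2_t
    load_sb1(void)
    {
	    return vld1q_u8_x2(sb1);	/* one 256-bit load, like vld1.8 {q6-q7} */
    }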
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Tue Sep 8 23:58:09 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Reallocate registers to shave off unnecessary MOV.

To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.15
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14	Tue Sep 8 23:57:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep 8 23:58:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -917,13 +917,12 @@ END(aesarmv8_cbcmac_update1)
 ENTRY(aesarmv8_ccm_enc1)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ld1	{v0.16b, v1.16b}, [x4]	/* q0 := auth, q2 := ctr (be) */
-	mov	v2.16b, v1.16b
+	ld1	{v0.16b-v1.16b}, [x4]	/* q0 := auth, q1 := ctr (be) */
 	adrl	x11, ctr32_inc		/* x11 := &ctr32_inc */
 	ld1	{v5.4s}, [x11]		/* q5 := (0,0,0,1) (host-endian) */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
+	rev32	v2.16b, v1.16b		/* q2 := ctr (host-endian) */
 	_ALIGN_TEXT
 1:	ld1	{v3.16b}, [x1], #0x10	/* q3 := plaintext block */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
@@ -937,9 +936,8 @@ ENTRY(aesarmv8_ccm_enc1)
 	subs	x10, x10, #0x10		/* count down bytes */
 	st1	{v3.16b}, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b			/* repeat if more blocks */
-	rev32	v2.16b, v2.16b		/* q2 := ctr (big-endian) */
-	mov	v1.16b, v2.16b		/* store updated auth/ctr */
-	st1	{v0.16b-v1.16b}, [x4]
+	rev32	v1.16b, v2.16b		/* q1 := ctr (big-endian) */
+	st1	{v0.16b-v1.16b}, [x4]	/* store updated auth/ctr */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_ccm_enc1)
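The change is the classic copy-elimination: instead of copying a register and
then transforming the copy, transform straight from the source. A C sketch
with the corresponding intrinsic (my illustration, not the patch):

    #include <arm_neon.h>

    /* before: two steps (mov v2, v1; rev32 v2, v2) */
    uint8x16_t
    ctr_host_old(uint8x16_t ctr_be)
    {
	    uint8x16_t t = ctr_be;		/* the unnecessary MOV */
	    return vrev32q_u8(t);
    }

    /* after: one step, reading the source register directly */
    uint8x16_t
    ctr_host_new(uint8x16_t ctr_be)
    {
	    return vrev32q_u8(ctr_be);	/* rev32 v2.16b, v1.16b */
    }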
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Tue Sep 8 23:57:43 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Issue two 4-register ld/st, not four 2-register ld/st.

To generate a diff of this commit:
cvs rdiff -u -r1.13 -r1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13	Tue Sep 8 23:57:13 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep 8 23:57:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -693,10 +693,8 @@ ENTRY(aesarmv8_xts_enc8)
 	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 					/* q31 := tweak[7] */
-	ld1	{v0.16b,v1.16b}, [x1], #0x20	/* q[i] := ptxt[i] */
-	ld1	{v2.16b,v3.16b}, [x1], #0x20
-	ld1	{v4.16b,v5.16b}, [x1], #0x20
-	ld1	{v6.16b,v7.16b}, [x1], #0x20
+	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ptxt[i] */
+	ld1	{v4.16b-v7.16b}, [x1], #0x40
 	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
 	eor	v1.16b, v1.16b, v25.16b
 	eor	v2.16b, v2.16b, v26.16b
@@ -716,10 +714,8 @@ ENTRY(aesarmv8_xts_enc8)
 	eor	v5.16b, v5.16b, v29.16b
 	eor	v6.16b, v6.16b, v30.16b
 	eor	v7.16b, v7.16b, v31.16b
-	st1	{v0.16b,v1.16b}, [x2], #0x20	/* store ciphertext blocks */
-	st1	{v2.16b,v3.16b}, [x2], #0x20
-	st1	{v4.16b,v5.16b}, [x2], #0x20
-	st1	{v6.16b,v7.16b}, [x2], #0x20
+	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store ciphertext blocks */
+	st1	{v4.16b-v7.16b}, [x2], #0x40
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x80		/* count down nbytes */
 	b.ne	1b			/* repeat if more block groups */
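An intrinsics sketch of the same batching (illustrative; vld1q_u8_x4 and
vst1q_u8_x4 are the ACLE spellings of the 4-register ld1/st1 forms, present
in current aarch64 toolchains): each instruction now moves 64 bytes, so the
eight-block loop issues two loads and two stores instead of four of each.

    #include <arm_neon.h>

    /* sketch: copy eight AES blocks with two 64-byte transfers each way */
    void
    copy8blocks(uint8_t *dst, const uint8_t *src)
    {
	    uint8x16x4_t lo = vld1q_u8_x4(src);		/* ld1 {v0.16b-v3.16b} */
	    uint8x16x4_t hi = vld1q_u8_x4(src + 0x40);	/* ld1 {v4.16b-v7.16b} */

	    vst1q_u8_x4(dst, lo);			/* st1 {v0.16b-v3.16b} */
	    vst1q_u8_x4(dst + 0x40, hi);
    }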
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Tue Sep 8 23:57:13 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Adapt aes_armv8_64.S to big-endian.

Patch mainly from (and tested by) jakllsch@ with minor tweaks by me.

To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12	Sat Aug 8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep 8 23:57:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -114,11 +114,11 @@ END(unshiftrows_rotword_3)
 * Standard ABI calling convention.
 */
 ENTRY(aesarmv8_setenckey128)
-	ldr	q1, [x1]	/* q1 := master key */
+	ld1	{v1.16b}, [x1]	/* q1 := master key */
 
 	adrl	x4, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 table */
 
 	str	q1, [x0], #0x10	/* store master key as first round key */
 	mov	x2, #10		/* round count */
@@ -171,14 +171,14 @@ END(aesarmv8_setenckey128)
 * Standard ABI calling convention.
 */
 ENTRY(aesarmv8_setenckey192)
-	ldr	q1, [x1], #0x10	/* q1 := master key[0:128) */
-	ldr	d2, [x1]	/* d2 := master key[128:192) */
+	ld1	{v1.16b}, [x1], #0x10	/* q1 := master key[0:128) */
+	ld1	{v2.8b}, [x1]	/* d2 := master key[128:192) */
 
 	adrl	x4, unshiftrows_rotword_1
 	adrl	x5, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
-	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_1 */
+	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_rotword_3 */
 
 	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
 	mov	x2, #12		/* round count */
@@ -351,13 +351,13 @@ END(aesarmv8_setenckey192)
 */
 ENTRY(aesarmv8_setenckey256)
 	/* q1 := key[0:128), q2 := key[128:256) */
-	ldp	q1, q2, [x1], #0x20
+	ld1	{v1.16b-v2.16b}, [x1], #0x20
 
 	adrl	x4, unshiftrows_rotword_3
 	adrl	x5, unshiftrows_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
-	ldr	q17, [x5]	/* q17 := unshiftrows_3 */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 */
+	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_3 */
 
 	/* store master key as first two round keys */
 	stp	q1, q2, [x0], #0x20
@@ -461,9 +461,9 @@ END(aesarmv8_enctodec)
 ENTRY(aesarmv8_enc)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := ptxt */
+	ld1	{v0.16b}, [x1]	/* q0 := ptxt */
 	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
-	str	q0, [x2]	/* store ctxt */
+	st1	{v0.16b}, [x2]	/* store ctxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_enc)
@@ -479,9 +479,9 @@ END(aesarmv8_enc)
 ENTRY(aesarmv8_dec)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := ctxt */
+	ld1	{v0.16b}, [x1]	/* q0 := ctxt */
 	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
-	str	q0, [x2]	/* store ptxt */
+	st1	{v0.16b}, [x2]	/* store ptxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_dec)
@@ -503,17 +503,17 @@ ENTRY(aesarmv8_cbc_enc)
 	mov	fp, sp
 	mov	x9, x0	/* x9 := enckey */
 	mov	x10, x3	/* x10 := nbytes */
-	ldr	q0, [x4]	/* q0 := chaining value */
+	ld1	{v0.16b}, [x4]	/* q0 := chaining value */
 	_ALIGN_TEXT
-1:	ldr	q1, [x1], #0x10	/* q1 := plaintext block */
+1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9	/* x0 := enckey */
 	mov	x3, x5	/* x3 := nrounds */
 	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
 	subs	x10, x10, #0x10	/* count down nbytes */
-	str	q0, [x2], #0x10	/* store ciphertext block */
+	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b	/* repeat if x10 is nonzero */
-	str	q0, [x4]	/* store chaining value */
+	st1	{v0.16b}, [x4]	/* store chaining value */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 2:	ret
 END(aesarmv8_cbc_enc)
@@ -533,18 +533,21 @@ END(aesarmv8_cbc_enc)
 ENTRY(aesarmv8_cbc_dec1)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q24, [x4]	/* q24 := iv */
+	ld1	{v24.16b}, [x4]	/* q24 := iv */
 	mov	x9, x0	/* x9 := enckey */
 	mov
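The kernel-independent core of the change, sketched with intrinsics (my
illustration, not the patch itself): ld1 {v.16b} — vld1q_u8 in C — indexes
lanes by byte address, so lane 0 holds in[0] on little- and big-endian CPUs
alike, whereas a 128-bit integer load (ldr q) is byte-swapped on big-endian.

    #include <arm_neon.h>

    /* byte-order-stable block load/store, correct on BE and LE */
    uint8x16_t
    load_block(const uint8_t in[16])
    {
	    return vld1q_u8(in);	/* ld1 {v0.16b}, [x0] */
    }

    void
    store_block(uint8_t out[16], uint8x16_t x)
    {
	    vst1q_u8(out, x);		/* st1 {v0.16b}, [x0] */
    }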
CVS commit: src/sys/crypto/aes
Module Name:	src
Committed By:	riastradh
Date:		Tue Sep 8 22:48:24 UTC 2020

Modified Files:
	src/sys/crypto/aes: aes_selftest.c
	src/sys/crypto/aes/arch/x86: aes_sse2_subr.c

Log Message:
aes(9): Fix edge case in bitsliced SSE2 AES-CBC decryption.

Make sure self-tests exercise this edge case.

Discovered by confusion over code inspection of jak's adaptation of
aes_armv8_64.S for big-endian.

To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_selftest.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_selftest.c
diff -u src/sys/crypto/aes/aes_selftest.c:1.5 src/sys/crypto/aes/aes_selftest.c:1.6
--- src/sys/crypto/aes/aes_selftest.c:1.5	Sat Jul 25 22:36:42 2020
+++ src/sys/crypto/aes/aes_selftest.c	Tue Sep 8 22:48:24 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $	*/
+/*	$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
 */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $");
 
 #ifdef _KERNEL
 
@@ -210,7 +210,7 @@ aes_selftest_encdec_cbc(const struct aes
 	uint8_t in[144];
 	uint8_t outbuf[146] = { [0] = 0x1a, [145] = 0x1a }, *out = outbuf + 1;
 	uint8_t iv0[16], iv[16];
-	unsigned i;
+	unsigned i, j;
 
 	for (i = 0; i < 32; i++)
 		key[i] = i;
@@ -237,21 +237,26 @@ aes_selftest_encdec_cbc(const struct aes
 		    "AES-%u-CBC dec", aes_keybits[i]);
 
 		/* Try incrementally, with IV update. */
-		memcpy(iv, iv0, 16);
-		impl->ai_cbc_enc(&enc, in, out, 16, iv, aes_nrounds[i]);
-		impl->ai_cbc_enc(&enc, in + 16, out + 16, 128, iv,
-		    aes_nrounds[i]);
-		if (memcmp(out, expected[i], 144))
-			return aes_selftest_fail(impl, out, expected[i], 144,
-			    "AES-%u-CBC enc incremental", aes_keybits[i]);
-
-		memcpy(iv, iv0, 16);
-		impl->ai_cbc_dec(&dec, out, out, 128, iv, aes_nrounds[i]);
-		impl->ai_cbc_dec(&dec, out + 128, out + 128, 16, iv,
-		    aes_nrounds[i]);
-		if (memcmp(out, in, 144))
-			return aes_selftest_fail(impl, out, in, 144,
-			    "AES-%u-CBC dec incremental", aes_keybits[i]);
+		for (j = 0; j < 144; j += 16) {
+			memcpy(iv, iv0, 16);
+			impl->ai_cbc_enc(&enc, in, out, j, iv, aes_nrounds[i]);
+			impl->ai_cbc_enc(&enc, in + j, out + j, 144 - j, iv,
+			    aes_nrounds[i]);
+			if (memcmp(out, expected[i], 144))
+				return aes_selftest_fail(impl, out,
+				    expected[i], 144, "AES-%u-CBC enc inc %u",
+				    aes_keybits[i], j);
+
+			memcpy(iv, iv0, 16);
+			impl->ai_cbc_dec(&dec, out, out, j, iv,
+			    aes_nrounds[i]);
+			impl->ai_cbc_dec(&dec, out + j, out + j, 144 - j, iv,
+			    aes_nrounds[i]);
+			if (memcmp(out, in, 144))
+				return aes_selftest_fail(impl, out,
+				    in, 144, "AES-%u-CBC dec inc %u",
+				    aes_keybits[i], j);
+		}
 	}
 
 	if (outbuf[0] != 0x1a)

Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.4
--- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3	Sat Jul 25 22:29:56 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c	Tue Sep 8 22:48:24 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
 */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $");
 
 #ifdef _KERNEL
 #include
@@ -200,11 +200,13 @@ aes_sse2_cbc_dec(const struct aesdec *de
 	case 48:
 		w = _mm_loadu_epi8(in + nbytes - 32);
 		q[1] = aes_sse2_interleave_in(w);
-		/*FALLTHROUGH*/
-	case 32:
 		w = _mm_loadu_epi8(in + nbytes - 48);
 		q[0] = aes_sse2_interleave_in(w);
-		/*FALLTHROUGH*/
+		break;
+	case 32:
+		w = _mm_loadu_epi8(in + nbytes - 32);
+		q[0] = aes_sse2_interleave_in(w);
+		break;
 	case 16:
 		break;
 	}
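A stand-in sketch of the edge case the diff fixes (hypothetical helper, not
the real SSE2 code): with a trailing 32-byte chunk, the old fallthrough
computed the first block's offset as nbytes - 48, i.e. one block *before* the
chunk; the split-out case 32 loads it from nbytes - 32 instead.

    #include <stdint.h>
    #include <string.h>

    /* load the leading block(s) of the final chunk of `tail' bytes */
    static void
    load_tail(const uint8_t *in, size_t nbytes, unsigned tail, uint8_t q[2][16])
    {
	    switch (tail) {		/* 16, 32, or 48 */
	    case 48:
		    memcpy(q[1], in + nbytes - 32, 16);
		    memcpy(q[0], in + nbytes - 48, 16);
		    break;
	    case 32:
		    /* the buggy fallthrough read in + nbytes - 48 here */
		    memcpy(q[0], in + nbytes - 32, 16);
		    break;
	    case 16:
		    break;
	    }
    }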
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Sun Aug 16 18:02:03 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_32.S files.aesneon

Log Message:
Fix AES NEON code for big-endian softfp ARM.

...which is how the kernel runs. Switch to using __SOFTFP__ for
consistency with how it gets exposed to C, although I'm not sure how
to get it defined automagically in the toolchain for .S files so
that's set manually in files.aesneon for now.

To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/files.aesneon

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5	Sat Aug 8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Sun Aug 16 18:02:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -228,15 +228,19 @@ END(dsbo_1)
 * aes_neon_enc1(const struct aesenc *enc@r0, uint8x16_t x@q0,
 *     unsigned nrounds@r1)
 *
- * With -mfloat-abi=soft(fp) (here spelled `#ifdef _KERNEL'):
+ * With -mfloat-abi=soft(fp) (i.e., __SOFTFP__):
 *
 *	uint8x16_t@(r0,r1,r2,r3)
 *	aes_neon_enc1(const struct aesenc *enc@r0,
 *	    uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
 */
 ENTRY(aes_neon_enc1)
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	d0, r3, r2	/* d0 := x lo */
+#else
 	vmov	d0, r2, r3	/* d0 := x lo */
+#endif
 	vldr	d1, [sp]	/* d1 := x hi */
 	ldr	r1, [sp, #8]	/* r1 := nrounds */
 #endif
@@ -434,10 +438,15 @@ ENTRY(aes_neon_enc1)
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	r1, r0, d0
+	vmov	r3, r2, d1
+#else
 	vmov	r0, r1, d0
 	vmov	r2, r3, d1
 #endif
+#endif
 	bx	lr
 END(aes_neon_enc1)
@@ -457,8 +466,12 @@ END(aes_neon_enc1)
 *	    uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
 */
 ENTRY(aes_neon_dec1)
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	d0, r3, r2	/* d0 := x lo */
+#else
 	vmov	d0, r2, r3	/* d0 := x lo */
+#endif
 	vldr	d1, [sp]	/* d1 := x hi */
 	ldr	r1, [sp, #8]	/* r1 := nrounds */
 #endif
@@ -669,9 +682,14 @@ ENTRY(aes_neon_dec1)
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	r1, r0, d0
+	vmov	r3, r2, d1
+#else
 	vmov	r0, r1, d0
 	vmov	r2, r3, d1
 #endif
+#endif
 	bx	lr
 END(aes_neon_dec1)

Index: src/sys/crypto/aes/arch/arm/files.aesneon
diff -u src/sys/crypto/aes/arch/arm/files.aesneon:1.3 src/sys/crypto/aes/arch/arm/files.aesneon:1.4
--- src/sys/crypto/aes/arch/arm/files.aesneon:1.3	Tue Jun 30 17:03:13 2020
+++ src/sys/crypto/aes/arch/arm/files.aesneon	Sun Aug 16 18:02:03 2020
@@ -1,4 +1,4 @@
-#	$NetBSD: files.aesneon,v 1.3 2020/06/30 17:03:13 riastradh Exp $
+#	$NetBSD: files.aesneon,v 1.4 2020/08/16 18:02:03 riastradh Exp $
 
 ifdef aarch64
 makeoptions	aes	"COPTS.aes_neon.c"+="-march=armv8-a"
@@ -8,6 +8,8 @@ makeoptions aes "COPTS.aes_neon.c"+="-mf
 makeoptions	aes	"COPTS.aes_neon_subr.c"+="-mfloat-abi=softfp -mfpu=neon"
 endif
 
+makeoptions	aes	"AOPTS.aes_neon_32.S"+="-D__SOFTFP__"
+
 file	crypto/aes/arch/arm/aes_neon.c		aes & (cpu_cortex | aarch64)
 file	crypto/aes/arch/arm/aes_neon_impl.c	aes & (cpu_cortex | aarch64)
 file	crypto/aes/arch/arm/aes_neon_subr.c	aes & (cpu_cortex | aarch64)
CVS commit: src/sys/crypto/aes
Module Name:	src
Committed By:	rin
Date:		Mon Aug 10 06:27:29 UTC 2020

Modified Files:
	src/sys/crypto/aes: aes_ccm.c

Log Message:
Add hack to compile aes_ccm_tag() with -O0 for m68k for GCC8.

GCC 8 miscompiles aes_ccm_tag() for m68k with optimization level
-O[12], which results in failure in aes_ccm_selftest():

| aes_ccm_selftest: tag 0: 8 bytes @ 0x4d3e38
| 03 80 5f 08 22 6f cb fe                         | .._."o..
| aes_ccm_selftest: verify 0 failed
| ...
| WARNING: module error: built-in module aes_ccm failed its MODULE_CMD_INIT, error 5

This is observed for amiga (A1200, 68060), mac68k (Quadra 840AV,
68040), and luna68k (nono, 68030 emulator). However, it is not for
sun3 (TME, 68020 emulator) and sun2 (TME, 68010 emulator). At the
moment, it is unclear whether this is due to differences b/w 68010-20
vs 68030-60, or something wrong with TME.

To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_ccm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.4 src/sys/crypto/aes/aes_ccm.c:1.5
--- src/sys/crypto/aes/aes_ccm.c:1.4	Mon Jul 27 20:44:30 2020
+++ src/sys/crypto/aes/aes_ccm.c	Mon Aug 10 06:27:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
 */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $");
 
 #include
 #include
@@ -301,6 +301,9 @@ aes_ccm_dec(struct aes_ccm *C, const voi
 }
 
 void
+#if defined(__m68k__) && __GNUC_PREREQ__(8, 0)
+__attribute__((__optimize__("O0")))
+#endif
 aes_ccm_tag(struct aes_ccm *C, void *out)
 {
 	uint8_t *auth = C->authctr;
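The generic shape of this workaround, as a sketch (the macro spelling and the
function are mine; only the condition comes from the diff): pin a single
function to -O0 for the one compiler/target combination that miscompiles it,
so every other configuration keeps full optimization.

    /* sketch: per-function optimization override for a miscompiling target */
    #if defined(__m68k__) && __GNUC_PREREQ__(8, 0)
    #define	NOOPT	__attribute__((__optimize__("O0")))
    #else
    #define	NOOPT	/* nothing */
    #endif

    void NOOPT
    fragile_function(void)
    {
	    /* body that GCC 8/m68k would otherwise miscompile at -O1/-O2 */
    }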
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Sun Aug 9 02:00:57 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon_subr.c

Log Message:
Nix outdated comment.

I implemented this parallelism a couple weeks ago.

To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5	Sat Aug 8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c	Sun Aug 9 02:00:57 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
 */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $");
 
 #ifdef _KERNEL
 #include
@@ -287,12 +287,6 @@ aes_neon_cbcmac_update1(const struct aes
 	storeblock(auth0, auth);
 }
 
-/*
- * XXX On aarch64, we have enough registers that we should be able to
- * pipeline two simultaneous vpaes computations in an `aes_neon_enc2'
- * function, which should substantially improve CCM throughput.
- */
-
 void
 aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
 	uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Tue Jul 28 20:11:09 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_neon.c aes_neon_impl.h aes_neon_subr.c arm_neon.h

Log Message:
Draft 2x vectorized neon vpaes for aarch64.

Gives a modest speed boost on rk3399 (Cortex-A53/A72), around 20% in
cgd tests, for parallelizable operations like CBC decryption; same
improvement should probably carry over to rpi4 CPU which lacks
ARMv8.0-AES.

To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon.c \
    src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.3 src/sys/crypto/aes/arch/arm/aes_neon.c:1.4
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.3	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Tue Jul 28 20:11:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
 */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $");
 
 #include
 
@@ -589,6 +589,59 @@ aes_neon_enc1(const struct aesenc *enc,
 	return vqtbl1q_u8(x, sr[rmod4]);
 }
 
+uint8x16x2_t
+aes_neon_enc2(const struct aesenc *enc, uint8x16x2_t x, unsigned nrounds)
+{
+	const uint32_t *rk32 = enc->aese_aes.aes_rk;
+	uint8x16_t inv_ = *(const volatile uint8x16_t *)&inv;
+	uint8x16_t inva_ = *(const volatile uint8x16_t *)&inva;
+	uint8x16_t sb1_0 = ((const volatile uint8x16_t *)sb1)[0];
+	uint8x16_t sb1_1 = ((const volatile uint8x16_t *)sb1)[1];
+	uint8x16_t sb2_0 = ((const volatile uint8x16_t *)sb2)[0];
+	uint8x16_t sb2_1 = ((const volatile uint8x16_t *)sb2)[1];
+	uint8x16_t x0 = x.val[0], x1 = x.val[1];
+	uint8x16_t io0, jo0, io1, jo1;
+	unsigned rmod4 = 0;
+
+	x0 = aes_schedule_transform(x0, ipt);
+	x1 = aes_schedule_transform(x1, ipt);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	for (;;) {
+		uint8x16_t A_0, A2_0, A2_B_0, A2_B_D_0;
+		uint8x16_t A_1, A2_1, A2_B_1, A2_B_D_1;
+
+		subbytes(&io0, &jo0, x0, inv_, inva_);
+		subbytes(&io1, &jo1, x1, inv_, inva_);
+
+		rk32 += 4;
+		rmod4 = (rmod4 + 1) % 4;
+		if (--nrounds == 0)
+			break;
+
+		A_0 = vqtbl1q_u8(sb1_0, io0) ^ vqtbl1q_u8(sb1_1, jo0);
+		A_1 = vqtbl1q_u8(sb1_0, io1) ^ vqtbl1q_u8(sb1_1, jo1);
+		A_0 ^= loadroundkey(rk32);
+		A_1 ^= loadroundkey(rk32);
+		A2_0 = vqtbl1q_u8(sb2_0, io0) ^ vqtbl1q_u8(sb2_1, jo0);
+		A2_1 = vqtbl1q_u8(sb2_0, io1) ^ vqtbl1q_u8(sb2_1, jo1);
+		A2_B_0 = A2_0 ^ vqtbl1q_u8(A_0, mc_forward[rmod4]);
+		A2_B_1 = A2_1 ^ vqtbl1q_u8(A_1, mc_forward[rmod4]);
+		A2_B_D_0 = A2_B_0 ^ vqtbl1q_u8(A_0, mc_backward[rmod4]);
+		A2_B_D_1 = A2_B_1 ^ vqtbl1q_u8(A_1, mc_backward[rmod4]);
+		x0 = A2_B_D_0 ^ vqtbl1q_u8(A2_B_0, mc_forward[rmod4]);
+		x1 = A2_B_D_1 ^ vqtbl1q_u8(A2_B_1, mc_forward[rmod4]);
+	}
+	x0 = vqtbl1q_u8(sbo[0], io0) ^ vqtbl1q_u8(sbo[1], jo0);
+	x1 = vqtbl1q_u8(sbo[0], io1) ^ vqtbl1q_u8(sbo[1], jo1);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	return (uint8x16x2_t) { .val = {
+		[0] = vqtbl1q_u8(x0, sr[rmod4]),
+		[1] = vqtbl1q_u8(x1, sr[rmod4]),
+	} };
+}
+
 uint8x16_t
 aes_neon_dec1(const struct aesdec *dec, uint8x16_t x, unsigned nrounds)
 {
@@ -628,4 +681,60 @@ aes_neon_dec1(const struct aesdec *dec,
 	return vqtbl1q_u8(x, sr[i]);
 }
 
+uint8x16x2_t
+aes_neon_dec2(const struct aesdec *dec, uint8x16x2_t x, unsigned nrounds)
+{
+	const uint32_t *rk32 = dec->aesd_aes.aes_rk;
+	unsigned i = 3 & ~(nrounds - 1);
+	uint8x16_t inv_ = *(const volatile uint8x16_t *)&inv;
+	uint8x16_t inva_ = *(const volatile uint8x16_t *)&inva;
+	uint8x16_t x0 = x.val[0], x1 = x.val[1];
+	uint8x16_t io0, jo0, io1, jo1, mc;
+
+	x0 = aes_schedule_transform(x0, dipt);
+	x1 = aes_schedule_transform(x1, dipt);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	rk32 += 4;
+
+	mc = mc_forward[3];
+	for (;;) {
+		subbytes(&io0, &jo0, x0, inv_, inva_);
+		subbytes(&io1, &jo1, x1, inv_, inva_);
+		if (--nrounds == 0)
+			break;
+
+		x0 = vqtbl1q_u8(dsb9[0], io0) ^ vqtbl1q_u8(dsb9[1], jo0);
+		x1 = vqtbl1q_u8(dsb9[0], io1) ^ vqtbl1q_u8(dsb9[1], jo1);
+		x0 ^= loadroundkey(rk32);
+		x1 ^= loadroundkey(rk32);
+		rk32 += 4;	/* next round key */
+
+		x0 = vqtbl1q_u8(x0, mc);
+		x1 = vqtbl1q_u8(x1, mc);
+		x0 ^= vqtbl1q_u8(dsbd[0], io0) ^ vqtbl1q_u8(dsbd[1], jo0);
+		x1 ^= vqtbl1q_u8(dsbd[0], io1) ^ vqtbl1q_u8(dsbd[1], jo1);
+
+		x0 = vqtbl1q_u8(x0, mc);
+		x1 = vqtbl1q_u8(x1, mc);
+		x0 ^= vqtbl1q_u8(dsbb[0], io0) ^ v
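The principle behind the 2x variants, in a standalone toy sketch (mine, not
the vpaes code): running two independent blocks through the same round
sequence interleaves two dependency chains, so table-lookup latency on one
lane is hidden by useful work on the other.

    #include <arm_neon.h>

    /* toy 2-lane round: both lanes share the same table and round key */
    uint8x16x2_t
    round2(uint8x16x2_t x, uint8x16_t table, uint8x16_t rk)
    {
	    uint8x16_t y0 = vqtbl1q_u8(table, x.val[0]) ^ rk;	/* lane 0 */
	    uint8x16_t y1 = vqtbl1q_u8(table, x.val[1]) ^ rk;	/* lane 1 */

	    return (uint8x16x2_t) { .val = { y0, y1 } };
    }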
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:	src
Committed By:	riastradh
Date:		Tue Jul 28 14:01:35 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Initialize authctr in both branches.

I guess I didn't test the unaligned case, weird.

To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.5 src/sys/crypto/aes/arch/x86/aes_via.c:1.6
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.5	Sat Jul 25 22:31:32 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Tue Jul 28 14:01:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
 */
 
 #include
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $");
 
 #ifdef _KERNEL
 #include
@@ -739,6 +739,7 @@ aesvia_ccm_enc1(const struct aesenc *enc
 		authctr = authctrbuf;
 		ccmenc_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmenc_aligned_evcnt.ev_count++;
 	}
 	c0 = le32dec(authctr0 + 16 + 4*0);
@@ -812,6 +813,7 @@ aesvia_ccm_dec1(const struct aesenc *enc
 		le32enc(authctr + 16 + 4*2, c2);
 		ccmdec_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmdec_aligned_evcnt.ev_count++;
 	}
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:	src
Committed By:	riastradh
Date:		Mon Jul 27 20:54:12 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Issue aese/aesmc and aesd/aesimc in pairs.

Advised by the aarch64 optimization guide; increases cgd throughput
by about 10%.

To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9	Mon Jul 27 20:53:22 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Mon Jul 27 20:54:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -1041,15 +1041,18 @@ END(ctr32_inc)
 	.type	aesarmv8_enc1,@function
 aesarmv8_enc1:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q0 := MixColumns(q0) */
+1:	/* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0)))) */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
 	aese	v0.16b, v16.16b
-	ldr	q16, [x0], #0x10	/* load next round key */
-	b.ne	1b
+	ldr	q16, [x0]		/* load last round key */
+	/* q0 := AddRoundKey_q16(q0) */
 	eor	v0.16b, v0.16b, v16.16b
 	ret
END(aesarmv8_enc1)
@@ -1067,17 +1070,21 @@ END(aesarmv8_enc1)
 	.type	aesarmv8_enc2,@function
 aesarmv8_enc2:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := MixColumns(q[i]) */
+1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v16.16b
 	aesmc	v1.16b, v1.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
 	aese	v0.16b, v16.16b
 	aese	v1.16b, v16.16b
-	ldr	q16, [x0], #0x10	/* load next round key */
-	b.ne	1b
+	ldr	q16, [x0]		/* load last round key */
+	/* q[i] := AddRoundKey_q16(q[i]) */
 	eor	v0.16b, v0.16b, v16.16b
 	eor	v1.16b, v1.16b, v16.16b
 	ret
@@ -1097,18 +1104,28 @@ END(aesarmv8_enc2)
 	.type	aesarmv8_enc8,@function
 aesarmv8_enc8:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := MixColumns(q[i]) */
+1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v16.16b
 	aesmc	v1.16b, v1.16b
+	aese	v2.16b, v16.16b
 	aesmc	v2.16b, v2.16b
+	aese	v3.16b, v16.16b
 	aesmc	v3.16b, v3.16b
+	aese	v4.16b, v16.16b
 	aesmc	v4.16b, v4.16b
+	aese	v5.16b, v16.16b
 	aesmc	v5.16b, v5.16b
+	aese	v6.16b, v16.16b
 	aesmc	v6.16b, v6.16b
+	aese	v7.16b, v16.16b
 	aesmc	v7.16b, v7.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
 	aese	v0.16b, v16.16b
 	aese	v1.16b, v16.16b
@@ -1118,9 +1135,9 @@ aesarmv8_enc8:
 	aese	v5.16b, v16.16b
 	aese	v6.16b, v16.16b
 	aese	v7.16b, v16.16b
-	ldr	q16, [x0], #0x10	/* load next round key */
-	b.ne	1b
-	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
+	ldr	q16, [x0]		/* load last round key */
+	/* q[i] := AddRoundKey_q16(q[i]) */
+	eor	v0.16b, v0.16b, v16.16b
 	eor	v1.16b, v1.16b, v16.16b
 	eor	v2.16b, v2.16b, v16.16b
 	eor	v3.16b, v3.16b, v16.16b
@@ -1144,15 +1161,19 @@ END(aesarmv8_enc8)
 	.type	aesarmv8_dec1,@function
 aesarmv8_dec1:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q0 := InMixColumns(q0) */
-	aesimc	v0.16b, v0.16b
-2:	subs	x3, x3, #1
-	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+1:	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
 	aesd	v0.16b, v16.16b
+	/* q0 := InMixColumns(q0) */
+	aesimc	v0.16b, v0.16b
 	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
 	b.ne	1b
+	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+	aesd	v0.16b, v16.16b
+	ldr	q16, [x0]		/* load last round key */
+	/* q0 := AddRoundKey_q16(q0) */
 	eor	v0.16b, v0.16b, v16.16b
 	ret
END(aesarmv8_dec1)
@@ -1171,18 +1192,29 @@ END(aesarmv8_dec1)
 	.type	aesarmv8_dec8,@function
 aesarmv8_dec8:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := InMixColumns(q[i]) */
+1:	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
+	aesd	v0.16b, v16.16b
+	/* q[i] := InMixColumns(q[i]) */
 	aesimc	v0.16b, v0.16b
+	aesd	v1.16b, v16.16b
 	aesimc	v1.16b, v1.16b
+	aesd	v2.16b, v16.16b
 	aesimc	v2.16b, v2.16b
+	aesd	v3.16b, v16.16b
 	aesimc	v3.16b, v3.16b
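In intrinsics form, the pairing looks like this (a sketch, assuming the
compiler preserves the adjacency): Cortex-A-class cores can fuse an AESE that
immediately feeds an AESMC into one macro-op, so the restructured loop keeps
each pair together instead of splitting them across the loop-back edge.

    #include <arm_neon.h>

    /* one full middle round: aese+aesmc kept adjacent for fusion */
    uint8x16_t
    aes_round(uint8x16_t x, uint8x16_t rk)
    {
	    x = vaeseq_u8(x, rk);	/* AddRoundKey+SubBytes+ShiftRows */
	    return vaesmcq_u8(x);	/* MixColumns, fusable with the aese */
    }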
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:52:11 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: PIC for aes_neon_32.S. Without this, tests/sys/crypto/aes/t_aes fails to start on armv7 because of R_ARM_ABS32 relocations in a nonwritable text segment for a PIE -- which atf quietly ignores in the final report! Yikes. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.2 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1 Mon Jun 29 23:57:56 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Mon Jul 27 20:52:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.1 2020/06/29 23:57:56 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -30,8 +30,14 @@ .fpu neon + .text + .p2align 2 +.Lconstants_addr: + .long .Lconstants - . + .section .rodata .p2align 4 +.Lconstants: .type inv,_ASM_TYPE_OBJECT inv: @@ -239,7 +245,7 @@ ENTRY(aes_neon_enc1) * r3: rmod4 * r4: mc_forward * r5: mc_backward - * r6,r7,r8,r10,r11: temporaries + * r6,r7,r8,r10,r11,r12: temporaries * q0={d0-d1}: x/ak/A * q1={d2-d3}: 0x0f0f... * q2={d4-d5}: lo/k/j/io @@ -258,23 +264,30 @@ ENTRY(aes_neon_enc1) * q15={d30-d31}: A2_B/sr[rmod4] */ + /* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */ + ldr r12, .Lconstants_addr + adr r11, .Lconstants_addr + vld1.64 {d28-d29}, [r0 :128]! /* q14 = *rk++ */ movw r3, #0 vmov.i8 q1, #0x0f + /* r12 := .Lconstants */ + add r12, r12, r11 + /* (q4, q5) := (iptlo, ipthi) */ - ldr r6, =iptlo - ldr r7, =ipthi + add r6, r12, #(iptlo - .Lconstants) + add r7, r12, #(ipthi - .Lconstants) vld1.64 {d8-d9}, [r6 :128] vld1.64 {d10-d11}, [r7 :128] /* load the rest of the constants */ - ldr r4, =sb1_0 - ldr r5, =sb1_1 - ldr r6, =sb2_0 - ldr r7, =sb2_1 - ldr r8, =inv - ldr r10, =inva + add r4, r12, #(sb1_0 - .Lconstants) + add r5, r12, #(sb1_1 - .Lconstants) + add r6, r12, #(sb2_0 - .Lconstants) + add r7, r12, #(sb2_1 - .Lconstants) + add r8, r12, #(inv - .Lconstants) + add r10, r12, #(inva - .Lconstants) vld1.64 {d12-d13}, [r4 :128] /* q6 = sb1[0] */ vld1.64 {d14-d15}, [r5 :128] /* q7 = sb1[1] */ vld1.64 {d16-d17}, [r6 :128] /* q8 = sb2[0] */ @@ -283,8 +296,8 @@ ENTRY(aes_neon_enc1) vld1.64 {d22-d23}, [r10 :128] /* q11 = inva */ /* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */ - ldr r4, =mc_forward - ldr r5, =mc_backward + add r4, r12, #(mc_forward - .Lconstants) + add r5, r12, #(mc_backward - .Lconstants) /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4 @@ -392,9 +405,9 @@ ENTRY(aes_neon_enc1) bne 1b /* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */ - ldr r8, =sr - ldr r6, =sbo_0 - ldr r7, =sbo_1 + add r8, r12, #(sr - .Lconstants) + add r6, r12, #(sbo_0 - .Lconstants) + add r7, r12, #(sbo_1 - .Lconstants) add r8, r8, r3, lsl #4 vld1.64 {d12-d13}, [r6 :128] vld1.64 {d14-d15}, [r7 :128] @@ -469,23 +482,30 @@ ENTRY(aes_neon_dec1) * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)] */ + /* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */ + ldr r12, .Lconstants_addr + adr r11, .Lconstants_addr + vld1.64 {d28-d29}, [r0 :128]! 
/* q14 = *rk++ */ rsb r3, r1, #0 /* r3 := ~(x - 1) = -x */ vmov.i8 q1, #0x0f and r3, r3, #3 /* r3 := 3 & ~(x - 1) */ + /* r12 := .Lconstants */ + add r12, r12, r11 + /* (q4, q5) := (diptlo, dipthi) */ - ldr r6, =diptlo - ldr r7, =dipthi + add r6, r12, #(diptlo - .Lconstants) + add r7, r12, #(dipthi - .Lconstants) vld1.64 {d8-d9}, [r6 :128] vld1.64 {d10-d11}, [r7 :128] /* load the rest of the constants */ - ldr r4, =dsbb_0 - ldr r5, =dsbb_1 - ldr r6, =inv - ldr r7, =inva - ldr r8, =.Lmc_forward_3 + add r4, r12, #(dsbb_0 - .Lconstants) + add r5, r12, #(dsbb_1 - .Lconstants) + add r6, r12, #(inv - .Lconstants) + add r7, r12, #(inva - .Lconstants) + add r8, r12, #(.Lmc_forward_3 - .Lconstants) vld1.64 {d12-d13}, [r4 :128] /* q6 := dsbb[0] */ vld1.64 {d14-d15}, [r5 :128] /* q7 := dsbb[1] */ vld1.64 {d20-d21}, [r6 :128] /* q10 := inv */ @@ -504,8 +524,8 @@ ENTRY(aes_neon_dec1) vtbl.8 d7, {d10-d11}, d7 /* load dsb9 */ - ldr r4, =dsb9_0 - ldr r5, =dsb9_1 + add r4, r12, #(dsb9_0 - .Lconstants) + add r5, r12, #(dsb9_1 - .Lconstants) vld1.64 {d8-d9}, [r4 :128] /* q4 := dsb9[0] */ vld1.64 {d10-d11}, [r5 :128] /* q5 := dsb9[1] */ @@ -516,7 +536,7 @@ ENTRY(aes_neon_dec1) b 2f 1: /* load dsbd */ - ldr r4, =dsbd_0 + add r4, r12, #(dsbd_0 - .Lconstants) vld1.64 {d16-d17}, [r4 :128]! /* q8 := dsbd[0] */ vld1.64 {d18-d19}, [r4 :128] /* q9 := dsbd[1] */ @@ -543,7 +
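The ldr/adr pair at the top of each entry point is the whole PIC trick: instead of ldr rN, =sym literal-pool loads, which emit R_ARM_ABS32 relocations against the text segment, the code stores the link-time offset .Lconstants - . in a literal and adds the literal's own run-time address back in. A C model of the computation (purely illustrative; lconstants_addr stands for the address of the literal word, IPTLO_OFF for the assembled iptlo - .Lconstants offset):

    int32_t off = *(const int32_t *)lconstants_addr;       /* .long .Lconstants - . */
    const uint8_t *constants = (const uint8_t *)lconstants_addr + off;
    const uint8_t *iptlo = constants + IPTLO_OFF;          /* add r6, r12, #(...) */

Every table is then reached by a constant offset from the one recovered base, so the text section needs no absolute relocations and stays valid in a PIE.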
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:44:30 UTC 2020 Modified Files: src/sys/crypto/aes: aes_ccm.c aes_ccm.h Log Message: Gather auth[16] and ctr[16] into one authctr[32]. Should appease clang. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/aes_ccm.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/aes_ccm.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_ccm.c diff -u src/sys/crypto/aes/aes_ccm.c:1.3 src/sys/crypto/aes/aes_ccm.c:1.4 --- src/sys/crypto/aes/aes_ccm.c:1.3 Sun Jul 26 04:44:47 2020 +++ src/sys/crypto/aes/aes_ccm.c Mon Jul 27 20:44:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $ */ +/* $NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $"); #include #include @@ -66,18 +66,20 @@ xor(uint8_t *x, const uint8_t *a, const static void aes_ccm_inc(struct aes_ccm *C) { + uint8_t *ctr = C->authctr + 16; KASSERT(C->L == 2); - if (++C->in[15] == 0 && ++C->in[14] == 0) + if (++ctr[15] == 0 && ++ctr[14] == 0) panic("AES-CCM overflow"); } static void aes_ccm_zero_ctr(struct aes_ccm *C) { + uint8_t *ctr = C->authctr + 16; KASSERT(C->L == 2); - C->in[14] = C->in[15] = 0; + ctr[14] = ctr[15] = 0; } void @@ -87,6 +89,8 @@ aes_ccm_init(struct aes_ccm *C, unsigned size_t mlen) { const uint8_t *adp = ad; + uint8_t *auth = C->authctr; + uint8_t *ctr = C->authctr + 16; unsigned i; KASSERT(L == 2); @@ -102,58 +106,58 @@ aes_ccm_init(struct aes_ccm *C, unsigned C->mlen = C->mleft = mlen; /* Encode B0, the initial authenticated data block. */ - C->auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA); - C->auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M); - C->auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L); - memcpy(C->auth + 1, nonce, noncelen); + auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA); + auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M); + auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L); + memcpy(auth + 1, nonce, noncelen); for (i = 0; i < L; i++, mlen >>= 8) { KASSERT(i < 16 - 1 - noncelen); - C->auth[16 - i - 1] = mlen & 0xff; + auth[16 - i - 1] = mlen & 0xff; } - aes_enc(enc, C->auth, C->auth, C->nr); + aes_enc(enc, auth, auth, C->nr); /* Process additional authenticated data, if any. */ if (adlen) { /* Encode the length according to the table on p. 4. 
*/ if (adlen < 0xff00) { - C->auth[0] ^= adlen >> 8; - C->auth[1] ^= adlen; + auth[0] ^= adlen >> 8; + auth[1] ^= adlen; i = 2; } else if (adlen < 0xffffffff) { - C->auth[0] ^= 0xff; - C->auth[1] ^= 0xfe; - C->auth[2] ^= adlen >> 24; - C->auth[3] ^= adlen >> 16; - C->auth[4] ^= adlen >> 8; - C->auth[5] ^= adlen; + auth[0] ^= 0xff; + auth[1] ^= 0xfe; + auth[2] ^= adlen >> 24; + auth[3] ^= adlen >> 16; + auth[4] ^= adlen >> 8; + auth[5] ^= adlen; i = 6; #if SIZE_MAX > 0xffffffffU } else { CTASSERT(SIZE_MAX <= 0xffffffffffffffff); - C->auth[0] ^= 0xff; - C->auth[1] ^= 0xff; - C->auth[2] ^= adlen >> 56; - C->auth[3] ^= adlen >> 48; - C->auth[4] ^= adlen >> 40; - C->auth[5] ^= adlen >> 32; - C->auth[6] ^= adlen >> 24; - C->auth[7] ^= adlen >> 16; - C->auth[8] ^= adlen >> 8; - C->auth[9] ^= adlen; + auth[0] ^= 0xff; + auth[1] ^= 0xff; + auth[2] ^= adlen >> 56; + auth[3] ^= adlen >> 48; + auth[4] ^= adlen >> 40; + auth[5] ^= adlen >> 32; + auth[6] ^= adlen >> 24; + auth[7] ^= adlen >> 16; + auth[8] ^= adlen >> 8; + auth[9] ^= adlen; i = 10; #endif } /* Fill out the partial block if we can, and encrypt. */ - xor(C->auth + i, C->auth + i, adp, MIN(adlen, 16 - i)); + xor(auth + i, auth + i, adp, MIN(adlen, 16 - i)); adp += MIN(adlen, 16 - i); adlen -= MIN(adlen, 16 - i); - aes_enc(enc, C->auth, C->auth, C->nr); + aes_enc(enc, auth, auth, C->nr); /* If there was anything more, process 16 bytes at a time. */ if (adlen - (adlen % 16)) { aes_cbcmac_update1(enc, adp, adlen - (adlen % 16), - C->auth, C->nr); + auth, C->nr); adlen %= 16; } @@ -162,15 +166,15 @@ aes_ccm_init(struct aes_ccm *C, unsigned * with zeros, which is a no-op) and process it. */ if (adlen) { - xor(C->auth, C->auth, adp, adlen); - aes_enc(enc, C->auth, C->auth, C->nr); + xor(auth, auth, adp, adlen); + aes_enc(enc, auth, auth, C->nr); } } /* Set up the AES input for AES-CTR encryption. */ - C->in[0] = __SHIFTIN(L - 1, CCM_EFLAGS_L); - memcpy(C->in + 1, nonce, noncelen); - memset(C->in + 1 + noncelen, 0, 16 - 1 - noncelen); + ctr[0] = __SHIFTIN(L - 1, CCM_EFLAGS_L); + m
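The two halves keep their old roles; only the storage is fused. A sketch of the layout this commit assumes (names from the diff):

    uint8_t authctr[32];
    uint8_t *auth = C->authctr;        /* [0..15]: CBC-MAC authenticator, was C->auth */
    uint8_t *ctr  = C->authctr + 16;   /* [16..31]: CTR input block, was C->in */

With one 32-byte object, passing C->authctr to a uint8_t[static 32] parameter (as the ai_ccm_* hooks require) is a single in-bounds access, rather than promising 32 accessible bytes behind a pointer into a 16-byte array -- the construction clang objected to.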
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Sun Jul 26 04:44:47 UTC 2020 Modified Files: src/sys/crypto/aes: aes_ccm.c Log Message: Ensure aes_ccm module init runs after aes module init. Otherwise the AES implementation might not be selected early enough. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/aes_ccm.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_ccm.c diff -u src/sys/crypto/aes/aes_ccm.c:1.2 src/sys/crypto/aes/aes_ccm.c:1.3 --- src/sys/crypto/aes/aes_ccm.c:1.2 Sat Jul 25 22:27:53 2020 +++ src/sys/crypto/aes/aes_ccm.c Sun Jul 26 04:44:47 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $ */ +/* $NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.3 2020/07/26 04:44:47 riastradh Exp $"); #include #include @@ -588,7 +588,7 @@ aes_ccm_selftest(void) /* XXX provisional hack */ #include -MODULE(MODULE_CLASS_MISC, aes_ccm, NULL); +MODULE(MODULE_CLASS_MISC, aes_ccm, "aes"); static int aes_ccm_modcmd(modcmd_t cmd, void *opaque)
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:45:10 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: immintrin.h Log Message: Add some Intel intrinsics for ChaCha. _mm_load1_ps _mm_loadu_si128 _mm_movelh_ps _mm_slli_epi32 _mm_storeu_si128 _mm_unpackhi_epi32 _mm_unpacklo_epi32 To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/immintrin.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/immintrin.h diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.4 src/sys/crypto/aes/arch/x86/immintrin.h:1.5 --- src/sys/crypto/aes/arch/x86/immintrin.h:1.4 Sat Jul 25 22:44:32 2020 +++ src/sys/crypto/aes/arch/x86/immintrin.h Sat Jul 25 22:45:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $ */ +/* $NetBSD: immintrin.h,v 1.5 2020/07/25 22:45:10 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -103,6 +103,20 @@ _mm_add_epi32(__m128i __a, __m128i __b) #endif _INTRINSATTR +static __inline __m128 +_mm_load1_ps(const float *__p) +{ + return __extension__ (__m128)(__v4sf) { *__p, *__p, *__p, *__p }; +} + +_INTRINSATTR +static __inline __m128i +_mm_loadu_si128(const __m128i_u *__p) +{ + return ((const struct { __m128i_u __v; } _PACKALIAS *)__p)->__v; +} + +_INTRINSATTR static __inline __m128i _mm_loadu_si32(const void *__p) { @@ -132,8 +146,18 @@ _mm_movehl_ps(__m128 __v0, __m128 __v1) #if defined(__GNUC__) && !defined(__clang__) return (__m128)__builtin_ia32_movhlps((__v4sf)__v0, (__v4sf)__v1); #elif defined(__clang__) - return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, - 6, 7, 2, 3); + return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 6,7,2,3); +#endif +} + +_INTRINSATTR +static __inline __m128 +_mm_movelh_ps(__m128 __v0, __m128 __v1) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (__m128)__builtin_ia32_movlhps((__v4sf)__v0, (__v4sf)__v1); +#elif defined(__clang__) + return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 0,1,4,5); #endif } @@ -205,6 +229,13 @@ _mm_shuffle_epi8(__m128i __vtbl, __m128i _INTRINSATTR static __inline __m128i +_mm_slli_epi32(__m128i __v, uint8_t __bits) +{ + return (__m128i)__builtin_ia32_pslldi128((__v4si)__v, (int)__bits); +} + +_INTRINSATTR +static __inline __m128i _mm_slli_epi64(__m128i __v, uint8_t __bits) { return (__m128i)__builtin_ia32_psllqi128((__v2di)__v, (int)__bits); @@ -245,6 +276,13 @@ _mm_srli_epi64(__m128i __v, uint8_t __bi _INTRINSATTR static __inline void +_mm_storeu_si128(__m128i_u *__p, __m128i __v) +{ + ((struct { __m128i_u __v; } _PACKALIAS *)__p)->__v = __v; +} + +_INTRINSATTR +static __inline void _mm_storeu_si32(void *__p, __m128i __v) { ((struct { int32_t __v; } _PACKALIAS *)__p)->__v = ((__v4si)__v)[0]; @@ -273,6 +311,32 @@ _mm_sub_epi64(__m128i __x, __m128i __y) _INTRINSATTR static __inline __m128i +_mm_unpackhi_epi32(__m128i __lo, __m128i __hi) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (__m128i)__builtin_ia32_punpckhdq128((__v4si)__lo, + (__v4si)__hi); +#elif defined(__clang__) + return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi, + 2,6,3,7); +#endif +} + +_INTRINSATTR +static __inline __m128i +_mm_unpacklo_epi32(__m128i __lo, __m128i __hi) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (__m128i)__builtin_ia32_punpckldq128((__v4si)__lo, + (__v4si)__hi); +#elif defined(__clang__) + return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi, + 
0,4,1,5); +#endif +} + +_INTRINSATTR +static __inline __m128i _mm_unpacklo_epi64(__m128i __lo, __m128i __hi) { #if defined(__GNUC__) && !defined(__clang__)
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:44:32 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: immintrin.h Log Message: Fix target attribute on _mm_movehl_ps, fix clang _mm_unpacklo_epi64. - _mm_movehl_ps is available in SSE2, no need for SSSE3. - _mm_unpacklo_epi64 operates on v2di, not v4si; fix. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/immintrin.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/immintrin.h diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.3 src/sys/crypto/aes/arch/x86/immintrin.h:1.4 --- src/sys/crypto/aes/arch/x86/immintrin.h:1.3 Sat Jul 25 22:31:04 2020 +++ src/sys/crypto/aes/arch/x86/immintrin.h Sat Jul 25 22:44:32 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: immintrin.h,v 1.3 2020/07/25 22:31:04 riastradh Exp $ */ +/* $NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -125,7 +125,7 @@ _mm_load_si128(const __m128i *__p) return *__p; } -_INTRINSATTR _SSSE3_ATTR +_INTRINSATTR static __inline __m128 _mm_movehl_ps(__m128 __v0, __m128 __v1) { @@ -279,8 +279,8 @@ _mm_unpacklo_epi64(__m128i __lo, __m128i return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)__lo, (__v2di)__hi); #elif defined(__clang__) - return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi, - 0, 4, 1, 5); + return (__m128i)__builtin_shufflevector((__v2di)__lo, (__v2di)__hi, + 0,2); #endif }
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:42:31 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h Log Message: Fix missing clang big-endian case. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.4 src/sys/crypto/aes/arch/arm/arm_neon.h:1.5 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.4 Sat Jul 25 22:36:06 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Sat Jul 25 22:42:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.4 2020/07/25 22:36:06 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.5 2020/07/25 22:42:31 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -237,7 +237,12 @@ vld1q_u8(const uint8_t *__p8) return (uint8x16_t)__builtin_neon_vld1v16qi(__p); #endif #elif defined(__clang__) - return (uint8x16_t)__builtin_neon_vld1q_v(__p8, 48); + uint8x16_t __v = (uint8x16_t)__builtin_neon_vld1q_v(__p8, 48); +#ifndef __LITTLE_ENDIAN__ + __v = __builtin_shufflevector(__v, __v, + 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); +#endif + return __v; #endif } @@ -442,7 +447,7 @@ vst1q_u8(uint8_t *__p8, uint8x16_t __v) #elif defined(__clang__) #ifndef __LITTLE_ENDIAN__ __v = __builtin_shufflevector(__v, __v, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); #endif __builtin_neon_vst1q_v(__p8, __v, 48); #endif
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:43:01 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h Log Message: Add 32-bit load, store, and shift intrinsics. vld1q_u32 vst1q_u32 vshlq_n_u32 vshrq_n_u32 To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.5 src/sys/crypto/aes/arch/arm/arm_neon.h:1.6 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.5 Sat Jul 25 22:42:31 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Sat Jul 25 22:43:01 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.5 2020/07/25 22:42:31 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.6 2020/07/25 22:43:01 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -222,6 +222,30 @@ vgetq_lane_u32(uint32x4_t __v, uint8_t _ #endif _INTRINSATTR +static __inline uint32x4_t +vld1q_u32(const uint32_t *__p32) +{ +#if defined(__GNUC__) && !defined(__clang__) +#ifdef __aarch64__ + const __builtin_aarch64_simd_si *__p = + (const __builtin_aarch64_simd_si *)__p32; + + return (uint32x4_t)__builtin_aarch64_ld1v4si(__p); +#else + const __builtin_neon_si *__p = (const __builtin_neon_si *)__p32; + + return (uint32x4_t)__builtin_neon_vld1v4si(__p); +#endif +#elif defined(__clang__) + uint32x4_t __v = (uint32x4_t)__builtin_neon_vld1q_v(__p32, 50); +#ifndef __LITTLE_ENDIAN__ + __v = __builtin_shufflevector(__v, __v, 3,2,1,0); +#endif + return __v; +#endif +} + +_INTRINSATTR static __inline uint8x16_t vld1q_u8(const uint8_t *__p8) { @@ -383,6 +407,38 @@ vsetq_lane_u64(uint64_t __x, uint64x2_t #if defined(__GNUC__) && !defined(__clang__) _INTRINSATTR +static __inline uint32x4_t +vshlq_n_u32(uint32x4_t __v, uint8_t __bits) +{ +#ifdef __aarch64__ + return (uint32x4_t)__builtin_aarch64_ashlv4si((int32x4_t)__v, __bits); +#else + return (uint32x4_t)__builtin_neon_vshl_nv4si((int32x4_t)__v, __bits); +#endif +} +#elif defined(__clang__) +#define vshlq_n_u32(__v, __bits) \ + (uint32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 50) +#endif + +#if defined(__GNUC__) && !defined(__clang__) +_INTRINSATTR +static __inline uint32x4_t +vshrq_n_u32(uint32x4_t __v, uint8_t __bits) +{ +#ifdef __aarch64__ + return (uint32x4_t)__builtin_aarch64_lshrv4si((int32x4_t)__v, __bits); +#else + return (uint32x4_t)__builtin_neon_vshru_nv4si((int32x4_t)__v, __bits); +#endif +} +#elif defined(__clang__) +#define vshrq_n_u8(__v, __bits) \ + (uint32x4_t)__builtin_neon_vshrq_n_v((int32x4_t)(__v), (__bits), 50) +#endif + +#if defined(__GNUC__) && !defined(__clang__) +_INTRINSATTR static __inline uint8x16_t vshrq_n_u8(uint8x16_t __v, uint8_t __bits) { @@ -432,6 +488,28 @@ vsliq_n_s32(int32x4_t __vins, int32x4_t _INTRINSATTR static __inline void +vst1q_u32(uint32_t *__p32, uint32x4_t __v) +{ +#if defined(__GNUC__) && !defined(__clang__) +#ifdef __aarch64__ + __builtin_aarch64_simd_si *__p = (__builtin_aarch64_simd_si *)__p32; + + __builtin_aarch64_st1v4si(__p, (int32x4_t)__v); +#else + __builtin_neon_si *__p = (__builtin_neon_si *)__p32; + + __builtin_neon_vst1v4si(__p, (int32x4_t)__v); +#endif +#elif defined(__clang__) +#ifndef __LITTLE_ENDIAN__ + __v = __builtin_shufflevector(__v, __v, 3,2,1,0); +#endif + __builtin_neon_vst1q_v(__p32, __v, 50); +#endif +} + +_INTRINSATTR +static __inline void vst1q_u8(uint8_t *__p8, uint8x16_t __v) { #if defined(__GNUC__) && 
!defined(__clang__)
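Together with vld1q_u32/vst1q_u32, these shifts are the ingredients of the 32-bit rotate at the heart of the ChaCha quarter round, e.g. (a sketch, not from this commit; the shift counts must be compile-time constants, since the clang versions are macros over immediate-operand builtins):

    x = vshlq_n_u32(x, 7) | vshrq_n_u32(x, 25);    /* x = rotl32(x, 7) */

Note the clang branch above defines vshrq_n_u8 where vshrq_n_u32 is evidently meant; that appears to be a typo in this revision.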
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:42:03 UTC 2020 Modified Files: src/sys/crypto/aes: aes_impl.c Log Message: Make aes boot message verbose-only. To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/aes_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_impl.c diff -u src/sys/crypto/aes/aes_impl.c:1.7 src/sys/crypto/aes/aes_impl.c:1.8 --- src/sys/crypto/aes/aes_impl.c:1.7 Sat Jul 25 22:36:42 2020 +++ src/sys/crypto/aes/aes_impl.c Sat Jul 25 22:42:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $ */ +/* $NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $"); #include #include @@ -111,7 +111,7 @@ aes_select(void) if (aes_impl == NULL) panic("AES self-tests failed"); - aprint_normal("aes: %s\n", aes_impl->ai_name); + aprint_verbose("aes: %s\n", aes_impl->ai_name); return 0; }
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:36:42 UTC 2020 Modified Files: src/sys/crypto/aes: aes_impl.c aes_selftest.c Log Message: Remove now-needless AES-CCM fallback logic. These paths are no longer exercised because all of the aes_impls now do the AES-CCM operations. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/aes_impl.c cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_selftest.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_impl.c diff -u src/sys/crypto/aes/aes_impl.c:1.6 src/sys/crypto/aes/aes_impl.c:1.7 --- src/sys/crypto/aes/aes_impl.c:1.6 Sat Jul 25 22:27:53 2020 +++ src/sys/crypto/aes/aes_impl.c Sat Jul 25 22:36:42 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_impl.c,v 1.6 2020/07/25 22:27:53 riastradh Exp $ */ +/* $NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.6 2020/07/25 22:27:53 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.7 2020/07/25 22:36:42 riastradh Exp $"); #include #include @@ -288,16 +288,6 @@ aes_xts_dec(struct aesdec *dec, const ui aes_impl->ai_xts_dec(dec, in, out, nbytes, tweak, nrounds); } -static void -xor16(uint8_t *x, const uint8_t *a, const uint8_t *b) -{ - - le32enc(x + 4*0, le32dec(a + 4*0) ^ le32dec(b + 4*0)); - le32enc(x + 4*1, le32dec(a + 4*1) ^ le32dec(b + 4*1)); - le32enc(x + 4*2, le32dec(a + 4*2) ^ le32dec(b + 4*2)); - le32enc(x + 4*3, le32dec(a + 4*3) ^ le32dec(b + 4*3)); -} - void aes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16], uint32_t nrounds) @@ -307,15 +297,7 @@ aes_cbcmac_update1(const struct aesenc * KASSERT(nbytes % 16 == 0); aes_guarantee_selected(); - if (aes_impl->ai_cbcmac_update1) { - aes_impl->ai_cbcmac_update1(enc, in, nbytes, auth, nrounds); - return; - } - - for (; nbytes; in += 16, nbytes -= 16) { - xor16(auth, auth, in); - aes_enc(enc, auth, auth, nrounds); - } + aes_impl->ai_cbcmac_update1(enc, in, nbytes, auth, nrounds); } void @@ -323,26 +305,12 @@ aes_ccm_enc1(const struct aesenc *enc, c uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], uint32_t nrounds) { - uint8_t *auth = authctr; - uint8_t *ctr = authctr + 16; KASSERT(nbytes); KASSERT(nbytes % 16 == 0); aes_guarantee_selected(); - if (aes_impl->ai_ccm_enc1) { - aes_impl->ai_ccm_enc1(enc, in, out, nbytes, auth, nrounds); - return; - } - - for (; nbytes; in += 16, out += 16, nbytes -= 16) { - xor16(auth, auth, in); - aes_enc(enc, auth, auth, nrounds); - - be32enc(ctr + 12, 1 + be32dec(ctr + 12)); - aes_enc(enc, ctr, out, nrounds); - xor16(out, out, in); - } + aes_impl->ai_ccm_enc1(enc, in, out, nbytes, authctr, nrounds); } void @@ -350,26 +318,12 @@ aes_ccm_dec1(const struct aesenc *enc, c uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], uint32_t nrounds) { - uint8_t *auth = authctr; - uint8_t *ctr = authctr + 16; KASSERT(nbytes); KASSERT(nbytes % 16 == 0); aes_guarantee_selected(); - if (aes_impl->ai_ccm_dec1) { - aes_impl->ai_ccm_dec1(enc, in, out, nbytes, auth, nrounds); - return; - } - - for (; nbytes >= 16; in += 16, out += 16, nbytes -= 16) { - be32enc(ctr + 12, 1 + be32dec(ctr + 12)); - aes_enc(enc, ctr, out, nrounds); - xor16(out, out, in); - - xor16(auth, auth, out); - aes_enc(enc, auth, auth, nrounds); - } + aes_impl->ai_ccm_dec1(enc, in, out, nbytes, 
authctr, nrounds); } /* Index: src/sys/crypto/aes/aes_selftest.c diff -u src/sys/crypto/aes/aes_selftest.c:1.4 src/sys/crypto/aes/aes_selftest.c:1.5 --- src/sys/crypto/aes/aes_selftest.c:1.4 Sat Jul 25 22:27:53 2020 +++ src/sys/crypto/aes/aes_selftest.c Sat Jul 25 22:36:42 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_selftest.c,v 1.4 2020/07/25 22:27:53 riastradh Exp $ */ +/* $NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.4 2020/07/25 22:27:53 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $"); #ifdef _KERNEL @@ -424,9 +424,6 @@ aes_selftest_cbcmac(const struct aes_imp uint8_t auth[16]; const unsigned nr = AES_128_NROUNDS; - if (impl->ai_cbcmac_update1 == NULL) - return 0; - memset(auth, 0, sizeof auth); impl->ai_setenckey(&enc, key, nr); @@ -500,9 +497,6 @@ aes_selftest_ccm(const struct aes_impl * const unsigned nr = AES_128_NROUNDS; int result = 0; - if (impl->ai_ccm_enc1 == NULL) - return 0; - impl->ai_setenckey(&enc, key, nr); memset(authctr, 0, 16); @@ -521,9 +515,6 @@ aes_selftest_ccm(const struct aes_impl * result |= aes_selftest_fail(impl
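With the fallback gone, ai_cbcmac_update1, ai_ccm_enc1, and ai_ccm_dec1 are mandatory for every aes_impl. For reference, the contract ai_cbcmac_update1 must satisfy is exactly the loop deleted above, restated as a sketch (xor16 as in the removed code):

    /* auth <- E_k(auth ^ blk) for each 16-byte blk of in */
    for (; nbytes; in += 16, nbytes -= 16) {
        xor16(auth, auth, in);
        aes_enc(enc, auth, auth, nrounds);
    }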
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:36:06 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon.h aes_neon_impl.c aes_neon_subr.c arm_neon.h Log Message: Implement AES-CCM with NEON. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon.h \ src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.c \ src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon.h diff -u src/sys/crypto/aes/arch/arm/aes_neon.h:1.2 src/sys/crypto/aes/arch/arm/aes_neon.h:1.3 --- src/sys/crypto/aes/arch/arm/aes_neon.h:1.2 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon.h Sat Jul 25 22:36:06 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon.h,v 1.2 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_neon.h,v 1.3 2020/07/25 22:36:06 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -59,6 +59,12 @@ void aes_neon_xts_enc(const struct aesen uint8_t[static 16], size_t, uint8_t[static 16], uint32_t); void aes_neon_xts_dec(const struct aesdec *, const uint8_t[static 16], uint8_t[static 16], size_t, uint8_t[static 16], uint32_t); +void aes_neon_cbcmac_update1(const struct aesenc *, const uint8_t[static 16], +size_t, uint8_t[static 16], uint32_t); +void aes_neon_ccm_enc1(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t, uint8_t[static 32], uint32_t); +void aes_neon_ccm_dec1(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t, uint8_t[static 32], uint32_t); int aes_neon_selftest(void); Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.2 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.3 --- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.2 Tue Jun 30 20:32:11 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c Sat Jul 25 22:36:06 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $ */ +/* $NetBSD: aes_neon_subr.c,v 1.3 2020/07/25 22:36:06 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,9 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.3 2020/07/25 22:36:06 riastradh Exp $"); + +#include #ifdef _KERNEL #include @@ -213,6 +215,89 @@ aes_neon_xts_dec(const struct aesdec *de storeblock(tweak, t); } +void +aes_neon_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], +size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) +{ + uint8x16_t auth; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + auth = loadblock(auth0); + for (; nbytes; nbytes -= 16, in += 16) + auth = aes_neon_enc1(enc, auth ^ loadblock(in), nrounds); + storeblock(auth0, auth); +} + +/* + * XXX On aarch64, we have enough registers that we should be able to + * pipeline two simultaneous vpaes computations in an `aes_neon_enc2' + * function, which should substantially improve CCM throughput. 
+ */ + +#if _BYTE_ORDER == _LITTLE_ENDIAN +#define vbetoh32q_u8 vrev32q_u8 +#define vhtobe32q_u8 vrev32q_u8 +#elif _BYTE_ORDER == _BIG_ENDIAN +#define vbetoh32q_u8(x) (x) +#define vhtobe32q_u8(x) (x) +#else +#error what kind of endian are you anyway +#endif + +void +aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + const uint32x4_t ctr32_inc = {0, 0, 0, 1}; + uint8x16_t auth, ptxt, ctr_be; + uint32x4_t ctr; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + auth = loadblock(authctr); + ctr_be = loadblock(authctr + 16); + ctr = vreinterpretq_u32_u8(vbetoh32q_u8(ctr_be)); + for (; nbytes; nbytes -= 16, in += 16, out += 16) { + ptxt = loadblock(in); + auth = aes_neon_enc1(enc, auth ^ ptxt, nrounds); + ctr = vaddq_u32(ctr, ctr32_inc); + ctr_be = vhtobe32q_u8(vreinterpretq_u8_u32(ctr)); + storeblock(out, ptxt ^ aes_neon_enc1(enc, ctr_be, nrounds)); + } + storeblock(authctr, auth); + storeblock(authctr + 16, ctr_be); +} + +void +aes_neon_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + const uint32x4_t ctr32_inc = {0, 0, 0, 1}; + uint8x16_t auth, ctr_be, ptxt; + uint32x4_t ctr; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + auth = loadblock(authctr); + ctr_be = loadblock(authctr + 16); + ctr = vreinterpretq_u32_u8(vbetoh32q_u8(ctr_be)); + for (; nbytes; nbytes -= 16, in += 16, out += 16) { + ctr = vaddq_u32(ctr, ctr32_inc); + ctr_be = vhtobe32q_u8(vreinterpretq_u8_u32(ctr)); + ptxt = loadblock(in) ^ aes_neon_enc1(enc, ctr
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:32:09 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: Invert some loops to save a branch instruction on every iteration. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.6 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.7 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.6 Wed Jul 22 06:15:21 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Sat Jul 25 22:32:09 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.6 2020/07/22 06:15:21 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.7 2020/07/25 22:32:09 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -437,13 +437,13 @@ END(aesarmv8_setenckey256) */ ENTRY(aesarmv8_enctodec) ldr q0, [x0, x2, lsl #4] /* load last round key */ -1: str q0, [x1], #0x10 /* store round key */ + b 2f +1: aesimc v0.16b, v0.16b /* convert encryption to decryption */ +2: str q0, [x1], #0x10 /* store round key */ subs x2, x2, #1 /* count down round */ ldr q0, [x0, x2, lsl #4] /* load previous round key */ - b.eq 2f /* stop if this is the last one */ - aesimc v0.16b, v0.16b /* convert encryption to decryption */ - b 1b -2: str q0, [x1] /* store first round key verbatim */ + b.ne 1b /* repeat if there's more */ + str q0, [x1] /* store first round key verbatim */ ret END(aesarmv8_enctodec) @@ -536,17 +536,17 @@ ENTRY(aesarmv8_cbc_dec1) add x2, x2, x3 /* x2 := pointer past end of out */ ldr q0, [x1, #-0x10]! /* q0 := last ciphertext block */ str q0, [x4] /* update iv */ -1: mov x0, x9 /* x0 := enckey */ - mov x3, x5 /* x3 := nrounds */ - bl aesarmv8_dec1 /* q0 := cv ^ ptxt; trash x0/x3/q16 */ - subs x10, x10, #0x10 /* count down nbytes */ - b.eq 2f /* stop if this is the first block */ - ldr q31, [x1, #-0x10]! /* q31 := chaining value */ + b 2f +1: ldr q31, [x1, #-0x10]! /* q31 := chaining value */ eor v0.16b, v0.16b, v31.16b /* q0 := plaintext block */ str q0, [x2, #-0x10]! /* store plaintext block */ mov v0.16b, v31.16b /* move cv = ciphertext block */ - b 1b -2: eor v0.16b, v0.16b, v24.16b /* q0 := first plaintext block */ +2: mov x0, x9 /* x0 := enckey */ + mov x3, x5 /* x3 := nrounds */ + bl aesarmv8_dec1 /* q0 := cv ^ ptxt; trash x0/x3/q16 */ + subs x10, x10, #0x10 /* count down nbytes */ + b.ne 1b /* repeat if more blocks */ + eor v0.16b, v0.16b, v24.16b /* q0 := first plaintext block */ str q0, [x2, #-0x10]! /* store first plaintext block */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret @@ -573,7 +573,11 @@ ENTRY(aesarmv8_cbc_dec8) add x2, x2, x3 /* x2 := pointer past end of out */ ldp q6, q7, [x1, #-0x20]! /* q6, q7 := last ciphertext blocks */ str q7, [x4] /* update iv */ -1: ldp q4, q5, [x1, #-0x20]! + b 2f +1: ldp q6, q7, [x1, #-0x20]! + eor v0.16b, v0.16b, v7.16b /* q0 := pt0 */ + stp q0, q1, [x2, #-0x20]! +2: ldp q4, q5, [x1, #-0x20]! ldp q2, q3, [x1, #-0x20]! ldp q0, q1, [x1, #-0x20]! mov v31.16b, v6.16b /* q[24+i] := cv[i], 0
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:31:32 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_via.c Log Message: Implement AES-CCM with VIA ACE. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_via.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_via.c diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.4 src/sys/crypto/aes/arch/x86/aes_via.c:1.5 --- src/sys/crypto/aes/arch/x86/aes_via.c:1.4 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/arch/x86/aes_via.c Sat Jul 25 22:31:32 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_via.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $"); #ifdef _KERNEL #include @@ -674,6 +674,176 @@ aesvia_xts_dec(const struct aesdec *dec, explicit_memset(t, 0, sizeof t); } +static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +NULL, "aesvia", "cbcmac aligned"); +EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt); +static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +NULL, "aesvia", "cbcmac unaligned"); +EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt); + +static void +aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], +size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) +{ + const uint32_t cw0 = aesvia_keylen_cw0(nrounds); + uint8_t authbuf[16] __aligned(16); + uint8_t *auth = auth0; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + if ((uintptr_t)auth0 & 0xf) { + memcpy(authbuf, auth0, 16); + auth = authbuf; + cbcmac_unaligned_evcnt.ev_count++; + } else { + cbcmac_aligned_evcnt.ev_count++; + } + + fpu_kern_enter(); + aesvia_reload_keys(); + for (; nbytes; nbytes -= 16, in += 16) { + xor128(auth, auth, in); + aesvia_encN(enc, auth, auth, 1, cw0); + } + fpu_kern_leave(); + + if ((uintptr_t)auth0 & 0xf) { + memcpy(auth0, authbuf, 16); + explicit_memset(authbuf, 0, sizeof authbuf); + } +} + +static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +NULL, "aesvia", "ccmenc aligned"); +EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt); +static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +NULL, "aesvia", "ccmenc unaligned"); +EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt); + +static void +aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32], +uint32_t nrounds) +{ + const uint32_t cw0 = aesvia_keylen_cw0(nrounds); + uint8_t authctrbuf[32] __aligned(16); + uint8_t *authctr; + uint32_t c0, c1, c2, c3; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + if ((uintptr_t)authctr0 & 0xf) { + memcpy(authctrbuf, authctr0, 16); + authctr = authctrbuf; + ccmenc_unaligned_evcnt.ev_count++; + } else { + ccmenc_aligned_evcnt.ev_count++; + } + c0 = le32dec(authctr0 + 16 + 4*0); + c1 = le32dec(authctr0 + 16 + 4*1); + c2 = le32dec(authctr0 + 16 + 4*2); + c3 = be32dec(authctr0 + 16 + 4*3); + + /* + * In principle we could use REP XCRYPTCTR here, but that + * doesn't help to compute the CBC-MAC step, and certain VIA + * CPUs have some weird errata with REP XCRYPTCTR that make it + * kind of a pain to use. 
So let's just use REP XCRYPTECB to + * simultaneously compute the CBC-MAC step and the CTR step. + * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel, + * who knows...) + */ + fpu_kern_enter(); + aesvia_reload_keys(); + for (; nbytes; nbytes -= 16, in += 16, out += 16) { + xor128(authctr, authctr, in); + le32enc(authctr + 16 + 4*0, c0); + le32enc(authctr + 16 + 4*1, c1); + le32enc(authctr + 16 + 4*2, c2); + be32enc(authctr + 16 + 4*3, ++c3); + aesvia_encN(enc, authctr, authctr, 2, cw0); + xor128(out, in, authctr + 16); + } + fpu_kern_leave(); + + if ((uintptr_t)authctr0 & 0xf) { + memcpy(authctr0, authctrbuf, 16); + explicit_memset(authctrbuf, 0, sizeof authctrbuf); + } + + le32enc(authctr0 + 16 + 4*0, c0); + le32enc(authctr0 + 16 + 4*1, c1); + le32enc(authctr0 + 16 + 4*2, c2); + be32enc(authctr0 + 16 + 4*3, c3); +} + +static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +NULL, "aesvia", "ccmdec aligned"); +EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt); +static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +NULL, "aesvia", "ccmdec unaligned"); +EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt); + +static void +aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32], +
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:31:04 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_ssse3.h aes_ssse3_impl.c aes_ssse3_subr.c immintrin.h Log Message: Implement AES-CCM with SSSE3. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ssse3.h \ src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c \ src/sys/crypto/aes/arch/x86/immintrin.h cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_ssse3.h diff -u src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.2 src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.3 --- src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.2 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/arch/x86/aes_ssse3.h Sat Jul 25 22:31:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ssse3.h,v 1.2 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_ssse3.h,v 1.3 2020/07/25 22:31:04 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -59,6 +59,12 @@ void aes_ssse3_xts_enc(const struct aese uint8_t[static 16], size_t, uint8_t[static 16], uint32_t); void aes_ssse3_xts_dec(const struct aesdec *, const uint8_t[static 16], uint8_t[static 16], size_t, uint8_t[static 16], uint32_t); +void aes_ssse3_cbcmac_update1(const struct aesenc *, const uint8_t[static 16], +size_t, uint8_t[static 16], uint32_t); +void aes_ssse3_ccm_enc1(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t, uint8_t[static 32], uint32_t); +void aes_ssse3_ccm_dec1(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t, uint8_t[static 32], uint32_t); int aes_ssse3_selftest(void); Index: src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c diff -u src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.3 --- src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.2 Tue Jun 30 20:32:11 2020 +++ src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c Sat Jul 25 22:31:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ssse3_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $ */ +/* $NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $"); #ifdef _KERNEL #include @@ -208,6 +208,75 @@ aes_ssse3_xts_dec(const struct aesdec *d storeblock(tweak, t); } +void +aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], +size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) +{ + __m128i auth; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + auth = loadblock(auth0); + for (; nbytes; nbytes -= 16, in += 16) + auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds); + storeblock(auth0, auth); +} + +void +aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0); + const __m128i bs32 = + _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); + __m128i auth, ctr_be, ctr, ptxt; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + auth = loadblock(authctr); + ctr_be = loadblock(authctr + 16); + ctr = _mm_shuffle_epi8(ctr_be, bs32); + for (; nbytes; nbytes -= 16, in += 16, out += 16) { + ptxt = loadblock(in); + auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds); + ctr = _mm_add_epi32(ctr, ctr32_inc); + ctr_be = _mm_shuffle_epi8(ctr, bs32); + storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds)); + } + storeblock(authctr, auth); + storeblock(authctr + 16, ctr_be); +} + +void +aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0); + const __m128i bs32 = + _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); + __m128i auth, ctr_be, ctr, ptxt; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + auth = loadblock(authctr); + ctr_be = loadblock(authctr + 16); + ctr = _mm_shuffle_epi8(ctr_be, bs32); + for (; nbytes; nbytes -= 16, in += 16, out += 16) { + ctr = _mm_add_epi32(ctr, ctr32_inc); + ctr_be = _mm_shuffle_epi8(ctr, bs32); + ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds); + storeblock(out, ptxt); + auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds); + } + storeblock(authctr, auth); + storeblock(authctr + 16, ctr_be); +} + int aes_ssse3_selftest(void) { Index: src/sys/crypto/aes/arch/x86/immintrin.h diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.2 src/sys/crypto/ae
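The bs32 constant makes _mm_shuffle_epi8 act as a per-32-bit-lane byte swap, so the big-endian counter block can be incremented with ordinary lane arithmetic. One round trip, spelled out (illustrative):

    __m128i le = _mm_shuffle_epi8(be, bs32);            /* BE block -> LE lanes */
    le = _mm_add_epi32(le, _mm_set_epi32(1, 0, 0, 0));  /* ++ last 32-bit word */
    be = _mm_shuffle_epi8(le, bs32);                    /* back to BE for AES */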
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:29:56 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_sse2.h aes_sse2_impl.c aes_sse2_subr.c Log Message: Implement AES-CCM with SSE2. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2.h cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_sse2.h diff -u src/sys/crypto/aes/arch/x86/aes_sse2.h:1.3 src/sys/crypto/aes/arch/x86/aes_sse2.h:1.4 --- src/sys/crypto/aes/arch/x86/aes_sse2.h:1.3 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2.h Sat Jul 25 22:29:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2.h,v 1.3 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_sse2.h,v 1.4 2020/07/25 22:29:56 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -58,6 +58,12 @@ void aes_sse2_xts_enc(const struct aesen uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t); void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16], uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t); +void aes_sse2_cbcmac_update1(const struct aesenc *, const uint8_t[static 16], +size_t, uint8_t[static 16], uint32_t); +void aes_sse2_ccm_enc1(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t, uint8_t[static 32], uint32_t); +void aes_sse2_ccm_dec1(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t, uint8_t[static 32], uint32_t); int aes_sse2_selftest(void); Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.c diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.4 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.5 --- src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.4 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c Sat Jul 25 22:29:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2_impl.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_sse2_impl.c,v 1.5 2020/07/25 22:29:56 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.5 2020/07/25 22:29:56 riastradh Exp $"); #include #include @@ -143,6 +143,39 @@ aes_sse2_xts_dec_impl(const struct aesde fpu_kern_leave(); } +static void +aes_sse2_cbcmac_update1_impl(const struct aesenc *enc, +const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16], +uint32_t nrounds) +{ + + fpu_kern_enter(); + aes_sse2_cbcmac_update1(enc, in, nbytes, auth, nrounds); + fpu_kern_leave(); +} + +static void +aes_sse2_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + + fpu_kern_enter(); + aes_sse2_ccm_enc1(enc, in, out, nbytes, authctr, nrounds); + fpu_kern_leave(); +} + +static void +aes_sse2_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + + fpu_kern_enter(); + aes_sse2_ccm_dec1(enc, in, out, nbytes, authctr, nrounds); + fpu_kern_leave(); +} + static int aes_sse2_probe(void) { @@ -182,4 +215,7 @@ struct aes_impl aes_sse2_impl = { .ai_cbc_dec = aes_sse2_cbc_dec_impl, .ai_xts_enc = aes_sse2_xts_enc_impl, .ai_xts_dec = aes_sse2_xts_dec_impl, + .ai_cbcmac_update1 = aes_sse2_cbcmac_update1_impl, + .ai_ccm_enc1 = aes_sse2_ccm_enc1_impl, + .ai_ccm_dec1 = aes_sse2_ccm_dec1_impl, }; Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.2 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3 --- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.2 Tue Jun 30 20:32:11 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c Sat Jul 25 22:29:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $ */ +/* $NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $"); #ifdef _KERNEL #include @@ -518,6 +518,180 @@ out: /* Store the updated tweak. */ explicit_memset(t, 0, sizeof t); } +void +aes_sse2_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], +size_t nbytes, uint8_t auth[static 16], uint32_t nrounds) +{ + uint64_t sk_exp[120]; + __m128i q[4]; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + /* Expand r
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:29:06 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_ni.c aes_ni.h aes_ni_64.S Log Message: Implement AES-CCM with x86 AES-NI. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_ni.c \ src/sys/crypto/aes/arch/x86/aes_ni_64.S cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_ni.c diff -u src/sys/crypto/aes/arch/x86/aes_ni.c:1.3 src/sys/crypto/aes/arch/x86/aes_ni.c:1.4 --- src/sys/crypto/aes/arch/x86/aes_ni.c:1.3 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/arch/x86/aes_ni.c Sat Jul 25 22:29:06 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ni.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_ni.c,v 1.4 2020/07/25 22:29:06 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.4 2020/07/25 22:29:06 riastradh Exp $"); #ifdef _KERNEL #include @@ -204,6 +204,48 @@ aesni_xts_dec_impl(const struct aesdec * fpu_kern_leave(); } +static void +aesni_cbcmac_update1_impl(const struct aesenc *enc, +const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16], +uint32_t nrounds) +{ + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + fpu_kern_enter(); + aesni_cbcmac_update1(enc, in, nbytes, auth, nrounds); + fpu_kern_leave(); +} + +static void +aesni_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + fpu_kern_enter(); + aesni_ccm_enc1(enc, in, out, nbytes, authctr, nrounds); + fpu_kern_leave(); +} + +static void +aesni_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], +uint32_t nrounds) +{ + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + fpu_kern_enter(); + aesni_ccm_dec1(enc, in, out, nbytes, authctr, nrounds); + fpu_kern_leave(); +} + static int aesni_xts_update_selftest(void) { @@ -273,4 +315,7 @@ struct aes_impl aes_ni_impl = { .ai_cbc_dec = aesni_cbc_dec_impl, .ai_xts_enc = aesni_xts_enc_impl, .ai_xts_dec = aesni_xts_dec_impl, + .ai_cbcmac_update1 = aesni_cbcmac_update1_impl, + .ai_ccm_enc1 = aesni_ccm_enc1_impl, + .ai_ccm_dec1 = aesni_ccm_dec1_impl, }; Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.4 --- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3 Sat Jul 25 22:11:05 2020 +++ src/sys/crypto/aes/arch/x86/aes_ni_64.S Sat Jul 25 22:29:06 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $ */ +/* $NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -951,6 +951,142 @@ ENTRY(aesni_xts_update) END(aesni_xts_update) /* + * aesni_cbcmac_update1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi, + * size_t nbytes@rdx, uint8_t auth[16] @rcx, uint32_t nrounds@r8d) + * + * Update CBC-MAC. + * + * nbytes must be a positive integral multiple of 16. + * + * Standard ABI calling convention. 
+ */ +ENTRY(aesni_cbcmac_update1) + movdqu (%rcx),%xmm0 /* xmm0 := auth */ + mov %rdx,%r10 /* r10 := nbytes */ + mov %rcx,%rdx /* rdx := &auth */ +1: pxor (%rsi),%xmm0 /* xmm0 ^= plaintext block */ + lea 0x10(%rsi),%rsi + mov %r8d,%ecx /* ecx := nrounds */ + call aesni_enc1 /* xmm0 := auth'; trash rax,rcx,xmm8 */ + sub $0x10,%r10 + jnz 1b + movdqu %xmm0,(%rdx) /* store auth' */ + ret +END(aesni_cbcmac_update1) + +/* + * aesni_ccm_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi, + * uint8_t *out@rdx, size_t nbytes@rcx, + * uint8_t authctr[32] @r8, uint32_t nrounds@r9d) + * + * Update CCM encryption. + * + * nbytes must be a positive integral multiple of 16. + * + * Standard ABI calling convention. + */ +ENTRY(aesni_ccm_enc1) + mov %rcx,%r10 /* r10 := nbytes */ + movdqu 0x10(%r8),%xmm2 /* xmm2 := ctr (be) */ + movdqa bswap32(%rip),%xmm4 /* xmm4 := bswap32 table */ + movdqa ctr32_inc(%rip),%xmm5 /* xmm5 := (0,0,0,1) (le) */ + movdqu (%r8),%xmm0 /* xmm0 := auth */ + pshufb %xmm4,%xmm2 /* xmm2 := ctr (le) */ +1: movdqu (%rsi),%xmm3 /* xmm3 := plaintext block */ + paddd %xmm5,%xmm2 /* increment ctr (32-bit) */ + lea 0x10(%rsi),%rsi + movdqa %xmm2,%xmm1 /* xmm1 := ctr (le) */ + mov %r9d,%ecx /* ecx := nrounds */ + pshufb %xmm4,%xmm1 /* xmm1 := ctr (be) */ + pxor %xmm3,%xmm0 /* xmm0 := auth ^ ptxt */ + call aesni_enc2 /* trash rax/rcx/xmm8 */ + pxor %xmm1,%xmm3 /* xmm3 := ciphertext block */ + su
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:28:27 UTC 2020 Modified Files: src/sys/crypto/aes: aes_bear.c Log Message: Implement AES-CCM with BearSSL's bitsliced 32-bit aes_ct. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/aes_bear.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_bear.c diff -u src/sys/crypto/aes/aes_bear.c:1.3 src/sys/crypto/aes/aes_bear.c:1.4 --- src/sys/crypto/aes/aes_bear.c:1.3 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/aes_bear.c Sat Jul 25 22:28:27 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_bear.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $"); #include #include @@ -597,6 +597,220 @@ out: /* Store the updated tweak. */ explicit_memset(q, 0, sizeof q); } +static void +aesbear_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], +size_t nbytes, uint8_t auth[static 16], uint32_t nrounds) +{ + uint32_t sk_exp[120]; + uint32_t q[8]; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + /* Expand round keys for bitslicing. */ + br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk); + + /* Initialize garbage block. */ + q[1] = q[3] = q[5] = q[7] = 0; + + /* Load initial authenticator. */ + q[2*0] = le32dec(auth + 4*0); + q[2*1] = le32dec(auth + 4*1); + q[2*2] = le32dec(auth + 4*2); + q[2*3] = le32dec(auth + 4*3); + + for (; nbytes; nbytes -= 16, in += 16) { + /* Combine input block. */ + q[2*0] ^= le32dec(in + 4*0); + q[2*1] ^= le32dec(in + 4*1); + q[2*2] ^= le32dec(in + 4*2); + q[2*3] ^= le32dec(in + 4*3); + + /* Transform to bitslice, encrypt, transform from bitslice. */ + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q); + br_aes_ct_ortho(q); + } + + /* Store updated authenticator. */ + le32enc(auth + 4*0, q[2*0]); + le32enc(auth + 4*1, q[2*1]); + le32enc(auth + 4*2, q[2*2]); + le32enc(auth + 4*3, q[2*3]); + + /* Paranoia: Zero temporary buffers. */ + explicit_memset(sk_exp, 0, sizeof sk_exp); + explicit_memset(q, 0, sizeof q); +} + +static void +aesbear_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out, +size_t nbytes, uint8_t authctr[32], uint32_t nrounds) +{ + uint32_t sk_exp[120]; + uint32_t q[8]; + uint32_t c0, c1, c2, c3; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + /* Expand round keys for bitslicing. */ + br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk); + + /* Set first block to authenticator. */ + q[2*0] = le32dec(authctr + 4*0); + q[2*1] = le32dec(authctr + 4*1); + q[2*2] = le32dec(authctr + 4*2); + q[2*3] = le32dec(authctr + 4*3); + + /* Load initial counter block, big-endian so we can increment it. */ + c0 = le32dec(authctr + 16 + 4*0); + c1 = le32dec(authctr + 16 + 4*1); + c2 = le32dec(authctr + 16 + 4*2); + c3 = be32dec(authctr + 16 + 4*3); + + for (; nbytes; nbytes -= 16, in += 16, out += 16) { + /* Update authenticator. */ + q[2*0] ^= le32dec(in + 4*0); + q[2*1] ^= le32dec(in + 4*1); + q[2*2] ^= le32dec(in + 4*2); + q[2*3] ^= le32dec(in + 4*3); + + /* Increment 32-bit counter. 
*/ + q[2*0 + 1] = c0; + q[2*1 + 1] = c1; + q[2*2 + 1] = c2; + q[2*3 + 1] = bswap32(++c3); + + /* Encrypt authenticator and counter. */ + br_aes_ct_ortho(q); + br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q); + br_aes_ct_ortho(q); + + /* Encrypt with CTR output. */ + le32enc(out + 4*0, le32dec(in + 4*0) ^ q[2*0 + 1]); + le32enc(out + 4*1, le32dec(in + 4*1) ^ q[2*1 + 1]); + le32enc(out + 4*2, le32dec(in + 4*2) ^ q[2*2 + 1]); + le32enc(out + 4*3, le32dec(in + 4*3) ^ q[2*3 + 1]); + } + + /* Update authenticator. */ + le32enc(authctr + 4*0, q[2*0]); + le32enc(authctr + 4*1, q[2*1]); + le32enc(authctr + 4*2, q[2*2]); + le32enc(authctr + 4*3, q[2*3]); + + /* Update counter. */ + be32enc(authctr + 16 + 4*3, c3); + + /* Paranoia: Zero temporary buffers. */ + explicit_memset(sk_exp, 0, sizeof sk_exp); + explicit_memset(q, 0, sizeof q); +} + +static void +aesbear_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out, +size_t nbytes, uint8_t authctr[32], uint32_t nrounds) +{ + uint32_t sk_exp[120]; + uint32_t q[8]; + uint32_t c0, c1, c2, c3; + uint32_t b0, b1, b2, b3; + + KASSERT(nbytes); + KASSERT(nbytes % 16 == 0); + + /* Expand round keys for bitslicing. */ + br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk); + + /* Load initial counter block, big-endian so we can increment it. */ + c0 = le32dec(authctr + 16 + 4*0); + c1 = le32dec(authctr + 16 + 4*1); + c2 = le32dec(authctr + 16 + 4*2); + c3 = be32dec(authctr + 16 + 4*3); +
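br_aes_ct's bitsliced core encrypts two 128-bit blocks per pass, interleaved in q[8] with even indices holding one block and odd indices the other. That is why the authenticator occupies q[2*i] and the counter q[2*i+1] above: a single br_aes_ct_bitslice_encrypt computes the CBC-MAC step and the CTR keystream block together. Schematically (a sketch of the pairing, not new code; auth_word/ctr_word stand for the le32dec/bswap32 marshalling shown in the diff):

    q[2*i]     = auth_word(i);     /* block 0: CBC-MAC state */
    q[2*i + 1] = ctr_word(i);      /* block 1: counter block */
    br_aes_ct_ortho(q);                             /* to bitsliced form */
    br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
    br_aes_ct_ortho(q);                             /* back to normal form */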
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:27:53 UTC 2020 Modified Files: src/sys/crypto/aes: aes_ccm.c aes_impl.c aes_impl.h aes_selftest.c Log Message: Push CBC-MAC and CCM block updates into the aes_impl API. This should help reduce the setup and teardown overhead (enabling and disabling fpu, or expanding bitsliced keys) for CCM, as used in 802.11 WPA2 CCMP. But all the fiddly formatting details remain in aes_ccm.c to reduce the effort of implementing it -- at the cost of a handful additional setups and teardowns per message. Not yet implemented by any of the aes_impls, so leave a fallback that just calls aes_enc for now. This should be removed when all of the aes_impls provide CBC-MAC and CCM block updates. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/aes_ccm.c \ src/sys/crypto/aes/aes_impl.h cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_impl.c cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/aes_selftest.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_ccm.c diff -u src/sys/crypto/aes/aes_ccm.c:1.1 src/sys/crypto/aes/aes_ccm.c:1.2 --- src/sys/crypto/aes/aes_ccm.c:1.1 Sat Jul 25 22:15:55 2020 +++ src/sys/crypto/aes/aes_ccm.c Sat Jul 25 22:27:53 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $ */ +/* $NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.2 2020/07/25 22:27:53 riastradh Exp $"); #include #include @@ -45,6 +45,7 @@ __KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v #include #include +#include static inline void xor(uint8_t *x, const uint8_t *a, const uint8_t *b, size_t n) @@ -54,13 +55,6 @@ xor(uint8_t *x, const uint8_t *a, const *x++ = *a++ ^ *b++; } -static inline void -xor16(uint8_t *x, const uint8_t *a, const uint8_t *b) -{ - - xor(x, a, b, 16); -} - /* RFC 3610, §2.2 Authentication */ #define CCM_AFLAGS_ADATA __BIT(6) #define CCM_AFLAGS_M __BITS(5,3) @@ -157,9 +151,10 @@ aes_ccm_init(struct aes_ccm *C, unsigned aes_enc(enc, C->auth, C->auth, C->nr); /* If there was anything more, process 16 bytes at a time. */ - for (; adlen >= 16; adp += 16, adlen -= 16) { - xor16(C->auth, C->auth, adp); - aes_enc(enc, C->auth, C->auth, C->nr); + if (adlen - (adlen % 16)) { + aes_cbcmac_update1(enc, adp, adlen - (adlen % 16), + C->auth, C->nr); + adlen %= 16; } /* @@ -217,15 +212,12 @@ aes_ccm_enc(struct aes_ccm *C, const voi } /* Process 16 bytes at a time. */ - for (; nbytes >= 16; p += 16, q += 16, nbytes -= 16) { - /* authenticate */ - xor16(C->auth, C->auth, p); - aes_enc(C->enc, C->auth, C->auth, C->nr); - - /* encrypt */ - aes_ccm_inc(C); - aes_enc(C->enc, C->in, C->out, C->nr); - xor16(q, C->out, p); + if (nbytes - (nbytes % 16)) { + aes_ccm_enc1(C->enc, p, q, nbytes - (nbytes % 16), C->auth, + C->nr); + p += nbytes - (nbytes % 16); + q += nbytes - (nbytes % 16); + nbytes %= 16; } /* Incorporate any <16-byte unit as a partial block. */ @@ -278,15 +270,12 @@ aes_ccm_dec(struct aes_ccm *C, const voi } /* Process 16 bytes at a time. 
*/ - for (; nbytes >= 16; p += 16, q += 16, nbytes -= 16) { - /* decrypt */ - aes_ccm_inc(C); - aes_enc(C->enc, C->in, C->out, C->nr); - xor16(q, C->out, p); - - /* authenticate */ - xor16(C->auth, C->auth, q); - aes_enc(C->enc, C->auth, C->auth, C->nr); + if (nbytes - (nbytes % 16)) { + aes_ccm_dec1(C->enc, p, q, nbytes - (nbytes % 16), C->auth, + C->nr); + p += nbytes - (nbytes % 16); + q += nbytes - (nbytes % 16); + nbytes %= 16; } /* Incorporate any <16-byte unit as a partial block. */ Index: src/sys/crypto/aes/aes_impl.h diff -u src/sys/crypto/aes/aes_impl.h:1.1 src/sys/crypto/aes/aes_impl.h:1.2 --- src/sys/crypto/aes/aes_impl.h:1.1 Sat Jul 25 22:12:57 2020 +++ src/sys/crypto/aes/aes_impl.h Sat Jul 25 22:27:53 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_impl.h,v 1.1 2020/07/25 22:12:57 riastradh Exp $ */ +/* $NetBSD: aes_impl.h,v 1.2 2020/07/25 22:27:53 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -51,10 +51,27 @@ struct aes_impl { uint8_t[static 16], size_t, uint8_t[static 16], uint32_t); void (*ai_xts_dec)(const struct aesdec *, const uint8_t[static 16], uint8_t[static 16], size_t, uint8_t[static 16], uint32_t); + void (*ai_cbcmac_update1)(const struct aesenc *, + const uint8_t[static 16], size_t, uint8_t[static 16], + uint32_t); + void (*ai_ccm_enc1)(const struct aesenc *, + const uint8_t[static 16], uint8_t[static 16], + size_t, uint8_t[static 32], uint32_t); + void (*ai_ccm_dec1)(const struct aesenc *, + const uint8_t[stati
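The loops this commit deletes from aes_ccm.c show exactly the shape a generic ai_cbcmac_update1 implementation can take; until the per-implementation hooks land, the fallback described in the log message amounts to the same thing. A minimal sketch, reusing the xor() helper that remains in aes_ccm.c (the function name is illustrative):

	/* Generic CBC-MAC over whole blocks: auth = E_k(auth ^ block). */
	static void
	aes_cbcmac_update1_generic(const struct aesenc *enc, const uint8_t *in,
	    size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
	{
		KASSERT(nbytes % 16 == 0);

		for (; nbytes; in += 16, nbytes -= 16) {
			xor(auth, auth, in, 16);	   /* absorb block */
			aes_enc(enc, auth, auth, nrounds); /* permute */
		}
	}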
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:15:55 UTC 2020 Modified Files: src/sys/crypto/aes: files.aes Added Files: src/sys/crypto/aes: aes_ccm.c aes_ccm.h aes_ccm_mbuf.c aes_ccm_mbuf.h Log Message: New aes_ccm API. Intended for use in net80211 for WPA2 CCMP. To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/aes_ccm.c \ src/sys/crypto/aes/aes_ccm.h src/sys/crypto/aes/aes_ccm_mbuf.c \ src/sys/crypto/aes/aes_ccm_mbuf.h cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/files.aes Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/files.aes diff -u src/sys/crypto/aes/files.aes:1.1 src/sys/crypto/aes/files.aes:1.2 --- src/sys/crypto/aes/files.aes:1.1 Mon Jun 29 23:27:52 2020 +++ src/sys/crypto/aes/files.aes Sat Jul 25 22:15:55 2020 @@ -1,9 +1,11 @@ -# $NetBSD: files.aes,v 1.1 2020/06/29 23:27:52 riastradh Exp $ +# $NetBSD: files.aes,v 1.2 2020/07/25 22:15:55 riastradh Exp $ define aes define rijndael: aes # legacy Rijndael API file crypto/aes/aes_bear.c aes +file crypto/aes/aes_ccm.c aes +file crypto/aes/aes_ccm_mbuf.c aes file crypto/aes/aes_ct.c aes file crypto/aes/aes_ct_dec.c aes file crypto/aes/aes_ct_enc.c aes Added files: Index: src/sys/crypto/aes/aes_ccm.c diff -u /dev/null src/sys/crypto/aes/aes_ccm.c:1.1 --- /dev/null Sat Jul 25 22:15:55 2020 +++ src/sys/crypto/aes/aes_ccm.c Sat Jul 25 22:15:55 2020 @@ -0,0 +1,619 @@ +/* $NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $ */ + +/*- + * Copyright (c) 2020 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * AES-CCM, as defined in: + * + * D. Whiting, R. Housley, and N. Ferguson, `Counter with CBC-MAC + * (CCM)', IETF RFC 3610, September 2003. 
+ * https://tools.ietf.org/html/rfc3610 + */ + +#include +__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.1 2020/07/25 22:15:55 riastradh Exp $"); + +#include +#include +#include + +#include + +#include +#include + +static inline void +xor(uint8_t *x, const uint8_t *a, const uint8_t *b, size_t n) +{ + + while (n --> 0) + *x++ = *a++ ^ *b++; +} + +static inline void +xor16(uint8_t *x, const uint8_t *a, const uint8_t *b) +{ + + xor(x, a, b, 16); +} + +/* RFC 3610, §2.2 Authentication */ +#define CCM_AFLAGS_ADATA __BIT(6) +#define CCM_AFLAGS_M __BITS(5,3) +#define CCM_AFLAGS_L __BITS(2,0) + +/* RFC 3610, §2.3 Encryption */ +#define CCM_EFLAGS_L __BITS(2,0) + +static void +aes_ccm_inc(struct aes_ccm *C) +{ + + KASSERT(C->L == 2); + if (++C->in[15] == 0 && ++C->in[14] == 0) + panic("AES-CCM overflow"); +} + +static void +aes_ccm_zero_ctr(struct aes_ccm *C) +{ + + KASSERT(C->L == 2); + C->in[14] = C->in[15] = 0; +} + +void +aes_ccm_init(struct aes_ccm *C, unsigned nr, const struct aesenc *enc, +unsigned L, unsigned M, +const uint8_t *nonce, unsigned noncelen, const void *ad, size_t adlen, +size_t mlen) +{ + const uint8_t *adp = ad; + unsigned i; + + KASSERT(L == 2); + KASSERT(M % 2 == 0); + KASSERT(M >= 4); + KASSERT(M <= 16); + KASSERT(noncelen == 15 - L); + + C->enc = enc; + C->nr = nr; + C->L = L; + C->M = M; + C->mlen = C->mleft = mlen; + + /* Encode B0, the initial authenticated data block. */ + C->auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA); + C->auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M); + C->auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L); + memcpy(C->auth + 1, nonce, noncelen); + for (i = 0; i < L; i++, mlen >>= 8) { + KASSERT(i < 16 - 1 - noncelen); + C->auth[16 - i - 1] = mlen & 0xff; + } + aes_enc(enc, C->auth, C->auth, C->nr); + + /* Process
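Pieced together from the aes_ccm_init() signature above, a hypothetical caller looks like the following; the key-setup call and the tag-extraction call at the end are assumptions about parts of the API not shown in this excerpt:

	struct aesenc enc;
	struct aes_ccm C;
	uint8_t nonce[13];	/* noncelen == 15 - L == 13 for L = 2 */
	uint8_t tag[16];
	uint32_t nr;

	nr = aes_setenckey128(&enc, key);	/* assumed key-setup entry */
	aes_ccm_init(&C, nr, &enc, 2 /* L */, 16 /* M */,
	    nonce, 13, ad, adlen, mlen);
	aes_ccm_enc(&C, plaintext, ciphertext, mlen);
	aes_ccm_tag(&C, tag);			/* assumed finalizer */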
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Sat Jul 25 22:11:05 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_ni_64.S Log Message: Invert some loops to save a jmp instruction on each iteration. No semantic change intended. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3 --- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 Tue Jun 30 21:41:04 2020 +++ src/sys/crypto/aes/arch/x86/aes_ni_64.S Sat Jul 25 22:11:05 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $ */ +/* $NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -522,14 +522,14 @@ ENTRY(aesni_enctodec) shl $4,%edx /* rdx := byte offset of last round key */ movdqa (%rdi,%rdx),%xmm0 /* load last round key */ movdqa %xmm0,(%rsi) /* store last round key verbatim */ -1: sub $0x10,%rdx /* advance to next round key */ - lea 0x10(%rsi),%rsi - jz 2f /* stop if this is the last one */ - movdqa (%rdi,%rdx),%xmm0 /* load round key */ + jmp 2f +1: movdqa (%rdi,%rdx),%xmm0 /* load round key */ aesimc %xmm0,%xmm0 /* convert encryption to decryption */ movdqa %xmm0,(%rsi) /* store round key */ - jmp 1b -2: movdqa (%rdi),%xmm0 /* load first round key */ +2: sub $0x10,%rdx /* advance to next round key */ + lea 0x10(%rsi),%rsi + jnz 1b /* repeat if more rounds */ + movdqa (%rdi),%xmm0 /* load first round key */ movdqa %xmm0,(%rsi) /* store first round key verbatim */ ret END(aesni_enctodec) @@ -614,16 +614,16 @@ ENTRY(aesni_cbc_dec1) mov %rcx,%r10 /* r10 := nbytes */ movdqu -0x10(%rsi,%r10),%xmm0 /* xmm0 := last ciphertext block */ movdqu %xmm0,(%r8) /* update iv */ -1: mov %r9d,%ecx /* ecx := nrounds */ - call aesni_dec1 /* xmm0 := cv ^ ptxt */ - sub $0x10,%r10 - jz 2f /* first block if r10 is now zero */ - movdqu -0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */ + jmp 2f +1: movdqu -0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */ pxor %xmm8,%xmm0 /* xmm0 := ptxt */ movdqu %xmm0,(%rdx,%r10) /* store plaintext block */ movdqa %xmm8,%xmm0 /* move cv = ciphertext block */ - jmp 1b -2: pxor (%rsp),%xmm0 /* xmm0 := ptxt */ +2: mov %r9d,%ecx /* ecx := nrounds */ + call aesni_dec1 /* xmm0 := cv ^ ptxt */ + sub $0x10,%r10 + jnz 1b /* repeat if more blocks */ + pxor (%rsp),%xmm0 /* xmm0 := ptxt */ movdqu %xmm0,(%rdx) /* store first plaintext block */ leave ret @@ -649,7 +649,11 @@ ENTRY(aesni_cbc_dec8) mov %rcx,%r10 /* r10 := nbytes */ movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := ciphertext block[n-1] */ movdqu %xmm7,(%r8) /* update iv */ -1: movdqu -0x20(%rsi,%r10),%xmm6 /* xmm6 := ciphertext block[n-2] */ + jmp 2f +1: movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := cv[0] */ + pxor %xmm7,%xmm0 /* xmm0 := ptxt[0] */ + movdqu %xmm0,(%rdx,%r10) /* store plaintext block */ +2: movdqu -0x20(%rsi,%r10),%xmm6 /* xmm6 := ciphertext block[n-2] */ movdqu -0x30(%rsi,%r10),%xmm5 /* xmm5 := ciphertext block[n-3] */ movdqu -0x40(%rsi,%r10),%xmm4 /* xmm4 := ciphertext block[n-4] */ movdqu -0x50(%rsi,%r10),%xmm3 /* xmm3 := ciphertext block[n-5] */ @@ -680,12 +684,8 @@ ENTRY(aesni_cbc_dec8) movdqu %xmm2,-0x60(%rdx,%r10) movdqu %xmm1,-0x70(%rdx,%r10) sub $0x80,%r10 - jz 2f /* first block if r10 is now zero */ - movdqu -0x10(%rsi,%r10),%xmm7 /* xmm7 := 
cv[0] */ - pxor %xmm7,%xmm0 /* xmm0 := ptxt[0] */ - movdqu %xmm0,(%rdx,%r10) /* store plaintext block */ - jmp 1b -2: pxor (%rsp),%xmm0 /* xmm0 := ptxt[0] */ + jnz 1b /* repeat if more blocks */ + pxor (%rsp),%xmm0 /* xmm0 := ptxt[0] */ movdqu %xmm0,(%rdx) /* store first plaintext block */ leave ret @@ -966,12 +966,12 @@ aesni_enc1: shl $4,%ecx /* ecx := total byte size of round keys */ lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */ neg %rcx /* rcx := byte offset of round key from end */ -1: movdqa (%rax,%rcx),%xmm8 /* load round key */ + jmp 2f +1: aesenc %xmm8,%xmm0 +2: movdqa (%rax,%rcx),%xmm8 /* load round key */ add $0x10,%rcx - jz 2f /* stop if this is the last one */ - aesenc %xmm8,%xmm0 - jmp 1b -2: aesenclast %xmm8,%xmm0 + jnz 1b /* repeat if more rounds */ + aesenclast %xmm8,%xmm0 ret END(aesni_enc1) @@ -999,10 +999,8 @@ aesni_enc8: shl $4,%ecx /* ecx := total byte size of round keys */ lea 0x10(%rdi,%rcx),%rax /* rax := end of round key array */ neg %rcx /* rcx := byte offset of round key from end */ -1: movdqa (%rax,%rcx),%xmm8 /* load round key */ - add $0x10,%rcx - jz 2f /* stop if this is the last one */ - aesenc %xmm8,%xmm0 + jmp 2f +1: aesenc %xmm8,%xmm0 aesenc %xmm8,%xmm1 aesenc %xmm8,%xmm2 aesenc %xmm8,%xmm3 @@ -1010,8 +1008,10 @@ aesni_en
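In C terms, the loop inversion replaces a body that branches out of the middle and then jumps back unconditionally with a loop entered at its bottom test, so the steady state costs one conditional branch per iteration instead of a conditional plus a jmp. A schematic rendering, not the kernel code itself; round1() and chain() are hypothetical stand-ins for the per-block work and the inter-iteration work:

	static unsigned round1(unsigned);	/* stand-in: main work */
	static unsigned chain(unsigned);	/* stand-in: between blocks */

	/* Before: conditional exit mid-loop plus an unconditional jump back. */
	static unsigned
	loop_original(unsigned x, size_t n)
	{
		for (;;) {
			x = round1(x);
			if ((n -= 16) == 0)
				break;
			x = chain(x);
		}
		return x;
	}

	/* After: jump to the bottom test once; one branch per iteration. */
	static unsigned
	loop_inverted(unsigned x, size_t n)
	{
		goto bottom;
	top:	x = chain(x);
	bottom:	x = round1(x);
		if ((n -= 16) != 0)
			goto top;
		return x;
	}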
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: ryo Date: Thu Jul 23 11:33:01 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h Log Message: fix build with llvm/clang. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.2 src/sys/crypto/aes/arch/arm/arm_neon.h:1.3 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.2 Tue Jun 30 21:24:00 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Thu Jul 23 11:33:01 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.2 2020/06/30 21:24:00 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.3 2020/07/23 11:33:01 ryo Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -65,7 +65,7 @@ typedef struct { uint8x8_t val[2]; } uin #elif defined(__clang__) #define _INTRINSATTR \ - __attribute__((__always_inline__, __nodebug)) + __attribute__((__always_inline__, __nodebug__)) typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Wed Jul 22 06:15:21 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: Fix register name in comment. Some time ago I reallocated the registers to avoid inadvertently clobbering the callee-saves v9, but neglected to update the comment. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.5 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.6 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.5 Sun Jul 19 07:32:43 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Wed Jul 22 06:15:21 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.5 2020/07/19 07:32:43 ryo Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.6 2020/07/22 06:15:21 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -827,7 +827,7 @@ aesarmv8_xts_mulx: * carried into x^128 = x^7 + x^2 + x + 1. */ adrl x0, xtscarry - cmlt v1.2d, v31.2d, #0 /* v1.2d[i] := -1 if v9.2d[i] < 0, else 0 */ + cmlt v1.2d, v31.2d, #0 /* v1.2d[i] := -1 if v31.2d[i] < 0, else 0 */ ldr q0, [x0] /* q0 := xtscarry */ ext v1.16b, v1.16b, v1.16b, #8 /* swap halves of q1 */ shl v31.2d, v31.2d, #1 /* shift */
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: ryo Date: Sun Jul 19 07:32:43 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: fix build with clang/llvm. clang aarch64 assembler doesn't accept optional number of lanes of vector register. (but ARMARM says that an assembler must accept it) To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.4 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.5 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.4 Tue Jun 30 23:06:02 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Sun Jul 19 07:32:43 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.4 2020/06/30 23:06:02 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.5 2020/07/19 07:32:43 ryo Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -238,8 +238,8 @@ ENTRY(aesarmv8_setenckey192) */ /* v1.4s := (nrk[0], nrk[1], nrk[1], nrk[1]) */ - dup v1.4s, v5.4s[3] - mov v1.4s[0], v5.4s[2] + dup v1.4s, v5.s[3] + mov v1.s[0], v5.s[2] /* * v6.4s := (0, 0, rklo[0], rklo[1]) @@ -257,7 +257,7 @@ ENTRY(aesarmv8_setenckey192) * and v5.4s = (rk[2], rk[3], xxx, xxx). Set * v2.4s := (rk[0], rk[1], rk[2], rk[3]) */ - mov v2.2d[1], v5.2d[0] + mov v2.d[1], v5.d[0] /* store two round keys */ stp q2, q3, [x0], #0x20 @@ -325,7 +325,7 @@ ENTRY(aesarmv8_setenckey192) ext v5.16b, v0.16b, v4.16b, #12 /* v2.4s := (nnrk[3], nnrk[3], xxx, xxx) */ - dup v2.4s, v1.4s[3] + dup v2.4s, v1.s[3] /* * v2.4s := (nnnrklo[0] = nnrk[3] ^ nrk[2],
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jun 30 23:06:02 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: Reallocate registers to avoid abusing callee-saves registers, v8-v15. Forgot to consult the AAPCS before committing this before -- oops! While here, take advantage of the 32 aarch64 simd registers to avoid all stack spills. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.3 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.4 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.3 Tue Jun 30 21:53:39 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Jun 30 23:06:02 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.3 2020/06/30 21:53:39 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.4 2020/06/30 23:06:02 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -116,7 +116,7 @@ ENTRY(aesarmv8_setenckey128) adrl x4, unshiftrows_rotword_3 eor v0.16b, v0.16b, v0.16b /* q0 := 0 */ - ldr q8, [x4] /* q8 := unshiftrows_rotword_3 table */ + ldr q16, [x4] /* q16 := unshiftrows_rotword_3 table */ str q1, [x0], #0x10 /* store master key as first round key */ mov x2, #10 /* round count */ @@ -136,7 +136,7 @@ ENTRY(aesarmv8_setenckey128) /* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */ ld1r {v4.4s}, [x3], #4 - tbl v3.16b, {v3.16b}, v8.16b + tbl v3.16b, {v3.16b}, v16.16b eor v3.16b, v3.16b, v4.16b /* @@ -175,8 +175,8 @@ ENTRY(aesarmv8_setenckey192) adrl x4, unshiftrows_rotword_1 adrl x5, unshiftrows_rotword_3 eor v0.16b, v0.16b, v0.16b /* q0 := 0 */ - ldr q8, [x4] /* q8 := unshiftrows_rotword_1 */ - ldr q9, [x5] /* q9 := unshiftrows_rotword_3 */ + ldr q16, [x4] /* q16 := unshiftrows_rotword_1 */ + ldr q17, [x5] /* q17 := unshiftrows_rotword_3 */ str q1, [x0], #0x10 /* store master key[0:128) as round key */ mov x2, #12 /* round count */ @@ -197,7 +197,7 @@ ENTRY(aesarmv8_setenckey192) /* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */ ld1r {v4.4s}, [x3], #4 - tbl v3.16b, {v3.16b}, v8.16b + tbl v3.16b, {v3.16b}, v16.16b eor v3.16b, v3.16b, v4.16b /* @@ -269,8 +269,8 @@ ENTRY(aesarmv8_setenckey192) * q2 = rk * q3 = nrk * v5.4s = (rk[2], rk[3], nrk[0], nrk[1]) - * q8 = unshiftrows_rotword_1 - * q9 = unshiftrows_rotword_3 + * q16 = unshiftrows_rotword_1 + * q17 = unshiftrows_rotword_3 * * We have to compute, in q1: * @@ -294,7 +294,7 @@ ENTRY(aesarmv8_setenckey192) /* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */ ld1r {v4.4s}, [x3], #4 - tbl v1.16b, {v1.16b}, v9.16b + tbl v1.16b, {v1.16b}, v17.16b eor v1.16b, v1.16b, v4.16b /* @@ -354,8 +354,8 @@ ENTRY(aesarmv8_setenckey256) adrl x4, unshiftrows_rotword_3 adrl x5, unshiftrows_3 eor v0.16b, v0.16b, v0.16b /* q0 := 0 */ - ldr q8, [x4] /* q8 := unshiftrows_rotword_3 */ - ldr q9, [x5] /* q9 := unshiftrows_3 */ + ldr q16, [x4] /* q16 := unshiftrows_rotword_3 */ + ldr q17, [x5] /* q17 := unshiftrows_3 */ /* store master key as first two round keys */ stp q1, q2, [x0], #0x20 @@ -376,7 +376,7 @@ ENTRY(aesarmv8_setenckey256) /* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */ ld1r {v4.4s}, [x3], #4 - tbl v3.16b, {v3.16b}, v8.16b + tbl v3.16b, {v3.16b}, v16.16b eor v3.16b, v3.16b, v4.16b /* @@ -402,7 +402,7 @@ ENTRY(aesarmv8_setenckey256) aese v3.16b, v0.16b /* v3.4s[i] := SubBytes(rk[3]) */ - tbl v3.16b, {v3.16b}, 
v9.16b + tbl v3.16b, {v3.16b}, v17.16b /* * v5.4s := (0,prk[0],prk[1],prk[2]) @@ -458,9 +458,9 @@ END(aesarmv8_enctodec) ENTRY(aesarmv8_enc) stp fp, lr, [sp, #-16]! /* push stack frame */ mov fp, sp - ldr q0, [x1] /* q0 := block */ - bl aesarmv8_enc1 - str q0, [x2] /* store block */ + ldr q0, [x1] /* q0 := ptxt */ + bl aesarmv8_enc1 /* q0 := ctxt; trash x0/x3/q16 */ + str q0, [x2] /* store ctxt */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret END(aesarmv8_enc) @@ -476,9 +476,9 @@ END(aesarmv8_enc) ENTRY(aesarmv8_dec) stp fp, lr, [sp, #-16]! /* push stack frame */ mov fp, sp - ldr q0, [x1] /* q0 := block */ - bl aesarmv8_dec1 - str q0, [x2] /* store block */ + ldr q0, [x1] /* q0 := ctxt */ + bl aesarmv8_dec1 /* q0 := ptxt; trash x0/x3/q16 */ + str q0, [x2] /* store ptxt */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret END(aesarmv8_dec) @@ -505,7 +505,7 @@ ENTRY(aesarmv8_cbc_enc) eor v0.16b, v0.16b, v1.16b /* q0 := cv ^ ptxt */ mov x0, x9 /* x0 := enckey */ mov x3, x5 /* x3 := nrounds */ - bl aesarmv8_enc1 /* q0 := ciphertext block */ + bl aesarmv8_enc1 /* q0 := ctxt; trash x0/x3/q16 */ subs x10, x10, #0x10 /* count down nbytes */ str q0, [x2], #0x10 /* store ciphertext block */ b.ne 1b /* repeat if x10 is nonzero */ @@ -52
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jun 30 21:53:39 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: Use `.arch_extension aes' for aese/aesmc/aesd/aesimc. Unlike `.arch_extension crypto', this works with clang; both work with gas, so we'll go with this. Clang still can't handle aes_armv8_64.S yet -- it gets confused by dup and mov on lanes, but this makes progress. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.2 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.3 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.2 Tue Jun 30 21:41:03 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Jun 30 21:53:39 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.2 2020/06/30 21:41:03 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.3 2020/06/30 21:53:39 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include - .arch_extension crypto + .arch_extension aes /* * uint32_t rcon[10]
CVS commit: src/sys/crypto/aes/arch
Module Name:src Committed By: riastradh Date: Tue Jun 30 21:41:04 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S src/sys/crypto/aes/arch/x86: aes_ni_64.S Log Message: Use .p2align rather than .align. Apparently on arm, .align is actually an alias for .p2align, taking a power of two rather than a number of bytes, so aes_armv8_64.o was bloated to 32KB with obscene alignment when it only needed to be barely past 4KB. Do the same for the x86 aes_ni_64.S -- even though .align takes a number of bytes rather than a power of two on x86, let's just stay away from the temptations of the evil .align directive. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.1 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.2 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.1 Mon Jun 29 23:31:41 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Jun 30 21:41:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.1 2020/06/29 23:31:41 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.2 2020/06/30 21:41:03 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -41,7 +41,7 @@ * secret. */ .section .rodata - .align 4 + .p2align 2 .type rcon,@object rcon: .long 0x01 @@ -63,7 +63,7 @@ END(rcon) * RotWord on word 1, and then copy it into all the other words. */ .section .rodata - .align 16 + .p2align 4 .type unshiftrows_rotword_1,@object unshiftrows_rotword_1: .byte 0x01,0x0e,0x0b,0x04 @@ -79,7 +79,7 @@ END(unshiftrows_rotword_1) * 3 into all the other words. */ .section .rodata - .align 16 + .p2align 4 .type unshiftrows_3,@object unshiftrows_3: .byte 0x0c,0x09,0x06,0x03 @@ -95,7 +95,7 @@ END(unshiftrows_3) * RotWord on word 3, and then copy it into all the other words. */ .section .rodata - .align 16 + .p2align 4 .type unshiftrows_rotword_3,@object unshiftrows_rotword_3: .byte 0x09,0x06,0x03,0x0c @@ -846,7 +846,7 @@ aesarmv8_xts_mulx: END(aesarmv8_xts_mulx) .section .rodata - .align 16 + .p2align 4 .type xtscarry,@object xtscarry: .byte 0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0 Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.1 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 --- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.1 Mon Jun 29 23:29:40 2020 +++ src/sys/crypto/aes/arch/x86/aes_ni_64.S Tue Jun 30 21:41:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ni_64.S,v 1.1 2020/06/29 23:29:40 riastradh Exp $ */ +/* $NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -930,7 +930,7 @@ aesni_xts_mulx: END(aesni_xts_mulx) .section .rodata - .align 16 + .p2align 4 .type xtscarry,@object xtscarry: .byte 0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jun 30 21:24:00 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h Log Message: Tweak clang neon intrinsics so they build. (this file is still a kludge) To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.1 src/sys/crypto/aes/arch/arm/arm_neon.h:1.2 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.1 Mon Jun 29 23:56:31 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Tue Jun 30 21:24:00 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.1 2020/06/29 23:56:31 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.2 2020/06/30 21:24:00 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -73,6 +73,8 @@ typedef __attribute__((neon_vector_type( typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; + +typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; typedef struct { uint8x8_t val[2]; } uint8x8x2_t; #ifdef __LITTLE_ENDIAN__ @@ -118,11 +120,11 @@ vdupq_n_u8(uint8_t __x) }; } +#if defined(__GNUC__) && !defined(__clang__) _INTRINSATTR static __inline uint32x4_t vextq_u32(uint32x4_t __lo, uint32x4_t __hi, uint8_t __i) { -#if defined(__GNUC__) && !defined(__clang__) #if defined(__AARCH64EB__) || defined(__ARM_BIG_ENDIAN) return __builtin_shuffle(__hi, __lo, (uint32x4_t) { 4 - __i, 5 - __i, 6 - __i, 7 - __i }); @@ -130,25 +132,31 @@ vextq_u32(uint32x4_t __lo, uint32x4_t __ return __builtin_shuffle(__lo, __hi, (uint32x4_t) { __i + 0, __i + 1, __i + 2, __i + 3 }); #endif +} #elif defined(__clang__) #ifdef __LITTLE_ENDIAN__ - return __builtin_neon_vextq_v((int8x16_t)__lo, (int8x16_t)__hi, __i, - 50); -#else - uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo, 3, 2, 1, 0); - uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi, 3, 2, 1, 0); - uint32x4_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, - (int8x16_t)__hi_r, __i, 50); - return __builtin_shufflevector(__r, __r, 3, 2, 1, 0); -#endif +#define vextq_u32(__lo, __hi, __i) \ + (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)(__lo), \ + (int8x16_t)(__hi), (__i), 50) +#else +#define vextq_u32(__lo, __hi, __i) ( \ +{ \ + uint32x4_t __tlo = (__lo); \ + uint32x4_t __thi = (__hi); \ + uint32x4_t __lo_r = __builtin_shufflevector(__tlo, __tlo, 3,2,1,0); \ + uint32x4_t __hi_r = __builtin_shufflevector(__thi, __thi, 3,2,1,0); \ + uint32x4_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \ + (int8x16_t)__hi_r, __i, 50); \ + __builtin_shufflevector(__r, __r, 3,2,1,0); \ +}) +#endif /* __LITTLE_ENDIAN__ */ #endif -} +#if defined(__GNUC__) && !defined(__clang__) _INTRINSATTR static __inline uint8x16_t vextq_u8(uint8x16_t __lo, uint8x16_t __hi, uint8_t __i) { -#if defined(__GNUC__) && !defined(__clang__) #if defined(__AARCH64EB__) || defined(__ARM_BIG_ENDIAN) return __builtin_shuffle(__hi, __lo, (uint8x16_t) { @@ -166,38 +174,45 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h __i + 12, __i + 13, __i + 14, __i + 15, }); #endif +} #elif defined(__clang__) #ifdef __LITTLE_ENDIAN__ - return __builtin_neon_vextq_v((int8x16_t)__lo, (int8x16_t)__hi, __i, - 48); -#else - uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0); - uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint32x4_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, - (int8x16_t)__hi_r, __i, 50); - return __builtin_shufflevector(__r, __r, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); -#endif +#define vextq_u8(__lo, __hi, __i) \ + (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)(__lo), \ + (int8x16_t)(__hi), (__i), 48) +#else +#define vextq_u8(__lo, __hi, __i) ( \ +{ \ + uint8x16_t __tlo = (__lo); \ + uint8x16_t __thi = (__hi); \ + uint8x16_t __lo_r = __builtin_shufflevector(__tlo, __tlo, \ + 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \ + uint8x16_t __hi_r = __builtin_shufflevector(__thi, __thi, \ + 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \ + uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \ + (int8x16_t)__hi_r, (__i), 48); \ + return __builtin_shufflevector(__r, __r, \ + 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \ +}) +#endif /* __LITTLE_ENDIAN */ #endif -} +#if defined(__GNUC__) && !defined(__clang__) _INTRINSATTR static __inline uint
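The likely reason the clang paths become macros here is the lane argument: clang's __builtin_neon_vextq_v insists on a compile-time constant, and inside an inline function the parameter __i is a runtime value even under __always_inline__. With the macro form the constant reaches the builtin intact. A usage sketch under that assumption:

	/* Fine: 12 reaches the builtin as a literal immediate. */
	static uint8x16_t
	rotl_bytes_12(uint8x16_t x)
	{
		return vextq_u8(x, x, 12);
	}

	/*
	 * Would not compile as an inline function on clang: the builtin's
	 * lane argument must be an immediate, but i is a runtime value.
	 *
	 * uint8x16_t bad(uint8x16_t x, uint8_t i)
	 * { return vextq_u8(x, x, i); }
	 */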
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jun 30 17:03:14 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: files.aesneon Log Message: Limit aes_neon to cpu_cortex | aarch64. We won't use it on any other systems, and it doesn't build without NEON anyway. Verified earmv7hf GENERIC, aarch64 GENERIC64, and earmv6 RPI2 all build with this. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/files.aesneon Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/files.aesneon diff -u src/sys/crypto/aes/arch/arm/files.aesneon:1.2 src/sys/crypto/aes/arch/arm/files.aesneon:1.3 --- src/sys/crypto/aes/arch/arm/files.aesneon:1.2 Mon Jun 29 23:57:56 2020 +++ src/sys/crypto/aes/arch/arm/files.aesneon Tue Jun 30 17:03:13 2020 @@ -1,4 +1,4 @@ -# $NetBSD: files.aesneon,v 1.2 2020/06/29 23:57:56 riastradh Exp $ +# $NetBSD: files.aesneon,v 1.3 2020/06/30 17:03:13 riastradh Exp $ ifdef aarch64 makeoptions aes "COPTS.aes_neon.c"+="-march=armv8-a" @@ -8,10 +8,8 @@ makeoptions aes "COPTS.aes_neon.c"+="-mf makeoptions aes "COPTS.aes_neon_subr.c"+="-mfloat-abi=softfp -mfpu=neon" endif -file crypto/aes/arch/arm/aes_neon.c aes -file crypto/aes/arch/arm/aes_neon_impl.c aes -file crypto/aes/arch/arm/aes_neon_subr.c aes +file crypto/aes/arch/arm/aes_neon.c aes & (cpu_cortex | aarch64) +file crypto/aes/arch/arm/aes_neon_impl.c aes & (cpu_cortex | aarch64) +file crypto/aes/arch/arm/aes_neon_subr.c aes & (cpu_cortex | aarch64) -ifndef aarch64 -file crypto/aes/arch/arm/aes_neon_32.S aes -endif +file crypto/aes/arch/arm/aes_neon_32.S aes & cpu_cortex & !aarch64
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Tue Jun 30 16:21:17 UTC 2020 Modified Files: src/sys/crypto/aes: aes_impl.c Log Message: New sysctl node hw.aes_impl for selected AES implementation. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/aes_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_impl.c diff -u src/sys/crypto/aes/aes_impl.c:1.2 src/sys/crypto/aes/aes_impl.c:1.3 --- src/sys/crypto/aes/aes_impl.c:1.2 Mon Jun 29 23:36:59 2020 +++ src/sys/crypto/aes/aes_impl.c Tue Jun 30 16:21:17 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_impl.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $ */ +/* $NetBSD: aes_impl.c,v 1.3 2020/06/30 16:21:17 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,12 +27,13 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.3 2020/06/30 16:21:17 riastradh Exp $"); #include #include #include #include +#include #include #include @@ -43,6 +44,30 @@ static int aes_selftest_stdkeysched(void static const struct aes_impl *aes_md_impl __read_mostly; static const struct aes_impl *aes_impl __read_mostly; +static int +sysctl_hw_aes_impl(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + + KASSERTMSG(aes_impl != NULL, + "sysctl ran before AES implementation was selected"); + + node = *rnode; + node.sysctl_data = __UNCONST(aes_impl->ai_name); + node.sysctl_size = strlen(aes_impl->ai_name) + 1; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} + +SYSCTL_SETUP(sysctl_hw_aes_setup, "sysctl hw.aes_impl setup") +{ + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "aes_impl", + SYSCTL_DESCR("Selected AES implementation"), + sysctl_hw_aes_impl, 0, NULL, 0, + CTL_HW, CTL_CREATE, CTL_EOL); +} + /* * The timing of AES implementation selection is finicky: *
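From userland the new node reads back as a string (the implementation's ai_name). A small sysctlbyname(3) sketch; the printed value is illustrative, since it depends on which implementation won the selection:

	#include <sys/param.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		char name[64];
		size_t len = sizeof name;

		if (sysctlbyname("hw.aes_impl", name, &len, NULL, 0) == -1)
			return 1;
		printf("hw.aes_impl = %s\n", name);	/* e.g. "AES-NI" */
		return 0;
	}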
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jun 29 23:57:56 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon.c files.aesneon Added Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: Provide hand-written AES NEON assembly for arm32. gcc does a lousy job at compiling 128-bit NEON intrinsics on arm32; hand-writing it made it about 12x faster, by avoiding a zillion loads and stores to spill everything and the kitchen sink onto the stack. (But gcc does fine on aarch64, presumably because it has twice as many registers and doesn't have to deal with q2=d4/d5 overlapping.) To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon.c \ src/sys/crypto/aes/arch/arm/files.aesneon cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon.c diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.1 src/sys/crypto/aes/arch/arm/aes_neon.c:1.2 --- src/sys/crypto/aes/arch/arm/aes_neon.c:1.1 Mon Jun 29 23:56:31 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon.c Mon Jun 29 23:57:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon.c,v 1.1 2020/06/29 23:56:31 riastradh Exp $ */ +/* $NetBSD: aes_neon.c,v 1.2 2020/06/29 23:57:56 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.1 2020/06/29 23:56:31 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.2 2020/06/29 23:57:56 riastradh Exp $"); #include @@ -47,6 +47,12 @@ __KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v #include "aes_neon_impl.h" +#ifdef __aarch64__ +#define __aarch64_used +#else +#define __aarch64_used __unused +#endif + static const uint8x16_t mc_forward[4] = { {0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04, @@ -58,7 +64,7 @@ mc_forward[4] = { {0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00, 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08}, }, -mc_backward[4] = { +mc_backward[4] __aarch64_used = { {0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06, 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E}, {0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02, @@ -68,7 +74,7 @@ mc_backward[4] = { {0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A, 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02}, }, -ipt[2] = { +ipt[2] __aarch64_used = { {0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2, 0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA}, {0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C, @@ -80,55 +86,55 @@ opt[2] = { {0x00,0xEC,0xBC,0x50,0x51,0xBD,0xED,0x01, 0xE0,0x0C,0x5C,0xB0,0xB1,0x5D,0x0D,0xE1}, }, -dipt[2] = { +dipt[2] __aarch64_used = { {0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F, 0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15}, {0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86, 0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12}, }, -sb1[2] = { +sb1[2] __aarch64_used = { {0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1, 0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5}, {0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36, 0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B}, }, -sb2[2] = { +sb2[2] __aarch64_used = { {0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2, 0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E}, {0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69, 0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2}, }, -sbo[2] = { +sbo[2] __aarch64_used = { {0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0, 0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15}, {0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF, 0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E}, }, -dsb9[2] = { +dsb9[2] __aarch64_used = { {0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85, 
0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA}, {0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0, 0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72}, }, -dsbd[2] = { +dsbd[2] __aarch64_used = { {0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D, 0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5}, {0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C, 0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29}, }, -dsbb[2] = { +dsbb[2] __aarch64_used = { {0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0, 0x04,0xD4,0xF2,0xB0,0xF6,0x46,0x26,0x60}, {0x00,0x67,0x59,0xCD,0xA6,0x98,0x94,0xC1, 0x6B,0xAA,0x55,0x32,0x3E,0x0C,0xFF,0xF3}, }, -dsbe[2] = { +dsbe[2] __aarch64_used = { {0x00,0xD0,0xD4,0x26,0x96,0x92,0xF2,0x46, 0xB0,0xF6,0xB4,0x64,0x04,0x60,0x42,0x22}, {0x00,0xC1,0xAA,0xFF,0xCD,0xA6,0x55,0x0C, 0x32,0x3E,0x59,0x98,0x6B,0xF3,0x67,0x94}, }, -dsbo[2] = { +dsbo[2] __aarch64_used = { {0x00,0x40,0xF9,0x7E,0x53,0xEA,0x87,0x13, 0x2D,0x3E,0x94,0xD4,0xB9,0x6D,0xAA,0xC7}, {0x00,0x1D,0x44,0x93,0x0F,0x56,0xD7,0x12, @@ -164,7 +170,7 @@ deskew[2] = { {0x00,0x69,0xEA,0x83,0xDC,0xB5,0x36,0x5F, 0x77,0x1E,0x9D,0xF4,0xAB,0xC2,0x41,0x28}, }, -sr[4] = { +sr[4] __aarch64_used = { {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F}, {0x00,0x05,0x0A,0x0F,0x04,0x09,0x0E,0x03, @@ -533,6 +539,14 @@ aes_neon_setdeck
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Mon Jun 29 23:50:05 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_sse2.h aes_sse2_impl.c aes_sse2_impl.h files.aessse2 Added Files: src/sys/crypto/aes/arch/x86: aes_sse2_subr.c Log Message: Split SSE2 logic into separate units. Ensure that there are no paths into files compiled with -msse -msse2 at all except via fpu_kern_enter. I didn't run into a practical problem with this, but let's not leave a ticking time bomb for subsequent toolchain changes in case the mere declaration of local __m128i variables causes trouble. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_sse2.h \ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c \ src/sys/crypto/aes/arch/x86/aes_sse2_impl.h \ src/sys/crypto/aes/arch/x86/files.aessse2 cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_sse2.h diff -u src/sys/crypto/aes/arch/x86/aes_sse2.h:1.1 src/sys/crypto/aes/arch/x86/aes_sse2.h:1.2 --- src/sys/crypto/aes/arch/x86/aes_sse2.h:1.1 Mon Jun 29 23:47:54 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2.h Mon Jun 29 23:50:05 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2.h,v 1.1 2020/06/29 23:47:54 riastradh Exp $ */ +/* $NetBSD: aes_sse2.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -31,6 +31,31 @@ #include +/* + * These functions MUST NOT use any vector registers for parameters or + * results -- the caller is compiled with -mno-sse &c. in the kernel, + * and dynamically turns on the vector unit just before calling them. + * Internal subroutines that use the vector unit for parameters are + * declared in aes_sse2_impl.h instead. + */ + +void aes_sse2_setkey(uint64_t[static 30], const void *, uint32_t); + +void aes_sse2_enc(const struct aesenc *, const uint8_t in[static 16], +uint8_t[static 16], uint32_t); +void aes_sse2_dec(const struct aesdec *, const uint8_t in[static 16], +uint8_t[static 16], uint32_t); +void aes_sse2_cbc_enc(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t); +void aes_sse2_cbc_dec(const struct aesdec *, const uint8_t[static 16], +uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t); +void aes_sse2_xts_enc(const struct aesenc *, const uint8_t[static 16], +uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t); +void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16], +uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t); + +int aes_sse2_selftest(void); + extern struct aes_impl aes_sse2_impl; #endif /* _CRYPTO_AES_ARCH_X86_AES_SSE2_H */ Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.c diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.1 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.2 --- src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.1 Mon Jun 29 23:47:54 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c Mon Jun 29 23:50:05 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $ */ +/* $NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -27,11 +27,10 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $"); #include #include -#include #include #include @@ -41,532 +40,99 @@ __KERNEL_RCSID(1, "$NetBSD: aes_sse2_imp #include #include -#include "aes_sse2_impl.h" - static void -aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds) +aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key, +uint32_t nrounds) { - size_t key_len; - - switch (nrounds) { - case 10: - key_len = 16; - break; - case 12: - key_len = 24; - break; - case 14: - key_len = 32; - break; - default: - panic("invalid AES nrounds: %u", nrounds); - } fpu_kern_enter(); - aes_sse2_keysched(rk, key, key_len); - fpu_kern_leave(); -} - -static void -aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds) -{ - aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds); + fpu_kern_leave(); } static void -aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds) +aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key, +uint32_t nrounds) { + fpu_kern_enter(); /* * BearSSL computes InvMixColumns on the fly -- no need for * distinct decryption round keys. */ aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds); + fpu_kern_leave(); } static void -aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16], +aes_sse2_enc_impl(const s
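The discipline this commit enforces is visible in the new *_impl wrappers: every path into the code compiled with -msse -msse2 passes through fpu_kern_enter()/fpu_kern_leave(), and the wrappers themselves traffic only in scalar and pointer arguments. Distilled to its shape (a sketch following the pattern in the diff, not the full function):

	/* Compiled without SSE flags; safe to call from ordinary kernel code. */
	static void
	aes_sse2_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
	    uint8_t out[static 16], uint32_t nrounds)
	{
		fpu_kern_enter();	/* enable the vector unit */
		aes_sse2_enc(enc, in, out, nrounds); /* -msse2 code; no vector
						      * args or returns */
		fpu_kern_leave();
	}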
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Mon Jun 29 23:41:35 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_via.c Log Message: VIA AES: Batch AES-XTS computation into eight blocks at a time. Experimental -- performance improvement is not clearly worth the complexity. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_via.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_via.c diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.1 src/sys/crypto/aes/arch/x86/aes_via.c:1.2 --- src/sys/crypto/aes/arch/x86/aes_via.c:1.1 Mon Jun 29 23:39:30 2020 +++ src/sys/crypto/aes/arch/x86/aes_via.c Mon Jun 29 23:41:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_via.c,v 1.1 2020/06/29 23:39:30 riastradh Exp $ */ +/* $NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.1 2020/06/29 23:39:30 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $"); #include #include @@ -119,8 +119,8 @@ aesvia_setdeckey(struct aesdec *dec, con } static inline void -aesvia_enc1(const struct aesenc *enc, const uint8_t in[static 16], -uint8_t out[static 16], uint32_t cw0) +aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16], +uint8_t out[static 16], size_t nblocks, uint32_t cw0) { const uint32_t cw[4] __aligned(16) = { [0] = (cw0 @@ -128,7 +128,6 @@ aesvia_enc1(const struct aesenc *enc, co | C3_CRYPT_CWLO_ENCRYPT | C3_CRYPT_CWLO_NORMAL), }; - size_t nblocks = 1; KASSERT(((uintptr_t)enc & 0xf) == 0); KASSERT(((uintptr_t)in & 0xf) == 0); @@ -141,8 +140,8 @@ aesvia_enc1(const struct aesenc *enc, co } static inline void -aesvia_dec1(const struct aesdec *dec, const uint8_t in[static 16], -uint8_t out[static 16], uint32_t cw0) +aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16], +uint8_t out[static 16], size_t nblocks, uint32_t cw0) { const uint32_t cw[4] __aligned(16) = { [0] = (cw0 @@ -150,7 +149,6 @@ aesvia_dec1(const struct aesdec *dec, co | C3_CRYPT_CWLO_DECRYPT | C3_CRYPT_CWLO_NORMAL), }; - size_t nblocks = 1; KASSERT(((uintptr_t)dec & 0xf) == 0); KASSERT(((uintptr_t)in & 0xf) == 0); @@ -180,7 +178,7 @@ aesvia_enc(const struct aesenc *enc, con if uintptr_t)in | (uintptr_t)out) & 0xf) == 0 && ((uintptr_t)in & 0xff0) != 0xff0) { enc_aligned_evcnt.ev_count++; - aesvia_enc1(enc, in, out, cw0); + aesvia_encN(enc, in, out, 1, cw0); } else { enc_unaligned_evcnt.ev_count++; /* @@ -194,7 +192,7 @@ aesvia_enc(const struct aesenc *enc, con uint8_t outbuf[16] __aligned(16); memcpy(inbuf, in, 16); - aesvia_enc1(enc, inbuf, outbuf, cw0); + aesvia_encN(enc, inbuf, outbuf, 1, cw0); memcpy(out, outbuf, 16); explicit_memset(inbuf, 0, sizeof inbuf); @@ -221,7 +219,7 @@ aesvia_dec(const struct aesdec *dec, con if uintptr_t)in | (uintptr_t)out) & 0xf) == 0 && ((uintptr_t)in & 0xff0) != 0xff0) { dec_aligned_evcnt.ev_count++; - aesvia_dec1(dec, in, out, cw0); + aesvia_decN(dec, in, out, 1, cw0); } else { dec_unaligned_evcnt.ev_count++; /* @@ -235,7 +233,7 @@ aesvia_dec(const struct aesdec *dec, con uint8_t outbuf[16] __aligned(16); memcpy(inbuf, in, 16); - aesvia_dec1(dec, inbuf, outbuf, cw0); + aesvia_decN(dec, inbuf, outbuf, 1, cw0); memcpy(out, outbuf, 16); explicit_memset(inbuf, 0, sizeof inbuf); @@ -245,7 +243,7 @@ aesvia_dec(const struct aesdec *dec, con } static inline void 
-aesvia_cbc_enc1(const struct aesenc *enc, const uint8_t in[static 16], +aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16], uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0) { const uint32_t cw[4] __aligned(16) = { @@ -274,7 +272,7 @@ aesvia_cbc_enc1(const struct aesenc *enc } static inline void -aesvia_cbc_dec1(const struct aesdec *dec, const uint8_t in[static 16], +aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16], uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16], uint32_t cw0) { @@ -340,7 +338,7 @@ aesvia_cbc_enc(const struct aesenc *enc, if uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) { cbcenc_aligned_evcnt.ev_count++; uint8_t *ivp = iv; - aesvia_cbc_enc1(enc, in, out, nbytes/16, &ivp, cw0); + aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0); memcpy(iv, ivp, 16); } else { cbcenc_unaligned_evcnt.ev_count++; @@ -351,7 +349,7 @@ aesvia_cbc_enc(const struct aesenc *enc, for (; nbytes; nbytes -= 16, in += 16, out += 16) { memcpy(tmp, in, 16); xor128(tmp, tmp, cv); - aesvia_enc1(enc, tmp, cv, cw0); + aesvia_encN(enc, tmp, cv, 1, cw0); memcpy(out, cv, 16); }
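The payoff of widening aesvia_enc1 into aesvia_encN is that the PadLock unit takes a block count, so the XTS path can push eight pre-whitened blocks through a single invocation. A simplified sketch of one such batch: xor128() and aesvia_encN() are from this file, while xts_enc8() and xts_mulx() (the GF(2^128) tweak step) are hypothetical names for this illustration:

	/* One 8-block XTS-encrypt batch (hypothetical helper). */
	static void
	xts_enc8(const struct aesenc *enc, const uint8_t in[static 128],
	    uint8_t out[static 128], uint8_t tweak[static 16], uint32_t cw0)
	{
		uint8_t t[8][16] __aligned(16), buf[128] __aligned(16);
		unsigned i;

		for (i = 0; i < 8; i++) {	/* materialize 8 tweaks */
			memcpy(t[i], tweak, 16);
			xts_mulx(tweak);	/* tweak *= x (assumed) */
		}
		for (i = 0; i < 8; i++)		/* pre-whiten */
			xor128(buf + 16*i, in + 16*i, t[i]);
		aesvia_encN(enc, buf, buf, 8, cw0); /* one xcrypt invocation */
		for (i = 0; i < 8; i++)		/* post-whiten */
			xor128(out + 16*i, buf + 16*i, t[i]);
	}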
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Mon Jun 29 23:36:59 UTC 2020 Modified Files: src/sys/crypto/aes: aes_bear.h aes_ct.c aes_ct_dec.c aes_impl.c Log Message: Provide the standard AES key schedule. Different AES implementations prefer different variations on it, but some of them -- notably VIA -- require the standard key schedule to be available and don't provide hardware support for computing it themselves. So adapt BearSSL's logic to generate the standard key schedule (and decryption keys, with InvMixColumns), rather than the bitsliced key schedule that BearSSL uses natively. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/aes_bear.h \ src/sys/crypto/aes/aes_ct.c src/sys/crypto/aes/aes_ct_dec.c \ src/sys/crypto/aes/aes_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_bear.h diff -u src/sys/crypto/aes/aes_bear.h:1.1 src/sys/crypto/aes/aes_bear.h:1.2 --- src/sys/crypto/aes/aes_bear.h:1.1 Mon Jun 29 23:27:52 2020 +++ src/sys/crypto/aes/aes_bear.h Mon Jun 29 23:36:59 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_bear.h,v 1.1 2020/06/29 23:27:52 riastradh Exp $ */ +/* $NetBSD: aes_bear.h,v 1.2 2020/06/29 23:36:59 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -45,6 +45,12 @@ void br_aes_ct_skey_expand(uint32_t *, u void br_aes_ct_bitslice_encrypt(unsigned, const uint32_t *, uint32_t *); void br_aes_ct_bitslice_decrypt(unsigned, const uint32_t *, uint32_t *); +/* NetBSD additions */ + +void br_aes_ct_inv_mix_columns(uint32_t *); +u_int br_aes_ct_keysched_stdenc(uint32_t *, const void *, size_t); +u_int br_aes_ct_keysched_stddec(uint32_t *, const void *, size_t); + extern struct aes_impl aes_bear_impl; #endif /* _CRYPTO_AES_AES_BEAR_H */ Index: src/sys/crypto/aes/aes_ct.c diff -u src/sys/crypto/aes/aes_ct.c:1.1 src/sys/crypto/aes/aes_ct.c:1.2 --- src/sys/crypto/aes/aes_ct.c:1.1 Mon Jun 29 23:27:52 2020 +++ src/sys/crypto/aes/aes_ct.c Mon Jun 29 23:36:59 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ct.c,v 1.1 2020/06/29 23:27:52 riastradh Exp $ */ +/* $NetBSD: aes_ct.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $ */ /* * Copyright (c) 2016 Thomas Pornin @@ -25,10 +25,12 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ct.c,v 1.1 2020/06/29 23:27:52 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ct.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $"); #include +#include + #include /* see inner.h */ @@ -333,3 +335,92 @@ br_aes_ct_skey_expand(uint32_t *skey, skey[v + 1] = y | (y >> 1); } } + +/* NetBSD additions, for computing the standard AES key schedule */ + +unsigned +br_aes_ct_keysched_stdenc(uint32_t *skey, const void *key, size_t key_len) +{ + unsigned num_rounds; + int i, j, k, nk, nkf; + uint32_t tmp; + + switch (key_len) { + case 16: + num_rounds = 10; + break; + case 24: + num_rounds = 12; + break; + case 32: + num_rounds = 14; + break; + default: + /* abort(); */ + return 0; + } + nk = (int)(key_len >> 2); + nkf = (int)((num_rounds + 1) << 2); + tmp = 0; + for (i = 0; i < nk; i ++) { + tmp = br_dec32le((const unsigned char *)key + (i << 2)); + skey[i] = tmp; + } + for (i = nk, j = 0, k = 0; i < nkf; i ++) { + if (j == 0) { + tmp = (tmp << 24) | (tmp >> 8); + tmp = sub_word(tmp) ^ Rcon[k]; + } else if (nk > 6 && j == 4) { + tmp = sub_word(tmp); + } + tmp ^= skey[i - nk]; + skey[i] = tmp; + if (++ j == nk) { + j = 0; + k ++; + } + } + return num_rounds; +} + +unsigned +br_aes_ct_keysched_stddec(uint32_t *skey, const void *key, 
size_t key_len) +{ + uint32_t tkey[60]; + uint32_t q[8]; + unsigned num_rounds; + unsigned i; + + num_rounds = br_aes_ct_keysched_stdenc(skey, key, key_len); + if (num_rounds == 0) + return 0; + + tkey[0] = skey[4*num_rounds + 0]; + tkey[1] = skey[4*num_rounds + 1]; + tkey[2] = skey[4*num_rounds + 2]; + tkey[3] = skey[4*num_rounds + 3]; + for (i = 1; i < num_rounds; i++) { + q[2*0] = skey[4*i + 0]; + q[2*1] = skey[4*i + 1]; + q[2*2] = skey[4*i + 2]; + q[2*3] = skey[4*i + 3]; + q[1] = q[3] = q[5] = q[7] = 0; + + br_aes_ct_ortho(q); + br_aes_ct_inv_mix_columns(q); + br_aes_ct_ortho(q); + + tkey[4*(num_rounds - i) + 0] = q[2*0]; + tkey[4*(num_rounds - i) + 1] = q[2*1]; + tkey[4*(num_rounds - i) + 2] = q[2*2]; + tkey[4*(num_rounds - i) + 3] = q[2*3]; + } + tkey[4*num_rounds + 0] = skey[0]; + tkey[4*num_rounds + 1] = skey[1]; + tkey[4*num_rounds + 2] = skey[2]; + tkey[4*num_rounds + 3] = skey[3]; + + memcpy(skey, tkey, 4*(num_rounds + 1)*sizeof(uint32_t)); + explicit_memset(tkey, 0, 4*(num_rounds + 1)*sizeof(uint32_t)); + return num_rounds; +} Index: src/sys/crypto/aes/aes_ct_dec.c diff -u src/sys/crypto/aes/aes_ct_dec.c:1.1 src/sys/crypto/aes/aes_ct_dec.c:1.2 --- src/sys/crypto/aes/aes_ct_dec.c:1.1 Mon Jun 29 23:27:52 2020 +++ src/sys/crypto/aes/aes_ct_dec.c Mon Jun 29 23:36:59 2020 @@ -1,4 +1,4 @@ -/* $Ne
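br_aes_ct_keysched_stdenc() above leans on two helpers outside this excerpt, Rcon[] and sub_word(). With BearSSL's little-endian word packing (br_dec32le), RotWord is the (tmp << 24) | (tmp >> 8) rotate shown and the round constant lands in the low byte, so conventional definitions look like the sketch below. The table-driven sub_word() is only a sketch: a table lookup is not constant-time, so the real helper would more plausibly go through the bitsliced S-box. aes_sbox[] is assumed, not shown:

	/* Round constants, in the low byte of the LE-packed word. */
	static const uint32_t Rcon[10] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
	};

	/* SubBytes on each byte of a word; bytewise, so packing order
	 * does not matter.  aes_sbox[] is the standard 256-byte S-box
	 * table (assumed). */
	static uint32_t
	sub_word(uint32_t x)
	{
		extern const uint8_t aes_sbox[256];

		return ((uint32_t)aes_sbox[(x >> 24) & 0xff] << 24) |
		    ((uint32_t)aes_sbox[(x >> 16) & 0xff] << 16) |
		    ((uint32_t)aes_sbox[(x >> 8) & 0xff] << 8) |
		    (uint32_t)aes_sbox[x & 0xff];
	}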