Module Name:    src
Committed By:   riastradh
Date:           Thu Sep 10 11:31:04 UTC 2020

Modified Files:
        src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.

To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.11 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 Thu Sep 10 11:30:28 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:31:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include <arm/asm.h> -RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $") .fpu neon @@ -54,36 +54,26 @@ inva: .byte 0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03 END(inva) - .type mc_forward,_ASM_TYPE_OBJECT -mc_forward: - .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 */ + .type mc,_ASM_TYPE_OBJECT +mc: + .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 forward */ .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C - - .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 */ + .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 backward */ + .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E + .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 forward */ .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 - - .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 */ + .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 backward */ + .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A + .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 forward */ .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 - + .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 backward */ + .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 .Lmc_forward_3: - .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 */ + .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 forward */ .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 -END(mc_forward) - - .type mc_backward,_ASM_TYPE_OBJECT -mc_backward: - .byte 
0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 */ - .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E - - .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 */ - .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A - - .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 */ - .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 - - .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 */ + .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 backward */ .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 -END(mc_backward) +END(mc) .type sr,_ASM_TYPE_OBJECT sr: @@ -210,8 +200,7 @@ ENTRY(aes_neon_enc1) /* * r3: rmod4 - * r4: mc_forward - * r5: mc_backward + * r4: mc * r6,r8,r10,ip: temporaries * q0={d0-d1}: x/ak/A * q1={d2-d3}: 0x0f0f... @@ -225,8 +214,8 @@ ENTRY(aes_neon_enc1) * q9={d18-d19}: sb2[1] * q10={d20-d21}: inv * q11={d22-d23}: inva - * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4] - * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4] + * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward + * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward * q14={d28-d29}: rk/A2/A2_B_D * q15={d30-d31}: A2_B/sr[rmod4] */ @@ -254,9 +243,8 @@ ENTRY(aes_neon_enc1) vld1.8 {q8-q9}, [r6 :256] /* q8 = sb2[0], q9 = sb2[1] */ vld1.8 {q10-q11}, [r8 :256] /* q10 = inv, q11 = inva */ - /* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */ - add r4, ip, #(mc_forward - .Lconstants) - add r5, ip, #(mc_backward - .Lconstants) + /* r4 := mc */ + add r4, ip, #(mc - .Lconstants) /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4 @@ -291,13 +279,11 @@ ENTRY(aes_neon_enc1) vtbl.8 d25, {q8}, d5 vtbl.8 d26, {q9}, d6 vtbl.8 d27, {q9}, d7 + add r6, r4, r3, lsl #5 /* r6 := &mc[rmod4] */ veor q14, q12, q13 - /* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */ - add r6, r4, r3, lsl #4 - add r8, r5, r3, lsl #4 - vld1.8 {q12}, [r6 :128] - vld1.8 {q13}, [r8 :128] + /* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */ + vld1.8 {q12-q13}, [r6 :256] /* q15 := A2_B = A2 + A(mcf) */ vtbl.8 d30, {q0}, d24 @@ 
-474,7 +460,7 @@ ENTRY(aes_neon_dec1) add r8, ip, #(.Lmc_forward_3 - .Lconstants) vld1.8 {q6-q7}, [r4 :256] /* q6 := dsbb[0], q7 := dsbb[1] */ vld1.8 {q10-q11}, [r6 :256] /* q10 := inv, q11 := inva */ - vld1.8 {q15}, [r8 :128] /* q15 := mc_forward[3] */ + vld1.8 {q15}, [r8 :128] /* q15 := mc[3].forward */ /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4