Module Name:    src
Committed By:   riastradh
Date:           Thu Sep 10 11:31:04 UTC 2020

Modified Files:
        src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.
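
As a side-by-side sketch of the address arithmetic (taken from the
aes_neon_enc1 hunks below; register names as in the diff): with the
forward and backward tables interleaved per round at a 32-byte stride,
one 256-bit load replaces the previous pair of 128-bit loads.

Before:
	add	r6, r4, r3, lsl #4	/* r6 := &mc_forward[rmod4] */
	add	r8, r5, r3, lsl #4	/* r8 := &mc_backward[rmod4] */
	vld1.8	{q12}, [r6 :128]	/* q12 := mc_forward[rmod4] */
	vld1.8	{q13}, [r8 :128]	/* q13 := mc_backward[rmod4] */

After:
	add	r6, r4, r3, lsl #5	/* r6 := &mc[rmod4], 32-byte entries */
	vld1.8	{q12-q13}, [r6 :256]	/* q12 := forward, q13 := backward */

The :256 alignment qualifier asserts that each interleaved
forward/backward pair is 32-byte aligned, so it can be fetched in a
single aligned access.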


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.11
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10	Thu Sep 10 11:30:28 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:31:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <arm/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -54,36 +54,26 @@ inva:
 	.byte	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
 END(inva)
 
-	.type	mc_forward,_ASM_TYPE_OBJECT
-mc_forward:
-	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 */
+	.type	mc,_ASM_TYPE_OBJECT
+mc:
+	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 forward */
 	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
-
-	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 backward */
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
+	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 forward */
 	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
-
-	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 */
+	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 backward */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
+	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 forward */
 	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
-
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 backward */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
 .Lmc_forward_3:
-	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 */
+	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 forward */
 	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
-END(mc_forward)
-
-	.type	mc_backward,_ASM_TYPE_OBJECT
-mc_backward:
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 */
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
-
-	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 */
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
-
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 */
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
-
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 backward */
 	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
-END(mc_backward)
+END(mc)
 
 	.type	sr,_ASM_TYPE_OBJECT
 sr:
@@ -210,8 +200,7 @@ ENTRY(aes_neon_enc1)
 
 	/*
 	 * r3: rmod4
-	 * r4: mc_forward
-	 * r5: mc_backward
+	 * r4: mc
 	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
@@ -225,8 +214,8 @@ ENTRY(aes_neon_enc1)
 	 * q9={d18-d19}: sb2[1]
 	 * q10={d20-d21}: inv
 	 * q11={d22-d23}: inva
-	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4]
-	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4]
+	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward
+	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward
 	 * q14={d28-d29}: rk/A2/A2_B_D
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
@@ -254,9 +243,8 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
-	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, ip, #(mc_forward - .Lconstants)
-	add	r5, ip, #(mc_backward - .Lconstants)
+	/* r4 := mc */
+	add	r4, ip, #(mc - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -291,13 +279,11 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d25, {q8}, d5
 	vtbl.8	d26, {q9}, d6
 	vtbl.8	d27, {q9}, d7
+	add	r6, r4, r3, lsl #5	/* r6 := &mc[rmod4] */
 	veor	q14, q12, q13
 
-	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
-	add	r6, r4, r3, lsl #4
-	add	r8, r5, r3, lsl #4
-	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r8 :128]
+	/* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
+	vld1.8	{q12-q13}, [r6 :256]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -474,7 +460,7 @@ ENTRY(aes_neon_dec1)
 	add	r8, ip, #(.Lmc_forward_3 - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 := dsbb[0], q7 := dsbb[1] */
 	vld1.8	{q10-q11}, [r6 :256]	/* q10 := inv, q11 := inva */
-	vld1.8	{q15}, [r8 :128]	/* q15 := mc_forward[3] */
+	vld1.8	{q15}, [r8 :128]	/* q15 := mc[3].forward */
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
