This SHA-1 implementation for arm64 keeps the 64 bytes of workspace in
registers rather than on the stack, eliminating most of the loads and stores
and reducing the instruction count by about 25%.

Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
Hello all,

Unfortunately, I have no performance numbers that I am allowed to share, so if
anyone else (with access to actual, representative hardware) would care to
have a go, I would be very grateful.

This can be done by building the tcrypt.ko module (CONFIG_CRYPTO_TEST=m) and
inserting it with 'mode=303' as a parameter (note that the insmod always
fails, but it still writes its test output to the kernel log). Also note that
the sha_transform() function will be part of the kernel proper, so just
rebuilding the sha1_generic module is not sufficient; the kernel image itself
has to be rebuilt.

Cheers,


 arch/arm64/kernel/arm64ksyms.c |   3 +
 arch/arm64/lib/Makefile        |   2 +-
 arch/arm64/lib/sha1.S          | 256 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/lib/sha1.S

diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 338b568cd8ae..1f5693fb5d93 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -56,3 +56,6 @@ EXPORT_SYMBOL(clear_bit);
 EXPORT_SYMBOL(test_and_clear_bit);
 EXPORT_SYMBOL(change_bit);
 EXPORT_SYMBOL(test_and_change_bit);
+
+       /* SHA-1 implementation under lib/ */
+EXPORT_SYMBOL(sha_transform);
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 328ce1a99daa..ea093ebb9a9a 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,4 +1,4 @@
 lib-y          := bitops.o clear_user.o delay.o copy_from_user.o       \
                   copy_to_user.o copy_in_user.o copy_page.o            \
                   clear_page.o memchr.o memcpy.o memmove.o memset.o    \
-                  strchr.o strrchr.o
+                  strchr.o strrchr.o sha1.o
diff --git a/arch/arm64/lib/sha1.S b/arch/arm64/lib/sha1.S
new file mode 100644
index 000000000000..877b8d70e992
--- /dev/null
+++ b/arch/arm64/lib/sha1.S
@@ -0,0 +1,256 @@
+/*
+ * linux/arch/arm64/lib/sha1.S
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheu...@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+       .text
+
+       k               .req    w1      /* round constant; reuses x1 once data is read */
+
+       res             .req    w2      /* scratch: round function result and sum */
+       xres            .req    x2      /* 64-bit view of res */
+
+       wA              .req    w3      /* wA-wE: the five 32-bit working variables */
+       wB              .req    w4
+       wC              .req    w5
+       wD              .req    w6
+       wE              .req    w7
+
+       tmp             .req    w16     /* scratch for sha1_majority and mix_state */
+       xtmp            .req    x16     /* 64-bit view of tmp */
+
+       .macro          sha1_choose, out, b, c, d       /* out = (b & c) | (~b & d) */
+       eor             \out, \c, \d
+       and             \out, \out, \b          /* keep c ^ d only where b is set */
+       eor             \out, \out, \d          /* gives c where b is set, d elsewhere */
+       .endm
+
+       .macro          sha1_parity, out, b, c, d       /* out = b ^ c ^ d */
+       eor             \out, \b, \c
+       eor             \out, \out, \d
+       .endm
+
+       .macro          sha1_majority, out, b, c, d     /* out = maj(b, c, d); clobbers tmp */
+       eor             tmp, \b, \c
+       and             \out, \b, \c            /* bits set in both b and c */
+       and             tmp, tmp, \d            /* bits set in d and in exactly one of b, c */
+       add             \out, \out, tmp         /* the two terms never overlap, so add acts as orr */
+       .endm
+
+       .macro          mix_state, st0, st1, st4, st6, st7      /* st0 = next two schedule words; clobbers res, tmp */
+       extr            xtmp, \st7, \st6, #32   /* low half = high word of st6, high half = low word of st7 */
+       eor             \st0, \st0, \st1
+       eor             xtmp, xtmp, \st4
+       eor             xtmp, xtmp, \st0        /* 4-way xor, computed for two words at once */
+       ror             res, tmp, #(32 - 1)     /* res = low word rotated left by 1 (upper half of xres cleared) */
+       lsr             xtmp, xtmp, #32         /* bring the high word down */
+       ror             tmp, tmp, #(32 - 1)     /* tmp = high word rotated left by 1 */
+       orr             \st0, xres, xtmp, lsl #32       /* repack: st0 = tmp:res */
+       .endm
+
+       .macro          sha1_round, func, r, h, a, b, c, d, e   /* one round; the new a is left in e */
+       sha1_\func      res, \b, \c, \d         /* res = f(b, c, d), using b before it is rotated */
+       add             res, res, \e
+       ror             \e, \a, #(32 - 5)       /* e = rol(a, 5); the old e is already folded into res */
+       .ifc            \h, h
+       add             xres, xres, x\r, lsr #32        /* h: add the high word of x<r>; only the low 32 bits are used later */
+       .else
+       add             res, res, w\r           /* otherwise add the low word of x<r> */
+       .endif
+       add             \e, \e, k
+       ror             \b, \b, #2              /* b = rol(b, 30) */
+       add             \e, \e, res             /* e = rol(a, 5) + f(b, c, d) + old e + word + k */
+       .endm
+
+       /*
+        * void sha_transform(__u32 *digest, const char *data, __u32 *array)
+        * digest is updated in place from the 64 bytes at data; array is unused.
+        */
+ENTRY(sha_transform)
+       /* load input into state array */
+       ldp             x8, x9, [x1]
+       ldp             x10, x11, [x1, #16]
+       ldp             x12, x13, [x1, #32]
+       ldp             x14, x15, [x1, #48]
+
+       /* load digest input */
+       ldr             wA, [x0]
+       ldp             wB, wC, [x0, #4]
+       ldp             wD, wE, [x0, #12]
+
+       /*
+        * Word 2n goes in the low half of each register and word 2n+1 in the
+        * high half, both as big-endian values: rev32 on LE, swap halves on BE.
+        */
+       .irp            reg, x8, x9, x10, x11, x12, x13, x14, x15
+CPU_LE( rev32          \reg, \reg      )
+CPU_BE( ror            \reg, \reg, #32 )
+       .endr
+
+       /* rounds 0-19 */
+       ldr             k, =0x5a827999          /* k is w1: the data pointer is dead by now */
+       sha1_round      choose,  8, l, wA, wB, wC, wD, wE
+       sha1_round      choose,  8, h, wE, wA, wB, wC, wD
+       sha1_round      choose,  9, l, wD, wE, wA, wB, wC
+       sha1_round      choose,  9, h, wC, wD, wE, wA, wB
+       sha1_round      choose, 10, l, wB, wC, wD, wE, wA
+       sha1_round      choose, 10, h, wA, wB, wC, wD, wE
+       sha1_round      choose, 11, l, wE, wA, wB, wC, wD
+       sha1_round      choose, 11, h, wD, wE, wA, wB, wC
+       sha1_round      choose, 12, l, wC, wD, wE, wA, wB
+       sha1_round      choose, 12, h, wB, wC, wD, wE, wA
+       sha1_round      choose, 13, l, wA, wB, wC, wD, wE
+       sha1_round      choose, 13, h, wE, wA, wB, wC, wD
+       sha1_round      choose, 14, l, wD, wE, wA, wB, wC
+       sha1_round      choose, 14, h, wC, wD, wE, wA, wB
+       sha1_round      choose, 15, l, wB, wC, wD, wE, wA
+       sha1_round      choose, 15, h, wA, wB, wC, wD, wE
+
+       mix_state       x8, x9, x12, x14, x15   /* words 16 and 17 replace words 0 and 1 */
+       sha1_round      choose,  8, l, wE, wA, wB, wC, wD
+       sha1_round      choose,  8, h, wD, wE, wA, wB, wC
+       mix_state       x9, x10, x13, x15, x8
+       sha1_round      choose,  9, l, wC, wD, wE, wA, wB
+       sha1_round      choose,  9, h, wB, wC, wD, wE, wA
+
+       /* rounds 20-39 */
+       ldr             k, =0x6ed9eba1
+       mix_state       x10, x11, x14, x8, x9
+       sha1_round      parity, 10, l, wA, wB, wC, wD, wE
+       sha1_round      parity, 10, h, wE, wA, wB, wC, wD
+       mix_state       x11, x12, x15, x9, x10
+       sha1_round      parity, 11, l, wD, wE, wA, wB, wC
+       sha1_round      parity, 11, h, wC, wD, wE, wA, wB
+       mix_state       x12, x13, x8, x10, x11
+       sha1_round      parity, 12, l, wB, wC, wD, wE, wA
+       sha1_round      parity, 12, h, wA, wB, wC, wD, wE
+       mix_state       x13, x14, x9, x11, x12
+       sha1_round      parity, 13, l, wE, wA, wB, wC, wD
+       sha1_round      parity, 13, h, wD, wE, wA, wB, wC
+       mix_state       x14, x15, x10, x12, x13
+       sha1_round      parity, 14, l, wC, wD, wE, wA, wB
+       sha1_round      parity, 14, h, wB, wC, wD, wE, wA
+       mix_state       x15, x8, x11, x13, x14
+       sha1_round      parity, 15, l, wA, wB, wC, wD, wE
+       sha1_round      parity, 15, h, wE, wA, wB, wC, wD
+       mix_state       x8, x9, x12, x14, x15
+       sha1_round      parity,  8, l, wD, wE, wA, wB, wC
+       sha1_round      parity,  8, h, wC, wD, wE, wA, wB
+       mix_state       x9, x10, x13, x15, x8
+       sha1_round      parity,  9, l, wB, wC, wD, wE, wA
+       sha1_round      parity,  9, h, wA, wB, wC, wD, wE
+       mix_state       x10, x11, x14, x8, x9
+       sha1_round      parity, 10, l, wE, wA, wB, wC, wD
+       sha1_round      parity, 10, h, wD, wE, wA, wB, wC
+       mix_state       x11, x12, x15, x9, x10
+       sha1_round      parity, 11, l, wC, wD, wE, wA, wB
+       sha1_round      parity, 11, h, wB, wC, wD, wE, wA
+
+       /* rounds 40-59 */
+       ldr             k, =0x8f1bbcdc
+       mix_state       x12, x13, x8, x10, x11
+       sha1_round      majority, 12, l, wA, wB, wC, wD, wE
+       sha1_round      majority, 12, h, wE, wA, wB, wC, wD
+       mix_state       x13, x14, x9, x11, x12
+       sha1_round      majority, 13, l, wD, wE, wA, wB, wC
+       sha1_round      majority, 13, h, wC, wD, wE, wA, wB
+       mix_state       x14, x15, x10, x12, x13
+       sha1_round      majority, 14, l, wB, wC, wD, wE, wA
+       sha1_round      majority, 14, h, wA, wB, wC, wD, wE
+       mix_state       x15, x8, x11, x13, x14
+       sha1_round      majority, 15, l, wE, wA, wB, wC, wD
+       sha1_round      majority, 15, h, wD, wE, wA, wB, wC
+       mix_state       x8, x9, x12, x14, x15
+       sha1_round      majority,  8, l, wC, wD, wE, wA, wB
+       sha1_round      majority,  8, h, wB, wC, wD, wE, wA
+       mix_state       x9, x10, x13, x15, x8
+       sha1_round      majority,  9, l, wA, wB, wC, wD, wE
+       sha1_round      majority,  9, h, wE, wA, wB, wC, wD
+       mix_state       x10, x11, x14, x8, x9
+       sha1_round      majority, 10, l, wD, wE, wA, wB, wC
+       sha1_round      majority, 10, h, wC, wD, wE, wA, wB
+       mix_state       x11, x12, x15, x9, x10
+       sha1_round      majority, 11, l, wB, wC, wD, wE, wA
+       sha1_round      majority, 11, h, wA, wB, wC, wD, wE
+       mix_state       x12, x13, x8, x10, x11
+       sha1_round      majority, 12, l, wE, wA, wB, wC, wD
+       sha1_round      majority, 12, h, wD, wE, wA, wB, wC
+       mix_state       x13, x14, x9, x11, x12
+       sha1_round      majority, 13, l, wC, wD, wE, wA, wB
+       sha1_round      majority, 13, h, wB, wC, wD, wE, wA
+
+       /* rounds 60-79 */
+       ldr             k, =0xca62c1d6
+       mix_state       x14, x15, x10, x12, x13
+       sha1_round      parity, 14, l, wA, wB, wC, wD, wE
+       sha1_round      parity, 14, h, wE, wA, wB, wC, wD
+       mix_state       x15, x8, x11, x13, x14
+       sha1_round      parity, 15, l, wD, wE, wA, wB, wC
+       sha1_round      parity, 15, h, wC, wD, wE, wA, wB
+       mix_state       x8, x9, x12, x14, x15
+       sha1_round      parity,  8, l, wB, wC, wD, wE, wA
+       sha1_round      parity,  8, h, wA, wB, wC, wD, wE
+       mix_state       x9, x10, x13, x15, x8
+       sha1_round      parity,  9, l, wE, wA, wB, wC, wD
+       sha1_round      parity,  9, h, wD, wE, wA, wB, wC
+       mix_state       x10, x11, x14, x8, x9
+       sha1_round      parity, 10, l, wC, wD, wE, wA, wB
+       sha1_round      parity, 10, h, wB, wC, wD, wE, wA
+       mix_state       x11, x12, x15, x9, x10
+       sha1_round      parity, 11, l, wA, wB, wC, wD, wE
+       sha1_round      parity, 11, h, wE, wA, wB, wC, wD
+       mix_state       x12, x13, x8, x10, x11
+       sha1_round      parity, 12, l, wD, wE, wA, wB, wC
+       sha1_round      parity, 12, h, wC, wD, wE, wA, wB
+       mix_state       x13, x14, x9, x11, x12
+       sha1_round      parity, 13, l, wB, wC, wD, wE, wA
+       sha1_round      parity, 13, h, wA, wB, wC, wD, wE
+       mix_state       x14, x15, x10, x12, x13
+       sha1_round      parity, 14, l, wE, wA, wB, wC, wD
+       sha1_round      parity, 14, h, wD, wE, wA, wB, wC
+       mix_state       x15, x8, x11, x13, x14
+
+       /* reload digest input; x8-x12 are free, only x15 is still needed */
+       ldr             w8, [x0]
+       ldp             w9, w10, [x0, #4]
+       ldp             w11, w12, [x0, #12]
+
+       sha1_round      parity, 15, l, wC, wD, wE, wA, wB
+       sha1_round      parity, 15, h, wB, wC, wD, wE, wA
+
+       /* add this round's output to digest */
+       add             wA, wA, w8
+       add             wB, wB, w9
+       add             wC, wC, w10
+       add             wD, wD, w11
+       add             wE, wE, w12
+
+       /* store digest */
+       str             wA, [x0]
+       stp             wB, wC, [x0, #4]
+       stp             wD, wE, [x0, #12]
+       ret
+ENDPROC(sha_transform)
+
+       /*
+        * void sha_init(__u32 *buf) - write the SHA-1 initial hash to buf[0..4]
+        */
+ENTRY(sha_init)
+       ldr     w1, =0x67452301                 /* H0 */
+       str     w1, [x0]
+       ldr     w2, =0xefcdab89                 /* H1 */
+       ldr     w3, =0x98badcfe                 /* H2 */
+       stp     w2, w3, [x0, #4]
+       ldr     w4, =0x10325476                 /* H3 */
+       ldr     w5, =0xc3d2e1f0                 /* H4 */
+       stp     w4, w5, [x0, #12]
+       ret
+ENDPROC(sha_init)
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to