Re: [PATCH 4/5] Glue code for optimized Poly1305 implementation for ppc64le.

2023-04-25 Thread Danny Tsen

Did not notice that.  Will fix it.

Thanks.

-Danny

On 4/25/23 12:44 AM, Herbert Xu wrote:

On Mon, Apr 24, 2023 at 02:47:25PM -0400, Danny Tsen wrote:

+   if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+      bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
+      used = crypto_poly1305_setdctxkey(dctx, src, bytes);
+      if (likely(used)) {
+         srclen -= used;
+         src += used;
+      }
+      if (srclen >= POLY1305_BLOCK_SIZE*4) {
+         vsx_begin();

Your chacha code has a SIMD-fallback, how come this one doesn't?

Thanks,
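
For reference, the kind of SIMD fallback being asked about could look roughly like the sketch below.  This is only an illustration, not the revised patch: the helper name poly1305_p10_do_blocks() is invented here, and it assumes crypto_simd_usable() from <crypto/internal/simd.h> plus the generic poly1305_core_blocks() helper from <crypto/internal/poly1305.h> as the non-SIMD path.

/*
 * Illustrative sketch only -- not the actual follow-up patch.
 * Process 'bytes' (a multiple of POLY1305_BLOCK_SIZE) from 'src',
 * falling back to the generic C implementation when VSX/SIMD
 * cannot be used (e.g. in hard interrupt context).
 */
static void poly1305_p10_do_blocks(struct poly1305_desc_ctx *dctx,
                                   const u8 *src, unsigned int bytes)
{
   if (!crypto_simd_usable()) {
      /* Generic portable code; no VSX registers touched. */
      poly1305_core_blocks(&dctx->h, &dctx->core_r, src,
                           bytes / POLY1305_BLOCK_SIZE, 1);
      return;
   }

   if (bytes >= POLY1305_BLOCK_SIZE * 4) {
      unsigned int len4 = round_down(bytes, POLY1305_BLOCK_SIZE * 4);

      vsx_begin();
      poly1305_p10le_4blocks(&dctx->h, src, len4);
      vsx_end();
      src += len4;
      bytes -= len4;
   }
   while (bytes >= POLY1305_BLOCK_SIZE) {
      vsx_begin();
      poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
      vsx_end();
      src += POLY1305_BLOCK_SIZE;
      bytes -= POLY1305_BLOCK_SIZE;
   }
}

crypto_poly1305_p10_update() could then route each multiple-of-block-size chunk through such a helper instead of wrapping the assembly in vsx_begin()/vsx_end() directly, so data arriving while VSX is unavailable is still hashed correctly.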


Re: [PATCH 4/5] Glue code for optimized Poly1305 implementation for ppc64le.

2023-04-24 Thread Herbert Xu
On Mon, Apr 24, 2023 at 02:47:25PM -0400, Danny Tsen wrote:
>
> +   if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
> +      bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
> +      used = crypto_poly1305_setdctxkey(dctx, src, bytes);
> +      if (likely(used)) {
> +         srclen -= used;
> +         src += used;
> +      }
> +      if (srclen >= POLY1305_BLOCK_SIZE*4) {
> +         vsx_begin();

Your chacha code has a SIMD-fallback, how come this one doesn't?

Thanks,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


[PATCH 4/5] Glue code for optimized Poly1305 implementation for ppc64le.

2023-04-24 Thread Danny Tsen
Signed-off-by: Danny Tsen 
---
 arch/powerpc/crypto/poly1305-p10-glue.c | 186 
 1 file changed, 186 insertions(+)
 create mode 100644 arch/powerpc/crypto/poly1305-p10-glue.c

diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c
new file mode 100644
index 000000000000..b1800f7b6af8
--- /dev/null
+++ b/arch/powerpc/crypto/poly1305-p10-glue.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Poly1305 authenticator algorithm, RFC7539.
+ *
+ * Copyright 2023- IBM Inc. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/jump_label.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <asm/unaligned.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen);
+asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit);
+asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst);
+
+static void vsx_begin(void)
+{
+   preempt_disable();
+   enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+   disable_kernel_vsx();
+   preempt_enable();
+}
+
+static int crypto_poly1305_p10_init(struct shash_desc *desc)
+{
+   struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+   poly1305_core_init(&dctx->h);
+   dctx->buflen = 0;
+   dctx->rset = 0;
+   dctx->sset = false;
+
+   return 0;
+}
+
+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
+  const u8 *inp, unsigned int len)
+{
+   unsigned int acc = 0;
+
+   if (unlikely(!dctx->sset)) {
+      if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
+         struct poly1305_core_key *key = &dctx->core_r;
+
+         key->key.r64[0] = get_unaligned_le64(&inp[0]);
+         key->key.r64[1] = get_unaligned_le64(&inp[8]);
+         inp += POLY1305_BLOCK_SIZE;
+         len -= POLY1305_BLOCK_SIZE;
+         acc += POLY1305_BLOCK_SIZE;
+         dctx->rset = 1;
+      }
+      if (len >= POLY1305_BLOCK_SIZE) {
+         dctx->s[0] = get_unaligned_le32(&inp[0]);
+         dctx->s[1] = get_unaligned_le32(&inp[4]);
+         dctx->s[2] = get_unaligned_le32(&inp[8]);
+         dctx->s[3] = get_unaligned_le32(&inp[12]);
+         acc += POLY1305_BLOCK_SIZE;
+         dctx->sset = true;
+      }
+   }
+   return acc;
+}
+
+static int crypto_poly1305_p10_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+   struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+   unsigned int bytes, used;
+
+   if (unlikely(dctx->buflen)) {
+      bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+      memcpy(dctx->buf + dctx->buflen, src, bytes);
+      src += bytes;
+      srclen -= bytes;
+      dctx->buflen += bytes;
+
+      if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+         if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf,
+                                                POLY1305_BLOCK_SIZE))) {
+            vsx_begin();
+            poly1305_64s(&dctx->h, dctx->buf,
+                         POLY1305_BLOCK_SIZE, 1);
+            vsx_end();
+         }
+         dctx->buflen = 0;
+      }
+   }
+
+   if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+      bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
+      used = crypto_poly1305_setdctxkey(dctx, src, bytes);
+      if (likely(used)) {
+         srclen -= used;
+         src += used;
+      }
+      if (srclen >= POLY1305_BLOCK_SIZE*4) {
+         vsx_begin();
+         poly1305_p10le_4blocks(&dctx->h, src, srclen);
+         vsx_end();
+         src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4));
+         srclen %= POLY1305_BLOCK_SIZE * 4;
+      }
+      while (srclen >= POLY1305_BLOCK_SIZE) {
+         vsx_begin();
+         poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
+         vsx_end();
+         srclen -= POLY1305_BLOCK_SIZE;
+         src += POLY1305_BLOCK_SIZE;
+      }
+   }
+
+   if (unlikely(srclen)) {
+      dctx->buflen = srclen;
+      memcpy(dctx->buf, src, srclen);
+   }
+
+   return 0;
+}
+
+static int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst)
+{
+   struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+   if (unlikely(!dctx->sset))
+   return -ENOKEY;
+
+   if ((dctx->buflen)) {
+