Re: [PATCH v2 3/4] gcm: move block shifting function to block-internal.h

2019-09-04 Thread Dmitry Eremin-Solenikov
ср, 4 сент. 2019 г. в 23:25, Niels Möller :
>
> dbarysh...@gmail.com writes:
>
> > From: Dmitry Eremin-Solenikov 
> >
> > Move GCM's block shift function to block-internal.h. This concludes
> > moving of all Galois mul-by-2 to single header.
>
> I've merged patch 1-3 to the master-updates branch. Thanks!

Thank you! What about gosthash v2 patches I've sent in July?


-- 
With best wishes
Dmitry
___
nettle-bugs mailing list
nettle-bugs@lists.lysator.liu.se
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs


Re: [PATCH v2 3/4] gcm: move block shifting function to block-internal.h

2019-09-04 Thread Niels Möller
dbarysh...@gmail.com writes:

> From: Dmitry Eremin-Solenikov 
>
> Move GCM's block shift function to block-internal.h. This concludes
> moving of all Galois mul-by-2 to single header.

I've merged patch 1-3 to the master-updates branch. Thanks!

Regards,
/Niels

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list
nettle-bugs@lists.lysator.liu.se
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs


Re: [PATCH v2 4/4] gcm: drop intermediate GCM_TABLE_BITS == 4 case

2019-09-04 Thread Niels Möller
dbarysh...@gmail.com writes:

> It makes little sense to have intermediate solution with GCM_TABLE_BITS
> == 4. One either will use unoptimized case of GCM_TABLE_BITS == 0, or
> will switch to fully optimized case (8) as memory usage difference is
> quite low between 4 and 8. So drop GCM_TABLE_BITS == 4 support

For the const shift_table, the size difference is 32 bytes vs 512 bytes,
which may not be a big deal.

I'm more concerned with the size of struct gcm_key, 256 bytes vs 4096
bytes. GCM_TABLE_BITS == 4 seems like a reasonable tradeoff if you have an
embedded system with little RAM.

It's unfortunate that it is poorly tested. It would make some sense with
an (abi-breaking) configure flag --enable-small-size to reduce size of
various tables, which could then be tested more regularly. Besides gcm,
AES and DES use large constant tables of somewhat configurable size, and
for ecc there are both constant tables, for ecc_mul_g, and run-time
tables for ecc_mul_a.

Is there any interest in improving support for low-end embedded devices?

Regards,
/Niels

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list
nettle-bugs@lists.lysator.liu.se
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs


[PATCH v2 2/4] block modes: move Galois shifts to block-internal.h

2019-09-04 Thread dbaryshkov
From: Dmitry Eremin-Solenikov 

Move Galois polynomial shifts to block-internal.h, simplifying common
code. GCM is left unconverted for now, this will be fixed later.

Signed-off-by: Dmitry Eremin-Solenikov 
---
 Makefile.in   |  2 +-
 block-internal.h  | 72 +++
 cmac-internal.h   | 54 ---
 cmac.c| 28 ++
 cmac64.c  | 27 ++
 eax.c | 18 ++--
 siv-cmac-aes128.c |  1 -
 siv-cmac-aes256.c |  1 -
 siv-cmac.c|  7 ++---
 xts.c | 34 --
 10 files changed, 87 insertions(+), 157 deletions(-)
 delete mode 100644 cmac-internal.h

diff --git a/Makefile.in b/Makefile.in
index f6658c86341c..ae9c8a7563f9 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -231,7 +231,7 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
nettle.pc.in hogweed.pc.in \
$(des_headers) descore.README desdata.stamp \
aes-internal.h block-internal.h \
-   camellia-internal.h cmac-internal.h serpent-internal.h \
+   camellia-internal.h serpent-internal.h \
cast128_sboxes.h desinfo.h desCode.h \
ripemd160-internal.h sha2-internal.h \
memxor-internal.h nettle-internal.h nettle-write.h \
diff --git a/block-internal.h b/block-internal.h
index ab3a6a79b8cb..8972d0ac2b5b 100644
--- a/block-internal.h
+++ b/block-internal.h
@@ -90,4 +90,76 @@ block8_xor_bytes (union nettle_block8 *r,
   memxor3 (r->b, x->b, bytes, 8);
 }
 
+/* Do a foreign-endianness shift of data */
+
+#define LSHIFT_ALIEN_UINT64(x) \
+   ((((x) & UINT64_C(0x7f7f7f7f7f7f7f7f)) << 1) | \
+(((x) & UINT64_C(0x8080808080808080)) >> 15))
+
+/* Two typical defining polynoms */
+
+#define BLOCK16_POLY (UINT64_C(0x87))
+#define BLOCK8_POLY (UINT64_C(0x1b))
+
+/* Galois multiplications by 2:
+ * functions differ in shifting right or left, big- or little- endianness
+ * and by defining polynom.
+ * r == x is allowed. */
+
+#if WORDS_BIGENDIAN
+static inline void
+block16_mulx_be (union nettle_block16 *dst,
+const union nettle_block16 *src)
+{
+  uint64_t carry = src->u64[0] >> 63;
+  dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);
+  dst->u64[1] = (src->u64[1] << 1) ^ (BLOCK16_POLY & -carry);
+}
+
+static inline void
+block16_mulx_le (union nettle_block16 *dst,
+const union nettle_block16 *src)
+{
+  uint64_t carry = (src->u64[1] & 0x80) >> 7;
+  dst->u64[1] = LSHIFT_ALIEN_UINT64(src->u64[1]) | ((src->u64[0] & 0x80) << 49);
+  dst->u64[0] = LSHIFT_ALIEN_UINT64(src->u64[0]) ^ ((BLOCK16_POLY << 56) & -carry);
+}
+
+static inline void
+block8_mulx_be (union nettle_block8 *dst,
+   const union nettle_block8 *src)
+{
+  uint64_t carry = src->u64 >> 63;
+
+  dst->u64 = (src->u64 << 1) ^ (BLOCK8_POLY & -carry);
+}
+#else /* !WORDS_BIGENDIAN */
+static inline void
+block16_mulx_be (union nettle_block16 *dst,
+const union nettle_block16 *src)
+{
+  uint64_t carry = (src->u64[0] & 0x80) >> 7;
+  dst->u64[0] = LSHIFT_ALIEN_UINT64(src->u64[0]) | ((src->u64[1] & 0x80) << 49);
+  dst->u64[1] = LSHIFT_ALIEN_UINT64(src->u64[1]) ^ ((BLOCK16_POLY << 56) & -carry);
+}
+
+static inline void
+block16_mulx_le (union nettle_block16 *dst,
+const union nettle_block16 *src)
+{
+  uint64_t carry = src->u64[1] >> 63;
+  dst->u64[1] = (src->u64[1] << 1) | (src->u64[0] >> 63);
+  dst->u64[0] = (src->u64[0] << 1) ^ (BLOCK16_POLY & -carry);
+}
+
+static inline void
+block8_mulx_be (union nettle_block8 *dst,
+   const union nettle_block8 *src)
+{
+  uint64_t carry = (src->u64 & 0x80) >> 7;
+
+  dst->u64 = LSHIFT_ALIEN_UINT64(src->u64) ^ ((BLOCK8_POLY << 56) & -carry);
+}
+#endif /* !WORDS_BIGENDIAN */
+
 #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
diff --git a/cmac-internal.h b/cmac-internal.h
deleted file mode 100644
index 80db7fcc58cd..
--- a/cmac-internal.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* cmac-internal.h
-
-   CMAC mode internal functions
-
-   Copyright (C) 2017 Red Hat, Inc.
-
-   Contributed by Nikos Mavrogiannopoulos
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
-   Software Foundation; either version 3 of the License, or (at your
-   option) any later version.
-
-   or
-
- * the GNU General Public License as published by the Free
-   Software Foundation; either version 2 of the License, or (at your
-   option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU 

[PATCH v2 4/4] gcm: drop intermediate GCM_TABLE_BITS == 4 case

2019-09-04 Thread dbaryshkov
From: Dmitry Eremin-Solenikov 

It makes little sense to have intermediate solution with GCM_TABLE_BITS
== 4. One either will use unoptimized case of GCM_TABLE_BITS == 0, or
will switch to fully optimized case (8) as memory usage difference is
quite low between 4 and 8. So drop GCM_TABLE_BITS == 4 support

Signed-off-by: Dmitry Eremin-Solenikov 
---
 gcm.c | 67 +++
 1 file changed, 8 insertions(+), 59 deletions(-)

diff --git a/gcm.c b/gcm.c
index cf615daf18bd..3a6f04a7671b 100644
--- a/gcm.c
+++ b/gcm.c
@@ -83,62 +83,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y)
 }
   memcpy (x->b, Z.b, sizeof(Z));
 }
-#else /* GCM_TABLE_BITS != 0 */
 
-# if WORDS_BIGENDIAN
-#  define W(left,right) (0x##left##right)
-# else
-#  define W(left,right) (0x##right##left)
-# endif
-
-# if GCM_TABLE_BITS == 4
-static const uint16_t
-shift_table[0x10] = {
-  W(00,00),W(1c,20),W(38,40),W(24,60),W(70,80),W(6c,a0),W(48,c0),W(54,e0),
-  W(e1,00),W(fd,20),W(d9,40),W(c5,60),W(91,80),W(8d,a0),W(a9,c0),W(b5,e0),
-};
-
-static void
-gcm_gf_shift_4(union nettle_block16 *x)
-{
-  uint64_t *u64 = x->u64;
-  uint64_t reduce;
-
-  /* Shift uses big-endian representation. */
-#if WORDS_BIGENDIAN
-  reduce = shift_table[u64[1] & 0xf];
-  u64[1] = (u64[1] >> 4) | ((u64[0] & 0xf) << 60);
-  u64[0] = (u64[0] >> 4) ^ (reduce << 48);
-#else /* ! WORDS_BIGENDIAN */
-#define RSHIFT_WORD_4(x) \
-  ((((x) & UINT64_C(0xf0f0f0f0f0f0f0f0)) >> 4) \
-   | (((x) & UINT64_C(0x000f0f0f0f0f0f0f)) << 12))
-  reduce = shift_table[(u64[1] >> 56) & 0xf];
-  u64[1] = RSHIFT_WORD_4(u64[1]) | ((u64[0] >> 52) & 0xf0);
-  u64[0] = RSHIFT_WORD_4(u64[0]) ^ reduce;
-# undef RSHIFT_WORD_4
-#endif /* ! WORDS_BIGENDIAN */
-}
-
-static void
-gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
-{
-  union nettle_block16 Z;
-  unsigned i;
-
-  memset(Z.b, 0, sizeof(Z));
-
-  for (i = GCM_BLOCK_SIZE; i-- > 0;)
-{
-  uint8_t b = x->b[i];
-
-  gcm_gf_shift_4(&Z);
-  block16_xor(&Z, &table[b & 0xf]);
-  gcm_gf_shift_4(&Z);
-  block16_xor(&Z, &table[b >> 4]);
-}
-  memcpy (x->b, Z.b, sizeof(Z));
-}
 # elif GCM_TABLE_BITS == 8
 #  if HAVE_NATIVE_gcm_hash8
 
@@ -147,6 +92,13 @@ void
 _nettle_gcm_hash8 (const struct gcm_key *key, union nettle_block16 *x,
   size_t length, const uint8_t *data);
 #  else /* !HAVE_NATIVE_gcm_hash8 */
+
+# if WORDS_BIGENDIAN
+#  define W(left,right) (0x##left##right)
+# else
+#  define W(left,right) (0x##right##left)
+# endif
+
 static const uint16_t
 shift_table[0x100] = {
   W(00,00),W(01,c2),W(03,84),W(02,46),W(07,08),W(06,ca),W(04,8c),W(05,4e),
@@ -182,6 +134,7 @@ shift_table[0x100] = {
   W(b5,e0),W(b4,22),W(b6,64),W(b7,a6),W(b2,e8),W(b3,2a),W(b1,6c),W(b0,ae),
   W(bb,f0),W(ba,32),W(b8,74),W(b9,b6),W(bc,f8),W(bd,3a),W(bf,7c),W(be,be),
 };
+#undef W
 
 static void
 gcm_gf_shift_8(union nettle_block16 *x)
@@ -221,10 +174,6 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
 #  error Unsupported table size. 
 # endif /* GCM_TABLE_BITS != 8 */
 
-#undef W
-
-#endif /* GCM_TABLE_BITS */
-
 /* Increment the rightmost 32 bits. */
 #define INC32(block) INCREMENT(4, (block.b) + GCM_BLOCK_SIZE - 4)
 
-- 
2.23.0.rc1

___
nettle-bugs mailing list
nettle-bugs@lists.lysator.liu.se
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs


[PATCH v2 3/4] gcm: move block shifting function to block-internal.h

2019-09-04 Thread dbaryshkov
From: Dmitry Eremin-Solenikov 

Move GCM's block shift function to block-internal.h. This concludes
moving of all Galois mul-by-2 to single header.

Signed-off-by: Dmitry Eremin-Solenikov 
---
 block-internal.h | 30 +-
 gcm.c| 30 ++
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/block-internal.h b/block-internal.h
index 8972d0ac2b5b..88e19be333c8 100644
--- a/block-internal.h
+++ b/block-internal.h
@@ -95,11 +95,15 @@ block8_xor_bytes (union nettle_block8 *r,
 #define LSHIFT_ALIEN_UINT64(x) \
 ((((x) & UINT64_C(0x7f7f7f7f7f7f7f7f)) << 1) | \
 (((x) & UINT64_C(0x8080808080808080)) >> 15))
+#define RSHIFT_ALIEN_UINT64(x) \
+   ((((x) & UINT64_C(0xfefefefefefefefe)) >> 1) | \
+(((x) & UINT64_C(0x0001010101010101)) << 15))
 
 /* Two typical defining polynoms */
 
 #define BLOCK16_POLY (UINT64_C(0x87))
 #define BLOCK8_POLY (UINT64_C(0x1b))
+#define GHASH_POLY (UINT64_C(0xE1))
 
 /* Galois multiplications by 2:
  * functions differ in shifting right or left, big- or little- endianness
@@ -133,6 +137,18 @@ block8_mulx_be (union nettle_block8 *dst,
 
   dst->u64 = (src->u64 << 1) ^ (BLOCK8_POLY & -carry);
 }
+
+static inline void
+block16_mulx_ghash (union nettle_block16 *r,
+   const union nettle_block16 *x)
+{
+  uint64_t mask;
+
+  /* Shift uses big-endian representation. */
+  mask = - (x->u64[1] & 1);
+  r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63);
+  r->u64[0] = (x->u64[0] >> 1) ^ (mask & (GHASH_POLY << 56));
+}
 #else /* !WORDS_BIGENDIAN */
 static inline void
 block16_mulx_be (union nettle_block16 *dst,
@@ -160,6 +176,18 @@ block8_mulx_be (union nettle_block8 *dst,
 
   dst->u64 = LSHIFT_ALIEN_UINT64(src->u64) ^ ((BLOCK8_POLY << 56) & -carry);
 }
-#endif /* !WORDS_BIGENDIAN */
+
+static inline void
+block16_mulx_ghash (union nettle_block16 *r,
+   const union nettle_block16 *x)
+{
+  uint64_t mask;
+
+  /* Shift uses big-endian representation. */
+  mask = - ((x->u64[1] >> 56) & 1);
+  r->u64[1] = RSHIFT_ALIEN_UINT64(x->u64[1]) | ((x->u64[0] >> 49) & 0x80);
+  r->u64[0] = RSHIFT_ALIEN_UINT64(x->u64[0]) ^ (mask & GHASH_POLY);
+}
+#endif /* ! WORDS_BIGENDIAN */
 
 #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
diff --git a/gcm.c b/gcm.c
index 4a04a0a10842..cf615daf18bd 100644
--- a/gcm.c
+++ b/gcm.c
@@ -55,32 +55,6 @@
 #include "ctr-internal.h"
 #include "block-internal.h"
 
-#define GHASH_POLYNOMIAL 0xE1UL
-
-/* Multiplication by 010...0; a big-endian shift right. If the bit
-   shifted out is one, the defining polynomial is added to cancel it
-   out. r == x is allowed. */
-static void
-gcm_gf_shift (union nettle_block16 *r, const union nettle_block16 *x)
-{
-  uint64_t mask;
-
-  /* Shift uses big-endian representation. */
-#if WORDS_BIGENDIAN
-  mask = - (x->u64[1] & 1);
-  r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63);
-  r->u64[0] = (x->u64[0] >> 1) ^ (mask & ((uint64_t) GHASH_POLYNOMIAL << 56));
-#else /* ! WORDS_BIGENDIAN */
-#define RSHIFT_WORD(x) \
-  ((((x) & 0xfefefefefefefefeUL) >> 1) \
-   | (((x) & 0x0001010101010101UL) << 15))
-  mask = - ((x->u64[1] >> 56) & 1);
-  r->u64[1] = RSHIFT_WORD(x->u64[1]) | ((x->u64[0] >> 49) & 0x80);
-  r->u64[0] = RSHIFT_WORD(x->u64[0]) ^ (mask & GHASH_POLYNOMIAL);
-# undef RSHIFT_WORD
-#endif /* ! WORDS_BIGENDIAN */
-}
-
 #if GCM_TABLE_BITS == 0
 /* Sets x <- x * y mod r, using the plain bitwise algorithm from the
specification. y may be shorter than a full block, missing bytes
@@ -104,7 +78,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y)
  if (b & 0x80)
 block16_xor(&Z, &V);
  
- gcm_gf_shift(&V, &V);
+ block16_mulx_ghash(&V, &V);
}
 }
   memcpy (x->b, Z.b, sizeof(Z));
@@ -275,7 +249,7 @@ gcm_set_key(struct gcm_key *key,
   /* Algorithm 3 from the gcm paper. First do powers of two, then do
  the rest by adding. */
   while (i /= 2)
-gcm_gf_shift(&key->h[i], &key->h[2*i]);
+block16_mulx_ghash(&key->h[i], &key->h[2*i]);
   for (i = 2; i < 1

[PATCH v2 1/4] block-internal: add block XORing functions

2019-09-04 Thread dbaryshkov
From: Dmitry Eremin-Solenikov 

Add common implementations for functions doing XOR over
nettle_block16/nettle_block8.

Signed-off-by: Dmitry Eremin-Solenikov 
---
 Makefile.in  |  3 +-
 block-internal.h | 93 
 cmac.c   | 11 +++---
 cmac64.c | 12 +++
 eax.c|  9 +
 gcm.c| 20 ---
 siv-cmac.c   |  9 ++---
 7 files changed, 120 insertions(+), 37 deletions(-)
 create mode 100644 block-internal.h

diff --git a/Makefile.in b/Makefile.in
index af4f6e46ee9b..f6658c86341c 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -230,7 +230,8 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
INSTALL NEWS ChangeLog \
nettle.pc.in hogweed.pc.in \
$(des_headers) descore.README desdata.stamp \
-   aes-internal.h camellia-internal.h cmac-internal.h serpent-internal.h \
+   aes-internal.h block-internal.h \
+   camellia-internal.h cmac-internal.h serpent-internal.h \
cast128_sboxes.h desinfo.h desCode.h \
ripemd160-internal.h sha2-internal.h \
memxor-internal.h nettle-internal.h nettle-write.h \
diff --git a/block-internal.h b/block-internal.h
new file mode 100644
index ..ab3a6a79b8cb
--- /dev/null
+++ b/block-internal.h
@@ -0,0 +1,93 @@
+/* block-internal.h
+
+   Internal implementations of nettle_blockZ-related functions.
+
+   Copyright (C) 2011 Katholieke Universiteit Leuven
+   Copyright (C) 2011, 2013, 2018 Niels Möller
+   Copyright (C) 2018 Red Hat, Inc.
+   Copyright (C) 2019 Dmitry Eremin-Solenikov
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+   Software Foundation; either version 3 of the License, or (at your
+   option) any later version.
+
+   or
+
+ * the GNU General Public License as published by the Free
+   Software Foundation; either version 2 of the License, or (at your
+   option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_BLOCK_INTERNAL_H_INCLUDED
+#define NETTLE_BLOCK_INTERNAL_H_INCLUDED
+
+#include 
+
+#include "nettle-types.h"
+#include "memxor.h"
+
+static inline void
+block16_xor (union nettle_block16 *r,
+const union nettle_block16 *x)
+{
+  r->u64[0] ^= x->u64[0];
+  r->u64[1] ^= x->u64[1];
+}
+
+static inline void
+block16_xor3 (union nettle_block16 *r,
+ const union nettle_block16 *x,
+ const union nettle_block16 *y)
+{
+  r->u64[0] = x->u64[0] ^ y->u64[0];
+  r->u64[1] = x->u64[1] ^ y->u64[1];
+}
+
+static inline void
+block16_xor_bytes (union nettle_block16 *r,
+  const union nettle_block16 *x,
+  const uint8_t *bytes)
+{
+  memxor3 (r->b, x->b, bytes, 16);
+}
+
+static inline void
+block8_xor (union nettle_block8 *r,
+   const union nettle_block8 *x)
+{
+  r->u64 ^= x->u64;
+}
+
+static inline void
+block8_xor3 (union nettle_block8 *r,
+const union nettle_block8 *x,
+const union nettle_block8 *y)
+{
+  r->u64 = x->u64 ^ y->u64;
+}
+
+static inline void
+block8_xor_bytes (union nettle_block8 *r,
+ const union nettle_block8 *x,
+ const uint8_t *bytes)
+{
+  memxor3 (r->b, x->b, bytes, 8);
+}
+
+#endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
diff --git a/cmac.c b/cmac.c
index 70ce8132d9d1..194324421c58 100644
--- a/cmac.c
+++ b/cmac.c
@@ -45,6 +45,7 @@
 #include "memxor.h"
 #include "nettle-internal.h"
 #include "cmac-internal.h"
+#include "block-internal.h"
 #include "macros.h"
 
 /* shift one and XOR with 0x87. */
@@ -119,12 +120,12 @@ cmac128_update(struct cmac128_ctx *ctx, const void *cipher,
   /*
* now checksum everything but the last block
*/
-  memxor3(Y.b, ctx->X.b, ctx->block.b, 16);
+  block16_xor3(&Y, &ctx->X, &ctx->block);
   encrypt(cipher, 16, ctx->X.b, Y.b);
 
   while (msg_len > 16)
 {
-  memxor3(Y.b, ctx->X.b, msg, 16);
+  block16_xor_bytes (&Y, &ctx->X, msg);
   encrypt(cipher, 16, ctx->X.b, Y.b);
   msg += 16;
   msg_len -= 16;
@@ -151,14 +152,14 @@ cmac128_digest(struct cmac128_ctx *ctx, const struct cmac128_key *key,
   ctx->block.b[ctx->index] = 0x80;
   memset(ctx->block.b + ctx->index + 1, 0, 16 - 1 - ctx->index);
 
-  memxor(ctx->block.b, key->K2.b, 16);
+  block16_xor (&ctx->block, &key->K2);
 }
   else
 {
-  memxor(ctx->block.b, key->K1.b, 16);
+  block16_xor