Hi Eric,

On 4 October 2018 at 06:07, Eric Biggers <ebigg...@kernel.org> wrote:
> From: Eric Biggers <ebigg...@google.com>
>
> The generic constant-time AES implementation is supposed to preload the
> AES S-box into the CPU's L1 data cache.  But, an interrupt handler can
> run on the CPU and muck with the cache.  Worse, on preemptible kernels
> the process can even be preempted and moved to a different CPU.  So the
> implementation may actually still be vulnerable to cache-timing attacks.
>
> Make it more robust by disabling interrupts while the sbox is used.
>
> In some quick tests on x86 and ARM, this doesn't affect performance
> significantly.  Responsiveness is also a concern, but interrupts are
> only disabled for a single AES block which even on ARM Cortex-A7 is
> "only" ~1500 cycles to encrypt or ~2600 cycles to decrypt.
>

I share your concern, but that is quite a big hammer.

Also, does it really take ~100 cycles per byte (~1500 cycles for a
16-byte block)? That is terrible :-)

Given that the full lookup table is only 1024 bytes (or 1024+256
bytes for decryption), I wonder if something like the diff below
would be a better option for A7:

diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 184d6c2d15d5..83e893f7e581 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -139,6 +139,13 @@

 	__adrl		ttab, \ttab

+	.irpc		r, 01234567
+	ldr		r8, [ttab, #(32 * \r)]
+	ldr		r9, [ttab, #(32 * \r) + 256]
+	ldr		r10, [ttab, #(32 * \r) + 512]
+	ldr		r11, [ttab, #(32 * \r) + 768]
+	.endr
+
 	tst		rounds, #2
 	bne		1f

@@ -180,6 +187,12 @@ ENDPROC(__aes_arm_encrypt)

 	.align		5
 ENTRY(__aes_arm_decrypt)
+	__adrl		ttab, __aes_arm_inverse_sbox
+
+	.irpc		r, 01234567
+	ldr		r8, [ttab, #(32 * \r)]
+	.endr
+
 	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)

diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
index c222f6e072ad..630e1a436f1d 100644
--- a/arch/arm/crypto/aes-cipher-glue.c
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -23,16 +23,22 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
+	unsigned long flags;

+	local_irq_save(flags);
 	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
+	local_irq_restore(flags);
 }

 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
+	unsigned long flags;

+	local_irq_save(flags);
 	__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
+	local_irq_restore(flags);
 }

 static struct crypto_alg aes_alg = {
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index ca554d57d01e..82fa860c9cb9 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -63,7 +63,7 @@ static inline u8 byte(const u32 x, const unsigned n)

 static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };

-__visible const u32 crypto_ft_tab[4][256] = {
+__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
 	{
 		0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
 		0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
@@ -327,7 +327,7 @@ __visible const u32 crypto_ft_tab[4][256] = {
 	}
 };

-__visible const u32 crypto_fl_tab[4][256] = {
+__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
 	{
 		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
 		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -591,7 +591,7 @@ __visible const u32 crypto_fl_tab[4][256] = {
 	}
 };

-__visible const u32 crypto_it_tab[4][256] = {
+__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
 	{
 		0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
 		0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
@@ -855,7 +855,7 @@ __visible const u32 crypto_it_tab[4][256] = {
 	}
 };

-__visible const u32 crypto_il_tab[4][256] = {
+__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
 	{
 		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
 		0x00000030, 0x00000036, 0x000000a5, 0x00000038,
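
In case the .irpc trick above is unclear: in (purely illustrative) C it
amounts to something like the helper below. This is only a sketch -- the
function name is made up and nothing like it exists in the patch; it just
touches one word per 32 bytes so the whole table is pulled into the L1
D-cache before any key- or data-dependent loads happen.

#include <linux/types.h>	/* u32 */

/*
 * Sketch only: load one word per 32-byte stride of the table, mirroring
 * the .irpc loop above.  The volatile access stops the compiler from
 * discarding the otherwise unused loads.
 */
static inline void preload_table(const u32 *tab, unsigned int bytes)
{
	unsigned int i;

	for (i = 0; i < bytes / sizeof(u32); i += 32 / sizeof(u32))
		(void)*(const volatile u32 *)&tab[i];
}

Called as, say, preload_table(crypto_ft_tab[0], 1024), it would cover the
1024-byte encryption table with 32 loads, which is what the eight .irpc
iterations with four ldrs each do.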

> Fixes: b5e0b032b6c3 ("crypto: aes - add generic time invariant AES cipher")
> Signed-off-by: Eric Biggers <ebigg...@google.com>
> ---
>  crypto/aes_ti.c | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
>
> diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
> index 03023b2290e8e..81b604419ee0e 100644
> --- a/crypto/aes_ti.c
> +++ b/crypto/aes_ti.c
> @@ -269,6 +269,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
>         const u32 *rkp = ctx->key_enc + 4;
>         int rounds = 6 + ctx->key_length / 4;
>         u32 st0[4], st1[4];
> +       unsigned long flags;
>         int round;
>
>         st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
> @@ -276,6 +277,12 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
>         st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
>         st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
>
> +       /*
> +        * Disable interrupts (including preemption) while the sbox is loaded
> +        * into L1 cache and used for encryption on this CPU.
> +        */
> +       local_irq_save(flags);
> +
>         st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
>         st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
>         st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
> @@ -300,6 +307,8 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
>         put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
>         put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
>         put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
> +
> +       local_irq_restore(flags);
>  }
>
>  static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
> @@ -308,6 +317,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
>         const u32 *rkp = ctx->key_dec + 4;
>         int rounds = 6 + ctx->key_length / 4;
>         u32 st0[4], st1[4];
> +       unsigned long flags;
>         int round;
>
>         st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
> @@ -315,6 +325,12 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
>         st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
>         st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
>
> +       /*
> +        * Disable interrupts (including preemption) while the sbox is loaded
> +        * into L1 cache and used for decryption on this CPU.
> +        */
> +       local_irq_save(flags);
> +
>         st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
>         st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
>         st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
> @@ -339,6 +355,8 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
>         put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
>         put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
>         put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
> +
> +       local_irq_restore(flags);
>  }
>
>  static struct crypto_alg aes_alg = {
> --
> 2.19.0
>
