On Fri, 28 Nov 2025 06:01:26 GMT, Jiangli Zhou <[email protected]> wrote:

>> Please review the fix in StubGenerator::aesgcm_avx512 and 
>> StubGenerator::aesgcm_avx2 to handle some edge cases with input sizes that 
>> are not multiple of the block size. 
>> 
>> Thanks to Thomas Holenstein and Lukas Zobernig for analyzing the issue and 
>> providing the test case!
>
> Jiangli Zhou has updated the pull request incrementally with one additional 
> commit since the last revision:
> 
>   Change to break before operators.

Oh man, the `pos`/`len` modifications in current code are confusing. I 
scratched my head for quite a while trying to comprehend why does `__ 
bind(MESG_BELOW_32_BLKS)` split the `pos += 16` and `len -= 16`? On a surface, 
that just looks like a bug.

But looks that way because we do `initial_blocks_16_avx512` twice, do `pos += 
16` twice, but only do the `len += 32` after the second call. Which does not 
help if we shortcut after the first call. In fact, I am not at all sure that 
checking `len < 32` _without_ modifying `len` beforehand does not break the 
assumptions downstream:


  initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK, 
rounds, CTR_BLOCKx, AAD_HASHx,  ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK, 
stack_offset);
  __ addl(pos, 16 * 16);
  __ cmpl(len, 32 * 16);
  __ jcc(Assembler::below, MESG_BELOW_32_BLKS);


Really, in these kind of intrinsics, _you want_ to make sure `pos` and `len` 
updates are tightly bound together, otherwise these kinds of mistakes would 
keep happening. You will lose on code density a bit, but would have more 
readable and robust code.

Shouldn't it be like this?


diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp 
b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
index 1e728ffa279..a16e25b075d 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
@@ -3475,12 +3475,14 @@ void StubGenerator::aesgcm_avx512(Register in, Register 
len, Register ct, Regist
 
   initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, 
CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx,  ADDBE_4x4, ADDBE_1234, ADD_1234, 
SHUF_MASK, stack_offset);
   __ addl(pos, 16 * 16);
+  __ subl(len, 16 * 16);
+
   __ cmpl(len, 32 * 16);
   __ jcc(Assembler::below, MESG_BELOW_32_BLKS);
 
   initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, 
CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, 
SHUF_MASK, stack_offset + 16);
   __ addl(pos, 16 * 16);
-  __ subl(len, 32 * 16);
+  __ subl(len, 16 * 16);
 
   __ cmpl(len, 32 * 16);
   __ jcc(Assembler::below, NO_BIG_BLKS);
@@ -3491,24 +3493,27 @@ void StubGenerator::aesgcm_avx512(Register in, Register 
len, Register ct, Regist
   ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, 
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, 
SHUF_MASK,
                                     true, true, false, false, false, 
ghashin_offset, aesout_offset, HashKey_32);
   __ addl(pos, 16 * 16);
+  __ subl(len, 16 * 16);
 
   ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, 
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, 
SHUF_MASK,
                                     true, false, true, false, true, 
ghashin_offset + 16, aesout_offset + 16, HashKey_16);
   __ evmovdquq(AAD_HASHx, ZTMP4, Assembler::AVX_512bit);
   __ addl(pos, 16 * 16);
-  __ subl(len, 32 * 16);
+  __ subl(len, 16 * 16);
   __ jmp(ENCRYPT_BIG_BLKS_NO_HXOR);
 
   __ bind(ENCRYPT_BIG_NBLKS);
   ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, 
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, 
SHUF_MASK,
                                     false, true, false, false, false, 
ghashin_offset, aesout_offset, HashKey_32);
   __ addl(pos, 16 * 16);
+  __ subl(len, 16 * 16);
+
   ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl, 
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, 
SHUF_MASK,
                                     false, false, true, true, true, 
ghashin_offset + 16, aesout_offset + 16, HashKey_16);
 
   __ movdqu(AAD_HASHx, ZTMP4);
   __ addl(pos, 16 * 16);
-  __ subl(len, 32 * 16);
+  __ subl(len, 16 * 16);
 
   __ bind(NO_BIG_BLKS);
   __ cmpl(len, 16 * 16);
@@ -3525,9 +3530,9 @@ void StubGenerator::aesgcm_avx512(Register in, Register 
len, Register ct, Regist
 
   ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl, 
AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16);
   __ addl(pos, 16 * 16);
+  __ subl(len, 16 * 16);
 
   __ bind(MESG_BELOW_32_BLKS);
-  __ subl(len, 16 * 16);
   gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK, 
avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true);
 
   __ bind(GHASH_DONE);

-------------

PR Review: https://git.openjdk.org/jdk/pull/28363#pullrequestreview-3518173513

Reply via email to