https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98855

Martin Liška <marxin at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
     Ever confirmed|0                           |1
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2021-01-29

--- Comment #3 from Martin Liška <marxin at gcc dot gnu.org> ---
So it's still there after the fix for PR98845.

I briefly looked at src/lib/block/xtea/xtea.cpp:

One can isolate the single problematic SLP instance with:
-fdbg-cnt=vect_slp:4-4

build/include/botan/loadstor.h:470:15: note: Basic block will be vectorized
using SLP
build/include/botan/loadstor.h:470:15: note: Vectorizing SLP tree:
build/include/botan/loadstor.h:470:15: note: node 0x2cabac0 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: MEM <unsigned int>
[(char * {ref-all})_66] = _133;
build/include/botan/loadstor.h:470:15: note:    stmt 0 MEM <unsigned int>
[(char * {ref-all})_66] = _133;
build/include/botan/loadstor.h:470:15: note:    stmt 1 MEM <unsigned int>
[(char * {ref-all})_66 + 4B] = _134;
build/include/botan/loadstor.h:470:15: note:    stmt 2 MEM <unsigned int>
[(char * {ref-all})_66 + 8B] = _135;
build/include/botan/loadstor.h:470:15: note:    stmt 3 MEM <unsigned int>
[(char * {ref-all})_66 + 12B] = _136;
build/include/botan/loadstor.h:470:15: note:    stmt 4 MEM <unsigned int>
[(char * {ref-all})_66 + 16B] = _137;
build/include/botan/loadstor.h:470:15: note:    stmt 5 MEM <unsigned int>
[(char * {ref-all})_66 + 20B] = _138;
build/include/botan/loadstor.h:470:15: note:    stmt 6 MEM <unsigned int>
[(char * {ref-all})_66 + 24B] = _139;
build/include/botan/loadstor.h:470:15: note:    stmt 7 MEM <unsigned int>
[(char * {ref-all})_66 + 28B] = _140;
build/include/botan/loadstor.h:470:15: note:    children 0x2cabb40
build/include/botan/loadstor.h:470:15: note: node 0x2cabb40 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: _133 =
__builtin_bswap32 (_92);
build/include/botan/loadstor.h:470:15: note:    stmt 0 _133 = __builtin_bswap32
(_92);
build/include/botan/loadstor.h:470:15: note:    stmt 1 _134 = __builtin_bswap32
(_592);
build/include/botan/loadstor.h:470:15: note:    stmt 2 _135 = __builtin_bswap32
(_90);
build/include/botan/loadstor.h:470:15: note:    stmt 3 _136 = __builtin_bswap32
(_591);
build/include/botan/loadstor.h:470:15: note:    stmt 4 _137 = __builtin_bswap32
(_594);
build/include/botan/loadstor.h:470:15: note:    stmt 5 _138 = __builtin_bswap32
(_590);
build/include/botan/loadstor.h:470:15: note:    stmt 6 _139 = __builtin_bswap32
(_593);
build/include/botan/loadstor.h:470:15: note:    stmt 7 _140 = __builtin_bswap32
(_589);
build/include/botan/loadstor.h:470:15: note:    children 0x2cabbc0
build/include/botan/loadstor.h:470:15: note: node 0x2cabbc0 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: _92 = PHI <_14(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 0 _92 = PHI <_14(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 1 _592 = PHI <_46(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 2 _90 = PHI <_23(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 3 _591 = PHI <_52(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 4 _594 = PHI <_31(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 5 _590 = PHI <_58(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 6 _593 = PHI <_37(17)>
build/include/botan/loadstor.h:470:15: note:    stmt 7 _589 = PHI <_64(17)>
build/include/botan/loadstor.h:470:15: note:    children 0x2cabc40
build/include/botan/loadstor.h:470:15: note: node 0x2cabc40 (max_nunits=4,
refcnt=2)
build/include/botan/loadstor.h:470:15: note: op template: _14 = _13 + L0_224;
build/include/botan/loadstor.h:470:15: note:    stmt 0 _14 = _13 + L0_224;
build/include/botan/loadstor.h:470:15: note:    stmt 1 _46 = _45 + R0_225;
build/include/botan/loadstor.h:470:15: note:    stmt 2 _23 = _22 + L1_226;
build/include/botan/loadstor.h:470:15: note:    stmt 3 _52 = _51 + R1_227;
build/include/botan/loadstor.h:470:15: note:    stmt 4 _31 = _30 + L2_228;
build/include/botan/loadstor.h:470:15: note:    stmt 5 _58 = _57 + R2_229;
build/include/botan/loadstor.h:470:15: note:    stmt 6 _37 = _36 + L3_230;
build/include/botan/loadstor.h:470:15: note:    stmt 7 _64 = _63 + R3_231;
build/include/botan/loadstor.h:470:15: note:    children 0x2cabcc0 0x2cabdc0
build/include/botan/loadstor.h:470:15: note: node 0x2cabcc0 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: _13 = _9 ^ _12;
build/include/botan/loadstor.h:470:15: note:    stmt 0 _13 = _9 ^ _12;
build/include/botan/loadstor.h:470:15: note:    stmt 1 _45 = _41 ^ _44;
build/include/botan/loadstor.h:470:15: note:    stmt 2 _22 = _12 ^ _18;
build/include/botan/loadstor.h:470:15: note:    stmt 3 _51 = _44 ^ _50;
build/include/botan/loadstor.h:470:15: note:    stmt 4 _30 = _12 ^ _27;
build/include/botan/loadstor.h:470:15: note:    stmt 5 _57 = _44 ^ _56;
build/include/botan/loadstor.h:470:15: note:    stmt 6 _36 = _12 ^ _35;
build/include/botan/loadstor.h:470:15: note:    stmt 7 _63 = _44 ^ _62;
build/include/botan/loadstor.h:470:15: note:    children 0x2cabd40 0x2cabfc0
build/include/botan/loadstor.h:470:15: note: node (external) 0x2cabd40
(max_nunits=1, refcnt=1)
build/include/botan/loadstor.h:470:15: note:    { _9, _41, _18, _50, _27, _56,
_35, _62 }
build/include/botan/loadstor.h:470:15: note: node (external) 0x2cabfc0
(max_nunits=4, refcnt=1)
build/include/botan/loadstor.h:470:15: note:    stmt 0 _12 = *_11;
build/include/botan/loadstor.h:470:15: note:    stmt 1 _44 = *_43;
build/include/botan/loadstor.h:470:15: note:    stmt 2 _12 = *_11;
build/include/botan/loadstor.h:470:15: note:    stmt 3 _44 = *_43;
build/include/botan/loadstor.h:470:15: note:    stmt 4 _12 = *_11;
build/include/botan/loadstor.h:470:15: note:    stmt 5 _44 = *_43;
build/include/botan/loadstor.h:470:15: note:    stmt 6 _12 = *_11;
build/include/botan/loadstor.h:470:15: note:    stmt 7 _44 = *_43;
build/include/botan/loadstor.h:470:15: note: node 0x2cabdc0 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: L0_224 = PHI
<_14(28), _118(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 0 L0_224 = PHI <_14(28),
_118(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 1 R0_225 = PHI <_46(28),
_120(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 2 L1_226 = PHI <_23(28),
_122(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 3 R1_227 = PHI <_52(28),
_124(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 4 L2_228 = PHI <_31(28),
_126(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 5 R2_229 = PHI <_58(28),
_128(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 6 L3_230 = PHI <_37(28),
_130(16)>
build/include/botan/loadstor.h:470:15: note:    stmt 7 R3_231 = PHI <_64(28),
_132(16)>
build/include/botan/loadstor.h:470:15: note:    children 0x2cabc40 0x2cac040
build/include/botan/loadstor.h:470:15: note: node 0x2cac040 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: _118 =
__builtin_bswap32 (_117);
build/include/botan/loadstor.h:470:15: note:    stmt 0 _118 = __builtin_bswap32
(_117);
build/include/botan/loadstor.h:470:15: note:    stmt 1 _120 = __builtin_bswap32
(_119);
build/include/botan/loadstor.h:470:15: note:    stmt 2 _122 = __builtin_bswap32
(_121);
build/include/botan/loadstor.h:470:15: note:    stmt 3 _124 = __builtin_bswap32
(_123);
build/include/botan/loadstor.h:470:15: note:    stmt 4 _126 = __builtin_bswap32
(_125);
build/include/botan/loadstor.h:470:15: note:    stmt 5 _128 = __builtin_bswap32
(_127);
build/include/botan/loadstor.h:470:15: note:    stmt 6 _130 = __builtin_bswap32
(_129);
build/include/botan/loadstor.h:470:15: note:    stmt 7 _132 = __builtin_bswap32
(_131);
build/include/botan/loadstor.h:470:15: note:    children 0x2cac0c0
build/include/botan/loadstor.h:470:15: note: node 0x2cac0c0 (max_nunits=4,
refcnt=1)
build/include/botan/loadstor.h:470:15: note: op template: _117 = MEM <unsigned
int> [(char * {ref-all})_5];
build/include/botan/loadstor.h:470:15: note:    stmt 0 _117 = MEM <unsigned
int> [(char * {ref-all})_5];
build/include/botan/loadstor.h:470:15: note:    stmt 1 _119 = MEM <unsigned
int> [(char * {ref-all})_5 + 4B];
build/include/botan/loadstor.h:470:15: note:    stmt 2 _121 = MEM <unsigned
int> [(char * {ref-all})_5 + 8B];
build/include/botan/loadstor.h:470:15: note:    stmt 3 _123 = MEM <unsigned
int> [(char * {ref-all})_5 + 12B];
build/include/botan/loadstor.h:470:15: note:    stmt 4 _125 = MEM <unsigned
int> [(char * {ref-all})_5 + 16B];
build/include/botan/loadstor.h:470:15: note:    stmt 5 _127 = MEM <unsigned
int> [(char * {ref-all})_5 + 20B];
build/include/botan/loadstor.h:470:15: note:    stmt 6 _129 = MEM <unsigned
int> [(char * {ref-all})_5 + 24B];
build/include/botan/loadstor.h:470:15: note:    stmt 7 _131 = MEM <unsigned
int> [(char * {ref-all})_5 + 28B];
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: _13 = _9 ^ _12;
build/include/botan/loadstor.h:470:15: note: transform binary/unary operation.
build/include/botan/loadstor.h:470:15: note: add new stmt: vect__13.606_578 =
_580 ^ _582;
build/include/botan/loadstor.h:470:15: note: add new stmt: vect__13.606_577 =
_579 ^ _581;
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: _117 = MEM <unsigned int> [(char * {ref-all})_5];
build/include/botan/loadstor.h:470:15: note: transform load. ncopies = 1
build/include/botan/loadstor.h:470:15: note: create vector_type-pointer
variable to type: vector(4) unsigned int  vectorizing a pointer ref: MEM
<unsigned int> [(char * {ref-all})_5]
build/include/botan/loadstor.h:470:15: note: created vectp.608_575
build/include/botan/loadstor.h:470:15: note: add new stmt: vect__117.609_574 =
MEM <vector(4) unsigned int> [(char * {ref-all})vectp.608_575];
build/include/botan/loadstor.h:470:15: note: add new stmt: vectp.608_573 =
vectp.608_575 + 16;
build/include/botan/loadstor.h:470:15: note: add new stmt: vect__117.610_572 =
MEM <vector(4) unsigned int> [(char * {ref-all})vectp.608_573];
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: _118 = __builtin_bswap32 (_117);
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand MEM
<unsigned int> [(char * {ref-all})_5], type of def: internal
build/include/botan/loadstor.h:470:15: note: add new stmt: _571 =
VIEW_CONVERT_EXPR<vector(16) char>(vect__117.609_574);
build/include/botan/loadstor.h:470:15: note: add new stmt: _570 = VEC_PERM_EXPR
<_571, _571, { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }>;
build/include/botan/loadstor.h:470:15: note: add new stmt: _569 =
VIEW_CONVERT_EXPR<vector(4) unsigned int>(_570);
build/include/botan/loadstor.h:470:15: note: add new stmt: _568 =
VIEW_CONVERT_EXPR<vector(16) char>(vect__117.610_572);
build/include/botan/loadstor.h:470:15: note: add new stmt: _567 = VEC_PERM_EXPR
<_568, _568, { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }>;
build/include/botan/loadstor.h:470:15: note: add new stmt: _566 =
VIEW_CONVERT_EXPR<vector(4) unsigned int>(_567);
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: L0_224 = PHI <_14(28), _118(16)>
build/include/botan/loadstor.h:470:15: note: extracting lane for live stmt
R0_225 = PHI <_46(28), _120(16)>
build/include/botan/loadstor.h:470:15: note: extracting lane for live stmt
R1_227 = PHI <_52(28), _124(16)>
build/include/botan/loadstor.h:470:15: note: extracting lane for live stmt
R2_229 = PHI <_58(28), _128(16)>
build/include/botan/loadstor.h:470:15: note: extracting lane for live stmt
R3_231 = PHI <_64(28), _132(16)>
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: _14 = _13 + L0_224;
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand _9 ^
_12, type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand L0_224
= PHI <_14(28), _118(16)>, type of def: internal
build/include/botan/loadstor.h:470:15: note: transform binary/unary operation.
build/include/botan/loadstor.h:470:15: note: add new stmt: vect__14.612_559 =
vect__13.606_578 + vect_L0_224.611_565;
build/include/botan/loadstor.h:470:15: note: add new stmt: vect__14.612_558 =
vect__13.606_577 + vect_L0_224.611_564;
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: _92 = PHI <_14(17)>
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: _133 = __builtin_bswap32 (_92);
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand _92 =
PHI <_14(17)>, type of def: internal
build/include/botan/loadstor.h:470:15: note: add new stmt: _555 =
VIEW_CONVERT_EXPR<vector(16) char>(vect__92.613_557);
build/include/botan/loadstor.h:470:15: note: add new stmt: _554 = VEC_PERM_EXPR
<_555, _555, { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }>;
build/include/botan/loadstor.h:470:15: note: add new stmt: _553 =
VIEW_CONVERT_EXPR<vector(4) unsigned int>(_554);
build/include/botan/loadstor.h:470:15: note: add new stmt: _552 =
VIEW_CONVERT_EXPR<vector(16) char>(vect__92.613_556);
build/include/botan/loadstor.h:470:15: note: add new stmt: _551 = VEC_PERM_EXPR
<_552, _552, { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }>;
build/include/botan/loadstor.h:470:15: note: add new stmt: _550 =
VIEW_CONVERT_EXPR<vector(4) unsigned int>(_551);
build/include/botan/loadstor.h:470:15: note: ------>vectorizing SLP node
starting from: MEM <unsigned int> [(char * {ref-all})_66] = _133;
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_92), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_592), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_90), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_591), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_594), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_590), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_593), type of def: internal
build/include/botan/loadstor.h:470:15: note: vect_is_simple_use: operand
__builtin_bswap32 (_589), type of def: internal
build/include/botan/loadstor.h:470:15: note: transform store. ncopies = 1
build/include/botan/loadstor.h:470:15: note: create vector_type-pointer
variable to type: vector(4) unsigned int  vectorizing a pointer ref: MEM
<unsigned int> [(char * {ref-all})_66]
build/include/botan/loadstor.h:470:15: note: created vectp.615_549
build/include/botan/loadstor.h:470:15: note: add new stmt: MEM <vector(4)
unsigned int> [(char * {ref-all})vectp.615_549] = _553;
build/include/botan/loadstor.h:470:15: note: add new stmt: vectp.615_547 =
vectp.615_549 + 16;
build/include/botan/loadstor.h:470:15: note: add new stmt: MEM <vector(4)
unsigned int> [(char * {ref-all})vectp.615_547] = _550;
build/include/botan/loadstor.h:470:15: note: vectorizing stmts using SLP.
build/include/botan/loadstor.h:470:15: optimized: basic block part vectorized
using 16 byte vectors

I tried to reduce this to a self-contained test case, but did not succeed.

The original loop:

// Processes `blocks` blocks read from `in` and written to `out`.
// The first loop handles four blocks per iteration (blocks / 4 iterations);
// the second loop handles the remaining blocks % 4 blocks one at a time.
// Each block is loaded as a pair of 32-bit words and run through 32 rounds
// that read two consecutive words of the key schedule per round.
// NOTE(review): load_be/store_be, BLOCK_SIZE, verify_key_set and
// BOTAN_PARALLEL_FOR are defined outside this snippet; load_be/store_be
// presumably perform big-endian loads/stores -- confirm against loadstor.h.
void XTEA::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   // Hands verify_key_set the result of "key schedule is non-empty".
   verify_key_set(m_EK.empty() == false);

   // Raw pointer to the first word of the key schedule; indexed below as
   // EK[2*r] and EK[2*r+1] for r in [0, 32), i.e. indices 0..63.
   const uint32_t* EK = &m_EK[0];

   const size_t blocks4 = blocks / 4;      // number of 4-block groups
   const size_t blocks_left = blocks % 4;  // trailing blocks after the groups

   // Four-blocks-at-a-time path.
   BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks4; i++)
      {
      // One (L, R) word pair per block; the four pairs never mix with each other.
      uint32_t L0, R0, L1, R1, L2, R2, L3, R3;
      load_be(in + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3);

      for(size_t r = 0; r != 32; ++r)
         {
         // First half-round: every L is updated from its own R and EK[2*r].
         L0 += (((R0 << 4) ^ (R0 >> 5)) + R0) ^ EK[2*r];
         L1 += (((R1 << 4) ^ (R1 >> 5)) + R1) ^ EK[2*r];
         L2 += (((R2 << 4) ^ (R2 >> 5)) + R2) ^ EK[2*r];
         L3 += (((R3 << 4) ^ (R3 >> 5)) + R3) ^ EK[2*r];

         // Second half-round: every R is updated from the just-updated L and
         // EK[2*r+1], so these statements must follow the L updates above.
         R0 += (((L0 << 4) ^ (L0 >> 5)) + L0) ^ EK[2*r+1];
         R1 += (((L1 << 4) ^ (L1 >> 5)) + L1) ^ EK[2*r+1];
         R2 += (((L2 << 4) ^ (L2 >> 5)) + L2) ^ EK[2*r+1];
         R3 += (((L3 << 4) ^ (L3 >> 5)) + L3) ^ EK[2*r+1];
         }

      // Write the eight words back at the same offset they were read from.
      store_be(out + 4*BLOCK_SIZE*i, L0, R0, L1, R1, L2, R2, L3, R3);
      }

   // Tail path: one block per iteration, starting at block index 4*blocks4.
   // Runs zero times when blocks is a multiple of 4.
   BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i)
      {
      uint32_t L, R;
      load_be(in + BLOCK_SIZE*(4*blocks4+i), L, R);

      for(size_t r = 0; r != 32; ++r)
         {
         // Same two half-rounds as above, for a single (L, R) pair.
         L += (((R << 4) ^ (R >> 5)) + R) ^ EK[2*r];
         R += (((L << 4) ^ (L >> 5)) + L) ^ EK[2*r+1];
         }

      store_be(out + BLOCK_SIZE*(4*blocks4+i), L, R);
      }
   }

====

   BOTAN_PARALLEL_FOR(size_t i = 0; i < blocks_left; ++i)
should not be executed at all: the benchmark runs it with blocks == 128, so
blocks_left == 128 % 4 == 0.

Reply via email to