https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71270

--- Comment #3 from vekumar at gcc dot gnu.org ---
Built armeb-none-linux-gnueabihf with --with-cpu=cortex-a9 --with-fpu=neon-fp16
--with-float=hard

I then compared the GIMPLE output from intrinsic_pack_1.f90.151t.slp1 before and
after my patch.

The differences are shown below and are similar to the x86_64 dump. The GIMPLE
dump after SLP looks correct to me, so I think something in the backend is
causing the issues.

Any thoughts?

GIMPLE SLP dumps:

Before

 # .MEM_1450 = VDEF <.MEM_1492>
  d_i1D.3585[0].vD.3582 = 1;
  # .MEM_1454 = VDEF <.MEM_1450>
  d_i1D.3585[1].vD.3582 = -1;
  # .MEM_1458 = VDEF <.MEM_1454>
  d_i1D.3585[2].vD.3582 = 2;
  # .MEM_1468 = VDEF <.MEM_1458>
  d_i1D.3585[3].vD.3582 = -2;
  # .MEM_1472 = VDEF <.MEM_1468>
  d_i1D.3585[4].vD.3582 = 3;
  # .MEM_1476 = VDEF <.MEM_1472>
  d_i1D.3585[5].vD.3582 = -3;
  # .MEM_1486 = VDEF <.MEM_1476>
  d_i1D.3585[6].vD.3582 = 4;
  # .MEM_1490 = VDEF <.MEM_1486>
  d_i1D.3585[7].vD.3582 = -4;
  # .MEM_1494 = VDEF <.MEM_1490>
  d_i1D.3585[8].vD.3582 = 5;


After 

  vect_cst__817 = { 1, 0, 1, 0 };
  vect_cst__873 = { 1, 0, 1, 0 };
  vect_cst__1413 = { 1, -1, 2, -2 };
  vect_cst__1461 = { 3, -3, 4, -4 };

  # .MEM_910 = VDEF <.MEM_1492>
  MEM[(integer(kind=1)D.3 *)&d_i1D.3585] = vect_cst__1413;
  # PT = anything
  # ALIGN = 4, MISALIGN = 0
  _918 = &d_i1D.3585[0].vD.3582 + 4;
  # .MEM_865 = VDEF <.MEM_910>
  MEM[(integer(kind=1)D.3 *)_918] = vect_cst__1461;
  # .MEM_1494 = VDEF <.MEM_865>
  d_i1D.3585[8].vD.3582 = 5;

Before 

 # .MEM_1388 = VDEF <.MEM_217>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][0] = 1;
  # .MEM_1393 = VDEF <.MEM_1388>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][1] = 0;
  # .MEM_1398 = VDEF <.MEM_1393>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][2] = 1;
  # .MEM_1409 = VDEF <.MEM_1398>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][3] = 0;
  # .MEM_1414 = VDEF <.MEM_1409>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][4] = 1;
  # .MEM_1419 = VDEF <.MEM_1414>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][5] = 0;
  # .MEM_1430 = VDEF <.MEM_1419>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][6] = 1;
  # .MEM_1435 = VDEF <.MEM_1430>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][7] = 0;
  # .MEM_1440 = VDEF <.MEM_1435>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][8] = 1;

After 

  # .MEM_825 = VDEF <.MEM_217>
  MEM[(logical(kind=1)D.7 *)&A.8D.3679] = vect_cst__817;
  # PT = anything
  # ALIGN = 4, MISALIGN = 0
  _769 = &MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][0] + 4;
  # .MEM_777 = VDEF <.MEM_825>
  MEM[(logical(kind=1)D.7 *)_769] = vect_cst__873;
  # .MEM_1440 = VDEF <.MEM_777>
  MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][8] = 1;

Before 

  # .MEM_1271 = VDEF <.MEM_264>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][0] = 1;
  # .MEM_1276 = VDEF <.MEM_1271>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][1] = 0;
  # .MEM_1281 = VDEF <.MEM_1276>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][2] = 1;
  # .MEM_1292 = VDEF <.MEM_1281>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][3] = 0;
  # .MEM_1297 = VDEF <.MEM_1292>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][4] = 1;
  # .MEM_1302 = VDEF <.MEM_1297>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][5] = 0;
  # .MEM_1313 = VDEF <.MEM_1302>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][6] = 1;
  # .MEM_1318 = VDEF <.MEM_1313>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][7] = 0;
  # .MEM_1323 = VDEF <.MEM_1318>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][8] = 1;

After 

 vect_cst__729 = { 1, 0, 1, 0 };
  vect_cst__721 = { 1, 0, 1, 0 };

  # .MEM_673 = VDEF <.MEM_264>
  MEM[(logical(kind=1)D.7 *)&A.23D.3720] = vect_cst__729;
  # PT = anything
  # ALIGN = 4, MISALIGN = 0
  _681 = &MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][0] + 4;
  # .MEM_942 = VDEF <.MEM_673>
  MEM[(logical(kind=1)D.7 *)_681] = vect_cst__721;
  # .MEM_1323 = VDEF <.MEM_942>
  MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][8] = 1;

Reply via email to