https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71270
--- Comment #3 from vekumar at gcc dot gnu.org ---
Built armeb-none-linux-gnueabihf with --with-cpu=cortex-a9 --with-fpu=neon-fp16 --with-float=hard, and compared the gimple output from intrinsic_pack_1.f90.151t.slp1 before and after my patch. The difference is shown below and is similar to the x86_64 dump. The gimple dump after SLP looks correct to me. I think something in the backend is causing the issues. Any thoughts?

Gimple SLP dumps.

Before
# .MEM_1450 = VDEF <.MEM_1492>
d_i1D.3585[0].vD.3582 = 1;
# .MEM_1454 = VDEF <.MEM_1450>
d_i1D.3585[1].vD.3582 = -1;
# .MEM_1458 = VDEF <.MEM_1454>
d_i1D.3585[2].vD.3582 = 2;
# .MEM_1468 = VDEF <.MEM_1458>
d_i1D.3585[3].vD.3582 = -2;
# .MEM_1472 = VDEF <.MEM_1468>
d_i1D.3585[4].vD.3582 = 3;
# .MEM_1476 = VDEF <.MEM_1472>
d_i1D.3585[5].vD.3582 = -3;
# .MEM_1486 = VDEF <.MEM_1476>
d_i1D.3585[6].vD.3582 = 4;
# .MEM_1490 = VDEF <.MEM_1486>
d_i1D.3585[7].vD.3582 = -4;
# .MEM_1494 = VDEF <.MEM_1490>
d_i1D.3585[8].vD.3582 = 5;

After
vect_cst__817 = { 1, 0, 1, 0 };
vect_cst__873 = { 1, 0, 1, 0 };
vect_cst__1413 = { 1, -1, 2, -2 };
vect_cst__1461 = { 3, -3, 4, -4 };
# .MEM_910 = VDEF <.MEM_1492>
MEM[(integer(kind=1)D.3 *)&d_i1D.3585] = vect_cst__1413;
# PT = anything
# ALIGN = 4, MISALIGN = 0
_918 = &d_i1D.3585[0].vD.3582 + 4;
# .MEM_865 = VDEF <.MEM_910>
MEM[(integer(kind=1)D.3 *)_918] = vect_cst__1461;
# .MEM_1494 = VDEF <.MEM_865>
d_i1D.3585[8].vD.3582 = 5;

Before
# .MEM_1388 = VDEF <.MEM_217>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][0] = 1;
# .MEM_1393 = VDEF <.MEM_1388>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][1] = 0;
# .MEM_1398 = VDEF <.MEM_1393>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][2] = 1;
# .MEM_1409 = VDEF <.MEM_1398>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][3] = 0;
# .MEM_1414 = VDEF <.MEM_1409>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][4] = 1;
# .MEM_1419 = VDEF <.MEM_1414>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][5] = 0;
# .MEM_1430 = VDEF <.MEM_1419>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][6] = 1;
# .MEM_1435 = VDEF <.MEM_1430>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][7] = 0;
# .MEM_1440 = VDEF <.MEM_1435>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][8] = 1;

After
# .MEM_825 = VDEF <.MEM_217>
MEM[(logical(kind=1)D.7 *)&A.8D.3679] = vect_cst__817;
# PT = anything
# ALIGN = 4, MISALIGN = 0
_769 = &MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][0] + 4;
# .MEM_777 = VDEF <.MEM_825>
MEM[(logical(kind=1)D.7 *)_769] = vect_cst__873;
# .MEM_1440 = VDEF <.MEM_777>
MEM[(logical(kind=1)D.7[9] *)&A.8D.3679][8] = 1;

Before
# .MEM_1271 = VDEF <.MEM_264>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][0] = 1;
# .MEM_1276 = VDEF <.MEM_1271>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][1] = 0;
# .MEM_1281 = VDEF <.MEM_1276>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][2] = 1;
# .MEM_1292 = VDEF <.MEM_1281>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][3] = 0;
# .MEM_1297 = VDEF <.MEM_1292>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][4] = 1;
# .MEM_1302 = VDEF <.MEM_1297>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][5] = 0;
# .MEM_1313 = VDEF <.MEM_1302>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][6] = 1;
# .MEM_1318 = VDEF <.MEM_1313>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][7] = 0;
# .MEM_1323 = VDEF <.MEM_1318>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][8] = 1;

After
vect_cst__729 = { 1, 0, 1, 0 };
vect_cst__721 = { 1, 0, 1, 0 };
# .MEM_673 = VDEF <.MEM_264>
MEM[(logical(kind=1)D.7 *)&A.23D.3720] = vect_cst__729;
# PT = anything
# ALIGN = 4, MISALIGN = 0
_681 = &MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][0] + 4;
# .MEM_942 = VDEF <.MEM_673>
MEM[(logical(kind=1)D.7 *)_681] = vect_cst__721;
# .MEM_1323 = VDEF <.MEM_942>
MEM[(logical(kind=1)D.7[9] *)&A.23D.3720][8] = 1;