https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95018
--- Comment #9 from Thomas Koenig <tkoenig at gcc dot gnu.org> --- Created attachment 48502 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=48502&action=edit Assembly file on x86 with -O2 -funroll-loops So, it seems the decisions made for unrolling are bad for this case independent of architecture - the cold loop is also unrolled 15 times on x86_64 with -funroll-loops. The change to POWER just happened to expose it. The code on x86_64 looks like # ../../../trunk/libgfortran/generated/in_pack_i4.c:76: destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_4)); movq %rax, 152(%rsp) # tmp309, %sfp # ../../../trunk/libgfortran/generated/in_pack_i4.c:78: src = source->base_addr; movq 0(%rbp), %rax # source_42(D)->base_addr, src # ../../../trunk/libgfortran/generated/in_pack_i4.c:82: while (src) testq %rax, %rax # src je .L1 #, # ../../../trunk/libgfortran/generated/in_pack_i4.c:91: while (count[n] == extent[n]) movq 416(%rsp), %r15 # extent, _68 # ../../../trunk/libgfortran/generated/in_pack_i4.c:108: src += stride[n]; movq 552(%rsp), %rdi # stride, _92 # ../../../trunk/libgfortran/generated/in_pack_i4.c:87: src += stride0; leaq 0(,%rsi,4), %r12 #, _13 # ../../../trunk/libgfortran/generated/in_pack_i4.c:108: src += stride[n]; movq 560(%rsp), %r14 # stride, _24 movq 568(%rsp), %r13 # stride, _112 # ../../../trunk/libgfortran/generated/in_pack_i4.c:98: src -= stride[n] * extent[n]; imulq %r15, %rsi # _68, tmp225 # ../../../trunk/libgfortran/generated/in_pack_i4.c:91: while (count[n] == extent[n]) movq %r15, 16(%rsp) # _68, %sfp movq 424(%rsp), %r15 # extent, _73 movq %rdi, %r10 # _92, tmp226 movq %r14, %r9 # _24, tmp228 movq 288(%rsp), %rdx # count, count_I_lsm.8 movq 296(%rsp), %rcx # count, count_I_lsm.29 # ../../../trunk/libgfortran/generated/in_pack_i4.c:98: src -= stride[n] * extent[n]; imulq %r15, %rdi # _73, tmp227 # ../../../trunk/libgfortran/generated/in_pack_i4.c:91: while (count[n] == extent[n]) movq %r15, 32(%rsp) # _73, %sfp movq 432(%rsp), %r15 # extent, _14 subq %rsi, %r10 # tmp225, tmp226 movq 320(%rsp), %r8 # count, count_I_lsm.35 movq 304(%rsp), %rsi # count, count_I_lsm.31 # ../../../trunk/libgfortran/generated/in_pack_i4.c:98: src -= stride[n] * extent[n]; imulq %r15, %r14 # _14, tmp229 leaq 0(,%r10,4), %r11 #, _78 movq %r13, %r10 # _112, tmp230 # ../../../trunk/libgfortran/generated/in_pack_i4.c:91: while (count[n] == extent[n]) ... and so on.