This bug is caused by a change in GAS which makes it prefer shorter instruction encodings. Here is the disassembly of the code it now generates:

    80580c1: 8d 14 52                lea    (%edx,%edx,2),%edx
    80580c4: 8d ac 95 17 00 00 00    lea    0x17(%ebp,%edx,4),%ebp
    80580cb: 89 c2                   mov    %eax,%edx
    80580cd: ff e5                   jmp    *%ebp
    80580cf: 90                      nop
    80580d0: 13 44 8b fc             adc    0xfffffffc(%ebx,%ecx,4),%eax
    80580d4: 8b 14 8e                mov    (%esi,%ecx,4),%edx
    80580d7: 89 44 8f fc             mov    %eax,0xfffffffc(%edi,%ecx,4)
    80580db: 13 14 8b                adc    (%ebx,%ecx,4),%edx
    80580de: 8b 44 8e 04             mov    0x4(%esi,%ecx,4),%eax
    80580e2: 89 14 8f                mov    %edx,(%edi,%ecx,4)
    80580e5: 13 44 8b 04             adc    0x4(%ebx,%ecx,4),%eax
    80580e9: 8b 54 8e 08             mov    0x8(%esi,%ecx,4),%edx
    80580ed: 89 44 8f 04             mov    %eax,0x4(%edi,%ecx,4)
    80580f1: 13 54 8b 08             adc    0x8(%ebx,%ecx,4),%edx
    80580f5: 8b 44 8e 0c             mov    0xc(%esi,%ecx,4),%eax
    80580f9: 89 54 8f 08             mov    %edx,0x8(%edi,%ecx,4)
    80580fd: 13 44 8b 0c             adc    0xc(%ebx,%ecx,4),%eax

Corresponding hand-written assembler source code:

            leal   (%edx,%edx,2), %edx        # ebp <- L(begin) + 12*reste
            leal   L(begin)-L(here)(%ebp,%edx,4), %ebp
            movl   %eax, %edx
            jmp    *%ebp

            # corps de boucle à dérouler. taille du code = 24 octets
            # entrer avec eax = edx = 1er chiffre de a, CF = 0
    #undef BODY
    #define BODY(x,y,z) \
            adcl   x(%ebx,%ecx,4), %eax; \
            movl   y(%esi,%ecx,4), %edx; \
            movl   %eax, x(%edi,%ecx,4); \
            adcl   y(%ebx,%ecx,4), %edx; \
            movl   z(%esi,%ecx,4), %eax; \
            movl   %edx, y(%edi,%ecx,4)

            # boucle d addition déroulée pour 16 chiffres
            ALIGN(4)
    L(begin):
            BODY(-4,0,4);   BODY(4,8,12);   BODY(12,16,20); BODY(20,24,28)
            BODY(28,32,36); BODY(36,40,44); BODY(44,48,52); BODY(52,56,60)

The computed jump (L(begin) + 12*reste) requires every three-instruction bundle to be exactly 12 bytes long. The first bundles are not: in the disassembly above, the bundle at 80580d0 is 11 bytes long and the one at 80580db is 10 bytes long, because the instructions with a zero displacement are now encoded without a displacement byte (3 bytes instead of 4). The jump can therefore land in the middle of an instruction, with catastrophic consequences. I will see what can be done about this. Technically, this is not a GAS bug, since the source code depends on instruction lengths that it does not explicitly request from the assembler.