> Trass3r: >> are you able and willing to show me the asm produced by gdc? There's a >> problem there. > [attach bla.rar]
In the bla.rar attachment there's the unstripped Linux binary, so to read the asm I have used the objdump disassembler. But are you willing and able to show me the asm before it gets assembled? (with gcc you do it with the -S switch). (I also suggest using only the C standard library, with time() and printf(), to produce a smaller asm output: http://codepad.org/12EUo16J ). Using objdump I see it uses 16 XMM registers; this is the main routine. But what's the purpose of those callq instructions? They seem to call the next asm instruction. The x86 asm of this routine contains jumps only and no "call". The asm of this routine is also very long, I don't know why yet. I see too many instructions like "movss 0x80(%rsp), %xmm7"; this looks like a problem. _calculateVerticesAndNormals: push %r15 push %r14 push %r13 push %r12 push %rbp push %rbx sub $0x268, %rsp mov 0x2a0(%rsp), %rax mov %rdi, 0xe8(%rsp) mov %rsi, 0xe0(%rsp) mov %rcx, 0x128(%rsp) mov %r8, 0x138(%rsp) mov %rax, 0xf0(%rsp) mov 0x2a8(%rsp), %rax mov %rdi, 0x180(%rsp) mov %rsi, 0x188(%rsp) mov %rcx, 0x170(%rsp) mov %rax, 0xf8(%rsp) mov 0x2b0(%rsp), %rax mov %r8, 0x178(%rsp) mov %rax, 0x130(%rsp) mov 0x2b8(%rsp), %rax mov %rax, 0x140(%rsp) mov %rcx, %rax add %rax, %rax cmp 0x130(%rsp), %rax je 74d <_calculateVerticesAndNormals+0xcd> mov $0x57, %edx mov $0x6, %edi mov $0x0, %esi movq $0x6, 0x190(%rsp) movq $0x0, 0x198(%rsp) callq 74d <_calculateVerticesAndNormals+0xcd> cmpq $0x0, 0x128(%rsp) je 1317 <_calculateVerticesAndNormals+0xc97> movq $0x1, 0x120(%rsp) xor %r15d, %r15d movq $0x0, 0x100(%rsp) movslq %r15d, %r12 cmp %r12, 0xf0(%rsp) movq $0x0, 0x108(%rsp) jbe f1d <_calculateVerticesAndNormals+0x89d> nopl 0x0(%rax) lea (%r12, %r12, 2), %rax shl $0x2, %rax mov %rax, 0x148(%rsp) mov 0xf8(%rsp), %rax add 0x148(%rsp), %rax movss 0x4(%rax), %xmm9 movzbl 0x8(%rax), %r13d movslq (%rax), %rax cmp 0xe8(%rsp), %rax jae f50 <_calculateVerticesAndNormals+0x8d0> lea (%rax, %rax, 2), %rax shl $0x4, %rax mov %rax, 0x110(%rsp) mov
0xe0(%rsp), %rbx add 0x110(%rsp), %rbx je 12af <_calculateVerticesAndNormals+0xc2f> movss (%rbx), %xmm7 test %r13b, %r13b movss 0x4(%rbx), %xmm8 movss 0x8(%rbx), %xmm6 mulss %xmm9, %xmm7 movss 0xc(%rbx), %xmm11 mulss %xmm9, %xmm8 movss 0x10(%rbx), %xmm4 mulss %xmm9, %xmm6 movss 0x14(%rbx), %xmm5 mulss %xmm9, %xmm11 movss 0x18(%rbx), %xmm3 mulss %xmm9, %xmm4 movss 0x1c(%rbx), %xmm10 mulss %xmm9, %xmm5 movss 0x20(%rbx), %xmm1 mulss %xmm9, %xmm3 movss 0x24(%rbx), %xmm2 mulss %xmm9, %xmm10 movss 0x28(%rbx), %xmm0 mulss %xmm9, %xmm1 mulss %xmm9, %xmm2 mulss %xmm9, %xmm0 mulss 0x2c(%rbx), %xmm9 jne cdb <_calculateVerticesAndNormals+0x65b> add $0x1, %r12 mov %r14, %rax lea (%r12, %r12, 2), %r13 shl $0x2, %r13 jmpq 99e <_calculateVerticesAndNormals+0x31e> nopl (%rax) mov %r13, %rax mov 0xf8(%rsp), %rdx add %rax, %rdx movss 0x4(%rdx), %xmm12 movzbl 0x8(%rdx), %r14d movslq (%rdx), %rdx cmp %rdx, 0xe8(%rsp) jbe aa0 <_calculateVerticesAndNormals+0x420> mov 0xe0(%rsp), %rbx lea (%rdx, %rdx, 2), %rbp shl $0x4, %rbp add %rbp, %rbx je baf <_calculateVerticesAndNormals+0x52f> movss (%rbx), %xmm13 add $0x1, %r12 add $0xc, %r13 test %r14b, %r14b mulss %xmm12, %xmm13 addss %xmm13, %xmm7 movss 0x4(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm8 movss 0x8(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm6 movss 0xc(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm11 movss 0x10(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm4 movss 0x14(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm5 movss 0x18(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm3 movss 0x1c(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm10 movss 0x20(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm1 movss 0x24(%rbx), %xmm13 mulss %xmm12, %xmm13 addss %xmm13, %xmm2 movss 0x28(%rbx), %xmm13 mulss %xmm12, %xmm13 mulss 0x2c(%rbx), %xmm12 addss %xmm13, %xmm0 addss %xmm12, %xmm9 jne cd8 <_calculateVerticesAndNormals+0x658> add $0x1, %r15d cmp %r12, 0xf0(%rsp) ja 890 
<_calculateVerticesAndNormals+0x210> mov $0x63, %edx mov $0x6, %edi mov $0x0, %esi mov %rax, 0xc8(%rsp) movss %xmm0, (%rsp) movss %xmm1, 0x20(%rsp) movss %xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 0x80(%rsp) movss %xmm8, 0x70(%rsp) movss %xmm9, 0x90(%rsp) movss %xmm10, 0xa0(%rsp) movss %xmm11, 0xb0(%rsp) movq $0x6, 0x1c0(%rsp) movq $0x0, 0x1c8(%rsp) callq a3b <_calculateVerticesAndNormals+0x3bb> mov 0xc8(%rsp), %rax movss (%rsp), %xmm0 movss 0x20(%rsp), %xmm1 movss 0x10(%rsp), %xmm2 movss 0x30(%rsp), %xmm3 movss 0x50(%rsp), %xmm4 movss 0x40(%rsp), %xmm5 movss 0x60(%rsp), %xmm6 movss 0x80(%rsp), %xmm7 movss 0x70(%rsp), %xmm8 movss 0x90(%rsp), %xmm9 movss 0xa0(%rsp), %xmm10 movss 0xb0(%rsp), %xmm11 jmpq 893 <_calculateVerticesAndNormals+0x213> nop mov $0x65, %edx mov $0x6, %edi mov $0x0, %esi mov %rax, 0xc8(%rsp) movss %xmm0, (%rsp) movss %xmm1, 0x20(%rsp) movss %xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 0x80(%rsp) movss %xmm8, 0x70(%rsp) movss %xmm9, 0x90(%rsp) movss %xmm10, 0xa0(%rsp) movss %xmm11, 0xb0(%rsp) movss %xmm12, 0xd0(%rsp) movq $0x6, 0x1d0(%rsp) movq $0x0, 0x1d8(%rsp) callq b35 <_calculateVerticesAndNormals+0x4b5> mov 0xe0(%rsp), %rbx movss 0xd0(%rsp), %xmm12 movss 0xb0(%rsp), %xmm11 movss 0xa0(%rsp), %xmm10 add %rbp, %rbx movss 0x70(%rsp), %xmm8 movss 0x90(%rsp), %xmm9 movss 0x80(%rsp), %xmm7 movss 0x60(%rsp), %xmm6 movss 0x40(%rsp), %xmm5 movss 0x50(%rsp), %xmm4 movss 0x30(%rsp), %xmm3 movss 0x10(%rsp), %xmm2 movss 0x20(%rsp), %xmm1 movss (%rsp), %xmm0 mov 0xc8(%rsp), %rax jne 8d3 <_calculateVerticesAndNormals+0x253> mov $0x23, %r8d mov $0x6, %edx mov $0x0, %ecx mov $0x9, %edi mov $0x0, %esi movss %xmm0, (%rsp) mov %rax, 0xc8(%rsp) movss %xmm1, 0x20(%rsp) movss %xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 
0x80(%rsp) movss %xmm8, 0x70(%rsp) movss %xmm9, 0x90(%rsp) movss %xmm10, 0xa0(%rsp) movss %xmm11, 0xb0(%rsp) movss %xmm12, 0xd0(%rsp) movq $0x6, 0x240(%rsp) movq $0x0, 0x248(%rsp) movq $0x9, 0x250(%rsp) movq $0x0, 0x258(%rsp) callq c67 <_calculateVerticesAndNormals+0x5e7> movss 0x70(%rsp), %xmm8 movss 0xd0(%rsp), %xmm12 movss 0xb0(%rsp), %xmm11 movss 0xa0(%rsp), %xmm10 movss 0x90(%rsp), %xmm9 movss 0x80(%rsp), %xmm7 movss 0x60(%rsp), %xmm6 movss 0x40(%rsp), %xmm5 movss 0x50(%rsp), %xmm4 movss 0x30(%rsp), %xmm3 movss 0x10(%rsp), %xmm2 movss 0x20(%rsp), %xmm1 movss (%rsp), %xmm0 mov 0xc8(%rsp), %rax jmpq 8d3 <_calculateVerticesAndNormals+0x253> nopl (%rax) mov %rax, %r14 mov 0x108(%rsp), %rax cmp %rax, 0x128(%rsp) jbe 11d0 <_calculateVerticesAndNormals+0xb50> shl $0x5, %rax mov %rax, 0x150(%rsp) mov 0x100(%rsp), %rax mov 0x138(%rsp), %rbx add %rax, %rax add 0x150(%rsp), %rbx cmp %rax, 0x130(%rsp) jbe 10e8 <_calculateVerticesAndNormals+0xa68> mov 0x100(%rsp), %rax shl $0x5, %rax mov %rax, 0x158(%rsp) movss 0x8(%rbx), %xmm12 movaps %xmm8, %xmm15 movss (%rbx), %xmm14 movss %xmm12, 0x11c(%rsp) movss 0x4(%rbx), %xmm13 movaps %xmm7, %xmm12 mulss %xmm14, %xmm12 mov 0x140(%rsp), %rax mulss %xmm13, %xmm15 add 0x158(%rsp), %rax addss %xmm15, %xmm12 addss %xmm11, %xmm12 movl $0x0, 0xc(%rax) movss 0x11c(%rsp), %xmm11 mulss %xmm6, %xmm11 addss %xmm11, %xmm12 movaps %xmm4, %xmm11 mulss %xmm14, %xmm11 mulss %xmm1, %xmm14 movss %xmm12, (%rax) movaps %xmm5, %xmm12 mulss %xmm13, %xmm12 mulss %xmm2, %xmm13 addss %xmm12, %xmm11 addss %xmm13, %xmm14 addss %xmm10, %xmm11 movss 0x11c(%rsp), %xmm10 addss %xmm9, %xmm14 movss 0x11c(%rsp), %xmm9 mulss %xmm3, %xmm10 mulss %xmm0, %xmm9 addss %xmm10, %xmm11 addss %xmm9, %xmm14 movss %xmm11, 0x4(%rax) movss %xmm14, 0x8(%rax) mov 0x108(%rsp), %rax cmp %rax, 0x128(%rsp) jbe 1040 <_calculateVerticesAndNormals+0x9c0> shl $0x5, %rax mov %rax, 0x160(%rsp) mov 0x138(%rsp), %rbx mov 0x120(%rsp), %rax add 0x160(%rsp), %rbx cmp %rax, 0x130(%rsp) jbe f98 
<_calculateVerticesAndNormals+0x918> shl $0x4, %rax mov %rax, 0x168(%rsp) movss 0x10(%rbx), %xmm10 add $0x1, %r15d movss 0x14(%rbx), %xmm11 mulss %xmm10, %xmm7 mov 0x140(%rsp), %rax mulss %xmm11, %xmm8 movss 0x18(%rbx), %xmm9 mulss %xmm11, %xmm5 mulss %xmm10, %xmm4 mulss %xmm11, %xmm2 add 0x168(%rsp), %rax addq $0x1, 0x100(%rsp) addss %xmm7, %xmm8 addq $0x2, 0x120(%rsp) addss %xmm4, %xmm5 mulss %xmm10, %xmm1 mulss %xmm9, %xmm6 movl $0x0, 0xc(%rax) mulss %xmm9, %xmm3 mulss %xmm9, %xmm0 addss %xmm1, %xmm2 addss %xmm6, %xmm8 addss %xmm3, %xmm5 addss %xmm0, %xmm2 movss %xmm8, (%rax) movss %xmm5, 0x4(%rax) movss %xmm2, 0x8(%rax) mov 0x100(%rsp), %rax cmp %rax, 0x128(%rsp) je 1317 <_calculateVerticesAndNormals+0xc97> movslq %r15d, %r12 mov %rax, 0x108(%rsp) cmp %r12, 0xf0(%rsp) ja 798 <_calculateVerticesAndNormals+0x118> mov $0x5d, %edx mov $0x6, %edi mov $0x0, %esi movq $0x6, 0x1a0(%rsp) movq $0x0, 0x1a8(%rsp) callq f49 <_calculateVerticesAndNormals+0x8c9> jmpq 7a8 <_calculateVerticesAndNormals+0x128> xchg %ax, %ax mov $0x5f, %edx mov $0x6, %edi mov $0x0, %esi movss %xmm9, 0x90(%rsp) movq $0x6, 0x1b0(%rsp) movq $0x0, 0x1b8(%rsp) callq f86 <_calculateVerticesAndNormals+0x906> movss 0x90(%rsp), %xmm9 jmpq 7e4 <_calculateVerticesAndNormals+0x164> nopl (%rax) mov $0x69, %edx mov $0x6, %edi mov $0x0, %esi movss %xmm0, (%rsp) movss %xmm1, 0x20(%rsp) movss %xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 0x80(%rsp) movss %xmm8, 0x70(%rsp) movq $0x6, 0x210(%rsp) movq $0x0, 0x218(%rsp) callq ffd <_calculateVerticesAndNormals+0x97d> movss 0x70(%rsp), %xmm8 movss 0x80(%rsp), %xmm7 movss 0x60(%rsp), %xmm6 movss 0x40(%rsp), %xmm5 movss 0x50(%rsp), %xmm4 movss 0x30(%rsp), %xmm3 movss 0x10(%rsp), %xmm2 movss 0x20(%rsp), %xmm1 movss (%rsp), %xmm0 jmpq e59 <_calculateVerticesAndNormals+0x7d9> nopl 0x0(%rax, %rax, 1) mov $0x69, %edx mov $0x6, %edi mov $0x0, %esi movss %xmm0, (%rsp) movss %xmm1, 0x20(%rsp) movss 
%xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 0x80(%rsp) movss %xmm8, 0x70(%rsp) movq $0x6, 0x200(%rsp) movq $0x0, 0x208(%rsp) callq 10a5 <_calculateVerticesAndNormals+0xa25> movss 0x70(%rsp), %xmm8 movss 0x80(%rsp), %xmm7 movss 0x60(%rsp), %xmm6 movss 0x40(%rsp), %xmm5 movss 0x50(%rsp), %xmm4 movss 0x30(%rsp), %xmm3 movss 0x10(%rsp), %xmm2 movss 0x20(%rsp), %xmm1 movss (%rsp), %xmm0 jmpq e27 <_calculateVerticesAndNormals+0x7a7> nopl 0x0(%rax, %rax, 1) mov $0x68, %edx mov $0x6, %edi mov $0x0, %esi movss %xmm0, (%rsp) movss %xmm1, 0x20(%rsp) movss %xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 0x80(%rsp) movss %xmm8, 0x70(%rsp) movss %xmm9, 0x90(%rsp) movss %xmm10, 0xa0(%rsp) movss %xmm11, 0xb0(%rsp) movq $0x6, 0x1f0(%rsp) movq $0x0, 0x1f8(%rsp) callq 116b <_calculateVerticesAndNormals+0xaeb> movss 0x70(%rsp), %xmm8 movss 0xb0(%rsp), %xmm11 movss 0xa0(%rsp), %xmm10 movss 0x90(%rsp), %xmm9 movss 0x80(%rsp), %xmm7 movss 0x60(%rsp), %xmm6 movss 0x40(%rsp), %xmm5 movss 0x50(%rsp), %xmm4 movss 0x30(%rsp), %xmm3 movss 0x10(%rsp), %xmm2 movss 0x20(%rsp), %xmm1 movss (%rsp), %xmm0 jmpq d3a <_calculateVerticesAndNormals+0x6ba> nopw 0x0(%rax, %rax, 1) mov $0x68, %edx mov $0x6, %edi mov $0x0, %esi movss %xmm0, (%rsp) movss %xmm1, 0x20(%rsp) movss %xmm2, 0x10(%rsp) movss %xmm3, 0x30(%rsp) movss %xmm4, 0x50(%rsp) movss %xmm5, 0x40(%rsp) movss %xmm6, 0x60(%rsp) movss %xmm7, 0x80(%rsp) movss %xmm8, 0x70(%rsp) movss %xmm9, 0x90(%rsp) movss %xmm10, 0xa0(%rsp) movss %xmm11, 0xb0(%rsp) movq $0x6, 0x1e0(%rsp) movq $0x0, 0x1e8(%rsp) callq 1253 <_calculateVerticesAndNormals+0xbd3> movss 0x70(%rsp), %xmm8 movss 0xb0(%rsp), %xmm11 movss 0xa0(%rsp), %xmm10 movss 0x90(%rsp), %xmm9 movss 0x80(%rsp), %xmm7 movss 0x60(%rsp), %xmm6 movss 0x40(%rsp), %xmm5 movss 0x50(%rsp), %xmm4 movss 0x30(%rsp), %xmm3 movss 0x10(%rsp), %xmm2 movss 
0x20(%rsp), %xmm1 movss (%rsp), %xmm0 jmpq cfd <_calculateVerticesAndNormals+0x67d> mov $0x12, %r8d mov $0x6, %edx mov $0x0, %ecx mov $0x9, %edi mov $0x0, %esi movss %xmm9, 0x90(%rsp) movq $0x6, 0x220(%rsp) movq $0x0, 0x228(%rsp) movq $0x9, 0x230(%rsp) movq $0x0, 0x238(%rsp) callq 1308 <_calculateVerticesAndNormals+0xc88> movss 0x90(%rsp), %xmm9 jmpq 7fa <_calculateVerticesAndNormals+0x17a> add $0x268, %rsp pop %rbx pop %rbp pop %r12 pop %r13 pop %r14 pop %r15 retq nopl 0x0(%rax) Bye, bearophile