
Sorry for my slow answer; I have been quite busy for days.

> I've never heard that claim before. Do you have evidence for that?

I compare/convert code to D every day, so I am aware that D code compiled with 
DMD is often slower than C/C++ code compiled with GCC. For some years I have even 
kept a collection of snippets of slow code.

But I am also aware that the low performance has many different causes, like 
missing inlining, missing loop unrolling, etc., so spotting a clear and 
small case of integer arithmetic code that causes a slowdown, to give you 
evidence, is not easy. So I am sorry for my overly broad claim. 

>If it is true, there's a strong possibility that it's a small, fixable issue 
>(for example, DMD used to have terrible performance for ulong multiplication).<

You are right, the case I'm going to show is a precise problem that's fixable. 


// C code
#include "limits.h"
#include "stdio.h"

// Returns x / 7 using C's signed integer division (truncates toward zero).
// The divisor is a compile-time constant, which is what lets the compiler
// replace the division with a multiply-and-shift sequence.
int divideBySeven(int x) {
    return x / 7;
}

// Benchmark driver: calls divideBySeven for every i from INT_MAX - 1 down
// to 0 and prints the last result so the loop cannot be discarded as dead code.
int main() {
    int i = INT_MAX;
    int r;
    // `i--` tests the old value and then decrements, so the body sees
    // INT_MAX - 1 ... 0; the final call is divideBySeven(0).
    while (i--)
        r = divideBySeven(i);

    printf("%d\n", r);
    return 0;
}


// D code
// Returns x / 7 using signed integer division by a compile-time constant.
int divideBySeven(int x) {
    return x / 7;
}

// Benchmark driver: calls divideBySeven for every i from int.max - 1 down
// to 0 and prints the last result.
// NOTE(review): printf is used without an import; presumably this relies on
// the implicit declaration provided by the D runtime of the time. Add an
// import of the C stdio bindings if your compiler rejects it.
void main() {
    int i = int.max;
    int r;
    // `i--` tests the old value and then decrements, so the final call is
    // divideBySeven(0).
    while (i--)
        r = divideBySeven(i);
    printf("%d\n", r);
}


Asm from the C version:

        # divideBySeven(int x), GCC output (AT&T syntax, 32-bit, x on the stack).
        # No idiv: x / 7 is computed as
        #   ((high32(x * 0x92492493) + x) >> 2) - (x >> 31)
        # NOTE(review): the entry label and the final ret were missing from
        # the listing and are restored here; the label name is inferred from
        # the _printf / ___main symbol mangling.
_divideBySeven:
        pushl   %ebx
        movl    $-1840700269, %ebx      # 0x92492493 = ceil(2^34 / 7), the reciprocal constant
        movl    8(%esp), %ecx           # ecx = x (offset 8 because %ebx was just pushed)
        movl    %ebx, %eax
        popl    %ebx
        imull   %ecx                    # edx:eax = constant * x (signed 64-bit product)
        leal    (%edx,%ecx), %eax       # eax = high half + x (the constant is negative as a signed value)
        sarl    $31, %ecx               # ecx = -1 if x < 0, else 0
        sarl    $2, %eax
        subl    %ecx, %eax              # +1 for negative x so the result truncates toward zero
        ret

        # main, GCC output (AT&T syntax). divideBySeven has been inlined into
        # the loop, which uses the same multiply-and-shift sequence, not idiv.
        #   %ecx = i, %ebx = reciprocal constant, %edx = r (current quotient)
        # NOTE(review): the _main and L4 labels and the final ret were missing
        # from the listing and are restored here; `jne L4` had no target.
_main:
        leal    4(%esp), %ecx
        andl    $-16, %esp              # align the stack to 16 bytes
        pushl   -4(%ecx)
        pushl   %ebx
        movl    $-1840700269, %ebx      # 0x92492493 = ceil(2^34 / 7), hoisted out of the loop
        pushl   %ecx
        subl    $20, %esp
        call    ___main
        movl    $2147483646, %ecx       # i = INT_MAX - 1, the first value the loop body sees
        .p2align 4,,10
L4:
        movl    %ecx, %eax
        imull   %ebx                    # edx:eax = i * constant
        movl    %ecx, %eax
        addl    %ecx, %edx              # edx = high half + i
        sarl    $31, %eax               # eax = sign of i (-1 or 0)
        sarl    $2, %edx
        decl    %ecx                    # i--
        subl    %eax, %edx              # edx = i / 7 for the pre-decrement i
        cmpl    $-1, %ecx               # stop once i == 0 has been processed
        jne     L4
        movl    %edx, 4(%esp)           # printf argument: r
        movl    $LC0, (%esp)            # printf format string
        call    _printf
        addl    $20, %esp
        xorl    %eax, %eax              # return 0
        popl    %ecx
        popl    %ebx
        leal    -4(%ecx), %esp          # restore the stack pointer saved before alignment
        ret
        .def    _printf;        .scl    2;      .type   32;     .endef


Asm from the D version:

; divideBySeven(int x), DMD output: a real signed division by 7 rather than
; a multiply-and-shift sequence. No load of x is emitted, so x is expected
; to already be in EAX on entry; the quotient is left in EAX.
; NOTE(review): cdq and ret were missing from the listing and are restored here.
_D9int_div_d13divideBySevenFiZi comdat
                mov     ECX,7
                cdq                     ; sign-extend EAX into EDX:EAX, required before idiv
                idiv    ECX             ; EAX = EDX:EAX / 7
                ret

; D main, DMD output. divideBySeven is inlined, but every iteration still
; executes idiv; this is the slow path the post is about.
;   EBX = i, ESI = r
; NOTE(review): cdq and ret were missing from the listing and are restored
; here. The label offsets confirm the cdq: L11 = 11h and L24 = 24h only add
; up if a 1-byte instruction sits between `mov ECX,7` and `idiv ECX`.
__Dmain comdat
L0:             push    EAX
                push    EBX
                mov     EBX,07FFFFFFFh  ; i = int.max
                push    ESI
                xor     ESI,ESI
                test    EBX,EBX         ; test the old i ...
                lea     EBX,-1[EBX]     ; ... then i-- (lea leaves the flags from test intact)
                je      L24
L11:            mov     EAX,EBX
                mov     ECX,7
                cdq                     ; sign-extend EAX into EDX:EAX, required before idiv
                idiv    ECX             ; EAX = i / 7
                test    EBX,EBX
                mov     ESI,EAX         ; r = quotient
                lea     EBX,-1[EBX]     ; i-- without disturbing the flags
                jne     L11
L24:            push    ESI             ; printf argument: r
                mov     EDX,offset FLAT:_DATA
                push    EDX             ; printf format string
                call    near ptr _printf
                add     ESP,8
                xor     EAX,EAX         ; return 0
                pop     ESI
                pop     EBX
                pop     ECX             ; releases the slot pushed at L0
                ret


For a more realistic case, see:


