For this code:

/* Test case from the report: stores 0 into p[0] .. p[63], one element per
 * iteration.
 * NOTE(review): declared to return int but contains no return statement;
 * kept exactly as reported so it still matches the assembly quoted for it. */
int f(unsigned int *p) {
    for (int i = 0; i < 64; ++i)
        p[i] = 0;
}

I'd expect to get something like the output for this code:

/* Hand-written variant of the same clearing loop: zeroes 64*4 bytes starting
 * at p, using one optional leading unsigned-int store, a run of unsigned-long
 * stores, and one optional trailing unsigned-int store.
 * NOTE(review): declared int with no return statement, and the pointer
 * conversions between unsigned int * and unsigned long * are written without
 * casts; kept as reported.
 * NOTE(review): the byte arithmetic (c -= 4 / c -= 8) presumes a 4-byte
 * unsigned int and an 8-byte unsigned long -- confirm for the target. */
int f2(unsigned int *p) {
    /* c counts the bytes that still have to be cleared. */
    int c = 64*4;
    /* If p is not a multiple of 8, clear one unsigned int first. */
    if ((unsigned long) p % 8) *p++ = 0, c -= 4;
    unsigned long *l = p;
    /* Clear one unsigned long per iteration while at least 8 bytes remain. */
    do *l++ = 0; while ((c -= 8) >= 8);
    p = l;
    /* Any non-zero remainder is cleared with one more unsigned int store. */
    if (c) *p++ = 0;
}

which is

# f2: compiler output quoted for the hand-written C function f2.
# Operand order is "op a,b,dest". $31 is the Alpha zero register and $16 is
# presumably the first integer argument (p) under the Alpha calling standard.
# Register roles as used below:
#   $16  current store address
#   $4   total byte count for the quadword loop (256, or 252 after the
#        leading longword store)
#   $3   bytes written so far by the quadword loop
#   $2   bytes still remaining
#   $1   scratch for comparison results
f2:
        and $16,7,$1            # $1 = p & 7
        lda $4,256($31)         # $4 = 256
        beq $1,$L11             # low three address bits clear: skip leading store
        stl $31,0($16)          # store a 32-bit zero at p
        lda $4,252($31)         # $4 = 252, one longword already cleared
        lda $16,4($16)          # advance the address by 4
$L11:
        mov $31,$3              # $3 = 0
        .align 4
$L12:
        lda $3,8($3)            # $3 += 8
        stq $31,0($16)          # store a 64-bit zero
        lda $16,8($16)          # advance the address by 8
        subl $4,$3,$2           # $2 = $4 - $3 (bytes remaining)
        cmple $2,7,$1           # $1 = ($2 <= 7)
        beq $1,$L12             # loop again while more than 7 bytes remain
        beq $2,$L17             # nothing remaining: return
        stl $31,0($16)          # trailing 32-bit zero
$L17:
        ret

but I get:

# f: compiler output the reporter obtained for the simple loop in f.
# Operand order is "op a,b,dest". $31 is the Alpha zero register and $16 is
# presumably the first integer argument (p) under the Alpha calling standard.
# The code consists of three store loops:
#   $L12  32-bit stores for the first $5 elements ($5 is 0 or 1)
#   $L10  64-bit stores, $4 iterations covering $8 = 2*$4 elements
#   $L14  32-bit stores for whatever element count is left in $6
# Register roles as used below:
#   $5   element count for the first loop
#   $6   elements still to be stored (starts at 64)
#   $7   index of the next element for the last loop
#   $22  elements left after the first loop (64 - $5)
f:
        and $16,4,$1            # $1 = p & 4
        lda $5,64($31)          # $5 = 64
        lda $6,64($31)          # $6 = 64
        mov $31,$7              # $7 = 0
        cmpult $31,$1,$1        # $1 = (0 < $1) unsigned, i.e. 1 if bit 2 of p is set
        cmplt $1,64,$2          # $2 = ($1 < 64)
        cmovne $2,$1,$5         # if $2 != 0 then $5 = $1
        ble $5,$L4              # $5 <= 0: skip the first loop
        mov $31,$3              # $3 = 0
        mov $31,$4              # $4 = 0
        .align 4
$L12:
        lda $3,1($3)            # $3 += 1
        s4addq $4,$16,$1        # $1 = p + 4*$4
        addl $31,$3,$4          # $4 = $3 as a sign-extended 32-bit value
        stl $31,0($1)           # store a 32-bit zero
        addl $31,$4,$2          # $2 = $4 as a sign-extended 32-bit value
        cmple $5,$2,$1          # $1 = ($5 <= $2)
        beq $1,$L12             # loop while $2 < $5
        lda $1,64($31)          # $1 = 64
        mov $2,$7               # $7 = number of elements stored so far
        subl $1,$4,$6           # $6 = 64 - $4
$L4:
        cmpeq $5,64,$1          # $1 = ($5 == 64)
        bne $1,$L6              # $5 == 64: return
        lda $1,64($31)          # $1 = 64
        subq $1,$5,$22          # $22 = 64 - $5
        sra $22,1,$4            # $4 = $22 >> 1 (arithmetic)
        addq $4,$4,$8           # $8 = 2*$4
        ble $8,$L8              # $8 <= 0: skip the 64-bit store loop
        s4addq $5,$16,$2        # $2 = p + 4*$5
        mov $31,$3              # $3 = 0
        .align 4
$L10:
        lda $3,1($3)            # $3 += 1
        stq $31,0($2)           # store a 64-bit zero
        lda $2,8($2)            # advance the address by 8
        addl $31,$3,$1          # $1 = $3 as a sign-extended 32-bit value
        cmple $4,$1,$1          # $1 = ($4 <= $1)
        beq $1,$L10             # loop while the iteration count is below $4
        subl $6,$8,$6           # $6 -= $8
        addl $7,$8,$7           # $7 += $8
$L8:
        cmpeq $22,$8,$1         # $1 = ($22 == $8)
        bne $1,$L6              # equal: no elements left over, return
        mov $31,$2              # $2 = 0
        .align 4
$L14:
        addl $2,$7,$1           # $1 = $2 + $7 (element index)
        subl $6,1,$6            # $6 -= 1
        lda $2,1($2)            # $2 += 1
        s4addq $1,$16,$1        # $1 = p + 4*index
        stl $31,0($1)           # store a 32-bit zero
        bne $6,$L14             # loop until $6 reaches 0
$L6:
        ret

which is pretty weird and inefficient code...

-- 
           Summary: Inefficient code generated by -ftree-vectorize on Alpha
           Product: gcc
           Version: 4.0.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P2
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: falk at debian dot org
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: alphaev68-unknown-linux-gnu
  GCC host triplet: alphaev68-unknown-linux-gnu
GCC target triplet: alphaev68-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18557

Reply via email to