>Submitter-Id:  net
>Originator:    Petr Vandrovec <[EMAIL PROTECTED]>
>Organization:  The Debian project
>Confidential:  no
>Synopsis:      Non-optimal code
>Severity:      non-critical
>Priority:      low
>Category:      optimization
>Class:         sw-bug
>Release:       3.0.1 CVS 20010728
>Environment:
System: Debian GNU/Linux (testing/unstable)
Architecture: i686
        
host: i386-linux
build: i386-linux
target: i386-linux
configured with: ../src/configure -v 
--enable-languages=c,c++,java,f77,proto,objc --prefix=/usr 
--infodir=/share/info --mandir=/share/man --enable-shared --with-gnu-as 
--with-gnu-ld --with-system-zlib --enable-long-long --enable-nls 
--without-included-gettext --disable-checking --enable-threads=posix 
--enable-java-gc=boehm --with-cpp-install-dir=bin --enable-objc-gc i386-linux
>Description:
[ Reported to the Debian BTS as report #107123.
  Please CC [EMAIL PROTECTED] on replies.
  Log of report can be found at http://bugs.debian.org/107123 ]
        
Hi,
  this simple code snippet generates somewhat suboptimal code
when compiled with gcc-3.0. A plain '--b->cnt' is correctly
assembled as 'decl (somememory)', but for some unknown reason,
when the result of this expression is tested for zero (which is
available on i386 in the Z flag after a simple decrement), gcc loads
the value into a register, decrements it, then even tests it for
zero again, and finally stores it back to memory ... This is visible
in both freeblock and freeblockC.

  The second problem is that when the decremented field in the
structure is not an integer (for example, a char) and the function is
chained into another one with the same arguments, some of the
arguments are, for an unknown reason, written back to the stack,
although their value did not change (I even tried to mark the argument
as 'struct blockChar* const b', but the generated code did not
change). This is visible in freeblockC only.

  These are all acceptable because gcc-2.95.x generated
even worse code.

  But what is not acceptable happens in readAsyncCallback.
For some strange, unknown reason it allocates 16 bytes of stack
(note that it was compiled with
-mpreferred-stack-boundary=2, so it is not for stack alignment).
It is the smallest example of a problem I'm suffering from - I have
a function which reserves 364 bytes on the stack with gcc-3.0, but
only 120 bytes when compiled with gcc-2.95, and the code generated by
gcc-3.0 does not even touch a couple of on-stack variables
which were optimized away, but then somehow left in the stack frame.

                                Thanks,
                                        Petr Vandrovec
                                        [EMAIL PROTECTED]

/* Test structure whose counter field is an unsigned int. */
struct blockInt {
        unsigned int cnt;       /* decremented by freeblock() and simpleDec() */
};

/* External function; only declared here, no definition is part of this test case. */
void __freeblock(struct blockInt*);

/*
 * Pre-decrements b->cnt and calls __freeblock(b) when the new value is
 * non-zero.
 *
 * With gcc 3.0.1 at -O2 the quoted assembly shows the counter being
 * loaded into %eax, decremented, tested with testl and stored back,
 * rather than a single decl on memory followed by a test of the flags.
 */
void freeblock(struct blockInt* b) {
        if (--b->cnt) __freeblock(b);
}

/*
 * Pre-decrements b->cnt and discards the result.  Reference case: the
 * quoted assembly shows this compiled to a single 'decl (%eax)'.
 */
void simpleDec(struct blockInt* b) {
        --b->cnt;
}

/* Variant of struct blockInt whose counter is an unsigned char, followed by a pointer member. */
struct blockChar {
        unsigned char cnt;      /* decremented by freeblockC() and simpleDecC() */
        void* address;          /* not accessed by any function in this test case */
};

/* External function; only declared here, no definition is part of this test case. */
void __freeblockC(struct blockChar*);

/*
 * Same shape as freeblock(), but on the unsigned char counter:
 * pre-decrements b->cnt and calls __freeblockC(b) when the new value is
 * non-zero.
 *
 * Besides the load/decrement/test/store sequence, the quoted assembly
 * shows the unchanged argument being written back to 4(%esp) before the
 * jump to __freeblockC.
 */
void freeblockC(struct blockChar* b) {
        if (--b->cnt) __freeblockC(b);
}

/*
 * Pre-decrements the unsigned char counter and discards the result.
 * Reference case: the quoted assembly shows a single 'decb (%eax)'.
 */
void simpleDecC(struct blockChar* b) {
        --b->cnt;
}
/* Structure operated on by __LBL_sSignal() and passed to LBL_sSignal(). */
struct semaphore {
        void*   queue;          /* tested for non-null in __LBL_sSignal() */
        int     value;          /* pre-decremented in __LBL_sSignal() */
};

/* Request structure; readAsyncCallback() passes its sema member to __LBL_sSignal(). */
struct LB_async {
        struct semaphore*               sema;
};

/* External function; only declared here, no definition is part of this test case. */
void LBL_sSignal(struct semaphore*);

/*
 * Inline helper: pre-decrements sem->value and, only if the new value is
 * zero and sem->queue is non-null, calls the out-of-line LBL_sSignal(sem).
 * Because of && short-circuiting, sem->queue is read only when the
 * decremented value is zero.
 */
static inline void __LBL_sSignal(struct semaphore* sem) {
        if (!--sem->value && sem->queue) {
                LBL_sSignal(sem);
        }
}

/*
 * Invokes the inline helper __LBL_sSignal() on req->sema.
 *
 * This is the function for which the quoted gcc 3.0.1 assembly reserves
 * 16 bytes of stack ('subl $16, %esp') even though it was compiled with
 * -mpreferred-stack-boundary=2.
 */
void readAsyncCallback(struct LB_async* req) {
        __LBL_sSignal(req->sema);
}

---------- generated asm ----------

        .file   "x.c"
# GNU C version 3.0.1 20010728 (Debian prerelease) (i386-linux)
#       compiled by GNU C version 3.0.1 20010728 (Debian prerelease).
# options passed:  -lang-c -D__GNUC__=3 -D__GNUC_MINOR__=0
# -D__GNUC_PATCHLEVEL__=1 -D__ELF__ -Dunix -Dlinux -D__ELF__ -D__unix__
# -D__linux__ -D__unix -D__linux -Asystem=posix -D__OPTIMIZE__
# -D__STDC_HOSTED__=1 -W -Wall -Acpu=i386 -Amachine=i386 -Di386 -D__i386
# -D__i386__ -D__tune_i386__ -mpreferred-stack-boundary=2 -O2 -W -Wall
# -fomit-frame-pointer -fverbose-asm
# options enabled:  -fdefer-pop -fomit-frame-pointer
# -foptimize-sibling-calls -fcse-follow-jumps -fcse-skip-blocks
# -fexpensive-optimizations -fthread-jumps -fstrength-reduce -fpeephole
# -fforce-mem -ffunction-cse -finline -fkeep-static-consts -fcaller-saves
# -fpcc-struct-return -fgcse -frerun-cse-after-loop -frerun-loop-opt
# -fdelete-null-pointer-checks -fschedule-insns2 -fsched-interblock
# -fsched-spec -fbranch-count-reg -freorder-blocks -fcommon -fverbose-asm
# -fgnu-linker -fregmove -foptimize-register-move -fargument-alias
# -fstrict-aliasing -fident -fpeephole2 -fguess-branch-probability
# -fmath-errno -m80387 -mhard-float -mno-soft-float -mieee-fp
# -mfp-ret-in-387 -mpreferred-stack-boundary=2

        .text
        .align 4
.globl freeblock
        .type   freeblock,@function
freeblock:
        movl    4(%esp), %edx
        movl    (%edx), %eax
        decl    %eax
        testl   %eax, %eax
        movl    %eax, (%edx)
        je      .L2
        jmp     __freeblock
        .p2align 2
.L2:
        ret
.Lfe1:
        .size   freeblock,.Lfe1-freeblock
        .align 4
.globl simpleDec
        .type   simpleDec,@function
simpleDec:
        movl    4(%esp), %eax
        decl    (%eax)
        ret
.Lfe2:
        .size   simpleDec,.Lfe2-simpleDec
        .align 4
.globl freeblockC
        .type   freeblockC,@function
freeblockC:
        movl    4(%esp), %edx
        movb    (%edx), %al
        decl    %eax
        testb   %al, %al
        movb    %al, (%edx)
        je      .L5
        movl    %edx, 4(%esp)
        jmp     __freeblockC
        .p2align 2
.L5:
        ret
.Lfe3:
        .size   freeblockC,.Lfe3-freeblockC
        .align 4
.globl simpleDecC
        .type   simpleDecC,@function
simpleDecC:
        movl    4(%esp), %eax
        decb    (%eax)
        ret
.Lfe4:
        .size   simpleDecC,.Lfe4-simpleDecC
        .align 4
.globl readAsyncCallback
        .type   readAsyncCallback,@function
readAsyncCallback:
        subl    $16, %esp
        movl    20(%esp), %eax
        movl    (%eax), %edx
        movl    4(%edx), %eax
        decl    %eax
        testl   %eax, %eax
        movl    %eax, 4(%edx)
        je      .L11
.L10:
        addl    $16, %esp
        ret
        .p2align 2
.L11:
        movl    (%edx), %ecx
        testl   %ecx, %ecx
        je      .L10
        pushl   %edx
        call    LBL_sSignal
        popl    %eax
        jmp     .L10
.Lfe5:
        .size   readAsyncCallback,.Lfe5-readAsyncCallback
        .ident  "GCC: (GNU) 3.0.1 20010728 (Debian prerelease)"


>How-To-Repeat:
        
>Fix:
        


Reply via email to